Spaces:

kritsadaK
/

US_political_fake_news_classification

Sleeping

kritsadaK committed on Sep 10, 2024

Commit

db87ccc

1 Parent(s): c1ec3ab

Fix WordNetLemmatizer import and download

Files changed (1) hide show

app.py CHANGED Viewed

@@ -10,6 +10,9 @@ import string
 import pickle
 import os
 import os
 # Set NLTK data path to the local 'nltk_data' folder in your project
 nltk_data_path = 'nltk_data'
@@ -23,9 +26,12 @@ if not os.path.exists(os.path.join(nltk_data_path, 'corpora/stopwords')):
 if not os.path.exists(os.path.join(nltk_data_path, 'tokenizers/punkt')):
     nltk.download('punkt', download_dir=nltk_data_path)
-# Now you can safely use stopwords and punkt
-from nltk.corpus import stopwords
-stop_words = set(stopwords.words('english'))
 # 1. Remove punctuation
 def remove_punctuation(text):

 import pickle
 import os
 import os
+from nltk.tokenize import word_tokenize
+from nltk.corpus import stopwords
+from nltk.stem import WordNetLemmatizer  # Import WordNetLemmatizer
 # Set NLTK data path to the local 'nltk_data' folder in your project
 nltk_data_path = 'nltk_data'
 if not os.path.exists(os.path.join(nltk_data_path, 'tokenizers/punkt')):
     nltk.download('punkt', download_dir=nltk_data_path)
+# Check if 'wordnet' is already downloaded, if not, download it
+if not os.path.exists(os.path.join(nltk_data_path, 'corpora/wordnet')):
+    nltk.download('wordnet', download_dir=nltk_data_path)
+# Initialize the lemmatizer
+lemmatizer = WordNetLemmatizer()
 # 1. Remove punctuation
 def remove_punctuation(text):