kritsadaK committed on
Commit
db87ccc
·
1 Parent(s): c1ec3ab

Fix WordNetLemmatizer import and download

Browse files
Files changed (1) hide show
  1. app.py +9 -3
app.py CHANGED
@@ -10,6 +10,9 @@ import string
10
  import pickle
11
  import os
12
  import os
 
 
 
13
 
14
  # Set NLTK data path to the local 'nltk_data' folder in your project
15
  nltk_data_path = 'nltk_data'
@@ -23,9 +26,12 @@ if not os.path.exists(os.path.join(nltk_data_path, 'corpora/stopwords')):
23
  if not os.path.exists(os.path.join(nltk_data_path, 'tokenizers/punkt')):
24
  nltk.download('punkt', download_dir=nltk_data_path)
25
 
26
- # Now you can safely use stopwords and punkt
27
- from nltk.corpus import stopwords
28
- stop_words = set(stopwords.words('english'))
 
 
 
29
 
30
  # 1. Remove punctuation
31
  def remove_punctuation(text):
 
10
  import pickle
11
  import os
12
  import os
13
+ from nltk.tokenize import word_tokenize
14
+ from nltk.corpus import stopwords
15
+ from nltk.stem import WordNetLemmatizer # Import WordNetLemmatizer
16
 
17
  # Set NLTK data path to the local 'nltk_data' folder in your project
18
  nltk_data_path = 'nltk_data'
 
26
  if not os.path.exists(os.path.join(nltk_data_path, 'tokenizers/punkt')):
27
  nltk.download('punkt', download_dir=nltk_data_path)
28
 
29
+ # Check whether 'wordnet' is already downloaded; if not, download it
30
+ if not os.path.exists(os.path.join(nltk_data_path, 'corpora/wordnet')):
31
+ nltk.download('wordnet', download_dir=nltk_data_path)
32
+
33
+ # Initialize the lemmatizer
34
+ lemmatizer = WordNetLemmatizer()
35
 
36
  # 1. Remove punctuation
37
  def remove_punctuation(text):