|
import os |
|
|
|
|
|
def find_nltk_data_dir():
    """Locate the NLTK 'punkt' tokenizer data in the usual nltk_data locations.

    Candidate ``nltk_data`` roots are probed in this order:

    1. every non-empty entry of the ``NLTK_DATA`` environment variable
       (entries separated by ``os.pathsep``),
    2. ``~/nltk_data``,
    3. ``%APPDATA%/nltk_data`` (only when ``APPDATA`` is set),
    4. the system-wide POSIX locations under ``/usr``.

    A one-line status message is printed to stdout in either outcome.

    Returns:
        The path of the first existing ``<root>/tokenizers/punkt`` entry
        (note: the punkt path itself, not the ``nltk_data`` root), or
        ``None`` when no candidate exists.
    """
    search_paths = []

    # NLTK_DATA may list several directories; NLTK consults these before its
    # built-in defaults, so they are probed first here as well.
    env_roots = os.environ.get('NLTK_DATA', '').split(os.pathsep)
    search_paths.extend(root for root in env_roots if root)

    search_paths.append(os.path.expanduser('~/nltk_data'))

    # Only probe APPDATA when it is actually set: joining an empty string
    # would yield the relative path 'nltk_data' and silently search the
    # current working directory.
    appdata = os.environ.get('APPDATA')
    if appdata:
        search_paths.append(os.path.join(appdata, 'nltk_data'))

    search_paths.extend([
        '/usr/local/share/nltk_data',
        '/usr/share/nltk_data',
        '/usr/local/lib/nltk_data',
        '/usr/lib/nltk_data',
    ])

    for path in search_paths:
        punkt_path = os.path.join(path, 'tokenizers', 'punkt')
        if os.path.exists(punkt_path):
            print(f"'punkt' data found at: {punkt_path}")
            return punkt_path

    print("Could not find 'punkt' data. Please ensure it is downloaded.")
    return None
|
|
|
|
|
if __name__ == "__main__":
    # Run the lookup only when executed as a script, so that importing this
    # module does not trigger a filesystem search and console output.
    find_nltk_data_dir()
|
|