kritsadaK's picture
Added punkt data
0542bf8
raw
history blame contribute delete
816 Bytes
import os
def find_nltk_data_dir():
    """Locate the NLTK 'punkt' tokenizer data on this machine.

    Candidate ``nltk_data`` roots are probed in this order:

    1. every non-empty entry of the ``NLTK_DATA`` environment variable
       (entries separated by ``os.pathsep``),
    2. the per-user ``~/nltk_data`` directory,
    3. the ``%APPDATA%/nltk_data`` directory,
    4. a fixed list of system-wide Unix locations.

    The first root that contains ``tokenizers/punkt`` wins. The outcome is
    also reported on stdout.

    Returns:
        The path of the ``tokenizers/punkt`` entry that was found (note: this
        is the punkt path, not the ``nltk_data`` root), or ``None`` when no
        candidate contains it.
    """
    # Custom locations configured through NLTK_DATA take precedence over the
    # built-in defaults. Empty entries (variable unset, stray separators) are
    # dropped so they do not turn into accidental relative lookups.
    env_paths = [
        entry
        for entry in os.environ.get('NLTK_DATA', '').split(os.pathsep)
        if entry
    ]

    search_paths = env_paths + [
        os.path.expanduser('~/nltk_data'),  # Linux/Mac default
        # Windows default. When APPDATA is unset this collapses to the
        # relative path 'nltk_data', i.e. a directory under the current
        # working directory; kept as-is for backward compatibility.
        os.path.join(os.environ.get('APPDATA', ''), 'nltk_data'),
        '/usr/local/share/nltk_data',
        '/usr/share/nltk_data',
        '/usr/local/lib/nltk_data',
        '/usr/lib/nltk_data',
    ]

    for path in search_paths:
        punkt_path = os.path.join(path, 'tokenizers', 'punkt')
        if os.path.exists(punkt_path):
            print(f"'punkt' data found at: {punkt_path}")
            return punkt_path

    print("Could not find 'punkt' data. Please ensure it is downloaded.")
    return None
# Run the search as soon as this file is executed or imported. The function
# prints the outcome itself; the returned path is discarded here.
find_nltk_data_dir()