File size: 816 Bytes
0542bf8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import os

def find_nltk_data_dir():
    """Locate the NLTK 'punkt' tokenizer data on this machine.

    Candidate ``nltk_data`` directories are probed in order and the first one
    that contains ``tokenizers/punkt`` wins:

    1. every entry of the ``NLTK_DATA`` environment variable (entries are
       separated by ``os.pathsep``), if it is set;
    2. ``~/nltk_data`` (per-user default on Linux/Mac);
    3. ``%APPDATA%/nltk_data`` (per-user default on Windows), only when
       ``APPDATA`` is actually set;
    4. the system-wide locations under ``/usr`` and ``/usr/local``.

    The outcome is also reported on stdout.

    Returns:
        The path to the ``tokenizers/punkt`` entry as a string, or ``None``
        when no candidate directory contains it.
    """
    # Honor user-configured locations first; empty entries (e.g. from a
    # trailing separator or an unset variable) are dropped.
    search_paths = [
        entry
        for entry in os.environ.get('NLTK_DATA', '').split(os.pathsep)
        if entry
    ]

    search_paths.append(os.path.expanduser('~/nltk_data'))  # Linux/Mac default

    # Only consider the Windows default when APPDATA is defined. Joining an
    # empty string would yield the relative path 'nltk_data' and make the
    # result depend on the current working directory.
    appdata = os.environ.get('APPDATA')
    if appdata:
        search_paths.append(os.path.join(appdata, 'nltk_data'))  # Windows default

    search_paths.extend([
        '/usr/local/share/nltk_data',
        '/usr/share/nltk_data',
        '/usr/local/lib/nltk_data',
        '/usr/lib/nltk_data',
    ])

    for path in search_paths:
        punkt_path = os.path.join(path, 'tokenizers', 'punkt')
        if os.path.exists(punkt_path):
            print(f"'punkt' data found at: {punkt_path}")
            return punkt_path

    print("Could not find 'punkt' data. Please ensure it is downloaded.")
    return None

# Run the search as soon as this file is executed or imported. The returned
# path is not stored here; the function reports its outcome via print().
find_nltk_data_dir()