Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -6,27 +6,33 @@ import os
|
|
6 |
# Initialize Groq client with the API key taken from the environment.
# SECURITY: the previously hard-coded key was committed to a public Space and
# must be revoked — never embed secrets in source. Raises KeyError early if
# GROQ_API_KEY is not set, rather than failing later on the first API call.
client = Groq(api_key=os.environ["GROQ_API_KEY"])
8 |
|
9 |
-
#
|
10 |
-
|
11 |
-
"DSM": "
|
12 |
-
"Personality": "
|
13 |
-
"SearchForMeaning": "
|
14 |
}
|
15 |
|
16 |
-
# Function to load and preprocess the data from books
|
17 |
-
def load_data(
|
18 |
data = []
|
19 |
-
for title,
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
return Dataset.from_list(data)
|
27 |
|
28 |
# Load and preprocess dataset for fine-tuning
|
29 |
-
dataset = load_data(
|
30 |
|
31 |
# Load pretrained model and tokenizer from Hugging Face
|
32 |
model_name = "gpt2" # Replace with a larger model if needed and feasible
|
|
|
# Initialize Groq client with the API key taken from the environment.
# SECURITY: the previously hard-coded key was committed to a public Space and
# must be revoked — never embed secrets in source. Raises KeyError early if
# GROQ_API_KEY is not set, rather than failing later on the first API call.
client = Groq(api_key=os.environ["GROQ_API_KEY"])
8 |
|
# Source books for the fine-tuning corpus (replace with the names the books
# were uploaded under on Hugging Face). Maps a short topic key to the
# uploaded file's name; iteration order below is the insertion order.
book_names = dict(
    DSM="Diagnostic_and_statistical_manual_of_mental_disorders_DSM5.pdf",
    Personality="Theories_of_Personality_10.pdf",
    SearchForMeaning="Mans_Search_For_Meaning.pdf",
)
|
# Function to load and preprocess the data from books (served as Hugging Face datasets)
def load_data(book_names):
    """Load each configured book and split its text into paragraph records.

    Parameters
    ----------
    book_names : dict[str, str]
        Maps a short title key to the Hugging Face dataset name holding
        that book's text.

    Returns
    -------
    Dataset
        A ``datasets.Dataset`` with one ``{"text": paragraph}`` row per
        non-empty paragraph across all books, in book order.
    """
    # Function-scope import keeps the file's top-level imports untouched;
    # the `datasets` package is already a dependency (see `Dataset` below).
    from datasets import load_dataset

    data = []
    for title, book_name in book_names.items():
        # FIX: `Dataset.from_huggingface_dataset` does not exist in the
        # `datasets` API (it raised AttributeError at runtime);
        # `load_dataset` is the supported entry point for Hub-hosted data.
        # NOTE(review): the configured names look like PDF filenames — they
        # must actually be loadable dataset repo IDs on the Hub; confirm how
        # the books were uploaded.
        book = load_dataset(book_name, split="train")

        # A dataset's "text" column is a *list* of strings (one per row), so
        # split each row into paragraphs instead of calling .split on the
        # column object itself (which would also raise AttributeError).
        for text in book["text"]:
            for paragraph in text.split("\n\n"):
                if paragraph.strip():  # skip empty/whitespace-only paragraphs
                    data.append({"text": paragraph.strip()})

    return Dataset.from_list(data)
|
# Build the fine-tuning corpus from the configured books.
dataset = load_data(book_names)

# Pretrained model/tokenizer to fine-tune from the Hugging Face Hub.
model_name = "gpt2"  # Replace with a larger model if needed and feasible