Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
|
2 |
-
from datasets import Dataset
|
3 |
from groq import Groq
|
4 |
import os
|
5 |
|
@@ -8,27 +8,31 @@ client = Groq(api_key="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"
|
|
8 |
|
9 |
# Book names (replace with your uploaded book names on Hugging Face)
|
10 |
book_names = {
|
11 |
-
"DSM": "Diagnostic_and_statistical_manual_of_mental_disorders_DSM5
|
12 |
-
"Personality": "Theories_of_Personality_10
|
13 |
-
"SearchForMeaning": "Mans_Search_For_Meaning
|
14 |
}
|
15 |
|
16 |
# Function to load and preprocess the data from books (now using Hugging Face datasets)
|
17 |
def load_data(book_names):
|
18 |
data = []
|
19 |
for title, book_name in book_names.items():
|
20 |
-
#
|
21 |
-
#
|
22 |
-
#
|
23 |
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
|
|
|
|
|
|
|
|
32 |
return Dataset.from_list(data)
|
33 |
|
34 |
# Load and preprocess dataset for fine-tuning
|
|
|
1 |
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
|
2 |
+
from datasets import load_dataset, Dataset
|
3 |
from groq import Groq
|
4 |
import os
|
5 |
|
|
|
8 |
|
9 |
# Book names (replace with your uploaded book names on Hugging Face)
|
10 |
# Short key for each book -> the dataset name that load_data passes to load_dataset.
book_names = dict(
    DSM="Diagnostic_and_statistical_manual_of_mental_disorders_DSM5",
    Personality="Theories_of_Personality_10",
    SearchForMeaning="Mans_Search_For_Meaning",
)
|
15 |
|
16 |
# Function to load and preprocess the data from books (now using Hugging Face datasets)
|
17 |
def load_data(book_names):
    """Build a paragraph-level dataset from the books listed in *book_names*.

    Each value of *book_names* is passed to ``load_dataset``. The ``text``
    column of the resulting ``train`` split is reassembled into one string,
    split into paragraphs on blank lines, and every non-empty paragraph
    becomes one ``{"text": ...}`` record.

    Loading is best-effort: a book whose dataset cannot be loaded (or has no
    ``train``/``text``) is reported with ``print`` and skipped.

    Args:
        book_names: Mapping of short title -> dataset name. Only the values
            are used.

    Returns:
        The result of ``Dataset.from_list`` on the collected records.
    """
    data = []
    for book_name in book_names.values():
        try:
            dataset = load_dataset(book_name)  # Try to load dataset by name
            rows = dataset['train']['text']  # Adjust depending on dataset structure
            # The column is a sequence of row strings, not a single string:
            # calling .split() on it raised AttributeError, which the handler
            # below swallowed, so every book was silently skipped.
            # NOTE(review): assumes one row per line with blank lines kept as
            # empty rows, so joining on "\n" restores "\n\n" paragraph breaks
            # -- confirm against the uploaded datasets.
            text = rows if isinstance(rows, str) else "\n".join(rows)
        except Exception as e:
            print(f"Error loading dataset for {book_name}: {e}")
            continue

        # Split by paragraphs and skip empty ones.
        data.extend(
            {"text": paragraph.strip()}
            for paragraph in text.split("\n\n")
            if paragraph.strip()
        )

    return Dataset.from_list(data)
|
37 |
|
38 |
# Load and preprocess dataset for fine-tuning
|