Zeeshan42 committed on
Commit
41ac1e9
·
verified ·
1 Parent(s): b2a59e1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -15
app.py CHANGED
@@ -1,5 +1,5 @@
1
  from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
2
- from datasets import Dataset
3
  from groq import Groq
4
  import os
5
 
@@ -8,27 +8,31 @@ client = Groq(api_key="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"
8
 
9
  # Book names (replace with your uploaded book names on Hugging Face)
10
  book_names = {
11
- "DSM": "Diagnostic_and_statistical_manual_of_mental_disorders_DSM5.pdf",
12
- "Personality": "Theories_of_Personality_10.pdf",
13
- "SearchForMeaning": "Mans_Search_For_Meaning.pdf"
14
  }
15
 
16
  # Function to load and preprocess the data from books (now using Hugging Face datasets)
17
  def load_data(book_names):
18
  data = []
19
  for title, book_name in book_names.items():
20
- # Assuming books are stored in Hugging Face datasets or other accessible locations
21
- # Here you will load the dataset from Hugging Face directly if it's uploaded
22
- # For example, use `datasets.load_dataset` to load the books if they are uploaded
23
 
24
- # Example (replace with actual loading mechanism based on how the books are stored on Hugging Face):
25
- dataset = Dataset.from_huggingface_dataset(book_name)
26
-
27
- # Assuming the dataset contains the text, we split it by paragraphs
28
- paragraphs = dataset["text"].split("\n\n") # Adjust based on actual dataset structure
29
- for paragraph in paragraphs:
30
- if paragraph.strip(): # Skip empty paragraphs
31
- data.append({"text": paragraph.strip()})
 
 
 
 
32
  return Dataset.from_list(data)
33
 
34
  # Load and preprocess dataset for fine-tuning
 
1
  from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
2
+ from datasets import load_dataset, Dataset
3
  from groq import Groq
4
  import os
5
 
 
8
 
9
  # Book names (replace with your uploaded book names on Hugging Face)
10
# Short title -> Hugging Face dataset name for each source book.
book_names = {
    "DSM": "Diagnostic_and_statistical_manual_of_mental_disorders_DSM5",
    "Personality": "Theories_of_Personality_10",
    "SearchForMeaning": "Mans_Search_For_Meaning",
}
15
 
16
  # Function to load and preprocess the data from books (now using Hugging Face datasets)
17
def load_data(book_names):
    """Load book text from Hugging Face datasets and split it into paragraphs.

    Args:
        book_names: Mapping of short title -> Hugging Face dataset name
            (the dataset is expected to expose a "train" split with a
            "text" column -- TODO confirm against the uploaded datasets).

    Returns:
        A ``datasets.Dataset`` with a single "text" column, one row per
        non-empty paragraph. Books that fail to load are skipped with a
        printed warning rather than aborting the whole corpus build.
    """
    data = []
    for title, book_name in book_names.items():
        try:
            dataset = load_dataset(book_name)  # load by dataset name
            # BUG FIX: indexing a split by column name returns a LIST of
            # strings (one per row), not a single string, so the previous
            # `text.split("\n\n")` raised AttributeError and the except
            # clause silently skipped every book. Iterate the rows and
            # split each one into paragraphs instead.
            rows = dataset["train"]["text"]
            if isinstance(rows, str):  # defensive: tolerate a single string
                rows = [rows]
            for row in rows:
                for paragraph in row.split("\n\n"):
                    paragraph = paragraph.strip()
                    if paragraph:  # skip empty paragraphs
                        data.append({"text": paragraph})
        except Exception as e:
            # Best-effort loading: report the failure and continue so one
            # bad/missing book does not prevent the others from loading.
            print(f"Error loading dataset for {book_name}: {e}")
            continue
    return Dataset.from_list(data)
37
 
38
  # Load and preprocess dataset for fine-tuning