Zeeshan42 committed
Commit b2a59e1 · verified
1 Parent(s): 23fe81f

Update app.py

Files changed (1)
  1. app.py +21 -15
app.py CHANGED
@@ -6,27 +6,33 @@ import os
 # Initialize Groq client with your API key
 client = Groq(api_key="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")
 
-# Paths to your books
-book_paths = {
-    "DSM": "/app/Diagnostic and statistical manual of mental disorders _ DSM-5 ( PDFDrive.com ).pdf",
-    "Personality": "/b6c3v8_Theories_of_Personality_10.pdf",
-    "SearchForMeaning": "/Mans-Search-For-Meaning.pdf"
+# Book names (replace with your uploaded book names on Hugging Face)
+book_names = {
+    "DSM": "Diagnostic_and_statistical_manual_of_mental_disorders_DSM5.pdf",
+    "Personality": "Theories_of_Personality_10.pdf",
+    "SearchForMeaning": "Mans_Search_For_Meaning.pdf"
 }
 
-# Function to load and preprocess the data from books
-def load_data(paths):
+# Function to load and preprocess the data from books (now using Hugging Face datasets)
+def load_data(book_names):
     data = []
-    for title, path in paths.items():
-        with open(path, "r", encoding="utf-8", errors='ignore') as file:
-            text = file.read()
-        paragraphs = text.split("\n\n")  # Split by paragraphs (adjust as needed)
-        for paragraph in paragraphs:
-            if paragraph.strip():  # Skip empty paragraphs
-                data.append({"text": paragraph.strip()})
+    for title, book_name in book_names.items():
+        # Assuming the books are stored as Hugging Face datasets or in another
+        # accessible location, load each one directly from the Hub once uploaded,
+        # e.g. with `datasets.load_dataset` (requires: from datasets import load_dataset).
+
+        # Example (replace with the actual loading mechanism for how the books are stored):
+        dataset = load_dataset(book_name, split="train")
+
+        # dataset["text"] is a list of strings, so join it before splitting into paragraphs
+        paragraphs = "\n\n".join(dataset["text"]).split("\n\n")  # Adjust based on actual dataset structure
+        for paragraph in paragraphs:
+            if paragraph.strip():  # Skip empty paragraphs
+                data.append({"text": paragraph.strip()})
     return Dataset.from_list(data)
 
 # Load and preprocess dataset for fine-tuning
-dataset = load_data(book_paths)
+dataset = load_data(book_names)
 
 # Load pretrained model and tokenizer from Hugging Face
 model_name = "gpt2"  # Replace with a larger model if needed and feasible
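
The `load_dataset(book_name, split="train")` call above is still a placeholder: the values in `book_names` are PDF filenames, not dataset repo IDs, and the commit's comments explicitly leave the real loading mechanism open. A minimal sketch of one way to make `load_data` concrete, assuming the PDFs are uploaded to a hypothetical dataset repo `your-username/psychology-books` and using `huggingface_hub` plus `pypdf` for text extraction (neither library appears in the original):

from datasets import Dataset
from huggingface_hub import hf_hub_download
from pypdf import PdfReader

def load_data(book_names, repo_id="your-username/psychology-books"):  # hypothetical repo ID
    data = []
    for title, book_name in book_names.items():
        # Fetch the PDF file itself from the dataset repo on the Hub
        path = hf_hub_download(repo_id=repo_id, filename=book_name, repo_type="dataset")
        # Extract real text page by page instead of reading raw PDF bytes
        reader = PdfReader(path)
        text = "\n\n".join(page.extract_text() or "" for page in reader.pages)
        for paragraph in text.split("\n\n"):
            if paragraph.strip():  # Skip empty paragraphs
                data.append({"text": paragraph.strip()})
    return Dataset.from_list(data)

Unlike the removed `open(path).read()` version, this pulls actual text out of the PDFs; reading PDF bytes as text with `errors='ignore'` mostly yields compression artifacts rather than prose.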
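
The hunk ends just as the gpt2 tokenizer and model are loaded, so the rest of the setup is not visible in this diff. A minimal sketch of how that section plausibly continues with standard `transformers` calls (the `tokenized` name and the `max_length` value are illustrative assumptions, not from the commit):

from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "gpt2"  # Replace with a larger model if needed and feasible
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# GPT-2 ships without a padding token; reuse EOS so batched tokenization can pad
tokenizer.pad_token = tokenizer.eos_token

# Tokenize the paragraph dataset produced by load_data
tokenized = dataset.map(
    lambda batch: tokenizer(batch["text"], truncation=True, max_length=512),
    batched=True,
)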