Daemontatox committed on
Commit
a9c0662
·
verified ·
1 Parent(s): 0dfba1d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +515 -177
app.py CHANGED
@@ -8,17 +8,20 @@ from PIL import Image
8
  from huggingface_hub import InferenceClient
9
  from google.generativeai import configure, GenerativeModel
10
  from google.ai.generativelanguage import Content, Part
 
 
11
 
12
  # Load API keys from environment variables
13
  inference_api_key = os.environ.get("HF_TOKEN")
14
- google_api_key = os.environ.get("GOOGLE_API_KEY") # New Google API key
15
 
16
  # Configure Google API
17
  configure(api_key=google_api_key)
18
 
19
- # Global variables to store the image data URL and prompt for the currently generated image.
20
  global_image_data_url = None
21
- global_image_prompt = None # Still stored if needed elsewhere
 
22
 
23
  def update_difficulty_label(active_session):
24
  return f"**Current Difficulty:** {active_session.get('difficulty', 'Very Simple')}"
@@ -36,28 +39,53 @@ def generate_prompt_from_options(difficulty, age, autism_level, topic_focus, tre
36
  - Autism Level: {autism_level}
37
  - Topic Focus: {topic_focus}
38
  - Treatment Plan: {treatment_plan}
39
-
40
  Emphasize that the image should be clear, calming, and support understanding and communication. The style should match the difficulty level: for example, "Very Simple" produces very basic visuals while "Very Detailed" produces rich visuals.
41
-
42
  The image should specifically focus on the topic: "{topic_focus}".
43
-
44
  Please generate a prompt that instructs the image generation engine to produce an image with:
45
  1. Clarity and simplicity (minimalist backgrounds, clear subject)
46
  2. Literal representation with defined borders and consistent style
47
  3. Soft, muted colors and reduced visual complexity
48
  4. Positive, calm scenes
49
  5. Clear focus on the specified topic
50
-
51
  Use descriptive and detailed language.
52
  """
53
  )
54
-
55
  # Initialize the Gemini Pro model
56
  model = GenerativeModel('gemini-2.0-flash-lite')
57
-
58
  # Generate content using the Gemini model
59
  response = model.generate_content(query)
 
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  return response.text.strip()
62
 
63
  def generate_image_fn(selected_prompt, guidance_scale=7.5,
@@ -84,100 +112,166 @@ def generate_image_fn(selected_prompt, guidance_scale=7.5,
84
  global_image_data_url = f"data:image/png;base64,{img_b64}"
85
  return image
86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  def generate_image_and_reset_chat(age, autism_level, topic_focus, treatment_plan, active_session, saved_sessions):
88
  """
89
  Generate a new image (with the current difficulty) and reset the chat.
90
  Now includes the topic_focus parameter to specify what the image should focus on.
91
  """
 
92
  new_sessions = saved_sessions.copy()
93
  if active_session.get("prompt"):
94
  new_sessions.append(active_session)
95
- # Use the current difficulty from the active session (which should be updated if advanced)
 
96
  current_difficulty = active_session.get("difficulty", "Very Simple")
 
 
97
  generated_prompt = generate_prompt_from_options(current_difficulty, age, autism_level, topic_focus, treatment_plan)
 
 
98
  image = generate_image_fn(generated_prompt)
 
 
 
 
 
 
 
 
 
99
  new_active_session = {
100
  "prompt": generated_prompt,
101
  "image": global_image_data_url,
 
102
  "chat": [],
103
  "treatment_plan": treatment_plan,
104
  "topic_focus": topic_focus,
 
105
  "identified_details": [],
 
106
  "difficulty": current_difficulty,
107
  "autism_level": autism_level,
108
  "age": age
109
  }
110
- return image, new_active_session, new_sessions
111
 
112
- def compare_details_chat_fn(user_details, treatment_plan, chat_history, identified_details):
 
 
 
 
 
 
 
 
113
  """
114
- Evaluate the child's description using Google's Gemini Vision model.
 
115
  """
116
- if not global_image_data_url:
117
  return "Please generate an image first."
118
 
 
 
 
 
 
119
  history_text = ""
120
  if chat_history:
121
  history_text = "\n\n### Previous Conversation:\n"
122
- for idx, (user_msg, bot_msg) in enumerate(chat_history, 1):
123
- history_text += f"Turn {idx}:\nUser: {user_msg}\nTeacher: {bot_msg}\n"
124
 
 
 
 
 
 
 
 
125
  identified_details_text = ""
126
  if identified_details:
127
  identified_details_text = "\n\n### Previously Identified Details:\n" + "\n".join(f"- {detail}" for detail in identified_details)
 
 
 
 
 
 
128
 
129
  message_text = (
130
- f"{history_text}{identified_details_text}\n\n"
131
- f"Based on the image provided above, please evaluate the following description given by the child:\n"
132
- f"'{user_details}'\n\n"
133
- "You are a kind and encouraging teacher speaking to a child. Use simple, clear language. "
134
- "Praise the child's correct observations and provide a gentle hint if something is missing. "
135
- "Keep your feedback positive and easy to understand.\n\n"
136
- "Focus on these evaluation criteria:\n"
137
- "1. **Object Identification** – Did the child mention the main objects?\n"
138
- "2. **Color & Shape Accuracy** – Were the colors and shapes described correctly?\n"
139
- "3. **Clarity & Simplicity** – Was the description clear and easy to understand?\n"
140
- "4. **Overall Communication** – How well did the child communicate their thoughts?\n\n"
141
- "Note: As difficulty increases, the expected level of detail is higher. Evaluate accordingly.\n\n"
142
- "Return your evaluation strictly as a JSON object with the following keys:\n"
 
 
 
 
143
  "{\n"
144
- " \"scores\": {\n"
145
- " \"object_identification\": <number>,\n"
146
- " \"color_shape_accuracy\": <number>,\n"
147
- " \"clarity_simplicity\": <number>,\n"
148
- " \"overall_communication\": <number>\n"
149
- " },\n"
150
- " \"final_score\": <number>,\n"
151
- " \"feedback\": \"<string>\",\n"
152
- " \"hint\": \"<string>\",\n"
153
- " \"advance\": <boolean>\n"
154
  "}\n\n"
155
- "Do not include any additional text outside the JSON."
156
  )
157
 
158
- # Remove the data:image/png;base64, prefix to get just the base64 string
159
- base64_img = global_image_data_url.split(",")[1]
160
-
161
- # Create a Gemini Vision Pro model
162
- vision_model = GenerativeModel('gemini-2.0-flash-thinking-exp-01-21')
163
-
164
- # Create the content with image and text using the correct parameters
165
- # Use 'inline_data' instead of 'content' for the image part
166
- image_part = Part(inline_data={"mime_type": "image/png", "data": base64.b64decode(base64_img)})
167
- text_part = Part(text=message_text)
168
- multimodal_content = Content(parts=[image_part, text_part])
169
-
170
- # Generate evaluation using the vision model
171
- response = vision_model.generate_content(multimodal_content)
172
 
 
 
173
  return response.text
174
 
175
- def evaluate_scores(evaluation_text, current_difficulty):
176
- """
177
- Parse the JSON evaluation and decide if the child advances.
178
- The threshold scales with difficulty:
179
- Very Simple: 70, Simple: 75, Moderate: 80, Detailed: 85, Very Detailed: 90.
180
- """
181
  try:
182
  json_match = re.search(r'\{.*\}', evaluation_text, re.DOTALL)
183
  if json_match:
@@ -185,73 +279,164 @@ def evaluate_scores(evaluation_text, current_difficulty):
185
  evaluation = json.loads(json_str)
186
  else:
187
  raise ValueError("No JSON object found in the response.")
188
- final_score = evaluation.get("final_score", 0)
189
- hint = evaluation.get("hint", "Keep trying!")
190
- advance = evaluation.get("advance", False)
191
- difficulty_thresholds = {
192
- "Very Simple": 70,
193
- "Simple": 75,
194
- "Moderate": 80,
195
- "Detailed": 85,
196
- "Very Detailed": 90
197
- }
198
- current_threshold = difficulty_thresholds.get(current_difficulty, 70)
199
- difficulty_mapping = {
200
- "Very Simple": "Simple",
201
- "Simple": "Moderate",
202
- "Moderate": "Detailed",
203
- "Detailed": "Very Detailed",
204
- "Very Detailed": "Very Detailed"
205
- }
206
- if final_score >= current_threshold or advance:
207
- new_difficulty = difficulty_mapping.get(current_difficulty, current_difficulty)
208
- response_msg = (f"Great job! Your final score is {final_score}, which meets the target of {current_threshold}. "
209
- f"You've advanced to {new_difficulty} difficulty.")
210
- return response_msg, new_difficulty
211
- else:
212
- response_msg = (f"Your final score is {final_score} (\n target: {current_threshold}). {hint} \n "
213
- f"Please try again at the {current_difficulty} level.")
214
- return response_msg, current_difficulty
 
 
 
 
 
 
 
 
 
 
 
 
215
  except Exception as e:
216
- return f"Error processing evaluation output: {str(e)}", current_difficulty
 
217
 
218
- def chat_respond(user_message, active_session, saved_sessions):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
219
  """
220
  Process a new chat message.
221
- Evaluate the child's description. If the evaluation indicates advancement,
222
- update the difficulty, generate a new image (resetting image and chat), and update the difficulty label.
223
  """
224
  if not active_session.get("image"):
225
  bot_message = "Please generate an image first."
226
- updated_chat = active_session.get("chat", []) + [(user_message, bot_message)]
227
  active_session["chat"] = updated_chat
228
- return "", updated_chat, saved_sessions, active_session
229
 
230
- chat_history = active_session.get("chat", [])
231
- identified_details = active_session.get("identified_details", [])
232
- raw_evaluation = compare_details_chat_fn(user_message, "", chat_history, identified_details)
233
- current_difficulty = active_session.get("difficulty", "Very Simple")
234
- evaluation_response, updated_difficulty = evaluate_scores(raw_evaluation, current_difficulty)
235
- bot_message = evaluation_response
 
 
 
 
 
 
 
 
 
 
 
 
236
 
237
- # If the child advanced, update difficulty and generate a new image
238
- if updated_difficulty != current_difficulty:
239
- # Update the active session's difficulty before generating a new prompt
240
- active_session["difficulty"] = updated_difficulty
 
 
 
241
  age = active_session.get("age", "3")
242
  autism_level = active_session.get("autism_level", "Level 1")
243
  topic_focus = active_session.get("topic_focus", "")
244
  treatment_plan = active_session.get("treatment_plan", "")
245
- new_image, new_active_session, new_sessions = generate_image_and_reset_chat(age, autism_level, topic_focus, treatment_plan, active_session, saved_sessions)
246
- new_active_session["chat"].append(("System", f"You advanced to {updated_difficulty} difficulty! A new image has been generated for you."))
247
- active_session = new_active_session
248
- bot_message = f"You advanced to {updated_difficulty} difficulty! A new image has been generated for you."
249
- saved_sessions = new_sessions
250
- else:
251
- updated_chat = active_session.get("chat", []) + [(user_message, bot_message)]
252
- active_session["chat"] = updated_chat
253
 
254
- return "", active_session["chat"], saved_sessions, active_session
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
255
 
256
  def update_sessions(saved_sessions, active_session):
257
  """
@@ -265,87 +450,240 @@ def update_sessions(saved_sessions, active_session):
265
  # Gradio Interface
266
  ##############################################
267
  with gr.Blocks() as demo:
268
- # The active session now starts with difficulty "Very Simple"
269
  active_session = gr.State({
270
  "prompt": None,
271
  "image": None,
 
272
  "chat": [],
273
  "treatment_plan": "",
274
  "topic_focus": "",
 
275
  "identified_details": [],
 
276
  "difficulty": "Very Simple",
277
  "age": "3",
278
  "autism_level": "Level 1"
279
  })
280
  saved_sessions = gr.State([])
281
-
282
- with gr.Column():
283
- gr.Markdown("# Image Generation & Chat Inference")
284
- # Display current difficulty label
285
- difficulty_label = gr.Markdown("**Current Difficulty:** Very Simple")
286
-
287
- # ----- Image Generation Section -----
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
288
  with gr.Column():
289
- gr.Markdown("## Generate Image")
290
- gr.Markdown("Enter your age, select your autism level, specify a topic focus, and provide the treatment plan to generate an image based on the current difficulty level.")
291
- with gr.Row():
292
- age_input = gr.Textbox(label="Age", placeholder="Enter age...", value="3")
293
- autism_level_dropdown = gr.Dropdown(label="Autism Level", choices=["Level 1", "Level 2", "Level 3"], value="Level 1")
294
-
295
- topic_focus_input = gr.Textbox(
296
- label="Topic Focus",
297
- placeholder="Enter a specific topic or detail to focus on (e.g., 'animals', 'emotions', 'daily routines')...",
298
- lines=1
299
- )
300
-
301
- treatment_plan_input = gr.Textbox(
302
- label="Treatment Plan",
303
- placeholder="Enter the treatment plan to guide the image generation...",
304
- lines=2
305
- )
306
- generate_btn = gr.Button("Generate Image")
307
- img_output = gr.Image(label="Generated Image")
308
- generate_btn.click(
309
- generate_image_and_reset_chat,
310
- inputs=[age_input, autism_level_dropdown, topic_focus_input, treatment_plan_input, active_session, saved_sessions],
311
- outputs=[img_output, active_session, saved_sessions]
312
- )
313
-
314
- # ----- Chat Section -----
315
- with gr.Column():
316
- gr.Markdown("## Chat about the Image")
317
  gr.Markdown(
318
- "After generating an image, type details or descriptions about it. "
319
- "Your message, along with the generated image and conversation history, will be sent for evaluation."
320
- )
321
- chatbot = gr.Chatbot(label="Chat History")
322
- with gr.Row():
323
- chat_input = gr.Textbox(label="Your Message", placeholder="Type your description here...", show_label=False)
324
- send_btn = gr.Button("Send")
325
- send_btn.click(
326
- chat_respond,
327
- inputs=[chat_input, active_session, saved_sessions],
328
- outputs=[chat_input, chatbot, saved_sessions, active_session]
329
- )
330
- chat_input.submit(
331
- chat_respond,
332
- inputs=[chat_input, active_session, saved_sessions],
333
- outputs=[chat_input, chatbot, saved_sessions, active_session]
334
  )
 
335
 
336
- # ----- Sidebar Section for Session Details -----
337
- with gr.Column(variant="sidebar"):
338
- gr.Markdown("## Saved Chat Sessions")
339
- gr.Markdown(
340
- "This sidebar automatically saves finished chat sessions. "
341
- "Each session includes the prompt used, the generated image (as a data URL), "
342
- "the topic focus, the treatment plan, the list of identified details, and the full chat history."
343
  )
344
- sessions_output = gr.JSON(label="Session Details", value={})
345
- active_session.change(update_sessions, inputs=[saved_sessions, active_session], outputs=sessions_output)
346
- # Update the current difficulty label when active_session changes.
347
- active_session.change(update_difficulty_label, inputs=[active_session], outputs=[difficulty_label])
348
- saved_sessions.change(update_sessions, inputs=[saved_sessions, active_session], outputs=sessions_output)
349
 
350
- # Launch the app with public sharing enabled.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
351
  demo.launch()
 
8
  from huggingface_hub import InferenceClient
9
  from google.generativeai import configure, GenerativeModel
10
  from google.ai.generativelanguage import Content, Part
11
+ from dotenv import load_dotenv
12
+ load_dotenv()
13
 
14
  # Load API keys from environment variables
15
  inference_api_key = os.environ.get("HF_TOKEN")
16
+ google_api_key = os.environ.get("GOOGLE_API_KEY")
17
 
18
  # Configure Google API
19
  configure(api_key=google_api_key)
20
 
21
+ # Global variables to store the image data URL, prompt, and detailed description
22
  global_image_data_url = None
23
+ global_image_prompt = None
24
+ global_image_description = None # New variable to store Gemini's detailed description
25
 
26
  def update_difficulty_label(active_session):
27
  return f"**Current Difficulty:** {active_session.get('difficulty', 'Very Simple')}"
 
39
  - Autism Level: {autism_level}
40
  - Topic Focus: {topic_focus}
41
  - Treatment Plan: {treatment_plan}
 
42
  Emphasize that the image should be clear, calming, and support understanding and communication. The style should match the difficulty level: for example, "Very Simple" produces very basic visuals while "Very Detailed" produces rich visuals.
 
43
  The image should specifically focus on the topic: "{topic_focus}".
 
44
  Please generate a prompt that instructs the image generation engine to produce an image with:
45
  1. Clarity and simplicity (minimalist backgrounds, clear subject)
46
  2. Literal representation with defined borders and consistent style
47
  3. Soft, muted colors and reduced visual complexity
48
  4. Positive, calm scenes
49
  5. Clear focus on the specified topic
 
50
  Use descriptive and detailed language.
51
  """
52
  )
 
53
  # Initialize the Gemini Pro model
54
  model = GenerativeModel('gemini-2.0-flash-lite')
 
55
  # Generate content using the Gemini model
56
  response = model.generate_content(query)
57
+ return response.text.strip()
58
 
59
+ def generate_detailed_description(image_data_url, prompt, difficulty, topic_focus):
60
+ """
61
+ Generate a detailed description of the image using Gemini Vision.
62
+ """
63
+ # Remove the data:image/png;base64, prefix to get just the base64 string
64
+ base64_img = image_data_url.split(",")[1]
65
+ query = (
66
+ f"""
67
+ You are an expert educator specializing in teaching children with autism.
68
+ Please provide a detailed description of this image that was generated based on the prompt:
69
+ "{prompt}"
70
+ The image is intended for a child with autism, focusing on the topic: "{topic_focus}" at a {difficulty} difficulty level.
71
+ In your description:
72
+ 1. List all key objects, characters, and elements present in the image
73
+ 2. Describe colors, shapes, positions, and relationships between elements
74
+ 3. Note any emotions, actions, or interactions depicted
75
+ 4. Highlight details that would be important for the child to notice
76
+ 5. Organize your description in a structured, clear way
77
+ Your description will be used as a reference to evaluate the child's observations,
78
+ so please be comprehensive but focus on observable details rather than interpretations.
79
+ """
80
+ )
81
+ # Create a Gemini Vision Pro model
82
+ vision_model = GenerativeModel('gemini-2.0-flash-thinking-exp-01-21')
83
+ # Create the content with image and text
84
+ image_part = Part(inline_data={"mime_type": "image/png", "data": base64.b64decode(base64_img)})
85
+ text_part = Part(text=query)
86
+ multimodal_content = Content(parts=[image_part, text_part])
87
+ # Generate description using the vision model
88
+ response = vision_model.generate_content(multimodal_content)
89
  return response.text.strip()
90
 
91
  def generate_image_fn(selected_prompt, guidance_scale=7.5,
 
112
  global_image_data_url = f"data:image/png;base64,{img_b64}"
113
  return image
114
 
115
+ def extract_key_details(description):
116
+ """
117
+ Extract key details from Gemini's description to use for tracking.
118
+ Returns a list of key elements/details from the description.
119
+ """
120
+ # Create a query to extract key details
121
+ query = (
122
+ f"""
123
+ From the following detailed image description, extract a list of 10-15 key details that a child might identify.
124
+ Each detail should be a simple, clear phrase describing one observable element.
125
+ Description:
126
+ {description}
127
+ Format your response as a JSON array of strings, each representing one key detail.
128
+ Example format: ["red ball on the grass", "smiling girl with brown hair", "blue sky with clouds"]
129
+ """
130
+ )
131
+ # Use Gemini text model to extract key details
132
+ model = GenerativeModel('gemini-2.0-flash-lite')
133
+ response = model.generate_content(query)
134
+ try:
135
+ # Parse the JSON response
136
+ details_match = re.search(r'\[.*\]', response.text, re.DOTALL)
137
+ if details_match:
138
+ details_json = details_match.group(0)
139
+ key_details = json.loads(details_json)
140
+ return key_details
141
+ else:
142
+ # If no JSON found, do basic extraction
143
+ lines = description.split('\n')
144
+ details = []
145
+ for line in lines:
146
+ if line.strip().startswith('-') or line.strip().startswith('*'):
147
+ details.append(line.strip()[1:].strip())
148
+ return details[:15] if details else ["object in image", "color", "shape", "background"]
149
+ except Exception as e:
150
+ print(f"Error extracting key details: {str(e)}")
151
+ return ["object in image", "color", "shape", "background"]
152
+
153
  def generate_image_and_reset_chat(age, autism_level, topic_focus, treatment_plan, active_session, saved_sessions):
154
  """
155
  Generate a new image (with the current difficulty) and reset the chat.
156
  Now includes the topic_focus parameter to specify what the image should focus on.
157
  """
158
+ global global_image_description
159
  new_sessions = saved_sessions.copy()
160
  if active_session.get("prompt"):
161
  new_sessions.append(active_session)
162
+
163
+ # Use the current difficulty from the active session
164
  current_difficulty = active_session.get("difficulty", "Very Simple")
165
+
166
+ # Generate the prompt for the image
167
  generated_prompt = generate_prompt_from_options(current_difficulty, age, autism_level, topic_focus, treatment_plan)
168
+
169
+ # Generate the image
170
  image = generate_image_fn(generated_prompt)
171
+
172
+ # Generate a detailed description of the image using Gemini Vision
173
+ image_description = generate_detailed_description(global_image_data_url, generated_prompt, current_difficulty, topic_focus)
174
+ global_image_description = image_description
175
+
176
+ # Extract key details to be identified
177
+ key_details = extract_key_details(image_description)
178
+
179
+ # Create a new active session with all the necessary information
180
  new_active_session = {
181
  "prompt": generated_prompt,
182
  "image": global_image_data_url,
183
+ "image_description": image_description,
184
  "chat": [],
185
  "treatment_plan": treatment_plan,
186
  "topic_focus": topic_focus,
187
+ "key_details": key_details, # Store the list of key details
188
  "identified_details": [],
189
+ "used_hints": [],
190
  "difficulty": current_difficulty,
191
  "autism_level": autism_level,
192
  "age": age
193
  }
 
194
 
195
+ # Create the checklist of items to identify
196
+ checklist_items = []
197
+ for i, detail in enumerate(key_details):
198
+ checklist_items.append({"detail": detail, "identified": False, "id": i})
199
+
200
+ # Return the updated state and checklist
201
+ return image, new_active_session, new_sessions, checklist_items
202
+
203
+ def compare_details_chat_fn(user_details, active_session):
204
  """
205
+ Evaluate the child's description using Google's Gemini model.
206
+ Now uses the image description and tracks identified details and used hints.
207
  """
208
+ if not global_image_data_url or not global_image_description:
209
  return "Please generate an image first."
210
 
211
+ # Get the detailed image description
212
+ image_description = active_session.get("image_description", global_image_description)
213
+
214
+ # Get chat history
215
+ chat_history = active_session.get("chat", [])
216
  history_text = ""
217
  if chat_history:
218
  history_text = "\n\n### Previous Conversation:\n"
219
+ for idx, (speaker, msg) in enumerate(chat_history, 1):
220
+ history_text += f"Turn {idx}:\n{speaker}: {msg}\n"
221
 
222
+ # Get key details, identified details and used hints
223
+ key_details = active_session.get("key_details", [])
224
+ identified_details = active_session.get("identified_details", [])
225
+ used_hints = active_session.get("used_hints", [])
226
+
227
+ # Format for the API
228
+ key_details_text = "\n\n### Key Details to Identify:\n" + "\n".join(f"- {detail}" for detail in key_details)
229
  identified_details_text = ""
230
  if identified_details:
231
  identified_details_text = "\n\n### Previously Identified Details:\n" + "\n".join(f"- {detail}" for detail in identified_details)
232
+ used_hints_text = ""
233
+ if used_hints:
234
+ used_hints_text = "\n\n### Previously Given Hints:\n" + "\n".join(f"- {hint}" for hint in used_hints)
235
+
236
+ # Current difficulty level
237
+ current_difficulty = active_session.get("difficulty", "Very Simple")
238
 
239
  message_text = (
240
+ f"You are a kind and encouraging teacher helping a child with autism describe an image.\n\n"
241
+ f"### Image Prompt:\n{active_session.get('prompt', 'No prompt available')}\n\n"
242
+ f"### Detailed Image Description (Reference):\n{image_description}\n\n"
243
+ f"### Current Difficulty Level: {current_difficulty}\n"
244
+ f"{key_details_text}{history_text}{identified_details_text}{used_hints_text}\n\n"
245
+ f"### Child's Current Description:\n'{user_details}'\n\n"
246
+ "Evaluate the child's description compared to the key details list. Use simple, clear language. "
247
+ "Praise specific correct observations. If something important is missing, provide a gentle hint "
248
+ "that hasn't been given before.\n\n"
249
+ "Follow these guidelines:\n"
250
+ "1. DO NOT mention that you're evaluating or scoring the child.\n"
251
+ "2. Keep feedback warm, positive, and encouraging.\n"
252
+ "3. If giving a hint, make it specific but not too obvious.\n"
253
+ "4. Never repeat hints that have already been given.\n"
254
+ "5. Focus on details the child hasn't yet identified.\n"
255
+ "6. Acknowledge the child's progress.\n\n"
256
+ "Return your response as a JSON object with the following format:\n"
257
  "{\n"
258
+ " \"feedback\": \"Your encouraging response to the child\",\n"
259
+ " \"newly_identified_details\": [\"list\", \"of\", \"new details\", \"the child identified\"],\n"
260
+ " \"hint\": \"A new hint about something not yet identified\",\n"
261
+ " \"score\": <number from 0-100 based on how complete the description is>,\n"
262
+ " \"advance_difficulty\": <boolean indicating if child should advance>\n"
 
 
 
 
 
263
  "}\n\n"
264
+ "Ensure the JSON is valid and contains all fields."
265
  )
266
 
267
+ # Create a Gemini model for evaluation
268
+ model = GenerativeModel('gemini-2.0-flash-thinking-exp-01-21')
 
 
 
 
 
 
 
 
 
 
 
 
269
 
270
+ # Generate evaluation using the model
271
+ response = model.generate_content(message_text)
272
  return response.text
273
 
274
+ def parse_evaluation(evaluation_text, active_session):
 
 
 
 
 
275
  try:
276
  json_match = re.search(r'\{.*\}', evaluation_text, re.DOTALL)
277
  if json_match:
 
279
  evaluation = json.loads(json_str)
280
  else:
281
  raise ValueError("No JSON object found in the response.")
282
+
283
+ # Extract data from the evaluation
284
+ feedback = evaluation.get("feedback", "Great effort! Keep describing what you see.")
285
+ newly_identified_details = evaluation.get("newly_identified_details", [])
286
+ hint = evaluation.get("hint", "")
287
+ score = evaluation.get("score", 0)
288
+ advance_difficulty = evaluation.get("advance_difficulty", False)
289
+
290
+ # Update the session with newly identified details
291
+ identified_details = active_session.get("identified_details", [])
292
+ for detail in newly_identified_details:
293
+ if detail not in identified_details:
294
+ identified_details.append(detail)
295
+ active_session["identified_details"] = identified_details
296
+
297
+ # Add the hint to used hints if one was provided
298
+ if hint:
299
+ used_hints = active_session.get("used_hints", [])
300
+ if hint not in used_hints:
301
+ used_hints.append(hint)
302
+ active_session["used_hints"] = used_hints
303
+
304
+ # Add the hint to the feedback if it's not already included
305
+ if hint.strip() and hint.strip() not in feedback:
306
+ feedback += f"\n\n💡 Hint: {hint}"
307
+
308
+ # Get current difficulty and check if it should be advanced
309
+ current_difficulty = active_session.get("difficulty", "Very Simple")
310
+ should_advance = False
311
+
312
+ if advance_difficulty:
313
+ difficulties = ["Very Simple", "Simple", "Moderate", "Detailed", "Very Detailed"]
314
+ current_index = difficulties.index(current_difficulty) if current_difficulty in difficulties else 0
315
+ if current_index < len(difficulties) - 1:
316
+ current_difficulty = difficulties[current_index + 1]
317
+ should_advance = True
318
+
319
+ return feedback, current_difficulty, should_advance, newly_identified_details
320
+
321
  except Exception as e:
322
+ print(f"Error processing evaluation: {str(e)}")
323
+ return f"That's interesting! Can you tell me more about what you see?", active_session.get("difficulty", "Very Simple"), False, []
324
 
325
+ def update_checklist(checklist, newly_identified, key_details):
326
+ """
327
+ Update the checklist based on newly identified details.
328
+ Returns an updated checklist.
329
+ """
330
+ new_checklist = []
331
+ for item in checklist:
332
+ detail = item["detail"]
333
+ # Check if this detail has been identified
334
+ is_identified = item["identified"]
335
+
336
+ # If newly identified, update status
337
+ for identified in newly_identified:
338
+ # Check if the identified detail matches or is similar to the key detail
339
+ if (identified.lower() in detail.lower() or detail.lower() in identified.lower() or
340
+ any(word for word in identified.lower().split() if word in detail.lower() and len(word) > 3)):
341
+ is_identified = True
342
+ break
343
+
344
+ new_checklist.append({"detail": detail, "identified": is_identified, "id": item["id"]})
345
+
346
+ return new_checklist
347
+
348
def chat_respond(user_message, active_session, saved_sessions, checklist):
    """
    Process one chat turn from the child.

    Evaluates the child's description of the current image, updates the
    checklist of identified details, and — when the evaluator advances the
    child or every detail has been found — archives the session and
    generates a fresh image with a new checklist.

    Parameters
    ----------
    user_message : str
        The child's description, as typed by the teacher.
    active_session : dict
        Current session state (image, chat, difficulty, key_details, ...).
    saved_sessions : list[dict]
        Previously archived sessions.
    checklist : list[dict]
        Entries of the form {"detail": str, "identified": bool, "id": int}.

    Returns
    -------
    tuple
        (cleared_input, chat_history, saved_sessions, active_session,
        checklist, new_image_or_None). The last element is a newly generated
        image only when one was produced; otherwise None so the UI keeps the
        currently displayed image.
    """
    # No image yet: nothing to evaluate, just nudge the user.
    if not active_session.get("image"):
        bot_message = "Please generate an image first."
        updated_chat = active_session.get("chat", []) + [("Child", user_message), ("Teacher", bot_message)]
        active_session["chat"] = updated_chat
        return "", updated_chat, saved_sessions, active_session, checklist, None  # Return None for image

    # Get the evaluation from Gemini.
    raw_evaluation = compare_details_chat_fn(user_message, active_session)

    # Parse feedback text, (possibly new) difficulty, advancement flag, and
    # the details newly identified in this turn.
    feedback, updated_difficulty, should_advance, newly_identified = parse_evaluation(raw_evaluation, active_session)

    # Update the checklist with newly identified details.
    updated_checklist = update_checklist(checklist, newly_identified, active_session.get("key_details", []))

    # Add the current exchange to the chat history.
    updated_chat = active_session.get("chat", []) + [("Child", user_message), ("Teacher", feedback)]
    active_session["chat"] = updated_chat

    # A new image is generated when the evaluator advances the child OR when
    # every detail of the current image has been identified.
    all_identified = all(item["identified"] for item in updated_checklist)
    should_generate_new_image = should_advance or all_identified

    if should_generate_new_image:
        # Archive the finished session before starting a new one.
        new_sessions = saved_sessions.copy()
        new_sessions.append(active_session.copy())

        # Carry the child's profile over to the next image.
        age = active_session.get("age", "3")
        autism_level = active_session.get("autism_level", "Level 1")
        topic_focus = active_session.get("topic_focus", "")
        treatment_plan = active_session.get("treatment_plan", "")

        # The original conditional here ("use updated unless it equals the
        # current difficulty, else use current") always evaluated to
        # updated_difficulty, so it is used directly.
        difficulty_to_use = updated_difficulty

        # Generate a new prompt with the chosen difficulty.
        generated_prompt = generate_prompt_from_options(difficulty_to_use, age, autism_level, topic_focus, treatment_plan)

        # Generate the new image (returns a PIL Image); this also refreshes
        # global_image_data_url as a side effect.
        new_image = generate_image_fn(generated_prompt)

        # Describe the new image via Gemini Vision and derive the details
        # the child should identify.
        image_description = generate_detailed_description(global_image_data_url, generated_prompt, difficulty_to_use, topic_focus)
        key_details = extract_key_details(image_description)

        # Fresh session state for the new image.
        new_active_session = {
            "prompt": generated_prompt,
            "image": global_image_data_url,
            "image_description": image_description,
            "chat": [],
            "treatment_plan": treatment_plan,
            "topic_focus": topic_focus,
            "key_details": key_details,
            "identified_details": [],
            "used_hints": [],
            "difficulty": difficulty_to_use,
            "autism_level": autism_level,
            "age": age
        }

        # Fresh, all-unchecked checklist for the new image.
        new_checklist = [
            {"detail": detail, "identified": False, "id": i}
            for i, detail in enumerate(key_details)
        ]

        # Seed the new chat with an appropriate system message.
        if updated_difficulty != active_session.get("difficulty", "Very Simple"):
            advancement_message = f"Congratulations! You've advanced to {updated_difficulty} difficulty! Here's a new image to describe."
        else:
            advancement_message = "Great job identifying all the details! Here's a new image at the same difficulty level."

        new_active_session["chat"] = [("System", advancement_message)]

        return "", new_active_session["chat"], new_sessions, new_active_session, new_checklist, new_image

    # If not advancing, return None for the image to indicate no change.
    return "", updated_chat, saved_sessions, active_session, updated_checklist, None
440
 
441
  def update_sessions(saved_sessions, active_session):
442
  """
 
450
# Gradio Interface
##############################################
with gr.Blocks() as demo:
    # Initialize the active session with default values.
    # Keys mirror what the chat handlers later read with .get().
    active_session = gr.State({
        "prompt": None,
        "image": None,
        "image_description": None,
        "chat": [],
        "treatment_plan": "",
        "topic_focus": "",
        "key_details": [],
        "identified_details": [],
        "used_hints": [],
        "difficulty": "Very Simple",
        "age": "3",
        "autism_level": "Level 1"
    })
    # Archived sessions, appended to when the child advances.
    saved_sessions = gr.State([])
    # Checklist entries: {"detail": str, "identified": bool, "id": int}.
    checklist_state = gr.State([])

    with gr.Row():
        # Main content area
        with gr.Column(scale=2):
            gr.Markdown("# Autism Education Image Description Tool")
            # Display current difficulty label
            difficulty_label = gr.Markdown("**Current Difficulty:** Very Simple")

            # ----- Image Generation Section -----
            with gr.Column():
                gr.Markdown("## Generate Image")
                gr.Markdown("Enter the child's details to generate an appropriate educational image.")
                with gr.Row():
                    age_input = gr.Textbox(label="Child's Age", placeholder="Enter age...", value="3")
                    autism_level_dropdown = gr.Dropdown(label="Autism Level", choices=["Level 1", "Level 2", "Level 3"], value="Level 1")
                # NOTE(review): the two boxes below are assumed full-width
                # (outside the age/level row) — confirm against original layout.
                topic_focus_input = gr.Textbox(
                    label="Topic Focus",
                    placeholder="Enter a specific topic or detail to focus on (e.g., 'animals', 'emotions', 'daily routines')...",
                    lines=1
                )
                treatment_plan_input = gr.Textbox(
                    label="Treatment Plan",
                    placeholder="Enter the treatment plan to guide the image generation...",
                    lines=2
                )
                generate_btn = gr.Button("Generate Image")
                img_output = gr.Image(label="Generated Image")

            # ----- Chat Section -----
            with gr.Column():
                gr.Markdown("## Image Description Practice")
                gr.Markdown(
                    "After generating an image, ask the child to describe what they see. "
                    "Type their description in the box below. The system will provide supportive feedback "
                    "and track their progress in identifying details."
                )
                chatbot = gr.Chatbot(label="Conversation History")
                with gr.Row():
                    chat_input = gr.Textbox(label="Child's Description", placeholder="Type what the child says about the image...", show_label=True)
                    send_btn = gr.Button("Submit")

        # Sidebar - Checklist of items to identify
        with gr.Column(scale=1):
            gr.Markdown("## Details to Identify")
            gr.Markdown("The child should try to identify these elements in the image:")

            # Create a custom HTML component to display the checklist with checkboxes
            checklist_html = gr.HTML("""
            <div id="checklist-container">
                <p>Generate an image to see details to identify.</p>
            </div>
            """)

            # Add a function to update the checklist HTML
524
def update_checklist_html(checklist):
    """Render the details-to-identify checklist as an HTML fragment.

    Parameters
    ----------
    checklist : list[dict]
        Entries of the form {"detail": str, "identified": bool, "id": int}.

    Returns
    -------
    str
        HTML in which identified items are struck through with a green
        check mark and unidentified items show an empty box.
    """
    # Local import keeps the escaping fix self-contained in this block.
    import html

    if not checklist:
        return """
        <div id="checklist-container">
            <p>Generate an image to see details to identify.</p>
        </div>
        """

    # Static stylesheet describing the two item states.
    parts = ["""
    <div id="checklist-container" style="padding: 10px;">
    <style>
    .checklist-item {
        display: flex;
        align-items: center;
        margin-bottom: 10px;
        padding: 8px;
        border-radius: 5px;
        transition: background-color 0.3s;
    }
    .identified {
        background-color: #e6f7e6;
        text-decoration: line-through;
        color: #4CAF50;
    }
    .not-identified {
        background-color: #f5f5f5;
    }
    .checkmark {
        margin-right: 10px;
        font-size: 1.2em;
    }
    </style>
    """]

    for item in checklist:
        # Escape the detail text: it originates from the language model and
        # could otherwise break the markup (or inject HTML).
        detail = html.escape(item["detail"])
        identified = item["identified"]
        css_class = "identified" if identified else "not-identified"
        checkmark = "βœ…" if identified else "⬜"

        parts.append(f"""
    <div class="checklist-item {css_class}">
        <span class="checkmark">{checkmark}</span>
        <span>{detail}</span>
    </div>
    """)

    parts.append("""
    </div>
    """)
    # Single join instead of repeated string concatenation.
    return "".join(parts)
575
+
576
            # Progress summary
            # Placeholder markup; replaced whenever the checklist state changes.
            progress_html = gr.HTML("""
            <div id="progress-container">
                <p>No active session.</p>
            </div>
            """)
583
def update_progress_html(checklist):
    """Render a progress bar plus an encouragement line for the checklist.

    Returns placeholder markup when no checklist exists yet; otherwise a bar
    sized by the fraction of identified details and a message keyed to the
    completion percentage.
    """
    if not checklist:
        return """
        <div id="progress-container">
            <p>No active session.</p>
        </div>
        """

    total = len(checklist)
    found = sum(1 for entry in checklist if entry["identified"])
    pct = (found / total) * 100 if total > 0 else 0
    done = found == total
    bar_width = f"{pct}%"

    # Pick the encouragement line for the current completion level.
    if done:
        message = "πŸŽ‰ Amazing! All details identified! πŸŽ‰"
    elif pct >= 75:
        message = "Almost there! Keep going!"
    elif pct >= 50:
        message = "Halfway there! You're doing great!"
    elif pct >= 25:
        message = "Good start! Keep looking!"
    else:
        message = "Let's find more details!"

    header = f"""
    <div id="progress-container" style="padding: 10px;">
        <h3>Progress: {found} / {total} details</h3>
        <div style="width: 100%; background-color: #f1f1f1; border-radius: 5px; margin-bottom: 10px;">
            <div style="width: {bar_width}; height: 24px; background-color: #4CAF50; border-radius: 5px;"></div>
        </div>
        <p style="font-size: 16px; font-weight: bold; text-align: center;">
    """
    footer = """
        </p>
    </div>
    """
    return header + message + footer
623
+
624
    # ----- Session Details Section -----
    with gr.Row():
        with gr.Column():
            gr.Markdown("## Progress Tracking")
            gr.Markdown(
                "This section tracks the child's progress across sessions. "
                "Each session includes the difficulty level, identified details, "
                "and the full conversation history."
            )
            # Raw JSON dump of the session data, refreshed by update_sessions
            # whenever the session states change.
            sessions_output = gr.JSON(label="Session Details", value={})
634
 
635
# Process chat and update image as needed
def process_chat_and_image(user_msg, active_session, saved_sessions, checklist):
    """Forward one chat turn to chat_respond and decide the image output.

    chat_respond returns a new image only when the child advances; in that
    case the image component is updated, otherwise a no-op update keeps the
    currently displayed image in place.
    """
    outcome = chat_respond(user_msg, active_session, saved_sessions, checklist)
    *state_outputs, maybe_image = outcome
    # gr.update() is Gradio's no-change marker for an output component.
    image_out = maybe_image if maybe_image is not None else gr.update()
    return (*state_outputs, image_out)
647
+
648
    # Connect event handlers
    # Image generation: produces a new image and resets chat/checklist state.
    generate_btn.click(
        generate_image_and_reset_chat,
        inputs=[age_input, autism_level_dropdown, topic_focus_input, treatment_plan_input, active_session, saved_sessions],
        outputs=[img_output, active_session, saved_sessions, checklist_state]
    )

    # Chat submission via the Submit button...
    send_btn.click(
        process_chat_and_image,
        inputs=[chat_input, active_session, saved_sessions, checklist_state],
        outputs=[chat_input, chatbot, saved_sessions, active_session, checklist_state, img_output]
    )

    # ...and via pressing Enter in the textbox (identical wiring).
    chat_input.submit(
        process_chat_and_image,
        inputs=[chat_input, active_session, saved_sessions, checklist_state],
        outputs=[chat_input, chatbot, saved_sessions, active_session, checklist_state, img_output]
    )

    # Update the checklist HTML when checklist state changes
    checklist_state.change(
        update_checklist_html,
        inputs=[checklist_state],
        outputs=[checklist_html]
    )

    # Update the progress HTML when checklist state changes
    checklist_state.change(
        update_progress_html,
        inputs=[checklist_state],
        outputs=[progress_html]
    )

    # Update the current difficulty label when active_session changes
    active_session.change(update_difficulty_label, inputs=[active_session], outputs=[difficulty_label])

    # Update sessions when active_session or saved_sessions change
    active_session.change(update_sessions, inputs=[saved_sessions, active_session], outputs=sessions_output)
    saved_sessions.change(update_sessions, inputs=[saved_sessions, active_session], outputs=sessions_output)

# Launch the app
demo.launch()