Daemontatox committed on
Commit 7e52f0b · verified · 1 Parent(s): 0039777

Update app.py

Files changed (1)
  1. app.py +105 -175
app.py CHANGED
@@ -49,9 +49,7 @@ def generate_prompt_from_options(difficulty, age, autism_level, topic_focus, tre
49
  Use descriptive and detailed language.
50
  """
51
  )
52
- # Initialize the Gemini Pro model
53
- model = GenerativeModel('gemini-2.0-flash-lite')
54
- # Generate content using the Gemini model
55
  response = model.generate_content(query)
56
  return response.text.strip()
57
 
@@ -59,7 +57,6 @@ def generate_detailed_description(image_data_url, prompt, difficulty, topic_focu
59
  """
60
  Generate a detailed description of the image using Gemini Vision.
61
  """
62
- # Remove the data:image/png;base64, prefix to get just the base64 string
63
  base64_img = image_data_url.split(",")[1]
64
  query = (
65
  f"""
@@ -77,69 +74,54 @@ def generate_detailed_description(image_data_url, prompt, difficulty, topic_focu
77
  so please be comprehensive but focus on observable details rather than interpretations.
78
  """
79
  )
80
- # Create a Gemini Vision Pro model
81
  vision_model = GenerativeModel('gemini-2.0-flash-thinking-exp-01-21')
82
- # Create the content with image and text
83
  image_part = Part(inline_data={"mime_type": "image/png", "data": base64.b64decode(base64_img)})
84
  text_part = Part(text=query)
85
  multimodal_content = Content(parts=[image_part, text_part])
86
- # Generate description using the vision model
87
  response = vision_model.generate_content(multimodal_content)
88
  return response.text.strip()
89
 
90
- def generate_image_fn(selected_prompt, guidance_scale=7.5,
91
- negative_prompt="ugly, blurry, poorly drawn hands, lewd, nude, deformed, missing limbs, missing eyes, missing arms, missing legs",
92
- num_inference_steps=50):
93
- """
94
- Generate an image from the prompt via the Hugging Face Inference API.
95
- Convert the image to a data URL.
96
- """
97
- global global_image_data_url, global_image_prompt
98
- global_image_prompt = selected_prompt
99
- image_client = InferenceClient(provider="hf-inference", api_key=inference_api_key)
100
- image = image_client.text_to_image(
101
- selected_prompt,
102
- model="stabilityai/stable-diffusion-3.5-large-turbo",
103
- guidance_scale=guidance_scale,
104
- negative_prompt=negative_prompt,
105
- num_inference_steps=num_inference_steps
106
- )
107
- buffered = io.BytesIO()
108
- image.save(buffered, format="PNG")
109
- img_bytes = buffered.getvalue()
110
- img_b64 = base64.b64encode(img_bytes).decode("utf-8")
111
- global_image_data_url = f"data:image/png;base64,{img_b64}"
112
- return image
113
-
114
- def extract_key_details(description):
115
  """
116
- Extract key details from Gemini's description to use for tracking.
117
- Returns a list of key elements/details from the description.
118
  """
119
- # Create a query to extract key details
120
  query = (
121
  f"""
122
- From the following detailed image description, extract a list of 10-15 key details that a child might identify.
123
  Each detail should be a simple, clear phrase describing one observable element.
124
- Description:
125
- {description}
126
  Format your response as a JSON array of strings, each representing one key detail.
127
  Example format: ["red ball on the grass", "smiling girl with brown hair", "blue sky with clouds"]
128
  """
129
  )
130
- # Use Gemini text model to extract key details
131
- model = GenerativeModel('gemini-2.0-flash-lite')
132
- response = model.generate_content(query)
133
  try:
134
- # Parse the JSON response
135
  details_match = re.search(r'\[.*\]', response.text, re.DOTALL)
136
  if details_match:
137
  details_json = details_match.group(0)
138
  key_details = json.loads(details_json)
139
  return key_details
140
  else:
141
- # If no JSON found, do basic extraction
142
- lines = description.split('\n')
143
  details = []
144
  for line in lines:
145
  if line.strip().startswith('-') or line.strip().startswith('*'):
@@ -149,33 +131,47 @@ def extract_key_details(description):
149
  print(f"Error extracting key details: {str(e)}")
150
  return ["object in image", "color", "shape", "background"]
151
 
152
- def generate_image_and_reset_chat(age, autism_level, topic_focus, treatment_plan, active_session, saved_sessions):
153
  """
154
  Generate a new image (with the current difficulty) and reset the chat.
155
- Now includes the topic_focus parameter to specify what the image should focus on.
156
  """
157
  global global_image_description
158
  new_sessions = saved_sessions.copy()
159
  if active_session.get("prompt"):
160
  new_sessions.append(active_session)
161
 
162
- # Use the current difficulty from the active session
163
  current_difficulty = active_session.get("difficulty", "Very Simple")
164
-
165
- # Generate the prompt for the image
166
  generated_prompt = generate_prompt_from_options(current_difficulty, age, autism_level, topic_focus, treatment_plan)
167
-
168
- # Generate the image
169
  image = generate_image_fn(generated_prompt)
170
-
171
- # Generate a detailed description of the image using Gemini Vision
172
  image_description = generate_detailed_description(global_image_data_url, generated_prompt, current_difficulty, topic_focus)
173
  global_image_description = image_description
 
174
 
175
- # Extract key details to be identified
176
- key_details = extract_key_details(image_description)
177
-
178
- # Create a new active session with all the necessary information
179
  new_active_session = {
180
  "prompt": generated_prompt,
181
  "image": global_image_data_url,
@@ -183,34 +179,30 @@ def generate_image_and_reset_chat(age, autism_level, topic_focus, treatment_plan
183
  "chat": [],
184
  "treatment_plan": treatment_plan,
185
  "topic_focus": topic_focus,
186
- "key_details": key_details, # Store the list of key details
187
  "identified_details": [],
188
  "used_hints": [],
189
  "difficulty": current_difficulty,
190
  "autism_level": autism_level,
191
- "age": age
 
 
192
  }
193
 
194
- # Create the checklist of items to identify
195
  checklist_items = []
196
  for i, detail in enumerate(key_details):
197
  checklist_items.append({"detail": detail, "identified": False, "id": i})
198
 
199
- # Return the updated state and checklist
200
  return image, new_active_session, new_sessions, checklist_items
201
 
202
  def compare_details_chat_fn(user_details, active_session):
203
  """
204
  Evaluate the child's description using Google's Gemini model.
205
- Now uses the image description and tracks identified details and used hints.
206
  """
207
  if not global_image_data_url or not global_image_description:
208
  return "Please generate an image first."
209
 
210
- # Get the detailed image description
211
  image_description = active_session.get("image_description", global_image_description)
212
-
213
- # Get chat history
214
  chat_history = active_session.get("chat", [])
215
  history_text = ""
216
  if chat_history:
@@ -218,12 +210,10 @@ def compare_details_chat_fn(user_details, active_session):
218
  for idx, (speaker, msg) in enumerate(chat_history, 1):
219
  history_text += f"Turn {idx}:\n{speaker}: {msg}\n"
220
 
221
- # Get key details, identified details and used hints
222
  key_details = active_session.get("key_details", [])
223
  identified_details = active_session.get("identified_details", [])
224
  used_hints = active_session.get("used_hints", [])
225
 
226
- # Format for the API
227
  key_details_text = "\n\n### Key Details to Identify:\n" + "\n".join(f"- {detail}" for detail in key_details)
228
  identified_details_text = ""
229
  if identified_details:
@@ -232,9 +222,7 @@ def compare_details_chat_fn(user_details, active_session):
232
  if used_hints:
233
  used_hints_text = "\n\n### Previously Given Hints:\n" + "\n".join(f"- {hint}" for hint in used_hints)
234
 
235
- # Current difficulty level
236
  current_difficulty = active_session.get("difficulty", "Very Simple")
237
-
238
  message_text = (
239
  f"You are a kind and encouraging teacher helping a child with autism describe an image.\n\n"
240
  f"### Image Prompt:\n{active_session.get('prompt', 'No prompt available')}\n\n"
@@ -263,14 +251,15 @@ def compare_details_chat_fn(user_details, active_session):
263
  "Ensure the JSON is valid and contains all fields."
264
  )
265
 
266
- # Create a Gemini model for evaluation
267
- model = GenerativeModel('gemini-2.0-flash-thinking-exp-01-21')
268
-
269
- # Generate evaluation using the model
270
  response = model.generate_content(message_text)
271
  return response.text
272
 
273
  def parse_evaluation(evaluation_text, active_session):
274
  try:
275
  json_match = re.search(r'\{.*\}', evaluation_text, re.DOTALL)
276
  if json_match:
@@ -279,35 +268,28 @@ def parse_evaluation(evaluation_text, active_session):
279
  else:
280
  raise ValueError("No JSON object found in the response.")
281
 
282
- # Extract data from the evaluation
283
  feedback = evaluation.get("feedback", "Great effort! Keep describing what you see.")
284
  newly_identified_details = evaluation.get("newly_identified_details", [])
285
  hint = evaluation.get("hint", "")
286
  score = evaluation.get("score", 0)
287
  advance_difficulty = evaluation.get("advance_difficulty", False)
288
 
289
- # Update the session with newly identified details
290
  identified_details = active_session.get("identified_details", [])
291
  for detail in newly_identified_details:
292
  if detail not in identified_details:
293
  identified_details.append(detail)
294
  active_session["identified_details"] = identified_details
295
 
296
- # Add the hint to used hints if one was provided
297
  if hint:
298
  used_hints = active_session.get("used_hints", [])
299
  if hint not in used_hints:
300
  used_hints.append(hint)
301
  active_session["used_hints"] = used_hints
302
-
303
- # Add the hint to the feedback if it's not already included
304
  if hint.strip() and hint.strip() not in feedback:
305
  feedback += f"\n\n💡 Hint: {hint}"
306
 
307
- # Get current difficulty and check if it should be advanced
308
  current_difficulty = active_session.get("difficulty", "Very Simple")
309
  should_advance = False
310
-
311
  if advance_difficulty:
312
  difficulties = ["Very Simple", "Simple", "Moderate", "Detailed", "Very Detailed"]
313
  current_index = difficulties.index(current_difficulty) if current_difficulty in difficulties else 0
@@ -315,95 +297,71 @@ def parse_evaluation(evaluation_text, active_session):
315
  current_difficulty = difficulties[current_index + 1]
316
  should_advance = True
317
 
318
- return feedback, current_difficulty, should_advance, newly_identified_details
319
-
320
  except Exception as e:
321
  print(f"Error processing evaluation: {str(e)}")
322
- return f"That's interesting! Can you tell me more about what you see?", active_session.get("difficulty", "Very Simple"), False, []
323
 
324
  def update_checklist(checklist, newly_identified, key_details):
325
  """
326
  Update the checklist based on newly identified details.
327
- Returns an updated checklist.
328
  """
329
  new_checklist = []
330
  for item in checklist:
331
  detail = item["detail"]
332
- # Check if this detail has been identified
333
  is_identified = item["identified"]
334
-
335
- # If newly identified, update status
336
  for identified in newly_identified:
337
- # Check if the identified detail matches or is similar to the key detail
338
  if (identified.lower() in detail.lower() or detail.lower() in identified.lower() or
339
  any(word for word in identified.lower().split() if word in detail.lower() and len(word) > 3)):
340
  is_identified = True
341
  break
342
-
343
  new_checklist.append({"detail": detail, "identified": is_identified, "id": item["id"]})
344
-
345
  return new_checklist
346
 
347
  def chat_respond(user_message, active_session, saved_sessions, checklist):
348
  """
349
  Process a new chat message.
350
  Evaluate the child's description, update identified details, and advance difficulty if needed.
 
351
  """
352
  if not active_session.get("image"):
353
  bot_message = "Please generate an image first."
354
  updated_chat = active_session.get("chat", []) + [("Child", user_message), ("Teacher", bot_message)]
355
  active_session["chat"] = updated_chat
356
- return "", updated_chat, saved_sessions, active_session, checklist, None # Return None for image
357
 
358
- # Get the evaluation from Gemini
359
  raw_evaluation = compare_details_chat_fn(user_message, active_session)
 
360
 
361
- # Parse the evaluation and update session
362
- feedback, updated_difficulty, should_advance, newly_identified = parse_evaluation(raw_evaluation, active_session)
 
363
 
364
- # Update the checklist with newly identified details
365
  updated_checklist = update_checklist(checklist, newly_identified, active_session.get("key_details", []))
366
-
367
- # Add the current exchange to the chat history
368
  updated_chat = active_session.get("chat", []) + [("Child", user_message), ("Teacher", feedback)]
369
  active_session["chat"] = updated_chat
370
 
371
- # Check if all items have been identified
372
  all_identified = all(item["identified"] for item in updated_checklist)
 
 
373
 
374
- # Modify this line to generate new image when all details are identified
375
- should_generate_new_image = should_advance or all_identified
376
-
377
- # If the child should advance to a new difficulty or has identified all details
378
  if should_generate_new_image:
379
- # Save the current session
380
  new_sessions = saved_sessions.copy()
381
  new_sessions.append(active_session.copy())
382
-
383
- # Get parameters for generating new image
384
  age = active_session.get("age", "3")
385
  autism_level = active_session.get("autism_level", "Level 1")
386
  topic_focus = active_session.get("topic_focus", "")
387
  treatment_plan = active_session.get("treatment_plan", "")
388
-
389
- # Use current difficulty if not advancing, otherwise use updated difficulty
390
  difficulty_to_use = updated_difficulty if updated_difficulty != active_session.get("difficulty", "Very Simple") else active_session.get("difficulty", "Very Simple")
391
-
392
- # Generate a new prompt with the difficulty
393
  generated_prompt = generate_prompt_from_options(difficulty_to_use, age, autism_level, topic_focus, treatment_plan)
394
-
395
- # Generate the new image - returns a PIL Image
396
  new_image = generate_image_fn(generated_prompt)
397
-
398
- # Now the global_image_data_url should be updated
399
-
400
- # Generate a detailed description of the image using Gemini Vision
401
  image_description = generate_detailed_description(global_image_data_url, generated_prompt, difficulty_to_use, topic_focus)
 
402
 
403
- # Extract key details to be identified
404
- key_details = extract_key_details(image_description)
405
-
406
- # Create fresh active session with the new image
407
  new_active_session = {
408
  "prompt": generated_prompt,
409
  "image": global_image_data_url,
@@ -416,25 +374,25 @@ def chat_respond(user_message, active_session, saved_sessions, checklist):
416
  "used_hints": [],
417
  "difficulty": difficulty_to_use,
418
  "autism_level": autism_level,
419
- "age": age
 
 
420
  }
421
 
422
- # Create new checklist for the new image
423
  new_checklist = []
424
  for i, detail in enumerate(key_details):
425
  new_checklist.append({"detail": detail, "identified": False, "id": i})
426
 
427
- # Initialize the new chat with an appropriate message
428
- if updated_difficulty != active_session.get("difficulty", "Very Simple"):
 
429
  advancement_message = f"Congratulations! You've advanced to {updated_difficulty} difficulty! Here's a new image to describe."
430
  else:
431
  advancement_message = "Great job identifying all the details! Here's a new image at the same difficulty level."
432
 
433
  new_active_session["chat"] = [("System", advancement_message)]
434
-
435
  return "", new_active_session["chat"], new_sessions, new_active_session, new_checklist, new_image
436
 
437
- # If not advancing, return None for the image to indicate no change
438
  return "", updated_chat, saved_sessions, active_session, updated_checklist, None
439
 
440
  def update_sessions(saved_sessions, active_session):
@@ -449,7 +407,6 @@ def update_sessions(saved_sessions, active_session):
449
  # Gradio Interface
450
  ##############################################
451
  with gr.Blocks() as demo:
452
- # Initialize the active session with default values
453
  active_session = gr.State({
454
  "prompt": None,
455
  "image": None,
@@ -462,19 +419,17 @@ with gr.Blocks() as demo:
462
  "used_hints": [],
463
  "difficulty": "Very Simple",
464
  "age": "3",
465
- "autism_level": "Level 1"
 
 
466
  })
467
  saved_sessions = gr.State([])
468
  checklist_state = gr.State([])
469
 
470
  with gr.Row():
471
- # Main content area
472
  with gr.Column(scale=2):
473
  gr.Markdown("# Autism Education Image Description Tool")
474
- # Display current difficulty label
475
  difficulty_label = gr.Markdown("**Current Difficulty:** Very Simple")
476
-
477
- # ----- Image Generation Section -----
478
  with gr.Column():
479
  gr.Markdown("## Generate Image")
480
  gr.Markdown("Enter the child's details to generate an appropriate educational image.")
@@ -491,10 +446,9 @@ with gr.Blocks() as demo:
491
  placeholder="Enter the treatment plan to guide the image generation...",
492
  lines=2
493
  )
 
494
  generate_btn = gr.Button("Generate Image")
495
  img_output = gr.Image(label="Generated Image")
496
-
497
- # ----- Chat Section -----
498
  with gr.Column():
499
  gr.Markdown("## Image Description Practice")
500
  gr.Markdown(
@@ -506,20 +460,19 @@ with gr.Blocks() as demo:
506
  with gr.Row():
507
  chat_input = gr.Textbox(label="Child's Description", placeholder="Type what the child says about the image...", show_label=True)
508
  send_btn = gr.Button("Submit")
509
-
510
- # Sidebar - Checklist of items to identify
511
  with gr.Column(scale=1):
512
  gr.Markdown("## Details to Identify")
513
  gr.Markdown("The child should try to identify these elements in the image:")
514
-
515
- # Create a custom HTML component to display the checklist with checkboxes
516
  checklist_html = gr.HTML("""
517
  <div id="checklist-container">
518
  <p>Generate an image to see details to identify.</p>
519
  </div>
520
  """)
521
-
522
- # Add a function to update the checklist HTML
523
  def update_checklist_html(checklist):
524
  if not checklist:
525
  return """
@@ -527,7 +480,6 @@ with gr.Blocks() as demo:
527
  <p>Generate an image to see details to identify.</p>
528
  </div>
529
  """
530
-
531
  html_content = """
532
  <div id="checklist-container" style="padding: 10px;">
533
  <style>
@@ -553,32 +505,26 @@ with gr.Blocks() as demo:
553
  }
554
  </style>
555
  """
556
-
557
  for item in checklist:
558
  detail = item["detail"]
559
  identified = item["identified"]
560
  css_class = "identified" if identified else "not-identified"
561
  checkmark = "✅" if identified else "⬜"
562
-
563
  html_content += f"""
564
  <div class="checklist-item {css_class}">
565
  <span class="checkmark">{checkmark}</span>
566
  <span>{detail}</span>
567
  </div>
568
  """
569
-
570
  html_content += """
571
  </div>
572
  """
573
  return html_content
574
-
575
- # Progress summary
576
  progress_html = gr.HTML("""
577
  <div id="progress-container">
578
  <p>No active session.</p>
579
  </div>
580
  """)
581
-
582
  def update_progress_html(checklist):
583
  if not checklist:
584
  return """
@@ -586,14 +532,11 @@ with gr.Blocks() as demo:
586
  <p>No active session.</p>
587
  </div>
588
  """
589
-
590
  total_items = len(checklist)
591
  identified_items = sum(1 for item in checklist if item["identified"])
592
  percentage = (identified_items / total_items) * 100 if total_items > 0 else 0
593
-
594
  progress_bar_width = f"{percentage}%"
595
  all_identified = identified_items == total_items
596
-
597
  html_content = f"""
598
  <div id="progress-container" style="padding: 10px;">
599
  <h3>Progress: {identified_items} / {total_items} details</h3>
@@ -602,7 +545,6 @@ with gr.Blocks() as demo:
602
  </div>
603
  <p style="font-size: 16px; font-weight: bold; text-align: center;">
604
  """
605
-
606
  if all_identified:
607
  html_content += "🎉 Amazing! All details identified! 🎉"
608
  elif percentage >= 75:
@@ -613,14 +555,20 @@ with gr.Blocks() as demo:
613
  html_content += "Good start! Keep looking!"
614
  else:
615
  html_content += "Let's find more details!"
616
-
617
  html_content += """
618
  </p>
619
  </div>
620
  """
621
  return html_content
622
 
623
- # ----- Session Details Section -----
624
  with gr.Row():
625
  with gr.Column():
626
  gr.Markdown("## Progress Tracking")
@@ -630,59 +578,41 @@ with gr.Blocks() as demo:
630
  "and the full conversation history."
631
  )
632
  sessions_output = gr.JSON(label="Session Details", value={})
633
-
634
- # Process chat and update image as needed
635
  def process_chat_and_image(user_msg, active_session, saved_sessions, checklist):
636
- chat_input, chatbot, new_sessions, new_active_session, new_checklist, new_image = chat_respond(
637
  user_msg, active_session, saved_sessions, checklist
638
  )
639
-
640
- # Only return a new image if one was generated (advancement case)
641
  if new_image is not None:
642
- return chat_input, chatbot, new_sessions, new_active_session, new_checklist, new_image
643
  else:
644
- # Return a no-update flag for the image to keep the current one
645
- return chat_input, chatbot, new_sessions, new_active_session, new_checklist, gr.update()
646
-
647
- # Connect event handlers
648
  generate_btn.click(
649
  generate_image_and_reset_chat,
650
- inputs=[age_input, autism_level_dropdown, topic_focus_input, treatment_plan_input, active_session, saved_sessions],
651
  outputs=[img_output, active_session, saved_sessions, checklist_state]
652
  )
653
-
654
  send_btn.click(
655
  process_chat_and_image,
656
  inputs=[chat_input, active_session, saved_sessions, checklist_state],
657
  outputs=[chat_input, chatbot, saved_sessions, active_session, checklist_state, img_output]
658
  )
659
-
660
  chat_input.submit(
661
  process_chat_and_image,
662
  inputs=[chat_input, active_session, saved_sessions, checklist_state],
663
  outputs=[chat_input, chatbot, saved_sessions, active_session, checklist_state, img_output]
664
  )
665
-
666
- # Update the checklist HTML when checklist state changes
667
  checklist_state.change(
668
  update_checklist_html,
669
  inputs=[checklist_state],
670
  outputs=[checklist_html]
671
  )
672
-
673
- # Update the progress HTML when checklist state changes
674
  checklist_state.change(
675
  update_progress_html,
676
  inputs=[checklist_state],
677
  outputs=[progress_html]
678
  )
679
-
680
- # Update the current difficulty label when active_session changes
681
  active_session.change(update_difficulty_label, inputs=[active_session], outputs=[difficulty_label])
682
-
683
- # Update sessions when active_session or saved_sessions change
684
  active_session.change(update_sessions, inputs=[saved_sessions, active_session], outputs=sessions_output)
685
  saved_sessions.change(update_sessions, inputs=[saved_sessions, active_session], outputs=sessions_output)
686
-
687
- # Launch the app
688
  demo.launch()
 
49
  Use descriptive and detailed language.
50
  """
51
  )
52
+ model = GenerativeModel('gemini-2.0-flash')
 
 
53
  response = model.generate_content(query)
54
  return response.text.strip()
55
 
 
57
  """
58
  Generate a detailed description of the image using Gemini Vision.
59
  """
 
60
  base64_img = image_data_url.split(",")[1]
61
  query = (
62
  f"""
 
74
  so please be comprehensive but focus on observable details rather than interpretations.
75
  """
76
  )
 
77
  vision_model = GenerativeModel('gemini-2.0-flash-thinking-exp-01-21')
 
78
  image_part = Part(inline_data={"mime_type": "image/png", "data": base64.b64decode(base64_img)})
79
  text_part = Part(text=query)
80
  multimodal_content = Content(parts=[image_part, text_part])
 
81
  response = vision_model.generate_content(multimodal_content)
82
  return response.text.strip()
83
 
84
+ def extract_key_details(image_data_url, prompt, topic_focus):
85
  """
86
+ Extract key details directly from the image using Gemini Vision.
87
+ Returns a list of key elements/details from the image.
88
  """
89
+ base64_img = image_data_url.split(",")[1]
90
  query = (
91
  f"""
92
+ You are analyzing an educational image created for a child with autism, based on the prompt: "{prompt}".
93
+ The image focuses on the topic: "{topic_focus}".
94
+
95
+ Please extract a list of 10-15 key details that a child might identify in this image.
96
  Each detail should be a simple, clear phrase describing one observable element.
97
+ Focus on concrete, visible elements rather than abstract concepts.
98
+
99
  Format your response as a JSON array of strings, each representing one key detail.
100
  Example format: ["red ball on the grass", "smiling girl with brown hair", "blue sky with clouds"]
101
+
102
+ Ensure each detail is:
103
+ 1. Directly observable in the image
104
+ 2. Unique (not a duplicate)
105
+ 3. Described in simple, concrete language
106
+ 4. Relevant to what a child would notice
107
  """
108
  )
109
+
110
+ vision_model = GenerativeModel('gemini-2.0-flash')
111
+ image_part = Part(inline_data={"mime_type": "image/png", "data": base64.b64decode(base64_img)})
112
+ text_part = Part(text=query)
113
+ multimodal_content = Content(parts=[image_part, text_part])
114
+ response = vision_model.generate_content(multimodal_content)
115
+
116
  try:
 
117
  details_match = re.search(r'\[.*\]', response.text, re.DOTALL)
118
  if details_match:
119
  details_json = details_match.group(0)
120
  key_details = json.loads(details_json)
121
  return key_details
122
  else:
123
+ # If no JSON array is found, try to extract bullet points or lines
124
+ lines = response.text.split('\n')
125
  details = []
126
  for line in lines:
127
  if line.strip().startswith('-') or line.strip().startswith('*'):
 
131
  print(f"Error extracting key details: {str(e)}")
132
  return ["object in image", "color", "shape", "background"]
133
 
134
+ def generate_image_fn(selected_prompt, guidance_scale=7.5,
135
+ negative_prompt="ugly, blurry, poorly drawn hands, nude, deformed, missing limbs, missing body parts",
136
+ num_inference_steps=45):
137
+ """
138
+ Generate an image from the prompt via the Hugging Face Inference API.
139
+ Convert the image to a data URL.
140
+ """
141
+ global global_image_data_url, global_image_prompt
142
+ global_image_prompt = selected_prompt
143
+ image_client = InferenceClient(provider="hf-inference", api_key=inference_api_key)
144
+ image = image_client.text_to_image(
145
+ selected_prompt,
146
+ model="stabilityai/stable-diffusion-3.5-large-turbo",
147
+ guidance_scale=guidance_scale,
148
+ negative_prompt=negative_prompt,
149
+ num_inference_steps=num_inference_steps
150
+ )
151
+ buffered = io.BytesIO()
152
+ image.save(buffered, format="PNG")
153
+ img_bytes = buffered.getvalue()
154
+ img_b64 = base64.b64encode(img_bytes).decode("utf-8")
155
+ global_image_data_url = f"data:image/png;base64,{img_b64}"
156
+ return image
157
+
158
+ def generate_image_and_reset_chat(age, autism_level, topic_focus, treatment_plan, attempt_limit_input, active_session, saved_sessions):
159
  """
160
  Generate a new image (with the current difficulty) and reset the chat.
161
+ Also resets the attempt count and uses the user-entered attempt limit.
162
  """
163
  global global_image_description
164
  new_sessions = saved_sessions.copy()
165
  if active_session.get("prompt"):
166
  new_sessions.append(active_session)
167
 
 
168
  current_difficulty = active_session.get("difficulty", "Very Simple")
 
 
169
  generated_prompt = generate_prompt_from_options(current_difficulty, age, autism_level, topic_focus, treatment_plan)
 
 
170
  image = generate_image_fn(generated_prompt)
 
 
171
  image_description = generate_detailed_description(global_image_data_url, generated_prompt, current_difficulty, topic_focus)
172
  global_image_description = image_description
173
+ key_details = extract_key_details(global_image_data_url, generated_prompt, topic_focus)
174
175
  new_active_session = {
176
  "prompt": generated_prompt,
177
  "image": global_image_data_url,
 
179
  "chat": [],
180
  "treatment_plan": treatment_plan,
181
  "topic_focus": topic_focus,
182
+ "key_details": key_details,
183
  "identified_details": [],
184
  "used_hints": [],
185
  "difficulty": current_difficulty,
186
  "autism_level": autism_level,
187
+ "age": age,
188
+ "attempt_limit": int(attempt_limit_input) if attempt_limit_input else 3,
189
+ "attempt_count": 0
190
  }
191
 
 
192
  checklist_items = []
193
  for i, detail in enumerate(key_details):
194
  checklist_items.append({"detail": detail, "identified": False, "id": i})
195
 
 
196
  return image, new_active_session, new_sessions, checklist_items
197
 
198
  def compare_details_chat_fn(user_details, active_session):
199
  """
200
  Evaluate the child's description using Google's Gemini model.
 
201
  """
202
  if not global_image_data_url or not global_image_description:
203
  return "Please generate an image first."
204
 
 
205
  image_description = active_session.get("image_description", global_image_description)
 
 
206
  chat_history = active_session.get("chat", [])
207
  history_text = ""
208
  if chat_history:
 
210
  for idx, (speaker, msg) in enumerate(chat_history, 1):
211
  history_text += f"Turn {idx}:\n{speaker}: {msg}\n"
212
 
 
213
  key_details = active_session.get("key_details", [])
214
  identified_details = active_session.get("identified_details", [])
215
  used_hints = active_session.get("used_hints", [])
216
 
 
217
  key_details_text = "\n\n### Key Details to Identify:\n" + "\n".join(f"- {detail}" for detail in key_details)
218
  identified_details_text = ""
219
  if identified_details:
 
222
  if used_hints:
223
  used_hints_text = "\n\n### Previously Given Hints:\n" + "\n".join(f"- {hint}" for hint in used_hints)
224
 
 
225
  current_difficulty = active_session.get("difficulty", "Very Simple")
 
226
  message_text = (
227
  f"You are a kind and encouraging teacher helping a child with autism describe an image.\n\n"
228
  f"### Image Prompt:\n{active_session.get('prompt', 'No prompt available')}\n\n"
 
251
  "Ensure the JSON is valid and contains all fields."
252
  )
253
 
254
+ model = GenerativeModel('gemini-2.0-flash')
255
  response = model.generate_content(message_text)
256
  return response.text
257
 
258
  def parse_evaluation(evaluation_text, active_session):
259
+ """
260
+ Parse the evaluation JSON and return feedback, updated difficulty, whether to advance,
261
+ newly identified details, and the score.
262
+ """
263
  try:
264
  json_match = re.search(r'\{.*\}', evaluation_text, re.DOTALL)
265
  if json_match:
 
268
  else:
269
  raise ValueError("No JSON object found in the response.")
270
 
 
271
  feedback = evaluation.get("feedback", "Great effort! Keep describing what you see.")
272
  newly_identified_details = evaluation.get("newly_identified_details", [])
273
  hint = evaluation.get("hint", "")
274
  score = evaluation.get("score", 0)
275
  advance_difficulty = evaluation.get("advance_difficulty", False)
276
 
 
277
  identified_details = active_session.get("identified_details", [])
278
  for detail in newly_identified_details:
279
  if detail not in identified_details:
280
  identified_details.append(detail)
281
  active_session["identified_details"] = identified_details
282
 
 
283
  if hint:
284
  used_hints = active_session.get("used_hints", [])
285
  if hint not in used_hints:
286
  used_hints.append(hint)
287
  active_session["used_hints"] = used_hints
 
 
288
  if hint.strip() and hint.strip() not in feedback:
289
  feedback += f"\n\n💡 Hint: {hint}"
290
 
 
291
  current_difficulty = active_session.get("difficulty", "Very Simple")
292
  should_advance = False
 
293
  if advance_difficulty:
294
  difficulties = ["Very Simple", "Simple", "Moderate", "Detailed", "Very Detailed"]
295
  current_index = difficulties.index(current_difficulty) if current_difficulty in difficulties else 0
 
297
  current_difficulty = difficulties[current_index + 1]
298
  should_advance = True
299
 
300
+ return feedback, current_difficulty, should_advance, newly_identified_details, score
 
301
  except Exception as e:
302
  print(f"Error processing evaluation: {str(e)}")
303
+ return ("That's interesting! Can you tell me more about what you see?",
304
+ active_session.get("difficulty", "Very Simple"),
305
+ False,
306
+ [],
307
+ 0)
308
 
309
  def update_checklist(checklist, newly_identified, key_details):
310
  """
311
  Update the checklist based on newly identified details.
 
312
  """
313
  new_checklist = []
314
  for item in checklist:
315
  detail = item["detail"]
 
316
  is_identified = item["identified"]
 
 
317
  for identified in newly_identified:
 
318
  if (identified.lower() in detail.lower() or detail.lower() in identified.lower() or
319
  any(word for word in identified.lower().split() if word in detail.lower() and len(word) > 3)):
320
  is_identified = True
321
  break
 
322
  new_checklist.append({"detail": detail, "identified": is_identified, "id": item["id"]})
 
323
  return new_checklist
324
 
325
  def chat_respond(user_message, active_session, saved_sessions, checklist):
326
  """
327
  Process a new chat message.
328
  Evaluate the child's description, update identified details, and advance difficulty if needed.
329
+ Only increment the attempt count if no new details were identified.
330
  """
331
  if not active_session.get("image"):
332
  bot_message = "Please generate an image first."
333
  updated_chat = active_session.get("chat", []) + [("Child", user_message), ("Teacher", bot_message)]
334
  active_session["chat"] = updated_chat
335
+ return "", updated_chat, saved_sessions, active_session, checklist, None
336
 
 
337
  raw_evaluation = compare_details_chat_fn(user_message, active_session)
338
+ feedback, updated_difficulty, should_advance, newly_identified, score = parse_evaluation(raw_evaluation, active_session)
339
 
340
+ # Only count a failed attempt if no new details were identified
341
+ if not newly_identified:
342
+ active_session["attempt_count"] = active_session.get("attempt_count", 0) + 1
343
 
 
344
  updated_checklist = update_checklist(checklist, newly_identified, active_session.get("key_details", []))
 
 
345
  updated_chat = active_session.get("chat", []) + [("Child", user_message), ("Teacher", feedback)]
346
  active_session["chat"] = updated_chat
347
 
 
348
  all_identified = all(item["identified"] for item in updated_checklist)
349
+ attempts_exhausted = active_session.get("attempt_count", 0) >= active_session.get("attempt_limit", 3)
350
+ should_generate_new_image = should_advance or all_identified or attempts_exhausted
351
352
  if should_generate_new_image:
 
353
  new_sessions = saved_sessions.copy()
354
  new_sessions.append(active_session.copy())
 
 
355
  age = active_session.get("age", "3")
356
  autism_level = active_session.get("autism_level", "Level 1")
357
  topic_focus = active_session.get("topic_focus", "")
358
  treatment_plan = active_session.get("treatment_plan", "")
 
 
359
  difficulty_to_use = updated_difficulty if updated_difficulty != active_session.get("difficulty", "Very Simple") else active_session.get("difficulty", "Very Simple")
 
 
360
  generated_prompt = generate_prompt_from_options(difficulty_to_use, age, autism_level, topic_focus, treatment_plan)
 
 
361
  new_image = generate_image_fn(generated_prompt)
362
  image_description = generate_detailed_description(global_image_data_url, generated_prompt, difficulty_to_use, topic_focus)
363
+ key_details = extract_key_details(global_image_data_url, generated_prompt, topic_focus)
364
365
  new_active_session = {
366
  "prompt": generated_prompt,
367
  "image": global_image_data_url,
 
374
  "used_hints": [],
375
  "difficulty": difficulty_to_use,
376
  "autism_level": autism_level,
377
+ "age": age,
378
+ "attempt_limit": active_session.get("attempt_limit", 3),
379
+ "attempt_count": 0
380
  }
381
 
 
382
  new_checklist = []
383
  for i, detail in enumerate(key_details):
384
  new_checklist.append({"detail": detail, "identified": False, "id": i})
385
 
386
+ if attempts_exhausted:
387
+ advancement_message = "You've used all your allowed attempts. Let's try a new image."
388
+ elif updated_difficulty != active_session.get("difficulty", "Very Simple"):
389
  advancement_message = f"Congratulations! You've advanced to {updated_difficulty} difficulty! Here's a new image to describe."
390
  else:
391
  advancement_message = "Great job identifying all the details! Here's a new image at the same difficulty level."
392
 
393
  new_active_session["chat"] = [("System", advancement_message)]
 
394
  return "", new_active_session["chat"], new_sessions, new_active_session, new_checklist, new_image
395
 
 
396
  return "", updated_chat, saved_sessions, active_session, updated_checklist, None
397
 
398
  def update_sessions(saved_sessions, active_session):
 
407
  # Gradio Interface
408
  ##############################################
409
  with gr.Blocks() as demo:
 
410
  active_session = gr.State({
411
  "prompt": None,
412
  "image": None,
 
419
  "used_hints": [],
420
  "difficulty": "Very Simple",
421
  "age": "3",
422
+ "autism_level": "Level 1",
423
+ "attempt_limit": 3,
424
+ "attempt_count": 0
425
  })
426
  saved_sessions = gr.State([])
427
  checklist_state = gr.State([])
428
 
429
  with gr.Row():
 
430
  with gr.Column(scale=2):
431
  gr.Markdown("# Autism Education Image Description Tool")
 
432
  difficulty_label = gr.Markdown("**Current Difficulty:** Very Simple")
 
 
433
  with gr.Column():
434
  gr.Markdown("## Generate Image")
435
  gr.Markdown("Enter the child's details to generate an appropriate educational image.")
 
446
  placeholder="Enter the treatment plan to guide the image generation...",
447
  lines=2
448
  )
449
+ attempt_limit_input = gr.Number(label="Allowed Attempts", value=3, precision=0)
450
  generate_btn = gr.Button("Generate Image")
451
  img_output = gr.Image(label="Generated Image")
 
 
452
  with gr.Column():
453
  gr.Markdown("## Image Description Practice")
454
  gr.Markdown(
 
460
  with gr.Row():
461
  chat_input = gr.Textbox(label="Child's Description", placeholder="Type what the child says about the image...", show_label=True)
462
  send_btn = gr.Button("Submit")
 
 
463
  with gr.Column(scale=1):
464
  gr.Markdown("## Details to Identify")
465
  gr.Markdown("The child should try to identify these elements in the image:")
 
 
466
  checklist_html = gr.HTML("""
467
  <div id="checklist-container">
468
  <p>Generate an image to see details to identify.</p>
469
  </div>
470
  """)
471
+ attempt_counter_html = gr.HTML("""
472
+ <div id="attempt-counter" style="margin-top: 10px; padding: 10px; background-color: #030404; border-radius: 5px;">
473
+ <p style="margin: 0; font-weight: bold;">Attempts: 0/3</p>
474
+ </div>
475
+ """)
476
  def update_checklist_html(checklist):
477
  if not checklist:
478
  return """
 
480
  <p>Generate an image to see details to identify.</p>
481
  </div>
482
  """
 
483
  html_content = """
484
  <div id="checklist-container" style="padding: 10px;">
485
  <style>
 
505
  }
506
  </style>
507
  """
 
508
  for item in checklist:
509
  detail = item["detail"]
510
  identified = item["identified"]
511
  css_class = "identified" if identified else "not-identified"
512
  checkmark = "✅" if identified else "⬜"
 
513
  html_content += f"""
514
  <div class="checklist-item {css_class}">
515
  <span class="checkmark">{checkmark}</span>
516
  <span>{detail}</span>
517
  </div>
518
  """
 
519
  html_content += """
520
  </div>
521
  """
522
  return html_content
 
 
523
  progress_html = gr.HTML("""
524
  <div id="progress-container">
525
  <p>No active session.</p>
526
  </div>
527
  """)
 
528
  def update_progress_html(checklist):
529
  if not checklist:
530
  return """
 
532
  <p>No active session.</p>
533
  </div>
534
  """
 
535
  total_items = len(checklist)
536
  identified_items = sum(1 for item in checklist if item["identified"])
537
  percentage = (identified_items / total_items) * 100 if total_items > 0 else 0
 
538
  progress_bar_width = f"{percentage}%"
539
  all_identified = identified_items == total_items
 
540
  html_content = f"""
541
  <div id="progress-container" style="padding: 10px;">
542
  <h3>Progress: {identified_items} / {total_items} details</h3>
 
545
  </div>
546
  <p style="font-size: 16px; font-weight: bold; text-align: center;">
547
  """
 
548
  if all_identified:
549
  html_content += "🎉 Amazing! All details identified! 🎉"
550
  elif percentage >= 75:
 
555
  html_content += "Good start! Keep looking!"
556
  else:
557
  html_content += "Let's find more details!"
 
558
  html_content += """
559
  </p>
560
  </div>
561
  """
562
  return html_content
563
 
564
+ def update_attempt_counter(active_session):
565
+ current_count = active_session.get("attempt_count", 0)
566
+ limit = active_session.get("attempt_limit", 3)
567
+ return f"""
568
+ <div id="attempt-counter" style="margin-top: 10px; padding: 10px; background-color: #bfbfbf; border-radius: 5px; border: 1px solid #ddd;">
569
+ <p style="margin: 0; font-weight: bold; text-align: center;">Attempts: {current_count}/{limit}</p>
570
+ </div>
571
+ """
572
  with gr.Row():
573
  with gr.Column():
574
  gr.Markdown("## Progress Tracking")
 
578
  "and the full conversation history."
579
  )
580
  sessions_output = gr.JSON(label="Session Details", value={})
 
 
581
  def process_chat_and_image(user_msg, active_session, saved_sessions, checklist):
582
+ chat_input_val, chatbot_val, new_sessions, new_active_session, new_checklist, new_image = chat_respond(
583
  user_msg, active_session, saved_sessions, checklist
584
  )
 
 
585
  if new_image is not None:
586
+ return chat_input_val, chatbot_val, new_sessions, new_active_session, new_checklist, new_image
587
  else:
588
+ return chat_input_val, chatbot_val, new_sessions, new_active_session, new_checklist, gr.update()
589
  generate_btn.click(
590
  generate_image_and_reset_chat,
591
+ inputs=[age_input, autism_level_dropdown, topic_focus_input, treatment_plan_input, attempt_limit_input, active_session, saved_sessions],
592
  outputs=[img_output, active_session, saved_sessions, checklist_state]
593
  )
 
594
  send_btn.click(
595
  process_chat_and_image,
596
  inputs=[chat_input, active_session, saved_sessions, checklist_state],
597
  outputs=[chat_input, chatbot, saved_sessions, active_session, checklist_state, img_output]
598
  )
 
599
  chat_input.submit(
600
  process_chat_and_image,
601
  inputs=[chat_input, active_session, saved_sessions, checklist_state],
602
  outputs=[chat_input, chatbot, saved_sessions, active_session, checklist_state, img_output]
603
  )
  checklist_state.change(
605
  update_checklist_html,
606
  inputs=[checklist_state],
607
  outputs=[checklist_html]
608
  )
  checklist_state.change(
610
  update_progress_html,
611
  inputs=[checklist_state],
612
  outputs=[progress_html]
613
  )
  active_session.change(update_difficulty_label, inputs=[active_session], outputs=[difficulty_label])
615
+ active_session.change(update_attempt_counter, inputs=[active_session], outputs=[attempt_counter_html])
 
616
  active_session.change(update_sessions, inputs=[saved_sessions, active_session], outputs=sessions_output)
617
  saved_sessions.change(update_sessions, inputs=[saved_sessions, active_session], outputs=sessions_output)
 
 
618
  demo.launch()
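
For reference, a minimal, self-contained sketch of the PNG data-URL round trip that generate_image_fn and the vision helpers above rely on; the blank Pillow image is a stand-in for the Stable Diffusion output, and only base64, io, and PIL are assumed:

import base64
import io

from PIL import Image

# Stand-in for the image returned by the Hugging Face Inference API in generate_image_fn.
image = Image.new("RGB", (64, 64), color="white")

# Encode to a PNG data URL, as generate_image_fn does before storing global_image_data_url.
buffered = io.BytesIO()
image.save(buffered, format="PNG")
img_b64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
image_data_url = f"data:image/png;base64,{img_b64}"

# Split off the base64 payload and decode it back, as generate_detailed_description
# and extract_key_details do before handing the bytes to Gemini Vision.
base64_img = image_data_url.split(",")[1]
decoded = Image.open(io.BytesIO(base64.b64decode(base64_img)))
assert decoded.size == (64, 64)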