Daemontatox committed on
Commit
9b134bd
·
verified ·
1 Parent(s): 44904f8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +105 -99
app.py CHANGED
@@ -11,66 +11,85 @@ inference_api_key = os.environ.get("HF_TOKEN")
11
  chat_api_key = os.environ.get("HF_TOKEN")
12
  chat_api_key2 = os.environ.get("OPENROUTER_TOKEN")
13
 
14
- # Global variables to store the image data URL and prompt for the currently generated image.
 
 
15
  global_image_data_url = None
16
  global_image_prompt = None # Still stored if needed elsewhere
17
 
18
- def generate_prompt_from_options(difficulty, age, level, treatment_plan=""):
19
  """
20
  Uses the OpenAI chat model (via Hugging Face Inference API) to generate an image generation prompt
21
- based on the selected difficulty, age, autism level, and the treatment plan the user provides.
22
  """
23
  query = (
 
24
  f"""
25
- Follow the instructions below to generate an image generation prompt for an educational image intended for autistic children.
26
- Consider the following parameters:
27
- - Difficulty: {difficulty}
28
- - Age: {age}
29
- - Autism Level: {level}
30
- - Treatment Plan: {treatment_plan}
 
31
 
32
- Use the following system prompt to guide the image generation process:
33
 
34
- You are an image generation engine specializing in creating clear, calming, and visually supportive images designed for children with autism spectrum disorder (ASD). Your primary goal is to produce images that aid in understanding, communication, emotional regulation, and daily routines. Prioritize the following characteristics:
35
 
36
  **1. Clarity and Simplicity:**
37
- - **Minimalist Backgrounds:** Use solid, muted colors (e.g., soft blues, greens, light grays, pastels) or very simple, uncluttered backgrounds. Avoid busy patterns, highly contrasting colors, or distracting elements.
38
- - **Clear Subject Focus:** The main subject of the image should be prominent and easily identifiable. Avoid unnecessary details that could cause confusion or sensory overload.
39
- - **Unambiguous Representations:** Objects and people should be depicted in a realistic and straightforward manner. Avoid abstract art or overly stylized representations. If depicting emotions, make them very clear and easily recognizable.
 
40
 
41
  **2. Visual Structure and Predictability:**
42
- - **Literal Interpretation:** The images should be highly literal. Avoid metaphors, symbolism, or implied meanings. If depicting a sequence of events, make each step visually distinct.
43
- - **Defined Borders:** Consider using clear outlines or borders around objects and people to enhance visual separation and definition.
44
- - **Consistent Style:** Maintain a consistent visual style across multiple images. This helps build familiarity and predictability.
 
45
 
46
  **3. Sensory Considerations:**
47
- - **Soft Color Palette:** Favor muted, calming colors. Avoid overly bright, saturated, or fluorescent colors.
48
- - **Reduced Visual Complexity:** Limit the number of elements in the image to prevent sensory overload.
49
- - **Smooth Textures:** If textures are depicted, they should appear smooth and non-threatening. Avoid rough, jagged, or overly detailed textures.
 
50
 
51
  **4. Positive and Supportive Imagery:**
52
- - **Positive Reinforcement:** Images should be encouraging and positive. Depict success, cooperation, and positive social interactions.
53
- - **Calm and Relaxing Scenes:** Consider scenes that promote calmness, such as nature scenes (e.g., a quiet forest, a calm beach) or familiar, safe environments (e.g., a cozy bedroom, a well-organized classroom).
54
- - **Avoidance of Triggers:** Be mindful of potential triggers for anxiety or distress. Avoid images that depict conflict, overwhelming crowds, or potentially frightening situations.
 
55
 
56
  **5. Specific Use Cases (Adapt as needed):**
57
- - **Social Stories:** If generating images for a social story, ensure each image clearly illustrates a single step in the sequence. Use consistent characters and settings throughout the story.
58
- - **Visual Schedules:** If creating images for a visual schedule, make each activity easily identifiable and visually distinct.
59
- - **Emotion Recognition:** If depicting emotions, use clear facial expressions and body language. Consider using a consistent character to represent different emotions.
60
- - **Communication Aids:** If creating images for communication, ensure the objects or actions are clearly depicted and easily recognizable.
61
- - **Daily Routines:** e.g., brushing teeth, eating food, going to school.
62
- - **Learning Concepts:** e.g., shapes, colors, animals, numbers, alphabet.
 
63
 
64
  **Prompting Instructions:**
 
65
  When providing a prompt to the model, be as specific as possible, including:
66
- - **The subject of the image:** e.g., "A boy brushing his teeth."
67
- - **The desired style:** e.g., "Simple, clear, with a solid light blue background."
68
- - **The intended use:** e.g., "For a visual schedule."
69
- - **Any specific details:** e.g., "The boy should be smiling. The toothbrush should be blue."
70
- - **Emotions:** Clearly state the emotion ("happy" or "calm").
71
-
72
- Ensure your prompt is accurate and the generated images are clear without irregularities or deformations.
73
- Use descriptive and detailed language.
 
 
 
 
 
 
 
 
 
74
  """
75
  )
76
 
@@ -87,8 +106,8 @@ def generate_prompt_from_options(difficulty, age, level, treatment_plan=""):
87
  )
88
 
89
  stream = client.chat.completions.create(
90
- model="google/gemini-2.0-pro-exp-02-05:free", # google/gemini-2.0-pro-exp-02-05:free # sophosympatheia/rogue-rose-103b-v0.2:free
91
- temperature=0.9,
92
  messages=messages,
93
  max_tokens=8192,
94
  stream=True
@@ -99,10 +118,15 @@ def generate_prompt_from_options(difficulty, age, level, treatment_plan=""):
99
  response_text += chunk.choices[0].delta.content
100
  return response_text.strip()
101
 
102
- def generate_image_fn(selected_prompt, guidance_scale=7.5, negative_prompt="ugly, blurry, poorly drawn hands, lewd, nude , deformed , missing limbs, missing eyes, missing arms, missing legs, missing nose, missing mouth, missing ears, missing teeth", num_inference_steps=50):
103
  """
104
  Uses the Hugging Face Inference API to generate an image from the provided prompt.
105
  Converts the image to a data URL for later use and stores the prompt globally.
 
 
 
 
 
106
  """
107
  global global_image_data_url, global_image_prompt
108
 
@@ -116,7 +140,7 @@ def generate_image_fn(selected_prompt, guidance_scale=7.5, negative_prompt="ugly
116
 
117
  image = image_client.text_to_image(
118
  selected_prompt,
119
- model="stabilityai/stable-diffusion-3.5-large-turbo",
120
  guidance_scale=guidance_scale,
121
  negative_prompt=negative_prompt,
122
  num_inference_steps=num_inference_steps
@@ -130,66 +154,32 @@ def generate_image_fn(selected_prompt, guidance_scale=7.5, negative_prompt="ugly
130
 
131
  return image
132
 
133
- def generate_image_and_reset_chat(difficulty, age, level, treatment_plan, active_session, saved_sessions):
134
  """
135
- Saves any current active session into the saved sessions list. Then, using the selected options and treatment plan,
136
  generates an image generation prompt, creates an image, and starts a new active session.
137
  """
138
  new_sessions = saved_sessions.copy()
139
  if active_session.get("prompt"):
140
  new_sessions.append(active_session)
141
 
142
- generated_prompt = generate_prompt_from_options(difficulty, age, level, treatment_plan)
143
- image = generate_image_fn(generated_prompt)
144
 
145
- new_active_session = {
146
- "prompt": generated_prompt,
147
- "image": global_image_data_url,
148
- "chat": [],
149
- "treatment_plan": treatment_plan
150
- }
151
  return image, new_active_session, new_sessions
152
 
153
- def compare_details_chat_fn(user_details, treatment_plan):
154
  """
155
  Uses the vision language model to evaluate the user description based solely on the generated image.
156
  The message includes both the image (using its data URL) and the user’s text.
157
- The provided treatment plan is included so the model keeps it in mind during evaluation.
158
  """
159
  if not global_image_data_url:
160
  return "Please generate an image first."
161
 
162
- treatment_text = ""
163
- if treatment_plan and treatment_plan.strip():
164
- treatment_text = f"\n\nTreatment Plan: {treatment_plan.strip()}"
165
-
166
- message_text = (
167
- f"Based on the image provided above, please evaluate the following description given by the child:\n"
168
- f"'{user_details}'\n\n"
169
- "You are a friendly and encouraging teacher, guiding a child in describing an image. "
170
- "Speak directly to the child using simple, clear language. Provide positive reinforcement when the child gives a correct or accurate description.\n\n"
171
- "If the child's description is incorrect or inaccurate, gently guide them with hints rather than direct corrections. "
172
- "Prefix your hints with 'Hint:' and keep them playful and engaging to encourage curiosity.\n\n"
173
- "Focus your feedback on the following evaluation criteria:\n"
174
- "1. **Object Identification** – Does the child correctly name objects in the image?\n"
175
- "2. **Color & Shape Accuracy** – Are colors, shapes, and basic attributes described correctly?\n"
176
- "3. **Detail Perception** – Does the child notice small details, textures, and patterns?\n"
177
- "4. **Spatial Awareness** – Are object positions and relationships described correctly?\n"
178
- "5. **Action & Interaction Recognition** – Does the child describe interactions between objects or characters?\n"
179
- "6. **Emotional & Contextual Understanding** – Can they recognize emotions, intent, or context in the image?\n"
180
- "7. **Coherence & Clarity** – Is their response structured, logical, and understandable?\n"
181
- "8. **Creativity & Interpretation** – Do they provide unique observations or imaginative descriptions?\n"
182
- "9. **Comparison to Expected Response** – How does their description compare to an ideal response?"
183
- f"{treatment_text}\n\n"
184
- "### Response Format:\n"
185
- "- [Assign a score based on accuracy, detail, and clarity]\n"
186
- "- [Highlight what the child described well]\n"
187
- "- [Mention key details they missed or misinterpreted]\n"
188
- "- [Provide a playful hint to guide them toward the correct observation]\n"
189
- "- [Ask an open-ended question to keep them engaged]\n\n"
190
- "Do not mention system prompts or provide direct details about the image. Stay fully engaged in a natural, conversational way to make learning fun and interactive!"
191
- )
192
-
193
  messages = [
194
  {
195
  "role": "user",
@@ -200,14 +190,29 @@ def compare_details_chat_fn(user_details, treatment_plan):
200
  },
201
  {
202
  "type": "text",
203
- "text": message_text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
204
  }
205
  ]
206
  }
207
  ]
208
 
209
  chat_client = OpenAI(
210
- base_url="https://openrouter.ai/api/v1",
211
  api_key=chat_api_key2
212
  )
213
 
@@ -226,18 +231,16 @@ def compare_details_chat_fn(user_details, treatment_plan):
226
  def chat_respond(user_message, active_session, saved_sessions):
227
  """
228
  Processes a new chat message. If no image has been generated yet, instructs the user to generate one.
229
- Otherwise, sends the generated image, the user’s description, and the stored treatment plan to the vision language model for evaluation,
230
  then appends the conversation to the active session's chat history.
231
  """
232
  if not active_session.get("image"):
233
  bot_message = "Please generate an image first."
234
  else:
235
- treatment_plan = active_session.get("treatment_plan", "")
236
- bot_message = compare_details_chat_fn(user_message, treatment_plan)
237
 
238
  updated_chat = active_session.get("chat", []) + [(user_message, bot_message)]
239
  active_session["chat"] = updated_chat
240
- # Clear only the chat message after processing.
241
  return "", updated_chat, saved_sessions, active_session
242
 
243
  def update_sessions(saved_sessions, active_session):
@@ -259,7 +262,7 @@ level_options = ["Level 1", "Level 2", "Level 3"]
259
  # Create the Gradio Interface (Single-Page) with a Sidebar for Session Details
260
  ##############################################
261
  with gr.Blocks() as demo:
262
- active_session = gr.State({"prompt": None, "image": None, "chat": [], "treatment_plan": ""})
263
  saved_sessions = gr.State([])
264
 
265
  with gr.Column():
@@ -271,18 +274,20 @@ with gr.Blocks() as demo:
271
  gr.Markdown("Select options to create a custom prompt for image generation:")
272
  with gr.Row():
273
  difficulty_dropdown = gr.Dropdown(label="Difficulty", choices=difficulty_options, value=difficulty_options[0])
274
- age_input = gr.Textbox(label="Age", placeholder="Enter age...", value="5")
 
275
  level_dropdown = gr.Dropdown(label="Level", choices=level_options, value=level_options[0])
276
- treatment_plan_input = gr.Textbox(
277
- label="Treatment Plan",
278
- placeholder="Enter the treatment plan to guide the image generation...",
 
279
  lines=2
280
  )
281
  generate_btn = gr.Button("Generate Image")
282
  img_output = gr.Image(label="Generated Image")
283
  generate_btn.click(
284
  generate_image_and_reset_chat,
285
- inputs=[difficulty_dropdown, age_input, level_dropdown, treatment_plan_input, active_session, saved_sessions],
286
  outputs=[img_output, active_session, saved_sessions]
287
  )
288
 
@@ -291,8 +296,9 @@ with gr.Blocks() as demo:
291
  gr.Markdown("## Chat about the Image")
292
  gr.Markdown(
293
  "After generating an image, type details or descriptions about it. "
294
- "Your message, along with the generated image and the stored treatment plan, "
295
- "will be sent to a vision language model for evaluation."
 
296
  )
297
  chatbot = gr.Chatbot(label="Chat History")
298
  with gr.Row():
@@ -316,7 +322,7 @@ with gr.Blocks() as demo:
316
  gr.Markdown(
317
  "This sidebar automatically saves finished chat sessions. "
318
  "Each session includes the prompt used, the generated image (as a data URL), "
319
- "the treatment plan, and the chat history (user messages and corresponding bot responses)."
320
  )
321
  sessions_output = gr.JSON(label="Session Details", value={})
322
  active_session.change(update_sessions, inputs=[saved_sessions, active_session], outputs=sessions_output)
 
11
  chat_api_key = os.environ.get("HF_TOKEN")
12
  chat_api_key2 = os.environ.get("OPENROUTER_TOKEN")
13
 
14
+
15
+
16
+ # Global variables to store the image data URL and prompt for the currently generated image.
17
  global_image_data_url = None
18
  global_image_prompt = None # Still stored if needed elsewhere
19
 
20
+ def generate_prompt_from_options(difficulty, age, level, extra_details=""):
21
  """
22
  Uses the OpenAI chat model (via Hugging Face Inference API) to generate an image generation prompt
23
+ based on the selected difficulty, age, autism level, and any extra details the user provides.
24
  """
25
  query = (
26
+
27
  f"""
28
+ Follow the instructions below to Generate an image generation prompt for an educational image intended for Autistic children.
29
+ Consider the following parameters:\n
30
+ - Difficulty: {difficulty}\n
31
+ - Age: {age}\n
32
+ - Autism Level: {level}\n
33
+ - Extra Details: {extra_details}\n\n
34
+ Use the following system prompt to guide the image generation process:\n
35
 
36
+ System Prompt:
37
 
38
+ You are an image generation engine specializing in creating clear, calming, and visually supportive images designed for children with autism spectrum disorder (ASD). Your primary goal is to produce images that aid in understanding, communication, emotional regulation, and daily routines. Prioritize the following characteristics:
39
 
40
  **1. Clarity and Simplicity:**
41
+
42
+ # * **Minimalist Backgrounds:** Use solid, muted colors (e.g., soft blues, greens, light grays, pastels) or very simple, uncluttered backgrounds. Avoid busy patterns, highly contrasting colors, or distracting elements.
43
+ * **Clear Subject Focus:** The main subject of the image should be prominent and easily identifiable. Avoid unnecessary details that could cause confusion or sensory overload.
44
+ * **Unambiguous Representations:** Objects and people should be depicted in a realistic and straightforward manner. Avoid abstract art or overly stylized representations. If depicting emotions, make them very clear and easily recognizable (e.g., a simple, wide smile for happiness, a single tear for sadness).
45
 
46
  **2. Visual Structure and Predictability:**
47
+
48
+ * **Literal Interpretation:** The images should be highly literal. Avoid metaphors, symbolism, or implied meanings. If depicting a sequence of events, make each step visually distinct.
49
+ * **Defined Borders:** Consider using clear outlines or borders around objects and people to enhance visual separation and definition.
50
+ * **Consistent Style:** Maintain a consistent visual style across multiple images. This helps build familiarity and predictability.
51
 
52
  **3. Sensory Considerations:**
53
+
54
+ * **Soft Color Palette:** Favor muted, calming colors. Avoid overly bright, saturated, or fluorescent colors.
55
+ * **Reduced Visual Complexity:** Limit the number of elements in the image to prevent sensory overload.
56
+ * **Smooth Textures:** If textures are depicted, they should appear smooth and non-threatening. Avoid rough, jagged, or overly detailed textures.
57
 
58
  **4. Positive and Supportive Imagery:**
59
+
60
+ * **Positive Reinforcement:** Images should be encouraging and positive. Depict success, cooperation, and positive social interactions.
61
+ * **Calm and Relaxing Scenes:** Consider scenes that promote calmness, such as nature scenes (e.g., a quiet forest, a calm beach), or familiar, safe environments (e.g., a cozy bedroom, a well-organized classroom).
62
+ * **Avoidance of Triggers:** Be mindful of potential triggers for anxiety or distress. Avoid images that depict conflict, overwhelming crowds, or potentially frightening situations.
63
 
64
  **5. Specific Use Cases (Adapt as needed):**
65
+
66
+ * **Social Stories:** If generating images for a social story, ensure each image clearly illustrates a single step in the sequence. Use consistent characters and settings throughout the story.
67
+ * **Visual Schedules:** If creating images for a visual schedule, make each activity easily identifiable and visually distinct.
68
+ * **Emotion Recognition:** If depicting emotions, use clear facial expressions and body language. Consider using a consistent character to represent different emotions.
69
+ * **Communication Aids:** If creating images for communication, ensure the objects or actions are clearly depicted and easily recognizable.
70
+ * **Daily Routines**: Brushing teeth, eating food, going to school.
71
+ * **Learning concepts**: Shapes, colors, animals, numbers, alphabet.
72
 
73
  **Prompting Instructions:**
74
+
75
  When providing a prompt to the model, be as specific as possible, including:
76
+
77
+ * **The subject of the image:** "A boy brushing his teeth."
78
+ * **The desired style:** "Simple, clear, with a solid light blue background."
79
+ * **The intended use:** "For a visual schedule."
80
+ * **Any specific details:** "The boy should be smiling. The toothbrush should be blue."
81
+ * **Emotions:** Clearly state the emotion "happy" or "calm."
82
+
83
+ **Example Prompts (using the above system prompt as a base):**
84
+
85
+ * "Generate an image for a visual schedule. The subject is 'eating lunch.' Show a child sitting at a table with a plate of food (sandwich, apple slices, and a glass of milk). The background should be a solid, pale green. The child should be smiling. Use a clear, simple style with defined outlines."
86
+ * "Generate an image to help with emotion recognition. The subject is 'sad.' Show a child's face with a single tear rolling down their cheek and a downturned mouth. The background should be a solid, light gray. Use a simple, realistic style."
87
+ * "Generate an image for a social story about going to the doctor. Show a child sitting in a doctor's waiting room, calmly looking at a book. The room should have a few simple toys and a window. The background should be a soft blue. The style should be clear and uncluttered."
88
+ * "Generate a picture of two block shapes in a simple, cartoon style. One red square and one blue circle. Place them on a white background."
89
+ * "Generate a cartoon image of a dog. Make the dog appear to be friendly and non-threatening. Use warm colors."
90
+
91
+ Ensure your Prompts are acccurate and ensure the images are accurate and dont have any irregularities or deforamtions in them.
92
+ use descriptive and detailed prompts
93
  """
94
  )
95
 
 
106
  )
107
 
108
  stream = client.chat.completions.create(
109
+ model="sophosympatheia/rogue-rose-103b-v0.2:free",
110
+ temperature=0.5,
111
  messages=messages,
112
  max_tokens=8192,
113
  stream=True
 
118
  response_text += chunk.choices[0].delta.content
119
  return response_text.strip()
120
 
121
+ def generate_image_fn(selected_prompt, guidance_scale=7.5, negative_prompt="ugly, blurry, poorly drawn hands , Lewd , nude", num_inference_steps=50):
122
  """
123
  Uses the Hugging Face Inference API to generate an image from the provided prompt.
124
  Converts the image to a data URL for later use and stores the prompt globally.
125
+
126
+ Additional parameters:
127
+ - guidance_scale: Influences how strongly the image generation adheres to the prompt.
128
+ - negative_prompt: Specifies undesirable elements to avoid in the generated image.
129
+ - num_inference_steps: The number of denoising steps for image generation.
130
  """
131
  global global_image_data_url, global_image_prompt
132
 
 
140
 
141
  image = image_client.text_to_image(
142
  selected_prompt,
143
+ model="stabilityai/stable-diffusion-3.5-large-turbo", #|| black-forest-labs/FLUX.1-dev || stabilityai/stable-diffusion-3.5-large-turbo
144
  guidance_scale=guidance_scale,
145
  negative_prompt=negative_prompt,
146
  num_inference_steps=num_inference_steps
 
154
 
155
  return image
156
 
157
+ def generate_image_and_reset_chat(difficulty, age, level, extra_details, active_session, saved_sessions):
158
  """
159
+ Saves any current active session into the saved sessions list. Then, using the three selected options and extra details,
160
  generates an image generation prompt, creates an image, and starts a new active session.
161
  """
162
  new_sessions = saved_sessions.copy()
163
  if active_session.get("prompt"):
164
  new_sessions.append(active_session)
165
 
166
+ generated_prompt = generate_prompt_from_options(difficulty, age, level, extra_details)
167
+ image = generate_image_fn(generated_prompt) # Uses default guidance_scale, negative_prompt, and num_inference_steps
168
 
169
+ new_active_session = {"prompt": generated_prompt, "image": global_image_data_url, "chat": []}
 
 
 
 
 
170
  return image, new_active_session, new_sessions
171
 
172
+ def compare_details_chat_fn(user_details):
173
  """
174
  Uses the vision language model to evaluate the user description based solely on the generated image.
175
  The message includes both the image (using its data URL) and the user’s text.
 
176
  """
177
  if not global_image_data_url:
178
  return "Please generate an image first."
179
 
180
+ # Prepare the message content as a list of parts:
181
+ # 1. The image part – here we send the image data URL (in practice, you might need to supply a public URL).
182
+ # 2. The text part – containing the user's description.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
  messages = [
184
  {
185
  "role": "user",
 
190
  },
191
  {
192
  "type": "text",
193
+ "text": (
194
+ f"Based on the image provided above, please evaluate the following description given by the child:\n"
195
+ f"'{user_details}'\n\n"
196
+ """
197
+ You are a friendly and encouraging teacher, guiding a child in describing an image. Speak directly to the child using simple, clear language. Provide positive reinforcement when the child gives a correct or accurate description.
198
+
199
+ If the child's description is incorrect or inaccurate, gently guide them with hints rather than direct corrections. Use Hint before providing guidance. Keep your hints playful and engaging to encourage curiosity.
200
+
201
+ Avoid repeating the child’s description. Instead, focus on giving feedback based on the image. If the description is correct, acknowledge it warmly with praise.
202
+
203
+ Keep the conversation going by asking open-ended questions about the image to encourage the child to observe and think more deeply. Use questions that spark curiosity, such as 'What else do you see?' or 'Why do you think that is happening?'
204
+
205
+ Do not mention your own thoughts, system prompts, or provide direct details about the image. Stay fully engaged in a natural, conversational way, making learning fun and interactive!
206
+
207
+ """
208
+ )
209
  }
210
  ]
211
  }
212
  ]
213
 
214
  chat_client = OpenAI(
215
+ base_url="https://openrouter.ai/api/v1", # https://openrouter.ai/api/v1 ||||| https://api-inference.huggingface.co/v1/
216
  api_key=chat_api_key2
217
  )
218
 
 
231
  def chat_respond(user_message, active_session, saved_sessions):
232
  """
233
  Processes a new chat message. If no image has been generated yet, instructs the user to generate one.
234
+ Otherwise, sends the generated image and the user’s description to the vision language model for evaluation,
235
  then appends the conversation to the active session's chat history.
236
  """
237
  if not active_session.get("image"):
238
  bot_message = "Please generate an image first."
239
  else:
240
+ bot_message = compare_details_chat_fn(user_message)
 
241
 
242
  updated_chat = active_session.get("chat", []) + [(user_message, bot_message)]
243
  active_session["chat"] = updated_chat
 
244
  return "", updated_chat, saved_sessions, active_session
245
 
246
  def update_sessions(saved_sessions, active_session):
 
262
  # Create the Gradio Interface (Single-Page) with a Sidebar for Session Details
263
  ##############################################
264
  with gr.Blocks() as demo:
265
+ active_session = gr.State({"prompt": None, "image": None, "chat": []})
266
  saved_sessions = gr.State([])
267
 
268
  with gr.Column():
 
274
  gr.Markdown("Select options to create a custom prompt for image generation:")
275
  with gr.Row():
276
  difficulty_dropdown = gr.Dropdown(label="Difficulty", choices=difficulty_options, value=difficulty_options[0])
277
+ # Changed age input from a dropdown to a text box
278
+ age_input = gr.Textbox(label="Age", placeholder="Enter your age...", value="3")
279
  level_dropdown = gr.Dropdown(label="Level", choices=level_options, value=level_options[0])
280
+ # New textbox for extra details
281
+ extra_details_input = gr.Textbox(
282
+ label="Extra Details (optional)",
283
+ placeholder="Enter any additional details for the image...",
284
  lines=2
285
  )
286
  generate_btn = gr.Button("Generate Image")
287
  img_output = gr.Image(label="Generated Image")
288
  generate_btn.click(
289
  generate_image_and_reset_chat,
290
+ inputs=[difficulty_dropdown, age_input, level_dropdown, extra_details_input, active_session, saved_sessions],
291
  outputs=[img_output, active_session, saved_sessions]
292
  )
293
 
 
296
  gr.Markdown("## Chat about the Image")
297
  gr.Markdown(
298
  "After generating an image, type details or descriptions about it. "
299
+ "Your message will be sent along with the image to a vision language model, "
300
+ "which will evaluate your description based on what it sees in the image. "
301
+ "The response will include a correctness percentage and hints if needed."
302
  )
303
  chatbot = gr.Chatbot(label="Chat History")
304
  with gr.Row():
 
322
  gr.Markdown(
323
  "This sidebar automatically saves finished chat sessions. "
324
  "Each session includes the prompt used, the generated image (as a data URL), "
325
+ "and the chat history (user messages and corresponding bot responses)."
326
  )
327
  sessions_output = gr.JSON(label="Session Details", value={})
328
  active_session.change(update_sessions, inputs=[saved_sessions, active_session], outputs=sessions_output)