hyzhang00 committed on
Commit
c343f53
·
1 Parent(s): f18fd81

update on code

Browse files
Files changed (23) hide show
  1. app.py +27 -34
  2. {caption_anything → backend/caption_anything}/__init__.py +0 -0
  3. {caption_anything → backend/caption_anything}/captioner/README.md +0 -0
  4. {caption_anything → backend/caption_anything}/captioner/__init__.py +0 -0
  5. {caption_anything → backend/caption_anything}/captioner/base_captioner.py +0 -0
  6. {caption_anything → backend/caption_anything}/captioner/blip.py +0 -0
  7. {caption_anything → backend/caption_anything}/captioner/blip2.py +0 -0
  8. {caption_anything → backend/caption_anything}/captioner/git.py +0 -0
  9. {caption_anything → backend/caption_anything}/captioner/modeling_blip.py +0 -0
  10. {caption_anything → backend/caption_anything}/captioner/modeling_git.py +0 -0
  11. {caption_anything → backend/caption_anything}/captioner/vit_pixel_masks_utils.py +0 -0
  12. {caption_anything → backend/caption_anything}/model.py +0 -0
  13. {caption_anything → backend/caption_anything}/segmenter/__init__.py +0 -0
  14. {caption_anything → backend/caption_anything}/segmenter/base_segmenter.py +0 -0
  15. {caption_anything → backend/caption_anything}/segmenter/readme.md +0 -0
  16. {caption_anything → backend/caption_anything}/text_refiner/README.md +0 -0
  17. {caption_anything → backend/caption_anything}/text_refiner/__init__.py +0 -0
  18. {caption_anything → backend/caption_anything}/text_refiner/text_refiner.py +0 -0
  19. {caption_anything → backend/caption_anything}/utils/chatbot.py +0 -0
  20. {caption_anything → backend/caption_anything}/utils/densecap_painter.py +0 -0
  21. {caption_anything → backend/caption_anything}/utils/image_editing_utils.py +0 -0
  22. {caption_anything → backend/caption_anything}/utils/parser.py +0 -0
  23. {caption_anything → backend/caption_anything}/utils/utils.py +0 -0
app.py CHANGED
@@ -11,11 +11,11 @@ from PIL import Image
11
  import emoji
12
  from langchain_community.chat_models import ChatOpenAI
13
  from langchain.schema import HumanMessage
14
- from caption_anything.model import CaptionAnything
15
- from caption_anything.utils.utils import mask_painter, seg_model_map, prepare_segmenter, image_resize
16
- from caption_anything.utils.parser import parse_augment
17
- from caption_anything.captioner import build_captioner
18
- from caption_anything.segmenter import build_segmenter
19
  from backend.chatbox import build_chatbot_tools, get_new_image_name
20
  from segment_anything import sam_model_registry
21
  import easyocr
@@ -82,7 +82,19 @@ try:
82
  except Exception as e:
83
  print(f"Error in building chatbot tools: {e}")
84
 
 
 
 
 
 
 
85
 
 
 
 
 
 
 
86
 
87
  def build_caption_anything_with_models(args, api_key="", captioner=None, sam_model=None, ocr_reader=None, text_refiner=None,
88
  session_id=None):
@@ -190,7 +202,6 @@ async def chat_input_callback(*args):
190
  audio = await texttospeech(read_info,language,gender)
191
  return state, state, aux_state, audio,log_state,history
192
 
193
-
194
 
195
  async def upload_callback(image_input,state, log_state, task_type, openai_api_key=None,language="English",narritive=None,history=None,autoplay=True,session="Session 1"):
196
  print("narritive", narritive)
@@ -221,7 +232,6 @@ async def upload_callback(image_input,state, log_state, task_type, openai_api_ke
221
  print('upload_callback: add caption to chatGPT memory')
222
  new_image_path = get_new_image_name('chat_image', func_name='upload')
223
  image_input.save(new_image_path)
224
- print("img_path",new_image_path)
225
  paragraph = get_gpt_response(openai_api_key, new_image_path,f"What's going on in this picture? in {language}")
226
  if task_type=="task 3":
227
  name="Along the River During the Qingming Festival"
@@ -253,23 +263,23 @@ async def upload_callback(image_input,state, log_state, task_type, openai_api_ke
253
  gender=gender.lower()
254
 
255
  if language=="English":
256
- if naritive_mapping[narritive]==0 :
257
  msg=f"🤖 Hi, I am EyeSee. Let's explore this painting '{name}' together. You can click on the area you're interested in and choose from four types of information: Description, Analysis, Interpretation, and Judgment. Based on your selection, I will provide you with the relevant information."
258
 
259
- elif naritive_mapping[narritive]==1:
260
  msg=f"🧑‍🎨 Hello, I am the {artist}. Welcome to explore my painting, '{name}'. You can click on the area you're interested in and choose from four types of information: Description, Analysis, Interpretation, and Judgment. Based on your selection, I will provide you with the relevant insights and thoughts behind my creation."
261
 
262
- elif naritive_mapping[narritive]==2:
263
  msg=f"🎨 Hello, Let's explore this painting '{name}' together. You can click on the area you're interested in and choose from four types of information: Description, Analysis, Interpretation, and Judgment. Based on your selection, I will provide you with relevant insights and thoughts from the perspective of the objects within the painting"
264
 
265
  elif language=="Chinese":
266
- if naritive_mapping[narritive]==0:
267
  msg=f"🤖 你好,我是 EyeSee。让我们一起探索这幅画《{name}》。你可以点击你感兴趣的区域,并选择四种信息类型之一:描述、分析、解读和评判。根据你的选择,我会为你提供相关的信息。"
268
 
269
- elif naritive_mapping[narritive]==1:
270
  msg=f"🧑‍🎨 你好,我是{artist}。欢迎探索我的画作《{name}》。你可以点击你感兴趣的区域,并选择四种信息类型之一:描述、分析、解读和评判。根据你的选择,我会为你提供我的创作背后的相关见解和想法。"
271
 
272
- elif naritive_mapping[narritive]==2:
273
  msg=f"🎨 你好,让我们一起探索这幅画《{name}》。你可以点击你感兴趣的区域,并选择四种信息类型之一:描述、分析、解读和评判。根据你的选择,我会从画面上事物的视角为你提供相关的见解和想法。"
274
 
275
 
@@ -331,8 +341,6 @@ def inference_click(image_input, point_prompt, click_mode, enable_wiki, language
331
  out = model.inference(image_input, prompt, controls, disable_gpt=True, enable_wiki=enable_wiki, verbose=True, args={'clip_filter': False})[0]
332
  # state = state + [("You've selected image point at {}, ".format(prompt["input_point"]), None)]
333
 
334
-
335
-
336
  if language=="English":
337
  if prompt["input_label"][-1]==1:
338
  msg="You've added an area at {}. ".format(prompt["input_point"][-1])
@@ -362,21 +370,6 @@ def inference_click(image_input, point_prompt, click_mode, enable_wiki, language
362
  return state, state, click_state, image_input_nobackground, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,new_crop_save_path,image_input_nobackground
363
 
364
 
365
- query_focus_en = [
366
- "Provide a description of the item.",
367
- "Provide a description and analysis of the item.",
368
- "Provide a description, analysis, and interpretation of the item.",
369
- "Evaluate the item."
370
- ]
371
-
372
- query_focus_zh = [
373
- "请描述一下这个物体。",
374
- "请描述和分析一下这个物体。",
375
- "请描述、分析和解释一下这个物体。",
376
- "请以艺术鉴赏的角度评价一下这个物体。"
377
- ]
378
-
379
-
380
  async def submit_caption(naritive, state,length, sentiment, factuality, language,
381
  out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
382
  autoplay,paragraph,focus_type,openai_api_key,new_crop_save_path, gender,log_state,history):
@@ -422,7 +415,6 @@ async def submit_caption(naritive, state,length, sentiment, factuality, language
422
  # save history
423
  history.append({"role": "user", "content": user_query})
424
  history.append({"role": "assistant", "content": focus_info})
425
-
426
 
427
  print("new_cap",focus_info)
428
  read_info = re.sub(r'[#[\]!*]','',focus_info)
@@ -430,7 +422,6 @@ async def submit_caption(naritive, state,length, sentiment, factuality, language
430
  print("read info",read_info)
431
  gender="male"
432
 
433
-
434
  try:
435
  if autoplay==False:
436
  return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, None,log_state,history
@@ -598,6 +589,9 @@ def get_recommendationscore(index,score,log_state):
598
  log_state+=[("%% recommendation %%",None)]
599
  return log_state
600
 
 
 
 
601
 
602
  def toggle_icons_and_update_prompt(point_prompt):
603
  new_prompt = "Negative" if point_prompt == "Positive" else "Positive"
@@ -608,8 +602,7 @@ def toggle_icons_and_update_prompt(point_prompt):
608
 
609
  return new_prompt, gr.update(icon=new_add_icon,elem_classes=new_add_css), gr.update(icon=new_minus_icon,elem_classes=new_minus_css)
610
 
611
- add_icon_path="assets/icons/plus-square-blue.png"
612
- minus_icon_path="assets/icons/minus-square.png"
613
 
614
 
615
  with open('styles.css', 'r') as file:
 
11
  import emoji
12
  from langchain_community.chat_models import ChatOpenAI
13
  from langchain.schema import HumanMessage
14
+ from backend.caption_anything.model import CaptionAnything
15
+ from backend.caption_anything.utils.utils import mask_painter, seg_model_map, prepare_segmenter, image_resize
16
+ from backend.caption_anything.utils.parser import parse_augment
17
+ from backend.caption_anything.captioner import build_captioner
18
+ from backend.caption_anything.segmenter import build_segmenter
19
  from backend.chatbox import build_chatbot_tools, get_new_image_name
20
  from segment_anything import sam_model_registry
21
  import easyocr
 
82
  except Exception as e:
83
  print(f"Error in building chatbot tools: {e}")
84
 
85
+ query_focus_en = [
86
+ "Provide a description of the item.",
87
+ "Provide a description and analysis of the item.",
88
+ "Provide a description, analysis, and interpretation of the item.",
89
+ "Evaluate the item."
90
+ ]
91
 
92
+ query_focus_zh = [
93
+ "请描述一下这个物体。",
94
+ "请描述和分析一下这个物体。",
95
+ "请描述、分析和解释一下这个物体。",
96
+ "请以艺术鉴赏的角度评价一下这个物体。"
97
+ ]
98
 
99
  def build_caption_anything_with_models(args, api_key="", captioner=None, sam_model=None, ocr_reader=None, text_refiner=None,
100
  session_id=None):
 
202
  audio = await texttospeech(read_info,language,gender)
203
  return state, state, aux_state, audio,log_state,history
204
 
 
205
 
206
  async def upload_callback(image_input,state, log_state, task_type, openai_api_key=None,language="English",narritive=None,history=None,autoplay=True,session="Session 1"):
207
  print("narritive", narritive)
 
232
  print('upload_callback: add caption to chatGPT memory')
233
  new_image_path = get_new_image_name('chat_image', func_name='upload')
234
  image_input.save(new_image_path)
 
235
  paragraph = get_gpt_response(openai_api_key, new_image_path,f"What's going on in this picture? in {language}")
236
  if task_type=="task 3":
237
  name="Along the River During the Qingming Festival"
 
263
  gender=gender.lower()
264
 
265
  if language=="English":
266
+ if PromptTemplates.NARRATIVE_MAPPING[narritive]==0 :
267
  msg=f"🤖 Hi, I am EyeSee. Let's explore this painting '{name}' together. You can click on the area you're interested in and choose from four types of information: Description, Analysis, Interpretation, and Judgment. Based on your selection, I will provide you with the relevant information."
268
 
269
+ elif PromptTemplates.NARRATIVE_MAPPING[narritive]==1:
270
  msg=f"🧑‍🎨 Hello, I am the {artist}. Welcome to explore my painting, '{name}'. You can click on the area you're interested in and choose from four types of information: Description, Analysis, Interpretation, and Judgment. Based on your selection, I will provide you with the relevant insights and thoughts behind my creation."
271
 
272
+ elif PromptTemplates.NARRATIVE_MAPPING[narritive]==2:
273
  msg=f"🎨 Hello, Let's explore this painting '{name}' together. You can click on the area you're interested in and choose from four types of information: Description, Analysis, Interpretation, and Judgment. Based on your selection, I will provide you with relevant insights and thoughts from the perspective of the objects within the painting"
274
 
275
  elif language=="Chinese":
276
+ if PromptTemplates.NARRATIVE_MAPPING[narritive]==0:
277
  msg=f"🤖 你好,我是 EyeSee。让我们一起探索这幅画《{name}》。你可以点击你感兴趣的区域,并选择四种信息类型之一:描述、分析、解读和评判。根据你的选择,我会为你提供相关的信息。"
278
 
279
+ elif PromptTemplates.NARRATIVE_MAPPING[narritive]==1:
280
  msg=f"🧑‍🎨 你好,我是{artist}。欢迎探索我的画作《{name}》。你可以点击你感兴趣的区域,并选择四种信息类型之一:描述、分析、解读和评判。根据你的选择,我会为你提供我的创作背后的相关见解和想法。"
281
 
282
+ elif PromptTemplates.NARRATIVE_MAPPING[narritive]==2:
283
  msg=f"🎨 你好,让我们一起探索这幅画《{name}》。你可以点击你感兴趣的区域,并选择四种信息类型之一:描述、分析、解读和评判。根据你的选择,我会从画面上事物的视角为你提供相关的见解和想法。"
284
 
285
 
 
341
  out = model.inference(image_input, prompt, controls, disable_gpt=True, enable_wiki=enable_wiki, verbose=True, args={'clip_filter': False})[0]
342
  # state = state + [("You've selected image point at {}, ".format(prompt["input_point"]), None)]
343
 
 
 
344
  if language=="English":
345
  if prompt["input_label"][-1]==1:
346
  msg="You've added an area at {}. ".format(prompt["input_point"][-1])
 
370
  return state, state, click_state, image_input_nobackground, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,new_crop_save_path,image_input_nobackground
371
 
372
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
373
  async def submit_caption(naritive, state,length, sentiment, factuality, language,
374
  out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
375
  autoplay,paragraph,focus_type,openai_api_key,new_crop_save_path, gender,log_state,history):
 
415
  # save history
416
  history.append({"role": "user", "content": user_query})
417
  history.append({"role": "assistant", "content": focus_info})
 
418
 
419
  print("new_cap",focus_info)
420
  read_info = re.sub(r'[#[\]!*]','',focus_info)
 
422
  print("read info",read_info)
423
  gender="male"
424
 
 
425
  try:
426
  if autoplay==False:
427
  return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, None,log_state,history
 
589
  log_state+=[("%% recommendation %%",None)]
590
  return log_state
591
 
592
+
593
+ add_icon_path="assets/icons/plus-square-blue.png"
594
+ minus_icon_path="assets/icons/minus-square.png"
595
 
596
  def toggle_icons_and_update_prompt(point_prompt):
597
  new_prompt = "Negative" if point_prompt == "Positive" else "Positive"
 
602
 
603
  return new_prompt, gr.update(icon=new_add_icon,elem_classes=new_add_css), gr.update(icon=new_minus_icon,elem_classes=new_minus_css)
604
 
605
+
 
606
 
607
 
608
  with open('styles.css', 'r') as file:
{caption_anything → backend/caption_anything}/__init__.py RENAMED
File without changes
{caption_anything → backend/caption_anything}/captioner/README.md RENAMED
File without changes
{caption_anything → backend/caption_anything}/captioner/__init__.py RENAMED
File without changes
{caption_anything → backend/caption_anything}/captioner/base_captioner.py RENAMED
File without changes
{caption_anything → backend/caption_anything}/captioner/blip.py RENAMED
File without changes
{caption_anything → backend/caption_anything}/captioner/blip2.py RENAMED
File without changes
{caption_anything → backend/caption_anything}/captioner/git.py RENAMED
File without changes
{caption_anything → backend/caption_anything}/captioner/modeling_blip.py RENAMED
File without changes
{caption_anything → backend/caption_anything}/captioner/modeling_git.py RENAMED
File without changes
{caption_anything → backend/caption_anything}/captioner/vit_pixel_masks_utils.py RENAMED
File without changes
{caption_anything → backend/caption_anything}/model.py RENAMED
File without changes
{caption_anything → backend/caption_anything}/segmenter/__init__.py RENAMED
File without changes
{caption_anything → backend/caption_anything}/segmenter/base_segmenter.py RENAMED
File without changes
{caption_anything → backend/caption_anything}/segmenter/readme.md RENAMED
File without changes
{caption_anything → backend/caption_anything}/text_refiner/README.md RENAMED
File without changes
{caption_anything → backend/caption_anything}/text_refiner/__init__.py RENAMED
File without changes
{caption_anything → backend/caption_anything}/text_refiner/text_refiner.py RENAMED
File without changes
{caption_anything → backend/caption_anything}/utils/chatbot.py RENAMED
File without changes
{caption_anything → backend/caption_anything}/utils/densecap_painter.py RENAMED
File without changes
{caption_anything → backend/caption_anything}/utils/image_editing_utils.py RENAMED
File without changes
{caption_anything → backend/caption_anything}/utils/parser.py RENAMED
File without changes
{caption_anything → backend/caption_anything}/utils/utils.py RENAMED
File without changes