suayptalha committed on
Commit
62d3eaa
·
verified ·
1 Parent(s): 0c4ca35

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -12
app.py CHANGED
@@ -1,10 +1,15 @@
1
  import gradio as gr
2
  from gradio_client import Client, handle_file
3
  from huggingface_hub import InferenceClient
 
 
 
4
 
 
5
  moondream_client = Client("vikhyatk/moondream2")
6
  qwq_client = InferenceClient("Qwen/QwQ-32B-Preview")
7
 
 
8
  def describe_image(image, user_message):
9
  result = moondream_client.predict(
10
  img=handle_file(image),
@@ -13,9 +18,9 @@ def describe_image(image, user_message):
13
  )
14
 
15
  description = result
16
-
17
  user_message = description + "\n" + user_message
18
 
 
19
  qwq_result = qwq_client.chat_completion(
20
  messages=[{"role": "user", "content": user_message}],
21
  max_tokens=512,
@@ -25,18 +30,61 @@ def describe_image(image, user_message):
25
 
26
  return qwq_result['choices'][0]['message']['content']
27
 
28
def chat_or_image(image, user_message):
    """Answer ``user_message``, using ``image`` as context when one is given.

    Args:
        image: Image input forwarded to ``describe_image``; any falsy value
            means no image was supplied.
        user_message: The user's question or prompt text.

    Returns:
        The text content of the first choice returned by the QwQ model.
    """
    # Guard clause: without an image the prompt goes straight to QwQ.
    if not image:
        completion = qwq_client.chat_completion(
            messages=[{"role": "user", "content": user_message}],
            max_tokens=512,
            temperature=0.7,
            top_p=0.95,
        )
        first_choice = completion['choices'][0]
        return first_choice['message']['content']

    # With an image, describe_image handles both the captioning and the answer.
    return describe_image(image, user_message)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
 
40
  demo = gr.Interface(
41
  fn=chat_or_image,
42
  inputs=[
@@ -44,7 +92,10 @@ demo = gr.Interface(
44
  gr.Textbox(label="Ask anything", placeholder="Ask...", lines=2)
45
  ],
46
  outputs="text",
 
 
 
47
  )
48
 
49
  if __name__ == "__main__":
50
- demo.launch(show_error=True)
 
1
  import gradio as gr
2
  from gradio_client import Client, handle_file
3
  from huggingface_hub import InferenceClient
4
+ from PIL import Image
5
+ from threading import Thread
6
+ import time
7
 
8
+ # Initialize clients for Moondream and QwQ
9
  moondream_client = Client("vikhyatk/moondream2")
10
  qwq_client = InferenceClient("Qwen/QwQ-32B-Preview")
11
 
12
+ # Function to describe the image using Moondream API
13
  def describe_image(image, user_message):
14
  result = moondream_client.predict(
15
  img=handle_file(image),
 
18
  )
19
 
20
  description = result
 
21
  user_message = description + "\n" + user_message
22
 
23
+ # Using QwQ model for conversation after description
24
  qwq_result = qwq_client.chat_completion(
25
  messages=[{"role": "user", "content": user_message}],
26
  max_tokens=512,
 
30
 
31
  return qwq_result['choices'][0]['message']['content']
32
 
33
+ # Function to handle chat or image-based conversation
34
def _split_request(message, history):
    """Normalise the supported call shapes to ``(text, image_path, past_turns)``."""
    if isinstance(message, dict):
        # Multimodal chat payload: {"text": ..., "files": [...]}.
        files = message.get("files") or []
        image_path = None
        if files:
            # A file entry is either a plain path or a dict carrying "path".
            first = files[0]
            image_path = first if isinstance(first, str) else first["path"]
        return message.get("text") or "", image_path, history
    if isinstance(history, str):
        # Two-input call where the second argument is the prompt text.
        # NOTE(review): presumably this is how the gr.Interface in this file
        # invokes the function, as (image, text) -- confirm against its inputs.
        return history, message or None, []
    # Plain-text chat call: message is the prompt, history the past turns.
    return message or "", None, history


def _history_to_messages(history):
    """Convert chat history into text-only chat-completion messages."""
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            # Role/content style entry: keep only textual turns.
            role, content = turn.get("role"), turn.get("content")
            if role in ("user", "assistant") and isinstance(content, str):
                messages.append({"role": role, "content": content})
            continue
        if not isinstance(turn, (list, tuple)) or len(turn) != 2:
            continue
        user_part, assistant_part = turn
        # A non-string user slot is a file-upload entry; it carries no text
        # for the chat-completion call, so it is skipped.
        if isinstance(user_part, str):
            messages.append({"role": "user", "content": user_part})
            if isinstance(assistant_part, str):
                messages.append({"role": "assistant", "content": assistant_part})
    return messages


def chat_or_image(message, history, max_new_tokens=250):
    """Answer a chat turn, optionally about an attached image.

    The previous body drove ``moondream_client`` and ``qwq_client`` as if they
    were a local processor/model pair, which these remote clients do not
    support. Image turns now go through ``describe_image`` and text turns use
    the chat-completion endpoint in streaming mode.

    Args:
        message: Either a dict with ``"text"`` and ``"files"`` keys, a plain
            prompt string, or an image path when ``history`` holds the prompt
            text (two-input call).
        history: Past turns as ``[user, assistant]`` pairs or role/content
            dicts; or the prompt string in the two-input call.
        max_new_tokens: Upper bound on generated tokens for text-only turns.
            Image turns use the limits set inside ``describe_image``.

    Yields:
        The reply accumulated so far. Text-only turns yield once per streamed
        fragment; image turns yield the complete answer once.
    """
    text, image_path, past_turns = _split_request(message, history)

    # Nothing to answer: avoid a pointless remote call.
    if image_path is None and not text.strip():
        yield "Please enter a message or upload an image."
        return

    if image_path is not None:
        # describe_image returns the finished answer in one piece. It only
        # receives the current prompt, so earlier turns are not forwarded.
        # When several files are attached only the first one is used.
        yield describe_image(image_path, text)
        return

    messages = _history_to_messages(past_turns)
    messages.append({"role": "user", "content": text})

    reply = ""
    stream = qwq_client.chat_completion(
        messages=messages,
        max_tokens=max_new_tokens,
        stream=True,
    )
    for chunk in stream:
        if not chunk.choices:
            continue
        piece = chunk.choices[0].delta.content
        if piece:
            reply += piece
            yield reply
86
 
87
+ # Gradio Interface setup
88
  demo = gr.Interface(
89
  fn=chat_or_image,
90
  inputs=[
 
92
  gr.Textbox(label="Ask anything", placeholder="Ask...", lines=2)
93
  ],
94
  outputs="text",
95
+ title="Multimodal Llama Chatbot",
96
+ description="Interact with the Llama chatbot. Upload an image, ask a question, or both!",
97
+ live=True
98
  )
99
 
100
  if __name__ == "__main__":
101
+ demo.launch(show_error=True)