Spaces:

suayptalha
/

QwQ-32B-Preview-Vision

Running

App Files Community

suayptalha committed on Dec 19, 2024

Commit

62d3eaa

verified ·

1 Parent(s): 0c4ca35

Update app.py

Browse files

Files changed (1) hide show

app.py +63 -12

app.py CHANGED Viewed

@@ -1,10 +1,15 @@
 import gradio as gr
 from gradio_client import Client, handle_file
 from huggingface_hub import InferenceClient
 moondream_client = Client("vikhyatk/moondream2")
 qwq_client = InferenceClient("Qwen/QwQ-32B-Preview")
 def describe_image(image, user_message):
     result = moondream_client.predict(
         img=handle_file(image),
@@ -13,9 +18,9 @@ def describe_image(image, user_message):
     )
     description = result
     user_message = description + "\n" + user_message
     qwq_result = qwq_client.chat_completion(
         messages=[{"role": "user", "content": user_message}],
         max_tokens=512,
@@ -25,18 +30,61 @@ def describe_image(image, user_message):
     return qwq_result['choices'][0]['message']['content']
-def chat_or_image(image, user_message):
-    if image:
-        return describe_image(image, user_message)
     else:
-        qwq_result = qwq_client.chat_completion(
-            messages=[{"role": "user", "content": user_message}],
-            max_tokens=512,
-            temperature=0.7,
-            top_p=0.95
-        )
-        return qwq_result['choices'][0]['message']['content']
 demo = gr.Interface(
     fn=chat_or_image,
     inputs=[
@@ -44,7 +92,10 @@ demo = gr.Interface(
         gr.Textbox(label="Ask anything", placeholder="Ask...", lines=2)
     ],
     outputs="text",
 )
 if __name__ == "__main__":
-    demo.launch(show_error=True)

 import gradio as gr
 from gradio_client import Client, handle_file
 from huggingface_hub import InferenceClient
+from PIL import Image
+from threading import Thread
+import time
+# Initialize clients for Moondream and QwQ
 moondream_client = Client("vikhyatk/moondream2")
 qwq_client = InferenceClient("Qwen/QwQ-32B-Preview")
+# Function to describe the image using Moondream API
 def describe_image(image, user_message):
     result = moondream_client.predict(
         img=handle_file(image),
     )
     description = result
     user_message = description + "\n" + user_message
+    # Using QwQ model for conversation after description
     qwq_result = qwq_client.chat_completion(
         messages=[{"role": "user", "content": user_message}],
         max_tokens=512,
     return qwq_result['choices'][0]['message']['content']
+# Generator for one chat turn: reads message["text"] and message["files"], rebuilds a role/content message list from history, and yields the accumulated response text chunk by chunk.
+def chat_or_image(message, history, max_new_tokens=250):
+    txt = message["text"]  # message is indexed like a dict with "text" and "files" keys
+    ext_buffer = f"{txt}"  # NOTE(review): assigned but never read in this function
+    messages = []
+    images = []
+    # Process the conversation history; entries are indexed as msg[0] (user side) and msg[1] (assistant side)
+    for i, msg in enumerate(history):
+        if isinstance(msg[0], tuple):  # image entry: msg[0][0] is opened as the image file
+            messages.append({"role": "user", "content": [{"type": "text", "text": history[i+1][0]}, {"type": "image"}]})  # text is taken from the NEXT history entry; NOTE(review): history[i+1] is out of range if an image entry is last
+            messages.append({"role": "assistant", "content": [{"type": "text", "text": history[i+1][1]}]})
+            images.append(Image.open(msg[0][0]).convert("RGB"))
+        elif isinstance(msg[0], str) and isinstance(history[i-1][0], str):  # text-only turn: added only when the previous entry is also text; NOTE(review): at i == 0, history[i-1] is the last entry
+            messages.append({"role": "user", "content": [{"type": "text", "text": msg[0]}]})
+            messages.append({"role": "assistant", "content": [{"type": "text", "text": msg[1]}]})
+    # Add current message
+    if len(message["files"]) == 1:  # exactly one attached file; zero or several files fall through to the text-only branch
+        if isinstance(message["files"][0], str):  # Example images: the entry is used directly as the path
+            image = Image.open(message["files"][0]).convert("RGB")
+        else:  # Regular image input: the entry is indexed by "path"
+            image = Image.open(message["files"][0]["path"]).convert("RGB")
+        images.append(image)
+        messages.append({"role": "user", "content": [{"type": "text", "text": txt}, {"type": "image"}]})
     else:
+        messages.append({"role": "user", "content": [{"type": "text", "text": txt}]})
+    # Processing the conversation to send to the model
+    texts = moondream_client.apply_chat_template(messages, add_generation_prompt=True)  # NOTE(review): moondream_client is built with gradio_client's Client(...) in this file; confirm that object provides apply_chat_template
+    if images == []:
+        inputs = moondream_client(text=texts, return_tensors="pt").to("cuda")  # NOTE(review): calls the client object itself and moves the result to "cuda"; confirm this is supported for a remote client
+    else:
+        inputs = moondream_client(text=texts, images=images, return_tensors="pt").to("cuda")
+    streamer = TextIteratorStreamer(moondream_client, skip_special_tokens=True, skip_prompt=True)  # NOTE(review): TextIteratorStreamer is not imported in the lines visible on this page
+    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=max_new_tokens)
+    generated_text = ""  # NOTE(review): assigned but never read in this function
+    # Generating the response with threading to avoid blocking
+    thread = Thread(target=qwq_client.chat_completion, kwargs=generation_kwargs)  # NOTE(review): confirm chat_completion accepts these keyword arguments (streamer, max_new_tokens, plus the unpacked inputs)
+    thread.start()  # the thread is started but never joined in this function
+    buffer = ""
+    # Stream the generated text
+    for new_text in streamer:
+        buffer += new_text
+        generated_text_without_prompt = buffer  # NOTE(review): assigned but never read in this function
+        time.sleep(0.01)  # short pause between yielded updates
+        yield buffer  # each yield is the full text so far, not just the new chunk
+# Gradio Interface setup
 demo = gr.Interface(
     fn=chat_or_image,
     inputs=[
         gr.Textbox(label="Ask anything", placeholder="Ask...", lines=2)
     ],
     outputs="text",
+    title="Multimodal Llama Chatbot",
+    description="Interact with the Llama chatbot. Upload an image, ask a question, or both!",
+    live=True
 )
 if __name__ == "__main__":
+    demo.launch(show_error=True)