Spaces: Running on Zero

Samuel L Meyers committed
Commit 4d31c25 · 1 Parent(s): e96f7f5
v0.3

Browse files:
- app.py +68 -8
- packages.txt +1 -0
- requirements.txt +7 -7
app.py
CHANGED
@@ -1,3 +1,6 @@
+
+"""
+
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import json
@@ -73,8 +76,9 @@ import os
def merge_descriptions_to_prompt(mi, d1, d2):
    from together import Together
    tog = Together(api_key=os.getenv("TOGETHER_KEY"))
-    res = tog.completions.create(prompt=f"""Describe what would result if the following two descriptions were describing one thing.
+    res = tog.completions.create(prompt=f""" """Describe what would result if the following two descriptions were describing one thing.
### Description 1:
+""" """
```text
{d1}
```
@@ -89,7 +93,9 @@ Merge-Specific Instructions:
Ensure you end your output with ```\\n
---
Complete Description:
-```text"""
+```text"""
+
+""", model="meta-llama/Meta-Llama-3-70B", stop=["```"], max_tokens=1024)
    return res.choices[0].text.split("```")[0]

def xform_image_description(img, inst):
@@ -97,7 +103,7 @@ def xform_image_description(img, inst):
    from together import Together
    desc = dual_images(img)
    tog = Together(api_key=os.getenv("TOGETHER_KEY"))
-    prompt=f"""Describe the image in aggressively verbose detail. I must know every freckle upon a man's brow and each blade of the grass intimately.\nDescription: ```text\n{desc}\n```\nInstructions:\n```text\n{inst}\n```\n\n\n---\nDetailed Description:\n```text"""
+    prompt=f""" """Describe the image in aggressively verbose detail. I must know every freckle upon a man's brow and each blade of the grass intimately.\nDescription: ```text\n{desc}\n```\nInstructions:\n```text\n{inst}\n```\n\n\n---\nDetailed Description:\n```text """ """
    res = tog.completions.create(prompt=prompt, model="meta-llama/Meta-Llama-3-70B", stop=["```"], max_tokens=1024)
    return res.choices[0].text[len(prompt):].split("```")[0]

@@ -275,18 +281,18 @@ with gr.Blocks() as arch_room:
    gr.Markdown(f"""
## Arcanistry

-
+"""
*POOF* -- You walk in, to a cloudy room filled with heavy smoke. In the center of the room rests a waist-height table. Upon the table, you see a... You don't understand... It's dark and light and cold and warm but... As you extend your hand, you hear the voice travel up your arm and into your ears...

---
-""")
+""" """)
    with gr.Row():
-        cdd = gr.Code("""### Human
+        cdd = gr.Code(""" """### Human
I require a Python script that serves a simple file server in Python over MongoDB.

### Wizard
Sure! Here's the script:
-```python""", language="markdown")
+```python""" """, language="markdown")
    with gr.Row():
        wzs = gr.Code(json.dumps({
            'token': '<|wizard|>',
@@ -301,4 +307,58 @@ Sure! Here's the script:

with gr.TabbedInterface([ifc_imgprompt2text, c_ifc := gr.ChatInterface(chat, chatbot=chatbot, submit_btn=gr.Button(scale=1)), gr.ChatInterface(wizard_chat), arch_room], ["Prompt & Image 2 Text", "Chat w/ Llama 3 70b", "Chat w/ WizardLM 8x22B", "Arcanistry"]) as ifc:
    shrd = gr.JSON(visible=False)
-    ifc.launch(share=False, debug=True, show_error=True)
+    ifc.launch(share=False, debug=True, show_error=True) """
+
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import gradio as gr
+import spaces
+from PIL import Image
+import hashlib
+import base64
+
+def load_md2():
+    model = AutoModelForCausalLM.from_pretrained("vikhyatk/moondream2", device_map="cpu", trust_remote_code=True, revision="2025-01-09")
+    return model
+
+global md2
+
+md2 = load_md2()
+
+@spaces.GPU()
+def moondream2(question, image, history=None):
+    global md2
+    model = md2
+    model.cuda()
+    hsh = hashlib.sha256(bts := image.resize((224,224), Image.NEAREST).tobytes()).hexdigest()
+    b64 = base64.b64encode(bts).decode('utf-8')
+    res = model.query(image, question) if question is not None and question != "" else model.caption(image)
+    model.cpu()
+    ress = []
+    if history is not None:
+        for itm in history:
+            ress.append(itm)
+    ress.append({
+        "answer": res if question is not None and question != "" else None,
+        "caption": res if question is None or question == "" else None,
+        "sha256": hsh,
+        "image_b64": b64
+    })
+    return ress, ress
+
+def gui():
+    with gr.Blocks() as blk:
+        with gr.Row():
+            imgs = gr.Image(label="input", type="pil", elem_id="imgs")
+        with gr.Row():
+            txt = gr.Textbox(label="prompt")
+        with gr.Row():
+            btn = gr.Button("Run")
+        with gr.Row():
+            res = gr.JSON(label="output")
+        with gr.Row(visible=False):
+            history = gr.JSON(label="history")
+        btn.click(moondream2, inputs=[txt, imgs, history], outputs=[res, history])
+    blk.launch(share=False)
+
+if __name__ == "__main__":
+    gui()
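The new `moondream2()` handler is decorated with `@spaces.GPU()`, so a ZeroGPU Space grants a GPU only for the duration of each call (hence the explicit `model.cuda()` before inference and `model.cpu()` afterwards). Every call appends one record to the shared history: the model's answer (or a caption when the prompt is empty), the SHA-256 of a 224×224 nearest-neighbour thumbnail, and that thumbnail's raw RGB bytes, base64-encoded. The sketch below is not part of the commit; it mirrors how one record is assembled, using a synthetic image so it runs without downloading the model, and a placeholder answer string.

```python
# Sketch only (not part of the commit): mirrors the hashing/encoding that
# moondream2() performs for each history record, with a placeholder answer.
import base64
import hashlib
import json

from PIL import Image

img = Image.new("RGB", (640, 480), "red")                # stand-in for the uploaded image
bts = img.resize((224, 224), Image.NEAREST).tobytes()    # raw RGB bytes of the thumbnail
record = {
    "answer": "A small red square.",                     # model.query(image, question) result
    "caption": None,                                     # model.caption(image) result when no prompt
    "sha256": hashlib.sha256(bts).hexdigest(),           # fingerprint of the thumbnail
    "image_b64": base64.b64encode(bts).decode("utf-8"),  # raw pixels, not a PNG/JPEG
}
preview = {k: (v[:48] + "...") if isinstance(v, str) and len(v) > 48 else v
           for k, v in record.items()}
print(json.dumps(preview, indent=2))
```

Note that `image_b64` carries raw pixel data (224 × 224 × 3 ≈ 150 kB before encoding), so each turn adds roughly 200 kB of base64 text to the JSON history returned to the browser.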
packages.txt
ADDED
@@ -0,0 +1 @@
+libvips-dev
requirements.txt
CHANGED
@@ -1,9 +1,9 @@
-gradio
-transformers
-accelerate
-einops
-pillow
-together
+gradio
+transformers
+accelerate
+einops
+pillow
+together
torch
torchvision
-
+pyvips
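The two dependency changes above go together: `packages.txt` lists Debian packages the Space installs with apt at build time, so `libvips-dev` provides the libvips shared library, while the new `pyvips` entry in `requirements.txt` is the Python binding that loads that library on import. Nothing in app.py imports pyvips directly, so it is presumably pulled in for the moondream2 remote code's image handling. A quick sanity check, not part of the commit:

```python
# Sanity check (not part of the commit): pyvips only imports successfully
# when it can find the libvips shared library installed via packages.txt.
import pyvips

print("pyvips binding version:", pyvips.__version__)

# Create a tiny in-memory image to confirm the library is actually usable.
img = pyvips.Image.black(8, 8)
print("black image:", img.width, "x", img.height, "bands:", img.bands)
```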