Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -10,9 +10,36 @@ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
|
10 |
|
11 |
processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-capfilt-large")
|
12 |
model_vqa = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-capfilt-large").to(device)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
def inference_chat(input_image,input_text):
|
14 |
inputs = processor(images=input_image, text=input_text,return_tensors="pt")
|
15 |
-
inputs["max_length"] =
|
16 |
inputs["num_beams"] = 5
|
17 |
inputs['num_return_sequences'] =4
|
18 |
out = model_vqa.generate(**inputs)
|
@@ -41,9 +68,12 @@ with gr.Blocks(
|
|
41 |
submit_button = gr.Button(
|
42 |
value="Submit", interactive=True, variant="primary"
|
43 |
)
|
|
|
|
|
|
|
44 |
with gr.Column():
|
45 |
caption_output = gr.Textbox(lines=0, label="VQA Output(模型答案输出)")
|
46 |
-
|
47 |
|
48 |
image_input.change(
|
49 |
lambda: ("", "", []),
|
@@ -73,6 +103,14 @@ with gr.Blocks(
|
|
73 |
],
|
74 |
[caption_output],
|
75 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
|
77 |
# examples = gr.Examples(
|
78 |
# examples=examples,
|
|
|
10 |
|
11 |
processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-capfilt-large")
|
12 |
model_vqa = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-capfilt-large").to(device)
|
13 |
+
|
14 |
+
from transformers import BlipProcessor, Blip2ForConditionalGeneration, Blip2Processor

# Captioning stack (BLIP-2 / Flan-T5-XL), kept separate from the VQA
# `processor` / `model_vqa` pair loaded above.
# The BLIP-2 checkpoint must be loaded with Blip2Processor, not the BLIP-1
# BlipProcessor.
cap_processor = Blip2Processor.from_pretrained("Salesforce/blip2-flan-t5-xl")
# Place the captioning model on the same device as the VQA model so both run
# on the GPU when one is available.
cap_model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-flan-t5-xl").to(device)
|
18 |
+
|
19 |
+
|
20 |
+
|
21 |
+
def caption(input_image):
    """Generate candidate captions for an image with the captioning model.

    Args:
        input_image: The image to describe, as handed over by the UI image
            component. ``None`` (no image supplied) yields an empty string.

    Returns:
        A single string with one caption per line, one line for each
        sequence returned by beam search (4 sequences from 4 beams).
    """
    # Nothing to caption: return an empty result instead of crashing inside
    # the processor when the button is clicked without an image.
    if input_image is None:
        return ""

    # Use the captioning processor/model pair, not the VQA ones, and keep the
    # tensors on whatever device the captioning model lives on.
    inputs = cap_processor(images=input_image, return_tensors="pt").to(cap_model.device)

    # Generation options are passed as keyword arguments; this is equivalent
    # to stuffing them into `inputs` and unpacking, but keeps the tensor
    # batch free of non-tensor entries.
    out = cap_model.generate(**inputs, num_beams=4, num_return_sequences=4)

    # Decode every returned sequence. Decoding only out[0] and joining it
    # with "\n" would insert a newline between each character of one caption.
    captions = cap_processor.batch_decode(out, skip_special_tokens=True)
    return "\n".join(text.strip() for text in captions)
|
27 |
+
def gpt3(input_text):
    """Complete a prompt with the OpenAI completion endpoint.

    Args:
        input_text: The prompt text sent to the ``text-davinci-003`` engine.

    Returns:
        The text of the first returned choice, with surrounding whitespace
        stripped.
    """
    # Request a single short completion (at most 10 tokens), with no custom
    # stop sequence.
    request_options = {
        "engine": "text-davinci-003",
        "prompt": input_text,
        "max_tokens": 10,
        "n": 1,
        "stop": None,
        "temperature": 0.7,
    }
    completion = openai.Completion.create(**request_options)
    first_choice = completion.choices[0]
    return first_choice.text.strip()
|
38 |
+
|
39 |
+
|
40 |
def inference_chat(input_image,input_text):
|
41 |
inputs = processor(images=input_image, text=input_text,return_tensors="pt")
|
42 |
+
inputs["max_length"] = 10
|
43 |
inputs["num_beams"] = 5
|
44 |
inputs['num_return_sequences'] =4
|
45 |
out = model_vqa.generate(**inputs)
|
|
|
68 |
submit_button = gr.Button(
|
69 |
value="Submit", interactive=True, variant="primary"
|
70 |
)
|
71 |
+
cap_submit_button = gr.Button(
|
72 |
+
value="Submit", interactive=True, variant="primary"
|
73 |
+
)
|
74 |
with gr.Column():
|
75 |
caption_output = gr.Textbox(lines=0, label="VQA Output(模型答案输出)")
|
76 |
+
caption_output_v1 = gr.Textbox(lines=0, label="Caption Output(模型caption输出)")
|
77 |
|
78 |
image_input.change(
|
79 |
lambda: ("", "", []),
|
|
|
103 |
],
|
104 |
[caption_output],
|
105 |
)
|
106 |
+
cap_submit_button.click(
|
107 |
+
caption,
|
108 |
+
[
|
109 |
+
image_input,
|
110 |
+
|
111 |
+
],
|
112 |
+
[caption_output_v1],
|
113 |
+
)
|
114 |
|
115 |
# examples = gr.Examples(
|
116 |
# examples=examples,
|