noamrot committed
Commit 40e14d7 · verified · 1 Parent(s): 4582063

tried to fix error

Files changed (1)
  1. app.py +8 -20
app.py CHANGED
@@ -4,44 +4,32 @@ import spaces
 import torch
 from transformers import BlipProcessor, BlipForConditionalGeneration
 
-
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 processor = BlipProcessor.from_pretrained("noamrot/FuseCap_Image_Captioning")
 model = BlipForConditionalGeneration.from_pretrained("noamrot/FuseCap_Image_Captioning").to(device)
 
 @spaces.GPU(duration=15)
 def inference(raw_image):
-    global model
-    cur_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-    model = model.to(device)
     text = "a picture of "
-    inputs = processor(raw_image, text, return_tensors="pt").to(cur_device)
+    inputs = processor(raw_image, text, return_tensors="pt").to(device)
     out = model.generate(**inputs)
     caption = processor.decode(out[0], skip_special_tokens=True)
     return caption
 
-
-inputs = [gr.Image(type='pil', interactive=False),]
-# outputs = gr.outputs.Textbox(label="Caption")
+inputs = gr.Image(type="pil", interactive=False)
 outputs = gr.Textbox(label="Caption")
 
 description = "Gradio demo for FuseCap: Leveraging Large Language Models for Enriched Fused Image Captions. This demo features a BLIP-based model, trained using FuseCap."
 examples = [["surfer.jpg"], ["bike.jpg"]]
 article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2305.17718' target='_blank'>FuseCap: Leveraging Large Language Models for Enriched Fused Image Captions</a>"
 
-
 iface = gr.Interface(fn=inference,
-                     inputs="image",
-                     outputs="text",
-                     title="FuseCap",
-                     description=description,
-                     article=article,
-                     examples=examples,
-                     # enable_queue=True
-                     )
+                     inputs=inputs,
+                     outputs=outputs,
+                     title="FuseCap",
+                     description=description,
+                     article=article,
+                     examples=examples)
 
 iface.queue()
-
-
 iface.launch()
-
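
For reference, the post-change captioning path can be exercised outside the Gradio UI with a short script like the one below. This is a minimal sketch, not part of the commit: it assumes the noamrot/FuseCap_Image_Captioning checkpoint downloads successfully and that surfer.jpg from the bundled examples is in the working directory.

import torch
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

# Same setup as app.py: pick a device and load the FuseCap-trained BLIP model.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
processor = BlipProcessor.from_pretrained("noamrot/FuseCap_Image_Captioning")
model = BlipForConditionalGeneration.from_pretrained("noamrot/FuseCap_Image_Captioning").to(device)

# Mirror inference(): prompt with "a picture of " and decode the generated caption.
raw_image = Image.open("surfer.jpg").convert("RGB")  # assumed local example image
inputs = processor(raw_image, "a picture of ", return_tensors="pt").to(device)
out = model.generate(**inputs)
print(processor.decode(out[0], skip_special_tokens=True))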