ovi054 committed on
Commit
87e9ce2
1 Parent(s): 21583ef

Create app.py

Files changed (1)
  1. app.py +34 -0
app.py ADDED
@@ -0,0 +1,34 @@
+ import torch
+ import gradio as gr
+ from PIL import Image
+ from transformers import AutoProcessor, AutoModelForCausalLM
+
+ # Initialize Florence model and processor once at startup
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ florence_model = AutoModelForCausalLM.from_pretrained('microsoft/Florence-2-base', trust_remote_code=True).to(device).eval()
+ florence_processor = AutoProcessor.from_pretrained('microsoft/Florence-2-base', trust_remote_code=True)
+
+ def generate_caption(image):
+     # Gradio passes the uploaded image as a NumPy array; convert it to PIL
+     if not isinstance(image, Image.Image):
+         image = Image.fromarray(image)
+
+     inputs = florence_processor(text="<MORE_DETAILED_CAPTION>", images=image, return_tensors="pt").to(device)
+     generated_ids = florence_model.generate(
+         input_ids=inputs["input_ids"],
+         pixel_values=inputs["pixel_values"],
+         max_new_tokens=1024,
+         early_stopping=False,
+         do_sample=False,
+         num_beams=3,
+     )
+     generated_text = florence_processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
+     # Parse the raw output for the captioning task and return the plain caption text
+     parsed_answer = florence_processor.post_process_generation(
+         generated_text,
+         task="<MORE_DETAILED_CAPTION>",
+         image_size=(image.width, image.height)
+     )
+     return parsed_answer["<MORE_DETAILED_CAPTION>"]
+
+ io = gr.Interface(generate_caption,
+                   inputs=[gr.Image()],
+                   outputs=[gr.Textbox(label="Caption", lines=2, show_copy_button=True)]
+                   )
+ io.launch(debug=True)