mc6666 committed on
Commit
e83010b
·
verified ·
1 Parent(s): 58e85c8

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +34 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
from transformers import AutoProcessor, PaliGemmaForConditionalGeneration
import torch

# Load model and processor
# Fine-tuned PaliGemma checkpoint for VQAv2-style visual question answering.
model_id = "pyimagesearch/finetuned_paligemma_vqav2_small"
model = PaliGemmaForConditionalGeneration.from_pretrained(model_id)
# The processor comes from the base model repo; it handles both image
# preprocessing and text tokenization for PaliGemma.
processor = AutoProcessor.from_pretrained("google/paligemma-3b-pt-224")
8
# Define inference function
def process_image(image, prompt):
    """Answer *prompt* about *image* with the fine-tuned PaliGemma model.

    Args:
        image: PIL image from the Gradio input; may be None when the
            user submits without uploading an image.
        prompt: The question to ask about the image.

    Returns:
        The model's decoded answer with the prompt prefix stripped, or a
        human-readable error message.
    """
    # Guard: Gradio passes None when no image was uploaded; the original
    # code would crash on image.convert(...).
    if image is None:
        return "An error occurred during processing."

    # Process the image and prompt using the processor
    inputs = processor(image.convert("RGB"), prompt, return_tensors="pt")

    try:
        # Inference only: no_grad avoids building the autograd graph,
        # reducing memory use during generation.
        with torch.no_grad():
            # Generate output from the model
            output = model.generate(**inputs, max_new_tokens=20)

        # Decode and return the output
        decoded_output = processor.decode(output[0], skip_special_tokens=True)

        # Return the answer (exclude the prompt part from output)
        return decoded_output[len(prompt):]
    except IndexError as e:
        print(f"IndexError: {e}")
        return "An error occurred during processing."
25
# Build and launch the Gradio UI for visual question answering.
image_input = gr.Image(type="pil")
prompt_input = gr.Textbox(label="Prompt", placeholder="Enter your question")
answer_output = gr.Textbox(label="Answer")

# Create the Gradio app wiring the inputs through process_image.
demo = gr.Interface(
    fn=process_image,
    inputs=[image_input, prompt_input],
    outputs=answer_output,
    title="Visual Question Answering with Fine-tuned PaliGemma Model",
    description="Upload an image and ask questions to get answers.",
)

# Launch the app
demo.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ transformers
2
+ torch
3
+ peft
4
+ bitsandbytes
5
+ gradio