work-jenilsoni committed on
Commit
3d05221
·
verified ·
1 Parent(s): 666238a

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -51
app.py DELETED
@@ -1,51 +0,0 @@
1
- import gradio as gr
2
- from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
3
- from qwen_vl_utils import process_vision_info
4
- from PIL import Image
5
- import torch
6
-
7
- # Function to load the model and processor
8
- @torch.inference_mode()
9
- def load_model():
10
- model_name = "Qwen/Qwen2-VL-2B-Instruct"
11
- model = Qwen2VLForConditionalGeneration.from_pretrained(model_name, trust_remote_code=True)
12
- processor = AutoProcessor.from_pretrained(model_name)
13
- return model, processor
14
-
15
- # Load the model and processor
16
- model, processor = load_model()
17
-
18
- # Function to process the image and extract text
19
- def ocr_from_image(image):
20
- image = Image.open(image).convert('RGB')
21
-
22
- # Prepare input for the model
23
- messages = [{
24
- "role": "user",
25
- "content": [
26
- {
27
- "type": "image",
28
- "image": image,
29
- },
30
- {"type": "text", "text": "Extract the text from the image"},
31
- ]
32
- }]
33
-
34
- text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
35
- image_inputs, video_inputs = process_vision_info(messages)
36
- inputs = processor(text=[text], images=image_inputs, videos=video_inputs, padding=True, return_tensors="pt")
37
-
38
- # Generate the output
39
- generated_ids = model.generate(**inputs, max_new_tokens=128)
40
- generated_ids_trimmed = [out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
41
- output_text = processor.batch_decode(generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False)
42
-
43
- return output_text[0]
44
-
45
- # Define the Gradio interface
46
- def interface(image):
47
- result = ocr_from_image(image)
48
- return result
49
-
50
- # Gradio app layout
51
- gr.Interface(fn=interface, inputs="image", outputs="text", title="OCR Web App using Qwen2-VL").launch()