Aekanun committed on
Commit
531f528
·
1 Parent(s): 2a13300

fixed app.py

Browse files
Files changed (1) hide show
  1. app.py +130 -0
app.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import warnings
3
+ import torch
4
+ import gc
5
+ from transformers import AutoModelForVision2Seq, AutoProcessor
6
+ from peft import PeftModel
7
+ from PIL import Image
8
+ import gradio as gr
9
+ from huggingface_hub import login
10
+
11
# Basic settings
warnings.filterwarnings('ignore')
os.environ["CUDA_VISIBLE_DEVICES"] = ""  # disable CUDA

# Global variables (populated by load_model_and_processor)
model = None
processor = None

# Login to Hugging Face Hub.
# An empty token is treated the same as a missing one so that login() is
# never called with a blank string.
_hub_token = os.environ.get('HUGGING_FACE_HUB_TOKEN')
if _hub_token:
    print("กำลังเข้าสู่ระบบ Hugging Face Hub...")
    login(token=_hub_token)
else:
    print("คำเตือน: ไม่พบ HUGGING_FACE_HUB_TOKEN")
25
+
26
def load_model_and_processor():
    """Load the processor, the base vision model and the PEFT adapter on CPU.

    The module-level ``model`` and ``processor`` globals are assigned
    together, and only after every loading step has succeeded, so a failed
    load never leaves one of them set without the other.

    Returns:
        bool: True when everything loaded, False if any step raised (the
        error is printed).
    """
    global model, processor
    print("กำลังโหลดโมเดลและ processor...")
    try:
        # Model paths
        base_model_path = "meta-llama/Llama-3.2-11B-Vision-Instruct"
        adapter_path = "Aekanun/thai-handwriting-llm"

        # Load processor from base model.
        # `token=True` replaces the deprecated `use_auth_token=True`; it
        # uses the credentials stored by `huggingface_hub.login`.
        print("กำลังโหลด processor...")
        loaded_processor = AutoProcessor.from_pretrained(base_model_path, token=True)

        # Load base model
        print("กำลังโหลด base model...")
        base_model = AutoModelForVision2Seq.from_pretrained(
            base_model_path,
            device_map={"": "cpu"},  # run on CPU
            torch_dtype=torch.float32,  # float32 instead of bfloat16
            # NOTE(review): trust_remote_code allows code from the model repo
            # to run locally; confirm it is really needed for this model.
            trust_remote_code=True,
            token=True,
        )

        # Load adapter on top of the base model
        print("กำลังโหลด adapter...")
        loaded_model = PeftModel.from_pretrained(
            base_model,
            adapter_path,
            torch_dtype=torch.float32,  # float32
            device_map={"": "cpu"},  # run on CPU
            token=True,
        )
    except Exception as e:
        # Start-up boundary: report the problem and let the caller decide.
        print(f"เกิดข้อผิดพลาดในการโหลดโมเดล: {str(e)}")
        return False

    # Publish both objects only once the whole load has succeeded.
    processor = loaded_processor
    model = loaded_model
    print("โหลดโมเดลสำเร็จ!")
    return True
64
+
65
def process_handwriting(image):
    """Transcribe Thai handwriting from an image (Gradio callback).

    Args:
        image: A PIL image, or an array accepted by ``Image.fromarray``;
            ``None`` when the user has not uploaded anything.

    Returns:
        str: The transcription produced by the model, a prompt asking for an
        upload when ``image`` is None, or an error message if anything
        raised while processing.
    """
    global model, processor

    if image is None:
        return "กรุณาอัพโหลดรูปภาพ"

    try:
        # Ensure image is in PIL format
        if not isinstance(image, Image.Image):
            image = Image.fromarray(image)

        # Create prompt
        prompt = (
            "Transcribe the Thai handwritten text from the provided image.\n"
            "Only return the transcription in Thai language."
        )

        # Create model inputs
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {"type": "image", "image": image},
                ],
            }
        ]

        # Render the chat template. add_generation_prompt=True appends the
        # assistant header so the model answers instead of continuing the
        # user turn.
        text = processor.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )
        # The rendered template already contains the special tokens, so the
        # tokenizer must not add them a second time.
        inputs = processor(
            text=text,
            images=image,
            add_special_tokens=False,
            return_tensors="pt",
        )

        # Move inputs to CPU
        inputs = {k: v.to('cpu') for k, v in inputs.items()}

        # Generate
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=256,
                do_sample=False,
                pad_token_id=processor.tokenizer.pad_token_id,
            )

        # generate() returns prompt + completion; decode only the newly
        # generated tokens so the instruction text is not echoed back.
        prompt_length = inputs["input_ids"].shape[-1]
        generated_ids = outputs[0][prompt_length:]
        transcription = processor.decode(generated_ids, skip_special_tokens=True)
        return transcription.strip()
    except Exception as e:
        # UI boundary: surface the failure as text instead of crashing.
        return f"เกิดข้อผิดพลาด: {str(e)}"
113
+
114
# Application start-up: load the model first, then build the Gradio UI.
print("กำลังเริ่มต้นแอปพลิเคชัน...")
if not load_model_and_processor():
    # Loading failed, so no interface is created.
    print("ไม่สามารถเริ่มต้นแอปพลิเคชันได้")
else:
    # One image input, one text output, backed by process_handwriting.
    demo = gr.Interface(
        fn=process_handwriting,
        inputs=gr.Image(type="pil", label="อัพโหลดรูปลายมือเขียนภาษาไทย"),
        outputs=gr.Textbox(label="ข้อความที่แปลงได้"),
        title="Thai Handwriting Recognition",
        description="อัพโหลดรูปภาพลายมือเขียนภาษาไทยเพื่อแปลงเป็นข้อความ",
        examples=[["example1.jpg"], ["example2.jpg"]],
    )

    # Only start the server when executed as a script.
    if __name__ == "__main__":
        demo.launch()