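"""Gradio demo for Thai handwriting recognition.

Loads the Aekanun/thai-handwriting-llm vision-to-text checkpoint from the
Hugging Face Hub and serves a simple image-to-text web interface.
"""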
import os
from huggingface_hub import login
from transformers import AutoProcessor, AutoModelForVision2Seq
import torch
from PIL import Image
import gradio as gr

# Login to Hugging Face Hub
if 'HUGGING_FACE_HUB_TOKEN' in os.environ:
    print("Logging in to Hugging Face Hub...")
    login(token=os.environ['HUGGING_FACE_HUB_TOKEN'])
else:
    print("Warning: HUGGING_FACE_HUB_TOKEN not found")

# Global model/processor handles, populated once by load_model()
model = None
processor = None

def load_model():
    global model, processor
    try:
        model_path = "Aekanun/thai-handwriting-llm"
        print(f"Loading model and processor from {model_path}...")
        
        processor = AutoProcessor.from_pretrained(model_path)
        model = AutoModelForVision2Seq.from_pretrained(model_path)
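        # Optional (hedged): on a memory-constrained GPU, loading the weights
        # in half precision roughly halves memory use, e.g.
        #   model = AutoModelForVision2Seq.from_pretrained(model_path, torch_dtype=torch.float16)
        # This is a sketch of an alternative, not part of the original app.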
        
        if torch.cuda.is_available():
            model = model.to("cuda")
        model.eval()  # switch to inference mode (disables dropout)
            
        return True
    except Exception as e:
        print(f"Error loading model: {str(e)}")
        return False

def process_image(image):
    if image is None:
        return "กรุณาอัพโหลดรูปภาพ"
        
    try:
        # Ensure image is in PIL format
        if not isinstance(image, Image.Image):
            image = Image.fromarray(image)
        
        # Convert to RGB if needed
        if image.mode != "RGB":
            image = image.convert("RGB")
            
        # Process image
        inputs = processor(images=image, return_tensors="pt")
        
        # Move to GPU if available
        if torch.cuda.is_available():
            inputs = {k: v.to("cuda") for k, v in inputs.items()}
            
        # Generate text
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=100,  # cap on generated text length
                num_beams=4,         # beam search for more stable decoding
                pad_token_id=processor.tokenizer.pad_token_id,
                eos_token_id=processor.tokenizer.eos_token_id
            )
            
        # Decode output
        predicted_text = processor.batch_decode(outputs, skip_special_tokens=True)[0]
        return predicted_text.strip()
        
    except Exception as e:
        return f"เกิดข้อผิดพลาด: {str(e)}"

# Initialize
print("Initializing application...")
if load_model():
    # Create Gradio interface
    demo = gr.Interface(
        fn=process_image,
        inputs=gr.Image(type="pil", label="อัพโหลดรูปลายมือเขียนภาษาไทย"),  # "Upload a Thai handwriting image"
        outputs=gr.Textbox(label="ข้อความที่แปลงได้"),  # "Recognized text"
        title="Thai Handwriting Recognition",
        description="อัพโหลดรูปภาพลายมือเขียนภาษาไทยเพื่อแปลงเป็นข้อความ",  # "Upload an image of Thai handwriting to convert it to text"
        examples=[["example1.jpg"], ["example2.jpg"]]
    )

    if __name__ == "__main__":
        demo.launch()
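        # Hedged note: demo.launch() suffices on Hugging Face Spaces; when run
        # in a plain Docker container, demo.launch(server_name="0.0.0.0") may
        # be needed so the server is reachable from outside the container.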
else:
    print("Failed to initialize the application")