Spaces: Running on Zero
File size: 2,969 Bytes
1a517f1 592ad8f b31bef1 1a517f1 592ad8f b31bef1 1a517f1 592ad8f ddc67bf 592ad8f 1a517f1 592ad8f 1a517f1 ddc67bf 1a517f1 592ad8f 1a517f1 592ad8f 1a517f1 592ad8f 1a517f1 592ad8f 1a517f1 592ad8f 1a517f1 592ad8f 1a517f1 592ad8f 1a517f1 592ad8f ddc67bf 592ad8f 1a517f1 592ad8f 1a517f1 592ad8f 1a517f1 592ad8f 1a517f1 592ad8f 1a517f1 592ad8f 1a517f1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
import os
from huggingface_hub import login
from transformers import AutoProcessor, AutoModelForVision2Seq
import torch
from PIL import Image
import gradio as gr
# Authenticate against the Hugging Face Hub when a token is provided via the
# environment; otherwise continue unauthenticated and warn.
hub_token = os.environ.get('HUGGING_FACE_HUB_TOKEN')
if hub_token is not None:
    print("Logging in to Hugging Face Hub...")
    login(token=hub_token)
else:
    print("Warning: HUGGING_FACE_HUB_TOKEN not found")

# Module-level handles shared across the app; populated by load_model().
model = None
processor = None
def load_model():
    """Load the Thai handwriting model and processor into module globals.

    Returns:
        bool: True when both objects loaded successfully, False otherwise.
        Failures are printed rather than raised (best-effort startup).
    """
    global model, processor
    try:
        repo_id = "Aekanun/thai-handwriting-llm"
        print(f"Loading model and processor from {repo_id}...")
        processor = AutoProcessor.from_pretrained(repo_id)
        model = AutoModelForVision2Seq.from_pretrained(repo_id)
        # Prefer the GPU when one is visible; .to("cpu") is a no-op otherwise.
        device = "cuda" if torch.cuda.is_available() else "cpu"
        model = model.to(device)
        return True
    except Exception as e:
        print(f"Error loading model: {str(e)}")
        return False
def process_image(image):
    """Transcribe an image of Thai handwriting into text.

    Args:
        image: A PIL image or a numpy array (as delivered by Gradio),
            or None when nothing was uploaded.

    Returns:
        str: The decoded text, or a Thai-language error message when the
        input is missing, the model is not loaded, or inference fails.
    """
    if image is None:
        return "กรุณาอัพโหลดรูปภาพ"
    # Fail fast with a clear message instead of surfacing a raw
    # AttributeError when load_model() failed or was never called.
    if model is None or processor is None:
        return "เกิดข้อผิดพลาด: โมเดลยังไม่ถูกโหลด"
    try:
        # Gradio may hand us a numpy array; normalize to a PIL RGB image.
        if not isinstance(image, Image.Image):
            image = Image.fromarray(image)
        if image.mode != "RGB":
            image = image.convert("RGB")
        inputs = processor(images=image, return_tensors="pt")
        # Keep inputs on the same device as the model.
        if torch.cuda.is_available():
            inputs = {k: v.to("cuda") for k, v in inputs.items()}
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=100,
                num_beams=4,
                pad_token_id=processor.tokenizer.pad_token_id,
                eos_token_id=processor.tokenizer.eos_token_id,
            )
        predicted_text = processor.batch_decode(outputs, skip_special_tokens=True)[0]
        return predicted_text.strip()
    except Exception as e:
        return f"เกิดข้อผิดพลาด: {str(e)}"
# --- Application bootstrap --------------------------------------------------
print("Initializing application...")


def _build_demo():
    # Assemble the Gradio interface wired to process_image.
    return gr.Interface(
        fn=process_image,
        inputs=gr.Image(type="pil", label="อัพโหลดรูปลายมือเขียนภาษาไทย"),
        outputs=gr.Textbox(label="ข้อความที่แปลงได้"),
        title="Thai Handwriting Recognition",
        description="อัพโหลดรูปภาพลายมือเขียนภาษาไทยเพื่อแปลงเป็นข้อความ",
        examples=[["example1.jpg"], ["example2.jpg"]],
    )


if load_model():
    demo = _build_demo()
    if __name__ == "__main__":
        demo.launch()
else:
    print("Failed to initialize the application")