import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from PIL import Image
import requests
import gradio as gr
import spaces

# Load model and tokenizer
model_name = 'scb10x/llama-3-typhoon-v1.5-8b-instruct-vision-preview'

@spaces.GPU(duration=120)  # Reserve the GPU for up to 120 seconds
def load_model():
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,
        device_map='auto',
        trust_remote_code=True
    )
    return model

model = load_model()
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

def prepare_inputs(text):
    # Only the image placeholder token goes into the text prompt; the pixel
    # data is passed to generate() separately via the `images` argument.
    # NOTE: "<|image|>" is the placeholder assumed for this model's chat
    # template (the original token appears to have been stripped in transit);
    # restore it per the model card if it differs.
    messages = [
        {"role": "system", "content": "You are a helpful vision-capable assistant who eagerly converses with the user in their language."},
        {"role": "user", "content": f"<|image|>\n{text}"}
    ]
    inputs = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    return inputs

@spaces.GPU(duration=60)  # Reserve the GPU for up to 60 seconds
def predict(prompt, img_url):
    try:
        # Fetch the image and resize it to the resolution the vision tower expects
        image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')
        image = image.resize((model.config.image_size, model.config.image_size))
        image_tensor = model.preprocess_images([image]).to(model.device)
        inputs = prepare_inputs(prompt)
        output_ids = model.generate(
            inputs,
            images=image_tensor,
            max_new_tokens=100,
            do_sample=True,
            temperature=0.2,
            top_p=0.2,
            repetition_penalty=1.0
        )[0]
        # Decode only the newly generated tokens, skipping the prompt
        result = tokenizer.decode(output_ids[inputs.shape[1]:], skip_special_tokens=True).strip()
        return result
    except Exception as e:
        return str(e)

# Gradio Interface
inputs = [
    gr.Textbox(label="Prompt", placeholder="Ask about the food in the image"),
    gr.Textbox(label="Image URL", placeholder="Enter an image URL")
]
outputs = gr.Textbox(label="Generated Output")

gr.Interface(
    fn=predict,
    inputs=inputs,
    outputs=outputs,
    title="Food Image AI Assistant",
    description="This model can analyze food images and answer questions about them."
).launch()
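
# A minimal sketch of exercising predict() directly (e.g. for a quick smoke
# test) without going through the Gradio UI. Left commented out because
# launch() above blocks until the server stops; the URL is a hypothetical
# placeholder, and any publicly reachable food-image URL works.
#
#     print(predict(
#         "What dish is shown in this image?",
#         "https://example.com/food.jpg"
#     ))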