import spaces
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

MODEL_NAME = "osmankoc/llama-2-7b-zoa"

# Load the model and tokenizer eagerly at import time.
# device_map="auto" lets accelerate place the weights; float16 halves memory.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,
    device_map="auto",
)


# On ZeroGPU Spaces, @spaces.GPU attaches a GPU only for the duration of the call.
@spaces.GPU
def generate(prompt):
    """Generate HTML (Tailwind CSS + Shadcn UI) for the given user prompt.

    Args:
        prompt: Free-form user description of the desired page/component.

    Returns:
        The decoded model output as a single string. NOTE: this includes the
        system prompt + user prompt echoed back, since the full sequence is
        decoded without slicing off the input tokens.
    """
    system_prompt = (
        "Generate HTML code using Tailwind CSS framework and Shadcn UI components. Add HTML tags to the code. Don't forget to use the correct classes. Don't write inline styles and descriptions. "
        "Here is the user prompt: "
    )
    full_prompt = system_prompt + prompt

    # Use model.device rather than a hard-coded "cuda": with device_map="auto"
    # the model may live on any device, and hard-coding crashes on CPU fallback.
    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)

    # NOTE(review): max_length counts the prompt tokens too; max_new_tokens is
    # usually what is intended here — kept as-is to preserve existing behavior.
    output = model.generate(**inputs, max_length=2500)
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    return response


# Gradio UI (acts as a simple API surface as well).
# api_name exposes the function at the /predict API endpoint automatically;
# the previous demo.add_api_route(...) call does not exist on gr.Interface
# (that is a FastAPI method) and raised AttributeError at startup.
demo = gr.Interface(
    fn=generate,
    inputs=gr.Textbox(placeholder="Enter prompt..."),
    outputs=gr.Textbox(),
    api_name="predict",
)

demo.launch()