import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
from peft import PeftModel

# Model and tokenizer names
model_name = "google/gemma-2-2b-it"
lora_model_name = "Anlam-Lab/gemma-2-2b-it-anlamlab-SA-Chatgpt4mini"

# Configure 4-bit quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

# Initialize tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the base model with 4-bit quantization
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config,
)

# Load the LoRA adapter
model = PeftModel.from_pretrained(model, lora_model_name)


def generate_response(input_text):
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

    generation_config = {
        "max_length": 512,
        "temperature": 0.01,
        "do_sample": True,
        "pad_token_id": tokenizer.pad_token_id,
        "eos_token_id": tokenizer.eos_token_id,
    }

    with torch.no_grad():
        outputs = model.generate(**inputs, **generation_config)

    response = tokenizer.decode(outputs[0])
    # Keep only the model's reply: take the text after the "model\n" turn marker
    # and cut at Gemma's end-of-turn token.
    return response.split("model\n")[1].split("<end_of_turn>")[0]


# Create Gradio interface
iface = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=5, placeholder="Metninizi buraya girin..."),
    outputs=gr.Textbox(lines=5, label="Model Çıktısı"),
    title="Anlam-Lab",
)

if __name__ == "__main__":
    iface.launch()