import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
from peft import PeftModel

# Model and tokenizer names
model_name = "google/gemma-2-2b-it"
lora_model_name = "Anlam-Lab/gemma-2-2b-it-anlamlab-SA-Chatgpt4mini"

# Configure 4-bit quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

# Initialize tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the base model with 4-bit quantization
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config,
)

# Load the LoRA adapter
model = PeftModel.from_pretrained(model, lora_model_name)
def generate_response(input_text):
    # Wrap the raw input in Gemma's chat format so the decoded output reliably
    # contains the <start_of_turn>model marker that is split on below.
    messages = [{"role": "user", "content": input_text}]
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    # The chat template already prepends <bos>, so don't add special tokens again.
    inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False).to(model.device)

    generation_config = {
        "max_length": 512,
        "temperature": 0.01,
        "do_sample": True,
        "pad_token_id": tokenizer.pad_token_id,
        "eos_token_id": tokenizer.eos_token_id,
    }

    with torch.no_grad():
        outputs = model.generate(**inputs, **generation_config)

    response = tokenizer.decode(outputs[0])
    # Keep only the model's turn: drop the prompt and the end-of-turn marker.
    return response.split("<start_of_turn>model\n")[-1].split("<end_of_turn>")[0].strip()
# Create Gradio interface
iface = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=5, placeholder="Metninizi buraya girin..."),
    outputs=gr.Textbox(lines=5, label="Model Çıktısı"),
    title="Anlam-Lab",
)
if __name__ == "__main__":
    iface.launch()
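
# Usage sketch (an assumption, not part of the original Space): once the app is
# running, the endpoint can be queried from another process with gradio_client.
# The local URL and api_name="/predict" (the gr.Interface default) are assumed
# and depend on where the Space is deployed; the sample sentence is illustrative.
#
#   from gradio_client import Client
#   client = Client("http://127.0.0.1:7860")
#   print(client.predict("Bu ürün beklediğimden çok daha iyi çıktı.", api_name="/predict"))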