Spaces:
Runtime error
Runtime error
File size: 1,803 Bytes
7a44780 f66fd4e 7a44780 615e457 7a44780 f66fd4e 7a44780 ded1cee eef4ff9 7a44780 3afacd8 7a44780 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
# import gradio as gr
# demo = gr.load("models/NSTiwari/fine_tuned_science_gemma2b-it")
# demo.launch()
import gradio as gr
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import time
# Hugging Face Hub ID of the fine-tuned checkpoint; put your own model ID here.
model_id = "NSTiwari/fine_tuned_science_gemma2b-it"

# Load the tokenizer and the causal-LM weights for that checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
def inference(input_text, max_new_tokens=512):
    """
    Performs inference on the science question and returns answer and latency.

    Args:
        input_text: The question to send to the model, as plain text.
        max_new_tokens: Upper bound on the number of tokens generated for the
            answer. The prompt is not counted against this budget, so long
            questions still leave room for a reply.

    Returns:
        A dict with two string entries: "answer" (the decoded continuation,
        with the prompt and special tokens removed) and "latency" (elapsed
        wall-clock time formatted as "<seconds> seconds").
    """
    # perf_counter is monotonic, so system clock adjustments cannot skew the
    # measured latency.
    start_time = time.perf_counter()

    encoded = tokenizer(input_text, return_tensors="pt").to(model.device)
    input_length = encoded["input_ids"].shape[1]

    outputs = model.generate(
        input_ids=encoded["input_ids"],
        # Forward the mask produced by the tokenizer instead of letting
        # generate() guess it; None falls back to generate()'s default.
        attention_mask=encoded.get("attention_mask"),
        max_new_tokens=max_new_tokens,
        do_sample=False,
    )

    # generate() returns prompt + continuation; keep only the continuation.
    generated_tokens = outputs[0, input_length:].tolist()
    # Drop special tokens (e.g. the end-of-sequence marker) so they do not
    # leak into the text shown to the user.
    response = tokenizer.decode(generated_tokens, skip_special_tokens=True)

    elapsed = time.perf_counter() - start_time
    return {"answer": response, "latency": f"{elapsed:.2f} seconds"}
def gradio_interface(question):
    """
    Adapter used by the Gradio UI: runs inference on the question and
    returns the answer and the latency as a two-element tuple.
    """
    outcome = inference(question)
    answer, latency = outcome["answer"], outcome["latency"]
    return answer, latency
# Web UI: one multi-line question box in, two text boxes (answer, latency) out.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Textbox(lines=4, label="Science Question"),
    outputs=[gr.Textbox(label=caption) for caption in ("Answer", "Latency")],
    title="SciGemma",
    description="Ask a science question and get an answer from the fine-tuned Gemma 2b-it model.",
    # Each example is a one-element row because the interface has one input.
    examples=[["What does air consist of?"], ["What is an atom?"]],
)
# Start the Gradio server only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    iface.launch()