import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer
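# Fine-tuned checkpoint to load; this may be a local directory or a Hugging Face Hub repo id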
model_name = "Llama-2-7b-finetuned-with-QLoRa"
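# Cache the model and tokenizer so they are loaded only once across Streamlit reruns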
@st.cache_resource
def load_model_and_tokenizer(model_name):
    model = AutoModelForCausalLM.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    return model, tokenizer
model, tokenizer = load_model_and_tokenizer(model_name)
# Function to generate response
def generate_response(topic):
    # Build the prompt using Llama 2's [INST] ... [/INST] instruction format
    input_text = f"[INST] Response about {topic}: [/INST]"
    input_ids = tokenizer.encode(input_text, return_tensors="pt")
    # Generate text (max_length counts prompt tokens plus generated tokens)
    output = model.generate(input_ids, max_length=500, num_return_sequences=1, no_repeat_ngram_size=2)
    # Decode and return text, stripping special tokens
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
    return generated_text
# Streamlit app
def main():
    st.title("Llama 2 Fine-Tuned Demo with QLoRa")
    # Sidebar input for topic
    topic = st.sidebar.text_input("Enter your topic", "a crazy person driving a car")
    # Generate button
    if st.sidebar.button("Generate Response"):
        with st.spinner("Generating response..."):
            response = generate_response(topic)
        st.subheader(f"Generated response on '{topic}':")
        st.write(response)
if __name__ == "__main__":
    main()