import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "AbdulHadi806/Llama-2-7b-finetuned-with-QLoRa"

@st.cache_resource
def load_model_and_tokenizer(model_name):
    """Load the model and tokenizer once and cache them across Streamlit reruns."""
    model = AutoModelForCausalLM.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    return model, tokenizer

model, tokenizer = load_model_and_tokenizer(model_name)

# Function to generate a response for a fully formed prompt
def generate_response(prompt):
    input_ids = tokenizer.encode(prompt, return_tensors="pt")

    # Generate text
    output = model.generate(
        input_ids,
        max_length=500,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
    )

    # Decode and return the generated text
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
    return generated_text

# Streamlit app
def main():
    st.title("Llama 2 Fine-Tuned Demo with QLoRA")

    # Sidebar input for topic
    topic = st.sidebar.text_input("Enter your topic", "a crazy person driving a car")

    # Generate button
    if st.sidebar.button("Generate Response"):
        with st.spinner("Generating response..."):
            # Wrap the topic in Llama 2 instruction tags; the prompt is built
            # here in one place rather than wrapped a second time inside
            # generate_response, which previously produced a garbled prompt
            # like "Response about [INST] ... [/INST]:".
            response = generate_response(f"[INST] {topic} [/INST]")
        st.subheader(f"Generated response on '{topic}':")
        st.write(response)

if __name__ == "__main__":
    main()
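
# Notes (not part of the original script, kept as comments to match the file's
# format): the app is launched with Streamlit's CLI, and loading a 7B Llama 2
# checkpoint in full precision needs substantial RAM/VRAM. One common option,
# shown here only as a hedged sketch, is to load the weights in 4-bit via
# bitsandbytes and let accelerate place them on available devices:
#
#   import torch
#   from transformers import BitsAndBytesConfig
#
#   quant_config = BitsAndBytesConfig(
#       load_in_4bit=True,                      # quantize weights to 4-bit
#       bnb_4bit_compute_dtype=torch.float16,   # compute in fp16
#   )
#   model = AutoModelForCausalLM.from_pretrained(
#       model_name,
#       quantization_config=quant_config,
#       device_map="auto",                      # requires `accelerate`
#   )
#
# To run the app (assuming this file is saved as app.py):
#   streamlit run app.py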