# Streamlit demo app; launch with: streamlit run <script_name>.py
import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "Llama-2-7b-finetuned-with-QLoRa"


@st.cache_resource
def load_model_and_tokenizer(model_name):
    # Cache the model and tokenizer so Streamlit loads them only once,
    # not on every rerun of the script.
    model = AutoModelForCausalLM.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    return model, tokenizer


model, tokenizer = load_model_and_tokenizer(model_name)
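
# Note: the checkpoint above is assumed to be a fully merged model. If the
# fine-tuning run saved only the QLoRA adapter weights, they would instead be
# loaded on top of a 4-bit-quantized base model via peft. A minimal sketch,
# with a hypothetical adapter path "llama-2-7b-qlora-adapter":
#
#     import torch
#     from peft import PeftModel
#     from transformers import BitsAndBytesConfig
#
#     bnb_config = BitsAndBytesConfig(
#         load_in_4bit=True,                     # 4-bit base weights
#         bnb_4bit_quant_type="nf4",             # NF4 quantization used by QLoRA
#         bnb_4bit_compute_dtype=torch.float16,  # run matmuls in fp16
#     )
#     base = AutoModelForCausalLM.from_pretrained(
#         "meta-llama/Llama-2-7b-hf", quantization_config=bnb_config
#     )
#     model = PeftModel.from_pretrained(base, "llama-2-7b-qlora-adapter")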

def generate_response(topic):
    # Build the Llama 2 [INST] ... [/INST] prompt here, in one place,
    # rather than wrapping the topic again at the call site.
    prompt = f"[INST] Response about {topic}: [/INST]"
    input_ids = tokenizer.encode(prompt, return_tensors="pt")

    output = model.generate(input_ids, max_length=500, num_return_sequences=1, no_repeat_ngram_size=2)

    # Decode only the newly generated tokens, not the echoed prompt.
    new_tokens = output[0][input_ids.shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)
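
# generate() above decodes greedily (plus an n-gram repetition constraint).
# A sampled variant, with illustrative parameter values not taken from the
# original demo, would look like:
#
#     output = model.generate(
#         input_ids,
#         max_new_tokens=256,   # cap new tokens instead of total length
#         do_sample=True,
#         temperature=0.7,
#         top_p=0.9,
#         no_repeat_ngram_size=2,
#     )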

def main():
    st.title("Llama 2 Fine-Tuned Demo with QLoRA")

    topic = st.sidebar.text_input("Enter your topic", "a crazy person driving a car")

    if st.sidebar.button("Generate Response"):
        with st.spinner("Generating response..."):
            response = generate_response(topic)
        st.subheader(f"Generated response on '{topic}':")
        st.write(response)


if __name__ == "__main__":
    main()