import streamlit as st
from gradio_client import Client

# Constants
APP_TITLE = "Llama2 70B Chatbot"
APP_DESCRIPTION = """
This application demonstrates Meta's Llama-2-70b chat model,
fine-tuned to follow chat instructions. You can interact with the model and ask it questions.
"""

# Initialize the Gradio client for the hosted Space
llama2_client = Client("https://ysharma-explore-llamav2-with-tgi.hf.space/")
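# Note: if the argument order for an endpoint is ever unclear,
# llama2_client.view_api() prints the Space's endpoints and the inputs each expects.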

with st.sidebar:
    system_prompt_input = st.text_input("Optional system prompt:")
    temperature_slider = st.slider("Temperature", min_value=0.0, max_value=1.0, value=0.9, step=0.05)
    # Integer bounds so the slider yields an int token count rather than a float
    max_new_tokens_slider = st.slider("Max new tokens", min_value=64, max_value=4096, value=4096, step=64)

# Prediction function
def get_llama2_response(user_message, system_prompt, temperature, max_new_tokens, topp=0.6, repetition_penalty=1.2):
    """Send one chat turn to the hosted Space and return the model's reply."""
    with st.status("Requesting Llama-2"):
        st.write("Requesting API...")
        # Positional arguments must match the order the /chat endpoint expects
        response = llama2_client.predict(
            user_message,
            system_prompt,
            temperature,
            max_new_tokens,
            topp,
            repetition_penalty,
            api_name="/chat",
        )
        st.write("Done")
    return response
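
# Example call (hypothetical values, assuming the Space is reachable):
#   reply = get_llama2_response("What is Llama 2?", "", 0.9, 256)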

# Streamlit UI
st.title(APP_TITLE)
st.write(APP_DESCRIPTION)

# Initialize chat history in session state so it survives reruns
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []

# Display chat messages from history on app rerun
for message in st.session_state.chat_history:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# React to user input
if user_input := st.chat_input("Ask Llama-2-70B anything..."):
    # Display user message in chat message container
    st.chat_message("user", avatar="🧑‍💻").markdown(user_input)
    # Add user message to chat history
    st.session_state.chat_history.append({"role": "user", "content": user_input})

    response = get_llama2_response(
        user_input,
        system_prompt_input,
        temperature_slider,
        max_new_tokens_slider,
    )

    # Display assistant response in chat message container
    with st.chat_message("assistant", avatar="🦙"):
        st.markdown(response)
    # Add assistant response to chat history
    st.session_state.chat_history.append({"role": "assistant", "content": response})
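
# To run locally (assuming this file is saved as app.py and the streamlit and
# gradio_client packages are installed):
#   streamlit run app.py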