File size: 2,275 Bytes
86b946a
 
 
 
3d9735d
 
 
 
86b946a
 
 
3d9735d
2e97054
 
3d9735d
 
 
2e97054
86b946a
9978831
3d9735d
 
 
 
 
 
 
 
 
 
019cdf0
 
 
86b946a
 
3d9735d
 
883b37e
3d9735d
 
31ed649
 
3d9735d
31ed649
 
 
883b37e
3d9735d
883b37e
3d9735d
883b37e
3d9735d
883b37e
3d9735d
 
 
 
9978831
3d9735d
883b37e
 
e0d541d
883b37e
3d9735d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import streamlit as st
from gradio_client import Client

# Constants
APP_TITLE = "Llama2 70B Chatbot"
APP_DESCRIPTION = """
This application demonstrates the Llama-2-70b chatbot model by Meta, 
fine-tuned for chat instructions. You can interact with the model and ask questions.
"""

# Initialize the Gradio client once at module import; reused for every request.
llama2_client = Client("https://ysharma-explore-llamav2-with-tgi.hf.space/")

# Sidebar controls for the generation parameters passed to the model.
with st.sidebar:
    system_prompt_input = st.text_input("Optional system prompt:")
    temperature_slider = st.slider("Temperature", min_value=0.0, max_value=1.0, value=0.9, step=0.05)
    # Token counts are integral: the original float slider (0.0-4096.0) sent
    # values like 4096.0 to the API and allowed a useless minimum of 0 tokens.
    # Streamlit infers an int slider from int min/max/value/step.
    max_new_tokens_slider = st.slider("Max new tokens", min_value=64, max_value=4096, value=4096, step=64)

# Prediction function
def get_llama2_response(user_message, system_prompt, temperature, max_new_tokens, topp=0.6, repetition_penalty=1.2):
    """Query the hosted Llama-2-70B endpoint and return its text reply.

    Progress is surfaced in a Streamlit status container while the remote
    call is in flight. Relies on the module-level ``llama2_client``.
    """
    # Positional argument order is dictated by the remote /chat endpoint.
    request_args = (
        user_message,
        system_prompt,
        temperature,
        max_new_tokens,
        topp,
        repetition_penalty,
    )
    with st.status("Requesting Llama-2"):
        st.write("Requesting API...")
        reply = llama2_client.predict(*request_args, api_name="/chat")
        st.write("Done")
    return reply

# Streamlit UI
st.title(APP_TITLE)
st.write(APP_DESCRIPTION)

# The conversation must survive Streamlit's script reruns, so it lives
# in session state rather than a plain module variable.
if "chat_history" not in st.session_state:
    st.session_state["chat_history"] = []

# Replay the stored conversation on every rerun.
for past_message in st.session_state["chat_history"]:
    with st.chat_message(past_message["role"]):
        st.markdown(past_message["content"])

# Handle a newly submitted prompt, if any.
prompt = st.chat_input("Ask Llama-2-70B anything...")
if prompt:
    # Echo the user's message and record it before calling the model.
    with st.chat_message("user", avatar="🧑‍💻"):
        st.markdown(prompt)
    st.session_state["chat_history"].append({"role": "user", "content": prompt})

    reply = get_llama2_response(
        prompt,
        system_prompt_input,
        temperature_slider,
        max_new_tokens_slider,
    )
    # Show the assistant's reply and persist it for future reruns.
    st.chat_message("assistant", avatar='🦙').markdown(reply)
    st.session_state["chat_history"].append({"role": "assistant", "content": reply})