Spaces: Sleeping
Islam YAHIAOUI committed · 96f677c · 1 Parent(s): 31e6eb8
Update UI
Browse files:
- app.py +166 -40
- example.py +0 -102
app.py CHANGED
@@ -9,16 +9,33 @@ from rag import run_rag
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
 
-
+TOKEN = os.getenv("HF_TOKEN")
 
+client = InferenceClient("HuggingFaceH4/zephyr-7b-beta" , token=TOKEN)
+system_message ="You are a capable and freindly assistant."
+history = []
+no_change_btn = gr.Button()
+enable_btn = gr.Button(interactive=True)
+disable_btn = gr.Button(interactive=False)
+
+# ================================================================================================================================
+# ================================================================================================================================
+
 def chat(
+    state,
     message,
-    history: list[tuple[str, str]],
-    system_message,
+    # history: list[tuple[str, str]],
     max_tokens,
     temperature,
     top_p,
 ):
+    print("Message: ", message)
+    print("History: ", history)
+    print("System Message: ", system_message)
+    print("Max Tokens: ", max_tokens)
+    print("Temperature: ", temperature)
+    print("Top P: ", top_p)
+
     messages = [{"role": "system", "content": system_message}]
 
     for val in history:
@@ -26,14 +43,17 @@ def chat(
         messages.append({"role": "user", "content": val[0]})
         if val[1]:
             messages.append({"role": "assistant", "content": val[1]})
-    message =run_rag(message, history)
+    # message =run_rag(message, history)
 
-    messages.append({"role": "user", "content": message})
-
+    messages.append({"role": "user", "content": run_rag(message)})
     response = ""
-
+    if state is None:
+        state = gr.State()
+        state.messages = [[("assistant", "")]]
+
     for message in client.chat_completion(
         messages,
+
         max_tokens=max_tokens,
         stream=True,
         temperature=temperature,
@@ -41,18 +61,13 @@ def chat(
     ):
         token = message.choices[0].delta.content
         response += str(token)
-
-        yield
+        state.messages[-1][-1] = str(token)
+        yield (state, state.to_gradio_chatbot(), "", None) + (disable_btn, disable_btn, disable_btn, enable_btn, enable_btn)
+
+    yield (state, state.to_gradio_chatbot(), "", None) + (enable_btn,) * 5
+
+# ================================================================================================================================
 
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-chatbot = gr.Chatbot(
-    label="Retrieval Augmented Generation News & Finance",
-    # avatar_images=[None, BOT_AVATAR],
-    show_copy_button=True,
-    likeable=True,
-    layout="bubble")
 theme = gr.themes.Base(
     font=[gr.themes.GoogleFont('Libre Franklin'), gr.themes.GoogleFont('Public Sans'), 'system-ui', 'sans-serif'],
 )
@@ -65,7 +80,7 @@ EXAMPLES = [
 max_new_tokens = gr.Slider(
     minimum=1,
     maximum=2048,
-    value=
+    value=1024,
     step=1,
     interactive=True,
     label="Max new tokens",
@@ -90,28 +105,139 @@ top_p = gr.Slider(
     label="Top-p (nucleus sampling)",
     info="Higher values is equivalent to sampling more low-probability tokens.",
 )
+textbox = gr.Textbox(show_label=False, placeholder="Enter text and press ENTER", container=False)
+# ================================================================================================================================
+
+# with gr.Blocks(
+#     fill_height=True,
+#     css=""".gradio-container .avatar-container {height: 40px width: 40px !important;} #duplicate-button {margin: auto; color: white; background: #f1a139; border-radius: 100vh; margin-top: 2px; margin-bottom: 2px;}""",
+# ) as main:
+#     gr.ChatInterface(
+#         chat,
+#         chatbot=chatbot,
+#         title="Retrieval Augmented Generation (RAG) Chatbot",
+#         examples=EXAMPLES,
+#         theme=theme,
+#         fill_height=True,
+#         additional_inputs=[
+#             max_new_tokens,
+#             temperature,
+#             top_p,
+#         ],
+#     )
+
+# with gr.Blocks(theme=theme, css="footer {visibility: hidden}textbox{resize:none}", title="RAG") as demo:
+#     gr.TabbedInterface([main ] , tab_names=["Chatbot"] )
+
+# demo.launch()
+
+def upvote_last_response(state):
+    return ("",) + (disable_btn,) * 3
 
+def downvote_last_response(state):
+    return ("",) + (disable_btn,) * 3
+
+def flag_last_response(state):
+    return ("",) + (disable_btn,) * 3
+
+def add_text(state ,textbox ):
+    print("textbox: ", textbox)
+    if state is None:
+        state = gr.State()
+        state.messages = [[("assistant", "")]]
+    state.text = textbox
+    history=""
+    state.append_message(state.roles[0], textbox)#
+    state.append_message(state.roles[1], "")
+    yield (state, None, history) + (disable_btn, disable_btn, disable_btn, enable_btn, enable_btn)
+
+block_css = """
+#buttons button {
+    min-width: min(120px,100%);
+}
+"""
+# ================================================================================================================================
+
+with gr.Blocks(title="CuMo", theme=theme, css=block_css) as demo:
+    state = gr.State()
+    gr.Markdown("Retrieval Augmented Generation (RAG) Chatbot" )
+    with gr.Row():
+        with gr.Column(scale=8):
+            chatbot = gr.Chatbot(
+                elem_id="chatbot",
+                label="Retrieval Augmented Generation (RAG) Chatbot",
+                height=400,
+                layout="bubble",
+            )
+            with gr.Row():
+                with gr.Column(scale=8):
+                    textbox.render()
+                with gr.Column(scale=1, min_width=100):
+                    submit_btn = gr.Button(value="Submit", variant="primary" )
+            with gr.Row(elem_id="buttons") as button_row:
+                upvote_btn = gr.Button(value="👍 Upvote", interactive=False)
+                downvote_btn = gr.Button(value="👎 Downvote", interactive=False)
+                flag_btn = gr.Button(value="⚠️ Flag", interactive=False)
+                #stop_btn = gr.Button(value="⏹️ Stop Generation", interactive=False)
+                regenerate_btn = gr.Button(value="🔄 Regenerate", interactive=False)
+                clear_btn = gr.Button(value="🗑️ Clear", interactive=False)
+
+        with gr.Column(scale=3):
+            gr.Examples(examples=[
+                [f"Tell me about the latest news in the world ?"],
+                [f"Tell me about the increase in the price of Bitcoin ?"],
+                [f"Tell me about the actual situation in Ukraine ?"],
+                [f"Tell me about current situation in palestinian ?"],
+            ],inputs=[textbox], label="Examples")
+            with gr.Accordion("Parameters", open=False) as parameter_row:
+                temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.2, step=0.1, interactive=True, label="Temperature",)
+                top_p = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, step=0.1, interactive=True, label="Top P",)
+                max_output_tokens = gr.Slider(minimum=0, maximum=1024, value=512, step=64, interactive=True, label="Max output tokens",)
+
+# ================================================================================================================================
+
+    btn_list = [upvote_btn, downvote_btn, flag_btn, regenerate_btn, clear_btn]
+    upvote_btn.click(
+        upvote_last_response,
+        [state],
+        [textbox, upvote_btn, downvote_btn, flag_btn]
+    )
+    downvote_btn.click(
+        downvote_last_response,
+        [state],
+        [textbox, upvote_btn, downvote_btn, flag_btn]
+    )
+    flag_btn.click(
+        flag_last_response,
+        [state],
+        [textbox, upvote_btn, downvote_btn, flag_btn]
+    )
 
-
-
-
-)
-
+    textbox.submit(
+        add_text,
+        [state, textbox],
+        [state, chatbot, textbox] + btn_list,
+    ).then(
+        chat,
+        [state, textbox,max_output_tokens, temperature, top_p],
+        [state, chatbot, textbox] + btn_list,
+    )
+
+    submit_btn.click(
+        add_text,
+        [state , textbox],
+        [state,chatbot, textbox] + btn_list,
+    ).then(
         chat,
-
-
-    description="A chatbot that uses a RAG model to generate responses based on the input query.",
-    examples=EXAMPLES,
-    theme=theme,
-    fill_height=True,
-    multimodal=True,
-    additional_inputs=[
-        max_new_tokens,
-        temperature,
-        top_p,
-    ],
+        [state, textbox, max_output_tokens , temperature, top_p ],
+        [state,chatbot, textbox] + btn_list,
 )
-
-    gr.TabbedInterface([main] , tab_names=["Chatbot"] )
+# ================================================================================================================================
 demo.launch()
-
-
+# ================================================================================================================================
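For context on the pattern this commit adopts: the new chat() streams deltas from huggingface_hub's InferenceClient.chat_completion and yields after every token. Below is a minimal standalone sketch of that streaming loop, assuming only that HF_TOKEN is set in the environment; the gr.State plumbing and the Space's custom state methods (to_gradio_chatbot, append_message) are specific to this app and omitted here.

import os
from huggingface_hub import InferenceClient

# Model and token source taken from the diff above.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta", token=os.getenv("HF_TOKEN"))

def stream_chat(user_message, system_message="You are a capable and friendly assistant."):
    # Build the request in the same messages format the commit uses.
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_message},
    ]
    response = ""
    # stream=True yields chunks; each chunk carries an incremental content delta.
    for chunk in client.chat_completion(messages, max_tokens=512, stream=True):
        token = chunk.choices[0].delta.content
        if token:  # the final chunk's delta can be None
            response += token
            yield response  # a Gradio generator would yield the partial text here

final = ""
for partial in stream_chat("What is retrieval-augmented generation?"):
    final = partial
print(final)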
example.py DELETED
@@ -1,102 +0,0 @@
-import gradio as gr
-import torch
-from transformers import (
-    AutoModelForCausalLM,
-    AutoTokenizer,
-    TextIteratorStreamer,
-    BitsAndBytesConfig,
-)
-import os
-from threading import Thread
-import spaces
-import time
-
-token = os.environ["HF_TOKEN"]
-
-quantization_config = BitsAndBytesConfig(
-    load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16
-)
-
-model = AutoModelForCausalLM.from_pretrained(
-    "NousResearch/Hermes-2-Pro-Llama-3-8B", quantization_config=quantization_config, token=token
-)
-tok = AutoTokenizer.from_pretrained("NousResearch/Hermes-2-Pro-Llama-3-8B", token=token)
-terminators = [
-    tok.eos_token_id,
-    tok.convert_tokens_to_ids("<|eot_id|>")
-]
-
-if torch.cuda.is_available():
-    device = torch.device("cuda")
-    print(f"Using GPU: {torch.cuda.get_device_name(device)}")
-else:
-    device = torch.device("cpu")
-    print("Using CPU")
-
-# model = model.to(device)
-# Dispatch Errors
-
-
-@spaces.GPU(duration=150)
-def chat(message, history, temperature,do_sample, max_tokens):
-    chat = []
-    for item in history:
-        chat.append({"role": "user", "content": item[0]})
-        if item[1] is not None:
-            chat.append({"role": "assistant", "content": item[1]})
-    chat.append({"role": "user", "content": message})
-    messages = tok.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
-    model_inputs = tok([messages], return_tensors="pt").to(device)
-    streamer = TextIteratorStreamer(
-        tok, timeout=10.0, skip_prompt=True, skip_special_tokens=True
-    )
-    generate_kwargs = dict(
-        model_inputs,
-        streamer=streamer,
-        max_new_tokens=max_tokens,
-        do_sample=True,
-        temperature=temperature,
-        eos_token_id=terminators,
-    )
-
-    if temperature == 0:
-        generate_kwargs['do_sample'] = False
-
-    t = Thread(target=model.generate, kwargs=generate_kwargs)
-    t.start()
-
-    partial_text = ""
-    for new_text in streamer:
-        partial_text += new_text
-        yield partial_text
-
-    tokens = len(tok.tokenize(partial_text))
-    yield partial_text
-
-
-demo = gr.ChatInterface(
-    fn=chat,
-    examples=[["Write me a poem about Machine Learning."]],
-    # multimodal=False,
-    additional_inputs_accordion=gr.Accordion(
-        label="⚙️ Parameters", open=False, render=False
-    ),
-    additional_inputs=[
-        gr.Slider(
-            minimum=0, maximum=1, step=0.1, value=0.9, label="Temperature", render=False
-        ),
-        gr.Checkbox(label="Sampling",value=True),
-        gr.Slider(
-            minimum=128,
-            maximum=4096,
-            step=1,
-            value=512,
-            label="Max new tokens",
-            render=False,
-        ),
-    ],
-    stop_btn="Stop Generation",
-    title="Chat With LLMs",
-    description="Now Running [NousResearch/Hermes-2-Pro-Llama-3-8B](https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B) in 4bit"
-)
-demo.launch()
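The deleted example.py demonstrated the other common streaming approach: running generation locally with transformers and draining a TextIteratorStreamer from a background thread. A condensed sketch of that pattern, keeping the model name from the file; the 4-bit quantization, the spaces.GPU decorator, and the Gradio wiring are stripped for brevity.

from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Model identifier follows the deleted file; any small causal LM would work.
model_id = "NousResearch/Hermes-2-Pro-Llama-3-8B"
tok = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

chat = [{"role": "user", "content": "Write me a poem about Machine Learning."}]
prompt = tok.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
inputs = tok([prompt], return_tensors="pt")

# The streamer is a blocking iterator; generate() runs on a worker thread and
# feeds decoded text into it as tokens are produced.
streamer = TextIteratorStreamer(tok, skip_prompt=True, skip_special_tokens=True)
Thread(target=model.generate, kwargs=dict(inputs, streamer=streamer, max_new_tokens=256)).start()

partial_text = ""
for new_text in streamer:
    partial_text += new_text  # a Gradio chat fn would yield partial_text here
print(partial_text)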