Spaces:

Mahadih534
/

Rapid_TGI

Runtime error

App Files Files Community

Mahadih534 committed on Feb 1, 2024

Commit

0d2deb3

verified ·

1 Parent(s): cf5bb80

app code modified

Browse files

Files changed (1) hide show

app.py +24 -33

app.py CHANGED Viewed

@@ -1,8 +1,6 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
-client = InferenceClient(model="mistralai/Mixtral-8x7B-Instruct-v0.1")
 def format_prompt(message, history):
   prompt = "<s>"
   for user_prompt, bot_response in history:
@@ -11,46 +9,38 @@ def format_prompt(message, history):
   prompt += f"[INST] {message} [/INST]"
   return prompt
-def kwargs_get(Temperature, tokens, top_k, top_p, r_p):
- generate_kwargs = dict(
         temperature=Temperature,
         max_new_tokens=tokens,
         top_p=top_p,
         repetition_penalty=r_p,
         do_sample=True,
-        top_k=top_k,
         seed=42,
     )
- return generate_kwargs
-def inference(message, history, Temperature, tokens, top_k, top_p, r_p, model):
     prompt = format_prompt(message, history)
     client = InferenceClient(model=model)
-    kwargs = kwargs_get(Temperature, tokens, top_k, top_p, r_p)
     partial_message = ""
     for response in client.text_generation(prompt,**kwargs, stream=True, details=True, return_full_text=False):
         partial_message += response.token.text
         yield partial_message
-with gr.Blocks() as UI:
-  with gr.Column():
-    gr.Markdown("Model Selection & Configuration")
-    models=gr.Dropdown(value="mistralai/Mixtral-8x7B-Instruct-v0.1",
-                       choices =["mistralai/Mixtral-8x7B-Instruct-v0.1","codellama/CodeLlama-7b-hf",
-                        "bigcode/starcoder","bigcode/santacoder","codellama/CodeLlama-70b-Instruct-hf",
-                        "google/flan-t5-xxl","facebook/opt-66b","tiiuae/falcon-40b", "bigscience/bloom",
-                        "EleutherAI/gpt-neox-20b"], label="Available models",
-                        info="default model is Mixtral-8x7B-Instruct-v0.1",interactive=True,)
-  with gr.Column():
-    gr.ChatInterface(
         inference,
-        description="This is the demo for Gradio UI consuming TGI endpoint with LLaMA 7B-Chat model.",
-        title="Gradio 🤝 TGI",
         additional_inputs_accordion="Additional Configuration to get better response",
         retry_btn=None,
         undo_btn=None,
@@ -58,14 +48,15 @@ with gr.Blocks() as UI:
         theme="soft",
         submit_btn="Send",
         additional_inputs=[
-                                gr.Slider(value=0.1, maximum=0.99,label="Temperature"),
-                                gr.Slider(value=352, maximum=1020,label="Max New Tokens"),
-                                gr.Slider(value=980, maximum=1000,label="Top K"),
-                                gr.Slider(value=0.90, maximum=0.99,label="Top P"),
-                                gr.Slider(value=0.99, maximum=1.0,label="Repetition Penalty"),
-                                models
                             ],
-        examples=[["Hello", "Am I cool?", "Are tomatoes vegetables?"]],
     )
-UI.queue().launch(debug=True)

 import gradio as gr
 from huggingface_hub import InferenceClient
 def format_prompt(message, history):
   prompt = "<s>"
   for user_prompt, bot_response in history:
   prompt += f"[INST] {message} [/INST]"
   return prompt
def inference(message, history, model="mistralai/Mixtral-8x7B-Instruct-v0.1", Temperature=0.3, tokens=512,top_p=0.95, r_p=0.93):
    """Stream a model reply for ``message``, yielding the text accumulated so far.

    Args:
        message: Latest user message.
        history: Earlier turns, passed through to ``format_prompt``.
        model: Model id handed to ``InferenceClient``.
        Temperature: Sampling temperature; values below 0.01 are raised to 0.01.
        tokens: Upper bound on newly generated tokens.
        top_p: Nucleus-sampling probability mass.
        r_p: Repetition penalty.

    Yields:
        The partial reply, growing by one token's text per iteration.
    """
    # The UI feeds these from sliders, so normalise every numeric knob the
    # same way instead of only temperature/top_p; max_new_tokens in
    # particular has to be an integer.
    temperature = max(float(Temperature), 1e-2)
    kwargs = dict(
        temperature=temperature,
        max_new_tokens=int(tokens),
        top_p=float(top_p),
        repetition_penalty=float(r_p),
        do_sample=True,
        seed=42,  # fixed seed: identical settings reproduce the same sample
    )
    prompt = format_prompt(message, history)
    client = InferenceClient(model=model)
    partial_message = ""
    for response in client.text_generation(prompt, **kwargs, stream=True, details=True, return_full_text=False):
        # Tokens the backend flags as special (e.g. end-of-sequence) are
        # control markers, not reply text, so keep them out of the chat.
        if response.token.special:
            continue
        partial_message += response.token.text
        yield partial_message
+chatbot = gr.Chatbot(bubble_full_width=False, show_label=False, show_copy_button=True, likeable=True,)
+UI=  gr.ChatInterface(
         inference,
+        chatbot=chatbot,
+        description="The Rapid TGI (Text Generation Inference) has developed by learning purpose",
+        title="Rapid TGI",
         additional_inputs_accordion="Additional Configuration to get better response",
         retry_btn=None,
         undo_btn=None,
         theme="soft",
         submit_btn="Send",
         additional_inputs=[
+                                gr.Dropdown(value="mistralai/Mixtral-8x7B-Instruct-v0.1",
+                                  choices =["mistralai/Mixtral-8x7B-Instruct-v0.1","HuggingFaceH4/zephyr-7b-beta",
+                                    "mistralai/Mistral-7B-Instruct-v0.1"], label="Available models",
+                                    info="default model is Mixtral-8x7B-Instruct-v0.1",interactive=True,),
+                                gr.Slider(value=0.3, maximum=1.0,label="Temperature"),
+                                gr.Slider(value=512, maximum=1020,label="Max New Tokens"),
+                                gr.Slider(value=0.95, maximum=1.0,label="Top P"),
+                                gr.Slider(value=0.93, maximum=1.0,label="Repetition Penalty"),
                             ],
+        examples=[["Hello"], ["Hello"]],
     )
+UI.queue().launch(debug=True)