Mahadih534 committed
Commit 0d2deb3 · verified · 1 Parent(s): cf5bb80

app code modified

Files changed (1)
app.py +24 -33
app.py CHANGED
@@ -1,8 +1,6 @@
 import gradio as gr
 from huggingface_hub import InferenceClient

-client = InferenceClient(model="mistralai/Mixtral-8x7B-Instruct-v0.1")
-
 def format_prompt(message, history):
     prompt = "<s>"
     for user_prompt, bot_response in history:
@@ -11,46 +9,38 @@ def format_prompt(message, history):
     prompt += f"[INST] {message} [/INST]"
     return prompt

-def kwargs_get(Temperature, tokens, top_k, top_p, r_p):
-    generate_kwargs = dict(
+
+def inference(message, history, model="mistralai/Mixtral-8x7B-Instruct-v0.1", Temperature=0.3, tokens=512, top_p=0.95, r_p=0.93):
+
+    Temperature = float(Temperature)
+    if Temperature < 1e-2:
+        Temperature = 1e-2
+    top_p = float(top_p)
+
+    kwargs = dict(
         temperature=Temperature,
         max_new_tokens=tokens,
         top_p=top_p,
         repetition_penalty=r_p,
         do_sample=True,
-        top_k=top_k,
         seed=42,
     )
-    return generate_kwargs
-
-
-def inference(message, history, Temperature, tokens, top_k, top_p, r_p, model):
-
     prompt = format_prompt(message, history)
     client = InferenceClient(model=model)
-    kwargs = kwargs_get(Temperature, tokens, top_k, top_p, r_p)
     partial_message = ""
     for response in client.text_generation(prompt, **kwargs, stream=True, details=True, return_full_text=False):
         partial_message += response.token.text
         yield partial_message

-with gr.Blocks() as UI:
-    with gr.Column():
-        gr.Markdown("Model Selection & Configuration")

-        models = gr.Dropdown(value="mistralai/Mixtral-8x7B-Instruct-v0.1",
-            choices=["mistralai/Mixtral-8x7B-Instruct-v0.1", "codellama/CodeLlama-7b-hf",
-            "bigcode/starcoder", "bigcode/santacoder", "codellama/CodeLlama-70b-Instruct-hf",
-            "google/flan-t5-xxl", "facebook/opt-66b", "tiiuae/falcon-40b", "bigscience/bloom",
-            "EleutherAI/gpt-neox-20b"], label="Available models",
-            info="default model is Mixtral-8x7B-Instruct-v0.1", interactive=True)
+chatbot = gr.Chatbot(bubble_full_width=False, show_label=False, show_copy_button=True, likeable=True)


-    with gr.Column():
-        gr.ChatInterface(
+UI = gr.ChatInterface(
     inference,
-    description="This is the demo for Gradio UI consuming TGI endpoint with LLaMA 7B-Chat model.",
-    title="Gradio 🤝 TGI",
+    chatbot=chatbot,
+    description="Rapid TGI (Text Generation Inference) demo, built for learning purposes.",
+    title="Rapid TGI",
     additional_inputs_accordion="Additional Configuration to get better response",
     retry_btn=None,
     undo_btn=None,
@@ -58,14 +48,15 @@ with gr.Blocks() as UI:
     theme="soft",
     submit_btn="Send",
     additional_inputs=[
-        gr.Slider(value=0.1, maximum=0.99, label="Temperature"),
-        gr.Slider(value=352, maximum=1020, label="Max New Tokens"),
-        gr.Slider(value=980, maximum=1000, label="Top K"),
-        gr.Slider(value=0.90, maximum=0.99, label="Top P"),
-        gr.Slider(value=0.99, maximum=1.0, label="Repetition Penalty"),
-        models
+        gr.Dropdown(value="mistralai/Mixtral-8x7B-Instruct-v0.1",
+            choices=["mistralai/Mixtral-8x7B-Instruct-v0.1", "HuggingFaceH4/zephyr-7b-beta",
+            "mistralai/Mistral-7B-Instruct-v0.1"], label="Available models",
+            info="default model is Mixtral-8x7B-Instruct-v0.1", interactive=True),
+        gr.Slider(value=0.3, maximum=1.0, label="Temperature"),
+        gr.Slider(value=512, maximum=1020, label="Max New Tokens"),
+        gr.Slider(value=0.95, maximum=1.0, label="Top P"),
+        gr.Slider(value=0.93, maximum=1.0, label="Repetition Penalty"),
     ],
-    examples=[["Hello", "Am I cool?", "Are tomatoes vegetables?"]],
+    examples=[["Hello"], ["Hello"]],
     )
-
-    UI.queue().launch(debug=True)
+UI.queue().launch(debug=True)
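
For orientation, the streaming call at the heart of the new inference() generator can be exercised on its own, outside Gradio. The sketch below is a minimal standalone version that mirrors the diff's default model and sampling values; it assumes a huggingface_hub release whose InferenceClient.text_generation supports stream=True and details=True (roughly 0.19+), network access to the Hugging Face Inference API, and a token in the environment if the endpoint requires one. The single-turn prompt reproduces the [INST] wrapper that format_prompt builds.

from huggingface_hub import InferenceClient

client = InferenceClient(model="mistralai/Mixtral-8x7B-Instruct-v0.1")

# Single-turn prompt in the same [INST] format that format_prompt() emits.
prompt = "<s>[INST] Write a haiku about GPUs. [/INST]"

partial_message = ""
for response in client.text_generation(
    prompt,
    temperature=0.3,          # diff default for Temperature
    max_new_tokens=512,       # diff default for tokens
    top_p=0.95,
    repetition_penalty=0.93,
    do_sample=True,
    seed=42,
    stream=True,              # yield tokens as they are generated
    details=True,             # wrap each chunk so it carries .token.text
    return_full_text=False,   # do not echo the prompt back
):
    partial_message += response.token.text
print(partial_message)

Each streamed item carries response.token.text, so appending it to partial_message and yielding the growing string is what lets gr.ChatInterface render the reply incrementally. One caveat: repetition_penalty values below 1.0 (the defaults here start at 0.93, with the slider capped at 1.0) make repetition more likely rather than less; values above 1.0 are what penalize it.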