ayush5710 commited on
Commit
c166553
1 Parent(s): 7b97f3b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +148 -0
app.py ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import shutil
4
+ import requests
5
+
6
+ import gradio as gr
7
+ from huggingface_hub import Repository, InferenceClient
8
+
9
# --- Inference configuration -------------------------------------------------
# API token read from the environment; None means unauthenticated requests.
HF_TOKEN = os.environ.get("HF_TOKEN", None)
# NOTE(review): the endpoint serves WizardCoder, but BOT_NAME and the prompt
# labels below say "Falcon" — confirm which model/persona is intended.
API_URL = "https://api-inference.huggingface.co/models/WizardLM/WizardCoder-Python-34B-V1.0"
BOT_NAME = "Falcon"  # display name; note format_prompt() hardcodes "Falcon:" instead of using this

# Token sequences that end generation, passed to the endpoint and also
# trimmed client-side from the streamed output.
STOP_SEQUENCES = ["\nUser:", "<|endoftext|>", " User:", "###"]

# Example questions shown beneath the chat box.
EXAMPLES = [
    ["what are the benefits of programming in python?"],
    ["explain binary search in java?"],
]

# Client for the hosted text-generation inference endpoint.
client = InferenceClient(
    API_URL,
    headers={"Authorization": f"Bearer {HF_TOKEN}"},
)
24
+
25
def format_prompt(message, history, system_prompt):
    """Build the flat conversation transcript sent to the model.

    The transcript is an optional "System:" line, each past (user, bot)
    exchange as "User:"/"Falcon:" lines, and finally the new message with a
    trailing "Falcon:" cue for the model to complete.
    """
    pieces = []
    if system_prompt:
        pieces.append(f"System: {system_prompt}\n")
    for past_user, past_bot in history:
        pieces.append(f"User: {past_user}\n")
        pieces.append(f"Falcon: {past_bot}\n")  # Response already contains "Falcon: "
    pieces.append(f"User: {message}\nFalcon:")
    return "".join(pieces)
35
+
36
# Module-level sampling seed; generate() increments it on every request so
# successive generations differ.
seed = 42
37
+
38
def generate(
    prompt, history, system_prompt="", temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0,
):
    """Stream a model reply for the Gradio ChatInterface.

    Args:
        prompt: the new user message.
        history: list of (user, bot) message pairs from previous turns.
        system_prompt: optional system instruction prepended to the transcript.
        temperature, max_new_tokens, top_p, repetition_penalty: sampling
            parameters forwarded to the inference endpoint.

    Yields:
        Progressively longer prefixes of the reply, with any trailing stop
        sequence stripped.
    """
    temperature = float(temperature)
    # The endpoint rejects temperature == 0; clamp to a small positive value.
    if temperature < 1e-2:
        temperature = 1e-2
    top_p = float(top_p)

    # NOTE(review): incrementing a module-level seed makes runs non-repeatable
    # and is racy under concurrent requests — confirm this is intentional.
    global seed
    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        stop_sequences=STOP_SEQUENCES,
        do_sample=True,
        seed=seed,
    )
    seed = seed + 1
    formatted_prompt = format_prompt(prompt, history, system_prompt)

    stream = client.text_generation(
        formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False
    )
    output = ""

    for response in stream:
        output += response.token.text

        hit_stop = False
        for stop_str in STOP_SEQUENCES:
            if output.endswith(stop_str):
                # Trim the stop marker and any whitespace it exposed.
                output = output[:-len(stop_str)]
                output = output.rstrip()
                hit_stop = True
                break
        yield output
        if hit_stop:
            # Fix: the original kept consuming tokens after a stop sequence
            # appeared, appending text past the intended end of the reply.
            break
    # Ensure at least one emission even if the stream yields nothing.
    yield output
71
+
72
+
73
# Extra ChatInterface controls; their order must match generate()'s keyword
# parameters (system_prompt, temperature, max_new_tokens, top_p,
# repetition_penalty). The widget defaults below (0.1, 256, 0.90, 1.2)
# override the defaults in generate()'s signature.
additional_inputs=[
    gr.Textbox("", label="Optional system prompt"),
    gr.Slider(
        label="Temperature",
        value=0.1,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Higher values produce more diverse outputs",
    ),
    gr.Slider(
        label="Max new tokens",
        value=256,
        minimum=0,
        maximum=8192,
        step=64,
        interactive=True,
        info="The maximum numbers of new tokens",
    ),
    gr.Slider(
        label="Top-p (nucleus sampling)",
        value=0.90,
        minimum=0.0,
        maximum=1,
        step=0.05,
        interactive=True,
        info="Higher values sample more low-probability tokens",
    ),
    gr.Slider(
        label="Repetition penalty",
        value=1.2,
        minimum=1.0,
        maximum=2.0,
        step=0.05,
        interactive=True,
        info="Penalize repeated tokens",
    )
]
112
+
113
+
114
def vote(data: gr.LikeData):
    """Log a like/dislike event from the chatbot to stdout."""
    if data.liked:
        message = "You upvoted this response: "
    else:
        message = "You downvoted this response: "
    print(message + data.value)
119
+
120
+
121
# Chat widget with custom user/bot avatars; bubbles sized to their content.
chatbot = gr.Chatbot(avatar_images=('user.png', 'bot.png'),bubble_full_width = False)

# Wire the streaming generate() function into a ready-made chat UI, reusing
# the chatbot widget above and the extra sampling controls.
chat_interface = gr.ChatInterface(
    generate,
    chatbot = chatbot,
    examples=EXAMPLES,
    additional_inputs=additional_inputs,
)
129
+
130
+
131
# Page layout: banner image on the left, title/disclaimer on the right,
# with the chat interface rendered below.
with gr.Blocks() as demo:
    with gr.Row():
        # NOTE(review): float scale (0.4) is accepted by Gradio 3.x but newer
        # versions expect an int — confirm the pinned gradio version.
        with gr.Column(scale=0.4):
            gr.Image("better_banner.jpeg", elem_id="banner-image", show_label=False)
        with gr.Column():
            gr.Markdown(
                """# Wizard Coder 34b Demo
##
This app provides a way of using wizard coder via a demo

⚠️ **Limitations**: the model can produce factually incorrect information, hallucinating facts and actions. As it has not undergone any advanced tuning/alignment, it can produce problematic outputs, especially if prompted to do so. Finally, this demo is limited to a session length of about 1,000 words.
"""
            )

    # Attach the like/dislike logger, then render the chat UI inside the page.
    chatbot.like(vote, None, None)
    chat_interface.render()

# NOTE(review): queue(concurrency_count=...) was removed in Gradio 4.x
# (replaced by default_concurrency_limit) — confirm the gradio version.
demo.queue(concurrency_count=100, api_open=False).launch(show_api=False)