loubnabnl HF staff committed on
Commit
94cc48e
·
1 Parent(s): 321451b

Delete app_threading.py

Browse files
Files changed (1) hide show
  1. app_threading.py +0 -205
app_threading.py DELETED
@@ -1,205 +0,0 @@
1
- import json
2
- import os
3
- import pandas as pd
4
- import requests
5
- from multiprocessing import Pool
6
- from functools import partial
7
- import streamlit as st
8
- from datasets import load_dataset, load_metric
9
-
10
# Models selectable in the datasets and architecture sections; each name is
# lower-cased to build the path of its markdown page.
MODELS = [
    "CodeParrot",
    "InCoder",
    "CodeGen",
    "PolyCoder",
]
# Models offered (and pre-selected) in the code generation comparison.
GENERATION_MODELS = [
    "CodeParrot",
    "InCoder",
    "CodeGen",
]
12
-
13
-
14
@st.cache()
def load_examples():
    """Load the prompt examples offered in the code generation section.

    Returns:
        The parsed content of ``utils/examples.json``. The script indexes
        each entry with the keys ``"name"``, ``"value"`` and ``"length"``.
    """
    # Decode explicitly as UTF-8 (the JSON interchange encoding) so the
    # result does not depend on the platform's default locale encoding.
    with open("utils/examples.json", "r", encoding="utf-8") as f:
        return json.load(f)
19
-
20
-
21
def load_evaluation():
    """Prepare the metric and the test program used to grade the quiz.

    Returns:
        A ``(code_eval, test_func)`` tuple: the loaded ``code_eval`` metric
        and the test source of the problem at index 2 of the HumanEval
        ``test`` split, followed by a ``check(<entry_point>)`` call.
    """
    # Set before the metric is loaded.
    # NOTE(review): presumably the opt-in flag code_eval requires before it
    # executes submitted code -- confirm against the metric card.
    os.environ["HF_ALLOW_CODE_EVAL"] = "1"
    task = load_dataset("openai_humaneval")["test"][2]
    check_call = f"check({task['entry_point']})"
    # Leading newline, then the test definitions, then the call that runs them.
    test_func = "\n".join(["", task["test"], check_call])
    return load_metric("code_eval"), test_func
29
-
30
-
31
def read_markdown(path):
    """Render the markdown file at ``path`` in the Streamlit page.

    Args:
        path: Path of the markdown file to display.
    """
    # Decode explicitly as UTF-8 so rendering does not depend on the
    # platform's default locale encoding.
    with open(path, "r", encoding="utf-8") as f:
        output = f.read()
    # unsafe_allow_html renders raw HTML found in the file, so only pass
    # trusted files.
    st.markdown(output, unsafe_allow_html=True)
35
-
36
def generate_code(
    model_name, gen_prompt, max_new_tokens, temperature, seed, timeout=None
):
    """Request a completion from the Space that serves ``model_name``.

    Args:
        model_name: Model display name; lower-cased to build the Space URL.
        gen_prompt: Prompt text sent to the model.
        max_new_tokens: Number of tokens to generate, forwarded to the API.
        temperature: Temperature setting, forwarded to the API.
        seed: Random seed, forwarded to the API.
        timeout: Optional ``requests`` timeout in seconds. ``None`` (the
            default) waits indefinitely, which is the previous behavior.

    Returns:
        The first element of the ``"data"`` field of the JSON response.

    Raises:
        requests.HTTPError: If the Space answers with an error status.
    """
    url = (
        f"https://hf.space/embed/loubnabnl/{model_name.lower()}-subspace/+/api/predict/"
    )
    r = requests.post(
        url=url,
        json={"data": [gen_prompt, max_new_tokens, temperature, seed]},
        timeout=timeout,
    )
    # Surface the HTTP status instead of an opaque JSON decoding error or
    # KeyError when the Space returns an error response.
    r.raise_for_status()
    return r.json()["data"][0]
45
-
46
def generate_code_threads(
    generations, models, gen_prompt, max_new_tokens, temperature, seed
):
    """Query several models concurrently and collect their completions.

    One thread per model calls ``generate_code``. After all threads have
    finished, the completions are appended to ``generations`` in the order
    of ``models``. A model whose request raised is left out, so
    ``generations`` may grow by fewer than ``len(models)`` items.

    Args:
        generations: List extended in place with the generated texts.
        models: Names of the models to query.
        gen_prompt: Prompt text sent to every model.
        max_new_tokens: Number of tokens to generate.
        temperature: Temperature setting.
        seed: Random seed.
    """
    # Imported locally because the module's import block does not provide it.
    import threading

    # Position in `models` -> generated text, filled in by the workers.
    results = {}

    def _worker(index, model_name):
        # generate_code returns the text and takes no output list, so the
        # result is stored here rather than passing `generations` through.
        results[index] = generate_code(
            model_name, gen_prompt, max_new_tokens, temperature, seed
        )

    threads = []
    for index, model_name in enumerate(models):
        thread = threading.Thread(target=_worker, args=(index, model_name))
        thread.start()
        threads.append(thread)

    for thread in threads:
        thread.join()

    # Append in model order, not completion order, for a stable result.
    generations.extend(results[index] for index in sorted(results))
69
-
70
@st.cache(show_spinner=False)
def generate_teaser(gen_prompt):
    """Return a short CodeGen completion of ``gen_prompt`` for the teaser.

    Args:
        gen_prompt: Prompt text to complete.

    Returns:
        The text returned by ``generate_code`` for the CodeGen model with
        10 new tokens, temperature 0.2 and seed 42.
    """
    # generate_code returns the text directly and accepts no output-list
    # argument; passing one raised a TypeError.
    return generate_code("CodeGen", gen_prompt, 10, 0.2, 42)
75
-
76
# Page setup: wide layout, with the table of contents in the sidebar.
st.set_page_config(page_icon=":laptop:", layout="wide")
with open("utils/table_contents.md", "r") as f:
    contents = f.read()
st.sidebar.markdown(contents)

# Introduction
st.title("Code generation with 🤗")
read_markdown("utils/summary.md")

# Teaser: complete a short prompt with generate_teaser.
# NOTE(review): the listing this was recovered from carried no indentation;
# placing the button under `with col2` is an assumption -- confirm.
example_text = "def print_hello_world():"
col1, col2, col3 = st.columns([1, 2, 1])
with col2:
    teaser_input = st.text_area("", value=example_text, height=100)
    gen_prompt = teaser_input.strip()
    teaser_clicked = st.button("Generate code!", key=1)
    if teaser_clicked:
        with st.spinner("Generating code..."):
            st.code(generate_teaser(gen_prompt))
read_markdown("utils/intro.md")
98
-
99
# Code datasets: general pages, then the page of the model picked below.
# NOTE(review): the listing this was recovered from carried no indentation;
# only the selectbox is assumed to sit under `with col1` -- confirm.
st.subheader("1 - Code datasets")
read_markdown("datasets/intro.md")
read_markdown("datasets/github_code.md")
col1, col2 = st.columns([1, 2])
with col1:
    selected_model = st.selectbox("", MODELS, key=1)
dataset_page = f"datasets/{selected_model.lower()}.md"
read_markdown(dataset_page)


# Model architecture: introduction, then the page of the model picked below.
st.subheader("2 - Model architecture")
read_markdown("architectures/intro.md")
col1, col2 = st.columns([1, 2])
with col1:
    selected_model = st.selectbox("", MODELS, key=2)
architecture_page = f"architectures/{selected_model.lower()}.md"
read_markdown(architecture_page)
116
-
117
# Model evaluation
st.subheader("3 - Code model evaluation")
read_markdown("evaluation/intro.md")
read_markdown("evaluation/demo_humaneval.md")

# Quiz: the visitor completes a problem that is then scored with code_eval.
st.markdown("Below you can try solving this problem or visualize the solution of CodeParrot:")
with open("evaluation/problem.md", "r") as f:
    problem = f.read()
with open("evaluation/solution.md", "r") as f:
    solution = f.read()

quiz_input = st.text_area("Complete the problem:", value=problem, height=240)
candidate_solution = quiz_input.strip()

# NOTE(review): the listing this was recovered from carried no indentation;
# the nesting under the button and spinner is an assumption -- confirm.
if st.button("Test my solution", key=2):
    with st.spinner("Testing..."):
        code_eval, test_func = load_evaluation()
        # One reference program, and one candidate for that reference.
        # NOTE(review): the visitor-supplied text is handed to the metric
        # for scoring; confirm how code_eval runs it.
        test_cases = [test_func]
        candidates = [[candidate_solution]]
        pass_at_k, _ = code_eval.compute(references=test_cases, predictions=candidates)
        if pass_at_k['pass@1'] < 1:
            text = "Your solution didn't pass the test, pass@1 is 0 😕"
        else:
            text = "Congrats your pass@1 is 1! 🎉"
        st.markdown(text)
if st.button("Show model solution", key=3):
    st.markdown(solution)
143
-
144
# Code generation: choose models, an example prompt and sampling settings,
# then query every selected model in parallel.
# NOTE(review): the listing this was recovered from carried no indentation;
# the nesting under `with col1` / `with col3` and under the button is an
# assumption -- confirm.
st.subheader("4 - Code generation ✨")
read_markdown("generation/intro.md")
col1, col2, col3 = st.columns([7, 1, 6])
with col1:
    st.markdown("**Models**")
    selected_models = st.multiselect(
        "Select code generation models to compare:",
        GENERATION_MODELS,
        default=GENERATION_MODELS,
        key=3,
    )
    st.markdown(" ")
    st.markdown("**Examples**")
    examples = load_examples()
    example_names = [example["name"] for example in examples]
    # Example name -> position in `examples`.
    name2id = {name: i for i, name in enumerate(example_names)}
    selected_example = st.selectbox(
        "Select one of the following examples or implement yours:", example_names
    )
    example_text = examples[name2id[selected_example]]["value"]
    default_length = examples[name2id[selected_example]]["length"]
with col3:
    st.markdown("**Generation settings**")
    temperature = st.slider(
        "Temperature:", value=0.2, min_value=0.0, step=0.1, max_value=2.0
    )
    max_new_tokens = st.slider(
        "Number of tokens to generate:",
        value=default_length,
        min_value=8,
        step=4,
        max_value=256,
    )
    seed = st.slider("Random seed:", value=42, min_value=0, step=1, max_value=1000)
gen_prompt = st.text_area(
    "Generate code with prompt:",
    value=example_text,
    height=200,
).strip()
if st.button("Generate code!"):
    with st.spinner("Generating code..."):
        # Everything but the model name is fixed; the pool supplies the name.
        generate_parallel = partial(
            generate_code,
            gen_prompt=gen_prompt,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            seed=seed,
        )
        # The context manager shuts the worker processes down once the
        # results are in; a bare Pool() left them running after every click.
        with Pool() as pool:
            output = pool.map(generate_parallel, selected_models)
        for model_name, generated in zip(selected_models, output):
            st.markdown(f"**{model_name}**")
            st.code(generated)
        # NOTE(review): Pool.map returns one result per input or raises, so
        # this branch looks unreachable; kept as in the original.
        if len(output) < len(selected_models):
            st.markdown("<span style='color:red'>Warning: Some models run into timeout, you can try generating code using the original subspaces: [InCoder](https://huggingface.co/spaces/loubnabnl/incoder-subspace), [CodeGen](https://huggingface.co/spaces/loubnabnl/codegen-subspace), [CodeParrot](https://huggingface.co/spaces/loubnabnl/codeparrot-subspace)</span>", unsafe_allow_html=True)

# Resources
st.subheader("Resources")
read_markdown("utils/resources.md")