loubnabnl HF staff committed on
Commit
aa07439
·
1 Parent(s): cddb272

use multithreading instead of multiprocessing

Browse files
Files changed (1) hide show
  1. app.py +34 -16
app.py CHANGED
@@ -1,12 +1,10 @@
1
  import json
2
  import pandas as pd
3
  import requests
4
- from multiprocessing import Pool
5
- from functools import partial
6
  import streamlit as st
7
 
8
 
9
- GITHUB_CODE = "https://huggingface.co/datasets/lvwerra/github-code"
10
  MODELS = ["CodeParrot", "InCoder", "CodeGen", "PolyCoder"]
11
  GENERATION_MODELS = ["CodeParrot", "InCoder", "CodeGen"]
12
 
@@ -17,7 +15,14 @@ def load_examples():
17
  return examples
18
 
19
 
20
- def generate_code(model_name, gen_prompt, max_new_tokens, temperature, seed):
 
 
 
 
 
 
 
21
  url = (
22
  f"https://hf.space/embed/loubnabnl/{model_name.lower()}-subspace/+/api/predict/"
23
  )
@@ -25,12 +30,21 @@ def generate_code(model_name, gen_prompt, max_new_tokens, temperature, seed):
25
  url=url, json={"data": [gen_prompt, max_new_tokens, temperature, seed]}
26
  )
27
  generated_text = r.json()["data"][0]
28
- return generated_text
29
 
30
- def read_markdown(path):
31
- with open(path, "r") as f:
32
- output = f.read()
33
- st.markdown(output, unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
34
 
35
  st.set_page_config(page_icon=":laptop:", layout="wide")
36
  with open("utils/table_contents.txt", "r") as f:
@@ -45,9 +59,11 @@ read_markdown("utils/intro.txt")
45
  st.subheader("1 - Code datasets")
46
  read_markdown("datasets/intro.txt")
47
  read_markdown("datasets/github_code.txt")
 
48
  #st.markdown(f"Preview of some code files from Github repositories in [Github-code dataset]({GITHUB_CODE}):")
49
  #df = pd.read_csv("utils/data_preview.csv")
50
  #st.dataframe(df)
 
51
  col1, col2= st.columns([1,2])
52
  with col1:
53
  selected_model = st.selectbox("", MODELS, key=1)
@@ -107,19 +123,21 @@ gen_prompt = st.text_area(
107
  ).strip()
108
  if st.button("Generate code!"):
109
  with st.spinner("Generating code..."):
110
- # Create a multiprocessing Pool
111
- pool = Pool()
112
- generate_parallel = partial(
113
- generate_code,
 
114
  gen_prompt=gen_prompt,
115
  max_new_tokens=max_new_tokens,
116
  temperature=temperature,
117
  seed=seed,
118
  )
119
- output = pool.map(generate_parallel, selected_models)
120
- for i in range(len(output)):
 
121
  st.markdown(f"**{selected_models[i]}**")
122
- st.code(output[i])
123
 
124
  # Resources
125
  st.subheader("Resources")
 
1
  import json
2
  import pandas as pd
3
  import requests
4
+ import threading
 
5
  import streamlit as st
6
 
7
 
 
8
  MODELS = ["CodeParrot", "InCoder", "CodeGen", "PolyCoder"]
9
  GENERATION_MODELS = ["CodeParrot", "InCoder", "CodeGen"]
10
 
 
15
  return examples
16
 
17
 
18
def read_markdown(path):
    """Read the text file at ``path`` and render it as Markdown on the page.

    The content is passed to ``st.markdown`` with ``unsafe_allow_html=True``,
    so any raw HTML contained in the file is rendered as-is.
    """
    with open(path, "r") as md_file:
        content = md_file.read()
    st.markdown(content, unsafe_allow_html=True)
22
+
23
+
24
+ def generate_code(generations, model_name, gen_prompt, max_new_tokens, temperature, seed):
25
+ # call space using its API endpoint
26
  url = (
27
  f"https://hf.space/embed/loubnabnl/{model_name.lower()}-subspace/+/api/predict/"
28
  )
 
30
  url=url, json={"data": [gen_prompt, max_new_tokens, temperature, seed]}
31
  )
32
  generated_text = r.json()["data"][0]
33
+ generations.append(generated_text)
34
 
35
+
36
def generate_code_threads(generations, models, gen_prompt, max_new_tokens, temperature, seed):
    """Run ``generate_code`` for every model concurrently, keeping model order.

    One thread is started per entry of ``models``. Each thread is handed its
    own private list instead of the shared ``generations`` list: appending to
    a shared list would store results in *completion* order, while the caller
    indexes ``generations`` and the model list in lockstep to label outputs.
    After all threads have joined, the per-model results are copied into
    ``generations`` in the same order as ``models``.

    Args:
        generations: list that receives the generated texts (extended in place).
        models: iterable of model names; one request is issued per name.
        gen_prompt: prompt forwarded to ``generate_code``.
        max_new_tokens: forwarded to ``generate_code``.
        temperature: forwarded to ``generate_code``.
        seed: forwarded to ``generate_code``.

    Note:
        If a worker raises, its private list stays empty and that model
        contributes no entry to ``generations``.
    """
    models = list(models)
    # One private result slot per model so finish order cannot reorder results.
    slots = [[] for _ in models]
    threads = [
        threading.Thread(
            target=generate_code,
            args=(slot, model_name, gen_prompt, max_new_tokens, temperature, seed),
        )
        for slot, model_name in zip(slots, models)
    ]

    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()

    # Merge in the order of ``models`` so generations[i] belongs to models[i].
    for slot in slots:
        generations.extend(slot)
47
+
48
 
49
  st.set_page_config(page_icon=":laptop:", layout="wide")
50
  with open("utils/table_contents.txt", "r") as f:
 
59
  st.subheader("1 - Code datasets")
60
  read_markdown("datasets/intro.txt")
61
  read_markdown("datasets/github_code.txt")
62
+ #GITHUB_CODE = "https://huggingface.co/datasets/lvwerra/github-code"
63
  #st.markdown(f"Preview of some code files from Github repositories in [Github-code dataset]({GITHUB_CODE}):")
64
  #df = pd.read_csv("utils/data_preview.csv")
65
  #st.dataframe(df)
66
+
67
  col1, col2= st.columns([1,2])
68
  with col1:
69
  selected_model = st.selectbox("", MODELS, key=1)
 
123
  ).strip()
124
  if st.button("Generate code!"):
125
  with st.spinner("Generating code..."):
126
+ # use threading
127
+ generations = []
128
+ generate_code_threads(
129
+ generations,
130
+ selected_models,
131
  gen_prompt=gen_prompt,
132
  max_new_tokens=max_new_tokens,
133
  temperature=temperature,
134
  seed=seed,
135
  )
136
+ for i in range(len(generations)):
137
+ print(generations[i])
138
+ for i in range(len(generations)):
139
  st.markdown(f"**{selected_models[i]}**")
140
+ st.code(generations[i])
141
 
142
  # Resources
143
  st.subheader("Resources")