loubnabnl HF staff committed on
Commit
c207158
·
1 Parent(s): ebafa77
.ipynb_checkpoints/README-checkpoint.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: CodeGen Subspace
3
+ emoji: 🔮
4
+ colorFrom: green
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 3.0.4
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
.ipynb_checkpoints/app-checkpoint.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed
3
+ from transformers import pipeline
4
+
5
+
6
+ title = "CodeGen Generator"
7
+ description = "This is a subspace to make code generation with [CodeGen](https://huggingface.co/Salesforce/codegen-16B-mono), it is used in a larger [space](https://huggingface.co/spaces/loubnabnl/Code-generation-models-v1) for model comparison. We use the 2B parameters model in this space."
8
+ example = [
9
+ ["def print_hello_world():", 8, 0.6, 42],
10
+ ["def get_file_size(filepath):", 24, 0.6, 42],
11
+ ["def count_lines(filename):", 40, 0.6, 42],
12
+ ["def count_words(filename):", 40, 0.6, 42]]
13
+ tokenizer = AutoTokenizer.from_pretrained("Salesforce/codegen-2B-mono")
14
+ model = AutoModelForCausalLM.from_pretrained("Salesforce/codegen-2B-mono", low_cpu_mem_usage=True)
15
+
16
+
17
+ def code_generation(gen_prompt, max_tokens, temperature=0.6, seed=42):
18
+ set_seed(seed)
19
+ pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
20
+ generated_text = pipe(gen_prompt, do_sample=True, top_p=0.95, temperature=temperature, max_new_tokens=max_tokens)[0]['generated_text']
21
+ return generated_text
22
+
23
+
24
+ iface = gr.Interface(
25
+ fn=code_generation,
26
+ inputs=[
27
+ gr.Textbox(lines=10, label="Input code"),
28
+ gr.inputs.Slider(
29
+ minimum=8,
30
+ maximum=256,
31
+ step=1,
32
+ default=8,
33
+ label="Number of tokens to generate",
34
+ ),
35
+ gr.inputs.Slider(
36
+ minimum=0,
37
+ maximum=2,
38
+ step=0.1,
39
+ default=0.6,
40
+ label="Temperature",
41
+ ),
42
+ gr.inputs.Slider(
43
+ minimum=0,
44
+ maximum=1000,
45
+ step=1,
46
+ default=42,
47
+ label="Random seed to use for the generation"
48
+ )
49
+ ],
50
+ outputs=gr.Textbox(label="Predicted code", lines=10),
51
+ examples=example,
52
+ layout="horizontal",
53
+ theme="peach",
54
+ description=description,
55
+ title=title
56
+ )
57
+ iface.launch()
.ipynb_checkpoints/requirements-checkpoint.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ git+https://github.com/rooa/transformers.git@add_codegen
2
+ torch
README.md CHANGED
@@ -1,13 +1,12 @@
1
  ---
2
- title: Codegen Space
3
- emoji: 🐠
4
- colorFrom: purple
5
  colorTo: purple
6
- sdk: streamlit
7
- sdk_version: 1.9.0
8
  app_file: app.py
9
  pinned: false
10
- license: apache-2.0
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
 
1
  ---
2
+ title: CodeGen Subspace
3
+ emoji: 🔮
4
+ colorFrom: green
5
  colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 3.0.4
8
  app_file: app.py
9
  pinned: false
 
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
app.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed
3
+ from transformers import pipeline
4
+
5
+
6
+ title = "CodeGen Generator"
7
+ description = "This is a subspace to make code generation with [CodeGen](https://huggingface.co/Salesforce/codegen-16B-mono), it is used in a larger [space](https://huggingface.co/spaces/loubnabnl/Code-generation-models-v1) for model comparison. We use the 2B parameters model in this space."
8
+ example = [
9
+ ["def print_hello_world():", 8, 0.6, 42],
10
+ ["def get_file_size(filepath):", 24, 0.6, 42],
11
+ ["def count_lines(filename):", 40, 0.6, 42],
12
+ ["def count_words(filename):", 40, 0.6, 42]]
13
+ tokenizer = AutoTokenizer.from_pretrained("Salesforce/codegen-2B-mono")
14
+ model = AutoModelForCausalLM.from_pretrained("Salesforce/codegen-2B-mono", low_cpu_mem_usage=True)
15
+
16
+
17
+ def code_generation(gen_prompt, max_tokens, temperature=0.6, seed=42):
18
+ set_seed(seed)
19
+ pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
20
+ generated_text = pipe(gen_prompt, do_sample=True, top_p=0.95, temperature=temperature, max_new_tokens=max_tokens)[0]['generated_text']
21
+ return generated_text
22
+
23
+
24
+ iface = gr.Interface(
25
+ fn=code_generation,
26
+ inputs=[
27
+ gr.Textbox(lines=10, label="Input code"),
28
+ gr.inputs.Slider(
29
+ minimum=8,
30
+ maximum=256,
31
+ step=1,
32
+ default=8,
33
+ label="Number of tokens to generate",
34
+ ),
35
+ gr.inputs.Slider(
36
+ minimum=0,
37
+ maximum=2,
38
+ step=0.1,
39
+ default=0.6,
40
+ label="Temperature",
41
+ ),
42
+ gr.inputs.Slider(
43
+ minimum=0,
44
+ maximum=1000,
45
+ step=1,
46
+ default=42,
47
+ label="Random seed to use for the generation"
48
+ )
49
+ ],
50
+ outputs=gr.Textbox(label="Predicted code", lines=10),
51
+ examples=example,
52
+ layout="horizontal",
53
+ theme="peach",
54
+ description=description,
55
+ title=title
56
+ )
57
+ iface.launch()
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ git+https://github.com/rooa/transformers.git@add_codegen
2
+ torch