loubnabnl HF staff committed on
Commit
c723338
·
1 Parent(s): 3283b93

remove unnecessary file

Browse files
Files changed (1) hide show
  1. .ipynb_checkpoints/app-checkpoint.py +0 -109
.ipynb_checkpoints/app-checkpoint.py DELETED
@@ -1,109 +0,0 @@
1
- import streamlit as st
2
- from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed
3
- from transformers import pipeline
4
- import torch
5
- import json
6
- import pandas as pd
7
-
8
- @st.cache(allow_output_mutation=True)
9
- def load_tokenizer(model_ckpt):
10
- return AutoTokenizer.from_pretrained(model_ckpt)
11
-
12
- @st.cache(allow_output_mutation=True)
13
- def load_model(model_ckpt):
14
- model = AutoModelForCausalLM.from_pretrained(model_ckpt, low_cpu_mem_usage=True)
15
- return model
16
-
17
- @st.cache()
18
- def load_examples():
19
- with open("examples.json", "r") as f:
20
- examples = json.load(f)
21
- return examples
22
-
23
- st.set_page_config(page_icon=':laptop:', layout="wide")
24
-
25
-
26
- st.sidebar.header("Models")
27
- models = ["CodeParrot", "OPT", "InCoder"]
28
- selected_models = st.sidebar.multiselect('Select code generation models to compare:',
29
- models,
30
- default=["CodeParrot"])
31
- st.sidebar.header("Tasks")
32
- tasks = [" ", "Model evaluation", "Pretraining datasets", "Model architecture", "Code generation"]
33
- selected_task = st.sidebar.selectbox("Select a task:", tasks)
34
-
35
-
36
- tokenizer1 = load_tokenizer("lvwerra/codeparrot")
37
- model1 = load_model("lvwerra/codeparrot")
38
- tokenizer2 = load_tokenizer("facebook/incoder-1B")
39
- model2 = load_model("facebook/incoder-1B")
40
- #tokenizer3 = load_tokenizer("facebook/opt-1.3b")
41
- #model3 = load_model("facebook/opt-1.3b")
42
- pipelines = {}
43
- for element in models:
44
- if element == "CodeParrot":
45
- pipelines[element] = pipeline("text-generation", model=model1, tokenizer=tokenizer1)
46
- elif element == "InCoder":
47
- tokenizer = load_tokenizer("facebook/incoder-1B")
48
- model = load_model("facebook/incoder-1B")
49
- pipelines[element] = pipeline("text-generation", model=model2, tokenizer=tokenizer2)
50
- #else:
51
- # tokenizer = load_tokenizer("facebook/opt-1.3b")
52
- # model = load_model("facebook/opt-1.3b")
53
- # pipelines[element] = pipeline("text-generation", model=model3, tokenizer=tokenizer3)
54
-
55
- examples = load_examples()
56
- example_names = [example["name"] for example in examples]
57
- name2id = dict([(name, i) for i, name in enumerate(example_names)])
58
- set_seed(42)
59
- gen_kwargs = {}
60
-
61
- if selected_task == " ":
62
- st.title("Code Generation Models comparison")
63
- with open("intro.txt", "r") as f:
64
- intro = f.read()
65
- st.markdown(intro)
66
- elif selected_task == "Pretraining datasets":
67
- st.title("Pretraining datasets 📚")
68
- st.markdown("Preview of some code files from Github repositories")
69
- df = pd.read_csv("preview-github-data.csv")
70
- st.dataframe(df)
71
- for model in selected_models:
72
- with open(f"datasets/{model.lower()}.txt", "r") as f:
73
- text = f.read()
74
- st.markdown(f"### {model}:")
75
- st.markdown(text)
76
- elif selected_task == "Model architecture":
77
- st.title("Model architecture 🔨")
78
- for model in selected_models:
79
- with open(f"architectures/{model.lower()}.txt", "r") as f:
80
- text = f.read()
81
- st.markdown(f"## {model}:")
82
- st.markdown(text)
83
- elif selected_task == "Model evaluation":
84
- st.title("Code models evaluation 📊")
85
- with open("evaluation/intro.txt", "r") as f:
86
- intro = f.read()
87
- st.markdown(intro)
88
- elif selected_task == "Code generation":
89
- st.title("Code generation 💻")
90
- st.sidebar.header("Examples")
91
- selected_example = st.sidebar.selectbox("Select one of the following examples:", example_names)
92
- example_text = examples[name2id[selected_example]]["value"]
93
- default_length = examples[name2id[selected_example]]["length"]
94
- st.sidebar.header("Generation settings")
95
- gen_kwargs["do_sample"] = st.sidebar.radio("Decoding strategy:", ["Greedy", "Sample"]) == "Sample"
96
- gen_kwargs["max_new_tokens"] = st.sidebar.slider("Number of tokens to generate:", value=default_length, min_value=8, step=8, max_value=256)
97
- if gen_kwargs["do_sample"]:
98
- gen_kwargs["temperature"] = 0.2
99
- gen_kwargs["top_k"] = 0
100
- gen_kwargs["top_p"] = 0.95
101
- gen_prompt = st.text_area("Generate code with prompt:", value=example_text, height=220,).strip()
102
- if st.button("Generate code!"):
103
- with st.spinner("Generating code..."):
104
- for model in selected_models:
105
- if model != "OPT":
106
- pipe = pipelines[model]
107
- generated_text = pipe(gen_prompt, **gen_kwargs)[0]['generated_text']
108
- st.markdown(f"{model}:")
109
- st.code(generated_text)