xqt committed on
Commit
7b28e1c
•
1 Parent(s): 365ef9d

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +3 -3
  2. app.py +205 -0
  3. requirements.txt +6 -0
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
  title: Code With Llama2
3
- emoji: 💻
4
- colorFrom: pink
5
- colorTo: red
6
  sdk: gradio
7
  sdk_version: 4.44.0
8
  app_file: app.py
 
1
  ---
2
  title: Code With Llama2
3
+ emoji: 👀
4
+ colorFrom: gray
5
+ colorTo: gray
6
  sdk: gradio
7
  sdk_version: 4.44.0
8
  app_file: app.py
app.py ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio
2
+ import re
3
+ import spaces
4
+ import torch
5
+ import transformers
6
+
7
# Maps a Hugging Face repository id to the human-readable label shown in the
# model dropdown. Lookups from the UI go label -> repository id.
MODEL_DICT = {
    "NousResearch/Llama-2-7b-chat-hf": "Llama 2 7B Chat",
    "xqt/llama_2_7b_chat_mbpp_base": "Llama 2 7B Chat fine tuned with Base MBPP",
    "xqt/llama_2_7b_chat_mbpp_synthetic": "Llama 2 7B Chat fine tuned with Synthetic MBPP",
    "xqt/llama_2_7b_chat_mbpp_mixed": "Llama 2 7B Chat fine tuned with Base and Synthetic MBPP",
}
13
+
14
def generate_prompt(sample):
    """Wrap a user question in the Llama 2 chat template used by this app.

    The system message tells the model to answer between [PYTHON] and
    [/PYTHON] tags; the prompt ends with an opening [PYTHON] tag so the
    model's continuation is the code itself. The [TEST] section is left empty.

    Args:
        sample: The user's programming question, inserted verbatim.

    Returns:
        The complete prompt string.
    """
    return f"""<s>[INST] <<SYS>>
You are a python programming assistant that obeys the constraints and passes the example test case.
You wrap the code answer without any comments between [PYTHON] and [/PYTHON] tags.
In case a test case is available, it is written inside [TEST] and [/TEST] tags.
<</SYS>>
{sample}
[TEST][/TEST]
[/INST]
[PYTHON]
"""
26
+
27
def extract_text_between_tags(input_string, tag1, tag2):
    """Return every shortest span of text found between two tags.

    Args:
        input_string: Text to search.
        tag1: Regular-expression fragment matching the opening tag. It is
            inserted into the pattern as-is, so literal brackets must already
            be escaped by the caller (e.g. r"\\[PYTHON\\]").
        tag2: Regular-expression fragment matching the closing tag, same rules.

    Returns:
        A list with the text of each non-overlapping match, in order of
        appearance; empty when nothing matches. Matches may span newlines.
    """
    # Non-greedy group so that each opening tag pairs with the nearest closing tag.
    pattern = f"{tag1}(.*?){tag2}"
    return re.findall(pattern, input_string, re.DOTALL)
30
+
31
+
32
def load_model(name):
    """Load the 4-bit quantized model chosen in the UI together with its tokenizer.

    Args:
        name: Display label of the model, i.e. one of the values of MODEL_DICT.

    Returns:
        A ``(model, tokenizer)`` tuple.

    Raises:
        gradio.Error: If ``name`` matches no label in MODEL_DICT, or if loading
            the model or tokenizer fails (the original exception is chained).
    """
    gradio.Info(f"Loading Model {name} 🤗", duration = 5)

    # MODEL_DICT is keyed by repository id, but the dropdown hands us the label,
    # so resolve the label back to its repository id.
    current_key = next(
        (model_key for model_key, label in MODEL_DICT.items() if label == name),
        None,
    )

    if current_key is None:
        raise gradio.Error(f"Model {name} could not be found 😭", duration = 5)

    try:
        nf4_config = transformers.BitsAndBytesConfig(
            load_in_4bit = True,
            bnb_4bit_quant_type = "nf4",
            bnb_4bit_use_double_quant = True,
            bnb_4bit_compute_dtype = torch.bfloat16
        )

        model = transformers.AutoModelForCausalLM.from_pretrained(
            current_key,
            quantization_config = nf4_config,
            device_map = {"": 0},
            use_cache = True
        )
        model.config.pretraining_tp = 1

        # The tokenizer is always taken from the base chat repository,
        # whichever checkpoint was selected.
        tokenizer = transformers.AutoTokenizer.from_pretrained("NousResearch/Llama-2-7b-chat-hf", trust_remote_code=True)
        tokenizer.pad_token = tokenizer.eos_token
        tokenizer.padding_side = "right"
    except Exception as e:
        # Surface any loading failure in the UI while keeping the original traceback.
        raise gradio.Error(f"Encountered a problem 🥺: {e}") from e

    gradio.Info(f"Loaded Model {name} from {current_key} successfully 🔥.", duration = 5)
    return model, tokenizer
70
+
71
@spaces.GPU(duration = 120)
def respond(model, message, chat_history):
    """Generate Python code for ``message`` and append the exchange to the chat.

    Args:
        model: Display label of the model to use (passed on to ``load_model``).
        message: The user's programming question.
        chat_history: List of ``(user, bot)`` tuples shown in the chatbot;
            ``None`` is treated as an empty history.

    Returns:
        A tuple ``("", chat_history)``: the empty string clears the message
        box and the history now includes the new exchange.

    Raises:
        gradio.Error: If the model or tokenizer could not be loaded.
    """
    model, tokenizer = load_model(model)

    if model is None or tokenizer is None:
        raise gradio.Error("Could not load model 😔", duration = 5)

    if chat_history is None:
        chat_history = []

    text = generate_prompt(message)
    code = ""

    # NOTE(review): the 20..500 counter only bounds the number of attempts (25);
    # generate() always receives a fixed budget of 500 new tokens. Confirm whether
    # the counter was meant to be passed as max_new_tokens instead.
    for _ in range(20, 501, 20):
        encoded = tokenizer([text], return_tensors = "pt", padding = True).to(model.device)
        output_sequences = model.generate(**encoded, max_new_tokens = 500, do_sample = True, top_p = 0.9)
        # Each retry continues from the text decoded so far.
        text = tokenizer.batch_decode(output_sequences, skip_special_tokens = True)[0]

        matches = extract_text_between_tags(text, r"\[PYTHON\]", r"\[/PYTHON\]")
        # The decoded text presumably still contains the prompt, whose system
        # message itself mentions "[PYTHON] and [/PYTHON]"; the generated code
        # is therefore the second match, not the first.
        if len(matches) > 1:
            code = matches[1]
            break

    if len(code) > 0:
        response = f"Here is what I could write 💭\n```python\n{code}\n```\n"
    else:
        response = "Could not generate the code with the following configurations 😦."

    chat_history.append((message, response))
    return "", chat_history
110
+
111
# Build the Gradio interface: header, model selector, chat area, example
# prompts and a collapsible documentation panel.
# The Markdown literals below are indented for readability; this relies on
# Gradio's Markdown component dedenting its value before rendering.
with gradio.Blocks() as base_app:
    header = gradio.Markdown("""
        # 🧑‍💻 Python Code Generation Assistant

        Welcome to the **Python Code Generation Assistant** powered by **Llama 2** models! This application helps generate Python code solutions by leveraging fine-tuned large language models (LLMs) on benchmark and synthetic datasets. Whether you need help solving basic Python problems or want to explore code generation from AI, this app has you covered. Use the documentation below for help.
    """)
    model_choice_dropdown = gradio.Dropdown(
        # Materialize the dict view so the component receives a plain list.
        choices = list(MODEL_DICT.values()),
        value = "Llama 2 7B Chat fine tuned with Base MBPP",
        interactive = True
    )
    chatbot = gradio.Chatbot()
    with gradio.Row():
        with gradio.Column():
            message_box = gradio.Textbox(placeholder = "Write a python programming question you need the code for.")

        with gradio.Column():
            send_button = gradio.Button()
            clear_button = gradio.ClearButton([message_box, chatbot])

    # Both the button and pressing Enter in the text box run the same handler;
    # its outputs clear the text box and refresh the chat.
    send_button.click(respond, [model_choice_dropdown, message_box, chatbot], [message_box, chatbot])
    message_box.submit(respond, [model_choice_dropdown, message_box, chatbot], [message_box, chatbot])

    example_dataset = gradio.Dataset(components = [message_box], samples = [
        ["Write a function to find sequences of lowercase letters joined with an underscore."],
        ["Write a python function to count hexadecimal numbers for a given range."],
        ["Write a function to perform the concatenation of two string tuples."]
    ])

    # Selecting an example copies its single column into the message box.
    example_dataset.select(lambda x: x[0], [example_dataset], [message_box])

    with gradio.Accordion("Documentation", open = False):
        documentation = gradio.Markdown("""
            ## 🚀 Features

            - **Model Selection**: Choose from multiple fine-tuned models:
                - **Llama 2 7B Chat**: Standard pre-trained model.
                - **Llama 2 7B Chat (Base MBPP)**: Fine-tuned on the MBPP benchmark dataset.
                - **Llama 2 7B Chat (Synthetic MBPP)**: Fine-tuned on synthetic data generated from MBPP.
                - **Llama 2 7B Chat (Mixed)**: Fine-tuned on both base and synthetic MBPP data.

            - **Automatic Python Code Generation**: Generate Python code for your problem.

            ## 🎯 How to Use the App

            1. **Select a Model**:
                - Use the dropdown menu to choose the model you'd like to use for code generation.
                - By default, the app selects **Llama 2 7B Chat fine-tuned with Base MBPP**.

            2. **Ask a Python Question**:
                - Type a Python-related question or problem in the text box.
                - Example: `"Write a function to find sequences of lowercase letters joined with an underscore."`

            3. **Generate Python Code**:
                - Press the **Send** button or hit **Enter** to generate the code.
                - The model will respond with Python code wrapped in ```python blocks.

            4. **Explore Example Questions**:
                - You can try out some preloaded examples from the **Examples Dataset** at the bottom. Just click on one to automatically populate the input.

            5. **Clear the Chat**:
                - Use the **Clear** button to reset the chat and start fresh.

            ## 📊 Models

            | Model Name | Description |
            |------------|-------------|
            | **Llama 2 7B Chat** | A pre-trained model for general Python code generation. |
            | **Base MBPP** | Fine-tuned on the **MBPP** (Mostly Basic Python Problems) dataset. |
            | **Synthetic MBPP** | Fine-tuned on a synthetic dataset generated from MBPP. |
            | **Mixed MBPP** | Fine-tuned on both base and synthetic MBPP datasets. |

            ## 🛠️ Troubleshooting

            If you encounter issues:
            - Ensure you're selecting the correct model.
            - If the code isn't generating as expected, try reformulating the question.
            - For further debugging, error messages will be displayed if something goes wrong.

            ## 📜 Example Prompts
            - `"Write a Python function to count hexadecimal numbers for a given range."`
            - `"Write a function to perform the concatenation of two string tuples."`
            - `"Generate a Python program to reverse a string."`

            ## 🖥️ About the Technology

            This app uses a **4-bit quantized version of Llama 2 7B** models to enhance performance while minimizing resource consumption. These models have been fine-tuned on **MBPP** and **synthetic datasets** to provide optimized code generation for Python programming tasks.

            ---

            Happy Coding! 😄✨
        """)

if __name__ == "__main__":
    base_app.launch()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ accelerate==0.29.3
2
+ bitsandbytes==0.43.1
3
+ peft==0.10.0
4
+ transformers
5
+ huggingface_hub
6
+ trl==0.8.6