sugiv commited on
Commit
fa451c0
1 Parent(s): 6cbdb33

Leetmonkey In Action. Darn LeetMonkey these days

Browse files
Files changed (2) hide show
  1. app.py +56 -58
  2. requirements.txt +2 -1
app.py CHANGED
@@ -1,30 +1,29 @@
1
  import gradio as gr
 
2
  from llama_cpp import Llama
3
  import re
4
  from datasets import load_dataset
5
- from transformers import AutoTokenizer
6
- import autopep8
7
  import random
 
8
  import textwrap
9
 
10
- # Load the dataset
11
- dataset = load_dataset("sugiv/leetmonkey_python_dataset")
12
- val_dataset = dataset["train"].train_test_split(test_size=0.1)["test"]
13
-
14
- # Load the tokenizer
15
- tokenizer = AutoTokenizer.from_pretrained("sugiv/Leetmonkey-peft")
16
-
17
- # GGUF model options
18
  gguf_models = {
19
- "Exact Copy": "sugiv/leetmonkey-peft-gguf/leetmonkey_peft_exact_copy.gguf",
20
- "F16": "sugiv/leetmonkey-peft-gguf/leetmonkey_peft_f16.gguf",
21
- "Q8_0": "sugiv/leetmonkey-peft-gguf/leetmonkey_peft__q8_0.gguf",
22
- "Super Block Q6": "sugiv/leetmonkey-peft-gguf/leetmonkey_peft_super_block_q6.gguf"
23
  }
24
 
 
 
 
 
 
 
25
  generation_kwargs = {
26
  "max_tokens": 2048,
27
- "stop": ["```", "### Instruction:", "### Response:"],
28
  "echo": False,
29
  "temperature": 0.2,
30
  "top_k": 50,
@@ -32,15 +31,7 @@ generation_kwargs = {
32
  "repeat_penalty": 1.1
33
  }
34
 
35
- def run_llama_cpp(instruction, model_path):
36
- llm = Llama(
37
- model_path=model_path,
38
- n_ctx=2048,
39
- n_threads=4,
40
- n_gpu_layers=0,
41
- verbose=False
42
- )
43
-
44
  system_prompt = "You are a Python coding assistant specialized in solving LeetCode problems. Provide only the complete implementation of the given function. Ensure proper indentation and formatting. Do not include any explanations or multiple solutions."
45
  full_prompt = f"""<|begin▁of▁sentence|>
46
  ### Instruction:
@@ -56,26 +47,36 @@ Here's the complete Python function implementation:
56
  ```python
57
  """
58
 
59
- res = llm(full_prompt, **generation_kwargs)
60
- return res["choices"][0]["text"]
61
 
62
  def extract_and_format_code(text):
 
63
  code_match = re.search(r'```python\s*(.*?)\s*```', text, re.DOTALL)
64
  if code_match:
65
  code = code_match.group(1)
66
  else:
67
  code = text
68
 
 
69
  code = re.sub(r'^.*?(?=def\s+\w+\s*\()', '', code, flags=re.DOTALL)
 
 
70
  code = textwrap.dedent(code)
 
 
71
  lines = code.split('\n')
 
 
72
  func_def_index = next((i for i, line in enumerate(lines) if line.strip().startswith('def ')), 0)
73
- indented_lines = [lines[func_def_index]]
 
 
74
  for line in lines[func_def_index + 1:]:
75
- if line.strip():
76
- indented_lines.append(' ' + line)
77
  else:
78
- indented_lines.append(line)
79
 
80
  formatted_code = '\n'.join(indented_lines)
81
 
@@ -84,34 +85,31 @@ def extract_and_format_code(text):
84
  except:
85
  return formatted_code
86
 
87
- def select_random_problem():
88
- sample = random.choice(val_dataset)
89
- return sample['instruction']
90
-
91
- def generate_solution(problem, model_name):
92
- model_path = gguf_models[model_name]
93
- generated_output = run_llama_cpp(problem, model_path)
94
- formatted_code = extract_and_format_code(generated_output)
95
- return formatted_code
96
-
97
- def gradio_interface(problem, model_name):
98
- solution = generate_solution(problem, model_name)
99
- return solution
100
 
101
- with gr.Blocks() as demo:
102
- gr.Markdown("# LeetCode Problem Solver")
 
 
 
103
 
104
- with gr.Row():
105
- with gr.Column():
106
- problem_display = gr.Textbox(label="LeetCode Problem", lines=10)
107
- select_problem_btn = gr.Button("Select Random Problem")
108
-
109
- with gr.Column():
110
- model_dropdown = gr.Dropdown(choices=list(gguf_models.keys()), label="Select GGUF Model", value="Exact Copy")
111
- solution_display = gr.Code(label="Generated Solution", language="python")
112
- generate_btn = gr.Button("Generate Solution")
113
 
114
- select_problem_btn.click(select_random_problem, outputs=problem_display)
115
- generate_btn.click(gradio_interface, inputs=[problem_display, model_dropdown], outputs=solution_display)
116
-
117
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ from huggingface_hub import hf_hub_download
3
  from llama_cpp import Llama
4
  import re
5
  from datasets import load_dataset
 
 
6
  import random
7
+ import autopep8
8
  import textwrap
9
 
10
# Map each human-readable quantization label to its GGUF filename in the
# sugiv/leetmonkey-peft-gguf repo. Insertion order drives the dropdown order.
_GGUF_CHOICES = [
    ("Exact Copy", "leetmonkey_peft_exact_copy.gguf"),
    ("F16", "leetmonkey_peft_f16.gguf"),
    # NOTE(review): double underscore below looks odd but presumably matches
    # the filename actually uploaded to the Hub — confirm before "fixing".
    ("Q8_0", "leetmonkey_peft__q8_0.gguf"),
    ("Super Block Q6", "leetmonkey_peft_super_block_q6.gguf"),
]
gguf_models = dict(_GGUF_CHOICES)
17
 
def load_model(model_name):
    """Download *model_name* from the leetmonkey-peft-gguf Hub repo and
    return a CPU-only Llama instance wrapping the local GGUF file."""
    local_path = hf_hub_download(
        repo_id="sugiv/leetmonkey-peft-gguf",
        filename=model_name,
    )
    llm = Llama(
        model_path=local_path,
        n_ctx=2048,       # context window used at generation time
        n_threads=4,
        n_gpu_layers=0,   # force CPU inference
        verbose=False,
    )
    return llm
22
+
23
+ # Generation parameters
24
  generation_kwargs = {
25
  "max_tokens": 2048,
26
+ "stop": ["<|end▁of▁sentence|>", "### Instruction:", "### Response:"],
27
  "echo": False,
28
  "temperature": 0.2,
29
  "top_k": 50,
 
31
  "repeat_penalty": 1.1
32
  }
33
 
34
+ def generate_solution(instruction, model):
 
 
 
 
 
 
 
 
35
  system_prompt = "You are a Python coding assistant specialized in solving LeetCode problems. Provide only the complete implementation of the given function. Ensure proper indentation and formatting. Do not include any explanations or multiple solutions."
36
  full_prompt = f"""<|begin▁of▁sentence|>
37
  ### Instruction:
 
47
  ```python
48
  """
49
 
50
+ response = model(full_prompt, **generation_kwargs)
51
+ return response["choices"][0]["text"]
52
 
53
  def extract_and_format_code(text):
54
+ # Extract code between triple backticks
55
  code_match = re.search(r'```python\s*(.*?)\s*```', text, re.DOTALL)
56
  if code_match:
57
  code = code_match.group(1)
58
  else:
59
  code = text
60
 
61
+ # Remove any text before the function definition
62
  code = re.sub(r'^.*?(?=def\s+\w+\s*\()', '', code, flags=re.DOTALL)
63
+
64
+ # Dedent the code to remove any common leading whitespace
65
  code = textwrap.dedent(code)
66
+
67
+ # Split the code into lines
68
  lines = code.split('\n')
69
+
70
+ # Find the function definition line
71
  func_def_index = next((i for i, line in enumerate(lines) if line.strip().startswith('def ')), 0)
72
+
73
+ # Ensure proper indentation
74
+ indented_lines = [lines[func_def_index]] # Keep the function definition as is
75
  for line in lines[func_def_index + 1:]:
76
+ if line.strip(): # If the line is not empty
77
+ indented_lines.append(' ' + line) # Add 4 spaces of indentation
78
  else:
79
+ indented_lines.append(line) # Keep empty lines as is
80
 
81
  formatted_code = '\n'.join(indented_lines)
82
 
 
85
  except:
86
  return formatted_code
87
 
88
+ # Load the dataset
89
+ dataset = load_dataset("sugiv/leetmonkey_python_dataset")
90
+ val_dataset = dataset["train"].train_test_split(test_size=0.1)["test"]
 
 
 
 
 
 
 
 
 
 
91
 
# Cache of already-loaded Llama instances, keyed by display name. Without
# this, every button click re-downloads and re-loads the selected GGUF file.
_loaded_models = {}

def gradio_interface(model_name):
    """Solve one random validation problem with the selected GGUF model.

    Parameters:
        model_name: key into ``gguf_models`` chosen in the dropdown.

    Returns:
        (instruction, generated_code, reference_solution) — the problem text,
        the model's formatted answer, and the dataset's original solution.
    """
    if model_name not in _loaded_models:
        _loaded_models[model_name] = load_model(gguf_models[model_name])
    model = _loaded_models[model_name]

    sample = random.choice(val_dataset)
    instruction = sample['instruction']
    original_output = sample['output']

    generated_output = generate_solution(instruction, model)
    python_code = extract_and_format_code(generated_output)

    return instruction, python_code, original_output
102
+
# Build the UI: one dropdown in, three panels out (problem text, the
# generated solution, and the dataset's reference solution).
model_selector = gr.Dropdown(
    choices=list(gguf_models.keys()),
    label="Select GGUF Model",
)
result_widgets = [
    gr.Textbox(label="LeetCode Problem", lines=10),
    gr.Code(label="Generated Solution", language="python"),
    gr.Code(label="Original Solution", language="python"),
]

iface = gr.Interface(
    fn=gradio_interface,
    inputs=model_selector,
    outputs=result_widgets,
    title="LeetCode Problem Solver",
    description="Select a model to generate a solution for a random LeetCode problem.",
)

iface.launch(share=True)
requirements.txt CHANGED
@@ -2,4 +2,5 @@ gradio
2
  llama-cpp-python
3
  datasets
4
  transformers
5
- autopep8
 
 
2
  llama-cpp-python
3
  datasets
4
  transformers
5
+ autopep8
6
+ huggingface_hub