Spaces:

sugiv
/

LeetMonkey_In_Action

Sleeping

App Files Community

sugiv committed on Sep 8

Commit

fa451c0

•

1 Parent(s): 6cbdb33

Leetmonkey In Action. Darn LeetMonkey these days

Browse files

Files changed (2) hide show

app.py +56 -58
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -1,30 +1,29 @@
 import gradio as gr
 from llama_cpp import Llama
 import re
 from datasets import load_dataset
-from transformers import AutoTokenizer
-import autopep8
 import random
 import textwrap
-# Load the dataset
-dataset = load_dataset("sugiv/leetmonkey_python_dataset")
-val_dataset = dataset["train"].train_test_split(test_size=0.1)["test"]
-# Load the tokenizer
-tokenizer = AutoTokenizer.from_pretrained("sugiv/Leetmonkey-peft")
-# GGUF model options
 gguf_models = {
-    "Exact Copy": "sugiv/leetmonkey-peft-gguf/leetmonkey_peft_exact_copy.gguf",
-    "F16": "sugiv/leetmonkey-peft-gguf/leetmonkey_peft_f16.gguf",
-    "Q8_0": "sugiv/leetmonkey-peft-gguf/leetmonkey_peft__q8_0.gguf",
-    "Super Block Q6": "sugiv/leetmonkey-peft-gguf/leetmonkey_peft_super_block_q6.gguf"
 }
 generation_kwargs = {
     "max_tokens": 2048,
-    "stop": ["```", "### Instruction:", "### Response:"],
     "echo": False,
     "temperature": 0.2,
     "top_k": 50,
@@ -32,15 +31,7 @@ generation_kwargs = {
     "repeat_penalty": 1.1
 }
-def run_llama_cpp(instruction, model_path):
-    llm = Llama(
-        model_path=model_path,
-        n_ctx=2048,
-        n_threads=4,
-        n_gpu_layers=0,
-        verbose=False
-    )
     system_prompt = "You are a Python coding assistant specialized in solving LeetCode problems. Provide only the complete implementation of the given function. Ensure proper indentation and formatting. Do not include any explanations or multiple solutions."
     full_prompt = f"""<｜begin▁of▁sentence｜>
 ### Instruction:
@@ -56,26 +47,36 @@ Here's the complete Python function implementation:
 ```python
 """
-    res = llm(full_prompt, **generation_kwargs)
-    return res["choices"][0]["text"]
 def extract_and_format_code(text):
     code_match = re.search(r'```python\s*(.*?)\s*```', text, re.DOTALL)
     if code_match:
         code = code_match.group(1)
     else:
         code = text
     code = re.sub(r'^.*?(?=def\s+\w+\s*\()', '', code, flags=re.DOTALL)
     code = textwrap.dedent(code)
     lines = code.split('\n')
     func_def_index = next((i for i, line in enumerate(lines) if line.strip().startswith('def ')), 0)
-    indented_lines = [lines[func_def_index]]
     for line in lines[func_def_index + 1:]:
-        if line.strip():
-            indented_lines.append('    ' + line)
         else:
-            indented_lines.append(line)
     formatted_code = '\n'.join(indented_lines)
@@ -84,34 +85,31 @@ def extract_and_format_code(text):
     except:
         return formatted_code
-def select_random_problem():
-    sample = random.choice(val_dataset)
-    return sample['instruction']
-def generate_solution(problem, model_name):
-    model_path = gguf_models[model_name]
-    generated_output = run_llama_cpp(problem, model_path)
-    formatted_code = extract_and_format_code(generated_output)
-    return formatted_code
-def gradio_interface(problem, model_name):
-    solution = generate_solution(problem, model_name)
-    return solution
-with gr.Blocks() as demo:
-    gr.Markdown("# LeetCode Problem Solver")
-    with gr.Row():
-        with gr.Column():
-            problem_display = gr.Textbox(label="LeetCode Problem", lines=10)
-            select_problem_btn = gr.Button("Select Random Problem")
-        with gr.Column():
-            model_dropdown = gr.Dropdown(choices=list(gguf_models.keys()), label="Select GGUF Model", value="Exact Copy")
-            solution_display = gr.Code(label="Generated Solution", language="python")
-            generate_btn = gr.Button("Generate Solution")
-    select_problem_btn.click(select_random_problem, outputs=problem_display)
-    generate_btn.click(gradio_interface, inputs=[problem_display, model_dropdown], outputs=solution_display)
-demo.launch()

 import gradio as gr
+from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
 import re
 from datasets import load_dataset
 import random
+import autopep8
 import textwrap
+# Define the model options
 gguf_models = {
+    "Exact Copy": "leetmonkey_peft_exact_copy.gguf",
+    "F16": "leetmonkey_peft_f16.gguf",
+    "Q8_0": "leetmonkey_peft__q8_0.gguf",
+    "Super Block Q6": "leetmonkey_peft_super_block_q6.gguf"
 }
+# Function to download and load the model
+def load_model(model_name):
+    model_path = hf_hub_download(repo_id="sugiv/leetmonkey-peft-gguf", filename=model_name)
+    return Llama(model_path=model_path, n_ctx=2048, n_threads=4, n_gpu_layers=0, verbose=False)
+# Generation parameters
 generation_kwargs = {
     "max_tokens": 2048,
+    "stop": ["<｜end▁of▁sentence｜>", "### Instruction:", "### Response:"],
     "echo": False,
     "temperature": 0.2,
     "top_k": 50,
     "repeat_penalty": 1.1
 }
+def generate_solution(instruction, model):
     system_prompt = "You are a Python coding assistant specialized in solving LeetCode problems. Provide only the complete implementation of the given function. Ensure proper indentation and formatting. Do not include any explanations or multiple solutions."
     full_prompt = f"""<｜begin▁of▁sentence｜>
 ### Instruction:
 ```python
 """
+    response = model(full_prompt, **generation_kwargs)
+    return response["choices"][0]["text"]
 def extract_and_format_code(text):
+    # Extract code between triple backticks
     code_match = re.search(r'```python\s*(.*?)\s*```', text, re.DOTALL)
     if code_match:
         code = code_match.group(1)
     else:
         code = text
+    # Remove any text before the function definition
     code = re.sub(r'^.*?(?=def\s+\w+\s*\()', '', code, flags=re.DOTALL)
+    # Dedent the code to remove any common leading whitespace
     code = textwrap.dedent(code)
+    # Split the code into lines
     lines = code.split('\n')
+    # Find the function definition line
     func_def_index = next((i for i, line in enumerate(lines) if line.strip().startswith('def ')), 0)
+    # Ensure proper indentation
+    indented_lines = [lines[func_def_index]]  # Keep the function definition as is
     for line in lines[func_def_index + 1:]:
+        if line.strip():  # If the line is not empty
+            indented_lines.append('    ' + line)  # Add 4 spaces of indentation
         else:
+            indented_lines.append(line)  # Keep empty lines as is
     formatted_code = '\n'.join(indented_lines)
     except:
         return formatted_code
+# Load the dataset
+dataset = load_dataset("sugiv/leetmonkey_python_dataset")
+val_dataset = dataset["train"].train_test_split(test_size=0.1)["test"]
+def gradio_interface(model_name):
+    model = load_model(gguf_models[model_name])
+    sample = random.choice(val_dataset)
+    instruction = sample['instruction']
+    original_output = sample['output']
+    generated_output = generate_solution(instruction, model)
+    python_code = extract_and_format_code(generated_output)
+    return instruction, python_code, original_output
+iface = gr.Interface(
+    fn=gradio_interface,
+    inputs=gr.Dropdown(choices=list(gguf_models.keys()), label="Select GGUF Model"),
+    outputs=[
+        gr.Textbox(label="LeetCode Problem", lines=10),
+        gr.Code(label="Generated Solution", language="python"),
+        gr.Code(label="Original Solution", language="python")
+    ],
+    title="LeetCode Problem Solver",
+    description="Select a model to generate a solution for a random LeetCode problem."
+)
+iface.launch(share=True)

requirements.txt CHANGED Viewed

@@ -2,4 +2,5 @@ gradio
 llama-cpp-python
 datasets
 transformers
-autopep8

 llama-cpp-python
 datasets
 transformers
+autopep8
+huggingface_hub