Spaces:

sugiv
/

LeetMonkey_8Bit_GGUF_Stream_Tokens

Sleeping

App Files Files Community

LeetMonkey_8Bit_GGUF_Stream_Tokens / app.py

sugiv

Changing the code extract logic to preserve all code

447ff45 2 months ago

raw

history blame

4.21 kB

	import gradio as gr
	from huggingface_hub import hf_hub_download
	from llama_cpp import Llama
	import re
	from datasets import load_dataset
	import random
	import logging
	import os
	import autopep8
	import textwrap

	# Set up logging
	# Configure logging at INFO level and create the module-level logger that
	# the functions in this file use for their progress messages.
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	# Model settings
	# MODEL_NAME is the file name passed to download_model(); REPO_ID is the
	# repository id that download_model() hands to hf_hub_download().
	MODEL_NAME = "leetmonkey_peft__q8_0.gguf"
	REPO_ID = "sugiv/leetmonkey-peft-gguf"

	def download_model(model_name):
	    """Fetch ``model_name`` from the ``REPO_ID`` repository and return its local path.

	    Args:
	        model_name: File name to request from the repository.

	    Returns:
	        Whatever path ``hf_hub_download`` reports for the downloaded file.
	    """
	    logger.info(f"Downloading model: {model_name}")

	    # NOTE(review): force_download=True is passed on every call, which
	    # presumably bypasses any copy already under ./models -- confirm intended.
	    fetch_options = {
	        "repo_id": REPO_ID,
	        "filename": model_name,
	        "cache_dir": "./models",
	        "force_download": True,
	        "resume_download": True,
	    }
	    local_file = hf_hub_download(**fetch_options)

	    logger.info(f"Model downloaded: {local_file}")
	    return local_file

	# Download and load the 8-bit model at startup
	# Both statements run at module import time: the model file is downloaded
	# first, then handed to Llama(). The resulting `llm` object is the callable
	# that generate_solution() streams completions from.
	model_path = download_model(MODEL_NAME)
	llm = Llama(
	model_path=model_path,
	# NOTE(review): generation_kwargs asks for up to 512 new tokens, so with
	# n_ctx=1024 a long problem statement may not fit -- confirm the sizing.
	n_ctx=1024,
	n_threads=8,
	n_gpu_layers=-1, # Use all available GPU layers
	verbose=False,
	n_batch=512,
	mlock=True
	)
	logger.info("8-bit model loaded successfully")

	# Load the dataset
	# The "train" split is the pool that select_random_problem() samples from.
	dataset = load_dataset("sugiv/leetmonkey_python_dataset")
	train_dataset = dataset["train"]

	# Generation parameters
	# This dict is unpacked into the llm(...) call inside generate_solution().
	generation_kwargs = {
	"max_tokens": 512,
	# The prompt built in generate_solution() ends with an opened ```python
	# fence and uses "### Instruction:" / "### Response:" as section headers;
	# the same three markers are listed here as stop strings.
	"stop": ["```", "### Instruction:", "### Response:"],
	"echo": False,
	"temperature": 0.05,
	"top_k": 10,
	"top_p": 0.9,
	"repeat_penalty": 1.1
	}

	def generate_solution(instruction):
	    """Stream the model's answer to a LeetCode problem, one text piece at a time.

	    The problem text is embedded in an instruction/response prompt that ends
	    with an opened ```python fence, and the prompt is sent to ``llm`` in
	    streaming mode with the module-level ``generation_kwargs``.

	    Args:
	        instruction: Problem statement to insert into the prompt.

	    Yields:
	        The ``["choices"][0]["text"]`` value of each streamed chunk.
	    """
	    system_prompt = "You are a Python coding assistant specialized in solving LeetCode problems. Provide only the complete implementation of the given function. Ensure proper indentation and formatting. Do not include any explanations or multiple solutions."
	    full_prompt = f"""### Instruction:
	{system_prompt}

	Implement the following function for the LeetCode problem:

	{instruction}

	### Response:
	Here's the complete Python function implementation:

	```python
	"""

	    completion_stream = llm(full_prompt, stream=True, **generation_kwargs)
	    for piece in completion_stream:
	        first_choice = piece["choices"][0]
	        yield first_choice["text"]

	def extract_and_format_code(text):
	    """Extract Python code from model output and normalise its layout.

	    Steps:
	      1. If ``text`` contains a complete ```python ... ``` fence, keep only
	         what is inside it; otherwise treat the whole text as code.
	      2. Remove common leading whitespace with ``textwrap.dedent``.
	      3. Leave ``def`` / ``async def`` / ``class`` lines and blank lines as
	         they are, and prefix every other line with four spaces.
	      4. Pass the result through ``autopep8.fix_code``; if that raises, fall
	         back to the result of step 3.

	    Args:
	        text: Raw text produced by the model.

	    Returns:
	        The formatted code as a string.
	    """
	    # The lazy group must be able to span many characters and, with DOTALL,
	    # many lines; a group of `.?` can never capture a real code block.
	    # Only horizontal whitespace and one newline are consumed after the
	    # opening fence so the first code line keeps its own indentation.
	    fenced = re.search(r'```python[ \t]*\n?(.*?)\s*```', text, re.DOTALL)
	    code = fenced.group(1) if fenced else text

	    # Dedent the code to remove any common leading whitespace
	    code = textwrap.dedent(code)

	    # `\b` stops identifiers such as `default` or `classes` from being
	    # mistaken for definition lines.
	    definition_start = re.compile(r'(?:async\s+def|def|class)\b')

	    indented_lines = []
	    for line in code.split('\n'):
	        stripped = line.strip()
	        if not stripped or definition_start.match(stripped):
	            # Blank lines and class/function definitions are kept as is.
	            indented_lines.append(line)
	        else:
	            # Add 4 spaces of indentation
	            indented_lines.append('    ' + line)

	    formatted_code = '\n'.join(indented_lines)

	    # Formatting is best effort: any failure falls back to the unformatted
	    # code, but KeyboardInterrupt / SystemExit are no longer swallowed.
	    try:
	        return autopep8.fix_code(formatted_code)
	    except Exception:
	        return formatted_code


	def select_random_problem():
	    """Return the 'instruction' field of one randomly chosen training example."""
	    sampled_row = random.choice(train_dataset)
	    return sampled_row['instruction']

	def stream_solution(problem):
	    """Stream a solution for ``problem``, then emit the formatted final code.

	    Args:
	        problem: Problem statement forwarded to ``generate_solution``.

	    Yields:
	        After every generated token, all text produced so far; once
	        generation ends, the output of ``extract_and_format_code`` applied
	        to the full text.
	    """
	    logger.info("Generating solution")

	    pieces = []
	    for token in generate_solution(problem):
	        pieces.append(token)
	        # Re-send everything generated so far, not only the newest token.
	        yield "".join(pieces)

	    formatted_code = extract_and_format_code("".join(pieces))
	    logger.info("Solution generated successfully")
	    yield formatted_code

	# Build the Gradio interface.
	# NOTE(review): nesting is reconstructed from a source whose indentation was
	# flattened; both columns are assumed to sit inside the single row.
	with gr.Blocks() as demo:
	    gr.Markdown("# LeetCode Problem Solver (8-bit GGUF Model)")

	    with gr.Row():
	        # Problem text plus the button that fills it in.
	        with gr.Column():
	            problem_display = gr.Textbox(
	                label="LeetCode Problem",
	                lines=10,
	            )
	            select_problem_btn = gr.Button("Select Random Problem")

	        # Generated code plus the button that starts generation.
	        with gr.Column():
	            solution_display = gr.Code(
	                label="Generated Solution",
	                language="python",
	                lines=25,
	            )
	            generate_btn = gr.Button("Generate Solution")

	    # Wire the buttons to their handlers.
	    select_problem_btn.click(
	        fn=select_random_problem,
	        outputs=problem_display,
	    )
	    generate_btn.click(
	        fn=stream_solution,
	        inputs=[problem_display],
	        outputs=solution_display,
	    )

	# Launch the interface only when this file is executed as a script.
	# NOTE(review): share=True is passed to launch(); per the Gradio docs this
	# requests a public share link -- confirm it is wanted for this deployment.
	if __name__ == "__main__":
	logger.info("Starting Gradio interface")
	demo.launch(share=True)