import os

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Read the Hugging Face access token from the environment.
# (HF_HOME points at the cache directory, not a token, so HF_TOKEN is the right variable.)
hf_token = os.environ.get("HF_TOKEN", None)

# Load the model and tokenizer
model_name = "meta-llama/CodeLlama-7b-Python-hf"
model = AutoModelForCausalLM.from_pretrained(
    model_name, token=hf_token, torch_dtype=torch.float16, device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)

# Llama tokenizers ship without a pad token, so reuse EOS for padding.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

def generate_code(prompt):
    inputs = tokenizer(
        prompt, return_tensors="pt", padding=True, truncation=True, max_length=512
    )
    # Move the input tensors to the device the model was dispatched to.
    inputs = inputs.to(model.device)
    # Generate a single completion; max_new_tokens bounds the tokens added
    # beyond the prompt, and **inputs passes the attention mask along as well.
    outputs = model.generate(**inputs, max_new_tokens=512)
    code = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return code

# Set up the Gradio interface
demo = gr.Interface(
    fn=generate_code,
    inputs="text",
    outputs="text",
    title="CodeLlama 7B Model",
    description="Generate code with CodeLlama-7b-Python-hf.",
)
demo.launch()
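
# Optional: a sampled-decoding sketch for returning several candidate
# completions per prompt. num_return_sequences greater than 1 requires
# sampling (do_sample=True) or beam search; the temperature/top_p values
# below are illustrative assumptions, not tuned settings.
#
#     outputs = model.generate(
#         **inputs,
#         max_new_tokens=512,
#         do_sample=True,
#         temperature=0.2,
#         top_p=0.95,
#         num_return_sequences=10,
#     )
#     candidates = [tokenizer.decode(o, skip_special_tokens=True) for o in outputs]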