import subprocess

import gradio as gr


# Inference function that shells out to the llama.cpp CLI.
def predict(text):
    # Run llama.cpp with the prompt; adjust the binary and model
    # paths to match your setup.
    result = subprocess.run(
        ["./llama.cpp/main", "-m", "path/to/your/model", "-p", text],
        capture_output=True,
        text=True,
    )
    # Surface llama.cpp failures instead of silently returning empty output.
    if result.returncode != 0:
        return f"llama.cpp failed: {result.stderr}"
    return result.stdout


# Create a Gradio interface around the predict function.
iface = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(lines=2, placeholder="Enter text here..."),
    outputs="text",
    title="LLaMA Model Inference",
    description="Enter text to generate using the LLaMA model.",
)

# Launch the local web UI.
iface.launch()
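
# Optional post-processing sketch: llama.cpp's CLI typically echoes the
# prompt before the generated continuation, so stripping it keeps the UI
# output clean. This assumes the prompt appears verbatim at the start of
# stdout, which depends on your llama.cpp build and flags; strip_prompt
# is a hypothetical helper, e.g. called as
# `return strip_prompt(result.stdout, text)` inside predict.
def strip_prompt(output, prompt):
    return output[len(prompt):] if output.startswith(prompt) else output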