import gradio as gr
import subprocess
from huggingface_hub import hf_hub_download
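
# llama.cpp's -m flag expects a local GGUF file path, not a Hub repo id, so
# fetch the quantized model from the Hub once at startup. The filename below
# is an assumption -- replace it with the actual .gguf file in the repo.
MODEL_PATH = hf_hub_download(
    repo_id="GSridhar1982/QA_Llama31_Quantized_GGUF",
    filename="qa_llama31_quantized.gguf",  # assumed filename, adjust as needed
)
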
# Define the inference function using llama.cpp
def predict(text):
    # Run the llama.cpp CLI with the user's text as the prompt
    result = subprocess.run(
        ["./llama.cpp/main", "-m", MODEL_PATH, "-p", text],
        capture_output=True,
        text=True,
    )
    # Surface llama.cpp errors in the UI instead of returning empty output
    if result.returncode != 0:
        return f"llama.cpp failed: {result.stderr}"
    return result.stdout

# Create a Gradio interface
iface = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(lines=2, placeholder="Enter question here..."),
    outputs=gr.Textbox(label="Answer"),
    title="LLaMA Model Inference",
    description="Enter a question and the LLaMA model will generate an answer.",
)

# Launch the interface
iface.launch()