import gradio as gr
import subprocess

# Define the inference function using llama.cpp
def predict(text):
    # Call the llama.cpp binary with the input text as the prompt.
    # Note: llama.cpp's -m flag expects a path to a local .gguf model file,
    # so the Hugging Face repo id below likely needs to be downloaded to disk first.
    result = subprocess.run(
        ["./llama.cpp/main", "-m", "GSridhar1982/QA_Llama31_Quantized_GGUF", "-p", text],
        capture_output=True,
        text=True,
    )
    return result.stdout
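
# Example usage, assuming the llama.cpp binary and a local GGUF model are available
# (the question string here is purely illustrative):
#   print(predict("What is the capital of France?"))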

# Create a Gradio interface
iface = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(lines=2, placeholder="Enter question here..."),
    outputs=gr.Textbox(label="Answer"),
    title="LLaMA Model Inference",
    description="Enter a question to generate an answer with the LLaMA model.",
)

# Launch the interface
iface.launch()