import gradio as gr
import subprocess
from llama_cpp import Llama
# Initialize the model
model = Llama(model_path="QA_Llama31_Quantized_GGUF")
def generate_response(prompt):
    # Run a single-turn chat completion and return just the assistant's reply text
    response = model.create_chat_completion(messages=[{"role": "user", "content": prompt}])
    return response['choices'][0]['message']['content']
# Alternative inference path: shell out to the llama.cpp CLI instead of using
# llama-cpp-python (defined here but not wired into the Gradio interface below)
def predict(text):
    # Call the llama.cpp binary with the input text as the prompt
    result = subprocess.run(
        ["./llama.cpp/main", "-m", "QA_Llama31_Quantized_GGUF", "-p", text],
        capture_output=True,
        text=True
    )
    return result.stdout
# Create a Gradio interface
iface = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=2, placeholder="Enter question here..."),
    outputs=gr.Textbox(label="Answer"),
)
# Launch the interface
iface.launch()
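
# Illustrative sketch (assumption, not part of the original Space): a quick
# smoke test of the model helper without launching the UI. Uncomment to try it
# locally; the question below is just a made-up example.
# print(generate_response("What does GGUF quantization do?"))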