import gradio as gr
import subprocess
from llama_cpp import Llama
# Initialize the model
model = Llama(model_path="QA_llama31_unsloth.Q4_K_M.gguf")
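# Loading options such as the context window can be passed here if needed,
# e.g. Llama(model_path=..., n_ctx=2048); the call above relies on the
# library defaults.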
def generate_response(prompt):
    # Run a single-turn chat completion and return the assistant's reply
    response = model.create_chat_completion(messages=[{"role": "user", "content": prompt}])
    return response['choices'][0]['message']['content']
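# A sketch of the same call with explicit sampling controls; max_tokens and
# temperature are standard create_chat_completion keyword arguments in
# llama-cpp-python, but the function name and values here are illustrative,
# not tuned for this model.
def generate_response_with_params(prompt, max_tokens=256, temperature=0.7):
    response = model.create_chat_completion(
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,    # cap on the number of generated tokens
        temperature=temperature,  # higher values increase randomness
    )
    return response['choices'][0]['message']['content']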
# Alternative inference path that shells out to the llama.cpp CLI; it is not
# wired into the Gradio interface below, which calls generate_response instead
def predict(text):
    # Invoke the llama.cpp binary with the input text as the prompt
    result = subprocess.run(
        ["./llama.cpp/main", "-m", "QA_llama31_unsloth.Q4_K_M.gguf", "-p", text],
        capture_output=True,
        text=True
    )
    return result.stdout
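# Note: recent llama.cpp builds name the CLI binary llama-cli rather than
# main; adjust the path above if the subprocess call fails.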
# Create a Gradio interface
iface = gr.Interface(
    fn=generate_response,
    inputs="textbox",
    outputs="text",
    title="AIML Q&A Chatbot",
    description="Ask questions related to AIML and get answers from the fine-tuned Llama model."
)
# Launch the app
iface.launch()
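# launch() also accepts standard Gradio options if the defaults don't fit, e.g.:
#   iface.launch(share=True)              # temporary public URL
#   iface.launch(server_name="0.0.0.0")   # bind to all network interfaces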