import subprocess

import gradio as gr
from llama_cpp import Llama

# Load the fine-tuned GGUF model with llama-cpp-python.
model = Llama(model_path="QA_llama31_unsloth.Q4_K_M.gguf")


def generate_response(prompt):
    # Send the prompt as a single user message and return only the reply text.
    response = model.create_chat_completion(messages=[{"role": "user", "content": prompt}])
    return response['choices'][0]['message']['content']


def predict(text):
    # Alternative path: invoke the llama.cpp CLI binary via subprocess instead of
    # the in-process bindings. This helper is not used by the interface below.
    result = subprocess.run(
        ["./llama.cpp/main", "-m", "QA_llama31_unsloth.Q4_K_M.gguf", "-p", text],
        capture_output=True,
        text=True,
    )
    return result.stdout


# Build a simple Gradio interface around generate_response.
iface = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=2, placeholder="Enter question here..."),
    outputs=gr.Textbox(label="Answer"),
)

iface.launch()
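
# The predict() helper above shells out to the llama.cpp CLI but is never wired
# into the UI. As a minimal sketch (an assumption, not part of the original app),
# it could be exposed through its own interface and launched instead of iface:
cli_iface = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(lines=2, placeholder="Enter question here..."),
    outputs=gr.Textbox(label="Raw llama.cpp output"),
)
# cli_iface.launch()  # launch this one instead of iface to try the CLI path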