import subprocess

import gradio as gr
from llama_cpp import Llama

# Initialize the model from the quantized GGUF checkpoint
model = Llama(model_path="QA_llama31_unsloth.Q4_K_M.gguf")
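# NOTE: Llama() above relies on llama-cpp-python's default context window.
# If questions or answers run long, the context size can be raised explicitly,
# e.g. (illustrative value, not from the original):
#   model = Llama(model_path="QA_llama31_unsloth.Q4_K_M.gguf", n_ctx=2048)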

def generate_response(prompt):
    # Run chat-style inference via llama-cpp-python and return only the reply text
    response = model.create_chat_completion(messages=[{"role": "user", "content": prompt}])
    return response['choices'][0]['message']['content']
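# create_chat_completion returns an OpenAI-style completion dict, roughly:
#   {"choices": [{"message": {"role": "assistant", "content": "..."}}], ...}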

# Alternative inference path: shell out to a locally built llama.cpp CLI.
# Note: this function is defined but not wired into the interface below.
def predict(text):
    # Call the llama.cpp binary with the input text as the prompt
    result = subprocess.run(
        ["./llama.cpp/main", "-m", "QA_llama31_unsloth.Q4_K_M.gguf", "-p", text],
        capture_output=True,
        text=True
    )
    return result.stdout
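# Example (assumes ./llama.cpp/main has been built in this repo):
#   print(predict("What is overfitting?"))  # raw stdout; typically echoes the prompt first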

# Create a Gradio interface

iface = gr.Interface(
    fn=generate_response,
    inputs="textbox",
    outputs="text",
    title="AIML Q&A Chatbot",
    description="Ask questions related to AIML and get answers from the fine-tuned Llama model."
)

# Launch the app
iface.launch()