File size: 685 Bytes
11990f4
 
 
 
 
 
 
64be64c
11990f4
 
 
 
 
 
 
 
64be64c
 
11990f4
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import gradio as gr  
import subprocess  

# Define the inference function using llama.cpp
def predict(text, model_path="GSridhar1982/QA_Llama31_Quantized_GGUF", binary="./llama.cpp/main"):
    """Run llama.cpp inference on *text* and return the generated output.

    Parameters:
        text: The prompt to pass to the model via ``-p``.
        model_path: Value passed to llama.cpp's ``-m`` flag.
            NOTE(review): this default looks like a Hugging Face repo id,
            but llama.cpp's ``-m`` expects a local GGUF file path — confirm
            the model file is actually downloaded to this location.
        binary: Path to the llama.cpp executable.

    Returns:
        The model's stdout on success, or a human-readable error string if
        the binary is missing or exits non-zero (so the Gradio UI shows a
        message instead of crashing or displaying nothing).
    """
    try:
        # shell=False (list argv) keeps user input out of a shell string.
        result = subprocess.run(
            [binary, "-m", model_path, "-p", text],
            capture_output=True,
            text=True,
        )
    except FileNotFoundError:
        return f"Error: llama.cpp binary not found at {binary}"
    if result.returncode != 0:
        # Surface stderr instead of silently returning empty stdout.
        return f"Error (exit {result.returncode}): {result.stderr.strip()}"
    return result.stdout

# Create a Gradio interface wiring the predict function to a simple text UI.
iface = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(lines=2, placeholder="Enter question here..."),
    # "Answer" is not a valid Gradio component shortcut and raises at
    # construction time; "text" renders the returned string in a textbox.
    outputs="text",
    title="LLaMA Model Inference",
    description="Enter text to generate using the LLaMA model.",
)

# Launch only when run as a script, so importing this module
# (e.g. for testing) does not start the web server.
if __name__ == "__main__":
    iface.launch()