import subprocess

import gradio as gr

# Path to a local GGUF file, e.g. downloaded from the Hugging Face repo
# GSridhar1982/QA_Llama31_Quantized_GGUF. llama.cpp's -m flag expects a
# local file path, not a repo ID; adjust this to where the model lives.
MODEL_PATH = "models/QA_Llama31_Quantized.gguf"

# Define the inference function using llama.cpp
def predict(text):
    # Call the llama.cpp CLI with the input text as the prompt.
    # (Recent llama.cpp builds name the binary llama-cli rather than main.)
    result = subprocess.run(
        ["./llama.cpp/main", "-m", MODEL_PATH, "-p", text],
        capture_output=True,
        text=True,
    )
    return result.stdout

# Create a Gradio interface
iface = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(lines=2, placeholder="Enter question here..."),
    outputs=gr.Textbox(label="Answer"),
    title="LLaMA Model Inference",
    description="Enter a question to generate an answer with the LLaMA model.",
)

# Launch the interface
iface.launch()
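
# Optional: a minimal sketch for fetching the GGUF weights from the Hugging
# Face Hub so that a local path can be passed to llama.cpp via -m. Assumes
# `pip install huggingface_hub`; the filename "model.gguf" below is an
# assumption -- check the repo's file listing for the actual .gguf name.
#
#   from huggingface_hub import hf_hub_download
#
#   model_path = hf_hub_download(
#       repo_id="GSridhar1982/QA_Llama31_Quantized_GGUF",
#       filename="model.gguf",  # assumed filename; verify against the repo
#   )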