import gradio as gr
import subprocess

# Define the inference function using llama.cpp
def predict(text):
    # Call the llama.cpp binary with the input text as the prompt.
    # Note: llama.cpp's -m flag expects a path to a local .gguf model file,
    # so the Hugging Face repo id below likely needs to be downloaded to disk first.
    result = subprocess.run(
        ["./llama.cpp/main", "-m", "GSridhar1982/QA_Llama31_Quantized_GGUF", "-p", text],
        capture_output=True,
        text=True,
    )
    return result.stdout
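
# Example usage, assuming the llama.cpp binary and a local GGUF model are available
# (the question string here is purely illustrative):
#   print(predict("What is the capital of France?"))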

# Create a Gradio interface
iface = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(lines=2, placeholder="Enter question here..."),
    outputs=gr.Textbox(label="Answer"),
    title="LLaMA Model Inference",
    description="Enter a question to generate an answer with the LLaMA model.",
)

# Launch the interface
iface.launch()