Spaces:

lukestanley
/

ChillTranslator

Sleeping

Luke Stanley

Avoid unneeded imports, make serverless output more sensible, removing some debugging and comments

469f650 9 months ago

1.6 kB

	from os import environ as env
	from os import system as run
	from subprocess import check_output

	import gradio as gr


	def inference_binary_check():
	    """Make sure the installed llama-cpp-python build suits this machine.

	    Probes for an NVIDIA GPU by running ``nvidia-debugdump --list | grep Device``
	    through the shell. If a GPU is positively identified, nothing is changed.
	    Otherwise (no match, unexpected output, or the probe failing for any
	    reason) llama-cpp-python is uninstalled and re-installed from the
	    project's fork with pip, because without a GPU the existing build errors.

	    Returns:
	        None. Progress and failures are reported with ``print``.
	    """
	    # Assume CPU until the probe positively identifies an NVIDIA device, so
	    # that unexpected-but-successful output is not mistaken for a GPU.
	    use_gpu = False
	    try:
	        # A plain "|" is required for the shell to actually pipe into grep.
	        # grep exits non-zero when nothing matches, which check_output
	        # reports by raising.
	        command = "nvidia-debugdump --list | grep Device"
	        output = check_output(command, shell=True).decode()
	    except Exception as e:
	        # Deliberately broad: any failure of the probe (missing tool, no
	        # match, undecodable output) simply means "fall back to CPU".
	        print("No NVIDIA GPU detected, using CPU. GPU check result:", e)
	    else:
	        if "NVIDIA" in output and "ID" in output:
	            print("NVIDIA GPU detected.")
	            use_gpu = True
	        else:
	            print("No NVIDIA GPU detected, using CPU. GPU check result:", output)

	    if use_gpu:
	        print("GPU detected, existing GPU focused llama-cpp-python should work.")
	        return

	    print("Avoiding error by re-installing non-GPU llama-cpp-python build because no GPU was detected.")
	    # `run` is os.system (see the file's imports); it returns the command's status.
	    run('pip uninstall llama-cpp-python -y')
	    status = run('pip install git+https://github.com/lukestanley/llama-cpp-python.git@expose_json_grammar_convert_function --upgrade --no-cache-dir --force-reinstall')
	    if status != 0:
	        # Keep going (best effort), but do not claim success when pip failed.
	        print("llama-cpp-python re-install failed, pip returned status:", status)
	    else:
	        print("llama-cpp-python re-installed, will now attempt to load.")


	# Worker type comes from the environment; "runpod" is used when LLM_WORKER is unset.
	LLM_WORKER = env.get("LLM_WORKER", "runpod")

	# Only the "http" and "in_memory" workers trigger the llama-cpp-python build check.
	if LLM_WORKER in ("http", "in_memory"):
	    inference_binary_check()

	# Imported after the check above on purpose, so that chill can import
	# llama-cpp-python without an error:
	from chill import improvement_loop


	def chill_out(text):
	    """Log the incoming text, run improvement_loop on it, and return the result as a string."""
	    print("Got this input:", text)
	    improved = improvement_loop(text)
	    return str(improved)

	# Text-in / text-out Gradio interface wrapped around chill_out.
	demo = gr.Interface(
	    fn=chill_out,
	    inputs="text",
	    outputs="text",
	)
	# Start serving as soon as the module runs, with a share link and a single worker thread.
	demo.launch(share=True, max_threads=1)