"""Minimal Gradio app that answers questions via the Hugging Face Inference API."""
import os

import gradio as gr
import huggingface_hub as hf_hub

# Authenticated inference client; HF_TOKEN must be set in the environment.
hf_client = hf_hub.InferenceClient(token=os.environ["HF_TOKEN"])
# BUG FIX: original assigned to undefined name `client` (NameError at import).
# Disable HF-side response caching so every request hits the model.
hf_client.headers["x-use-cache"] = "0"


def interface(question: str) -> str:
    """Generate a response to *question* using google/flan-t5-xxl.

    Args:
        question: The user's input question.

    Returns:
        The model's generated text (non-streaming, up to 1024 new tokens).
    """
    response = hf_client.text_generation(
        prompt=question,
        model="google/flan-t5-xxl",
        max_new_tokens=1024,
        stream=False,
    )
    return response


app = gr.Interface(
    fn=interface,
    inputs=gr.Textbox(label="Input Question"),
    outputs=gr.Textbox(label="Generated Response"),
)

if __name__ == "__main__":
    # Launch the web UI only when executed as a script (not on import).
    app.launch()