"""Minimal Gradio app that answers questions via the Hugging Face Inference API."""
import os

import gradio as gr
import huggingface_hub as hf_hub

# Authenticated inference client; HF_TOKEN must be set in the environment.
hf_client = hf_hub.InferenceClient(token=os.environ["HF_TOKEN"])
# BUG FIX: original assigned to undefined name `client` (NameError at import).
# Disable HF-side response caching so every request hits the model.
hf_client.headers["x-use-cache"] = "0"


def interface(question: str) -> str:
    """Generate a response to *question* using google/flan-t5-xxl.

    Args:
        question: The user's input question.

    Returns:
        The model's generated text (non-streaming, up to 1024 new tokens).
    """
    response = hf_client.text_generation(
        prompt=question,
        model="google/flan-t5-xxl",
        max_new_tokens=1024,
        stream=False,
    )
    return response


app = gr.Interface(
    fn=interface,
    inputs=gr.Textbox(label="Input Question"),
    outputs=gr.Textbox(label="Generated Response"),
)

if __name__ == "__main__":
    # Launch the web UI only when executed as a script (not on import).
    app.launch()