import spaces
import gradio as gr
from langchain_huggingface.llms import HuggingFacePipeline
from langchain_core.prompts import PromptTemplate

# Initialize the model pipeline
llm = HuggingFacePipeline.from_model_id(
    model_id="ibm-granite/granite-3.2-2b-instruct",
    task="text-generation",
    pipeline_kwargs={
        "max_new_tokens": 512,
        # Return only the generated answer, not the echoed prompt.
        "return_full_text": False,
    },
)

prompt = PromptTemplate(
    input_variables=["question"],
    template="Answer the following question: {question}",
)

# Chain the prompt template into the model pipeline (LCEL pipe syntax)
chain = prompt | llm


@spaces.GPU
def ask_question(question: str) -> str:
    # Invoke the chain and return the generated answer.
    response = chain.invoke({"question": question})
    return response


# Create a Gradio interface around the question-answering function
iface = gr.Interface(
    fn=ask_question,
    inputs="text",
    outputs="text",
    title="Question Answering Bot",
    description="Ask any question and get an answer from the model.",
)

# Launch the Gradio interface
if __name__ == "__main__":
    iface.launch()
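
# Quick sanity check without the web UI (a minimal sketch; assumes this file
# is saved as app.py in the current directory):
#
#     python -c "from app import chain; print(chain.invoke({'question': 'What is LangChain?'}))"
#
# Note: the @spaces.GPU decorator requests ZeroGPU hardware on Hugging Face
# Spaces and is a no-op when the script runs locally.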