"""Gradio chat front-end for a local llama.cpp financial-QA model via LangChain."""

import gradio as gr
from langchain_community.llms import LlamaCpp
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain_core.callbacks import StreamingStdOutCallbackHandler

# Stream tokens to stdout as they are generated (useful for server-side debugging).
callbacks = [StreamingStdOutCallbackHandler()]

print("creating ll started")
llm = LlamaCpp(
    model_path="cerebras_Llama3-DocChat-1.0-8B_Base_adapt_basic_model_16bit.gguf",
    temperature=0.75,
    max_tokens=30,
    # top_p is nucleus sampling and must be in (0, 1]; the previous value of 4
    # was out of range (llama.cpp treats >= 1.0 as "disabled").
    top_p=0.95,
    # Pass the handler list via `callbacks=`; `callback_manager=` expects a
    # BaseCallbackManager instance, not a plain list, and is deprecated.
    callbacks=callbacks,
    verbose=True,  # Verbose is required to pass to the callback manager
)
print("creating ll ended")

# Alpaca-style prompt; {question} is filled in per request.
template = """You are the Financial expert:
### Instruction:
{question}
### Input:
### Response:
"""
prompt = PromptTemplate(template=template, input_variables=["question"])
llm_chain_model = LLMChain(prompt=prompt, llm=llm)
print("creating model created")


def greet(question):
    """Run a single question through the LLM chain and return the model's text.

    Args:
        question: Free-form user question from the Gradio text box.

    Returns:
        The model's generated response as a string.
    """
    print(f"question is {question}")
    out_gen = llm_chain_model.run(question)
    print(f"out is {out_gen}")
    return out_gen


# Guard the server launch so importing this module doesn't start Gradio.
if __name__ == "__main__":
    demo = gr.Interface(fn=greet, inputs="text", outputs="text")
    demo.launch(debug=True, share=True)