"""Gradio chat front-end for a local llama.cpp financial-QA model via LangChain."""

import gradio as gr
from langchain_community.llms import LlamaCpp
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain_core.callbacks import StreamingStdOutCallbackHandler

# Stream tokens to stdout as they are generated (useful for server-side debugging).
callbacks = [StreamingStdOutCallbackHandler()]

print("creating ll started")
llm = LlamaCpp(
    model_path="cerebras_Llama3-DocChat-1.0-8B_Base_adapt_basic_model_16bit.gguf",
    temperature=0.75,
    max_tokens=30,
    # top_p is nucleus sampling and must be in (0, 1]; the previous value of 4
    # was out of range (llama.cpp treats >= 1.0 as "disabled").
    top_p=0.95,
    # Pass the handler list via `callbacks=`; `callback_manager=` expects a
    # BaseCallbackManager instance, not a plain list, and is deprecated.
    callbacks=callbacks,
    verbose=True,  # Verbose is required to pass to the callback manager
)
print("creating ll ended")

# Alpaca-style prompt; {question} is filled in per request.
template = """You are the Financial expert:
### Instruction:
{question}
### Input:
### Response:
"""
prompt = PromptTemplate(template=template, input_variables=["question"])
llm_chain_model = LLMChain(prompt=prompt, llm=llm)
print("creating model created")


def greet(question):
    """Run a single question through the LLM chain and return the model's text.

    Args:
        question: Free-form user question from the Gradio text box.

    Returns:
        The model's generated response as a string.
    """
    print(f"question is {question}")
    out_gen = llm_chain_model.run(question)
    print(f"out is {out_gen}")
    return out_gen


# Guard the server launch so importing this module doesn't start Gradio.
if __name__ == "__main__":
    demo = gr.Interface(fn=greet, inputs="text", outputs="text")
    demo.launch(debug=True, share=True)