import streamlit as st from llama_index.core import StorageContext, load_index_from_storage, VectorStoreIndex, SimpleDirectoryReader, ChatPromptTemplate from llama_index.llms.huggingface import HuggingFaceInferenceAPI from dotenv import load_dotenv from llama_index.embeddings.huggingface import HuggingFaceEmbedding from llama_index.core import Settings import os import base64 # Load environment variables load_dotenv() Settings.llm = HuggingFaceInferenceAPI( model_name="google/gemma-1.1-7b-it", tokenizer_name="google/gemma-1.1-7b-it", context_window=3000, token=os.getenv("HF_TOKEN"), max_new_tokens=512, generate_kwargs={"temperature": 0.1}, ) Settings.embed_model = HuggingFaceEmbedding( model_name="BAAI/bge-small-en-v1.5" ) # Define the directory for persistent storage and data PERSIST_DIR = "db" DATA_DIR = "data" os.makedirs(DATA_DIR, exist_ok=True) os.makedirs(PERSIST_DIR, exist_ok=True) def displayPDF(file): with open(file, "rb") as f: base64_pdf = base64.b64encode(f.read()).decode('utf-8') pdf_display = f'' st.markdown(pdf_display, unsafe_allow_html=True) def data_ingestion(): documents = SimpleDirectoryReader(DATA_DIR).load_data() storage_context = StorageContext.from_defaults() index = VectorStoreIndex.from_documents(documents) index.storage_context.persist(persist_dir=PERSIST_DIR) def handle_query(query): storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR) index = load_index_from_storage(storage_context) chat_text_qa_msgs = [ ( "user", """You are a Q&A assistant named FAB, created by Rohit. You have a specific response programmed for when users specifically ask about your creator, Rohit. The response is: "I was created by Rohit as a prototype for solving the FIR queries for BPR&D.I am just a prototype focused on solving complex problems through best practices. " For all other inquiries, your main goal is to help police officers they are the primary users by providing answers related to COI(contitution of india) and FIR as accurately as possible. Here's a refined guide prompt to assist you in this role. Context: {context_str} Question: {query_str} """ ) ] text_qa_template = ChatPromptTemplate.from_messages(chat_text_qa_msgs) query_engine = index.as_query_engine(text_qa_template=text_qa_template) answer = query_engine.query(query) if hasattr(answer, 'response'): return answer.response elif isinstance(answer, dict) and 'response' in answer: return answer['response'] else: return "Sorry, I couldn't find an answer." # Streamlit app initialization st.markdown("### FAB FIR assistant Bot") st.markdown("The model is still under finetuning and updates\nThe model is designed to give generalized answers, not specific facts.") st.markdown("Start chat ...") if 'messages' not in st.session_state: st.session_state.messages = [{'role': 'assistant', "content": "Hello! Upload a PDF related to your case and ask me anything about its content."}] with st.sidebar: st.title("Menu:") uploaded_file = st.file_uploader("Upload your PDF Files and Click on the Submit & Process Button") if st.button("Submit & Process"): with st.spinner("Processing..."): filepath = "data/saved_pdf.pdf" with open(filepath, "wb") as f: f.write(uploaded_file.getbuffer()) # displayPDF(filepath) data_ingestion() # Process PDF every time new file is uploaded st.success("Done") user_prompt = st.chat_input("Ask me anything about the content of the PDF:") if user_prompt: st.session_state.messages.append({'role': 'user', "content": user_prompt}) response = handle_query(user_prompt) st.session_state.messages.append({'role': 'assistant', "content": response}) for message in st.session_state.messages: with st.chat_message(message['role']): st.write(message['content'])