import base64
import os
import shutil

import streamlit as st
from dotenv import load_dotenv
from llama_index.core import (
    ChatPromptTemplate,
    Settings,
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
    load_index_from_storage,
)
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface import HuggingFaceInferenceAPI

# Load environment variables (expects HF_TOKEN in a .env file)
load_dotenv()

# Configure the LlamaIndex settings with TinyLlama
Settings.llm = HuggingFaceInferenceAPI(
    model_name="jzhang38/tinyllama-1.1b",
    tokenizer_name="jzhang38/tinyllama-1.1b",
    context_window=2048,  # Adjusted for TinyLlama's capabilities
    token=os.getenv("HF_TOKEN"),
    max_new_tokens=512,
    generate_kwargs={"temperature": 0.1},
)
Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5"
)

# Directories for persistent storage and uploaded data
PERSIST_DIR = "./db"
DATA_DIR = "data"

# Recreate both directories on startup so each run begins with a clean state
for directory in (DATA_DIR, PERSIST_DIR):
    try:
        if os.path.exists(directory):
            shutil.rmtree(directory)  # Remove the directory if it exists
        os.makedirs(directory)  # Create a fresh directory
    except Exception as e:
        print(f"Error creating {directory}: {e}")


def displayPDF(file):
    # Embed the PDF in the page via a base64-encoded iframe
    with open(file, "rb") as f:
        base64_pdf = base64.b64encode(f.read()).decode("utf-8")
    pdf_display = (
        f'<iframe src="data:application/pdf;base64,{base64_pdf}" '
        'width="700" height="600" type="application/pdf"></iframe>'
    )
    st.markdown(pdf_display, unsafe_allow_html=True)


def data_ingestion():
    # Read the uploaded PDF(s), build a vector index, and persist it to disk
    documents = SimpleDirectoryReader(DATA_DIR).load_data()
    index = VectorStoreIndex.from_documents(documents)
    index.storage_context.persist(persist_dir=PERSIST_DIR)


def handle_query(query):
    # Reload the persisted index and answer the query with a custom QA prompt
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)
    chat_text_qa_msgs = [
        (
            "user",
            """You are a Q&A assistant named ĀpaḥSmṛtiḥ, created by Rohit. When users ask specifically about your creator, Rohit, respond with: "I was created by Rohit as a prototype for solving the water crisis in India. He is an AI enthusiast focused on solving complex problems through innovative solutions. He specializes in machine learning, deep learning, and NLP, striving to push the boundaries of AI to explore new possibilities." For all other inquiries, your main goal is to answer questions about water conservation and management in India as accurately as possible.
Context:
{context_str}
Question:
{query_str}
""",
        )
    ]
    text_qa_template = ChatPromptTemplate.from_messages(chat_text_qa_msgs)
    query_engine = index.as_query_engine(text_qa_template=text_qa_template)
    answer = query_engine.query(query)
    if hasattr(answer, "response"):
        return answer.response
    elif isinstance(answer, dict) and "response" in answer:
        return answer["response"]
    else:
        return "Sorry, I couldn't find an answer."


# Streamlit app initialization
st.markdown("ĀpaḥSmṛtiḥ: Flowing Memories of Water Conservation")
st.markdown("The model is still being fine-tuned and updated; it is designed to give generalized answers, not specific facts.")
st.markdown("Start chatting below...")

if "messages" not in st.session_state:
    st.session_state.messages = [
        {"role": "assistant", "content": "Hello! Upload a PDF and ask me anything about its content."}
    ]

with st.sidebar:
    st.title("Menu:")
    uploaded_file = st.file_uploader("Upload your PDF file and click the Submit & Process button")
    if st.button("Submit & Process"):
        if uploaded_file is None:
            st.warning("Please upload a PDF file first.")
        else:
            with st.spinner("Processing..."):
                filepath = os.path.join(DATA_DIR, "saved_pdf.pdf")
                with open(filepath, "wb") as f:
                    f.write(uploaded_file.getbuffer())
                # displayPDF(filepath)  # Display the uploaded PDF
                data_ingestion()  # Re-index every time a new file is uploaded
                st.success("Done")

user_prompt = st.chat_input("Ask me anything about the content of the PDF:")
if user_prompt:
    st.session_state.messages.append({"role": "user", "content": user_prompt})
    response = handle_query(user_prompt)
    st.session_state.messages.append({"role": "assistant", "content": response})

for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.write(message["content"])
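# Usage note (a sketch, assuming this script is saved as app.py and that
# HF_TOKEN is defined in a .env file next to it):
#   pip install streamlit python-dotenv llama-index \
#       llama-index-llms-huggingface llama-index-embeddings-huggingface
#   streamlit run app.py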