chaithanyashaji committed on
Commit
30ab543
·
verified ·
1 Parent(s): f599216

Upload 18 files

Browse files
.env ADDED
@@ -0,0 +1 @@
 
 
1
+ TOGETHER_AI="<REDACTED: revoke and rotate this key; supply it via an untracked secret store>"
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ ipc_vector_db/index.faiss filter=lfs diff=lfs merge=lfs -text
37
+ ipc-data/Indian[[:space:]]Penal[[:space:]]Code[[:space:]]Book.pdf filter=lfs diff=lfs merge=lfs -text
.idea/.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
4
+ # Editor-based HTTP Client requests
5
+ /httpRequests/
6
+ # Datasource local storage ignored files
7
+ /dataSources/
8
+ /dataSources.local.xml
.idea/aws.xml ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="accountSettings">
4
+ <option name="activeRegion" value="us-east-1" />
5
+ <option name="recentlyUsedRegions">
6
+ <list>
7
+ <option value="us-east-1" />
8
+ </list>
9
+ </option>
10
+ </component>
11
+ </project>
.idea/inspectionProfiles/Project_Default.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <profile version="1.0">
3
+ <option name="myName" value="Project Default" />
4
+ <inspection_tool class="Eslint" enabled="true" level="WARNING" enabled_by_default="true" />
5
+ </profile>
6
+ </component>
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <settings>
3
+ <option name="USE_PROJECT_PROFILE" value="false" />
4
+ <version value="1.0" />
5
+ </settings>
6
+ </component>
.idea/lawforher.iml ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="PYTHON_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$">
5
+ <excludeFolder url="file://$MODULE_DIR$/venv" />
6
+ </content>
7
+ <orderEntry type="jdk" jdkName="Python 3.11 (lawforher)" jdkType="Python SDK" />
8
+ <orderEntry type="sourceFolder" forTests="false" />
9
+ </component>
10
+ </module>
.idea/misc.xml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="Black">
4
+ <option name="sdkName" value="Python 3.11 (lawforher)" />
5
+ </component>
6
+ <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.11 (lawforher)" project-jdk-type="Python SDK" />
7
+ </project>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/lawforher.iml" filepath="$PROJECT_DIR$/.idea/lawforher.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="VcsDirectoryMappings">
4
+ <mapping directory="$PROJECT_DIR$" vcs="Git" />
5
+ </component>
6
+ </project>
.idea/workspace.xml ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="AutoImportSettings">
4
+ <option name="autoReloadType" value="SELECTIVE" />
5
+ </component>
6
+ <component name="ChangeListManager">
7
+ <list default="true" id="858a48fb-95a2-4510-8cfc-e62f98d77bef" name="Changes" comment="" />
8
+ <option name="SHOW_DIALOG" value="false" />
9
+ <option name="HIGHLIGHT_CONFLICTS" value="true" />
10
+ <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
11
+ <option name="LAST_RESOLUTION" value="IGNORE" />
12
+ </component>
13
+ <component name="FileTemplateManagerImpl">
14
+ <option name="RECENT_TEMPLATES">
15
+ <list>
16
+ <option value="Python Script" />
17
+ </list>
18
+ </option>
19
+ </component>
20
+ <component name="FlaskConsoleOptions" custom-start-script="import sys&#10;sys.path.extend([WORKING_DIR_AND_PYTHON_PATHS])&#10;from flask.cli import ScriptInfo&#10;locals().update(ScriptInfo(create_app=None).load_app().make_shell_context())&#10;print(&quot;Python %s on %s\nApp: %s [%s]\nInstance: %s&quot; % (sys.version, sys.platform, app.import_name, app.env, app.instance_path))">
21
+ <envs>
22
+ <env key="FLASK_APP" value="app" />
23
+ </envs>
24
+ <option name="myCustomStartScript" value="import sys&#10;sys.path.extend([WORKING_DIR_AND_PYTHON_PATHS])&#10;from flask.cli import ScriptInfo&#10;locals().update(ScriptInfo(create_app=None).load_app().make_shell_context())&#10;print(&quot;Python %s on %s\nApp: %s [%s]\nInstance: %s&quot; % (sys.version, sys.platform, app.import_name, app.env, app.instance_path))" />
25
+ <option name="myEnvs">
26
+ <map>
27
+ <entry key="FLASK_APP" value="app" />
28
+ </map>
29
+ </option>
30
+ </component>
31
+ <component name="Git.Settings">
32
+ <option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
33
+ </component>
34
+ <component name="GitHubPullRequestSearchHistory"><![CDATA[{
35
+ "lastFilter": {
36
+ "state": "OPEN",
37
+ "assignee": "chaithanyashaji"
38
+ }
39
+ }]]></component>
40
+ <component name="GithubPullRequestsUISettings"><![CDATA[{
41
+ "selectedUrlAndAccountId": {
42
+ "url": "https://github.com/chaithanyashaji/lawforher_chatbot.git",
43
+ "accountId": "a9c524c8-9c87-4292-85a5-6ee1841e6195"
44
+ }
45
+ }]]></component>
46
+ <component name="ProblemsViewState">
47
+ <option name="selectedTabId" value="CurrentFile" />
48
+ </component>
49
+ <component name="ProjectColorInfo">{
50
+ &quot;associatedIndex&quot;: 2
51
+ }</component>
52
+ <component name="ProjectId" id="2quUzOFT2DnZdoWyesJ4S7EFORH" />
53
+ <component name="ProjectViewState">
54
+ <option name="hideEmptyMiddlePackages" value="true" />
55
+ <option name="showLibraryContents" value="true" />
56
+ </component>
57
+ <component name="PropertiesComponent"><![CDATA[{
58
+ "keyToString": {
59
+ "RunOnceActivity.ShowReadmeOnStart": "true",
60
+ "git-widget-placeholder": "main",
61
+ "last_opened_file_path": "C:/Users/Chaithanya/PycharmProjects/law4her_test",
62
+ "node.js.detected.package.eslint": "true",
63
+ "node.js.detected.package.tslint": "true",
64
+ "node.js.selected.package.eslint": "(autodetect)",
65
+ "node.js.selected.package.tslint": "(autodetect)",
66
+ "nodejs_package_manager_path": "npm",
67
+ "settings.editor.selected.configurable": "project.propVCSSupport.DirectoryMappings",
68
+ "vue.rearranger.settings.migration": "true"
69
+ }
70
+ }]]></component>
71
+ <component name="RecentsManager">
72
+ <key name="CopyFile.RECENT_KEYS">
73
+ <recent name="C:\Users\Chaithanya\PycharmProjects\lawforher\ipc-data" />
74
+ </key>
75
+ <key name="MoveFile.RECENT_KEYS">
76
+ <recent name="C:\Users\Chaithanya\PycharmProjects\lawforher\models\llama-2-7b-chat" />
77
+ </key>
78
+ </component>
79
+ <component name="SharedIndexes">
80
+ <attachedChunks>
81
+ <set>
82
+ <option value="bundled-js-predefined-1d06a55b98c1-0b3e54e931b4-JavaScript-PY-241.18034.82" />
83
+ <option value="bundled-python-sdk-975db3bf15a3-2767605e8bc2-com.jetbrains.pycharm.pro.sharedIndexes.bundled-PY-241.18034.82" />
84
+ </set>
85
+ </attachedChunks>
86
+ </component>
87
+ <component name="SpellCheckerSettings" RuntimeDictionaries="0" Folders="0" CustomDictionaries="0" DefaultDictionary="application-level" UseSingleDictionary="true" transferred="true" />
88
+ <component name="TaskManager">
89
+ <task active="true" id="Default" summary="Default task">
90
+ <changelist id="858a48fb-95a2-4510-8cfc-e62f98d77bef" name="Changes" comment="" />
91
+ <created>1735512215507</created>
92
+ <option name="number" value="Default" />
93
+ <option name="presentableId" value="Default" />
94
+ <updated>1735512215507</updated>
95
+ <workItem from="1735512216915" duration="1122000" />
96
+ <workItem from="1735517820889" duration="182000" />
97
+ <workItem from="1735529743314" duration="1093000" />
98
+ <workItem from="1735532197204" duration="1335000" />
99
+ <workItem from="1735540268170" duration="615000" />
100
+ <workItem from="1735571841326" duration="8671000" />
101
+ <workItem from="1735586361032" duration="5331000" />
102
+ <workItem from="1735988510923" duration="22397000" />
103
+ <workItem from="1736014550072" duration="4536000" />
104
+ <workItem from="1736019186160" duration="12295000" />
105
+ <workItem from="1736064097307" duration="21545000" />
106
+ </task>
107
+ <servers />
108
+ </component>
109
+ <component name="TypeScriptGeneratedFilesManager">
110
+ <option name="version" value="3" />
111
+ </component>
112
+ </project>
.streamlit/config.toml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ [server]
2
+ headless = true
3
+ port = 8501
4
+ enableCORS = false
embedding.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_community.document_loaders import PyPDFLoader,DirectoryLoader
2
+ from langchain.embeddings import HuggingFaceEmbeddings
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
4
+ from langchain_community.vectorstores import FAISS
5
+
6
+ loader = DirectoryLoader('ipc-data', glob="./*.pdf", loader_cls=PyPDFLoader)
7
+ documents = loader.load()
8
+
9
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=200)
10
+ texts = text_splitter.split_documents(documents)
11
+
12
+ embeddings = HuggingFaceEmbeddings(
13
+ model_name="nomic-ai/nomic-embed-text-v1",
14
+ model_kwargs={"trust_remote_code": True, "revision": "289f532e14dbbbd5a04753fa58739e9ba766f3c7"},
15
+ )
16
+
17
+ # Creates the vector embeddings and stores them in a FAISS index
18
+ faiss_db = FAISS.from_documents(texts, embeddings)
19
+
20
+ # Saves and exports the vector embeddings database
21
+ faiss_db.save_local("ipc_vector_db")
ipc-data/Indian Penal Code Book.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5706a1b995df774c4c4ea1868223e18a13ba619977d323d3cab76a1cc095e237
3
+ size 20095787
ipc_vector_db/index.faiss ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d95e6cc1b7e77537e6bff530fa443dfcf2638efcb858eccf44de03185fc52c7c
3
+ size 18284589
ipc_vector_db/index.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88316941d341e914e168e058c26609184e62baf5223b96b510ec65a1da8313cd
3
+ size 5879209
main.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import logging
3
+ from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
4
+ from langchain_huggingface import HuggingFaceEmbeddings
5
+ from sentence_transformers import SentenceTransformer
6
+
7
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
8
+ from langchain_community.vectorstores import FAISS
9
+ from langchain.prompts import PromptTemplate
10
+ from langchain_together import Together
11
+ from langchain.memory import ConversationBufferMemory
12
+ from langchain.chains import ConversationalRetrievalChain
13
+ import streamlit as st
14
+ import os
15
+
16
+ from dotenv import load_dotenv
17
+ import warnings
18
+ logging.basicConfig(level=logging.DEBUG) # Logs at DEBUG level and above
19
+ logger = logging.getLogger(__name__)
20
+
21
+ logger.debug("Starting Streamlit app...")
22
+ # Suppress PyTorch FutureWarning
23
+ warnings.filterwarnings("ignore", message="You are using `torch.load` with `weights_only=False`")
24
+ warnings.filterwarnings("ignore", message="Tried to instantiate class '__path__._path'")
25
+ warnings.filterwarnings("ignore", category=FutureWarning)
26
+ # Suppress generic DeprecationWarnings (including LangChain)
27
+ warnings.filterwarnings("ignore", category=DeprecationWarning)
28
+
29
+
30
+
31
+ load_dotenv()
32
+ TOGETHER_AI_API = os.getenv("TOGETHER_AI")
33
+
34
+ # Streamlit Page Config
35
+ st.set_page_config(page_title="Law4her")
36
+ col1, col2, col3 = st.columns([1, 4, 1])
37
+ with col2:
38
+ st.image(
39
+ "https://res.cloudinary.com/dzzhbgbnp/image/upload/v1736073326/lawforher_logo1_yznqxr.png"
40
+ )
41
+
42
+ st.markdown(
43
+ """
44
+ <style>
45
+ div.stButton > button:first-child {
46
+ background-color: #ffffff; /* White background */
47
+ color: #000000; /* Black text */
48
+ border: 1px solid #000000; /* Optional: Add a black border */
49
+ }
50
+
51
+ div.stButton > button:active {
52
+ background-color: #e0e0e0; /* Slightly darker white for active state */
53
+ color: #000000; /* Black text remains the same */
54
+ }
55
+
56
+ div[data-testid="stStatusWidget"] div button {
57
+ display: none;
58
+ }
59
+ .reportview-container {
60
+ margin-top: -2em;
61
+ }
62
+ #MainMenu {visibility: hidden;}
63
+ .stDeployButton {display:none;}
64
+ footer {visibility: hidden;}
65
+ #stDecoration {display:none;}
66
+ button[title="View fullscreen"]{
67
+ visibility: hidden;}
68
+ </style>
69
+ """,
70
+ unsafe_allow_html=True,
71
+ )
72
+
73
+ # Reset Conversation
74
+ def reset_conversation():
75
+ st.session_state.messages = [{"role": "assistant", "content": "Hi, how can I help you?"}]
76
+ st.session_state.memory.clear()
77
+
78
+ # Initialize chat messages and memory
79
+ if "messages" not in st.session_state:
80
+ st.session_state.messages = [{"role": "assistant", "content": "Hi, how can I help you?"}]
81
+
82
+ if "memory" not in st.session_state:
83
+ st.session_state.memory = ConversationBufferMemory(
84
+ memory_key="chat_history",
85
+ return_messages=True
86
+ )
87
+
88
+ # Load embeddings and vectorstore
89
+ embeddings = HuggingFaceEmbeddings(
90
+ model_name="nomic-ai/nomic-embed-text-v1",
91
+ model_kwargs={"trust_remote_code": True, "revision": "289f532e14dbbbd5a04753fa58739e9ba766f3c7"},
92
+ )
93
+
94
+ # Enable dangerous deserialization (safe only if the file is trusted and created by you)
95
+ db = FAISS.load_local("ipc_vector_db", embeddings, allow_dangerous_deserialization=True)
96
+ db_retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 2, "max_length": 512})
97
+
98
+ prompt_template = """<s>[INST]As a legal chatbot specializing in the Indian Penal Code, provide a concise and accurate answer based on the given context. Avoid unnecessary details or unrelated content. Only respond if the answer can be derived from the provided context; otherwise, say "The information is not available in the provided context."
99
+ CONTEXT: {context}
100
+ CHAT HISTORY: {chat_history}
101
+ QUESTION: {question}
102
+ ANSWER:
103
+ </s>[INST]
104
+ """
105
+
106
+ prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question", "chat_history"])
107
+
108
+ # Initialize the Together API
109
+ llm = Together(
110
+ model="mistralai/Mistral-7B-Instruct-v0.2",
111
+ temperature=0.5,
112
+ max_tokens=1024,
113
+ together_api_key=TOGETHER_AI_API,
114
+ )
115
+
116
+ qa = ConversationalRetrievalChain.from_llm(
117
+ llm=llm,
118
+ memory=st.session_state.memory,
119
+ retriever=db_retriever,
120
+ combine_docs_chain_kwargs={"prompt": prompt},
121
+ )
122
+
123
+ # Display chat history
124
+ for message in st.session_state.messages:
125
+ with st.chat_message(message.get("role")):
126
+ st.write(message.get("content"))
127
+
128
+ # User input
129
+ input_prompt = st.chat_input("Ask a legal question about the Indian Penal Code")
130
+
131
+ if input_prompt:
132
+ with st.chat_message("user"):
133
+ st.write(input_prompt)
134
+
135
+ st.session_state.messages.append({"role": "user", "content": input_prompt})
136
+
137
+ with st.chat_message("assistant"):
138
+ with st.status("Thinking 💡...", expanded=True):
139
+ try:
140
+ # Pass the user question
141
+ result = qa.invoke(input=input_prompt)
142
+ full_response = result.get("answer", "")
143
+
144
+ # Ensure the answer is a string
145
+ if isinstance(full_response, list):
146
+ full_response = " ".join(full_response)
147
+ elif not isinstance(full_response, str):
148
+ full_response = str(full_response)
149
+
150
+ # Display the response
151
+ st.session_state.messages.append({"role": "assistant", "content": full_response})
152
+ st.write(full_response)
153
+
154
+ except Exception as e:
155
+ st.error(f"Error occurred: {e}")
156
+
157
+ # Add reset button
158
+ st.button("Reset All Chat 🗑", on_click=reset_conversation)
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.115.6
2
+ langchain==0.3.14
3
+ langchain_community==0.3.14
4
+ langchain_together==0.2.0
5
+ python-dotenv==1.0.1
6
+ streamlit==1.41.1
7
+ transformers==4.47.1
8
+ uvicorn==0.34.0
9
+ sentence-transformers==3.3.1
10
+ einops==0.8.0
11
+ faiss-cpu==1.9.0.post1
12
+ langchain_huggingface==0.1.2
13
+ torch==2.5.1
secrets.toml ADDED
File without changes