sabazo commited on
Commit
e1f572c
1 Parent(s): 98a0397

cleaned unneeded folder

Browse files
example/Langgraph_CorrectiveRAG_mistral_chroma.ipynb DELETED
@@ -1,580 +0,0 @@
1
- {
2
- "nbformat": 4,
3
- "nbformat_minor": 0,
4
- "metadata": {
5
- "colab": {
6
- "provenance": [],
7
- "authorship_tag": "ABX9TyP8lUVuJ31ic7qIWsz2xSyw",
8
- "include_colab_link": true
9
- },
10
- "kernelspec": {
11
- "name": "python3",
12
- "display_name": "Python 3"
13
- },
14
- "language_info": {
15
- "name": "python"
16
- }
17
- },
18
- "cells": [
19
- {
20
- "cell_type": "markdown",
21
- "metadata": {
22
- "id": "view-in-github",
23
- "colab_type": "text"
24
- },
25
- "source": [
26
- "<a href=\"https://colab.research.google.com/github/almutareb/InnovationPathfinderAI/blob/main/example/Langgraph_CorrectiveRAG_mistral_chroma.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
27
- ]
28
- },
29
- {
30
- "cell_type": "code",
31
- "execution_count": 1,
32
- "metadata": {
33
- "id": "jLMHfRq9kAP9"
34
- },
35
- "outputs": [],
36
- "source": [
37
- "!pip install -Uq langchain-community\n",
38
- "!pip install -Uq langchain\n",
39
- "!pip install -Uq langgraph\n",
40
- "!pip install -Uq chromadb\n",
41
- "!pip install -Uq sentence-transformers\n",
42
- "!pip install -Uq gpt4all\n",
43
- "!pip install -qU google-search-results"
44
- ]
45
- },
46
- {
47
- "cell_type": "code",
48
- "source": [
49
- "import os\n",
50
- "from google.colab import userdata\n",
51
- "os.environ[\"HUGGINGFACEHUB_API_TOKEN\"] = userdata.get('HUGGINGFACEHUB_API_TOKEN')\n",
52
- "os.environ[\"GOOGLE_CSE_ID\"] = userdata.get('GOOGLE_CSE_ID')\n",
53
- "os.environ[\"GOOGLE_API_KEY\"] = userdata.get('GOOGLE_API_KEY')"
54
- ],
55
- "metadata": {
56
- "id": "kPF-3dzGuAfT"
57
- },
58
- "execution_count": 2,
59
- "outputs": []
60
- },
61
- {
62
- "cell_type": "markdown",
63
- "source": [
64
- "### LLMs"
65
- ],
66
- "metadata": {
67
- "id": "XTtbWrue9l3E"
68
- }
69
- },
70
- {
71
- "cell_type": "code",
72
- "source": [
73
- "# HF libraries\n",
74
- "from langchain_community.llms import HuggingFaceEndpoint\n",
75
- "\n",
76
- "# Load the model from the Hugging Face Hub\n",
77
- "llm_mid = HuggingFaceEndpoint(repo_id=\"mistralai/Mixtral-8x7B-Instruct-v0.1\",\n",
78
- " temperature=0.1,\n",
79
- " max_new_tokens=1024,\n",
80
- " repetition_penalty=1.2,\n",
81
- " return_full_text=False\n",
82
- " )\n",
83
- "\n",
84
- "llm_small = HuggingFaceEndpoint(repo_id=\"mistralai/Mistral-7B-Instruct-v0.2\",\n",
85
- " temperature=0.1,\n",
86
- " max_new_tokens=1024,\n",
87
- " repetition_penalty=1.2,\n",
88
- " return_full_text=False\n",
89
- " )"
90
- ],
91
- "metadata": {
92
- "id": "EDZyRq-wuIuy"
93
- },
94
- "execution_count": null,
95
- "outputs": []
96
- },
97
- {
98
- "cell_type": "markdown",
99
- "source": [
100
- "### Chroma DB"
101
- ],
102
- "metadata": {
103
- "id": "mdMx_T8V9npk"
104
- }
105
- },
106
- {
107
- "cell_type": "code",
108
- "source": [
109
- "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
110
- "from langchain_community.document_loaders import WebBaseLoader\n",
111
- "from langchain_community.vectorstores import Chroma\n",
112
- "from langchain_community.embeddings import GPT4AllEmbeddings\n",
113
- "from langchain.embeddings import HuggingFaceEmbeddings\n",
114
- "\n",
115
- "# Load\n",
116
- "url = \"https://lilianweng.github.io/posts/2023-06-23-agent/\"\n",
117
- "loader = WebBaseLoader(url)\n",
118
- "docs = loader.load()\n",
119
- "\n",
120
- "# Split\n",
121
- "text_splitter = RecursiveCharacterTextSplitter(\n",
122
- " chunk_size=500, chunk_overlap=100\n",
123
- ")\n",
124
- "all_splits = text_splitter.split_documents(docs)\n",
125
- "\n",
126
- "# Embed and index\n",
127
- "#embedding = GPT4AllEmbeddings()\n",
128
- "embedding = HuggingFaceEmbeddings(model_name=\"sentence-transformers/all-mpnet-base-v2\")\n",
129
- "\n",
130
- "\n",
131
- "# Index\n",
132
- "vectorstore = Chroma.from_documents(\n",
133
- " documents=all_splits,\n",
134
- " collection_name=\"rag-chroma\",\n",
135
- " embedding=embedding,\n",
136
- ")\n",
137
- "retriever = vectorstore.as_retriever()"
138
- ],
139
- "metadata": {
140
- "id": "LkX9ehoeupSz"
141
- },
142
- "execution_count": null,
143
- "outputs": []
144
- },
145
- {
146
- "cell_type": "markdown",
147
- "source": [
148
- "###State"
149
- ],
150
- "metadata": {
151
- "id": "0A-7_d3G9b8h"
152
- }
153
- },
154
- {
155
- "cell_type": "code",
156
- "source": [
157
- "from typing import Annotated, Dict, TypedDict\n",
158
- "from langchain_core.messages import BaseMessage\n",
159
- "\n",
160
- "class GraphState(TypedDict):\n",
161
- " \"\"\"\n",
162
- " Represents the state of our graph.\n",
163
- "\n",
164
- " Attributes:\n",
165
- " key: A dictionary where each key is a string.\n",
166
- " \"\"\"\n",
167
- "\n",
168
- " keys: Dict[str, any]"
169
- ],
170
- "metadata": {
171
- "id": "fRzYhmOs7_GJ"
172
- },
173
- "execution_count": 5,
174
- "outputs": []
175
- },
176
- {
177
- "cell_type": "markdown",
178
- "source": [
179
- "### Nodes"
180
- ],
181
- "metadata": {
182
- "id": "bPhIdcVD9pgV"
183
- }
184
- },
185
- {
186
- "cell_type": "code",
187
- "source": [
188
- "import json\n",
189
- "import operator\n",
190
- "from typing import Annotated, Sequence, TypedDict\n",
191
- "\n",
192
- "from langchain_core.output_parsers import JsonOutputParser\n",
193
- "from langchain.prompts import PromptTemplate\n",
194
- "from langchain.schema import Document\n",
195
- "from langchain.tools import Tool\n",
196
- "from langchain_community.utilities import GoogleSearchAPIWrapper\n",
197
- "from langchain_community.vectorstores import Chroma\n",
198
- "from langchain_core.output_parsers import StrOutputParser\n",
199
- "from langchain_core.runnables import RunnablePassthrough\n",
200
- "\n",
201
- "### Nodes ###\n",
202
- "\n",
203
- "def retrieve(state):\n",
204
- " \"\"\"\n",
205
- " Retrieve documents\n",
206
- "\n",
207
- " Args:\n",
208
- " state (dict): The current graph state\n",
209
- "\n",
210
- " Returns:\n",
211
- " state (dict): New key added to state, documents, that contains retrieved documents\n",
212
- " \"\"\"\n",
213
- " print(\"---RETRIEVE---\")\n",
214
- " state_dict = state[\"keys\"]\n",
215
- " question = state_dict[\"question\"]\n",
216
- " local = state_dict[\"local\"]\n",
217
- " documents = retriever.get_relevant_documents(question)\n",
218
- "\n",
219
- " return {\"keys\": {\"documents\": documents, \"local\": local, \"question\": question}}\n",
220
- "\n",
221
- "def generate(state):\n",
222
- " \"\"\"\n",
223
- " Generate answer\n",
224
- "\n",
225
- " Args:\n",
226
- " state (dict): The current graph state\n",
227
- "\n",
228
- " Returns:\n",
229
- " state (dict): New key added to state, generation, that contains generation\n",
230
- " \"\"\"\n",
231
- " print(\"---GENERATE---\")\n",
232
- " state_dict = state[\"keys\"]\n",
233
- " question = state_dict[\"question\"]\n",
234
- " documents = state_dict[\"documents\"]\n",
235
- " local = state_dict[\"local\"]\n",
236
- "\n",
237
- " # Prompt\n",
238
- " prompt = PromptTemplate(\n",
239
- " template=\"\"\"You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. \\n\n",
240
- " If you don't know the answer, just say that you don't know. Keep the answer concise. \\n\n",
241
- " Question: {question} \\n\n",
242
- " Context: {context} \\n\n",
243
- " \"\"\",\n",
244
- " input_variables=[\"question\",\"context\"],\n",
245
- " )\n",
246
- "\n",
247
- " # LLM\n",
248
- " llm = llm_mid\n",
249
- "\n",
250
- " # Post-processing\n",
251
- " def format_docs(docs):\n",
252
- " return \"\\n\\n\".join(doc.page_content for doc in docs)\n",
253
- "\n",
254
- " # Chain\n",
255
- " rag_chain = prompt | llm | StrOutputParser()\n",
256
- "\n",
257
- "\n",
258
- " # Run\n",
259
- " generation = rag_chain.invoke({\"context\": documents, \"question\": question})\n",
260
- "\n",
261
- " return {\n",
262
- " \"keys\": {\"documents\": documents, \"question\": question, \"generation\": generation}\n",
263
- " }\n",
264
- "\n",
265
- "def grade_documents(state):\n",
266
- " \"\"\"\n",
267
- " Determines whether the retrieved documents are relevant to the question.\n",
268
- "\n",
269
- " Args:\n",
270
- " state (dict): The current graph state\n",
271
- "\n",
272
- " Returns:\n",
273
- " state (dict): Update documents key with relevant documents\n",
274
- " \"\"\"\n",
275
- "\n",
276
- " print(\"---CHECK RELEVANCE---\")\n",
277
- " state_dict = state[\"keys\"]\n",
278
- " question = state_dict[\"question\"]\n",
279
- " documents = state_dict[\"documents\"]\n",
280
- " local = state_dict[\"local\"]\n",
281
- "\n",
282
- " # LLM\n",
283
- " llm = llm_small\n",
284
- "\n",
285
- " prompt = PromptTemplate(\n",
286
- " template=\"\"\"You are a grader assessing relevance of a retrieved document to a user question. \\n\n",
287
- " Here is the retrieved document: \\n\\n {context} \\n\\n\n",
288
- " Here is the user question: {question} \\n\n",
289
- " If the document contains keywords related to the user question, grade it as relevant. \\n\n",
290
- " It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \\n\n",
291
- " Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question. \\n\n",
292
- " Provide the binary score as a JSON with a single key 'score' and no premable or explaination.\n",
293
- " \"\"\",\n",
294
- " input_variables=[\"question\",\"context\"],\n",
295
- " )\n",
296
- "\n",
297
- " chain = prompt | llm | JsonOutputParser()\n",
298
- "\n",
299
- " # Score\n",
300
- " filtered_docs = []\n",
301
- " search = \"No\" #Default to do not opt for web search to supplement retrieval\n",
302
- " for d in documents:\n",
303
- " score = chain.invoke(\n",
304
- " {\n",
305
- " \"question\": question,\n",
306
- " \"context\": d.page_content,\n",
307
- " }\n",
308
- " )\n",
309
- " grade = score[\"score\"]\n",
310
- " if grade == \"yes\":\n",
311
- " print(\"---GRADE: DOCUMENT RELEVANT---\")\n",
312
- " filtered_docs.append(d)\n",
313
- " else:\n",
314
- " print(\"---GRADE: DOCUMENT IRRELEVANT---\")\n",
315
- " search = \"Yes\" #Perform web search\n",
316
- " continue\n",
317
- "\n",
318
- " return {\n",
319
- " \"keys\": {\n",
320
- " \"documents\": filtered_docs,\n",
321
- " \"question\": question,\n",
322
- " \"local\": local,\n",
323
- " \"run_web_search\": search,\n",
324
- " }\n",
325
- " }\n",
326
- "\n",
327
- "def transform_query(state):\n",
328
- " \"\"\"\n",
329
- " Transform the query to produce a better question.\n",
330
- "\n",
331
- " Args:\n",
332
- " state (dict): The current graph state\n",
333
- "\n",
334
- " Returns:\n",
335
- " state (dict): Updates question key with a re-phrased question\n",
336
- " \"\"\"\n",
337
- " print(\"---TRANSFORM QUERY---\")\n",
338
- " state_dict = state[\"keys\"]\n",
339
- " question = state_dict[\"question\"]\n",
340
- " documents = state_dict[\"documents\"]\n",
341
- " local = state_dict[\"local\"]\n",
342
- "\n",
343
- " # Create a prompt template with format instructions and the query\n",
344
- " prompt = PromptTemplate(\n",
345
- " template=\"\"\"You are generating questions that are well optimized for retrieval. \\n\n",
346
- " Look at the input and try to reasin about the underlying sematic intent / meaning . \\n\n",
347
- " Here is the initial question:\n",
348
- " \\n -------- \\n\n",
349
- " {question}\n",
350
- " \\n -------- \\n\n",
351
- " Provide an improved question without any premable, only respond with the updated question: \"\"\",\n",
352
- " input_variables=[\"question\"],\n",
353
- " )\n",
354
- "\n",
355
- " # Grader\n",
356
- " # LLM\n",
357
- " llm = llm_mid\n",
358
- "\n",
359
- " # Prompt\n",
360
- " chain = prompt | llm | StrOutputParser()\n",
361
- " better_question = chain.invoke({\"question\": question})\n",
362
- "\n",
363
- " return {\n",
364
- " \"keys\": {\"documents\": documents, \"question\": better_question, \"local\": local}\n",
365
- " }\n",
366
- "\n",
367
- "\n",
368
- "def web_search(state):\n",
369
- " \"\"\"\n",
370
- " Web search based on the re-phrased question using google\n",
371
- "\n",
372
- " Args:\n",
373
- " state (dict): The current graph state\n",
374
- " Returns:\n",
375
- " state (dict): Web results appended to documents.\n",
376
- " \"\"\"\n",
377
- "\n",
378
- " print(\"---WEB SEARCH---\")\n",
379
- " state_dict = state[\"keys\"]\n",
380
- " question = state_dict[\"question\"]\n",
381
- " documents = state_dict[\"documents\"]\n",
382
- " local = state_dict[\"local\"]\n",
383
- "\n",
384
- " websearch = GoogleSearchAPIWrapper(k=3)\n",
385
- " google_search = Tool(\n",
386
- " name=\"google_search\",\n",
387
- " description=\"Search Google for recent results.\",\n",
388
- " func=websearch.run,\n",
389
- " )\n",
390
- " web_search = google_search.run(question)\n",
391
- " #filtered_contents = [d[\"page_content\"] for d in web_search if d[\"page_content\"] is not None]\n",
392
- " #web_results = \"\\n\".join(filtered_contents)\n",
393
- " web_results = Document(page_content=web_search)\n",
394
- " documents.append(web_results)\n",
395
- "\n",
396
- " return {\"keys\": {\"documents\": documents, \"local\": local, \"question\": question}}"
397
- ],
398
- "metadata": {
399
- "id": "1Sn5NCyl9pRE"
400
- },
401
- "execution_count": 6,
402
- "outputs": []
403
- },
404
- {
405
- "cell_type": "markdown",
406
- "source": [
407
- "### Edges"
408
- ],
409
- "metadata": {
410
- "id": "7n6TeQcrugvF"
411
- }
412
- },
413
- {
414
- "cell_type": "code",
415
- "source": [
416
- "def decide_to_generate(state):\n",
417
- " \"\"\"\n",
418
- " Determines whether to generate an answer or re-generate a question for web search.\n",
419
- "\n",
420
- " Args:\n",
421
- " state (dict): The current state of the agent, including all keys.\n",
422
- "\n",
423
- " Returns:\n",
424
- " str: Next node to call\n",
425
- " \"\"\"\n",
426
- "\n",
427
- " print(\"---DECIDE TO GENERATE---\")\n",
428
- " state_dict = state[\"keys\"]\n",
429
- " question = state_dict[\"question\"]\n",
430
- " filtered_documents = state_dict[\"documents\"]\n",
431
- " search = state_dict[\"run_web_search\"]\n",
432
- "\n",
433
- " if search == \"Yes\":\n",
434
- " # All documents have been filtered check_relevance\n",
435
- " # We will re-generate a new query\n",
436
- " print(\"---DECISION: TRANSFORM QUERY and RUN WEB SEARCH---\")\n",
437
- " return \"transform_query\"\n",
438
- " else:\n",
439
- " # We have relevant documents, so generate answer\n",
440
- " print(\"---DECISION: GENERATE---\")\n",
441
- " return \"generate\""
442
- ],
443
- "metadata": {
444
- "id": "l9djuUIx-_ZK"
445
- },
446
- "execution_count": 7,
447
- "outputs": []
448
- },
449
- {
450
- "cell_type": "markdown",
451
- "source": [
452
- "### Graph"
453
- ],
454
- "metadata": {
455
- "id": "Z6g94SltdUEc"
456
- }
457
- },
458
- {
459
- "cell_type": "code",
460
- "source": [
461
- "import pprint\n",
462
- "from langgraph.graph import END, StateGraph\n",
463
- "\n",
464
- "workflow = StateGraph(GraphState)\n",
465
- "\n",
466
- "# Define the nodes\n",
467
- "workflow.add_node(\"retrieve\", retrieve) #retrieve\n",
468
- "workflow.add_node(\"grade_documents\", grade_documents) # grade documents\n",
469
- "workflow.add_node(\"generate\", generate)\n",
470
- "workflow.add_node(\"transform_query\", transform_query)\n",
471
- "workflow.add_node(\"web_search\", web_search)\n",
472
- "\n",
473
- "# Build graph\n",
474
- "workflow.set_entry_point(\"retrieve\")\n",
475
- "workflow.add_edge(\"retrieve\", \"grade_documents\")\n",
476
- "workflow.add_conditional_edges(\n",
477
- " \"grade_documents\",\n",
478
- " decide_to_generate,\n",
479
- " {\n",
480
- " \"transform_query\": \"transform_query\",\n",
481
- " \"generate\": \"generate\",\n",
482
- " },\n",
483
- ")\n",
484
- "workflow.add_edge(\"transform_query\", \"web_search\")\n",
485
- "workflow.add_edge(\"web_search\", \"generate\")\n",
486
- "workflow.add_edge(\"generate\", END)\n",
487
- "\n",
488
- "# Compile\n",
489
- "app = workflow.compile()"
490
- ],
491
- "metadata": {
492
- "id": "5pyAWscidTUt"
493
- },
494
- "execution_count": 8,
495
- "outputs": []
496
- },
497
- {
498
- "cell_type": "markdown",
499
- "source": [
500
- "### RUN"
501
- ],
502
- "metadata": {
503
- "id": "Yb4oGR4Dfoud"
504
- }
505
- },
506
- {
507
- "cell_type": "code",
508
- "source": [
509
- "# Run\n",
510
- "inputs = {\n",
511
- " \"keys\": {\n",
512
- " \"question\": \"Explain how the different types of agent memory work?\",\n",
513
- " \"local\": \"No\",\n",
514
- " }\n",
515
- "}\n",
516
- "for output in app.stream(inputs):\n",
517
- " for key, value in output.items():\n",
518
- " # Node\n",
519
- " pprint.pprint(f\"Node '{key}':\")\n",
520
- " # Optional: print full state at each node\n",
521
- " # pprint.pprint(value[\"keys\"], indent=2, width=80, depth=None)\n",
522
- " pprint.pprint(\"\\n---\\n\")\n",
523
- "\n",
524
- "# Final generation\n",
525
- "pprint.pprint(value['keys']['generation'])"
526
- ],
527
- "metadata": {
528
- "colab": {
529
- "base_uri": "https://localhost:8080/"
530
- },
531
- "id": "AR4jotJqrLY1",
532
- "outputId": "a620caec-13ec-454d-c4f7-f034633b2f1d"
533
- },
534
- "execution_count": 9,
535
- "outputs": [
536
- {
537
- "output_type": "stream",
538
- "name": "stdout",
539
- "text": [
540
- "---RETRIEVE---\n",
541
- "\"Node 'retrieve':\"\n",
542
- "'\\n---\\n'\n",
543
- "---CHECK RELEVANCE---\n",
544
- "---GRADE: DOCUMENT IRRELEVANT---\n",
545
- "---GRADE: DOCUMENT RELEVANT---\n",
546
- "---GRADE: DOCUMENT RELEVANT---\n",
547
- "---GRADE: DOCUMENT IRRELEVANT---\n",
548
- "\"Node 'grade_documents':\"\n",
549
- "'\\n---\\n'\n",
550
- "---DECIDE TO GENERATE---\n",
551
- "---DECISION: TRANSFORM QUERY and RUN WEB SEARCH---\n",
552
- "---TRANSFORM QUERY---\n",
553
- "\"Node 'transform_query':\"\n",
554
- "'\\n---\\n'\n",
555
- "---WEB SEARCH---\n",
556
- "\"Node 'web_search':\"\n",
557
- "'\\n---\\n'\n",
558
- "---GENERATE---\n",
559
- "\"Node 'generate':\"\n",
560
- "'\\n---\\n'\n",
561
- "\"Node '__end__':\"\n",
562
- "'\\n---\\n'\n",
563
- "('----\\n'\n",
564
- " '\\n'\n",
565
- " 'The functionalities of sensory memory include learning embedding '\n",
566
- " 'representations for raw inputs like text, images, or other modalities. '\n",
567
- " 'Short-term memory serves as in-context learning with a limited capacity due '\n",
568
- " 'to the finite context window length of Transformers. Long-term memory acts '\n",
569
- " 'as an external vector store that the agent can access during query time '\n",
570
- " 'through fast retrieval. Reflection mechanisms help synthesize memories into '\n",
571
- " \"higher-level inferences over time and guide the agent's future behavior \"\n",
572
- " 'using higher-level summaries of past events. Working memory has been defined '\n",
573
- " 'differently across sources but generally refers to short-term memory used '\n",
574
- " 'for cognitive tasks.')\n"
575
- ]
576
- }
577
- ]
578
- }
579
- ]
580
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
examples/Langgraph_CorrectiveRAG_mistral_chroma.ipynb CHANGED
@@ -4,7 +4,7 @@
4
  "metadata": {
5
  "colab": {
6
  "provenance": [],
7
- "authorship_tag": "ABX9TyMp8bhKotk3mdZcc3U4qqKP",
8
  "include_colab_link": true
9
  },
10
  "kernelspec": {
@@ -23,12 +23,12 @@
23
  "colab_type": "text"
24
  },
25
  "source": [
26
- "<a href=\"https://colab.research.google.com/github/almutareb/InnovationPathfinderAI/blob/main/examples/Langgraph_CorrectiveRAG_mistral_chroma.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
27
  ]
28
  },
29
  {
30
  "cell_type": "code",
31
- "execution_count": null,
32
  "metadata": {
33
  "id": "jLMHfRq9kAP9"
34
  },
@@ -36,10 +36,7 @@
36
  "source": [
37
  "!pip install -Uq langchain-community\n",
38
  "!pip install -Uq langchain\n",
39
- "!pip install -Uq langchainhub\n",
40
  "!pip install -Uq langgraph\n",
41
- "!pip install -Uq wikipedia\n",
42
- "!pip install -Uq scikit-learn\n",
43
  "!pip install -Uq chromadb\n",
44
  "!pip install -Uq sentence-transformers\n",
45
  "!pip install -Uq gpt4all\n",
@@ -173,13 +170,13 @@
173
  "metadata": {
174
  "id": "fRzYhmOs7_GJ"
175
  },
176
- "execution_count": 9,
177
  "outputs": []
178
  },
179
  {
180
  "cell_type": "markdown",
181
  "source": [
182
- "### Nodes and Edges"
183
  ],
184
  "metadata": {
185
  "id": "bPhIdcVD9pgV"
@@ -192,7 +189,6 @@
192
  "import operator\n",
193
  "from typing import Annotated, Sequence, TypedDict\n",
194
  "\n",
195
- "from langchain import hub\n",
196
  "from langchain_core.output_parsers import JsonOutputParser\n",
197
  "from langchain.prompts import PromptTemplate\n",
198
  "from langchain.schema import Document\n",
@@ -239,7 +235,14 @@
239
  " local = state_dict[\"local\"]\n",
240
  "\n",
241
  " # Prompt\n",
242
- " prompt = hub.pull(\"rlm/rag-prompt\")\n",
 
 
 
 
 
 
 
243
  "\n",
244
  " # LLM\n",
245
  " llm = llm_mid\n",
@@ -277,7 +280,7 @@
277
  " local = state_dict[\"local\"]\n",
278
  "\n",
279
  " # LLM\n",
280
- " llm = llm_mid\n",
281
  "\n",
282
  " prompt = PromptTemplate(\n",
283
  " template=\"\"\"You are a grader assessing relevance of a retrieved document to a user question. \\n\n",
@@ -395,14 +398,21 @@
395
  "metadata": {
396
  "id": "1Sn5NCyl9pRE"
397
  },
398
- "execution_count": 88,
399
  "outputs": []
400
  },
 
 
 
 
 
 
 
 
 
401
  {
402
  "cell_type": "code",
403
  "source": [
404
- "### Edges ###\n",
405
- "\n",
406
  "def decide_to_generate(state):\n",
407
  " \"\"\"\n",
408
  " Determines whether to generate an answer or re-generate a question for web search.\n",
@@ -433,7 +443,7 @@
433
  "metadata": {
434
  "id": "l9djuUIx-_ZK"
435
  },
436
- "execution_count": 89,
437
  "outputs": []
438
  },
439
  {
@@ -481,7 +491,7 @@
481
  "metadata": {
482
  "id": "5pyAWscidTUt"
483
  },
484
- "execution_count": 90,
485
  "outputs": []
486
  },
487
  {
@@ -497,21 +507,19 @@
497
  "cell_type": "code",
498
  "source": [
499
  "# Run\n",
500
- "\n",
501
  "inputs = {\n",
502
  " \"keys\": {\n",
503
  " \"question\": \"Explain how the different types of agent memory work?\",\n",
504
  " \"local\": \"No\",\n",
505
  " }\n",
506
  "}\n",
507
- "\n",
508
  "for output in app.stream(inputs):\n",
509
- " for key, value in output.items():\n",
510
- " # Node\n",
511
- " pprint.pprint(f\"Node '{key}':\")\n",
512
- " # Optional: print full state at each node\n",
513
- " # pprint.pprint(value[\"keys\"], ident=2, width=80, depth=None)\n",
514
- " pprint.pprint(\"\\n---\\n\")\n",
515
  "\n",
516
  "# Final generation\n",
517
  "pprint.pprint(value['keys']['generation'])"
@@ -520,10 +528,10 @@
520
  "colab": {
521
  "base_uri": "https://localhost:8080/"
522
  },
523
- "id": "bJH68dQffp_e",
524
- "outputId": "4318d425-7284-4275-83b1-f1fcd85c9b38"
525
  },
526
- "execution_count": 92,
527
  "outputs": [
528
  {
529
  "output_type": "stream",
@@ -552,17 +560,18 @@
552
  "'\\n---\\n'\n",
553
  "\"Node '__end__':\"\n",
554
  "'\\n---\\n'\n",
555
- "(' \\n'\n",
556
  " '\\n'\n",
557
- " 'The functionalities of agent memory include recency, importance, relevance, '\n",
558
- " 'reflection mechanism, sensory memory, short-term memory, and long-term '\n",
559
- " 'memory. Recency gives higher scores to recent events, while Importance '\n",
560
- " 'distinguishes mundane from core memories. Relevance depends on how related '\n",
561
- " 'the memory is to the current situation or query. Reflection mechanism '\n",
562
- " 'synthesizes memories into higher-level inferences over time. Sensory memory '\n",
563
- " 'learns embedding representations for raw inputs, Short-term memory handles '\n",
564
- " 'in-context learning, and Long-term memory serves as an external vector store '\n",
565
- " 'attended to at query time.')\n"
 
566
  ]
567
  }
568
  ]
 
4
  "metadata": {
5
  "colab": {
6
  "provenance": [],
7
+ "authorship_tag": "ABX9TyP8lUVuJ31ic7qIWsz2xSyw",
8
  "include_colab_link": true
9
  },
10
  "kernelspec": {
 
23
  "colab_type": "text"
24
  },
25
  "source": [
26
+ "<a href=\"https://colab.research.google.com/github/almutareb/InnovationPathfinderAI/blob/main/example/Langgraph_CorrectiveRAG_mistral_chroma.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
27
  ]
28
  },
29
  {
30
  "cell_type": "code",
31
+ "execution_count": 1,
32
  "metadata": {
33
  "id": "jLMHfRq9kAP9"
34
  },
 
36
  "source": [
37
  "!pip install -Uq langchain-community\n",
38
  "!pip install -Uq langchain\n",
 
39
  "!pip install -Uq langgraph\n",
 
 
40
  "!pip install -Uq chromadb\n",
41
  "!pip install -Uq sentence-transformers\n",
42
  "!pip install -Uq gpt4all\n",
 
170
  "metadata": {
171
  "id": "fRzYhmOs7_GJ"
172
  },
173
+ "execution_count": 5,
174
  "outputs": []
175
  },
176
  {
177
  "cell_type": "markdown",
178
  "source": [
179
+ "### Nodes"
180
  ],
181
  "metadata": {
182
  "id": "bPhIdcVD9pgV"
 
189
  "import operator\n",
190
  "from typing import Annotated, Sequence, TypedDict\n",
191
  "\n",
 
192
  "from langchain_core.output_parsers import JsonOutputParser\n",
193
  "from langchain.prompts import PromptTemplate\n",
194
  "from langchain.schema import Document\n",
 
235
  " local = state_dict[\"local\"]\n",
236
  "\n",
237
  " # Prompt\n",
238
+ " prompt = PromptTemplate(\n",
239
+ " template=\"\"\"You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. \\n\n",
240
+ " If you don't know the answer, just say that you don't know. Keep the answer concise. \\n\n",
241
+ " Question: {question} \\n\n",
242
+ " Context: {context} \\n\n",
243
+ " \"\"\",\n",
244
+ " input_variables=[\"question\",\"context\"],\n",
245
+ " )\n",
246
  "\n",
247
  " # LLM\n",
248
  " llm = llm_mid\n",
 
280
  " local = state_dict[\"local\"]\n",
281
  "\n",
282
  " # LLM\n",
283
+ " llm = llm_small\n",
284
  "\n",
285
  " prompt = PromptTemplate(\n",
286
  " template=\"\"\"You are a grader assessing relevance of a retrieved document to a user question. \\n\n",
 
398
  "metadata": {
399
  "id": "1Sn5NCyl9pRE"
400
  },
401
+ "execution_count": 6,
402
  "outputs": []
403
  },
404
+ {
405
+ "cell_type": "markdown",
406
+ "source": [
407
+ "### Edges"
408
+ ],
409
+ "metadata": {
410
+ "id": "7n6TeQcrugvF"
411
+ }
412
+ },
413
  {
414
  "cell_type": "code",
415
  "source": [
 
 
416
  "def decide_to_generate(state):\n",
417
  " \"\"\"\n",
418
  " Determines whether to generate an answer or re-generate a question for web search.\n",
 
443
  "metadata": {
444
  "id": "l9djuUIx-_ZK"
445
  },
446
+ "execution_count": 7,
447
  "outputs": []
448
  },
449
  {
 
491
  "metadata": {
492
  "id": "5pyAWscidTUt"
493
  },
494
+ "execution_count": 8,
495
  "outputs": []
496
  },
497
  {
 
507
  "cell_type": "code",
508
  "source": [
509
  "# Run\n",
 
510
  "inputs = {\n",
511
  " \"keys\": {\n",
512
  " \"question\": \"Explain how the different types of agent memory work?\",\n",
513
  " \"local\": \"No\",\n",
514
  " }\n",
515
  "}\n",
 
516
  "for output in app.stream(inputs):\n",
517
+ " for key, value in output.items():\n",
518
+ " # Node\n",
519
+ " pprint.pprint(f\"Node '{key}':\")\n",
520
+ " # Optional: print full state at each node\n",
521
+ " # pprint.pprint(value[\"keys\"], indent=2, width=80, depth=None)\n",
522
+ " pprint.pprint(\"\\n---\\n\")\n",
523
  "\n",
524
  "# Final generation\n",
525
  "pprint.pprint(value['keys']['generation'])"
 
528
  "colab": {
529
  "base_uri": "https://localhost:8080/"
530
  },
531
+ "id": "AR4jotJqrLY1",
532
+ "outputId": "a620caec-13ec-454d-c4f7-f034633b2f1d"
533
  },
534
+ "execution_count": 9,
535
  "outputs": [
536
  {
537
  "output_type": "stream",
 
560
  "'\\n---\\n'\n",
561
  "\"Node '__end__':\"\n",
562
  "'\\n---\\n'\n",
563
+ "('----\\n'\n",
564
  " '\\n'\n",
565
+ " 'The functionalities of sensory memory include learning embedding '\n",
566
+ " 'representations for raw inputs like text, images, or other modalities. '\n",
567
+ " 'Short-term memory serves as in-context learning with a limited capacity due '\n",
568
+ " 'to the finite context window length of Transformers. Long-term memory acts '\n",
569
+ " 'as an external vector store that the agent can access during query time '\n",
570
+ " 'through fast retrieval. Reflection mechanisms help synthesize memories into '\n",
571
+ " \"higher-level inferences over time and guide the agent's future behavior \"\n",
572
+ " 'using higher-level summaries of past events. Working memory has been defined '\n",
573
+ " 'differently across sources but generally refers to short-term memory used '\n",
574
+ " 'for cognitive tasks.')\n"
575
  ]
576
  }
577
  ]