danielsuarez-mash committed on
Commit bdcb863
1 Parent(s): f45a84a

New changes

.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
.gitignore ADDED
@@ -0,0 +1 @@
+ .env
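Note: ignoring .env keeps the Hugging Face token out of version control; the app and notebook read it locally with python-dotenv. A minimal sketch of that pattern, assuming a .env file containing a HUGGINGFACEHUB_API_TOKEN=... entry (illustration only, not part of this commit):

    # hypothetical local-secrets pattern that the .gitignore entry supports
    import os
    from dotenv import load_dotenv

    load_dotenv()  # reads key=value pairs from .env into os.environ
    token = os.environ.get("HUGGINGFACEHUB_API_TOKEN")  # None if .env or the key is missing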
app.py CHANGED
@@ -8,7 +8,6 @@ from langchain_community.vectorstores import FAISS
  from langchain.prompts import PromptTemplate
  from langchain_community.llms import HuggingFaceHub
  from langchain_core.runnables import RunnablePassthrough
- from langchain_core.runnables import RunnableSequence
  from langchain_core.output_parsers import StrOutputParser
 
  st.title('LLM - Retrieval Augmented Generation')
@@ -21,7 +20,6 @@ def authenticate():
 
      # if running on cloud
      try:
-         os.environ["HUGGINGFACEHUB_API_TOKEN"]
          st.write(
              "Has environment variables been set:",
              os.environ["HUGGINGFACEHUB_API_TOKEN"] == st.secrets["HUGGINGFACEHUB_API_TOKEN"])
@@ -50,12 +48,12 @@ def load_pdf(pdf):
 
      return text
 
- def split_text(text):
+ def split_text(text, chunk_size=400, chunk_overlap=20):
 
      # split
      text_splitter = RecursiveCharacterTextSplitter(
-         chunk_size=400,
-         chunk_overlap=20,
+         chunk_size=chunk_size,
+         chunk_overlap=chunk_overlap,
          separators=["\n\n", "\n", " ", ""]
      )
 
@@ -123,6 +121,7 @@ def main():
 
      # load split store
      vectorstore = load_split_store(pdf)
+     st.write('PDF vectorized')
 
      # create a retriever using vectorstore
      retriever = vectorstore.as_retriever()
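Note on the split_text change above: chunk size and overlap are now parameters with the previous literals as defaults, so callers can tune splitting per document. A minimal usage sketch under that assumption; the splitter import and the function's return statement sit outside the changed hunk, so both are assumptions here:

    # sketch only: mirrors the new signature; import path and return value are assumed
    from langchain.text_splitter import RecursiveCharacterTextSplitter

    def split_text(text, chunk_size=400, chunk_overlap=20):
        # split raw PDF text into overlapping character chunks for embedding
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            separators=["\n\n", "\n", " ", ""]
        )
        return text_splitter.split_text(text)

    # e.g. smaller chunks for a short resume-style PDF
    chunks = split_text("some extracted text", chunk_size=200, chunk_overlap=10)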
example_documents/Daniel's Resume-2.pdf ADDED
Binary file (82.8 kB).
 
llm_handbook.ipynb CHANGED
@@ -37,7 +37,7 @@
    },
    {
     "cell_type": "code",
-    "execution_count": 3,
+    "execution_count": 11,
     "id": "9fcd2583-d0ab-4649-a241-4526f6a3b83d",
     "metadata": {
      "id": "9fcd2583-d0ab-4649-a241-4526f6a3b83d"
@@ -46,10 +46,9 @@
     "source": [
      "# import packages\n",
      "import os\n",
-     "import langchain\n",
-     "import getpass\n",
-     "from langchain import HuggingFaceHub, LLMChain\n",
-     "from dotenv import load_dotenv"
+     "from dotenv import load_dotenv\n",
+     "from langchain_community.llms import HuggingFaceHub\n",
+     "from langchain.chains import LLMChain"
     ]
    },
    {
@@ -59,21 +58,29 @@
     "id": "AyRxKsE4qPR1"
    },
    "source": [
-    "#API KEY"
+    "# API KEY"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 17,
    "id": "cf146257-5014-4041-980c-0ead2c3932c3",
    "metadata": {
     "id": "cf146257-5014-4041-980c-0ead2c3932c3"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "None\n"
+     ]
+    }
+   ],
    "source": [
     "# LOCAL\n",
     "load_dotenv()\n",
-    "os.environ.get('HUGGINGFACEHUB_API_TOKEN');"
+    "print(os.environ.get('HUGGINGFACEHUB_API_TOKEN'))"
    ]
   },
   {
@@ -90,14 +97,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 18,
    "id": "06c54d35-e9a2-4043-b3c3-588ac4f4a0d1",
    "metadata": {
     "id": "06c54d35-e9a2-4043-b3c3-588ac4f4a0d1"
    },
    "outputs": [],
    "source": [
-    "from langchain import PromptTemplate\n",
+    "from langchain.prompts import PromptTemplate\n",
     "\n",
     "# create template\n",
     "template = \"\"\"\n",
@@ -125,7 +132,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 14,
    "id": "03290cad-f6be-4002-b177-00220f22333a",
    "metadata": {
     "colab": {
@@ -136,11 +143,16 @@
   },
   "outputs": [
    {
-    "name": "stderr",
-    "output_type": "stream",
-    "text": [
-     "/Users/danielsuarez-mash/anaconda3/envs/llm/lib/python3.11/site-packages/huggingface_hub/utils/_deprecation.py:127: FutureWarning: '__init__' (from 'huggingface_hub.inference_api') is deprecated and will be removed from version '0.19.0'. `InferenceApi` client is deprecated in favor of the more feature-complete `InferenceClient`. Check out this guide to learn how to convert your script to use it: https://huggingface.co/docs/huggingface_hub/guides/inference#legacy-inferenceapi-client.\n",
-     " warnings.warn(warning_message, FutureWarning)\n"
+    "ename": "ValidationError",
+    "evalue": "1 validation error for HuggingFaceHub\n__root__\n Did not find huggingfacehub_api_token, please add an environment variable `HUGGINGFACEHUB_API_TOKEN` which contains it, or pass `huggingfacehub_api_token` as a named parameter. (type=value_error)",
+    "output_type": "error",
+    "traceback": [
+     "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+     "\u001b[0;31mValidationError\u001b[0m Traceback (most recent call last)",
+     "Cell \u001b[0;32mIn[14], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# instantiate llm\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m llm \u001b[38;5;241m=\u001b[39m \u001b[43mHuggingFaceHub\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43mrepo_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mtiiuae/falcon-7b-instruct\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_kwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m{\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mtemperature\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m:\u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mpenalty_alpha\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m:\u001b[49m\u001b[38;5;241;43m2\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mtop_k\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m:\u001b[49m\u001b[38;5;241;43m50\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mmax_length\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1000\u001b[39;49m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\n\u001b[1;32m 10\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 12\u001b[0m \u001b[38;5;66;03m# instantiate chain\u001b[39;00m\n\u001b[1;32m 13\u001b[0m llm_chain \u001b[38;5;241m=\u001b[39m LLMChain(\n\u001b[1;32m 14\u001b[0m llm\u001b[38;5;241m=\u001b[39mllm,\n\u001b[1;32m 15\u001b[0m prompt\u001b[38;5;241m=\u001b[39mprompt,\n\u001b[1;32m 16\u001b[0m verbose\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m 17\u001b[0m )\n",
+     "File \u001b[0;32m~/anaconda3/envs/llm/lib/python3.11/site-packages/langchain_core/load/serializable.py:107\u001b[0m, in \u001b[0;36mSerializable.__init__\u001b[0;34m(self, **kwargs)\u001b[0m\n\u001b[1;32m 106\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: Any) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 107\u001b[0m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__init__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 108\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_lc_kwargs \u001b[38;5;241m=\u001b[39m kwargs\n",
+     "File \u001b[0;32m~/anaconda3/envs/llm/lib/python3.11/site-packages/pydantic/v1/main.py:341\u001b[0m, in \u001b[0;36mBaseModel.__init__\u001b[0;34m(__pydantic_self__, **data)\u001b[0m\n\u001b[1;32m 339\u001b[0m values, fields_set, validation_error \u001b[38;5;241m=\u001b[39m validate_model(__pydantic_self__\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m, data)\n\u001b[1;32m 340\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m validation_error:\n\u001b[0;32m--> 341\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m validation_error\n\u001b[1;32m 342\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 343\u001b[0m object_setattr(__pydantic_self__, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m__dict__\u001b[39m\u001b[38;5;124m'\u001b[39m, values)\n",
+     "\u001b[0;31mValidationError\u001b[0m: 1 validation error for HuggingFaceHub\n__root__\n Did not find huggingfacehub_api_token, please add an environment variable `HUGGINGFACEHUB_API_TOKEN` which contains it, or pass `huggingfacehub_api_token` as a named parameter. (type=value_error)"
     ]
    }
   ],
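Note on the notebook's new error output: the cell above it now prints None, i.e. load_dotenv() found no token, which is why instantiating HuggingFaceHub raises the ValidationError. A minimal sketch of the fix the error message itself suggests, reusing the repo id and model_kwargs from the failing cell; the explicit token argument is the only addition:

    # sketch only: either export HUGGINGFACEHUB_API_TOKEN before launching Jupyter,
    # or pass the token explicitly, as the ValidationError message suggests
    import os
    from dotenv import load_dotenv
    from langchain_community.llms import HuggingFaceHub

    load_dotenv()  # expects HUGGINGFACEHUB_API_TOKEN in a local .env file

    llm = HuggingFaceHub(
        repo_id='tiiuae/falcon-7b-instruct',
        huggingfacehub_api_token=os.environ.get('HUGGINGFACEHUB_API_TOKEN'),
        model_kwargs={
            'temperature': 1,
            'penalty_alpha': 2,
            'top_k': 50,
            'max_length': 1000
        }
    )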