Spaces:
Runtime error
Runtime error
File size: 27,284 Bytes
03b400c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"from langchain.document_loaders.base import Document\n",
"from langchain.indexes import VectorstoreIndexCreator\n",
"from langchain.utilities import ApifyWrapper"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Run the Apify website-content-crawler Actor and block until the run ends.\n",
"# call_actor waits for the run to finish, so the crawl is bounded in two ways\n",
"# to keep this cell from hanging: a cap on crawled pages and a run timeout.\n",
"MAX_CRAWL_PAGES = 50  # tune upward for fuller coverage of the docs site\n",
"ACTOR_TIMEOUT_SECS = 600  # passed through to the Apify client's Actor call\n",
"\n",
"apify = ApifyWrapper()\n",
"\n",
"loader = apify.call_actor(\n",
"    actor_id=\"apify/website-content-crawler\",\n",
"    run_input={\n",
"        \"startUrls\": [{\"url\": \"https://python.langchain.com/en/latest/\"}],\n",
"        \"maxCrawlPages\": MAX_CRAWL_PAGES,\n",
"    },\n",
"    # Turn each dataset item into a Document; a falsy text value becomes \"\".\n",
"    dataset_mapping_function=lambda item: Document(\n",
"        page_content=item[\"text\"] or \"\", metadata={\"source\": item[\"url\"]}\n",
"    ),\n",
"    timeout_secs=ACTOR_TIMEOUT_SECS,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Build an index from the documents supplied by the Apify dataset loader.\n",
"index = VectorstoreIndexCreator().from_loaders(\n",
"    [loader],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Query the index and show the result as the cell's output.\n",
"query = \"What is LangChain?\"\n",
"result = index.query_with_sources(query)\n",
"result"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
|