{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Querying a Hugging Face dataset through a LangChain vector index\n",
    "\n",
    "This notebook loads the `stance_climate` configuration of the `tweet_eval` dataset, builds a vector-store index over its `text` column, and asks the index a natural-language question.\n",
    "\n",
    "**Caveat (to confirm):** `index.query` presumably answers from a small set of retrieved documents rather than from the whole dataset, so the answer to a question like \"most used hashtag\" should not be read as a true frequency count."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.document_loaders.hugging_face_dataset import HuggingFaceDatasetLoader\n",
    "from langchain.indexes import VectorstoreIndexCreator"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load the dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dataset coordinates: the dataset to load, the configuration (subset) of it,\n",
    "# and the column whose value becomes each document's page content.\n",
    "dataset_name = \"tweet_eval\"\n",
    "name = \"stance_climate\"\n",
    "page_content_column = \"text\"\n",
    "\n",
    "# Keyword arguments make it explicit which string is the dataset path and\n",
    "# which is the configuration name.\n",
    "loader = HuggingFaceDatasetLoader(\n",
    "    path=dataset_name,\n",
    "    page_content_column=page_content_column,\n",
    "    name=name,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Build the index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the index from the loader using VectorstoreIndexCreator's defaults.\n",
    "# NOTE(review): the defaults presumably require embedding-provider credentials\n",
    "# in the environment and create a non-persistent (in-memory) store, so the\n",
    "# index is rebuilt on every run -- confirm for the installed version.\n",
    "index = VectorstoreIndexCreator().from_loaders([loader])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Query the index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "query = \"What are the most used hashtag?\"\n",
    "result = index.query(query)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "' The most used hashtags in this context are #SemST, #TakeDownTheFlag, #LoveWins, #Sustainability, and #Environmental.'"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.10"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}