{ "cells": [ { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "## Config" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/Users/tranxuanhuy/Documents/GitHub/CustomerReviewSentiment\n" ] } ], "source": [ "%cd .." ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "README.md \u001b[34mdata\u001b[m\u001b[m \u001b[34mtest\u001b[m\u001b[m \u001b[34mutils\u001b[m\u001b[m\n" ] } ], "source": [ "# make sure the \"data\" folder appears in the listing below\n", "!ls" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "%load_ext autoreload\n", "%autoreload 2" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "## Cleaning\n" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "from utils.cleaner import cleaning" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | Unnamed: 0 | \n", "content | \n", "score | \n", "thumbsUpCount | \n", "Application | \n", "
---|---|---|---|---|---|
0 | \n", "0 | \n", "rất thuận tiện , có lợi ích . | \n", "5 | \n", "2 | \n", "Messenger | \n", "
1 | \n", "1 | \n", "tốt | \n", "4 | \n", "213 | \n", "Messenger | \n", "
2 | \n", "2 | \n", "ok | \n", "5 | \n", "1 | \n", "Messenger | \n", "
3 | \n", "3 | \n", "xài rất tốt , nên tải nha | \n", "5 | \n", "8 | \n", "Messenger | \n", "
4 | \n", "4 | \n", "gútttt | \n", "5 | \n", "1 | \n", "Messenger | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
310741 | \n", "99995 | \n", "ok | \n", "5 | \n", "0 | \n", "|
310742 | \n", "99996 | \n", "g o o d | \n", "5 | \n", "0 | \n", "|
310743 | \n", "99997 | \n", "có | \n", "5 | \n", "0 | \n", "|
310744 | \n", "99998 | \n", "nhiều lúc tôi k lấy đc hiệu ứng | \n", "1 | \n", "0 | \n", "|
310745 | \n", "99999 | \n", "tui chưa chơi ko biết có hay ko . | \n", "3 | \n", "0 | \n", "
310746 rows × 5 columns
\n", "