{ "cells": [
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
  "import random\n",
  "\n",
  "import pandas as pd\n",
  "\n",
  "# Configuration: the values a reader may want to tune, gathered in one place.\n",
  "CSV_PATH = 'Airbnb_Open_Data.csv'\n",
  "SEED = 42  # seeds the synthetic 'sq. meters' draw so re-runs give the same frame\n",
  "MAX_ROWS = 10000  # upper bound on the number of cleaned listings kept in df\n",
  "SQ_METER_SIZES = [25, 40, 45, 55, 60, 70]\n",
  "SQ_METER_WEIGHTS = [5, 5, 4, 3, 2, 1]  # relative odds of each size; smaller sizes are drawn more often\n",
  "FINAL_COLUMNS = ['price', 'sq. meters', 'description', 'neighbourhood group', 'host name', 'cancellation_policy', 'house_rules']\n",
  "\n",
  "# low_memory=False parses the file in a single pass, so pandas infers one dtype per\n",
  "# column instead of emitting a DtypeWarning about mixed types.\n",
  "raw_df = pd.read_csv(CSV_PATH, low_memory=False)"
 ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
  "raw_df.columns"
 ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
  "# Parse the textual price column into numbers: keep only digits and the decimal point\n",
  "# (drops currency symbols, thousands separators and padding). Missing or unparseable\n",
  "# prices become NaN and are removed by the dropna() below.\n",
  "price = pd.to_numeric(\n",
  "    raw_df['price'].astype(str).str.replace('[^0-9.]', '', regex=True),\n",
  "    errors='coerce',\n",
  ")\n",
  "\n",
  "# 'sq. meters' does not exist in the CSV; it is generated further down.\n",
  "source_columns = [col for col in FINAL_COLUMNS if col != 'sq. meters']\n",
  "\n",
  "# 'description' is a copy of the listing NAME column.\n",
  "listings = raw_df.assign(price=price, description=raw_df['NAME'])\n",
  "# Discard rows whose house_rules is the literal '#NAME?' placeholder, then any row\n",
  "# with a missing value in one of the kept columns.\n",
  "listings = listings.loc[listings['house_rules'] != '#NAME?', source_columns].dropna()\n",
  "# Cast price to int only now: an integer column cannot hold NaN, so the cast must\n",
  "# come after dropna() or pandas falls back to float.\n",
  "listings = listings.head(MAX_ROWS).reset_index(drop=True).astype({'price': int})\n",
  "\n",
  "# Synthetic apartment size: one weighted random draw per listing from a dedicated,\n",
  "# seeded generator so the column is identical on every Restart & Run All.\n",
  "rng = random.Random(SEED)\n",
  "listings['sq. meters'] = rng.choices(SQ_METER_SIZES, weights=SQ_METER_WEIGHTS, k=len(listings))\n",
  "\n",
  "# .copy() gives later cells an independent frame they can add columns to.\n",
  "df = listings[FINAL_COLUMNS].copy()\n",
  "\n",
  "assert list(df.columns) == FINAL_COLUMNS\n",
  "assert df.notna().all().all(), 'unexpected missing values after cleaning'\n",
  "assert len(df) <= MAX_ROWS"
 ] },
 { "cell_type": "code", "execution_count": 72, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 10000/10000 [17:37<00:00, 9.45it/s]\n" ] }, { "data": { "text/html": [ "
\n", " | price | \n", "sq. meters | \n", "description | \n", "neighbourhood group | \n", "host name | \n", "cancellation_policy | \n", "house_rules | \n", "text_vector_ | \n", "
---|---|---|---|---|---|---|---|---|
0 | \n", "966.0 | \n", "25 | \n", "Clean & quiet apt home by the park | \n", "Brooklyn | \n", "Madaline | \n", "strict | \n", "Clean up and treat the home the way you'd like... | \n", "[-0.047521110624074936, 0.03044620156288147, 0... | \n", "
1 | \n", "142.0 | \n", "25 | \n", "Skylit Midtown Castle | \n", "Manhattan | \n", "Jenna | \n", "moderate | \n", "Pet friendly but please confirm with me if the... | \n", "[-0.04690079391002655, 0.061329323798418045, 0... | \n", "
2 | \n", "620.0 | \n", "45 | \n", "THE VILLAGE OF HARLEM....NEW YORK ! | \n", "Manhattan | \n", "Elise | \n", "flexible | \n", "I encourage you to use my kitchen, cooking and... | \n", "[0.00039011164335533977, 0.018310122191905975,... | \n", "
3 | \n", "204.0 | \n", "55 | \n", "Entire Apt: Spacious Studio/Loft by central park | \n", "Manhattan | \n", "Lyndon | \n", "moderate | \n", "Please no smoking in the house, porch or on th... | \n", "[-0.04602213576436043, 0.015605293214321136, 0... | \n", "
4 | \n", "577.0 | \n", "25 | \n", "Large Cozy 1 BR Apartment In Midtown East | \n", "Manhattan | \n", "Michelle | \n", "flexible | \n", "No smoking, please, and no drugs. | \n", "[-0.04859349876642227, -0.01263828668743372, 0... | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
9995 | \n", "745.0 | \n", "60 | \n", "Upper West Side 1BR next to subway/Central Park | \n", "Manhattan | \n", "Doreen | \n", "strict | \n", "Our Herbivorian House manual with detailed rul... | \n", "[-0.0346745029091835, -0.005859952419996262, 0... | \n", "
9996 | \n", "1135.0 | \n", "45 | \n", "Modern and Bright Studio Apt in Williamsburg | \n", "Brooklyn | \n", "Shannon | \n", "strict | \n", "No smoking please! | \n", "[-0.016586357727646828, 0.020517650991678238, ... | \n", "
9997 | \n", "59.0 | \n", "45 | \n", "Holiday in Trendy Williamsburg Apt! | \n", "Brooklyn | \n", "Peter | \n", "strict | \n", "We suggest you use email or texting contact us... | \n", "[-0.05095353722572327, 0.08510775864124298, -0... | \n", "
9998 | \n", "1055.0 | \n", "25 | \n", "Greenwich Village| Private Queen room | \n", "Manhattan | \n", "Kelly | \n", "flexible | \n", "Please treat this house as if it is your own. ... | \n", "[0.00017118529649451375, 0.010939894244074821,... | \n", "
9999 | \n", "285.0 | \n", "25 | \n", "Comfortable bedroom in spacious apt | \n", "Brooklyn | \n", "Arthur | \n", "strict | \n", "Please, No smoking and no pets. We do require ... | \n", "[-0.01795135624706745, -0.029596544802188873, ... | \n", "
10000 rows × 8 columns
\n", "