Michelangiolo committed on
Commit
e4c59f6
1 Parent(s): 00bf920
Files changed (3) hide show
  1. airbnb.ipynb +167 -49
  2. app.py +6 -6
  3. history.ipynb +107 -0
airbnb.ipynb CHANGED
@@ -337,56 +337,9 @@
337
  },
338
  {
339
  "cell_type": "code",
340
- "execution_count": 213,
341
  "metadata": {},
342
- "outputs": [
343
- {
344
- "name": "stderr",
345
- "output_type": "stream",
346
- "text": [
347
- "c:\\Users\\ardit\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\gradio\\deprecation.py:43: UserWarning: You have unused kwarg parameters in Slider, please remove them: {'step_size': 100}\n",
348
- " warnings.warn(\n",
349
- "c:\\Users\\ardit\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\gradio\\deprecation.py:43: UserWarning: You have unused kwarg parameters in Radio, please remove them: {'multiselect': False}\n",
350
- " warnings.warn(\n"
351
- ]
352
- },
353
- {
354
- "name": "stdout",
355
- "output_type": "stream",
356
- "text": [
357
- "Running on local URL: http://127.0.0.1:7901\n",
358
- "\n",
359
- "To create a public link, set `share=True` in `launch()`.\n"
360
- ]
361
- },
362
- {
363
- "data": {
364
- "text/html": [
365
- "<div><iframe src=\"http://127.0.0.1:7901/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
366
- ],
367
- "text/plain": [
368
- "<IPython.core.display.HTML object>"
369
- ]
370
- },
371
- "metadata": {},
372
- "output_type": "display_data"
373
- },
374
- {
375
- "data": {
376
- "text/plain": []
377
- },
378
- "execution_count": 213,
379
- "metadata": {},
380
- "output_type": "execute_result"
381
- },
382
- {
383
- "name": "stdout",
384
- "output_type": "stream",
385
- "text": [
386
- "[[700, 45, 'Queens', 'I want to take a break from work 😴!!!']]\n"
387
- ]
388
- }
389
- ],
390
  "source": [
391
  "import gradio as gr\n",
392
  "import statistics\n",
@@ -459,6 +412,171 @@
459
  " btn.click(predict, [input1, input2, input3, input4], [output1, output2])\n",
460
  "demo.launch(share=False)"
461
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
462
  }
463
  ],
464
  "metadata": {
 
337
  },
338
  {
339
  "cell_type": "code",
340
+ "execution_count": null,
341
  "metadata": {},
342
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
343
  "source": [
344
  "import gradio as gr\n",
345
  "import statistics\n",
 
412
  " btn.click(predict, [input1, input2, input3, input4], [output1, output2])\n",
413
  "demo.launch(share=False)"
414
  ]
415
+ },
416
+ {
417
+ "cell_type": "code",
418
+ "execution_count": null,
419
+ "metadata": {},
420
+ "outputs": [],
421
+ "source": [
422
+ "import os\n",
423
+ "os.system('pip install openpyxl')\n",
424
+ "os.system('pip install sentence-transformers')\n",
425
+ "import pandas as pd\n",
426
+ "import gradio as gr\n",
427
+ "import statistics\n",
428
+ "from sklearn.neighbors import NearestNeighbors\n",
429
+ "from sentence_transformers import SentenceTransformer\n",
430
+ "\n",
431
+ "df = pd.read_parquet('df_encoded.parquet')\n",
432
+ "df['neighbourhood group'][0:2500] = df['neighbourhood group'][0:2500].apply(lambda x : 'Manhattan')\n",
433
+ "df['neighbourhood group'][2500:5000] = df['neighbourhood group'][0:2500].apply(lambda x : 'Brooklyn')\n",
434
+ "df['neighbourhood group'][5000:7500] = df['neighbourhood group'][0:2500].apply(lambda x : 'Queens')\n",
435
+ "df['neighbourhood group'][7500:] = df['neighbourhood group'][0:2500].apply(lambda x : 'Bronx')\n",
436
+ "df['location'] = df['neighbourhood group']\n",
437
+ "df = df[['price', 'sq. meters', 'description', 'location', 'host name', 'cancellation_policy', 'house_rules', 'text_vector_']]\n",
438
+ "df = df.reset_index(drop=True)\n",
439
+ "df\n",
440
+ "\n",
441
+ "model = SentenceTransformer('all-mpnet-base-v2') #all-MiniLM-L6-v2 #all-mpnet-base-v2\n",
442
+ "\n",
443
+ "#prepare model #we run it anew in the search function every time, after the initial filtering\n",
444
+ "# nbrs = NearestNeighbors(n_neighbors=3, algorithm='ball_tree').fit(df['text_vector_'].values.tolist())\n",
445
+ "\n",
446
+ "def closest_number(x):\n",
447
+ " closest_numbers = [25, 40, 45, 55, 60, 70]\n",
448
+ " closest_number = closest_numbers[0]\n",
449
+ " min_distance = abs(x - closest_number)\n",
450
+ " for number in closest_numbers[1:]:\n",
451
+ " distance = abs(x - number)\n",
452
+ " if distance < min_distance:\n",
453
+ " closest_number = number\n",
454
+ " min_distance = distance\n",
455
+ " return closest_number\n",
456
+ "\n",
457
+ "def search(df, query):\n",
458
+ " product = model.encode(query).tolist()\n",
459
+ " # product = df.iloc[0]['text_vector_'] #use one of the products as sample\n",
460
+ "\n",
461
+ " nbrs = NearestNeighbors(n_neighbors=3, algorithm='ball_tree').fit(df['text_vector_'].values.tolist())\n",
462
+ " distances, indices = nbrs.kneighbors([product]) #input the vector of the reference object\n",
463
+ "\n",
464
+ " #print out the description of every recommended product\n",
465
+ " df_search = df.iloc[list(indices)[0]].drop(['text_vector_'], axis=1) #.sort_values('avgFeedbackScore', ascending=False)\n",
466
+ "\n",
467
+ " return df_search.sort_values('price', ascending=False)\n",
468
+ "\n",
469
+ "def filter_df(df, column_name, filter_type, filter_value):\n",
470
+ " if filter_type == '==':\n",
471
+ " df_filtered = df[df[column_name]==filter_value]\n",
472
+ " elif filter_type == '>=':\n",
473
+ " df_filtered = df[df[column_name]>=filter_value]\n",
474
+ " elif filter_type == '<=':\n",
475
+ " df_filtered = df[df[column_name]<=filter_value]\n",
476
+ " return df_filtered"
477
+ ]
478
+ },
479
+ {
480
+ "cell_type": "code",
481
+ "execution_count": 7,
482
+ "metadata": {},
483
+ "outputs": [],
484
+ "source": [
485
+ "def predict(history, input1, input2, input3, input4):\n",
486
+ " history.append([input1, input2, input3, input4])\n",
487
+ "\n",
488
+ " print(history)\n",
489
+ " df_location = filter_df(df, 'location', '==', input3)\n",
490
+ " df_size = filter_df(df_location, 'sq. meters', '==', input2)\n",
491
+ " df_price = filter_df(df_size, 'price', '<=', input1)\n",
492
+ " df_result = search(df_price, input4)\n",
493
+ "\n",
494
+ " prediction = [\n",
495
+ " round(statistics.mean([x[0] for x in history])), #price\n",
496
+ " closest_number(statistics.mean([x[1] for x in history])), #square meters\n",
497
+ " statistics.mode([x[2] for x in history]) #state\n",
498
+ " ]\n",
499
+ "\n",
500
+ " print(history)\n",
501
+ "\n",
502
+ " return df_result, prediction"
503
+ ]
504
+ },
505
+ {
506
+ "cell_type": "code",
507
+ "execution_count": 8,
508
+ "metadata": {},
509
+ "outputs": [
510
+ {
511
+ "name": "stderr",
512
+ "output_type": "stream",
513
+ "text": [
514
+ "c:\\Users\\ardit\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\gradio\\deprecation.py:43: UserWarning: You have unused kwarg parameters in Slider, please remove them: {'step_size': 100}\n",
515
+ " warnings.warn(\n",
516
+ "c:\\Users\\ardit\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\gradio\\deprecation.py:43: UserWarning: You have unused kwarg parameters in Radio, please remove them: {'multiselect': False}\n",
517
+ " warnings.warn(\n"
518
+ ]
519
+ },
520
+ {
521
+ "name": "stdout",
522
+ "output_type": "stream",
523
+ "text": [
524
+ "Running on local URL: http://127.0.0.1:7863\n",
525
+ "\n",
526
+ "To create a public link, set `share=True` in `launch()`.\n"
527
+ ]
528
+ },
529
+ {
530
+ "data": {
531
+ "text/html": [
532
+ "<div><iframe src=\"http://127.0.0.1:7863/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
533
+ ],
534
+ "text/plain": [
535
+ "<IPython.core.display.HTML object>"
536
+ ]
537
+ },
538
+ "metadata": {},
539
+ "output_type": "display_data"
540
+ },
541
+ {
542
+ "data": {
543
+ "text/plain": []
544
+ },
545
+ "execution_count": 8,
546
+ "metadata": {},
547
+ "output_type": "execute_result"
548
+ },
549
+ {
550
+ "name": "stdout",
551
+ "output_type": "stream",
552
+ "text": [
553
+ "[[700, 45, 'Brooklyn', 'I want to take a break from work 😴!!!']]\n",
554
+ "[[700, 45, 'Brooklyn', 'I want to take a break from work 😴!!!']]\n",
555
+ "[[700, 45, 'Brooklyn', 'I want to take a break from work 😴!!!'], [700, 45, 'Brooklyn', 'I want to take a break from work 😴!!!']]\n",
556
+ "[[700, 45, 'Brooklyn', 'I want to take a break from work 😴!!!'], [700, 45, 'Brooklyn', 'I want to take a break from work 😴!!!']]\n"
557
+ ]
558
+ }
559
+ ],
560
+ "source": [
561
+ "with gr.Blocks(theme=gr.themes.Soft(primary_hue='amber', secondary_hue='gray', neutral_hue='amber')) as demo:\n",
562
+ " history = gr.Variable(value=[]) #beginning\n",
563
+ " gr.Markdown(\n",
564
+ " \"\"\"\n",
565
+ " # Airbnb Search Engine\n",
566
+ " \"\"\"\n",
567
+ " )\n",
568
+ " input1 = gr.Slider(100, 1200, value=700, step_size=100, label=\"Max Price\")\n",
569
+ " input2 = gr.Radio([25, 40, 45, 55, 60, 70], multiselect=False, label='square meters', value=45)\n",
570
+ " input3 = gr.Radio(['Manhattan', 'Brooklyn', 'Queens', 'Bronx'], multiselect=False, label='State', value='Brooklyn')\n",
571
+ " input4 = gr.Textbox(label='Query', value='I want to take a break from work 😴!!!')\n",
572
+ "\n",
573
+ " btn = gr.Button(value=\"Search for a Room\")\n",
574
+ " output1 = gr.Dataframe()\n",
575
+ " output2 = gr.Textbox(label='prediction for the next search')\n",
576
+ " # btn.click(greet, inputs='text', outputs=['dataframe'])\n",
577
+ " btn.click(predict, [history, input1, input2, input3, input4], [output1, output2])\n",
578
+ "demo.launch(share=False)"
579
+ ]
580
  }
581
  ],
582
  "metadata": {
app.py CHANGED
@@ -23,7 +23,7 @@ model = SentenceTransformer('all-mpnet-base-v2') #all-MiniLM-L6-v2 #all-mpnet-ba
23
  # nbrs = NearestNeighbors(n_neighbors=3, algorithm='ball_tree').fit(df['text_vector_'].values.tolist())
24
 
25
  def closest_number(x):
26
- closest_numbers = [10, 20, 30, 40]
27
  closest_number = closest_numbers[0]
28
  min_distance = abs(x - closest_number)
29
  for number in closest_numbers[1:]:
@@ -54,8 +54,7 @@ def filter_df(df, column_name, filter_type, filter_value):
54
  df_filtered = df[df[column_name]<=filter_value]
55
  return df_filtered
56
 
57
- history = list()
58
- def predict(input1, input2, input3, input4):
59
  history.append([input1, input2, input3, input4])
60
 
61
  print(history)
@@ -66,13 +65,14 @@ def predict(input1, input2, input3, input4):
66
 
67
  prediction = [
68
  round(statistics.mean([x[0] for x in history])), #price
69
- closest_number(statistics.mean([x[1] for x in history])), #square room
70
  statistics.mode([x[2] for x in history]) #state
71
  ]
72
 
73
  return df_result, prediction
74
 
75
  with gr.Blocks(theme=gr.themes.Soft(primary_hue='amber', secondary_hue='gray', neutral_hue='amber')) as demo:
 
76
  gr.Markdown(
77
  """
78
  # Airbnb Search Engine
@@ -80,12 +80,12 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue='amber', secondary_hue='gray', n
80
  )
81
  input1 = gr.Slider(100, 1200, value=700, step_size=100, label="Max Price")
82
  input2 = gr.Radio([25, 40, 45, 55, 60, 70], multiselect=False, label='square meters', value=45)
83
- input3 = gr.Radio(['Manhattan', 'Brooklyn', 'Queens', 'Bronx'], multiselect=False, label='State', value='Queens')
84
  input4 = gr.Textbox(label='Query', value='I want to take a break from work 😴!!!')
85
 
86
  btn = gr.Button(value="Search for a Room")
87
  output1 = gr.Dataframe()
88
  output2 = gr.Textbox(label='prediction for the next search')
89
  # btn.click(greet, inputs='text', outputs=['dataframe'])
90
- btn.click(predict, [input1, input2, input3, input4], [output1, output2])
91
  demo.launch(share=False)
 
23
  # nbrs = NearestNeighbors(n_neighbors=3, algorithm='ball_tree').fit(df['text_vector_'].values.tolist())
24
 
25
  def closest_number(x):
26
+ closest_numbers = [25, 40, 45, 55, 60, 70]
27
  closest_number = closest_numbers[0]
28
  min_distance = abs(x - closest_number)
29
  for number in closest_numbers[1:]:
 
54
  df_filtered = df[df[column_name]<=filter_value]
55
  return df_filtered
56
 
57
+ def predict(history, input1, input2, input3, input4):
 
58
  history.append([input1, input2, input3, input4])
59
 
60
  print(history)
 
65
 
66
  prediction = [
67
  round(statistics.mean([x[0] for x in history])), #price
68
+ closest_number(statistics.mean([x[1] for x in history])), #square meters
69
  statistics.mode([x[2] for x in history]) #state
70
  ]
71
 
72
  return df_result, prediction
73
 
74
  with gr.Blocks(theme=gr.themes.Soft(primary_hue='amber', secondary_hue='gray', neutral_hue='amber')) as demo:
75
+ history = gr.Variable(value=[]) #beginning
76
  gr.Markdown(
77
  """
78
  # Airbnb Search Engine
 
80
  )
81
  input1 = gr.Slider(100, 1200, value=700, step_size=100, label="Max Price")
82
  input2 = gr.Radio([25, 40, 45, 55, 60, 70], multiselect=False, label='square meters', value=45)
83
+ input3 = gr.Radio(['Manhattan', 'Brooklyn', 'Queens', 'Bronx'], multiselect=False, label='State', value='Brooklyn')
84
  input4 = gr.Textbox(label='Query', value='I want to take a break from work 😴!!!')
85
 
86
  btn = gr.Button(value="Search for a Room")
87
  output1 = gr.Dataframe()
88
  output2 = gr.Textbox(label='prediction for the next search')
89
  # btn.click(greet, inputs='text', outputs=['dataframe'])
90
+ btn.click(predict, [history, input1, input2, input3, input4], [output1, output2])
91
  demo.launch(share=False)
history.ipynb ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 2,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stderr",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "c:\\Users\\ardit\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\gradio\\deprecation.py:43: UserWarning: You have unused kwarg parameters in Radio, please remove them: {'multiselect': False}\n",
13
+ " warnings.warn(\n"
14
+ ]
15
+ },
16
+ {
17
+ "name": "stdout",
18
+ "output_type": "stream",
19
+ "text": [
20
+ "Running on local URL: http://127.0.0.1:7861\n",
21
+ "\n",
22
+ "To create a public link, set `share=True` in `launch()`.\n"
23
+ ]
24
+ },
25
+ {
26
+ "data": {
27
+ "text/html": [
28
+ "<div><iframe src=\"http://127.0.0.1:7861/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
29
+ ],
30
+ "text/plain": [
31
+ "<IPython.core.display.HTML object>"
32
+ ]
33
+ },
34
+ "metadata": {},
35
+ "output_type": "display_data"
36
+ },
37
+ {
38
+ "data": {
39
+ "text/plain": []
40
+ },
41
+ "execution_count": 2,
42
+ "metadata": {},
43
+ "output_type": "execute_result"
44
+ },
45
+ {
46
+ "name": "stdout",
47
+ "output_type": "stream",
48
+ "text": [
49
+ "[40]\n",
50
+ "[40, 30]\n",
51
+ "[40, 30, 10]\n",
52
+ "[40, 30, 10, 10]\n",
53
+ "[40, 30, 10, 10, 10]\n"
54
+ ]
55
+ }
56
+ ],
57
+ "source": [
58
+ "import gradio as gr\n",
59
+ "import statistics\n",
60
+ "\n",
61
+ "def predict(history, input1):\n",
62
+ " history.append(input1)\n",
63
+ "\n",
64
+ " print(history)\n",
65
+ " total = statistics.mean(history)\n",
66
+ "\n",
67
+ " return total\n",
68
+ "\n",
69
+ "with gr.Blocks(theme=gr.themes.Soft(primary_hue='amber', secondary_hue='gray', neutral_hue='amber')) as demo:\n",
70
+ " gr.Markdown(\n",
71
+ " \"\"\"\n",
72
+ " # Gradio with History\n",
73
+ " \"\"\"\n",
74
+ " )\n",
75
+ " history = gr.Variable(value=[]) #beginning\n",
76
+ " input1 = gr.Radio([10, 20, 30, 40, 50], multiselect=False, label='value')\n",
77
+ " btn = gr.Button(value=\"Search for a Room\")\n",
78
+ " output1 = gr.Textbox(label='value')\n",
79
+ " # btn.click(greet, inputs='text', outputs=['dataframe'])\n",
80
+ " btn.click(predict, [history, input1], [output1])\n",
81
+ "demo.launch(share=False)"
82
+ ]
83
+ }
84
+ ],
85
+ "metadata": {
86
+ "kernelspec": {
87
+ "display_name": "Python 3",
88
+ "language": "python",
89
+ "name": "python3"
90
+ },
91
+ "language_info": {
92
+ "codemirror_mode": {
93
+ "name": "ipython",
94
+ "version": 3
95
+ },
96
+ "file_extension": ".py",
97
+ "mimetype": "text/x-python",
98
+ "name": "python",
99
+ "nbconvert_exporter": "python",
100
+ "pygments_lexer": "ipython3",
101
+ "version": "3.9.13"
102
+ },
103
+ "orig_nbformat": 4
104
+ },
105
+ "nbformat": 4,
106
+ "nbformat_minor": 2
107
+ }