mindwrapped committed on
Commit
cfb8b3f
·
1 Parent(s): 7f94470

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -25
app.py CHANGED
@@ -9,13 +9,8 @@ import gradio as gr
9
  from huggingface_hub import from_pretrained_keras
10
 
11
  # Download the actual data from http://files.grouplens.org/datasets/movielens/ml-latest-small.zip"
12
- # Use the ratings.csv file
13
- movielens_data_file_url = (
14
- "http://files.grouplens.org/datasets/movielens/ml-latest-small.zip"
15
- )
16
- movielens_zipped_file = keras.utils.get_file(
17
- "ml-latest-small.zip", movielens_data_file_url, extract=False
18
- )
19
  keras_datasets_path = Path(movielens_zipped_file).parents[0]
20
  movielens_dir = keras_datasets_path / "ml-latest-small"
21
 
@@ -27,25 +22,29 @@ if not movielens_dir.exists():
27
  zip.extractall(path=keras_datasets_path)
28
  print("Done!")
29
 
 
30
  ratings_file = movielens_dir / "ratings.csv"
31
  df = pd.read_csv(ratings_file)
32
 
33
- # Make all the encodings
34
  user_ids = df["userId"].unique().tolist()
35
  user2user_encoded = {x: i for i, x in enumerate(user_ids)}
36
- userencoded2user = {i: x for i, x in enumerate(user_ids)}
 
 
 
 
37
  movie_ids = df["movieId"].unique().tolist()
38
  movie2movie_encoded = {x: i for i, x in enumerate(movie_ids)}
39
  movie_encoded2movie = {i: x for i, x in enumerate(movie_ids)}
40
- df["user"] = df["userId"].map(user2user_encoded)
41
  df["movie"] = df["movieId"].map(movie2movie_encoded)
42
-
43
- num_users = len(user2user_encoded)
44
  num_movies = len(movie_encoded2movie)
 
 
45
  df["rating"] = df["rating"].values.astype(np.float32)
46
  # min and max ratings will be used to normalize the ratings later
47
- min_rating = min(df["rating"])
48
- max_rating = max(df["rating"])
49
 
50
  # Load model
51
  model = from_pretrained_keras('mindwrapped/collaborative-filtering-movielens')
@@ -53,14 +52,14 @@ movie_df = pd.read_csv(movielens_dir / "movies.csv")
53
 
54
 
55
  def update_user(id):
56
- return get_top_rated_from_user(id), get_recommendations(id)
57
 
58
 
59
- def get_top_rated_from_user(id):
60
- decoded_id = userencoded2user.get(id)
61
- movies_watched_by_user = df[df.userId == decoded_id]
62
-
63
  # Get the top rated movies by this user
 
64
  top_movies_user = (
65
  movies_watched_by_user.sort_values(by="rating", ascending=False)
66
  .head(5)
@@ -76,10 +75,10 @@ def random_user():
76
 
77
 
78
  def get_recommendations(id):
79
- decoded_id = userencoded2user.get(id)
80
- movies_watched_by_user = df[df.userId == decoded_id]
81
-
82
  # Get the top 10 recommended movies for this user
 
83
  movies_not_watched = movie_df[
84
  ~movie_df["movieId"].isin(movies_watched_by_user.movieId.values)
85
  ]["movieId"]
@@ -88,12 +87,12 @@ def get_recommendations(id):
88
  )
89
  movies_not_watched = [[movie2movie_encoded.get(x)] for x in movies_not_watched]
90
 
91
- # Encode user
92
- user_encoder = id
93
 
94
  # Create data [[user_id, movie_id],...]
95
  user_movie_array = np.hstack(
96
- ([[user_encoder]] * len(movies_not_watched), movies_not_watched)
97
  )
98
 
99
  # Predict ratings for movies not watched
 
9
  from huggingface_hub import from_pretrained_keras
10
 
11
  # Download the actual data from http://files.grouplens.org/datasets/movielens/ml-latest-small.zip"
12
+ movielens_data_file_url = "http://files.grouplens.org/datasets/movielens/ml-latest-small.zip"
13
+ movielens_zipped_file = keras.utils.get_file("ml-latest-small.zip", movielens_data_file_url, extract=False)
 
 
 
 
 
14
  keras_datasets_path = Path(movielens_zipped_file).parents[0]
15
  movielens_dir = keras_datasets_path / "ml-latest-small"
16
 
 
22
  zip.extractall(path=keras_datasets_path)
23
  print("Done!")
24
 
25
+ # Get the ratings file
26
  ratings_file = movielens_dir / "ratings.csv"
27
  df = pd.read_csv(ratings_file)
28
 
29
+ # Make the encodings for users
30
  user_ids = df["userId"].unique().tolist()
31
  user2user_encoded = {x: i for i, x in enumerate(user_ids)}
32
+ user_encoded2user = {i: x for i, x in enumerate(user_ids)}
33
+ df["user"] = df["userId"].map(user2user_encoded)
34
+ num_users = len(user2user_encoded)
35
+
36
+ # Make the encodings for movies
37
  movie_ids = df["movieId"].unique().tolist()
38
  movie2movie_encoded = {x: i for i, x in enumerate(movie_ids)}
39
  movie_encoded2movie = {i: x for i, x in enumerate(movie_ids)}
 
40
  df["movie"] = df["movieId"].map(movie2movie_encoded)
 
 
41
  num_movies = len(movie_encoded2movie)
42
+
43
+ # Set ratings type
44
  df["rating"] = df["rating"].values.astype(np.float32)
45
  # min and max ratings will be used to normalize the ratings later
46
+ # min_rating = min(df["rating"])
47
+ # max_rating = max(df["rating"])
48
 
49
  # Load model
50
  model = from_pretrained_keras('mindwrapped/collaborative-filtering-movielens')
 
52
 
53
 
54
  def update_user(id):
55
+ return get_top_rated_movies_from_user(id), get_recommendations(id)
56
 
57
 
58
+ def get_top_rated_movies_from_user(id):
59
+ decoded_id = user_encoded2user.get(id)
60
+
 
61
  # Get the top rated movies by this user
62
+ movies_watched_by_user = df[df.userId == decoded_id]
63
  top_movies_user = (
64
  movies_watched_by_user.sort_values(by="rating", ascending=False)
65
  .head(5)
 
75
 
76
 
77
  def get_recommendations(id):
78
+ decoded_id = user_encoded2user.get(id)
79
+
 
80
  # Get the top 10 recommended movies for this user
81
+ movies_watched_by_user = df[df.userId == decoded_id]
82
  movies_not_watched = movie_df[
83
  ~movie_df["movieId"].isin(movies_watched_by_user.movieId.values)
84
  ]["movieId"]
 
87
  )
88
  movies_not_watched = [[movie2movie_encoded.get(x)] for x in movies_not_watched]
89
 
90
+ # Encoded user id
91
+ encoded_id = id
92
 
93
  # Create data [[user_id, movie_id],...]
94
  user_movie_array = np.hstack(
95
+ ([[encoded_id]] * len(movies_not_watched), movies_not_watched)
96
  )
97
 
98
  # Predict ratings for movies not watched