# Igor Santana
# rnn model sent from github to huggingface
# 9c58361
import pandas as pd
import random
import numpy as np
import pickle
from os import makedirs
from os.path import exists
from gensim.models import Word2Vec, Doc2Vec
from glove import Glove
from sklearn.model_selection import KFold
def _rnn_load(path, songs):
    """Load embeddings from a pickle file, keeping only the requested songs.

    Args:
        path: Location of the pickle file to read.
        songs: Iterable of song identifiers to look up in the unpickled
            object.

    Returns:
        A dict mapping each entry of ``songs`` to the value stored under that
        key in the unpickled object.

    Raises:
        KeyError: If a requested song is missing, assuming the pickled object
            is a dict keyed by song id (TODO confirm against the writer).
    """
    # NOTE(security): pickle.load can execute arbitrary code; only point this
    # at files produced by a trusted pipeline.
    # The context manager closes the handle even if unpickling fails.
    with open(path, 'rb') as handle:
        data = pickle.load(handle)
    return {song: data[song] for song in songs}
def __w2v_load(path, songs):
    """Return the vector of every requested song from a saved Word2Vec model.

    Args:
        path: Location of the model file handed to ``Word2Vec.load``.
        songs: Iterable of song identifiers to look up in the model's ``wv``.

    Returns:
        A dict mapping each entry of ``songs`` to ``wv[song]``.
    """
    vectors = Word2Vec.load(path).wv
    return {song: vectors[song] for song in songs}
def __g_load(path, songs):
    """Return the vector of every requested song from a saved Glove model.

    Args:
        path: Location of the model file handed to ``Glove.load``.
        songs: Iterable of song identifiers; each one is translated to a row
            index through the model's ``dictionary``.

    Returns:
        A dict mapping each entry of ``songs`` to its row of
        ``word_vectors``.
    """
    model = Glove.load(path)
    return {
        song: model.word_vectors[model.dictionary[song]]
        for song in songs
    }
def __load_exp(path, songs):
    """Load a pickled experiment artifact and return it unchanged.

    Args:
        path: Location of the pickle file to read.
        songs: Unused; accepted so this loader has the same signature as the
            other loaders that ``get_embeddings`` calls.

    Returns:
        Whatever object was stored in the pickle file.
    """
    # NOTE(security): pickle.load can execute arbitrary code; only point this
    # at files produced by a trusted pipeline.
    # The context manager closes the handle even if unpickling fails.
    with open(path, 'rb') as handle:
        return pickle.load(handle)
def get_embeddings(path, songs):
    """Load the user-level and session-level embeddings for ``songs``.

    ``path`` is the user-level file. The session-level file sits next to it
    and has the same file name prefixed with ``'s'``. The loader is chosen by
    the first of these substrings found anywhere in ``path``, in this order:
    ``'experiments'``, ``'glove'``, ``'music2vec'``, ``'doc2vec'``, ``'rnn'``.

    Args:
        path: Path of the user-level embedding file, using ``'/'`` separators.
        songs: Song identifiers forwarded to the selected loader.

    Returns:
        A ``(user_embeddings, session_embeddings)`` tuple, or ``({}, {})``
        when ``path`` contains none of the recognised substrings.
    """
    # Prefix only the last path component with 's' to get the session file.
    parent, slash, filename = path.rpartition('/')
    session_file = parent + slash + 's' + filename

    if 'experiments' in path:
        loader = __load_exp
    elif 'glove' in path:
        loader = __g_load
    elif 'music2vec' in path or 'doc2vec' in path:
        # Both model families are read through the Word2Vec loader.
        loader = __w2v_load
    elif 'rnn' in path:
        loader = _rnn_load
    else:
        return {}, {}

    return loader(path, songs), loader(session_file, songs)
def prepare_data(df, conf):
    """Build, or reload from disk, the user-level k-fold train/test splits.

    Folds are cached as pickles under ``tmp/<dataset>/kfold/`` with the names
    ``train_<i>.pkl`` and ``test_<i>.pkl`` (``i`` starting at 1). When the
    directory holds every expected file the folds are read back and ``df`` is
    not touched. Otherwise they are computed from ``df`` and written there.

    Args:
        df: DataFrame with ``user``, ``session``, ``song`` and ``timestamp``
            columns. Only used when the folds have to be computed.
        conf: Configuration mapping; reads ``conf['evaluation']['dataset']``
            and ``conf['evaluation']['k']``.

    Returns:
        A list of ``k`` ``(train, test)`` DataFrame pairs. Freshly computed
        frames are indexed by user and carry a ``history`` column holding one
        list of songs per distinct session of that user.
    """
    ds = conf['evaluation']['dataset']
    k = conf['evaluation']['k']
    path_kfold = 'tmp/{}/kfold/'.format(ds)
    fold_files = [
        (path_kfold + 'train_{}.pkl'.format(j),
         path_kfold + 'test_{}.pkl'.format(j))
        for j in range(1, k + 1)
    ]

    # Reuse the cache only when every fold file is present. Checking just the
    # directory would crash on a partially written cache and never recover.
    if exists(path_kfold) and all(
            exists(train_file) and exists(test_file)
            for train_file, test_file in fold_files):
        return [
            (pd.read_pickle(train_file), pd.read_pickle(test_file))
            for train_file, test_file in fold_files
        ]

    # exist_ok lets an incomplete cache directory be rebuilt in place.
    makedirs(path_kfold, exist_ok=True)

    sessions = df.groupby('session')['song'].apply(lambda x: x.tolist())
    users = df.groupby('user').agg(list)
    # dict.fromkeys drops repeated session ids while keeping first-seen order,
    # so the history is deterministic (a set gives an arbitrary order).
    users['history'] = users['session'].apply(
        lambda x: [sessions[session] for session in dict.fromkeys(x)])
    users = users.drop(['song', 'timestamp', 'session'], axis=1)

    unique_users = df.user.unique()
    # Split on users, not rows, so each user falls entirely in train or test.
    kf = KFold(n_splits=k, shuffle=True)
    kfold = []
    for (train, test), (train_file, test_file) in zip(
            kf.split(unique_users), fold_files):
        train_df = users[users.index.isin(unique_users[train])]
        test_df = users[users.index.isin(unique_users[test])]
        train_df.to_pickle(train_file)
        test_df.to_pickle(test_file)
        kfold.append((train_df, test_df))
    return kfold