import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Activation, Dense, Dropout, LSTM
from tqdm import tqdm

# Hyperparameters referenced by the alternative stacked-LSTM model kept below;
# the active build_model()/train_model() currently use their own hardcoded values.
neurons = 512  # number of hidden units per LSTM layer
activation_function = "tanh"  # activation function for the LSTM and Dense layers
loss = "mse"  # loss function used to compute the gradient (Mean Squared Error)
optimizer = "adam"  # optimizer used to apply gradient descent
dropout = 0.25  # dropout ratio applied after each LSTM layer to reduce overfitting
batch_size = 128


def preprocess(df):
    """Convert raw OHLCV rows into a float-typed, timestamp-indexed DataFrame."""
    df = df.copy()
    # "ts" arrives in milliseconds; convert to seconds before parsing.
    df["ts"] = df["ts"].astype(np.int64) / 1000
    df["timestamp"] = pd.to_datetime(df["ts"], unit="s")
    df = df[["timestamp", "low", "high", "close", "open", "quoteVol"]]
    for col in ["low", "high", "close", "open", "quoteVol"]:
        df[col] = df[col].astype(float)
    df.set_index("timestamp", inplace=True)
    df["Date"] = pd.to_datetime(df.index.values.tolist()).date
    return df


def normalize(closedf):
    """Scale the price column to [0, 1]; returns the scaled array and the fitted scaler."""
    # Expects a single price column alongside the "Date" helper column.
    scaler = MinMaxScaler(feature_range=(0, 1))
    closedfsc = scaler.fit_transform(
        np.array(closedf.drop("Date", axis=1)).reshape(-1, 1)
    )
    return closedfsc, scaler


def split_train_test(closedfsc, training_size, test_size):
    """Split the scaled series chronologically; test_size is implied by training_size."""
    train_data = closedfsc[:training_size, :]
    test_data = closedfsc[training_size:, :]
    return train_data, test_data


def create_dataset(dataset, time_step=1):
    """Build supervised (X, y) pairs: each window of `time_step` values predicts the next one."""
    dataX, dataY = [], []
    for i in range(len(dataset) - time_step - 1):
        dataX.append(dataset[i : (i + time_step), 0])  # window of time_step values...
        dataY.append(dataset[i + time_step, 0])  # ...predicts the value right after it
    return np.array(dataX), np.array(dataY)


# Alternative three-layer stacked LSTM wired to the hyperparameters above;
# kept for reference but not currently used.
# def build_model(inputs):
#     model = Sequential()
#     model.add(
#         LSTM(
#             neurons,
#             return_sequences=True,
#             input_shape=(inputs.shape[1], inputs.shape[2]),
#             activation=activation_function,
#         )
#     )
#     model.add(Dropout(dropout))
#     model.add(LSTM(neurons, return_sequences=True, activation=activation_function))
#     model.add(Dropout(dropout))
#     model.add(LSTM(neurons, activation=activation_function))
#     model.add(Dropout(dropout))
#     model.add(Dense(units=1))
#     model.add(Activation(activation_function))
#     model.compile(loss=loss, optimizer=optimizer, metrics=["mae"])
#     return model


def build_model():
    """Single-layer LSTM regressor over a univariate sequence of arbitrary length."""
    model = Sequential()
    model.add(LSTM(256, input_shape=(None, 1), activation="relu"))
    model.add(Dense(1))
    model.compile(loss="mean_squared_error", optimizer="adam")
    return model


def train_model(
    model, x_train, y_train, X_test, y_test, epochs, progress_callback=None
):
    """Train one epoch at a time so tqdm and the optional callback can report progress."""
    train_losses = []  # training loss per epoch
    val_losses = []  # validation loss per epoch
    for epoch in tqdm(range(epochs)):
        history = model.fit(
            x_train,
            y_train,
            epochs=1,
            verbose=0,
            validation_data=(X_test, y_test),
            batch_size=32,
        )
        train_losses.append(history.history["loss"][0])
        val_losses.append(history.history["val_loss"][0])
        if progress_callback:
            progress_callback(epoch, history)
    return model, train_losses, val_losses
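

# --- Evaluation sketch (illustrative, not part of the original module) ---
# Because normalize() returns the fitted MinMaxScaler, model outputs can be
# mapped back to price units with inverse_transform before scoring. The
# `evaluate` helper and the choice of RMSE are assumptions added here to
# match the "mse" training loss; they are not defined by the module above.
def evaluate(model, X_test, y_test, scaler):
    preds = model.predict(X_test)  # shape (n, 1), still in [0, 1] scale
    preds = scaler.inverse_transform(preds)  # back to price units
    actual = scaler.inverse_transform(y_test.reshape(-1, 1))
    rmse = float(np.sqrt(np.mean((preds - actual) ** 2)))
    return preds, rmse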
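

# --- Usage sketch (illustrative) ---
# Assumes a CSV of raw candles with the columns preprocess() expects
# ("ts" in milliseconds plus low/high/close/open/quoteVol). The file name
# "candles.csv", the 65/35 split, time_step=15, and epochs=10 are arbitrary
# choices for demonstration, not values taken from the module above.
if __name__ == "__main__":
    raw = pd.read_csv("candles.csv")
    df = preprocess(raw)

    # Model a single price column; normalize() expects it alongside "Date".
    closedf = df[["close", "Date"]]
    closedfsc, scaler = normalize(closedf)

    training_size = int(len(closedfsc) * 0.65)
    test_size = len(closedfsc) - training_size
    train_data, test_data = split_train_test(closedfsc, training_size, test_size)

    time_step = 15
    X_train, y_train = create_dataset(train_data, time_step)
    X_test, y_test = create_dataset(test_data, time_step)

    # Keras LSTMs expect 3-D input: (samples, timesteps, features).
    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
    X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

    model = build_model()
    model, train_losses, val_losses = train_model(
        model, X_train, y_train, X_test, y_test, epochs=10
    )

    preds, rmse = evaluate(model, X_test, y_test, scaler)
    print(f"test RMSE: {rmse:.2f}")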