{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "nwaAZRu1NTiI" }, "source": [ "# Q-learning \n", "\n", "#### This version implements q-learning using a custom enviroment 1 day, with synthetic data, this version implements qtable with SQLITE so you can add several features in the state \n", "\n", "##### Experiments\n", "- Change the reward function and see the results on trading \n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "DDf1gLC2NTiK" }, "outputs": [], "source": [ "# !pip install -r ./requirements.txt\n", "# !pip install stable_baselines3\n", "# !pip install yfinance\n", "# !pip install talib-binary\n", "# !pip install huggingface_sb3\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "LNXxxKojNTiL" }, "outputs": [], "source": [ "import gym\n", "from gym import spaces\n", "from gym.utils import seeding\n", "\n", "import talib as ta\n", "from tqdm.notebook import tqdm\n", "\n", "import yfinance as yf\n", "import pandas as pd\n", "import numpy as np\n", "from matplotlib import pyplot as plt\n", "import timeit\n", "import sqlite3\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def get_syntetic_data(tf, start_date, end_date, plot=True, add_noise=None):\n", " df = pd.date_range(start=start_date, end=end_date, freq=tf)\n", " df = df.to_frame()\n", "\n", " df['v1'] = np.arange(len(df.index))\n", " df[['Open','High','Low','Close','Volume']] = 0.0\n", " df = df.drop([0], axis=1)\n", "\n", " df[\"Close\"]=df[\"v1\"].map(lambda x: np.sin(x/3)+10 )\n", " # df[\"Close\"]=df[\"v1\"].map(lambda x: np.sin(x)+10 + np.sin(x/2) )\n", " if add_noise is not None: # could be 0.5\n", " noise = np.random.normal(0, add_noise, len(df))\n", " df[\"Close\"] += noise\n", "\n", " if plot:\n", " plt.figure(figsize=(15,6))\n", " df['Close'].tail(30).plot()\n", "\n", " df[\"Open\"]=df[\"Close\"].shift(1)\n", " df = df.dropna()\n", " x = 1.5\n", " df[\"High\"] = np.where( df[\"Close\"] > df['Open'], df[\"Close\"]+x, df[\"Open\"]+x )\n", " df[\"Low\"] = np.where( df[\"Close\"] < df['Open'], df[\"Close\"]-x, df[\"Open\"]-x )\n", " df[\"Volume\"] = 10\n", " return df" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "dmAuEhZZNTiL" }, "outputs": [], "source": [ "# Get data\n", "eth_usd = yf.Ticker(\"ETH-USD\")\n", "eth = eth_usd.history(period=\"max\")\n", "\n", "btc_usd = yf.Ticker(\"BTC-USD\")\n", "btc = btc_usd.history(period=\"max\")\n", "print(len(btc))\n", "print(len(eth))\n", "\n", "btc_train = eth[-3015:-200]\n", "# btc_test = eth[-200:]\n", "eth_train = eth[-1864:-200]\n", "eth_test = eth[-200:]\n", "# len(eth_train)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# use synthetic data\n", "# synthetic_data = get_syntetic_data(tf=\"D\", start_date=\"2015-01-01\", end_date=\"2015-02-05\", add_noise=None)\n", "synthetic_data = get_syntetic_data(tf=\"D\", start_date=\"2015-01-01\", end_date=\"2023-01-01\", add_noise=None)\n", "eth_train = synthetic_data[-1864:-200]\n", "eth_test = synthetic_data[-200:]\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "\n", "class Qtable:\n", " def __init__(self):\n", " self.conn = sqlite3.connect(':memory:')\n", " self.cursor = self.conn.cursor()\n", "\n", " def create_table(self):\n", " columns = [(\"s_position\", \"INTEGER\"),(\"s_di\", \"INTEGER\"), (\"s_mfi\", \"INTEGER\"), (\"s_stock_d\", \"INTEGER\"),(\"s_adx\", \"INTEGER\"), (\"action\", \"INTEGER\"), (\"qvalue\", \"REAL\")]\n", " columns_string = \", \".join([f\"{name} {data_type}\" for name, data_type in columns])\n", " columns_keys = \"(s_position, s_di, s_mfi, s_stock_d, s_adx, action)\"\n", " query = f\"CREATE TABLE IF NOT EXISTS QTABLE ({columns_string}, PRIMARY KEY {columns_keys})\"\n", " self.cursor.execute(query)\n", " self.conn.commit()\n", "\n", " def set_q_value(self, state, action, qvalue):\n", " query = f\"INSERT INTO QTABLE (s_position, s_di, s_mfi, s_stock_d, s_adx, action, qvalue) VALUES (?,?,?,?,?,?,?) ON CONFLICT (s_position, s_di, s_mfi, s_stock_d, s_adx, action) DO UPDATE SET qvalue=?\"\n", " self.cursor.execute(query,state.tolist()+[action]+[qvalue]+[qvalue])\n", " self.conn.commit()\n", "\n", " def get_q_value(self, state, action):\n", " self.cursor.execute(\"SELECT qvalue from QTABLE where s_position=? and s_di=? and s_mfi=? and s_stock_d=? and s_adx=? and action=?\",state.tolist()+[action])\n", " rows = self.cursor.fetchall()\n", " if len(rows) > 0:\n", " return rows[0][0]\n", " return None\n", "\n", " def get_max_q_value(self, state):\n", " self.cursor.execute(\"SELECT max(qvalue) from QTABLE where s_position=? and s_di=? and s_mfi=? and s_stock_d=? and s_adx=?\",state.tolist())\n", " rows = self.cursor.fetchall()\n", " if len(rows) > 0:\n", " return rows[0][0]\n", " return None\n", "\n", " def get_max_action(self, state):\n", " self.cursor.execute(\"SELECT action, max(qvalue) from QTABLE where s_position=? and s_di=? and s_mfi=? and s_stock_d=? and s_adx=?\",state.tolist())\n", " rows = self.cursor.fetchall()\n", " if len(rows) > 0:\n", " return rows[0][0]\n", " return None\n", "\n", " def getall(self):\n", " self.cursor.execute(\"SELECT * from QTABLE \")\n", " return self.cursor.fetchall()\n", " \n", " " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def initialize_q_table():\n", " # s_ state variables\n", " qtable = Qtable()\n", " qtable.create_table() \n", " return qtable" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Policy\n", "\n", "def greedy_policy(Qtable, state):\n", " # Exploitation: take the action with the highest state, action value\n", " # if we dont have a state with values return DO_NOTHING \n", " action = Qtable.get_max_action(state)\n", " # if action is None:\n", " # action = 2\n", " # action = np.argmax(Qtable[state])\n", " return action\n", "\n", "\n", "def epsilon_greedy_policy(Qtable, state, epsilon, env):\n", " # Randomly generate a number between 0 and 1\n", " random_num = np.random.uniform(size=1)\n", " # if random_num > greater than epsilon --> exploitation\n", " if random_num > epsilon:\n", " # Take the action with the highest value given a state\n", " # np.argmax can be useful here\n", " action = greedy_policy(Qtable, state)\n", " # else --> exploration\n", " else:\n", " # action = np.random.random_integers(4,size=1)[0]\n", " action = env.action_space.sample()\n", " \n", " return action" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "wlC-EdLENTiN" }, "outputs": [], "source": [ "\n", "def train(n_training_episodes, min_epsilon, max_epsilon, decay_rate, env, max_steps, Qtable, learning_rate, gamma):\n", " state_history = []\n", "# np.random.seed(42)\n", " for episode in range(n_training_episodes):\n", " # Reduce epsilon (because we need less and less exploration)\n", " epsilon = min_epsilon + (max_epsilon - min_epsilon)*np.exp(-decay_rate*episode)\n", " # Reset the environment\n", " state = env.reset()\n", " step = 0\n", " done = False\n", "\n", " # repeat\n", " for step in range(max_steps):\n", " # Choose the action At using epsilon greedy policy\n", " action = epsilon_greedy_policy(Qtable, state, epsilon, env)\n", "\n", " # Take action At and observe Rt+1 and St+1\n", " # Take the action (a) and observe the outcome state(s') and reward (r)\n", " new_state, reward, done, info = env.step(action)\n", "\n", " # Update Q(s,a):= Q(s,a) + lr [R(s,a) + gamma * max Q(s',a') - Q(s,a)]\n", " # Qtable[state][action] = Qtable[state][action] + learning_rate * (reward + gamma * ( np.max(Qtable[new_state]) ) - Qtable[state][action] )\n", " qvalue = Qtable.get_q_value(state, action)\n", " if qvalue is None:\n", " qvalue = 0\n", "\n", " q_max_state = Qtable.get_max_q_value(new_state)\n", " if q_max_state is None:\n", " q_max_state = 0\n", " \n", " n_qvalue = qvalue + learning_rate * (reward + gamma * ( q_max_state ) - qvalue )\n", " Qtable.set_q_value(state, action, n_qvalue)\n", "\n", " # If done, finish the episode\n", " if done:\n", " break\n", " \n", " # Our next state is the new state\n", " state = new_state\n", "\n", " state_history.append(state) \n", "\n", " return Qtable, state_history" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def evaluate_agent(env, max_steps, n_eval_episodes, Q, random=False):\n", " \"\"\"\n", " Evaluate the agent for ``n_eval_episodes`` episodes and returns average reward and std of reward.\n", " :param env: The evaluation environment\n", " :param n_eval_episodes: Number of episode to evaluate the agent\n", " :param Q: The Q-table\n", " :param seed: The evaluation seed array (for taxi-v3)\n", " \"\"\"\n", " episode_positive_perc_trades = []\n", " episode_rewards = []\n", " episode_profits = []\n", " for episode in tqdm(range(n_eval_episodes), disable=random):\n", " state = env.reset()\n", " step = 0\n", " done = False\n", " total_rewards_ep = 0\n", " total_profit_ep = 0\n", " \n", " for step in range(max_steps):\n", " # Take the action (index) that have the maximum expected future reward given that state\n", " if random:\n", " action = env.action_space.sample()\n", " else:\n", " action = greedy_policy(Q, state)\n", "\n", " new_state, reward, done, info = env.step(action)\n", " total_rewards_ep += reward\n", " \n", " if done:\n", " break\n", " state = new_state\n", "\n", " if len(env._trade_history) > 0:\n", " episode_positive_perc_trades.append(np.count_nonzero(np.array(env._trade_history) > 0)/len(env._trade_history))\n", " episode_rewards.append(total_rewards_ep)\n", " episode_profits.append(env.history['total_profit'][-1])\n", " # print(env.history)\n", " # env.render()\n", " # assert 0\n", "\n", " mean_reward = np.mean(episode_rewards)\n", " std_reward = np.std(episode_rewards)\n", " mean_profit = np.mean(episode_profits)\n", " std_profit = np.std(episode_profits)\n", " positive_perc_trades = np.mean(episode_positive_perc_trades)\n", "\n", " return mean_reward, std_reward, mean_profit, std_profit, positive_perc_trades" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from enum import Enum\n", "class Actions(Enum):\n", " Sell = 0\n", " Buy = 1\n", " Do_nothing = 2\n", "\n", "class CustTradingEnv(gym.Env):\n", "\n", " def __init__(self, df, max_steps=0, random_start=True):\n", " self.seed(seed=43)\n", " self.df = df\n", " self.prices, self.signal_features = self._process_data()\n", "\n", " # spaces\n", " self.action_space = spaces.Discrete(3)\n", " self.observation_space = spaces.Box(low=0, high=1999, shape=(1,) , dtype=np.float64)\n", "\n", " # episode\n", " self._start_tick = 0\n", " self._end_tick = 0\n", " self._done = None\n", " self._current_tick = None\n", " self._last_trade_tick = None\n", " self._position = None\n", " self._position_history = None\n", " self._total_reward = None\n", " self._total_profit = None\n", " self._first_rendering = None\n", " self.history = None\n", " self._max_steps = max_steps\n", " self._start_episode_tick = None\n", " self._trade_history = None\n", " self._trade_tick_history = None\n", " self._random_start = random_start\n", " self._action_history = None\n", "\n", " def reset(self):\n", " self._done = False\n", " if self._random_start:\n", " self._start_episode_tick = np.random.randint(1,high=len(self.df)- self._max_steps )\n", " self._end_tick = self._start_episode_tick + self._max_steps\n", " else:\n", " self._start_episode_tick = 1\n", " self._end_tick = len(self.df)-1\n", " # self._start_episode_tick = np.random.randint(1,len(self.df)- self._max_steps )\n", " # self._end_tick = self._start_episode_tick + self._max_steps\n", " self._current_tick = self._start_episode_tick\n", " self._last_trade_tick = self._current_tick - 1\n", " self._position = 0\n", " self._action_history = [-1] * (len(self.prices)) \n", " # self._position_history = (self.window_size * [None]) + [self._position]\n", " self._total_reward = 0.\n", " self._total_profit = 0.\n", " self._trade_history = []\n", " self._trade_tick_history = []\n", " self.history = {}\n", " return self._get_observation()\n", "\n", "\n", " def step(self, action):\n", " self._done = False\n", " self._current_tick += 1\n", "\n", " if self._current_tick == self._end_tick:\n", " self._done = True\n", "\n", " self._do_act(action)\n", " step_reward = self._calculate_reward(action)\n", " self._total_reward += step_reward\n", "\n", " observation = self._get_observation()\n", " info = dict(\n", " total_reward = self._total_reward,\n", " total_profit = self._total_profit,\n", " position = self._position,\n", " action = action\n", " )\n", " self._update_history(info)\n", "\n", " return observation, step_reward, self._done, info\n", "\n", " def seed(self, seed=None):\n", " self.np_random, seed = seeding.np_random(seed)\n", " return [seed]\n", " \n", " def _get_observation(self):\n", " if self._position > 0:\n", " position = 1\n", " elif self._position < 0:\n", " position = -1\n", " else:\n", " position = 0\n", " return np.concatenate( [[position], self.signal_features[self._current_tick]] )\n", "\n", " def _update_history(self, info):\n", " if not self.history:\n", " self.history = {key: [] for key in info.keys()}\n", "\n", " for key, value in info.items():\n", " self.history[key].append(value)\n", "\n", "\n", " def render(self, mode='human'):\n", " window_ticks = np.arange(len(self.prices))\n", " prices = self.prices\n", " # prices = self.prices[self._start_episode_tick:self._end_tick+1]\n", " plt.plot(prices)\n", "\n", " open_buy = []\n", " close_buy = []\n", " open_sell = []\n", " close_sell = []\n", " do_nothing = []\n", " penalty = []\n", " action_not_in_table = []\n", "\n", " for i, tick in enumerate(window_ticks):\n", " if self._action_history[i] == 1:\n", " open_buy.append(tick)\n", " elif self._action_history[i] == 2 :\n", " close_buy.append(tick)\n", " elif self._action_history[i] == 3 :\n", " open_sell.append(tick)\n", " elif self._action_history[i] == 4 :\n", " close_sell.append(tick)\n", " elif self._action_history[i] == 0 :\n", " do_nothing.append(tick)\n", " elif self._action_history[i] == 5 :\n", " penalty.append(tick)\n", " elif self._action_history[i] == 6 :\n", " action_not_in_table.append(tick)\n", "\n", " plt.plot(open_buy, prices[open_buy], 'go', marker=\"^\")\n", " plt.plot(close_buy, prices[close_buy], 'go', marker=\"v\")\n", " plt.plot(open_sell, prices[open_sell], 'ro', marker=\"v\")\n", " plt.plot(close_sell, prices[close_sell], 'ro', marker=\"^\")\n", " \n", " plt.plot(do_nothing, prices[do_nothing], 'oc')\n", " plt.plot(penalty, prices[penalty], 'yo')\n", "\n", " plt.plot(action_not_in_table, prices[action_not_in_table], 'ob')\n", "\n", " plt.suptitle(\n", " \"Total Reward: %.6f\" % self._total_reward + ' ~ ' +\n", " \"Total Profit: %.6f\" % self._total_profit\n", " )\n", "\n", " def _do_bin(self,df):\n", " df = pd.cut(df,bins=np.arange(0,105,5),labels=False, include_lowest=True)\n", " return df\n", "\n", " # Our state will be encode with 4 features MFI and Stochastic(only D line), ADX and DI+DI-\n", " # the values of each feature will be binned in 10 bins, ex:\n", " # MFI goes from 0-100, if we get 25 will put on the second bin \n", " # DI+DI- if DI+ is over DI- set (1 otherwise 0) \n", " # \n", " # that will give a state space of 10(MFI) * 10(STOCH) * 10(ADX) * 2(DI) = 2000 states\n", " # encoded as bins of DI MFI STOCH ADX = 1 45.2 25.4 90.1 , binned = 1 4 2 9 state = 1429 \n", " def _process_data(self):\n", " timeperiod = 14\n", " self.df = self.df.copy()\n", " \n", " self.df['adx_r'] = ta.ADX(self.df['High'], self.df['Low'], self.df['Close'], timeperiod=timeperiod)\n", " self.df['mfi_r'] = ta.MFI(self.df['High'], self.df['Low'], self.df['Close'],self.df['Volume'], timeperiod=timeperiod)\n", " _, self.df['stock_d_r'] = ta.STOCH(self.df['High'], self.df['Low'], self.df['Close'], fastk_period=5, slowk_period=3, slowk_matype=0, slowd_period=3, slowd_matype=0)\n", " self.df['p_di'] = ta.PLUS_DI(self.df['High'], self.df['Low'], self.df['Close'], timeperiod=timeperiod)\n", " self.df['m_di'] = ta.MINUS_DI(self.df['High'], self.df['Low'], self.df['Close'], timeperiod=timeperiod)\n", " self.df['di'] = np.where( self.df['p_di'] > self.df['m_di'], 1, 0)\n", " self.df = self.df.dropna()\n", " self.df['mfi'] = self._do_bin(self.df['mfi_r'])\n", " self.df['stock_d'] = self._do_bin(self.df['stock_d_r'])\n", " self.df['adx'] = self._do_bin(self.df['adx_r'])\n", "\n", " # self.df['state'] = self.df['di']*1000+ self.df['mfi']*100 + self.df['stock_d']*10 + self.df['adx']\n", "\n", " prices = self.df.loc[:, 'Close'].to_numpy()\n", " # signal_features = self.df.loc[:, 'state'].to_numpy()\n", " signal_features = self.df.loc[:, ['di', 'mfi', 'stock_d','adx']].to_numpy()\n", "\n", " return prices, signal_features\n", "\n", "\n", " def _do_act(self, action):\n", " if action is None:\n", " self._action_history[self._current_tick-1]=6\n", "\n", " current_price = self.prices[self._current_tick]\n", " last_price = self.prices[self._current_tick - 1]\n", " price_diff = current_price - last_price\n", "\n", " # OPEN BUY - 1\n", " if action == Actions.Buy.value and self._position == 0:\n", " self._position = last_price\n", " # step_reward += price_diff\n", " self._last_trade_tick = self._current_tick - 1\n", " self._action_history[self._current_tick-1]=1\n", "\n", " # CLOSE BUY - 2\n", " elif action == Actions.Sell.value and self._position > 0:\n", " self._position = 0\n", " profit = self.prices[self._current_tick-1] - self.prices[self._last_trade_tick] \n", " self._total_profit += profit\n", " self._action_history[self._current_tick-1]=2\n", " self._trade_history.append(profit)\n", " self._trade_tick_history.append((self._last_trade_tick, self._current_tick-1, self.prices[self._last_trade_tick], self.prices[self._current_tick-1], profit))\n", "\n", " elif action == Actions.Buy.value and self._position > 0:\n", " self._action_history[self._current_tick-1]=5\n", "\n", " # OPEN SELL - 3\n", " elif action == Actions.Sell.value and self._position == 0:\n", " self._position = -1 * last_price\n", " self._last_trade_tick = self._current_tick - 1\n", " self._action_history[self._current_tick-1]=3\n", "\n", " # CLOSE SELL - 4\n", " elif action == Actions.Buy.value and self._position < 0:\n", " self._position = 0\n", " profit = -1 * (self.prices[self._current_tick-1] - self.prices[self._last_trade_tick]) \n", " self._total_profit += profit\n", " self._action_history[self._current_tick-1]=4\n", " self._trade_history.append(profit)\n", " self._trade_tick_history.append((self._last_trade_tick, self._current_tick-1, self.prices[self._last_trade_tick], self.prices[self._current_tick-1], profit))\n", "\n", " elif action == Actions.Sell.value and self._position < 0:\n", " self._action_history[self._current_tick-1]=5\n", "\n", " # DO NOTHING - 0\n", " elif action == Actions.Do_nothing.value and self._position > 0:\n", " self._action_history[self._current_tick-1]=0\n", " elif action == Actions.Do_nothing.value and self._position < 0:\n", " self._action_history[self._current_tick-1]=0\n", " elif action == Actions.Do_nothing.value and self._position == 0:\n", " self._action_history[self._current_tick-1]=0\n", "\n", " \n", " def _calculate_reward(self, action):\n", " current_price = self.prices[self._current_tick]\n", " last_price = self.prices[self._current_tick - 1]\n", " price_diff = current_price - last_price\n", "\n", " if not self.history:\n", " return 0\n", "\n", " # simple strategy, reward when close the buy or sell\n", " # closed buy\n", " if self._position == 0 and self.history['position'][-1] > 0 :\n", " return self.prices[self._current_tick-1] - self.prices[self._last_trade_tick] \n", " \n", " # close sell\n", " if self._position == 0 and self.history['position'][-1] < 0:\n", " return -1 * (self.prices[self._current_tick-1] - self.prices[self._last_trade_tick]) \n", "\n", "\n", " # # reward when open the buy or sell (DONT WORK)\n", " # # open buy\n", " # if self._position > 0 and self.history['position'][-1] == 0 :\n", " # return self.prices[self._current_tick-1] - self.prices[self._last_trade_tick] \n", " \n", " # # open sell\n", " # if self._position < 0 and self.history['position'][-1] == 0:\n", " # return -1 * (self.prices[self._current_tick-1] - self.prices[self._last_trade_tick]) \n", "\n", " # # PRB\n", " # return price_diff * self._position\n", "\n", "\n", " return 0\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Training parameters\n", "n_training_episodes = 20000 # Total training episodes\n", "learning_rate = 0.2 # Learning rate\n", "\n", "# Environment parameters\n", "max_steps = 20 # Max steps per episode\n", "gamma = 0.95 # Discounting rate\n", "\n", "# Exploration parameters\n", "max_epsilon = 1.0 # Exploration probability at start\n", "# max_epsilon = 1.0 # Exploration probability at start\n", "min_epsilon = 0.05 # Minimum exploration probability \n", "# min_epsilon = 0.05 # Minimum exploration probability \n", "decay_rate = 0.0005 # Exponential decay rate for exploration prob" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "REhmfLkYNTiN", "outputId": "cf676f6d-83df-43f5-89fe-3258e0041d9d" }, "outputs": [], "source": [ "# create env\n", "env = CustTradingEnv(df=eth_train, max_steps=max_steps, random_start=True)\n", "Qtable_trading = initialize_q_table()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "\n", "# train \n", "Qtable_trading, state_history = train(n_training_episodes, min_epsilon, max_epsilon, \n", " decay_rate, env, max_steps, Qtable_trading, learning_rate, gamma )\n", "\n", "len(Qtable_trading.getall())\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Qtable_trading.getall()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "max_steps = 60 \n", "env_test = CustTradingEnv(df=eth_test, max_steps=max_steps, random_start=True)\n", "n_eval_episodes = 1000\n", "\n", "evaluate_agent(env_test, max_steps, n_eval_episodes, Qtable_trading)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "plt.figure(figsize=(15,6))\n", "plt.cla()\n", "env_test.render()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# trade sequential\n", "max_steps = len(eth_test)\n", "env_test = CustTradingEnv(df=eth_test, max_steps=max_steps, random_start=False)\n", "n_eval_episodes = 1\n", "\n", "evaluate_agent(env_test, max_steps, n_eval_episodes, Qtable_trading)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "plt.figure(figsize=(15,6))\n", "plt.cla()\n", "env_test.render()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# env_test._trade_tick_history\n", "# Qtable_trading.getall()[:10]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "colab": { "provenance": [] }, "kernelspec": { "display_name": "Python 3.8.13 ('rl2')", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.13" }, "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "cd60ab8388a66026f336166410d6a8a46ddf65ece2e85ad2d46c8b98d87580d1" } }, "widgets": { "application/vnd.jupyter.widget-state+json": { "01a2dbcb714e40148b41c761fcf43147": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "20b0f38ec3234ff28a62a286cd57b933": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "PasswordModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "PasswordModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "PasswordView", "continuous_update": true, "description": "Token:", "description_tooltip": null, "disabled": false, "layout": "IPY_MODEL_01a2dbcb714e40148b41c761fcf43147", "placeholder": "", "style": "IPY_MODEL_90c874e91b304ee1a7ef147767ac00ce", "value": "" } }, "270cbb5d6e9c4b1e9e2f39c8b3b0c15f": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "VBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "VBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "VBoxView", "box_style": "", "children": [ "IPY_MODEL_a02224a43d8d4af3bd31d326540d25da", "IPY_MODEL_20b0f38ec3234ff28a62a286cd57b933", "IPY_MODEL_f6c845330d6743c0b35c2c7ad834de77", "IPY_MODEL_f1675c09d16a4251b403f9c56255f168", "IPY_MODEL_c1a82965ae26479a98e4fdbde1e64ec2" ], "layout": "IPY_MODEL_3fa248114ac24656ba74923936a94d2d" } }, "2dc5fa9aa3334dfcbdee9c238f2ef60b": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "3e753b0212644990b558c68853ff2041": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "3fa248114ac24656ba74923936a94d2d": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": "center", "align_self": null, "border": null, "bottom": null, "display": "flex", "flex": null, "flex_flow": "column", "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": "50%" } }, "42d140b838b844819bc127afc1b7bc84": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "90c874e91b304ee1a7ef147767ac00ce": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "9d847f9a7d47458d8cd57d9b599e47c6": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "a02224a43d8d4af3bd31d326540d25da": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_caef095934ec47bbb8b64eab22049284", "placeholder": "", "style": "IPY_MODEL_2dc5fa9aa3334dfcbdee9c238f2ef60b", "value": "