File size: 227,012 Bytes
b13dc5d |
|
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "nwaAZRu1NTiI"
},
"source": [
"# Q-learning \n",
"\n",
"#### This version implements Q-learning using a custom environment with a 1-day timeframe and synthetic data\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"id": "DDf1gLC2NTiK"
},
"outputs": [],
"source": [
"# !pip install -r ./requirements.txt\n",
"# !pip install stable_baselines3[extra]\n",
"# !pip install yfinance\n",
"# !pip install talib-binary\n",
"# !pip install huggingface_sb3\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "LNXxxKojNTiL"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"import gym\n",
"from gym import spaces\n",
"from gym.utils import seeding\n",
"\n",
"import talib as ta\n",
"from tqdm.notebook import tqdm\n",
"\n",
"import yfinance as yf\n",
"import pandas as pd\n",
"import numpy as np\n",
"from matplotlib import pyplot as plt\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def get_syntetic_data(tf, start_date, end_date, plot=True, add_noise=None):\n",
"    \"\"\"Build a synthetic OHLCV frame whose close is the sum of two sine waves.\n",
"\n",
"    Args:\n",
"        tf: frequency string handed to ``pd.date_range`` (e.g. \"D\").\n",
"        start_date: first timestamp of the generated range.\n",
"        end_date: last timestamp of the generated range.\n",
"        plot: when True, plot the last 30 close values on a new figure.\n",
"        add_noise: standard deviation of the Gaussian noise added to the\n",
"            close, or None for a noise-free series (0.5 is a usable value).\n",
"\n",
"    Returns:\n",
"        DataFrame indexed by timestamp with the columns v1, Open, High, Low,\n",
"        Close and Volume. The first generated row is dropped because it has\n",
"        no previous close to serve as its open.\n",
"    \"\"\"\n",
"    wick = 1.5  # distance of High/Low from the candle body\n",
"    index = pd.date_range(start=start_date, end=end_date, freq=tf)\n",
"    ticks = np.arange(len(index))\n",
"\n",
"    # df[\"Close\"] = sin(v1) + 10 would give a single-frequency wave instead\n",
"    df = pd.DataFrame({\"v1\": ticks, \"Close\": np.sin(ticks) + 10 + np.sin(ticks / 2)}, index=index)\n",
"    if add_noise is not None:\n",
"        df[\"Close\"] += np.random.normal(0, add_noise, len(df))\n",
"\n",
"    if plot:\n",
"        plt.figure(figsize=(15, 6))\n",
"        df[\"Close\"].tail(30).plot()\n",
"\n",
"    # each candle opens at the previous close; the first row has none and is dropped\n",
"    df[\"Open\"] = df[\"Close\"].shift(1)\n",
"    # .copy() avoids pandas' SettingWithCopyWarning on the assignments below\n",
"    df = df.dropna().copy()\n",
"    body_top = np.maximum(df[\"Open\"], df[\"Close\"])\n",
"    body_bottom = np.minimum(df[\"Open\"], df[\"Close\"])\n",
"    df[\"High\"] = body_top + wick\n",
"    df[\"Low\"] = body_bottom - wick\n",
"    df[\"Volume\"] = 10\n",
"    return df[[\"v1\", \"Open\", \"High\", \"Low\", \"Close\", \"Volume\"]]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"id": "dmAuEhZZNTiL"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"3075\n",
"1926\n"
]
}
],
"source": [
"# Get data: download the full price history for both assets (needs network access)\n",
"eth_usd = yf.Ticker(\"ETH-USD\")\n",
"eth = eth_usd.history(period=\"max\")\n",
"\n",
"btc_usd = yf.Ticker(\"BTC-USD\")\n",
"btc = btc_usd.history(period=\"max\")\n",
"print(len(btc))\n",
"print(len(eth))\n",
"\n",
"# The most recent 200 rows are held out for testing.\n",
"# btc_train must be cut from the BTC frame (it was previously sliced from eth).\n",
"btc_train = btc[-3015:-200]\n",
"# btc_test = btc[-200:]\n",
"eth_train = eth[-1864:-200]\n",
"eth_test = eth[-200:]\n",
"# len(eth_train)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 1080x432 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Replace the downloaded prices with the synthetic sine-wave series\n",
"synthetic_data = get_syntetic_data(\n",
"    tf=\"D\", start_date=\"2015-01-01\", end_date=\"2023-01-01\", add_noise=None\n",
")\n",
"# last 200 rows are the test set, the 1664 rows before them the training set\n",
"eth_train = synthetic_data.iloc[-1864:-200]\n",
"eth_test = synthetic_data.iloc[-200:]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"def initialize_q_table(state_space, action_space):\n",
"    \"\"\"Return a ``(state_space, action_space)`` array of zero-valued Q estimates.\"\"\"\n",
"    return np.zeros(shape=(state_space, action_space))"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>v1</th>\n",
" <th>Open</th>\n",
" <th>High</th>\n",
" <th>Low</th>\n",
" <th>Close</th>\n",
" <th>Volume</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2022-06-16</th>\n",
" <td>2723</td>\n",
" <td>10.345167</td>\n",
" <td>11.845167</td>\n",
" <td>8.261013</td>\n",
" <td>9.761013</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2022-06-17</th>\n",
" <td>2724</td>\n",
" <td>9.761013</td>\n",
" <td>11.261013</td>\n",
" <td>7.270246</td>\n",
" <td>8.770246</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2022-06-18</th>\n",
" <td>2725</td>\n",
" <td>8.770246</td>\n",
" <td>10.270246</td>\n",
" <td>6.740366</td>\n",
" <td>8.240366</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2022-06-19</th>\n",
" <td>2726</td>\n",
" <td>8.240366</td>\n",
" <td>10.279113</td>\n",
" <td>6.740366</td>\n",
" <td>8.779113</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2022-06-20</th>\n",
" <td>2727</td>\n",
" <td>8.779113</td>\n",
" <td>11.646191</td>\n",
" <td>7.279113</td>\n",
" <td>10.146191</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2022-12-28</th>\n",
" <td>2918</td>\n",
" <td>11.717571</td>\n",
" <td>13.217571</td>\n",
" <td>9.977599</td>\n",
" <td>11.477599</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2022-12-29</th>\n",
" <td>2919</td>\n",
" <td>11.477599</td>\n",
" <td>12.977599</td>\n",
" <td>9.029289</td>\n",
" <td>10.529289</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2022-12-30</th>\n",
" <td>2920</td>\n",
" <td>10.529289</td>\n",
" <td>12.029289</td>\n",
" <td>8.251117</td>\n",
" <td>9.751117</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2022-12-31</th>\n",
" <td>2921</td>\n",
" <td>9.751117</td>\n",
" <td>11.251117</td>\n",
" <td>8.204337</td>\n",
" <td>9.704337</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2023-01-01</th>\n",
" <td>2922</td>\n",
" <td>9.704337</td>\n",
" <td>11.654716</td>\n",
" <td>8.204337</td>\n",
" <td>10.154716</td>\n",
" <td>10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>200 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" v1 Open High Low Close Volume\n",
"2022-06-16 2723 10.345167 11.845167 8.261013 9.761013 10\n",
"2022-06-17 2724 9.761013 11.261013 7.270246 8.770246 10\n",
"2022-06-18 2725 8.770246 10.270246 6.740366 8.240366 10\n",
"2022-06-19 2726 8.240366 10.279113 6.740366 8.779113 10\n",
"2022-06-20 2727 8.779113 11.646191 7.279113 10.146191 10\n",
"... ... ... ... ... ... ...\n",
"2022-12-28 2918 11.717571 13.217571 9.977599 11.477599 10\n",
"2022-12-29 2919 11.477599 12.977599 9.029289 10.529289 10\n",
"2022-12-30 2920 10.529289 12.029289 8.251117 9.751117 10\n",
"2022-12-31 2921 9.751117 11.251117 8.204337 9.704337 10\n",
"2023-01-01 2922 9.704337 11.654716 8.204337 10.154716 10\n",
"\n",
"[200 rows x 6 columns]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"eth_test"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# Policy\n",
"\n",
"def greedy_policy(Qtable, state):\n",
"    \"\"\"Pick the highest-valued action for ``state`` (exploitation).\n",
"\n",
"    A state whose Q-values are all still zero has not been learned yet, so\n",
"    action 2 (do nothing) is returned for it.\n",
"    \"\"\"\n",
"    q_values = Qtable[state]\n",
"    # Any non-zero entry means the state has learned values. Testing only the\n",
"    # max would treat a state whose best value is exactly 0 as unlearned.\n",
"    if np.any(q_values != 0):\n",
"        return np.argmax(q_values)\n",
"    return 2\n",
"\n",
"\n",
"def epsilon_greedy_policy(Qtable, state, epsilon, env):\n",
"    \"\"\"Choose an action for ``state``, exploring when a random draw is at most ``epsilon``.\n",
"\n",
"    Exploration samples from ``env.action_space``; exploitation defers to\n",
"    ``greedy_policy``.\n",
"    \"\"\"\n",
"    # one draw from [0, 1) decides between exploiting and exploring\n",
"    draw = np.random.uniform(size=1)[0]\n",
"    if draw > epsilon:\n",
"        # exploitation: best known action for this state\n",
"        return greedy_policy(Qtable, state)\n",
"    # exploration: random action\n",
"    return env.action_space.sample()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"id": "wlC-EdLENTiN"
},
"outputs": [],
"source": [
"def train(n_training_episodes, min_epsilon, max_epsilon, decay_rate, env, max_steps, Qtable, learning_rate, gamma):\n",
"    \"\"\"Run tabular Q-learning on ``env``, updating ``Qtable`` in place.\n",
"\n",
"    Epsilon decays exponentially from ``max_epsilon`` towards ``min_epsilon``\n",
"    with the episode index. Returns ``(Qtable, state_history)`` where\n",
"    ``state_history`` lists the states the agent moved into.\n",
"    \"\"\"\n",
"    state_history = []\n",
"    # np.random.seed(42)\n",
"    epsilon_span = max_epsilon - min_epsilon\n",
"    for episode in range(n_training_episodes):\n",
"        # less and less exploration as training progresses\n",
"        epsilon = min_epsilon + epsilon_span * np.exp(-decay_rate * episode)\n",
"        state = env.reset()\n",
"\n",
"        for _ in range(max_steps):\n",
"            action = epsilon_greedy_policy(Qtable, state, epsilon, env)\n",
"\n",
"            # take the action and observe the next state and the reward\n",
"            new_state, reward, done, info = env.step(action)\n",
"\n",
"            # Q(s,a) := Q(s,a) + lr * [R(s,a) + gamma * max Q(s',a') - Q(s,a)]\n",
"            td_target = reward + gamma * np.max(Qtable[new_state])\n",
"            td_error = td_target - Qtable[state][action]\n",
"            Qtable[state][action] = Qtable[state][action] + learning_rate * td_error\n",
"\n",
"            if done:\n",
"                break\n",
"\n",
"            state = new_state\n",
"            state_history.append(state)\n",
"\n",
"    return Qtable, state_history"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"from enum import Enum\n",
"class Actions(Enum):\n",
"    \"\"\"Integer action codes accepted by the trading environment.\"\"\"\n",
"\n",
"    Sell = 0\n",
"    Buy = 1\n",
"    Do_nothing = 2\n",
"\n",
"class CustTradingEnv(gym.Env):\n",
"    \"\"\"Single-asset trading environment whose state is a discretised indicator code.\n",
"\n",
"    Observations are the integer codes built in ``_process_data``; actions are\n",
"    the ``Actions`` values (0 = sell, 1 = buy, 2 = do nothing). At most one\n",
"    position, long or short, is open at a time.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, df, max_steps=0, random_start=True):\n",
"        \"\"\"Derive prices and state codes from ``df`` and set up the episode fields.\n",
"\n",
"        Args:\n",
"            df: frame with High, Low, Close and Volume columns.\n",
"            max_steps: episode length in ticks, used when ``random_start`` is True.\n",
"            random_start: when True each episode starts at a random tick;\n",
"                otherwise it runs from tick 1 to the last row.\n",
"        \"\"\"\n",
"        self.seed()\n",
"        self.df = df\n",
"        self.prices, self.signal_features = self._process_data()\n",
"\n",
"        # spaces\n",
"        self.action_space = spaces.Discrete(3)\n",
"        # NOTE(review): _get_observation returns a scalar state code, not a (1,) float box - confirm\n",
"        self.observation_space = spaces.Box(low=0, high=1999, shape=(1,), dtype=np.float64)\n",
"\n",
"        # episode\n",
"        self._start_tick = 0\n",
"        self._end_tick = 0\n",
"        self._done = None\n",
"        self._current_tick = None\n",
"        self._last_trade_tick = None\n",
"        self._position = None\n",
"        self._position_history = None\n",
"        self._total_reward = None\n",
"        self._total_profit = None\n",
"        self._first_rendering = None\n",
"        self.history = None\n",
"        self._max_steps = max_steps\n",
"        self._start_episode_tick = None\n",
"        self._trade_history = None\n",
"        self._random_start = random_start\n",
"\n",
"    def reset(self):\n",
"        \"\"\"Start a new episode and return its first observation.\n",
"\n",
"        Raises:\n",
"            ValueError: if ``random_start`` is set and the processed frame is\n",
"                too short to hold an episode of ``max_steps`` ticks.\n",
"        \"\"\"\n",
"        self._done = False\n",
"        if self._random_start:\n",
"            latest_start = len(self.df) - self._max_steps\n",
"            if latest_start <= 1:\n",
"                # np.random.randint would fail here with an opaque \"low >= high\"\n",
"                raise ValueError(\n",
"                    \"Not enough rows (%d) for a random start with max_steps=%d\"\n",
"                    % (len(self.df), self._max_steps)\n",
"                )\n",
"            self._start_episode_tick = np.random.randint(1, high=latest_start)\n",
"            self._end_tick = self._start_episode_tick + self._max_steps\n",
"        else:\n",
"            self._start_episode_tick = 1\n",
"            self._end_tick = len(self.df) - 1\n",
"        self._current_tick = self._start_episode_tick\n",
"        self._last_trade_tick = self._current_tick - 1\n",
"        self._position = 0\n",
"        self._position_history = []\n",
"        self._total_reward = 0.\n",
"        self._total_profit = 0.\n",
"        self._trade_history = []\n",
"        self.history = {}\n",
"        return self._get_observation()\n",
"\n",
"    def step(self, action):\n",
"        \"\"\"Advance one tick, score ``action`` and return ``(observation, reward, done, info)``.\"\"\"\n",
"        self._done = False\n",
"        self._current_tick += 1\n",
"\n",
"        if self._current_tick == self._end_tick:\n",
"            self._done = True\n",
"\n",
"        step_reward = self._calculate_reward(action)\n",
"        self._total_reward += step_reward\n",
"\n",
"        observation = self._get_observation()\n",
"        info = dict(\n",
"            total_reward = self._total_reward,\n",
"            total_profit = self._total_profit,\n",
"            position = self._position,\n",
"            action = action\n",
"        )\n",
"        self._update_history(info)\n",
"\n",
"        return observation, step_reward, self._done, info\n",
"\n",
"    def seed(self, seed=None):\n",
"        \"\"\"Create ``self.np_random`` from ``seed`` and return the seed in a list.\"\"\"\n",
"        self.np_random, seed = seeding.np_random(seed)\n",
"        return [seed]\n",
"\n",
"    def _get_observation(self):\n",
"        \"\"\"Return the state code at the current tick.\"\"\"\n",
"        return self.signal_features[self._current_tick]\n",
"\n",
"    def _update_history(self, info):\n",
"        \"\"\"Append every ``info`` value to the per-key lists in ``self.history``.\"\"\"\n",
"        if not self.history:\n",
"            self.history = {key: [] for key in info.keys()}\n",
"\n",
"        for key, value in info.items():\n",
"            self.history[key].append(value)\n",
"\n",
"    def render(self, mode='human'):\n",
"        \"\"\"Plot the episode's prices with one marker per recorded action.\"\"\"\n",
"        prices = self.prices[self._start_episode_tick:self._end_tick+1]\n",
"        plt.plot(prices)\n",
"\n",
"        # Format string per position-history code. Colour and marker live in one\n",
"        # format string so matplotlib does not warn about a redundant marker.\n",
"        # Code -1 (penalised action) is not drawn.\n",
"        marker_by_code = {\n",
"            1: 'g^',  # open buy\n",
"            2: 'gv',  # close buy\n",
"            3: 'rv',  # open sell\n",
"            4: 'r^',  # close sell\n",
"            0: 'yo',  # do nothing\n",
"        }\n",
"        for code, fmt in marker_by_code.items():\n",
"            ticks = [i for i, pos in enumerate(self._position_history) if pos == code]\n",
"            plt.plot(ticks, prices[ticks], fmt)\n",
"\n",
"        plt.suptitle(\n",
"            \"Total Reward: %.6f\" % self._total_reward + ' ~ ' +\n",
"            \"Total Profit: %.6f\" % self._total_profit\n",
"        )\n",
"\n",
"    def _calculate_reward(self, action):\n",
"        \"\"\"Apply ``action`` to the current position and return the step reward.\n",
"\n",
"        Opening or holding a position earns the signed one-tick price change.\n",
"        Closing earns the realised trade result, which is also added to the\n",
"        total profit. Repeating buy/sell in the direction already held costs\n",
"        1% of the previous price. Doing nothing while flat costs the absolute\n",
"        price change. One code is appended to ``_position_history`` per call:\n",
"        1 open buy, 2 close buy, 3 open sell, 4 close sell, 0 do nothing,\n",
"        -1 penalised action.\n",
"        \"\"\"\n",
"        step_reward = 0\n",
"\n",
"        current_price = self.prices[self._current_tick]\n",
"        last_price = self.prices[self._current_tick - 1]\n",
"        price_diff = current_price - last_price\n",
"\n",
"        penalty = -1 * last_price * 0.01\n",
"        # OPEN BUY - 1\n",
"        if action == Actions.Buy.value and self._position == 0:\n",
"            self._position = 1\n",
"            step_reward += price_diff\n",
"            self._last_trade_tick = self._current_tick - 1\n",
"            self._position_history.append(1)\n",
"\n",
"        # buying while already long is penalised\n",
"        elif action == Actions.Buy.value and self._position > 0:\n",
"            step_reward += penalty\n",
"            self._position_history.append(-1)\n",
"        # CLOSE SELL - 4\n",
"        elif action == Actions.Buy.value and self._position < 0:\n",
"            self._position = 0\n",
"            step_reward += -1 * (self.prices[self._current_tick -1] - self.prices[self._last_trade_tick])\n",
"            self._total_profit += step_reward\n",
"            self._position_history.append(4)\n",
"            self._trade_history.append(step_reward)\n",
"\n",
"        # OPEN SELL - 3\n",
"        elif action == Actions.Sell.value and self._position == 0:\n",
"            self._position = -1\n",
"            step_reward += -1 * price_diff\n",
"            self._last_trade_tick = self._current_tick - 1\n",
"            self._position_history.append(3)\n",
"        # CLOSE BUY - 2\n",
"        elif action == Actions.Sell.value and self._position > 0:\n",
"            self._position = 0\n",
"            step_reward += self.prices[self._current_tick -1] - self.prices[self._last_trade_tick]\n",
"            self._total_profit += step_reward\n",
"            self._position_history.append(2)\n",
"            self._trade_history.append(step_reward)\n",
"        # selling while already short is penalised\n",
"        elif action == Actions.Sell.value and self._position < 0:\n",
"            step_reward += penalty\n",
"            self._position_history.append(-1)\n",
"\n",
"        # DO NOTHING - 0\n",
"        elif action == Actions.Do_nothing.value and self._position > 0:\n",
"            step_reward += price_diff\n",
"            self._position_history.append(0)\n",
"        elif action == Actions.Do_nothing.value and self._position < 0:\n",
"            step_reward += -1 * price_diff\n",
"            self._position_history.append(0)\n",
"        elif action == Actions.Do_nothing.value and self._position == 0:\n",
"            step_reward += -1 * abs(price_diff)\n",
"            self._position_history.append(0)\n",
"\n",
"        return step_reward\n",
"\n",
"    def _do_bin(self, df):\n",
"        \"\"\"Map values in [0, 100] to bin indices 0-9 (ten bins of width 10).\"\"\"\n",
"        df = pd.cut(df, bins=[0,10,20,30,40,50,60,70,80,90,100], labels=False, include_lowest=True)\n",
"        return df\n",
"\n",
"    def _process_data(self):\n",
"        \"\"\"Compute the indicators and return ``(prices, signal_features)`` arrays.\n",
"\n",
"        The state is encoded from four features: MFI, Stochastic (D line only),\n",
"        ADX and DI+/DI-. MFI, Stochastic and ADX are each binned into 10 bins\n",
"        (e.g. an MFI of 25 gets bin index 2); the DI feature is 1 when DI+ is\n",
"        over DI-, otherwise 0.\n",
"\n",
"        That gives a state space of 10(MFI) * 10(STOCH) * 10(ADX) * 2(DI) = 2000 states,\n",
"        encoded as DI MFI STOCH ADX: 1 45.2 25.4 90.1 is binned to 1 4 2 9, state = 1429.\n",
"        \"\"\"\n",
"        timeperiod = 14\n",
"        self.df = self.df.copy()\n",
"\n",
"        self.df['mfi_r'] = ta.MFI(self.df['High'], self.df['Low'], self.df['Close'],self.df['Volume'], timeperiod=timeperiod)\n",
"        _, self.df['stock_d_r'] = ta.STOCH(self.df['High'], self.df['Low'], self.df['Close'], fastk_period=5, slowk_period=3, slowk_matype=0, slowd_period=3, slowd_matype=0)\n",
"        self.df['adx_r'] = ta.ADX(self.df['High'], self.df['Low'], self.df['Close'], timeperiod=timeperiod)\n",
"        self.df['p_di'] = ta.PLUS_DI(self.df['High'], self.df['Low'], self.df['Close'], timeperiod=timeperiod)\n",
"        self.df['m_di'] = ta.MINUS_DI(self.df['High'], self.df['Low'], self.df['Close'], timeperiod=timeperiod)\n",
"        self.df['di'] = np.where( self.df['p_di'] > self.df['m_di'], 1, 0)\n",
"\n",
"        # rows where an indicator is still NaN are removed;\n",
"        # .copy() avoids pandas' SettingWithCopyWarning on the assignments below\n",
"        self.df = self.df.dropna().copy()\n",
"        self.df['mfi'] = self._do_bin(self.df['mfi_r'])\n",
"        self.df['stock_d'] = self._do_bin(self.df['stock_d_r'])\n",
"        self.df['adx'] = self._do_bin(self.df['adx_r'])\n",
"        self.df['state'] = self.df['di']*1000+ self.df['mfi']*100 + self.df['stock_d']*10 + self.df['adx']\n",
"\n",
"        prices = self.df.loc[:, 'Close'].to_numpy()\n",
"        signal_features = self.df.loc[:, 'state'].to_numpy()\n",
"\n",
"        return prices, signal_features"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# Training parameters\n",
"n_training_episodes = 20000  # total number of training episodes\n",
"learning_rate = 0.2          # step size of the Q-value update\n",
"\n",
"# Environment parameters\n",
"max_steps = 20               # maximum number of steps per episode\n",
"gamma = 0.95                 # discount rate\n",
"\n",
"# Exploration parameters\n",
"max_epsilon = 1.0            # exploration probability at the start\n",
"min_epsilon = 0.05           # minimum exploration probability\n",
"decay_rate = 0.0005          # exponential decay rate of the exploration probability"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "REhmfLkYNTiN",
"outputId": "cf676f6d-83df-43f5-89fe-3258e0041d9d"
},
"outputs": [],
"source": [
"# create env\n",
"env = CustTradingEnv(df=eth_train, max_steps=max_steps)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# create q-table: one row per encoded state, one column per action\n",
"\n",
"state_space = 2000  # 2 (DI) * 10 (MFI) * 10 (STOCH) * 10 (ADX)\n",
"action_space = env.action_space.n  # buy sell do_nothing\n",
"\n",
"Qtable_trading = initialize_q_table(state_space, action_space)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"99"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# train with ETH\n",
"Qtable_trading, state_history = train(\n",
"    n_training_episodes, min_epsilon, max_epsilon, decay_rate,\n",
"    env, max_steps, Qtable_trading, learning_rate, gamma,\n",
")\n",
"# number of (state, action) entries that ended up with a positive value\n",
"np.count_nonzero(Qtable_trading > 0)\n",
"\n",
"# #train with BTC\n",
"# env = CustTradingEnv(df=btc_train, max_steps=max_steps)\n",
"# Qtable_trading, state_history = train(n_training_episodes, min_epsilon, max_epsilon, \n",
"#                                         decay_rate, env, max_steps, Qtable_trading, learning_rate, gamma )\n",
"# len(np.where( Qtable_trading > 0 )[0])"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"def evaluate_agent(env, max_steps, n_eval_episodes, Q, random=False):\n",
"    \"\"\"\n",
"    Evaluate the agent for ``n_eval_episodes`` episodes.\n",
"\n",
"    :param env: The evaluation environment\n",
"    :param max_steps: Maximum number of steps per episode\n",
"    :param n_eval_episodes: Number of episode to evaluate the agent\n",
"    :param Q: The Q-table\n",
"    :param random: If True, sample random actions instead of following ``Q``\n",
"        (the progress bar is disabled in that case)\n",
"    :return: ``(mean_reward, std_reward, mean_profit, std_profit,\n",
"        positive_perc_trades)``; the last item is the mean per-episode share\n",
"        of closed trades with a positive result, or NaN when no episode\n",
"        closed a trade\n",
"    \"\"\"\n",
"    episode_positive_perc_trades = []\n",
"    episode_rewards = []\n",
"    episode_profits = []\n",
"    for episode in tqdm(range(n_eval_episodes), disable=random):\n",
"        state = env.reset()\n",
"        total_rewards_ep = 0\n",
"\n",
"        for step in range(max_steps):\n",
"            # Take the action (index) that have the maximum expected future reward given that state\n",
"            if random:\n",
"                action = env.action_space.sample()\n",
"            else:\n",
"                action = greedy_policy(Q, state)\n",
"\n",
"            new_state, reward, done, info = env.step(action)\n",
"            total_rewards_ep += reward\n",
"\n",
"            if done:\n",
"                break\n",
"            state = new_state\n",
"\n",
"        # episodes that closed no trade are left out of the win-rate average\n",
"        if len(env._trade_history) > 0:\n",
"            episode_positive_perc_trades.append(np.count_nonzero(np.array(env._trade_history) > 0)/len(env._trade_history))\n",
"        episode_rewards.append(total_rewards_ep)\n",
"        episode_profits.append(env.history['total_profit'][-1])\n",
"\n",
"    mean_reward = np.mean(episode_rewards)\n",
"    std_reward = np.std(episode_rewards)\n",
"    mean_profit = np.mean(episode_profits)\n",
"    std_profit = np.std(episode_profits)\n",
"    # np.mean([]) would emit a RuntimeWarning; report NaN directly instead\n",
"    if episode_positive_perc_trades:\n",
"        positive_perc_trades = np.mean(episode_positive_perc_trades)\n",
"    else:\n",
"        positive_perc_trades = float(\"nan\")\n",
"\n",
"    return mean_reward, std_reward, mean_profit, std_profit, positive_perc_trades"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "831f1fb725f640c39c55dc9895d015bd",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/1000 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"(7.366191151612702,\n",
" 3.8748133853943463,\n",
" 5.053396330156885,\n",
" 1.8437773613293116,\n",
" 0.9042809523809524)"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Evaluate the greedy policy on 1000 randomly placed 20-step windows of the test data\n",
"max_steps = 20\n",
"n_eval_episodes = 1000\n",
"env_test = CustTradingEnv(df=eth_test, max_steps=max_steps, random_start=True)\n",
"\n",
"evaluate_agent(env=env_test, max_steps=max_steps, n_eval_episodes=n_eval_episodes, Q=Qtable_trading)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 1080x432 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"plt.figure(figsize=(15,6))\n",
"plt.cla()\n",
"env_test.render()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "5ed3da62f4a84f59b4785bb5a59e9bd7",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"(68.78781733724034, 0.0, 47.24655221993224, 0.0, 0.9019607843137255)"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# trade sequential: a single episode walking through the whole test set\n",
"max_steps = len(eth_test)\n",
"n_eval_episodes = 1\n",
"env_test = CustTradingEnv(df=eth_test, max_steps=max_steps, random_start=False)\n",
"\n",
"evaluate_agent(env=env_test, max_steps=max_steps, n_eval_episodes=n_eval_episodes, Q=Qtable_trading)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 1080x432 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"plt.figure(figsize=(15,6))\n",
"plt.cla()\n",
"env_test.render()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"env_test._trade_history"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(-2.0168048208113647,\n",
" 4.899233122278165,\n",
" 0.06689442639249506,\n",
" 2.7236034572875427,\n",
" 0.5067877401210734)"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Test for random n_eval_episodes: baseline that samples random actions\n",
"max_steps = 20\n",
"n_eval_episodes = 1000\n",
"env_test_rand = CustTradingEnv(df=eth_test, max_steps=max_steps, random_start=True)\n",
"\n",
"evaluate_agent(\n",
"    env=env_test_rand, max_steps=max_steps, n_eval_episodes=n_eval_episodes,\n",
"    Q=Qtable_trading, random=True,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Mean profit 0.06453745988505065\n"
]
}
],
"source": [
"# trade sequentially with random actions\n",
"max_steps = len(eth_test)\n",
"env_test = CustTradingEnv(df=eth_test, max_steps=max_steps, random_start=False)\n",
"n_eval_episodes = 1\n",
"\n",
"# index 2 of the evaluate_agent result is the mean profit of that run\n",
"all_profit = [\n",
"    evaluate_agent(env_test, max_steps, n_eval_episodes, Qtable_trading, random=True)[2]\n",
"    for _ in range(1000)\n",
"]\n",
"print(f\"Mean profit {np.mean(all_profit)}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## This is the result\n",
"\n",
"| Model | 1000 trades 20 steps | Sequential trading | 1000 trades 20 steps random actions | Sequential random|\n",
"|------------|----------------------|--------------------|-------------------------------------|------------------|\n",
"|Q-learning | 113.14 | 563.67 | -18.10 | 39.30 |\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def count_equal(env, Qtable):\n",
"    \"\"\"Print the number of ticks in ``env``, how many of them map to a state\n",
"    whose best Q-value is non-zero, and the ratio of the two.\"\"\"\n",
"    states = env.signal_features\n",
"    count = sum(1 for state in states if abs(np.max(Qtable[state])) > 0)\n",
"    print(len(states), count, count / len(states))\n",
"\n",
"count_equal(env_test, Qtable_trading)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3.8.13 ('rl2')",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.13"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "cd60ab8388a66026f336166410d6a8a46ddf65ece2e85ad2d46c8b98d87580d1"
}
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"01a2dbcb714e40148b41c761fcf43147": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"20b0f38ec3234ff28a62a286cd57b933": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "PasswordModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "PasswordModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "PasswordView",
"continuous_update": true,
"description": "Token:",
"description_tooltip": null,
"disabled": false,
"layout": "IPY_MODEL_01a2dbcb714e40148b41c761fcf43147",
"placeholder": "",
"style": "IPY_MODEL_90c874e91b304ee1a7ef147767ac00ce",
"value": ""
}
},
"270cbb5d6e9c4b1e9e2f39c8b3b0c15f": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "VBoxModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "VBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "VBoxView",
"box_style": "",
"children": [
"IPY_MODEL_a02224a43d8d4af3bd31d326540d25da",
"IPY_MODEL_20b0f38ec3234ff28a62a286cd57b933",
"IPY_MODEL_f6c845330d6743c0b35c2c7ad834de77",
"IPY_MODEL_f1675c09d16a4251b403f9c56255f168",
"IPY_MODEL_c1a82965ae26479a98e4fdbde1e64ec2"
],
"layout": "IPY_MODEL_3fa248114ac24656ba74923936a94d2d"
}
},
"2dc5fa9aa3334dfcbdee9c238f2ef60b": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"3e753b0212644990b558c68853ff2041": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"3fa248114ac24656ba74923936a94d2d": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": "center",
"align_self": null,
"border": null,
"bottom": null,
"display": "flex",
"flex": null,
"flex_flow": "column",
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": "50%"
}
},
"42d140b838b844819bc127afc1b7bc84": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"90c874e91b304ee1a7ef147767ac00ce": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"9d847f9a7d47458d8cd57d9b599e47c6": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"a02224a43d8d4af3bd31d326540d25da": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_caef095934ec47bbb8b64eab22049284",
"placeholder": "",
"style": "IPY_MODEL_2dc5fa9aa3334dfcbdee9c238f2ef60b",
"value": "<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.svg\nalt='Hugging Face'> <br> Copy a token from <a\nhref=\"https://huggingface.co/settings/tokens\" target=\"_blank\">your Hugging Face\ntokens page</a> and paste it below. <br> Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file. </center>"
}
},
"a2cfb91cf66447d7899292854bd64a07": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"c1a82965ae26479a98e4fdbde1e64ec2": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_9d847f9a7d47458d8cd57d9b599e47c6",
"placeholder": "",
"style": "IPY_MODEL_42d140b838b844819bc127afc1b7bc84",
"value": "\n<b>Pro Tip:</b> If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. </center>"
}
},
"caef095934ec47bbb8b64eab22049284": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"eaba3f1de4444aabadfea2a3dadb1d80": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"ee4a21bedc504171ad09d205d634b528": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "ButtonStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ButtonStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"button_color": null,
"font_weight": ""
}
},
"f1675c09d16a4251b403f9c56255f168": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "ButtonModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ButtonModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ButtonView",
"button_style": "",
"description": "Login",
"disabled": false,
"icon": "",
"layout": "IPY_MODEL_a2cfb91cf66447d7899292854bd64a07",
"style": "IPY_MODEL_ee4a21bedc504171ad09d205d634b528",
"tooltip": ""
}
},
"f6c845330d6743c0b35c2c7ad834de77": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "CheckboxModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "CheckboxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "CheckboxView",
"description": "Add token as git credential?",
"description_tooltip": null,
"disabled": false,
"indent": true,
"layout": "IPY_MODEL_3e753b0212644990b558c68853ff2041",
"style": "IPY_MODEL_eaba3f1de4444aabadfea2a3dadb1d80",
"value": true
}
}
}
}
},
"nbformat": 4,
"nbformat_minor": 0
}
|