cyberosa
removing data folder and adding dataset import
8db40cd
import pandas as pd
from tqdm import tqdm
from scripts.utils import get_next_week
# Fee applied per mech call (in xDAI); multiplied by the number of mech calls
# when the total costs of a trader are computed.
DEFAULT_MECH_FEE = 0.01 # xDAI
def get_weekly_total_mech_calls(
    trader_data: pd.DataFrame,
    all_mech_calls_df: pd.DataFrame,
    default_mech_calls: int = 280,
) -> int:
    """Return the total weekly number of mech calls for all markets the trader bet upon.

    Args:
        trader_data: Trades of one trader restricted to a single week. The
            ``month_year_week`` and ``trader_address`` columns are read; the
            first unique address is used as the trader.
        all_mech_calls_df: Lookup table with the ``trader_address``,
            ``month_year_week`` and ``total_mech_calls`` columns.
        default_mech_calls: Value returned when the lookup fails. The default
            (280) is the average figure used so far: 40 mech calls in 7 days.

    Returns:
        ``total_mech_calls`` of the first row matching the trader and the
        week, or ``default_mech_calls`` if that value cannot be read.

    Raises:
        ValueError: If ``trader_data`` contains more than one week.
    """
    trading_weeks = trader_data.month_year_week.unique()
    trader_address = trader_data.trader_address.unique()[0]
    if len(trading_weeks) > 1:
        raise ValueError("The trader data should contain only one week information")
    trading_week = trading_weeks[0]

    try:
        same_trader = all_mech_calls_df["trader_address"] == trader_address
        same_week = all_mech_calls_df["month_year_week"] == trading_week
        return all_mech_calls_df.loc[
            same_trader & same_week, "total_mech_calls"
        ].iloc[0]
    except Exception:
        # Deliberate best-effort: a missing row, a missing column or a missing
        # lookup table (None) all end up in the same fallback value.
        print(
            f"Error getting the number of mech calls for the trader {trader_address} and week {trading_week}"
        )
        return default_mech_calls
def compute_metrics(
    trader_address: str,
    trader_data: pd.DataFrame,
    all_mech_calls: pd.DataFrame,
    live_metrics: bool = False,
    unknown_trader: bool = False,
) -> dict:
    """Aggregate the metrics of one trader over the given trades.

    Args:
        trader_address: Address stored under the ``trader_address`` key.
        trader_data: Trades to aggregate. ``collateral_amount`` and ``staking``
            are always read; ``num_mech_calls`` is read for live metrics;
            ``earnings`` and ``mech_fee_amount`` are read otherwise.
        all_mech_calls: Lookup table forwarded to
            ``get_weekly_total_mech_calls`` (only used when neither
            ``live_metrics`` nor ``unknown_trader`` is set).
        live_metrics: When True the number of mech calls is taken from the
            first row of ``num_mech_calls`` and only the basic metrics are
            returned.
        unknown_trader: When True (and not live) the number of mech calls is 0.

    Returns:
        An empty dict if ``trader_data`` has no rows. Otherwise a dict with
        ``trader_address``, ``bet_amount``, ``nr_mech_calls``, ``staking`` and
        ``nr_trades``, plus ``earnings``, ``net_earnings`` and ``roi`` when
        ``live_metrics`` is False.
    """
    if not len(trader_data):
        return {}

    bet_total = trader_data.collateral_amount.sum()

    if live_metrics:
        # the total is already computed in daily_info per trader address and trading day
        mech_calls = trader_data["num_mech_calls"].iloc[0]
    elif unknown_trader:
        # unknown traders never have mech calls
        mech_calls = 0
    else:
        mech_calls = get_weekly_total_mech_calls(
            trader_data=trader_data, all_mech_calls_df=all_mech_calls
        )

    summary = {
        "trader_address": trader_address,
        "bet_amount": bet_total,
        "nr_mech_calls": mech_calls,
        "staking": trader_data.iloc[0].staking,
        "nr_trades": len(trader_data),
    }
    if live_metrics:
        return summary

    earned = trader_data.earnings.sum()
    fees = trader_data.mech_fee_amount.sum()
    # costs = bets + fees paid + estimated cost of the mech calls
    costs = bet_total + fees + (mech_calls * DEFAULT_MECH_FEE)
    net = earned - costs

    summary["earnings"] = earned
    summary["net_earnings"] = net
    summary["roi"] = net / costs
    return summary
def compute_trader_metrics_by_market_creator(
    trader_address: str,
    traders_data: pd.DataFrame,
    all_mech_calls: pd.DataFrame,
    market_creator: str = "all",
    live_metrics: bool = False,
    unknown_trader: bool = False,
) -> dict:
    """Compute the metrics of one trader for a time window (week or day).

    The rows of ``traders_data`` belonging to ``trader_address`` are selected,
    optionally narrowed to one market creator, and handed to
    ``compute_metrics``, which produces roi, net_earnings, earnings,
    bet_amount, nr_mech_calls and nr_trades.

    Args:
        trader_address: Trader whose rows are selected.
        traders_data: Trades of the time window; must contain a
            ``market_creator`` column (checked with an assertion).
        all_mech_calls: Forwarded unchanged to ``compute_metrics``.
        market_creator: ``"all"`` keeps every row of the trader; any other
            value (e.g. quickstart, pearl) keeps only the rows of that creator.
        live_metrics: Forwarded unchanged to ``compute_metrics``.
        unknown_trader: Forwarded unchanged to ``compute_metrics``.

    Returns:
        The metrics dict, or an empty dict when no row is selected.
    """
    assert "market_creator" in traders_data.columns

    selection = traders_data["trader_address"] == trader_address
    if market_creator != "all":
        # restrict to the requested market creator only
        selection = selection & (traders_data["market_creator"] == market_creator)
    trader_rows = traders_data.loc[selection]

    if len(trader_rows) == 0:
        return {}  # No Data

    return compute_metrics(
        trader_address,
        trader_rows,
        all_mech_calls,
        live_metrics,
        unknown_trader,
    )
def merge_trader_weekly_metrics(
    trader: str,
    weekly_data: pd.DataFrame,
    all_mech_calls: pd.DataFrame,
    week: str,
    unknown_trader: bool = False,
) -> pd.DataFrame:
    """Build the weekly metrics rows of one trader, one row per market creator.

    The metrics are computed for the "all", "quickstart" and "pearl"
    categories (in that order). A category for which the trader has no data is
    skipped, so no row made only of missing values is produced.

    Args:
        trader: Address of the trader.
        weekly_data: Trades of the week.
        all_mech_calls: Mech calls lookup table forwarded to the metrics
            computation.
        week: Value stored in the ``month_year_week`` column of every row.
        unknown_trader: Forwarded to the metrics computation.

    Returns:
        A DataFrame with one row per category that has data; it is empty when
        the trader has no data in ``weekly_data``.
    """
    trader_metrics = []
    for market_creator in ("all", "quickstart", "pearl"):
        weekly_metrics = compute_trader_metrics_by_market_creator(
            trader,
            weekly_data,
            all_mech_calls=all_mech_calls,
            market_creator=market_creator,
            live_metrics=False,
            unknown_trader=unknown_trader,
        )
        if not weekly_metrics:
            # nothing to report for this category
            continue
        weekly_metrics["month_year_week"] = week
        weekly_metrics["market_creator"] = market_creator
        trader_metrics.append(weekly_metrics)

    return pd.DataFrame.from_dict(trader_metrics, orient="columns")
def merge_trader_daily_metrics(
    trader: str,
    daily_data: pd.DataFrame,
    day: str,
    live_metrics: bool = False,
) -> pd.DataFrame:
    """Build the daily metrics rows of one trader, one row per market creator.

    The metrics are computed for the "all", "quickstart" and "pearl"
    categories (in that order) without a mech calls lookup table. A category
    for which the trader has no data is skipped, so no row made only of
    missing values is produced.

    Args:
        trader: Address of the trader.
        daily_data: Trades of the day.
        day: Value stored in the ``creation_date`` column of every row.
        live_metrics: Forwarded to the metrics computation.

    Returns:
        A DataFrame with one row per category that has data; it is empty when
        the trader has no data in ``daily_data``.
    """
    trader_metrics = []
    for market_creator in ("all", "quickstart", "pearl"):
        daily_metrics = compute_trader_metrics_by_market_creator(
            trader,
            daily_data,
            all_mech_calls=None,
            market_creator=market_creator,
            live_metrics=live_metrics,
        )
        if not daily_metrics:
            # nothing to report for this category
            continue
        daily_metrics["creation_date"] = day
        daily_metrics["market_creator"] = market_creator
        trader_metrics.append(daily_metrics)

    return pd.DataFrame.from_dict(trader_metrics, orient="columns")
def win_metrics_trader_level(weekly_data):
    """Compute the percentage of winning trades per week, market creator and trader.

    The rows are grouped by ``month_year_week``, ``market_creator`` and
    ``trader_address`` (keeping the order of first appearance) and, for every
    group, the sum of ``winning_trade`` is divided by its count and multiplied
    by 100.

    Returns:
        A DataFrame with the three grouping columns and ``winning_perc``.
    """
    group_keys = ["month_year_week", "market_creator", "trader_address"]
    wins_by_group = weekly_data.groupby(group_keys, sort=False)["winning_trade"]

    percentages = wins_by_group.sum().div(wins_by_group.count()).mul(100)

    # turn the resulting series into a dataframe with plain string column names
    table = percentages.reset_index()
    table.columns = table.columns.astype(str)
    return table.rename(columns={"winning_trade": "winning_perc"})
def compute_weekly_metrics_by_market_creator(
    traders_data: pd.DataFrame,
    all_mech_calls: pd.DataFrame,
    trader_filter: str = None,
    unknown_trader: bool = False,
) -> pd.DataFrame:
    """Compute the metrics at the trader level per week and per market creator.

    Args:
        traders_data: Trades with at least the ``month_year_week``,
            ``trader_address`` and (when filtering) ``staking`` columns.
        all_mech_calls: Mech calls lookup table forwarded to the per-trader
            computation.
        trader_filter: ``None`` keeps every trade, ``"Olas"`` keeps the trades
            whose ``staking`` is not ``"non_Olas"``, any other value keeps
            only the ``"non_Olas"`` trades.
        unknown_trader: Forwarded to the per-trader computation, whatever the
            value of ``trader_filter``.

    Returns:
        The concatenation of the per-trader weekly metrics, or an empty
        DataFrame when there is nothing to compute. The week returned by
        ``get_next_week`` is skipped because its data is not complete.
    """
    contents = []
    next_week = get_next_week()
    print(f"next week = {next_week}")

    for week in traders_data.month_year_week.unique():
        # skip the next week since data is not complete
        if week == next_week:
            continue
        weekly_data = traders_data.loc[traders_data["month_year_week"] == week]
        print(f"Computing weekly metrics for week ={week} by market creator")

        # The staking filter does not depend on the trader: apply it once per
        # week and only traverse the traders that are left after filtering.
        if trader_filter == "Olas":
            weekly_data = weekly_data.loc[weekly_data["staking"] != "non_Olas"]
        elif trader_filter is not None:  # non_Olas traders
            weekly_data = weekly_data.loc[weekly_data["staking"] == "non_Olas"]

        traders = list(weekly_data.trader_address.unique())
        for trader in tqdm(traders, desc="Trader' metrics", unit="metrics"):
            contents.append(
                merge_trader_weekly_metrics(
                    trader, weekly_data, all_mech_calls, week, unknown_trader
                )
            )

    print("End computing all weekly metrics by market creator")
    if not contents:
        # pd.concat refuses an empty list of frames
        return pd.DataFrame()
    return pd.concat(contents, ignore_index=True)
def compute_daily_metrics_by_market_creator(
    traders_data: pd.DataFrame,
    trader_filter: str = None,
    live_metrics: bool = False,
) -> pd.DataFrame:
    """Compute the metrics at the trader level per day and per market creator.

    Args:
        traders_data: Trades with at least the ``creation_date``,
            ``trader_address`` and (when filtering) ``staking`` columns.
        trader_filter: ``None`` keeps every trade, ``"Olas"`` keeps the trades
            whose ``staking`` is not ``"non_Olas"``, any other value keeps
            only the ``"non_Olas"`` trades.
        live_metrics: Forwarded to the per-trader computation.

    Returns:
        The concatenation of the per-trader daily metrics, or an empty
        DataFrame when there is nothing to compute.
    """
    contents = []

    for day in traders_data.creation_date.unique():
        daily_data = traders_data.loc[traders_data["creation_date"] == day]
        print(f"Computing daily metrics for {day}")

        # The staking filter does not depend on the trader: apply it once per
        # day and only traverse the traders that are left after filtering.
        if trader_filter == "Olas":
            daily_data = daily_data.loc[daily_data["staking"] != "non_Olas"]
        elif trader_filter is not None:  # non_Olas traders
            daily_data = daily_data.loc[daily_data["staking"] == "non_Olas"]

        traders = list(daily_data.trader_address.unique())
        for trader in tqdm(traders, desc="Trader' daily metrics", unit="metrics"):
            contents.append(
                merge_trader_daily_metrics(trader, daily_data, day, live_metrics)
            )

    print("End computing all daily metrics by market creator")
    print(f"length of contents = {len(contents)}")
    if not contents:
        # pd.concat refuses an empty list of frames
        return pd.DataFrame()
    return pd.concat(contents, ignore_index=True)
def compute_winning_metrics_by_trader(
    traders_data: pd.DataFrame, unknown_info: pd.DataFrame, trader_filter: str = None
) -> pd.DataFrame:
    """Compute the winning metrics at the trader level per week and market creator.

    Every trade is counted twice: once under its own market creator and once
    under the synthetic market creator ``"all"``.

    Args:
        traders_data: Trades of the known traders.
        unknown_info: Trades of the unknown traders; ignored when it is None
            or has no rows.
        trader_filter: ``"non_Olas"`` keeps the trades whose ``staking`` is
            ``"non_Olas"``, ``"Olas"`` keeps the other ones, anything else
            keeps every trade.

    Returns:
        The output of ``win_metrics_trader_level`` on the selected trades, or
        an empty DataFrame when no trade is selected.
    """
    if unknown_info is not None and len(unknown_info) > 0:
        all_data = pd.concat([traders_data, unknown_info], axis=0)
    else:
        all_data = traders_data

    # copy of the trades relabelled as belonging to the "all" market creator
    market_all = all_data.copy(deep=True)
    market_all["market_creator"] = "all"

    # merging both dataframes
    final_traders = pd.concat([market_all, all_data], ignore_index=True)
    final_traders = final_traders.sort_values(by="creation_timestamp", ascending=True)

    if trader_filter == "non_Olas":
        final_traders = final_traders.loc[final_traders["staking"] == "non_Olas"]
    elif trader_filter == "Olas":
        final_traders = final_traders.loc[final_traders["staking"] != "non_Olas"]
    else:  # all traders
        print("No filtering")

    if len(final_traders) == 0:
        return pd.DataFrame()
    return win_metrics_trader_level(final_traders)