cyberosa committed
Commit 2628969 · 1 Parent(s): 0b5bfca

trying to fix error with loading files

Files changed (4):
  1. app.py +13 -12
  2. requirements.txt +1 -0
  3. scripts/retention_metrics.py +1 -8
  4. scripts/utils.py +0 -1
app.py CHANGED
@@ -65,21 +65,21 @@ def load_all_data():

     # all trades profitability
     # Download the compressed file
-    gz_file_path = hf_hub_download(
+    gz_file_path_trades = hf_hub_download(
         repo_id="valory/Olas-predict-dataset",
         filename="all_trades_profitability.parquet.gz",
         repo_type="dataset",
     )

-    parquet_file_path = gz_file_path.replace(".gz", "")
+    parquet_file_path_trades = gz_file_path_trades.replace(".gz", "")
+    parquet_file_path_trades = parquet_file_path_trades.replace("all", "")

-    with gzip.open(gz_file_path, "rb") as f_in:
-        with open(parquet_file_path, "wb") as f_out:
+    with gzip.open(gz_file_path_trades, "rb") as f_in:
+        with open(parquet_file_path_trades, "wb") as f_out:
             shutil.copyfileobj(f_in, f_out)

     # Now read the decompressed parquet file
-    df1 = pd.read_parquet(parquet_file_path)
-    os.remove(parquet_file_path)
+    df1 = pd.read_parquet(parquet_file_path_trades)

     # closed_markets_div
     closed_markets_df = hf_hub_download(

@@ -106,18 +106,18 @@ def load_all_data():
     df4 = pd.read_parquet(unknown_df)

     # retention activity
-    gz_file_path2 = hf_hub_download(
+    gz_file_path_ret = hf_hub_download(
         repo_id="valory/Olas-predict-dataset",
         filename="retention_activity.parquet.gz",
         repo_type="dataset",
     )
-    parquet_file_path2 = gz_file_path2.replace(".gz", "")
+    parquet_file_path_ret = gz_file_path_ret.replace(".gz", "")

-    with gzip.open(gz_file_path2, "rb") as f_in:
-        with open(parquet_file_path2, "wb") as f_out:
+    with gzip.open(gz_file_path_ret, "rb") as f_in:
+        with open(parquet_file_path_ret, "wb") as f_out:
             shutil.copyfileobj(f_in, f_out)
-    df5 = pd.read_parquet(parquet_file_path2)
-    os.remove(parquet_file_path2)
+    df5 = pd.read_parquet(parquet_file_path_ret)
+    # os.remove(parquet_file_path_ret)

     # active_traders.parquet
     active_traders_df = hf_hub_download(

@@ -208,6 +208,7 @@ def prepare_data():
     active_traders,
     all_mech_calls,
 ) = prepare_data()
+
 retention_df = prepare_retention_dataset(
     retention_df=raw_retention_df, unknown_df=unknown_traders
 )
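Both hunks in load_all_data() refactor the same pattern: download a .parquet.gz from the valory/Olas-predict-dataset repo with hf_hub_download, decompress it with gzip/shutil, and read the result with pandas. Below is a minimal sketch of that pattern as one reusable helper; the helper name and the temporary-directory output path are assumptions of the sketch, not code from this commit (the commit itself decompresses next to the cached .gz file).

# Sketch only: a reusable version of the download-and-decompress step above.
# load_parquet_from_gz is a hypothetical helper name, not part of the commit.
import gzip
import shutil
import tempfile
from pathlib import Path

import pandas as pd
from huggingface_hub import hf_hub_download


def load_parquet_from_gz(filename: str) -> pd.DataFrame:
    # Returns the path of the compressed file in the local HF cache.
    gz_path = hf_hub_download(
        repo_id="valory/Olas-predict-dataset",
        filename=filename,
        repo_type="dataset",
    )
    # Decompress into a throwaway directory instead of the shared cache,
    # so repeated loads never fight over the same output file.
    with tempfile.TemporaryDirectory() as tmp_dir:
        out_path = Path(tmp_dir) / Path(filename).with_suffix("").name  # strips ".gz"
        with gzip.open(gz_path, "rb") as f_in, open(out_path, "wb") as f_out:
            shutil.copyfileobj(f_in, f_out)
        return pd.read_parquet(out_path)


# df1 = load_parquet_from_gz("all_trades_profitability.parquet.gz")
# df5 = load_parquet_from_gz("retention_activity.parquet.gz")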
requirements.txt CHANGED
@@ -10,4 +10,5 @@ pydantic
 pydantic_core
 nbformat
 pytz
+duckdb
 ipfshttpclient
scripts/retention_metrics.py CHANGED
@@ -1,6 +1,5 @@
 import pandas as pd
 from datetime import datetime, timedelta
-from scripts.utils import DATA_DIR


 # Basic Week over Week Retention

@@ -263,10 +262,4 @@ def prepare_retention_dataset(


 if __name__ == "__main__":
-    # read all datasets
-    traders_df = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
-    unknown_df = pd.read_parquet(DATA_DIR / "unknown_traders.parquet")
-    all_traders = prepare_retention_dataset(traders_df, unknown_df)
-    # Usage example:
-    wow_retention = calculate_wow_retention_by_type(all_traders)
-    cohort_retention = calculate_cohort_retention(all_traders)
+    print("WIP")
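The deleted __main__ block was also the only usage example for these retention helpers. A hedged sketch of the same flow, with the removed local DATA_DIR reads replaced by hub downloads, is below; it reuses the load_parquet_from_gz helper sketched under app.py above and assumes unknown_traders.parquet is available uncompressed in the same dataset repo.

# Sketch only: the removed usage example, sourcing data from the hub instead of DATA_DIR.
# Intended to run inside scripts/retention_metrics.py's __main__, where the
# retention functions are already in scope.
import pandas as pd
from huggingface_hub import hf_hub_download

traders_df = load_parquet_from_gz("all_trades_profitability.parquet.gz")

# Assumption: unknown_traders.parquet is stored uncompressed in the dataset repo.
unknown_path = hf_hub_download(
    repo_id="valory/Olas-predict-dataset",
    filename="unknown_traders.parquet",
    repo_type="dataset",
)
unknown_df = pd.read_parquet(unknown_path)

all_traders = prepare_retention_dataset(traders_df, unknown_df)
wow_retention = calculate_wow_retention_by_type(all_traders)
cohort_retention = calculate_cohort_retention(all_traders)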
scripts/utils.py CHANGED
@@ -4,7 +4,6 @@ from datetime import datetime, timedelta

 SCRIPTS_DIR = Path(__file__).parent
 ROOT_DIR = SCRIPTS_DIR.parent
-DATA_DIR = ROOT_DIR / "data"
 TMP_DIR = ROOT_DIR / "tmp"
