# NOTE: page-header residue from the hosting site's file viewer
# ("Spaces: Sleeping"); it is not part of the script.
import os

import pandas as pd
import regionmask
import xarray as xr
from huggingface_hub import hf_hub_download
# Column names selected as model inputs; "cell_area" is the only entry
# without a matching "<name>_diff" column.
LAND_FEATURES = [
    "c3ann", "c3nfx", "c3per", "c4ann", "c4per",
    "pastr", "primf", "primn", "range", "secdf", "secdn", "urban",
    "cell_area",
]

# One "<name>_diff" column per entry of LAND_FEATURES except "cell_area".
# Derived rather than retyped so the two lists cannot drift apart.
LAND_DIFF_FEATURES = [
    f"{name}_diff" for name in LAND_FEATURES if name != "cell_area"
]

# Every column exported alongside the label.
FEATURES = LAND_FEATURES + LAND_DIFF_FEATURES

# Name of the target column.
LABEL = "ELUC"

# Zipped zarr store read by main().
PATH_TO_DATASET = "merged_aggregated_dataset_1850_2022.zarr.zip"
def import_data(path, update_path=None):
    """Load the dataset at ``path`` and prepare it for tabular export.

    Steps, in order:

    1. Open the consolidated zarr store.
    2. If ``update_path`` is given, drop ``ELUC`` and ``cell_area`` and merge
       in the dataset opened from ``update_path``.
    3. Shift every ``LAND_DIFF_FEATURES`` variable one step back along
       ``time``.
    4. Drop entries whose ``ELUC_diff`` is null.
    5. Add a ``country`` variable from regionmask's Natural Earth v5.0.0
       ``countries_110`` regions.

    Args:
        path: Path to the consolidated zarr store.
        update_path: Optional path to a dataset merged in after ``ELUC`` and
            ``cell_area`` are dropped. Skipped when falsy.

    Returns:
        The prepared ``xarray.Dataset``.
    """
    raw = xr.open_zarr(path, consolidated=True)

    # Get updated ELUC. The update file presumably provides both dropped
    # variables (ELUC and cell_area) -- TODO confirm against the update data.
    if update_path:
        eluc = xr.open_dataset(update_path)
        raw = raw.drop_vars(["ELUC", "cell_area"])
        raw = raw.merge(eluc)

    # Shift actions back a year: the diff stored at time t+1 moves to t, which
    # leaves the final time step of the diff variables filled with NaN.
    # (An earlier approach shifted ELUC / ELUC_diff forward by one step and
    # relabelled the time axis instead; it is no longer used.)
    raw[LAND_DIFF_FEATURES] = raw[LAND_DIFF_FEATURES].shift(time=-1)

    # Materialise the null mask before using it with drop=True.
    mask = raw["ELUC_diff"].isnull().compute()
    raw = raw.where(~mask, drop=True)

    country_mask = regionmask.defined_regions.natural_earth_v5_0_0.countries_110.mask(raw)
    raw["country"] = country_mask
    return raw
def da_to_df(da, countries_df):
    """Flatten ``da`` into a DataFrame and attach a ``country_name`` column.

    Args:
        da: Object exposing ``to_dataframe()`` whose result contains a
            ``country`` column of region numbers.
        countries_df: DataFrame indexed by region number with a ``names``
            column.

    Returns:
        The flattened DataFrame with every row containing a NaN in any column
        removed, plus a ``country_name`` column looked up from
        ``countries_df``.

    Raises:
        KeyError: If a region number in ``country`` is missing from
            ``countries_df``.
    """
    df = da.to_dataframe().dropna()
    # Region numbers may arrive as floats (NaN marks "no region" before the
    # dropna above); cast to int so the lookup uses exact integer labels.
    country_ids = df["country"].astype(int)
    df["country_name"] = countries_df.loc[country_ids, "names"].values
    return df
def main():
    """Export the feature and label columns from 1982 onward to CSV.

    Loads ``PATH_TO_DATASET`` without an update file, flattens it to a
    DataFrame, keeps ``FEATURES`` plus ``LABEL``, and writes the result to
    ``processed/eluc_1982.csv``.
    """
    raw = import_data(PATH_TO_DATASET, None)
    countries_df = regionmask.defined_regions.natural_earth_v5_0_0.countries_110.to_dataframe()
    df = da_to_df(raw, countries_df)

    # Label-based slice starting at 1982 on the leading index level
    # (presumably the year -- TODO confirm the dataset's dimension order).
    df = df.loc[1982:][FEATURES + [LABEL]]

    # to_csv does not create missing parent directories.
    os.makedirs("processed", exist_ok=True)
    df.to_csv("processed/eluc_1982.csv", index=True)
# Run the export only when this file is executed as a script.
if __name__ == "__main__":
    main()