MarcSkovMadsen committed on
Commit
57681d6
·
1 Parent(s): f3b7f1c

Add core-1 dataset

Browse files
Files changed (4) hide show
  1. .gitignore +1 -1
  2. components.py +39 -12
  3. download_datasets.py +2 -1
  4. utils.py +28 -6
.gitignore CHANGED
@@ -103,5 +103,5 @@ venv.bak/
103
  # mypy
104
  .mypy_cache/
105
 
106
- data/
107
  script.py
 
103
  # mypy
104
  .mypy_cache/
105
 
106
+ data/*.parquet
107
  script.py
components.py CHANGED
@@ -6,9 +6,11 @@ import param
6
  from holoviews.operation.datashader import dynspread, rasterize
7
 
8
  from utils import (
 
9
  DATASETS,
10
  DATASHADER_LOGO,
11
  DATASHADER_URL,
 
12
  DESCRIPTION,
13
  ESA_EASTING,
14
  ESA_NORTHING,
@@ -16,7 +18,6 @@ from utils import (
16
  MAJOR_TOM_LYRICS,
17
  MAJOR_TOM_PICTURE,
18
  MAJOR_TOM_REF_URL,
19
- META_DATA_COLUMNS,
20
  PANEL_LOGO,
21
  PANEL_URL,
22
  get_closest_rows,
@@ -26,9 +27,16 @@ from utils import (
26
 
27
 
28
  class DatasetInput(pn.viewable.Viewer):
29
- value = param.Selector(objects=DATASETS, allow_None=False, label="Dataset")
 
 
 
 
 
 
30
 
31
- data = param.DataFrame(allow_None=False)
 
32
 
33
  def __panel__(self):
34
  return pn.widgets.RadioButtonGroup.from_param(
@@ -37,7 +45,9 @@ class DatasetInput(pn.viewable.Viewer):
37
 
38
  @pn.depends("value", watch=True, on_init=True)
39
  def _update_data(self):
40
- self.data = pn.cache(get_meta_data)(dataset=self.value)
 
 
41
 
42
 
43
  class MapInput(pn.viewable.Viewer):
@@ -129,17 +139,25 @@ class MapInput(pn.viewable.Viewer):
129
 
130
 
131
  class ImageInput(pn.viewable.Viewer):
132
- data = param.DataFrame(allow_refs=True, allow_None=False)
 
 
 
 
 
133
  column_name = param.Selector(
134
- default="Thumbnail", objects=list(META_DATA_COLUMNS), label="Image Type"
 
 
135
  )
 
136
  updating = param.Boolean()
137
 
138
  meta_data = param.DataFrame()
139
  image = param.Parameter()
140
  plot = param.Parameter()
141
 
142
- _timestamp = param.Selector(label="Timestamp", objects=[None])
143
 
144
  def __panel__(self):
145
  return pn.Column(
@@ -182,23 +200,30 @@ class ImageInput(pn.viewable.Viewer):
182
  if self.data.empty:
183
  default_value = None
184
  options = [None]
185
- print("empty options")
186
  else:
187
  options = sorted(self.data["timestamp"].unique())
188
  default_value = options[0]
189
- print("options", options)
190
 
191
  self.param._timestamp.objects = options
192
  if not self._timestamp in options:
193
  self._timestamp = default_value
194
 
 
 
 
 
 
 
 
 
 
195
  @property
196
  def column(self):
197
- return META_DATA_COLUMNS[self.column_name]
198
 
199
  @pn.depends("_timestamp", "column_name", watch=True, on_init=True)
200
  def _update_plot(self):
201
- if self.data.empty or not self._timestamp:
202
  self.meta_data = self.data.T
203
  self.image = None
204
  self.plot = hv.RGB(np.array([]))
@@ -309,7 +334,9 @@ class App(param.Parameterized):
309
  def _create_main_content(self):
310
  dataset = DatasetInput()
311
  map_input = MapInput(data=dataset.param.data)
312
- image_input = ImageInput(data=map_input.param.data_selected)
 
 
313
 
314
  return pn.Column(dataset, map_input), image_input
315
 
 
6
  from holoviews.operation.datashader import dynspread, rasterize
7
 
8
  from utils import (
9
+ DATASET_COLUMNS,
10
  DATASETS,
11
  DATASHADER_LOGO,
12
  DATASHADER_URL,
13
+ DEFAULT_DATASET,
14
  DESCRIPTION,
15
  ESA_EASTING,
16
  ESA_NORTHING,
 
18
  MAJOR_TOM_LYRICS,
19
  MAJOR_TOM_PICTURE,
20
  MAJOR_TOM_REF_URL,
 
21
  PANEL_LOGO,
22
  PANEL_URL,
23
  get_closest_rows,
 
27
 
28
 
29
  class DatasetInput(pn.viewable.Viewer):
30
+ value = param.Selector(
31
+ default=DEFAULT_DATASET,
32
+ objects=DATASETS,
33
+ allow_None=False,
34
+ label="Dataset",
35
+ doc="""The name of the dataset""",
36
+ )
37
 
38
+ data = param.DataFrame(allow_None=False, doc="""The metadata dataset""")
39
+ columns = param.Dict(allow_None=False, doc="""The columns of the dataset""")
40
 
41
  def __panel__(self):
42
  return pn.widgets.RadioButtonGroup.from_param(
 
45
 
46
  @pn.depends("value", watch=True, on_init=True)
47
  def _update_data(self):
48
+ columns = DATASET_COLUMNS[self.value]
49
+ data = pn.cache(get_meta_data)(dataset=self.value)
50
+ self.param.update(columns=columns, data=data)
51
 
52
 
53
  class MapInput(pn.viewable.Viewer):
 
139
 
140
 
141
  class ImageInput(pn.viewable.Viewer):
142
+ data = param.DataFrame(
143
+ allow_refs=True, allow_None=False, doc="""The metadata selected"""
144
+ )
145
+ columns = param.Dict(
146
+ allow_refs=True, allow_None=False, doc="""The list of columns of the dataset"""
147
+ )
148
  column_name = param.Selector(
149
+ label="Image Type",
150
+ allow_None=False,
151
+ doc="""The name of the image type to view""",
152
  )
153
+
154
  updating = param.Boolean()
155
 
156
  meta_data = param.DataFrame()
157
  image = param.Parameter()
158
  plot = param.Parameter()
159
 
160
+ _timestamp = param.Selector(label="Timestamp", objects=[None], doc="""The timestamp of the sample to view""")
161
 
162
  def __panel__(self):
163
  return pn.Column(
 
200
  if self.data.empty:
201
  default_value = None
202
  options = [None]
 
203
  else:
204
  options = sorted(self.data["timestamp"].unique())
205
  default_value = options[0]
 
206
 
207
  self.param._timestamp.objects = options
208
  if not self._timestamp in options:
209
  self._timestamp = default_value
210
 
211
+ @pn.depends("columns", watch=True, on_init=True)
212
+ def _update_column_names(self):
213
+ options = sorted(self.columns)
214
+ default_value = "Thumbnail"
215
+
216
+ self.param.column_name.objects = options
217
+ if not self.column_name in options:
218
+ self.column_name = default_value
219
+
220
  @property
221
  def column(self):
222
+ return self.columns[self.column_name]
223
 
224
  @pn.depends("_timestamp", "column_name", watch=True, on_init=True)
225
  def _update_plot(self):
226
+ if self.data.empty or not self._timestamp or not self.column_name:
227
  self.meta_data = self.data.T
228
  self.image = None
229
  self.plot = hv.RGB(np.array([]))
 
334
  def _create_main_content(self):
335
  dataset = DatasetInput()
336
  map_input = MapInput(data=dataset.param.data)
337
+ image_input = ImageInput(
338
+ data=map_input.param.data_selected, columns=dataset.param.columns
339
+ )
340
 
341
  return pn.Column(dataset, map_input), image_input
342
 
download_datasets.py CHANGED
@@ -2,6 +2,7 @@ from utils import DATASETS, get_meta_data
2
 
3
  for dataset in DATASETS:
4
  print(f"downloading {dataset}")
5
- get_meta_data(dataset=dataset)
 
6
 
7
  print("finished downloading datasets")
 
2
 
3
  for dataset in DATASETS:
4
  print(f"downloading {dataset}")
5
+ data = get_meta_data(dataset=dataset)
6
+
7
 
8
  print("finished downloading datasets")
utils.py CHANGED
@@ -1,14 +1,18 @@
 
1
  from io import BytesIO
2
  from pathlib import Path
3
 
4
  import holoviews as hv
5
  import numpy as np
6
  import pandas as pd
 
7
  import pyarrow.parquet as pq
8
  from fsspec.parquet import open_parquet_file
9
  from holoviews import opts
10
  from PIL import Image
11
 
 
 
12
  MAJOR_TOM_LOGO = "assets/major-tom-esa-logo.png"
13
  MAJOR_TOM_PICTURE = (
14
  "https://upload.wikimedia.org/wikipedia/en/6/6d/Major_tom_space_oddity_video.JPG"
@@ -20,10 +24,16 @@ PANEL_URL = "https://panel.holoviz.org"
20
  DATASHADER_LOGO = "https://datashader.org/_static/logo_horizontal.svg"
21
  DATASHADER_URL = "https://datashader.org/"
22
  REPOSITORY = "Major-TOM"
23
- DATASETS = ["Core-S2L2A", "Core-S2L1C"]
 
24
  ESA_EASTING = 250668.73322714816
25
  ESA_NORTHING = 6259216.653115547
26
- META_DATA_COLUMNS = {
 
 
 
 
 
27
  "Coastal aerosol": "B01",
28
  "Blue": "B02",
29
  "Green": "B03",
@@ -39,7 +49,11 @@ META_DATA_COLUMNS = {
39
  "Cloud Mask": "cloud_mask",
40
  "Thumbnail": "thumbnail",
41
  }
42
-
 
 
 
 
43
  DATA_PATH = Path(__file__).parent / "data"
44
 
45
  DESCRIPTION = f"""\
@@ -135,7 +149,7 @@ def _meta_data_path(dataset="Core-S2L2A", repository=REPOSITORY):
135
 
136
 
137
  def get_meta_data(dataset="Core-S2L2A", repository=REPOSITORY):
138
- print(f"Loading {dataset}")
139
  path = _meta_data_path(dataset=dataset)
140
  if not path.exists():
141
  data = pd.read_parquet(_meta_data_url(dataset=dataset))
@@ -149,7 +163,7 @@ def get_meta_data(dataset="Core-S2L2A", repository=REPOSITORY):
149
  )
150
  # Optimize Performance
151
  data["timestamp"] = pd.to_datetime(data["timestamp"])
152
- numeric_cols = ["cloud_cover", "nodata", "centre_lat", "centre_lon"]
153
  data[numeric_cols] = data[numeric_cols].astype("float32")
154
 
155
  return data
@@ -158,7 +172,6 @@ def get_meta_data(dataset="Core-S2L2A", repository=REPOSITORY):
158
  def get_image(row, column="thumbnail"):
159
  parquet_url = row["parquet_url"]
160
  parquet_row = row["parquet_row"]
161
- print(parquet_url, parquet_row, column)
162
  with open_parquet_file(parquet_url, columns=[column]) as f:
163
  with pq.ParquetFile(f) as pf:
164
  first_row_group = pf.read_row_group(parquet_row, columns=[column])
@@ -186,3 +199,12 @@ def get_closest_rows(data, target_easting, target_northing):
186
  )
187
  closest_rows = data[distance == distance.min()]
188
  return closest_rows
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
  from io import BytesIO
3
  from pathlib import Path
4
 
5
  import holoviews as hv
6
  import numpy as np
7
  import pandas as pd
8
+ import panel as pn
9
  import pyarrow.parquet as pq
10
  from fsspec.parquet import open_parquet_file
11
  from holoviews import opts
12
  from PIL import Image
13
 
14
+ FORMAT = "%(asctime)s | %(levelname)s | %(name)s | %(message)s"
15
+
16
  MAJOR_TOM_LOGO = "assets/major-tom-esa-logo.png"
17
  MAJOR_TOM_PICTURE = (
18
  "https://upload.wikimedia.org/wikipedia/en/6/6d/Major_tom_space_oddity_video.JPG"
 
24
  DATASHADER_LOGO = "https://datashader.org/_static/logo_horizontal.svg"
25
  DATASHADER_URL = "https://datashader.org/"
26
  REPOSITORY = "Major-TOM"
27
+ DEFAULT_DATASET = "Core-S2L2A"
28
+ DATASETS = ["Core-S1RTC", "Core-S2L2A", "Core-S2L1C"]
29
  ESA_EASTING = 250668.73322714816
30
  ESA_NORTHING = 6259216.653115547
31
+ _DATASET_COLUMNS_1 = {
32
+ # "Linear Power in the VV Polarization": "vv",
33
+ # "Linear Power in the VH Polarization": "vh",
34
+ "Thumbnail": "thumbnail",
35
+ }
36
+ _DATASET_COLUMNS_2 = {
37
  "Coastal aerosol": "B01",
38
  "Blue": "B02",
39
  "Green": "B03",
 
49
  "Cloud Mask": "cloud_mask",
50
  "Thumbnail": "thumbnail",
51
  }
52
+ DATASET_COLUMNS = {
53
+ "Core-S1RTC": _DATASET_COLUMNS_1,
54
+ "Core-S2L2A": _DATASET_COLUMNS_2,
55
+ "Core-S2L1C": _DATASET_COLUMNS_2,
56
+ }
57
  DATA_PATH = Path(__file__).parent / "data"
58
 
59
  DESCRIPTION = f"""\
 
149
 
150
 
151
  def get_meta_data(dataset="Core-S2L2A", repository=REPOSITORY):
152
+ logging.info("Loading %s", dataset)
153
  path = _meta_data_path(dataset=dataset)
154
  if not path.exists():
155
  data = pd.read_parquet(_meta_data_url(dataset=dataset))
 
163
  )
164
  # Optimize Performance
165
  data["timestamp"] = pd.to_datetime(data["timestamp"])
166
+ numeric_cols = data.select_dtypes(include=["float64"]).columns
167
  data[numeric_cols] = data[numeric_cols].astype("float32")
168
 
169
  return data
 
172
  def get_image(row, column="thumbnail"):
173
  parquet_url = row["parquet_url"]
174
  parquet_row = row["parquet_row"]
 
175
  with open_parquet_file(parquet_url, columns=[column]) as f:
176
  with pq.ParquetFile(f) as pf:
177
  first_row_group = pf.read_row_group(parquet_row, columns=[column])
 
199
  )
200
  closest_rows = data[distance == distance.min()]
201
  return closest_rows
202
+
203
+
204
+ def reconfig_basic_config(format_=FORMAT, level=logging.INFO):
205
+ """(Re-)configure logging"""
206
+ logging.basicConfig(format=format_, level=level, force=True)
207
+ logging.info("Logging.basicConfig completed successfully")
208
+
209
+
210
+ reconfig_basic_config()