Skip to content

Commit c542e86

Browse files
authored
Add readers for JMA, DELFT, CSWR FARM and NASA PIERS (#226)
1 parent 23fbeb0 commit c542e86

27 files changed

Lines changed: 1099 additions & 55 deletions

File tree

disdrodb/data_transfer/download_data.py

Lines changed: 145 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
import logging
2222
import os
2323
import shutil
24+
import subprocess
25+
import urllib.parse
2426
from typing import Optional, Union
2527

2628
import click
@@ -213,7 +215,7 @@ def download_station(
213215
check_exists=True,
214216
)
215217
# Download data
216-
_download_station_data(metadata_filepath, data_archive_dir=data_archive_dir, force=force)
218+
download_station_data(metadata_filepath, data_archive_dir=data_archive_dir, force=force)
217219

218220

219221
def _is_valid_disdrodb_data_url(disdrodb_data_url):
@@ -228,13 +230,25 @@ def _extract_station_files(zip_filepath, station_dir):
228230
os.remove(zip_filepath)
229231

230232

231-
def _download_station_data(metadata_filepath: str, data_archive_dir: str, force: bool = False) -> None:
233+
def check_consistent_station_name(metadata_filepath, station_name):
    """Check that ``station_name`` agrees with the metadata YAML file name.

    Parameters
    ----------
    metadata_filepath : str
        Path of the station metadata file. The expected station name is the
        file base name without its trailing ``.yml`` extension.
    station_name : str
        Station name read from the metadata. Falsy values (``None``, ``""``)
        are not checked.

    Returns
    -------
    str
        The unmodified ``station_name``.

    Raises
    ------
    ValueError
        If ``station_name`` is set and differs from the name derived from
        the file name.
    """
    extension = ".yml"
    filename = os.path.basename(metadata_filepath)
    # Strip only the trailing extension. A blanket ``str.replace`` would also
    # remove ".yml" occurring inside the station name itself.
    if filename.endswith(extension):
        expected_station_name = filename[: -len(extension)]
    else:
        expected_station_name = filename
    if station_name and str(station_name) != str(expected_station_name):
        raise ValueError(f"Inconsistent station_name values in the {metadata_filepath} file. Download aborted.")
    return station_name
240+
241+
242+
def download_station_data(metadata_filepath: str, data_archive_dir: str, force: bool = False) -> None:
232243
"""Download and unzip the station data .
233244
234245
Parameters
235246
----------
236247
metadata_filepaths : str
237248
Metadata file path.
249+
data_archive_dir : str (optional)
250+
DISDRODB Data Archive directory. Format: ``<...>/DISDRODB``.
251+
If ``None`` (the default), the disdrodb config variable ``data_archive_dir`` is used.
238252
force : bool, optional
239253
If ``True``, delete existing files and redownload it. The default value is ``False``.
240254
@@ -247,7 +261,7 @@ def _download_station_data(metadata_filepath: str, data_archive_dir: str, force:
247261
campaign_name = metadata_dict["campaign_name"]
248262
station_name = metadata_dict["station_name"]
249263
station_name = check_consistent_station_name(metadata_filepath, station_name)
250-
# Define the destination local filepath path
264+
# Define the path to the station RAW data directory
251265
station_dir = define_station_dir(
252266
data_archive_dir=data_archive_dir,
253267
data_source=data_source,
@@ -259,19 +273,136 @@ def _download_station_data(metadata_filepath: str, data_archive_dir: str, force:
259273
disdrodb_data_url = metadata_dict.get("disdrodb_data_url", None)
260274
if not _is_valid_disdrodb_data_url(disdrodb_data_url):
261275
raise ValueError(f"Invalid disdrodb_data_url '{disdrodb_data_url}' for station {station_name}")
262-
# Download file
263-
zip_filepath = _download_file_from_url(disdrodb_data_url, dst_dir=station_dir, force=force)
264-
# Extract the stations files from the downloaded station.zip file
265-
_extract_station_files(zip_filepath, station_dir=station_dir)
266276

277+
# Download files
278+
# - Option 1: Zip file from Zenodo containing all station raw data
279+
if disdrodb_data_url.startswith("https://zenodo.org/"):
280+
download_zenodo_zip_file(url=disdrodb_data_url, dst_dir=station_dir, force=force)
281+
# - Option 2: Recursive download from a web server via HTTP or HTTPS.
282+
elif disdrodb_data_url.startswith("http"):
283+
download_web_server_data(url=disdrodb_data_url, dst_dir=station_dir, force=force, verbose=True)
284+
else:
285+
raise NotImplementedError(f"Open a GitHub Issue to enable the download of data from {disdrodb_data_url}.")
267286

268-
def check_consistent_station_name(metadata_filepath, station_name):
    """Check that ``station_name`` agrees with the metadata YAML file name.

    Parameters
    ----------
    metadata_filepath : str
        Path of the station metadata file. The expected station name is the
        file base name without its trailing ``.yml`` extension.
    station_name : str
        Station name read from the metadata. Falsy values (``None``, ``""``)
        are not checked.

    Returns
    -------
    str
        The unmodified ``station_name``.

    Raises
    ------
    ValueError
        If ``station_name`` is set and differs from the name derived from
        the file name.
    """
    extension = ".yml"
    filename = os.path.basename(metadata_filepath)
    # Strip only the trailing extension. A blanket ``str.replace`` would also
    # remove ".yml" occurring inside the station name itself.
    if filename.endswith(extension):
        expected_station_name = filename[: -len(extension)]
    else:
        expected_station_name = filename
    if station_name and str(station_name) != str(expected_station_name):
        raise ValueError(f"Inconsistent station_name values in the {metadata_filepath} file. Download aborted.")
    return station_name
287+
288+
####-----------------------------------------------------------------------------------------.
289+
#### Download from Web Server via HTTP or HTTPS
290+
291+
292+
def download_web_server_data(url: str, dst_dir: str, force=True, verbose=True) -> None:
    """Recursively download a web server "directory" via HTTP or HTTPS.

    The download is delegated to the system ``wget`` executable, which must
    be installed and available on the PATH.

    Steps:

    1. Ensure wget is available.
    2. Normalize the URL so that it ends with '/'.
    3. Compute the ``--cut-dirs`` value from the URL path.
    4. Create ``dst_dir`` if it does not exist.
    5. Build and run the wget command.

    Parameters
    ----------
    url : str
        URL of the remote directory to download.
    dst_dir : str
        Local destination directory. It is created if missing.
    force : bool, optional
        Forwarded to ``build_webserver_wget_command``. The default value is ``True``.
    verbose : bool, optional
        Forwarded to ``build_webserver_wget_command``. The default value is ``True``.

    Raises
    ------
    FileNotFoundError
        If wget is not found on the PATH.
    subprocess.CalledProcessError
        If wget exits with a non-zero return code.

    Example
    -------
    download_web_server_data(
        "https://ruisdael.citg.tudelft.nl/parsivel/PAR001_Cabauw/2021/202101/",
        dst_dir="/tmp/202101",
    )
    """
    # 1. Ensure wget exists
    ensure_wget_available()

    # 2. Normalize URL
    url = ensure_trailing_slash(url)

    # 3. Compute the number of remote path segments wget must strip
    cut_dirs = compute_cut_dirs(url)

    # 4. Create destination directory if needed
    os.makedirs(dst_dir, exist_ok=True)

    # 5. Build wget command
    cmd = build_webserver_wget_command(url, cut_dirs=cut_dirs, dst_dir=dst_dir, force=force, verbose=verbose)

    # 6. Run wget command.
    # With check=True a non-zero exit status raises subprocess.CalledProcessError,
    # which is left to propagate unchanged so the original traceback is preserved.
    subprocess.run(cmd, check=True)
333+
334+
335+
def ensure_wget_available() -> None:
    """Verify that the ``wget`` executable can be located on the system PATH.

    Raises
    ------
    FileNotFoundError
        If ``shutil.which`` cannot locate ``wget``.
    """
    wget_path = shutil.which("wget")
    if wget_path is not None:
        return
    raise FileNotFoundError("The WGET software was not found. Please install WGET or add it to PATH.")
339+
340+
341+
def ensure_trailing_slash(url: str) -> str:
    """Return ``url`` with a single '/' appended when it does not already end with one.

    A ``url`` that already ends with '/' is returned unchanged.
    """
    if url.endswith("/"):
        return url
    return f"{url}/"
344+
345+
346+
def compute_cut_dirs(url: str) -> int:
    """Return the number of path segments in ``url`` for wget's ``--cut-dirs``.

    The URL path is stripped of leading and trailing '/' and split on '/'.
    The segment count is returned so that, when passed to ``--cut-dirs``,
    every remote directory component is dropped and the downloaded files
    land directly in the destination directory instead of a nested subfolder.
    An empty path yields 0.
    """
    trimmed_path = urllib.parse.urlparse(url).path.strip("/")
    if not trimmed_path:
        return 0
    # N separators delimit N + 1 segments (empty segments included).
    return trimmed_path.count("/") + 1
358+
359+
360+
def build_webserver_wget_command(url: str, cut_dirs: int, dst_dir: str, force: bool, verbose: bool) -> list[str]:
    """Construct the wget argument list to pass to ``subprocess.run``.

    Parameters
    ----------
    url : str
        URL of the remote directory to download.
    cut_dirs : int
        Number of leading remote directory components wget must strip.
    dst_dir : str
        Local directory in which the files are saved.
    force : bool
        If ``True``, every remote file is downloaded again.
        If ``False``, ``--timestamping`` is added so that only missing files,
        or files whose remote version is newer, are downloaded.
    verbose : bool
        If ``False``, ``-q`` is added to silence wget's progress output.

    Returns
    -------
    list[str]
        The command as a list of arguments (no shell interpolation involved).

    Notes
    -----
    The following wget arguments are used

    - -q : quiet mode (no detailed progress); only when ``verbose`` is False
    - -r : recursive
    - -np : no parent
    - -nH : no host directories
    - --cut-dirs : number of leading remote directory components to strip
    - --timestamping : download missing files or when remote version is newer;
      only when ``force`` is False
    - -P dst_dir : download into `dst_dir`
    - url
    """
    cmd = ["wget"]
    # Quiet mode is the opposite of verbose.
    if not verbose:
        cmd.append("-q")
    cmd += [
        "-r",
        "-np",
        "-nH",
        f"--cut-dirs={cut_dirs}",
    ]
    # Without -N, a recursive wget re-downloads and overwrites existing files,
    # which is the forced behaviour. Timestamping is the non-forced mode.
    if not force:
        cmd.append("--timestamping")  # -N

    # Define source and destination directory
    cmd += [
        "-P",
        dst_dir,
        url,
    ]
    return cmd
394+
395+
396+
####--------------------------------------------------------------------.
397+
#### Download from Zenodo
398+
399+
400+
def download_zenodo_zip_file(url, dst_dir, force):
    """Fetch the station zip archive from Zenodo and unpack it into ``dst_dir``.

    The archive is downloaded into ``dst_dir`` by ``_download_file_from_url``
    (which receives ``force``), and the returned file path is then handed to
    ``_extract_station_files`` with ``dst_dir`` as the station directory.
    """
    archive_path = _download_file_from_url(url, dst_dir=dst_dir, force=force)
    _extract_station_files(archive_path, station_dir=dst_dir)
275406

276407

277408
def _download_file_from_url(url: str, dst_dir: str, force: bool = False) -> str:

disdrodb/l0/configs/LPM/bins_diameter.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ center:
2020
18: 6.75
2121
19: 7.25
2222
20: 7.75
23-
21: 54
23+
21: 9
2424
bounds:
2525
0:
2626
- 0.125
@@ -87,7 +87,7 @@ bounds:
8787
- 8.0
8888
21:
8989
- 8.0
90-
- 100
90+
- 10.0
9191
width:
9292
0: 0.125
9393
1: 0.125
@@ -110,4 +110,4 @@ width:
110110
18: 0.5
111111
19: 0.5
112112
20: 0.5
113-
21: 92
113+
21: 2

disdrodb/l0/configs/PARSIVEL2/l0a_encodings.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,3 +37,7 @@ list_particles: "str"
3737
raw_drop_concentration: "str"
3838
raw_drop_average_velocity: "str"
3939
raw_drop_number: "str"
40+
air_temperature: "float32"
41+
relative_humidity: "float32"
42+
wind_speed: "float32"
43+
wind_direction: "float32"

disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,3 +158,19 @@ raw_drop_number:
158158
description: Drop counts per diameter and velocity class
159159
long_name: Raw drop number
160160
units: ""
161+
air_temperature:
162+
description: "Air temperature in degrees Celsius (C)"
163+
long_name: Air temperature
164+
units: "C"
165+
relative_humidity:
166+
description: "Relative humidity in percent (%)"
167+
long_name: Relative humidity
168+
units: "%"
169+
wind_speed:
170+
description: "Wind speed in m/s"
171+
long_name: Wind speed
172+
units: "m/s"
173+
wind_direction:
174+
description: "Wind direction in degrees (0-360)"
175+
long_name: Wind direction
176+
units: "degrees"

disdrodb/l0/configs/PARSIVEL2/l0b_encodings.yml

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -331,3 +331,44 @@ raw_drop_number:
331331
- 5000
332332
- 32
333333
- 32
334+
air_temperature:
335+
dtype: uint16
336+
scale_factor: 0.1
337+
add_offset: -99.9
338+
zlib: true
339+
complevel: 3
340+
shuffle: true
341+
fletcher32: false
342+
contiguous: false
343+
_FillValue: 65535
344+
chunksizes: 5000
345+
relative_humidity:
346+
dtype: uint16
347+
scale_factor: 0.01
348+
zlib: true
349+
complevel: 3
350+
shuffle: true
351+
fletcher32: false
352+
contiguous: false
353+
_FillValue: 65535
354+
chunksizes: 5000
355+
wind_speed:
356+
dtype: uint16
357+
scale_factor: 0.1
358+
add_offset: -99.9
359+
zlib: true
360+
complevel: 3
361+
shuffle: true
362+
fletcher32: false
363+
contiguous: false
364+
_FillValue: 65535
365+
chunksizes: 5000
366+
wind_direction:
367+
dtype: uint16
368+
zlib: true
369+
complevel: 3
370+
shuffle: true
371+
fletcher32: false
372+
contiguous: false
373+
_FillValue: 65535
374+
chunksizes: 5000

disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -379,3 +379,43 @@ raw_drop_number:
379379
- diameter_bin_center
380380
n_values: 1024
381381
field_number: "93"
382+
air_temperature:
383+
n_digits: 4
384+
n_characters: 5
385+
n_decimals: 1
386+
n_naturals: 2
387+
data_range:
388+
- -40
389+
- 70
390+
nan_flags: 99999
391+
field_number: "521"
392+
relative_humidity:
393+
n_digits: 5
394+
n_characters: 5
395+
n_decimals: 0
396+
n_naturals: 5
397+
data_range:
398+
- 0
399+
- 99999
400+
nan_flags: 99999
401+
field_number: "522"
402+
wind_speed:
403+
n_digits: 3
404+
n_characters: 4
405+
n_decimals: 1
406+
n_naturals: 2
407+
data_range:
408+
- 0
409+
- 60
410+
nan_flags: null
411+
field_number: "523"
412+
wind_direction:
413+
n_digits: 3
414+
n_characters: 3
415+
n_decimals: 0
416+
n_naturals: 3
417+
data_range:
418+
- 0
419+
- 360
420+
nan_flags: 999
421+
field_number: "524"

disdrodb/l0/manuals/SWS250.pdf

2.18 MB
Binary file not shown.

disdrodb/l0/manuals/VPF730.pdf

2.59 MB
Binary file not shown.

disdrodb/l0/manuals/VPF750.pdf

2.59 MB
Binary file not shown.

0 commit comments

Comments
 (0)