From 584abab6345f78f8e50d8270bc75275012f2e8ec Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Mon, 18 May 2026 10:51:08 -0700 Subject: [PATCH 1/8] Add realtime SBD shore.nc4 processing pipeline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary - New `sbd2netcdf.py`: discovers `shore.nc4` files by YYYYMM month directory, reads each variable via the shore.nc4 parallel-time pattern, interpolates all variables to a common 1S grid, computes `profile_number` using the same scipy peak-detection as delayed-mode data, and writes `shore_1S.nc` alongside each source file - New `process_lrauv_sbd.py`: entry point that calls `SbdExtract.process()`, concatenates the month's `shore_1S.nc` files, and drives `CreateProducts` for quick-look plots - `create_products.py`: resolve `bin_median_`/`bin_mean_` SBD variable names; skip gulpers for realtime missions; fix `distnav` NaN propagation from sparse GPS by pre-filling lat/lon gaps before distance computation; use `np.nanmedian` for UTC offset; align scatter arrays to `distnav` length - `resample.py`: extract `add_profile()` logic into a standalone `compute_profile_number()` shared by both pipelines ## Test plan - [ ] Run `process_lrauv_sbd.py --auv_name ahi --start 20260406 --end 20260412 -v 1` and confirm `shore_1S.nc` files are written alongside each `shore.nc4` - [ ] Confirm `shore_1S.nc` is readable with `xr.open_dataset()` and variables follow `_` naming with `bin_median_` prefix preserved - [ ] Confirm `profile_number` is present and monotonically non-decreasing - [ ] Confirm quick-look PNG plots are produced in the YYYYMM output directory - [ ] Confirm delayed-mode `resample.py` pipeline still produces correct `profile_number` (regression check) 🤖 Generated with [Claude Code](https://claude.com/claude-code) --- .vscode/launch.json | 18 +- src/data/archive.py | 44 ++++ src/data/create_products.py | 152 +++++++++++-- src/data/process_lrauv_sbd.py | 184 +++++++++++++++ src/data/resample.py | 101 +++++---- src/data/sbd2netcdf.py | 416 ++++++++++++++++++++++++++++++++++ 6 files changed, 844 insertions(+), 71 deletions(-) create mode 100755 src/data/process_lrauv_sbd.py create mode 100755 src/data/sbd2netcdf.py diff --git a/.vscode/launch.json b/.vscode/launch.json index 43cfaea..72ed7f7 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -480,12 +480,24 @@ // Test ESP markers - Shallow log file from Denmark deployment in June 2024, has large depth values in self.ds.depth.values[6500:6800] //"args": ["-v", "1", "--log_file", "makai/missionlogs/2024/20240607_20240615/20240611T082709/202406110827_202406111026.nc4", "--update_ssds_provenance", "--clobber"] // Test making engineering plot with a short log file - "args": ["-v", "1", "--log_file", "ahi/missionlogs/2025/20250128_20250131/20250129T145420/202501291454_202501292233.nc4", "--update_ssds_provenance", "--clobber"] + "args": ["-v", "1", "--log_file", "ahi/missionlogs/2025/20250128_20250131/20250129T145420/202501291454_202501292233.nc4"] // and for the rest of the log files of this deployment //"args": ["-v", "1", "--auv_name", "ahi", "--start", "20250127T000000", "--end", "20250201T000000", "--update_ssds_provenance", "--clobber", "--no_cleanup"] // Test having a better y-max that puts data instead of white space at the bottom //"args": ["-v", "1", "--log_file", "ahi/missionlogs/2026/20260406_20260412/20260410T010521/202604100105_202604100800.nc4"] }, + { + "name": "process_lrauv_sbd", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/src/data/process_lrauv_sbd.py", + "console": "integratedTerminal", + // ahi CFE deployment March 2026 - large shore.nc4 file, good test case + //"args": ["--auv_name", "ahi", "--start", "20260317", "--end", "20260318", "-v", "--noproducts"] + // ahi 47 CFE deployment April 2026 + //"args": ["--auv_name", "ahi", "--start", "20260406", "--end", "20260412", "-v"] + "args": ["--auv_name", "ahi", "--start", "20260303", "--end", "20260331", "-v", "2", "--clobber"] + }, { "name": "lrauv_deployment_plots", "type": "debugpy", @@ -525,9 +537,9 @@ // Test --build_index for 2026 to test a fixed layout //"args": ["-v", "1", "--build_index", "--start", "20260101", "--end", "20261231", "--force"] // Test fixing the "folded" bathymetry - //"args": ["-v", "1", "--dlist", "ahi/missionlogs/2026/20260218_20260220.dlist", "--force", "--update_ssds_provenance", "--notify", "mccann@mbari.org"] + "args": ["-v", "1", "--dlist", "ahi/missionlogs/2026/20260218_20260220.dlist", "--force", "--update_ssds_provenance", "--notify", "mccann@mbari.org"] // Repair a badly plotted deployment with fixed bathymetry plotting - Pontus 54 Docking - "args": ["-v", "1", "--dlist", "pontus/missionlogs/2025/20250604_20250616.dlist"] + //"args": ["-v", "1", "--dlist", "pontus/missionlogs/2025/20250604_20250616.dlist"] }, ] diff --git a/src/data/archive.py b/src/data/archive.py index 34f6645..c31b75b 100755 --- a/src/data/archive.py +++ b/src/data/archive.py @@ -294,6 +294,50 @@ def copy_to_LRAUV(self, log_file: str, freq: str = FREQ) -> None: # noqa: C901, src_file.name, ) + def copy_sbd_to_LRAUV(self, sbd_nc_path: Path) -> None: + """Copy SBD resampled netCDF and any product files to the LRAUV archive volume. + + Maps: + data/lrauv_data/{auv}/realtime/sbdlogs/{YYYY}/{date_range}/ + → /Volumes/LRAUV/{auv}/realtime/sbdlogs/{YYYY}/{date_range}/ + + Args: + sbd_nc_path: Absolute path to the *_sbd_1S.nc output file. + """ + src_dir = sbd_nc_path.parent + # Derive the destination by replacing BASE_LRAUV_PATH prefix with LRAUV_VOL + try: + rel = src_dir.relative_to(BASE_LRAUV_PATH) + except ValueError: + self.logger.exception( + "SBD path %s is not under BASE_LRAUV_PATH %s", src_dir, BASE_LRAUV_PATH + ) + return + dst_dir = Path(LRAUV_VOL) / rel + try: + dst_dir.stat() + except FileNotFoundError: + self.logger.warning("Destination %s not found — is %s mounted?", dst_dir, LRAUV_VOL) + return + + stem = sbd_nc_path.stem # e.g. ahi_20260317_20260318_sbd_1S + for pattern in (f"{stem}.nc", f"{stem}_*.png", f"{stem}_*.txt"): + for src_file in sorted(src_dir.glob(pattern)): + dst_file = dst_dir / src_file.name + if self.clobber: + if dst_file.exists(): + self.logger.info("Removing %s", dst_file) + dst_file.unlink() + self.logger.info("copyfile %s %s", src_file, dst_dir) + shutil.copyfile(src_file, dst_file) + self.logger.info("copyfile %s %s done.", src_file, dst_dir) + elif not dst_file.exists(): + self.logger.info("copyfile %s %s", src_file, dst_dir) + shutil.copyfile(src_file, dst_file) + self.logger.info("copyfile %s %s done.", src_file, dst_dir) + else: + self.logger.info("%s exists, not overwriting (use --clobber)", dst_file.name) + def copy_lrauv_deployment(self, deployment_dir: Path, plot_name_stem: str) -> None: """Copy LRAUV deployment plots and HTML index to the LRAUV archive volume. diff --git a/src/data/create_products.py b/src/data/create_products.py index bf367b1..2d87fe8 100755 --- a/src/data/create_products.py +++ b/src/data/create_products.py @@ -530,11 +530,14 @@ def _get_dorado_plot_variables(self, best_ctd: str) -> list: ] def _get_lrauv_plot_variables(self) -> list: - """Get LRAUV-specific plot variables. + """Get LRAUV-specific plot variables, resolved against self.ds. - Returns variables commonly available in LRAUV log files. + Each canonical name is looked up in self.ds via _resolve_var_in_ds so + that SBD realtime datasets (which store variables as bin_median_ variants) + return the name actually present in the dataset. Variables absent from + the dataset keep their canonical name so downstream checks can detect them. """ - return [ + candidates = [ ("density", "linear"), ("ctdseabird_sea_water_temperature", "linear"), ("ctdseabird_sea_water_salinity", "linear"), @@ -545,6 +548,7 @@ def _get_lrauv_plot_variables(self) -> list: ("wetlabsbb2fl_mass_concentration_of_chlorophyll_in_sea_water", "linear"), ("universals_platform_speed_wrt_sea_water", "linear"), ] + return [(self._resolve_var_in_ds(var) or var, scale) for var, scale in candidates] def _get_dorado_biolume_variables(self) -> list: """Get Dorado-specific bioluminescence plot variables for plot_biolume_2column().""" @@ -825,7 +829,7 @@ def write_per_log_html(self) -> str | None: self.logger.info("Wrote per-log HTML to %s", html_path) return str(html_path) - def _plot_nighttime_indicator( # noqa: PLR0915 + def _plot_nighttime_indicator( # noqa: PLR0915, C901 self, fig: matplotlib.figure.Figure, ref_ax: matplotlib.axes.Axes, @@ -849,6 +853,17 @@ def _plot_nighttime_indicator( # noqa: PLR0915 lons = self.ds.cf["longitude"].to_numpy() dist_km = distnav.to_numpy() / 1000.0 + # distnav may be NaN-filtered to fewer points than the full dataset + # (e.g. sparse GPS in realtime SBD data); align all arrays to it. + n_ds = len(times) + if len(distnav) != n_ds: + ds_time = self.ds.cf["time"].to_numpy() + nav_idx = np.searchsorted(ds_time, distnav.coords["time"].to_numpy()) + nav_idx = nav_idx[nav_idx < n_ds] + times = times[nav_idx] + lats = lats[nav_idx] + lons = lons[nav_idx] + # Subsample for speed (pysolar is slow) n = len(times) step = max(1, n // 500) @@ -891,7 +906,7 @@ def _plot_nighttime_indicator( # noqa: PLR0915 night_ax.axvspan(night_start, sub_dist[-1], color="black", lw=0) # Draw a short tick and local date label at each local midnight - utc_offset_h = int(round(float(np.median(lons)) / 15)) + utc_offset_h = int(round(float(np.nanmedian(lons)) / 15)) local_tz = timezone(timedelta(hours=utc_offset_h)) times_s = np.array([t.timestamp() for t in times]) day = times[0].to_pydatetime().astimezone(local_tz).date() @@ -962,15 +977,32 @@ def _grid_dims(self, plot_vars: list[str] | None = None) -> tuple: return np.array([]), np.array([]), xr.DataArray() MAX_LONGITUDE_VALUES = 400 n_subsample = 200 if len(self.ds.cf["longitude"].to_numpy()) > MAX_LONGITUDE_VALUES else 1 + # Fill NaN gaps in lat/lon before subsampling so np.interp receives no NaN. + # Sparse GPS coverage (e.g. realtime SBD data) leaves gaps that would + # otherwise propagate NaN through cumsum and shorten distnav. + lon_filled = ( + pd.Series(self.ds.cf["longitude"].to_numpy()) + .interpolate(method="linear") + .ffill() + .bfill() + .to_numpy() + ) + lat_filled = ( + pd.Series(self.ds.cf["latitude"].to_numpy()) + .interpolate(method="linear") + .ffill() + .bfill() + .to_numpy() + ) lon_sub_intrp = np.interp( self.ds.cf["time"].to_numpy().astype(np.int64), self.ds.cf["time"].to_numpy()[::n_subsample].astype(np.int64), - self.ds.cf["longitude"].to_numpy()[::n_subsample], + lon_filled[::n_subsample], ) lat_sub_intrp = np.interp( self.ds.cf["time"].to_numpy().astype(np.int64), self.ds.cf["time"].to_numpy()[::n_subsample].astype(np.int64), - self.ds.cf["latitude"].to_numpy()[::n_subsample], + lat_filled[::n_subsample], ) x, y = pyproj.Proj(proj="utm", zone=utm_zone, ellps="WGS84")( lon_sub_intrp, @@ -1118,6 +1150,8 @@ def _get_gulper_locations(self, distnav: xr.DataArray) -> dict: Returns: Dictionary mapping bottle number to (distance_km, depth_m) tuple """ + if "realtime" in self.mission: + return {} gulper = Gulper() gulper.args = argparse.Namespace() gulper.args.base_path = self.base_path @@ -1201,13 +1235,50 @@ def _get_sipper_locations(self, distnav: xr.DataArray) -> dict: return locations + @staticmethod + def _canonical_var_name(var: str) -> str: + """Strip bin_mean_ / bin_median_ prefix from the instrument-variable part. + + Maps SBD realtime variable names to their delayed-mode equivalents so that + colormap, label, and format lookups work without duplicating every entry. + + Examples: + ctdseabird_bin_median_sea_water_temperature + → ctdseabird_sea_water_temperature + wetlabsbb2fl_bin_median_mass_concentration_of_chlorophyll_in_sea_water + → wetlabsbb2fl_mass_concentration_of_chlorophyll_in_sea_water + """ + import re as _re + + return _re.sub(r"_bin_(?:mean|median)_", "_", var, count=1) + + def _resolve_var_in_ds(self, var: str) -> str | None: + """Return the name under which var is stored in self.ds, or None. + + Checks the canonical name first, then the SBD realtime bin_median_ / + bin_mean_ variants (e.g. ctdseabird_sea_water_temperature → + ctdseabird_bin_median_sea_water_temperature). + """ + if var in self.ds: + return var + if "_" not in var: + return None + idx = var.index("_") + group, stem = var[:idx], var[idx + 1 :] + for infix in ("bin_median_", "bin_mean_"): + candidate = f"{group}_{infix}{stem}" + if candidate in self.ds: + return candidate + return None + def _get_colormap_name(self, var: str) -> str: """Get colormap name for a variable. Tries in order: 1. Lookup by standard_name attribute - 2. Lookup by matching variable name parts - 3. Default to 'cividis' + 2. Lookup via canonical (bin_*-stripped) name → cmocean_lookup by var suffix + 3. Lookup by matching variable name parts + 4. Default to 'cividis' Args: var: Variable name @@ -1221,6 +1292,14 @@ def _get_colormap_name(self, var: str) -> str: if standard_name in self.cmocean_lookup: return self.cmocean_lookup[standard_name] + # For bin_median_/bin_mean_ SBD variables, map to canonical delayed-mode name + # and derive colormap from the suffix (e.g. "sea_water_temperature" → "thermal"). + canonical = self._canonical_var_name(var) + if canonical != var: + suffix = canonical.split("_", 1)[-1] # strip group prefix + if suffix in self.cmocean_lookup: + return self.cmocean_lookup[suffix] + # Fallback: try matching variable name parts var_lower = var.lower() for key, colormap in self.variable_colormap_lookup.items(): @@ -1649,16 +1728,23 @@ def _apply_plot_conversion(self, var: str, data: np.ndarray) -> np.ndarray: def _resolve_label(self, var: str) -> tuple[str, str, str]: """Return (display_name, units, colormap) for a variable. - Priority: dataset attrs > variable_display_names > variable name. + Priority: dataset attrs > variable_display_names > canonical-name fallbacks > variable name. + For SBD bin_median_/bin_mean_ variables the canonical (stripped) name is tried in + display and fallback lookups so that entries don't need to be duplicated. """ + canonical = self._canonical_var_name(var) if var in self.ds and self.ds[var].attrs.get("long_name"): long_name = self.ds[var].attrs["long_name"] units = self.ds[var].attrs.get("units", "") color_map_name = self._get_colormap_name(var) else: - long_name = self.variable_display_names.get(var, var) + long_name = self.variable_display_names.get( + var, self.variable_display_names.get(canonical, var) + ) if var in self.variable_fallback_metadata: units, color_map_name = self.variable_fallback_metadata[var] + elif canonical in self.variable_fallback_metadata: + units, color_map_name = self.variable_fallback_metadata[canonical] else: units = "" color_map_name = self._get_colormap_name(var) @@ -1739,7 +1825,9 @@ def _setup_no_data_axes( # noqa: PLR0913 cb = fig.colorbar(dummy_im, ax=curr_ax, pad=0.01) cb.set_ticks([]) - short_label = self.variable_short_labels.get(var, "") + short_label = self.variable_short_labels.get( + var, self.variable_short_labels.get(self._canonical_var_name(var), "") + ) if short_label: curr_ax.text( 0.5, @@ -1910,14 +1998,28 @@ def _plot_var_scatter( # noqa: C901, PLR0912, PLR0913, PLR0915 curr_ax.set_ylim(max(iz), min(iz)) curr_ax.autoscale(enable=False) + # distnav may be shorter than the dataset when NaN positions were + # filtered in _grid_dims (e.g. sparse GPS in realtime SBD data). + # Align depth and var arrays to distnav's time subset. + n_ds = len(self.ds.cf["time"]) + if len(distnav) != n_ds: + ds_time = self.ds.cf["time"].to_numpy() + nav_idx = np.searchsorted(ds_time, distnav.coords["time"].to_numpy()) + nav_idx = nav_idx[nav_idx < n_ds] + depth_for_scatter = self.ds.cf["depth"].to_numpy()[nav_idx] + var_for_scatter = var_to_plot[nav_idx] + else: + depth_for_scatter = self.ds.cf["depth"].to_numpy() + var_for_scatter = var_to_plot + # Create scatter plot with actual data points marker_size = next( (size for key, size in self.scatter_marker_size.items() if key in var.lower()), 1 ) scatter = curr_ax.scatter( distnav.to_numpy() / 1000.0, - self.ds.cf["depth"].to_numpy(), - c=var_to_plot, + depth_for_scatter, + c=var_for_scatter, s=marker_size, cmap=cmap, norm=norm, @@ -1990,8 +2092,11 @@ def _plot_var_scatter( # noqa: C901, PLR0912, PLR0913, PLR0915 # Format tick labels intelligently based on value range tick_values = cb.get_ticks() if len(tick_values) > 0: - if var in self.variable_colorbar_format: - fmt = self.variable_colorbar_format[var] + _cbfmt_key = ( + var if var in self.variable_colorbar_format else self._canonical_var_name(var) + ) + if _cbfmt_key in self.variable_colorbar_format: + fmt = self.variable_colorbar_format[_cbfmt_key] labels = [fmt.format(x) for x in tick_values] else: value_range = abs(tick_values.max() - tick_values.min()) @@ -2020,7 +2125,9 @@ def _plot_var_scatter( # noqa: C901, PLR0912, PLR0913, PLR0915 long_name, units, _ = self._resolve_label(var) - short_label = self.variable_short_labels.get(var, "") + short_label = self.variable_short_labels.get( + var, self.variable_short_labels.get(self._canonical_var_name(var), "") + ) if short_label: curr_ax.text( 0.5, @@ -2290,8 +2397,11 @@ def _plot_var_contour( # noqa: C901, PLR0912, PLR0913, PLR0915 else: tick_values = cb.get_ticks() if len(tick_values) > 0: - if var in self.variable_colorbar_format: - fmt = self.variable_colorbar_format[var] + _cbfmt_key = ( + var if var in self.variable_colorbar_format else self._canonical_var_name(var) + ) + if _cbfmt_key in self.variable_colorbar_format: + fmt = self.variable_colorbar_format[_cbfmt_key] labels = [fmt.format(x) for x in tick_values] else: value_range = abs(tick_values.max() - tick_values.min()) @@ -2320,7 +2430,9 @@ def _plot_var_contour( # noqa: C901, PLR0912, PLR0913, PLR0915 long_name, units, _ = self._resolve_label(var) - short_label = self.variable_short_labels.get(var, "") + short_label = self.variable_short_labels.get( + var, self.variable_short_labels.get(self._canonical_var_name(var), "") + ) if short_label: curr_ax.text( 0.5, diff --git a/src/data/process_lrauv_sbd.py b/src/data/process_lrauv_sbd.py new file mode 100755 index 0000000..b338cc8 --- /dev/null +++ b/src/data/process_lrauv_sbd.py @@ -0,0 +1,184 @@ +#!/usr/bin/env python +""" +Process realtime SBD shore.nc4 files into resampled netCDF and quick-look plots. + +Finds shore.nc4 files from: + smb://atlas.shore.mbari.org/LRAUV//realtime/sbdlogs/ + +and produces: + data/lrauv_data//realtime/sbdlogs/// + __sbd_1S.nc + __sbd_1S_2column_cmocean.png + __sbd_1S_2column_planktivore.png (if applicable) + +Usage: + uv run src/data/process_lrauv_sbd.py --auv_name ahi \\ + --start 20260406 --end 20260412 -v +""" + +__author__ = "Mike McCann" +__copyright__ = "Copyright 2026, Monterey Bay Aquarium Research Institute" + +import argparse +import logging +import sys +from datetime import UTC, datetime + +from sbd2netcdf import FREQ, SbdExtract + +_LOG_LEVELS = (logging.WARN, logging.INFO, logging.DEBUG) +_formatter = logging.Formatter( + "%(levelname)s %(asctime)s %(filename)s %(funcName)s():%(lineno)d [%(process)d] %(message)s" +) +_handler = logging.StreamHandler() +_handler.setFormatter(_formatter) +logger = logging.getLogger(__name__) +logger.addHandler(_handler) + + +def _parse_dt(s: str) -> datetime: + for fmt in ("%Y%m%dT%H%M%S", "%Y%m%d"): + try: + return datetime.strptime(s, fmt).replace(tzinfo=UTC) + except ValueError: + continue + msg = f"Unrecognised datetime format: {s!r}. Use YYYYMMDDTHHMMSS or YYYYMMDD." + raise ValueError(msg) + + +def process_command_line() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument("--auv_name", required=True, help="AUV name, e.g. ahi, tethys") + parser.add_argument( + "--start", + required=True, + help="Start datetime: YYYYMMDDTHHMMSS or YYYYMMDD", + ) + parser.add_argument( + "--end", + required=True, + help="End datetime: YYYYMMDDTHHMMSS or YYYYMMDD", + ) + parser.add_argument( + "--vehicle_dir", + default="/Volumes/LRAUV", + help="Root LRAUV vehicle directory (default: /Volumes/LRAUV)", + ) + parser.add_argument( + "--clobber", + action="store_true", + help="Overwrite existing output files", + ) + parser.add_argument( + "--noproducts", + action="store_true", + help="Skip create_products step (write netCDF only)", + ) + parser.add_argument( + "--verbose", + "-v", + type=int, + choices=range(3), + default=0, + const=1, + nargs="?", + help="Verbosity level: 0=WARN (default), 1=INFO, 2=DEBUG", + ) + return parser.parse_args() + + +def main() -> None: + args = process_command_line() + logger.setLevel(_LOG_LEVELS[min(args.verbose, 2)]) + + start = _parse_dt(args.start) + end = _parse_dt(args.end) + + extractor = SbdExtract( + auv_name=args.auv_name, + start=start, + end=end, + vehicle_dir=args.vehicle_dir, + verbose=args.verbose, + clobber=args.clobber, + commandline=" ".join(sys.argv), + ) + + out_paths = extractor.process() + if not out_paths: + logger.error("No data produced — check that shore.nc4 files exist for this range.") + sys.exit(1) + + if args.noproducts: + logger.info("Skipping create_products (--noproducts specified)") + return + + # Produce quick-look plots via create_products + import importlib.util + + if importlib.util.find_spec("create_products") is None: + logger.warning("create_products not available — skipping plots") + return + + try: + from create_products import CreateProducts + except ImportError as e: + logger.warning("Cannot import create_products: %s — skipping plots", e) + return + + date_range = f"{start.strftime('%Y%m%d')}_{end.strftime('%Y%m%d')}" + mission = f"realtime/sbdlogs/{start.strftime('%Y')}/{date_range}" + + logger.info("Creating products from %d shore_1S.nc files", len(out_paths)) + try: + import xarray as xr + + # Combine all shore_1S.nc files in the YYYYMM directory — not just those + # touched by --start/--end — so the plot covers the full month. + # Some log periods may have no depth data, so depth/lat/lon may be absent + # as coordinates. xr.concat requires coordinates in all datasets, so + # demote them to data variables first, concat with join="outer" + # (NaN-fill absent variables), then promote back. + # Log periods can also overlap in time, so sort and deduplicate after. + # Pre-open as ds so _open_ds() is a no-op; log_file enables _is_lrauv(). + output_dir = out_paths[0].parent.parent # YYYYMM directory + month_files = sorted(output_dir.rglob(f"shore_{FREQ}.nc")) + logger.info("Combining %d shore_%s.nc files from %s", len(month_files), FREQ, output_dir) + _GEO_COORDS = ("depth", "latitude", "longitude") + individual = [] + for p in month_files: + d = xr.open_dataset(p) + present = [c for c in _GEO_COORDS if c in d.coords] + if present: + d = d.reset_coords(present) + individual.append(d) + ds = xr.concat(individual, dim="time", join="outer") + ds = ds.sortby("time").drop_duplicates("time") + geo_present = [c for c in _GEO_COORDS if c in ds] + if geo_present: + ds = ds.set_coords(geo_present) + plot_stem = f"{args.auv_name}_{output_dir.name}_sbd" + cp = CreateProducts( + auv_name=args.auv_name, + mission=mission, + base_path=args.vehicle_dir, + freq="1S", + verbose=args.verbose, + ds=ds, + log_file="sbd_placeholder.nc4", + output_dir=output_dir, + plot_name_stem=plot_stem, + ) + cp.plot_2column() + cp.plot_planktivore_2column() + except RuntimeError as e: + logger.warning("create_products failed: %s", e) + + +if __name__ == "__main__": + AUV_NAME = "ahi" + LRAUV_DIR = "/Volumes/LRAUV" + main() diff --git a/src/data/resample.py b/src/data/resample.py index 0485c18..fd39e43 100755 --- a/src/data/resample.py +++ b/src/data/resample.py @@ -46,6 +46,56 @@ MAX_INTERPOLATE_LIMIT = 3 # Maximum number of consecutive NaNs to fill during interpolation +def compute_profile_number( + depth_da: xr.DataArray, + profile_min_depth_change: float = PROFILE_MIN_DEPTH_CHANGE, +) -> xr.DataArray | None: + """Return a sequential profile-number DataArray from a 1-D depth time series. + + Uses scipy peak detection to find depth turning points and increments the + counter when consecutive peaks differ by more than profile_min_depth_change. + Returns None if depth_da is empty. + """ + if len(depth_da) == 0: + return None + options = {"prominence": 10, "width": 30} + peaks_pos, _ = signal.find_peaks(depth_da.to_numpy(), **options) + peaks_neg, _ = signal.find_peaks(-depth_da.to_numpy(), **options) + peaks = np.concatenate((peaks_pos, peaks_neg, [0], [len(depth_da) - 1])) + peaks.sort(kind="mergesort") + s_peaks = depth_da.isel(time=peaks).to_pandas() + profiles: list[int] = [] + count = 1 + k = 0 + for tv in depth_da.coords["time"].to_numpy(): + if tv > s_peaks.index[k + 1]: + k += 1 + if abs(s_peaks.iloc[k + 1] - s_peaks.iloc[k]) > profile_min_depth_change: + count += 1 + profiles.append(count) + if k > len(s_peaks) - 2: + break + # Safety fill: extend to cover any remaining time values + if len(profiles) < len(depth_da): + profiles.extend([count] * (len(depth_da) - len(profiles))) + return xr.DataArray( + profiles, + dims="time", + coords={"time": depth_da.coords["time"].to_numpy()}, + name="profile_number", + attrs={ + "long_name": "Profile number", + "comment": ( + f"Sequential profile counter identifying individual vertical casts. " + f"Profiles are detected from depth vertices using scipy.signal.find_peaks " + f"with prominence=10m and width=30 samples. Increments when vehicle " + f"transitions between upcast and downcast with " + f">{profile_min_depth_change}m vertical displacement." + ), + }, + ) + + class InvalidAlignFile(Exception): pass @@ -833,54 +883,9 @@ def add_profile(self, profile_min_depth_change: float) -> None: "No depth data available to compute profile numbers", ) return - # Find depth vertices value using scipy's find_peaks algorithm - options = {"prominence": 10, "width": 30} - peaks_pos, _ = signal.find_peaks(self.resampled_nc["depth"], **options) - peaks_neg, _ = signal.find_peaks(-self.resampled_nc["depth"], **options) - # Need to add the first and last time values to the list of peaks - peaks = np.concatenate( - (peaks_pos, peaks_neg, [0], [len(self.resampled_nc["depth"]) - 1]), - ) - peaks.sort(kind="mergesort") - s_peaks = self.resampled_nc["depth"][peaks].to_pandas() - - # Assign a profile number to each time value - profiles = [] - count = 1 - k = 0 - self.logger.info( - "Before accessing time coordinate - resampled_nc variables: %s, coords: %s", - list(self.resampled_nc.variables.keys()), - list(self.resampled_nc.coords.keys()), - ) - for tv in self.resampled_nc["time"].to_numpy(): - if tv > s_peaks.index[k + 1]: - # Encountered a new simple_depth point - k += 1 - if abs(s_peaks.iloc[k + 1] - s_peaks.iloc[k]) > profile_min_depth_change: - # Completed downcast or upcast - count += 1 - profiles.append(count) - if k > len(s_peaks) - 2: - break - - self.resampled_nc["profile_number"] = xr.DataArray( - profiles, - dims="time", - coords=[self.resampled_nc["time"].to_numpy()], - name="profile_number", - ) - self.resampled_nc["profile_number"].attrs["coordinates"] = "time depth latitude longitude" - self.resampled_nc["profile_number"].attrs = { - "long_name": "Profile number", - "comment": ( - f"Sequential profile counter identifying individual vertical casts. " - f"Profiles are detected from depth vertices using scipy.signal.find_peaks " - f"with prominence=10m and width=30 samples. Increments when vehicle " - f"transitions between upcast and downcast with >{profile_min_depth_change}m " - f"vertical displacement." - ), - } + profile_da = compute_profile_number(self.resampled_nc["depth"], profile_min_depth_change) + if profile_da is not None: + self.resampled_nc["profile_number"] = profile_da def set_proxy_parameters( self, mission_start: datetime, auv_name: str = "dorado" diff --git a/src/data/sbd2netcdf.py b/src/data/sbd2netcdf.py new file mode 100755 index 0000000..0cb5f50 --- /dev/null +++ b/src/data/sbd2netcdf.py @@ -0,0 +1,416 @@ +#!/usr/bin/env python +""" +Read realtime SBD shore.nc4 files and produce a resampled netCDF file alongside each. + +Each SBD transmission produces one shore.nc4 file under: + realtime/sbdlogs/YYYY/YYYYMM/YYYYMMDDTHHMMSS/shore.nc4 + +Shore.nc4 uses a "parallel time" pattern — each variable has a companion +variable _time holding its individual timestamps. Variables in named groups +are prefixed with the lowercased group name in the output; the "_" (backseat) group +maps to the prefix "backseat"; root-level non-coordinate variables use explicit +prefixes defined in ROOT_VAR_PREFIXES. + +Output: shore_{FREQ}.nc written into the same directory as the source shore.nc4. +""" + +__author__ = "Mike McCann" +__copyright__ = "Copyright 2026, Monterey Bay Aquarium Research Institute" + +import logging +import os +import sys +from datetime import UTC, datetime +from pathlib import Path +from socket import gethostname + +import git +import numpy as np +import pandas as pd +import xarray as xr +from resample import compute_profile_number + +FREQ = "1S" # used in filenames; pandas requires lowercase — use _PANDAS_FREQ there +_PANDAS_FREQ = FREQ.lower() + +# Variables to extract from each group. +# Key "/" = root group; key "_" = backseat group → prefix "backseat". +SBD_PARMS = { + "/": [ + "depth", + "latitude", + "longitude", + "platform_battery_charge", + "platform_battery_voltage", + "platform_average_current", + "time_fix", + "latitude_fix", + "longitude_fix", + ], + "CTD_Seabird": [ + "bin_median_sea_water_temperature", + "bin_median_sea_water_salinity", + ], + "WetLabsBB2FL": [ + "bin_median_mass_concentration_of_chlorophyll_in_sea_water", + "bin_median_ParticulateBackscatteringCoeff470nm", + "bin_median_ParticulateBackscatteringCoeff650nm", + ], + "_": [ + "planktivore_HM_AvgRois", + "planktivore_LM_AvgRois", + ], + "Science": [ + "PeakPlanktivoreLMavgROI", + "PeakPlanktivoreLMavgROIDepth", + "EdgePlanktivoreLMavgROI", + "EdgePlanktivoreLMavgROIDepth", + "PeakPlanktivoreHMavgROI", + "PeakPlanktivoreHMavgROIDepth", + ], +} + +# Root-level non-coordinate variables: variable name → output prefix +ROOT_VAR_PREFIXES = { + "platform_battery_charge": "bpc1", + "platform_battery_voltage": "bpc1", + "platform_average_current": "onboard", + "time_fix": "nal9602", + "latitude_fix": "nal9602", + "longitude_fix": "nal9602", +} + +# Coordinates from the root group — no prefix applied +ROOT_COORDS = {"depth", "latitude", "longitude"} + +# Group name → output variable prefix +GROUP_PREFIX = { + "CTD_Seabird": "ctdseabird", + "WetLabsBB2FL": "wetlabsbb2fl", + "_": "backseat", + "Science": "science", +} + + +class SbdExtract: + """Process SBD shore.nc4 files for a deployment window, one file at a time.""" + + logger = logging.getLogger(__name__) + _handler = logging.StreamHandler() + _formatter = logging.Formatter( + "%(levelname)s %(asctime)s %(filename)s " + "%(funcName)s():%(lineno)d [%(process)d] %(message)s", + ) + _handler.setFormatter(_formatter) + logger.addHandler(_handler) + _log_levels = (logging.WARN, logging.INFO, logging.DEBUG) + + def __init__( # noqa: PLR0913 + self, + auv_name: str, + start: datetime, + end: datetime, + vehicle_dir: str = "/Volumes/LRAUV", + verbose: int = 0, + clobber: bool = False, # noqa: FBT001, FBT002 + commandline: str = "", + ) -> None: + self.auv_name = auv_name + self.start = start + self.end = end + self.vehicle_dir = vehicle_dir + self.verbose = verbose + self.clobber = clobber + self.commandline = commandline + self.logger.setLevel(self._log_levels[min(verbose, 2)]) + + def sbd_file_list(self) -> list[Path]: + """Return sorted list of shore.nc4 paths whose directory timestamp is in [start, end].""" + sbd_root = Path(self.vehicle_dir) / self.auv_name / "realtime" / "sbdlogs" + self.logger.info("Searching for shore.nc4 files under %s", sbd_root) + files = [] + if not sbd_root.exists(): + self.logger.warning("SBD root directory not found: %s", sbd_root) + return files + + # Build the list of YYYYMM month dirs that overlap [start, end]. + month_dirs: list[Path] = [] + y, m = self.start.year, self.start.month + while (y, m) <= (self.end.year, self.end.month): + candidate = sbd_root / str(y) / f"{y}{m:02d}" + if candidate.is_dir(): + month_dirs.append(candidate) + m += 1 + if m > 12: # noqa: PLR2004 + m = 1 + y += 1 + + all_files: list[Path] = [] + for month_dir in month_dirs: + self.logger.info("Scanning %s", month_dir) + found = list(month_dir.rglob("shore.nc4")) + self.logger.debug(" rglob found %d shore.nc4 files", len(found)) + all_files.extend(found) + + self.logger.debug("Sorting %d total shore.nc4 candidates", len(all_files)) + for shore_file in sorted(all_files): + dir_name = shore_file.parent.name # e.g. 20260406T120000 + self.logger.debug(" checking %s", dir_name) + try: + dir_dt = datetime.strptime(dir_name, "%Y%m%dT%H%M%S").replace(tzinfo=UTC) + except ValueError: + self.logger.debug(" skipping (unrecognised dir name)") + continue + if self.start <= dir_dt <= self.end: + self.logger.debug(" → in window, adding") + files.append(shore_file) + else: + self.logger.debug(" → outside window [%s, %s]", self.start, self.end) + + self.logger.info("Found %d shore.nc4 files in [%s, %s]", len(files), self.start, self.end) + return files + + def _output_var_name(self, group: str, var: str) -> str: + """Return the output variable name for a group/variable pair.""" + if group == "/": + if var in ROOT_COORDS: + return var + prefix = ROOT_VAR_PREFIXES.get(var, "") + return f"{prefix}_{var}" if prefix else var + prefix = GROUP_PREFIX.get(group, group.lower().replace("_", "")) + return f"{prefix}_{var}" + + def _read_var_da(self, nc_path: Path, group: str, var: str) -> xr.DataArray | None: + """Open one variable as a time-indexed DataArray with dim 'time'. + + Shore.nc4 parallel-time pattern: each variable {var} has a companion + variable {var}_time holding its individual timestamps. We assign that + time array as the coordinate and rename the parallel dimension to 'time' + so every DataArray shares a common dimension name for interp(). + """ + open_kw: dict = {"engine": "netcdf4"} + if group != "/": + open_kw["group"] = group + try: + ds = xr.open_dataset(nc_path, **open_kw) + except (OSError, ValueError) as e: + self.logger.debug("Cannot open group %r in %s: %s", group, nc_path.name, e) + return None + + time_var = f"{var}_time" + if var not in ds or time_var not in ds: + return None + + # Rename the variable-specific parallel time dimension to the canonical "time". + dim = ds[var].dims[0] + da = ds[var].assign_coords(**{dim: ds[time_var]}).rename({dim: "time"}) + # Drop rows with NaT time coordinates (NC_FILL_DOUBLE ~9.97e36 and other + # non-finite fill values that slip through xarray's masking decode as NaT). + valid_time = pd.notna(pd.DatetimeIndex(da.time.to_numpy())) + da = da.isel(time=valid_time) + # Also gate to the deployment window: valid-but-outlier epoch values (e.g. 0 → + # 1970-01-01) would otherwise corrupt t_min/t_max for the common time grid. + start_np = np.datetime64(self.start.replace(tzinfo=None), "ns") + end_np = np.datetime64(self.end.replace(tzinfo=None), "ns") + in_window = (da.time >= start_np) & (da.time <= end_np) + da = da.isel(time=in_window.to_numpy()).dropna("time") + # interp() requires a unique, sorted time index. + return da.drop_duplicates("time").sortby("time") + + def process_one_file(self, nc_path: Path) -> Path | None: + """Process one shore.nc4 → write shore_{FREQ}.nc in the same directory. + + Each variable carries its own independently-sampled time axis. + All variables are interpolated onto a common 1S grid so the output + dataset has a single shared 'time' dimension. + """ + out_path = nc_path.parent / f"shore_{FREQ}.nc" + if out_path.exists() and not self.clobber: + self.logger.info("Already exists, skipping: %s", out_path) + return out_path + + self.logger.info("Reading %s", nc_path) + + # Collect a DataArray per output variable, each with its own time axis. + data_arrays: dict[str, xr.DataArray] = {} + for group, var_list in SBD_PARMS.items(): + for var in var_list: + da = self._read_var_da(nc_path, group, var) + if da is not None and da.size > 0: + data_arrays[self._output_var_name(group, var)] = da + + if not data_arrays: + self.logger.warning("No variables extracted from %s", nc_path.name) + return None + + # Build a common 1S grid that spans all individually-timed variables. + all_times = np.concatenate([da.time.to_numpy() for da in data_arrays.values()]) + t_min = pd.Timestamp(all_times.min()).floor("s") + t_max = pd.Timestamp(all_times.max()).ceil("s") + common_time = pd.date_range(t_min, t_max, freq=_PANDAS_FREQ) + common_time_np = common_time.to_numpy().astype("datetime64[ns]") + + # Interpolate each variable (with its own time axis) onto the common grid. + coords: dict[str, xr.Variable] = { + "time": xr.Variable( + "time", + common_time_np, + {"standard_name": "time", "long_name": "time"}, + ), + } + data_vars: dict[str, xr.Variable] = {} + for name, da in data_arrays.items(): + interped = da.interp(time=common_time_np, method="linear") + var_obj = xr.Variable("time", interped.values, dict(da.attrs)) + if name in ROOT_COORDS: + coords[name] = var_obj + else: + data_vars[name] = var_obj + + ds = xr.Dataset(data_vars, coords=coords) + if "depth" in ds.coords: + profile_da = compute_profile_number(ds.coords["depth"]) + if profile_da is not None: + ds["profile_number"] = profile_da + ds.attrs.update(self._global_metadata(ds)) + self.logger.info("Writing %s", out_path) + ds.to_netcdf(out_path) + return out_path + + def _global_metadata(self, ds: xr.Dataset) -> dict: + """Build CF-compliant global attributes for the output file.""" + if "pytest" in sys.modules: + return {} + + try: + repo = git.Repo(search_parent_directories=True) + gitcommit = repo.head.object.hexsha + except (git.exc.InvalidGitRepositoryError, git.exc.NoSuchPathError): + gitcommit = "" + + iso_now = datetime.now(UTC).isoformat() + "Z" + actual_hostname = os.getenv("HOST_NAME", gethostname()) + time_vals = ds.coords["time"].to_numpy() + depth_vals = ds.coords["depth"].to_numpy() if "depth" in ds.coords else np.array([np.nan]) + lat_vals = ( + ds.coords["latitude"].to_numpy() if "latitude" in ds.coords else np.array([np.nan]) + ) + lon_vals = ( + ds.coords["longitude"].to_numpy() if "longitude" in ds.coords else np.array([np.nan]) + ) + + return { + "netcdf_version": "4", + "Conventions": "CF-1.6", + "date_created": iso_now, + "date_update": iso_now, + "date_modified": iso_now, + "featureType": "trajectory", + "time_coverage_start": str(pd.Timestamp(time_vals.min())), + "time_coverage_end": str(pd.Timestamp(time_vals.max())), + "time_coverage_duration": str( + pd.Timestamp(time_vals.max()) - pd.Timestamp(time_vals.min()) + ), + "geospatial_vertical_min": float(np.nanmin(depth_vals)), + "geospatial_vertical_max": float(np.nanmax(depth_vals)), + "geospatial_lat_min": float(np.nanmin(lat_vals)), + "geospatial_lat_max": float(np.nanmax(lat_vals)), + "geospatial_lon_min": float(np.nanmin(lon_vals)), + "geospatial_lon_max": float(np.nanmax(lon_vals)), + "title": f"Realtime SBD AUV sensor data from {self.auv_name}", + "source": "realtime SBD shore.nc4 file", + "history": f"Created by {self.commandline} on {iso_now}", + "summary": ( + f"Realtime telemetered LRAUV data from {self.auv_name} " + f"resampled to {FREQ} grid from a single SBD shore.nc4 transmission." + ), + "license": "Any use requires prior approval from MBARI", + "distribution_statement": "Any use requires prior approval from MBARI", + "useconst": "Not intended for legal use. Data may contain inaccuracies.", + "auv_python_source": ( + f"sbd2netcdf.py git commit {gitcommit} on host {actual_hostname}" + ), + } + + def process(self) -> list[Path]: + """Process all shore.nc4 files in [start, end], writing shore_{FREQ}.nc alongside each. + + Returns the list of output paths that were written (or already existed). + """ + self.logger.info( + "Starting SBD extraction for %s [%s – %s]", self.auv_name, self.start, self.end + ) + files = self.sbd_file_list() + if not files: + self.logger.warning( + "No shore.nc4 files found for %s between %s and %s", + self.auv_name, + self.start, + self.end, + ) + return [] + + out_paths: list[Path] = [] + for nc_path in files: + out = self.process_one_file(nc_path) + if out is not None: + out_paths.append(out) + + self.logger.info("Wrote %d/%d shore_%s.nc files", len(out_paths), len(files), FREQ) + return out_paths + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--auv_name", required=True, help="AUV name, e.g. ahi") + parser.add_argument( + "--start", + required=True, + help="Start datetime YYYYMMDDTHHMMSS or YYYYMMDD", + ) + parser.add_argument( + "--end", + required=True, + help="End datetime YYYYMMDDTHHMMSS or YYYYMMDD", + ) + parser.add_argument( + "--vehicle_dir", + default="/Volumes/LRAUV", + help="Root of LRAUV vehicle directory (default: /Volumes/LRAUV)", + ) + parser.add_argument("--clobber", action="store_true", help="Overwrite existing output files") + parser.add_argument( + "--verbose", + "-v", + type=int, + choices=range(3), + default=0, + const=1, + nargs="?", + help="Verbosity level: 0=WARN (default), 1=INFO, 2=DEBUG", + ) + args = parser.parse_args() + + def _parse_dt(s: str) -> datetime: + for fmt in ("%Y%m%dT%H%M%S", "%Y%m%d"): + try: + return datetime.strptime(s, fmt).replace(tzinfo=UTC) + except ValueError: + continue + msg = f"Unrecognised datetime format: {s!r}" + raise ValueError(msg) + + extractor = SbdExtract( + auv_name=args.auv_name, + start=_parse_dt(args.start), + end=_parse_dt(args.end), + vehicle_dir=args.vehicle_dir, + verbose=args.verbose, + clobber=args.clobber, + commandline=" ".join(sys.argv), + ) + out_paths = extractor.process() + for p in out_paths: + logging.info(p) From 36427903315180cdfad93e7e5142bbc6f1d1d1ba Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Mon, 18 May 2026 11:12:52 -0700 Subject: [PATCH 2/8] Update EXPECTED_SIZE_GITHUB, again. --- src/data/test_process_i2map.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data/test_process_i2map.py b/src/data/test_process_i2map.py index 41d74d2..ae0f524 100644 --- a/src/data/test_process_i2map.py +++ b/src/data/test_process_i2map.py @@ -30,7 +30,7 @@ def test_process_i2map(complete_i2map_processing): # but it will alert us if a code change unexpectedly changes the file size. # If code changes are expected to change the file size then we should # update the expected size here. - EXPECTED_SIZE_GITHUB = 63131 + EXPECTED_SIZE_GITHUB = 63128 EXPECTED_SIZE_ACT = 63106 EXPECTED_SIZE_LOCAL = 64650 if str(proc.args.base_path).startswith("/home/runner"): From c6a4e14a226a3c49842dc03208a09e4b11c9d5b1 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Mon, 18 May 2026 11:28:22 -0700 Subject: [PATCH 3/8] Make all shore_1S.nc variables lower case and enable planktivore and engineering plots. --- src/data/process_lrauv_sbd.py | 1 + src/data/sbd2netcdf.py | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/data/process_lrauv_sbd.py b/src/data/process_lrauv_sbd.py index b338cc8..d0156f4 100755 --- a/src/data/process_lrauv_sbd.py +++ b/src/data/process_lrauv_sbd.py @@ -174,6 +174,7 @@ def main() -> None: ) cp.plot_2column() cp.plot_planktivore_2column() + cp.plot_engineering_2column() except RuntimeError as e: logger.warning("create_products failed: %s", e) diff --git a/src/data/sbd2netcdf.py b/src/data/sbd2netcdf.py index 0cb5f50..97decec 100755 --- a/src/data/sbd2netcdf.py +++ b/src/data/sbd2netcdf.py @@ -178,7 +178,9 @@ def _output_var_name(self, group: str, var: str) -> str: prefix = ROOT_VAR_PREFIXES.get(var, "") return f"{prefix}_{var}" if prefix else var prefix = GROUP_PREFIX.get(group, group.lower().replace("_", "")) - return f"{prefix}_{var}" + # Lowercase to match the all-lowercase convention in create_products.py + # lookup tables (shore.nc4 group variables use mixed case). + return f"{prefix}_{var}".lower() def _read_var_da(self, nc_path: Path, group: str, var: str) -> xr.DataArray | None: """Open one variable as a time-indexed DataArray with dim 'time'. From 837b31bda2be7524622ea8535599a84cffc79f0a Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Mon, 18 May 2026 11:41:41 -0700 Subject: [PATCH 4/8] Call _plot_log_file_boundaries() for the realtime sbd data plots. --- src/data/create_products.py | 13 +++++++++++-- src/data/process_lrauv_sbd.py | 1 + 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/data/create_products.py b/src/data/create_products.py index 2d87fe8..5735584 100755 --- a/src/data/create_products.py +++ b/src/data/create_products.py @@ -656,8 +656,17 @@ def _log_file_distance_ranges(self, distnav: xr.DataArray) -> list[tuple[str, fl t_start = pd.Timestamp(parts[0]).tz_localize("UTC") t_end = pd.Timestamp(parts[1]).tz_localize("UTC") except Exception: # noqa: BLE001 - self.logger.debug("Could not parse start/end times from filename: %s", filename) - continue + # Filename doesn't encode a time range (e.g. shore_1S.nc). + # Fall back to reading time_coverage_* from the file itself. + try: + with xr.open_dataset(url) as ds_log: + t_start = pd.Timestamp(ds_log.attrs["time_coverage_start"]).tz_localize( + "UTC" + ) + t_end = pd.Timestamp(ds_log.attrs["time_coverage_end"]).tz_localize("UTC") + except Exception: # noqa: BLE001 + self.logger.debug("Could not determine time range for %s, skipping", filename) + continue if label in seen_starts: continue diff --git a/src/data/process_lrauv_sbd.py b/src/data/process_lrauv_sbd.py index d0156f4..ac5d39d 100755 --- a/src/data/process_lrauv_sbd.py +++ b/src/data/process_lrauv_sbd.py @@ -169,6 +169,7 @@ def main() -> None: verbose=args.verbose, ds=ds, log_file="sbd_placeholder.nc4", + nc_files=[str(p) for p in month_files], output_dir=output_dir, plot_name_stem=plot_stem, ) From a66495d80f1f70498debd2a5ed978d6fc3aa5a2d Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Mon, 18 May 2026 12:03:41 -0700 Subject: [PATCH 5/8] Write out .html files for the monthly realtime sbd plot images. --- src/data/create_products.py | 17 ++++ src/data/process_lrauv_sbd.py | 152 +++++++++++++++++++++------------- 2 files changed, 111 insertions(+), 58 deletions(-) diff --git a/src/data/create_products.py b/src/data/create_products.py index 5735584..5f0e114 100755 --- a/src/data/create_products.py +++ b/src/data/create_products.py @@ -838,6 +838,23 @@ def write_per_log_html(self) -> str | None: self.logger.info("Wrote per-log HTML to %s", html_path) return str(html_path) + def sbd_png_paths(self) -> list[Path]: + """Return paths of existing SBD monthly plot PNGs in output_dir. + + Used by process_lrauv_sbd.py to locate PNGs before passing them + to DeploymentPlotter._write_per_png_html(). + """ + if not self.output_dir or not self.plot_name_stem: + return [] + suffixes = ("_2column_cmocean", "_2column_planktivore", "_2column_engineering") + return [ + p + for suffix in suffixes + if ( + p := Path(self.output_dir) / f"{self.plot_name_stem}_{self.freq}{suffix}.png" + ).exists() + ] + def _plot_nighttime_indicator( # noqa: PLR0915, C901 self, fig: matplotlib.figure.Figure, diff --git a/src/data/process_lrauv_sbd.py b/src/data/process_lrauv_sbd.py index ac5d39d..4388615 100755 --- a/src/data/process_lrauv_sbd.py +++ b/src/data/process_lrauv_sbd.py @@ -90,6 +90,99 @@ def process_command_line() -> argparse.Namespace: return parser.parse_args() +def _make_products( + args: argparse.Namespace, start: datetime, end: datetime, out_paths: list +) -> None: + """Build plots and HTML index from the shore_1S.nc files produced by SbdExtract.""" + import importlib.util + + if importlib.util.find_spec("create_products") is None: + logger.warning("create_products not available — skipping plots") + return + + try: + from create_products import CreateProducts + except ImportError as e: + logger.warning("Cannot import create_products: %s — skipping plots", e) + return + + date_range = f"{start.strftime('%Y%m%d')}_{end.strftime('%Y%m%d')}" + mission = f"realtime/sbdlogs/{start.strftime('%Y')}/{date_range}" + + import xarray as xr + + output_dir = out_paths[0].parent.parent # YYYYMM directory + month_files = sorted(output_dir.rglob(f"shore_{FREQ}.nc")) + logger.info("Combining %d shore_%s.nc files from %s", len(month_files), FREQ, output_dir) + _GEO_COORDS = ("depth", "latitude", "longitude") + individual = [] + for p in month_files: + d = xr.open_dataset(p) + present = [c for c in _GEO_COORDS if c in d.coords] + if present: + d = d.reset_coords(present) + individual.append(d) + ds = xr.concat(individual, dim="time", join="outer") + ds = ds.sortby("time").drop_duplicates("time") + geo_present = [c for c in _GEO_COORDS if c in ds] + if geo_present: + ds = ds.set_coords(geo_present) + plot_stem = f"{args.auv_name}_{output_dir.name}_sbd" + cp = CreateProducts( + auv_name=args.auv_name, + mission=mission, + base_path=args.vehicle_dir, + freq="1S", + verbose=args.verbose, + ds=ds, + log_file="sbd_placeholder.nc4", + nc_files=[str(p) for p in month_files], + output_dir=output_dir, + plot_name_stem=plot_stem, + ) + cp.plot_2column() + cp.plot_planktivore_2column() + cp.plot_engineering_2column() + + png_paths = cp.sbd_png_paths() + if png_paths: + from lrauv_deployment_plots import DeploymentPlotter + from make_permalink import stoqs_url_from_ds + + dp = DeploymentPlotter() + dp.logger.setLevel(_LOG_LEVELS[min(args.verbose, 2)]) + + stoqs_url = None + try: + stoqs_url = stoqs_url_from_ds(ds, auv_name=args.auv_name) + except Exception as e: # noqa: BLE001 + logger.debug("Could not generate STOQS URL: %s", e) + + nc_file_strs = [str(p) for p in month_files] + html_paths = [] + for png_path in png_paths: + html_path = png_path.with_suffix(".html") + other_pngs = [str(p) for p in png_paths if p != png_path] + dp._write_per_png_html( + html_path=html_path, + title=f"{args.auv_name} {png_path.stem}", + png_name=png_path.name, + png_url="", + stoqs_url=stoqs_url, + nc_files=nc_file_strs, + auv_name=args.auv_name, + png_file_path=png_path, + other_png_paths=other_pngs, + ) + html_paths.append(html_path) + + dp._update_index_html( + output_dir, + html_paths, + deployment_name=f"{args.auv_name} {output_dir.name}", + ) + + def main() -> None: args = process_command_line() logger.setLevel(_LOG_LEVELS[min(args.verbose, 2)]) @@ -116,66 +209,9 @@ def main() -> None: logger.info("Skipping create_products (--noproducts specified)") return - # Produce quick-look plots via create_products - import importlib.util - - if importlib.util.find_spec("create_products") is None: - logger.warning("create_products not available — skipping plots") - return - - try: - from create_products import CreateProducts - except ImportError as e: - logger.warning("Cannot import create_products: %s — skipping plots", e) - return - - date_range = f"{start.strftime('%Y%m%d')}_{end.strftime('%Y%m%d')}" - mission = f"realtime/sbdlogs/{start.strftime('%Y')}/{date_range}" - logger.info("Creating products from %d shore_1S.nc files", len(out_paths)) try: - import xarray as xr - - # Combine all shore_1S.nc files in the YYYYMM directory — not just those - # touched by --start/--end — so the plot covers the full month. - # Some log periods may have no depth data, so depth/lat/lon may be absent - # as coordinates. xr.concat requires coordinates in all datasets, so - # demote them to data variables first, concat with join="outer" - # (NaN-fill absent variables), then promote back. - # Log periods can also overlap in time, so sort and deduplicate after. - # Pre-open as ds so _open_ds() is a no-op; log_file enables _is_lrauv(). - output_dir = out_paths[0].parent.parent # YYYYMM directory - month_files = sorted(output_dir.rglob(f"shore_{FREQ}.nc")) - logger.info("Combining %d shore_%s.nc files from %s", len(month_files), FREQ, output_dir) - _GEO_COORDS = ("depth", "latitude", "longitude") - individual = [] - for p in month_files: - d = xr.open_dataset(p) - present = [c for c in _GEO_COORDS if c in d.coords] - if present: - d = d.reset_coords(present) - individual.append(d) - ds = xr.concat(individual, dim="time", join="outer") - ds = ds.sortby("time").drop_duplicates("time") - geo_present = [c for c in _GEO_COORDS if c in ds] - if geo_present: - ds = ds.set_coords(geo_present) - plot_stem = f"{args.auv_name}_{output_dir.name}_sbd" - cp = CreateProducts( - auv_name=args.auv_name, - mission=mission, - base_path=args.vehicle_dir, - freq="1S", - verbose=args.verbose, - ds=ds, - log_file="sbd_placeholder.nc4", - nc_files=[str(p) for p in month_files], - output_dir=output_dir, - plot_name_stem=plot_stem, - ) - cp.plot_2column() - cp.plot_planktivore_2column() - cp.plot_engineering_2column() + _make_products(args, start, end, out_paths) except RuntimeError as e: logger.warning("create_products failed: %s", e) From 4db64121a53fc968ffdb7120cc987f939f4288d8 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Mon, 18 May 2026 12:15:53 -0700 Subject: [PATCH 6/8] Update the titles to reflect that these are realtime data. --- src/data/create_products.py | 7 +++-- src/data/process_lrauv_sbd.py | 49 ++++++++++++++++++++++------------- 2 files changed, 36 insertions(+), 20 deletions(-) diff --git a/src/data/create_products.py b/src/data/create_products.py index 5f0e114..e02b3d5 100755 --- a/src/data/create_products.py +++ b/src/data/create_products.py @@ -1470,7 +1470,10 @@ def _plot_track_map( # noqa: PLR0915 end_time = pd.to_datetime(times[-1]).strftime("%Y-%m-%d %H:%M:%S UTC") # Get title from netCDF attributes - if self._is_lrauv() and self.plot_name_stem: + if self._is_lrauv() and self.plot_name_stem and "realtime" in self.mission: + month_year = pd.to_datetime(times[0]).strftime("%B %Y") + title = f"Interpolated realtime SBD data for {self.auv_name} in {month_year}" + elif self._is_lrauv() and self.plot_name_stem: # Derive dlist path (vehicle/missionlogs/year/dlist_dir) from log_file lf_parts = Path(self.log_file).parts dlist_path = "/".join(lf_parts[:4]) if len(lf_parts) >= 4 else self.log_file # noqa: PLR2004 @@ -1628,7 +1631,7 @@ def _plot_track_map( # noqa: PLR0915 # Position text in figure coordinates, to the right of the colorbar. # Center the full block vertically on the map. - text_x = ref_pos.x0 + map_width + cbar_pad + cbar_width + 0.03 + text_x = ref_pos.x0 + map_width + cbar_pad + cbar_width + 0.05 text_y = map_y0 + map_height / 2 + total_text_height / 2 # Add title diff --git a/src/data/process_lrauv_sbd.py b/src/data/process_lrauv_sbd.py index 4388615..ea7f950 100755 --- a/src/data/process_lrauv_sbd.py +++ b/src/data/process_lrauv_sbd.py @@ -23,7 +23,9 @@ import logging import sys from datetime import UTC, datetime +from pathlib import Path +import xarray as xr from sbd2netcdf import FREQ, SbdExtract _LOG_LEVELS = (logging.WARN, logging.INFO, logging.DEBUG) @@ -90,6 +92,28 @@ def process_command_line() -> argparse.Namespace: return parser.parse_args() +def _concat_month_files(output_dir: "Path") -> "tuple[xr.Dataset, list[Path]]": + """Concatenate all shore_1S.nc files in output_dir into one sorted dataset.""" + import xarray as xr + + month_files = sorted(output_dir.rglob(f"shore_{FREQ}.nc")) + logger.info("Combining %d shore_%s.nc files from %s", len(month_files), FREQ, output_dir) + _GEO_COORDS = ("depth", "latitude", "longitude") + individual = [] + for p in month_files: + d = xr.open_dataset(p) + present = [c for c in _GEO_COORDS if c in d.coords] + if present: + d = d.reset_coords(present) + individual.append(d) + ds = xr.concat(individual, dim="time", join="outer") + ds = ds.sortby("time").drop_duplicates("time") + geo_present = [c for c in _GEO_COORDS if c in ds] + if geo_present: + ds = ds.set_coords(geo_present) + return ds, month_files + + def _make_products( args: argparse.Namespace, start: datetime, end: datetime, out_paths: list ) -> None: @@ -109,24 +133,8 @@ def _make_products( date_range = f"{start.strftime('%Y%m%d')}_{end.strftime('%Y%m%d')}" mission = f"realtime/sbdlogs/{start.strftime('%Y')}/{date_range}" - import xarray as xr - output_dir = out_paths[0].parent.parent # YYYYMM directory - month_files = sorted(output_dir.rglob(f"shore_{FREQ}.nc")) - logger.info("Combining %d shore_%s.nc files from %s", len(month_files), FREQ, output_dir) - _GEO_COORDS = ("depth", "latitude", "longitude") - individual = [] - for p in month_files: - d = xr.open_dataset(p) - present = [c for c in _GEO_COORDS if c in d.coords] - if present: - d = d.reset_coords(present) - individual.append(d) - ds = xr.concat(individual, dim="time", join="outer") - ds = ds.sortby("time").drop_duplicates("time") - geo_present = [c for c in _GEO_COORDS if c in ds] - if geo_present: - ds = ds.set_coords(geo_present) + ds, month_files = _concat_month_files(output_dir) plot_stem = f"{args.auv_name}_{output_dir.name}_sbd" cp = CreateProducts( auv_name=args.auv_name, @@ -158,6 +166,11 @@ def _make_products( except Exception as e: # noqa: BLE001 logger.debug("Could not generate STOQS URL: %s", e) + import pandas as pd + + month_year = pd.to_datetime(ds.cf["time"].to_numpy()[0]).strftime("%B %Y") + html_title = f"Interpolated realtime SBD data for {args.auv_name} in {month_year}" + nc_file_strs = [str(p) for p in month_files] html_paths = [] for png_path in png_paths: @@ -165,7 +178,7 @@ def _make_products( other_pngs = [str(p) for p in png_paths if p != png_path] dp._write_per_png_html( html_path=html_path, - title=f"{args.auv_name} {png_path.stem}", + title=html_title, png_name=png_path.name, png_url="", stoqs_url=stoqs_url, From fb2b7e61340d54357e9d4a2f28756e35b39e0972 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Mon, 18 May 2026 13:51:49 -0700 Subject: [PATCH 7/8] Make per log file plots before the monthly organized ones. --- .vscode/launch.json | 6 ++++- src/data/lrauv_deployment_plots.py | 24 ++++++++++++++++++-- src/data/process_lrauv_sbd.py | 36 ++++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+), 3 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 72ed7f7..16f9e9c 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -496,7 +496,11 @@ //"args": ["--auv_name", "ahi", "--start", "20260317", "--end", "20260318", "-v", "--noproducts"] // ahi 47 CFE deployment April 2026 //"args": ["--auv_name", "ahi", "--start", "20260406", "--end", "20260412", "-v"] - "args": ["--auv_name", "ahi", "--start", "20260303", "--end", "20260331", "-v", "2", "--clobber"] + //"args": ["--auv_name", "ahi", "--start", "20260303", "--end", "20260331", "-v", "2", "--clobber"] + // Test a month with fewer plots to make + //"args": ["--auv_name", "daphne", "--start", "20260101", "--end", "20260131", "-v", "2", "--clobber"] + // Test pyxis + "args": ["--auv_name", "pyxis", "--start", "20260501", "--end", "20260531", "-v", "2", "--clobber"] }, { "name": "lrauv_deployment_plots", diff --git a/src/data/lrauv_deployment_plots.py b/src/data/lrauv_deployment_plots.py index f64b92b..683faf8 100755 --- a/src/data/lrauv_deployment_plots.py +++ b/src/data/lrauv_deployment_plots.py @@ -1021,7 +1021,19 @@ def _write_per_png_html( # noqa: C901, PLR0913 } def _png_urls_for_nc(self, nc_url: str) -> list[str]: - """Return web-accessible PNG URLs for all plot kinds for one OPeNDAP nc URL.""" + """Return web PNG URLs for all plot kinds for one nc path or OPeNDAP URL.""" + if not nc_url.startswith("http"): + nc_path = Path(nc_url) + for local_base in (Path(LRAUV_VOL), BASE_LRAUV_PATH): + try: + rel = nc_path.relative_to(local_base) + base = BASE_LRAUV_WEB.rstrip("/") + "/" + str(rel)[: -len(".nc")] + return [f"{base}_{kind}.png" for kind in self._PLOT_KINDS] + except ValueError: + continue + # Unrecognised local path — fall back to local file paths + base = nc_url[: -len(".nc")] + return [f"{base}_{kind}.png" for kind in self._PLOT_KINDS] rel = nc_url.replace(LRAUV_OPENDAP_BASE.rstrip("/") + "/", "") base = BASE_LRAUV_WEB.rstrip("/") + "/" + rel[: -len(".nc")] return [f"{base}_{kind}.png" for kind in self._PLOT_KINDS] @@ -1034,7 +1046,15 @@ def _plot_label(self, path_str: str) -> str: return Path(path_str).stem def _url_exists(self, url: str) -> bool: - """Return True if the URL responds with HTTP 200 to a HEAD request.""" + """Return True if the resource exists (local check for BASE_LRAUV_WEB URLs).""" + if url.startswith(BASE_LRAUV_WEB): + rel = url[len(BASE_LRAUV_WEB.rstrip("/")) + 1 :] + for local_base in (Path(LRAUV_VOL), BASE_LRAUV_PATH): + if (local_base / rel).exists(): + return True + return False + if not url.startswith("http"): + return Path(url).exists() try: req = urllib.request.Request(url, method="HEAD") # noqa: S310 with urllib.request.urlopen(req, timeout=5) as resp: # noqa: S310 diff --git a/src/data/process_lrauv_sbd.py b/src/data/process_lrauv_sbd.py index ea7f950..91aa5b2 100755 --- a/src/data/process_lrauv_sbd.py +++ b/src/data/process_lrauv_sbd.py @@ -114,6 +114,40 @@ def _concat_month_files(output_dir: "Path") -> "tuple[xr.Dataset, list[Path]]": return ds, month_files +def _make_per_log_plots( + args: argparse.Namespace, + month_files: list, + mission: str, + CreateProducts: type, # noqa: N803 +) -> None: + """Create per-log plots for each shore_1S.nc in month_files.""" + for p in month_files: + out_png = p.parent / f"shore_{FREQ}_2column_cmocean.png" + if out_png.exists() and not args.clobber: + logger.info("Skipping per-log plot (exists): %s", out_png) + continue + logger.info("Creating per-log plots for %s", p.name) + try: + with xr.open_dataset(p) as ds_log: + cp = CreateProducts( + auv_name=args.auv_name, + mission=mission, + base_path=args.vehicle_dir, + freq=FREQ, + verbose=args.verbose, + ds=ds_log, + log_file=str(p), + nc_files=[str(p)], + output_dir=p.parent, + plot_name_stem="shore", + ) + cp.plot_2column() + cp.plot_planktivore_2column() + cp.plot_engineering_2column() + except Exception as e: # noqa: BLE001 + logger.warning("Per-log plot failed for %s: %s", p.name, e) + + def _make_products( args: argparse.Namespace, start: datetime, end: datetime, out_paths: list ) -> None: @@ -148,6 +182,8 @@ def _make_products( output_dir=output_dir, plot_name_stem=plot_stem, ) + _make_per_log_plots(args, month_files, mission, CreateProducts) + cp.plot_2column() cp.plot_planktivore_2column() cp.plot_engineering_2column() From ffd7c535804506185e24c6fb647b8fd4f00d5421 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Mon, 18 May 2026 15:45:17 -0700 Subject: [PATCH 8/8] Enhance process_lrauv_sbd.py with multi-vehicle support, freshness checks, and pyxis CBIT variable Add --current_month, --previous_month, --last_n_days date modes (mutually exclusive with --start); --auv_name now accepts multiple names and defaults to all known LRAUVs via ALL_LRAUV_NAMES in common_args.py; main() loops over vehicles Skip shore_1S.nc, per-log plots, and monthly plots when outputs are newer than their source files; --clobber bypasses all freshness checks Add CBIT/ampHoursUsed to SBD_PARMS; substitute cbit_amphoursused for elevator angle in the engineering plot for pyxis realtime only; add "Ah Used" short label; fix _resolve_label to read units from dataset attrs when no long_name is present Write shore_1S.nc as NETCDF4_CLASSIC to match the delayed-mode pipeline Use vehicle-relative paths in log messages; remove plt.show() from plot methods to prevent macOS backend timer crash; demote several per-variable warnings to debug --- .vscode/launch.json | 4 +- src/data/common_args.py | 17 ++++ src/data/create_products.py | 32 ++++--- src/data/process_lrauv_sbd.py | 155 ++++++++++++++++++++++++++-------- src/data/sbd2netcdf.py | 32 +++++-- 5 files changed, 186 insertions(+), 54 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 16f9e9c..173d2ff 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -500,7 +500,9 @@ // Test a month with fewer plots to make //"args": ["--auv_name", "daphne", "--start", "20260101", "--end", "20260131", "-v", "2", "--clobber"] // Test pyxis - "args": ["--auv_name", "pyxis", "--start", "20260501", "--end", "20260531", "-v", "2", "--clobber"] + "args": ["--auv_name", "pyxis", "--start", "20260501", "--end", "20260531", "-v", "1", "--clobber"] + // Test --current_month + //"args": ["--current_month", "-v", "1"] }, { "name": "lrauv_deployment_plots", diff --git a/src/data/common_args.py b/src/data/common_args.py index 12cc853..4767b9f 100644 --- a/src/data/common_args.py +++ b/src/data/common_args.py @@ -13,6 +13,23 @@ DEFAULT_FREQ = "1S" # 1 Hz resampling frequency DEFAULT_MF_WIDTH = 3 # Median filter width +ALL_LRAUV_NAMES = ( + "ahi", + "aku", + "brezo", + "brizo", + "daphne", + "galene", + "makai", + "opah", + "polaris", + "pontus", + "pyxis", + "tethys", + "triton", + "whoidhs", +) + class CommonArgumentParser: """Shared argument parser factory for all AUV processing modules.""" diff --git a/src/data/create_products.py b/src/data/create_products.py index e02b3d5..ef3096f 100755 --- a/src/data/create_products.py +++ b/src/data/create_products.py @@ -265,6 +265,7 @@ def __init__( # noqa: PLR0913 "universals_platform_speed_wrt_sea_water": "Speed", "buoyancyservo_platform_buoyancy_position": "Buoyancy", "elevatorservo_platform_elevator_angle": "Elevator", + "cbit_amphoursused": "Ah Used", "massservo_platform_mass_position": "Mass Position", "rudderservo_platform_rudder_angle": "Rudder", # ── LRAUV WetLabs BB2FL ─────────────────────────────────────────────── @@ -469,7 +470,7 @@ def _compute_density_lrauv(self) -> None: sal_var = "ctdseabird_sea_water_salinity" if temp_var not in self.ds or sal_var not in self.ds: - self.logger.warning( + self.logger.debug( "Cannot compute density: %s or %s not in dataset", temp_var, sal_var, @@ -612,6 +613,7 @@ def _get_planktivore_plot_variables(self) -> list: def _get_lrauv_engineering_plot_variables(self) -> list: """Get LRAUV engineering/platform variables for plot_engineering_2column().""" + is_pyxis_realtime = self.auv_name == "pyxis" and self.mission and "realtime" in self.mission return [ ("bpc1_platform_battery_charge", "linear"), ("bpc1_platform_battery_voltage", "linear"), @@ -620,7 +622,12 @@ def _get_lrauv_engineering_plot_variables(self) -> list: ("universals_platform_yaw_angle", "linear"), ("universals_platform_roll_angle", "linear"), ("buoyancyservo_platform_buoyancy_position", "linear"), - ("elevatorservo_platform_elevator_angle", "linear"), + ( + "cbit_amphoursused" + if is_pyxis_realtime + else "elevatorservo_platform_elevator_angle", + "linear", + ), ("massservo_platform_mass_position", "linear"), ] @@ -1111,7 +1118,7 @@ def _get_bathymetry(self, lons: np.ndarray, lats: np.ndarray) -> np.ndarray: # Otherwise fall back to global grids points = pd.DataFrame({"lon": lons, "lat": lats}).dropna() if len(pd.DataFrame({"lon": lons, "lat": lats})) != len(points): - self.logger.warning( + self.logger.debug( "Some lon/lat points have NaNs, these will be skipped for bathymetry retrieval" ) @@ -1777,6 +1784,9 @@ def _resolve_label(self, var: str) -> tuple[str, str, str]: else: units = "" color_map_name = self._get_colormap_name(var) + # If lookup tables have no units, try the dataset attrs directly + if not units and var in self.ds: + units = self.ds[var].attrs.get("units", "") if var in self.variable_plot_conversions: units = self.variable_plot_conversions[var][1] @@ -1965,7 +1975,7 @@ def _plot_var_scatter( # noqa: C901, PLR0912, PLR0913, PLR0915 # Check if variable exists and has valid data no_data = False if var not in self.ds: - self.logger.warning("%s not in dataset", var) + self.logger.debug("%s not in dataset", var) no_data = True else: if scale == "log": @@ -2615,7 +2625,7 @@ def plot_2column(self) -> str: # noqa: C901, PLR0912, PLR0915 # Use a quick pre-check with LRAUV or Dorado variables (excluding computed 'density') plot_variables = self._get_plot_variables(None if self._is_lrauv() else "ctd1") if not any(var in self.ds for var, _ in plot_variables if var != "density"): - self.logger.warning( + self.logger.debug( "No plot variables found in dataset, skipping plot_2column", ) return None @@ -2745,7 +2755,7 @@ def plot_2column(self) -> str: # noqa: C901, PLR0912, PLR0915 va="bottom", ) plt.savefig(output_file, dpi=100, bbox_inches="tight") - plt.show() + plt.close(fig) self.logger.info("Saved 2column plot to %s", output_file) @@ -2898,7 +2908,7 @@ def plot_biolume_2column(self) -> str: # noqa: C901, PLR0912, PLR0915 va="bottom", ) plt.savefig(output_file, dpi=100, bbox_inches="tight") - plt.show() + plt.close(fig) self.logger.info("Saved biolume 2column plot to %s", output_file) @@ -2930,7 +2940,7 @@ def plot_planktivore_2column(self) -> str: # noqa: C901, PLR0912, PLR0915 if v.startswith("backseat_planktivore_") ] if not any(var in self.ds for var in planktivore_vars): - self.logger.warning( + self.logger.debug( "No backseat_planktivore plot variables found in dataset, " "skipping plot_planktivore_2column", ) @@ -3054,7 +3064,7 @@ def plot_planktivore_2column(self) -> str: # noqa: C901, PLR0912, PLR0915 va="bottom", ) plt.savefig(output_file, dpi=100, bbox_inches="tight") - plt.show() + plt.close(fig) self.logger.info("Saved planktivore 2column plot to %s", output_file) @@ -3116,7 +3126,7 @@ def plot_engineering_2column(self) -> str: # noqa: C901, PLR0912, PLR0915 for var, scale in plot_variables: self.logger.info("Plotting %s...", var) if var not in self.ds: - self.logger.warning("%s not in dataset, plotting with no data", var) + self.logger.debug("%s not in dataset, plotting with no data", var) self._plot_var( var, @@ -3166,7 +3176,7 @@ def plot_engineering_2column(self) -> str: # noqa: C901, PLR0912, PLR0915 va="bottom", ) plt.savefig(output_file, dpi=100, bbox_inches="tight") - plt.show() + plt.close(fig) self.logger.info("Saved engineering 2column plot to %s", output_file) diff --git a/src/data/process_lrauv_sbd.py b/src/data/process_lrauv_sbd.py index 91aa5b2..23a9e73 100755 --- a/src/data/process_lrauv_sbd.py +++ b/src/data/process_lrauv_sbd.py @@ -12,8 +12,17 @@ __sbd_1S_2column_planktivore.png (if applicable) Usage: - uv run src/data/process_lrauv_sbd.py --auv_name ahi \\ - --start 20260406 --end 20260412 -v + # All vehicles for the current month: + uv run src/data/process_lrauv_sbd.py --current_month -v + + # All vehicles for the previous month: + uv run src/data/process_lrauv_sbd.py --previous_month -v + + # Last 7 days for a single vehicle: + uv run src/data/process_lrauv_sbd.py --last_n_days 7 --auv_name ahi -v + + # Explicit date range for a single vehicle: + uv run src/data/process_lrauv_sbd.py --start 20260406 --end 20260412 --auv_name ahi -v """ __author__ = "Mike McCann" @@ -22,10 +31,11 @@ import argparse import logging import sys -from datetime import UTC, datetime +from datetime import UTC, datetime, timedelta from pathlib import Path import xarray as xr +from common_args import ALL_LRAUV_NAMES from sbd2netcdf import FREQ, SbdExtract _LOG_LEVELS = (logging.WARN, logging.INFO, logging.DEBUG) @@ -48,21 +58,52 @@ def _parse_dt(s: str) -> datetime: raise ValueError(msg) +def _rel(path: Path, base: str) -> Path: + """Return path relative to base, falling back to the full path.""" + try: + return path.relative_to(base) + except ValueError: + return path + + def process_command_line() -> argparse.Namespace: parser = argparse.ArgumentParser( description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter, ) - parser.add_argument("--auv_name", required=True, help="AUV name, e.g. ahi, tethys") parser.add_argument( + "--auv_name", + nargs="+", + default=list(ALL_LRAUV_NAMES), + metavar="NAME", + help="One or more AUV names (default: all known LRAUVs)", + ) + date_mode = parser.add_mutually_exclusive_group(required=True) + date_mode.add_argument( + "--current_month", + action="store_true", + help="Process from the first of the current month through today", + ) + date_mode.add_argument( + "--previous_month", + action="store_true", + help="Process the entire previous calendar month", + ) + date_mode.add_argument( + "--last_n_days", + type=int, + metavar="N", + help="Process the last N days", + ) + date_mode.add_argument( "--start", - required=True, - help="Start datetime: YYYYMMDDTHHMMSS or YYYYMMDD", + metavar="YYYYMMDD", + help="Start datetime: YYYYMMDDTHHMMSS or YYYYMMDD (use with --end)", ) parser.add_argument( "--end", - required=True, - help="End datetime: YYYYMMDDTHHMMSS or YYYYMMDD", + metavar="YYYYMMDD", + help="End datetime: YYYYMMDDTHHMMSS or YYYYMMDD (only used with --start)", ) parser.add_argument( "--vehicle_dir", @@ -89,7 +130,32 @@ def process_command_line() -> argparse.Namespace: nargs="?", help="Verbosity level: 0=WARN (default), 1=INFO, 2=DEBUG", ) - return parser.parse_args() + args = parser.parse_args() + if args.start and not args.end: + parser.error("--end is required when --start is used") + return args + + +def _resolve_date_range(args: argparse.Namespace) -> tuple[datetime, datetime]: + """Return (start, end) UTC datetimes from the active date-mode flag.""" + now = datetime.now(tz=UTC) + if args.current_month: + start = now.replace(day=1, hour=0, minute=0, second=0, microsecond=0) + end = now + elif args.previous_month: + first_of_this = now.replace(day=1, hour=0, minute=0, second=0, microsecond=0) + last_of_prev = first_of_this - timedelta(days=1) + start = last_of_prev.replace(day=1, hour=0, minute=0, second=0, microsecond=0) + end = last_of_prev.replace(hour=23, minute=59, second=59, microsecond=0) + elif args.last_n_days is not None: + start = (now - timedelta(days=args.last_n_days)).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + end = now + else: + start = _parse_dt(args.start) + end = _parse_dt(args.end) + return start, end def _concat_month_files(output_dir: "Path") -> "tuple[xr.Dataset, list[Path]]": @@ -124,9 +190,14 @@ def _make_per_log_plots( for p in month_files: out_png = p.parent / f"shore_{FREQ}_2column_cmocean.png" if out_png.exists() and not args.clobber: - logger.info("Skipping per-log plot (exists): %s", out_png) - continue - logger.info("Creating per-log plots for %s", p.name) + if p.stat().st_mtime <= out_png.stat().st_mtime: + logger.info( + "Per-log plot up to date, skipping: %s", _rel(out_png, args.vehicle_dir) + ) + continue + logger.info("shore_1S.nc is newer — replotting: %s", _rel(p, args.vehicle_dir)) + else: + logger.info("Creating per-log plots for %s", _rel(p, args.vehicle_dir)) try: with xr.open_dataset(p) as ds_log: cp = CreateProducts( @@ -169,6 +240,14 @@ def _make_products( output_dir = out_paths[0].parent.parent # YYYYMM directory ds, month_files = _concat_month_files(output_dir) + + monthly_png = output_dir / f"{args.auv_name}_{output_dir.name}_sbd_{FREQ}_2column_cmocean.png" + if monthly_png.exists() and not args.clobber: + newest_nc = max(p.stat().st_mtime for p in month_files) + if newest_nc <= monthly_png.stat().st_mtime: + logger.info("Monthly plots up to date for %s — skipping", output_dir.name) + return + plot_stem = f"{args.auv_name}_{output_dir.name}_sbd" cp = CreateProducts( auv_name=args.auv_name, @@ -236,33 +315,37 @@ def main() -> None: args = process_command_line() logger.setLevel(_LOG_LEVELS[min(args.verbose, 2)]) - start = _parse_dt(args.start) - end = _parse_dt(args.end) - - extractor = SbdExtract( - auv_name=args.auv_name, - start=start, - end=end, - vehicle_dir=args.vehicle_dir, - verbose=args.verbose, - clobber=args.clobber, - commandline=" ".join(sys.argv), - ) + start, end = _resolve_date_range(args) + logger.info("Date range: %s → %s", start.date(), end.date()) + + for auv_name in args.auv_name: + logger.info("Processing %s", auv_name) + vehicle_args = argparse.Namespace(**{**vars(args), "auv_name": auv_name}) + + extractor = SbdExtract( + auv_name=auv_name, + start=start, + end=end, + vehicle_dir=args.vehicle_dir, + verbose=args.verbose, + clobber=args.clobber, + commandline=" ".join(sys.argv), + ) - out_paths = extractor.process() - if not out_paths: - logger.error("No data produced — check that shore.nc4 files exist for this range.") - sys.exit(1) + out_paths = extractor.process() + if not out_paths: + logger.warning("No data for %s — skipping", auv_name) + continue - if args.noproducts: - logger.info("Skipping create_products (--noproducts specified)") - return + if args.noproducts: + logger.info("Skipping create_products (--noproducts specified)") + continue - logger.info("Creating products from %d shore_1S.nc files", len(out_paths)) - try: - _make_products(args, start, end, out_paths) - except RuntimeError as e: - logger.warning("create_products failed: %s", e) + logger.info("Creating products from %d shore_1S.nc files for %s", len(out_paths), auv_name) + try: + _make_products(vehicle_args, start, end, out_paths) + except RuntimeError as e: + logger.warning("create_products failed for %s: %s", auv_name, e) if __name__ == "__main__": diff --git a/src/data/sbd2netcdf.py b/src/data/sbd2netcdf.py index 97decec..ccba3f8 100755 --- a/src/data/sbd2netcdf.py +++ b/src/data/sbd2netcdf.py @@ -68,6 +68,9 @@ "PeakPlanktivoreHMavgROI", "PeakPlanktivoreHMavgROIDepth", ], + "CBIT": [ + "ampHoursUsed", + ], } # Root-level non-coordinate variables: variable name → output prefix @@ -89,6 +92,7 @@ "WetLabsBB2FL": "wetlabsbb2fl", "_": "backseat", "Science": "science", + "CBIT": "cbit", } @@ -124,6 +128,13 @@ def __init__( # noqa: PLR0913 self.commandline = commandline self.logger.setLevel(self._log_levels[min(verbose, 2)]) + def _rel(self, path: Path) -> Path: + """Return path relative to vehicle_dir, falling back to the full path.""" + try: + return path.relative_to(self.vehicle_dir) + except ValueError: + return path + def sbd_file_list(self) -> list[Path]: """Return sorted list of shore.nc4 paths whose directory timestamp is in [start, end].""" sbd_root = Path(self.vehicle_dir) / self.auv_name / "realtime" / "sbdlogs" @@ -219,6 +230,16 @@ def _read_var_da(self, nc_path: Path, group: str, var: str) -> xr.DataArray | No # interp() requires a unique, sorted time index. return da.drop_duplicates("time").sortby("time") + def _is_output_fresh(self, src: Path, out: Path) -> bool: + """Return True if out exists, is not clobbered, and is newer than src.""" + if not out.exists() or self.clobber: + return False + if src.stat().st_mtime <= out.stat().st_mtime: + self.logger.info("Up to date, skipping: %s", self._rel(out)) + return True + self.logger.info("%s is newer — reprocessing: %s", self._rel(src), self._rel(out)) + return False + def process_one_file(self, nc_path: Path) -> Path | None: """Process one shore.nc4 → write shore_{FREQ}.nc in the same directory. @@ -227,11 +248,10 @@ def process_one_file(self, nc_path: Path) -> Path | None: dataset has a single shared 'time' dimension. """ out_path = nc_path.parent / f"shore_{FREQ}.nc" - if out_path.exists() and not self.clobber: - self.logger.info("Already exists, skipping: %s", out_path) + if self._is_output_fresh(nc_path, out_path): return out_path - self.logger.info("Reading %s", nc_path) + self.logger.info("Reading %s", self._rel(nc_path)) # Collect a DataArray per output variable, each with its own time axis. data_arrays: dict[str, xr.DataArray] = {} @@ -242,7 +262,7 @@ def process_one_file(self, nc_path: Path) -> Path | None: data_arrays[self._output_var_name(group, var)] = da if not data_arrays: - self.logger.warning("No variables extracted from %s", nc_path.name) + self.logger.warning("No variables extracted from %s", self._rel(nc_path)) return None # Build a common 1S grid that spans all individually-timed variables. @@ -275,8 +295,8 @@ def process_one_file(self, nc_path: Path) -> Path | None: if profile_da is not None: ds["profile_number"] = profile_da ds.attrs.update(self._global_metadata(ds)) - self.logger.info("Writing %s", out_path) - ds.to_netcdf(out_path) + self.logger.info("Writing %s", self._rel(out_path)) + ds.to_netcdf(out_path, format="NETCDF4_CLASSIC") return out_path def _global_metadata(self, ds: xr.Dataset) -> dict: