diff --git a/README.md b/README.md index 31b0259..d9bcf39 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,42 @@ your environment will be set up. ## Usage +At present, most functionality is accessed via the OpenVBI Python API, examples of +which can be found in the [examples](openvbi/examples) folder. + +There is a simple command line tool called `vbi`: +```shell +vbi --help +Usage: vbi [OPTIONS] COMMAND [ARGS]... + +Options: + --version Show the version and exit. + --help Show this message and exit. + +Commands: + dump Dump logger data file to ASCII format for debugging. +``` + +The `dump` command allows logger data files to be written as ASCII to the screen or to a file. To dump +a file to the screen, run: +```shell +vbi dump tests/data/00000029.TSV.lzma - +Packet 0: name: RMC + _data: {'Fields': {'timestamp': 123458, 'status': 'A', 'lat': 5048.614, 'lat_dir': 'N', 'lon': 108.632, 'lon_dir': 'W', 'spd_over_grnd': 0, 'true_course': 119.9, 'datestamp': 260822, 'mag_variation': 3.4, 'mag_var_dir': 'W'}, 'Talker': 'GP', 'Formatter': 'RMC'} +Packet 1: name: RMB + _data: {'Fields': {'status': 'A', 'cross_track_error': '', 'cte_correction_dir': '', 'origin_waypoint_id': '', 'dest_waypoint_id': '', 'dest_lat': '', 'dest_lat_dir': '', 'dest_lon': '', 'dest_lon_dir': '', 'dest_range': '', 'dest_true_bearing': '', 'dest_velocity': '', 'arrival_alarm': 'V'}, 'Talker': 'GP', 'Formatter': 'RMB'} +Packet 2: name: GLL + _data: {'Fields': {'lat': 5048.614, 'lat_dir': 'N', 'lon': 108.632, 'lon_dir': 'W', 'timestamp': 123458, 'status': 'A'}, 'Talker': 'GP', 'Formatter': 'GLL'} +... +... +... 
+``` + +To output to a file, specify the output file name instead of '-' as the last argument: +```shell +vbi dump tests/data/00000029.TSV.lzma 00000029.txt +``` + ## Testing Create a virtual environment with test dependencies: ```shell diff --git a/docs/dev/TODO.md b/docs/dev/TODO.md new file mode 100644 index 0000000..c04cf78 --- /dev/null +++ b/docs/dev/TODO.md @@ -0,0 +1,719 @@ +# Plans for future development of OpenVBI + +## Adding support for speed over ground + +### NMEA 0183 +Support for the following NMEA 0138 sentences would be necessary for providing SOG from logger files (see +[here](https://github.com/MO-RISE/marulc/blob/master/marulc/nmea0183_sentence_formatters.json#L857C9-L905C11)): +```json +{ + "RMA": { + "Fields": [ + { + "Id": "data_status", + "Description": "Data status" + }, + { + "Id": "lat", + "Description": "Latitude" + }, + { + "Id": "lat_dir", + "Description": "Latitude Direction" + }, + { + "Id": "lon", + "Description": "Longitude" + }, + { + "Id": "lon_dir", + "Description": "Longitude Direction" + }, + { + "Id": "not_used_1", + "Description": "Not Used 1" + }, + { + "Id": "not_used_2", + "Description": "Not Used 2" + }, + { + "Id": "spd_over_grnd", + "Description": "Speed over ground" + }, + { + "Id": "crse_over_grnd", + "Description": "Course over ground" + }, + { + "Id": "variation", + "Description": "Variation" + }, + { + "Id": "var_dir", + "Description": "Variation Direction" + } + ], + "Description": "" + } +} +``` + +Also for SOG +(see [here](https://github.com/MO-RISE/marulc/blob/master/marulc/nmea0183_sentence_formatters.json#L963C9-L1019C11)): +```json +{ + "RMC": { + "Fields": [ + { + "Id": "timestamp", + "Description": "Timestamp" + }, + { + "Id": "status", + "Description": "Status" + }, + { + "Id": "lat", + "Description": "Latitude" + }, + { + "Id": "lat_dir", + "Description": "Latitude Direction" + }, + { + "Id": "lon", + "Description": "Longitude" + }, + { + "Id": "lon_dir", + "Description": "Longitude Direction" + }, + { 
+ "Id": "spd_over_grnd", + "Description": "Speed Over Ground" + }, + { + "Id": "true_course", + "Description": "True Course" + }, + { + "Id": "datestamp", + "Description": "Datestamp" + }, + { + "Id": "mag_variation", + "Description": "Magnetic Variation" + }, + { + "Id": "mag_var_dir", + "Description": "Magnetic Variation Direction" + }, + { + "Id": "mode_indicator", + "Description": "Mode Indicator" + }, + { + "Id": "nav_status", + "Description": "Navigational Status" + } + ], + "Description": "Recommended Minimum Specific GPS/TRANSIT Data" + } +} +``` + +### NMEA 2000 +Support for the following NMEA 2000 PGNs would be necessary for providing SOG from logger files +(see [here](https://github.com/MO-RISE/marulc/blob/master/marulc/nmea2000_pgn_specifications.json#L12108C7-L12178C31) +for more info): +```json + { + "PGN": 129026, + "Id": "cogSogRapidUpdate", + "Description": "COG & SOG, Rapid Update", + "Type": "Single", + "Complete": true, + "Length": 8, + "RepeatingFields": 0, + "Fields": [ + { + "Order": 1, + "Id": "sid", + "Name": "SID", + "BitLength": 8, + "BitOffset": 0, + "BitStart": 0, + "Signed": false}, + { + "Order": 2, + "Id": "cogReference", + "Name": "COG Reference", + "BitLength": 2, + "BitOffset": 8, + "BitStart": 0, + "Type": "Lookup table", + "Signed": false, + "EnumValues": [ + {"name": "True", "value": "0"}, + {"name": "Magnetic", "value": "1"}, + {"name": "Error", "value": "2"}, + {"name": "Null", "value": "3"}]}, + { + "Order": 3, + "Id": "reserved", + "Name": "Reserved", + "Description": "Reserved", + "BitLength": 6, + "BitOffset": 10, + "BitStart": 2, + "Type": "Binary data", + "Signed": false}, + { + "Order": 4, + "Id": "cog", + "Name": "COG", + "BitLength": 16, + "BitOffset": 16, + "BitStart": 0, + "Units": "rad", + "Resolution": "0.0001", + "Signed": false}, + { + "Order": 5, + "Id": "sog", + "Name": "SOG", + "BitLength": 16, + "BitOffset": 32, + "BitStart": 0, + "Units": "m/s", + "Resolution": "0.01", + "Signed": false}, + { + "Order": 6, 
+ "Id": "reserved", + "Name": "Reserved", + "Description": "Reserved", + "BitLength": 16, + "BitOffset": 48, + "BitStart": 0, + "Type": "Binary data", + "Signed": false}]}, +``` + +Also for SOG (see [here](https://github.com/MO-RISE/marulc/blob/master/marulc/nmea2000_pgn_specifications.json#L22404C7-L22524C31) +for more info): +```json + { + "PGN": 130577, + "Id": "directionData", + "Description": "Direction Data", + "Type": "Fast", + "Complete": false, + "Missing": [ + "Fields", + "FieldLengths", + "Precision", + "SampleData"], + "Length": 14, + "RepeatingFields": 0, + "Fields": [ + { + "Order": 1, + "Id": "dataMode", + "Name": "Data Mode", + "BitLength": 4, + "BitOffset": 0, + "BitStart": 0, + "Type": "Lookup table", + "Signed": false, + "EnumValues": [ + {"name": "Autonomous", "value": "0"}, + {"name": "Differential enhanced", "value": "1"}, + {"name": "Estimated", "value": "2"}, + {"name": "Simulator", "value": "3"}, + {"name": "Manual", "value": "4"}]}, + { + "Order": 2, + "Id": "cogReference", + "Name": "COG Reference", + "BitLength": 2, + "BitOffset": 4, + "BitStart": 4, + "Type": "Lookup table", + "Signed": false, + "EnumValues": [ + {"name": "True", "value": "0"}, + {"name": "Magnetic", "value": "1"}, + {"name": "Error", "value": "2"}, + {"name": "Null", "value": "3"}]}, + { + "Order": 3, + "Id": "reserved", + "Name": "Reserved", + "Description": "Reserved", + "BitLength": 2, + "BitOffset": 6, + "BitStart": 6, + "Type": "Binary data", + "Signed": false}, + { + "Order": 4, + "Id": "sid", + "Name": "SID", + "BitLength": 8, + "BitOffset": 8, + "BitStart": 0, + "Signed": false}, + { + "Order": 5, + "Id": "cog", + "Name": "COG", + "BitLength": 16, + "BitOffset": 16, + "BitStart": 0, + "Units": "rad", + "Resolution": "0.0001", + "Signed": false}, + { + "Order": 6, + "Id": "sog", + "Name": "SOG", + "BitLength": 16, + "BitOffset": 32, + "BitStart": 0, + "Units": "m/s", + "Resolution": "0.01", + "Signed": false}, + { + "Order": 7, + "Id": "heading", + "Name": 
"Heading", + "BitLength": 16, + "BitOffset": 48, + "BitStart": 0, + "Units": "rad", + "Resolution": "0.0001", + "Signed": false}, + { + "Order": 8, + "Id": "speedThroughWater", + "Name": "Speed through Water", + "BitLength": 16, + "BitOffset": 64, + "BitStart": 0, + "Units": "m/s", + "Resolution": "0.01", + "Signed": false}, + { + "Order": 9, + "Id": "set", + "Name": "Set", + "BitLength": 16, + "BitOffset": 80, + "BitStart": 0, + "Units": "rad", + "Resolution": "0.0001", + "Signed": false}, + { + "Order": 10, + "Id": "drift", + "Name": "Drift", + "BitLength": 16, + "BitOffset": 96, + "BitStart": 0, + "Units": "m/s", + "Resolution": "0.01", + "Signed": false}]}, +``` + +## Adding support for wind data + +### NMEA 0183 +Support for the following NMEA 0138 sentences would be necessary for providing wind data from logger files +(see [here](https://github.com/MO-RISE/marulc/blob/master/marulc/nmea0183_sentence_formatters.json#L1361C9-L1422C11)): +```json +{ + "MWD": { + "Fields": [ + { + "Id": "direction_true", + "Description": "Wind direction true" + }, + { + "Id": "true", + "Description": "True" + }, + { + "Id": "direction_magnetic", + "Description": "Wind direction magnetic" + }, + { + "Id": "magnetic", + "Description": "Magnetic" + }, + { + "Id": "wind_speed_knots", + "Description": "Wind speed knots" + }, + { + "Id": "knots", + "Description": "Knots" + }, + { + "Id": "wind_speed_meters", + "Description": "Wind speed meters/second" + }, + { + "Id": "meters", + "Description": "Wind speed" + } + ], + "Description": "Wind Direction\n NMEA 0183 standard Wind Direction and Speed, with respect to north." 
+ }, + "MWV": { + "Fields": [ + { + "Id": "wind_angle", + "Description": "Wind angle" + }, + { + "Id": "reference", + "Description": "Reference" + }, + { + "Id": "wind_speed", + "Description": "Wind speed" + }, + { + "Id": "wind_speed_units", + "Description": "Wind speed units" + }, + { + "Id": "status", + "Description": "Status" + } + ], + "Description": "Wind Speed and Angle\n NMEA 0183 standard Wind Speed and Angle, in relation to the vessel's\n bow/centerline." + } +} +``` + +Also need heading and speed through water to go from apparent to magnetic/true wind direction +(see [here](https://github.com/MO-RISE/marulc/blob/master/marulc/nmea0183_sentence_formatters.json#L1478C9-L1514C11) +for more info): +```json + { + "VHW": { + "Fields": [ + { + "Id": "heading_true", + "Description": "Heading true degrees" + }, + { + "Id": "true", + "Description": "heading true" + }, + { + "Id": "heading_magnetic", + "Description": "Heading Magnetic degrees" + }, + { + "Id": "magnetic", + "Description": "Magnetic" + }, + { + "Id": "water_speed_knots", + "Description": "Water speed knots" + }, + { + "Id": "knots", + "Description": "Knots" + }, + { + "Id": "water_speed_km", + "Description": "Water speed kilometers" + }, + { + "Id": "kilometers", + "Description": "Kilometers" + } + ], + "Description": "Water Speed and Heading" + } +} +``` + +### NMEA 2000 +Support for the following NMEA 2000 PGNs would be necessary for providing wind data from logger files +(see [here](https://github.com/MO-RISE/marulc/blob/master/marulc/nmea2000_pgn_specifications.json#L19739C7-L19799C31) +for more info): +```json + { + "PGN": 130306, + "Id": "windData", + "Description": "Wind Data", + "Type": "Single", + "Complete": true, + "Length": 8, + "RepeatingFields": 0, + "Fields": [ + { + "Order": 1, + "Id": "sid", + "Name": "SID", + "BitLength": 8, + "BitOffset": 0, + "BitStart": 0, + "Signed": false}, + { + "Order": 2, + "Id": "windSpeed", + "Name": "Wind Speed", + "BitLength": 16, + "BitOffset": 8, + 
"BitStart": 0, + "Units": "m/s", + "Resolution": "0.01", + "Signed": false}, + { + "Order": 3, + "Id": "windAngle", + "Name": "Wind Angle", + "BitLength": 16, + "BitOffset": 24, + "BitStart": 0, + "Units": "rad", + "Resolution": "0.0001", + "Signed": false}, + { + "Order": 4, + "Id": "reference", + "Name": "Reference", + "BitLength": 3, + "BitOffset": 40, + "BitStart": 0, + "Type": "Lookup table", + "Signed": false, + "EnumValues": [ + {"name": "True (ground referenced to North)", "value": "0"}, + {"name": "Magnetic (ground referenced to Magnetic North)", "value": "1"}, + {"name": "Apparent", "value": "2"}, + {"name": "True (boat referenced)", "value": "3"}, + {"name": "True (water referenced)", "value": "4"}]}, + { + "Order": 5, + "Id": "reserved", + "Name": "Reserved", + "BitLength": 21, + "BitOffset": 43, + "BitStart": 3, + "Type": "Binary data", + "Signed": false}]}, +``` + +Also need heading and speed through water to go from apparent to magnetic/true wind direction: +Heading (see [here](https://github.com/MO-RISE/marulc/blob/master/marulc/nmea2000_pgn_specifications.json#L8461C7-L8531C31) +for more information): +```json + + { + "PGN": 127250, + "Id": "vesselHeading", + "Description": "Vessel Heading", + "Type": "Single", + "Complete": true, + "Length": 8, + "RepeatingFields": 0, + "Fields": [ + { + "Order": 1, + "Id": "sid", + "Name": "SID", + "BitLength": 8, + "BitOffset": 0, + "BitStart": 0, + "Signed": false}, + { + "Order": 2, + "Id": "heading", + "Name": "Heading", + "BitLength": 16, + "BitOffset": 8, + "BitStart": 0, + "Units": "rad", + "Resolution": "0.0001", + "Signed": false}, + { + "Order": 3, + "Id": "deviation", + "Name": "Deviation", + "BitLength": 16, + "BitOffset": 24, + "BitStart": 0, + "Units": "rad", + "Resolution": "0.0001", + "Signed": true}, + { + "Order": 4, + "Id": "variation", + "Name": "Variation", + "BitLength": 16, + "BitOffset": 40, + "BitStart": 0, + "Units": "rad", + "Resolution": "0.0001", + "Signed": true}, + { + "Order": 5, + 
"Id": "reference", + "Name": "Reference", + "BitLength": 2, + "BitOffset": 56, + "BitStart": 0, + "Type": "Lookup table", + "Signed": false, + "EnumValues": [ + {"name": "True", "value": "0"}, + {"name": "Magnetic", "value": "1"}, + {"name": "Error", "value": "2"}, + {"name": "Null", "value": "3"}]}, + { + "Order": 6, + "Id": "reserved", + "Name": "Reserved", + "Description": "Reserved", + "BitLength": 6, + "BitOffset": 58, + "BitStart": 2, + "Type": "Binary data", + "Signed": false}]}, + ``` + +Speed through water (while we're at it, get speed over ground; +see [here](https://github.com/MO-RISE/marulc/blob/master/marulc/nmea2000_pgn_specifications.json#L22404C7-L22524C31) +for more info): +```json + { + "PGN": 130577, + "Id": "directionData", + "Description": "Direction Data", + "Type": "Fast", + "Complete": false, + "Missing": [ + "Fields", + "FieldLengths", + "Precision", + "SampleData"], + "Length": 14, + "RepeatingFields": 0, + "Fields": [ + { + "Order": 1, + "Id": "dataMode", + "Name": "Data Mode", + "BitLength": 4, + "BitOffset": 0, + "BitStart": 0, + "Type": "Lookup table", + "Signed": false, + "EnumValues": [ + {"name": "Autonomous", "value": "0"}, + {"name": "Differential enhanced", "value": "1"}, + {"name": "Estimated", "value": "2"}, + {"name": "Simulator", "value": "3"}, + {"name": "Manual", "value": "4"}]}, + { + "Order": 2, + "Id": "cogReference", + "Name": "COG Reference", + "BitLength": 2, + "BitOffset": 4, + "BitStart": 4, + "Type": "Lookup table", + "Signed": false, + "EnumValues": [ + {"name": "True", "value": "0"}, + {"name": "Magnetic", "value": "1"}, + {"name": "Error", "value": "2"}, + {"name": "Null", "value": "3"}]}, + { + "Order": 3, + "Id": "reserved", + "Name": "Reserved", + "Description": "Reserved", + "BitLength": 2, + "BitOffset": 6, + "BitStart": 6, + "Type": "Binary data", + "Signed": false}, + { + "Order": 4, + "Id": "sid", + "Name": "SID", + "BitLength": 8, + "BitOffset": 8, + "BitStart": 0, + "Signed": false}, + { + "Order": 5, 
+ "Id": "cog", + "Name": "COG", + "BitLength": 16, + "BitOffset": 16, + "BitStart": 0, + "Units": "rad", + "Resolution": "0.0001", + "Signed": false}, + { + "Order": 6, + "Id": "sog", + "Name": "SOG", + "BitLength": 16, + "BitOffset": 32, + "BitStart": 0, + "Units": "m/s", + "Resolution": "0.01", + "Signed": false}, + { + "Order": 7, + "Id": "heading", + "Name": "Heading", + "BitLength": 16, + "BitOffset": 48, + "BitStart": 0, + "Units": "rad", + "Resolution": "0.0001", + "Signed": false}, + { + "Order": 8, + "Id": "speedThroughWater", + "Name": "Speed through Water", + "BitLength": 16, + "BitOffset": 64, + "BitStart": 0, + "Units": "m/s", + "Resolution": "0.01", + "Signed": false}, + { + "Order": 9, + "Id": "set", + "Name": "Set", + "BitLength": 16, + "BitOffset": 80, + "BitStart": 0, + "Units": "rad", + "Resolution": "0.0001", + "Signed": false}, + { + "Order": 10, + "Id": "drift", + "Name": "Drift", + "BitLength": 16, + "BitOffset": 96, + "BitStart": 0, + "Units": "m/s", + "Resolution": "0.01", + "Signed": false}]}, +``` + diff --git a/openvbi/__init__.py b/openvbi/__init__.py index c7457c5..af599e3 100644 --- a/openvbi/__init__.py +++ b/openvbi/__init__.py @@ -23,7 +23,7 @@ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE # OR OTHER DEALINGS IN THE SOFTWARE. -__version__ = '1.0.0' +__version__ = '1.0.1' def version() -> str: return __version__ diff --git a/openvbi/adaptors/__init__.py b/openvbi/adaptors/__init__.py index ec91e74..a2bf3eb 100644 --- a/openvbi/adaptors/__init__.py +++ b/openvbi/adaptors/__init__.py @@ -23,18 +23,51 @@ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE # OR OTHER DEALINGS IN THE SOFTWARE. 
-from typing import Protocol +from typing import Protocol, Callable, TypeVar from pathlib import Path +import gzip +import bz2 +import lzma + +import geopandas + +import openvbi.core.metadata as md from openvbi.core.observations import Dataset + +def get_fopen(filename: str | Path) -> Callable: + if isinstance(filename, Path): + fname: Path = filename + elif isinstance(filename, str): + fname: Path = Path(filename) + else: + raise ValueError(f"Expected filename to be of type str or Path but it was {type(filename)}.") + + match fname.suffix: + case '.gz' | '.gzip': + return gzip.open + case '.bz2' | '.bzip2': + return bz2.open + case '.lz' | '.lzma': + return lzma.open + case _: + return open + +# Type aliases +GeoPandasDataset = tuple[geopandas.GeoDataFrame, md.Metadata] +OpenVBIDataset = TypeVar('OpenVBIDataset', bound=Dataset|GeoPandasDataset) + class Loader(Protocol): def suffix(self) -> str: pass - def load(self, filename: str | Path) -> Dataset: + + def load(self, filename: str | Path, **kwargs) -> OpenVBIDataset: pass + class Writer(Protocol): def suffix(self) -> str: pass + def write(self, data: Dataset, filename: str | Path, **kwargs) -> None: - pass \ No newline at end of file + pass diff --git a/openvbi/adaptors/dcdb.py b/openvbi/adaptors/dcdb.py index 196565b..8fe853e 100644 --- a/openvbi/adaptors/dcdb.py +++ b/openvbi/adaptors/dcdb.py @@ -22,22 +22,24 @@ # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE # OR OTHER DEALINGS IN THE SOFTWARE. 
+import json +from datetime import datetime, timezone from pathlib import Path -from typing import Dict,Tuple,Any + import geopandas -import pandas import numpy as np -import json -from datetime import datetime, timezone +import pandas + import openvbi.core.metadata as md +from openvbi.adaptors import Loader, Writer, OpenVBIDataset from openvbi.core.observations import Dataset -from openvbi.adaptors import Loader, Writer + class CSVLoader(Loader): def suffix(self) -> str: return '.csv' - def load(self, filename: str) -> Tuple[geopandas.GeoDataFrame, md.Metadata]: + def load(self, filename: str | Path, **kwargs) -> OpenVBIDataset: data = geopandas.read_file(filename) data = data.rename(columns={'DEPTH': 'z', 'LON': 'lon', 'LAT': 'lat'}) data['t'] = np.fromiter((t.timestamp() for t in pandas.to_datetime(data['TIME'])), dtype='float') @@ -66,20 +68,20 @@ def suffix(self) -> str: def write(self, dataset: Dataset, filename: str | Path, **kwargs) -> None: FMT_OBS_TIME='%Y-%m-%dT%H:%M:%S.%fZ' feature_lst = [] - for n in range(len(dataset.depths)): - timestamp = datetime.fromtimestamp(dataset.depths['t'].iloc[n], tz=timezone.utc).strftime(FMT_OBS_TIME) + for n in range(len(dataset.data)): + timestamp = datetime.fromtimestamp(dataset.data['t'].iloc[n], tz=timezone.utc).strftime(FMT_OBS_TIME) feature = { "type": "Feature", "geometry": { "type": "Point", "coordinates": [ - dataset.depths['lon'].iloc[n], - dataset.depths['lat'].iloc[n] + dataset.data['lon'].iloc[n], + dataset.data['lat'].iloc[n] ] }, "properties": { - "depth": dataset.depths['z'].iloc[n], - "uncertainty": dataset.depths['u'].iloc[n], + "depth": dataset.data['z'].iloc[n], + "uncertainty": dataset.data['u'].iloc[n], "time": timestamp } } @@ -102,8 +104,8 @@ def write(self, dataset: Dataset, basename: str | Path, **kwargs) -> None: case _: raise ValueError(f"Expected basename to be of type str or Path, but it was of type {type(basename)}") - working_depths = dataset.depths.copy() - working_depths['TIME'] = 
working_depths['t'].transform(lambda x: datetime.utcfromtimestamp(x).isoformat() + 'Z') + working_depths = dataset.data.copy() + working_depths['TIME'] = working_depths['t'].transform(lambda x: datetime.fromtimestamp(x, tz=timezone.utc).isoformat() + 'Z') + working_depths.to_csv(basepath.with_suffix('.csv'), columns=['lon', 'lat', 'z', 'TIME'], index=False, header=['LON', 'LAT', 'DEPTH', 'TIME']) + meta = dataset.meta.metadata() + meta['properties']['trustedNode']['convention'] = 'XYZ GeoJSON CSB 3.1' diff --git a/openvbi/adaptors/factory.py b/openvbi/adaptors/factory.py new file mode 100644 index 0000000..b4a1785 --- /dev/null +++ b/openvbi/adaptors/factory.py @@ -0,0 +1,60 @@ +##\file factory.py +# This file provides a factory method for instantiating logger file loaders, currently +# those defined in teamsurv.py, wibl.py, and ydvr.py. The extension of the input filename provided +# is used to select the underlying loader to return. If the file uses an incorrect file extension, +# the wrong loader will be returned. +# +# Copyright 2025 Center for Coastal and Ocean Mapping & NOAA-UNH Joint +# Hydrographic Center, University of New Hampshire. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, +# and to permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +# OR OTHER DEALINGS IN THE SOFTWARE. + +from typing import Type +from pathlib import Path + + +from openvbi.adaptors import Loader, teamsurv, wibl, ydvr + +def get_loader(input_file: str | Path) -> Loader: + """ + + :rtype: Loader + """ + match input_file: + case str(): + infile: Path = Path(input_file) + case Path(): + infile: Path = input_file + case _: + raise ValueError(f"Expected input_file to be of type str or Path, but it was of type {type(input_file)}") + + suffixes = infile.suffixes + if len(suffixes) == 0: + raise ValueError(f"Unable to infer loader type for input file {str(input_file)}, which has no filename suffix.") + + first_suff = suffixes[0].lower() + if first_suff == teamsurv.LOADER_SUFFIX.lower(): + return teamsurv.TeamSurvLoader() + elif first_suff == wibl.LOADER_SUFFIX.lower(): + return wibl.WIBLLoader() + elif first_suff == ydvr.LOADER_SUFFIX.lower(): + return ydvr.YDVRLoader() + else: + raise ValueError(f"Unable to infer loader type for input file {str(input_file)}: unknown filename suffix.") diff --git a/openvbi/adaptors/generic_ascii.py b/openvbi/adaptors/generic_ascii.py index 8821eff..9770a1a 100644 --- a/openvbi/adaptors/generic_ascii.py +++ b/openvbi/adaptors/generic_ascii.py @@ -23,11 +23,15 @@ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE # OR OTHER DEALINGS IN THE SOFTWARE. 
+from pathlib import Path +from datetime import datetime, timezone + import pandas import geopandas + from openvbi.core.observations import RawN0183Obs, Dataset from openvbi.core.timebase import determine_time_source, generate_timebase -from openvbi.adaptors import Loader +from openvbi.adaptors import Loader, Writer, get_fopen, OpenVBIDataset class GenericASCIILoader(Loader): def __init__(self, maxelapsed: int, suffix: str) -> None: @@ -37,7 +41,7 @@ def __init__(self, maxelapsed: int, suffix: str) -> None: def suffix(self) -> str: return self.suffix - def load(self, filename: str) -> Dataset: + def load(self, filename: str | Path, **kwargs) -> OpenVBIDataset: rtn: Dataset = Dataset() # The elapsed time is milliseconds since the start of logging, and can wrap round @@ -47,7 +51,8 @@ def load(self, filename: str) -> Dataset: elapsed_offset: int = 0 last_elapsed_mark: int = 0 - with open(filename) as f: + fopen = get_fopen(filename) + with fopen(filename, mode='rt') as f: for line in f: elapsed, message = line.split(' ') elapsed = int(elapsed) @@ -62,14 +67,68 @@ def load(self, filename: str) -> Dataset: rtn.meta.setIdentifiers('NOTSET', 'Generic ASCII Inputs', '1.0') return rtn + +class GenericASCIIWriter(Writer): + """ + Write Dataset to UTF-8 CSV file with default columns ['lon', 'lat', 'TIME', 'z']. If columns (either a single + string or a list of strings) is specified to the write method, then that/those column(s) will be included in + CSV output. + """ + def suffix(self) -> str: + return '.csv' + + def write(self, dataset: Dataset, basename: str | Path, **kwargs) -> None: + """ + Write ``dataset`` to file named ``basename``+'.csv'. + + If 'columns' is specified in kwargs (either a single string or a list of strings) to the write + method, then that/those column(s) will be included in CSV output. 
+ :param dataset: + :param basename: + :param kwargs: + :return: + """ + match basename: + case str(): + basepath: Path = Path(basename) + case Path(): + basepath: Path = basename + case _: + raise ValueError(f"Expected basename to be of type str or Path, but it was of type {type(basename)}") + + cols = ['lon', 'lat', 'TIME', 'z'] + hdrs = ['LON', 'LAT', 'TIME', 'DEPTH'] + # Handle user-supplied columns + if 'columns' in kwargs: + user_cols = kwargs['columns'] + if isinstance(user_cols, str): + user_cols = [user_cols] + elif not isinstance(user_cols, list): + raise ValueError(f"kwarg 'columns' is not a string or list, but must be. Type was {type(user_cols)}.") + for uc in user_cols: + if uc not in dataset.data: + raise ValueError(f"User column {uc} is not present in dataset.") + user_hdrs = [] + cols += user_cols + hdrs += [c.casefold().swapcase() for c in user_cols] + + output_data = dataset.data.copy() + output_data['TIME'] = output_data['t'].transform(lambda x: datetime.fromtimestamp(x, tz=timezone.utc).isoformat() + 'Z') + output_data.to_csv(basepath.with_suffix('.csv'), columns=cols, index=False, + header=hdrs) + + class PreparsedASCIILoader(Loader): def suffix(self) -> str: return '.csv' - def load(self, filename: str) -> Dataset: + def load(self, filename: str | Path, **kwargs) -> OpenVBIDataset: data = Dataset() - depths = pandas.read_csv(filename) - # Translate from "Epoch,Longitude,Latitude,Depth" as input columns, to the standard set - depths = depths.rename(columns={'Epoch': 't', 'Longitude': 'lon', 'Latitude': 'lat', 'Depth': 'z'}) - data.depths = geopandas.GeoDataFrame(depths, geometry=geopandas.points_from_xy(depths['lon'], depths['lat']), crs='EPSG:4326') - return data + + fopen = get_fopen(filename) + with fopen(filename, mode='rt') as f: + depths = pandas.read_csv(f) + # Translate from "Epoch,Longitude,Latitude,Depth" as input columns, to the standard set + depths = depths.rename(columns={'Epoch': 't', 'Longitude': 'lon', 'Latitude': 'lat', 
'Depth': 'z'}) + data.data = geopandas.GeoDataFrame(depths, geometry=geopandas.points_from_xy(depths['lon'], depths['lat']), crs='EPSG:4326') + return data diff --git a/openvbi/adaptors/teamsurv.py b/openvbi/adaptors/teamsurv.py index a1a21d5..be865b8 100644 --- a/openvbi/adaptors/teamsurv.py +++ b/openvbi/adaptors/teamsurv.py @@ -23,18 +23,25 @@ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE # OR OTHER DEALINGS IN THE SOFTWARE. +from pathlib import Path + from openvbi.core.observations import RawN0183Obs, BadData, Dataset from openvbi.core.timebase import determine_time_source, generate_timebase -from openvbi.adaptors import Loader +from openvbi.adaptors import Loader, get_fopen, OpenVBIDataset + + +LOADER_SUFFIX: str = '.TSV' + class TeamSurvLoader(Loader): def suffix(self) -> str: - return '.TSV' - - def load(self, filename: str) -> Dataset: + return LOADER_SUFFIX + + def load(self, filename: str | Path, **kwargs) -> OpenVBIDataset: rtn: Dataset = Dataset() - with open(filename) as f: + fopen = get_fopen(filename) + with fopen(filename, mode='rt', encoding='windows-1252') as f: for message in f: try: obs = RawN0183Obs(None, message) diff --git a/openvbi/adaptors/wibl.py b/openvbi/adaptors/wibl.py index b20191f..5d8a486 100644 --- a/openvbi/adaptors/wibl.py +++ b/openvbi/adaptors/wibl.py @@ -13,22 +13,29 @@ import json from typing import Dict, Any +from pathlib import Path + import openvbi.adaptors.logger_file as LoggerFile -from openvbi.adaptors import Loader +from openvbi.adaptors import Loader, get_fopen, OpenVBIDataset from openvbi.core.observations import RawN0183Obs, ParsedN2000, Dataset from openvbi.core.metadata import VerticalReference, VerticalReferencePosition + +LOADER_SUFFIX: str = '.wibl' + + class WIBLLoader(Loader): def suffix(self) -> str: - return '.wibl' + return LOADER_SUFFIX - def load(self, filename: str) -> Dataset: + def load(self, filename: str | Path, **kwargs) -> OpenVBIDataset: data: Dataset = Dataset() logger_UUID: str = 
'NONE' ship_name: str = 'Anonymous' firmware_version: str = '0.0.0' - with open(filename, 'rb') as f: + fopen = get_fopen(filename) + with fopen(filename, mode='rb') as f: source = LoggerFile.PacketFactory(f) while source.has_more(): try: diff --git a/openvbi/adaptors/ydvr.py b/openvbi/adaptors/ydvr.py index 6d1b0ae..1abf9d4 100644 --- a/openvbi/adaptors/ydvr.py +++ b/openvbi/adaptors/ydvr.py @@ -25,6 +25,7 @@ import lzma import struct from typing import Tuple +from pathlib import Path from marulc.nmea2000 import get_description_for_pgn from marulc.exceptions import ParseError @@ -32,7 +33,11 @@ from openvbi.core.observations import RawN2000Obs, BadData, Dataset from openvbi.core.statistics import PktFaults from openvbi.core.timebase import determine_time_source, generate_timebase -from openvbi.adaptors import Loader +from openvbi.adaptors import Loader, get_fopen, OpenVBIDataset + + +LOADER_SUFFIX: str = '.DAT' + def TranslateCANId(id: int) -> Tuple[int, int, int, int]: pf = (id >> 16) & 0xFF @@ -98,24 +103,15 @@ def next_packet(f) -> Tuple[int, int, bytearray]: return elapsed, pgn, packet class YDVRLoader(Loader): - def __init__(self, compressed=False) -> None: - self.compressed = compressed - def suffix(self) -> str: - return '.DAT' + return LOADER_SUFFIX - def load(self, filename: str) -> Dataset: + def load(self, filename: str | Path, **kwargs) -> OpenVBIDataset: """ Load YDVR data from ``filename``. :param filename: - :param compressed: If true, attempt to open ``filename`` as a lzma-compressed file. :return: """ - if self.compressed: - fopen = lzma.open - else: - fopen = open - data: Dataset = Dataset() # The elapsed time is milliseconds since the start of logging, and can wrap round. @@ -132,7 +128,8 @@ def load(self, filename: str) -> Dataset: # 16-bit range, so it cycles quite a bit. 
maxelapsed: int = 65535 - with fopen(filename, 'rb') as f: + fopen = get_fopen(filename) + with fopen(filename, mode='rb') as f: while f: pkt_name = 'Unknown' elapsed, pgn, packet = next_packet(f) diff --git a/openvbi/command/__init__.py b/openvbi/command/__init__.py new file mode 100644 index 0000000..23a7c1a --- /dev/null +++ b/openvbi/command/__init__.py @@ -0,0 +1,39 @@ +import sys +from pathlib import Path +import traceback + +import click + +from openvbi.adaptors import factory, Loader + +@click.version_option() +@click.group() +def cli(): + pass + + +@click.command() +@click.argument('input_file', type=click.Path(exists=True, file_okay=True, dir_okay=False, path_type=Path)) +@click.argument('output', type=click.File(mode='w', encoding='ascii')) +def dump(input_file: Path, output): + """ + Dump logger data file to ASCII format for debugging. + + \b + INPUT_FILE represents the path of the logger data file to read from. + OUTPUT represents the path of the file to write ASCII values to. Use '-' for standard output. + + """ + try: + loader: Loader = factory.get_loader(input_file) + data = loader.load(input_file) + for i, p in enumerate(data.packets): + output.write(f"Packet {i}: name: {p.Name()}\n") + if hasattr(p, '_data'): + output.write(f"\t_data: {p._data}\n") + else: + output.write(f"\tNo _data in packet.\n") + except Exception as e: + sys.exit(traceback.format_exc()) + +cli.add_command(dump) diff --git a/openvbi/core/interpolation.py b/openvbi/core/interpolation.py index a4f2d39..ce18541 100644 --- a/openvbi/core/interpolation.py +++ b/openvbi/core/interpolation.py @@ -31,7 +31,8 @@ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE # OR OTHER DEALINGS IN THE SOFTWARE. -from typing import List +from collections.abc import Collection +from typing import Any import numpy as np @@ -58,22 +59,18 @@ class InterpTable: # This sets up for interpolation of an independent variable (added automatically) against # the named dependent variables. 
# - # \param vars (List[str]) List of the names of the dependent variables to manage - def __init__(self, vars: List[str]) -> None: - # Add an independent variable tag implicitly to the lookup table - self.vars = {} - self.vars['ind'] = [] + # \param vars (Collection[str]) list|tuple|etc. of the names of the dependent variables to manage + def __init__(self, vars: Collection[str]) -> None: + # Each dependent variable is stored as a dict mapping independent variable datum to dependent variable datum + self.vars: dict[str, dict[int|float, Any]] = {} + # Track all independent variables + self.indep_var = [] for v in vars: - self.vars[v] = [] + self.vars[v] = {} ## Add a data point to a single dependent variable # - # Add a single data point to a single dependent variable. Note that if the object is - # tracking more than one dependent variable, this is likely to cause a problem, since - # you'll have one more point in the independent variable array than in the dependent - # variables that aren't updated by this call, which will fail interpolation. This is - # therefore only really useful for the special case where there is only a single - # dependent variable being tracked. + # Add a single data point to a single dependent variable. # # \param ind Independent variable value to add to the array # \param var Name of the dependent variable to update @@ -81,45 +78,42 @@ def __init__(self, vars: List[str]) -> None: def add_point(self, ind: float, var: str, value: float) -> None: if var not in self.vars: raise NoSuchVariable() - self.vars['ind'].append(ind) - self.vars[var].append(value) + self.indep_var.append(ind) + self.vars[var][ind] = value ## Add a data point to multiple dependent variables simultaneously # # Add a data point for one or more dependent variables at a common independent variable - # point. 
Since adding a data point for a common independent variable value to some, but - # not all of the dependent variables would result in some dependent variables having arrays - # of different lengths, it would cause problems when interpolating. This is therefore only - # really useful if you're updating all of the variables. + # point. # # \param ind Independent variable value to add to the array - # \param vars List of names of the dependent variables to update - # \param values List of values to update for the named dependent variables, in the same order - def add_points(self, ind: float, vars: List[str], values: List[float]) -> None: + # \param vars list|tuple of names of the dependent variables to update + # \param values list|tuple of values to update for the named dependent variables, in the same order + def add_points(self, ind: float, vars: list[str]|tuple[str], values: list[float]|tuple[str]) -> None: for var in vars: if var not in self.vars: raise NoSuchVariable() if len(vars) != len(values): raise NotEnoughValues() - self.vars['ind'].append(ind) + self.indep_var.append(ind) for n in range(len(vars)): - self.vars[vars[n]].append(values[n]) + self.vars[vars[n]][ind] = values[n] ## Interpolate one or more dependent variables at an array of independent variable values # # Construct a linear interpolation of the named dependent variables at the given array # of independent variable values. # - # \param yvars List of names of the dependent variables to interpolate + # \param yvars Collection list|tuple|etc. 
of names of the dependent variables to interpolate # \param x NumPy array of the independent variable points at which to interpolate # \return List of NumPy arrays for the named dependent variables, in the same order - def interpolate(self, yvars: List[str], x: np.ndarray) -> List[np.ndarray]: + def interpolate(self, yvars: Collection[str], x: np.ndarray) -> list[np.ndarray]: for yvar in yvars: if yvar not in self.vars: raise NoSuchVariable() rtn = [] for yvar in yvars: - rtn.append(np.interp(x, self.vars['ind'], self.vars[yvar])) + rtn.append(np.interp(x, self.indep_var, self.var(yvar))) return rtn ## Determine the number of points in the independent variable array @@ -130,19 +124,27 @@ def interpolate(self, yvars: List[str], x: np.ndarray) -> List[np.ndarray]: # # \return Number of points in the interpolation table def n_points(self) -> int: - return len(self.vars['ind']) + return len(self.indep_var) ## Accessor for the array of points for a named variable # - # This provides checked access to one of the dependent variables stored in the array. This returns - # all of the points stored for that variable as a NumPy array. + # This provides checked access to one of the dependent variables stored in the array. This method + # generates an array of the same length as the independent variable, with values from var, where var + # has valid data for that ind, or None. This way, all variables returned will be of the same length, + # allowing interpolation and combination of multiple dependent variables into a single + # ndarray/dataframe. 
# # \param name Name of the dependent variable to extract # \return NumPy array for the dependent variable named def var(self, name: str) -> np.ndarray: if name not in self.vars: raise NoSuchVariable() - return np.array(self.vars[name]) + dst_array = [] + var_data = self.vars[name] + for ind in self.indep_var: + dst_array.append(var_data.get(ind, None)) + + return np.array(dst_array) ## Accessor for the array of points for the independent variable # @@ -151,4 +153,4 @@ def var(self, name: str) -> np.ndarray: # # \return NumPy array for the independent variable def ind(self) -> np.ndarray: - return np.array(self.vars['ind']) + return np.array(self.indep_var) diff --git a/openvbi/core/observations.py b/openvbi/core/observations.py index 4d6f3fd..2cc0823 100644 --- a/openvbi/core/observations.py +++ b/openvbi/core/observations.py @@ -23,23 +23,38 @@ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE # OR OTHER DEALINGS IN THE SOFTWARE. +from collections.abc import Collection from typing import List, Tuple import datetime from dataclasses import dataclass + import pandas import geopandas + from marulc import NMEA0183Parser, NMEA2000Parser from marulc.nmea2000 import unpack_complete_message, get_description_for_pgn from marulc.exceptions import ParseError, ChecksumError, PGNError + import bitstruct -from openvbi.core.types import TimeSource, RawObs, NoDepths + +from openvbi.core.types import TimeSource, RawObs, NoDataFound from openvbi.core.statistics import PktStats from openvbi.core.interpolation import InterpTable from openvbi.core.timebase import determine_time_source, generate_timebase import openvbi.core.metadata as md +import openvbi.core.unit_conversion as uc from openvbi import version from openvbi.adaptors.logger_file import DataPacket + +DEPENDENT_VARS = {'Depth': 'z', # NMEA2000 + 'DPT': 'z', # NMEA0183 + 'DBT': 'z', # NMEA0183 + 'WaterTemperature': 'waterTemp', # NMEA2000 + 'MTW': 'waterTemp' # NMEA0183 + } + + class BadData(Exception): pass @@ 
-70,9 +85,18 @@ def MatchesTimeSource(self, source: TimeSource) -> bool: def Timestamp(self) -> float: if not self.HasTime(): return -1.0 - base_date = datetime.datetime(self._data['Fields']['year'], self._data['Fields']['month'], self._data['Fields']['day']) - time_offset = datetime.timedelta(seconds = self._data['Fields']['timestamp']) - reftime = base_date + time_offset + + match self.Name(): + case 'ZDA': + base_date = datetime.datetime(self._data['Fields']['year'], self._data['Fields']['month'], self._data['Fields']['day']) + time_offset = datetime.timedelta(seconds = self._data['Fields']['timestamp']) + case 'RMC': + base_date = datetime.datetime.strptime(str(self._data['Fields']['datestamp']), '%d%m%y') + time_offset = datetime.datetime.strptime(str(self._data['Fields']['timestamp']), '%H%M%S') - datetime.datetime(1900, 1, 1) + case _: + raise ValueError(f"Unable to parse timestamp for NMEA0183 message of name {self.Name()}") + + reftime = (base_date + time_offset).astimezone(tz=datetime.timezone.utc) return reftime.timestamp() def Depth(self) -> float: @@ -94,7 +118,15 @@ def Position(self) -> Tuple[float,float]: if self._data['Fields']['lat_dir'] == 'S': lat = - lat return (lon, lat) - + + def WaterTemperature(self) -> float: + if self.Name() != 'MTW': + raise BadData() + temp = float(self._data['Fields']['temperature']) + unit = self._data['Fields']['units'] + return uc.to_temperature_kelvin(temp, unit) + + class RawN2000Obs(RawObs): def __init__(self, elapsed: int, pgn: int, message: bytearray) -> None: parser = NMEA2000Parser() @@ -124,6 +156,10 @@ def __init__(self, elapsed: int, pgn: int, message: bytearray) -> None: name = 'COG' elif pgn == 129029: name = 'GNSS' + elif pgn == 130577: + name = 'DirectionData' + elif pgn == 130316 and self._data['Fields']['source'] == 0: + name = 'WaterTemperature' else: # Attempt to get the PGN name from the MARULC database. 
We could do this for # all PGNs, but the names above are for data that we use everywhere, and we @@ -170,6 +206,12 @@ def Position(self) -> Tuple[float,float]: case _: raise BadData() + def WaterTemperature(self) -> float: + if self.Name() != 'WaterTemperature': + raise BadData() + temp = float(self._data['Fields']['temperature']) + return uc.to_temperature_kelvin(temp, 'K') + class ParsedN2000(RawObs): def __init__(self, elapsed: int, data: DataPacket) -> None: @@ -211,6 +253,13 @@ def Position(self) -> Tuple[float,float]: raise BadData() return (self._data.longitude, self._data.latitude) + def WaterTemperature(self) -> float: + if self.Name() != 'WaterTemperature': + raise BadData() + if self._data.tempSource != 0: + raise BadData() + return self._data.temperature + def count_messages(messages: List[RawObs]) -> PktStats: """Determine the list of messages that are available in the input data source. @@ -262,7 +311,7 @@ class Dataset: timesrc: TimeSource timebase: InterpTable meta: md.Metadata - depths: geopandas.GeoDataFrame + data: geopandas.GeoDataFrame def __init__(self): self.packets = list() @@ -270,7 +319,7 @@ def __init__(self): self.timesrc = None self.timebase = None self.meta = md.Metadata() - self.depths = None + self.data = None def add_timebase(self) -> None: '''This determines, given a list ot raw packets, which time source should be used for @@ -280,34 +329,61 @@ def add_timebase(self) -> None: ''' self.timesrc = determine_time_source(self.stats) self.timebase = generate_timebase(self.packets, self.timesrc) - - def generate_observations(self, depth: str) -> None: - depth_table = InterpTable(['z',]) + + + def generate_observations(self, obs_vars: Collection[str]) -> None: + vars = set() + for ov in obs_vars: + if ov not in DEPENDENT_VARS.keys(): + raise ValueError(f"Unknown observation variable {ov}") + vars.add(DEPENDENT_VARS[ov]) + dep_var_table = InterpTable(vars) position_table = InterpTable(['lon', 'lat']) for obs in self.packets: if 
obs.Elapsed() is None: continue - - if obs.Name() == depth: - depth_table.add_point(obs.Elapsed(), 'z', obs.Depth()) - elif obs.Name() in ['GGA','GNSS']: + obs_name = obs.Name() + if obs_name in obs_vars: + val = None + match obs_name: + case 'Depth' | 'DPT' | 'DBT': + val = obs.Depth() + case 'WaterTemperature' | 'MTW': + val = obs.WaterTemperature() + if val is not None: + dep_var_table.add_point(obs.Elapsed(), DEPENDENT_VARS[obs_name], val) + elif obs_name in ['GGA', 'GNSS']: position_table.add_points(obs.Elapsed(), ('lon', 'lat'), obs.Position()) - elif obs.Name() == 'Position, Rapid Update': + elif obs_name == 'Position, Rapid Update': position_table.add_points(obs.Elapsed(), ('lon', 'lat'), obs.Position()) - - depth_timepoints = depth_table.ind() - if len(depth_timepoints) == 0: - raise NoDepths() - z = depth_table.var('z') - z_times = self.timebase.interpolate(['ref',], depth_timepoints)[0] - z_lat, z_lon = position_table.interpolate(['lat', 'lon'], depth_timepoints) - - data = pandas.DataFrame(columns=['t', 'lon', 'lat', 'z', 'u']) - for n in range(depth_table.n_points()): - data.loc[len(data)] = [z_times[n], z_lon[n], z_lat[n], z[n], [-1.0, -1.0, -1.0]] - self.depths = geopandas.GeoDataFrame(data, geometry=geopandas.points_from_xy(data.lon, data.lat), crs='EPSG:4326') + dep_var_timepoints = dep_var_table.ind() + if len(dep_var_timepoints) == 0: + raise NoDataFound() + dep_vars = {} + for v in vars: + dep_vars[v] = dep_var_table.var(v) + times = self.timebase.interpolate(['ref', ], dep_var_timepoints)[0] + lat, lon = position_table.interpolate(['lat', 'lon'], dep_var_timepoints) + + dep_cols = list(dep_vars.keys()) + cols = ['t', 'lon', 'lat'] + dep_cols + emit_u: bool = False + if 'z' in vars: + cols += 'u' + emit_u: bool = True + + data = pandas.DataFrame(columns=cols) + for n in range(dep_var_table.n_points()): + row = [times[n], lon[n], lat[n]] + for dep_var in dep_vars.values(): + row.append(dep_var[n]) + if emit_u: + row.append([-1.0, -1.0, -1.0]) + 
data.loc[len(data)] = row + + self.data = geopandas.GeoDataFrame(data, geometry=geopandas.points_from_xy(data.lon, data.lat), crs='EPSG:4326') self.meta.addProcessingAction(md.ProcessingType.TIMESTAMP, None, method='Linear Interpolation', algorithm='OpenVBI', version=version()) self.meta.addProcessingAction(md.ProcessingType.UNCERTAINTY, None, name='OpenVBI Default Uncertainty', parameters={}, version=version(), comment='Default (non-valid) uncertainty', reference='None') diff --git a/openvbi/core/types.py b/openvbi/core/types.py index 3d02336..de23d49 100644 --- a/openvbi/core/types.py +++ b/openvbi/core/types.py @@ -25,7 +25,7 @@ from enum import Enum from abc import ABC, abstractmethod -from typing import Tuple +from typing import Tuple, Any ## Encapsulate the types of real-world time information that can be used # @@ -81,5 +81,16 @@ def Depth(self) -> float: def Position(self) -> Tuple[float,float]: pass -class NoDepths(RuntimeError): + @abstractmethod + def WaterTemperature(self) -> float: + """ + Water temperature in kelvin (K) + :return: + """ + pass + +class NoDataFound(RuntimeError): + pass + +class NoDepths(NoDataFound): pass diff --git a/openvbi/core/unit_conversion.py b/openvbi/core/unit_conversion.py new file mode 100644 index 0000000..9884744 --- /dev/null +++ b/openvbi/core/unit_conversion.py @@ -0,0 +1,11 @@ + +def to_temperature_kelvin(in_temp: float, unit: str): + match unit: + case 'K' | 'k': + return in_temp + case 'C' | 'c': + return in_temp + 273.15 + case 'F' | 'f': + return ( ((in_temp - 32.0) * 5.0) / 9.0 ) + 273.15 + case _: + raise ValueError(f"Unknown temperature unit {unit}.") diff --git a/openvbi/corrections/waterlevel/__init__.py b/openvbi/corrections/waterlevel/__init__.py index c6eb7fb..7b01130 100644 --- a/openvbi/corrections/waterlevel/__init__.py +++ b/openvbi/corrections/waterlevel/__init__.py @@ -37,7 +37,7 @@ def preload(self, dataset: Dataset) -> None: pass def correct(self, dataset: Dataset) -> Dataset: - dataset.depths
= self._execute(dataset.depths) + dataset.data = self._execute(dataset.data) self._metadata(dataset.meta) return dataset diff --git a/openvbi/corrections/waterlevel/noaa/__init__.py b/openvbi/corrections/waterlevel/noaa/__init__.py index 69fbbd9..3f8bda1 100644 --- a/openvbi/corrections/waterlevel/noaa/__init__.py +++ b/openvbi/corrections/waterlevel/noaa/__init__.py @@ -77,8 +77,8 @@ def __init__(self, stationName: str) -> None: super().__init__() def preload(self, dataset: Dataset) -> None: - startTime = dataset.depths['t'].min() - endTime = dataset.depths['t'].max() + startTime = dataset.data['t'].min() + endTime = dataset.data['t'].max() raw_levels = get_noaa_station(self._stationID, startTime, endTime) if raw_levels is None: print(f'Error: station failed to resolve waterlevels for time range [{startTime.isoformat()}, {endTime.isoformat()}].') @@ -112,7 +112,7 @@ def __init__(self, zone_shapefile: str) -> None: def preload(self, dataset: Dataset) -> None: # Spatial join to determine which polygon each observation is in (and hence which station controls) - annotated_pts = geopandas.sjoin(dataset.depths, self._zones, how='inner', predicate='within') + annotated_pts = geopandas.sjoin(dataset.data, self._zones, how='inner', predicate='within') # List of all required stations self._stations = annotated_pts['ControlStn'].unique() self._tides = dict() diff --git a/openvbi/examples/dedup.py b/openvbi/examples/dedup.py index b11fef8..bf1e375 100644 --- a/openvbi/examples/dedup.py +++ b/openvbi/examples/dedup.py @@ -17,7 +17,7 @@ def report_metadata(m: md.Metadata, tag: str) -> None: endTime = time.perf_counter() print(f'LoadData: {1000*(endTime - startTime):8.3f} ms (started {startTime:.3f}, completed {endTime:.3f})') -print(data.depths) +print(data.data) report_metadata(data.meta, 'Before deduplication') @@ -25,7 +25,7 @@ def report_metadata(m: md.Metadata, tag: str) -> None: dedup = deduplicate(verbose=True) deduped_data = dedup.Execute(data) endTime = 
time.perf_counter() -print(deduped_data.depths) +print(deduped_data.data) print(f'Deduplicate: {1000*(endTime - startTime):8.3f} ms (started {startTime:.3f}, completed {endTime:.3f})') report_metadata(deduped_data.meta, 'After deduplication') diff --git a/openvbi/examples/non_depth_to_csv.py b/openvbi/examples/non_depth_to_csv.py new file mode 100644 index 0000000..161ff5d --- /dev/null +++ b/openvbi/examples/non_depth_to_csv.py @@ -0,0 +1,15 @@ +from openvbi.adaptors import factory, Loader +from openvbi.adaptors.generic_ascii import GenericASCIIWriter + +def main(): + data_file = '../../tests/data/00010001.DAT.lzma' + outpath_base = '/tmp/00010001' + + loader: Loader = factory.get_loader(data_file) + data = loader.load(data_file) + data.generate_observations(['Depth', 'WaterTemperature']) + writer = GenericASCIIWriter() + writer.write(data, outpath_base, columns='waterTemp') + +if __name__ == '__main__': + main() diff --git a/openvbi/examples/prep-instrumented.py b/openvbi/examples/prep-instrumented.py index d7f2585..26fa05d 100644 --- a/openvbi/examples/prep-instrumented.py +++ b/openvbi/examples/prep-instrumented.py @@ -26,11 +26,11 @@ def report_metadata(m: md.Metadata, tag: str) -> None: print('\nSource data after load:') with pandas.option_context('display.float_format', '{:.6f}'.format): - print(data.depths) -min_depth = data.depths['z'].min() -max_depth = data.depths['z'].max() -min_time = data.depths['t'].min() + 10.0*60.0 # Remove first ten minutes -max_time = data.depths['t'].max() - 10.0*60.0 # Remove last ten minutes + print(data.data) +min_depth = data.data['z'].min() +max_depth = data.data['z'].max() +min_time = data.data['t'].min() + 10.0 * 60.0 # Remove first ten minutes +max_time = data.data['t'].max() - 10.0 * 60.0 # Remove last ten minutes depth_range = max_depth - min_depth shoal_threshold = min_depth + depth_range/3.0 deep_threshold = max_depth - depth_range/3.0 @@ -46,7 +46,7 @@ def report_metadata(m: md.Metadata, tag: str) -> None: 
print(f'Filters: {1000*(endTime - startTime):8.3f} ms (started {startTime:.3f}, completed {endTime:.3f})') print('\nAfter filtering, source data are:') with pandas.option_context('display.float_format', '{:.6f}'.format): - print(data.depths) + print(data.data) report_metadata(data.meta, "After filter, metadata is:") startTime = time.perf_counter() @@ -66,9 +66,9 @@ def report_metadata(m: md.Metadata, tag: str) -> None: single_station_wl.correct(src_depths) endTime = time.perf_counter() print(f'CorrectSingleStation: {1000*(endTime - startTime):8.3f} ms (started {startTime:.3f}, completed {endTime:.3f})') -print('\nAfter single-station correction, depths are') +print('\nAfter single-station correction, data are') with pandas.option_context('display.float_format', '{:.6f}'.format): - print(src_depths.depths) + print(src_depths.data) report_metadata(src_depths.meta, 'Single Station Metadata') src_depths = copy.deepcopy(data) @@ -76,7 +76,7 @@ def report_metadata(m: md.Metadata, tag: str) -> None: zone_tide_wl.correct(src_depths) endTime = time.perf_counter() print(f'CorrectZoneTides: {1000*(endTime - startTime):8.3f} ms (started {startTime:.3f}, completed {endTime:.3f})') -print('\nAfter zone-tide correction, depths are:') +print('\nAfter zone-tide correction, data are:') with pandas.option_context('display.float_format', '{:.6f}'.format): - print(src_depths.depths) + print(src_depths.data) report_metadata(src_depths.meta, 'Zone-tide Metadata') \ No newline at end of file diff --git a/openvbi/examples/prep-simple.py b/openvbi/examples/prep-simple.py index 11fa89a..ec35a99 100644 --- a/openvbi/examples/prep-simple.py +++ b/openvbi/examples/prep-simple.py @@ -13,14 +13,14 @@ # Calibrate acceptable depth and time windows for data (note that these are simply for # demonstration purposes: this cuts off a lot of valid data!) 
-min_depth = data.depths['z'].min() -max_depth = data.depths['z'].max() +min_depth = data.data['z'].min() +max_depth = data.data['z'].max() depth_range = max_depth - min_depth shoal_threshold = min_depth + depth_range/3.0 deep_threshold = max_depth - depth_range/3.0 -min_time = data.depths['t'].min() + 10.0*60.0 # Remove first ten minutes -max_time = data.depths['t'].max() - 10.0*60.0 # Remove last ten minutes +min_time = data.data['t'].min() + 10.0 * 60.0 # Remove first ten minutes +max_time = data.data['t'].max() - 10.0 * 60.0 # Remove last ten minutes # Generate filters for shoal/deep depth, and early/late time shoal = shoaler_than(shoal_threshold) diff --git a/openvbi/filters/__init__.py b/openvbi/filters/__init__.py index 108c665..3eca92c 100644 --- a/openvbi/filters/__init__.py +++ b/openvbi/filters/__init__.py @@ -35,7 +35,7 @@ def __init__(self): pass def Execute(self, dataset: Dataset) -> Dataset: - dataset.depths = self._execute(dataset.depths) + dataset.data = self._execute(dataset.data) self._metadata(dataset.meta) return dataset diff --git a/pyproject.toml b/pyproject.toml index b99d493..ec1b929 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,6 +21,9 @@ classifiers = [ "Homepage" = "https://github.com/CCOMJHC/OpenVBI" "Bug Tracker" = "https://github.com/CCOMJHC/OpenVBI/issues" +[project.scripts] +vbi = "openvbi.command:cli" + [tool.setuptools.dynamic] version = {attr = "openvbi.__version__"} dependencies = {file = ["requirements.txt"]} diff --git a/requirements.txt b/requirements.txt index 6ff9cfc..789aae9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ setuptools>=76.1.0 +click>=8.0.0 numpy>=2.0.0 csbschema~=1.1.2 marulc~=0.2.0 diff --git a/tests/data/00010001.DAT.lzma b/tests/data/00010001.DAT.lzma new file mode 100644 index 0000000..7e510fc Binary files /dev/null and b/tests/data/00010001.DAT.lzma differ diff --git a/tests/unit/test_non_depth_variables.py b/tests/unit/test_non_depth_variables.py new file mode 100644 
index 0000000..6ebedc4 --- /dev/null +++ b/tests/unit/test_non_depth_variables.py @@ -0,0 +1,41 @@ +from pathlib import Path +import traceback + +import pandas as pd + +from openvbi.adaptors import factory, Loader +from openvbi.adaptors.generic_ascii import GenericASCIIWriter + +from tests.fixtures import data_path, temp_path + + +def test_non_depth_nmea2000(data_path, temp_path): + data_file: Path = data_path / '00010001.DAT.lzma' + exception_thrown: bool = False + try: + loader: Loader = factory.get_loader(data_file) + data = loader.load(data_file) + data.generate_observations(['Depth', 'WaterTemperature']) + # Write data to CSV file + basepath = temp_path / '00010001' + writer = GenericASCIIWriter() + writer.write(data, basepath, columns='waterTemp') + assert basepath.with_suffix('.csv').exists() + # Read data back + csv_data = pd.read_csv(basepath.with_suffix('.csv')) + assert (2072, 5) == csv_data.shape + assert 'LON' in csv_data + assert 'LAT' in csv_data + assert 'TIME' in csv_data + assert 'DEPTH' in csv_data + assert 'WATERTEMP' in csv_data + assert 2072 == csv_data['LON'][~csv_data['LON'].isna()].shape[0] + assert 2072 == csv_data['LAT'][~csv_data['LAT'].isna()].shape[0] + assert 2072 == csv_data['TIME'][~csv_data['TIME'].isna()].shape[0] + assert 1381 == csv_data['DEPTH'][~csv_data['DEPTH'].isna()].shape[0] + assert 691 == csv_data['WATERTEMP'][~csv_data['WATERTEMP'].isna()].shape[0] + except Exception as e: + print(f"Error encountered: {str(e)}") + print(traceback.format_exc()) + exception_thrown = True + assert not exception_thrown diff --git a/tests/unit/test_processing.py b/tests/unit/test_processing.py index 6f62175..293f0c0 100644 --- a/tests/unit/test_processing.py +++ b/tests/unit/test_processing.py @@ -1,7 +1,7 @@ from pathlib import Path import uuid -from openvbi.adaptors.ydvr import YDVRLoader +from openvbi.adaptors import factory, Loader import openvbi.core.metadata as md from openvbi.adaptors.dcdb import GeoJSONWriter, CSVWriter @@ -13,7 
+13,7 @@ def test_basic_processing(data_path, temp_path): try: # Load data from compressed YachtDevices file, and convert into a dataframe - loader = YDVRLoader(compressed=ydvr_file.suffix == '.lzma') + loader: Loader = factory.get_loader(ydvr_file) data = loader.load(ydvr_file) except Exception as e: exception_thrown = True @@ -43,7 +43,7 @@ def test_basic_processing(data_path, temp_path): data.meta.setVesselID(md.VesselIdentifier.MMSI, "000000000") # YachtDevices is a NMEA2000 device, so we convert 'Depth' messages for depth information - data.generate_observations('Depth') + data.generate_observations(['Depth']) gjson_path = temp_path / '00030095.json' writer = GeoJSONWriter() diff --git a/tests/unit/test_unit_conversion.py b/tests/unit/test_unit_conversion.py new file mode 100644 index 0000000..2c25b02 --- /dev/null +++ b/tests/unit/test_unit_conversion.py @@ -0,0 +1,30 @@ +import math + +from openvbi.core.unit_conversion import to_temperature_kelvin + + +def test_to_temperature_kelvin(): + expect: float = 300.0 + res: float = to_temperature_kelvin(300.0, 'k') + assert math.isclose(res, expect) + res: float = to_temperature_kelvin(300.0, 'K') + assert res == expect + + expect: float = 293.15 + res: float = to_temperature_kelvin(20, 'c') + assert res == expect + res: float = to_temperature_kelvin(20, 'C') + assert math.isclose(res, expect) + + expect: float = 295.3722222 + res: float = to_temperature_kelvin(72, 'f') + assert math.isclose(res, expect) + res: float = to_temperature_kelvin(72, 'F') + assert math.isclose(res, expect) + + threw_exc: bool = False + try: + _ = to_temperature_kelvin(491.67, 'R') + except ValueError: + threw_exc = True + assert threw_exc