-
Notifications
You must be signed in to change notification settings - Fork 0
Open
Description
The tests of concat_ice_daily.py have recently started failing with HDF errors. Some example output is copied below, and a full test report can be found in the linked CI run.
________ test_true_case[access-om3.cice.1day.mean-Default-365-False-12] ________
> file = self._cache[self._key]
^^^^^^^^^^^^^^^^^
/opt/hostedtoolcache/Python/3.12.11/x64/lib/python3.12/site-packages/xarray/backends/file_manager.py:211:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
> value = self._cache[key]
^^^^^^^^^^^^^^^^^
E KeyError: [<class 'netCDF4._netCDF4.Dataset'>, ('/tmp/pytest-of-runner/pytest-0/test_true_case_access_om3_cice0/archive/output000/access-om3.cice.1day.mean.2010-10.nc',), 'a', (('clobber', True), ('diskless', False), ('format', 'NETCDF4'), ('persist', False)), 'ad1ef0b0-6542-4a3a-a65f-e5f5c534625b']
/opt/hostedtoolcache/Python/3.12.11/x64/lib/python3.12/site-packages/xarray/backends/lru_cache.py:56: KeyError
During handling of the above exception, another exception occurred:
self = <payu_config.postscript.concat_ice_daily.Concat_Ice_Daily object at 0x7fab85120d40>
def process(self):
# find months in dataset
times = self.daily_ds.time.values
monthly_range = monthly_ranges(np.min(times), np.max(times), times[0].calendar)
monthly_pairs = list(zip(monthly_range[:-1], monthly_range[1:]))
# slice ds for each month, and make a dask delayed object to save to file
# ignore incomplete months
monthly_ncs = list()
self.month_ds = list()
self.month_f = list()
for pair in monthly_pairs:
filename = Path(f"{self.directory}/{MONTHLY_STUB_FN}{str(pair[0])[0:7]}.nc")
ds = self.daily_ds.sel(time=slice(*pair))
ds = ds.chunk({"time": len(ds.time)})
# check for whole month
if ds.time.values[-1] != (
ds.time.values[0]
+ datetime.timedelta(days=ds.time.values[0].daysinmonth - 1)
):
print(
f"concat_ice_daily:ignoring incomplete month: {str(pair[0])[0:7]}"
)
if len(self.daily_ds.time) > len(ds.time):
self.daily_ds = self.daily_ds.drop_sel(time=ds.time.values)
else:
self.month_f.append(filename)
self.month_ds.append(ds)
# if monthly file already exists, don't process again
if not filename.exists():
monthly_ncs.append(ds.to_netcdf(filename, compute=False))
if len(self.month_f) == 0:
self.cleanup_exit(
f"concat_ice_daily: No whole months to concatenate found in {self.directory}"
)
# load and save all months concurrently
try:
> dask.compute(monthly_ncs)
/home/runner/work/om3-scripts/om3-scripts/payu_config/postscript/concat_ice_daily.py:195:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/opt/hostedtoolcache/Python/3.12.11/x64/lib/python3.12/site-packages/dask/base.py:681: in compute
results = schedule(expr, keys, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/opt/hostedtoolcache/Python/3.12.11/x64/lib/python3.12/site-packages/xarray/backends/netCDF4_.py:86: in __setitem__
data = self.get_array(needs_lock=False)
^^^^^^^^^^^^^^^^^
/opt/hostedtoolcache/Python/3.12.11/x64/lib/python3.12/site-packages/xarray/backends/netCDF4_.py:99: in get_array
ds = self.datastore._acquire(needs_lock)
^^^^^^^^^^^^^^^^^
/opt/hostedtoolcache/Python/3.12.11/x64/lib/python3.12/site-packages/xarray/backends/netCDF4_.py:471: in _acquire
with self._manager.acquire_context(needs_lock) as root:
^^^^^^^^^^^^^^^^^
/opt/hostedtoolcache/Python/3.12.11/x64/lib/python3.12/contextlib.py:137: in __enter__
return next(self.gen)
^^^^^^^^^^^^^^^^^
/opt/hostedtoolcache/Python/3.12.11/x64/lib/python3.12/site-packages/xarray/backends/file_manager.py:199: in acquire_context
file, cached = self._acquire_with_cache_info(needs_lock)
^^^^^^^^^^^^^^^^^
/opt/hostedtoolcache/Python/3.12.11/x64/lib/python3.12/site-packages/xarray/backends/file_manager.py:217: in _acquire_with_cache_info
file = self._opener(*self._args, **kwargs)
^^^^^^^^^^^^^^^^^
src/netCDF4/_netCDF4.pyx:2521: in netCDF4._netCDF4.Dataset.__init__
???
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
> ???
E OSError: [Errno -101] NetCDF: HDF error: '/tmp/pytest-of-runner/pytest-0/test_true_case_access_om3_cice0/archive/output000/access-om3.cice.1day.mean.2010-10.nc'
src/netCDF4/_netCDF4.pyx:2158: OSError
During handling of the above exception, another exception occurred:
hist_dir = 'Default', ndays = 365, use_dir = False, nmonths = 12
hist_base = 'access-om3.cice.1day.mean'
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/test_true_case_access_om3_cice0')
@pytest.mark.parametrize(
"hist_dir, ndays, use_dir, nmonths",
[
("Default", 365, False, 12),
("archive/output999", 31, False, 1),
("archive/output9999", 31, False, 1),
("archive/output574", 365, True, 12),
],
) # run this test with several folder names and lengths, sometimes providing the directory as an argument
def test_true_case(hist_dir, ndays, use_dir, nmonths, hist_base, tmp_path):
"""
Run the script to convert the daily data into monthly files, and check that the monthly files exist and the daily files don't.
"""
daily_paths = dummy_files(hist_dir, hist_base, ndays, tmp_path)
chdir(tmp_path)
output_dir = Path(daily_paths[0]).parents[0]
if not use_dir: # default path
> concat_ice_daily(assume_gadi=False)
/home/runner/work/om3-scripts/om3-scripts/test/test_payu_conf/test_concat_ice_daily.py:99:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/om3-scripts/om3-scripts/payu_config/postscript/concat_ice_daily.py:243: in concat_ice_daily
concat.process()
/home/runner/work/om3-scripts/om3-scripts/payu_config/postscript/concat_ice_daily.py:197: in process
self.cleanup_exit(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <payu_config.postscript.concat_ice_daily.Concat_Ice_Daily object at 0x7fab85120d40>
error_msg = 'concat_ice_daily: dask compute of saving monthly output failed'
delete_monthf = True
def cleanup_exit(self, error_msg, delete_monthf=False):
for file in self.month_f:
if file.exists() and delete_monthf:
os.remove(file)
self.client.close()
> raise Exception(error_msg)
E Exception: concat_ice_daily: dask compute of saving monthly output failed
/home/runner/work/om3-scripts/om3-scripts/payu_config/postscript/concat_ice_daily.py:150: Exception
------------------------------ Captured log call -------------------------------
INFO distributed.http.proxy:proxy.py:85 To route to workers diagnostics web server please install jupyter-server-proxy: python -m pip install jupyter-server-proxy
INFO distributed.scheduler:scheduler.py:1766 State start
INFO distributed.scheduler:scheduler.py:4282 Scheduler at: tcp://127.0.0.1:33069
INFO distributed.scheduler:scheduler.py:4297 dashboard at: http://127.0.0.1:8787/status
INFO distributed.scheduler:scheduler.py:8182 Registering Worker plugin shuffle
INFO distributed.nanny:nanny.py:368 Start Nanny at: 'tcp://127.0.0.1:43359'
INFO distributed.scheduler:scheduler.py:4635 Register worker addr: tcp://127.0.0.1:33837 name: 0
INFO distributed.scheduler:scheduler.py:6224 Starting worker compute stream, tcp://127.0.0.1:33837
INFO distributed.core:core.py:883 Starting established connection to tcp://127.0.0.1:41462
INFO distributed.scheduler:scheduler.py:5959 Receive client connection: Client-2c49b429-9363-11f0-8f8f-00224844c946
INFO distributed.core:core.py:883 Starting established connection to tcp://127.0.0.1:41478
INFO distributed.scheduler:scheduler.py:6004 Remove client Client-2c49b429-9363-11f0-8f8f-00224844c946
INFO distributed.core:core.py:908 Received 'close-stream' from tcp://127.0.0.1:41478; closing.
INFO distributed.scheduler:scheduler.py:6004 Remove client Client-2c49b429-9363-11f0-8f8f-00224844c946
INFO distributed.scheduler:scheduler.py:5996 Close client connection: Client-2c49b429-9363-11f0-8f8f-00224844c946
INFO distributed.scheduler:scheduler.py:7615 Retire worker addresses (stimulus_id='retire-workers-1758071467.0196784') (0,)
INFO distributed.nanny:nanny.py:611 Closing Nanny at 'tcp://127.0.0.1:43359'. Reason: nanny-close
INFO distributed.nanny:nanny.py:858 Nanny asking worker to close. Reason: nanny-close
INFO distributed.core:core.py:908 Received 'close-stream' from tcp://127.0.0.1:41462; closing.
INFO distributed.scheduler:scheduler.py:5445 Remove worker addr: tcp://127.0.0.1:33837 name: 0 (stimulus_id='handle-worker-cleanup-1758071467.0370812')
INFO distributed.scheduler:scheduler.py:5583 Lost all workers
INFO distributed.nanny:nanny.py:626 Nanny at 'tcp://127.0.0.1:43359' closed.
INFO distributed.scheduler:scheduler.py:4344 Closing scheduler. Reason: unknown
INFO distributed.scheduler:scheduler.py:4372 Scheduler closing all comms
Ping @anton-seaice
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels