Skip to content

Tests of concat_ice_daily are failing #84

@dougiesquire

Description

@dougiesquire

The tests of concat_ice_daily.py have recently started failing with HDF errors. Some example output is copied below, and a full test report can be found in the linked CI run (link in the original issue).

________ test_true_case[access-om3.cice.1day.mean-Default-365-False-12] ________

>   file = self._cache[self._key]
    ^^^^^^^^^^^^^^^^^

/opt/hostedtoolcache/Python/3.12.11/x64/lib/python3.12/site-packages/xarray/backends/file_manager.py:211: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

>   value = self._cache[key]
    ^^^^^^^^^^^^^^^^^
E   KeyError: [<class 'netCDF4._netCDF4.Dataset'>, ('/tmp/pytest-of-runner/pytest-0/test_true_case_access_om3_cice0/archive/output000/access-om3.cice.1day.mean.2010-10.nc',), 'a', (('clobber', True), ('diskless', False), ('format', 'NETCDF4'), ('persist', False)), 'ad1ef0b0-6542-4a3a-a65f-e5f5c534625b']

/opt/hostedtoolcache/Python/3.12.11/x64/lib/python3.12/site-packages/xarray/backends/lru_cache.py:56: KeyError

During handling of the above exception, another exception occurred:

self = <payu_config.postscript.concat_ice_daily.Concat_Ice_Daily object at 0x7fab85120d40>

    def process(self):
    
        # find months in dataset
        times = self.daily_ds.time.values
        monthly_range = monthly_ranges(np.min(times), np.max(times), times[0].calendar)
        monthly_pairs = list(zip(monthly_range[:-1], monthly_range[1:]))
    
        # slice ds for each month, and make a dask delayed object to save to file
        # ignore incomplete months
        monthly_ncs = list()
        self.month_ds = list()
        self.month_f = list()
        for pair in monthly_pairs:
    
            filename = Path(f"{self.directory}/{MONTHLY_STUB_FN}{str(pair[0])[0:7]}.nc")
            ds = self.daily_ds.sel(time=slice(*pair))
            ds = ds.chunk({"time": len(ds.time)})
    
            # check for whole month
            if ds.time.values[-1] != (
                ds.time.values[0]
                + datetime.timedelta(days=ds.time.values[0].daysinmonth - 1)
            ):
                print(
                    f"concat_ice_daily:ignoring incomplete month: {str(pair[0])[0:7]}"
                )
                if len(self.daily_ds.time) > len(ds.time):
                    self.daily_ds = self.daily_ds.drop_sel(time=ds.time.values)
            else:
                self.month_f.append(filename)
                self.month_ds.append(ds)
    
                # if monthly file already exists, don't process again
                if not filename.exists():
                    monthly_ncs.append(ds.to_netcdf(filename, compute=False))
    
        if len(self.month_f) == 0:
            self.cleanup_exit(
                f"concat_ice_daily: No whole months to concatenate found in {self.directory}"
            )
    
        # load and save all months concurrently
        try:
>           dask.compute(monthly_ncs)

/home/runner/work/om3-scripts/om3-scripts/payu_config/postscript/concat_ice_daily.py:195: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
/opt/hostedtoolcache/Python/3.12.11/x64/lib/python3.12/site-packages/dask/base.py:681: in compute
    results = schedule(expr, keys, **kwargs)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/opt/hostedtoolcache/Python/3.12.11/x64/lib/python3.12/site-packages/xarray/backends/netCDF4_.py:86: in __setitem__
    data = self.get_array(needs_lock=False)
    ^^^^^^^^^^^^^^^^^
/opt/hostedtoolcache/Python/3.12.11/x64/lib/python3.12/site-packages/xarray/backends/netCDF4_.py:99: in get_array
    ds = self.datastore._acquire(needs_lock)
    ^^^^^^^^^^^^^^^^^
/opt/hostedtoolcache/Python/3.12.11/x64/lib/python3.12/site-packages/xarray/backends/netCDF4_.py:471: in _acquire
    with self._manager.acquire_context(needs_lock) as root:
    ^^^^^^^^^^^^^^^^^
/opt/hostedtoolcache/Python/3.12.11/x64/lib/python3.12/contextlib.py:137: in __enter__
    return next(self.gen)
    ^^^^^^^^^^^^^^^^^
/opt/hostedtoolcache/Python/3.12.11/x64/lib/python3.12/site-packages/xarray/backends/file_manager.py:199: in acquire_context
    file, cached = self._acquire_with_cache_info(needs_lock)
    ^^^^^^^^^^^^^^^^^
/opt/hostedtoolcache/Python/3.12.11/x64/lib/python3.12/site-packages/xarray/backends/file_manager.py:217: in _acquire_with_cache_info
    file = self._opener(*self._args, **kwargs)
    ^^^^^^^^^^^^^^^^^
src/netCDF4/_netCDF4.pyx:2521: in netCDF4._netCDF4.Dataset.__init__
    ???
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

>   ???
E   OSError: [Errno -101] NetCDF: HDF error: '/tmp/pytest-of-runner/pytest-0/test_true_case_access_om3_cice0/archive/output000/access-om3.cice.1day.mean.2010-10.nc'

src/netCDF4/_netCDF4.pyx:2158: OSError

During handling of the above exception, another exception occurred:

hist_dir = 'Default', ndays = 365, use_dir = False, nmonths = 12
hist_base = 'access-om3.cice.1day.mean'
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/test_true_case_access_om3_cice0')

    @pytest.mark.parametrize(
        "hist_dir, ndays, use_dir, nmonths",
        [
            ("Default", 365, False, 12),
            ("archive/output999", 31, False, 1),
            ("archive/output9999", 31, False, 1),
            ("archive/output574", 365, True, 12),
        ],
    )  # run this test with a several folder names and lengths, provide the directory as an argument sometimes
    def test_true_case(hist_dir, ndays, use_dir, nmonths, hist_base, tmp_path):
        """
        Run the script to convert the daily data into monthly files, and check the monthly files and the daily files dont exist.
        """
    
        daily_paths = dummy_files(hist_dir, hist_base, ndays, tmp_path)
        chdir(tmp_path)
        output_dir = Path(daily_paths[0]).parents[0]
    
        if not use_dir:  # default path
>           concat_ice_daily(assume_gadi=False)

/home/runner/work/om3-scripts/om3-scripts/test/test_payu_conf/test_concat_ice_daily.py:99: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
/home/runner/work/om3-scripts/om3-scripts/payu_config/postscript/concat_ice_daily.py:243: in concat_ice_daily
    concat.process()
/home/runner/work/om3-scripts/om3-scripts/payu_config/postscript/concat_ice_daily.py:197: in process
    self.cleanup_exit(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <payu_config.postscript.concat_ice_daily.Concat_Ice_Daily object at 0x7fab85120d40>
error_msg = 'concat_ice_daily: dask compute of saving monthly output failed'
delete_monthf = True

    def cleanup_exit(self, error_msg, delete_monthf=False):
        for file in self.month_f:
            if file.exists() and delete_monthf:
                os.remove(file)
        self.client.close()
    
>       raise Exception(error_msg)
E       Exception: concat_ice_daily: dask compute of saving monthly output failed

/home/runner/work/om3-scripts/om3-scripts/payu_config/postscript/concat_ice_daily.py:150: Exception
------------------------------ Captured log call -------------------------------
INFO     distributed.http.proxy:proxy.py:85 To route to workers diagnostics web server please install jupyter-server-proxy: python -m pip install jupyter-server-proxy
INFO     distributed.scheduler:scheduler.py:1766 State start
INFO     distributed.scheduler:scheduler.py:4282   Scheduler at:     tcp://127.0.0.1:33069
INFO     distributed.scheduler:scheduler.py:4297   dashboard at:  http://127.0.0.1:8787/status
INFO     distributed.scheduler:scheduler.py:8182 Registering Worker plugin shuffle
INFO     distributed.nanny:nanny.py:368         Start Nanny at: 'tcp://127.0.0.1:43359'
INFO     distributed.scheduler:scheduler.py:4635 Register worker addr: tcp://127.0.0.1:33837 name: 0
INFO     distributed.scheduler:scheduler.py:6224 Starting worker compute stream, tcp://127.0.0.1:33837
INFO     distributed.core:core.py:883 Starting established connection to tcp://127.0.0.1:41462
INFO     distributed.scheduler:scheduler.py:5959 Receive client connection: Client-2c49b429-9363-11f0-8f8f-00224844c946
INFO     distributed.core:core.py:883 Starting established connection to tcp://127.0.0.1:41478
INFO     distributed.scheduler:scheduler.py:6004 Remove client Client-2c49b429-9363-11f0-8f8f-00224844c946
INFO     distributed.core:core.py:908 Received 'close-stream' from tcp://127.0.0.1:41478; closing.
INFO     distributed.scheduler:scheduler.py:6004 Remove client Client-2c49b429-9363-11f0-8f8f-00224844c946
INFO     distributed.scheduler:scheduler.py:5996 Close client connection: Client-2c49b429-9363-11f0-8f8f-00224844c946
INFO     distributed.scheduler:scheduler.py:7615 Retire worker addresses (stimulus_id='retire-workers-1758071467.0196784') (0,)
INFO     distributed.nanny:nanny.py:611 Closing Nanny at 'tcp://127.0.0.1:43359'. Reason: nanny-close
INFO     distributed.nanny:nanny.py:858 Nanny asking worker to close. Reason: nanny-close
INFO     distributed.core:core.py:908 Received 'close-stream' from tcp://127.0.0.1:41462; closing.
INFO     distributed.scheduler:scheduler.py:5445 Remove worker addr: tcp://127.0.0.1:33837 name: 0 (stimulus_id='handle-worker-cleanup-1758071467.0370812')
INFO     distributed.scheduler:scheduler.py:5583 Lost all workers
INFO     distributed.nanny:nanny.py:626 Nanny at 'tcp://127.0.0.1:43359' closed.
INFO     distributed.scheduler:scheduler.py:4344 Closing scheduler. Reason: unknown
INFO     distributed.scheduler:scheduler.py:4372 Scheduler closing all comms

Ping @anton-seaice

Metadata

Assignees

No one assigned

    Labels

    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions