cctbx · dagewa · Feb 25, 2026 · Feb 24, 2026 · Feb 24, 2026 · Feb 25, 2026
diff --git a/dependencies.yaml b/dependencies.yaml
@@ -83,7 +83,7 @@ run:
   - tqdm
 
 test:
-  - dials-data
+  - dials-data >=2.4.140
   - pip
   - pytest >6
   - pytest-mock

diff --git a/newsfragments/879.feature b/newsfragments/879.feature
@@ -0,0 +1 @@
+FormatROD: support TY5 compression, gzip/bzipped files, non-unity gain and binning
diff --git a/src/dxtbx/boost_python/compression.cc b/src/dxtbx/boost_python/compression.cc
@@ -188,17 +188,20 @@ inline uint16_t read_uint16_from_bytearray(const char *buf) {
   return ((unsigned char)buf[0]) | ((unsigned char)buf[1] << 8);
 }
 
-void dxtbx::boost_python::rod_TY6_decompress(int *const ret,
-                                             const char *const buf_data,
-                                             const char *const buf_offsets,
-                                             const int slow,
-                                             const int fast) {
+void dxtbx::boost_python::rod_TY5_TY6_decompress(int *const ret,
+                                                 const char *const buf_data,
+                                                 const char *const buf_offsets,
+                                                 const int slow,
+                                                 const int fast,
+                                                 const int mode) {
   const size_t BLOCKSIZE = 8;             // Codes below assume this is at most 8
   const signed int SHORT_OVERFLOW = 127;  // after 127 is subtracted
   const signed int LONG_OVERFLOW = 128;
 
-  const size_t nblock = (fast - 1) / (BLOCKSIZE * 2);
-  const size_t nrest = (fast - 1) % (BLOCKSIZE * 2);
+  assert(mode == 5 || mode == 6);
+
+  const size_t nblock = (mode == 5) ? 0 : (fast - 1) / (BLOCKSIZE * 2);
+  const size_t nrest = (mode == 5) ? (fast - 1) : (fast - 1) % (BLOCKSIZE * 2);
 
   for (size_t iy = 0; iy < slow; iy++) {
     size_t ipos = read_uint32_from_bytearray(buf_offsets + iy * sizeof(uint32_t));

diff --git a/src/dxtbx/boost_python/compression.h b/src/dxtbx/boost_python/compression.h
@@ -6,12 +6,13 @@
 namespace dxtbx { namespace boost_python {
   unsigned int cbf_decompress(const char*, std::size_t, int*, const std::size_t);
   std::vector<char> cbf_compress(const int*, const std::size_t&);
-  // Decompress Rigaku Oxford diffractometer TY6 compression
-  void rod_TY6_decompress(int* const,
-                          const char* const,
-                          const char* const,
-                          const int,
-                          const int);
+  // Decompress Rigaku Oxford diffractometer TY5/6 compression
+  void rod_TY5_TY6_decompress(int* const,
+                              const char* const,
+                              const char* const,
+                              const int,
+                              const int,
+                              const int);
 }}  // namespace dxtbx::boost_python
 
 #endif
diff --git a/src/dxtbx/boost_python/ext.cpp b/src/dxtbx/boost_python/ext.cpp
@@ -233,20 +233,21 @@ namespace dxtbx { namespace boost_python {
     }
   }
 
-  // Python entry point to decompress Rigaku Oxford Diffractometer TY6 compression
-  scitbx::af::flex_int uncompress_rod_TY6(const boost::python::object &data,
-                                          const boost::python::object &offsets,
-                                          const int &slow,
-                                          const int &fast) {
+  // Python entry point to decompress Rigaku Oxford Diffractometer TY5/TY6 compression
+  scitbx::af::flex_int uncompress_rod_TY5_TY6(const boost::python::object &data,
+                                              const boost::python::object &offsets,
+                                              const int &slow,
+                                              const int &fast,
+                                              const int mode) {
     // Cannot I extract const char* directly?
     std::string str_data = boost::python::extract<std::string>(data);
     std::string str_offsets = boost::python::extract<std::string>(offsets);
 
     scitbx::af::flex_int z((scitbx::af::flex_grid<>(slow, fast)),
                            scitbx::af::init_functor_null<int>());
 
-    dxtbx::boost_python::rod_TY6_decompress(
-      z.begin(), str_data.c_str(), str_offsets.c_str(), slow, fast);
+    dxtbx::boost_python::rod_TY5_TY6_decompress(
+      z.begin(), str_data.c_str(), str_offsets.c_str(), slow, fast, mode);
 
     return z;
   }
@@ -275,9 +276,9 @@ namespace dxtbx { namespace boost_python {
          arg("distance"),
          arg("upper_left"),
          arg("lower_right")));
-    def("uncompress_rod_TY6",
-        &uncompress_rod_TY6,
-        (arg_("data"), arg_("offsets"), arg_("slow"), arg_("fast")));
+    def("uncompress_rod_TY5_TY6",
+        &uncompress_rod_TY5_TY6,
+        (arg_("data"), arg_("offsets"), arg_("slow"), arg_("fast"), arg_("mode")));
   }
 
   BOOST_PYTHON_MODULE(dxtbx_ext) {

diff --git a/src/dxtbx/format/FormatROD.py b/src/dxtbx/format/FormatROD.py
@@ -24,7 +24,7 @@
 from scitbx.array_family import flex
 from scitbx.math import r3_rotation_axis_and_angle_as_matrix
 
-from dxtbx.ext import uncompress_rod_TY6
+from dxtbx.ext import uncompress_rod_TY5_TY6
 from dxtbx.format.Format import Format
 from dxtbx.model.beam import Probe
 from dxtbx.model.detector import Detector
@@ -81,6 +81,10 @@ def _read_ascii_header(image_file):
         if len(vers) < 2 or vers[0] != "OD" or vers[1] != "SAPPHIRE":
             raise ValueError("Wrong header format")
         hd["version"] = float(vers[-1])
+        if hd["version"] != 4.0:
+            raise NotImplementedError(
+                f"FormatROD: only header version 4.0 is supported but got {hd['version']}"
+            )
 
         compression = lines[1].split("=")
         if compression[0] != "COMPRESSION":
@@ -122,8 +126,11 @@ def _read_binary_header(
             )
             f.seek(offset + 36)
             num_points = struct.unpack("<I", f.read(4))[0]
+            num_image = struct.unpack("<h", f.read(2))[0]
             if num_points != im_npx_x * im_npx_y:
                 raise ValueError("Cannot interpret binary header")
+            if num_image != 1:
+                raise NotImplementedError("num_image != 1 is not supported.")
 
             # Special section
             f.seek(offset + general_nbytes + 56)
@@ -203,6 +210,8 @@ def _start(self):
         self._header_dictionary"""
 
         self._txt_header = self._read_ascii_header(self._image_file)
+        # TODO: the block sizes in the binary header should be taken from the
+        # ASCII header.
         self._bin_header = self._read_binary_header(self._image_file)
 
         self._gonio_start_angles = (
@@ -329,8 +338,12 @@ def _detector(self):
         pixel_size_y = self._bin_header["real_px_size_y"]
         origin_at_zero = np.array(
             [
-                -self._bin_header["origin_px_x"] * pixel_size_x,
-                -self._bin_header["origin_px_y"] * pixel_size_y,
+                -self._bin_header["origin_px_x"]
+                * pixel_size_x
+                / self._bin_header["bin_x"],
+                -self._bin_header["origin_px_y"]
+                * pixel_size_y
+                / self._bin_header["bin_y"],
                 -self._bin_header["distance_mm"],
             ]
         )
@@ -344,6 +357,7 @@ def _detector(self):
             (pixel_size_x, pixel_size_y),
             (self._txt_header["NX"], self._txt_header["NY"]),
             (0, self._bin_header["overflow_threshold"]),
+            gain=self._bin_header["gain"],
         )
 
         return detector
@@ -367,40 +381,69 @@ def _scan(self):
 
     def get_raw_data(self):
         comp = self._txt_header["compression"].strip()
+
+        # The differences between TY5 and TY6 are:
+        # - TY5 lacks the lbytesincompressedfield field; we must calculate it.
+        # - TY6 uses fewer-bit compression inside "blocks".
+        #   Outside such blocks, TY5 and TY6 use the same delta compression.
         if comp.startswith("TY6"):
-            return self._get_raw_data_ty6_native()
+            return self._get_raw_data_ty5_ty6_native(mode=6)
+        elif comp.startswith("TY5"):
+            return self._get_raw_data_ty5_ty6_native(mode=5)
         else:
             raise NotImplementedError(f"Can't handle compression: {comp}")
 
-    def _get_raw_data_ty6_native(self):
+    def _get_raw_data_ty5_ty6_native(self, mode):
         offset = self._txt_header["NHEADER"]
         nx = self._txt_header["NX"]
         ny = self._txt_header["NY"]
-        with open(self._image_file, "rb") as f:
+        with FormatROD.open_file(self._image_file, "rb") as f:
             f.seek(offset)
-            lbytesincompressedfield = struct.unpack("<l", f.read(4))[0]
+            if mode == 5:
+                lbytesincompressedfield = (
+                    nx * ny + 2 * self._txt_header["OI"] + 4 * self._txt_header["OL"]
+                )
+            elif mode == 6:
+                lbytesincompressedfield = struct.unpack("<l", f.read(4))[0]
+            else:
+                raise NotImplementedError(
+                    "_get_raw_data_ty5_ty6: only mode 5 and 6 are supported."
+                )
+
             linedata = f.read(lbytesincompressedfield)
             offsets = f.read(4 * ny)
 
-            return uncompress_rod_TY6(linedata, offsets, ny, nx)
+            return uncompress_rod_TY5_TY6(linedata, offsets, ny, nx, mode)
 
     # Python implementation
-    def _get_raw_data_ty6(self):
+    def _get_raw_data_ty5_ty6(self, mode):
         offset = self._txt_header["NHEADER"]
         nx = self._txt_header["NX"]
         ny = self._txt_header["NY"]
-        with open(self._image_file, "rb") as f:
+        with FormatROD.open_file(self._image_file, "rb") as f:
             f.seek(offset)
-            lbytesincompressedfield = struct.unpack("<l", f.read(4))[0]
+            if mode == 5:
+                lbytesincompressedfield = (
+                    nx * ny + 2 * self._txt_header["OI"] + 4 * self._txt_header["OL"]
+                )
+            elif mode == 6:
+                lbytesincompressedfield = struct.unpack("<l", f.read(4))[0]
+            else:
+                raise NotImplementedError(
+                    "_get_raw_data_ty5_ty6: only mode 5 and 6 are supported."
+                )
+
             linedata = np.fromfile(f, dtype=np.uint8, count=lbytesincompressedfield)
             offsets = struct.unpack("<%dI" % ny, f.read(4 * ny))
 
             image = np.zeros((ny, nx), dtype=np.int32)
             for iy in range(ny):
-                image[iy, :] = self.decode_TY6_oneline(linedata[offsets[iy] :], nx)
+                image[iy, :] = self.decode_TY5_TY6_oneline(
+                    linedata[offsets[iy] :], nx, mode
+                )
             return flex.int(image)
 
-    def decode_TY6_oneline(self, linedata, w):
+    def decode_TY5_TY6_oneline(self, linedata, w, mode):
         """Decompress TY6 encoded pixels for a single line.
         w is the number of pixels in the fast axis."""
 
@@ -414,8 +457,16 @@ def decode_TY6_oneline(self, linedata, w):
         opos = 0
         ret = np.zeros(w, dtype=np.int32)
 
-        nblock = (w - 1) // (BLOCKSIZE * 2)
-        nrest = (w - 1) % (BLOCKSIZE * 2)
+        if mode == 5:
+            nblock = 0
+            nrest = w - 1  # 1st px is special
+        elif mode == 6:
+            nblock = (w - 1) // (BLOCKSIZE * 2)
+            nrest = (w - 1) % (BLOCKSIZE * 2)
+        else:
+            raise NotImplementedError(
+                "decode_TY5_TY6_oneline: only mode 5 and 6 are supported."
+            )
 
         firstpx = linedata[ipos]
         ipos += 1
@@ -619,6 +670,7 @@ def _detector(self):
             p = d.add_panel()
             p.set_type("SENSOR_PAD")
             p.set_name(panel_name)
+            p.set_gain(self._bin_header["gain"])
             p.set_raw_image_offset((xmin, ymin))
             p.set_image_size((xmax - xmin, ymax - ymin))
             p.set_trusted_range((0, self._bin_header["overflow_threshold"]))

diff --git a/tests/format/test_FormatROD.py b/tests/format/test_FormatROD.py
@@ -0,0 +1,86 @@
+"""Tests for dxbtx.format.FormatROD format classes."""
+
+from __future__ import annotations
+
+import pytest
+
+from dxtbx.format.FormatROD import FormatROD, FormatROD_Arc
+from dxtbx.model.experiment_list import ExperimentListFactory
+
+
+def test_HyPix6000_TY6(dials_data):
+    filename = (
+        dials_data("image_examples") / "Hypix6000-monoclinic_lysozyme1_1_1.rodhypix.bz2"
+    )
+
+    assert FormatROD.understand(filename)
+    expts = ExperimentListFactory.from_filenames([filename])
+    assert len(expts) == 1
+
+    imageset = expts[0].imageset
+    assert imageset.get_format_class() == FormatROD
+
+    gonio = imageset.get_goniometer()
+    axes = gonio.get_names()
+    assert list(axes) == ["PHI", "KAPPA=CHI", "OMEGA"]
+
+    detector = imageset.get_detector()
+    assert len(detector) == 1
+    assert detector[0].get_gain() == 1.0
+    assert detector[0].get_origin() == pytest.approx(
+        (-44.6944, -36.9736, -23.2606), rel=1e-5
+    )
+
+    # TY6 decompression test
+    img = imageset.get_raw_data(0)[0]
+    assert img.focus() == (800, 775)
+    assert img[123, 456] == 3
+
+
+def test_EosS2_TY5(dials_data):
+    filename = (
+        dials_data("image_examples") / "EosS2-pre_SHR248e_CHCl3_pentane_1_1.img.bz2"
+    )
+
+    assert FormatROD.understand(filename)
+    expts = ExperimentListFactory.from_filenames([filename])
+    assert len(expts) == 1
+
+    imageset = expts[0].imageset
+    assert imageset.get_format_class() == FormatROD
+
+    gonio = imageset.get_goniometer()
+    axes = gonio.get_names()
+    assert list(axes) == ["PHI", "KAPPA=CHI", "OMEGA"]
+
+    detector = imageset.get_detector()
+    assert len(detector) == 1
+    # this CCD image has a non-unity gain
+    assert detector[0].get_gain() == 100.0
+    # if binning was not properly taken into account, the origin would break
+    assert detector[0].get_origin() == pytest.approx(
+        (-48.2605, -30.8897, -17.5460), rel=1e-5
+    )
+
+    # TY5 decompression test
+    img = imageset.get_raw_data(0)[0]
+    assert img.focus() == (512, 512)
+    assert img[123, 456] == 174
+
+
+def test_HyPixArc_TY6(dials_data):
+    # HyPix-Arc is also tested in an end-to-end test `test_rigaku_hypix_arc_150()`
+    # in dials/tests/algorithms/indexing/test_index.py.
+
+    filename = dials_data("Rigaku_HyPix_Arc") / "CHNOS_1_2.rodhypix"
+
+    assert FormatROD_Arc.understand(filename)
+    expts = ExperimentListFactory.from_filenames([filename])
+    assert len(expts) == 1
+
+    imageset = expts[0].imageset
+    assert imageset.get_format_class() == FormatROD_Arc
+
+    # HyPix-Arc consists of three panels
+    detector = imageset.get_detector()
+    assert len(detector) == 3
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		FormatROD: support TY5 compression, gzip/bzipped files, non-unity gain and binning