Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ run:
- tqdm

test:
- dials-data
- dials-data >=2.4.140
- pip
- pytest >6
- pytest-mock
Expand Down
1 change: 1 addition & 0 deletions newsfragments/879.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
FormatROD: support TY5 compression, gzip/bzipped files, non-unity gain and binning
17 changes: 10 additions & 7 deletions src/dxtbx/boost_python/compression.cc
Original file line number Diff line number Diff line change
Expand Up @@ -188,17 +188,20 @@ inline uint16_t read_uint16_from_bytearray(const char *buf) {
return ((unsigned char)buf[0]) | ((unsigned char)buf[1] << 8);
}

void dxtbx::boost_python::rod_TY6_decompress(int *const ret,
const char *const buf_data,
const char *const buf_offsets,
const int slow,
const int fast) {
void dxtbx::boost_python::rod_TY5_TY6_decompress(int *const ret,
const char *const buf_data,
const char *const buf_offsets,
const int slow,
const int fast,
const int mode) {
const size_t BLOCKSIZE = 8; // Codes below assume this is at most 8
const signed int SHORT_OVERFLOW = 127; // after 127 is subtracted
const signed int LONG_OVERFLOW = 128;

const size_t nblock = (fast - 1) / (BLOCKSIZE * 2);
const size_t nrest = (fast - 1) % (BLOCKSIZE * 2);
assert(mode == 5 || mode == 6);

const size_t nblock = (mode == 5) ? 0 : (fast - 1) / (BLOCKSIZE * 2);
const size_t nrest = (mode == 5) ? (fast - 1) : (fast - 1) % (BLOCKSIZE * 2);

for (size_t iy = 0; iy < slow; iy++) {
size_t ipos = read_uint32_from_bytearray(buf_offsets + iy * sizeof(uint32_t));
Expand Down
13 changes: 7 additions & 6 deletions src/dxtbx/boost_python/compression.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,13 @@
namespace dxtbx { namespace boost_python {
unsigned int cbf_decompress(const char*, std::size_t, int*, const std::size_t);
std::vector<char> cbf_compress(const int*, const std::size_t&);
// Decompress Rigaku Oxford diffractometer TY6 compression
void rod_TY6_decompress(int* const,
const char* const,
const char* const,
const int,
const int);
// Decompress Rigaku Oxford diffractometer TY5/6 compression
void rod_TY5_TY6_decompress(int* const,
const char* const,
const char* const,
const int,
const int,
const int);
}} // namespace dxtbx::boost_python

#endif
21 changes: 11 additions & 10 deletions src/dxtbx/boost_python/ext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -233,20 +233,21 @@ namespace dxtbx { namespace boost_python {
}
}

// Python entry point to decompress Rigaku Oxford Diffractometer TY6 compression
scitbx::af::flex_int uncompress_rod_TY6(const boost::python::object &data,
const boost::python::object &offsets,
const int &slow,
const int &fast) {
// Python entry point to decompress Rigaku Oxford Diffractometer TY5/TY6 compression
scitbx::af::flex_int uncompress_rod_TY5_TY6(const boost::python::object &data,
const boost::python::object &offsets,
const int &slow,
const int &fast,
const int mode) {
// Cannot I extract const char* directly?
std::string str_data = boost::python::extract<std::string>(data);
std::string str_offsets = boost::python::extract<std::string>(offsets);

scitbx::af::flex_int z((scitbx::af::flex_grid<>(slow, fast)),
scitbx::af::init_functor_null<int>());

dxtbx::boost_python::rod_TY6_decompress(
z.begin(), str_data.c_str(), str_offsets.c_str(), slow, fast);
dxtbx::boost_python::rod_TY5_TY6_decompress(
z.begin(), str_data.c_str(), str_offsets.c_str(), slow, fast, mode);

return z;
}
Expand Down Expand Up @@ -275,9 +276,9 @@ namespace dxtbx { namespace boost_python {
arg("distance"),
arg("upper_left"),
arg("lower_right")));
def("uncompress_rod_TY6",
&uncompress_rod_TY6,
(arg_("data"), arg_("offsets"), arg_("slow"), arg_("fast")));
def("uncompress_rod_TY5_TY6",
&uncompress_rod_TY5_TY6,
(arg_("data"), arg_("offsets"), arg_("slow"), arg_("fast"), arg_("mode")));
}

BOOST_PYTHON_MODULE(dxtbx_ext) {
Expand Down
82 changes: 67 additions & 15 deletions src/dxtbx/format/FormatROD.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from scitbx.array_family import flex
from scitbx.math import r3_rotation_axis_and_angle_as_matrix

from dxtbx.ext import uncompress_rod_TY6
from dxtbx.ext import uncompress_rod_TY5_TY6
from dxtbx.format.Format import Format
from dxtbx.model.beam import Probe
from dxtbx.model.detector import Detector
Expand Down Expand Up @@ -81,6 +81,10 @@ def _read_ascii_header(image_file):
if len(vers) < 2 or vers[0] != "OD" or vers[1] != "SAPPHIRE":
raise ValueError("Wrong header format")
hd["version"] = float(vers[-1])
if hd["version"] != 4.0:
raise NotImplementedError(
f"FormatROD: only header version 4.0 is supported but got {hd['version']}"
)

compression = lines[1].split("=")
if compression[0] != "COMPRESSION":
Expand Down Expand Up @@ -122,8 +126,11 @@ def _read_binary_header(
)
f.seek(offset + 36)
num_points = struct.unpack("<I", f.read(4))[0]
num_image = struct.unpack("<h", f.read(2))[0]
if num_points != im_npx_x * im_npx_y:
raise ValueError("Cannot interpret binary header")
if num_image != 1:
raise NotImplementedError("num_image != 1 is not supported.")

# Special section
f.seek(offset + general_nbytes + 56)
Expand Down Expand Up @@ -203,6 +210,8 @@ def _start(self):
self._header_dictionary"""

self._txt_header = self._read_ascii_header(self._image_file)
# TODO: the block sizes in the binary header should be taken from the
# ASCII header.
self._bin_header = self._read_binary_header(self._image_file)

self._gonio_start_angles = (
Expand Down Expand Up @@ -329,8 +338,12 @@ def _detector(self):
pixel_size_y = self._bin_header["real_px_size_y"]
origin_at_zero = np.array(
[
-self._bin_header["origin_px_x"] * pixel_size_x,
-self._bin_header["origin_px_y"] * pixel_size_y,
-self._bin_header["origin_px_x"]
* pixel_size_x
/ self._bin_header["bin_x"],
-self._bin_header["origin_px_y"]
* pixel_size_y
/ self._bin_header["bin_y"],
-self._bin_header["distance_mm"],
]
)
Expand All @@ -344,6 +357,7 @@ def _detector(self):
(pixel_size_x, pixel_size_y),
(self._txt_header["NX"], self._txt_header["NY"]),
(0, self._bin_header["overflow_threshold"]),
gain=self._bin_header["gain"],
)

return detector
Expand All @@ -367,40 +381,69 @@ def _scan(self):

def get_raw_data(self):
comp = self._txt_header["compression"].strip()

# The differences between TY5 and TY6 are:
# - TY5 lacks the lbytesincompressedfield field; we must calculate it.
# - TY6 uses fewer-bit compression inside "blocks".
# Outside such blocks, TY5 and TY6 use the same delta compression.
if comp.startswith("TY6"):
return self._get_raw_data_ty6_native()
return self._get_raw_data_ty5_ty6_native(mode=6)
elif comp.startswith("TY5"):
return self._get_raw_data_ty5_ty6_native(mode=5)
else:
raise NotImplementedError(f"Can't handle compression: {comp}")

def _get_raw_data_ty6_native(self):
def _get_raw_data_ty5_ty6_native(self, mode):
offset = self._txt_header["NHEADER"]
nx = self._txt_header["NX"]
ny = self._txt_header["NY"]
with open(self._image_file, "rb") as f:
with FormatROD.open_file(self._image_file, "rb") as f:
f.seek(offset)
lbytesincompressedfield = struct.unpack("<l", f.read(4))[0]
if mode == 5:
lbytesincompressedfield = (
nx * ny + 2 * self._txt_header["OI"] + 4 * self._txt_header["OL"]
)
elif mode == 6:
lbytesincompressedfield = struct.unpack("<l", f.read(4))[0]
else:
raise NotImplementedError(
"_get_raw_data_ty5_ty6: only mode 5 and 6 are supported."
)

linedata = f.read(lbytesincompressedfield)
offsets = f.read(4 * ny)

return uncompress_rod_TY6(linedata, offsets, ny, nx)
return uncompress_rod_TY5_TY6(linedata, offsets, ny, nx, mode)

# Python implementation
def _get_raw_data_ty6(self):
def _get_raw_data_ty5_ty6(self, mode):
offset = self._txt_header["NHEADER"]
nx = self._txt_header["NX"]
ny = self._txt_header["NY"]
with open(self._image_file, "rb") as f:
with FormatROD.open_file(self._image_file, "rb") as f:
f.seek(offset)
lbytesincompressedfield = struct.unpack("<l", f.read(4))[0]
if mode == 5:
lbytesincompressedfield = (
nx * ny + 2 * self._txt_header["OI"] + 4 * self._txt_header["OL"]
)
elif mode == 6:
lbytesincompressedfield = struct.unpack("<l", f.read(4))[0]
else:
raise NotImplementedError(
"_get_raw_data_ty5_ty6: only mode 5 and 6 are supported."
)

linedata = np.fromfile(f, dtype=np.uint8, count=lbytesincompressedfield)
offsets = struct.unpack("<%dI" % ny, f.read(4 * ny))

image = np.zeros((ny, nx), dtype=np.int32)
for iy in range(ny):
image[iy, :] = self.decode_TY6_oneline(linedata[offsets[iy] :], nx)
image[iy, :] = self.decode_TY5_TY6_oneline(
linedata[offsets[iy] :], nx, mode
)
return flex.int(image)

def decode_TY6_oneline(self, linedata, w):
def decode_TY5_TY6_oneline(self, linedata, w, mode):
"""Decompress TY6 encoded pixels for a single line.
w is the number of pixels in the fast axis."""

Expand All @@ -414,8 +457,16 @@ def decode_TY6_oneline(self, linedata, w):
opos = 0
ret = np.zeros(w, dtype=np.int32)

nblock = (w - 1) // (BLOCKSIZE * 2)
nrest = (w - 1) % (BLOCKSIZE * 2)
if mode == 5:
nblock = 0
nrest = w - 1 # 1st px is special
elif mode == 6:
nblock = (w - 1) // (BLOCKSIZE * 2)
nrest = (w - 1) % (BLOCKSIZE * 2)
else:
raise NotImplementedError(
"decode_TY5_TY6_oneline: only mode 5 and 6 are supported."
)

firstpx = linedata[ipos]
ipos += 1
Expand Down Expand Up @@ -619,6 +670,7 @@ def _detector(self):
p = d.add_panel()
p.set_type("SENSOR_PAD")
p.set_name(panel_name)
p.set_gain(self._bin_header["gain"])
p.set_raw_image_offset((xmin, ymin))
p.set_image_size((xmax - xmin, ymax - ymin))
p.set_trusted_range((0, self._bin_header["overflow_threshold"]))
Expand Down
86 changes: 86 additions & 0 deletions tests/format/test_FormatROD.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
"""Tests for dxbtx.format.FormatROD format classes."""

from __future__ import annotations

import pytest

from dxtbx.format.FormatROD import FormatROD, FormatROD_Arc
from dxtbx.model.experiment_list import ExperimentListFactory


def test_HyPix6000_TY6(dials_data):
filename = (
dials_data("image_examples") / "Hypix6000-monoclinic_lysozyme1_1_1.rodhypix.bz2"
)

assert FormatROD.understand(filename)
expts = ExperimentListFactory.from_filenames([filename])
assert len(expts) == 1

imageset = expts[0].imageset
assert imageset.get_format_class() == FormatROD

gonio = imageset.get_goniometer()
axes = gonio.get_names()
assert list(axes) == ["PHI", "KAPPA=CHI", "OMEGA"]

detector = imageset.get_detector()
assert len(detector) == 1
assert detector[0].get_gain() == 1.0
assert detector[0].get_origin() == pytest.approx(
(-44.6944, -36.9736, -23.2606), rel=1e-5
)

# TY6 decompression test
img = imageset.get_raw_data(0)[0]
assert img.focus() == (800, 775)
assert img[123, 456] == 3


def test_EosS2_TY5(dials_data):
filename = (
dials_data("image_examples") / "EosS2-pre_SHR248e_CHCl3_pentane_1_1.img.bz2"
)

assert FormatROD.understand(filename)
expts = ExperimentListFactory.from_filenames([filename])
assert len(expts) == 1

imageset = expts[0].imageset
assert imageset.get_format_class() == FormatROD

gonio = imageset.get_goniometer()
axes = gonio.get_names()
assert list(axes) == ["PHI", "KAPPA=CHI", "OMEGA"]

detector = imageset.get_detector()
assert len(detector) == 1
# this CCD image has a non-unity gain
assert detector[0].get_gain() == 100.0
# if binning was not properly taken into account, the origin would break
assert detector[0].get_origin() == pytest.approx(
(-48.2605, -30.8897, -17.5460), rel=1e-5
)

# TY5 decompression test
img = imageset.get_raw_data(0)[0]
assert img.focus() == (512, 512)
assert img[123, 456] == 174


def test_HyPixArc_TY6(dials_data):
# HyPix-Arc is also tested in an end-to-end test `test_rigaku_hypix_arc_150()`
# in dials/tests/algorithms/indexing/test_index.py.

filename = dials_data("Rigaku_HyPix_Arc") / "CHNOS_1_2.rodhypix"

assert FormatROD_Arc.understand(filename)
expts = ExperimentListFactory.from_filenames([filename])
assert len(expts) == 1

imageset = expts[0].imageset
assert imageset.get_format_class() == FormatROD_Arc

# HyPix-Arc consists of three panels
detector = imageset.get_detector()
assert len(detector) == 3