Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow input image files to be numbered '0' #42

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions newsfragments/42.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Support input files numbered zero, such as ``filename_00000.cbf``, ``filename_#####.cbf:0:99``, or similar.
Previously, passing a filename numbered zero would cause screen19 to exit with an error.
29 changes: 18 additions & 11 deletions screen19/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def prettyprint_procrunner(d):
)


def make_template(f):
def make_template(f): # type: (str) -> (str, int)
"""
Generate a xia2-style filename template.

Expand All @@ -104,25 +104,32 @@ def make_template(f):
before the file extension.
For example, the filename example_01_0001.cbf becomes example_01_####.cbf.

If the input data are in a single file and its name doesn't match the above
template, the resulting filename template is simply the input file name and the
resultant image number is None.
This might be the case for input data in NeXus NXmx HDF5 format, for example,
where one typically passes a file with a name like example_master.h5.

:param f: Filename, with extension.
:type f: str
:return: Filename template, with extension; image number.
:rtype: Tuple(str, int)
"""
# Split the file from its path
directory, f = os.path.split(f)
# Split off the file extension, assuming it begins at the first full stop,
# also split the last contiguous group of digits off the filename root
parts = re.split(r"([0-9#]+)(?=\.\w)", f, 1)
# Get the number of digits in the group we just isolated and their value
try:
# Combine the root, a hash for each digit and the extension
length = len(parts[1])
template = parts[0] + "#" * length + parts[2]
image = int(parts[1].replace("#", "0"))
except IndexError:
template = parts[0]
root, number, extension = re.split(r"([0-9#]+)(?=\.\w)", f, 1)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

et seq.

I am pretty sure general logic for this kind of thing already lives inside dxtbx somewhere which would support a more general range of filenames?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Possibly, I'm afraid I wouldn't know where to look.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Aha, dxtbx.model.scan_helpers.template_regex, I think this might do the trick!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

# Catch the case where the input data file name doesn't match the numbered pattern.
except ValueError:
template = f
benjaminhwilliams marked this conversation as resolved.
Show resolved Hide resolved
image = None
else:
# Get the number of digits in the group we just isolated and their value
length = len(number)
image = int(number.replace("#", "0"))
# Combine the root, a hash for each digit and the extension
template = root + "#" * length + extension

return os.path.join(directory, template), image


Expand Down
13 changes: 9 additions & 4 deletions screen19/screen.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,10 @@
import time
import timeit
from glob import glob
from typing import Dict, List, Optional, Sequence, Tuple
from typing import Dict, List, Optional, Sequence, Tuple, Union

import procrunner
import py.path
from six.moves.cPickle import PickleError

import iotbx.phil
Expand Down Expand Up @@ -94,6 +95,7 @@
import screen19
from screen19.minimum_exposure import suggest_minimum_exposure

ImportType = List[Union[str, py.path.local]]
Templates = List[Tuple[str, Tuple[int, int]]]

phil_scope = iotbx.phil.parse(
Expand Down Expand Up @@ -291,7 +293,7 @@ def __init__(self):
# iotbx.phil.parse. Confused? Blame PHIL.
self.params = phil_scope.fetch(iotbx.phil.parse("")).extract()

def _quick_import(self, files): # type: (List[str]) -> bool
def _quick_import(self, files): # type: (ImportType) -> bool
"""
Generate xia2-style templates from file names and attempt a quick import.

Expand Down Expand Up @@ -322,7 +324,7 @@ def _quick_import(self, files): # type: (List[str]) -> bool
for f in files:
template, image = screen19.make_template(f)
if template not in templates:
image_range = [image, image] if image else []
image_range = [image, image] if image is not None else []
templates.update({template: [image_range]})
elif image == templates[template][-1][-1] + 1:
templates[template][-1][-1] = image
Expand Down Expand Up @@ -374,7 +376,7 @@ def _quick_import_templates(self, templates): # type: (Templates) -> bool

return True

def _import(self, files): # type: (List[str]) -> None
def _import(self, files): # type: (ImportType) -> None
"""
Try to run a quick call of dials.import. Failing that, run a slow call.

Expand All @@ -384,6 +386,9 @@ def _import(self, files): # type: (List[str]) -> None
Args:
files: List of image filenames.
"""
# Convert py.path.local objects to strings.
files = [str(file) for file in files]

info("\nImporting data...")
if len(files) == 1:
if os.path.isdir(files[0]):
Expand Down
93 changes: 65 additions & 28 deletions tests/test_screen19.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,18 @@

from __future__ import absolute_import, division, print_function

import shutil

import pytest

from screen19 import minimum_exposure
from screen19.screen import Screen19


def image(n):
return "x3_1_{:04d}.cbf.gz".format(n)


# A list of tuples of example sys.argv[1:] cases and associated image count.
import_checks = [
([""], 900),
Expand All @@ -15,19 +22,45 @@
(["x3_1_####.cbf.gz:1:99"], 99),
(["x3_1_00##.cbf.gz:1:99"], 99),
(["x3_1_0001.cbf.gz:1:99"], 99),
(
[
"x3_1_0001.cbf.gz",
"x3_1_0002.cbf.gz",
"x3_1_0003.cbf.gz",
"x3_1_0004.cbf.gz",
"x3_1_0005.cbf.gz",
],
5,
),
([image(i + 1) for i in range(5)], 5),
]

# A list of tuples of example sys.argv[1:] cases, with files numbered from zero, and
# associated image count.
import_checks_zero = [
(["x3_1_000*.cbf.gz"], 10),
(["x3_1_####.cbf.gz:0:9"], 10),
(["x3_1_00##.cbf.gz:0:9"], 10),
(["x3_1_0000.cbf.gz:0:9"], 10),
([image(i) for i in range(5)], 5),
]


def import_data(data, image_count): # type: (str, int) -> None
"""
Generate and verify an imageset from spoof command-line input.

Check that importing data according to an input string corresponding to a single
contiguous image range results in a single imageset containing the correct number
of images.

Args:
data: Valid input, such as "x3_1_####.cbf.gz", "x3_1_0001.cbf.gz:1:99", etc.
image_count: Number of images matching the input.

Raises:
AssertionError: Either if more than one imageset is created or if the
imageset contains the wrong number of files.
"""
screen = Screen19()
screen._import(data)

# Check that the import has resulted in the creation of a single experiment.
assert len(screen.expts) == 1
# Check that the associated imageset has the expected number of images.
assert screen.expts[0].imageset.size() == image_count


def test_screen19_command_line_help_does_not_crash():
Screen19().run([])

Expand All @@ -36,32 +69,36 @@ def test_minimum_exposure_help_does_not_crash():
minimum_exposure.run(args=[])


@pytest.mark.parametrize("import_checks", import_checks)
def test_screen19_inputs(dials_data, tmpdir, import_checks):
@pytest.mark.parametrize("import_check", import_checks)
def test_screen19_inputs(dials_data, tmpdir, import_check):
"""Test various valid input argument styles"""
data_files, image_count = import_checks
data = [
dials_data("small_molecule_example").join(filename).strpath
for filename in data_files
]
data_files, image_count = import_check
data = [dials_data("small_molecule_example") / filename for filename in data_files]

foo = Screen19()
# The tmpdir should only be necessary for DIALS v1 — no output expected for DIALS v2
foo._import(data)
import_data(data, image_count)

# Check that the import has resulted in the creation of a single experiment.
assert len(foo.expts) == 1
# Check that the associated imageset has the expected number of images.
assert foo.expts[0].imageset.size() == image_count

@pytest.mark.parametrize("import_check_zero", import_checks_zero)
def test_screen19_inputs_zero(dials_data, tmpdir, import_check_zero):
"""Test various valid input argument styles with filenames numbered from zero."""
data_files, image_count = import_check_zero
with tmpdir.as_cwd():
# Copy x3_1_0001.cbf.gz to <tmpdir>/x3_1_0000.cbf.gz, etc.
for i in range(10):
shutil.copy(dials_data("small_molecule_example") / image(i + 1), image(i))

data = [tmpdir / filename for filename in data_files]

import_data(data, image_count)


def test_screen19(dials_data, tmpdir):
"""An integration test. Check the full functionality of screen19."""
data_dir = dials_data("x4wide").join("X4_wide_M1S4_2_####.cbf:1:30").strpath
data_dir = dials_data("x4wide") / "X4_wide_M1S4_2_####.cbf:1:30"

# Test screen19 first.
with tmpdir.as_cwd():
Screen19().run([data_dir], set_up_logging=True)
Screen19().run([data_dir.strpath], set_up_logging=True)

logfile = tmpdir.join("screen19.log").read()

Expand All @@ -83,10 +120,10 @@ def test_screen19(dials_data, tmpdir):


def test_screen19_single_frame(dials_data, tmpdir):
image = dials_data("x4wide").join("X4_wide_M1S4_2_0001.cbf").strpath
single_image = dials_data("x4wide") / "X4_wide_M1S4_2_0001.cbf"

with tmpdir.as_cwd():
Screen19().run([image], set_up_logging=True)
Screen19().run([single_image.strpath], set_up_logging=True)

logfile = tmpdir.join("screen19.log").read()

Expand Down