diff --git a/newsfragments/42.bugfix b/newsfragments/42.bugfix new file mode 100644 index 0000000..29bf82f --- /dev/null +++ b/newsfragments/42.bugfix @@ -0,0 +1,2 @@ +Support input files numbered zero, such as ``filename_00000.cbf``, ``filename_#####.cbf:0:99``, or similar. +Previously, passing a filename numbered zero would cause screen19 to exit with an error. \ No newline at end of file diff --git a/screen19/__init__.py b/screen19/__init__.py index fcfefad..d798a8b 100644 --- a/screen19/__init__.py +++ b/screen19/__init__.py @@ -95,7 +95,7 @@ def prettyprint_procrunner(d): ) -def make_template(f): +def make_template(f): # type: (str) -> (str, int) """ Generate a xia2-style filename template. @@ -104,25 +104,32 @@ def make_template(f): before the file extension. For example, the filename example_01_0001.cbf becomes example_01_####.cbf. + If the input data are in a single file and its name doesn't match the above + template, the resulting filename template is simply the input file name and the + resultant image number is None. + This might be the case for input data in NeXus NXmx HDF5 format, for example, + where one typically passes a file with a name like example_master.h5. + :param f: Filename, with extension. - :type f: str :return: Filename template, with extension; image number. - :rtype: Tuple(str, int) """ # Split the file from its path directory, f = os.path.split(f) # Split off the file extension, assuming it begins at the first full stop, # also split the last contiguous group of digits off the filename root - parts = re.split(r"([0-9#]+)(?=\.\w)", f, 1) - # Get the number of digits in the group we just isolated and their value try: - # Combine the root, a hash for each digit and the extension - length = len(parts[1]) - template = parts[0] + "#" * length + parts[2] - image = int(parts[1].replace("#", "0")) - except IndexError: - template = parts[0] + root, number, extension = re.split(r"([0-9#]+)(?=\.\w)", f, 1) + # Catch the case where the input data file name doesn't match the numbered pattern. + except ValueError: + template = f image = None + else: + # Get the number of digits in the group we just isolated and their value + length = len(number) + image = int(number.replace("#", "0")) + # Combine the root, a hash for each digit and the extension + template = root + "#" * length + extension + return os.path.join(directory, template), image diff --git a/screen19/screen.py b/screen19/screen.py index fabc572..a806aba 100644 --- a/screen19/screen.py +++ b/screen19/screen.py @@ -53,9 +53,10 @@ import time import timeit from glob import glob -from typing import Dict, List, Optional, Sequence, Tuple +from typing import Dict, List, Optional, Sequence, Tuple, Union import procrunner +import py.path from six.moves.cPickle import PickleError import iotbx.phil @@ -94,6 +95,7 @@ import screen19 from screen19.minimum_exposure import suggest_minimum_exposure +ImportType = List[Union[str, py.path.local]] Templates = List[Tuple[str, Tuple[int, int]]] phil_scope = iotbx.phil.parse( @@ -291,7 +293,7 @@ def __init__(self): # iotbx.phil.parse. Confused? Blame PHIL. self.params = phil_scope.fetch(iotbx.phil.parse("")).extract() - def _quick_import(self, files): # type: (List[str]) -> bool + def _quick_import(self, files): # type: (ImportType) -> bool """ Generate xia2-style templates from file names and attempt a quick import. @@ -322,7 +324,7 @@ def _quick_import(self, files): # type: (List[str]) -> bool for f in files: template, image = screen19.make_template(f) if template not in templates: - image_range = [image, image] if image else [] + image_range = [image, image] if image is not None else [] templates.update({template: [image_range]}) elif image == templates[template][-1][-1] + 1: templates[template][-1][-1] = image @@ -374,7 +376,7 @@ def _quick_import_templates(self, templates): # type: (Templates) -> bool return True - def _import(self, files): # type: (List[str]) -> None + def _import(self, files): # type: (ImportType) -> None """ Try to run a quick call of dials.import. Failing that, run a slow call. @@ -384,6 +386,9 @@ def _import(self, files): # type: (List[str]) -> None Args: files: List of image filenames. """ + # Convert py.path.local objects to strings. + files = [str(file) for file in files] + info("\nImporting data...") if len(files) == 1: if os.path.isdir(files[0]): diff --git a/tests/test_screen19.py b/tests/test_screen19.py index 8604235..bda9f84 100644 --- a/tests/test_screen19.py +++ b/tests/test_screen19.py @@ -2,11 +2,18 @@ from __future__ import absolute_import, division, print_function +import shutil + import pytest from screen19 import minimum_exposure from screen19.screen import Screen19 + +def image(n): + return "x3_1_{:04d}.cbf.gz".format(n) + + # A list of tuples of example sys.argv[1:] cases and associated image count. import_checks = [ ([""], 900), @@ -15,19 +22,45 @@ (["x3_1_####.cbf.gz:1:99"], 99), (["x3_1_00##.cbf.gz:1:99"], 99), (["x3_1_0001.cbf.gz:1:99"], 99), - ( - [ - "x3_1_0001.cbf.gz", - "x3_1_0002.cbf.gz", - "x3_1_0003.cbf.gz", - "x3_1_0004.cbf.gz", - "x3_1_0005.cbf.gz", - ], - 5, - ), + ([image(i + 1) for i in range(5)], 5), +] + +# A list of tuples of example sys.argv[1:] cases, with files numbered from zero, and +# associated image count. +import_checks_zero = [ + (["x3_1_000*.cbf.gz"], 10), + (["x3_1_####.cbf.gz:0:9"], 10), + (["x3_1_00##.cbf.gz:0:9"], 10), + (["x3_1_0000.cbf.gz:0:9"], 10), + ([image(i) for i in range(5)], 5), ] +def import_data(data, image_count): # type: (str, int) -> None + """ + Generate and verify an imageset from spoof command-line input. + + Check that importing data according to an input string corresponding to a single + contiguous image range results in a single imageset containing the correct number + of images. + + Args: + data: Valid input, such as "x3_1_####.cbf.gz", "x3_1_0001.cbf.gz:1:99", etc. + image_count: Number of images matching the input. + + Raises: + AssertionError: Either if more than one imageset is created or if the + imageset contains the wrong number of files. + """ + screen = Screen19() + screen._import(data) + + # Check that the import has resulted in the creation of a single experiment. + assert len(screen.expts) == 1 + # Check that the associated imageset has the expected number of images. + assert screen.expts[0].imageset.size() == image_count + + def test_screen19_command_line_help_does_not_crash(): Screen19().run([]) @@ -36,32 +69,36 @@ def test_minimum_exposure_help_does_not_crash(): minimum_exposure.run(args=[]) -@pytest.mark.parametrize("import_checks", import_checks) -def test_screen19_inputs(dials_data, tmpdir, import_checks): +@pytest.mark.parametrize("import_check", import_checks) +def test_screen19_inputs(dials_data, tmpdir, import_check): """Test various valid input argument styles""" - data_files, image_count = import_checks - data = [ - dials_data("small_molecule_example").join(filename).strpath - for filename in data_files - ] + data_files, image_count = import_check + data = [dials_data("small_molecule_example") / filename for filename in data_files] - foo = Screen19() - # The tmpdir should only be necessary for DIALS v1 — no output expected for DIALS v2 - foo._import(data) + import_data(data, image_count) - # Check that the import has resulted in the creation of a single experiment. - assert len(foo.expts) == 1 - # Check that the associated imageset has the expected number of images. - assert foo.expts[0].imageset.size() == image_count + +@pytest.mark.parametrize("import_check_zero", import_checks_zero) +def test_screen19_inputs_zero(dials_data, tmpdir, import_check_zero): + """Test various valid input argument styles with filenames numbered from zero.""" + data_files, image_count = import_check_zero + with tmpdir.as_cwd(): + # Copy x3_1_0001.cbf.gz to /x3_1_0000.cbf.gz, etc. + for i in range(10): + shutil.copy(dials_data("small_molecule_example") / image(i + 1), image(i)) + + data = [tmpdir / filename for filename in data_files] + + import_data(data, image_count) def test_screen19(dials_data, tmpdir): """An integration test. Check the full functionality of screen19.""" - data_dir = dials_data("x4wide").join("X4_wide_M1S4_2_####.cbf:1:30").strpath + data_dir = dials_data("x4wide") / "X4_wide_M1S4_2_####.cbf:1:30" # Test screen19 first. with tmpdir.as_cwd(): - Screen19().run([data_dir], set_up_logging=True) + Screen19().run([data_dir.strpath], set_up_logging=True) logfile = tmpdir.join("screen19.log").read() @@ -83,10 +120,10 @@ def test_screen19(dials_data, tmpdir): def test_screen19_single_frame(dials_data, tmpdir): - image = dials_data("x4wide").join("X4_wide_M1S4_2_0001.cbf").strpath + single_image = dials_data("x4wide") / "X4_wide_M1S4_2_0001.cbf" with tmpdir.as_cwd(): - Screen19().run([image], set_up_logging=True) + Screen19().run([single_image.strpath], set_up_logging=True) logfile = tmpdir.join("screen19.log").read()