diff --git a/docs/quickstart.md b/docs/quickstart.md index e3257e7..123124d 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -39,10 +39,10 @@ At this point, you should be good to go! Please [file an issue on github](https: ## Input files To run `matchmaps`, you will need: - - one `.pdb` file containing a refined structural model corresponding to your "off" data. - - two `.mtz` files corresponding to your "on" and "off" data respectively. + - one `.pdb` (or `.cif`) file containing a refined structural model corresponding to your "off" data. + - two `.mtz` (or `.cif`) files corresponding to your "on" and "off" data respectively. -You will also need to know the names of the columns in these `mtz`s containing your observed structure factor amplitudes and uncertainties. Depending on what software you used to produce these files, this may be something like `FP`/`SIGFP`, `Fobs`/`SIGFobs`, or similar. If you don't know these off-hand, you can figure it out using [`reciprocalspaceship`](https://rs-station.github.io/reciprocalspaceship/)'s `rs.mtzdump` utility, which is installed along with `matchmaps`. You can do this right in the command-line as: +You will also need to know the names of the columns in these `.mtz`/`.cif` files containing your observed structure factor amplitudes and uncertainties. Depending on what software you used to produce these files, this may be something like `FP`/`SIGFP`, `Fobs`/`SIGFobs`, or similar. If you don't know these off-hand and your input is an `.mtz` file, you can figure them out using [`reciprocalspaceship`](https://rs-station.github.io/reciprocalspaceship/)'s `rs.mtzdump` utility, which is installed along with `matchmaps`. 
You can do this right in the command-line as: ```bash rs.mtzdump mymtz.mtz ``` diff --git a/pyproject.toml b/pyproject.toml index 301252d..d1c9485 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,7 @@ dynamic = ["version"] dependencies = [ "numpy", "tqdm", - "reciprocalspaceship", + "reciprocalspaceship>=1.0.1", "rs-booster>=0.0.1", "gemmi" ] diff --git a/src/matchmaps/_compute_mr_diff.py b/src/matchmaps/_compute_mr_diff.py index 06ff21a..2cd6ae5 100644 --- a/src/matchmaps/_compute_mr_diff.py +++ b/src/matchmaps/_compute_mr_diff.py @@ -23,6 +23,7 @@ phaser_wrapper, _clean_up_files, _cif_or_pdb_to_pdb, + _cif_or_mtz_to_mtz, ) @@ -94,17 +95,16 @@ def compute_mr_difference_map( output_dir_contents = list(output_dir.glob("*")) - off_name = mtzoff.name.removesuffix(".mtz") - on_name = mtzon.name.removesuffix(".mtz") - pdboff = _cif_or_pdb_to_pdb(pdboff, output_dir) + mtzoff, off_name = _cif_or_mtz_to_mtz(mtzoff, output_dir) + mtzon, on_name = _cif_or_mtz_to_mtz(mtzon, output_dir) + # take in the list of rbr selections and parse them into phenix and gemmi selection formats # if rbr_groups = None, just returns (None, None) rbr_phenix, rbr_gemmi = _rbr_selection_parser(rbr_selections) # this is where scaling takes place in the usual pipeline, but that doesn't make sense with different-spacegroup inputs - # side note: I need to test the importance of scaling even in the normal case!! Might be more artifact than good, who knows pdboff = _handle_special_positions(pdboff, output_dir) @@ -133,8 +133,6 @@ def compute_mr_difference_map( output_dir=output_dir, ) - # the refinement process *should* be identical. 
Waters are gone already - # I just need to make sure that the phaser outputs go together print(f"{time.strftime('%H:%M:%S')}: Running phenix.refine for the 'on' data...") nickname_on = rigid_body_refinement_wrapper( @@ -166,7 +164,6 @@ def compute_mr_difference_map( ) # from here down I just copied over the stuff from the normal version - # this should be proofread for compatibility but should all work # read back in the files created by phenix # these have knowable names @@ -255,7 +252,7 @@ def parse_arguments(): metavar=("mtzfileoff", "Foff", "SigFoff"), required=True, help=( - "MTZ containing off/apo/ground/dark state data. " + "MTZ or sfCIF containing off/apo/ground/dark state data. " "Specified as [filename F SigF]" ), ) @@ -267,7 +264,7 @@ def parse_arguments(): metavar=("mtzfileon", "Fon", "SigFon"), required=True, help=( - "MTZ containing on/bound/excited/bright state data. " + "MTZ or SFCIF containing on/bound/excited/bright state data. " "Specified as [filename F SigF]" "This file may be in a different spacegroup / crystal packing than mtzoff" ), @@ -278,7 +275,7 @@ def parse_arguments(): "-p", required=True, help=( - "Reference pdb/cif corresponding to the off/apo/ground/dark state. " + "Reference PDB or mmCIF corresponding to the off/apo/ground/dark state. " "Used as a molecular replacement solution for mtzon and for rigid-body refinement of both input MTZs to generate phases." "Should match mtzoff well enough that molecular replacement is not necessary." ), @@ -298,7 +295,7 @@ def parse_arguments(): "-i", required=False, default="./", - help="Path to input mtzs and pdb. Optional, defaults to './' (current directory)", + help="Path to input files. 
Optional, defaults to './' (current directory)", ) parser.add_argument( diff --git a/src/matchmaps/_compute_ncs_diff.py b/src/matchmaps/_compute_ncs_diff.py index d2cff72..1e80b4e 100644 --- a/src/matchmaps/_compute_ncs_diff.py +++ b/src/matchmaps/_compute_ncs_diff.py @@ -24,6 +24,7 @@ _validate_inputs, _clean_up_files, _cif_or_pdb_to_pdb, + _cif_or_mtz_to_mtz, ) @@ -51,6 +52,8 @@ def compute_ncs_difference_map( pdb = _cif_or_pdb_to_pdb(pdb, output_dir) + mtz, _ = _cif_or_mtz_to_mtz(mtz, output_dir) + rbr_phenix, rbr_gemmi = _rbr_selection_parser(ncs_chains) if Phi is None: # do rigid-body refinement to get phases @@ -109,7 +112,7 @@ def compute_ncs_difference_map( ) print(f"{time.strftime('%H:%M:%S')}: Cleaning up files...") - print(keep_temp_files) + _clean_up_files(output_dir, output_dir_contents, keep_temp_files) print(f"{time.strftime('%H:%M:%S')}: Done!") @@ -136,7 +139,7 @@ def parse_arguments(): # metavar=("mtzfile", "F", "SigF"), required=True, help=( - "MTZ file containing structure factor amplitudes. " + "MTZ or sfCIF file containing structure factor amplitudes. " "Specified as [filename F SigF] or [filename F]. " "SigF is not necessary if phases are also provided" ), @@ -147,7 +150,7 @@ def parse_arguments(): required=False, default=None, help=( - "Optional. Column in MTZ file containing phases. " + "Optional. Column in MTZ/sfCIF file containing phases. " "If phases are not provided, phases will be computed via rigid-body refinement of " "the provided model and structure factor amplitudes." ), @@ -158,8 +161,8 @@ def parse_arguments(): "-p", required=True, help=( - "Reference pdb/cif. " - "If phases are not provided, used for rigid-body refinement of input MTZ to generate phases." + "Reference PDB or mmCIF. " + "If phases are not provided, used for rigid-body refinement of input MTZ/sfCIF to generate phases." ), ) @@ -197,7 +200,7 @@ def parse_arguments(): "-i", required=False, default="./", - help="Path to input mtz and pdb. 
Optional, defaults to './' (current directory)", + help="Path to input files. Optional, defaults to './' (current directory)", ) parser.add_argument( diff --git a/src/matchmaps/_compute_realspace_diff.py b/src/matchmaps/_compute_realspace_diff.py index 6f31dd0..b140729 100755 --- a/src/matchmaps/_compute_realspace_diff.py +++ b/src/matchmaps/_compute_realspace_diff.py @@ -22,7 +22,7 @@ _clean_up_files, _validate_environment, _validate_inputs, - #_cif_or_mtz_to_mtz, + _cif_or_mtz_to_mtz, _cif_or_pdb_to_pdb, ) @@ -94,13 +94,10 @@ def compute_realspace_difference_map( output_dir_contents = list(output_dir.glob("*")) - off_name = mtzoff.name.removesuffix(".mtz") - on_name = mtzon.name.removesuffix(".mtz") - pdboff = _cif_or_pdb_to_pdb(pdboff, output_dir) - # off_name = _cif_or_mtz_to_mtz(mtzoff) - # on_name = _cif_or_mtz_to_mtz(mtzon) + mtzoff, off_name = _cif_or_mtz_to_mtz(mtzoff, output_dir) + mtzon, on_name = _cif_or_mtz_to_mtz(mtzon, output_dir) # take in the list of rbr selections and parse them into phenix and gemmi selection formats # if rbr_groups = None, just returns (None, None) @@ -262,7 +259,7 @@ def parse_arguments(): metavar=("mtzfileoff", "Foff", "SigFoff"), required=True, help=( - "MTZ containing off/apo/ground/dark state data. " + "MTZ or sfCIF containing off/apo/ground/dark state data. " "Specified as [filename F SigF]" ), ) @@ -274,7 +271,7 @@ def parse_arguments(): metavar=("mtzfileon", "Fon", "SigFon"), required=True, help=( - "MTZ containing on/bound/excited/bright state data. " + "MTZ or sfCIF containing on/bound/excited/bright state data. " "Specified as [filename F SigF]" ), ) @@ -284,7 +281,7 @@ def parse_arguments(): "-p", required=True, help=( - "Reference pdb/cif corresponding to the off/apo/ground/dark state. " + "Reference PDB or mmCIF corresponding to the off/apo/ground/dark state. " "Used for rigid-body refinement of both input MTZs to generate phases." 
def _cif_or_mtz_to_mtz(input_file, output_dir):
    """
    Place an MTZ version of a reflection file inside ``output_dir``.

    An ``.mtz`` input is copied into ``output_dir`` unchanged. A ``.cif``
    input is read with ``rs.read_cif`` and written back out as an MTZ file.
    The suffix check is case-insensitive.

    Parameters
    ----------
    input_file : pathlib.Path
        Path to the input reflection file, ending in ``.mtz`` or ``.cif``
    output_dir : pathlib.Path
        Directory in which the MTZ file is placed

    Returns
    -------
    (output_file, filename) : tuple of (pathlib.Path, str)
        Path to the MTZ file inside ``output_dir``, and the name of
        ``input_file`` with its suffix removed

    Raises
    ------
    ValueError
        If the suffix of ``input_file`` is neither ``.mtz`` nor ``.cif``
    """
    suffix = input_file.suffix.lower()

    if suffix == ".mtz":
        output_file = output_dir / input_file.name

        try:
            shutil.copy(input_file, output_file)
        except shutil.SameFileError:
            # The input already lives in output_dir (e.g. input and output
            # directories are the same), so there is nothing to copy.
            pass

    elif suffix == ".cif":
        # Keep the original capitalization of the file name so that the
        # output path agrees with the name returned to the caller.
        output_file = output_dir / (input_file.stem + ".mtz")

        reflections = rs.read_cif(str(input_file))
        reflections.write_mtz(str(output_file))

    else:
        raise ValueError(
            f"Invalid file type {input_file.suffix} for reflection data, "
            "must be '.mtz' or '.cif'"
        )

    return output_file, input_file.stem