Skip to content

Commit

Permalink
Merge pull request #26 from rs-station/sfcif
Browse files Browse the repository at this point in the history
Support sfCIF files as alternatives to MTZs
  • Loading branch information
dennisbrookner authored Oct 12, 2023
2 parents 5a5458a + fbfbabf commit 8ec4ee8
Show file tree
Hide file tree
Showing 6 changed files with 79 additions and 37 deletions.
6 changes: 3 additions & 3 deletions docs/quickstart.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,10 @@ At this point, you should be good to go! Please [file an issue on github](https:
## Input files

To run `matchmaps`, you will need:
- one `.pdb` file containing a refined structural model corresponding to your "off" data.
- two `.mtz` files corresponding to your "on" and "off" data respectively.
- one `.pdb` (or `.cif`) file containing a refined structural model corresponding to your "off" data.
- two `.mtz` (or `.cif`) files corresponding to your "on" and "off" data respectively.

You will also need to know the names of the columns in these `mtz`s containing your observed structure factor amplitudes and uncertainties. Depending on what software you used to produce these files, this may be something like `FP`/`SIGFP`, `Fobs`/`SIGFobs`, or similar. If you don't know these off-hand, you can figure it out using [`reciprocalspaceship`](https://rs-station.github.io/reciprocalspaceship/)'s `rs.mtzdump` utility, which is installed along with `matchmaps`. You can do this right in the command-line as:
You will also need to know the names of the columns in these `.mtz`/`.cif` files containing your observed structure factor amplitudes and uncertainties. Depending on what software you used to produce these files, this may be something like `FP`/`SIGFP`, `Fobs`/`SIGFobs`, or similar. If you don't know these off-hand and your input is an `.mtz` file, you can find them using [`reciprocalspaceship`](https://rs-station.github.io/reciprocalspaceship/)'s `rs.mtzdump` utility, which is installed along with `matchmaps`. You can do this directly from the command line:
```bash
rs.mtzdump mymtz.mtz
```
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ dynamic = ["version"]
dependencies = [
"numpy",
"tqdm",
"reciprocalspaceship",
"reciprocalspaceship>=1.0.1",
"rs-booster>=0.0.1",
"gemmi"
]
Expand Down
19 changes: 8 additions & 11 deletions src/matchmaps/_compute_mr_diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
phaser_wrapper,
_clean_up_files,
_cif_or_pdb_to_pdb,
_cif_or_mtz_to_mtz,
)


Expand Down Expand Up @@ -94,17 +95,16 @@ def compute_mr_difference_map(

output_dir_contents = list(output_dir.glob("*"))

off_name = mtzoff.name.removesuffix(".mtz")
on_name = mtzon.name.removesuffix(".mtz")

pdboff = _cif_or_pdb_to_pdb(pdboff, output_dir)

mtzoff, off_name = _cif_or_mtz_to_mtz(mtzoff, output_dir)
mtzon, on_name = _cif_or_mtz_to_mtz(mtzon, output_dir)

# take in the list of rbr selections and parse them into phenix and gemmi selection formats
# if rbr_groups = None, just returns (None, None)
rbr_phenix, rbr_gemmi = _rbr_selection_parser(rbr_selections)

# this is where scaling takes place in the usual pipeline, but that doesn't make sense with different-spacegroup inputs
# side note: I need to test the importance of scaling even in the normal case!! Might be more artifact than good, who knows

pdboff = _handle_special_positions(pdboff, output_dir)

Expand Down Expand Up @@ -133,8 +133,6 @@ def compute_mr_difference_map(
output_dir=output_dir,
)

# the refinement process *should* be identical. Waters are gone already
# I just need to make sure that the phaser outputs go together
print(f"{time.strftime('%H:%M:%S')}: Running phenix.refine for the 'on' data...")

nickname_on = rigid_body_refinement_wrapper(
Expand Down Expand Up @@ -166,7 +164,6 @@ def compute_mr_difference_map(
)

# from here down I just copied over the stuff from the normal version
# this should be proofread for compatibility but should all work

# read back in the files created by phenix
# these have knowable names
Expand Down Expand Up @@ -255,7 +252,7 @@ def parse_arguments():
metavar=("mtzfileoff", "Foff", "SigFoff"),
required=True,
help=(
"MTZ containing off/apo/ground/dark state data. "
"MTZ or sfCIF containing off/apo/ground/dark state data. "
"Specified as [filename F SigF]"
),
)
Expand All @@ -267,7 +264,7 @@ def parse_arguments():
metavar=("mtzfileon", "Fon", "SigFon"),
required=True,
help=(
"MTZ containing on/bound/excited/bright state data. "
"MTZ or SFCIF containing on/bound/excited/bright state data. "
"Specified as [filename F SigF]"
"This file may be in a different spacegroup / crystal packing than mtzoff"
),
Expand All @@ -278,7 +275,7 @@ def parse_arguments():
"-p",
required=True,
help=(
"Reference pdb/cif corresponding to the off/apo/ground/dark state. "
"Reference PDB or mmCIF corresponding to the off/apo/ground/dark state. "
"Used as a molecular replacement solution for mtzon and for rigid-body refinement of both input MTZs to generate phases."
"Should match mtzoff well enough that molecular replacement is not necessary."
),
Expand All @@ -298,7 +295,7 @@ def parse_arguments():
"-i",
required=False,
default="./",
help="Path to input mtzs and pdb. Optional, defaults to './' (current directory)",
help="Path to input files. Optional, defaults to './' (current directory)",
)

parser.add_argument(
Expand Down
15 changes: 9 additions & 6 deletions src/matchmaps/_compute_ncs_diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
_validate_inputs,
_clean_up_files,
_cif_or_pdb_to_pdb,
_cif_or_mtz_to_mtz,
)


Expand Down Expand Up @@ -51,6 +52,8 @@ def compute_ncs_difference_map(

pdb = _cif_or_pdb_to_pdb(pdb, output_dir)

mtz, _ = _cif_or_mtz_to_mtz(mtz, output_dir)

rbr_phenix, rbr_gemmi = _rbr_selection_parser(ncs_chains)

if Phi is None: # do rigid-body refinement to get phases
Expand Down Expand Up @@ -109,7 +112,7 @@ def compute_ncs_difference_map(
)

print(f"{time.strftime('%H:%M:%S')}: Cleaning up files...")
print(keep_temp_files)

_clean_up_files(output_dir, output_dir_contents, keep_temp_files)

print(f"{time.strftime('%H:%M:%S')}: Done!")
Expand All @@ -136,7 +139,7 @@ def parse_arguments():
# metavar=("mtzfile", "F", "SigF"),
required=True,
help=(
"MTZ file containing structure factor amplitudes. "
"MTZ or sfCIF file containing structure factor amplitudes. "
"Specified as [filename F SigF] or [filename F]. "
"SigF is not necessary if phases are also provided"
),
Expand All @@ -147,7 +150,7 @@ def parse_arguments():
required=False,
default=None,
help=(
"Optional. Column in MTZ file containing phases. "
"Optional. Column in MTZ/sfCIF file containing phases. "
"If phases are not provided, phases will be computed via rigid-body refinement of "
"the provided model and structure factor amplitudes."
),
Expand All @@ -158,8 +161,8 @@ def parse_arguments():
"-p",
required=True,
help=(
"Reference pdb/cif. "
"If phases are not provided, used for rigid-body refinement of input MTZ to generate phases."
"Reference PDB or mmCIF. "
"If phases are not provided, used for rigid-body refinement of input MTZ/sfCIF to generate phases."
),
)

Expand Down Expand Up @@ -197,7 +200,7 @@ def parse_arguments():
"-i",
required=False,
default="./",
help="Path to input mtz and pdb. Optional, defaults to './' (current directory)",
help="Path to input files. Optional, defaults to './' (current directory)",
)

parser.add_argument(
Expand Down
17 changes: 7 additions & 10 deletions src/matchmaps/_compute_realspace_diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
_clean_up_files,
_validate_environment,
_validate_inputs,
#_cif_or_mtz_to_mtz,
_cif_or_mtz_to_mtz,
_cif_or_pdb_to_pdb,
)

Expand Down Expand Up @@ -94,13 +94,10 @@ def compute_realspace_difference_map(

output_dir_contents = list(output_dir.glob("*"))

off_name = mtzoff.name.removesuffix(".mtz")
on_name = mtzon.name.removesuffix(".mtz")

pdboff = _cif_or_pdb_to_pdb(pdboff, output_dir)

# off_name = _cif_or_mtz_to_mtz(mtzoff)
# on_name = _cif_or_mtz_to_mtz(mtzon)
mtzoff, off_name = _cif_or_mtz_to_mtz(mtzoff, output_dir)
mtzon, on_name = _cif_or_mtz_to_mtz(mtzon, output_dir)

# take in the list of rbr selections and parse them into phenix and gemmi selection formats
# if rbr_groups = None, just returns (None, None)
Expand Down Expand Up @@ -262,7 +259,7 @@ def parse_arguments():
metavar=("mtzfileoff", "Foff", "SigFoff"),
required=True,
help=(
"MTZ containing off/apo/ground/dark state data. "
"MTZ or sfCIF containing off/apo/ground/dark state data. "
"Specified as [filename F SigF]"
),
)
Expand All @@ -274,7 +271,7 @@ def parse_arguments():
metavar=("mtzfileon", "Fon", "SigFon"),
required=True,
help=(
"MTZ containing on/bound/excited/bright state data. "
"MTZ or sfCIF containing on/bound/excited/bright state data. "
"Specified as [filename F SigF]"
),
)
Expand All @@ -284,7 +281,7 @@ def parse_arguments():
"-p",
required=True,
help=(
"Reference pdb/cif corresponding to the off/apo/ground/dark state. "
"Reference PDB or mmCIF corresponding to the off/apo/ground/dark state. "
"Used for rigid-body refinement of both input MTZs to generate phases."
),
)
Expand All @@ -303,7 +300,7 @@ def parse_arguments():
"-i",
required=False,
default="./",
help="Path to input mtzs and pdb. Optional, defaults to './' (current directory)",
help="Path to input files. Optional, defaults to './' (current directory)",
)

parser.add_argument(
Expand Down
57 changes: 51 additions & 6 deletions src/matchmaps/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -885,17 +885,62 @@ def _validate_inputs(
return input_dir, output_dir, ligands, *files


# def _cif_or_mtz_to_mtz(input_file, output_dir):
def _cif_or_mtz_to_mtz(input_file, output_dir):
    """
    Place an MTZ version of a reflection file in the output directory.

    An ``.mtz`` input is copied into ``output_dir`` under its own name. A
    ``.cif`` input is read with ``rs.read_cif`` and written to ``output_dir``
    as an MTZ file with the same stem. The suffix check is case-insensitive.

    Parameters
    ----------
    input_file : pathlib.Path
        Path to the input reflection file (``.mtz`` or ``.cif``).
    output_dir : pathlib.Path
        Directory in which the MTZ file is placed.

    Returns
    -------
    (output_file, filename)
        ``output_file`` is the path to the MTZ file inside ``output_dir``;
        ``filename`` is the name of ``input_file`` with its suffix removed.

    Raises
    ------
    ValueError
        If the suffix of ``input_file`` is neither ``.mtz`` nor ``.cif``.
    """
    suffix = input_file.suffix
    kind = suffix.lower()

    # Original (case-preserved) file name without its extension; this is the
    # name handed back to the caller and, for cif input, the stem of the MTZ
    # written below, so the two always agree.
    filename = input_file.name.removesuffix(suffix)

    if kind == ".mtz":
        output_file = output_dir / input_file.name

        try:
            shutil.copy(input_file, output_file)
        except shutil.SameFileError:
            # The input already lives in output_dir (e.g. both are the
            # current directory), so there is nothing to copy.
            pass

    elif kind == ".cif":
        output_file = output_dir / (filename + ".mtz")

        reflections = rs.read_cif(str(input_file))
        reflections.write_mtz(str(output_file))

    else:
        raise ValueError(
            f"Invalid file type {suffix} for reflection data, must be '.mtz' or '.cif'"
        )

    return (output_file, filename)

def _cif_or_pdb_to_pdb(input_file, output_dir):
"""
_summary_
Parameters
----------
input_file : pathlib.Path
output_dir : pathlib.Path
Returns
-------
pathlib.Path
path to output file
Raises
------
ValueError
_description_
"""

if input_file.suffix.lower() == '.pdb':

Expand Down

0 comments on commit 8ec4ee8

Please sign in to comment.