From d02e5944ebbfb4166110859a68feb2c2fb2f144a Mon Sep 17 00:00:00 2001 From: dennisbrookner Date: Thu, 21 Dec 2023 12:12:48 -0500 Subject: [PATCH] continue incorporation of weighting --- src/matchmaps/_compute_realspace_diff.py | 22 +++++++++++++++++++--- src/matchmaps/_utils.py | 16 +++++++++++----- 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/src/matchmaps/_compute_realspace_diff.py b/src/matchmaps/_compute_realspace_diff.py index a117d25..0b89ea0 100755 --- a/src/matchmaps/_compute_realspace_diff.py +++ b/src/matchmaps/_compute_realspace_diff.py @@ -50,6 +50,7 @@ def compute_realspace_difference_map( eff : str = None, keep_temp_files : str = None, radius : float = 5, + alpha : float = 0, no_bss = False ): """ @@ -98,10 +99,12 @@ def compute_realspace_difference_map( If not None, the name of a subdirectory of the output_dir into which intermediate matchmaps files are moved upon program completion. radius : float, optional Maximum distance away from protein model to include voxels. Only applies to the "unmasked" difference map output. + alpha : float, optional + Alpha to use in error weighting of F-obs prior to Fourier Transform. Defaults to 0, i.e. no weighting. no_bss : bool, optional If True, skip bulk solvent scaling feature of phenix.refine """ - + _validate_environment(ccp4=True) output_dir_contents = list(output_dir.glob("*")) @@ -206,11 +209,12 @@ def compute_realspace_difference_map( # TO-DO: Figure out why phenix outputs are sometimes still split into (+) and (-) columns, even when I specify that anomalous=False # As a workaround, even anomalous files have a single 'F-obs-filtered' column, so I can always just use that. 
fg_off = make_floatgrid_from_mtz( - mtzoff, spacing, F="F-obs-filtered", Phi="PH2FOFCWT", spacegroup="P1", dmin=dmin + mtzoff, spacing, F="F-obs-filtered", SigF="SIGF-obs-filtered", Phi="PH2FOFCWT", spacegroup="P1", dmin=dmin, alpha=alpha, ) fg_on = make_floatgrid_from_mtz( - mtzon, spacing, F="F-obs-filtered", Phi="PH2FOFCWT", spacegroup="P1", dmin=dmin + mtzon, spacing, F="F-obs-filtered", SigF="SIGF-obs-filtered", Phi="PH2FOFCWT", spacegroup="P1", dmin=dmin, alpha=alpha, ) + if rbr_gemmi is None: _realspace_align_and_subtract( @@ -369,6 +373,18 @@ def parse_arguments(): "By default, cutoff is the resolution limit of the lower-resolution input MTZ. " ), ) + + parser.add_argument( + "--alpha", + required=False, + type=float, + default=0, + help=( + "Alpha to use for error weighting of F-obs prior to Fourier Transform. " + "Weights are computed as: 1 / (1 + alpha * SigF^2 / mean(SigF^2)). " + "Default value is alpha=0, i.e., no weighting is performed. " + ) + ) parser.add_argument( "--unmasked-radius", diff --git a/src/matchmaps/_utils.py b/src/matchmaps/_utils.py index 95e820a..7c9a7a4 100644 --- a/src/matchmaps/_utils.py +++ b/src/matchmaps/_utils.py @@ -108,7 +108,7 @@ def _subparser(selection): def make_floatgrid_from_mtz( - mtz, spacing, F, SigF, Phi, spacegroup="P1", dmin=None, alpha=0 + mtz, spacing, F, SigF, Phi, spacegroup="P1", dmin=None, alpha=0.2 ): """ Make a gemmi.FloatGrid from an rs.DataSet. @@ -134,6 +134,7 @@ def make_floatgrid_from_mtz( Fourier transform of mtz, written out as a gemmi object containing a 3D voxel array and various other metadata and methods """ + # drop NAs in either of the specified columns # this has the secondary purpose of not silently modifying the input mtz new_mtz = mtz[~mtz[F].isnull()] @@ -151,17 +152,22 @@ def make_floatgrid_from_mtz( ] # apply weighting + # note: if alpha==0, then these numbers all just become 1, i.e. 
no weighting weights = 1 / ( 1 + ( - 0.05 - * mtz["SIGF-obs-filtered"] ** 2 - / np.mean(mtz["SIGF-obs-filtered"] ** 2) + alpha + * new_mtz[SigF] ** 2 + / np.mean(new_mtz[SigF] ** 2) ) ) + + if 'weighted_Fobs' in new_mtz.columns: + raise NotImplementedError('Error: mtz already contains a column named weighted_Fobs; email Dennis bugging him to support this') + new_mtz['weighted_Fobs'] = new_mtz[F] * weights # perform FFT using the desired amplitudes and phases - new_mtz["Fcomplex"] = new_mtz.to_structurefactor(F, Phi) + new_mtz["Fcomplex"] = new_mtz.to_structurefactor("weighted_Fobs", Phi) reciprocal_grid = new_mtz.to_reciprocal_grid("Fcomplex", grid_size=gridsize) real_grid = np.real(np.fft.fftn(reciprocal_grid))