From 81f997987250a7b0e5c55b4e180cf7852aa6fff7 Mon Sep 17 00:00:00 2001 From: dennisbrookner Date: Tue, 21 May 2024 15:36:37 -0400 Subject: [PATCH 01/21] update _validate_environment to return a phenix version string --- src/matchmaps/_utils.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/matchmaps/_utils.py b/src/matchmaps/_utils.py index 31afaab..95f698e 100644 --- a/src/matchmaps/_utils.py +++ b/src/matchmaps/_utils.py @@ -22,6 +22,8 @@ def _validate_environment(ccp4): """ Check if the environment contains phenix (and if necessary, ccp4) and throw a helpful error if not + + If the function runs successfully, it returns a string of form "X.XX" denoting the major and minor version of the phenix detected, e.g. "1.20" or "1.21". """ if shutil.which("phenix.refine") is None: @@ -37,11 +39,13 @@ def _validate_environment(ccp4): version_string = str(version_printout.stdout) - if version_string.find('21') > 0: - raise NotImplementedError("It seems that you are using phenix 1.21, which is not yet supported by matchmaps" - "\n" - "Please use phenix 1.20 or earlier.") + # if version_string.find('21') > 0: + # raise NotImplementedError("It seems that you are using phenix 1.21, which is not yet supported by matchmaps" + # "\n" + # "Please use phenix 1.20 or earlier.") + phenix_version = '.'.join(version_string.split(': ')[1].split('.')[:-1]) + if ccp4: if shutil.which("scaleit") is None: raise OSError( @@ -50,6 +54,11 @@ def _validate_environment(ccp4): "For more information, see https://rs-station.github.io/matchmaps/quickstart.html#additional-dependencies" ) + print(f'Detected phenix {phenix_version} in your environment.', + '\n', + 'If this is not the version you are using, please specify the version directly via the --phenix-version flag') + + return phenix_version def _rbr_selection_parser(rbr_selections): # end early and return nones if this feature isn't being used From 0fdb1b07ed06ce54bde4690a2a5165731a3bcdd6 Mon Sep 17 00:00:00 2001 From: dennisbrookner Date: Tue, 21 May 2024 15:36:55 -0400 Subject: [PATCH 02/21] start slogging through adding a phenix_version parameter for refinement --- src/matchmaps/_compute_realspace_diff.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/matchmaps/_compute_realspace_diff.py b/src/matchmaps/_compute_realspace_diff.py index 9b55bc6..7087abd 100755 --- a/src/matchmaps/_compute_realspace_diff.py +++ b/src/matchmaps/_compute_realspace_diff.py @@ -51,7 +51,8 @@ def compute_realspace_difference_map( keep_temp_files : str = None, radius : float = 5, alpha : float = 0, - no_bss = False + no_bss = False, + phenix_version: str = None, ): """ Compute a real-space difference map from mtzs. @@ -103,9 +104,14 @@ def compute_realspace_difference_map( Alpha to use in error weighting of F-obs prior to Fourier Transform. Defaults to 0, e.g. no weighting. no_bss : bool, optional If True, skip bulk solvent scaling feature of phenix.refine + phenix_version: str, optional + Phenix version string to override the automatically detected version. I don't know why this would be necessary. """ - _validate_environment(ccp4=True) + auto_phenix_version = _validate_environment(ccp4=True) + + if not phenix_version: + phenix_version = auto_phenix_version output_dir_contents = list(output_dir.glob("*")) @@ -173,6 +179,7 @@ def compute_realspace_difference_map( verbose=verbose, rbr_selections=rbr_phenix, no_bss=no_bss, + phenix_version=phenix_version, ) print(f"{time.strftime('%H:%M:%S')}: Running phenix.refine for the 'off' data...") @@ -188,6 +195,7 @@ def compute_realspace_difference_map( rbr_selections=rbr_phenix, off_labels=f"{Foff},{SigFoff}", no_bss=no_bss, + phenix_version=phenix_version, ) # read back in the files created by phenix From f73d003ba0f2441940ed91e5094aadc423ba3100 Mon Sep 17 00:00:00 2001 From: dennisbrookner Date: Tue, 28 May 2024 14:23:15 -0400 Subject: [PATCH 03/21] add argument for phenix_version --- src/matchmaps/_compute_realspace_diff.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/matchmaps/_compute_realspace_diff.py b/src/matchmaps/_compute_realspace_diff.py index 7087abd..3e2892d 100755 --- a/src/matchmaps/_compute_realspace_diff.py +++ b/src/matchmaps/_compute_realspace_diff.py @@ -454,6 +454,16 @@ def parse_arguments(): "Note that this file is written out in the current working directory, NOT the input or output directories" ) ) + + parser.add_argument( + "--phenix-version", + required=False, + help=( + "Specify phenix version as a string, e.g. '1.20'. " + "If omitted, matchmaps will attempt to automatically detect the version in use " + "by analyzing the output of phenix.version" + ) + ) return parser @@ -492,6 +502,7 @@ def main(): on_as_stationary=args.on_as_stationary, keep_temp_files=args.keep_temp_files, no_bss = args.no_bss, + phenix_version = args.phenix_version, ) if args.script: From 593f6489f3ee2d0ac0c8afe658f07b994a89bc5a Mon Sep 17 00:00:00 2001 From: dennisbrookner Date: Tue, 28 May 2024 18:36:10 -0400 Subject: [PATCH 04/21] Add preliminary phenix 1.21 support for main matchmaps utility --- .idea/.gitignore | 8 + .../inspectionProfiles/profiles_settings.xml | 6 + .idea/matchmaps.iml | 16 + .idea/misc.xml | 9 + .idea/modules.xml | 8 + .idea/vcs.xml | 6 + src/matchmaps/_compute_mr_diff.py | 5 +- src/matchmaps/_compute_ncs_diff.py | 3 +- src/matchmaps/_compute_realspace_diff.py | 7 +- src/matchmaps/_phenix_utils.py | 480 ++++++++++++++++++ src/matchmaps/_utils.py | 293 ----------- 11 files changed, 538 insertions(+), 303 deletions(-) create mode 100644 .idea/.gitignore create mode 100644 .idea/inspectionProfiles/profiles_settings.xml create mode 100644 .idea/matchmaps.iml create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/vcs.xml create mode 100644 src/matchmaps/_phenix_utils.py diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..13566b8 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/matchmaps.iml b/.idea/matchmaps.iml new file mode 100644 index 0000000..a9cf77c --- /dev/null +++ b/.idea/matchmaps.iml @@ -0,0 +1,16 @@ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..f60d611 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,9 @@ + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..16524d8 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..35eb1dd --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/src/matchmaps/_compute_mr_diff.py b/src/matchmaps/_compute_mr_diff.py index 9d8f453..3f20207 100644 --- a/src/matchmaps/_compute_mr_diff.py +++ b/src/matchmaps/_compute_mr_diff.py @@ -15,19 +15,17 @@ from matchmaps._utils import ( _handle_special_positions, make_floatgrid_from_mtz, - rigid_body_refinement_wrapper, _realspace_align_and_subtract, _rbr_selection_parser, - _remove_waters, _restore_ligand_occupancy, _validate_environment, _validate_inputs, - phaser_wrapper, _clean_up_files, _cif_or_pdb_to_pdb, _cif_or_mtz_to_mtz, _write_script, ) +from matchmaps._phenix_utils import rigid_body_refinement_wrapper, phaser_wrapper, _remove_waters def compute_mr_difference_map( @@ -175,7 +173,6 @@ def compute_mr_difference_map( verbose=verbose, rbr_selections=rbr_phenix, off_labels=f"{Foff},{SigFoff}", - mr_off=True, no_bss=no_bss, ) diff --git a/src/matchmaps/_compute_ncs_diff.py b/src/matchmaps/_compute_ncs_diff.py index fcd1b5d..5b638e6 100644 --- a/src/matchmaps/_compute_ncs_diff.py +++ b/src/matchmaps/_compute_ncs_diff.py @@ -16,10 +16,8 @@ from matchmaps._utils import ( _handle_special_positions, make_floatgrid_from_mtz, - rigid_body_refinement_wrapper, _realspace_align_and_subtract, _rbr_selection_parser, - _renumber_waters, _ncs_align_and_subtract, _validate_environment, _validate_inputs, @@ -28,6 +26,7 @@ _cif_or_mtz_to_mtz, _write_script, ) +from matchmaps._phenix_utils import rigid_body_refinement_wrapper, _renumber_waters def compute_ncs_difference_map( diff --git a/src/matchmaps/_compute_realspace_diff.py b/src/matchmaps/_compute_realspace_diff.py index 3e2892d..6a23703 100755 --- a/src/matchmaps/_compute_realspace_diff.py +++ b/src/matchmaps/_compute_realspace_diff.py @@ -18,10 +18,8 @@ from matchmaps._utils import ( _handle_special_positions, make_floatgrid_from_mtz, - rigid_body_refinement_wrapper, _realspace_align_and_subtract, _rbr_selection_parser, - _renumber_waters, _clean_up_files, _validate_environment, _validate_inputs, @@ -29,6 +27,7 @@ _cif_or_pdb_to_pdb, _write_script, ) +from matchmaps._phenix_utils import rigid_body_refinement_wrapper, _renumber_waters def compute_realspace_difference_map( @@ -179,7 +178,7 @@ def compute_realspace_difference_map( verbose=verbose, rbr_selections=rbr_phenix, no_bss=no_bss, - phenix_version=phenix_version, + phenix_style=phenix_version, ) print(f"{time.strftime('%H:%M:%S')}: Running phenix.refine for the 'off' data...") @@ -195,7 +194,7 @@ def compute_realspace_difference_map( rbr_selections=rbr_phenix, off_labels=f"{Foff},{SigFoff}", no_bss=no_bss, - phenix_version=phenix_version, + phenix_style=phenix_version, ) # read back in the files created by phenix diff --git a/src/matchmaps/_phenix_utils.py b/src/matchmaps/_phenix_utils.py new file mode 100644 index 0000000..24c9667 --- /dev/null +++ b/src/matchmaps/_phenix_utils.py @@ -0,0 +1,480 @@ +import shutil +import subprocess +import time +from pathlib import Path + +import reciprocalspaceship as rs + + +def rigid_body_refinement_wrapper( + mtzon, + pdboff, + input_dir, + output_dir, + phenix_style, + off_labels=None, + ligands=None, + eff=None, + verbose=False, + rbr_selections=None, + mr_on=False, + no_bss=False, +): + if phenix_style == "1.20": + + output = rigid_body_refinement_wrapper_120style( + eff, + input_dir, + ligands, + mr_on, + mtzon, + no_bss, + off_labels, + output_dir, + pdboff, + rbr_selections, + verbose, + ) + + return output + + elif phenix_style == "1.21": + + output = rigid_body_refinement_wrapper_121style( + eff, + input_dir, + ligands, + mr_on, + mtzon, + no_bss, + off_labels, + output_dir, + pdboff, + rbr_selections, + verbose, + ) + + return output + + +def rigid_body_refinement_wrapper_121style( + eff, + input_dir, + ligands, + mr_on, + mtzon, + no_bss, + off_labels, + output_dir, + pdboff, + rbr_selections, + verbose, +): + if eff is None: + eff_contents = """ +data_manager { + model { + file = pdb_input + } + miller_array { + file = mtz_input + labels { + name = columns + } + } + fmodel { + xray_data { + r_free_flags { + generate = True + } + } + } +} +refinement { + crystal_symmetry { + unit_cell = cell_parameters + space_group = sg + } + output { + write_def_file = False + write_eff_file = False + write_geo_file = False + } + electron_density_maps { + apply_default_maps = False + map_coefficients { + map_type = "2mFo-DFc" + mtz_label_amplitudes = "2FOFCWT" + mtz_label_phases = "PH2FOFCWT" + } + map_coefficients { + map_type = "mFo-DFc" + mtz_label_amplitudes = "FOFCWT" + mtz_label_phases = "PHFOFCWT" + } + } + refine { + strategy = *rigid_body + sites { + rigid_body_sites + } + } + main { + number_of_macro_cycles = 1 + nproc = 8 + bulk_solvent_and_scale=bss + nqh_flips=False + } +} +output { + prefix = '''nickname''' + serial = 1 + serial_format = "%d" +} + """ + else: + with open(input_dir + eff) as file: + eff_contents = file.read() + if (off_labels is None) or (mr_on): + nickname = f"{mtzon.name.removesuffix('.mtz')}_rbr_to_{pdboff.name.removesuffix('.pdb')}" + else: + nickname = f"{mtzon.name.removesuffix('.mtz')}_rbr_to_self" + #### + # update this logic in the future if matchmaps.mr changes + # mtz_location = input_dir if (mr_on or mr_off) else output_dir + #### + similar_files = list(output_dir.glob(f"{nickname}_[0-9]_1.*")) + if len(similar_files) == 0: + nickname += "_0" + else: + nums = [] + for s in similar_files: + try: + nums.append(int(str(s).split("_")[-2])) + except ValueError: + pass + nickname += f"_{max(nums) + 1}" + # read in mtz to access cell parameters and spacegroup + mtz = rs.read_mtz(str(mtzon)) + cell_string = f"{mtz.cell.a} {mtz.cell.b} {mtz.cell.c} {mtz.cell.alpha} {mtz.cell.beta} {mtz.cell.gamma}" + sg = mtz.spacegroup.short_name() + # name for modified refinement file + eff = output_dir / f"params_{nickname}.eff" + params = { + "sg": sg, + "cell_parameters": cell_string, + "bss": str(not no_bss), + "pdb_input": str(pdboff), + "mtz_input": str(mtzon), + "nickname": str(output_dir / nickname), + } + if off_labels is None: + params["columns"] = "FPH1,SIGFPH1" # names from scaleit output + else: + params["columns"] = off_labels # user-provided column nanes + # if selection is not None: + # params["all"] = selection # overwrite atom selection + for key, value in params.items(): + eff_contents = eff_contents.replace(key, value) + # either add ligands to .eff file or delete "ligands" placeholder + if ligands is not None: + ligand_string = "\n".join([f"file_name = '{l}'" for l in ligands]) + eff_contents = eff_contents.replace("ligands", ligand_string) + else: + eff_contents = eff_contents.replace("ligands", "") + if rbr_selections is not None: + selection_string = "\n".join( + [f"rigid_body = '{sel}'" for sel in rbr_selections] + ) + eff_contents = eff_contents.replace("rigid_body_sites", selection_string) + else: + eff_contents = eff_contents.replace("rigid_body_sites", "rigid_body = all") + # write out customized .eff file for use by phenix + with open(eff, "w") as file: + file.write(eff_contents) + # run refinement! + # print refinement output to terminal if user supplied the --verbose flag + subprocess.run( + f"phenix.refine {eff}", + shell=True, + capture_output=(not verbose), + ) + + return output_dir / nickname + + +def rigid_body_refinement_wrapper_120style( + eff, + input_dir, + ligands, + mr_on, + mtzon, + no_bss, + off_labels, + output_dir, + pdboff, + rbr_selections, + verbose, +): + if eff is None: + eff_contents = """ +refinement { + crystal_symmetry { + unit_cell = cell_parameters + space_group = sg + } + input { + pdb { + file_name = pdb_input + } + xray_data { + file_name = "mtz_input" + labels = columns + r_free_flags { + generate=True + } + force_anomalous_flag_to_be_equal_to = False + } + monomers { + ligands + } + } + output { + prefix = '''nickname''' + serial = 1 + serial_format = "%d" + job_title = '''nickname''' + write_def_file = False + write_eff_file = False + write_geo_file = False + } + electron_density_maps { + map_coefficients { + map_type = "2mFo-DFc" + mtz_label_amplitudes = "2FOFCWT" + mtz_label_phases = "PH2FOFCWT" + } + map_coefficients { + map_type = "mFo-DFc" + mtz_label_amplitudes = "FOFCWT" + mtz_label_phases = "PHFOFCWT" + } + } + refine { + strategy = *rigid_body + sites { + rigid_body_sites + } + } + main { + number_of_macro_cycles = 1 + nproc = 8 + bulk_solvent_and_scale=bss + nqh_flips=False + } +} + """ + else: + with open(input_dir + eff) as file: + eff_contents = file.read() + if (off_labels is None) or (mr_on): + nickname = f"{mtzon.name.removesuffix('.mtz')}_rbr_to_{pdboff.name.removesuffix('.pdb')}" + else: + nickname = f"{mtzon.name.removesuffix('.mtz')}_rbr_to_self" + #### + # update this logic in the future if matchmaps.mr changes + # mtz_location = input_dir if (mr_on or mr_off) else output_dir + #### + similar_files = list(output_dir.glob(f"{nickname}_[0-9]_1.*")) + if len(similar_files) == 0: + nickname += "_0" + else: + nums = [] + for s in similar_files: + try: + nums.append(int(str(s).split("_")[-2])) + except ValueError: + pass + nickname += f"_{max(nums) + 1}" + # read in mtz to access cell parameters and spacegroup + mtz = rs.read_mtz(str(mtzon)) + cell_string = f"{mtz.cell.a} {mtz.cell.b} {mtz.cell.c} {mtz.cell.alpha} {mtz.cell.beta} {mtz.cell.gamma}" + sg = mtz.spacegroup.short_name() + # name for modified refinement file + eff = output_dir / f"params_{nickname}.eff" + params = { + "sg": sg, + "cell_parameters": cell_string, + "bss": str(not no_bss), + "pdb_input": str(pdboff), + "mtz_input": str(mtzon), + "nickname": str(output_dir / nickname), + } + if off_labels is None: + params["columns"] = "FPH1,SIGFPH1" # names from scaleit output + else: + params["columns"] = off_labels # user-provided column nanes + # if selection is not None: + # params["all"] = selection # overwrite atom selection + for key, value in params.items(): + eff_contents = eff_contents.replace(key, value) + # either add ligands to .eff file or delete "ligands" placeholder + if ligands is not None: + ligand_string = "\n".join([f"file_name = '{l}'" for l in ligands]) + eff_contents = eff_contents.replace("ligands", ligand_string) + else: + eff_contents = eff_contents.replace("ligands", "") + if rbr_selections is not None: + selection_string = "\n".join( + [f"rigid_body = '{sel}'" for sel in rbr_selections] + ) + eff_contents = eff_contents.replace("rigid_body_sites", selection_string) + else: + eff_contents = eff_contents.replace("rigid_body_sites", "rigid_body = all") + # write out customized .eff file for use by phenix + with open(eff, "w") as file: + file.write(eff_contents) + # run refinement! + # print refinement output to terminal if user supplied the --verbose flag + subprocess.run( + f"phenix.refine {eff}", + shell=True, + capture_output=(not verbose), + ) + + return output_dir / nickname + + +def phaser_wrapper( + mtzfile, + pdb, + input_dir, + output_dir, + off_labels, + eff=None, + verbose=False, +): + """ + Handle simple phaser run from the command line + """ + + if shutil.which("phenix.phaser") is None: + raise OSError( + "Cannot find executable, phenix.phaser. Please set up your phenix environment." + ) + + if eff is None: + eff_contents = """ +phaser { + mode = ANO CCA EP_AUTO *MR_AUTO MR_FRF MR_FTF MR_PAK MR_RNP NMAXYZ SCEDS + hklin = mtz_input + labin = labels + model = pdb_input + model_identity = 100 + component_copies = 1 + search_copies = 1 + chain_type = *protein dna rna + crystal_symmetry { + unit_cell = cell_parameters + space_group = sg + } + keywords { + general { + root = '''nickname''' + title = '''matchmaps_MR''' + mute = None + xyzout = True + xyzout_ensemble = True + hklout = True + jobs = 6 + } + } +} + """ + else: + raise NotImplementedError("Custom phaser specifications are not yet supported") + + nickname = f"{mtzfile.name.removesuffix('.mtz')}_phased_with_{pdb.name.removesuffix('.pdb')}" + + similar_files = list(output_dir.glob(f"{nickname}_*")) + if len(similar_files) == 0: + nickname += "_0" + else: + nums = [] + for s in similar_files: + try: + nums.append(int(str(s).split("_")[-1].split(".")[0])) + except ValueError: + pass + nickname += f"_{max(nums) + 1}" + + mtz = rs.read_mtz(str(mtzfile)) + cell_string = f"{mtz.cell.a} {mtz.cell.b} {mtz.cell.c} {mtz.cell.alpha} {mtz.cell.beta} {mtz.cell.gamma}" + sg = mtz.spacegroup.short_name() + + eff = output_dir / f"params_{nickname}.eff" + + params = { + "sg": sg, + "cell_parameters": cell_string, + "pdb_input": str(pdb), + "mtz_input": str(mtzfile), + "nickname": str(output_dir / nickname), + "labels": off_labels, # should be prepackaged as a string + } + + for key, value in params.items(): + eff_contents = eff_contents.replace(key, value) + + with open(eff, "w") as file: + file.write(eff_contents) + + subprocess.run( + f"phenix.phaser {eff}", + shell=True, + capture_output=(not verbose), + ) + + return output_dir / nickname + + +def _renumber_waters(pdb): + """ + Call phenix.sort_hetatms to place waters onto the nearest protein chain. This ensures that rbr selections handle waters properly + + Parameters + ---------- + pdb : str + name of pdb file + dir : str + directory in which pdb file lives + """ + + pdb_renumbered = Path(str(pdb).removesuffix(".pdb") + "_renumbered.pdb") + + subprocess.run( + f"phenix.sort_hetatms file_name={pdb} output_file={pdb_renumbered}", + shell=True, + capture_output=True, + ) + + print(f"{time.strftime('%H:%M:%S')}: Moved waters to nearest protein chains...") + + return pdb_renumbered + + +def _remove_waters( + pdb, + output_dir, +): + pdb_dry = pdb.name.removesuffix(".pdb") + "_dry" + + subprocess.run( + f"phenix.pdbtools {pdb} remove='water' \ + output.prefix='{output_dir}/' \ + output.suffix='{pdb_dry}'", + shell=True, + capture_output=True, + ) + + return output_dir / (pdb_dry + ".pdb") diff --git a/src/matchmaps/_utils.py b/src/matchmaps/_utils.py index 95f698e..7766911 100644 --- a/src/matchmaps/_utils.py +++ b/src/matchmaps/_utils.py @@ -6,14 +6,12 @@ """ import os -import glob import shutil import subprocess import time import re from functools import partial from pathlib import Path -from IPython import embed import gemmi import numpy as np import reciprocalspaceship as rs @@ -218,162 +216,6 @@ def make_floatgrid_from_mtz( return float_grid -def rigid_body_refinement_wrapper( - mtzon, - pdboff, - input_dir, - output_dir, - off_labels=None, - ligands=None, - eff=None, - verbose=False, - rbr_selections=None, - mr_on=False, - mr_off=False, - no_bss=False, -): - if eff is None: - eff_contents = """ -refinement { - crystal_symmetry { - unit_cell = cell_parameters - space_group = sg - } - input { - pdb { - file_name = pdb_input - } - xray_data { - file_name = "mtz_input" - labels = columns - r_free_flags { - generate=True - } - force_anomalous_flag_to_be_equal_to = False - } - monomers { - ligands - } - } - output { - prefix = '''nickname''' - serial = 1 - serial_format = "%d" - job_title = '''nickname''' - write_def_file = False - write_eff_file = False - write_geo_file = False - } - electron_density_maps { - map_coefficients { - map_type = "2mFo-DFc" - mtz_label_amplitudes = "2FOFCWT" - mtz_label_phases = "PH2FOFCWT" - } - map_coefficients { - map_type = "mFo-DFc" - mtz_label_amplitudes = "FOFCWT" - mtz_label_phases = "PHFOFCWT" - } - } - refine { - strategy = *rigid_body - sites { - rigid_body_sites - } - } - main { - number_of_macro_cycles = 1 - nproc = 8 - bulk_solvent_and_scale=bss - nqh_flips=False - } -} - """ - else: - with open(input_dir + eff) as file: - eff_contents = file.read() - - if (off_labels is None) or (mr_on): - nickname = f"{mtzon.name.removesuffix('.mtz')}_rbr_to_{pdboff.name.removesuffix('.pdb')}" - else: - nickname = f"{mtzon.name.removesuffix('.mtz')}_rbr_to_self" - - #### - # update this logic in the future if matchmaps.mr changes - # mtz_location = input_dir if (mr_on or mr_off) else output_dir - #### - - similar_files = list(output_dir.glob(f"{nickname}_[0-9]_1.*")) - if len(similar_files) == 0: - nickname += "_0" - else: - nums = [] - for s in similar_files: - try: - nums.append(int(str(s).split("_")[-2])) - except ValueError: - pass - nickname += f"_{max(nums)+1}" - - # read in mtz to access cell parameters and spacegroup - mtz = rs.read_mtz(str(mtzon)) - cell_string = f"{mtz.cell.a} {mtz.cell.b} {mtz.cell.c} {mtz.cell.alpha} {mtz.cell.beta} {mtz.cell.gamma}" - sg = mtz.spacegroup.short_name() - - # name for modified refinement file - eff = output_dir / f"params_{nickname}.eff" - - params = { - "sg": sg, - "cell_parameters": cell_string, - "bss": str(not no_bss), - "pdb_input": str(pdboff), - "mtz_input": str(mtzon), - "nickname": str(output_dir / nickname), - } - - if off_labels is None: - params["columns"] = "FPH1,SIGFPH1" # names from scaleit output - else: - params["columns"] = off_labels # user-provided column nanes - - # if selection is not None: - # params["all"] = selection # overwrite atom selection - - for key, value in params.items(): - eff_contents = eff_contents.replace(key, value) - - # either add ligands to .eff file or delete "ligands" placeholder - if ligands is not None: - ligand_string = "\n".join([f"file_name = '{l}'" for l in ligands]) - eff_contents = eff_contents.replace("ligands", ligand_string) - else: - eff_contents = eff_contents.replace("ligands", "") - - if rbr_selections is not None: - selection_string = "\n".join( - [f"rigid_body = '{sel}'" for sel in rbr_selections] - ) - eff_contents = eff_contents.replace("rigid_body_sites", selection_string) - else: - eff_contents = eff_contents.replace("rigid_body_sites", "rigid_body = all") - - # write out customized .eff file for use by phenix - with open(eff, "w") as file: - file.write(eff_contents) - - # run refinement! - # print refinement output to terminal if user supplied the --verbose flag - subprocess.run( - f"phenix.refine {eff}", - shell=True, - capture_output=(not verbose), - ) - - return output_dir / nickname - - def _handle_special_positions(pdboff, output_dir): """ Check if any waters happen to sit on special positions, and if so, remove them. @@ -432,141 +274,6 @@ def _handle_special_positions(pdboff, output_dir): return pdboff_nospecialpositions -def _renumber_waters(pdb): - """ - Call phenix.sort_hetatms to place waters onto the nearest protein chain. This ensures that rbr selections handle waters properly - - Parameters - ---------- - pdb : str - name of pdb file - dir : str - directory in which pdb file lives - """ - - pdb_renumbered = Path(str(pdb).removesuffix(".pdb") + "_renumbered.pdb") - - subprocess.run( - f"phenix.sort_hetatms file_name={pdb} output_file={pdb_renumbered}", - shell=True, - capture_output=True, - ) - - print(f"{time.strftime('%H:%M:%S')}: Moved waters to nearest protein chains...") - - return pdb_renumbered - - -def _remove_waters( - pdb, - output_dir, -): - pdb_dry = pdb.name.removesuffix(".pdb") + "_dry" - - subprocess.run( - f"phenix.pdbtools {pdb} remove='water' \ - output.prefix='{output_dir}/' \ - output.suffix='{pdb_dry}'", - shell=True, - capture_output=True, - ) - - return output_dir / (pdb_dry + ".pdb") - - -def phaser_wrapper( - mtzfile, - pdb, - input_dir, - output_dir, - off_labels, - eff=None, - verbose=False, -): - """ - Handle simple phaser run from the command line - """ - - if shutil.which("phenix.phaser") is None: - raise OSError( - "Cannot find executable, phenix.phaser. Please set up your phenix environment." - ) - - if eff is None: - eff_contents = """ -phaser { - mode = ANO CCA EP_AUTO *MR_AUTO MR_FRF MR_FTF MR_PAK MR_RNP NMAXYZ SCEDS - hklin = mtz_input - labin = labels - model = pdb_input - model_identity = 100 - component_copies = 1 - search_copies = 1 - chain_type = *protein dna rna - crystal_symmetry { - unit_cell = cell_parameters - space_group = sg - } - keywords { - general { - root = '''nickname''' - title = '''matchmaps_MR''' - mute = None - xyzout = True - xyzout_ensemble = True - hklout = True - jobs = 6 - } - } -} - """ - else: - raise NotImplementedError("Custom phaser specifications are not yet supported") - - nickname = f"{mtzfile.name.removesuffix('.mtz')}_phased_with_{pdb.name.removesuffix('.pdb')}" - - similar_files = list(output_dir.glob(f"{nickname}_*")) - if len(similar_files) == 0: - nickname += "_0" - else: - nums = [] - for s in similar_files: - try: - nums.append(int(str(s).split("_")[-1].split(".")[0])) - except ValueError: - pass - nickname += f"_{max(nums)+1}" - - mtz = rs.read_mtz(str(mtzfile)) - cell_string = f"{mtz.cell.a} {mtz.cell.b} {mtz.cell.c} {mtz.cell.alpha} {mtz.cell.beta} {mtz.cell.gamma}" - sg = mtz.spacegroup.short_name() - - eff = output_dir / f"params_{nickname}.eff" - - params = { - "sg": sg, - "cell_parameters": cell_string, - "pdb_input": str(pdb), - "mtz_input": str(mtzfile), - "nickname": str(output_dir / nickname), - "labels": off_labels, # should be prepackaged as a string - } - - for key, value in params.items(): - eff_contents = eff_contents.replace(key, value) - - with open(eff, "w") as file: - file.write(eff_contents) - - subprocess.run( - f"phenix.phaser {eff}", - shell=True, - capture_output=(not verbose), - ) - - return output_dir / nickname - - def _restore_ligand_occupancy( pdb_to_be_restored, original_pdb, From 9723c3232d8a8151c62c697008d278c07e0be139 Mon Sep 17 00:00:00 2001 From: dennisbrookner Date: Tue, 28 May 2024 21:30:03 -0400 Subject: [PATCH 05/21] refactor rbr wrapper more elegantly --- src/matchmaps/_phenix_utils.py | 276 +++++++++------------------------ 1 file changed, 75 insertions(+), 201 deletions(-) diff --git a/src/matchmaps/_phenix_utils.py b/src/matchmaps/_phenix_utils.py index 24c9667..8fbe528 100644 --- a/src/matchmaps/_phenix_utils.py +++ b/src/matchmaps/_phenix_utils.py @@ -5,72 +5,66 @@ import reciprocalspaceship as rs - -def rigid_body_refinement_wrapper( - mtzon, - pdboff, - input_dir, - output_dir, - phenix_style, - off_labels=None, - ligands=None, - eff=None, - verbose=False, - rbr_selections=None, - mr_on=False, - no_bss=False, -): - if phenix_style == "1.20": - - output = rigid_body_refinement_wrapper_120style( - eff, - input_dir, - ligands, - mr_on, - mtzon, - no_bss, - off_labels, - output_dir, - pdboff, - rbr_selections, - verbose, - ) - - return output - - elif phenix_style == "1.21": - - output = rigid_body_refinement_wrapper_121style( - eff, - input_dir, - ligands, - mr_on, - mtzon, - no_bss, - off_labels, - output_dir, - pdboff, - rbr_selections, - verbose, - ) - - return output - - -def rigid_body_refinement_wrapper_121style( - eff, - input_dir, - ligands, - mr_on, - mtzon, - no_bss, - off_labels, - output_dir, - pdboff, - rbr_selections, - verbose, -): - if eff is None: +def _auto_eff_template(phenix_style: str): + if phenix_style == '1.20': + eff_contents = """ + refinement { + crystal_symmetry { + unit_cell = cell_parameters + space_group = sg + } + input { + pdb { + file_name = pdb_input + } + xray_data { + file_name = "mtz_input" + labels = columns + r_free_flags { + generate=True + } + force_anomalous_flag_to_be_equal_to = False + } + monomers { + ligands + } + } + output { + prefix = '''nickname''' + serial = 1 + serial_format = "%d" + job_title = '''nickname''' + write_def_file = False + write_eff_file = False + write_geo_file = False + } + electron_density_maps { + map_coefficients { + map_type = "2mFo-DFc" + mtz_label_amplitudes = "2FOFCWT" + mtz_label_phases = "PH2FOFCWT" + } + map_coefficients { + map_type = "mFo-DFc" + mtz_label_amplitudes = "FOFCWT" + mtz_label_phases = "PHFOFCWT" + } + } + refine { + strategy = *rigid_body + sites { + rigid_body_sites + } + } + main { + number_of_macro_cycles = 1 + nproc = 8 + bulk_solvent_and_scale=bss + nqh_flips=False + } + } + """ + elif phenix_style == '1.21': eff_contents = """ data_manager { model { @@ -133,150 +127,30 @@ def rigid_body_refinement_wrapper_121style( } """ else: - with open(input_dir + eff) as file: - eff_contents = file.read() - if (off_labels is None) or (mr_on): - nickname = f"{mtzon.name.removesuffix('.mtz')}_rbr_to_{pdboff.name.removesuffix('.pdb')}" - else: - nickname = f"{mtzon.name.removesuffix('.mtz')}_rbr_to_self" - #### - # update this logic in the future if matchmaps.mr changes - # mtz_location = input_dir if (mr_on or mr_off) else output_dir - #### - similar_files = list(output_dir.glob(f"{nickname}_[0-9]_1.*")) - if len(similar_files) == 0: - nickname += "_0" - else: - nums = [] - for s in similar_files: - try: - nums.append(int(str(s).split("_")[-2])) - except ValueError: - pass - nickname += f"_{max(nums) + 1}" - # read in mtz to access cell parameters and spacegroup - mtz = rs.read_mtz(str(mtzon)) - cell_string = f"{mtz.cell.a} {mtz.cell.b} {mtz.cell.c} {mtz.cell.alpha} {mtz.cell.beta} {mtz.cell.gamma}" - sg = mtz.spacegroup.short_name() - # name for modified refinement file - eff = output_dir / f"params_{nickname}.eff" - params = { - "sg": sg, - "cell_parameters": cell_string, - "bss": str(not no_bss), - "pdb_input": str(pdboff), - "mtz_input": str(mtzon), - "nickname": str(output_dir / nickname), - } - if off_labels is None: - params["columns"] = "FPH1,SIGFPH1" # names from scaleit output - else: - params["columns"] = off_labels # user-provided column nanes - # if selection is not None: - # params["all"] = selection # overwrite atom selection - for key, value in params.items(): - eff_contents = eff_contents.replace(key, value) - # either add ligands to .eff file or delete "ligands" placeholder - if ligands is not None: - ligand_string = "\n".join([f"file_name = '{l}'" for l in ligands]) - eff_contents = eff_contents.replace("ligands", ligand_string) - else: - eff_contents = eff_contents.replace("ligands", "") - if rbr_selections is not None: - selection_string = "\n".join( - [f"rigid_body = '{sel}'" for sel in rbr_selections] - ) - eff_contents = eff_contents.replace("rigid_body_sites", selection_string) - else: - eff_contents = eff_contents.replace("rigid_body_sites", "rigid_body = all") - # write out customized .eff file for use by phenix - with open(eff, "w") as file: - file.write(eff_contents) - # run refinement! - # print refinement output to terminal if user supplied the --verbose flag - subprocess.run( - f"phenix.refine {eff}", - shell=True, - capture_output=(not verbose), - ) - - return output_dir / nickname + raise NotImplementedError('unsupported phenix version') + return eff_contents -def rigid_body_refinement_wrapper_120style( - eff, - input_dir, - ligands, - mr_on, +def rigid_body_refinement_wrapper( mtzon, - no_bss, - off_labels, - output_dir, pdboff, - rbr_selections, - verbose, + input_dir, + output_dir, + phenix_style, + off_labels=None, + ligands=None, + eff=None, + verbose=False, + rbr_selections=None, + mr_on=False, + no_bss=False, ): if eff is None: - eff_contents = """ -refinement { - crystal_symmetry { - unit_cell = cell_parameters - space_group = sg - } - input { - pdb { - file_name = pdb_input - } - xray_data { - file_name = "mtz_input" - labels = columns - r_free_flags { - generate=True - } - force_anomalous_flag_to_be_equal_to = False - } - monomers { - ligands - } - } - output { - prefix = '''nickname''' - serial = 1 - serial_format = "%d" - job_title = '''nickname''' - write_def_file = False - write_eff_file = False - write_geo_file = False - } - electron_density_maps { - map_coefficients { - map_type = "2mFo-DFc" - mtz_label_amplitudes = "2FOFCWT" - mtz_label_phases = "PH2FOFCWT" - } - map_coefficients { - map_type = "mFo-DFc" - mtz_label_amplitudes = "FOFCWT" - mtz_label_phases = "PHFOFCWT" - } - } - refine { - strategy = *rigid_body - sites { - rigid_body_sites - } - } - main { - number_of_macro_cycles = 1 - nproc = 8 - bulk_solvent_and_scale=bss - nqh_flips=False - } -} - """ + eff_contents = _auto_eff_template(phenix_style=phenix_style) else: with open(input_dir + eff) as file: eff_contents = file.read() + if (off_labels is None) or (mr_on): nickname = f"{mtzon.name.removesuffix('.mtz')}_rbr_to_{pdboff.name.removesuffix('.pdb')}" else: From 998d9639e5cf4ac10ea9ca836f69e4d82c2b6de7 Mon Sep 17 00:00:00 2001 From: dennisbrookner Date: Wed, 29 May 2024 10:29:25 -0400 Subject: [PATCH 06/21] add PyCharm files to gitignore --- .gitignore | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 713783f..0cb92af 100644 --- a/.gitignore +++ b/.gitignore @@ -117,4 +117,7 @@ src/.DS_Store # logo in illustrator docs/images/logo.ai -docs/images/logo2.ai \ No newline at end of file +docs/images/logo2.ai + +# PyCharm files +.idea/* \ No newline at end of file From 1c2181c0f48b681cf481c1daded37d8aa6e6db5f Mon Sep 17 00:00:00 2001 From: dennisbrookner Date: Wed, 29 May 2024 10:30:08 -0400 Subject: [PATCH 07/21] PyCharm files removed --- .idea/inspectionProfiles/profiles_settings.xml | 1 + .idea/matchmaps.iml | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml index 105ce2d..dd4c951 100644 --- a/.idea/inspectionProfiles/profiles_settings.xml +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -1,5 +1,6 @@ + diff --git a/.idea/matchmaps.iml b/.idea/matchmaps.iml index a9cf77c..623563c 100644 --- a/.idea/matchmaps.iml +++ b/.idea/matchmaps.iml @@ -4,7 +4,15 @@ + + + + + + + + From e6f62f624636b4dfd1cd553274dac8bf2600e6c5 Mon Sep 17 00:00:00 2001 From: dennisbrookner Date: Wed, 29 May 2024 10:30:54 -0400 Subject: [PATCH 08/21] latest update to phenix utils --- src/matchmaps/_phenix_utils.py | 73 +++++++++++++++++++--------------- 1 file changed, 40 insertions(+), 33 deletions(-) diff --git a/src/matchmaps/_phenix_utils.py b/src/matchmaps/_phenix_utils.py index 8fbe528..e1d14af 100644 --- a/src/matchmaps/_phenix_utils.py +++ b/src/matchmaps/_phenix_utils.py @@ -5,9 +5,10 @@ import reciprocalspaceship as rs -def _auto_eff_template(phenix_style: str): + +def _auto_eff_refinement_template(phenix_style: str): if phenix_style == '1.20': - eff_contents = """ + eff_contents = """ refinement { crystal_symmetry { unit_cell = cell_parameters @@ -127,31 +128,32 @@ def _auto_eff_template(phenix_style: str): } """ else: - raise NotImplementedError('unsupported phenix version') + raise NotImplementedError('Unsupported phenix version') return eff_contents + def rigid_body_refinement_wrapper( - mtzon, - pdboff, - input_dir, - output_dir, - phenix_style, - off_labels=None, - ligands=None, - eff=None, - verbose=False, - rbr_selections=None, - mr_on=False, - no_bss=False, + mtzon, + pdboff, + input_dir, + output_dir, + phenix_style, + off_labels=None, + ligands=None, + eff=None, + verbose=False, + rbr_selections=None, + mr_on=False, + no_bss=False, ): if eff is None: - eff_contents = _auto_eff_template(phenix_style=phenix_style) + eff_contents = _auto_eff_refinement_template(phenix_style=phenix_style) else: with open(input_dir + eff) as file: eff_contents = file.read() - if (off_labels is None) or (mr_on): + if (off_labels is None) or mr_on: nickname = f"{mtzon.name.removesuffix('.mtz')}_rbr_to_{pdboff.name.removesuffix('.pdb')}" else: nickname = f"{mtzon.name.removesuffix('.mtz')}_rbr_to_self" @@ -171,9 +173,8 @@ def rigid_body_refinement_wrapper( pass nickname += f"_{max(nums) + 1}" # read in mtz to access cell parameters and spacegroup - mtz = rs.read_mtz(str(mtzon)) - cell_string = f"{mtz.cell.a} {mtz.cell.b} {mtz.cell.c} {mtz.cell.alpha} {mtz.cell.beta} {mtz.cell.gamma}" - sg = mtz.spacegroup.short_name() + cell_string, sg = _parse_mtz(mtzfile=str(mtzon)) + # name for modified refinement file eff = output_dir / f"params_{nickname}.eff" params = { @@ -220,13 +221,13 @@ def rigid_body_refinement_wrapper( def phaser_wrapper( - mtzfile, - pdb, - input_dir, - output_dir, - off_labels, - eff=None, - verbose=False, + mtzfile, + pdb, + input_dir, + output_dir, + off_labels, + eff=None, + verbose=False, ): """ Handle simple phaser run from the command line @@ -282,9 +283,7 @@ def phaser_wrapper( pass nickname += f"_{max(nums) + 1}" - mtz = rs.read_mtz(str(mtzfile)) - cell_string = f"{mtz.cell.a} {mtz.cell.b} {mtz.cell.c} {mtz.cell.alpha} {mtz.cell.beta} {mtz.cell.gamma}" - sg = mtz.spacegroup.short_name() + cell_string, sg = _parse_mtz(mtzfile) eff = output_dir / f"params_{nickname}.eff" @@ -312,9 +311,17 @@ def phaser_wrapper( return output_dir / nickname +def _parse_mtz(mtzfile): + mtz = rs.read_mtz(str(mtzfile)) + cell_string = f"{mtz.cell.a} {mtz.cell.b} {mtz.cell.c} {mtz.cell.alpha} {mtz.cell.beta} {mtz.cell.gamma}" + sg = mtz.spacegroup.short_name() + return cell_string, sg + + def _renumber_waters(pdb): """ - Call phenix.sort_hetatms to place waters onto the nearest protein chain. This ensures that rbr selections handle waters properly + Call phenix.sort_hetatms to place waters onto the nearest protein chain. + This ensures that rbr selections handle waters properly Parameters ---------- @@ -338,8 +345,8 @@ def _renumber_waters(pdb): def _remove_waters( - pdb, - output_dir, + pdb, + output_dir, ): pdb_dry = pdb.name.removesuffix(".pdb") + "_dry" From 4b88fca1a9efc3134288e7ca67dd0396627411b3 Mon Sep 17 00:00:00 2001 From: dennisbrookner Date: Wed, 29 May 2024 10:50:35 -0400 Subject: [PATCH 09/21] remove extra parameter from phaser wrapper --- src/matchmaps/_compute_mr_diff.py | 11 ++--------- src/matchmaps/_phenix_utils.py | 1 - 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/src/matchmaps/_compute_mr_diff.py b/src/matchmaps/_compute_mr_diff.py index 3f20207..66e4b31 100644 --- a/src/matchmaps/_compute_mr_diff.py +++ b/src/matchmaps/_compute_mr_diff.py @@ -128,15 +128,8 @@ def compute_mr_difference_map( f"{time.strftime('%H:%M:%S')}: Running phenix.phaser to place 'off' model into 'on' data..." ) - phaser_nickname = phaser_wrapper( - mtzfile=mtzon, - pdb=pdboff, - input_dir=input_dir, - output_dir=output_dir, - off_labels=f"{Fon},{SigFon}", - eff=None, - verbose=verbose, - ) + phaser_nickname = phaser_wrapper(mtzfile=mtzon, pdb=pdboff, output_dir=output_dir, off_labels=f"{Fon},{SigFon}", + eff=None, verbose=verbose) # TO-DO: fix ligand occupancies in pdb_mr_to_on edited_mr_pdb = _restore_ligand_occupancy( diff --git a/src/matchmaps/_phenix_utils.py b/src/matchmaps/_phenix_utils.py index e1d14af..540d0d8 100644 --- a/src/matchmaps/_phenix_utils.py +++ b/src/matchmaps/_phenix_utils.py @@ -223,7 +223,6 @@ def rigid_body_refinement_wrapper( def phaser_wrapper( mtzfile, pdb, - input_dir, output_dir, off_labels, eff=None, From 1fbbf19df392eb0f475641d2ffd3c94f3f1c5985 Mon Sep 17 00:00:00 2001 From: dennisbrookner Date: Wed, 29 May 2024 14:51:08 -0400 Subject: [PATCH 10/21] update phenix refine --- src/matchmaps/_phenix_utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/matchmaps/_phenix_utils.py b/src/matchmaps/_phenix_utils.py index 540d0d8..0ce151b 100644 --- a/src/matchmaps/_phenix_utils.py +++ b/src/matchmaps/_phenix_utils.py @@ -120,6 +120,10 @@ def _auto_eff_refinement_template(phenix_style: str): bulk_solvent_and_scale=bss nqh_flips=False } + rigid_body { + bulk_solvent_and_scale=bss + high_resolution=None + } } output { prefix = '''nickname''' From e970d943f94ee898df1e0c222dfb3095486eca55 Mon Sep 17 00:00:00 2001 From: dennisbrookner Date: Wed, 29 May 2024 16:13:10 -0400 Subject: [PATCH 11/21] matchmaps.mr pays attention to phenix version --- src/matchmaps/_compute_mr_diff.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/matchmaps/_compute_mr_diff.py b/src/matchmaps/_compute_mr_diff.py index 66e4b31..e90c760 100644 --- a/src/matchmaps/_compute_mr_diff.py +++ b/src/matchmaps/_compute_mr_diff.py @@ -49,7 +49,8 @@ def compute_mr_difference_map( radius : float = 5, alpha : float = 0, no_bss = False, -): + phenix_version: str = None, + ): """ Compute a real-space difference map from mtzs in different spacegroups. @@ -129,7 +130,7 @@ def compute_mr_difference_map( ) phaser_nickname = phaser_wrapper(mtzfile=mtzon, pdb=pdboff, output_dir=output_dir, off_labels=f"{Fon},{SigFon}", - eff=None, verbose=verbose) + phenix_style=phenix_version, eff=None, verbose=verbose) # TO-DO: fix ligand occupancies in pdb_mr_to_on edited_mr_pdb = _restore_ligand_occupancy( @@ -433,6 +434,16 @@ def parse_arguments(): ) ) + parser.add_argument( + "--phenix-version", + required=False, + help=( + "Specify phenix version as a string, e.g. '1.20'. " + "If omitted, matchmaps will attempt to automatically detect the version in use " + "by analyzing the output of phenix.version" + ) + ) + return parser @@ -475,7 +486,8 @@ def main(): alpha=args.alpha, on_as_stationary=args.on_as_stationary, keep_temp_files=args.keep_temp_files, - no_bss = args.no_bss + no_bss = args.no_bss, + phenix_version = args.phenix_version, ) if args.script: From be17f872aa7f6f6a61c7b0243ebf8d3d40e93a70 Mon Sep 17 00:00:00 2001 From: dennisbrookner Date: Wed, 29 May 2024 16:25:15 -0400 Subject: [PATCH 12/21] invert if condition --- src/matchmaps/_compute_realspace_diff.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/matchmaps/_compute_realspace_diff.py b/src/matchmaps/_compute_realspace_diff.py index 6a23703..287d76a 100755 --- a/src/matchmaps/_compute_realspace_diff.py +++ b/src/matchmaps/_compute_realspace_diff.py @@ -109,7 +109,9 @@ def compute_realspace_difference_map( auto_phenix_version = _validate_environment(ccp4=True) - if not phenix_version: + if phenix_version: + pass + else: phenix_version = auto_phenix_version output_dir_contents = list(output_dir.glob("*")) From 4181a1b6a751a723b2be526dfe5fd5717241a393 Mon Sep 17 00:00:00 2001 From: dennisbrookner Date: Wed, 29 May 2024 16:25:34 -0400 Subject: [PATCH 13/21] parse phenix version --- src/matchmaps/_compute_mr_diff.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/matchmaps/_compute_mr_diff.py b/src/matchmaps/_compute_mr_diff.py index e90c760..38532f8 100644 --- a/src/matchmaps/_compute_mr_diff.py +++ b/src/matchmaps/_compute_mr_diff.py @@ -104,7 +104,12 @@ def compute_mr_difference_map( If True, skip bulk solvent scaling feature of phenix.refine """ - _validate_environment(ccp4=False) + auto_phenix_version = _validate_environment(ccp4=False) + + if phenix_version: + pass + else: + phenix_version = auto_phenix_version output_dir_contents = list(output_dir.glob("*")) From e07eae471044db24e62d1ec1c05cadaf3e912c17 Mon Sep 17 00:00:00 2001 From: dennisbrookner Date: Wed, 29 May 2024 16:31:30 -0400 Subject: [PATCH 14/21] preliminary phenix 1.21 support for matchmaps.mr --- src/matchmaps/_compute_mr_diff.py | 2 ++ src/matchmaps/_phenix_utils.py | 53 ++++++++++++++++++++----------- 2 files changed, 37 insertions(+), 18 deletions(-) diff --git a/src/matchmaps/_compute_mr_diff.py b/src/matchmaps/_compute_mr_diff.py index 38532f8..24dc58a 100644 --- a/src/matchmaps/_compute_mr_diff.py +++ b/src/matchmaps/_compute_mr_diff.py @@ -158,6 +158,7 @@ def compute_mr_difference_map( off_labels=f"{Fon},{SigFon}", # workaround for compatibility mr_on=True, no_bss=no_bss, + phenix_style=phenix_version, ) print(f"{time.strftime('%H:%M:%S')}: Running phenix.refine for the 'off' data...") @@ -173,6 +174,7 @@ def compute_mr_difference_map( rbr_selections=rbr_phenix, off_labels=f"{Foff},{SigFoff}", no_bss=no_bss, + phenix_style=phenix_version, ) # from here down I just copied over the stuff from the normal version diff --git a/src/matchmaps/_phenix_utils.py b/src/matchmaps/_phenix_utils.py index 0ce151b..968ba15 100644 --- a/src/matchmaps/_phenix_utils.py +++ b/src/matchmaps/_phenix_utils.py @@ -224,24 +224,8 @@ def rigid_body_refinement_wrapper( return output_dir / nickname -def phaser_wrapper( - mtzfile, - pdb, - output_dir, - off_labels, - eff=None, - verbose=False, -): - """ - Handle simple phaser run from the command line - """ - - if shutil.which("phenix.phaser") is None: - raise OSError( - "Cannot find executable, phenix.phaser. Please set up your phenix environment." - ) - - if eff is None: +def _auto_eff_phaser_template(phenix_style): + if (phenix_style == '1.20') or (phenix_style == '1.21'): eff_contents = """ phaser { mode = ANO CCA EP_AUTO *MR_AUTO MR_FRF MR_FTF MR_PAK MR_RNP NMAXYZ SCEDS @@ -269,6 +253,39 @@ def phaser_wrapper( } } """ + elif phenix_style == '1.21': + eff_contents = """""" + + else: + raise NotImplementedError(f"Phenix version {phenix_style} not supported") + + return eff_contents + +def phaser_wrapper( + mtzfile, + pdb, + output_dir, + off_labels, + phenix_style, + eff=None, + verbose=False, +): + """ + Handle simple phaser run from the command line + + Args: + phenix_style: + """ + + # this should never be needed; environment is already checked + if shutil.which("phenix.phaser") is None: + raise OSError( + "Cannot find executable, phenix.phaser. Please set up your phenix environment." + ) + + if eff is None: + eff_contents = _auto_eff_phaser_template(phenix_style=phenix_style) + else: raise NotImplementedError("Custom phaser specifications are not yet supported") From 70796c73b5a22c78ee20fe33ec28a3b4adaeca79 Mon Sep 17 00:00:00 2001 From: dennisbrookner Date: Wed, 29 May 2024 16:38:01 -0400 Subject: [PATCH 15/21] properly exclude pycharm files --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 0cb92af..ea90054 100644 --- a/.gitignore +++ b/.gitignore @@ -120,4 +120,4 @@ docs/images/logo.ai docs/images/logo2.ai # PyCharm files -.idea/* \ No newline at end of file +.idea/* From 2144c2a449db5604bb24dacea9dbdf5f3afe5b08 Mon Sep 17 00:00:00 2001 From: dennisbrookner Date: Wed, 29 May 2024 16:40:03 -0400 Subject: [PATCH 16/21] gitignore nonsense --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index ea90054..2abb4e5 100644 --- a/.gitignore +++ b/.gitignore @@ -120,4 +120,4 @@ docs/images/logo.ai docs/images/logo2.ai # PyCharm files -.idea/* +#.idea/* From 236c2a12f4df072957e7751c14e4e2b8d292eec3 Mon Sep 17 00:00:00 2001 From: dennisbrookner Date: Wed, 29 May 2024 16:46:32 -0400 Subject: [PATCH 17/21] trick github into deleting pycharm files --- .idea/.gitignore | 8 ------- .../inspectionProfiles/profiles_settings.xml | 7 ------ .idea/matchmaps.iml | 24 ------------------- .idea/misc.xml | 9 ------- .idea/modules.xml | 8 ------- .idea/vcs.xml | 6 ----- 6 files changed, 62 deletions(-) delete mode 100644 .idea/.gitignore delete mode 100644 .idea/inspectionProfiles/profiles_settings.xml delete mode 100644 .idea/matchmaps.iml delete mode 100644 .idea/misc.xml delete mode 100644 .idea/modules.xml delete mode 100644 .idea/vcs.xml diff --git a/.idea/.gitignore b/.idea/.gitignore deleted file mode 100644 index 13566b8..0000000 --- a/.idea/.gitignore +++ /dev/null @@ -1,8 +0,0 @@ -# Default ignored files -/shelf/ -/workspace.xml -# Editor-based HTTP Client requests -/httpRequests/ -# Datasource local storage ignored files -/dataSources/ -/dataSources.local.xml diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml deleted file mode 100644 index dd4c951..0000000 --- a/.idea/inspectionProfiles/profiles_settings.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/matchmaps.iml b/.idea/matchmaps.iml deleted file mode 100644 index 623563c..0000000 --- a/.idea/matchmaps.iml +++ /dev/null @@ -1,24 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml deleted file mode 100644 index f60d611..0000000 --- a/.idea/misc.xml +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml deleted file mode 100644 index 16524d8..0000000 --- a/.idea/modules.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml deleted file mode 100644 index 35eb1dd..0000000 --- a/.idea/vcs.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file From 1277ae11361c1ca0e875048a3e37a9f351f73f3c Mon Sep 17 00:00:00 2001 From: dennisbrookner Date: Wed, 29 May 2024 16:47:19 -0400 Subject: [PATCH 18/21] gitignore nonsense --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 2abb4e5..ea90054 100644 --- a/.gitignore +++ b/.gitignore @@ -120,4 +120,4 @@ docs/images/logo.ai docs/images/logo2.ai # PyCharm files -#.idea/* +.idea/* From d3d92377974a228dab345fcf1c860dffa275b53a Mon Sep 17 00:00:00 2001 From: dennisbrookner Date: Mon, 3 Jun 2024 12:18:21 -0400 Subject: [PATCH 19/21] phenix version printed to output script --- src/matchmaps/_compute_mr_diff.py | 1 + src/matchmaps/_compute_realspace_diff.py | 1 + src/matchmaps/_utils.py | 36 +++++++++++++++--------- 3 files changed, 24 insertions(+), 14 deletions(-) diff --git a/src/matchmaps/_compute_mr_diff.py b/src/matchmaps/_compute_mr_diff.py index 24dc58a..8df0062 100644 --- a/src/matchmaps/_compute_mr_diff.py +++ b/src/matchmaps/_compute_mr_diff.py @@ -502,6 +502,7 @@ def main(): utility = 'matchmaps.mr', arguments = sys.argv[1:], script_name = args.script, + phenix_version=args.phenix_version, ) return diff --git a/src/matchmaps/_compute_realspace_diff.py b/src/matchmaps/_compute_realspace_diff.py index 287d76a..7a34d65 100755 --- a/src/matchmaps/_compute_realspace_diff.py +++ b/src/matchmaps/_compute_realspace_diff.py @@ -511,6 +511,7 @@ def main(): utility = 'matchmaps', arguments = sys.argv[1:], script_name = args.script, + phenix_version=args.phenix_version, ) return diff --git a/src/matchmaps/_utils.py b/src/matchmaps/_utils.py index 7766911..2bfe8f1 100644 --- a/src/matchmaps/_utils.py +++ b/src/matchmaps/_utils.py @@ -31,19 +31,8 @@ def _validate_environment(ccp4): "For more information, see https://rs-station.github.io/matchmaps/quickstart.html#additional-dependencies" ) else: - version_printout = subprocess.run( - "phenix.version | grep Version", shell=True, capture_output=True - ) - - version_string = str(version_printout.stdout) - - # if version_string.find('21') > 0: - # raise NotImplementedError("It seems that you are using phenix 1.21, which is not yet supported by matchmaps" - # "\n" - # "Please use phenix 1.20 or earlier.") + phenix_version = _detect_phenix_version() - phenix_version = '.'.join(version_string.split(': ')[1].split('.')[:-1]) - if ccp4: if shutil.which("scaleit") is None: raise OSError( @@ -58,6 +47,20 @@ def _validate_environment(ccp4): return phenix_version + +def _detect_phenix_version(): + version_printout = subprocess.run( + "phenix.version | grep Version", shell=True, capture_output=True + ) + version_string = str(version_printout.stdout) + # if version_string.find('21') > 0: + # raise NotImplementedError("It seems that you are using phenix 1.21, which is not yet supported by matchmaps" + # "\n" + # "Please use phenix 1.20 or earlier.") + phenix_version = '.'.join(version_string.split(': ')[1].split('.')[:-1]) + return phenix_version + + def _rbr_selection_parser(rbr_selections): # end early and return nones if this feature isn't being used if rbr_selections is None: @@ -795,12 +798,17 @@ def _clean_up_files(output_dir, old_files, keep_temp_files): return -def _write_script(utility, arguments, script_name): +def _write_script(utility, arguments, script_name, phenix_version): from matchmaps import __version__ as version + if phenix_version is None: + phenix_version = _detect_phenix_version() + contents = f"""#!/bin/bash -# This file was produced by matchmaps version {version} on {time.strftime('%c')} +# This file was produced on {time.strftime('%c')} +# Using matchmaps version {version} and phenix version {phenix_version} +# # The command below was originally run in the following directory: # {os.getcwd()} From 21e29b29fdb035a3742bc9790901eb0257a08876 Mon Sep 17 00:00:00 2001 From: dennisbrookner Date: Mon, 3 Jun 2024 12:22:51 -0400 Subject: [PATCH 20/21] update matchmaps.ncs to handle phenix versions --- src/matchmaps/_compute_ncs_diff.py | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/src/matchmaps/_compute_ncs_diff.py b/src/matchmaps/_compute_ncs_diff.py index 5b638e6..f2c0b86 100644 --- a/src/matchmaps/_compute_ncs_diff.py +++ b/src/matchmaps/_compute_ncs_diff.py @@ -47,6 +47,7 @@ def compute_ncs_difference_map( eff : str = None, keep_temp_files : str = None, no_bss = False, + phenix_version: str = None, ): """ Compute an internal difference map across non-crystallographic symmetry @@ -90,8 +91,13 @@ def compute_ncs_difference_map( no_bss : bool, optional If True, skip bulk solvent scaling feature of phenix.refine """ - _validate_environment(ccp4=False) - + auto_phenix_version = _validate_environment(ccp4=False) + + if phenix_version: + pass + else: + phenix_version = auto_phenix_version + output_dir_contents = list(output_dir.glob("*")) pdb = _cif_or_pdb_to_pdb(pdb, output_dir) @@ -120,7 +126,8 @@ def compute_ncs_difference_map( verbose=verbose, rbr_selections=rbr_phenix, off_labels=f"{F},{SigF}", - no_bss=no_bss + no_bss=no_bss, + phenix_style=phenix_version, ) # use phenix names for columns when computing FloatGrid @@ -326,6 +333,16 @@ def parse_arguments(): "Note that this file is written out in the current working directory, NOT the input or output directories" ) ) + + parser.add_argument( + "--phenix-version", + required=False, + help=( + "Specify phenix version as a string, e.g. '1.20'. " + "If omitted, matchmaps will attempt to automatically detect the version in use " + "by analyzing the output of phenix.version" + ) + ) return parser @@ -365,6 +382,7 @@ def main(): spacing=args.spacing, keep_temp_files=args.keep_temp_files, no_bss=args.no_bss, + phenix_version=args.phenix_version, ) if args.script: @@ -372,6 +390,7 @@ def main(): utility = 'matchmaps.ncs', arguments = sys.argv[1:], script_name = args.script, + phenix_version=args.phenix_version, ) return From 89c23e8fc0174965369d37d56d90794ccfcbb19b Mon Sep 17 00:00:00 2001 From: dennisbrookner Date: Mon, 3 Jun 2024 12:25:17 -0400 Subject: [PATCH 21/21] remove extra imports --- src/matchmaps/_compute_mr_diff.py | 5 +---- src/matchmaps/_compute_ncs_diff.py | 7 +------ src/matchmaps/_compute_realspace_diff.py | 10 ++-------- 3 files changed, 4 insertions(+), 18 deletions(-) diff --git a/src/matchmaps/_compute_mr_diff.py b/src/matchmaps/_compute_mr_diff.py index 8df0062..6d13724 100644 --- a/src/matchmaps/_compute_mr_diff.py +++ b/src/matchmaps/_compute_mr_diff.py @@ -3,15 +3,13 @@ import argparse import os import sys -import subprocess import time -from functools import partial from pathlib import Path import gemmi -import numpy as np import reciprocalspaceship as rs +from matchmaps._phenix_utils import rigid_body_refinement_wrapper, phaser_wrapper, _remove_waters from matchmaps._utils import ( _handle_special_positions, make_floatgrid_from_mtz, @@ -25,7 +23,6 @@ _cif_or_mtz_to_mtz, _write_script, ) -from matchmaps._phenix_utils import rigid_body_refinement_wrapper, phaser_wrapper, _remove_waters def compute_mr_difference_map( diff --git a/src/matchmaps/_compute_ncs_diff.py b/src/matchmaps/_compute_ncs_diff.py index f2c0b86..15c6ad1 100644 --- a/src/matchmaps/_compute_ncs_diff.py +++ b/src/matchmaps/_compute_ncs_diff.py @@ -3,20 +3,16 @@ import argparse import os import sys -import subprocess import time -from functools import partial from pathlib import Path import gemmi -import numpy as np import reciprocalspaceship as rs - +from matchmaps._phenix_utils import rigid_body_refinement_wrapper, _renumber_waters from matchmaps._utils import ( _handle_special_positions, make_floatgrid_from_mtz, - _realspace_align_and_subtract, _rbr_selection_parser, _ncs_align_and_subtract, _validate_environment, @@ -26,7 +22,6 @@ _cif_or_mtz_to_mtz, _write_script, ) -from matchmaps._phenix_utils import rigid_body_refinement_wrapper, _renumber_waters def compute_ncs_difference_map( diff --git a/src/matchmaps/_compute_realspace_diff.py b/src/matchmaps/_compute_realspace_diff.py index 7a34d65..580bd55 100755 --- a/src/matchmaps/_compute_realspace_diff.py +++ b/src/matchmaps/_compute_realspace_diff.py @@ -1,20 +1,15 @@ """Compute unbiased real space difference map.""" import argparse -import os -import sys -import glob import subprocess +import sys import time -from functools import partial from pathlib import Path import gemmi -import numpy as np import reciprocalspaceship as rs -from IPython import embed - +from matchmaps._phenix_utils import rigid_body_refinement_wrapper, _renumber_waters from matchmaps._utils import ( _handle_special_positions, make_floatgrid_from_mtz, @@ -27,7 +22,6 @@ _cif_or_pdb_to_pdb, _write_script, ) -from matchmaps._phenix_utils import rigid_body_refinement_wrapper, _renumber_waters def compute_realspace_difference_map(