From b367104234091a5cd8650a9c9c04dc0ca79b918b Mon Sep 17 00:00:00 2001 From: Jimmy Shen <14003693+jmmshn@users.noreply.github.com> Date: Thu, 18 Jul 2024 08:50:31 -0700 Subject: [PATCH] Defect: Allow bulk SC calculation to be skipped. (#742) * start electrodes * start electrode * start electrode * start electrode * start electrode * start electrode * VASP electrode job * VASP electrode job * lint * n steps * n steps * n steps * n steps * rm defect changes * rm defect changes * update * update * update * update structure matcher update structure matcher update structure matcher update structure matcher update structure matcher update structure matcher * debugging debugging debugging debugging debugging debugging debugging debugging debugging debugging debugging debugging debugging debugging debugging * debugging * debugging * debugging * debugging * append names * append names * append names * append names * append names * dev script change dev script change dev script change dev script change dev script change dev script change dev script change dev script change test * working test * typo * lint * lint * lint * lint * allow different bulk relax * update * update * update * update * hydrogen * update emmet * ulid tests * emmet * uc_bulk * update docs * update docs * update depent * get charge state calcs ASAP --------- Co-authored-by: Alex Ganose --- src/atomate2/common/flows/defect.py | 216 +++++++++++++++++++++++----- src/atomate2/common/jobs/defect.py | 5 +- tests/vasp/flows/test_defect.py | 46 ++++++ 3 files changed, 226 insertions(+), 41 deletions(-) diff --git a/src/atomate2/common/flows/defect.py b/src/atomate2/common/flows/defect.py index b2761d9f00..4c393069ab 100644 --- a/src/atomate2/common/flows/defect.py +++ b/src/atomate2/common/flows/defect.py @@ -106,7 +106,108 @@ def make( dir2 = relax2.output.dir_name struct1 = relax1.output.structure struct2 = relax2.output.structure + add_info1 = {"relaxed_uuid": relax1.uuid, "distorted_uuid": relax2.uuid} + 
add_info2 = {"relaxed_uuid": relax2.uuid, "distorted_uuid": relax1.uuid} + deformations1, deformations2, ccd_job = self.get_deformation_and_ccd_jobs( + struct1, struct2, dir1, dir2, add_info1, add_info2 + ) + + return Flow( + jobs=[ + charged_structures, + relax1, + relax2, + deformations1, + deformations2, + ccd_job, + ], + output=ccd_job.output, + name=name, + ) + + def make_from_relaxed_structures( + self, + structure1: Structure, + structure2: Structure, + ) -> Flow: + """ + Make a job for the calculation of the configuration coordinate diagram. + + Parameters + ---------- + structure1 + The relaxed structure for charge state 1. + structure2 + The relaxed structure for charge state 2. + + Returns + ------- + Flow + The full workflow for the calculation of the configuration coordinate + diagram. + """ + # use a more descriptive name when possible + if not isinstance(structure1, OutputReference): + name = f"{self.name}: {structure1.formula}" + if not ( + isinstance(structure1, OutputReference) + or isinstance(structure2, OutputReference) + ): + name = ( + f"{self.name}: {structure1.formula}" + f"({structure1.charge}-{structure2.charge})" + ) + + deformations1, deformations2, ccd_job = self.get_deformation_and_ccd_jobs( + structure1, structure2 + ) + + return Flow( + jobs=[ + deformations1, + deformations2, + ccd_job, + ], + output=ccd_job.output, + name=name, + ) + + def get_deformation_and_ccd_jobs( + self, + struct1: Structure, + struct2: Structure, + dir1: str | None = None, + dir2: str | None = None, + add_info1: dict | None = None, + add_info2: dict | None = None, + ) -> tuple[Job, Job, Job]: + """Get the deformation and CCD jobs for the given structures. + + Parameters + ---------- + struct1: Structure + The first structure. + struct2: Structure + The second structure. + dir1: str + The directory of the first structure. + dir2: str + The directory of the second structure. 
+ add_info1: dict + Additional information to write + add_info2: dict + Additional information to write + + Returns + ------- + deformations1: Job + The deformation job for the first structure. + deformations2: Job + The deformation job for the second structure. + ccd_job: Job + The Job to construct the CCD document. + """ deformations1 = spawn_energy_curve_calcs( struct1, struct2, @@ -114,7 +215,7 @@ def make( static_maker=self.static_maker, prev_dir=dir1, add_name="q1", - add_info={"relaxed_uuid": relax1.uuid, "distorted_uuid": relax2.uuid}, + add_info=add_info1, ) deformations2 = spawn_energy_curve_calcs( @@ -124,7 +225,7 @@ def make( static_maker=self.static_maker, prev_dir=dir2, add_name="q2", - add_info={"relaxed_uuid": relax2.uuid, "distorted_uuid": relax1.uuid}, + add_info=add_info2, ) deformations1.append_name(" q1") @@ -139,18 +240,7 @@ def make( deformations1.output, deformations2.output, undistorted_index=min_abs_index ) - return Flow( - jobs=[ - charged_structures, - relax1, - relax2, - deformations1, - deformations2, - ccd_job, - ], - output=ccd_job.output, - name=name, - ) + return deformations1, deformations2, ccd_job @dataclass @@ -161,6 +251,15 @@ class FormationEnergyMaker(Maker, ABC): this maker is the `defect_relax_maker` which contains the settings for the atomic relaxations that each defect supercell will undergo. + This maker can be used as a stand-alone maker to calculate all of the data + needed to populate the `DefectEntry` object. However, you can also use this + maker with `uc_bulk` set to True (also set `collect_defect_entry_data` to False + and `bulk_relax_maker` to None). This will skip the bulk supercell calculations + assuming that bulk unit cell calculations are of high enough quality to be used + directly. In these cases, the bulk SC electrostatic potentials need to be + constructed without running a separate bulk SC calculation. This is currently + implemented through the grid re-sampling tools in `mp-pyrho`. 
+ Attributes ---------- defect_relax_maker: Maker @@ -189,6 +288,10 @@ class FormationEnergyMaker(Maker, ABC): ng_settings = dict(zip(params, ng + ngf)) relax_maker = update_user_incar_settings(relax_maker, ng_settings) + uc_bulk: bool + If True, skip the bulk supercell calculation and only perform the defect + supercell calculations. This is useful for large-scale defect databases. + name: str The name of the flow created by this maker. @@ -251,6 +354,7 @@ class FormationEnergyMaker(Maker, ABC): defect_relax_maker: Maker bulk_relax_maker: Maker | None = None + uc_bulk: bool = False name: str = "formation energy" relax_radius: float | str | None = None perturb: float | None = None @@ -260,8 +364,15 @@ class FormationEnergyMaker(Maker, ABC): def __post_init__(self) -> None: """Apply post init updates.""" self.validate_maker() - if self.bulk_relax_maker is None: - self.bulk_relax_maker = self.defect_relax_maker + if self.uc_bulk: + if self.bulk_relax_maker is not None: + raise ValueError("bulk_relax_maker should be None when uc_bulk is True") + if self.collect_defect_entry_data: + raise ValueError( + "collect_defect_entry_data should be False when uc_bulk is True" + ) + else: + self.bulk_relax_maker = self.bulk_relax_maker or self.defect_relax_maker def make( self, @@ -296,27 +407,41 @@ def make( The workflow to calculate the formation energy diagram. 
""" jobs = [] - if bulk_supercell_dir is None: - get_sc_job = bulk_supercell_calculation( - uc_structure=defect.structure, - relax_maker=self.bulk_relax_maker, - sc_mat=supercell_matrix, - get_planar_locpot=self.get_planar_locpot, - ) - sc_mat = get_sc_job.output["sc_mat"] - lattice = get_sc_job.output["sc_struct"].lattice - bulk_supercell_dir = get_sc_job.output["dir_name"] + if not self.uc_bulk: + if bulk_supercell_dir is None: + get_sc_job = bulk_supercell_calculation( + uc_structure=defect.structure, + relax_maker=self.bulk_relax_maker, + sc_mat=supercell_matrix, + get_planar_locpot=self.get_planar_locpot, + ) + sc_mat = get_sc_job.output["sc_mat"] + lattice = get_sc_job.output["sc_struct"].lattice + bulk_supercell_dir = get_sc_job.output["dir_name"] + sc_uuid = get_sc_job.output["uuid"] + else: + # all additional reader functions need to be in this job + # b/c they might receive Response objects instead of data. + get_sc_job = get_supercell_from_prv_calc( + uc_structure=defect.structure, + prv_calc_dir=bulk_supercell_dir, + sc_entry_and_locpot_from_prv=self.sc_entry_and_locpot_from_prv, + sc_mat_ref=supercell_matrix, + ) + sc_mat = get_sc_job.output["sc_mat"] + lattice = get_sc_job.output["lattice"] + sc_uuid = get_sc_job.output["uuid"] + jobs.append(get_sc_job) else: - # all additional reader functions need to be in this job - # b/c they might receive Response objects instead of data. - get_sc_job = get_supercell_from_prv_calc( - uc_structure=defect.structure, - prv_calc_dir=bulk_supercell_dir, - sc_entry_and_locpot_from_prv=self.sc_entry_and_locpot_from_prv, - sc_mat_ref=supercell_matrix, - ) - sc_mat = get_sc_job.output["sc_mat"] - lattice = get_sc_job.output["lattice"] + if bulk_supercell_dir is not None: + raise ValueError( + "bulk_supercell_dir should be None when uc_bulk is True. " + "We will be using a uc bulk calculation, so no bulk supercell " + "is needed." 
+ ) + sc_mat = supercell_matrix + lattice = None + sc_uuid = None spawn_output = spawn_defect_q_jobs( defect=defect, @@ -327,13 +452,26 @@ def make( add_info={ "bulk_supercell_dir": bulk_supercell_dir, "bulk_supercell_matrix": sc_mat, - "bulk_supercell_uuid": get_sc_job.uuid, + "bulk_supercell_uuid": sc_uuid, }, relax_radius=self.relax_radius, perturb=self.perturb, validate_charge=self.validate_charge, ) - jobs.extend([get_sc_job, spawn_output]) + + if self.uc_bulk: + # run the function here so we can get the charge state + # calculations ASAP + response = spawn_output.function( + *spawn_output.function_args, **spawn_output.function_kwargs + ) + jobs.append(response.replace) + output_ = response.output + else: + # execute this as job so you can string a single bulk sc with multiple + # defect scs + jobs.append(spawn_output) + output_ = spawn_output.output if self.collect_defect_entry_data: collection_job = get_defect_entry( @@ -344,7 +482,7 @@ def make( return Flow( jobs=jobs, - output=spawn_output.output, + output=output_, name=self.name, ) diff --git a/src/atomate2/common/jobs/defect.py b/src/atomate2/common/jobs/defect.py index 7871c01cef..d790968d39 100644 --- a/src/atomate2/common/jobs/defect.py +++ b/src/atomate2/common/jobs/defect.py @@ -300,7 +300,7 @@ def get_planar_locpot(task_doc: TaskDoc) -> NDArray: def spawn_defect_q_jobs( defect: Defect, relax_maker: RelaxMaker, - relaxed_sc_lattice: Lattice, + relaxed_sc_lattice: Lattice | None = None, sc_mat: NDArray | None = None, defect_index: int | str = "", add_info: dict | None = None, @@ -355,7 +355,8 @@ def spawn_defect_q_jobs( sc_def_struct = defect.get_supercell_structure( sc_mat=sc_mat, relax_radius=relax_radius, perturb=perturb ) - sc_def_struct.lattice = relaxed_sc_lattice + if relaxed_sc_lattice is not None: + sc_def_struct.lattice = relaxed_sc_lattice if sc_mat is not None: sc_mat = np.array(sc_mat).tolist() for qq in defect.get_charge_states(): diff --git a/tests/vasp/flows/test_defect.py 
b/tests/vasp/flows/test_defect.py index cb18af69c2..ae80d6b30b 100644 --- a/tests/vasp/flows/test_defect.py +++ b/tests/vasp/flows/test_defect.py @@ -185,3 +185,49 @@ def _check_plnr_locpot(name): prv_dir = test_dir / "vasp/GaN_Mg_defect/bulk_relax/outputs" flow2 = maker.make(defects[0], bulk_supercell_dir=prv_dir, defect_index=0) _ = run_locally(flow2, create_folders=True, ensure_success=True) + + +def test_formation_energy_maker_uc(mock_vasp, clean_dir, test_dir, monkeypatch): + from jobflow import run_locally + + # mapping from job name to directory containing test files + ref_paths = { + "relax Mg_Ga-0 q=-2": "GaN_Mg_defect/relax_Mg_Ga-0_q=-2", + "relax Mg_Ga-0 q=-1": "GaN_Mg_defect/relax_Mg_Ga-0_q=-1", + "relax Mg_Ga-0 q=0": "GaN_Mg_defect/relax_Mg_Ga-0_q=0", + "relax Mg_Ga-0 q=1": "GaN_Mg_defect/relax_Mg_Ga-0_q=1", + } + + fake_run_vasp_kwargs = { + k: {"incar_settings": ["ISIF"], "check_inputs": ["incar"]} for k in ref_paths + } + + # automatically use fake VASP and write POTCAR.spec during the test + mock_vasp(ref_paths, fake_run_vasp_kwargs) + + struct = Structure.from_file(test_dir / "structures" / "GaN.cif") + defects = list( + SubstitutionGenerator().get_defects( + structure=struct, substitution={"Ga": ["Mg"]} + ) + ) + + maker = FormationEnergyMaker( + relax_radius="auto", + perturb=0.1, + collect_defect_entry_data=False, + validate_charge=False, + uc_bulk=True, + ) + flow = maker.make( + defects[0], + supercell_matrix=[[2, 2, 0], [2, -2, 0], [0, 0, 1]], + defect_index=0, + ) + + # run the flow and ensure that it finished running successfully + _ = run_locally( + flow, + create_folders=True, + ensure_success=True, + )