From 735126f725fdd4ea9e4a74884c4f146032407a4d Mon Sep 17 00:00:00 2001 From: pee8379 Date: Thu, 18 May 2023 17:09:34 +0900 Subject: [PATCH 1/3] Update qe.py Signed-off-by: pee8379 --- dpdata/plugins/qe.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/dpdata/plugins/qe.py b/dpdata/plugins/qe.py index d524462f6..9eb16bd58 100644 --- a/dpdata/plugins/qe.py +++ b/dpdata/plugins/qe.py @@ -1,6 +1,7 @@ import dpdata.md.pbc import dpdata.qe.scf import dpdata.qe.traj +import dpdata.qe.pwmd from dpdata.format import Format @@ -49,3 +50,25 @@ def from_labeled_system(self, file_name, **kwargs): data["virials"], ) = dpdata.qe.scf.get_frame(file_name) return data + + +@Format.register("qe/pw/md") +class QECPPWMDFormat(Format): + @Format.post("rot_lower_triangular") + def from_labeled_system(self, file_name, begin=0, step=1, **kwargs): + data = {} + ( + data["atom_names"], + data["atom_numbs"], + data["atom_types"], + data["cells"], + data["coords"], + data["energies"], + data["forces"], + data["virials"], + ) = dpdata.qe.pwmd.to_system_data(file_name, begin=begin, step=step) + data["coords"] = dpdata.md.pbc.apply_pbc( + data["coords"], + data["cells"], + ) + return data From b309ba6ffda42ec70b75827a5fe266bab1355b65 Mon Sep 17 00:00:00 2001 From: pee8379 Date: Thu, 18 May 2023 17:09:58 +0900 Subject: [PATCH 2/3] Add files via upload Signed-off-by: pee8379 --- dpdata/qe/pwmd.py | 218 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 218 insertions(+) create mode 100644 dpdata/qe/pwmd.py diff --git a/dpdata/qe/pwmd.py b/dpdata/qe/pwmd.py new file mode 100644 index 000000000..7d93dfc1d --- /dev/null +++ b/dpdata/qe/pwmd.py @@ -0,0 +1,218 @@ +import os +import sys +import re + +import numpy as np + +from ..unit import ( + EnergyConversion, + ForceConversion, + LengthConversion, + PressureConversion, +) + +ry2ev = EnergyConversion("rydberg", "eV").value() +kbar2evperang3 = PressureConversion("kbar", "eV/angstrom^3").value() + +length_convert = LengthConversion("bohr", "angstrom").value() +energy_convert = EnergyConversion("hartree", "eV").value() +force_convert = ForceConversion("hartree/bohr", "eV/angstrom").value() + +def get_coords(lines, natoms): + coord = [] + ret = [] + for i, string in enumerate(lines): + if "ATOMIC_POSITIONS" in string: + newlines = lines[i:] + blk = get_block(newlines, "ATOMIC_POSITIONS") + blk = blk[0 : sum(natoms)] + for ii in blk: + ret.append([float(jj) for jj in ii.split()[1:4]]) + coord.append(ret) + ret =[] + coord = np.array(coord) + return coord + +def get_cell_vc(lines): + cell = [] + ret = [] + for i, string in enumerate(lines): + if "CELL_PARAMETERS" in string: + newlines = lines[i:] + blk = get_block(newlines, "CELL_PARAMETERS") + for ii in blk: + ret.append([float(jj) for jj in ii.split()[0:3]]) + cell.append(ret) + ret =[] + cell = np.array(cell) + return cell + +def get_stress(lines): + ret = [] + stress = [] + for i, string in enumerate(lines): + if 'total stress' in string: + newlines = lines[i:] + blk = get_block(newlines, "total stress") + for ii in blk: + ret.append([float(jj) for jj in ii.split()[3:6]]) + stress.append(ret) + ret=[] + stress = np.array(stress) + stress *= kbar2evperang3 + return stress + +def get_energy(lines): + energy = [] + for i, string in enumerate(lines): + if "! total energy" in string: + energy.append(ry2ev * float(string.split("=")[1].split()[0])) + energy = np.array(energy) + + return energy + +def get_force(lines, natoms): + ret = [] + force = [] + for i, string in enumerate(lines): + if 'Forces acting on atoms' in string: + newlines = lines[i:] + blk = get_block(newlines, "Forces acting on atoms", skip=1) + blk = blk[0 : sum(natoms)] + for ii in blk: + ret.append([float(jj) for jj in ii.split("=")[1].split()]) + force.append(ret) + ret=[] + force = np.array(force) + force *= ry2ev / length_convert + return force + +def get_block(lines, keyword, skip=0): + ret = [] + for idx, ii in enumerate(lines): + if keyword in ii: + blk_idx = idx + 1 + skip + while len(lines[blk_idx]) == 0: + blk_idx += 1 + while len(lines[blk_idx]) != 0 and blk_idx != len(lines): + ret.append(lines[blk_idx]) + blk_idx += 1 + break + return ret + +def get_cell(lines): + ret = [] + for idx, ii in enumerate(lines): + if "ibrav" in ii: + break + blk = lines[idx : idx + 2] + ibrav = int(blk[0].replace(",", "").split("=")[-1]) + if ibrav == 0: + for iline in lines: + if "CELL_PARAMETERS" in iline and "angstrom" not in iline.lower(): + raise RuntimeError( + "CELL_PARAMETERS must be written in Angstrom. Other units are not supported yet." + ) + blk = get_block(lines, "CELL_PARAMETERS") + for ii in blk: + ret.append([float(jj) for jj in ii.split()[0:3]]) + ret = np.array(ret) + elif ibrav == 1: + a = None + for iline in lines: + line = iline.replace("=", " ").replace(",", "").split() + if len(line) >= 2 and "a" == line[0]: + # print("line = ", line) + a = float(line[1]) + if len(line) >= 2 and "celldm(1)" == line[0]: + a = float(line[1]) * length_convert + # print("a = ", a) + if not a: + raise RuntimeError("parameter 'a' or 'celldm(1)' cannot be found.") + ret = np.array([[a, 0.0, 0.0], [0.0, a, 0.0], [0.0, 0.0, a]]) + else: + sys.exit("ibrav > 1 not supported yet.") + return ret + +def get_atoms(lines): + atom_symbol_list = [] + for iline in lines: + if "ATOMIC_POSITIONS" in iline: + blk = get_block(lines, "ATOMIC_POSITIONS") + for ii in blk: + atom_symbol_list.append(ii.split()[0]) + + atom_symbol_list = np.array(atom_symbol_list) + tmp_names, symbol_idx = np.unique(atom_symbol_list, return_index=True) + atom_types = [] + atom_numbs = [] + # preserve the atom_name order + atom_names = atom_symbol_list[np.sort(symbol_idx)] + for jj in atom_symbol_list: + for idx, ii in enumerate(atom_names): + if jj == ii: + atom_types.append(idx) + for idx in range(len(atom_names)): + atom_numbs.append(atom_types.count(idx)) + atom_types = np.array(atom_types) + return list(atom_names), atom_numbs, atom_types + +def to_system_data(fname, begin=0, step=1): + if type(fname) == str: + path_out = fname + outname = os.path.basename(path_out) + # the name of the input file is assumed to be different from the output by 'in' and 'out' + inname = outname.replace("out", "in") + path_in = os.path.join(os.path.dirname(path_out), inname) + elif type(fname) == list and len(fname) == 2: + path_in = fname[0] + path_out = fname[1] + else: + raise RuntimeError("invalid input") + with open(path_out) as fp: + outlines = fp.read().split("\n") + with open(path_in) as fp: + inlines = fp.read().split("\n") + + for i, string in enumerate(outlines): + if 'Program PWSCF' in string: + checkpoint_index = i + outlines = outlines[checkpoint_index:] + # In case of output file from previous failed pw calculation was not deleted + + atom_names, atom_numbs, atom_types = get_atoms(inlines) + + coords = get_coords(outlines, atom_numbs) + energies = get_energy(outlines) + forces = get_force(outlines, atom_numbs) + virials = get_stress(outlines) + + for line in inlines: + # check calculation option in input file to find it is vc (variable cell) or not + if 'calculation' in line: + calculation=line.strip() + calculation = re.search(r"'([^']*)'", calculation) + calculation = calculation.group(1) + calculation = calculation.lower() + if calculation == 'md' or calculation == 'relax': + cells = get_cell(inlines) + cells = np.tile(cells, (len(virials),1,1)) + for ii, temp in enumerate(virials): + virials[ii] = virials[ii] * np.linalg.det(cells[ii]) + elif calculation == 'vc-md' or calculation == 'vc-relax': + cells = get_cell_vc(outlines) + for ii, temp in enumerate(cells): + virials[ii] = virials[ii] * np.linalg.det(cells[ii]) + # because cell changes every step when variable cell calculation + + + return ( + atom_names, + atom_numbs, + atom_types, + cells, + coords, + energies, + forces, + virials, + ) From 75ffbf715ab44c59cd40f163ff9137114e0544db Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 18 May 2023 08:19:21 +0000 Subject: [PATCH 3/3] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- dpdata/plugins/qe.py | 32 ++++++++++++++--------------- dpdata/qe/pwmd.py | 48 ++++++++++++++++++++++++++------------------ 2 files changed, 44 insertions(+), 36 deletions(-) diff --git a/dpdata/plugins/qe.py b/dpdata/plugins/qe.py index 9eb16bd58..25e392fa1 100644 --- a/dpdata/plugins/qe.py +++ b/dpdata/plugins/qe.py @@ -1,7 +1,7 @@ import dpdata.md.pbc +import dpdata.qe.pwmd import dpdata.qe.scf import dpdata.qe.traj -import dpdata.qe.pwmd from dpdata.format import Format @@ -54,21 +54,21 @@ def from_labeled_system(self, file_name, **kwargs): @Format.register("qe/pw/md") class QECPPWMDFormat(Format): - @Format.post("rot_lower_triangular") - def from_labeled_system(self, file_name, begin=0, step=1, **kwargs): - data = {} - ( - data["atom_names"], - data["atom_numbs"], - data["atom_types"], - data["cells"], - data["coords"], - data["energies"], - data["forces"], - data["virials"], - ) = dpdata.qe.pwmd.to_system_data(file_name, begin=begin, step=step) - data["coords"] = dpdata.md.pbc.apply_pbc( + @Format.post("rot_lower_triangular") + def from_labeled_system(self, file_name, begin=0, step=1, **kwargs): + data = {} + ( + data["atom_names"], + data["atom_numbs"], + data["atom_types"], + data["cells"], + data["coords"], + data["energies"], + data["forces"], + data["virials"], + ) = dpdata.qe.pwmd.to_system_data(file_name, begin=begin, step=step) + data["coords"] = dpdata.md.pbc.apply_pbc( data["coords"], data["cells"], ) - return data + return data diff --git a/dpdata/qe/pwmd.py b/dpdata/qe/pwmd.py index 7d93dfc1d..ba2537b9f 100644 --- a/dpdata/qe/pwmd.py +++ b/dpdata/qe/pwmd.py @@ -1,6 +1,6 @@ import os -import sys import re +import sys import numpy as np @@ -18,75 +18,81 @@ energy_convert = EnergyConversion("hartree", "eV").value() force_convert = ForceConversion("hartree/bohr", "eV/angstrom").value() + def get_coords(lines, natoms): coord = [] ret = [] for i, string in enumerate(lines): if "ATOMIC_POSITIONS" in string: - newlines = lines[i:] + newlines = lines[i:] blk = get_block(newlines, "ATOMIC_POSITIONS") blk = blk[0 : sum(natoms)] for ii in blk: ret.append([float(jj) for jj in ii.split()[1:4]]) coord.append(ret) - ret =[] + ret = [] coord = np.array(coord) return coord + def get_cell_vc(lines): cell = [] ret = [] for i, string in enumerate(lines): if "CELL_PARAMETERS" in string: - newlines = lines[i:] + newlines = lines[i:] blk = get_block(newlines, "CELL_PARAMETERS") for ii in blk: ret.append([float(jj) for jj in ii.split()[0:3]]) cell.append(ret) - ret =[] + ret = [] cell = np.array(cell) return cell + def get_stress(lines): ret = [] stress = [] for i, string in enumerate(lines): - if 'total stress' in string: - newlines = lines[i:] + if "total stress" in string: + newlines = lines[i:] blk = get_block(newlines, "total stress") for ii in blk: ret.append([float(jj) for jj in ii.split()[3:6]]) stress.append(ret) - ret=[] + ret = [] stress = np.array(stress) stress *= kbar2evperang3 return stress + def get_energy(lines): energy = [] for i, string in enumerate(lines): if "! total energy" in string: energy.append(ry2ev * float(string.split("=")[1].split()[0])) energy = np.array(energy) - + return energy + def get_force(lines, natoms): ret = [] force = [] for i, string in enumerate(lines): - if 'Forces acting on atoms' in string: - newlines = lines[i:] + if "Forces acting on atoms" in string: + newlines = lines[i:] blk = get_block(newlines, "Forces acting on atoms", skip=1) blk = blk[0 : sum(natoms)] for ii in blk: ret.append([float(jj) for jj in ii.split("=")[1].split()]) force.append(ret) - ret=[] + ret = [] force = np.array(force) force *= ry2ev / length_convert return force + def get_block(lines, keyword, skip=0): ret = [] for idx, ii in enumerate(lines): @@ -100,6 +106,7 @@ def get_block(lines, keyword, skip=0): break return ret + def get_cell(lines): ret = [] for idx, ii in enumerate(lines): @@ -134,6 +141,7 @@ def get_cell(lines): sys.exit("ibrav > 1 not supported yet.") return ret + def get_atoms(lines): atom_symbol_list = [] for iline in lines: @@ -157,6 +165,7 @@ def get_atoms(lines): atom_types = np.array(atom_types) return list(atom_names), atom_numbs, atom_types + def to_system_data(fname, begin=0, step=1): if type(fname) == str: path_out = fname @@ -175,9 +184,9 @@ def to_system_data(fname, begin=0, step=1): inlines = fp.read().split("\n") for i, string in enumerate(outlines): - if 'Program PWSCF' in string: + if "Program PWSCF" in string: checkpoint_index = i - outlines = outlines[checkpoint_index:] + outlines = outlines[checkpoint_index:] # In case of output file from previous failed pw calculation was not deleted atom_names, atom_numbs, atom_types = get_atoms(inlines) @@ -189,23 +198,22 @@ def to_system_data(fname, begin=0, step=1): for line in inlines: # check calculation option in input file to find it is vc (variable cell) or not - if 'calculation' in line: - calculation=line.strip() + if "calculation" in line: + calculation = line.strip() calculation = re.search(r"'([^']*)'", calculation) calculation = calculation.group(1) calculation = calculation.lower() - if calculation == 'md' or calculation == 'relax': + if calculation == "md" or calculation == "relax": cells = get_cell(inlines) - cells = np.tile(cells, (len(virials),1,1)) + cells = np.tile(cells, (len(virials), 1, 1)) for ii, temp in enumerate(virials): virials[ii] = virials[ii] * np.linalg.det(cells[ii]) - elif calculation == 'vc-md' or calculation == 'vc-relax': + elif calculation == "vc-md" or calculation == "vc-relax": cells = get_cell_vc(outlines) for ii, temp in enumerate(cells): virials[ii] = virials[ii] * np.linalg.det(cells[ii]) # because cell changes every step when variable cell calculation - return ( atom_names, atom_numbs,