From 8645b98b638014a6eab37c9f3aab248a1ffb167a Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 19 Dec 2022 04:53:32 -0500 Subject: [PATCH 01/20] gaussian: support PBC (#397) Signed-off-by: Jinzhe Zeng --- dpdata/gaussian/gjf.py | 6 + dpdata/gaussian/log.py | 27 ++- tests/gaussian/h2pbc.gaussianlog | 301 +++++++++++++++++++++++++++++++ tests/test_gaussian_log.py | 18 ++ 4 files changed, 347 insertions(+), 5 deletions(-) create mode 100644 tests/gaussian/h2pbc.gaussianlog diff --git a/dpdata/gaussian/gjf.py b/dpdata/gaussian/gjf.py index 727732b8..c2652747 100644 --- a/dpdata/gaussian/gjf.py +++ b/dpdata/gaussian/gjf.py @@ -229,6 +229,12 @@ def make_gaussian_input( (symbol, frag_index[ii] + 1, *coordinate)) else: buff.append("%s %f %f %f" % (symbol, *coordinate)) + if not sys_data.get('nopbc', False): + # PBC condition + cell = sys_data['cells'][0] + for ii in range(3): + # use TV as atomic symbol, see https://gaussian.com/pbc/ + buff.append('TV %f %f %f' % (symbol, *cell[ii])) if basis_set is not None: # custom basis set buff.extend(['', basis_set, '']) diff --git a/dpdata/gaussian/log.py b/dpdata/gaussian/log.py index 5ed976fe..0e006682 100644 --- a/dpdata/gaussian/log.py +++ b/dpdata/gaussian/log.py @@ -16,6 +16,8 @@ def to_system_data(file_name, md=False): coords_t = [] atom_symbols = [] forces_t = [] + cells_t = [] + nopbc = True with open(file_name) as fp: for line in fp: @@ -29,6 +31,7 @@ def to_system_data(file_name, md=False): flag = 5 coords = [] atom_symbols = [] + cells = [] if 1 <= flag <= 3 or 5 <= flag <= 9: flag += 1 @@ -38,18 +41,31 @@ def to_system_data(file_name, md=False): forces_t.append(forces) energy_t.append(energy) coords_t.append(coords) + if cells: + nopbc = False + cells_t.append(cells) + else: + cells_t.append([[100., 0., 0.], [0., 100., 0.], [0., 0., 100.]]) flag = 0 else: s = line.split() - forces.append([float(line[23:38]), float(line[38:53]), float(line[53:68])]) + if line[14:16] == "-2": + # PBC + pass + else: + forces.append([float(line[23:38]), float(line[38:53]), float(line[53:68])]) elif flag == 10: # atom_symbols and coords if line.startswith(" -------"): flag = 0 else: s = line.split() - coords.append([float(x) for x in s[3:6]]) - atom_symbols.append(symbols[int(s[1])]) + if int(s[1]) == -2: + # PBC cells, see https://gaussian.com/pbc/ + cells.append([float(x) for x in s[3:6]]) + else: + coords.append([float(x) for x in s[3:6]]) + atom_symbols.append(symbols[int(s[1])]) assert(coords_t), "cannot find coords" assert(energy_t), "cannot find energies" @@ -62,10 +78,11 @@ def to_system_data(file_name, md=False): forces_t = forces_t[-1:] energy_t = energy_t[-1:] coords_t = coords_t[-1:] + cells_t = cells_t[-1:] data['forces'] = np.array(forces_t) * force_convert data['energies'] = np.array(energy_t) * energy_convert data['coords'] = np.array(coords_t) data['orig'] = np.array([0, 0, 0]) - data['cells'] = np.array([[[100., 0., 0.], [0., 100., 0.], [0., 0., 100.]] for _ in energy_t]) - data['nopbc'] = True + data['cells'] = np.array(cells_t) + data['nopbc'] = nopbc return data diff --git a/tests/gaussian/h2pbc.gaussianlog b/tests/gaussian/h2pbc.gaussianlog new file mode 100644 index 00000000..b876e317 --- /dev/null +++ b/tests/gaussian/h2pbc.gaussianlog @@ -0,0 +1,301 @@ + Entering Gaussian System, Link 0=g16 + Input=h.gjf + Output=h.log + Initial command: + /home/jz748/g16/g16/l1.exe "/home/jz748/tmp/Gau-1950599.inp" -scrdir="/home/jz748/tmp/" + Entering Link 1 = /home/jz748/g16/g16/l1.exe PID= 1950600. + + Copyright (c) 1988,1990,1992,1993,1995,1998,2003,2009,2016, + Gaussian, Inc. All Rights Reserved. + + This is part of the Gaussian(R) 16 program. It is based on + the Gaussian(R) 09 system (copyright 2009, Gaussian, Inc.), + the Gaussian(R) 03 system (copyright 2003, Gaussian, Inc.), + the Gaussian(R) 98 system (copyright 1998, Gaussian, Inc.), + the Gaussian(R) 94 system (copyright 1995, Gaussian, Inc.), + the Gaussian 92(TM) system (copyright 1992, Gaussian, Inc.), + the Gaussian 90(TM) system (copyright 1990, Gaussian, Inc.), + the Gaussian 88(TM) system (copyright 1988, Gaussian, Inc.), + the Gaussian 86(TM) system (copyright 1986, Carnegie Mellon + University), and the Gaussian 82(TM) system (copyright 1983, + Carnegie Mellon University). Gaussian is a federally registered + trademark of Gaussian, Inc. + + This software contains proprietary and confidential information, + including trade secrets, belonging to Gaussian, Inc. + + This software is provided under written license and may be + used, copied, transmitted, or stored only in accord with that + written license. + + The following legend is applicable only to US Government + contracts under FAR: + + RESTRICTED RIGHTS LEGEND + + Use, reproduction and disclosure by the US Government is + subject to restrictions as set forth in subparagraphs (a) + and (c) of the Commercial Computer Software - Restricted + Rights clause in FAR 52.227-19. + + Gaussian, Inc. + 340 Quinnipiac St., Bldg. 40, Wallingford CT 06492 + + + --------------------------------------------------------------- + Warning -- This program may not be used in any manner that + competes with the business of Gaussian, Inc. or will provide + assistance to any competitor of Gaussian, Inc. The licensee + of this program is prohibited from giving any competitor of + Gaussian, Inc. access to this program. By using this program, + the user acknowledges that Gaussian, Inc. is engaged in the + business of creating and licensing software in the field of + computational chemistry and represents and warrants to the + licensee that it is not a competitor of Gaussian, Inc. and that + it will not use this program in any manner prohibited above. + --------------------------------------------------------------- + + + Cite this work as: + Gaussian 16, Revision A.03, + M. J. Frisch, G. W. Trucks, H. B. Schlegel, G. E. Scuseria, + M. A. Robb, J. R. Cheeseman, G. Scalmani, V. Barone, + G. A. Petersson, H. Nakatsuji, X. Li, M. Caricato, A. V. Marenich, + J. Bloino, B. G. Janesko, R. Gomperts, B. Mennucci, H. P. Hratchian, + J. V. Ortiz, A. F. Izmaylov, J. L. Sonnenberg, D. Williams-Young, + F. Ding, F. Lipparini, F. Egidi, J. Goings, B. Peng, A. Petrone, + T. Henderson, D. Ranasinghe, V. G. Zakrzewski, J. Gao, N. Rega, + G. Zheng, W. Liang, M. Hada, M. Ehara, K. Toyota, R. Fukuda, + J. Hasegawa, M. Ishida, T. Nakajima, Y. Honda, O. Kitao, H. Nakai, + T. Vreven, K. Throssell, J. A. Montgomery, Jr., J. E. Peralta, + F. Ogliaro, M. J. Bearpark, J. J. Heyd, E. N. Brothers, K. N. Kudin, + V. N. Staroverov, T. A. Keith, R. Kobayashi, J. Normand, + K. Raghavachari, A. P. Rendell, J. C. Burant, S. S. Iyengar, + J. Tomasi, M. Cossi, J. M. Millam, M. Klene, C. Adamo, R. Cammi, + J. W. Ochterski, R. L. Martin, K. Morokuma, O. Farkas, + J. B. Foresman, and D. J. Fox, Gaussian, Inc., Wallingford CT, 2016. + + ****************************************** + Gaussian 16: ES64L-G16RevA.03 25-Dec-2016 + 19-Dec-2022 + ****************************************** + -------------------- + # force wb97x/6-31g* + -------------------- + 1/10=7,30=1,38=1/1,3; + 2/12=2,17=6,18=5,40=1/2; + 3/5=1,6=6,7=1,11=2,25=1,30=1,71=1,74=-57/1,2,3; + 4//1; + 5/5=2,38=5/2; + 6/7=2,8=2,9=2,10=2,28=1/1; + 7/29=1/1,2,3,16; + 1/10=7,30=1/3; + 99//99; + - + H + - + Symbolic Z-matrix: + Charge = 0 Multiplicity = 1 + H 0. 0. 0. + H 1. 1. 0. + TV 10. 0. 0. + TV 0. 10. 0. + TV 0. 0. 10. + + + GradGradGradGradGradGradGradGradGradGradGradGradGradGradGradGradGradGrad + Berny optimization. + Initialization pass. + Trust Radius=3.00D-01 FncErr=1.00D-07 GrdErr=1.00D-07 EigMax=2.50D+02 EigMin=1.00D-04 + Number of steps in this run= 2 maximum allowed number of steps= 2. + GradGradGradGradGradGradGradGradGradGradGradGradGradGradGradGradGradGrad + + Before rotation: + --------------------------------------------------------------------- + Center Atomic Atomic Coordinates (Angstroms) + Number Number Type X Y Z + --------------------------------------------------------------------- + 1 1 0 0.000000 0.000000 0.000000 + 2 1 0 1.000000 1.000000 0.000000 + 3 -2 0 10.000000 0.000000 0.000000 + 4 -2 0 0.000000 10.000000 0.000000 + 5 -2 0 0.000000 0.000000 10.000000 + --------------------------------------------------------------------- + Lengths of translation vectors: 10.000000 10.000000 10.000000 + Angles of translation vectors: 90.000000 90.000000 90.000000 + --------------------------------------------------------------------- + Input orientation: + --------------------------------------------------------------------- + Center Atomic Atomic Coordinates (Angstroms) + Number Number Type X Y Z + --------------------------------------------------------------------- + 1 1 0 -0.500000 -0.500000 0.000000 + 2 1 0 0.500000 0.500000 0.000000 + 3 -2 0 10.000000 0.000000 0.000000 + 4 -2 0 0.000000 10.000000 0.000000 + 5 -2 0 0.000000 0.000000 10.000000 + --------------------------------------------------------------------- + Lengths of translation vectors: 10.000000 10.000000 10.000000 + Angles of translation vectors: 90.000000 90.000000 90.000000 + --------------------------------------------------------------------- + Distance matrix (angstroms): + 1 2 3 4 5 + 1 H 0.000000 + 2 H 1.414214 0.000000 + 3 TV 10.511898 9.513149 0.000000 + 4 TV 10.511898 9.513149 14.142136 0.000000 + 5 TV 10.024969 10.024969 14.142136 14.142136 0.000000 + Unit Cell Distance matrix (angstroms): + 1 2 + 1 H 0.000000 + 2 H 12.727922 0.000000 + Symmetry turned off: + Cannot cope with ghost atoms or with translation vectors. + Stoichiometry H2 + Framework group C1[X(H2)] + Deg. of freedom 0 + Full point group C1 NOp 1 + Standard basis: 6-31G(d) (6D, 7F) + 4 basis functions, 8 primitive gaussians, 4 cartesian basis functions + 1 alpha electrons 1 beta electrons + nuclear repulsion energy 0.3741847943 Hartrees. + NAtoms= 2 NActive= 2 NUniq= 2 SFac= 1.00D+00 NAtFMM= 60 NAOKFM=F Big=F + Integral buffers will be 131072 words long. + Raffenetti 2 integral format. + Two-electron integral symmetry is turned off. + FOutLm= 100.00. + Periodicity: 1 1 1 + Max integer dimensions: 6 6 6 + PBC vector 1 X= 18.8973 Y= 0.0000 Z= 0.0000 + PBC vector 2 X= 0.0000 Y= 18.8973 Z= 0.0000 + PBC vector 3 X= 0.0000 Y= 0.0000 Z= 18.8973 + Recp vector 1 X= 0.0529 Y= 0.0000 Z= 0.0000 + Recp vector 2 X= 0.0000 Y= 0.0529 Z= 0.0000 + Recp vector 3 X= 0.0000 Y= 0.0000 Z= 0.0529 + Generated k point mesh (from -Pi to Pi): + K space mesh: X= 14 Y= 14 Z= 14 + A half-cell shift: 0 + Using k point mesh (from -Pi to Pi): + K space mesh: X= 14 Y= 14 Z= 14 + A half-cell shift: 0 + CountK=T Total number of k points: 0 + CountK=T Total number of k points: 1376 + One-electron integrals computed using PRISM. + NBasis= 4 RedAO= T EigKep= 2.13D-01 NBF= 4 + NBsUse= 4 1.00D-06 EigRej= -1.00D+00 NBFU= 4 + RepCel: MaxNCR= 37 NClRep= 37 NMtPBC= 1389. + Harris functional with IExCor= 4538 and IRadAn= 5 diagonalized for initial guess. + RepCel: MaxNCR= 37 NClRep= 37 NMtPBC= 1389. + HarFok: IExCor= 4538 AccDes= 0.00D+00 IRadAn= 5 IDoV= 2 UseB2=F ITyADJ=14 + ICtDFT= 3500011 ScaDFX= 1.000000 1.000000 1.000000 1.000000 + FoFCou: FMM=T IPFlag= 524288 FMFlag= 990000 FMFlg1= 1001 + NFxFlg= 0 DoJE=T BraDBF=F KetDBF=T FulRan=T + wScrn= 0.000000 ICntrl= 500 IOpCl= 0 I1Cent= 200000004 NGrid= 0 + NMat0= 1 NMatS0= 1 NMatT0= 0 NMatD0= 1 NMtDS0= 0 NMtDT0= 0 + Symmetry not used in FoFCou. + FMM levels: 3 Number of levels for PrismC: 2 + Requested convergence on RMS density matrix=1.00D-07 within 128 cycles. + Requested convergence on MAX density matrix=1.00D-05. + Requested convergence on energy=1.00D-05. + No special actions if energy rises. + Diagonalized old Fock matrix for initial guess. + RepCel: MaxNCR= 37 NClRep= 37 NMtPBC= 1389. + RepCel: MaxNCR= 37 NClRep= 37 NMtPBC= 1389. + RepCel: MaxNCR= 37 NClRep= 37 NMtPBC= 1389. + RepCel: MaxNCR= 37 NClRep= 37 NMtPBC= 1389. + RepCel: MaxNCR= 37 NClRep= 37 NMtPBC= 1389. + RepCel: MaxNCR= 37 NClRep= 37 NMtPBC= 1389. + SCF Done: E(RwB97X) = -1.06463933888 A.U. after 6 cycles + NFock= 6 Conv=0.16D-08 -V/T= 2.3027 + + ********************************************************************** + + Population analysis using the SCF density. + + ********************************************************************** + + Condensed to atoms (all electrons): + 1 2 + 1 H 0.713229 0.286771 + 2 H 0.286771 0.713229 + Mulliken charges: + 1 + 1 H 0.000000 + 2 H -0.000000 + Sum of Mulliken charges = -0.00000 + Mulliken charges with hydrogens summed into heavy atoms: + 1 + Calling FoFJK, ICntrl= 2127 FMM=T ISym2X=0 I1Cent= 0 IOpClX= 0 NMat=1 NMatS=1 NMatT=0. + RepCel: MaxNCR= 37 NClRep= 37 NMtPBC= 1389. + ------------------------------------------------------------------- + Center Atomic Forces (Hartrees/Bohr) + Number Number X Y Z + ------------------------------------------------------------------- + 1 1 0.066019420 0.066019420 0.000000000 + 2 1 -0.066019420 -0.066019420 -0.000000000 + -2 -0.000000162 -0.000000016 0.000000000 + -2 -0.000000016 -0.000000162 -0.000000000 + -2 0.000000000 0.000000000 0.000000123 + ------------------------------------------------------------------- + Cartesian Forces: Max 0.066019420 RMS 0.034092282 + ----------------------------------------------------------------------------------------------- + Internal Coordinate Forces (Hartree/Bohr or radian) + Cent Atom N1 Length/X N2 Alpha/Y N3 Beta/Z J + ----------------------------------------------------------------------------------------------- + 1 H 0.066019( 1) 0.066019( 6) 0.000000( 11) + 2 H -0.066019( 2) -0.066019( 7) -0.000000( 12) + TV -0.000000( 3) -0.000000( 8) 0.000000( 13) + TV -0.000000( 4) -0.000000( 9) -0.000000( 14) + TV 0.000000( 5) 0.000000( 10) 0.000000( 15) + ----------------------------------------------------------------------------------------------- + Internal Forces: Max 0.066019420 RMS 0.034092282 + + GradGradGradGradGradGradGradGradGradGradGradGradGradGradGradGradGradGrad + Berny optimization. + Search for a local minimum. + Step number 1 out of a maximum of 2 + All quantities printed in internal units (Hartrees-Bohrs-Radians) + Second derivative matrix not updated -- first step. + ITU= 0 + Angle between quadratic step and forces= 0.00 degrees. + Linear search not attempted -- first point. + Variable Old X -DE/DX Delta X Delta X Delta X New X + (Linear) (Quad) (Total) + X1 -0.94486 0.06602 0.00000 0.06602 0.06602 -0.87884 + Y1 -0.94486 0.06602 0.00000 0.06602 0.06602 -0.87884 + Z1 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 + X2 0.94486 -0.06602 0.00000 -0.06602 -0.06602 0.87884 + Y2 0.94486 -0.06602 0.00000 -0.06602 -0.06602 0.87884 + Z2 0.00000 -0.00000 0.00000 -0.00000 -0.00000 -0.00000 + X3 18.89726 -0.00000 0.00000 -0.00000 -0.00000 18.89726 + Y3 0.00000 -0.00000 0.00000 -0.00000 -0.00000 -0.00000 + Z3 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 + X4 0.00000 -0.00000 0.00000 -0.00000 -0.00000 -0.00000 + Y4 18.89726 -0.00000 0.00000 -0.00000 -0.00000 18.89726 + Z4 0.00000 -0.00000 0.00000 -0.00000 -0.00000 -0.00000 + X5 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 + Y5 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 + Z5 18.89726 0.00000 0.00000 0.00000 0.00000 18.89726 + Item Value Threshold Converged? + Maximum Force 0.066019 0.000450 NO + RMS Force 0.034092 0.000300 NO + Maximum Displacement 0.066019 0.001800 NO + RMS Displacement 0.034092 0.001200 NO + Predicted change in Energy=-8.717128D-03 + GradGradGradGradGradGradGradGradGradGradGradGradGradGradGradGradGradGrad + + 1\1\GINC-LOCALHOST\Force\RwB97X\6-31G(d)\H2\JZ748\19-Dec-2022\0\\# for + ce wb97x/6-31g*\\H\\0,1\H,-0.5,-0.5,0.\H,0.5,0.5,0.\TV,10.,0.,0.\TV,0. + ,10.,0.\TV,0.,0.,10.\\Version=ES64L-G16RevA.03\HF=-1.0646393\RMSD=1.63 + 1e-09\RMSF=3.409e-02\PG=C01 [X(H2)]\\@ + + + ABOVE ALL I AM AN OPTIMIST FOR NUMBER THEORY, + AND I HOLD THE HOPE THAT WE MAY NOT BE FAR FROM + A TIME WHEN IRREFUTABLE ARITHMETIC WILL CELEBRATE + ITS TRIUMPHS IN PHYSICS AND CHEMISTRY. + -- HERMANN MINKOWSKI, 1905 + Job cpu time: 0 days 0 hours 0 minutes 2.4 seconds. + Elapsed time: 0 days 0 hours 0 minutes 2.3 seconds. + File lengths (MBytes): RWF= 16 Int= 0 D2E= 0 Chk= 2 Scr= 1 + Normal termination of Gaussian 16 at Mon Dec 19 01:52:07 2022. diff --git a/tests/test_gaussian_log.py b/tests/test_gaussian_log.py index f492fb18..e52f9307 100644 --- a/tests/test_gaussian_log.py +++ b/tests/test_gaussian_log.py @@ -74,5 +74,23 @@ def test_forces(self) : def test_virials(self) : self.assertFalse('virials' in self.system.data) + +class TestGaussianLoadPBCLog(unittest.TestCase, TestGaussianLog): + """PBC.""" + def setUp (self) : + self.system = dpdata.LabeledSystem('gaussian/h2pbc.gaussianlog', + fmt = 'gaussian/log') + self.atom_names = ['H'] + self.atom_numbs = [2] + self.nframes = 1 + self.atom_types = [0, 0] + self.cells = (np.eye(3) * 10.0).reshape(1, 3, 3) + + def test_cells(self) : + self.assertTrue(np.allclose(self.system.data['cells'], self.cells)) + + def test_nopbc(self): + self.assertEqual(self.system.nopbc, False) + if __name__ == '__main__': unittest.main() From 26e22f73dcf1a4508df0918f627ee56c08e0ea75 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 20 Dec 2022 22:48:53 -0500 Subject: [PATCH 02/20] fix pass action (#400) --- .github/workflows/test.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e19f85d4..9e2cbf60 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -30,5 +30,9 @@ jobs: pass: needs: [build] runs-on: ubuntu-latest + if: always() steps: - - run: echo "All jobs passed" + - name: Decide whether the needed jobs succeeded or failed + uses: re-actors/alls-green@release/v1 + with: + jobs: ${{ toJSON(needs) }} From dcc48d5ab539926b4425af8385619299d939700e Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Wed, 21 Dec 2022 22:38:28 -0500 Subject: [PATCH 03/20] fix a bug in #397 (#401) --- dpdata/gaussian/gjf.py | 2 +- tests/test_gaussian_gjf.py | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 tests/test_gaussian_gjf.py diff --git a/dpdata/gaussian/gjf.py b/dpdata/gaussian/gjf.py index c2652747..6c169b48 100644 --- a/dpdata/gaussian/gjf.py +++ b/dpdata/gaussian/gjf.py @@ -234,7 +234,7 @@ def make_gaussian_input( cell = sys_data['cells'][0] for ii in range(3): # use TV as atomic symbol, see https://gaussian.com/pbc/ - buff.append('TV %f %f %f' % (symbol, *cell[ii])) + buff.append('TV %f %f %f' % (*cell[ii],)) if basis_set is not None: # custom basis set buff.extend(['', basis_set, '']) diff --git a/tests/test_gaussian_gjf.py b/tests/test_gaussian_gjf.py new file mode 100644 index 00000000..350b5025 --- /dev/null +++ b/tests/test_gaussian_gjf.py @@ -0,0 +1,14 @@ +import unittest +import os + +from context import dpdata + + +class TestGaussianGJF(unittest.TestCase): + def setUp (self) : + self.system = dpdata.LabeledSystem('poscars/OUTCAR.h2o.md', + fmt = 'vasp/outcar') + + def test_dump_gaussian_gjf(self): + self.system.to_gaussian_gjf('tmp.gjf', keywords="force b3lyp/6-31g*") + os.remove('tmp.gjf') From 5d9a63f86ebd1cc979a0ffae2ee0f708cfa5ddd2 Mon Sep 17 00:00:00 2001 From: pxlxingliang <91927439+pxlxingliang@users.noreply.github.com> Date: Thu, 22 Dec 2022 12:04:13 +0800 Subject: [PATCH 04/20] Refactor(abacus): return None but not raise error when energy can not be found in scf (#402) --- dpdata/abacus/scf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpdata/abacus/scf.py b/dpdata/abacus/scf.py index 0283025a..b81d02cc 100644 --- a/dpdata/abacus/scf.py +++ b/dpdata/abacus/scf.py @@ -98,7 +98,7 @@ def get_energy(outlines): Etot = float(line.split()[1]) # in eV break if not Etot: - raise RuntimeError("Final total energy cannot be found in output. Unknown problem.") + return Etot,False for line in outlines: if "convergence has NOT been achieved!" in line: return Etot,False From b92d0735e526abbcf3a0d1f4025146fad6d95985 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sat, 24 Dec 2022 22:21:30 -0500 Subject: [PATCH 05/20] docs: update installation commands (#403) `setup.py` has been removed. --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 32f77831..c93221ad 100644 --- a/README.md +++ b/README.md @@ -7,15 +7,15 @@ One can download the source code of dpdata by ```bash git clone https://github.com/deepmodeling/dpdata.git dpdata ``` -then use `setup.py` to install the module +then use `pip` to install the module from source ```bash cd dpdata -python setup.py install +pip install . ``` -`dpdata` can also by install via pip +`dpdata` can also by install via pip without source ```bash -pip3 install dpdata +pip install dpdata ``` From e6cbaab6367aab036d0170b017afab8ca92b3afd Mon Sep 17 00:00:00 2001 From: pxlxingliang <91927439+pxlxingliang@users.noreply.github.com> Date: Thu, 5 Jan 2023 09:57:03 +0800 Subject: [PATCH 06/20] Fix(abacus): add judgment on the existence of INPUT/running_scf.log/STRU files in abacus/scf (#405) return a null dict but not raiseError when INPUT/running_scf.log/STRU files are not exist in abacus/scf. --- dpdata/abacus/scf.py | 39 +++++++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/dpdata/abacus/scf.py b/dpdata/abacus/scf.py index b81d02cc..f3f80d48 100644 --- a/dpdata/abacus/scf.py +++ b/dpdata/abacus/scf.py @@ -6,6 +6,12 @@ ry2ev = EnergyConversion("rydberg", "eV").value() kbar2evperang3 = PressureConversion("kbar", "eV/angstrom^3").value() +def CheckFile(ifile): + if not os.path.isfile(ifile): + print("Can not find file %s" % ifile) + return False + return True + def get_block (lines, keyword, skip = 0, nlines = None): ret = [] found = False @@ -108,7 +114,8 @@ def get_force (outlines, natoms): force = [] force_inlines = get_block (outlines, "TOTAL-FORCE (eV/Angstrom)", skip = 4, nlines=np.sum(natoms)) if force_inlines is None: - raise RuntimeError("TOTAL-FORCE (eV/Angstrom) is not found in running_scf.log. Please check.") + print("TOTAL-FORCE (eV/Angstrom) is not found in OUT.XXX/running_scf.log. May be you haven't set 'cal_force 1' in the INPUT.") + return [[]] for line in force_inlines: force.append([float(f) for f in line.split()[1:4]]) force = np.array(force) @@ -127,17 +134,32 @@ def get_stress(outlines): def get_frame (fname): + data = {'atom_names':[],\ + 'atom_numbs':[],\ + 'atom_types':[],\ + 'cells':[],\ + 'coords':[],\ + 'energies':[],\ + 'forces':[]} + if type(fname) == str: # if the input parameter is only one string, it is assumed that it is the # base directory containing INPUT file; path_in = os.path.join(fname, "INPUT") else: raise RuntimeError('invalid input') + + if not CheckFile(path_in): + return data + with open(path_in, 'r') as fp: inlines = fp.read().split('\n') geometry_path_in = get_geometry_in(fname, inlines) path_out = get_path_out(fname, inlines) + if not (CheckFile(geometry_path_in) and CheckFile(path_out)): + return data + with open(geometry_path_in, 'r') as fp: geometry_inlines = fp.read().split('\n') with open(path_out, 'r') as fp: @@ -145,25 +167,18 @@ def get_frame (fname): celldm, cell = get_cell(geometry_inlines) atom_names, natoms, types, coords = get_coords(celldm, cell, geometry_inlines, inlines) + data['atom_names'] = atom_names + data['atom_numbs'] = natoms + data['atom_types'] = types energy,converge = get_energy(outlines) if not converge: - return {'atom_names':atom_names,\ - 'atom_numbs':natoms,\ - 'atom_types':types,\ - 'cells':[],\ - 'coords':[],\ - 'energies':[],\ - 'forces':[]} + return data force = get_force (outlines, natoms) stress = get_stress(outlines) if stress is not None: stress *= np.abs(np.linalg.det(cell)) - data = {} - data['atom_names'] = atom_names - data['atom_numbs'] = natoms - data['atom_types'] = types data['cells'] = cell[np.newaxis, :, :] data['coords'] = coords[np.newaxis, :, :] data['energies'] = np.array(energy)[np.newaxis] From cfc9fd4595cac840c14afd80674b747a69719d14 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 12 Jan 2023 01:36:54 -0500 Subject: [PATCH 07/20] accept np.ndarray as index (#407) --- dpdata/system.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpdata/system.py b/dpdata/system.py index 6089db46..f403b879 100644 --- a/dpdata/system.py +++ b/dpdata/system.py @@ -339,7 +339,7 @@ def __str__(self): def __getitem__(self, key): """Returns proerty stored in System by key or by idx""" - if isinstance(key, (int, slice, list)): + if isinstance(key, (int, slice, list, np.ndarray)): return self.sub_system(key) return self.data[key] From a3d21334b152f3bebc17fe7394cc0414c3f8e644 Mon Sep 17 00:00:00 2001 From: pxlxingliang <91927439+pxlxingliang@users.noreply.github.com> Date: Fri, 20 Jan 2023 10:54:18 +0800 Subject: [PATCH 08/20] Fix(abacus/relax): refactor the read of results from log file (#409) --- dpdata/abacus/relax.py | 100 +- .../running_cell-relax.log.abnormal | 1186 +++++++++++++++++ ...elax.log => running_cell-relax.log.normal} | 0 tests/test_abacus_relax.py | 31 +- 4 files changed, 1283 insertions(+), 34 deletions(-) create mode 100644 tests/abacus.relax/OUT.abacus/running_cell-relax.log.abnormal rename tests/abacus.relax/OUT.abacus/{running_cell-relax.log => running_cell-relax.log.normal} (100%) diff --git a/dpdata/abacus/relax.py b/dpdata/abacus/relax.py index c9d7a669..88ea8153 100644 --- a/dpdata/abacus/relax.py +++ b/dpdata/abacus/relax.py @@ -18,6 +18,12 @@ def get_log_file(fname, inlines): def get_coords_from_log(loglines,natoms): ''' NOTICE: unit of coords and cells is Angstrom + order: + coordinate + cell (no output if cell is not changed) + energy (no output, if SCF is not converged) + force (no output, if cal_force is not setted or abnormal ending) + stress (no output, if set cal_stress is not setted or abnormal ending) ''' natoms_log = 0 for line in loglines: @@ -31,47 +37,43 @@ def get_coords_from_log(loglines,natoms): coords = [] force = [] stress = [] + coord_direct = [] #if the coordinate is direct type or not for i in range(len(loglines)): line = loglines[i] if line[18:41] == "lattice constant (Bohr)": a0 = float(line.split()[-1]) elif len(loglines[i].split()) >=2 and loglines[i].split()[1] == 'COORDINATES': + #read coordinate information coords.append([]) direct_coord = False if loglines[i].split()[0] == 'DIRECT': - direct_coord = True + coord_direct.append(True) for k in range(2,2+natoms): coords[-1].append(list(map(lambda x: float(x),loglines[i+k].split()[1:4]))) elif loglines[i].split()[0] == 'CARTESIAN': + coord_direct.append(False) for k in range(2,2+natoms): coords[-1].append(list(map(lambda x: float(x)*a0,loglines[i+k].split()[1:4]))) else: assert(False),"Unrecongnized coordinate type, %s, line:%d" % (loglines[i].split()[0],i) + + elif loglines[i][1:56] == "Lattice vectors: (Cartesian coordinate: in unit of a_0)": + #add the cell information for previous structures + while len(cells) < len(coords) - 1: + cells.append(cells[-1]) + #get current cell information + cells.append([]) + for k in range(1,4): + cells[-1].append(list(map(lambda x:float(x)*a0,loglines[i+k].split()[0:3]))) - converg = True - for j in range(i): - if loglines[i-j-1][1:36] == 'Ion relaxation is not converged yet': - converg = False - break - elif loglines[i-j-1][1:29] == 'Ion relaxation is converged!': - converg = True - break - - if converg: - for j in range(i+1,len(loglines)): - if loglines[j][1:56] == "Lattice vectors: (Cartesian coordinate: in unit of a_0)": - cells.append([]) - for k in range(1,4): - cells[-1].append(list(map(lambda x:float(x)*a0,loglines[j+k].split()[0:3]))) - break - else: - cells.append(cells[-1]) - - coords[-1] = np.array(coords[-1]) - if direct_coord: - coords[-1] = coords[-1].dot(cells[-1]) - + elif line[1:14] == "final etot is": + #add the energy for previous structures whose SCF is not converged + while len(energy) < len(coords) - 1: + energy.append(np.nan) + #get the energy of current structure + energy.append(float(line.split()[-2])) + elif line[4:15] == "TOTAL-FORCE": force.append([]) for j in range(5,5+natoms): @@ -80,18 +82,58 @@ def get_coords_from_log(loglines,natoms): stress.append([]) for j in range(4,7): stress[-1].append(list(map(lambda x:float(x),loglines[i+j].split()[0:3]))) - elif line[1:14] == "final etot is": - energy.append(float(line.split()[-2])) - - assert(len(cells) == len(coords) or len(cells)+1 == len(coords)),"ERROR: detected %d coordinates and %d cells" % (len(coords),len(cells)) - if len(cells)+1 == len(coords): del(coords[-1]) + #delete last structures which has no energy + while len(energy) < len(coords): + del coords[-1] + del coord_direct[-1] + + #add cells for last structures whose cell is not changed + while len(cells) < len(coords): + cells.append(cells[-1]) + + #only keep structures that have all of coord, force and stress + if len(stress) == 0 and len(force) == 0: + minl = len(coords) + elif len(stress) == 0: + minl = min(len(coords),len(force)) + force = force[:minl] + elif len(force) == 0: + minl = min(len(coords),len(stress)) + stress = stress[:minl] + else: + minl = min(len(coords),len(force),len(stress)) + force = force[:minl] + stress = stress[:minl] + + coords = coords[:minl] + energy = energy[:minl] + cells = cells[:minl] + + #delete structures whose energy is np.nan + for i in range(minl): + if np.isnan(energy[i-minl]): + del energy[i-minl] + del coords[i-minl] + del cells[i-minl] + del coord_direct[i-minl] + if len(force) > 0: + del force[i-minl] + if len(stress) > 0: + del stress[i-minl] + energy = np.array(energy) cells = np.array(cells) coords = np.array(coords) stress = np.array(stress) force = np.array(force) + #transfer direct coordinate to cartessian type + for i in range(len(coords)): + if coord_direct[i]: + coords[i] = coords[i].dot(cells[i]) + + #transfer bohrium to angstrom cells *= bohr2ang coords *= bohr2ang diff --git a/tests/abacus.relax/OUT.abacus/running_cell-relax.log.abnormal b/tests/abacus.relax/OUT.abacus/running_cell-relax.log.abnormal new file mode 100644 index 00000000..733e20f7 --- /dev/null +++ b/tests/abacus.relax/OUT.abacus/running_cell-relax.log.abnormal @@ -0,0 +1,1186 @@ + + WELCOME TO ABACUS + + 'Atomic-orbital Based Ab-initio Computation at UStc' + + Website: http://abacus.ustc.edu.cn/ + + Version: Parallel, in development + Processor Number is 2 + Start Time is Mon Jul 25 11:30:20 2022 + + ------------------------------------------------------------------------------------ + + READING GENERAL INFORMATION + global_out_dir = OUT.abacus/ + global_in_card = INPUT + pseudo_dir = + orbital_dir = + pseudo_type = auto + DRANK = 1 + DSIZE = 2 + DCOLOR = 1 + GRANK = 1 + GSIZE = 1 + + + + + >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> + | | + | Reading atom information in unitcell: | + | From the input file and the structure file we know the number of | + | different elments in this unitcell, then we list the detail | + | information for each element, especially the zeta and polar atomic | + | orbital number for each element. The total atom number is counted. | + | We calculate the nearest atom distance for each atom and show the | + | Cartesian and Direct coordinates for each atom. We list the file | + | address for atomic orbitals. The volume and the lattice vectors | + | in real and reciprocal space is also shown. | + | | + <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + + + + READING UNITCELL INFORMATION + ntype = 2 + atom label for species 1 = H + atom label for species 2 = O + lattice constant (Bohr) = 1 + lattice constant (Angstrom) = 0.529177 + + READING ATOM TYPE 1 + atom label = H + L=0, number of zeta = 1 + L=1, number of zeta = 1 + L=2, number of zeta = 1 + number of atom for this type = 2 + start magnetization = FALSE + start magnetization = FALSE + + READING ATOM TYPE 2 + atom label = O + L=0, number of zeta = 1 + L=1, number of zeta = 1 + L=2, number of zeta = 1 + number of atom for this type = 1 + start magnetization = FALSE + + TOTAL ATOM NUMBER = 3 + + CARTESIAN COORDINATES ( UNIT = 1 Bohr ). + atom x y z mag vx vy vz + tauc_H1 15.9532129411 18.7655861467 8.39524747132 0 0 0 0 + tauc_H2 13.7711312041 20.6154930027 7.61198952454 0 0 0 0 + tauc_O1 14.5132108826 19.6841922084 8.95832135273 0 0 0 0 + + + Volume (Bohr^3) = 21952 + Volume (A^3) = 3252.94689686 + + Lattice vectors: (Cartesian coordinate: in unit of a_0) + +28 +0 +0 + +0 +28 +0 + +0 +0 +28 + Reciprocal vectors: (Cartesian coordinate: in unit of 2 pi/a_0) + +0.0357142857143 -0 +0 + +0 +0.0357142857143 -0 + +0 -0 +0.0357142857143 + + + + + >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> + | | + | Reading pseudopotentials files: | + | The pseudopotential file is in UPF format. The 'NC' indicates that | + | the type of pseudopotential is 'norm conserving'. Functional of | + | exchange and correlation is decided by 4 given parameters in UPF | + | file. We also read in the 'core correction' if there exists. | + | Also we can read the valence electrons number and the maximal | + | angular momentum used in this pseudopotential. We also read in the | + | trail wave function, trail atomic density and local-pseudopotential| + | on logrithmic grid. The non-local pseudopotential projector is also| + | read in if there is any. | + | | + <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + + + + PAO radial cut off (Bohr) = 15 + + Read in pseudopotential file is ../potential/H_ONCV_PBE-1.0.upf + pseudopotential type = NC + exchange-correlation functional = PBE + nonlocal core correction = 0 + valence electrons = 1 + lmax = 0 + number of zeta = 0 + number of projectors = 2 + L of projector = 0 + L of projector = 0 + PAO radial cut off (Bohr) = 15 + + Read in pseudopotential file is ../potential/O_ONCV_PBE-1.0.upf + pseudopotential type = NC + exchange-correlation functional = PBE + nonlocal core correction = 0 + valence electrons = 6 + lmax = 1 + number of zeta = 0 + number of projectors = 4 + L of projector = 0 + L of projector = 0 + L of projector = 1 + L of projector = 1 + initial pseudo atomic orbital number = 0 + NLOCAL = 27 + + + + + >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> + | | + | Setup plane waves of charge/potential: | + | Use the energy cutoff and the lattice vectors to generate the | + | dimensions of FFT grid. The number of FFT grid on each processor | + | is 'nrxx'. The number of plane wave basis in reciprocal space is | + | different for charege/potential and wave functions. We also set | + | the 'sticks' for the parallel of FFT. The number of plane waves | + | is 'npw' in each processor. | + <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + + + + + SETUP THE PLANE WAVE BASIS + energy cutoff for charge/potential (unit:Ry) = 200 + [fft grid for charge/potential] = 128, 128, 128 + [fft grid division] = 1, 1, 1 + [big fft grid for charge/potential] = 128, 128, 128 + nbxx = 1048576 + nrxx = 1048576 + + SETUP PLANE WAVES FOR CHARGE/POTENTIAL + number of plane waves = 1048171 + number of sticks = 12469 + + PARALLEL PW FOR CHARGE/POTENTIAL + PROC COLUMNS(POT) PW + 1 6235 524087 + 2 6234 524084 + --------------- sum ------------------- + 2 12469 1048171 + number of |g| = 3312 + max |g| = 5.06505102041 + min |g| = 0 + + SETUP THE ELECTRONS NUMBER + electron number of element H = 1 + total electron number of element H = 2 + electron number of element O = 6 + total electron number of element O = 6 + occupied bands = 4 + NBANDS = 6 + DONE : SETUP UNITCELL Time : 0.128108929377 (SEC) + + + + + + >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> + | | + | Doing symmetry analysis: | + | We calculate the norm of 3 vectors and the angles between them, | + | the type of Bravais lattice is given. We can judge if the unticell | + | is a primitive cell. Finally we give the point group operation for | + | this unitcell. We we use the point group operations to do symmetry | + | analysis on given k-point mesh and the charge density. | + | | + <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + + + + LATTICE VECTORS: (CARTESIAN COORDINATE: IN UNIT OF A0) + +28 +0 +0 + +0 +28 +0 + +0 +0 +28 + right hand lattice = 1 + NORM_A = 28 + NORM_B = 28 + NORM_C = 28 + ALPHA (DEGREE) = 90 + BETA (DEGREE) = 90 + GAMMA (DEGREE) = 90 + BRAVAIS TYPE = 1 + BRAVAIS LATTICE NAME = 01. Cubic P (simple) + IBRAV = 1 + BRAVAIS = SIMPLE CUBIC + LATTICE CONSTANT A = 122.049170419 + ibrav = 1 + ROTATION MATRICES = 48 + PURE POINT GROUP OPERATIONS = 1 + SPACE GROUP OPERATIONS = 1 + POINT GROUP = C_1 +Warning : If the optimal symmetric configuration is not the input configuration, +you have to manually change configurations, ABACUS would only calculate the input structure! + DONE : SYMMETRY Time : 0.155216244515 (SEC) + + + + + + >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> + | | + | Setup K-points | + | We setup the k-points according to input parameters. | + | The reduced k-points are set according to symmetry operations. | + | We treat the spin as another set of k-points. | + | | + <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + + + + + SETUP K-POINTS + nspin = 1 + Input type of k points = Monkhorst-Pack(Gamma) + nkstot = 1 + nkstot_ibz = 1 + IBZ DirectX DirectY DirectZ Weight ibz2bz + 1 0 0 0 1 0 + nkstot now = 1 + + KPOINTS DIRECT_X DIRECT_Y DIRECT_Z WEIGHT + 1 0 0 0 1 + + k-point number in this process = 1 + minimum distributed K point number = 1 + + KPOINTS CARTESIAN_X CARTESIAN_Y CARTESIAN_Z WEIGHT + 1 0 0 0 2 + + KPOINTS DIRECT_X DIRECT_Y DIRECT_Z WEIGHT + 1 0 0 0 2 + DONE : INIT K-POINTS Time : 0.155574926175 (SEC) + + + + + + >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> + | | + | Setup plane waves of wave functions: | + | Use the energy cutoff and the lattice vectors to generate the | + | dimensions of FFT grid. The number of FFT grid on each processor | + | is 'nrxx'. The number of plane wave basis in reciprocal space is | + | different for charege/potential and wave functions. We also set | + | the 'sticks' for the parallel of FFT. The number of plane wave of | + | each k-point is 'npwk[ik]' in each processor | + <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + + + + + SETUP PLANE WAVES FOR WAVE FUNCTIONS + energy cutoff for wavefunc (unit:Ry) = 50 + [fft grid for wave functions] = 128, 128, 128 + number of plane waves = 131155 + number of sticks = 3125 + + PARALLEL PW FOR WAVE FUNCTIONS + PROC COLUMNS(POT) PW + 1 1562 65576 + 2 1563 65579 + --------------- sum ------------------- + 2 3125 131155 + DONE : INIT PLANEWAVE Time : 0.174154018052 (SEC) + + DONE : INIT CHARGE Time : 0.259490167722 (SEC) + + npwx = 65576 + + SETUP NONLOCAL PSEUDOPOTENTIALS IN PLANE WAVE BASIS + H non-local projectors: + projector 1 L=0 + projector 2 L=0 + O non-local projectors: + projector 1 L=0 + projector 2 L=0 + projector 3 L=1 + projector 4 L=1 + TOTAL NUMBER OF NONLOCAL PROJECTORS = 12 + DONE : LOCAL POTENTIAL Time : 0.320586761925 (SEC) + + + Init Non-Local PseudoPotential table : + Init Non-Local-Pseudopotential done. + DONE : NON-LOCAL POTENTIAL Time : 0.331188020762 (SEC) + + init_chg = atomic + DONE : INIT POTENTIAL Time : 0.953154 (SEC) + + + Make real space PAO into reciprocal space. + max mesh points in Pseudopotential = 601 + dq(describe PAO in reciprocal space) = 0.01 + max q = 854 + + number of pseudo atomic orbitals for H is 0 + + number of pseudo atomic orbitals for O is 0 + DONE : INIT BASIS Time : 0.953302 (SEC) + + + ------------------------------------------- + RELAX CELL : 1 + RELAX IONS : 1 (in total: 1) + ------------------------------------------- + + PW ALGORITHM --------------- ION= 1 ELEC= 1-------------------------------- + + Density error is 0.417176464796 + Error Threshold = 0.01 + + Energy Rydberg eV + E_KohnSham -34.1543953066 -464.694387914 + E_Harris -34.3635778924 -467.540463003 + E_Fermi -0.520811172492 -7.08599952795 + + PW ALGORITHM --------------- ION= 1 ELEC= 2-------------------------------- + + Density error is 0.0243970602606 + Error Threshold = 0.00521470580995 + + Energy Rydberg eV + E_KohnSham -34.2350427506 -465.791652681 + E_Harris -34.2396178942 -465.853900704 + E_Fermi -0.445620982249 -6.06298450694 + + PW ALGORITHM --------------- ION= 1 ELEC= 3-------------------------------- + + Density error is 0.0106666914925 + Error Threshold = 0.000304963253257 + + Energy Rydberg eV + E_KohnSham -34.2334330467 -465.769751537 + E_Harris -34.2371193473 -465.819906229 + E_Fermi -0.461726538377 -6.28211183974 + + PW ALGORITHM --------------- ION= 1 ELEC= 4-------------------------------- + + Density error is 0.000502675170383 + Error Threshold = 0.000133333643656 + + Energy Rydberg eV + E_KohnSham -34.2339527078 -465.776821889 + E_Harris -34.2341613373 -465.779660439 + E_Fermi -0.200209037333 -2.72398369882 + + PW ALGORITHM --------------- ION= 1 ELEC= 5-------------------------------- + + Density error is 0.00013515778285 + Error Threshold = 6.28343962979e-06 + + Energy Rydberg eV + E_KohnSham -34.2339755848 -465.777133147 + E_Harris -34.2340600257 -465.778282024 + E_Fermi -0.1998556995 -2.71917629098 + + PW ALGORITHM --------------- ION= 1 ELEC= 6-------------------------------- + + Density error is 4.49530417282e-06 + Error Threshold = 1.68947228562e-06 + + Energy Rydberg eV + E_KohnSham -34.2340060994 -465.777548319 + E_Harris -34.2340025802 -465.777500437 + E_Fermi -0.198059680486 -2.69474019867 + + PW ALGORITHM --------------- ION= 1 ELEC= 7-------------------------------- + + Density error is 8.77501268413e-06 + Error Threshold = 5.61913021602e-08 + + Energy Rydberg eV + E_KohnSham -34.2340033243 -465.777510561 + E_Harris -34.2340088507 -465.777585752 + E_Fermi -0.197734942978 -2.69032191821 + + PW ALGORITHM --------------- ION= 1 ELEC= 8-------------------------------- + + Density error is 8.67497506757e-08 + Error Threshold = 5.61913021602e-08 + + Energy Rydberg eV + E_KohnSham -34.2340048296 -465.777531042 + E_Harris -34.234005046 -465.777533987 + E_band -8.10804363451 -110.315593062 + E_one_elec -69.5144554731 -945.792687802 + E_Hartree +36.17757886 +492.221212341 + E_xc -8.44392246259 -114.885458961 + E_Ewald +7.5467942461 +102.679403381 + E_demet -3.01469596081e-24 -4.10170428046e-23 + E_descf +0 +0 + E_efield +0 +0 + E_exx +0 +0 + E_Fermi -0.197510647395 -2.68727022024 + + charge density convergence is achieved + final etot is -465.777531042 eV + + STATE ENERGY(eV) AND OCCUPATIONS NSPIN == 1 + 1/1 kpoint (Cartesian) = 0 0 0 (65576 pws) + 1 -25.4315 2.00000 + 2 -13.5818 2.00000 + 3 -8.97266 2.00000 + 4 -7.17178 2.00000 + 5 -0.769414 0.00000 + 6 0.0954287 0.00000 + + + + + ><><><><><><><><><><><><><><><><><><><><><>< + + TOTAL-FORCE (eV/Angstrom) + + ><><><><><><><><><><><><><><><><><><><><><>< + + atom x y z + H1 +0.40678204 -0.13991970 -0.61593726 + H2 +0.05888465 +0.16630779 -0.76223540 + O1 -0.46566669 -0.02638809 +1.37817266 + + + ><><><><><><><><><><><><><><><><><><><><><>< + + TOTAL-STRESS (KBAR) + + ><><><><><><><><><><><><><><><><><><><><><>< + + -2.153904 -0.330883 -0.070536 + -0.330883 -2.338728 -0.128010 + -0.070536 -0.128010 -1.978256 + TOTAL-PRESSURE: -2.156963 KBAR + + Ion relaxation is not converged yet (threshold is +0.025711) + + CARTESIAN COORDINATES ( UNIT = +1.000000 Bohr ). + atom x y z mag vx vy vz + tauc_H1 +16.065287158181 +18.727036287001 +8.225548029745 +0.000000000000 +0.000000000000 +0.000000000000 +0.000000000000 + tauc_H2 +13.787354759370 +20.661313156171 +7.401982872047 +0.000000000000 +0.000000000000 +0.000000000000 +0.000000000000 + tauc_O1 +14.384913110258 +19.676921914754 +9.338027446795 +0.000000000000 +0.000000000000 +0.000000000000 +0.000000000000 + + Setup the structure factor in plane wave basis. + Setup the extrapolated charge. + NEW-OLD atomic charge density approx. for the potential ! + Setup the Vl+Vh+Vxc according to new structure factor and new charge. + Setup the new wave functions? + + ------------------------------------------- + RELAX CELL : 1 + RELAX IONS : 2 (in total: 2) + ------------------------------------------- + + PW ALGORITHM --------------- ION= +2 ELEC= +1-------------------------------- + + Density error is +4.306406951178 + Error Threshold = +0.010000000000 + + Energy Rydberg eV + E_KohnSham -33.984845752223 -462.387547881332 + E_Harris -36.146219524498 -491.794546692019 + E_Fermi -0.197995181756 -2.693862648432 + + PW ALGORITHM --------------- ION= 2 ELEC= 2-------------------------------- + + Density error is 0.524139822667 + Error Threshold = 0.010000000000 + + Energy Rydberg eV + E_KohnSham -34.028968257734 -462.987865366314 + E_Harris -34.358907885553 -467.476924300648 + E_Fermi -0.181612940365 -2.470970819494 + + PW ALGORITHM --------------- ION= 2 ELEC= 3-------------------------------- + + Density error is 0.008603629609 + Error Threshold = 0.006551747783 + + Energy Rydberg eV + E_KohnSham -34.126866845044 -464.319843979885 + E_Harris -34.126456524090 -464.314261276898 + E_Fermi -0.297352672177 -4.045690657129 + + PW ALGORITHM --------------- ION= 2 ELEC= 4-------------------------------- + + Density error is 0.002218320093 + Error Threshold = 0.000107545370 + + Energy Rydberg eV + E_KohnSham -34.129390206695 -464.354176076454 + E_Harris -34.129193594956 -464.351501036512 + E_Fermi -0.416104088025 -5.661386558240 + + PW ALGORITHM --------------- ION= 2 ELEC= 5-------------------------------- + + Density error is 0.001468898398 + Error Threshold = 0.000027729001 + + Energy Rydberg eV + E_KohnSham -34.129256085952 -464.352351270125 + E_Harris -34.130075919228 -464.363505674089 + E_Fermi -0.418170511223 -5.689501688208 + + PW ALGORITHM --------------- ION= 2 ELEC= 6-------------------------------- + + Density error is 0.000129882152 + Error Threshold = 0.000018361230 + + Energy Rydberg eV + E_KohnSham -34.129606602018 -464.357120285860 + E_Harris -34.129674949307 -464.358050198437 + E_Fermi -0.300798600818 -4.092574921552 + + PW ALGORITHM --------------- ION= 2 ELEC= 7-------------------------------- + + Density error is 0.000010859629 + Error Threshold = 0.000001623527 + + Energy Rydberg eV + E_KohnSham -34.129637781039 -464.357544498211 + E_Harris -34.129643192740 -464.357618128180 + E_Fermi -0.318223545585 -4.329653457719 + + PW ALGORITHM --------------- ION= 2 ELEC= 8-------------------------------- + + Density error is 0.000000556298 + Error Threshold = 0.000000135745 + + Energy Rydberg eV + E_KohnSham -34.129640373324 -464.357579768059 + E_Harris -34.129641983060 -464.357601669637 + E_Fermi -0.318727582976 -4.336511238241 + + PW ALGORITHM --------------- ION= 2 ELEC= 9-------------------------------- + + Density error is 0.000000121833 + Error Threshold = 0.000000006954 + + Energy Rydberg eV + E_KohnSham -34.129640005864 -464.357574768506 + E_Harris -34.129640436568 -464.357580628526 + E_Fermi -0.318641075078 -4.335334237908 + + PW ALGORITHM --------------- ION= 2 ELEC= 10-------------------------------- + + Density error is 0.000000186810 + Error Threshold = 0.000000001523 + + Energy Rydberg eV + E_KohnSham -34.129640196409 -464.357577360995 + E_Harris -34.129640113063 -464.357576227022 + E_Fermi -0.318550723465 -4.334104941148 + + PW ALGORITHM --------------- ION= 2 ELEC= 11-------------------------------- + + Density error is 0.000000001485 + Error Threshold = 0.000000001523 + + Energy Rydberg eV + E_KohnSham -34.129640061126 -464.357575520378 + E_Harris -34.129640228306 -464.357577794985 + E_band -7.520787696202 -102.325566116640 + E_one_elec -64.823282231884 -881.966001415785 + E_Hartree +33.884562763382 +461.023127820625 + E_xc -8.077485864562 -109.899833272499 + E_Ewald +4.886565271938 +66.485131347281 + E_demet -0.000000000000 -0.000000000000 + E_descf +0.000000000000 +0.000000000000 + E_efield +0.000000000000 +0.000000000000 + E_exx +0.000000000000 +0.000000000000 + E_Fermi -0.318592256747 -4.334670030437 + + charge density convergence is achieved + final etot is -464.357575520378 eV + + STATE ENERGY(eV) AND OCCUPATIONS NSPIN == 1 + 1/1 kpoint (Cartesian) = 0.00000 0.00000 0.00000 (65576 pws) + 1 -23.732691 2.000000 + 2 -10.717782 2.000000 + 3 -9.827251 2.000000 + 4 -6.885060 2.000000 + 5 -1.826406 0.000000 + 6 -0.276841 0.000000 + + + + + ><><><><><><><><><><><><><><><><><><><><><>< + + TOTAL-FORCE (eV/Angstrom) + + ><><><><><><><><><><><><><><><><><><><><><>< + + atom x y z + H1 -3.43125048 +1.80114652 +2.82460187 + H2 +0.90733216 -1.89495640 +4.43921704 + O1 +2.52391832 +0.09380988 -7.26381891 + + + ><><><><><><><><><><><><><><><><><><><><><>< + + TOTAL-STRESS (KBAR) + + ><><><><><><><><><><><><><><><><><><><><><>< + + -4.009216 +0.908593 +0.503260 + +0.908593 -3.391991 +0.372926 + +0.503260 +0.372926 -5.317856 + TOTAL-PRESSURE: -4.239688 KBAR + + Ion relaxation is not converged yet (threshold is +0.025711) + + CARTESIAN COORDINATES ( UNIT = +1.000000 Bohr ). + atom x y z mag vx vy vz + tauc_H1 +15.970368922751 +18.759685051002 +8.369270394915 +0.000000000000 +0.000000000000 +0.000000000000 +0.000000000000 + tauc_H2 +13.773614656790 +20.622507013014 +7.579842342394 +0.000000000000 +0.000000000000 +0.000000000000 +0.000000000000 + tauc_O1 +14.493571448267 +19.683079293910 +9.016445611278 +0.000000000000 +0.000000000000 +0.000000000000 +0.000000000000 + + Setup the structure factor in plane wave basis. + Setup the extrapolated charge. + NEW-OLD atomic charge density approx. for the potential ! + Setup the Vl+Vh+Vxc according to new structure factor and new charge. + Setup the new wave functions? + + ------------------------------------------- + RELAX CELL : 1 + RELAX IONS : 3 (in total: 3) + ------------------------------------------- + + PW ALGORITHM --------------- ION= +3 ELEC= +1-------------------------------- + + Density error is +2.863901623074 + Error Threshold = +0.010000000000 + + Energy Rydberg eV + E_KohnSham -34.163066102874 -464.812360149745 + E_Harris -35.594866834916 -484.293008506088 + E_Fermi -0.455725359897 -6.200461617704 + + PW ALGORITHM --------------- ION= 3 ELEC= 2-------------------------------- + + Density error is 0.410367630415 + Error Threshold = 0.010000000000 + + Energy Rydberg eV + E_KohnSham -34.147651136624 -464.602628774257 + E_Harris -34.409877454265 -468.170400859739 + E_Fermi -0.502269645612 -6.833729112759 + + PW ALGORITHM --------------- ION= 3 ELEC= 3-------------------------------- + + Density error is 0.016431897145 + Error Threshold = 0.005129595380 + + Energy Rydberg eV + E_KohnSham -34.231884315653 -465.748679969711 + E_Harris -34.237205015345 -465.821071802865 + E_Fermi -0.163677941108 -2.226952635976 + + PW ALGORITHM --------------- ION= 3 ELEC= 4-------------------------------- + + Density error is 0.003339586249 + Error Threshold = 0.000205398714 + + Energy Rydberg eV + E_KohnSham -34.234286271013 -465.781360248946 + E_Harris -34.234645115958 -465.786242584902 + E_Fermi -0.419282058198 -5.704625060666 + + PW ALGORITHM --------------- ION= 3 ELEC= 5-------------------------------- + + Density error is 0.007193242746 + Error Threshold = 0.000041744828 + + Energy Rydberg eV + E_KohnSham -34.233965842505 -465.777000595434 + E_Harris -34.237258750651 -465.821802909220 + E_Fermi -0.445575876230 -6.062370808069 + + PW ALGORITHM --------------- ION= 3 ELEC= 6-------------------------------- + + Density error is 0.000071123739 + Error Threshold = 0.000041744828 + + Energy Rydberg eV + E_KohnSham -34.235105030292 -465.792500040429 + E_Harris -34.235135970645 -465.792921005526 + E_Fermi -0.195917059478 -2.665588344306 + + PW ALGORITHM --------------- ION= 3 ELEC= 7-------------------------------- + + Density error is 0.000003025816 + Error Threshold = 0.000000889047 + + Energy Rydberg eV + E_KohnSham -34.235144953777 -465.793043227315 + E_Harris -34.235146413266 -465.793063084684 + E_Fermi -0.195891623194 -2.665242265913 + + PW ALGORITHM --------------- ION= 3 ELEC= 8-------------------------------- + + Density error is 0.000014047622 + Error Threshold = 0.000000037823 + + Energy Rydberg eV + E_KohnSham -34.235144028617 -465.793030639864 + E_Harris -34.235149064326 -465.793099154201 + E_Fermi -0.195695384814 -2.662572305769 + + PW ALGORITHM --------------- ION= 3 ELEC= 9-------------------------------- + + Density error is 0.000000380301 + Error Threshold = 0.000000037823 + + Energy Rydberg eV + E_KohnSham -34.235146948037 -465.793070360619 + E_Harris -34.235147173255 -465.793073424855 + E_Fermi -0.195885679726 -2.665161400881 + + PW ALGORITHM --------------- ION= 3 ELEC= 10-------------------------------- + + Density error is 0.000000003456 + Error Threshold = 0.000000004754 + + Energy Rydberg eV + E_KohnSham -34.235147175798 -465.793073459458 + E_Harris -34.235147030585 -465.793071483737 + E_band -8.005203857309 -108.916386110984 + E_one_elec -68.764358752319 -935.587098347715 + E_Hartree +35.812701320327 +487.256798728570 + E_xc -8.382702179786 -114.052514282117 + E_Ewald +7.099212435981 +96.589740441803 + E_demet -0.000000000000 -0.000000000000 + E_descf +0.000000000000 +0.000000000000 + E_efield +0.000000000000 +0.000000000000 + E_exx +0.000000000000 +0.000000000000 + E_Fermi -0.195843647331 -2.664589520805 + + charge density convergence is achieved + final etot is -465.793073459458 eV + + STATE ENERGY(eV) AND OCCUPATIONS NSPIN == 1 + 1/1 kpoint (Cartesian) = 0.00000 0.00000 0.00000 (65576 pws) + 1 -25.151687 2.000000 + 2 -13.061167 2.000000 + 3 -9.128130 2.000000 + 4 -7.117209 2.000000 + 5 -0.875259 0.000000 + 6 0.081372 0.000000 + + + + + ><><><><><><><><><><><><><><><><><><><><><>< + + TOTAL-FORCE (eV/Angstrom) + + ><><><><><><><><><><><><><><><><><><><><><>< + + atom x y z + H1 -0.74519674 +0.51685204 +0.05757714 + H2 +0.52845057 -0.57175572 +0.56392399 + O1 +0.21674617 +0.05490368 -0.62150113 + + + ><><><><><><><><><><><><><><><><><><><><><>< + + TOTAL-STRESS (KBAR) + + ><><><><><><><><><><><><><><><><><><><><><>< + + -2.676771 +0.070442 -0.080953 + +0.070442 -2.674219 +0.037379 + -0.080953 +0.037379 -2.531715 + TOTAL-PRESSURE: -2.627568 KBAR + + Ion relaxation is not converged yet (threshold is +0.025711) + + CARTESIAN COORDINATES ( UNIT = +1.000000 Bohr ). + atom x y z mag vx vy vz + tauc_H1 +15.963906084978 +18.761908055509 +8.379056231391 +0.000000000000 +0.000000000000 +0.000000000000 +0.000000000000 + tauc_H2 +13.772679114178 +20.619864761958 +7.591952522256 +0.000000000000 +0.000000000000 +0.000000000000 +0.000000000000 + tauc_O1 +14.500969828653 +19.683498540459 +8.994549594941 +0.000000000000 +0.000000000000 +0.000000000000 +0.000000000000 + + Setup the structure factor in plane wave basis. + Setup the extrapolated charge. + NEW-OLD atomic charge density approx. for the potential ! + Setup the Vl+Vh+Vxc according to new structure factor and new charge. + Setup the new wave functions? + + ------------------------------------------- + RELAX CELL : 1 + RELAX IONS : 4 (in total: 4) + ------------------------------------------- + + PW ALGORITHM --------------- ION= +4 ELEC= +1-------------------------------- + Notice: Threshold on eigenvalues was too large. + hsover_error=+0.080000 > DRHO=+0.009754 + Origin diag_ethr = +0.010000 + New diag_ethr = +0.000122 + + Density error is +0.013008603075 + Error Threshold = +0.000121928257 + + Energy Rydberg eV + E_KohnSham -34.235665065186 -465.800119706069 + E_Harris -34.242123145811 -465.887986400714 + E_Fermi -0.201851572452 -2.746331535609 + + PW ALGORITHM --------------- ION= 4 ELEC= 2-------------------------------- + + Density error is 0.002090692286 + Error Threshold = 0.000162607538 + + Energy Rydberg eV + E_KohnSham -34.235132150914 -465.792869035430 + E_Harris -34.236530970810 -465.811900956494 + E_Fermi -0.198476768045 -2.700414966033 + + PW ALGORITHM --------------- ION= 4 ELEC= 3-------------------------------- + + Density error is 0.000052460392 + Error Threshold = 0.000026133654 + + Energy Rydberg eV + E_KohnSham -34.235682052400 -465.800350828976 + E_Harris -34.235704411445 -465.800655039388 + E_Fermi -0.196153271944 -2.668802179785 + + PW ALGORITHM --------------- ION= 4 ELEC= 4-------------------------------- + + Density error is 0.000009658088 + Error Threshold = 0.000000655755 + + Energy Rydberg eV + E_KohnSham -34.235696222774 -465.800543626804 + E_Harris -34.235698393982 -465.800573167609 + E_Fermi -0.196616103423 -2.675099325104 + + PW ALGORITHM --------------- ION= 4 ELEC= 5-------------------------------- + + Density error is 0.000000187065 + Error Threshold = 0.000000120726 + + Energy Rydberg eV + E_KohnSham -34.235697732989 -465.800564174335 + E_Harris -34.235697255523 -465.800557678071 + E_Fermi -0.196471698719 -2.673134598311 + + PW ALGORITHM --------------- ION= 4 ELEC= 6-------------------------------- + + Density error is 0.000000075160 + Error Threshold = 0.000000002338 + + Energy Rydberg eV + E_KohnSham -34.235698022205 -465.800568109314 + E_Harris -34.235697872683 -465.800566074967 + E_band -8.042398038358 -109.422438905691 + E_one_elec -69.045489549154 -939.412079067952 + E_Hartree +35.948698866316 +489.107140268040 + E_xc -8.405372202565 -114.360955765699 + E_Ewald +7.266464863199 +98.865326456297 + E_demet -0.000000000000 -0.000000000000 + E_descf +0.000000000000 +0.000000000000 + E_efield +0.000000000000 +0.000000000000 + E_exx +0.000000000000 +0.000000000000 + E_Fermi -0.196459651857 -2.672970692356 + + convergence has NOT been achieved! + + STATE ENERGY(eV) AND OCCUPATIONS NSPIN == 1 + 1/1 kpoint (Cartesian) = 0.00000 0.00000 0.00000 (65576 pws) + 1 -25.255180 2.000000 + 2 -13.252315 2.000000 + 3 -9.068312 2.000000 + 4 -7.135412 2.000000 + 5 -0.833001 0.000000 + 6 0.086578 0.000000 + + + + + ><><><><><><><><><><><><><><><><><><><><><>< + + TOTAL-FORCE (eV/Angstrom) + + ><><><><><><><><><><><><><><><><><><><><><>< + + atom x y z + H1 -0.34425976 +0.29101536 -0.19310908 + H2 +0.37384222 -0.31991297 +0.09092313 + O1 -0.02958246 +0.02889761 +0.10218595 + + + ><><><><><><><><><><><><><><><><><><><><><>< + + TOTAL-STRESS (KBAR) + + ><><><><><><><><><><><><><><><><><><><><><>< + + -2.495529 -0.069759 -0.084163 + -0.069759 -2.561022 -0.020268 + -0.084163 -0.020268 -2.324147 + TOTAL-PRESSURE: -2.460232 KBAR + + Ion relaxation is not converged yet (threshold is +0.025711) + + CARTESIAN COORDINATES ( UNIT = +1.000000 Bohr ). + atom x y z mag vx vy vz + tauc_H1 +15.946191313792 +18.778838846668 +8.361024276793 +0.000000000000 +0.000000000000 +0.000000000000 +0.000000000000 + tauc_H2 +13.796717055281 +20.601372573983 +7.590351723874 +0.000000000000 +0.000000000000 +0.000000000000 +0.000000000000 + tauc_O1 +14.494646658736 +19.685059937275 +9.014182347920 +0.000000000000 +0.000000000000 +0.000000000000 +0.000000000000 + + Setup the structure factor in plane wave basis. + Setup the extrapolated charge. + NEW-OLD atomic charge density approx. for the potential ! + Setup the Vl+Vh+Vxc according to new structure factor and new charge. + Setup the new wave functions? + + ------------------------------------------- + RELAX CELL : 1 + RELAX IONS : 5 (in total: 5) + ------------------------------------------- + + PW ALGORITHM --------------- ION= +5 ELEC= +1-------------------------------- + Notice: Threshold on eigenvalues was too large. + hsover_error=+0.080000 > DRHO=+0.005280 + Origin diag_ethr = +0.010000 + New diag_ethr = +0.000066 + + Density error is +0.008353549675 + Error Threshold = +0.000065997708 + + Energy Rydberg eV + E_KohnSham -34.236235736125 -465.807884082530 + E_Harris -34.240337321619 -465.863689016079 + E_Fermi -0.438351093885 -5.964072601368 + + PW ALGORITHM --------------- ION= 5 ELEC= 2-------------------------------- + + Density error is 0.001090032164 + Error Threshold = 0.000104419371 + + Energy Rydberg eV + E_KohnSham -34.236307152675 -465.808855754541 + E_Harris -34.236930505462 -465.817336904304 + E_Fermi -0.196029016571 -2.667111598704 + + PW ALGORITHM --------------- ION= 5 ELEC= 3-------------------------------- + + Density error is 0.000082249260 + Error Threshold = 0.000013625402 + + Energy Rydberg eV + E_KohnSham -34.236527086358 -465.811848105802 + E_Harris -34.236560493475 -465.812302632946 + E_Fermi -0.196758447273 -2.677036012549 + + PW ALGORITHM --------------- ION= 5 ELEC= 4-------------------------------- + + Density error is 0.000130293705 + Error Threshold = 0.000001028116 + + Energy Rydberg eV + E_KohnSham -34.236562121478 -465.812324783072 + E_Harris -34.236605870085 -465.812920013399 + E_Fermi -0.197596470944 -2.688437909528 + + PW ALGORITHM --------------- ION= 5 ELEC= 5-------------------------------- + + Density error is 0.000024525955 + Error Threshold = 0.000001028116 + + Energy Rydberg eV + E_KohnSham -34.236557021747 -465.812255397668 + E_Harris -34.236571054853 -465.812446327870 + E_Fermi -0.197329109008 -2.684800263772 + + PW ALGORITHM --------------- ION= 5 ELEC= 6-------------------------------- + + Density error is 0.000001304759 + Error Threshold = 0.000000306574 + + Energy Rydberg eV + E_KohnSham -34.236563844352 -465.812348223977 + E_Harris -34.236563957567 -465.812349764348 + E_Fermi -0.197072028110 -2.681302498717 + + PW ALGORITHM --------------- ION= 5 ELEC= 7-------------------------------- + + Density error is 0.000000346081 + Error Threshold = 0.000000016309 + + Energy Rydberg eV + E_KohnSham -34.236564300744 -465.812354433499 + E_Harris -34.236564193956 -465.812352980576 + E_Fermi -0.197125544263 -2.682030623327 + + PW ALGORITHM --------------- ION= 5 ELEC= 8-------------------------------- + + Density error is 0.000000130954 + Error Threshold = 0.000000004326 + + Energy Rydberg eV + E_KohnSham -34.236564259962 -465.812353878642 + E_Harris -34.236564316674 -465.812354650242 + E_Fermi -0.197131965268 -2.682117985583 + + PW ALGORITHM --------------- ION= 5 ELEC= 9-------------------------------- + + Density error is 0.000000001123 + Error Threshold = 0.000000001637 + + Energy Rydberg eV + E_KohnSham -34.236564293143 -465.812354330090 + E_Harris -34.236564293813 -465.812354339209 + E_band -8.062071986763 -109.690116706159 + E_one_elec -69.117794401277 -940.395837049864 + E_Hartree +35.979373791465 +489.524494035791 + E_xc -8.410666291229 -114.432985537248 + E_Ewald +7.312522607898 +99.491974221231 + E_demet -0.000000000000 -0.000000000000 + E_descf +0.000000000000 +0.000000000000 + E_efield +0.000000000000 +0.000000000000 + E_exx +0.000000000000 +0.000000000000 + E_Fermi -0.197145167314 -2.682297608633 + + charge density convergence is achieved + final etot is -465.812354330090 eV + + STATE ENERGY(eV) AND OCCUPATIONS NSPIN == 1 + 1/1 kpoint (Cartesian) = 0.00000 0.00000 0.00000 (65576 pws) + 1 -25.330615 2.000000 + 2 -13.155666 2.000000 + 3 -9.203788 2.000000 + 4 -7.154990 2.000000 + 5 -0.831095 0.000000 + 6 0.086695 0.000000 + + + + + ><><><><><><><><><><><><><><><><><><><><><>< + + TOTAL-FORCE (eV/Angstrom) + + ><><><><><><><><><><><><><><><><><><><><><>< + + atom x y z + H1 -0.18807867 +0.13569889 -0.06941882 + H2 +0.14926844 -0.14376688 +0.04694486 + O1 +0.03881023 +0.00806799 +0.02247396 + + + ><><><><><><><><><><><><><><><><><><><><><>< + + TOTAL-STRESS (KBAR) + + ><><><><><><><><><><><><><><><><><><><><><>< + + -2.392469 -0.160549 -0.032796 + -0.160549 -2.480798 -0.059783 + -0.032796 -0.059783 -2.315957 + TOTAL-PRESSURE: -2.396408 KBAR + + + -------------------------------------------- + !FINAL_ETOT_IS -465.8123543300895335 eV + -------------------------------------------- + + + + + + + |CLASS_NAME---------|NAME---------------|TIME(Sec)-----|CALLS----|AVG------|PER%------- + total +84.59445 19 +4.45 +100.00% + Run_pw plane_wave_line +84.58434 1 +84.58 +99.99% + PW_Basis setup_struc_factor +0.37204 9 +0.04 +0.44% + Potential init_pot +2.60817 5 +0.52 +3.08% + Potential set_local_pot +0.17850 5 +0.04 +0.21% + PW_Basis recip2real +6.88976 294 +0.02 +8.14% + PW_Basis gathers_scatterp +2.80189 294 +0.01 +3.31% + Charge atomic_rho +0.93431 9 +0.10 +1.10% + Potential v_of_rho +21.19138 49 +0.43 +25.05% + XC_Functional v_xc +19.67060 54 +0.36 +23.25% + PW_Basis real2recip +9.16010 446 +0.02 +10.83% + PW_Basis gatherp_scatters +3.68783 446 +0.01 +4.36% + H_Hartree_pw v_hartree +3.16563 49 +0.06 +3.74% + Potential set_vr_eff +0.12252 49 +0.00 +0.14% + Cell_PW opt_cells_pw +83.63640 1 +83.64 +98.87% + Ions opt_ions_pw +83.63639 1 +83.64 +98.87% + ESolver_KS_PW Run +73.96240 5 +14.79 +87.43% + Symmetry rho_symmetry +3.36747 51 +0.07 +3.98% + HSolverPW solve +43.08348 46 +0.94 +50.93% + pp_cell_vnl getvnl +1.24764 56 +0.02 +1.47% + WF_igk get_sk +0.31200 231 +0.00 +0.37% + DiagoIterAssist diagH_subspace +6.55659 45 +0.15 +7.75% + HamiltPW h_psi +34.80823 1083 +0.03 +41.15% + Operator EkineticPW +0.20066 1083 +0.00 +0.24% + Operator VeffPW +31.49160 1083 +0.03 +37.23% + PW_Basis_K recip2real +17.21913 1493 +0.01 +20.35% + PW_Basis_K gathers_scatterp +4.88147 1493 +0.00 +5.77% + PW_Basis_K real2recip +11.93248 1308 +0.01 +14.11% + PW_Basis_K gatherp_scatters +2.55499 1308 +0.00 +3.02% + Operator NonlocalPW +3.11107 1083 +0.00 +3.68% + NonlocalPW add_nonlocal_pp +1.47037 1083 +0.00 +1.74% + DiagoCG diag_once +31.41731 46 +0.68 +37.14% + ElecStatePW psiToRho +3.97135 46 +0.09 +4.69% + Charge rho_mpi +1.34662 46 +0.03 +1.59% + Charge mix_rho +3.23741 39 +0.08 +3.83% + Forces cal_force_loc +0.29882 5 +0.06 +0.35% + Forces cal_force_ew +0.22785 5 +0.05 +0.27% + Forces cal_force_nl +0.20171 5 +0.04 +0.24% + Stress_PW cal_stress +3.22335 5 +0.64 +3.81% + Stress_Func stress_har +0.17289 5 +0.03 +0.20% + Stress_Func stress_ew +0.28902 5 +0.06 +0.34% + Stress_Func stress_gga +0.80316 5 +0.16 +0.95% + Stress_Func stress_loc +0.48018 5 +0.10 +0.57% + Stress_Func stres_nl +1.42753 5 +0.29 +1.69% + ---------------------------------------------------------------------------------------- + + CLASS_NAME---------|NAME---------------|MEMORY(MB)-------- + +418.2583 + Charge_Pulay Rrho +64.0000 + Charge_Pulay dRrho +56.0000 + Charge_Pulay drho +56.0000 + PW_Basis struc_fac +15.9939 + Charge rho +8.0000 + Charge rho_save +8.0000 + Charge rho_core +8.0000 + Potential vltot +8.0000 + Potential vr +8.0000 + Potential vr_eff +8.0000 + Potential vr_eff1 +8.0000 + Potential vnew +8.0000 + Charge_Pulay rho_save2 +8.0000 + wavefunc psi +6.0037 + Charge rhog +3.9985 + Charge rhog_save +3.9985 + Charge kin_r +3.9985 + Charge kin_r_save +3.9985 + Charge rhog_core +3.9985 + ---------------------------------------------------------- + + Start Time : Mon Jul 25 11:30:20 2022 + Finish Time : Mon Jul 25 11:31:45 2022 + Total Time : 0 h 1 mins 25 secs diff --git a/tests/abacus.relax/OUT.abacus/running_cell-relax.log b/tests/abacus.relax/OUT.abacus/running_cell-relax.log.normal similarity index 100% rename from tests/abacus.relax/OUT.abacus/running_cell-relax.log rename to tests/abacus.relax/OUT.abacus/running_cell-relax.log.normal diff --git a/tests/test_abacus_relax.py b/tests/test_abacus_relax.py index 3f1230ad..be331009 100644 --- a/tests/test_abacus_relax.py +++ b/tests/test_abacus_relax.py @@ -1,4 +1,4 @@ -import os +import os,shutil import numpy as np import unittest from context import dpdata @@ -6,7 +6,14 @@ bohr2ang = LengthConversion("bohr", "angstrom").value() -class TestABACUSRelax: +class TestABACUSRelaxLabeledOutput(unittest.TestCase): + + def setUp(self): + shutil.copy('abacus.relax/OUT.abacus/running_cell-relax.log.normal','abacus.relax/OUT.abacus/running_cell-relax.log') + self.system = dpdata.LabeledSystem('abacus.relax',fmt='abacus/relax') + def tearDown(self): + if os.path.isfile("abacus.relax/OUT.abacus/running_cell-relax.log"): + os.remove("abacus.relax/OUT.abacus/running_cell-relax.log") def test_atom_names(self) : self.assertEqual(self.system.data['atom_names'], ['H','O']) @@ -65,11 +72,25 @@ def test_energy(self) : -465.81235433]) np.testing.assert_almost_equal(self.system.data['energies'], ref_energy) - -class TestABACUSMDLabeledOutput(unittest.TestCase, TestABACUSRelax): +class TestABACUSRelaxLabeledOutputAbnormal(unittest.TestCase): def setUp(self): - self.system = dpdata.LabeledSystem('abacus.relax',fmt='abacus/relax') + shutil.copy('abacus.relax/OUT.abacus/running_cell-relax.log.abnormal','abacus.relax/OUT.abacus/running_cell-relax.log') + self.system = dpdata.LabeledSystem('abacus.relax',fmt='abacus/relax') + + def test_result(self): + data = self.system.data + self.assertEqual(len(data['coords']),4) + self.assertEqual(len(data['energies']),len(data['coords'])) + self.assertEqual(len(data['cells']),len(data['coords'])) + self.assertEqual(len(data['forces']),len(data['coords'])) + self.assertEqual(len(data['stress']),len(data['coords'])) + self.assertEqual(len(data['virials']),len(data['coords'])) + np.testing.assert_almost_equal(data['energies'][3],-465.81235433) + + def tearDown(self): + if os.path.isfile("abacus.relax/OUT.abacus/running_cell-relax.log"): + os.remove("abacus.relax/OUT.abacus/running_cell-relax.log") if __name__ == '__main__': unittest.main() \ No newline at end of file From 7b0f44b2b62a8f8ac97ffab9ec6af53cad32c401 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Wed, 25 Jan 2023 04:21:04 -0500 Subject: [PATCH 09/20] fix a doc typo in plugins/gaussian.py (#410) --- dpdata/plugins/gaussian.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpdata/plugins/gaussian.py b/dpdata/plugins/gaussian.py index f49dbf05..dcae8528 100644 --- a/dpdata/plugins/gaussian.py +++ b/dpdata/plugins/gaussian.py @@ -59,7 +59,7 @@ class GaussianDriver(Driver): gaussian_exec : str, default=g16 path to gaussian program **kwargs : dict - other arguments to make input files. See :class:`SQMINFormat` + other arguments to make input files. See :meth:`dpdata.gaussian.gjf.make_gaussian_input` Examples -------- From ffa52c5d6230303d6f7ee4f1356f01aa5b2a011d Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Wed, 25 Jan 2023 05:41:19 -0500 Subject: [PATCH 10/20] lint and automatic lint (#413) Fix #344. To lint codes, this PR uses a popular tool, [pre-commit](https://pre-commit.com/). `.pre-commit-config.yaml` shows all hooks used, including [black](https://github.com/psf/black) and some common fixes. It can add a git hook to `git commit`. Thus, everything committed can be already formatted. In addition, there is also a [CI](https://pre-commit.ci/) to fix PRs. This way, we can ensure that everything new will also be formatted. This PR introduces a huge change history to almost all files, but it's a necessary step to start linting our codes. Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .github/ISSUE_TEMPLATE/request-for-help.md | 2 +- .github/workflows/pub-pypi.yml | 1 - .github/workflows/test_import.yml | 1 - .pre-commit-config.yaml | 25 + README.md | 32 +- docs/Makefile | 2 +- docs/conf.py | 91 ++- docs/credits.rst | 2 +- docs/formats.rst | 1 - docs/make_format.py | 58 +- dpdata/__about__.py | 2 +- dpdata/__init__.py | 3 +- dpdata/abacus/md.py | 198 +++-- dpdata/abacus/relax.py | 191 +++-- dpdata/abacus/scf.py | 270 ++++--- dpdata/amber/__init__.py | 1 - dpdata/amber/mask.py | 10 +- dpdata/amber/md.py | 49 +- dpdata/amber/sqm.py | 48 +- dpdata/ase_calculator.py | 34 +- dpdata/bond_order_system.py | 125 +-- dpdata/cli.py | 45 +- dpdata/cp2k/cell.py | 96 ++- dpdata/cp2k/output.py | 432 +++++----- dpdata/deepmd/comp.py | 144 ++-- dpdata/deepmd/hdf5.py | 160 ++-- dpdata/deepmd/raw.py | 127 +-- dpdata/driver.py | 45 +- dpdata/fhi_aims/output.py | 205 ++--- dpdata/format.py | 41 +- dpdata/gaussian/gjf.py | 126 +-- dpdata/gaussian/log.py | 43 +- dpdata/gromacs/gro.py | 39 +- dpdata/lammps/dump.py | 244 +++--- dpdata/lammps/lmp.py | 241 +++--- dpdata/md/msd.py | 44 +- dpdata/md/pbc.py | 55 +- dpdata/md/rdf.py | 80 +- dpdata/md/water.py | 145 ++-- dpdata/periodic_table.json | 2 +- dpdata/periodic_table.py | 140 +++- dpdata/plugin.py | 7 +- dpdata/plugins/3dmol.py | 22 +- dpdata/plugins/__init__.py | 7 +- dpdata/plugins/abacus.py | 33 +- dpdata/plugins/amber.py | 61 +- dpdata/plugins/ase.py | 107 ++- dpdata/plugins/cp2k.py | 21 +- dpdata/plugins/deepmd.py | 155 ++-- dpdata/plugins/fhi_aims.py | 58 +- dpdata/plugins/gaussian.py | 20 +- dpdata/plugins/gromacs.py | 16 +- dpdata/plugins/lammps.py | 16 +- dpdata/plugins/list.py | 3 +- dpdata/plugins/pwmat.py | 41 +- dpdata/plugins/pymatgen.py | 46 +- dpdata/plugins/qe.py | 56 +- dpdata/plugins/rdkit.py | 19 +- dpdata/plugins/siesta.py | 76 +- dpdata/plugins/vasp.py | 83 +- dpdata/plugins/xyz.py | 28 +- dpdata/pwmat/__init__.py | 1 - dpdata/pwmat/atomconfig.py | 97 ++- dpdata/pwmat/movement.py | 190 +++-- dpdata/pymatgen/molecule.py | 18 +- dpdata/qe/__init__.py | 1 - dpdata/qe/scf.py | 125 +-- dpdata/qe/traj.py | 223 +++--- dpdata/rdkit/sanitize.py | 197 +++-- dpdata/rdkit/utils.py | 75 +- dpdata/siesta/__init__.py | 1 - dpdata/siesta/aiMD_output.py | 79 +- dpdata/siesta/output.py | 55 +- dpdata/stat.py | 19 +- dpdata/system.py | 742 ++++++++++-------- dpdata/unit.py | 40 +- dpdata/utils.py | 57 +- dpdata/vasp/outcar.py | 146 ++-- dpdata/vasp/poscar.py | 112 +-- dpdata/vasp/xml.py | 133 ++-- dpdata/xyz/quip_gap_xyz.py | 161 ++-- dpdata/xyz/xyz.py | 14 +- plugin_example/README.md | 2 +- plugin_example/dpdata_random/__init__.py | 25 +- requirements.txt | 1 - tests/comp_sys.py | 134 ++-- tests/context.py | 5 +- tests/poscars/poscar_ref_oh.py | 60 +- tests/poscars/test_lammps_dump_s_su.py | 31 +- tests/pwmat/config_ref_ch4.py | 77 +- tests/pwmat/config_ref_oh.py | 56 +- tests/test_abacus_md.py | 230 ++++-- tests/test_abacus_pw_scf.py | 108 +-- tests/test_abacus_relax.py | 111 +-- tests/test_abacus_stru_dump.py | 19 +- tests/test_amber_md.py | 41 +- tests/test_amber_sqm.py | 64 +- tests/test_ase_traj.py | 33 +- tests/test_bond_order_system.py | 90 ++- tests/test_cell_to_low_triangle.py | 54 +- tests/test_cli.py | 18 +- tests/test_corr.py | 31 +- tests/test_cp2k_aimd_output.py | 20 +- tests/test_cp2k_output.py | 63 +- tests/test_deepmd_comp.py | 106 ++- tests/test_deepmd_hdf5.py | 73 +- tests/test_deepmd_raw.py | 282 ++++--- tests/test_elements_index.py | 37 +- tests/test_empty.py | 42 +- tests/test_fhi_md_multi_elem_output.py | 43 +- tests/test_fhi_md_output.py | 41 +- tests/test_fhi_output.py | 100 +-- tests/test_gaussian_driver.py | 105 ++- tests/test_gaussian_gjf.py | 11 +- tests/test_gaussian_log.py | 103 +-- tests/test_gromacs_gro.py | 185 +++-- tests/test_json.py | 17 +- tests/test_lammps_dump_idx.py | 27 +- tests/test_lammps_dump_shift_origin.py | 10 +- tests/test_lammps_dump_skipload.py | 19 +- tests/test_lammps_dump_to_system.py | 31 +- tests/test_lammps_dump_unfold.py | 35 +- tests/test_lammps_lmp_dump.py | 34 +- tests/test_lammps_lmp_to_system.py | 17 +- tests/test_lammps_read_from_trajs.py | 151 +++- tests/test_msd.py | 44 +- tests/test_multisystems.py | 164 ++-- tests/test_periodic_table.py | 37 +- tests/test_perturb.py | 175 +++-- tests/test_pick_atom_idx.py | 48 +- tests/test_predict.py | 94 +-- tests/test_pwmat_config_dump.py | 57 +- tests/test_pwmat_config_to_system.py | 24 +- tests/test_pwmat_mlmd.py | 63 +- tests/test_pwmat_movement.py | 75 +- tests/test_pymatgen_molecule.py | 30 +- tests/test_qe_cp_traj.py | 74 +- tests/test_qe_cp_traj_skipload.py | 90 ++- tests/test_qe_pw_scf.py | 166 ++-- ...test_qe_pw_scf_crystal_atomic_positions.py | 30 +- tests/test_qe_pw_scf_energy_bug.py | 20 +- tests/test_quip_gap_xyz.py | 98 ++- tests/test_remove_atom_names.py | 14 +- tests/test_remove_pbc.py | 44 +- tests/test_replace.py | 19 +- tests/test_replicate.py | 35 +- tests/test_shuffle.py | 8 +- tests/test_siesta_aiMD_output.py | 101 ++- tests/test_siesta_output.py | 64 +- tests/test_sqm_driver.py | 29 +- tests/test_stat.py | 12 +- tests/test_system_append.py | 35 +- tests/test_system_apply_pbc.py | 35 +- tests/test_system_set_type.py | 46 +- tests/test_to_ase.py | 30 +- tests/test_to_list.py | 8 +- tests/test_to_pymatgen.py | 28 +- tests/test_to_pymatgen_entry.py | 41 +- tests/test_type_map.py | 20 +- tests/test_vasp_outcar.py | 95 ++- tests/test_vasp_poscar_dump.py | 59 +- tests/test_vasp_poscar_to_system.py | 56 +- tests/test_vasp_unconverged_outcar.py | 23 +- tests/test_vasp_xml.py | 25 +- tests/test_water_ions.py | 59 +- tests/test_xyz.py | 33 +- 166 files changed, 6957 insertions(+), 4954 deletions(-) create mode 100644 .pre-commit-config.yaml diff --git a/.github/ISSUE_TEMPLATE/request-for-help.md b/.github/ISSUE_TEMPLATE/request-for-help.md index 397d2b02..ad05a34d 100644 --- a/.github/ISSUE_TEMPLATE/request-for-help.md +++ b/.github/ISSUE_TEMPLATE/request-for-help.md @@ -13,7 +13,7 @@ Before asking questions, you can search the previous issues or discussions check the [README](https://github.com/deepmodeling/dpdata/#readme). -Please **do not** post requests for help (e.g. with installing or using dpdata) here. +Please **do not** post requests for help (e.g. with installing or using dpdata) here. Instead go to [discussions](https://github.com/deepmodeling/dpdata/discussions). This issue tracker is for tracking dpdata development related issues only. diff --git a/.github/workflows/pub-pypi.yml b/.github/workflows/pub-pypi.yml index a276f85b..24f99f7f 100644 --- a/.github/workflows/pub-pypi.yml +++ b/.github/workflows/pub-pypi.yml @@ -36,4 +36,3 @@ jobs: uses: pypa/gh-action-pypi-publish@master with: password: ${{ secrets.PYPI_API_TOKEN }} - diff --git a/.github/workflows/test_import.yml b/.github/workflows/test_import.yml index 34bc23be..b04d05cd 100644 --- a/.github/workflows/test_import.yml +++ b/.github/workflows/test_import.yml @@ -15,4 +15,3 @@ jobs: architecture: 'x64' - run: python -m pip install . - run: python -c 'import dpdata' - diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..83769dfb --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,25 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + # there are many log files in tests + # TODO: seperate py files and log files + - id: trailing-whitespace + exclude: "^tests/.*$" + - id: end-of-file-fixer + exclude: "^tests/.*$" + - id: check-yaml + - id: check-json + - id: check-added-large-files + - id: check-merge-conflict + - id: check-symlinks + - id: check-toml +# Python +- repo: https://github.com/psf/black + rev: 22.12.0 + hooks: + - id: black-jupyter +ci: + autoupdate_branch: devel diff --git a/README.md b/README.md index c93221ad..9c9fe53b 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ dpdata only works with python 3.7 or above. # Installation -One can download the source code of dpdata by +One can download the source code of dpdata by ```bash git clone https://github.com/deepmodeling/dpdata.git dpdata ``` @@ -25,10 +25,10 @@ This section gives some examples on how dpdata works. Firstly one needs to impor ```python import dpdata ``` -The typicall workflow of `dpdata` is +The typicall workflow of `dpdata` is 1. Load data from vasp or lammps or deepmd-kit data files. -2. Manipulate data +2. Manipulate data 3. Dump data to in a desired format @@ -41,9 +41,9 @@ or let dpdata infer the format (`vasp/poscar`) of the file from the file name ex d_poscar = dpdata.System('my.POSCAR') ``` The number of atoms, atom types, coordinates are loaded from the `POSCAR` and stored to a data `System` called `d_poscar`. -A data `System` (a concept used by [deepmd-kit](https://github.com/deepmodeling/deepmd-kit)) contains frames that has the same number of atoms of the same type. The order of the atoms should be consistent among the frames in one `System`. +A data `System` (a concept used by [deepmd-kit](https://github.com/deepmodeling/deepmd-kit)) contains frames that has the same number of atoms of the same type. The order of the atoms should be consistent among the frames in one `System`. It is noted that `POSCAR` only contains one frame. -If the multiple frames stored in, for example, a `OUTCAR` is wanted, +If the multiple frames stored in, for example, a `OUTCAR` is wanted, ```python d_outcar = dpdata.LabeledSystem('OUTCAR') ``` @@ -53,9 +53,9 @@ The `System` or `LabeledSystem` can be constructed from the following file forma | Software| format | multi frames | labeled | class | format key | | ------- | :--- | :---: | :---: | :--- | :--- | -| vasp | poscar | False | False | System | 'vasp/poscar' | -| vasp | outcar | True | True | LabeledSystem | 'vasp/outcar' | -| vasp | xml | True | True | LabeledSystem | 'vasp/xml' | +| vasp | poscar | False | False | System | 'vasp/poscar' | +| vasp | outcar | True | True | LabeledSystem | 'vasp/outcar' | +| vasp | xml | True | True | LabeledSystem | 'vasp/xml' | | lammps | lmp | False | False | System | 'lammps/lmp' | | lammps | dump | True | False | System | 'lammps/dump' | | deepmd | raw | True | False | System | 'deepmd/raw' | @@ -89,7 +89,7 @@ The `System` or `LabeledSystem` can be constructed from the following file forma The Class `dpdata.MultiSystems` can read data from a dir which may contains many files of different systems, or from single xyz file which contains different systems. -Use `dpdata.MultiSystems.from_dir` to read from a directory, `dpdata.MultiSystems` will walk in the directory +Use `dpdata.MultiSystems.from_dir` to read from a directory, `dpdata.MultiSystems` will walk in the directory Recursively and find all file with specific file_name. Supports all the file formats that `dpdata.LabeledSystem` supports. Use `dpdata.MultiSystems.from_file` to read from single file. Single-file support is available for the `quip/gap/xyz` and `ase/structure` formats. @@ -148,7 +148,7 @@ coords = d_outcar['coords'] ``` Available properties are (nframe: number of frames in the system, natoms: total number of atoms in the system) -| key | type | dimension | are labels | description +| key | type | dimension | are labels | description | --- | --- | --- | --- | --- | 'atom_names' | list of str | ntypes | False | The name of each atom type | 'atom_numbs' | list of int | ntypes | False | The number of atoms of each atom type @@ -186,7 +186,7 @@ dpdata.LabeledSystem('OUTCAR').sub_system([0,-1]).to('deepmd/raw', 'dpmd_raw') by which only the first and last frames are dumped to `dpmd_raw`. -## replicate +## replicate dpdata will create a super cell of the current atom configuration. ```python dpdata.System('./POSCAR').replicate((1,2,3,) ) @@ -197,9 +197,9 @@ tuple(1,2,3) means don't copy atom configuration in x direction, make 2 copys in ## perturb By the following example, each frame of the original system (`dpdata.System('./POSCAR')`) is perturbed to generate three new frames. For each frame, the cell is perturbed by 5% and the atom positions are perturbed by 0.6 Angstrom. `atom_pert_style` indicates that the perturbation to the atom positions is subject to normal distribution. Other available options to `atom_pert_style` are`uniform` (uniform in a ball), and `const` (uniform on a sphere). ```python -perturbed_system = dpdata.System('./POSCAR').perturb(pert_num=3, - cell_pert_fraction=0.05, - atom_pert_distance=0.6, +perturbed_system = dpdata.System('./POSCAR').perturb(pert_num=3, + cell_pert_fraction=0.05, + atom_pert_distance=0.6, atom_pert_style='normal') print(perturbed_system.data) ``` @@ -213,7 +213,7 @@ s.to_vasp_poscar('POSCAR.P42nmc.replace') ``` # BondOrderSystem -A new class `BondOrderSystem` which inherits from class `System` is introduced in dpdata. This new class contains information of chemical bonds and formal charges (stored in `BondOrderSystem.data['bonds']`, `BondOrderSystem.data['formal_charges']`). Now BondOrderSystem can only read from .mol/.sdf formats, because of its dependency on rdkit (which means rdkit must be installed if you want to use this function). Other formats, such as pdb, must be converted to .mol/.sdf format (maybe with software like open babel). +A new class `BondOrderSystem` which inherits from class `System` is introduced in dpdata. This new class contains information of chemical bonds and formal charges (stored in `BondOrderSystem.data['bonds']`, `BondOrderSystem.data['formal_charges']`). Now BondOrderSystem can only read from .mol/.sdf formats, because of its dependency on rdkit (which means rdkit must be installed if you want to use this function). Other formats, such as pdb, must be converted to .mol/.sdf format (maybe with software like open babel). ```python import dpdata system_1 = dpdata.BondOrderSystem("tests/bond_order/CH3OH.mol", fmt="mol") # read from .mol file @@ -242,7 +242,7 @@ According to our test, our sanitization procedure can successfully read 4852 sma ```python import dpdata - + for sdf_file in glob.glob("bond_order/refined-set-ligands/obabel/*sdf"): syst = dpdata.BondOrderSystem(sdf_file, sanitize_level='high', verbose=False) ``` diff --git a/docs/Makefile b/docs/Makefile index 1c9a12fe..5970ce25 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -17,4 +17,4 @@ help: # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) \ No newline at end of file + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/conf.py b/docs/conf.py index 7931ff70..eabf1c84 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -16,19 +16,20 @@ import sys import subprocess as sp from datetime import date -sys.path.insert(0, os.path.abspath('..')) + +sys.path.insert(0, os.path.abspath("..")) # -- Project information ----------------------------------------------------- -project = 'dpdata' -copyright = '2019-%d, DeepModeling ' % date.today().year -author = 'Han Wang' +project = "dpdata" +copyright = "2019-%d, DeepModeling " % date.today().year +author = "Han Wang" # The short X.Y version -version = '0.0' +version = "0.0" # The full version, including alpha/beta/rc tags -release = '0.0.0-rc' +release = "0.0.0-rc" # -- General configuration --------------------------------------------------- @@ -41,27 +42,27 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'deepmodeling_sphinx', - 'sphinx_rtd_theme', - 'sphinx.ext.mathjax', - 'sphinx.ext.viewcode', - 'sphinx.ext.intersphinx', - 'numpydoc', - 'm2r2', - 'sphinxarg.ext', + "deepmodeling_sphinx", + "sphinx_rtd_theme", + "sphinx.ext.mathjax", + "sphinx.ext.viewcode", + "sphinx.ext.intersphinx", + "numpydoc", + "m2r2", + "sphinxarg.ext", ] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] -source_suffix = ['.rst', '.md'] +source_suffix = [".rst", ".md"] # The master toctree document. -master_doc = 'index' +master_doc = "index" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -73,10 +74,10 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path . -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # -- Options for HTML output ------------------------------------------------- @@ -84,7 +85,7 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'sphinx_rtd_theme' +html_theme = "sphinx_rtd_theme" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the @@ -95,7 +96,7 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -#html_static_path = ['_static'] +# html_static_path = ['_static'] # Custom sidebar templates, must be a dictionary that maps document names # to template names. @@ -111,7 +112,7 @@ # -- Options for HTMLHelp output --------------------------------------------- # Output file base name for HTML help builder. -htmlhelp_basename = 'dpdatadoc' +htmlhelp_basename = "dpdatadoc" # -- Options for LaTeX output ------------------------------------------------ @@ -120,15 +121,12 @@ # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. # # 'preamble': '', - # Latex figure (float) alignment # # 'figure_align': 'htbp', @@ -138,8 +136,7 @@ # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - (master_doc, 'dpdata.tex', 'dpdata Documentation', - 'Han Wang', 'manual'), + (master_doc, "dpdata.tex", "dpdata Documentation", "Han Wang", "manual"), ] @@ -147,10 +144,7 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, 'dpdata', 'dpdata Documentation', - [author], 1) -] +man_pages = [(master_doc, "dpdata", "dpdata Documentation", [author], 1)] # -- Options for Texinfo output ---------------------------------------------- @@ -159,26 +153,47 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'dpdata', 'dpdata Documentation', - author, 'dpdata', 'One line description of project.', - 'Miscellaneous'), + ( + master_doc, + "dpdata", + "dpdata Documentation", + author, + "dpdata", + "One line description of project.", + "Miscellaneous", + ), ] # -- Extension configuration ------------------------------------------------- def run_apidoc(_): from sphinx.ext.apidoc import main - sys.path.append(os.path.join(os.path.dirname(__file__), '..')) + + sys.path.append(os.path.join(os.path.dirname(__file__), "..")) cur_dir = os.path.abspath(os.path.dirname(__file__)) module = os.path.join(cur_dir, "..", "dpdata") - main(['-M', '--tocfile', 'api', '-H', 'API documentation', '-o', os.path.join(cur_dir, "api"), module, '--force']) + main( + [ + "-M", + "--tocfile", + "api", + "-H", + "API documentation", + "-o", + os.path.join(cur_dir, "api"), + module, + "--force", + ] + ) + def run_formats(_): sp.check_output([sys.executable, "make_format.py"]) + def setup(app): - app.connect('builder-inited', run_apidoc) - app.connect('builder-inited', run_formats) + app.connect("builder-inited", run_apidoc) + app.connect("builder-inited", run_formats) intersphinx_mapping = { diff --git a/docs/credits.rst b/docs/credits.rst index a72b83e5..54fd9884 100644 --- a/docs/credits.rst +++ b/docs/credits.rst @@ -1,4 +1,4 @@ Authors ======= -.. git-shortlog-authors:: \ No newline at end of file +.. git-shortlog-authors:: diff --git a/docs/formats.rst b/docs/formats.rst index 1920a848..c0ff5b8f 100644 --- a/docs/formats.rst +++ b/docs/formats.rst @@ -6,4 +6,3 @@ dpdata supports the following formats: .. csv-table:: Supported Formats :file: formats.csv :header-rows: 1 - diff --git a/docs/make_format.py b/docs/make_format.py index 61fc10a4..ae8002c1 100644 --- a/docs/make_format.py +++ b/docs/make_format.py @@ -13,16 +13,17 @@ def get_formats() -> dict: formats[ff].append(kk) return formats + def detect_overridden(cls: Format, method: str) -> bool: """Check whether a method is override - + Parameters ---------- cls : Format a format method : str method name - + Returns ------- bool @@ -30,38 +31,45 @@ def detect_overridden(cls: Format, method: str) -> bool: """ return method in cls.__dict__ + def get_cls_link(cls: object) -> str: """Returns class link. - + Parameters ---------- cls : object the class - + Returns ------- str the link of a class """ - return ':class:`%s <%s>`' % (cls.__name__, ".".join([cls.__module__, cls.__name__])) + return ":class:`%s <%s>`" % (cls.__name__, ".".join([cls.__module__, cls.__name__])) + def check_supported(fmt: Format): methods = set() for mtd in [ - 'from_system', 'to_system', - 'from_labeled_system', 'to_labeled_system', - 'from_bond_order_system', 'to_bond_order_system', - 'from_multi_systems', 'to_multi_systems', - ]: + "from_system", + "to_system", + "from_labeled_system", + "to_labeled_system", + "from_bond_order_system", + "to_bond_order_system", + "from_multi_systems", + "to_multi_systems", + ]: if detect_overridden(fmt, mtd): methods.add(mtd) - if mtd == 'to_system': - methods.add('to_labeled_system') + if mtd == "to_system": + methods.add("to_labeled_system") if fmt.MultiMode != fmt.MultiModes.NotImplemented: - methods.add('from_multi_systems') - methods.add('to_multi_systems') + methods.add("from_multi_systems") + methods.add("to_multi_systems") return methods + method_links = { "from_system": ":func:`System() `", "to_system": ":func:`System.to() `", @@ -75,16 +83,22 @@ def check_supported(fmt: Format): if __name__ == "__main__": formats = get_formats() - with open('formats.csv', 'w', newline='') as csvfile: + with open("formats.csv", "w", newline="") as csvfile: fieldnames = [ - 'Class', 'Alias', 'Supported Functions', - ] + "Class", + "Alias", + "Supported Functions", + ] writer = csv.DictWriter(csvfile, fieldnames=fieldnames) writer.writeheader() for kk, vv in formats.items(): - writer.writerow({ - 'Class': get_cls_link(kk), - 'Alias': '\n'.join(('``%s``' % vvv for vvv in vv)), - 'Supported Functions': '\n'.join(method_links[mtd] for mtd in check_supported(kk)), - }) + writer.writerow( + { + "Class": get_cls_link(kk), + "Alias": "\n".join(("``%s``" % vvv for vvv in vv)), + "Supported Functions": "\n".join( + method_links[mtd] for mtd in check_supported(kk) + ), + } + ) diff --git a/dpdata/__about__.py b/dpdata/__about__.py index 2c6b9a13..d5cfca64 100644 --- a/dpdata/__about__.py +++ b/dpdata/__about__.py @@ -1 +1 @@ -__version__ = 'unknown' +__version__ = "unknown" diff --git a/dpdata/__init__.py b/dpdata/__init__.py index 001777a3..f426b790 100644 --- a/dpdata/__init__.py +++ b/dpdata/__init__.py @@ -14,11 +14,10 @@ try: # prevent conflict with dpdata.rdkit import rdkit as _ + USE_RDKIT = True except ModuleNotFoundError: USE_RDKIT = False if USE_RDKIT: from .bond_order_system import BondOrderSystem - - diff --git a/dpdata/abacus/md.py b/dpdata/abacus/md.py index 253e48a2..be6bee47 100644 --- a/dpdata/abacus/md.py +++ b/dpdata/abacus/md.py @@ -1,7 +1,15 @@ from ast import dump -import os,sys +import os, sys import numpy as np -from .scf import ry2ev, bohr2ang, kbar2evperang3, get_block, get_geometry_in, get_cell, get_coords +from .scf import ( + ry2ev, + bohr2ang, + kbar2evperang3, + get_block, + get_geometry_in, + get_cell, + get_coords, +) import re import warnings @@ -9,28 +17,31 @@ # The atomic coordinates are read in from generated files in OUT.XXXX. # Energies, forces # IMPORTANT: the program defaultly takes STRU input file as standard cell information, -# therefore the direct and cartesan coordinates read could be different from the ones in +# therefore the direct and cartesan coordinates read could be different from the ones in # the output cif files!!! # It is highly recommanded to use ORTHOGANAL coordinates in STRU file if you wish to get -# same coordinates in both dpdata and output cif files. +# same coordinates in both dpdata and output cif files. + def get_path_out(fname, inlines): # This function is different from the same-name function in scf.py. # This function returns OUT.XXXX's base directory. path_out = os.path.join(fname, "OUT.ABACUS/") for line in inlines: - if len(line)>0 and "suffix" in line and "suffix"==line.split()[0]: - suffix = line.split()[1] - path_out = os.path.join(fname, "OUT.%s/" % suffix) - break + if len(line) > 0 and "suffix" in line and "suffix" == line.split()[0]: + suffix = line.split()[1] + path_out = os.path.join(fname, "OUT.%s/" % suffix) + break return path_out + def get_coord_dump_freq(inlines): for line in inlines: - if len(line)>0 and "md_dumpfreq" in line and "md_dumpfreq" == line.split()[0]: + if len(line) > 0 and "md_dumpfreq" in line and "md_dumpfreq" == line.split()[0]: return int(line.split()[1]) return 1 + def get_coords_from_dump(dumplines, natoms): nlines = len(dumplines) total_natoms = sum(natoms) @@ -38,13 +49,18 @@ def get_coords_from_dump(dumplines, natoms): if "VIRIAL" in dumplines[6]: calc_stress = True else: - assert("POSITIONS" in dumplines[6] and "FORCE" in dumplines[6]), "keywords 'POSITIONS' and 'FORCE' cannot be found in the 6th line. Please check." + assert ( + "POSITIONS" in dumplines[6] and "FORCE" in dumplines[6] + ), "keywords 'POSITIONS' and 'FORCE' cannot be found in the 6th line. Please check." nframes_dump = -1 if calc_stress: - nframes_dump = int(nlines/(total_natoms + 13)) + nframes_dump = int(nlines / (total_natoms + 13)) else: - nframes_dump = int(nlines/(total_natoms + 9)) - assert(nframes_dump > 0), "Number of lines in MD_dump file = %d. Number of atoms = %d. The MD_dump file is incomplete."%(nlines, total_natoms) + nframes_dump = int(nlines / (total_natoms + 9)) + assert nframes_dump > 0, ( + "Number of lines in MD_dump file = %d. Number of atoms = %d. The MD_dump file is incomplete." + % (nlines, total_natoms) + ) cells = np.zeros([nframes_dump, 3, 3]) stresses = np.zeros([nframes_dump, 3, 3]) forces = np.zeros([nframes_dump, total_natoms, 3]) @@ -53,102 +69,154 @@ def get_coords_from_dump(dumplines, natoms): for iline in range(nlines): if "MDSTEP" in dumplines[iline]: # read in LATTICE_CONSTANT - celldm = float(dumplines[iline+1].split(" ")[-1]) + celldm = float(dumplines[iline + 1].split(" ")[-1]) # read in LATTICE_VECTORS for ix in range(3): - cells[iframe, ix] = np.array([float(i) for i in re.split('\s+', dumplines[iline+3+ix])[-3:]]) * celldm + cells[iframe, ix] = ( + np.array( + [ + float(i) + for i in re.split("\s+", dumplines[iline + 3 + ix])[-3:] + ] + ) + * celldm + ) if calc_stress: - stresses[iframe, ix] = np.array([float(i) for i in re.split('\s+', dumplines[iline+7+ix])[-3:]]) + stresses[iframe, ix] = np.array( + [ + float(i) + for i in re.split("\s+", dumplines[iline + 7 + ix])[-3:] + ] + ) for iat in range(total_natoms): if calc_stress: - coords[iframe, iat] = np.array([float(i) for i in re.split('\s+', dumplines[iline+11+iat])[-6:-3]])*celldm - forces[iframe, iat] = np.array([float(i) for i in re.split('\s+', dumplines[iline+11+iat])[-3:]]) + coords[iframe, iat] = ( + np.array( + [ + float(i) + for i in re.split("\s+", dumplines[iline + 11 + iat])[ + -6:-3 + ] + ] + ) + * celldm + ) + forces[iframe, iat] = np.array( + [ + float(i) + for i in re.split("\s+", dumplines[iline + 11 + iat])[-3:] + ] + ) else: - coords[iframe, iat] = np.array([float(i) for i in re.split('\s+', dumplines[iline+7+iat])[-6:-3]])*celldm - forces[iframe, iat] = np.array([float(i) for i in re.split('\s+', dumplines[iline+7+iat])[-3:]]) + coords[iframe, iat] = ( + np.array( + [ + float(i) + for i in re.split("\s+", dumplines[iline + 7 + iat])[ + -6:-3 + ] + ] + ) + * celldm + ) + forces[iframe, iat] = np.array( + [ + float(i) + for i in re.split("\s+", dumplines[iline + 7 + iat])[-3:] + ] + ) iframe += 1 - assert(iframe == nframes_dump), "iframe=%d, nframe_dump=%d. Number of frames does not match number of lines in MD_dump."%(iframe, nframes_dump) + assert iframe == nframes_dump, ( + "iframe=%d, nframe_dump=%d. Number of frames does not match number of lines in MD_dump." + % (iframe, nframes_dump) + ) cells *= bohr2ang coords *= bohr2ang stresses *= kbar2evperang3 return coords, cells, forces, stresses + def get_energy(outlines, ndump, dump_freq): energy = [] nenergy = 0 for line_idx, line in enumerate(outlines): if "final etot is" in line: - if nenergy%dump_freq == 0: + if nenergy % dump_freq == 0: energy.append(float(line.split()[-2])) - nenergy+=1 + nenergy += 1 elif "!! convergence has not been achieved" in line: - if nenergy%dump_freq == 0: + if nenergy % dump_freq == 0: energy.append(np.nan) - nenergy+=1 - assert(ndump == len(energy)), "Number of total energies in running_md.log = %d. Number of frames in MD_dump = %d. Please check."%(len(energy), ndump) + nenergy += 1 + assert ndump == len(energy), ( + "Number of total energies in running_md.log = %d. Number of frames in MD_dump = %d. Please check." + % (len(energy), ndump) + ) energy = np.array(energy) return energy -def get_frame (fname): +def get_frame(fname): if type(fname) == str: - # if the input parameter is only one string, it is assumed that it is the + # if the input parameter is only one string, it is assumed that it is the # base directory containing INPUT file; path_in = os.path.join(fname, "INPUT") else: - raise RuntimeError('invalid input') - with open(path_in, 'r') as fp: - inlines = fp.read().split('\n') - geometry_path_in = get_geometry_in(fname, inlines) # base dir of STRU - path_out = get_path_out(fname, inlines) + raise RuntimeError("invalid input") + with open(path_in, "r") as fp: + inlines = fp.read().split("\n") + geometry_path_in = get_geometry_in(fname, inlines) # base dir of STRU + path_out = get_path_out(fname, inlines) - with open(geometry_path_in, 'r') as fp: - geometry_inlines = fp.read().split('\n') - celldm, cell = get_cell(geometry_inlines) - atom_names, natoms, types, coords = get_coords(celldm, cell, geometry_inlines, inlines) + with open(geometry_path_in, "r") as fp: + geometry_inlines = fp.read().split("\n") + celldm, cell = get_cell(geometry_inlines) + atom_names, natoms, types, coords = get_coords( + celldm, cell, geometry_inlines, inlines + ) # This coords is not to be used. - dump_freq = get_coord_dump_freq(inlines = inlines) - #ndump = int(os.popen("ls -l %s | grep 'md_pos_' | wc -l" %path_out).readlines()[0]) + dump_freq = get_coord_dump_freq(inlines=inlines) + # ndump = int(os.popen("ls -l %s | grep 'md_pos_' | wc -l" %path_out).readlines()[0]) # number of dumped geometry files - #coords = get_coords_from_cif(ndump, dump_freq, atom_names, natoms, types, path_out, cell) - with open(os.path.join(path_out, "MD_dump"), 'r') as fp: - dumplines = fp.read().split('\n') + # coords = get_coords_from_cif(ndump, dump_freq, atom_names, natoms, types, path_out, cell) + with open(os.path.join(path_out, "MD_dump"), "r") as fp: + dumplines = fp.read().split("\n") coords, cells, force, stress = get_coords_from_dump(dumplines, natoms) ndump = np.shape(coords)[0] - with open(os.path.join(path_out, "running_md.log"), 'r') as fp: - outlines = fp.read().split('\n') + with open(os.path.join(path_out, "running_md.log"), "r") as fp: + outlines = fp.read().split("\n") energy = get_energy(outlines, ndump, dump_freq) - unconv_stru = '' - for i,iene in enumerate(energy): + unconv_stru = "" + for i, iene in enumerate(energy): if np.isnan(iene): - coords = np.delete(coords,i-ndump,axis=0) - cells = np.delete(cells,i-ndump,axis=0) - force = np.delete(force,i-ndump,axis=0) - stress = np.delete(stress,i-ndump,axis=0) - energy = np.delete(energy,i-ndump,axis=0) + coords = np.delete(coords, i - ndump, axis=0) + cells = np.delete(cells, i - ndump, axis=0) + force = np.delete(force, i - ndump, axis=0) + stress = np.delete(stress, i - ndump, axis=0) + energy = np.delete(energy, i - ndump, axis=0) unconv_stru += "%d " % i ndump = len(energy) - if unconv_stru != '': - warnings.warn(f"Structure %s are unconverged and not collected!" % unconv_stru) + if unconv_stru != "": + warnings.warn(f"Structure %s are unconverged and not collected!" % unconv_stru) for iframe in range(ndump): stress[iframe] *= np.linalg.det(cells[iframe, :, :].reshape([3, 3])) if np.sum(np.abs(stress[0])) < 1e-10: stress = None data = {} - data['atom_names'] = atom_names - data['atom_numbs'] = natoms - data['atom_types'] = types - data['cells'] = cells - #for idx in range(ndump): + data["atom_names"] = atom_names + data["atom_numbs"] = natoms + data["atom_types"] = types + data["cells"] = cells + # for idx in range(ndump): # data['cells'][:, :, :] = cell - data['coords'] = coords - data['energies'] = energy - data['forces'] = force - data['virials'] = stress - if type(data['virials']) != np.ndarray: - del data['virials'] - data['orig'] = np.zeros(3) + data["coords"] = coords + data["energies"] = energy + data["forces"] = force + data["virials"] = stress + if type(data["virials"]) != np.ndarray: + del data["virials"] + data["orig"] = np.zeros(3) return data diff --git a/dpdata/abacus/relax.py b/dpdata/abacus/relax.py index 88ea8153..6b552174 100644 --- a/dpdata/abacus/relax.py +++ b/dpdata/abacus/relax.py @@ -1,22 +1,24 @@ -import os,sys +import os, sys import numpy as np from .scf import bohr2ang, kbar2evperang3, get_geometry_in, get_cell, get_coords -# Read in geometries from an ABACUS RELAX(CELL-RELAX) trajectory in OUT.XXXX/runnning_relax/cell-relax.log. +# Read in geometries from an ABACUS RELAX(CELL-RELAX) trajectory in OUT.XXXX/runnning_relax/cell-relax.log. + def get_log_file(fname, inlines): suffix = "ABACUS" calculation = "scf" for line in inlines: - if "suffix" in line and "suffix"==line.split()[0]: - suffix = line.split()[1] + if "suffix" in line and "suffix" == line.split()[0]: + suffix = line.split()[1] elif "calculation" in line and "calculation" == line.split()[0]: calculation = line.split()[1] - logf = os.path.join(fname, "OUT.%s/running_%s.log"%(suffix,calculation)) + logf = os.path.join(fname, "OUT.%s/running_%s.log" % (suffix, calculation)) return logf -def get_coords_from_log(loglines,natoms): - ''' + +def get_coords_from_log(loglines, natoms): + """ NOTICE: unit of coords and cells is Angstrom order: coordinate @@ -24,116 +26,134 @@ def get_coords_from_log(loglines,natoms): energy (no output, if SCF is not converged) force (no output, if cal_force is not setted or abnormal ending) stress (no output, if set cal_stress is not setted or abnormal ending) - ''' + """ natoms_log = 0 for line in loglines: if line[13:41] == "number of atom for this type": natoms_log += int(line.split()[-1]) - assert(natoms_log>0 and natoms_log == natoms),"ERROR: detected atom number in log file is %d" % natoms + assert natoms_log > 0 and natoms_log == natoms, ( + "ERROR: detected atom number in log file is %d" % natoms + ) energy = [] cells = [] coords = [] force = [] stress = [] - coord_direct = [] #if the coordinate is direct type or not + coord_direct = [] # if the coordinate is direct type or not for i in range(len(loglines)): line = loglines[i] - if line[18:41] == "lattice constant (Bohr)": + if line[18:41] == "lattice constant (Bohr)": a0 = float(line.split()[-1]) - elif len(loglines[i].split()) >=2 and loglines[i].split()[1] == 'COORDINATES': - #read coordinate information + elif len(loglines[i].split()) >= 2 and loglines[i].split()[1] == "COORDINATES": + # read coordinate information coords.append([]) direct_coord = False - if loglines[i].split()[0] == 'DIRECT': + if loglines[i].split()[0] == "DIRECT": coord_direct.append(True) - for k in range(2,2+natoms): - coords[-1].append(list(map(lambda x: float(x),loglines[i+k].split()[1:4]))) - elif loglines[i].split()[0] == 'CARTESIAN': + for k in range(2, 2 + natoms): + coords[-1].append( + list(map(lambda x: float(x), loglines[i + k].split()[1:4])) + ) + elif loglines[i].split()[0] == "CARTESIAN": coord_direct.append(False) - for k in range(2,2+natoms): - coords[-1].append(list(map(lambda x: float(x)*a0,loglines[i+k].split()[1:4]))) + for k in range(2, 2 + natoms): + coords[-1].append( + list(map(lambda x: float(x) * a0, loglines[i + k].split()[1:4])) + ) else: - assert(False),"Unrecongnized coordinate type, %s, line:%d" % (loglines[i].split()[0],i) - - elif loglines[i][1:56] == "Lattice vectors: (Cartesian coordinate: in unit of a_0)": - #add the cell information for previous structures + assert False, "Unrecongnized coordinate type, %s, line:%d" % ( + loglines[i].split()[0], + i, + ) + + elif ( + loglines[i][1:56] + == "Lattice vectors: (Cartesian coordinate: in unit of a_0)" + ): + # add the cell information for previous structures while len(cells) < len(coords) - 1: - cells.append(cells[-1]) - #get current cell information + cells.append(cells[-1]) + # get current cell information cells.append([]) - for k in range(1,4): - cells[-1].append(list(map(lambda x:float(x)*a0,loglines[i+k].split()[0:3]))) + for k in range(1, 4): + cells[-1].append( + list(map(lambda x: float(x) * a0, loglines[i + k].split()[0:3])) + ) elif line[1:14] == "final etot is": - #add the energy for previous structures whose SCF is not converged + # add the energy for previous structures whose SCF is not converged while len(energy) < len(coords) - 1: energy.append(np.nan) - #get the energy of current structure + # get the energy of current structure energy.append(float(line.split()[-2])) - + elif line[4:15] == "TOTAL-FORCE": force.append([]) - for j in range(5,5+natoms): - force[-1].append(list(map(lambda x:float(x),loglines[i+j].split()[1:4]))) + for j in range(5, 5 + natoms): + force[-1].append( + list(map(lambda x: float(x), loglines[i + j].split()[1:4])) + ) elif line[1:13] == "TOTAL-STRESS": stress.append([]) - for j in range(4,7): - stress[-1].append(list(map(lambda x:float(x),loglines[i+j].split()[0:3]))) + for j in range(4, 7): + stress[-1].append( + list(map(lambda x: float(x), loglines[i + j].split()[0:3])) + ) - #delete last structures which has no energy + # delete last structures which has no energy while len(energy) < len(coords): del coords[-1] del coord_direct[-1] - - #add cells for last structures whose cell is not changed + + # add cells for last structures whose cell is not changed while len(cells) < len(coords): cells.append(cells[-1]) - - #only keep structures that have all of coord, force and stress + + # only keep structures that have all of coord, force and stress if len(stress) == 0 and len(force) == 0: minl = len(coords) elif len(stress) == 0: - minl = min(len(coords),len(force)) + minl = min(len(coords), len(force)) force = force[:minl] elif len(force) == 0: - minl = min(len(coords),len(stress)) + minl = min(len(coords), len(stress)) stress = stress[:minl] else: - minl = min(len(coords),len(force),len(stress)) + minl = min(len(coords), len(force), len(stress)) force = force[:minl] stress = stress[:minl] - + coords = coords[:minl] energy = energy[:minl] cells = cells[:minl] - - #delete structures whose energy is np.nan + + # delete structures whose energy is np.nan for i in range(minl): - if np.isnan(energy[i-minl]): - del energy[i-minl] - del coords[i-minl] - del cells[i-minl] - del coord_direct[i-minl] + if np.isnan(energy[i - minl]): + del energy[i - minl] + del coords[i - minl] + del cells[i - minl] + del coord_direct[i - minl] if len(force) > 0: - del force[i-minl] + del force[i - minl] if len(stress) > 0: - del stress[i-minl] - + del stress[i - minl] + energy = np.array(energy) cells = np.array(cells) coords = np.array(coords) stress = np.array(stress) force = np.array(force) - #transfer direct coordinate to cartessian type + # transfer direct coordinate to cartessian type for i in range(len(coords)): if coord_direct[i]: coords[i] = coords[i].dot(cells[i]) - #transfer bohrium to angstrom + # transfer bohrium to angstrom cells *= bohr2ang coords *= bohr2ang @@ -142,41 +162,48 @@ def get_coords_from_log(loglines,natoms): volume = np.linalg.det(cells[i, :, :].reshape([3, 3])) virial[i] = stress[i] * kbar2evperang3 * volume - return energy,cells,coords,force,stress,virial + return energy, cells, coords, force, stress, virial + -def get_frame (fname): +def get_frame(fname): if type(fname) == str: - # if the input parameter is only one string, it is assumed that it is the + # if the input parameter is only one string, it is assumed that it is the # base directory containing INPUT file; path_in = os.path.join(fname, "INPUT") else: - raise RuntimeError('invalid input') - with open(path_in, 'r') as fp: - inlines = fp.read().split('\n') - geometry_path_in = get_geometry_in(fname, inlines) # base dir of STRU - with open(geometry_path_in, 'r') as fp: - geometry_inlines = fp.read().split('\n') - celldm, cell = get_cell(geometry_inlines) - atom_names, natoms, types, coord_tmp = get_coords(celldm, cell, geometry_inlines, inlines) - - logf = get_log_file(fname, inlines) - assert(os.path.isfile(logf)),"Error: can not find %s" % logf - with open(logf) as f1: lines = f1.readlines() + raise RuntimeError("invalid input") + with open(path_in, "r") as fp: + inlines = fp.read().split("\n") + geometry_path_in = get_geometry_in(fname, inlines) # base dir of STRU + with open(geometry_path_in, "r") as fp: + geometry_inlines = fp.read().split("\n") + celldm, cell = get_cell(geometry_inlines) + atom_names, natoms, types, coord_tmp = get_coords( + celldm, cell, geometry_inlines, inlines + ) + + logf = get_log_file(fname, inlines) + assert os.path.isfile(logf), "Error: can not find %s" % logf + with open(logf) as f1: + lines = f1.readlines() atomnumber = 0 - for i in natoms: atomnumber += i - energy,cells,coords,force,stress,virial = get_coords_from_log(lines,atomnumber) + for i in natoms: + atomnumber += i + energy, cells, coords, force, stress, virial = get_coords_from_log( + lines, atomnumber + ) data = {} - data['atom_names'] = atom_names - data['atom_numbs'] = natoms - data['atom_types'] = types - data['cells'] = cells - data['coords'] = coords - data['energies'] = energy - data['forces'] = force - data['virials'] = virial - data['stress'] = stress - data['orig'] = np.zeros(3) + data["atom_names"] = atom_names + data["atom_numbs"] = natoms + data["atom_types"] = types + data["cells"] = cells + data["coords"] = coords + data["energies"] = energy + data["forces"] = force + data["virials"] = virial + data["stress"] = stress + data["orig"] = np.zeros(3) return data diff --git a/dpdata/abacus/scf.py b/dpdata/abacus/scf.py index f3f80d48..94f4bf7b 100644 --- a/dpdata/abacus/scf.py +++ b/dpdata/abacus/scf.py @@ -1,24 +1,27 @@ -import os,sys +import os, sys import numpy as np from ..unit import EnergyConversion, PressureConversion, LengthConversion import re + bohr2ang = LengthConversion("bohr", "angstrom").value() ry2ev = EnergyConversion("rydberg", "eV").value() kbar2evperang3 = PressureConversion("kbar", "eV/angstrom^3").value() + def CheckFile(ifile): if not os.path.isfile(ifile): print("Can not find file %s" % ifile) return False return True -def get_block (lines, keyword, skip = 0, nlines = None): + +def get_block(lines, keyword, skip=0, nlines=None): ret = [] found = False if not nlines: nlines = 1e6 - for idx,ii in enumerate(lines) : - if keyword in ii : + for idx, ii in enumerate(lines): + if keyword in ii: found = True blk_idx = idx + 1 + skip line_idx = 0 @@ -26,7 +29,7 @@ def get_block (lines, keyword, skip = 0, nlines = None): blk_idx += 1 while line_idx < nlines and blk_idx != len(lines): if len(re.split("\s+", lines[blk_idx])) == 0 or lines[blk_idx] == "": - blk_idx+=1 + blk_idx += 1 continue ret.append(lines[blk_idx]) blk_idx += 1 @@ -36,94 +39,107 @@ def get_block (lines, keyword, skip = 0, nlines = None): return None return ret + def get_geometry_in(fname, inlines): geometry_path_in = os.path.join(fname, "STRU") for line in inlines: - if "stru_file" in line and "stru_file"==line.split()[0]: - atom_file = line.split()[1] - geometry_path_in = os.path.join(fname, atom_file) - break + if "stru_file" in line and "stru_file" == line.split()[0]: + atom_file = line.split()[1] + geometry_path_in = os.path.join(fname, atom_file) + break return geometry_path_in + def get_path_out(fname, inlines): path_out = os.path.join(fname, "OUT.ABACUS/running_scf.log") for line in inlines: - if "suffix" in line and "suffix"==line.split()[0]: - suffix = line.split()[1] - path_out = os.path.join(fname, "OUT.%s/running_scf.log" % suffix) - break + if "suffix" in line and "suffix" == line.split()[0]: + suffix = line.split()[1] + path_out = os.path.join(fname, "OUT.%s/running_scf.log" % suffix) + break return path_out + def get_cell(geometry_inlines): - cell_lines = get_block(geometry_inlines, "LATTICE_VECTORS", skip = 0, nlines = 3) + cell_lines = get_block(geometry_inlines, "LATTICE_VECTORS", skip=0, nlines=3) celldm_lines = get_block(geometry_inlines, "LATTICE_CONSTANT", skip=0, nlines=1) - celldm = float(celldm_lines[0].split()[0]) * bohr2ang # lattice const is in Bohr + celldm = float(celldm_lines[0].split()[0]) * bohr2ang # lattice const is in Bohr cell = [] for ii in range(3): cell.append([float(jj) for jj in cell_lines[ii].split()[0:3]]) - cell = celldm*np.array(cell) + cell = celldm * np.array(cell) return celldm, cell + def get_coords(celldm, cell, geometry_inlines, inlines=None): coords_lines = get_block(geometry_inlines, "ATOMIC_POSITIONS", skip=0) # assuming that ATOMIC_POSITIONS is at the bottom of the STRU file - coord_type = coords_lines[0].split()[0].lower() # cartisan or direct - atom_names = [] # element abbr in periodic table - atom_types = [] # index of atom_names of each atom in the geometry - atom_numbs = [] # of atoms for each element - coords = [] # coordinations of atoms + coord_type = coords_lines[0].split()[0].lower() # cartisan or direct + atom_names = [] # element abbr in periodic table + atom_types = [] # index of atom_names of each atom in the geometry + atom_numbs = [] # of atoms for each element + coords = [] # coordinations of atoms ntype = get_nele_from_stru(geometry_inlines) - line_idx = 1 # starting line of first element + line_idx = 1 # starting line of first element for it in range(ntype): atom_names.append(coords_lines[line_idx].split()[0]) - line_idx+=2 + line_idx += 2 atom_numbs.append(int(coords_lines[line_idx].split()[0])) - line_idx+=1 + line_idx += 1 for iline in range(atom_numbs[it]): xyz = np.array([float(xx) for xx in coords_lines[line_idx].split()[0:3]]) if coord_type == "cartesian": - xyz = xyz*celldm + xyz = xyz * celldm elif coord_type == "direct": tmp = np.matmul(xyz, cell) xyz = tmp else: print("coord_type = %s" % coord_type) - raise RuntimeError("Input coordination type is invalid.\n Only direct and cartesian are accepted.") + raise RuntimeError( + "Input coordination type is invalid.\n Only direct and cartesian are accepted." + ) coords.append(xyz) atom_types.append(it) line_idx += 1 - coords = np.array(coords) # need transformation!!! + coords = np.array(coords) # need transformation!!! atom_types = np.array(atom_types) return atom_names, atom_numbs, atom_types, coords + def get_energy(outlines): Etot = None for line in outlines: if "!FINAL_ETOT_IS" in line: - Etot = float(line.split()[1]) # in eV + Etot = float(line.split()[1]) # in eV break if not Etot: - return Etot,False + return Etot, False for line in outlines: if "convergence has NOT been achieved!" in line: - return Etot,False - return Etot,True + return Etot, False + return Etot, True -def get_force (outlines, natoms): + +def get_force(outlines, natoms): force = [] - force_inlines = get_block (outlines, "TOTAL-FORCE (eV/Angstrom)", skip = 4, nlines=np.sum(natoms)) + force_inlines = get_block( + outlines, "TOTAL-FORCE (eV/Angstrom)", skip=4, nlines=np.sum(natoms) + ) if force_inlines is None: - print("TOTAL-FORCE (eV/Angstrom) is not found in OUT.XXX/running_scf.log. May be you haven't set 'cal_force 1' in the INPUT.") + print( + "TOTAL-FORCE (eV/Angstrom) is not found in OUT.XXX/running_scf.log. May be you haven't set 'cal_force 1' in the INPUT." + ) return [[]] for line in force_inlines: force.append([float(f) for f in line.split()[1:4]]) force = np.array(force) return force + def get_stress(outlines): stress = [] - stress_inlines = get_block(outlines, "TOTAL-STRESS (KBAR)", skip = 3, nlines=3) + stress_inlines = get_block(outlines, "TOTAL-STRESS (KBAR)", skip=3, nlines=3) if stress_inlines is None: return None for line in stress_inlines: @@ -132,60 +148,63 @@ def get_stress(outlines): return stress +def get_frame(fname): + data = { + "atom_names": [], + "atom_numbs": [], + "atom_types": [], + "cells": [], + "coords": [], + "energies": [], + "forces": [], + } -def get_frame (fname): - data = {'atom_names':[],\ - 'atom_numbs':[],\ - 'atom_types':[],\ - 'cells':[],\ - 'coords':[],\ - 'energies':[],\ - 'forces':[]} - if type(fname) == str: - # if the input parameter is only one string, it is assumed that it is the + # if the input parameter is only one string, it is assumed that it is the # base directory containing INPUT file; path_in = os.path.join(fname, "INPUT") else: - raise RuntimeError('invalid input') - + raise RuntimeError("invalid input") + if not CheckFile(path_in): return data - - with open(path_in, 'r') as fp: - inlines = fp.read().split('\n') - - geometry_path_in = get_geometry_in(fname, inlines) - path_out = get_path_out(fname, inlines) + + with open(path_in, "r") as fp: + inlines = fp.read().split("\n") + + geometry_path_in = get_geometry_in(fname, inlines) + path_out = get_path_out(fname, inlines) if not (CheckFile(geometry_path_in) and CheckFile(path_out)): return data - - with open(geometry_path_in, 'r') as fp: - geometry_inlines = fp.read().split('\n') - with open(path_out, 'r') as fp: - outlines = fp.read().split('\n') - - celldm, cell = get_cell(geometry_inlines) - atom_names, natoms, types, coords = get_coords(celldm, cell, geometry_inlines, inlines) - data['atom_names'] = atom_names - data['atom_numbs'] = natoms - data['atom_types'] = types - - energy,converge = get_energy(outlines) + + with open(geometry_path_in, "r") as fp: + geometry_inlines = fp.read().split("\n") + with open(path_out, "r") as fp: + outlines = fp.read().split("\n") + + celldm, cell = get_cell(geometry_inlines) + atom_names, natoms, types, coords = get_coords( + celldm, cell, geometry_inlines, inlines + ) + data["atom_names"] = atom_names + data["atom_numbs"] = natoms + data["atom_types"] = types + + energy, converge = get_energy(outlines) if not converge: return data - force = get_force (outlines, natoms) - stress = get_stress(outlines) + force = get_force(outlines, natoms) + stress = get_stress(outlines) if stress is not None: - stress *= np.abs(np.linalg.det(cell)) - - data['cells'] = cell[np.newaxis, :, :] - data['coords'] = coords[np.newaxis, :, :] - data['energies'] = np.array(energy)[np.newaxis] - data['forces'] = force[np.newaxis, :, :] + stress *= np.abs(np.linalg.det(cell)) + + data["cells"] = cell[np.newaxis, :, :] + data["coords"] = coords[np.newaxis, :, :] + data["energies"] = np.array(energy)[np.newaxis] + data["forces"] = force[np.newaxis, :, :] if stress is not None: - data['virials'] = stress[np.newaxis, :, :] - data['orig'] = np.zeros(3) + data["virials"] = stress[np.newaxis, :, :] + data["orig"] = np.zeros(3) # print("atom_names = ", data['atom_names']) # print("natoms = ", data['atom_numbs']) # print("types = ", data['atom_types']) @@ -196,8 +215,16 @@ def get_frame (fname): # print("virial = ", data['virials']) return data + def get_nele_from_stru(geometry_inlines): - key_words_list = ["ATOMIC_SPECIES", "NUMERICAL_ORBITAL", "LATTICE_CONSTANT", "LATTICE_VECTORS", "ATOMIC_POSITIONS", "NUMERICAL_DESCRIPTOR"] + key_words_list = [ + "ATOMIC_SPECIES", + "NUMERICAL_ORBITAL", + "LATTICE_CONSTANT", + "LATTICE_VECTORS", + "ATOMIC_POSITIONS", + "NUMERICAL_DESCRIPTOR", + ] keyword_sequence = [] keyword_line_index = [] atom_names = [] @@ -210,86 +237,107 @@ def get_nele_from_stru(geometry_inlines): if keyword in line and keyword == line.split()[0]: keyword_sequence.append(keyword) keyword_line_index.append(iline) - assert(len(keyword_line_index) == len(keyword_sequence)) - assert(len(keyword_sequence) > 0) + assert len(keyword_line_index) == len(keyword_sequence) + assert len(keyword_sequence) > 0 keyword_line_index.append(len(geometry_inlines)) nele = 0 for idx, keyword in enumerate(keyword_sequence): if keyword == "ATOMIC_SPECIES": - for iline in range(keyword_line_index[idx]+1, keyword_line_index[idx+1]): + for iline in range( + keyword_line_index[idx] + 1, keyword_line_index[idx + 1] + ): if len(re.split("\s+", geometry_inlines[iline])) >= 3: nele += 1 return nele + def get_frame_from_stru(fname): - assert(type(fname) == str) - with open(fname, 'r') as fp: - geometry_inlines = fp.read().split('\n') + assert type(fname) == str + with open(fname, "r") as fp: + geometry_inlines = fp.read().split("\n") nele = get_nele_from_stru(geometry_inlines) - inlines = ["ntype %d" %nele] + inlines = ["ntype %d" % nele] celldm, cell = get_cell(geometry_inlines) - atom_names, natoms, types, coords = get_coords(celldm, cell, geometry_inlines, inlines) + atom_names, natoms, types, coords = get_coords( + celldm, cell, geometry_inlines, inlines + ) data = {} - data['atom_names'] = atom_names - data['atom_numbs'] = natoms - data['atom_types'] = types - data['cells'] = cell[np.newaxis, :, :] - data['coords'] = coords[np.newaxis, :, :] - data['orig'] = np.zeros(3) + data["atom_names"] = atom_names + data["atom_numbs"] = natoms + data["atom_types"] = types + data["cells"] = cell[np.newaxis, :, :] + data["coords"] = coords[np.newaxis, :, :] + data["orig"] = np.zeros(3) return data -def make_unlabeled_stru(data, frame_idx, pp_file=None, numerical_orbital=None, numerical_descriptor=None, mass=None): + +def make_unlabeled_stru( + data, + frame_idx, + pp_file=None, + numerical_orbital=None, + numerical_descriptor=None, + mass=None, +): out = "ATOMIC_SPECIES\n" - for iele in range(len(data['atom_names'])): - out += data['atom_names'][iele] + " " + for iele in range(len(data["atom_names"])): + out += data["atom_names"][iele] + " " if mass is not None: - out += "%.3f "%mass[iele] + out += "%.3f " % mass[iele] else: out += "1 " if pp_file is not None: - out += "%s\n"%pp_file[iele] + out += "%s\n" % pp_file[iele] else: out += "\n" out += "\n" if numerical_orbital is not None: - assert(len(numerical_orbital) == len(data['atom_names'])) + assert len(numerical_orbital) == len(data["atom_names"]) out += "NUMERICAL_ORBITAL\n" for iele in range(len(numerical_orbital)): - out += "%s\n"%numerical_orbital[iele] + out += "%s\n" % numerical_orbital[iele] out += "\n" if numerical_descriptor is not None: - assert(type(numerical_descriptor) == str) - out += "NUMERICAL_DESCRIPTOR\n%s\n"%numerical_descriptor + assert type(numerical_descriptor) == str + out += "NUMERICAL_DESCRIPTOR\n%s\n" % numerical_descriptor out += "\n" - + out += "LATTICE_CONSTANT\n" - out += str(1/bohr2ang) + "\n\n" + out += str(1 / bohr2ang) + "\n\n" out += "LATTICE_VECTORS\n" for ix in range(3): for iy in range(3): - out += str(data['cells'][frame_idx][ix][iy]) + " " + out += str(data["cells"][frame_idx][ix][iy]) + " " out += "\n" out += "\n" out += "ATOMIC_POSITIONS\n" out += "Cartesian # Cartesian(Unit is LATTICE_CONSTANT)\n" - #ret += "\n" + # ret += "\n" natom_tot = 0 - for iele in range(len(data['atom_names'])): - out += data['atom_names'][iele] + "\n" + for iele in range(len(data["atom_names"])): + out += data["atom_names"][iele] + "\n" out += "0.0\n" - out += str(data['atom_numbs'][iele]) + "\n" - for iatom in range(data['atom_numbs'][iele]): - out += "%.12f %.12f %.12f %d %d %d\n" % (data['coords'][frame_idx][natom_tot, 0], data['coords'][frame_idx][natom_tot, 1], data['coords'][frame_idx][natom_tot, 2], 1, 1, 1) + out += str(data["atom_numbs"][iele]) + "\n" + for iatom in range(data["atom_numbs"][iele]): + out += "%.12f %.12f %.12f %d %d %d\n" % ( + data["coords"][frame_idx][natom_tot, 0], + data["coords"][frame_idx][natom_tot, 1], + data["coords"][frame_idx][natom_tot, 2], + 1, + 1, + 1, + ) natom_tot += 1 - assert(natom_tot == sum(data['atom_numbs'])) + assert natom_tot == sum(data["atom_numbs"]) return out -#if __name__ == "__main__": + +# if __name__ == "__main__": # path = "/home/lrx/work/12_ABACUS_dpgen_interface/dpdata/dpdata/tests/abacus.scf" # data = get_frame(path) diff --git a/dpdata/amber/__init__.py b/dpdata/amber/__init__.py index 8b137891..e69de29b 100644 --- a/dpdata/amber/__init__.py +++ b/dpdata/amber/__init__.py @@ -1 +0,0 @@ - diff --git a/dpdata/amber/mask.py b/dpdata/amber/mask.py index 99a7d3d3..f44ee047 100644 --- a/dpdata/amber/mask.py +++ b/dpdata/amber/mask.py @@ -4,9 +4,10 @@ except ImportError: pass + def pick_by_amber_mask(param, maskstr, coords=None): """Pick atoms by amber masks - + Parameters ---------- param: str or parmed.Structure @@ -22,10 +23,13 @@ def pick_by_amber_mask(param, maskstr, coords=None): sele = [] if len(maskstr) > 0: newmaskstr = maskstr.replace("@0", "!@*") - sele = [parm.atoms[i].idx for i in parmed.amber.mask.AmberMask( - parm, newmaskstr).Selected()] + sele = [ + parm.atoms[i].idx + for i in parmed.amber.mask.AmberMask(parm, newmaskstr).Selected() + ] return sele + def load_param_file(param_file): if isinstance(param_file, str): return parmed.load_file(param_file) diff --git a/dpdata/amber/md.py b/dpdata/amber/md.py index 64227a6a..0d178104 100644 --- a/dpdata/amber/md.py +++ b/dpdata/amber/md.py @@ -7,15 +7,21 @@ from ..periodic_table import ELEMENTS kcalmol2eV = EnergyConversion("kcal_mol", "eV").value() -symbols = ['X'] + ELEMENTS +symbols = ["X"] + ELEMENTS energy_convert = kcalmol2eV force_convert = energy_convert -def read_amber_traj(parm7_file, nc_file, mdfrc_file=None, mden_file = None, mdout_file = None, - use_element_symbols=None, labeled=True, - ): +def read_amber_traj( + parm7_file, + nc_file, + mdfrc_file=None, + mden_file=None, + mdout_file=None, + use_element_symbols=None, + labeled=True, +): """The amber trajectory includes: * nc, NetCDF format, stores coordinates * mdfrc, NetCDF format, stores forces @@ -42,10 +48,12 @@ def read_amber_traj(parm7_file, nc_file, mdfrc_file=None, mden_file = None, mdou for line in f: if line.startswith("%FLAG"): flag_atom_type = line.startswith("%FLAG AMBER_ATOM_TYPE") - flag_atom_numb = (use_element_symbols is not None) and line.startswith("%FLAG ATOMIC_NUMBER") + flag_atom_numb = (use_element_symbols is not None) and line.startswith( + "%FLAG ATOMIC_NUMBER" + ) elif flag_atom_type or flag_atom_numb: if line.startswith("%FORMAT"): - fmt = re.findall(r'\d+', line) + fmt = re.findall(r"\d+", line) fmt0 = int(fmt[0]) fmt1 = int(fmt[1]) else: @@ -58,18 +66,18 @@ def read_amber_traj(parm7_file, nc_file, mdfrc_file=None, mden_file = None, mdou if flag_atom_type: amber_types.append(content) elif flag_atom_numb: - atomic_number.append(int(content)) + atomic_number.append(int(content)) if use_element_symbols is not None: if isinstance(use_element_symbols, str): use_element_symbols = pick_by_amber_mask(parm7_file, use_element_symbols) for ii in use_element_symbols: amber_types[ii] = symbols[atomic_number[ii]] - with netcdf.netcdf_file(nc_file, 'r') as f: + with netcdf.netcdf_file(nc_file, "r") as f: coords = np.array(f.variables["coordinates"][:]) cell_lengths = np.array(f.variables["cell_lengths"][:]) cell_angles = np.array(f.variables["cell_angles"][:]) - if np.all(cell_angles > 89.99 ) and np.all(cell_angles < 90.01): + if np.all(cell_angles > 89.99) and np.all(cell_angles < 90.01): # only support 90 # TODO: support other angles shape = cell_lengths.shape @@ -80,7 +88,7 @@ def read_amber_traj(parm7_file, nc_file, mdfrc_file=None, mden_file = None, mdou raise RuntimeError("Unsupported cells") if labeled: - with netcdf.netcdf_file(mdfrc_file, 'r') as f: + with netcdf.netcdf_file(mdfrc_file, "r") as f: forces = np.array(f.variables["forces"][:]) # load energy from mden_file or mdout_file @@ -101,17 +109,18 @@ def read_amber_traj(parm7_file, nc_file, mdfrc_file=None, mden_file = None, mdou else: raise RuntimeError("Please provide one of mden_file and mdout_file") - atom_names, atom_types, atom_numbs = np.unique(amber_types, return_inverse=True, return_counts=True) + atom_names, atom_types, atom_numbs = np.unique( + amber_types, return_inverse=True, return_counts=True + ) data = {} - data['atom_names'] = list(atom_names) - data['atom_numbs'] = list(atom_numbs) - data['atom_types'] = atom_types + data["atom_names"] = list(atom_names) + data["atom_numbs"] = list(atom_numbs) + data["atom_types"] = atom_types if labeled: - data['forces'] = forces * force_convert - data['energies'] = np.array(energies) * energy_convert - data['coords'] = coords - data['cells'] = cells - data['orig'] = np.array([0, 0, 0]) + data["forces"] = forces * force_convert + data["energies"] = np.array(energies) * energy_convert + data["coords"] = coords + data["cells"] = cells + data["orig"] = np.array([0, 0, 0]) return data - diff --git a/dpdata/amber/sqm.py b/dpdata/amber/sqm.py index 7080e43e..7826d201 100644 --- a/dpdata/amber/sqm.py +++ b/dpdata/amber/sqm.py @@ -10,10 +10,11 @@ READ_COORDS = 6 READ_FORCES = 7 + def parse_sqm_out(fname): - ''' - Read atom symbols, charges and coordinates from ambertools sqm.out file - ''' + """ + Read atom symbols, charges and coordinates from ambertools sqm.out file + """ atom_symbols = [] coords = [] charges = [] @@ -55,36 +56,41 @@ def parse_sqm_out(fname): forces.append([float(ll[-60:-40]), float(ll[-40:-20]), float(ll[-20:])]) if len(forces) == len(charges): flag = START - + data = {} - atom_names, data['atom_types'], atom_numbs = np.unique(atom_symbols, return_inverse=True, return_counts=True) - data['charges'] = np.array(charges) - data['atom_names'] = list(atom_names) - data['atom_numbs'] = list(atom_numbs) - data['orig'] = np.array([0, 0, 0]) - data['cells'] = np.array([[[100., 0., 0.], [0., 100., 0.], [0., 0., 100.]]]) - data['nopbc'] = True - data['coords'] = np.array([coords]) + atom_names, data["atom_types"], atom_numbs = np.unique( + atom_symbols, return_inverse=True, return_counts=True + ) + data["charges"] = np.array(charges) + data["atom_names"] = list(atom_names) + data["atom_numbs"] = list(atom_numbs) + data["orig"] = np.array([0, 0, 0]) + data["cells"] = np.array( + [[[100.0, 0.0, 0.0], [0.0, 100.0, 0.0], [0.0, 0.0, 100.0]]] + ) + data["nopbc"] = True + data["coords"] = np.array([coords]) energies = np.array(energies) forces = -np.array([forces], dtype=np.float64) * kcal2ev if len(forces) > 0: - data['energies'] = energies - data['forces'] = forces - + data["energies"] = energies + data["forces"] = forces + return data + def make_sqm_in(data, fname=None, frame_idx=0, **kwargs): - symbols = [data['atom_names'][ii] for ii in data['atom_types']] + symbols = [data["atom_names"][ii] for ii in data["atom_types"]] atomic_numbers = [ELEMENTS.index(ss) + 1 for ss in symbols] charge = kwargs.get("charge", 0) # multiplicity mult = kwargs.get("mult", 1) - if mult != 1 : + if mult != 1: raise RuntimeError("Multiplicity is not 1, which is not supported by sqm") - maxcyc = kwargs.get("maxcyc", 0) # 0 represents a single-point calculation + maxcyc = kwargs.get("maxcyc", 0) # 0 represents a single-point calculation theory = kwargs.get("qm_theory", "DFTB3") ret = "Run semi-emperical minimization\n" ret += " &qmmm\n" @@ -93,15 +99,15 @@ def make_sqm_in(data, fname=None, frame_idx=0, **kwargs): ret += f" maxcyc={maxcyc}\n" ret += " verbosity=4\n" ret += " /\n" - for ii in range(len(data['atom_types'])): + for ii in range(len(data["atom_types"])): ret += "{:>4s}{:>6s}{:>16s}{:>16s}{:>16s}\n".format( str(atomic_numbers[ii]), str(symbols[ii]), f"{data['coords'][frame_idx][ii, 0]:.6f}", f"{data['coords'][frame_idx][ii, 1]:.6f}", - f"{data['coords'][frame_idx][ii, 2]:.6f}" + f"{data['coords'][frame_idx][ii, 2]:.6f}", ) if fname is not None: - with open(fname, 'w') as fp: + with open(fname, "w") as fp: fp.write(ret) return ret diff --git a/dpdata/ase_calculator.py b/dpdata/ase_calculator.py index df89d05c..ea5e5009 100644 --- a/dpdata/ase_calculator.py +++ b/dpdata/ase_calculator.py @@ -1,7 +1,9 @@ from typing import List, Optional, TYPE_CHECKING from ase.calculators.calculator import ( - Calculator, all_changes, PropertyNotImplementedError + Calculator, + all_changes, + PropertyNotImplementedError, ) import dpdata @@ -12,7 +14,7 @@ class DPDataCalculator(Calculator): - """Implementation of ASE deepmd calculator based on a driver. + """Implementation of ASE deepmd calculator based on a driver. Parameters ---------- @@ -21,14 +23,9 @@ class DPDataCalculator(Calculator): """ name = "dpdata" - implemented_properties = [ - "energy", "free_energy", "forces", "virial", "stress"] + implemented_properties = ["energy", "free_energy", "forces", "virial", "stress"] - def __init__( - self, - driver: Driver, - **kwargs - ) -> None: + def __init__(self, driver: Driver, **kwargs) -> None: Calculator.__init__(self, label=Driver.__name__, **kwargs) self.driver = driver @@ -56,21 +53,24 @@ def calculate( system = dpdata.System(self.atoms, fmt="ase/structure") data = system.predict(driver=self.driver).data - self.results['energy'] = data['energies'][0] + self.results["energy"] = data["energies"][0] # see https://gitlab.com/ase/ase/-/merge_requests/2485 - self.results['free_energy'] = data['energies'][0] - self.results['forces'] = data['forces'][0] - if 'virials' in data: - self.results['virial'] = data['virials'][0].reshape(3, 3) + self.results["free_energy"] = data["energies"][0] + self.results["forces"] = data["forces"][0] + if "virials" in data: + self.results["virial"] = data["virials"][0].reshape(3, 3) # convert virial into stress for lattice relaxation if "stress" in properties: if sum(atoms.get_pbc()) > 0: # the usual convention (tensile stress is positive) # stress = -virial / volume - stress = -0.5 * (data['virials'][0].copy() + data['virials'][0].copy().T) / \ - atoms.get_volume() + stress = ( + -0.5 + * (data["virials"][0].copy() + data["virials"][0].copy().T) + / atoms.get_volume() + ) # Voigt notation - self.results['stress'] = stress.flat[[0, 4, 8, 5, 2, 1]] + self.results["stress"] = stress.flat[[0, 4, 8, 5, 2, 1]] else: raise PropertyNotImplementedError diff --git a/dpdata/bond_order_system.py b/dpdata/bond_order_system.py index f2a66fd2..6cb834d4 100644 --- a/dpdata/bond_order_system.py +++ b/dpdata/bond_order_system.py @@ -6,38 +6,42 @@ from dpdata.rdkit.sanitize import Sanitizer, SanitizeError from copy import deepcopy from rdkit.Chem import Conformer + # import dpdata.rdkit.mol2 - + class BondOrderSystem(System): - ''' + """ The system with chemical bond and formal charges information For example, a labeled methane system named `d_example` has one molecule (5 atoms, 4 bonds) and `n_frames` frames. The bond order and formal charge information can be accessed by - `d_example['bonds']` : a numpy array of size 4 x 3, and the first column represents the index of begin atom, - the second column represents the index of end atom, + the second column represents the index of end atom, the third columen represents the bond order: 1 - single bond, 2 - double bond, 3 - triple bond, 1.5 - aromatic bond - `d_example['formal_charges']` : a numpy array of size 5 x 1 - ''' + """ + DTYPES = System.DTYPES + ( DataType("bonds", np.ndarray, (Axis.NBONDS, 3)), DataType("formal_charges", np.ndarray, (Axis.NATOMS,)), ) - def __init__(self, - file_name = None, - fmt = 'auto', - type_map = None, - begin = 0, - step = 1, - data = None, - rdkit_mol = None, - sanitize_level = "medium", - raise_errors = True, - verbose = False, - **kwargs): + def __init__( + self, + file_name=None, + fmt="auto", + type_map=None, + begin=0, + step=1, + data=None, + rdkit_mol=None, + sanitize_level="medium", + raise_errors=True, + verbose=False, + **kwargs, + ): """ Constructor @@ -76,12 +80,9 @@ def __init__(self, mol = dpdata.rdkit.utils.system_data_to_mol(data) self.from_rdkit_mol(mol) if file_name: - self.from_fmt(file_name, - fmt, - type_map=type_map, - begin=begin, - step=step, - **kwargs) + self.from_fmt( + file_name, fmt, type_map=type_map, begin=begin, step=step, **kwargs + ) elif rdkit_mol: self.from_rdkit_mol(rdkit_mol) else: @@ -94,7 +95,7 @@ def __init__(self, def from_fmt_obj(self, fmtobj, file_name, **kwargs): mol = fmtobj.from_bond_order_system(file_name, **kwargs) self.from_rdkit_mol(mol) - if hasattr(fmtobj.from_bond_order_system, 'post_func'): + if hasattr(fmtobj.from_bond_order_system, "post_func"): for post_f in fmtobj.from_bond_order_system.post_func: self.post_funcs.get_plugin(post_f)(self) return self @@ -109,9 +110,9 @@ def to_fmt_obj(self, fmtobj, *args, **kwargs): return fmtobj.to_bond_order_system(self.data, self.rdkit_mol, *args, **kwargs) def __str__(self): - ''' - A brief summary of the system - ''' + """ + A brief summary of the system + """ ret = "Data Summary" ret += "\nBondOrder System" ret += "\n-------------------" @@ -120,47 +121,49 @@ def __str__(self): ret += f"\nBond Numbers : {self.get_nbonds()}" ret += "\nElement List :" ret += "\n-------------------" - ret += "\n"+" ".join(map(str,self.get_atom_names())) - ret += "\n"+" ".join(map(str,self.get_atom_numbs())) + ret += "\n" + " ".join(map(str, self.get_atom_names())) + ret += "\n" + " ".join(map(str, self.get_atom_numbs())) return ret def get_nbonds(self): - ''' - Return the number of bonds - ''' - return len(self.data['bonds']) - + """ + Return the number of bonds + """ + return len(self.data["bonds"]) + def get_charge(self): - ''' - Return the total formal charge of the moleclue - ''' - return sum(self.data['formal_charges']) - + """ + Return the total formal charge of the moleclue + """ + return sum(self.data["formal_charges"]) + def get_mol(self): - ''' - Return the rdkit.Mol object - ''' + """ + Return the rdkit.Mol object + """ return self.rdkit_mol - + def get_bond_order(self, begin_atom_idx, end_atom_idx): - ''' - Return the bond order between given atoms - ''' - return self.data['bond_dict'][f'{int(begin_atom_idx)}-{int(end_atom_idx)}'] - + """ + Return the bond order between given atoms + """ + return self.data["bond_dict"][f"{int(begin_atom_idx)}-{int(end_atom_idx)}"] + def get_formal_charges(self): - ''' - Return the formal charges on each atom - ''' - return self.data['formal_charges'] - + """ + Return the formal charges on each atom + """ + return self.data["formal_charges"] + def copy(self): new_mol = deepcopy(self.rdkit_mol) - self.__class__(data=deepcopy(self.data), - rdkit_mol=new_mol) - + self.__class__(data=deepcopy(self.data), rdkit_mol=new_mol) + def __add__(self, other): - raise NotImplementedError("magic method '+' has not been implemented on BondOrderSystem") + raise NotImplementedError( + "magic method '+' has not been implemented on BondOrderSystem" + ) + # ''' # magic method "+" operation # ''' @@ -173,10 +176,12 @@ def __add__(self, other): # raise RuntimeError(f"Unsupported data structure: {type(other)}") def from_rdkit_mol(self, rdkit_mol): - ''' - Initialize from a rdkit.Chem.rdchem.Mol object - ''' + """ + Initialize from a rdkit.Chem.rdchem.Mol object + """ rdkit_mol = self.sanitizer.sanitize(rdkit_mol) self.data = dpdata.rdkit.utils.mol_to_system_data(rdkit_mol) - self.data['bond_dict'] = dict([(f'{int(bond[0])}-{int(bond[1])}', bond[2]) for bond in self.data['bonds']]) + self.data["bond_dict"] = dict( + [(f"{int(bond[0])}-{int(bond[1])}", bond[2]) for bond in self.data["bonds"]] + ) self.rdkit_mol = rdkit_mol diff --git a/dpdata/cli.py b/dpdata/cli.py index 2b33f959..88e49ba5 100644 --- a/dpdata/cli.py +++ b/dpdata/cli.py @@ -21,13 +21,24 @@ def dpdata_parser() -> argparse.ArgumentParser: parser.add_argument("from_file", type=str, help="read data from a file") parser.add_argument("--to_file", "-O", type=str, help="dump data to a file") - parser.add_argument("--from_format", "-i", type=str, default="auto", help="the format of from_file") + parser.add_argument( + "--from_format", "-i", type=str, default="auto", help="the format of from_file" + ) parser.add_argument("--to_format", "-o", type=str, help="the format of to_file") - parser.add_argument("--no-labeled", "-n", action="store_true", help="labels aren't provided") - parser.add_argument("--multi", "-m", action="store_true", help="the system contains multiple directories") + parser.add_argument( + "--no-labeled", "-n", action="store_true", help="labels aren't provided" + ) + parser.add_argument( + "--multi", + "-m", + action="store_true", + help="the system contains multiple directories", + ) parser.add_argument("--type-map", "-t", type=str, nargs="+", help="type map") - parser.add_argument('--version', action='version', version='dpdata v%s' % __version__) + parser.add_argument( + "--version", action="version", version="dpdata v%s" % __version__ + ) return parser @@ -45,17 +56,19 @@ def dpdata_cli(): convert(**vars(parsed_args)) -def convert(*, - from_file: str, - from_format: str = "auto", - to_file: Optional[str] = None, - to_format: Optional[str] = None, - no_labeled: bool = False, - multi: bool = False, - type_map: Optional[list] = None, - **kwargs): +def convert( + *, + from_file: str, + from_format: str = "auto", + to_file: Optional[str] = None, + to_format: Optional[str] = None, + no_labeled: bool = False, + multi: bool = False, + type_map: Optional[list] = None, + **kwargs +): """Convert files from one format to another one. - + Parameters ---------- from_file : str @@ -74,7 +87,9 @@ def convert(*, type map """ if multi: - s = MultiSystems.from_file(from_file, fmt=from_format, type_map=type_map, labeled=not no_labeled) + s = MultiSystems.from_file( + from_file, fmt=from_format, type_map=type_map, labeled=not no_labeled + ) elif not no_labeled: s = LabeledSystem(from_file, fmt=from_format, type_map=type_map) else: diff --git a/dpdata/cp2k/cell.py b/dpdata/cp2k/cell.py index 017986ec..3fd8b6c5 100644 --- a/dpdata/cp2k/cell.py +++ b/dpdata/cp2k/cell.py @@ -1,42 +1,54 @@ - #%% import numpy as np from collections import OrderedDict import re -def cell_to_low_triangle(A,B,C,alpha,beta,gamma): + +def cell_to_low_triangle(A, B, C, alpha, beta, gamma): """ - Convert cell to low triangle matrix. + Convert cell to low triangle matrix. - Parameters - ---------- - A : float - cell length A - B : float - cell length B - C : float - cell length C - alpha : float - radian. The angle between vector B and vector C. - beta : float - radian. The angle between vector A and vector C. - gamma : float - radian. The angle between vector B and vector C. - - Returns - ------- - cell : list - The cell matrix used by dpdata in low triangle form. + Parameters + ---------- + A : float + cell length A + B : float + cell length B + C : float + cell length C + alpha : float + radian. The angle between vector B and vector C. + beta : float + radian. The angle between vector A and vector C. + gamma : float + radian. The angle between vector B and vector C. + + Returns + ------- + cell : list + The cell matrix used by dpdata in low triangle form. """ - if not np.pi*5/180 0.2: raise RuntimeError("A=={}, must be greater than 0.2".format(A)) if not B > 0.2: @@ -47,15 +59,17 @@ def cell_to_low_triangle(A,B,C,alpha,beta,gamma): lx = A xy = B * np.cos(gamma) xz = C * np.cos(beta) - ly = B* np.sin(gamma) + ly = B * np.sin(gamma) if not ly > 0.1: - raise RuntimeError("ly:=B* np.sin(gamma)=={}, must be greater than 0.1",format(ly)) - yz = (B*C*np.cos(alpha)-xy*xz)/ly - if not C**2-xz**2-yz**2 > 0.01: - raise RuntimeError("lz^2:=C**2-xz**2-yz**2=={}, must be greater than 0.01",format(C**2-xz**2-yz**2)) - lz = np.sqrt(C**2-xz**2-yz**2) - cell = np.asarray([[lx, 0 , 0], - [xy, ly, 0 ], - [xz, yz, lz]]).astype('float32') + raise RuntimeError( + "ly:=B* np.sin(gamma)=={}, must be greater than 0.1", format(ly) + ) + yz = (B * C * np.cos(alpha) - xy * xz) / ly + if not C**2 - xz**2 - yz**2 > 0.01: + raise RuntimeError( + "lz^2:=C**2-xz**2-yz**2=={}, must be greater than 0.01", + format(C**2 - xz**2 - yz**2), + ) + lz = np.sqrt(C**2 - xz**2 - yz**2) + cell = np.asarray([[lx, 0, 0], [xy, ly, 0], [xz, yz, lz]]).astype("float32") return cell - diff --git a/dpdata/cp2k/output.py b/dpdata/cp2k/output.py index 2f5e9cc6..965d0656 100644 --- a/dpdata/cp2k/output.py +++ b/dpdata/cp2k/output.py @@ -5,33 +5,40 @@ from scipy.constants import R from .cell import cell_to_low_triangle -from ..unit import EnergyConversion, LengthConversion, ForceConversion, PressureConversion +from ..unit import ( + EnergyConversion, + LengthConversion, + ForceConversion, + PressureConversion, +) #%% AU_TO_ANG = LengthConversion("bohr", "angstrom").value() AU_TO_EV = EnergyConversion("hartree", "eV").value() AU_TO_EV_EVERY_ANG = ForceConversion("hartree/bohr", "eV/angstrom").value() -delimiter_patterns=[] -delimiter_p1 = re.compile(r'^ \* GO CP2K GO! \*+') -delimiter_p2 = re.compile(r'^ \*+') +delimiter_patterns = [] +delimiter_p1 = re.compile(r"^ \* GO CP2K GO! \*+") +delimiter_p2 = re.compile(r"^ \*+") delimiter_patterns.append(delimiter_p1) delimiter_patterns.append(delimiter_p2) avail_patterns = [] -avail_patterns.append(re.compile(r'^ INITIAL POTENTIAL ENERGY')) -avail_patterns.append(re.compile(r'^ ENSEMBLE TYPE')) +avail_patterns.append(re.compile(r"^ INITIAL POTENTIAL ENERGY")) +avail_patterns.append(re.compile(r"^ ENSEMBLE TYPE")) + class Cp2kSystems(object): """ deal with cp2k outputfile """ + def __init__(self, log_file_name, xyz_file_name, restart=False): - self.log_file_object = open(log_file_name, 'r') - self.xyz_file_object = open(xyz_file_name, 'r') + self.log_file_object = open(log_file_name, "r") + self.xyz_file_object = open(xyz_file_name, "r") self.log_block_generator = self.get_log_block_generator() self.xyz_block_generator = self.get_xyz_block_generator() self.restart_flag = restart - self.cell=None - self.print_level=None + self.cell = None + self.print_level = None self.atomic_kinds = None if self.restart_flag: @@ -47,15 +54,19 @@ def __iter__(self): def __next__(self): info_dict = {} log_info_dict = self.handle_single_log_frame(next(self.log_block_generator)) - #print(log_info_dict) + # print(log_info_dict) xyz_info_dict = self.handle_single_xyz_frame(next(self.xyz_block_generator)) - #eq1 = [v1==v2 for v1,v2 in zip(log_info_dict['atom_numbs'], xyz_info_dict['atom_numbs'])] - #eq2 = [v1==v2 for v1,v2 in zip(log_info_dict['atom_names'], xyz_info_dict['atom_names'])] - #eq3 = [v1==v2 for v1,v2 in zip(log_info_dict['atom_types'], xyz_info_dict['atom_types'])] - #assert all(eq1), (log_info_dict,xyz_info_dict,'There may be errors in the file. If it is a restart task; use restart=True') - #assert all(eq2), (log_info_dict,xyz_info_dict,'There may be errors in the file. If it is a restart task; use restart=True') - #assert all(eq3), (log_info_dict,xyz_info_dict,'There may be errors in the file. If it is a restart task; use restart=True') - assert log_info_dict['energies']==xyz_info_dict['energies'], (log_info_dict['energies'], xyz_info_dict['energies'],'There may be errors in the file') + # eq1 = [v1==v2 for v1,v2 in zip(log_info_dict['atom_numbs'], xyz_info_dict['atom_numbs'])] + # eq2 = [v1==v2 for v1,v2 in zip(log_info_dict['atom_names'], xyz_info_dict['atom_names'])] + # eq3 = [v1==v2 for v1,v2 in zip(log_info_dict['atom_types'], xyz_info_dict['atom_types'])] + # assert all(eq1), (log_info_dict,xyz_info_dict,'There may be errors in the file. If it is a restart task; use restart=True') + # assert all(eq2), (log_info_dict,xyz_info_dict,'There may be errors in the file. If it is a restart task; use restart=True') + # assert all(eq3), (log_info_dict,xyz_info_dict,'There may be errors in the file. If it is a restart task; use restart=True') + assert log_info_dict["energies"] == xyz_info_dict["energies"], ( + log_info_dict["energies"], + xyz_info_dict["energies"], + "There may be errors in the file", + ) info_dict.update(log_info_dict) info_dict.update(xyz_info_dict) return info_dict @@ -80,10 +91,10 @@ def get_log_block_generator(self): else: break if delimiter_flag is True: - raise RuntimeError('This file lacks some content, please check') + raise RuntimeError("This file lacks some content, please check") def get_xyz_block_generator(self): - p3 = re.compile(r'^\s*(\d+)\s*') + p3 = re.compile(r"^\s*(\d+)\s*") while True: line = self.xyz_file_object.readline() if not line: @@ -92,122 +103,172 @@ def get_xyz_block_generator(self): atom_num = int(p3.match(line).group(1)) lines = [] lines.append(line) - for ii in range(atom_num+1): + for ii in range(atom_num + 1): lines.append(self.xyz_file_object.readline()) if not lines[-1]: - raise RuntimeError("this xyz file may lack of lines, should be {};lines:{}".format(atom_num+2, lines)) + raise RuntimeError( + "this xyz file may lack of lines, should be {};lines:{}".format( + atom_num + 2, lines + ) + ) yield lines def handle_single_log_frame(self, lines): - info_dict={} - energy_pattern_1 = re.compile(r' INITIAL POTENTIAL ENERGY\[hartree\]\s+=\s+(?P\S+)') + info_dict = {} + energy_pattern_1 = re.compile( + r" INITIAL POTENTIAL ENERGY\[hartree\]\s+=\s+(?P\S+)" + ) # CONSERVED QUANTITY [hartree] = -0.279168013085E+04 - energy_pattern_2 = re.compile(r' POTENTIAL ENERGY\[hartree\]\s+=\s+(?P\S+)') - energy=None - cell_length_pattern = re.compile(r' (INITIAL ){0,1}CELL LNTHS\[bohr\]\s+=\s+(?P\S+)\s+(?P\S+)\s+(?P\S+)') - cell_angle_pattern = re.compile(r' (INITIAL ){0,1}CELL ANGLS\[deg\]\s+=\s+(?P\S+)\s+(?P\S+)\s+(?P\S+)') - cell_A, cell_B, cell_C = (0,0,0,) - cell_alpha, cell_beta, cell_gamma=(0,0,0,) - cell_a_pattern = re.compile(r' CELL\| Vector a \[angstrom\]:\s+(?P\S+)\s+(?P\S+)\s+(?P\S+)') - cell_b_pattern = re.compile(r' CELL\| Vector b \[angstrom\]:\s+(?P\S+)\s+(?P\S+)\s+(?P\S+)') - cell_c_pattern = re.compile(r' CELL\| Vector c \[angstrom\]:\s+(?P\S+)\s+(?P\S+)\s+(?P\S+)') - force_start_pattern = re.compile(r' ATOMIC FORCES in') - force_flag=False - force_end_pattern = re.compile(r' SUM OF ATOMIC FORCES') - force_lines= [] - cell_flag=0 - print_level_pattern = re.compile(r' GLOBAL\| Global print level\s+(?P\S+)') + energy_pattern_2 = re.compile( + r" POTENTIAL ENERGY\[hartree\]\s+=\s+(?P\S+)" + ) + energy = None + cell_length_pattern = re.compile( + r" (INITIAL ){0,1}CELL LNTHS\[bohr\]\s+=\s+(?P\S+)\s+(?P\S+)\s+(?P\S+)" + ) + cell_angle_pattern = re.compile( + r" (INITIAL ){0,1}CELL ANGLS\[deg\]\s+=\s+(?P\S+)\s+(?P\S+)\s+(?P\S+)" + ) + cell_A, cell_B, cell_C = ( + 0, + 0, + 0, + ) + cell_alpha, cell_beta, cell_gamma = ( + 0, + 0, + 0, + ) + cell_a_pattern = re.compile( + r" CELL\| Vector a \[angstrom\]:\s+(?P\S+)\s+(?P\S+)\s+(?P\S+)" + ) + cell_b_pattern = re.compile( + r" CELL\| Vector b \[angstrom\]:\s+(?P\S+)\s+(?P\S+)\s+(?P\S+)" + ) + cell_c_pattern = re.compile( + r" CELL\| Vector c \[angstrom\]:\s+(?P\S+)\s+(?P\S+)\s+(?P\S+)" + ) + force_start_pattern = re.compile(r" ATOMIC FORCES in") + force_flag = False + force_end_pattern = re.compile(r" SUM OF ATOMIC FORCES") + force_lines = [] + cell_flag = 0 + print_level_pattern = re.compile( + r" GLOBAL\| Global print level\s+(?P\S+)" + ) print_level_flag = 0 - atomic_kinds_pattern = re.compile(r'\s+\d+\. Atomic kind:\s+(?P\S+)') - atomic_kinds = [] - stress_sign = 'STRESS' + atomic_kinds_pattern = re.compile(r"\s+\d+\. Atomic kind:\s+(?P\S+)") + atomic_kinds = [] + stress_sign = "STRESS" stress_flag = 0 stress = [] for line in lines: - if stress_flag == 3 : - if (line == '\n') : + if stress_flag == 3: + if line == "\n": stress_flag = 0 - else : + else: stress.append(line.split()[1:4]) - if stress_flag == 2 : + if stress_flag == 2: stress_flag = 3 - if stress_flag == 1 : + if stress_flag == 1: stress_flag = 2 - if (stress_sign in line): + if stress_sign in line: stress_flag = 1 if force_start_pattern.match(line): - force_flag=True + force_flag = True if force_end_pattern.match(line): - assert force_flag is True, (force_flag,'there may be errors in this file ') - force_flag=False + assert force_flag is True, ( + force_flag, + "there may be errors in this file ", + ) + force_flag = False if force_flag is True: - force_lines.append(line) + force_lines.append(line) if energy_pattern_1.match(line): - energy = float(energy_pattern_1.match(line).groupdict()['number']) * AU_TO_EV - #print('1to', energy) + energy = ( + float(energy_pattern_1.match(line).groupdict()["number"]) * AU_TO_EV + ) + # print('1to', energy) if energy_pattern_2.match(line): - energy = float(energy_pattern_2.match(line).groupdict()['number']) * AU_TO_EV + energy = ( + float(energy_pattern_2.match(line).groupdict()["number"]) * AU_TO_EV + ) if cell_length_pattern.match(line): - cell_A = float(cell_length_pattern.match(line).groupdict()['A']) * AU_TO_ANG - cell_B = float(cell_length_pattern.match(line).groupdict()['B']) * AU_TO_ANG - cell_C = float(cell_length_pattern.match(line).groupdict()['C']) * AU_TO_ANG - cell_flag+=1 + cell_A = ( + float(cell_length_pattern.match(line).groupdict()["A"]) * AU_TO_ANG + ) + cell_B = ( + float(cell_length_pattern.match(line).groupdict()["B"]) * AU_TO_ANG + ) + cell_C = ( + float(cell_length_pattern.match(line).groupdict()["C"]) * AU_TO_ANG + ) + cell_flag += 1 if cell_angle_pattern.match(line): - cell_alpha = np.deg2rad(float(cell_angle_pattern.match(line).groupdict()['alpha'])) - cell_beta = np.deg2rad(float(cell_angle_pattern.match(line).groupdict()['beta'])) - cell_gamma = np.deg2rad(float(cell_angle_pattern.match(line).groupdict()['gamma'])) - cell_flag+=1 + cell_alpha = np.deg2rad( + float(cell_angle_pattern.match(line).groupdict()["alpha"]) + ) + cell_beta = np.deg2rad( + float(cell_angle_pattern.match(line).groupdict()["beta"]) + ) + cell_gamma = np.deg2rad( + float(cell_angle_pattern.match(line).groupdict()["gamma"]) + ) + cell_flag += 1 if print_level_pattern.match(line): - print_level = print_level_pattern.match(line).groupdict()['print_level'] + print_level = print_level_pattern.match(line).groupdict()["print_level"] print_level_flag += 1 if cell_a_pattern.match(line): - cell_ax = float(cell_a_pattern.match(line).groupdict()['ax']) - cell_ay = float(cell_a_pattern.match(line).groupdict()['ay']) - cell_az = float(cell_a_pattern.match(line).groupdict()['az']) - cell_flag+=1 + cell_ax = float(cell_a_pattern.match(line).groupdict()["ax"]) + cell_ay = float(cell_a_pattern.match(line).groupdict()["ay"]) + cell_az = float(cell_a_pattern.match(line).groupdict()["az"]) + cell_flag += 1 if cell_b_pattern.match(line): - cell_bx = float(cell_b_pattern.match(line).groupdict()['bx']) - cell_by = float(cell_b_pattern.match(line).groupdict()['by']) - cell_bz = float(cell_b_pattern.match(line).groupdict()['bz']) - cell_flag+=1 + cell_bx = float(cell_b_pattern.match(line).groupdict()["bx"]) + cell_by = float(cell_b_pattern.match(line).groupdict()["by"]) + cell_bz = float(cell_b_pattern.match(line).groupdict()["bz"]) + cell_flag += 1 if cell_c_pattern.match(line): - cell_cx = float(cell_c_pattern.match(line).groupdict()['cx']) - cell_cy = float(cell_c_pattern.match(line).groupdict()['cy']) - cell_cz = float(cell_c_pattern.match(line).groupdict()['cz']) - cell_flag+=1 + cell_cx = float(cell_c_pattern.match(line).groupdict()["cx"]) + cell_cy = float(cell_c_pattern.match(line).groupdict()["cy"]) + cell_cz = float(cell_c_pattern.match(line).groupdict()["cz"]) + cell_flag += 1 if atomic_kinds_pattern.match(line): - akind = atomic_kinds_pattern.match(line).groupdict()['akind'] + akind = atomic_kinds_pattern.match(line).groupdict()["akind"] atomic_kinds.append(akind) if print_level_flag == 1: self.print_level = print_level - if print_level == 'LOW': - raise RuntimeError("please provide cp2k output with higher print level(at least MEDIUM)") - + if print_level == "LOW": + raise RuntimeError( + "please provide cp2k output with higher print level(at least MEDIUM)" + ) if cell_flag == 2: - self.cell = cell_to_low_triangle(cell_A,cell_B,cell_C, - cell_alpha,cell_beta,cell_gamma) + self.cell = cell_to_low_triangle( + cell_A, cell_B, cell_C, cell_alpha, cell_beta, cell_gamma + ) elif cell_flag == 5: self.cell = np.asarray( [ [cell_ax, cell_ay, cell_az], - [cell_bx, cell_by, cell_bz], - [cell_cx, cell_cy, cell_cz]] - ).astype('float32') + [cell_bx, cell_by, cell_bz], + [cell_cx, cell_cy, cell_cz], + ] + ).astype("float32") if atomic_kinds: self.atomic_kinds = atomic_kinds - #print(self.atomic_kinds) - # lx = cell_A - # xy = cell_B * np.cos(cell_gamma) - # xz = cell_C * np.cos(cell_beta) - # ly = cell_B* np.sin(cell_gamma) - # yz = (cell_B*cell_C*np.cos(cell_alpha)-xy*xz)/ly - # lz = np.sqrt(cell_C**2-xz**2-yz**2) - # self.cell = [[lx, 0 , 0], - # [xy, ly, 0 ], - # [xz, yz, lz]] + # print(self.atomic_kinds) + # lx = cell_A + # xy = cell_B * np.cos(cell_gamma) + # xz = cell_C * np.cos(cell_beta) + # ly = cell_B* np.sin(cell_gamma) + # yz = (cell_B*cell_C*np.cos(cell_alpha)-xy*xz)/ly + # lz = np.sqrt(cell_C**2-xz**2-yz**2) + # self.cell = [[lx, 0 , 0], + # [xy, ly, 0 ], + # [xz, yz, lz]] element_index = -1 element_dict = OrderedDict() @@ -215,58 +276,66 @@ def handle_single_log_frame(self, lines): forces_list = [] for line in force_lines[3:]: line_list = line.split() - #print(line_list) + # print(line_list) if element_dict.get(line_list[1]): - element_dict[line_list[1]][1]+=1 + element_dict[line_list[1]][1] += 1 else: - element_index +=1 - element_dict[line_list[1]]=[element_index,1] + element_index += 1 + element_dict[line_list[1]] = [element_index, 1] atom_types_idx_list.append(element_dict[line_list[1]][0]) - forces_list.append([float(line_list[3])*AU_TO_EV_EVERY_ANG, - float(line_list[4])*AU_TO_EV_EVERY_ANG, - float(line_list[5])*AU_TO_EV_EVERY_ANG]) - #print(atom_types_idx_list) - #atom_names=list(element_dict.keys()) - atom_names=self.atomic_kinds - atom_numbs=[] + forces_list.append( + [ + float(line_list[3]) * AU_TO_EV_EVERY_ANG, + float(line_list[4]) * AU_TO_EV_EVERY_ANG, + float(line_list[5]) * AU_TO_EV_EVERY_ANG, + ] + ) + # print(atom_types_idx_list) + # atom_names=list(element_dict.keys()) + atom_names = self.atomic_kinds + atom_numbs = [] GPa = PressureConversion("eV/angstrom^3", "GPa").value() if stress: stress = np.array(stress) - stress = stress.astype('float32') + stress = stress.astype("float32") stress = stress[np.newaxis, :, :] # stress to virial conversion, default unit in cp2k is GPa # note the stress is virial = stress * volume - virial = stress * np.linalg.det(self.cell)/GPa + virial = stress * np.linalg.det(self.cell) / GPa virial = virial.squeeze() else: virial = None for ii in element_dict.keys(): atom_numbs.append(element_dict[ii][1]) - #print(atom_numbs) - info_dict['atom_names'] = atom_names - info_dict['atom_numbs'] = atom_numbs - info_dict['atom_types'] = np.asarray(atom_types_idx_list) - info_dict['print_level'] = self.print_level - info_dict['cells'] = np.asarray([self.cell]).astype('float32') - info_dict['energies'] = np.asarray([energy]).astype('float32') - info_dict['forces'] = np.asarray([forces_list]).astype('float32') - if(virial is not None ): - info_dict['virials'] = np.asarray([virial]).astype('float32') + # print(atom_numbs) + info_dict["atom_names"] = atom_names + info_dict["atom_numbs"] = atom_numbs + info_dict["atom_types"] = np.asarray(atom_types_idx_list) + info_dict["print_level"] = self.print_level + info_dict["cells"] = np.asarray([self.cell]).astype("float32") + info_dict["energies"] = np.asarray([energy]).astype("float32") + info_dict["forces"] = np.asarray([forces_list]).astype("float32") + if virial is not None: + info_dict["virials"] = np.asarray([virial]).astype("float32") return info_dict def handle_single_xyz_frame(self, lines): info_dict = {} - atom_num = int(lines[0].strip('\n').strip()) + atom_num = int(lines[0].strip("\n").strip()) if len(lines) != atom_num + 2: - raise RuntimeError("format error, atom_num=={}, {}!=atom_num+2".format(atom_num, len(lines))) - data_format_line = lines[1].strip('\n').strip()+str(' ') - prop_pattern = re.compile(r'(?P\w+)\s*=\s*(?P.*?)[, ]') + raise RuntimeError( + "format error, atom_num=={}, {}!=atom_num+2".format( + atom_num, len(lines) + ) + ) + data_format_line = lines[1].strip("\n").strip() + str(" ") + prop_pattern = re.compile(r"(?P\w+)\s*=\s*(?P.*?)[, ]") prop_dict = dict(prop_pattern.findall(data_format_line)) - energy=0 - if prop_dict.get('E'): - energy = float(prop_dict.get('E')) * AU_TO_EV + energy = 0 + if prop_dict.get("E"): + energy = float(prop_dict.get("E")) * AU_TO_EV # info_dict['energies'] = np.array([prop_dict['E']]).astype('float32') element_index = -1 @@ -276,32 +345,34 @@ def handle_single_xyz_frame(self, lines): for line in lines[2:]: line_list = line.split() if element_dict.get(line_list[0]): - element_dict[line_list[0]][1]+=1 + element_dict[line_list[0]][1] += 1 else: - element_index +=1 - element_dict[line_list[0]]=[element_index,1] + element_index += 1 + element_dict[line_list[0]] = [element_index, 1] atom_types_list.append(element_dict[line_list[0]][0]) # coords_list.append([float(line_list[1])*AU_TO_ANG, # float(line_list[2])*AU_TO_ANG, # float(line_list[3])*AU_TO_ANG]) - coords_list.append([float(line_list[1]), - float(line_list[2]), - float(line_list[3])]) - atom_names=list(element_dict.keys()) - atom_numbs=[] + coords_list.append( + [float(line_list[1]), float(line_list[2]), float(line_list[3])] + ) + atom_names = list(element_dict.keys()) + atom_numbs = [] for ii in atom_names: atom_numbs.append(element_dict[ii][1]) - #info_dict['atom_names'] = atom_names - #info_dict['atom_numbs'] = atom_numbs - #info_dict['atom_types'] = np.asarray(atom_types_list) - info_dict['coords'] = np.asarray([coords_list]).astype('float32') - info_dict['energies'] = np.array([energy]).astype('float32') - info_dict['orig'] = np.zeros(3) + # info_dict['atom_names'] = atom_names + # info_dict['atom_numbs'] = atom_numbs + # info_dict['atom_types'] = np.asarray(atom_types_list) + info_dict["coords"] = np.asarray([coords_list]).astype("float32") + info_dict["energies"] = np.array([energy]).astype("float32") + info_dict["orig"] = np.zeros(3) return info_dict + #%% -def get_frames (fname) : + +def get_frames(fname): coord_flag = False force_flag = False stress_flag = False @@ -315,75 +386,73 @@ def get_frames (fname) : force = [] stress = [] - fp = open(fname) # check if output is converged, if not, return sys = 0 content = fp.read() - count = content.count('SCF run converged') + count = content.count("SCF run converged") if count == 0: return [], [], [], [], [], [], [], None - # search duplicated header + # search duplicated header fp.seek(0) header_idx = [] - for idx, ii in enumerate(fp) : - if 'Multiplication driver' in ii : + for idx, ii in enumerate(fp): + if "Multiplication driver" in ii: header_idx.append(idx) # parse from last header fp.seek(0) - for idx, ii in enumerate(fp) : - if idx > header_idx[-1] : - if 'CELL| Vector' in ii: + for idx, ii in enumerate(fp): + if idx > header_idx[-1]: + if "CELL| Vector" in ii: cell.append(ii.split()[4:7]) - if 'Atomic kind:' in ii: + if "Atomic kind:" in ii: atom_symbol_list.append(ii.split()[3]) - if 'Atom Kind Element' in ii : + if "Atom Kind Element" in ii: coord_flag = True coord_idx = idx - + # get the coord block info - if coord_flag : - if (idx > coord_idx + 1) : - if (ii == '\n') : + if coord_flag: + if idx > coord_idx + 1: + if ii == "\n": coord_flag = False - else : + else: coord.append(ii.split()[4:7]) atom_symbol_idx_list.append(ii.split()[1]) - if 'ENERGY|' in ii : - energy = (ii.split()[8]) - if ' Atom Kind ' in ii : + if "ENERGY|" in ii: + energy = ii.split()[8] + if " Atom Kind " in ii: force_flag = True force_idx = idx - if force_flag : - if (idx > force_idx) : - if 'SUM OF ATOMIC FORCES' in ii : + if force_flag: + if idx > force_idx: + if "SUM OF ATOMIC FORCES" in ii: force_flag = False - else : + else: force.append(ii.split()[3:6]) # add reading stress tensor - if 'STRESS TENSOR [GPa' in ii : + if "STRESS TENSOR [GPa" in ii: stress_flag = True stress_idx = idx - if stress_flag : - if (idx > stress_idx + 2): - if (ii == '\n') : + if stress_flag: + if idx > stress_idx + 2: + if ii == "\n": stress_flag = False - else : + else: stress.append(ii.split()[1:4]) - fp.close() - assert(coord), "cannot find coords" - assert(energy), "cannot find energies" - assert(force), "cannot find forces" + assert coord, "cannot find coords" + assert energy, "cannot find energies" + assert force, "cannot find forces" - #conver to float array and add extra dimension for nframes + # conver to float array and add extra dimension for nframes cell = np.array(cell) - cell = cell.astype('float32') + cell = cell.astype("float32") cell = cell[np.newaxis, :, :] coord = np.array(coord) - coord = coord.astype('float32') + coord = coord.astype("float32") coord = coord[np.newaxis, :, :] atom_symbol_idx_list = np.array(atom_symbol_idx_list) atom_symbol_idx_list = atom_symbol_idx_list.astype(int) @@ -391,17 +460,17 @@ def get_frames (fname) : atom_symbol_list = np.array(atom_symbol_list) atom_symbol_list = atom_symbol_list[atom_symbol_idx_list] force = np.array(force) - force = force.astype('float32') + force = force.astype("float32") force = force[np.newaxis, :, :] # virial is not necessary if stress: stress = np.array(stress) - stress = stress.astype('float32') + stress = stress.astype("float32") stress = stress[np.newaxis, :, :] # stress to virial conversion, default unit in cp2k is GPa # note the stress is virial = stress * volume - virial = stress * np.linalg.det(cell[0])/GPa + virial = stress * np.linalg.det(cell[0]) / GPa else: virial = None @@ -409,18 +478,17 @@ def get_frames (fname) : force = force * eV / angstrom # energy unit conversion, default unit in cp2k is hartree energy = float(energy) * eV - energy = np.array(energy).astype('float32') + energy = np.array(energy).astype("float32") energy = energy[np.newaxis] - tmp_names, symbol_idx = np.unique(atom_symbol_list, return_index=True) atom_types = [] atom_numbs = [] - #preserve the atom_name order + # preserve the atom_name order atom_names = atom_symbol_list[np.sort(symbol_idx)] for jj in atom_symbol_list: for idx, ii in enumerate(atom_names): - if (jj == ii) : + if jj == ii: atom_types.append(idx) for idx in range(len(atom_names)): atom_numbs.append(atom_types.count(idx)) @@ -430,6 +498,4 @@ def get_frames (fname) : return list(atom_names), atom_numbs, atom_types, cell, coord, energy, force, virial - - # %% diff --git a/dpdata/deepmd/comp.py b/dpdata/deepmd/comp.py index 3ac239b0..a5eb7334 100644 --- a/dpdata/deepmd/comp.py +++ b/dpdata/deepmd/comp.py @@ -1,117 +1,124 @@ -import os,glob,shutil +import os, glob, shutil import numpy as np from .raw import load_type -def _cond_load_data(fname) : + +def _cond_load_data(fname): tmp = None - if os.path.isfile(fname) : + if os.path.isfile(fname): tmp = np.load(fname) return tmp -def _load_set(folder, nopbc: bool) : - coords = np.load(os.path.join(folder, 'coord.npy')) + +def _load_set(folder, nopbc: bool): + coords = np.load(os.path.join(folder, "coord.npy")) if nopbc: - cells = np.zeros((coords.shape[0], 3,3)) + cells = np.zeros((coords.shape[0], 3, 3)) else: - cells = np.load(os.path.join(folder, 'box.npy')) - eners = _cond_load_data(os.path.join(folder, 'energy.npy')) - forces = _cond_load_data(os.path.join(folder, 'force.npy')) - virs = _cond_load_data(os.path.join(folder, 'virial.npy')) + cells = np.load(os.path.join(folder, "box.npy")) + eners = _cond_load_data(os.path.join(folder, "energy.npy")) + forces = _cond_load_data(os.path.join(folder, "force.npy")) + virs = _cond_load_data(os.path.join(folder, "virial.npy")) return cells, coords, eners, forces, virs -def to_system_data(folder, - type_map = None, - labels = True) : + +def to_system_data(folder, type_map=None, labels=True): # data is empty - data = load_type(folder, type_map = type_map) - data['orig'] = np.zeros([3]) + data = load_type(folder, type_map=type_map) + data["orig"] = np.zeros([3]) if os.path.isfile(os.path.join(folder, "nopbc")): - data['nopbc'] = True - sets = sorted(glob.glob(os.path.join(folder, 'set.*'))) + data["nopbc"] = True + sets = sorted(glob.glob(os.path.join(folder, "set.*"))) all_cells = [] all_coords = [] all_eners = [] all_forces = [] all_virs = [] - for ii in sets : - cells, coords, eners, forces, virs = _load_set(ii, data.get('nopbc', False)) - nframes = np.reshape(cells, [-1,3,3]).shape[0] - all_cells.append(np.reshape(cells, [nframes,3,3])) - all_coords.append(np.reshape(coords, [nframes,-1,3])) + for ii in sets: + cells, coords, eners, forces, virs = _load_set(ii, data.get("nopbc", False)) + nframes = np.reshape(cells, [-1, 3, 3]).shape[0] + all_cells.append(np.reshape(cells, [nframes, 3, 3])) + all_coords.append(np.reshape(coords, [nframes, -1, 3])) if eners is not None: eners = np.reshape(eners, [nframes]) if labels: if eners is not None and eners.size > 0: all_eners.append(np.reshape(eners, [nframes])) if forces is not None and forces.size > 0: - all_forces.append(np.reshape(forces, [nframes,-1,3])) + all_forces.append(np.reshape(forces, [nframes, -1, 3])) if virs is not None and virs.size > 0: - all_virs.append(np.reshape(virs, [nframes,3,3])) - data['cells'] = np.concatenate(all_cells, axis = 0) - data['coords'] = np.concatenate(all_coords, axis = 0) - if len(all_eners) > 0 : - data['energies'] = np.concatenate(all_eners, axis = 0) - if len(all_forces) > 0 : - data['forces'] = np.concatenate(all_forces, axis = 0) + all_virs.append(np.reshape(virs, [nframes, 3, 3])) + data["cells"] = np.concatenate(all_cells, axis=0) + data["coords"] = np.concatenate(all_coords, axis=0) + if len(all_eners) > 0: + data["energies"] = np.concatenate(all_eners, axis=0) + if len(all_forces) > 0: + data["forces"] = np.concatenate(all_forces, axis=0) if len(all_virs) > 0: - data['virials'] = np.concatenate(all_virs, axis = 0) + data["virials"] = np.concatenate(all_virs, axis=0) return data -def dump(folder, - data, - set_size = 5000, - comp_prec = np.float32, - remove_sets = True) : - os.makedirs(folder, exist_ok = True) - sets = sorted(glob.glob(os.path.join(folder, 'set.*'))) +def dump(folder, data, set_size=5000, comp_prec=np.float32, remove_sets=True): + os.makedirs(folder, exist_ok=True) + sets = sorted(glob.glob(os.path.join(folder, "set.*"))) if len(sets) > 0: - if remove_sets : - for ii in sets : + if remove_sets: + for ii in sets: shutil.rmtree(ii) - else : - raise RuntimeError('found ' + str(sets) + ' in ' + folder + 'not a clean deepmd raw dir. please firstly clean set.* then try compress') - # dump raw - np.savetxt(os.path.join(folder, 'type.raw'), data['atom_types'], fmt = '%d') - np.savetxt(os.path.join(folder, 'type_map.raw'), data['atom_names'], fmt = '%s') + else: + raise RuntimeError( + "found " + + str(sets) + + " in " + + folder + + "not a clean deepmd raw dir. please firstly clean set.* then try compress" + ) + # dump raw + np.savetxt(os.path.join(folder, "type.raw"), data["atom_types"], fmt="%d") + np.savetxt(os.path.join(folder, "type_map.raw"), data["atom_names"], fmt="%s") # BondOrder System if "bonds" in data: - np.savetxt(os.path.join(folder, "bonds.raw"), data['bonds'], header="begin_atom, end_atom, bond_order") + np.savetxt( + os.path.join(folder, "bonds.raw"), + data["bonds"], + header="begin_atom, end_atom, bond_order", + ) if "formal_charges" in data: - np.savetxt(os.path.join(folder, "formal_charges.raw"), data['formal_charges']) + np.savetxt(os.path.join(folder, "formal_charges.raw"), data["formal_charges"]) # reshape frame properties and convert prec - nframes = data['cells'].shape[0] - cells = np.reshape(data['cells'], [nframes, 9]).astype(comp_prec) - coords = np.reshape(data['coords'], [nframes, -1]).astype(comp_prec) + nframes = data["cells"].shape[0] + cells = np.reshape(data["cells"], [nframes, 9]).astype(comp_prec) + coords = np.reshape(data["coords"], [nframes, -1]).astype(comp_prec) eners = None forces = None virials = None - if 'energies' in data: - eners = np.reshape(data['energies'], [nframes ]).astype(comp_prec) - if 'forces' in data: - forces = np.reshape(data['forces'], [nframes, -1]).astype(comp_prec) - if 'virials' in data : - virials = np.reshape(data['virials'], [nframes, 9]).astype(comp_prec) - if 'atom_pref' in data: - atom_pref = np.reshape(data['atom_pref'], [nframes, -1]).astype(comp_prec) + if "energies" in data: + eners = np.reshape(data["energies"], [nframes]).astype(comp_prec) + if "forces" in data: + forces = np.reshape(data["forces"], [nframes, -1]).astype(comp_prec) + if "virials" in data: + virials = np.reshape(data["virials"], [nframes, 9]).astype(comp_prec) + if "atom_pref" in data: + atom_pref = np.reshape(data["atom_pref"], [nframes, -1]).astype(comp_prec) # dump frame properties: cell, coord, energy, force and virial nsets = nframes // set_size - if set_size * nsets < nframes : + if set_size * nsets < nframes: nsets += 1 - for ii in range(nsets) : + for ii in range(nsets): set_stt = ii * set_size - set_end = (ii+1) * set_size - set_folder = os.path.join(folder, 'set.%03d' % ii) + set_end = (ii + 1) * set_size + set_folder = os.path.join(folder, "set.%03d" % ii) os.makedirs(set_folder) - np.save(os.path.join(set_folder, 'box'), cells [set_stt:set_end]) - np.save(os.path.join(set_folder, 'coord'), coords [set_stt:set_end]) + np.save(os.path.join(set_folder, "box"), cells[set_stt:set_end]) + np.save(os.path.join(set_folder, "coord"), coords[set_stt:set_end]) if eners is not None: - np.save(os.path.join(set_folder, 'energy'), eners [set_stt:set_end]) + np.save(os.path.join(set_folder, "energy"), eners[set_stt:set_end]) if forces is not None: - np.save(os.path.join(set_folder, 'force'), forces [set_stt:set_end]) + np.save(os.path.join(set_folder, "force"), forces[set_stt:set_end]) if virials is not None: - np.save(os.path.join(set_folder, 'virial'), virials[set_stt:set_end]) - if 'atom_pref' in data: + np.save(os.path.join(set_folder, "virial"), virials[set_stt:set_end]) + if "atom_pref" in data: np.save(os.path.join(set_folder, "atom_pref"), atom_pref[set_stt:set_end]) try: os.remove(os.path.join(folder, "nopbc")) @@ -120,4 +127,3 @@ def dump(folder, if data.get("nopbc", False): with open(os.path.join(folder, "nopbc"), "w") as fw_nopbc: pass - diff --git a/dpdata/deepmd/hdf5.py b/dpdata/deepmd/hdf5.py index 0e807f05..1e44b790 100644 --- a/dpdata/deepmd/hdf5.py +++ b/dpdata/deepmd/hdf5.py @@ -7,12 +7,15 @@ from wcmatch.glob import globfilter -__all__ = ['to_system_data', 'dump'] +__all__ = ["to_system_data", "dump"] -def to_system_data(f: Union[h5py.File, h5py.Group], - folder: str, - type_map: Optional[list] = None, - labels: bool = True) : + +def to_system_data( + f: Union[h5py.File, h5py.Group], + folder: str, + type_map: Optional[list] = None, + labels: bool = True, +): """Load a HDF5 file. Parameters @@ -29,67 +32,94 @@ def to_system_data(f: Union[h5py.File, h5py.Group], g = f[folder] if folder else f data = {} - data['atom_types'] = g['type.raw'][:] - ntypes = np.max(data['atom_types']) + 1 - natoms = data['atom_types'].size - data['atom_numbs'] = [] - for ii in range (ntypes) : - data['atom_numbs'].append(np.count_nonzero(data['atom_types'] == ii)) - data['atom_names'] = [] + data["atom_types"] = g["type.raw"][:] + ntypes = np.max(data["atom_types"]) + 1 + natoms = data["atom_types"].size + data["atom_numbs"] = [] + for ii in range(ntypes): + data["atom_numbs"].append(np.count_nonzero(data["atom_types"] == ii)) + data["atom_names"] = [] # if find type_map.raw, use it - if 'type_map.raw' in g.keys(): - my_type_map = list(np.char.decode(g['type_map.raw'][:])) - # else try to use arg type_map + if "type_map.raw" in g.keys(): + my_type_map = list(np.char.decode(g["type_map.raw"][:])) + # else try to use arg type_map elif type_map is not None: my_type_map = type_map # in the last case, make artificial atom names else: my_type_map = [] - for ii in range(ntypes) : - my_type_map.append('Type_%d' % ii) - assert(len(my_type_map) >= len(data['atom_numbs'])) - for ii in range(len(data['atom_numbs'])) : - data['atom_names'].append(my_type_map[ii]) + for ii in range(ntypes): + my_type_map.append("Type_%d" % ii) + assert len(my_type_map) >= len(data["atom_numbs"]) + for ii in range(len(data["atom_numbs"])): + data["atom_names"].append(my_type_map[ii]) - data['orig'] = np.zeros([3]) - if 'nopbc' in g.keys(): - data['nopbc'] = True - sets = globfilter(g.keys(), 'set.*') + data["orig"] = np.zeros([3]) + if "nopbc" in g.keys(): + data["nopbc"] = True + sets = globfilter(g.keys(), "set.*") data_types = { - 'cells': {'fn': 'box', 'labeled': False, 'shape': (3,3), 'required': 'nopbc' not in data}, - 'coords': {'fn': 'coord', 'labeled': False, 'shape': (natoms,3), 'required': True}, - 'energies': {'fn': 'energy', 'labeled': True, 'shape': tuple(), 'required': False}, - 'forces': {'fn': 'force', 'labeled': True, 'shape': (natoms,3), 'required': False}, - 'virials': {'fn': 'virial', 'labeled': True, 'shape': (3,3), 'required': False}, + "cells": { + "fn": "box", + "labeled": False, + "shape": (3, 3), + "required": "nopbc" not in data, + }, + "coords": { + "fn": "coord", + "labeled": False, + "shape": (natoms, 3), + "required": True, + }, + "energies": { + "fn": "energy", + "labeled": True, + "shape": tuple(), + "required": False, + }, + "forces": { + "fn": "force", + "labeled": True, + "shape": (natoms, 3), + "required": False, + }, + "virials": { + "fn": "virial", + "labeled": True, + "shape": (3, 3), + "required": False, + }, } - + for dt, prop in data_types.items(): all_data = [] for ii in sets: set = g[ii] - fn = '%s.npy' % prop['fn'] + fn = "%s.npy" % prop["fn"] if fn in set.keys(): dd = set[fn][:] nframes = dd.shape[0] - all_data.append(np.reshape(dd, (nframes, *prop['shape']))) - elif prop['required']: + all_data.append(np.reshape(dd, (nframes, *prop["shape"]))) + elif prop["required"]: raise RuntimeError("%s/%s/%s not found" % (folder, ii, fn)) - - if len(all_data) > 0 : - data[dt] = np.concatenate(all_data, axis = 0) - if 'cells' not in data: - nframes = data['coords'].shape[0] - data['cells'] = np.zeros((nframes, 3, 3)) + + if len(all_data) > 0: + data[dt] = np.concatenate(all_data, axis=0) + if "cells" not in data: + nframes = data["coords"].shape[0] + data["cells"] = np.zeros((nframes, 3, 3)) return data -def dump(f: Union[h5py.File, h5py.Group], - folder: str, - data: dict, - set_size = 5000, - comp_prec = np.float32, - ) -> None: + +def dump( + f: Union[h5py.File, h5py.Group], + folder: str, + data: dict, + set_size=5000, + comp_prec=np.float32, +) -> None: """Dump data to a HDF5 file. Parameters @@ -113,42 +143,46 @@ def dump(f: Union[h5py.File, h5py.Group], else: g = f # dump raw (array in fact) - g.create_dataset('type.raw', data=data['atom_types']) - g.create_dataset('type_map.raw', data=np.array(data['atom_names'], dtype='S')) + g.create_dataset("type.raw", data=data["atom_types"]) + g.create_dataset("type_map.raw", data=np.array(data["atom_names"], dtype="S")) # BondOrder System if "bonds" in data: - g.create_dataset("bonds.raw", data=data['bonds']) + g.create_dataset("bonds.raw", data=data["bonds"]) if "formal_charges" in data: - g.create_dataset("formal_charges.raw", data=data['formal_charges']) + g.create_dataset("formal_charges.raw", data=data["formal_charges"]) # reshape frame properties and convert prec - nframes = data['cells'].shape[0] + nframes = data["cells"].shape[0] nopbc = data.get("nopbc", False) reshaped_data = {} data_types = { - 'cells': {'fn': 'box', 'shape': (nframes, 9), 'dump': not nopbc}, - 'coords': {'fn': 'coord', 'shape': (nframes, -1), 'dump': True}, - 'energies': {'fn': 'energy', 'shape': (nframes,), 'dump': True}, - 'forces': {'fn': 'force', 'shape': (nframes, -1), 'dump': True}, - 'virials': {'fn': 'virial', 'shape': (nframes, 9), 'dump': True}, + "cells": {"fn": "box", "shape": (nframes, 9), "dump": not nopbc}, + "coords": {"fn": "coord", "shape": (nframes, -1), "dump": True}, + "energies": {"fn": "energy", "shape": (nframes,), "dump": True}, + "forces": {"fn": "force", "shape": (nframes, -1), "dump": True}, + "virials": {"fn": "virial", "shape": (nframes, 9), "dump": True}, } for dt, prop in data_types.items(): if dt in data: - if prop['dump']: - reshaped_data[dt] = np.reshape(data[dt], prop['shape']).astype(comp_prec) + if prop["dump"]: + reshaped_data[dt] = np.reshape(data[dt], prop["shape"]).astype( + comp_prec + ) # dump frame properties: cell, coord, energy, force and virial nsets = nframes // set_size - if set_size * nsets < nframes : + if set_size * nsets < nframes: nsets += 1 - for ii in range(nsets) : + for ii in range(nsets): set_stt = ii * set_size - set_end = (ii+1) * set_size - set_folder = g.create_group('set.%03d' % ii) + set_end = (ii + 1) * set_size + set_folder = g.create_group("set.%03d" % ii) for dt, prop in data_types.items(): if dt in reshaped_data: - set_folder.create_dataset('%s.npy' % prop['fn'], data=reshaped_data[dt][set_stt:set_end]) + set_folder.create_dataset( + "%s.npy" % prop["fn"], data=reshaped_data[dt][set_stt:set_end] + ) if nopbc: - g.create_dataset("nopbc", data=True) + g.create_dataset("nopbc", data=True) diff --git a/dpdata/deepmd/raw.py b/dpdata/deepmd/raw.py index 49744d0e..7de14baa 100644 --- a/dpdata/deepmd/raw.py +++ b/dpdata/deepmd/raw.py @@ -1,83 +1,99 @@ import os import numpy as np -def load_type(folder, type_map = None) : + +def load_type(folder, type_map=None): data = {} - data['atom_types'] \ - = np.loadtxt(os.path.join(folder, 'type.raw'), ndmin=1).astype(int) - ntypes = np.max(data['atom_types']) + 1 - data['atom_numbs'] = [] - for ii in range (ntypes) : - data['atom_numbs'].append(np.count_nonzero(data['atom_types'] == ii)) - data['atom_names'] = [] + data["atom_types"] = np.loadtxt(os.path.join(folder, "type.raw"), ndmin=1).astype( + int + ) + ntypes = np.max(data["atom_types"]) + 1 + data["atom_numbs"] = [] + for ii in range(ntypes): + data["atom_numbs"].append(np.count_nonzero(data["atom_types"] == ii)) + data["atom_names"] = [] # if find type_map.raw, use it - if os.path.isfile(os.path.join(folder, 'type_map.raw')) : - with open(os.path.join(folder, 'type_map.raw')) as fp: + if os.path.isfile(os.path.join(folder, "type_map.raw")): + with open(os.path.join(folder, "type_map.raw")) as fp: my_type_map = fp.read().split() - # else try to use arg type_map + # else try to use arg type_map elif type_map is not None: my_type_map = type_map # in the last case, make artificial atom names else: my_type_map = [] - for ii in range(ntypes) : - my_type_map.append('Type_%d' % ii) - assert(len(my_type_map) >= len(data['atom_numbs'])) - for ii in range(len(data['atom_numbs'])) : - data['atom_names'].append(my_type_map[ii]) + for ii in range(ntypes): + my_type_map.append("Type_%d" % ii) + assert len(my_type_map) >= len(data["atom_numbs"]) + for ii in range(len(data["atom_numbs"])): + data["atom_names"].append(my_type_map[ii]) return data -def to_system_data(folder, type_map = None, labels = True) : - if os.path.isdir(folder) : - data = load_type(folder, type_map = type_map) - data['orig'] = np.zeros([3]) - data['coords'] = np.loadtxt(os.path.join(folder, 'coord.raw'), ndmin=2) - nframes = data['coords'].shape[0] +def to_system_data(folder, type_map=None, labels=True): + if os.path.isdir(folder): + data = load_type(folder, type_map=type_map) + data["orig"] = np.zeros([3]) + data["coords"] = np.loadtxt(os.path.join(folder, "coord.raw"), ndmin=2) + nframes = data["coords"].shape[0] if os.path.isfile(os.path.join(folder, "nopbc")): - data['nopbc'] = True - data['cells'] = np.zeros((nframes, 3,3)) + data["nopbc"] = True + data["cells"] = np.zeros((nframes, 3, 3)) else: - data['cells'] = np.loadtxt(os.path.join(folder, 'box.raw'), ndmin=2) - data['cells'] = np.reshape(data['cells'], [nframes, 3, 3]) - data['coords'] = np.reshape(data['coords'], [nframes, -1, 3]) - if labels : - if os.path.exists(os.path.join(folder, 'energy.raw')) : - data['energies'] = np.loadtxt(os.path.join(folder, 'energy.raw')) - data['energies'] = np.reshape(data['energies'], [nframes]) - if os.path.exists(os.path.join(folder, 'force.raw')) : - data['forces'] = np.loadtxt(os.path.join(folder, 'force.raw')) - data['forces'] = np.reshape(data['forces'], [nframes, -1, 3]) - if os.path.exists(os.path.join(folder, 'virial.raw')) : - data['virials'] = np.loadtxt(os.path.join(folder, 'virial.raw')) - data['virials'] = np.reshape(data['virials'], [nframes, 3, 3]) + data["cells"] = np.loadtxt(os.path.join(folder, "box.raw"), ndmin=2) + data["cells"] = np.reshape(data["cells"], [nframes, 3, 3]) + data["coords"] = np.reshape(data["coords"], [nframes, -1, 3]) + if labels: + if os.path.exists(os.path.join(folder, "energy.raw")): + data["energies"] = np.loadtxt(os.path.join(folder, "energy.raw")) + data["energies"] = np.reshape(data["energies"], [nframes]) + if os.path.exists(os.path.join(folder, "force.raw")): + data["forces"] = np.loadtxt(os.path.join(folder, "force.raw")) + data["forces"] = np.reshape(data["forces"], [nframes, -1, 3]) + if os.path.exists(os.path.join(folder, "virial.raw")): + data["virials"] = np.loadtxt(os.path.join(folder, "virial.raw")) + data["virials"] = np.reshape(data["virials"], [nframes, 3, 3]) if os.path.isfile(os.path.join(folder, "nopbc")): - data['nopbc'] = True + data["nopbc"] = True return data - else : - raise RuntimeError('not dir ' + folder) + else: + raise RuntimeError("not dir " + folder) -def dump (folder, data) : - os.makedirs(folder, exist_ok = True) - nframes = data['cells'].shape[0] - np.savetxt(os.path.join(folder, 'type.raw'), data['atom_types'], fmt = '%d') - np.savetxt(os.path.join(folder, 'type_map.raw'), data['atom_names'], fmt = '%s') - np.savetxt(os.path.join(folder, 'box.raw'), np.reshape(data['cells'], [nframes, 9])) - np.savetxt(os.path.join(folder, 'coord.raw'), np.reshape(data['coords'], [nframes, -1])) +def dump(folder, data): + os.makedirs(folder, exist_ok=True) + nframes = data["cells"].shape[0] + np.savetxt(os.path.join(folder, "type.raw"), data["atom_types"], fmt="%d") + np.savetxt(os.path.join(folder, "type_map.raw"), data["atom_names"], fmt="%s") + np.savetxt(os.path.join(folder, "box.raw"), np.reshape(data["cells"], [nframes, 9])) + np.savetxt( + os.path.join(folder, "coord.raw"), np.reshape(data["coords"], [nframes, -1]) + ) # BondOrder System if "bonds" in data: - np.savetxt(os.path.join(folder, "bonds.raw"), data['bonds'], header="begin_atom, end_atom, bond_order") + np.savetxt( + os.path.join(folder, "bonds.raw"), + data["bonds"], + header="begin_atom, end_atom, bond_order", + ) if "formal_charges" in data: - np.savetxt(os.path.join(folder, "formal_charges.raw"), data['formal_charges']) + np.savetxt(os.path.join(folder, "formal_charges.raw"), data["formal_charges"]) # Labeled System - if 'energies' in data : - np.savetxt(os.path.join(folder, 'energy.raw'), np.reshape(data['energies'], [nframes, 1])) - if 'forces' in data : - np.savetxt(os.path.join(folder, 'force.raw'), np.reshape(data['forces'], [nframes, -1])) - if 'virials' in data : - np.savetxt(os.path.join(folder, 'virial.raw'), np.reshape(data['virials'], [nframes, 9])) + if "energies" in data: + np.savetxt( + os.path.join(folder, "energy.raw"), + np.reshape(data["energies"], [nframes, 1]), + ) + if "forces" in data: + np.savetxt( + os.path.join(folder, "force.raw"), np.reshape(data["forces"], [nframes, -1]) + ) + if "virials" in data: + np.savetxt( + os.path.join(folder, "virial.raw"), + np.reshape(data["virials"], [nframes, 9]), + ) try: os.remove(os.path.join(folder, "nopbc")) except OSError: @@ -85,4 +101,3 @@ def dump (folder, data) : if data.get("nopbc", False): with open(os.path.join(folder, "nopbc"), "w") as fw_nopbc: pass - diff --git a/dpdata/driver.py b/dpdata/driver.py index 670b0337..0f903947 100644 --- a/dpdata/driver.py +++ b/dpdata/driver.py @@ -6,6 +6,7 @@ if TYPE_CHECKING: import ase + class Driver(ABC): """The base class for a driver plugin. A driver can label a pure System to generate the LabeledSystem. @@ -14,17 +15,18 @@ class Driver(ABC): -------- dpdata.plugins.deepmd.DPDriver : an example of Driver """ + __DriverPlugin = Plugin() @staticmethod def register(key: str) -> Callable: """Register a driver plugin. Used as decorators. - + Parameter --------- key: str key of the plugin. - + Returns ------- Callable @@ -41,17 +43,17 @@ def register(key: str) -> Callable: @staticmethod def get_driver(key: str) -> "Driver": """Get a driver plugin. - + Parameter --------- key: str key of the plugin. - + Returns ------- Driver the specific driver class - + Raises ------ RuntimeError @@ -60,20 +62,20 @@ def get_driver(key: str) -> "Driver": try: return Driver.__DriverPlugin.plugins[key] except KeyError as e: - raise RuntimeError('Unknown driver: ' + key) from e - + raise RuntimeError("Unknown driver: " + key) from e + def __init__(self, *args, **kwargs) -> None: """Setup the driver.""" @abstractmethod def label(self, data: dict) -> dict: """Label a system data. Returns new data with energy, forces, and virials. - + Parameters ---------- data : dict data with coordinates and atom types - + Returns ------- dict @@ -85,6 +87,7 @@ def label(self, data: dict) -> dict: def ase_calculator(self) -> "ase.calculators.calculator.Calculator": """Returns an ase calculator based on this driver.""" from .ase_calculator import DPDataCalculator + return DPDataCalculator(self) @@ -112,6 +115,7 @@ class HybridDriver(Driver): ... ]) This driver is the hybrid of SQM and DP. """ + def __init__(self, drivers: List[Union[dict, Driver]]) -> None: self.drivers = [] for driver in drivers: @@ -128,12 +132,12 @@ def label(self, data: dict) -> dict: """Label a system data. Energies and forces are the sum of those of each driver. - + Parameters ---------- data : dict data with coordinates and atom types - + Returns ------- dict @@ -144,8 +148,8 @@ def label(self, data: dict) -> dict: if ii == 0: labeled_data = lb_data.copy() else: - labeled_data['energies'] += lb_data ['energies'] - labeled_data['forces'] += lb_data ['forces'] + labeled_data["energies"] += lb_data["energies"] + labeled_data["forces"] += lb_data["forces"] return labeled_data @@ -153,17 +157,18 @@ class Minimizer(ABC): """The base class for a minimizer plugin. A minimizer can minimize geometry. """ + __MinimizerPlugin = Plugin() @staticmethod def register(key: str) -> Callable: """Register a minimizer plugin. Used as decorators. - + Parameter --------- key: str key of the plugin. - + Returns ------- Callable @@ -180,17 +185,17 @@ def register(key: str) -> Callable: @staticmethod def get_minimizer(key: str) -> "Minimizer": """Get a minimizer plugin. - + Parameter --------- key: str key of the plugin. - + Returns ------- Minimizer the specific minimizer class - + Raises ------ RuntimeError @@ -199,7 +204,7 @@ def get_minimizer(key: str) -> "Minimizer": try: return Minimizer.__MinimizerPlugin.plugins[key] except KeyError as e: - raise RuntimeError('Unknown minimizer: ' + key) from e + raise RuntimeError("Unknown minimizer: " + key) from e def __init__(self, *args, **kwargs) -> None: """Setup the minimizer.""" @@ -212,7 +217,7 @@ def minimize(self, data: dict) -> dict: ---------- data : dict data with coordinates and atom types - + Returns ------- dict diff --git a/dpdata/fhi_aims/output.py b/dpdata/fhi_aims/output.py index 1a1b2c57..423957b7 100755 --- a/dpdata/fhi_aims/output.py +++ b/dpdata/fhi_aims/output.py @@ -2,15 +2,17 @@ import re import warnings -latt_patt="\|\s+([0-9]{1,}[.][0-9]*)\s+([0-9]{1,}[.][0-9]*)\s+([0-9]{1,}[.][0-9]*)" -pos_patt_first="\|\s+[0-9]{1,}[:]\s\w+\s(\w+)(\s.*[-]?[0-9]{1,}[.][0-9]*)(\s+[-]?[0-9]{1,}[.][0-9]*)(\s+[-]?[0-9]{1,}[.][0-9]*)" -pos_patt_other="\s+[a][t][o][m]\s+([-]?[0-9]{1,}[.][0-9]*)\s+([-]?[0-9]{1,}[.][0-9]*)\s+([-]?[0-9]{1,}[.][0-9]*)\s+(\w{1,2})" -force_patt="\|\s+[0-9]{1,}\s+([-]?[0-9]{1,}[.][0-9]*[E][+-][0-9]{1,})\s+([-]?[0-9]{1,}[.][0-9]*[E][+-][0-9]{1,})\s+([-]?[0-9]{1,}[.][0-9]*[E][+-][0-9]{1,})" -eng_patt="Total energy uncorrected.*([-]?[0-9]{1,}[.][0-9]*[E][+-][0-9]{1,})\s+eV" -#atom_numb_patt="Number of atoms.*([0-9]{1,})" +latt_patt = "\|\s+([0-9]{1,}[.][0-9]*)\s+([0-9]{1,}[.][0-9]*)\s+([0-9]{1,}[.][0-9]*)" +pos_patt_first = "\|\s+[0-9]{1,}[:]\s\w+\s(\w+)(\s.*[-]?[0-9]{1,}[.][0-9]*)(\s+[-]?[0-9]{1,}[.][0-9]*)(\s+[-]?[0-9]{1,}[.][0-9]*)" +pos_patt_other = "\s+[a][t][o][m]\s+([-]?[0-9]{1,}[.][0-9]*)\s+([-]?[0-9]{1,}[.][0-9]*)\s+([-]?[0-9]{1,}[.][0-9]*)\s+(\w{1,2})" +force_patt = "\|\s+[0-9]{1,}\s+([-]?[0-9]{1,}[.][0-9]*[E][+-][0-9]{1,})\s+([-]?[0-9]{1,}[.][0-9]*[E][+-][0-9]{1,})\s+([-]?[0-9]{1,}[.][0-9]*[E][+-][0-9]{1,})" +eng_patt = "Total energy uncorrected.*([-]?[0-9]{1,}[.][0-9]*[E][+-][0-9]{1,})\s+eV" +# atom_numb_patt="Number of atoms.*([0-9]{1,})" debug = False -def get_info (lines, type_idx_zero = False) : + + +def get_info(lines, type_idx_zero=False): atom_types = [] atom_names = [] @@ -18,163 +20,184 @@ def get_info (lines, type_idx_zero = False) : atom_numbs = None _atom_names = [] - contents="\n".join(lines) - #cell - #_tmp=re.findall(latt_patt,contents) - #for ii in _tmp: + contents = "\n".join(lines) + # cell + # _tmp=re.findall(latt_patt,contents) + # for ii in _tmp: # vect=[float(kk) for kk in ii] # cell.append(vect) - #------------------ - for ln,l in enumerate(lines): - if l.startswith(' | Unit cell'): + # ------------------ + for ln, l in enumerate(lines): + if l.startswith(" | Unit cell"): break - _tmp=lines[ln+1:ln+4] + _tmp = lines[ln + 1 : ln + 4] for ii in _tmp: - v_str=ii.split('|')[1].split() - vect=[float(kk) for kk in v_str] + v_str = ii.split("|")[1].split() + vect = [float(kk) for kk in v_str] cell.append(vect) - _tmp=re.findall(pos_patt_first,contents) + _tmp = re.findall(pos_patt_first, contents) for ii in _tmp: _atom_names.append(ii[0]) - atom_names=[] + atom_names = [] for ii in _atom_names: if not ii in atom_names: - atom_names.append(ii) - - atom_numbs =[_atom_names.count(ii) for ii in atom_names] - if type_idx_zero : - type_map=dict(zip(atom_names,range(len(atom_names)))) + atom_names.append(ii) + + atom_numbs = [_atom_names.count(ii) for ii in atom_names] + if type_idx_zero: + type_map = dict(zip(atom_names, range(len(atom_names)))) else: - type_map=dict(zip(atom_names,range(1,len(atom_names)+1))) - atom_types=list(map(lambda k: type_map[k], _atom_names)) - assert(atom_numbs is not None), "cannot find ion type info in aims output" - + type_map = dict(zip(atom_names, range(1, len(atom_names) + 1))) + atom_types = list(map(lambda k: type_map[k], _atom_names)) + assert atom_numbs is not None, "cannot find ion type info in aims output" - return [cell, atom_numbs, atom_names, atom_types ] + return [cell, atom_numbs, atom_names, atom_types] -def get_fhi_aims_block(fp) : +def get_fhi_aims_block(fp): blk = [] - for ii in fp : - if not ii : + for ii in fp: + if not ii: return blk - blk.append(ii.rstrip('\n')) - if 'Begin self-consistency loop: Re-initialization' in ii: + blk.append(ii.rstrip("\n")) + if "Begin self-consistency loop: Re-initialization" in ii: return blk return blk -def get_frames (fname, md=True, begin = 0, step = 1, convergence_check=True) : + +def get_frames(fname, md=True, begin=0, step=1, convergence_check=True): fp = open(fname) blk = get_fhi_aims_block(fp) - ret = get_info(blk, type_idx_zero = True) + ret = get_info(blk, type_idx_zero=True) - cell, atom_numbs, atom_names, atom_types =ret[0],ret[1],ret[2],ret[3] + cell, atom_numbs, atom_names, atom_types = ret[0], ret[1], ret[2], ret[3] ntot = sum(atom_numbs) all_coords = [] all_cells = [] all_energies = [] all_forces = [] - all_virials = [] + all_virials = [] cc = 0 rec_failed = [] - while len(blk) > 0 : + while len(blk) > 0: if debug: - with open(str(cc),'w') as f: - f.write('\n'.join(blk)) - if cc >= begin and (cc - begin) % step == 0 : - if cc==0: - coord, _cell, energy, force, virial, is_converge = analyze_block(blk, first_blk=True, md=md) + with open(str(cc), "w") as f: + f.write("\n".join(blk)) + if cc >= begin and (cc - begin) % step == 0: + if cc == 0: + coord, _cell, energy, force, virial, is_converge = analyze_block( + blk, first_blk=True, md=md + ) else: - coord, _cell, energy, force, virial, is_converge = analyze_block(blk, first_blk=False) + coord, _cell, energy, force, virial, is_converge = analyze_block( + blk, first_blk=False + ) if len(coord) == 0: break - if is_converge or not convergence_check: + if is_converge or not convergence_check: all_coords.append(coord) if _cell: - all_cells.append(_cell) + all_cells.append(_cell) else: - all_cells.append(cell) + all_cells.append(cell) all_energies.append(energy) all_forces.append(force) - if virial is not None : + if virial is not None: all_virials.append(virial) if not is_converge: - rec_failed.append(cc+1) - + rec_failed.append(cc + 1) + blk = get_fhi_aims_block(fp) cc += 1 - - if len(rec_failed) > 0 : - prt = "so they are not collected." if convergence_check else "but they are still collected due to the requirement for ignoring convergence checks." - warnings.warn(f"The following structures were unconverged: {rec_failed}; "+prt) - - if len(all_virials) == 0 : + + if len(rec_failed) > 0: + prt = ( + "so they are not collected." + if convergence_check + else "but they are still collected due to the requirement for ignoring convergence checks." + ) + warnings.warn( + f"The following structures were unconverged: {rec_failed}; " + prt + ) + + if len(all_virials) == 0: all_virials = None - else : + else: all_virials = np.array(all_virials) fp.close() - return atom_names, atom_numbs, np.array(atom_types), np.array(all_cells), np.array(all_coords), np.array(all_energies), np.array(all_forces), all_virials - - -def analyze_block(lines, first_blk=False, md=True) : + return ( + atom_names, + atom_numbs, + np.array(atom_types), + np.array(all_cells), + np.array(all_coords), + np.array(all_energies), + np.array(all_forces), + all_virials, + ) + + +def analyze_block(lines, first_blk=False, md=True): coord = [] cell = [] energy = None force = [] virial = None - atom_names=[] - _atom_names=[] + atom_names = [] + _atom_names = [] - contents="\n".join(lines) + contents = "\n".join(lines) try: - natom=int(re.findall("Number of atoms.*([0-9]{1,})",lines)[0]) + natom = int(re.findall("Number of atoms.*([0-9]{1,})", lines)[0]) except Exception: - natom=0 + natom = 0 if first_blk: - if md: - _tmp=re.findall(pos_patt_other,contents)[:] - for ii in _tmp[slice(int(len(_tmp)/2),len(_tmp))]: - coord.append([float(kk) for kk in ii[:-1]]) - else: - _tmp=re.findall(pos_patt_first,contents) - for ii in _tmp: - coord.append([float(kk) for kk in ii[1:]]) + if md: + _tmp = re.findall(pos_patt_other, contents)[:] + for ii in _tmp[slice(int(len(_tmp) / 2), len(_tmp))]: + coord.append([float(kk) for kk in ii[:-1]]) + else: + _tmp = re.findall(pos_patt_first, contents) + for ii in _tmp: + coord.append([float(kk) for kk in ii[1:]]) else: - _tmp=re.findall(pos_patt_other,contents) - for ii in _tmp: - coord.append([float(kk) for kk in ii[:-1]]) + _tmp = re.findall(pos_patt_other, contents) + for ii in _tmp: + coord.append([float(kk) for kk in ii[:-1]]) - _tmp=re.findall(force_patt,contents) + _tmp = re.findall(force_patt, contents) for ii in _tmp: force.append([float(kk) for kk in ii]) if "Self-consistency cycle converged" in contents: - is_converge=True + is_converge = True else: - is_converge=False + is_converge = False try: - _eng_patt=re.compile(eng_patt) - energy=float(_eng_patt.search(contents).group().split()[-2]) + _eng_patt = re.compile(eng_patt) + energy = float(_eng_patt.search(contents).group().split()[-2]) except Exception: - energy=None - + energy = None + if not energy: - is_converge = False + is_converge = False if energy: - assert((force is not None) and len(coord) > 0 ) + assert (force is not None) and len(coord) > 0 return coord, cell, energy, force, virial, is_converge -if __name__=='__main__': - import sys - ret=get_frames (sys.argv[1], begin = 0, step = 1) - print(ret) + +if __name__ == "__main__": + import sys + + ret = get_frames(sys.argv[1], begin=0, step=1) + print(ret) diff --git a/dpdata/format.py b/dpdata/format.py index b96c374a..84813c76 100644 --- a/dpdata/format.py +++ b/dpdata/format.py @@ -22,7 +22,7 @@ def register_from(key): @staticmethod def register_to(key): return Format.__ToPlugin.register(key) - + @staticmethod def get_formats(): return Format.__FormatPlugin.plugins @@ -34,7 +34,7 @@ def get_from_methods(): @staticmethod def get_to_methods(): return Format.__ToPlugin.plugins - + @staticmethod def post(func_name): def decorator(object): @@ -43,6 +43,7 @@ def decorator(object): else: object.post_func = func_name return object + return decorator def from_system(self, file_name, **kwargs): @@ -58,7 +59,9 @@ def from_system(self, file_name, **kwargs): data: dict system data """ - raise NotImplementedError("%s doesn't support System.from" %(self.__class__.__name__)) + raise NotImplementedError( + "%s doesn't support System.from" % (self.__class__.__name__) + ) def to_system(self, data, *args, **kwargs): """System.to @@ -68,16 +71,22 @@ def to_system(self, data, *args, **kwargs): data: dict system data """ - raise NotImplementedError("%s doesn't support System.to" %(self.__class__.__name__)) + raise NotImplementedError( + "%s doesn't support System.to" % (self.__class__.__name__) + ) def from_labeled_system(self, file_name, **kwargs): - raise NotImplementedError("%s doesn't support LabeledSystem.from" %(self.__class__.__name__)) + raise NotImplementedError( + "%s doesn't support LabeledSystem.from" % (self.__class__.__name__) + ) def to_labeled_system(self, data, *args, **kwargs): return self.to_system(data, *args, **kwargs) def from_bond_order_system(self, file_name, **kwargs): - raise NotImplementedError("%s doesn't support BondOrderSystem.from" %(self.__class__.__name__)) + raise NotImplementedError( + "%s doesn't support BondOrderSystem.from" % (self.__class__.__name__) + ) def to_bond_order_system(self, data, rdkit_mol, *args, **kwargs): return self.to_system(data, *args, **kwargs) @@ -87,6 +96,7 @@ class MultiModes: 0 (default): not implemented 1: every directory under the top-level directory is a system """ + NotImplemented = 0 Directory = 1 @@ -94,23 +104,30 @@ class MultiModes: def from_multi_systems(self, directory, **kwargs): """MultiSystems.from - + Parameters ---------- directory: str directory of system - + Returns ------- filenames: list[str] list of filenames """ if self.MultiMode == self.MultiModes.Directory: - return [os.path.join(directory, name) for name in os.listdir(directory) if os.path.isdir(os.path.join(directory, name))] - raise NotImplementedError("%s doesn't support MultiSystems.from" %(self.__class__.__name__)) + return [ + os.path.join(directory, name) + for name in os.listdir(directory) + if os.path.isdir(os.path.join(directory, name)) + ] + raise NotImplementedError( + "%s doesn't support MultiSystems.from" % (self.__class__.__name__) + ) def to_multi_systems(self, formulas, directory, **kwargs): if self.MultiMode == self.MultiModes.Directory: return [os.path.join(directory, ff) for ff in formulas] - raise NotImplementedError("%s doesn't support MultiSystems.to" %(self.__class__.__name__)) - + raise NotImplementedError( + "%s doesn't support MultiSystems.to" % (self.__class__.__name__) + ) diff --git a/dpdata/gaussian/gjf.py b/dpdata/gaussian/gjf.py index 6c169b48..be089e24 100644 --- a/dpdata/gaussian/gjf.py +++ b/dpdata/gaussian/gjf.py @@ -10,6 +10,7 @@ import numpy as np from scipy.sparse import csr_matrix from scipy.sparse.csgraph import connected_components + try: from openbabel import openbabel except ImportError: @@ -20,10 +21,9 @@ from dpdata.periodic_table import Element - def _crd2frag(symbols: List[str], crds: np.ndarray) -> Tuple[int, List[int]]: """Detect fragments from coordinates. - + Parameters ---------- symbols : list[str] @@ -52,7 +52,9 @@ def _crd2frag(symbols: List[str], crds: np.ndarray) -> Tuple[int, List[int]]: if Open Babel is not installed """ if openbabel is None: - raise ImportError("Open Babel (Python interface) should be installed to detect fragmentation!") + raise ImportError( + "Open Babel (Python interface) should be installed to detect fragmentation!" + ) atomnumber = len(symbols) # Use openbabel to connect atoms mol = openbabel.OBMol() @@ -74,14 +76,15 @@ def _crd2frag(symbols: List[str], crds: np.ndarray) -> Tuple[int, List[int]]: bonds.extend([[a, b, bo], [b, a, bo]]) bonds = np.array(bonds, ndmin=2).reshape((-1, 3)) graph = csr_matrix( - (bonds[:, 2], (bonds[:, 0], bonds[:, 1])), shape=(atomnumber, atomnumber)) + (bonds[:, 2], (bonds[:, 0], bonds[:, 1])), shape=(atomnumber, atomnumber) + ) frag_numb, frag_index = connected_components(graph, 0) return frag_numb, frag_index def detect_multiplicity(symbols: np.ndarray) -> int: """Find the minimal multiplicity of the given molecules. - + Parameters ---------- symbols : np.ndarray @@ -102,15 +105,15 @@ def detect_multiplicity(symbols: np.ndarray) -> int: def make_gaussian_input( - sys_data: dict, - keywords: Union[str, List[str]], - multiplicity: Union[str ,int] = "auto", - charge: int = 0, - fragment_guesses: bool = False, - basis_set: Optional[str] = None, - keywords_high_multiplicity: Optional[str] = None, - nproc: int = 1, - ) -> str: + sys_data: dict, + keywords: Union[str, List[str]], + multiplicity: Union[str, int] = "auto", + charge: int = 0, + fragment_guesses: bool = False, + basis_set: Optional[str] = None, + keywords_high_multiplicity: Optional[str] = None, + nproc: int = 1, +) -> str: """Make gaussian input file. Parameters @@ -149,21 +152,21 @@ def make_gaussian_input( str gjf output string """ - coordinates = sys_data['coords'][0] - atom_names = sys_data['atom_names'] - atom_numbs = sys_data['atom_numbs'] - atom_types = sys_data['atom_types'] + coordinates = sys_data["coords"][0] + atom_names = sys_data["atom_names"] + atom_numbs = sys_data["atom_numbs"] + atom_types = sys_data["atom_types"] # get atom symbols list symbols = [atom_names[atom_type] for atom_type in atom_types] # assume default charge is zero and default spin multiplicity is 1 - if 'charge' in sys_data.keys(): - charge = sys_data['charge'] - + if "charge" in sys_data.keys(): + charge = sys_data["charge"] + use_fragment_guesses = False if isinstance(multiplicity, int): mult_auto = False - elif multiplicity == 'auto': + elif multiplicity == "auto": mult_auto = True else: raise RuntimeError('The keyword "multiplicity" is illegal.') @@ -186,16 +189,22 @@ def make_gaussian_input( mult_frags.append(detect_multiplicity(np.array(symbols)[idx])) if use_fragment_guesses: multiplicity = sum(mult_frags) - frag_numb + 1 - charge % 2 - chargekeywords_frag = "%d %d" % (charge, multiplicity) + \ - ''.join([' %d %d' % (charge, mult_frag) - for mult_frag in mult_frags]) + chargekeywords_frag = "%d %d" % (charge, multiplicity) + "".join( + [" %d %d" % (charge, mult_frag) for mult_frag in mult_frags] + ) else: multi_frags = np.array(mult_frags) - multiplicity = 1 + \ - np.count_nonzero(multi_frags == 2) % 2 + \ - np.count_nonzero(multi_frags == 3) * 2 - charge % 2 + multiplicity = ( + 1 + + np.count_nonzero(multi_frags == 2) % 2 + + np.count_nonzero(multi_frags == 3) * 2 + - charge % 2 + ) - if keywords_high_multiplicity is not None and np.count_nonzero(multi_frags == 2) >= 2: + if ( + keywords_high_multiplicity is not None + and np.count_nonzero(multi_frags == 2) >= 2 + ): # at least 2 radicals keywords = keywords_high_multiplicity @@ -207,39 +216,58 @@ def make_gaussian_input( buff = [] # keywords, e.g., force b3lyp/6-31g** if use_fragment_guesses: - keywords[0] = '{} guess=fragment={}'.format( - keywords[0], frag_numb) + keywords[0] = "{} guess=fragment={}".format(keywords[0], frag_numb) chkkeywords = [] - if len(keywords)>1: - chkkeywords.append('%chk={}.chk'.format(str(uuid.uuid1()))) + if len(keywords) > 1: + chkkeywords.append("%chk={}.chk".format(str(uuid.uuid1()))) - nprockeywords = '%nproc={:d}'.format(nproc) + nprockeywords = "%nproc={:d}".format(nproc) # use formula as title - titlekeywords = ''.join(["{}{}".format(symbol,numb) for symbol,numb in - zip(atom_names, atom_numbs)]) - chargekeywords = '{} {}'.format(charge, multiplicity) + titlekeywords = "".join( + ["{}{}".format(symbol, numb) for symbol, numb in zip(atom_names, atom_numbs)] + ) + chargekeywords = "{} {}".format(charge, multiplicity) - buff = [*chkkeywords, nprockeywords, '#{}'.format( - keywords[0]), '', titlekeywords, '', (chargekeywords_frag if use_fragment_guesses else chargekeywords)] + buff = [ + *chkkeywords, + nprockeywords, + "#{}".format(keywords[0]), + "", + titlekeywords, + "", + (chargekeywords_frag if use_fragment_guesses else chargekeywords), + ] for ii, (symbol, coordinate) in enumerate(zip(symbols, coordinates)): if use_fragment_guesses: - buff.append("%s(Fragment=%d) %f %f %f" % - (symbol, frag_index[ii] + 1, *coordinate)) + buff.append( + "%s(Fragment=%d) %f %f %f" % (symbol, frag_index[ii] + 1, *coordinate) + ) else: buff.append("%s %f %f %f" % (symbol, *coordinate)) - if not sys_data.get('nopbc', False): + if not sys_data.get("nopbc", False): # PBC condition - cell = sys_data['cells'][0] + cell = sys_data["cells"][0] for ii in range(3): # use TV as atomic symbol, see https://gaussian.com/pbc/ - buff.append('TV %f %f %f' % (*cell[ii],)) + buff.append("TV %f %f %f" % (*cell[ii],)) if basis_set is not None: # custom basis set - buff.extend(['', basis_set, '']) + buff.extend(["", basis_set, ""]) for kw in itertools.islice(keywords, 1, None): - buff.extend(['\n--link1--', *chkkeywords, nprockeywords, - '#{}'.format(kw), '', titlekeywords, '', chargekeywords, '']) - buff.append('\n') - return '\n'.join(buff) + buff.extend( + [ + "\n--link1--", + *chkkeywords, + nprockeywords, + "#{}".format(kw), + "", + titlekeywords, + "", + chargekeywords, + "", + ] + ) + buff.append("\n") + return "\n".join(buff) diff --git a/dpdata/gaussian/log.py b/dpdata/gaussian/log.py index 0e006682..54bc1d51 100644 --- a/dpdata/gaussian/log.py +++ b/dpdata/gaussian/log.py @@ -8,6 +8,7 @@ symbols = ["X"] + ELEMENTS + def to_system_data(file_name, md=False): data = {} # read from log lines @@ -24,10 +25,14 @@ def to_system_data(file_name, md=False): if line.startswith(" SCF Done"): # energies energy = float(line.split()[4]) - elif line.startswith(" Center Atomic Forces (Hartrees/Bohr)"): + elif line.startswith( + " Center Atomic Forces (Hartrees/Bohr)" + ): flag = 1 forces = [] - elif line.startswith(" Input orientation:") or line.startswith(" Z-Matrix orientation:"): + elif line.startswith( + " Input orientation:" + ) or line.startswith(" Z-Matrix orientation:"): flag = 5 coords = [] atom_symbols = [] @@ -45,7 +50,9 @@ def to_system_data(file_name, md=False): nopbc = False cells_t.append(cells) else: - cells_t.append([[100., 0., 0.], [0., 100., 0.], [0., 0., 100.]]) + cells_t.append( + [[100.0, 0.0, 0.0], [0.0, 100.0, 0.0], [0.0, 0.0, 100.0]] + ) flag = 0 else: s = line.split() @@ -53,7 +60,9 @@ def to_system_data(file_name, md=False): # PBC pass else: - forces.append([float(line[23:38]), float(line[38:53]), float(line[53:68])]) + forces.append( + [float(line[23:38]), float(line[38:53]), float(line[53:68])] + ) elif flag == 10: # atom_symbols and coords if line.startswith(" -------"): @@ -67,22 +76,24 @@ def to_system_data(file_name, md=False): coords.append([float(x) for x in s[3:6]]) atom_symbols.append(symbols[int(s[1])]) - assert(coords_t), "cannot find coords" - assert(energy_t), "cannot find energies" - assert(forces_t), "cannot find forces" + assert coords_t, "cannot find coords" + assert energy_t, "cannot find energies" + assert forces_t, "cannot find forces" - atom_names, data['atom_types'], atom_numbs = np.unique(atom_symbols, return_inverse=True, return_counts=True) - data['atom_names'] = list(atom_names) - data['atom_numbs'] = list(atom_numbs) + atom_names, data["atom_types"], atom_numbs = np.unique( + atom_symbols, return_inverse=True, return_counts=True + ) + data["atom_names"] = list(atom_names) + data["atom_numbs"] = list(atom_numbs) if not md: forces_t = forces_t[-1:] energy_t = energy_t[-1:] coords_t = coords_t[-1:] cells_t = cells_t[-1:] - data['forces'] = np.array(forces_t) * force_convert - data['energies'] = np.array(energy_t) * energy_convert - data['coords'] = np.array(coords_t) - data['orig'] = np.array([0, 0, 0]) - data['cells'] = np.array(cells_t) - data['nopbc'] = nopbc + data["forces"] = np.array(forces_t) * force_convert + data["energies"] = np.array(energy_t) * energy_convert + data["coords"] = np.array(coords_t) + data["orig"] = np.array([0, 0, 0]) + data["cells"] = np.array(cells_t) + data["nopbc"] = nopbc return data diff --git a/dpdata/gromacs/gro.py b/dpdata/gromacs/gro.py index 2114810e..b9930f2b 100644 --- a/dpdata/gromacs/gro.py +++ b/dpdata/gromacs/gro.py @@ -7,29 +7,32 @@ ang2nm = LengthConversion("angstrom", "nm").value() cell_idx_gmx2dp = [0, 4, 8, 1, 2, 3, 5, 6, 7] + def _format_atom_name(atom_name): patt = re.compile("[a-zA-Z]*") match = re.search(patt, atom_name) fmt_name = match.group().capitalize() return fmt_name + def _get_line(line, fmt_atom_name=True): atom_name = line[10:15].split()[0] if fmt_atom_name: atom_name = _format_atom_name(atom_name) atom_idx = int(line[15:20].split()[0]) - posis = [float(line[ii:ii+8]) for ii in range(20,44,8)] + posis = [float(line[ii : ii + 8]) for ii in range(20, 44, 8)] posis = np.array(posis) * nm2ang return atom_name, atom_idx, posis + def _get_cell(line): - cell = np.zeros([3,3]) + cell = np.zeros([3, 3]) lengths = [float(ii) for ii in line.split()] if len(lengths) >= 3: for dd in range(3): cell[dd][dd] = lengths[dd] else: - raise RuntimeError('wrong box format: ', line) + raise RuntimeError("wrong box format: ", line) if len(lengths) == 9: cell[0][1] = lengths[3] cell[0][2] = lengths[4] @@ -40,8 +43,9 @@ def _get_cell(line): cell = cell * nm2ang return cell + def file_to_system_data(fname, format_atom_name=True, **kwargs): - system = {'coords': [], 'cells': []} + system = {"coords": [], "cells": []} with open(fname) as fp: frame = 0 while True: @@ -62,17 +66,22 @@ def file_to_system_data(fname, format_atom_name=True, **kwargs): cell = _get_cell(fp.readline()) posis = np.array(posis) if frame == 1: - system['orig'] = np.zeros(3) - system['atom_names'] = list(set(names)) - system['atom_numbs'] = [names.count(ii) for ii in system['atom_names']] - system['atom_types'] = [system['atom_names'].index(ii) for ii in names] - system['atom_types'] = np.array(system['atom_types'], dtype = int) - system['coords'].append(posis) - system['cells'].append(cell) - system['coords'] = np.array(system['coords']) - system['cells'] = np.array(system['cells']) + system["orig"] = np.zeros(3) + system["atom_names"] = list(set(names)) + system["atom_numbs"] = [ + names.count(ii) for ii in system["atom_names"] + ] + system["atom_types"] = [ + system["atom_names"].index(ii) for ii in names + ] + system["atom_types"] = np.array(system["atom_types"], dtype=int) + system["coords"].append(posis) + system["cells"].append(cell) + system["coords"] = np.array(system["coords"]) + system["cells"] = np.array(system["cells"]) return system + def from_system_data(system, f_idx=0, **kwargs): resname = kwargs.get("resname", "MOL") shift = kwargs.get("shift", 0) @@ -84,7 +93,9 @@ def from_system_data(system, f_idx=0, **kwargs): atom_type = system["atom_types"][i] atom_name = system["atom_names"][atom_type] coords = system["coords"][f_idx] * ang2nm - ret += "{:>5d}{:<5s}{:>5s}{:5d}{:8.3f}{:8.3f}{:8.3f}\n".format(1, resname, atom_name, i+shift+1, *tuple(coords[i])) + ret += "{:>5d}{:<5s}{:>5s}{:5d}{:8.3f}{:8.3f}{:8.3f}\n".format( + 1, resname, atom_name, i + shift + 1, *tuple(coords[i]) + ) cell = (system["cells"][f_idx].flatten() * ang2nm)[cell_idx_gmx2dp] ret += " " + " ".join([f"{x:.3f}" for x in cell]) diff --git a/dpdata/lammps/dump.py b/dpdata/lammps/dump.py index 135fe051..85b87ba8 100644 --- a/dpdata/lammps/dump.py +++ b/dpdata/lammps/dump.py @@ -2,214 +2,246 @@ import os, sys import numpy as np + lib_path = os.path.dirname(os.path.realpath(__file__)) sys.path.append(lib_path) import lmp import warnings + + class UnwrapWarning(UserWarning): pass -warnings.simplefilter('once', UnwrapWarning) -def _get_block (lines, key) : - for idx in range(len(lines)) : - if ('ITEM: ' + key) in lines[idx] : +warnings.simplefilter("once", UnwrapWarning) + + +def _get_block(lines, key): + for idx in range(len(lines)): + if ("ITEM: " + key) in lines[idx]: break idx_s = idx + 1 - for idx in range(idx_s, len(lines)) : - if ('ITEM: ') in lines[idx] : + for idx in range(idx_s, len(lines)): + if ("ITEM: ") in lines[idx]: break idx_e = idx - if idx_e == len(lines)-1 : + if idx_e == len(lines) - 1: idx_e += 1 - return lines[idx_s:idx_e], lines[idx_s-1] + return lines[idx_s:idx_e], lines[idx_s - 1] -def get_atype(lines, type_idx_zero = False) : - blk, head = _get_block(lines, 'ATOMS') + +def get_atype(lines, type_idx_zero=False): + blk, head = _get_block(lines, "ATOMS") keys = head.split() - id_idx = keys.index('id') - 2 - tidx = keys.index('type') - 2 + id_idx = keys.index("id") - 2 + tidx = keys.index("type") - 2 atype = [] - for ii in blk : + for ii in blk: atype.append([int(ii.split()[id_idx]), int(ii.split()[tidx])]) atype.sort() - atype = np.array(atype, dtype = int) - if type_idx_zero : - return atype[:,1] - 1 - else : - return atype[:,1] - -def get_natoms(lines) : - blk, head = _get_block(lines, 'NUMBER OF ATOMS') + atype = np.array(atype, dtype=int) + if type_idx_zero: + return atype[:, 1] - 1 + else: + return atype[:, 1] + + +def get_natoms(lines): + blk, head = _get_block(lines, "NUMBER OF ATOMS") return int(blk[0]) -def get_natomtypes(lines) : + +def get_natomtypes(lines): atype = get_atype(lines) return max(atype) -def get_natoms_vec(lines) : + +def get_natoms_vec(lines): atype = get_atype(lines) natoms_vec = [] natomtypes = get_natomtypes(lines) - for ii in range(natomtypes) : - natoms_vec.append(sum(atype == ii+1)) - assert (sum(natoms_vec) == get_natoms(lines)) + for ii in range(natomtypes): + natoms_vec.append(sum(atype == ii + 1)) + assert sum(natoms_vec) == get_natoms(lines) return natoms_vec + def get_coordtype_and_scalefactor(keys): # 4 types in total,with different scaling factor - key_pc=['x','y','z'] # plain cartesian, sf = 1 - key_uc=['xu','yu','zu'] # unwraped cartesian, sf = 1 - key_s=['xs','ys','zs'] # scaled by lattice parameter, sf = lattice parameter - key_su = ['xsu','ysu','zsu'] #scaled and unfolded,sf = lattice parameter - lmp_coor_type = [key_pc,key_uc,key_s,key_su] - sf = [0,0,1,1] - uw = [0,1,0,1] # unwraped or not + key_pc = ["x", "y", "z"] # plain cartesian, sf = 1 + key_uc = ["xu", "yu", "zu"] # unwraped cartesian, sf = 1 + key_s = ["xs", "ys", "zs"] # scaled by lattice parameter, sf = lattice parameter + key_su = ["xsu", "ysu", "zsu"] # scaled and unfolded,sf = lattice parameter + lmp_coor_type = [key_pc, key_uc, key_s, key_su] + sf = [0, 0, 1, 1] + uw = [0, 1, 0, 1] # unwraped or not for k in range(4): if all(i in keys for i in lmp_coor_type[k]): return lmp_coor_type[k], sf[k], uw[k] -def safe_get_posi(lines,cell,orig=np.zeros(3), unwrap=False) : - blk, head = _get_block(lines, 'ATOMS') + +def safe_get_posi(lines, cell, orig=np.zeros(3), unwrap=False): + blk, head = _get_block(lines, "ATOMS") keys = head.split() coord_tp_and_sf = get_coordtype_and_scalefactor(keys) - assert coord_tp_and_sf is not None, 'Dump file does not contain atomic coordinates!' + assert coord_tp_and_sf is not None, "Dump file does not contain atomic coordinates!" coordtype, sf, uw = coord_tp_and_sf - id_idx = keys.index('id') - 2 - xidx = keys.index(coordtype[0])-2 - yidx = keys.index(coordtype[1])-2 - zidx = keys.index(coordtype[2])-2 + id_idx = keys.index("id") - 2 + xidx = keys.index(coordtype[0]) - 2 + yidx = keys.index(coordtype[1]) - 2 + zidx = keys.index(coordtype[2]) - 2 posis = [] - for ii in blk : + for ii in blk: words = ii.split() - posis.append([float(words[id_idx]), float(words[xidx]), float(words[yidx]), float(words[zidx])]) + posis.append( + [ + float(words[id_idx]), + float(words[xidx]), + float(words[yidx]), + float(words[zidx]), + ] + ) posis.sort() - posis = np.array(posis)[:,1:4] + posis = np.array(posis)[:, 1:4] if not sf: - posis = (posis - orig) @ np.linalg.inv(cell) # Convert to scaled coordinates for unscaled coordinates + posis = (posis - orig) @ np.linalg.inv( + cell + ) # Convert to scaled coordinates for unscaled coordinates if uw and unwrap: - return posis @ cell # convert scaled coordinates back to Cartesien coordinates unwrap at the periodic boundaries + return ( + posis @ cell + ) # convert scaled coordinates back to Cartesien coordinates unwrap at the periodic boundaries else: if uw and not unwrap: - warnings.warn(message='Your dump file contains unwrapped coordinates, but you did not specify unwrapping (unwrap = True). The default is wrapping at periodic boundaries (unwrap = False).\n',category=UnwrapWarning) - return (posis % 1) @ cell # Convert scaled coordinates back to Cartesien coordinates with wraping at periodic boundary conditions + warnings.warn( + message="Your dump file contains unwrapped coordinates, but you did not specify unwrapping (unwrap = True). The default is wrapping at periodic boundaries (unwrap = False).\n", + category=UnwrapWarning, + ) + return ( + posis % 1 + ) @ cell # Convert scaled coordinates back to Cartesien coordinates with wraping at periodic boundary conditions + -def get_dumpbox(lines) : - blk, h = _get_block(lines, 'BOX BOUNDS') - bounds = np.zeros([3,2]) +def get_dumpbox(lines): + blk, h = _get_block(lines, "BOX BOUNDS") + bounds = np.zeros([3, 2]) tilt = np.zeros([3]) - load_tilt = 'xy xz yz' in h - for dd in range(3) : + load_tilt = "xy xz yz" in h + for dd in range(3): info = [float(jj) for jj in blk[dd].split()] bounds[dd][0] = info[0] bounds[dd][1] = info[1] - if load_tilt : + if load_tilt: tilt[dd] = info[2] return bounds, tilt -def dumpbox2box(bounds, tilt) : + +def dumpbox2box(bounds, tilt): xy = tilt[0] xz = tilt[1] yz = tilt[2] - xlo = bounds[0][0] - min(0.0,xy,xz,xy+xz) - xhi = bounds[0][1] - max(0.0,xy,xz,xy+xz) - ylo = bounds[1][0] - min(0.0,yz) - yhi = bounds[1][1] - max(0.0,yz) + xlo = bounds[0][0] - min(0.0, xy, xz, xy + xz) + xhi = bounds[0][1] - max(0.0, xy, xz, xy + xz) + ylo = bounds[1][0] - min(0.0, yz) + yhi = bounds[1][1] - max(0.0, yz) zlo = bounds[2][0] zhi = bounds[2][1] info = [[xlo, xhi], [ylo, yhi], [zlo, zhi]] return lmp.lmpbox2box(info, tilt) -def box2dumpbox(orig, box) : + +def box2dumpbox(orig, box): lohi, tilt = lmp.box2lmpbox(orig, box) xy = tilt[0] xz = tilt[1] yz = tilt[2] - bounds = np.zeros([3,2]) - bounds[0][0] = lohi[0][0] + min(0.0,xy,xz,xy+xz) - bounds[0][1] = lohi[0][1] + max(0.0,xy,xz,xy+xz) - bounds[1][0] = lohi[1][0] + min(0.0,yz) - bounds[1][1] = lohi[1][1] + max(0.0,yz) + bounds = np.zeros([3, 2]) + bounds[0][0] = lohi[0][0] + min(0.0, xy, xz, xy + xz) + bounds[0][1] = lohi[0][1] + max(0.0, xy, xz, xy + xz) + bounds[1][0] = lohi[1][0] + min(0.0, yz) + bounds[1][1] = lohi[1][1] + max(0.0, yz) bounds[2][0] = lohi[2][0] bounds[2][1] = lohi[2][1] return bounds, tilt -def load_file(fname, begin = 0, step = 1) : +def load_file(fname, begin=0, step=1): lines = [] buff = [] cc = -1 with open(fname) as fp: while True: - line = fp.readline().rstrip('\n') - if not line : - if cc >= begin and (cc - begin) % step == 0 : + line = fp.readline().rstrip("\n") + if not line: + if cc >= begin and (cc - begin) % step == 0: lines += buff buff = [] cc += 1 return lines - if 'ITEM: TIMESTEP' in line : - if cc >= begin and (cc - begin) % step == 0 : + if "ITEM: TIMESTEP" in line: + if cc >= begin and (cc - begin) % step == 0: lines += buff buff = [] cc += 1 - if cc >= begin and (cc - begin) % step == 0 : + if cc >= begin and (cc - begin) % step == 0: buff.append(line) -def system_data(lines, type_map = None, type_idx_zero = True, unwrap=False) : +def system_data(lines, type_map=None, type_idx_zero=True, unwrap=False): array_lines = split_traj(lines) lines = array_lines[0] system = {} - system['atom_numbs'] = get_natoms_vec(lines) - system['atom_names'] = [] - if type_map == None : - for ii in range(len(system['atom_numbs'])) : - system['atom_names'].append('TYPE_%d' % ii) - else : - assert(len(type_map) >= len(system['atom_numbs'])) - for ii in range(len(system['atom_numbs'])) : - system['atom_names'].append(type_map[ii]) + system["atom_numbs"] = get_natoms_vec(lines) + system["atom_names"] = [] + if type_map == None: + for ii in range(len(system["atom_numbs"])): + system["atom_names"].append("TYPE_%d" % ii) + else: + assert len(type_map) >= len(system["atom_numbs"]) + for ii in range(len(system["atom_numbs"])): + system["atom_names"].append(type_map[ii]) bounds, tilt = get_dumpbox(lines) orig, cell = dumpbox2box(bounds, tilt) - system['orig'] = np.array(orig) - np.array(orig) - system['cells'] = [np.array(cell)] - system['atom_types'] = get_atype(lines, type_idx_zero = type_idx_zero) - system['coords'] = [safe_get_posi(lines, cell, np.array(orig), unwrap)] - for ii in range(1, len(array_lines)) : + system["orig"] = np.array(orig) - np.array(orig) + system["cells"] = [np.array(cell)] + system["atom_types"] = get_atype(lines, type_idx_zero=type_idx_zero) + system["coords"] = [safe_get_posi(lines, cell, np.array(orig), unwrap)] + for ii in range(1, len(array_lines)): bounds, tilt = get_dumpbox(array_lines[ii]) orig, cell = dumpbox2box(bounds, tilt) - system['cells'].append(cell) - atype = get_atype(array_lines[ii], type_idx_zero = type_idx_zero) + system["cells"].append(cell) + atype = get_atype(array_lines[ii], type_idx_zero=type_idx_zero) # map atom type; a[as[a][as[as[b]]]] = b[as[b][as^{-1}[b]]] = b[id] - idx = np.argsort(atype)[np.argsort(np.argsort(system['atom_types']))] - system['coords'].append(safe_get_posi(array_lines[ii], cell, np.array(orig), unwrap)[idx]) - system['cells'] = np.array(system['cells']) - system['coords'] = np.array(system['coords']) + idx = np.argsort(atype)[np.argsort(np.argsort(system["atom_types"]))] + system["coords"].append( + safe_get_posi(array_lines[ii], cell, np.array(orig), unwrap)[idx] + ) + system["cells"] = np.array(system["cells"]) + system["coords"] = np.array(system["coords"]) return system -def split_traj(dump_lines) : +def split_traj(dump_lines): marks = [] - for idx,ii in enumerate(dump_lines) : - if 'ITEM: TIMESTEP' in ii : + for idx, ii in enumerate(dump_lines): + if "ITEM: TIMESTEP" in ii: marks.append(idx) - if len(marks) == 0 : + if len(marks) == 0: return None - elif len(marks) == 1 : + elif len(marks) == 1: return [dump_lines] - else : + else: block_size = marks[1] - marks[0] ret = [] - for ii in marks : - ret.append(dump_lines[ii:ii+block_size]) + for ii in marks: + ret.append(dump_lines[ii : ii + block_size]) # for ii in range(len(marks)-1): # assert(marks[ii+1] - marks[ii] == block_size) return ret return None -if __name__ == '__main__' : +if __name__ == "__main__": # fname = 'dump.hti' # lines = open(fname).read().split('\n') # # print(get_natoms(lines)) @@ -223,9 +255,9 @@ def split_traj(dump_lines) : # print(box) # np.savetxt('tmp.out', posi - orig, fmt='%.6f') # print(system_data(lines)) - lines = load_file('conf_unfold.dump', begin = 0, step = 1) + lines = load_file("conf_unfold.dump", begin=0, step=1) al = split_traj(lines) - s = system_data(lines,['O','H']) - #l = np.linalg.norm(s['cells'][1],axis=1) - #p = s['coords'][0] + l - #np.savetxt('p',p,fmt='%1.10f') + s = system_data(lines, ["O", "H"]) + # l = np.linalg.norm(s['cells'][1],axis=1) + # p = s['coords'][0] + l + # np.savetxt('p',p,fmt='%1.10f') diff --git a/dpdata/lammps/lmp.py b/dpdata/lammps/lmp.py index c5a82b63..7f80fcc0 100644 --- a/dpdata/lammps/lmp.py +++ b/dpdata/lammps/lmp.py @@ -2,186 +2,233 @@ import numpy as np -ptr_float_fmt = '%15.10f' -ptr_int_fmt = '%6d' -ptr_key_fmt = '%15s' +ptr_float_fmt = "%15.10f" +ptr_int_fmt = "%6d" +ptr_key_fmt = "%15s" -def _get_block (lines, keys) : - for idx in range(len(lines)) : - if keys in lines[idx] : + +def _get_block(lines, keys): + for idx in range(len(lines)): + if keys in lines[idx]: break if idx == len(lines) - 1: return None - idx_s = idx+2 + idx_s = idx + 2 idx = idx_s ret = [] - while True : - if idx == len(lines) or len(lines[idx].split()) == 0 : + while True: + if idx == len(lines) or len(lines[idx].split()) == 0: break - else : + else: ret.append(lines[idx]) idx += 1 return ret -def lmpbox2box(lohi, tilt) : + +def lmpbox2box(lohi, tilt): xy = tilt[0] xz = tilt[1] yz = tilt[2] orig = np.array([lohi[0][0], lohi[1][0], lohi[2][0]]) lens = [] - for dd in range(3) : + for dd in range(3): lens.append(lohi[dd][1] - lohi[dd][0]) xx = [lens[0], 0, 0] yy = [xy, lens[1], 0] - zz= [xz, yz, lens[2]] + zz = [xz, yz, lens[2]] return orig, np.array([xx, yy, zz]) -def box2lmpbox(orig, box) : - lohi = np.zeros([3,2]) - for dd in range(3) : + +def box2lmpbox(orig, box): + lohi = np.zeros([3, 2]) + for dd in range(3): lohi[dd][0] = orig[dd] tilt = np.zeros(3) tilt[0] = box[1][0] tilt[1] = box[2][0] tilt[2] = box[2][1] - lens = np.zeros(3) + lens = np.zeros(3) lens[0] = box[0][0] lens[1] = box[1][1] lens[2] = box[2][2] - for dd in range(3) : + for dd in range(3): lohi[dd][1] = lohi[dd][0] + lens[dd] return lohi, tilt -def get_atoms(lines) : - return _get_block(lines, 'Atoms') -def get_natoms(lines) : - for ii in lines : - if 'atoms' in ii : +def get_atoms(lines): + return _get_block(lines, "Atoms") + + +def get_natoms(lines): + for ii in lines: + if "atoms" in ii: return int(ii.split()[0]) return None -def get_natomtypes(lines) : - for ii in lines : - if 'atom types' in ii : + +def get_natomtypes(lines): + for ii in lines: + if "atom types" in ii: return int(ii.split()[0]) return None -def _atom_info_mol(line) : + +def _atom_info_mol(line): vec = line.split() # idx, mole_type, atom_type, charge, x, y, z - return int(vec[0]), int(vec[1]), int(vec[2]), float(vec[3]), float(vec[4]), float(vec[5]), float(vec[6]) - -def _atom_info_atom(line) : + return ( + int(vec[0]), + int(vec[1]), + int(vec[2]), + float(vec[3]), + float(vec[4]), + float(vec[5]), + float(vec[6]), + ) + + +def _atom_info_atom(line): vec = line.split() # idx, atom_type, x, y, z return int(vec[0]), int(vec[1]), float(vec[2]), float(vec[3]), float(vec[4]) -def get_natoms_vec(lines) : + +def get_natoms_vec(lines): atype = get_atype(lines) natoms_vec = [] natomtypes = get_natomtypes(lines) - for ii in range(natomtypes) : - natoms_vec.append(sum(atype == ii+1)) - assert (sum(natoms_vec) == get_natoms(lines)) + for ii in range(natomtypes): + natoms_vec.append(sum(atype == ii + 1)) + assert sum(natoms_vec) == get_natoms(lines) return natoms_vec -def get_atype(lines, type_idx_zero = False) : - alines = get_atoms(lines) + +def get_atype(lines, type_idx_zero=False): + alines = get_atoms(lines) atype = [] - for ii in alines : + for ii in alines: # idx, mt, at, q, x, y, z = _atom_info_mol(ii) idx, at, x, y, z = _atom_info_atom(ii) - if type_idx_zero : - atype.append(at-1) + if type_idx_zero: + atype.append(at - 1) else: atype.append(at) - return np.array(atype, dtype = int) + return np.array(atype, dtype=int) -def get_posi(lines) : + +def get_posi(lines): atom_lines = get_atoms(lines) posis = [] - for ii in atom_lines : + for ii in atom_lines: # posis.append([float(jj) for jj in ii.split()[4:7]]) posis.append([float(jj) for jj in ii.split()[2:5]]) return np.array(posis) -def get_lmpbox(lines) : + +def get_lmpbox(lines): box_info = [] tilt = np.zeros(3) - for ii in lines : - if 'xlo' in ii and 'xhi' in ii : + for ii in lines: + if "xlo" in ii and "xhi" in ii: box_info.append([float(ii.split()[0]), float(ii.split()[1])]) break - for ii in lines : - if 'ylo' in ii and 'yhi' in ii : + for ii in lines: + if "ylo" in ii and "yhi" in ii: box_info.append([float(ii.split()[0]), float(ii.split()[1])]) break - for ii in lines : - if 'zlo' in ii and 'zhi' in ii : + for ii in lines: + if "zlo" in ii and "zhi" in ii: box_info.append([float(ii.split()[0]), float(ii.split()[1])]) break - for ii in lines : - if 'xy' in ii and 'xz' in ii and 'yz' in ii : + for ii in lines: + if "xy" in ii and "xz" in ii and "yz" in ii: tilt = np.array([float(jj) for jj in ii.split()[0:3]]) return box_info, tilt -def system_data(lines, type_map = None, type_idx_zero = True) : +def system_data(lines, type_map=None, type_idx_zero=True): system = {} - system['atom_numbs'] = get_natoms_vec(lines) - system['atom_names'] = [] - if type_map == None : - for ii in range(len(system['atom_numbs'])) : - system['atom_names'].append('Type_%d' % ii) - else : - assert(len(type_map) >= len(system['atom_numbs'])) - for ii in range(len(system['atom_numbs'])) : - system['atom_names'].append(type_map[ii]) + system["atom_numbs"] = get_natoms_vec(lines) + system["atom_names"] = [] + if type_map == None: + for ii in range(len(system["atom_numbs"])): + system["atom_names"].append("Type_%d" % ii) + else: + assert len(type_map) >= len(system["atom_numbs"]) + for ii in range(len(system["atom_numbs"])): + system["atom_names"].append(type_map[ii]) lohi, tilt = get_lmpbox(lines) orig, cell = lmpbox2box(lohi, tilt) - system['orig'] = np.array(orig) - system['cells'] = [np.array(cell)] - natoms = sum(system['atom_numbs']) - system['atom_types'] = get_atype(lines, type_idx_zero = type_idx_zero) - system['coords'] = [get_posi(lines)] - system['cells'] = np.array(system['cells']) - system['coords'] = np.array(system['coords']) + system["orig"] = np.array(orig) + system["cells"] = [np.array(cell)] + natoms = sum(system["atom_numbs"]) + system["atom_types"] = get_atype(lines, type_idx_zero=type_idx_zero) + system["coords"] = [get_posi(lines)] + system["cells"] = np.array(system["cells"]) + system["coords"] = np.array(system["coords"]) return system -def to_system_data(lines, type_map = None, type_idx_zero = True) : - return system_data(lines, type_map = type_map, type_idx_zero = type_idx_zero) - -def from_system_data(system, f_idx = 0) : - ret = '' - ret += '\n' - natoms = sum(system['atom_numbs']) - ntypes = len(system['atom_numbs']) - ret += '%d atoms\n' % natoms - ret += '%d atom types\n' % ntypes - ret += (ptr_float_fmt + ' ' + ptr_float_fmt + ' xlo xhi\n') % (0, system['cells'][f_idx][0][0]) - ret += (ptr_float_fmt + ' ' + ptr_float_fmt + ' ylo yhi\n') % (0, system['cells'][f_idx][1][1]) - ret += (ptr_float_fmt + ' ' + ptr_float_fmt + ' zlo zhi\n') % (0, system['cells'][f_idx][2][2]) - ret += (ptr_float_fmt + ' ' + ptr_float_fmt + ' ' + ptr_float_fmt + ' xy xz yz\n') % \ - (system['cells'][f_idx][1][0], system['cells'][f_idx][2][0], system['cells'][f_idx][2][1]) - ret += '\n' - ret += 'Atoms # atomic\n' - ret += '\n' - coord_fmt = ptr_int_fmt + ' ' + ptr_int_fmt + ' ' + ptr_float_fmt + ' ' + ptr_float_fmt + ' ' + ptr_float_fmt + '\n' - for ii in range(natoms) : - ret += coord_fmt % \ - (ii+1, - system['atom_types'][ii] + 1, - system['coords'][f_idx][ii][0] - system['orig'][0], - system['coords'][f_idx][ii][1] - system['orig'][1], - system['coords'][f_idx][ii][2] - system['orig'][2] + +def to_system_data(lines, type_map=None, type_idx_zero=True): + return system_data(lines, type_map=type_map, type_idx_zero=type_idx_zero) + + +def from_system_data(system, f_idx=0): + ret = "" + ret += "\n" + natoms = sum(system["atom_numbs"]) + ntypes = len(system["atom_numbs"]) + ret += "%d atoms\n" % natoms + ret += "%d atom types\n" % ntypes + ret += (ptr_float_fmt + " " + ptr_float_fmt + " xlo xhi\n") % ( + 0, + system["cells"][f_idx][0][0], + ) + ret += (ptr_float_fmt + " " + ptr_float_fmt + " ylo yhi\n") % ( + 0, + system["cells"][f_idx][1][1], + ) + ret += (ptr_float_fmt + " " + ptr_float_fmt + " zlo zhi\n") % ( + 0, + system["cells"][f_idx][2][2], + ) + ret += ( + ptr_float_fmt + " " + ptr_float_fmt + " " + ptr_float_fmt + " xy xz yz\n" + ) % ( + system["cells"][f_idx][1][0], + system["cells"][f_idx][2][0], + system["cells"][f_idx][2][1], + ) + ret += "\n" + ret += "Atoms # atomic\n" + ret += "\n" + coord_fmt = ( + ptr_int_fmt + + " " + + ptr_int_fmt + + " " + + ptr_float_fmt + + " " + + ptr_float_fmt + + " " + + ptr_float_fmt + + "\n" + ) + for ii in range(natoms): + ret += coord_fmt % ( + ii + 1, + system["atom_types"][ii] + 1, + system["coords"][f_idx][ii][0] - system["orig"][0], + system["coords"][f_idx][ii][1] - system["orig"][1], + system["coords"][f_idx][ii][2] - system["orig"][2], ) return ret -if __name__ == '__main__' : - fname = 'water-SPCE.data' - lines = open(fname).read().split('\n') +if __name__ == "__main__": + fname = "water-SPCE.data" + lines = open(fname).read().split("\n") bonds, tilt = get_lmpbox(lines) # print(bonds, tilt) orig, box = lmpbox2box(bonds, tilt) diff --git a/dpdata/md/msd.py b/dpdata/md/msd.py index 0286b8ab..eebc7296 100644 --- a/dpdata/md/msd.py +++ b/dpdata/md/msd.py @@ -1,53 +1,55 @@ import numpy as np from .pbc import system_pbc_shift + def _msd(coords, cells, pbc_shift, begin): nframes = cells.shape[0] natoms = coords.shape[1] ff = begin prev_coord = coords[ff] + np.matmul(pbc_shift[ff], cells[ff]) - msds = [0.] - for ff in range(begin+1,nframes) : + msds = [0.0] + for ff in range(begin + 1, nframes): curr_coord = coords[ff] + np.matmul(pbc_shift[ff], cells[ff]) diff_coord = curr_coord - prev_coord msds.append(np.sum(diff_coord * diff_coord) / natoms) return np.array(msds) + def _msd_win(coords, cells, pbc_shift, begin, window): nframes = cells.shape[0] natoms = coords.shape[1] ncoords = np.zeros(coords.shape) msd = np.zeros([window]) - for ff in range(nframes) : - ncoords[ff] = coords[ff] + np.matmul(pbc_shift[ff], cells[ff]) + for ff in range(nframes): + ncoords[ff] = coords[ff] + np.matmul(pbc_shift[ff], cells[ff]) cc = 0 - for ii in range(begin,nframes-window+1) : + for ii in range(begin, nframes - window + 1): start = np.tile(ncoords[ii], (window, 1, 1)) - diff_coord = ncoords[ii:ii+window] - start + diff_coord = ncoords[ii : ii + window] - start diff_coord = np.reshape(diff_coord, [-1, natoms * 3]) - msd += np.sum(diff_coord * diff_coord, axis = 1) / natoms + msd += np.sum(diff_coord * diff_coord, axis=1) / natoms cc += 1 return np.array(msd) / cc -def msd(system, sel = None, begin = 0, window = 0) : - natoms = system.get_natoms() - if sel is None : + +def msd(system, sel=None, begin=0, window=0): + natoms = system.get_natoms() + if sel is None: sel_idx = np.arange(natoms) - else : + else: sel_idx = [] - for ii in range(natoms) : - if sel[ii] : + for ii in range(natoms): + if sel[ii]: sel_idx.append(ii) - sel_idx = np.array(sel_idx, dtype = int) + sel_idx = np.array(sel_idx, dtype=int) nsel = sel_idx.size nframes = system.get_nframes() pbc_shift = system_pbc_shift(system) - coords = system['coords'] - cells = system['cells'] - pbc_shift = pbc_shift[:,sel_idx,:] - coords = coords[:,sel_idx,:] - if window <= 0 : + coords = system["coords"] + cells = system["cells"] + pbc_shift = pbc_shift[:, sel_idx, :] + coords = coords[:, sel_idx, :] + if window <= 0: return _msd(coords, cells, pbc_shift, begin) - else : + else: return _msd_win(coords, cells, pbc_shift, begin, window) - diff --git a/dpdata/md/pbc.py b/dpdata/md/pbc.py index b3318aaf..4eee7c65 100644 --- a/dpdata/md/pbc.py +++ b/dpdata/md/pbc.py @@ -1,66 +1,63 @@ import numpy as np -def posi_diff(box, r0, r1) : +def posi_diff(box, r0, r1): rbox = np.linalg.inv(box) rbox = rbox.T - p0 = (np.dot(rbox, r0)) - p1 = (np.dot(rbox, r1)) + p0 = np.dot(rbox, r0) + p1 = np.dot(rbox, r1) dp = p0 - p1 shift = np.zeros(3) - for dd in range(3) : - if dp[dd] >= 0.5 : + for dd in range(3): + if dp[dd] >= 0.5: dp[dd] -= 1 - elif dp[dd] < -0.5 : + elif dp[dd] < -0.5: dp[dd] += 1 - dr = np.dot(box.T, dp) + dr = np.dot(box.T, dp) return dr -def posi_shift(box, r0, r1) : +def posi_shift(box, r0, r1): rbox = np.linalg.inv(box) rbox = rbox.T - p0 = (np.dot(rbox, r0)) - p1 = (np.dot(rbox, r1)) + p0 = np.dot(rbox, r0) + p1 = np.dot(rbox, r1) dp = p0 - p1 shift = np.zeros(3) - for dd in range(3) : - if dp[dd] >= 0.5 : + for dd in range(3): + if dp[dd] >= 0.5: shift[dd] -= 1 - elif dp[dd] < -0.5 : + elif dp[dd] < -0.5: shift[dd] += 1 return shift -def dir_coord(coord, box) : +def dir_coord(coord, box): rbox = np.linalg.inv(box) return np.matmul(coord, rbox) -def system_pbc_shift(system) : +def system_pbc_shift(system): f_idx = 0 - prev_ncoord = dir_coord(system['coords'][f_idx], - system['cells' ][f_idx]) - shifts = np.zeros([system.get_nframes(), system.get_natoms(), 3], dtype = int) - curr_shift = np.zeros([system.get_natoms(), 3], dtype = int) + prev_ncoord = dir_coord(system["coords"][f_idx], system["cells"][f_idx]) + shifts = np.zeros([system.get_nframes(), system.get_natoms(), 3], dtype=int) + curr_shift = np.zeros([system.get_natoms(), 3], dtype=int) half = 0.5 * np.ones([system.get_natoms(), 3]) - for ii in range(system.get_nframes()) : - curr_ncoord = dir_coord(system['coords'][ii], - system['cells' ][ii]) + for ii in range(system.get_nframes()): + curr_ncoord = dir_coord(system["coords"][ii], system["cells"][ii]) diff_ncoord = curr_ncoord - prev_ncoord - curr_shift -= (diff_ncoord > half) - curr_shift += (diff_ncoord <-half) + curr_shift -= diff_ncoord > half + curr_shift += diff_ncoord < -half shifts[ii] = np.copy(curr_shift) prev_ncoord = curr_ncoord - return np.array(shifts, dtype = int) + return np.array(shifts, dtype=int) -def apply_pbc(system_coords, system_cells) : +def apply_pbc(system_coords, system_cells): coords = [] nframes = system_cells.shape[0] - for ff in range(nframes) : - ncoord = dir_coord(system_coords[ff], - system_cells[ff]) + for ff in range(nframes): + ncoord = dir_coord(system_coords[ff], system_cells[ff]) ncoord = ncoord % 1 coords.append(np.matmul(ncoord, system_cells[ff])) return np.array(coords) diff --git a/dpdata/md/rdf.py b/dpdata/md/rdf.py index e1fd50a2..220bdcb0 100644 --- a/dpdata/md/rdf.py +++ b/dpdata/md/rdf.py @@ -1,9 +1,7 @@ import numpy as np -def rdf(sys, - sel_type = [None, None], - max_r = 5, - nbins = 100) : + +def rdf(sys, sel_type=[None, None], max_r=5, nbins=100): """ compute the rdf of a system @@ -12,9 +10,9 @@ def rdf(sys, sys : System or LabeledSystem The dpdata system sel_type: list - List of size 2. The first element specifies the type of the first atom, - while the second element specifies the type of the second atom. - Both elements can be ints or list of ints. + List of size 2. The first element specifies the type of the first atom, + while the second element specifies the type of the second atom. + Both elements can be ints or list of ints. If the element is None, all types are specified. Examples are sel_type = [0, 0], sel_type = [0, [0, 1]] or sel_type = [0, None] max_r: float @@ -31,37 +29,35 @@ def rdf(sys, coord: np.array The coordination number up to r """ - return compute_rdf(sys['cells'], sys['coords'], sys['atom_types'], - sel_type = sel_type, - max_r = max_r, - nbins = nbins) + return compute_rdf( + sys["cells"], + sys["coords"], + sys["atom_types"], + sel_type=sel_type, + max_r=max_r, + nbins=nbins, + ) + -def compute_rdf(box, - posis, - atype, - sel_type = [None, None], - max_r = 5, - nbins = 100) : +def compute_rdf(box, posis, atype, sel_type=[None, None], max_r=5, nbins=100): nframes = box.shape[0] xx = None all_rdf = [] all_cod = [] for ii in range(nframes): - xx, rdf, cod = _compute_rdf_1frame(box[ii], posis[ii], atype, sel_type, max_r, nbins) + xx, rdf, cod = _compute_rdf_1frame( + box[ii], posis[ii], atype, sel_type, max_r, nbins + ) all_rdf.append(rdf) all_cod.append(cod) all_rdf = np.array(all_rdf).reshape([nframes, -1]) all_cod = np.array(all_cod).reshape([nframes, -1]) - all_rdf = np.average(all_rdf, axis = 0) - all_cod = np.average(all_cod, axis = 0) + all_rdf = np.average(all_rdf, axis=0) + all_cod = np.average(all_cod, axis=0) return xx, all_rdf, all_cod -def _compute_rdf_1frame(box, - posis, - atype, - sel_type = [None, None], - max_r = 5, - nbins = 100) : + +def _compute_rdf_1frame(box, posis, atype, sel_type=[None, None], max_r=5, nbins=100): all_types = list(set(list(np.sort(atype)))) if sel_type[0] is None: sel_type[0] = all_types @@ -73,14 +69,20 @@ def _compute_rdf_1frame(box, sel_type[1] = [sel_type[1]] natoms = len(posis) from ase import Atoms - import ase.neighborlist - atoms = Atoms(positions=posis, cell=box, pbc=[1,1,1]) - nlist = ase.neighborlist.NeighborList(max_r, self_interaction=False, bothways=True, primitive=ase.neighborlist.NewPrimitiveNeighborList) + import ase.neighborlist + + atoms = Atoms(positions=posis, cell=box, pbc=[1, 1, 1]) + nlist = ase.neighborlist.NeighborList( + max_r, + self_interaction=False, + bothways=True, + primitive=ase.neighborlist.NewPrimitiveNeighborList, + ) nlist.update(atoms) stat = np.zeros(nbins) stat_acc = np.zeros(nbins) hh = max_r / float(nbins) - for ii in range(natoms) : + for ii in range(natoms): # atom "0" if atype[ii] in sel_type[0]: indices, offsets = nlist.get_neighbors(ii) @@ -89,7 +91,7 @@ def _compute_rdf_1frame(box, if atype[jj] in sel_type[1]: posi_jj = atoms.positions[jj] + np.dot(os, atoms.get_cell()) diff = posi_jj - atoms.positions[ii] - dr = np.linalg.norm(diff) + dr = np.linalg.norm(diff) # if (np.linalg.norm(diff- diff_1)) > 1e-12 : # raise RuntimeError si = int(dr / hh) @@ -106,19 +108,21 @@ def _compute_rdf_1frame(box, rho1 = c1 / np.linalg.det(box) # compute coordination number for ii in range(1, nbins): - stat_acc[ii] = stat_acc[ii-1] + stat[ii-1] + stat_acc[ii] = stat_acc[ii - 1] + stat[ii - 1] stat_acc = stat_acc / c0 # compute rdf for ii in range(nbins): - vol = 4./3. * np.pi * ( ((ii+1)*hh) ** 3 - ((ii)*hh) ** 3 ) + vol = 4.0 / 3.0 * np.pi * (((ii + 1) * hh) ** 3 - ((ii) * hh) ** 3) rho = stat[ii] / vol stat[ii] = rho / rho1 / c0 - xx = np.arange(0, max_r-1e-12, hh) + xx = np.arange(0, max_r - 1e-12, hh) return xx, stat, stat_acc -if __name__ == '__main__': + +if __name__ == "__main__": import dpdata - sys = dpdata.System('out.lmp') - xx, stat = rdf(sys, sel_type = [[0], None], max_r = 8, nbins = 100) + + sys = dpdata.System("out.lmp") + xx, stat = rdf(sys, sel_type=[[0], None], max_r=8, nbins=100) res = np.concatenate([xx, stat]).reshape([2, -1]) - np.savetxt('rdf.out', res.T) + np.savetxt("rdf.out", res.T) diff --git a/dpdata/md/water.py b/dpdata/md/water.py index 75ad1ad2..b9ab833b 100644 --- a/dpdata/md/water.py +++ b/dpdata/md/water.py @@ -2,114 +2,107 @@ from .pbc import posi_diff from .pbc import posi_shift -def compute_bonds (box, - posis, - atype, - oh_sel = [0,1], - max_roh = 1.3, - uniq_hbond = True): - try : + +def compute_bonds(box, posis, atype, oh_sel=[0, 1], max_roh=1.3, uniq_hbond=True): + try: import ase - import ase.neighborlist + import ase.neighborlist + # nlist implemented by ase - return compute_bonds_ase(box, posis, atype, oh_sel, max_roh, uniq_hbond) + return compute_bonds_ase(box, posis, atype, oh_sel, max_roh, uniq_hbond) except ImportError: # nlist naivly implemented , scales as O(N^2) return compute_bonds_naive(box, posis, atype, oh_sel, max_roh, uniq_hbond) -def compute_bonds_ase(box, - posis, - atype, - oh_sel = [0,1], - max_roh = 1.3, - uniq_hbond = True): +def compute_bonds_ase(box, posis, atype, oh_sel=[0, 1], max_roh=1.3, uniq_hbond=True): natoms = len(posis) from ase import Atoms - import ase.neighborlist - atoms = Atoms(positions=posis, cell=box, pbc=[1,1,1]) - nlist = ase.neighborlist.NeighborList(max_roh, self_interaction=False, bothways=True, primitive=ase.neighborlist.NewPrimitiveNeighborList) + import ase.neighborlist + + atoms = Atoms(positions=posis, cell=box, pbc=[1, 1, 1]) + nlist = ase.neighborlist.NeighborList( + max_roh, + self_interaction=False, + bothways=True, + primitive=ase.neighborlist.NewPrimitiveNeighborList, + ) nlist.update(atoms) bonds = [] o_type = oh_sel[0] h_type = oh_sel[1] - for ii in range(natoms) : + for ii in range(natoms): bonds.append([]) - for ii in range(natoms) : - if atype[ii] == o_type : + for ii in range(natoms): + if atype[ii] == o_type: nn, ss = nlist.get_neighbors(ii) for jj in nn: - if atype[jj] == h_type : + if atype[jj] == h_type: dr = posi_diff(box, posis[ii], posis[jj]) - if np.linalg.norm(dr) < max_roh : + if np.linalg.norm(dr) < max_roh: bonds[ii].append(jj) bonds[jj].append(ii) - if uniq_hbond : - for jj in range(natoms) : - if atype[jj] == h_type : - if len(bonds[jj]) > 1 : + if uniq_hbond: + for jj in range(natoms): + if atype[jj] == h_type: + if len(bonds[jj]) > 1: orig_bonds = bonds[jj] min_bd = 1e10 min_idx = -1 - for ii in bonds[jj] : + for ii in bonds[jj]: dr = posi_diff(box, posis[ii], posis[jj]) drr = np.linalg.norm(dr) # print(ii,jj, posis[ii], posis[jj], drr) - if drr < min_bd : + if drr < min_bd: min_idx = ii min_bd = drr bonds[jj] = [min_idx] orig_bonds.remove(min_idx) # print(min_idx, orig_bonds, bonds[jj]) - for ii in orig_bonds : + for ii in orig_bonds: bonds[ii].remove(jj) return bonds - -def compute_bonds_naive(box, - posis, - atype, - oh_sel = [0,1], - max_roh = 1.3, - uniq_hbond = True): + +def compute_bonds_naive(box, posis, atype, oh_sel=[0, 1], max_roh=1.3, uniq_hbond=True): natoms = len(posis) bonds = [] o_type = oh_sel[0] h_type = oh_sel[1] - for ii in range(natoms) : + for ii in range(natoms): bonds.append([]) - for ii in range(natoms) : - if atype[ii] == o_type : - for jj in range(natoms) : - if atype[jj] == h_type : + for ii in range(natoms): + if atype[ii] == o_type: + for jj in range(natoms): + if atype[jj] == h_type: dr = posi_diff(box, posis[ii], posis[jj]) - if np.linalg.norm(dr) < max_roh : + if np.linalg.norm(dr) < max_roh: bonds[ii].append(jj) bonds[jj].append(ii) - if uniq_hbond : - for jj in range(natoms) : - if atype[jj] == h_type : - if len(bonds[jj]) > 1 : + if uniq_hbond: + for jj in range(natoms): + if atype[jj] == h_type: + if len(bonds[jj]) > 1: orig_bonds = bonds[jj] min_bd = 1e10 min_idx = -1 - for ii in bonds[jj] : + for ii in bonds[jj]: dr = posi_diff(box, posis[ii], posis[jj]) drr = np.linalg.norm(dr) # print(ii,jj, posis[ii], posis[jj], drr) - if drr < min_bd : + if drr < min_bd: min_idx = ii min_bd = drr bonds[jj] = [min_idx] orig_bonds.remove(min_idx) # print(min_idx, orig_bonds, bonds[jj]) - for ii in orig_bonds : + for ii in orig_bonds: bonds[ii].remove(jj) return bonds -# def ions_count (atype, -# bonds, +# def ions_count (atype, +# bonds, # oh_sel = [0, 1]) : # no = 0 # noh = 0 @@ -146,10 +139,8 @@ def compute_bonds_naive(box, # raise RuntimeError("unknow case: numb of O bonded to H > 1") # return no, noh, noh2, noh3, nh -def find_ions (atype, - bonds, - oh_sel = [0, 1], - ret_h2o = True) : + +def find_ions(atype, bonds, oh_sel=[0, 1], ret_h2o=True): no = [] noh = [] noh2 = [] @@ -158,37 +149,34 @@ def find_ions (atype, natoms = len(atype) o_type = oh_sel[0] h_type = oh_sel[1] - for ii in range(natoms) : - if atype[ii] == o_type : - if len(bonds[ii] ) == 0 : + for ii in range(natoms): + if atype[ii] == o_type: + if len(bonds[ii]) == 0: no.append(ii) - elif len(bonds[ii] ) == 1 : + elif len(bonds[ii]) == 1: noh.append(ii) - elif len(bonds[ii] ) == 2 : - if ret_h2o : + elif len(bonds[ii]) == 2: + if ret_h2o: noh2.append(ii) - elif len(bonds[ii] ) == 3 : + elif len(bonds[ii]) == 3: noh3.append(ii) - else : + else: raise RuntimeError("unknow case: numb of H bonded to O > 3") - for ii in range(natoms) : - if atype[ii] == h_type : - if len(bonds[ii] ) == 0 : + for ii in range(natoms): + if atype[ii] == h_type: + if len(bonds[ii]) == 0: nh.append(ii) - elif len(bonds[ii] ) == 1 : + elif len(bonds[ii]) == 1: pass - else : + else: raise RuntimeError("unknow case: numb of O bonded to H > 1") return no, noh, noh2, noh3, nh - -def pbc_coords(cells, - coords, - atom_types, - oh_sel = [0, 1], - max_roh = 1.3): - bonds = compute_bonds(cells, coords, atom_types, oh_sel = oh_sel, max_roh = max_roh, uniq_hbond = True) +def pbc_coords(cells, coords, atom_types, oh_sel=[0, 1], max_roh=1.3): + bonds = compute_bonds( + cells, coords, atom_types, oh_sel=oh_sel, max_roh=max_roh, uniq_hbond=True + ) new_coords = np.copy(coords) natoms = len(atom_types) @@ -196,10 +184,9 @@ def pbc_coords(cells, h_type = oh_sel[1] for ii in range(natoms): if atom_types[ii] == o_type: - assert(len(bonds[ii]) == 2), 'O has more than 2 bonded Hs, stop' + assert len(bonds[ii]) == 2, "O has more than 2 bonded Hs, stop" for jj in bonds[ii]: - assert(atom_types[jj] == h_type), 'The atom bonded to O is not H, stop' + assert atom_types[jj] == h_type, "The atom bonded to O is not H, stop" shift = posi_shift(cells, coords[jj], coords[ii]) new_coords[jj] = coords[jj] + np.matmul(shift, cells) return new_coords - diff --git a/dpdata/periodic_table.json b/dpdata/periodic_table.json index 69c55325..7a055ad6 100644 --- a/dpdata/periodic_table.json +++ b/dpdata/periodic_table.json @@ -823,4 +823,4 @@ "radius": null, "calculated_radius": null } -} \ No newline at end of file +} diff --git a/dpdata/periodic_table.py b/dpdata/periodic_table.py index df8a5038..dc64d40a 100644 --- a/dpdata/periodic_table.py +++ b/dpdata/periodic_table.py @@ -1,38 +1,138 @@ from pathlib import Path -from monty.serialization import loadfn,dumpfn +from monty.serialization import loadfn, dumpfn -fpdt=str(Path(__file__).absolute().parent / "periodic_table.json") -_pdt=loadfn(fpdt) -ELEMENTS=['H', 'He', 'Li', 'Be', 'B', 'C', 'N', 'O', 'F', 'Ne', 'Na', 'Mg', 'Al', 'Si', 'P', 'S', 'Cl', 'Ar', 'K', 'Ca', 'Sc', 'Ti', 'V', 'Cr', \ - 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn', 'Ga', 'Ge', 'As', 'Se', 'Br', 'Kr', 'Rb', 'Sr', 'Y', 'Zr', 'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag',\ - 'Cd', 'In', 'Sn', 'Sb', 'Te', 'I', 'Xe', 'Cs', 'Ba', 'La', 'Ce', 'Pr', 'Nd', 'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb',\ - 'Lu', 'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg', 'Tl', 'Pb', 'Bi', 'Po', 'At', 'Rn', 'Fr', 'Ra', 'Ac', 'Th', 'Pa', 'U', 'Np', \ - 'Pu', 'Am', 'Cm', 'Bk', 'Cf', 'Es', 'Fm', 'Md', 'No', 'Lr'] +fpdt = str(Path(__file__).absolute().parent / "periodic_table.json") +_pdt = loadfn(fpdt) +ELEMENTS = [ + "H", + "He", + "Li", + "Be", + "B", + "C", + "N", + "O", + "F", + "Ne", + "Na", + "Mg", + "Al", + "Si", + "P", + "S", + "Cl", + "Ar", + "K", + "Ca", + "Sc", + "Ti", + "V", + "Cr", + "Mn", + "Fe", + "Co", + "Ni", + "Cu", + "Zn", + "Ga", + "Ge", + "As", + "Se", + "Br", + "Kr", + "Rb", + "Sr", + "Y", + "Zr", + "Nb", + "Mo", + "Tc", + "Ru", + "Rh", + "Pd", + "Ag", + "Cd", + "In", + "Sn", + "Sb", + "Te", + "I", + "Xe", + "Cs", + "Ba", + "La", + "Ce", + "Pr", + "Nd", + "Pm", + "Sm", + "Eu", + "Gd", + "Tb", + "Dy", + "Ho", + "Er", + "Tm", + "Yb", + "Lu", + "Hf", + "Ta", + "W", + "Re", + "Os", + "Ir", + "Pt", + "Au", + "Hg", + "Tl", + "Pb", + "Bi", + "Po", + "At", + "Rn", + "Fr", + "Ra", + "Ac", + "Th", + "Pa", + "U", + "Np", + "Pu", + "Am", + "Cm", + "Bk", + "Cf", + "Es", + "Fm", + "Md", + "No", + "Lr", +] -class Element: +class Element: def __init__(self, symbol: str): assert symbol in ELEMENTS self.symbol = "%s" % symbol d = _pdt[symbol] - self._Z = d['atomic_no'] - self._name = d['name'] - self._X = d['X'] - self._mass = d['atomic_mass'] - self._r = d['radius'] + self._Z = d["atomic_no"] + self._name = d["name"] + self._X = d["X"] + self._mass = d["atomic_mass"] + self._r = d["radius"] self._cr = d["calculated_radius"] def __str__(self): return self.symbol def __repr__(self): - return "Element : %s"%self.symbol - + return "Element : %s" % self.symbol + @classmethod - def from_Z(cls,Z): - assert(Z>0) - assert(Z 0 + assert Z < len(ELEMENTS) + return cls(ELEMENTS[Z - 1]) @property def Z(self): diff --git a/dpdata/plugin.py b/dpdata/plugin.py index d000f558..4f163ced 100644 --- a/dpdata/plugin.py +++ b/dpdata/plugin.py @@ -12,22 +12,25 @@ def xxx(): pass >>> print(Plugin.plugins['xx']) """ + def __init__(self): self.plugins = {} def register(self, key): """Register a plugin. - + Parameter --------- key: str Key of the plugin. """ + def decorator(object): self.plugins[key] = object return object + return decorator - + def get_plugin(self, key): return self.plugins[key] diff --git a/dpdata/plugins/3dmol.py b/dpdata/plugins/3dmol.py index c3089329..fa9f02aa 100644 --- a/dpdata/plugins/3dmol.py +++ b/dpdata/plugins/3dmol.py @@ -11,12 +11,15 @@ class Py3DMolFormat(Format): To use this format, py3Dmol should be installed in advance. """ - def to_system(self, - data: dict, - f_idx: int = 0, - size: Tuple[int] = (300,300), - style: dict = {"stick":{}, "sphere":{"radius":0.4}}, - **kwargs): + + def to_system( + self, + data: dict, + f_idx: int = 0, + size: Tuple[int] = (300, 300), + style: dict = {"stick": {}, "sphere": {"radius": 0.4}}, + **kwargs + ): """Show 3D structure of a frame in jupyter. Parameters @@ -35,10 +38,11 @@ def to_system(self, >>> system.to_3dmol() """ import py3Dmol - types = np.array(data['atom_names'])[data['atom_types']] - xyz = coord_to_xyz(data['coords'][f_idx], types) + + types = np.array(data["atom_names"])[data["atom_types"]] + xyz = coord_to_xyz(data["coords"][f_idx], types) viewer = py3Dmol.view(width=size[0], height=size[1]) - viewer.addModel(xyz, 'xyz') + viewer.addModel(xyz, "xyz") viewer.setStyle(style.copy()) viewer.zoomTo() return viewer diff --git a/dpdata/plugins/__init__.py b/dpdata/plugins/__init__.py index ca097fd7..66364aa2 100644 --- a/dpdata/plugins/__init__.py +++ b/dpdata/plugins/__init__.py @@ -1,8 +1,9 @@ import importlib from pathlib import Path + try: from importlib import metadata -except ImportError: # for Python<3.8 +except ImportError: # for Python<3.8 import importlib_metadata as metadata PACKAGE_BASE = "dpdata.plugins" @@ -15,8 +16,8 @@ # https://setuptools.readthedocs.io/en/latest/userguide/entry_point.html try: - eps = metadata.entry_points(group='dpdata.plugins') + eps = metadata.entry_points(group="dpdata.plugins") except TypeError: - eps = metadata.entry_points().get('dpdata.plugins', []) + eps = metadata.entry_points().get("dpdata.plugins", []) for ep in eps: plugin = ep.load() diff --git a/dpdata/plugins/abacus.py b/dpdata/plugins/abacus.py index c219053b..a9c82b05 100644 --- a/dpdata/plugins/abacus.py +++ b/dpdata/plugins/abacus.py @@ -3,12 +3,13 @@ import dpdata.abacus.relax from dpdata.format import Format + @Format.register("abacus/stru") @Format.register("stru") class AbacusSTRUFormat(Format): def from_system(self, file_name, **kwargs): return dpdata.abacus.scf.get_frame_from_stru(file_name) - + def to_system(self, data, file_name, frame_idx=0, **kwargs): """ Dump the system into ABACUS STRU format file. @@ -28,35 +29,45 @@ def to_system(self, data, file_name, frame_idx=0, **kwargs): numerical_descriptor: str, optional numerical descriptor file """ - - pp_file = kwargs.get('pp_file') - numerical_orbital = kwargs.get('numerical_orbital') - mass = kwargs.get('mass') - numerical_descriptor = kwargs.get('numerical_descriptor') - stru_string = dpdata.abacus.scf.make_unlabeled_stru(data=data, frame_idx=frame_idx, pp_file=pp_file, numerical_orbital=numerical_orbital, numerical_descriptor=numerical_descriptor, mass=mass) + + pp_file = kwargs.get("pp_file") + numerical_orbital = kwargs.get("numerical_orbital") + mass = kwargs.get("mass") + numerical_descriptor = kwargs.get("numerical_descriptor") + stru_string = dpdata.abacus.scf.make_unlabeled_stru( + data=data, + frame_idx=frame_idx, + pp_file=pp_file, + numerical_orbital=numerical_orbital, + numerical_descriptor=numerical_descriptor, + mass=mass, + ) with open(file_name, "w") as fp: fp.write(stru_string) + @Format.register("abacus/scf") @Format.register("abacus/pw/scf") @Format.register("abacus/lcao/scf") class AbacusSCFFormat(Format): - #@Format.post("rot_lower_triangular") + # @Format.post("rot_lower_triangular") def from_labeled_system(self, file_name, **kwargs): return dpdata.abacus.scf.get_frame(file_name) + @Format.register("abacus/md") @Format.register("abacus/pw/md") @Format.register("abacus/lcao/md") class AbacusMDFormat(Format): - #@Format.post("rot_lower_triangular") + # @Format.post("rot_lower_triangular") def from_labeled_system(self, file_name, **kwargs): return dpdata.abacus.md.get_frame(file_name) + @Format.register("abacus/relax") @Format.register("abacus/pw/relax") @Format.register("abacus/lcao/relax") class AbacusRelaxFormat(Format): - #@Format.post("rot_lower_triangular") + # @Format.post("rot_lower_triangular") def from_labeled_system(self, file_name, **kwargs): - return dpdata.abacus.relax.get_frame(file_name) + return dpdata.abacus.relax.get_frame(file_name) diff --git a/dpdata/plugins/amber.py b/dpdata/plugins/amber.py index 4fe41c1e..cf2df3ca 100644 --- a/dpdata/plugins/amber.py +++ b/dpdata/plugins/amber.py @@ -10,15 +10,37 @@ @Format.register("amber/md") class AmberMDFormat(Format): - def from_system(self, file_name=None, parm7_file=None, nc_file=None, use_element_symbols=None, **kwargs): + def from_system( + self, + file_name=None, + parm7_file=None, + nc_file=None, + use_element_symbols=None, + **kwargs, + ): # assume the prefix is the same if the spefic name is not given if parm7_file is None: parm7_file = file_name + ".parm7" if nc_file is None: nc_file = file_name + ".nc" - return dpdata.amber.md.read_amber_traj(parm7_file=parm7_file, nc_file=nc_file, use_element_symbols=use_element_symbols, labeled=False) - - def from_labeled_system(self, file_name=None, parm7_file=None, nc_file=None, mdfrc_file=None, mden_file=None, mdout_file=None, use_element_symbols=None, **kwargs): + return dpdata.amber.md.read_amber_traj( + parm7_file=parm7_file, + nc_file=nc_file, + use_element_symbols=use_element_symbols, + labeled=False, + ) + + def from_labeled_system( + self, + file_name=None, + parm7_file=None, + nc_file=None, + mdfrc_file=None, + mden_file=None, + mdout_file=None, + use_element_symbols=None, + **kwargs, + ): # assume the prefix is the same if the spefic name is not given if parm7_file is None: parm7_file = file_name + ".parm7" @@ -30,25 +52,28 @@ def from_labeled_system(self, file_name=None, parm7_file=None, nc_file=None, mdf mden_file = file_name + ".mden" if mdout_file is None: mdout_file = file_name + ".mdout" - return dpdata.amber.md.read_amber_traj(parm7_file, nc_file, mdfrc_file, mden_file, mdout_file, use_element_symbols) + return dpdata.amber.md.read_amber_traj( + parm7_file, nc_file, mdfrc_file, mden_file, mdout_file, use_element_symbols + ) @Format.register("sqm/out") class SQMOutFormat(Format): def from_system(self, fname, **kwargs): - ''' + """ Read from ambertools sqm.out - ''' + """ return dpdata.amber.sqm.parse_sqm_out(fname) - + def from_labeled_system(self, fname, **kwargs): - ''' + """ Read from ambertools sqm.out - ''' + """ data = dpdata.amber.sqm.parse_sqm_out(fname) assert "forces" in list(data.keys()), f"No forces in {fname}" return data + @Format.register("sqm/in") class SQMINFormat(Format): def to_system(self, data, fname=None, frame_idx=0, **kwargs): @@ -85,14 +110,14 @@ def to_system(self, data, fname=None, frame_idx=0, **kwargs): @Driver.register("sqm") class SQMDriver(Driver): """AMBER sqm program driver. - + Parameters ---------- sqm_exec : str, default=sqm path to sqm program **kwargs : dict other arguments to make input files. See :class:`SQMINFormat` - + Examples -------- Use DFTB3 method to calculate potential energy: @@ -101,7 +126,8 @@ class SQMDriver(Driver): >>> labeled_system['energies'][0] -15.41111246 """ - def __init__(self, sqm_exec: str="sqm", **kwargs: dict) -> None: + + def __init__(self, sqm_exec: str = "sqm", **kwargs: dict) -> None: self.sqm_exec = sqm_exec self.kwargs = kwargs @@ -114,12 +140,14 @@ def label(self, data: dict) -> dict: out_fn = os.path.join(d, "%d.out" % ii) ss.to("sqm/in", inp_fn, **self.kwargs) try: - sp.check_output([*self.sqm_exec.split(), "-O", "-i", inp_fn, "-o", out_fn]) + sp.check_output( + [*self.sqm_exec.split(), "-O", "-i", inp_fn, "-o", out_fn] + ) except sp.CalledProcessError as e: with open(out_fn) as f: raise RuntimeError( "Run sqm failed! Output:\n" + f.read() - ) from e + ) from e labeled_system.append(dpdata.LabeledSystem(out_fn, fmt="sqm/out")) return labeled_system.data @@ -127,12 +155,13 @@ def label(self, data: dict) -> dict: @Minimizer.register("sqm") class SQMMinimizer(Minimizer): """SQM minimizer. - + Parameters ---------- maxcyc : int, default=1000 maximun cycle to minimize """ + def __init__(self, maxcyc=1000, *args, **kwargs) -> None: assert maxcyc > 0, "maxcyc should be more than 0 to minimize" self.driver = SQMDriver(maxcyc=maxcyc, **kwargs) diff --git a/dpdata/plugins/ase.py b/dpdata/plugins/ase.py index fa2093e4..b6e0fcb7 100644 --- a/dpdata/plugins/ase.py +++ b/dpdata/plugins/ase.py @@ -3,9 +3,11 @@ from dpdata.format import Format import numpy as np import dpdata + try: import ase.io from ase.calculators.calculator import PropertyNotImplementedError + if TYPE_CHECKING: from ase.optimize.optimize import Optimizer except ImportError: @@ -39,17 +41,19 @@ def from_system(self, atoms: "ase.Atoms", **kwargs) -> dict: symbols = atoms.get_chemical_symbols() atom_names = list(set(symbols)) atom_numbs = [symbols.count(symbol) for symbol in atom_names] - atom_types = np.array([atom_names.index(symbol) for symbol in symbols]).astype(int) + atom_types = np.array([atom_names.index(symbol) for symbol in symbols]).astype( + int + ) cells = atoms.cell[:] coords = atoms.get_positions() info_dict = { - 'atom_names': atom_names, - 'atom_numbs': atom_numbs, - 'atom_types': atom_types, - 'cells': np.array([cells]).astype('float32'), - 'coords': np.array([coords]).astype('float32'), - 'orig': np.zeros(3), - 'nopbc': not np.any(atoms.get_pbc()), + "atom_names": atom_names, + "atom_numbs": atom_numbs, + "atom_types": atom_types, + "cells": np.array([cells]).astype("float32"), + "coords": np.array([coords]).astype("float32"), + "orig": np.zeros(3), + "nopbc": not np.any(atoms.get_pbc()), } return info_dict @@ -66,7 +70,7 @@ def from_labeled_system(self, atoms: "ase.Atoms", **kwargs) -> dict: ------- dict data dict - + Raises ------ RuntimeError @@ -80,20 +84,28 @@ def from_labeled_system(self, atoms: "ase.Atoms", **kwargs) -> dict: energies = atoms.get_potential_energy() forces = atoms.get_forces() info_dict = { - ** info_dict, - 'energies': np.array([energies]).astype('float32'), - 'forces': np.array([forces]).astype('float32'), + **info_dict, + "energies": np.array([energies]).astype("float32"), + "forces": np.array([forces]).astype("float32"), } try: stress = atoms.get_stress(False) except PropertyNotImplementedError: pass else: - virials = np.array([-atoms.get_volume() * stress]).astype('float32') - info_dict['virials'] = virials + virials = np.array([-atoms.get_volume() * stress]).astype("float32") + info_dict["virials"] = virials return info_dict - def from_multi_systems(self, file_name: str, begin: Optional[int] = None, end: Optional[int] = None, step: Optional[int] = None, ase_fmt: Optional[str] = None, **kwargs) -> "ase.Atoms": + def from_multi_systems( + self, + file_name: str, + begin: Optional[int] = None, + end: Optional[int] = None, + step: Optional[int] = None, + ase_fmt: Optional[str] = None, + **kwargs + ) -> "ase.Atoms": """Convert a ASE supported file to ASE Atoms. It will finally be converted to MultiSystems. @@ -121,48 +133,49 @@ def from_multi_systems(self, file_name: str, begin: Optional[int] = None, end: O yield atoms def to_system(self, data, **kwargs): - ''' + """ convert System to ASE Atom obj - ''' + """ from ase import Atoms structures = [] - species = [data['atom_names'][tt] for tt in data['atom_types']] + species = [data["atom_names"][tt] for tt in data["atom_types"]] - for ii in range(data['coords'].shape[0]): + for ii in range(data["coords"].shape[0]): structure = Atoms( - symbols=species, positions=data['coords'][ii], pbc=not data.get('nopbc', False), cell=data['cells'][ii]) + symbols=species, + positions=data["coords"][ii], + pbc=not data.get("nopbc", False), + cell=data["cells"][ii], + ) structures.append(structure) return structures def to_labeled_system(self, data, *args, **kwargs): - '''Convert System to ASE Atoms object.''' + """Convert System to ASE Atoms object.""" from ase import Atoms from ase.calculators.singlepoint import SinglePointCalculator structures = [] - species = [data['atom_names'][tt] for tt in data['atom_types']] + species = [data["atom_names"][tt] for tt in data["atom_types"]] - for ii in range(data['coords'].shape[0]): + for ii in range(data["coords"].shape[0]): structure = Atoms( symbols=species, - positions=data['coords'][ii], - pbc=not data.get('nopbc', False), - cell=data['cells'][ii] + positions=data["coords"][ii], + pbc=not data.get("nopbc", False), + cell=data["cells"][ii], ) - results = { - 'energy': data["energies"][ii], - 'forces': data["forces"][ii] - } + results = {"energy": data["energies"][ii], "forces": data["forces"][ii]} if "virials" in data: # convert to GPa as this is ase convention # v_pref = 1 * 1e4 / 1.602176621e6 vol = structure.get_volume() # results['stress'] = data["virials"][ii] / (v_pref * vol) - results['stress'] = -data["virials"][ii] / vol + results["stress"] = -data["virials"][ii] / vol structure.calc = SinglePointCalculator(structure, **results) structures.append(structure) @@ -173,7 +186,7 @@ def to_labeled_system(self, data, *args, **kwargs): @Driver.register("ase") class ASEDriver(Driver): """ASE Driver. - + Parameters ---------- calculator : ase.calculators.calculator.Calculato @@ -186,12 +199,12 @@ def __init__(self, calculator: "ase.calculators.calculator.Calculator") -> None: def label(self, data: dict) -> dict: """Label a system data. Returns new data with energy, forces, and virials. - + Parameters ---------- data : dict data with coordinates and atom types - + Returns ------- dict @@ -204,7 +217,9 @@ def label(self, data: dict) -> dict: labeled_system = dpdata.LabeledSystem() for atoms in structures: atoms.calc = self.calculator - ls = dpdata.LabeledSystem(atoms, fmt="ase/structure", type_map=data['atom_names']) + ls = dpdata.LabeledSystem( + atoms, fmt="ase/structure", type_map=data["atom_names"] + ) labeled_system.append(ls) return labeled_system.data @@ -226,15 +241,19 @@ class ASEMinimizer(Minimizer): optimizer_kwargs : dict, optional other parameters for optimizer """ - def __init__(self, - driver: Driver, - optimizer: Optional[Type["Optimizer"]] = None, - fmax: float = 5e-3, - max_steps: Optional[int] = None, - optimizer_kwargs: dict = {}) -> None: + + def __init__( + self, + driver: Driver, + optimizer: Optional[Type["Optimizer"]] = None, + fmax: float = 5e-3, + max_steps: Optional[int] = None, + optimizer_kwargs: dict = {}, + ) -> None: self.calculator = driver.ase_calculator if optimizer is None: from ase.optimize import LBFGS + self.optimizer = LBFGS else: self.optimizer = optimizer @@ -252,7 +271,7 @@ def minimize(self, data: dict) -> dict: ---------- data : dict data with coordinates and atom types - + Returns ------- dict @@ -266,6 +285,8 @@ def minimize(self, data: dict) -> dict: atoms.calc = self.calculator dyn = self.optimizer(atoms, **self.optimizer_kwargs) dyn.run(fmax=self.fmax, steps=self.max_steps) - ls = dpdata.LabeledSystem(atoms, fmt="ase/structure", type_map=data['atom_names']) + ls = dpdata.LabeledSystem( + atoms, fmt="ase/structure", type_map=data["atom_names"] + ) labeled_system.append(ls) return labeled_system.data diff --git a/dpdata/plugins/cp2k.py b/dpdata/plugins/cp2k.py index 8787c7f0..143c1821 100644 --- a/dpdata/plugins/cp2k.py +++ b/dpdata/plugins/cp2k.py @@ -16,15 +16,16 @@ def from_labeled_system(self, file_name, restart=False, **kwargs): class CP2KOutputFormat(Format): def from_labeled_system(self, file_name, restart=False, **kwargs): data = {} - data['atom_names'], \ - data['atom_numbs'], \ - data['atom_types'], \ - data['cells'], \ - data['coords'], \ - data['energies'], \ - data['forces'], \ - tmp_virial \ - = dpdata.cp2k.output.get_frames(file_name) + ( + data["atom_names"], + data["atom_numbs"], + data["atom_types"], + data["cells"], + data["coords"], + data["energies"], + data["forces"], + tmp_virial, + ) = dpdata.cp2k.output.get_frames(file_name) if tmp_virial is not None: - data['virials'] = tmp_virial + data["virials"] = tmp_virial return data diff --git a/dpdata/plugins/deepmd.py b/dpdata/plugins/deepmd.py index c3b7f4ca..2e885009 100644 --- a/dpdata/plugins/deepmd.py +++ b/dpdata/plugins/deepmd.py @@ -14,15 +14,18 @@ @Format.register("deepmd/raw") class DeePMDRawFormat(Format): def from_system(self, file_name, type_map=None, **kwargs): - return dpdata.deepmd.raw.to_system_data(file_name, type_map=type_map, labels=False) + return dpdata.deepmd.raw.to_system_data( + file_name, type_map=type_map, labels=False + ) def to_system(self, data, file_name, **kwargs): - """Dump the system in deepmd raw format to directory `file_name` - """ + """Dump the system in deepmd raw format to directory `file_name`""" dpdata.deepmd.raw.dump(file_name, data) def from_labeled_system(self, file_name, type_map=None, **kwargs): - return dpdata.deepmd.raw.to_system_data(file_name, type_map=type_map, labels=True) + return dpdata.deepmd.raw.to_system_data( + file_name, type_map=type_map, labels=True + ) MultiMode = Format.MultiModes.Directory @@ -31,7 +34,9 @@ def from_labeled_system(self, file_name, type_map=None, **kwargs): @Format.register("deepmd/comp") class DeePMDCompFormat(Format): def from_system(self, file_name, type_map=None, **kwargs): - return dpdata.deepmd.comp.to_system_data(file_name, type_map=type_map, labels=False) + return dpdata.deepmd.comp.to_system_data( + file_name, type_map=type_map, labels=False + ) def to_system(self, data, file_name, set_size=5000, prec=np.float64, **kwargs): """ @@ -53,29 +58,37 @@ def to_system(self, data, file_name, set_size=5000, prec=np.float64, **kwargs): prec: {numpy.float32, numpy.float64} The floating point precision of the compressed data """ - dpdata.deepmd.comp.dump( - file_name, data, set_size=set_size, comp_prec=prec) + dpdata.deepmd.comp.dump(file_name, data, set_size=set_size, comp_prec=prec) def from_labeled_system(self, file_name, type_map=None, **kwargs): - return dpdata.deepmd.comp.to_system_data(file_name, type_map=type_map, labels=True) + return dpdata.deepmd.comp.to_system_data( + file_name, type_map=type_map, labels=True + ) MultiMode = Format.MultiModes.Directory + @Format.register("deepmd/hdf5") class DeePMDHDF5Format(Format): """HDF5 format for DeePMD-kit. - + Examples -------- Dump a MultiSystems to a HDF5 file: >>> import dpdata >>> dpdata.MultiSystems().from_deepmd_npy("data").to_deepmd_hdf5("data.hdf5") """ - def _from_system(self, file_name: Union[str, h5py.Group, h5py.File], type_map: List[str], labels: bool): + + def _from_system( + self, + file_name: Union[str, h5py.Group, h5py.File], + type_map: List[str], + labels: bool, + ): """Convert HDF5 file to System or LabeledSystem data. - + This method is used to switch from labeled or non-labeled options. - + Parameters ---------- file_name : str or h5py.Group or h5py.File @@ -97,19 +110,25 @@ def _from_system(self, file_name: Union[str, h5py.Group, h5py.File], type_map: L file_name is not str or h5py.Group or h5py.File """ if isinstance(file_name, (h5py.Group, h5py.File)): - return dpdata.deepmd.hdf5.to_system_data(file_name, "", type_map=type_map, labels=labels) + return dpdata.deepmd.hdf5.to_system_data( + file_name, "", type_map=type_map, labels=labels + ) elif isinstance(file_name, str): s = file_name.split("#") name = s[1] if len(s) > 1 else "" - with h5py.File(s[0], 'r') as f: - return dpdata.deepmd.hdf5.to_system_data(f, name, type_map=type_map, labels=labels) + with h5py.File(s[0], "r") as f: + return dpdata.deepmd.hdf5.to_system_data( + f, name, type_map=type_map, labels=labels + ) else: raise TypeError("Unsupported file_name") - def from_system(self, - file_name: Union[str, h5py.Group, h5py.File], - type_map: Optional[List[str]] = None, - **kwargs) -> dict: + def from_system( + self, + file_name: Union[str, h5py.Group, h5py.File], + type_map: Optional[List[str]] = None, + **kwargs + ) -> dict: """Convert HDF5 file to System data. Parameters @@ -132,10 +151,12 @@ def from_system(self, """ return self._from_system(file_name, type_map=type_map, labels=False) - def from_labeled_system(self, - file_name: Union[str, h5py.Group, h5py.File], - type_map: Optional[List[str]] = None, - **kwargs) -> dict: + def from_labeled_system( + self, + file_name: Union[str, h5py.Group, h5py.File], + type_map: Optional[List[str]] = None, + **kwargs + ) -> dict: """Convert HDF5 file to LabeledSystem data. Parameters @@ -158,14 +179,16 @@ def from_labeled_system(self, """ return self._from_system(file_name, type_map=type_map, labels=True) - def to_system(self, - data : dict, - file_name: Union[str, h5py.Group, h5py.File], - set_size : int = 5000, - comp_prec : np.dtype = np.float64, - **kwargs): + def to_system( + self, + data: dict, + file_name: Union[str, h5py.Group, h5py.File], + set_size: int = 5000, + comp_prec: np.dtype = np.float64, + **kwargs + ): """Convert System data to HDF5 file. - + Parameters ---------- data : dict @@ -179,21 +202,23 @@ def to_system(self, data precision """ if isinstance(file_name, (h5py.Group, h5py.File)): - dpdata.deepmd.hdf5.dump(file_name, "", data, set_size = set_size, comp_prec = comp_prec) + dpdata.deepmd.hdf5.dump( + file_name, "", data, set_size=set_size, comp_prec=comp_prec + ) elif isinstance(file_name, str): s = file_name.split("#") name = s[1] if len(s) > 1 else "" - with h5py.File(s[0], 'w') as f: - dpdata.deepmd.hdf5.dump(f, name, data, set_size = set_size, comp_prec = comp_prec) + with h5py.File(s[0], "w") as f: + dpdata.deepmd.hdf5.dump( + f, name, data, set_size=set_size, comp_prec=comp_prec + ) else: raise TypeError("Unsupported file_name") - def from_multi_systems(self, - directory: str, - **kwargs) -> h5py.Group: + def from_multi_systems(self, directory: str, **kwargs) -> h5py.Group: """Generate HDF5 groups from a HDF5 file, which will be passed to `from_system`. - + Parameters ---------- directory : str @@ -204,29 +229,28 @@ def from_multi_systems(self, h5py.Group a HDF5 group in the HDF5 file """ - with h5py.File(directory, 'r') as f: + with h5py.File(directory, "r") as f: for ff in f.keys(): yield f[ff] - def to_multi_systems(self, - formulas: List[str], - directory: str, - **kwargs) -> h5py.Group: + def to_multi_systems( + self, formulas: List[str], directory: str, **kwargs + ) -> h5py.Group: """Generate HDF5 groups, which will be passed to `to_system`. - + Parameters ---------- formulas : list[str] formulas of MultiSystems directory : str HDF5 file name - + Yields ------ h5py.Group a HDF5 group with the name of formula """ - with h5py.File(directory, 'w') as f: + with h5py.File(directory, "w") as f: for ff in formulas: yield f.create_group(ff) @@ -236,16 +260,17 @@ def to_multi_systems(self, @Driver.register("deepmd-kit") class DPDriver(Driver): """DeePMD-kit driver. - + Parameters ---------- dp : deepmd.DeepPot or str The deepmd-kit potential class or the filename of the model. - + Examples -------- >>> DPDriver("frozen_model.pb") """ + def __init__(self, dp: str) -> None: try: # DP 1.x @@ -257,16 +282,18 @@ def __init__(self, dp: str) -> None: self.dp = DeepPot(dp) else: self.dp = dp - self.enable_auto_batch_size = 'auto_batch_size' in DeepPot.__init__.__code__.co_varnames + self.enable_auto_batch_size = ( + "auto_batch_size" in DeepPot.__init__.__code__.co_varnames + ) def label(self, data: dict) -> dict: """Label a system data by deepmd-kit. Returns new data with energy, forces, and virials. - + Parameters ---------- data : dict data with coordinates and atom types - + Returns ------- dict @@ -274,36 +301,38 @@ def label(self, data: dict) -> dict: """ type_map = self.dp.get_type_map() - ori_sys = dpdata.System.from_dict({'data': data}) + ori_sys = dpdata.System.from_dict({"data": data}) ori_sys.sort_atom_names(type_map=type_map) - atype = ori_sys['atom_types'] + atype = ori_sys["atom_types"] if not self.enable_auto_batch_size: labeled_sys = dpdata.LabeledSystem() for ss in ori_sys: - coord = ss['coords'].reshape((1, ss.get_natoms()*3)) + coord = ss["coords"].reshape((1, ss.get_natoms() * 3)) if not ss.nopbc: - cell = ss['cells'].reshape((1, 9)) + cell = ss["cells"].reshape((1, 9)) else: cell = None e, f, v = self.dp.eval(coord, cell, atype) data = ss.data - data['energies'] = e.reshape((1,)) - data['forces'] = f.reshape((1, ss.get_natoms(), 3)) - data['virials'] = v.reshape((1, 3, 3)) - this_sys = dpdata.LabeledSystem.from_dict({'data': data}) + data["energies"] = e.reshape((1,)) + data["forces"] = f.reshape((1, ss.get_natoms(), 3)) + data["virials"] = v.reshape((1, 3, 3)) + this_sys = dpdata.LabeledSystem.from_dict({"data": data}) labeled_sys.append(this_sys) data = labeled_sys.data else: # since v2.0.2, auto batch size is supported - coord = ori_sys.data['coords'].reshape((ori_sys.get_nframes(), ori_sys.get_natoms()*3)) + coord = ori_sys.data["coords"].reshape( + (ori_sys.get_nframes(), ori_sys.get_natoms() * 3) + ) if not ori_sys.nopbc: - cell = ori_sys.data['cells'].reshape((ori_sys.get_nframes(), 9)) + cell = ori_sys.data["cells"].reshape((ori_sys.get_nframes(), 9)) else: cell = None e, f, v = self.dp.eval(coord, cell, atype) data = ori_sys.data.copy() - data['energies'] = e.reshape((ori_sys.get_nframes(),)) - data['forces'] = f.reshape((ori_sys.get_nframes(), ori_sys.get_natoms(), 3)) - data['virials'] = v.reshape((ori_sys.get_nframes(), 3, 3)) + data["energies"] = e.reshape((ori_sys.get_nframes(),)) + data["forces"] = f.reshape((ori_sys.get_nframes(), ori_sys.get_natoms(), 3)) + data["virials"] = v.reshape((ori_sys.get_nframes(), 3, 3)) return data diff --git a/dpdata/plugins/fhi_aims.py b/dpdata/plugins/fhi_aims.py index b1805c4e..45b181fc 100644 --- a/dpdata/plugins/fhi_aims.py +++ b/dpdata/plugins/fhi_aims.py @@ -1,37 +1,49 @@ import dpdata.fhi_aims.output from dpdata.format import Format + @Format.register("fhi_aims/md") @Format.register("fhi_aims/output") class FhiMDFormat(Format): - def from_labeled_system(self, file_name, md=True, begin = 0, step = 1, convergence_check=True, **kwargs): + def from_labeled_system( + self, file_name, md=True, begin=0, step=1, convergence_check=True, **kwargs + ): data = {} - data['atom_names'], \ - data['atom_numbs'], \ - data['atom_types'], \ - data['cells'], \ - data['coords'], \ - data['energies'], \ - data['forces'], \ - tmp_virial, \ - = dpdata.fhi_aims.output.get_frames(file_name, md = md, begin = begin, step = step, convergence_check=convergence_check) - if tmp_virial is not None : - data['virials'] = tmp_virial + ( + data["atom_names"], + data["atom_numbs"], + data["atom_types"], + data["cells"], + data["coords"], + data["energies"], + data["forces"], + tmp_virial, + ) = dpdata.fhi_aims.output.get_frames( + file_name, + md=md, + begin=begin, + step=step, + convergence_check=convergence_check, + ) + if tmp_virial is not None: + data["virials"] = tmp_virial return data + @Format.register("fhi_aims/scf") class FhiSCFFormat(Format): def from_labeled_system(self, file_name, **kwargs): data = {} - data['atom_names'], \ - data['atom_numbs'], \ - data['atom_types'], \ - data['cells'], \ - data['coords'], \ - data['energies'], \ - data['forces'], \ - tmp_virial, \ - = dpdata.fhi_aims.output.get_frames(file_name, md = False, begin = 0, step = 1) - if tmp_virial is not None : - data['virials'] = tmp_virial + ( + data["atom_names"], + data["atom_numbs"], + data["atom_types"], + data["cells"], + data["coords"], + data["energies"], + data["forces"], + tmp_virial, + ) = dpdata.fhi_aims.output.get_frames(file_name, md=False, begin=0, step=1) + if tmp_virial is not None: + data["virials"] = tmp_virial return data diff --git a/dpdata/plugins/gaussian.py b/dpdata/plugins/gaussian.py index dcae8528..37a5ee8b 100644 --- a/dpdata/plugins/gaussian.py +++ b/dpdata/plugins/gaussian.py @@ -14,11 +14,7 @@ def from_labeled_system(self, file_name, md=False, **kwargs): try: return dpdata.gaussian.log.to_system_data(file_name, md=md) except AssertionError: - return { - 'energies': [], - 'forces': [], - 'nopbc': True - } + return {"energies": [], "forces": [], "nopbc": True} @Format.register("gaussian/md") @@ -30,6 +26,7 @@ def from_labeled_system(self, file_name, **kwargs): @Format.register("gaussian/gjf") class GaussiaGJFFormat(Format): """Gaussian input file""" + def to_system(self, data: dict, file_name: str, **kwargs): """Generate Gaussian input file. @@ -43,7 +40,7 @@ def to_system(self, data: dict, file_name: str, **kwargs): Other parameters to make input files. See :meth:`dpdata.gaussian.gjf.make_gaussian_input` """ text = dpdata.gaussian.gjf.make_gaussian_input(data, **kwargs) - with open(file_name, 'w') as fp: + with open(file_name, "w") as fp: fp.write(text) @@ -69,18 +66,19 @@ class GaussianDriver(Driver): >>> labeled_system['energies'][0] -1102.714590995794 """ - def __init__(self, gaussian_exec: str="g16", **kwargs: dict) -> None: + + def __init__(self, gaussian_exec: str = "g16", **kwargs: dict) -> None: self.gaussian_exec = gaussian_exec self.kwargs = kwargs def label(self, data: dict) -> dict: """Label a system data. Returns new data with energy, forces, and virials. - + Parameters ---------- data : dict data with coordinates and atom types - + Returns ------- dict @@ -98,8 +96,6 @@ def label(self, data: dict) -> dict: except sp.CalledProcessError as e: with open(out_fn) as f: out = f.read() - raise RuntimeError( - "Run gaussian failed! Output:\n" + out - ) from e + raise RuntimeError("Run gaussian failed! Output:\n" + out) from e labeled_system.append(dpdata.LabeledSystem(out_fn, fmt="gaussian/log")) return labeled_system.data diff --git a/dpdata/plugins/gromacs.py b/dpdata/plugins/gromacs.py index f4e3d528..6f19a27f 100644 --- a/dpdata/plugins/gromacs.py +++ b/dpdata/plugins/gromacs.py @@ -14,7 +14,9 @@ def from_system(self, file_name, format_atom_name=True, **kwargs): file_name : str The input file name """ - return dpdata.gromacs.gro.file_to_system_data(file_name, format_atom_name=format_atom_name, **kwargs) + return dpdata.gromacs.gro.file_to_system_data( + file_name, format_atom_name=format_atom_name, **kwargs + ) def to_system(self, data, file_name=None, frame_idx=-1, **kwargs): """ @@ -27,20 +29,20 @@ def to_system(self, data, file_name=None, frame_idx=-1, **kwargs): frame_idx : int The index of the frame to dump """ - assert(frame_idx < len(data['coords'])) + assert frame_idx < len(data["coords"]) if frame_idx == -1: strs = [] - for idx in range(data['coords'].shape[0]): - gro_str = dpdata.gromacs.gro.from_system_data(data, f_idx=idx, - **kwargs) + for idx in range(data["coords"].shape[0]): + gro_str = dpdata.gromacs.gro.from_system_data(data, f_idx=idx, **kwargs) strs.append(gro_str) gro_str = "\n".join(strs) else: gro_str = dpdata.gromacs.gro.from_system_data( - data, f_idx=frame_idx, **kwargs) + data, f_idx=frame_idx, **kwargs + ) if file_name is None: return gro_str else: - with open(file_name, 'w+') as fp: + with open(file_name, "w+") as fp: fp.write(gro_str) diff --git a/dpdata/plugins/lammps.py b/dpdata/plugins/lammps.py index 8f996296..d4bce01b 100644 --- a/dpdata/plugins/lammps.py +++ b/dpdata/plugins/lammps.py @@ -9,7 +9,7 @@ class LAMMPSLmpFormat(Format): @Format.post("shift_orig_zero") def from_system(self, file_name, type_map=None, **kwargs): with open(file_name) as fp: - lines = [line.rstrip('\n') for line in fp] + lines = [line.rstrip("\n") for line in fp] return dpdata.lammps.lmp.to_system_data(lines, type_map) def to_system(self, data, file_name, frame_idx=0, **kwargs): @@ -25,9 +25,9 @@ def to_system(self, data, file_name, frame_idx=0, **kwargs): frame_idx : int The index of the frame to dump """ - assert(frame_idx < len(data['coords'])) + assert frame_idx < len(data["coords"]) w_str = dpdata.lammps.lmp.from_system_data(data, frame_idx) - with open(file_name, 'w') as fp: + with open(file_name, "w") as fp: fp.write(w_str) @@ -35,12 +35,8 @@ def to_system(self, data, file_name, frame_idx=0, **kwargs): @Format.register("lammps/dump") class LAMMPSDumpFormat(Format): @Format.post("shift_orig_zero") - def from_system(self, - file_name, - type_map=None, - begin=0, - step=1, - unwrap=False, - **kwargs): + def from_system( + self, file_name, type_map=None, begin=0, step=1, unwrap=False, **kwargs + ): lines = dpdata.lammps.dump.load_file(file_name, begin=begin, step=step) return dpdata.lammps.dump.system_data(lines, type_map, unwrap=unwrap) diff --git a/dpdata/plugins/list.py b/dpdata/plugins/list.py index 0eca2e13..99ac6d4a 100644 --- a/dpdata/plugins/list.py +++ b/dpdata/plugins/list.py @@ -8,7 +8,8 @@ def to_system(self, data, **kwargs): convert system to list, usefull for data collection """ from dpdata import System, LabeledSystem - if 'forces' in data: + + if "forces" in data: system = LabeledSystem(data=data) else: system = System(data=data) diff --git a/dpdata/plugins/pwmat.py b/dpdata/plugins/pwmat.py index 3365806e..baa415d6 100644 --- a/dpdata/plugins/pwmat.py +++ b/dpdata/plugins/pwmat.py @@ -11,25 +11,30 @@ @Format.register("pwmat/output") class PwmatOutputFormat(Format): @Format.post("rot_lower_triangular") - def from_labeled_system(self, file_name, begin=0, step=1, convergence_check=True, **kwargs): + def from_labeled_system( + self, file_name, begin=0, step=1, convergence_check=True, **kwargs + ): data = {} - data['atom_names'], \ - data['atom_numbs'], \ - data['atom_types'], \ - data['cells'], \ - data['coords'], \ - data['energies'], \ - data['forces'], \ - tmp_virial \ - = dpdata.pwmat.movement.get_frames(file_name, begin=begin, step=step, convergence_check=convergence_check) + ( + data["atom_names"], + data["atom_numbs"], + data["atom_types"], + data["cells"], + data["coords"], + data["energies"], + data["forces"], + tmp_virial, + ) = dpdata.pwmat.movement.get_frames( + file_name, begin=begin, step=step, convergence_check=convergence_check + ) if tmp_virial is not None: - data['virials'] = tmp_virial + data["virials"] = tmp_virial # scale virial to the unit of eV - if 'virials' in data: + if "virials" in data: v_pref = 1 * 1e3 / 1.602176621e6 - for ii in range(data['coords'].shape[0]): - vol = np.linalg.det(np.reshape(data['cells'][ii], [3, 3])) - data['virials'][ii] *= v_pref * vol + for ii in range(data["coords"].shape[0]): + vol = np.linalg.det(np.reshape(data["cells"][ii], [3, 3])) + data["virials"][ii] *= v_pref * vol return data @@ -41,7 +46,7 @@ class PwmatAtomconfigFormat(Format): @Format.post("rot_lower_triangular") def from_system(self, file_name, **kwargs): with open(file_name) as fp: - lines = [line.rstrip('\n') for line in fp] + lines = [line.rstrip("\n") for line in fp] return dpdata.pwmat.atomconfig.to_system_data(lines) def to_system(self, data, file_name, frame_idx=0, *args, **kwargs): @@ -55,7 +60,7 @@ def to_system(self, data, file_name, frame_idx=0, *args, **kwargs): frame_idx : int The index of the frame to dump """ - assert(frame_idx < len(data['coords'])) + assert frame_idx < len(data["coords"]) w_str = dpdata.pwmat.atomconfig.from_system_data(data, frame_idx) - with open(file_name, 'w') as fp: + with open(file_name, "w") as fp: fp.write(w_str) diff --git a/dpdata/plugins/pymatgen.py b/dpdata/plugins/pymatgen.py index f29ac382..514b8d76 100644 --- a/dpdata/plugins/pymatgen.py +++ b/dpdata/plugins/pymatgen.py @@ -6,20 +6,23 @@ @Format.register("pymatgen/structure") class PyMatgenStructureFormat(Format): def to_system(self, data, **kwargs): - """convert System to Pymatgen Structure obj - """ + """convert System to Pymatgen Structure obj""" structures = [] try: from pymatgen.core import Structure except ModuleNotFoundError as e: - raise ImportError('No module pymatgen.Structure') from e + raise ImportError("No module pymatgen.Structure") from e species = [] - for name, numb in zip(data['atom_names'], data['atom_numbs']): - species.extend([name]*numb) - for ii in range(data['coords'].shape[0]): + for name, numb in zip(data["atom_names"], data["atom_numbs"]): + species.extend([name] * numb) + for ii in range(data["coords"].shape[0]): structure = Structure( - data['cells'][ii], species, data['coords'][ii], coords_are_cartesian=True) + data["cells"][ii], + species, + data["coords"][ii], + coords_are_cartesian=True, + ) structures.append(structure) return structures @@ -31,26 +34,24 @@ def from_system(self, file_name, **kwargs): try: from pymatgen.core import Molecule except ModuleNotFoundError as e: - raise ImportError('No module pymatgen.Molecule') from e + raise ImportError("No module pymatgen.Molecule") from e return dpdata.pymatgen.molecule.to_system_data(file_name) def to_system(self, data, **kwargs): - """convert System to Pymatgen Molecule obj - """ + """convert System to Pymatgen Molecule obj""" molecules = [] try: from pymatgen.core import Molecule except ModuleNotFoundError as e: - raise ImportError('No module pymatgen.Molecule') from e + raise ImportError("No module pymatgen.Molecule") from e species = [] - for name, numb in zip(data['atom_names'], data['atom_numbs']): - species.extend([name]*numb) + for name, numb in zip(data["atom_names"], data["atom_numbs"]): + species.extend([name] * numb) data = dpdata.system.remove_pbc(data) - for ii in range(np.array(data['coords']).shape[0]): - molecule = Molecule( - species, data['coords'][ii]) + for ii in range(np.array(data["coords"]).shape[0]): + molecule = Molecule(species, data["coords"][ii]) molecules.append(molecule) return molecules @@ -59,23 +60,20 @@ def to_system(self, data, **kwargs): @Format.register_to("to_pymatgen_ComputedStructureEntry") class PyMatgenCSEFormat(Format): def to_labeled_system(self, data, *args, **kwargs): - """convert System to Pymagen ComputedStructureEntry obj - """ + """convert System to Pymagen ComputedStructureEntry obj""" try: from pymatgen.entries.computed_entries import ComputedStructureEntry except ModuleNotFoundError as e: raise ImportError( - 'No module ComputedStructureEntry in pymatgen.entries.computed_entries') from e + "No module ComputedStructureEntry in pymatgen.entries.computed_entries" + ) from e entries = [] for ii, structure in enumerate(PyMatgenStructureFormat().to_system(data)): - energy = data['energies'][ii] - csedata = {'forces': data['forces'][ii], - 'virials': data['virials'][ii]} + energy = data["energies"][ii] + csedata = {"forces": data["forces"][ii], "virials": data["virials"][ii]} entry = ComputedStructureEntry(structure, energy, data=csedata) entries.append(entry) return entries - - diff --git a/dpdata/plugins/qe.py b/dpdata/plugins/qe.py index e6a1665a..1b95a6d4 100644 --- a/dpdata/plugins/qe.py +++ b/dpdata/plugins/qe.py @@ -3,41 +3,49 @@ import dpdata.md.pbc from dpdata.format import Format + @Format.register("qe/cp/traj") class QECPTrajFormat(Format): @Format.post("rot_lower_triangular") - def from_system(self, file_name, begin = 0, step = 1, **kwargs): - data, _ = dpdata.qe.traj.to_system_data(file_name + '.in', file_name, begin = begin, step = step) - data['coords'] \ - = dpdata.md.pbc.apply_pbc(data['coords'], - data['cells'], - ) + def from_system(self, file_name, begin=0, step=1, **kwargs): + data, _ = dpdata.qe.traj.to_system_data( + file_name + ".in", file_name, begin=begin, step=step + ) + data["coords"] = dpdata.md.pbc.apply_pbc( + data["coords"], + data["cells"], + ) return data @Format.post("rot_lower_triangular") - def from_labeled_system(self, file_name, begin = 0, step = 1, **kwargs): - data, cs = dpdata.qe.traj.to_system_data(file_name + '.in', file_name, begin = begin, step = step) - data['coords'] \ - = dpdata.md.pbc.apply_pbc(data['coords'], - data['cells'], - ) - data['energies'], data['forces'], es \ - = dpdata.qe.traj.to_system_label(file_name + '.in', file_name, begin = begin, step = step) - assert(cs == es), "the step key between files are not consistent" + def from_labeled_system(self, file_name, begin=0, step=1, **kwargs): + data, cs = dpdata.qe.traj.to_system_data( + file_name + ".in", file_name, begin=begin, step=step + ) + data["coords"] = dpdata.md.pbc.apply_pbc( + data["coords"], + data["cells"], + ) + data["energies"], data["forces"], es = dpdata.qe.traj.to_system_label( + file_name + ".in", file_name, begin=begin, step=step + ) + assert cs == es, "the step key between files are not consistent" return data + @Format.register("qe/pw/scf") class QECPPWSCFFormat(Format): @Format.post("rot_lower_triangular") def from_labeled_system(self, file_name, **kwargs): data = {} - data['atom_names'], \ - data['atom_numbs'], \ - data['atom_types'], \ - data['cells'], \ - data['coords'], \ - data['energies'], \ - data['forces'], \ - data['virials'], \ - = dpdata.qe.scf.get_frame(file_name) + ( + data["atom_names"], + data["atom_numbs"], + data["atom_types"], + data["cells"], + data["coords"], + data["energies"], + data["forces"], + data["virials"], + ) = dpdata.qe.scf.get_frame(file_name) return data diff --git a/dpdata/plugins/rdkit.py b/dpdata/plugins/rdkit.py index 9d40257a..043fad1e 100644 --- a/dpdata/plugins/rdkit.py +++ b/dpdata/plugins/rdkit.py @@ -1,4 +1,5 @@ from dpdata.format import Format + try: import rdkit.Chem import dpdata.rdkit.utils @@ -11,10 +12,9 @@ class MolFormat(Format): def from_bond_order_system(self, file_name, **kwargs): return rdkit.Chem.MolFromMolFile(file_name, sanitize=False, removeHs=False) - def to_bond_order_system(self, data, mol, file_name, frame_idx=0, **kwargs): - assert (frame_idx < mol.GetNumConformers()) + assert frame_idx < mol.GetNumConformers() rdkit.Chem.MolToMolFile(mol, file_name, confId=frame_idx) @@ -22,22 +22,25 @@ def to_bond_order_system(self, data, mol, file_name, frame_idx=0, **kwargs): @Format.register("sdf_file") class SdfFormat(Format): def from_bond_order_system(self, file_name, **kwargs): - ''' + """ Note that it requires all molecules in .sdf file must be of the same topology - ''' - mols = [m for m in rdkit.Chem.SDMolSupplier(file_name, sanitize=False, removeHs=False)] + """ + mols = [ + m + for m in rdkit.Chem.SDMolSupplier(file_name, sanitize=False, removeHs=False) + ] if len(mols) > 1: mol = dpdata.rdkit.utils.combine_molecules(mols) else: mol = mols[0] return mol - + def to_bond_order_system(self, data, mol, file_name, frame_idx=-1, **kwargs): sdf_writer = rdkit.Chem.SDWriter(file_name) if frame_idx == -1: for ii in range(mol.GetNumConformers()): sdf_writer.write(mol, confId=ii) else: - assert (frame_idx < mol.GetNumConformers()) + assert frame_idx < mol.GetNumConformers() sdf_writer.write(mol, confId=frame_idx) - sdf_writer.close() \ No newline at end of file + sdf_writer.close() diff --git a/dpdata/plugins/siesta.py b/dpdata/plugins/siesta.py index 6838dac3..5b38e8b2 100644 --- a/dpdata/plugins/siesta.py +++ b/dpdata/plugins/siesta.py @@ -7,28 +7,30 @@ class SiestaOutputFormat(Format): def from_system(self, file_name, **kwargs): data = {} - data['atom_names'], \ - data['atom_numbs'], \ - data['atom_types'], \ - data['cells'], \ - data['coords'], \ - _e, \ - _f, \ - _v \ - = dpdata.siesta.output.obtain_frame(file_name) + ( + data["atom_names"], + data["atom_numbs"], + data["atom_types"], + data["cells"], + data["coords"], + _e, + _f, + _v, + ) = dpdata.siesta.output.obtain_frame(file_name) return data def from_labeled_system(self, file_name, **kwargs): data = {} - data['atom_names'], \ - data['atom_numbs'], \ - data['atom_types'], \ - data['cells'], \ - data['coords'], \ - data['energies'], \ - data['forces'], \ - data['virials'] \ - = dpdata.siesta.output.obtain_frame(file_name) + ( + data["atom_names"], + data["atom_numbs"], + data["atom_types"], + data["cells"], + data["coords"], + data["energies"], + data["forces"], + data["virials"], + ) = dpdata.siesta.output.obtain_frame(file_name) return data @@ -37,26 +39,28 @@ def from_labeled_system(self, file_name, **kwargs): class SiestaAIMDOutputFormat(Format): def from_system(self, file_name, **kwargs): data = {} - data['atom_names'], \ - data['atom_numbs'], \ - data['atom_types'], \ - data['cells'], \ - data['coords'], \ - _e, \ - _f, \ - _v \ - = dpdata.siesta.aiMD_output.get_aiMD_frame(file_name) + ( + data["atom_names"], + data["atom_numbs"], + data["atom_types"], + data["cells"], + data["coords"], + _e, + _f, + _v, + ) = dpdata.siesta.aiMD_output.get_aiMD_frame(file_name) return data def from_labeled_system(self, file_name, **kwargs): data = {} - data['atom_names'], \ - data['atom_numbs'], \ - data['atom_types'], \ - data['cells'], \ - data['coords'], \ - data['energies'], \ - data['forces'], \ - data['virials'] \ - = dpdata.siesta.aiMD_output.get_aiMD_frame(file_name) + ( + data["atom_names"], + data["atom_numbs"], + data["atom_types"], + data["cells"], + data["coords"], + data["energies"], + data["forces"], + data["virials"], + ) = dpdata.siesta.aiMD_output.get_aiMD_frame(file_name) return data diff --git a/dpdata/plugins/vasp.py b/dpdata/plugins/vasp.py index 07ec34f1..5b151f80 100644 --- a/dpdata/plugins/vasp.py +++ b/dpdata/plugins/vasp.py @@ -5,6 +5,7 @@ from dpdata.format import Format from dpdata.utils import sort_atom_names, uniq_atom_names + @Format.register("poscar") @Format.register("contcar") @Format.register("vasp/poscar") @@ -13,7 +14,7 @@ class VASPPoscarFormat(Format): @Format.post("rot_lower_triangular") def from_system(self, file_name, **kwargs): with open(file_name) as fp: - lines = [line.rstrip('\n') for line in fp] + lines = [line.rstrip("\n") for line in fp] data = dpdata.vasp.poscar.to_system_data(lines) data = uniq_atom_names(data) return data @@ -30,7 +31,7 @@ def to_system(self, data, file_name, frame_idx=0, **kwargs): The index of the frame to dump """ w_str = VASPStringFormat().to_system(data, frame_idx=frame_idx) - with open(file_name, 'w') as fp: + with open(file_name, "w") as fp: fp.write(w_str) @@ -45,7 +46,7 @@ def to_system(self, data, frame_idx=0, **kwargs): frame_idx : int The index of the frame to dump """ - assert(frame_idx < len(data['coords'])) + assert frame_idx < len(data["coords"]) return dpdata.vasp.poscar.from_system_data(data, frame_idx) @@ -54,26 +55,35 @@ def to_system(self, data, frame_idx=0, **kwargs): @Format.register("vasp/outcar") class VASPOutcarFormat(Format): @Format.post("rot_lower_triangular") - def from_labeled_system(self, file_name, begin=0, step=1, convergence_check=True, **kwargs): + def from_labeled_system( + self, file_name, begin=0, step=1, convergence_check=True, **kwargs + ): data = {} ml = kwargs.get("ml", False) - data['atom_names'], \ - data['atom_numbs'], \ - data['atom_types'], \ - data['cells'], \ - data['coords'], \ - data['energies'], \ - data['forces'], \ - tmp_virial, \ - = dpdata.vasp.outcar.get_frames(file_name, begin=begin, step=step, ml=ml, convergence_check=convergence_check) + ( + data["atom_names"], + data["atom_numbs"], + data["atom_types"], + data["cells"], + data["coords"], + data["energies"], + data["forces"], + tmp_virial, + ) = dpdata.vasp.outcar.get_frames( + file_name, + begin=begin, + step=step, + ml=ml, + convergence_check=convergence_check, + ) if tmp_virial is not None: - data['virials'] = tmp_virial + data["virials"] = tmp_virial # scale virial to the unit of eV - if 'virials' in data: + if "virials" in data: v_pref = 1 * 1e3 / 1.602176621e6 - for ii in range(data['cells'].shape[0]): - vol = np.linalg.det(np.reshape(data['cells'][ii], [3, 3])) - data['virials'][ii] *= v_pref * vol + for ii in range(data["cells"].shape[0]): + vol = np.linalg.det(np.reshape(data["cells"][ii], [3, 3])) + data["virials"][ii] *= v_pref * vol data = uniq_atom_names(data) return data @@ -85,27 +95,28 @@ class VASPXMLFormat(Format): @Format.post("rot_lower_triangular") def from_labeled_system(self, file_name, begin=0, step=1, **kwargs): data = {} - data['atom_names'], \ - data['atom_types'], \ - data['cells'], \ - data['coords'], \ - data['energies'], \ - data['forces'], \ - data['virials'], \ - = dpdata.vasp.xml.analyze(file_name, type_idx_zero=True, begin=begin, step=step) - data['atom_numbs'] = [] - for ii in range(len(data['atom_names'])): - data['atom_numbs'].append(sum(data['atom_types'] == ii)) + ( + data["atom_names"], + data["atom_types"], + data["cells"], + data["coords"], + data["energies"], + data["forces"], + data["virials"], + ) = dpdata.vasp.xml.analyze( + file_name, type_idx_zero=True, begin=begin, step=step + ) + data["atom_numbs"] = [] + for ii in range(len(data["atom_names"])): + data["atom_numbs"].append(sum(data["atom_types"] == ii)) # the vasp xml assumes the direct coordinates # apply the transform to the cartesan coordinates - for ii in range(data['cells'].shape[0]): - data['coords'][ii] = np.matmul( - data['coords'][ii], data['cells'][ii]) + for ii in range(data["cells"].shape[0]): + data["coords"][ii] = np.matmul(data["coords"][ii], data["cells"][ii]) # scale virial to the unit of eV v_pref = 1 * 1e3 / 1.602176621e6 - for ii in range(data['cells'].shape[0]): - vol = np.linalg.det(np.reshape(data['cells'][ii], [3, 3])) - data['virials'][ii] *= v_pref * vol + for ii in range(data["cells"].shape[0]): + vol = np.linalg.det(np.reshape(data["cells"][ii], [3, 3])) + data["virials"][ii] *= v_pref * vol data = uniq_atom_names(data) return data - diff --git a/dpdata/plugins/xyz.py b/dpdata/plugins/xyz.py index 7ec9d266..4db722e3 100644 --- a/dpdata/plugins/xyz.py +++ b/dpdata/plugins/xyz.py @@ -4,6 +4,7 @@ from dpdata.xyz.xyz import coord_to_xyz, xyz_to_coord from dpdata.format import Format + @Format.register("xyz") class XYZFormat(Format): """XYZ foramt. @@ -12,26 +13,29 @@ class XYZFormat(Format): -------- >>> s.to("xyz", "a.xyz") """ + def to_system(self, data, file_name, **kwargs): buff = [] - types = np.array(data['atom_names'])[data['atom_types']] - for cc in data['coords']: + types = np.array(data["atom_names"])[data["atom_types"]] + for cc in data["coords"]: buff.append(coord_to_xyz(cc, types)) - with open(file_name, 'w') as fp: + with open(file_name, "w") as fp: fp.write("\n".join(buff)) def from_system(self, file_name, **kwargs): - with open(file_name, 'r') as fp: + with open(file_name, "r") as fp: coords, types = xyz_to_coord(fp.read()) - atom_names, atom_types, atom_numbs = np.unique(types, return_inverse=True, return_counts=True) + atom_names, atom_types, atom_numbs = np.unique( + types, return_inverse=True, return_counts=True + ) return { - 'atom_names': list(atom_names), - 'atom_numbs': list(atom_numbs), - 'atom_types': atom_types, - 'coords': coords.reshape((1, *coords.shape)), - 'cells': np.eye(3).reshape((1, 3, 3)) * 100, - 'nopbc': True, - 'orig': np.zeros(3), + "atom_names": list(atom_names), + "atom_numbs": list(atom_numbs), + "atom_types": atom_types, + "coords": coords.reshape((1, *coords.shape)), + "cells": np.eye(3).reshape((1, 3, 3)) * 100, + "nopbc": True, + "orig": np.zeros(3), } diff --git a/dpdata/pwmat/__init__.py b/dpdata/pwmat/__init__.py index 8b137891..e69de29b 100644 --- a/dpdata/pwmat/__init__.py +++ b/dpdata/pwmat/__init__.py @@ -1 +0,0 @@ - diff --git a/dpdata/pwmat/atomconfig.py b/dpdata/pwmat/atomconfig.py index bc06f347..5e953c47 100644 --- a/dpdata/pwmat/atomconfig.py +++ b/dpdata/pwmat/atomconfig.py @@ -1,69 +1,70 @@ -#!/usr/bin/python3 +#!/usr/bin/python3 from ..periodic_table import ELEMENTS import numpy as np -def _to_system_data_lower(lines) : + +def _to_system_data_lower(lines): system = {} natoms = int(lines[0].split()[0]) cell = [] for idx, ii in enumerate(lines): - if 'lattice' in ii or 'Lattice' in ii or 'LATTICE' in ii: - for kk in range(idx+1,idx+1+3): - vector=[float(jj) for jj in lines[kk].split()[0:3]] + if "lattice" in ii or "Lattice" in ii or "LATTICE" in ii: + for kk in range(idx + 1, idx + 1 + 3): + vector = [float(jj) for jj in lines[kk].split()[0:3]] cell.append(vector) - system['cells'] = np.array([cell]) + system["cells"] = np.array([cell]) coord = [] atomic_number = [] atom_numbs = [] for idx, ii in enumerate(lines): - if 'Position' in ii or 'POSITION' in ii or 'position' in ii: - for kk in range(idx+1,idx+1+natoms): + if "Position" in ii or "POSITION" in ii or "position" in ii: + for kk in range(idx + 1, idx + 1 + natoms): min = kk - for jj in range(kk+1,idx+1+natoms): + for jj in range(kk + 1, idx + 1 + natoms): if int(lines[jj].split()[0]) < int(lines[min].split()[0]): min = jj - lines[min], lines[kk] = lines[kk],lines[min] - for gg in range(idx+1,idx+1+natoms): + lines[min], lines[kk] = lines[kk], lines[min] + for gg in range(idx + 1, idx + 1 + natoms): tmpv = [float(jj) for jj in lines[gg].split()[1:4]] - tmpv = np.matmul(np.array(tmpv), system['cells'][0]) + tmpv = np.matmul(np.array(tmpv), system["cells"][0]) coord.append(tmpv) tmpn = int(lines[gg].split()[0]) atomic_number.append(tmpn) - for ii in np.unique(sorted(atomic_number)) : + for ii in np.unique(sorted(atomic_number)): atom_numbs.append(atomic_number.count(ii)) - system['atom_numbs'] = [int(ii) for ii in atom_numbs] - system['coords'] = np.array([coord]) - system['orig'] = np.zeros(3) + system["atom_numbs"] = [int(ii) for ii in atom_numbs] + system["coords"] = np.array([coord]) + system["orig"] = np.zeros(3) atom_types = [] - for idx,ii in enumerate(system['atom_numbs']) : - for jj in range(ii) : + for idx, ii in enumerate(system["atom_numbs"]): + for jj in range(ii): atom_types.append(idx) - system['atom_types'] = np.array(atom_types, dtype = int) - system['atom_names'] = [ELEMENTS[ii-1] for ii in np.unique(sorted(atomic_number))] + system["atom_types"] = np.array(atom_types, dtype=int) + system["atom_names"] = [ELEMENTS[ii - 1] for ii in np.unique(sorted(atomic_number))] return system -def to_system_data(lines) : +def to_system_data(lines): return _to_system_data_lower(lines) -def from_system_data(system, f_idx = 0, skip_zeros = True) : - ret = '' - natoms = sum(system['atom_numbs']) - ret += '%d' % natoms - ret += '\n' - ret += 'LATTICE' - ret += '\n' - for ii in system['cells'][f_idx] : - for jj in ii : - ret += '%.16e ' % jj - ret += '\n' - ret += 'POSITION' - ret += '\n' - atom_numbs = system['atom_numbs'] - atom_names = system['atom_names'] - atype = system['atom_types'] - posis = system['coords'][f_idx] +def from_system_data(system, f_idx=0, skip_zeros=True): + ret = "" + natoms = sum(system["atom_numbs"]) + ret += "%d" % natoms + ret += "\n" + ret += "LATTICE" + ret += "\n" + for ii in system["cells"][f_idx]: + for jj in ii: + ret += "%.16e " % jj + ret += "\n" + ret += "POSITION" + ret += "\n" + atom_numbs = system["atom_numbs"] + atom_names = system["atom_names"] + atype = system["atom_types"] + posis = system["coords"][f_idx] # atype_idx = [[idx,tt] for idx,tt in enumerate(atype)] # sort_idx = np.argsort(atype, kind = 'mergesort') sort_idx = np.lexsort((np.arange(len(atype)), atype)) @@ -72,22 +73,20 @@ def from_system_data(system, f_idx = 0, skip_zeros = True) : symbal = [] for ii, jj in zip(atom_numbs, atom_names): for kk in range(ii): - symbal.append(jj) + symbal.append(jj) atomic_numbers = [] for ii in symbal: - atomic_numbers.append(ELEMENTS.index(ii)+1) + atomic_numbers.append(ELEMENTS.index(ii) + 1) posi_list = [] - for jj, ii in zip(atomic_numbers,posis) : - ii = np.matmul(ii, np.linalg.inv(system['cells'][0])) - posi_list.append('%d %15.10f %15.10f %15.10f 1 1 1' % \ - (jj, ii[0], ii[1], ii[2]) - ) + for jj, ii in zip(atomic_numbers, posis): + ii = np.matmul(ii, np.linalg.inv(system["cells"][0])) + posi_list.append("%d %15.10f %15.10f %15.10f 1 1 1" % (jj, ii[0], ii[1], ii[2])) for kk in range(len(posi_list)): min = kk - for jj in range(kk,len(posi_list)): + for jj in range(kk, len(posi_list)): if int(posi_list[jj].split()[0]) < int(posi_list[min].split()[0]): min = jj - posi_list[min], posi_list[kk] = posi_list[kk],posi_list[min] - posi_list.append('') - ret += '\n'.join(posi_list) + posi_list[min], posi_list[kk] = posi_list[kk], posi_list[min] + posi_list.append("") + ret += "\n".join(posi_list) return ret diff --git a/dpdata/pwmat/movement.py b/dpdata/pwmat/movement.py index c39950f0..c2e0bf3a 100644 --- a/dpdata/pwmat/movement.py +++ b/dpdata/pwmat/movement.py @@ -2,59 +2,61 @@ from ..periodic_table import ELEMENTS import warnings -def system_info (lines, type_idx_zero = False) : + +def system_info(lines, type_idx_zero=False): atom_names = [] atom_numbs = [] nelm = 0 natoms = int(lines[0].split()[0]) - iteration = float(lines[0].split('Etot')[0].split('=')[1].split(',')[0]) -# print(iteration) - if iteration > 0 : + iteration = float(lines[0].split("Etot")[0].split("=")[1].split(",")[0]) + # print(iteration) + if iteration > 0: nelm = 40 else: nelm = 100 atomic_number = [] - for idx,ii in enumerate(lines): - if 'Position' in ii: - for kk in range(idx+1,idx+1+natoms) : + for idx, ii in enumerate(lines): + if "Position" in ii: + for kk in range(idx + 1, idx + 1 + natoms): min = kk - for jj in range(kk+1,idx+1+natoms): + for jj in range(kk + 1, idx + 1 + natoms): if int(lines[jj].split()[0]) < int(lines[min].split()[0]): min = jj - lines[min], lines[kk] = lines[kk],lines[min] - for gg in range(idx+1,idx+1+natoms): + lines[min], lines[kk] = lines[kk], lines[min] + for gg in range(idx + 1, idx + 1 + natoms): tmpn = int(lines[gg].split()[0]) atomic_number.append(tmpn) - for ii in np.unique(sorted(atomic_number)) : + for ii in np.unique(sorted(atomic_number)): atom_numbs.append(atomic_number.count(ii)) atom_types = [] - for idx,ii in enumerate(atom_numbs) : - for jj in range(ii) : - if type_idx_zero : + for idx, ii in enumerate(atom_numbs): + for jj in range(ii): + if type_idx_zero: atom_types.append(idx) - else : - atom_types.append(idx+1) + else: + atom_types.append(idx + 1) for ii in np.unique(sorted(atomic_number)): - atom_names.append(ELEMENTS[ii-1]) - return atom_names, atom_numbs, np.array(atom_types, dtype = int), nelm + atom_names.append(ELEMENTS[ii - 1]) + return atom_names, atom_numbs, np.array(atom_types, dtype=int), nelm -def get_movement_block(fp) : +def get_movement_block(fp): blk = [] - for ii in fp : + for ii in fp: if not ii: return blk - blk.append(ii.rstrip('\n')) - if '------------' in ii: + blk.append(ii.rstrip("\n")) + if "------------" in ii: return blk return blk + # we assume that the force is printed ... -def get_frames (fname, begin = 0, step = 1, convergence_check=True) : +def get_frames(fname, begin=0, step=1, convergence_check=True): fp = open(fname) blk = get_movement_block(fp) - atom_names, atom_numbs, atom_types, nelm = system_info(blk, type_idx_zero = True) + atom_names, atom_numbs, atom_types, nelm = system_info(blk, type_idx_zero=True) ntot = sum(atom_numbs) all_coords = [] @@ -62,13 +64,15 @@ def get_frames (fname, begin = 0, step = 1, convergence_check=True) : all_energies = [] all_atomic_energy = [] all_forces = [] - all_virials = [] + all_virials = [] cc = 0 rec_failed = [] - while len(blk) > 0 : - if cc >= begin and (cc - begin) % step == 0 : - coord, cell, energy, force, virial, is_converge = analyze_block(blk, ntot, nelm) + while len(blk) > 0: + if cc >= begin and (cc - begin) % step == 0: + coord, cell, energy, force, virial, is_converge = analyze_block( + blk, ntot, nelm + ) if len(coord) == 0: break if is_converge or not convergence_check: @@ -76,44 +80,60 @@ def get_frames (fname, begin = 0, step = 1, convergence_check=True) : all_cells.append(cell) all_energies.append(energy) all_forces.append(force) - if virial is not None : + if virial is not None: all_virials.append(virial) if not is_converge: - rec_failed.append(cc+1) - + rec_failed.append(cc + 1) + blk = get_movement_block(fp) cc += 1 - - if len(rec_failed) > 0 : - prt = "so they are not collected." if convergence_check else "but they are still collected due to the requirement for ignoring convergence checks." - warnings.warn(f"The following structures were unconverged: {rec_failed}; "+prt) - - if len(all_virials) == 0 : + + if len(rec_failed) > 0: + prt = ( + "so they are not collected." + if convergence_check + else "but they are still collected due to the requirement for ignoring convergence checks." + ) + warnings.warn( + f"The following structures were unconverged: {rec_failed}; " + prt + ) + + if len(all_virials) == 0: all_virials = None - else : + else: all_virials = np.array(all_virials) fp.close() - return atom_names, atom_numbs, atom_types, np.array(all_cells), np.array(all_coords), \ - np.array(all_energies), np.array(all_forces), all_virials + return ( + atom_names, + atom_numbs, + atom_types, + np.array(all_cells), + np.array(all_coords), + np.array(all_energies), + np.array(all_forces), + all_virials, + ) -def analyze_block(lines, ntot, nelm) : +def analyze_block(lines, ntot, nelm): coord = [] cell = [] energy = None -# atomic_energy = [] + # atomic_energy = [] force = [] virial = None is_converge = True sc_index = 0 - for idx,ii in enumerate(lines) : - if 'Iteration' in ii: - sc_index = int(ii.split('SCF =')[1]) + for idx, ii in enumerate(lines): + if "Iteration" in ii: + sc_index = int(ii.split("SCF =")[1]) if sc_index >= nelm: is_converge = False - energy = float(ii.split('Etot,Ep,Ek (eV)')[1].split()[2]) # use Ep, not Etot=Ep+Ek - elif '----------' in ii: - assert((force is not None) and len(coord) > 0 and len(cell) > 0) + energy = float( + ii.split("Etot,Ep,Ek (eV)")[1].split()[2] + ) # use Ep, not Etot=Ep+Ek + elif "----------" in ii: + assert (force is not None) and len(coord) > 0 and len(cell) > 0 # all_coords.append(coord) # all_cells.append(cell) # all_energies.append(energy) @@ -121,17 +141,16 @@ def analyze_block(lines, ntot, nelm) : # if virial is not None : # all_virials.append(virial) return coord, cell, energy, force, virial, is_converge -# elif 'NPT' in ii: -# tmp_v = [] - elif 'Lattice vector' in ii: - if 'stress' in lines[idx+1]: + # elif 'NPT' in ii: + # tmp_v = [] + elif "Lattice vector" in ii: + if "stress" in lines[idx + 1]: tmp_v = [] - for dd in range(3) : - tmp_l = lines[idx+1+dd] - cell.append([float(ss) - for ss in tmp_l.split()[0:3]]) + for dd in range(3): + tmp_l = lines[idx + 1 + dd] + cell.append([float(ss) for ss in tmp_l.split()[0:3]]) tmp_v.append([float(stress) for stress in tmp_l.split()[5:8]]) - virial = np.zeros([3,3]) + virial = np.zeros([3, 3]) virial[0][0] = tmp_v[0][0] virial[0][1] = tmp_v[0][1] virial[0][2] = tmp_v[0][2] @@ -142,43 +161,44 @@ def analyze_block(lines, ntot, nelm) : virial[2][1] = tmp_v[2][1] virial[2][2] = tmp_v[2][2] volume = np.linalg.det(np.array(cell)) - virial = virial*160.2*10.0/volume + virial = virial * 160.2 * 10.0 / volume else: - for dd in range(3) : - tmp_l = lines[idx+1+dd] - cell.append([float(ss) - for ss in tmp_l.split()[0:3]]) + for dd in range(3): + tmp_l = lines[idx + 1 + dd] + cell.append([float(ss) for ss in tmp_l.split()[0:3]]) -# else : -# for dd in range(3) : -# tmp_l = lines[idx+1+dd] -# cell.append([float(ss) -# for ss in tmp_l.split()[0:3]]) -# virial = np.zeros([3,3]) - elif 'Position' in ii: - for kk in range(idx+1, idx+1+ntot): + # else : + # for dd in range(3) : + # tmp_l = lines[idx+1+dd] + # cell.append([float(ss) + # for ss in tmp_l.split()[0:3]]) + # virial = np.zeros([3,3]) + elif "Position" in ii: + for kk in range(idx + 1, idx + 1 + ntot): min = kk - for jj in range(kk+1,idx+1+ntot): + for jj in range(kk + 1, idx + 1 + ntot): if int(lines[jj].split()[0]) < int(lines[min].split()[0]): min = jj - lines[min], lines[kk] = lines[kk],lines[min] - for gg in range(idx+1,idx+1+ntot): + lines[min], lines[kk] = lines[kk], lines[min] + for gg in range(idx + 1, idx + 1 + ntot): info = [float(jj) for jj in lines[gg].split()[1:4]] - info = np.matmul(np.array(info),np.array(cell)) + info = np.matmul(np.array(info), np.array(cell)) coord.append(info) - elif 'Force' in ii: - for kk in range(idx+1, idx+1+ntot): + elif "Force" in ii: + for kk in range(idx + 1, idx + 1 + ntot): min = kk - for jj in range(kk+1,idx+1+ntot): + for jj in range(kk + 1, idx + 1 + ntot): if int(lines[jj].split()[0]) < int(lines[min].split()[0]): min = jj - lines[min], lines[kk] = lines[kk],lines[min] - for gg in range(idx+1,idx+1+ntot): - info = [-float(ss) for ss in lines[gg].split()] # forces in MOVEMENT file are dE/dR, lacking a minus sign + lines[min], lines[kk] = lines[kk], lines[min] + for gg in range(idx + 1, idx + 1 + ntot): + info = [ + -float(ss) for ss in lines[gg].split() + ] # forces in MOVEMENT file are dE/dR, lacking a minus sign force.append(info[1:4]) -# elif 'Atomic-Energy' in ii: -# for jj in range(idx+1, idx+1+ntot) : -# tmp_l = lines[jj] -# info = [float(ss) for ss in tmp_l.split()] -# atomic_energy.append(info[1]) + # elif 'Atomic-Energy' in ii: + # for jj in range(idx+1, idx+1+ntot) : + # tmp_l = lines[jj] + # info = [float(ss) for ss in tmp_l.split()] + # atomic_energy.append(info[1]) return coord, cell, energy, force, virial, is_converge diff --git a/dpdata/pymatgen/molecule.py b/dpdata/pymatgen/molecule.py index a362bb53..c2559bef 100644 --- a/dpdata/pymatgen/molecule.py +++ b/dpdata/pymatgen/molecule.py @@ -1,4 +1,5 @@ import numpy as np + try: from pymatgen.core import Molecule except ImportError: @@ -6,7 +7,8 @@ from collections import Counter import dpdata -def to_system_data(file_name, protect_layer = 9) : + +def to_system_data(file_name, protect_layer=9): mol = Molecule.from_file(file_name) elem_mol = list(str(site.species.elements[0]) for site in mol.sites) elem_counter = Counter(elem_mol) @@ -14,16 +16,16 @@ def to_system_data(file_name, protect_layer = 9) : atom_numbs = list(elem_counter.values()) atom_types = [list(atom_names).index(e) for e in elem_mol] natoms = np.sum(atom_numbs) - + tmpcoord = np.copy(mol.cart_coords) system = {} - system['atom_names'] = atom_names - system['atom_numbs'] = atom_numbs - system['atom_types'] = np.array(atom_types, dtype = int) + system["atom_names"] = atom_names + system["atom_numbs"] = atom_numbs + system["atom_types"] = np.array(atom_types, dtype=int) # center = [c - h_cell_size for c in mol.center_of_mass] - system['orig'] = np.array([0, 0, 0]) + system["orig"] = np.array([0, 0, 0]) - system['coords'] = np.array([tmpcoord]) - system['cells'] = np.array([10.0 * np.eye(3)]) + system["coords"] = np.array([tmpcoord]) + system["cells"] = np.array([10.0 * np.eye(3)]) return system diff --git a/dpdata/qe/__init__.py b/dpdata/qe/__init__.py index d3f5a12f..e69de29b 100644 --- a/dpdata/qe/__init__.py +++ b/dpdata/qe/__init__.py @@ -1 +0,0 @@ - diff --git a/dpdata/qe/scf.py b/dpdata/qe/scf.py index 50312aee..afdb6ae1 100755 --- a/dpdata/qe/scf.py +++ b/dpdata/qe/scf.py @@ -1,16 +1,17 @@ #!/usr/bin/env python3 -import os,sys +import os, sys import numpy as np ry2ev = 13.605693009 bohr2ang = 0.52917721067 kbar2evperang3 = 1e3 / 1.602176621e6 -def get_block (lines, keyword, skip = 0) : + +def get_block(lines, keyword, skip=0): ret = [] - for idx,ii in enumerate(lines) : - if keyword in ii : + for idx, ii in enumerate(lines): + if keyword in ii: blk_idx = idx + 1 + skip while len(lines[blk_idx]) == 0: blk_idx += 1 @@ -20,18 +21,21 @@ def get_block (lines, keyword, skip = 0) : break return ret -def get_cell (lines) : + +def get_cell(lines): ret = [] - for idx,ii in enumerate(lines): - if 'ibrav' in ii : + for idx, ii in enumerate(lines): + if "ibrav" in ii: break - blk = lines[idx:idx+2] - ibrav = int(blk[0].replace(',','').split('=')[-1]) + blk = lines[idx : idx + 2] + ibrav = int(blk[0].replace(",", "").split("=")[-1]) if ibrav == 0: for iline in lines: - if 'CELL_PARAMETERS' in iline and 'angstrom' not in iline.lower(): - raise RuntimeError("CELL_PARAMETERS must be written in Angstrom. Other units are not supported yet.") - blk = get_block(lines, 'CELL_PARAMETERS') + if "CELL_PARAMETERS" in iline and "angstrom" not in iline.lower(): + raise RuntimeError( + "CELL_PARAMETERS must be written in Angstrom. Other units are not supported yet." + ) + blk = get_block(lines, "CELL_PARAMETERS") for ii in blk: ret.append([float(jj) for jj in ii.split()[0:3]]) ret = np.array(ret) @@ -40,32 +44,37 @@ def get_cell (lines) : for iline in lines: line = iline.replace("=", " ").replace(",", "").split() if len(line) >= 2 and "a" == line[0]: - #print("line = ", line) + # print("line = ", line) a = float(line[1]) if len(line) >= 2 and "celldm(1)" == line[0]: - a = float(line[1])*bohr2ang - #print("a = ", a) + a = float(line[1]) * bohr2ang + # print("a = ", a) if not a: raise RuntimeError("parameter 'a' or 'celldm(1)' cannot be found.") - ret = np.array([[a,0.,0.],[0.,a,0.],[0.,0.,a]]) + ret = np.array([[a, 0.0, 0.0], [0.0, a, 0.0], [0.0, 0.0, a]]) else: - sys.exit('ibrav > 1 not supported yet.') + sys.exit("ibrav > 1 not supported yet.") return ret -def get_coords (lines, cell) : + +def get_coords(lines, cell): coord = [] atom_symbol_list = [] for iline in lines: - if 'ATOMIC_POSITIONS' in iline and ('angstrom' not in iline.lower() and 'crystal' not in iline.lower()): - raise RuntimeError("ATOMIC_POSITIONS must be written in Angstrom or crystal. Other units are not supported yet.") - if 'ATOMIC_POSITIONS' in iline and 'angstrom' in iline.lower(): - blk = get_block(lines, 'ATOMIC_POSITIONS') + if "ATOMIC_POSITIONS" in iline and ( + "angstrom" not in iline.lower() and "crystal" not in iline.lower() + ): + raise RuntimeError( + "ATOMIC_POSITIONS must be written in Angstrom or crystal. Other units are not supported yet." + ) + if "ATOMIC_POSITIONS" in iline and "angstrom" in iline.lower(): + blk = get_block(lines, "ATOMIC_POSITIONS") for ii in blk: coord.append([float(jj) for jj in ii.split()[1:4]]) atom_symbol_list.append(ii.split()[0]) coord = np.array(coord) - elif 'ATOMIC_POSITIONS' in iline and 'crystal' in iline.lower(): - blk = get_block(lines, 'ATOMIC_POSITIONS') + elif "ATOMIC_POSITIONS" in iline and "crystal" in iline.lower(): + blk = get_block(lines, "ATOMIC_POSITIONS") for ii in blk: coord.append([float(jj) for jj in ii.split()[1:4]]) atom_symbol_list.append(ii.split()[0]) @@ -75,11 +84,11 @@ def get_coords (lines, cell) : tmp_names, symbol_idx = np.unique(atom_symbol_list, return_index=True) atom_types = [] atom_numbs = [] - #preserve the atom_name order + # preserve the atom_name order atom_names = atom_symbol_list[np.sort(symbol_idx)] for jj in atom_symbol_list: for idx, ii in enumerate(atom_names): - if (jj == ii) : + if jj == ii: atom_types.append(idx) for idx in range(len(atom_names)): atom_numbs.append(atom_types.count(idx)) @@ -87,51 +96,63 @@ def get_coords (lines, cell) : return list(atom_names), atom_numbs, atom_types, coord -def get_energy (lines) : + +def get_energy(lines): energy = None - for ii in lines : - if '! total energy' in ii : - energy = ry2ev * float(ii.split('=')[1].split()[0]) + for ii in lines: + if "! total energy" in ii: + energy = ry2ev * float(ii.split("=")[1].split()[0]) return energy -def get_force (lines) : - blk = get_block(lines, 'Forces acting on atoms', skip = 1) + +def get_force(lines): + blk = get_block(lines, "Forces acting on atoms", skip=1) ret = [] for ii in blk: - ret.append([float(jj) for jj in ii.split('=')[1].split()]) + ret.append([float(jj) for jj in ii.split("=")[1].split()]) ret = np.array(ret) - ret *= (ry2ev / bohr2ang) + ret *= ry2ev / bohr2ang return ret -def get_stress (lines) : - blk = get_block(lines, 'total stress') + +def get_stress(lines): + blk = get_block(lines, "total stress") ret = [] for ii in blk: ret.append([float(jj) for jj in ii.split()[3:6]]) ret = np.array(ret) ret *= kbar2evperang3 return ret - -def get_frame (fname): + + +def get_frame(fname): if type(fname) == str: path_out = fname outname = os.path.basename(path_out) - # the name of the input file is assumed to be different from the output by 'in' and 'out' - inname = outname.replace('out', 'in') + # the name of the input file is assumed to be different from the output by 'in' and 'out' + inname = outname.replace("out", "in") path_in = os.path.join(os.path.dirname(path_out), inname) elif type(fname) == list and len(fname) == 2: path_in = fname[0] path_out = fname[1] else: - raise RuntimeError('invalid input') - with open(path_out, 'r') as fp: - outlines = fp.read().split('\n') - with open(path_in, 'r') as fp: - inlines = fp.read().split('\n') - cell = get_cell (inlines) - atom_names, natoms, types, coords = get_coords(inlines, cell) - energy = get_energy(outlines) - force = get_force (outlines) - stress = get_stress(outlines) * np.linalg.det(cell) - return atom_names, natoms, types, cell[np.newaxis, :, :], coords[np.newaxis, :, :], \ - np.array(energy)[np.newaxis], force[np.newaxis, :, :], stress[np.newaxis, :, :] + raise RuntimeError("invalid input") + with open(path_out, "r") as fp: + outlines = fp.read().split("\n") + with open(path_in, "r") as fp: + inlines = fp.read().split("\n") + cell = get_cell(inlines) + atom_names, natoms, types, coords = get_coords(inlines, cell) + energy = get_energy(outlines) + force = get_force(outlines) + stress = get_stress(outlines) * np.linalg.det(cell) + return ( + atom_names, + natoms, + types, + cell[np.newaxis, :, :], + coords[np.newaxis, :, :], + np.array(energy)[np.newaxis], + force[np.newaxis, :, :], + stress[np.newaxis, :, :], + ) diff --git a/dpdata/qe/traj.py b/dpdata/qe/traj.py index 62b10d44..1bdacab3 100644 --- a/dpdata/qe/traj.py +++ b/dpdata/qe/traj.py @@ -1,7 +1,12 @@ -#!/usr/bin/python3 +#!/usr/bin/python3 import numpy as np import dpdata, warnings -from ..unit import EnergyConversion, LengthConversion, ForceConversion, PressureConversion +from ..unit import ( + EnergyConversion, + LengthConversion, + ForceConversion, + PressureConversion, +) ry2ev = EnergyConversion("rydberg", "eV").value() kbar2evperang3 = PressureConversion("kbar", "eV/angstrom^3").value() @@ -10,81 +15,89 @@ energy_convert = EnergyConversion("hartree", "eV").value() force_convert = ForceConversion("hartree/bohr", "eV/angstrom").value() -def load_key (lines, key) : - for ii in lines : - if key in ii : - words = ii.split(',') - for jj in words : - if key in jj : - return jj.split('=')[1] + +def load_key(lines, key): + for ii in lines: + if key in ii: + words = ii.split(",") + for jj in words: + if key in jj: + return jj.split("=")[1] return None -def load_block(lines, key, nlines) : - for idx,ii in enumerate(lines) : - if key in ii : + +def load_block(lines, key, nlines): + for idx, ii in enumerate(lines): + if key in ii: break - return lines[idx+1:idx+1+nlines] - -def convert_celldm(ibrav, celldm) : - if ibrav == 1 : - return celldm[0] * np.eye(3) - elif ibrav == 2 : - return celldm[0] * 0.5 * np.array([[-1,0,1], [0,1,1], [-1,1,0]]) - elif ibrav == 3 : - return celldm[0] * 0.5 * np.array([[1,1,1], [-1,1,1], [-1,-1,1]]) - elif ibrav == -3 : - return celldm[0] * 0.5 * np.array([[-1,1,1], [1,-1,1], [1,1,-1]]) - else : - warnings.warn('unsupported ibrav ' + str(ibrav) + ' if no .cel file, the cell convertion may be wrong. ') + return lines[idx + 1 : idx + 1 + nlines] + + +def convert_celldm(ibrav, celldm): + if ibrav == 1: + return celldm[0] * np.eye(3) + elif ibrav == 2: + return celldm[0] * 0.5 * np.array([[-1, 0, 1], [0, 1, 1], [-1, 1, 0]]) + elif ibrav == 3: + return celldm[0] * 0.5 * np.array([[1, 1, 1], [-1, 1, 1], [-1, -1, 1]]) + elif ibrav == -3: + return celldm[0] * 0.5 * np.array([[-1, 1, 1], [1, -1, 1], [1, 1, -1]]) + else: + warnings.warn( + "unsupported ibrav " + + str(ibrav) + + " if no .cel file, the cell convertion may be wrong. " + ) return np.eye(3) - #raise RuntimeError('unsupported ibrav ' + str(ibrav)) + # raise RuntimeError('unsupported ibrav ' + str(ibrav)) + -def load_cell_parameters(lines) : - blk = load_block(lines, 'CELL_PARAMETERS', 3) +def load_cell_parameters(lines): + blk = load_block(lines, "CELL_PARAMETERS", 3) ret = [] - for ii in blk : + for ii in blk: ret.append([float(jj) for jj in ii.split()[0:3]]) return np.array(ret) -def load_atom_names(lines, ntypes) : - blk = load_block(lines, 'ATOMIC_SPECIES', ntypes) +def load_atom_names(lines, ntypes): + blk = load_block(lines, "ATOMIC_SPECIES", ntypes) return [ii.split()[0] for ii in blk] -def load_celldm(lines) : +def load_celldm(lines): celldm = np.zeros(6) for ii in range(6): - key = 'celldm(%d)' % (ii+1) + key = "celldm(%d)" % (ii + 1) val = load_key(lines, key) - if val is not None : - celldm[ii] = float(val) + if val is not None: + celldm[ii] = float(val) return celldm -def load_atom_types(lines, natoms, atom_names) : - blk = load_block(lines, 'ATOMIC_POSITIONS', natoms) +def load_atom_types(lines, natoms, atom_names): + blk = load_block(lines, "ATOMIC_POSITIONS", natoms) ret = [] - for ii in blk : + for ii in blk: ret.append(atom_names.index(ii.split()[0])) - return np.array(ret, dtype = int) + return np.array(ret, dtype=int) -def load_param_file(fname) : +def load_param_file(fname): with open(fname) as fp: - lines = fp.read().split('\n') - natoms = int(load_key(lines, 'nat')) - ntypes = int(load_key(lines, 'ntyp')) + lines = fp.read().split("\n") + natoms = int(load_key(lines, "nat")) + ntypes = int(load_key(lines, "ntyp")) atom_names = load_atom_names(lines, ntypes) atom_types = load_atom_types(lines, natoms, atom_names) atom_numbs = [] - for ii in range(ntypes) : + for ii in range(ntypes): atom_numbs.append(np.sum(atom_types == ii)) - ibrav = int(load_key(lines, 'ibrav')) + ibrav = int(load_key(lines, "ibrav")) celldm = load_celldm(lines) - if ibrav == 0 : - cell = load_cell_parameters(lines) - else : + if ibrav == 0: + cell = load_cell_parameters(lines) + else: cell = convert_celldm(ibrav, celldm) cell = cell * length_convert # print(atom_names) @@ -94,41 +107,37 @@ def load_param_file(fname) : return atom_names, atom_numbs, atom_types, cell -def _load_pos_block(fp, natoms) : +def _load_pos_block(fp, natoms): head = fp.readline() if not head: # print('get None') return None, None - else : + else: ss = head.split()[0] blk = [] - for ii in range(natoms) : + for ii in range(natoms): newline = fp.readline() - if not newline : + if not newline: return None, None blk.append([float(jj) for jj in newline.split()]) return blk, ss -def load_data(fname, - natoms, - begin = 0, - step = 1, - convert = 1.) : +def load_data(fname, natoms, begin=0, step=1, convert=1.0): coords = [] steps = [] cc = 0 with open(fname) as fp: while True: blk, ss = _load_pos_block(fp, natoms) - if blk == None : + if blk == None: break - else : - if cc >= begin and (cc - begin) % step == 0 : + else: + if cc >= begin and (cc - begin) % step == 0: coords.append(blk) steps.append(ss) cc += 1 - coords= convert * np.array(coords) + coords = convert * np.array(coords) return coords, steps @@ -146,21 +155,21 @@ def load_data(fname, # return coords -def load_energy(fname, begin = 0, step = 1) : +def load_energy(fname, begin=0, step=1): data = np.loadtxt(fname) steps = [] - for ii in data[begin::step,0]: - steps.append('%d'%ii) + for ii in data[begin::step, 0]: + steps.append("%d" % ii) with open(fname) as fp: while True: line = fp.readline() - if not line : + if not line: return None - if line.split()[0][0] != '#': + if line.split()[0][0] != "#": nw = len(line.split()) break data = np.reshape(data, [-1, nw]) - return energy_convert * data[begin::step,5], steps + return energy_convert * data[begin::step, 5], steps # def load_force(fname, natoms) : @@ -177,61 +186,59 @@ def load_energy(fname, begin = 0, step = 1) : # return coords -def to_system_data(input_name, prefix, begin = 0, step = 1) : +def to_system_data(input_name, prefix, begin=0, step=1): data = {} - data['atom_names'], \ - data['atom_numbs'], \ - data['atom_types'], \ - cell \ - = load_param_file(input_name) - data['coords'], csteps\ - = load_data(prefix + '.pos', - np.sum(data['atom_numbs']), - begin = begin, - step = step, - convert = length_convert) - data['orig'] = np.zeros(3) - try : - data['cells'], tmp_steps \ - = load_data(prefix + '.cel', - 3, - begin = begin, - step = step, - convert = length_convert) + data["atom_names"], data["atom_numbs"], data["atom_types"], cell = load_param_file( + input_name + ) + data["coords"], csteps = load_data( + prefix + ".pos", + np.sum(data["atom_numbs"]), + begin=begin, + step=step, + convert=length_convert, + ) + data["orig"] = np.zeros(3) + try: + data["cells"], tmp_steps = load_data( + prefix + ".cel", 3, begin=begin, step=step, convert=length_convert + ) if csteps != tmp_steps: csteps.append(None) tmp_steps.append(None) for int_id in range(len(csteps)): if csteps[int_id] != tmp_steps[int_id]: break - step_id = begin + int_id*step - raise RuntimeError(f"the step key between files are not consistent. " - f"The difference locates at step: {step_id}, " - f".pos is {csteps[int_id]}, .cel is {tmp_steps[int_id]}") - except FileNotFoundError : - data['cells'] = np.tile(cell, (data['coords'].shape[0], 1, 1)) + step_id = begin + int_id * step + raise RuntimeError( + f"the step key between files are not consistent. " + f"The difference locates at step: {step_id}, " + f".pos is {csteps[int_id]}, .cel is {tmp_steps[int_id]}" + ) + except FileNotFoundError: + data["cells"] = np.tile(cell, (data["coords"].shape[0], 1, 1)) return data, csteps -def to_system_label(input_name, prefix, begin = 0, step = 1) : +def to_system_label(input_name, prefix, begin=0, step=1): atom_names, atom_numbs, atom_types, cell = load_param_file(input_name) - energy, esteps = load_energy(prefix + '.evp', - begin = begin, - step = step) - force, fsteps = load_data(prefix + '.for', - np.sum(atom_numbs), - begin = begin, - step = step, - convert = force_convert) - assert(esteps == fsteps), "the step key between files are not consistent " + energy, esteps = load_energy(prefix + ".evp", begin=begin, step=step) + force, fsteps = load_data( + prefix + ".for", + np.sum(atom_numbs), + begin=begin, + step=step, + convert=force_convert, + ) + assert esteps == fsteps, "the step key between files are not consistent " return energy, force, esteps -if __name__ == '__main__': - prefix='nacl' - atom_names, atom_numbs, atom_types, cell = load_param_file(prefix+'.in') - coords = load_data(prefix+'.pos', np.sum(atom_numbs)) - cells = load_data(prefix+'.cel', 3) +if __name__ == "__main__": + prefix = "nacl" + atom_names, atom_numbs, atom_types, cell = load_param_file(prefix + ".in") + coords = load_data(prefix + ".pos", np.sum(atom_numbs)) + cells = load_data(prefix + ".cel", 3) print(atom_names) print(atom_numbs) print(atom_types) diff --git a/dpdata/rdkit/sanitize.py b/dpdata/rdkit/sanitize.py index 0e163740..f3b1690e 100644 --- a/dpdata/rdkit/sanitize.py +++ b/dpdata/rdkit/sanitize.py @@ -14,13 +14,16 @@ def get_explicit_valence(atom, verbose=False): - exp_val_calculated_from_bonds = int(sum([bond.GetBondTypeAsDouble() for bond in atom.GetBonds()])) + exp_val_calculated_from_bonds = int( + sum([bond.GetBondTypeAsDouble() for bond in atom.GetBonds()]) + ) try: exp_val = atom.GetExplicitValence() if exp_val != exp_val_calculated_from_bonds: if verbose: print( - f"Explicit valence given by GetExplicitValence() and sum of bond order are inconsistent on {atom.GetSymbol()}{atom.GetIdx() + 1}, using sum of bond order.") + f"Explicit valence given by GetExplicitValence() and sum of bond order are inconsistent on {atom.GetSymbol()}{atom.GetIdx() + 1}, using sum of bond order." + ) return exp_val_calculated_from_bonds except Exception: return exp_val_calculated_from_bonds @@ -45,7 +48,7 @@ def regularize_formal_charges(mol, sanitize=True, verbose=False): def assign_formal_charge_for_atom(atom, verbose=False): """ - assigen formal charge according to 8-electron rule for element B,C,N,O,S,P,As + assigen formal charge according to 8-electron rule for element B,C,N,O,S,P,As """ assert isinstance(atom, Chem.rdchem.Atom) valence = get_explicit_valence(atom, verbose) @@ -55,7 +58,8 @@ def assign_formal_charge_for_atom(atom, verbose=False): atom.SetFormalCharge(valence - 4) if valence == 3: print( - f"Detect a valence of 3 on #C{atom.GetIdx() + 1}, the formal charge of this atom will be assigned to -1") + f"Detect a valence of 3 on #C{atom.GetIdx() + 1}, the formal charge of this atom will be assigned to -1" + ) elif valence > 4: raise ValueError(f"#C{atom.GetIdx() + 1} has a valence larger than 4") elif atom.GetSymbol() == "N": @@ -78,7 +82,9 @@ def assign_formal_charge_for_atom(atom, verbose=False): if valence == 5: atom.SetFormalCharge(0) elif valence > 5: - raise ValueError(f"#{atom.GetSymbol()}{atom.GetIdx() + 1} has a valence larger than 5") + raise ValueError( + f"#{atom.GetSymbol()}{atom.GetIdx() + 1} has a valence larger than 5" + ) else: atom.SetFormalCharge(valence - 3) @@ -89,12 +95,15 @@ def print_bonds(mol): begin_atom = bond.GetBeginAtom() end_atom = bond.GetEndAtom() print( - f'{begin_atom.GetSymbol()}{begin_atom.GetIdx() + 1} {end_atom.GetSymbol()}{end_atom.GetIdx() + 1} {bond.GetBondType()}') + f"{begin_atom.GetSymbol()}{begin_atom.GetIdx() + 1} {end_atom.GetSymbol()}{end_atom.GetIdx() + 1} {bond.GetBondType()}" + ) def print_atoms(mol): for atom in mol.GetAtoms(): - print(f'{atom.GetSymbol()}{atom.GetIdx() + 1} {atom.GetFormalCharge()} {get_explicit_valence(atom)}') + print( + f"{atom.GetSymbol()}{atom.GetIdx() + 1} {atom.GetFormalCharge()} {get_explicit_valence(atom)}" + ) def is_terminal_oxygen(O_atom): @@ -120,7 +129,11 @@ def get_terminal_NR2s(atom): if nei.GetSymbol() == "N": if is_terminal_NR2(nei): terminal_NR2s.append(nei) - terminal_NR2s.sort(key=lambda N_atom: len([atom for atom in N_atom.GetNeighbors() if atom.GetSymbol() == 'H'])) + terminal_NR2s.sort( + key=lambda N_atom: len( + [atom for atom in N_atom.GetNeighbors() if atom.GetSymbol() == "H"] + ) + ) return terminal_NR2s @@ -132,10 +145,14 @@ def sanitize_phosphate_Patom(P_atom, verbose=True): if verbose: print("Phospate group detected, sanitizing it...") # set one P=O and two P-O - bond1 = mol.GetBondBetweenAtoms(P_atom.GetIdx(), terminal_oxygens[0].GetIdx()) + bond1 = mol.GetBondBetweenAtoms( + P_atom.GetIdx(), terminal_oxygens[0].GetIdx() + ) bond1.SetBondType(Chem.rdchem.BondType.DOUBLE) for ii in range(1, len(terminal_oxygens)): - bond = mol.GetBondBetweenAtoms(P_atom.GetIdx(), terminal_oxygens[ii].GetIdx()) + bond = mol.GetBondBetweenAtoms( + P_atom.GetIdx(), terminal_oxygens[ii].GetIdx() + ) bond.SetBondType(Chem.rdchem.BondType.SINGLE) terminal_oxygens[ii].SetFormalCharge(-1) @@ -154,11 +171,15 @@ def sanitize_sulfate_Satom(S_atom, verbose=True): if verbose: print("Sulfate group detected, sanitizing it...") # set one S-O and two S=O - bond1 = mol.GetBondBetweenAtoms(S_atom.GetIdx(), terminal_oxygens[0].GetIdx()) + bond1 = mol.GetBondBetweenAtoms( + S_atom.GetIdx(), terminal_oxygens[0].GetIdx() + ) bond1.SetBondType(Chem.rdchem.BondType.SINGLE) terminal_oxygens[0].SetFormalCharge(-1) for ii in range(1, len(terminal_oxygens)): - bond = mol.GetBondBetweenAtoms(S_atom.GetIdx(), terminal_oxygens[ii].GetIdx()) + bond = mol.GetBondBetweenAtoms( + S_atom.GetIdx(), terminal_oxygens[ii].GetIdx() + ) bond.SetBondType(Chem.rdchem.BondType.DOUBLE) @@ -176,11 +197,15 @@ def sanitize_carboxyl_Catom(C_atom, verbose=True): if verbose: print("Carbonxyl group detected, sanitizing it...") # set one C-O and one C=O - bond1 = mol.GetBondBetweenAtoms(C_atom.GetIdx(), terminal_oxygens[0].GetIdx()) + bond1 = mol.GetBondBetweenAtoms( + C_atom.GetIdx(), terminal_oxygens[0].GetIdx() + ) bond1.SetBondType(Chem.rdchem.BondType.SINGLE) terminal_oxygens[0].SetFormalCharge(-1) - bond2 = mol.GetBondBetweenAtoms(C_atom.GetIdx(), terminal_oxygens[1].GetIdx()) + bond2 = mol.GetBondBetweenAtoms( + C_atom.GetIdx(), terminal_oxygens[1].GetIdx() + ) bond2.SetBondType(Chem.rdchem.BondType.DOUBLE) terminal_oxygens[1].SetFormalCharge(0) @@ -226,11 +251,15 @@ def sanitize_nitro_Natom(N_atom, verbose=True): if verbose: print("Nitro group detected, sanitizing it...") # set one N-O and one N=O - bond1 = mol.GetBondBetweenAtoms(N_atom.GetIdx(), terminal_oxygens[0].GetIdx()) + bond1 = mol.GetBondBetweenAtoms( + N_atom.GetIdx(), terminal_oxygens[0].GetIdx() + ) bond1.SetBondType(Chem.rdchem.BondType.SINGLE) terminal_oxygens[0].SetFormalCharge(-1) - bond2 = mol.GetBondBetweenAtoms(N_atom.GetIdx(), terminal_oxygens[1].GetIdx()) + bond2 = mol.GetBondBetweenAtoms( + N_atom.GetIdx(), terminal_oxygens[1].GetIdx() + ) bond2.SetBondType(Chem.rdchem.BondType.DOUBLE) terminal_oxygens[1].SetFormalCharge(0) @@ -242,7 +271,7 @@ def sanitize_nitro(mol): def is_terminal_nitrogen(N_atom): - if N_atom.GetSymbol() == 'N' and len(N_atom.GetNeighbors()) == 1: + if N_atom.GetSymbol() == "N" and len(N_atom.GetNeighbors()) == 1: return True else: return False @@ -342,7 +371,9 @@ def kekulize_aromatic_heterocycles(mol_in, assign_formal_charge=True, sanitize=T rings = [list(i) for i in list(rings)] rings.sort(key=lambda r: len(r)) - def search_and_assign_ring(mol, ring, hetero, start, forward=True, start_switch=True): + def search_and_assign_ring( + mol, ring, hetero, start, forward=True, start_switch=True + ): j = start switch = start_switch lring = len(ring) @@ -370,7 +401,11 @@ def print_bondtypes(mol, rings): lring = len(ring) btype = [] for i in range(lring): - btype.append(mol.GetBondBetweenAtoms(ring[i], ring[(i + 1) % lring]).GetBondType()) + btype.append( + mol.GetBondBetweenAtoms( + ring[i], ring[(i + 1) % lring] + ).GetBondType() + ) atoms = [mol.GetAtomWithIdx(i).GetSymbol() for i in ring] print(ring) print(atoms) @@ -381,9 +416,9 @@ def hetero_priority(idx, mol): sym = atom.GetSymbol() valence = len(atom.GetBonds()) - if (sym in ['O', 'S']) & (valence == 2): + if (sym in ["O", "S"]) & (valence == 2): return 0 - elif (sym in ['N', 'P', 'As', 'B']): + elif sym in ["N", "P", "As", "B"]: if valence == 3: return 1 elif valence == 2: @@ -398,7 +433,7 @@ def hetero_priority(idx, mol): bAllC = True for i in range(lring): atom = mol.GetAtomWithIdx(ring[i]) - if atom.GetSymbol() != 'C': + if atom.GetSymbol() != "C": bAllC = False bond = mol.GetBondBetweenAtoms(ring[i], ring[(i + 1) % lring]) @@ -431,7 +466,7 @@ def hetero_priority(idx, mol): if (fuseCAr[i] == fuseCAr[i - 1]) & (fuseCAr[i] >= 0): fuseDouble.append(i) atom = mol.GetAtomWithIdx(ring[i]) - if atom.GetSymbol() != 'C': + if atom.GetSymbol() != "C": hetero.append(i) atom_bonds = atom.GetBonds() btype = [bond.GetBondType() for bond in atom_bonds] @@ -451,40 +486,58 @@ def hetero_priority(idx, mol): for i in hasDouble: d1, e1 = search_and_assign_ring(mol, ring, hetero, i, forward=True) d2, e2 = search_and_assign_ring(mol, ring, hetero, i, forward=False) - n_targetDouble -= (d1 + d2 + 1) - n_targetEdit -= (e1 + e2) + n_targetDouble -= d1 + d2 + 1 + n_targetEdit -= e1 + e2 for i in fuseDouble: bond = mol.GetBondBetweenAtoms(ring[i], ring[(i - 1) % lring]) if bond.GetBondType() == BondType.AROMATIC: bond.SetBondType(BondType.DOUBLE) mol_edit_log(mol, ring[i], ring[(i - 1) % lring]) d1, e1 = search_and_assign_ring(mol, ring, hetero, i, forward=True) - d2, e2 = search_and_assign_ring(mol, ring, hetero, (i - 1) % lring, forward=False) - n_targetDouble -= (d1 + d2 + 1) - n_targetEdit -= (e1 + e2 + 1) + d2, e2 = search_and_assign_ring( + mol, ring, hetero, (i - 1) % lring, forward=False + ) + n_targetDouble -= d1 + d2 + 1 + n_targetEdit -= e1 + e2 + 1 for i in hetero: atom = mol.GetAtomWithIdx(ring[i]) if (hetero_prior[i] == 2) | (n_targetDouble * 2 >= n_targetEdit): - forward_btype = mol.GetBondBetweenAtoms(ring[i], ring[(i + 1) % lring]).GetBondType() - backward_btype = mol.GetBondBetweenAtoms(ring[i], ring[(i - 1) % lring]).GetBondType() + forward_btype = mol.GetBondBetweenAtoms( + ring[i], ring[(i + 1) % lring] + ).GetBondType() + backward_btype = mol.GetBondBetweenAtoms( + ring[i], ring[(i - 1) % lring] + ).GetBondType() if forward_btype != BondType.AROMATIC: switch = forward_btype == BondType.DOUBLE - d1, e1 = search_and_assign_ring(mol, ring, hetero, i, forward=False, start_switch=switch) + d1, e1 = search_and_assign_ring( + mol, ring, hetero, i, forward=False, start_switch=switch + ) d2 = e2 = 0 elif backward_btype != BondType.AROMATIC: switch = backward_btype == BondType.DOUBLE - d1, e1 = search_and_assign_ring(mol, ring, hetero, i, forward=True, start_switch=switch) + d1, e1 = search_and_assign_ring( + mol, ring, hetero, i, forward=True, start_switch=switch + ) d2 = e2 = 0 else: - d1, e1 = search_and_assign_ring(mol, ring, hetero, i, forward=True, start_switch=True) - d2, e2 = search_and_assign_ring(mol, ring, hetero, i, forward=False, start_switch=False) - n_targetDouble -= (d1 + d2) - n_targetEdit -= (e1 + e2) + d1, e1 = search_and_assign_ring( + mol, ring, hetero, i, forward=True, start_switch=True + ) + d2, e2 = search_and_assign_ring( + mol, ring, hetero, i, forward=False, start_switch=False + ) + n_targetDouble -= d1 + d2 + n_targetEdit -= e1 + e2 else: - d1, e1 = search_and_assign_ring(mol, ring, hetero, i, forward=True, start_switch=True) - d2, e2 = search_and_assign_ring(mol, ring, hetero, i, forward=False, start_switch=True) - n_targetDouble -= (d1 + d2) - n_targetEdit -= (e1 + e2) + d1, e1 = search_and_assign_ring( + mol, ring, hetero, i, forward=True, start_switch=True + ) + d2, e2 = search_and_assign_ring( + mol, ring, hetero, i, forward=False, start_switch=True + ) + n_targetDouble -= d1 + d2 + n_targetEdit -= e1 + e2 for ring in CAr: lring = len(ring) @@ -508,18 +561,22 @@ def hetero_priority(idx, mol): Chem.SanitizeMol(mol_edited) return mol_edited except Exception as e: - raise RuntimeError(f"Manual kekulization for aromatic heterocycles failed, below are errors:\n\t {e}") + raise RuntimeError( + f"Manual kekulization for aromatic heterocycles failed, below are errors:\n\t {e}" + ) -def convert_by_obabel(mol, cache_dir=os.path.join(os.getcwd(), '.cache'), obabel_path="obabel"): +def convert_by_obabel( + mol, cache_dir=os.path.join(os.getcwd(), ".cache"), obabel_path="obabel" +): if not os.path.exists(cache_dir): os.mkdir(cache_dir) if mol.HasProp("_Name"): name = mol.GetProp("_Name") else: name = f"mol{int(time.time())}" - mol_file_in = os.path.join(cache_dir, f'{name}.mol') - mol_file_out = os.path.join(cache_dir, f'{name}_obabel.mol') + mol_file_in = os.path.join(cache_dir, f"{name}.mol") + mol_file_out = os.path.join(cache_dir, f"{name}_obabel.mol") Chem.MolToMolFile(mol, mol_file_in, kekulize=False) obConversion = openbabel.OBConversion() obConversion.SetInAndOutFormats("mol", "mol") @@ -551,12 +608,14 @@ def super_sanitize_mol(mol, name=None, verbose=True): try: if verbose: print( - "Hermite procedure failed, maybe due to unsupported representation of hetero aromatic rings, re-try with obabel") + "Hermite procedure failed, maybe due to unsupported representation of hetero aromatic rings, re-try with obabel" + ) print("=====Stage 2: re-try with obabel=====") mol = convert_by_obabel(mol) mol = sanitize_mol(mol, verbose) - mol = kekulize_aromatic_heterocycles(mol, assign_formal_charge=False, - sanitize=False) # aromatic heterocycles + mol = kekulize_aromatic_heterocycles( + mol, assign_formal_charge=False, sanitize=False + ) # aromatic heterocycles mol = regularize_formal_charges(mol, sanitize=False) mol_copy = deepcopy(mol) Chem.SanitizeMol(mol_copy) @@ -571,30 +630,34 @@ def super_sanitize_mol(mol, name=None, verbose=True): class Sanitizer(object): - def __init__(self, level='medium', raise_errors=True, verbose=False): - ''' - Set up sanitizer. - -------- - Parameters: - level : 'low', 'medium' or 'high'. - `low` - use rdkit.Chem.SanitizeMol() to sanitize - `medium` - before using rdkit, assign formal charges of each atom first, which requires - the rightness of bond order information - `high` - try to regularize bond order of nitro, phosphate, sulfate, nitrine, guanidine, - pyridine-oxide function groups and aromatic heterocycles. If failed, the program - will call obabel to pre-process the mol object and re-try the procedure. - ''' + def __init__(self, level="medium", raise_errors=True, verbose=False): + """ + Set up sanitizer. + -------- + Parameters: + level : 'low', 'medium' or 'high'. + `low` - use rdkit.Chem.SanitizeMol() to sanitize + `medium` - before using rdkit, assign formal charges of each atom first, which requires + the rightness of bond order information + `high` - try to regularize bond order of nitro, phosphate, sulfate, nitrine, guanidine, + pyridine-oxide function groups and aromatic heterocycles. If failed, the program + will call obabel to pre-process the mol object and re-try the procedure. + """ self._check_level(level) self.level = level self.raise_errors = raise_errors self.verbose = verbose def _check_level(self, level): - if level not in ['low', 'medium', 'high']: - raise ValueError(f"Invalid level '{level}', please set to 'low', 'medium' or 'high'") + if level not in ["low", "medium", "high"]: + raise ValueError( + f"Invalid level '{level}', please set to 'low', 'medium' or 'high'" + ) else: - if level == 'high' and not USE_OBABEL: - raise ModuleNotFoundError("obabel not installed, high level sanitizer cannot work") + if level == "high" and not USE_OBABEL: + raise ModuleNotFoundError( + "obabel not installed, high level sanitizer cannot work" + ) def _handle_exception(self, error_info): if self.raise_errors: @@ -603,9 +666,9 @@ def _handle_exception(self, error_info): print(error_info) def sanitize(self, mol): - ''' - Sanitize mol according to `self.level`. If failed, return None. - ''' + """ + Sanitize mol according to `self.level`. If failed, return None. + """ if self.level == "low": try: Chem.SanitizeMol(mol) diff --git a/dpdata/rdkit/utils.py b/dpdata/rdkit/utils.py index 5cf0df32..e5d1c7a8 100644 --- a/dpdata/rdkit/utils.py +++ b/dpdata/rdkit/utils.py @@ -5,6 +5,7 @@ pass import numpy as np + def mol_to_system_data(mol): if not isinstance(mol, Chem.rdchem.Mol): raise TypeError(f"rdkit.Chem.Mol required, not {type(mol)}") @@ -12,40 +13,55 @@ def mol_to_system_data(mol): num_confs = mol.GetNumConformers() if num_confs: atom_symbols = [at.GetSymbol() for at in mol.GetAtoms()] - atom_names, atom_types, atom_numbs = np.unique(atom_symbols, return_inverse=True, return_counts=True) + atom_names, atom_types, atom_numbs = np.unique( + atom_symbols, return_inverse=True, return_counts=True + ) coords = np.array([conf.GetPositions() for conf in mol.GetConformers()]) - bonds = np.array([[bond.GetBeginAtomIdx(), - bond.GetEndAtomIdx(), - bond.GetBondTypeAsDouble()] for bond in mol.GetBonds()]) - formal_charges = np.array([at.GetFormalCharge() for at in mol.GetAtoms()], dtype=np.int32) + bonds = np.array( + [ + [ + bond.GetBeginAtomIdx(), + bond.GetEndAtomIdx(), + bond.GetBondTypeAsDouble(), + ] + for bond in mol.GetBonds() + ] + ) + formal_charges = np.array( + [at.GetFormalCharge() for at in mol.GetAtoms()], dtype=np.int32 + ) data = {} - data['atom_numbs'] = list(atom_numbs) - data['atom_names'] = list(atom_names) - data['atom_types'] = atom_types - data['cells'] = np.array([[[100., 0., 0.], - [0., 100., 0.], - [0., 0., 100.]] for _ in range(num_confs)]) - data['coords'] = coords - data['bonds'] = bonds - data['formal_charges'] = formal_charges - data['orig'] = np.array([0., 0., 0.]) + data["atom_numbs"] = list(atom_numbs) + data["atom_names"] = list(atom_names) + data["atom_types"] = atom_types + data["cells"] = np.array( + [ + [[100.0, 0.0, 0.0], [0.0, 100.0, 0.0], [0.0, 0.0, 100.0]] + for _ in range(num_confs) + ] + ) + data["coords"] = coords + data["bonds"] = bonds + data["formal_charges"] = formal_charges + data["orig"] = np.array([0.0, 0.0, 0.0]) # other properties if mol.HasProp("_Name"): - data['_name'] = mol.GetProp('_Name') + data["_name"] = mol.GetProp("_Name") return data else: raise ValueError("The moleclue does not contain 3-D conformers") + def system_data_to_mol(data): mol_ed = Chem.RWMol() - atom_symbols = [data['atom_names'][i] for i in data['atom_types']] + atom_symbols = [data["atom_names"][i] for i in data["atom_types"]] # add atoms - for atom_type in data['atom_types']: - symbol = data['atom_names'][atom_type] + for atom_type in data["atom_types"]: + symbol = data["atom_names"][atom_type] atom = Chem.Atom(symbol) mol_ed.AddAtom(atom) # add bonds - for bond_info in data['bonds']: + for bond_info in data["bonds"]: if bond_info[2] == 1: mol_ed.AddBond(int(bond_info[0]), int(bond_info[1]), Chem.BondType.SINGLE) elif bond_info[2] == 2: @@ -55,21 +71,21 @@ def system_data_to_mol(data): elif bond_info[2] == 1.5: mol_ed.AddBond(int(bond_info[0]), int(bond_info[1]), Chem.BondType.AROMATIC) # set conformers - for frame_idx in range(data['coords'].shape[0]): - conf = Chem.rdchem.Conformer(len(data['atom_types'])) - for atom_idx in range(len(data['atom_types'])): - conf.SetAtomPosition(atom_idx, data['coords'][frame_idx][atom_idx]) + for frame_idx in range(data["coords"].shape[0]): + conf = Chem.rdchem.Conformer(len(data["atom_types"])) + for atom_idx in range(len(data["atom_types"])): + conf.SetAtomPosition(atom_idx, data["coords"][frame_idx][atom_idx]) mol_ed.AddConformer(conf, assignId=True) mol = mol_ed.GetMol() # set formal charges for idx, atom in enumerate(mol.GetAtoms()): - atom.SetFormalCharge(int(data['formal_charges'][idx])) + atom.SetFormalCharge(int(data["formal_charges"][idx])) # set mol name - if '_name' in list(data.keys()): - mol.SetProp("_Name", data['_name']) + if "_name" in list(data.keys()): + mol.SetProp("_Name", data["_name"]) # sanitize Chem.SanitizeMol(mol_ed) - return mol + return mol def check_same_atom(atom_1, atom_2): @@ -80,6 +96,7 @@ def check_same_atom(atom_1, atom_2): else: return True + def check_same_molecule(mol_1, mol_2): flag = True for bond_1, bond_2 in zip(mol_1.GetBonds(), mol_2.GetBonds()): @@ -93,6 +110,7 @@ def check_same_molecule(mol_1, mol_2): break return flag + def check_molecule_list(mols): flag = True for mol in mols[1:]: @@ -101,6 +119,7 @@ def check_molecule_list(mols): break return flag + def combine_molecules(mols): if check_molecule_list(mols): for mol in mols[1:]: diff --git a/dpdata/siesta/__init__.py b/dpdata/siesta/__init__.py index 8b137891..e69de29b 100644 --- a/dpdata/siesta/__init__.py +++ b/dpdata/siesta/__init__.py @@ -1 +0,0 @@ - diff --git a/dpdata/siesta/aiMD_output.py b/dpdata/siesta/aiMD_output.py index 766ced21..32cb081f 100644 --- a/dpdata/siesta/aiMD_output.py +++ b/dpdata/siesta/aiMD_output.py @@ -7,7 +7,7 @@ #############################read output##################################### def get_single_line_tail(fin, keyword, num=1): - file = open(fin, 'r') + file = open(fin, "r") part_res = [] for value in file: if keyword in value: @@ -17,12 +17,21 @@ def get_single_line_tail(fin, keyword, num=1): file.close() return part_res + ## atomnum: number of atoms, row numbers ## begin_column: begin column num ## read_column_num: read column num ## column_num: the column number in nxet reading line -def extract_keyword(fout, keyword, down_line_num, begin_column, read_column_num, is_repeated_read, column_num): - file = open(fout, 'r') +def extract_keyword( + fout, + keyword, + down_line_num, + begin_column, + read_column_num, + is_repeated_read, + column_num, +): + file = open(fout, "r") ret = [] part_ret = [] flag = 0 @@ -61,17 +70,18 @@ def extract_keyword(fout, keyword, down_line_num, begin_column, read_column_num, file.close() return part_ret + def obtain_nframe(fname): - fp = open(fname, 'r') + fp = open(fname, "r") flag = False idx = 0 temp = 0 for ii in fp: - if 'siesta: Stress tensor (static) (eV/Ang**3):' in ii: + if "siesta: Stress tensor (static) (eV/Ang**3):" in ii: flag = True continue if flag: - if not 'siesta: Pressure (static):' in ii: + if not "siesta: Pressure (static):" in ii: if len(ii.split()) == 3: temp += 1 if temp == 3: @@ -82,34 +92,42 @@ def obtain_nframe(fname): fp.close() return idx + def get_atom_types(fout, atomnums): - covert_type = extract_keyword(fout, 'outcoor: Atomic coordinates (Ang):', atomnums, 3, 4, 0, 6)[0] + covert_type = extract_keyword( + fout, "outcoor: Atomic coordinates (Ang):", atomnums, 3, 4, 0, 6 + )[0] atomtype = [] # print(covert_type) for i in range(0, len(covert_type)): atomtype.append(int(covert_type[i]) - 1) return atomtype + def get_atom_name(fout): - file = open(fout, 'r') + file = open(fout, "r") ret = [] for value in file: - if 'Species number:' in value: + if "Species number:" in value: for j in range(len(value.split())): - if value.split()[j] == 'Label:': - ret.append(value.split()[j+1]) - break + if value.split()[j] == "Label:": + ret.append(value.split()[j + 1]) + break file.close() return ret + def get_atom_numbs(atomtypes): atom_numbs = [] for i in set(atomtypes): atom_numbs.append(atomtypes.count(i)) return atom_numbs + def get_virial(fout, cell): - viri = extract_keyword(fout, 'siesta: Stress tensor (static) (eV/Ang**3):', 3, 0, 3, 1, 3) + viri = extract_keyword( + fout, "siesta: Stress tensor (static) (eV/Ang**3):", 3, 0, 3, 1, 3 + ) vols = [] length = obtain_nframe(fout) for ii in range(length): @@ -120,6 +138,7 @@ def get_virial(fout, cell): viri[ii][jj] *= vols[ii] return viri + def covert_dimension(arr, num): arr = np.array(arr) frames = len(arr) @@ -128,23 +147,39 @@ def covert_dimension(arr, num): ret[i] = arr[i].reshape(num, 3) return ret + def get_aiMD_frame(fname): - NumberOfSpecies = int(get_single_line_tail(fname, 'redata: Number of Atomic Species')[0]) + NumberOfSpecies = int( + get_single_line_tail(fname, "redata: Number of Atomic Species")[0] + ) atom_names = get_atom_name(fname) - tot_natoms = int(get_single_line_tail(fname, 'Number of atoms', 3)[0]) + tot_natoms = int(get_single_line_tail(fname, "Number of atoms", 3)[0]) atom_types = get_atom_types(fname, tot_natoms) atom_numbs = get_atom_numbs(atom_types) - assert (max(atom_types) + 1 == NumberOfSpecies) - - cell = extract_keyword(fname, 'outcell: Unit cell vectors (Ang):', 3, 0, 3, 1, 3) - coord = extract_keyword(fname, 'outcoor: Atomic coordinates (Ang):', tot_natoms, 0, 3, 1, 6) - energy = get_single_line_tail(fname, 'siesta: E_KS(eV) =') - force = extract_keyword(fname, 'siesta: Atomic forces (eV/Ang):', tot_natoms, 1, 4, 1, 4) + assert max(atom_types) + 1 == NumberOfSpecies + + cell = extract_keyword(fname, "outcell: Unit cell vectors (Ang):", 3, 0, 3, 1, 3) + coord = extract_keyword( + fname, "outcoor: Atomic coordinates (Ang):", tot_natoms, 0, 3, 1, 6 + ) + energy = get_single_line_tail(fname, "siesta: E_KS(eV) =") + force = extract_keyword( + fname, "siesta: Atomic forces (eV/Ang):", tot_natoms, 1, 4, 1, 4 + ) virial = get_virial(fname, cell) cells = covert_dimension(np.array(cell), 3) coords = covert_dimension(np.array(coord), tot_natoms) forces = covert_dimension(np.array(force), tot_natoms) virials = covert_dimension(np.array(virial), 3) - return atom_names, atom_numbs, np.array(atom_types), cells, coords, np.array(energy), forces, virials + return ( + atom_names, + atom_numbs, + np.array(atom_types), + cells, + coords, + np.array(energy), + forces, + virials, + ) diff --git a/dpdata/siesta/output.py b/dpdata/siesta/output.py index e5969b27..b70fe55d 100644 --- a/dpdata/siesta/output.py +++ b/dpdata/siesta/output.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/python3 import numpy as np @@ -8,7 +8,7 @@ #############################read output##################################### def get_single_line_tail(fin, keyword, num=1): - file = open(fin, 'r') + file = open(fin, "r") res = [] for value in file: if keyword in value: @@ -23,7 +23,7 @@ def get_single_line_tail(fin, keyword, num=1): ## begin_column: begin column num ## column_num: read column num def extract_keyword(fout, keyword, down_line_num, begin_column, column_num): - file = open(fout, 'r') + file = open(fout, "r") ret = [] flag = 0 idx = 0 @@ -53,24 +53,28 @@ def extract_keyword(fout, keyword, down_line_num, begin_column, column_num): def get_atom_types(fout, atomnums): - covert_type = extract_keyword(fout, 'outcoor: Atomic coordinates (Ang):', atomnums, 3, 4) + covert_type = extract_keyword( + fout, "outcoor: Atomic coordinates (Ang):", atomnums, 3, 4 + ) atomtype = [] for i in range(0, len(covert_type)): atomtype.append(int(covert_type[i]) - 1) return atomtype + def get_atom_name(fout): - file = open(fout, 'r') + file = open(fout, "r") ret = [] for value in file: - if 'Species number:' in value: + if "Species number:" in value: for j in range(len(value.split())): - if value.split()[j] == 'Label:': - ret.append(value.split()[j+1]) - break + if value.split()[j] == "Label:": + ret.append(value.split()[j + 1]) + break file.close() return ret + def get_atom_numbs(atomtypes): atom_numbs = [] for i in set(atomtypes): @@ -83,7 +87,7 @@ def get_virial(fout, cells): for ii in cells: ### calucate vol vols.append(np.linalg.det(ii.reshape([3, 3]))) - ret = extract_keyword(fout, 'siesta: Stress tensor (static) (eV/Ang**3):', 3, 1, 4) + ret = extract_keyword(fout, "siesta: Stress tensor (static) (eV/Ang**3):", 3, 1, 4) ret = np.array([ret]) for idx, ii in enumerate(ret): ## siesta: 1eV/A^3= 1.60217*10^11 Pa , ---> qe: kBar=10^8Pa @@ -93,16 +97,20 @@ def get_virial(fout, cells): def obtain_frame(fname): - NumberOfSpecies = int(get_single_line_tail(fname, 'redata: Number of Atomic Species')[0]) + NumberOfSpecies = int( + get_single_line_tail(fname, "redata: Number of Atomic Species")[0] + ) atom_names = get_atom_name(fname) - tot_natoms = int(get_single_line_tail(fname, 'Number of atoms', 3)[0]) + tot_natoms = int(get_single_line_tail(fname, "Number of atoms", 3)[0]) atom_types = get_atom_types(fname, tot_natoms) atom_numbs = get_atom_numbs(atom_types) - assert (max(atom_types) + 1 == NumberOfSpecies) - cell = extract_keyword(fname, 'outcell: Unit cell vectors (Ang):', 3, 0, 3) - coord = extract_keyword(fname, 'outcoor: Atomic coordinates (Ang):', tot_natoms, 0, 3) - energy = get_single_line_tail(fname, 'siesta: E_KS(eV) =') - force = extract_keyword(fname, 'siesta: Atomic forces (eV/Ang):', tot_natoms, 1, 4) + assert max(atom_types) + 1 == NumberOfSpecies + cell = extract_keyword(fname, "outcell: Unit cell vectors (Ang):", 3, 0, 3) + coord = extract_keyword( + fname, "outcoor: Atomic coordinates (Ang):", tot_natoms, 0, 3 + ) + energy = get_single_line_tail(fname, "siesta: E_KS(eV) =") + force = extract_keyword(fname, "siesta: Atomic forces (eV/Ang):", tot_natoms, 1, 4) virial = get_virial(fname, np.array([cell])) cell = np.array(cell).reshape(3, 3) @@ -121,6 +129,13 @@ def obtain_frame(fname): # data['forces'] = np.array([force]) # data['virials'] = virial # return data - return atom_names, atom_numbs, np.array(atom_types), np.array([cell]), np.array([coord]), \ - np.array(energy), np.array([force]), np.array([virial]) - + return ( + atom_names, + atom_numbs, + np.array(atom_types), + np.array([cell]), + np.array([coord]), + np.array(energy), + np.array([force]), + np.array([virial]), + ) diff --git a/dpdata/stat.py b/dpdata/stat.py index 46d0a4a3..1f6193af 100644 --- a/dpdata/stat.py +++ b/dpdata/stat.py @@ -40,7 +40,7 @@ def rmse(errors: np.ndarray) -> np.float64: class ErrorsBase(metaclass=ABCMeta): """Compute errors (deviations) between two systems. The type of system is assigned by SYSTEM_TYPE. - + Parameters ---------- system_1 : object @@ -48,11 +48,16 @@ class ErrorsBase(metaclass=ABCMeta): system_2 : object system 2 """ + SYSTEM_TYPE = object def __init__(self, system_1: SYSTEM_TYPE, system_2: SYSTEM_TYPE) -> None: - assert isinstance(system_1, self.SYSTEM_TYPE), "system_1 should be %s" % self.SYSTEM_TYPE.__name__ - assert isinstance(system_2, self.SYSTEM_TYPE), "system_2 should be %s" % self.SYSTEM_TYPE.__name__ + assert isinstance(system_1, self.SYSTEM_TYPE), ( + "system_1 should be %s" % self.SYSTEM_TYPE.__name__ + ) + assert isinstance(system_2, self.SYSTEM_TYPE), ( + "system_2 should be %s" % self.SYSTEM_TYPE.__name__ + ) self.system_1 = system_1 self.system_2 = system_2 @@ -78,7 +83,7 @@ def e_rmse(self) -> np.float64: def f_mae(self) -> np.float64: """Force MAE.""" return mae(self.f_errors) - + @property def f_rmse(self) -> np.float64: """Force RMSE.""" @@ -102,19 +107,20 @@ class Errors(ErrorsBase): >>> e = dpdata.stat.Errors(system_1, system_2) >>> print("%.4f %.4f %.4f %.4f" % (e.e_mae, e.e_rmse, e.f_mae, e.f_rmse)) """ + SYSTEM_TYPE = LabeledSystem @property @lru_cache() def e_errors(self) -> np.ndarray: """Energy errors.""" - return self.system_1['energies'] - self.system_2['energies'] + return self.system_1["energies"] - self.system_2["energies"] @property @lru_cache() def f_errors(self) -> np.ndarray: """Force errors.""" - return (self.system_1['forces'] - self.system_2['forces']).ravel() + return (self.system_1["forces"] - self.system_2["forces"]).ravel() class MultiErrors(ErrorsBase): @@ -134,6 +140,7 @@ class MultiErrors(ErrorsBase): >>> e = dpdata.stat.MultiErrors(system_1, system_2) >>> print("%.4f %.4f %.4f %.4f" % (e.e_mae, e.e_rmse, e.f_mae, e.f_rmse)) """ + SYSTEM_TYPE = MultiSystems @property diff --git a/dpdata/system.py b/dpdata/system.py index f403b879..f7ad7b5f 100644 --- a/dpdata/system.py +++ b/dpdata/system.py @@ -8,7 +8,7 @@ from enum import Enum, unique from typing import Any, Tuple, Union from monty.json import MSONable -from monty.serialization import loadfn,dumpfn +from monty.serialization import loadfn, dumpfn from dpdata.periodic_table import Element from dpdata.amber.mask import pick_by_amber_mask, load_param_file import dpdata @@ -26,19 +26,21 @@ add_atom_names, ) + def load_format(fmt): fmt = fmt.lower() formats = Format.get_formats() if fmt in formats: return formats[fmt]() raise NotImplementedError( - "Unsupported data format %s. Supported formats: %s" % ( - fmt, " ".join(formats) - )) + "Unsupported data format %s. Supported formats: %s" % (fmt, " ".join(formats)) + ) + @unique class Axis(Enum): """Data axis.""" + NFRAMES = "nframes" NATOMS = "natoms" NTYPES = "ntypes" @@ -64,7 +66,14 @@ class DataType: required : bool, default=True whether this data is required """ - def __init__(self, name: str, dtype: type, shape: Tuple[int, Axis]=None, required: bool=True) -> None: + + def __init__( + self, + name: str, + dtype: type, + shape: Tuple[int, Axis] = None, + required: bool = True, + ) -> None: self.name = name self.dtype = dtype self.shape = shape @@ -91,12 +100,12 @@ def real_shape(self, system: "System") -> Tuple[int]: def check(self, system: "System"): """Check if a system has correct data of this type. - + Parameters ---------- system : System checked system - + Raises ------ DataError @@ -110,28 +119,34 @@ def check(self, system: "System"): if isinstance(data, list) and not len(data): pass elif not isinstance(data, self.dtype): - raise DataError("Type of %s is %s, but expected %s" % (self.name, - type(data).__name__, self.dtype.__name__)) + raise DataError( + "Type of %s is %s, but expected %s" + % (self.name, type(data).__name__, self.dtype.__name__) + ) # check shape if self.shape is not None: shape = self.real_shape(system) # skip checking empty list of np.ndarray if isinstance(data, np.ndarray): if data.size and shape != data.shape: - raise DataError("Shape of %s is %s, but expected %s" % (self.name, - data.shape, shape)) + raise DataError( + "Shape of %s is %s, but expected %s" + % (self.name, data.shape, shape) + ) elif isinstance(data, list): if len(shape) and shape[0] != len(data): - raise DataError("Length of %s is %d, but expected %d" % (self.name, - len(data), shape[0])) + raise DataError( + "Length of %s is %d, but expected %d" + % (self.name, len(data), shape[0]) + ) else: raise RuntimeError("Unsupported type to check shape") elif self.required: raise DataError("%s not found in data" % self.name) -class System (MSONable) : - ''' +class System(MSONable): + """ The data System A data System (a concept used by `deepmd-kit `_) @@ -153,12 +168,13 @@ class System (MSONable) : Restrictions: - `d_example['orig']` is always [0, 0, 0] - `d_example['cells'][ii]` is always lower triangular (lammps cell tensor convention) - + Attributes ---------- DTYPES : tuple[DataType] data types of this class - ''' + """ + DTYPES = ( DataType("atom_numbs", list, (Axis.NTYPES,)), DataType("atom_names", list, (Axis.NTYPES,)), @@ -169,15 +185,17 @@ class System (MSONable) : DataType("nopbc", bool, required=False), ) - def __init__ (self, - file_name = None, - fmt = 'auto', - type_map = None, - begin = 0, - step = 1, - data = None, - convergence_check = True, - **kwargs) : + def __init__( + self, + file_name=None, + fmt="auto", + type_map=None, + begin=0, + step=1, + data=None, + convergence_check=True, + **kwargs, + ): """ Constructor @@ -199,7 +217,7 @@ def __init__ (self, - ``vasp/xml``: vasp xml - ``qe/cp/traj``: Quantum Espresso CP trajectory files. should have: file_name+'.in' and file_name+'.pos' - ``qe/pw/scf``: Quantum Espresso PW single point calculations. Both input and output files are required. If file_name is a string, it denotes the output file name. Input file name is obtained by replacing 'out' by 'in' from file_name. Or file_name is a list, with the first element being the input file name and the second element being the output filename. - - ``abacus/scf``: ABACUS pw/lcao scf. The directory containing INPUT file is required. + - ``abacus/scf``: ABACUS pw/lcao scf. The directory containing INPUT file is required. - ``abacus/md``: ABACUS pw/lcao MD. The directory containing INPUT file is required. - ``abacus/relax``: ABACUS pw/lcao relax or cell-relax. The directory containing INPUT file is required. - ``abacus/stru``: abacus stru @@ -249,27 +267,35 @@ def __init__ (self, Whether to request a convergence check. """ self.data = {} - self.data['atom_numbs'] = [] - self.data['atom_names'] = [] - self.data['atom_types'] = [] - self.data['orig'] = np.array([0, 0, 0]) - self.data['cells'] = [] - self.data['coords'] = [] + self.data["atom_numbs"] = [] + self.data["atom_names"] = [] + self.data["atom_types"] = [] + self.data["orig"] = np.array([0, 0, 0]) + self.data["cells"] = [] + self.data["coords"] = [] if data: - self.data=data + self.data = data self.check_data() return - if file_name is None : + if file_name is None: return - self.from_fmt(file_name, fmt, type_map=type_map, begin= begin, step=step, convergence_check=convergence_check, **kwargs) + self.from_fmt( + file_name, + fmt, + type_map=type_map, + begin=begin, + step=step, + convergence_check=convergence_check, + **kwargs, + ) if type_map is not None: self.apply_type_map(type_map) def check_data(self): """Check if data is correct. - + Raises ------ DataError @@ -280,16 +306,19 @@ def check_data(self): for dd in self.DTYPES: dd.check(self) if sum(self.get_atom_numbs()) != self.get_natoms(): - raise DataError("Sum of atom_numbs (%d) is not equal to natoms (%d)." % (sum(self.get_atom_numbs()), self.get_natoms())) + raise DataError( + "Sum of atom_numbs (%d) is not equal to natoms (%d)." + % (sum(self.get_atom_numbs()), self.get_natoms()) + ) post_funcs = Plugin() - def from_fmt(self, file_name, fmt='auto', **kwargs): + def from_fmt(self, file_name, fmt="auto", **kwargs): fmt = fmt.lower() - if fmt == 'auto': - fmt = os.path.basename(file_name).split('.')[-1].lower() + if fmt == "auto": + fmt = os.path.basename(file_name).split(".")[-1].lower() return self.from_fmt_obj(load_format(fmt), file_name, **kwargs) - + def from_fmt_obj(self, fmtobj, file_name, **kwargs): data = fmtobj.from_system(file_name, **kwargs) if data: @@ -299,26 +328,26 @@ def from_fmt_obj(self, fmtobj, file_name, **kwargs): else: self.data = {**self.data, **data} self.check_data() - if hasattr(fmtobj.from_system, 'post_func'): + if hasattr(fmtobj.from_system, "post_func"): for post_f in fmtobj.from_system.post_func: self.post_funcs.get_plugin(post_f)(self) return self - def to(self, fmt: str, *args, **kwargs) -> 'System': + def to(self, fmt: str, *args, **kwargs) -> "System": """Dump systems to the specific format. - + Parameters ---------- fmt : str format - + Returns ------- System self """ return self.to_fmt_obj(load_format(fmt), *args, **kwargs) - + def to_fmt_obj(self, fmtobj, *args, **kwargs): return fmtobj.to_system(self.data, *args, **kwargs) @@ -326,15 +355,15 @@ def __repr__(self): return self.__str__() def __str__(self): - ret="Data Summary" - ret+="\nUnlabeled System" - ret+="\n-------------------" - ret+="\nFrame Numbers : %d"%self.get_nframes() - ret+="\nAtom Numbers : %d"%self.get_natoms() - ret+="\nElement List :" - ret+="\n-------------------" - ret+="\n"+" ".join(map(str,self.get_atom_names())) - ret+="\n"+" ".join(map(str,self.get_atom_numbs())) + ret = "Data Summary" + ret += "\nUnlabeled System" + ret += "\n-------------------" + ret += "\nFrame Numbers : %d" % self.get_nframes() + ret += "\nAtom Numbers : %d" % self.get_natoms() + ret += "\nElement List :" + ret += "\n-------------------" + ret += "\n" + " ".join(map(str, self.get_atom_names())) + ret += "\n" + " ".join(map(str, self.get_atom_numbs())) return ret def __getitem__(self, key): @@ -343,31 +372,28 @@ def __getitem__(self, key): return self.sub_system(key) return self.data[key] - def __len__(self) : + def __len__(self): """Returns number of frames in the system""" return self.get_nframes() + def __add__(self, others): + """magic method "+" operation""" + self_copy = self.copy() + if isinstance(others, System): + other_copy = others.copy() + self_copy.append(other_copy) + elif isinstance(others, list): + for ii in others: + assert isinstance(ii, System) + ii_copy = ii.copy() + self_copy.append(ii_copy) + else: + raise RuntimeError("Unspported data structure") + return self.__class__.from_dict({"data": self_copy.data}) - def __add__(self,others) : - """magic method "+" operation """ - self_copy=self.copy() - if isinstance(others,System): - other_copy=others.copy() - self_copy.append(other_copy) - elif isinstance(others, list): - for ii in others: - assert(isinstance(ii,System)) - ii_copy=ii.copy() - self_copy.append(ii_copy) - else: - raise RuntimeError("Unspported data structure") - return self.__class__.from_dict({'data':self_copy.data}) - - - def dump(self,filename,indent=4): - """dump .json or .yaml file """ - dumpfn(self.as_dict(),filename,indent=indent) - + def dump(self, filename, indent=4): + """dump .json or .yaml file""" + dumpfn(self.as_dict(), filename, indent=indent) def map_atom_types(self, type_map=None) -> np.ndarray: """ @@ -387,75 +413,70 @@ def map_atom_types(self, type_map=None) -> np.ndarray: new_atom_types : np.ndarray The mapped atom types """ - if isinstance(type_map,dict) or type_map is None: - pass - elif isinstance(type_map,list): - type_map=dict(zip(type_map,range(len(type_map)))) + if isinstance(type_map, dict) or type_map is None: + pass + elif isinstance(type_map, list): + type_map = dict(zip(type_map, range(len(type_map)))) else: - raise RuntimeError("Unknown format") + raise RuntimeError("Unknown format") if type_map is None: - type_map=elements_index_map(self.get_atom_names().copy(),standard=True) + type_map = elements_index_map(self.get_atom_names().copy(), standard=True) - _set1=set(self.get_atom_names()) - _set2=set(list(type_map.keys())) + _set1 = set(self.get_atom_names()) + _set2 = set(list(type_map.keys())) assert _set1.issubset(_set2) - atom_types_list=[] - for name, numb in zip(self.get_atom_names(), self.get_atom_numbs()): - atom_types_list.extend([name]*numb) + atom_types_list = [] + for name, numb in zip(self.get_atom_names(), self.get_atom_numbs()): + atom_types_list.extend([name] * numb) new_atom_types = np.array([type_map[ii] for ii in atom_types_list], dtype=int) return new_atom_types @staticmethod def load(filename): - """rebuild System obj. from .json or .yaml file """ + """rebuild System obj. from .json or .yaml file""" return loadfn(filename) def as_dict(self): """Returns data dict of System instance""" - d={"@module": self.__class__.__module__, - "@class": self.__class__.__name__, - "data": self.data - } + d = { + "@module": self.__class__.__module__, + "@class": self.__class__.__name__, + "data": self.data, + } return d - def get_atom_names(self): - """Returns name of atoms """ - return self.data['atom_names'] - + """Returns name of atoms""" + return self.data["atom_names"] def get_atom_types(self): - """Returns type of atoms """ - return self.data['atom_types'] - + """Returns type of atoms""" + return self.data["atom_types"] def get_atom_numbs(self): - """Returns number of atoms """ - return self.data['atom_numbs'] - + """Returns number of atoms""" + return self.data["atom_numbs"] - def get_nframes(self) : + def get_nframes(self): """Returns number of frames in the system""" - return len(self.data['cells']) + return len(self.data["cells"]) - - def get_natoms(self) : + def get_natoms(self): """Returns total number of atoms in the system""" - return len(self.data['atom_types']) + return len(self.data["atom_types"]) def get_ntypes(self) -> int: """Returns total number of atom types in the system.""" - return len(self.data['atom_names']) + return len(self.data["atom_names"]) def copy(self): - """Returns a copy of the system. """ - return self.__class__.from_dict({'data':deepcopy(self.data)}) - + """Returns a copy of the system.""" + return self.__class__.from_dict({"data": deepcopy(self.data)}) - def sub_system(self, f_idx) : + def sub_system(self, f_idx): """ Construct a subsystem from the system @@ -487,8 +508,7 @@ def sub_system(self, f_idx) : tmp.data[tt.name] = self.data[tt.name] return tmp - - def append(self, system) : + def append(self, system): """ Append a system to this system @@ -497,44 +517,49 @@ def append(self, system) : system : System The system to append """ - if not len(system.data['atom_numbs']): + if not len(system.data["atom_numbs"]): # skip if the system to append is non-converged return False - elif not len(self.data['atom_numbs']): + elif not len(self.data["atom_numbs"]): # this system is non-converged but the system to append is converged self.data = system.data return False if system.uniq_formula != self.uniq_formula: - raise RuntimeError('systems with inconsistent formula could not be append: %s v.s. %s' % (self.uniq_formula, system.uniq_formula)) - if system.data['atom_names'] != self.data['atom_names']: + raise RuntimeError( + "systems with inconsistent formula could not be append: %s v.s. %s" + % (self.uniq_formula, system.uniq_formula) + ) + if system.data["atom_names"] != self.data["atom_names"]: # allow to append a system with different atom_names order system.sort_atom_names() self.sort_atom_names() - if (system.data['atom_types'] != self.data['atom_types']).any(): + if (system.data["atom_types"] != self.data["atom_types"]).any(): # allow to append a system with different atom_types order system.sort_atom_types() self.sort_atom_types() - for ii in ['atom_numbs', 'atom_names'] : - assert(system.data[ii] == self.data[ii]) - for ii in ['atom_types','orig'] : - eq = [v1==v2 for v1,v2 in zip(system.data[ii], self.data[ii])] - assert(all(eq)) + for ii in ["atom_numbs", "atom_names"]: + assert system.data[ii] == self.data[ii] + for ii in ["atom_types", "orig"]: + eq = [v1 == v2 for v1, v2 in zip(system.data[ii], self.data[ii])] + assert all(eq) for tt in self.DTYPES: # check if the first shape is nframes if tt.shape is not None and Axis.NFRAMES in tt.shape: if tt.name not in self.data and tt.name in system.data: - raise RuntimeError('system has %s, but this does not' % tt.name) + raise RuntimeError("system has %s, but this does not" % tt.name) elif tt.name in self.data and tt.name not in system.data: - raise RuntimeError('this has %s, but system does not' % tt.name) + raise RuntimeError("this has %s, but system does not" % tt.name) elif tt.name not in self.data and tt.name not in system.data: # skip if both not exist continue # concat any data in nframes axis axis_nframes = tt.shape.index(Axis.NFRAMES) - self.data[tt.name] = np.concatenate((self.data[tt.name], system[tt.name]), axis=axis_nframes) + self.data[tt.name] = np.concatenate( + (self.data[tt.name], system[tt.name]), axis=axis_nframes + ) if self.nopbc and not system.nopbc: # appended system uses PBC, cancel nopbc - self.data['nopbc'] = False + self.data["nopbc"] = False return True def sort_atom_names(self, type_map=None): @@ -560,24 +585,24 @@ def check_type_map(self, type_map): type_map : list type_map """ - if type_map is not None and type_map != self.data['atom_names']: + if type_map is not None and type_map != self.data["atom_names"]: self.sort_atom_names(type_map=type_map) - def apply_type_map(self, type_map) : + def apply_type_map(self, type_map): if type_map is not None and type(type_map) is list: self.check_type_map(type_map) else: - raise RuntimeError('invalid type map, cannot be applied') + raise RuntimeError("invalid type map, cannot be applied") def sort_atom_types(self) -> np.ndarray: """Sort atom types. - + Returns ------- idx : np.ndarray new atom index in the Axis.NATOMS """ - idx = np.argsort(self.data['atom_types']) + idx = np.argsort(self.data["atom_types"]) for tt in self.DTYPES: if tt.name not in self.data: # skip optional data @@ -594,8 +619,14 @@ def formula(self): """ Return the formula of this system, like C3H5O2 """ - return ''.join(["{}{}".format(symbol,numb) for symbol,numb in - zip(self.data['atom_names'], self.data['atom_numbs'])]) + return "".join( + [ + "{}{}".format(symbol, numb) + for symbol, numb in zip( + self.data["atom_names"], self.data["atom_numbs"] + ) + ] + ) @property def uniq_formula(self): @@ -604,9 +635,14 @@ def uniq_formula(self): The uniq_formula sort the elements in formula by names. Systems with the same uniq_formula can be append together. """ - return ''.join(["{}{}".format(symbol,numb) for symbol,numb in sorted( - zip(self.data['atom_names'], self.data['atom_numbs']))]) - + return "".join( + [ + "{}{}".format(symbol, numb) + for symbol, numb in sorted( + zip(self.data["atom_names"], self.data["atom_numbs"]) + ) + ] + ) def extend(self, systems): """ @@ -621,18 +657,16 @@ def extend(self, systems): for system in systems: self.append(system.copy()) - - def apply_pbc(self) : + def apply_pbc(self): """ Append periodic boundary condition """ - ncoord = dpdata.md.pbc.dir_coord(self.data['coords'], self.data['cells']) + ncoord = dpdata.md.pbc.dir_coord(self.data["coords"], self.data["cells"]) ncoord = ncoord % 1 - self.data['coords'] = np.matmul(ncoord, self.data['cells']) - + self.data["coords"] = np.matmul(ncoord, self.data["cells"]) @post_funcs.register("remove_pbc") - def remove_pbc(self, protect_layer = 9): + def remove_pbc(self, protect_layer=9): """ This method does NOT delete the definition of the cells, it (1) revises the cell to a cubic cell and ensures that the cell @@ -645,47 +679,44 @@ def remove_pbc(self, protect_layer = 9): protect_layer : the protect layer between the atoms and the cell boundary """ - assert(protect_layer >= 0), "the protect_layer should be no less than 0" + assert protect_layer >= 0, "the protect_layer should be no less than 0" remove_pbc(self.data, protect_layer) - def affine_map(self, trans, f_idx = 0) : - assert(np.linalg.det(trans) != 0) - self.data['cells'][f_idx] = np.matmul(self.data['cells'][f_idx], trans) - self.data['coords'][f_idx] = np.matmul(self.data['coords'][f_idx], trans) - + def affine_map(self, trans, f_idx=0): + assert np.linalg.det(trans) != 0 + self.data["cells"][f_idx] = np.matmul(self.data["cells"][f_idx], trans) + self.data["coords"][f_idx] = np.matmul(self.data["coords"][f_idx], trans) @post_funcs.register("shift_orig_zero") - def _shift_orig_zero(self) : - for ff in self.data['coords'] : - for ii in ff : - ii = ii - self.data['orig'] - self.data['orig'] = self.data['orig'] - self.data['orig'] - assert((np.zeros([3]) == self.data['orig']).all()) + def _shift_orig_zero(self): + for ff in self.data["coords"]: + for ii in ff: + ii = ii - self.data["orig"] + self.data["orig"] = self.data["orig"] - self.data["orig"] + assert (np.zeros([3]) == self.data["orig"]).all() @post_funcs.register("rot_lower_triangular") - def rot_lower_triangular(self) : - for ii in range(self.get_nframes()) : + def rot_lower_triangular(self): + for ii in range(self.get_nframes()): self.rot_frame_lower_triangular(ii) - - def rot_frame_lower_triangular(self, f_idx = 0) : - qq, rr = np.linalg.qr(self.data['cells'][f_idx].T) - if np.linalg.det(qq) < 0 : + def rot_frame_lower_triangular(self, f_idx=0): + qq, rr = np.linalg.qr(self.data["cells"][f_idx].T) + if np.linalg.det(qq) < 0: qq = -qq rr = -rr - self.affine_map(qq, f_idx = f_idx) + self.affine_map(qq, f_idx=f_idx) rot = np.eye(3) - if self.data['cells'][f_idx][0][0] < 0 : + if self.data["cells"][f_idx][0][0] < 0: rot[0][0] = -1 - if self.data['cells'][f_idx][1][1] < 0 : + if self.data["cells"][f_idx][1][1] < 0: rot[1][1] = -1 - if self.data['cells'][f_idx][2][2] < 0 : + if self.data["cells"][f_idx][2][2] < 0: rot[2][2] = -1 - assert(np.linalg.det(rot) == 1) - self.affine_map(rot, f_idx = f_idx) + assert np.linalg.det(rot) == 1 + self.affine_map(rot, f_idx=f_idx) return np.matmul(qq, rot) - def add_atom_names(self, atom_names): """ Add atom_names that do not exist. @@ -711,71 +742,99 @@ def replicate(self, ncopy): tmp : System The system after replication. """ - if len(ncopy) !=3: - raise RuntimeError('ncopy must be a list or tuple with 3 int') + if len(ncopy) != 3: + raise RuntimeError("ncopy must be a list or tuple with 3 int") for ii in ncopy: if type(ii) is not int: - raise RuntimeError('ncopy must be a list or tuple must with 3 int') + raise RuntimeError("ncopy must be a list or tuple must with 3 int") tmp = System() nframes = self.get_nframes() data = self.data - tmp.data['atom_names'] = list(np.copy(data['atom_names'])) - tmp.data['atom_numbs'] = list(np.array(np.copy(data['atom_numbs'])) * np.prod(ncopy)) - tmp.data['atom_types'] = np.sort(np.tile(np.copy(data['atom_types']),np.prod(ncopy))) - tmp.data['cells'] = np.copy(data['cells']) + tmp.data["atom_names"] = list(np.copy(data["atom_names"])) + tmp.data["atom_numbs"] = list( + np.array(np.copy(data["atom_numbs"])) * np.prod(ncopy) + ) + tmp.data["atom_types"] = np.sort( + np.tile(np.copy(data["atom_types"]), np.prod(ncopy)) + ) + tmp.data["cells"] = np.copy(data["cells"]) for ii in range(3): - tmp.data['cells'][:,ii,:] *= ncopy[ii] - tmp.data['coords'] = np.tile(np.copy(data['coords']),tuple(ncopy)+(1,1,1)) + tmp.data["cells"][:, ii, :] *= ncopy[ii] + tmp.data["coords"] = np.tile(np.copy(data["coords"]), tuple(ncopy) + (1, 1, 1)) for xx in range(ncopy[0]): for yy in range(ncopy[1]): for zz in range(ncopy[2]): - tmp.data['coords'][xx,yy,zz,:,:,:] += xx * np.reshape(data['cells'][:,0,:], [-1,1,3])\ - + yy * np.reshape(data['cells'][:,1,:], [-1,1,3])\ - + zz * np.reshape(data['cells'][:,2,:], [-1,1,3]) - tmp.data['coords'] = np.reshape(np.transpose(tmp.data['coords'], [3,4,0,1,2,5]), (nframes, -1 , 3)) + tmp.data["coords"][xx, yy, zz, :, :, :] += ( + xx * np.reshape(data["cells"][:, 0, :], [-1, 1, 3]) + + yy * np.reshape(data["cells"][:, 1, :], [-1, 1, 3]) + + zz * np.reshape(data["cells"][:, 2, :], [-1, 1, 3]) + ) + tmp.data["coords"] = np.reshape( + np.transpose(tmp.data["coords"], [3, 4, 0, 1, 2, 5]), (nframes, -1, 3) + ) return tmp def replace(self, initial_atom_type, end_atom_type, replace_num): if type(self) is not dpdata.System: - raise RuntimeError('Must use method replace() of the instance of class dpdata.System') + raise RuntimeError( + "Must use method replace() of the instance of class dpdata.System" + ) if type(replace_num) is not int: - raise ValueError("replace_num must be a integer. Now is {replace_num}".format(replace_num=replace_num)) + raise ValueError( + "replace_num must be a integer. Now is {replace_num}".format( + replace_num=replace_num + ) + ) if replace_num <= 0: - raise ValueError("replace_num must be larger than 0.Now is {replace_num}".format(replace_num=replace_num)) + raise ValueError( + "replace_num must be larger than 0.Now is {replace_num}".format( + replace_num=replace_num + ) + ) try: - initial_atom_index = self.data['atom_names'].index(initial_atom_type) + initial_atom_index = self.data["atom_names"].index(initial_atom_type) except ValueError as e: - raise ValueError("atom_type {initial_atom_type} not in {atom_names}" - .format(initial_atom_type=initial_atom_type, atom_names=self.data['atom_names'])) - max_replace_num = self.data['atom_numbs'][initial_atom_index] + raise ValueError( + "atom_type {initial_atom_type} not in {atom_names}".format( + initial_atom_type=initial_atom_type, + atom_names=self.data["atom_names"], + ) + ) + max_replace_num = self.data["atom_numbs"][initial_atom_index] if replace_num > max_replace_num: - raise RuntimeError("not enough {initial_atom_type} atom, only {max_replace_num} available, less than {replace_num}.Please check." - .format(initial_atom_type=initial_atom_type,max_replace_num=max_replace_num, replace_num=replace_num)) + raise RuntimeError( + "not enough {initial_atom_type} atom, only {max_replace_num} available, less than {replace_num}.Please check.".format( + initial_atom_type=initial_atom_type, + max_replace_num=max_replace_num, + replace_num=replace_num, + ) + ) - may_replace_indices = [i for i, x in enumerate(self.data['atom_types']) if x == initial_atom_index] - to_replace_indices = np.random.choice(may_replace_indices, size=replace_num, replace=False) + may_replace_indices = [ + i for i, x in enumerate(self.data["atom_types"]) if x == initial_atom_index + ] + to_replace_indices = np.random.choice( + may_replace_indices, size=replace_num, replace=False + ) - if end_atom_type not in self.data['atom_names']: - self.data['atom_names'].append(end_atom_type) - self.data['atom_numbs'].append(0) + if end_atom_type not in self.data["atom_names"]: + self.data["atom_names"].append(end_atom_type) + self.data["atom_numbs"].append(0) - end_atom_index = self.data['atom_names'].index(end_atom_type) + end_atom_index = self.data["atom_names"].index(end_atom_type) for ii in to_replace_indices: - self.data['atom_types'][ii] = end_atom_index - self.data['atom_numbs'][initial_atom_index] -= replace_num - self.data['atom_numbs'][end_atom_index] += replace_num + self.data["atom_types"][ii] = end_atom_index + self.data["atom_numbs"][initial_atom_index] -= replace_num + self.data["atom_numbs"][end_atom_index] += replace_num self.sort_atom_types() - - def perturb(self, - pert_num, - cell_pert_fraction, - atom_pert_distance, - atom_pert_style='normal'): + def perturb( + self, pert_num, cell_pert_fraction, atom_pert_distance, atom_pert_style="normal" + ): """ Perturb each frame in the system randomly. The cell will be deformed randomly, and atoms will be displaced by a random distance in random direction. @@ -812,8 +871,8 @@ def perturb(self, """ if type(self) is not dpdata.System: raise RuntimeError( - f'Using method perturb() of an instance of {type(self)}. ' - f'Must use method perturb() of the instance of class dpdata.System.' + f"Using method perturb() of an instance of {type(self)}. " + f"Must use method perturb() of the instance of class dpdata.System." ) perturbed_system = System() nframes = self.get_nframes() @@ -821,11 +880,17 @@ def perturb(self, for jj in range(pert_num): tmp_system = self[ii].copy() cell_perturb_matrix = get_cell_perturb_matrix(cell_pert_fraction) - tmp_system.data['cells'][0] = np.matmul(tmp_system.data['cells'][0],cell_perturb_matrix) - tmp_system.data['coords'][0] = np.matmul(tmp_system.data['coords'][0],cell_perturb_matrix) - for kk in range(len(tmp_system.data['coords'][0])): - atom_perturb_vector = get_atom_perturb_vector(atom_pert_distance, atom_pert_style) - tmp_system.data['coords'][0][kk] += atom_perturb_vector + tmp_system.data["cells"][0] = np.matmul( + tmp_system.data["cells"][0], cell_perturb_matrix + ) + tmp_system.data["coords"][0] = np.matmul( + tmp_system.data["coords"][0], cell_perturb_matrix + ) + for kk in range(len(tmp_system.data["coords"][0])): + atom_perturb_vector = get_atom_perturb_vector( + atom_pert_distance, atom_pert_style + ) + tmp_system.data["coords"][0][kk] += atom_perturb_vector tmp_system.rot_lower_triangular() perturbed_system.append(tmp_system) return perturbed_system @@ -838,7 +903,7 @@ def nopbc(self): @nopbc.setter def nopbc(self, value): - self.data['nopbc'] = value + self.data["nopbc"] = value def shuffle(self): """Shuffle frames randomly.""" @@ -846,7 +911,7 @@ def shuffle(self): self.data = self.sub_system(idx).data return idx - def predict(self, *args: Any, driver: str="dp", **kwargs: Any) -> "LabeledSystem": + def predict(self, *args: Any, driver: str = "dp", **kwargs: Any) -> "LabeledSystem": """ Predict energies and forces by a driver. @@ -874,9 +939,11 @@ def predict(self, *args: Any, driver: str="dp", **kwargs: Any) -> "LabeledSystem data = driver.label(self.data.copy()) return LabeledSystem(data=data) - def minimize(self, *args: Any, minimizer: Union[str, Minimizer], **kwargs: Any) -> "LabeledSystem": + def minimize( + self, *args: Any, minimizer: Union[str, Minimizer], **kwargs: Any + ) -> "LabeledSystem": """Minimize the geometry. - + Parameters ---------- *args : iterable @@ -898,7 +965,7 @@ def minimize(self, *args: Any, minimizer: Union[str, Minimizer], **kwargs: Any) def pick_atom_idx(self, idx, nopbc=None): """Pick atom index - + Parameters ---------- idx: int or list or slice @@ -924,8 +991,10 @@ def pick_atom_idx(self, idx, nopbc=None): new_shape[axis_natoms] = idx new_sys.data[tt.name] = self.data[tt.name][tuple(new_shape)] # recalculate atom_numbs according to atom_types - atom_numbs = np.bincount(new_sys.data['atom_types'], minlength=len(self.get_atom_names())) - new_sys.data['atom_numbs'] = list(atom_numbs) + atom_numbs = np.bincount( + new_sys.data["atom_types"], minlength=len(self.get_atom_names()) + ) + new_sys.data["atom_numbs"] = list(atom_numbs) if nopbc is True or nopbc is False: new_sys.nopbc = nopbc return new_sys @@ -933,30 +1002,32 @@ def pick_atom_idx(self, idx, nopbc=None): def remove_atom_names(self, atom_names): """Remove atom names and all such atoms. For example, you may not remove EP atoms in TIP4P/Ew water, which - is not a real atom. + is not a real atom. """ if isinstance(atom_names, str): atom_names = [atom_names] removed_atom_idx = [] for an in atom_names: # get atom name idx - idx = self.data['atom_names'].index(an) - atom_idx = self.data['atom_types'] == idx + idx = self.data["atom_names"].index(an) + atom_idx = self.data["atom_types"] == idx removed_atom_idx.append(atom_idx) picked_atom_idx = ~np.any(removed_atom_idx, axis=0) new_sys = self.pick_atom_idx(picked_atom_idx) # let's remove atom_names # firstly, rearrange atom_names and put these atom_names in the end - new_atom_names = list([xx for xx in new_sys.data['atom_names'] if xx not in atom_names]) + new_atom_names = list( + [xx for xx in new_sys.data["atom_names"] if xx not in atom_names] + ) new_sys.sort_atom_names(type_map=new_atom_names + atom_names) # remove atom_names and atom_numbs - new_sys.data['atom_names'] = new_atom_names - new_sys.data['atom_numbs'] = new_sys.data['atom_numbs'][:len(new_atom_names)] + new_sys.data["atom_names"] = new_atom_names + new_sys.data["atom_numbs"] = new_sys.data["atom_numbs"][: len(new_atom_names)] return new_sys def pick_by_amber_mask(self, param, maskstr, pass_coords=False, nopbc=None): """Pick atoms by amber mask - + Parameters ---------- param: str or parmed.Structure @@ -964,7 +1035,7 @@ def pick_by_amber_mask(self, param, maskstr, pass_coords=False, nopbc=None): maskstr: str Amber masks pass_coords: Boolen (default: False) - If pass_coords is true, the function will pass coordinates and + If pass_coords is true, the function will pass coordinates and return a MultiSystem. Otherwise, the result is coordinate-independent, and the function will return System or LabeledSystem. @@ -976,53 +1047,60 @@ def pick_by_amber_mask(self, param, maskstr, pass_coords=False, nopbc=None): ms = MultiSystems() for sub_s in self: # TODO: this can computed in pararrel - idx = pick_by_amber_mask(parm, maskstr, sub_s['coords'][0]) + idx = pick_by_amber_mask(parm, maskstr, sub_s["coords"][0]) ms.append(sub_s.pick_atom_idx(idx, nopbc=nopbc)) return ms else: idx = pick_by_amber_mask(parm, maskstr) return self.pick_atom_idx(idx, nopbc=nopbc) + def get_cell_perturb_matrix(cell_pert_fraction): - if cell_pert_fraction<0: - raise RuntimeError('cell_pert_fraction can not be negative') + if cell_pert_fraction < 0: + raise RuntimeError("cell_pert_fraction can not be negative") e0 = np.random.rand(6) - e = e0 * 2 *cell_pert_fraction - cell_pert_fraction + e = e0 * 2 * cell_pert_fraction - cell_pert_fraction cell_pert_matrix = np.array( - [[1+e[0], 0.5 * e[5], 0.5 * e[4]], - [0.5 * e[5], 1+e[1], 0.5 * e[3]], - [0.5 * e[4], 0.5 * e[3], 1+e[2]]] + [ + [1 + e[0], 0.5 * e[5], 0.5 * e[4]], + [0.5 * e[5], 1 + e[1], 0.5 * e[3]], + [0.5 * e[4], 0.5 * e[3], 1 + e[2]], + ] ) return cell_pert_matrix -def get_atom_perturb_vector(atom_pert_distance, atom_pert_style='normal'): + +def get_atom_perturb_vector(atom_pert_distance, atom_pert_style="normal"): random_vector = None if atom_pert_distance < 0: - raise RuntimeError('atom_pert_distance can not be negative') + raise RuntimeError("atom_pert_distance can not be negative") - if atom_pert_style == 'normal': + if atom_pert_style == "normal": e = np.random.randn(3) - random_vector=(atom_pert_distance/np.sqrt(3))*e - elif atom_pert_style == 'uniform': + random_vector = (atom_pert_distance / np.sqrt(3)) * e + elif atom_pert_style == "uniform": e = np.random.randn(3) while np.linalg.norm(e) < 0.1: e = np.random.randn(3) - random_unit_vector = e/np.linalg.norm(e) + random_unit_vector = e / np.linalg.norm(e) v0 = np.random.rand(1) - v = np.power(v0,1/3) - random_vector = atom_pert_distance*v*random_unit_vector - elif atom_pert_style == 'const' : + v = np.power(v0, 1 / 3) + random_vector = atom_pert_distance * v * random_unit_vector + elif atom_pert_style == "const": e = np.random.randn(3) while np.linalg.norm(e) < 0.1: e = np.random.randn(3) - random_unit_vector = e/np.linalg.norm(e) - random_vector = atom_pert_distance*random_unit_vector + random_unit_vector = e / np.linalg.norm(e) + random_vector = atom_pert_distance * random_unit_vector else: - raise RuntimeError('unsupported options atom_pert_style={}'.format(atom_pert_style)) + raise RuntimeError( + "unsupported options atom_pert_style={}".format(atom_pert_style) + ) return random_vector -class LabeledSystem (System): - ''' + +class LabeledSystem(System): + """ The labeled data System For example, a labeled water system named `d_example` has two molecules (6 atoms) and `nframes` frames. The labels can be accessed by @@ -1063,7 +1141,7 @@ class LabeledSystem (System): The beginning frame when loading MD trajectory. step : int The number of skipped frames when loading MD trajectory. - ''' + """ DTYPES = System.DTYPES + ( DataType("energies", np.ndarray, (Axis.NFRAMES,)), @@ -1083,7 +1161,7 @@ def from_fmt_obj(self, fmtobj, file_name, **kwargs): else: self.data = {**self.data, **data} self.check_data() - if hasattr(fmtobj.from_labeled_system, 'post_func'): + if hasattr(fmtobj.from_labeled_system, "post_func"): for post_f in fmtobj.from_labeled_system.post_func: self.post_funcs.get_plugin(post_f)(self) return self @@ -1092,47 +1170,49 @@ def to_fmt_obj(self, fmtobj, *args, **kwargs): return fmtobj.to_labeled_system(self.data, *args, **kwargs) def __str__(self): - ret="Data Summary" - ret+="\nLabeled System" - ret+="\n-------------------" - ret+="\nFrame Numbers : %d"%self.get_nframes() - ret+="\nAtom Numbers : %d"%self.get_natoms() - status= "Yes" if self.has_virial() else "No" - ret+="\nIncluding Virials : %s"% status - ret+="\nElement List :" - ret+="\n-------------------" - ret+="\n"+" ".join(map(str,self.get_atom_names())) - ret+="\n"+" ".join(map(str,self.get_atom_numbs())) + ret = "Data Summary" + ret += "\nLabeled System" + ret += "\n-------------------" + ret += "\nFrame Numbers : %d" % self.get_nframes() + ret += "\nAtom Numbers : %d" % self.get_natoms() + status = "Yes" if self.has_virial() else "No" + ret += "\nIncluding Virials : %s" % status + ret += "\nElement List :" + ret += "\n-------------------" + ret += "\n" + " ".join(map(str, self.get_atom_names())) + ret += "\n" + " ".join(map(str, self.get_atom_numbs())) return ret - def __add__(self,others) : - """magic method "+" operation """ - self_copy=self.copy() - if isinstance(others,LabeledSystem): - other_copy=others.copy() - self_copy.append(other_copy) - elif isinstance(others, list): - for ii in others: - assert(isinstance(ii,LabeledSystem)) - ii_copy=ii.copy() - self_copy.append(ii_copy) - else: - raise RuntimeError("Unspported data structure") - return self.__class__.from_dict({'data':self_copy.data}) - - def has_virial(self) : + def __add__(self, others): + """magic method "+" operation""" + self_copy = self.copy() + if isinstance(others, LabeledSystem): + other_copy = others.copy() + self_copy.append(other_copy) + elif isinstance(others, list): + for ii in others: + assert isinstance(ii, LabeledSystem) + ii_copy = ii.copy() + self_copy.append(ii_copy) + else: + raise RuntimeError("Unspported data structure") + return self.__class__.from_dict({"data": self_copy.data}) + + def has_virial(self): # return ('virials' in self.data) and (len(self.data['virials']) > 0) - return ('virials' in self.data) + return "virials" in self.data - def affine_map_fv(self, trans, f_idx) : - assert(np.linalg.det(trans) != 0) - self.data['forces'][f_idx] = np.matmul(self.data['forces'][f_idx], trans) + def affine_map_fv(self, trans, f_idx): + assert np.linalg.det(trans) != 0 + self.data["forces"][f_idx] = np.matmul(self.data["forces"][f_idx], trans) if self.has_virial(): - self.data['virials'][f_idx] = np.matmul(trans.T, np.matmul(self.data['virials'][f_idx], trans)) + self.data["virials"][f_idx] = np.matmul( + trans.T, np.matmul(self.data["virials"][f_idx], trans) + ) - def rot_frame_lower_triangular(self, f_idx = 0) : - trans = System.rot_frame_lower_triangular(self, f_idx = f_idx) - self.affine_map_fv(trans, f_idx = f_idx) + def rot_frame_lower_triangular(self, f_idx=0): + trans = System.rot_frame_lower_triangular(self, f_idx=f_idx) + self.affine_map_fv(trans, f_idx=f_idx) return trans def correction(self, hl_sys): @@ -1156,17 +1236,19 @@ def correction(self, hl_sys): if not isinstance(hl_sys, LabeledSystem): raise RuntimeError("high_sys should be LabeledSystem") corrected_sys = self.copy() - corrected_sys.data['energies'] = hl_sys.data['energies'] - self.data['energies'] - corrected_sys.data['forces'] = hl_sys.data['forces'] - self.data['forces'] - if 'virials' in self.data and 'virials' in hl_sys.data: - corrected_sys.data['virials'] = hl_sys.data['virials'] - self.data['virials'] + corrected_sys.data["energies"] = hl_sys.data["energies"] - self.data["energies"] + corrected_sys.data["forces"] = hl_sys.data["forces"] - self.data["forces"] + if "virials" in self.data and "virials" in hl_sys.data: + corrected_sys.data["virials"] = ( + hl_sys.data["virials"] - self.data["virials"] + ) return corrected_sys class MultiSystems: - '''A set containing several systems.''' + """A set containing several systems.""" - def __init__(self, *systems,type_map=None): + def __init__(self, *systems, type_map=None): """ Parameters ---------- @@ -1193,18 +1275,21 @@ def from_fmt_obj(self, fmtobj, directory, labeled=True, **kwargs): return self def to_fmt_obj(self, fmtobj, directory, *args, **kwargs): - for fn, ss in zip(fmtobj.to_multi_systems(self.systems.keys(), directory, **kwargs), self.systems.values()): + for fn, ss in zip( + fmtobj.to_multi_systems(self.systems.keys(), directory, **kwargs), + self.systems.values(), + ): ss.to_fmt_obj(fmtobj, fn, *args, **kwargs) return self - + def to(self, fmt: str, *args, **kwargs) -> "MultiSystems": """Dump systems to the specific format. - + Parameters ---------- fmt : str format - + Returns ------- MultiSystems @@ -1225,41 +1310,46 @@ def __repr__(self): return self.__str__() def __str__(self): - return 'MultiSystems ({} systems containing {} frames)'.format(len(self.systems), self.get_nframes()) - - def __add__(self, others) : - """magic method "+" operation """ - self_copy = deepcopy(self) - if isinstance(others, System) or isinstance(others, MultiSystems): - return self.__class__(self, others) - elif isinstance(others, list): - return self.__class__(self, *others) - raise RuntimeError("Unspported data structure") + return "MultiSystems ({} systems containing {} frames)".format( + len(self.systems), self.get_nframes() + ) + + def __add__(self, others): + """magic method "+" operation""" + self_copy = deepcopy(self) + if isinstance(others, System) or isinstance(others, MultiSystems): + return self.__class__(self, others) + elif isinstance(others, list): + return self.__class__(self, *others) + raise RuntimeError("Unspported data structure") @classmethod - def from_file(cls,file_name,fmt, **kwargs): + def from_file(cls, file_name, fmt, **kwargs): multi_systems = cls() - multi_systems.load_systems_from_file(file_name=file_name,fmt=fmt, **kwargs) + multi_systems.load_systems_from_file(file_name=file_name, fmt=fmt, **kwargs) return multi_systems @classmethod - def from_dir(cls,dir_name, file_name, fmt='auto', type_map=None): + def from_dir(cls, dir_name, file_name, fmt="auto", type_map=None): multi_systems = cls() - target_file_list = sorted(glob.glob('./{}/**/{}'.format(dir_name, file_name), recursive=True)) + target_file_list = sorted( + glob.glob("./{}/**/{}".format(dir_name, file_name), recursive=True) + ) for target_file in target_file_list: - multi_systems.append(LabeledSystem(file_name=target_file, fmt=fmt, type_map=type_map)) + multi_systems.append( + LabeledSystem(file_name=target_file, fmt=fmt, type_map=type_map) + ) return multi_systems - def load_systems_from_file(self, file_name=None, fmt=None, **kwargs): fmt = fmt.lower() return self.from_fmt_obj(load_format(fmt), file_name, **kwargs) - def get_nframes(self) : + def get_nframes(self): """Returns number of frames in all systems""" return sum(len(system) for system in self.systems.values()) - def append(self, *systems) : + def append(self, *systems): """ Append systems or MultiSystems to systems @@ -1335,7 +1425,9 @@ def predict(self, *args: Any, driver="dp", **kwargs: Any) -> "MultiSystems": new_multisystems.append(ss.predict(*args, driver=driver, **kwargs)) return new_multisystems - def minimize(self, *args: Any, minimizer: Union[str, Minimizer], **kwargs: Any) -> "MultiSystems": + def minimize( + self, *args: Any, minimizer: Union[str, Minimizer], **kwargs: Any + ) -> "MultiSystems": """ Minimize geometry by a minimizer. @@ -1367,10 +1459,10 @@ def minimize(self, *args: Any, minimizer: Union[str, Minimizer], **kwargs: Any) for ss in self: new_multisystems.append(ss.minimize(*args, minimizer=minimizer, **kwargs)) return new_multisystems - + def pick_atom_idx(self, idx, nopbc=None): """Pick atom index - + Parameters ---------- idx: int or list or slice @@ -1429,12 +1521,12 @@ def correction(self, hl_sys: "MultiSystems"): def get_cls_name(cls: object) -> str: """Returns the fully qualified name of a class, such as `np.ndarray`. - + Parameters ---------- cls : object the class - + Returns ------- str @@ -1442,6 +1534,7 @@ def get_cls_name(cls: object) -> str: """ return ".".join([cls.__module__, cls.__name__]) + def add_format_methods(): """Add format methods to System, LabeledSystem, and MultiSystems. @@ -1459,11 +1552,15 @@ def add_format_methods(): Format.register_to(to_func_name)(formatcls) for method, formatcls in Format.get_from_methods().items(): + def get_func(ff): # ff is not initized when defining from_format so cannot be polluted def from_format(self, file_name, **kwargs): return self.from_fmt_obj(ff(), file_name, **kwargs) - from_format.__doc__ = "Read data from :class:`%s` format." % (get_cls_name(ff)) + + from_format.__doc__ = "Read data from :class:`%s` format." % ( + get_cls_name(ff) + ) return from_format setattr(System, method, get_func(formatcls)) @@ -1471,9 +1568,11 @@ def from_format(self, file_name, **kwargs): setattr(MultiSystems, method, get_func(formatcls)) for method, formatcls in Format.get_to_methods().items(): + def get_func(ff): def to_format(self, *args, **kwargs): return self.to_fmt_obj(ff(), *args, **kwargs) + to_format.__doc__ = "Dump data to :class:`%s` format." % (get_cls_name(ff)) return to_format @@ -1481,4 +1580,5 @@ def to_format(self, *args, **kwargs): setattr(LabeledSystem, method, get_func(formatcls)) setattr(MultiSystems, method, get_func(formatcls)) + add_format_methods() diff --git a/dpdata/unit.py b/dpdata/unit.py index 0c612256..a1dc1c7b 100644 --- a/dpdata/unit.py +++ b/dpdata/unit.py @@ -1,11 +1,11 @@ from abc import ABC from scipy import constants -AVOGADRO = constants.Avogadro # Avagadro constant -ELE_CHG = constants.elementary_charge # Elementary Charge, in C -BOHR = constants.value("atomic unit of length") # Bohr, in m -HARTREE = constants.value("atomic unit of energy") # Hartree, in Jole -RYDBERG = constants.Rydberg * constants.h * constants.c # Rydberg, in Jole +AVOGADRO = constants.Avogadro # Avagadro constant +ELE_CHG = constants.elementary_charge # Elementary Charge, in C +BOHR = constants.value("atomic unit of length") # Bohr, in m +HARTREE = constants.value("atomic unit of energy") # Hartree, in Jole +RYDBERG = constants.Rydberg * constants.h * constants.c # Rydberg, in Jole # energy conversions econvs = { @@ -15,17 +15,18 @@ "kcal_mol": 1 / (ELE_CHG * AVOGADRO / 1000 / 4.184), "rydberg": RYDBERG / ELE_CHG, "J": 1 / ELE_CHG, - "kJ": 1000 / ELE_CHG + "kJ": 1000 / ELE_CHG, } # length conversions lconvs = { "angstrom": 1.0, - "bohr": BOHR * 1E10, + "bohr": BOHR * 1e10, "nm": 10.0, - "m": 1E10, + "m": 1e10, } + def check_unit(unit): if unit not in econvs.keys() and unit not in lconvs.keys(): try: @@ -38,6 +39,7 @@ def check_unit(unit): except Exception: raise RuntimeError(f"Invalid unit: {unit}") + class Conversion(ABC): def __init__(self, unitA, unitB, check=True): """ @@ -48,7 +50,7 @@ def __init__(self, unitA, unitB, check=True): unitA : str, unit to be converted unitB : str, unit which unitA is converted to, i.e. `1 unitA = self._value unitB` check : bool, whether to check unit validity - + Examples -------- >>> conv = Conversion("foo", "bar", check=False) @@ -64,19 +66,20 @@ def __init__(self, unitA, unitB, check=True): self.unitA = unitA self.unitB = unitB self._value = 0.0 - + def value(self): return self._value - + def set_value(self, value): self._value = value - + def __repr__(self): return f"1 {self.unitA} = {self._value} {self.unitB}" - + def __str__(self): return self.__repr__() + class EnergyConversion(Conversion): def __init__(self, unitA, unitB): """ @@ -91,6 +94,7 @@ def __init__(self, unitA, unitB): super().__init__(unitA, unitB) self.set_value(econvs[unitA] / econvs[unitB]) + class LengthConversion(Conversion): def __init__(self, unitA, unitB): """ @@ -105,6 +109,7 @@ def __init__(self, unitA, unitB): super().__init__(unitA, unitB) self.set_value(lconvs[unitA] / lconvs[unitB]) + class ForceConversion(Conversion): def __init__(self, unitA, unitB): """ @@ -125,6 +130,7 @@ def __init__(self, unitA, unitB): lconv = LengthConversion(unitA.split("/")[1], unitB.split("/")[1]).value() self.set_value(econv / lconv) + class PressureConversion(Conversion): def __init__(self, unitA, unitB): """ @@ -148,18 +154,18 @@ def __init__(self, unitA, unitB): econv = EnergyConversion(eunitA, eunitB).value() * factorA / factorB lconv = LengthConversion(lunitA, lunitB).value() self.set_value(econv / lconv**3) - + def _convert_unit(self, unit): if unit == "Pa" or unit == "pa": return "J/m^3", 1 elif unit == "kPa" or unit == "kpa": return "kJ/m^3", 1 elif unit == "GPa" or unit == "Gpa": - return "kJ/m^3", 1E6 + return "kJ/m^3", 1e6 elif unit == "bar": - return "J/m^3", 1E5 + return "J/m^3", 1e5 elif unit == "kbar": - return "kJ/m^3", 1E5 + return "kJ/m^3", 1e5 else: return unit, 1 diff --git a/dpdata/utils.py b/dpdata/utils.py index d0ccb26b..90fef137 100644 --- a/dpdata/utils.py +++ b/dpdata/utils.py @@ -1,38 +1,44 @@ import numpy as np from dpdata.periodic_table import Element -def elements_index_map(elements,standard=False,inverse=False): + +def elements_index_map(elements, standard=False, inverse=False): if standard: elements.sort(key=lambda x: Element(x).Z) if inverse: - return dict(zip(range(len(elements)),elements)) + return dict(zip(range(len(elements)), elements)) else: - return dict(zip(elements,range(len(elements)))) + return dict(zip(elements, range(len(elements)))) + + # %% -def remove_pbc(system, protect_layer = 9): + +def remove_pbc(system, protect_layer=9): nframes = len(system["coords"]) - natoms = len(system['coords'][0]) + natoms = len(system["coords"][0]) for ff in range(nframes): - tmpcoord = system['coords'][ff] - cog = np.average(tmpcoord, axis = 0) + tmpcoord = system["coords"][ff] + cog = np.average(tmpcoord, axis=0) dist = tmpcoord - np.tile(cog, [natoms, 1]) - max_dist = np.max(np.linalg.norm(dist, axis = 1)) + max_dist = np.max(np.linalg.norm(dist, axis=1)) h_cell_size = max_dist + protect_layer cell_size = h_cell_size * 2 - shift = np.array([1,1,1]) * h_cell_size - cog - system['coords'][ff] = system['coords'][ff] + np.tile(shift, [natoms, 1]) - system['cells'][ff] = cell_size * np.eye(3) + shift = np.array([1, 1, 1]) * h_cell_size - cog + system["coords"][ff] = system["coords"][ff] + np.tile(shift, [natoms, 1]) + system["cells"][ff] = cell_size * np.eye(3) return system + def add_atom_names(data, atom_names): """ Add atom_names that do not exist. """ - data['atom_names'].extend(atom_names) - data['atom_numbs'].extend([0 for _ in atom_names]) + data["atom_names"].extend(atom_names) + data["atom_numbs"].extend([0 for _ in atom_names]) return data + def sort_atom_names(data, type_map=None): """ Sort atom_names of the system and reorder atom_numbs and atom_types accoarding @@ -47,7 +53,7 @@ def sort_atom_names(data, type_map=None): if type_map is not None: # assign atom_names index to the specify order # atom_names must be a subset of type_map - assert (set(data['atom_names']).issubset(set(type_map))) + assert set(data["atom_names"]).issubset(set(type_map)) # for the condition that type_map is a proper superset of atom_names # new_atoms = set(type_map) - set(data["atom_names"]) new_atoms = [e for e in type_map if e not in data["atom_names"]] @@ -57,16 +63,17 @@ def sort_atom_names(data, type_map=None): # a[as[a]] == b[as[b]] as == argsort # as[as[b]] == as^{-1}[b] # a[as[a][as[as[b]]]] = b[as[b][as^{-1}[b]]] = b[id] - idx = np.argsort(data['atom_names'])[np.argsort(np.argsort(type_map))] + idx = np.argsort(data["atom_names"])[np.argsort(np.argsort(type_map))] else: # index that will sort an array by alphabetical order - idx = np.argsort(data['atom_names']) + idx = np.argsort(data["atom_names"]) # sort atom_names, atom_numbs, atom_types by idx - data['atom_names'] = list(np.array(data['atom_names'])[idx]) - data['atom_numbs'] = list(np.array(data['atom_numbs'])[idx]) - data['atom_types'] = np.argsort(idx)[data['atom_types']] + data["atom_names"] = list(np.array(data["atom_names"])[idx]) + data["atom_numbs"] = list(np.array(data["atom_numbs"])[idx]) + data["atom_types"] = np.argsort(idx)[data["atom_types"]] return data + def uniq_atom_names(data): """ Make the atom names uniq. For example @@ -80,12 +87,14 @@ def uniq_atom_names(data): """ unames = [] uidxmap = [] - for idx,ii in enumerate(data['atom_names']): + for idx, ii in enumerate(data["atom_names"]): if ii not in unames: unames.append(ii) uidxmap.append(unames.index(ii)) - data['atom_names'] = unames - tmp_type = list(data['atom_types']).copy() - data['atom_types'] = np.array([uidxmap[jj] for jj in tmp_type], dtype=int) - data['atom_numbs'] = [sum( ii == data['atom_types'] ) for ii in range(len(data['atom_names'])) ] + data["atom_names"] = unames + tmp_type = list(data["atom_types"]).copy() + data["atom_types"] = np.array([uidxmap[jj] for jj in tmp_type], dtype=int) + data["atom_numbs"] = [ + sum(ii == data["atom_types"]) for ii in range(len(data["atom_names"])) + ] return data diff --git a/dpdata/vasp/outcar.py b/dpdata/vasp/outcar.py index 3e32a146..ec26a181 100644 --- a/dpdata/vasp/outcar.py +++ b/dpdata/vasp/outcar.py @@ -2,103 +2,122 @@ import re import warnings -def system_info(lines, type_idx_zero = False): + +def system_info(lines, type_idx_zero=False): atom_names = [] atom_numbs = None nelm = None for ii in lines: - ii_word_list=ii.split() - if 'TITEL' in ii : + ii_word_list = ii.split() + if "TITEL" in ii: # get atom names from POTCAR info, tested only for PAW_PBE ... - _ii=ii.split()[3] - if '_' in _ii: + _ii = ii.split()[3] + if "_" in _ii: # for case like : TITEL = PAW_PBE Sn_d 06Sep2000 - atom_names.append(_ii.split('_')[0]) + atom_names.append(_ii.split("_")[0]) else: atom_names.append(_ii) - #a stricker check for "NELM"; compatible with distingct formats in different versions(6 and older, newers_expect-to-work) of vasp + # a stricker check for "NELM"; compatible with distingct formats in different versions(6 and older, newers_expect-to-work) of vasp elif nelm is None: - m = re.search(r'NELM\s*=\s*(\d+)', ii) + m = re.search(r"NELM\s*=\s*(\d+)", ii) if m: nelm = int(m.group(1)) - if 'ions per type' in ii : + if "ions per type" in ii: atom_numbs_ = [int(s) for s in ii.split()[4:]] - if atom_numbs is None : + if atom_numbs is None: atom_numbs = atom_numbs_ - else : - assert (atom_numbs == atom_numbs_), "in consistent numb atoms in OUTCAR" - assert(nelm is not None), "cannot find maximum steps for each SC iteration" - assert(atom_numbs is not None), "cannot find ion type info in OUTCAR" - atom_names = atom_names[:len(atom_numbs)] + else: + assert atom_numbs == atom_numbs_, "in consistent numb atoms in OUTCAR" + assert nelm is not None, "cannot find maximum steps for each SC iteration" + assert atom_numbs is not None, "cannot find ion type info in OUTCAR" + atom_names = atom_names[: len(atom_numbs)] atom_types = [] - for idx,ii in enumerate(atom_numbs): - for jj in range(ii) : - if type_idx_zero : + for idx, ii in enumerate(atom_numbs): + for jj in range(ii): + if type_idx_zero: atom_types.append(idx) - else : - atom_types.append(idx+1) - return atom_names, atom_numbs, np.array(atom_types, dtype = int), nelm + else: + atom_types.append(idx + 1) + return atom_names, atom_numbs, np.array(atom_types, dtype=int), nelm -def get_outcar_block(fp, ml = False): +def get_outcar_block(fp, ml=False): blk = [] - energy_token = ['free energy TOTEN', 'free energy ML TOTEN'] + energy_token = ["free energy TOTEN", "free energy ML TOTEN"] ml_index = int(ml) - for ii in fp : - if not ii : + for ii in fp: + if not ii: return blk - blk.append(ii.rstrip('\n')) + blk.append(ii.rstrip("\n")) if energy_token[ml_index] in ii: return blk return blk + # we assume that the force is printed ... -def get_frames(fname, begin = 0, step = 1, ml = False, convergence_check=True): +def get_frames(fname, begin=0, step=1, ml=False, convergence_check=True): fp = open(fname) blk = get_outcar_block(fp) - atom_names, atom_numbs, atom_types, nelm = system_info(blk, type_idx_zero = True) + atom_names, atom_numbs, atom_types, nelm = system_info(blk, type_idx_zero=True) ntot = sum(atom_numbs) all_coords = [] all_cells = [] all_energies = [] all_forces = [] - all_virials = [] + all_virials = [] cc = 0 rec_failed = [] - while len(blk) > 0 : - if cc >= begin and (cc - begin) % step == 0 : - coord, cell, energy, force, virial, is_converge = analyze_block(blk, ntot, nelm, ml) + while len(blk) > 0: + if cc >= begin and (cc - begin) % step == 0: + coord, cell, energy, force, virial, is_converge = analyze_block( + blk, ntot, nelm, ml + ) if len(coord) == 0: break - if is_converge or not convergence_check: + if is_converge or not convergence_check: all_coords.append(coord) all_cells.append(cell) all_energies.append(energy) all_forces.append(force) - if virial is not None : + if virial is not None: all_virials.append(virial) if not is_converge: - rec_failed.append(cc+1) + rec_failed.append(cc + 1) blk = get_outcar_block(fp, ml) cc += 1 - - if len(rec_failed) > 0 : - prt = "so they are not collected." if convergence_check else "but they are still collected due to the requirement for ignoring convergence checks." - warnings.warn(f"The following structures were unconverged: {rec_failed}; "+prt) - - if len(all_virials) == 0 : + + if len(rec_failed) > 0: + prt = ( + "so they are not collected." + if convergence_check + else "but they are still collected due to the requirement for ignoring convergence checks." + ) + warnings.warn( + f"The following structures were unconverged: {rec_failed}; " + prt + ) + + if len(all_virials) == 0: all_virials = None - else : + else: all_virials = np.array(all_virials) fp.close() - return atom_names, atom_numbs, atom_types, np.array(all_cells), np.array(all_coords), np.array(all_energies), np.array(all_forces), all_virials + return ( + atom_names, + atom_numbs, + atom_types, + np.array(all_cells), + np.array(all_coords), + np.array(all_energies), + np.array(all_forces), + all_virials, + ) -def analyze_block(lines, ntot, nelm, ml = False): +def analyze_block(lines, ntot, nelm, ml=False): coord = [] cell = [] energy = None @@ -106,36 +125,39 @@ def analyze_block(lines, ntot, nelm, ml = False): virial = None is_converge = True sc_index = 0 - #select different searching tokens based on the ml label - energy_token = ['free energy TOTEN', 'free energy ML TOTEN'] + # select different searching tokens based on the ml label + energy_token = ["free energy TOTEN", "free energy ML TOTEN"] energy_index = [4, 5] - virial_token = ['FORCE on cell =-STRESS in cart. coord. units', 'ML FORCE'] + virial_token = ["FORCE on cell =-STRESS in cart. coord. units", "ML FORCE"] virial_index = [14, 4] - cell_token = ['VOLUME and BASIS', 'ML FORCE'] + cell_token = ["VOLUME and BASIS", "ML FORCE"] cell_index = [5, 12] ml_index = int(ml) - for idx,ii in enumerate(lines): - #if set ml == True, is_converged will always be True - if ('Iteration' in ii) and (not ml): + for idx, ii in enumerate(lines): + # if set ml == True, is_converged will always be True + if ("Iteration" in ii) and (not ml): sc_index = int(ii.split()[3][:-1]) if sc_index >= nelm: is_converge = False elif energy_token[ml_index] in ii: energy = float(ii.split()[energy_index[ml_index]]) - assert((force is not None) and len(coord) > 0 and len(cell) > 0) + assert (force is not None) and len(coord) > 0 and len(cell) > 0 return coord, cell, energy, force, virial, is_converge elif cell_token[ml_index] in ii: - for dd in range(3) : - tmp_l = lines[idx+cell_index[ml_index]+dd] - cell.append([float(ss) - for ss in tmp_l.replace('-',' -').split()[0:3]]) + for dd in range(3): + tmp_l = lines[idx + cell_index[ml_index] + dd] + cell.append([float(ss) for ss in tmp_l.replace("-", " -").split()[0:3]]) elif virial_token[ml_index] in ii: in_kB_index = virial_index[ml_index] - while idx+in_kB_index < len(lines) and (not lines[idx+in_kB_index].split()[0:2] == ["in", "kB"]) : + while idx + in_kB_index < len(lines) and ( + not lines[idx + in_kB_index].split()[0:2] == ["in", "kB"] + ): in_kB_index += 1 - assert(idx+in_kB_index < len(lines)),'ERROR: "in kB" is not found in OUTCAR. Unable to extract virial.' - tmp_v = [float(ss) for ss in lines[idx+in_kB_index].split()[2:8]] - virial = np.zeros([3,3]) + assert idx + in_kB_index < len( + lines + ), 'ERROR: "in kB" is not found in OUTCAR. Unable to extract virial.' + tmp_v = [float(ss) for ss in lines[idx + in_kB_index].split()[2:8]] + virial = np.zeros([3, 3]) virial[0][0] = tmp_v[0] virial[1][1] = tmp_v[1] virial[2][2] = tmp_v[2] @@ -145,8 +167,8 @@ def analyze_block(lines, ntot, nelm, ml = False): virial[2][1] = tmp_v[4] virial[0][2] = tmp_v[5] virial[2][0] = tmp_v[5] - elif 'TOTAL-FORCE' in ii and (("ML" in ii) == ml): - for jj in range(idx+2, idx+2+ntot) : + elif "TOTAL-FORCE" in ii and (("ML" in ii) == ml): + for jj in range(idx + 2, idx + 2 + ntot): tmp_l = lines[jj] info = [float(ss) for ss in tmp_l.split()] coord.append(info[:3]) diff --git a/dpdata/vasp/poscar.py b/dpdata/vasp/poscar.py index 53d93782..1100eb85 100644 --- a/dpdata/vasp/poscar.py +++ b/dpdata/vasp/poscar.py @@ -1,86 +1,90 @@ -#!/usr/bin/python3 +#!/usr/bin/python3 import numpy as np -def _to_system_data_lower(lines, cartesian = True) : - ''' + +def _to_system_data_lower(lines, cartesian=True): + """ Treat as cartesian poscar - ''' + """ system = {} - system['atom_names'] = [str(ii) for ii in lines[5].split()] - system['atom_numbs'] = [int(ii) for ii in lines[6].split()] + system["atom_names"] = [str(ii) for ii in lines[5].split()] + system["atom_numbs"] = [int(ii) for ii in lines[6].split()] scale = float(lines[1]) cell = [] - for ii in range(2,5) : + for ii in range(2, 5): boxv = [float(jj) for jj in lines[ii].split()] boxv = np.array(boxv) * scale cell.append(boxv) - system['cells'] = [np.array(cell)] - natoms = sum(system['atom_numbs']) + system["cells"] = [np.array(cell)] + natoms = sum(system["atom_numbs"]) coord = [] - for ii in range(8, 8+natoms) : + for ii in range(8, 8 + natoms): tmpv = [float(jj) for jj in lines[ii].split()[:3]] - if cartesian : + if cartesian: tmpv = np.array(tmpv) * scale - else : - tmpv = np.matmul(np.array(tmpv), system['cells'][0]) + else: + tmpv = np.matmul(np.array(tmpv), system["cells"][0]) coord.append(tmpv) - system['coords'] = [np.array(coord)] - system['orig'] = np.zeros(3) + system["coords"] = [np.array(coord)] + system["orig"] = np.zeros(3) atom_types = [] - for idx,ii in enumerate(system['atom_numbs']) : - for jj in range(ii) : + for idx, ii in enumerate(system["atom_numbs"]): + for jj in range(ii): atom_types.append(idx) - system['atom_types'] = np.array(atom_types, dtype = int) - system['cells'] = np.array(system['cells']) - system['coords'] = np.array(system['coords']) + system["atom_types"] = np.array(atom_types, dtype=int) + system["cells"] = np.array(system["cells"]) + system["coords"] = np.array(system["coords"]) return system -def to_system_data(lines) : +def to_system_data(lines): # remove the line that has 'selective dynamics' - if lines[7][0] == 'S' or lines[7][0] == 's' : + if lines[7][0] == "S" or lines[7][0] == "s": lines.pop(7) - is_cartesian = (lines[7][0] in ['C', 'c', 'K', 'k']) - if not is_cartesian : - if not (lines[7][0] in ['d', 'D']) : - raise RuntimeError('seem not to be a valid POSCAR of vasp 5.x, may be a POSCAR of vasp 4.x?') + is_cartesian = lines[7][0] in ["C", "c", "K", "k"] + if not is_cartesian: + if not (lines[7][0] in ["d", "D"]): + raise RuntimeError( + "seem not to be a valid POSCAR of vasp 5.x, may be a POSCAR of vasp 4.x?" + ) return _to_system_data_lower(lines, is_cartesian) -def from_system_data(system, f_idx = 0, skip_zeros = True) : - ret = '' - for ii,name in zip(system['atom_numbs'], system['atom_names']) : - if ii == 0: continue - ret += '%s%d ' % (name, ii) - ret += '\n' - ret += '1.0\n' - for ii in system['cells'][f_idx] : - for jj in ii : - ret += '%.16e ' % jj - ret += '\n' - for idx,ii in enumerate(system['atom_names']) : - if system['atom_numbs'][idx] == 0: continue - ret += '%s ' % ii - ret += '\n' - for ii in system['atom_numbs'] : - if ii == 0: continue - ret += '%d ' % ii - ret += '\n' +def from_system_data(system, f_idx=0, skip_zeros=True): + ret = "" + for ii, name in zip(system["atom_numbs"], system["atom_names"]): + if ii == 0: + continue + ret += "%s%d " % (name, ii) + ret += "\n" + ret += "1.0\n" + for ii in system["cells"][f_idx]: + for jj in ii: + ret += "%.16e " % jj + ret += "\n" + for idx, ii in enumerate(system["atom_names"]): + if system["atom_numbs"][idx] == 0: + continue + ret += "%s " % ii + ret += "\n" + for ii in system["atom_numbs"]: + if ii == 0: + continue + ret += "%d " % ii + ret += "\n" # should use Cartesian for VESTA software - ret += 'Cartesian\n' - atype = system['atom_types'] - posis = system['coords'][f_idx] + ret += "Cartesian\n" + atype = system["atom_types"] + posis = system["coords"][f_idx] # atype_idx = [[idx,tt] for idx,tt in enumerate(atype)] # sort_idx = np.argsort(atype, kind = 'mergesort') sort_idx = np.lexsort((np.arange(len(atype)), atype)) atype = atype[sort_idx] posis = posis[sort_idx] posi_list = [] - for ii in posis : - posi_list.append('%15.10f %15.10f %15.10f' % \ - (ii[0], ii[1], ii[2]) - ) - posi_list.append('') - ret += '\n'.join(posi_list) + for ii in posis: + posi_list.append("%15.10f %15.10f %15.10f" % (ii[0], ii[1], ii[2])) + posi_list.append("") + ret += "\n".join(posi_list) return ret diff --git a/dpdata/vasp/xml.py b/dpdata/vasp/xml.py index d5e1fb03..f87b5716 100755 --- a/dpdata/vasp/xml.py +++ b/dpdata/vasp/xml.py @@ -3,75 +3,89 @@ import xml.etree.ElementTree as ET import numpy as np -def check_name(item, name) : - assert (item.attrib['name'] == name), "item attrib '%s' dose not math required '%s'" % (item.attrib['name'], name) -def get_varray(varray) : +def check_name(item, name): + assert ( + item.attrib["name"] == name + ), "item attrib '%s' dose not math required '%s'" % (item.attrib["name"], name) + + +def get_varray(varray): array = [] - for vv in varray.findall('v') : - array.append([ float(ii) for ii in vv.text.split()]) + for vv in varray.findall("v"): + array.append([float(ii) for ii in vv.text.split()]) return np.array(array) -def analyze_atominfo(atominfo_xml) : - check_name(atominfo_xml.find('array'), 'atoms') + +def analyze_atominfo(atominfo_xml): + check_name(atominfo_xml.find("array"), "atoms") eles = [] types = [] - for ii in atominfo_xml.find('array').find('set') : - eles .append((ii.findall('c')[0].text.strip())) - types.append(int(ii.findall('c')[1].text)) + for ii in atominfo_xml.find("array").find("set"): + eles.append((ii.findall("c")[0].text.strip())) + types.append(int(ii.findall("c")[1].text)) uniq_ele = [] - for ii in eles : - if not(ii in uniq_ele): + for ii in eles: + if not (ii in uniq_ele): uniq_ele.append(ii) return uniq_ele, types -def analyze_calculation(cc) : - structure_xml = cc.find('structure') - check_name(structure_xml.find('crystal').find('varray'), 'basis') - check_name(structure_xml.find('varray'), 'positions') - cell = get_varray(structure_xml.find('crystal').find('varray')) - posi = get_varray(structure_xml.find('varray')) + +def analyze_calculation(cc): + structure_xml = cc.find("structure") + check_name(structure_xml.find("crystal").find("varray"), "basis") + check_name(structure_xml.find("varray"), "positions") + cell = get_varray(structure_xml.find("crystal").find("varray")) + posi = get_varray(structure_xml.find("varray")) strs = None - for vv in cc.findall('varray') : - if vv.attrib['name'] == 'forces' : - forc = get_varray(vv) - elif vv.attrib['name'] == 'stress' : + for vv in cc.findall("varray"): + if vv.attrib["name"] == "forces": + forc = get_varray(vv) + elif vv.attrib["name"] == "stress": strs = get_varray(vv) - for ii in cc.find('energy').findall('i') : - if ii.attrib['name'] == 'e_fr_energy' : + for ii in cc.find("energy").findall("i"): + if ii.attrib["name"] == "e_fr_energy": ener = float(ii.text) # print(ener) # return 'a' return posi, cell, ener, forc, strs -def formulate_config(eles, types, posi, cell, ener, forc, strs_) : + +def formulate_config(eles, types, posi, cell, ener, forc, strs_): strs = strs_ / 1602 natoms = len(types) - ntypes = len(eles) + ntypes = len(eles) ret = "" - ret += "#N %d %d\n" % (natoms, ntypes-1) + ret += "#N %d %d\n" % (natoms, ntypes - 1) ret += "#C " - for ii in eles : - ret += ' ' + ii - ret += '\n' + for ii in eles: + ret += " " + ii + ret += "\n" ret += "##\n" - ret += '#X %13.8f %13.8f %13.8f\n' % (cell[0][0], cell[0][1], cell[0][2]) - ret += '#Y %13.8f %13.8f %13.8f\n' % (cell[1][0], cell[1][1], cell[1][2]) - ret += '#Z %13.8f %13.8f %13.8f\n' % (cell[2][0], cell[2][1], cell[2][2]) + ret += "#X %13.8f %13.8f %13.8f\n" % (cell[0][0], cell[0][1], cell[0][2]) + ret += "#Y %13.8f %13.8f %13.8f\n" % (cell[1][0], cell[1][1], cell[1][2]) + ret += "#Z %13.8f %13.8f %13.8f\n" % (cell[2][0], cell[2][1], cell[2][2]) ret += "#W 1.0\n" ret += "#E %.10f\n" % (ener / natoms) - ret += '#S %.9e %.9e %.9e %.9e %.9e %.9e\n' % \ - (strs[0][0], strs[1][1], strs[2][2], strs[0][1], strs[1][2], strs[0][2]) - ret += '#F\n' - for ii in range(natoms) : + ret += "#S %.9e %.9e %.9e %.9e %.9e %.9e\n" % ( + strs[0][0], + strs[1][1], + strs[2][2], + strs[0][1], + strs[1][2], + strs[0][2], + ) + ret += "#F\n" + for ii in range(natoms): sp = np.matmul(cell.T, posi[ii]) - ret += '%d' % (types[ii]-1) - ret += ' %12.6f %12.6f %12.6f' % (sp[0], sp[1], sp[2]) - ret += ' %12.6f %12.6f %12.6f' % (forc[ii][0], forc[ii][1], forc[ii][2]) - ret += '\n' + ret += "%d" % (types[ii] - 1) + ret += " %12.6f %12.6f %12.6f" % (sp[0], sp[1], sp[2]) + ret += " %12.6f %12.6f %12.6f" % (forc[ii][0], forc[ii][1], forc[ii][2]) + ret += "\n" return ret -def analyze (fname, type_idx_zero = False, begin = 0, step = 1) : + +def analyze(fname, type_idx_zero=False, begin=0, step=1): """ can deal with broken xml file """ @@ -83,22 +97,37 @@ def analyze (fname, type_idx_zero = False, begin = 0, step = 1) : cc = 0 try: for event, elem in ET.iterparse(fname): - if elem.tag == 'atominfo' : + if elem.tag == "atominfo": eles, types = analyze_atominfo(elem) - types = np.array(types, dtype = int) - if type_idx_zero : + types = np.array(types, dtype=int) + if type_idx_zero: types = types - 1 - if elem.tag == 'calculation' : + if elem.tag == "calculation": posi, cell, ener, forc, strs = analyze_calculation(elem) - if cc >= begin and (cc - begin) % step == 0 : + if cc >= begin and (cc - begin) % step == 0: all_posi.append(posi) all_cell.append(cell) all_ener.append(ener) all_forc.append(forc) - if strs is not None : - all_strs.append(strs) + if strs is not None: + all_strs.append(strs) cc += 1 except ET.ParseError: - return eles, types, np.array(all_cell), np.array(all_posi), np.array(all_ener), np.array(all_forc), np.array(all_strs) - return eles, types, np.array(all_cell), np.array(all_posi), np.array(all_ener), np.array(all_forc), np.array(all_strs) - + return ( + eles, + types, + np.array(all_cell), + np.array(all_posi), + np.array(all_ener), + np.array(all_forc), + np.array(all_strs), + ) + return ( + eles, + types, + np.array(all_cell), + np.array(all_posi), + np.array(all_ener), + np.array(all_forc), + np.array(all_strs), + ) diff --git a/dpdata/xyz/quip_gap_xyz.py b/dpdata/xyz/quip_gap_xyz.py index e902958a..ea2d9a77 100644 --- a/dpdata/xyz/quip_gap_xyz.py +++ b/dpdata/xyz/quip_gap_xyz.py @@ -1,24 +1,30 @@ #!/usr/bin/env python3 -#%% +#%% import numpy as np from collections import OrderedDict -import re +import re + + class QuipGapxyzSystems(object): """ - deal with QuipGapxyzFile + deal with QuipGapxyzFile """ + def __init__(self, file_name): - self.file_object = open(file_name, 'r') + self.file_object = open(file_name, "r") self.block_generator = self.get_block_generator() + def __iter__(self): return self + def __next__(self): return self.handle_single_xyz_frame(next(self.block_generator)) + def __del__(self): self.file_object.close() - + def get_block_generator(self): - p3 = re.compile(r'^\s*(\d+)\s*') + p3 = re.compile(r"^\s*(\d+)\s*") while True: line = self.file_object.readline() if not line: @@ -27,28 +33,45 @@ def get_block_generator(self): atom_num = int(p3.match(line).group(1)) lines = [] lines.append(line) - for ii in range(atom_num+1): + for ii in range(atom_num + 1): lines.append(self.file_object.readline()) if not lines[-1]: - raise RuntimeError("this xyz file may lack of lines, should be {};lines:{}".format(atom_num+2, lines)) + raise RuntimeError( + "this xyz file may lack of lines, should be {};lines:{}".format( + atom_num + 2, lines + ) + ) yield lines - + @staticmethod def handle_single_xyz_frame(lines): - atom_num = int(lines[0].strip('\n').strip()) + atom_num = int(lines[0].strip("\n").strip()) if len(lines) != atom_num + 2: - raise RuntimeError("format error, atom_num=={}, {}!=atom_num+2".format(atom_num, len(lines))) - data_format_line = lines[1].strip('\n').strip()+str(' ') - field_value_pattern= re.compile(r'(?P\S+)=(?P[\'\"]?)(?P.*?)(?P=quote)\s+') - prop_pattern = re.compile(r'(?P\w+?):(?P[a-zA-Z]):(?P\d+)') + raise RuntimeError( + "format error, atom_num=={}, {}!=atom_num+2".format( + atom_num, len(lines) + ) + ) + data_format_line = lines[1].strip("\n").strip() + str(" ") + field_value_pattern = re.compile( + r"(?P\S+)=(?P[\'\"]?)(?P.*?)(?P=quote)\s+" + ) + prop_pattern = re.compile( + r"(?P\w+?):(?P[a-zA-Z]):(?P\d+)" + ) - data_format_list= [kv_dict.groupdict() for kv_dict in field_value_pattern.finditer(data_format_line)] + data_format_list = [ + kv_dict.groupdict() + for kv_dict in field_value_pattern.finditer(data_format_line) + ] field_dict = {} for item in data_format_list: - field_dict[item['key']]=item['value'] + field_dict[item["key"]] = item["value"] - Properties = field_dict['Properties'] - prop_list = [kv_dict.groupdict() for kv_dict in prop_pattern.finditer(Properties)] + Properties = field_dict["Properties"] + prop_list = [ + kv_dict.groupdict() for kv_dict in prop_pattern.finditer(Properties) + ] data_lines = [] for line in lines[2:]: @@ -60,38 +83,58 @@ def handle_single_xyz_frame(lines): coords_array = None Z_array = None force_array = None - virials = None + virials = None for kv_dict in prop_list: - if kv_dict['key'] == 'species': - if kv_dict['datatype'] != 'S': - raise RuntimeError("datatype for species must be 'S' instead of {}".format(kv_dict['datatype'])) - field_length = int(kv_dict['value']) - type_array = data_array[:,used_colomn:used_colomn+field_length].flatten() + if kv_dict["key"] == "species": + if kv_dict["datatype"] != "S": + raise RuntimeError( + "datatype for species must be 'S' instead of {}".format( + kv_dict["datatype"] + ) + ) + field_length = int(kv_dict["value"]) + type_array = data_array[ + :, used_colomn : used_colomn + field_length + ].flatten() used_colomn += field_length continue - elif kv_dict['key'] == 'pos': - if kv_dict['datatype'] != 'R': - raise RuntimeError("datatype for pos must be 'R' instead of {}".format(kv_dict['datatype'])) - field_length = int(kv_dict['value']) - coords_array = data_array[:,used_colomn:used_colomn+field_length] + elif kv_dict["key"] == "pos": + if kv_dict["datatype"] != "R": + raise RuntimeError( + "datatype for pos must be 'R' instead of {}".format( + kv_dict["datatype"] + ) + ) + field_length = int(kv_dict["value"]) + coords_array = data_array[:, used_colomn : used_colomn + field_length] used_colomn += field_length continue - elif kv_dict['key'] == 'Z': - if kv_dict['datatype'] != 'I': - raise RuntimeError("datatype for pos must be 'R' instead of {}".format(kv_dict['datatype'])) - field_length = int(kv_dict['value']) - Z_array = data_array[:,used_colomn:used_colomn+field_length].flatten() + elif kv_dict["key"] == "Z": + if kv_dict["datatype"] != "I": + raise RuntimeError( + "datatype for pos must be 'R' instead of {}".format( + kv_dict["datatype"] + ) + ) + field_length = int(kv_dict["value"]) + Z_array = data_array[ + :, used_colomn : used_colomn + field_length + ].flatten() used_colomn += field_length continue - elif kv_dict['key'] == 'force': - if kv_dict['datatype'] != 'R': - raise RuntimeError("datatype for pos must be 'R' instead of {}".format(kv_dict['datatype'])) - field_length = int(kv_dict['value']) - force_array = data_array[:,used_colomn:used_colomn+field_length] + elif kv_dict["key"] == "force": + if kv_dict["datatype"] != "R": + raise RuntimeError( + "datatype for pos must be 'R' instead of {}".format( + kv_dict["datatype"] + ) + ) + field_length = int(kv_dict["value"]) + force_array = data_array[:, used_colomn : used_colomn + field_length] used_colomn += field_length continue else: - raise RuntimeError("unknown field {}".format(kv_dict['key'])) + raise RuntimeError("unknown field {}".format(kv_dict["key"])) type_num_dict = OrderedDict() atom_type_list = [] @@ -111,23 +154,35 @@ def handle_single_xyz_frame(lines): atom_type_list.append(temp_atom_index) type_num_dict[ii] += 1 type_num_list = [] - for atom_type,atom_num in type_num_dict.items(): - type_num_list.append((atom_type,atom_num)) + for atom_type, atom_num in type_num_dict.items(): + type_num_list.append((atom_type, atom_num)) type_num_array = np.array(type_num_list) - if field_dict.get('virial', None): - virials = np.array([np.array(list(filter(bool,field_dict['virial'].split(' ')))).reshape(3,3)]).astype('float32') + if field_dict.get("virial", None): + virials = np.array( + [ + np.array( + list(filter(bool, field_dict["virial"].split(" "))) + ).reshape(3, 3) + ] + ).astype("float32") else: virials = None info_dict = {} - info_dict['atom_names'] = list(type_num_array[:,0]) - info_dict['atom_numbs'] = list(type_num_array[:,1].astype(int)) - info_dict['atom_types'] = np.array(atom_type_list).astype(int) - info_dict['cells'] = np.array([np.array(list(filter(bool,field_dict['Lattice'].split(' ')))).reshape(3,3)]).astype('float32') - info_dict['coords'] = np.array([coords_array]).astype('float32') - info_dict['energies'] = np.array([field_dict['energy']]).astype('float32') - info_dict['forces'] = np.array([force_array]).astype('float32') + info_dict["atom_names"] = list(type_num_array[:, 0]) + info_dict["atom_numbs"] = list(type_num_array[:, 1].astype(int)) + info_dict["atom_types"] = np.array(atom_type_list).astype(int) + info_dict["cells"] = np.array( + [ + np.array(list(filter(bool, field_dict["Lattice"].split(" ")))).reshape( + 3, 3 + ) + ] + ).astype("float32") + info_dict["coords"] = np.array([coords_array]).astype("float32") + info_dict["energies"] = np.array([field_dict["energy"]]).astype("float32") + info_dict["forces"] = np.array([force_array]).astype("float32") if virials is not None: - info_dict['virials'] = virials - info_dict['orig'] = np.zeros(3) + info_dict["virials"] = virials + info_dict["orig"] = np.zeros(3) return info_dict diff --git a/dpdata/xyz/xyz.py b/dpdata/xyz/xyz.py index 0ca5ac31..a28bafa0 100644 --- a/dpdata/xyz/xyz.py +++ b/dpdata/xyz/xyz.py @@ -2,21 +2,22 @@ import numpy as np -def coord_to_xyz(coord: np.ndarray, types: list)->str: + +def coord_to_xyz(coord: np.ndarray, types: list) -> str: """Convert coordinates and types to xyz format. - + Parameters ---------- coord: np.ndarray coordinates, Nx3 array types: list list of types - + Returns ------- str xyz format string - + Examples -------- >>> coord_to_xyz(np.ones((1,3)), ["C"]) @@ -24,7 +25,7 @@ def coord_to_xyz(coord: np.ndarray, types: list)->str: C 1.000000 1.000000 1.000000 """ - buff = [str(len(types)), ''] + buff = [str(len(types)), ""] for at, cc in zip(types, coord): buff.append("{} {:.6f} {:.6f} {:.6f}".format(at, *cc)) return "\n".join(buff) @@ -47,7 +48,7 @@ def xyz_to_coord(xyz: str) -> Tuple[np.ndarray, list]: """ symbols = [] coords = [] - for ii, line in enumerate(xyz.split('\n')): + for ii, line in enumerate(xyz.split("\n")): if ii == 0: natoms = int(line.strip()) elif 2 <= ii <= 1 + natoms: @@ -56,4 +57,3 @@ def xyz_to_coord(xyz: str) -> Tuple[np.ndarray, list]: coords.append((float(x), float(y), float(z))) symbols.append(symbol) return np.array(coords), symbols - diff --git a/plugin_example/README.md b/plugin_example/README.md index 10aadf04..322756f9 100644 --- a/plugin_example/README.md +++ b/plugin_example/README.md @@ -21,4 +21,4 @@ Element List : ------------------- X 20 -``` \ No newline at end of file +``` diff --git a/plugin_example/dpdata_random/__init__.py b/plugin_example/dpdata_random/__init__.py index b9ce840d..8e1450c9 100644 --- a/plugin_example/dpdata_random/__init__.py +++ b/plugin_example/dpdata_random/__init__.py @@ -1,24 +1,31 @@ from dpdata.format import Format import numpy as np + @Format.register("random") class RandomFormat(Format): def from_system(self, N, **kwargs): return { "atom_numbs": [20], - "atom_names": ['X'], + "atom_names": ["X"], "atom_types": [0] * 20, - "cells": np.repeat(np.diag(np.diag(np.ones((3, 3))))[np.newaxis,...], N, axis=0) * 100., - "coords": np.random.rand(N, 20, 3) * 100., + "cells": np.repeat( + np.diag(np.diag(np.ones((3, 3))))[np.newaxis, ...], N, axis=0 + ) + * 100.0, + "coords": np.random.rand(N, 20, 3) * 100.0, } def from_labeled_system(self, N, **kwargs): return { "atom_numbs": [20], - "atom_names": ['X'], + "atom_names": ["X"], "atom_types": [0] * 20, - "cells": np.repeat(np.diag(np.diag(np.ones((3, 3))))[np.newaxis,...], N, axis=0) * 100., - "coords": np.random.rand(N, 20, 3) * 100., - "energies": np.random.rand(N) * 100., - "forces": np.random.rand(N, 20, 3) * 100., - } \ No newline at end of file + "cells": np.repeat( + np.diag(np.diag(np.ones((3, 3))))[np.newaxis, ...], N, axis=0 + ) + * 100.0, + "coords": np.random.rand(N, 20, 3) * 100.0, + "energies": np.random.rand(N) * 100.0, + "forces": np.random.rand(N, 20, 3) * 100.0, + } diff --git a/requirements.txt b/requirements.txt index 2a302c7e..1c7c2293 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,2 @@ monty==2.0.4 pymatgen==2019.7.2 - diff --git a/tests/comp_sys.py b/tests/comp_sys.py index 94c3e52a..6618cdee 100644 --- a/tests/comp_sys.py +++ b/tests/comp_sys.py @@ -1,55 +1,61 @@ import numpy as np -class CompSys : - + +class CompSys: def test_len_func(self): - self.assertEqual(len(self.system_1),len(self.system_2)) + self.assertEqual(len(self.system_1), len(self.system_2)) def test_add_func(self): - self.assertEqual(len(self.system_1+self.system_1), - len(self.system_2+self.system_2)) + self.assertEqual( + len(self.system_1 + self.system_1), len(self.system_2 + self.system_2) + ) def test_atom_numbs(self): - self.assertEqual(self.system_1.data['atom_numbs'], - self.system_2.data['atom_numbs']) + self.assertEqual( + self.system_1.data["atom_numbs"], self.system_2.data["atom_numbs"] + ) def test_atom_names(self): - self.assertEqual(self.system_1.data['atom_names'], - self.system_2.data['atom_names']) + self.assertEqual( + self.system_1.data["atom_names"], self.system_2.data["atom_names"] + ) def test_atom_types(self): - np.testing.assert_array_equal(self.system_1.data['atom_types'], - self.system_2.data['atom_types']) + np.testing.assert_array_equal( + self.system_1.data["atom_types"], self.system_2.data["atom_types"] + ) def test_orig(self): - for d0 in range(3) : - self.assertEqual(self.system_1.data['orig'][d0], - self.system_2.data['orig'][d0]) + for d0 in range(3): + self.assertEqual( + self.system_1.data["orig"][d0], self.system_2.data["orig"][d0] + ) def test_nframs(self): - self.assertEqual(self.system_1.get_nframes(), - self.system_2.get_nframes()) + self.assertEqual(self.system_1.get_nframes(), self.system_2.get_nframes()) def test_cell(self): - self.assertEqual(self.system_1.get_nframes(), - self.system_2.get_nframes()) + self.assertEqual(self.system_1.get_nframes(), self.system_2.get_nframes()) if not self.system_1.nopbc and not self.system_2.nopbc: - np.testing.assert_almost_equal(self.system_1.data['cells'], - self.system_2.data['cells'], - decimal = self.places, - err_msg = 'cell failed') - - def test_coord(self): - self.assertEqual(self.system_1.get_nframes(), - self.system_2.get_nframes()) + np.testing.assert_almost_equal( + self.system_1.data["cells"], + self.system_2.data["cells"], + decimal=self.places, + err_msg="cell failed", + ) + + def test_coord(self): + self.assertEqual(self.system_1.get_nframes(), self.system_2.get_nframes()) # think about direct coord - tmp_cell = self.system_1.data['cells'] + tmp_cell = self.system_1.data["cells"] tmp_cell = np.reshape(tmp_cell, [-1, 3]) - tmp_cell_norm = np.reshape(np.linalg.norm(tmp_cell, axis = 1), [-1, 1, 3]) - np.testing.assert_almost_equal(self.system_1.data['coords'] / tmp_cell_norm, - self.system_2.data['coords'] / tmp_cell_norm, - decimal = self.places, - err_msg = 'coord failed') + tmp_cell_norm = np.reshape(np.linalg.norm(tmp_cell, axis=1), [-1, 1, 3]) + np.testing.assert_almost_equal( + self.system_1.data["coords"] / tmp_cell_norm, + self.system_2.data["coords"] / tmp_cell_norm, + decimal=self.places, + err_msg="coord failed", + ) def test_nopbc(self): self.assertEqual(self.system_1.nopbc, self.system_2.nopbc) @@ -59,56 +65,60 @@ def test_data_check(self): self.system_2.check_data() -class CompLabeledSys (CompSys) : - def test_energy(self) : - self.assertEqual(self.system_1.get_nframes(), - self.system_2.get_nframes()) - np.testing.assert_almost_equal(self.system_1.data['energies'], - self.system_2.data['energies'], - decimal = self.e_places, - err_msg = 'energies failed') - - def test_force(self) : - self.assertEqual(self.system_1.get_nframes(), - self.system_2.get_nframes()) - np.testing.assert_almost_equal(self.system_1.data['forces'], - self.system_2.data['forces'], - decimal = self.f_places, - err_msg = 'forces failed') - - def test_virial(self) : - self.assertEqual(self.system_1.get_nframes(), - self.system_2.get_nframes()) +class CompLabeledSys(CompSys): + def test_energy(self): + self.assertEqual(self.system_1.get_nframes(), self.system_2.get_nframes()) + np.testing.assert_almost_equal( + self.system_1.data["energies"], + self.system_2.data["energies"], + decimal=self.e_places, + err_msg="energies failed", + ) + + def test_force(self): + self.assertEqual(self.system_1.get_nframes(), self.system_2.get_nframes()) + np.testing.assert_almost_equal( + self.system_1.data["forces"], + self.system_2.data["forces"], + decimal=self.f_places, + err_msg="forces failed", + ) + + def test_virial(self): + self.assertEqual(self.system_1.get_nframes(), self.system_2.get_nframes()) # if len(self.system_1['virials']) == 0: # self.assertEqual(len(self.system_1['virials']), 0) # return - if not 'virials' in self.system_1: - self.assertFalse('virials' in self.system_2) + if not "virials" in self.system_1: + self.assertFalse("virials" in self.system_2) return - np.testing.assert_almost_equal(self.system_1['virials'], - self.system_2['virials'], - decimal = self.v_places, - err_msg = 'virials failed') + np.testing.assert_almost_equal( + self.system_1["virials"], + self.system_2["virials"], + decimal=self.v_places, + err_msg="virials failed", + ) class MultiSystems: def test_systems_name(self): self.assertEqual(set(self.systems.systems), set(self.system_names)) - + def test_systems_size(self): for name, size in self.system_sizes.items(): self.assertEqual(self.systems[name].get_nframes(), size) - + def test_atom_names(self): self.assertEqual(self.atom_names, self.systems.atom_names) -class IsPBC: +class IsPBC: def test_is_pbc(self): self.assertFalse(self.system_1.nopbc) self.assertFalse(self.system_2.nopbc) -class IsNoPBC: + +class IsNoPBC: def test_is_nopbc(self): self.assertTrue(self.system_1.nopbc) self.assertTrue(self.system_2.nopbc) diff --git a/tests/context.py b/tests/context.py index 6c828a1b..e305ff0f 100644 --- a/tests/context.py +++ b/tests/context.py @@ -1,5 +1,6 @@ -import sys,os -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) +import sys, os + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) import dpdata import dpdata.md.water import dpdata.md.msd diff --git a/tests/poscars/poscar_ref_oh.py b/tests/poscars/poscar_ref_oh.py index 9b12c151..f120183e 100644 --- a/tests/poscars/poscar_ref_oh.py +++ b/tests/poscars/poscar_ref_oh.py @@ -1,42 +1,50 @@ import numpy as np -class TestPOSCARoh : +class TestPOSCARoh: def test_atom_numbs(self): - self.assertEqual(self.system.data['atom_numbs'], [1,1]) + self.assertEqual(self.system.data["atom_numbs"], [1, 1]) def test_atom_names(self): - self.assertEqual(self.system.data['atom_names'], ['O','H']) + self.assertEqual(self.system.data["atom_names"], ["O", "H"]) def test_atom_types(self): - self.assertEqual(self.system.data['atom_types'][0], 0) - self.assertEqual(self.system.data['atom_types'][1], 1) + self.assertEqual(self.system.data["atom_types"][0], 0) + self.assertEqual(self.system.data["atom_types"][1], 1) def test_orig(self): - for d0 in range(3) : - self.assertEqual(self.system.data['orig'][d0], 0) + for d0 in range(3): + self.assertEqual(self.system.data["orig"][d0], 0) def test_cell(self): - ovito_cell = np.array([[2.5243712, 0.0000000, 0.0000000], - [1.2621856, 2.0430257, 0.0000000], - [1.2874292, 0.7485898, 2.2254033]]) - for ii in range(3) : - for jj in range(3) : - self.assertAlmostEqual(self.system.data['cells'][0][ii][jj], - ovito_cell[ii][jj], - places = 6, - msg = 'cell[%d][%d] failed' % (ii,jj)) + ovito_cell = np.array( + [ + [2.5243712, 0.0000000, 0.0000000], + [1.2621856, 2.0430257, 0.0000000], + [1.2874292, 0.7485898, 2.2254033], + ] + ) + for ii in range(3): + for jj in range(3): + self.assertAlmostEqual( + self.system.data["cells"][0][ii][jj], + ovito_cell[ii][jj], + places=6, + msg="cell[%d][%d] failed" % (ii, jj), + ) def test_frame(self): if hasattr(self, "unwrap") and self.unwrap is True: - ovito_posis = np.array([[5.0739861, 2.7916155, 2.2254033], - [6.3361717, 3.4934183, 2.7767918]]) + ovito_posis = np.array( + [[5.0739861, 2.7916155, 2.2254033], [6.3361717, 3.4934183, 2.7767918]] + ) else: - ovito_posis = np.array([[0, 0, 0], - [1.2621856, 0.7018028, 0.5513885]]) - for ii in range(2) : - for jj in range(3) : - self.assertAlmostEqual(self.system.data['coords'][0][ii][jj], - ovito_posis[ii][jj], - places = 6, - msg = 'posis[%d][%d] failed' % (ii,jj)) + ovito_posis = np.array([[0, 0, 0], [1.2621856, 0.7018028, 0.5513885]]) + for ii in range(2): + for jj in range(3): + self.assertAlmostEqual( + self.system.data["coords"][0][ii][jj], + ovito_posis[ii][jj], + places=6, + msg="posis[%d][%d] failed" % (ii, jj), + ) diff --git a/tests/poscars/test_lammps_dump_s_su.py b/tests/poscars/test_lammps_dump_s_su.py index 2370cffc..5e914ea5 100644 --- a/tests/poscars/test_lammps_dump_s_su.py +++ b/tests/poscars/test_lammps_dump_s_su.py @@ -2,25 +2,26 @@ import numpy as np import unittest from context import dpdata -from poscars.poscar_ref_oh import TestPOSCARoh +from poscars.poscar_ref_oh import TestPOSCARoh + class TestDump(unittest.TestCase, TestPOSCARoh): - - def setUp(self): - self.system = dpdata.System(os.path.join('poscars', 'conf_s_su.dump'), - type_map = ['O', 'H']) - + def setUp(self): + self.system = dpdata.System( + os.path.join("poscars", "conf_s_su.dump"), type_map=["O", "H"] + ) + + class TestDump2(unittest.TestCase, TestPOSCARoh): - - def setUp(self): - self.tmp_system = dpdata.System(os.path.join('poscars', 'conf_s_su.dump'), - type_map = ['O', 'H']) + def setUp(self): + self.tmp_system = dpdata.System( + os.path.join("poscars", "conf_s_su.dump"), type_map=["O", "H"] + ) self.system = self.tmp_system.sub_system([1]) - def test_nframes (self) : + def test_nframes(self): self.assertEqual(self.tmp_system.get_nframes(), 2) - - -if __name__ == '__main__': + + +if __name__ == "__main__": unittest.main() - diff --git a/tests/pwmat/config_ref_ch4.py b/tests/pwmat/config_ref_ch4.py index 6fd65864..71aef7fe 100644 --- a/tests/pwmat/config_ref_ch4.py +++ b/tests/pwmat/config_ref_ch4.py @@ -1,44 +1,59 @@ import numpy as np -class Testconfigch4 : +class Testconfigch4: def test_atom_numbs(self): - self.assertEqual(self.system.data['atom_numbs'], [4,1]) + self.assertEqual(self.system.data["atom_numbs"], [4, 1]) def test_atom_names(self): - self.assertEqual(self.system.data['atom_names'], ['H','C']) + self.assertEqual(self.system.data["atom_names"], ["H", "C"]) def test_atom_types(self): - self.assertEqual(self.system.data['atom_types'][0], 0) - self.assertEqual(self.system.data['atom_types'][1], 0) - self.assertEqual(self.system.data['atom_types'][2], 0) - self.assertEqual(self.system.data['atom_types'][3], 0) - self.assertEqual(self.system.data['atom_types'][4], 1) + self.assertEqual(self.system.data["atom_types"][0], 0) + self.assertEqual(self.system.data["atom_types"][1], 0) + self.assertEqual(self.system.data["atom_types"][2], 0) + self.assertEqual(self.system.data["atom_types"][3], 0) + self.assertEqual(self.system.data["atom_types"][4], 1) def test_orig(self): - for d0 in range(3) : - self.assertEqual(self.system.data['orig'][d0], 0) + for d0 in range(3): + self.assertEqual(self.system.data["orig"][d0], 0) def test_cell(self): - ovito_cell = np.array([[10.000000, 0.0000000, 0.0000000], - [0.0000000, 10.000000, 0.0000000], - [0.0000000, 0.0000000, 10.000000]]) - for ii in range(3) : - for jj in range(3) : - self.assertAlmostEqual(self.system.data['cells'][0][ii][jj], - ovito_cell[ii][jj], - places = 6, - msg = 'cell[%d][%d] failed' % (ii,jj)) + ovito_cell = np.array( + [ + [10.000000, 0.0000000, 0.0000000], + [0.0000000, 10.000000, 0.0000000], + [0.0000000, 0.0000000, 10.000000], + ] + ) + for ii in range(3): + for jj in range(3): + self.assertAlmostEqual( + self.system.data["cells"][0][ii][jj], + ovito_cell[ii][jj], + places=6, + msg="cell[%d][%d] failed" % (ii, jj), + ) - def test_frame(self): - ovito_posis = np.array([[0.53815434, 0.40686080, 0.36057301], - [0.39453966, 0.48032057, 0.43846884], - [0.55209243, 0.56545029, 0.44270874], - [0.52818530, 0.41641476, 0.53918266], - [0.50325059, 0.46725516, 0.44523234]])*10 - for ii in range(2) : - for jj in range(3) : - self.assertAlmostEqual(self.system.data['coords'][0][ii][jj], - ovito_posis[ii][jj], - places = 6, - msg = 'posis[%d][%d] failed' % (ii,jj)) + def test_frame(self): + ovito_posis = ( + np.array( + [ + [0.53815434, 0.40686080, 0.36057301], + [0.39453966, 0.48032057, 0.43846884], + [0.55209243, 0.56545029, 0.44270874], + [0.52818530, 0.41641476, 0.53918266], + [0.50325059, 0.46725516, 0.44523234], + ] + ) + * 10 + ) + for ii in range(2): + for jj in range(3): + self.assertAlmostEqual( + self.system.data["coords"][0][ii][jj], + ovito_posis[ii][jj], + places=6, + msg="posis[%d][%d] failed" % (ii, jj), + ) diff --git a/tests/pwmat/config_ref_oh.py b/tests/pwmat/config_ref_oh.py index 7ce36791..6f3e0561 100644 --- a/tests/pwmat/config_ref_oh.py +++ b/tests/pwmat/config_ref_oh.py @@ -1,37 +1,45 @@ import numpy as np -class Testconfigoh : +class Testconfigoh: def test_atom_numbs(self): - self.assertEqual(self.system.data['atom_numbs'], [1,1]) + self.assertEqual(self.system.data["atom_numbs"], [1, 1]) def test_atom_names(self): - self.assertEqual(self.system.data['atom_names'], ['H','O']) + self.assertEqual(self.system.data["atom_names"], ["H", "O"]) def test_atom_types(self): - self.assertEqual(self.system.data['atom_types'][0], 0) - self.assertEqual(self.system.data['atom_types'][1], 1) + self.assertEqual(self.system.data["atom_types"][0], 0) + self.assertEqual(self.system.data["atom_types"][1], 1) def test_orig(self): - for d0 in range(3) : - self.assertEqual(self.system.data['orig'][d0], 0) + for d0 in range(3): + self.assertEqual(self.system.data["orig"][d0], 0) def test_cell(self): - ovito_cell = np.array([[2.5243712, 0.0000000, 0.0000000], - [1.2621856, 2.0430257, 0.0000000], - [1.2874292, 0.7485898, 2.2254033]]) - for ii in range(3) : - for jj in range(3) : - self.assertAlmostEqual(self.system.data['cells'][0][ii][jj], - ovito_cell[ii][jj], - places = 6, - msg = 'cell[%d][%d] failed' % (ii,jj)) + ovito_cell = np.array( + [ + [2.5243712, 0.0000000, 0.0000000], + [1.2621856, 2.0430257, 0.0000000], + [1.2874292, 0.7485898, 2.2254033], + ] + ) + for ii in range(3): + for jj in range(3): + self.assertAlmostEqual( + self.system.data["cells"][0][ii][jj], + ovito_cell[ii][jj], + places=6, + msg="cell[%d][%d] failed" % (ii, jj), + ) - def test_frame(self): - ovito_posis = np.array([[1.2621856, 0.7018028, 0.5513885],[0, 0, 0]]) - for ii in range(2) : - for jj in range(3) : - self.assertAlmostEqual(self.system.data['coords'][0][ii][jj], - ovito_posis[ii][jj], - places = 6, - msg = 'posis[%d][%d] failed' % (ii,jj)) + def test_frame(self): + ovito_posis = np.array([[1.2621856, 0.7018028, 0.5513885], [0, 0, 0]]) + for ii in range(2): + for jj in range(3): + self.assertAlmostEqual( + self.system.data["coords"][0][ii][jj], + ovito_posis[ii][jj], + places=6, + msg="posis[%d][%d] failed" % (ii, jj), + ) diff --git a/tests/test_abacus_md.py b/tests/test_abacus_md.py index 3621242d..89df93b6 100644 --- a/tests/test_abacus_md.py +++ b/tests/test_abacus_md.py @@ -6,147 +6,203 @@ bohr2ang = LengthConversion("bohr", "angstrom").value() -class TestABACUSMD: - def test_atom_names(self) : - self.assertEqual(self.system_water.data['atom_names'], ['H', 'O']) - self.assertEqual(self.system_Si.data['atom_names'], ['Si']) - self.assertEqual(self.system_water_unconv.data['atom_names'], ['H', 'O']) +class TestABACUSMD: + def test_atom_names(self): + self.assertEqual(self.system_water.data["atom_names"], ["H", "O"]) + self.assertEqual(self.system_Si.data["atom_names"], ["Si"]) + self.assertEqual(self.system_water_unconv.data["atom_names"], ["H", "O"]) - def test_atom_numbs(self) : - self.assertEqual(self.system_water.data['atom_numbs'], [2, 1]) - self.assertEqual(self.system_Si.data['atom_numbs'], [2]) - self.assertEqual(self.system_water_unconv.data['atom_numbs'], [2, 1]) + def test_atom_numbs(self): + self.assertEqual(self.system_water.data["atom_numbs"], [2, 1]) + self.assertEqual(self.system_Si.data["atom_numbs"], [2]) + self.assertEqual(self.system_water_unconv.data["atom_numbs"], [2, 1]) - def test_atom_types(self) : + def test_atom_types(self): ref_type = [0, 0, 1] - ref_type = np.array(ref_type) + ref_type = np.array(ref_type) ref_type2 = np.array([0, 0]) - ref_type3 = np.array([0,0,1]) - for ii in range(ref_type.shape[0]) : - self.assertEqual(self.system_water.data['atom_types'][ii], ref_type[ii]) - for ii in range(ref_type2.shape[0]) : - self.assertEqual(self.system_Si.data['atom_types'][ii], ref_type2[ii]) - for ii in range(ref_type3.shape[0]) : - self.assertEqual(self.system_water_unconv.data['atom_types'][ii], ref_type3[ii]) - - def test_cell(self) : + ref_type3 = np.array([0, 0, 1]) + for ii in range(ref_type.shape[0]): + self.assertEqual(self.system_water.data["atom_types"][ii], ref_type[ii]) + for ii in range(ref_type2.shape[0]): + self.assertEqual(self.system_Si.data["atom_types"][ii], ref_type2[ii]) + for ii in range(ref_type3.shape[0]): + self.assertEqual( + self.system_water_unconv.data["atom_types"][ii], ref_type3[ii] + ) + + def test_cell(self): cell = bohr2ang * 28 * np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) cell2 = bohr2ang * 5.1 * np.array([[1, 1, 0], [1, 0, 1], [0, 1, 1]]) - cell3 = np.array([[1.45245092e+01, 0, 0], - [-1.40550526e-02, 1.51277202e+01, 0], - [-4.42369435e-01, 4.17648184e-01, 1.49535208e+01]]) - for idx in range(np.shape(self.system_water.data['cells'])[0]): - np.testing.assert_almost_equal(cell, self.system_water.data['cells'][idx], decimal = 5) - for idx in range(np.shape(self.system_Si.data['cells'])[0]): - np.testing.assert_almost_equal(self.system_Si.data['cells'][idx], cell2, decimal = 5) - for idx in range(np.shape(self.system_water_unconv.data['cells'])[0]): - np.testing.assert_almost_equal(self.system_water_unconv.data['cells'][idx], cell3, decimal = 5) - - def test_coord(self) : - with open('abacus.md/water_coord') as fp: + cell3 = np.array( + [ + [1.45245092e01, 0, 0], + [-1.40550526e-02, 1.51277202e01, 0], + [-4.42369435e-01, 4.17648184e-01, 1.49535208e01], + ] + ) + for idx in range(np.shape(self.system_water.data["cells"])[0]): + np.testing.assert_almost_equal( + cell, self.system_water.data["cells"][idx], decimal=5 + ) + for idx in range(np.shape(self.system_Si.data["cells"])[0]): + np.testing.assert_almost_equal( + self.system_Si.data["cells"][idx], cell2, decimal=5 + ) + for idx in range(np.shape(self.system_water_unconv.data["cells"])[0]): + np.testing.assert_almost_equal( + self.system_water_unconv.data["cells"][idx], cell3, decimal=5 + ) + + def test_coord(self): + with open("abacus.md/water_coord") as fp: coord = [] - for ii in fp : + for ii in fp: coord.append([float(jj) for jj in ii.split()]) coord = np.array(coord) coord = coord.reshape([5, 3, 3]) - np.testing.assert_almost_equal(self.system_water.data['coords'], coord, decimal = 5) + np.testing.assert_almost_equal( + self.system_water.data["coords"], coord, decimal=5 + ) - with open('abacus.md.nostress/Si_coord') as fp2: + with open("abacus.md.nostress/Si_coord") as fp2: coord = [] - for ii in fp2 : + for ii in fp2: coord.append([float(jj) for jj in ii.split()]) coord = np.array(coord) coord = coord.reshape([4, 2, 3]) - np.testing.assert_almost_equal(self.system_Si.data['coords'], coord, decimal = 5) + np.testing.assert_almost_equal( + self.system_Si.data["coords"], coord, decimal=5 + ) - with open('abacus.md.unconv/water_coord') as fp3: + with open("abacus.md.unconv/water_coord") as fp3: coord = [] - for ii in fp3 : + for ii in fp3: coord.append([float(jj) for jj in ii.split()]) coord = np.array(coord) coord = coord.reshape([10, 3, 3]) - np.testing.assert_almost_equal(self.system_water_unconv.data['coords'], coord, decimal = 5) + np.testing.assert_almost_equal( + self.system_water_unconv.data["coords"], coord, decimal=5 + ) - def test_force(self) : - with open('abacus.md/water_force') as fp: + def test_force(self): + with open("abacus.md/water_force") as fp: force = [] - for ii in fp : + for ii in fp: force.append([float(jj) for jj in ii.split()]) force = np.array(force) force = force.reshape([5, 3, 3]) - np.testing.assert_almost_equal(self.system_water.data['forces'], force, decimal=5) + np.testing.assert_almost_equal( + self.system_water.data["forces"], force, decimal=5 + ) - with open('abacus.md.nostress/Si_force') as fp2: + with open("abacus.md.nostress/Si_force") as fp2: force = [] - for ii in fp2 : + for ii in fp2: force.append([float(jj) for jj in ii.split()]) force = np.array(force) force = force.reshape([4, 2, 3]) - np.testing.assert_almost_equal(self.system_Si.data['forces'], force, decimal=5) + np.testing.assert_almost_equal( + self.system_Si.data["forces"], force, decimal=5 + ) - with open('abacus.md.unconv/water_force') as fp3: + with open("abacus.md.unconv/water_force") as fp3: force = [] - for ii in fp3 : + for ii in fp3: force.append([float(jj) for jj in ii.split()]) force = np.array(force) force = force.reshape([10, 3, 3]) - np.testing.assert_almost_equal(self.system_water_unconv.data['forces'], force, decimal=5) + np.testing.assert_almost_equal( + self.system_water_unconv.data["forces"], force, decimal=5 + ) - def test_virial(self) : - with open('abacus.md/water_virial') as fp: + def test_virial(self): + with open("abacus.md/water_virial") as fp: virial = [] - for ii in fp : + for ii in fp: virial.append([float(jj) for jj in ii.split()]) virial = np.array(virial) virial = virial.reshape([5, 3, 3]) - np.testing.assert_almost_equal(self.system_water.data['virials'], virial, decimal=5) + np.testing.assert_almost_equal( + self.system_water.data["virials"], virial, decimal=5 + ) - with open('abacus.md.unconv/water_virial') as fp: + with open("abacus.md.unconv/water_virial") as fp: virial = [] - for ii in fp : + for ii in fp: virial.append([float(jj) for jj in ii.split()]) virial = np.array(virial) virial = virial.reshape([10, 3, 3]) - np.testing.assert_almost_equal(self.system_water_unconv.data['virials'], virial, decimal=5) - - def test_energy(self) : - ref_energy = np.array([-466.69285117, -466.69929051, -466.69829826, -466.70364664, - -466.6976083]) - ref_energy2 = np.array([-211.77184603, -211.78111966, -211.79681663, -211.79875524]) - ref_energy3 = np.array([-464.87380991, -465.18489358, -465.97407849, -465.98292836, -465.85528926, - -465.33957501, -464.64886682, -464.61802032, -465.61854656, -466.05660096]) - np.testing.assert_almost_equal(self.system_water.data['energies'], ref_energy) - np.testing.assert_almost_equal(self.system_Si.data['energies'], ref_energy2) - np.testing.assert_almost_equal(self.system_water_unconv.data['energies'], ref_energy3) + np.testing.assert_almost_equal( + self.system_water_unconv.data["virials"], virial, decimal=5 + ) + + def test_energy(self): + ref_energy = np.array( + [-466.69285117, -466.69929051, -466.69829826, -466.70364664, -466.6976083] + ) + ref_energy2 = np.array( + [-211.77184603, -211.78111966, -211.79681663, -211.79875524] + ) + ref_energy3 = np.array( + [ + -464.87380991, + -465.18489358, + -465.97407849, + -465.98292836, + -465.85528926, + -465.33957501, + -464.64886682, + -464.61802032, + -465.61854656, + -466.05660096, + ] + ) + np.testing.assert_almost_equal(self.system_water.data["energies"], ref_energy) + np.testing.assert_almost_equal(self.system_Si.data["energies"], ref_energy2) + np.testing.assert_almost_equal( + self.system_water_unconv.data["energies"], ref_energy3 + ) def test_to_system(self): - pp_file=["H.upf","O.upf"] - numerical_orbital=["H.upf","O.upf"] - numerical_descriptor="jle.orb" - mass=[1.008,15.994] - self.system_water.to(file_name="abacus.md/water_stru",fmt='abacus/stru',pp_file=pp_file,\ - numerical_orbital=numerical_orbital,numerical_descriptor=numerical_descriptor,\ - mass=mass) - self.assertTrue(os.path.isfile('abacus.md/water_stru')) - if os.path.isfile('abacus.md/water_stru'): - with open('abacus.md/water_stru') as f: - iline=0 - for iline,l in enumerate(f): + pp_file = ["H.upf", "O.upf"] + numerical_orbital = ["H.upf", "O.upf"] + numerical_descriptor = "jle.orb" + mass = [1.008, 15.994] + self.system_water.to( + file_name="abacus.md/water_stru", + fmt="abacus/stru", + pp_file=pp_file, + numerical_orbital=numerical_orbital, + numerical_descriptor=numerical_descriptor, + mass=mass, + ) + self.assertTrue(os.path.isfile("abacus.md/water_stru")) + if os.path.isfile("abacus.md/water_stru"): + with open("abacus.md/water_stru") as f: + iline = 0 + for iline, l in enumerate(f): iline += 1 - self.assertEqual(iline,30) + self.assertEqual(iline, 30) class TestABACUSMDLabeledOutput(unittest.TestCase, TestABACUSMD): - def setUp(self): - self.system_water = dpdata.LabeledSystem('abacus.md',fmt='abacus/md') # system with stress - self.system_Si = dpdata.LabeledSystem('abacus.md.nostress',fmt='abacus/md') # system without stress - self.system_water_unconv = dpdata.LabeledSystem('abacus.md.unconv',fmt='abacus/md') #system with unconverged SCF + self.system_water = dpdata.LabeledSystem( + "abacus.md", fmt="abacus/md" + ) # system with stress + self.system_Si = dpdata.LabeledSystem( + "abacus.md.nostress", fmt="abacus/md" + ) # system without stress + self.system_water_unconv = dpdata.LabeledSystem( + "abacus.md.unconv", fmt="abacus/md" + ) # system with unconverged SCF def tearDown(self): - if os.path.isfile('abacus.md/water_stru'): - os.remove('abacus.md/water_stru') + if os.path.isfile("abacus.md/water_stru"): + os.remove("abacus.md/water_stru") + -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_abacus_pw_scf.py b/tests/test_abacus_pw_scf.py index f2a2c89b..fc06f227 100644 --- a/tests/test_abacus_pw_scf.py +++ b/tests/test_abacus_pw_scf.py @@ -1,37 +1,39 @@ import os import numpy as np -import unittest,shutil +import unittest, shutil from context import dpdata from dpdata.unit import LengthConversion bohr2ang = LengthConversion("bohr", "angstrom").value() -class TestABACUSSinglePointEnergy: - def test_atom_names(self) : - self.assertEqual(self.system_ch4.data['atom_names'], ['C', 'H']) - #self.assertEqual(self.system_h2o.data['atom_names'], ['O','H']) - self.assertEqual(self.system_ch4_unlabeled.data['atom_names'], ['C', 'H']) - def test_atom_numbs(self) : - self.assertEqual(self.system_ch4.data['atom_numbs'], [1, 4]) - #self.assertEqual(self.system_h2o.data['atom_numbs'], [64,128]) - self.assertEqual(self.system_ch4_unlabeled.data['atom_numbs'], [1, 4]) - def test_atom_types(self) : - ref_type = [0,1,1,1,1] - ref_type = np.array(ref_type) - for ii in range(ref_type.shape[0]) : - self.assertEqual(self.system_ch4.data['atom_types'][ii], ref_type[ii]) - self.assertEqual(self.system_ch4_unlabeled['atom_types'][ii], ref_type[ii]) +class TestABACUSSinglePointEnergy: + def test_atom_names(self): + self.assertEqual(self.system_ch4.data["atom_names"], ["C", "H"]) + # self.assertEqual(self.system_h2o.data['atom_names'], ['O','H']) + self.assertEqual(self.system_ch4_unlabeled.data["atom_names"], ["C", "H"]) + + def test_atom_numbs(self): + self.assertEqual(self.system_ch4.data["atom_numbs"], [1, 4]) + # self.assertEqual(self.system_h2o.data['atom_numbs'], [64,128]) + self.assertEqual(self.system_ch4_unlabeled.data["atom_numbs"], [1, 4]) + + def test_atom_types(self): + ref_type = [0, 1, 1, 1, 1] + ref_type = np.array(ref_type) + for ii in range(ref_type.shape[0]): + self.assertEqual(self.system_ch4.data["atom_types"][ii], ref_type[ii]) + self.assertEqual(self.system_ch4_unlabeled["atom_types"][ii], ref_type[ii]) # ref_type = [0]*64 + [1]*128 # ref_type = np.array(ref_type) # for ii in range(ref_type.shape[0]) : # self.assertEqual(self.system_h2o.data['atom_types'][ii], ref_type[ii]) - def test_cell(self) : + def test_cell(self): # cell = 5.29177 * np.eye(3) cell = bohr2ang * 10 * np.eye(3) - np.testing.assert_almost_equal(self.system_ch4.data['cells'][0], cell) - np.testing.assert_almost_equal(self.system_ch4_unlabeled.data['cells'][0], cell) + np.testing.assert_almost_equal(self.system_ch4.data["cells"][0], cell) + np.testing.assert_almost_equal(self.system_ch4_unlabeled.data["cells"][0], cell) # fp = open('qe.scf/h2o_cell') # cell = [] # for ii in fp : @@ -42,16 +44,18 @@ def test_cell(self) : # self.assertAlmostEqual(self.system_h2o.data['cells'][0][ii][jj], cell[ii][jj]) # fp.close() - - def test_coord(self) : - with open('abacus.scf/ch4_coord') as fp: + def test_coord(self): + with open("abacus.scf/ch4_coord") as fp: coord = [] - for ii in fp : + for ii in fp: coord.append([float(jj) for jj in ii.split()]) coord = np.array(coord) - np.testing.assert_almost_equal(self.system_ch4.data['coords'][0], coord, decimal=5) - np.testing.assert_almost_equal(self.system_ch4_unlabeled.data['coords'][0], coord, decimal=5) - + np.testing.assert_almost_equal( + self.system_ch4.data["coords"][0], coord, decimal=5 + ) + np.testing.assert_almost_equal( + self.system_ch4_unlabeled.data["coords"][0], coord, decimal=5 + ) # fp = open('qe.scf/h2o_coord') # coord = [] @@ -63,14 +67,13 @@ def test_coord(self) : # self.assertAlmostEqual(self.system_h2o.data['coords'][0][ii][jj], coord[ii][jj]) # fp.close() - def test_force(self) : - with open('abacus.scf/ch4_force') as fp: + def test_force(self): + with open("abacus.scf/ch4_force") as fp: force = [] - for ii in fp : + for ii in fp: force.append([float(jj) for jj in ii.split()]) force = np.array(force) - np.testing.assert_almost_equal(self.system_ch4.data['forces'][0], force) - + np.testing.assert_almost_equal(self.system_ch4.data["forces"][0], force) # fp = open('qe.scf/h2o_force') # force = [] @@ -82,14 +85,15 @@ def test_force(self) : # self.assertAlmostEqual(self.system_h2o.data['forces'][0][ii][jj], force[ii][jj]) # fp.close() - def test_virial(self) : - with open('abacus.scf/ch4_virial') as fp: + def test_virial(self): + with open("abacus.scf/ch4_virial") as fp: virial = [] - for ii in fp : + for ii in fp: virial.append([float(jj) for jj in ii.split()]) virial = np.array(virial) - np.testing.assert_almost_equal(self.system_ch4.data['virials'][0], virial, decimal = 3) - + np.testing.assert_almost_equal( + self.system_ch4.data["virials"][0], virial, decimal=3 + ) # fp = open('qe.scf/h2o_virial') # virial = [] @@ -101,41 +105,43 @@ def test_virial(self) : # self.assertAlmostEqual(self.system_h2o.data['virials'][0][ii][jj], virial[ii][jj], places = 2) # fp.close() - def test_energy(self) : + def test_energy(self): ref_energy = -219.64991404276591 - self.assertAlmostEqual(self.system_ch4.data['energies'][0], ref_energy) + self.assertAlmostEqual(self.system_ch4.data["energies"][0], ref_energy) # ref_energy = -30007.651851226798 # self.assertAlmostEqual(self.system_h2o.data['energies'][0], ref_energy) - class TestABACUSLabeledOutput(unittest.TestCase, TestABACUSSinglePointEnergy): - def setUp(self): - shutil.copy('abacus.scf/INPUT.ok','abacus.scf/INPUT') - self.system_ch4 = dpdata.LabeledSystem('abacus.scf',fmt='abacus/scf') + shutil.copy("abacus.scf/INPUT.ok", "abacus.scf/INPUT") + self.system_ch4 = dpdata.LabeledSystem("abacus.scf", fmt="abacus/scf") # self.system_h2o = dpdata.LabeledSystem('qe.scf/02.out',fmt='qe/pw/scf') - self.system_ch4_unlabeled = dpdata.System('abacus.scf/STRU.ch4', fmt='abacus/stru') + self.system_ch4_unlabeled = dpdata.System( + "abacus.scf/STRU.ch4", fmt="abacus/stru" + ) + def tearDown(self): if os.path.isfile("abacus.scf/INPUT"): os.remove("abacus.scf/INPUT") class TestABACUSLabeledOutputFail(unittest.TestCase): - def setUp(self): - shutil.copy('abacus.scf/INPUT.fail','abacus.scf/INPUT') - self.system_ch4 = dpdata.LabeledSystem('abacus.scf',fmt='abacus/scf') + shutil.copy("abacus.scf/INPUT.fail", "abacus.scf/INPUT") + self.system_ch4 = dpdata.LabeledSystem("abacus.scf", fmt="abacus/scf") # self.system_h2o = dpdata.LabeledSystem('qe.scf/02.out',fmt='qe/pw/scf') - self.system_ch4_unlabeled = dpdata.System('abacus.scf/STRU.ch4', fmt='abacus/stru') + self.system_ch4_unlabeled = dpdata.System( + "abacus.scf/STRU.ch4", fmt="abacus/stru" + ) + def tearDown(self): if os.path.isfile("abacus.scf/INPUT"): os.remove("abacus.scf/INPUT") - def test_return_zero(self): - self.assertEqual(len(self.system_ch4),0) - + def test_return_zero(self): + self.assertEqual(len(self.system_ch4), 0) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_abacus_relax.py b/tests/test_abacus_relax.py index be331009..a39249bd 100644 --- a/tests/test_abacus_relax.py +++ b/tests/test_abacus_relax.py @@ -1,4 +1,4 @@ -import os,shutil +import os, shutil import numpy as np import unittest from context import dpdata @@ -6,91 +6,102 @@ bohr2ang = LengthConversion("bohr", "angstrom").value() -class TestABACUSRelaxLabeledOutput(unittest.TestCase): +class TestABACUSRelaxLabeledOutput(unittest.TestCase): def setUp(self): - shutil.copy('abacus.relax/OUT.abacus/running_cell-relax.log.normal','abacus.relax/OUT.abacus/running_cell-relax.log') - self.system = dpdata.LabeledSystem('abacus.relax',fmt='abacus/relax') + shutil.copy( + "abacus.relax/OUT.abacus/running_cell-relax.log.normal", + "abacus.relax/OUT.abacus/running_cell-relax.log", + ) + self.system = dpdata.LabeledSystem("abacus.relax", fmt="abacus/relax") + def tearDown(self): if os.path.isfile("abacus.relax/OUT.abacus/running_cell-relax.log"): os.remove("abacus.relax/OUT.abacus/running_cell-relax.log") - def test_atom_names(self) : - self.assertEqual(self.system.data['atom_names'], ['H','O']) + def test_atom_names(self): + self.assertEqual(self.system.data["atom_names"], ["H", "O"]) - def test_atom_numbs(self) : - self.assertEqual(self.system.data['atom_numbs'], [2,1]) + def test_atom_numbs(self): + self.assertEqual(self.system.data["atom_numbs"], [2, 1]) - def test_atom_types(self) : - ref_type = np.array([0,0,1]) - for ii in range(ref_type.shape[0]) : - self.assertEqual(self.system.data['atom_types'][ii], ref_type[ii]) + def test_atom_types(self): + ref_type = np.array([0, 0, 1]) + for ii in range(ref_type.shape[0]): + self.assertEqual(self.system.data["atom_types"][ii], ref_type[ii]) - def test_cell(self) : + def test_cell(self): cell = bohr2ang * 28.0 * np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) - for idx in range(np.shape(self.system.data['cells'])[0]): - np.testing.assert_almost_equal(cell, self.system.data['cells'][idx], decimal = 5) + for idx in range(np.shape(self.system.data["cells"])[0]): + np.testing.assert_almost_equal( + cell, self.system.data["cells"][idx], decimal=5 + ) - def test_coord(self) : - with open('abacus.relax/coord.ref') as fp: + def test_coord(self): + with open("abacus.relax/coord.ref") as fp: ref = [] - for ii in fp : + for ii in fp: ref.append([float(jj) for jj in ii.split()]) ref = np.array(ref) ref = ref.reshape([5, 3, 3]) - np.testing.assert_almost_equal(self.system.data['coords'], ref, decimal = 5) + np.testing.assert_almost_equal(self.system.data["coords"], ref, decimal=5) - def test_force(self) : - with open('abacus.relax/force.ref') as fp: + def test_force(self): + with open("abacus.relax/force.ref") as fp: ref = [] - for ii in fp : + for ii in fp: ref.append([float(jj) for jj in ii.split()]) ref = np.array(ref) ref = ref.reshape([5, 3, 3]) - np.testing.assert_almost_equal(self.system.data['forces'], ref, decimal=5) + np.testing.assert_almost_equal(self.system.data["forces"], ref, decimal=5) - def test_virial(self) : - with open('abacus.relax/virial.ref') as fp: + def test_virial(self): + with open("abacus.relax/virial.ref") as fp: ref = [] - for ii in fp : + for ii in fp: ref.append([float(jj) for jj in ii.split()]) ref = np.array(ref) ref = ref.reshape([5, 3, 3]) - np.testing.assert_almost_equal(self.system.data['virials'], ref, decimal=5) + np.testing.assert_almost_equal(self.system.data["virials"], ref, decimal=5) - def test_stress(self) : - with open('abacus.relax/stress.ref') as fp: + def test_stress(self): + with open("abacus.relax/stress.ref") as fp: ref = [] - for ii in fp : + for ii in fp: ref.append([float(jj) for jj in ii.split()]) ref = np.array(ref) ref = ref.reshape([5, 3, 3]) - np.testing.assert_almost_equal(self.system.data['stress'], ref, decimal=5) + np.testing.assert_almost_equal(self.system.data["stress"], ref, decimal=5) - def test_energy(self) : - ref_energy = np.array([-465.77753104, -464.35757552, -465.79307346, -465.80056811, - -465.81235433]) - np.testing.assert_almost_equal(self.system.data['energies'], ref_energy) + def test_energy(self): + ref_energy = np.array( + [-465.77753104, -464.35757552, -465.79307346, -465.80056811, -465.81235433] + ) + np.testing.assert_almost_equal(self.system.data["energies"], ref_energy) -class TestABACUSRelaxLabeledOutputAbnormal(unittest.TestCase): +class TestABACUSRelaxLabeledOutputAbnormal(unittest.TestCase): def setUp(self): - shutil.copy('abacus.relax/OUT.abacus/running_cell-relax.log.abnormal','abacus.relax/OUT.abacus/running_cell-relax.log') - self.system = dpdata.LabeledSystem('abacus.relax',fmt='abacus/relax') - - def test_result(self): + shutil.copy( + "abacus.relax/OUT.abacus/running_cell-relax.log.abnormal", + "abacus.relax/OUT.abacus/running_cell-relax.log", + ) + self.system = dpdata.LabeledSystem("abacus.relax", fmt="abacus/relax") + + def test_result(self): data = self.system.data - self.assertEqual(len(data['coords']),4) - self.assertEqual(len(data['energies']),len(data['coords'])) - self.assertEqual(len(data['cells']),len(data['coords'])) - self.assertEqual(len(data['forces']),len(data['coords'])) - self.assertEqual(len(data['stress']),len(data['coords'])) - self.assertEqual(len(data['virials']),len(data['coords'])) - np.testing.assert_almost_equal(data['energies'][3],-465.81235433) - + self.assertEqual(len(data["coords"]), 4) + self.assertEqual(len(data["energies"]), len(data["coords"])) + self.assertEqual(len(data["cells"]), len(data["coords"])) + self.assertEqual(len(data["forces"]), len(data["coords"])) + self.assertEqual(len(data["stress"]), len(data["coords"])) + self.assertEqual(len(data["virials"]), len(data["coords"])) + np.testing.assert_almost_equal(data["energies"][3], -465.81235433) + def tearDown(self): if os.path.isfile("abacus.relax/OUT.abacus/running_cell-relax.log"): os.remove("abacus.relax/OUT.abacus/running_cell-relax.log") -if __name__ == '__main__': - unittest.main() \ No newline at end of file + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_abacus_stru_dump.py b/tests/test_abacus_stru_dump.py index 20922cbf..6d6dbeea 100644 --- a/tests/test_abacus_stru_dump.py +++ b/tests/test_abacus_stru_dump.py @@ -10,13 +10,20 @@ def setUp(self): self.system_ch4 = dpdata.System("abacus.scf/STRU.ch4", fmt="stru") def test_dump_stru(self): - self.system_ch4.to("stru", "STRU_tmp", mass = [12, 1], pp_file = ["C.upf", "H.upf"], numerical_orbital = ["C.orb", "H.orb"], numerical_descriptor = "jle.orb") + self.system_ch4.to( + "stru", + "STRU_tmp", + mass=[12, 1], + pp_file=["C.upf", "H.upf"], + numerical_orbital=["C.orb", "H.orb"], + numerical_descriptor="jle.orb", + ) myfilecmp(self, "abacus.scf/stru_test", "STRU_tmp") - + def tearDown(self): - if os.path.isfile('STRU_tmp'): - os.remove('STRU_tmp') + if os.path.isfile("STRU_tmp"): + os.remove("STRU_tmp") + -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() - \ No newline at end of file diff --git a/tests/test_amber_md.py b/tests/test_amber_md.py index d3189a0d..5a9cded8 100644 --- a/tests/test_amber_md.py +++ b/tests/test_amber_md.py @@ -3,36 +3,46 @@ import shutil from context import dpdata from comp_sys import CompLabeledSys, IsPBC + try: import parmed except ModuleNotFoundError: - skip_parmed_related_test=True + skip_parmed_related_test = True else: - skip_parmed_related_test=False + skip_parmed_related_test = False + class TestAmberMD(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : - self.system_1 = dpdata.LabeledSystem('amber/02_Heat', fmt = 'amber/md') - self.system_1.to('deepmd/npy','tmp.deepmd.npy') - self.system_2 = dpdata.LabeledSystem('tmp.deepmd.npy', fmt = 'deepmd/npy') + def setUp(self): + self.system_1 = dpdata.LabeledSystem("amber/02_Heat", fmt="amber/md") + self.system_1.to("deepmd/npy", "tmp.deepmd.npy") + self.system_2 = dpdata.LabeledSystem("tmp.deepmd.npy", fmt="deepmd/npy") self.places = 5 self.e_places = 4 self.f_places = 6 self.v_places = 6 - def tearDown(self) : - if os.path.exists('tmp.deepmd.npy'): - shutil.rmtree('tmp.deepmd.npy') + def tearDown(self): + if os.path.exists("tmp.deepmd.npy"): + shutil.rmtree("tmp.deepmd.npy") -@unittest.skipIf(skip_parmed_related_test,"skip parmed related test. install parmed to fix") + +@unittest.skipIf( + skip_parmed_related_test, "skip parmed related test. install parmed to fix" +) class TestAmberMDTarget(unittest.TestCase, CompLabeledSys, IsPBC): def setUp(self): - ll="amber/corr/low_level" - ncfile="amber/corr/rc.nc" - parmfile="amber/corr/qmmm.parm7" + ll = "amber/corr/low_level" + ncfile = "amber/corr/rc.nc" + parmfile = "amber/corr/qmmm.parm7" target = ":1" self.system_1 = dpdata.LabeledSystem( - ll, nc_file=ncfile, parm7_file=parmfile, fmt='amber/md', use_element_symbols=target) + ll, + nc_file=ncfile, + parm7_file=parmfile, + fmt="amber/md", + use_element_symbols=target, + ) self.system_2 = dpdata.LabeledSystem("amber/corr/dp_ll", fmt="deepmd/npy") self.places = 5 @@ -40,5 +50,6 @@ def setUp(self): self.f_places = 6 self.v_places = 6 -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/tests/test_amber_sqm.py b/tests/test_amber_sqm.py index c8f762ba..f9ca80f8 100644 --- a/tests/test_amber_sqm.py +++ b/tests/test_amber_sqm.py @@ -11,64 +11,70 @@ else: skip_bond_order_system = False + class TestAmberSqmOut(unittest.TestCase, CompSys, IsNoPBC): - def setUp (self) : - self.system_1 = dpdata.System('amber/sqm_no_forces.out', fmt = 'sqm/out') - self.system_1.to('deepmd/npy','tmp.sqm.noforces') - self.system_2 = dpdata.System('tmp.sqm.noforces', fmt = 'deepmd/npy') + def setUp(self): + self.system_1 = dpdata.System("amber/sqm_no_forces.out", fmt="sqm/out") + self.system_1.to("deepmd/npy", "tmp.sqm.noforces") + self.system_2 = dpdata.System("tmp.sqm.noforces", fmt="deepmd/npy") self.places = 5 self.e_places = 4 self.f_places = 6 self.v_places = 6 - def tearDown(self) : - if os.path.exists('tmp.sqm.noforces'): - shutil.rmtree('tmp.sqm.noforces') + def tearDown(self): + if os.path.exists("tmp.sqm.noforces"): + shutil.rmtree("tmp.sqm.noforces") + class TestAmberSqmOutLabeled(unittest.TestCase, CompLabeledSys, IsNoPBC): - def setUp(self) : - self.system_1 = dpdata.LabeledSystem('amber/sqm_forces.out', fmt = 'sqm/out') - self.system_1.to('deepmd/npy','tmp.sqm.forces') - self.system_2 = dpdata.LabeledSystem('tmp.sqm.forces', fmt = 'deepmd/npy') + def setUp(self): + self.system_1 = dpdata.LabeledSystem("amber/sqm_forces.out", fmt="sqm/out") + self.system_1.to("deepmd/npy", "tmp.sqm.forces") + self.system_2 = dpdata.LabeledSystem("tmp.sqm.forces", fmt="deepmd/npy") self.places = 5 self.e_places = 4 self.f_places = 6 self.v_places = 6 - def tearDown(self) : - if os.path.exists('tmp.sqm.forces'): - shutil.rmtree('tmp.sqm.forces') + def tearDown(self): + if os.path.exists("tmp.sqm.forces"): + shutil.rmtree("tmp.sqm.forces") class TestAmberSqmOutOpt(unittest.TestCase, CompLabeledSys, IsNoPBC): - def setUp(self) : - self.system_1 = dpdata.LabeledSystem('amber/sqm_opt.out', fmt = 'sqm/out') - self.system_1.to('deepmd/npy','tmp.sqm.opt') - self.system_2 = dpdata.LabeledSystem('tmp.sqm.opt', fmt = 'deepmd/npy') + def setUp(self): + self.system_1 = dpdata.LabeledSystem("amber/sqm_opt.out", fmt="sqm/out") + self.system_1.to("deepmd/npy", "tmp.sqm.opt") + self.system_2 = dpdata.LabeledSystem("tmp.sqm.opt", fmt="deepmd/npy") self.places = 5 self.e_places = 4 self.f_places = 6 self.v_places = 6 - def tearDown(self) : - if os.path.exists('tmp.sqm.opt'): - shutil.rmtree('tmp.sqm.opt') + def tearDown(self): + if os.path.exists("tmp.sqm.opt"): + shutil.rmtree("tmp.sqm.opt") -@unittest.skipIf(skip_bond_order_system, "dpdata does not have BondOrderSystem. One may install rdkit to fix.") +@unittest.skipIf( + skip_bond_order_system, + "dpdata does not have BondOrderSystem. One may install rdkit to fix.", +) class TestAmberSqmIn(unittest.TestCase): def setUp(self): - self.system = dpdata.BondOrderSystem("amber/methane.mol", fmt='mol', type_map=['H','C']) - with open('amber/sqm.in', 'r') as f: + self.system = dpdata.BondOrderSystem( + "amber/methane.mol", fmt="mol", type_map=["H", "C"] + ) + with open("amber/sqm.in", "r") as f: self.sqm_in = f.read() - + def test_sqm_in(self): - self.system.to("sqm/in", 'amber/sqm_test.in') - with open('amber/sqm_test.in', 'r') as f: + self.system.to("sqm/in", "amber/sqm_test.in") + with open("amber/sqm_test.in", "r") as f: self.sqm_in_test = f.read() self.assertEqual(self.sqm_in, self.sqm_in_test) - + def tearDown(self): if os.path.isfile("amber/sqm_test.in"): os.remove("amber/sqm_test.in") - diff --git a/tests/test_ase_traj.py b/tests/test_ase_traj.py index 6c37a31c..6f957f84 100644 --- a/tests/test_ase_traj.py +++ b/tests/test_ase_traj.py @@ -3,6 +3,7 @@ import unittest from context import dpdata from comp_sys import CompLabeledSys, IsPBC + try: import ase except ModuleNotFoundError: @@ -10,31 +11,37 @@ else: skip_ase = False -@unittest.skipIf(skip_ase,"skip ase related test. install ase to fix") + +@unittest.skipIf(skip_ase, "skip ase related test. install ase to fix") class TestASEtraj1(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : - self.multi_systems = dpdata.MultiSystems.from_file('ase_traj/HeAlO.traj', fmt='ase_traj/structure') - self.system_1 = self.multi_systems.systems['Al0He4O0'] - self.system_2 = dpdata.LabeledSystem('ase_traj/Al0He4O0', fmt='deepmd') + def setUp(self): + self.multi_systems = dpdata.MultiSystems.from_file( + "ase_traj/HeAlO.traj", fmt="ase_traj/structure" + ) + self.system_1 = self.multi_systems.systems["Al0He4O0"] + self.system_2 = dpdata.LabeledSystem("ase_traj/Al0He4O0", fmt="deepmd") self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 4 -@unittest.skipIf(skip_ase,"skip ase related test. install ase to fix") + +@unittest.skipIf(skip_ase, "skip ase related test. install ase to fix") class TestASEtraj1(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : - self.system_temp0 = dpdata.MultiSystems.from_file(file_name='ase_traj/HeAlO.traj', fmt='ase/structure') - self.system_1 = self.system_temp0.systems['Al2He1O3'] # .sort_atom_types() - self.system_temp1 = dpdata.LabeledSystem('ase_traj/Al2He1O3', fmt='deepmd') - self.system_temp2 = dpdata.LabeledSystem('ase_traj/Al4He4O6', fmt='deepmd') + def setUp(self): + self.system_temp0 = dpdata.MultiSystems.from_file( + file_name="ase_traj/HeAlO.traj", fmt="ase/structure" + ) + self.system_1 = self.system_temp0.systems["Al2He1O3"] # .sort_atom_types() + self.system_temp1 = dpdata.LabeledSystem("ase_traj/Al2He1O3", fmt="deepmd") + self.system_temp2 = dpdata.LabeledSystem("ase_traj/Al4He4O6", fmt="deepmd") self.system_temp3 = dpdata.MultiSystems(self.system_temp2, self.system_temp1) - self.system_2 = self.system_temp3.systems['Al2He1O3'] + self.system_2 = self.system_temp3.systems["Al2He1O3"] self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 4 -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_bond_order_system.py b/tests/test_bond_order_system.py index 95c595fe..d0b8fbd0 100644 --- a/tests/test_bond_order_system.py +++ b/tests/test_bond_order_system.py @@ -2,6 +2,7 @@ import unittest from context import dpdata import glob + try: from rdkit import Chem from rdkit.Chem import AllChem @@ -16,9 +17,11 @@ from copy import deepcopy -@unittest.skipIf(skip_bond_order_system, "dpdata does not have BondOrderSystem. One may install rdkit to fix.") +@unittest.skipIf( + skip_bond_order_system, + "dpdata does not have BondOrderSystem. One may install rdkit to fix.", +) class TestBondOrderSystem(unittest.TestCase): - def test_from_rdkit_mol(self): mol = Chem.MolFromSmiles("CC") mol = Chem.AddHs(mol) @@ -28,44 +31,66 @@ def test_from_rdkit_mol(self): self.assertEqual(system.get_nbonds(), 7) def test_from_mol_file(self): - syst = dpdata.BondOrderSystem("bond_order/CH3OH.mol", fmt='mol', type_map=['O','C','H']) + syst = dpdata.BondOrderSystem( + "bond_order/CH3OH.mol", fmt="mol", type_map=["O", "C", "H"] + ) self.assertEqual(syst.get_nframes(), 1) self.assertEqual(syst.get_nbonds(), 5) self.assertEqual(syst.get_natoms(), 6) - self.assertEqual(syst['atom_names'], ['O','C','H']) - self.assertAlmostEqual(syst['coords'][0][0][0], -0.3858) - + self.assertEqual(syst["atom_names"], ["O", "C", "H"]) + self.assertAlmostEqual(syst["coords"][0][0][0], -0.3858) + def test_from_sdf_file(self): - syst = dpdata.BondOrderSystem("bond_order/methane.sdf", type_map=['C','H']) + syst = dpdata.BondOrderSystem("bond_order/methane.sdf", type_map=["C", "H"]) self.assertEqual(syst.get_nframes(), 4) self.assertEqual(syst.get_nbonds(), 4) self.assertEqual(syst.get_natoms(), 5) - self.assertEqual(syst['atom_names'], ['C','H']) - self.assertAlmostEqual(syst['coords'][0][0][0], 0.0059) - self.assertAlmostEqual(syst['coords'][1][0][0], 0.0043) - self.assertAlmostEqual(syst['coords'][2][0][0], 0.0071) - self.assertAlmostEqual(syst['coords'][3][0][0], 0.0032) - + self.assertEqual(syst["atom_names"], ["C", "H"]) + self.assertAlmostEqual(syst["coords"][0][0][0], 0.0059) + self.assertAlmostEqual(syst["coords"][1][0][0], 0.0043) + self.assertAlmostEqual(syst["coords"][2][0][0], 0.0071) + self.assertAlmostEqual(syst["coords"][3][0][0], 0.0032) + def test_from_sdf_file_err(self): - self.assertRaises(ValueError, dpdata.BondOrderSystem, "bond_order/methane_ethane.sdf") + self.assertRaises( + ValueError, dpdata.BondOrderSystem, "bond_order/methane_ethane.sdf" + ) def test_regularize_formal_charges(self): - non_regular = Chem.MolFromMolFile("bond_order/formal_charge.mol", removeHs=False) + non_regular = Chem.MolFromMolFile( + "bond_order/formal_charge.mol", removeHs=False + ) regular = dpdata.BondOrderSystem("bond_order/formal_charge.mol", fmt="mol") self.assertFalse(non_regular) self.assertTrue(isinstance(regular.rdkit_mol, Chem.rdchem.Mol)) - + def test_formal_charge(self): - names = ["C5H5-", "CH3CC-", "CH3NC", "CH3NH3+", "CH3NO2", "OCH3+", - "gly", "arg", "oxpy", "CH3OPO3_2-", "CH3PH3+", "CH3OAsO3_2-", - "CH3SH", "CH3_2SO", "CH3_2SO2", "CH3SO3-", "BOH4-"] + names = [ + "C5H5-", + "CH3CC-", + "CH3NC", + "CH3NH3+", + "CH3NO2", + "OCH3+", + "gly", + "arg", + "oxpy", + "CH3OPO3_2-", + "CH3PH3+", + "CH3OAsO3_2-", + "CH3SH", + "CH3_2SO", + "CH3_2SO2", + "CH3SO3-", + "BOH4-", + ] charges = [-1, -1, 0, 1, 0, 1, 0, 1, 0, -2, 1, -2, 0, 0, 0, -1, -1] mols = [dpdata.BondOrderSystem(f"bond_order/{name}.mol") for name in names] self.assertEqual(charges, [mol.get_charge() for mol in mols]) def test_read_other_format_without_bond_info(self): self.assertRaises(RuntimeError, dpdata.BondOrderSystem, "gromacs/1h.gro") - + def test_dump_to_deepmd_raw(self): syst = dpdata.BondOrderSystem("bond_order/methane.sdf", fmt="sdf") syst.to_deepmd_raw("bond_order/methane") @@ -74,9 +99,9 @@ def test_dump_to_deepmd_raw(self): bonds = np.loadtxt("bond_order/methane/bonds.raw") for bond_idx in range(4): for ii in range(3): - self.assertEqual(syst['bonds'][bond_idx][ii], bonds[bond_idx][ii]) + self.assertEqual(syst["bonds"][bond_idx][ii], bonds[bond_idx][ii]) shutil.rmtree("bond_order/methane") - + def test_dump_to_deepmd_npy(self): syst = dpdata.BondOrderSystem("bond_order/methane.sdf", fmt="sdf") syst.to_deepmd_npy("bond_order/methane") @@ -85,9 +110,9 @@ def test_dump_to_deepmd_npy(self): bonds = np.loadtxt("bond_order/methane/bonds.raw") for bond_idx in range(4): for ii in range(3): - self.assertEqual(syst['bonds'][bond_idx][ii], bonds[bond_idx][ii]) + self.assertEqual(syst["bonds"][bond_idx][ii], bonds[bond_idx][ii]) shutil.rmtree("bond_order/methane") - + def test_dump_to_sdf_file(self): s1 = dpdata.BondOrderSystem("bond_order/methane.sdf", fmt="sdf") s2 = deepcopy(s1) @@ -97,26 +122,31 @@ def test_dump_to_sdf_file(self): nsyst = dpdata.BondOrderSystem("bond_order/test.sdf", fmt="sdf") self.assertEqual(nsyst["coords"][0, 0, 0] - s1["coords"][0, 0, 0], 1.0) os.remove("bond_order/test.sdf") - + def test_sanitize_mol_obabel(self): cnt = 0 for sdf_file in glob.glob("bond_order/refined-set-ligands/obabel/*sdf"): - syst = dpdata.BondOrderSystem(sdf_file, sanitize_level='high', verbose=False) + syst = dpdata.BondOrderSystem( + sdf_file, sanitize_level="high", verbose=False + ) if syst.rdkit_mol is None: cnt += 1 self.assertEqual(cnt, 0) - + def test_sanitize_mol_origin(self): cnt = 0 for sdf_file in glob.glob("bond_order/refined-set-ligands/origin/*sdf"): - syst = dpdata.BondOrderSystem(sdf_file, sanitize_level='high', verbose=False) + syst = dpdata.BondOrderSystem( + sdf_file, sanitize_level="high", verbose=False + ) if syst.rdkit_mol is None: cnt += 1 self.assertEqual(cnt, 0) - + def tearDown(self): if os.path.exists("tests/.cache"): shutil.rmtree("tests/.cache") -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/tests/test_cell_to_low_triangle.py b/tests/test_cell_to_low_triangle.py index ca60b35d..6696e171 100644 --- a/tests/test_cell_to_low_triangle.py +++ b/tests/test_cell_to_low_triangle.py @@ -3,47 +3,65 @@ import unittest from context import dpdata + class TestCellToLowTriangle(unittest.TestCase): def test_func1(self): - cell_1 = dpdata.cp2k.cell.cell_to_low_triangle(6,6,6,np.pi*1/2, np.pi*1/2, np.pi*1/2) - cell_2 = np.asarray([[6,0,0],[0,6,0],[0,0,6]]) + cell_1 = dpdata.cp2k.cell.cell_to_low_triangle( + 6, 6, 6, np.pi * 1 / 2, np.pi * 1 / 2, np.pi * 1 / 2 + ) + cell_2 = np.asarray([[6, 0, 0], [0, 6, 0], [0, 0, 6]]) for ii in range(3): for jj in range(3): - self.assertAlmostEqual(cell_1[ii,jj], cell_2[ii,jj], places=6) + self.assertAlmostEqual(cell_1[ii, jj], cell_2[ii, jj], places=6) def test_func2(self): - cell_1 = dpdata.cp2k.cell.cell_to_low_triangle(6,6,6,np.pi*1/3, np.pi*1/3, np.pi*1/3) - cell_2 = np.asarray([ - [6,0,0], - [3,3*np.sqrt(3),0], - [3,np.sqrt(3),2*np.sqrt(6)]]) + cell_1 = dpdata.cp2k.cell.cell_to_low_triangle( + 6, 6, 6, np.pi * 1 / 3, np.pi * 1 / 3, np.pi * 1 / 3 + ) + cell_2 = np.asarray( + [[6, 0, 0], [3, 3 * np.sqrt(3), 0], [3, np.sqrt(3), 2 * np.sqrt(6)]] + ) for ii in range(3): for jj in range(3): - self.assertAlmostEqual(cell_1[ii,jj], cell_2[ii,jj], places=6) + self.assertAlmostEqual(cell_1[ii, jj], cell_2[ii, jj], places=6) def test_func3(self): - cell_1 = dpdata.cp2k.cell.cell_to_low_triangle(6,7,8,np.pi*133/180,np.pi*84/180,np.pi*69/180) - cell_2 = np.asarray([[ 6.0, 0.0, 0.0], - [ 2.5085757, 6.535063 , 0.0], - [ 0.8362277, -6.1651506, 5.0290794]], dtype='float32') + cell_1 = dpdata.cp2k.cell.cell_to_low_triangle( + 6, 7, 8, np.pi * 133 / 180, np.pi * 84 / 180, np.pi * 69 / 180 + ) + cell_2 = np.asarray( + [ + [6.0, 0.0, 0.0], + [2.5085757, 6.535063, 0.0], + [0.8362277, -6.1651506, 5.0290794], + ], + dtype="float32", + ) for ii in range(3): for jj in range(3): - self.assertAlmostEqual(cell_1[ii,jj], cell_2[ii,jj], places=6) + self.assertAlmostEqual(cell_1[ii, jj], cell_2[ii, jj], places=6) def test_func4(self): with self.assertRaises(Exception) as c: - dpdata.cp2k.cell.cell_to_low_triangle(0.1,6,6,np.pi*1/2,np.pi*1/2,np.pi*1/2) + dpdata.cp2k.cell.cell_to_low_triangle( + 0.1, 6, 6, np.pi * 1 / 2, np.pi * 1 / 2, np.pi * 1 / 2 + ) self.assertTrue("A==0.1" in str(c.exception)) def test_func5(self): with self.assertRaises(Exception) as c: - dpdata.cp2k.cell.cell_to_low_triangle(6,6,6,np.pi*3/180,np.pi*1/2,np.pi*1/2) + dpdata.cp2k.cell.cell_to_low_triangle( + 6, 6, 6, np.pi * 3 / 180, np.pi * 1 / 2, np.pi * 1 / 2 + ) self.assertTrue("alpha" in str(c.exception)) def test_func6(self): with self.assertRaises(Exception) as c: - dpdata.cp2k.cell.cell_to_low_triangle(6,7,8,np.pi*153/180,np.pi*84/180,np.pi*69/180) + dpdata.cp2k.cell.cell_to_low_triangle( + 6, 7, 8, np.pi * 153 / 180, np.pi * 84 / 180, np.pi * 69 / 180 + ) self.assertTrue("lz^2" in str(c.exception)) -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/tests/test_cli.py b/tests/test_cli.py index 3d6d29e4..7275237a 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -5,12 +5,22 @@ class TestCli(unittest.TestCase, TestPOSCARoh): - @classmethod def setUpClass(cls) -> None: - sp.check_output(["dpdata", "poscars/conf.lmp", "--type-map", "O", "H", "-olammps/lmp", "-O", "tmp.lmp", "--no-labeled"]) - cls.system = dpdata.System('tmp.lmp', fmt='lammps/lmp', - type_map = ['O', 'H']) + sp.check_output( + [ + "dpdata", + "poscars/conf.lmp", + "--type-map", + "O", + "H", + "-olammps/lmp", + "-O", + "tmp.lmp", + "--no-labeled", + ] + ) + cls.system = dpdata.System("tmp.lmp", fmt="lammps/lmp", type_map=["O", "H"]) @classmethod def tearDownClass(cls) -> None: diff --git a/tests/test_corr.py b/tests/test_corr.py index fd8fa2e7..74fb2a10 100644 --- a/tests/test_corr.py +++ b/tests/test_corr.py @@ -3,6 +3,7 @@ from comp_sys import CompLabeledSys from comp_sys import IsPBC + class TestCorr(unittest.TestCase, CompLabeledSys, IsPBC): """Make a test to get a correction of two systems. @@ -10,19 +11,20 @@ class TestCorr(unittest.TestCase, CompLabeledSys, IsPBC): --------- https://doi.org/10.26434/chemrxiv.14120447 """ + def setUp(self): - ll="amber/corr/low_level" - hl="amber/corr/high_level" - ncfile="amber/corr/rc.nc" - parmfile="amber/corr/qmmm.parm7" - ep = r'@%EP' + ll = "amber/corr/low_level" + hl = "amber/corr/high_level" + ncfile = "amber/corr/rc.nc" + parmfile = "amber/corr/qmmm.parm7" + ep = r"@%EP" target = ":1" - cutoff = 6. + cutoff = 6.0 interactwith = "(%s)<:%f&!%s" % (target, cutoff, ep) s_ll = dpdata.LabeledSystem("amber/corr/dp_ll", fmt="deepmd/npy") s_hl = dpdata.LabeledSystem("amber/corr/dp_hl", fmt="deepmd/npy") self.system_1 = s_ll.correction(s_hl) - self.system_2 = dpdata.LabeledSystem("amber/corr/dp_corr" ,fmt="deepmd/npy") + self.system_2 = dpdata.LabeledSystem("amber/corr/dp_corr", fmt="deepmd/npy") self.places = 5 self.e_places = 4 self.f_places = 6 @@ -31,16 +33,21 @@ def setUp(self): class TestCorr(unittest.TestCase, CompLabeledSys, IsPBC): """Make a test to get a correction of two MultiSystems.""" + def setUp(self): - s_ll = dpdata.MultiSystems(dpdata.LabeledSystem("amber/corr/dp_ll", fmt="deepmd/npy")) - s_hl = dpdata.MultiSystems(dpdata.LabeledSystem("amber/corr/dp_hl", fmt="deepmd/npy")) + s_ll = dpdata.MultiSystems( + dpdata.LabeledSystem("amber/corr/dp_ll", fmt="deepmd/npy") + ) + s_hl = dpdata.MultiSystems( + dpdata.LabeledSystem("amber/corr/dp_hl", fmt="deepmd/npy") + ) self.system_1 = tuple(s_ll.correction(s_hl).systems.values())[0] - self.system_2 = dpdata.LabeledSystem("amber/corr/dp_corr" ,fmt="deepmd/npy") + self.system_2 = dpdata.LabeledSystem("amber/corr/dp_corr", fmt="deepmd/npy") self.places = 5 self.e_places = 4 self.f_places = 6 self.v_places = 6 -if __name__ == '__main__': - unittest.main() \ No newline at end of file +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_cp2k_aimd_output.py b/tests/test_cp2k_aimd_output.py index 8e2bdd56..471153b1 100644 --- a/tests/test_cp2k_aimd_output.py +++ b/tests/test_cp2k_aimd_output.py @@ -1,30 +1,34 @@ - #%% import os import numpy as np import unittest from context import dpdata from comp_sys import CompLabeledSys + #%% class TestCp2kAimdOutput(unittest.TestCase, CompLabeledSys): def setUp(self): - self.system_1 = dpdata.LabeledSystem('cp2k/aimd',fmt='cp2k/aimd_output') - self.system_2 = dpdata.LabeledSystem('cp2k/aimd/deepmd', fmt='deepmd/npy') + self.system_1 = dpdata.LabeledSystem("cp2k/aimd", fmt="cp2k/aimd_output") + self.system_2 = dpdata.LabeledSystem("cp2k/aimd/deepmd", fmt="deepmd/npy") self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 4 + class TestCp2kAimdStressOutput(unittest.TestCase, CompLabeledSys): def setUp(self): - self.system_1 = dpdata.LabeledSystem('cp2k/aimd_stress',fmt='cp2k/aimd_output') - self.system_2 = dpdata.LabeledSystem('cp2k/aimd_stress/deepmd', fmt='deepmd/raw') + self.system_1 = dpdata.LabeledSystem("cp2k/aimd_stress", fmt="cp2k/aimd_output") + self.system_2 = dpdata.LabeledSystem( + "cp2k/aimd_stress/deepmd", fmt="deepmd/raw" + ) self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 4 -#class TestCp2kAimdRestartOutput(unittest.TestCase, CompLabeledSys): + +# class TestCp2kAimdRestartOutput(unittest.TestCase, CompLabeledSys): # def setUp(self): # self.system_1 = dpdata.LabeledSystem('cp2k/restart_aimd',fmt='cp2k/aimd_output', restart=True) # self.system_2 = dpdata.LabeledSystem('cp2k/restart_aimd/deepmd', fmt='deepmd/raw') @@ -33,7 +37,7 @@ def setUp(self): # self.f_places = 6 # self.v_places = 4 # -#class TestCp2kAimdOutputError(unittest.TestCase): +# class TestCp2kAimdOutputError(unittest.TestCase): # def setUp(self): # pass # @@ -41,7 +45,7 @@ def setUp(self): # with self.assertRaises(AssertionError): # dpdata.LabeledSystem('cp2k/restart_aimd', fmt='cp2k/aimd_output', restart=False) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_cp2k_output.py b/tests/test_cp2k_output.py index eac3b182..37c639c6 100644 --- a/tests/test_cp2k_output.py +++ b/tests/test_cp2k_output.py @@ -4,56 +4,71 @@ from context import dpdata from comp_sys import CompLabeledSys + class TestCp2kNormalOutput(unittest.TestCase, CompLabeledSys): def setUp(self): - self.system_1 = dpdata.LabeledSystem('cp2k/cp2k_normal_output/cp2k_output',fmt='cp2k/output') - self.system_2 = dpdata.LabeledSystem('cp2k/cp2k_normal_output/deepmd', fmt='deepmd/npy') + self.system_1 = dpdata.LabeledSystem( + "cp2k/cp2k_normal_output/cp2k_output", fmt="cp2k/output" + ) + self.system_2 = dpdata.LabeledSystem( + "cp2k/cp2k_normal_output/deepmd", fmt="deepmd/npy" + ) self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 4 - + + class TestCP2KDuplicateHeader(unittest.TestCase, CompLabeledSys): def setUp(self): - self.system_1 = dpdata.LabeledSystem('cp2k/cp2k_duplicate_header/cp2k_output_duplicate_header',fmt='cp2k/output') - self.system_2 = dpdata.LabeledSystem('cp2k/cp2k_duplicate_header/deepmd', fmt='deepmd/npy') + self.system_1 = dpdata.LabeledSystem( + "cp2k/cp2k_duplicate_header/cp2k_output_duplicate_header", fmt="cp2k/output" + ) + self.system_2 = dpdata.LabeledSystem( + "cp2k/cp2k_duplicate_header/deepmd", fmt="deepmd/npy" + ) self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 4 + class TestCp2kReplaceElementOutput(unittest.TestCase, CompLabeledSys): def setUp(self): - self.system_1 = dpdata.LabeledSystem('cp2k/cp2k_element_replace/cp2k_output_element_replace',fmt='cp2k/output') - self.system_2 = dpdata.LabeledSystem('cp2k/cp2k_element_replace/deepmd', fmt='deepmd/npy') + self.system_1 = dpdata.LabeledSystem( + "cp2k/cp2k_element_replace/cp2k_output_element_replace", fmt="cp2k/output" + ) + self.system_2 = dpdata.LabeledSystem( + "cp2k/cp2k_element_replace/deepmd", fmt="deepmd/npy" + ) self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 4 + class TestNonCoveragedCP2KOutput(unittest.TestCase): - def setUp (self) : - self.system = dpdata.LabeledSystem('cp2k/cp2k_nocon_output', - fmt = 'cp2k/output') - def test_atom_types(self) : - self.assertEqual(self.system.data['atom_types'], []) + def setUp(self): + self.system = dpdata.LabeledSystem("cp2k/cp2k_nocon_output", fmt="cp2k/output") - def test_cells(self) : - self.assertEqual(self.system.data['cells'], []) + def test_atom_types(self): + self.assertEqual(self.system.data["atom_types"], []) - def test_coords(self) : - self.assertEqual(self.system.data['coords'], []) + def test_cells(self): + self.assertEqual(self.system.data["cells"], []) - def test_energies(self) : - self.assertEqual(self.system.data['energies'], []) + def test_coords(self): + self.assertEqual(self.system.data["coords"], []) - def test_forces(self) : - self.assertEqual(self.system.data['forces'], []) + def test_energies(self): + self.assertEqual(self.system.data["energies"], []) - def test_virials(self) : - self.assertFalse('virials' in self.system.data) + def test_forces(self): + self.assertEqual(self.system.data["forces"], []) + def test_virials(self): + self.assertFalse("virials" in self.system.data) -if __name__ == '__main__': - unittest.main() +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_deepmd_comp.py b/tests/test_deepmd_comp.py index 840712af..3b806859 100644 --- a/tests/test_deepmd_comp.py +++ b/tests/test_deepmd_comp.py @@ -1,82 +1,79 @@ -import os,shutil +import os, shutil import numpy as np import unittest from context import dpdata from comp_sys import CompLabeledSys, CompSys, IsPBC + class TestDeepmdLoadDumpComp(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : - self.system_1 = dpdata.LabeledSystem('poscars/OUTCAR.h2o.md', - fmt = 'vasp/outcar') - self.system_1.to_deepmd_npy('tmp.deepmd.npy', - prec = np.float64, - set_size = 2) - - self.system_2 = dpdata.LabeledSystem('tmp.deepmd.npy', - fmt = 'deepmd/npy', - type_map = ['O', 'H']) + def setUp(self): + self.system_1 = dpdata.LabeledSystem("poscars/OUTCAR.h2o.md", fmt="vasp/outcar") + self.system_1.to_deepmd_npy("tmp.deepmd.npy", prec=np.float64, set_size=2) + + self.system_2 = dpdata.LabeledSystem( + "tmp.deepmd.npy", fmt="deepmd/npy", type_map=["O", "H"] + ) self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 6 - def tearDown(self) : - if os.path.exists('tmp.deepmd.npy'): - shutil.rmtree('tmp.deepmd.npy') - - -class TestDeepmdCompNoLabels(unittest.TestCase, CompSys, IsPBC) : - def setUp (self) : - self.system_1 = dpdata.System('poscars/POSCAR.h2o.md', - fmt = 'vasp/poscar') - self.system_1.to_deepmd_npy('tmp.deepmd.npy', - prec = np.float64, - set_size = 2) - self.system_2 = dpdata.System('tmp.deepmd.npy', - fmt = 'deepmd/npy', - type_map = ['O', 'H']) + def tearDown(self): + if os.path.exists("tmp.deepmd.npy"): + shutil.rmtree("tmp.deepmd.npy") + + +class TestDeepmdCompNoLabels(unittest.TestCase, CompSys, IsPBC): + def setUp(self): + self.system_1 = dpdata.System("poscars/POSCAR.h2o.md", fmt="vasp/poscar") + self.system_1.to_deepmd_npy("tmp.deepmd.npy", prec=np.float64, set_size=2) + self.system_2 = dpdata.System( + "tmp.deepmd.npy", fmt="deepmd/npy", type_map=["O", "H"] + ) self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 6 - def tearDown(self) : - if os.path.exists('tmp.deepmd.npy'): - shutil.rmtree('tmp.deepmd.npy') - + def tearDown(self): + if os.path.exists("tmp.deepmd.npy"): + shutil.rmtree("tmp.deepmd.npy") -class TestDeepmdCompNoLabels(unittest.TestCase, CompSys, IsPBC) : - def setUp(self) : - self.dir_name = 'tmp.deepmd.npy.nol' + +class TestDeepmdCompNoLabels(unittest.TestCase, CompSys, IsPBC): + def setUp(self): + self.dir_name = "tmp.deepmd.npy.nol" natoms = 3 - atom_names = ['O', 'H'] + atom_names = ["O", "H"] atom_numbs = [1, 2] - atom_types = np.array([0, 1, 1], dtype = np.int32) + atom_types = np.array([0, 1, 1], dtype=np.int32) nframes = 11 half_n = 6 idx = [range(0, half_n), range(half_n, nframes)] - os.makedirs(self.dir_name, exist_ok = True) - os.makedirs(os.path.join(self.dir_name, 'set.000'), exist_ok = True) - os.makedirs(os.path.join(self.dir_name, 'set.001'), exist_ok = True) - np.savetxt(os.path.join(self.dir_name, 'type.raw'), atom_types, fmt = '%d') - + os.makedirs(self.dir_name, exist_ok=True) + os.makedirs(os.path.join(self.dir_name, "set.000"), exist_ok=True) + os.makedirs(os.path.join(self.dir_name, "set.001"), exist_ok=True) + np.savetxt(os.path.join(self.dir_name, "type.raw"), atom_types, fmt="%d") + coords = np.random.random([nframes, natoms, 3]) cells = np.random.random([nframes, 3, 3]) - np.save(os.path.join(self.dir_name, 'set.000', 'coord.npy'), coords[idx[0]]) - np.save(os.path.join(self.dir_name, 'set.000', 'box.npy'), cells [idx[0]]) - np.save(os.path.join(self.dir_name, 'set.001', 'coord.npy'), coords[idx[1]]) - np.save(os.path.join(self.dir_name, 'set.001', 'box.npy'), cells [idx[1]]) - + np.save(os.path.join(self.dir_name, "set.000", "coord.npy"), coords[idx[0]]) + np.save(os.path.join(self.dir_name, "set.000", "box.npy"), cells[idx[0]]) + np.save(os.path.join(self.dir_name, "set.001", "coord.npy"), coords[idx[1]]) + np.save(os.path.join(self.dir_name, "set.001", "box.npy"), cells[idx[1]]) + data = { - 'atom_names' : atom_names, - 'atom_types' : atom_types, - 'atom_numbs' : atom_numbs, - 'coords' : coords, - 'cells' : cells, - 'orig' : np.zeros(3), + "atom_names": atom_names, + "atom_types": atom_types, + "atom_numbs": atom_numbs, + "coords": coords, + "cells": cells, + "orig": np.zeros(3), } - self.system_1 = dpdata.System(self.dir_name, fmt = 'deepmd/npy', type_map = ['O', 'H']) + self.system_1 = dpdata.System( + self.dir_name, fmt="deepmd/npy", type_map=["O", "H"] + ) self.system_2 = dpdata.System() self.system_2.data = data @@ -85,11 +82,10 @@ def setUp(self) : self.f_places = 6 self.v_places = 6 - - def tearDown(self) : + def tearDown(self): if os.path.exists(self.dir_name): shutil.rmtree(self.dir_name) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_deepmd_hdf5.py b/tests/test_deepmd_hdf5.py index 08d25730..24ed4f0d 100644 --- a/tests/test_deepmd_hdf5.py +++ b/tests/test_deepmd_hdf5.py @@ -4,67 +4,68 @@ from context import dpdata from comp_sys import CompLabeledSys, CompSys, IsNoPBC, IsPBC, MultiSystems + class TestDeepmdLoadDumpHDF5(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : - self.system_1 = dpdata.LabeledSystem('poscars/OUTCAR.h2o.md', - fmt = 'vasp/outcar') - self.system_1.to_deepmd_hdf5('tmp.deepmd.hdf5', - prec = np.float64, - set_size = 2) + def setUp(self): + self.system_1 = dpdata.LabeledSystem("poscars/OUTCAR.h2o.md", fmt="vasp/outcar") + self.system_1.to_deepmd_hdf5("tmp.deepmd.hdf5", prec=np.float64, set_size=2) - self.system_2 = dpdata.LabeledSystem('tmp.deepmd.hdf5', - fmt = 'deepmd/hdf5', - type_map = ['O', 'H']) + self.system_2 = dpdata.LabeledSystem( + "tmp.deepmd.hdf5", fmt="deepmd/hdf5", type_map=["O", "H"] + ) self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 6 - def tearDown(self) : - if os.path.exists('tmp.deepmd.hdf5'): - os.remove('tmp.deepmd.hdf5') + def tearDown(self): + if os.path.exists("tmp.deepmd.hdf5"): + os.remove("tmp.deepmd.hdf5") -class TestDeepmdHDF5NoLabels(unittest.TestCase, CompSys, IsPBC) : - def setUp (self) : - self.system_1 = dpdata.System('poscars/POSCAR.h2o.md', - fmt = 'vasp/poscar') - self.system_1.to_deepmd_hdf5('tmp.deepmd.hdf5', - prec = np.float64, - set_size = 2) - self.system_2 = dpdata.System('tmp.deepmd.hdf5', - fmt = 'deepmd/hdf5', - type_map = ['O', 'H']) +class TestDeepmdHDF5NoLabels(unittest.TestCase, CompSys, IsPBC): + def setUp(self): + self.system_1 = dpdata.System("poscars/POSCAR.h2o.md", fmt="vasp/poscar") + self.system_1.to_deepmd_hdf5("tmp.deepmd.hdf5", prec=np.float64, set_size=2) + self.system_2 = dpdata.System( + "tmp.deepmd.hdf5", fmt="deepmd/hdf5", type_map=["O", "H"] + ) self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 6 - def tearDown(self) : - if os.path.exists('tmp.deepmd.hdf5'): - os.remove('tmp.deepmd.hdf5') + def tearDown(self): + if os.path.exists("tmp.deepmd.hdf5"): + os.remove("tmp.deepmd.hdf5") class TestHDF5Multi(unittest.TestCase, CompLabeledSys, MultiSystems, IsNoPBC): - def setUp (self): + def setUp(self): self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 6 - system_1 = dpdata.LabeledSystem('gaussian/methane.gaussianlog', fmt='gaussian/log') - system_2 = dpdata.LabeledSystem('gaussian/methane_reordered.gaussianlog', fmt='gaussian/log') - system_3 = dpdata.LabeledSystem('gaussian/methane_sub.gaussianlog', fmt='gaussian/log') + system_1 = dpdata.LabeledSystem( + "gaussian/methane.gaussianlog", fmt="gaussian/log" + ) + system_2 = dpdata.LabeledSystem( + "gaussian/methane_reordered.gaussianlog", fmt="gaussian/log" + ) + system_3 = dpdata.LabeledSystem( + "gaussian/methane_sub.gaussianlog", fmt="gaussian/log" + ) systems = dpdata.MultiSystems(system_1, system_2, system_3) systems.to_deepmd_hdf5("tmp.deepmd.hdf5") self.systems = dpdata.MultiSystems().from_deepmd_hdf5("tmp.deepmd.hdf5") - self.system_names = ['C1H4', 'C1H3'] - self.system_sizes = {'C1H4':2, 'C1H3':1} - self.atom_names = ['C', 'H'] - self.system_1 = self.systems['C1H3'] + self.system_names = ["C1H4", "C1H3"] + self.system_sizes = {"C1H4": 2, "C1H3": 1} + self.atom_names = ["C", "H"] + self.system_1 = self.systems["C1H3"] self.system_2 = system_3 - def tearDown(self) : - if os.path.exists('tmp.deepmd.hdf5'): - os.remove('tmp.deepmd.hdf5') + def tearDown(self): + if os.path.exists("tmp.deepmd.hdf5"): + os.remove("tmp.deepmd.hdf5") diff --git a/tests/test_deepmd_raw.py b/tests/test_deepmd_raw.py index 241da716..5ba54877 100644 --- a/tests/test_deepmd_raw.py +++ b/tests/test_deepmd_raw.py @@ -1,16 +1,16 @@ -import os,shutil +import os, shutil import numpy as np import unittest from context import dpdata from comp_sys import CompLabeledSys, CompSys, IsPBC + class TestDeepmdLoadRaw(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : - self.system_1 = dpdata.LabeledSystem('poscars/OUTCAR.h2o.md', - fmt = 'vasp/outcar') - self.system_2 = dpdata.LabeledSystem('poscars/deepmd.h2o.md', - fmt = 'deepmd/raw', - type_map = ['O', 'H']) + def setUp(self): + self.system_1 = dpdata.LabeledSystem("poscars/OUTCAR.h2o.md", fmt="vasp/outcar") + self.system_2 = dpdata.LabeledSystem( + "poscars/deepmd.h2o.md", fmt="deepmd/raw", type_map=["O", "H"] + ) self.places = 6 self.e_places = 6 self.f_places = 6 @@ -18,163 +18,162 @@ def setUp (self) : class TestDeepmdDumpRaw(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : - self.system_1 = dpdata.LabeledSystem('poscars/OUTCAR.h2o.md', - fmt = 'vasp/outcar') - self.system_1.to_deepmd_raw('tmp.deepmd') - self.system_2 = dpdata.LabeledSystem('tmp.deepmd', type_map = ['O', 'H']) + def setUp(self): + self.system_1 = dpdata.LabeledSystem("poscars/OUTCAR.h2o.md", fmt="vasp/outcar") + self.system_1.to_deepmd_raw("tmp.deepmd") + self.system_2 = dpdata.LabeledSystem("tmp.deepmd", type_map=["O", "H"]) self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 6 - def tearDown(self) : - if os.path.exists('tmp.deepmd'): - shutil.rmtree('tmp.deepmd') + def tearDown(self): + if os.path.exists("tmp.deepmd"): + shutil.rmtree("tmp.deepmd") class TestDeepmdTypeMap(unittest.TestCase): - def tearDown(self) : - if os.path.exists('tmp.deepmd'): - shutil.rmtree('tmp.deepmd') - - def test_type_map (self) : - system_1 = dpdata.LabeledSystem('poscars/OUTCAR.h2o.md', - fmt = 'vasp/outcar') - system_1.to_deepmd_raw('tmp.deepmd') - with open(os.path.join('tmp.deepmd', 'type_map.raw')) as fp: + def tearDown(self): + if os.path.exists("tmp.deepmd"): + shutil.rmtree("tmp.deepmd") + + def test_type_map(self): + system_1 = dpdata.LabeledSystem("poscars/OUTCAR.h2o.md", fmt="vasp/outcar") + system_1.to_deepmd_raw("tmp.deepmd") + with open(os.path.join("tmp.deepmd", "type_map.raw")) as fp: tm = fp.read().split() - self.assertEqual(tm, ['O', 'H']) - self.assertEqual(system_1['atom_names'], ['O', 'H']) - self.assertEqual(system_1['atom_types'][0], 0) - self.assertEqual(system_1['atom_types'][1], 0) - self.assertEqual(system_1['atom_types'][2], 1) - self.assertEqual(system_1['atom_types'][3], 1) - self.assertEqual(system_1['atom_types'][4], 1) - self.assertEqual(system_1['atom_types'][5], 1) - - def test_type_map_load (self) : - system_1 = dpdata.LabeledSystem('poscars/OUTCAR.h2o.md', - fmt = 'vasp/outcar') - system_1.to_deepmd_raw('tmp.deepmd') - system_2 = dpdata.LabeledSystem('tmp.deepmd') - self.assertEqual(system_2['atom_names'], ['O', 'H']) - self.assertEqual(system_2['atom_types'][0], 0) - self.assertEqual(system_2['atom_types'][1], 0) - self.assertEqual(system_2['atom_types'][2], 1) - self.assertEqual(system_2['atom_types'][3], 1) - self.assertEqual(system_2['atom_types'][4], 1) - self.assertEqual(system_2['atom_types'][5], 1) - self.assertEqual(system_2['atom_numbs'][0], 2) - self.assertEqual(system_2['atom_numbs'][1], 4) - - def test_type_map_enforce (self) : - system_1 = dpdata.LabeledSystem('poscars/OUTCAR.h2o.md', - fmt = 'vasp/outcar') - system_1.to_deepmd_raw('tmp.deepmd') - system_2 = dpdata.LabeledSystem('tmp.deepmd', type_map = ['H', 'O']) - self.assertEqual(system_2['atom_names'], ['H', 'O']) - self.assertEqual(system_2['atom_types'][0], 1) - self.assertEqual(system_2['atom_types'][1], 1) - self.assertEqual(system_2['atom_types'][2], 0) - self.assertEqual(system_2['atom_types'][3], 0) - self.assertEqual(system_2['atom_types'][4], 0) - self.assertEqual(system_2['atom_types'][5], 0) - self.assertEqual(system_2['atom_numbs'][0], 4) - self.assertEqual(system_2['atom_numbs'][1], 2) - - def test_npy_type_map (self) : - system_1 = dpdata.LabeledSystem('poscars/OUTCAR.h2o.md', - fmt = 'vasp/outcar') - system_1.to_deepmd_npy('tmp.deepmd') - with open(os.path.join('tmp.deepmd', 'type_map.raw')) as fp: + self.assertEqual(tm, ["O", "H"]) + self.assertEqual(system_1["atom_names"], ["O", "H"]) + self.assertEqual(system_1["atom_types"][0], 0) + self.assertEqual(system_1["atom_types"][1], 0) + self.assertEqual(system_1["atom_types"][2], 1) + self.assertEqual(system_1["atom_types"][3], 1) + self.assertEqual(system_1["atom_types"][4], 1) + self.assertEqual(system_1["atom_types"][5], 1) + + def test_type_map_load(self): + system_1 = dpdata.LabeledSystem("poscars/OUTCAR.h2o.md", fmt="vasp/outcar") + system_1.to_deepmd_raw("tmp.deepmd") + system_2 = dpdata.LabeledSystem("tmp.deepmd") + self.assertEqual(system_2["atom_names"], ["O", "H"]) + self.assertEqual(system_2["atom_types"][0], 0) + self.assertEqual(system_2["atom_types"][1], 0) + self.assertEqual(system_2["atom_types"][2], 1) + self.assertEqual(system_2["atom_types"][3], 1) + self.assertEqual(system_2["atom_types"][4], 1) + self.assertEqual(system_2["atom_types"][5], 1) + self.assertEqual(system_2["atom_numbs"][0], 2) + self.assertEqual(system_2["atom_numbs"][1], 4) + + def test_type_map_enforce(self): + system_1 = dpdata.LabeledSystem("poscars/OUTCAR.h2o.md", fmt="vasp/outcar") + system_1.to_deepmd_raw("tmp.deepmd") + system_2 = dpdata.LabeledSystem("tmp.deepmd", type_map=["H", "O"]) + self.assertEqual(system_2["atom_names"], ["H", "O"]) + self.assertEqual(system_2["atom_types"][0], 1) + self.assertEqual(system_2["atom_types"][1], 1) + self.assertEqual(system_2["atom_types"][2], 0) + self.assertEqual(system_2["atom_types"][3], 0) + self.assertEqual(system_2["atom_types"][4], 0) + self.assertEqual(system_2["atom_types"][5], 0) + self.assertEqual(system_2["atom_numbs"][0], 4) + self.assertEqual(system_2["atom_numbs"][1], 2) + + def test_npy_type_map(self): + system_1 = dpdata.LabeledSystem("poscars/OUTCAR.h2o.md", fmt="vasp/outcar") + system_1.to_deepmd_npy("tmp.deepmd") + with open(os.path.join("tmp.deepmd", "type_map.raw")) as fp: tm = fp.read().split() - self.assertEqual(tm, ['O', 'H']) - self.assertEqual(system_1['atom_names'], ['O', 'H']) - self.assertEqual(system_1['atom_types'][0], 0) - self.assertEqual(system_1['atom_types'][1], 0) - self.assertEqual(system_1['atom_types'][2], 1) - self.assertEqual(system_1['atom_types'][3], 1) - self.assertEqual(system_1['atom_types'][4], 1) - self.assertEqual(system_1['atom_types'][5], 1) - - def test_npy_type_map_load (self) : - system_1 = dpdata.LabeledSystem('poscars/OUTCAR.h2o.md', - fmt = 'vasp/outcar') - system_1.to_deepmd_npy('tmp.deepmd') - system_2 = dpdata.LabeledSystem('tmp.deepmd', fmt = 'deepmd/npy') - self.assertEqual(system_2['atom_names'], ['O', 'H']) - self.assertEqual(system_2['atom_types'][0], 0) - self.assertEqual(system_2['atom_types'][1], 0) - self.assertEqual(system_2['atom_types'][2], 1) - self.assertEqual(system_2['atom_types'][3], 1) - self.assertEqual(system_2['atom_types'][4], 1) - self.assertEqual(system_2['atom_types'][5], 1) - self.assertEqual(system_2['atom_numbs'][0], 2) - self.assertEqual(system_2['atom_numbs'][1], 4) - - def test_npy_type_map_enforce (self) : - system_1 = dpdata.LabeledSystem('poscars/OUTCAR.h2o.md', - fmt = 'vasp/outcar') - system_1.to_deepmd_npy('tmp.deepmd') - system_2 = dpdata.LabeledSystem('tmp.deepmd', type_map = ['H', 'O'], fmt = 'deepmd/npy') - self.assertEqual(system_2['atom_names'], ['H', 'O']) - self.assertEqual(system_2['atom_types'][0], 1) - self.assertEqual(system_2['atom_types'][1], 1) - self.assertEqual(system_2['atom_types'][2], 0) - self.assertEqual(system_2['atom_types'][3], 0) - self.assertEqual(system_2['atom_types'][4], 0) - self.assertEqual(system_2['atom_types'][5], 0) - self.assertEqual(system_2['atom_numbs'][0], 4) - self.assertEqual(system_2['atom_numbs'][1], 2) - - - - -class TestDeepmdRawNoLabels(unittest.TestCase, CompSys, IsPBC) : - def setUp (self) : - self.system_1 = dpdata.System('poscars/POSCAR.h2o.md', - fmt = 'vasp/poscar') - self.system_1.to_deepmd_raw('tmp.deepmd') - self.system_2 = dpdata.System('tmp.deepmd', - fmt = 'deepmd/raw', - type_map = ['O', 'H']) + self.assertEqual(tm, ["O", "H"]) + self.assertEqual(system_1["atom_names"], ["O", "H"]) + self.assertEqual(system_1["atom_types"][0], 0) + self.assertEqual(system_1["atom_types"][1], 0) + self.assertEqual(system_1["atom_types"][2], 1) + self.assertEqual(system_1["atom_types"][3], 1) + self.assertEqual(system_1["atom_types"][4], 1) + self.assertEqual(system_1["atom_types"][5], 1) + + def test_npy_type_map_load(self): + system_1 = dpdata.LabeledSystem("poscars/OUTCAR.h2o.md", fmt="vasp/outcar") + system_1.to_deepmd_npy("tmp.deepmd") + system_2 = dpdata.LabeledSystem("tmp.deepmd", fmt="deepmd/npy") + self.assertEqual(system_2["atom_names"], ["O", "H"]) + self.assertEqual(system_2["atom_types"][0], 0) + self.assertEqual(system_2["atom_types"][1], 0) + self.assertEqual(system_2["atom_types"][2], 1) + self.assertEqual(system_2["atom_types"][3], 1) + self.assertEqual(system_2["atom_types"][4], 1) + self.assertEqual(system_2["atom_types"][5], 1) + self.assertEqual(system_2["atom_numbs"][0], 2) + self.assertEqual(system_2["atom_numbs"][1], 4) + + def test_npy_type_map_enforce(self): + system_1 = dpdata.LabeledSystem("poscars/OUTCAR.h2o.md", fmt="vasp/outcar") + system_1.to_deepmd_npy("tmp.deepmd") + system_2 = dpdata.LabeledSystem( + "tmp.deepmd", type_map=["H", "O"], fmt="deepmd/npy" + ) + self.assertEqual(system_2["atom_names"], ["H", "O"]) + self.assertEqual(system_2["atom_types"][0], 1) + self.assertEqual(system_2["atom_types"][1], 1) + self.assertEqual(system_2["atom_types"][2], 0) + self.assertEqual(system_2["atom_types"][3], 0) + self.assertEqual(system_2["atom_types"][4], 0) + self.assertEqual(system_2["atom_types"][5], 0) + self.assertEqual(system_2["atom_numbs"][0], 4) + self.assertEqual(system_2["atom_numbs"][1], 2) + + +class TestDeepmdRawNoLabels(unittest.TestCase, CompSys, IsPBC): + def setUp(self): + self.system_1 = dpdata.System("poscars/POSCAR.h2o.md", fmt="vasp/poscar") + self.system_1.to_deepmd_raw("tmp.deepmd") + self.system_2 = dpdata.System( + "tmp.deepmd", fmt="deepmd/raw", type_map=["O", "H"] + ) self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 6 - def tearDown(self) : - if os.path.exists('tmp.deepmd'): - shutil.rmtree('tmp.deepmd') + def tearDown(self): + if os.path.exists("tmp.deepmd"): + shutil.rmtree("tmp.deepmd") -class TestDeepmdCompNoLabels(unittest.TestCase, CompSys, IsPBC) : - def setUp(self) : - self.dir_name = 'tmp.deepmd.nol' +class TestDeepmdCompNoLabels(unittest.TestCase, CompSys, IsPBC): + def setUp(self): + self.dir_name = "tmp.deepmd.nol" natoms = 3 - atom_names = ['O', 'H'] + atom_names = ["O", "H"] atom_numbs = [1, 2] - atom_types = np.array([0, 1, 1], dtype = np.int32) + atom_types = np.array([0, 1, 1], dtype=np.int32) nframes = 11 - os.makedirs(self.dir_name, exist_ok = True) - np.savetxt(os.path.join(self.dir_name, 'type.raw'), atom_types, fmt = '%d') - + os.makedirs(self.dir_name, exist_ok=True) + np.savetxt(os.path.join(self.dir_name, "type.raw"), atom_types, fmt="%d") + coords = np.random.random([nframes, natoms, 3]) cells = np.random.random([nframes, 3, 3]) - np.savetxt(os.path.join(self.dir_name, '', 'coord.raw'), np.reshape(coords, [nframes, -1])) - np.savetxt(os.path.join(self.dir_name, '', 'box.raw'), np.reshape(cells, [nframes, -1])) - + np.savetxt( + os.path.join(self.dir_name, "", "coord.raw"), + np.reshape(coords, [nframes, -1]), + ) + np.savetxt( + os.path.join(self.dir_name, "", "box.raw"), np.reshape(cells, [nframes, -1]) + ) + data = { - 'atom_names' : atom_names, - 'atom_types' : atom_types, - 'atom_numbs' : atom_numbs, - 'coords' : coords, - 'cells' : cells, - 'orig' : np.zeros(3), + "atom_names": atom_names, + "atom_types": atom_types, + "atom_numbs": atom_numbs, + "coords": coords, + "cells": cells, + "orig": np.zeros(3), } - self.system_1 = dpdata.System(self.dir_name, fmt = 'deepmd/raw', type_map = ['O', 'H']) + self.system_1 = dpdata.System( + self.dir_name, fmt="deepmd/raw", type_map=["O", "H"] + ) self.system_2 = dpdata.System() self.system_2.data = data @@ -183,11 +182,10 @@ def setUp(self) : self.f_places = 6 self.v_places = 6 - - def tearDown(self) : + def tearDown(self): if os.path.exists(self.dir_name): shutil.rmtree(self.dir_name) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_elements_index.py b/tests/test_elements_index.py index 23edd5e2..6b924548 100644 --- a/tests/test_elements_index.py +++ b/tests/test_elements_index.py @@ -3,25 +3,28 @@ import unittest from dpdata.system import elements_index_map + class ElementIndexMap(unittest.TestCase): - def test_func1(self): - element=["C","N","H"] - ref={'C': 0, 'N': 1, 'H': 2} - self.assertEqual(ref,elements_index_map(element)) + def test_func1(self): + element = ["C", "N", "H"] + ref = {"C": 0, "N": 1, "H": 2} + self.assertEqual(ref, elements_index_map(element)) + + def test_func2(self): + element = ["C", "N", "H"] + ref = {"H": 0, "C": 1, "N": 2} + self.assertEqual(ref, elements_index_map(element, standard=True)) + + def test_func3(self): + element = ["C", "N", "H"] + ref = {0: "H", 1: "C", 2: "N"} + self.assertEqual(ref, elements_index_map(element, standard=True, inverse=True)) - def test_func2(self): - element=["C","N","H"] - ref={'H': 0, 'C': 1, 'N': 2} - self.assertEqual(ref,elements_index_map(element,standard=True)) + def test_func4(self): + element = ["C", "N", "H"] + ref = {0: "C", 1: "N", 2: "H"} + self.assertEqual(ref, elements_index_map(element, inverse=True)) - def test_func3(self): - element=["C","N","H"] - ref={0: 'H', 1: 'C', 2: 'N'} - self.assertEqual(ref,elements_index_map(element,standard=True,inverse=True)) - def test_func4(self): - element=["C","N","H"] - ref={0: 'C', 1: 'N', 2: 'H'} - self.assertEqual(ref,elements_index_map(element,inverse=True)) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_empty.py b/tests/test_empty.py index 2a9fd252..0fd84ca0 100644 --- a/tests/test_empty.py +++ b/tests/test_empty.py @@ -3,29 +3,31 @@ import unittest from context import dpdata + class TestEmptySystem(unittest.TestCase): def test_empty(self): - sys1 = dpdata.System(type_map = ['A', 'H', 'B', 'O', 'D']) - sys2 = dpdata.LabeledSystem(type_map = ['A', 'H', 'B', 'O', 'D']) + sys1 = dpdata.System(type_map=["A", "H", "B", "O", "D"]) + sys2 = dpdata.LabeledSystem(type_map=["A", "H", "B", "O", "D"]) def test_data_empty(self): - data = {'atom_names' : ['A', 'B'], - 'atom_numbs' : [0,0], - 'atom_types' : np.array([], dtype = int), - 'orig': np.array([0, 0, 0]), - 'cells': np.array([]), - 'coords': np.array([]), + data = { + "atom_names": ["A", "B"], + "atom_numbs": [0, 0], + "atom_types": np.array([], dtype=int), + "orig": np.array([0, 0, 0]), + "cells": np.array([]), + "coords": np.array([]), } - sys1 = dpdata.System(data = data) - data = {'atom_names' : ['A', 'B'], - 'atom_numbs' : [0,0], - 'atom_types' : np.array([], dtype = int), - 'orig': np.array([0, 0, 0]), - 'cells': np.array([]), - 'coords': np.array([]), - 'forces': np.array([]), - 'energies': np.array([]), - 'virials': np.array([]), + sys1 = dpdata.System(data=data) + data = { + "atom_names": ["A", "B"], + "atom_numbs": [0, 0], + "atom_types": np.array([], dtype=int), + "orig": np.array([0, 0, 0]), + "cells": np.array([]), + "coords": np.array([]), + "forces": np.array([]), + "energies": np.array([]), + "virials": np.array([]), } - sys2 = dpdata.LabeledSystem(data = data) - + sys2 = dpdata.LabeledSystem(data=data) diff --git a/tests/test_fhi_md_multi_elem_output.py b/tests/test_fhi_md_multi_elem_output.py index 39cc4fb7..dc4cbfcf 100644 --- a/tests/test_fhi_md_multi_elem_output.py +++ b/tests/test_fhi_md_multi_elem_output.py @@ -5,50 +5,57 @@ class TestFhi_aims_MD: def test_atom_names(self): - self.assertEqual(self.system.data['atom_names'], ["C","H","O","N"]) + self.assertEqual(self.system.data["atom_names"], ["C", "H", "O", "N"]) def test_atom_numbs(self): - self.assertEqual(self.system.data['atom_numbs'], [32,36,8,4]) + self.assertEqual(self.system.data["atom_numbs"], [32, 36, 8, 4]) def test_atom_types(self): - ref_type = [0, 1, 1,] + ref_type = [ + 0, + 1, + 1, + ] ref_type = np.array(ref_type) for ii in range(ref_type.shape[0]): - self.assertAlmostEqual(self.system.data['atom_types'][ii], ref_type[ii]) + self.assertAlmostEqual(self.system.data["atom_types"][ii], ref_type[ii]) def test_cell(self): - ref_cell=np.loadtxt('fhi_aims/ref_cell_md_m.txt') - ref_cell=ref_cell.flatten() - cells = self.system.data['cells'].flatten() + ref_cell = np.loadtxt("fhi_aims/ref_cell_md_m.txt") + ref_cell = ref_cell.flatten() + cells = self.system.data["cells"].flatten() idx = 0 for ii in range(len(cells)): self.assertAlmostEqual(cells[ii], float(ref_cell[ii])) def test_coord(self): - ref_coord=np.loadtxt('fhi_aims/ref_coord_md_m.txt') - ref_coord=ref_coord.flatten() - coords = self.system.data['coords'].flatten() + ref_coord = np.loadtxt("fhi_aims/ref_coord_md_m.txt") + ref_coord = ref_coord.flatten() + coords = self.system.data["coords"].flatten() for ii in range(len(coords)): self.assertAlmostEqual(coords[ii], float(ref_coord[ii])) def test_force(self): - ref_force=np.loadtxt('fhi_aims/ref_force_md_m.txt') - ref_force=ref_force.flatten() - forces = self.system.data['forces'].flatten() + ref_force = np.loadtxt("fhi_aims/ref_force_md_m.txt") + ref_force = ref_force.flatten() + forces = self.system.data["forces"].flatten() for ii in range(len(forces)): self.assertAlmostEqual(forces[ii], float(ref_force[ii])) def test_energy(self): - ref_energy=np.loadtxt('fhi_aims/ref_energy_md_m.txt') - ref_energy=ref_energy.flatten() - energy = self.system.data['energies'] + ref_energy = np.loadtxt("fhi_aims/ref_energy_md_m.txt") + ref_energy = ref_energy.flatten() + energy = self.system.data["energies"] for ii in range(len(energy)): self.assertAlmostEqual(energy[ii], ref_energy[ii]) class TestFhi_aims_Output(unittest.TestCase, TestFhi_aims_MD): def setUp(self): - self.system = dpdata.LabeledSystem('fhi_aims/output_multi_elements', fmt='fhi_aims/md') + self.system = dpdata.LabeledSystem( + "fhi_aims/output_multi_elements", fmt="fhi_aims/md" + ) -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/tests/test_fhi_md_output.py b/tests/test_fhi_md_output.py index 2b075180..3f945b31 100644 --- a/tests/test_fhi_md_output.py +++ b/tests/test_fhi_md_output.py @@ -5,50 +5,55 @@ class TestFhi_aims_MD: def test_atom_names(self): - self.assertEqual(self.system.data['atom_names'], ["B","N"]) + self.assertEqual(self.system.data["atom_names"], ["B", "N"]) def test_atom_numbs(self): - self.assertEqual(self.system.data['atom_numbs'], [1,2]) + self.assertEqual(self.system.data["atom_numbs"], [1, 2]) def test_atom_types(self): - ref_type = [0, 1, 1,] + ref_type = [ + 0, + 1, + 1, + ] ref_type = np.array(ref_type) for ii in range(ref_type.shape[0]): - self.assertAlmostEqual(self.system.data['atom_types'][ii], ref_type[ii]) + self.assertAlmostEqual(self.system.data["atom_types"][ii], ref_type[ii]) def test_cell(self): - ref_cell=np.loadtxt('fhi_aims/ref_cell_md.txt') - ref_cell=ref_cell.flatten() - cells = self.system.data['cells'].flatten() + ref_cell = np.loadtxt("fhi_aims/ref_cell_md.txt") + ref_cell = ref_cell.flatten() + cells = self.system.data["cells"].flatten() idx = 0 for ii in range(len(cells)): self.assertAlmostEqual(cells[ii], float(ref_cell[ii])) def test_coord(self): - ref_coord=np.loadtxt('fhi_aims/ref_coord_md.txt') - ref_coord=ref_coord.flatten() - coords = self.system.data['coords'].flatten() + ref_coord = np.loadtxt("fhi_aims/ref_coord_md.txt") + ref_coord = ref_coord.flatten() + coords = self.system.data["coords"].flatten() for ii in range(len(coords)): self.assertAlmostEqual(coords[ii], float(ref_coord[ii])) def test_force(self): - ref_force=np.loadtxt('fhi_aims/ref_force_md.txt') - ref_force=ref_force.flatten() - forces = self.system.data['forces'].flatten() + ref_force = np.loadtxt("fhi_aims/ref_force_md.txt") + ref_force = ref_force.flatten() + forces = self.system.data["forces"].flatten() for ii in range(len(forces)): self.assertAlmostEqual(forces[ii], float(ref_force[ii])) def test_energy(self): - ref_energy=np.loadtxt('fhi_aims/ref_energy_md.txt') - ref_energy=ref_energy.flatten() - energy = self.system.data['energies'] + ref_energy = np.loadtxt("fhi_aims/ref_energy_md.txt") + ref_energy = ref_energy.flatten() + energy = self.system.data["energies"] for ii in range(len(energy)): self.assertAlmostEqual(energy[ii], ref_energy[ii]) class TestFhi_aims_Output(unittest.TestCase, TestFhi_aims_MD): def setUp(self): - self.system = dpdata.LabeledSystem('fhi_aims/out_md', fmt='fhi_aims/md') + self.system = dpdata.LabeledSystem("fhi_aims/out_md", fmt="fhi_aims/md") -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/tests/test_fhi_output.py b/tests/test_fhi_output.py index b1ccb730..7c8307ea 100644 --- a/tests/test_fhi_output.py +++ b/tests/test_fhi_output.py @@ -3,72 +3,74 @@ import unittest from context import dpdata + class TestFhi_aims: - def test_atom_names(self) : - self.assertEqual(self.system.data['atom_names'], ['B','N']) - def test_atom_numbs(self) : - self.assertEqual(self.system.data['atom_numbs'], [1, 1]) - def test_atom_types(self) : - ref_type = [0,1] + def test_atom_names(self): + self.assertEqual(self.system.data["atom_names"], ["B", "N"]) + + def test_atom_numbs(self): + self.assertEqual(self.system.data["atom_numbs"], [1, 1]) + + def test_atom_types(self): + ref_type = [0, 1] ref_type = np.array(ref_type) - for ii in range(ref_type.shape[0]) : - self.assertAlmostEqual(self.system.data['atom_types'][ii], ref_type[ii]) + for ii in range(ref_type.shape[0]): + self.assertAlmostEqual(self.system.data["atom_types"][ii], ref_type[ii]) - def test_cell(self) : - cell = np.loadtxt('fhi_aims/ref_cell.txt').flatten() - res = self.system.data['cells'][0].flatten() + def test_cell(self): + cell = np.loadtxt("fhi_aims/ref_cell.txt").flatten() + res = self.system.data["cells"][0].flatten() for ii in range(len(cell)): self.assertAlmostEqual(res[ii], cell[ii]) - def test_coord(self) : - coord = np.loadtxt('fhi_aims/ref_coord.txt').flatten() - res = self.system.data['coords'][0].flatten() - for ii in range(len(coord)) : + def test_coord(self): + coord = np.loadtxt("fhi_aims/ref_coord.txt").flatten() + res = self.system.data["coords"][0].flatten() + for ii in range(len(coord)): self.assertAlmostEqual(res[ii], float(coord[ii])) - def test_force(self) : - force = np.loadtxt('fhi_aims/ref_force.txt').flatten() - res = self.system.data['forces'][0].flatten() + def test_force(self): + force = np.loadtxt("fhi_aims/ref_force.txt").flatten() + res = self.system.data["forces"][0].flatten() for ii in range(len(force)): self.assertAlmostEqual(res[ii], float(force[ii])) - # def test_viriale(self) : - # toViri = 1 - # fp = open('fhi_aims/ref_cell') - # cell = [] - # for ii in fp: - # for jj in ii.split(): - # cell.append(float(jj)) - # cell = np.array(cell) - # cells = cell.reshape(3,3) - # fp.close() + # def test_viriale(self) : + # toViri = 1 + # fp = open('fhi_aims/ref_cell') + # cell = [] + # for ii in fp: + # for jj in ii.split(): + # cell.append(float(jj)) + # cell = np.array(cell) + # cells = cell.reshape(3,3) + # fp.close() - # toVol = [] - # for ii in cells: - # ### calucate vol - # toVol.append(np.linalg.det(cells)) + # toVol = [] + # for ii in cells: + # ### calucate vol + # toVol.append(np.linalg.det(cells)) - # fp = open('fhi_aims/ref_virial') - # virial = [] - # for ii in fp: - # for jj in ii.split(): - # virial.append(float(jj) * toViri * toVol[0]) - # virial = np.array(virial) - # fp.close() - # res = self.system.data['virials'][0].flatten() - # for ii in range(len(virial)): - # self.assertAlmostEqual(res[ii], float(virial[ii])) + # fp = open('fhi_aims/ref_virial') + # virial = [] + # for ii in fp: + # for jj in ii.split(): + # virial.append(float(jj) * toViri * toVol[0]) + # virial = np.array(virial) + # fp.close() + # res = self.system.data['virials'][0].flatten() + # for ii in range(len(virial)): + # self.assertAlmostEqual(res[ii], float(virial[ii])) - def test_energy(self) : - ref_energy = -0.215215685892915E+04 - self.assertAlmostEqual(self.system.data['energies'][0], ref_energy,places = 6) + def test_energy(self): + ref_energy = -0.215215685892915e04 + self.assertAlmostEqual(self.system.data["energies"][0], ref_energy, places=6) class TestFhiOutput(unittest.TestCase, TestFhi_aims): - def setUp(self): - self.system = dpdata.LabeledSystem('fhi_aims/out_scf', fmt = 'fhi_aims/scf') + self.system = dpdata.LabeledSystem("fhi_aims/out_scf", fmt="fhi_aims/scf") -if __name__ == '__main__': - unittest.main() +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_gaussian_driver.py b/tests/test_gaussian_driver.py index 69d0c42e..f1029ec1 100644 --- a/tests/test_gaussian_driver.py +++ b/tests/test_gaussian_driver.py @@ -9,78 +9,97 @@ @unittest.skipIf(shutil.which("g16") is None, "g16 is not installed") -@unittest.skipIf(importlib.util.find_spec("openbabel") is None, "openbabel is not installed") +@unittest.skipIf( + importlib.util.find_spec("openbabel") is None, "openbabel is not installed" +) class TestGaussianDriver(unittest.TestCase, CompSys, IsNoPBC): """Test Gaussian with a hydrogen ion.""" + @classmethod def setUpClass(cls): - cls.system_1 = dpdata.System(data={ - "atom_names": ["H"], - "atom_numbs": [1], - "atom_types": np.zeros((1,), dtype=int), - "coords": np.zeros((1, 1, 3), dtype=np.float32), - "cells": np.zeros((1, 3, 3), dtype=np.float32), - "orig": np.zeros(3, dtype=np.float32), - "nopbc": True, - }) - cls.system_2 = cls.system_1.predict(keywords="force B3LYP", charge=1, driver="gaussian") + cls.system_1 = dpdata.System( + data={ + "atom_names": ["H"], + "atom_numbs": [1], + "atom_types": np.zeros((1,), dtype=int), + "coords": np.zeros((1, 1, 3), dtype=np.float32), + "cells": np.zeros((1, 3, 3), dtype=np.float32), + "orig": np.zeros(3, dtype=np.float32), + "nopbc": True, + } + ) + cls.system_2 = cls.system_1.predict( + keywords="force B3LYP", charge=1, driver="gaussian" + ) cls.places = 6 - + def test_energy(self): - self.assertAlmostEqual(self.system_2['energies'].ravel()[0], 0.) - + self.assertAlmostEqual(self.system_2["energies"].ravel()[0], 0.0) + def test_forces(self): - forces = self.system_2['forces'] + forces = self.system_2["forces"] np.testing.assert_allclose(forces, np.zeros_like(forces)) class TestMakeGaussian(unittest.TestCase): """This class will not check if the output is correct, but only see if there is any errors.""" + def setUp(self): - self.system = dpdata.System(data={ - "atom_names": ["H"], - "atom_numbs": [1], - "atom_types": np.zeros((1,), dtype=int), - "coords": np.zeros((1, 1, 3), dtype=np.float32), - "cells": np.zeros((1, 3, 3), dtype=np.float32), - "orig": np.zeros(3, dtype=np.float32), - "nopbc": True, - }) - - @unittest.skipIf(importlib.util.find_spec("openbabel") is None, "requires openbabel") + self.system = dpdata.System( + data={ + "atom_names": ["H"], + "atom_numbs": [1], + "atom_types": np.zeros((1,), dtype=int), + "coords": np.zeros((1, 1, 3), dtype=np.float32), + "cells": np.zeros((1, 3, 3), dtype=np.float32), + "orig": np.zeros(3, dtype=np.float32), + "nopbc": True, + } + ) + + @unittest.skipIf( + importlib.util.find_spec("openbabel") is None, "requires openbabel" + ) def test_make_fp_gaussian(self): self.system.to_gaussian_gjf("gaussian/tmp.gjf", keywords="wb97x/6-31g* force") def test_make_fp_gaussian_multiplicity_one(self): - self.system.to_gaussian_gjf("gaussian/tmp.gjf", keywords="wb97x/6-31g* force", multiplicity=1) + self.system.to_gaussian_gjf( + "gaussian/tmp.gjf", keywords="wb97x/6-31g* force", multiplicity=1 + ) def test_detect_multiplicity(self): # oxygen O2 3 - self._check_multiplicity(['O', 'O'], 3) + self._check_multiplicity(["O", "O"], 3) # methane CH4 1 - self._check_multiplicity(['C', 'H', 'H', 'H', 'H'], 1) + self._check_multiplicity(["C", "H", "H", "H", "H"], 1) # CH3 2 - self._check_multiplicity(['C', 'H', 'H', 'H'], 2) + self._check_multiplicity(["C", "H", "H", "H"], 2) # CH2 1 - self._check_multiplicity(['C', 'H', 'H'], 1) + self._check_multiplicity(["C", "H", "H"], 1) # CH 2 - self._check_multiplicity(['C', 'H'], 2) + self._check_multiplicity(["C", "H"], 2) def _check_multiplicity(self, symbols, multiplicity): - self.assertEqual(dpdata.gaussian.gjf.detect_multiplicity(np.array(symbols)), multiplicity) + self.assertEqual( + dpdata.gaussian.gjf.detect_multiplicity(np.array(symbols)), multiplicity + ) def tearDown(self): - if os.path.exists('gaussian/tmp.gjf'): - os.remove('gaussian/tmp.gjf') + if os.path.exists("gaussian/tmp.gjf"): + os.remove("gaussian/tmp.gjf") class TestDumpGaussianGjf(unittest.TestCase): def setUp(self): - self.system = dpdata.LabeledSystem('gaussian/methane.gaussianlog', - fmt='gaussian/log') + self.system = dpdata.LabeledSystem( + "gaussian/methane.gaussianlog", fmt="gaussian/log" + ) def test_dump_to_gjf(self): - self.system.to_gaussian_gjf("gaussian/tmp.gjf", keywords="force B3LYP/6-31G(d)", multiplicity=1) + self.system.to_gaussian_gjf( + "gaussian/tmp.gjf", keywords="force B3LYP/6-31G(d)", multiplicity=1 + ) with open("gaussian/tmp.gjf") as f: f.readline() header = f.readline().strip() @@ -99,11 +118,11 @@ def test_dump_to_gjf(self): self.assertEqual(title, self.system.formula) self.assertEqual(charge, 0) self.assertEqual(mult, 1) - self.assertEqual(atoms, ['C', 'H', 'H', 'H', 'H']) - for i in range(self.system['coords'].shape[1]): + self.assertEqual(atoms, ["C", "H", "H", "H", "H"]) + for i in range(self.system["coords"].shape[1]): for j in range(3): - self.assertAlmostEqual(coords[i][j], self.system['coords'][0][i][j]) + self.assertAlmostEqual(coords[i][j], self.system["coords"][0][i][j]) def tearDown(self): - if os.path.exists('gaussian/tmp.gjf'): - os.remove('gaussian/tmp.gjf') + if os.path.exists("gaussian/tmp.gjf"): + os.remove("gaussian/tmp.gjf") diff --git a/tests/test_gaussian_gjf.py b/tests/test_gaussian_gjf.py index 350b5025..24cb56bd 100644 --- a/tests/test_gaussian_gjf.py +++ b/tests/test_gaussian_gjf.py @@ -5,10 +5,9 @@ class TestGaussianGJF(unittest.TestCase): - def setUp (self) : - self.system = dpdata.LabeledSystem('poscars/OUTCAR.h2o.md', - fmt = 'vasp/outcar') - + def setUp(self): + self.system = dpdata.LabeledSystem("poscars/OUTCAR.h2o.md", fmt="vasp/outcar") + def test_dump_gaussian_gjf(self): - self.system.to_gaussian_gjf('tmp.gjf', keywords="force b3lyp/6-31g*") - os.remove('tmp.gjf') + self.system.to_gaussian_gjf("tmp.gjf", keywords="force b3lyp/6-31g*") + os.remove("tmp.gjf") diff --git a/tests/test_gaussian_log.py b/tests/test_gaussian_log.py index e52f9307..8d7bec81 100644 --- a/tests/test_gaussian_log.py +++ b/tests/test_gaussian_log.py @@ -3,94 +3,105 @@ import unittest from context import dpdata -class TestGaussianLog : - def test_atom_names(self) : - self.assertEqual(self.system.data['atom_names'], self.atom_names) - def test_atom_numbs(self) : - self.assertEqual(self.system.data['atom_numbs'], self.atom_numbs) - +class TestGaussianLog: + def test_atom_names(self): + self.assertEqual(self.system.data["atom_names"], self.atom_names) + + def test_atom_numbs(self): + self.assertEqual(self.system.data["atom_numbs"], self.atom_numbs) + def test_nframes(self): self.assertEqual(len(self.system), self.nframes) - def test_atom_types(self) : - for ii in range(len(self.atom_types)) : - self.assertEqual(self.system.data['atom_types'][ii], self.atom_types[ii]) + def test_atom_types(self): + for ii in range(len(self.atom_types)): + self.assertEqual(self.system.data["atom_types"][ii], self.atom_types[ii]) def test_nopbc(self): self.assertEqual(self.system.nopbc, True) + class TestGaussianLoadLog(unittest.TestCase, TestGaussianLog): - def setUp (self) : - self.system = dpdata.LabeledSystem('gaussian/methane.gaussianlog', - fmt = 'gaussian/log') - self.atom_names = ['C','H'] + def setUp(self): + self.system = dpdata.LabeledSystem( + "gaussian/methane.gaussianlog", fmt="gaussian/log" + ) + self.atom_names = ["C", "H"] self.atom_numbs = [1, 4] self.nframes = 1 self.atom_types = [0, 1, 1, 1, 1] + class TestGaussianLoadLargeForceLog(unittest.TestCase, TestGaussianLog): - def setUp (self) : - self.system = dpdata.LabeledSystem('gaussian/largeforce.gaussianlog', - fmt = 'gaussian/log') - self.atom_names = ['C','H','O','S'] - self.atom_numbs = [33 , 65, 22, 6] + def setUp(self): + self.system = dpdata.LabeledSystem( + "gaussian/largeforce.gaussianlog", fmt="gaussian/log" + ) + self.atom_names = ["C", "H", "O", "S"] + self.atom_numbs = [33, 65, 22, 6] self.nframes = 1 self.atom_types = [0] * 33 + [2] * 22 + [1] * 65 + [3] * 6 - + + class TestGaussianLoadMD(unittest.TestCase, TestGaussianLog): - def setUp (self) : - self.system = dpdata.LabeledSystem('gaussian/aimd_gaussian_CH4_output', - fmt = 'gaussian/md') - self.atom_names = ['C','H'] + def setUp(self): + self.system = dpdata.LabeledSystem( + "gaussian/aimd_gaussian_CH4_output", fmt="gaussian/md" + ) + self.atom_names = ["C", "H"] self.atom_numbs = [1, 4] self.nframes = 22 self.atom_types = [1, 1, 1, 1, 0] class TestNonCoveragedGaussianLoadLog(unittest.TestCase, TestGaussianLog): - def setUp (self) : - self.system = dpdata.LabeledSystem('gaussian/noncoveraged.gaussianlog', - fmt = 'gaussian/log') + def setUp(self): + self.system = dpdata.LabeledSystem( + "gaussian/noncoveraged.gaussianlog", fmt="gaussian/log" + ) self.atom_names = [] self.atom_numbs = [] self.nframes = 0 - - def test_atom_types(self) : - self.assertEqual(self.system.data['atom_types'], []) - def test_cells(self) : - self.assertEqual(self.system.data['cells'], []) + def test_atom_types(self): + self.assertEqual(self.system.data["atom_types"], []) + + def test_cells(self): + self.assertEqual(self.system.data["cells"], []) - def test_coords(self) : - self.assertEqual(self.system.data['coords'], []) + def test_coords(self): + self.assertEqual(self.system.data["coords"], []) - def test_energies(self) : - self.assertEqual(self.system.data['energies'], []) + def test_energies(self): + self.assertEqual(self.system.data["energies"], []) - def test_forces(self) : - self.assertEqual(self.system.data['forces'], []) + def test_forces(self): + self.assertEqual(self.system.data["forces"], []) - def test_virials(self) : - self.assertFalse('virials' in self.system.data) + def test_virials(self): + self.assertFalse("virials" in self.system.data) class TestGaussianLoadPBCLog(unittest.TestCase, TestGaussianLog): """PBC.""" - def setUp (self) : - self.system = dpdata.LabeledSystem('gaussian/h2pbc.gaussianlog', - fmt = 'gaussian/log') - self.atom_names = ['H'] + + def setUp(self): + self.system = dpdata.LabeledSystem( + "gaussian/h2pbc.gaussianlog", fmt="gaussian/log" + ) + self.atom_names = ["H"] self.atom_numbs = [2] self.nframes = 1 self.atom_types = [0, 0] self.cells = (np.eye(3) * 10.0).reshape(1, 3, 3) - def test_cells(self) : - self.assertTrue(np.allclose(self.system.data['cells'], self.cells)) + def test_cells(self): + self.assertTrue(np.allclose(self.system.data["cells"], self.cells)) def test_nopbc(self): self.assertEqual(self.system.nopbc, False) -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/tests/test_gromacs_gro.py b/tests/test_gromacs_gro.py index 16070448..d2553809 100644 --- a/tests/test_gromacs_gro.py +++ b/tests/test_gromacs_gro.py @@ -3,111 +3,152 @@ import unittest from context import dpdata + class TestGromacsGro(unittest.TestCase): def test_read_file(self): - system = dpdata.System('gromacs/1h.gro', type_map=['H', 'O']) - self.assertTrue('H' in system['atom_names']) - self.assertTrue('O' in system['atom_names']) - self.assertEqual(system['atom_numbs'], [6, 3]) - for cc,ii in enumerate([1, 0, 0, 1, 0, 0, 1, 0, 0]): - self.assertEqual(system['atom_types'][cc], ii) - self.assertEqual(len(system['cells']), 1) - self.assertEqual(len(system['coords']), 1) + system = dpdata.System("gromacs/1h.gro", type_map=["H", "O"]) + self.assertTrue("H" in system["atom_names"]) + self.assertTrue("O" in system["atom_names"]) + self.assertEqual(system["atom_numbs"], [6, 3]) + for cc, ii in enumerate([1, 0, 0, 1, 0, 0, 1, 0, 0]): + self.assertEqual(system["atom_types"][cc], ii) + self.assertEqual(len(system["cells"]), 1) + self.assertEqual(len(system["coords"]), 1) for ii in range(3): for jj in range(3): if ii != jj: - self.assertAlmostEqual(system['cells'][0][ii][jj], 0) - self.assertAlmostEqual(system['cells'][0][0][0], 7.822838765564372) - self.assertAlmostEqual(system['cells'][0][1][1], 7.353572647182051) - self.assertAlmostEqual(system['cells'][0][2][2], 9.036518515423753) - self.assertAlmostEqual(system['coords'][0][8][0], 7.43) - self.assertAlmostEqual(system['coords'][0][8][1], 5.12) - self.assertAlmostEqual(system['coords'][0][8][2], 3.36) + self.assertAlmostEqual(system["cells"][0][ii][jj], 0) + self.assertAlmostEqual(system["cells"][0][0][0], 7.822838765564372) + self.assertAlmostEqual(system["cells"][0][1][1], 7.353572647182051) + self.assertAlmostEqual(system["cells"][0][2][2], 9.036518515423753) + self.assertAlmostEqual(system["coords"][0][8][0], 7.43) + self.assertAlmostEqual(system["coords"][0][8][1], 5.12) + self.assertAlmostEqual(system["coords"][0][8][2], 3.36) def test_read_file_tri(self): - system = dpdata.System('gromacs/1h.tri.gro', type_map=['H', 'O']) - self.assertTrue('H' in system['atom_names']) - self.assertTrue('O' in system['atom_names']) - self.assertEqual(system['atom_numbs'], [6, 3]) - for cc,ii in enumerate([1, 0, 0, 1, 0, 0, 1, 0, 0]): - self.assertEqual(system['atom_types'][cc], ii) - self.assertEqual(len(system['cells']), 1) - self.assertEqual(len(system['coords']), 1) + system = dpdata.System("gromacs/1h.tri.gro", type_map=["H", "O"]) + self.assertTrue("H" in system["atom_names"]) + self.assertTrue("O" in system["atom_names"]) + self.assertEqual(system["atom_numbs"], [6, 3]) + for cc, ii in enumerate([1, 0, 0, 1, 0, 0, 1, 0, 0]): + self.assertEqual(system["atom_types"][cc], ii) + self.assertEqual(len(system["cells"]), 1) + self.assertEqual(len(system["coords"]), 1) count = 0 for ii in range(3): for jj in range(3): if ii != jj: - self.assertAlmostEqual(system['cells'][0][ii][jj], count) + self.assertAlmostEqual(system["cells"][0][ii][jj], count) count += 1 - self.assertAlmostEqual(system['cells'][0][0][0], 7.822838765564372) - self.assertAlmostEqual(system['cells'][0][1][1], 7.353572647182051) - self.assertAlmostEqual(system['cells'][0][2][2], 9.036518515423753) - self.assertAlmostEqual(system['coords'][0][8][0], 7.43) - self.assertAlmostEqual(system['coords'][0][8][1], 5.12) - self.assertAlmostEqual(system['coords'][0][8][2], 3.36) - system.to('vasp/poscar', 'POSCAR') + self.assertAlmostEqual(system["cells"][0][0][0], 7.822838765564372) + self.assertAlmostEqual(system["cells"][0][1][1], 7.353572647182051) + self.assertAlmostEqual(system["cells"][0][2][2], 9.036518515423753) + self.assertAlmostEqual(system["coords"][0][8][0], 7.43) + self.assertAlmostEqual(system["coords"][0][8][1], 5.12) + self.assertAlmostEqual(system["coords"][0][8][2], 3.36) + system.to("vasp/poscar", "POSCAR") + class TestGromacsGroMultiFrames(unittest.TestCase): def test_read_file(self): - system = dpdata.System('gromacs/multi_frames.gro', type_map=['H', 'O']) - self.assertTrue('H' in system['atom_names']) - self.assertTrue('O' in system['atom_names']) - self.assertEqual(system['atom_numbs'], [6, 3]) - for cc,ii in enumerate([1, 0, 0, 1, 0, 0, 1, 0, 0]): - self.assertEqual(system['atom_types'][cc], ii) - self.assertEqual(len(system['cells']), 2) - self.assertEqual(len(system['coords']), 2) + system = dpdata.System("gromacs/multi_frames.gro", type_map=["H", "O"]) + self.assertTrue("H" in system["atom_names"]) + self.assertTrue("O" in system["atom_names"]) + self.assertEqual(system["atom_numbs"], [6, 3]) + for cc, ii in enumerate([1, 0, 0, 1, 0, 0, 1, 0, 0]): + self.assertEqual(system["atom_types"][cc], ii) + self.assertEqual(len(system["cells"]), 2) + self.assertEqual(len(system["coords"]), 2) for ii in range(3): for jj in range(3): if ii != jj: - self.assertAlmostEqual(system['cells'][0][ii][jj], 0) # frame no.1 - self.assertAlmostEqual(system['cells'][1][ii][jj], 0) # frame no.2 + self.assertAlmostEqual(system["cells"][0][ii][jj], 0) # frame no.1 + self.assertAlmostEqual(system["cells"][1][ii][jj], 0) # frame no.2 # frame no.1 - self.assertAlmostEqual(system['cells'][0][0][0], 7.822838765564372) - self.assertAlmostEqual(system['cells'][0][1][1], 7.353572647182051) - self.assertAlmostEqual(system['cells'][0][2][2], 9.036518515423753) - self.assertAlmostEqual(system['coords'][0][8][0], 7.43) - self.assertAlmostEqual(system['coords'][0][8][1], 5.12) - self.assertAlmostEqual(system['coords'][0][8][2], 3.36) + self.assertAlmostEqual(system["cells"][0][0][0], 7.822838765564372) + self.assertAlmostEqual(system["cells"][0][1][1], 7.353572647182051) + self.assertAlmostEqual(system["cells"][0][2][2], 9.036518515423753) + self.assertAlmostEqual(system["coords"][0][8][0], 7.43) + self.assertAlmostEqual(system["coords"][0][8][1], 5.12) + self.assertAlmostEqual(system["coords"][0][8][2], 3.36) # frame no.2 - self.assertAlmostEqual(system['cells'][1][0][0], 7.822838765564372) - self.assertAlmostEqual(system['cells'][1][1][1], 7.353572647182051) - self.assertAlmostEqual(system['cells'][1][2][2], 9.036518515423753) - self.assertAlmostEqual(system['coords'][1][8][0], 7.43) - self.assertAlmostEqual(system['coords'][1][8][1], 5.12) - self.assertAlmostEqual(system['coords'][1][8][2], 3.36) + self.assertAlmostEqual(system["cells"][1][0][0], 7.822838765564372) + self.assertAlmostEqual(system["cells"][1][1][1], 7.353572647182051) + self.assertAlmostEqual(system["cells"][1][2][2], 9.036518515423753) + self.assertAlmostEqual(system["coords"][1][8][0], 7.43) + self.assertAlmostEqual(system["coords"][1][8][1], 5.12) + self.assertAlmostEqual(system["coords"][1][8][2], 3.36) class TestFormatAtomName(unittest.TestCase): def test_format_atom_name(self): - system = dpdata.System("gromacs/case_for_format_atom_name.gro", fmt='gromacs/gro', type_map=['H','C','N','O','Cl']) + system = dpdata.System( + "gromacs/case_for_format_atom_name.gro", + fmt="gromacs/gro", + type_map=["H", "C", "N", "O", "Cl"], + ) self.assertEqual(system.formula, "H11C14N3O2Cl2") - + def test_no_format_atom_name(self): - system = dpdata.System("gromacs/case_for_format_atom_name.gro", fmt='gromacs/gro', format_atom_name=False) - atoms = ['CL1', 'H6', 'C4', 'C3', 'C6', 'C11', 'H10', 'C2', 'N3', 'C14', - 'H7', 'H8', 'C13', 'H2', 'H1', 'H4', 'O2', 'H9', 'O1', 'N2', 'C9', - 'H3', 'C5', 'H11', 'N1', 'C7', 'C10', 'CL2', 'H5', 'C1', 'C8','C12'] + system = dpdata.System( + "gromacs/case_for_format_atom_name.gro", + fmt="gromacs/gro", + format_atom_name=False, + ) + atoms = [ + "CL1", + "H6", + "C4", + "C3", + "C6", + "C11", + "H10", + "C2", + "N3", + "C14", + "H7", + "H8", + "C13", + "H2", + "H1", + "H4", + "O2", + "H9", + "O1", + "N2", + "C9", + "H3", + "C5", + "H11", + "N1", + "C7", + "C10", + "CL2", + "H5", + "C1", + "C8", + "C12", + ] for at in atoms: - self.assertTrue(at in system['atom_names']) + self.assertTrue(at in system["atom_names"]) class TestDumpGromacsGro(unittest.TestCase): def setUp(self): - self.system = dpdata.System('gromacs/multi_frames.gro', type_map=['H', 'O']) - + self.system = dpdata.System("gromacs/multi_frames.gro", type_map=["H", "O"]) + def test_dump_single_frame(self): - self.system.to_gromacs_gro('gromacs/tmp_1.gro', frame_idx=0) - tmp = dpdata.System('gromacs/tmp_1.gro', type_map=['H', 'O']) - self.assertEqual(tmp.get_nframes(), 1) + self.system.to_gromacs_gro("gromacs/tmp_1.gro", frame_idx=0) + tmp = dpdata.System("gromacs/tmp_1.gro", type_map=["H", "O"]) + self.assertEqual(tmp.get_nframes(), 1) def test_dump_multi_frames(self): - self.system.to_gromacs_gro('gromacs/tmp_2.gro') - tmp = dpdata.System('gromacs/tmp_2.gro', type_map=['H', 'O']) + self.system.to_gromacs_gro("gromacs/tmp_2.gro") + tmp = dpdata.System("gromacs/tmp_2.gro", type_map=["H", "O"]) self.assertEqual(tmp.get_nframes(), 2) - + def tearDown(self): - if os.path.exists('gromacs/tmp_1.gro'): - os.remove('gromacs/tmp_1.gro') - if os.path.exists('gromacs/tmp_2.gro'): - os.remove('gromacs/tmp_2.gro') + if os.path.exists("gromacs/tmp_1.gro"): + os.remove("gromacs/tmp_1.gro") + if os.path.exists("gromacs/tmp_2.gro"): + os.remove("gromacs/tmp_2.gro") diff --git a/tests/test_json.py b/tests/test_json.py index 98be1404..7337d682 100644 --- a/tests/test_json.py +++ b/tests/test_json.py @@ -4,25 +4,26 @@ from context import dpdata from comp_sys import CompLabeledSys, IsPBC + class TestJsonLoad(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : - self.system_1 = dpdata.LabeledSystem('poscars/OUTCAR.h2o.md', - fmt = 'vasp/outcar') - self.system_2 = dpdata.LabeledSystem.load('poscars/h2o.md.json') + def setUp(self): + self.system_1 = dpdata.LabeledSystem("poscars/OUTCAR.h2o.md", fmt="vasp/outcar") + self.system_2 = dpdata.LabeledSystem.load("poscars/h2o.md.json") self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 4 + class TestAsDict(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : - self.system_1 = dpdata.LabeledSystem('poscars/OUTCAR.h2o.md', - fmt = 'vasp/outcar') + def setUp(self): + self.system_1 = dpdata.LabeledSystem("poscars/OUTCAR.h2o.md", fmt="vasp/outcar") self.system_2 = dpdata.LabeledSystem.from_dict(self.system_1.as_dict()) self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 4 -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/tests/test_lammps_dump_idx.py b/tests/test_lammps_dump_idx.py index 7f834e81..110f4bc6 100644 --- a/tests/test_lammps_dump_idx.py +++ b/tests/test_lammps_dump_idx.py @@ -5,17 +5,22 @@ import unittest from context import dpdata + class TestLmpDumpIdx(unittest.TestCase): def setUp(self): - self.system = dpdata.System(os.path.join('poscars', 'conf2.dump')) - - def test_coords(self): - np.testing.assert_allclose(self.system['coords'], np.array( - [[[0., 0., 0.], - [1.2621856, 0.7018028, 0.5513885]]] - )) - + self.system = dpdata.System(os.path.join("poscars", "conf2.dump")) + + def test_coords(self): + np.testing.assert_allclose( + self.system["coords"], + np.array([[[0.0, 0.0, 0.0], [1.2621856, 0.7018028, 0.5513885]]]), + ) + def test_type(self): - np.testing.assert_allclose(self.system.get_atom_types(), np.array( - [1, 0], dtype=int, - )) + np.testing.assert_allclose( + self.system.get_atom_types(), + np.array( + [1, 0], + dtype=int, + ), + ) diff --git a/tests/test_lammps_dump_shift_origin.py b/tests/test_lammps_dump_shift_origin.py index 00754d1a..eec6b4f3 100644 --- a/tests/test_lammps_dump_shift_origin.py +++ b/tests/test_lammps_dump_shift_origin.py @@ -4,11 +4,13 @@ from context import dpdata from comp_sys import CompSys, IsPBC + class TestLammpsDumpShiftOrigin(unittest.TestCase, CompSys, IsPBC): - def setUp (self): - self.system_1 = dpdata.System('poscars/shift_origin.dump', fmt = 'lammps/dump')[0] - self.system_2 = dpdata.System('poscars/shift_origin.dump', fmt = 'lammps/dump')[1] + def setUp(self): + self.system_1 = dpdata.System("poscars/shift_origin.dump", fmt="lammps/dump")[0] + self.system_2 = dpdata.System("poscars/shift_origin.dump", fmt="lammps/dump")[1] self.places = 6 -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/tests/test_lammps_dump_skipload.py b/tests/test_lammps_dump_skipload.py index 9e6502f6..d604607a 100644 --- a/tests/test_lammps_dump_skipload.py +++ b/tests/test_lammps_dump_skipload.py @@ -4,18 +4,15 @@ from context import dpdata from comp_sys import CompSys, IsPBC + class TestLmpDumpSkip(unittest.TestCase, CompSys, IsPBC): - - def setUp(self): - self.system_1 = dpdata.System(os.path.join('poscars', 'conf.5.dump'), - type_map = ['O', 'H'], - begin = 1, - step = 2) - self.system_2 = dpdata.System(os.path.join('poscars', 'conf.5.dump'), - type_map = ['O', 'H'], - begin = 0, - step = 1) \ - .sub_system(np.arange(1,5,2)) + def setUp(self): + self.system_1 = dpdata.System( + os.path.join("poscars", "conf.5.dump"), type_map=["O", "H"], begin=1, step=2 + ) + self.system_2 = dpdata.System( + os.path.join("poscars", "conf.5.dump"), type_map=["O", "H"], begin=0, step=1 + ).sub_system(np.arange(1, 5, 2)) self.places = 6 self.e_places = 6 self.f_places = 6 diff --git a/tests/test_lammps_dump_to_system.py b/tests/test_lammps_dump_to_system.py index b63b3af8..739f4e43 100644 --- a/tests/test_lammps_dump_to_system.py +++ b/tests/test_lammps_dump_to_system.py @@ -2,25 +2,26 @@ import numpy as np import unittest from context import dpdata -from poscars.poscar_ref_oh import TestPOSCARoh +from poscars.poscar_ref_oh import TestPOSCARoh + class TestDump(unittest.TestCase, TestPOSCARoh): - - def setUp(self): - self.system = dpdata.System(os.path.join('poscars', 'conf.dump'), - type_map = ['O', 'H']) - + def setUp(self): + self.system = dpdata.System( + os.path.join("poscars", "conf.dump"), type_map=["O", "H"] + ) + + class TestDump2(unittest.TestCase, TestPOSCARoh): - - def setUp(self): - self.tmp_system = dpdata.System(os.path.join('poscars', 'conf.dump'), - type_map = ['O', 'H']) + def setUp(self): + self.tmp_system = dpdata.System( + os.path.join("poscars", "conf.dump"), type_map=["O", "H"] + ) self.system = self.tmp_system.sub_system([1]) - def test_nframes (self) : + def test_nframes(self): self.assertEqual(self.tmp_system.get_nframes(), 2) - - -if __name__ == '__main__': + + +if __name__ == "__main__": unittest.main() - diff --git a/tests/test_lammps_dump_unfold.py b/tests/test_lammps_dump_unfold.py index 68aa2c8b..bce95f7b 100644 --- a/tests/test_lammps_dump_unfold.py +++ b/tests/test_lammps_dump_unfold.py @@ -2,35 +2,36 @@ import numpy as np import unittest from context import dpdata -from poscars.poscar_ref_oh import TestPOSCARoh +from poscars.poscar_ref_oh import TestPOSCARoh + class TestDump(unittest.TestCase, TestPOSCARoh): - - def setUp(self): - self.system = dpdata.System(os.path.join('poscars', 'conf_unfold.dump'), - type_map = ['O', 'H']) - + def setUp(self): + self.system = dpdata.System( + os.path.join("poscars", "conf_unfold.dump"), type_map=["O", "H"] + ) + + class TestDump2(unittest.TestCase, TestPOSCARoh): - - def setUp(self): - self.tmp_system = dpdata.System(os.path.join('poscars', 'conf_unfold.dump'), - type_map = ['O', 'H']) + def setUp(self): + self.tmp_system = dpdata.System( + os.path.join("poscars", "conf_unfold.dump"), type_map=["O", "H"] + ) self.system = self.tmp_system.sub_system([1]) - def test_nframes (self) : + def test_nframes(self): self.assertEqual(self.tmp_system.get_nframes(), 2) - - + + class TestDumpUnwrap(unittest.TestCase, TestPOSCARoh): def setUp(self): self.unwrap = True self.system = dpdata.System( - os.path.join('poscars', 'conf_unfold.dump'), - type_map=['O', 'H'], + os.path.join("poscars", "conf_unfold.dump"), + type_map=["O", "H"], unwrap=self.unwrap, ) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() - diff --git a/tests/test_lammps_lmp_dump.py b/tests/test_lammps_lmp_dump.py index f3354648..2ded5e44 100644 --- a/tests/test_lammps_lmp_dump.py +++ b/tests/test_lammps_lmp_dump.py @@ -2,28 +2,28 @@ import numpy as np import unittest from context import dpdata -from poscars.poscar_ref_oh import TestPOSCARoh +from poscars.poscar_ref_oh import TestPOSCARoh + class TestLmpDump(unittest.TestCase, TestPOSCARoh): - - def setUp(self): - tmp_system = dpdata.System(os.path.join('poscars', 'conf.lmp'), - type_map = ['O', 'H']) - tmp_system.to_lammps_lmp('tmp.lmp') + def setUp(self): + tmp_system = dpdata.System( + os.path.join("poscars", "conf.lmp"), type_map=["O", "H"] + ) + tmp_system.to_lammps_lmp("tmp.lmp") self.system = dpdata.System() - self.system.from_lammps_lmp('tmp.lmp', - type_map = ['O', 'H']) + self.system.from_lammps_lmp("tmp.lmp", type_map=["O", "H"]) + class TestToFunc(unittest.TestCase, TestPOSCARoh): - - def setUp(self): - tmp_system = dpdata.System(os.path.join('poscars', 'conf.lmp'), - type_map = ['O', 'H']) - tmp_system.to('lammps/lmp', 'tmp.lmp') + def setUp(self): + tmp_system = dpdata.System( + os.path.join("poscars", "conf.lmp"), type_map=["O", "H"] + ) + tmp_system.to("lammps/lmp", "tmp.lmp") self.system = dpdata.System() - self.system.from_fmt('tmp.lmp', fmt='lammps/lmp', - type_map = ['O', 'H']) + self.system.from_fmt("tmp.lmp", fmt="lammps/lmp", type_map=["O", "H"]) + -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() - diff --git a/tests/test_lammps_lmp_to_system.py b/tests/test_lammps_lmp_to_system.py index 53402723..ea8d2157 100644 --- a/tests/test_lammps_lmp_to_system.py +++ b/tests/test_lammps_lmp_to_system.py @@ -2,15 +2,16 @@ import numpy as np import unittest from context import dpdata -from poscars.poscar_ref_oh import TestPOSCARoh +from poscars.poscar_ref_oh import TestPOSCARoh + class TestLmp(unittest.TestCase, TestPOSCARoh): - - def setUp(self): + def setUp(self): self.system = dpdata.System() - self.system.from_lammps_lmp(os.path.join('poscars', 'conf.lmp'), - type_map = ['O', 'H']) - -if __name__ == '__main__': + self.system.from_lammps_lmp( + os.path.join("poscars", "conf.lmp"), type_map=["O", "H"] + ) + + +if __name__ == "__main__": unittest.main() - diff --git a/tests/test_lammps_read_from_trajs.py b/tests/test_lammps_read_from_trajs.py index 338ea1c3..128aedb7 100644 --- a/tests/test_lammps_read_from_trajs.py +++ b/tests/test_lammps_read_from_trajs.py @@ -3,43 +3,130 @@ import unittest from context import dpdata + class TestLmpReadFromTrajsWithRandomTypeId(unittest.TestCase): - - def setUp(self): - self.system = \ - dpdata.System(os.path.join('lammps', 'traj_with_random_type_id.dump'), fmt = 'lammps/dump', type_map = ["Ta","Nb","W","Mo","V","Al"]) - - def test_nframes (self) : + def setUp(self): + self.system = dpdata.System( + os.path.join("lammps", "traj_with_random_type_id.dump"), + fmt="lammps/dump", + type_map=["Ta", "Nb", "W", "Mo", "V", "Al"], + ) + + def test_nframes(self): self.system.sort_atom_types() - atype = self.system['atom_types'].tolist() + atype = self.system["atom_types"].tolist() self.assertTrue(atype == [1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5]) - - coord = self.system['coords'].reshape([2, -1]) - - coord0_std = np.array([6.69832 , 3.39136 , 3.34005 , 1.80744 , 5.08708 , 5.01099 , - 5.10512 , 5.08007 , 5.01272 , 1.70086 , 1.69544 , 1.66979 , - 3.48873 , 0.0697213, 6.67774 , 3.38621 , 0.033338 , 3.34239 , - 1.79424 , 1.7281 , 5.01015 , 3.48973 , 3.42896 , 6.67795 , - 3.40064 , 3.39148 , 3.34188 , 5.09069 , 1.72876 , 5.00917 , - 0.119885 , 6.74841 , 3.33869 , 4.99379 , 1.69262 , 1.67183 , - 0.199838 , 3.4185 , 6.67565 , 1.7213 , 5.05235 , 1.66373 , - 0.21494 , 6.77616 , 6.67623 , 5.00691 , 5.05 , 1.66532 ]) + + coord = self.system["coords"].reshape([2, -1]) + + coord0_std = np.array( + [ + 6.69832, + 3.39136, + 3.34005, + 1.80744, + 5.08708, + 5.01099, + 5.10512, + 5.08007, + 5.01272, + 1.70086, + 1.69544, + 1.66979, + 3.48873, + 0.0697213, + 6.67774, + 3.38621, + 0.033338, + 3.34239, + 1.79424, + 1.7281, + 5.01015, + 3.48973, + 3.42896, + 6.67795, + 3.40064, + 3.39148, + 3.34188, + 5.09069, + 1.72876, + 5.00917, + 0.119885, + 6.74841, + 3.33869, + 4.99379, + 1.69262, + 1.67183, + 0.199838, + 3.4185, + 6.67565, + 1.7213, + 5.05235, + 1.66373, + 0.21494, + 6.77616, + 6.67623, + 5.00691, + 5.05, + 1.66532, + ] + ) self.assertTrue(np.allclose(coord[0, ...], coord0_std)) - coord1_std = np.array([4.85582828e+00, 5.12324490e+00, 1.55763728e+00, 1.82031828e+00, - 1.61210490e+00, 4.91329728e+00, 5.15568828e+00, 4.91296490e+00, - 5.02114728e+00, 1.67640828e+00, 1.62756490e+00, 1.61183728e+00, - 3.41785828e+00, 6.54050490e+00, 3.42793728e+00, 3.39324828e+00, - 3.47558490e+00, 6.50564728e+00, 3.43286828e+00, 3.44029490e+00, - 3.37871728e+00, 6.60497828e+00, 3.46782490e+00, 3.42949728e+00, - 1.82021828e+00, 5.08114490e+00, 4.93158728e+00, 5.20431828e+00, - 1.80972490e+00, 5.00061728e+00, 6.56278828e+00, 6.62718490e+00, - 3.35101728e+00, 4.97045828e+00, 1.80536490e+00, 1.73358728e+00, - 6.61765828e+00, 3.43486490e+00, 6.48447728e+00, 1.57899828e+00, - 4.89261490e+00, 1.63632728e+00, 6.59585828e+00, 1.40657901e-01, - 6.51767728e+00, 3.30914005e+00, 7.86399766e-02, 6.66581642e-04]) + coord1_std = np.array( + [ + 4.85582828e00, + 5.12324490e00, + 1.55763728e00, + 1.82031828e00, + 1.61210490e00, + 4.91329728e00, + 5.15568828e00, + 4.91296490e00, + 5.02114728e00, + 1.67640828e00, + 1.62756490e00, + 1.61183728e00, + 3.41785828e00, + 6.54050490e00, + 3.42793728e00, + 3.39324828e00, + 3.47558490e00, + 6.50564728e00, + 3.43286828e00, + 3.44029490e00, + 3.37871728e00, + 6.60497828e00, + 3.46782490e00, + 3.42949728e00, + 1.82021828e00, + 5.08114490e00, + 4.93158728e00, + 5.20431828e00, + 1.80972490e00, + 5.00061728e00, + 6.56278828e00, + 6.62718490e00, + 3.35101728e00, + 4.97045828e00, + 1.80536490e00, + 1.73358728e00, + 6.61765828e00, + 3.43486490e00, + 6.48447728e00, + 1.57899828e00, + 4.89261490e00, + 1.63632728e00, + 6.59585828e00, + 1.40657901e-01, + 6.51767728e00, + 3.30914005e00, + 7.86399766e-02, + 6.66581642e-04, + ] + ) self.assertTrue(np.allclose(coord[1, ...], coord1_std)) -if __name__ == '__main__': - unittest.main() +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_msd.py b/tests/test_msd.py index 509e5c30..fd62d13c 100644 --- a/tests/test_msd.py +++ b/tests/test_msd.py @@ -3,35 +3,29 @@ import unittest from context import dpdata -class TestMSD (unittest.TestCase) : - def setUp(self) : + +class TestMSD(unittest.TestCase): + def setUp(self): self.system = dpdata.System() - self.system.data['atom_types'] = np.array([0,1]) - self.system.data['atom_names'] = ['O', 'H'] + self.system.data["atom_types"] = np.array([0, 1]) + self.system.data["atom_names"] = ["O", "H"] nframes = 10 cell_size = 5 - self.system.data['cells'] = np.tile(cell_size * np.eye(3), - (nframes,1,1)) - self.system.data['coords'] = np.zeros([nframes, 2, 3]) - for ff in range(nframes) : - self.system.data['coords'][ff][0] = 1.0 * ff * np.array([1,0,0]) - self.system.data['coords'][ff][1] = 2.0 * ff * np.array([1,0,0]) - self.system.data['coords'] = self.system.data['coords'] % cell_size - - def test_msd(self) : + self.system.data["cells"] = np.tile(cell_size * np.eye(3), (nframes, 1, 1)) + self.system.data["coords"] = np.zeros([nframes, 2, 3]) + for ff in range(nframes): + self.system.data["coords"][ff][0] = 1.0 * ff * np.array([1, 0, 0]) + self.system.data["coords"][ff][1] = 2.0 * ff * np.array([1, 0, 0]) + self.system.data["coords"] = self.system.data["coords"] % cell_size + + def test_msd(self): # print(self.system['atom_types'] == 0) msd = dpdata.md.msd.msd(self.system) - msd0 = dpdata.md.msd.msd(self.system, self.system['atom_types'] == 0) - msd1 = dpdata.md.msd.msd(self.system, self.system['atom_types'] == 1) + msd0 = dpdata.md.msd.msd(self.system, self.system["atom_types"] == 0) + msd1 = dpdata.md.msd.msd(self.system, self.system["atom_types"] == 1) # print(msd) ncomp = msd.shape[0] - for ii in range(ncomp) : - self.assertAlmostEqual(msd0[ii], - ii * ii, - msg = 'msd0[%d]' % ii) - self.assertAlmostEqual(msd1[ii], - ii * ii * 4, - msg = 'msd1[%d]' % ii) - self.assertAlmostEqual(msd[ii], - (msd0[ii]+msd1[ii]) * 0.5, - 'msd[%d]' % ii) + for ii in range(ncomp): + self.assertAlmostEqual(msd0[ii], ii * ii, msg="msd0[%d]" % ii) + self.assertAlmostEqual(msd1[ii], ii * ii * 4, msg="msd1[%d]" % ii) + self.assertAlmostEqual(msd[ii], (msd0[ii] + msd1[ii]) * 0.5, "msd[%d]" % ii) diff --git a/tests/test_multisystems.py b/tests/test_multisystems.py index 6e05861b..689b1a86 100644 --- a/tests/test_multisystems.py +++ b/tests/test_multisystems.py @@ -16,19 +16,27 @@ def setUp(self): self.f_places = 6 self.v_places = 6 - system_1 = dpdata.LabeledSystem('gaussian/methane.gaussianlog', fmt='gaussian/log') - system_2 = dpdata.LabeledSystem('gaussian/methane_reordered.gaussianlog', fmt='gaussian/log') - system_3 = dpdata.LabeledSystem('gaussian/methane_sub.gaussianlog', fmt='gaussian/log') - system_4 = dpdata.LabeledSystem('gaussian/noncoveraged.gaussianlog', fmt='gaussian/log') + system_1 = dpdata.LabeledSystem( + "gaussian/methane.gaussianlog", fmt="gaussian/log" + ) + system_2 = dpdata.LabeledSystem( + "gaussian/methane_reordered.gaussianlog", fmt="gaussian/log" + ) + system_3 = dpdata.LabeledSystem( + "gaussian/methane_sub.gaussianlog", fmt="gaussian/log" + ) + system_4 = dpdata.LabeledSystem( + "gaussian/noncoveraged.gaussianlog", fmt="gaussian/log" + ) self.systems = dpdata.MultiSystems(system_1, system_3, system_4) self.systems.append(system_2) - self.system_1 = self.systems['C1H3'] + self.system_1 = self.systems["C1H3"] self.system_2 = system_3 - - self.system_names = ['C1H4', 'C1H3'] - self.system_sizes = {'C1H4':2, 'C1H3':1} - self.atom_names = ['C', 'H'] + + self.system_names = ["C1H4", "C1H3"] + self.system_sizes = {"C1H4": 2, "C1H3": 1} + self.atom_names = ["C", "H"] def test_len(self): self.assertEqual(len(self.systems), 2) @@ -37,7 +45,9 @@ def test_get_nframes(self): self.assertEqual(self.systems.get_nframes(), 3) def test_str(self): - self.assertEqual(str(self.systems), "MultiSystems (2 systems containing 3 frames)") + self.assertEqual( + str(self.systems), "MultiSystems (2 systems containing 3 frames)" + ) class TestMultiSystemsAdd(unittest.TestCase, CompLabeledSys, MultiSystems, IsNoPBC): @@ -47,97 +57,151 @@ def setUp(self): self.f_places = 6 self.v_places = 6 - system_1 = dpdata.LabeledSystem('gaussian/methane.gaussianlog', fmt='gaussian/log') - system_2 = dpdata.LabeledSystem('gaussian/methane_reordered.gaussianlog', fmt='gaussian/log') - system_3 = dpdata.LabeledSystem('gaussian/methane_sub.gaussianlog', fmt='gaussian/log') - system_4 = dpdata.LabeledSystem('gaussian/noncoveraged.gaussianlog', fmt='gaussian/log') + system_1 = dpdata.LabeledSystem( + "gaussian/methane.gaussianlog", fmt="gaussian/log" + ) + system_2 = dpdata.LabeledSystem( + "gaussian/methane_reordered.gaussianlog", fmt="gaussian/log" + ) + system_3 = dpdata.LabeledSystem( + "gaussian/methane_sub.gaussianlog", fmt="gaussian/log" + ) + system_4 = dpdata.LabeledSystem( + "gaussian/noncoveraged.gaussianlog", fmt="gaussian/log" + ) self.systems = dpdata.MultiSystems(system_1) self.systems += system_2 self.systems += system_3 self.systems += system_4 for s in self.systems: - if s.formula == 'C1H3': + if s.formula == "C1H3": self.system_1 = s self.system_2 = system_3 - - self.system_names = ['C1H4', 'C1H3'] - self.system_sizes = {'C1H4':2, 'C1H3':1} - self.atom_names = ['C', 'H'] + + self.system_names = ["C1H4", "C1H3"] + self.system_sizes = {"C1H4": 2, "C1H3": 1} + self.atom_names = ["C", "H"] class TestMultiSystemsSorted(unittest.TestCase, MultiSystems): def setUp(self): # CH4 and O2 - system_1 = dpdata.LabeledSystem('gaussian/methane.gaussianlog', fmt='gaussian/log') - system_2 = dpdata.LabeledSystem('gaussian/oxygen.gaussianlog', fmt='gaussian/log') + system_1 = dpdata.LabeledSystem( + "gaussian/methane.gaussianlog", fmt="gaussian/log" + ) + system_2 = dpdata.LabeledSystem( + "gaussian/oxygen.gaussianlog", fmt="gaussian/log" + ) self.systems = dpdata.MultiSystems(system_1, system_2) - self.system_names = ['C1H4O0', 'C0H0O2'] - self.system_sizes = {'C1H4O0':1, 'C0H0O2':1} - self.atom_names = ['C', 'H', 'O'] - + self.system_names = ["C1H4O0", "C0H0O2"] + self.system_sizes = {"C1H4O0": 1, "C0H0O2": 1} + self.atom_names = ["C", "H", "O"] + + class TestMultiDeepmdDumpRaw(unittest.TestCase, CompLabeledSys, IsNoPBC): - def setUp (self) : + def setUp(self): self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 6 - system_1 = dpdata.LabeledSystem('gaussian/methane.gaussianlog', fmt='gaussian/log') - system_2 = dpdata.LabeledSystem('gaussian/methane_reordered.gaussianlog', fmt='gaussian/log') - system_3 = dpdata.LabeledSystem('gaussian/methane_sub.gaussianlog', fmt='gaussian/log') - system_4 = dpdata.LabeledSystem('gaussian/noncoveraged.gaussianlog', fmt='gaussian/log') + system_1 = dpdata.LabeledSystem( + "gaussian/methane.gaussianlog", fmt="gaussian/log" + ) + system_2 = dpdata.LabeledSystem( + "gaussian/methane_reordered.gaussianlog", fmt="gaussian/log" + ) + system_3 = dpdata.LabeledSystem( + "gaussian/methane_sub.gaussianlog", fmt="gaussian/log" + ) + system_4 = dpdata.LabeledSystem( + "gaussian/noncoveraged.gaussianlog", fmt="gaussian/log" + ) systems = dpdata.MultiSystems(system_1, system_2, system_3, system_4) path = "tmp.deepmd.multi" systems.to_deepmd_raw(path) - self.system_1 = dpdata.LabeledSystem(os.path.join(path, 'C1H3'), fmt='deepmd/raw', type_map = ['C', 'H']) + self.system_1 = dpdata.LabeledSystem( + os.path.join(path, "C1H3"), fmt="deepmd/raw", type_map=["C", "H"] + ) self.system_2 = system_3 + class TestMultiDeepmdDumpComp(unittest.TestCase, CompLabeledSys, IsNoPBC): - def setUp (self) : + def setUp(self): self.places = 6 self.e_places = 4 self.f_places = 6 self.v_places = 6 - system_1 = dpdata.LabeledSystem('gaussian/methane.gaussianlog', fmt='gaussian/log') - system_2 = dpdata.LabeledSystem('gaussian/methane_reordered.gaussianlog', fmt='gaussian/log') - system_3 = dpdata.LabeledSystem('gaussian/methane_sub.gaussianlog', fmt='gaussian/log') - system_4 = dpdata.LabeledSystem('gaussian/noncoveraged.gaussianlog', fmt='gaussian/log') + system_1 = dpdata.LabeledSystem( + "gaussian/methane.gaussianlog", fmt="gaussian/log" + ) + system_2 = dpdata.LabeledSystem( + "gaussian/methane_reordered.gaussianlog", fmt="gaussian/log" + ) + system_3 = dpdata.LabeledSystem( + "gaussian/methane_sub.gaussianlog", fmt="gaussian/log" + ) + system_4 = dpdata.LabeledSystem( + "gaussian/noncoveraged.gaussianlog", fmt="gaussian/log" + ) systems = dpdata.MultiSystems(system_1, system_2, system_3, system_4) path = "tmp.deepmd.npy.multi" systems.to_deepmd_npy(path) - self.system_1 = dpdata.LabeledSystem(os.path.join(path, 'C1H3'), fmt='deepmd/npy', type_map = ['C', 'H']) + self.system_1 = dpdata.LabeledSystem( + os.path.join(path, "C1H3"), fmt="deepmd/npy", type_map=["C", "H"] + ) self.system_2 = system_3 + class TestTypeMap(unittest.TestCase): - def setUp (self) : - self.system_1 = dpdata.LabeledSystem('gaussian/methane.gaussianlog', fmt='gaussian/log') - self.system_2 = dpdata.LabeledSystem('gaussian/methane_reordered.gaussianlog', fmt='gaussian/log') - self.system_3 = dpdata.LabeledSystem('gaussian/methane_sub.gaussianlog', fmt='gaussian/log') - self.system_4 = dpdata.LabeledSystem('gaussian/noncoveraged.gaussianlog', fmt='gaussian/log') + def setUp(self): + self.system_1 = dpdata.LabeledSystem( + "gaussian/methane.gaussianlog", fmt="gaussian/log" + ) + self.system_2 = dpdata.LabeledSystem( + "gaussian/methane_reordered.gaussianlog", fmt="gaussian/log" + ) + self.system_3 = dpdata.LabeledSystem( + "gaussian/methane_sub.gaussianlog", fmt="gaussian/log" + ) + self.system_4 = dpdata.LabeledSystem( + "gaussian/noncoveraged.gaussianlog", fmt="gaussian/log" + ) def test_type_map(self): - for type_map in permutations(['C', 'H', 'O', 'N'], 4): - systems = dpdata.MultiSystems(self.system_1, self.system_2, self.system_3, self.system_4, type_map=type_map) + for type_map in permutations(["C", "H", "O", "N"], 4): + systems = dpdata.MultiSystems( + self.system_1, + self.system_2, + self.system_3, + self.system_4, + type_map=type_map, + ) self.assertEqual(type_map, systems.atom_names) class TestMultiSystemsTo(unittest.TestCase, MultiSystems): def setUp(self): # CH4 and O2 - system_1 = dpdata.LabeledSystem('gaussian/methane.gaussianlog', fmt='gaussian/log') - system_2 = dpdata.LabeledSystem('gaussian/oxygen.gaussianlog', fmt='gaussian/log') + system_1 = dpdata.LabeledSystem( + "gaussian/methane.gaussianlog", fmt="gaussian/log" + ) + system_2 = dpdata.LabeledSystem( + "gaussian/oxygen.gaussianlog", fmt="gaussian/log" + ) systems1 = dpdata.MultiSystems(system_1, system_2) systems1.to_deepmd_npy("tmp.multi.to") self.systems = dpdata.MultiSystems().from_deepmd_npy("tmp.multi.to") - self.system_names = ['C1H4O0', 'C0H0O2'] - self.system_sizes = {'C1H4O0':1, 'C0H0O2':1} - self.atom_names = ['C', 'H', 'O'] + self.system_names = ["C1H4O0", "C0H0O2"] + self.system_sizes = {"C1H4O0": 1, "C0H0O2": 1} + self.atom_names = ["C", "H", "O"] + -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_periodic_table.py b/tests/test_periodic_table.py index 2548a516..40a29a1c 100644 --- a/tests/test_periodic_table.py +++ b/tests/test_periodic_table.py @@ -1,27 +1,30 @@ import unittest from context import dpdata -data={"name": "Hydrogen", - "atomic_no": 1, - "X": 2.2, - "atomic_mass": 1.00794, - "radius": 0.25, - "calculated_radius": 0.53 - } +data = { + "name": "Hydrogen", + "atomic_no": 1, + "X": 2.2, + "atomic_mass": 1.00794, + "radius": 0.25, + "calculated_radius": 0.53, +} + class TestPeriodicTable(unittest.TestCase): - def setUp (self) : + def setUp(self): self.H = dpdata.periodic_table.Element("H") def test_H(self): - H=self.H - self.assertEqual(H.name,data['name']) - self.assertEqual(H.Z,data['atomic_no']) - self.assertEqual(H.X,data['X']) - self.assertEqual(H.mass,data['atomic_mass']) - self.assertEqual(H.radius,data['radius']) - self.assertEqual(H.calculated_radius,data['calculated_radius']) - self.assertEqual(H.X,dpdata.periodic_table.Element.from_Z(1).X) + H = self.H + self.assertEqual(H.name, data["name"]) + self.assertEqual(H.Z, data["atomic_no"]) + self.assertEqual(H.X, data["X"]) + self.assertEqual(H.mass, data["atomic_mass"]) + self.assertEqual(H.radius, data["radius"]) + self.assertEqual(H.calculated_radius, data["calculated_radius"]) + self.assertEqual(H.X, dpdata.periodic_table.Element.from_Z(1).X) + -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_perturb.py b/tests/test_perturb.py index fe63882f..c047dfa4 100644 --- a/tests/test_perturb.py +++ b/tests/test_perturb.py @@ -5,133 +5,170 @@ from comp_sys import CompSys, IsPBC from unittest.mock import Mock -from unittest.mock import patch, MagicMock +from unittest.mock import patch, MagicMock + class NormalGenerator(object): def __init__(self): self.randn_generator = self.get_randn_generator() self.rand_generator = self.get_rand_generator() - def randn(self,number): + + def randn(self, number): return next(self.randn_generator) - def rand(self,number): + + def rand(self, number): return next(self.rand_generator) + @staticmethod def get_randn_generator(): - data = np.asarray([ - [ 0.71878148, -2.20667426, 1.49373955], - [-0.42728113, 1.43836059, -1.17553854], - [-1.70793073, -0.39588759, -0.40880927], - [ 0.17078291, -0.34856352, 1.04307936], - [-0.99103413, -0.1886479, 0.13813131], - [ 0.5839343, 1.04612646, -0.62631026], - [ 0.9752889, 1.85932517, -0.47875828], - [-0.23977172, -0.38373444, -0.04375488]]) + data = np.asarray( + [ + [0.71878148, -2.20667426, 1.49373955], + [-0.42728113, 1.43836059, -1.17553854], + [-1.70793073, -0.39588759, -0.40880927], + [0.17078291, -0.34856352, 1.04307936], + [-0.99103413, -0.1886479, 0.13813131], + [0.5839343, 1.04612646, -0.62631026], + [0.9752889, 1.85932517, -0.47875828], + [-0.23977172, -0.38373444, -0.04375488], + ] + ) count = 0 while True: yield data[count] - count +=1 - - @staticmethod + count += 1 + + @staticmethod def get_rand_generator(): - yield np.asarray([0.23182233, 0.87106847, 0.68728511, 0.94180274, 0.92860453, 0.69191187]) + yield np.asarray( + [0.23182233, 0.87106847, 0.68728511, 0.94180274, 0.92860453, 0.69191187] + ) + class UniformGenerator(object): def __init__(self): self.randn_generator = self.get_randn_generator() self.rand_generator = self.get_rand_generator() - def randn(self,number): + + def randn(self, number): return next(self.randn_generator) - def rand(self,number): + + def rand(self, number): return next(self.rand_generator) @staticmethod def get_randn_generator(): - data = [[-0.19313281, 0.80194715, 0.14050915], - [-1.47859926, 0.12921667, -0.17632456], - [-0.60836805, -0.7700423, -0.8386948 ], - [-0.03236753, 0.36690245, 0.5041072 ], - [-1.59366933, 0.37069227, 0.89608291], - [ 0.18165617, 0.53875315, -0.42233955], - [ 0.74052496, 1.26627555, -1.12094823], - [-0.89610092, -1.44247021, -1.3502529 ]] - yield np.asarray([0.0001,0.0001,0.0001]) # test for not using small vector + data = [ + [-0.19313281, 0.80194715, 0.14050915], + [-1.47859926, 0.12921667, -0.17632456], + [-0.60836805, -0.7700423, -0.8386948], + [-0.03236753, 0.36690245, 0.5041072], + [-1.59366933, 0.37069227, 0.89608291], + [0.18165617, 0.53875315, -0.42233955], + [0.74052496, 1.26627555, -1.12094823], + [-0.89610092, -1.44247021, -1.3502529], + ] + yield np.asarray([0.0001, 0.0001, 0.0001]) # test for not using small vector count = 0 while True: yield data[count] - count +=1 + count += 1 - @staticmethod + @staticmethod def get_rand_generator(): - data = np.asarray([[0.71263084], [0.61339295], - [0.22948181], [0.36087632], - [0.17582222], [0.97926742], - [0.84706761], [0.44495513]]) - - yield np.asarray([0.34453551, 0.0618966, 0.9327273, 0.43013654, 0.88624993, 0.48827425]) - count =0 + data = np.asarray( + [ + [0.71263084], + [0.61339295], + [0.22948181], + [0.36087632], + [0.17582222], + [0.97926742], + [0.84706761], + [0.44495513], + ] + ) + + yield np.asarray( + [0.34453551, 0.0618966, 0.9327273, 0.43013654, 0.88624993, 0.48827425] + ) + count = 0 while True: yield np.asarray(data[count]) - count+=1 + count += 1 + class ConstGenerator(object): def __init__(self): self.randn_generator = self.get_randn_generator() self.rand_generator = self.get_rand_generator() - def randn(self,number): + + def randn(self, number): return next(self.randn_generator) - def rand(self,number): + + def rand(self, number): return next(self.rand_generator) @staticmethod def get_randn_generator(): - data = np.asarray([[ 0.95410606, -1.62338002, -2.05359934], - [ 0.69213769, -1.26008667, 0.77970721], - [-1.77926476, -0.39227219, 2.31677298], - [ 0.08785233, -0.03966649, -0.45325656], - [-0.53860887, 0.42536802, -0.46167309], - [-0.26865791, -0.19901684, -2.51444768], - [-0.31627314, 0.22076982, -0.36032225], - [0.66731887, 1.2505806, 1.46112938]]) - yield np.asarray([0.0001,0.0001,0.0001]) # test for not using small vector + data = np.asarray( + [ + [0.95410606, -1.62338002, -2.05359934], + [0.69213769, -1.26008667, 0.77970721], + [-1.77926476, -0.39227219, 2.31677298], + [0.08785233, -0.03966649, -0.45325656], + [-0.53860887, 0.42536802, -0.46167309], + [-0.26865791, -0.19901684, -2.51444768], + [-0.31627314, 0.22076982, -0.36032225], + [0.66731887, 1.2505806, 1.46112938], + ] + ) + yield np.asarray([0.0001, 0.0001, 0.0001]) # test for not using small vector count = 0 while True: yield data[count] - count +=1 + count += 1 + + @staticmethod + def get_rand_generator(): + yield np.asarray( + [0.01525907, 0.68387374, 0.39768541, 0.55596047, 0.26557088, 0.60883073] + ) - @staticmethod - def get_rand_generator(): - yield np.asarray([0.01525907, 0.68387374, 0.39768541, 0.55596047, 0.26557088, 0.60883073]) # %% class TestPerturbNormal(unittest.TestCase, CompSys, IsPBC): - @patch('numpy.random') - def setUp (self, random_mock): + @patch("numpy.random") + def setUp(self, random_mock): random_mock.rand = NormalGenerator().rand random_mock.randn = NormalGenerator().randn - system_1_origin = dpdata.System('poscars/POSCAR.SiC',fmt='vasp/poscar') - self.system_1 = system_1_origin.perturb(1,0.05,0.6,'normal') - self.system_2 = dpdata.System('poscars/POSCAR.SiC.normal',fmt='vasp/poscar') + system_1_origin = dpdata.System("poscars/POSCAR.SiC", fmt="vasp/poscar") + self.system_1 = system_1_origin.perturb(1, 0.05, 0.6, "normal") + self.system_2 = dpdata.System("poscars/POSCAR.SiC.normal", fmt="vasp/poscar") self.places = 6 + class TestPerturbUniform(unittest.TestCase, CompSys, IsPBC): - @patch('numpy.random') - def setUp (self, random_mock) : + @patch("numpy.random") + def setUp(self, random_mock): random_mock.rand = UniformGenerator().rand random_mock.randn = UniformGenerator().randn - system_1_origin = dpdata.System('poscars/POSCAR.SiC',fmt='vasp/poscar') - self.system_1 = system_1_origin.perturb(1,0.05,0.6,'uniform') - self.system_2 = dpdata.System('poscars/POSCAR.SiC.uniform',fmt='vasp/poscar') + system_1_origin = dpdata.System("poscars/POSCAR.SiC", fmt="vasp/poscar") + self.system_1 = system_1_origin.perturb(1, 0.05, 0.6, "uniform") + self.system_2 = dpdata.System("poscars/POSCAR.SiC.uniform", fmt="vasp/poscar") self.places = 6 + class TestPerturbConst(unittest.TestCase, CompSys, IsPBC): - @patch('numpy.random') - def setUp (self, random_mock) : + @patch("numpy.random") + def setUp(self, random_mock): random_mock.rand = ConstGenerator().rand random_mock.randn = ConstGenerator().randn - system_1_origin = dpdata.System('poscars/POSCAR.SiC',fmt='vasp/poscar') - self.system_1 = system_1_origin.perturb(1,0.05,0.6,'const') - self.system_2 = dpdata.System('poscars/POSCAR.SiC.const',fmt='vasp/poscar') + system_1_origin = dpdata.System("poscars/POSCAR.SiC", fmt="vasp/poscar") + self.system_1 = system_1_origin.perturb(1, 0.05, 0.6, "const") + self.system_2 = dpdata.System("poscars/POSCAR.SiC.const", fmt="vasp/poscar") self.places = 6 -if __name__ == '__main__': - unittest.main() \ No newline at end of file + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_pick_atom_idx.py b/tests/test_pick_atom_idx.py index bb6af61f..37be5d8f 100644 --- a/tests/test_pick_atom_idx.py +++ b/tests/test_pick_atom_idx.py @@ -2,40 +2,50 @@ import unittest from context import dpdata from comp_sys import CompSys, IsNoPBC + try: - import parmed - exist_module=True + import parmed + + exist_module = True except Exception: - exist_module=False + exist_module = False + class TestPickAtomIdx(unittest.TestCase, CompSys, IsNoPBC): - - def setUp(self): + def setUp(self): self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 6 - self.system_1 = dpdata.LabeledSystem('gaussian/methane_reordered.gaussianlog', fmt='gaussian/log').pick_atom_idx(slice(4)) - self.system_2 = dpdata.LabeledSystem('gaussian/methane_sub.gaussianlog', fmt='gaussian/log') + self.system_1 = dpdata.LabeledSystem( + "gaussian/methane_reordered.gaussianlog", fmt="gaussian/log" + ).pick_atom_idx(slice(4)) + self.system_2 = dpdata.LabeledSystem( + "gaussian/methane_sub.gaussianlog", fmt="gaussian/log" + ) -@unittest.skipIf(not exist_module,"skip") + +@unittest.skipIf(not exist_module, "skip") class TestPickByAmberMask(unittest.TestCase, CompSys, IsNoPBC): - - def setUp(self): - parmfile="amber/corr/qmmm.parm7" - ep = r'@%EP' + def setUp(self): + parmfile = "amber/corr/qmmm.parm7" + ep = r"@%EP" target = ":1" - cutoff = 6. + cutoff = 6.0 interactwith = "(%s)<:%f&!%s" % (target, cutoff, ep) self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 6 - self.system_1 = dpdata.LabeledSystem("amber/corr/dp_corr", fmt="deepmd/npy").pick_by_amber_mask( - parmfile, interactwith, pass_coords=True, nopbc=True)['C6EP0H11HW192O6OW96P1'] - self.system_2 = dpdata.LabeledSystem("amber/corr/dp_amber_mask/C6EP0H11HW192O6OW96P1", fmt="deepmd/npy") - + self.system_1 = dpdata.LabeledSystem( + "amber/corr/dp_corr", fmt="deepmd/npy" + ).pick_by_amber_mask(parmfile, interactwith, pass_coords=True, nopbc=True)[ + "C6EP0H11HW192O6OW96P1" + ] + self.system_2 = dpdata.LabeledSystem( + "amber/corr/dp_amber_mask/C6EP0H11HW192O6OW96P1", fmt="deepmd/npy" + ) + -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() - diff --git a/tests/test_predict.py b/tests/test_predict.py index 3ba62ec2..2176bf74 100644 --- a/tests/test_predict.py +++ b/tests/test_predict.py @@ -3,6 +3,7 @@ from comp_sys import CompLabeledSys, IsPBC from context import dpdata + try: import ase except ModuleNotFoundError: @@ -14,36 +15,36 @@ @dpdata.driver.Driver.register("zero") class ZeroDriver(dpdata.driver.Driver): def label(self, data): - nframes = data['coords'].shape[0] - natoms = data['coords'].shape[1] - data['energies'] = np.zeros((nframes,)) - data['forces'] = np.zeros((nframes, natoms, 3)) - data['virials'] = np.zeros((nframes, 3, 3)) + nframes = data["coords"].shape[0] + natoms = data["coords"].shape[1] + data["energies"] = np.zeros((nframes,)) + data["forces"] = np.zeros((nframes, natoms, 3)) + data["virials"] = np.zeros((nframes, 3, 3)) return data @dpdata.driver.Driver.register("one") class OneDriver(dpdata.driver.Driver): def label(self, data): - nframes = data['coords'].shape[0] - natoms = data['coords'].shape[1] - data['energies'] = np.ones((nframes,)) - data['forces'] = np.ones((nframes, natoms, 3)) - data['virials'] = np.ones((nframes, 3, 3)) + nframes = data["coords"].shape[0] + natoms = data["coords"].shape[1] + data["energies"] = np.ones((nframes,)) + data["forces"] = np.ones((nframes, natoms, 3)) + data["virials"] = np.ones((nframes, 3, 3)) return data class TestPredict(unittest.TestCase, CompLabeledSys): - def setUp (self) : - ori_sys = dpdata.LabeledSystem('poscars/deepmd.h2o.md', - fmt = 'deepmd/raw', - type_map = ['O', 'H']) + def setUp(self): + ori_sys = dpdata.LabeledSystem( + "poscars/deepmd.h2o.md", fmt="deepmd/raw", type_map=["O", "H"] + ) self.system_1 = ori_sys.predict(driver="zero") - self.system_2 = dpdata.LabeledSystem('poscars/deepmd.h2o.md', - fmt = 'deepmd/raw', - type_map = ['O', 'H']) - for pp in ('energies', 'forces', 'virials'): - self.system_2.data[pp][:] = 0. + self.system_2 = dpdata.LabeledSystem( + "poscars/deepmd.h2o.md", fmt="deepmd/raw", type_map=["O", "H"] + ) + for pp in ("energies", "forces", "virials"): + self.system_2.data[pp][:] = 0.0 self.places = 6 self.e_places = 6 @@ -53,23 +54,26 @@ def setUp (self) : class TestHybridDriver(unittest.TestCase, CompLabeledSys): """Test HybridDriver.""" - def setUp(self) : - ori_sys = dpdata.LabeledSystem('poscars/deepmd.h2o.md', - fmt = 'deepmd/raw', - type_map = ['O', 'H']) - self.system_1 = ori_sys.predict([ + + def setUp(self): + ori_sys = dpdata.LabeledSystem( + "poscars/deepmd.h2o.md", fmt="deepmd/raw", type_map=["O", "H"] + ) + self.system_1 = ori_sys.predict( + [ {"type": "one"}, {"type": "one"}, {"type": "one"}, {"type": "zero"}, ], - driver="hybrid") + driver="hybrid", + ) # sum is 3 - self.system_2 = dpdata.LabeledSystem('poscars/deepmd.h2o.md', - fmt = 'deepmd/raw', - type_map = ['O', 'H']) - for pp in ('energies', 'forces'): - self.system_2.data[pp][:] = 3. + self.system_2 = dpdata.LabeledSystem( + "poscars/deepmd.h2o.md", fmt="deepmd/raw", type_map=["O", "H"] + ) + for pp in ("energies", "forces"): + self.system_2.data[pp][:] = 3.0 self.places = 6 self.e_places = 6 @@ -77,12 +81,12 @@ def setUp(self) : self.v_places = 6 -@unittest.skipIf(skip_ase,"skip ase related test. install ase to fix") +@unittest.skipIf(skip_ase, "skip ase related test. install ase to fix") class TestASEDriver(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : - ori_sys = dpdata.LabeledSystem('poscars/deepmd.h2o.md', - fmt = 'deepmd/raw', - type_map = ['O', 'H']) + def setUp(self): + ori_sys = dpdata.LabeledSystem( + "poscars/deepmd.h2o.md", fmt="deepmd/raw", type_map=["O", "H"] + ) one_driver = OneDriver() self.system_1 = ori_sys.predict(driver=one_driver) self.system_2 = ori_sys.predict(one_driver.ase_calculator, driver="ase") @@ -94,10 +98,10 @@ def setUp (self) : @unittest.skipIf(skip_ase, "skip ase related test. install ase to fix") class TestMinimize(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : - ori_sys = dpdata.LabeledSystem('poscars/deepmd.h2o.md', - fmt = 'deepmd/raw', - type_map = ['O', 'H']) + def setUp(self): + ori_sys = dpdata.LabeledSystem( + "poscars/deepmd.h2o.md", fmt="deepmd/raw", type_map=["O", "H"] + ) zero_driver = ZeroDriver() self.system_1 = ori_sys.predict(driver=zero_driver) self.system_2 = ori_sys.minimize(driver=zero_driver, minimizer="ase") @@ -109,14 +113,16 @@ def setUp (self) : @unittest.skipIf(skip_ase, "skip ase related test. install ase to fix") class TestMinimizeMultiSystems(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : - ori_sys = dpdata.LabeledSystem('poscars/deepmd.h2o.md', - fmt = 'deepmd/raw', - type_map = ['O', 'H']) + def setUp(self): + ori_sys = dpdata.LabeledSystem( + "poscars/deepmd.h2o.md", fmt="deepmd/raw", type_map=["O", "H"] + ) multi_sys = dpdata.MultiSystems(ori_sys) zero_driver = ZeroDriver() self.system_1 = list(multi_sys.predict(driver=zero_driver).systems.values())[0] - self.system_2 = list(multi_sys.minimize(driver=zero_driver, minimizer="ase").systems.values())[0] + self.system_2 = list( + multi_sys.minimize(driver=zero_driver, minimizer="ase").systems.values() + )[0] self.places = 6 self.e_places = 6 self.f_places = 6 diff --git a/tests/test_pwmat_config_dump.py b/tests/test_pwmat_config_dump.py index ec77ac6e..32c6ee52 100644 --- a/tests/test_pwmat_config_dump.py +++ b/tests/test_pwmat_config_dump.py @@ -2,47 +2,58 @@ import numpy as np import unittest import dpdata -from pwmat.config_ref_oh import Testconfigoh +from pwmat.config_ref_oh import Testconfigoh + def myfilecmp(test, f0, f1): - with open(f0) as fp0 : + with open(f0) as fp0: with open(f1) as fp1: test.assertTrue(fp0.read() == fp1.read()) + class TestatomconfigDump(unittest.TestCase, Testconfigoh): - def setUp(self): tmp_system = dpdata.System() - tmp_system.from_lammps_lmp(os.path.join('pwmat', 'conf.lmp'), type_map = ['O', 'H']) - tmp_system.to_pwmat_atomconfig('tmp.atom.config') + tmp_system.from_lammps_lmp( + os.path.join("pwmat", "conf.lmp"), type_map=["O", "H"] + ) + tmp_system.to_pwmat_atomconfig("tmp.atom.config") self.system = dpdata.System() - self.system.from_pwmat_atomconfig('tmp.atom.config') + self.system.from_pwmat_atomconfig("tmp.atom.config") + class TestatomconfigDump1(unittest.TestCase, Testconfigoh): - - def setUp(self): + def setUp(self): tmp_system = dpdata.System() - tmp_system.from_pwmat_atomconfig(os.path.join('pwmat', 'atom.config.oh')) + tmp_system.from_pwmat_atomconfig(os.path.join("pwmat", "atom.config.oh")) # tmp_system.from_lammps_lmp(os.path.join('poscars', 'conf.lmp'), type_map = ['O', 'H']) - tmp_system.to_pwmat_atomconfig('tmp.atom.config') + tmp_system.to_pwmat_atomconfig("tmp.atom.config") self.system = dpdata.System() - self.system.from_pwmat_atomconfig('tmp.atom.config') + self.system.from_pwmat_atomconfig("tmp.atom.config") + -class TestatomconfigSkipZeroAtomNumb(unittest.TestCase) : +class TestatomconfigSkipZeroAtomNumb(unittest.TestCase): def tearDown(self): - if os.path.isfile('atom.config.tmp.1'): - os.remove('atom.config.tmp.1') - if os.path.isfile('atom.config.tmp.2'): - os.remove('atom.config.tmp.2') + if os.path.isfile("atom.config.tmp.1"): + os.remove("atom.config.tmp.1") + if os.path.isfile("atom.config.tmp.2"): + os.remove("atom.config.tmp.2") def test_dump_pwmat_type_map(self): - system0 = dpdata.System(os.path.join('pwmat', 'atom.config.oh'), fmt = 'pwmat/atom.config', type_map = ['H', 'O']) - system0.to_pwmat_atomconfig('atom.config.tmp.1') - system1 = dpdata.System(os.path.join('pwmat', 'atom.config.oh'), fmt = 'pwmat/atom.config', type_map = ['C', 'H', 'A', 'O', 'B']) - system1.to_pwmat_atomconfig('atom.config.tmp.2') - myfilecmp(self, 'atom.config.tmp.1', 'atom.config.tmp.2') + system0 = dpdata.System( + os.path.join("pwmat", "atom.config.oh"), + fmt="pwmat/atom.config", + type_map=["H", "O"], + ) + system0.to_pwmat_atomconfig("atom.config.tmp.1") + system1 = dpdata.System( + os.path.join("pwmat", "atom.config.oh"), + fmt="pwmat/atom.config", + type_map=["C", "H", "A", "O", "B"], + ) + system1.to_pwmat_atomconfig("atom.config.tmp.2") + myfilecmp(self, "atom.config.tmp.1", "atom.config.tmp.2") -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() - diff --git a/tests/test_pwmat_config_to_system.py b/tests/test_pwmat_config_to_system.py index 65772a70..3ff43b66 100644 --- a/tests/test_pwmat_config_to_system.py +++ b/tests/test_pwmat_config_to_system.py @@ -4,18 +4,24 @@ import dpdata from pwmat.config_ref_ch4 import Testconfigch4 + class Testconfig(unittest.TestCase, Testconfigch4): - - def setUp(self): + def setUp(self): self.system = dpdata.System() - self.system.from_pwmat_atomconfig(os.path.join('pwmat', 'atom.config')) + self.system.from_pwmat_atomconfig(os.path.join("pwmat", "atom.config")) + + class TestpwmatconfigTypeMap(unittest.TestCase): def setUp(self): - sys0 = dpdata.System('pwmat/atom.config', fmt = 'atom.config') - sys0.data['atom_names'] = ['A', 'H', 'B', 'C', 'D'] - sys0.data['atom_numbs'] = [ 0, 1, 0, 1, 0] - sys0.data['atom_types'] = np.array([ 0, 0, 0, 1], dtype = int) - sys1 = dpdata.System('pwmat/atom.config', fmt = 'pwmat/atom.config', type_map = ['A', 'H', 'B', 'C', 'D']) + sys0 = dpdata.System("pwmat/atom.config", fmt="atom.config") + sys0.data["atom_names"] = ["A", "H", "B", "C", "D"] + sys0.data["atom_numbs"] = [0, 1, 0, 1, 0] + sys0.data["atom_types"] = np.array([0, 0, 0, 1], dtype=int) + sys1 = dpdata.System( + "pwmat/atom.config", + fmt="pwmat/atom.config", + type_map=["A", "H", "B", "C", "D"], + ) self.system_1 = sys0 self.system_2 = sys1 self.places = 6 @@ -24,5 +30,5 @@ def setUp(self): self.v_places = 6 -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_pwmat_mlmd.py b/tests/test_pwmat_mlmd.py index f3b89c0e..f35ef42b 100644 --- a/tests/test_pwmat_mlmd.py +++ b/tests/test_pwmat_mlmd.py @@ -5,51 +5,58 @@ class TestSingleStep(unittest.TestCase): - def setUp(self): - self.LabeledSystem1 = dpdata.LabeledSystem(os.path.join('pwmat', 'OUT.MLMD'),\ - fmt='movement' ) + self.LabeledSystem1 = dpdata.LabeledSystem( + os.path.join("pwmat", "OUT.MLMD"), fmt="movement" + ) - def test_mlmd(self) : + def test_mlmd(self): - self.assertEqual(self.LabeledSystem1['energies'], -0.2197270691E+03) + self.assertEqual(self.LabeledSystem1["energies"], -0.2197270691e03) self.assertEqual(self.LabeledSystem1.get_nframes(), 1) self.assertEqual(self.LabeledSystem1.get_natoms(), 5) - self.assertEqual(self.LabeledSystem1.data['atom_names'], ['H', 'C']) - self.assertEqual(self.LabeledSystem1.data['atom_numbs'], [4, 1]) - def test_cell(self) : - fp = open('pwmat/mlmd_cell') + self.assertEqual(self.LabeledSystem1.data["atom_names"], ["H", "C"]) + self.assertEqual(self.LabeledSystem1.data["atom_numbs"], [4, 1]) + + def test_cell(self): + fp = open("pwmat/mlmd_cell") cell = [] - for ii in fp : + for ii in fp: cell.append([float(jj) for jj in ii.split()]) cell = np.array(cell) - for ii in range(cell.shape[0]) : - for jj in range(cell.shape[1]) : - self.assertEqual(self.LabeledSystem1.data['cells'][0][ii][jj], cell[ii][jj]) + for ii in range(cell.shape[0]): + for jj in range(cell.shape[1]): + self.assertEqual( + self.LabeledSystem1.data["cells"][0][ii][jj], cell[ii][jj] + ) fp.close() - - def test_coord(self) : - fp = open('pwmat/mlmd_coord') + + def test_coord(self): + fp = open("pwmat/mlmd_coord") coord = [] - for ii in fp : + for ii in fp: coord.append([float(jj) for jj in ii.split()]) coord = np.array(coord) - for ii in range(coord.shape[0]) : - for jj in range(coord.shape[1]) : - self.assertEqual(self.LabeledSystem1.data['coords'][0][ii][jj], coord[ii][jj]*10.0) + for ii in range(coord.shape[0]): + for jj in range(coord.shape[1]): + self.assertEqual( + self.LabeledSystem1.data["coords"][0][ii][jj], coord[ii][jj] * 10.0 + ) fp.close() - def test_force(self) : - fp = open('pwmat/mlmd_force') + + def test_force(self): + fp = open("pwmat/mlmd_force") force = [] - for ii in fp : + for ii in fp: force.append([float(jj) for jj in ii.split()]) force = np.array(force) - for ii in range(force.shape[0]) : - for jj in range(force.shape[1]) : - self.assertEqual(self.LabeledSystem1.data['forces'][0][ii][jj], force[ii][jj]) + for ii in range(force.shape[0]): + for jj in range(force.shape[1]): + self.assertEqual( + self.LabeledSystem1.data["forces"][0][ii][jj], force[ii][jj] + ) fp.close() - -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_pwmat_movement.py b/tests/test_pwmat_movement.py index 6cf2163c..e188c746 100644 --- a/tests/test_pwmat_movement.py +++ b/tests/test_pwmat_movement.py @@ -3,61 +3,64 @@ import unittest import dpdata + class TestpwmatSinglePointEnergy: - def test_atom_names(self) : - self.assertEqual(self.system.data['atom_names'], ['H','C']) - def test_atom_numbs(self) : - self.assertEqual(self.system.data['atom_numbs'], [4,1]) - def test_atom_types(self) : - ref_type = [0,0,0,0,1] - ref_type = np.array(ref_type) - for ii in range(ref_type.shape[0]) : - self.assertEqual(self.system.data['atom_types'][ii], ref_type[ii]) - def test_cell(self) : - fp = open('pwmat/ref_cell') + def test_atom_names(self): + self.assertEqual(self.system.data["atom_names"], ["H", "C"]) + + def test_atom_numbs(self): + self.assertEqual(self.system.data["atom_numbs"], [4, 1]) + + def test_atom_types(self): + ref_type = [0, 0, 0, 0, 1] + ref_type = np.array(ref_type) + for ii in range(ref_type.shape[0]): + self.assertEqual(self.system.data["atom_types"][ii], ref_type[ii]) + + def test_cell(self): + fp = open("pwmat/ref_cell") cell = [] - for ii in fp : + for ii in fp: cell.append([float(jj) for jj in ii.split()]) cell = np.array(cell) - for ii in range(cell.shape[0]) : - for jj in range(cell.shape[1]) : - self.assertEqual(self.system.data['cells'][0][ii][jj], cell[ii][jj]) + for ii in range(cell.shape[0]): + for jj in range(cell.shape[1]): + self.assertEqual(self.system.data["cells"][0][ii][jj], cell[ii][jj]) fp.close() - - def test_coord(self) : - fp = open('pwmat/ref_coord') + def test_coord(self): + fp = open("pwmat/ref_coord") coord = [] - for ii in fp : + for ii in fp: coord.append([float(jj) for jj in ii.split()]) coord = np.array(coord) - for ii in range(coord.shape[0]) : - for jj in range(coord.shape[1]) : - self.assertEqual(self.system.data['coords'][0][ii][jj], coord[ii][jj]*10.0) + for ii in range(coord.shape[0]): + for jj in range(coord.shape[1]): + self.assertEqual( + self.system.data["coords"][0][ii][jj], coord[ii][jj] * 10.0 + ) fp.close() - def test_force(self) : - fp = open('pwmat/ref_force') + def test_force(self): + fp = open("pwmat/ref_force") force = [] - for ii in fp : + for ii in fp: force.append([float(jj) for jj in ii.split()]) force = np.array(force) - for ii in range(force.shape[0]) : - for jj in range(force.shape[1]) : - self.assertEqual(self.system.data['forces'][0][ii][jj], force[ii][jj]) + for ii in range(force.shape[0]): + for jj in range(force.shape[1]): + self.assertEqual(self.system.data["forces"][0][ii][jj], force[ii][jj]) fp.close() - def test_energy(self) : - ref_energy = -0.2196929065E+03 - self.assertEqual(self.system.data['energies'][0], ref_energy) - + def test_energy(self): + ref_energy = -0.2196929065e03 + self.assertEqual(self.system.data["energies"][0], ref_energy) class TestpwmatLabeledOutput(unittest.TestCase, TestpwmatSinglePointEnergy): - def setUp(self): - self.system = dpdata.LabeledSystem('pwmat/MOVEMENT', fmt = 'pwmat/MOVEMENT') + self.system = dpdata.LabeledSystem("pwmat/MOVEMENT", fmt="pwmat/MOVEMENT") -if __name__ == '__main__': - unittest.main() +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_pymatgen_molecule.py b/tests/test_pymatgen_molecule.py index d80acc92..8c3e72b6 100644 --- a/tests/test_pymatgen_molecule.py +++ b/tests/test_pymatgen_molecule.py @@ -2,37 +2,37 @@ import numpy as np import unittest from context import dpdata + try: import pymatgen except ModuleNotFoundError: - skip_pymatgen=True + skip_pymatgen = True else: - skip_pymatgen=False + skip_pymatgen = False + -@unittest.skipIf(skip_pymatgen,"skip pymatgen related test. install pymatgen to fix") +@unittest.skipIf(skip_pymatgen, "skip pymatgen related test. install pymatgen to fix") class TestPOSCARCart(unittest.TestCase): - - def setUp(self): + def setUp(self): self.system = dpdata.System() - self.system.from_pymatgen_molecule(os.path.join('pymatgen_data', 'FA-001.xyz')) + self.system.from_pymatgen_molecule(os.path.join("pymatgen_data", "FA-001.xyz")) self.assertEqual(list(self.system["atom_types"]), [0, 1, 2, 1, 1, 2, 1, 1]) def test_poscar_to_molecule(self): tmp_system = dpdata.System() - tmp_system.from_vasp_poscar(os.path.join('pymatgen_data', 'mol2.vasp')) - natoms = len(tmp_system['coords'][0]) - tmpcoord = tmp_system['coords'][0] - cog = np.average(tmpcoord, axis = 0) + tmp_system.from_vasp_poscar(os.path.join("pymatgen_data", "mol2.vasp")) + natoms = len(tmp_system["coords"][0]) + tmpcoord = tmp_system["coords"][0] + cog = np.average(tmpcoord, axis=0) dist = tmpcoord - np.tile(cog, [natoms, 1]) - max_dist_0 = np.max(np.linalg.norm(dist, axis = 1)) + max_dist_0 = np.max(np.linalg.norm(dist, axis=1)) mols = tmp_system.to("pymatgen/molecule") - cog = np.average(mols[-1].cart_coords, axis = 0) + cog = np.average(mols[-1].cart_coords, axis=0) dist = mols[-1].cart_coords - np.tile(cog, [natoms, 1]) - max_dist_1 = np.max(np.linalg.norm(dist, axis = 1)) + max_dist_1 = np.max(np.linalg.norm(dist, axis=1)) self.assertAlmostEqual(max_dist_0, max_dist_1) - -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_qe_cp_traj.py b/tests/test_qe_cp_traj.py index cad93fa6..2cb982ce 100644 --- a/tests/test_qe_cp_traj.py +++ b/tests/test_qe_cp_traj.py @@ -5,58 +5,59 @@ bohr2ang = dpdata.unit.LengthConversion("bohr", "angstrom").value() -class TestCPTRAJProps : - def test_atom_names(self) : - self.assertEqual(self.system.data['atom_names'], ['O','H']) - def test_atom_numbs(self) : - self.assertEqual(self.system.data['atom_numbs'], [64,127]) +class TestCPTRAJProps: + def test_atom_names(self): + self.assertEqual(self.system.data["atom_names"], ["O", "H"]) - def test_atom_types(self) : - for ii in range(0,64) : - self.assertEqual(self.system.data['atom_types'][ii], 0) - for ii in range(64,191) : - self.assertEqual(self.system.data['atom_types'][ii], 1) + def test_atom_numbs(self): + self.assertEqual(self.system.data["atom_numbs"], [64, 127]) - def test_cell(self) : + def test_atom_types(self): + for ii in range(0, 64): + self.assertEqual(self.system.data["atom_types"][ii], 0) + for ii in range(64, 191): + self.assertEqual(self.system.data["atom_types"][ii], 1) + + def test_cell(self): ref = bohr2ang * 23.5170 * np.eye(3) self.assertEqual(self.system.get_nframes(), 2) - for ff in range(self.system.get_nframes()) : - for ii in range(3) : - for jj in range(3) : - self.assertEqual(self.system['cells'][ff][ii][jj], ref[ii][jj]) - - def test_coord(self) : - with open('qe.traj/oh-md.pos') as fp : - lines = fp.read().rstrip('\n').split('\n') + for ff in range(self.system.get_nframes()): + for ii in range(3): + for jj in range(3): + self.assertEqual(self.system["cells"][ff][ii][jj], ref[ii][jj]) + + def test_coord(self): + with open("qe.traj/oh-md.pos") as fp: + lines = fp.read().rstrip("\n").split("\n") lines = lines[-191:] coords = [] - for ii in lines : + for ii in lines: coords.append([float(jj) for jj in ii.split()]) coords = bohr2ang * np.array(coords) - celll = bohr2ang * 23.5170 - for ii in range(coords.shape[0]) : - for jj in range(coords[ii].size) : - if coords[ii][jj] < 0 : + celll = bohr2ang * 23.5170 + for ii in range(coords.shape[0]): + for jj in range(coords[ii].size): + if coords[ii][jj] < 0: coords[ii][jj] += celll - elif coords[ii][jj] >= celll : + elif coords[ii][jj] >= celll: coords[ii][jj] -= celll - self.assertAlmostEqual(self.system['coords'][-1][ii][jj], coords[ii][jj]) - - -class TestCPTRAJTraj(unittest.TestCase, TestCPTRAJProps): + self.assertAlmostEqual( + self.system["coords"][-1][ii][jj], coords[ii][jj] + ) - def setUp(self): - self.system = dpdata.System('qe.traj/oh-md', fmt = 'qe/cp/traj') +class TestCPTRAJTraj(unittest.TestCase, TestCPTRAJProps): + def setUp(self): + self.system = dpdata.System("qe.traj/oh-md", fmt="qe/cp/traj") -class TestCPTRAJLabeledTraj(unittest.TestCase, TestCPTRAJProps): - def setUp(self): - self.system = dpdata.LabeledSystem('qe.traj/oh-md', fmt = 'qe/cp/traj') +class TestCPTRAJLabeledTraj(unittest.TestCase, TestCPTRAJProps): + def setUp(self): + self.system = dpdata.LabeledSystem("qe.traj/oh-md", fmt="qe/cp/traj") -class TestConverCellDim(unittest.TestCase): +class TestConverCellDim(unittest.TestCase): def test_case_null(self): cell = dpdata.qe.traj.convert_celldm(8, [1, 1, 1]) ref = np.eye(3) @@ -65,6 +66,5 @@ def test_case_null(self): self.assertAlmostEqual(cell[ii][jj], ref[ii][jj]) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() - diff --git a/tests/test_qe_cp_traj_skipload.py b/tests/test_qe_cp_traj_skipload.py index b0d92e9d..e8cc84bf 100644 --- a/tests/test_qe_cp_traj_skipload.py +++ b/tests/test_qe_cp_traj_skipload.py @@ -8,53 +8,69 @@ bohr2ang = dpdata.unit.LengthConversion("bohr", "angstrom").value() + class TestPWSCFTrajSkip(unittest.TestCase, CompSys, IsPBC): - def setUp(self): - self.system_1 = dpdata.System(os.path.join('qe.traj', 'traj6'), - fmt = 'qe/cp/traj', - begin = 1, - step = 2) - self.system_2 = dpdata.System(os.path.join('qe.traj', 'traj6'), - fmt = 'qe/cp/traj', - begin = 0, - step = 1) \ - .sub_system(np.arange(1,6,2)) + def setUp(self): + self.system_1 = dpdata.System( + os.path.join("qe.traj", "traj6"), fmt="qe/cp/traj", begin=1, step=2 + ) + self.system_2 = dpdata.System( + os.path.join("qe.traj", "traj6"), fmt="qe/cp/traj", begin=0, step=1 + ).sub_system(np.arange(1, 6, 2)) self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 4 + class TestPWSCFLabeledTrajSkip(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp(self): - self.system_1 = dpdata.LabeledSystem(os.path.join('qe.traj', 'traj6'), - fmt = 'qe/cp/traj', - begin = 1, - step = 2) - self.system_2 = dpdata.LabeledSystem(os.path.join('qe.traj', 'traj6'), - fmt = 'qe/cp/traj', - begin = 0, - step = 1) \ - .sub_system(np.arange(1,6,2)) + def setUp(self): + self.system_1 = dpdata.LabeledSystem( + os.path.join("qe.traj", "traj6"), fmt="qe/cp/traj", begin=1, step=2 + ) + self.system_2 = dpdata.LabeledSystem( + os.path.join("qe.traj", "traj6"), fmt="qe/cp/traj", begin=0, step=1 + ).sub_system(np.arange(1, 6, 2)) self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 4 def test_cell(self): - ref_cell = [5.359985500701728967e+00, 0, 0, - 3.585941820098031974e-01, 5.317218997480877896e+00, 0, - 7.606780476053129902e-01, 7.811107228901693622e-01, 5.715864930517207121e+00 ] - ref_cell = bohr2ang * np.array(ref_cell).reshape(3,3) - - for ii in range(3) : - for jj in range(3) : - self.assertEqual(self.system_1.data['cells'][0][ii][jj], ref_cell[ii][jj]) - - ref_cell = [5.308510801020571712e+00, 0, 0, - 3.076052782312116429e-01, 5.279388982187173340e+00, 0, - 4.321921336152507731e-01, 8.121110815096156399e-01, 5.301664983741235737e+00] - ref_cell = bohr2ang * np.array(ref_cell).reshape(3,3) - - for ii in range(3) : - for jj in range(3) : - self.assertEqual(self.system_1.data['cells'][-1][ii][jj], ref_cell[ii][jj]) + ref_cell = [ + 5.359985500701728967e00, + 0, + 0, + 3.585941820098031974e-01, + 5.317218997480877896e00, + 0, + 7.606780476053129902e-01, + 7.811107228901693622e-01, + 5.715864930517207121e00, + ] + ref_cell = bohr2ang * np.array(ref_cell).reshape(3, 3) + + for ii in range(3): + for jj in range(3): + self.assertEqual( + self.system_1.data["cells"][0][ii][jj], ref_cell[ii][jj] + ) + + ref_cell = [ + 5.308510801020571712e00, + 0, + 0, + 3.076052782312116429e-01, + 5.279388982187173340e00, + 0, + 4.321921336152507731e-01, + 8.121110815096156399e-01, + 5.301664983741235737e00, + ] + ref_cell = bohr2ang * np.array(ref_cell).reshape(3, 3) + + for ii in range(3): + for jj in range(3): + self.assertEqual( + self.system_1.data["cells"][-1][ii][jj], ref_cell[ii][jj] + ) diff --git a/tests/test_qe_pw_scf.py b/tests/test_qe_pw_scf.py index 391e763e..4cb78cbd 100644 --- a/tests/test_qe_pw_scf.py +++ b/tests/test_qe_pw_scf.py @@ -3,128 +3,144 @@ import unittest from context import dpdata -class TestPWSCFSinglePointEnergy: - - def test_atom_names(self) : - self.assertEqual(self.system_ch4.data['atom_names'], ['H','C']) - self.assertEqual(self.system_h2o.data['atom_names'], ['O','H']) - - def test_atom_numbs(self) : - self.assertEqual(self.system_ch4.data['atom_numbs'], [4,1]) - self.assertEqual(self.system_h2o.data['atom_numbs'], [64,128]) - def test_atom_types(self) : - ref_type = [0,0,0,0,1] - ref_type = np.array(ref_type) - for ii in range(ref_type.shape[0]) : - self.assertEqual(self.system_ch4.data['atom_types'][ii], ref_type[ii]) - - ref_type = [0]*64 + [1]*128 - ref_type = np.array(ref_type) - for ii in range(ref_type.shape[0]) : - self.assertEqual(self.system_h2o.data['atom_types'][ii], ref_type[ii]) - - def test_cell(self) : +class TestPWSCFSinglePointEnergy: + def test_atom_names(self): + self.assertEqual(self.system_ch4.data["atom_names"], ["H", "C"]) + self.assertEqual(self.system_h2o.data["atom_names"], ["O", "H"]) + + def test_atom_numbs(self): + self.assertEqual(self.system_ch4.data["atom_numbs"], [4, 1]) + self.assertEqual(self.system_h2o.data["atom_numbs"], [64, 128]) + + def test_atom_types(self): + ref_type = [0, 0, 0, 0, 1] + ref_type = np.array(ref_type) + for ii in range(ref_type.shape[0]): + self.assertEqual(self.system_ch4.data["atom_types"][ii], ref_type[ii]) + + ref_type = [0] * 64 + [1] * 128 + ref_type = np.array(ref_type) + for ii in range(ref_type.shape[0]): + self.assertEqual(self.system_h2o.data["atom_types"][ii], ref_type[ii]) + + def test_cell(self): cell = 10 * np.eye(3) - for ii in range(cell.shape[0]) : - for jj in range(cell.shape[1]) : - self.assertAlmostEqual(self.system_ch4.data['cells'][0][ii][jj], cell[ii][jj]) + for ii in range(cell.shape[0]): + for jj in range(cell.shape[1]): + self.assertAlmostEqual( + self.system_ch4.data["cells"][0][ii][jj], cell[ii][jj] + ) - fp = open('qe.scf/h2o_cell') + fp = open("qe.scf/h2o_cell") cell = [] - for ii in fp : + for ii in fp: cell.append([float(jj) for jj in ii.split()]) cell = np.array(cell) - for ii in range(cell.shape[0]) : - for jj in range(cell.shape[1]) : - self.assertAlmostEqual(self.system_h2o.data['cells'][0][ii][jj], cell[ii][jj]) + for ii in range(cell.shape[0]): + for jj in range(cell.shape[1]): + self.assertAlmostEqual( + self.system_h2o.data["cells"][0][ii][jj], cell[ii][jj] + ) fp.close() - - def test_coord(self) : - fp = open('qe.scf/ch4_coord') + def test_coord(self): + fp = open("qe.scf/ch4_coord") coord = [] - for ii in fp : + for ii in fp: coord.append([float(jj) for jj in ii.split()]) coord = np.array(coord) - for ii in range(coord.shape[0]) : - for jj in range(coord.shape[1]) : - self.assertAlmostEqual(self.system_ch4.data['coords'][0][ii][jj], coord[ii][jj]) + for ii in range(coord.shape[0]): + for jj in range(coord.shape[1]): + self.assertAlmostEqual( + self.system_ch4.data["coords"][0][ii][jj], coord[ii][jj] + ) fp.close() - fp = open('qe.scf/h2o_coord') + fp = open("qe.scf/h2o_coord") coord = [] - for ii in fp : + for ii in fp: coord.append([float(jj) for jj in ii.split()]) coord = np.array(coord) - for ii in range(coord.shape[0]) : - for jj in range(coord.shape[1]) : - self.assertAlmostEqual(self.system_h2o.data['coords'][0][ii][jj], coord[ii][jj]) + for ii in range(coord.shape[0]): + for jj in range(coord.shape[1]): + self.assertAlmostEqual( + self.system_h2o.data["coords"][0][ii][jj], coord[ii][jj] + ) fp.close() - def test_force(self) : - fp = open('qe.scf/ch4_force') + def test_force(self): + fp = open("qe.scf/ch4_force") force = [] - for ii in fp : + for ii in fp: force.append([float(jj) for jj in ii.split()]) force = np.array(force) - for ii in range(force.shape[0]) : - for jj in range(force.shape[1]) : - self.assertAlmostEqual(self.system_ch4.data['forces'][0][ii][jj], force[ii][jj]) + for ii in range(force.shape[0]): + for jj in range(force.shape[1]): + self.assertAlmostEqual( + self.system_ch4.data["forces"][0][ii][jj], force[ii][jj] + ) fp.close() - fp = open('qe.scf/h2o_force') + fp = open("qe.scf/h2o_force") force = [] - for ii in fp : + for ii in fp: force.append([float(jj) for jj in ii.split()]) force = np.array(force) - for ii in range(force.shape[0]) : - for jj in range(force.shape[1]) : - self.assertAlmostEqual(self.system_h2o.data['forces'][0][ii][jj], force[ii][jj]) + for ii in range(force.shape[0]): + for jj in range(force.shape[1]): + self.assertAlmostEqual( + self.system_h2o.data["forces"][0][ii][jj], force[ii][jj] + ) fp.close() - def test_virial(self) : - fp = open('qe.scf/ch4_virial') + def test_virial(self): + fp = open("qe.scf/ch4_virial") virial = [] - for ii in fp : + for ii in fp: virial.append([float(jj) for jj in ii.split()]) virial = np.array(virial) - for ii in range(virial.shape[0]) : - for jj in range(virial.shape[1]) : - self.assertAlmostEqual(self.system_ch4.data['virials'][0][ii][jj], virial[ii][jj], places = 3) + for ii in range(virial.shape[0]): + for jj in range(virial.shape[1]): + self.assertAlmostEqual( + self.system_ch4.data["virials"][0][ii][jj], virial[ii][jj], places=3 + ) fp.close() - fp = open('qe.scf/h2o_virial') + fp = open("qe.scf/h2o_virial") virial = [] - for ii in fp : + for ii in fp: virial.append([float(jj) for jj in ii.split()]) virial = np.array(virial) - for ii in range(virial.shape[0]) : - for jj in range(virial.shape[1]) : - self.assertAlmostEqual(self.system_h2o.data['virials'][0][ii][jj], virial[ii][jj], places = 2) + for ii in range(virial.shape[0]): + for jj in range(virial.shape[1]): + self.assertAlmostEqual( + self.system_h2o.data["virials"][0][ii][jj], virial[ii][jj], places=2 + ) fp.close() - def test_energy(self) : + def test_energy(self): ref_energy = -219.74425946528794 - self.assertAlmostEqual(self.system_ch4.data['energies'][0], ref_energy) + self.assertAlmostEqual(self.system_ch4.data["energies"][0], ref_energy) ref_energy = -30007.651851226798 - self.assertAlmostEqual(self.system_h2o.data['energies'][0], ref_energy) - + self.assertAlmostEqual(self.system_h2o.data["energies"][0], ref_energy) class TestPWSCFLabeledOutput(unittest.TestCase, TestPWSCFSinglePointEnergy): - def setUp(self): - self.system_ch4 = dpdata.LabeledSystem('qe.scf/01.out',fmt='qe/pw/scf') - self.system_h2o = dpdata.LabeledSystem('qe.scf/02.out',fmt='qe/pw/scf') + self.system_ch4 = dpdata.LabeledSystem("qe.scf/01.out", fmt="qe/pw/scf") + self.system_h2o = dpdata.LabeledSystem("qe.scf/02.out", fmt="qe/pw/scf") -class TestPWSCFLabeledOutputListInput(unittest.TestCase, TestPWSCFSinglePointEnergy): +class TestPWSCFLabeledOutputListInput(unittest.TestCase, TestPWSCFSinglePointEnergy): def setUp(self): - self.system_ch4 = dpdata.LabeledSystem(['qe.scf/01.in', 'qe.scf/01.out'], fmt='qe/pw/scf') - self.system_h2o = dpdata.LabeledSystem(['qe.scf/02.in', 'qe.scf/02.out'], fmt='qe/pw/scf') + self.system_ch4 = dpdata.LabeledSystem( + ["qe.scf/01.in", "qe.scf/01.out"], fmt="qe/pw/scf" + ) + self.system_h2o = dpdata.LabeledSystem( + ["qe.scf/02.in", "qe.scf/02.out"], fmt="qe/pw/scf" + ) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() - diff --git a/tests/test_qe_pw_scf_crystal_atomic_positions.py b/tests/test_qe_pw_scf_crystal_atomic_positions.py index 22addec4..19dc7b59 100644 --- a/tests/test_qe_pw_scf_crystal_atomic_positions.py +++ b/tests/test_qe_pw_scf_crystal_atomic_positions.py @@ -3,24 +3,32 @@ import unittest from context import dpdata + class TestPWSCFCrystalAtomicPosition: + def test_coord(self): + ref_coord = np.array( + [[0, 0, 0], [0, 2.02, 2.02], [2.02, 0, 2.02], [2.02, 2.02, 0]] + ) + for ii in range(ref_coord.shape[0]): + for jj in range(ref_coord.shape[1]): + self.assertAlmostEqual( + self.system_al.data["coords"][0][ii][jj], ref_coord[ii][jj] + ) - def test_coord(self) : - ref_coord = np.array([[0,0,0], [0, 2.02, 2.02], [2.02, 0, 2.02], [2.02, 2.02, 0]]) - for ii in range(ref_coord.shape[0]) : - for jj in range(ref_coord.shape[1]) : - self.assertAlmostEqual(self.system_al.data['coords'][0][ii][jj], ref_coord[ii][jj]) class TestPWSCFLabeledOutput(unittest.TestCase, TestPWSCFCrystalAtomicPosition): - def setUp(self): - self.system_al = dpdata.LabeledSystem('qe.scf/Al.out',fmt='qe/pw/scf') + self.system_al = dpdata.LabeledSystem("qe.scf/Al.out", fmt="qe/pw/scf") -class TestPWSCFLabeledOutputListInput(unittest.TestCase, TestPWSCFCrystalAtomicPosition): +class TestPWSCFLabeledOutputListInput( + unittest.TestCase, TestPWSCFCrystalAtomicPosition +): def setUp(self): - self.system_al = dpdata.LabeledSystem(['qe.scf/Al.in', 'qe.scf/Al.out'], fmt='qe/pw/scf') + self.system_al = dpdata.LabeledSystem( + ["qe.scf/Al.in", "qe.scf/Al.out"], fmt="qe/pw/scf" + ) -if __name__ == '__main__': - unittest.main() +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_qe_pw_scf_energy_bug.py b/tests/test_qe_pw_scf_energy_bug.py index 975aca48..85d79355 100644 --- a/tests/test_qe_pw_scf_energy_bug.py +++ b/tests/test_qe_pw_scf_energy_bug.py @@ -3,22 +3,24 @@ import unittest from context import dpdata -class TestPWSCFSinglePointEnergy: - def test_energy(self) : +class TestPWSCFSinglePointEnergy: + def test_energy(self): ref_energy = -296.08379065679094669 - self.assertAlmostEqual(self.system_al.data['energies'][0], ref_energy) + self.assertAlmostEqual(self.system_al.data["energies"][0], ref_energy) -class TestPWSCFLabeledOutput(unittest.TestCase, TestPWSCFSinglePointEnergy): +class TestPWSCFLabeledOutput(unittest.TestCase, TestPWSCFSinglePointEnergy): def setUp(self): - self.system_al = dpdata.LabeledSystem('qe.scf/Al.out',fmt='qe/pw/scf') + self.system_al = dpdata.LabeledSystem("qe.scf/Al.out", fmt="qe/pw/scf") -class TestPWSCFLabeledOutputListInput(unittest.TestCase, TestPWSCFSinglePointEnergy): +class TestPWSCFLabeledOutputListInput(unittest.TestCase, TestPWSCFSinglePointEnergy): def setUp(self): - self.system_al = dpdata.LabeledSystem(['qe.scf/Al.in', 'qe.scf/Al.out'], fmt='qe/pw/scf') + self.system_al = dpdata.LabeledSystem( + ["qe.scf/Al.in", "qe.scf/Al.out"], fmt="qe/pw/scf" + ) -if __name__ == '__main__': - unittest.main() +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_quip_gap_xyz.py b/tests/test_quip_gap_xyz.py index 8a023bc4..27285606 100644 --- a/tests/test_quip_gap_xyz.py +++ b/tests/test_quip_gap_xyz.py @@ -4,88 +4,116 @@ from context import dpdata from comp_sys import CompLabeledSys, IsPBC + class TestQuipGapxyz1(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : - self.multi_systems = dpdata.MultiSystems.from_file('xyz/xyz_unittest.xyz','quip/gap/xyz') - self.system_1 = self.multi_systems.systems['B1C9'] - self.system_2 = dpdata.LabeledSystem('xyz/B1C9', fmt='deepmd') + def setUp(self): + self.multi_systems = dpdata.MultiSystems.from_file( + "xyz/xyz_unittest.xyz", "quip/gap/xyz" + ) + self.system_1 = self.multi_systems.systems["B1C9"] + self.system_2 = dpdata.LabeledSystem("xyz/B1C9", fmt="deepmd") self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 4 + class TestQuipGapxyz2(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : - self.system_temp0 = dpdata.MultiSystems.from_file(file_name='xyz/xyz_unittest.xyz', fmt='quip/gap/xyz') - self.system_1 = self.system_temp0.systems['B5C7'] # .sort_atom_types() - self.system_temp1 = dpdata.LabeledSystem('xyz/B1C9', fmt='deepmd') - self.system_temp2 = dpdata.LabeledSystem('xyz/B5C7', fmt='deepmd') + def setUp(self): + self.system_temp0 = dpdata.MultiSystems.from_file( + file_name="xyz/xyz_unittest.xyz", fmt="quip/gap/xyz" + ) + self.system_1 = self.system_temp0.systems["B5C7"] # .sort_atom_types() + self.system_temp1 = dpdata.LabeledSystem("xyz/B1C9", fmt="deepmd") + self.system_temp2 = dpdata.LabeledSystem("xyz/B5C7", fmt="deepmd") self.system_temp3 = dpdata.MultiSystems(self.system_temp2, self.system_temp1) - self.system_2 = self.system_temp3.systems['B5C7'] + self.system_2 = self.system_temp3.systems["B5C7"] self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 4 + class TestQuipGapxyzsort1(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : - self.multi_systems_1 = dpdata.MultiSystems.from_file('xyz/xyz_unittest.sort.xyz','quip/gap/xyz') - self.system_1 = self.multi_systems_1.systems['B5C7'] + def setUp(self): + self.multi_systems_1 = dpdata.MultiSystems.from_file( + "xyz/xyz_unittest.sort.xyz", "quip/gap/xyz" + ) + self.system_1 = self.multi_systems_1.systems["B5C7"] self.system_1.sort_atom_types() - self.multi_systems_2 = dpdata.MultiSystems.from_file('xyz/xyz_unittest.xyz','quip/gap/xyz') - self.system_2 = self.multi_systems_2.systems['B5C7'] + self.multi_systems_2 = dpdata.MultiSystems.from_file( + "xyz/xyz_unittest.xyz", "quip/gap/xyz" + ) + self.system_2 = self.multi_systems_2.systems["B5C7"] self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 4 + class TestQuipGapxyzsort2(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : - self.multi_systems_1 = dpdata.MultiSystems.from_file('xyz/xyz_unittest.sort.xyz','quip/gap/xyz') - self.system_1 = self.multi_systems_1.systems['B1C9'] + def setUp(self): + self.multi_systems_1 = dpdata.MultiSystems.from_file( + "xyz/xyz_unittest.sort.xyz", "quip/gap/xyz" + ) + self.system_1 = self.multi_systems_1.systems["B1C9"] self.system_1.sort_atom_types() - self.multi_systems_2 = dpdata.MultiSystems.from_file('xyz/xyz_unittest.xyz','quip/gap/xyz') - self.system_2 = self.multi_systems_2.systems['B1C9'] + self.multi_systems_2 = dpdata.MultiSystems.from_file( + "xyz/xyz_unittest.xyz", "quip/gap/xyz" + ) + self.system_2 = self.multi_systems_2.systems["B1C9"] self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 4 + class TestQuipGapxyzfield(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : - self.multi_systems_1 = dpdata.MultiSystems.from_file('xyz/xyz_unittest.field.xyz','quip/gap/xyz') - self.system_1 = self.multi_systems_1.systems['B1C9'] + def setUp(self): + self.multi_systems_1 = dpdata.MultiSystems.from_file( + "xyz/xyz_unittest.field.xyz", "quip/gap/xyz" + ) + self.system_1 = self.multi_systems_1.systems["B1C9"] self.system_1.sort_atom_types() - self.multi_systems_2 = dpdata.MultiSystems.from_file('xyz/xyz_unittest.xyz','quip/gap/xyz') - self.system_2 = self.multi_systems_2.systems['B1C9'] + self.multi_systems_2 = dpdata.MultiSystems.from_file( + "xyz/xyz_unittest.xyz", "quip/gap/xyz" + ) + self.system_2 = self.multi_systems_2.systems["B1C9"] self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 4 + class TestQuipGapxyzfield2(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : - self.multi_systems_1 = dpdata.MultiSystems.from_file('xyz/xyz_unittest.field.xyz','quip/gap/xyz') - self.system_1 = self.multi_systems_1.systems['B5C7'] + def setUp(self): + self.multi_systems_1 = dpdata.MultiSystems.from_file( + "xyz/xyz_unittest.field.xyz", "quip/gap/xyz" + ) + self.system_1 = self.multi_systems_1.systems["B5C7"] self.system_1.sort_atom_types() - self.multi_systems_2 = dpdata.MultiSystems.from_file('xyz/xyz_unittest.xyz','quip/gap/xyz') - self.system_2 = self.multi_systems_2.systems['B5C7'] + self.multi_systems_2 = dpdata.MultiSystems.from_file( + "xyz/xyz_unittest.xyz", "quip/gap/xyz" + ) + self.system_2 = self.multi_systems_2.systems["B5C7"] self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 4 + class TestQuipGapxyzNoVirials(unittest.TestCase, CompLabeledSys, IsPBC): def setUp(self): - self.multi_systems_1 = dpdata.MultiSystems.from_file('xyz/xyz_B5C7_novirials.xyz', fmt='quip/gap/xyz') - self.system_1 = self.multi_systems_1.systems['B5C7'] + self.multi_systems_1 = dpdata.MultiSystems.from_file( + "xyz/xyz_B5C7_novirials.xyz", fmt="quip/gap/xyz" + ) + self.system_1 = self.multi_systems_1.systems["B5C7"] self.system_1.sort_atom_types() - self.system_2 = dpdata.LabeledSystem('xyz/B5C7_novirials', fmt='deepmd/raw') + self.system_2 = dpdata.LabeledSystem("xyz/B5C7_novirials", fmt="deepmd/raw") self.places = 6 self.e_places = 6 self.f_places = 6 - -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_remove_atom_names.py b/tests/test_remove_atom_names.py index 043511fb..4d4e23ec 100644 --- a/tests/test_remove_atom_names.py +++ b/tests/test_remove_atom_names.py @@ -3,14 +3,20 @@ from comp_sys import CompLabeledSys from comp_sys import IsNoPBC + class TestRemove(unittest.TestCase, CompLabeledSys, IsNoPBC): def setUp(self): - self.system_1 = dpdata.LabeledSystem("amber/corr/dp_amber_mask/C6EP0H11HW192O6OW96P1", fmt="deepmd/npy").remove_atom_names('EP') - self.system_2 = dpdata.LabeledSystem("amber/corr/dataset/C6H11HW192O6OW96P1", fmt="deepmd/npy") + self.system_1 = dpdata.LabeledSystem( + "amber/corr/dp_amber_mask/C6EP0H11HW192O6OW96P1", fmt="deepmd/npy" + ).remove_atom_names("EP") + self.system_2 = dpdata.LabeledSystem( + "amber/corr/dataset/C6H11HW192O6OW96P1", fmt="deepmd/npy" + ) self.places = 5 self.e_places = 4 self.f_places = 6 self.v_places = 6 -if __name__ == '__main__': - unittest.main() \ No newline at end of file + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_remove_pbc.py b/tests/test_remove_pbc.py index ea2fc220..558587b0 100644 --- a/tests/test_remove_pbc.py +++ b/tests/test_remove_pbc.py @@ -3,23 +3,29 @@ import unittest from context import dpdata -class TestRemovePBC(unittest.TestCase): +class TestRemovePBC(unittest.TestCase): def test_remove(self): - coords = np.array([[[-1, -1, 2], [-1,-1,-3], [-1,-1, 7]], - [[ 3, -1, 3], [-1,-1, 3], [ 7,-1, 3]]], dtype = float) - cogs = np.average(coords, axis = 1) - data = {'atom_names' : ['A', 'B'], - 'atom_numbs' : [1, 2], - 'atom_types' : np.array([1, 0, 1], dtype = int), - 'orig': np.array([0, 0, 0]), - 'coords': coords, - 'cells': np.random.random([2, 3, 3]), + coords = np.array( + [ + [[-1, -1, 2], [-1, -1, -3], [-1, -1, 7]], + [[3, -1, 3], [-1, -1, 3], [7, -1, 3]], + ], + dtype=float, + ) + cogs = np.average(coords, axis=1) + data = { + "atom_names": ["A", "B"], + "atom_numbs": [1, 2], + "atom_types": np.array([1, 0, 1], dtype=int), + "orig": np.array([0, 0, 0]), + "coords": coords, + "cells": np.random.random([2, 3, 3]), } - sys = dpdata.System(data = data) + sys = dpdata.System(data=data) proct = 9.0 - - mol_size = np.array([5, 4], dtype = float) + + mol_size = np.array([5, 4], dtype=float) cell_size = (mol_size + proct) * 2.0 sys.remove_pbc(proct) @@ -28,10 +34,16 @@ def test_remove(self): ref = cell_size[ff] * np.eye(3) for ii in range(3): for jj in range(3): - self.assertAlmostEqual(sys['cells'][ff][ii][jj], ref[ii][jj], msg = '%d %d %d' %(ff, ii, jj)) + self.assertAlmostEqual( + sys["cells"][ff][ii][jj], + ref[ii][jj], + msg="%d %d %d" % (ff, ii, jj), + ) dists = [] for ii in range(sys.get_natoms()): for jj in range(3): - dists.append(np.abs(sys['coords'][ff][ii][jj])) - dists.append(np.abs(sys['cells'][ff][jj][jj] - sys['coords'][ff][ii][jj])) + dists.append(np.abs(sys["coords"][ff][ii][jj])) + dists.append( + np.abs(sys["cells"][ff][jj][jj] - sys["coords"][ff][ii][jj]) + ) self.assertAlmostEqual(np.min(dists), proct) diff --git a/tests/test_replace.py b/tests/test_replace.py index a8dd917b..f0f4430f 100644 --- a/tests/test_replace.py +++ b/tests/test_replace.py @@ -5,30 +5,35 @@ from comp_sys import CompSys, IsPBC from unittest.mock import Mock -from unittest.mock import patch, MagicMock +from unittest.mock import patch, MagicMock class ConstGenerator(object): def __init__(self): self.choice_generator = self.get_choice_generator() + def choice(self, a, size=None, replace=True, p=None): return next(self.choice_generator) @staticmethod def get_choice_generator(): - yield np.asarray([20, 6, 7, 22, 29, 2, 23, 10]) + yield np.asarray([20, 6, 7, 22, 29, 2, 23, 10]) + class TestReplace(unittest.TestCase, CompSys, IsPBC): - @patch('numpy.random') + @patch("numpy.random") def setUp(self, random_mock): random_mock.choice = ConstGenerator().choice - self.system_1 = dpdata.System('poscars/POSCAR.P42nmc',fmt='vasp/poscar') - self.system_1.replace('Hf', 'Zr', 8) + self.system_1 = dpdata.System("poscars/POSCAR.P42nmc", fmt="vasp/poscar") + self.system_1.replace("Hf", "Zr", 8) # print(self.system_1.data) - self.system_2 = dpdata.System('poscars/POSCAR.P42nmc.replace',fmt='vasp/poscar') + self.system_2 = dpdata.System( + "poscars/POSCAR.P42nmc.replace", fmt="vasp/poscar" + ) # print(self.system_2.data) self.places = 6 + # class TestReplicate123_not_change_origin(unittest.TestCase, CompSys, IsPBC): # def setUp (self) : # self.system_1 = dpdata.System('poscars/POSCAR.SiC',fmt='vasp/poscar') @@ -36,5 +41,5 @@ def setUp(self, random_mock): # self.system_2 = dpdata.System('poscars/POSCAR.SiC',fmt='vasp/poscar') # self.places = 6 -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_replicate.py b/tests/test_replicate.py index 16ef8636..1a7590b6 100644 --- a/tests/test_replicate.py +++ b/tests/test_replicate.py @@ -4,19 +4,36 @@ from context import dpdata from comp_sys import CompSys, IsPBC + class TestReplicate123(unittest.TestCase, CompSys, IsPBC): - def setUp (self) : - system_1_origin = dpdata.System('poscars/POSCAR.SiC',fmt='vasp/poscar') - self.system_1 = system_1_origin.replicate((1,2,3,)) - self.system_2 = dpdata.System('poscars/POSCAR.SiC.replicate123',fmt='vasp/poscar') + def setUp(self): + system_1_origin = dpdata.System("poscars/POSCAR.SiC", fmt="vasp/poscar") + self.system_1 = system_1_origin.replicate( + ( + 1, + 2, + 3, + ) + ) + self.system_2 = dpdata.System( + "poscars/POSCAR.SiC.replicate123", fmt="vasp/poscar" + ) self.places = 6 + class TestReplicate123_not_change_origin(unittest.TestCase, CompSys, IsPBC): - def setUp (self) : - self.system_1 = dpdata.System('poscars/POSCAR.SiC',fmt='vasp/poscar') - self.system_1.replicate((1,2,3,)) - self.system_2 = dpdata.System('poscars/POSCAR.SiC',fmt='vasp/poscar') + def setUp(self): + self.system_1 = dpdata.System("poscars/POSCAR.SiC", fmt="vasp/poscar") + self.system_1.replicate( + ( + 1, + 2, + 3, + ) + ) + self.system_2 = dpdata.System("poscars/POSCAR.SiC", fmt="vasp/poscar") self.places = 6 -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/tests/test_shuffle.py b/tests/test_shuffle.py index aa2d7a7e..8acbb42a 100644 --- a/tests/test_shuffle.py +++ b/tests/test_shuffle.py @@ -2,10 +2,12 @@ from context import dpdata from comp_sys import CompLabeledSys, IsPBC + class TestDeepmdLoadRaw(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : - original_system = dpdata.LabeledSystem('poscars/OUTCAR.h2o.md', - fmt = 'vasp/outcar') + def setUp(self): + original_system = dpdata.LabeledSystem( + "poscars/OUTCAR.h2o.md", fmt="vasp/outcar" + ) original_system += original_system original_system += original_system original_system += original_system diff --git a/tests/test_siesta_aiMD_output.py b/tests/test_siesta_aiMD_output.py index d66b1962..8ea4be35 100644 --- a/tests/test_siesta_aiMD_output.py +++ b/tests/test_siesta_aiMD_output.py @@ -6,90 +6,155 @@ class TestSIESTASinglePointEnergy: def test_atom_names(self): - self.assertEqual(self.system.data['atom_names'], ['Si']) + self.assertEqual(self.system.data["atom_names"], ["Si"]) def test_atom_numbs(self): - self.assertEqual(self.system.data['atom_numbs'], [64]) + self.assertEqual(self.system.data["atom_numbs"], [64]) def test_atom_types(self): - ref_type = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ref_type = [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ] ref_type = np.array(ref_type) for ii in range(ref_type.shape[0]): # print(self.system.data['atom_types'][0][ii]) - self.assertAlmostEqual(self.system.data['atom_types'][ii], ref_type[ii]) + self.assertAlmostEqual(self.system.data["atom_types"][ii], ref_type[ii]) def test_cell(self): - fp = open('siesta/aimd/cell') + fp = open("siesta/aimd/cell") ref_cell = [] for ii in fp: for jj in ii.split(): ref_cell.append(float(jj)) fp.close() - cells = self.system.data['cells'].flatten() + cells = self.system.data["cells"].flatten() idx = 0 for ii in range(len(cells)): self.assertAlmostEqual(cells[ii], float(ref_cell[ii])) def test_coord(self): - fp = open('siesta/aimd/coord') + fp = open("siesta/aimd/coord") ref_coord = [] for ii in fp: for jj in ii.split(): ref_coord.append(float(jj)) fp.close() - coords = self.system.data['coords'].flatten() + coords = self.system.data["coords"].flatten() for ii in range(len(coords)): self.assertAlmostEqual(coords[ii], float(ref_coord[ii])) def test_force(self): eV = 1 angstrom = 1 - fp = open('siesta/aimd/force') + fp = open("siesta/aimd/force") ref_force = [] for ii in fp: for jj in ii.split(): ref_force.append(float(jj)) fp.close() - forces = self.system.data['forces'].flatten() + forces = self.system.data["forces"].flatten() for ii in range(len(forces)): self.assertAlmostEqual(forces[ii], float(ref_force[ii])) def test_viriale(self): toViri = 1 vol = 1308.4268 - fp = open('siesta/aimd/virial') + fp = open("siesta/aimd/virial") ref_virial = [] for ii in fp: for jj in ii.split(): ref_virial.append(float(jj)) fp.close() - virials = self.system.data['virials'].flatten() + virials = self.system.data["virials"].flatten() for ii in range(len(virials)): self.assertAlmostEqual(virials[ii], float(ref_virial[ii]) * toViri * vol) def test_energy(self): eV = 1 - fp = open('siesta/aimd/energy') + fp = open("siesta/aimd/energy") ref_energy = [] for ii in fp: for jj in ii.split(): ref_energy.append(float(jj)) fp.close() - energy = self.system.data['energies'] + energy = self.system.data["energies"] for ii in range(len(energy)): self.assertAlmostEqual(energy[ii], ref_energy[ii]) class TestAimdSIESTALabeledOutput(unittest.TestCase, TestSIESTASinglePointEnergy): - def setUp(self): - self.system = dpdata.LabeledSystem('siesta/aimd/output', fmt='siesta/aiMD_output') + self.system = dpdata.LabeledSystem( + "siesta/aimd/output", fmt="siesta/aiMD_output" + ) # self.system.data = dpdata.siesta.output.obtain_frame('siesta/siesta_output') -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_siesta_output.py b/tests/test_siesta_output.py index 27e61351..2539173d 100644 --- a/tests/test_siesta_output.py +++ b/tests/test_siesta_output.py @@ -3,67 +3,70 @@ import unittest from context import dpdata + class TestSIESTASinglePointEnergy: - def test_atom_names(self) : - self.assertEqual(self.system.data['atom_names'], ['H','C']) - def test_atom_numbs(self) : - self.assertEqual(self.system.data['atom_numbs'], [4, 1]) - def test_atom_types(self) : - ref_type = [0,0,0,0,1] + def test_atom_names(self): + self.assertEqual(self.system.data["atom_names"], ["H", "C"]) + + def test_atom_numbs(self): + self.assertEqual(self.system.data["atom_numbs"], [4, 1]) + + def test_atom_types(self): + ref_type = [0, 0, 0, 0, 1] ref_type = np.array(ref_type) - for ii in range(ref_type.shape[0]) : + for ii in range(ref_type.shape[0]): # print(self.system.data['atom_types'][0][ii]) - self.assertAlmostEqual(self.system.data['atom_types'][ii], ref_type[ii]) + self.assertAlmostEqual(self.system.data["atom_types"][ii], ref_type[ii]) - def test_cell(self) : - fp = open('siesta/scf/ref_cell') + def test_cell(self): + fp = open("siesta/scf/ref_cell") cell = [] - for ii in fp : + for ii in fp: for jj in ii.split(): cell.append(float(jj)) cell = np.array(cell) # print(cell) fp.close() - res = self.system.data['cells'][0].flatten() + res = self.system.data["cells"][0].flatten() for ii in range(len(cell)): self.assertAlmostEqual(res[ii], cell[ii]) - def test_coord(self) : - fp = open('siesta/scf/ref_coord') + def test_coord(self): + fp = open("siesta/scf/ref_coord") coord = [] for ii in fp: for jj in ii.split(): coord.append(float(jj)) coord = np.array(coord) fp.close() - res = self.system.data['coords'][0].flatten() - for ii in range(len(coord)) : + res = self.system.data["coords"][0].flatten() + for ii in range(len(coord)): self.assertAlmostEqual(res[ii], float(coord[ii])) - def test_force(self) : + def test_force(self): eV = 1 angstrom = 1 - fp = open('siesta/scf/ref_force') + fp = open("siesta/scf/ref_force") force = [] for ii in fp: for jj in ii.split(): force.append(float(jj)) force = np.array(force) fp.close() - res = self.system.data['forces'][0].flatten() + res = self.system.data["forces"][0].flatten() for ii in range(len(force)): self.assertAlmostEqual(res[ii], float(force[ii])) - def test_viriale(self) : + def test_viriale(self): toViri = 1 - fp = open('siesta/scf/ref_cell') + fp = open("siesta/scf/ref_cell") cell = [] for ii in fp: for jj in ii.split(): cell.append(float(jj)) cell = np.array(cell) - cells = cell.reshape(3,3) + cells = cell.reshape(3, 3) fp.close() toVol = [] @@ -71,28 +74,29 @@ def test_viriale(self) : ### calucate vol toVol.append(np.linalg.det(cells)) - fp = open('siesta/scf/ref_virial') + fp = open("siesta/scf/ref_virial") virial = [] for ii in fp: for jj in ii.split(): virial.append(float(jj) * toViri * toVol[0]) virial = np.array(virial) fp.close() - res = self.system.data['virials'][0].flatten() + res = self.system.data["virials"][0].flatten() for ii in range(len(virial)): self.assertAlmostEqual(res[ii], float(virial[ii])) - def test_energy(self) : + def test_energy(self): eV = 1 ref_energy = -219.1640 - self.assertAlmostEqual(self.system.data['energies'][0], ref_energy*eV) + self.assertAlmostEqual(self.system.data["energies"][0], ref_energy * eV) class TestSIESTALabeledOutput(unittest.TestCase, TestSIESTASinglePointEnergy): - def setUp(self): - self.system = dpdata.LabeledSystem('siesta/scf/siesta_output', fmt = 'siesta/output') + self.system = dpdata.LabeledSystem( + "siesta/scf/siesta_output", fmt="siesta/output" + ) -if __name__ == '__main__': - unittest.main() +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_sqm_driver.py b/tests/test_sqm_driver.py index 3755fac9..b06ab26c 100644 --- a/tests/test_sqm_driver.py +++ b/tests/test_sqm_driver.py @@ -9,23 +9,26 @@ @unittest.skipIf(shutil.which("sqm") is None, "sqm is not installed") class TestSQMdriver(unittest.TestCase, CompSys, IsNoPBC): """Test sqm with a hydrogen ion.""" + @classmethod def setUpClass(cls): - cls.system_1 = dpdata.System(data={ - "atom_names": ["H"], - "atom_numbs": [1], - "atom_types": np.zeros((1,), dtype=int), - "coords": np.zeros((1, 1, 3), dtype=np.float32), - "cells": np.zeros((1, 3, 3), dtype=np.float32), - "orig": np.zeros(3, dtype=np.float32), - "nopbc": True, - }) + cls.system_1 = dpdata.System( + data={ + "atom_names": ["H"], + "atom_numbs": [1], + "atom_types": np.zeros((1,), dtype=int), + "coords": np.zeros((1, 1, 3), dtype=np.float32), + "cells": np.zeros((1, 3, 3), dtype=np.float32), + "orig": np.zeros(3, dtype=np.float32), + "nopbc": True, + } + ) cls.system_2 = cls.system_1.predict(theory="DFTB3", charge=1, driver="sqm") cls.places = 6 - + def test_energy(self): - self.assertAlmostEqual(self.system_2['energies'].ravel()[0], 6.549447) - + self.assertAlmostEqual(self.system_2["energies"].ravel()[0], 6.549447) + def test_forces(self): - forces = self.system_2['forces'] + forces = self.system_2["forces"] np.testing.assert_allclose(forces, np.zeros_like(forces)) diff --git a/tests/test_stat.py b/tests/test_stat.py index 1fcc5af9..62d045f4 100644 --- a/tests/test_stat.py +++ b/tests/test_stat.py @@ -5,7 +5,9 @@ class TestStat(unittest.TestCase): def test_errors(self): - system1 = dpdata.LabeledSystem("gaussian/methane.gaussianlog", fmt="gaussian/log") + system1 = dpdata.LabeledSystem( + "gaussian/methane.gaussianlog", fmt="gaussian/log" + ) system2 = dpdata.LabeledSystem("amber/sqm_opt.out", fmt="sqm/out") e = dpdata.stat.Errors(system1, system2) @@ -15,8 +17,12 @@ def test_errors(self): self.assertAlmostEqual(e.f_rmse, 0.005714011247538185, 6) def test_multi_errors(self): - system1 = dpdata.MultiSystems(dpdata.LabeledSystem("gaussian/methane.gaussianlog", fmt="gaussian/log")) - system2 = dpdata.MultiSystems(dpdata.LabeledSystem("amber/sqm_opt.out", fmt="sqm/out")) + system1 = dpdata.MultiSystems( + dpdata.LabeledSystem("gaussian/methane.gaussianlog", fmt="gaussian/log") + ) + system2 = dpdata.MultiSystems( + dpdata.LabeledSystem("amber/sqm_opt.out", fmt="sqm/out") + ) e = dpdata.stat.MultiErrors(system1, system2) self.assertAlmostEqual(e.e_mae, 1014.7946598792427, 6) diff --git a/tests/test_system_append.py b/tests/test_system_append.py index a166398b..69883247 100644 --- a/tests/test_system_append.py +++ b/tests/test_system_append.py @@ -9,15 +9,17 @@ class TestFailedAppend(unittest.TestCase): def test_failed_append(self): - sys1 = dpdata.System('poscars/POSCAR.h2o.md', fmt='vasp/poscar') - sys2 = dpdata.System('poscars/POSCAR.h4o3', fmt='vasp/poscar') + sys1 = dpdata.System("poscars/POSCAR.h2o.md", fmt="vasp/poscar") + sys2 = dpdata.System("poscars/POSCAR.h4o3", fmt="vasp/poscar") with self.assertRaises(Exception) as c: sys1.append(sys2) - self.assertTrue("systems with inconsistent formula could not be append" in str(c.exception)) + self.assertTrue( + "systems with inconsistent formula could not be append" in str(c.exception) + ) class TestVaspXmlAppend(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : + def setUp(self): self.places = 6 # rotated vasp computation, subject to numerical error self.e_places = 6 @@ -26,26 +28,33 @@ def setUp (self) : begin = 2 end = 10 step = 3 - self.system_1 = dpdata.LabeledSystem('poscars/vasprun.h2o.md.10.xml') - self.system_2 = dpdata.LabeledSystem('poscars/vasprun.h2o.md.10.xml') + self.system_1 = dpdata.LabeledSystem("poscars/vasprun.h2o.md.10.xml") + self.system_2 = dpdata.LabeledSystem("poscars/vasprun.h2o.md.10.xml") self.system_1.append(self.system_2) - + self.system_1 = self.system_1.sub_system([0, 12, 4, 16, 8]) - self.system_2 = dpdata.LabeledSystem('poscars/vasprun.h2o.md.10.xml').sub_system(np.arange(0,10,2)) + self.system_2 = dpdata.LabeledSystem( + "poscars/vasprun.h2o.md.10.xml" + ).sub_system(np.arange(0, 10, 2)) class TestDifferentOrderAppend(unittest.TestCase, CompLabeledSys, IsNoPBC): - def setUp (self) : + def setUp(self): self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 6 - self.system_1 = dpdata.LabeledSystem('gaussian/methane.gaussianlog', fmt='gaussian/log') - system_2 = dpdata.LabeledSystem('gaussian/methane_reordered.gaussianlog', fmt='gaussian/log') + self.system_1 = dpdata.LabeledSystem( + "gaussian/methane.gaussianlog", fmt="gaussian/log" + ) + system_2 = dpdata.LabeledSystem( + "gaussian/methane_reordered.gaussianlog", fmt="gaussian/log" + ) self.system_1.append(system_2) - + self.system_2 = self.system_1.sub_system([0, 0]) -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/tests/test_system_apply_pbc.py b/tests/test_system_apply_pbc.py index 12314195..96b06989 100644 --- a/tests/test_system_apply_pbc.py +++ b/tests/test_system_apply_pbc.py @@ -3,28 +3,31 @@ import unittest from context import dpdata -class TestPBC(unittest.TestCase) : - def test_pbc(self) : + +class TestPBC(unittest.TestCase): + def test_pbc(self): nframes = 10 natoms = 20 data = {} - data['coords'] = np.random.random([nframes, natoms, 3]) + [5, 5, 5] - data['cells'] = np.tile(10 * np.eye(3), [nframes, 1, 1]) - data['cells'] += np.random.random([nframes, 3, 3]) + data["coords"] = np.random.random([nframes, natoms, 3]) + [5, 5, 5] + data["cells"] = np.tile(10 * np.eye(3), [nframes, 1, 1]) + data["cells"] += np.random.random([nframes, 3, 3]) shift = 20 * (np.random.random([nframes, natoms, 3]) - 0.5) shift = shift.astype(int) - bk_coord = np.copy(data['coords']) - data['coords'] += np.matmul(shift, data['cells']) + bk_coord = np.copy(data["coords"]) + data["coords"] += np.matmul(shift, data["cells"]) sys = dpdata.System() sys.data = data sys.apply_pbc() - for ii in range(nframes) : - for jj in range(natoms) : - for dd in range(3) : - self.assertAlmostEqual(sys['coords'][ii][jj][dd], - bk_coord[ii][jj][dd], - msg = 'coord[%d][%d][%d] failed' % (ii,jj,dd)) - -if __name__ == '__main__': - unittest.main() + for ii in range(nframes): + for jj in range(natoms): + for dd in range(3): + self.assertAlmostEqual( + sys["coords"][ii][jj][dd], + bk_coord[ii][jj][dd], + msg="coord[%d][%d][%d] failed" % (ii, jj, dd), + ) + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_system_set_type.py b/tests/test_system_set_type.py index b8004a35..27e0da97 100644 --- a/tests/test_system_set_type.py +++ b/tests/test_system_set_type.py @@ -7,35 +7,31 @@ class TestSetAtomTypes(unittest.TestCase): - def setUp (self) : - self.system_1 = dpdata.LabeledSystem('poscars/vasprun.h2o.md.10.xml') + def setUp(self): + self.system_1 = dpdata.LabeledSystem("poscars/vasprun.h2o.md.10.xml") self.type_1 = self.system_1.get_atom_types() - self.system_types = np.array([0,0,1,1,1,1]) - self.type_2 = self.system_1.map_atom_types(["H","C","O"]) - self.type_3 = self.system_1.map_atom_types({"H":2,"C":1,"O":3}) + self.system_types = np.array([0, 0, 1, 1, 1, 1]) + self.type_2 = self.system_1.map_atom_types(["H", "C", "O"]) + self.type_3 = self.system_1.map_atom_types({"H": 2, "C": 1, "O": 3}) - def test_types_func_1(self): - atom_types=np.array([2,2,0,0,0,0]) - atom_types_system_2=self.type_2 - atom_types_system_1=self.type_1 - for d0 in range(3) : - self.assertEqual(atom_types[d0], - atom_types_system_2[d0]) - for d0 in range(3) : - self.assertEqual(self.system_types[d0], - atom_types_system_1[d0]) + atom_types = np.array([2, 2, 0, 0, 0, 0]) + atom_types_system_2 = self.type_2 + atom_types_system_1 = self.type_1 + for d0 in range(3): + self.assertEqual(atom_types[d0], atom_types_system_2[d0]) + for d0 in range(3): + self.assertEqual(self.system_types[d0], atom_types_system_1[d0]) def test_types_func_2(self): - atom_types=np.array([3,3,2,2,2,2]) - atom_types_system_3=self.type_3 - atom_types_system_1=self.type_1 - for d0 in range(3) : - self.assertEqual(atom_types[d0], - atom_types_system_3[d0]) - for d0 in range(3) : - self.assertEqual(self.system_types[d0], - atom_types_system_1[d0]) + atom_types = np.array([3, 3, 2, 2, 2, 2]) + atom_types_system_3 = self.type_3 + atom_types_system_1 = self.type_1 + for d0 in range(3): + self.assertEqual(atom_types[d0], atom_types_system_3[d0]) + for d0 in range(3): + self.assertEqual(self.system_types[d0], atom_types_system_1[d0]) -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/tests/test_to_ase.py b/tests/test_to_ase.py index d1c42b8b..187d8907 100644 --- a/tests/test_to_ase.py +++ b/tests/test_to_ase.py @@ -3,24 +3,26 @@ import unittest from context import dpdata from comp_sys import CompSys, IsPBC + try: from ase import Atoms from ase.io import write except ModuleNotFoundError: - exist_module=False + exist_module = False else: - exist_module=True + exist_module = True -@unittest.skipIf(not exist_module,"skip test_ase") +@unittest.skipIf(not exist_module, "skip test_ase") class TestASE(unittest.TestCase, CompSys, IsPBC): - - def setUp(self): + def setUp(self): system_1 = dpdata.System() - system_1.from_lammps_lmp(os.path.join('poscars', 'conf.lmp'), type_map = ['O', 'H']) - write('tmp.POSCAR',system_1.to_ase_structure()[0],vasp5=True) - self.system_1=system_1 - self.system_2=dpdata.System('tmp.POSCAR') + system_1.from_lammps_lmp( + os.path.join("poscars", "conf.lmp"), type_map=["O", "H"] + ) + write("tmp.POSCAR", system_1.to_ase_structure()[0], vasp5=True) + self.system_1 = system_1 + self.system_2 = dpdata.System("tmp.POSCAR") self.places = 6 self.e_places = 6 self.f_places = 6 @@ -30,9 +32,12 @@ def setUp(self): @unittest.skipIf(not exist_module, "skip test_ase") class TestFromASE(unittest.TestCase, CompSys, IsPBC): """Test ASEStructureFormat.from_system""" - def setUp(self): + + def setUp(self): system_1 = dpdata.System() - system_1.from_lammps_lmp(os.path.join('poscars', 'conf.lmp'), type_map = ['O', 'H']) + system_1.from_lammps_lmp( + os.path.join("poscars", "conf.lmp"), type_map=["O", "H"] + ) atoms = system_1.to_ase_structure()[0] self.system_1 = system_1 self.system_2 = dpdata.System(atoms, fmt="ase/structure") @@ -44,6 +49,5 @@ def setUp(self): self.v_places = 6 -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() - diff --git a/tests/test_to_list.py b/tests/test_to_list.py index e4c83cff..4aa90885 100644 --- a/tests/test_to_list.py +++ b/tests/test_to_list.py @@ -4,10 +4,10 @@ from context import dpdata from comp_sys import CompLabeledSys, IsPBC + class TestToList(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : - system = dpdata.LabeledSystem('poscars/OUTCAR.h2o.md', - fmt = 'vasp/outcar') + def setUp(self): + system = dpdata.LabeledSystem("poscars/OUTCAR.h2o.md", fmt="vasp/outcar") self.system_1 = system.sub_system([2]) self.system_2 = system.to_list()[2] self.places = 6 @@ -16,5 +16,5 @@ def setUp (self) : self.v_places = 4 -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_to_pymatgen.py b/tests/test_to_pymatgen.py index 0077a049..abe25150 100644 --- a/tests/test_to_pymatgen.py +++ b/tests/test_to_pymatgen.py @@ -3,26 +3,30 @@ import unittest from context import dpdata from comp_sys import CompSys, IsPBC + try: - from pymatgen import Structure - exist_module=True + from pymatgen import Structure + + exist_module = True except Exception: - exist_module=False + exist_module = False -@unittest.skipIf(not exist_module,"skip pymatgen") + +@unittest.skipIf(not exist_module, "skip pymatgen") class TestPymatgen(unittest.TestCase, CompSys, IsPBC): - - def setUp(self): + def setUp(self): system_1 = dpdata.System() - system_1.from_lammps_lmp(os.path.join('poscars', 'conf.lmp'), type_map = ['O', 'H']) - system_1.to_pymatgen_structure()[0].to('poscar','tmp.POSCAR') - self.system_1=system_1 - self.system_2=dpdata.System('tmp.POSCAR') + system_1.from_lammps_lmp( + os.path.join("poscars", "conf.lmp"), type_map=["O", "H"] + ) + system_1.to_pymatgen_structure()[0].to("poscar", "tmp.POSCAR") + self.system_1 = system_1 + self.system_2 = dpdata.System("tmp.POSCAR") self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 6 -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() - diff --git a/tests/test_to_pymatgen_entry.py b/tests/test_to_pymatgen_entry.py index e0a952be..1cad9c17 100644 --- a/tests/test_to_pymatgen_entry.py +++ b/tests/test_to_pymatgen_entry.py @@ -3,27 +3,32 @@ import unittest from context import dpdata from comp_sys import CompSys, IsPBC -from monty.serialization import loadfn +from monty.serialization import loadfn + try: - from pymatgen.entries.computed_entries import ComputedStructureEntry - exist_module=True + from pymatgen.entries.computed_entries import ComputedStructureEntry + + exist_module = True except Exception: - exist_module=False + exist_module = False -@unittest.skipIf(not exist_module,"skip pymatgen") + +@unittest.skipIf(not exist_module, "skip pymatgen") class TestPymatgen(unittest.TestCase): - - def test(self): - ls1= dpdata.LabeledSystem(os.path.join('poscars', 'OUTCAR.ch4.1step'),fmt='OUTCAR') - entry1=ls1.to_pymatgen_ComputedStructureEntry() - self.assertEqual(entry1,[]) - ls2= dpdata.LabeledSystem(os.path.join('poscars', 'OUTCAR.h2o.md.10'),fmt='OUTCAR') - entry2=ls2.to_pymatgen_ComputedStructureEntry() - self.assertEqual(len(entry2),10) - last_entry=loadfn("computed_structure_entry.json") - self.assertEqual(last_entry.as_dict(),entry2[-1].as_dict()) - + def test(self): + ls1 = dpdata.LabeledSystem( + os.path.join("poscars", "OUTCAR.ch4.1step"), fmt="OUTCAR" + ) + entry1 = ls1.to_pymatgen_ComputedStructureEntry() + self.assertEqual(entry1, []) + ls2 = dpdata.LabeledSystem( + os.path.join("poscars", "OUTCAR.h2o.md.10"), fmt="OUTCAR" + ) + entry2 = ls2.to_pymatgen_ComputedStructureEntry() + self.assertEqual(len(entry2), 10) + last_entry = loadfn("computed_structure_entry.json") + self.assertEqual(last_entry.as_dict(), entry2[-1].as_dict()) + -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() - diff --git a/tests/test_type_map.py b/tests/test_type_map.py index 1e1e7b3c..4d98aa7a 100644 --- a/tests/test_type_map.py +++ b/tests/test_type_map.py @@ -3,37 +3,37 @@ from itertools import permutations -class TestTypeMap(): +class TestTypeMap: def test_check_type_map(self): # read atom names system = dpdata.LabeledSystem(self.fn, fmt=self.fmt) - atom_names = system['atom_names'] + atom_names = system["atom_names"] for type_map in permutations(atom_names, len(atom_names)): type_map = list(type_map) system.check_type_map(type_map=type_map) - self.assertEqual(type_map, system['atom_names']) + self.assertEqual(type_map, system["atom_names"]) def test_type_map_is_superset(self): system = dpdata.LabeledSystem(self.fn, fmt=self.fmt) - atom_names = system['atom_names'] + ["X"] + atom_names = system["atom_names"] + ["X"] for type_map in permutations(atom_names, len(atom_names)): type_map = list(type_map) system = dpdata.LabeledSystem(self.fn, fmt=self.fmt) system.check_type_map(type_map=type_map) - self.assertEqual(type_map, system['atom_names']) + self.assertEqual(type_map, system["atom_names"]) class TestTypeMap1(TestTypeMap, unittest.TestCase): def setUp(self): - self.fn = 'gaussian/methane.gaussianlog' - self.fmt = 'gaussian/log' + self.fn = "gaussian/methane.gaussianlog" + self.fmt = "gaussian/log" class TestTypeMap2(TestTypeMap, unittest.TestCase): def setUp(self): - self.fn = 'cp2k/cp2k_normal_output/cp2k_output' - self.fmt = 'cp2k/output' + self.fn = "cp2k/cp2k_normal_output/cp2k_output" + self.fmt = "cp2k/output" -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_vasp_outcar.py b/tests/test_vasp_outcar.py index 23c40d5c..2fcaa0a1 100644 --- a/tests/test_vasp_outcar.py +++ b/tests/test_vasp_outcar.py @@ -5,24 +5,30 @@ from comp_sys import CompLabeledSys, IsPBC from dpdata.utils import uniq_atom_names + class TestVaspOUTCAR(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : + def setUp(self): self.system_1 = dpdata.LabeledSystem() - self.system_1.from_vasp_xml('poscars/vasprun.h2o.md.xml') + self.system_1.from_vasp_xml("poscars/vasprun.h2o.md.xml") self.system_2 = dpdata.LabeledSystem() - self.system_2.from_vasp_outcar('poscars/OUTCAR.h2o.md') + self.system_2.from_vasp_outcar("poscars/OUTCAR.h2o.md") self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 4 + class TestVaspOUTCARTypeMap(unittest.TestCase, CompLabeledSys, IsPBC): def setUp(self): - sys0 = dpdata.LabeledSystem('poscars/OUTCAR.ch4.unconverged', fmt = 'vasp/outcar') - sys0.data['atom_names'] = ['A', 'C', 'B', 'H', 'D'] - sys0.data['atom_numbs'] = [ 0, 1, 0, 4, 0] - sys0.data['atom_types'] = np.array([ 3, 3, 3, 3, 1], dtype = int) - sys1 = dpdata.LabeledSystem('poscars/OUTCAR.ch4.unconverged', fmt = 'vasp/outcar', type_map = ['A', 'C', 'B', 'H', 'D']) + sys0 = dpdata.LabeledSystem("poscars/OUTCAR.ch4.unconverged", fmt="vasp/outcar") + sys0.data["atom_names"] = ["A", "C", "B", "H", "D"] + sys0.data["atom_numbs"] = [0, 1, 0, 4, 0] + sys0.data["atom_types"] = np.array([3, 3, 3, 3, 1], dtype=int) + sys1 = dpdata.LabeledSystem( + "poscars/OUTCAR.ch4.unconverged", + fmt="vasp/outcar", + type_map=["A", "C", "B", "H", "D"], + ) self.system_1 = sys0 self.system_2 = sys1 self.places = 6 @@ -30,13 +36,18 @@ def setUp(self): self.f_places = 6 self.v_places = 6 + class TestVaspOUTCARSkip(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : + def setUp(self): begin = 1 step = 3 end = 10 - self.system_1 = dpdata.LabeledSystem('poscars/OUTCAR.h2o.md.10', fmt = 'vasp/outcar', begin = begin, step = step) - self.system_2 = dpdata.LabeledSystem('poscars/OUTCAR.h2o.md.10', fmt = 'vasp/outcar').sub_system(np.arange(begin, end, step)) + self.system_1 = dpdata.LabeledSystem( + "poscars/OUTCAR.h2o.md.10", fmt="vasp/outcar", begin=begin, step=step + ) + self.system_2 = dpdata.LabeledSystem( + "poscars/OUTCAR.h2o.md.10", fmt="vasp/outcar" + ).sub_system(np.arange(begin, end, step)) self.places = 6 self.e_places = 6 self.f_places = 6 @@ -44,10 +55,10 @@ def setUp (self) : class TestVaspOUTCARVdw(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : - self.system_1 = dpdata.LabeledSystem('poscars/OUTCAR.Ge.vdw', fmt = 'vasp/outcar') + def setUp(self): + self.system_1 = dpdata.LabeledSystem("poscars/OUTCAR.Ge.vdw", fmt="vasp/outcar") self.system_2 = dpdata.LabeledSystem() - self.system_2.from_vasp_xml('poscars/vasprun.Ge.vdw.xml') + self.system_2.from_vasp_xml("poscars/vasprun.Ge.vdw.xml") self.places = 5 self.e_places = 6 self.f_places = 6 @@ -56,46 +67,50 @@ def setUp (self) : class TestDuplicatedAtomNames(unittest.TestCase): def test(self): - system = dpdata.LabeledSystem('poscars/6362_OUTCAR', fmt = 'vasp/outcar') + system = dpdata.LabeledSystem("poscars/6362_OUTCAR", fmt="vasp/outcar") expected_types = [0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1] - self.assertEqual(list(system['atom_types']), expected_types) - self.assertEqual(system['atom_names'], ['B', 'O']) - self.assertEqual(system['atom_numbs'], [8, 6]) + self.assertEqual(list(system["atom_types"]), expected_types) + self.assertEqual(system["atom_names"], ["B", "O"]) + self.assertEqual(system["atom_numbs"], [8, 6]) def test_type_map(self): - system = dpdata.LabeledSystem('poscars/6362_OUTCAR', fmt = 'vasp/outcar', type_map = ['O', 'B']) + system = dpdata.LabeledSystem( + "poscars/6362_OUTCAR", fmt="vasp/outcar", type_map=["O", "B"] + ) expected_types = [1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0] - self.assertEqual(list(system['atom_types']), expected_types) - self.assertEqual(system['atom_names'], ['O', 'B']) - self.assertEqual(system['atom_numbs'], [6, 8]) + self.assertEqual(list(system["atom_types"]), expected_types) + self.assertEqual(system["atom_names"], ["O", "B"]) + self.assertEqual(system["atom_numbs"], [6, 8]) class TestUniqAtomNames(unittest.TestCase): def test(self): data = {} - data['atom_names'] = ['O', 'H', 'O', 'H'] - data['atom_types'] = np.array([0, 1, 2, 3, 3, 2, 1], dtype=int) - + data["atom_names"] = ["O", "H", "O", "H"] + data["atom_types"] = np.array([0, 1, 2, 3, 3, 2, 1], dtype=int) + data = uniq_atom_names(data) - self.assertEqual(list(data['atom_types']), - [0, 1, 0, 1, 1, 0, 1]) - self.assertEqual(list(data['atom_names']), - ['O', 'H']) - self.assertEqual(list(data['atom_numbs']), - [3, 4]) + self.assertEqual(list(data["atom_types"]), [0, 1, 0, 1, 1, 0, 1]) + self.assertEqual(list(data["atom_names"]), ["O", "H"]) + self.assertEqual(list(data["atom_numbs"]), [3, 4]) + class TestVaspOUTCARML(unittest.TestCase): def test(self): - system1 = dpdata.LabeledSystem('poscars/OUTCAR.ch4.ml', fmt = 'vasp/outcar',ml=True) - system2 = dpdata.LabeledSystem('poscars/OUTCAR.ch4.ml', fmt = 'vasp/outcar',ml=False) + system1 = dpdata.LabeledSystem( + "poscars/OUTCAR.ch4.ml", fmt="vasp/outcar", ml=True + ) + system2 = dpdata.LabeledSystem( + "poscars/OUTCAR.ch4.ml", fmt="vasp/outcar", ml=False + ) expected_types = [0, 0, 0, 0, 1] - self.assertEqual(list(system1['atom_types']), expected_types) - self.assertEqual(system1['atom_names'], ['H', 'C']) - self.assertEqual(len(system1['energies']), 10) - self.assertEqual(list(system2['atom_types']), expected_types) - self.assertEqual(system2['atom_names'], ['H', 'C']) - self.assertEqual(len(system2['energies']), 4) + self.assertEqual(list(system1["atom_types"]), expected_types) + self.assertEqual(system1["atom_names"], ["H", "C"]) + self.assertEqual(len(system1["energies"]), 10) + self.assertEqual(list(system2["atom_types"]), expected_types) + self.assertEqual(system2["atom_names"], ["H", "C"]) + self.assertEqual(len(system2["energies"]), 4) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_vasp_poscar_dump.py b/tests/test_vasp_poscar_dump.py index 0e42e49d..f83e208c 100644 --- a/tests/test_vasp_poscar_dump.py +++ b/tests/test_vasp_poscar_dump.py @@ -2,48 +2,59 @@ import numpy as np import unittest from context import dpdata -from poscars.poscar_ref_oh import TestPOSCARoh +from poscars.poscar_ref_oh import TestPOSCARoh + def myfilecmp(test, f0, f1): - with open(f0) as fp0 : + with open(f0) as fp0: with open(f1) as fp1: test.assertTrue(fp0.read() == fp1.read()) + class TestPOSCARDump(unittest.TestCase, TestPOSCARoh): - - def setUp(self): + def setUp(self): tmp_system = dpdata.System() # tmp_system.from_vasp_poscar(os.path.join('poscars', 'POSCAR.oh.d')) - tmp_system.from_lammps_lmp(os.path.join('poscars', 'conf.lmp'), type_map = ['O', 'H']) - tmp_system.to_vasp_poscar('tmp.POSCAR') + tmp_system.from_lammps_lmp( + os.path.join("poscars", "conf.lmp"), type_map=["O", "H"] + ) + tmp_system.to_vasp_poscar("tmp.POSCAR") self.system = dpdata.System() - self.system.from_vasp_poscar('tmp.POSCAR') + self.system.from_vasp_poscar("tmp.POSCAR") + class TestPOSCARDump1(unittest.TestCase, TestPOSCARoh): - - def setUp(self): + def setUp(self): tmp_system = dpdata.System() - tmp_system.from_vasp_poscar(os.path.join('poscars', 'POSCAR.oh.d')) + tmp_system.from_vasp_poscar(os.path.join("poscars", "POSCAR.oh.d")) # tmp_system.from_lammps_lmp(os.path.join('poscars', 'conf.lmp'), type_map = ['O', 'H']) - tmp_system.to_vasp_poscar('tmp.POSCAR') + tmp_system.to_vasp_poscar("tmp.POSCAR") self.system = dpdata.System() - self.system.from_vasp_poscar('tmp.POSCAR') + self.system.from_vasp_poscar("tmp.POSCAR") + -class TestPOSCARSkipZeroAtomNumb(unittest.TestCase) : +class TestPOSCARSkipZeroAtomNumb(unittest.TestCase): def tearDown(self): - if os.path.isfile('POSCAR.tmp.1'): - os.remove('POSCAR.tmp.1') - if os.path.isfile('POSCAR.tmp.2'): - os.remove('POSCAR.tmp.2') + if os.path.isfile("POSCAR.tmp.1"): + os.remove("POSCAR.tmp.1") + if os.path.isfile("POSCAR.tmp.2"): + os.remove("POSCAR.tmp.2") def test_dump_vasp_type_map(self): - system0 = dpdata.System(os.path.join('poscars', 'POSCAR.oh.d'), fmt = 'vasp/poscar', type_map = ['H', 'O']) - system0.to_vasp_poscar('POSCAR.tmp.1') - system1 = dpdata.System(os.path.join('poscars', 'POSCAR.oh.d'), fmt = 'vasp/poscar', type_map = ['C', 'H', 'A', 'O', 'B']) - system1.to_vasp_poscar('POSCAR.tmp.2') - myfilecmp(self, 'POSCAR.tmp.1', 'POSCAR.tmp.2') + system0 = dpdata.System( + os.path.join("poscars", "POSCAR.oh.d"), + fmt="vasp/poscar", + type_map=["H", "O"], + ) + system0.to_vasp_poscar("POSCAR.tmp.1") + system1 = dpdata.System( + os.path.join("poscars", "POSCAR.oh.d"), + fmt="vasp/poscar", + type_map=["C", "H", "A", "O", "B"], + ) + system1.to_vasp_poscar("POSCAR.tmp.2") + myfilecmp(self, "POSCAR.tmp.1", "POSCAR.tmp.2") -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() - diff --git a/tests/test_vasp_poscar_to_system.py b/tests/test_vasp_poscar_to_system.py index 760da63a..7eea3757 100644 --- a/tests/test_vasp_poscar_to_system.py +++ b/tests/test_vasp_poscar_to_system.py @@ -5,38 +5,48 @@ from comp_sys import CompSys, IsPBC from poscars.poscar_ref_oh import TestPOSCARoh + class TestPOSCARCart(unittest.TestCase, TestPOSCARoh): - - def setUp(self): + def setUp(self): self.system = dpdata.System() - self.system.from_vasp_poscar(os.path.join('poscars', 'POSCAR.oh.c')) + self.system.from_vasp_poscar(os.path.join("poscars", "POSCAR.oh.c")) + class TestPOSCARDirect(unittest.TestCase, TestPOSCARoh): - - def setUp(self): + def setUp(self): self.system = dpdata.System() - self.system.from_vasp_poscar(os.path.join('poscars', 'POSCAR.oh.d')) + self.system.from_vasp_poscar(os.path.join("poscars", "POSCAR.oh.d")) + + +class TestPOSCARDirectDuplicated(unittest.TestCase): + def test(self): + ss = dpdata.System( + os.path.join("poscars", "POSCAR.oh.d.dup"), fmt="vasp/poscar" + ) + self.assertEqual(ss["atom_names"], ["O", "H"]) + self.assertEqual(ss["atom_numbs"], [2, 1]) + self.assertEqual(list(ss["atom_types"]), [0, 1, 0]) -class TestPOSCARDirectDuplicated(unittest.TestCase): - def test(self): - ss = dpdata.System(os.path.join('poscars', 'POSCAR.oh.d.dup'), fmt='vasp/poscar') - self.assertEqual(ss['atom_names'], ['O', 'H']) - self.assertEqual(ss['atom_numbs'], [2, 1]) - self.assertEqual(list(ss['atom_types']), [0, 1, 0]) + def test_type_map(self): + ss = dpdata.System( + os.path.join("poscars", "POSCAR.oh.d.dup"), + fmt="vasp/poscar", + type_map=["H", "O"], + ) + self.assertEqual(ss["atom_names"], ["H", "O"]) + self.assertEqual(ss["atom_numbs"], [1, 2]) + self.assertEqual(list(ss["atom_types"]), [1, 0, 1]) - def test_type_map(self): - ss = dpdata.System(os.path.join('poscars', 'POSCAR.oh.d.dup'), fmt='vasp/poscar', type_map=['H', 'O']) - self.assertEqual(ss['atom_names'], ['H', 'O']) - self.assertEqual(ss['atom_numbs'], [1, 2]) - self.assertEqual(list(ss['atom_types']), [1, 0, 1]) class TestVaspPOSCARTypeMap(unittest.TestCase, CompSys, IsPBC): def setUp(self): - sys0 = dpdata.System('poscars/POSCAR.oh.d', fmt = 'vasp/poscar') - sys0.data['atom_names'] = ['A', 'H', 'B', 'O', 'D'] - sys0.data['atom_numbs'] = [ 0, 1, 0, 1, 0] - sys0.data['atom_types'] = np.array([ 3, 1], dtype = int) - sys1 = dpdata.System('poscars/POSCAR.oh.d', fmt = 'vasp/poscar', type_map = ['A', 'H', 'B', 'O', 'D']) + sys0 = dpdata.System("poscars/POSCAR.oh.d", fmt="vasp/poscar") + sys0.data["atom_names"] = ["A", "H", "B", "O", "D"] + sys0.data["atom_numbs"] = [0, 1, 0, 1, 0] + sys0.data["atom_types"] = np.array([3, 1], dtype=int) + sys1 = dpdata.System( + "poscars/POSCAR.oh.d", fmt="vasp/poscar", type_map=["A", "H", "B", "O", "D"] + ) self.system_1 = sys0 self.system_2 = sys1 self.places = 6 @@ -45,5 +55,5 @@ def setUp(self): self.v_places = 6 -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_vasp_unconverged_outcar.py b/tests/test_vasp_unconverged_outcar.py index 90bb05e8..579edf09 100644 --- a/tests/test_vasp_unconverged_outcar.py +++ b/tests/test_vasp_unconverged_outcar.py @@ -5,23 +5,24 @@ class TestSingleStep(unittest.TestCase): - def setUp(self): - self.LabeledSystem1 = dpdata.LabeledSystem(os.path.join('poscars', 'OUTCAR.ch4.unconverged'),\ - fmt='outcar' ) + self.LabeledSystem1 = dpdata.LabeledSystem( + os.path.join("poscars", "OUTCAR.ch4.unconverged"), fmt="outcar" + ) - self.LabeledSystem2 = dpdata.LabeledSystem(os.path.join('poscars', 'OUTCAR.ch4.1step'),\ - fmt='outcar' ) + self.LabeledSystem2 = dpdata.LabeledSystem( + os.path.join("poscars", "OUTCAR.ch4.1step"), fmt="outcar" + ) - def test_unconverged(self) : + def test_unconverged(self): - self.assertEqual(self.LabeledSystem1['energies'], -23.94708651) + self.assertEqual(self.LabeledSystem1["energies"], -23.94708651) self.assertEqual(self.LabeledSystem1.get_nframes(), 1) self.assertEqual(self.LabeledSystem1.get_natoms(), 5) - def test_single_step(self) : - self.assertEqual(self.LabeledSystem2.get_nframes(), 0) + def test_single_step(self): + self.assertEqual(self.LabeledSystem2.get_nframes(), 0) -if __name__ == '__main__': - unittest.main() \ No newline at end of file +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_vasp_xml.py b/tests/test_vasp_xml.py index ed7ac6e8..a3cd90d0 100644 --- a/tests/test_vasp_xml.py +++ b/tests/test_vasp_xml.py @@ -6,32 +6,33 @@ from comp_sys import CompLabeledSys from comp_sys import IsPBC + class TestVaspXml(unittest.TestCase, CompSys, IsPBC): - def setUp (self) : + def setUp(self): self.places = 6 xml_sys = dpdata.LabeledSystem() - xml_sys.from_vasp_xml('poscars/vasprun.h2o.md.xml') + xml_sys.from_vasp_xml("poscars/vasprun.h2o.md.xml") # init_sys = dpdata.System() # init_sys.from_vasp_poscar('poscars/POSCAR.h2o.md') finl_sys = dpdata.System() - finl_sys.from_vasp_poscar('poscars/CONTCAR.h2o.md') + finl_sys.from_vasp_poscar("poscars/CONTCAR.h2o.md") self.system_1 = finl_sys self.system_2 = xml_sys.sub_system([-1]) class TestVaspXmlRotSys(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : + def setUp(self): self.places = 4 # rotated vasp computation, subject to numerical error self.e_places = 3 self.f_places = 2 self.v_places = 1 - self.system_1 = dpdata.LabeledSystem('poscars/vasprun.h2o.md.tribox.xml') - self.system_2 = dpdata.LabeledSystem('poscars/vasprun.h2o.md.tribox.lower.xml') + self.system_1 = dpdata.LabeledSystem("poscars/vasprun.h2o.md.tribox.xml") + self.system_2 = dpdata.LabeledSystem("poscars/vasprun.h2o.md.tribox.lower.xml") class TestVaspXmlSkip(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : + def setUp(self): self.places = 6 # rotated vasp computation, subject to numerical error self.e_places = 6 @@ -40,9 +41,13 @@ def setUp (self) : begin = 2 end = 10 step = 3 - self.system_1 = dpdata.LabeledSystem('poscars/vasprun.h2o.md.10.xml', begin = begin, step = step) - self.system_2 = dpdata.LabeledSystem('poscars/vasprun.h2o.md.10.xml').sub_system(np.arange(2,10,3)) + self.system_1 = dpdata.LabeledSystem( + "poscars/vasprun.h2o.md.10.xml", begin=begin, step=step + ) + self.system_2 = dpdata.LabeledSystem( + "poscars/vasprun.h2o.md.10.xml" + ).sub_system(np.arange(2, 10, 3)) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_water_ions.py b/tests/test_water_ions.py index f2fab203..98b3838b 100644 --- a/tests/test_water_ions.py +++ b/tests/test_water_ions.py @@ -2,26 +2,32 @@ import numpy as np import unittest from context import dpdata + try: import ase - import ase.neighborlist - exist_ase=True + import ase.neighborlist + + exist_ase = True except Exception: - exist_ase=False + exist_ase = False + class TestIons(unittest.TestCase): - - def setUp(self): + def setUp(self): self.system = dpdata.System() - self.system.from_lammps_lmp(os.path.join('poscars', 'conf.waterion.lmp'), - type_map = ['O', 'H']) - self.bonds = dpdata.md.water.compute_bonds(self.system.data['cells'][0], - self.system.data['coords'][0], - self.system.data['atom_types']) - - def test_ions_count(self) : - no, noh, noh2, noh3, nh \ - = dpdata.md.water.find_ions(self.system.data['atom_types'], self.bonds) + self.system.from_lammps_lmp( + os.path.join("poscars", "conf.waterion.lmp"), type_map=["O", "H"] + ) + self.bonds = dpdata.md.water.compute_bonds( + self.system.data["cells"][0], + self.system.data["coords"][0], + self.system.data["atom_types"], + ) + + def test_ions_count(self): + no, noh, noh2, noh3, nh = dpdata.md.water.find_ions( + self.system.data["atom_types"], self.bonds + ) self.assertEqual(len(no), 0) self.assertEqual(len(noh), 1) self.assertEqual(len(noh2), 125) @@ -35,14 +41,19 @@ def test_ions_count(self) : class TestAseComputeBond(unittest.TestCase): def setUp(self): self.system = dpdata.System() - self.system.from_lammps_lmp(os.path.join('poscars', 'conf.waterion.lmp'), - type_map = ['O', 'H']) - self.bonds = dpdata.md.water.compute_bonds_naive(self.system.data['cells'][0], - self.system.data['coords'][0], - self.system.data['atom_types']) - self.bonds_ase = dpdata.md.water.compute_bonds_ase(self.system.data['cells'][0], - self.system.data['coords'][0], - self.system.data['atom_types']) + self.system.from_lammps_lmp( + os.path.join("poscars", "conf.waterion.lmp"), type_map=["O", "H"] + ) + self.bonds = dpdata.md.water.compute_bonds_naive( + self.system.data["cells"][0], + self.system.data["coords"][0], + self.system.data["atom_types"], + ) + self.bonds_ase = dpdata.md.water.compute_bonds_ase( + self.system.data["cells"][0], + self.system.data["coords"][0], + self.system.data["atom_types"], + ) def test_bond_identity(self): self.assertTrue(len(self.bonds), len(self.bonds_ase)) @@ -50,7 +61,5 @@ def test_bond_identity(self): self.assertTrue(set(self.bonds[ii]) == set(self.bonds_ase[ii])) - -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() - diff --git a/tests/test_xyz.py b/tests/test_xyz.py index 7d9649f9..8f0779b3 100644 --- a/tests/test_xyz.py +++ b/tests/test_xyz.py @@ -4,17 +4,20 @@ from context import dpdata from comp_sys import CompSys, IsNoPBC + class TestToXYZ(unittest.TestCase): def test_to_xyz(self): - with tempfile.NamedTemporaryFile('r') as f_xyz: - dpdata.System(data={ - "atom_names": ["C", "O"], - "atom_numbs": [1, 1], - "atom_types": np.array([0, 1]), - "coords": np.arange(6).reshape((1,2,3)), - "cells": np.zeros((1,3,3)), - "orig": np.zeros(3), - }).to("xyz", f_xyz.name) + with tempfile.NamedTemporaryFile("r") as f_xyz: + dpdata.System( + data={ + "atom_names": ["C", "O"], + "atom_numbs": [1, 1], + "atom_types": np.array([0, 1]), + "coords": np.arange(6).reshape((1, 2, 3)), + "cells": np.zeros((1, 3, 3)), + "orig": np.zeros(3), + } + ).to("xyz", f_xyz.name) xyz0 = f_xyz.read().strip() xyz1 = "2\n\nC 0.000000 1.000000 2.000000\nO 3.000000 4.000000 5.000000" self.assertEqual(xyz0, xyz1) @@ -24,15 +27,17 @@ class TestFromXYZ(unittest.TestCase, CompSys, IsNoPBC): def setUp(self): self.places = 6 # considering to_xyz has been tested.. - self.system_1 = dpdata.System(data={ + self.system_1 = dpdata.System( + data={ "atom_names": ["C", "O"], "atom_numbs": [1, 1], "atom_types": np.array([0, 1]), - "coords": np.arange(6).reshape((1,2,3)), - "cells": np.zeros((1,3,3)), + "coords": np.arange(6).reshape((1, 2, 3)), + "cells": np.zeros((1, 3, 3)), "orig": np.zeros(3), "nopbc": True, - }) - with tempfile.NamedTemporaryFile('r') as f_xyz: + } + ) + with tempfile.NamedTemporaryFile("r") as f_xyz: self.system_1.to("xyz", f_xyz.name) self.system_2 = dpdata.System(f_xyz.name, fmt="xyz") From ecdf566626c4b2d1824b946d1b7ad809cb8946dd Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 26 Jan 2023 22:41:48 -0500 Subject: [PATCH 11/20] format Python codes in docs (#414) This PR adds a pre-commit hook to use Black to format Python codes in the documentation. --- .pre-commit-config.yaml | 5 +++ README.md | 90 +++++++++++++++++++++++++---------------- 2 files changed, 60 insertions(+), 35 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 83769dfb..9ca30f7a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,5 +21,10 @@ repos: rev: 22.12.0 hooks: - id: black-jupyter +# Python inside docs +- repo: https://github.com/asottile/blacken-docs + rev: 1.13.0 + hooks: + - id: blacken-docs ci: autoupdate_branch: devel diff --git a/README.md b/README.md index 9c9fe53b..281920ba 100644 --- a/README.md +++ b/README.md @@ -34,18 +34,18 @@ The typicall workflow of `dpdata` is ## Load data ```python -d_poscar = dpdata.System('POSCAR', fmt = 'vasp/poscar') +d_poscar = dpdata.System("POSCAR", fmt="vasp/poscar") ``` or let dpdata infer the format (`vasp/poscar`) of the file from the file name extension ```python -d_poscar = dpdata.System('my.POSCAR') +d_poscar = dpdata.System("my.POSCAR") ``` The number of atoms, atom types, coordinates are loaded from the `POSCAR` and stored to a data `System` called `d_poscar`. A data `System` (a concept used by [deepmd-kit](https://github.com/deepmodeling/deepmd-kit)) contains frames that has the same number of atoms of the same type. The order of the atoms should be consistent among the frames in one `System`. It is noted that `POSCAR` only contains one frame. If the multiple frames stored in, for example, a `OUTCAR` is wanted, ```python -d_outcar = dpdata.LabeledSystem('OUTCAR') +d_outcar = dpdata.LabeledSystem("OUTCAR") ``` The labels provided in the `OUTCAR`, i.e. energies, forces and virials (if any), are loaded by `LabeledSystem`. It is noted that the forces of atoms are always assumed to exist. `LabeledSystem` is a derived class of `System`. @@ -100,51 +100,58 @@ The following commands relating to `Class dpdata.MultiSystems` may be useful. ```python # load data -xyz_multi_systems = dpdata.MultiSystems.from_file(file_name='tests/xyz/xyz_unittest.xyz',fmt='quip/gap/xyz') -vasp_multi_systems = dpdata.MultiSystems.from_dir(dir_name='./mgal_outcar', file_name='OUTCAR', fmt='vasp/outcar') +xyz_multi_systems = dpdata.MultiSystems.from_file( + file_name="tests/xyz/xyz_unittest.xyz", fmt="quip/gap/xyz" +) +vasp_multi_systems = dpdata.MultiSystems.from_dir( + dir_name="./mgal_outcar", file_name="OUTCAR", fmt="vasp/outcar" +) # use wildcard -vasp_multi_systems = dpdata.MultiSystems.from_dir(dir_name='./mgal_outcar', file_name='*OUTCAR', fmt='vasp/outcar') +vasp_multi_systems = dpdata.MultiSystems.from_dir( + dir_name="./mgal_outcar", file_name="*OUTCAR", fmt="vasp/outcar" +) # print the multi_system infomation print(xyz_multi_systems) -print(xyz_multi_systems.systems) # return a dictionaries +print(xyz_multi_systems.systems) # return a dictionaries # print the system infomation -print(xyz_multi_systems.systems['B1C9'].data) +print(xyz_multi_systems.systems["B1C9"].data) # dump a system's data to ./my_work_dir/B1C9_raw folder -xyz_multi_systems.systems['B1C9'].to_deepmd_raw('./my_work_dir/B1C9_raw') +xyz_multi_systems.systems["B1C9"].to_deepmd_raw("./my_work_dir/B1C9_raw") # dump all systems -xyz_multi_systems.to_deepmd_raw('./my_deepmd_data/') +xyz_multi_systems.to_deepmd_raw("./my_deepmd_data/") ``` You may also use the following code to parse muti-system: ```python -from dpdata import LabeledSystem,MultiSystems +from dpdata import LabeledSystem, MultiSystems from glob import glob + """ process multi systems """ -fs=glob('./*/OUTCAR') # remeber to change here !!! -ms=MultiSystems() +fs = glob("./*/OUTCAR") # remeber to change here !!! +ms = MultiSystems() for f in fs: try: - ls=LabeledSystem(f) + ls = LabeledSystem(f) except: print(f) - if len(ls)>0: + if len(ls) > 0: ms.append(ls) -ms.to_deepmd_raw('deepmd') -ms.to_deepmd_npy('deepmd') +ms.to_deepmd_raw("deepmd") +ms.to_deepmd_npy("deepmd") ``` ## Access data These properties stored in `System` and `LabeledSystem` can be accessed by operator `[]` with the key of the property supplied, for example ```python -coords = d_outcar['coords'] +coords = d_outcar["coords"] ``` Available properties are (nframe: number of frames in the system, natoms: total number of atoms in the system) @@ -163,25 +170,25 @@ Available properties are (nframe: number of frames in the system, natoms: total ## Dump data The data stored in `System` or `LabeledSystem` can be dumped in 'lammps/lmp' or 'vasp/poscar' format, for example: ```python -d_outcar.to('lammps/lmp', 'conf.lmp', frame_idx=0) +d_outcar.to("lammps/lmp", "conf.lmp", frame_idx=0) ``` The first frames of `d_outcar` will be dumped to 'conf.lmp' ```python -d_outcar.to('vasp/poscar', 'POSCAR', frame_idx=-1) +d_outcar.to("vasp/poscar", "POSCAR", frame_idx=-1) ``` The last frames of `d_outcar` will be dumped to 'POSCAR'. The data stored in `LabeledSystem` can be dumped to deepmd-kit raw format, for example ```python -d_outcar.to('deepmd/raw', 'dpmd_raw') +d_outcar.to("deepmd/raw", "dpmd_raw") ``` Or a simpler command: ```python -dpdata.LabeledSystem('OUTCAR').to('deepmd/raw', 'dpmd_raw') +dpdata.LabeledSystem("OUTCAR").to("deepmd/raw", "dpmd_raw") ``` Frame selection can be implemented by ```python -dpdata.LabeledSystem('OUTCAR').sub_system([0,-1]).to('deepmd/raw', 'dpmd_raw') +dpdata.LabeledSystem("OUTCAR").sub_system([0, -1]).to("deepmd/raw", "dpmd_raw") ``` by which only the first and last frames are dumped to `dpmd_raw`. @@ -189,7 +196,13 @@ by which only the first and last frames are dumped to `dpmd_raw`. ## replicate dpdata will create a super cell of the current atom configuration. ```python -dpdata.System('./POSCAR').replicate((1,2,3,) ) +dpdata.System("./POSCAR").replicate( + ( + 1, + 2, + 3, + ) +) ``` tuple(1,2,3) means don't copy atom configuration in x direction, make 2 copys in y direction, make 3 copys in z direction. @@ -197,27 +210,34 @@ tuple(1,2,3) means don't copy atom configuration in x direction, make 2 copys in ## perturb By the following example, each frame of the original system (`dpdata.System('./POSCAR')`) is perturbed to generate three new frames. For each frame, the cell is perturbed by 5% and the atom positions are perturbed by 0.6 Angstrom. `atom_pert_style` indicates that the perturbation to the atom positions is subject to normal distribution. Other available options to `atom_pert_style` are`uniform` (uniform in a ball), and `const` (uniform on a sphere). ```python -perturbed_system = dpdata.System('./POSCAR').perturb(pert_num=3, +perturbed_system = dpdata.System("./POSCAR").perturb( + pert_num=3, cell_pert_fraction=0.05, atom_pert_distance=0.6, - atom_pert_style='normal') + atom_pert_style="normal", +) print(perturbed_system.data) ``` ## replace By the following example, Random 8 Hf atoms in the system will be replaced by Zr atoms with the atom postion unchanged. ```python -s=dpdata.System('tests/poscars/POSCAR.P42nmc',fmt='vasp/poscar') -s.replace('Hf', 'Zr', 8) -s.to_vasp_poscar('POSCAR.P42nmc.replace') +s = dpdata.System("tests/poscars/POSCAR.P42nmc", fmt="vasp/poscar") +s.replace("Hf", "Zr", 8) +s.to_vasp_poscar("POSCAR.P42nmc.replace") ``` # BondOrderSystem A new class `BondOrderSystem` which inherits from class `System` is introduced in dpdata. This new class contains information of chemical bonds and formal charges (stored in `BondOrderSystem.data['bonds']`, `BondOrderSystem.data['formal_charges']`). Now BondOrderSystem can only read from .mol/.sdf formats, because of its dependency on rdkit (which means rdkit must be installed if you want to use this function). Other formats, such as pdb, must be converted to .mol/.sdf format (maybe with software like open babel). ```python import dpdata -system_1 = dpdata.BondOrderSystem("tests/bond_order/CH3OH.mol", fmt="mol") # read from .mol file -system_2 = dpdata.BondOrderSystem("tests/bond_order/methane.sdf", fmt="sdf") # read from .sdf file + +system_1 = dpdata.BondOrderSystem( + "tests/bond_order/CH3OH.mol", fmt="mol" +) # read from .mol file +system_2 = dpdata.BondOrderSystem( + "tests/bond_order/methane.sdf", fmt="sdf" +) # read from .sdf file ``` In sdf file, all molecules must be of the same topology (i.e. conformers of the same molecular configuration). `BondOrderSystem` also supports initialize from a `rdkit.Chem.rdchem.Mol` object directly. @@ -244,16 +264,16 @@ According to our test, our sanitization procedure can successfully read 4852 sma import dpdata for sdf_file in glob.glob("bond_order/refined-set-ligands/obabel/*sdf"): - syst = dpdata.BondOrderSystem(sdf_file, sanitize_level='high', verbose=False) + syst = dpdata.BondOrderSystem(sdf_file, sanitize_level="high", verbose=False) ``` ## Formal Charge Assignment BondOrderSystem implement a method to assign formal charge for each atom based on the 8-electron rule (see below). Note that it only supports common elements in bio-system: B,C,N,O,P,S,As ```python import dpdata -syst = dpdata.BondOrderSystem("tests/bond_order/CH3NH3+.mol", fmt='mol') -print(syst.get_formal_charges()) # return the formal charge on each atom -print(syst.get_charge()) # return the total charge of the system +syst = dpdata.BondOrderSystem("tests/bond_order/CH3NH3+.mol", fmt="mol") +print(syst.get_formal_charges()) # return the formal charge on each atom +print(syst.get_charge()) # return the total charge of the system ``` If a valence of 3 is detected on carbon, the formal charge will be assigned to -1. Because for most cases (in alkynyl anion, isonitrile, cyclopentadienyl anion), the formal charge on 3-valence carbon is -1, and this is also consisent with the 8-electron rule. From d7829e515c3c2f307f603238e8eae50934dc4280 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sun, 29 Jan 2023 03:27:42 -0500 Subject: [PATCH 12/20] pin parmed version for py37 (#416) The new version of parmed drops Python 3.7. However, it did not set `requires-python`, making pip on Python 3.7 still installs the latest version. --- pyproject.toml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index ee926aae..8c1dac02 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,7 +41,10 @@ dpdata = "dpdata.cli:dpdata_cli" [project.optional-dependencies] ase = ['ase'] -amber = ['parmed'] +amber = [ + 'parmed; python_version >= "3.8"', + 'parmed<4; python_version < "3.8"', +] pymatgen = ['pymatgen'] docs = [ 'sphinx', From 4283c9d050d3c574db27d91e82c9980b6dcdaa57 Mon Sep 17 00:00:00 2001 From: pxlxingliang <91927439+pxlxingliang@users.noreply.github.com> Date: Tue, 31 Jan 2023 08:23:54 +0800 Subject: [PATCH 13/20] refactor(abacus): get energy by keyword "final etot is" in abacus/scf (#417) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- dpdata/abacus/scf.py | 18 +++++++++--------- tests/test_abacus_pw_scf.py | 28 +++++++++++++--------------- 2 files changed, 22 insertions(+), 24 deletions(-) diff --git a/dpdata/abacus/scf.py b/dpdata/abacus/scf.py index 94f4bf7b..4bbd7091 100644 --- a/dpdata/abacus/scf.py +++ b/dpdata/abacus/scf.py @@ -109,16 +109,16 @@ def get_coords(celldm, cell, geometry_inlines, inlines=None): def get_energy(outlines): Etot = None - for line in outlines: - if "!FINAL_ETOT_IS" in line: - Etot = float(line.split()[1]) # in eV - break - if not Etot: - return Etot, False - for line in outlines: - if "convergence has NOT been achieved!" in line: + for line in reversed(outlines): + if "final etot is" in line: + Etot = float(line.split()[-2]) # in eV + return Etot, True + elif "convergence has NOT been achieved!" in line: + return Etot, False + elif "convergence has not been achieved" in line: return Etot, False - return Etot, True + + return Etot, False def get_force(outlines, natoms): diff --git a/tests/test_abacus_pw_scf.py b/tests/test_abacus_pw_scf.py index fc06f227..6ada7736 100644 --- a/tests/test_abacus_pw_scf.py +++ b/tests/test_abacus_pw_scf.py @@ -7,7 +7,19 @@ bohr2ang = LengthConversion("bohr", "angstrom").value() -class TestABACUSSinglePointEnergy: +class TestABACUSLabeledOutput(unittest.TestCase): + def setUp(self): + shutil.copy("abacus.scf/INPUT.ok", "abacus.scf/INPUT") + self.system_ch4 = dpdata.LabeledSystem("abacus.scf", fmt="abacus/scf") + # self.system_h2o = dpdata.LabeledSystem('qe.scf/02.out',fmt='qe/pw/scf') + self.system_ch4_unlabeled = dpdata.System( + "abacus.scf/STRU.ch4", fmt="abacus/stru" + ) + + def tearDown(self): + if os.path.isfile("abacus.scf/INPUT"): + os.remove("abacus.scf/INPUT") + def test_atom_names(self): self.assertEqual(self.system_ch4.data["atom_names"], ["C", "H"]) # self.assertEqual(self.system_h2o.data['atom_names'], ['O','H']) @@ -112,20 +124,6 @@ def test_energy(self): # self.assertAlmostEqual(self.system_h2o.data['energies'][0], ref_energy) -class TestABACUSLabeledOutput(unittest.TestCase, TestABACUSSinglePointEnergy): - def setUp(self): - shutil.copy("abacus.scf/INPUT.ok", "abacus.scf/INPUT") - self.system_ch4 = dpdata.LabeledSystem("abacus.scf", fmt="abacus/scf") - # self.system_h2o = dpdata.LabeledSystem('qe.scf/02.out',fmt='qe/pw/scf') - self.system_ch4_unlabeled = dpdata.System( - "abacus.scf/STRU.ch4", fmt="abacus/stru" - ) - - def tearDown(self): - if os.path.isfile("abacus.scf/INPUT"): - os.remove("abacus.scf/INPUT") - - class TestABACUSLabeledOutputFail(unittest.TestCase): def setUp(self): shutil.copy("abacus.scf/INPUT.fail", "abacus.scf/INPUT") From d9d6d5fff7932cf42488cb75fd556d18906fbfb2 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 30 Jan 2023 19:26:53 -0500 Subject: [PATCH 14/20] docs: add the list of drivers and minimizers (#415) Signed-off-by: Jinzhe Zeng Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- docs/drivers.rst | 8 +++++++ docs/index.rst | 2 ++ docs/make_format.py | 51 +++++++++++++++++++++++++++++++++++++++++++++ docs/minimizers.rst | 8 +++++++ dpdata/driver.py | 22 +++++++++++++++++++ 5 files changed, 91 insertions(+) create mode 100644 docs/drivers.rst create mode 100644 docs/minimizers.rst diff --git a/docs/drivers.rst b/docs/drivers.rst new file mode 100644 index 00000000..19cc0619 --- /dev/null +++ b/docs/drivers.rst @@ -0,0 +1,8 @@ +Supported Drivers +================= + +dpdata supports the following drivers: + +.. csv-table:: Supported Drivers + :file: drivers.csv + :header-rows: 1 diff --git a/docs/index.rst b/docs/index.rst index 6b0c5344..c25592c1 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -13,6 +13,8 @@ Welcome to dpdata's documentation! Overview cli formats + drivers + minimizers api/api credits diff --git a/docs/make_format.py b/docs/make_format.py index ae8002c1..801e0830 100644 --- a/docs/make_format.py +++ b/docs/make_format.py @@ -1,9 +1,12 @@ import csv +from typing import Any from collections import defaultdict # ensure all plugins are loaded! import dpdata.plugins from dpdata.format import Format +from dpdata.driver import Driver +from dpdata.driver import Minimizer from dpdata.system import get_cls_name @@ -14,6 +17,20 @@ def get_formats() -> dict: return formats +def get_driver() -> dict: + drivers = defaultdict(list) + for kk, ff in Driver.get_drivers().items(): + drivers[ff].append(kk) + return drivers + + +def get_minimizer() -> dict: + minimizers = defaultdict(list) + for kk, ff in Minimizer.get_minimizers().items(): + minimizers[ff].append(kk) + return minimizers + + def detect_overridden(cls: Format, method: str) -> bool: """Check whether a method is override @@ -102,3 +119,37 @@ def check_supported(fmt: Format): ), } ) + + drivers = get_driver() + with open("drivers.csv", "w", newline="") as csvfile: + fieldnames = [ + "Class", + "Alias", + ] + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + + writer.writeheader() + for kk, vv in drivers.items(): + writer.writerow( + { + "Class": get_cls_link(kk), + "Alias": "\n".join(("``%s``" % vvv for vvv in vv)), + } + ) + + minimizers = get_minimizer() + with open("minimizers.csv", "w", newline="") as csvfile: + fieldnames = [ + "Class", + "Alias", + ] + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + + writer.writeheader() + for kk, vv in minimizers.items(): + writer.writerow( + { + "Class": get_cls_link(kk), + "Alias": "\n".join(("``%s``" % vvv for vvv in vv)), + } + ) diff --git a/docs/minimizers.rst b/docs/minimizers.rst new file mode 100644 index 00000000..88551081 --- /dev/null +++ b/docs/minimizers.rst @@ -0,0 +1,8 @@ +Supported Minimizers +==================== + +dpdata supports the following minimizers: + +.. csv-table:: Supported Minimizers + :file: minimizers.csv + :header-rows: 1 diff --git a/dpdata/driver.py b/dpdata/driver.py index 0f903947..2d66ae41 100644 --- a/dpdata/driver.py +++ b/dpdata/driver.py @@ -64,6 +64,17 @@ def get_driver(key: str) -> "Driver": except KeyError as e: raise RuntimeError("Unknown driver: " + key) from e + @staticmethod + def get_drivers() -> dict: + """Get all driver plugins. + + Returns + ------- + dict + dict for all driver plugisn + """ + return Driver.__DriverPlugin.plugins + def __init__(self, *args, **kwargs) -> None: """Setup the driver.""" @@ -206,6 +217,17 @@ def get_minimizer(key: str) -> "Minimizer": except KeyError as e: raise RuntimeError("Unknown minimizer: " + key) from e + @staticmethod + def get_minimizers() -> dict: + """Get all minimizer plugins. + + Returns + ------- + dict + dict for all minimizer plugisn + """ + return Minimizer.__MinimizerPlugin.plugins + def __init__(self, *args, **kwargs) -> None: """Setup the minimizer.""" From 4f1a91167b58f9042ae17cae05b609b7fdf5f20c Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 2 Feb 2023 22:51:42 -0500 Subject: [PATCH 15/20] add isort to sort imports (#418) This is an automatic tool to format and sort imports. See https://github.com/PyCQA/isort for details. --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 5 ++++ docs/conf.py | 2 +- docs/make_format.py | 5 ++-- dpdata/__init__.py | 8 ++---- dpdata/abacus/md.py | 15 ++++++----- dpdata/abacus/relax.py | 7 +++-- dpdata/abacus/scf.py | 9 ++++--- dpdata/amber/md.py | 7 +++-- dpdata/amber/sqm.py | 1 + dpdata/ase_calculator.py | 5 ++-- dpdata/bond_order_system.py | 10 ++++--- dpdata/cli.py | 2 +- dpdata/cp2k/cell.py | 5 ++-- dpdata/cp2k/output.py | 7 ++--- dpdata/deepmd/comp.py | 6 ++++- dpdata/deepmd/hdf5.py | 2 -- dpdata/deepmd/raw.py | 1 + dpdata/driver.py | 5 ++-- dpdata/fhi_aims/output.py | 3 ++- dpdata/format.py | 2 +- dpdata/gaussian/gjf.py | 5 ++-- dpdata/gaussian/log.py | 3 ++- dpdata/gromacs/gro.py | 2 ++ dpdata/lammps/dump.py | 7 +++-- dpdata/md/msd.py | 1 + dpdata/md/rdf.py | 2 +- dpdata/md/water.py | 6 ++--- dpdata/periodic_table.py | 3 ++- dpdata/plugins/3dmol.py | 1 + dpdata/plugins/abacus.py | 2 +- dpdata/plugins/amber.py | 4 +-- dpdata/plugins/ase.py | 8 +++--- dpdata/plugins/cp2k.py | 3 ++- dpdata/plugins/deepmd.py | 11 ++++---- dpdata/plugins/gaussian.py | 6 ++--- dpdata/plugins/lammps.py | 2 +- dpdata/plugins/list.py | 2 +- dpdata/plugins/pwmat.py | 5 ++-- dpdata/plugins/pymatgen.py | 5 ++-- dpdata/plugins/qe.py | 4 +-- dpdata/plugins/rdkit.py | 1 + dpdata/plugins/siesta.py | 2 +- dpdata/plugins/vasp.py | 5 ++-- dpdata/plugins/xyz.py | 2 +- dpdata/pwmat/atomconfig.py | 3 ++- dpdata/pwmat/movement.py | 4 ++- dpdata/pymatgen/molecule.py | 1 + dpdata/qe/scf.py | 4 ++- dpdata/qe/traj.py | 8 ++++-- dpdata/rdkit/sanitize.py | 7 ++--- dpdata/system.py | 26 ++++++++----------- dpdata/unit.py | 1 + dpdata/utils.py | 1 + dpdata/vasp/outcar.py | 3 ++- dpdata/vasp/xml.py | 1 + dpdata/xyz/quip_gap_xyz.py | 5 ++-- plugin_example/dpdata_random/__init__.py | 3 ++- pyproject.toml | 3 +++ tests/context.py | 9 ++++--- tests/poscars/test_lammps_dump_s_su.py | 3 ++- tests/test_abacus_md.py | 4 ++- tests/test_abacus_pw_scf.py | 5 +++- tests/test_abacus_relax.py | 7 +++-- tests/test_abacus_stru_dump.py | 3 ++- tests/test_amber_md.py | 5 ++-- tests/test_amber_sqm.py | 5 ++-- tests/test_ase_traj.py | 5 ++-- tests/test_bond_order_system.py | 7 +++-- tests/test_cell_to_low_triangle.py | 3 ++- tests/test_cli.py | 3 ++- tests/test_corr.py | 4 +-- tests/test_cp2k_aimd_output.py | 6 +++-- tests/test_cp2k_output.py | 5 ++-- tests/test_deepmd_comp.py | 8 +++--- tests/test_deepmd_hdf5.py | 5 ++-- tests/test_deepmd_raw.py | 8 +++--- tests/test_elements_index.py | 4 ++- tests/test_empty.py | 3 ++- tests/test_fhi_md_multi_elem_output.py | 3 ++- tests/test_fhi_md_output.py | 3 ++- tests/test_fhi_output.py | 3 ++- tests/test_gaussian_driver.py | 6 ++--- tests/test_gaussian_gjf.py | 2 +- tests/test_gaussian_log.py | 3 ++- tests/test_gromacs_gro.py | 3 ++- tests/test_json.py | 5 ++-- tests/test_lammps_dump_idx.py | 3 ++- tests/test_lammps_dump_shift_origin.py | 5 ++-- tests/test_lammps_dump_skipload.py | 5 ++-- tests/test_lammps_dump_to_system.py | 3 ++- tests/test_lammps_dump_unfold.py | 3 ++- tests/test_lammps_lmp_dump.py | 3 ++- tests/test_lammps_lmp_to_system.py | 3 ++- tests/test_lammps_read_from_trajs.py | 3 ++- tests/test_msd.py | 3 ++- tests/test_multisystems.py | 10 +++---- tests/test_periodic_table.py | 1 + tests/test_perturb.py | 9 +++---- tests/test_pick_atom_idx.py | 5 ++-- tests/test_predict.py | 2 +- tests/test_pwmat_config_dump.py | 6 +++-- tests/test_pwmat_config_to_system.py | 6 +++-- tests/test_pwmat_mlmd.py | 4 ++- tests/test_pwmat_movement.py | 4 ++- tests/test_pymatgen_molecule.py | 3 ++- tests/test_qe_cp_traj.py | 3 ++- tests/test_qe_cp_traj_skipload.py | 7 +++-- tests/test_qe_pw_scf.py | 3 ++- ...test_qe_pw_scf_crystal_atomic_positions.py | 3 ++- tests/test_qe_pw_scf_energy_bug.py | 3 ++- tests/test_quip_gap_xyz.py | 5 ++-- tests/test_remove_atom_names.py | 4 +-- tests/test_remove_pbc.py | 3 ++- tests/test_replace.py | 9 +++---- tests/test_replicate.py | 5 ++-- tests/test_shuffle.py | 3 ++- tests/test_siesta_aiMD_output.py | 3 ++- tests/test_siesta_output.py | 3 ++- tests/test_sqm_driver.py | 4 +-- tests/test_stat.py | 4 +-- tests/test_system_append.py | 7 +++-- tests/test_system_apply_pbc.py | 3 ++- tests/test_system_set_type.py | 3 +-- tests/test_to_ase.py | 5 ++-- tests/test_to_list.py | 5 ++-- tests/test_to_pymatgen.py | 5 ++-- tests/test_to_pymatgen_entry.py | 5 ++-- tests/test_type_map.py | 3 ++- tests/test_vasp_outcar.py | 6 +++-- tests/test_vasp_poscar_dump.py | 3 ++- tests/test_vasp_poscar_to_system.py | 5 ++-- tests/test_vasp_unconverged_outcar.py | 3 ++- tests/test_vasp_xml.py | 7 +++-- tests/test_water_ions.py | 3 ++- tests/test_xyz.py | 5 ++-- 135 files changed, 369 insertions(+), 241 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9ca30f7a..0624240e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,6 +21,11 @@ repos: rev: 22.12.0 hooks: - id: black-jupyter +- repo: https://github.com/PyCQA/isort + rev: 5.12.0 + hooks: + - id: isort + files: \.py$ # Python inside docs - repo: https://github.com/asottile/blacken-docs rev: 1.13.0 diff --git a/docs/conf.py b/docs/conf.py index eabf1c84..d09cf3fd 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -13,8 +13,8 @@ # documentation root, use os.path.abspath to make it absolute, like shown here. # import os -import sys import subprocess as sp +import sys from datetime import date sys.path.insert(0, os.path.abspath("..")) diff --git a/docs/make_format.py b/docs/make_format.py index 801e0830..a7962208 100644 --- a/docs/make_format.py +++ b/docs/make_format.py @@ -1,12 +1,11 @@ import csv -from typing import Any from collections import defaultdict +from typing import Any # ensure all plugins are loaded! import dpdata.plugins +from dpdata.driver import Driver, Minimizer from dpdata.format import Format -from dpdata.driver import Driver -from dpdata.driver import Minimizer from dpdata.system import get_cls_name diff --git a/dpdata/__init__.py b/dpdata/__init__.py index f426b790..d160717c 100644 --- a/dpdata/__init__.py +++ b/dpdata/__init__.py @@ -1,9 +1,5 @@ -from . import vasp -from . import lammps -from . import md -from .system import System -from .system import LabeledSystem -from .system import MultiSystems +from . import lammps, md, vasp +from .system import LabeledSystem, MultiSystems, System try: from ._version import version as __version__ diff --git a/dpdata/abacus/md.py b/dpdata/abacus/md.py index be6bee47..7532b7b2 100644 --- a/dpdata/abacus/md.py +++ b/dpdata/abacus/md.py @@ -1,17 +1,20 @@ +import os +import re +import sys +import warnings from ast import dump -import os, sys + import numpy as np + from .scf import ( - ry2ev, bohr2ang, - kbar2evperang3, get_block, - get_geometry_in, get_cell, get_coords, + get_geometry_in, + kbar2evperang3, + ry2ev, ) -import re -import warnings # Read in geometries from an ABACUS MD trajectory. # The atomic coordinates are read in from generated files in OUT.XXXX. diff --git a/dpdata/abacus/relax.py b/dpdata/abacus/relax.py index 6b552174..7d0da0fa 100644 --- a/dpdata/abacus/relax.py +++ b/dpdata/abacus/relax.py @@ -1,6 +1,9 @@ -import os, sys +import os +import sys + import numpy as np -from .scf import bohr2ang, kbar2evperang3, get_geometry_in, get_cell, get_coords + +from .scf import bohr2ang, get_cell, get_coords, get_geometry_in, kbar2evperang3 # Read in geometries from an ABACUS RELAX(CELL-RELAX) trajectory in OUT.XXXX/runnning_relax/cell-relax.log. diff --git a/dpdata/abacus/scf.py b/dpdata/abacus/scf.py index 4bbd7091..e82089ad 100644 --- a/dpdata/abacus/scf.py +++ b/dpdata/abacus/scf.py @@ -1,7 +1,10 @@ -import os, sys -import numpy as np -from ..unit import EnergyConversion, PressureConversion, LengthConversion +import os import re +import sys + +import numpy as np + +from ..unit import EnergyConversion, LengthConversion, PressureConversion bohr2ang = LengthConversion("bohr", "angstrom").value() ry2ev = EnergyConversion("rydberg", "eV").value() diff --git a/dpdata/amber/md.py b/dpdata/amber/md.py index 0d178104..f477efab 100644 --- a/dpdata/amber/md.py +++ b/dpdata/amber/md.py @@ -1,9 +1,12 @@ -import re import os -from scipy.io import netcdf +import re + import numpy as np +from scipy.io import netcdf + from dpdata.amber.mask import pick_by_amber_mask from dpdata.unit import EnergyConversion + from ..periodic_table import ELEMENTS kcalmol2eV = EnergyConversion("kcal_mol", "eV").value() diff --git a/dpdata/amber/sqm.py b/dpdata/amber/sqm.py index 7826d201..9b450501 100644 --- a/dpdata/amber/sqm.py +++ b/dpdata/amber/sqm.py @@ -1,4 +1,5 @@ import numpy as np + from dpdata.periodic_table import ELEMENTS from dpdata.unit import EnergyConversion diff --git a/dpdata/ase_calculator.py b/dpdata/ase_calculator.py index ea5e5009..65a462a5 100644 --- a/dpdata/ase_calculator.py +++ b/dpdata/ase_calculator.py @@ -1,12 +1,13 @@ -from typing import List, Optional, TYPE_CHECKING +from typing import TYPE_CHECKING, List, Optional from ase.calculators.calculator import ( Calculator, - all_changes, PropertyNotImplementedError, + all_changes, ) import dpdata + from .driver import Driver if TYPE_CHECKING: diff --git a/dpdata/bond_order_system.py b/dpdata/bond_order_system.py index 6cb834d4..124a6faa 100644 --- a/dpdata/bond_order_system.py +++ b/dpdata/bond_order_system.py @@ -1,12 +1,14 @@ #%% # Bond Order System -import numpy as np -from dpdata.system import System, LabeledSystem, load_format, DataType, Axis -import dpdata.rdkit.utils -from dpdata.rdkit.sanitize import Sanitizer, SanitizeError from copy import deepcopy + +import numpy as np from rdkit.Chem import Conformer +import dpdata.rdkit.utils +from dpdata.rdkit.sanitize import SanitizeError, Sanitizer +from dpdata.system import Axis, DataType, LabeledSystem, System, load_format + # import dpdata.rdkit.mol2 diff --git a/dpdata/cli.py b/dpdata/cli.py index 88e49ba5..c80de99c 100644 --- a/dpdata/cli.py +++ b/dpdata/cli.py @@ -3,7 +3,7 @@ from typing import Optional from . import __version__ -from .system import System, LabeledSystem, MultiSystems +from .system import LabeledSystem, MultiSystems, System def dpdata_parser() -> argparse.ArgumentParser: diff --git a/dpdata/cp2k/cell.py b/dpdata/cp2k/cell.py index 3fd8b6c5..33e56637 100644 --- a/dpdata/cp2k/cell.py +++ b/dpdata/cp2k/cell.py @@ -1,7 +1,8 @@ #%% -import numpy as np -from collections import OrderedDict import re +from collections import OrderedDict + +import numpy as np def cell_to_low_triangle(A, B, C, alpha, beta, gamma): diff --git a/dpdata/cp2k/output.py b/dpdata/cp2k/output.py index 965d0656..f7b94f25 100644 --- a/dpdata/cp2k/output.py +++ b/dpdata/cp2k/output.py @@ -1,16 +1,17 @@ #%% -import numpy as np import re from collections import OrderedDict +import numpy as np from scipy.constants import R -from .cell import cell_to_low_triangle + from ..unit import ( EnergyConversion, - LengthConversion, ForceConversion, + LengthConversion, PressureConversion, ) +from .cell import cell_to_low_triangle #%% AU_TO_ANG = LengthConversion("bohr", "angstrom").value() diff --git a/dpdata/deepmd/comp.py b/dpdata/deepmd/comp.py index a5eb7334..66bf4ee6 100644 --- a/dpdata/deepmd/comp.py +++ b/dpdata/deepmd/comp.py @@ -1,5 +1,9 @@ -import os, glob, shutil +import glob +import os +import shutil + import numpy as np + from .raw import load_type diff --git a/dpdata/deepmd/hdf5.py b/dpdata/deepmd/hdf5.py index 1e44b790..69b993ab 100644 --- a/dpdata/deepmd/hdf5.py +++ b/dpdata/deepmd/hdf5.py @@ -3,10 +3,8 @@ import h5py import numpy as np - from wcmatch.glob import globfilter - __all__ = ["to_system_data", "dump"] diff --git a/dpdata/deepmd/raw.py b/dpdata/deepmd/raw.py index 7de14baa..2f2021d4 100644 --- a/dpdata/deepmd/raw.py +++ b/dpdata/deepmd/raw.py @@ -1,4 +1,5 @@ import os + import numpy as np diff --git a/dpdata/driver.py b/dpdata/driver.py index 2d66ae41..8f952984 100644 --- a/dpdata/driver.py +++ b/dpdata/driver.py @@ -1,7 +1,8 @@ """Driver plugin system.""" -from typing import Callable, List, Union, TYPE_CHECKING -from .plugin import Plugin from abc import ABC, abstractmethod +from typing import TYPE_CHECKING, Callable, List, Union + +from .plugin import Plugin if TYPE_CHECKING: import ase diff --git a/dpdata/fhi_aims/output.py b/dpdata/fhi_aims/output.py index 423957b7..9706b91a 100755 --- a/dpdata/fhi_aims/output.py +++ b/dpdata/fhi_aims/output.py @@ -1,7 +1,8 @@ -import numpy as np import re import warnings +import numpy as np + latt_patt = "\|\s+([0-9]{1,}[.][0-9]*)\s+([0-9]{1,}[.][0-9]*)\s+([0-9]{1,}[.][0-9]*)" pos_patt_first = "\|\s+[0-9]{1,}[:]\s\w+\s(\w+)(\s.*[-]?[0-9]{1,}[.][0-9]*)(\s+[-]?[0-9]{1,}[.][0-9]*)(\s+[-]?[0-9]{1,}[.][0-9]*)" pos_patt_other = "\s+[a][t][o][m]\s+([-]?[0-9]{1,}[.][0-9]*)\s+([-]?[0-9]{1,}[.][0-9]*)\s+([-]?[0-9]{1,}[.][0-9]*)\s+(\w{1,2})" diff --git a/dpdata/format.py b/dpdata/format.py index 84813c76..e40ad190 100644 --- a/dpdata/format.py +++ b/dpdata/format.py @@ -1,7 +1,7 @@ """Implement the format plugin system.""" import os -from collections import abc from abc import ABC +from collections import abc from .plugin import Plugin diff --git a/dpdata/gaussian/gjf.py b/dpdata/gaussian/gjf.py index be089e24..354187f0 100644 --- a/dpdata/gaussian/gjf.py +++ b/dpdata/gaussian/gjf.py @@ -3,10 +3,11 @@ # under LGPL 3.0 license """Generate Gaussian input file.""" -from typing import Optional, List, Tuple, Union -import uuid import itertools +import uuid import warnings +from typing import List, Optional, Tuple, Union + import numpy as np from scipy.sparse import csr_matrix from scipy.sparse.csgraph import connected_components diff --git a/dpdata/gaussian/log.py b/dpdata/gaussian/log.py index 54bc1d51..73ae693d 100644 --- a/dpdata/gaussian/log.py +++ b/dpdata/gaussian/log.py @@ -1,6 +1,7 @@ import numpy as np -from ..unit import LengthConversion, EnergyConversion, ForceConversion + from ..periodic_table import ELEMENTS +from ..unit import EnergyConversion, ForceConversion, LengthConversion length_convert = LengthConversion("bohr", "angstrom").value() energy_convert = EnergyConversion("hartree", "eV").value() diff --git a/dpdata/gromacs/gro.py b/dpdata/gromacs/gro.py index b9930f2b..b643eea8 100644 --- a/dpdata/gromacs/gro.py +++ b/dpdata/gromacs/gro.py @@ -1,6 +1,8 @@ #!/usr/bin/env python3 import re + import numpy as np + from ..unit import LengthConversion nm2ang = LengthConversion("nm", "angstrom").value() diff --git a/dpdata/lammps/dump.py b/dpdata/lammps/dump.py index 85b87ba8..2e4dd561 100644 --- a/dpdata/lammps/dump.py +++ b/dpdata/lammps/dump.py @@ -1,13 +1,16 @@ #!/usr/bin/env python3 -import os, sys +import os +import sys + import numpy as np lib_path = os.path.dirname(os.path.realpath(__file__)) sys.path.append(lib_path) -import lmp import warnings +import lmp + class UnwrapWarning(UserWarning): pass diff --git a/dpdata/md/msd.py b/dpdata/md/msd.py index eebc7296..cfb446dd 100644 --- a/dpdata/md/msd.py +++ b/dpdata/md/msd.py @@ -1,4 +1,5 @@ import numpy as np + from .pbc import system_pbc_shift diff --git a/dpdata/md/rdf.py b/dpdata/md/rdf.py index 220bdcb0..16f6bacc 100644 --- a/dpdata/md/rdf.py +++ b/dpdata/md/rdf.py @@ -68,8 +68,8 @@ def _compute_rdf_1frame(box, posis, atype, sel_type=[None, None], max_r=5, nbins if type(sel_type[1]) is not list: sel_type[1] = [sel_type[1]] natoms = len(posis) - from ase import Atoms import ase.neighborlist + from ase import Atoms atoms = Atoms(positions=posis, cell=box, pbc=[1, 1, 1]) nlist = ase.neighborlist.NeighborList( diff --git a/dpdata/md/water.py b/dpdata/md/water.py index b9ab833b..42e27243 100644 --- a/dpdata/md/water.py +++ b/dpdata/md/water.py @@ -1,6 +1,6 @@ import numpy as np -from .pbc import posi_diff -from .pbc import posi_shift + +from .pbc import posi_diff, posi_shift def compute_bonds(box, posis, atype, oh_sel=[0, 1], max_roh=1.3, uniq_hbond=True): @@ -17,8 +17,8 @@ def compute_bonds(box, posis, atype, oh_sel=[0, 1], max_roh=1.3, uniq_hbond=True def compute_bonds_ase(box, posis, atype, oh_sel=[0, 1], max_roh=1.3, uniq_hbond=True): natoms = len(posis) - from ase import Atoms import ase.neighborlist + from ase import Atoms atoms = Atoms(positions=posis, cell=box, pbc=[1, 1, 1]) nlist = ase.neighborlist.NeighborList( diff --git a/dpdata/periodic_table.py b/dpdata/periodic_table.py index dc64d40a..30ffb1be 100644 --- a/dpdata/periodic_table.py +++ b/dpdata/periodic_table.py @@ -1,5 +1,6 @@ from pathlib import Path -from monty.serialization import loadfn, dumpfn + +from monty.serialization import dumpfn, loadfn fpdt = str(Path(__file__).absolute().parent / "periodic_table.json") _pdt = loadfn(fpdt) diff --git a/dpdata/plugins/3dmol.py b/dpdata/plugins/3dmol.py index fa9f02aa..3b463abb 100644 --- a/dpdata/plugins/3dmol.py +++ b/dpdata/plugins/3dmol.py @@ -1,4 +1,5 @@ from typing import Tuple + import numpy as np from dpdata.format import Format diff --git a/dpdata/plugins/abacus.py b/dpdata/plugins/abacus.py index a9c82b05..43abf318 100644 --- a/dpdata/plugins/abacus.py +++ b/dpdata/plugins/abacus.py @@ -1,6 +1,6 @@ -import dpdata.abacus.scf import dpdata.abacus.md import dpdata.abacus.relax +import dpdata.abacus.scf from dpdata.format import Format diff --git a/dpdata/plugins/amber.py b/dpdata/plugins/amber.py index cf2df3ca..4d6a2229 100644 --- a/dpdata/plugins/amber.py +++ b/dpdata/plugins/amber.py @@ -1,11 +1,11 @@ -import tempfile import os import subprocess as sp +import tempfile import dpdata.amber.md import dpdata.amber.sqm -from dpdata.format import Format from dpdata.driver import Driver, Minimizer +from dpdata.format import Format @Format.register("amber/md") diff --git a/dpdata/plugins/ase.py b/dpdata/plugins/ase.py index b6e0fcb7..9a626be8 100644 --- a/dpdata/plugins/ase.py +++ b/dpdata/plugins/ase.py @@ -1,8 +1,10 @@ -from typing import Optional, TYPE_CHECKING, Type -from dpdata.driver import Driver, Minimizer -from dpdata.format import Format +from typing import TYPE_CHECKING, Optional, Type + import numpy as np + import dpdata +from dpdata.driver import Driver, Minimizer +from dpdata.format import Format try: import ase.io diff --git a/dpdata/plugins/cp2k.py b/dpdata/plugins/cp2k.py index 143c1821..e1df43e5 100644 --- a/dpdata/plugins/cp2k.py +++ b/dpdata/plugins/cp2k.py @@ -1,5 +1,6 @@ -import dpdata.cp2k.output import glob + +import dpdata.cp2k.output from dpdata.cp2k.output import Cp2kSystems from dpdata.format import Format diff --git a/dpdata/plugins/deepmd.py b/dpdata/plugins/deepmd.py index 2e885009..d16b86a5 100644 --- a/dpdata/plugins/deepmd.py +++ b/dpdata/plugins/deepmd.py @@ -1,13 +1,14 @@ -from typing import Optional, Union, List +from typing import List, Optional, Union + +import h5py +import numpy as np import dpdata -import dpdata.deepmd.raw import dpdata.deepmd.comp import dpdata.deepmd.hdf5 -import numpy as np -import h5py -from dpdata.format import Format +import dpdata.deepmd.raw from dpdata.driver import Driver +from dpdata.format import Format @Format.register("deepmd") diff --git a/dpdata/plugins/gaussian.py b/dpdata/plugins/gaussian.py index 37a5ee8b..416c61ad 100644 --- a/dpdata/plugins/gaussian.py +++ b/dpdata/plugins/gaussian.py @@ -1,11 +1,11 @@ import os -import tempfile import subprocess as sp +import tempfile -import dpdata.gaussian.log import dpdata.gaussian.gjf -from dpdata.format import Format +import dpdata.gaussian.log from dpdata.driver import Driver +from dpdata.format import Format @Format.register("gaussian/log") diff --git a/dpdata/plugins/lammps.py b/dpdata/plugins/lammps.py index d4bce01b..300bd4ef 100644 --- a/dpdata/plugins/lammps.py +++ b/dpdata/plugins/lammps.py @@ -1,5 +1,5 @@ -import dpdata.lammps.lmp import dpdata.lammps.dump +import dpdata.lammps.lmp from dpdata.format import Format diff --git a/dpdata/plugins/list.py b/dpdata/plugins/list.py index 99ac6d4a..f7880fa0 100644 --- a/dpdata/plugins/list.py +++ b/dpdata/plugins/list.py @@ -7,7 +7,7 @@ def to_system(self, data, **kwargs): """ convert system to list, usefull for data collection """ - from dpdata import System, LabeledSystem + from dpdata import LabeledSystem, System if "forces" in data: system = LabeledSystem(data=data) diff --git a/dpdata/plugins/pwmat.py b/dpdata/plugins/pwmat.py index baa415d6..4d123f1e 100644 --- a/dpdata/plugins/pwmat.py +++ b/dpdata/plugins/pwmat.py @@ -1,6 +1,7 @@ -import dpdata.pwmat.movement -import dpdata.pwmat.atomconfig import numpy as np + +import dpdata.pwmat.atomconfig +import dpdata.pwmat.movement from dpdata.format import Format diff --git a/dpdata/plugins/pymatgen.py b/dpdata/plugins/pymatgen.py index 514b8d76..36efc12c 100644 --- a/dpdata/plugins/pymatgen.py +++ b/dpdata/plugins/pymatgen.py @@ -1,7 +1,8 @@ -from dpdata.format import Format -import dpdata.pymatgen.molecule import numpy as np +import dpdata.pymatgen.molecule +from dpdata.format import Format + @Format.register("pymatgen/structure") class PyMatgenStructureFormat(Format): diff --git a/dpdata/plugins/qe.py b/dpdata/plugins/qe.py index 1b95a6d4..d524462f 100644 --- a/dpdata/plugins/qe.py +++ b/dpdata/plugins/qe.py @@ -1,6 +1,6 @@ -import dpdata.qe.traj -import dpdata.qe.scf import dpdata.md.pbc +import dpdata.qe.scf +import dpdata.qe.traj from dpdata.format import Format diff --git a/dpdata/plugins/rdkit.py b/dpdata/plugins/rdkit.py index 043fad1e..64a2f343 100644 --- a/dpdata/plugins/rdkit.py +++ b/dpdata/plugins/rdkit.py @@ -2,6 +2,7 @@ try: import rdkit.Chem + import dpdata.rdkit.utils except ModuleNotFoundError: pass diff --git a/dpdata/plugins/siesta.py b/dpdata/plugins/siesta.py index 5b38e8b2..662b5c0e 100644 --- a/dpdata/plugins/siesta.py +++ b/dpdata/plugins/siesta.py @@ -1,5 +1,5 @@ -import dpdata.siesta.output import dpdata.siesta.aiMD_output +import dpdata.siesta.output from dpdata.format import Format diff --git a/dpdata/plugins/vasp.py b/dpdata/plugins/vasp.py index 5b151f80..6e2fe5f0 100644 --- a/dpdata/plugins/vasp.py +++ b/dpdata/plugins/vasp.py @@ -1,7 +1,8 @@ +import numpy as np + +import dpdata.vasp.outcar import dpdata.vasp.poscar import dpdata.vasp.xml -import dpdata.vasp.outcar -import numpy as np from dpdata.format import Format from dpdata.utils import sort_atom_names, uniq_atom_names diff --git a/dpdata/plugins/xyz.py b/dpdata/plugins/xyz.py index 4db722e3..69cca61b 100644 --- a/dpdata/plugins/xyz.py +++ b/dpdata/plugins/xyz.py @@ -1,8 +1,8 @@ import numpy as np +from dpdata.format import Format from dpdata.xyz.quip_gap_xyz import QuipGapxyzSystems from dpdata.xyz.xyz import coord_to_xyz, xyz_to_coord -from dpdata.format import Format @Format.register("xyz") diff --git a/dpdata/pwmat/atomconfig.py b/dpdata/pwmat/atomconfig.py index 5e953c47..28cfaebc 100644 --- a/dpdata/pwmat/atomconfig.py +++ b/dpdata/pwmat/atomconfig.py @@ -1,7 +1,8 @@ #!/usr/bin/python3 -from ..periodic_table import ELEMENTS import numpy as np +from ..periodic_table import ELEMENTS + def _to_system_data_lower(lines): system = {} diff --git a/dpdata/pwmat/movement.py b/dpdata/pwmat/movement.py index c2e0bf3a..2440641e 100644 --- a/dpdata/pwmat/movement.py +++ b/dpdata/pwmat/movement.py @@ -1,6 +1,8 @@ +import warnings + import numpy as np + from ..periodic_table import ELEMENTS -import warnings def system_info(lines, type_idx_zero=False): diff --git a/dpdata/pymatgen/molecule.py b/dpdata/pymatgen/molecule.py index c2559bef..25e13c29 100644 --- a/dpdata/pymatgen/molecule.py +++ b/dpdata/pymatgen/molecule.py @@ -5,6 +5,7 @@ except ImportError: pass from collections import Counter + import dpdata diff --git a/dpdata/qe/scf.py b/dpdata/qe/scf.py index afdb6ae1..f78be7cb 100755 --- a/dpdata/qe/scf.py +++ b/dpdata/qe/scf.py @@ -1,6 +1,8 @@ #!/usr/bin/env python3 -import os, sys +import os +import sys + import numpy as np ry2ev = 13.605693009 diff --git a/dpdata/qe/traj.py b/dpdata/qe/traj.py index 1bdacab3..700f2ebc 100644 --- a/dpdata/qe/traj.py +++ b/dpdata/qe/traj.py @@ -1,10 +1,14 @@ #!/usr/bin/python3 +import warnings + import numpy as np -import dpdata, warnings + +import dpdata + from ..unit import ( EnergyConversion, - LengthConversion, ForceConversion, + LengthConversion, PressureConversion, ) diff --git a/dpdata/rdkit/sanitize.py b/dpdata/rdkit/sanitize.py index f3b1690e..6a5a1c34 100644 --- a/dpdata/rdkit/sanitize.py +++ b/dpdata/rdkit/sanitize.py @@ -1,8 +1,9 @@ -from copy import deepcopy -from rdkit import Chem -from rdkit.Chem.rdchem import Atom, Bond, Mol, BondType import os import time +from copy import deepcopy + +from rdkit import Chem +from rdkit.Chem.rdchem import Atom, Bond, BondType, Mol # openbabel try: diff --git a/dpdata/system.py b/dpdata/system.py index f7ad7b5f..5a985e04 100644 --- a/dpdata/system.py +++ b/dpdata/system.py @@ -1,30 +1,26 @@ #%% -import os import glob import inspect -import numpy as np -import dpdata.md.pbc +import os from copy import deepcopy from enum import Enum, unique from typing import Any, Tuple, Union + +import numpy as np from monty.json import MSONable -from monty.serialization import loadfn, dumpfn -from dpdata.periodic_table import Element -from dpdata.amber.mask import pick_by_amber_mask, load_param_file +from monty.serialization import dumpfn, loadfn + import dpdata +import dpdata.md.pbc # ensure all plugins are loaded! import dpdata.plugins -from dpdata.plugin import Plugin -from dpdata.format import Format +from dpdata.amber.mask import load_param_file, pick_by_amber_mask from dpdata.driver import Driver, Minimizer - -from dpdata.utils import ( - elements_index_map, - remove_pbc, - sort_atom_names, - add_atom_names, -) +from dpdata.format import Format +from dpdata.periodic_table import Element +from dpdata.plugin import Plugin +from dpdata.utils import add_atom_names, elements_index_map, remove_pbc, sort_atom_names def load_format(fmt): diff --git a/dpdata/unit.py b/dpdata/unit.py index a1dc1c7b..1c7255cc 100644 --- a/dpdata/unit.py +++ b/dpdata/unit.py @@ -1,4 +1,5 @@ from abc import ABC + from scipy import constants AVOGADRO = constants.Avogadro # Avagadro constant diff --git a/dpdata/utils.py b/dpdata/utils.py index 90fef137..c461d8c0 100644 --- a/dpdata/utils.py +++ b/dpdata/utils.py @@ -1,4 +1,5 @@ import numpy as np + from dpdata.periodic_table import Element diff --git a/dpdata/vasp/outcar.py b/dpdata/vasp/outcar.py index ec26a181..e8280d05 100644 --- a/dpdata/vasp/outcar.py +++ b/dpdata/vasp/outcar.py @@ -1,7 +1,8 @@ -import numpy as np import re import warnings +import numpy as np + def system_info(lines, type_idx_zero=False): atom_names = [] diff --git a/dpdata/vasp/xml.py b/dpdata/vasp/xml.py index f87b5716..9d4cb803 100755 --- a/dpdata/vasp/xml.py +++ b/dpdata/vasp/xml.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import xml.etree.ElementTree as ET + import numpy as np diff --git a/dpdata/xyz/quip_gap_xyz.py b/dpdata/xyz/quip_gap_xyz.py index ea2d9a77..a0b9fd4a 100644 --- a/dpdata/xyz/quip_gap_xyz.py +++ b/dpdata/xyz/quip_gap_xyz.py @@ -1,8 +1,9 @@ #!/usr/bin/env python3 #%% -import numpy as np -from collections import OrderedDict import re +from collections import OrderedDict + +import numpy as np class QuipGapxyzSystems(object): diff --git a/plugin_example/dpdata_random/__init__.py b/plugin_example/dpdata_random/__init__.py index 8e1450c9..22820e0f 100644 --- a/plugin_example/dpdata_random/__init__.py +++ b/plugin_example/dpdata_random/__init__.py @@ -1,6 +1,7 @@ -from dpdata.format import Format import numpy as np +from dpdata.format import Format + @Format.register("random") class RandomFormat(Format): diff --git a/pyproject.toml b/pyproject.toml index 8c1dac02..cdb7a25d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -64,3 +64,6 @@ dpdata = ['*.json'] [tool.setuptools_scm] write_to = "dpdata/_version.py" + +[tool.isort] +profile = "black" diff --git a/tests/context.py b/tests/context.py index e305ff0f..10348a8d 100644 --- a/tests/context.py +++ b/tests/context.py @@ -1,9 +1,10 @@ -import sys, os +import os +import sys sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) import dpdata -import dpdata.md.water -import dpdata.md.msd import dpdata.gaussian.gjf -import dpdata.system +import dpdata.md.msd +import dpdata.md.water import dpdata.stat +import dpdata.system diff --git a/tests/poscars/test_lammps_dump_s_su.py b/tests/poscars/test_lammps_dump_s_su.py index 5e914ea5..4b40cf35 100644 --- a/tests/poscars/test_lammps_dump_s_su.py +++ b/tests/poscars/test_lammps_dump_s_su.py @@ -1,6 +1,7 @@ import os -import numpy as np import unittest + +import numpy as np from context import dpdata from poscars.poscar_ref_oh import TestPOSCARoh diff --git a/tests/test_abacus_md.py b/tests/test_abacus_md.py index 89df93b6..f2b80295 100644 --- a/tests/test_abacus_md.py +++ b/tests/test_abacus_md.py @@ -1,7 +1,9 @@ import os -import numpy as np import unittest + +import numpy as np from context import dpdata + from dpdata.unit import LengthConversion bohr2ang = LengthConversion("bohr", "angstrom").value() diff --git a/tests/test_abacus_pw_scf.py b/tests/test_abacus_pw_scf.py index 6ada7736..eb712fbe 100644 --- a/tests/test_abacus_pw_scf.py +++ b/tests/test_abacus_pw_scf.py @@ -1,7 +1,10 @@ import os +import shutil +import unittest + import numpy as np -import unittest, shutil from context import dpdata + from dpdata.unit import LengthConversion bohr2ang = LengthConversion("bohr", "angstrom").value() diff --git a/tests/test_abacus_relax.py b/tests/test_abacus_relax.py index a39249bd..ed9b77dd 100644 --- a/tests/test_abacus_relax.py +++ b/tests/test_abacus_relax.py @@ -1,7 +1,10 @@ -import os, shutil -import numpy as np +import os +import shutil import unittest + +import numpy as np from context import dpdata + from dpdata.unit import LengthConversion bohr2ang = LengthConversion("bohr", "angstrom").value() diff --git a/tests/test_abacus_stru_dump.py b/tests/test_abacus_stru_dump.py index 6d6dbeea..78fcd088 100644 --- a/tests/test_abacus_stru_dump.py +++ b/tests/test_abacus_stru_dump.py @@ -1,6 +1,7 @@ import os -import numpy as np import unittest + +import numpy as np from context import dpdata from test_vasp_poscar_dump import myfilecmp diff --git a/tests/test_amber_md.py b/tests/test_amber_md.py index 5a9cded8..8d8304cc 100644 --- a/tests/test_amber_md.py +++ b/tests/test_amber_md.py @@ -1,8 +1,9 @@ import os -import unittest import shutil -from context import dpdata +import unittest + from comp_sys import CompLabeledSys, IsPBC +from context import dpdata try: import parmed diff --git a/tests/test_amber_sqm.py b/tests/test_amber_sqm.py index f9ca80f8..99c1a285 100644 --- a/tests/test_amber_sqm.py +++ b/tests/test_amber_sqm.py @@ -1,8 +1,9 @@ import os -import unittest import shutil +import unittest + +from comp_sys import CompLabeledSys, CompSys, IsNoPBC from context import dpdata -from comp_sys import CompSys, CompLabeledSys, IsNoPBC try: from dpdata import BondOrderSystem diff --git a/tests/test_ase_traj.py b/tests/test_ase_traj.py index 6f957f84..2a3e9267 100644 --- a/tests/test_ase_traj.py +++ b/tests/test_ase_traj.py @@ -1,8 +1,9 @@ import os -import numpy as np import unittest -from context import dpdata + +import numpy as np from comp_sys import CompLabeledSys, IsPBC +from context import dpdata try: import ase diff --git a/tests/test_bond_order_system.py b/tests/test_bond_order_system.py index d0b8fbd0..52f259ef 100644 --- a/tests/test_bond_order_system.py +++ b/tests/test_bond_order_system.py @@ -1,11 +1,13 @@ +import glob import os import unittest + from context import dpdata -import glob try: from rdkit import Chem from rdkit.Chem import AllChem + from dpdata import BondOrderSystem except ImportError: skip_bond_order_system = True @@ -13,9 +15,10 @@ skip_bond_order_system = False import shutil -import numpy as np from copy import deepcopy +import numpy as np + @unittest.skipIf( skip_bond_order_system, diff --git a/tests/test_cell_to_low_triangle.py b/tests/test_cell_to_low_triangle.py index 6696e171..d3121e38 100644 --- a/tests/test_cell_to_low_triangle.py +++ b/tests/test_cell_to_low_triangle.py @@ -1,6 +1,7 @@ import os -import numpy as np import unittest + +import numpy as np from context import dpdata diff --git a/tests/test_cli.py b/tests/test_cli.py index 7275237a..423b8896 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,7 +1,8 @@ +import subprocess as sp import unittest + from context import dpdata from poscars.poscar_ref_oh import TestPOSCARoh -import subprocess as sp class TestCli(unittest.TestCase, TestPOSCARoh): diff --git a/tests/test_corr.py b/tests/test_corr.py index 74fb2a10..3224c950 100644 --- a/tests/test_corr.py +++ b/tests/test_corr.py @@ -1,7 +1,7 @@ import unittest + +from comp_sys import CompLabeledSys, IsPBC from context import dpdata -from comp_sys import CompLabeledSys -from comp_sys import IsPBC class TestCorr(unittest.TestCase, CompLabeledSys, IsPBC): diff --git a/tests/test_cp2k_aimd_output.py b/tests/test_cp2k_aimd_output.py index 471153b1..f8780426 100644 --- a/tests/test_cp2k_aimd_output.py +++ b/tests/test_cp2k_aimd_output.py @@ -1,9 +1,11 @@ #%% import os -import numpy as np import unittest -from context import dpdata + +import numpy as np from comp_sys import CompLabeledSys +from context import dpdata + #%% class TestCp2kAimdOutput(unittest.TestCase, CompLabeledSys): diff --git a/tests/test_cp2k_output.py b/tests/test_cp2k_output.py index 37c639c6..a7af4696 100644 --- a/tests/test_cp2k_output.py +++ b/tests/test_cp2k_output.py @@ -1,8 +1,9 @@ import os -import numpy as np import unittest -from context import dpdata + +import numpy as np from comp_sys import CompLabeledSys +from context import dpdata class TestCp2kNormalOutput(unittest.TestCase, CompLabeledSys): diff --git a/tests/test_deepmd_comp.py b/tests/test_deepmd_comp.py index 3b806859..616caa46 100644 --- a/tests/test_deepmd_comp.py +++ b/tests/test_deepmd_comp.py @@ -1,8 +1,10 @@ -import os, shutil -import numpy as np +import os +import shutil import unittest -from context import dpdata + +import numpy as np from comp_sys import CompLabeledSys, CompSys, IsPBC +from context import dpdata class TestDeepmdLoadDumpComp(unittest.TestCase, CompLabeledSys, IsPBC): diff --git a/tests/test_deepmd_hdf5.py b/tests/test_deepmd_hdf5.py index 24ed4f0d..20d16c37 100644 --- a/tests/test_deepmd_hdf5.py +++ b/tests/test_deepmd_hdf5.py @@ -1,8 +1,9 @@ import os -import numpy as np import unittest -from context import dpdata + +import numpy as np from comp_sys import CompLabeledSys, CompSys, IsNoPBC, IsPBC, MultiSystems +from context import dpdata class TestDeepmdLoadDumpHDF5(unittest.TestCase, CompLabeledSys, IsPBC): diff --git a/tests/test_deepmd_raw.py b/tests/test_deepmd_raw.py index 5ba54877..1b056726 100644 --- a/tests/test_deepmd_raw.py +++ b/tests/test_deepmd_raw.py @@ -1,8 +1,10 @@ -import os, shutil -import numpy as np +import os +import shutil import unittest -from context import dpdata + +import numpy as np from comp_sys import CompLabeledSys, CompSys, IsPBC +from context import dpdata class TestDeepmdLoadRaw(unittest.TestCase, CompLabeledSys, IsPBC): diff --git a/tests/test_elements_index.py b/tests/test_elements_index.py index 6b924548..17a7ea91 100644 --- a/tests/test_elements_index.py +++ b/tests/test_elements_index.py @@ -1,6 +1,8 @@ import os -import numpy as np import unittest + +import numpy as np + from dpdata.system import elements_index_map diff --git a/tests/test_empty.py b/tests/test_empty.py index 0fd84ca0..3892ab3b 100644 --- a/tests/test_empty.py +++ b/tests/test_empty.py @@ -1,6 +1,7 @@ import os -import numpy as np import unittest + +import numpy as np from context import dpdata diff --git a/tests/test_fhi_md_multi_elem_output.py b/tests/test_fhi_md_multi_elem_output.py index dc4cbfcf..a20c45bd 100644 --- a/tests/test_fhi_md_multi_elem_output.py +++ b/tests/test_fhi_md_multi_elem_output.py @@ -1,5 +1,6 @@ -import numpy as np import unittest + +import numpy as np from context import dpdata diff --git a/tests/test_fhi_md_output.py b/tests/test_fhi_md_output.py index 3f945b31..d205e391 100644 --- a/tests/test_fhi_md_output.py +++ b/tests/test_fhi_md_output.py @@ -1,5 +1,6 @@ -import numpy as np import unittest + +import numpy as np from context import dpdata diff --git a/tests/test_fhi_output.py b/tests/test_fhi_output.py index 7c8307ea..acbe2aac 100644 --- a/tests/test_fhi_output.py +++ b/tests/test_fhi_output.py @@ -1,6 +1,7 @@ import os -import numpy as np import unittest + +import numpy as np from context import dpdata diff --git a/tests/test_gaussian_driver.py b/tests/test_gaussian_driver.py index f1029ec1..07150bc7 100644 --- a/tests/test_gaussian_driver.py +++ b/tests/test_gaussian_driver.py @@ -1,11 +1,11 @@ -import unittest -import shutil import importlib import os +import shutil +import unittest import numpy as np -from context import dpdata from comp_sys import CompSys, IsNoPBC +from context import dpdata @unittest.skipIf(shutil.which("g16") is None, "g16 is not installed") diff --git a/tests/test_gaussian_gjf.py b/tests/test_gaussian_gjf.py index 24cb56bd..861eae28 100644 --- a/tests/test_gaussian_gjf.py +++ b/tests/test_gaussian_gjf.py @@ -1,5 +1,5 @@ -import unittest import os +import unittest from context import dpdata diff --git a/tests/test_gaussian_log.py b/tests/test_gaussian_log.py index 8d7bec81..67e13910 100644 --- a/tests/test_gaussian_log.py +++ b/tests/test_gaussian_log.py @@ -1,6 +1,7 @@ import os -import numpy as np import unittest + +import numpy as np from context import dpdata diff --git a/tests/test_gromacs_gro.py b/tests/test_gromacs_gro.py index d2553809..27d2742e 100644 --- a/tests/test_gromacs_gro.py +++ b/tests/test_gromacs_gro.py @@ -1,6 +1,7 @@ import os -import numpy as np import unittest + +import numpy as np from context import dpdata diff --git a/tests/test_json.py b/tests/test_json.py index 7337d682..09fc46dd 100644 --- a/tests/test_json.py +++ b/tests/test_json.py @@ -1,8 +1,9 @@ import os -import numpy as np import unittest -from context import dpdata + +import numpy as np from comp_sys import CompLabeledSys, IsPBC +from context import dpdata class TestJsonLoad(unittest.TestCase, CompLabeledSys, IsPBC): diff --git a/tests/test_lammps_dump_idx.py b/tests/test_lammps_dump_idx.py index 110f4bc6..272cc222 100644 --- a/tests/test_lammps_dump_idx.py +++ b/tests/test_lammps_dump_idx.py @@ -1,8 +1,9 @@ # The index should map to that in the dump file import os -import numpy as np import unittest + +import numpy as np from context import dpdata diff --git a/tests/test_lammps_dump_shift_origin.py b/tests/test_lammps_dump_shift_origin.py index eec6b4f3..60eb94c0 100644 --- a/tests/test_lammps_dump_shift_origin.py +++ b/tests/test_lammps_dump_shift_origin.py @@ -1,8 +1,9 @@ import os -import numpy as np import unittest -from context import dpdata + +import numpy as np from comp_sys import CompSys, IsPBC +from context import dpdata class TestLammpsDumpShiftOrigin(unittest.TestCase, CompSys, IsPBC): diff --git a/tests/test_lammps_dump_skipload.py b/tests/test_lammps_dump_skipload.py index d604607a..224ec6d1 100644 --- a/tests/test_lammps_dump_skipload.py +++ b/tests/test_lammps_dump_skipload.py @@ -1,8 +1,9 @@ import os -import numpy as np import unittest -from context import dpdata + +import numpy as np from comp_sys import CompSys, IsPBC +from context import dpdata class TestLmpDumpSkip(unittest.TestCase, CompSys, IsPBC): diff --git a/tests/test_lammps_dump_to_system.py b/tests/test_lammps_dump_to_system.py index 739f4e43..b2061c09 100644 --- a/tests/test_lammps_dump_to_system.py +++ b/tests/test_lammps_dump_to_system.py @@ -1,6 +1,7 @@ import os -import numpy as np import unittest + +import numpy as np from context import dpdata from poscars.poscar_ref_oh import TestPOSCARoh diff --git a/tests/test_lammps_dump_unfold.py b/tests/test_lammps_dump_unfold.py index bce95f7b..3e5ca47d 100644 --- a/tests/test_lammps_dump_unfold.py +++ b/tests/test_lammps_dump_unfold.py @@ -1,6 +1,7 @@ import os -import numpy as np import unittest + +import numpy as np from context import dpdata from poscars.poscar_ref_oh import TestPOSCARoh diff --git a/tests/test_lammps_lmp_dump.py b/tests/test_lammps_lmp_dump.py index 2ded5e44..e083fc8d 100644 --- a/tests/test_lammps_lmp_dump.py +++ b/tests/test_lammps_lmp_dump.py @@ -1,6 +1,7 @@ import os -import numpy as np import unittest + +import numpy as np from context import dpdata from poscars.poscar_ref_oh import TestPOSCARoh diff --git a/tests/test_lammps_lmp_to_system.py b/tests/test_lammps_lmp_to_system.py index ea8d2157..80909def 100644 --- a/tests/test_lammps_lmp_to_system.py +++ b/tests/test_lammps_lmp_to_system.py @@ -1,6 +1,7 @@ import os -import numpy as np import unittest + +import numpy as np from context import dpdata from poscars.poscar_ref_oh import TestPOSCARoh diff --git a/tests/test_lammps_read_from_trajs.py b/tests/test_lammps_read_from_trajs.py index 128aedb7..f1e5afdd 100644 --- a/tests/test_lammps_read_from_trajs.py +++ b/tests/test_lammps_read_from_trajs.py @@ -1,6 +1,7 @@ import os -import numpy as np import unittest + +import numpy as np from context import dpdata diff --git a/tests/test_msd.py b/tests/test_msd.py index fd62d13c..f102ea9d 100644 --- a/tests/test_msd.py +++ b/tests/test_msd.py @@ -1,6 +1,7 @@ import os -import numpy as np import unittest + +import numpy as np from context import dpdata diff --git a/tests/test_multisystems.py b/tests/test_multisystems.py index 689b1a86..3df15192 100644 --- a/tests/test_multisystems.py +++ b/tests/test_multisystems.py @@ -1,13 +1,11 @@ import os -import numpy as np import unittest -from context import dpdata -from comp_sys import CompSys -from comp_sys import CompLabeledSys -from comp_sys import MultiSystems -from comp_sys import IsNoPBC from itertools import permutations +import numpy as np +from comp_sys import CompLabeledSys, CompSys, IsNoPBC, MultiSystems +from context import dpdata + class TestMultiSystems(unittest.TestCase, CompLabeledSys, MultiSystems, IsNoPBC): def setUp(self): diff --git a/tests/test_periodic_table.py b/tests/test_periodic_table.py index 40a29a1c..6b856e91 100644 --- a/tests/test_periodic_table.py +++ b/tests/test_periodic_table.py @@ -1,4 +1,5 @@ import unittest + from context import dpdata data = { diff --git a/tests/test_perturb.py b/tests/test_perturb.py index c047dfa4..162f1728 100644 --- a/tests/test_perturb.py +++ b/tests/test_perturb.py @@ -1,11 +1,10 @@ import os -import numpy as np import unittest -from context import dpdata -from comp_sys import CompSys, IsPBC +from unittest.mock import MagicMock, Mock, patch -from unittest.mock import Mock -from unittest.mock import patch, MagicMock +import numpy as np +from comp_sys import CompSys, IsPBC +from context import dpdata class NormalGenerator(object): diff --git a/tests/test_pick_atom_idx.py b/tests/test_pick_atom_idx.py index 37be5d8f..1de7ad32 100644 --- a/tests/test_pick_atom_idx.py +++ b/tests/test_pick_atom_idx.py @@ -1,7 +1,8 @@ -import numpy as np import unittest -from context import dpdata + +import numpy as np from comp_sys import CompSys, IsNoPBC +from context import dpdata try: import parmed diff --git a/tests/test_predict.py b/tests/test_predict.py index 2176bf74..1de04d89 100644 --- a/tests/test_predict.py +++ b/tests/test_predict.py @@ -1,6 +1,6 @@ import unittest -import numpy as np +import numpy as np from comp_sys import CompLabeledSys, IsPBC from context import dpdata diff --git a/tests/test_pwmat_config_dump.py b/tests/test_pwmat_config_dump.py index 32c6ee52..db79a923 100644 --- a/tests/test_pwmat_config_dump.py +++ b/tests/test_pwmat_config_dump.py @@ -1,9 +1,11 @@ import os -import numpy as np import unittest -import dpdata + +import numpy as np from pwmat.config_ref_oh import Testconfigoh +import dpdata + def myfilecmp(test, f0, f1): with open(f0) as fp0: diff --git a/tests/test_pwmat_config_to_system.py b/tests/test_pwmat_config_to_system.py index 3ff43b66..0956f956 100644 --- a/tests/test_pwmat_config_to_system.py +++ b/tests/test_pwmat_config_to_system.py @@ -1,9 +1,11 @@ import os -import numpy as np import unittest -import dpdata + +import numpy as np from pwmat.config_ref_ch4 import Testconfigch4 +import dpdata + class Testconfig(unittest.TestCase, Testconfigch4): def setUp(self): diff --git a/tests/test_pwmat_mlmd.py b/tests/test_pwmat_mlmd.py index f35ef42b..ae2d1334 100644 --- a/tests/test_pwmat_mlmd.py +++ b/tests/test_pwmat_mlmd.py @@ -1,6 +1,8 @@ import os -import numpy as np import unittest + +import numpy as np + import dpdata diff --git a/tests/test_pwmat_movement.py b/tests/test_pwmat_movement.py index e188c746..bba4f2ca 100644 --- a/tests/test_pwmat_movement.py +++ b/tests/test_pwmat_movement.py @@ -1,6 +1,8 @@ import os -import numpy as np import unittest + +import numpy as np + import dpdata diff --git a/tests/test_pymatgen_molecule.py b/tests/test_pymatgen_molecule.py index 8c3e72b6..4000480a 100644 --- a/tests/test_pymatgen_molecule.py +++ b/tests/test_pymatgen_molecule.py @@ -1,6 +1,7 @@ import os -import numpy as np import unittest + +import numpy as np from context import dpdata try: diff --git a/tests/test_qe_cp_traj.py b/tests/test_qe_cp_traj.py index 2cb982ce..e947a0f4 100644 --- a/tests/test_qe_cp_traj.py +++ b/tests/test_qe_cp_traj.py @@ -1,6 +1,7 @@ import os -import numpy as np import unittest + +import numpy as np from context import dpdata bohr2ang = dpdata.unit.LengthConversion("bohr", "angstrom").value() diff --git a/tests/test_qe_cp_traj_skipload.py b/tests/test_qe_cp_traj_skipload.py index e8cc84bf..2964e716 100644 --- a/tests/test_qe_cp_traj_skipload.py +++ b/tests/test_qe_cp_traj_skipload.py @@ -1,10 +1,9 @@ import os -import numpy as np import unittest + +import numpy as np +from comp_sys import CompLabeledSys, CompSys, IsPBC from context import dpdata -from comp_sys import CompSys -from comp_sys import CompLabeledSys -from comp_sys import IsPBC bohr2ang = dpdata.unit.LengthConversion("bohr", "angstrom").value() diff --git a/tests/test_qe_pw_scf.py b/tests/test_qe_pw_scf.py index 4cb78cbd..4d3032da 100644 --- a/tests/test_qe_pw_scf.py +++ b/tests/test_qe_pw_scf.py @@ -1,6 +1,7 @@ import os -import numpy as np import unittest + +import numpy as np from context import dpdata diff --git a/tests/test_qe_pw_scf_crystal_atomic_positions.py b/tests/test_qe_pw_scf_crystal_atomic_positions.py index 19dc7b59..e335c51a 100644 --- a/tests/test_qe_pw_scf_crystal_atomic_positions.py +++ b/tests/test_qe_pw_scf_crystal_atomic_positions.py @@ -1,6 +1,7 @@ import os -import numpy as np import unittest + +import numpy as np from context import dpdata diff --git a/tests/test_qe_pw_scf_energy_bug.py b/tests/test_qe_pw_scf_energy_bug.py index 85d79355..49ff0e34 100644 --- a/tests/test_qe_pw_scf_energy_bug.py +++ b/tests/test_qe_pw_scf_energy_bug.py @@ -1,6 +1,7 @@ import os -import numpy as np import unittest + +import numpy as np from context import dpdata diff --git a/tests/test_quip_gap_xyz.py b/tests/test_quip_gap_xyz.py index 27285606..03f69d95 100644 --- a/tests/test_quip_gap_xyz.py +++ b/tests/test_quip_gap_xyz.py @@ -1,8 +1,9 @@ import os -import numpy as np import unittest -from context import dpdata + +import numpy as np from comp_sys import CompLabeledSys, IsPBC +from context import dpdata class TestQuipGapxyz1(unittest.TestCase, CompLabeledSys, IsPBC): diff --git a/tests/test_remove_atom_names.py b/tests/test_remove_atom_names.py index 4d4e23ec..d2d4abc7 100644 --- a/tests/test_remove_atom_names.py +++ b/tests/test_remove_atom_names.py @@ -1,7 +1,7 @@ import unittest + +from comp_sys import CompLabeledSys, IsNoPBC from context import dpdata -from comp_sys import CompLabeledSys -from comp_sys import IsNoPBC class TestRemove(unittest.TestCase, CompLabeledSys, IsNoPBC): diff --git a/tests/test_remove_pbc.py b/tests/test_remove_pbc.py index 558587b0..28310088 100644 --- a/tests/test_remove_pbc.py +++ b/tests/test_remove_pbc.py @@ -1,6 +1,7 @@ import os -import numpy as np import unittest + +import numpy as np from context import dpdata diff --git a/tests/test_replace.py b/tests/test_replace.py index f0f4430f..eaef289c 100644 --- a/tests/test_replace.py +++ b/tests/test_replace.py @@ -1,11 +1,10 @@ import os -import numpy as np import unittest -from context import dpdata -from comp_sys import CompSys, IsPBC +from unittest.mock import MagicMock, Mock, patch -from unittest.mock import Mock -from unittest.mock import patch, MagicMock +import numpy as np +from comp_sys import CompSys, IsPBC +from context import dpdata class ConstGenerator(object): diff --git a/tests/test_replicate.py b/tests/test_replicate.py index 1a7590b6..fc41e62d 100644 --- a/tests/test_replicate.py +++ b/tests/test_replicate.py @@ -1,8 +1,9 @@ import os -import numpy as np import unittest -from context import dpdata + +import numpy as np from comp_sys import CompSys, IsPBC +from context import dpdata class TestReplicate123(unittest.TestCase, CompSys, IsPBC): diff --git a/tests/test_shuffle.py b/tests/test_shuffle.py index 8acbb42a..9c462214 100644 --- a/tests/test_shuffle.py +++ b/tests/test_shuffle.py @@ -1,6 +1,7 @@ import unittest -from context import dpdata + from comp_sys import CompLabeledSys, IsPBC +from context import dpdata class TestDeepmdLoadRaw(unittest.TestCase, CompLabeledSys, IsPBC): diff --git a/tests/test_siesta_aiMD_output.py b/tests/test_siesta_aiMD_output.py index 8ea4be35..e7f61641 100644 --- a/tests/test_siesta_aiMD_output.py +++ b/tests/test_siesta_aiMD_output.py @@ -1,6 +1,7 @@ import os -import numpy as np import unittest + +import numpy as np from context import dpdata diff --git a/tests/test_siesta_output.py b/tests/test_siesta_output.py index 2539173d..0c51ae21 100644 --- a/tests/test_siesta_output.py +++ b/tests/test_siesta_output.py @@ -1,6 +1,7 @@ import os -import numpy as np import unittest + +import numpy as np from context import dpdata diff --git a/tests/test_sqm_driver.py b/tests/test_sqm_driver.py index b06ab26c..3dbc6df4 100644 --- a/tests/test_sqm_driver.py +++ b/tests/test_sqm_driver.py @@ -1,9 +1,9 @@ -import unittest import shutil +import unittest import numpy as np -from context import dpdata from comp_sys import CompSys, IsNoPBC +from context import dpdata @unittest.skipIf(shutil.which("sqm") is None, "sqm is not installed") diff --git a/tests/test_stat.py b/tests/test_stat.py index 62d045f4..9ae8a175 100644 --- a/tests/test_stat.py +++ b/tests/test_stat.py @@ -1,7 +1,7 @@ -from context import dpdata - import unittest +from context import dpdata + class TestStat(unittest.TestCase): def test_errors(self): diff --git a/tests/test_system_append.py b/tests/test_system_append.py index 69883247..32107a4c 100644 --- a/tests/test_system_append.py +++ b/tests/test_system_append.py @@ -1,10 +1,9 @@ import os -import numpy as np import unittest + +import numpy as np +from comp_sys import CompLabeledSys, CompSys, IsNoPBC, IsPBC from context import dpdata -from comp_sys import CompSys -from comp_sys import CompLabeledSys -from comp_sys import IsPBC, IsNoPBC class TestFailedAppend(unittest.TestCase): diff --git a/tests/test_system_apply_pbc.py b/tests/test_system_apply_pbc.py index 96b06989..c9f14d92 100644 --- a/tests/test_system_apply_pbc.py +++ b/tests/test_system_apply_pbc.py @@ -1,6 +1,7 @@ import os -import numpy as np import unittest + +import numpy as np from context import dpdata diff --git a/tests/test_system_set_type.py b/tests/test_system_set_type.py index 27e0da97..edcddc64 100644 --- a/tests/test_system_set_type.py +++ b/tests/test_system_set_type.py @@ -1,9 +1,8 @@ import os -import numpy as np import unittest -from context import dpdata import numpy as np +from context import dpdata class TestSetAtomTypes(unittest.TestCase): diff --git a/tests/test_to_ase.py b/tests/test_to_ase.py index 187d8907..2440962c 100644 --- a/tests/test_to_ase.py +++ b/tests/test_to_ase.py @@ -1,8 +1,9 @@ import os -import numpy as np import unittest -from context import dpdata + +import numpy as np from comp_sys import CompSys, IsPBC +from context import dpdata try: from ase import Atoms diff --git a/tests/test_to_list.py b/tests/test_to_list.py index 4aa90885..68c98bf6 100644 --- a/tests/test_to_list.py +++ b/tests/test_to_list.py @@ -1,8 +1,9 @@ import os -import numpy as np import unittest -from context import dpdata + +import numpy as np from comp_sys import CompLabeledSys, IsPBC +from context import dpdata class TestToList(unittest.TestCase, CompLabeledSys, IsPBC): diff --git a/tests/test_to_pymatgen.py b/tests/test_to_pymatgen.py index abe25150..e2b91e24 100644 --- a/tests/test_to_pymatgen.py +++ b/tests/test_to_pymatgen.py @@ -1,8 +1,9 @@ import os -import numpy as np import unittest -from context import dpdata + +import numpy as np from comp_sys import CompSys, IsPBC +from context import dpdata try: from pymatgen import Structure diff --git a/tests/test_to_pymatgen_entry.py b/tests/test_to_pymatgen_entry.py index 1cad9c17..9510224b 100644 --- a/tests/test_to_pymatgen_entry.py +++ b/tests/test_to_pymatgen_entry.py @@ -1,8 +1,9 @@ import os -import numpy as np import unittest -from context import dpdata + +import numpy as np from comp_sys import CompSys, IsPBC +from context import dpdata from monty.serialization import loadfn try: diff --git a/tests/test_type_map.py b/tests/test_type_map.py index 4d98aa7a..2cc50865 100644 --- a/tests/test_type_map.py +++ b/tests/test_type_map.py @@ -1,7 +1,8 @@ import unittest -from context import dpdata from itertools import permutations +from context import dpdata + class TestTypeMap: def test_check_type_map(self): diff --git a/tests/test_vasp_outcar.py b/tests/test_vasp_outcar.py index 2fcaa0a1..d7e38867 100644 --- a/tests/test_vasp_outcar.py +++ b/tests/test_vasp_outcar.py @@ -1,8 +1,10 @@ import os -import numpy as np import unittest -from context import dpdata + +import numpy as np from comp_sys import CompLabeledSys, IsPBC +from context import dpdata + from dpdata.utils import uniq_atom_names diff --git a/tests/test_vasp_poscar_dump.py b/tests/test_vasp_poscar_dump.py index f83e208c..c67f42fc 100644 --- a/tests/test_vasp_poscar_dump.py +++ b/tests/test_vasp_poscar_dump.py @@ -1,6 +1,7 @@ import os -import numpy as np import unittest + +import numpy as np from context import dpdata from poscars.poscar_ref_oh import TestPOSCARoh diff --git a/tests/test_vasp_poscar_to_system.py b/tests/test_vasp_poscar_to_system.py index 7eea3757..dcb83bfd 100644 --- a/tests/test_vasp_poscar_to_system.py +++ b/tests/test_vasp_poscar_to_system.py @@ -1,8 +1,9 @@ import os -import numpy as np import unittest -from context import dpdata + +import numpy as np from comp_sys import CompSys, IsPBC +from context import dpdata from poscars.poscar_ref_oh import TestPOSCARoh diff --git a/tests/test_vasp_unconverged_outcar.py b/tests/test_vasp_unconverged_outcar.py index 579edf09..97318223 100644 --- a/tests/test_vasp_unconverged_outcar.py +++ b/tests/test_vasp_unconverged_outcar.py @@ -1,6 +1,7 @@ import os -import numpy as np import unittest + +import numpy as np from context import dpdata diff --git a/tests/test_vasp_xml.py b/tests/test_vasp_xml.py index a3cd90d0..d5df49da 100644 --- a/tests/test_vasp_xml.py +++ b/tests/test_vasp_xml.py @@ -1,10 +1,9 @@ import os -import numpy as np import unittest + +import numpy as np +from comp_sys import CompLabeledSys, CompSys, IsPBC from context import dpdata -from comp_sys import CompSys -from comp_sys import CompLabeledSys -from comp_sys import IsPBC class TestVaspXml(unittest.TestCase, CompSys, IsPBC): diff --git a/tests/test_water_ions.py b/tests/test_water_ions.py index 98b3838b..8b70dc73 100644 --- a/tests/test_water_ions.py +++ b/tests/test_water_ions.py @@ -1,6 +1,7 @@ import os -import numpy as np import unittest + +import numpy as np from context import dpdata try: diff --git a/tests/test_xyz.py b/tests/test_xyz.py index 8f0779b3..a84ad28b 100644 --- a/tests/test_xyz.py +++ b/tests/test_xyz.py @@ -1,8 +1,9 @@ +import tempfile import unittest + import numpy as np -import tempfile -from context import dpdata from comp_sys import CompSys, IsNoPBC +from context import dpdata class TestToXYZ(unittest.TestCase): From 4688fd6e964781bdf8bff1d4e663e0b8548c5ed4 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 3 Feb 2023 06:12:57 -0500 Subject: [PATCH 16/20] add .git-blame-ignore-revs to hide formatting changes in git blame (#419) See https://docs.github.com/en/repositories/working-with-files/using-files/viewing-a-file#ignore-commits-in-the-blame-view Note: it only works for `git blame` (or GitHub blame page) but does not work for `git history` (or GitHub file history page). --- .git-blame-ignore-revs | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 .git-blame-ignore-revs diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 00000000..a479b2cf --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,7 @@ +# .git-blame-ignore-revs +# pre-commit +ffa52c5d6230303d6f7ee4f1356f01aa5b2a011d +# pre-commit docs python block +ecdf566626c4b2d1824b946d1b7ad809cb8946dd +# pre-commit imports +4f1a91167b58f9042ae17cae05b609b7fdf5f20c From 18508af9733546f344c6221d13aca7013bd40c2c Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sat, 4 Feb 2023 22:27:55 -0500 Subject: [PATCH 17/20] follow the numpydoc style guide (#420) Add a pre-commit hook to run automatic tools to follow the Numpydoc style guide. --- .pre-commit-config.yaml | 6 ++++++ dpdata/amber/mask.py | 12 ++++++------ dpdata/amber/md.py | 12 ++++++------ dpdata/driver.py | 24 ++++++++++++------------ dpdata/format.py | 6 +++--- dpdata/md/rdf.py | 6 +++--- dpdata/plugin.py | 6 +++--- dpdata/plugins/abacus.py | 8 ++++---- dpdata/plugins/deepmd.py | 4 ++-- dpdata/plugins/lammps.py | 2 +- dpdata/system.py | 34 +++++++++++++++++----------------- dpdata/unit.py | 10 +++++----- dpdata/xyz/xyz.py | 4 ++-- 13 files changed, 70 insertions(+), 64 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0624240e..30c86388 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -26,6 +26,12 @@ repos: hooks: - id: isort files: \.py$ +# numpydoc +- repo: https://github.com/Carreau/velin + rev: 0.0.12 + hooks: + - id: velin + args: ["--write"] # Python inside docs - repo: https://github.com/asottile/blacken-docs rev: 1.13.0 diff --git a/dpdata/amber/mask.py b/dpdata/amber/mask.py index f44ee047..34eba90b 100644 --- a/dpdata/amber/mask.py +++ b/dpdata/amber/mask.py @@ -10,12 +10,12 @@ def pick_by_amber_mask(param, maskstr, coords=None): Parameters ---------- - param: str or parmed.Structure - filename of Amber param file or parmed.Structure - maskstr: str - Amber masks - coords: np.ndarray (optional) - frame coordinates, shape: N*3 + param : str or parmed.Structure + filename of Amber param file or parmed.Structure + maskstr : str + Amber masks + coords : np.ndarray (optional) + frame coordinates, shape: N*3 """ parm = load_param_file(param) if coords is not None: diff --git a/dpdata/amber/md.py b/dpdata/amber/md.py index f477efab..28b4535c 100644 --- a/dpdata/amber/md.py +++ b/dpdata/amber/md.py @@ -35,12 +35,12 @@ def read_amber_traj( Parameters ---------- parm7_file, nc_file, mdfrc_file, mden_file, mdout_file: - filenames - use_element_symbols: None or list or str - If use_element_symbols is a list of atom indexes, these atoms will use element symbols - instead of amber types. For example, a ligand will use C, H, O, N, and so on - instead of h1, hc, o, os, and so on. - IF use_element_symbols is str, it will be considered as Amber mask. + filenames + use_element_symbols : None or list or str + If use_element_symbols is a list of atom indexes, these atoms will use element symbols + instead of amber types. For example, a ligand will use C, H, O, N, and so on + instead of h1, hc, o, os, and so on. + IF use_element_symbols is str, it will be considered as Amber mask. """ flag_atom_type = False diff --git a/dpdata/driver.py b/dpdata/driver.py index 8f952984..b446a5e8 100644 --- a/dpdata/driver.py +++ b/dpdata/driver.py @@ -23,9 +23,9 @@ class Driver(ABC): def register(key: str) -> Callable: """Register a driver plugin. Used as decorators. - Parameter - --------- - key: str + Parameters + ---------- + key : str key of the plugin. Returns @@ -45,9 +45,9 @@ def register(key: str) -> Callable: def get_driver(key: str) -> "Driver": """Get a driver plugin. - Parameter - --------- - key: str + Parameters + ---------- + key : str key of the plugin. Returns @@ -176,9 +176,9 @@ class Minimizer(ABC): def register(key: str) -> Callable: """Register a minimizer plugin. Used as decorators. - Parameter - --------- - key: str + Parameters + ---------- + key : str key of the plugin. Returns @@ -198,9 +198,9 @@ def register(key: str) -> Callable: def get_minimizer(key: str) -> "Minimizer": """Get a minimizer plugin. - Parameter - --------- - key: str + Parameters + ---------- + key : str key of the plugin. Returns diff --git a/dpdata/format.py b/dpdata/format.py index e40ad190..0ad991d8 100644 --- a/dpdata/format.py +++ b/dpdata/format.py @@ -51,7 +51,7 @@ def from_system(self, file_name, **kwargs): Parameters ---------- - file_name: str + file_name : str file name Returns @@ -68,7 +68,7 @@ def to_system(self, data, *args, **kwargs): Parameters ---------- - data: dict + data : dict system data """ raise NotImplementedError( @@ -107,7 +107,7 @@ def from_multi_systems(self, directory, **kwargs): Parameters ---------- - directory: str + directory : str directory of system Returns diff --git a/dpdata/md/rdf.py b/dpdata/md/rdf.py index 16f6bacc..4dcbece6 100644 --- a/dpdata/md/rdf.py +++ b/dpdata/md/rdf.py @@ -9,15 +9,15 @@ def rdf(sys, sel_type=[None, None], max_r=5, nbins=100): ---------- sys : System or LabeledSystem The dpdata system - sel_type: list + sel_type : list List of size 2. The first element specifies the type of the first atom, while the second element specifies the type of the second atom. Both elements can be ints or list of ints. If the element is None, all types are specified. Examples are sel_type = [0, 0], sel_type = [0, [0, 1]] or sel_type = [0, None] - max_r: float + max_r : float Maximal range of rdf calculation - nbins: int + nbins : int Number of bins for rdf calculation Returns diff --git a/dpdata/plugin.py b/dpdata/plugin.py index 4f163ced..0b5027a9 100644 --- a/dpdata/plugin.py +++ b/dpdata/plugin.py @@ -19,9 +19,9 @@ def __init__(self): def register(self, key): """Register a plugin. - Parameter - --------- - key: str + Parameters + ---------- + key : str Key of the plugin. """ diff --git a/dpdata/plugins/abacus.py b/dpdata/plugins/abacus.py index 43abf318..db983b7d 100644 --- a/dpdata/plugins/abacus.py +++ b/dpdata/plugins/abacus.py @@ -20,13 +20,13 @@ def to_system(self, data, file_name, frame_idx=0, **kwargs): The output file name frame_idx : int The index of the frame to dump - pp_file: list of string, optional + pp_file : list of string, optional List of pseudo potential files - numerical_orbital: list of string, optional + numerical_orbital : list of string, optional List of orbital files - mass: list of float, optional + mass : list of float, optional List of atomic masses - numerical_descriptor: str, optional + numerical_descriptor : str, optional numerical descriptor file """ diff --git a/dpdata/plugins/deepmd.py b/dpdata/plugins/deepmd.py index d16b86a5..df56bc49 100644 --- a/dpdata/plugins/deepmd.py +++ b/dpdata/plugins/deepmd.py @@ -50,13 +50,13 @@ def to_system(self, data, file_name, set_size=5000, prec=np.float64, **kwargs): Parameters ---------- - data: dict + data : dict System data file_name : str The output folder set_size : int The size of each set. - prec: {numpy.float32, numpy.float64} + prec : {numpy.float32, numpy.float64} The floating point precision of the compressed data """ dpdata.deepmd.comp.dump(file_name, data, set_size=set_size, comp_prec=prec) diff --git a/dpdata/plugins/lammps.py b/dpdata/plugins/lammps.py index 300bd4ef..bd041d95 100644 --- a/dpdata/plugins/lammps.py +++ b/dpdata/plugins/lammps.py @@ -18,7 +18,7 @@ def to_system(self, data, file_name, frame_idx=0, **kwargs): Parameters ---------- - data: dict + data : dict System data file_name : str The output file name diff --git a/dpdata/system.py b/dpdata/system.py index 5a985e04..017797d7 100644 --- a/dpdata/system.py +++ b/dpdata/system.py @@ -397,7 +397,7 @@ def map_atom_types(self, type_map=None) -> np.ndarray: Parameters ---------- - type_map : + type_map dict : {"H":0,"O":1} or list ["H","C","O","N"] The map between elements and index @@ -673,7 +673,7 @@ def remove_pbc(self, protect_layer=9): Parameters ---------- protect_layer : the protect layer between the atoms and the cell - boundary + boundary """ assert protect_layer >= 0, "the protect_layer should be no less than 0" remove_pbc(self.data, protect_layer) @@ -726,7 +726,7 @@ def replicate(self, ncopy): Parameters ---------- - ncopy : + ncopy list: [4,2,3] or tuple: (4,2,3,) make `ncopy[0]` copys in x dimensions, @@ -846,7 +846,7 @@ def perturb( The cell of each frame is deformed by a symmetric matrix perturbed from identity. The perturbation to the diagonal part is subject to a uniform distribution in [-cell_pert_fraction, cell_pert_fraction), and the perturbation to the off-diagonal part is subject to a uniform distribution in [-0.5*cell_pert_fraction, 0.5*cell_pert_fraction). - atom_pert_distance: float + atom_pert_distance : float unit: Angstrom. A distance determines how far atoms will move. Atoms will move about `atom_pert_distance` in random direction. The distribution of the distance atoms move is determined by atom_pert_style @@ -964,9 +964,9 @@ def pick_atom_idx(self, idx, nopbc=None): Parameters ---------- - idx: int or list or slice + idx : int or list or slice atom index - nopbc: Boolen (default: None) + nopbc : Boolen (default: None) If nopbc is True or False, set nopbc Returns @@ -1026,16 +1026,16 @@ def pick_by_amber_mask(self, param, maskstr, pass_coords=False, nopbc=None): Parameters ---------- - param: str or parmed.Structure - filename of Amber param file or parmed.Structure - maskstr: str - Amber masks - pass_coords: Boolen (default: False) + param : str or parmed.Structure + filename of Amber param file or parmed.Structure + maskstr : str + Amber masks + pass_coords : Boolen (default: False) If pass_coords is true, the function will pass coordinates and return a MultiSystem. Otherwise, the result is coordinate-independent, and the function will return System or LabeledSystem. - nopbc: Boolen (default: None) + nopbc : Boolen (default: None) If nopbc is True or False, set nopbc """ parm = load_param_file(param) @@ -1221,11 +1221,11 @@ def correction(self, hl_sys): Parameters ---------- - hl_sys: LabeledSystem + hl_sys : LabeledSystem high-level LabeledSystem Returns - ---------- + ------- corrected_sys: LabeledSystem Corrected LabeledSystem """ @@ -1248,7 +1248,7 @@ def __init__(self, *systems, type_map=None): """ Parameters ---------- - systems : System + *systems : System The systems contained type_map : list of str Maps atom type to name @@ -1461,9 +1461,9 @@ def pick_atom_idx(self, idx, nopbc=None): Parameters ---------- - idx: int or list or slice + idx : int or list or slice atom index - nopbc: Boolen (default: None) + nopbc : Boolen (default: None) If nopbc is True or False, set nopbc Returns diff --git a/dpdata/unit.py b/dpdata/unit.py index 1c7255cc..fa0d1e24 100644 --- a/dpdata/unit.py +++ b/dpdata/unit.py @@ -48,9 +48,9 @@ def __init__(self, unitA, unitB, check=True): Parameters ---------- - unitA : str, unit to be converted - unitB : str, unit which unitA is converted to, i.e. `1 unitA = self._value unitB` - check : bool, whether to check unit validity + unitA : str, unit to be converted + unitB : str, unit which unitA is converted to, i.e. `1 unitA = self._value unitB` + check : bool, whether to check unit validity Examples -------- @@ -118,7 +118,7 @@ def __init__(self, unitA, unitB): Parameters ---------- - unitA, unitB : str, in format of "energy_unit/length_unit" + unitA, unitB : str, in format of "energy_unit/length_unit" Examples -------- @@ -139,7 +139,7 @@ def __init__(self, unitA, unitB): Parameters ---------- - unitA, unitB : str, in format of "energy_unit/length_unit^3", or in `["Pa", "pa", "kPa", "kpa", "bar", "kbar"]` + unitA, unitB : str, in format of "energy_unit/length_unit^3", or in `["Pa", "pa", "kPa", "kpa", "bar", "kbar"]` Examples -------- diff --git a/dpdata/xyz/xyz.py b/dpdata/xyz/xyz.py index a28bafa0..745a97b1 100644 --- a/dpdata/xyz/xyz.py +++ b/dpdata/xyz/xyz.py @@ -8,9 +8,9 @@ def coord_to_xyz(coord: np.ndarray, types: list) -> str: Parameters ---------- - coord: np.ndarray + coord : np.ndarray coordinates, Nx3 array - types: list + types : list list of types Returns From d622c95a8bcbb32655a3d5a09137b428a5de9810 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Thu, 23 Feb 2023 10:40:45 +0800 Subject: [PATCH 18/20] Add support for 'deepmd/mixed' format with dpdata.MultiSystems (#422) Add support for 'deepmd/mixed' format with dpdata.MultiSystems 1. Support dump from dpdata.MultiSystems to [mixed type format](https://github.com/deepmodeling/deepmd-kit/blob/master/doc/model/train-se-atten.md#data-format): dpdata.MultiSystems.to_deepmd_mixed('dir_name_mixed') 2. Support load from mixed type format to dpdata.MultiSystems: dpdata.MultiSystems.load_systems_from_file('dir_name_mixed', fmt='deepmd/mixed') --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- README.md | 26 ++++ dpdata/deepmd/mixed.py | 251 +++++++++++++++++++++++++++++++++++++ dpdata/format.py | 21 ++++ dpdata/plugins/deepmd.py | 104 +++++++++++++++ dpdata/system.py | 82 ++++++++++-- tests/test_deepmd_mixed.py | 110 ++++++++++++++++ 6 files changed, 581 insertions(+), 13 deletions(-) create mode 100644 dpdata/deepmd/mixed.py create mode 100644 tests/test_deepmd_mixed.py diff --git a/README.md b/README.md index 281920ba..4af23cc0 100644 --- a/README.md +++ b/README.md @@ -62,6 +62,8 @@ The `System` or `LabeledSystem` can be constructed from the following file forma | deepmd | npy | True | False | System | 'deepmd/npy' | | deepmd | raw | True | True | LabeledSystem | 'deepmd/raw' | | deepmd | npy | True | True | LabeledSystem | 'deepmd/npy' | +| deepmd | npy | True | True | MultiSystems | 'deepmd/npy/mixed' | +| deepmd | npy | True | False | MultiSystems | 'deepmd/npy/mixed' | | gaussian| log | False | True | LabeledSystem | 'gaussian/log'| | gaussian| log | True | True | LabeledSystem | 'gaussian/md' | | siesta | output | False | True | LabeledSystem | 'siesta/output'| @@ -278,6 +280,30 @@ print(syst.get_charge()) # return the total charge of the system If a valence of 3 is detected on carbon, the formal charge will be assigned to -1. Because for most cases (in alkynyl anion, isonitrile, cyclopentadienyl anion), the formal charge on 3-valence carbon is -1, and this is also consisent with the 8-electron rule. +## Mixed Type Format +The format `deepmd/npy/mixed` is the mixed type numpy format for DeePMD-kit, and can be loaded or dumped through class `dpdata.MultiSystems`. + +Under this format, systems with the same number of atoms but different formula can be put together +for a larger system, especially when the frame numbers in systems are sparse. + +This also helps to mixture the type information together for model training with type embedding network. + +Here are examples using `deepmd/npy/mixed` format: + +- Dump a MultiSystems into a mixed type numpy directory: +```python +import dpdata + +dpdata.MultiSystems(*systems).to_deepmd_npy_mixed("mixed_dir") +``` + +- Load a mixed type data into a MultiSystems: +```python +import dpdata + +dpdata.MultiSystems().load_systems_from_file("mixed_dir", fmt="deepmd/npy/mixed") +``` + # Plugins One can follow [a simple example](plugin_example/) to add their own format by creating and installing plugins. It's critical to add the [Format](dpdata/format.py) class to `entry_points['dpdata.plugins']` in [`pyproject.toml`](plugin_example/pyproject.toml): diff --git a/dpdata/deepmd/mixed.py b/dpdata/deepmd/mixed.py new file mode 100644 index 00000000..0e770dc2 --- /dev/null +++ b/dpdata/deepmd/mixed.py @@ -0,0 +1,251 @@ +import glob +import os +import shutil + +import numpy as np + + +def load_type(folder): + data = {} + data["atom_names"] = [] + # if find type_map.raw, use it + assert os.path.isfile( + os.path.join(folder, "type_map.raw") + ), "Mixed type system must have type_map.raw!" + with open(os.path.join(folder, "type_map.raw")) as fp: + data["atom_names"] = fp.read().split() + + return data + + +def formula(atom_names, atom_numbs): + """ + Return the formula of this system, like C3H5O2 + """ + return "".join( + ["{}{}".format(symbol, numb) for symbol, numb in zip(atom_names, atom_numbs)] + ) + + +def _cond_load_data(fname): + tmp = None + if os.path.isfile(fname): + tmp = np.load(fname) + return tmp + + +def _load_set(folder, nopbc: bool): + coords = np.load(os.path.join(folder, "coord.npy")) + if nopbc: + cells = np.zeros((coords.shape[0], 3, 3)) + else: + cells = np.load(os.path.join(folder, "box.npy")) + eners = _cond_load_data(os.path.join(folder, "energy.npy")) + forces = _cond_load_data(os.path.join(folder, "force.npy")) + virs = _cond_load_data(os.path.join(folder, "virial.npy")) + real_atom_types = np.load(os.path.join(folder, "real_atom_types.npy")) + return cells, coords, eners, forces, virs, real_atom_types + + +def to_system_data(folder, type_map=None, labels=True): + # data is empty + data = load_type(folder) + data["orig"] = np.zeros([3]) + if os.path.isfile(os.path.join(folder, "nopbc")): + data["nopbc"] = True + sets = sorted(glob.glob(os.path.join(folder, "set.*"))) + assert len(sets) == 1, "Mixed type must have only one set!" + cells, coords, eners, forces, virs, real_atom_types = _load_set( + sets[0], data.get("nopbc", False) + ) + nframes = np.reshape(cells, [-1, 3, 3]).shape[0] + cells = np.reshape(cells, [nframes, 3, 3]) + coords = np.reshape(coords, [nframes, -1, 3]) + real_atom_types = np.reshape(real_atom_types, [nframes, -1]) + natom = real_atom_types.shape[1] + if labels: + if eners is not None and eners.size > 0: + eners = np.reshape(eners, [nframes]) + if forces is not None and forces.size > 0: + forces = np.reshape(forces, [nframes, -1, 3]) + if virs is not None and virs.size > 0: + virs = np.reshape(virs, [nframes, 3, 3]) + data_list = [] + while True: + if real_atom_types.size == 0: + break + temp_atom_numbs = [ + np.count_nonzero(real_atom_types[0] == i) + for i in range(len(data["atom_names"])) + ] + # temp_formula = formula(data['atom_names'], temp_atom_numbs) + temp_idx = np.arange(real_atom_types.shape[0])[ + (real_atom_types == real_atom_types[0]).all(-1) + ] + rest_idx = np.arange(real_atom_types.shape[0])[ + (real_atom_types != real_atom_types[0]).any(-1) + ] + temp_data = data.copy() + temp_data["atom_numbs"] = temp_atom_numbs + temp_data["atom_types"] = real_atom_types[0] + real_atom_types = real_atom_types[rest_idx] + temp_data["cells"] = cells[temp_idx] + cells = cells[rest_idx] + temp_data["coords"] = coords[temp_idx] + coords = coords[rest_idx] + if labels: + if eners is not None and eners.size > 0: + temp_data["energies"] = eners[temp_idx] + eners = eners[rest_idx] + if forces is not None and forces.size > 0: + temp_data["forces"] = forces[temp_idx] + forces = forces[rest_idx] + if virs is not None and virs.size > 0: + temp_data["virials"] = virs[temp_idx] + virs = virs[rest_idx] + data_list.append(temp_data) + return data_list + + +def dump(folder, data, comp_prec=np.float32, remove_sets=True): + os.makedirs(folder, exist_ok=True) + sets = sorted(glob.glob(os.path.join(folder, "set.*"))) + if len(sets) > 0: + if remove_sets: + for ii in sets: + shutil.rmtree(ii) + else: + raise RuntimeError( + "found " + + str(sets) + + " in " + + folder + + "not a clean deepmd raw dir. please firstly clean set.* then try compress" + ) + # if not converted to mixed + if "real_atom_types" not in data: + from dpdata import LabeledSystem, System + + if "energies" in data: + temp_sys = LabeledSystem(data=data) + else: + temp_sys = System(data=data) + temp_sys.convert_to_mixed_type() + # dump raw + np.savetxt(os.path.join(folder, "type.raw"), data["atom_types"], fmt="%d") + np.savetxt(os.path.join(folder, "type_map.raw"), data["real_atom_names"], fmt="%s") + # BondOrder System + if "bonds" in data: + np.savetxt( + os.path.join(folder, "bonds.raw"), + data["bonds"], + header="begin_atom, end_atom, bond_order", + ) + if "formal_charges" in data: + np.savetxt(os.path.join(folder, "formal_charges.raw"), data["formal_charges"]) + # reshape frame properties and convert prec + nframes = data["cells"].shape[0] + cells = np.reshape(data["cells"], [nframes, 9]).astype(comp_prec) + coords = np.reshape(data["coords"], [nframes, -1]).astype(comp_prec) + eners = None + forces = None + virials = None + real_atom_types = None + if "energies" in data: + eners = np.reshape(data["energies"], [nframes]).astype(comp_prec) + if "forces" in data: + forces = np.reshape(data["forces"], [nframes, -1]).astype(comp_prec) + if "virials" in data: + virials = np.reshape(data["virials"], [nframes, 9]).astype(comp_prec) + if "atom_pref" in data: + atom_pref = np.reshape(data["atom_pref"], [nframes, -1]).astype(comp_prec) + if "real_atom_types" in data: + real_atom_types = np.reshape(data["real_atom_types"], [nframes, -1]).astype( + np.int64 + ) + # dump frame properties: cell, coord, energy, force and virial + set_folder = os.path.join(folder, "set.%03d" % 0) + os.makedirs(set_folder) + np.save(os.path.join(set_folder, "box"), cells) + np.save(os.path.join(set_folder, "coord"), coords) + if eners is not None: + np.save(os.path.join(set_folder, "energy"), eners) + if forces is not None: + np.save(os.path.join(set_folder, "force"), forces) + if virials is not None: + np.save(os.path.join(set_folder, "virial"), virials) + if real_atom_types is not None: + np.save(os.path.join(set_folder, "real_atom_types"), real_atom_types) + if "atom_pref" in data: + np.save(os.path.join(set_folder, "atom_pref"), atom_pref) + try: + os.remove(os.path.join(folder, "nopbc")) + except OSError: + pass + if data.get("nopbc", False): + with open(os.path.join(folder, "nopbc"), "w") as fw_nopbc: + pass + + +def mix_system(*system, type_map, split_num=200, **kwargs): + """Mix the systems into mixed_type ones + + Parameters + ---------- + *system : System + The systems to mix + type_map : list of str + Maps atom type to name + split_num : int + Number of frames in each system + + Returns + ------- + mixed_systems: dict + dict of mixed system with key '{atom_numbs}/sys.xxx' + """ + mixed_systems = {} + temp_systems = {} + atom_numbs_sys_index = {} # index of sys + atom_numbs_frame_index = {} # index of frames in cur sys + for sys in system: + tmp_sys = sys.copy() + natom = tmp_sys.get_natoms() + tmp_sys.convert_to_mixed_type(type_map=type_map) + if str(natom) not in atom_numbs_sys_index: + atom_numbs_sys_index[str(natom)] = 0 + if str(natom) not in atom_numbs_frame_index: + atom_numbs_frame_index[str(natom)] = 0 + atom_numbs_frame_index[str(natom)] += tmp_sys.get_nframes() + if str(natom) not in temp_systems or not temp_systems[str(natom)]: + temp_systems[str(natom)] = tmp_sys + else: + temp_systems[str(natom)].append(tmp_sys) + if atom_numbs_frame_index[str(natom)] >= split_num: + while True: + sys_split, temp_systems[str(natom)], rest_num = split_system( + temp_systems[str(natom)], split_num=split_num + ) + sys_name = ( + f"{str(natom)}/sys." + "%.6d" % atom_numbs_sys_index[str(natom)] + ) + mixed_systems[sys_name] = sys_split + atom_numbs_sys_index[str(natom)] += 1 + if rest_num < split_num: + atom_numbs_frame_index[str(natom)] = rest_num + break + for natom in temp_systems: + if atom_numbs_frame_index[natom] > 0: + sys_name = f"{natom}/sys." + "%.6d" % atom_numbs_sys_index[natom] + mixed_systems[sys_name] = temp_systems[natom] + return mixed_systems + + +def split_system(sys, split_num=100): + rest = sys.get_nframes() - split_num + if rest <= 0: + return sys, None, 0 + else: + split_sys = sys.sub_system(range(split_num)) + rest_sys = sys.sub_system(range(split_num, sys.get_nframes())) + return split_sys, rest_sys, rest diff --git a/dpdata/format.py b/dpdata/format.py index 0ad991d8..b4fc5a8e 100644 --- a/dpdata/format.py +++ b/dpdata/format.py @@ -131,3 +131,24 @@ def to_multi_systems(self, formulas, directory, **kwargs): raise NotImplementedError( "%s doesn't support MultiSystems.to" % (self.__class__.__name__) ) + + def mix_system(self, *system, type_map, split_num=200, **kwargs): + """Mix the systems into mixed_type ones according to the unified given type_map. + + Parameters + ---------- + *system : System + The systems to mix + type_map : list of str + Maps atom type to name + split_num : int + Number of frames in each system + + Returns + ------- + mixed_systems: dict + dict of mixed system with key '{atom_numbs}/sys.xxx' + """ + raise NotImplementedError( + "%s doesn't support System.from" % (self.__class__.__name__) + ) diff --git a/dpdata/plugins/deepmd.py b/dpdata/plugins/deepmd.py index df56bc49..dcb9d810 100644 --- a/dpdata/plugins/deepmd.py +++ b/dpdata/plugins/deepmd.py @@ -1,3 +1,4 @@ +import os from typing import List, Optional, Union import h5py @@ -6,6 +7,7 @@ import dpdata import dpdata.deepmd.comp import dpdata.deepmd.hdf5 +import dpdata.deepmd.mixed import dpdata.deepmd.raw from dpdata.driver import Driver from dpdata.format import Format @@ -69,6 +71,108 @@ def from_labeled_system(self, file_name, type_map=None, **kwargs): MultiMode = Format.MultiModes.Directory +@Format.register("deepmd/npy/mixed") +class DeePMDMixedFormat(Format): + """Mixed type numpy format for DeePMD-kit. + Under this format, systems with the same number of atoms but different formula can be put together + for a larger system, especially when the frame numbers in systems are sparse. + This also helps to mixture the type information together for model training with type embedding network. + + Examples + -------- + Dump a MultiSystems into a mixed type numpy directory: + >>> import dpdata + >>> dpdata.MultiSystems(*systems).to_deepmd_npy_mixed("mixed_dir") + + Load a mixed type data into a MultiSystems: + >>> import dpdata + >>> dpdata.MultiSystems().load_systems_from_file("mixed_dir", fmt="deepmd/npy/mixed") + """ + + def from_system_mix(self, file_name, type_map=None, **kwargs): + return dpdata.deepmd.mixed.to_system_data( + file_name, type_map=type_map, labels=False + ) + + def to_system(self, data, file_name, prec=np.float64, **kwargs): + """ + Dump the system in deepmd mixed type format (numpy binary) to `folder`. + + The frames were already split to different systems, so these frames can be dumped to one single subfolders + named as `folder/set.000`, containing less than `set_size` frames. + + Parameters + ---------- + data : dict + System data + file_name : str + The output folder + prec : {numpy.float32, numpy.float64} + The floating point precision of the compressed data + """ + dpdata.deepmd.mixed.dump(file_name, data, comp_prec=prec) + + def from_labeled_system_mix(self, file_name, type_map=None, **kwargs): + return dpdata.deepmd.mixed.to_system_data( + file_name, type_map=type_map, labels=True + ) + + def mix_system(self, *system, type_map, split_num=200, **kwargs): + """Mix the systems into mixed_type ones according to the unified given type_map. + + Parameters + ---------- + *system : System + The systems to mix + type_map : list of str + Maps atom type to name + split_num : int + Number of frames in each system + + Returns + ------- + mixed_systems: dict + dict of mixed system with key '{atom_numbs}/sys.xxx' + """ + return dpdata.deepmd.mixed.mix_system( + *system, type_map=type_map, split_num=split_num, **kwargs + ) + + def from_multi_systems(self, directory, **kwargs): + """MultiSystems.from + + Parameters + ---------- + directory : str + directory of system + + Returns + ------- + filenames: list[str] + list of filenames + """ + if self.MultiMode == self.MultiModes.Directory: + level_1_dir = [ + os.path.join(directory, name) + for name in os.listdir(directory) + if os.path.isdir(os.path.join(directory, name)) + and os.path.isfile(os.path.join(directory, name, "type_map.raw")) + ] + level_2_dir = [ + os.path.join(directory, name1, name2) + for name1 in os.listdir(directory) + for name2 in os.listdir(os.path.join(directory, name1)) + if os.path.isdir(os.path.join(directory, name1)) + and os.path.isdir(os.path.join(directory, name1, name2)) + and os.path.isfile( + os.path.join(directory, name1, name2, "type_map.raw") + ) + ] + return level_1_dir + level_2_dir + + MultiMode = Format.MultiModes.Directory + + @Format.register("deepmd/hdf5") class DeePMDHDF5Format(Format): """HDF5 format for DeePMD-kit. diff --git a/dpdata/system.py b/dpdata/system.py index 017797d7..887aba15 100644 --- a/dpdata/system.py +++ b/dpdata/system.py @@ -178,6 +178,10 @@ class System(MSONable): DataType("orig", np.ndarray, (3,)), DataType("cells", np.ndarray, (Axis.NFRAMES, 3, 3)), DataType("coords", np.ndarray, (Axis.NFRAMES, Axis.NATOMS, 3)), + DataType( + "real_atom_types", np.ndarray, (Axis.NFRAMES, Axis.NATOMS), required=False + ), + DataType("real_atom_names", list, (Axis.NTYPES,), required=False), DataType("nopbc", bool, required=False), ) @@ -558,6 +562,33 @@ def append(self, system): self.data["nopbc"] = False return True + def convert_to_mixed_type(self, type_map=None): + """ + Convert the data dict to mixed type format structure, in order to append systems + with different formula but the same number of atoms. Change the 'atom_names' to + one placeholder type 'MIXED_TOKEN' and add 'real_atom_types' to store the real type + vectors according to the given type_map. + + Parameters + ---------- + type_map : list + type_map + """ + if "real_atom_types" in self.data.keys(): + return + if type_map is None: + type_map = self.get_atom_names() + type_index = [type_map.index(i) for i in self.data["atom_names"]] + frames = self.get_nframes() + self.data["real_atom_types"] = np.tile( + np.array([type_index[i] for i in self.data["atom_types"]]), [frames, 1] + ) + self.data["real_atom_names"] = type_map + natoms = self.get_natoms() + self.data["atom_types"] = np.zeros((natoms,), dtype=int) + self.data["atom_numbs"] = [natoms] + self.data["atom_names"] = ["MIXED_TOKEN"] + def sort_atom_names(self, type_map=None): """ Sort atom_names of the system and reorder atom_numbs and atom_types accoarding @@ -1261,21 +1292,46 @@ def __init__(self, *systems, type_map=None): self.append(*systems) def from_fmt_obj(self, fmtobj, directory, labeled=True, **kwargs): - for dd in fmtobj.from_multi_systems(directory, **kwargs): - if labeled: - system = LabeledSystem().from_fmt_obj(fmtobj, dd, **kwargs) - else: - system = System().from_fmt_obj(fmtobj, dd, **kwargs) - system.sort_atom_names() - self.append(system) - return self + if not isinstance(fmtobj, dpdata.plugins.deepmd.DeePMDMixedFormat): + for dd in fmtobj.from_multi_systems(directory, **kwargs): + if labeled: + system = LabeledSystem().from_fmt_obj(fmtobj, dd, **kwargs) + else: + system = System().from_fmt_obj(fmtobj, dd, **kwargs) + system.sort_atom_names() + self.append(system) + return self + else: + system_list = [] + for dd in fmtobj.from_multi_systems(directory, **kwargs): + if labeled: + data_list = fmtobj.from_labeled_system_mix(dd, **kwargs) + for data_item in data_list: + system_list.append(LabeledSystem(data=data_item)) + else: + data_list = fmtobj.from_system_mix(dd, **kwargs) + for data_item in data_list: + system_list.append(System(data=data_item)) + return self.__class__( + *system_list, + type_map=kwargs["type_map"] if "type_map" in kwargs else None, + ) def to_fmt_obj(self, fmtobj, directory, *args, **kwargs): - for fn, ss in zip( - fmtobj.to_multi_systems(self.systems.keys(), directory, **kwargs), - self.systems.values(), - ): - ss.to_fmt_obj(fmtobj, fn, *args, **kwargs) + if not isinstance(fmtobj, dpdata.plugins.deepmd.DeePMDMixedFormat): + for fn, ss in zip( + fmtobj.to_multi_systems(self.systems.keys(), directory, **kwargs), + self.systems.values(), + ): + ss.to_fmt_obj(fmtobj, fn, *args, **kwargs) + else: + mixed_systems = fmtobj.mix_system( + *list(self.systems.values()), type_map=self.atom_names, **kwargs + ) + for fn in mixed_systems: + mixed_systems[fn].to_fmt_obj( + fmtobj, os.path.join(directory, fn), *args, **kwargs + ) return self def to(self, fmt: str, *args, **kwargs) -> "MultiSystems": diff --git a/tests/test_deepmd_mixed.py b/tests/test_deepmd_mixed.py new file mode 100644 index 00000000..9e6ee9dd --- /dev/null +++ b/tests/test_deepmd_mixed.py @@ -0,0 +1,110 @@ +import os +import shutil +import unittest +from itertools import permutations + +import numpy as np +from comp_sys import CompLabeledSys, CompSys, IsNoPBC, MultiSystems +from context import dpdata + + +class TestMixedMultiSystems(unittest.TestCase, CompLabeledSys, MultiSystems, IsNoPBC): + def setUp(self): + self.places = 6 + self.e_places = 6 + self.f_places = 6 + self.v_places = 6 + + # C1H4 + system_1 = dpdata.LabeledSystem( + "gaussian/methane.gaussianlog", fmt="gaussian/log" + ) + + # C1H3 + system_2 = dpdata.LabeledSystem( + "gaussian/methane_sub.gaussianlog", fmt="gaussian/log" + ) + + tmp_data = system_1.data.copy() + tmp_data["atom_numbs"] = [1, 1, 1, 2] + tmp_data["atom_names"] = ["C", "H", "A", "B"] + tmp_data["atom_types"] = np.array([0, 1, 2, 3, 3]) + # C1H1A1B2 + system_1_modified_type_1 = dpdata.LabeledSystem(data=tmp_data) + + tmp_data = system_1.data.copy() + tmp_data["atom_numbs"] = [1, 1, 2, 1] + tmp_data["atom_names"] = ["C", "H", "A", "B"] + tmp_data["atom_types"] = np.array([0, 1, 2, 2, 3]) + # C1H1A2B1 + system_1_modified_type_2 = dpdata.LabeledSystem(data=tmp_data) + + tmp_data = system_1.data.copy() + tmp_data["atom_numbs"] = [1, 1, 1, 2] + tmp_data["atom_names"] = ["C", "H", "A", "D"] + tmp_data["atom_types"] = np.array([0, 1, 2, 3, 3]) + # C1H1A1C2 + system_1_modified_type_3 = dpdata.LabeledSystem(data=tmp_data) + + self.ms = dpdata.MultiSystems( + system_1, + system_2, + system_1_modified_type_1, + system_1_modified_type_2, + system_1_modified_type_3, + ) + self.ms.to_deepmd_npy_mixed("tmp.deepmd.mixed") + self.place_holder_ms = dpdata.MultiSystems().load_systems_from_file( + "tmp.deepmd.mixed/5", fmt="deepmd/npy" + ) + self.place_holder_ms += dpdata.MultiSystems().load_systems_from_file( + "tmp.deepmd.mixed/4", fmt="deepmd/npy" + ) + self.systems = dpdata.MultiSystems().load_systems_from_file( + "tmp.deepmd.mixed", fmt="deepmd/npy/mixed" + ) + self.system_1 = self.ms["C1H4A0B0D0"] + self.system_2 = self.systems["C1H4A0B0D0"] + + self.system_names = [ + "C1H4A0B0D0", + "C1H3A0B0D0", + "C1H1A1B2D0", + "C1H1A2B1D0", + "C1H1A1B0D2", + ] + self.system_sizes = { + "C1H4A0B0D0": 1, + "C1H3A0B0D0": 1, + "C1H1A1B2D0": 1, + "C1H1A2B1D0": 1, + "C1H1A1B0D2": 1, + } + self.atom_names = ["C", "H", "A", "B", "D"] + + def tearDown(self): + if os.path.exists("tmp.deepmd.mixed"): + shutil.rmtree("tmp.deepmd.mixed") + + def test_len(self): + self.assertEqual(len(self.ms), 5) + self.assertEqual(len(self.place_holder_ms), 2) + self.assertEqual(len(self.systems), 5) + + def test_get_nframes(self): + self.assertEqual(self.ms.get_nframes(), 5) + self.assertEqual(self.place_holder_ms.get_nframes(), 5) + self.assertEqual(self.systems.get_nframes(), 5) + + def test_str(self): + self.assertEqual(str(self.ms), "MultiSystems (5 systems containing 5 frames)") + self.assertEqual( + str(self.place_holder_ms), "MultiSystems (2 systems containing 5 frames)" + ) + self.assertEqual( + str(self.systems), "MultiSystems (5 systems containing 5 frames)" + ) + + +if __name__ == "__main__": + unittest.main() From aec7747b084b3909c3e7e32a4c3dbcf9d4e60ff0 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Fri, 24 Feb 2023 08:31:01 +0800 Subject: [PATCH 19/20] Correct the headers in README.md (#424) --- README.md | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 4af23cc0..cfa611d9 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ dpdata only works with python 3.7 or above. -# Installation +## Installation One can download the source code of dpdata by ```bash git clone https://github.com/deepmodeling/dpdata.git dpdata @@ -19,7 +19,7 @@ pip install dpdata ``` -# Quick start +## Quick start This section gives some examples on how dpdata works. Firstly one needs to import the module in a python 3.x compatible code. ```python @@ -32,7 +32,7 @@ The typicall workflow of `dpdata` is 3. Dump data to in a desired format -## Load data +### Load data ```python d_poscar = dpdata.System("POSCAR", fmt="vasp/poscar") ``` @@ -150,7 +150,7 @@ ms.to_deepmd_raw("deepmd") ms.to_deepmd_npy("deepmd") ``` -## Access data +### Access data These properties stored in `System` and `LabeledSystem` can be accessed by operator `[]` with the key of the property supplied, for example ```python coords = d_outcar["coords"] @@ -169,7 +169,7 @@ Available properties are (nframe: number of frames in the system, natoms: total | 'virials' | np.ndarray | nframes x 3 x 3 | True | The virial tensor of each frame -## Dump data +### Dump data The data stored in `System` or `LabeledSystem` can be dumped in 'lammps/lmp' or 'vasp/poscar' format, for example: ```python d_outcar.to("lammps/lmp", "conf.lmp", frame_idx=0) @@ -195,7 +195,7 @@ dpdata.LabeledSystem("OUTCAR").sub_system([0, -1]).to("deepmd/raw", "dpmd_raw") by which only the first and last frames are dumped to `dpmd_raw`. -## replicate +### replicate dpdata will create a super cell of the current atom configuration. ```python dpdata.System("./POSCAR").replicate( @@ -209,7 +209,7 @@ dpdata.System("./POSCAR").replicate( tuple(1,2,3) means don't copy atom configuration in x direction, make 2 copys in y direction, make 3 copys in z direction. -## perturb +### perturb By the following example, each frame of the original system (`dpdata.System('./POSCAR')`) is perturbed to generate three new frames. For each frame, the cell is perturbed by 5% and the atom positions are perturbed by 0.6 Angstrom. `atom_pert_style` indicates that the perturbation to the atom positions is subject to normal distribution. Other available options to `atom_pert_style` are`uniform` (uniform in a ball), and `const` (uniform on a sphere). ```python perturbed_system = dpdata.System("./POSCAR").perturb( @@ -221,7 +221,7 @@ perturbed_system = dpdata.System("./POSCAR").perturb( print(perturbed_system.data) ``` -## replace +### replace By the following example, Random 8 Hf atoms in the system will be replaced by Zr atoms with the atom postion unchanged. ```python s = dpdata.System("tests/poscars/POSCAR.P42nmc", fmt="vasp/poscar") @@ -229,7 +229,7 @@ s.replace("Hf", "Zr", 8) s.to_vasp_poscar("POSCAR.P42nmc.replace") ``` -# BondOrderSystem +## BondOrderSystem A new class `BondOrderSystem` which inherits from class `System` is introduced in dpdata. This new class contains information of chemical bonds and formal charges (stored in `BondOrderSystem.data['bonds']`, `BondOrderSystem.data['formal_charges']`). Now BondOrderSystem can only read from .mol/.sdf formats, because of its dependency on rdkit (which means rdkit must be installed if you want to use this function). Other formats, such as pdb, must be converted to .mol/.sdf format (maybe with software like open babel). ```python import dpdata @@ -254,7 +254,7 @@ AllChem.EmbedMultipleConfs(mol, 10) system = dpdata.BondOrderSystem(rdkit_mol=mol) ``` -## Bond Order Assignment +### Bond Order Assignment The `BondOrderSystem` implements a more robust sanitize procedure for rdkit Mol, as defined in `dpdata.rdkit.santizie.Sanitizer`. This class defines 3 level of sanitization process by: low, medium and high. (default is medium). + low: use `rdkit.Chem.SanitizeMol()` function to sanitize molecule. + medium: before using rdkit, the programm will first assign formal charge of each atom to avoid inappropriate valence exceptions. However, this mode requires the rightness of the bond order information in the given molecule. @@ -268,7 +268,7 @@ import dpdata for sdf_file in glob.glob("bond_order/refined-set-ligands/obabel/*sdf"): syst = dpdata.BondOrderSystem(sdf_file, sanitize_level="high", verbose=False) ``` -## Formal Charge Assignment +### Formal Charge Assignment BondOrderSystem implement a method to assign formal charge for each atom based on the 8-electron rule (see below). Note that it only supports common elements in bio-system: B,C,N,O,P,S,As ```python import dpdata @@ -304,7 +304,7 @@ import dpdata dpdata.MultiSystems().load_systems_from_file("mixed_dir", fmt="deepmd/npy/mixed") ``` -# Plugins +## Plugins One can follow [a simple example](plugin_example/) to add their own format by creating and installing plugins. It's critical to add the [Format](dpdata/format.py) class to `entry_points['dpdata.plugins']` in [`pyproject.toml`](plugin_example/pyproject.toml): ```toml From 06c21b6068cdbccd0df143f502a2ef510381a57a Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Sat, 25 Feb 2023 22:28:40 +0800 Subject: [PATCH 20/20] fix: standardize the deepmd/npy/mixed format (#425) This PR has concated two commits together: 1. Update the dpdata.MultiSystems() when from_deepmd_npy_mixed method is called; dpdata.MultiSystems().from_deepmd_npy_mixed only returned the results before but did not change itself, which is fixed in this commit, to be consistent with other from methods. (another bug is also fixed: not using .copy() in data["atom_names"] may cause error when manually changing type_map for this system. UTs are added in the next commit.) 2. Allow multiple sets in mixed-type format; Now for maximum 50000 frames in one sys and 2000 frames in one set. The reason I did not use 5000 frames per set, is that I think maximum set frames will be much more often used in mixed-type format than other format, and 2000 will be enough for large batch and more friendly for memory. Add UTs for type_map changing and mixed_type dir check. --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- dpdata/deepmd/mixed.py | 159 ++++++++++++++++++++----------------- dpdata/format.py | 6 +- dpdata/plugins/deepmd.py | 47 +++-------- dpdata/system.py | 10 +-- tests/test_deepmd_mixed.py | 126 ++++++++++++++++++++++++++--- 5 files changed, 217 insertions(+), 131 deletions(-) diff --git a/dpdata/deepmd/mixed.py b/dpdata/deepmd/mixed.py index 0e770dc2..fff8e930 100644 --- a/dpdata/deepmd/mixed.py +++ b/dpdata/deepmd/mixed.py @@ -54,60 +54,80 @@ def to_system_data(folder, type_map=None, labels=True): if os.path.isfile(os.path.join(folder, "nopbc")): data["nopbc"] = True sets = sorted(glob.glob(os.path.join(folder, "set.*"))) - assert len(sets) == 1, "Mixed type must have only one set!" - cells, coords, eners, forces, virs, real_atom_types = _load_set( - sets[0], data.get("nopbc", False) - ) - nframes = np.reshape(cells, [-1, 3, 3]).shape[0] - cells = np.reshape(cells, [nframes, 3, 3]) - coords = np.reshape(coords, [nframes, -1, 3]) - real_atom_types = np.reshape(real_atom_types, [nframes, -1]) - natom = real_atom_types.shape[1] - if labels: - if eners is not None and eners.size > 0: + all_cells = [] + all_coords = [] + all_eners = [] + all_forces = [] + all_virs = [] + all_real_atom_types = [] + for ii in sets: + cells, coords, eners, forces, virs, real_atom_types = _load_set( + ii, data.get("nopbc", False) + ) + nframes = np.reshape(cells, [-1, 3, 3]).shape[0] + all_cells.append(np.reshape(cells, [nframes, 3, 3])) + all_coords.append(np.reshape(coords, [nframes, -1, 3])) + all_real_atom_types.append(np.reshape(real_atom_types, [nframes, -1])) + if eners is not None: eners = np.reshape(eners, [nframes]) - if forces is not None and forces.size > 0: - forces = np.reshape(forces, [nframes, -1, 3]) - if virs is not None and virs.size > 0: - virs = np.reshape(virs, [nframes, 3, 3]) + if labels: + if eners is not None and eners.size > 0: + all_eners.append(np.reshape(eners, [nframes])) + if forces is not None and forces.size > 0: + all_forces.append(np.reshape(forces, [nframes, -1, 3])) + if virs is not None and virs.size > 0: + all_virs.append(np.reshape(virs, [nframes, 3, 3])) + all_cells_concat = np.concatenate(all_cells, axis=0) + all_coords_concat = np.concatenate(all_coords, axis=0) + all_real_atom_types_concat = np.concatenate(all_real_atom_types, axis=0) + all_eners_concat = None + all_forces_concat = None + all_virs_concat = None + if len(all_eners) > 0: + all_eners_concat = np.concatenate(all_eners, axis=0) + if len(all_forces) > 0: + all_forces_concat = np.concatenate(all_forces, axis=0) + if len(all_virs) > 0: + all_virs_concat = np.concatenate(all_virs, axis=0) data_list = [] while True: - if real_atom_types.size == 0: + if all_real_atom_types_concat.size == 0: break temp_atom_numbs = [ - np.count_nonzero(real_atom_types[0] == i) + np.count_nonzero(all_real_atom_types_concat[0] == i) for i in range(len(data["atom_names"])) ] # temp_formula = formula(data['atom_names'], temp_atom_numbs) - temp_idx = np.arange(real_atom_types.shape[0])[ - (real_atom_types == real_atom_types[0]).all(-1) + temp_idx = np.arange(all_real_atom_types_concat.shape[0])[ + (all_real_atom_types_concat == all_real_atom_types_concat[0]).all(-1) ] - rest_idx = np.arange(real_atom_types.shape[0])[ - (real_atom_types != real_atom_types[0]).any(-1) + rest_idx = np.arange(all_real_atom_types_concat.shape[0])[ + (all_real_atom_types_concat != all_real_atom_types_concat[0]).any(-1) ] temp_data = data.copy() + temp_data["atom_names"] = data["atom_names"].copy() temp_data["atom_numbs"] = temp_atom_numbs - temp_data["atom_types"] = real_atom_types[0] - real_atom_types = real_atom_types[rest_idx] - temp_data["cells"] = cells[temp_idx] - cells = cells[rest_idx] - temp_data["coords"] = coords[temp_idx] - coords = coords[rest_idx] + temp_data["atom_types"] = all_real_atom_types_concat[0] + all_real_atom_types_concat = all_real_atom_types_concat[rest_idx] + temp_data["cells"] = all_cells_concat[temp_idx] + all_cells_concat = all_cells_concat[rest_idx] + temp_data["coords"] = all_coords_concat[temp_idx] + all_coords_concat = all_coords_concat[rest_idx] if labels: - if eners is not None and eners.size > 0: - temp_data["energies"] = eners[temp_idx] - eners = eners[rest_idx] - if forces is not None and forces.size > 0: - temp_data["forces"] = forces[temp_idx] - forces = forces[rest_idx] - if virs is not None and virs.size > 0: - temp_data["virials"] = virs[temp_idx] - virs = virs[rest_idx] + if all_eners_concat is not None and all_eners_concat.size > 0: + temp_data["energies"] = all_eners_concat[temp_idx] + all_eners_concat = all_eners_concat[rest_idx] + if all_forces_concat is not None and all_forces_concat.size > 0: + temp_data["forces"] = all_forces_concat[temp_idx] + all_forces_concat = all_forces_concat[rest_idx] + if all_virs_concat is not None and all_virs_concat.size > 0: + temp_data["virials"] = all_virs_concat[temp_idx] + all_virs_concat = all_virs_concat[rest_idx] data_list.append(temp_data) return data_list -def dump(folder, data, comp_prec=np.float32, remove_sets=True): +def dump(folder, data, set_size=2000, comp_prec=np.float32, remove_sets=True): os.makedirs(folder, exist_ok=True) sets = sorted(glob.glob(os.path.join(folder, "set.*"))) if len(sets) > 0: @@ -164,20 +184,29 @@ def dump(folder, data, comp_prec=np.float32, remove_sets=True): np.int64 ) # dump frame properties: cell, coord, energy, force and virial - set_folder = os.path.join(folder, "set.%03d" % 0) - os.makedirs(set_folder) - np.save(os.path.join(set_folder, "box"), cells) - np.save(os.path.join(set_folder, "coord"), coords) - if eners is not None: - np.save(os.path.join(set_folder, "energy"), eners) - if forces is not None: - np.save(os.path.join(set_folder, "force"), forces) - if virials is not None: - np.save(os.path.join(set_folder, "virial"), virials) - if real_atom_types is not None: - np.save(os.path.join(set_folder, "real_atom_types"), real_atom_types) - if "atom_pref" in data: - np.save(os.path.join(set_folder, "atom_pref"), atom_pref) + nsets = nframes // set_size + if set_size * nsets < nframes: + nsets += 1 + for ii in range(nsets): + set_stt = ii * set_size + set_end = (ii + 1) * set_size + set_folder = os.path.join(folder, "set.%06d" % ii) + os.makedirs(set_folder) + np.save(os.path.join(set_folder, "box"), cells[set_stt:set_end]) + np.save(os.path.join(set_folder, "coord"), coords[set_stt:set_end]) + if eners is not None: + np.save(os.path.join(set_folder, "energy"), eners[set_stt:set_end]) + if forces is not None: + np.save(os.path.join(set_folder, "force"), forces[set_stt:set_end]) + if virials is not None: + np.save(os.path.join(set_folder, "virial"), virials[set_stt:set_end]) + if real_atom_types is not None: + np.save( + os.path.join(set_folder, "real_atom_types"), + real_atom_types[set_stt:set_end], + ) + if "atom_pref" in data: + np.save(os.path.join(set_folder, "atom_pref"), atom_pref[set_stt:set_end]) try: os.remove(os.path.join(folder, "nopbc")) except OSError: @@ -187,8 +216,8 @@ def dump(folder, data, comp_prec=np.float32, remove_sets=True): pass -def mix_system(*system, type_map, split_num=200, **kwargs): - """Mix the systems into mixed_type ones +def mix_system(*system, type_map, **kwargs): + """Mix the systems into mixed_type ones according to the unified given type_map. Parameters ---------- @@ -196,24 +225,19 @@ def mix_system(*system, type_map, split_num=200, **kwargs): The systems to mix type_map : list of str Maps atom type to name - split_num : int - Number of frames in each system Returns ------- mixed_systems: dict - dict of mixed system with key '{atom_numbs}/sys.xxx' + dict of mixed system with key 'atom_numbs' """ mixed_systems = {} temp_systems = {} - atom_numbs_sys_index = {} # index of sys atom_numbs_frame_index = {} # index of frames in cur sys for sys in system: tmp_sys = sys.copy() natom = tmp_sys.get_natoms() tmp_sys.convert_to_mixed_type(type_map=type_map) - if str(natom) not in atom_numbs_sys_index: - atom_numbs_sys_index[str(natom)] = 0 if str(natom) not in atom_numbs_frame_index: atom_numbs_frame_index[str(natom)] = 0 atom_numbs_frame_index[str(natom)] += tmp_sys.get_nframes() @@ -221,27 +245,14 @@ def mix_system(*system, type_map, split_num=200, **kwargs): temp_systems[str(natom)] = tmp_sys else: temp_systems[str(natom)].append(tmp_sys) - if atom_numbs_frame_index[str(natom)] >= split_num: - while True: - sys_split, temp_systems[str(natom)], rest_num = split_system( - temp_systems[str(natom)], split_num=split_num - ) - sys_name = ( - f"{str(natom)}/sys." + "%.6d" % atom_numbs_sys_index[str(natom)] - ) - mixed_systems[sys_name] = sys_split - atom_numbs_sys_index[str(natom)] += 1 - if rest_num < split_num: - atom_numbs_frame_index[str(natom)] = rest_num - break for natom in temp_systems: if atom_numbs_frame_index[natom] > 0: - sys_name = f"{natom}/sys." + "%.6d" % atom_numbs_sys_index[natom] + sys_name = f"{natom}" mixed_systems[sys_name] = temp_systems[natom] return mixed_systems -def split_system(sys, split_num=100): +def split_system(sys, split_num=10000): rest = sys.get_nframes() - split_num if rest <= 0: return sys, None, 0 diff --git a/dpdata/format.py b/dpdata/format.py index b4fc5a8e..c6ba91b7 100644 --- a/dpdata/format.py +++ b/dpdata/format.py @@ -132,7 +132,7 @@ def to_multi_systems(self, formulas, directory, **kwargs): "%s doesn't support MultiSystems.to" % (self.__class__.__name__) ) - def mix_system(self, *system, type_map, split_num=200, **kwargs): + def mix_system(self, *system, type_map, **kwargs): """Mix the systems into mixed_type ones according to the unified given type_map. Parameters @@ -141,13 +141,11 @@ def mix_system(self, *system, type_map, split_num=200, **kwargs): The systems to mix type_map : list of str Maps atom type to name - split_num : int - Number of frames in each system Returns ------- mixed_systems: dict - dict of mixed system with key '{atom_numbs}/sys.xxx' + dict of mixed system with key 'atom_numbs' """ raise NotImplementedError( "%s doesn't support System.from" % (self.__class__.__name__) diff --git a/dpdata/plugins/deepmd.py b/dpdata/plugins/deepmd.py index dcb9d810..499e23b2 100644 --- a/dpdata/plugins/deepmd.py +++ b/dpdata/plugins/deepmd.py @@ -117,7 +117,7 @@ def from_labeled_system_mix(self, file_name, type_map=None, **kwargs): file_name, type_map=type_map, labels=True ) - def mix_system(self, *system, type_map, split_num=200, **kwargs): + def mix_system(self, *system, type_map, **kwargs): """Mix the systems into mixed_type ones according to the unified given type_map. Parameters @@ -126,49 +126,22 @@ def mix_system(self, *system, type_map, split_num=200, **kwargs): The systems to mix type_map : list of str Maps atom type to name - split_num : int - Number of frames in each system Returns ------- mixed_systems: dict - dict of mixed system with key '{atom_numbs}/sys.xxx' + dict of mixed system with key 'atom_numbs' """ - return dpdata.deepmd.mixed.mix_system( - *system, type_map=type_map, split_num=split_num, **kwargs - ) + return dpdata.deepmd.mixed.mix_system(*system, type_map=type_map, **kwargs) def from_multi_systems(self, directory, **kwargs): - """MultiSystems.from - - Parameters - ---------- - directory : str - directory of system - - Returns - ------- - filenames: list[str] - list of filenames - """ - if self.MultiMode == self.MultiModes.Directory: - level_1_dir = [ - os.path.join(directory, name) - for name in os.listdir(directory) - if os.path.isdir(os.path.join(directory, name)) - and os.path.isfile(os.path.join(directory, name, "type_map.raw")) - ] - level_2_dir = [ - os.path.join(directory, name1, name2) - for name1 in os.listdir(directory) - for name2 in os.listdir(os.path.join(directory, name1)) - if os.path.isdir(os.path.join(directory, name1)) - and os.path.isdir(os.path.join(directory, name1, name2)) - and os.path.isfile( - os.path.join(directory, name1, name2, "type_map.raw") - ) - ] - return level_1_dir + level_2_dir + sys_dir = [] + for root, dirs, files in os.walk(directory): + if ( + "type_map.raw" in files + ): # mixed_type format systems must have type_map.raw + sys_dir.append(root) + return sys_dir MultiMode = Format.MultiModes.Directory diff --git a/dpdata/system.py b/dpdata/system.py index 887aba15..802b352c 100644 --- a/dpdata/system.py +++ b/dpdata/system.py @@ -1307,15 +1307,13 @@ def from_fmt_obj(self, fmtobj, directory, labeled=True, **kwargs): if labeled: data_list = fmtobj.from_labeled_system_mix(dd, **kwargs) for data_item in data_list: - system_list.append(LabeledSystem(data=data_item)) + system_list.append(LabeledSystem(data=data_item, **kwargs)) else: data_list = fmtobj.from_system_mix(dd, **kwargs) for data_item in data_list: - system_list.append(System(data=data_item)) - return self.__class__( - *system_list, - type_map=kwargs["type_map"] if "type_map" in kwargs else None, - ) + system_list.append(System(data=data_item, **kwargs)) + self.append(*system_list) + return self def to_fmt_obj(self, fmtobj, directory, *args, **kwargs): if not isinstance(fmtobj, dpdata.plugins.deepmd.DeePMDMixedFormat): diff --git a/tests/test_deepmd_mixed.py b/tests/test_deepmd_mixed.py index 9e6ee9dd..19c46e72 100644 --- a/tests/test_deepmd_mixed.py +++ b/tests/test_deepmd_mixed.py @@ -1,6 +1,7 @@ import os import shutil import unittest +from glob import glob from itertools import permutations import numpy as np @@ -8,7 +9,9 @@ from context import dpdata -class TestMixedMultiSystems(unittest.TestCase, CompLabeledSys, MultiSystems, IsNoPBC): +class TestMixedMultiSystemsDumpLoad( + unittest.TestCase, CompLabeledSys, MultiSystems, IsNoPBC +): def setUp(self): self.places = 6 self.e_places = 6 @@ -54,17 +57,18 @@ def setUp(self): system_1_modified_type_3, ) self.ms.to_deepmd_npy_mixed("tmp.deepmd.mixed") - self.place_holder_ms = dpdata.MultiSystems().load_systems_from_file( - "tmp.deepmd.mixed/5", fmt="deepmd/npy" - ) - self.place_holder_ms += dpdata.MultiSystems().load_systems_from_file( - "tmp.deepmd.mixed/4", fmt="deepmd/npy" - ) - self.systems = dpdata.MultiSystems().load_systems_from_file( - "tmp.deepmd.mixed", fmt="deepmd/npy/mixed" - ) + self.place_holder_ms = dpdata.MultiSystems() + self.place_holder_ms.from_deepmd_npy("tmp.deepmd.mixed", fmt="deepmd/npy") + self.systems = dpdata.MultiSystems() + self.systems.from_deepmd_npy_mixed("tmp.deepmd.mixed", fmt="deepmd/npy/mixed") self.system_1 = self.ms["C1H4A0B0D0"] self.system_2 = self.systems["C1H4A0B0D0"] + mixed_sets = glob("tmp.deepmd.mixed/*/set.*") + self.assertEqual(len(mixed_sets), 2) + for i in mixed_sets: + self.assertEqual( + os.path.exists(os.path.join(i, "real_atom_types.npy")), True + ) self.system_names = [ "C1H4A0B0D0", @@ -106,5 +110,107 @@ def test_str(self): ) +class TestMixedMultiSystemsTypeChange( + unittest.TestCase, CompLabeledSys, MultiSystems, IsNoPBC +): + def setUp(self): + self.places = 6 + self.e_places = 6 + self.f_places = 6 + self.v_places = 6 + + # C1H4 + system_1 = dpdata.LabeledSystem( + "gaussian/methane.gaussianlog", fmt="gaussian/log" + ) + + # C1H3 + system_2 = dpdata.LabeledSystem( + "gaussian/methane_sub.gaussianlog", fmt="gaussian/log" + ) + + tmp_data = system_1.data.copy() + tmp_data["atom_numbs"] = [1, 1, 1, 2] + tmp_data["atom_names"] = ["C", "H", "A", "B"] + tmp_data["atom_types"] = np.array([0, 1, 2, 3, 3]) + # C1H1A1B2 + system_1_modified_type_1 = dpdata.LabeledSystem(data=tmp_data) + + tmp_data = system_1.data.copy() + tmp_data["atom_numbs"] = [1, 1, 2, 1] + tmp_data["atom_names"] = ["C", "H", "A", "B"] + tmp_data["atom_types"] = np.array([0, 1, 2, 2, 3]) + # C1H1A2B1 + system_1_modified_type_2 = dpdata.LabeledSystem(data=tmp_data) + + tmp_data = system_1.data.copy() + tmp_data["atom_numbs"] = [1, 1, 1, 2] + tmp_data["atom_names"] = ["C", "H", "A", "D"] + tmp_data["atom_types"] = np.array([0, 1, 2, 3, 3]) + # C1H1A1C2 + system_1_modified_type_3 = dpdata.LabeledSystem(data=tmp_data) + + self.ms = dpdata.MultiSystems( + system_1, + system_2, + system_1_modified_type_1, + system_1_modified_type_2, + system_1_modified_type_3, + type_map=["TOKEN"], + ) + self.ms.to_deepmd_npy_mixed("tmp.deepmd.mixed") + self.place_holder_ms = dpdata.MultiSystems() + self.place_holder_ms.from_deepmd_npy("tmp.deepmd.mixed", fmt="deepmd/npy") + self.systems = dpdata.MultiSystems(type_map=["TOKEN"]) + self.systems.from_deepmd_npy_mixed("tmp.deepmd.mixed", fmt="deepmd/npy/mixed") + self.system_1 = self.ms["TOKEN0C1H4A0B0D0"] + self.system_2 = self.systems["TOKEN0C1H4A0B0D0"] + mixed_sets = glob("tmp.deepmd.mixed/*/set.*") + self.assertEqual(len(mixed_sets), 2) + for i in mixed_sets: + self.assertEqual( + os.path.exists(os.path.join(i, "real_atom_types.npy")), True + ) + + self.system_names = [ + "TOKEN0C1H4A0B0D0", + "TOKEN0C1H3A0B0D0", + "TOKEN0C1H1A1B2D0", + "TOKEN0C1H1A2B1D0", + "TOKEN0C1H1A1B0D2", + ] + self.system_sizes = { + "TOKEN0C1H4A0B0D0": 1, + "TOKEN0C1H3A0B0D0": 1, + "TOKEN0C1H1A1B2D0": 1, + "TOKEN0C1H1A2B1D0": 1, + "TOKEN0C1H1A1B0D2": 1, + } + self.atom_names = ["C", "H", "A", "B", "D"] + + def tearDown(self): + if os.path.exists("tmp.deepmd.mixed"): + shutil.rmtree("tmp.deepmd.mixed") + + def test_len(self): + self.assertEqual(len(self.ms), 5) + self.assertEqual(len(self.place_holder_ms), 2) + self.assertEqual(len(self.systems), 5) + + def test_get_nframes(self): + self.assertEqual(self.ms.get_nframes(), 5) + self.assertEqual(self.place_holder_ms.get_nframes(), 5) + self.assertEqual(self.systems.get_nframes(), 5) + + def test_str(self): + self.assertEqual(str(self.ms), "MultiSystems (5 systems containing 5 frames)") + self.assertEqual( + str(self.place_holder_ms), "MultiSystems (2 systems containing 5 frames)" + ) + self.assertEqual( + str(self.systems), "MultiSystems (5 systems containing 5 frames)" + ) + + if __name__ == "__main__": unittest.main()