From 2968cdf3923cad857e80d049504e9825841d521e Mon Sep 17 00:00:00 2001 From: Yongbin Zhuang <38876805+robinzyb@users.noreply.github.com> Date: Fri, 29 Dec 2023 11:40:09 +0100 Subject: [PATCH] add precommit config and fix problem (#35) --- .github/workflows/doc.yml | 4 +- .pre-commit-config.yaml | 11 ++ cp2kdata/block_parser/atomic_kind.py | 2 +- cp2kdata/block_parser/cells.py | 20 +-- cp2kdata/block_parser/converge.py | 4 +- cp2kdata/block_parser/dipole.py | 14 +-- cp2kdata/block_parser/errors_handle.py | 2 +- cp2kdata/block_parser/header_info.py | 22 ++-- cp2kdata/block_parser/md_xyz.py | 10 +- cp2kdata/block_parser/mulliken.py | 4 +- cp2kdata/block_parser/stress.py | 16 +-- cp2kdata/cell.py | 31 +++-- cp2kdata/cli/cmd.py | 162 ++++++++++++------------- cp2kdata/cube/cube.py | 36 +++--- cp2kdata/dpdata_plugin.py | 30 ++--- cp2kdata/output.py | 44 +++---- cp2kdata/plots/fep_plot.py | 4 +- cp2kdata/plots/test_plot.py | 44 +++---- cp2kdata/test_input.py | 34 +++--- cp2kdata/utils.py | 8 +- docs/backlog.md | 4 +- docs/input_test.md | 4 +- docs/output.md | 10 +- docs/pdos/README.md | 2 +- pyproject.toml | 4 +- pytest.ini | 2 +- 26 files changed, 269 insertions(+), 259 deletions(-) create mode 100644 .pre-commit-config.yaml diff --git a/.github/workflows/doc.yml b/.github/workflows/doc.yml index bf21ed7..96da54f 100644 --- a/.github/workflows/doc.yml +++ b/.github/workflows/doc.yml @@ -18,13 +18,13 @@ jobs: uses: actions/setup-python@v2 with: python-version: 3.8 - + - name: Install dependencies run: | pip install . pip install sphinx pip install jupyter-book - + - name: Sphinx APIDoc run: | sphinx-apidoc -f -o jupyter-book/_api/ cp2kdata/ --separate diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..0406c24 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,11 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v3.2.0 + hooks: + - id: trailing-whitespace + exclude: "^tests/.*$" +# - id: end-of-file-fixer + - id: check-yaml +# - id: check-added-large-files \ No newline at end of file diff --git a/cp2kdata/block_parser/atomic_kind.py b/cp2kdata/block_parser/atomic_kind.py index 657fba2..0dc3f89 100644 --- a/cp2kdata/block_parser/atomic_kind.py +++ b/cp2kdata/block_parser/atomic_kind.py @@ -28,7 +28,7 @@ def parse_atomic_kinds(output_file): for match in ATOMIC_KINDS_RE.finditer(output_file): atomic_kinds.append(match["atomic_kind"]) if atomic_kinds: - # only return the last atomic kinds + # only return the last atomic kinds return np.array(atomic_kinds[-num_atomic_kinds_list[-1]:], dtype=str) else: return None \ No newline at end of file diff --git a/cp2kdata/block_parser/cells.py b/cp2kdata/block_parser/cells.py index d46ea03..06d2482 100644 --- a/cp2kdata/block_parser/cells.py +++ b/cp2kdata/block_parser/cells.py @@ -9,20 +9,20 @@ ALL_CELL_RE = re.compile( r""" \s+CELL\|\sVector\sa\s\[angstrom\]: - \s+(?P[\s-]\d+\.\d+) - \s+(?P[\s-]\d+\.\d+) + \s+(?P[\s-]\d+\.\d+) + \s+(?P[\s-]\d+\.\d+) \s+(?P[\s-]\d+\.\d+) \s+\|a\|\s+=\s+\S+ \n \s+CELL\|\sVector\sb\s\[angstrom\]: - \s+(?P[\s-]\d+\.\d+) - \s+(?P[\s-]\d+\.\d+) + \s+(?P[\s-]\d+\.\d+) + \s+(?P[\s-]\d+\.\d+) \s+(?P[\s-]\d+\.\d+) \s+\|b\|\s+=\s+\S+ \n \s+CELL\|\sVector\sc\s\[angstrom\]: - \s+(?P[\s-]\d+\.\d+) - \s+(?P[\s-]\d+\.\d+) + \s+(?P[\s-]\d+\.\d+) + \s+(?P[\s-]\d+\.\d+) \s+(?P[\s-]\d+\.\d+) \s+\|c\|\s+=\s+\S+ \n @@ -45,7 +45,7 @@ def parse_all_cells(output_file): return np.array(all_cells, dtype=float) else: return None - + ALL_MD_CELL_RE_V7 = re.compile( r""" \sCELL\sLNTHS\[bohr\]\s{13}=\s @@ -75,7 +75,7 @@ def parse_all_cells(output_file): #skip three lines (.{80}\n){3} #parse angles - (\sMD\|\sCell\sangles\s\[deg\]\s{10} + (\sMD\|\sCell\sangles\s\[deg\]\s{10} \s{2}(?P\d\.\d{8}E(\+|\-)\d{2}) \s{2}(?P\d\.\d{8}E(\+|\-)\d{2}) \s{2}(?P\d\.\d{8}E(\+|\-)\d{2}))? @@ -83,14 +83,14 @@ def parse_all_cells(output_file): re.VERBOSE ) -def parse_all_md_cells(output_file: List[str], +def parse_all_md_cells(output_file: List[str], cp2k_info: Cp2kInfo, init_cell_info=None): # init_cell_info are used for npt_I parse. # because npt_I doesn't include angle info in MD| block # notice that the cell of step 0 is excluded from MD| block - + # choose parser according to cp2k_info.version if cp2k_info.version in ['2023.1']: ALL_MD_CELL_RE = ALL_MD_CELL_RE_V2023 diff --git a/cp2kdata/block_parser/converge.py b/cp2kdata/block_parser/converge.py index 67798b7..bccf678 100644 --- a/cp2kdata/block_parser/converge.py +++ b/cp2kdata/block_parser/converge.py @@ -17,7 +17,7 @@ def parse_e_f_converge(filename) -> ConvergeInfo: info_dict = regrep( filename=filename, reverse=True, - patterns={"converge": CONVERGE_PATTERN}, + patterns={"converge": CONVERGE_PATTERN}, terminate_on_match=True ) @@ -32,7 +32,7 @@ def parse_md_converge(filename): info_dict = regrep( filename=filename, reverse=True, - patterns={"converge": CONVERGE_PATTERN}, + patterns={"converge": CONVERGE_PATTERN}, terminate_on_match=False ) #print(info_dict['converge']) diff --git a/cp2kdata/block_parser/dipole.py b/cp2kdata/block_parser/dipole.py index 769c86d..3782460 100644 --- a/cp2kdata/block_parser/dipole.py +++ b/cp2kdata/block_parser/dipole.py @@ -28,13 +28,13 @@ def parse_dipole_list(output_file): """ Reference Point [Bohr] 0.00000000 0.00000000 0.00000000 - Charges + Charges Electronic= 864.00000000 Core= -864.00000000 Total= 0.00000000 - Dipole vectors are based on the periodic (Berry phase) operator. - They are defined modulo integer multiples of the cell matrix [Debye]. - [X] [ 46.55265580 0.00000000 0.00000000 ] [i] - [Y]=[ 0.00000000 54.46353324 0.00000000 ]*[j] - [Z] [ 0.00000000 0.00000000 54.47313965 ] [k] - Dipole moment [Debye] + Dipole vectors are based on the periodic (Berry phase) operator. + They are defined modulo integer multiples of the cell matrix [Debye]. + [X] [ 46.55265580 0.00000000 0.00000000 ] [i] + [Y]=[ 0.00000000 54.46353324 0.00000000 ]*[j] + [Z] [ 0.00000000 0.00000000 54.47313965 ] [k] + Dipole moment [Debye] X= -0.07183634 Y= -0.07690441 Z= 1.13302571 Total= 1.13790246 """ \ No newline at end of file diff --git a/cp2kdata/block_parser/errors_handle.py b/cp2kdata/block_parser/errors_handle.py index 6538f0b..eb3cea7 100644 --- a/cp2kdata/block_parser/errors_handle.py +++ b/cp2kdata/block_parser/errors_handle.py @@ -10,7 +10,7 @@ def parse_errors(output_file): errors_info = {} - + for match in EXCEED_WALL_TIME_RE.finditer(output_file): #print(match) if match: diff --git a/cp2kdata/block_parser/header_info.py b/cp2kdata/block_parser/header_info.py index 6e35f66..550849f 100644 --- a/cp2kdata/block_parser/header_info.py +++ b/cp2kdata/block_parser/header_info.py @@ -17,8 +17,8 @@ class Cp2kInfo: def parse_cp2k_info(filename) -> Cp2kInfo: cp2k_info = regrep( - filename=filename, - patterns={"version": CP2K_INFO_VERSION_PATTERN}, + filename=filename, + patterns={"version": CP2K_INFO_VERSION_PATTERN}, terminate_on_match=True ) @@ -42,10 +42,10 @@ class GlobalInfo: def parse_global_info(filename) -> GlobalInfo: global_info = {} - + global_info = regrep( - filename=filename, - patterns={"run_type": GLOBAL_INFO_RUN_TYPE_PATTERN, + filename=filename, + patterns={"run_type": GLOBAL_INFO_RUN_TYPE_PATTERN, "print_level": GLOBAL_INFO_PRINT_LEVEL_PATTERN }, terminate_on_match=True @@ -78,16 +78,16 @@ class DFTInfo: def parse_dft_info(filename) -> DFTInfo: dft_info = {} - + dft_info = regrep( - filename=filename, + filename=filename, patterns={ "ks_type": DFT_INFO_KS_TYPE_PATTERN, "multiplicity": DFT_INFO_MULTIPLICITY_PATTERN }, terminate_on_match=True ) - + if dft_info: return DFTInfo(ks_type=dft_info["ks_type"][0][0][0], multiplicity=dft_info["multiplicity"][0][0][0]) else: @@ -108,12 +108,12 @@ class MDInfo: def parse_md_info(filename): md_info = {} - + md_info = regrep( - filename=filename, + filename=filename, patterns={ "ensemble_type": MD_INFO_ENSEMBLE_TYPE_PATTERN - }, + }, terminate_on_match=True ) diff --git a/cp2kdata/block_parser/md_xyz.py b/cp2kdata/block_parser/md_xyz.py index d63a45b..d037b28 100644 --- a/cp2kdata/block_parser/md_xyz.py +++ b/cp2kdata/block_parser/md_xyz.py @@ -81,12 +81,12 @@ def parse_pos_xyz_from_wannier(wannier_xyz_fiel): def parse_md_stress(stress_file): print(f"Parsing Stresses from {stress_file}") stresses_list = np.loadtxt( - stress_file, - usecols=(2, 3, 4, 5, 6, 7, 8, 9, 10), + stress_file, + usecols=(2, 3, 4, 5, 6, 7, 8, 9, 10), ndmin=2, dtype=np.float64 ) - + numb_frames = stresses_list.shape[0] return stresses_list.reshape(numb_frames, 3, 3) @@ -94,8 +94,8 @@ def parse_md_stress(stress_file): def parse_md_cell(cell_file): print(f"Parsing Cells from {cell_file}") cells_list = np.loadtxt( - cell_file, - usecols=(2, 3, 4, 5, 6, 7, 8, 9, 10), + cell_file, + usecols=(2, 3, 4, 5, 6, 7, 8, 9, 10), ndmin=2, dtype=np.float64 ) diff --git a/cp2kdata/block_parser/mulliken.py b/cp2kdata/block_parser/mulliken.py index 2df808e..dce87aa 100644 --- a/cp2kdata/block_parser/mulliken.py +++ b/cp2kdata/block_parser/mulliken.py @@ -30,7 +30,7 @@ \s+(?P\d+) \s+(?P[\s-]\d+\.\d+) \s+(?P[\s-]\d+\.\d+) - + )+ """, re.VERBOSE @@ -67,7 +67,7 @@ def parse_mulliken_pop_list(output_file, DFTInfo): "net_charge": float(net_charge), } ) - mulliken_pop_list.append(mulliken_pop) + mulliken_pop_list.append(mulliken_pop) if mulliken_pop_list: return mulliken_pop_list diff --git a/cp2kdata/block_parser/stress.py b/cp2kdata/block_parser/stress.py index 252a5a3..9bee5bf 100644 --- a/cp2kdata/block_parser/stress.py +++ b/cp2kdata/block_parser/stress.py @@ -6,17 +6,17 @@ (\sSTRESS\sTENSOR\s\[GPa\] \n \s+X\s+Y\s+Z\s*\n - \s+X - \s+(?P[\s-]\d+\.\d+) - \s+(?P[\s-]\d+\.\d+) + \s+X + \s+(?P[\s-]\d+\.\d+) + \s+(?P[\s-]\d+\.\d+) \s+(?P[\s-]\d+\.\d+)\n \s+Y - \s+(?P[\s-]\d+\.\d+) - \s+(?P[\s-]\d+\.\d+) + \s+(?P[\s-]\d+\.\d+) + \s+(?P[\s-]\d+\.\d+) \s+(?P[\s-]\d+\.\d+)\n - \s+Z - \s+(?P[\s-]\d+\.\d+) - \s+(?P[\s-]\d+\.\d+) + \s+Z + \s+(?P[\s-]\d+\.\d+) + \s+(?P[\s-]\d+\.\d+) \s+(?P[\s-]\d+\.\d+)\n |# or another pattern used in v8.1 \s+STRESS\|\sAnalytical\sstress\stensor\s\[GPa\]\s*\n diff --git a/cp2kdata/cell.py b/cp2kdata/cell.py index d10b94e..fa27da2 100644 --- a/cp2kdata/cell.py +++ b/cp2kdata/cell.py @@ -7,9 +7,9 @@ class Cp2kCell: def __init__( - self, - cell_param: npt.NDArray[np.float64], - grid_point: npt.NDArray[np.int_] = None, + self, + cell_param: npt.NDArray[np.float64], + grid_point: npt.NDArray[np.int_] = None, grid_spacing_matrix: npt.NDArray[np.float64] = None ): """ @@ -32,8 +32,8 @@ def __init__( if isinstance(cell_param, float): self.cell_matrix = np.array( [ - [cell_param, 0, 0], - [0, cell_param, 0], + [cell_param, 0, 0], + [0, cell_param, 0], [0, 0, cell_param] ] ) @@ -41,12 +41,12 @@ def __init__( elif cell_param.shape == (3,): self.cell_matrix = np.array( [ - [cell_param[0], 0, 0], - [0, cell_param[1], 0], + [cell_param[0], 0, 0], + [0, cell_param[1], 0], [0, 0, cell_param[2]] ] ) - print("the length of input cell_param is 3, " + print("the length of input cell_param is 3, " "the cell is assumed to be orthorhombic") elif cell_param.shape == (6,): self.cell_matrix = cellpar_to_cell(cell_param) @@ -55,12 +55,12 @@ def __init__( "which will be converted to cell matrix") elif cell_param.shape == (3, 3): self.cell_matrix = cell_param - print("input cell_param is a matrix with shape of (3,3), " + print("input cell_param is a matrix with shape of (3,3), " "the cell is read as is") else: raise ValueError("The input cell_param is not supported") - - + + if (grid_point is None) and (grid_spacing_matrix is None): self.grid_point = None self.grid_spacing_matrix = None @@ -74,7 +74,7 @@ def __init__( elif (grid_point is not None) and (grid_spacing_matrix is not None): self.grid_point = np.array(grid_point) self.grid_spacing_matrix = np.array(grid_spacing_matrix) - + if grid_point is not None: self.grid_point = self.grid_point.astype(int) @@ -83,7 +83,7 @@ def __init__( def copy(self): return deepcopy(self) - + def get_volume(self): return np.linalg.det(self.cell_matrix) @@ -92,14 +92,13 @@ def get_dv(self): return np.linalg.det(self.grid_spacing_matrix) except LinAlgError as ae: print("No grid point information is available") - + def get_cell_param(self): return self.cell_param def get_cell_angles(self): return self.cell_param[3:] - + def get_cell_lengths(self): return self.cell_param[:3] - \ No newline at end of file diff --git a/cp2kdata/cli/cmd.py b/cp2kdata/cli/cmd.py index 40e1c0d..131c3a9 100644 --- a/cp2kdata/cli/cmd.py +++ b/cp2kdata/cli/cmd.py @@ -24,32 +24,32 @@ def cube(): click.echo('Manipulate Cube Files') cli.add_command(cube) -#-- for gen test --# +#-- for gen test --# #-- Cutoff --# @click.command() @click.option( - '--target_dir', - type=str, - default=".", + '--target_dir', + type=str, + default=".", help='test directories are generated under target directory' ) @click.option( - '--cutoff_range', - '-crange', - type=(int, int, int), - default=(300, 601, 50), + '--cutoff_range', + '-crange', + type=(int, int, int), + default=(300, 601, 50), help='cutoff range, min, max, stepsize' ) @click.option( - '--scf_converge', - type=bool, + '--scf_converge', + type=bool, default=False, help="whether converge scf" ) @click.argument('cp2k_input_file', type=str, nargs=1) @click.argument( - 'other_file_list', - type=str, + 'other_file_list', + type=str, nargs=-1, default=None ) @@ -57,8 +57,8 @@ def cutoff(cp2k_input_file, target_dir, cutoff_range, other_file_list, scf_conve inp = get_CP2K(cp2k_input_file) #click.echo(other_file_list) write_cutoff_test_inp( - inp, - target_dir=target_dir, + inp, + target_dir=target_dir, cutoff_range=cutoff_range, other_file_list=other_file_list, scf_converge=scf_converge) @@ -70,29 +70,29 @@ def cutoff(cp2k_input_file, target_dir, cutoff_range, other_file_list, scf_conve #-- Basis --# @click.command() @click.option( - '--target_dir', - type=str, - default=".", + '--target_dir', + type=str, + default=".", help='test directories are generated under target directory' ) @click.option( - '--test_element', - '-e', + '--test_element', + '-e', type=str, - default="O", + default="O", help='test element for basis set' ) @click.option( '--short_range', - '-sr', - type=bool, + '-sr', + type=bool, default=True, help="whether use short range basis set" ) @click.argument('cp2k_input_file', type=str, nargs=1) @click.argument( - 'other_file_list', - type=str, + 'other_file_list', + type=str, nargs=-1, default=None ) @@ -100,8 +100,8 @@ def basis(cp2k_input_file, target_dir, test_element, other_file_list, short_rang inp = get_CP2K(cp2k_input_file) #click.echo(other_file_list) write_basis_test_inp( - inp, - target_dir=target_dir, + inp, + target_dir=target_dir, test_element=test_element, other_file_list=other_file_list, short_range=short_range) @@ -113,36 +113,36 @@ def basis(cp2k_input_file, target_dir, test_element, other_file_list, short_rang #-- U --# @click.command() @click.option( - '--target_dir', - type=str, - default=".", + '--target_dir', + type=str, + default=".", help='test directories are generated under target directory' ) @click.option( - '--u_range', - '-ur', - type=(float, float, float), - default=(0, 8, 1), + '--u_range', + '-ur', + type=(float, float, float), + default=(0, 8, 1), help='Hubbard U range: min, max, stepsize' ) @click.option( - '--test_element', - '-e', + '--test_element', + '-e', type=str, - default="O", + default="O", help='test element for Hubbard U test' ) @click.option( - '--test_orbital', - '-orb', + '--test_orbital', + '-orb', type=str, - default="p", + default="p", help='test orbital for Hubbard U test' ) @click.argument('cp2k_input_file', type=str, nargs=1) @click.argument( - 'other_file_list', - type=str, + 'other_file_list', + type=str, nargs=-1, default=None ) @@ -150,8 +150,8 @@ def hubbardU(cp2k_input_file, target_dir, u_range, test_element, test_orbital, o inp = get_CP2K(cp2k_input_file) #click.echo(other_file_list) write_hubbard_U_test_inp( - inp, - target_dir=target_dir, + inp, + target_dir=target_dir, u_range=u_range, test_element=test_element, test_orbital=test_orbital, @@ -161,14 +161,14 @@ def hubbardU(cp2k_input_file, target_dir, u_range, test_element, test_orbital, o gen.add_command(hubbardU) #-- U --# -#-- for plot -- # +#-- for plot -- # #--cutoff --# @click.command() @click.option( - '--target_dir', - type=str, - default=".", + '--target_dir', + type=str, + default=".", help='plot ther results under target directory' ) def cutoff(target_dir): @@ -180,27 +180,27 @@ def cutoff(target_dir): #--basis --# @click.command() @click.option( - '--target_dir', - type=str, - default=".", + '--target_dir', + type=str, + default=".", help='plot ther results under target directory' ) def basis(target_dir): #click.echo(other_file_list) plot_basis_test(target_dir=target_dir) - + plot.add_command(basis) #--U --# @click.command() @click.option( - '--target_dir', - type=str, - default=".", + '--target_dir', + type=str, + default=".", help='plot ther results under target directory' ) @click.option( - '--exp_yaml', + '--exp_yaml', type=str, default="No", help='experimental values' @@ -219,8 +219,8 @@ def hubbardU(target_dir, exp_yaml): @click.option( '--fig_name', '-fn', - type=str, - default="pKa.pdf", + type=str, + default="pKa.pdf", help='name of fep figure' ) def ti(fig_name): @@ -232,51 +232,51 @@ def ti(fig_name): # -- for cube -- # @click.command() @click.option( - '--cube_file', - type=str, - default=".", + '--cube_file', + type=str, + default=".", help='cube file' ) @click.option( - '--axis', - type=str, - default="z", + '--axis', + type=str, + default="z", help='axis' ) @click.option( - '--mav', - type=bool, - default=False, + '--mav', + type=bool, + default=False, help='switch on macro average or not' ) @click.option( - '--l1', - type=float, - default=1, + '--l1', + type=float, + default=1, help='l1' ) @click.option( - '--l2', - type=float, - default=1, + '--l2', + type=float, + default=1, help='l2' ) @click.option( - '--ncov', - type=int, - default=1, + '--ncov', + type=int, + default=1, help='ncov' ) @click.option( - '--unit', - type=str, - default="eV", + '--unit', + type=str, + default="eV", help='unit' ) @click.option( - '--width', - type=int, - default=135, + '--width', + type=int, + default=135, help='width' ) def view(cube_file, axis, mav, l1, l2, ncov, unit, width): diff --git a/cp2kdata/cube/cube.py b/cp2kdata/cube/cube.py index 71def03..2aa78d6 100644 --- a/cp2kdata/cube/cube.py +++ b/cp2kdata/cube/cube.py @@ -67,7 +67,7 @@ def get_stc(self): stc_vals = file_content(self.file, (6+i, 6+i+1)) stc_vals = stc_vals.split() atom = Atom( - symbol=int(stc_vals[0]), + symbol=int(stc_vals[0]), position=(float(stc_vals[2])*au2A, float(stc_vals[3])*au2A, float(stc_vals[4])*au2A) ) atom_list.append(atom) @@ -75,7 +75,7 @@ def get_stc(self): stc = Atoms(atom_list) stc.set_cell([self.cell_x, self.cell_y, self.cell_z]) return stc - + def read_cube_vals(self): # read the cube value from file cube_vals = file_content(self.file, (6+self.num_atoms,)) @@ -112,7 +112,7 @@ def get_pav(self, axis="z", interpolate=False): return new_points, new_vals else: return points, vals - + def get_mav(self, l1, l2=0, ncov=1, interpolate=False): axis="z" pav_x, pav = self.get_pav(axis=axis, interpolate=interpolate) @@ -162,8 +162,8 @@ def __init__(self, fname=None, cube_vals=None, cell=None, stc=None): self.stc = stc if cube_vals is None: - self.cube_vals = self.read_cube_vals(self.file, - self.num_atoms, + self.cube_vals = self.read_cube_vals(self.file, + self.num_atoms, self.cell.grid_point ) else: @@ -180,7 +180,7 @@ def num_atoms(self): line = file_content(self.file, 2) num_atoms = int(line.split()[0]) return num_atoms - + def as_dict(self): """Returns data dict of Cp2kCube instance.""" data_dict = { @@ -202,7 +202,7 @@ def __add__(self, others): else: raise RuntimeError("Unspported Class") return other_copy - + def __sub__(self, others): """magic method for subtracting two Cp2kCube instances""" self_copy = self.copy() @@ -212,14 +212,14 @@ def __sub__(self, others): else: raise RuntimeError("Unspported Class") return other_copy - + def get_stc(self): atom_list = [] for i in range(self.num_atoms): stc_vals = file_content(self.file, (6+i, 6+i+1)) stc_vals = stc_vals.split() atom = Atom( - symbol=int(stc_vals[0]), + symbol=int(stc_vals[0]), position=(float(stc_vals[2])*au2A, float(stc_vals[3])*au2A, float(stc_vals[4])*au2A) ) atom_list.append(atom) @@ -227,7 +227,7 @@ def get_stc(self): stc = Atoms(atom_list) stc.set_cell(self.cell.cell_matrix*au2A) return stc - + def copy(self): return deepcopy(self) @@ -237,7 +237,7 @@ def get_pav(self, axis='z', interpolate=False): np.array([90.0, 90.0, 90.0]), err_msg="The cell is not orthorhombic, the pav can not be used!" ) - + # do the planar average along specific axis lengths = self.cell.get_cell_lengths() grid_point = self.cell.grid_point @@ -267,7 +267,7 @@ def get_pav(self, axis='z', interpolate=False): return new_points, new_vals else: return points, vals - + def get_mav(self, l1, l2=0, ncov=1, interpolate=False, axis="z"): cell_length = { "x": self.cell.get_cell_lengths()[0], @@ -287,7 +287,7 @@ def get_mav(self, l1, l2=0, ncov=1, interpolate=False, axis="z"): return pav_x, np.real(mav) def quick_plot(self, axis="z", interpolate=False): - + x, y = self.get_pav(axis=axis, interpolate=interpolate) plt.style.use('cp2kdata.matplotlibstyle.jcp') row = 1 @@ -306,7 +306,7 @@ def view_cube_acsii(self, axis='z', mav=False, l1=None, l2=None, ncov=1, unit='a x, y = self.get_mav(l1, l2, ncov, axis=axis) else: x, y = self.get_pav(axis=axis) - + if unit == 'au': pass elif unit == 'eV': @@ -315,7 +315,7 @@ def view_cube_acsii(self, axis='z', mav=False, l1=None, l2=None, ncov=1, unit='a print("not such unit, the available options are 'au' and 'eV'") step = int(len(y)/width) print(acp.plot(y[::step], {'height': 20})) - + def write_cube(self, fname, comments='#'): grid_point = self.cell.grid_point gs_matrix = self.cell.grid_spacing_matrix @@ -343,7 +343,7 @@ def write_cube(self, fname, comments='#'): fw.write(f'{self.cube_vals[i,j,k]:13.5E}') if (k+1)%6 == 0: fw.write('\n') - # write a blank line after each z value + # write a blank line after each z value if grid_point[2]%6 != 0: fw.write('\n') @@ -351,7 +351,7 @@ def get_integration(self): dv = self.cell.get_dv() result = np.sum(self.cube_vals)*dv return result - + def get_cell(self): return self.cell.copy() @@ -404,7 +404,7 @@ def read_grid_point(fname): num_y = int(content_list[4]) num_z = int(content_list[8]) return np.array([num_x, num_y, num_z]) - + @staticmethod def read_cube_vals(fname, num_atoms, grid_point): # read the cube value from file diff --git a/cp2kdata/dpdata_plugin.py b/cp2kdata/dpdata_plugin.py index 5bfe054..284bba1 100644 --- a/cp2kdata/dpdata_plugin.py +++ b/cp2kdata/dpdata_plugin.py @@ -37,14 +37,14 @@ def from_labeled_system(self, file_name, **kwargs): return data cp2k_e_f = Cp2kOutput(file_name) - + chemical_symbols = get_chemical_symbols_from_cp2kdata( - cp2koutput=cp2k_e_f, + cp2koutput=cp2k_e_f, true_symbols=true_symbols - ) + ) # -- data dict collects information, and return to dpdata -- - + data = {} data['atom_names'], data['atom_numbs'], data["atom_types"] = get_uniq_atom_names_and_types(chemical_symbols=chemical_symbols) # atom_numbs not total num of atoms! @@ -69,15 +69,15 @@ def from_labeled_system(self, file_name, **kwargs): cells = kwargs.get('cells', None) cp2k_output_name = kwargs.get('cp2k_output_name', None) - # -- start parsing -- + # -- start parsing -- print(WRAPPER) cp2kmd = Cp2kOutput(output_file=cp2k_output_name, run_type="MD", path_prefix=path_prefix) - + num_frames = cp2kmd.get_num_frames() chemical_symbols = get_chemical_symbols_from_cp2kdata( - cp2koutput=cp2kmd, + cp2koutput=cp2kmd, true_symbols=true_symbols ) @@ -98,9 +98,9 @@ def from_labeled_system(self, file_name, **kwargs): else: print("Illegal Cell Information, cells shape should be (num_frames, 3, 3) or (3, 3)") else: - print("Illegal Cell Information, cp2kdata accepts np.ndarray as cells information") + print("Illegal Cell Information, cp2kdata accepts np.ndarray as cells information") + - # -- data dict collects information, and return to dpdata -- data = {} data['atom_names'], data['atom_numbs'], data["atom_types"] = get_uniq_atom_names_and_types(chemical_symbols=chemical_symbols) @@ -122,7 +122,7 @@ def get_chemical_symbols_from_cp2kdata(cp2koutput, true_symbols): elif true_symbols: print("You have manually true_symbols=True, atom names are true chemical symbols.") chemical_symbols = cp2koutput.get_chemical_symbols() - else: + else: print("Atom names are fake chemical symbols as you set in cp2k input.") chemical_symbols = cp2koutput.get_chemical_symbols_fake() chemical_symbols = np.array(chemical_symbols) @@ -157,15 +157,15 @@ def from_labeled_system(self, file_name, **kwargs): cells = kwargs.get('cells', None) cp2k_output_name = kwargs.get('cp2k_output_name', None) - # -- start parsing -- + # -- start parsing -- print(WRAPPER) cp2kmd = Cp2kOutput(output_file=cp2k_output_name, run_type="MD", path_prefix=path_prefix) - + num_frames = cp2kmd.get_num_frames() chemical_symbols = get_chemical_symbols_from_cp2kdata( - cp2koutput=cp2kmd, + cp2koutput=cp2kmd, true_symbols=true_symbols ) @@ -186,9 +186,9 @@ def from_labeled_system(self, file_name, **kwargs): else: print("Illegal Cell Information, cells shape should be (num_frames, 3, 3) or (3, 3)") else: - print("Illegal Cell Information, cp2kdata accepts np.ndarray as cells information") + print("Illegal Cell Information, cp2kdata accepts np.ndarray as cells information") + - # -- data dict collects information, and return to dpdata -- data = {} data['atom_names'], data['atom_numbs'], data["atom_types"] = get_uniq_atom_names_and_types(chemical_symbols=chemical_symbols) diff --git a/cp2kdata/output.py b/cp2kdata/output.py index 3e51ca9..cf173e9 100644 --- a/cp2kdata/output.py +++ b/cp2kdata/output.py @@ -38,7 +38,7 @@ def __init__(self, output_file=None, run_type: str=None, path_prefix=".", **kwar raise FileNotFoundError(f'cp2k output file {output_file} is not found') try: - self.global_info = self.get_global_info(run_type=run_type, + self.global_info = self.get_global_info(run_type=run_type, filename=self.filename ) except ValueError as err: @@ -47,7 +47,7 @@ def __init__(self, output_file=None, run_type: str=None, path_prefix=".", **kwar "Cannot Obtain CP2K RUN_TYPE information.\n" "Please check if you have provided an existing cp2k output file.\n" - "If not, you can manually set RUN_TYPE through run_type argument\n" + "If not, you can manually set RUN_TYPE through run_type argument\n" "for md calculation.\n" "Example:\n" "Cp2kOutput(run_type='MD')\n" @@ -65,14 +65,14 @@ def __init__(self, output_file=None, run_type: str=None, path_prefix=".", **kwar if self.global_info.print_level == 'LOW': raise ValueError("please provide cp2k output file with MEDIUM print level. Print Level Low doesn't provide necessary information for initialize the cp2kdata class.") - # -- set some basic attribute -- + # -- set some basic attribute -- self.num_frames = None self.init_atomic_coordinates = None self.atomic_kind = None self.atom_kind_list = None # -- start parse necessary information -- - # sometimes I use self.filename and sometimes I use self.output_file + # sometimes I use self.filename and sometimes I use self.output_file # self.filename is used for parsing information by monty package. if self.filename: with open(self.filename, 'r') as fp: @@ -81,7 +81,7 @@ def __init__(self, output_file=None, run_type: str=None, path_prefix=".", **kwar self.dft_info = parse_dft_info(self.filename) else: self.cp2k_info = Cp2kInfo(version="Unknown") - + self.check_run_type(run_type=self.global_info.run_type) run_type_parser_candidates = { @@ -267,9 +267,9 @@ def get_spin_moment_mulliken_list(self): spin_moment_mulliken = np.array([mulliken_atom['spin_moment'] for mulliken_atom in mulliken_pop], dtype=float) spin_moment_mulliken_list.append(spin_moment_mulliken) spin_moment_mulliken_list = np.array(spin_moment_mulliken_list) - + return spin_moment_mulliken_list - + def get_spin_moment_list(self, type='mulliken'): if type == 'mulliken': return self.get_spin_moment_mulliken_list() @@ -309,7 +309,7 @@ def parse_energy_force(self): "atomic_kinds": parse_atomic_kinds, "cells": parse_all_cells } - #TODO: convert kwargs to flexible attribute! + #TODO: convert kwargs to flexible attribute! self.geo_opt_info = None self.num_frames = 1 self.init_atomic_coordinates, self.atom_kind_list, self.chemical_symbols = parse_init_atomic_coordinates( @@ -351,7 +351,7 @@ def parse_cell_opt(self): def parse_md(self): self.md_info = parse_md_info(self.filename) self.check_md_type(md_type=self.md_info.ensemble_type) - + ener_file_list = glob.glob(os.path.join(self.path_prefix, "*.ener")) if ener_file_list: self.energies_list = parse_md_ener(ener_file_list[0]) @@ -386,13 +386,13 @@ def parse_md(self): ( "\n" "cp2kdata is parsing md cell information from output file.\n" - "The raw data of cell information are lengths and angles,\n" + "The raw data of cell information are lengths and angles,\n" "which are latter transformed to cell matrices by codes.\n" - "However, the a axis of the cell are always assumed to be aligned to " - "the x axis of the coordinate.\n" + "However, the a axis of the cell are always assumed to be aligned to " + "the x axis of the coordinate.\n" "Make sure the a axis in real cell matrices are always aligned to x axis.\n" "Otherwise, parsing cell information from `-1.cell` file is recommended.\n" - + "CP2K input setting\n" "------------------\n" "&MOTION\n" @@ -406,7 +406,7 @@ def parse_md(self): "&END MOTION\n" "------------------\n" ) - + WARNING_MSG = "cp2kdata obtains more than one initial cell from the output file, \ please check if your output file has duplicated header information." @@ -436,11 +436,11 @@ def parse_md(self): first_cell = parse_all_cells(self.output_file) assert first_cell.shape == (1, 3, 3), WARNING_MSG # parse the rest of the cells - self.all_cells = parse_all_md_cells(self.output_file, + self.all_cells = parse_all_md_cells(self.output_file, cp2k_info=self.cp2k_info) # prepend the first cell self.all_cells = np.insert(self.all_cells, 0, first_cell[0], axis=0) - + elif (self.md_info.ensemble_type == "NPT_I"): if cell_file_list: self.all_cells = parse_md_cell(cell_file_list[0]) @@ -451,12 +451,12 @@ def parse_md(self): first_cell = parse_all_cells(self.output_file) assert first_cell.shape == (1, 3, 3), WARNING_MSG # parse the rest of the cells - self.all_cells = parse_all_md_cells(self.output_file, - cp2k_info=self.cp2k_info, + self.all_cells = parse_all_md_cells(self.output_file, + cp2k_info=self.cp2k_info, init_cell_info=first_cell[0]) # prepend the first cell self.all_cells = np.insert(self.all_cells, 0, first_cell[0], axis=0) - + self.init_atomic_coordinates, self.atom_kind_list, self.chemical_symbols = parse_init_atomic_coordinates( self.output_file) self.atomic_kind = parse_atomic_kinds(self.output_file) @@ -470,7 +470,7 @@ def get_global_info(run_type=None, filename=None): else: raise ValueError("cp2kdata dosen't know your run type!") return global_info - + @staticmethod def check_run_type(run_type): implemented_run_type_parsers = \ @@ -480,7 +480,7 @@ def check_run_type(run_type): f"Parser for Run Type {run_type} haven't been implemented yet!" "Please contact the developer for more information." ) - + @staticmethod def check_md_type(md_type): implemented_ensemble_type_parsers = \ @@ -490,4 +490,4 @@ def check_md_type(md_type): f"Parser for MD Type {md_type} haven't been implemented yet!\n" "Please contact the developer for more information." ) - + diff --git a/cp2kdata/plots/fep_plot.py b/cp2kdata/plots/fep_plot.py index 1bafdee..f989f28 100644 --- a/cp2kdata/plots/fep_plot.py +++ b/cp2kdata/plots/fep_plot.py @@ -3,7 +3,7 @@ import glob import os from cp2kdata.block_parser.fep import parse_vertical_gap -from scipy import integrate +from scipy import integrate from cycler import cycler from matplotlib.ticker import (MultipleLocator, AutoMinorLocator) @@ -27,7 +27,7 @@ def plot_ti(fig_name): def get_fep_gaps(eta_sub_dir_list): vgap_list = [] - cum_vgap_list = [] + cum_vgap_list = [] ave_vgap_list = [] for eta in eta_sub_dir_list: mix_ener_file_list = glob.glob(f"{eta}/*-mix-1.ener") diff --git a/cp2kdata/plots/test_plot.py b/cp2kdata/plots/test_plot.py index 732aa85..cf31fe4 100644 --- a/cp2kdata/plots/test_plot.py +++ b/cp2kdata/plots/test_plot.py @@ -63,14 +63,14 @@ def get_matrix_rmse_and_max_err(matrix_list): abs_err = np.abs(err) max_abs_err = np.max(abs_err, axis=(1,2)) return rmse, max_abs_err - + def plot_axes_cutoff(cutoff_list, y_list, ax, idx): marker_cycle = ["o", "v", "d"] title_cycle = ["Energy", "Force", "Stress"] ylabel_cycle = ["error per atom [eV/atom]", r"RMSE [eV$\cdot$A$^{-1}$]", r"RMSE [eV$\cdot$A$^{-3}$]"] - + fontdict = {"fontsize": 22} - + ax.plot(cutoff_list[1:], y_list, color=f"C{idx}", marker=marker_cycle[idx], markeredgecolor="black") #ylim = ax.get_ylim() @@ -81,16 +81,16 @@ def plot_axes_cutoff(cutoff_list, y_list, ax, idx): ax.set_title(title_cycle[idx], fontdict=fontdict) ax.set_xlabel("Cutoff [Ry]", fontdict=fontdict) ax.set_ylabel(ylabel_cycle[idx], fontdict=fontdict) - + def plot_cutoff_test(target_dir="."): cutoff_test_sub_dir_list = glob.glob(os.path.join(target_dir,"cutoff_*")) cutoff_test_sub_dir_list.sort() num_atoms = get_num_atoms(cutoff_test_sub_dir_list) cutoff_list = get_cutoff_list(cutoff_test_sub_dir_list) ener_list, forces_list, stresses_list = get_multiple_e_f_s(cutoff_test_sub_dir_list) - ener_err_per_atom = get_err_per_atom(ener_list * AU_TO_EV, num_atoms) - forces_rmse, forces_max_err = get_matrix_rmse_and_max_err(forces_list * AU_TO_EV/AU_TO_ANG ) - stresses_rmse, stresses_max_err = get_matrix_rmse_and_max_err(stresses_list/EV_ANG_m3_TO_GPa) + ener_err_per_atom = get_err_per_atom(ener_list * AU_TO_EV, num_atoms) + forces_rmse, forces_max_err = get_matrix_rmse_and_max_err(forces_list * AU_TO_EV/AU_TO_ANG ) + stresses_rmse, stresses_max_err = get_matrix_rmse_and_max_err(stresses_list/EV_ANG_m3_TO_GPa) test_collect = (ener_err_per_atom, forces_rmse, stresses_rmse) @@ -126,7 +126,7 @@ def plot_cutoff_test(target_dir="."): def basis_dir_name_converter(basis_test_sub_dir): name_split_list = basis_test_sub_dir.split(sep="-") if "SR" in name_split_list: - basis_name = name_split_list[0].split(sep="_")[1] + "-SR" + basis_name = name_split_list[0].split(sep="_")[1] + "-SR" else: basis_name = name_split_list[0].split(sep="_")[1] return basis_name @@ -160,19 +160,19 @@ def get_basis_name_list(basis_test_sub_dir_list): return basis_list - + def plot_axes_basis(basis_list, y_list, ax, idx): marker_cycle = ["o", "v", "d"] title_cycle = ["Energy", "Force", "Stress"] ylabel_cycle = ["error per atom [eV/atom]", r"RMSE [eV$\cdot$A$^{-1}$]", r"RMSE [eV$\cdot$A$^{-3}$]"] - + fontdict = {"fontsize": 22} - + x_list = list(range(len(basis_list))) ax.plot(x_list[1:], y_list, color=f"C{idx}", marker=marker_cycle[idx], markeredgecolor="black") #print(len(y_list)) - + #ylim = ax.get_ylim() ax.ticklabel_format(style='plain', useOffset=False) ax.tick_params(direction="in") @@ -190,9 +190,9 @@ def plot_basis_test(target_dir="."): num_atoms = get_num_atoms(basis_test_sub_dir_list) basis_list = get_basis_name_list(basis_test_sub_dir_list) ener_list, forces_list, stresses_list = get_multiple_e_f_s(basis_test_sub_dir_list) - ener_err_per_atom = get_err_per_atom(ener_list * AU_TO_EV, num_atoms) - forces_rmse, forces_max_err = get_matrix_rmse_and_max_err(forces_list * AU_TO_EV/AU_TO_ANG ) - stresses_rmse, stresses_max_err = get_matrix_rmse_and_max_err(stresses_list/EV_ANG_m3_TO_GPa) + ener_err_per_atom = get_err_per_atom(ener_list * AU_TO_EV, num_atoms) + forces_rmse, forces_max_err = get_matrix_rmse_and_max_err(forces_list * AU_TO_EV/AU_TO_ANG ) + stresses_rmse, stresses_max_err = get_matrix_rmse_and_max_err(stresses_list/EV_ANG_m3_TO_GPa) test_collect = (ener_err_per_atom, forces_rmse, stresses_rmse) @@ -262,7 +262,7 @@ def get_multiple_cell_param(U_test_sub_dir_list): return np.array(a_list), np.array(b_list), np.array(c_list), np.array(alpha_list), np.array(beta_list), np.array(gamma_list) def get_dos_param(cp2k_pdos): - + homo = cp2k_pdos.get_homo_ener() lumo = cp2k_pdos.get_lumo_ener() return homo, lumo @@ -278,7 +278,7 @@ def get_min_gap(U_test_sub_dir): homo = a_homo else: homo = b_homo - + if a_lumo < b_lumo: lumo = a_lumo else: @@ -301,9 +301,9 @@ def plot_axes_U(U_list, y_list, ax, idx): marker_cycle = ["o", "o", "o", "o", "v", "d", "s", "p", "h"] title_cycle = ["band gap", "p1", "p2","a", "b", "c", "alpha", "beta", "gamma"] ylabel_cycle = ["energy [eV]", "length [A]", "length [A]", "length [A]", "length [A]", "length [A]", "angle [deg]", "angle [deg]", "angle [deg]"] - + fontdict = {"fontsize": 22} - + ax.plot(U_list, y_list, color=f"C{idx}", marker=marker_cycle[idx], markeredgecolor="black") #ylim = ax.get_ylim() @@ -325,17 +325,17 @@ def plot_axes_exp(exp_value, ax, idx): def plot_U_test( - target_dir=".", + target_dir=".", exp_collect=(None, None, None, None, None, None, None, None, None) ): U_test_sub_dir_list = glob.glob(os.path.join(target_dir,"U_*")) U_test_sub_dir_list.sort() U_list = get_U_list(U_test_sub_dir_list) gap_collect = get_multiple_min_gap(U_test_sub_dir_list) - property_collect = (gap_collect, None, None) + property_collect = (gap_collect, None, None) cell_param_collect = get_multiple_cell_param(U_test_sub_dir_list) - total_collect = property_collect + cell_param_collect + total_collect = property_collect + cell_param_collect plt.rc('font', size=18) plt.rc('axes', titlesize=23) #fontsize of the title diff --git a/cp2kdata/test_input.py b/cp2kdata/test_input.py index 44f9503..8180141 100644 --- a/cp2kdata/test_input.py +++ b/cp2kdata/test_input.py @@ -56,11 +56,11 @@ def get_batch_inp( new_cp2k.create_cell(SUBSYS, stc) new_cp2k.create_coord(SUBSYS, stc) cp2k_list.append(new_cp2k) - + return cp2k_list def batch_sub( - sub_cmd: str="bsub -e -ur u -e -orb +cp2kdata gen hubbardu