Merge pull request #29 from robinzyb:devel

New update
robinzyb · Dec 13, 2023 · b972071 · b972071
2 parents 5f44c64 + 031d3be
commit b972071
Show file tree

Hide file tree

Showing 34 changed files with 6,020 additions and 109 deletions.
diff --git a/cp2kdata/block_parser/cells.py b/cp2kdata/block_parser/cells.py
@@ -1,6 +1,8 @@
 import regex as re
 import numpy as np
 from ase.geometry.cell import cellpar_to_cell
+from ase.geometry.cell import cell_to_cellpar
+from typing import List
 
 ALL_CELL_RE = re.compile(
     r"""
@@ -54,24 +56,43 @@ def parse_all_cells(output_file):
     #skip two lines
     (.{80}\n){2}
     #parse angles
-    \sMD\|\sCell\sangles\s\[deg\]\s{10} 
+    (\sMD\|\sCell\sangles\s\[deg\]\s{10} 
     \s{2}(?P<alpha>\d\.\d{8}E(\+|\-)\d{2})
     \s{2}(?P<beta>\d\.\d{8}E(\+|\-)\d{2})
-    \s{2}(?P<gamma>\d\.\d{8}E(\+|\-)\d{2})
+    \s{2}(?P<gamma>\d\.\d{8}E(\+|\-)\d{2}))?
     """,
     re.VERBOSE
 )
 
-def parse_all_md_cells(output_file):
-    # notice that the cell of step 0 is not included
+def parse_all_md_cells(output_file: List[str], 
+                       init_cell_info=None):
+    # init_cell_info is used by the NPT_I parser,
+    # because NPT_I output doesn't include angle info in the MD| block
+
+    # note that the cell of step 0 is excluded from the MD| block
     all_md_cells = []
-    for match in ALL_MD_CELL_RE.finditer(output_file):
-        #print(match)
-        cell = [match["a"], match["b"], match["c"],
-             match["alpha"], match["beta"], match["gamma"]]
-        cell = np.array(cell, dtype=float)
-        cell = cellpar_to_cell(cell)
-        all_md_cells.append(cell)
+    if init_cell_info is None:
+        # for NPT_F parser, cell info is complete in MD| block
+        for match in ALL_MD_CELL_RE.finditer(output_file):
+            #print(match)
+            cell = [match["a"], match["b"], match["c"],
+                match["alpha"], match["beta"], match["gamma"]]
+            cell = np.array(cell, dtype=float)
+            cell = cellpar_to_cell(cell)
+            all_md_cells.append(cell)
+    else:
+        # for NPT_I parser, cell angle info is lost in MD| block
+        init_cell_param = cell_to_cellpar(init_cell_info)
+        init_cell_angles = init_cell_param[3:]
+        for match in ALL_MD_CELL_RE.finditer(output_file):
+            #print(match)
+            cell = [match["a"], match["b"], match["c"],
+                match["alpha"], match["beta"], match["gamma"]]
+            cell = np.array(cell, dtype=float)
+            cell[3:] = init_cell_angles
+            cell = cellpar_to_cell(cell)
+            all_md_cells.append(cell)
+
 
     if all_md_cells:
         return np.array(all_md_cells, dtype=float)

diff --git a/cp2kdata/block_parser/dipole.py b/cp2kdata/block_parser/dipole.py
@@ -5,10 +5,10 @@
     r"""
     \s{2}Dipole\smoment\s\[Debye\]\n
     \s{4}
-    X=\s{3}(?P<x>[\s-]\d+\.\d+)\s
-    Y=\s{3}(?P<y>[\s-]\d+\.\d+)\s
-    Z=\s{3}(?P<z>[\s-]\d+\.\d+)\s
-    \s{4}Total=\s{4}(?P<total>[\s-]\d+\.\d+)
+    X=\s{,3}(?P<x>[\s-]\d+\.\d+)\s
+    Y=\s{,3}(?P<y>[\s-]\d+\.\d+)\s
+    Z=\s{,3}(?P<z>[\s-]\d+\.\d+)\s
+    \s{4}Total=\s{,4}(?P<total>[\s-]\d+\.\d+)
     """,
     re.VERBOSE
 )

diff --git a/cp2kdata/block_parser/header_info.py b/cp2kdata/block_parser/header_info.py
@@ -87,8 +87,11 @@ def parse_dft_info(filename) -> DFTInfo:
         },
         terminate_on_match=True
         )
-
-    return DFTInfo(ks_type=dft_info["ks_type"][0][0][0], multiplicity=dft_info["multiplicity"][0][0][0])
+
+    if dft_info:
+        return DFTInfo(ks_type=dft_info["ks_type"][0][0][0], multiplicity=dft_info["multiplicity"][0][0][0])
+    else:
+        return None
 
 
 

diff --git a/cp2kdata/block_parser/md_xyz.py b/cp2kdata/block_parser/md_xyz.py
@@ -1,4 +1,4 @@
-from monty.io import zopen
+#from monty.io import zopen
 import regex as re
 import numpy as np
 ENERGY_RE = re.compile(
@@ -8,13 +8,13 @@
 )
 
 def parse_md_ener(ener_file):  # load a single numeric column from the text file `ener_file`
-    print(f"Parsing Energies From {ener_file}")
+    print(f"Parsing Energies from {ener_file}")
     energies_list = np.loadtxt(ener_file, usecols=4, ndmin=1, dtype=np.float64)  # only column index 4 is read; ndmin=1 keeps the result 1-D even for a single row
     return energies_list  # float64 array, one entry per row read (presumably one per MD frame -- TODO confirm)
 
 def parse_pos_xyz(posxyz_file):
-    print(f"Parsing Structures From {posxyz_file}")
-    fp = zopen(posxyz_file, "r")
+    print(f"Parsing Structures from {posxyz_file}")
+    fp = open(posxyz_file, "r")
     lines = fp.readlines()
     energies_list = []
     pos_list = []
@@ -36,8 +36,8 @@ def parse_pos_xyz(posxyz_file):
     return pos_list, energies_list, chemical_symbols
 
 def parse_frc_xyz(frcxyz_file):
-    print(f"Parsing Froces From {frcxyz_file}")
-    fp = zopen(frcxyz_file, "r")
+    print(f"Parsing Froces from {frcxyz_file}")
+    fp = open(frcxyz_file, "r")
     lines = fp.readlines()
     force_list = []
     while len(lines) > 0:
@@ -57,8 +57,8 @@ def parse_frc_xyz(frcxyz_file):
 
 #NOTE: incomplete function, do not release!
 def parse_pos_xyz_from_wannier(wannier_xyz_fiel):
-    print(f"Parsing Structures From {wannier_xyz_fiel}")
-    fp = zopen(wannier_xyz_fiel, "r")
+    print(f"Parsing Structures from {wannier_xyz_fiel}")
+    fp = open(wannier_xyz_fiel, "r")
     lines = fp.readlines()
     force_list = []
     while len(lines) > 0:
@@ -79,7 +79,7 @@ def parse_pos_xyz_from_wannier(wannier_xyz_fiel):
     return force_list
 
 def parse_md_stress(stress_file):
-    print(f"Parsing Stresses From {stress_file}")
+    print(f"Parsing Stresses from {stress_file}")
     stresses_list = np.loadtxt(
         stress_file, 
         usecols=(2, 3, 4, 5, 6, 7, 8, 9, 10), 
@@ -89,4 +89,16 @@ def parse_md_stress(stress_file):
 
     numb_frames = stresses_list.shape[0]
 
-    return stresses_list.reshape(numb_frames, 3, 3)
+    return stresses_list.reshape(numb_frames, 3, 3)
+
+def parse_md_cell(cell_file):  # load one 3x3 matrix per row from the text file `cell_file`
+    print(f"Parsing Cells from {cell_file}")
+    cells_list = np.loadtxt(
+        cell_file, 
+        usecols=(2, 3, 4, 5, 6, 7, 8, 9, 10),  # nine values per row; columns 0 and 1 are not read
+        ndmin=2,  # keep a 2-D (rows, 9) array even when the file holds a single row
+        dtype=np.float64
+        )
+    numb_frames = cells_list.shape[0]  # one row of the file per frame
+
+    return cells_list.reshape(numb_frames, 3, 3)  # each row of nine values becomes a 3x3 matrix, filled row by row
diff --git a/cp2kdata/block_parser/mulliken.py b/cp2kdata/block_parser/mulliken.py
@@ -3,11 +3,11 @@
 
 MULLIKEN_UKS_RE = re.compile(
     r"""
-    \s+Mulliken\sPopulation\sAnalysis\s*\n
+    \s{21}Mulliken\sPopulation\sAnalysis\s{1}
     \n
-    \s\#.+\n
+    \s\#\s{2}Atom\s{2}Element\s{2}Kind\s{2}Atomic\spopulation\s\(alpha,beta\)\sNet\scharge\s{2}Spin\smoment\n
     (
-        \s+(?P<atom>\d+)
+        \s{,7}(?P<atom>\d+)
         \s+(?P<element>\w+)
         \s+(?P<kind>\d+)
         \s+(?P<alpha>[\s-]\d+\.\d+)
@@ -30,7 +30,7 @@
         \s+(?P<kind>\d+)
         \s+(?P<alpha>[\s-]\d+\.\d+)
         \s+(?P<net_charge>[\s-]\d+\.\d+)
-        \n
+        
     )+
     """,
     re.VERBOSE
@@ -41,8 +41,10 @@ def parse_mulliken_pop_list(output_file, DFTInfo):
     mulliken_pop_list = []
     if DFTInfo.ks_type == 'UKS':
         for match in MULLIKEN_UKS_RE.finditer(output_file):
+            #print(match)
             mulliken_pop = []
             for element, alpha, beta, net_charge, spin_moment in zip(*match.captures("element", "alpha", "beta", "net_charge", "spin_moment")):
+                #print(match.captures("element", "alpha", "beta", "net_charge", "spin_moment"))
                 mulliken_pop.append(
                     {
                         "element": element,
@@ -54,6 +56,7 @@ def parse_mulliken_pop_list(output_file, DFTInfo):
                 )
             mulliken_pop_list.append(mulliken_pop)
     elif DFTInfo.ks_type == "RKS":
+        raise NotImplementedError("RKS Mulliken population analysis not implemented")
         for match in MULLIKEN_RKS_RE.finditer(output_file):
             mulliken_pop = []
             for element, alpha, net_charge in zip(*match.captures("element", "alpha", "net_charge")):
@@ -65,7 +68,8 @@ def parse_mulliken_pop_list(output_file, DFTInfo):
                     }
                 )
             mulliken_pop_list.append(mulliken_pop)      
+
     if  mulliken_pop_list:
-        return  mulliken_pop_list[:-1]
+        return  mulliken_pop_list
     else:
         return None
diff --git a/cp2kdata/cell.py b/cp2kdata/cell.py
@@ -2,6 +2,7 @@
 from ase.geometry.cell import cell_to_cellpar
 import numpy.typing as npt
 import numpy as np
+from numpy.linalg import LinAlgError
 from copy import deepcopy
 
 class Cp2kCell:
@@ -77,23 +78,19 @@ def __init__(
         if grid_point is not None:
             self.grid_point = self.grid_point.astype(int)
 
-        self.volume = np.linalg.det(self.cell_matrix)
-
-        if grid_point is not None:
-            self.dv = np.linalg.det(self.grid_spacing_matrix)
 
         self.cell_param = cell_to_cellpar(self.cell_matrix)
 
     def copy(self):  # return an independent duplicate of this object
         return deepcopy(self)  # deepcopy also duplicates the objects this instance references
 
     def get_volume(self):  # computed on demand from the current cell_matrix
-        return self.volume
+        return np.linalg.det(self.cell_matrix)  # raw determinant of cell_matrix; no abs() is applied
 
     def get_dv(self):  # determinant of grid_spacing_matrix, computed on demand
         try:
-            return self.dv
-        except AttributeError as ae:
+            return np.linalg.det(self.grid_spacing_matrix)
+        except LinAlgError as ae:  # NOTE(review): only LinAlgError is handled; a missing grid_spacing_matrix attribute would raise AttributeError -- confirm __init__ always sets it
             print("No grid point information is available")  # nothing is returned on this path, so the caller receives None
 
     def get_cell_param(self):

diff --git a/cp2kdata/cube/cube.py b/cp2kdata/cube/cube.py
@@ -25,11 +25,8 @@ class Cp2kCubeOld:
     timestep: unit ps
     """
     def __init__(self, cube_file_name, timestep=0):
-        print("Warning: This Cp2kCube will be deprecated in version 0.6.x, use Cp2kCubeNew instead!")
-        print("After version 0.6.x, Cp2kCube will be removed from cp2kdata.cube.cube")
-        print("Cp2kCubeNew will be renamed as Cp2kCube")
-        print("Test Cp2kCubeNew in your old code before upgrade to 0.6.x")
-        print("Usage: from cp2kdata.cube.cube import Cp2kCubeNew as Cp2kCube")
+        print("Warning: This is Cp2kCubeOld is deprecated after version 0.6.x, use Cp2kCube instead!")
+        print("Warning: to use old one, from cp2kdata.cube.cube import Cp2kCubeOld")
         self.file = cube_file_name
         self.timestep = timestep
         self.cube_vals = self.read_cube_vals()
@@ -146,7 +143,7 @@ class Cp2kCube(MSONable):
     """
     def __init__(self, fname=None, cube_vals=None, cell=None, stc=None):
         print("Warning: This is New Cp2kCube Class, if you want to use old Cp2kCube")
-        print("try, from cp2kdata.cube.cube import Cp2kCube")
+        print("try, from cp2kdata.cube.cube import Cp2kCubeOld")
         print("New Cp2kCube return raw values in cp2k cube file")
         print("that is, length in bohr and energy in hartree for potential file")
         print("that is, length in bohr and density in e/bohr^3 for density file")
@@ -201,20 +198,20 @@ def __add__(self, others):
         self_copy = self.copy()
         if isinstance(others, Cp2kCube):
             other_copy = others.copy()
-            self_copy.cube_vals += other_copy.cube_vals
+            other_copy.cube_vals =  self_copy.cube_vals + other_copy.cube_vals
         else:
             raise RuntimeError("Unspported Class")
-        return self_copy
+        return other_copy
 
     def __sub__(self, others):
         """magic method for subtracting two Cp2kCube instances"""
         self_copy = self.copy()  # work on a copy so self is not modified
         if isinstance(others, Cp2kCube):
             other_copy = others.copy()  # the result object is built from this copy of others
-            self_copy.cube_vals -= other_copy.cube_vals
+            other_copy.cube_vals =  self_copy.cube_vals - other_copy.cube_vals  # self minus others, stored as a newly computed value rather than an in-place update
         else:
             raise RuntimeError("Unspported Class")  # any operand that is not a Cp2kCube is rejected
-        return self_copy
+        return other_copy  # the returned object is the copy of others with cube_vals replaced by the difference
 
     def get_stc(self):
         atom_list = []
@@ -228,7 +225,7 @@ def get_stc(self):
             atom_list.append(atom)
 
         stc = Atoms(atom_list)
-        stc.set_cell(self.cell.cell_matrix)
+        stc.set_cell(self.cell.cell_matrix*au2A)
         return stc
 
     def copy(self):
@@ -358,6 +355,33 @@ def get_integration(self):
     def get_cell(self):  # accessor for the cell attached to this cube
         return self.cell.copy()  # returns the result of self.cell.copy(), not the stored object itself
 
+    def reduce_resolution(self, stride, axis='xyz'):  # return a copy of this cube keeping every `stride`-th grid point along the axes named in `axis`
+        new_cube = self.copy()  # self is left untouched; all changes go to this copy
+
+        stride_dict = {  # step per (x, y, z) axis; axes not named in the key keep a step of 1
+            "xyz": np.array([stride, stride, stride]),
+            "xy": np.array([stride, stride, 1]),
+            "xz": np.array([stride, 1, stride]),
+            "yz": np.array([1, stride, stride]),
+            "x": np.array([stride, 1, 1]),
+            "y": np.array([1, stride, 1]),
+            "z": np.array([1, 1, stride])
+        }
+        stride_array = stride_dict[axis]  # an `axis` string that is not one of the keys above raises KeyError
+        # reduce the number of grid points along each strided axis
+        grid_point = self.cell.grid_point
+        grid_point = np.floor((grid_point-1)/stride_array) + 1  # floor((n-1)/s)+1 is the length of a [::s] slice over n points
+        grid_point = grid_point.astype(int)  # np.floor returns floats; cast back to integers
+        new_cube.cell.grid_point = grid_point
+
+        # increase the grid spacing: row i of the matrix is multiplied by the stride of axis i
+        new_cube.cell.grid_spacing_matrix = self.cell.grid_spacing_matrix * stride_array[:, np.newaxis]
+
+        new_cube.cube_vals = self.cube_vals[::stride_array[0], ::stride_array[1], ::stride_array[2]]  # NOTE(review): if cube_vals is a NumPy array this slice is a view sharing memory with self.cube_vals -- confirm whether a copy is wanted
+
+        return new_cube
+
+
     @staticmethod
     def read_gs_matrix(fname):
         content_list = file_content(fname, (3,6))

diff --git a/cp2kdata/dpdata_plugin.py b/cp2kdata/dpdata_plugin.py
@@ -13,6 +13,7 @@
 
 WRAPPER = "--- You are parsing data using package Cp2kData ---"
 
+@Format.register("cp2k/output")
 @Format.register("cp2kdata/e_f")
 class CP2KEnergyForceFormat(Format):
     def from_labeled_system(self, file_name, **kwargs):
@@ -57,6 +58,7 @@ def from_labeled_system(self, file_name, **kwargs):
         print(WRAPPER)
         return data
 
+@Format.register("cp2k/aimd_output")
 @Format.register("cp2kdata/md")
 class CP2KMDFormat(Format):
     def from_labeled_system(self, file_name, **kwargs):