robinzyb · robinzyb · Sep 7, 2023 · Nov 14, 2022 · Mar 9, 2023 · Mar 9, 2023
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -8,7 +8,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [3.6, 3.7, 3.8, 3.9]
+        python-version: ['3.8', '3.9', '3.10']
 
     steps:
       - uses: actions/checkout@v2

diff --git a/README.md b/README.md
@@ -10,31 +10,22 @@ Python Package to postprocess cp2k data.
 including cube file, pdos file, output file
 
 - [CP2KDATA](#cp2kdata)
-- [Idea List](#idea-list)
-- [TO DO](#to-do)
 - [Installation](#installation)
 - [Generate Standard Test Inputs](#generate-standard-test-inputs)
 - [Plot Standard Test Output](#plot-standard-test-output)
 - [Processing Output File](#processing-output-file)
   - [Basick Usage](#basick-usage)
-  - [Parse ENERGY_FORCE Outputs](#parse-energy_force-outputs)
-  - [Parse GEO_OPT Outputs](#parse-geo_opt-outputs)
+  - [Parse ENERGY\_FORCE Outputs](#parse-energy_force-outputs)
+  - [Parse GEO\_OPT Outputs](#parse-geo_opt-outputs)
   - [Parse MD outputs](#parse-md-outputs)
 - [Plug-in for `dpdata`](#plug-in-for-dpdata)
-- [Processing Cube File](#processing-cube-file)
 - [Processing PDOS File](#processing-pdos-file)
   - [Processing Single PDOS File](#processing-single-pdos-file)
   - [Quickplot of  PDOS Files in Single Point Energy Calculation](#quickplot-of--pdos-files-in-single-point-energy-calculation)
+- [Idea List](#idea-list)
+- [TO DO](#to-do)
+
 
-# Idea List
-1. manipulate cube, pdos data
-2. modify step information on cube files
-3. extract information from output
-4. generate standard test input and directory
-5. generate nice figures 
-
-# TO DO
-cli interface
 
 # Installation
 
@@ -65,6 +56,8 @@ cp2kdata gen hubbardu input.inp coord.xyz cp2k.lsf -ur 0 8.1 1 -e Fe -orb d
 # Plot Standard Test Output
 After you finished the above tests, you readily plot the result using command `cp2kdata plot cutoff`, `cp2kdata plot basis`, `cp2kdata plot hubbardu` 
 
+[Process CP2K Cube Files](./docs/cube/README.md)
+
 # Processing Output File
 
 ## Basick Usage
@@ -187,42 +180,6 @@ print(dp)
 
 ```
 
-# Processing Cube File
-
-```python
-from cp2kdata.cube import Cp2kCube
-cube_file = "xxx.cube"
-mycube = Cp2kCube(cube_file)
-
-# structure is include in cube file
-# you can obtain ASE atoms from cube
-stc = mycube.get_stc()
-print(stc)
-
-# get Planar average data without interpolation.
-pav_x, pav = mycube.get_pav(axis="z", interpolate=False)
-# get Planar average data  with interpolation. the number of interpolation point is 4096
-pav_x, pav = mycube.get_pav(axis="z", interpolate=True)
-
-l1 = 4.8 # length for first periodicity
-l2 = 4.8 # length for second periodicity
-ncov = 1 # set 1 if the system is slab-vacuum system.
-ncov = 2 # set 2 if the system is interface.
-# get Macro average data without interpolation of the original data.
-mav_x, mav = mycube.get_mav(l1=l1, l2=l2, ncov=ncov, interpolate=False)
-# get Macro average data with interpolation of the original data.
-mav_x, mav = mycube.get_mav(l1=l1, l2=l2, ncov=ncov, interpolate=True)
-
-# quick plot
-mycube.quick_plot(axis="z", interpolate=False, output_dir="./")
-```
-The Planar Average and Macro Average results are benchmarked from MACROAVE used in 
-[Siesta and Abinit](https://docs.siesta-project.org/projects/siesta/reference/macroave.html)
-and shown in the following figures
-
-![pav_plot](./figures/PAV_compare.png)
-![mav_plot](./figures/MAV_compare.png)
-
 # Processing PDOS File
 
 ## Processing Single PDOS File
@@ -245,3 +202,13 @@ quick_plot_uks(Calculation_dir)
 quick_plot_rks(Calculation_dir)
 ```
 
+
+# Idea List
+1. manipulate cube, pdos data
+2. modify step information on cube files
+3. extract information from output
+4. generate standard test input and directory
+5. generate nice figures 
+
+# TO DO
+cli interface
diff --git a/cp2kdata/__init__.py b/cp2kdata/__init__.py
@@ -1,4 +1,4 @@
 from .output import Cp2kOutput
 from .utils import get_opt_cell_stc
 from .pdos import Cp2kPdos
-from .cube import Cp2kCube
+from .cube.cube import Cp2kCube
diff --git a/cp2kdata/block_parser/coordinates.py b/cp2kdata/block_parser/coordinates.py
@@ -3,7 +3,7 @@
 
 INIT_ATOMIC_COORDINATES_RE = re.compile(
     r"""
-    \sMODULE\sQUICKSTEP:\s\sATOMIC\sCOORDINATES\sIN\sangstrom\s*\n
+    \sMODULE\sQUICKSTEP:\s+ATOMIC\sCOORDINATES\sIN\sangstrom\s*\n
     \n
     \s+Atom\s+Kind\s+Element\s+X\s+Y\s+Z\s+Z\(eff\)\s+Mass\s*\n
     (\n)?
@@ -12,15 +12,15 @@
         \s+(?P<kind>\d+)
         \s+(?P<element>\w+)
         \s+\d+
-        \s+(?P<x>[\s-]\d+\.\d+)
-        \s+(?P<y>[\s-]\d+\.\d+)
-        \s+(?P<z>[\s-]\d+\.\d+)
-        \s+[\s-]\d+\.\d+
-        \s+[\s-]\d+\.\d+
+        \s+(?P<x>[-]?\d+\.\d+)
+        \s+(?P<y>[-]?\d+\.\d+)
+        \s+(?P<z>[-]?\d+\.\d+)
+        \s+[-]?\d+\.\d+
+        \s+[-]?\d+\.\d+
         \n
     )+
     """,
-    re.VERBOSE
+    re.VERBOSE | re.IGNORECASE,
 )
 
 
@@ -34,7 +34,7 @@ def parse_init_atomic_coordinates(output_file):
         init_atomic_coordinates.append([x, y, z])
     atom_kind_list = [int(kind) for kind in match.captures("kind")]
     chemical_symbols = match.captures("element")
-    
+
     if init_atomic_coordinates:
         return np.array(init_atomic_coordinates, dtype=float), np.array(atom_kind_list, dtype=int), chemical_symbols
     else:

diff --git a/cp2kdata/block_parser/header_info.py b/cp2kdata/block_parser/header_info.py
@@ -10,7 +10,7 @@ class Cp2kInfo:
 CP2K_INFO_VERSION_PATTERN = \
     r"""(?xm)
     ^\sCP2K\|\sversion\sstring:\s{20,42}
-    CP2K\sversion\s(\d\.\d)(?:\s\(Development\sVersion\))?$
+    CP2K\sversion\s(?P<version>\d{1,4}\.\d)(?:\s\(Development\sVersion\))?$
     """
 
 def parse_cp2k_info(filename) -> Cp2kInfo:
@@ -27,23 +27,32 @@ def parse_cp2k_info(filename) -> Cp2kInfo:
 @dataclass
 class GlobalInfo:
     run_type: str = None
+    print_level: str = None
 
+# PATTERNS
 GLOBAL_INFO_RUN_TYPE_PATTERN = \
     r"""(?xm)
-    ^\sGLOBAL\|\sRun\stype\s+(?P<run_type>\w+)\n
-    #(\s+GLOBAL\|.+\n)+
+    ^\sGLOBAL\|\sRun\stype\s{33,}(?P<run_type>\w+)\n
+    """
+GLOBAL_INFO_PRINT_LEVEL_PATTERN = \
+    r"""(?xm)
+    ^\sGLOBAL\|\sGlobal\sprint\slevel\s{42,}(?P<print_level>\w+)\n
     """
 
 def parse_global_info(filename) -> GlobalInfo:
     global_info = {}
 
     global_info = regrep(
         filename=filename, 
-        patterns={"run_type": GLOBAL_INFO_RUN_TYPE_PATTERN}, 
+        patterns={"run_type": GLOBAL_INFO_RUN_TYPE_PATTERN, 
+                  "print_level": GLOBAL_INFO_PRINT_LEVEL_PATTERN
+                  },
         terminate_on_match=True
         )
 
-    return GlobalInfo(run_type=global_info["run_type"][0][0][0])
+    return GlobalInfo(run_type=global_info["run_type"][0][0][0],
+                      print_level=global_info["print_level"][0][0][0]
+                      )
 
 
 @dataclass

diff --git a/cp2kdata/block_parser/md_xyz.py b/cp2kdata/block_parser/md_xyz.py
@@ -9,7 +9,7 @@
 
 def parse_md_ener(ener_file):
     print(f"Obtian Energies From {ener_file}")
-    energies_list = np.loadtxt(ener_file, usecols=4, dtype=np.float64)
+    energies_list = np.loadtxt(ener_file, usecols=4, ndmin=1, dtype=np.float64)
     return energies_list
 
 def parse_pos_xyz(posxyz_file):
@@ -55,13 +55,38 @@ def parse_frc_xyz(frcxyz_file):
     force_list = np.array(force_list, dtype=np.float64)
     return force_list
 
+#NOTE: incomplete function, do not release!
+def parse_pos_xyz_from_wannier(wannier_xyz_fiel):
+    """Parse every frame of an xyz file into a float64 array, skipping entries whose symbol is 'X'."""
+    print(f"Obtian Structures From {wannier_xyz_fiel}")
+    # The context manager closes the handle; the previous version leaked it.
+    with zopen(wannier_xyz_fiel, "r") as fp:
+        lines = fp.readlines()
+    frames = []
+    cursor = 0  # walk by index: repeated list.pop(0) made parsing quadratic
+    while cursor < len(lines):
+        natoms = int(lines[cursor])
+        first = cursor + 2  # skip the atom-count line and the comment line
+        if first + natoms > len(lines):
+            raise IndexError("truncated frame in xyz file")
+        positions = []
+        for line in lines[first:first + natoms]:
+            symbol, x, y, z = line.split()[:4]
+            if symbol.lower().capitalize() != 'X':
+                positions.append([float(x), float(y), float(z)])
+        frames.append(positions)
+        cursor = first + natoms
+    return np.array(frames, dtype=np.float64)
+
 def parse_md_stress(stress_file):
     print(f"Obtian Stresses From {stress_file}")
     stresses_list = np.loadtxt(
         stress_file, 
         usecols=(2, 3, 4, 5, 6, 7, 8, 9, 10), 
+        ndmin=2,
         dtype=np.float64
         )
+
     numb_frames = stresses_list.shape[0]
 
     return stresses_list.reshape(numb_frames, 3, 3)
diff --git a/cp2kdata/cube/__init__.py b/cp2kdata/cube/__init__.py
diff --git a/cp2kdata/cube.py → cp2kdata/cube/cube.py b/cp2kdata/cube.py → cp2kdata/cube/cube.py
@@ -1,5 +1,5 @@
-from .utils import file_content, interpolate_spline
-from .utils import au2A, au2eV
+from cp2kdata.utils import file_content, interpolate_spline
+from cp2kdata.utils import au2A, au2eV
 import numpy as np
 import matplotlib.pyplot as plt
 import os

diff --git a/cp2kdata/dpdata_plugin.py b/cp2kdata/dpdata_plugin.py
@@ -3,6 +3,8 @@
 from . import Cp2kOutput
 from .block_parser.converge import parse_e_f_converge
 import numpy as np
+from cp2kdata.block_parser.md_xyz import parse_pos_xyz_from_wannier
+import os
 
 AU_TO_EV = EnergyConversion("hartree", "eV").value()
 AU_TO_ANG = LengthConversion("bohr", "angstrom").value()
@@ -140,3 +142,70 @@ def get_uniq_atom_names_and_types(chemical_symbols):
     atom_types = np.array(atom_types)
 
     return list(atom_names), atom_numbs, atom_types
+
+#NOTE: incomplete function, do not release!
+@Format.register("cp2kdata/md_wannier")
+class CP2KMDWannierFormat(Format):
+    def from_labeled_system(self, file_name, **kwargs):
+        """Assemble the data dict handed back to dpdata for a CP2K MD run.
+        Coords come from a wannier xyz file; raises ValueError if 'cp2k_wannier_file' is not given."""
+        path_prefix = file_name #in cp2k md, file_name is directory name.
+        true_symbols = kwargs.get('true_symbols', False)  # forwarded to get_chemical_symbols_from_cp2kdata
+        cells = kwargs.get('cells', None)  # optional np.ndarray, shape (3, 3) or (num_frames, 3, 3)
+        cp2k_output_name = kwargs.get('cp2k_output_name', None)  # handed to Cp2kOutput as output_file
+
+        # -- start parsing -- 
+        print(WRAPPER)
+
+        cp2kmd = Cp2kOutput(output_file=cp2k_output_name, run_type="MD", path_prefix=path_prefix)
+
+        num_frames = cp2kmd.get_num_frames()
+
+        chemical_symbols = get_chemical_symbols_from_cp2kdata(
+            cp2koutput=cp2kmd, 
+            true_symbols=true_symbols
+            )
+        # Expand `cells` to one 3x3 matrix per frame; unusable input is only reported via print() and stored as given.
+        if cells is None:
+            if cp2kmd.filename:  # NOTE(review): presumably falsy when no output file was parsed -- confirm
+                cells = cp2kmd.get_init_cell()
+                cells = cells[np.newaxis, :, :]
+                cells = np.repeat(cells, repeats=num_frames, axis=0)
+            else:
+                print("No cell information, please check if your inputs are correct.")
+        elif isinstance(cells, np.ndarray):
+            if cells.shape == (3,3):
+                cells = cells[np.newaxis, :, :]
+                cells = np.repeat(cells, repeats=num_frames, axis=0)
+            elif cells.shape == (num_frames, 3, 3):
+                pass
+            else:
+                print("Illegal Cell Information, cells shape should be (num_frames, 3, 3) or (3, 3)")
+        else:
+            print("Illegal Cell Information, cp2kdata accepts np.ndarray as cells information") 
+
+
+        # -- data dict collects information, and return to dpdata --
+        data = {}
+        data['atom_names'], data['atom_numbs'], data["atom_types"] = get_uniq_atom_names_and_types(chemical_symbols=chemical_symbols)
+        # atom_numbs not total num of atoms!
+        data['energies'] = cp2kmd.energies_list * AU_TO_EV  # hartree -> eV
+        data['cells'] = cells
+
+        # coords are taken from the wannier xyz file ('cp2k_wannier_file', joined onto path_prefix)
+
+        cp2k_wannier_file = kwargs.get('cp2k_wannier_file', None)
+        if cp2k_wannier_file:
+            print("This is wannier center parser")
+            print("Position parsed from pos files are not used.")
+            cp2k_wannier_file = os.path.join(path_prefix, cp2k_wannier_file)
+            data['coords'] = parse_pos_xyz_from_wannier(cp2k_wannier_file)
+        else:
+            raise ValueError("Please specify the cp2k wannier file name!")
+
+        data['forces'] = cp2kmd.atomic_forces_list * AU_TO_EV/AU_TO_ANG  # hartree/bohr -> eV/angstrom
+        if cp2kmd.has_stress():
+            data['virials'] = cp2kmd.stress_tensor_list/EV_ANG_m3_TO_GPa
+        #print(len(data['cells']), len(data['coords']), len(data['energies']))
+        print(WRAPPER)
+        return data