Merge branch 'release/0.9.0'

cp2k · Dec 8, 2023 · 0772fa9 · 0772fa9
2 parents b797eca + a2636b8
commit 0772fa9
Show file tree

Hide file tree

Showing 28 changed files with 1,236 additions and 289 deletions.
diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
@@ -5,6 +5,8 @@ jobs:
   pre-commit:
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@v2
-    - uses: actions/setup-python@v2
-    - uses: pre-commit/[email protected]
+    - uses: actions/checkout@v3
+    - uses: actions/setup-python@v4
+      with:
+        python-version: '3.10'
+    - uses: pre-commit/action@v3.0.0
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -6,27 +6,27 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [3.6, 3.7, 3.8, 3.9]
+        python-version: ['3.9', '3.10', '3.11']
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v3
 
     - name: Set up python ${{ matrix.python-version }}
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v4
       with:
         python-version: ${{ matrix.python-version }}
         architecture: x64
 
     - name: Install Poetry
-      run: pip install poetry==1.1.6
+      uses: snok/install-poetry@v1.3.1
 
     - name: Install dependencies
       run: poetry install -E yaml -E lsp
 
     - name: Run pytest
-      run: poetry run pytest --cov-report=xml --cov-report=term-missing --cov-append --cov=./ tests/
+      run: poetry run pytest --cov-report=xml --cov-report=term-missing --cov-append --cov=cp2k_input_tools tests/
 
-    - uses: codecov/codecov-action@v1
+    - uses: codecov/codecov-action@v3.1.0
       with:
-        file: ./coverage.xml
+        files: ./coverage.xml
         fail_ci_if_error: true
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -3,23 +3,17 @@
 # pre-commit install
 
 repos:
-- repo: https://github.com/pycqa/isort
-  rev: 5.10.1
+- repo: https://github.com/astral-sh/ruff-pre-commit
+  rev: 'v0.1.6'
   hooks:
-  - id: isort
-    name: isort (python)
-  - id: isort
-    name: isort (cython)
-    types: [cython]
-  - id: isort
-    name: isort (pyi)
-    types: [pyi]
-- repo: https://github.com/ambv/black
-  rev: 22.3.0
+  - id: ruff
+    args: [ --fix, --exit-non-zero-on-fix ]
+- repo: https://github.com/psf/black
+  rev: 23.11.0
   hooks:
   - id: black
-- repo: https://github.com/pycqa/flake8
-  rev: 4.0.1
+- repo: https://github.com/pre-commit/mirrors-mypy
+  rev: v1.7.1
   hooks:
-  - id: flake8
-    additional_dependencies: ["flake8-bugbear"]
+  - id: mypy
+    additional_dependencies: ["pydantic>=2"]
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,17 @@
 # Change Log
 
+## [0.9.0] - 2023-12-08
+
+* basissets: add support for new-style All-Electron basis sets
+* cp2kgen: add `--zip` option, similar to Python's zip function
+* reorganize basisset and pseudo datatypes dir (although this should remain backwards compatible)
+* basissets: add parsing and converting options from CRYSTAL07 format
+* reformat code as part of the move from flake8 to ruff
+* bump minimal required Python version to 3.9
+* update and fix LSP implementation
+* update to Pydantic 2+
+* resolve some code smells
+
 ## [0.8.2] - 2022-04-21
 
 * fix issue with `base_dir` and click v7's `path_type`, thanks to @yakutovicha for the initial fix

diff --git a/cp2k_input_tools/__init__.py b/cp2k_input_tools/__init__.py
@@ -1,5 +1,5 @@
 import pathlib
 
-__version__ = "0.8.2"
+__version__ = "0.9.0"
 
 DEFAULT_CP2K_INPUT_XML = pathlib.Path(__file__).resolve().parent.joinpath("cp2k_input.xml")
diff --git a/cp2k_input_tools/basissets/__init__.py b/cp2k_input_tools/basissets/__init__.py
@@ -0,0 +1,4 @@
+# backwards compatibility and the default
+from .cp2k import BasisSetCoefficients, BasisSetData
+
+__all__ = ["BasisSetCoefficients", "BasisSetData"]
diff --git a/cp2k_input_tools/basissets.py → cp2k_input_tools/basissets/cp2k.py b/cp2k_input_tools/basissets.py → cp2k_input_tools/basissets/cp2k.py
@@ -6,22 +6,22 @@
 from decimal import Decimal
 from typing import Iterator, List, Optional, Sequence, Tuple
 
-from pydantic import BaseModel, Extra
+from pydantic import BaseModel
 
-from .utils import SYM2NUM, DatafileIterMixin, FromDictMixin, dformat
+from ..utils import SYM2NUM, DatafileIterMixin, FromDictMixin, dformat
 
 N_VAL_EL_MATCH = re.compile(r"q(?P<nvalel>\d+)$")
 
 
-class BasisSetCoefficients(BaseModel, extra=Extra.forbid):
+class BasisSetCoefficients(BaseModel, extra="forbid"):
     """A 'shell' in one single basis set"""
 
     n: int
-    l: List[Tuple[int, int]]
+    l: List[Tuple[int, int]]  # noqa: E741
     coefficients: List[List[Decimal]]
 
 
-class BasisSetData(BaseModel, DatafileIterMixin, FromDictMixin, extra=Extra.forbid):
+class BasisSetData(BaseModel, DatafileIterMixin, FromDictMixin, extra="forbid"):
     """Basis set data for a single element"""
 
     element: str
@@ -44,7 +44,11 @@ def from_lines(cls, lines: Sequence[str]) -> "BasisSetData":
 
         # the ALL* tags indicate an all-electron basis set, but they might be ambiguous,
         # ignore them if we found an explicit #(val.el.) spec already
-        if not n_el and any(kw in identifiers for kw in ("ALL", "ALLELECTRON")):
+        if (
+            not n_el
+            and any(kw in identifiers for kw in ("ALL", "ALLELECTRON"))
+            or any(identifier.endswith("-ae") for identifier in identifiers)
+        ):
             n_el = SYM2NUM[element]
 
         # The second line contains the number of sets, conversion to int ignores any whitespace
@@ -89,7 +93,7 @@ def cp2k_format_line_iter(self) -> Iterator[str]:
         yield f"{self.element:2} {' '.join(n for n in self.identifiers)}"
         yield f" {len(self.blocks):2}"  # the number of sets this basis set contains
 
-        max_exp = -min(c.as_tuple().exponent for b in self.blocks for r in b.coefficients for c in r)
+        max_exp = -min(int(c.as_tuple().exponent) for b in self.blocks for r in b.coefficients for c in r)
         max_len = max(len(f"{c:.{max_exp}f}") for b in self.blocks for r in b.coefficients for c in r[1:])
         max_len_exp = max(9 + max_exp, *(len(str(r[0])) for b in self.blocks for r in b.coefficients))
 

diff --git a/cp2k_input_tools/basissets/crystal.py b/cp2k_input_tools/basissets/crystal.py
@@ -0,0 +1,162 @@
+"""
+Parsers and serializers for the Basis Set format used by Crystal
+"""
+
+import re
+from decimal import Decimal
+from typing import Iterator, List, Optional, Sequence, Tuple
+
+from pydantic import BaseModel
+
+from ..pseudopotentials.ecp import ECP
+from ..utils import NUM2SYM, DatafileIterMixin, FromDictMixin
+
+BLOCK_MATCH = re.compile(r"^\s*\d+\s+\d+\s*$")
+
+
+class BasisSetCoefficients(BaseModel, extra="forbid"):
+    """A 'shell' in one single basis set"""
+
+    shell: int  # 0: s, 1: s and p, 2: p, 3: d, 4:f
+    charge: Decimal
+    scaling: Decimal
+    coefficients: List[Tuple[Decimal, Decimal]]
+
+
+class BasisSetData(BaseModel, DatafileIterMixin, FromDictMixin, extra="forbid"):
+    """Basis set data for a single element"""
+
+    Z: int
+    shells: List[BasisSetCoefficients]
+    ecp: Optional[ECP] = None
+
+    @classmethod
+    def from_lines(cls, lines: Sequence[str]) -> "BasisSetData":
+        # the first line contains Z and the number of shells
+        nat, nshells = (int(w) for w in lines[0].split())
+        nline = 1
+        shells = []
+        ecp: Optional[ECP] = None
+
+        Z = nat % 100  # according to CRYSTAL manual, I guess Oganesson will always require an ECP ;-)
+
+        if nat > 200:
+            pseudo_type = lines[nline].strip()
+            assert pseudo_type == "INPUT", f"Unsupported pseudo type: {pseudo_type}, only INPUT is currently supported"
+            nline += 1
+
+            if pseudo_type == "INPUT":
+                tokens = lines[nline].split()
+                znuc = Decimal(tokens[0])
+                M = tuple(int(t) for t in tokens[1:])
+                assert len(M) == 6, f"Invalid number of term numbers M found in ECP spec, expected: 6, found: {len(M)}"
+                nline += 1
+
+                ecp_coefficients = []
+
+                for _ in range(sum(M)):
+                    tokens = lines[nline].split()
+                    ecp_coefficients.append((Decimal(tokens[0]), Decimal(tokens[1]), int(tokens[2])))
+                    nline += 1
+
+                ecp = ECP(Z=Z, Znuc=znuc, M=M, coefficients=ecp_coefficients)
+
+        # go through all blocks containing different sets of orbitals (in CRYSTAL shells)
+        for shelln in range(nshells):
+            tokens = lines[nline].split()
+            btype, shell, ngaussians = (int(qn) for qn in tokens[:3])
+            charge, scaling = (Decimal(v) for v in tokens[3:])
+
+            assert btype == 0, "Unsupported basis set type, currently only 'free' is supported"
+
+            nline += 1
+
+            try:
+                coefficients = [tuple(Decimal(c) for c in lines[nline + n].split(maxsplit=1)) for n in range(ngaussians)]
+            except IndexError:
+                raise ValueError(f"Not enough exponents found. Expected {ngaussians} lines for block {shelln+1}") from None
+
+            shells.append(
+                BasisSetCoefficients(
+                    shell=shell,
+                    charge=charge,
+                    scaling=scaling,
+                    coefficients=coefficients,
+                )
+            )
+
+            # advance by the number of exponents
+            nline += ngaussians
+
+        return cls(Z=Z, shells=shells, ecp=ecp)
+
+    def cp2k_format_line_iter(self, identifier) -> Iterator[str]:
+        identifiers = [identifier]
+
+        if self.ecp:
+            identifiers.append(f"{identifiers[0]}-q{self.ecp.Znuc}")
+
+        yield from self._to_cp2k(identifiers).cp2k_format_line_iter()
+
+    def crystal_format_line_iter(self) -> Iterator[str]:
+        """Generate lines of strings from this Basis Set in the format expected by CRYSTAL."""
+
+        if self.ecp:
+            yield f"{self.Z + 200} {len(self.shells)}"
+            yield "INPUT"
+            yield from self.ecp.crystal_format_line_iter()
+        else:
+            yield f"{self.Z} {len(self.shells)}"
+
+        for shell in self.shells:
+            yield f"0 {shell.shell} {len(shell.coefficients)} {shell.charge} {shell.scaling}"
+
+            for row in shell.coefficients:
+                yield f" {str(row[0]):>13} {str(row[1]):>20}"
+
+    def nwchem_ecp_format_line_iter(self) -> Iterator[str]:
+        if not self.ecp:
+            return
+
+        yield from self.ecp.nwchem_format_line_iter()
+
+    @staticmethod
+    def is_block_start(line: str) -> bool:
+        return BLOCK_MATCH.match(line) is not None
+
+    def _to_cp2k(self, identifiers: List[str]):
+        from .cp2k import BasisSetCoefficients as BasisSetCoefficientsCP2K
+        from .cp2k import BasisSetData as BasisSetDataCP2K
+
+        element = NUM2SYM[self.Z]
+        total_charges = Decimal(0)
+        blocks = []
+
+        shell_cnt = {0: -1, 1: -1, 2: -1, 3: -1, 4: -1}
+
+        for shell in self.shells:
+            # CRYSTAL uses, like Gaussian, the shell types 0: s, 1: sp, 2: p, 3: d, 4: f
+            if shell.shell > 1:
+                qn_lmin = qn_lmax = shell.shell - 1
+            elif shell.shell == 1:
+                qn_lmin = 0
+                qn_lmax = 1
+            else:
+                qn_lmin = qn_lmax = 0
+
+            if shell.charge > 0:
+                shell_cnt[shell.shell] += 1
+
+            n = qn_lmax + 1 + max(0, shell_cnt[shell.shell])
+
+            blocks.append(
+                BasisSetCoefficientsCP2K(
+                    n=n,
+                    l=[(qn_l, 1) for qn_l in range(qn_lmin, qn_lmax + 1)],
+                    coefficients=shell.coefficients,
+                )
+            )
+
+            total_charges += shell.charge
+
+        return BasisSetDataCP2K(element=element, identifiers=identifiers, n_el=int(total_charges // 1), blocks=blocks)
diff --git a/cp2k_input_tools/cli/cp2kgen.py b/cp2k_input_tools/cli/cp2kgen.py
@@ -19,7 +19,10 @@
 @base_dir_option
 @canonical_option
 @var_values_option
-def cp2kgen(fhandle, expressions, base_dir, canonical, var_values):
+@click.option(
+    "zipped", "--zip/--no-zip", default=False, help="whether multiple expressions are zip'ed or a cartesian product is built"
+)
+def cp2kgen(fhandle, expressions, base_dir, canonical, var_values, zipped):
     """
     Generates variations of the given CP2K input file
 
@@ -55,7 +58,12 @@ def cp2kgen(fhandle, expressions, base_dir, canonical, var_values):
     onamesuffix = fpath.suffix
 
     # first generate a list of lists of tuples [ [(key/a, 10), (key/a, 20), ...], [(key/b, 100), ...], ...]
-    for substtuple in itertools.product(*[[(k, v) for v in values] for k, values in substitutions]):
+    if zipped:
+        iter_func = zip
+    else:
+        iter_func = itertools.product
+
+    for substtuple in iter_func(*[[(k, v) for v in values] for k, values in substitutions]):
         # ... then iterate over the cartesian product
         curr_tree = deepcopy(tree)  # create a full copy of the initial tree
         onameparts = []  # output name parts