Skip to content

Commit

Permalink
Merge branch 'release/0.9.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
dev-zero committed Dec 8, 2023
2 parents b797eca + a2636b8 commit 0772fa9
Show file tree
Hide file tree
Showing 28 changed files with 1,236 additions and 289 deletions.
8 changes: 5 additions & 3 deletions .github/workflows/pre-commit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ jobs:
pre-commit:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
- uses: pre-commit/[email protected]
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: '3.10'
- uses: pre-commit/[email protected]
14 changes: 7 additions & 7 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,27 +6,27 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.6, 3.7, 3.8, 3.9]
python-version: ['3.9', '3.10', '3.11']

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3

- name: Set up python ${{ matrix.python-version }}
uses: actions/setup-python@v2
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
architecture: x64

- name: Install Poetry
run: pip install poetry==1.1.6
uses: snok/install-poetry@v1.3.1

- name: Install dependencies
run: poetry install -E yaml -E lsp

- name: Run pytest
run: poetry run pytest --cov-report=xml --cov-report=term-missing --cov-append --cov=./ tests/
run: poetry run pytest --cov-report=xml --cov-report=term-missing --cov-append --cov=cp2k_input_tools tests/

- uses: codecov/codecov-action@v1
- uses: codecov/codecov-action@v3.1.0
with:
file: ./coverage.xml
files: ./coverage.xml
fail_ci_if_error: true
26 changes: 10 additions & 16 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,17 @@
# pre-commit install

repos:
- repo: https://github.com/pycqa/isort
rev: 5.10.1
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: 'v0.1.6'
hooks:
- id: isort
name: isort (python)
- id: isort
name: isort (cython)
types: [cython]
- id: isort
name: isort (pyi)
types: [pyi]
- repo: https://github.com/ambv/black
rev: 22.3.0
- id: ruff
args: [ --fix, --exit-non-zero-on-fix ]
- repo: https://github.com/psf/black
rev: 23.11.0
hooks:
- id: black
- repo: https://github.com/pycqa/flake8
rev: 4.0.1
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.7.1
hooks:
- id: flake8
additional_dependencies: ["flake8-bugbear"]
- id: mypy
additional_dependencies: ["pydantic>=2"]
12 changes: 12 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,17 @@
# Change Log

## [0.9.0] - 2023-12-08

* basissets: add support for new-style All-Electron basis sets
* cp2kgen: add `--zip` option, similar to Python's zip function
* reorganize basisset and pseudo datatypes dir (although should remain compatible)
* basissets: add parsing and converting options from CRYSTAL07 format
* reformat code as part of the move from flake8 to ruff
* bump minimal required Python version to 3.9
* update and fix LSP implementation
* update to Pydantic 2+
* resolved some code smells

## [0.8.2] - 2022-04-21

* fix issue with `base_dir` and click v7's `path_type`, thanks to @yakutovicha for the initial fix
Expand Down
2 changes: 1 addition & 1 deletion cp2k_input_tools/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import pathlib

__version__ = "0.8.2"
__version__ = "0.9.0"

DEFAULT_CP2K_INPUT_XML = pathlib.Path(__file__).resolve().parent.joinpath("cp2k_input.xml")
4 changes: 4 additions & 0 deletions cp2k_input_tools/basissets/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# backwards compatibility and the default
from .cp2k import BasisSetCoefficients, BasisSetData

__all__ = ["BasisSetCoefficients", "BasisSetData"]
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,22 @@
from decimal import Decimal
from typing import Iterator, List, Optional, Sequence, Tuple

from pydantic import BaseModel, Extra
from pydantic import BaseModel

from .utils import SYM2NUM, DatafileIterMixin, FromDictMixin, dformat
from ..utils import SYM2NUM, DatafileIterMixin, FromDictMixin, dformat

N_VAL_EL_MATCH = re.compile(r"q(?P<nvalel>\d+)$")


class BasisSetCoefficients(BaseModel, extra=Extra.forbid):
class BasisSetCoefficients(BaseModel, extra="forbid"):
"""A 'shell' in one single basis set"""

n: int
l: List[Tuple[int, int]]
l: List[Tuple[int, int]] # noqa: E741
coefficients: List[List[Decimal]]


class BasisSetData(BaseModel, DatafileIterMixin, FromDictMixin, extra=Extra.forbid):
class BasisSetData(BaseModel, DatafileIterMixin, FromDictMixin, extra="forbid"):
"""Basis set data for a single element"""

element: str
Expand All @@ -44,7 +44,11 @@ def from_lines(cls, lines: Sequence[str]) -> "BasisSetData":

# the ALL* tags indicate an all-electron basis set, but they might be ambiguous,
# ignore them if we found an explicit #(val.el.) spec already
if not n_el and any(kw in identifiers for kw in ("ALL", "ALLELECTRON")):
if (
not n_el
and any(kw in identifiers for kw in ("ALL", "ALLELECTRON"))
or any(identifier.endswith("-ae") for identifier in identifiers)
):
n_el = SYM2NUM[element]

# The second line contains the number of sets, conversion to int ignores any whitespace
Expand Down Expand Up @@ -89,7 +93,7 @@ def cp2k_format_line_iter(self) -> Iterator[str]:
yield f"{self.element:2} {' '.join(n for n in self.identifiers)}"
yield f" {len(self.blocks):2}" # the number of sets this basis set contains

max_exp = -min(c.as_tuple().exponent for b in self.blocks for r in b.coefficients for c in r)
max_exp = -min(int(c.as_tuple().exponent) for b in self.blocks for r in b.coefficients for c in r)
max_len = max(len(f"{c:.{max_exp}f}") for b in self.blocks for r in b.coefficients for c in r[1:])
max_len_exp = max(9 + max_exp, *(len(str(r[0])) for b in self.blocks for r in b.coefficients))

Expand Down
162 changes: 162 additions & 0 deletions cp2k_input_tools/basissets/crystal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
"""
Parsers and serializers for the Basis Set format used by Crystal
"""

import re
from decimal import Decimal
from typing import Iterator, List, Optional, Sequence, Tuple

from pydantic import BaseModel

from ..pseudopotentials.ecp import ECP
from ..utils import NUM2SYM, DatafileIterMixin, FromDictMixin

BLOCK_MATCH = re.compile(r"^\s*\d+\s+\d+\s*$")


class BasisSetCoefficients(BaseModel, extra="forbid"):
    """A 'shell' in one single basis set.

    Unknown fields are rejected (``extra="forbid"``).
    """

    # shell type code -- 0: s, 1: s and p, 2: p, 3: d, 4: f
    shell: int
    # fourth field of the shell header line; summed over all shells to obtain
    # the electron count when converting to the CP2K representation
    charge: Decimal
    # fifth field of the shell header line, written back verbatim on output
    scaling: Decimal
    # one two-column row per primitive Gaussian
    # NOTE(review): presumably (exponent, contraction coefficient) -- confirm
    coefficients: List[Tuple[Decimal, Decimal]]


class BasisSetData(BaseModel, DatafileIterMixin, FromDictMixin, extra="forbid"):
    """Basis set data for a single element, as read from a CRYSTAL basis set block.

    Unknown fields are rejected (``extra="forbid"``).
    """

    # atomic number, derived from the conventional atomic number as ``nat % 100``
    Z: int
    # the shells of this basis set, in file order
    shells: List[BasisSetCoefficients]
    # only set when the block carried an ECP section (conventional atomic number > 200)
    ecp: Optional[ECP] = None

    @classmethod
    def from_lines(cls, lines: Sequence[str]) -> "BasisSetData":
        """Parse one CRYSTAL basis set block.

        Args:
            lines: the lines of a single block; the first one holds the
                conventional atomic number and the number of shells.

        Returns:
            The parsed basis set, including the ECP if one was specified.

        Raises:
            AssertionError: for an ECP type other than ``INPUT``, an ECP header
                without exactly 6 term counts, or a basis set type other than 0.
            ValueError: if a shell announces more primitives than there are
                lines left in the block.
        """
        # the first line contains Z and the number of shells
        nat, nshells = (int(w) for w in lines[0].split())
        nline = 1
        shells = []
        ecp: Optional[ECP] = None

        Z = nat % 100  # according to CRYSTAL manual, I guess Oganesson will always require an ECP ;-)

        if nat > 200:
            pseudo_type = lines[nline].strip()
            # Raised explicitly instead of via `assert` so that the check survives
            # `python -O`; the exception type is kept for backward compatibility.
            if pseudo_type != "INPUT":
                raise AssertionError(f"Unsupported pseudo type: {pseudo_type}, only INPUT is currently supported")
            nline += 1

            # ECP header: the effective nuclear charge followed by the term counts M
            tokens = lines[nline].split()
            znuc = Decimal(tokens[0])
            M = tuple(int(t) for t in tokens[1:])
            if len(M) != 6:
                raise AssertionError(
                    f"Invalid number of term numbers M found in ECP spec, expected: 6, found: {len(M)}"
                )
            nline += 1

            ecp_coefficients = []

            # one line per term, sum(M) lines in total
            for _ in range(sum(M)):
                tokens = lines[nline].split()
                ecp_coefficients.append((Decimal(tokens[0]), Decimal(tokens[1]), int(tokens[2])))
                nline += 1

            ecp = ECP(Z=Z, Znuc=znuc, M=M, coefficients=ecp_coefficients)

        # go through all blocks containing different sets of orbitals (in CRYSTAL shells)
        for shelln in range(nshells):
            tokens = lines[nline].split()
            btype, shell, ngaussians = (int(qn) for qn in tokens[:3])
            charge, scaling = (Decimal(v) for v in tokens[3:])

            # explicit raise for the same reason as above: must not vanish under `python -O`
            if btype != 0:
                raise AssertionError("Unsupported basis set type, currently only 'free' is supported")

            nline += 1

            # NOTE(review): rows are split into exactly two fields (maxsplit=1); a row
            # with a third column would leave two numbers in the second field -- confirm
            # that three-column rows (e.g. for sp shells) are intentionally unsupported.
            try:
                coefficients = [tuple(Decimal(c) for c in lines[nline + n].split(maxsplit=1)) for n in range(ngaussians)]
            except IndexError:
                raise ValueError(f"Not enough exponents found. Expected {ngaussians} lines for block {shelln+1}") from None

            shells.append(
                BasisSetCoefficients(
                    shell=shell,
                    charge=charge,
                    scaling=scaling,
                    coefficients=coefficients,
                )
            )

            # advance by the number of exponents
            nline += ngaussians

        return cls(Z=Z, shells=shells, ecp=ecp)

    def cp2k_format_line_iter(self, identifier: str) -> Iterator[str]:
        """Generate lines of strings from this Basis Set in the CP2K format.

        Args:
            identifier: name to give the basis set; if an ECP is present a second
                identifier ``<identifier>-q<Znuc>`` is added.
        """
        identifiers = [identifier]

        if self.ecp:
            identifiers.append(f"{identifiers[0]}-q{self.ecp.Znuc}")

        yield from self._to_cp2k(identifiers).cp2k_format_line_iter()

    def crystal_format_line_iter(self) -> Iterator[str]:
        """Generate lines of strings from this Basis Set in the format expected by CRYSTAL."""

        if self.ecp:
            # an ECP is announced by adding 200 to the atomic number in the header
            yield f"{self.Z + 200} {len(self.shells)}"
            yield "INPUT"
            yield from self.ecp.crystal_format_line_iter()
        else:
            yield f"{self.Z} {len(self.shells)}"

        for shell in self.shells:
            yield f"0 {shell.shell} {len(shell.coefficients)} {shell.charge} {shell.scaling}"

            for row in shell.coefficients:
                yield f" {str(row[0]):>13} {str(row[1]):>20}"

    def nwchem_ecp_format_line_iter(self) -> Iterator[str]:
        """Generate the ECP of this basis set in NWChem format; yields nothing without an ECP."""
        if not self.ecp:
            return

        yield from self.ecp.nwchem_format_line_iter()

    @staticmethod
    def is_block_start(line: str) -> bool:
        """Return True if the line consists of exactly two whitespace-separated integers."""
        return BLOCK_MATCH.match(line) is not None

    def _to_cp2k(self, identifiers: List[str]):
        """Convert this basis set to the CP2K basis set data type.

        Args:
            identifiers: the identifiers to attach to the resulting CP2K basis set.

        Returns:
            A CP2K ``BasisSetData`` with one block per CRYSTAL shell and ``n_el``
            set to the integer part of the summed shell charges.
        """
        # imported locally, as in the original implementation
        from .cp2k import BasisSetCoefficients as BasisSetCoefficientsCP2K
        from .cp2k import BasisSetData as BasisSetDataCP2K

        element = NUM2SYM[self.Z]
        total_charges = Decimal(0)
        blocks = []

        # per shell type: number of charged shells seen so far, minus one
        shell_cnt = {0: -1, 1: -1, 2: -1, 3: -1, 4: -1}

        for shell in self.shells:
            # CRYSTAL has like Gaussian 0: s, 1: sp, 2: p, 3: d, 4: f
            if shell.shell > 1:
                qn_lmin = qn_lmax = shell.shell - 1
            elif shell.shell == 1:
                qn_lmin = 0
                qn_lmax = 1
            else:
                qn_lmin = qn_lmax = 0

            if shell.charge > 0:
                shell_cnt[shell.shell] += 1

            # the lowest n for a given l is l+1; every further charged shell of the
            # same type raises it by one
            n = qn_lmax + 1 + max(0, shell_cnt[shell.shell])

            blocks.append(
                BasisSetCoefficientsCP2K(
                    n=n,
                    l=[(qn_l, 1) for qn_l in range(qn_lmin, qn_lmax + 1)],
                    coefficients=shell.coefficients,
                )
            )

            total_charges += shell.charge

        return BasisSetDataCP2K(element=element, identifiers=identifiers, n_el=int(total_charges // 1), blocks=blocks)
12 changes: 10 additions & 2 deletions cp2k_input_tools/cli/cp2kgen.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@
@base_dir_option
@canonical_option
@var_values_option
def cp2kgen(fhandle, expressions, base_dir, canonical, var_values):
@click.option(
"zipped", "--zip/--no-zip", default=False, help="whether multiple expressions are zip'ed or a cartesian product is built"
)
def cp2kgen(fhandle, expressions, base_dir, canonical, var_values, zipped):
"""
Generates variations of the given CP2K input file
Expand Down Expand Up @@ -55,7 +58,12 @@ def cp2kgen(fhandle, expressions, base_dir, canonical, var_values):
onamesuffix = fpath.suffix

# first generate a list of list of tuples [ [(key/a, 10), (key/a, 20), ...], [(key/b, 100), ...], ...]
for substtuple in itertools.product(*[[(k, v) for v in values] for k, values in substitutions]):
if zipped:
iter_func = zip
else:
iter_func = itertools.product

for substtuple in iter_func(*[[(k, v) for v in values] for k, values in substitutions]):
# ... then iterate over the cartesian product
curr_tree = deepcopy(tree) # create a full copy of the initial tree
onameparts = [] # output name parts
Expand Down
Loading

0 comments on commit 0772fa9

Please sign in to comment.