Cython perf simplification (#861)
* complete rewrite of cython files

This was not really intended, but the amount
of different code paths that were not using fused types
was annoying and prohibited further development.

I initially tried to implement pure-Python mode
code. However, it seems to be impossible to write
pure-Python mode code and cimport it into another
pure-Python mode module.

Before this is merged, I need to benchmark
the changes.

The main change is that the fused data types
are now used throughout and that the ndarray data
type has been removed almost completely.
So now, typed memoryviews are prevalent.

This means that it is much simpler to track problems.
There is no code duplication in the spin-box calculations.

I am not sure whether the Python-yellow annotations are
only for the int/long variables and the down-casting.
In any case, I think this is easier to manage.

Signed-off-by: Nick Papior <[email protected]>

* redid fold_csr_matrix for much better perf

Simple benchmarks showed that converting Siesta Hamiltonians
to Hk can be made much faster by changing how the
folding of the matrices is done.

Instead of incrementally adding elements and searching
for duplicates before each addition, we now
build the entire array and use numpy.unique to reduce
it. This leverages the fact that numpy.unique
already returns a sorted array.
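
A rough NumPy sketch of the idea (the actual implementation is the Cython
folding routine and differs in detail; the per-row `cols`/`data` arrays here
are purely illustrative):

    import numpy as np

    def fold_row(cols, data):
        """Fold duplicated column entries of one row into a unique, sorted set."""
        # numpy.unique returns the sorted unique columns plus an inverse map,
        # so all duplicates can be accumulated in one vectorized pass instead
        # of searching for each column before insertion.
        ucols, inverse = np.unique(cols, return_inverse=True)
        udata = np.zeros(ucols.size, dtype=data.dtype)
        np.add.at(udata, inverse, data)  # sum values landing on the same column
        return ucols, udata

    # columns 1 and 4 appear twice and get folded (summed) into single entries
    cols = np.array([4, 1, 3, 1, 4])
    data = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
    print(fold_row(cols, data))  # (array([1, 3, 4]), array([6., 3., 6.]))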

It marginally slows down CSR creation for matrices with
few edges per orbital (TB models), but it will be
much faster for larger models stemming from DFT
or the like.

Tests for this commit:

    %timeit H.Hk()
    %timeit H.Hk([0.1] * 3)
    %timeit H.Hk(format="array")
    %timeit H.Hk([0.1] * 3, format="array")

For a *many* edge system, we get:

    67.2 ms ± 1.51 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
    85.4 ms ± 8.81 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
    5.59 ms ± 426 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
    11.3 ms ± 39.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)

While for a *few* edge system, we get:

    9.1 ms ± 52.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
    9.25 ms ± 65.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
    5.75 ms ± 397 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
    6.17 ms ± 394 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)

For commit v0.15.1-57-g6bbbde39, the *many* edge system gives:

    196 ms ± 3.01 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
    214 ms ± 1.87 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
    6.58 ms ± 139 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
    12.8 ms ± 58.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)

and the *few* edge system gives:

    7.41 ms ± 77.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
    7.37 ms ± 73.7 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
    6.04 ms ± 383 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
    5.81 ms ± 37 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)

Enabling the Fortran language is necessary for CMake to populate the
details of the Fortran world.

This should enable simpler access to data.

Signed-off-by: Nick Papior <[email protected]>

* fixed handling of complex matrices in sisl

Much of the code was built for floats only.
This fixes the reading/writing of
sparse matrices in specific data formats.

It allows a more natural way of handling SOC
matrices with complex coupling, say:

    H[0, 0, 2] = Hud

as a single complex value. When dealing with floats,
one instead needs to do this:

    H[0, 0, 2] = Hud.real
    H[0, 0, 3] = Hud.imag

which is not super-intuitive.
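
As a hedged illustration (the graphene geometry and the `spin=`/`dtype=`
constructor arguments are assumptions for this sketch, not taken from the
commit; only the element assignments mirror the example above):

    import numpy as np
    import sisl as si

    geom = si.geom.graphene()
    Hud = 0.1 + 0.2j  # hypothetical up-down coupling element

    # complex storage: the up-down block is a single complex assignment
    Hc = si.Hamiltonian(geom, spin=si.Spin("spin-orbit"), dtype=np.complex128)
    Hc[0, 0, 2] = Hud

    # float storage: real and imaginary parts are split over two indices
    Hf = si.Hamiltonian(geom, spin=si.Spin("spin-orbit"), dtype=np.float64)
    Hf[0, 0, 2] = Hud.real
    Hf[0, 0, 3] = Hud.imag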

Currently there are still *many* hard-coded index conventions,
and we should strive to move these into a common framework
to limit the problems this creates.

Tests have been added that check Hamiltonian eigenvalues
and density-matrix Mulliken charges. So it seems to work
as intended, but not everything has been fully tested.
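
A hedged sketch of the kind of consistency check meant here (the geometry,
hoppings and k-point are illustrative, not the actual test):

    import numpy as np
    import sisl as si

    geom = si.geom.graphene()
    k = [0.1, 0.2, 0]

    # build the same tight-binding model with float and complex storage;
    # the eigenvalues at any k-point should coincide
    eigs = []
    for dtype in (np.float64, np.complex128):
        H = si.Hamiltonian(geom, dtype=dtype)
        H.construct([(0.1, 1.44), (0.0, -2.7)])
        eigs.append(H.eigh(k))

    assert np.allclose(eigs[0], eigs[1])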

* added explanation of how transform works

* added more tests for dtype conversion

Fixed errors in the dtypes of created columns when calling
csr.diags().

Ensured that sparsegeometry.finalize accepts any arguments that
csr.finalize accepts.
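
A small hedged example of the pass-through (assuming `sort` is one of the
keywords csr.finalize understands):

    import sisl as si

    H = si.Hamiltonian(si.geom.graphene())
    H.construct([(0.1, 1.44), (0.0, -2.7)])
    # keyword arguments are now forwarded to the underlying csr.finalize
    H.finalize(sort=False)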

Removed casting errors in mat2dtype. This may *hide* potential
problems when there are non-zero imaginary parts.
We should perhaps later revisit the problem considering
that TB + Peierls has complex overlap matrices.

Fixed an issue when construct was called with Python
intrinsic complex values.

---------

Signed-off-by: Nick Papior <[email protected]>
zerothi authored Nov 26, 2024
1 parent 8559eda commit c6253c8
Showing 51 changed files with 3,513 additions and 4,533 deletions.
7 changes: 6 additions & 1 deletion CHANGELOG.md
@@ -16,9 +16,14 @@ we hit release version 1.0.0.
sisl.geom.graphene

### Fixed

- `projection` arguments of several functions have been streamlined

### Changed
- internal Cython code for performance improvements.
  This yields significant performance improvements for DFT sparse matrices
  with *many* edges, but a performance hit for very small TB matrices.


## [0.15.2] - 2024-11-06

27 changes: 14 additions & 13 deletions CMakeLists.txt
@@ -65,7 +65,6 @@ add_compile_definitions(CYTHON_NO_PYINIT_EXPORT=1)
#: lib, perhaps we should change this
set(CMAKE_SHARED_MODULE_PREFIX "")


# Determine whether we are in CIBUILDWHEEL
# and whether we are building for the universal target
set(_def_fortran TRUE)
@@ -81,6 +80,8 @@ option(WITH_FORTRAN

# Define all options for the user
if( WITH_FORTRAN )
enable_language(Fortran)

set(F2PY_REPORT_ON_ARRAY_COPY 10
CACHE STRING
"The minimum (element) size of arrays before warning about copies")
@@ -209,6 +210,18 @@ if(WITH_FORTRAN)
endif(WITH_FORTRAN)


message(STATUS "Python variables:")
list(APPEND CMAKE_MESSAGE_INDENT " ")

cmake_print_variables(Python_INCLUDE_DIRS)
cmake_print_variables(Python_NumPy_INCLUDE_DIRS)
if(WITH_FORTRAN)
cmake_print_variables(Python_NumPy_F2Py_INCLUDE_DIR)
endif()

list(POP_BACK CMAKE_MESSAGE_INDENT)


message(STATUS "sisl options")
list(APPEND CMAKE_MESSAGE_INDENT " ")

@@ -230,18 +243,6 @@ endif()
list(POP_BACK CMAKE_MESSAGE_INDENT)


message(STATUS "Python variables:")
list(APPEND CMAKE_MESSAGE_INDENT " ")

cmake_print_variables(Python_INCLUDE_DIRS)
cmake_print_variables(Python_NumPy_INCLUDE_DIRS)
if(WITH_FORTRAN)
cmake_print_variables(Python_NumPy_F2Py_INCLUDE_DIR)
endif()

list(POP_BACK CMAKE_MESSAGE_INDENT)



# Return in _result whether the _file should be built, or not
# It checks whether the file is present in the NO_COMPILATION
77 changes: 77 additions & 0 deletions benchmarks/optimizations/hamiltonian.ipynb
@@ -0,0 +1,77 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Here we test and check the performance of the `Hk` implementation."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from pathlib import Path\n",
"import numpy as np\n",
"import sisl as si\n",
"\n",
"files = Path(os.environ[\"SISL_FILES_TESTS\"])\n",
"siesta = files / \"siesta\"\n",
"\n",
"N = 10"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"H = si.Hamiltonian.read(siesta / \"Si_pdos_k\" / \"Si_pdos.TSHS\").tile(N, 0).tile(N, 1)\n",
"\n",
"%timeit H.Hk()\n",
"%timeit H.Hk([0.1] * 3)\n",
"%timeit H.Hk(format=\"array\")\n",
"%timeit H.Hk([0.1] * 3, format=\"array\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"H = si.Hamiltonian.read(siesta / \"Pt2_soc\" / \"Pt2_xx.TSHS\").tile(N, 0).tile(N // 2, 1)\n",
"\n",
"%timeit H.Hk()\n",
"%timeit H.Hk([0.1] * 3)\n",
"%timeit H.Hk(format=\"array\")\n",
"%timeit H.Hk([0.1] * 3, format=\"array\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
5 changes: 2 additions & 3 deletions benchmarks/run.sh
@@ -15,6 +15,5 @@ profile=$base.profile
# Stats
stats=$base.stats

python -m cProfile -o $profile $script $@
python stats.py $profile > $stats

python3 -m cProfile -o $profile $script $@
python3 stats.py $profile > $stats
20 changes: 0 additions & 20 deletions benchmarks/run3.sh

This file was deleted.

7 changes: 7 additions & 0 deletions src/sisl/CMakeLists.txt
@@ -1,3 +1,9 @@
set_property(DIRECTORY
APPEND
PROPERTY INCLUDE_DIRECTORIES
${CMAKE_CURRENT_SOURCE_DIR}/_core
)

foreach(source _indices _math_small)
add_cython_library(
SOURCE ${source}.pyx
@@ -29,6 +35,7 @@ endforeach()
get_directory_property( SISL_DEFINITIONS DIRECTORY
${CMAKE_CURRENT_SOURCE_DIR}
COMPILE_DEFINITIONS )

# Join to stringify list
list(JOIN SISL_DEFINITIONS " " SISL_DEFINITIONS)

4 changes: 2 additions & 2 deletions src/sisl/__init__.py
@@ -88,6 +88,8 @@
# import the common options used
from ._common import *

from ._core import *

# Import warning classes
# We currently do not import warn and info
# as they are too generic names in case one does from sisl import *
@@ -106,8 +108,6 @@
# Below are sisl-specific imports
from .shape import *

from ._core import *

# Physical quantities and required classes
from .physics import *

2 changes: 1 addition & 1 deletion src/sisl/_core/CMakeLists.txt
@@ -1,4 +1,4 @@
foreach(source _lattice _sparse)
foreach(source _lattice _dtypes _sparse)
add_cython_library(
SOURCE ${source}.pyx
LIBRARY ${source}
102 changes: 102 additions & 0 deletions src/sisl/_core/_dtypes.pxd
@@ -0,0 +1,102 @@
"""
Shared header for fused dtypes
"""
cimport cython

import numpy as np

cimport numpy as cnp
from numpy cimport (
complex64_t,
complex128_t,
float32_t,
float64_t,
int8_t,
int16_t,
int32_t,
int64_t,
uint8_t,
uint16_t,
uint32_t,
uint64_t,
)

# Generic typedefs for sisl internal naming convention
ctypedef size_t size_st
ctypedef Py_ssize_t ssize_st


ctypedef fused ints_st:
int
long


ctypedef fused floats_st:
float
double


ctypedef fused complexs_st:
float complex
double complex


ctypedef fused floatcomplexs_st:
float
double
float complex
double complex


# We need this fused data-type to omit complex data-types
ctypedef fused reals_st:
int
long
float
double

ctypedef fused numerics_st:
int
long
float
double
float complex
double complex

ctypedef fused _type2dtype_types_st:
short
int
long
float
double
float complex
double complex
float32_t
float64_t
#complex64_t # not usable...
#complex128_t
int8_t
int16_t
int32_t
int64_t
uint8_t
uint16_t
uint32_t
uint64_t


cdef object type2dtype(const _type2dtype_types_st v)


ctypedef fused _inline_sum_st:
short
int
long
int16_t
int32_t
int64_t
uint16_t
uint32_t
uint64_t

cdef ssize_st inline_sum(const _inline_sum_st[::1] array) noexcept nogil