Commit 521b5c3

Merge branch 'develop' into blas_update_3

2 parents 91e8dc2 + b809ce6

135 files changed: +1560 -1385 lines changed
docs/advanced/scf/hsolver.md

Lines changed: 1 addition & 1 deletion

@@ -4,7 +4,7 @@
 
 Method of explicit solving KS-equation can be chosen by variable "ks_solver" in INPUT file.
 
-When "basis_type = pw", `ks_solver` can be `cg`, `bpcg` or `dav`. The `bpcg` method only supports K-point parallelism currently. The default setting `cg` is recommended, which is band-by-band conjugate gradient diagonalization method. There is a large probability that the use of setting of `dav` , which is block Davidson diagonalization method, can be tried to improve performance.
+When "basis_type = pw", `ks_solver` can be `cg`, `bpcg` or `dav`. The default setting `cg` is recommended, which is band-by-band conjugate gradient diagonalization method. There is a large probability that the use of setting of `dav` , which is block Davidson diagonalization method, can be tried to improve performance.
 
 When "basis_type = lcao", `ks_solver` can be `genelpa` or `scalapack_gvx`. The default setting `genelpa` is recommended, which is based on ELPA (EIGENVALUE SOLVERS FOR PETAFLOP APPLICATIONS) (https://elpa.mpcdf.mpg.de/) and the kernel is auto choosed by GENELPA(https://github.com/pplab/GenELPA), usually faster than the setting of "scalapack_gvx", which is based on ScaLAPACK(Scalable Linear Algebra PACKage)
 
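For context, the solver is selected in the ABACUS INPUT file. Below is a minimal illustrative fragment, assuming the usual key-value INPUT format; the specific values are examples chosen for illustration, not part of this commit:

```
INPUT_PARAMETERS
basis_type   pw     # plane-wave basis
ks_solver    dav    # default is cg; dav (block Davidson) can be worth trying
```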

source/module_basis/module_nao/atomic_radials.cpp

Lines changed: 39 additions & 35 deletions

@@ -221,51 +221,55 @@ void AtomicRadials::read_abacus_orb(std::ifstream& ifs, std::ofstream* ptr_log,
         is_read[i] = false;
     }
 
-    int l = 0;
-    int izeta = 0;
-    for (int i = 0; i != nchi_; ++i)
+    for (int l = 0; l <= lmax_; ++l)
     {
-        if (rank == 0)
+        for (int izeta = 0; izeta < nzeta_[l]; ++izeta)
         {
-            /*
-             * read the orbital information, including
-             *
-             * 1. angular momentum
-             * 2. zeta number
-             * 3. values on the grid
-             * */
-            // ifs >> tmp >> tmp >> tmp; // skip "Type" "L" "N"
-            ifs >> tmp >> tmp >> tmp;
-#ifdef __DEBUG
-            assert(tmp == "N");
-#endif
-
-            ifs >> tmp >> l >> izeta;
-#ifdef __DEBUG
-            assert(l >= 0 && l <= lmax_);
-            assert(izeta >= 0 && izeta < nzeta_[l]);
-#endif
-
-            for (int ir = 0; ir != ngrid; ++ir)
+            if (rank == 0)
             {
-                ifs >> rvalue[ir];
+                /*
+                 * read the orbital information, including
+                 *
+                 * 1. angular momentum
+                 * 2. zeta number
+                 * 3. values on the grid
+                 * */
+                while (ifs.good())
+                {
+                    while (ifs >> tmp)
+                    {
+                        if (tmp == "N")
+                        {
+                            break;
+                        }
+                    }
+                    int read_l, read_izeta;
+                    ifs >> tmp >> read_l >> read_izeta;
+                    if (l == read_l && izeta == read_izeta)
+                    {
+                        break;
+                    }
+                }
+
+                for (int ir = 0; ir != ngrid; ++ir)
+                {
+                    ifs >> rvalue[ir];
+                }
             }
-        }
 
 #ifdef __MPI
-        Parallel_Common::bcast_int(l);
-        Parallel_Common::bcast_int(izeta);
-        Parallel_Common::bcast_double(rvalue, ngrid);
+            Parallel_Common::bcast_double(rvalue, ngrid);
 #endif
 #ifdef __DEBUG
-        assert(index(l, izeta) >= 0 && index(l, izeta) < nchi_);
-        assert(!is_read[index(l, izeta)]);
+            assert(index(l, izeta) >= 0 && index(l, izeta) < nchi_);
+            assert(!is_read[index(l, izeta)]);
 #endif
-        is_read[index(l, izeta)] = true;
+            is_read[index(l, izeta)] = true;
 
-        // skip the initialization of sbt_ in this stage
-        chi_[index(l, izeta)].build(l, true, ngrid, rgrid, rvalue, 0, izeta, symbol_, itype_, false);
-        chi_[index(l, izeta)].normalize();
+            // skip the initialization of sbt_ in this stage
+            chi_[index(l, izeta)].build(l, true, ngrid, rgrid, rvalue, 0, izeta, symbol_, itype_, false);
+            chi_[index(l, izeta)].normalize();
+        }
     }
 
     delete[] is_read;
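The rewrite above changes the reading strategy: the loops now run over every (l, izeta) pair explicitly, and rank 0 scans the stream for the block whose header matches, so orbital blocks no longer have to appear in a fixed order in the file. A minimal standalone sketch of that scan, assuming each block header is a `Type L N` tag line followed by `<itype> <l> <izeta>` (a hypothetical helper, not the ABACUS source):

```cpp
#include <cassert>
#include <iostream>
#include <sstream>
#include <string>

// Advance `ifs` past the header of the orbital block labelled (l, izeta);
// returns false if no matching block exists.
bool seek_orbital_block(std::istream& ifs, const int l, const int izeta)
{
    std::string tmp;
    while (ifs.good())
    {
        while (ifs >> tmp) // skip tokens until the "N" tag of a block header
        {
            if (tmp == "N") { break; }
        }
        int read_l = -1, read_izeta = -1;
        if (!(ifs >> tmp >> read_l >> read_izeta)) { return false; } // hit EOF
        if (read_l == l && read_izeta == izeta) { return true; }
    }
    return false;
}

int main()
{
    // two blocks, deliberately out of order: (l=1, izeta=0) before (l=0, izeta=0)
    std::istringstream orb("Type L N\n0 1 0\n0.1 0.2\nType L N\n0 0 0\n0.3 0.4\n");
    assert(seek_orbital_block(orb, 0, 0));
    double v0 = 0, v1 = 0;
    orb >> v0 >> v1;
    std::cout << v0 << " " << v1 << "\n"; // prints the requested block: 0.3 0.4
}
```

A side effect of running the same (l, izeta) loops on every rank is that only `rvalue` still needs an MPI broadcast, which is why the `bcast_int(l)` and `bcast_int(izeta)` calls were dropped.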

source/module_cell/klist.cpp

Lines changed: 32 additions & 30 deletions

@@ -13,34 +13,33 @@
 #include "module_cell/module_paw/paw_cell.h"
 #endif
 
-K_Vectors::K_Vectors()
+void K_Vectors::cal_ik_global()
 {
-
-    nspin = 0; // default spin.
-    kc_done = false;
-    kd_done = false;
-    nkstot_full = 0;
-    nks = 0;
-    nkstot = 0;
-    k_nkstot = 0; // LiuXh add 20180619
-}
-
-K_Vectors::~K_Vectors()
-{
-}
-
-int K_Vectors::get_ik_global(const int& ik, const int& nkstot)
-{
-    int nkp = nkstot / PARAM.inp.kpar;
-    int rem = nkstot % PARAM.inp.kpar;
-    if (GlobalV::MY_POOL < rem)
+    const int my_pool = this->para_k.my_pool;
+    this->ik2iktot.resize(this->nks);
+#ifdef __MPI
+    if(this->nspin == 2)
     {
-        return GlobalV::MY_POOL * nkp + GlobalV::MY_POOL + ik;
+        for (int ik = 0; ik < this->nks / 2; ++ik)
+        {
+            this->ik2iktot[ik] = this->para_k.startk_pool[my_pool] + ik;
+            this->ik2iktot[ik + this->nks / 2] = this->nkstot / 2 + this->para_k.startk_pool[my_pool] + ik;
+        }
     }
     else
     {
-        return GlobalV::MY_POOL * nkp + rem + ik;
+        for (int ik = 0; ik < this->nks; ++ik)
+        {
+            this->ik2iktot[ik] = this->para_k.startk_pool[my_pool] + ik;
+        }
     }
+#else
+    for (int ik = 0; ik < this->nks; ++ik)
+    {
+        this->ik2iktot[ik] = ik;
+    }
+#endif
+
 }
 
 void K_Vectors::set(const UnitCell& ucell,
@@ -148,12 +147,12 @@ void K_Vectors::set(const UnitCell& ucell,
     // It's very important in parallel case,
     // firstly do the mpi_k() and then
     // do set_kup_and_kdw()
-    GlobalC::Pkpoints.kinfo(nkstot,
-                            GlobalV::KPAR,
-                            GlobalV::MY_POOL,
-                            GlobalV::RANK_IN_POOL,
-                            GlobalV::NPROC,
-                            nspin_in); // assign k points to several process pools
+    this->para_k.kinfo(nkstot,
+                       GlobalV::KPAR,
+                       GlobalV::MY_POOL,
+                       GlobalV::RANK_IN_POOL,
+                       GlobalV::NPROC,
+                       nspin_in); // assign k points to several process pools
 #ifdef __MPI
     // distribute K point data to the corresponding process
     this->mpi_k(); // 2008-4-29
@@ -162,6 +161,9 @@
     // set the k vectors for the up and down spin
     this->set_kup_and_kdw();
 
+    // get ik2iktot
+    this->cal_ik_global();
+
     this->print_klists(ofs);
 
     // std::cout << " NUMBER OF K-POINTS : " << nkstot << std::endl;
@@ -1163,7 +1165,7 @@ void K_Vectors::mpi_k()
 
     Parallel_Common::bcast_double(koffset, 3);
 
-    this->nks = GlobalC::Pkpoints.nks_pool[GlobalV::MY_POOL];
+    this->nks = this->para_k.nks_pool[GlobalV::MY_POOL];
 
     GlobalV::ofs_running << std::endl;
     ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "k-point number in this process", nks);
@@ -1217,7 +1219,7 @@
     for (int i = 0; i < nks; i++)
     {
         // 3 is because each k point has three value:kx, ky, kz
-        k_index = i + GlobalC::Pkpoints.startk_pool[GlobalV::MY_POOL];
+        k_index = i + this->para_k.startk_pool[GlobalV::MY_POOL];
         kvec_c[i].x = kvec_c_aux[k_index * 3];
         kvec_c[i].y = kvec_c_aux[k_index * 3 + 1];
         kvec_c[i].z = kvec_c_aux[k_index * 3 + 2];
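The mapping built by `cal_ik_global()` is worth a worked example: for `nspin == 2` a pool's `nks` local k-points are stored as `nks/2` spin-up entries followed by `nks/2` spin-down entries, and the global list stacks the two spin channels the same way, so the spin-down block is shifted by `nkstot/2`. A standalone sketch (a hypothetical free function mirroring the logic above, not the ABACUS API):

```cpp
#include <cstdio>
#include <vector>

// startk: global offset of this pool's spin-up k-points (startk_pool[my_pool]).
std::vector<int> map_ik_to_iktot(const int nks, const int nkstot, const int startk, const bool two_spin)
{
    std::vector<int> ik2iktot(nks);
    if (two_spin)
    {
        for (int ik = 0; ik < nks / 2; ++ik)
        {
            ik2iktot[ik] = startk + ik;                        // spin-up block
            ik2iktot[ik + nks / 2] = nkstot / 2 + startk + ik; // spin-down block
        }
    }
    else
    {
        for (int ik = 0; ik < nks; ++ik) { ik2iktot[ik] = startk + ik; }
    }
    return ik2iktot;
}

int main()
{
    // 8 global k-points (4 per spin); this pool holds 4 (2 per spin),
    // starting at global spin-up index 2:
    for (const int iktot : map_ik_to_iktot(4, 8, 2, true)) { std::printf("%d ", iktot); }
    std::printf("\n"); // prints: 2 3 6 7
}
```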

source/module_cell/klist.h

Lines changed: 26 additions & 33 deletions

@@ -5,7 +5,7 @@
 #include "module_base/global_variable.h"
 #include "module_base/matrix3.h"
 #include "module_cell/unitcell.h"
-
+#include "parallel_kpoints.h"
 #include <vector>
 
 class K_Vectors
@@ -26,11 +26,14 @@ class K_Vectors
     /// dim: [iks_ibz][(isym, kvec_d)]
     std::vector<std::map<int, ModuleBase::Vector3<double>>> kstars;
 
-    K_Vectors();
-    ~K_Vectors();
+    K_Vectors(){};
+    ~K_Vectors(){};
     K_Vectors& operator=(const K_Vectors&) = default;
     K_Vectors& operator=(K_Vectors&& rhs) = default;
 
+    Parallel_Kpoints para_k; ///< parallel for kpoints
+
+
     /**
      * @brief Set up the k-points for the system.
      *
@@ -103,23 +106,6 @@ class K_Vectors
      */
    void set_after_vc(const int& nspin, const ModuleBase::Matrix3& reciprocal_vec, const ModuleBase::Matrix3& latvec);
 
-    /**
-     * @brief Gets the global index of a k-point.
-     *
-     * This function gets the global index of a k-point based on its local index and the process pool ID.
-     * The global index is used when the k-points are distributed among multiple process pools.
-     *
-     * @param nkstot The total number of k-points.
-     * @param ik The local index of the k-point.
-     *
-     * @return int Returns the global index of the k-point.
-     *
-     * @note The function calculates the global index by dividing the total number of k-points (nkstot) by the number of
-     * process pools (KPAR), and adding the remainder if the process pool ID (MY_POOL) is less than the remainder.
-     * @note The function is declared as inline for efficiency.
-     */
-    static int get_ik_global(const int& ik, const int& nkstot);
-
     int get_nks() const
     {
         return this->nks;
@@ -154,19 +140,20 @@ class K_Vectors
     {
         this->nkstot_full = value;
     }
+    std::vector<int> ik2iktot; ///<[nks] map ik to the global index of k points
 
-  private:
-    int nks;         // number of symmetry-reduced k points in this pool(processor, up+dw)
-    int nkstot;      /// number of symmetry-reduced k points in full k mesh
-    int nkstot_full; /// number of k points before symmetry reduction in full k mesh
+  private:
+    int nks = 0;         ///< number of symmetry-reduced k points in this pool(processor, up+dw)
+    int nkstot = 0;      ///< number of symmetry-reduced k points in full k mesh
+    int nkstot_full = 0; ///< number of k points before symmetry reduction in full k mesh
 
-    int nspin;
-    bool kc_done;
-    bool kd_done;
-    double koffset[3]={0.0}; // used only in automatic k-points.
-    std::string k_kword; // LiuXh add 20180619
-    int k_nkstot; // LiuXh add 20180619
-    bool is_mp = false; // Monkhorst-Pack
+    int nspin = 0;
+    bool kc_done = false;
+    bool kd_done = false;
+    double koffset[3] = {0.0}; // used only in automatic k-points.
+    std::string k_kword;       // LiuXh add 20180619
+    int k_nkstot = 0;          // LiuXh add 20180619
+    bool is_mp = false;        // Monkhorst-Pack
 
     /**
      * @brief Resize the k-point related vectors according to the new k-point number.
@@ -285,8 +272,8 @@ class K_Vectors
      * be recalculated.
      */
     void update_use_ibz(const int& nkstot_ibz,
-                       const std::vector<ModuleBase::Vector3<double>>& kvec_d_ibz,
-                       const std::vector<double>& wk_ibz);
+                        const std::vector<ModuleBase::Vector3<double>>& kvec_d_ibz,
+                        const std::vector<double>& wk_ibz);
 
     /**
      * @brief Sets both the direct and Cartesian k-vectors.
@@ -391,5 +378,11 @@ class K_Vectors
      * @note The function uses the FmtCore::format function to format the output.
      */
     void print_klists(std::ofstream& fn);
+
+    /**
+     * @brief Gets the global index of a k-point.
+     * @return this->ik2iktot[ik]
+     */
+    void cal_ik_global();
 };
 #endif // KVECT_H
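For reference, the deleted `get_ik_global()` computed the same global index on the fly from the block distribution of k-points over pools (the first `nkstot % kpar` pools each hold one extra k-point); the new `ik2iktot` table precomputes the mapping once in `cal_ik_global()`. A standalone restatement of the removed arithmetic, with the globals replaced by parameters for illustration:

```cpp
// Block distribution: pools with index < rem hold nkp + 1 k-points.
int get_ik_global(const int ik, const int nkstot, const int kpar, const int my_pool)
{
    const int nkp = nkstot / kpar; // base number of k-points per pool
    const int rem = nkstot % kpar; // leftovers, one each to the first pools
    return (my_pool < rem) ? my_pool * nkp + my_pool + ik
                           : my_pool * nkp + rem + ik;
}
```

Unlike `cal_ik_global()`, this formula has no special case for the spin-doubled (`nspin == 2`) k-point layout.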

source/module_cell/parallel_kpoints.cpp

Lines changed: 2 additions & 10 deletions

@@ -3,14 +3,6 @@
 #include "module_base/parallel_common.h"
 #include "module_base/parallel_global.h"
 
-Parallel_Kpoints::Parallel_Kpoints()
-{
-}
-
-Parallel_Kpoints::~Parallel_Kpoints()
-{
-}
-
 // the kpoints here are reduced after symmetry applied.
 void Parallel_Kpoints::kinfo(int& nkstot_in,
                              const int& kpar_in,
@@ -227,7 +219,7 @@ void Parallel_Kpoints::pool_collection(double* value_re,
     return;
 }
 
-void Parallel_Kpoints::pool_collection(std::complex<double>* value, const ModuleBase::ComplexArray& w, const int& ik)
+void Parallel_Kpoints::pool_collection(std::complex<double>* value, const ModuleBase::ComplexArray& w, const int& ik) const
 {
     const int dim2 = w.getBound2();
     const int dim3 = w.getBound3();
@@ -237,7 +229,7 @@ void Parallel_Kpoints::pool_collection(std::complex<double>* value, const Module
 }
 
 template <class T, class V>
-void Parallel_Kpoints::pool_collection_aux(T* value, const V& w, const int& dim, const int& ik)
+void Parallel_Kpoints::pool_collection_aux(T* value, const V& w, const int& dim, const int& ik) const
 {
 #ifdef __MPI
     const int ik_now = ik - this->startk_pool[this->my_pool];

source/module_cell/parallel_kpoints.h

Lines changed: 8 additions & 9 deletions

@@ -9,8 +9,8 @@
 class Parallel_Kpoints
 {
   public:
-    Parallel_Kpoints();
-    ~Parallel_Kpoints();
+    Parallel_Kpoints(){};
+    ~Parallel_Kpoints(){};
 
     void kinfo(int& nkstot_in,
                const int& kpar_in,
@@ -28,9 +28,9 @@ class Parallel_Kpoints
                          const ModuleBase::realArray& a,
                          const ModuleBase::realArray& b,
                          const int& ik);
-    void pool_collection(std::complex<double>* value, const ModuleBase::ComplexArray& w, const int& ik);
+    void pool_collection(std::complex<double>* value, const ModuleBase::ComplexArray& w, const int& ik) const;
     template <class T, class V>
-    void pool_collection_aux(T* value, const V& w, const int& dim, const int& ik);
+    void pool_collection_aux(T* value, const V& w, const int& dim, const int& ik) const;
 #ifdef __MPI
     /**
      * @brief gather kpoints from all processors
@@ -46,8 +46,8 @@ class Parallel_Kpoints
     // int* nproc_pool = nullptr; it is not used
 
     // inforamation about kpoints, dim: KPAR
-    std::vector<int> nks_pool;    // number of k-points in each pool
-    std::vector<int> startk_pool; // the first k-point in each pool
+    std::vector<int> nks_pool;    // number of k-points in each pool, here use k-points without spin
+    std::vector<int> startk_pool; // the first k-point in each pool, here use k-points without spin
 
     // information about which pool each k-point belongs to,
     std::vector<int> whichpool; // whichpool[k] : the pool which k belongs to, dim: nkstot_np
@@ -68,14 +68,13 @@ class Parallel_Kpoints
         return *std::max_element(nks_pool.begin(), nks_pool.end());
     }
 
-  private:
-
+  public:
     int kpar = 0;         // number of pools
     int my_pool = 0;      // the pool index of the present processor
     int rank_in_pool = 0; // the rank in the present pool
     int nproc = 1;        // number of processors
    int nspin = 1;        // number of spins
-
+  private:
     std::vector<int> startpro_pool; // the first processor in each pool
 #ifdef __MPI
     void get_nks_pool(const int& nkstot);
