
Commit

Merge branch 'develop' into blas_update_3
Critsium-xy authored Jan 13, 2025
2 parents 91e8dc2 + b809ce6 commit 521b5c3
Showing 135 changed files with 1,560 additions and 1,385 deletions.
2 changes: 1 addition & 1 deletion docs/advanced/scf/hsolver.md
@@ -4,7 +4,7 @@

The method used to explicitly solve the KS equation is chosen with the variable `ks_solver` in the INPUT file.

When "basis_type = pw", `ks_solver` can be `cg`, `bpcg` or `dav`. The `bpcg` method only supports K-point parallelism currently. The default setting `cg` is recommended, which is band-by-band conjugate gradient diagonalization method. There is a large probability that the use of setting of `dav` , which is block Davidson diagonalization method, can be tried to improve performance.
When "basis_type = pw", `ks_solver` can be `cg`, `bpcg` or `dav`. The default setting `cg` is recommended, which is band-by-band conjugate gradient diagonalization method. There is a large probability that the use of setting of `dav` , which is block Davidson diagonalization method, can be tried to improve performance.

When "basis_type = lcao", `ks_solver` can be `genelpa` or `scalapack_gvx`. The default setting `genelpa` is recommended, which is based on ELPA (EIGENVALUE SOLVERS FOR PETAFLOP APPLICATIONS) (https://elpa.mpcdf.mpg.de/) and the kernel is auto choosed by GENELPA(https://github.com/pplab/GenELPA), usually faster than the setting of "scalapack_gvx", which is based on ScaLAPACK(Scalable Linear Algebra PACKage)

74 changes: 39 additions & 35 deletions source/module_basis/module_nao/atomic_radials.cpp
@@ -221,51 +221,55 @@ void AtomicRadials::read_abacus_orb(std::ifstream& ifs, std::ofstream* ptr_log,
is_read[i] = false;
}

int l = 0;
int izeta = 0;
for (int i = 0; i != nchi_; ++i)
for (int l = 0; l <= lmax_; ++l)
{
if (rank == 0)
for (int izeta = 0; izeta < nzeta_[l]; ++izeta)
{
/*
* read the orbital information, including
*
* 1. angular momentum
* 2. zeta number
* 3. values on the grid
* */
// ifs >> tmp >> tmp >> tmp; // skip "Type" "L" "N"
ifs >> tmp >> tmp >> tmp;
#ifdef __DEBUG
assert(tmp == "N");
#endif

ifs >> tmp >> l >> izeta;
#ifdef __DEBUG
assert(l >= 0 && l <= lmax_);
assert(izeta >= 0 && izeta < nzeta_[l]);
#endif

for (int ir = 0; ir != ngrid; ++ir)
if (rank == 0)
{
ifs >> rvalue[ir];
/*
* read the orbital information, including
*
* 1. angular momentum
* 2. zeta number
* 3. values on the grid
* */
while (ifs.good())
{
while (ifs >> tmp)
{
if (tmp == "N")
{
break;
}
}
int read_l, read_izeta;
ifs >> tmp >> read_l >> read_izeta;
if (l == read_l && izeta == read_izeta)
{
break;
}
}

for (int ir = 0; ir != ngrid; ++ir)
{
ifs >> rvalue[ir];
}
}
}

#ifdef __MPI
Parallel_Common::bcast_int(l);
Parallel_Common::bcast_int(izeta);
Parallel_Common::bcast_double(rvalue, ngrid);
Parallel_Common::bcast_double(rvalue, ngrid);
#endif
#ifdef __DEBUG
assert(index(l, izeta) >= 0 && index(l, izeta) < nchi_);
assert(!is_read[index(l, izeta)]);
assert(index(l, izeta) >= 0 && index(l, izeta) < nchi_);
assert(!is_read[index(l, izeta)]);
#endif
is_read[index(l, izeta)] = true;
is_read[index(l, izeta)] = true;

// skip the initialization of sbt_ in this stage
chi_[index(l, izeta)].build(l, true, ngrid, rgrid, rvalue, 0, izeta, symbol_, itype_, false);
chi_[index(l, izeta)].normalize();
// skip the initialization of sbt_ in this stage
chi_[index(l, izeta)].build(l, true, ngrid, rgrid, rvalue, 0, izeta, symbol_, itype_, false);
chi_[index(l, izeta)].normalize();
}
}

delete[] is_read;
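To make the refactor above easier to follow: the old loop consumed orbital blocks strictly in file order, while the new loop, for each `(l, izeta)` it needs, scans forward to the next `"N"` tag and only accepts a block whose quantum numbers match. Below is a minimal, self-contained sketch of that scanning pattern, not the ABACUS implementation; the helper name `find_orbital_block` and the toy file layout are assumptions for illustration.

```cpp
#include <cassert>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Illustrative sketch: scan the stream for the next "N" tag, read the
// (l, izeta) pair that follows, and accept the block only if it matches
// the requested quantum numbers; otherwise keep scanning.
bool find_orbital_block(std::istream& is, int l_want, int izeta_want,
                        int ngrid, std::vector<double>& rvalue)
{
    std::string tok;
    while (is >> tok)
    {
        if (tok != "N") { continue; }                   // skip until the header tag
        int l = -1, izeta = -1;
        std::string type;
        if (!(is >> type >> l >> izeta)) { return false; }
        if (l != l_want || izeta != izeta_want) { continue; }  // wrong block, keep scanning
        rvalue.assign(ngrid, 0.0);
        for (int ir = 0; ir < ngrid; ++ir) { is >> rvalue[ir]; }  // values on the grid
        return true;
    }
    return false;
}

int main()
{
    // A toy "orbital file" with two blocks; only the (l = 1, izeta = 0) block is wanted.
    std::istringstream file("Type L N  X 0 0  0.1 0.2 0.3\n"
                            "Type L N  X 1 0  0.4 0.5 0.6\n");
    std::vector<double> v;
    const bool ok = find_orbital_block(file, /*l=*/1, /*izeta=*/0, /*ngrid=*/3, v);
    assert(ok && v.size() == 3);
    std::cout << v[0] << ' ' << v[1] << ' ' << v[2] << '\n';  // 0.4 0.5 0.6
    return 0;
}
```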
62 changes: 32 additions & 30 deletions source/module_cell/klist.cpp
@@ -13,34 +13,33 @@
#include "module_cell/module_paw/paw_cell.h"
#endif

K_Vectors::K_Vectors()
void K_Vectors::cal_ik_global()
{

nspin = 0; // default spin.
kc_done = false;
kd_done = false;
nkstot_full = 0;
nks = 0;
nkstot = 0;
k_nkstot = 0; // LiuXh add 20180619
}

K_Vectors::~K_Vectors()
{
}

int K_Vectors::get_ik_global(const int& ik, const int& nkstot)
{
int nkp = nkstot / PARAM.inp.kpar;
int rem = nkstot % PARAM.inp.kpar;
if (GlobalV::MY_POOL < rem)
const int my_pool = this->para_k.my_pool;
this->ik2iktot.resize(this->nks);
#ifdef __MPI
if(this->nspin == 2)
{
return GlobalV::MY_POOL * nkp + GlobalV::MY_POOL + ik;
for (int ik = 0; ik < this->nks / 2; ++ik)
{
this->ik2iktot[ik] = this->para_k.startk_pool[my_pool] + ik;
this->ik2iktot[ik + this->nks / 2] = this->nkstot / 2 + this->para_k.startk_pool[my_pool] + ik;
}
}
else
{
return GlobalV::MY_POOL * nkp + rem + ik;
for (int ik = 0; ik < this->nks; ++ik)
{
this->ik2iktot[ik] = this->para_k.startk_pool[my_pool] + ik;
}
}
#else
for (int ik = 0; ik < this->nks; ++ik)
{
this->ik2iktot[ik] = ik;
}
#endif

}

void K_Vectors::set(const UnitCell& ucell,
@@ -148,12 +147,12 @@ void K_Vectors::set(const UnitCell& ucell,
// It's very important in parallel case,
// firstly do the mpi_k() and then
// do set_kup_and_kdw()
GlobalC::Pkpoints.kinfo(nkstot,
GlobalV::KPAR,
GlobalV::MY_POOL,
GlobalV::RANK_IN_POOL,
GlobalV::NPROC,
nspin_in); // assign k points to several process pools
this->para_k.kinfo(nkstot,
GlobalV::KPAR,
GlobalV::MY_POOL,
GlobalV::RANK_IN_POOL,
GlobalV::NPROC,
nspin_in); // assign k points to several process pools
#ifdef __MPI
// distribute K point data to the corresponding process
this->mpi_k(); // 2008-4-29
@@ -162,6 +161,9 @@
// set the k vectors for the up and down spin
this->set_kup_and_kdw();

// get ik2iktot
this->cal_ik_global();

this->print_klists(ofs);

// std::cout << " NUMBER OF K-POINTS : " << nkstot << std::endl;
@@ -1163,7 +1165,7 @@ void K_Vectors::mpi_k()

Parallel_Common::bcast_double(koffset, 3);

this->nks = GlobalC::Pkpoints.nks_pool[GlobalV::MY_POOL];
this->nks = this->para_k.nks_pool[GlobalV::MY_POOL];

GlobalV::ofs_running << std::endl;
ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "k-point number in this process", nks);
@@ -1217,7 +1219,7 @@ void K_Vectors::mpi_k()
for (int i = 0; i < nks; i++)
{
// 3 is because each k point has three value:kx, ky, kz
k_index = i + GlobalC::Pkpoints.startk_pool[GlobalV::MY_POOL];
k_index = i + this->para_k.startk_pool[GlobalV::MY_POOL];
kvec_c[i].x = kvec_c_aux[k_index * 3];
kvec_c[i].y = kvec_c_aux[k_index * 3 + 1];
kvec_c[i].z = kvec_c_aux[k_index * 3 + 2];
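As a small sanity check on the new mapping, the sketch below reproduces the layout that `cal_ik_global()` builds, assuming `startk_pool[my_pool]` holds the first spin-independent global k index owned by this pool (as the comments in parallel_kpoints.h suggest). The helper name `build_ik2iktot` and the concrete numbers are illustrative only.

```cpp
#include <cassert>
#include <vector>

// Illustrative only: reproduce the ik2iktot layout built by cal_ik_global().
// For nspin == 2 the local list stores all spin-up k points first and then all
// spin-down ones, so the second half is offset by nkstot / 2 in the global list.
std::vector<int> build_ik2iktot(int nks, int nkstot, int startk, bool spin2)
{
    std::vector<int> ik2iktot(nks);
    if (spin2)
    {
        for (int ik = 0; ik < nks / 2; ++ik)
        {
            ik2iktot[ik] = startk + ik;                        // spin-up block
            ik2iktot[ik + nks / 2] = nkstot / 2 + startk + ik; // spin-down block
        }
    }
    else
    {
        for (int ik = 0; ik < nks; ++ik) { ik2iktot[ik] = startk + ik; }
    }
    return ik2iktot;
}

int main()
{
    // Example: 8 spin-resolved k points in total (4 per spin), two pools.
    // The second pool owns 2 k points per spin and starts at global index 2.
    const std::vector<int> map = build_ik2iktot(/*nks=*/4, /*nkstot=*/8, /*startk=*/2, /*spin2=*/true);
    assert(map[0] == 2 && map[1] == 3); // spin-up:   global 2, 3
    assert(map[2] == 6 && map[3] == 7); // spin-down: global 6, 7
    return 0;
}
```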
59 changes: 26 additions & 33 deletions source/module_cell/klist.h
@@ -5,7 +5,7 @@
#include "module_base/global_variable.h"
#include "module_base/matrix3.h"
#include "module_cell/unitcell.h"

#include "parallel_kpoints.h"
#include <vector>

class K_Vectors
@@ -26,11 +26,14 @@ class K_Vectors
/// dim: [iks_ibz][(isym, kvec_d)]
std::vector<std::map<int, ModuleBase::Vector3<double>>> kstars;

K_Vectors();
~K_Vectors();
K_Vectors(){};
~K_Vectors(){};
K_Vectors& operator=(const K_Vectors&) = default;
K_Vectors& operator=(K_Vectors&& rhs) = default;

Parallel_Kpoints para_k; ///< parallel for kpoints


/**
* @brief Set up the k-points for the system.
*
@@ -103,23 +106,6 @@ class K_Vectors
*/
void set_after_vc(const int& nspin, const ModuleBase::Matrix3& reciprocal_vec, const ModuleBase::Matrix3& latvec);

/**
* @brief Gets the global index of a k-point.
*
* This function gets the global index of a k-point based on its local index and the process pool ID.
* The global index is used when the k-points are distributed among multiple process pools.
*
* @param nkstot The total number of k-points.
* @param ik The local index of the k-point.
*
* @return int Returns the global index of the k-point.
*
* @note The function calculates the global index by dividing the total number of k-points (nkstot) by the number of
* process pools (KPAR), and adding the remainder if the process pool ID (MY_POOL) is less than the remainder.
* @note The function is declared as inline for efficiency.
*/
static int get_ik_global(const int& ik, const int& nkstot);

int get_nks() const
{
return this->nks;
@@ -154,19 +140,20 @@ class K_Vectors
{
this->nkstot_full = value;
}
std::vector<int> ik2iktot; ///<[nks] map ik to the global index of k points

private:
int nks; // number of symmetry-reduced k points in this pool(processor, up+dw)
int nkstot; /// number of symmetry-reduced k points in full k mesh
int nkstot_full; /// number of k points before symmetry reduction in full k mesh
private:
int nks = 0; ///< number of symmetry-reduced k points in this pool(processor, up+dw)
int nkstot = 0; ///< number of symmetry-reduced k points in full k mesh
int nkstot_full = 0; ///< number of k points before symmetry reduction in full k mesh

int nspin;
bool kc_done;
bool kd_done;
double koffset[3]={0.0}; // used only in automatic k-points.
std::string k_kword; // LiuXh add 20180619
int k_nkstot; // LiuXh add 20180619
bool is_mp = false; // Monkhorst-Pack
int nspin = 0;
bool kc_done = false;
bool kd_done = false;
double koffset[3] = {0.0}; // used only in automatic k-points.
std::string k_kword; // LiuXh add 20180619
int k_nkstot = 0; // LiuXh add 20180619
bool is_mp = false; // Monkhorst-Pack

/**
* @brief Resize the k-point related vectors according to the new k-point number.
@@ -285,8 +272,8 @@ class K_Vectors
* be recalculated.
*/
void update_use_ibz(const int& nkstot_ibz,
const std::vector<ModuleBase::Vector3<double>>& kvec_d_ibz,
const std::vector<double>& wk_ibz);
const std::vector<ModuleBase::Vector3<double>>& kvec_d_ibz,
const std::vector<double>& wk_ibz);

/**
* @brief Sets both the direct and Cartesian k-vectors.
@@ -391,5 +378,11 @@ class K_Vectors
* @note The function uses the FmtCore::format function to format the output.
*/
void print_klists(std::ofstream& fn);

/**
* @brief Gets the global index of a k-point.
* @return this->ik2iktot[ik]
*/
void cal_ik_global();
};
#endif // KVECT_H
12 changes: 2 additions & 10 deletions source/module_cell/parallel_kpoints.cpp
@@ -3,14 +3,6 @@
#include "module_base/parallel_common.h"
#include "module_base/parallel_global.h"

Parallel_Kpoints::Parallel_Kpoints()
{
}

Parallel_Kpoints::~Parallel_Kpoints()
{
}

// the kpoints here are reduced after symmetry applied.
void Parallel_Kpoints::kinfo(int& nkstot_in,
const int& kpar_in,
@@ -227,7 +219,7 @@ void Parallel_Kpoints::pool_collection(double* value_re,
return;
}

void Parallel_Kpoints::pool_collection(std::complex<double>* value, const ModuleBase::ComplexArray& w, const int& ik)
void Parallel_Kpoints::pool_collection(std::complex<double>* value, const ModuleBase::ComplexArray& w, const int& ik) const
{
const int dim2 = w.getBound2();
const int dim3 = w.getBound3();
@@ -237,7 +229,7 @@ void Parallel_Kpoints::pool_collection(std::complex<double>* value, const Module
}

template <class T, class V>
void Parallel_Kpoints::pool_collection_aux(T* value, const V& w, const int& dim, const int& ik)
void Parallel_Kpoints::pool_collection_aux(T* value, const V& w, const int& dim, const int& ik) const
{
#ifdef __MPI
const int ik_now = ik - this->startk_pool[this->my_pool];
17 changes: 8 additions & 9 deletions source/module_cell/parallel_kpoints.h
@@ -9,8 +9,8 @@
class Parallel_Kpoints
{
public:
Parallel_Kpoints();
~Parallel_Kpoints();
Parallel_Kpoints(){};
~Parallel_Kpoints(){};

void kinfo(int& nkstot_in,
const int& kpar_in,
@@ -28,9 +28,9 @@ class Parallel_Kpoints
const ModuleBase::realArray& a,
const ModuleBase::realArray& b,
const int& ik);
void pool_collection(std::complex<double>* value, const ModuleBase::ComplexArray& w, const int& ik);
void pool_collection(std::complex<double>* value, const ModuleBase::ComplexArray& w, const int& ik) const;
template <class T, class V>
void pool_collection_aux(T* value, const V& w, const int& dim, const int& ik);
void pool_collection_aux(T* value, const V& w, const int& dim, const int& ik) const;
#ifdef __MPI
/**
* @brief gather kpoints from all processors
@@ -46,8 +46,8 @@
// int* nproc_pool = nullptr; it is not used

// information about kpoints, dim: KPAR
std::vector<int> nks_pool; // number of k-points in each pool
std::vector<int> startk_pool; // the first k-point in each pool
std::vector<int> nks_pool; // number of k-points in each pool, here use k-points without spin
std::vector<int> startk_pool; // the first k-point in each pool, here use k-points without spin

// information about which pool each k-point belongs to,
std::vector<int> whichpool; // whichpool[k] : the pool which k belongs to, dim: nkstot_np
@@ -68,14 +68,13 @@
return *std::max_element(nks_pool.begin(), nks_pool.end());
}

private:

public:
int kpar = 0; // number of pools
int my_pool = 0; // the pool index of the present processor
int rank_in_pool = 0; // the rank in the present pool
int nproc = 1; // number of processors
int nspin = 1; // number of spins

private:
std::vector<int> startpro_pool; // the first processor in each pool
#ifdef __MPI
void get_nks_pool(const int& nkstot);
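The removed `K_Vectors::get_ik_global` encoded a simple block distribution: `nkstot` k points split over `kpar` pools, with the first `nkstot % kpar` pools receiving one extra point. Assuming `nks_pool` and `startk_pool` follow that same layout, which is what makes the new `startk_pool`-based lookup in `cal_ik_global()` equivalent to the old formula, a minimal sketch of the partition is given below; `split_kpoints_over_pools` and `PoolLayout` are illustrative names, not the ABACUS API.

```cpp
#include <cassert>
#include <vector>

// Illustrative sketch of the block distribution implied by the removed
// K_Vectors::get_ik_global(): nkstot k points split over kpar pools, with the
// first (nkstot % kpar) pools taking one extra point.
struct PoolLayout
{
    std::vector<int> nks_pool;    // number of k points in each pool
    std::vector<int> startk_pool; // first global k index owned by each pool
};

PoolLayout split_kpoints_over_pools(int nkstot, int kpar)
{
    PoolLayout layout;
    layout.nks_pool.resize(kpar);
    layout.startk_pool.resize(kpar);
    const int nkp = nkstot / kpar;
    const int rem = nkstot % kpar;
    int start = 0;
    for (int pool = 0; pool < kpar; ++pool)
    {
        layout.nks_pool[pool] = nkp + (pool < rem ? 1 : 0);
        layout.startk_pool[pool] = start;
        start += layout.nks_pool[pool];
    }
    return layout;
}

int main()
{
    // 10 k points over 3 pools -> sizes {4, 3, 3}, starts {0, 4, 7}.
    const PoolLayout layout = split_kpoints_over_pools(10, 3);
    assert(layout.nks_pool[0] == 4 && layout.nks_pool[2] == 3);
    assert(layout.startk_pool[1] == 4 && layout.startk_pool[2] == 7);
    // With this layout, startk_pool[p] + ik reproduces the old get_ik_global(ik, nkstot).
    return 0;
}
```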