Commit 521b5c3

Merge branch 'develop' into blas_update_3

2 parents 91e8dc2 + b809ce6

135 files changed: +1560 -1385 lines changed
docs/advanced/scf/hsolver.md

Lines changed: 1 addition & 1 deletion

@@ -4,7 +4,7 @@
 
 Method of explicit solving KS-equation can be chosen by variable "ks_solver" in INPUT file.
 
-When "basis_type = pw", `ks_solver` can be `cg`, `bpcg` or `dav`. The `bpcg` method only supports K-point parallelism currently. The default setting `cg` is recommended, which is band-by-band conjugate gradient diagonalization method. There is a large probability that the use of setting of `dav` , which is block Davidson diagonalization method, can be tried to improve performance.
+When "basis_type = pw", `ks_solver` can be `cg`, `bpcg` or `dav`. The default setting `cg` is recommended, which is band-by-band conjugate gradient diagonalization method. There is a large probability that the use of setting of `dav` , which is block Davidson diagonalization method, can be tried to improve performance.
 
 When "basis_type = lcao", `ks_solver` can be `genelpa` or `scalapack_gvx`. The default setting `genelpa` is recommended, which is based on ELPA (EIGENVALUE SOLVERS FOR PETAFLOP APPLICATIONS) (https://elpa.mpcdf.mpg.de/) and the kernel is auto choosed by GENELPA(https://github.com/pplab/GenELPA), usually faster than the setting of "scalapack_gvx", which is based on ScaLAPACK(Scalable Linear Algebra PACKage)
 
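For context, the solver is selected in the ABACUS INPUT file. Below is a minimal illustrative fragment, assuming the usual key-value INPUT format; the specific values are examples chosen for illustration, not part of this commit:

```
INPUT_PARAMETERS
basis_type   pw     # plane-wave basis
ks_solver    dav    # default is cg; dav (block Davidson) can be worth trying
```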

source/module_basis/module_nao/atomic_radials.cpp

Lines changed: 39 additions & 35 deletions

@@ -221,51 +221,55 @@ void AtomicRadials::read_abacus_orb(std::ifstream& ifs, std::ofstream* ptr_log,
         is_read[i] = false;
     }
 
-    int l = 0;
-    int izeta = 0;
-    for (int i = 0; i != nchi_; ++i)
+    for (int l = 0; l <= lmax_; ++l)
     {
-        if (rank == 0)
+        for (int izeta = 0; izeta < nzeta_[l]; ++izeta)
         {
-            /*
-             * read the orbital information, including
-             *
-             * 1. angular momentum
-             * 2. zeta number
-             * 3. values on the grid
-             * */
-            // ifs >> tmp >> tmp >> tmp; // skip "Type" "L" "N"
-            ifs >> tmp >> tmp >> tmp;
-#ifdef __DEBUG
-            assert(tmp == "N");
-#endif
-
-            ifs >> tmp >> l >> izeta;
-#ifdef __DEBUG
-            assert(l >= 0 && l <= lmax_);
-            assert(izeta >= 0 && izeta < nzeta_[l]);
-#endif
-
-            for (int ir = 0; ir != ngrid; ++ir)
+            if (rank == 0)
             {
-                ifs >> rvalue[ir];
+                /*
+                 * read the orbital information, including
+                 *
+                 * 1. angular momentum
+                 * 2. zeta number
+                 * 3. values on the grid
+                 * */
+                while (ifs.good())
+                {
+                    while (ifs >> tmp)
+                    {
+                        if (tmp == "N")
+                        {
+                            break;
+                        }
+                    }
+                    int read_l, read_izeta;
+                    ifs >> tmp >> read_l >> read_izeta;
+                    if (l == read_l && izeta == read_izeta)
+                    {
+                        break;
+                    }
+                }
+
+                for (int ir = 0; ir != ngrid; ++ir)
+                {
+                    ifs >> rvalue[ir];
+                }
             }
-        }
 
 #ifdef __MPI
-        Parallel_Common::bcast_int(l);
-        Parallel_Common::bcast_int(izeta);
-        Parallel_Common::bcast_double(rvalue, ngrid);
+            Parallel_Common::bcast_double(rvalue, ngrid);
 #endif
 #ifdef __DEBUG
-        assert(index(l, izeta) >= 0 && index(l, izeta) < nchi_);
-        assert(!is_read[index(l, izeta)]);
+            assert(index(l, izeta) >= 0 && index(l, izeta) < nchi_);
+            assert(!is_read[index(l, izeta)]);
 #endif
-        is_read[index(l, izeta)] = true;
+            is_read[index(l, izeta)] = true;
 
-        // skip the initialization of sbt_ in this stage
-        chi_[index(l, izeta)].build(l, true, ngrid, rgrid, rvalue, 0, izeta, symbol_, itype_, false);
-        chi_[index(l, izeta)].normalize();
+            // skip the initialization of sbt_ in this stage
+            chi_[index(l, izeta)].build(l, true, ngrid, rgrid, rvalue, 0, izeta, symbol_, itype_, false);
+            chi_[index(l, izeta)].normalize();
+        }
     }
 
     delete[] is_read;
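The rewrite above changes the reading strategy: the loops now run over every (l, izeta) pair explicitly, and rank 0 scans the stream for the block whose header matches, so orbital blocks no longer have to appear in a fixed order in the file. A minimal standalone sketch of that scan, assuming each block header is a `Type L N` tag line followed by `<itype> <l> <izeta>` (a hypothetical helper, not the ABACUS source):

```cpp
#include <cassert>
#include <iostream>
#include <sstream>
#include <string>

// Advance `ifs` past the header of the orbital block labelled (l, izeta);
// returns false if no matching block exists.
bool seek_orbital_block(std::istream& ifs, const int l, const int izeta)
{
    std::string tmp;
    while (ifs.good())
    {
        while (ifs >> tmp) // skip tokens until the "N" tag of a block header
        {
            if (tmp == "N") { break; }
        }
        int read_l = -1, read_izeta = -1;
        if (!(ifs >> tmp >> read_l >> read_izeta)) { return false; } // hit EOF
        if (read_l == l && read_izeta == izeta) { return true; }
    }
    return false;
}

int main()
{
    // two blocks, deliberately out of order: (l=1, izeta=0) before (l=0, izeta=0)
    std::istringstream orb("Type L N\n0 1 0\n0.1 0.2\nType L N\n0 0 0\n0.3 0.4\n");
    assert(seek_orbital_block(orb, 0, 0));
    double v0 = 0, v1 = 0;
    orb >> v0 >> v1;
    std::cout << v0 << " " << v1 << "\n"; // prints the requested block: 0.3 0.4
}
```

A side effect of running the same (l, izeta) loops on every rank is that only `rvalue` still needs an MPI broadcast, which is why the `bcast_int(l)` and `bcast_int(izeta)` calls were dropped.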

source/module_cell/klist.cpp

Lines changed: 32 additions & 30 deletions

@@ -13,34 +13,33 @@
 #include "module_cell/module_paw/paw_cell.h"
 #endif
 
-K_Vectors::K_Vectors()
+void K_Vectors::cal_ik_global()
 {
-
-    nspin = 0; // default spin.
-    kc_done = false;
-    kd_done = false;
-    nkstot_full = 0;
-    nks = 0;
-    nkstot = 0;
-    k_nkstot = 0; // LiuXh add 20180619
-}
-
-K_Vectors::~K_Vectors()
-{
-}
-
-int K_Vectors::get_ik_global(const int& ik, const int& nkstot)
-{
-    int nkp = nkstot / PARAM.inp.kpar;
-    int rem = nkstot % PARAM.inp.kpar;
-    if (GlobalV::MY_POOL < rem)
+    const int my_pool = this->para_k.my_pool;
+    this->ik2iktot.resize(this->nks);
+#ifdef __MPI
+    if(this->nspin == 2)
     {
-        return GlobalV::MY_POOL * nkp + GlobalV::MY_POOL + ik;
+        for (int ik = 0; ik < this->nks / 2; ++ik)
+        {
+            this->ik2iktot[ik] = this->para_k.startk_pool[my_pool] + ik;
+            this->ik2iktot[ik + this->nks / 2] = this->nkstot / 2 + this->para_k.startk_pool[my_pool] + ik;
+        }
     }
     else
     {
-        return GlobalV::MY_POOL * nkp + rem + ik;
+        for (int ik = 0; ik < this->nks; ++ik)
+        {
+            this->ik2iktot[ik] = this->para_k.startk_pool[my_pool] + ik;
+        }
     }
+#else
+    for (int ik = 0; ik < this->nks; ++ik)
+    {
+        this->ik2iktot[ik] = ik;
+    }
+#endif
+
 }
 
 void K_Vectors::set(const UnitCell& ucell,
@@ -148,12 +147,12 @@ void K_Vectors::set(const UnitCell& ucell,
     // It's very important in parallel case,
     // firstly do the mpi_k() and then
     // do set_kup_and_kdw()
-    GlobalC::Pkpoints.kinfo(nkstot,
-                            GlobalV::KPAR,
-                            GlobalV::MY_POOL,
-                            GlobalV::RANK_IN_POOL,
-                            GlobalV::NPROC,
-                            nspin_in); // assign k points to several process pools
+    this->para_k.kinfo(nkstot,
+                       GlobalV::KPAR,
+                       GlobalV::MY_POOL,
+                       GlobalV::RANK_IN_POOL,
+                       GlobalV::NPROC,
+                       nspin_in); // assign k points to several process pools
 #ifdef __MPI
     // distribute K point data to the corresponding process
     this->mpi_k(); // 2008-4-29
@@ -162,6 +161,9 @@
     // set the k vectors for the up and down spin
     this->set_kup_and_kdw();
 
+    // get ik2iktot
+    this->cal_ik_global();
+
     this->print_klists(ofs);
 
     // std::cout << " NUMBER OF K-POINTS : " << nkstot << std::endl;
@@ -1163,7 +1165,7 @@ void K_Vectors::mpi_k()
 
     Parallel_Common::bcast_double(koffset, 3);
 
-    this->nks = GlobalC::Pkpoints.nks_pool[GlobalV::MY_POOL];
+    this->nks = this->para_k.nks_pool[GlobalV::MY_POOL];
 
     GlobalV::ofs_running << std::endl;
     ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "k-point number in this process", nks);
@@ -1217,7 +1219,7 @@
     for (int i = 0; i < nks; i++)
     {
         // 3 is because each k point has three value:kx, ky, kz
-        k_index = i + GlobalC::Pkpoints.startk_pool[GlobalV::MY_POOL];
+        k_index = i + this->para_k.startk_pool[GlobalV::MY_POOL];
         kvec_c[i].x = kvec_c_aux[k_index * 3];
         kvec_c[i].y = kvec_c_aux[k_index * 3 + 1];
         kvec_c[i].z = kvec_c_aux[k_index * 3 + 2];
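The mapping built by `cal_ik_global()` is worth a worked example: for `nspin == 2` a pool's `nks` local k-points are stored as `nks/2` spin-up entries followed by `nks/2` spin-down entries, and the global list stacks the two spin channels the same way, so the spin-down block is shifted by `nkstot/2`. A standalone sketch (a hypothetical free function mirroring the logic above, not the ABACUS API):

```cpp
#include <cstdio>
#include <vector>

// startk: global offset of this pool's spin-up k-points (startk_pool[my_pool]).
std::vector<int> map_ik_to_iktot(const int nks, const int nkstot, const int startk, const bool two_spin)
{
    std::vector<int> ik2iktot(nks);
    if (two_spin)
    {
        for (int ik = 0; ik < nks / 2; ++ik)
        {
            ik2iktot[ik] = startk + ik;                        // spin-up block
            ik2iktot[ik + nks / 2] = nkstot / 2 + startk + ik; // spin-down block
        }
    }
    else
    {
        for (int ik = 0; ik < nks; ++ik) { ik2iktot[ik] = startk + ik; }
    }
    return ik2iktot;
}

int main()
{
    // 8 global k-points (4 per spin); this pool holds 4 (2 per spin),
    // starting at global spin-up index 2:
    for (const int iktot : map_ik_to_iktot(4, 8, 2, true)) { std::printf("%d ", iktot); }
    std::printf("\n"); // prints: 2 3 6 7
}
```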

source/module_cell/klist.h

Lines changed: 26 additions & 33 deletions

@@ -5,7 +5,7 @@
 #include "module_base/global_variable.h"
 #include "module_base/matrix3.h"
 #include "module_cell/unitcell.h"
-
+#include "parallel_kpoints.h"
 #include <vector>
 
 class K_Vectors
@@ -26,11 +26,14 @@ class K_Vectors
     /// dim: [iks_ibz][(isym, kvec_d)]
     std::vector<std::map<int, ModuleBase::Vector3<double>>> kstars;
 
-    K_Vectors();
-    ~K_Vectors();
+    K_Vectors(){};
+    ~K_Vectors(){};
     K_Vectors& operator=(const K_Vectors&) = default;
     K_Vectors& operator=(K_Vectors&& rhs) = default;
 
+    Parallel_Kpoints para_k; ///< parallel for kpoints
+
+
     /**
      * @brief Set up the k-points for the system.
      *
@@ -103,23 +106,6 @@ class K_Vectors
      */
    void set_after_vc(const int& nspin, const ModuleBase::Matrix3& reciprocal_vec, const ModuleBase::Matrix3& latvec);
 
-    /**
-     * @brief Gets the global index of a k-point.
-     *
-     * This function gets the global index of a k-point based on its local index and the process pool ID.
-     * The global index is used when the k-points are distributed among multiple process pools.
-     *
-     * @param nkstot The total number of k-points.
-     * @param ik The local index of the k-point.
-     *
-     * @return int Returns the global index of the k-point.
-     *
-     * @note The function calculates the global index by dividing the total number of k-points (nkstot) by the number of
-     * process pools (KPAR), and adding the remainder if the process pool ID (MY_POOL) is less than the remainder.
-     * @note The function is declared as inline for efficiency.
-     */
-    static int get_ik_global(const int& ik, const int& nkstot);
-
     int get_nks() const
     {
         return this->nks;
@@ -154,19 +140,20 @@ class K_Vectors
     {
         this->nkstot_full = value;
     }
+    std::vector<int> ik2iktot; ///<[nks] map ik to the global index of k points
 
-  private:
-    int nks;         // number of symmetry-reduced k points in this pool(processor, up+dw)
-    int nkstot;      /// number of symmetry-reduced k points in full k mesh
-    int nkstot_full; /// number of k points before symmetry reduction in full k mesh
+  private:
+    int nks = 0;         ///< number of symmetry-reduced k points in this pool(processor, up+dw)
+    int nkstot = 0;      ///< number of symmetry-reduced k points in full k mesh
+    int nkstot_full = 0; ///< number of k points before symmetry reduction in full k mesh
 
-    int nspin;
-    bool kc_done;
-    bool kd_done;
-    double koffset[3]={0.0}; // used only in automatic k-points.
-    std::string k_kword; // LiuXh add 20180619
-    int k_nkstot; // LiuXh add 20180619
-    bool is_mp = false; // Monkhorst-Pack
+    int nspin = 0;
+    bool kc_done = false;
+    bool kd_done = false;
+    double koffset[3] = {0.0}; // used only in automatic k-points.
+    std::string k_kword;       // LiuXh add 20180619
+    int k_nkstot = 0;          // LiuXh add 20180619
+    bool is_mp = false;        // Monkhorst-Pack
 
     /**
      * @brief Resize the k-point related vectors according to the new k-point number.
@@ -285,8 +272,8 @@ class K_Vectors
      * be recalculated.
      */
     void update_use_ibz(const int& nkstot_ibz,
-                       const std::vector<ModuleBase::Vector3<double>>& kvec_d_ibz,
-                       const std::vector<double>& wk_ibz);
+                        const std::vector<ModuleBase::Vector3<double>>& kvec_d_ibz,
+                        const std::vector<double>& wk_ibz);
 
     /**
      * @brief Sets both the direct and Cartesian k-vectors.
@@ -391,5 +378,11 @@ class K_Vectors
      * @note The function uses the FmtCore::format function to format the output.
      */
     void print_klists(std::ofstream& fn);
+
+    /**
+     * @brief Gets the global index of a k-point.
+     * @return this->ik2iktot[ik]
+     */
+    void cal_ik_global();
 };
 #endif // KVECT_H
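For reference, the deleted `get_ik_global()` computed the same global index on the fly from the block distribution of k-points over pools (the first `nkstot % kpar` pools each hold one extra k-point); the new `ik2iktot` table precomputes the mapping once in `cal_ik_global()`. A standalone restatement of the removed arithmetic, with the globals replaced by parameters for illustration:

```cpp
// Block distribution: pools with index < rem hold nkp + 1 k-points.
int get_ik_global(const int ik, const int nkstot, const int kpar, const int my_pool)
{
    const int nkp = nkstot / kpar; // base number of k-points per pool
    const int rem = nkstot % kpar; // leftovers, one each to the first pools
    return (my_pool < rem) ? my_pool * nkp + my_pool + ik
                           : my_pool * nkp + rem + ik;
}
```

Unlike `cal_ik_global()`, this formula has no special case for the spin-doubled (`nspin == 2`) k-point layout.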

source/module_cell/parallel_kpoints.cpp

Lines changed: 2 additions & 10 deletions

@@ -3,14 +3,6 @@
 #include "module_base/parallel_common.h"
 #include "module_base/parallel_global.h"
 
-Parallel_Kpoints::Parallel_Kpoints()
-{
-}
-
-Parallel_Kpoints::~Parallel_Kpoints()
-{
-}
-
 // the kpoints here are reduced after symmetry applied.
 void Parallel_Kpoints::kinfo(int& nkstot_in,
                              const int& kpar_in,
@@ -227,7 +219,7 @@ void Parallel_Kpoints::pool_collection(double* value_re,
     return;
 }
 
-void Parallel_Kpoints::pool_collection(std::complex<double>* value, const ModuleBase::ComplexArray& w, const int& ik)
+void Parallel_Kpoints::pool_collection(std::complex<double>* value, const ModuleBase::ComplexArray& w, const int& ik) const
 {
     const int dim2 = w.getBound2();
     const int dim3 = w.getBound3();
@@ -237,7 +229,7 @@ void Parallel_Kpoints::pool_collection(std::complex<double>* value, const Module
 }
 
 template <class T, class V>
-void Parallel_Kpoints::pool_collection_aux(T* value, const V& w, const int& dim, const int& ik)
+void Parallel_Kpoints::pool_collection_aux(T* value, const V& w, const int& dim, const int& ik) const
 {
 #ifdef __MPI
     const int ik_now = ik - this->startk_pool[this->my_pool];

source/module_cell/parallel_kpoints.h

Lines changed: 8 additions & 9 deletions

@@ -9,8 +9,8 @@
 class Parallel_Kpoints
 {
   public:
-    Parallel_Kpoints();
-    ~Parallel_Kpoints();
+    Parallel_Kpoints(){};
+    ~Parallel_Kpoints(){};
 
     void kinfo(int& nkstot_in,
                const int& kpar_in,
@@ -28,9 +28,9 @@ class Parallel_Kpoints
                          const ModuleBase::realArray& a,
                          const ModuleBase::realArray& b,
                          const int& ik);
-    void pool_collection(std::complex<double>* value, const ModuleBase::ComplexArray& w, const int& ik);
+    void pool_collection(std::complex<double>* value, const ModuleBase::ComplexArray& w, const int& ik) const;
     template <class T, class V>
-    void pool_collection_aux(T* value, const V& w, const int& dim, const int& ik);
+    void pool_collection_aux(T* value, const V& w, const int& dim, const int& ik) const;
 #ifdef __MPI
     /**
      * @brief gather kpoints from all processors
@@ -46,8 +46,8 @@ class Parallel_Kpoints
     // int* nproc_pool = nullptr; it is not used
 
     // inforamation about kpoints, dim: KPAR
-    std::vector<int> nks_pool;    // number of k-points in each pool
-    std::vector<int> startk_pool; // the first k-point in each pool
+    std::vector<int> nks_pool;    // number of k-points in each pool, here use k-points without spin
+    std::vector<int> startk_pool; // the first k-point in each pool, here use k-points without spin
 
     // information about which pool each k-point belongs to,
     std::vector<int> whichpool; // whichpool[k] : the pool which k belongs to, dim: nkstot_np
@@ -68,14 +68,13 @@ class Parallel_Kpoints
         return *std::max_element(nks_pool.begin(), nks_pool.end());
     }
 
-  private:
-
+  public:
     int kpar = 0;         // number of pools
     int my_pool = 0;      // the pool index of the present processor
     int rank_in_pool = 0; // the rank in the present pool
     int nproc = 1;        // number of processors
    int nspin = 1;        // number of spins
-
+  private:
     std::vector<int> startpro_pool; // the first processor in each pool
 #ifdef __MPI
     void get_nks_pool(const int& nkstot);
