/*
 * Review notes for this patch (src/rcb.c, src/rib.c, src/rsb.c).
 *
 * src/rcb.c: rcb() declares size and rank in a single statement.
 * src/rib.c: adds an extern declaration for inv_power_serial(); renames
 *   get_rib_axis() to get_rib_proj() and updates both call sites; stores the
 *   sums of coordinate products in a flat double[9] instead of double[3][3];
 *   scales the first ndim entries of ev (the vector passed to power_serial())
 *   to unit length before writing each element's fiedler value; rib() now
 *   splits the communicator with comm_split()/comm_dup() instead of
 *   MPI_Comm_split()/comm_init().
 * src/rsb.c: adds an extern declaration for rib(); load_balance() sets
 *   elist->n to 0 right after array_init_() and computes ndim earlier;
 *   parrsb_part_mesh() prints the "fewer elements than processors" message
 *   to stderr and flushes it before returning 1.
 *
 * Fix applied on top of the submitted patch: ev is zero-initialized. The
 * fiedler projection always reads ev[0], ev[1] and ev[2], while the
 * normalization loops only touch the first ndim entries, and load_balance()
 * selects ndim == 2 whenever nv != 8. Zero-initializing makes ev[2]
 * contribute nothing in 2D instead of being read uninitialized.
 *
 * NOTE(review): norm is divided by without a zero check -- confirm that
 * power_serial() cannot leave ev as the zero vector.
 */
diff --git a/src/rcb.c b/src/rcb.c index 5defa8c5..802c5ced 100644 --- a/src/rcb.c +++ b/src/rcb.c @@ -143,9 +143,7 @@ int rcb(struct array *elements, size_t unit_size, int ndim, struct comm *ci, struct comm c, t; comm_dup(&c, ci); - int size = c.np; - int rank = c.id; - + int size = c.np, rank = c.id; while (size > 1) { rcb_level(elements, unit_size, ndim, &c, bfr); diff --git a/src/rib.c b/src/rib.c index 1aac9031..889848b6 100644 --- a/src/rib.c +++ b/src/rib.c @@ -2,18 +2,14 @@ #include "sort.h" extern int power_serial(double *y, int N, double *A, int verbose); +extern int inv_power_serial(double *y, uint N, double *A, int verbose); -static void get_rib_axis(char *elems, uint nel, size_t unit_size, int ndim, +static void get_rib_proj(char *elems, uint nel, size_t unit_size, int ndim, struct comm *c) { - double avg[3]; - avg[0] = avg[1] = avg[2] = 0.0; - struct rcb_element *elem; - uint i; - for (i = 0; i < nel; i++) { - elem = (struct rcb_element *)(elems + i * unit_size); - avg[0] += elem->coord[0]; - avg[1] += elem->coord[1]; - avg[2] += elem->coord[2]; + double avg[3] = {0, 0, 0}; + for (uint i = 0; i < nel; i++) { + struct rcb_element *ei = (struct rcb_element *)(elems + i * unit_size); + avg[0] += ei->coord[0], avg[1] += ei->coord[1], avg[2] += ei->coord[2]; } slong nelg = nel; @@ -23,37 +19,44 @@ static void get_rib_axis(char *elems, uint nel, size_t unit_size, int ndim, comm_allreduce(c, gs_long, gs_add, &nelg, 1, buf); } - avg[0] /= nelg; - avg[1] /= nelg; - avg[2] /= nelg; + avg[0] /= nelg, avg[1] /= nelg, avg[2] /= nelg; - double I[3][3]; - for (i = 0; i < 3; i++) - I[i][0] = I[i][1] = I[i][2] = 0.0; + double I[9]; + for (unsigned i = 0; i < 9; i++) + I[i] = 0; double x, y, z; - for (i = 0; i < nel; i++) { - elem = (struct rcb_element *)(elems + i * unit_size); - x = elem->coord[0] - avg[0]; - y = elem->coord[1] - avg[1]; - z = elem->coord[2] - avg[2]; - I[0][0] += x * x, I[0][1] += x * y, I[0][2] += x * z; - I[1][0] += y * x, I[1][1] += y * y, 
I[1][2] += y * z; - I[2][0] += z * x, I[2][1] += z * y, I[2][2] += z * z; + for (uint i = 0; i < nel; i++) { + struct rcb_element *ei = (struct rcb_element *)(elems + i * unit_size); + x = ei->coord[0] - avg[0]; + y = ei->coord[1] - avg[1]; + z = ei->coord[2] - avg[2]; + I[0] += x * x, I[1] += x * y, I[2] += x * z; + I[3] += y * x, I[4] += y * y, I[5] += y * z; + I[6] += z * x, I[7] += z * y, I[8] += z * z; } if (c != NULL) comm_allreduce(c, gs_double, gs_add, I, 9, buf); - double ev[3]; // ev[2] = 0 if 2D - power_serial(ev, ndim, (double *)I, 0); // FIXME: 2D does not work + // FIXME: 2D does not work + double ev[3] = {0, 0, 0}; + power_serial(ev, ndim, (double *)I, 0); - for (i = 0; i < nel; i++) { - elem = (struct rcb_element *)(elems + i * unit_size); - x = elem->coord[0] - avg[0]; - y = elem->coord[1] - avg[1]; - z = elem->coord[2] - avg[2]; - elem->fiedler = x * ev[0] + y * ev[1] + z * ev[2]; + double norm = 0; + for (unsigned i = 0; i < ndim; i++) + norm += ev[i] * ev[i]; + norm = sqrt(norm); + + for (unsigned i = 0; i < ndim; i++) + ev[i] /= norm; + + for (uint i = 0; i < nel; i++) { + struct rcb_element *ei = (struct rcb_element *)(elems + i * unit_size); + x = ei->coord[0] - avg[0]; + y = ei->coord[1] - avg[1]; + z = ei->coord[2] - avg[2]; + ei->fiedler = x * ev[0] + y * ev[1] + z * ev[2]; } } @@ -64,7 +67,7 @@ void rib_local(struct array *a, size_t unit_size, uint start, uint end, return; char *st = (char *)a->ptr + unit_size * start; - get_rib_axis(st, size, unit_size, ndim, NULL); + get_rib_proj(st, size, unit_size, ndim, NULL); if (unit_size == sizeof(struct rcb_element)) sarray_sort(struct rcb_element, st, size, fiedler, 3, buf); @@ -81,7 +84,7 @@ static int rib_level(struct array *a, size_t unit_size, int ndim, if (c->np == 1) return 0; - get_rib_axis((char *)a->ptr, a->n, unit_size, ndim, c); + get_rib_proj((char *)a->ptr, a->n, unit_size, ndim, c); if (unit_size == sizeof(struct rcb_element)) parallel_sort(struct rcb_element, a, fiedler, gs_double, 0, 1, c, 
bfr); @@ -93,26 +96,23 @@ static int rib_level(struct array *a, size_t unit_size, int ndim, int rib(struct array *elements, size_t unit_size, int ndim, struct comm *ci, buffer *bfr) { - struct comm c; + struct comm c, t; comm_dup(&c, ci); - int size = c.np; - int rank = c.id; - + int size = c.np, rank = c.id; while (size > 1) { rib_level(elements, unit_size, ndim, &c, bfr); - int p = (size + 1) / 2; - int bin = (rank >= p); + int bin = 1; + if (rank < (size + 1) / 2) + bin = 0; - MPI_Comm comm_rib; - MPI_Comm_split(c.c, bin, rank, &comm_rib); + comm_split(&c, bin, rank, &t); comm_free(&c); - comm_init(&c, comm_rib); - MPI_Comm_free(&comm_rib); + comm_dup(&c, &t); + comm_free(&t); - size = c.np; - rank = c.id; + size = c.np, rank = c.id; } comm_free(&c); diff --git a/src/rsb.c b/src/rsb.c index ea6bce8a..2ec28d9f 100644 --- a/src/rsb.c +++ b/src/rsb.c @@ -10,6 +10,8 @@ extern int rsb(struct array *elements, int nv, int check, parrsb_options *options, struct comm *gc, buffer *bfr); extern int rcb(struct array *elements, size_t unit_size, int ndim, struct comm *ci, buffer *bfr); +extern int rib(struct array *elements, size_t unit_size, int ndim, + struct comm *ci, buffer *bfr); parrsb_options parrsb_default_options = { // General options @@ -56,7 +58,7 @@ static void update_options(parrsb_options *options) { UPDATE_OPTION(rsb_mg_grammian, "PARRSB_RSB_MG_GRAMMIAN", 1); UPDATE_OPTION(rsb_mg_factor, "PARRSB_RSB_MG_FACTOR", 1); UPDATE_OPTION(rsb_mg_sagg, "PARRSB_RSB_MG_SMOOTH_AGGREGATION", 1); - if (options->verbose_level == 0) + if (options->verbose_level == 0) options->profile_level = 0; } @@ -99,11 +101,11 @@ static size_t load_balance(struct array *elist, uint nel, int nv, double *coord, unit_size = sizeof(struct rsb_element); array_init_(elist, nel, unit_size, __FILE__, __LINE__); + elist->n = 0; + int ndim = (nv == 8) ? 3 : 2; struct rcb_element *pe = (struct rcb_element *)calloc(1, unit_size); pe->origin = c->id; - - int ndim = (nv == 8) ? 
3 : 2; for (uint e = 0; e < nel; ++e) { slong eg = pe->globalId = start + e + 1; if (eg <= lower) @@ -200,10 +202,12 @@ int parrsb_part_mesh(int *part, int *seq, long long *vtx, double *coord, slong nelg = out[1][0]; if (ca.np > nelg) { - if (ca.id == 0) - printf("Total number of elements is smaller than the " - "number of processors.\n" - "Run with smaller number of processors.\n"); + if (ca.id == 0) { + fprintf(stderr, "Total number of elements is smaller than the " + "number of processors.\n" + "Run with smaller number of processors.\n"); + fflush(stderr); + } return 1; }