24#include "fasp_functs.h"
34static inline void invperm (
const INT,
const INT,
const REAL *,
const INT *,
REAL *);
66 const INT nb2 = nb*nb;
67 const INT size = ROW*nb2;
68 const INT *IA = A->
IA;
69 const INT *JA = A->
JA;
80 nthreads = fasp_get_num_threads();
86 INT mybegin,myend,myid;
88#pragma omp parallel for private(myid,mybegin,myend,i,k)
90 for (myid=0; myid<nthreads; myid++) {
92 for (i=mybegin; i<myend; i++) {
93 for (k=IA[i]; k<IA[i+1]; ++k)
95 memcpy(diaginv+i*nb2, val+k*nb2, nb2*
sizeof(
REAL));
100 for (i = 0; i < ROW; ++i) {
101 for (k = IA[i]; k < IA[i+1]; ++k) {
103 memcpy(diaginv+i*nb2, val+k*nb2, nb2*
sizeof(
REAL));
111 INT mybegin,myend,myid;
113#pragma omp parallel for private(myid,mybegin,myend,i)
115 for (myid=0; myid<nthreads; myid++) {
117 for (i=mybegin; i<myend; i++) {
123 for (i = 0; i < ROW; ++i) {
130 INT mybegin, myend, myid;
132#pragma omp parallel for private(myid,mybegin,myend,i)
134 for (myid=0; myid<nthreads; myid++) {
136 for (i=mybegin; i<myend; i++) {
137 diaginv[i] = 1.0 / diaginv[i];
142 for (i = 0; i < ROW; ++i) {
144 diaginv[i] = 1.0 / diaginv[i];
173 const INT nb = A->
nb;
174 const INT nb2 = nb*nb;
175 const INT *IA = A->
IA;
176 const INT *JA = A->
JA;
187 nthreads = fasp_get_num_threads();
193 INT mybegin,myend,myid;
195#pragma omp parallel for private(myid,mybegin,myend,i,k)
197 for (myid=0; myid<nthreads; myid++) {
199 for (i=mybegin; i<myend; i++) {
200 for (k=IA[i]; k<IA[i+1]; ++k)
202 memcpy(diaginv+i*nb2, val+k*nb2, nb2*
sizeof(
REAL));
207 for (i = 0; i < ROW; ++i) {
208 for (k = IA[i]; k < IA[i+1]; ++k) {
210 memcpy(diaginv+i*nb2, val+k*nb2, nb2*
sizeof(
REAL));
218 INT mybegin,myend,myid;
220#pragma omp parallel for private(myid,mybegin,myend,i)
222 for (myid=0; myid<nthreads; myid++) {
224 for (i=mybegin; i<myend; i++) {
230 for (i = 0; i < ROW; ++i) {
237 INT mybegin, myend, myid;
239#pragma omp parallel for private(myid,mybegin,myend,i)
241 for (myid=0; myid<nthreads; myid++) {
243 for (i=mybegin; i<myend; i++) {
244 diaginv[i] = 1.0 / diaginv[i];
249 for (i = 0; i < ROW; ++i) {
251 diaginv[i] = 1.0 / diaginv[i];
281 const INT nb = A->
nb;
282 const INT nb2 = nb*nb;
283 const INT size = ROW*nb;
284 const INT *IA = A->
IA;
285 const INT *JA = A->
JA;
293 nthreads = fasp_get_num_threads();
310 memcpy(b_tmp, b_val, size*
sizeof(
REAL));
315 INT mybegin, myend, myid;
317#pragma omp parallel for private(myid,mybegin,myend,i,j,k)
319 for (myid=0; myid<nthreads; myid++) {
321 for (i=mybegin; i<myend; i++) {
322 for (k = IA[i]; k < IA[i+1]; ++k) {
325 b_tmp[i] -= val[k]*u_val[j];
330#pragma omp parallel for private(myid,mybegin,myend,i)
332 for (myid=0; myid<nthreads; myid++) {
334 for (i=mybegin; i<myend; i++) {
335 u_val[i] = b_tmp[i]*diaginv[i];
340 for (i = 0; i < ROW; ++i) {
341 for (k = IA[i]; k < IA[i+1]; ++k) {
344 b_tmp[i] -= val[k]*u_val[j];
347 for (i = 0; i < ROW; ++i) {
348 u_val[i] = b_tmp[i]*diaginv[i];
356 INT mybegin, myend, myid;
358#pragma omp parallel for private(myid,mybegin,myend,i,pb,k,j)
360 for (myid=0; myid<nthreads; myid++) {
362 for (i=mybegin; i<myend; i++) {
364 for (k = IA[i]; k < IA[i+1]; ++k) {
372#pragma omp parallel for private(myid,mybegin,myend,i,pb)
374 for (myid=0; myid<nthreads; myid++) {
376 for (i=mybegin; i<myend; i++) {
383 for (i = 0; i < ROW; ++i) {
385 for (k = IA[i]; k < IA[i+1]; ++k) {
392 for (i = 0; i < ROW; ++i) {
401 printf(
"### ERROR: nb is illegal! [%s:%d]\n", __FILE__, __LINE__);
436 const INT nb = A->
nb;
437 const INT nb2 = nb*nb;
438 const INT size = ROW*nb2;
439 const INT *IA = A->
IA;
440 const INT *JA = A->
JA;
451 nthreads = fasp_get_num_threads();
457 INT mybegin,myend,myid;
459#pragma omp parallel for private(myid,mybegin,myend,i,k)
461 for (myid=0; myid<nthreads; myid++) {
463 for (i=mybegin; i<myend; i++) {
464 for (k=IA[i]; k<IA[i+1]; ++k)
466 memcpy(diaginv+i*nb2, val+k*nb2, nb2*
sizeof(
REAL));
471 for (i = 0; i < ROW; ++i) {
472 for (k = IA[i]; k < IA[i+1]; ++k) {
474 memcpy(diaginv+i*nb2, val+k*nb2, nb2*
sizeof(
REAL));
482 INT mybegin,myend,myid;
484#pragma omp parallel for private(myid,mybegin,myend,i)
486 for (myid=0; myid<nthreads; myid++) {
488 for (i=mybegin; i<myend; i++) {
494 for (i = 0; i < ROW; ++i) {
501 INT mybegin, myend, myid;
503#pragma omp parallel for private(myid,mybegin,myend,i)
505 for (myid=0; myid<nthreads; myid++) {
507 for (i=mybegin; i<myend; i++) {
508 diaginv[i] = 1.0 / diaginv[i];
513 for (i = 0; i < ROW; ++i) {
515 diaginv[i] = 1.0 / diaginv[i];
589 const INT nb = A->
nb;
590 const INT nb2 = nb*nb;
591 const INT *IA = A->
IA;
592 const INT *JA = A->
JA;
605 for (i = 0; i < ROW; ++i) {
607 for (k = IA[i]; k < IA[i+1]; ++k) {
610 rhs -= val[k]*u_val[j];
612 u_val[i] = rhs*diaginv[i];
618 for (i = 0; i < ROW; ++i) {
620 memcpy(b_tmp, b_val+pb, nb*
sizeof(
REAL));
621 for (k = IA[i]; k < IA[i+1]; ++k) {
632 printf(
"### ERROR: nb is illegal! [%s:%d]\n", __FILE__, __LINE__);
661 const INT nb = A->
nb;
662 const INT nb2 = nb*nb;
663 const INT *IA = A->
IA;
664 const INT *JA = A->
JA;
677 for (i = 0; i < ROW; ++i) {
679 for (k = IA[i]; k < IA[i+1]; ++k) {
682 rhs -= val[k]*u_val[j];
690 for (i = 0; i < ROW; ++i) {
692 memcpy(b_tmp, b_val+pb, nb*
sizeof(
REAL));
693 for (k = IA[i]; k < IA[i+1]; ++k) {
698 memcpy(u_val+pb, b_tmp, nb*
sizeof(
REAL));
704 printf(
"### ERROR: nb is illegal! [%s:%d]\n", __FILE__, __LINE__);
731 const INT nb = A->
nb;
732 const INT nb2 = nb*nb;
733 const INT *IA = A->
IA;
734 const INT *JA = A->
JA;
747 for (i = ROW-1; i >= 0; i--) {
749 for (k = IA[i]; k < IA[i+1]; ++k) {
752 rhs -= val[k]*u_val[j];
754 u_val[i] = rhs*diaginv[i];
760 for (i = ROW-1; i >= 0; i--) {
762 memcpy(b_tmp, b_val+pb, nb*
sizeof(
REAL));
763 for (k = IA[i]; k < IA[i+1]; ++k) {
774 printf(
"### ERROR: nb is illegal! [%s:%d]\n", __FILE__, __LINE__);
804 const INT nb = A->
nb;
805 const INT nb2 = nb*nb;
806 const INT *IA = A->
IA;
807 const INT *JA = A->
JA;
820 for (i = ROW-1; i >= 0; i--) {
822 for (k = IA[i]; k < IA[i+1]; ++k) {
825 rhs -= val[k]*u_val[j];
833 for (i = ROW-1; i >= 0; i--) {
835 memcpy(b_tmp, b_val+pb, nb*
sizeof(
REAL));
836 for (k = IA[i]; k < IA[i+1]; ++k) {
841 memcpy(u_val+pb, b_tmp, nb*
sizeof(
REAL));
847 printf(
"### ERROR: nb is illegal! [%s:%d]\n", __FILE__, __LINE__);
876 const INT nb = A->
nb;
877 const INT nb2 = nb*nb;
878 const INT *IA = A->
IA;
879 const INT *JA = A->
JA;
892 for (I = 0; I < ROW; ++I) {
895 for (k = IA[i]; k < IA[i+1]; ++k) {
898 rhs -= val[k]*u_val[j];
900 u_val[i] = rhs*diaginv[i];
906 for (I = 0; I < ROW; ++I) {
909 memcpy(b_tmp, b_val+pb, nb*
sizeof(
REAL));
910 for (k = IA[i]; k < IA[i+1]; ++k) {
954 const INT nb = A->
nb;
955 const INT nb2 = nb*nb;
956 const INT *IA = A->
IA;
957 const INT *JA = A->
JA;
972 for (I = 0; I < ROW; ++I) {
975 for (k = IA[i]; k < IA[i+1]; ++k) {
978 rhs -= val[k]*u_val[j];
984 for (I = 0; I < ROW; ++I) {
987 memcpy(b_tmp, b_val+pb, nb*
sizeof(
REAL));
988 for (k = IA[i]; k < IA[i+1]; ++k) {
993 memcpy(u_val+pb, b_tmp, nb*
sizeof(
REAL));
1032 const INT nb = A->
nb;
1033 const INT nb2 = nb*nb;
1034 const INT size = ROW*nb2;
1035 const INT *IA = A->
IA;
1036 const INT *JA = A->
JA;
1041 REAL *diaginv = NULL;
1048 nthreads = fasp_get_num_threads();
1057 INT mybegin,myend,myid;
1059#pragma omp parallel for private(myid,mybegin,myend,i,k)
1061 for (myid=0; myid<nthreads; myid++) {
1063 for (i=mybegin; i<myend; i++) {
1064 for (k=IA[i]; k<IA[i+1]; ++k)
1066 memcpy(diaginv+i*nb2, val+k*nb2, nb2*
sizeof(
REAL));
1071 for (i = 0; i < ROW; ++i) {
1072 for (k = IA[i]; k < IA[i+1]; ++k) {
1074 memcpy(diaginv+i*nb2, val+k*nb2, nb2*
sizeof(
REAL));
1082 INT mybegin,myend,myid;
1084#pragma omp parallel for private(myid,mybegin,myend,i)
1086 for (myid=0; myid<nthreads; myid++) {
1088 for (i=mybegin; i<myend; i++) {
1094 for (i = 0; i < ROW; ++i) {
1101 INT mybegin, myend, myid;
1103#pragma omp parallel for private(myid,mybegin,myend,i)
1105 for (myid=0; myid<nthreads; myid++) {
1107 for (i=mybegin; i<myend; i++) {
1108 diaginv[i] = 1.0 / diaginv[i];
1113 for (i = 0; i < ROW; ++i) {
1115 diaginv[i] = 1.0 / diaginv[i];
1195 const INT nb = A->
nb;
1196 const INT *IA = A->
IA;
1197 const INT *JA = A->
JA;
1205 const INT nb2 = nb*nb;
1209 REAL one_minus_weight = 1.0 - weight;
1213 INT myid, mybegin, myend;
1214 INT nthreads = fasp_get_num_threads();
1220#pragma omp parallel for private(myid, mybegin, myend, i, rhs, k, j)
1221 for (myid = 0; myid < nthreads; myid++) {
1223 for (i = mybegin; i < myend; i++) {
1225 for (k = IA[i]; k < IA[i+1]; ++k) {
1228 rhs -= val[k]*u_val[j];
1230 u_val[i] = one_minus_weight*u_val[i] + weight*(rhs*diaginv[i]);
1236 for (i = 0; i < ROW; ++i) {
1238 for (k = IA[i]; k < IA[i+1]; ++k) {
1241 rhs -= val[k]*u_val[j];
1243 u_val[i] = one_minus_weight*u_val[i] + weight*(rhs*diaginv[i]);
1253#pragma omp parallel for private(myid, mybegin, myend, i, pb, k, j)
1254 for (myid = 0; myid < nthreads; myid++) {
1256 for (i = mybegin; i < myend; i++) {
1258 memcpy(b_tmp+myid*nb, b_val+pb, nb*
sizeof(
REAL));
1259 for (k = IA[i]; k < IA[i+1]; ++k) {
1272 for (i = 0; i < ROW; ++i) {
1274 memcpy(b_tmp, b_val+pb, nb*
sizeof(
REAL));
1275 for (k = IA[i]; k < IA[i+1]; ++k) {
1318 const INT nb = A->
nb;
1319 const INT nb2 = nb*nb;
1320 const INT *IA = A->
IA;
1321 const INT *JA = A->
JA;
1323 const REAL one_minus_weight = 1.0 - weight;
1336 INT myid, mybegin, myend;
1337 INT nthreads = fasp_get_num_threads();
1343#pragma omp parallel for private(myid, mybegin, myend, i, rhs, k, j)
1344 for (myid = 0; myid < nthreads; myid++) {
1346 mybegin = ROW-1-mybegin; myend = ROW-1-myend;
1347 for (i = mybegin; i > myend; i--) {
1349 for (k = IA[i]; k < IA[i+1]; ++k) {
1352 rhs -= val[k]*u_val[j];
1354 u_val[i] = one_minus_weight*u_val[i] + weight*(rhs*diaginv[i]);
1360 for (i = ROW-1; i >= 0; i--) {
1362 for (k = IA[i]; k < IA[i+1]; ++k) {
1365 rhs -= val[k]*u_val[j];
1367 u_val[i] = one_minus_weight*u_val[i] + weight*(rhs*diaginv[i]);
1377#pragma omp parallel for private(myid, mybegin, myend, i, pb, k, j)
1378 for (myid = 0; myid < nthreads; myid++) {
1380 mybegin = ROW-1-mybegin; myend = ROW-1-myend;
1381 for (i = mybegin; i > myend; i--) {
1383 memcpy(b_tmp+myid*nb, b_val+pb, nb*
sizeof(
REAL));
1384 for (k = IA[i]; k < IA[i+1]; ++k) {
1390 one_minus_weight, u_val+pb, nb);
1398 for (i = ROW-1; i >= 0; i--) {
1400 memcpy(b_tmp, b_val+pb, nb*
sizeof(
REAL));
1401 for (k = IA[i]; k < IA[i+1]; ++k) {
1447 const INT nb = A->
nb;
1448 const INT nb2 = nb*nb;
1449 const INT *IA = A->
IA;
1450 const INT *JA = A->
JA;
1452 const REAL one_minus_weight = 1.0 - weight;
1465 INT myid, mybegin, myend;
1466 INT nthreads = fasp_get_num_threads();
1472#pragma omp parallel for private(myid, mybegin, myend, I, i, rhs, k, j)
1473 for (myid = 0; myid < nthreads; myid++) {
1475 for (I = mybegin; I < myend; ++I) {
1478 for (k = IA[i]; k < IA[i+1]; ++k) {
1481 rhs -= val[k]*u_val[j];
1483 u_val[i] = one_minus_weight*u_val[i] + weight*(rhs*diaginv[i]);
1489 for (I = 0; I < ROW; ++I) {
1492 for (k = IA[i]; k < IA[i+1]; ++k) {
1495 rhs -= val[k]*u_val[j];
1497 u_val[i] = one_minus_weight*u_val[i] + weight*(rhs*diaginv[i]);
1507#pragma omp parallel for private(myid, mybegin, myend, I, i, pb, k, j)
1508 for (myid = 0; myid < nthreads; myid++) {
1510 for (I = mybegin; I < myend; ++I) {
1513 memcpy(b_tmp+myid*nb, b_val+pb, nb*
sizeof(
REAL));
1514 for (k = IA[i]; k < IA[i+1]; ++k) {
1520 one_minus_weight, u_val+pb, nb);
1528 for (I = 0; I < ROW; ++I) {
1531 memcpy(b_tmp, b_val+pb, nb*
sizeof(
REAL));
1532 for (k = IA[i]; k < IA[i+1]; ++k) {
1572 const INT nb=iludata->
nb,m=A->
ROW*nb, memneed=5*m;
1580 if (iludata->
nwork<memneed)
goto MEMERR;
1591 perm(A->
ROW, A->
nb, zr, iludata->
jlevL, tzr);
1597 invperm(A->
ROW, A->
nb, tz, iludata->
jlevL, z);
1623 printf(
"### ERROR: ILU needs %d memory, only %d available! [%s:%d]\n",
1624 memneed, iludata->
nwork, __FILE__, __LINE__);
1651static inline void perm (
const INT n,
1657 INT i, j, indx, indy;
1660#pragma omp parallel for private(i, j, indx, indy)
1662 for (i=0; i<n; ++i) {
1665 for (j=0; j<nb; ++j) {
1666 y[indy+j] = x[indx+j];
1686static inline void invperm (
const INT n,
1692 INT i, j, indx, indy;
1695#pragma omp parallel for private(i, j, indx, indy)
1697 for (i=0; i<n; ++i) {
1700 for (j=0; j<nb; ++j) {
1701 y[indy+j] = x[indx+j];
void fasp_darray_cp(const INT n, const REAL *x, REAL *y)
Copy an array to the other y=x.
void fasp_mem_free(void *mem)
Free up previous allocated memory body and set pointer to NULL.
void * fasp_mem_calloc(const unsigned int size, const unsigned int type)
Allocate, initiate, and check memory.
void fasp_chkerr(const SHORT status, const char *fctname)
Check error status and print out error messages before quit.
void fasp_get_start_end(const INT procid, const INT nprocs, const INT n, INT *start, INT *end)
Assign Load to each thread.
void fasp_gettime(REAL *time)
Get system time.
void fasp_blas_darray_axpy(const INT n, const REAL a, const REAL *x, REAL *y)
y = a*x + y
SHORT fasp_smat_inv(REAL *a, const INT n)
Compute the inverse matrix of a small full matrix a.
void fasp_blas_smat_aAxpby(const REAL alpha, const REAL *A, const REAL *x, const REAL beta, REAL *y, const INT n)
Compute y:=alpha*A*x + beta*y
void fasp_blas_smat_mxv(const REAL *a, const REAL *b, REAL *c, const INT n)
Compute the product of a small full matrix a and a array b, stored in c.
void fasp_blas_smat_ymAx(const REAL *A, const REAL *x, REAL *y, const INT n)
Compute y := y - Ax, where 'A' is a n*n dense matrix.
void fasp_blas_dbsr_aAxpy(const REAL alpha, const dBSRmat *A, const REAL *x, REAL *y)
Compute y := alpha*A*x + y.
void fasp_smoother_dbsr_gs_descend(dBSRmat *A, dvector *b, dvector *u, REAL *diaginv)
Gauss-Seidel relaxation in the descending order.
void fasp_smoother_dbsr_gs1(dBSRmat *A, dvector *b, dvector *u, INT order, INT *mark, REAL *diaginv)
Gauss-Seidel relaxation.
void fasp_smoother_dbsr_gs_order2(dBSRmat *A, dvector *b, dvector *u, INT *mark, REAL *work)
Gauss-Seidel relaxation in the user-defined order.
void fasp_smoother_dbsr_gs_descend1(dBSRmat *A, dvector *b, dvector *u)
Gauss-Seidel relaxation in the descending order.
void fasp_smoother_dbsr_jacobi1(dBSRmat *A, dvector *b, dvector *u, REAL *diaginv)
Jacobi relaxation.
void fasp_smoother_dbsr_gs_ascend(dBSRmat *A, dvector *b, dvector *u, REAL *diaginv)
Gauss-Seidel relaxation in the ascending order.
void fasp_smoother_dbsr_gs_ascend1(dBSRmat *A, dvector *b, dvector *u)
Gauss-Seidel relaxation in the ascending order.
void fasp_smoother_dbsr_sor_ascend(dBSRmat *A, dvector *b, dvector *u, REAL *diaginv, REAL weight)
SOR relaxation in the ascending order.
void fasp_smoother_dbsr_ilu(dBSRmat *A, dvector *b, dvector *x, void *data)
ILU method as the smoother in solving Au=b with multigrid method.
void fasp_smoother_dbsr_jacobi(dBSRmat *A, dvector *b, dvector *u)
Jacobi relaxation.
void fasp_smoother_dbsr_gs(dBSRmat *A, dvector *b, dvector *u, INT order, INT *mark)
Gauss-Seidel relaxation.
void fasp_smoother_dbsr_sor1(dBSRmat *A, dvector *b, dvector *u, INT order, INT *mark, REAL *diaginv, REAL weight)
SOR relaxation.
void fasp_smoother_dbsr_sor_order(dBSRmat *A, dvector *b, dvector *u, REAL *diaginv, INT *mark, REAL weight)
SOR relaxation in the user-defined order.
void fasp_smoother_dbsr_sor_descend(dBSRmat *A, dvector *b, dvector *u, REAL *diaginv, REAL weight)
SOR relaxation in the descending order.
void fasp_smoother_dbsr_jacobi_setup(dBSRmat *A, REAL *diaginv)
Setup for jacobi relaxation, fetch the diagonal sub-block matrixes and make them inverse first.
void fasp_smoother_dbsr_gs_order1(dBSRmat *A, dvector *b, dvector *u, REAL *diaginv, INT *mark)
Gauss-Seidel relaxation in the user-defined order.
void fasp_smoother_dbsr_sor(dBSRmat *A, dvector *b, dvector *u, INT order, INT *mark, REAL weight)
SOR relaxation.
void fasp_precond_dbsr_ilu_mc_omp(REAL *r, REAL *z, void *data)
Multi-thread Parallel ILU preconditioner based on graph coloring.
void fasp_precond_dbsr_ilu_ls_omp(REAL *r, REAL *z, void *data)
Multi-thread Parallel ILU preconditioner based on level schedule strategy.
void fasp_precond_dbsr_ilu(REAL *r, REAL *z, void *data)
ILU preconditioner.
Main header file for the FASP project.
#define SHORT
FASP integer and floating point numbers.
#define TRUE
Definition of logic type.
INT nb
block size for BSR type only
INT * jlevL
mapping from row to color for lower triangle
Block sparse row storage matrix of REAL type.
INT nb
dimension of each sub-block
INT * IA
integer array of row pointers, the size is ROW+1
INT ROW
number of rows of sub-blocks in matrix A, M
Vector with n entries of REAL type.
REAL * val
actual vector entries