22#include "fasp_functs.h"
59 const INT N =
ABS(i_n - i_1) + 1;
60 const INT *ia = A->
IA, *ja = A->
JA;
65 INT i,j,k,begin_row,end_row;
69 INT myid, mybegin, myend;
70 INT nthreads = fasp_get_num_threads();
81#pragma omp parallel for private(myid, mybegin, myend, begin_row, end_row, i, k, j)
82 for (myid=0; myid<nthreads; ++myid) {
84 mybegin += i_1; myend += i_1;
85 for (i=mybegin; i<myend; i+=s) {
87 begin_row=ia[i],end_row=ia[i+1];
88 for (k=begin_row; k<end_row; ++k) {
90 if (i!=j) t[i]-=aval[k]*uval[j];
98 for (i=i_1;i<=i_n;i+=s) {
100 begin_row=ia[i]; end_row=ia[i+1];
101 for (k=begin_row;k<end_row;++k) {
103 if (i!=j) t[i]-=aval[k]*uval[j];
112#pragma omp parallel for private (i)
114 for (i=i_1;i<=i_n;i+=s) {
115 if (
ABS(d[i])>
SMALLREAL) uval[i]=(1-w)*uval[i]+ w*t[i]/d[i];
124#pragma omp parallel for private(myid, mybegin, myend, i, begin_row, end_row, k, j)
125 for (myid=0; myid<nthreads; myid++) {
127 mybegin = i_1-mybegin; myend = i_1-myend;
128 for (i=mybegin; i>myend; i+=s) {
130 begin_row=ia[i],end_row=ia[i+1];
131 for (k=begin_row; k<end_row; ++k) {
133 if (i!=j) t[i]-=aval[k]*uval[j];
141 for (i=i_1;i>=i_n;i+=s) {
143 begin_row=ia[i]; end_row=ia[i+1];
144 for (k=begin_row;k<end_row;++k) {
146 if (i!=j) t[i]-=aval[k]*uval[j];
155#pragma omp parallel for private(i)
157 for (i=i_1;i>=i_n;i+=s) {
158 if (
ABS(d[i])>
SMALLREAL) uval[i]=(1-w)*uval[i]+ w*t[i]/d[i];
198 const INT *ia = A->
IA, *ja = A->
JA;
203 INT i,j,k,begin_row,end_row;
207 const INT N =
ABS(i_n - i_1)+1;
208 INT myid, mybegin, myend;
209 INT nthreads = fasp_get_num_threads();
217#pragma omp parallel for private(myid, mybegin, myend, i, t, begin_row, end_row, d, k, j)
218 for (myid=0; myid<nthreads; myid++) {
220 mybegin += i_1, myend += i_1;
221 for (i=mybegin; i<myend; i+=s) {
223 begin_row=ia[i],end_row=ia[i+1];
227 for (k=begin_row+1;k<end_row;++k) {
234 for (k=begin_row;k<end_row;++k) {
249 for (i=i_1;i<=i_n;i+=s) {
251 begin_row=ia[i]; end_row=ia[i+1];
256 for (k=begin_row+1;k<end_row;++k) {
263 for (k=begin_row;k<end_row;++k) {
283#pragma omp parallel for private(myid, mybegin, myend, i, begin_row, end_row, d, k, j, t)
284 for (myid=0; myid<nthreads; myid++) {
286 mybegin = i_1 - mybegin; myend = i_1 - myend;
287 for (i=mybegin; i>myend; i+=s) {
289 begin_row=ia[i],end_row=ia[i+1];
293 for (k=begin_row+1;k<end_row;++k) {
300 for (k=begin_row;k<end_row;++k) {
313 for (i=i_1;i>=i_n;i+=s) {
315 begin_row=ia[i]; end_row=ia[i+1];
319 for (k=begin_row+1;k<end_row;++k) {
326 for (k=begin_row;k<end_row;++k) {
371 const INT *ia = A->
IA, *ja = A->
JA;
375 INT i,j,k,begin_row,end_row;
379 INT myid,mybegin,myend;
380 INT nthreads = fasp_get_num_threads();
390#pragma omp parallel for private(myid, mybegin, myend, i,t,begin_row,end_row,k,j,d)
391 for (myid = 0; myid < nthreads; myid ++) {
393 for (i=mybegin; i<myend; i++) {
396 begin_row = ia[i], end_row = ia[i+1];
399 for (k = begin_row+1; k < end_row; k ++) {
401 t -= aval[k]*uval[j];
404 for (k = begin_row; k < end_row; k ++) {
406 if (i!=j) t -= aval[k]*uval[j];
417 for (i = 0; i < nrow; i ++) {
420 begin_row = ia[i]; end_row = ia[i+1];
423 for (k = begin_row+1; k < end_row; k ++) {
425 t -= aval[k]*uval[j];
428 for (k = begin_row; k < end_row; k ++) {
430 if (i!=j) t -= aval[k]*uval[j];
443#pragma omp parallel for private(myid,mybegin,myend,i,t,begin_row,end_row,k,j,d)
444 for (myid = 0; myid < nthreads; myid ++) {
446 for (i=mybegin; i<myend; i++) {
449 begin_row = ia[i], end_row = ia[i+1];
452 for (k = begin_row+1; k < end_row; k ++) {
454 t -= aval[k]*uval[j];
457 for (k = begin_row; k < end_row; k ++) {
459 if (i!=j) t -= aval[k]*uval[j];
470 for (i = 0; i < nrow; i ++) {
473 begin_row = ia[i]; end_row = ia[i+1];
476 for (k = begin_row+1; k < end_row; k ++) {
478 t -= aval[k]*uval[j];
481 for (k = begin_row; k < end_row; k ++) {
483 if (i!=j) t -= aval[k]*uval[j];
503#pragma omp parallel for private(myid,mybegin,myend,t,i,begin_row,end_row,k,j,d)
504 for (myid = 0; myid < nthreads; myid ++) {
506 for (i=mybegin; i<myend; i++) {
509 begin_row = ia[i],end_row = ia[i+1];
512 for (k = begin_row+1; k < end_row; k ++) {
514 t -= aval[k]*uval[j];
517 for (k = begin_row; k < end_row; k ++) {
519 if (i!=j) t -= aval[k]*uval[j];
530 for (i = 0; i < nrow; i ++) {
533 begin_row = ia[i]; end_row = ia[i+1];
536 for (k = begin_row+1; k < end_row; k ++) {
538 t -= aval[k]*uval[j];
541 for (k = begin_row; k < end_row; k ++) {
543 if (i!=j) t -= aval[k]*uval[j];
556#pragma omp parallel for private(myid, mybegin, myend, i,t,begin_row,end_row,k,j,d)
557 for (myid = 0; myid < nthreads; myid ++) {
559 for (i=mybegin; i<myend; i++) {
562 begin_row = ia[i],end_row = ia[i+1];
565 for (k = begin_row+1; k < end_row; k ++) {
567 t -= aval[k]*uval[j];
570 for (k = begin_row; k < end_row; k ++) {
572 if (i!=j) t -= aval[k]*uval[j];
583 for (i = 0; i < nrow; i ++) {
586 begin_row = ia[i]; end_row = ia[i+1];
589 for (k = begin_row+1; k < end_row; k ++) {
591 t -= aval[k]*uval[j];
594 for (k = begin_row; k < end_row; k ++) {
596 if (i!=j) t -= aval[k]*uval[j];
639 INT i,j,k,begin_row,end_row;
643 INT myid, mybegin, myend, up;
644 INT nthreads = fasp_get_num_threads();
652#pragma omp parallel for private(myid, mybegin, myend, i, t, begin_row, end_row, j, k, d)
653 for (myid=0; myid<nthreads; myid++) {
655 for (i=mybegin; i<myend; i++) {
657 begin_row=ia[i], end_row=ia[i+1];
658 for (k=begin_row;k<end_row;++k) {
660 if (i!=j) t-=aval[k]*uval[j];
669 for (i=0;i<=nm1;++i) {
671 begin_row=ia[i]; end_row=ia[i+1];
672 for (k=begin_row;k<end_row;++k) {
674 if (i!=j) t-=aval[k]*uval[j];
687#pragma omp parallel for private(myid, mybegin, myend, i, t, begin_row, end_row, k, j, d)
688 for (myid=0; myid<nthreads; myid++) {
690 mybegin = nm1-1-mybegin; myend = nm1-1-myend;
691 for (i=mybegin; i>myend; i--) {
693 begin_row=ia[i], end_row=ia[i+1];
694 for (k=begin_row; k<end_row; k++) {
696 if (i!=j) t-=aval[k]*uval[j];
705 for (i=nm1-1;i>=0;--i) {
707 begin_row=ia[i]; end_row=ia[i+1];
708 for (k=begin_row;k<end_row;++k) {
710 if (i!=j) t-=aval[k]*uval[j];
758 INT i,j,k,begin_row,end_row;
762 const INT N =
ABS(i_n - i_1)+1;
763 INT myid, mybegin, myend;
764 INT nthreads = fasp_get_num_threads();
771#pragma omp parallel for private(myid, mybegin, myend, i, t, begin_row, end_row, k, j, d)
772 for (myid=0; myid<nthreads; myid++) {
774 mybegin += i_1, myend += i_1;
775 for (i=mybegin; i<myend; i+=s) {
777 begin_row=ia[i], end_row=ia[i+1];
778 for (k=begin_row; k<end_row; k++) {
792 for (i=i_1; i<=i_n; i+=s) {
794 begin_row=ia[i]; end_row=ia[i+1];
795 for (k=begin_row; k<end_row; ++k) {
811#pragma omp parallel for private(myid, mybegin, myend, i, t, begin_row, end_row, k, j, d)
812 for (myid=0; myid<nthreads; myid++) {
814 mybegin = i_1 - mybegin, myend = i_1 - myend;
815 for (i=mybegin; i>myend; i+=s) {
817 begin_row=ia[i],end_row=ia[i+1];
818 for (k=begin_row;k<end_row;++k) {
831 for (i=i_1;i>=i_n;i+=s) {
833 begin_row=ia[i]; end_row=ia[i+1];
834 for (k=begin_row;k<end_row;++k) {
880 const INT *ia = A->
IA, *ja=A->
JA;
885 INT i,j,k,begin_row,end_row;
889 INT myid, mybegin, myend;
890 INT nthreads = fasp_get_num_threads();
898#pragma omp parallel for private (myid, mybegin, myend, i, t, begin_row, end_row, k, j, d)
899 for (myid = 0; myid < nthreads; myid++) {
901 for (i = mybegin; i < myend; i ++) {
902 if (mark[i] == 0 || mark[i] == 2) {
904 begin_row = ia[i], end_row = ia[i+1];
905 for (k = begin_row; k < end_row; k ++) {
907 if (i!=j) t -= aval[k]*uval[j];
917 for (i = 0; i < nrow; i ++) {
918 if (mark[i] == 0 || mark[i] == 2) {
920 begin_row = ia[i]; end_row = ia[i+1];
921 for (k = begin_row; k < end_row; k ++) {
923 if (i!=j) t -= aval[k]*uval[j];
935#pragma omp parallel for private(myid, i, mybegin, myend, t, begin_row, end_row, k, j, d)
936 for (myid = 0; myid < nthreads; myid++) {
938 for (i = mybegin; i < myend; i++) {
941 begin_row = ia[i], end_row = ia[i+1];
942 for (k = begin_row; k < end_row; k ++) {
944 if (i!=j) t -= aval[k]*uval[j];
954 for (i = 0; i < nrow; i ++) {
957 begin_row = ia[i]; end_row = ia[i+1];
958 for (k = begin_row; k < end_row; k ++) {
960 if (i!=j) t -= aval[k]*uval[j];
975#pragma omp parallel for private(myid, mybegin, myend, i, t, k, j, d, begin_row, end_row)
976 for (myid = 0; myid < nthreads; myid++) {
978 for (i = mybegin; i < myend; i++) {
981 begin_row = ia[i], end_row = ia[i+1];
982 for (k = begin_row; k < end_row; k ++) {
984 if (i!=j) t -= aval[k]*uval[j];
994 for (i = 0; i < nrow; i ++) {
997 begin_row = ia[i]; end_row = ia[i+1];
998 for (k = begin_row; k < end_row; k ++) {
1000 if (i!=j) t -= aval[k]*uval[j];
1012#pragma omp parallel for private (myid, mybegin, myend, i, t, begin_row, end_row, k, j, d)
1013 for (myid = 0; myid < nthreads; myid++) {
1015 for (i = mybegin; i < myend; i++) {
1018 begin_row = ia[i], end_row = ia[i+1];
1019 for (k = begin_row; k < end_row; k ++) {
1021 if (i!=j) t -= aval[k]*uval[j];
1031 for (i = 0; i < nrow; i ++) {
1034 begin_row = ia[i]; end_row = ia[i+1];
1035 for (k = begin_row; k < end_row; k ++) {
1037 if (i!=j) t -= aval[k]*uval[j];
1070 const INT m=A->
row, m2=2*m, memneed=3*m;
1077 if (iludata->
nwork<memneed)
goto MEMERR;
1080 INT i, j, jj, begin_row, end_row;
1091 begin_row=ijlu[i]; end_row=ijlu[i+1];
1092 for (j=begin_row;j<end_row;++j) {
1094 if (jj<i) zr[i]-=lu[j]*zz[jj];
1101 z[m-1]=zz[m-1]*lu[m-1];
1102 for (i=m-2;i>=0;--i) {
1103 begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
1104 for (j=end_row;j>=begin_row;--j) {
1106 if (jj>i) zz[i]-=lu[j]*z[jj];
1118 printf(
"### ERROR: ILU needs %d memory, only %d available! [%s:%d]\n",
1119 memneed, iludata->
nwork, __FILE__, __LINE__);
1153 const INT *ia=A->
IA,*ja=A->
JA;
1158 INT i,j,k,begin_row,end_row;
1159 REAL temp1,temp2,alpha;
1162 const INT N =
ABS(i_n - i_1)+1;
1163 INT myid, mybegin, myend;
1164 INT nthreads = fasp_get_num_threads();
1172#pragma omp parallel for private(myid, mybegin, myend, i, temp1, temp2, begin_row, end_row, k, alpha, j)
1173 for (myid=0; myid<nthreads; myid++) {
1175 mybegin += i_1, myend += i_1;
1176 for (i=mybegin; i<myend; i+=s) {
1177 temp1 = 0; temp2 = 0;
1178 begin_row=ia[i], end_row=ia[i+1];
1179 for (k=begin_row; k<end_row; k++) {
1181 temp1 += aval[k]*aval[k];
1182 temp2 += aval[k]*uval[j];
1185 alpha = (bval[i] - temp2)/temp1;
1186 for (k=begin_row; k<end_row; ++k){
1188 uval[j] += w*alpha*aval[k];
1194 for (i=i_1;i<=i_n;i+=s) {
1195 temp1 = 0; temp2 = 0;
1196 begin_row=ia[i]; end_row=ia[i+1];
1197 for (k=begin_row;k<end_row;++k) {
1199 temp1 += aval[k]*aval[k];
1200 temp2 += aval[k]*uval[j];
1202 alpha = (bval[i] - temp2)/temp1;
1203 for (k=begin_row;k<end_row;++k){
1205 uval[j] += w*alpha*aval[k];
1219#pragma omp parallel for private(myid, mybegin, myend, i, temp1, temp2, begin_row, end_row, k, alpha, j)
1220 for (myid=0; myid<nthreads; myid++) {
1222 mybegin = i_1 - mybegin, myend = i_1 - myend;
1223 for (i=mybegin; i>myend; i+=s) {
1224 temp1 = 0; temp2 = 0;
1225 begin_row=ia[i], end_row=ia[i+1];
1226 for (k=begin_row;k<end_row;++k) {
1228 temp1 += aval[k]*aval[k];
1229 temp2 += aval[k]*uval[j];
1231 alpha = (bval[i] - temp2)/temp1;
1232 for (k=begin_row;k<end_row;++k){
1234 uval[j] += w*alpha*aval[k];
1241 for (i=i_1;i>=i_n;i+=s) {
1242 temp1 = 0; temp2 = 0;
1243 begin_row=ia[i]; end_row=ia[i+1];
1244 for (k=begin_row;k<end_row;++k) {
1246 temp1 += aval[k]*aval[k];
1247 temp2 += aval[k]*uval[j];
1249 alpha = (bval[i] - temp2)/temp1;
1250 for (k=begin_row;k<end_row;++k){
1252 uval[j] += w*alpha*aval[k];
1292 const INT N =
ABS(i_n - i_1)+1;
1293 const INT *ia=A->
IA, *ja=A->
JA;
1298 INT i,j,k,begin_row,end_row;
1301 INT myid, mybegin, myend;
1302 INT nthreads = fasp_get_num_threads();
1313#pragma omp parallel for private(myid, mybegin, myend, i, begin_row, end_row, k, j)
1314 for (myid=0; myid<nthreads; myid++) {
1316 mybegin += i_1, myend += i_1;
1317 for (i=mybegin; i<myend; i+=s) {
1318 t[i]=bval[i]; d[i]=0.0;
1319 begin_row=ia[i], end_row=ia[i+1];
1320 for (k=begin_row; k<end_row; k++) {
1322 t[i]-=aval[k]*uval[j];
1327#pragma omp parallel for private(i)
1328 for (i=i_1;i<=i_n;i+=s) {
1334 for (i=i_1;i<=i_n;i+=s) {
1335 t[i]=bval[i]; d[i]=0.0;
1336 begin_row=ia[i]; end_row=ia[i+1];
1337 for (k=begin_row;k<end_row;++k) {
1339 t[i]-=aval[k]*uval[j];
1344 for (i=i_1;i<=i_n;i+=s) {
1354#pragma omp parallel for private(myid, mybegin, myend, i, k, j, begin_row, end_row)
1355 for (myid=0; myid<nthreads; myid++) {
1357 mybegin = i_1 - mybegin, myend = i_1 - myend;
1358 for (i=mybegin; i>myend; i+=s) {
1359 t[i]=bval[i]; d[i]=0.0;
1360 begin_row=ia[i], end_row=ia[i+1];
1361 for (k=begin_row; k<end_row; k++) {
1363 t[i]-=aval[k]*uval[j];
1368#pragma omp parallel for private(i)
1369 for (i=i_1;i>=i_n;i+=s) {
1375 for (i=i_1;i>=i_n;i+=s) {
1376 t[i]=bval[i];d[i]=0.0;
1377 begin_row=ia[i]; end_row=ia[i+1];
1378 for (k=begin_row;k<end_row;++k) {
1380 t[i]-=aval[k]*uval[j];
1385 for (i=i_1;i>=i_n;i+=s) {
1436 b.
val=work; x.
val=work+n;
1440 for (i=0; i<n; ++i) index[i]=i;
1446 for (i=0; i<n; ++i){
1486 printf(
"### ERROR: Unknown smoother type! [%s:%d]\n",
1487 __FILE__, __LINE__);
1493 memcpy(&(B.
JA[i*n]), index, n*
sizeof(
INT));
1501 compress_dCSRmat(&B, &D, dtol);
void fasp_darray_cp(const INT n, const REAL *x, REAL *y)
Copy an array to the other y=x.
void fasp_mem_free(void *mem)
Free up previous allocated memory body and set pointer to NULL.
void * fasp_mem_calloc(const unsigned int size, const unsigned int type)
Allocate, initiate, and check memory.
void fasp_chkerr(const SHORT status, const char *fctname)
Check error status and print out error messages before quit.
void fasp_get_start_end(const INT procid, const INT nprocs, const INT n, INT *start, INT *end)
Assign Load to each thread.
void fasp_dvec_set(INT n, dvector *x, const REAL val)
Initialize dvector x[i]=val for i=0:n-1.
void fasp_blas_darray_axpy(const INT n, const REAL a, const REAL *x, REAL *y)
y = a*x + y
dCSRmat fasp_dcsr_create(const INT m, const INT n, const INT nnz)
Create CSR sparse matrix data memory space.
void fasp_dcsr_free(dCSRmat *A)
Free CSR sparse matrix data memory space.
void fasp_dcsr_getcol(const INT n, const dCSRmat *A, REAL *col)
Get the n-th column of a CSR matrix A.
INT fasp_dcsr_trans(const dCSRmat *A, dCSRmat *AT)
Find transpose of dCSRmat matrix A.
void fasp_blas_dcsr_aAxpy(const REAL alpha, const dCSRmat *A, const REAL *x, REAL *y)
Matrix-vector multiplication y = alpha*A*x + y.
void fasp_smoother_dcsr_sor_cf(dvector *u, dCSRmat *A, dvector *b, INT L, const REAL w, INT *mark, const INT order)
SOR smoother with C/F ordering for Au=b.
void fasp_smoother_dcsr_sor(dvector *u, const INT i_1, const INT i_n, const INT s, dCSRmat *A, dvector *b, INT L, const REAL w)
SOR method as a smoother.
void fasp_smoother_dcsr_kaczmarz(dvector *u, const INT i_1, const INT i_n, const INT s, dCSRmat *A, dvector *b, INT L, const REAL w)
Kaczmarz method as a smoother.
void fasp_smoother_dcsr_sgs(dvector *u, dCSRmat *A, dvector *b, INT L)
Symmetric Gauss-Seidel method as a smoother.
void fasp_smoother_dcsr_gs(dvector *u, const INT i_1, const INT i_n, const INT s, dCSRmat *A, dvector *b, INT L)
Gauss-Seidel method as a smoother.
void fasp_smoother_dcsr_ilu(dCSRmat *A, dvector *b, dvector *x, void *data)
ILU method as a smoother.
void fasp_smoother_dcsr_L1diag(dvector *u, const INT i_1, const INT i_n, const INT s, dCSRmat *A, dvector *b, INT L)
Diagonal scaling (using L1 norm) as a smoother.
void fasp_smoother_dcsr_jacobi(dvector *u, const INT i_1, const INT i_n, const INT s, dCSRmat *A, dvector *b, INT L, const REAL w)
Weighted Jacobi method as a smoother.
void fasp_smoother_dcsr_gs_cf(dvector *u, dCSRmat *A, dvector *b, INT L, INT *mark, const INT order)
Gauss-Seidel smoother with C/F ordering for Au=b.
void fasp_smoother_dcsr_poly(dCSRmat *Amat, dvector *brhs, dvector *usol, INT n, INT ndeg, INT L)
poly approx to A^{-1} as MG smoother
Main header file for the FASP project.
INT * ijlu
integer array of row pointers and column indexes, the size is nzlu
REAL * luval
nonzero entries of LU
Sparse matrix of REAL type in CSR format.
REAL * val
nonzero entries of A
INT row
row number of matrix A, m
INT * IA
integer array of row pointers, the size is m+1
INT * JA
integer array of column indexes, the size is nnz
Vector with n entries of REAL type.
REAL * val
actual vector entries