21#include "fasp_functs.h"
27#include "PreMGUtil.inl"
54 const INT nb = diag->
nb;
76 const INT nb2 = nb*nb;
82 INT myid, mybegin, myend;
83 INT nthreads = fasp_get_num_threads();
84#pragma omp parallel for private(myid, mybegin, myend, i)
85 for (myid = 0; myid < nthreads; myid++) {
87 for (i=mybegin; i<myend; ++i) {
94 for (i = 0; i < m; ++i) {
133 INT myid, mybegin, myend;
134 INT nthreads = fasp_get_num_threads();
135#pragma omp parallel for private(myid, mybegin, myend, i)
136 for (myid = 0; myid < nthreads; myid++) {
138 for (i = mybegin; i < myend; ++i) {
145 for (i = 0; i < m; ++i) {
181 INT myid, mybegin, myend;
182 INT nthreads = fasp_get_num_threads();
183#pragma omp parallel for private(myid, mybegin, myend, i)
184 for (myid = 0; myid < nthreads; myid++) {
186 for (i = mybegin; i < myend; ++i) {
193 for (i = 0; i < m; ++i) {
229 INT myid, mybegin, myend;
230 INT nthreads = fasp_get_num_threads();
231#pragma omp parallel for private(myid, mybegin, myend, i)
232 for (myid = 0; myid < nthreads; myid++) {
234 for (i = mybegin; i < myend; ++i) {
241 for (i = 0; i < m; ++i) {
277 INT myid, mybegin, myend;
278 INT nthreads = fasp_get_num_threads();
279#pragma omp parallel for private(myid, mybegin, myend, i)
280 for (myid = 0; myid < nthreads; myid++) {
282 for (i = mybegin; i < myend; ++i) {
289 for (i = 0; i < m; ++i) {
316 const INT m=iludata->
row, mm1=m-1, mm2=m-2, memneed=2*m;
317 const INT nb=iludata->
nb, nb2=nb*nb, size=m*nb;
322 INT ib, ibstart,ibstart1;
323 INT i, j, jj, begin_row, end_row;
324 REAL *zz, *zr, *mult;
326 if (iludata->
nwork<memneed) {
327 printf(
"### ERROR: Need %d memory, only %d available!\n",
328 memneed, iludata->
nwork);
336 memcpy(zr, r, size*
sizeof(
REAL));
344 for (i=1;i<=mm1;++i) {
345 begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
346 for (j=begin_row;j<=end_row;++j) {
348 if (jj<i) zr[i]-=lu[j]*zz[jj];
355 z[mm1]=zz[mm1]*lu[mm1];
356 for (i=mm2;i>=0;i--) {
357 begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
358 for (j=end_row;j>=begin_row;j--) {
360 if (jj>i) zz[i]-=lu[j]*z[jj];
375 for (i=1;i<=mm1;++i) {
376 begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
378 for (j=begin_row;j<=end_row;++j) {
383 for (ib=0;ib<nb;++ib) zr[ibstart+ib]-=mult[ib];
388 zz[ibstart] = zr[ibstart];
389 zz[ibstart+1] = zr[ibstart+1];
390 zz[ibstart+2] = zr[ibstart+2];
398 for (i=mm2;i>=0;i--) {
399 begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
402 for (j=end_row;j>=begin_row;j--) {
406 for (ib=0;ib<nb;++ib) zz[ibstart1+ib]-=mult[ib];
423 for (i=1;i<=mm1;++i) {
424 begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
426 for (j=begin_row;j<=end_row;++j) {
430 for (ib=0;ib<nb;++ib) zr[ibstart+ib]-=mult[ib];
443 for (i=mm2;i>=0;i--) {
444 begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
447 for (j=end_row;j>=begin_row;j--) {
451 for (ib=0;ib<nb;++ib) zz[ibstart1+ib]-=mult[ib];
468 for (i=1;i<=mm1;++i) {
469 begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
471 for (j=begin_row;j<=end_row;++j) {
475 for (ib=0;ib<nb;++ib) zr[ibstart+ib]-=mult[ib];
488 for (i=mm2;i>=0;i--) {
489 begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
492 for (j=end_row;j>=begin_row;j--) {
496 for (ib=0;ib<nb;++ib) zz[ibstart1+ib]-=mult[ib];
512 for (i=1;i<=mm1;++i) {
513 begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
515 for (j=begin_row;j<=end_row;++j) {
519 for (ib=0;ib<nb;++ib) zr[ibstart+ib]-=mult[ib];
532 for (i=mm2;i>=0;i--) {
533 begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
536 for (j=end_row;j>=begin_row;j--) {
540 for (ib=0;ib<nb;++ib) zz[ibstart1+ib]-=mult[ib];
575 const INT m=iludata->
row, memneed=2*m;
576 const INT nb=iludata->
nb, nb2=nb*nb, size=m*nb;
583 INT ib, ibstart,ibstart1;
584 INT i, j, jj, k, begin_row, end_row;
585 REAL *zz, *zr, *mult;
587 if (iludata->
nwork<memneed) {
588 printf(
"### ERROR: Need %d memory, only %d available!\n",
589 memneed, iludata->
nwork);
596 memcpy(zr, r, size*
sizeof(
REAL));
602 for (k=0; k<ncolors; ++k) {
603#pragma omp parallel for private(i,begin_row,end_row,j,jj)
604 for (i=ic[k];i<ic[k+1];++i) {
605 begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
606 for (j=begin_row;j<=end_row;++j) {
608 if (jj<i) zr[i]-=lu[j]*zz[jj];
615 for (k=ncolors-1; k>=0; k--) {
616#pragma omp parallel for private(i,begin_row,end_row,j,jj)
617 for (i=ic[k+1]-1;i>=ic[k];i--) {
618 begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
619 for (j=end_row;j>=begin_row;j--) {
621 if (jj>i) zz[i]-=lu[j]*z[jj];
632 for (k=0; k<ncolors; ++k) {
633#pragma omp parallel private(i,begin_row,end_row,ibstart,j,jj,ib,mult)
637 for (i=ic[k];i<ic[k+1];++i) {
638 begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
640 for (j=begin_row;j<=end_row;++j) {
645 for (ib=0;ib<nb;++ib) zr[ibstart+ib]-=mult[ib];
650 zz[ibstart] = zr[ibstart];
651 zz[ibstart+1] = zr[ibstart+1];
658 for (k=ncolors-1; k>=0; k--) {
659#pragma omp parallel private(i,begin_row,end_row,ibstart,ibstart1,j,jj,ib,mult)
663 for (i=ic[k+1]-1;i>=ic[k];i--) {
664 begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
667 for (j=end_row;j>=begin_row;j--) {
671 for (ib=0;ib<nb;++ib) zz[ibstart1+ib]-=mult[ib];
688 for (k=0; k<ncolors; ++k) {
689#pragma omp parallel private(i,begin_row,end_row,ibstart,j,jj,ib,mult)
693 for (i=ic[k];i<ic[k+1];++i) {
694 begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
696 for (j=begin_row;j<=end_row;++j) {
701 for (ib=0;ib<nb;++ib) zr[ibstart+ib]-=mult[ib];
706 zz[ibstart] = zr[ibstart];
707 zz[ibstart+1] = zr[ibstart+1];
708 zz[ibstart+2] = zr[ibstart+2];
715 for (k=ncolors-1; k>=0; k--) {
716#pragma omp parallel private(i,begin_row,end_row,ibstart,ibstart1,j,jj,ib,mult)
720 for (i=ic[k+1]-1;i>=ic[k];i--) {
721 begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
724 for (j=end_row;j>=begin_row;j--) {
728 for (ib=0;ib<nb;++ib) zz[ibstart1+ib]-=mult[ib];
747 printf(
"### ERROR: Multi-thread Parallel ILU for %d components \
748 has not yet been implemented!!!", nb);
779 const INT m=iludata->
row, memneed=2*m;
780 const INT nb=iludata->
nb, nb2=nb*nb, size=m*nb;
791 INT ib, ibstart,ibstart1;
792 INT i, ii, j, jj, k, begin_row, end_row;
793 REAL *zz, *zr, *mult;
795 if (iludata->
nwork<memneed) {
796 printf(
"### ERROR: Need %d memory, only %d available!\n",
797 memneed, iludata->
nwork);
805 memcpy(zr, r, size*
sizeof(
REAL));
811 for (k=0; k<nlevL; ++k) {
812#pragma omp parallel for private(i,ii,begin_row,end_row,j,jj)
813 for (ii=ilevL[k];ii<ilevL[k+1];++ii) {
815 begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
816 for (j=begin_row;j<=end_row;++j) {
818 if (jj<i) zr[i]-=lu[j]*zz[jj];
825 for (k=0; k<nlevU; k++) {
826#pragma omp parallel for private(i,ii,begin_row,end_row,j,jj)
827 for (ii=ilevU[k+1]-1;ii>=ilevU[k];ii--) {
829 begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
830 for (j=end_row;j>=begin_row;j--) {
832 if (jj>i) zz[i]-=lu[j]*z[jj];
843 for (k=0; k<nlevL; ++k) {
844#pragma omp parallel private(i,ii,begin_row,end_row,ibstart,j,jj,ib,mult)
848 for (ii=ilevL[k];ii<ilevL[k+1];++ii) {
850 begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
852 for (j=begin_row;j<=end_row;++j) {
857 for (ib=0;ib<nb;++ib) zr[ibstart+ib]-=mult[ib];
862 zz[ibstart] = zr[ibstart];
863 zz[ibstart+1] = zr[ibstart+1];
870 for (k=0; k<nlevU; k++) {
871#pragma omp parallel private(i,ii,begin_row,end_row,ibstart,ibstart1,j,jj,ib,mult)
875 for (ii=ilevU[k+1]-1;ii>=ilevU[k];ii--) {
877 begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
880 for (j=end_row;j>=begin_row;j--) {
884 for (ib=0;ib<nb;++ib) zz[ibstart1+ib]-=mult[ib];
901 for (k=0; k<nlevL; ++k) {
902#pragma omp parallel private(i,ii,begin_row,end_row,ibstart,j,jj,ib,mult)
906 for (ii=ilevL[k];ii<ilevL[k+1];++ii) {
908 begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
910 for (j=begin_row;j<=end_row;++j) {
915 for (ib=0;ib<nb;++ib) zr[ibstart+ib]-=mult[ib];
920 zz[ibstart] = zr[ibstart];
921 zz[ibstart+1] = zr[ibstart+1];
922 zz[ibstart+2] = zr[ibstart+2];
929 for (k=0; k<nlevU; k++) {
930#pragma omp parallel private(i,ii,begin_row,end_row,ibstart,ibstart1,j,jj,ib,mult)
934 for (ii=ilevU[k+1]-1;ii>=ilevU[k];ii--) {
936 begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
939 for (j=end_row;j>=begin_row;j--) {
943 for (ib=0;ib<nb;++ib) zz[ibstart1+ib]-=mult[ib];
962 printf(
"### ERROR: Multi-thread Parallel ILU for %d components \
963 has not yet been implemented!!!", nb);
994 const INT m = row*nb;
1038 const INT m = row*nb;
1063 fasp_coarse_itsolver(A_nk, &r_nk, &z_nk, 1e-12, 0);
1105 fasp_coarse_itsolver(A_nk, &r_nk, &z_nk, 1e-12, 0);
void fasp_darray_set(const INT n, REAL *x, const REAL val)
Set initial value for an array to be x=val.
void fasp_darray_cp(const INT n, const REAL *x, REAL *y)
Copy an array to the other y=x.
void fasp_mem_free(void *mem)
Free up previous allocated memory body and set pointer to NULL.
void * fasp_mem_calloc(const unsigned int size, const unsigned int type)
Allocate, initiate, and check memory.
void fasp_chkerr(const SHORT status, const char *fctname)
Check error status and print out error messages before quit.
void fasp_param_precbsr_to_amg(AMG_param *amgparam, const precond_data_bsr *pcdata)
Set AMG_param with precond_data.
void fasp_param_amg_init(AMG_param *amgparam)
Initialize AMG parameters.
void fasp_get_start_end(const INT procid, const INT nprocs, const INT n, INT *start, INT *end)
Assign Load to each thread.
void fasp_dvec_set(INT n, dvector *x, const REAL val)
Initialize dvector x[i]=val for i=0:n-1.
void fasp_dvec_alloc(const INT m, dvector *u)
Create dvector data space of REAL type.
void fasp_blas_smat_mxv_nc7(const REAL *a, const REAL *b, REAL *c)
Compute the product of a 7*7 matrix a and a array b, stored in c.
void fasp_blas_smat_mxv_nc5(const REAL *a, const REAL *b, REAL *c)
Compute the product of a 5*5 matrix a and a array b, stored in c.
void fasp_blas_smat_mxv(const REAL *a, const REAL *b, REAL *c, const INT n)
Compute the product of a small full matrix a and a array b, stored in c.
void fasp_blas_smat_mxv_nc3(const REAL *a, const REAL *b, REAL *c)
Compute the product of a 3*3 matrix a and a array b, stored in c.
void fasp_blas_smat_mxv_nc2(const REAL *a, const REAL *b, REAL *c)
Compute the product of a 2*2 matrix a and a array b, stored in c.
void fasp_blas_dbsr_aAxpy(const REAL alpha, const dBSRmat *A, const REAL *x, REAL *y)
Compute y := alpha*A*x + y.
void fasp_blas_dcsr_mxv(const dCSRmat *A, const REAL *x, REAL *y)
Matrix-vector multiplication y = A*x.
void fasp_blas_dcsr_aAxpy(const REAL alpha, const dCSRmat *A, const REAL *x, REAL *y)
Matrix-vector multiplication y = alpha*A*x + y.
void fasp_precond_dbsr_diag_nc7(REAL *r, REAL *z, void *data)
Diagonal preconditioner z=inv(D)*r.
void fasp_precond_dbsr_amg(REAL *r, REAL *z, void *data)
AMG preconditioner.
void fasp_precond_dbsr_ilu_mc_omp(REAL *r, REAL *z, void *data)
Multi-thread Parallel ILU preconditioner based on graph coloring.
void fasp_precond_dbsr_diag_nc5(REAL *r, REAL *z, void *data)
Diagonal preconditioner z=inv(D)*r.
void fasp_precond_dbsr_ilu_ls_omp(REAL *r, REAL *z, void *data)
Multi-thread Parallel ILU preconditioner based on level schedule strategy.
void fasp_precond_dbsr_amg_nk(REAL *r, REAL *z, void *data)
AMG with extra near kernel solve preconditioner.
void fasp_precond_dbsr_diag(REAL *r, REAL *z, void *data)
Diagonal preconditioner z=inv(D)*r.
void fasp_precond_dbsr_ilu(REAL *r, REAL *z, void *data)
ILU preconditioner.
void fasp_precond_dbsr_diag_nc2(REAL *r, REAL *z, void *data)
Diagonal preconditioner z=inv(D)*r.
void fasp_precond_dbsr_diag_nc3(REAL *r, REAL *z, void *data)
Diagonal preconditioner z=inv(D)*r.
void fasp_precond_dbsr_namli(REAL *r, REAL *z, void *data)
Nonlinear AMLI-cycle AMG preconditioner.
void fasp_solver_mgcycle_bsr(AMG_data_bsr *mgl, AMG_param *param)
Solve Ax=b with non-recursive multigrid cycle.
void fasp_solver_namli_bsr(AMG_data_bsr *mgl, AMG_param *param, INT l, INT num_levels)
Solve Ax=b with recursive nonlinear AMLI-cycle.
INT fasp_solver_umfpack(dCSRmat *ptrA, dvector *b, dvector *u, const SHORT prtlvl)
Solve Au=b by UMFpack.
Main header file for the FASP project.
#define SHORT
FASP integer and floating point numbers.
Data for multigrid levels in dBSRmat format.
dBSRmat A
pointer to the matrix at level level_num
dvector b
pointer to the right-hand side at level level_num
INT ILU_levels
number of levels use ILU smoother
dvector x
pointer to the iterative solution at level level_num
Parameters for AMG methods.
SHORT coarse_scaling
switch of scaling of the coarse grid correction
SHORT ILU_levels
number of levels use ILU smoother
REAL relaxation
relaxation parameter for Jacobi and SOR smoother
SHORT smoother
smoother type
SHORT cycle_type
type of AMG cycle
REAL tentative_smooth
relaxation parameter for smoothing the tentative prolongation
SHORT postsmooth_iter
number of postsmoothers
SHORT presmooth_iter
number of presmoothers
SHORT smooth_order
smoother order
INT * ijlu
integer array of row pointers and column indexes, the size is nzlu
REAL * luval
nonzero entries of LU
INT * jlevU
mapping from row to color for upper triangle
INT nlevU
number of colors for upper triangle
INT nb
block size for BSR type only
INT row
row number of matrix LU, m
INT nlevL
number of colors for lower triangle
INT * jlevL
mapping from row to color for lower triangle
INT * ilevL
number of vertices in each color for lower triangle
INT * ilevU
number of vertices in each color for upper triangle
INT nb
dimension of each sub-block
INT ROW
number of rows of sub-blocks in matrix A, M
Sparse matrix of REAL type in CSR format.
INT row
row number of matrix A, m
Vector with n entries of REAL type.
REAL * val
actual vector entries
Data for preconditioners in dBSRmat format.
SHORT coarse_scaling
switch of scaling of the coarse grid correction
dCSRmat * A_nk
Matrix data for near kernal.
REAL relaxation
relaxation parameter for SOR smoother
SHORT smoother
AMG smoother type.
SHORT cycle_type
AMG cycle type.
REAL tentative_smooth
smooth factor for smoothing the tentative prolongation
SHORT postsmooth_iter
number of postsmoothing
dCSRmat * R_nk
Resriction for near kernal.
AMG_data_bsr * mgl_data
AMG preconditioner data.
INT max_levels
max number of AMG levels
dCSRmat * P_nk
Prolongation for near kernal.
SHORT presmooth_iter
number of presmoothing
INT maxit
max number of iterations of AMG preconditioner
SHORT smooth_order
AMG smoother ordering.
Data for diagnal preconditioners in dBSRmat format.
INT nb
dimension of each sub-block
dvector diag
diagnal elements