21#include "fasp_functs.h"
95 nthreads = fasp_get_num_threads();
115 memset(y, 0X0, size*
sizeof(
REAL));
133 INT myid, mybegin, myend;
135#pragma omp parallel for private(myid, mybegin, myend, i, py0, k, j, pA, px0, py,iend)
137 for (myid =0; myid < nthreads; myid++) {
139 for (i=mybegin; i < myend; ++i) {
142 for (k = IA[i]; k < iend; ++k) {
153 for (i = 0; i < ROW; ++i) {
156 for (k = IA[i]; k < iend; ++k) {
171 INT myid, mybegin, myend;
173#pragma omp parallel for private(myid, mybegin, myend, i, py0, k, j, pA, px0, py,iend )
175 for (myid =0; myid < nthreads; myid++) {
177 for (i=mybegin; i < myend; ++i) {
180 for (k = IA[i]; k < iend; ++k) {
191 for (i = 0; i < ROW; ++i) {
194 for (k = IA[i]; k < iend; ++k) {
209 INT myid, mybegin, myend;
211#pragma omp parallel for private(myid, mybegin, myend, i, py0, k, j, pA, px0, py,iend )
213 for (myid =0; myid < nthreads; myid++) {
215 for (i=mybegin; i < myend; ++i) {
218 for (k = IA[i]; k < iend; ++k) {
229 for (i = 0; i < ROW; ++i) {
232 for (k = IA[i]; k < iend; ++k) {
247 INT myid, mybegin, myend;
249#pragma omp parallel for private(myid, mybegin, myend, i, py0, k, j, pA, px0, py,iend )
251 for (myid =0; myid < nthreads; myid++) {
253 for (i=mybegin; i < myend; ++i) {
256 for (k = IA[i]; k < iend; ++k) {
267 for (i = 0; i < ROW; ++i) {
270 for (k = IA[i]; k < iend; ++k) {
285 INT myid, mybegin, myend;
287#pragma omp parallel for private(myid, mybegin, myend, i, py0, k, j, pA, px0, py,iend)
289 for (myid =0; myid < nthreads; myid++) {
291 for (i=mybegin; i < myend; ++i) {
294 for (k = IA[i]; k < iend; ++k) {
305 for (i = 0; i < ROW; ++i) {
308 for (k = IA[i]; k < iend; ++k) {
355 const INT nb = A->
nb;
356 const INT *IA = A->
IA;
357 const INT *JA = A->
JA;
361 const REAL *pA = NULL;
362 const REAL *px0 = NULL;
376 nthreads = fasp_get_num_threads();
407 INT myid, mybegin, myend;
409#pragma omp parallel for private(myid, mybegin, myend, i, py0, k, j, pA, px0, py,iend)
411 for (myid =0; myid < nthreads; myid++) {
413 for (i=mybegin; i < myend; ++i) {
416 for (k = IA[i]; k < iend; ++k) {
427 for (i = 0; i < ROW; ++i) {
430 for (k = IA[i]; k < iend; ++k) {
445 INT myid, mybegin, myend;
447#pragma omp parallel for private(myid, mybegin, myend, i, py0, k, j, pA, px0, py,iend )
449 for (myid =0; myid < nthreads; myid++) {
451 for (i=mybegin; i < myend; ++i) {
454 for (k = IA[i]; k < iend; ++k) {
465 for (i = 0; i < ROW; ++i){
468 for (k = IA[i]; k < iend; ++k) {
483 INT myid, mybegin, myend;
485#pragma omp parallel for private(myid, mybegin, myend, i, py0, k, j, pA, px0, py,iend )
487 for (myid =0; myid < nthreads; myid++) {
489 for (i=mybegin; i < myend; ++i) {
492 for (k = IA[i]; k < iend; ++k) {
503 for (i = 0; i < ROW; ++i){
506 for (k = IA[i]; k < iend; ++k) {
521 INT myid, mybegin, myend;
523#pragma omp parallel for private(myid, mybegin, myend, i, py0, k, j, pA, px0, py,iend )
525 for (myid =0; myid < nthreads; myid++) {
527 for (i=mybegin; i < myend; ++i) {
530 for (k = IA[i]; k < iend; ++k) {
541 for (i = 0; i < ROW; ++i) {
544 for (k = IA[i]; k < iend; ++k) {
560 INT myid, mybegin, myend;
562#pragma omp parallel for private(myid, mybegin, myend, i, py0, k, j, pA, px0, py,iend)
564 for (myid =0; myid < nthreads; myid++) {
566 for (i=mybegin; i < myend; ++i) {
569 for (k = IA[i]; k < iend; ++k) {
581 for (i = 0; i < ROW; ++i) {
584 for (k = IA[i]; k < iend; ++k) {
631 const INT nb = A->
nb;
632 const INT *IA = A->
IA;
633 const INT *JA = A->
JA;
636 const REAL *px0 = NULL;
637 REAL *py0 = NULL, *py = NULL;
647 nthreads = fasp_get_num_threads();
678 INT myid, mybegin, myend;
680#pragma omp parallel for private(myid, mybegin, myend, i, py0, k, j, px0, py,iend)
682 for (myid =0; myid < nthreads; myid++) {
684 for (i=mybegin; i < myend; ++i) {
687 for (k = IA[i]; k < iend; ++k) {
698 for (i = 0; i < ROW; ++i) {
701 for (k = IA[i]; k < iend; ++k) {
716 INT myid, mybegin, myend;
718#pragma omp parallel for private(myid, mybegin, myend, i, py0, k, j, px0, py,iend )
720 for (myid =0; myid < nthreads; myid++) {
722 for (i=mybegin; i < myend; ++i) {
725 for (k = IA[i]; k < iend; ++k) {
737 for (i = 0; i < ROW; ++i){
740 for (k = IA[i]; k < iend; ++k) {
756 INT myid, mybegin, myend;
758#pragma omp parallel for private(myid, mybegin, myend, i, py0, k, j, px0, py,iend )
760 for (myid =0; myid < nthreads; myid++) {
762 for (i=mybegin; i < myend; ++i) {
765 for (k = IA[i]; k < iend; ++k) {
779 for (i = 0; i < ROW; ++i){
782 for (k = IA[i]; k < iend; ++k) {
800 INT myid, mybegin, myend;
802#pragma omp parallel for private(myid, mybegin, myend, i, py0, k, j, px0, py,iend )
804 for (myid =0; myid < nthreads; myid++) {
806 for (i=mybegin; i < myend; ++i) {
809 for (k = IA[i]; k < iend; ++k) {
825 for (i = 0; i < ROW; ++i) {
828 for (k = IA[i]; k < iend; ++k) {
849 INT myid, mybegin, myend;
851#pragma omp parallel for private(myid, mybegin, myend, i, py0, k, j, px0, py,iend)
853 for (myid =0; myid < nthreads; myid++) {
855 for (i=mybegin; i < myend; ++i) {
858 for (k = IA[i]; k < iend; ++k) {
869 for (i = 0; i < ROW; ++i) {
872 for (k = IA[i]; k < iend; ++k) {
916 const INT nb = A->
nb;
917 const INT *IA = A->
IA;
918 const INT *JA = A->
JA;
924 INT i,j,k, num_nnz_row;
926 const REAL *pA = NULL;
927 const REAL *px0 = NULL;
934 INT myid, mybegin, myend, nthreads;
937 nthreads = fasp_get_num_threads();
957#pragma omp parallel private(myid, mybegin, myend, i, py0, num_nnz_row, k, j, pA, px0, py)
959 myid = omp_get_thread_num();
961 for (i=mybegin; i < myend; ++i)
964 num_nnz_row = IA[i+1] - IA[i];
1158 for (k = IA[i]; k < IA[i+1]; ++k)
1173 for (i = 0; i < ROW; ++i)
1176 num_nnz_row = IA[i+1] - IA[i];
1370 for (k = IA[i]; k < IA[i+1]; ++k)
1389#pragma omp parallel private(myid, mybegin, myend, i, py0, num_nnz_row, k, j, pA, px0, py)
1391 myid = omp_get_thread_num();
1393 for (i=mybegin; i < myend; ++i)
1396 num_nnz_row = IA[i+1] - IA[i];
1590 for (k = IA[i]; k < IA[i+1]; ++k)
1605 for (i = 0; i < ROW; ++i)
1608 num_nnz_row = IA[i+1] - IA[i];
1802 for (k = IA[i]; k < IA[i+1]; ++k)
1821#pragma omp parallel private(myid, mybegin, myend, i, py0, num_nnz_row, k, j, pA, px0, py)
1823 myid = omp_get_thread_num();
1825 for (i=mybegin; i < myend; ++i)
1828 num_nnz_row = IA[i+1] - IA[i];
2022 for (k = IA[i]; k < IA[i+1]; ++k)
2037 for (i = 0; i < ROW; ++i)
2040 num_nnz_row = IA[i+1] - IA[i];
2234 for (k = IA[i]; k < IA[i+1]; ++k)
2253#pragma omp parallel private(myid, mybegin, myend, i, py0, num_nnz_row, k, j, pA, px0, py)
2255 myid = omp_get_thread_num();
2257 for (i=mybegin; i < myend; ++i)
2260 num_nnz_row = IA[i+1] - IA[i];
2454 for (k = IA[i]; k < IA[i+1]; ++k)
2469 for (i = 0; i < ROW; ++i)
2472 num_nnz_row = IA[i+1] - IA[i];
2666 for (k = IA[i]; k < IA[i+1]; ++k)
2703 const INT nb = A->
nb;
2704 const INT size = ROW*nb;
2705 const INT *IA = A->
IA;
2706 const INT *JA = A->
JA;
2709 const REAL *px0 = NULL;
2710 REAL *py0 = NULL, *py = NULL;
2711 INT i,j,k, num_nnz_row;
2717 INT myid, mybegin, myend, nthreads;
2720 nthreads = fasp_get_num_threads();
2740#pragma omp parallel private(myid, mybegin, myend, i, py0, num_nnz_row, k, j, pA, px0, py)
2742 myid = omp_get_thread_num();
2744 for (i=mybegin; i < myend; ++i) {
2746 num_nnz_row = IA[i+1] - IA[i];
2747 switch(num_nnz_row) {
2939 for (k = IA[i]; k < IA[i+1]; ++k)
2954 for (i = 0; i < ROW; ++i) {
2956 num_nnz_row = IA[i+1] - IA[i];
2957 switch(num_nnz_row) {
3174 for (k = IA[i]; k < IA[i+1]; ++k) {
3193#pragma omp parallel private(myid, mybegin, myend, i, py0, num_nnz_row, k, j, pA, px0, py)
3195 myid = omp_get_thread_num();
3197 for (i=mybegin; i < myend; ++i) {
3199 num_nnz_row = IA[i+1] - IA[i];
3200 switch(num_nnz_row) {
3393 for (k = IA[i]; k < IA[i+1]; ++k)
3408 for (i = 0; i < ROW; ++i) {
3410 num_nnz_row = IA[i+1] - IA[i];
3411 switch(num_nnz_row) {
3679 for (k = IA[i]; k < IA[i+1]; ++k) {
3700#pragma omp parallel private(myid, mybegin, myend, i, py0, num_nnz_row, k, j, pA, px0, py)
3702 myid = omp_get_thread_num();
3704 for (i=mybegin; i < myend; ++i) {
3706 num_nnz_row = IA[i+1] - IA[i];
3707 switch(num_nnz_row) {
3900 for (k = IA[i]; k < IA[i+1]; ++k) {
3914 for (i = 0; i < ROW; ++i) {
3916 num_nnz_row = IA[i+1] - IA[i];
3917 switch(num_nnz_row) {
4235 for (k = IA[i]; k < IA[i+1]; ++k) {
4258#pragma omp parallel private(myid, mybegin, myend, i, py0, num_nnz_row, k, j, pA, px0, py)
4260 myid = omp_get_thread_num();
4262 for (i=mybegin; i < myend; ++i) {
4264 num_nnz_row = IA[i+1] - IA[i];
4265 switch(num_nnz_row) {
4433 for (k = IA[i]; k < IA[i+1]; ++k) {
4446 for (i = 0; i < ROW; ++i) {
4448 num_nnz_row = IA[i+1] - IA[i];
4449 switch(num_nnz_row) {
4617 for (k = IA[i]; k < IA[i+1]; ++k) {
4654 const INT nb = A->
nb;
4655 const INT nb2 = nb*nb;
4658 if ( (A->
COL != B->
ROW) && (A->
nb != B->
nb ) ) {
4659 printf(
"### ERROR: Matrix sizes do not match!\n");
4674 for (i=0;i<B->
COL;++i) JD[i]=-1;
4677 for (i=0;i<C->
ROW;++i) {
4680 for (k=A->
IA[i];k<A->IA[i+1];++k) {
4681 for (j=B->
IA[A->
JA[k]];j<B->IA[A->
JA[k]+1];++j) {
4682 for (l=0;l<count;l++) {
4683 if (JD[l]==B->
JA[j])
break;
4693 for (j=0;j<count;++j) {
4698 for (i=0;i<C->
ROW;++i) C->
IA[i+1]+=C->
IA[i];
4705 for (i=0;i<C->
ROW;++i) {
4708 for (k=A->
IA[i];k<A->IA[i+1];++k) {
4709 for (j=B->
IA[A->
JA[k]];j<B->IA[A->
JA[k]+1];++j) {
4710 for (l=0;l<countJD;l++) {
4711 if (JD[l]==B->
JA[j])
break;
4715 C->
JA[count]=B->
JA[j];
4716 JD[countJD]=B->
JA[j];
4732 for (i=0;i<C->
ROW;++i) {
4733 for (j=C->
IA[i];j<C->IA[i+1];++j) {
4737 for (k=A->
IA[i];k<A->IA[i+1];++k) {
4738 for (l=B->
IA[A->
JA[k]];l<B->IA[A->
JA[k]+1];l++) {
4739 if (B->
JA[l]==C->
JA[j]) {
4779 const INT *ir=R->
IA, *ia=A->
IA, *ip=P->
IA;
4780 const INT *jr=R->
JA, *ja=A->
JA, *jp=P->
JA;
4786 INT i,i1,j,jj,k,length;
4787 INT begin_row,end_row,begin_rowA,end_rowA,begin_rowR,end_rowR;
4788 INT istart,iistart,count;
4796 for (i=0; i<A->
COL; ++i) index[i] = -2;
4798 memcpy(iindex,index,col*
sizeof(
INT));
4809 for (i=0; i < row; ++i) {
4811 istart = -1; length = 0; i1 = i+1;
4814 begin_rowR=ir[i]; end_rowR=ir[i1];
4815 for (jj=begin_rowR; jj<end_rowR; ++jj) {
4818 begin_rowA=ia[j]; end_rowA=ia[j+1];
4819 for (k=begin_rowA; k<end_rowA; ++k) {
4820 if (index[ja[k]] == -2) {
4821 index[ja[k]] = istart;
4829 count = length; iistart = -1; length = 0;
4832 for (j=0; j < count; ++j) {
4834 istart = index[istart];
4838 begin_row=ip[jj]; end_row=ip[jj+1];
4839 for (k=begin_row; k<end_row; ++k) {
4841 if (iindex[jp[k]] == -2){
4842 iindex[jp[k]] = iistart;
4850 iac[i1]=iac[i]+length;
4858 begin_row=iac[i]; end_row=iac[i1];
4859 for (j=begin_row; j<end_row; ++j) {
4863 iistart = iindex[iistart];
4865 iindex[jac[j]] = -2;
4876 for (i=0; i<row; ++i) {
4879 begin_row=iac[i]; end_row=iac[i1];
4880 for (j=begin_row; j<end_row; ++j) {
4884 istart = -1; length = 0;
4887 begin_rowR=ir[i]; end_rowR=ir[i1];
4888 for ( jj=begin_rowR; jj<end_rowR; ++jj ) {
4891 begin_rowA=ia[j]; end_rowA=ia[j+1];
4892 for (k=begin_rowA; k<end_rowA; ++k) {
4893 if (index[ja[k]] == -2) {
4894 index[ja[k]] = istart;
4909 for (j=0; j<length; ++j) {
4911 istart = index[istart];
4915 begin_row=ip[jj]; end_row=ip[jj+1];
4916 for (k=begin_row; k<end_row; ++k) {
4931 B->
IA=iac; B->
JA=jac; B->
val=acj;
4969 const INT *ir=R->
IA, *ia=A->
IA, *ip=P->
IA;
4970 const INT *jr=R->
JA, *ja=A->
JA, *jp=P->
JA;
4975 INT *Ps_marker = NULL;
4976 INT *As_marker = NULL;
4979 INT *P_marker = NULL;
4980 INT *A_marker = NULL;
4981 REAL *smat_tmp = NULL;
4984 INT i, i1, i2, i3, jj1, jj2, jj3;
4985 INT counter, jj_row_begining;
4990 INT myid, mybegin, myend, Ctemp;
4991 nthreads = fasp_get_num_threads();
4996 INT coarse_mul_nthreads = n_coarse * nthreads;
4997 INT fine_mul_nthreads = n_fine * nthreads;
4998 INT coarse_add_nthreads = n_coarse + nthreads;
4999 INT minus_one_length = coarse_mul_nthreads + fine_mul_nthreads;
5000 INT total_calloc = minus_one_length + coarse_add_nthreads + nthreads;
5003 As_marker = Ps_marker + coarse_mul_nthreads;
5015 INT * RAP_temp = As_marker + fine_mul_nthreads;
5016 INT * part_end = RAP_temp + coarse_add_nthreads;
5019#pragma omp parallel for private(myid, mybegin, myend, Ctemp, P_marker, A_marker, \
5020counter, i, jj_row_begining, jj1, i1, jj2, i2, jj3, i3)
5021 for (myid = 0; myid < nthreads; myid++) {
5023 P_marker = Ps_marker + myid * n_coarse;
5024 A_marker = As_marker + myid * n_fine;
5026 for (i = mybegin; i < myend; ++i) {
5027 P_marker[i] = counter;
5028 jj_row_begining = counter;
5030 for (jj1 = ir[i]; jj1 < ir[i+1]; ++jj1) {
5032 for (jj2 = ia[i1]; jj2 < ia[i1+1]; ++jj2) {
5034 if (A_marker[i2] != i) {
5036 for (jj3 = ip[i2]; jj3 < ip[i2+1]; ++jj3) {
5038 if (P_marker[i3] < jj_row_begining) {
5039 P_marker[i3] = counter;
5046 RAP_temp[i+myid] = jj_row_begining;
5048 RAP_temp[myend+myid] = counter;
5049 part_end[myid] = myend + myid + 1;
5052 counter = part_end[0];
5054 for (i1 = 1; i1 < nthreads; i1 ++) {
5055 Ctemp += RAP_temp[part_end[i1-1]-1];
5056 for (jj1 = part_end[i1-1]+1; jj1 < part_end[i1]; jj1 ++) {
5057 iac[counter] = RAP_temp[jj1] + Ctemp;
5065 for (i = 0; i < row; ++ i) {
5066 Ps_marker[i] = counter;
5067 jj_row_begining = counter;
5070 for (jj1 = ir[i]; jj1 < ir[i+1]; ++jj1) {
5072 for (jj2 = ia[i1]; jj2 < ia[i1+1]; ++jj2) {
5074 if (As_marker[i2] != i) {
5076 for (jj3 = ip[i2]; jj3 < ip[i2+1]; ++jj3) {
5078 if (Ps_marker[i3] < jj_row_begining) {
5079 Ps_marker[i3] = counter;
5086 iac[i] = jj_row_begining;
5105#pragma omp parallel for private(myid, mybegin, myend, Ctemp, P_marker, A_marker, \
5106counter, i, jj_row_begining, jj1, i1, jj2, i2, \
5108 for (myid = 0; myid < nthreads; myid++) {
5110 P_marker = Ps_marker + myid * n_coarse;
5111 A_marker = As_marker + myid * n_fine;
5112 smat_tmp = tmp + myid*2*nb2;
5113 counter = iac[mybegin];
5114 for (i = mybegin; i < myend; ++i) {
5115 P_marker[i] = counter;
5116 jj_row_begining = counter;
5121 for (jj1 = ir[i]; jj1 < ir[i+1]; ++jj1) {
5123 for (jj2 = ia[i1]; jj2 < ia[i1+1]; ++jj2) {
5126 if (A_marker[i2] != i) {
5128 for (jj3 = ip[i2]; jj3 < ip[i2+1]; ++jj3) {
5131 if (P_marker[i3] < jj_row_begining) {
5132 P_marker[i3] = counter;
5139 &acj[P_marker[i3]*nb2]);
5144 for (jj3 = ip[i2]; jj3 < ip[i2+1]; jj3 ++) {
5148 &acj[P_marker[i3]*nb2]);
5159 for (i = 0; i < row; ++i) {
5160 Ps_marker[i] = counter;
5161 jj_row_begining = counter;
5166 for (jj1 = ir[i]; jj1 < ir[i+1]; ++jj1) {
5168 for (jj2 = ia[i1]; jj2 < ia[i1+1]; ++jj2) {
5171 if (As_marker[i2] != i) {
5173 for (jj3 = ip[i2]; jj3 < ip[i2+1]; ++jj3) {
5176 if (Ps_marker[i3] < jj_row_begining) {
5177 Ps_marker[i3] = counter;
5184 &acj[Ps_marker[i3]*nb2]);
5189 for (jj3 = ip[i2]; jj3 < ip[i2+1]; jj3 ++) {
5203 B->
IA=iac; B->
JA=jac; B->
val=acj;
5235 const INT *ir=R->
IA, *ia=A->
IA, *ip=P->
IA;
5236 const INT *jr=R->
JA, *ja=A->
JA, *jp=P->
JA;
5240 INT *Ps_marker = NULL;
5241 INT *As_marker = NULL;
5244 INT *P_marker = NULL;
5245 INT *A_marker = NULL;
5248 INT i, i1, i2, i3, jj1, jj2, jj3;
5249 INT counter, jj_row_begining;
5254 INT myid, mybegin, myend, Ctemp;
5255 nthreads = fasp_get_num_threads();
5260 INT coarse_mul_nthreads = n_coarse * nthreads;
5261 INT fine_mul_nthreads = n_fine * nthreads;
5262 INT coarse_add_nthreads = n_coarse + nthreads;
5263 INT minus_one_length = coarse_mul_nthreads + fine_mul_nthreads;
5264 INT total_calloc = minus_one_length + coarse_add_nthreads + nthreads;
5267 As_marker = Ps_marker + coarse_mul_nthreads;
5277 INT * RAP_temp = As_marker + fine_mul_nthreads;
5278 INT * part_end = RAP_temp + coarse_add_nthreads;
5281#pragma omp parallel for private(myid, mybegin, myend, Ctemp, P_marker, A_marker, \
5282counter, i, jj_row_begining, jj1, i1, jj2, i2, jj3, i3)
5283 for (myid = 0; myid < nthreads; myid++) {
5285 P_marker = Ps_marker + myid * n_coarse;
5286 A_marker = As_marker + myid * n_fine;
5288 for (i = mybegin; i < myend; ++i) {
5289 P_marker[i] = counter;
5290 jj_row_begining = counter;
5292 for (jj1 = ir[i]; jj1 < ir[i+1]; ++jj1) {
5294 for (jj2 = ia[i1]; jj2 < ia[i1+1]; ++jj2) {
5296 if (A_marker[i2] != i) {
5298 for (jj3 = ip[i2]; jj3 < ip[i2+1]; ++jj3) {
5300 if (P_marker[i3] < jj_row_begining) {
5301 P_marker[i3] = counter;
5308 RAP_temp[i+myid] = jj_row_begining;
5310 RAP_temp[myend+myid] = counter;
5311 part_end[myid] = myend + myid + 1;
5314 counter = part_end[0];
5316 for (i1 = 1; i1 < nthreads; i1 ++) {
5317 Ctemp += RAP_temp[part_end[i1-1]-1];
5318 for (jj1 = part_end[i1-1]+1; jj1 < part_end[i1]; jj1 ++) {
5319 iac[counter] = RAP_temp[jj1] + Ctemp;
5327 for (i = 0; i < row; ++ i) {
5328 Ps_marker[i] = counter;
5329 jj_row_begining = counter;
5332 for (jj1 = ir[i]; jj1 < ir[i+1]; ++jj1) {
5334 for (jj2 = ia[i1]; jj2 < ia[i1+1]; ++jj2) {
5336 if (As_marker[i2] != i) {
5338 for (jj3 = ip[i2]; jj3 < ip[i2+1]; ++jj3) {
5340 if (Ps_marker[i3] < jj_row_begining) {
5341 Ps_marker[i3] = counter;
5348 iac[i] = jj_row_begining;
5367#pragma omp parallel for private(myid, mybegin, myend, Ctemp, P_marker, A_marker, \
5368counter, i, jj_row_begining, jj1, i1, jj2, i2, jj3, i3)
5369 for (myid = 0; myid < nthreads; myid++) {
5371 P_marker = Ps_marker + myid * n_coarse;
5372 A_marker = As_marker + myid * n_fine;
5373 counter = iac[mybegin];
5374 for (i = mybegin; i < myend; ++i) {
5375 P_marker[i] = counter;
5376 jj_row_begining = counter;
5381 for (jj1 = ir[i]; jj1 < ir[i+1]; ++jj1) {
5383 for (jj2 = ia[i1]; jj2 < ia[i1+1]; ++jj2) {
5386 if (A_marker[i2] != i) {
5388 for (jj3 = ip[i2]; jj3 < ip[i2+1]; ++jj3) {
5391 if (P_marker[i3] < jj_row_begining) {
5392 P_marker[i3] = counter;
5399 &acj[P_marker[i3]*nb2]);
5404 for (jj3 = ip[i2]; jj3 < ip[i2+1]; jj3 ++) {
5407 &acj[P_marker[i3]*nb2]);
5418 for (i = 0; i < row; ++i) {
5419 Ps_marker[i] = counter;
5420 jj_row_begining = counter;
5425 for (jj1 = ir[i]; jj1 < ir[i+1]; ++jj1) {
5427 for (jj2 = ia[i1]; jj2 < ia[i1+1]; ++jj2) {
5430 if (As_marker[i2] != i) {
5432 for (jj3 = ip[i2]; jj3 < ip[i2+1]; ++jj3) {
5434 if (Ps_marker[i3] < jj_row_begining) {
5435 Ps_marker[i3] = counter;
5442 &acj[Ps_marker[i3]*nb2]);
5447 for (jj3 = ip[i2]; jj3 < ip[i2+1]; jj3 ++) {
5450 &acj[Ps_marker[i3]*nb2]);
5462 B->
IA=iac; B->
JA=jac; B->
val=acj;
void fasp_darray_set(const INT n, REAL *x, const REAL val)
Set initial value for an array to be x=val.
void fasp_iarray_set(const INT n, INT *x, const INT val)
Set initial value for an array to be x=val.
void fasp_darray_cp(const INT n, const REAL *x, REAL *y)
Copy an array to the other y=x.
void fasp_iarray_cp(const INT n, const INT *x, INT *y)
Copy an array to the other y=x.
void fasp_mem_free(void *mem)
Free up previous allocated memory body and set pointer to NULL.
void * fasp_mem_realloc(void *oldmem, const LONGLONG tsize)
Reallocate, initiate, and check memory.
void * fasp_mem_calloc(const unsigned int size, const unsigned int type)
Allocate, initiate, and check memory.
void fasp_chkerr(const SHORT status, const char *fctname)
Check error status and print out error messages before quit.
void fasp_get_start_end(const INT procid, const INT nprocs, const INT n, INT *start, INT *end)
Assign Load to each thread.
void fasp_blas_darray_ax(const INT n, const REAL a, REAL *x)
x = a*x
void fasp_blas_darray_axpy(const INT n, const REAL a, const REAL *x, REAL *y)
y = a*x + y
void fasp_blas_smat_ypAx(const REAL *A, const REAL *x, REAL *y, const INT n)
Compute y := y + Ax, where 'A' is a n*n dense matrix.
void fasp_blas_smat_ypAx_nc3(const REAL *A, const REAL *x, REAL *y)
Compute y := y + Ax, where 'A' is a 3*3 dense matrix.
void fasp_blas_smat_ypAx_nc2(const REAL *A, const REAL *x, REAL *y)
Compute y := y + Ax, where 'A' is a 2*2 dense matrix.
void fasp_blas_smat_mul(const REAL *a, const REAL *b, REAL *c, const INT n)
Compute the matrix product of two small full matrices a and b, stored in c.
void fasp_blas_smat_ypAx_nc5(const REAL *A, const REAL *x, REAL *y)
Compute y := y + Ax, where 'A' is a 5*5 dense matrix.
void fasp_blas_smat_ypAx_nc7(const REAL *A, const REAL *x, REAL *y)
Compute y := y + Ax, where 'A' is a 7*7 dense matrix.
void fasp_blas_dbsr_rap1(const dBSRmat *R, const dBSRmat *A, const dBSRmat *P, dBSRmat *B)
dBSRmat sparse matrix multiplication B=R*A*P
void fasp_blas_dbsr_axm(dBSRmat *A, const REAL alpha)
Multiply a sparse matrix A in BSR format by a scalar alpha.
void fasp_blas_dbsr_mxm(const dBSRmat *A, const dBSRmat *B, dBSRmat *C)
Sparse matrix multiplication C=A*B.
void fasp_blas_dbsr_rap_agg(const dBSRmat *R, const dBSRmat *A, const dBSRmat *P, dBSRmat *B)
dBSRmat sparse matrix multiplication B=R*A*P, where small block matrices in P and R are identity matr...
void fasp_blas_dbsr_aAxpby(const REAL alpha, dBSRmat *A, REAL *x, const REAL beta, REAL *y)
Compute y := alpha*A*x + beta*y.
void fasp_blas_dbsr_mxv_agg(const dBSRmat *A, const REAL *x, REAL *y)
Compute y := A*x, where each small block matrices of A is an identity.
void fasp_blas_dbsr_aAxpy(const REAL alpha, const dBSRmat *A, const REAL *x, REAL *y)
Compute y := alpha*A*x + y.
void fasp_blas_dbsr_mxv(const dBSRmat *A, const REAL *x, REAL *y)
Compute y := A*x.
void fasp_blas_dbsr_aAxpy_agg(const REAL alpha, const dBSRmat *A, const REAL *x, REAL *y)
Compute y := alpha*A*x + y where each small block matrix is an identity matrix.
void fasp_blas_dbsr_rap(const dBSRmat *R, const dBSRmat *A, const dBSRmat *P, dBSRmat *B)
dBSRmat sparse matrix multiplication B=R*A*P
Main header file for the FASP project.
#define SHORT
FASP integer and floating point numbers.
#define TRUE
Definition of logic type.
Block sparse row storage matrix of REAL type.
INT COL
number of cols of sub-blocks in matrix A, N
INT NNZ
number of nonzero sub-blocks in matrix A, NNZ
INT nb
dimension of each sub-block
INT * IA
integer array of row pointers, the size is ROW+1
INT ROW
number of rows of sub-blocks in matrix A, M
INT storage_manner
storage manner for each sub-block