Fast Auxiliary Space Preconditioning 2.7.7 Aug/28/2022
PreBSR.c
Go to the documentation of this file.
1
16#ifdef _OPENMP
17#include <omp.h>
18#endif
19
20#include "fasp.h"
21#include "fasp_functs.h"
22
23/*---------------------------------*/
24/*-- Declare Private Functions --*/
25/*---------------------------------*/
26
27#include "PreMGUtil.inl"
28
29/*---------------------------------*/
30/*-- Public Functions --*/
31/*---------------------------------*/
32
50 REAL *z,
51 void *data)
52{
53 precond_diag_bsr * diag = (precond_diag_bsr *)data;
54 const INT nb = diag->nb;
55
56 switch (nb) {
57
58 case 2:
59 fasp_precond_dbsr_diag_nc2( r, z, diag);
60 break;
61 case 3:
62 fasp_precond_dbsr_diag_nc3( r, z, diag);
63 break;
64
65 case 5:
66 fasp_precond_dbsr_diag_nc5( r, z, diag);
67 break;
68
69 case 7:
70 fasp_precond_dbsr_diag_nc7( r, z, diag);
71 break;
72
73 default:
74 {
75 REAL *diagptr = diag->diag.val;
76 const INT nb2 = nb*nb;
77 const INT m = diag->diag.row/nb2;
78 INT i;
79
80#ifdef _OPENMP
81 if (m > OPENMP_HOLDS) {
82 INT myid, mybegin, myend;
83 INT nthreads = fasp_get_num_threads();
84#pragma omp parallel for private(myid, mybegin, myend, i)
85 for (myid = 0; myid < nthreads; myid++) {
86 fasp_get_start_end(myid, nthreads, m, &mybegin, &myend);
87 for (i=mybegin; i<myend; ++i) {
88 fasp_blas_smat_mxv(&(diagptr[i*nb2]),&(r[i*nb]),&(z[i*nb]),nb);
89 }
90 }
91 }
92 else {
93#endif
94 for (i = 0; i < m; ++i) {
95 fasp_blas_smat_mxv(&(diagptr[i*nb2]),&(r[i*nb]),&(z[i*nb]),nb);
96 }
97#ifdef _OPENMP
98 }
99#endif
100 break;
101 }
102 }
103}
104
122 REAL *z,
123 void *data)
124{
125 precond_diag_bsr * diag = (precond_diag_bsr *)data;
126 REAL * diagptr = diag->diag.val;
127
128 INT i;
129 const INT m = diag->diag.row/4;
130
131#ifdef _OPENMP
132 if (m > OPENMP_HOLDS) {
133 INT myid, mybegin, myend;
134 INT nthreads = fasp_get_num_threads();
135#pragma omp parallel for private(myid, mybegin, myend, i)
136 for (myid = 0; myid < nthreads; myid++) {
137 fasp_get_start_end(myid, nthreads, m, &mybegin, &myend);
138 for (i = mybegin; i < myend; ++i) {
139 fasp_blas_smat_mxv_nc2(&(diagptr[i*4]),&(r[i*2]),&(z[i*2]));
140 }
141 }
142 }
143 else {
144#endif
145 for (i = 0; i < m; ++i) {
146 fasp_blas_smat_mxv_nc2(&(diagptr[i*4]),&(r[i*2]),&(z[i*2]));
147 }
148#ifdef _OPENMP
149 }
150#endif
151}
152
170 REAL *z,
171 void *data)
172{
173 precond_diag_bsr * diag = (precond_diag_bsr *)data;
174 REAL * diagptr = diag->diag.val;
175
176 const INT m = diag->diag.row/9;
177 INT i;
178
179#ifdef _OPENMP
180 if (m > OPENMP_HOLDS) {
181 INT myid, mybegin, myend;
182 INT nthreads = fasp_get_num_threads();
183#pragma omp parallel for private(myid, mybegin, myend, i)
184 for (myid = 0; myid < nthreads; myid++) {
185 fasp_get_start_end(myid, nthreads, m, &mybegin, &myend);
186 for (i = mybegin; i < myend; ++i) {
187 fasp_blas_smat_mxv_nc3(&(diagptr[i*9]),&(r[i*3]),&(z[i*3]));
188 }
189 }
190 }
191 else {
192#endif
193 for (i = 0; i < m; ++i) {
194 fasp_blas_smat_mxv_nc3(&(diagptr[i*9]),&(r[i*3]),&(z[i*3]));
195 }
196#ifdef _OPENMP
197 }
198#endif
199}
200
218 REAL *z,
219 void *data)
220{
221 precond_diag_bsr * diag = (precond_diag_bsr *)data;
222 REAL * diagptr = diag->diag.val;
223
224 const INT m = diag->diag.row/25;
225 INT i;
226
227#ifdef _OPENMP
228 if (m > OPENMP_HOLDS) {
229 INT myid, mybegin, myend;
230 INT nthreads = fasp_get_num_threads();
231#pragma omp parallel for private(myid, mybegin, myend, i)
232 for (myid = 0; myid < nthreads; myid++) {
233 fasp_get_start_end(myid, nthreads, m, &mybegin, &myend);
234 for (i = mybegin; i < myend; ++i) {
235 fasp_blas_smat_mxv_nc5(&(diagptr[i*25]),&(r[i*5]),&(z[i*5]));
236 }
237 }
238 }
239 else {
240#endif
241 for (i = 0; i < m; ++i) {
242 fasp_blas_smat_mxv_nc5(&(diagptr[i*25]),&(r[i*5]),&(z[i*5]));
243 }
244#ifdef _OPENMP
245 }
246#endif
247}
248
266 REAL *z,
267 void *data)
268{
269 precond_diag_bsr * diag = (precond_diag_bsr *)data;
270 REAL * diagptr = diag->diag.val;
271
272 const INT m = diag->diag.row/49;
273 INT i;
274
275#ifdef _OPENMP
276 if (m > OPENMP_HOLDS) {
277 INT myid, mybegin, myend;
278 INT nthreads = fasp_get_num_threads();
279#pragma omp parallel for private(myid, mybegin, myend, i)
280 for (myid = 0; myid < nthreads; myid++) {
281 fasp_get_start_end(myid, nthreads, m, &mybegin, &myend);
282 for (i = mybegin; i < myend; ++i) {
283 fasp_blas_smat_mxv_nc7(&(diagptr[i*49]),&(r[i*7]),&(z[i*7]));
284 }
285 }
286 }
287 else {
288#endif
289 for (i = 0; i < m; ++i) {
290 fasp_blas_smat_mxv_nc7(&(diagptr[i*49]),&(r[i*7]),&(z[i*7]));
291 }
292#ifdef _OPENMP
293 }
294#endif
295}
296
312 REAL *z,
313 void *data)
314{
315 const ILU_data *iludata=(ILU_data *)data;
316 const INT m=iludata->row, mm1=m-1, mm2=m-2, memneed=2*m;
317 const INT nb=iludata->nb, nb2=nb*nb, size=m*nb;
318
319 INT *ijlu=iludata->ijlu;
320 REAL *lu=iludata->luval;
321
322 INT ib, ibstart,ibstart1;
323 INT i, j, jj, begin_row, end_row;
324 REAL *zz, *zr, *mult;
325
326 if (iludata->nwork<memneed) {
327 printf("### ERROR: Need %d memory, only %d available!\n",
328 memneed, iludata->nwork);
329 fasp_chkerr(ERROR_ALLOC_MEM, __FUNCTION__);
330 }
331
332 zz = iludata->work;
333 zr = zz + size;
334 mult = zr + size;
335
336 memcpy(zr, r, size*sizeof(REAL));
337
338 switch (nb) {
339
340 case 1:
341
342 // forward sweep: solve unit lower matrix equation L*zz=zr
343 zz[0]=zr[0];
344 for (i=1;i<=mm1;++i) {
345 begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
346 for (j=begin_row;j<=end_row;++j) {
347 jj=ijlu[j];
348 if (jj<i) zr[i]-=lu[j]*zz[jj];
349 else break;
350 }
351 zz[i]=zr[i];
352 }
353
354 // backward sweep: solve upper matrix equation U*z=zz
355 z[mm1]=zz[mm1]*lu[mm1];
356 for (i=mm2;i>=0;i--) {
357 begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
358 for (j=end_row;j>=begin_row;j--) {
359 jj=ijlu[j];
360 if (jj>i) zz[i]-=lu[j]*z[jj];
361 else break;
362 }
363 z[i]=zz[i]*lu[i];
364 }
365
366 break; //end (if nb==1)
367
368 case 3:
369
370 // forward sweep: solve unit lower matrix equation L*zz=zr
371 zz[0] = zr[0];
372 zz[1] = zr[1];
373 zz[2] = zr[2];
374
375 for (i=1;i<=mm1;++i) {
376 begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
377 ibstart=i*nb;
378 for (j=begin_row;j<=end_row;++j) {
379 jj=ijlu[j];
380 if (jj<i)
381 {
382 fasp_blas_smat_mxv_nc3(&(lu[j*nb2]),&(zz[jj*nb]),mult);
383 for (ib=0;ib<nb;++ib) zr[ibstart+ib]-=mult[ib];
384 }
385 else break;
386 }
387
388 zz[ibstart] = zr[ibstart];
389 zz[ibstart+1] = zr[ibstart+1];
390 zz[ibstart+2] = zr[ibstart+2];
391 }
392
393 // backward sweep: solve upper matrix equation U*z=zz
394 ibstart=mm1*nb2;
395 ibstart1=mm1*nb;
396 fasp_blas_smat_mxv_nc3(&(lu[ibstart]),&(zz[ibstart1]),&(z[ibstart1]));
397
398 for (i=mm2;i>=0;i--) {
399 begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
400 ibstart=i*nb2;
401 ibstart1=i*nb;
402 for (j=end_row;j>=begin_row;j--) {
403 jj=ijlu[j];
404 if (jj>i) {
405 fasp_blas_smat_mxv_nc3(&(lu[j*nb2]),&(z[jj*nb]),mult);
406 for (ib=0;ib<nb;++ib) zz[ibstart1+ib]-=mult[ib];
407 }
408
409 else break;
410 }
411
412 fasp_blas_smat_mxv_nc3(&(lu[ibstart]),&(zz[ibstart1]),&(z[ibstart1]));
413
414 }
415
416 break; // end (if nb=3)
417
418 case 5:
419
420 // forward sweep: solve unit lower matrix equation L*zz=zr
421 fasp_darray_cp(nb,&(zr[0]),&(zz[0]));
422
423 for (i=1;i<=mm1;++i) {
424 begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
425 ibstart=i*nb;
426 for (j=begin_row;j<=end_row;++j) {
427 jj=ijlu[j];
428 if (jj<i) {
429 fasp_blas_smat_mxv_nc5(&(lu[j*nb2]),&(zz[jj*nb]),mult);
430 for (ib=0;ib<nb;++ib) zr[ibstart+ib]-=mult[ib];
431 }
432 else break;
433 }
434
435 fasp_darray_cp(nb,&(zr[ibstart]),&(zz[ibstart]));
436 }
437
438 // backward sweep: solve upper matrix equation U*z=zz
439 ibstart=mm1*nb2;
440 ibstart1=mm1*nb;
441 fasp_blas_smat_mxv_nc5(&(lu[ibstart]),&(zz[ibstart1]),&(z[ibstart1]));
442
443 for (i=mm2;i>=0;i--) {
444 begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
445 ibstart=i*nb2;
446 ibstart1=i*nb;
447 for (j=end_row;j>=begin_row;j--) {
448 jj=ijlu[j];
449 if (jj>i) {
450 fasp_blas_smat_mxv_nc5(&(lu[j*nb2]),&(z[jj*nb]),mult);
451 for (ib=0;ib<nb;++ib) zz[ibstart1+ib]-=mult[ib];
452 }
453
454 else break;
455 }
456
457 fasp_blas_smat_mxv_nc5(&(lu[ibstart]),&(zz[ibstart1]),&(z[ibstart1]));
458
459 }
460
461 break; //end (if nb==5)
462
463 case 7:
464
465 // forward sweep: solve unit lower matrix equation L*zz=zr
466 fasp_darray_cp(nb,&(zr[0]),&(zz[0]));
467
468 for (i=1;i<=mm1;++i) {
469 begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
470 ibstart=i*nb;
471 for (j=begin_row;j<=end_row;++j) {
472 jj=ijlu[j];
473 if (jj<i) {
474 fasp_blas_smat_mxv_nc7(&(lu[j*nb2]),&(zz[jj*nb]),mult);
475 for (ib=0;ib<nb;++ib) zr[ibstart+ib]-=mult[ib];
476 }
477 else break;
478 }
479
480 fasp_darray_cp(nb,&(zr[ibstart]),&(zz[ibstart]));
481 }
482
483 // backward sweep: solve upper matrix equation U*z=zz
484 ibstart=mm1*nb2;
485 ibstart1=mm1*nb;
486 fasp_blas_smat_mxv_nc7(&(lu[ibstart]),&(zz[ibstart1]),&(z[ibstart1]));
487
488 for (i=mm2;i>=0;i--) {
489 begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
490 ibstart=i*nb2;
491 ibstart1=i*nb;
492 for (j=end_row;j>=begin_row;j--) {
493 jj=ijlu[j];
494 if (jj>i) {
495 fasp_blas_smat_mxv_nc7(&(lu[j*nb2]),&(z[jj*nb]),mult);
496 for (ib=0;ib<nb;++ib) zz[ibstart1+ib]-=mult[ib];
497 }
498
499 else break;
500 }
501
502 fasp_blas_smat_mxv_nc7(&(lu[ibstart]),&(zz[ibstart1]),&(z[ibstart1]));
503 }
504
505 break; //end (if nb==7)
506
507 default:
508
509 // forward sweep: solve unit lower matrix equation L*zz=zr
510 fasp_darray_cp(nb,&(zr[0]),&(zz[0]));
511
512 for (i=1;i<=mm1;++i) {
513 begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
514 ibstart=i*nb;
515 for (j=begin_row;j<=end_row;++j) {
516 jj=ijlu[j];
517 if (jj<i) {
518 fasp_blas_smat_mxv(&(lu[j*nb2]),&(zz[jj*nb]),mult,nb);
519 for (ib=0;ib<nb;++ib) zr[ibstart+ib]-=mult[ib];
520 }
521 else break;
522 }
523
524 fasp_darray_cp(nb,&(zr[ibstart]),&(zz[ibstart]));
525 }
526
527 // backward sweep: solve upper matrix equation U*z=zz
528 ibstart=mm1*nb2;
529 ibstart1=mm1*nb;
530 fasp_blas_smat_mxv(&(lu[ibstart]),&(zz[ibstart1]),&(z[ibstart1]),nb);
531
532 for (i=mm2;i>=0;i--) {
533 begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
534 ibstart=i*nb2;
535 ibstart1=i*nb;
536 for (j=end_row;j>=begin_row;j--) {
537 jj=ijlu[j];
538 if (jj>i) {
539 fasp_blas_smat_mxv(&(lu[j*nb2]),&(z[jj*nb]),mult,nb);
540 for (ib=0;ib<nb;++ib) zz[ibstart1+ib]-=mult[ib];
541 }
542
543 else break;
544 }
545
546 fasp_blas_smat_mxv(&(lu[ibstart]),&(zz[ibstart1]),&(z[ibstart1]),nb);
547 }
548
549 break; // end everything else
550 }
551
552 return;
553}
554
570 REAL *z,
571 void *data)
572{
573#ifdef _OPENMP
574 const ILU_data *iludata=(ILU_data *)data;
575 const INT m=iludata->row, memneed=2*m;
576 const INT nb=iludata->nb, nb2=nb*nb, size=m*nb;
577
578 INT *ijlu=iludata->ijlu;
579 REAL *lu=iludata->luval;
580 INT ncolors = iludata->nlevL;
581 INT *ic = iludata->ilevL;
582
583 INT ib, ibstart,ibstart1;
584 INT i, j, jj, k, begin_row, end_row;
585 REAL *zz, *zr, *mult;
586
587 if (iludata->nwork<memneed) {
588 printf("### ERROR: Need %d memory, only %d available!\n",
589 memneed, iludata->nwork);
590 fasp_chkerr(ERROR_ALLOC_MEM, __FUNCTION__);
591 }
592
593 zz = iludata->work;
594 zr = zz + size;
595
596 memcpy(zr, r, size*sizeof(REAL));
597
598 switch (nb) {
599
600 case 1:
601 // forward sweep: solve unit lower matrix equation L*zz=zr
602 for (k=0; k<ncolors; ++k) {
603#pragma omp parallel for private(i,begin_row,end_row,j,jj)
604 for (i=ic[k];i<ic[k+1];++i) {
605 begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
606 for (j=begin_row;j<=end_row;++j) {
607 jj=ijlu[j];
608 if (jj<i) zr[i]-=lu[j]*zz[jj];
609 else break;
610 }
611 zz[i]=zr[i];
612 }
613 }
614 // backward sweep: solve upper matrix equation U*z=zz
615 for (k=ncolors-1; k>=0; k--) {
616#pragma omp parallel for private(i,begin_row,end_row,j,jj)
617 for (i=ic[k+1]-1;i>=ic[k];i--) {
618 begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
619 for (j=end_row;j>=begin_row;j--) {
620 jj=ijlu[j];
621 if (jj>i) zz[i]-=lu[j]*z[jj];
622 else break;
623 }
624 z[i]=zz[i]*lu[i];
625 }
626 }
627
628 break; //end (if nb==1)
629
630 case 2:
631
632 for (k=0; k<ncolors; ++k) {
633#pragma omp parallel private(i,begin_row,end_row,ibstart,j,jj,ib,mult)
634 {
635 mult = (REAL*)fasp_mem_calloc(nb,sizeof(REAL));
636#pragma omp for
637 for (i=ic[k];i<ic[k+1];++i) {
638 begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
639 ibstart=i*nb;
640 for (j=begin_row;j<=end_row;++j) {
641 jj=ijlu[j];
642 if (jj<i)
643 {
644 fasp_blas_smat_mxv_nc2(&(lu[j*nb2]),&(zz[jj*nb]),mult);
645 for (ib=0;ib<nb;++ib) zr[ibstart+ib]-=mult[ib];
646 }
647 else break;
648 }
649
650 zz[ibstart] = zr[ibstart];
651 zz[ibstart+1] = zr[ibstart+1];
652 }
653
654 fasp_mem_free(mult); mult = NULL;
655 }
656 }
657
658 for (k=ncolors-1; k>=0; k--) {
659#pragma omp parallel private(i,begin_row,end_row,ibstart,ibstart1,j,jj,ib,mult)
660 {
661 mult = (REAL*)fasp_mem_calloc(nb,sizeof(REAL));
662#pragma omp for
663 for (i=ic[k+1]-1;i>=ic[k];i--) {
664 begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
665 ibstart=i*nb2;
666 ibstart1=i*nb;
667 for (j=end_row;j>=begin_row;j--) {
668 jj=ijlu[j];
669 if (jj>i) {
670 fasp_blas_smat_mxv_nc2(&(lu[j*nb2]),&(z[jj*nb]),mult);
671 for (ib=0;ib<nb;++ib) zz[ibstart1+ib]-=mult[ib];
672 }
673
674 else break;
675 }
676
677 fasp_blas_smat_mxv_nc2(&(lu[ibstart]),&(zz[ibstart1]),&(z[ibstart1]));
678
679 }
680
681 fasp_mem_free(mult); mult = NULL;
682 }
683 }
684
685 break; // end (if nb=2)
686 case 3:
687
688 for (k=0; k<ncolors; ++k) {
689#pragma omp parallel private(i,begin_row,end_row,ibstart,j,jj,ib,mult)
690 {
691 mult = (REAL*)fasp_mem_calloc(nb,sizeof(REAL));
692#pragma omp for
693 for (i=ic[k];i<ic[k+1];++i) {
694 begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
695 ibstart=i*nb;
696 for (j=begin_row;j<=end_row;++j) {
697 jj=ijlu[j];
698 if (jj<i)
699 {
700 fasp_blas_smat_mxv_nc3(&(lu[j*nb2]),&(zz[jj*nb]),mult);
701 for (ib=0;ib<nb;++ib) zr[ibstart+ib]-=mult[ib];
702 }
703 else break;
704 }
705
706 zz[ibstart] = zr[ibstart];
707 zz[ibstart+1] = zr[ibstart+1];
708 zz[ibstart+2] = zr[ibstart+2];
709 }
710
711 fasp_mem_free(mult); mult = NULL;
712 }
713 }
714
715 for (k=ncolors-1; k>=0; k--) {
716#pragma omp parallel private(i,begin_row,end_row,ibstart,ibstart1,j,jj,ib,mult)
717 {
718 mult = (REAL*)fasp_mem_calloc(nb,sizeof(REAL));
719#pragma omp for
720 for (i=ic[k+1]-1;i>=ic[k];i--) {
721 begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
722 ibstart=i*nb2;
723 ibstart1=i*nb;
724 for (j=end_row;j>=begin_row;j--) {
725 jj=ijlu[j];
726 if (jj>i) {
727 fasp_blas_smat_mxv_nc3(&(lu[j*nb2]),&(z[jj*nb]),mult);
728 for (ib=0;ib<nb;++ib) zz[ibstart1+ib]-=mult[ib];
729 }
730
731 else break;
732 }
733
734 fasp_blas_smat_mxv_nc3(&(lu[ibstart]),&(zz[ibstart1]),&(z[ibstart1]));
735
736 }
737
738 fasp_mem_free(mult); mult = NULL;
739 }
740 }
741
742 break; // end (if nb=3)
743
744 default:
745 {
746 if (nb > 3) {
747 printf("### ERROR: Multi-thread Parallel ILU for %d components \
748 has not yet been implemented!!!", nb);
749 fasp_chkerr(ERROR_UNKNOWN, __FUNCTION__);
750 }
751 break;
752 }
753 }
754
755 return;
756#endif
757}
758
774 REAL *z,
775 void *data)
776{
777#ifdef _OPENMP
778 const ILU_data *iludata=(ILU_data *)data;
779 const INT m=iludata->row, memneed=2*m;
780 const INT nb=iludata->nb, nb2=nb*nb, size=m*nb;
781
782 INT *ijlu=iludata->ijlu;
783 REAL *lu=iludata->luval;
784 INT nlevL = iludata->nlevL;
785 INT *ilevL = iludata->ilevL;
786 INT *jlevL = iludata->jlevL;
787 INT nlevU = iludata->nlevU;
788 INT *ilevU = iludata->ilevU;
789 INT *jlevU = iludata->jlevU;
790
791 INT ib, ibstart,ibstart1;
792 INT i, ii, j, jj, k, begin_row, end_row;
793 REAL *zz, *zr, *mult;
794
795 if (iludata->nwork<memneed) {
796 printf("### ERROR: Need %d memory, only %d available!\n",
797 memneed, iludata->nwork);
798 fasp_chkerr(ERROR_ALLOC_MEM, __FUNCTION__);
799 }
800
801 zz = iludata->work;
802 zr = zz + size;
803 //mult = zr + size;
804
805 memcpy(zr, r, size*sizeof(REAL));
806
807 switch (nb) {
808
809 case 1:
810 // forward sweep: solve unit lower matrix equation L*zz=zr
811 for (k=0; k<nlevL; ++k) {
812#pragma omp parallel for private(i,ii,begin_row,end_row,j,jj)
813 for (ii=ilevL[k];ii<ilevL[k+1];++ii) {
814 i = jlevL[ii];
815 begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
816 for (j=begin_row;j<=end_row;++j) {
817 jj=ijlu[j];
818 if (jj<i) zr[i]-=lu[j]*zz[jj];
819 else break;
820 }
821 zz[i]=zr[i];
822 }
823 }
824 // backward sweep: solve upper matrix equation U*z=zz
825 for (k=0; k<nlevU; k++) {
826#pragma omp parallel for private(i,ii,begin_row,end_row,j,jj)
827 for (ii=ilevU[k+1]-1;ii>=ilevU[k];ii--) {
828 i = jlevU[ii];
829 begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
830 for (j=end_row;j>=begin_row;j--) {
831 jj=ijlu[j];
832 if (jj>i) zz[i]-=lu[j]*z[jj];
833 else break;
834 }
835 z[i]=zz[i]*lu[i];
836 }
837 }
838
839 break; //end (if nb==1)
840
841 case 2:
842
843 for (k=0; k<nlevL; ++k) {
844#pragma omp parallel private(i,ii,begin_row,end_row,ibstart,j,jj,ib,mult)
845 {
846 mult = (REAL*)fasp_mem_calloc(nb,sizeof(REAL));
847#pragma omp for
848 for (ii=ilevL[k];ii<ilevL[k+1];++ii) {
849 i = jlevL[ii];
850 begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
851 ibstart=i*nb;
852 for (j=begin_row;j<=end_row;++j) {
853 jj=ijlu[j];
854 if (jj<i)
855 {
856 fasp_blas_smat_mxv_nc2(&(lu[j*nb2]),&(zz[jj*nb]),mult);
857 for (ib=0;ib<nb;++ib) zr[ibstart+ib]-=mult[ib];
858 }
859 else break;
860 }
861
862 zz[ibstart] = zr[ibstart];
863 zz[ibstart+1] = zr[ibstart+1];
864 }
865
866 fasp_mem_free(mult); mult = NULL;
867 }
868 }
869
870 for (k=0; k<nlevU; k++) {
871#pragma omp parallel private(i,ii,begin_row,end_row,ibstart,ibstart1,j,jj,ib,mult)
872 {
873 mult = (REAL*)fasp_mem_calloc(nb,sizeof(REAL));
874#pragma omp for
875 for (ii=ilevU[k+1]-1;ii>=ilevU[k];ii--) {
876 i = jlevU[ii];
877 begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
878 ibstart=i*nb2;
879 ibstart1=i*nb;
880 for (j=end_row;j>=begin_row;j--) {
881 jj=ijlu[j];
882 if (jj>i) {
883 fasp_blas_smat_mxv_nc2(&(lu[j*nb2]),&(z[jj*nb]),mult);
884 for (ib=0;ib<nb;++ib) zz[ibstart1+ib]-=mult[ib];
885 }
886
887 else break;
888 }
889
890 fasp_blas_smat_mxv_nc2(&(lu[ibstart]),&(zz[ibstart1]),&(z[ibstart1]));
891
892 }
893
894 fasp_mem_free(mult); mult = NULL;
895 }
896 }
897
898 break; // end (if nb=2)
899 case 3:
900
901 for (k=0; k<nlevL; ++k) {
902#pragma omp parallel private(i,ii,begin_row,end_row,ibstart,j,jj,ib,mult)
903 {
904 mult = (REAL*)fasp_mem_calloc(nb,sizeof(REAL));
905#pragma omp for
906 for (ii=ilevL[k];ii<ilevL[k+1];++ii) {
907 i = jlevL[ii];
908 begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
909 ibstart=i*nb;
910 for (j=begin_row;j<=end_row;++j) {
911 jj=ijlu[j];
912 if (jj<i)
913 {
914 fasp_blas_smat_mxv_nc3(&(lu[j*nb2]),&(zz[jj*nb]),mult);
915 for (ib=0;ib<nb;++ib) zr[ibstart+ib]-=mult[ib];
916 }
917 else break;
918 }
919
920 zz[ibstart] = zr[ibstart];
921 zz[ibstart+1] = zr[ibstart+1];
922 zz[ibstart+2] = zr[ibstart+2];
923 }
924
925 fasp_mem_free(mult); mult = NULL;
926 }
927 }
928
929 for (k=0; k<nlevU; k++) {
930#pragma omp parallel private(i,ii,begin_row,end_row,ibstart,ibstart1,j,jj,ib,mult)
931 {
932 mult = (REAL*)fasp_mem_calloc(nb,sizeof(REAL));
933#pragma omp for
934 for (ii=ilevU[k+1]-1;ii>=ilevU[k];ii--) {
935 i = jlevU[ii];
936 begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
937 ibstart=i*nb2;
938 ibstart1=i*nb;
939 for (j=end_row;j>=begin_row;j--) {
940 jj=ijlu[j];
941 if (jj>i) {
942 fasp_blas_smat_mxv_nc3(&(lu[j*nb2]),&(z[jj*nb]),mult);
943 for (ib=0;ib<nb;++ib) zz[ibstart1+ib]-=mult[ib];
944 }
945
946 else break;
947 }
948
949 fasp_blas_smat_mxv_nc3(&(lu[ibstart]),&(zz[ibstart1]),&(z[ibstart1]));
950
951 }
952
953 fasp_mem_free(mult); mult = NULL;
954 }
955 }
956
957 break; // end (if nb=3)
958
959 default:
960 {
961 if (nb > 3) {
962 printf("### ERROR: Multi-thread Parallel ILU for %d components \
963 has not yet been implemented!!!", nb);
964 fasp_chkerr(ERROR_UNKNOWN, __FUNCTION__);
965 }
966 break;
967 }
968 }
969
970 return;
971#endif
972}
973
987 REAL *z,
988 void *data)
989{
990 precond_data_bsr *predata=(precond_data_bsr *)data;
991 const INT row=predata->mgl_data[0].A.ROW;
992 const INT nb = predata->mgl_data[0].A.nb;
993 const INT maxit=predata->maxit;
994 const INT m = row*nb;
995
996 INT i;
997
998 AMG_param amgparam; fasp_param_amg_init(&amgparam);
999 amgparam.cycle_type = predata->cycle_type;
1000 amgparam.smoother = predata->smoother;
1001 amgparam.smooth_order = predata->smooth_order;
1002 amgparam.presmooth_iter = predata->presmooth_iter;
1003 amgparam.postsmooth_iter = predata->postsmooth_iter;
1004 amgparam.relaxation = predata->relaxation;
1005 amgparam.coarse_scaling = predata->coarse_scaling;
1006 amgparam.tentative_smooth = predata->tentative_smooth;
1007 amgparam.ILU_levels = predata->mgl_data->ILU_levels;
1008
1009 AMG_data_bsr *mgl = predata->mgl_data;
1010 mgl->b.row=m; fasp_darray_cp(m,r,mgl->b.val); // residual is an input
1011 mgl->x.row=m; fasp_dvec_set(m,&mgl->x,0.0);
1012
1013 for ( i=maxit; i--; ) fasp_solver_mgcycle_bsr(mgl,&amgparam);
1014
1015 fasp_darray_cp(m,mgl->x.val,z);
1016}
1017
1031 REAL *z,
1032 void *data)
1033{
1034 precond_data_bsr *predata=(precond_data_bsr *)data;
1035 const INT row=predata->mgl_data[0].A.ROW;
1036 const INT nb = predata->mgl_data[0].A.nb;
1037 const INT maxit=predata->maxit;
1038 const INT m = row*nb;
1039
1040 INT i;
1041
1042 dCSRmat *A_nk = predata->A_nk;
1043 dCSRmat *P_nk = predata->P_nk;
1044 dCSRmat *R_nk = predata->R_nk;
1045
1046 fasp_darray_set(m, z, 0.0);
1047
1048 // local variables
1049 dvector r_nk, z_nk;
1050 fasp_dvec_alloc(A_nk->row, &r_nk);
1051 fasp_dvec_alloc(A_nk->row, &z_nk);
1052
1053 //----------------------
1054 // extra kernel solve
1055 //----------------------
1056 // r_nk = R_nk*r
1057 fasp_blas_dcsr_mxv(R_nk, r, r_nk.val);
1058
1059 // z_nk = A_nk^{-1}*r_nk
1060#if WITH_UMFPACK // use UMFPACK directly
1061 fasp_solver_umfpack(A_nk, &r_nk, &z_nk, 0);
1062#else
1063 fasp_coarse_itsolver(A_nk, &r_nk, &z_nk, 1e-12, 0);
1064#endif
1065
1066 // z = z + P_nk*z_nk;
1067 fasp_blas_dcsr_aAxpy(1.0, P_nk, z_nk.val, z);
1068
1069 //----------------------
1070 // AMG solve
1071 //----------------------
1072 AMG_param amgparam; fasp_param_amg_init(&amgparam);
1073 amgparam.cycle_type = predata->cycle_type;
1074 amgparam.smoother = predata->smoother;
1075 amgparam.smooth_order = predata->smooth_order;
1076 amgparam.presmooth_iter = predata->presmooth_iter;
1077 amgparam.postsmooth_iter = predata->postsmooth_iter;
1078 amgparam.relaxation = predata->relaxation;
1079 amgparam.coarse_scaling = predata->coarse_scaling;
1080 amgparam.tentative_smooth = predata->tentative_smooth;
1081 amgparam.ILU_levels = predata->mgl_data->ILU_levels;
1082
1083 AMG_data_bsr *mgl = predata->mgl_data;
1084 mgl->b.row=m; fasp_darray_cp(m,r,mgl->b.val); // residual is an input
1085 mgl->x.row=m; //fasp_dvec_set(m,&mgl->x,0.0);
1086 fasp_darray_cp(m, z, mgl->x.val);
1087
1088 for ( i=maxit; i--; ) fasp_solver_mgcycle_bsr(mgl,&amgparam);
1089
1090 fasp_darray_cp(m,mgl->x.val,z);
1091
1092 //----------------------
1093 // extra kernel solve
1094 //----------------------
1095 // r = r - A*z
1096 fasp_blas_dbsr_aAxpy(-1.0, &(predata->mgl_data[0].A), z, mgl->b.val);
1097
1098 // r_nk = R_nk*r
1099 fasp_blas_dcsr_mxv(R_nk, mgl->b.val, r_nk.val);
1100
1101 // z_nk = A_nk^{-1}*r_nk
1102#if WITH_UMFPACK // use UMFPACK directly
1103 fasp_solver_umfpack(A_nk, &r_nk, &z_nk, 0);
1104#else
1105 fasp_coarse_itsolver(A_nk, &r_nk, &z_nk, 1e-12, 0);
1106#endif
1107
1108 // z = z + P_nk*z_nk;
1109 fasp_blas_dcsr_aAxpy(1.0, P_nk, z_nk.val, z);
1110}
1111
1125 REAL *z,
1126 void *data)
1127{
1128 precond_data_bsr *pcdata=(precond_data_bsr *)data;
1129 const INT row=pcdata->mgl_data[0].A.ROW;
1130 const INT nb=pcdata->mgl_data[0].A.nb;
1131 const INT maxit=pcdata->maxit;
1132 const SHORT num_levels=pcdata->max_levels;
1133 const INT m=row*nb;
1134
1135 INT i;
1136
1137 AMG_param amgparam;
1138 fasp_param_amg_init(&amgparam);
1139 fasp_param_precbsr_to_amg(&amgparam,pcdata);
1140
1141 AMG_data_bsr *mgl = pcdata->mgl_data;
1142 mgl->b.row=m; fasp_darray_cp(m,r,mgl->b.val); // residual is an input
1143 mgl->x.row=m; fasp_dvec_set(m,&mgl->x,0.0);
1144
1145 for ( i=maxit; i--; ) fasp_solver_namli_bsr(mgl,&amgparam,0, num_levels);
1146
1147 fasp_darray_cp(m,mgl->x.val,z);
1148}
1149
1150/*---------------------------------*/
1151/*-- End of File --*/
1152/*---------------------------------*/
void fasp_darray_set(const INT n, REAL *x, const REAL val)
Set initial value for an array to be x=val.
Definition: AuxArray.c:41
void fasp_darray_cp(const INT n, const REAL *x, REAL *y)
Copy an array to the other y=x.
Definition: AuxArray.c:164
void fasp_mem_free(void *mem)
Free up previous allocated memory body and set pointer to NULL.
Definition: AuxMemory.c:155
void * fasp_mem_calloc(const unsigned int size, const unsigned int type)
Allocate, initiate, and check memory.
Definition: AuxMemory.c:65
void fasp_chkerr(const SHORT status, const char *fctname)
Check error status and print out error messages before quit.
Definition: AuxMessage.c:213
void fasp_param_precbsr_to_amg(AMG_param *amgparam, const precond_data_bsr *pcdata)
Set AMG_param with precond_data.
Definition: AuxParam.c:790
void fasp_param_amg_init(AMG_param *amgparam)
Initialize AMG parameters.
Definition: AuxParam.c:407
void fasp_get_start_end(const INT procid, const INT nprocs, const INT n, INT *start, INT *end)
Assign Load to each thread.
Definition: AuxThreads.c:92
void fasp_dvec_set(INT n, dvector *x, const REAL val)
Initialize dvector x[i]=val for i=0:n-1.
Definition: AuxVector.c:222
void fasp_dvec_alloc(const INT m, dvector *u)
Create dvector data space of REAL type.
Definition: AuxVector.c:105
void fasp_blas_smat_mxv_nc7(const REAL *a, const REAL *b, REAL *c)
Compute the product of a 7*7 matrix a and a array b, stored in c.
Definition: BlaSmallMat.c:188
void fasp_blas_smat_mxv_nc5(const REAL *a, const REAL *b, REAL *c)
Compute the product of a 5*5 matrix a and a array b, stored in c.
Definition: BlaSmallMat.c:162
void fasp_blas_smat_mxv(const REAL *a, const REAL *b, REAL *c, const INT n)
Compute the product of a small full matrix a and a array b, stored in c.
Definition: BlaSmallMat.c:221
void fasp_blas_smat_mxv_nc3(const REAL *a, const REAL *b, REAL *c)
Compute the product of a 3*3 matrix a and a array b, stored in c.
Definition: BlaSmallMat.c:115
void fasp_blas_smat_mxv_nc2(const REAL *a, const REAL *b, REAL *c)
Compute the product of a 2*2 matrix a and a array b, stored in c.
Definition: BlaSmallMat.c:93
void fasp_blas_dbsr_aAxpy(const REAL alpha, const dBSRmat *A, const REAL *x, REAL *y)
Compute y := alpha*A*x + y.
Definition: BlaSpmvBSR.c:348
void fasp_blas_dcsr_mxv(const dCSRmat *A, const REAL *x, REAL *y)
Matrix-vector multiplication y = A*x.
Definition: BlaSpmvCSR.c:241
void fasp_blas_dcsr_aAxpy(const REAL alpha, const dCSRmat *A, const REAL *x, REAL *y)
Matrix-vector multiplication y = alpha*A*x + y.
Definition: BlaSpmvCSR.c:493
void fasp_precond_dbsr_diag_nc7(REAL *r, REAL *z, void *data)
Diagonal preconditioner z=inv(D)*r.
Definition: PreBSR.c:265
void fasp_precond_dbsr_amg(REAL *r, REAL *z, void *data)
AMG preconditioner.
Definition: PreBSR.c:986
void fasp_precond_dbsr_ilu_mc_omp(REAL *r, REAL *z, void *data)
Multi-thread Parallel ILU preconditioner based on graph coloring.
Definition: PreBSR.c:569
void fasp_precond_dbsr_diag_nc5(REAL *r, REAL *z, void *data)
Diagonal preconditioner z=inv(D)*r.
Definition: PreBSR.c:217
void fasp_precond_dbsr_ilu_ls_omp(REAL *r, REAL *z, void *data)
Multi-thread Parallel ILU preconditioner based on level schedule strategy.
Definition: PreBSR.c:773
void fasp_precond_dbsr_amg_nk(REAL *r, REAL *z, void *data)
AMG with extra near kernel solve preconditioner.
Definition: PreBSR.c:1030
void fasp_precond_dbsr_diag(REAL *r, REAL *z, void *data)
Diagonal preconditioner z=inv(D)*r.
Definition: PreBSR.c:49
void fasp_precond_dbsr_ilu(REAL *r, REAL *z, void *data)
ILU preconditioner.
Definition: PreBSR.c:311
void fasp_precond_dbsr_diag_nc2(REAL *r, REAL *z, void *data)
Diagonal preconditioner z=inv(D)*r.
Definition: PreBSR.c:121
void fasp_precond_dbsr_diag_nc3(REAL *r, REAL *z, void *data)
Diagonal preconditioner z=inv(D)*r.
Definition: PreBSR.c:169
void fasp_precond_dbsr_namli(REAL *r, REAL *z, void *data)
Nonlinear AMLI-cycle AMG preconditioner.
Definition: PreBSR.c:1124
void fasp_solver_mgcycle_bsr(AMG_data_bsr *mgl, AMG_param *param)
Solve Ax=b with non-recursive multigrid cycle.
Definition: PreMGCycle.c:268
void fasp_solver_namli_bsr(AMG_data_bsr *mgl, AMG_param *param, INT l, INT num_levels)
Solve Ax=b with recursive nonlinear AMLI-cycle.
INT fasp_solver_umfpack(dCSRmat *ptrA, dvector *b, dvector *u, const SHORT prtlvl)
Solve Au=b by UMFpack.
Definition: XtrUmfpack.c:44
Main header file for the FASP project.
#define REAL
Definition: fasp.h:67
#define SHORT
FASP integer and floating point numbers.
Definition: fasp.h:63
#define INT
Definition: fasp.h:64
#define OPENMP_HOLDS
Definition: fasp_const.h:260
#define ERROR_ALLOC_MEM
Definition: fasp_const.h:30
#define ERROR_UNKNOWN
Definition: fasp_const.h:56
Data for multigrid levels in dBSRmat format.
Definition: fasp_block.h:146
dBSRmat A
pointer to the matrix at level level_num
Definition: fasp_block.h:155
dvector b
pointer to the right-hand side at level level_num
Definition: fasp_block.h:164
INT ILU_levels
number of levels use ILU smoother
Definition: fasp_block.h:203
dvector x
pointer to the iterative solution at level level_num
Definition: fasp_block.h:167
Parameters for AMG methods.
Definition: fasp.h:447
SHORT coarse_scaling
switch of scaling of the coarse grid correction
Definition: fasp.h:495
SHORT ILU_levels
number of levels use ILU smoother
Definition: fasp.h:549
REAL relaxation
relaxation parameter for Jacobi and SOR smoother
Definition: fasp.h:486
SHORT smoother
smoother type
Definition: fasp.h:474
SHORT cycle_type
type of AMG cycle
Definition: fasp.h:468
REAL tentative_smooth
relaxation parameter for smoothing the tentative prolongation
Definition: fasp.h:540
SHORT postsmooth_iter
number of postsmoothers
Definition: fasp.h:483
SHORT presmooth_iter
number of presmoothers
Definition: fasp.h:480
SHORT smooth_order
smoother order
Definition: fasp.h:477
Data for ILU setup.
Definition: fasp.h:637
INT * ijlu
integer array of row pointers and column indexes, the size is nzlu
Definition: fasp.h:655
REAL * luval
nonzero entries of LU
Definition: fasp.h:658
INT * jlevU
mapping from row to color for upper triangle
Definition: fasp.h:702
INT nlevU
number of colors for upper triangle
Definition: fasp.h:690
INT nwork
work space size
Definition: fasp.h:664
INT nb
block size for BSR type only
Definition: fasp.h:661
INT row
row number of matrix LU, m
Definition: fasp.h:646
INT nlevL
number of colors for lower triangle
Definition: fasp.h:687
INT * jlevL
mapping from row to color for lower triangle
Definition: fasp.h:699
REAL * work
work space
Definition: fasp.h:667
INT * ilevL
number of vertices in each color for lower triangle
Definition: fasp.h:693
INT * ilevU
number of vertices in each color for upper triangle
Definition: fasp.h:696
INT nb
dimension of each sub-block
Definition: fasp_block.h:46
INT ROW
number of rows of sub-blocks in matrix A, M
Definition: fasp_block.h:37
Sparse matrix of REAL type in CSR format.
Definition: fasp.h:143
INT row
row number of matrix A, m
Definition: fasp.h:146
Vector with n entries of REAL type.
Definition: fasp.h:346
REAL * val
actual vector entries
Definition: fasp.h:352
INT row
number of rows
Definition: fasp.h:349
Data for preconditioners in dBSRmat format.
Definition: fasp_block.h:257
SHORT coarse_scaling
switch of scaling of the coarse grid correction
Definition: fasp_block.h:299
dCSRmat * A_nk
Matrix data for near kernal.
Definition: fasp_block.h:328
REAL relaxation
relaxation parameter for SOR smoother
Definition: fasp_block.h:293
SHORT smoother
AMG smoother type.
Definition: fasp_block.h:278
SHORT cycle_type
AMG cycle type.
Definition: fasp_block.h:275
REAL tentative_smooth
smooth factor for smoothing the tentative prolongation
Definition: fasp_block.h:308
SHORT postsmooth_iter
number of postsmoothing
Definition: fasp_block.h:287
dCSRmat * R_nk
Resriction for near kernal.
Definition: fasp_block.h:334
AMG_data_bsr * mgl_data
AMG preconditioner data.
Definition: fasp_block.h:314
INT max_levels
max number of AMG levels
Definition: fasp_block.h:269
dCSRmat * P_nk
Prolongation for near kernal.
Definition: fasp_block.h:331
SHORT presmooth_iter
number of presmoothing
Definition: fasp_block.h:284
INT maxit
max number of iterations of AMG preconditioner
Definition: fasp_block.h:266
SHORT smooth_order
AMG smoother ordering.
Definition: fasp_block.h:281
Data for diagnal preconditioners in dBSRmat format.
Definition: fasp_block.h:241
INT nb
dimension of each sub-block
Definition: fasp_block.h:244
dvector diag
diagnal elements
Definition: fasp_block.h:247