Fast Auxiliary Space Preconditioning 2.7.7 Aug/28/2022
ItrSmootherBSR.c
Go to the documentation of this file.
1
17#include <math.h>
18
19#ifdef _OPENMP
20#include <omp.h>
21#endif
22
23#include "fasp.h"
24#include "fasp_functs.h"
25
26/*---------------------------------*/
27/*-- Declare Private Functions --*/
28/*---------------------------------*/
29
30#ifdef _OPENMP
31
32#if ILU_MC_OMP
33static inline void perm (const INT, const INT, const REAL *, const INT *, REAL *);
34static inline void invperm (const INT, const INT, const REAL *, const INT *, REAL *);
35#endif
36
37#endif
38
41/*---------------------------------*/
42/*-- Public Functions --*/
43/*---------------------------------*/
44
60 dvector *b,
61 dvector *u)
62{
63 // members of A
64 const INT ROW = A->ROW;
65 const INT nb = A->nb;
66 const INT nb2 = nb*nb;
67 const INT size = ROW*nb2;
68 const INT *IA = A->IA;
69 const INT *JA = A->JA;
70 const REAL *val = A->val;
71
72 // local variables
73 INT i,k;
74 SHORT nthreads = 1, use_openmp = FALSE;
75 REAL *diaginv = (REAL *)fasp_mem_calloc(size, sizeof(REAL));
76
77#ifdef _OPENMP
78 if ( ROW > OPENMP_HOLDS ) {
79 use_openmp = TRUE;
80 nthreads = fasp_get_num_threads();
81 }
82#endif
83
84 // get all the diagonal sub-blocks
85 if (use_openmp) {
86 INT mybegin,myend,myid;
87#ifdef _OPENMP
88#pragma omp parallel for private(myid,mybegin,myend,i,k)
89#endif
90 for (myid=0; myid<nthreads; myid++) {
91 fasp_get_start_end(myid, nthreads, ROW, &mybegin, &myend);
92 for (i=mybegin; i<myend; i++) {
93 for (k=IA[i]; k<IA[i+1]; ++k)
94 if (JA[k] == i)
95 memcpy(diaginv+i*nb2, val+k*nb2, nb2*sizeof(REAL));
96 }
97 }
98 }
99 else {
100 for (i = 0; i < ROW; ++i) {
101 for (k = IA[i]; k < IA[i+1]; ++k) {
102 if (JA[k] == i)
103 memcpy(diaginv+i*nb2, val+k*nb2, nb2*sizeof(REAL));
104 }
105 }
106 }
107
108 // compute the inverses of all the diagonal sub-blocks
109 if (nb > 1) {
110 if (use_openmp) {
111 INT mybegin,myend,myid;
112#ifdef _OPENMP
113#pragma omp parallel for private(myid,mybegin,myend,i)
114#endif
115 for (myid=0; myid<nthreads; myid++) {
116 fasp_get_start_end(myid, nthreads, ROW, &mybegin, &myend);
117 for (i=mybegin; i<myend; i++) {
118 fasp_smat_inv(diaginv+i*nb2, nb);
119 }
120 }
121 }
122 else {
123 for (i = 0; i < ROW; ++i) {
124 fasp_smat_inv(diaginv+i*nb2, nb);
125 }
126 }
127 }
128 else {
129 if (use_openmp) {
130 INT mybegin, myend, myid;
131#ifdef _OPENMP
132#pragma omp parallel for private(myid,mybegin,myend,i)
133#endif
134 for (myid=0; myid<nthreads; myid++) {
135 fasp_get_start_end(myid, nthreads, ROW, &mybegin, &myend);
136 for (i=mybegin; i<myend; i++) {
137 diaginv[i] = 1.0 / diaginv[i];
138 }
139 }
140 }
141 else {
142 for (i = 0; i < ROW; ++i) {
143 // zero-diagonal should be tested previously
144 diaginv[i] = 1.0 / diaginv[i];
145 }
146 }
147 }
148
149 fasp_smoother_dbsr_jacobi1(A, b, u, diaginv);
150
151 fasp_mem_free(diaginv); diaginv = NULL;
152}
153
169 REAL *diaginv)
170{
171 // members of A
172 const INT ROW = A->ROW;
173 const INT nb = A->nb;
174 const INT nb2 = nb*nb;
175 const INT *IA = A->IA;
176 const INT *JA = A->JA;
177 const REAL *val = A->val;
178
179 // local variables
180 INT i,k;
181
182 SHORT nthreads = 1, use_openmp = FALSE;
183
184#ifdef _OPENMP
185 if ( ROW > OPENMP_HOLDS ) {
186 use_openmp = TRUE;
187 nthreads = fasp_get_num_threads();
188 }
189#endif
190
191 // get all the diagonal sub-blocks
192 if (use_openmp) {
193 INT mybegin,myend,myid;
194#ifdef _OPENMP
195#pragma omp parallel for private(myid,mybegin,myend,i,k)
196#endif
197 for (myid=0; myid<nthreads; myid++) {
198 fasp_get_start_end(myid, nthreads, ROW, &mybegin, &myend);
199 for (i=mybegin; i<myend; i++) {
200 for (k=IA[i]; k<IA[i+1]; ++k)
201 if (JA[k] == i)
202 memcpy(diaginv+i*nb2, val+k*nb2, nb2*sizeof(REAL));
203 }
204 }
205 }
206 else {
207 for (i = 0; i < ROW; ++i) {
208 for (k = IA[i]; k < IA[i+1]; ++k) {
209 if (JA[k] == i)
210 memcpy(diaginv+i*nb2, val+k*nb2, nb2*sizeof(REAL));
211 }
212 }
213 }
214
215 // compute the inverses of all the diagonal sub-blocks
216 if (nb > 1) {
217 if (use_openmp) {
218 INT mybegin,myend,myid;
219#ifdef _OPENMP
220#pragma omp parallel for private(myid,mybegin,myend,i)
221#endif
222 for (myid=0; myid<nthreads; myid++) {
223 fasp_get_start_end(myid, nthreads, ROW, &mybegin, &myend);
224 for (i=mybegin; i<myend; i++) {
225 fasp_smat_inv(diaginv+i*nb2, nb);
226 }
227 }
228 }
229 else {
230 for (i = 0; i < ROW; ++i) {
231 fasp_smat_inv(diaginv+i*nb2, nb);
232 }
233 }
234 }
235 else {
236 if (use_openmp) {
237 INT mybegin, myend, myid;
238#ifdef _OPENMP
239#pragma omp parallel for private(myid,mybegin,myend,i)
240#endif
241 for (myid=0; myid<nthreads; myid++) {
242 fasp_get_start_end(myid, nthreads, ROW, &mybegin, &myend);
243 for (i=mybegin; i<myend; i++) {
244 diaginv[i] = 1.0 / diaginv[i];
245 }
246 }
247 }
248 else {
249 for (i = 0; i < ROW; ++i) {
250 // zero-diagonal should be tested previously
251 diaginv[i] = 1.0 / diaginv[i];
252 }
253 }
254 }
255
256}
257
275 dvector *b,
276 dvector *u,
277 REAL *diaginv)
278{
279 // members of A
280 const INT ROW = A->ROW;
281 const INT nb = A->nb;
282 const INT nb2 = nb*nb;
283 const INT size = ROW*nb;
284 const INT *IA = A->IA;
285 const INT *JA = A->JA;
286 REAL *val = A->val;
287
288 SHORT nthreads = 1, use_openmp = FALSE;
289
290#ifdef _OPENMP
291 if ( ROW > OPENMP_HOLDS ) {
292 use_openmp = TRUE;
293 nthreads = fasp_get_num_threads();
294 }
295#endif
296
297 // values of dvector b and u
298 REAL *b_val = b->val;
299 REAL *u_val = u->val;
300
301 // auxiliary array
302 REAL *b_tmp = NULL;
303
304 // local variables
305 INT i,j,k;
306 INT pb;
307
308 // b_tmp = b_val
309 b_tmp = (REAL *)fasp_mem_calloc(size, sizeof(REAL));
310 memcpy(b_tmp, b_val, size*sizeof(REAL));
311
312 // No need to assign the smoothing order since the result doesn't depend on it
313 if (nb == 1) {
314 if (use_openmp) {
315 INT mybegin, myend, myid;
316#ifdef _OPENMP
317#pragma omp parallel for private(myid,mybegin,myend,i,j,k)
318#endif
319 for (myid=0; myid<nthreads; myid++) {
320 fasp_get_start_end(myid, nthreads, ROW, &mybegin, &myend);
321 for (i=mybegin; i<myend; i++) {
322 for (k = IA[i]; k < IA[i+1]; ++k) {
323 j = JA[k];
324 if (j != i)
325 b_tmp[i] -= val[k]*u_val[j];
326 }
327 }
328 }
329#ifdef _OPENMP
330#pragma omp parallel for private(myid,mybegin,myend,i)
331#endif
332 for (myid=0; myid<nthreads; myid++) {
333 fasp_get_start_end(myid, nthreads, ROW, &mybegin, &myend);
334 for (i=mybegin; i<myend; i++) {
335 u_val[i] = b_tmp[i]*diaginv[i];
336 }
337 }
338 }
339 else {
340 for (i = 0; i < ROW; ++i) {
341 for (k = IA[i]; k < IA[i+1]; ++k) {
342 j = JA[k];
343 if (j != i)
344 b_tmp[i] -= val[k]*u_val[j];
345 }
346 }
347 for (i = 0; i < ROW; ++i) {
348 u_val[i] = b_tmp[i]*diaginv[i];
349 }
350 }
351
352 fasp_mem_free(b_tmp); b_tmp = NULL;
353 }
354 else if (nb > 1) {
355 if (use_openmp) {
356 INT mybegin, myend, myid;
357#ifdef _OPENMP
358#pragma omp parallel for private(myid,mybegin,myend,i,pb,k,j)
359#endif
360 for (myid=0; myid<nthreads; myid++) {
361 fasp_get_start_end(myid, nthreads, ROW, &mybegin, &myend);
362 for (i=mybegin; i<myend; i++) {
363 pb = i*nb;
364 for (k = IA[i]; k < IA[i+1]; ++k) {
365 j = JA[k];
366 if (j != i)
367 fasp_blas_smat_ymAx(val+k*nb2, u_val+j*nb, b_tmp+pb, nb);
368 }
369 }
370 }
371#ifdef _OPENMP
372#pragma omp parallel for private(myid,mybegin,myend,i,pb)
373#endif
374 for (myid=0; myid<nthreads; myid++) {
375 fasp_get_start_end(myid, nthreads, ROW, &mybegin, &myend);
376 for (i=mybegin; i<myend; i++) {
377 pb = i*nb;
378 fasp_blas_smat_mxv(diaginv+nb2*i, b_tmp+pb, u_val+pb, nb);
379 }
380 }
381 }
382 else {
383 for (i = 0; i < ROW; ++i) {
384 pb = i*nb;
385 for (k = IA[i]; k < IA[i+1]; ++k) {
386 j = JA[k];
387 if (j != i)
388 fasp_blas_smat_ymAx(val+k*nb2, u_val+j*nb, b_tmp+pb, nb);
389 }
390 }
391
392 for (i = 0; i < ROW; ++i) {
393 pb = i*nb;
394 fasp_blas_smat_mxv(diaginv+nb2*i, b_tmp+pb, u_val+pb, nb);
395 }
396
397 }
398 fasp_mem_free(b_tmp); b_tmp = NULL;
399 }
400 else {
401 printf("### ERROR: nb is illegal! [%s:%d]\n", __FILE__, __LINE__);
402 fasp_chkerr(ERROR_NUM_BLOCKS, __FUNCTION__);
403 }
404
405}
406
429 dvector *b,
430 dvector *u,
431 INT order,
432 INT *mark )
433{
434 // members of A
435 const INT ROW = A->ROW;
436 const INT nb = A->nb;
437 const INT nb2 = nb*nb;
438 const INT size = ROW*nb2;
439 const INT *IA = A->IA;
440 const INT *JA = A->JA;
441 const REAL *val = A->val;
442
443 // local variables
444 INT i,k;
445 SHORT nthreads = 1, use_openmp = FALSE;
446 REAL *diaginv = (REAL *)fasp_mem_calloc(size, sizeof(REAL));
447
448#ifdef _OPENMP
449 if ( ROW > OPENMP_HOLDS ) {
450 use_openmp = TRUE;
451 nthreads = fasp_get_num_threads();
452 }
453#endif
454
455 // get all the diagonal sub-blocks
456 if (use_openmp) {
457 INT mybegin,myend,myid;
458#ifdef _OPENMP
459#pragma omp parallel for private(myid,mybegin,myend,i,k)
460#endif
461 for (myid=0; myid<nthreads; myid++) {
462 fasp_get_start_end(myid, nthreads, ROW, &mybegin, &myend);
463 for (i=mybegin; i<myend; i++) {
464 for (k=IA[i]; k<IA[i+1]; ++k)
465 if (JA[k] == i)
466 memcpy(diaginv+i*nb2, val+k*nb2, nb2*sizeof(REAL));
467 }
468 }
469 }
470 else {
471 for (i = 0; i < ROW; ++i) {
472 for (k = IA[i]; k < IA[i+1]; ++k) {
473 if (JA[k] == i)
474 memcpy(diaginv+i*nb2, val+k*nb2, nb2*sizeof(REAL));
475 }
476 }
477 }
478
479 // compute the inverses of all the diagonal sub-blocks
480 if (nb > 1) {
481 if (use_openmp) {
482 INT mybegin,myend,myid;
483#ifdef _OPENMP
484#pragma omp parallel for private(myid,mybegin,myend,i)
485#endif
486 for (myid=0; myid<nthreads; myid++) {
487 fasp_get_start_end(myid, nthreads, ROW, &mybegin, &myend);
488 for (i=mybegin; i<myend; i++) {
489 fasp_smat_inv(diaginv+i*nb2, nb);
490 }
491 }
492 }
493 else {
494 for (i = 0; i < ROW; ++i) {
495 fasp_smat_inv(diaginv+i*nb2, nb);
496 }
497 }
498 }
499 else {
500 if (use_openmp) {
501 INT mybegin, myend, myid;
502#ifdef _OPENMP
503#pragma omp parallel for private(myid,mybegin,myend,i)
504#endif
505 for (myid=0; myid<nthreads; myid++) {
506 fasp_get_start_end(myid, nthreads, ROW, &mybegin, &myend);
507 for (i=mybegin; i<myend; i++) {
508 diaginv[i] = 1.0 / diaginv[i];
509 }
510 }
511 }
512 else {
513 for (i = 0; i < ROW; ++i) {
514 // zero-diagonal should be tested previously
515 diaginv[i] = 1.0 / diaginv[i];
516 }
517 }
518 }
519
520 fasp_smoother_dbsr_gs1(A, b, u, order, mark, diaginv);
521
522 fasp_mem_free(diaginv); diaginv = NULL;
523}
524
546 dvector *b,
547 dvector *u,
548 INT order,
549 INT *mark,
550 REAL *diaginv)
551{
552 if (!mark) {
553 if (order == ASCEND) // smooth ascendingly
554 {
555 fasp_smoother_dbsr_gs_ascend(A, b, u, diaginv);
556 }
557 else if (order == DESCEND) // smooth descendingly
558 {
559 fasp_smoother_dbsr_gs_descend(A, b, u, diaginv);
560 }
561 }
562 // smooth according to the order 'mark' defined by user
563 else {
564 fasp_smoother_dbsr_gs_order1(A, b, u, diaginv, mark);
565 }
566}
567
583 dvector *b,
584 dvector *u,
585 REAL *diaginv)
586{
587 // members of A
588 const INT ROW = A->ROW;
589 const INT nb = A->nb;
590 const INT nb2 = nb*nb;
591 const INT *IA = A->IA;
592 const INT *JA = A->JA;
593 REAL *val = A->val;
594
595 // values of dvector b and u
596 REAL *b_val = b->val;
597 REAL *u_val = u->val;
598
599 // local variables
600 INT i,j,k;
601 INT pb;
602 REAL rhs = 0.0;
603
604 if (nb == 1) {
605 for (i = 0; i < ROW; ++i) {
606 rhs = b_val[i];
607 for (k = IA[i]; k < IA[i+1]; ++k) {
608 j = JA[k];
609 if (j != i)
610 rhs -= val[k]*u_val[j];
611 }
612 u_val[i] = rhs*diaginv[i];
613 }
614 }
615 else if (nb > 1) {
616 REAL *b_tmp = (REAL *)fasp_mem_calloc(nb, sizeof(REAL));
617
618 for (i = 0; i < ROW; ++i) {
619 pb = i*nb;
620 memcpy(b_tmp, b_val+pb, nb*sizeof(REAL));
621 for (k = IA[i]; k < IA[i+1]; ++k) {
622 j = JA[k];
623 if (j != i)
624 fasp_blas_smat_ymAx(val+k*nb2, u_val+j*nb, b_tmp, nb);
625 }
626 fasp_blas_smat_mxv(diaginv+nb2*i, b_tmp, u_val+pb, nb);
627 }
628
629 fasp_mem_free(b_tmp); b_tmp = NULL;
630 }
631 else {
632 printf("### ERROR: nb is illegal! [%s:%d]\n", __FILE__, __LINE__);
633 fasp_chkerr(ERROR_NUM_BLOCKS, __FUNCTION__);
634 }
635
636}
637
656 dvector *b,
657 dvector *u)
658{
659 // members of A
660 const INT ROW = A->ROW;
661 const INT nb = A->nb;
662 const INT nb2 = nb*nb;
663 const INT *IA = A->IA;
664 const INT *JA = A->JA;
665 REAL *val = A->val;
666
667 // values of dvector b and u
668 REAL *b_val = b->val;
669 REAL *u_val = u->val;
670
671 // local variables
672 INT i,j,k;
673 INT pb;
674 REAL rhs = 0.0;
675
676 if (nb == 1) {
677 for (i = 0; i < ROW; ++i) {
678 rhs = b_val[i];
679 for (k = IA[i]; k < IA[i+1]; ++k) {
680 j = JA[k];
681 if (j != i)
682 rhs -= val[k]*u_val[j];
683 }
684 u_val[i] = rhs;
685 }
686 }
687 else if (nb > 1) {
688 REAL *b_tmp = (REAL *)fasp_mem_calloc(nb, sizeof(REAL));
689
690 for (i = 0; i < ROW; ++i) {
691 pb = i*nb;
692 memcpy(b_tmp, b_val+pb, nb*sizeof(REAL));
693 for (k = IA[i]; k < IA[i+1]; ++k) {
694 j = JA[k];
695 if (j != i)
696 fasp_blas_smat_ymAx(val+k*nb2, u_val+j*nb, b_tmp, nb);
697 }
698 memcpy(u_val+pb, b_tmp, nb*sizeof(REAL));
699 }
700
701 fasp_mem_free(b_tmp); b_tmp = NULL;
702 }
703 else {
704 printf("### ERROR: nb is illegal! [%s:%d]\n", __FILE__, __LINE__);
705 fasp_chkerr(ERROR_NUM_BLOCKS, __FUNCTION__);
706 }
707
708}
709
725 dvector *b,
726 dvector *u,
727 REAL *diaginv )
728{
729 // members of A
730 const INT ROW = A->ROW;
731 const INT nb = A->nb;
732 const INT nb2 = nb*nb;
733 const INT *IA = A->IA;
734 const INT *JA = A->JA;
735 REAL *val = A->val;
736
737 // values of dvector b and u
738 REAL *b_val = b->val;
739 REAL *u_val = u->val;
740
741 // local variables
742 INT i,j,k;
743 INT pb;
744 REAL rhs = 0.0;
745
746 if (nb == 1) {
747 for (i = ROW-1; i >= 0; i--) {
748 rhs = b_val[i];
749 for (k = IA[i]; k < IA[i+1]; ++k) {
750 j = JA[k];
751 if (j != i)
752 rhs -= val[k]*u_val[j];
753 }
754 u_val[i] = rhs*diaginv[i];
755 }
756 }
757 else if (nb > 1) {
758 REAL *b_tmp = (REAL *)fasp_mem_calloc(nb, sizeof(REAL));
759
760 for (i = ROW-1; i >= 0; i--) {
761 pb = i*nb;
762 memcpy(b_tmp, b_val+pb, nb*sizeof(REAL));
763 for (k = IA[i]; k < IA[i+1]; ++k) {
764 j = JA[k];
765 if (j != i)
766 fasp_blas_smat_ymAx(val+k*nb2, u_val+j*nb, b_tmp, nb);
767 }
768 fasp_blas_smat_mxv(diaginv+nb2*i, b_tmp, u_val+pb, nb);
769 }
770
771 fasp_mem_free(b_tmp); b_tmp = NULL;
772 }
773 else {
774 printf("### ERROR: nb is illegal! [%s:%d]\n", __FILE__, __LINE__);
775 fasp_chkerr(ERROR_NUM_BLOCKS, __FUNCTION__);
776 }
777
778}
779
799 dvector *b,
800 dvector *u)
801{
802 // members of A
803 const INT ROW = A->ROW;
804 const INT nb = A->nb;
805 const INT nb2 = nb*nb;
806 const INT *IA = A->IA;
807 const INT *JA = A->JA;
808 REAL *val = A->val;
809
810 // values of dvector b and u
811 REAL *b_val = b->val;
812 REAL *u_val = u->val;
813
814 // local variables
815 INT i,j,k;
816 INT pb;
817 REAL rhs = 0.0;
818
819 if (nb == 1) {
820 for (i = ROW-1; i >= 0; i--) {
821 rhs = b_val[i];
822 for (k = IA[i]; k < IA[i+1]; ++k) {
823 j = JA[k];
824 if (j != i)
825 rhs -= val[k]*u_val[j];
826 }
827 u_val[i] = rhs;
828 }
829 }
830 else if (nb > 1) {
831 REAL *b_tmp = (REAL *)fasp_mem_calloc(nb, sizeof(REAL));
832
833 for (i = ROW-1; i >= 0; i--) {
834 pb = i*nb;
835 memcpy(b_tmp, b_val+pb, nb*sizeof(REAL));
836 for (k = IA[i]; k < IA[i+1]; ++k) {
837 j = JA[k];
838 if (j != i)
839 fasp_blas_smat_ymAx(val+k*nb2, u_val+j*nb, b_tmp, nb);
840 }
841 memcpy(u_val+pb, b_tmp, nb*sizeof(REAL));
842 }
843
844 fasp_mem_free(b_tmp); b_tmp = NULL;
845 }
846 else {
847 printf("### ERROR: nb is illegal! [%s:%d]\n", __FILE__, __LINE__);
848 fasp_chkerr(ERROR_NUM_BLOCKS, __FUNCTION__);
849 }
850
851}
852
869 dvector *b,
870 dvector *u,
871 REAL *diaginv,
872 INT *mark)
873{
874 // members of A
875 const INT ROW = A->ROW;
876 const INT nb = A->nb;
877 const INT nb2 = nb*nb;
878 const INT *IA = A->IA;
879 const INT *JA = A->JA;
880 REAL *val = A->val;
881
882 // values of dvector b and u
883 REAL *b_val = b->val;
884 REAL *u_val = u->val;
885
886 // local variables
887 INT i,j,k;
888 INT I,pb;
889 REAL rhs = 0.0;
890
891 if (nb == 1) {
892 for (I = 0; I < ROW; ++I) {
893 i = mark[I];
894 rhs = b_val[i];
895 for (k = IA[i]; k < IA[i+1]; ++k) {
896 j = JA[k];
897 if (j != i)
898 rhs -= val[k]*u_val[j];
899 }
900 u_val[i] = rhs*diaginv[i];
901 }
902 }
903 else if (nb > 1) {
904 REAL *b_tmp = (REAL *)fasp_mem_calloc(nb, sizeof(REAL));
905
906 for (I = 0; I < ROW; ++I) {
907 i = mark[I];
908 pb = i*nb;
909 memcpy(b_tmp, b_val+pb, nb*sizeof(REAL));
910 for (k = IA[i]; k < IA[i+1]; ++k) {
911 j = JA[k];
912 if (j != i)
913 fasp_blas_smat_ymAx(val+k*nb2, u_val+j*nb, b_tmp, nb);
914 }
915 fasp_blas_smat_mxv(diaginv+nb2*i, b_tmp, u_val+pb, nb);
916 }
917
918 fasp_mem_free(b_tmp); b_tmp = NULL;
919 }
920 else {
921 fasp_chkerr(ERROR_NUM_BLOCKS, __FUNCTION__);
922 }
923
924}
925
947 dvector *b,
948 dvector *u,
949 INT *mark,
950 REAL *work)
951{
952 // members of A
953 const INT ROW = A->ROW;
954 const INT nb = A->nb;
955 const INT nb2 = nb*nb;
956 const INT *IA = A->IA;
957 const INT *JA = A->JA;
958 REAL *val = A->val;
959
960 // values of dvector b and u
961 REAL *b_val = b->val;
962 REAL *u_val = u->val;
963
964 // auxiliary array
965 REAL *b_tmp = work;
966
967 // local variables
968 INT i,j,k,I,pb;
969 REAL rhs = 0.0;
970
971 if (nb == 1) {
972 for (I = 0; I < ROW; ++I) {
973 i = mark[I];
974 rhs = b_val[i];
975 for (k = IA[i]; k < IA[i+1]; ++k) {
976 j = JA[k];
977 if (j != i)
978 rhs -= val[k]*u_val[j];
979 }
980 u_val[i] = rhs;
981 }
982 }
983 else if (nb > 1) {
984 for (I = 0; I < ROW; ++I) {
985 i = mark[I];
986 pb = i*nb;
987 memcpy(b_tmp, b_val+pb, nb*sizeof(REAL));
988 for (k = IA[i]; k < IA[i+1]; ++k) {
989 j = JA[k];
990 if (j != i)
991 fasp_blas_smat_ymAx(val+k*nb2, u_val+j*nb, b_tmp, nb);
992 }
993 memcpy(u_val+pb, b_tmp, nb*sizeof(REAL));
994 }
995 }
996 else {
997 fasp_chkerr(ERROR_NUM_BLOCKS, __FUNCTION__);
998 }
999}
1000
1024 dvector *b,
1025 dvector *u,
1026 INT order,
1027 INT *mark,
1028 REAL weight)
1029{
1030 // members of A
1031 const INT ROW = A->ROW;
1032 const INT nb = A->nb;
1033 const INT nb2 = nb*nb;
1034 const INT size = ROW*nb2;
1035 const INT *IA = A->IA;
1036 const INT *JA = A->JA;
1037 const REAL *val = A->val;
1038
1039 // local variables
1040 INT i,k;
1041 REAL *diaginv = NULL;
1042
1043 SHORT nthreads = 1, use_openmp = FALSE;
1044
1045#ifdef _OPENMP
1046 if ( ROW > OPENMP_HOLDS ) {
1047 use_openmp = TRUE;
1048 nthreads = fasp_get_num_threads();
1049 }
1050#endif
1051
1052 // allocate memory
1053 diaginv = (REAL *)fasp_mem_calloc(size, sizeof(REAL));
1054
1055 // get all the diagonal sub-blocks
1056 if (use_openmp) {
1057 INT mybegin,myend,myid;
1058#ifdef _OPENMP
1059#pragma omp parallel for private(myid,mybegin,myend,i,k)
1060#endif
1061 for (myid=0; myid<nthreads; myid++) {
1062 fasp_get_start_end(myid, nthreads, ROW, &mybegin, &myend);
1063 for (i=mybegin; i<myend; i++) {
1064 for (k=IA[i]; k<IA[i+1]; ++k)
1065 if (JA[k] == i)
1066 memcpy(diaginv+i*nb2, val+k*nb2, nb2*sizeof(REAL));
1067 }
1068 }
1069 }
1070 else {
1071 for (i = 0; i < ROW; ++i) {
1072 for (k = IA[i]; k < IA[i+1]; ++k) {
1073 if (JA[k] == i)
1074 memcpy(diaginv+i*nb2, val+k*nb2, nb2*sizeof(REAL));
1075 }
1076 }
1077 }
1078
1079 // compute the inverses of all the diagonal sub-blocks
1080 if (nb > 1) {
1081 if (use_openmp) {
1082 INT mybegin,myend,myid;
1083#ifdef _OPENMP
1084#pragma omp parallel for private(myid,mybegin,myend,i)
1085#endif
1086 for (myid=0; myid<nthreads; myid++) {
1087 fasp_get_start_end(myid, nthreads, ROW, &mybegin, &myend);
1088 for (i=mybegin; i<myend; i++) {
1089 fasp_smat_inv(diaginv+i*nb2, nb);
1090 }
1091 }
1092 }
1093 else {
1094 for (i = 0; i < ROW; ++i) {
1095 fasp_smat_inv(diaginv+i*nb2, nb);
1096 }
1097 }
1098 }
1099 else {
1100 if (use_openmp) {
1101 INT mybegin, myend, myid;
1102#ifdef _OPENMP
1103#pragma omp parallel for private(myid,mybegin,myend,i)
1104#endif
1105 for (myid=0; myid<nthreads; myid++) {
1106 fasp_get_start_end(myid, nthreads, ROW, &mybegin, &myend);
1107 for (i=mybegin; i<myend; i++) {
1108 diaginv[i] = 1.0 / diaginv[i];
1109 }
1110 }
1111 }
1112 else {
1113 for (i = 0; i < ROW; ++i) {
1114 // zero-diagonal should be tested previously
1115 diaginv[i] = 1.0 / diaginv[i];
1116 }
1117 }
1118 }
1119
1120 fasp_smoother_dbsr_sor1(A, b, u, order, mark, diaginv, weight);
1121
1122 fasp_mem_free(diaginv); diaginv = NULL;
1123}
1124
1147 dvector *b,
1148 dvector *u,
1149 INT order,
1150 INT *mark,
1151 REAL *diaginv,
1152 REAL weight)
1153{
1154 if (!mark) {
1155 if (order == ASCEND) // smooth ascendingly
1156 {
1157 fasp_smoother_dbsr_sor_ascend(A, b, u, diaginv, weight);
1158 }
1159 else if (order == DESCEND) // smooth descendingly
1160 {
1161 fasp_smoother_dbsr_sor_descend(A, b, u, diaginv, weight);
1162 }
1163 }
1164 // smooth according to the order 'mark' defined by user
1165 else {
1166 fasp_smoother_dbsr_sor_order(A, b, u, diaginv, mark, weight);
1167 }
1168}
1169
1188 dvector *b,
1189 dvector *u,
1190 REAL *diaginv,
1191 REAL weight)
1192{
1193 // members of A
1194 const INT ROW = A->ROW;
1195 const INT nb = A->nb;
1196 const INT *IA = A->IA;
1197 const INT *JA = A->JA;
1198 const REAL *val = A->val;
1199
1200 // values of dvector b and u
1201 const REAL *b_val = b->val;
1202 REAL *u_val = u->val;
1203
1204 // local variables
1205 const INT nb2 = nb*nb;
1206 INT i,j,k;
1207 INT pb;
1208 REAL rhs = 0.0;
1209 REAL one_minus_weight = 1.0 - weight;
1210
1211#ifdef _OPENMP
1212 // variables for OpenMP
1213 INT myid, mybegin, myend;
1214 INT nthreads = fasp_get_num_threads();
1215#endif
1216
1217 if (nb == 1) {
1218#ifdef _OPENMP
1219 if (ROW > OPENMP_HOLDS) {
1220#pragma omp parallel for private(myid, mybegin, myend, i, rhs, k, j)
1221 for (myid = 0; myid < nthreads; myid++) {
1222 fasp_get_start_end(myid, nthreads, ROW, &mybegin, &myend);
1223 for (i = mybegin; i < myend; i++) {
1224 rhs = b_val[i];
1225 for (k = IA[i]; k < IA[i+1]; ++k) {
1226 j = JA[k];
1227 if (j != i)
1228 rhs -= val[k]*u_val[j];
1229 }
1230 u_val[i] = one_minus_weight*u_val[i] + weight*(rhs*diaginv[i]);
1231 }
1232 }
1233 }
1234 else {
1235#endif
1236 for (i = 0; i < ROW; ++i) {
1237 rhs = b_val[i];
1238 for (k = IA[i]; k < IA[i+1]; ++k) {
1239 j = JA[k];
1240 if (j != i)
1241 rhs -= val[k]*u_val[j];
1242 }
1243 u_val[i] = one_minus_weight*u_val[i] + weight*(rhs*diaginv[i]);
1244 }
1245#ifdef _OPENMP
1246 }
1247#endif
1248 }
1249 else if (nb > 1) {
1250#ifdef _OPENMP
1251 if (ROW > OPENMP_HOLDS) {
1252 REAL *b_tmp = (REAL *)fasp_mem_calloc(nb*nthreads, sizeof(REAL));
1253#pragma omp parallel for private(myid, mybegin, myend, i, pb, k, j)
1254 for (myid = 0; myid < nthreads; myid++) {
1255 fasp_get_start_end(myid, nthreads, ROW, &mybegin, &myend);
1256 for (i = mybegin; i < myend; i++) {
1257 pb = i*nb;
1258 memcpy(b_tmp+myid*nb, b_val+pb, nb*sizeof(REAL));
1259 for (k = IA[i]; k < IA[i+1]; ++k) {
1260 j = JA[k];
1261 if (j != i)
1262 fasp_blas_smat_ymAx(val+k*nb2, u_val+j*nb, b_tmp, nb);
1263 }
1264 fasp_blas_smat_aAxpby(weight, diaginv+nb2*i, b_tmp+myid*nb, one_minus_weight, u_val+pb, nb);
1265 }
1266 }
1267 fasp_mem_free(b_tmp); b_tmp = NULL;
1268 }
1269 else {
1270#endif
1271 REAL *b_tmp = (REAL *)fasp_mem_calloc(nb, sizeof(REAL));
1272 for (i = 0; i < ROW; ++i) {
1273 pb = i*nb;
1274 memcpy(b_tmp, b_val+pb, nb*sizeof(REAL));
1275 for (k = IA[i]; k < IA[i+1]; ++k) {
1276 j = JA[k];
1277 if (j != i)
1278 fasp_blas_smat_ymAx(val+k*nb2, u_val+j*nb, b_tmp, nb);
1279 }
1280 fasp_blas_smat_aAxpby(weight, diaginv+nb2*i, b_tmp, one_minus_weight, u_val+pb, nb);
1281 }
1282 fasp_mem_free(b_tmp); b_tmp = NULL;
1283#ifdef _OPENMP
1284 }
1285#endif
1286 }
1287 else {
1288 fasp_chkerr(ERROR_NUM_BLOCKS, __FUNCTION__);
1289 }
1290
1291}
1292
1311 dvector *b,
1312 dvector *u,
1313 REAL *diaginv,
1314 REAL weight)
1315{
1316 // members of A
1317 const INT ROW = A->ROW;
1318 const INT nb = A->nb;
1319 const INT nb2 = nb*nb;
1320 const INT *IA = A->IA;
1321 const INT *JA = A->JA;
1322 REAL *val = A->val;
1323 const REAL one_minus_weight = 1.0 - weight;
1324
1325 // values of dvector b and u
1326 REAL *b_val = b->val;
1327 REAL *u_val = u->val;
1328
1329 // local variables
1330 INT i,j,k;
1331 INT pb;
1332 REAL rhs = 0.0;
1333
1334#ifdef _OPENMP
1335 // variables for OpenMP
1336 INT myid, mybegin, myend;
1337 INT nthreads = fasp_get_num_threads();
1338#endif
1339
1340 if (nb == 1) {
1341#ifdef _OPENMP
1342 if (ROW > OPENMP_HOLDS) {
1343#pragma omp parallel for private(myid, mybegin, myend, i, rhs, k, j)
1344 for (myid = 0; myid < nthreads; myid++) {
1345 fasp_get_start_end(myid, nthreads, ROW, &mybegin, &myend);
1346 mybegin = ROW-1-mybegin; myend = ROW-1-myend;
1347 for (i = mybegin; i > myend; i--) {
1348 rhs = b_val[i];
1349 for (k = IA[i]; k < IA[i+1]; ++k) {
1350 j = JA[k];
1351 if (j != i)
1352 rhs -= val[k]*u_val[j];
1353 }
1354 u_val[i] = one_minus_weight*u_val[i] + weight*(rhs*diaginv[i]);
1355 }
1356 }
1357 }
1358 else {
1359#endif
1360 for (i = ROW-1; i >= 0; i--) {
1361 rhs = b_val[i];
1362 for (k = IA[i]; k < IA[i+1]; ++k) {
1363 j = JA[k];
1364 if (j != i)
1365 rhs -= val[k]*u_val[j];
1366 }
1367 u_val[i] = one_minus_weight*u_val[i] + weight*(rhs*diaginv[i]);
1368 }
1369#ifdef _OPENMP
1370 }
1371#endif
1372 }
1373 else if (nb > 1) {
1374#ifdef _OPENMP
1375 if (ROW > OPENMP_HOLDS) {
1376 REAL *b_tmp = (REAL *)fasp_mem_calloc(nb*nthreads, sizeof(REAL));
1377#pragma omp parallel for private(myid, mybegin, myend, i, pb, k, j)
1378 for (myid = 0; myid < nthreads; myid++) {
1379 fasp_get_start_end(myid, nthreads, ROW, &mybegin, &myend);
1380 mybegin = ROW-1-mybegin; myend = ROW-1-myend;
1381 for (i = mybegin; i > myend; i--) {
1382 pb = i*nb;
1383 memcpy(b_tmp+myid*nb, b_val+pb, nb*sizeof(REAL));
1384 for (k = IA[i]; k < IA[i+1]; ++k) {
1385 j = JA[k];
1386 if (j != i)
1387 fasp_blas_smat_ymAx(val+k*nb2, u_val+j*nb, b_tmp+myid*nb, nb);
1388 }
1389 fasp_blas_smat_aAxpby(weight, diaginv+nb2*i, b_tmp+myid*nb,
1390 one_minus_weight, u_val+pb, nb);
1391 }
1392 }
1393 fasp_mem_free(b_tmp); b_tmp = NULL;
1394 }
1395 else {
1396#endif
1397 REAL *b_tmp = (REAL *)fasp_mem_calloc(nb, sizeof(REAL));
1398 for (i = ROW-1; i >= 0; i--) {
1399 pb = i*nb;
1400 memcpy(b_tmp, b_val+pb, nb*sizeof(REAL));
1401 for (k = IA[i]; k < IA[i+1]; ++k) {
1402 j = JA[k];
1403 if (j != i)
1404 fasp_blas_smat_ymAx(val+k*nb2, u_val+j*nb, b_tmp, nb);
1405 }
1406 fasp_blas_smat_aAxpby(weight, diaginv+nb2*i, b_tmp, one_minus_weight,
1407 u_val+pb, nb);
1408 }
1409 fasp_mem_free(b_tmp); b_tmp = NULL;
1410#ifdef _OPENMP
1411 }
1412#endif
1413 }
1414 else {
1415 fasp_chkerr(ERROR_NUM_BLOCKS, __FUNCTION__);
1416 }
1417
1418}
1419
1439 dvector *b,
1440 dvector *u,
1441 REAL *diaginv,
1442 INT *mark,
1443 REAL weight)
1444{
1445 // members of A
1446 const INT ROW = A->ROW;
1447 const INT nb = A->nb;
1448 const INT nb2 = nb*nb;
1449 const INT *IA = A->IA;
1450 const INT *JA = A->JA;
1451 REAL *val = A->val;
1452 const REAL one_minus_weight = 1.0 - weight;
1453
1454 // values of dvector b and u
1455 REAL *b_val = b->val;
1456 REAL *u_val = u->val;
1457
1458 // local variables
1459 INT i,j,k;
1460 INT I,pb;
1461 REAL rhs = 0.0;
1462
1463#ifdef _OPENMP
1464 // variables for OpenMP
1465 INT myid, mybegin, myend;
1466 INT nthreads = fasp_get_num_threads();
1467#endif
1468
1469 if (nb == 1) {
1470#ifdef _OPENMP
1471 if (ROW > OPENMP_HOLDS) {
1472#pragma omp parallel for private(myid, mybegin, myend, I, i, rhs, k, j)
1473 for (myid = 0; myid < nthreads; myid++) {
1474 fasp_get_start_end(myid, nthreads, ROW, &mybegin, &myend);
1475 for (I = mybegin; I < myend; ++I) {
1476 i = mark[I];
1477 rhs = b_val[i];
1478 for (k = IA[i]; k < IA[i+1]; ++k) {
1479 j = JA[k];
1480 if (j != i)
1481 rhs -= val[k]*u_val[j];
1482 }
1483 u_val[i] = one_minus_weight*u_val[i] + weight*(rhs*diaginv[i]);
1484 }
1485 }
1486 }
1487 else {
1488#endif
1489 for (I = 0; I < ROW; ++I) {
1490 i = mark[I];
1491 rhs = b_val[i];
1492 for (k = IA[i]; k < IA[i+1]; ++k) {
1493 j = JA[k];
1494 if (j != i)
1495 rhs -= val[k]*u_val[j];
1496 }
1497 u_val[i] = one_minus_weight*u_val[i] + weight*(rhs*diaginv[i]);
1498 }
1499#ifdef _OPENMP
1500 }
1501#endif
1502 }
1503 else if (nb > 1) {
1504#ifdef _OPENMP
1505 if (ROW > OPENMP_HOLDS) {
1506 REAL *b_tmp = (REAL *)fasp_mem_calloc(nb*nthreads, sizeof(REAL));
1507#pragma omp parallel for private(myid, mybegin, myend, I, i, pb, k, j)
1508 for (myid = 0; myid < nthreads; myid++) {
1509 fasp_get_start_end(myid, nthreads, ROW, &mybegin, &myend);
1510 for (I = mybegin; I < myend; ++I) {
1511 i = mark[I];
1512 pb = i*nb;
1513 memcpy(b_tmp+myid*nb, b_val+pb, nb*sizeof(REAL));
1514 for (k = IA[i]; k < IA[i+1]; ++k) {
1515 j = JA[k];
1516 if (j != i)
1517 fasp_blas_smat_ymAx(val+k*nb2, u_val+j*nb, b_tmp+myid*nb, nb);
1518 }
1519 fasp_blas_smat_aAxpby(weight, diaginv+nb2*i, b_tmp+myid*nb,
1520 one_minus_weight, u_val+pb, nb);
1521 }
1522 }
1523 fasp_mem_free(b_tmp); b_tmp = NULL;
1524 }
1525 else {
1526#endif
1527 REAL *b_tmp = (REAL *)fasp_mem_calloc(nb, sizeof(REAL));
1528 for (I = 0; I < ROW; ++I) {
1529 i = mark[I];
1530 pb = i*nb;
1531 memcpy(b_tmp, b_val+pb, nb*sizeof(REAL));
1532 for (k = IA[i]; k < IA[i+1]; ++k) {
1533 j = JA[k];
1534 if (j != i)
1535 fasp_blas_smat_ymAx(val+k*nb2, u_val+j*nb, b_tmp, nb);
1536 }
1537 fasp_blas_smat_aAxpby(weight, diaginv+nb2*i, b_tmp, one_minus_weight,
1538 u_val+pb, nb);
1539 }
1540 fasp_mem_free(b_tmp); b_tmp = NULL;
1541#ifdef _OPENMP
1542 }
1543#endif
1544 }
1545 else {
1546 fasp_chkerr(ERROR_NUM_BLOCKS, __FUNCTION__);
1547 }
1548
1549}
1550
1567 dvector *b,
1568 dvector *x,
1569 void *data)
1570{
1571 ILU_data *iludata=(ILU_data *)data;
1572 const INT nb=iludata->nb,m=A->ROW*nb, memneed=5*m;
1573
1574 REAL *xval = x->val, *bval = b->val;
1575 REAL *zr = iludata->work + 3*m;
1576 REAL *z = zr + m;
1577
1578 double start, end;
1579
1580 if (iludata->nwork<memneed) goto MEMERR;
1581
1583 fasp_darray_cp(m,bval,zr); fasp_blas_dbsr_aAxpy(-1.0,A,xval,zr);
1584
1586#ifdef __OPENMP
1587
1588#if ILU_MC_OMP
1589 REAL *tz = (REAL*)fasp_mem_calloc(A->ROW*A->nb, sizeof(REAL));
1590 REAL *tzr = (REAL*)fasp_mem_calloc(A->ROW*A->nb, sizeof(REAL));
1591 perm(A->ROW, A->nb, zr, iludata->jlevL, tzr);
1592
1593 fasp_gettime(&start);
1594 fasp_precond_dbsr_ilu_mc_omp(tzr,tz,iludata);
1595 fasp_gettime(&end);
1596
1597 invperm(A->ROW, A->nb, tz, iludata->jlevL, z);
1598 fasp_mem_free(tzr); tzr = NULL;
1599 fasp_mem_free(tz); tz = NULL;
1600#else
1601 fasp_gettime(&start);
1602 fasp_precond_dbsr_ilu_ls_omp(zr,z,iludata);
1603 fasp_gettime(&end);
1604#endif
1605
1606 ilu_solve_time += end-start;
1607
1608#else
1609
1610 fasp_gettime(&start);
1611 fasp_precond_dbsr_ilu(zr,z,iludata);
1612 fasp_gettime(&end);
1613 ilu_solve_time += end-start;
1614
1615#endif
1616
1618 fasp_blas_darray_axpy(m,1,z,xval);
1619
1620 return;
1621
1622MEMERR:
1623 printf("### ERROR: ILU needs %d memory, only %d available! [%s:%d]\n",
1624 memneed, iludata->nwork, __FILE__, __LINE__);
1625 fasp_chkerr(ERROR_ALLOC_MEM, __FUNCTION__);
1626}
1627
1628/*---------------------------------*/
1629/*-- Private Functions --*/
1630/*---------------------------------*/
1631
1632#ifdef _OPENMP
1633
1634#if ILU_MC_OMP
1635
1651static inline void perm (const INT n,
1652 const INT nb,
1653 const REAL *x,
1654 const INT *p,
1655 REAL *y)
1656{
1657 INT i, j, indx, indy;
1658
1659#ifdef _OPENMP
1660#pragma omp parallel for private(i, j, indx, indy)
1661#endif
1662 for (i=0; i<n; ++i) {
1663 indx = p[i]*nb;
1664 indy = i*nb;
1665 for (j=0; j<nb; ++j) {
1666 y[indy+j] = x[indx+j];
1667 }
1668 }
1669}
1670
1686static inline void invperm (const INT n,
1687 const INT nb,
1688 const REAL *x,
1689 const INT *p,
1690 REAL *y)
1691{
1692 INT i, j, indx, indy;
1693
1694#ifdef _OPENMP
1695#pragma omp parallel for private(i, j, indx, indy)
1696#endif
1697 for (i=0; i<n; ++i) {
1698 indx = i*nb;
1699 indy = p[i]*nb;
1700 for (j=0; j<nb; ++j) {
1701 y[indy+j] = x[indx+j];
1702 }
1703 }
1704}
1705
1706#endif // end of ILU_MC_OMP
1707
1708#endif // end of _OPENMP
1709
1710/*---------------------------------*/
1711/*-- End of File --*/
1712/*---------------------------------*/
void fasp_darray_cp(const INT n, const REAL *x, REAL *y)
Copy an array to the other y=x.
Definition: AuxArray.c:164
void fasp_mem_free(void *mem)
Free up previous allocated memory body and set pointer to NULL.
Definition: AuxMemory.c:155
void * fasp_mem_calloc(const unsigned int size, const unsigned int type)
Allocate, initiate, and check memory.
Definition: AuxMemory.c:65
void fasp_chkerr(const SHORT status, const char *fctname)
Check error status and print out error messages before quit.
Definition: AuxMessage.c:213
void fasp_get_start_end(const INT procid, const INT nprocs, const INT n, INT *start, INT *end)
Assign Load to each thread.
Definition: AuxThreads.c:92
void fasp_gettime(REAL *time)
Get system time.
Definition: AuxTiming.c:36
void fasp_blas_darray_axpy(const INT n, const REAL a, const REAL *x, REAL *y)
y = a*x + y
Definition: BlaArray.c:93
SHORT fasp_smat_inv(REAL *a, const INT n)
Compute the inverse matrix of a small full matrix a.
void fasp_blas_smat_aAxpby(const REAL alpha, const REAL *A, const REAL *x, const REAL beta, REAL *y, const INT n)
Compute y:=alpha*A*x + beta*y
Definition: BlaSmallMat.c:1064
void fasp_blas_smat_mxv(const REAL *a, const REAL *b, REAL *c, const INT n)
Compute the product of a small full matrix a and a array b, stored in c.
Definition: BlaSmallMat.c:221
void fasp_blas_smat_ymAx(const REAL *A, const REAL *x, REAL *y, const INT n)
Compute y := y - Ax, where 'A' is a n*n dense matrix.
Definition: BlaSmallMat.c:962
void fasp_blas_dbsr_aAxpy(const REAL alpha, const dBSRmat *A, const REAL *x, REAL *y)
Compute y := alpha*A*x + y.
Definition: BlaSpmvBSR.c:348
void fasp_smoother_dbsr_gs_descend(dBSRmat *A, dvector *b, dvector *u, REAL *diaginv)
Gauss-Seidel relaxation in the descending order.
void fasp_smoother_dbsr_gs1(dBSRmat *A, dvector *b, dvector *u, INT order, INT *mark, REAL *diaginv)
Gauss-Seidel relaxation.
void fasp_smoother_dbsr_gs_order2(dBSRmat *A, dvector *b, dvector *u, INT *mark, REAL *work)
Gauss-Seidel relaxation in the user-defined order.
void fasp_smoother_dbsr_gs_descend1(dBSRmat *A, dvector *b, dvector *u)
Gauss-Seidel relaxation in the descending order.
void fasp_smoother_dbsr_jacobi1(dBSRmat *A, dvector *b, dvector *u, REAL *diaginv)
Jacobi relaxation.
void fasp_smoother_dbsr_gs_ascend(dBSRmat *A, dvector *b, dvector *u, REAL *diaginv)
Gauss-Seidel relaxation in the ascending order.
void fasp_smoother_dbsr_gs_ascend1(dBSRmat *A, dvector *b, dvector *u)
Gauss-Seidel relaxation in the ascending order.
void fasp_smoother_dbsr_sor_ascend(dBSRmat *A, dvector *b, dvector *u, REAL *diaginv, REAL weight)
SOR relaxation in the ascending order.
void fasp_smoother_dbsr_ilu(dBSRmat *A, dvector *b, dvector *x, void *data)
ILU method as the smoother in solving Au=b with multigrid method.
void fasp_smoother_dbsr_jacobi(dBSRmat *A, dvector *b, dvector *u)
Jacobi relaxation.
void fasp_smoother_dbsr_gs(dBSRmat *A, dvector *b, dvector *u, INT order, INT *mark)
Gauss-Seidel relaxation.
void fasp_smoother_dbsr_sor1(dBSRmat *A, dvector *b, dvector *u, INT order, INT *mark, REAL *diaginv, REAL weight)
SOR relaxation.
REAL ilu_solve_time
void fasp_smoother_dbsr_sor_order(dBSRmat *A, dvector *b, dvector *u, REAL *diaginv, INT *mark, REAL weight)
SOR relaxation in the user-defined order.
void fasp_smoother_dbsr_sor_descend(dBSRmat *A, dvector *b, dvector *u, REAL *diaginv, REAL weight)
SOR relaxation in the descending order.
void fasp_smoother_dbsr_jacobi_setup(dBSRmat *A, REAL *diaginv)
Setup for jacobi relaxation, fetch the diagonal sub-block matrixes and make them inverse first.
void fasp_smoother_dbsr_gs_order1(dBSRmat *A, dvector *b, dvector *u, REAL *diaginv, INT *mark)
Gauss-Seidel relaxation in the user-defined order.
void fasp_smoother_dbsr_sor(dBSRmat *A, dvector *b, dvector *u, INT order, INT *mark, REAL weight)
SOR relaxation.
void fasp_precond_dbsr_ilu_mc_omp(REAL *r, REAL *z, void *data)
Multi-thread Parallel ILU preconditioner based on graph coloring.
Definition: PreBSR.c:569
void fasp_precond_dbsr_ilu_ls_omp(REAL *r, REAL *z, void *data)
Multi-thread Parallel ILU preconditioner based on level schedule strategy.
Definition: PreBSR.c:773
void fasp_precond_dbsr_ilu(REAL *r, REAL *z, void *data)
ILU preconditioner.
Definition: PreBSR.c:311
Main header file for the FASP project.
#define REAL
Definition: fasp.h:67
#define SHORT
FASP integer and floating point numbers.
Definition: fasp.h:63
#define INT
Definition: fasp.h:64
#define ERROR_NUM_BLOCKS
Definition: fasp_const.h:27
#define ASCEND
Definition: fasp_const.h:242
#define OPENMP_HOLDS
Definition: fasp_const.h:260
#define TRUE
Definition of logic type.
Definition: fasp_const.h:61
#define FALSE
Definition: fasp_const.h:62
#define DESCEND
Definition: fasp_const.h:243
#define ERROR_ALLOC_MEM
Definition: fasp_const.h:30
Data for ILU setup.
Definition: fasp.h:637
INT nwork
work space size
Definition: fasp.h:664
INT nb
block size for BSR type only
Definition: fasp.h:661
INT * jlevL
mapping from row to color for lower triangle
Definition: fasp.h:699
REAL * work
work space
Definition: fasp.h:667
Block sparse row storage matrix of REAL type.
Definition: fasp_block.h:34
REAL * val
Definition: fasp_block.h:57
INT nb
dimension of each sub-block
Definition: fasp_block.h:46
INT * IA
integer array of row pointers, the size is ROW+1
Definition: fasp_block.h:60
INT ROW
number of rows of sub-blocks in matrix A, M
Definition: fasp_block.h:37
INT * JA
Definition: fasp_block.h:64
Vector with n entries of REAL type.
Definition: fasp.h:346
REAL * val
actual vector entries
Definition: fasp.h:352