Fast Auxiliary Space Preconditioning 2.7.7 Aug/28/2022
BlaSparseBSR.c
Go to the documentation of this file.
1
15#include <math.h>
16
17#ifdef _OPENMP
18#include <omp.h>
19#endif
20
21#include "fasp.h"
22#include "fasp_functs.h"
23
24/*---------------------------------*/
25/*-- Public Functions --*/
26/*---------------------------------*/
27
46 const INT COL,
47 const INT NNZ,
48 const INT nb,
49 const INT storage_manner)
50{
51 dBSRmat A;
52
53 if ( ROW > 0 ) {
54 A.IA = (INT*)fasp_mem_calloc(ROW+1, sizeof(INT));
55 }
56 else {
57 A.IA = NULL;
58 }
59
60 if ( NNZ > 0 ) {
61 A.JA = (INT*)fasp_mem_calloc(NNZ ,sizeof(INT));
62 }
63 else {
64 A.JA = NULL;
65 }
66
67 if ( nb > 0 && NNZ > 0) {
68 A.val = (REAL*)fasp_mem_calloc(NNZ*nb*nb, sizeof(REAL));
69 }
70 else {
71 A.val = NULL;
72 }
73
74 A.storage_manner = storage_manner;
75 A.ROW = ROW;
76 A.COL = COL;
77 A.NNZ = NNZ;
78 A.nb = nb;
79
80 return A;
81}
82
99void fasp_dbsr_alloc (const INT ROW,
100 const INT COL,
101 const INT NNZ,
102 const INT nb,
103 const INT storage_manner,
104 dBSRmat *A)
105{
106 if ( ROW > 0 ) {
107 A->IA = (INT*)fasp_mem_calloc(ROW+1, sizeof(INT));
108 }
109 else {
110 A->IA = NULL;
111 }
112
113 if ( NNZ > 0 ) {
114 A->JA = (INT*)fasp_mem_calloc(NNZ, sizeof(INT));
115 }
116 else {
117 A->JA = NULL;
118 }
119
120 if ( nb > 0 ) {
121 A->val = (REAL*)fasp_mem_calloc(NNZ*nb*nb, sizeof(REAL));
122 }
123 else {
124 A->val = NULL;
125 }
126
127 A->storage_manner = storage_manner;
128 A->ROW = ROW;
129 A->COL = COL;
130 A->NNZ = NNZ;
131 A->nb = nb;
132
133 return;
134}
135
147{
148 if (A==NULL) return;
149
150 fasp_mem_free(A->IA); A->IA = NULL;
151 fasp_mem_free(A->JA); A->JA = NULL;
152 fasp_mem_free(A->val); A->val = NULL;
153
154 A->ROW = 0;
155 A->COL = 0;
156 A->NNZ = 0;
157 A->nb = 0;
158 A->storage_manner = 0;
159}
160
172void fasp_dbsr_cp (const dBSRmat *A,
173 dBSRmat *B)
174{
175 B->ROW = A->ROW;
176 B->COL = A->COL;
177 B->NNZ = A->NNZ;
178 B->nb = A->nb;
180
181 memcpy(B->IA,A->IA,(A->ROW+1)*sizeof(INT));
182 memcpy(B->JA,A->JA,(A->NNZ)*sizeof(INT));
183 memcpy(B->val,A->val,(A->NNZ)*(A->nb)*(A->nb)*sizeof(REAL));
184}
185
200 dBSRmat *AT)
201{
202 const INT n = A->ROW, m = A->COL, nnz = A->NNZ, nb = A->nb;
203
204 INT status = FASP_SUCCESS;
205 INT i,j,k,p,inb,jnb,nb2;
206
207 AT->ROW = m;
208 AT->COL = n;
209 AT->NNZ = nnz;
210 AT->nb = nb;
212
213 AT->IA = (INT*)fasp_mem_calloc(m+1,sizeof(INT));
214 AT->JA = (INT*)fasp_mem_calloc(nnz,sizeof(INT));
215 nb2 = nb*nb;
216
217 if (A->val) {
218 AT->val = (REAL*)fasp_mem_calloc(nnz*nb2,sizeof(REAL));
219 }
220 else {
221 AT->val = NULL;
222 }
223
224 // first pass: find the number of nonzeros in the first m-1 columns of A
225 // Note: these numbers are stored in the array AT.IA from 1 to m-1
226 fasp_iarray_set(m+1, AT->IA, 0);
227
228 for ( j=0; j<nnz; ++j ) {
229 i=A->JA[j]; // column number of A = row number of A'
230 if (i<m-1) AT->IA[i+2]++;
231 }
232
233 for ( i=2; i<=m; ++i ) AT->IA[i]+=AT->IA[i-1];
234
235 // second pass: form A'
236 if ( A->val ) {
237 for ( i=0; i<n; ++i ) {
238 INT ibegin=A->IA[i], iend1=A->IA[i+1];
239 for ( p=ibegin; p<iend1; p++ ) {
240 j=A->JA[p]+1;
241 k=AT->IA[j];
242 AT->JA[k]=i;
243 for ( inb=0; inb<nb; inb++ )
244 for ( jnb=0; jnb<nb; jnb++ )
245 AT->val[nb2*k + inb*nb + jnb] = A->val[nb2*p + jnb*nb + inb];
246 AT->IA[j]=k+1;
247 } // end for p
248 } // end for i
249
250 }
251 else {
252 for ( i=0; i<n; ++i ) {
253 INT ibegin=A->IA[i], iend1=A->IA[i+1];
254 for ( p=ibegin; p<iend1; p++ ) {
255 j=A->JA[p]+1;
256 k=AT->IA[j];
257 AT->JA[k]=i;
258 AT->IA[j]=k+1;
259 } // end for p
260 } // end of i
261
262 } // end if
263
264 return (status);
265}
266
288 const INT *Is,
289 const INT *Js,
290 const INT m,
291 const INT n,
292 dBSRmat *B)
293{
294 INT status = FASP_SUCCESS;
295 INT i,j,k,nnz=0;
296 INT *col_flag;
297 SHORT use_openmp = FALSE;
298
299 const INT nb = A->nb;
300 const INT nb2=nb*nb;
301
302#ifdef _OPENMP
303 INT myid, mybegin, stride_i, myend, nthreads;
304 if ( n > OPENMP_HOLDS ) {
305 use_openmp = TRUE;
306 nthreads = fasp_get_num_threads();
307 }
308#endif
309
310 // create colum flags
311 col_flag = (INT*)fasp_mem_calloc(A->COL,sizeof(INT));
312
313 B->ROW=m; B->COL=n; B->nb=nb; B->storage_manner=A->storage_manner;
314
315 B->IA = (INT*)fasp_mem_calloc(m+1,sizeof(INT));
316
317 if ( use_openmp ) {
318#ifdef _OPENMP
319 stride_i = n/nthreads;
320#pragma omp parallel private(myid, mybegin, myend, i) num_threads(nthreads)
321 {
322 myid = omp_get_thread_num();
323 mybegin = myid*stride_i;
324 if ( myid < nthreads-1 ) myend = mybegin+stride_i;
325 else myend = n;
326 for ( i = mybegin; i < myend; ++i ) {
327 col_flag[Js[i]]=i+1;
328 }
329 }
330#endif
331 }
332 else {
333 for ( i=0; i<n; ++i ) col_flag[Js[i]]=i+1;
334 }
335
336 // first pass: count nonzeros for sub matrix
337 B->IA[0] = 0;
338 for ( i=0; i<m; ++i ) {
339 for ( k=A->IA[Is[i]]; k<A->IA[Is[i]+1]; ++k ) {
340 j=A->JA[k];
341 if (col_flag[j]>0) nnz++;
342 } /* end for k */
343 B->IA[i+1] = nnz;
344 } /* end for i */
345 B->NNZ = nnz;
346
347 // allocate
348 B->JA = (INT*)fasp_mem_calloc(nnz,sizeof(INT));
349 B->val = (REAL*)fasp_mem_calloc(nnz*nb2,sizeof(REAL));
350
351 // second pass: copy data to B
352 nnz = 0;
353 for ( i=0; i<m; ++i) {
354 for ( k=A->IA[Is[i]]; k<A->IA[Is[i]+1]; ++k ) {
355 j = A->JA[k];
356 if ( col_flag[j] > 0 ) {
357 B->JA[nnz]=col_flag[j]-1;
358 memcpy(B->val+nnz*nb2, A->val+k*nb2, nb2*sizeof(REAL));
359 nnz++;
360 }
361 } /* end for k */
362 } /* end for i */
363
364 fasp_mem_free(col_flag); col_flag = NULL;
365
366 return(status);
367}
368
386{
387 SHORT status = FASP_SUCCESS;
388 const INT num_rowsA = A -> ROW;
389 const INT num_colsA = A -> COL;
390 const INT nb = A->nb;
391 const INT nb2 = nb*nb;
392
393 const INT *A_i = A -> IA;
394 INT *A_j = A -> JA;
395 REAL *A_data = A -> val;
396
397 INT i, j, tempi, row_size;
398
399#ifdef _OPENMP
400 // variables for OpenMP
401 INT myid, mybegin, myend, ibegin, iend;
402 INT nthreads = fasp_get_num_threads();
403#endif
404
405 /* the matrix should be square */
406 if (num_rowsA != num_colsA) return ERROR_INPUT_PAR;
407
408#ifdef _OPENMP
409 if (num_rowsA > OPENMP_HOLDS) {
410 REAL *tempd = (REAL*)fasp_mem_calloc(nb2*nthreads, sizeof(REAL));
411#pragma omp parallel for private (myid,mybegin,myend,i,j,tempi,ibegin,iend)
412 for (myid = 0; myid < nthreads; myid++) {
413 fasp_get_start_end(myid, nthreads, num_rowsA, &mybegin, &myend);
414 for (i = mybegin; i < myend; i++) {
415 ibegin = A_i[i+1]; iend = A_i[i];
416 for (j = ibegin; j < iend; j ++) {
417 if (A_j[j] == i) {
418 if (j != ibegin) {
419 // swap index
420 tempi = A_j[ibegin];
421 A_j[ibegin] = A_j[j];
422 A_j[j] = tempi;
423 // swap block
424 memcpy(tempd+myid*nb2, A_data+ibegin*nb2, (nb2)*sizeof(REAL));
425 memcpy(A_data+ibegin*nb2, A_data+j*nb2, (nb2)*sizeof(REAL));
426 memcpy(A_data+j*nb2, tempd+myid*nb2, (nb2)*sizeof(REAL));
427 }
428 break;
429 }
430 /* diagonal element is missing */
431 if (j == iend-1) {
432 status = -2;
433 break;
434 }
435 }
436 }
437 }
438 fasp_mem_free(tempd); tempd = NULL;
439 }
440 else {
441#endif
442 REAL *tempd = (REAL*)fasp_mem_calloc(nb2, sizeof(REAL));
443 for (i = 0; i < num_rowsA; i ++) {
444 row_size = A_i[i+1] - A_i[i];
445 for (j = 0; j < row_size; j ++) {
446 if (A_j[j] == i) {
447 if (j != 0) {
448 // swap index
449 tempi = A_j[0];
450 A_j[0] = A_j[j];
451 A_j[j] = tempi;
452 // swap block
453 memcpy(tempd, A_data, (nb2)*sizeof(REAL));
454 memcpy(A_data, A_data+j*nb2, (nb2)*sizeof(REAL));
455 memcpy(A_data+j*nb2, tempd, (nb2)*sizeof(REAL));
456 }
457 break;
458 }
459 /* diagonal element is missing */
460 if (j == row_size-1) return -2;
461 }
462 A_j += row_size;
463 A_data += row_size*nb2;
464 }
465 fasp_mem_free(tempd); tempd = NULL;
466#ifdef _OPENMP
467 }
468#endif
469
470 if (status < 0) return status;
471 else return FASP_SUCCESS;
472}
473
487{
488 // members of A
489 const INT ROW = A->ROW;
490 const INT nb = A->nb;
491 const INT nb2 = nb*nb;
492 const INT size = ROW*nb2;
493 const INT *IA = A->IA;
494 const INT *JA = A->JA;
495 REAL *val = A->val;
496
497 dvector diaginv;
498
499 INT i,k;
500
501 // Variables for OpenMP
502 SHORT nthreads = 1, use_openmp = FALSE;
503 INT myid, mybegin, myend;
504
505#ifdef _OPENMP
506 if ( ROW > OPENMP_HOLDS ) {
507 use_openmp = TRUE;
508 nthreads = fasp_get_num_threads();
509 }
510#endif
511
512 // allocate memory
513 diaginv.row = size;
514 diaginv.val = (REAL *)fasp_mem_calloc(size, sizeof(REAL));
515
516 // get all the diagonal sub-blocks
517 if (use_openmp) {
518#ifdef _OPENMP
519#pragma omp parallel for private(myid, i, mybegin, myend, k)
520#endif
521 for (myid = 0; myid < nthreads; myid++) {
522 fasp_get_start_end(myid, nthreads, ROW, &mybegin, &myend);
523 for (i = mybegin; i < myend; ++i) {
524 for (k = IA[i]; k < IA[i+1]; ++k) {
525 if (JA[k] == i)
526 memcpy(diaginv.val+i*nb2, val+k*nb2, nb2*sizeof(REAL));
527 }
528 }
529 }
530 }
531 else {
532 for (i = 0; i < ROW; ++i) {
533 for (k = IA[i]; k < IA[i+1]; ++k) {
534 if (JA[k] == i)
535 memcpy(diaginv.val+i*nb2, val+k*nb2, nb2*sizeof(REAL));
536 }
537 }
538 }
539 // compute the inverses of all the diagonal sub-blocks
540 if (use_openmp) {
541#ifdef _OPENMP
542#pragma omp parallel for private(myid, i, mybegin, myend)
543#endif
544 for (myid = 0; myid < nthreads; myid++) {
545 fasp_get_start_end(myid, nthreads, ROW, &mybegin, &myend);
546 if (nb > 1) {
547 for (i = mybegin; i < myend; ++i) {
548 fasp_smat_inv(diaginv.val+i*nb2, nb);
549 }
550 }
551 else {
552 for (i = mybegin; i < myend; ++i) {
553 // zero-diagonal should be tested previously
554 diaginv.val[i] = 1.0 / diaginv.val[i];
555 }
556 }
557 }
558 }
559 else {
560 if (nb > 1) {
561 for (i = 0; i < ROW; ++i) {
562 fasp_smat_inv(diaginv.val+i*nb2, nb);
563 }
564 }
565 else {
566 for (i = 0; i < ROW; ++i) {
567 // zero-diagonal should be tested previously
568 diaginv.val[i] = 1.0 / diaginv.val[i];
569 }
570 }
571 }
572
573 return (diaginv);
574}
575
592{
593 // members of A
594 const INT ROW = A->ROW;
595 const INT COL = A->COL;
596 const INT NNZ = A->NNZ;
597 const INT nb = A->nb;
598 const INT nb2 = nb*nb;
599 const INT size = ROW*nb2;
600 const INT *IA = A->IA;
601 const INT *JA = A->JA;
602 REAL *val = A->val;
603
604 // create a dBSRmat B
605 dBSRmat B = fasp_dbsr_create(ROW, COL, NNZ, nb, 0);
606 INT *IAb = B.IA;
607 INT *JAb = B.JA;
608 REAL *valb = B.val;
609
610 INT i, j, k, m, l;
611
612 // variables for OpenMP
613 SHORT nthreads = 1, use_openmp = FALSE;
614 INT myid, mybegin, myend;
615
616 // allocate memory
617 REAL *diaginv = (REAL *)fasp_mem_calloc(size, sizeof(REAL));
618
619 if ( IAb ) memcpy(IAb, IA, (ROW+1)*sizeof(INT));
620 else goto FINISHED;
621
622 if ( JAb ) memcpy(JAb, JA, NNZ*sizeof(INT));
623 else goto FINISHED;
624
625#ifdef _OPENMP
626 if (ROW > OPENMP_HOLDS) {
627 use_openmp = TRUE;
628 nthreads = fasp_get_num_threads();
629 }
630#endif
631
632 // get all the diagonal sub-blocks
633 if (use_openmp) {
634#ifdef _OPENMP
635#pragma omp parallel for private(myid, i, mybegin, myend, k)
636#endif
637 for (myid = 0; myid < nthreads; myid++) {
638 fasp_get_start_end(myid, nthreads, ROW, &mybegin, &myend);
639 for (i = mybegin; i < myend; ++i) {
640 for (k = IA[i]; k < IA[i+1]; ++k) {
641 if (JA[k] == i)
642 memcpy(diaginv+i*nb2, val+k*nb2, nb2*sizeof(REAL));
643 }
644 }
645 }
646 }
647 else {
648 for (i = 0; i < ROW; ++i) {
649 for (k = IA[i]; k < IA[i+1]; ++k) {
650 if (JA[k] == i)
651 memcpy(diaginv+i*nb2, val+k*nb2, nb2*sizeof(REAL));
652 }
653 }
654 }
655
656 // compute the inverses of all the diagonal sub-blocks
657 if (use_openmp) {
658#ifdef _OPENMP
659#pragma omp parallel for private(myid, i, mybegin, myend)
660#endif
661 for (myid = 0; myid < nthreads; myid++) {
662 fasp_get_start_end(myid, nthreads, ROW, &mybegin, &myend);
663 if (nb > 1) {
664 for (i = mybegin; i < myend; ++i) {
665 fasp_smat_inv(diaginv+i*nb2, nb);
666 }
667 }
668 else {
669 for (i = mybegin; i < myend; ++i) {
670 // zero-diagonal should be tested previously
671 diaginv[i] = 1.0 / diaginv[i];
672 }
673 }
674 }
675 }
676 else {
677 if (nb > 1) {
678 for (i = 0; i < ROW; ++i) {
679 fasp_smat_inv(diaginv+i*nb2, nb);
680 }
681 }
682 else {
683 for (i = 0; i < ROW; ++i) {
684 // zero-diagonal should be tested previously
685 diaginv[i] = 1.0 / diaginv[i];
686 }
687 }
688 }
689
690 // compute D^{-1}*A
691 if (use_openmp) {
692#ifdef _OPENMP
693#pragma omp parallel for private(myid, mybegin, myend, i, k, m, j, l)
694#endif
695 for (myid = 0; myid < nthreads; myid++) {
696 fasp_get_start_end(myid, nthreads, ROW, &mybegin, &myend);
697 for (i = mybegin; i < myend; ++i) {
698 for (k = IA[i]; k < IA[i+1]; ++k) {
699 m = k*nb2;
700 j = JA[k];
701 if (j == i) {
702 // Identity sub-block
703 memset(valb+m, 0X0, nb2*sizeof(REAL));
704 for (l = 0; l < nb; l ++) valb[m+l*nb+l] = 1.0;
705 }
706 else {
707 fasp_blas_smat_mul(diaginv+i*nb2, val+m, valb+m, nb);
708 }
709 }
710 }
711 }
712 }
713 else {
714 for (i = 0; i < ROW; ++i) {
715 for (k = IA[i]; k < IA[i+1]; ++k) {
716 m = k*nb2;
717 j = JA[k];
718 if (j == i) {
719 // Identity sub-block
720 memset(valb+m, 0X0, nb2*sizeof(REAL));
721 for (l = 0; l < nb; l ++) valb[m+l*nb+l] = 1.0;
722 }
723 else {
724 fasp_blas_smat_mul(diaginv+i*nb2, val+m, valb+m, nb);
725 }
726 }
727 }
728 }
729
730FINISHED:
731 fasp_mem_free(diaginv); diaginv = NULL;
732
733 return (B);
734}
735
752 REAL *diaginv)
753{
754 // members of A
755 const INT ROW = A->ROW;
756 const INT COL = A->COL;
757 const INT NNZ = A->NNZ;
758 const INT nb = A->nb, nbp1 = nb+1;
759 const INT nb2 = nb*nb;
760
761 INT *IA = A->IA;
762 INT *JA = A->JA;
763 REAL *val = A->val;
764
765 dBSRmat B;
766 INT *IAb = NULL;
767 INT *JAb = NULL;
768 REAL *valb = NULL;
769
770 INT i,k,m,l,ibegin,iend;
771
772 // Variables for OpenMP
773 SHORT nthreads = 1, use_openmp = FALSE;
774 INT myid, mybegin, myend;
775
776#ifdef _OPENMP
777 if (ROW > OPENMP_HOLDS) {
778 use_openmp = TRUE;
779 nthreads = fasp_get_num_threads();
780 }
781#endif
782
783 // Create a dBSRmat 'B'
784 B = fasp_dbsr_create(ROW, COL, NNZ, nb, 0);
785 IAb = B.IA;
786 JAb = B.JA;
787 valb = B.val;
788
789 if (IAb) memcpy(IAb, IA, (ROW+1)*sizeof(INT));
790 else goto FINISHED;
791
792 if (JAb) memcpy(JAb, JA, NNZ*sizeof(INT));
793 else goto FINISHED;
794
795 // compute D^{-1}*A
796 if (use_openmp) {
797#ifdef _OPENMP
798#pragma omp parallel for private (myid, i, mybegin, myend, ibegin, iend, k, m, l)
799#endif
800 for (myid = 0; myid < nthreads; myid++) {
801 fasp_get_start_end(myid, nthreads, ROW, &mybegin, &myend);
802 for (i = mybegin; i < myend; ++i) {
803 ibegin = IA[i]; iend = IA[i+1];
804 for (k = ibegin; k < iend; ++k) {
805 m = k*nb2;
806 if (JA[k] != i) {
807 fasp_blas_smat_mul(diaginv+i*nb2, val+m, valb+m, nb);
808 }
809 else {
810 // Identity sub-block
811 memset(valb+m, 0X0, nb2*sizeof(REAL));
812 for (l = 0; l < nb; l ++) valb[m+l*nbp1] = 1.0;
813 }
814 }
815 }
816 }
817 }
818 else {
819 // compute D^{-1}*A
820 for (i = 0; i < ROW; ++i) {
821 ibegin = IA[i]; iend = IA[i+1];
822 for (k = ibegin; k < iend; ++k) {
823 m = k*nb2;
824 if (JA[k] != i) {
825 fasp_blas_smat_mul(diaginv+i*nb2, val+m, valb+m, nb);
826 }
827 else {
828 // Identity sub-block
829 memset(valb+m, 0X0, nb2*sizeof(REAL));
830 for (l = 0; l < nb; l ++) valb[m+l*nbp1] = 1.0;
831 }
832 }
833 }
834 }
835
836FINISHED:
837 return (B);
838}
839
858 REAL *diaginv)
859{
860 dBSRmat B;
861 // members of A
862 INT ROW = A->ROW;
863 INT ROW_plus_one = ROW+1;
864 INT COL = A->COL;
865 INT NNZ = A->NNZ;
866 INT nb = A->nb;
867 INT *IA = A->IA;
868 INT *JA = A->JA;
869 REAL *val = A->val;
870
871 INT *IAb = NULL;
872 INT *JAb = NULL;
873 REAL *valb = NULL;
874
875 INT nb2 = nb*nb;
876 INT i,j,k,m;
877
878 SHORT use_openmp = FALSE;
879
880#ifdef _OPENMP
881 INT myid, mybegin, myend, stride_i, nthreads = 1;
882 if ( ROW > OPENMP_HOLDS ) {
883 use_openmp = TRUE;
884 nthreads = fasp_get_num_threads();
885 }
886#endif
887
888 // Create a dBSRmat 'B'
889 B = fasp_dbsr_create(ROW, COL, NNZ, nb, 0);
890
891 IAb = B.IA;
892 JAb = B.JA;
893 valb = B.val;
894
895 fasp_iarray_cp(ROW_plus_one, IA, IAb);
896 fasp_iarray_cp(NNZ, JA, JAb);
897
898 switch (nb) {
899
900 case 2:
901 // main loop
902 if (use_openmp) {
903#ifdef _OPENMP
904 stride_i = ROW/nthreads;
905#pragma omp parallel private(myid, mybegin, myend,i,k,m,j) num_threads(nthreads)
906 {
907 myid = omp_get_thread_num();
908 mybegin = myid*stride_i;
909 if (myid < nthreads-1) myend = mybegin+stride_i;
910 else myend = ROW;
911 for (i=mybegin; i < myend; ++i) {
912 // get the diagonal sub-blocks
913
914 k = IA[i];
915 m = k*4;
916 memcpy(diaginv+i*4, val+m, 4*sizeof(REAL));
918
919 // compute the inverses of the diagonal sub-blocks
920 fasp_smat_inv_nc2(diaginv+i*4);
921 // compute D^{-1}*A
922 for (k = IA[i]+1; k < IA[i+1]; ++k)
923 {
924 m = k*4;
925 j = JA[k];
926 fasp_blas_smat_mul_nc2(diaginv+i*4, val+m, valb+m);
927 }
928 }// end of main loop
929 }
930#endif
931 }
932 else {
933 // main loop
934 for (i = 0; i < ROW; ++i) {
935 // get the diagonal sub-blocks
936 k = IA[i];
937 m = k*4;
938 memcpy(diaginv+i*4, val+m, 4*sizeof(REAL));
940
941 // compute the inverses of the diagonal sub-blocks
942 fasp_smat_inv_nc2(diaginv+i*4);
943 // compute D^{-1}*A
944 for (k = IA[i]+1; k < IA[i+1]; ++k) {
945 m = k*4;
946 fasp_blas_smat_mul_nc2(diaginv+i*4, val+m, valb+m);
947 }
948 }// end of main loop
949 }
950
951 break;
952
953 case 3:
954 // main loop
955 if (use_openmp) {
956#ifdef _OPENMP
957 stride_i = ROW/nthreads;
958#pragma omp parallel private(myid, mybegin, myend,i,k,m,j) num_threads(nthreads)
959 {
960 myid = omp_get_thread_num();
961 mybegin = myid*stride_i;
962 if (myid < nthreads-1) myend = mybegin+stride_i;
963 else myend = ROW;
964 for (i=mybegin; i < myend; ++i) {
965 // get the diagonal sub-blocks
966 for (k = IA[i]; k < IA[i+1]; ++k) {
967 if (JA[k] == i) {
968 m = k*9;
969 memcpy(diaginv+i*9, val+m, 9*sizeof(REAL));
971 }
972 }
973 // compute the inverses of the diagonal sub-blocks
974 fasp_smat_inv_nc3(diaginv+i*9);
975 // compute D^{-1}*A
976 for (k = IA[i]; k < IA[i+1]; ++k) {
977 m = k*9;
978 j = JA[k];
979 if (j != i) fasp_blas_smat_mul_nc3(diaginv+i*9, val+m, valb+m);
980 }
981 }// end of main loop
982 }
983#endif
984 }
985
986 else {
987 for (i = 0; i < ROW; ++i) {
988 // get the diagonal sub-blocks
989 for (k = IA[i]; k < IA[i+1]; ++k) {
990 if (JA[k] == i) {
991 m = k*9;
992 memcpy(diaginv+i*9, val+m, 9*sizeof(REAL));
994 }
995 }
996#if DEBUG_MODE > 0
997 printf("### DEBUG: row, col = %d\n", i);
998#endif
999 // compute the inverses of the diagonal sub-blocks
1000 fasp_smat_inv_nc3(diaginv+i*9);
1001
1002 // compute D^{-1}*A
1003 for (k = IA[i]; k < IA[i+1]; ++k) {
1004 m = k*9;
1005 j = JA[k];
1006 if (j != i) fasp_blas_smat_mul_nc3(diaginv+i*9, val+m, valb+m);
1007 }
1008 }// end of main loop
1009 }
1010
1011 break;
1012
1013 case -5:
1014 // main loop
1015 if (use_openmp) {
1016#ifdef _OPENMP
1017 stride_i = ROW/nthreads;
1018#pragma omp parallel private(myid, mybegin, myend,i,k,m,j) num_threads(nthreads)
1019 {
1020 myid = omp_get_thread_num();
1021 mybegin = myid*stride_i;
1022 if (myid < nthreads-1) myend = mybegin+stride_i;
1023 else myend = ROW;
1024 for (i=mybegin; i < myend; ++i) {
1025 // get the diagonal sub-blocks
1026 for (k = IA[i]; k < IA[i+1]; ++k) {
1027 if (JA[k] == i) {
1028 m = k*25;
1029 memcpy(diaginv+i*25, val+m, 25*sizeof(REAL));
1030 fasp_smat_identity_nc5(valb+m);
1031 }
1032 }
1033
1034 // compute the inverses of the diagonal sub-blocks
1035 fasp_smat_inv_nc5(diaginv+i*25);
1036
1037 // compute D^{-1}*A
1038 for (k = IA[i]; k < IA[i+1]; ++k) {
1039 m = k*25;
1040 j = JA[k];
1041 if (j != i) fasp_blas_smat_mul_nc5(diaginv+i*25, val+m, valb+m);
1042 }
1043 }// end of main loop
1044 }
1045#endif
1046 }
1047
1048 else {
1049
1050 for (i = 0; i < ROW; ++i) {
1051 // get the diagonal sub-blocks
1052 for (k = IA[i]; k < IA[i+1]; ++k) {
1053 if (JA[k] == i)
1054 {
1055 m = k*25;
1056 memcpy(diaginv+i*25, val+m, 25*sizeof(REAL));
1057 fasp_smat_identity_nc5(valb+m);
1058 }
1059 }
1060
1061 // compute the inverses of the diagonal sub-blocks
1062 // fasp_smat_inv_nc5(diaginv+i*25); // Not numerically stable!!! --zcs 04/26/2021
1063 fasp_smat_invp_nc(diaginv + i * 25, 5);
1064
1065#if 0
1066 REAL aa[25], bb[25]; // for debug inverse of diag
1067 for (k = 0; k < 25; k++) bb[k] = diaginv[i * 25 + k]; // before inversion
1068 for (k = 0; k < 25; k++) aa[k] = diaginv[i * 25 + k]; // aftger inversion
1069
1070 printf("### DEBUG: Check inverse matrix...\n");
1071 printf("##----------------------------------------------\n");
1072 printf("## %12.5e %12.5e %12.5e %12.5e %12.5e\n",
1073 bb[0]* aa[0] + bb[1] * aa[5] + bb[2] * aa[10] + bb[3] * aa[15] + bb[4] * aa[20],
1074 bb[0]* aa[1] + bb[1] * aa[6] + bb[2] * aa[11] + bb[3] * aa[16] + bb[4] * aa[21],
1075 bb[0]* aa[2] + bb[1] * aa[7] + bb[2] * aa[12] + bb[3] * aa[17] + bb[4] * aa[22],
1076 bb[0]* aa[3] + bb[1] * aa[8] + bb[2] * aa[13] + bb[3] * aa[18] + bb[4] * aa[23],
1077 bb[0]* aa[4] + bb[1] * aa[9] + bb[3] * aa[14] + bb[3] * aa[19] + bb[4] * aa[24]);
1078 printf("## %12.5e %12.5e %12.5e %12.5e %12.5e\n",
1079 bb[5]* aa[0] + bb[6] * aa[5] + bb[7] * aa[10] + bb[8] * aa[15] + bb[9] * aa[20],
1080 bb[5]* aa[1] + bb[6] * aa[6] + bb[7] * aa[11] + bb[8] * aa[16] + bb[9] * aa[21],
1081 bb[5]* aa[2] + bb[6] * aa[7] + bb[7] * aa[12] + bb[8] * aa[17] + bb[9] * aa[22],
1082 bb[5]* aa[3] + bb[6] * aa[8] + bb[7] * aa[13] + bb[8] * aa[18] + bb[9] * aa[23],
1083 bb[5]* aa[4] + bb[6] * aa[9] + bb[7] * aa[14] + bb[8] * aa[19] + bb[9] * aa[24]);
1084 printf("## %12.5e %12.5e %12.5e %12.5e %12.5e\n",
1085 bb[10]* aa[0] + bb[11] * aa[5] + bb[12] * aa[10] + bb[13] * aa[15] + bb[14] * aa[20],
1086 bb[10]* aa[1] + bb[11] * aa[6] + bb[12] * aa[11] + bb[13] * aa[16] + bb[14] * aa[21],
1087 bb[10]* aa[2] + bb[11] * aa[7] + bb[12] * aa[12] + bb[13] * aa[17] + bb[14] * aa[22],
1088 bb[10]* aa[3] + bb[11] * aa[8] + bb[12] * aa[13] + bb[13] * aa[18] + bb[14] * aa[23],
1089 bb[10]* aa[4] + bb[11] * aa[9] + bb[12] * aa[14] + bb[13] * aa[19] + bb[14] * aa[24]);
1090 printf("## %12.5e %12.5e %12.5e %12.5e %12.5e\n",
1091 bb[15]* aa[0] + bb[16] * aa[5] + bb[17] * aa[10] + bb[18] * aa[15] + bb[19] * aa[20],
1092 bb[15]* aa[1] + bb[16] * aa[6] + bb[17] * aa[11] + bb[18] * aa[16] + bb[19] * aa[21],
1093 bb[15]* aa[2] + bb[16] * aa[7] + bb[17] * aa[12] + bb[18] * aa[17] + bb[19] * aa[22],
1094 bb[15]* aa[3] + bb[16] * aa[8] + bb[17] * aa[13] + bb[18] * aa[18] + bb[19] * aa[23],
1095 bb[15]* aa[4] + bb[16] * aa[9] + bb[17] * aa[14] + bb[18] * aa[19] + bb[19] * aa[24]);
1096 printf("## %12.5e %12.5e %12.5e %12.5e %12.5e\n",
1097 bb[20]* aa[0] + bb[21] * aa[5] + bb[22] * aa[10] + bb[23] * aa[15] + bb[24] * aa[20],
1098 bb[20]* aa[1] + bb[21] * aa[6] + bb[22] * aa[11] + bb[23] * aa[16] + bb[24] * aa[21],
1099 bb[20]* aa[2] + bb[21] * aa[7] + bb[22] * aa[12] + bb[23] * aa[17] + bb[24] * aa[22],
1100 bb[20]* aa[3] + bb[21] * aa[8] + bb[22] * aa[13] + bb[23] * aa[18] + bb[24] * aa[23],
1101 bb[20]* aa[4] + bb[21] * aa[9] + bb[22] * aa[14] + bb[23] * aa[19] + bb[24] * aa[24]);
1102 printf("##----------------------------------------------\n");
1103#endif
1104
1105 // compute D^{-1}*A
1106 for (k = IA[i]; k < IA[i+1]; ++k) {
1107 m = k*25;
1108 j = JA[k];
1109 if (j != i) fasp_blas_smat_mul_nc5(diaginv+i*25, val+m, valb+m);
1110 }
1111 }// end of main loop
1112 }
1113
1114 break;
1115
1116 case -7:
1117 // main loop
1118 if (use_openmp) {
1119#ifdef _OPENMP
1120 stride_i = ROW/nthreads;
1121#pragma omp parallel private(myid, mybegin, myend,i,k,m,j) num_threads(nthreads)
1122 {
1123 myid = omp_get_thread_num();
1124 mybegin = myid*stride_i;
1125 if (myid < nthreads-1) myend = mybegin+stride_i;
1126 else myend = ROW;
1127 for (i=mybegin; i < myend; ++i) {
1128 // get the diagonal sub-blocks
1129 for (k = IA[i]; k < IA[i+1]; ++k) {
1130 if (JA[k] == i) {
1131 m = k*49;
1132 memcpy(diaginv+i*49, val+m, 49*sizeof(REAL));
1133 fasp_smat_identity_nc7(valb+m);
1134 }
1135 }
1136
1137 // compute the inverses of the diagonal sub-blocks
1138 fasp_smat_inv_nc7(diaginv+i*49);
1139
1140 // compute D^{-1}*A
1141 for (k = IA[i]; k < IA[i+1]; ++k) {
1142 m = k*49;
1143 j = JA[k];
1144 if (j != i) fasp_blas_smat_mul_nc7(diaginv+i*49, val+m, valb+m);
1145 }
1146 }// end of main loop
1147 }
1148#endif
1149 }
1150
1151 else {
1152 for (i = 0; i < ROW; ++i) {
1153 // get the diagonal sub-blocks
1154 for (k = IA[i]; k < IA[i+1]; ++k) {
1155 if (JA[k] == i) {
1156 m = k*49;
1157 memcpy(diaginv+i*49, val+m, 49*sizeof(REAL));
1158 fasp_smat_identity_nc7(valb+m);
1159 }
1160 }
1161
1162 // compute the inverses of the diagonal sub-blocks
1163 // fasp_smat_inv_nc7(diaginv+i*49); // Not numerically stable!!! --zcs 04/26/2021
1164 fasp_smat_invp_nc(diaginv + i * 49, 7);
1165
1166 // compute D^{-1}*A
1167 for (k = IA[i]; k < IA[i+1]; ++k) {
1168 m = k*49;
1169 j = JA[k];
1170 if (j != i) fasp_blas_smat_mul_nc7(diaginv+i*49, val+m, valb+m);
1171 }
1172 }// end of main loop
1173 }
1174
1175 break;
1176
1177 default:
1178 // main loop
1179 if (use_openmp) {
1180#ifdef _OPENMP
1181 stride_i = ROW/nthreads;
1182#pragma omp parallel private(myid, mybegin, myend,i,k,m,j) num_threads(nthreads)
1183 {
1184 myid = omp_get_thread_num();
1185 mybegin = myid*stride_i;
1186 if (myid < nthreads-1) myend = mybegin+stride_i;
1187 else myend = ROW;
1188 for (i=mybegin; i < myend; ++i) {
1189 // get the diagonal sub-blocks
1190 for (k = IA[i]; k < IA[i+1]; ++k) {
1191 if (JA[k] == i) {
1192 m = k*nb2;
1193 memcpy(diaginv+i*nb2, val+m, nb2*sizeof(REAL));
1194 fasp_smat_identity(valb+m, nb, nb2);
1195 }
1196 }
1197
1198 // compute the inverses of the diagonal sub-blocks
1199 fasp_smat_inv(diaginv+i*nb2, nb);
1200
1201 // compute D^{-1}*A
1202 for (k = IA[i]; k < IA[i+1]; ++k) {
1203 m = k*nb2;
1204 j = JA[k];
1205 if (j != i) fasp_blas_smat_mul(diaginv+i*nb2, val+m, valb+m, nb);
1206 }
1207 }// end of main loop
1208 }
1209#endif
1210 }
1211 else {
1212 for (i = 0; i < ROW; ++i) {
1213 // get the diagonal sub-blocks
1214 for (k = IA[i]; k < IA[i+1]; ++k) {
1215 if (JA[k] == i) {
1216 m = k*nb2;
1217 memcpy(diaginv+i*nb2, val+m, nb2*sizeof(REAL));
1218 fasp_smat_identity(valb+m, nb, nb2);
1219 }
1220 }
1221
1222 // compute the inverses of the diagonal sub-blocks
1223 // fasp_smat_inv(diaginv+i*nb2, nb); // Not numerically stable!!! --zcs 04/26/2021
1224 fasp_smat_invp_nc(diaginv + i * nb2, nb);
1225
1226 // compute D^{-1}*A
1227 for (k = IA[i]; k < IA[i+1]; ++k) {
1228 m = k*nb2;
1229 j = JA[k];
1230 if (j != i) fasp_blas_smat_mul(diaginv+i*nb2, val+m, valb+m, nb);
1231 }
1232 } // end of main loop
1233 }
1234
1235 break;
1236 }
1237
1238 return (B);
1239}
1240
1261 REAL *diaginv)
1262{
1263 // members of A
1264 const INT ROW = A->ROW;
1265 const INT COL = A->COL;
1266 const INT NNZ = A->NNZ;
1267 const INT nb = A->nb;
1268 const INT nb2 = nb*nb;
1269 const INT *IA = A->IA;
1270 const INT *JA = A->JA;
1271 REAL *val = A->val;
1272
1273 dBSRmat B;
1274 INT *IAb = NULL;
1275 INT *JAb = NULL;
1276 REAL *valb = NULL;
1277
1278 INT i,k,m;
1279 INT ibegin, iend;
1280
1281 // Variables for OpenMP
1282 SHORT nthreads = 1, use_openmp = FALSE;
1283 INT myid, mybegin, myend;
1284
1285#ifdef _OPENMP
1286 if (ROW > OPENMP_HOLDS) {
1287 use_openmp = TRUE;
1288 nthreads = fasp_get_num_threads();
1289 }
1290#endif
1291
1292 // Create a dBSRmat 'B'
1293 B = fasp_dbsr_create(ROW, COL, NNZ, nb, 0);
1294
1295 IAb = B.IA;
1296 JAb = B.JA;
1297 valb = B.val;
1298
1299 if (IAb) memcpy(IAb, IA, (ROW+1)*sizeof(INT));
1300 else goto FINISHED;
1301
1302 if (JAb) memcpy(JAb, JA, NNZ*sizeof(INT));
1303 else goto FINISHED;
1304
1305 switch (nb) {
1306
1307 case 2:
1308 if (use_openmp) {
1309#ifdef _openmp
1310#pragma omp parallel for private(myid, mybegin, myend, i, ibegin, iend, m, k)
1311#endif
1312 for (myid = 0; myid < nthreads; myid++) {
1313 fasp_get_start_end(myid, nthreads, ROW, &mybegin, &myend);
1314 for (i = mybegin; i < myend; ++i) {
1315 ibegin = IA[i]; iend = IA[i+1];
1316 // get the diagonal sub-blocks (It is the first block of each row)
1317 m = ibegin*4;
1318 memcpy(diaginv+i*4, val+m, 4*sizeof(REAL));
1319 fasp_smat_identity_nc2(valb+m);
1320
1321 // compute the inverses of the diagonal sub-blocks
1322 fasp_smat_inv_nc2(diaginv+i*4); // fixed by Zheng Li
1323
1324 // compute D^{-1}*A
1325 for (k = ibegin+1; k < iend; ++k) {
1326 m = k*4;
1327 fasp_blas_smat_mul_nc2(diaginv+i*4, val+m, valb+m);
1328 }
1329 }
1330 }// end of main loop
1331 }
1332 else {
1333 for (i = 0; i < ROW; ++i) {
1334 ibegin = IA[i]; iend = IA[i+1];
1335 // get the diagonal sub-blocks (It is the first block of each row)
1336 m = ibegin*4;
1337 memcpy(diaginv+i*4, val+m, 4*sizeof(REAL));
1338 fasp_smat_identity_nc2(valb+m);
1339
1340 // compute the inverses of the diagonal sub-blocks
1341 fasp_smat_inv_nc2(diaginv+i*4); // fixed by Zheng Li
1342
1343 // compute D^{-1}*A
1344 for (k = ibegin+1; k < iend; ++k) {
1345 m = k*4;
1346 fasp_blas_smat_mul_nc2(diaginv+i*4, val+m, valb+m);
1347 }
1348 } // end of main loop
1349 }
1350
1351 break;
1352
1353 case 3:
1354 if (use_openmp) {
1355#ifdef _openmp
1356#pragma omp parallel for private(myid, mybegin, myend, i, ibegin, iend, m, k)
1357#endif
1358 for (myid = 0; myid < nthreads; myid++) {
1359 fasp_get_start_end(myid, nthreads, ROW, &mybegin, &myend);
1360 for (i = mybegin; i < myend; ++i) {
1361 ibegin = IA[i]; iend = IA[i+1];
1362 // get the diagonal sub-blocks (It is the first block of each row)
1363 m = ibegin*9;
1364 memcpy(diaginv+i*9, val+m, 9*sizeof(REAL));
1365 fasp_smat_identity_nc3(valb+m);
1366 // compute the inverses of the diagonal sub-blocks
1367 fasp_smat_inv_nc3(diaginv+i*9);
1368 // compute D^{-1}*A
1369 for (k = ibegin+1; k < iend; ++k) {
1370 m = k*9;
1371 fasp_blas_smat_mul_nc3(diaginv+i*9, val+m, valb+m);
1372 }
1373 }
1374 }// end of main loop
1375 }
1376 else {
1377 for (i = 0; i < ROW; ++i) {
1378 ibegin = IA[i]; iend = IA[i+1];
1379 // get the diagonal sub-blocks (It is the first block of each row)
1380 m = ibegin*9;
1381 memcpy(diaginv+i*9, val+m, 9*sizeof(REAL));
1382 fasp_smat_identity_nc3(valb+m);
1383
1384 // compute the inverses of the diagonal sub-blocks
1385 fasp_smat_inv_nc3(diaginv+i*9);
1386
1387 // compute D^{-1}*A
1388 for (k = ibegin+1; k < iend; ++k) {
1389 m = k*9;
1390 fasp_blas_smat_mul_nc3(diaginv+i*9, val+m, valb+m);
1391 }
1392 }// end of main loop
1393 }
1394
1395 break;
1396
1397 case 5:
1398 if (use_openmp) {
1399#ifdef _OPENMP
1400#pragma omp parallel for private(myid, mybegin, myend, i, ibegin, iend, m, k)
1401#endif
1402 for (myid = 0; myid < nthreads; myid++) {
1403 fasp_get_start_end(myid, nthreads, ROW, &mybegin, &myend);
1404 for (i = mybegin; i < myend; ++i) {
1405 // get the diagonal sub-blocks
1406 ibegin = IA[i]; iend = IA[i+1];
1407 m = ibegin*25;
1408 memcpy(diaginv+i*25, val+m, 25*sizeof(REAL));
1409 fasp_smat_identity_nc5(valb+m);
1410
1411 // compute the inverses of the diagonal sub-blocks
1412 fasp_smat_inv_nc5(diaginv+i*25);
1413
1414 // compute D^{-1}*A
1415 for (k = ibegin+1; k < iend; ++k) {
1416 m = k*25;
1417 fasp_blas_smat_mul_nc5(diaginv+i*25, val+m, valb+m);
1418 }
1419 }
1420 }
1421 }
1422 else {
1423 for (i = 0; i < ROW; ++i) {
1424 // get the diagonal sub-blocks
1425 ibegin = IA[i]; iend = IA[i+1];
1426 m = ibegin*25;
1427 memcpy(diaginv+i*25, val+m, 25*sizeof(REAL));
1428 fasp_smat_identity_nc5(valb+m);
1429
1430 // compute the inverses of the diagonal sub-blocks
1431 fasp_smat_inv_nc5(diaginv+i*25);
1432
1433 // compute D^{-1}*A
1434 for (k = ibegin+1; k < iend; ++k) {
1435 m = k*25;
1436 fasp_blas_smat_mul_nc5(diaginv+i*25, val+m, valb+m);
1437 }
1438 }// end of main loop
1439 }
1440 break;
1441
1442 case 7:
1443 if (use_openmp) {
1444#ifdef _OPENMP
1445#pragma omp parallel for private(myid, i, mybegin, myend, ibegin, iend, m, k)
1446#endif
1447 for (myid = 0; myid < nthreads; myid++) {
1448 fasp_get_start_end(myid, nthreads, ROW, &mybegin, &myend);
1449 for (i = mybegin; i < myend; ++i) {
1450 // get the diagonal sub-blocks
1451 ibegin = IA[i]; iend = IA[i+1];
1452 m = ibegin*49;
1453 memcpy(diaginv+i*49, val+m, 49*sizeof(REAL));
1454 fasp_smat_identity_nc7(valb+m);
1455
1456 // compute the inverses of the diagonal sub-blocks
1457 fasp_smat_inv_nc7(diaginv+i*49);
1458
1459 // compute D^{-1}*A
1460 for (k = ibegin+1; k < iend; ++k) {
1461 m = k*49;
1462 fasp_blas_smat_mul_nc7(diaginv+i*49, val+m, valb+m);
1463 }
1464 }
1465 }// end of main loop
1466 }
1467 else {
1468 for (i = 0; i < ROW; ++i) {
1469 // get the diagonal sub-blocks
1470 ibegin = IA[i]; iend = IA[i+1];
1471 m = ibegin*49;
1472 memcpy(diaginv+i*49, val+m, 49*sizeof(REAL));
1473 fasp_smat_identity_nc7(valb+m);
1474
1475 // compute the inverses of the diagonal sub-blocks
1476 fasp_smat_inv_nc7(diaginv+i*49);
1477
1478 // compute D^{-1}*A
1479 for (k = ibegin+1; k < iend; ++k) {
1480 m = k*49;
1481 fasp_blas_smat_mul_nc7(diaginv+i*49, val+m, valb+m);
1482 }
1483 }// end of main loop
1484 }
1485
1486 break;
1487
1488 default:
1489 if (use_openmp) {
1490#ifdef _OPENMP
1491#pragma omp parallel for private(myid, mybegin, myend, i, ibegin, iend, m, k)
1492#endif
1493 for (myid = 0; myid < nthreads; myid++) {
1494 fasp_get_start_end(myid, nthreads, ROW, &mybegin, &myend);
1495 for (i = mybegin; i < myend; ++i) {
1496 // get the diagonal sub-blocks
1497 ibegin = IA[i]; iend = IA[i+1];
1498 m = ibegin*nb2;
1499 memcpy(diaginv+i*nb2, val+m, nb2*sizeof(REAL));
1500 fasp_smat_identity(valb+m, nb, nb2);
1501
1502 // compute the inverses of the diagonal sub-blocks
1503 fasp_smat_inv(diaginv+i*nb2, nb);
1504
1505 // compute D^{-1}*A
1506 for (k = ibegin+1; k < iend; ++k) {
1507 m = k*nb2;
1508 fasp_blas_smat_mul(diaginv+i*nb2, val+m, valb+m, nb);
1509 }
1510 }
1511 }// end of main loop
1512 }
1513 else {
1514 for (i = 0; i < ROW; ++i) {
1515 // get the diagonal sub-blocks
1516 ibegin = IA[i]; iend = IA[i+1];
1517 m = ibegin*nb2;
1518 memcpy(diaginv+i*nb2, val+m, nb2*sizeof(REAL));
1519 fasp_smat_identity(valb+m, nb, nb2);
1520
1521 // compute the inverses of the diagonal sub-blocks
1522 fasp_smat_inv(diaginv+i*nb2, nb);
1523
1524 // compute D^{-1}*A
1525 for (k = ibegin+1; k < iend; ++k) {
1526 m = k*nb2;
1527 fasp_blas_smat_mul(diaginv+i*nb2, val+m, valb+m, nb);
1528 }
1529 } // end of main loop
1530 }
1531
1532 break;
1533 }
1534
1535FINISHED:
1536 return (B);
1537}
1538
1556 const dBSRmat *A,
1557 REAL *diag )
1558{
1559 const INT nb2 = A->nb*A->nb;
1560
1561 INT i,k;
1562
1563 if ( n==0 || n>A->ROW || n>A->COL ) n = MIN(A->ROW,A->COL);
1564
1565#ifdef _OPENMP
1566#pragma omp parallel for private(i,k) if(n>OPENMP_HOLDS)
1567#endif
1568 for (i = 0; i < n; ++i) {
1569 for (k = A->IA[i]; k < A->IA[i+1]; ++k) {
1570 if (A->JA[k] == i) {
1571 memcpy(diag+i*nb2, A->val+k*nb2, nb2*sizeof(REAL));
1572 break;
1573 }
1574 }
1575 }
1576}
1577
1594 REAL *DL,
1595 REAL *DU)
1596{
1597
1598 // members of A
1599 const INT ROW = A->ROW;
1600 const INT ROW_plus_one = ROW+1;
1601 const INT COL = A->COL;
1602 const INT NNZ = A->NNZ;
1603 const INT nb = A->nb;
1604
1605 const INT *IA = A->IA;
1606 const INT *JA = A->JA;
1607 const REAL *val = A->val;
1608
1609 INT *IAb = NULL;
1610 INT *JAb = NULL;
1611 REAL *valb = NULL;
1612
1613 INT nb2 = nb*nb;
1614 INT i, j, k;
1615
1616 // Create a dBSRmat 'B'
1617 dBSRmat B = fasp_dbsr_create(ROW, COL, NNZ, nb, 0);
1618
1619 IAb = B.IA;
1620 JAb = B.JA;
1621 valb = B.val;
1622
1623 fasp_iarray_cp(ROW_plus_one, IA, IAb);
1624 fasp_iarray_cp(NNZ, JA, JAb);
1625
1626 // work array
1627 REAL *temp = (REAL *)fasp_mem_calloc(nb2, sizeof(REAL));
1628
1629 // get DL and DU
1630 switch (nb) {
1631
1632 case 2:
1633
1634 for (i=0; i<ROW; i++){
1635
1636 for (j=IA[i]; j<IA[i+1]; j++){
1637
1638 if (JA[j] == i){
1639
1640 temp[0] = val[j*nb2];
1641 temp[1] = val[j*nb2+1];
1642 temp[2] = val[j*nb2+2];
1643 temp[3] = val[j*nb2+3];
1644
1645 // form DL
1646 DL[i*nb2] = 1.0;
1647 DL[i*nb2+1] = 0.0;
1648 DL[i*nb2+2] = -temp[2]/temp[0];
1649 DL[i*nb2+3] = 1.0;
1650 //DL[i*nb2+2] = -temp[2]/(temp[0]*s);
1651 //DL[i*nb2+3] = 1.0/s;
1652
1653 // form DU
1654 DU[i*nb2] = 1.0;
1655 DU[i*nb2+1] = -temp[1]/temp[0];
1656 DU[i*nb2+2] = 0.0;
1657 DU[i*nb2+3] = 1.0;
1658
1659 break;
1660
1661 } // end of if (JA[j] == i)
1662
1663 } // end of for (j=IA[i]; j<IA[i+1]; j++)
1664
1665 } // end of for (i=0; i<ROW; i++)
1666
1667 break;
1668
1669 case 3:
1670
1671 for (i=0; i<ROW; i++){
1672
1673 for (j=IA[i]; j<IA[i+1]; j++){
1674
1675 if (JA[j] == i){
1676
1677 temp[0] = val[j*nb2];
1678 temp[1] = val[j*nb2+1];
1679 temp[2] = val[j*nb2+2];
1680 temp[3] = val[j*nb2+3];
1681 temp[4] = val[j*nb2+4];
1682 temp[5] = val[j*nb2+5];
1683 temp[6] = val[j*nb2+6];
1684 temp[7] = val[j*nb2+7];
1685 temp[8] = val[j*nb2+8];
1686
1687 // some auxiliry variables
1688 REAL s22 = temp[4] - ((temp[1]*temp[3])/temp[0]);
1689 REAL s23 = temp[5] - ((temp[2]*temp[3])/temp[0]);
1690 REAL s32 = temp[7] - ((temp[1]*temp[6])/temp[0]);
1691
1692 // form DL
1693 DL[i*nb2] = 1.0;
1694 DL[i*nb2+1] = 0.0;
1695 DL[i*nb2+2] = 0.0;
1696 DL[i*nb2+3] = -temp[3]/temp[0];
1697 DL[i*nb2+4] = 1.0;
1698 DL[i*nb2+5] = 0.0;
1699 DL[i*nb2+6] = -temp[6]/temp[0] + (temp[3]/temp[0])*(s32/s22);
1700 DL[i*nb2+7] = -s32/s22;
1701 DL[i*nb2+8] = 1.0;
1702
1703 // form DU
1704 DU[i*nb2] = 1.0;
1705 DU[i*nb2+1] = -temp[1]/temp[0];
1706 DU[i*nb2+2] = -temp[2]/temp[0] + (temp[1]/temp[0])*(s23/s22);
1707 DU[i*nb2+3] = 0.0;
1708 DU[i*nb2+4] = 1.0;
1709 DU[i*nb2+5] = -s23/s22;
1710 DU[i*nb2+6] = 0.0;
1711 DU[i*nb2+7] = 0.0;
1712 DU[i*nb2+8] = 1.0;
1713
1714 break;
1715
1716 } // end of if (JA[j] == i)
1717
1718 } // end of for (j=IA[i]; j<IA[i+1]; j++)
1719
1720 } // end of for (i=0; i<ROW; i++)
1721
1722 break;
1723
1724 default:
1725 printf("### ERROR: Only works for nb = 2, 3! [%s]\n", __FUNCTION__);
1726 break;
1727
1728 } // end of switch
1729
1730 // compute B = DL*A*DU
1731 switch (nb) {
1732
1733 case 2:
1734
1735 for (i=0; i<ROW; i++){
1736
1737 for (j=IA[i]; j<IA[i+1]; j++){
1738
1739 k = JA[j];
1740
1741 // left multiply DL
1742 fasp_blas_smat_mul_nc2(DL+i*nb2, val+j*nb2, temp);
1743
1744 // right multiply DU
1745 fasp_blas_smat_mul_nc2(temp, DU+k*nb2, valb+j*nb2);
1746
1747 // for diagonal block, set it to be diagonal matrix
1748 if (JA[j] == i){
1749
1750 valb[j*nb2+1] = 0.0;
1751 valb[j*nb2+2] = 0.0;
1752
1753 } // end if (JA[j] == i)
1754
1755
1756 } // end for (j=IA[i]; j<IA[i+1]; j++)
1757
1758 } // end of for (i=0; i<ROW; i++)
1759
1760 break;
1761
1762 case 3:
1763
1764 for (i=0; i<ROW; i++){
1765
1766 for (j=IA[i]; j<IA[i+1]; j++){
1767
1768 k = JA[j];
1769
1770 // left multiply DL
1771 fasp_blas_smat_mul_nc3(DL+i*nb2, val+j*nb2, temp);
1772
1773 // right multiply DU
1774 fasp_blas_smat_mul_nc3(temp, DU+k*nb2, valb+j*nb2);
1775
1776 // for diagonal block, set it to be diagonal matrix
1777 if (JA[j] == i){
1778
1779 valb[j*nb2+1] = 0.0;
1780 valb[j*nb2+2] = 0.0;
1781 valb[j*nb2+3] = 0.0;
1782 valb[j*nb2+5] = 0.0;
1783 valb[j*nb2+6] = 0.0;
1784 valb[j*nb2+7] = 0.0;
1785 if (ABS(valb[j*nb2+4]) < SMALLREAL) valb[j*nb2+4] = SMALLREAL;
1786 if (ABS(valb[j*nb2+8]) < SMALLREAL) valb[j*nb2+8] = SMALLREAL;
1787
1788 } // end if (JA[j] == i)
1789
1790 } // end for (j=IA[i]; j<IA[i+1]; j++)
1791
1792 } // end of for (i=0; i<ROW; i++)
1793
1794 break;
1795
1796 default:
1797 printf("### ERROR: Only works for nb = 2, 3! [%s]\n", __FUNCTION__);
1798 break;
1799
1800 }
1801
1802 // return
1803 return B;
1804
1805}
1806
1823 REAL *DL,
1824 REAL *DU)
1825{
1826
1827 // members of A
1828 INT ROW = A->ROW;
1829 INT ROW_plus_one = ROW+1;
1830 INT COL = A->COL;
1831 INT NNZ = A->NNZ;
1832 INT nb = A->nb;
1833 INT *IA = A->IA;
1834 INT *JA = A->JA;
1835 REAL *val = A->val;
1836
1837 INT *IAb = NULL;
1838 INT *JAb = NULL;
1839 REAL *valb = NULL;
1840
1841 INT nb2 = nb*nb;
1842 INT i,j,k;
1843
1844 REAL sqt3, sqt4, sqt8;
1845
1846 // Create a dBSRmat 'B'
1847 dBSRmat B = fasp_dbsr_create(ROW, COL, NNZ, nb, 0);
1848
1849 REAL *temp = (REAL *)fasp_mem_calloc(nb*nb, sizeof(REAL));
1850
1851 IAb = B.IA;
1852 JAb = B.JA;
1853 valb = B.val;
1854
1855 fasp_iarray_cp(ROW_plus_one, IA, IAb);
1856 fasp_iarray_cp(NNZ, JA, JAb);
1857
1858 // get DL and DU
1859 switch (nb) {
1860 case 2:
1861 for (i=0; i<ROW; i++){
1862 for (j=IA[i]; j<IA[i+1]; j++){
1863 if (JA[j] == i){
1864 REAL temp0 = val[j*nb2];
1865 REAL temp1 = val[j*nb2+1];
1866 REAL temp2 = val[j*nb2+2];
1867 REAL temp3 = val[j*nb2+3];
1868
1869 if (ABS(temp3) < SMALLREAL) temp3 = SMALLREAL;
1870
1871 sqt3 = sqrt(ABS(temp3));
1872
1873 // form DL
1874 DL[i*nb2] = 1.0;
1875 DL[i*nb2+1] = 0.0;
1876 DL[i*nb2+2] = -temp2/temp0/sqt3;
1877 DL[i*nb2+3] = 1.0/sqt3;
1878
1879 // form DU
1880 DU[i*nb2] = 1.0;
1881 DU[i*nb2+1] = -temp1/temp0/sqt3;
1882 DU[i*nb2+2] = 0.0;
1883 DU[i*nb2+3] = 1.0/sqt3;
1884 break;
1885
1886 }
1887 }
1888 }
1889
1890 break;
1891
1892 case 3:
1893 for (i=0; i<ROW; i++){
1894 for (j=IA[i]; j<IA[i+1]; j++){
1895 if (JA[j] == i){
1896 REAL temp0 = val[j*nb2];
1897 REAL temp1 = val[j*nb2+1];
1898 REAL temp2 = val[j*nb2+2];
1899 REAL temp3 = val[j*nb2+3];
1900 REAL temp4 = val[j*nb2+4];
1901 REAL temp5 = val[j*nb2+5];
1902 REAL temp6 = val[j*nb2+6];
1903 REAL temp7 = val[j*nb2+7];
1904 REAL temp8 = val[j*nb2+8];
1905
1906 if (ABS(temp4) < SMALLREAL) temp4 = SMALLREAL;
1907 if (ABS(temp8) < SMALLREAL) temp8 = SMALLREAL;
1908
1909 sqt4 = sqrt(ABS(temp4));
1910 sqt8 = sqrt(ABS(temp8));
1911
1912 // some auxiliary variables
1913 REAL s22 = temp4 - ((temp1*temp3)/temp0);
1914 REAL s23 = temp5 - ((temp2*temp3)/temp0);
1915 REAL s32 = temp7 - ((temp1*temp6)/temp0);
1916
1917 // form DL
1918 DL[i*nb2] = 1.0;
1919 DL[i*nb2+1] = 0.0;
1920 DL[i*nb2+2] = 0.0;
1921 DL[i*nb2+3] = -temp3/temp0/sqt4;
1922 DL[i*nb2+4] = 1.0/sqt4;
1923 DL[i*nb2+5] = 0.0;
1924 DL[i*nb2+6] = (-temp6/temp0 + (temp3/temp0)*(s32/s22))/sqt8;
1925 DL[i*nb2+7] = -s32/s22/sqt8;
1926 DL[i*nb2+8] = 1.0/sqt8;
1927
1928 // form DU
1929 DU[i*nb2] = 1.0;
1930 DU[i*nb2+1] = -temp1/temp0/sqt4;
1931 DU[i*nb2+2] = (-temp2/temp0 + (temp1/temp0)*(s23/s22))/sqt8;
1932 DU[i*nb2+3] = 0.0;
1933 DU[i*nb2+4] = 1.0/sqt4;
1934 DU[i*nb2+5] = -s23/s22/sqt8;
1935 DU[i*nb2+6] = 0.0;
1936 DU[i*nb2+7] = 0.0;
1937 DU[i*nb2+8] = 1.0/sqt8;
1938
1939 break;
1940
1941 }
1942 }
1943 }
1944
1945 break;
1946
1947 default:
1948 printf("### ERROR: Only works for nb = 2, 3! [%s]\n", __FUNCTION__);
1949 break;
1950
1951 } // end of switch
1952
1953 // compute B = DL*A*DU
1954 switch (nb) {
1955
1956 case 2:
1957 for (i=0; i<ROW; i++){
1958 for (j=IA[i]; j<IA[i+1]; j++){
1959 k = JA[j];
1960 // left multiply DL
1961 fasp_blas_smat_mul_nc2(DL+i*nb2, val+j*nb2, temp);
1962 // right multiply DU
1963 fasp_blas_smat_mul_nc2(temp, DU+k*nb2, valb+j*nb2);
1964 // for diagonal block, set it to be diagonal matrix
1965 if (JA[j] == i){
1966 valb[j*nb2+1] = 0.0;
1967 valb[j*nb2+2] = 0.0;
1968 if (ABS(valb[j*nb2+3]) < SMALLREAL) valb[j*nb2+3] = SMALLREAL;
1969 }
1970 }
1971 }
1972
1973 break;
1974
1975 case 3:
1976 for (i=0; i<ROW; i++){
1977 for (j=IA[i]; j<IA[i+1]; j++){
1978 k = JA[j];
1979 // left multiply DL
1980 fasp_blas_smat_mul_nc3(DL+i*nb2, val+j*nb2, temp);
1981 // right multiply DU
1982 fasp_blas_smat_mul_nc3(temp, DU+k*nb2, valb+j*nb2);
1983 // for diagonal block, set it to be diagonal matrix
1984 if (JA[j] == i){
1985 valb[j*nb2+1] = 0.0;
1986 valb[j*nb2+2] = 0.0;
1987 valb[j*nb2+3] = 0.0;
1988 valb[j*nb2+5] = 0.0;
1989 valb[j*nb2+6] = 0.0;
1990 valb[j*nb2+7] = 0.0;
1991 if (ABS(valb[j*nb2+4]) < SMALLREAL) valb[j*nb2+4] = SMALLREAL;
1992 if (ABS(valb[j*nb2+8]) < SMALLREAL) valb[j*nb2+8] = SMALLREAL;
1993 }
1994 }
1995 }
1996 break;
1997
1998 default:
1999 printf("### ERROR: Only works for nb = 2, 3! [%s]\n", __FUNCTION__);
2000 break;
2001 }
2002
2003 // return
2004 return B;
2005
2006}
2007
2024 const INT *P)
2025{
2026 const INT n = A->ROW, nnz = A->NNZ;
2027 const INT *ia= A->IA, *ja = A->JA;
2028 const REAL *Aval = A->val;
2029 const INT nb = A->nb, nb2 = nb*nb;
2030 const INT manner = A->storage_manner;
2031 SHORT nthreads = 1, use_openmp = FALSE;
2032
2033 INT i,j,k,jaj,i1,i2,start,jj;
2034
2035#ifdef _OPENMP
2036 if ( MIN(n, nnz) > OPENMP_HOLDS ) {
2037 use_openmp = 0;//TRUE;
2038 nthreads = fasp_get_num_threads();
2039 }
2040#endif
2041
2042 dBSRmat Aperm = fasp_dbsr_create(n,n,nnz,nb,manner);
2043
2044 // form the transpose of P
2045 INT *Pt = (INT*)fasp_mem_calloc(n,sizeof(INT));
2046
2047 if (use_openmp) {
2048 INT myid, mybegin, myend;
2049#ifdef _OPENMP
2050#pragma omp parallel for private(myid, mybegin, myend, i)
2051#endif
2052 for (myid=0; myid<nthreads; ++myid) {
2053 fasp_get_start_end(myid, nthreads, n, &mybegin, &myend);
2054 for (i=mybegin; i<myend; ++i) Pt[P[i]] = i;
2055 }
2056 }
2057 else {
2058 for (i=0; i<n; ++i) Pt[P[i]] = i;
2059 }
2060
2061 // compute IA of P*A (row permutation)
2062 Aperm.IA[0] = 0;
2063 for (i=0; i<n; ++i) {
2064 k = P[i];
2065 Aperm.IA[i+1] = Aperm.IA[i]+(ia[k+1]-ia[k]);
2066 }
2067
2068 // perform actual P*A
2069 if (use_openmp) {
2070 INT myid, mybegin, myend;
2071#ifdef _OPENMP
2072#pragma omp parallel for private(myid, mybegin, myend, i, i1, i2, k, start, j, jaj, jj)
2073#endif
2074 for (myid=0; myid<nthreads; ++myid) {
2075 fasp_get_start_end(myid, nthreads, n, &mybegin, &myend);
2076 for (i=mybegin; i<myend; ++i) {
2077 i1 = Aperm.IA[i]; i2 = Aperm.IA[i+1]-1;
2078 k = P[i];
2079 start = ia[k];
2080 for (j=i1; j<=i2; ++j) {
2081 jaj = start+j-i1;
2082 Aperm.JA[j] = ja[jaj];
2083 for (jj=0; jj<nb2;++jj)
2084 Aperm.val[j*nb2+jj] = Aval[jaj*nb2+jj];
2085 }
2086 }
2087 }
2088 }
2089 else {
2090 for (i=0; i<n; ++i) {
2091 i1 = Aperm.IA[i]; i2 = Aperm.IA[i+1]-1;
2092 k = P[i];
2093 start = ia[k];
2094 for (j=i1; j<=i2; ++j) {
2095 jaj = start+j-i1;
2096 Aperm.JA[j] = ja[jaj];
2097 for (jj=0; jj<nb2;++jj)
2098 Aperm.val[j*nb2+jj] = Aval[jaj*nb2+jj];
2099 }
2100 }
2101 }
2102
2103 // perform P*A*P' (column permutation)
2104 if (use_openmp) {
2105 INT myid, mybegin, myend;
2106#ifdef _OPENMP
2107#pragma omp parallel for private(myid, mybegin, myend, k, j)
2108#endif
2109 for (myid=0; myid<nthreads; ++myid) {
2110 fasp_get_start_end(myid, nthreads, nnz, &mybegin, &myend);
2111 for (k=mybegin; k<myend; ++k) {
2112 j = Aperm.JA[k];
2113 Aperm.JA[k] = Pt[j];
2114 }
2115 }
2116 }
2117 else {
2118 for (k=0; k<nnz; ++k) {
2119 j = Aperm.JA[k];
2120 Aperm.JA[k] = Pt[j];
2121 }
2122 }
2123
2124 fasp_mem_free(Pt); Pt = NULL;
2125
2126 return(Aperm);
2127}
2128
2142{
2143 INT count = 0;
2144 const INT num_rowsA = A -> ROW;
2145 const INT nb = A->nb;
2146 const INT nb2 = nb*nb;
2147 INT *A_i = A -> IA;
2148 INT *A_j = A -> JA;
2149 REAL *A_data = A -> val;
2150
2151 INT i, ii, j, jj, ibegin, iend, iend1;
2152
2153#ifdef _OPENMP
2154 // variables for OpenMP
2155 INT myid, mybegin, myend;
2156 INT nthreads = fasp_get_num_threads();
2157#endif
2158
2159#ifdef _OPENMP
2160 if (num_rowsA > OPENMP_HOLDS) {
2161#pragma omp parallel for private (myid,mybegin,myend,i,ii,j,jj,ibegin,iend,iend1)
2162 for (myid = 0; myid < nthreads; myid++) {
2163 fasp_get_start_end(myid, nthreads, num_rowsA, &mybegin, &myend);
2164 for (i = mybegin; i < myend; i++) {
2165 ibegin = A_i[i]; iend = A_i[i+1]; iend1 = iend-1;
2166 for (j = ibegin; j < iend1; j ++) {
2167 if (A_j[j] > -1) {
2168 for (jj = j+1; jj < iend; jj ++) {
2169 if (A_j[j] == A_j[jj]) {
2170 // add jj col to j
2171 for ( ii=0; ii <nb2; ii++ )
2172 A_data[j*nb2 +ii] += A_data[ jj*nb2+ii];
2173 A_j[jj] = -1;
2174 count ++;
2175 }
2176 }
2177 }
2178 }
2179 }
2180 }
2181 }
2182 else {
2183#endif
2184 for (i = 0; i < num_rowsA; i ++) {
2185 ibegin = A_i[i]; iend = A_i[i+1]; iend1 = iend-1;
2186 for (j = ibegin; j < iend1; j ++) {
2187 if (A_j[j] > -1) {
2188 for (jj = j+1; jj < iend; jj ++) {
2189 if (A_j[j] == A_j[jj]) {
2190 // add jj col to j
2191 for ( ii=0; ii <nb2; ii++ )
2192 A_data[j*nb2 +ii] += A_data[ jj*nb2+ii];
2193 printf("### WARNING: Same col indices at %d, col %d (%d %d)!\n",
2194 i, A_j[j], j, jj );
2195 A_j[jj] = -1;
2196 count ++;
2197 }
2198 }
2199 }
2200 }
2201 }
2202#ifdef _OPENMP
2203 }
2204#endif
2205
2206 if ( count > 0 ) {
2207 INT *tempA_i = (INT*)fasp_mem_calloc(num_rowsA+1, sizeof(INT));
2208 memcpy(tempA_i, A_i, (num_rowsA+1 )*sizeof(INT));
2209 jj = 0; A_i[0] = jj;
2210 for (i = 0; i < num_rowsA; i ++) {
2211 ibegin = tempA_i[i]; iend = tempA_i[i+1];
2212 for (j = ibegin; j < iend; j ++) {
2213 if (A_j[j] > -1 ) {
2214 memcpy(A_data +jj*nb2, A_data+j*nb2, (nb2)*sizeof(REAL));
2215 A_j[jj] = A_j[j];
2216 jj++;
2217 }
2218 }
2219 A_i[i+1] = jj;
2220 }
2221 A-> NNZ = jj;
2222 fasp_mem_free(tempA_i); tempA_i = NULL;
2223
2224 printf("### WARNING: %d col indices have been merged!\n", count);
2225 }
2226
2227 return count;
2228}
2229
2230/*---------------------------------*/
2231/*-- End of File --*/
2232/*---------------------------------*/
void fasp_iarray_set(const INT n, INT *x, const INT val)
Set initial value for an array to be x=val.
Definition: AuxArray.c:103
void fasp_iarray_cp(const INT n, const INT *x, INT *y)
Copy an array to the other y=x.
Definition: AuxArray.c:184
void fasp_mem_free(void *mem)
Free up previous allocated memory body and set pointer to NULL.
Definition: AuxMemory.c:155
void * fasp_mem_calloc(const unsigned int size, const unsigned int type)
Allocate, initiate, and check memory.
Definition: AuxMemory.c:65
void fasp_get_start_end(const INT procid, const INT nprocs, const INT n, INT *start, INT *end)
Assign Load to each thread.
Definition: AuxThreads.c:92
void fasp_smat_inv_nc3(REAL *a)
Compute the inverse matrix of a 3*3 full matrix A (in place)
void fasp_smat_identity_nc7(REAL *a)
Set a 7*7 full matrix to be a identity.
void fasp_smat_identity_nc3(REAL *a)
Set a 3*3 full matrix to be a identity.
SHORT fasp_smat_invp_nc(REAL *a, const INT n)
Compute the inverse of a matrix using Gauss Elimination with Pivoting.
void fasp_smat_inv_nc7(REAL *a)
Compute the inverse matrix of a 7*7 matrix a.
void fasp_smat_inv_nc2(REAL *a)
Compute the inverse matrix of a 2*2 full matrix A (in place)
SHORT fasp_smat_inv(REAL *a, const INT n)
Compute the inverse matrix of a small full matrix a.
void fasp_smat_identity_nc2(REAL *a)
Set a 2*2 full matrix to be a identity.
void fasp_smat_inv_nc5(REAL *a)
Compute the inverse matrix of a 5*5 full matrix A (in place)
void fasp_smat_identity_nc5(REAL *a)
Set a 5*5 full matrix to be a identity.
void fasp_smat_identity(REAL *a, const INT n, const INT n2)
Set a n*n full matrix to be a identity.
void fasp_blas_smat_mul(const REAL *a, const REAL *b, REAL *c, const INT n)
Compute the matrix product of two small full matrices a and b, stored in c.
Definition: BlaSmallMat.c:540
void fasp_blas_smat_mul_nc2(const REAL *a, const REAL *b, REAL *c)
Compute the matrix product of two 2* matrices a and b, stored in c.
Definition: BlaSmallMat.c:275
void fasp_blas_smat_mul_nc3(const REAL *a, const REAL *b, REAL *c)
Compute the matrix product of two 3*3 matrices a and b, stored in c.
Definition: BlaSmallMat.c:304
void fasp_blas_smat_mul_nc7(const REAL *a, const REAL *b, REAL *c)
Compute the matrix product of two 7*7 matrices a and b, stored in c.
Definition: BlaSmallMat.c:447
void fasp_blas_smat_mul_nc5(const REAL *a, const REAL *b, REAL *c)
Compute the matrix product of two 5*5 matrices a and b, stored in c.
Definition: BlaSmallMat.c:388
dBSRmat fasp_dbsr_create(const INT ROW, const INT COL, const INT NNZ, const INT nb, const INT storage_manner)
Create BSR sparse matrix data memory space.
Definition: BlaSparseBSR.c:45
dBSRmat fasp_dbsr_diaginv(const dBSRmat *A)
Compute B := D^{-1}*A, where 'D' is the block diagonal part of A.
Definition: BlaSparseBSR.c:591
INT fasp_dbsr_trans(const dBSRmat *A, dBSRmat *AT)
Find A^T from given dBSRmat matrix A.
Definition: BlaSparseBSR.c:199
SHORT fasp_dbsr_getblk(const dBSRmat *A, const INT *Is, const INT *Js, const INT m, const INT n, dBSRmat *B)
Get a sub BSR matrix of A with specified rows and columns.
Definition: BlaSparseBSR.c:287
void fasp_dbsr_getdiag(INT n, const dBSRmat *A, REAL *diag)
Abstract the diagonal blocks of a BSR matrix.
dBSRmat fasp_dbsr_diagLU(const dBSRmat *A, REAL *DL, REAL *DU)
Compute B := DL*A*DU. We decompose each diagonal block of A into LDU form and DL = diag(L^{-1}) and D...
void fasp_dbsr_cp(const dBSRmat *A, dBSRmat *B)
copy a dCSRmat to a new one B=A
Definition: BlaSparseBSR.c:172
dBSRmat fasp_dbsr_diagLU2(dBSRmat *A, REAL *DL, REAL *DU)
Compute B := DL*A*DU. We decompose each diagonal block of A into LDU form and DL = diag(L^{-1}) and D...
dBSRmat fasp_dbsr_perm(const dBSRmat *A, const INT *P)
Apply permutation of A, i.e. Aperm=PAP' by the orders given in P.
void fasp_dbsr_alloc(const INT ROW, const INT COL, const INT NNZ, const INT nb, const INT storage_manner, dBSRmat *A)
Allocate memory space for BSR format sparse matrix.
Definition: BlaSparseBSR.c:99
void fasp_dbsr_free(dBSRmat *A)
Free memory space for BSR format sparse matrix.
Definition: BlaSparseBSR.c:146
dBSRmat fasp_dbsr_diaginv2(const dBSRmat *A, REAL *diaginv)
Compute B := D^{-1}*A, where 'D' is the block diagonal part of A.
Definition: BlaSparseBSR.c:751
SHORT fasp_dbsr_diagpref(dBSRmat *A)
Reorder the column and data arrays of a square BSR matrix, so that the first entry in each row is the...
Definition: BlaSparseBSR.c:385
dBSRmat fasp_dbsr_diaginv4(const dBSRmat *A, REAL *diaginv)
Compute B := D^{-1}*A, where 'D' is the block diagonal part of A.
INT fasp_dbsr_merge_col(dBSRmat *A)
Check and merge some same col index in one row.
dBSRmat fasp_dbsr_diaginv3(const dBSRmat *A, REAL *diaginv)
Compute B := D^{-1}*A, where 'D' is the block diagonal part of A.
Definition: BlaSparseBSR.c:857
dvector fasp_dbsr_getdiaginv(const dBSRmat *A)
Get D^{-1} of matrix A.
Definition: BlaSparseBSR.c:486
Main header file for the FASP project.
#define MIN(a, b)
Definition: fasp.h:75
#define REAL
Definition: fasp.h:67
#define SHORT
FASP integer and floating point numbers.
Definition: fasp.h:63
#define ABS(a)
Definition: fasp.h:76
#define INT
Definition: fasp.h:64
#define FASP_SUCCESS
Definition of return status and error messages.
Definition: fasp_const.h:19
#define OPENMP_HOLDS
Definition: fasp_const.h:260
#define TRUE
Definition of logic type.
Definition: fasp_const.h:61
#define FALSE
Definition: fasp_const.h:62
#define ERROR_INPUT_PAR
Definition: fasp_const.h:24
#define SMALLREAL
Definition: fasp_const.h:249
Block sparse row storage matrix of REAL type.
Definition: fasp_block.h:34
INT COL
number of cols of sub-blocks in matrix A, N
Definition: fasp_block.h:40
INT NNZ
number of nonzero sub-blocks in matrix A, NNZ
Definition: fasp_block.h:43
REAL * val
Definition: fasp_block.h:57
INT nb
dimension of each sub-block
Definition: fasp_block.h:46
INT * IA
integer array of row pointers, the size is ROW+1
Definition: fasp_block.h:60
INT ROW
number of rows of sub-blocks in matrix A, M
Definition: fasp_block.h:37
INT * JA
Definition: fasp_block.h:64
INT storage_manner
storage manner for each sub-block
Definition: fasp_block.h:49
Vector with n entries of REAL type.
Definition: fasp.h:346
REAL * val
actual vector entries
Definition: fasp.h:352
INT row
number of rows
Definition: fasp.h:349