Fast Auxiliary Space Preconditioning 2.7.7 Aug/28/2022
BlaSpmvCSR.c
Go to the documentation of this file.
1
24#include <math.h>
25#include <time.h>
26
27#ifdef _OPENMP
28#include <omp.h>
29#endif
30
31#include "fasp.h"
32#include "fasp_functs.h"
33
34extern unsigned long total_alloc_mem;
35extern unsigned long total_alloc_count;
37/*---------------------------------*/
38/*-- Public Functions --*/
39/*---------------------------------*/
40
60SHORT fasp_blas_dcsr_add(const dCSRmat* A, const REAL alpha, const dCSRmat* B,
61 const REAL beta, dCSRmat* C)
62{
63 INT i, j, k, l;
64 INT count = 0, added, countrow;
65
66 SHORT status = FASP_SUCCESS, use_openmp = FALSE;
67
68#ifdef _OPENMP
69 INT mybegin, myend, myid, nthreads;
70 if (A->nnz > OPENMP_HOLDS) {
71 use_openmp = TRUE;
72 nthreads = fasp_get_num_threads();
73 }
74#endif
75
76 if (A->row != B->row || A->col != B->col) {
77 printf("### ERROR: Matrix sizes do not match!\n");
78 status = ERROR_MAT_SIZE;
79 goto FINISHED;
80 }
81
82 if (A == NULL && B == NULL) {
83 C->row = 0;
84 C->col = 0;
85 C->nnz = 0;
86 status = FASP_SUCCESS;
87 goto FINISHED;
88 }
89
90 if (A->nnz == 0 && B->nnz == 0) {
91 C->row = A->row;
92 C->col = A->col;
93 C->nnz = A->nnz;
94 status = FASP_SUCCESS;
95 goto FINISHED;
96 }
97
98 // empty matrix A
99 if (A->nnz == 0 || A == NULL) {
100 fasp_dcsr_alloc(B->row, B->col, B->nnz, C);
101 memcpy(C->IA, B->IA, (B->row + 1) * sizeof(INT));
102 memcpy(C->JA, B->JA, (B->nnz) * sizeof(INT));
103
104 if (use_openmp) {
105#ifdef _OPENMP
106#pragma omp parallel private(myid, mybegin, myend, i)
107 {
108 myid = omp_get_thread_num();
109 fasp_get_start_end(myid, nthreads, A->nnz, &mybegin, &myend);
110 for (i = mybegin; i < myend; ++i) C->val[i] = B->val[i] * beta;
111 }
112#endif
113 } else {
114 for (i = 0; i < A->nnz; ++i) C->val[i] = B->val[i] * beta;
115 }
116
117 status = FASP_SUCCESS;
118 goto FINISHED;
119 }
120
121 // empty matrix B
122 if (B->nnz == 0 || B == NULL) {
123 fasp_dcsr_alloc(A->row, A->col, A->nnz, C);
124 memcpy(C->IA, A->IA, (A->row + 1) * sizeof(INT));
125 memcpy(C->JA, A->JA, (A->nnz) * sizeof(INT));
126
127 if (use_openmp) {
128#ifdef _OPENMP
129 INT mybegin, myend, myid;
130#pragma omp parallel private(myid, mybegin, myend, i)
131 {
132 myid = omp_get_thread_num();
133 fasp_get_start_end(myid, nthreads, A->nnz, &mybegin, &myend);
134 for (i = mybegin; i < myend; ++i) C->val[i] = A->val[i] * alpha;
135 }
136#endif
137 } else {
138 for (i = 0; i < A->nnz; ++i) C->val[i] = A->val[i] * alpha;
139 }
140
141 status = FASP_SUCCESS;
142 goto FINISHED;
143 }
144
145 C->row = A->row;
146 C->col = A->col;
147
148 C->IA = (INT*)fasp_mem_calloc(C->row + 1, sizeof(INT));
149
150 // allocate work space for C->JA and C->val
151 C->JA = (INT*)fasp_mem_calloc(A->nnz + B->nnz, sizeof(INT));
152
153 C->val = (REAL*)fasp_mem_calloc(A->nnz + B->nnz, sizeof(REAL));
154
155 // initial C->IA
156 memset(C->IA, 0, sizeof(INT) * (C->row + 1));
157 memset(C->JA, -1, sizeof(INT) * (A->nnz + B->nnz));
158
159 for (i = 0; i < A->row; ++i) {
160 countrow = 0;
161 for (j = A->IA[i]; j < A->IA[i + 1]; ++j) {
162 C->val[count] = alpha * A->val[j];
163 C->JA[count] = A->JA[j];
164 C->IA[i + 1]++;
165 count++;
166 countrow++;
167 } // end for js
168
169 for (k = B->IA[i]; k < B->IA[i + 1]; ++k) {
170 added = 0;
171
172 for (l = C->IA[i]; l < C->IA[i] + countrow + 1; l++) {
173 if (B->JA[k] == C->JA[l]) {
174 C->val[l] = C->val[l] + beta * B->val[k];
175 added = 1;
176 break;
177 }
178 } // end for l
179
180 if (added == 0) {
181 C->val[count] = beta * B->val[k];
182 C->JA[count] = B->JA[k];
183 C->IA[i + 1]++;
184 count++;
185 }
186
187 } // end for k
188
189 C->IA[i + 1] += C->IA[i];
190 }
191
192 C->nnz = count;
193 C->JA = (INT*)fasp_mem_realloc(C->JA, (count) * sizeof(INT));
194 C->val = (REAL*)fasp_mem_realloc(C->val, (count) * sizeof(REAL));
195
196#if MULTI_COLOR_ORDER
197 C->color = 0;
198 C->IC = NULL;
199 C->ICMAP = NULL;
200#endif
201
202FINISHED:
203 return status;
204}
205
219void fasp_blas_dcsr_axm(dCSRmat* A, const REAL alpha)
220{
221 const INT nnz = A->nnz;
222
223 // A direct calculation can be written as:
224 fasp_blas_darray_ax(nnz, alpha, A->val);
225}
226
241void fasp_blas_dcsr_mxv(const dCSRmat* A, const REAL* x, REAL* y)
242{
243 const INT m = A->row;
244 const INT * ia = A->IA, *ja = A->JA;
245 const REAL* aj = A->val;
246
247 INT i, k, begin_row, end_row, nnz_row;
248 register REAL temp;
249
250 SHORT nthreads = 1, use_openmp = FALSE;
251
252#ifdef _OPENMP
253 if (m > OPENMP_HOLDS) {
254 use_openmp = TRUE;
255 nthreads = fasp_get_num_threads();
256 }
257#endif
258
259 if (use_openmp) {
260 INT myid, mybegin, myend;
261
262#ifdef _OPENMP
263#pragma omp parallel for private(myid, mybegin, myend, i, temp, begin_row, end_row, \
264 nnz_row, k)
265#endif
266 for (myid = 0; myid < nthreads; myid++) {
267 fasp_get_start_end(myid, nthreads, m, &mybegin, &myend);
268 for (i = mybegin; i < myend; ++i) {
269 temp = 0.0;
270 begin_row = ia[i];
271 end_row = ia[i + 1];
272 nnz_row = end_row - begin_row;
273 switch (nnz_row) {
274 case 3:
275 k = begin_row;
276 temp += aj[k] * x[ja[k]];
277 k++;
278 temp += aj[k] * x[ja[k]];
279 k++;
280 temp += aj[k] * x[ja[k]];
281 break;
282 case 4:
283 k = begin_row;
284 temp += aj[k] * x[ja[k]];
285 k++;
286 temp += aj[k] * x[ja[k]];
287 k++;
288 temp += aj[k] * x[ja[k]];
289 k++;
290 temp += aj[k] * x[ja[k]];
291 break;
292 case 5:
293 k = begin_row;
294 temp += aj[k] * x[ja[k]];
295 k++;
296 temp += aj[k] * x[ja[k]];
297 k++;
298 temp += aj[k] * x[ja[k]];
299 k++;
300 temp += aj[k] * x[ja[k]];
301 k++;
302 temp += aj[k] * x[ja[k]];
303 break;
304 case 6:
305 k = begin_row;
306 temp += aj[k] * x[ja[k]];
307 k++;
308 temp += aj[k] * x[ja[k]];
309 k++;
310 temp += aj[k] * x[ja[k]];
311 k++;
312 temp += aj[k] * x[ja[k]];
313 k++;
314 temp += aj[k] * x[ja[k]];
315 k++;
316 temp += aj[k] * x[ja[k]];
317 break;
318 case 7:
319 k = begin_row;
320 temp += aj[k] * x[ja[k]];
321 k++;
322 temp += aj[k] * x[ja[k]];
323 k++;
324 temp += aj[k] * x[ja[k]];
325 k++;
326 temp += aj[k] * x[ja[k]];
327 k++;
328 temp += aj[k] * x[ja[k]];
329 k++;
330 temp += aj[k] * x[ja[k]];
331 k++;
332 temp += aj[k] * x[ja[k]];
333 break;
334 default:
335 for (k = begin_row; k < end_row; ++k) {
336 temp += aj[k] * x[ja[k]];
337 }
338 break;
339 }
340 y[i] = temp;
341 }
342 }
343 }
344
345 else {
346 for (i = 0; i < m; ++i) {
347 temp = 0.0;
348 begin_row = ia[i];
349 end_row = ia[i + 1];
350 nnz_row = end_row - begin_row;
351 switch (nnz_row) {
352 case 3:
353 k = begin_row;
354 temp += aj[k] * x[ja[k]];
355 k++;
356 temp += aj[k] * x[ja[k]];
357 k++;
358 temp += aj[k] * x[ja[k]];
359 break;
360 case 4:
361 k = begin_row;
362 temp += aj[k] * x[ja[k]];
363 k++;
364 temp += aj[k] * x[ja[k]];
365 k++;
366 temp += aj[k] * x[ja[k]];
367 k++;
368 temp += aj[k] * x[ja[k]];
369 break;
370 case 5:
371 k = begin_row;
372 temp += aj[k] * x[ja[k]];
373 k++;
374 temp += aj[k] * x[ja[k]];
375 k++;
376 temp += aj[k] * x[ja[k]];
377 k++;
378 temp += aj[k] * x[ja[k]];
379 k++;
380 temp += aj[k] * x[ja[k]];
381 break;
382 case 6:
383 k = begin_row;
384 temp += aj[k] * x[ja[k]];
385 k++;
386 temp += aj[k] * x[ja[k]];
387 k++;
388 temp += aj[k] * x[ja[k]];
389 k++;
390 temp += aj[k] * x[ja[k]];
391 k++;
392 temp += aj[k] * x[ja[k]];
393 k++;
394 temp += aj[k] * x[ja[k]];
395 break;
396 case 7:
397 k = begin_row;
398 temp += aj[k] * x[ja[k]];
399 k++;
400 temp += aj[k] * x[ja[k]];
401 k++;
402 temp += aj[k] * x[ja[k]];
403 k++;
404 temp += aj[k] * x[ja[k]];
405 k++;
406 temp += aj[k] * x[ja[k]];
407 k++;
408 temp += aj[k] * x[ja[k]];
409 k++;
410 temp += aj[k] * x[ja[k]];
411 break;
412 default:
413 for (k = begin_row; k < end_row; ++k) {
414 temp += aj[k] * x[ja[k]];
415 }
416 break;
417 }
418 y[i] = temp;
419 }
420 }
421}
422
437void fasp_blas_dcsr_mxv_agg(const dCSRmat* A, const REAL* x, REAL* y)
438{
439 const INT m = A->row;
440 const INT * ia = A->IA, *ja = A->JA;
441 INT i, k, begin_row, end_row;
442 register REAL temp;
443
444#ifdef _OPENMP
445 // variables for OpenMP
446 INT myid, mybegin, myend;
447 INT nthreads = fasp_get_num_threads();
448#endif
449
450#ifdef _OPENMP
451 if (m > OPENMP_HOLDS) {
452#pragma omp parallel for private(myid, i, mybegin, myend, temp, begin_row, end_row, k)
453 for (myid = 0; myid < nthreads; myid++) {
454 fasp_get_start_end(myid, nthreads, m, &mybegin, &myend);
455 for (i = mybegin; i < myend; i++) {
456 temp = 0.0;
457 begin_row = ia[i];
458 end_row = ia[i + 1];
459 for (k = begin_row; k < end_row; ++k) temp += x[ja[k]];
460 y[i] = temp;
461 }
462 }
463 } else {
464#endif
465 for (i = 0; i < m; ++i) {
466 temp = 0.0;
467 begin_row = ia[i];
468 end_row = ia[i + 1];
469 for (k = begin_row; k < end_row; ++k) temp += x[ja[k]];
470 y[i] = temp;
471 }
472#ifdef _OPENMP
473 }
474#endif
475}
476
493void fasp_blas_dcsr_aAxpy(const REAL alpha, const dCSRmat* A, const REAL* x, REAL* y)
494{
495 const INT m = A->row;
496 const INT * ia = A->IA, *ja = A->JA;
497 const REAL* aj = A->val;
498 INT i, k, begin_row, end_row;
499 register REAL temp;
500 SHORT nthreads = 1, use_openmp = FALSE;
501
502#ifdef _OPENMP
503 if (m > OPENMP_HOLDS) {
504 use_openmp = TRUE;
505 nthreads = fasp_get_num_threads();
506 }
507#endif
508 if (alpha == 1.0) {
509 if (use_openmp) {
510 INT myid, mybegin, myend;
511#ifdef _OPENMP
512#pragma omp parallel for private(myid, mybegin, myend, i, temp, begin_row, end_row, k)
513#endif
514 for (myid = 0; myid < nthreads; myid++) {
515 fasp_get_start_end(myid, nthreads, m, &mybegin, &myend);
516 for (i = mybegin; i < myend; ++i) {
517 temp = 0.0;
518 begin_row = ia[i];
519 end_row = ia[i + 1];
520 for (k = begin_row; k < end_row; ++k) temp += aj[k] * x[ja[k]];
521 y[i] += temp;
522 }
523 }
524 } else {
525 for (i = 0; i < m; ++i) {
526 temp = 0.0;
527 begin_row = ia[i];
528 end_row = ia[i + 1];
529 for (k = begin_row; k < end_row; ++k) temp += aj[k] * x[ja[k]];
530 y[i] += temp;
531 }
532 }
533 }
534
535 else if (alpha == -1.0) {
536 if (use_openmp) {
537 INT myid, mybegin, myend;
538 INT S = 0;
539#ifdef _OPENMP
540#pragma omp parallel for private(myid, mybegin, myend, temp, i, begin_row, end_row, k)
541#endif
542
543 for (myid = 0; myid < nthreads; myid++) {
544 fasp_get_start_end(myid, nthreads, m, &mybegin, &myend);
545 for (i = mybegin; i < myend; ++i) {
546 temp = 0.0;
547 begin_row = ia[i];
548 end_row = ia[i + 1];
549 for (k = begin_row; k < end_row; ++k) temp += aj[k] * x[ja[k]];
550 y[i] -= temp;
551 }
552 }
553 } else {
554 for (i = 0; i < m; ++i) {
555 temp = 0.0;
556 begin_row = ia[i];
557 end_row = ia[i + 1];
558 for (k = begin_row; k < end_row; ++k) temp += aj[k] * x[ja[k]];
559 y[i] -= temp;
560 }
561 }
562 }
563
564 else {
565 if (use_openmp) {
566 INT myid, mybegin, myend;
567#ifdef _OPENMP
568#pragma omp parallel for private(myid, mybegin, myend, i, temp, begin_row, end_row, k)
569#endif
570 for (myid = 0; myid < nthreads; myid++) {
571 fasp_get_start_end(myid, nthreads, m, &mybegin, &myend);
572 for (i = mybegin; i < myend; ++i) {
573 temp = 0.0;
574 begin_row = ia[i];
575 end_row = ia[i + 1];
576 for (k = begin_row; k < end_row; ++k) temp += aj[k] * x[ja[k]];
577 y[i] += temp * alpha;
578 }
579 }
580 } else {
581 for (i = 0; i < m; ++i) {
582 temp = 0.0;
583 begin_row = ia[i];
584 end_row = ia[i + 1];
585 for (k = begin_row; k < end_row; ++k) temp += aj[k] * x[ja[k]];
586 y[i] += temp * alpha;
587 }
588 }
589 }
590}
591
608void fasp_blas_ldcsr_aAxpy(const REAL alpha, const dCSRmat* A, const LONGREAL* x,
609 REAL* y)
610{
611 const INT m = A->row;
612 const INT * ia = A->IA, *ja = A->JA;
613 const REAL* aj = A->val;
614 INT i, k, begin_row, end_row;
615 register LONGREAL temp;
616
617 SHORT nthreads = 1, use_openmp = FALSE;
618
619#ifdef _OPENMP
620 if (m > OPENMP_HOLDS) {
621 use_openmp = TRUE;
622 nthreads = fasp_get_num_threads();
623 }
624#endif
625
626 if (alpha == 1.0) {
627 if (use_openmp) {
628 INT myid, mybegin, myend;
629#ifdef _OPENMP
630#pragma omp parallel for private(myid, mybegin, myend, i, temp, begin_row, end_row, k)
631#endif
632 for (myid = 0; myid < nthreads; myid++) {
633 fasp_get_start_end(myid, nthreads, m, &mybegin, &myend);
634 for (i = mybegin; i < myend; ++i) {
635 temp = 0.0;
636 begin_row = ia[i];
637 end_row = ia[i + 1];
638 for (k = begin_row; k < end_row; ++k) temp += aj[k] * x[ja[k]];
639 y[i] += temp;
640 }
641 }
642 } else {
643 for (i = 0; i < m; ++i) {
644 temp = 0.0;
645 begin_row = ia[i];
646 end_row = ia[i + 1];
647 for (k = begin_row; k < end_row; ++k) temp += aj[k] * x[ja[k]];
648 y[i] += temp;
649 }
650 }
651 }
652
653 else if (alpha == -1.0) {
654 if (use_openmp) {
655 INT myid, mybegin, myend;
656#ifdef _OPENMP
657#pragma omp parallel for private(myid, mybegin, myend, temp, i, begin_row, end_row, k)
658#endif
659 for (myid = 0; myid < nthreads; myid++) {
660 fasp_get_start_end(myid, nthreads, m, &mybegin, &myend);
661 for (i = mybegin; i < myend; ++i) {
662 temp = 0.0;
663 begin_row = ia[i];
664 end_row = ia[i + 1];
665 for (k = begin_row; k < end_row; ++k) temp += aj[k] * x[ja[k]];
666 y[i] -= temp;
667 }
668 }
669 } else {
670 for (i = 0; i < m; ++i) {
671 temp = 0.0;
672 begin_row = ia[i];
673 end_row = ia[i + 1];
674 for (k = begin_row; k < end_row; ++k) temp += aj[k] * x[ja[k]];
675 y[i] -= temp;
676 }
677 }
678 }
679
680 else {
681 if (use_openmp) {
682 INT myid, mybegin, myend;
683#ifdef _OPENMP
684#pragma omp parallel for private(myid, mybegin, myend, i, temp, begin_row, end_row, k)
685#endif
686 for (myid = 0; myid < nthreads; myid++) {
687 fasp_get_start_end(myid, nthreads, m, &mybegin, &myend);
688 for (i = mybegin; i < myend; ++i) {
689 temp = 0.0;
690 begin_row = ia[i];
691 end_row = ia[i + 1];
692 for (k = begin_row; k < end_row; ++k) temp += aj[k] * x[ja[k]];
693 y[i] += temp * alpha;
694 }
695 }
696 } else {
697 for (i = 0; i < m; ++i) {
698 temp = 0.0;
699 begin_row = ia[i];
700 end_row = ia[i + 1];
701 for (k = begin_row; k < end_row; ++k) temp += aj[k] * x[ja[k]];
702 y[i] += temp * alpha;
703 }
704 }
705 }
706}
707
724void fasp_blas_dcsr_aAxpy_agg(const REAL alpha, const dCSRmat* A, const REAL* x,
725 REAL* y)
726{
727 const INT m = A->row;
728 const INT *ia = A->IA, *ja = A->JA;
729
730 INT i, k, begin_row, end_row;
731 register REAL temp;
732
733 if (alpha == 1.0) {
734#ifdef _OPENMP
735 if (m > OPENMP_HOLDS) {
736 INT myid, mybegin, myend;
737 INT nthreads = fasp_get_num_threads();
738#pragma omp parallel for private(myid, i, mybegin, myend, begin_row, end_row, temp, k)
739 for (myid = 0; myid < nthreads; myid++) {
740 fasp_get_start_end(myid, nthreads, m, &mybegin, &myend);
741 for (i = mybegin; i < myend; ++i) {
742 temp = 0.0;
743 begin_row = ia[i];
744 end_row = ia[i + 1];
745 for (k = begin_row; k < end_row; ++k) temp += x[ja[k]];
746 y[i] += temp;
747 }
748 }
749 } else {
750#endif
751 for (i = 0; i < m; ++i) {
752 temp = 0.0;
753 begin_row = ia[i];
754 end_row = ia[i + 1];
755 for (k = begin_row; k < end_row; ++k) temp += x[ja[k]];
756 y[i] += temp;
757 }
758#ifdef _OPENMP
759 }
760#endif
761 } else if (alpha == -1.0) {
762#ifdef _OPENMP
763 if (m > OPENMP_HOLDS) {
764 INT myid, mybegin, myend;
765 INT nthreads = fasp_get_num_threads();
766#pragma omp parallel for private(myid, i, mybegin, myend, begin_row, end_row, temp, k)
767 for (myid = 0; myid < nthreads; myid++) {
768 fasp_get_start_end(myid, nthreads, m, &mybegin, &myend);
769 for (i = mybegin; i < myend; ++i) {
770 temp = 0.0;
771 begin_row = ia[i];
772 end_row = ia[i + 1];
773 for (k = begin_row; k < end_row; ++k) temp += x[ja[k]];
774 y[i] -= temp;
775 }
776 }
777 } else {
778#endif
779 for (i = 0; i < m; ++i) {
780 temp = 0.0;
781 begin_row = ia[i];
782 end_row = ia[i + 1];
783 for (k = begin_row; k < end_row; ++k) temp += x[ja[k]];
784 y[i] -= temp;
785 }
786#ifdef _OPENMP
787 }
788#endif
789 }
790
791 else {
792#ifdef _OPENMP
793 if (m > OPENMP_HOLDS) {
794 INT myid, mybegin, myend;
795 INT nthreads = fasp_get_num_threads();
796#pragma omp parallel for private(myid, i, mybegin, myend, begin_row, end_row, temp, k)
797 for (myid = 0; myid < nthreads; myid++) {
798 fasp_get_start_end(myid, nthreads, m, &mybegin, &myend);
799 for (i = mybegin; i < myend; ++i) {
800 temp = 0.0;
801 begin_row = ia[i];
802 end_row = ia[i + 1];
803 for (k = begin_row; k < end_row; ++k) temp += x[ja[k]];
804 y[i] += temp * alpha;
805 }
806 }
807 } else {
808#endif
809 for (i = 0; i < m; ++i) {
810 temp = 0.0;
811 begin_row = ia[i];
812 end_row = ia[i + 1];
813 for (k = begin_row; k < end_row; ++k) temp += x[ja[k]];
814 y[i] += temp * alpha;
815 }
816#ifdef _OPENMP
817 }
818#endif
819 }
820}
821
834REAL fasp_blas_dcsr_vmv(const dCSRmat* A, const REAL* x, const REAL* y)
835{
836 register REAL value = 0.0;
837 const INT m = A->row;
838 const INT * ia = A->IA, *ja = A->JA;
839 const REAL* aj = A->val;
840 INT i, k, begin_row, end_row;
841 register REAL temp;
842
843 SHORT use_openmp = FALSE;
844
845#ifdef _OPENMP
846 if (m > OPENMP_HOLDS) {
847 use_openmp = TRUE;
848 }
849#endif
850
851 if (use_openmp) {
852#ifdef _OPENMP
853#pragma omp parallel for reduction(+ : value) private(i, temp, begin_row, end_row, k)
854#endif
855 for (i = 0; i < m; ++i) {
856 temp = 0.0;
857 begin_row = ia[i];
858 end_row = ia[i + 1];
859 for (k = begin_row; k < end_row; ++k) temp += aj[k] * x[ja[k]];
860 value += y[i] * temp;
861 }
862 } else {
863 for (i = 0; i < m; ++i) {
864 temp = 0.0;
865 begin_row = ia[i];
866 end_row = ia[i + 1];
867 for (k = begin_row; k < end_row; ++k) temp += aj[k] * x[ja[k]];
868 value += y[i] * temp;
869 }
870 }
871 return value;
872}
873
888void fasp_blas_dcsr_mxm(const dCSRmat* A, const dCSRmat* B, dCSRmat* C)
889{
890 INT i, j, k, l, count;
891
892 INT* JD = (INT*)fasp_mem_calloc(B->col, sizeof(INT));
893
894 C->row = A->row;
895 C->col = B->col;
896 C->val = NULL;
897 C->JA = NULL;
898 C->IA = (INT*)fasp_mem_calloc(C->row + 1, sizeof(INT));
899
900 for (i = 0; i < B->col; ++i) JD[i] = -1;
901
902 // step 1: Find first the structure IA of C
903 for (i = 0; i < C->row; ++i) {
904 count = 0;
905
906 for (k = A->IA[i]; k < A->IA[i + 1]; ++k) {
907 for (j = B->IA[A->JA[k]]; j < B->IA[A->JA[k] + 1]; ++j) {
908 for (l = 0; l < count; l++) {
909 if (JD[l] == B->JA[j]) break;
910 }
911
912 if (l == count) {
913 JD[count] = B->JA[j];
914 count++;
915 }
916 }
917 }
918 C->IA[i + 1] = count;
919 for (j = 0; j < count; ++j) {
920 JD[j] = -1;
921 }
922 }
923
924 for (i = 0; i < C->row; ++i) C->IA[i + 1] += C->IA[i];
925
926 // step 2: Find the structure JA of C
927 INT countJD;
928
929 C->JA = (INT*)fasp_mem_calloc(C->IA[C->row], sizeof(INT));
930
931 for (i = 0; i < C->row; ++i) {
932 countJD = 0;
933 count = C->IA[i];
934 for (k = A->IA[i]; k < A->IA[i + 1]; ++k) {
935 for (j = B->IA[A->JA[k]]; j < B->IA[A->JA[k] + 1]; ++j) {
936 for (l = 0; l < countJD; l++) {
937 if (JD[l] == B->JA[j]) break;
938 }
939
940 if (l == countJD) {
941 C->JA[count] = B->JA[j];
942 JD[countJD] = B->JA[j];
943 count++;
944 countJD++;
945 }
946 }
947 }
948
949 // for (j=0;j<countJD;++j) JD[j]=-1;
950 fasp_iarray_set(countJD, JD, -1);
951 }
952
953 fasp_mem_free(JD);
954 JD = NULL;
955
956 // step 3: Find the structure A of C
957 C->val = (REAL*)fasp_mem_calloc(C->IA[C->row], sizeof(REAL));
958
959 for (i = 0; i < C->row; ++i) {
960 for (j = C->IA[i]; j < C->IA[i + 1]; ++j) {
961 C->val[j] = 0;
962 for (k = A->IA[i]; k < A->IA[i + 1]; ++k) {
963 for (l = B->IA[A->JA[k]]; l < B->IA[A->JA[k] + 1]; l++) {
964 if (B->JA[l] == C->JA[j]) {
965 C->val[j] += A->val[k] * B->val[l];
966 } // end if
967 } // end for l
968 } // end for k
969 } // end for j
970 } // end for i
971
972 C->nnz = C->IA[C->row] - C->IA[0];
973}
974
994void fasp_blas_dcsr_rap(const dCSRmat* R, const dCSRmat* A, const dCSRmat* P,
995 dCSRmat* RAP)
996{
997 const INT n_coarse = R->row;
998 const INT* R_i = R->IA;
999 const INT* R_j = R->JA;
1000 const REAL* R_data = R->val;
1001
1002 const INT n_fine = A->row;
1003 const INT* A_i = A->IA;
1004 const INT* A_j = A->JA;
1005 const REAL* A_data = A->val;
1006
1007 const INT* P_i = P->IA;
1008 const INT* P_j = P->JA;
1009 const REAL* P_data = P->val;
1010
1011 INT RAP_size;
1012 INT* RAP_i = NULL;
1013 INT* RAP_j = NULL;
1014 REAL* RAP_data = NULL;
1015
1016#ifdef _OPENMP
1017 INT* P_marker = NULL;
1018 INT* A_marker = NULL;
1019#endif
1020
1021 INT* Ps_marker = NULL;
1022 INT* As_marker = NULL;
1023
1024 INT ic, i1, i2, i3, jj1, jj2, jj3;
1025 INT jj_counter, jj_row_begining;
1026 REAL r_entry, r_a_product, r_a_p_product;
1027
1028 INT nthreads = 1;
1029
1030#ifdef _OPENMP
1031 INT myid, mybegin, myend, Ctemp;
1032 nthreads = fasp_get_num_threads();
1033#endif
1034
1035 INT coarse_mul_nthreads = n_coarse * nthreads;
1036 INT fine_mul_nthreads = n_fine * nthreads;
1037 INT coarse_add_nthreads = n_coarse + nthreads;
1038 INT minus_one_length = coarse_mul_nthreads + fine_mul_nthreads;
1039 INT total_calloc = minus_one_length + coarse_add_nthreads + nthreads;
1040
1041 Ps_marker = (INT*)fasp_mem_calloc(total_calloc, sizeof(INT));
1042 As_marker = Ps_marker + coarse_mul_nthreads;
1043
1044 /*------------------------------------------------------*
1045 * First Pass: Determine size of RAP and set up RAP_i *
1046 *------------------------------------------------------*/
1047 RAP_i = (INT*)fasp_mem_calloc(n_coarse + 1, sizeof(INT));
1048
1049 fasp_iarray_set(minus_one_length, Ps_marker, -1);
1050
1051#ifdef _OPENMP
1052 INT* RAP_temp = As_marker + fine_mul_nthreads;
1053 INT* part_end = RAP_temp + coarse_add_nthreads;
1054
1055 if (n_coarse > OPENMP_HOLDS) {
1056#pragma omp parallel for private(myid, mybegin, myend, Ctemp, P_marker, A_marker, \
1057 jj_counter, ic, jj_row_begining, jj1, i1, jj2, i2, \
1058 jj3, i3)
1059 for (myid = 0; myid < nthreads; myid++) {
1060 fasp_get_start_end(myid, nthreads, n_coarse, &mybegin, &myend);
1061 P_marker = Ps_marker + myid * n_coarse;
1062 A_marker = As_marker + myid * n_fine;
1063 jj_counter = 0;
1064 for (ic = mybegin; ic < myend; ic++) {
1065 P_marker[ic] = jj_counter;
1066 jj_row_begining = jj_counter;
1067 jj_counter++;
1068
1069 for (jj1 = R_i[ic]; jj1 < R_i[ic + 1]; jj1++) {
1070 i1 = R_j[jj1];
1071 for (jj2 = A_i[i1]; jj2 < A_i[i1 + 1]; jj2++) {
1072 i2 = A_j[jj2];
1073 if (A_marker[i2] != ic) {
1074 A_marker[i2] = ic;
1075 for (jj3 = P_i[i2]; jj3 < P_i[i2 + 1]; jj3++) {
1076 i3 = P_j[jj3];
1077 if (P_marker[i3] < jj_row_begining) {
1078 P_marker[i3] = jj_counter;
1079 jj_counter++;
1080 }
1081 }
1082 }
1083 }
1084 }
1085
1086 RAP_temp[ic + myid] = jj_row_begining;
1087 }
1088 RAP_temp[myend + myid] = jj_counter;
1089
1090 part_end[myid] = myend + myid + 1;
1091 }
1092 fasp_iarray_cp(part_end[0], RAP_temp, RAP_i);
1093 jj_counter = part_end[0];
1094 Ctemp = 0;
1095 for (i1 = 1; i1 < nthreads; i1++) {
1096 Ctemp += RAP_temp[part_end[i1 - 1] - 1];
1097 for (jj1 = part_end[i1 - 1] + 1; jj1 < part_end[i1]; jj1++) {
1098 RAP_i[jj_counter] = RAP_temp[jj1] + Ctemp;
1099 jj_counter++;
1100 }
1101 }
1102 RAP_size = RAP_i[n_coarse];
1103 }
1104
1105 else {
1106#endif
1107 jj_counter = 0;
1108 for (ic = 0; ic < n_coarse; ic++) {
1109 Ps_marker[ic] = jj_counter;
1110 jj_row_begining = jj_counter;
1111 jj_counter++;
1112
1113 for (jj1 = R_i[ic]; jj1 < R_i[ic + 1]; jj1++) {
1114 i1 = R_j[jj1];
1115
1116 for (jj2 = A_i[i1]; jj2 < A_i[i1 + 1]; jj2++) {
1117 i2 = A_j[jj2];
1118 if (As_marker[i2] != ic) {
1119 As_marker[i2] = ic;
1120 for (jj3 = P_i[i2]; jj3 < P_i[i2 + 1]; jj3++) {
1121 i3 = P_j[jj3];
1122 if (Ps_marker[i3] < jj_row_begining) {
1123 Ps_marker[i3] = jj_counter;
1124 jj_counter++;
1125 }
1126 }
1127 }
1128 }
1129 }
1130
1131 RAP_i[ic] = jj_row_begining;
1132 }
1133
1134 RAP_i[n_coarse] = jj_counter;
1135 RAP_size = jj_counter;
1136#ifdef _OPENMP
1137 }
1138#endif
1139
1140 RAP_j = (INT*)fasp_mem_calloc(RAP_size, sizeof(INT));
1141 RAP_data = (REAL*)fasp_mem_calloc(RAP_size, sizeof(REAL));
1142
1143 fasp_iarray_set(minus_one_length, Ps_marker, -1);
1144
1145#ifdef _OPENMP
1146 if (n_coarse > OPENMP_HOLDS) {
1147#pragma omp parallel for private(myid, mybegin, myend, P_marker, A_marker, jj_counter, \
1148 ic, jj_row_begining, jj1, r_entry, i1, jj2, \
1149 r_a_product, i2, jj3, r_a_p_product, i3)
1150 for (myid = 0; myid < nthreads; myid++) {
1151 fasp_get_start_end(myid, nthreads, n_coarse, &mybegin, &myend);
1152 P_marker = Ps_marker + myid * n_coarse;
1153 A_marker = As_marker + myid * n_fine;
1154 jj_counter = RAP_i[mybegin];
1155 for (ic = mybegin; ic < myend; ic++) {
1156 P_marker[ic] = jj_counter;
1157 jj_row_begining = jj_counter;
1158 RAP_j[jj_counter] = ic;
1159 RAP_data[jj_counter] = 0.0;
1160 jj_counter++;
1161 for (jj1 = R_i[ic]; jj1 < R_i[ic + 1]; jj1++) {
1162 r_entry = R_data[jj1];
1163
1164 i1 = R_j[jj1];
1165 for (jj2 = A_i[i1]; jj2 < A_i[i1 + 1]; jj2++) {
1166 r_a_product = r_entry * A_data[jj2];
1167
1168 i2 = A_j[jj2];
1169 if (A_marker[i2] != ic) {
1170 A_marker[i2] = ic;
1171 for (jj3 = P_i[i2]; jj3 < P_i[i2 + 1]; jj3++) {
1172 r_a_p_product = r_a_product * P_data[jj3];
1173
1174 i3 = P_j[jj3];
1175 if (P_marker[i3] < jj_row_begining) {
1176 P_marker[i3] = jj_counter;
1177 RAP_data[jj_counter] = r_a_p_product;
1178 RAP_j[jj_counter] = i3;
1179 jj_counter++;
1180 } else {
1181 RAP_data[P_marker[i3]] += r_a_p_product;
1182 }
1183 }
1184 } else {
1185 for (jj3 = P_i[i2]; jj3 < P_i[i2 + 1]; jj3++) {
1186 i3 = P_j[jj3];
1187 r_a_p_product = r_a_product * P_data[jj3];
1188 RAP_data[P_marker[i3]] += r_a_p_product;
1189 }
1190 }
1191 }
1192 }
1193 }
1194 }
1195 } else {
1196#endif
1197 jj_counter = 0;
1198 for (ic = 0; ic < n_coarse; ic++) {
1199 Ps_marker[ic] = jj_counter;
1200 jj_row_begining = jj_counter;
1201 RAP_j[jj_counter] = ic;
1202 RAP_data[jj_counter] = 0.0;
1203 jj_counter++;
1204
1205 for (jj1 = R_i[ic]; jj1 < R_i[ic + 1]; jj1++) {
1206 r_entry = R_data[jj1];
1207
1208 i1 = R_j[jj1];
1209 for (jj2 = A_i[i1]; jj2 < A_i[i1 + 1]; jj2++) {
1210 r_a_product = r_entry * A_data[jj2];
1211
1212 i2 = A_j[jj2];
1213 if (As_marker[i2] != ic) {
1214 As_marker[i2] = ic;
1215 for (jj3 = P_i[i2]; jj3 < P_i[i2 + 1]; jj3++) {
1216 r_a_p_product = r_a_product * P_data[jj3];
1217
1218 i3 = P_j[jj3];
1219 if (Ps_marker[i3] < jj_row_begining) {
1220 Ps_marker[i3] = jj_counter;
1221 RAP_data[jj_counter] = r_a_p_product;
1222 RAP_j[jj_counter] = i3;
1223 jj_counter++;
1224 } else {
1225 RAP_data[Ps_marker[i3]] += r_a_p_product;
1226 }
1227 }
1228 } else {
1229 for (jj3 = P_i[i2]; jj3 < P_i[i2 + 1]; jj3++) {
1230 i3 = P_j[jj3];
1231 r_a_p_product = r_a_product * P_data[jj3];
1232 RAP_data[Ps_marker[i3]] += r_a_p_product;
1233 }
1234 }
1235 }
1236 }
1237 }
1238#ifdef _OPENMP
1239 }
1240#endif
1241
1242 RAP->row = n_coarse;
1243 RAP->col = n_coarse;
1244 RAP->nnz = RAP_size;
1245 RAP->IA = RAP_i;
1246 RAP->JA = RAP_j;
1247 RAP->val = RAP_data;
1248
1249 fasp_mem_free(Ps_marker);
1250 Ps_marker = NULL;
1251}
1252
1269void fasp_blas_dcsr_rap_agg(const dCSRmat* R, const dCSRmat* A, const dCSRmat* P,
1270 dCSRmat* RAP)
1271{
1272 const INT n_coarse = R->row;
1273 const INT* R_i = R->IA;
1274 const INT* R_j = R->JA;
1275
1276 const INT n_fine = A->row;
1277 const INT* A_i = A->IA;
1278 const INT* A_j = A->JA;
1279 const REAL* A_data = A->val;
1280
1281 const INT* P_i = P->IA;
1282 const INT* P_j = P->JA;
1283
1284 INT RAP_size;
1285 INT* RAP_i = NULL;
1286 INT* RAP_j = NULL;
1287 REAL* RAP_data = NULL;
1288
1289#ifdef _OPENMP
1290 INT* P_marker = NULL;
1291 INT* A_marker = NULL;
1292#endif
1293
1294 INT* Ps_marker = NULL;
1295 INT* As_marker = NULL;
1296
1297 INT ic, i1, i2, i3, jj1, jj2, jj3;
1298 INT jj_counter, jj_row_begining;
1299
1300 INT nthreads = 1;
1301
1302#ifdef _OPENMP
1303 INT myid, mybegin, myend, Ctemp;
1304 nthreads = fasp_get_num_threads();
1305#endif
1306
1307 INT coarse_mul_nthreads = n_coarse * nthreads;
1308 INT fine_mul_nthreads = n_fine * nthreads;
1309 INT coarse_add_nthreads = n_coarse + nthreads;
1310 INT minus_one_length = coarse_mul_nthreads + fine_mul_nthreads;
1311 INT total_calloc = minus_one_length + coarse_add_nthreads + nthreads;
1312
1313 Ps_marker = (INT*)fasp_mem_calloc(total_calloc, sizeof(INT));
1314 As_marker = Ps_marker + coarse_mul_nthreads;
1315
1316 /*------------------------------------------------------*
1317 * First Pass: Determine size of RAP and set up RAP_i *
1318 *------------------------------------------------------*/
1319 RAP_i = (INT*)fasp_mem_calloc(n_coarse + 1, sizeof(INT));
1320
1321 fasp_iarray_set(minus_one_length, Ps_marker, -1);
1322
1323#ifdef _OPENMP
1324 INT* RAP_temp = As_marker + fine_mul_nthreads;
1325 INT* part_end = RAP_temp + coarse_add_nthreads;
1326
1327 if (n_coarse > OPENMP_HOLDS) {
1328#pragma omp parallel for private(myid, mybegin, myend, Ctemp, P_marker, A_marker, \
1329 jj_counter, ic, jj_row_begining, jj1, i1, jj2, i2, \
1330 jj3, i3)
1331 for (myid = 0; myid < nthreads; myid++) {
1332 fasp_get_start_end(myid, nthreads, n_coarse, &mybegin, &myend);
1333 P_marker = Ps_marker + myid * n_coarse;
1334 A_marker = As_marker + myid * n_fine;
1335 jj_counter = 0;
1336 for (ic = mybegin; ic < myend; ic++) {
1337 P_marker[ic] = jj_counter;
1338 jj_row_begining = jj_counter;
1339 jj_counter++;
1340
1341 for (jj1 = R_i[ic]; jj1 < R_i[ic + 1]; jj1++) {
1342 i1 = R_j[jj1];
1343 for (jj2 = A_i[i1]; jj2 < A_i[i1 + 1]; jj2++) {
1344 i2 = A_j[jj2];
1345 if (A_marker[i2] != ic) {
1346 A_marker[i2] = ic;
1347 for (jj3 = P_i[i2]; jj3 < P_i[i2 + 1]; jj3++) {
1348 i3 = P_j[jj3];
1349 if (P_marker[i3] < jj_row_begining) {
1350 P_marker[i3] = jj_counter;
1351 jj_counter++;
1352 }
1353 }
1354 }
1355 }
1356 }
1357
1358 RAP_temp[ic + myid] = jj_row_begining;
1359 }
1360 RAP_temp[myend + myid] = jj_counter;
1361
1362 part_end[myid] = myend + myid + 1;
1363 }
1364 fasp_iarray_cp(part_end[0], RAP_temp, RAP_i);
1365 jj_counter = part_end[0];
1366 Ctemp = 0;
1367 for (i1 = 1; i1 < nthreads; i1++) {
1368 Ctemp += RAP_temp[part_end[i1 - 1] - 1];
1369 for (jj1 = part_end[i1 - 1] + 1; jj1 < part_end[i1]; jj1++) {
1370 RAP_i[jj_counter] = RAP_temp[jj1] + Ctemp;
1371 jj_counter++;
1372 }
1373 }
1374 RAP_size = RAP_i[n_coarse];
1375 }
1376
1377 else {
1378#endif
1379 jj_counter = 0;
1380 for (ic = 0; ic < n_coarse; ic++) {
1381 Ps_marker[ic] = jj_counter;
1382 jj_row_begining = jj_counter;
1383 jj_counter++;
1384
1385 for (jj1 = R_i[ic]; jj1 < R_i[ic + 1]; jj1++) {
1386 i1 = R_j[jj1];
1387
1388 for (jj2 = A_i[i1]; jj2 < A_i[i1 + 1]; jj2++) {
1389 i2 = A_j[jj2];
1390 if (As_marker[i2] != ic) {
1391 As_marker[i2] = ic;
1392 for (jj3 = P_i[i2]; jj3 < P_i[i2 + 1]; jj3++) {
1393 i3 = P_j[jj3];
1394 if (Ps_marker[i3] < jj_row_begining) {
1395 Ps_marker[i3] = jj_counter;
1396 jj_counter++;
1397 }
1398 }
1399 }
1400 }
1401 }
1402
1403 RAP_i[ic] = jj_row_begining;
1404 }
1405
1406 RAP_i[n_coarse] = jj_counter;
1407 RAP_size = jj_counter;
1408#ifdef _OPENMP
1409 }
1410#endif
1411
1412 RAP_j = (INT*)fasp_mem_calloc(RAP_size, sizeof(INT));
1413 RAP_data = (REAL*)fasp_mem_calloc(RAP_size, sizeof(REAL));
1414
1415 fasp_iarray_set(minus_one_length, Ps_marker, -1);
1416
1417#ifdef _OPENMP
1418 if (n_coarse > OPENMP_HOLDS) {
1419#pragma omp parallel for private(myid, mybegin, myend, P_marker, A_marker, jj_counter, \
1420 ic, jj_row_begining, jj1, i1, jj2, i2, jj3, i3)
1421 for (myid = 0; myid < nthreads; myid++) {
1422 fasp_get_start_end(myid, nthreads, n_coarse, &mybegin, &myend);
1423 P_marker = Ps_marker + myid * n_coarse;
1424 A_marker = As_marker + myid * n_fine;
1425 jj_counter = RAP_i[mybegin];
1426 for (ic = mybegin; ic < myend; ic++) {
1427 P_marker[ic] = jj_counter;
1428 jj_row_begining = jj_counter;
1429 RAP_j[jj_counter] = ic;
1430 RAP_data[jj_counter] = 0.0;
1431 jj_counter++;
1432 for (jj1 = R_i[ic]; jj1 < R_i[ic + 1]; jj1++) {
1433
1434 i1 = R_j[jj1];
1435 for (jj2 = A_i[i1]; jj2 < A_i[i1 + 1]; jj2++) {
1436
1437 i2 = A_j[jj2];
1438 if (A_marker[i2] != ic) {
1439 A_marker[i2] = ic;
1440 for (jj3 = P_i[i2]; jj3 < P_i[i2 + 1]; jj3++) {
1441
1442 i3 = P_j[jj3];
1443 if (P_marker[i3] < jj_row_begining) {
1444 P_marker[i3] = jj_counter;
1445 RAP_data[jj_counter] = A_data[jj2];
1446 RAP_j[jj_counter] = i3;
1447 jj_counter++;
1448 } else {
1449 RAP_data[P_marker[i3]] += A_data[jj2];
1450 }
1451 }
1452 } else {
1453 for (jj3 = P_i[i2]; jj3 < P_i[i2 + 1]; jj3++) {
1454 i3 = P_j[jj3];
1455 RAP_data[P_marker[i3]] += A_data[jj2];
1456 }
1457 }
1458 }
1459 }
1460 }
1461 }
1462 } else {
1463#endif
1464 jj_counter = 0;
1465 for (ic = 0; ic < n_coarse; ic++) {
1466 Ps_marker[ic] = jj_counter;
1467 jj_row_begining = jj_counter;
1468 RAP_j[jj_counter] = ic;
1469 RAP_data[jj_counter] = 0.0;
1470 jj_counter++;
1471
1472 for (jj1 = R_i[ic]; jj1 < R_i[ic + 1]; jj1++) {
1473 i1 = R_j[jj1];
1474 for (jj2 = A_i[i1]; jj2 < A_i[i1 + 1]; jj2++) {
1475 i2 = A_j[jj2];
1476 if (As_marker[i2] != ic) {
1477 As_marker[i2] = ic;
1478 for (jj3 = P_i[i2]; jj3 < P_i[i2 + 1]; jj3++) {
1479 i3 = P_j[jj3];
1480 if (Ps_marker[i3] < jj_row_begining) {
1481 Ps_marker[i3] = jj_counter;
1482 RAP_data[jj_counter] = A_data[jj2];
1483 RAP_j[jj_counter] = i3;
1484 jj_counter++;
1485 } else {
1486 RAP_data[Ps_marker[i3]] += A_data[jj2];
1487 }
1488 }
1489 } else {
1490 for (jj3 = P_i[i2]; jj3 < P_i[i2 + 1]; jj3++) {
1491 i3 = P_j[jj3];
1492 RAP_data[Ps_marker[i3]] += A_data[jj2];
1493 }
1494 }
1495 }
1496 }
1497 }
1498#ifdef _OPENMP
1499 }
1500#endif
1501
1502 RAP->row = n_coarse;
1503 RAP->col = n_coarse;
1504 RAP->nnz = RAP_size;
1505 RAP->IA = RAP_i;
1506 RAP->JA = RAP_j;
1507 RAP->val = RAP_data;
1508
1509 fasp_mem_free(Ps_marker);
1510 Ps_marker = NULL;
1511}
1512
1530void fasp_blas_dcsr_rap_agg1(const dCSRmat* R, const dCSRmat* A, const dCSRmat* P,
1531 dCSRmat* B)
1532{
1533 const INT row = R->row, col = P->col;
1534 const INT * ir = R->IA, *ia = A->IA, *ip = P->IA;
1535 const INT * jr = R->JA, *ja = A->JA, *jp = P->JA;
1536 const REAL* aj = A->val;
1537
1538 INT * iac, *jac;
1539 REAL* acj;
1540
1541 INT* index = (INT*)fasp_mem_calloc(A->col, sizeof(INT));
1542 INT* iindex = (INT*)fasp_mem_calloc(col, sizeof(INT));
1543
1544 INT nB = A->nnz;
1545 INT i, i1, j, jj, k, length;
1546 INT begin_row, end_row, begin_rowA, end_rowA, begin_rowR, end_rowR;
1547 INT istart, iistart, count;
1548
1549 // for (i=0; i<A->col; ++i) index[i] = -2;
1550 fasp_iarray_set(A->col, index, -2);
1551
1552 // memcpy(iindex,index,col*sizeof(INT));
1553 fasp_iarray_cp(col, index, iindex);
1554
1555 jac = (INT*)fasp_mem_calloc(nB, sizeof(INT));
1556
1557 iac = (INT*)fasp_mem_calloc(row + 1, sizeof(INT));
1558
1559 REAL* temp = (REAL*)fasp_mem_calloc(A->col, sizeof(REAL));
1560
1561 iac[0] = 0;
1562
1563 // First loop: form sparsity partern of R*A*P
1564 for (i = 0; i < row; ++i) {
1565 // reset istart and length at the begining of each loop
1566 istart = -1;
1567 length = 0;
1568 i1 = i + 1;
1569
1570 // go across the rows in R
1571 begin_rowR = ir[i];
1572 end_rowR = ir[i1];
1573 for (jj = begin_rowR; jj < end_rowR; ++jj) {
1574 j = jr[jj];
1575 // for each column in A
1576 begin_rowA = ia[j];
1577 end_rowA = ia[j + 1];
1578 for (k = begin_rowA; k < end_rowA; ++k) {
1579 if (index[ja[k]] == -2) {
1580 index[ja[k]] = istart;
1581 istart = ja[k];
1582 ++length;
1583 }
1584 }
1585 }
1586
1587 // book-keeping [reseting length and setting iistart]
1588 count = length;
1589 iistart = -1;
1590 length = 0;
1591
1592 // use each column that would have resulted from R*A
1593 for (j = 0; j < count; ++j) {
1594 jj = istart;
1595 istart = index[istart];
1596 index[jj] = -2;
1597
1598 // go across the row of P
1599 begin_row = ip[jj];
1600 end_row = ip[jj + 1];
1601 for (k = begin_row; k < end_row; ++k) {
1602 // pull out the appropriate columns of P
1603 if (iindex[jp[k]] == -2) {
1604 iindex[jp[k]] = iistart;
1605 iistart = jp[k];
1606 ++length;
1607 }
1608 } // end for k
1609 } // end for j
1610
1611 // set B->IA
1612 iac[i1] = iac[i] + length;
1613
1614 if (iac[i1] > nB) { // Memory not enough!!!
1615 nB = nB * 2;
1616 jac = (INT*)fasp_mem_realloc(jac, nB * sizeof(INT));
1617 }
1618
1619 // put the correct columns of p into the column list of the products
1620 begin_row = iac[i];
1621 end_row = iac[i1];
1622 for (j = begin_row; j < end_row; ++j) {
1623 // put the value in B->JA
1624 jac[j] = iistart;
1625 // set istart to the next value
1626 iistart = iindex[iistart];
1627 // set the iindex spot to 0
1628 iindex[jac[j]] = -2;
1629 } // end j
1630
1631 } // end i: First loop
1632
1633 jac = (INT*)fasp_mem_realloc(jac, (iac[row]) * sizeof(INT));
1634
1635 acj = (REAL*)fasp_mem_calloc(iac[row], sizeof(REAL));
1636
1637 INT* BTindex = (INT*)fasp_mem_calloc(col, sizeof(INT));
1638
1639 // Second loop: compute entries of R*A*P
1640 for (i = 0; i < row; ++i) {
1641 i1 = i + 1;
1642
1643 // each col of B
1644 begin_row = iac[i];
1645 end_row = iac[i1];
1646 for (j = begin_row; j < end_row; ++j) {
1647 BTindex[jac[j]] = j;
1648 }
1649
1650 // reset istart and length at the beginning of each loop
1651 istart = -1;
1652 length = 0;
1653
1654 // go across the rows in R
1655 begin_rowR = ir[i];
1656 end_rowR = ir[i1];
1657 for (jj = begin_rowR; jj < end_rowR; ++jj) {
1658 j = jr[jj];
1659
1660 // for each column in A
1661 begin_rowA = ia[j];
1662 end_rowA = ia[j + 1];
1663 for (k = begin_rowA; k < end_rowA; ++k) {
1664 if (index[ja[k]] == -2) {
1665 index[ja[k]] = istart;
1666 istart = ja[k];
1667 ++length;
1668 }
1669 temp[ja[k]] += aj[k];
1670 }
1671 }
1672
1673 // book-keeping [resetting length and setting iistart]
1674 // use each column that would have resulted from R*A
1675 for (j = 0; j < length; ++j) {
1676 jj = istart;
1677 istart = index[istart];
1678 index[jj] = -2;
1679
1680 // go across the row of P
1681 begin_row = ip[jj];
1682 end_row = ip[jj + 1];
1683 for (k = begin_row; k < end_row; ++k) {
1684 // pull out the appropriate columns of P
1685 acj[BTindex[jp[k]]] += temp[jj];
1686 }
1687 temp[jj] = 0.0;
1688 }
1689
1690 } // end for i: Second loop
1691
1692 // setup coarse matrix B
1693 B->row = row;
1694 B->col = col;
1695 B->IA = iac;
1696 B->JA = jac;
1697 B->val = acj;
1698 B->nnz = B->IA[B->row] - B->IA[0];
1699
1700 fasp_mem_free(temp);
1701 temp = NULL;
1702 fasp_mem_free(index);
1703 index = NULL;
1704 fasp_mem_free(iindex);
1705 iindex = NULL;
1706 fasp_mem_free(BTindex);
1707 BTindex = NULL;
1708}
1709
1734void fasp_blas_dcsr_ptap(const dCSRmat* Pt, const dCSRmat* A, const dCSRmat* P,
1735 dCSRmat* Ac)
1736{
1737 const INT nc = Pt->row, n = Pt->col, nnzP = P->nnz, nnzA = A->nnz;
1738 INT i, maxrpout;
1739
1740 // shift A from usual to ltz format
1741#ifdef _OPENMP
1742#pragma omp parallel for if (n > OPENMP_HOLDS)
1743#endif
1744 for (i = 0; i <= n; ++i) {
1745 A->IA[i]++;
1746 P->IA[i]++;
1747 }
1748
1749#ifdef _OPENMP
1750#pragma omp parallel for if (nnzA > OPENMP_HOLDS)
1751#endif
1752 for (i = 0; i < nnzA; ++i) {
1753 A->JA[i]++;
1754 }
1755
1756#ifdef _OPENMP
1757#pragma omp parallel for if (nc > OPENMP_HOLDS)
1758#endif
1759 for (i = 0; i <= nc; ++i) {
1760 Pt->IA[i]++;
1761 }
1762
1763#ifdef _OPENMP
1764#pragma omp parallel for if (nnzP > OPENMP_HOLDS)
1765#endif
1766 for (i = 0; i < nnzP; ++i) {
1767 P->JA[i]++;
1768 Pt->JA[i]++;
1769 }
1770
1771 // compute P' A P
1772 dCSRmat PtAP =
1773 fasp_blas_dcsr_rap2(Pt->IA, Pt->JA, Pt->val, A->IA, A->JA, A->val, Pt->IA,
1774 Pt->JA, Pt->val, n, nc, &maxrpout, P->IA, P->JA);
1775
1776 Ac->row = PtAP.row;
1777 Ac->col = PtAP.col;
1778 Ac->nnz = PtAP.nnz;
1779 Ac->IA = PtAP.IA;
1780 Ac->JA = PtAP.JA;
1781 Ac->val = PtAP.val;
1782
1783 // shift A back from ltz format
1784#ifdef _OPENMP
1785#pragma omp parallel for if (Ac->row > OPENMP_HOLDS)
1786#endif
1787 for (i = 0; i <= Ac->row; ++i) Ac->IA[i]--;
1788
1789#ifdef _OPENMP
1790#pragma omp parallel for if (Ac->nnz > OPENMP_HOLDS)
1791#endif
1792 for (i = 0; i < Ac->nnz; ++i) Ac->JA[i]--;
1793
1794#ifdef _OPENMP
1795#pragma omp parallel for if (n > OPENMP_HOLDS)
1796#endif
1797 for (i = 0; i <= n; ++i) A->IA[i]--;
1798
1799#ifdef _OPENMP
1800#pragma omp parallel for if (nnzA > OPENMP_HOLDS)
1801#endif
1802 for (i = 0; i < nnzA; ++i) A->JA[i]--;
1803
1804#ifdef _OPENMP
1805#pragma omp parallel for if (n > OPENMP_HOLDS)
1806#endif
1807 for (i = 0; i <= n; ++i) P->IA[i]--;
1808
1809#ifdef _OPENMP
1810#pragma omp parallel for if (nnzP > OPENMP_HOLDS)
1811#endif
1812 for (i = 0; i < nnzP; ++i) P->JA[i]--;
1813
1814#ifdef _OPENMP
1815#pragma omp parallel for if (nc > OPENMP_HOLDS)
1816#endif
1817 for (i = 0; i <= nc; ++i) Pt->IA[i]--;
1818
1819#ifdef _OPENMP
1820#pragma omp parallel for if (nnzP > OPENMP_HOLDS)
1821#endif
1822 for (i = 0; i < nnzP; ++i) Pt->JA[i]--;
1823
1824 return;
1825}
1826
1842dCSRmat fasp_blas_dcsr_rap2(INT* ir, INT* jr, REAL* r, INT* ia, INT* ja, REAL* a,
1843 INT* ipt, INT* jpt, REAL* pt, INT n, INT nc, INT* maxrpout,
1844 INT* ipin, INT* jpin)
1845{
1846 dCSRmat ac;
1847 INT n1, nc1, nnzp, maxrp;
1848 INT * ip = NULL, *jp = NULL;
1849
1850 /*
1851 if ipin is null, this
1852 means that we need to do the transpose of p here; otherwise,
1853 these are considered to be input
1854 */
1855 maxrp = 0;
1856 nnzp = ipt[nc] - 1;
1857 n1 = n + 1;
1858
1859 if (!ipin) {
1860 ip = (INT*)calloc(n1, sizeof(INT));
1861 jp = (INT*)calloc(nnzp, sizeof(INT));
1862 /* these must be null anyway, so no need to assign null
1863 ipin=NULL;
1864 jpin=NULL;
1865 */
1866 } else {
1867 ip = ipin;
1868 jp = jpin;
1869 }
1870
1871 fasp_sparse_iit_(ipt, jpt, &nc, &n, ip, jp);
1872
1873 /* triple matrix product: R * A * transpose(P^T)=R*A*P.*/
1874 /* A is square n by n*/
1875 /* Note: to compute R*A* P the input are R, A and P^T */
1876 /* we need to transpose now the structure of P, because the input is P^T */
1877 /* end of transpose of the boolean corresponding to P */
1878 /* ic are the addresses of the rows of the output */
1879 nc1 = nc + 1;
1880 ac.IA = (INT*)calloc(nc1, sizeof(INT));
1881
1882 /*
1883 First call is with jc=null so that we find the number of
1884 nonzeros in the result
1885 */
1886 ac.JA = NULL;
1887 fasp_sparse_rapms_(ir, jr, ia, ja, ip, jp, &n, &nc, ac.IA, ac.JA, &maxrp);
1888 ac.nnz = ac.IA[nc] - 1;
1889 ac.JA = (INT*)calloc(ac.nnz, sizeof(INT));
1890
1891 /*
1892 second call is to fill the column indexes array jc.
1893 */
1894 fasp_sparse_rapms_(ir, jr, ia, ja, ip, jp, &n, &nc, ac.IA, ac.JA, &maxrp);
1895 if (!ipin) {
1896 if (ip) free(ip);
1897 if (jp) free(jp);
1898 }
1899 ac.val = (REAL*)calloc(ac.nnz, sizeof(REAL));
1900 /* this is the compute with the entries */
1901 fasp_sparse_rapcmp_(ir, jr, r, ia, ja, a, ipt, jpt, pt, &n, &nc, ac.IA, ac.JA,
1902 ac.val, &maxrp);
1903 ac.row = nc;
1904 ac.col = nc;
1905
1906 /*=========================================================*/
1907 *maxrpout = maxrp;
1908
1909 return ac;
1910}
1911
1930void fasp_blas_dcsr_rap4(dCSRmat* R, dCSRmat* A, dCSRmat* P, dCSRmat* B, INT* icor_ysk)
1931{
1932 SHORT nthreads = 1, use_openmp = FALSE;
1933
1934#ifdef _OPENMP
1935 if (R->row > OPENMP_HOLDS) {
1936 use_openmp = TRUE;
1937 nthreads = fasp_get_num_threads();
1938 }
1939#endif
1940
1941 if (use_openmp) {
1942 const INT row = R->row, col = P->col;
1943 INT * ir = R->IA, *ia = A->IA, *ip = P->IA;
1944 INT * jr = R->JA, *ja = A->JA, *jp = P->JA;
1945 REAL * rj = R->val, *aj = A->val, *pj = P->val;
1946 INT istart, iistart;
1947 INT end_row, end_rowA, end_rowR;
1948 INT i, j, jj, k, length, myid, mybegin, myend;
1949 INT jj_counter, ic, jj_row_begining, jj1, i1, jj2, i2, jj3, i3;
1950 INT* index = NULL;
1951 INT* iindex = NULL;
1952 INT* BTindex = NULL;
1953 REAL* temp = NULL;
1954 INT FiveMyid, min_A, min_P, A_pos, P_pos, FiveIc;
1955 INT minus_one_length_A = icor_ysk[5 * nthreads];
1956 INT minus_one_length_P = icor_ysk[5 * nthreads + 1];
1957 INT minus_one_length = minus_one_length_A + minus_one_length_P;
1958
1959 INT* iindexs =
1960 (INT*)fasp_mem_calloc(minus_one_length + minus_one_length_P, sizeof(INT));
1961
1962#if DEBUG_MODE > 1
1963 total_alloc_mem += minus_one_length * sizeof(INT);
1964#endif
1965 INT* indexs = iindexs + minus_one_length_P;
1966 INT* BTindexs = indexs + minus_one_length_A;
1967
1968 INT* iac = (INT*)fasp_mem_calloc(row + 1, sizeof(INT));
1969
1970#if DEBUG_MODE > 1
1971 total_alloc_mem += (row + 1) * sizeof(INT);
1972#endif
1973
1974 INT* part_end = (INT*)fasp_mem_calloc(2 * nthreads + row, sizeof(INT));
1975
1976#if DEBUG_MODE > 1
1977 total_alloc_mem += (2 * nthreads + row) * sizeof(INT);
1978#endif
1979
1980 INT* iac_temp = part_end + nthreads;
1981 INT** iindex_array = (INT**)fasp_mem_calloc(nthreads, sizeof(INT*));
1982 INT** index_array = (INT**)fasp_mem_calloc(nthreads, sizeof(INT*));
1983
1984 fasp_iarray_set(minus_one_length, iindexs, -2);
1985
1986#ifdef _OPENMP
1987#pragma omp parallel for private(myid, FiveMyid, mybegin, myend, min_A, min_P, index, \
1988 iindex, A_pos, P_pos, ic, FiveIc, jj_counter, \
1989 jj_row_begining, end_rowR, jj1, i1, end_rowA, jj2, \
1990 i2, end_row, jj3, i3)
1991#endif
1992 for (myid = 0; myid < nthreads; myid++) {
1993 FiveMyid = myid * 5;
1994 mybegin = icor_ysk[FiveMyid];
1995 if (myid == nthreads - 1) {
1996 myend = row;
1997 } else {
1998 myend = icor_ysk[FiveMyid + 5];
1999 }
2000 min_A = icor_ysk[FiveMyid + 2];
2001 min_P = icor_ysk[FiveMyid + 4];
2002 A_pos = 0;
2003 P_pos = 0;
2004 for (ic = myid - 1; ic >= 0; ic--) {
2005 FiveIc = ic * 5;
2006 A_pos += icor_ysk[FiveIc + 1];
2007 P_pos += icor_ysk[FiveIc + 3];
2008 }
2009 iindex_array[myid] = iindex = iindexs + P_pos - min_P;
2010 index_array[myid] = index = indexs + A_pos - min_A;
2011 jj_counter = 0;
2012 for (ic = mybegin; ic < myend; ic++) {
2013 iindex[ic] = jj_counter;
2014 jj_row_begining = jj_counter;
2015 jj_counter++;
2016 end_rowR = ir[ic + 1];
2017 for (jj1 = ir[ic]; jj1 < end_rowR; jj1++) {
2018 i1 = jr[jj1];
2019 end_rowA = ia[i1 + 1];
2020 for (jj2 = ia[i1]; jj2 < end_rowA; jj2++) {
2021 i2 = ja[jj2];
2022 if (index[i2] != ic) {
2023 index[i2] = ic;
2024 end_row = ip[i2 + 1];
2025 for (jj3 = ip[i2]; jj3 < end_row; jj3++) {
2026 i3 = jp[jj3];
2027 if (iindex[i3] < jj_row_begining) {
2028 iindex[i3] = jj_counter;
2029 jj_counter++;
2030 }
2031 }
2032 }
2033 }
2034 }
2035 iac_temp[ic + myid] = jj_row_begining;
2036 }
2037 iac_temp[myend + myid] = jj_counter;
2038 part_end[myid] = myend + myid + 1;
2039 }
2040 fasp_iarray_cp(part_end[0], iac_temp, iac);
2041 jj_counter = part_end[0];
2042 INT Ctemp = 0;
2043 for (i1 = 1; i1 < nthreads; i1++) {
2044 Ctemp += iac_temp[part_end[i1 - 1] - 1];
2045 for (jj1 = part_end[i1 - 1] + 1; jj1 < part_end[i1]; jj1++) {
2046 iac[jj_counter] = iac_temp[jj1] + Ctemp;
2047 jj_counter++;
2048 }
2049 }
2050 INT* jac = (INT*)fasp_mem_calloc(iac[row], sizeof(INT));
2051#if DEBUG_MODE > 1
2052 total_alloc_mem += iac[row] * sizeof(INT);
2053#endif
2054 fasp_iarray_set(minus_one_length, iindexs, -2);
2055#ifdef _OPENMP
2056#pragma omp parallel for private(myid, index, iindex, FiveMyid, mybegin, myend, i, \
2057 istart, length, i1, end_rowR, jj, j, end_rowA, k, \
2058 iistart, end_row)
2059#endif
2060 for (myid = 0; myid < nthreads; myid++) {
2061 iindex = iindex_array[myid];
2062 index = index_array[myid];
2063 FiveMyid = myid * 5;
2064 mybegin = icor_ysk[FiveMyid];
2065 if (myid == nthreads - 1) {
2066 myend = row;
2067 } else {
2068 myend = icor_ysk[FiveMyid + 5];
2069 }
2070 for (i = mybegin; i < myend; ++i) {
2071 istart = -1;
2072 length = 0;
2073 i1 = i + 1;
2074 // go across the rows in R
2075 end_rowR = ir[i1];
2076 for (jj = ir[i]; jj < end_rowR; ++jj) {
2077 j = jr[jj];
2078 // for each column in A
2079 end_rowA = ia[j + 1];
2080 for (k = ia[j]; k < end_rowA; ++k) {
2081 if (index[ja[k]] == -2) {
2082 index[ja[k]] = istart;
2083 istart = ja[k];
2084 ++length;
2085 }
2086 }
2087 }
2088 // book-keeping [resetting length and setting iistart]
2089 // count = length;
2090 iistart = -1;
2091 // length = 0;
2092 // use each column that would have resulted from R*A
2093 // for (j = 0; j < count; ++ j) {
2094 for (j = 0; j < length; ++j) {
2095 jj = istart;
2096 istart = index[istart];
2097 index[jj] = -2;
2098 // go across the row of P
2099 end_row = ip[jj + 1];
2100 for (k = ip[jj]; k < end_row; ++k) {
2101 // pull out the appropriate columns of P
2102 if (iindex[jp[k]] == -2) {
2103 iindex[jp[k]] = iistart;
2104 iistart = jp[k];
2105 //++length;
2106 }
2107 } // end for k
2108 } // end for j
2109 // put the correct columns of p into the column list of the products
2110 end_row = iac[i1];
2111 for (j = iac[i]; j < end_row; ++j) {
2112 // put the value in B->JA
2113 jac[j] = iistart;
2114 // set istart to the next value
2115 iistart = iindex[iistart];
2116 // set the iindex spot to 0
2117 iindex[jac[j]] = -2;
2118 } // end j
2119 }
2120 }
2121 // Third loop: compute entries of R*A*P
2122 REAL* acj = (REAL*)fasp_mem_calloc(iac[row], sizeof(REAL));
2123#if DEBUG_MODE > 1
2124 total_alloc_mem += iac[row] * sizeof(REAL);
2125#endif
2126 REAL* temps = (REAL*)fasp_mem_calloc(minus_one_length_A, sizeof(REAL));
2127#if DEBUG_MODE > 1
2128 total_alloc_mem += minus_one_length_A * sizeof(REAL);
2129#endif
2130
2131#ifdef _OPENMP
2132#pragma omp parallel for private( \
2133 myid, index, FiveMyid, mybegin, myend, min_A, min_P, A_pos, P_pos, ic, FiveIc, \
2134 BTindex, temp, i, i1, end_row, j, istart, length, end_rowR, jj, end_rowA, k)
2135#endif
2136 for (myid = 0; myid < nthreads; myid++) {
2137 index = index_array[myid];
2138 FiveMyid = myid * 5;
2139 mybegin = icor_ysk[FiveMyid];
2140 if (myid == nthreads - 1) {
2141 myend = row;
2142 } else {
2143 myend = icor_ysk[FiveMyid + 5];
2144 }
2145 min_A = icor_ysk[FiveMyid + 2];
2146 min_P = icor_ysk[FiveMyid + 4];
2147 A_pos = 0;
2148 P_pos = 0;
2149 for (ic = myid - 1; ic >= 0; ic--) {
2150 FiveIc = ic * 5;
2151 A_pos += icor_ysk[FiveIc + 1];
2152 P_pos += icor_ysk[FiveIc + 3];
2153 }
2154 BTindex = BTindexs + P_pos - min_P;
2155 temp = temps + A_pos - min_A;
2156 for (i = mybegin; i < myend; ++i) {
2157 i1 = i + 1;
2158 // each col of B
2159 end_row = iac[i1];
2160 for (j = iac[i]; j < end_row; ++j) {
2161 BTindex[jac[j]] = j;
2162 }
2163 // reset istart and length at the beginning of each loop
2164 istart = -1;
2165 length = 0;
2166 // go across the rows in R
2167 end_rowR = ir[i1];
2168 for (jj = ir[i]; jj < end_rowR; ++jj) {
2169 j = jr[jj];
2170 // for each column in A
2171 end_rowA = ia[j + 1];
2172 for (k = ia[j]; k < end_rowA; ++k) {
2173 if (index[ja[k]] == -2) {
2174 index[ja[k]] = istart;
2175 istart = ja[k];
2176 ++length;
2177 }
2178 temp[ja[k]] += rj[jj] * aj[k];
2179 }
2180 }
2181 // book-keeping [resetting length and setting iistart]
2182 // use each column that would have resulted from R*A
2183 for (j = 0; j < length; ++j) {
2184 jj = istart;
2185 istart = index[istart];
2186 index[jj] = -2;
2187 // go across the row of P
2188 end_row = ip[jj + 1];
2189 for (k = ip[jj]; k < end_row; ++k) {
2190 // pull out the appropriate columns of P
2191 acj[BTindex[jp[k]]] += temp[jj] * pj[k];
2192 }
2193 temp[jj] = 0.0;
2194 }
2195 }
2196 }
2197 // setup coarse matrix B
2198 B->row = row;
2199 B->col = col;
2200 B->IA = iac;
2201 B->JA = jac;
2202 B->val = acj;
2203 B->nnz = B->IA[B->row] - B->IA[0];
2204
2205 fasp_mem_free(temps);
2206 temps = NULL;
2207 fasp_mem_free(iindexs);
2208 iindexs = NULL;
2209 fasp_mem_free(part_end);
2210 part_end = NULL;
2211 fasp_mem_free(iindex_array);
2212 iindex_array = NULL;
2213 fasp_mem_free(index_array);
2214 index_array = NULL;
2215 } else {
2216 fasp_blas_dcsr_rap(R, A, P, B);
2217 }
2218}
2219
2220/*---------------------------------*/
2221/*-- End of File --*/
2222/*---------------------------------*/
void fasp_iarray_set(const INT n, INT *x, const INT val)
Set initial value for an array to be x=val.
Definition: AuxArray.c:103
void fasp_iarray_cp(const INT n, const INT *x, INT *y)
Copy an array to the other y=x.
Definition: AuxArray.c:184
void fasp_mem_free(void *mem)
Free up previous allocated memory body and set pointer to NULL.
Definition: AuxMemory.c:155
void * fasp_mem_realloc(void *oldmem, const LONGLONG tsize)
Reallocate, initiate, and check memory.
Definition: AuxMemory.c:114
void * fasp_mem_calloc(const unsigned int size, const unsigned int type)
Allocate, initiate, and check memory.
Definition: AuxMemory.c:65
void fasp_get_start_end(const INT procid, const INT nprocs, const INT n, INT *start, INT *end)
Assign Load to each thread.
Definition: AuxThreads.c:92
void fasp_blas_darray_ax(const INT n, const REAL a, REAL *x)
x = a*x
Definition: BlaArray.c:43
void fasp_dcsr_alloc(const INT m, const INT n, const INT nnz, dCSRmat *A)
Allocate CSR sparse matrix memory space.
Definition: BlaSparseCSR.c:138
void fasp_sparse_rapms_(INT *ir, INT *jr, INT *ia, INT *ja, INT *ip, INT *jp, INT *nin, INT *ncin, INT *iac, INT *jac, INT *maxrout)
Calculates the nonzero structure of R*A*P, if jac is not null. If jac is null only finds num of nonze...
void fasp_sparse_iit_(INT *ia, INT *ja, INT *na, INT *ma, INT *iat, INT *jat)
Transpose a boolean matrix (only given by ia, ja)
void fasp_sparse_rapcmp_(INT *ir, INT *jr, REAL *r, INT *ia, INT *ja, REAL *a, INT *ipt, INT *jpt, REAL *pt, INT *nin, INT *ncin, INT *iac, INT *jac, REAL *ac, INT *idummy)
Calculates R*A*P after the nonzero structure of the result is known. iac,jac,ac have to be allocated ...
void fasp_blas_dcsr_mxv_agg(const dCSRmat *A, const REAL *x, REAL *y)
Matrix-vector multiplication y = A*x (nonzeros of A = 1)
Definition: BlaSpmvCSR.c:437
void fasp_blas_ldcsr_aAxpy(const REAL alpha, const dCSRmat *A, const LONGREAL *x, REAL *y)
Matrix-vector multiplication y = alpha*A*x + y.
Definition: BlaSpmvCSR.c:608
void fasp_blas_dcsr_axm(dCSRmat *A, const REAL alpha)
Multiply a sparse matrix A in CSR format by a scalar alpha.
Definition: BlaSpmvCSR.c:219
void fasp_blas_dcsr_mxv(const dCSRmat *A, const REAL *x, REAL *y)
Matrix-vector multiplication y = A*x.
Definition: BlaSpmvCSR.c:241
dCSRmat fasp_blas_dcsr_rap2(INT *ir, INT *jr, REAL *r, INT *ia, INT *ja, REAL *a, INT *ipt, INT *jpt, REAL *pt, INT n, INT nc, INT *maxrpout, INT *ipin, INT *jpin)
Compute R*A*P.
Definition: BlaSpmvCSR.c:1842
void fasp_blas_dcsr_aAxpy_agg(const REAL alpha, const dCSRmat *A, const REAL *x, REAL *y)
Matrix-vector multiplication y = alpha*A*x + y (nonzeros of A = 1)
Definition: BlaSpmvCSR.c:724
unsigned long total_alloc_mem
void fasp_blas_dcsr_mxm(const dCSRmat *A, const dCSRmat *B, dCSRmat *C)
Sparse matrix multiplication C=A*B.
Definition: BlaSpmvCSR.c:888
void fasp_blas_dcsr_rap_agg1(const dCSRmat *R, const dCSRmat *A, const dCSRmat *P, dCSRmat *B)
Triple sparse matrix multiplication B=R*A*P (nonzeros of R, P = 1)
Definition: BlaSpmvCSR.c:1530
void fasp_blas_dcsr_rap_agg(const dCSRmat *R, const dCSRmat *A, const dCSRmat *P, dCSRmat *RAP)
Triple sparse matrix multiplication B=R*A*P (nonzeros of R, P = 1)
Definition: BlaSpmvCSR.c:1269
REAL fasp_blas_dcsr_vmv(const dCSRmat *A, const REAL *x, const REAL *y)
vector-Matrix-vector multiplication alpha = y'*A*x
Definition: BlaSpmvCSR.c:834
SHORT fasp_blas_dcsr_add(const dCSRmat *A, const REAL alpha, const dCSRmat *B, const REAL beta, dCSRmat *C)
compute C = alpha*A + beta*B in CSR format
Definition: BlaSpmvCSR.c:60
unsigned long total_alloc_count
void fasp_blas_dcsr_rap4(dCSRmat *R, dCSRmat *A, dCSRmat *P, dCSRmat *B, INT *icor_ysk)
Triple sparse matrix multiplication B=R*A*P.
Definition: BlaSpmvCSR.c:1930
void fasp_blas_dcsr_rap(const dCSRmat *R, const dCSRmat *A, const dCSRmat *P, dCSRmat *RAP)
Triple sparse matrix multiplication B=R*A*P.
Definition: BlaSpmvCSR.c:994
void fasp_blas_dcsr_aAxpy(const REAL alpha, const dCSRmat *A, const REAL *x, REAL *y)
Matrix-vector multiplication y = alpha*A*x + y.
Definition: BlaSpmvCSR.c:493
void fasp_blas_dcsr_ptap(const dCSRmat *Pt, const dCSRmat *A, const dCSRmat *P, dCSRmat *Ac)
Triple sparse matrix multiplication B=P'*A*P.
Definition: BlaSpmvCSR.c:1734
Main header file for the FASP project.
#define REAL
Definition: fasp.h:67
#define SHORT
FASP integer and floating point numbers.
Definition: fasp.h:63
#define LONGREAL
Definition: fasp.h:68
#define INT
Definition: fasp.h:64
#define FASP_SUCCESS
Definition of return status and error messages.
Definition: fasp_const.h:19
#define OPENMP_HOLDS
Definition: fasp_const.h:260
#define TRUE
Definition of logic type.
Definition: fasp_const.h:61
#define FALSE
Definition: fasp_const.h:62
#define ERROR_MAT_SIZE
Definition: fasp_const.h:26
Sparse matrix of REAL type in CSR format.
Definition: fasp.h:143
INT col
column of matrix A, n
Definition: fasp.h:149
REAL * val
nonzero entries of A
Definition: fasp.h:161
INT row
row number of matrix A, m
Definition: fasp.h:146
INT * IA
integer array of row pointers, the size is m+1
Definition: fasp.h:155
INT nnz
number of nonzero entries
Definition: fasp.h:152
INT * JA
integer array of column indexes, the size is nnz
Definition: fasp.h:158