Fast Auxiliary Space Preconditioning 2.7.7 Aug/28/2022
BlaArray.c
Go to the documentation of this file.
1
14#include <math.h>
15
16#ifdef _OPENMP
17#include <omp.h>
18#endif
19
20#include "fasp.h"
21#include "fasp_functs.h"
22
23/*---------------------------------*/
24/*-- Public Functions --*/
25/*---------------------------------*/
26
44 const REAL a,
45 REAL *x)
46{
47 if ( a == 1.0 ) return; // do nothing
48
49 {
50 SHORT use_openmp = FALSE;
51 INT i;
52
53#ifdef _OPENMP
54 INT myid, mybegin, myend, nthreads;
55 if ( n > OPENMP_HOLDS ) {
56 use_openmp = TRUE;
57 nthreads = fasp_get_num_threads();
58 }
59#endif
60
61 if ( use_openmp ) {
62#ifdef _OPENMP
63#pragma omp parallel private(myid, mybegin, myend, i)
64 {
65 myid = omp_get_thread_num();
66 fasp_get_start_end (myid, nthreads, n, &mybegin, &myend);
67 for ( i = mybegin; i < myend; ++i ) x[i] *= a;
68 }
69#endif
70 }
71 else {
72 for ( i = 0; i < n; ++i ) x[i] *= a;
73 }
74 }
75}
76
94 const REAL a,
95 const REAL *x,
96 REAL *y)
97{
98 SHORT use_openmp = FALSE;
99 INT i;
100
101#ifdef _OPENMP
102 INT myid, mybegin, myend, nthreads;
103 if ( n > OPENMP_HOLDS ) {
104 use_openmp = TRUE;
105 nthreads = fasp_get_num_threads();
106 }
107#endif
108
109 if ( a == 1.0 ) {
110 if ( use_openmp ) {
111#ifdef _OPENMP
112#pragma omp parallel private(myid, mybegin, myend, i) num_threads(nthreads)
113 {
114 myid = omp_get_thread_num();
115 fasp_get_start_end (myid, nthreads, n, &mybegin, &myend);
116 for ( i = mybegin; i < myend; ++i ) y[i] += x[i];
117 }
118#endif
119 }
120 else {
121 for ( i = 0; i < n; ++i ) y[i] += x[i];
122 }
123 }
124
125 else if ( a == -1.0 ) {
126 if ( use_openmp ) {
127#ifdef _OPENMP
128#pragma omp parallel private(myid, mybegin, myend, i) num_threads(nthreads)
129 {
130 myid = omp_get_thread_num();
131 fasp_get_start_end (myid, nthreads, n, &mybegin, &myend);
132 for ( i = mybegin; i < myend; ++i ) y[i] -= x[i];
133 }
134#endif
135 }
136 else {
137 for ( i = 0; i < n; ++i ) y[i] -= x[i];
138 }
139 }
140
141 else {
142 if ( use_openmp ) {
143#ifdef _OPENMP
144#pragma omp parallel private(myid, mybegin, myend, i) num_threads(nthreads)
145 {
146 myid = omp_get_thread_num();
147 fasp_get_start_end (myid, nthreads, n, &mybegin, &myend);
148 for ( i = mybegin; i < myend; ++i ) y[i] += a*x[i];
149 }
150#endif
151 }
152 else {
153 for ( i = 0; i < n; ++i ) y[i] += a*x[i];
154 }
155 }
156}
157
171 const REAL *x,
172 REAL *y)
173{
174 y[0] += a*x[0];
175 y[1] += a*x[1];
176
177 y[2] += a*x[2];
178 y[3] += a*x[3];
179}
180
194 const REAL *x,
195 REAL *y)
196{
197 y[0] += a*x[0];
198 y[1] += a*x[1];
199 y[2] += a*x[2];
200
201 y[3] += a*x[3];
202 y[4] += a*x[4];
203 y[5] += a*x[5];
204
205 y[6] += a*x[6];
206 y[7] += a*x[7];
207 y[8] += a*x[8];
208}
209
223 const REAL *x,
224 REAL *y)
225{
226 y[0] += a*x[0];
227 y[1] += a*x[1];
228 y[2] += a*x[2];
229 y[3] += a*x[3];
230 y[4] += a*x[4];
231
232 y[5] += a*x[5];
233 y[6] += a*x[6];
234 y[7] += a*x[7];
235 y[8] += a*x[8];
236 y[9] += a*x[9];
237
238 y[10] += a*x[10];
239 y[11] += a*x[11];
240 y[12] += a*x[12];
241 y[13] += a*x[13];
242 y[14] += a*x[14];
243
244 y[15] += a*x[15];
245 y[16] += a*x[16];
246 y[17] += a*x[17];
247 y[18] += a*x[18];
248 y[19] += a*x[19];
249
250 y[20] += a*x[20];
251 y[21] += a*x[21];
252 y[22] += a*x[22];
253 y[23] += a*x[23];
254 y[24] += a*x[24];
255}
256
270 const REAL *x,
271 REAL *y)
272{
273 y[0] += a*x[0];
274 y[1] += a*x[1];
275 y[2] += a*x[2];
276 y[3] += a*x[3];
277 y[4] += a*x[4];
278 y[5] += a*x[5];
279 y[6] += a*x[6];
280
281 y[7] += a*x[7];
282 y[8] += a*x[8];
283 y[9] += a*x[9];
284 y[10] += a*x[10];
285 y[11] += a*x[11];
286 y[12] += a*x[12];
287 y[13] += a*x[13];
288
289 y[14] += a*x[14];
290 y[15] += a*x[15];
291 y[16] += a*x[16];
292 y[17] += a*x[17];
293 y[18] += a*x[18];
294 y[19] += a*x[19];
295 y[20] += a*x[20];
296
297 y[21] += a*x[21];
298 y[22] += a*x[22];
299 y[23] += a*x[23];
300 y[24] += a*x[24];
301 y[25] += a*x[25];
302 y[26] += a*x[26];
303 y[27] += a*x[27];
304
305 y[28] += a*x[28];
306 y[29] += a*x[29];
307 y[30] += a*x[30];
308 y[31] += a*x[31];
309 y[32] += a*x[32];
310 y[33] += a*x[33];
311 y[34] += a*x[34];
312
313 y[35] += a*x[35];
314 y[36] += a*x[36];
315 y[37] += a*x[37];
316 y[38] += a*x[38];
317 y[39] += a*x[39];
318 y[40] += a*x[40];
319 y[41] += a*x[41];
320
321 y[42] += a*x[42];
322 y[43] += a*x[43];
323 y[44] += a*x[44];
324 y[45] += a*x[45];
325 y[46] += a*x[46];
326 y[47] += a*x[47];
327 y[48] += a*x[48];
328}
329
348 const REAL a,
349 const REAL *x,
350 const REAL *y,
351 REAL *z)
352{
353 SHORT use_openmp = FALSE;
354 INT i;
355
356#ifdef _OPENMP
357 INT myid, mybegin, myend, nthreads;
358 if ( n > OPENMP_HOLDS ) {
359 use_openmp = TRUE;
360 nthreads = fasp_get_num_threads();
361 }
362#endif
363
364 if ( use_openmp ) {
365#ifdef _OPENMP
366#pragma omp parallel private(myid, mybegin, myend, i) num_threads(nthreads)
367 {
368 myid = omp_get_thread_num();
369 fasp_get_start_end (myid, nthreads, n, &mybegin, &myend);
370 for ( i = mybegin; i < myend; ++i ) z[i] = a*x[i] + y[i];
371 }
372#endif
373 }
374 else {
375 for ( i = 0; i < n; ++i ) z[i] = a*x[i] + y[i];
376 }
377}
378
394 const REAL *x,
395 const REAL *y,
396 REAL *z)
397{
398 z[0] = a*x[0] + y[0];
399 z[1] = a*x[1] + y[1];
400
401 z[2] = a*x[2] + y[2];
402 z[3] = a*x[3] + y[3];
403}
404
420 const REAL *x,
421 const REAL *y,
422 REAL *z)
423{
424 z[0] = a*x[0] + y[0];
425 z[1] = a*x[1] + y[1];
426 z[2] = a*x[2] + y[2];
427
428 z[3] = a*x[3] + y[3];
429 z[4] = a*x[4] + y[4];
430 z[5] = a*x[5] + y[5];
431
432 z[6] = a*x[6] + y[6];
433 z[7] = a*x[7] + y[7];
434 z[8] = a*x[8] + y[8];
435}
436
452 const REAL *x,
453 const REAL *y,
454 REAL *z)
455{
456 z[0] = a*x[0] + y[0];
457 z[1] = a*x[1] + y[1];
458 z[2] = a*x[2] + y[2];
459 z[3] = a*x[3] + y[3];
460 z[4] = a*x[4] + y[4];
461
462 z[5] = a*x[5] + y[5];
463 z[6] = a*x[6] + y[6];
464 z[7] = a*x[7] + y[7];
465 z[8] = a*x[8] + y[8];
466 z[9] = a*x[9] + y[9];
467
468 z[10] = a*x[10] + y[10];
469 z[11] = a*x[11] + y[11];
470 z[12] = a*x[12] + y[12];
471 z[13] = a*x[13] + y[13];
472 z[14] = a*x[14] + y[14];
473
474 z[15] = a*x[15] + y[15];
475 z[16] = a*x[16] + y[16];
476 z[17] = a*x[17] + y[17];
477 z[18] = a*x[18] + y[18];
478 z[19] = a*x[19] + y[19];
479
480 z[20] = a*x[20] + y[20];
481 z[21] = a*x[21] + y[21];
482 z[22] = a*x[22] + y[22];
483 z[23] = a*x[23] + y[23];
484 z[24] = a*x[24] + y[24];
485}
486
502 const REAL *x,
503 const REAL *y,
504 REAL *z)
505{
506 z[0] = a*x[0] + y[0];
507 z[1] = a*x[1] + y[1];
508 z[2] = a*x[2] + y[2];
509 z[3] = a*x[3] + y[3];
510 z[4] = a*x[4] + y[4];
511 z[5] = a*x[5] + y[5];
512 z[6] = a*x[6] + y[6];
513
514 z[7] = a*x[7] + y[7];
515 z[8] = a*x[8] + y[8];
516 z[9] = a*x[9] + y[9];
517 z[10] = a*x[10] + y[10];
518 z[11] = a*x[11] + y[11];
519 z[12] = a*x[12] + y[12];
520 z[13] = a*x[13] + y[13];
521
522 z[14] = a*x[14] + y[14];
523 z[15] = a*x[15] + y[15];
524 z[16] = a*x[16] + y[16];
525 z[17] = a*x[17] + y[17];
526 z[18] = a*x[18] + y[18];
527 z[19] = a*x[19] + y[19];
528 z[20] = a*x[20] + y[20];
529
530 z[21] = a*x[21] + y[21];
531 z[22] = a*x[22] + y[22];
532 z[23] = a*x[23] + y[23];
533 z[24] = a*x[24] + y[24];
534 z[25] = a*x[25] + y[25];
535 z[26] = a*x[26] + y[26];
536 z[27] = a*x[27] + y[27];
537
538 z[28] = a*x[28] + y[28];
539 z[29] = a*x[29] + y[29];
540 z[30] = a*x[30] + y[30];
541 z[31] = a*x[31] + y[31];
542 z[32] = a*x[32] + y[32];
543 z[33] = a*x[33] + y[33];
544 z[34] = a*x[34] + y[34];
545
546 z[35] = a*x[35] + y[35];
547 z[36] = a*x[36] + y[36];
548 z[37] = a*x[37] + y[37];
549 z[38] = a*x[38] + y[38];
550 z[39] = a*x[39] + y[39];
551 z[40] = a*x[40] + y[40];
552 z[41] = a*x[41] + y[41];
553
554 z[42] = a*x[42] + y[42];
555 z[43] = a*x[43] + y[43];
556 z[44] = a*x[44] + y[44];
557 z[45] = a*x[45] + y[45];
558 z[46] = a*x[46] + y[46];
559 z[47] = a*x[47] + y[47];
560 z[48] = a*x[48] + y[48];
561}
562
581 const REAL a,
582 const REAL *x,
583 const REAL b,
584 REAL *y)
585{
586 SHORT use_openmp = FALSE;
587 INT i;
588
589#ifdef _OPENMP
590 INT myid, mybegin, myend, nthreads;
591 if ( n > OPENMP_HOLDS ) {
592 use_openmp = TRUE;
593 nthreads = fasp_get_num_threads();
594 }
595#endif
596
597 if (use_openmp) {
598#ifdef _OPENMP
599#pragma omp parallel private(myid, mybegin, myend, i) num_threads(nthreads)
600 {
601 myid = omp_get_thread_num();
602 fasp_get_start_end (myid, nthreads, n, &mybegin, &myend);
603 for ( i = mybegin; i < myend; ++i ) y[i] = a*x[i] + b*y[i];
604 }
605#endif
606 }
607 else {
608 for ( i = 0; i < n; ++i ) y[i] = a*x[i] + b*y[i];
609 }
610
611}
612
629 const REAL *x)
630{
631 register REAL onenorm = 0.0;
632 INT i;
633
634#ifdef _OPENMP
635#pragma omp parallel for reduction(+:onenorm) private(i)
636#endif
637 for ( i = 0; i < n; ++i ) onenorm += ABS(x[i]);
638
639 return onenorm;
640}
641
658 const REAL *x)
659{
660 register REAL twonorm = 0.0;
661 INT i;
662
663#ifdef _OPENMP
664#pragma omp parallel for reduction(+:twonorm) private(i)
665#endif
666 for ( i = 0; i < n; ++i ) twonorm += x[i] * x[i];
667
668 return sqrt(twonorm);
669}
670
687 const REAL *x)
688{
689 SHORT use_openmp = FALSE;
690 register REAL infnorm = 0.0;
691 INT i;
692
693#ifdef _OPENMP
694 INT myid, mybegin, myend, nthreads;
695 if ( n > OPENMP_HOLDS ) {
696 use_openmp = TRUE;
697 nthreads = fasp_get_num_threads();
698 }
699#endif
700
701 if ( use_openmp ) {
702#ifdef _OPENMP
703 REAL infnorm_loc = 0.0;
704#pragma omp parallel firstprivate(infnorm_loc) private(myid, mybegin, myend, i)
705 {
706 myid = omp_get_thread_num();
707 fasp_get_start_end (myid, nthreads, n, &mybegin, &myend);
708 for ( i = mybegin; i < myend; ++i )
709 infnorm_loc = MAX( infnorm_loc, ABS(x[i]) );
710
711 if ( infnorm_loc > infnorm ) {
712#pragma omp critical
713 infnorm = MAX( infnorm_loc, infnorm );
714 }
715 }
716#endif
717 }
718 else {
719 for ( i = 0; i < n; ++i ) infnorm = MAX( infnorm, ABS(x[i]) );
720 }
721
722 return infnorm;
723}
724
742 const REAL *x,
743 const REAL *y)
744{
745 SHORT use_openmp = FALSE;
746 register REAL value = 0.0;
747 INT i;
748
749#ifdef _OPENMP
750 if ( n > OPENMP_HOLDS ) use_openmp = TRUE;
751#endif
752
753 if ( use_openmp ) {
754#ifdef _OPENMP
755#pragma omp parallel for reduction(+:value) private(i)
756#endif
757 for ( i = 0; i < n; ++i ) value += x[i]*y[i];
758 }
759 else {
760 for ( i = 0; i < n; ++i ) value += x[i]*y[i];
761 }
762
763 return value;
764}
765
766/*---------------------------------*/
767/*-- End of File --*/
768/*---------------------------------*/
void fasp_get_start_end(const INT procid, const INT nprocs, const INT n, INT *start, INT *end)
Assign Load to each thread.
Definition: AuxThreads.c:92
REAL fasp_blas_darray_dotprod(const INT n, const REAL *x, const REAL *y)
Inner product of two arraies x and y.
Definition: BlaArray.c:741
void fasp_blas_darray_axpy_nc3(const REAL a, const REAL *x, REAL *y)
y = a*x + y, length of x and y should be 3
Definition: BlaArray.c:193
REAL fasp_blas_darray_norminf(const INT n, const REAL *x)
Linf norm of array x.
Definition: BlaArray.c:686
void fasp_blas_darray_axpyz(const INT n, const REAL a, const REAL *x, const REAL *y, REAL *z)
z = a*x + y
Definition: BlaArray.c:347
REAL fasp_blas_darray_norm1(const INT n, const REAL *x)
L1 norm of array x.
Definition: BlaArray.c:628
void fasp_blas_darray_axpy_nc7(const REAL a, const REAL *x, REAL *y)
y = a*x + y, length of x and y should be 7
Definition: BlaArray.c:269
void fasp_blas_darray_axpy_nc5(const REAL a, const REAL *x, REAL *y)
y = a*x + y, length of x and y should be 5
Definition: BlaArray.c:222
void fasp_blas_darray_axpby(const INT n, const REAL a, const REAL *x, const REAL b, REAL *y)
y = a*x + b*y
Definition: BlaArray.c:580
void fasp_blas_darray_axpyz_nc7(const REAL a, const REAL *x, const REAL *y, REAL *z)
z = a*x + y, length of x, y and z should be 7
Definition: BlaArray.c:501
void fasp_blas_darray_axpy_nc2(const REAL a, const REAL *x, REAL *y)
y = a*x + y, length of x and y should be 2
Definition: BlaArray.c:170
void fasp_blas_darray_axpyz_nc2(const REAL a, const REAL *x, const REAL *y, REAL *z)
z = a*x + y, length of x, y and z should be 2
Definition: BlaArray.c:393
REAL fasp_blas_darray_norm2(const INT n, const REAL *x)
L2 norm of array x.
Definition: BlaArray.c:657
void fasp_blas_darray_ax(const INT n, const REAL a, REAL *x)
x = a*x
Definition: BlaArray.c:43
void fasp_blas_darray_axpyz_nc3(const REAL a, const REAL *x, const REAL *y, REAL *z)
z = a*x + y, length of x, y and z should be 3
Definition: BlaArray.c:419
void fasp_blas_darray_axpy(const INT n, const REAL a, const REAL *x, REAL *y)
y = a*x + y
Definition: BlaArray.c:93
void fasp_blas_darray_axpyz_nc5(const REAL a, const REAL *x, const REAL *y, REAL *z)
z = a*x + y, length of x, y and z should be 5
Definition: BlaArray.c:451
Main header file for the FASP project.
#define REAL
Definition: fasp.h:67
#define SHORT
FASP integer and floating point numbers.
Definition: fasp.h:63
#define ABS(a)
Definition: fasp.h:76
#define MAX(a, b)
Definition of max, min, abs.
Definition: fasp.h:74
#define INT
Definition: fasp.h:64
#define OPENMP_HOLDS
Definition: fasp_const.h:260
#define TRUE
Definition of logic type.
Definition: fasp_const.h:61
#define FALSE
Definition: fasp_const.h:62