Fast Auxiliary Space Preconditioning 2.7.7 Aug/28/2022
BlaSmallMat.c
Go to the documentation of this file.
1
18#include "fasp.h"
19#include "fasp_functs.h"
20
21/*---------------------------------*/
22/*-- Public Functions --*/
23/*---------------------------------*/
24
38 const INT n,
39 const REAL alpha)
40{
41 const INT n2 = n*n;
42 INT i;
43
44 for ( i = 0; i < n2; i++ ) a[i] *= alpha;
45
46 return;
47}
48
65void fasp_blas_smat_add (const REAL *a,
66 const REAL *b,
67 const INT n,
68 const REAL alpha,
69 const REAL beta,
70 REAL *c)
71{
72 const INT n2 = n*n;
73 INT i;
74
75 for ( i = 0; i < n2; i++ ) c[i] = alpha * a[i] + beta * b[i];
76
77 return;
78}
79
80
94 const REAL *b,
95 REAL *c)
96{
97 const REAL b0 = b[0], b1 = b[1];
98
99 c[0] = a[0]*b0 + a[1]*b1;
100 c[1] = a[2]*b0 + a[3]*b1;
101}
102
116 const REAL *b,
117 REAL *c)
118{
119 const REAL b0 = b[0], b1 = b[1], b2 = b[2];
120
121 c[0] = a[0]*b0 + a[1]*b1 + a[2]*b2;
122 c[1] = a[3]*b0 + a[4]*b1 + a[5]*b2;
123 c[2] = a[6]*b0 + a[7]*b1 + a[8]*b2;
124}
125
139 const REAL *b,
140 REAL *c)
141{
142 const REAL b0 = b[0], b1 = b[1], b2 = b[2], b3 = b[3];
143
144 c[0] = a[0] *b0 + a[1] *b1 + a[2] *b2 + a[3] *b3;
145 c[1] = a[4] *b0 + a[5] *b1 + a[6] *b2 + a[7] *b3;
146 c[2] = a[8] *b0 + a[9] *b1 + a[10]*b2 + a[11]*b3;
147 c[3] = a[12]*b0 + a[13]*b1 + a[14]*b2 + a[15]*b3;
148}
149
163 const REAL *b,
164 REAL *c)
165{
166 const REAL b0 = b[0], b1 = b[1], b2 = b[2];
167 const REAL b3 = b[3], b4 = b[4];
168
169 c[0] = a[0]*b0 + a[1]*b1 + a[2]*b2 + a[3]*b3 + a[4]*b4;
170 c[1] = a[5]*b0 + a[6]*b1 + a[7]*b2 + a[8]*b3 + a[9]*b4;
171 c[2] = a[10]*b0 + a[11]*b1 + a[12]*b2 + a[13]*b3 + a[14]*b4;
172 c[3] = a[15]*b0 + a[16]*b1 + a[17]*b2 + a[18]*b3 + a[19]*b4;
173 c[4] = a[20]*b0 + a[21]*b1 + a[22]*b2 + a[23]*b3 + a[24]*b4;
174}
175
189 const REAL *b,
190 REAL *c)
191{
192 const REAL b0 = b[0], b1 = b[1], b2 = b[2];
193 const REAL b3 = b[3], b4 = b[4], b5 = b[5], b6 = b[6];
194
195 c[0] = a[0]*b0 + a[1]*b1 + a[2]*b2 + a[3]*b3 + a[4]*b4 + a[5]*b5 + a[6]*b6;
196 c[1] = a[7]*b0 + a[8]*b1 + a[9]*b2 + a[10]*b3 + a[11]*b4 + a[12]*b5 + a[13]*b6;
197 c[2] = a[14]*b0 + a[15]*b1 + a[16]*b2 + a[17]*b3 + a[18]*b4 + a[19]*b5 + a[20]*b6;
198 c[3] = a[21]*b0 + a[22]*b1 + a[23]*b2 + a[24]*b3 + a[25]*b4 + a[26]*b5 + a[27]*b6;
199 c[4] = a[28]*b0 + a[29]*b1 + a[30]*b2 + a[31]*b3 + a[32]*b4 + a[33]*b5 + a[34]*b6;
200 c[5] = a[35]*b0 + a[36]*b1 + a[37]*b2 + a[38]*b3 + a[39]*b4 + a[40]*b5 + a[41]*b6;
201 c[6] = a[42]*b0 + a[43]*b1 + a[44]*b2 + a[45]*b3 + a[46]*b4 + a[47]*b5 + a[48]*b6;
202}
203
222 const REAL *b,
223 REAL *c,
224 const INT n)
225{
226 switch (n) {
227 case 2:
228 fasp_blas_smat_mxv_nc2(a, b, c);
229 break;
230
231 case 3:
232 fasp_blas_smat_mxv_nc3(a, b, c);
233 break;
234
235 case 4:
236 fasp_blas_smat_mxv_nc4(a, b, c);
237 break;
238
239 case 5:
240 fasp_blas_smat_mxv_nc5(a, b, c);
241 break;
242
243 case 7:
244 fasp_blas_smat_mxv_nc7(a, b, c);
245 break;
246
247 default:
248 {
249 INT i,j,in=0;
250 REAL temp;
251
252 for (i=0; i<n; ++i, in+=n) {
253 temp = 0.0;
254 for (j=0; j<n; ++j) temp += a[in+j]*b[j];
255 c[i]=temp;
256 } // end for i
257 }
258 break;
259 }
260 return;
261}
262
276 const REAL *b,
277 REAL *c)
278{
279 const REAL a0 = a[0], a1 = a[1];
280 const REAL a2 = a[2], a3 = a[3];
281
282 const REAL b0 = b[0], b1 = b[1];
283 const REAL b2 = b[2], b3 = b[3];
284
285 c[0] = a0*b0 + a1*b2;
286 c[1] = a0*b1 + a1*b3;
287 c[2] = a2*b0 + a3*b2;
288 c[3] = a2*b1 + a3*b3;
289
290}
291
305 const REAL *b,
306 REAL *c)
307{
308 const REAL a0 = a[0], a1 = a[1], a2 = a[2];
309 const REAL a3 = a[3], a4 = a[4], a5 = a[5];
310 const REAL a6 = a[6], a7 = a[7], a8 = a[8];
311
312 const REAL b0 = b[0], b1 = b[1], b2 = b[2];
313 const REAL b3 = b[3], b4 = b[4], b5 = b[5];
314 const REAL b6 = b[6], b7 = b[7], b8 = b[8];
315
316 c[0] = a0*b0 + a1*b3 + a2*b6;
317 c[1] = a0*b1 + a1*b4 + a2*b7;
318 c[2] = a0*b2 + a1*b5 + a2*b8;
319
320 c[3] = a3*b0 + a4*b3 + a5*b6;
321 c[4] = a3*b1 + a4*b4 + a5*b7;
322 c[5] = a3*b2 + a4*b5 + a5*b8;
323
324 c[6] = a6*b0 + a7*b3 + a8*b6;
325 c[7] = a6*b1 + a7*b4 + a8*b7;
326 c[8] = a6*b2 + a7*b5 + a8*b8;
327}
328
342 const REAL *b,
343 REAL *c)
344{
345 const REAL a0 = a[0], a1 = a[1], a2 = a[2], a3 = a[3];
346 const REAL a4 = a[4], a5 = a[5], a6 = a[6], a7 = a[7];
347 const REAL a8 = a[8], a9 = a[9], a10 = a[10], a11 = a[11];
348 const REAL a12 = a[12], a13 = a[13], a14 = a[14], a15 = a[15];
349
350 const REAL b0 = b[0], b1 = b[1], b2 = b[2], b3 = b[3];
351 const REAL b4 = b[4], b5 = b[5], b6 = b[6], b7 = b[7];
352 const REAL b8 = b[8], b9 = b[9], b10 = b[10], b11 = b[11];
353 const REAL b12 = b[12], b13 = b[13], b14 = b[14], b15 = b[15];
354
355 c[0] = a0*b0 + a1*b4 + a2*b8 + a3*b12;
356 c[1] = a0*b1 + a1*b5 + a2*b9 + a3*b13;
357 c[2] = a0*b2 + a1*b6 + a2*b10 + a3*b14;
358 c[3] = a0*b3 + a1*b7 + a2*b11 + a3*b15;
359
360 c[4] = a4*b0 + a5*b4 + a6*b8 + a7*b12;
361 c[5] = a4*b1 + a5*b5 + a6*b9 + a7*b13;
362 c[6] = a4*b2 + a5*b6 + a6*b10 + a7*b14;
363 c[7] = a4*b3 + a5*b7 + a6*b11 + a7*b15;
364
365 c[8] = a8*b0 + a9*b4 + a10*b8 + a11*b12;
366 c[9] = a8*b1 + a9*b5 + a10*b9 + a11*b13;
367 c[10] = a8*b2 + a9*b6 + a10*b10 + a11*b14;
368 c[11] = a8*b3 + a9*b7 + a10*b11 + a11*b15;
369
370 c[12] = a12*b0 + a13*b4 + a14*b8 + a15*b12;
371 c[13] = a12*b1 + a13*b5 + a14*b9 + a15*b13;
372 c[14] = a12*b2 + a13*b6 + a14*b10 + a15*b14;
373 c[15] = a12*b3 + a13*b7 + a14*b11 + a15*b15;
374}
375
389 const REAL *b,
390 REAL *c)
391{
392 const REAL a0 = a[0], a1 = a[1], a2 = a[2], a3 = a[3], a4 = a[4];
393 const REAL a5 = a[5], a6 = a[6], a7 = a[7], a8 = a[8], a9 = a[9];
394 const REAL a10 = a[10], a11 = a[11], a12 = a[12], a13 = a[13], a14 = a[14];
395 const REAL a15 = a[15], a16 = a[16], a17 = a[17], a18 = a[18], a19 = a[19];
396 const REAL a20 = a[20], a21 = a[21], a22 = a[22], a23 = a[23], a24 = a[24];
397
398 const REAL b0 = b[0], b1 = b[1], b2 = b[2], b3 = b[3], b4 = b[4];
399 const REAL b5 = b[5], b6 = b[6], b7 = b[7], b8 = b[8], b9 = b[9];
400 const REAL b10 = b[10], b11 = b[11], b12 = b[12], b13 = b[13], b14 = b[14];
401 const REAL b15 = b[15], b16 = b[16], b17 = b[17], b18 = b[18], b19 = b[19];
402 const REAL b20 = b[20], b21 = b[21], b22 = b[22], b23 = b[23], b24 = b[24];
403
404 c[0] = a0*b0 + a1*b5 + a2*b10 + a3*b15 + a4*b20;
405 c[1] = a0*b1 + a1*b6 + a2*b11 + a3*b16 + a4*b21;
406 c[2] = a0*b2 + a1*b7 + a2*b12 + a3*b17 + a4*b22;
407 c[3] = a0*b3 + a1*b8 + a2*b13 + a3*b18 + a4*b23;
408 c[4] = a0*b4 + a1*b9 + a2*b14 + a3*b19 + a4*b24;
409
410 c[5] = a5*b0 + a6*b5 + a7*b10 + a8*b15 + a9*b20;
411 c[6] = a5*b1 + a6*b6 + a7*b11 + a8*b16 + a9*b21;
412 c[7] = a5*b2 + a6*b7 + a7*b12 + a8*b17 + a9*b22;
413 c[8] = a5*b3 + a6*b8 + a7*b13 + a8*b18 + a9*b23;
414 c[9] = a5*b4 + a6*b9 + a7*b14 + a8*b19 + a9*b24;
415
416 c[10] = a10*b0 + a11*b5 + a12*b10 + a13*b15 + a14*b20;
417 c[11] = a10*b1 + a11*b6 + a12*b11 + a13*b16 + a14*b21;
418 c[12] = a10*b2 + a11*b7 + a12*b12 + a13*b17 + a14*b22;
419 c[13] = a10*b3 + a11*b8 + a12*b13 + a13*b18 + a14*b23;
420 c[14] = a10*b4 + a11*b9 + a12*b14 + a13*b19 + a14*b24;
421
422 c[15] = a15*b0 + a16*b5 + a17*b10 + a18*b15 + a19*b20;
423 c[16] = a15*b1 + a16*b6 + a17*b11 + a18*b16 + a19*b21;
424 c[17] = a15*b2 + a16*b7 + a17*b12 + a18*b17 + a19*b22;
425 c[18] = a15*b3 + a16*b8 + a17*b13 + a18*b18 + a19*b23;
426 c[19] = a15*b4 + a16*b9 + a17*b14 + a18*b19 + a19*b24;
427
428 c[20] = a20*b0 + a21*b5 + a22*b10 + a23*b15 + a24*b20;
429 c[21] = a20*b1 + a21*b6 + a22*b11 + a23*b16 + a24*b21;
430 c[22] = a20*b2 + a21*b7 + a22*b12 + a23*b17 + a24*b22;
431 c[23] = a20*b3 + a21*b8 + a22*b13 + a23*b18 + a24*b23;
432 c[24] = a20*b4 + a21*b9 + a22*b14 + a23*b19 + a24*b24;
433}
434
448 const REAL *b,
449 REAL *c)
450{
451 const REAL a0 = a[0], a1 = a[1], a2 = a[2], a3 = a[3], a4 = a[4], a5 = a[5], a6 = a[6];
452 const REAL a7 = a[7], a8 = a[8], a9 = a[9], a10 = a[10], a11 = a[11], a12 = a[12], a13 = a[13];
453 const REAL a14 = a[14], a15 = a[15], a16 = a[16], a17 = a[17], a18 = a[18], a19 = a[19], a20 = a[20];
454 const REAL a21 = a[21], a22 = a[22], a23 = a[23], a24 = a[24], a25 = a[25], a26 = a[26], a27 = a[27];
455 const REAL a28 = a[28], a29 = a[29], a30 = a[30], a31 = a[31], a32 = a[32], a33 = a[33], a34 = a[34];
456 const REAL a35 = a[35], a36 = a[36], a37 = a[37], a38 = a[38], a39 = a[39], a40 = a[40], a41 = a[41];
457 const REAL a42 = a[42], a43 = a[43], a44 = a[44], a45 = a[45], a46 = a[46], a47 = a[47], a48 = a[48];
458
459 const REAL b0 = b[0], b1 = b[1], b2 = b[2], b3 = b[3], b4 = b[4], b5 = b[5], b6 = b[6];
460 const REAL b7 = b[7], b8 = b[8], b9 = b[9], b10 = b[10], b11 = b[11], b12 = b[12], b13 = b[13];
461 const REAL b14 = b[14], b15 = b[15], b16 = b[16], b17 = b[17], b18 = b[18], b19 = b[19], b20 = b[20];
462 const REAL b21 = b[21], b22 = b[22], b23 = b[23], b24 = b[24], b25 = b[25], b26 = b[26], b27 = b[27];
463 const REAL b28 = b[28], b29 = b[29], b30 = b[30], b31 = b[31], b32 = b[32], b33 = b[33], b34 = b[34];
464 const REAL b35 = b[35], b36 = b[36], b37 = b[37], b38 = b[38], b39 = b[39], b40 = b[40], b41 = b[41];
465 const REAL b42 = b[42], b43 = b[43], b44 = b[44], b45 = b[45], b46 = b[46], b47 = b[47], b48 = b[48];
466
467 c[0] = a0*b0 + a1*b7 + a2*b14 + a3*b21 + a4*b28 + a5*b35 + a6*b42;
468 c[1] = a0*b1 + a1*b8 + a2*b15 + a3*b22 + a4*b29 + a5*b36 + a6*b43;
469 c[2] = a0*b2 + a1*b9 + a2*b16 + a3*b23 + a4*b30 + a5*b37 + a6*b44;
470 c[3] = a0*b3 + a1*b10 + a2*b17 + a3*b24 + a4*b31 + a5*b38 + a6*b45;
471 c[4] = a0*b4 + a1*b11 + a2*b18 + a3*b25 + a4*b32 + a5*b39 + a6*b46;
472 c[5] = a0*b5 + a1*b12 + a2*b19 + a3*b26 + a4*b33 + a5*b40 + a6*b47;
473 c[6] = a0*b6 + a1*b13 + a2*b20 + a3*b27 + a4*b34 + a5*b41 + a6*b48;
474
475 c[7] = a7*b0 + a8*b7 + a9*b14 + a10*b21 + a11*b28 + a12*b35 + a13*b42;
476 c[8] = a7*b1 + a8*b8 + a9*b15 + a10*b22 + a11*b29 + a12*b36 + a13*b43;
477 c[9] = a7*b2 + a8*b9 + a9*b16 + a10*b23 + a11*b30 + a12*b37 + a13*b44;
478 c[10] = a7*b3 + a8*b10 + a9*b17 + a10*b24 + a11*b31 + a12*b38 + a13*b45;
479 c[11] = a7*b4 + a8*b11 + a9*b18 + a10*b25 + a11*b32 + a12*b39 + a13*b46;
480 c[12] = a7*b5 + a8*b12 + a9*b19 + a10*b26 + a11*b33 + a12*b40 + a13*b47;
481 c[13] = a7*b6 + a8*b13 + a9*b20 + a10*b27 + a11*b34 + a12*b41 + a13*b48;
482
483 c[14] = a14*b0 + a15*b7 + a16*b14 + a17*b21 + a18*b28 + a19*b35 + a20*b42;
484 c[15] = a14*b1 + a15*b8 + a16*b15 + a17*b22 + a18*b29 + a19*b36 + a20*b43;
485 c[16] = a14*b2 + a15*b9 + a16*b16 + a17*b23 + a18*b30 + a19*b37 + a20*b44;
486 c[17] = a14*b3 + a15*b10 + a16*b17 + a17*b24 + a18*b31 + a19*b38 + a20*b45;
487 c[18] = a14*b4 + a15*b11 + a16*b18 + a17*b25 + a18*b32 + a19*b39 + a20*b46;
488 c[19] = a14*b5 + a15*b12 + a16*b19 + a17*b26 + a18*b33 + a19*b40 + a20*b47;
489 c[20] = a14*b6 + a15*b13 + a16*b20 + a17*b27 + a18*b34 + a19*b41 + a20*b48;
490
491 c[21] = a21*b0 + a22*b7 + a23*b14 + a24*b21 + a25*b28 + a26*b35 + a27*b42;
492 c[22] = a21*b1 + a22*b8 + a23*b15 + a24*b22 + a25*b29 + a26*b36 + a27*b43;
493 c[23] = a21*b2 + a22*b9 + a23*b16 + a24*b23 + a25*b30 + a26*b37 + a27*b44;
494 c[24] = a21*b3 + a22*b10 + a23*b17 + a24*b24 + a25*b31 + a26*b38 + a27*b45;
495 c[25] = a21*b4 + a22*b11 + a23*b18 + a24*b25 + a25*b32 + a26*b39 + a27*b46;
496 c[26] = a21*b5 + a22*b12 + a23*b19 + a24*b26 + a25*b33 + a26*b40 + a27*b47;
497 c[27] = a21*b6 + a22*b13 + a23*b20 + a24*b27 + a25*b34 + a26*b41 + a27*b48;
498
499 c[28] = a28*b0 + a29*b7 + a30*b14 + a31*b21 + a32*b28 + a33*b35 + a34*b42;
500 c[29] = a28*b1 + a29*b8 + a30*b15 + a31*b22 + a32*b29 + a33*b36 + a34*b43;
501 c[30] = a28*b2 + a29*b9 + a30*b16 + a31*b23 + a32*b30 + a33*b37 + a34*b44;
502 c[31] = a28*b3 + a29*b10 + a30*b17 + a31*b24 + a32*b31 + a33*b38 + a34*b45;
503 c[32] = a28*b4 + a29*b11 + a30*b18 + a31*b25 + a32*b32 + a33*b39 + a34*b46;
504 c[33] = a28*b5 + a29*b12 + a30*b19 + a31*b26 + a32*b33 + a33*b40 + a34*b47;
505 c[34] = a28*b6 + a29*b13 + a30*b20 + a31*b27 + a32*b34 + a33*b41 + a34*b48;
506
507 c[35] = a35*b0 + a36*b7 + a37*b14 + a38*b21 + a39*b28 + a40*b35 + a41*b42;
508 c[36] = a35*b1 + a36*b8 + a37*b15 + a38*b22 + a39*b29 + a40*b36 + a41*b43;
509 c[37] = a35*b2 + a36*b9 + a37*b16 + a38*b23 + a39*b30 + a40*b37 + a41*b44;
510 c[38] = a35*b3 + a36*b10 + a37*b17 + a38*b24 + a39*b31 + a40*b38 + a41*b45;
511 c[39] = a35*b4 + a36*b11 + a37*b18 + a38*b25 + a39*b32 + a40*b39 + a41*b46;
512 c[40] = a35*b5 + a36*b12 + a37*b19 + a38*b26 + a39*b33 + a40*b40 + a41*b47;
513 c[41] = a35*b6 + a36*b13 + a37*b20 + a38*b27 + a39*b34 + a40*b41 + a41*b48;
514
515 c[42] = a42*b0 + a43*b7 + a44*b14 + a45*b21 + a46*b28 + a47*b35 + a48*b42;
516 c[43] = a42*b1 + a43*b8 + a44*b15 + a45*b22 + a46*b29 + a47*b36 + a48*b43;
517 c[44] = a42*b2 + a43*b9 + a44*b16 + a45*b23 + a46*b30 + a47*b37 + a48*b44;
518 c[45] = a42*b3 + a43*b10 + a44*b17 + a45*b24 + a46*b31 + a47*b38 + a48*b45;
519 c[46] = a42*b4 + a43*b11 + a44*b18 + a45*b25 + a46*b32 + a47*b39 + a48*b46;
520 c[47] = a42*b5 + a43*b12 + a44*b19 + a45*b26 + a46*b33 + a47*b40 + a48*b47;
521 c[48] = a42*b6 + a43*b13 + a44*b20 + a45*b27 + a46*b34 + a47*b41 + a48*b48;
522}
523
541 const REAL *b,
542 REAL *c,
543 const INT n)
544{
545
546 switch (n) {
547 case 2:
548 fasp_blas_smat_mul_nc2(a, b, c); break;
549
550 case 3:
551 fasp_blas_smat_mul_nc3(a, b, c); break;
552
553 case 5:
554 fasp_blas_smat_mul_nc5(a, b, c); break;
555
556 case 7:
557 fasp_blas_smat_mul_nc7(a, b, c); break;
558
559 default: {
560 const INT n2 = n*n;
561 INT i,j,k;
562 REAL temp;
563
564 for (i=0; i<n2; i+=n) {
565 for (j=0; j<n; ++j) {
566 temp = 0.0; // Fixed by Chensong. Feb/22/2011.
567 for (k=0; k<n; ++k) temp += a[i+k]*b[k*n+j];
568 c[i+j] = temp;
569 } // end for j
570 } // end for i
571 }
572 break;
573 }
574 return;
575}
576
590 const REAL *x,
591 REAL *y)
592{
593 const REAL x0 = x[0], x1 = x[1];
594
595 y[0] += A[0]*x0 + A[1]*x1;
596 y[1] += A[2]*x0 + A[3]*x1;
597
598 return;
599}
600
614 const REAL *x,
615 REAL *y)
616{
617 const REAL x0 = x[0], x1 = x[1], x2 = x[2];
618
619 y[0] += A[0]*x0 + A[1]*x1 + A[2]*x2;
620 y[1] += A[3]*x0 + A[4]*x1 + A[5]*x2;
621 y[2] += A[6]*x0 + A[7]*x1 + A[8]*x2;
622 return;
623}
624
638 const REAL *x,
639 REAL *y)
640{
641 const REAL x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3];
642
643 y[0] += A[0]*x0 + A[1]*x1 + A[2]*x2 + A[3]*x3;
644 y[1] += A[4]*x0 + A[5]*x1 + A[6]*x2 + A[7]*x3;
645 y[2] += A[8]*x0 + A[9]*x1 + A[10]*x2 + A[11]*x3;
646 y[3] += A[12]*x0 + A[13]*x1 +A[14]*x2 + A[15]*x3;
647 return;
648}
649
663 const REAL *x,
664 REAL *y)
665{
666 const REAL x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4];
667
668 y[0] += A[0]*x0 + A[1]*x1 + A[2]*x2 + A[3]*x3 + A[4]*x4;
669 y[1] += A[5]*x0 + A[6]*x1 + A[7]*x2 + A[8]*x3 + A[9]*x4;
670 y[2] += A[10]*x0 + A[11]*x1 + A[12]*x2 + A[13]*x3 + A[14]*x4;
671 y[3] += A[15]*x0 + A[16]*x1 + A[17]*x2 + A[18]*x3 + A[19]*x4;
672 y[4] += A[20]*x0 + A[21]*x1 + A[22]*x2 + A[23]*x3 + A[24]*x4;
673 return;
674}
675
689 const REAL *x,
690 REAL *y)
691{
692 const REAL x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3];
693 const REAL x4 = x[4], x5 = x[5], x6 = x[6];
694
695 y[0] += A[0]*x0 + A[1]*x1 + A[2]*x2 + A[3]*x3 + A[4]*x4 + A[5]*x5 + A[6]*x6;
696 y[1] += A[7]*x0 + A[8]*x1 + A[9]*x2 + A[10]*x3 + A[11]*x4 + A[12]*x5 + A[13]*x6;
697 y[2] += A[14]*x0 + A[15]*x1 + A[16]*x2 + A[17]*x3 + A[18]*x4 + A[19]*x5 + A[20]*x6;
698 y[3] += A[21]*x0 + A[22]*x1 + A[23]*x2 + A[24]*x3 + A[25]*x4 + A[26]*x5 + A[27]*x6;
699 y[4] += A[28]*x0 + A[29]*x1 + A[30]*x2 + A[31]*x3 + A[32]*x4 + A[33]*x5 + A[34]*x6;
700 y[5] += A[35]*x0 + A[36]*x1 + A[37]*x2 + A[38]*x3 + A[39]*x4 + A[40]*x5 + A[41]*x6;
701 y[6] += A[42]*x0 + A[43]*x1 + A[44]*x2 + A[45]*x3 + A[46]*x4 + A[47]*x5 + A[48]*x6;
702 return;
703}
704
721 const REAL *x,
722 REAL *y,
723 const INT n)
724{
725 switch (n) {
726 case 1:
727 {
728 y[0] += A[0]*x[0];
729 break;
730 }
731 case 2:
732 {
733 const REAL x0 = x[0], x1 = x[1];
734 y[0] += A[0]*x0 + A[1]*x1;
735 y[1] += A[2]*x0 + A[3]*x1;
736 break;
737 }
738 case 3:
739 {
740 const REAL x0 = x[0], x1 = x[1], x2 = x[2];
741 y[0] += A[0]*x0 + A[1]*x1 + A[2]*x2;
742 y[1] += A[3]*x0 + A[4]*x1 + A[5]*x2;
743 y[2] += A[6]*x0 + A[7]*x1 + A[8]*x2;
744 break;
745 }
746 case 4:
747 {
748 const REAL x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3];
749 y[0] += A[0]*x0 + A[1]*x1 + A[2]*x2 + A[3]*x3;
750 y[1] += A[4]*x0 + A[5]*x1 + A[6]*x2 + A[7]*x3;
751 y[2] += A[8]*x0 + A[9]*x1 + A[10]*x2 + A[11]*x3;
752 y[3] += A[12]*x0 + A[13]*x1 +A[14]*x2 + A[15]*x3;
753 break;
754 }
755 case 5:
756 {
757 const REAL x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4];
758 y[0] += A[0]*x0 + A[1]*x1 + A[2]*x2 + A[3]*x3 + A[4]*x4;
759 y[1] += A[5]*x0 + A[6]*x1 + A[7]*x2 + A[8]*x3 + A[9]*x4;
760 y[2] += A[10]*x0 + A[11]*x1 + A[12]*x2 + A[13]*x3 + A[14]*x4;
761 y[3] += A[15]*x0 + A[16]*x1 + A[17]*x2 + A[18]*x3 + A[19]*x4;
762 y[4] += A[20]*x0 + A[21]*x1 + A[22]*x2 + A[23]*x3 + A[24]*x4;
763 break;
764 }
765 case 6:
766 {
767 const REAL x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3];
768 const REAL x4 = x[4], x5 = x[5];
769 y[0] += A[0]*x0 + A[1]*x1 + A[2]*x2 + A[3]*x3 + A[4]*x4 + A[5]*x5;
770 y[1] += A[6]*x0 + A[7]*x1 + A[8]*x2 + A[9]*x3 + A[10]*x4 + A[11]*x5;
771 y[2] += A[12]*x0 + A[13]*x1 + A[14]*x2 + A[15]*x3 + A[16]*x4 + A[17]*x5;
772 y[3] += A[18]*x0 + A[19]*x1 + A[20]*x2 + A[21]*x3 + A[22]*x4 + A[23]*x5;
773 y[4] += A[24]*x0 + A[25]*x1 + A[26]*x2 + A[27]*x3 + A[28]*x4 + A[29]*x5;
774 y[5] += A[30]*x0 + A[31]*x1 + A[32]*x2 + A[33]*x3 + A[34]*x4 + A[35]*x5;
775 break;
776 }
777 case 7:
778 {
779 const REAL x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3];
780 const REAL x4 = x[4], x5 = x[5], x6 = x[6];
781 y[0] += A[0]*x0 + A[1]*x1 + A[2]*x2 + A[3]*x3 + A[4]*x4 + A[5]*x5 + A[6]*x6;
782 y[1] += A[7]*x0 + A[8]*x1 + A[9]*x2 + A[10]*x3 + A[11]*x4 + A[12]*x5 + A[13]*x6;
783 y[2] += A[14]*x0 + A[15]*x1 + A[16]*x2 + A[17]*x3 + A[18]*x4 + A[19]*x5 + A[20]*x6;
784 y[3] += A[21]*x0 + A[22]*x1 + A[23]*x2 + A[24]*x3 + A[25]*x4 + A[26]*x5 + A[27]*x6;
785 y[4] += A[28]*x0 + A[29]*x1 + A[30]*x2 + A[31]*x3 + A[32]*x4 + A[33]*x5 + A[34]*x6;
786 y[5] += A[35]*x0 + A[36]*x1 + A[37]*x2 + A[38]*x3 + A[39]*x4 + A[40]*x5 + A[41]*x6;
787 y[6] += A[42]*x0 + A[43]*x1 + A[44]*x2 + A[45]*x3 + A[46]*x4 + A[47]*x5 + A[48]*x6;
788 break;
789 }
790 default: /* For everything beyond 7 */
791 {
792 INT i,j,k;
793
794 for ( k = i = 0; i < n; i++, k+=n ) {
795 for ( j = 0; j < n; j++ ) {
796 y[i] += A[k+j]*x[j];
797 }
798 }
799 break;
800 }
801 }
802
803 return;
804}
805
821 const REAL *x,
822 REAL *y)
823{
824 const REAL x0 = x[0], x1 = x[1];
825
826 y[0] -= A[0]*x0 + A[1]*x1;
827 y[1] -= A[2]*x0 + A[3]*x1;
828
829 return;
830}
831
847 const REAL *x,
848 REAL *y)
849{
850 const REAL x0 = x[0], x1 = x[1], x2 = x[2];
851
852 y[0] -= A[0]*x0 + A[1]*x1 + A[2]*x2;
853 y[1] -= A[3]*x0 + A[4]*x1 + A[5]*x2;
854 y[2] -= A[6]*x0 + A[7]*x1 + A[8]*x2;
855
856 return;
857}
858
874 const REAL *x,
875 REAL *y)
876{
877 const REAL x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3];
878
879 y[0] -= A[0]*x0 + A[1]*x1 + A[2]*x2 + A[3]*x3;
880 y[1] -= A[4]*x0 + A[5]*x1 + A[6]*x2 + A[7]*x3;
881 y[2] -= A[8]*x0 + A[9]*x1 + A[10]*x2 + A[11]*x3;
882 y[3] -= A[12]*x0 + A[13]*x1 +A[14]*x2 + A[15]*x3;
883 return;
884}
885
901 const REAL *x,
902 REAL *y)
903{
904 const REAL x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4];
905
906 y[0] -= A[0]*x0 + A[1]*x1 + A[2]*x2 + A[3]*x3 + A[4]*x4;
907 y[1] -= A[5]*x0 + A[6]*x1 + A[7]*x2 + A[8]*x3 + A[9]*x4;
908 y[2] -= A[10]*x0 + A[11]*x1 + A[12]*x2 + A[13]*x3 + A[14]*x4;
909 y[3] -= A[15]*x0 + A[16]*x1 + A[17]*x2 + A[18]*x3 + A[19]*x4;
910 y[4] -= A[20]*x0 + A[21]*x1 + A[22]*x2 + A[23]*x3 + A[24]*x4;
911
912 return;
913}
914
930 const REAL *x,
931 REAL *y)
932{
933 const REAL x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3];
934 const REAL x4 = x[4], x5 = x[5], x6 = x[6];
935
936 y[0] -= A[0]*x0 + A[1]*x1 + A[2]*x2 + A[3]*x3 + A[4]*x4 + A[5]*x5 + A[6]*x6;
937 y[1] -= A[7]*x0 + A[8]*x1 + A[9]*x2 + A[10]*x3 + A[11]*x4 + A[12]*x5 + A[13]*x6;
938 y[2] -= A[14]*x0 + A[15]*x1 + A[16]*x2 + A[17]*x3 + A[18]*x4 + A[19]*x5 + A[20]*x6;
939 y[3] -= A[21]*x0 + A[22]*x1 + A[23]*x2 + A[24]*x3 + A[25]*x4 + A[26]*x5 + A[27]*x6;
940 y[4] -= A[28]*x0 + A[29]*x1 + A[30]*x2 + A[31]*x3 + A[32]*x4 + A[33]*x5 + A[34]*x6;
941 y[5] -= A[35]*x0 + A[36]*x1 + A[37]*x2 + A[38]*x3 + A[39]*x4 + A[40]*x5 + A[41]*x6;
942 y[6] -= A[42]*x0 + A[43]*x1 + A[44]*x2 + A[45]*x3 + A[46]*x4 + A[47]*x5 + A[48]*x6;
943
944 return;
945}
946
963 const REAL *x,
964 REAL *y,
965 const INT n)
966{
967 switch (n) {
968 case 1:
969 {
970 y[0] -= A[0]*x[0];
971 break;
972 }
973 case 2:
974 {
975 const REAL x0 = x[0], x1 = x[1];
976 y[0] -= A[0]*x0 + A[1]*x1;
977 y[1] -= A[2]*x0 + A[3]*x1;
978 break;
979 }
980 case 3:
981 {
982 const REAL x0 = x[0], x1 = x[1], x2 = x[2];
983 y[0] -= A[0]*x0 + A[1]*x1 + A[2]*x2;
984 y[1] -= A[3]*x0 + A[4]*x1 + A[5]*x2;
985 y[2] -= A[6]*x0 + A[7]*x1 + A[8]*x2;
986 break;
987 }
988 case 4:
989 {
990 const REAL x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3];
991 y[0] -= A[0]*x0 + A[1]*x1 + A[2]*x2 + A[3]*x3;
992 y[1] -= A[4]*x0 + A[5]*x1 + A[6]*x2 + A[7]*x3;
993 y[2] -= A[8]*x0 + A[9]*x1 + A[10]*x2 + A[11]*x3;
994 y[3] -= A[12]*x0 + A[13]*x1 +A[14]*x2 + A[15]*x3;
995 break;
996 }
997 case 5:
998 {
999 const REAL x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4];
1000 y[0] -= A[0]*x0 + A[1]*x1 + A[2]*x2 + A[3]*x3 + A[4]*x4;
1001 y[1] -= A[5]*x0 + A[6]*x1 + A[7]*x2 + A[8]*x3 + A[9]*x4;
1002 y[2] -= A[10]*x0 + A[11]*x1 + A[12]*x2 + A[13]*x3 + A[14]*x4;
1003 y[3] -= A[15]*x0 + A[16]*x1 + A[17]*x2 + A[18]*x3 + A[19]*x4;
1004 y[4] -= A[20]*x0 + A[21]*x1 + A[22]*x2 + A[23]*x3 + A[24]*x4;
1005 break;
1006 }
1007 case 6:
1008 {
1009 const REAL x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3];
1010 const REAL x4 = x[4], x5 = x[5];
1011 y[0] -= A[0]*x0 + A[1]*x1 + A[2]*x2 + A[3]*x3 + A[4]*x4 + A[5]*x5;
1012 y[1] -= A[6]*x0 + A[7]*x1 + A[8]*x2 + A[9]*x3 + A[10]*x4 + A[11]*x5;
1013 y[2] -= A[12]*x0 + A[13]*x1 + A[14]*x2 + A[15]*x3 + A[16]*x4 + A[17]*x5;
1014 y[3] -= A[18]*x0 + A[19]*x1 + A[20]*x2 + A[21]*x3 + A[22]*x4 + A[23]*x5;
1015 y[4] -= A[24]*x0 + A[25]*x1 + A[26]*x2 + A[27]*x3 + A[28]*x4 + A[29]*x5;
1016 y[5] -= A[30]*x0 + A[31]*x1 + A[32]*x2 + A[33]*x3 + A[34]*x4 + A[35]*x5;
1017 break;
1018 }
1019 case 7:
1020 {
1021 const REAL x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3];
1022 const REAL x4 = x[4], x5 = x[5], x6 = x[6];
1023 y[0] -= A[0]*x0 + A[1]*x1 + A[2]*x2 + A[3]*x3 + A[4]*x4 + A[5]*x5 + A[6]*x6;
1024 y[1] -= A[7]*x0 + A[8]*x1 + A[9]*x2 + A[10]*x3 + A[11]*x4 + A[12]*x5 + A[13]*x6;
1025 y[2] -= A[14]*x0 + A[15]*x1 + A[16]*x2 + A[17]*x3 + A[18]*x4 + A[19]*x5 + A[20]*x6;
1026 y[3] -= A[21]*x0 + A[22]*x1 + A[23]*x2 + A[24]*x3 + A[25]*x4 + A[26]*x5 + A[27]*x6;
1027 y[4] -= A[28]*x0 + A[29]*x1 + A[30]*x2 + A[31]*x3 + A[32]*x4 + A[33]*x5 + A[34]*x6;
1028 y[5] -= A[35]*x0 + A[36]*x1 + A[37]*x2 + A[38]*x3 + A[39]*x4 + A[40]*x5 + A[41]*x6;
1029 y[6] -= A[42]*x0 + A[43]*x1 + A[44]*x2 + A[45]*x3 + A[46]*x4 + A[47]*x5 + A[48]*x6;
1030 break;
1031 }
1032 default: // Everything beyond 7
1033 {
1034 INT i,j,k;
1035
1036 for ( k = i = 0; i < n; i++, k+=n ) {
1037 for ( j = 0; j < n; j++ ) {
1038 y[i] -= A[k+j]*x[j];
1039 }
1040 }
1041 break;
1042 }
1043 }
1044
1045 return;
1046}
1047
1064void fasp_blas_smat_aAxpby (const REAL alpha,
1065 const REAL *A,
1066 const REAL *x,
1067 const REAL beta,
1068 REAL *y,
1069 const INT n)
1070{
1071 INT i,j,k;
1072 REAL tmp = 0.0;
1073
1074 if ( alpha == 0 ) {
1075 for (i = 0; i < n; i ++) y[i] *= beta;
1076 return;
1077 }
1078
1079 // y := (beta/alpha)y
1080 tmp = beta / alpha;
1081 if ( tmp != 1.0 ) {
1082 for (i = 0; i < n; i ++) y[i] *= tmp;
1083 }
1084
1085 // y := y + A*x
1086 for ( k = i = 0; i < n; i++, k+=n ) {
1087 for (j = 0; j < n; j ++) {
1088 y[i] += A[k+j]*x[j];
1089 }
1090 }
1091
1092 // y := alpha*y
1093 if ( alpha != 1.0 ) {
1094 for ( i = 0; i < n; i ++ ) y[i] *= alpha;
1095 }
1096}
1097
1098/*---------------------------------*/
1099/*-- End of File --*/
1100/*---------------------------------*/
void fasp_blas_smat_ypAx(const REAL *A, const REAL *x, REAL *y, const INT n)
Compute y := y + Ax, where 'A' is a n*n dense matrix.
Definition: BlaSmallMat.c:720
void fasp_blas_smat_axm(REAL *a, const INT n, const REAL alpha)
Compute a = alpha*a (in place)
Definition: BlaSmallMat.c:37
void fasp_blas_smat_mul_nc4(const REAL *a, const REAL *b, REAL *c)
Compute the matrix product of two 4*4 matrices a and b, stored in c.
Definition: BlaSmallMat.c:341
void fasp_blas_smat_ypAx_nc3(const REAL *A, const REAL *x, REAL *y)
Compute y := y + Ax, where 'A' is a 3*3 dense matrix.
Definition: BlaSmallMat.c:613
void fasp_blas_smat_ypAx_nc2(const REAL *A, const REAL *x, REAL *y)
Compute y := y + Ax, where 'A' is a 2*2 dense matrix.
Definition: BlaSmallMat.c:589
void fasp_blas_smat_mxv_nc7(const REAL *a, const REAL *b, REAL *c)
Compute the product of a 7*7 matrix a and a array b, stored in c.
Definition: BlaSmallMat.c:188
void fasp_blas_smat_mul(const REAL *a, const REAL *b, REAL *c, const INT n)
Compute the matrix product of two small full matrices a and b, stored in c.
Definition: BlaSmallMat.c:540
void fasp_blas_smat_mxv_nc5(const REAL *a, const REAL *b, REAL *c)
Compute the product of a 5*5 matrix a and a array b, stored in c.
Definition: BlaSmallMat.c:162
void fasp_blas_smat_ymAx_nc3(const REAL *A, const REAL *x, REAL *y)
Compute y := y - Ax, where 'A' is a 3*3 dense matrix.
Definition: BlaSmallMat.c:846
void fasp_blas_smat_mxv_nc4(const REAL *a, const REAL *b, REAL *c)
Compute the product of a 4*4 matrix a and a array b, stored in c.
Definition: BlaSmallMat.c:138
void fasp_blas_smat_ymAx_nc7(const REAL *A, const REAL *x, REAL *y)
Compute y := y - Ax, where 'A' is a 7*7 dense matrix.
Definition: BlaSmallMat.c:929
void fasp_blas_smat_aAxpby(const REAL alpha, const REAL *A, const REAL *x, const REAL beta, REAL *y, const INT n)
Compute y:=alpha*A*x + beta*y
Definition: BlaSmallMat.c:1064
void fasp_blas_smat_mxv(const REAL *a, const REAL *b, REAL *c, const INT n)
Compute the product of a small full matrix a and a array b, stored in c.
Definition: BlaSmallMat.c:221
void fasp_blas_smat_add(const REAL *a, const REAL *b, const INT n, const REAL alpha, const REAL beta, REAL *c)
Compute c = alpha*a + beta*b.
Definition: BlaSmallMat.c:65
void fasp_blas_smat_ymAx(const REAL *A, const REAL *x, REAL *y, const INT n)
Compute y := y - Ax, where 'A' is a n*n dense matrix.
Definition: BlaSmallMat.c:962
void fasp_blas_smat_mul_nc2(const REAL *a, const REAL *b, REAL *c)
Compute the matrix product of two 2* matrices a and b, stored in c.
Definition: BlaSmallMat.c:275
void fasp_blas_smat_ypAx_nc4(const REAL *A, const REAL *x, REAL *y)
Compute y := y + Ax, where 'A' is a 4*4 dense matrix.
Definition: BlaSmallMat.c:637
void fasp_blas_smat_mul_nc3(const REAL *a, const REAL *b, REAL *c)
Compute the matrix product of two 3*3 matrices a and b, stored in c.
Definition: BlaSmallMat.c:304
void fasp_blas_smat_mul_nc7(const REAL *a, const REAL *b, REAL *c)
Compute the matrix product of two 7*7 matrices a and b, stored in c.
Definition: BlaSmallMat.c:447
void fasp_blas_smat_ypAx_nc5(const REAL *A, const REAL *x, REAL *y)
Compute y := y + Ax, where 'A' is a 5*5 dense matrix.
Definition: BlaSmallMat.c:662
void fasp_blas_smat_mxv_nc3(const REAL *a, const REAL *b, REAL *c)
Compute the product of a 3*3 matrix a and a array b, stored in c.
Definition: BlaSmallMat.c:115
void fasp_blas_smat_ypAx_nc7(const REAL *A, const REAL *x, REAL *y)
Compute y := y + Ax, where 'A' is a 7*7 dense matrix.
Definition: BlaSmallMat.c:688
void fasp_blas_smat_mxv_nc2(const REAL *a, const REAL *b, REAL *c)
Compute the product of a 2*2 matrix a and a array b, stored in c.
Definition: BlaSmallMat.c:93
void fasp_blas_smat_ymAx_nc4(const REAL *A, const REAL *x, REAL *y)
Compute y := y - Ax, where 'A' is a 4*4 dense matrix.
Definition: BlaSmallMat.c:873
void fasp_blas_smat_mul_nc5(const REAL *a, const REAL *b, REAL *c)
Compute the matrix product of two 5*5 matrices a and b, stored in c.
Definition: BlaSmallMat.c:388
void fasp_blas_smat_ymAx_nc2(const REAL *A, const REAL *x, REAL *y)
Compute y := y - Ax, where 'A' is a 2*2 dense matrix.
Definition: BlaSmallMat.c:820
void fasp_blas_smat_ymAx_nc5(const REAL *A, const REAL *x, REAL *y)
Compute y := y - Ax, where 'A' is a 5*5 dense matrix.
Definition: BlaSmallMat.c:900
Main header file for the FASP project.
#define REAL
Definition: fasp.h:67
#define INT
Definition: fasp.h:64