From 464781235b4b4dced59ebc188863410d1412d1b7 Mon Sep 17 00:00:00 2001 From: Amrita H S Date: Wed, 10 Jun 2026 02:07:05 -0500 Subject: [PATCH] Power10: Fix __builtin_vsx_lxvp offset for Clang Use 0L instead of 0 for offset parameter in __builtin_vsx_lxvp to satisfy Clang compiler requirements. Signed-off-by: Amrita H S --- kernel/power/dgemm_kernel_power10.c | 36 +++--- kernel/power/dgemm_small_kernel_nn_power10.c | 4 +- kernel/power/dgemm_small_kernel_nt_power10.c | 6 +- kernel/power/dgemm_small_kernel_tt_power10.c | 6 +- kernel/power/dgemv_t_power10.c | 18 +-- kernel/power/zgemm_kernel_power10.c | 120 +++++++++---------- 6 files changed, 95 insertions(+), 95 deletions(-) diff --git a/kernel/power/dgemm_kernel_power10.c b/kernel/power/dgemm_kernel_power10.c index 6ec6ee51b2..4141fbcbc4 100644 --- a/kernel/power/dgemm_kernel_power10.c +++ b/kernel/power/dgemm_kernel_power10.c @@ -94,8 +94,8 @@ typedef FLOAT v4sf_t __attribute__ ((vector_size (16))); #endif #define KERNEL(i) \ rowA = (vec_t *)&AO[(i)<< 3];\ - rowB = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&BO[(i) << 3])); \ - rowB1 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&BO[((i) << 3) + 4])); \ + rowB = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&BO[(i) << 3])); \ + rowB1 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&BO[((i) << 3) + 4])); \ __builtin_mma_xvf64gerpp(&acc0, rowB, rowA[0]);\ __builtin_mma_xvf64gerpp(&acc1, rowB1, rowA[0]);\ __builtin_mma_xvf64gerpp(&acc2, rowB, rowA[1]);\ @@ -200,8 +200,8 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B, BLASLONG l = 1; vec_t *rowA = (vec_t *) & AO[0]; __vector_pair rowB, rowB1; - rowB = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&BO[0])); - rowB1 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&BO[4])); + rowB = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&BO[0])); + rowB1 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&BO[4])); __builtin_mma_xvf64ger (&acc0, rowB, rowA[0]); __builtin_mma_xvf64ger (&acc1, rowB1, rowA[0]); __builtin_mma_xvf64ger (&acc2, rowB, rowA[1]); @@ -283,16 +283,16 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B, BLASLONG l = 0; vec_t *rowA = (vec_t *) & AO[0]; __vector_pair rowB, rowB1; - rowB = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&BO[0])); - rowB1 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&BO[4])); + rowB = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&BO[0])); + rowB1 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&BO[4])); __builtin_mma_xvf64ger (&acc0, rowB, rowA[0]); __builtin_mma_xvf64ger (&acc1, rowB1, rowA[0]); __builtin_mma_xvf64ger (&acc2, rowB, rowA[1]); __builtin_mma_xvf64ger (&acc3, rowB1, rowA[1]); for (l = 1; l < temp; l++) { rowA = (vec_t *) & AO[l << 2]; - rowB = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&BO[l << 3])); - rowB1 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&BO[(l << 3) + 4])); + rowB = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&BO[l << 3])); + rowB1 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&BO[(l << 3) + 4])); __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]); __builtin_mma_xvf64gerpp (&acc1, rowB1, rowA[0]); __builtin_mma_xvf64gerpp (&acc2, rowB, rowA[1]); @@ -323,14 +323,14 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B, BLASLONG l = 0; vec_t *rowA = (vec_t *) & AO[0]; __vector_pair rowB, rowB1; - rowB = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&BO[0])); - rowB1 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&BO[4])); + rowB = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&BO[0])); + rowB1 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&BO[4])); __builtin_mma_xvf64ger (&acc0, rowB, rowA[0]); __builtin_mma_xvf64ger (&acc1, rowB1, rowA[0]); for (l = 1; l < temp; l++) { rowA = (vec_t *) & AO[l << 1]; - rowB = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&BO[l << 3])); - rowB1 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&BO[(l << 3) + 4])); + rowB = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&BO[l << 3])); + rowB1 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&BO[(l << 3) + 4])); __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]); __builtin_mma_xvf64gerpp (&acc1, rowB1, rowA[0]); } @@ -428,14 +428,14 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B, BLASLONG l = 0; vec_t *rowA = (vec_t *) & AO[0]; __vector_pair rowB; - rowB = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&BO[0])); + rowB = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&BO[0])); __builtin_mma_xvf64ger (&acc0, rowB, rowA[0]); __builtin_mma_xvf64ger (&acc1, rowB, rowA[1]); __builtin_mma_xvf64ger (&acc2, rowB, rowA[2]); __builtin_mma_xvf64ger (&acc3, rowB, rowA[3]); for (l = 1; l < temp; l++) { rowA = (vec_t *) & AO[l << 3]; - rowB = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&BO[l << 2])); + rowB = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&BO[l << 2])); __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]); __builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]); __builtin_mma_xvf64gerpp (&acc2, rowB, rowA[2]); @@ -466,12 +466,12 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B, BLASLONG l = 0; vec_t *rowA = (vec_t *) & AO[0]; __vector_pair rowB; - rowB = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&BO[0])); + rowB = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&BO[0])); __builtin_mma_xvf64ger (&acc0, rowB, rowA[0]); __builtin_mma_xvf64ger (&acc1, rowB, rowA[1]); for (l = 1; l < temp; l++) { rowA = (vec_t *) & AO[l << 2]; - rowB = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&BO[l << 2])); + rowB = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&BO[l << 2])); __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]); __builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]); } @@ -498,11 +498,11 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B, BLASLONG l = 0; vec_t *rowA = (vec_t *) & AO[0]; __vector_pair rowB; - rowB = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&BO[0])); + rowB = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&BO[0])); __builtin_mma_xvf64ger (&acc0, rowB, rowA[0]); for (l = 1; l < temp; l++) { rowA = (vec_t *) & AO[l << 1]; - rowB = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&BO[l << 2])); + rowB = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&BO[l << 2])); __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]); } SAVE_ACC (&acc0, 0); diff --git a/kernel/power/dgemm_small_kernel_nn_power10.c b/kernel/power/dgemm_small_kernel_nn_power10.c index 59bee5fe5d..6a373a1fd8 100644 --- a/kernel/power/dgemm_small_kernel_nn_power10.c +++ b/kernel/power/dgemm_small_kernel_nn_power10.c @@ -314,8 +314,8 @@ typedef __vector unsigned char vec_t; *((__vector_pair *)(void *)(packB+(k*8)+4+offset)) = pb1; #define LOAD_PACKED_B(pb0, pb1, offset) \ - pb0 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)(packB+(k*8)+0+offset))); \ - pb1 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)(packB+(k*8)+4+offset))); + pb0 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)(packB+(k*8)+0+offset))); \ + pb1 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)(packB+(k*8)+4+offset))); #ifdef B0 int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, IFLOAT * A, BLASLONG lda, FLOAT alpha, IFLOAT * B, BLASLONG ldb, FLOAT * C, BLASLONG ldc) diff --git a/kernel/power/dgemm_small_kernel_nt_power10.c b/kernel/power/dgemm_small_kernel_nt_power10.c index 009f2dd6cd..b3a8b8f3c6 100644 --- a/kernel/power/dgemm_small_kernel_nt_power10.c +++ b/kernel/power/dgemm_small_kernel_nt_power10.c @@ -144,11 +144,11 @@ typedef __vector unsigned char vec_t; #define LOAD_A_1x1(K, M) ra0 = vec_splats(A[K*lda+M]); #define LOAD_BP_1x8(K, N) \ - pb0 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&B[((K)*ldb)+N+0])); \ - pb1 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&B[((K)*ldb)+N+4])); + pb0 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&B[((K)*ldb)+N+0])); \ + pb1 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&B[((K)*ldb)+N+4])); #define LOAD_BP_1x4(K, N) \ - pb0 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&B[((K)*ldb)+N+0])); + pb0 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&B[((K)*ldb)+N+0])); #define LOAD_BP_1x2(K, N) \ t0 = vec_xl(0, B+(K*ldb)+N); \ diff --git a/kernel/power/dgemm_small_kernel_tt_power10.c b/kernel/power/dgemm_small_kernel_tt_power10.c index 62e3b8b678..13e6b1cb2d 100644 --- a/kernel/power/dgemm_small_kernel_tt_power10.c +++ b/kernel/power/dgemm_small_kernel_tt_power10.c @@ -207,11 +207,11 @@ typedef __vector unsigned char vec_t; #define LOAD_A_1x1(M, K) ra0 = vec_splats(A[(M)*lda+K]); #define LOAD_BP_1x8(K, N) \ - pb0 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&B[((K)*ldb)+N+0])); \ - pb1 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&B[((K)*ldb)+N+4])); + pb0 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&B[((K)*ldb)+N+0])); \ + pb1 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&B[((K)*ldb)+N+4])); #define LOAD_BP_1x4(K, N) \ - pb0 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&B[((K)*ldb)+N+0])); + pb0 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&B[((K)*ldb)+N+0])); #define LOAD_BP_1x2(K, N) \ t0 = vec_xl(0, B+((K)*ldb)+N); \ diff --git a/kernel/power/dgemv_t_power10.c b/kernel/power/dgemv_t_power10.c index 9aaeec902f..099517b05a 100644 --- a/kernel/power/dgemv_t_power10.c +++ b/kernel/power/dgemv_t_power10.c @@ -61,37 +61,37 @@ static void dgemv_kernel_4x8(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA a6 = a5 + lda; a7 = a6 + lda; for (i = 0; i < n/2; i += 2) { - vp = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&a0[i*2])); - vx = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&x[i*2])); + vp = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&a0[i*2])); + vx = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&x[i*2])); __builtin_vsx_disassemble_pair (res, &vx); __builtin_vsx_disassemble_pair (res1, &vp); temp0 = vec_madd ((__vector double)res[0], (__vector double)res1[0], temp0); temp0 = vec_madd ((__vector double)res[1], (__vector double)res1[1], temp0); - vp = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&a1[i*2])); + vp = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&a1[i*2])); __builtin_vsx_disassemble_pair (res1, &vp); temp1 = vec_madd ((__vector double)res[0], (__vector double)res1[0], temp1); temp1 = vec_madd ((__vector double)res[1], (__vector double)res1[1], temp1); - vp = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&a2[i*2])); + vp = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&a2[i*2])); __builtin_vsx_disassemble_pair (res1, &vp); temp2 = vec_madd ((__vector double)res[0], (__vector double)res1[0], temp2); temp2 = vec_madd ((__vector double)res[1], (__vector double)res1[1], temp2); - vp = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&a3[i*2])); + vp = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&a3[i*2])); __builtin_vsx_disassemble_pair (res1, &vp); temp3 = vec_madd ((__vector double)res[0], (__vector double)res1[0], temp3); temp3 = vec_madd ((__vector double)res[1], (__vector double)res1[1], temp3); - vp = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&a4[i*2])); + vp = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&a4[i*2])); __builtin_vsx_disassemble_pair (res1, &vp); temp4 = vec_madd ((__vector double)res[0], (__vector double)res1[0], temp4); temp4 = vec_madd ((__vector double)res[1], (__vector double)res1[1], temp4); - vp = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&a5[i*2])); + vp = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&a5[i*2])); __builtin_vsx_disassemble_pair (res1, &vp); temp5 = vec_madd ((__vector double)res[0], (__vector double)res1[0], temp5); temp5 = vec_madd ((__vector double)res[1], (__vector double)res1[1], temp5); - vp = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&a6[i*2])); + vp = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&a6[i*2])); __builtin_vsx_disassemble_pair (res1, &vp); temp6 = vec_madd ((__vector double)res[0], (__vector double)res1[0], temp6); temp6 = vec_madd ((__vector double)res[1], (__vector double)res1[1], temp6); - vp = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&a7[i*2])); + vp = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&a7[i*2])); __builtin_vsx_disassemble_pair (res1, &vp); temp7 = vec_madd ((__vector double)res[0], (__vector double)res1[0], temp7); temp7 = vec_madd ((__vector double)res[1], (__vector double)res1[1], temp7); diff --git a/kernel/power/zgemm_kernel_power10.c b/kernel/power/zgemm_kernel_power10.c index 8091418683..b141307bf2 100644 --- a/kernel/power/zgemm_kernel_power10.c +++ b/kernel/power/zgemm_kernel_power10.c @@ -316,10 +316,10 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, FLOAT * SET_ACC_ZERO() for (l = 0; l < temp; ++l) { - __vector_pair rowA1 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[l<<4])); - __vector_pair rowA2 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[(l<<4)+4])); - __vector_pair rowA3 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[(l<<4)+8])); - __vector_pair rowA4 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[(l<<4)+12])); + __vector_pair rowA1 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[l<<4])); + __vector_pair rowA2 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[(l<<4)+4])); + __vector_pair rowA3 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[(l<<4)+8])); + __vector_pair rowA4 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[(l<<4)+12])); vec_t rowB1 = *(vec_t *) & BO[l<<2]; vec_t rowB2 = *(vec_t *) & BO[(l<<2)+2]; __builtin_mma_xvf64gerpp(&acc0, rowA1, rowB1); @@ -406,10 +406,10 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, FLOAT * SET_ACC_ZERO() for (l = 0; l < (temp & (~1)); l+=2) { - __vector_pair rowA1 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[l<<3])); - __vector_pair rowA2 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[(l<<3)+4])); - __vector_pair rowA3 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[(l<<3)+8])); - __vector_pair rowA4 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[(l<<3)+12])); + __vector_pair rowA1 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[l<<3])); + __vector_pair rowA2 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[(l<<3)+4])); + __vector_pair rowA3 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[(l<<3)+8])); + __vector_pair rowA4 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[(l<<3)+12])); vec_t rowB1 = *(vec_t *) & BO[l<<2]; vec_t rowB2 = *(vec_t *) & BO[(l<<2)+2]; vec_t rowB3 = *(vec_t *) & BO[(l<<2)+4]; @@ -425,8 +425,8 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, FLOAT * } for (l = (temp & (~1)); l < temp; ++l) { - __vector_pair rowA1 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[l<<3])); - __vector_pair rowA2 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[(l<<3)+4])); + __vector_pair rowA1 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[l<<3])); + __vector_pair rowA2 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[(l<<3)+4])); vec_t rowB1 = *(vec_t *) & BO[l<<2]; vec_t rowB2 = *(vec_t *) & BO[(l<<2)+2]; __builtin_mma_xvf64gerpp(&acc0, rowA1, rowB1); @@ -454,10 +454,10 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, FLOAT * SET_ACC_ZERO() for (l = 0; l < (temp & (~3)); l+=4) { - __vector_pair rowA1 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[l<<2])); - __vector_pair rowA2 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[(l<<2)+4])); - __vector_pair rowA3 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[(l<<2)+8])); - __vector_pair rowA4 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[(l<<2)+12])); + __vector_pair rowA1 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[l<<2])); + __vector_pair rowA2 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[(l<<2)+4])); + __vector_pair rowA3 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[(l<<2)+8])); + __vector_pair rowA4 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[(l<<2)+12])); vec_t rowB1 = *(vec_t *) & BO[l<<2]; vec_t rowB2 = *(vec_t *) & BO[(l<<2)+2]; vec_t rowB3 = *(vec_t *) & BO[(l<<2)+4]; @@ -477,7 +477,7 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, FLOAT * } for (l = (temp & (~3)); l < temp; ++l) { - __vector_pair rowA1 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[l<<2])); + __vector_pair rowA1 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[l<<2])); vec_t rowB1 = *(vec_t *) & BO[l<<2]; vec_t rowB2 = *(vec_t *) & BO[(l<<2)+2]; __builtin_mma_xvf64gerpp(&acc0, rowA1, rowB1); @@ -503,10 +503,10 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, FLOAT * SET_ACC_ZERO() for (l = 0; l < (temp & (~3)); l+=4) { - __vector_pair rowA1 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[l<<1])); - __vector_pair rowA2 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[(l<<1)+2])); - __vector_pair rowA3 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[(l<<1)+4])); - __vector_pair rowA4 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[(l<<1)+6])); + __vector_pair rowA1 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[l<<1])); + __vector_pair rowA2 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[(l<<1)+2])); + __vector_pair rowA3 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[(l<<1)+4])); + __vector_pair rowA4 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[(l<<1)+6])); vec_t rowB1 = *(vec_t *) & BO[l<<2]; vec_t rowB2 = *(vec_t *) & BO[(l<<2)+2]; vec_t rowB3 = *(vec_t *) & BO[(l<<2)+4]; @@ -526,7 +526,7 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, FLOAT * } for (l = (temp & (~3)); l < temp; ++l) { - __vector_pair rowA1 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[l<<1])); + __vector_pair rowA1 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[l<<1])); vec_t rowB1 = *(vec_t *) & BO[l<<2]; vec_t rowB2 = *(vec_t *) & BO[(l<<2)+2]; __builtin_mma_xvf64gerpp(&acc0, rowA1, rowB1); @@ -564,14 +564,14 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, FLOAT * SET_ACC_ZERO() for (l = 0; l < (temp & (~1)); l+=2) { - __vector_pair rowA1 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[l<<4])); - __vector_pair rowA2 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[(l<<4)+4])); - __vector_pair rowA3 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[(l<<4)+8])); - __vector_pair rowA4 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[(l<<4)+12])); - __vector_pair rowA5 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[(l<<4)+16])); - __vector_pair rowA6 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[(l<<4)+20])); - __vector_pair rowA7 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[(l<<4)+24])); - __vector_pair rowA8 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[(l<<4)+28])); + __vector_pair rowA1 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[l<<4])); + __vector_pair rowA2 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[(l<<4)+4])); + __vector_pair rowA3 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[(l<<4)+8])); + __vector_pair rowA4 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[(l<<4)+12])); + __vector_pair rowA5 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[(l<<4)+16])); + __vector_pair rowA6 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[(l<<4)+20])); + __vector_pair rowA7 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[(l<<4)+24])); + __vector_pair rowA8 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[(l<<4)+28])); vec_t rowB1 = *(vec_t *) & BO[l<<1]; vec_t rowB2 = *(vec_t *) & BO[(l<<1)+2]; __builtin_mma_xvf64gerpp(&acc0, rowA1, rowB1); @@ -585,10 +585,10 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, FLOAT * } for (l = (temp & (~1)); l < temp; ++l) { - __vector_pair rowA1 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[l<<4])); - __vector_pair rowA2 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[(l<<4)+4])); - __vector_pair rowA3 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[(l<<4)+8])); - __vector_pair rowA4 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[(l<<4)+12])); + __vector_pair rowA1 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[l<<4])); + __vector_pair rowA2 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[(l<<4)+4])); + __vector_pair rowA3 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[(l<<4)+8])); + __vector_pair rowA4 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[(l<<4)+12])); vec_t rowB1 = *(vec_t *) & BO[l<<1]; __builtin_mma_xvf64gerpp(&acc0, rowA1, rowB1); __builtin_mma_xvf64gerpp(&acc1, rowA2, rowB1); @@ -615,14 +615,14 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, FLOAT * SET_ACC_ZERO() for (l = 0; l < (temp & (~3)); l+=4) { - __vector_pair rowA1 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[l<<3])); - __vector_pair rowA2 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[(l<<3)+4])); - __vector_pair rowA3 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[(l<<3)+8])); - __vector_pair rowA4 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[(l<<3)+12])); - __vector_pair rowA5 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[(l<<3)+16])); - __vector_pair rowA6 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[(l<<3)+20])); - __vector_pair rowA7 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[(l<<3)+24])); - __vector_pair rowA8 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[(l<<3)+28])); + __vector_pair rowA1 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[l<<3])); + __vector_pair rowA2 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[(l<<3)+4])); + __vector_pair rowA3 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[(l<<3)+8])); + __vector_pair rowA4 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[(l<<3)+12])); + __vector_pair rowA5 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[(l<<3)+16])); + __vector_pair rowA6 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[(l<<3)+20])); + __vector_pair rowA7 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[(l<<3)+24])); + __vector_pair rowA8 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[(l<<3)+28])); vec_t rowB1 = *(vec_t *) & BO[l<<1]; vec_t rowB2 = *(vec_t *) & BO[(l<<1)+2]; vec_t rowB3 = *(vec_t *) & BO[(l<<1)+4]; @@ -638,8 +638,8 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, FLOAT * } for (l = (temp & (~3)); l < temp; ++l) { - __vector_pair rowA1 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[l<<3])); - __vector_pair rowA2 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[(l<<3)+4])); + __vector_pair rowA1 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[l<<3])); + __vector_pair rowA2 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[(l<<3)+4])); vec_t rowB1 = *(vec_t *) & BO[l<<1]; __builtin_mma_xvf64gerpp(&acc0, rowA1, rowB1); __builtin_mma_xvf64gerpp(&acc1, rowA2, rowB1); @@ -662,14 +662,14 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, FLOAT * SET_ACC_ZERO() for (l = 0; l < (temp & (~7)); l+=8) { - __vector_pair rowA1 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[l<<2])); - __vector_pair rowA2 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[(l<<2)+4])); - __vector_pair rowA3 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[(l<<2)+8])); - __vector_pair rowA4 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[(l<<2)+12])); - __vector_pair rowA5 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[(l<<2)+16])); - __vector_pair rowA6 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[(l<<2)+20])); - __vector_pair rowA7 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[(l<<2)+24])); - __vector_pair rowA8 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[(l<<2)+28])); + __vector_pair rowA1 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[l<<2])); + __vector_pair rowA2 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[(l<<2)+4])); + __vector_pair rowA3 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[(l<<2)+8])); + __vector_pair rowA4 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[(l<<2)+12])); + __vector_pair rowA5 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[(l<<2)+16])); + __vector_pair rowA6 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[(l<<2)+20])); + __vector_pair rowA7 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[(l<<2)+24])); + __vector_pair rowA8 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[(l<<2)+28])); vec_t rowB1 = *(vec_t *) & BO[l<<1]; vec_t rowB2 = *(vec_t *) & BO[(l<<1)+2]; vec_t rowB3 = *(vec_t *) & BO[(l<<1)+4]; @@ -689,7 +689,7 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, FLOAT * } for (l = (temp & (~7)); l < temp; ++l) { - __vector_pair rowA1 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[l<<2])); + __vector_pair rowA1 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[l<<2])); vec_t rowB1 = *(vec_t *) & BO[l<<1]; __builtin_mma_xvf64gerpp(&acc0, rowA1, rowB1); } @@ -713,14 +713,14 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, FLOAT * SET_ACC_ZERO() for (l = 0; l < (temp & (~7)); l+=8) { - __vector_pair rowA1 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[l<<1])); - __vector_pair rowA2 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[(l<<1)+2])); - __vector_pair rowA3 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[(l<<1)+4])); - __vector_pair rowA4 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[(l<<1)+6])); - __vector_pair rowA5 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[(l<<1)+8])); - __vector_pair rowA6 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[(l<<1)+10])); - __vector_pair rowA7 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[(l<<1)+12])); - __vector_pair rowA8 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[(l<<1)+14])); + __vector_pair rowA1 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[l<<1])); + __vector_pair rowA2 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[(l<<1)+2])); + __vector_pair rowA3 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[(l<<1)+4])); + __vector_pair rowA4 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[(l<<1)+6])); + __vector_pair rowA5 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[(l<<1)+8])); + __vector_pair rowA6 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[(l<<1)+10])); + __vector_pair rowA7 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[(l<<1)+12])); + __vector_pair rowA8 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[(l<<1)+14])); vec_t rowB1 = *(vec_t *) & BO[l<<1]; vec_t rowB2 = *(vec_t *) & BO[(l<<1)+2]; vec_t rowB3 = *(vec_t *) & BO[(l<<1)+4]; @@ -740,7 +740,7 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, FLOAT * } for (l = (temp & (~7)); l < temp; ++l) { - __vector_pair rowA1 = __builtin_vsx_lxvp(0, (__vector_pair *)((void *)&AO[l<<1])); + __vector_pair rowA1 = __builtin_vsx_lxvp(0L, (__vector_pair *)((void *)&AO[l<<1])); vec_t rowB1 = *(vec_t *) & BO[l<<1]; __builtin_mma_xvf64gerpp(&acc0, rowA1, rowB1); }