gcc/testsuite/gcc.dg/vmx/dct.c

   1 /* { dg-do compile } */
   2 #include <altivec.h>
   3
   4 inline void
   5 transpose_vmx (vector signed short *input, vector signed short *output)
   6 {
   7   vector signed short v0, v1, v2, v3, v4, v5, v6, v7;
   8   vector signed short x0, x1, x2, x3, x4, x5, x6, x7;
   9
  10   /* Matrix transpose */
  11   v0 = vec_mergeh (input[0], input[4]);
  12   v1 = vec_mergel (input[0], input[4]);
  13   v2 = vec_mergeh (input[1], input[5]);
  14   v3 = vec_mergel (input[1], input[5]);
  15   v4 = vec_mergeh (input[2], input[6]);
  16   v5 = vec_mergel (input[2], input[6]);
  17   v6 = vec_mergeh (input[3], input[7]);
  18   v7 = vec_mergel (input[3], input[7]);
  19
  20   x0 = vec_mergeh (v0, v4);
  21   x1 = vec_mergel (v0, v4);
  22   x2 = vec_mergeh (v1, v5);
  23   x3 = vec_mergel (v1, v5);
  24   x4 = vec_mergeh (v2, v6);
  25   x5 = vec_mergel (v2, v6);
  26   x6 = vec_mergeh (v3, v7);
  27   x7 = vec_mergel (v3, v7);
  28
  29   output[0] = vec_mergeh (x0, x4);
  30   output[1] = vec_mergel (x0, x4);
  31   output[2] = vec_mergeh (x1, x5);
  32   output[3] = vec_mergel (x1, x5);
  33   output[4] = vec_mergeh (x2, x6);
  34   output[5] = vec_mergel (x2, x6);
  35   output[6] = vec_mergeh (x3, x7);
  36   output[7] = vec_mergel (x3, x7);
  37 }
  38
  39 void
  40 dct_vmx (vector signed short *input, vector signed short *output,
  41          vector signed short *postscale)
  42 {
  43   vector signed short mul0, mul1, mul2, mul3, mul4, mul5, mul6, mul;
  44   vector signed short v0, v1, v2, v3, v4, v5, v6, v7, v8, v9;
  45   vector signed short v20, v21, v22, v23, v24, v25, v26, v27, v31;
  46   int i;
  47   vector signed short in[8], out[8];
  48
  49   /* Load first eight rows of input data */
  50
  51   /* Load multiplication constants */
  52
  53   /* Splat multiplication constants */
  54   mul0 = vec_splat(input[8],0);
  55   mul1 = vec_splat(input[8],1);
  56   mul2 = vec_splat(input[8],2);
  57   mul3 = vec_splat(input[8],3);
  58   mul4 = vec_splat(input[8],4);
  59   mul5 = vec_splat(input[8],5);
  60   mul6 = vec_splat(input[8],6);
  61
  62   /* Perform DCT on the eight columns */
  63
  64   /*********** Stage 1 ***********/
  65
  66   v8 = vec_adds (input[0], input[7]);
  67   v9 = vec_subs (input[0], input[7]);
  68   v0 = vec_adds (input[1], input[6]);
  69   v7 = vec_subs (input[1], input[6]);
  70   v1 = vec_adds (input[2], input[5]);
  71   v6 = vec_subs (input[2], input[5]);
  72   v2 = vec_adds (input[3], input[4]);
  73   v5 = vec_subs (input[3], input[4]);
  74
  75   /*********** Stage 2 ***********/
  76
  77   /* Top */
  78   v3 = vec_adds (v8, v2);               /* (V0+V7) + (V3+V4) */
  79   v4 = vec_subs (v8, v2);               /* (V0+V7) - (V3+V4) */
  80   v2 = vec_adds (v0, v1);               /* (V1+V6) + (V2+V5) */
  81   v8 = vec_subs (v0, v1);               /* (V1+V6) - (V2+V5) */
  82
  83   /* Bottom */
  84   v0 = vec_subs (v7, v6);               /* (V1-V6) - (V2-V5) */
  85   v1 = vec_adds (v7, v6);               /* (V1-V6) + (V2-V5) */
  86
  87   /*********** Stage 3 ***********/
  88
  89   /* Top */
  90   in[0] = vec_adds (v3, v2);            /* y0 = v3 + v2 */
  91   in[4] = vec_subs (v3, v2);            /* y4 = v3 - v2 */
  92   in[2] = vec_mradds (v8, mul2, v4);    /* y2 = v8 * a0 + v4 */
  93   v6 = vec_mradds (v4, mul2, mul6);
  94   in[6] = vec_subs (v6, v8);            /* y6 = v4 * a0 - v8 */
  95
  96   /* Bottom */
  97   v6 = vec_mradds (v0, mul0, v5);       /* v6 = v0 * (c4) + v5 */
  98   v7 = vec_mradds (v0, mul4, v5);       /* v7 = v0 * (-c4) + v5 */
  99   v2 = vec_mradds (v1, mul4, v9);       /* v2 = v1 * (-c4) + v9 */
 100   v3 = vec_mradds (v1, mul0, v9);       /* v3 = v1 * (c4) + v9 */
 101
 102   /*********** Stage 4 ***********/
 103
 104   /* Bottom */
 105   in[1] = vec_mradds (v6, mul3, v3);    /* y1 = v6 * (a1) + v3 */
 106   v23 = vec_mradds (v3, mul3, mul6);
 107   in[7] = vec_subs (v23, v6);           /* y7 = v3 * (a1) - v6 */
 108   in[5] = vec_mradds (v2, mul1, v7);    /* y5 = v2 * (a2) + v7 */
 109   in[3] = vec_mradds (v7, mul5, v2);    /* y3 = v7 * (-a2) + v2 */
 110
 111   transpose_vmx (in, out);
 112
 113   /* Perform DCT on the eight rows */
 114
 115   /*********** Stage 1 ***********/
 116
 117   v8 = vec_adds (out[0], out[7]);
 118   v9 = vec_subs (out[0], out[7]);
 119   v0 = vec_adds (out[1], out[6]);
 120   v7 = vec_subs (out[1], out[6]);
 121   v1 = vec_adds (out[2], out[5]);
 122   v6 = vec_subs (out[2], out[5]);
 123   v2 = vec_adds (out[3], out[4]);
 124   v5 = vec_subs (out[3], out[4]);
 125
 126   /*********** Stage 2 ***********/
 127
 128   /* Top */
 129   v3 = vec_adds (v8, v2);               /* (V0+V7) + (V3+V4) */
 130   v4 = vec_subs (v8, v2);               /* (V0+V7) - (V3+V4) */
 131   v2 = vec_adds (v0, v1);               /* (V1+V6) + (V2+V5) */
 132   v8 = vec_subs (v0, v1);               /* (V1+V6) - (V2+V5) */
 133
 134   /* Bottom */
 135   v0 = vec_subs (v7, v6);               /* (V1-V6) - (V2-V5) */
 136   v1 = vec_adds (v7, v6);               /* (V1-V6) + (V2-V5) */
 137
 138   /*********** Stage 3 ***********/
 139
 140   /* Top */
 141   v25 = vec_subs (v25, v25);          /* reinit v25 = 0 */
 142
 143   v20 = vec_adds (v3, v2);              /* y0 = v3 + v2 */
 144   v24 = vec_subs (v3, v2);              /* y4 = v3 - v2 */
 145   v22 = vec_mradds (v8, mul2, v4);      /* y2 = v8 * a0 + v4 */
 146   v6 = vec_mradds (v4, mul2, v25);
 147   v26 = vec_subs (v6, v8);              /* y6 = v4 * a0 - v8 */
 148
 149   /* Bottom */
 150   v6 = vec_mradds (v0, mul0, v5);       /* v6 = v0 * (c4) + v5 */
 151   v7 = vec_mradds (v0, mul4, v5);       /* v7 = v0 * (-c4) + v5 */
 152   v2 = vec_mradds (v1, mul4, v9);       /* v2 = v1 * (-c4) + v9 */
 153   v3 = vec_mradds (v1, mul0, v9);       /* v3 = v1 * (c4) + v9 */
 154
 155   /*********** Stage 4 ***********/
 156
 157   /* Bottom */
 158   v21 = vec_mradds (v6, mul3, v3);      /* y1 = v6 * (a1) + v3 */
 159   v23 = vec_mradds (v3, mul3, v25);
 160   v27 = vec_subs (v23, v6);             /* y7 = v3 * (a1) - v6 */
 161   v25 = vec_mradds (v2, mul1, v7);      /* y5 = v2 * (a2) + v7 */
 162   v23 = vec_mradds (v7, mul5, v2);      /* y3 = v7 * (-a2) + v2 */
 163
 164   /* Post-scale and store reults */
 165
 166   v31 = vec_subs (v31, v31);          /* reinit v25 = 0 */
 167
 168   output[0] = vec_mradds (postscale[0], v20, v31);
 169   output[2] = vec_mradds (postscale[2], v22, v31);
 170   output[4] = vec_mradds (postscale[4], v24, v31);
 171   output[6] = vec_mradds (postscale[6], v26, v31);
 172   output[1] = vec_mradds (postscale[1], v21, v31);
 173   output[3] = vec_mradds (postscale[3], v23, v31);
 174   output[5] = vec_mradds (postscale[5], v25, v31);
 175   output[7] = vec_mradds (postscale[7], v27, v31);
 176 }