third_party/libwebp/dsp/dec_mips32.c

   1 // Copyright 2014 Google Inc. All Rights Reserved.
   2 //
   3 // Use of this source code is governed by a BSD-style license
   4 // that can be found in the COPYING file in the root of the source
   5 // tree. An additional intellectual property rights grant can be found
   6 // in the file PATENTS. All contributing project authors may
   7 // be found in the AUTHORS file in the root of the source tree.
   8 // -----------------------------------------------------------------------------
   9 //
  10 // MIPS version of dsp functions
  11 //
  12 // Author(s):  Djordje Pesut    (djordje.pesut@imgtec.com)
  13 //             Jovan Zelincevic (jovan.zelincevic@imgtec.com)
  14
  15 #include "./dsp.h"
  16
  17 #if defined(WEBP_USE_MIPS32)
  18
  19 static const int kC1 = 20091 + (1 << 16);
  20 static const int kC2 = 35468;
  21
  22 static WEBP_INLINE int abs_mips32(int x) {
  23   const int sign = x >> 31;
  24   return (x ^ sign) - sign;
  25 }
  26
  27 // 4 pixels in, 2 pixels out
  28 static WEBP_INLINE void do_filter2(uint8_t* p, int step) {
  29   const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
  30   const int a = 3 * (q0 - p0) + VP8ksclip1[p1 - q1];
  31   const int a1 = VP8ksclip2[(a + 4) >> 3];
  32   const int a2 = VP8ksclip2[(a + 3) >> 3];
  33   p[-step] = VP8kclip1[p0 + a2];
  34   p[    0] = VP8kclip1[q0 - a1];
  35 }
  36
  37 // 4 pixels in, 4 pixels out
  38 static WEBP_INLINE void do_filter4(uint8_t* p, int step) {
  39   const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
  40   const int a = 3 * (q0 - p0);
  41   const int a1 = VP8ksclip2[(a + 4) >> 3];
  42   const int a2 = VP8ksclip2[(a + 3) >> 3];
  43   const int a3 = (a1 + 1) >> 1;
  44   p[-2 * step] = VP8kclip1[p1 + a3];
  45   p[-    step] = VP8kclip1[p0 + a2];
  46   p[        0] = VP8kclip1[q0 - a1];
  47   p[     step] = VP8kclip1[q1 - a3];
  48 }
  49
  50 // 6 pixels in, 6 pixels out
  51 static WEBP_INLINE void do_filter6(uint8_t* p, int step) {
  52   const int p2 = p[-3 * step], p1 = p[-2 * step], p0 = p[-step];
  53   const int q0 = p[0], q1 = p[step], q2 = p[2 * step];
  54   const int a = VP8ksclip1[3 * (q0 - p0) + VP8ksclip1[p1 - q1]];
  55   const int a1 = (27 * a + 63) >> 7;  // eq. to ((3 * a + 7) * 9) >> 7
  56   const int a2 = (18 * a + 63) >> 7;  // eq. to ((2 * a + 7) * 9) >> 7
  57   const int a3 = (9  * a + 63) >> 7;  // eq. to ((1 * a + 7) * 9) >> 7
  58   p[-3 * step] = VP8kclip1[p2 + a3];
  59   p[-2 * step] = VP8kclip1[p1 + a2];
  60   p[-    step] = VP8kclip1[p0 + a1];
  61   p[        0] = VP8kclip1[q0 - a1];
  62   p[     step] = VP8kclip1[q1 - a2];
  63   p[ 2 * step] = VP8kclip1[q2 - a3];
  64 }
  65
  66 static WEBP_INLINE int hev(const uint8_t* p, int step, int thresh) {
  67   const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
  68   return (abs_mips32(p1 - p0) > thresh) || (abs_mips32(q1 - q0) > thresh);
  69 }
  70
  71 static WEBP_INLINE int needs_filter(const uint8_t* p, int step, int thresh) {
  72   const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
  73   return ((2 * abs_mips32(p0 - q0) + (abs_mips32(p1 - q1) >> 1)) <= thresh);
  74 }
  75
  76 static WEBP_INLINE int needs_filter2(const uint8_t* p,
  77                                      int step, int t, int it) {
  78   const int p3 = p[-4 * step], p2 = p[-3 * step];
  79   const int p1 = p[-2 * step], p0 = p[-step];
  80   const int q0 = p[0], q1 = p[step], q2 = p[2 * step], q3 = p[3 * step];
  81   if ((2 * abs_mips32(p0 - q0) + (abs_mips32(p1 - q1) >> 1)) > t) {
  82     return 0;
  83   }
  84   return abs_mips32(p3 - p2) <= it && abs_mips32(p2 - p1) <= it &&
  85          abs_mips32(p1 - p0) <= it && abs_mips32(q3 - q2) <= it &&
  86          abs_mips32(q2 - q1) <= it && abs_mips32(q1 - q0) <= it;
  87 }
  88
  89 static WEBP_INLINE void FilterLoop26(uint8_t* p,
  90                                      int hstride, int vstride, int size,
  91                                      int thresh, int ithresh, int hev_thresh) {
  92   while (size-- > 0) {
  93     if (needs_filter2(p, hstride, thresh, ithresh)) {
  94       if (hev(p, hstride, hev_thresh)) {
  95         do_filter2(p, hstride);
  96       } else {
  97         do_filter6(p, hstride);
  98       }
  99     }
 100     p += vstride;
 101   }
 102 }
 103
 104 static WEBP_INLINE void FilterLoop24(uint8_t* p,
 105                                      int hstride, int vstride, int size,
 106                                      int thresh, int ithresh, int hev_thresh) {
 107   while (size-- > 0) {
 108     if (needs_filter2(p, hstride, thresh, ithresh)) {
 109       if (hev(p, hstride, hev_thresh)) {
 110         do_filter2(p, hstride);
 111       } else {
 112         do_filter4(p, hstride);
 113       }
 114     }
 115     p += vstride;
 116   }
 117 }
 118
 119 // on macroblock edges
 120 static void VFilter16(uint8_t* p, int stride,
 121                       int thresh, int ithresh, int hev_thresh) {
 122   FilterLoop26(p, stride, 1, 16, thresh, ithresh, hev_thresh);
 123 }
 124
 125 static void HFilter16(uint8_t* p, int stride,
 126                       int thresh, int ithresh, int hev_thresh) {
 127   FilterLoop26(p, 1, stride, 16, thresh, ithresh, hev_thresh);
 128 }
 129
 130 // 8-pixels wide variant, for chroma filtering
 131 static void VFilter8(uint8_t* u, uint8_t* v, int stride,
 132                      int thresh, int ithresh, int hev_thresh) {
 133   FilterLoop26(u, stride, 1, 8, thresh, ithresh, hev_thresh);
 134   FilterLoop26(v, stride, 1, 8, thresh, ithresh, hev_thresh);
 135 }
 136
 137 static void HFilter8(uint8_t* u, uint8_t* v, int stride,
 138                      int thresh, int ithresh, int hev_thresh) {
 139   FilterLoop26(u, 1, stride, 8, thresh, ithresh, hev_thresh);
 140   FilterLoop26(v, 1, stride, 8, thresh, ithresh, hev_thresh);
 141 }
 142
 143 static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
 144                       int thresh, int ithresh, int hev_thresh) {
 145   FilterLoop24(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
 146   FilterLoop24(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
 147 }
 148
 149 static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
 150                       int thresh, int ithresh, int hev_thresh) {
 151   FilterLoop24(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
 152   FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
 153 }
 154
 155 // on three inner edges
 156 static void VFilter16i(uint8_t* p, int stride,
 157                        int thresh, int ithresh, int hev_thresh) {
 158   int k;
 159   for (k = 3; k > 0; --k) {
 160     p += 4 * stride;
 161     FilterLoop24(p, stride, 1, 16, thresh, ithresh, hev_thresh);
 162   }
 163 }
 164
 165 static void HFilter16i(uint8_t* p, int stride,
 166                        int thresh, int ithresh, int hev_thresh) {
 167   int k;
 168   for (k = 3; k > 0; --k) {
 169     p += 4;
 170     FilterLoop24(p, 1, stride, 16, thresh, ithresh, hev_thresh);
 171   }
 172 }
 173
 174 //------------------------------------------------------------------------------
 175 // Simple In-loop filtering (Paragraph 15.2)
 176
 177 static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
 178   int i;
 179   for (i = 0; i < 16; ++i) {
 180     if (needs_filter(p + i, stride, thresh)) {
 181       do_filter2(p + i, stride);
 182     }
 183   }
 184 }
 185
 186 static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
 187   int i;
 188   for (i = 0; i < 16; ++i) {
 189     if (needs_filter(p + i * stride, 1, thresh)) {
 190       do_filter2(p + i * stride, 1);
 191     }
 192   }
 193 }
 194
 195 static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) {
 196   int k;
 197   for (k = 3; k > 0; --k) {
 198     p += 4 * stride;
 199     SimpleVFilter16(p, stride, thresh);
 200   }
 201 }
 202
 203 static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
 204   int k;
 205   for (k = 3; k > 0; --k) {
 206     p += 4;
 207     SimpleHFilter16(p, stride, thresh);
 208   }
 209 }
 210
 211 static void TransformOne(const int16_t* in, uint8_t* dst) {
 212   int temp0, temp1, temp2, temp3, temp4;
 213   int temp5, temp6, temp7, temp8, temp9;
 214   int temp10, temp11, temp12, temp13, temp14;
 215   int temp15, temp16, temp17, temp18;
 216   int16_t* p_in = (int16_t*)in;
 217
 218   // loops unrolled and merged to avoid usage of tmp buffer
 219   // and to reduce number of stalls. MUL macro is written
 220   // in assembler and inlined
 221   __asm__ volatile(
 222     "lh       %[temp0],  0(%[in])                      \n\t"
 223     "lh       %[temp8],  16(%[in])                     \n\t"
 224     "lh       %[temp4],  8(%[in])                      \n\t"
 225     "lh       %[temp12], 24(%[in])                     \n\t"
 226     "addu     %[temp16], %[temp0],  %[temp8]           \n\t"
 227     "subu     %[temp0],  %[temp0],  %[temp8]           \n\t"
 228     "mul      %[temp8],  %[temp4],  %[kC2]             \n\t"
 229     "mul      %[temp17], %[temp12], %[kC1]             \n\t"
 230     "mul      %[temp4],  %[temp4],  %[kC1]             \n\t"
 231     "mul      %[temp12], %[temp12], %[kC2]             \n\t"
 232     "lh       %[temp1],  2(%[in])                      \n\t"
 233     "lh       %[temp5],  10(%[in])                     \n\t"
 234     "lh       %[temp9],  18(%[in])                     \n\t"
 235     "lh       %[temp13], 26(%[in])                     \n\t"
 236     "sra      %[temp8],  %[temp8],  16                 \n\t"
 237     "sra      %[temp17], %[temp17], 16                 \n\t"
 238     "sra      %[temp4],  %[temp4],  16                 \n\t"
 239     "sra      %[temp12], %[temp12], 16                 \n\t"
 240     "lh       %[temp2],  4(%[in])                      \n\t"
 241     "lh       %[temp6],  12(%[in])                     \n\t"
 242     "lh       %[temp10], 20(%[in])                     \n\t"
 243     "lh       %[temp14], 28(%[in])                     \n\t"
 244     "subu     %[temp17], %[temp8],  %[temp17]          \n\t"
 245     "addu     %[temp4],  %[temp4],  %[temp12]          \n\t"
 246     "addu     %[temp8],  %[temp16], %[temp4]           \n\t"
 247     "subu     %[temp4],  %[temp16], %[temp4]           \n\t"
 248     "addu     %[temp16], %[temp1],  %[temp9]           \n\t"
 249     "subu     %[temp1],  %[temp1],  %[temp9]           \n\t"
 250     "lh       %[temp3],  6(%[in])                      \n\t"
 251     "lh       %[temp7],  14(%[in])                     \n\t"
 252     "lh       %[temp11], 22(%[in])                     \n\t"
 253     "lh       %[temp15], 30(%[in])                     \n\t"
 254     "addu     %[temp12], %[temp0],  %[temp17]          \n\t"
 255     "subu     %[temp0],  %[temp0],  %[temp17]          \n\t"
 256     "mul      %[temp9],  %[temp5],  %[kC2]             \n\t"
 257     "mul      %[temp17], %[temp13], %[kC1]             \n\t"
 258     "mul      %[temp5],  %[temp5],  %[kC1]             \n\t"
 259     "mul      %[temp13], %[temp13], %[kC2]             \n\t"
 260     "sra      %[temp9],  %[temp9],  16                 \n\t"
 261     "sra      %[temp17], %[temp17], 16                 \n\t"
 262     "subu     %[temp17], %[temp9],  %[temp17]          \n\t"
 263     "sra      %[temp5],  %[temp5],  16                 \n\t"
 264     "sra      %[temp13], %[temp13], 16                 \n\t"
 265     "addu     %[temp5],  %[temp5],  %[temp13]          \n\t"
 266     "addu     %[temp13], %[temp1],  %[temp17]          \n\t"
 267     "subu     %[temp1],  %[temp1],  %[temp17]          \n\t"
 268     "mul      %[temp17], %[temp14], %[kC1]             \n\t"
 269     "mul      %[temp14], %[temp14], %[kC2]             \n\t"
 270     "addu     %[temp9],  %[temp16], %[temp5]           \n\t"
 271     "subu     %[temp5],  %[temp16], %[temp5]           \n\t"
 272     "addu     %[temp16], %[temp2],  %[temp10]          \n\t"
 273     "subu     %[temp2],  %[temp2],  %[temp10]          \n\t"
 274     "mul      %[temp10], %[temp6],  %[kC2]             \n\t"
 275     "mul      %[temp6],  %[temp6],  %[kC1]             \n\t"
 276     "sra      %[temp17], %[temp17], 16                 \n\t"
 277     "sra      %[temp14], %[temp14], 16                 \n\t"
 278     "sra      %[temp10], %[temp10], 16                 \n\t"
 279     "sra      %[temp6],  %[temp6],  16                 \n\t"
 280     "subu     %[temp17], %[temp10], %[temp17]          \n\t"
 281     "addu     %[temp6],  %[temp6],  %[temp14]          \n\t"
 282     "addu     %[temp10], %[temp16], %[temp6]           \n\t"
 283     "subu     %[temp6],  %[temp16], %[temp6]           \n\t"
 284     "addu     %[temp14], %[temp2],  %[temp17]          \n\t"
 285     "subu     %[temp2],  %[temp2],  %[temp17]          \n\t"
 286     "mul      %[temp17], %[temp15], %[kC1]             \n\t"
 287     "mul      %[temp15], %[temp15], %[kC2]             \n\t"
 288     "addu     %[temp16], %[temp3],  %[temp11]          \n\t"
 289     "subu     %[temp3],  %[temp3],  %[temp11]          \n\t"
 290     "mul      %[temp11], %[temp7],  %[kC2]             \n\t"
 291     "mul      %[temp7],  %[temp7],  %[kC1]             \n\t"
 292     "addiu    %[temp8],  %[temp8],  4                  \n\t"
 293     "addiu    %[temp12], %[temp12], 4                  \n\t"
 294     "addiu    %[temp0],  %[temp0],  4                  \n\t"
 295     "addiu    %[temp4],  %[temp4],  4                  \n\t"
 296     "sra      %[temp17], %[temp17], 16                 \n\t"
 297     "sra      %[temp15], %[temp15], 16                 \n\t"
 298     "sra      %[temp11], %[temp11], 16                 \n\t"
 299     "sra      %[temp7],  %[temp7],  16                 \n\t"
 300     "subu     %[temp17], %[temp11], %[temp17]          \n\t"
 301     "addu     %[temp7],  %[temp7],  %[temp15]          \n\t"
 302     "addu     %[temp15], %[temp3],  %[temp17]          \n\t"
 303     "subu     %[temp3],  %[temp3],  %[temp17]          \n\t"
 304     "addu     %[temp11], %[temp16], %[temp7]           \n\t"
 305     "subu     %[temp7],  %[temp16], %[temp7]           \n\t"
 306     "addu     %[temp16], %[temp8],  %[temp10]          \n\t"
 307     "subu     %[temp8],  %[temp8],  %[temp10]          \n\t"
 308     "mul      %[temp10], %[temp9],  %[kC2]             \n\t"
 309     "mul      %[temp17], %[temp11], %[kC1]             \n\t"
 310     "mul      %[temp9],  %[temp9],  %[kC1]             \n\t"
 311     "mul      %[temp11], %[temp11], %[kC2]             \n\t"
 312     "sra      %[temp10], %[temp10], 16                 \n\t"
 313     "sra      %[temp17], %[temp17], 16                 \n\t"
 314     "sra      %[temp9],  %[temp9],  16                 \n\t"
 315     "sra      %[temp11], %[temp11], 16                 \n\t"
 316     "subu     %[temp17], %[temp10], %[temp17]          \n\t"
 317     "addu     %[temp11], %[temp9],  %[temp11]          \n\t"
 318     "addu     %[temp10], %[temp12], %[temp14]          \n\t"
 319     "subu     %[temp12], %[temp12], %[temp14]          \n\t"
 320     "mul      %[temp14], %[temp13], %[kC2]             \n\t"
 321     "mul      %[temp9],  %[temp15], %[kC1]             \n\t"
 322     "mul      %[temp13], %[temp13], %[kC1]             \n\t"
 323     "mul      %[temp15], %[temp15], %[kC2]             \n\t"
 324     "sra      %[temp14], %[temp14], 16                 \n\t"
 325     "sra      %[temp9],  %[temp9],  16                 \n\t"
 326     "sra      %[temp13], %[temp13], 16                 \n\t"
 327     "sra      %[temp15], %[temp15], 16                 \n\t"
 328     "subu     %[temp9],  %[temp14], %[temp9]           \n\t"
 329     "addu     %[temp15], %[temp13], %[temp15]          \n\t"
 330     "addu     %[temp14], %[temp0],  %[temp2]           \n\t"
 331     "subu     %[temp0],  %[temp0],  %[temp2]           \n\t"
 332     "mul      %[temp2],  %[temp1],  %[kC2]             \n\t"
 333     "mul      %[temp13], %[temp3],  %[kC1]             \n\t"
 334     "mul      %[temp1],  %[temp1],  %[kC1]             \n\t"
 335     "mul      %[temp3],  %[temp3],  %[kC2]             \n\t"
 336     "sra      %[temp2],  %[temp2],  16                 \n\t"
 337     "sra      %[temp13], %[temp13], 16                 \n\t"
 338     "sra      %[temp1],  %[temp1],  16                 \n\t"
 339     "sra      %[temp3],  %[temp3],  16                 \n\t"
 340     "subu     %[temp13], %[temp2],  %[temp13]          \n\t"
 341     "addu     %[temp3],  %[temp1],  %[temp3]           \n\t"
 342     "addu     %[temp2],  %[temp4],  %[temp6]           \n\t"
 343     "subu     %[temp4],  %[temp4],  %[temp6]           \n\t"
 344     "mul      %[temp6],  %[temp5],  %[kC2]             \n\t"
 345     "mul      %[temp1],  %[temp7],  %[kC1]             \n\t"
 346     "mul      %[temp5],  %[temp5],  %[kC1]             \n\t"
 347     "mul      %[temp7],  %[temp7],  %[kC2]             \n\t"
 348     "sra      %[temp6],  %[temp6],  16                 \n\t"
 349     "sra      %[temp1],  %[temp1],  16                 \n\t"
 350     "sra      %[temp5],  %[temp5],  16                 \n\t"
 351     "sra      %[temp7],  %[temp7],  16                 \n\t"
 352     "subu     %[temp1],  %[temp6],  %[temp1]           \n\t"
 353     "addu     %[temp7],  %[temp5],  %[temp7]           \n\t"
 354     "addu     %[temp5],  %[temp16], %[temp11]          \n\t"
 355     "subu     %[temp16], %[temp16], %[temp11]          \n\t"
 356     "addu     %[temp11], %[temp8],  %[temp17]          \n\t"
 357     "subu     %[temp8],  %[temp8],  %[temp17]          \n\t"
 358     "sra      %[temp5],  %[temp5],  3                  \n\t"
 359     "sra      %[temp16], %[temp16], 3                  \n\t"
 360     "sra      %[temp11], %[temp11], 3                  \n\t"
 361     "sra      %[temp8],  %[temp8],  3                  \n\t"
 362     "addu     %[temp17], %[temp10], %[temp15]          \n\t"
 363     "subu     %[temp10], %[temp10], %[temp15]          \n\t"
 364     "addu     %[temp15], %[temp12], %[temp9]           \n\t"
 365     "subu     %[temp12], %[temp12], %[temp9]           \n\t"
 366     "sra      %[temp17], %[temp17], 3                  \n\t"
 367     "sra      %[temp10], %[temp10], 3                  \n\t"
 368     "sra      %[temp15], %[temp15], 3                  \n\t"
 369     "sra      %[temp12], %[temp12], 3                  \n\t"
 370     "addu     %[temp9],  %[temp14], %[temp3]           \n\t"
 371     "subu     %[temp14], %[temp14], %[temp3]           \n\t"
 372     "addu     %[temp3],  %[temp0],  %[temp13]          \n\t"
 373     "subu     %[temp0],  %[temp0],  %[temp13]          \n\t"
 374     "sra      %[temp9],  %[temp9],  3                  \n\t"
 375     "sra      %[temp14], %[temp14], 3                  \n\t"
 376     "sra      %[temp3],  %[temp3],  3                  \n\t"
 377     "sra      %[temp0],  %[temp0],  3                  \n\t"
 378     "addu     %[temp13], %[temp2],  %[temp7]           \n\t"
 379     "subu     %[temp2],  %[temp2],  %[temp7]           \n\t"
 380     "addu     %[temp7],  %[temp4],  %[temp1]           \n\t"
 381     "subu     %[temp4],  %[temp4],  %[temp1]           \n\t"
 382     "sra      %[temp13], %[temp13], 3                  \n\t"
 383     "sra      %[temp2],  %[temp2],  3                  \n\t"
 384     "sra      %[temp7],  %[temp7],  3                  \n\t"
 385     "sra      %[temp4],  %[temp4],  3                  \n\t"
 386     "addiu    %[temp6],  $zero,     255                \n\t"
 387     "lbu      %[temp1],  0(%[dst])                     \n\t"
 388     "addu     %[temp1],  %[temp1],  %[temp5]           \n\t"
 389     "sra      %[temp5],  %[temp1],  8                  \n\t"
 390     "sra      %[temp18], %[temp1],  31                 \n\t"
 391     "beqz     %[temp5],  1f                            \n\t"
 392     "xor      %[temp1],  %[temp1],  %[temp1]           \n\t"
 393     "movz     %[temp1],  %[temp6],  %[temp18]          \n\t"
 394   "1:                                                  \n\t"
 395     "lbu      %[temp18], 1(%[dst])                     \n\t"
 396     "sb       %[temp1],  0(%[dst])                     \n\t"
 397     "addu     %[temp18], %[temp18], %[temp11]          \n\t"
 398     "sra      %[temp11], %[temp18], 8                  \n\t"
 399     "sra      %[temp1],  %[temp18], 31                 \n\t"
 400     "beqz     %[temp11], 2f                            \n\t"
 401     "xor      %[temp18], %[temp18], %[temp18]          \n\t"
 402     "movz     %[temp18], %[temp6],  %[temp1]           \n\t"
 403   "2:                                                  \n\t"
 404     "lbu      %[temp1],  2(%[dst])                     \n\t"
 405     "sb       %[temp18], 1(%[dst])                     \n\t"
 406     "addu     %[temp1],  %[temp1],  %[temp8]           \n\t"
 407     "sra      %[temp8],  %[temp1],  8                  \n\t"
 408     "sra      %[temp18], %[temp1],  31                 \n\t"
 409     "beqz     %[temp8],  3f                            \n\t"
 410     "xor      %[temp1],  %[temp1],  %[temp1]           \n\t"
 411     "movz     %[temp1],  %[temp6],  %[temp18]          \n\t"
 412   "3:                                                  \n\t"
 413     "lbu      %[temp18], 3(%[dst])                     \n\t"
 414     "sb       %[temp1],  2(%[dst])                     \n\t"
 415     "addu     %[temp18], %[temp18], %[temp16]          \n\t"
 416     "sra      %[temp16], %[temp18], 8                  \n\t"
 417     "sra      %[temp1],  %[temp18], 31                 \n\t"
 418     "beqz     %[temp16], 4f                            \n\t"
 419     "xor      %[temp18], %[temp18], %[temp18]          \n\t"
 420     "movz     %[temp18], %[temp6],  %[temp1]           \n\t"
 421   "4:                                                  \n\t"
 422     "sb       %[temp18], 3(%[dst])                     \n\t"
 423     "lbu      %[temp5],  32(%[dst])                    \n\t"
 424     "lbu      %[temp8],  33(%[dst])                    \n\t"
 425     "lbu      %[temp11], 34(%[dst])                    \n\t"
 426     "lbu      %[temp16], 35(%[dst])                    \n\t"
 427     "addu     %[temp5],  %[temp5],  %[temp17]          \n\t"
 428     "addu     %[temp8],  %[temp8],  %[temp15]          \n\t"
 429     "addu     %[temp11], %[temp11], %[temp12]          \n\t"
 430     "addu     %[temp16], %[temp16], %[temp10]          \n\t"
 431     "sra      %[temp18], %[temp5],  8                  \n\t"
 432     "sra      %[temp1],  %[temp5],  31                 \n\t"
 433     "beqz     %[temp18], 5f                            \n\t"
 434     "xor      %[temp5],  %[temp5],  %[temp5]           \n\t"
 435     "movz     %[temp5],  %[temp6],  %[temp1]           \n\t"
 436   "5:                                                  \n\t"
 437     "sra      %[temp18], %[temp8],  8                  \n\t"
 438     "sra      %[temp1],  %[temp8],  31                 \n\t"
 439     "beqz     %[temp18], 6f                            \n\t"
 440     "xor      %[temp8],  %[temp8],  %[temp8]           \n\t"
 441     "movz     %[temp8],  %[temp6],  %[temp1]           \n\t"
 442   "6:                                                  \n\t"
 443     "sra      %[temp18], %[temp11], 8                  \n\t"
 444     "sra      %[temp1],  %[temp11], 31                 \n\t"
 445     "sra      %[temp17], %[temp16], 8                  \n\t"
 446     "sra      %[temp15], %[temp16], 31                 \n\t"
 447     "beqz     %[temp18], 7f                            \n\t"
 448     "xor      %[temp11], %[temp11], %[temp11]          \n\t"
 449     "movz     %[temp11], %[temp6],  %[temp1]           \n\t"
 450   "7:                                                  \n\t"
 451     "beqz     %[temp17], 8f                            \n\t"
 452     "xor      %[temp16], %[temp16], %[temp16]          \n\t"
 453     "movz     %[temp16], %[temp6],  %[temp15]          \n\t"
 454   "8:                                                  \n\t"
 455     "sb       %[temp5],  32(%[dst])                    \n\t"
 456     "sb       %[temp8],  33(%[dst])                    \n\t"
 457     "sb       %[temp11], 34(%[dst])                    \n\t"
 458     "sb       %[temp16], 35(%[dst])                    \n\t"
 459     "lbu      %[temp5],  64(%[dst])                    \n\t"
 460     "lbu      %[temp8],  65(%[dst])                    \n\t"
 461     "lbu      %[temp11], 66(%[dst])                    \n\t"
 462     "lbu      %[temp16], 67(%[dst])                    \n\t"
 463     "addu     %[temp5],  %[temp5],  %[temp9]           \n\t"
 464     "addu     %[temp8],  %[temp8],  %[temp3]           \n\t"
 465     "addu     %[temp11], %[temp11], %[temp0]           \n\t"
 466     "addu     %[temp16], %[temp16], %[temp14]          \n\t"
 467     "sra      %[temp18], %[temp5],  8                  \n\t"
 468     "sra      %[temp1],  %[temp5],  31                 \n\t"
 469     "sra      %[temp17], %[temp8],  8                  \n\t"
 470     "sra      %[temp15], %[temp8],  31                 \n\t"
 471     "sra      %[temp12], %[temp11], 8                  \n\t"
 472     "sra      %[temp10], %[temp11], 31                 \n\t"
 473     "sra      %[temp9],  %[temp16], 8                  \n\t"
 474     "sra      %[temp3],  %[temp16], 31                 \n\t"
 475     "beqz     %[temp18], 9f                            \n\t"
 476     "xor      %[temp5],  %[temp5],  %[temp5]           \n\t"
 477     "movz     %[temp5],  %[temp6],  %[temp1]           \n\t"
 478   "9:                                                  \n\t"
 479     "beqz     %[temp17], 10f                           \n\t"
 480     "xor      %[temp8],  %[temp8],  %[temp8]           \n\t"
 481     "movz     %[temp8],  %[temp6],  %[temp15]          \n\t"
 482   "10:                                                 \n\t"
 483     "beqz     %[temp12], 11f                           \n\t"
 484     "xor      %[temp11], %[temp11], %[temp11]          \n\t"
 485     "movz     %[temp11], %[temp6],  %[temp10]          \n\t"
 486   "11:                                                 \n\t"
 487     "beqz     %[temp9],  12f                           \n\t"
 488     "xor      %[temp16], %[temp16], %[temp16]          \n\t"
 489     "movz     %[temp16], %[temp6],  %[temp3]           \n\t"
 490   "12:                                                 \n\t"
 491     "sb       %[temp5],  64(%[dst])                    \n\t"
 492     "sb       %[temp8],  65(%[dst])                    \n\t"
 493     "sb       %[temp11], 66(%[dst])                    \n\t"
 494     "sb       %[temp16], 67(%[dst])                    \n\t"
 495     "lbu      %[temp5],  96(%[dst])                    \n\t"
 496     "lbu      %[temp8],  97(%[dst])                    \n\t"
 497     "lbu      %[temp11], 98(%[dst])                    \n\t"
 498     "lbu      %[temp16], 99(%[dst])                    \n\t"
 499     "addu     %[temp5],  %[temp5],  %[temp13]          \n\t"
 500     "addu     %[temp8],  %[temp8],  %[temp7]           \n\t"
 501     "addu     %[temp11], %[temp11], %[temp4]           \n\t"
 502     "addu     %[temp16], %[temp16], %[temp2]           \n\t"
 503     "sra      %[temp18], %[temp5],  8                  \n\t"
 504     "sra      %[temp1],  %[temp5],  31                 \n\t"
 505     "sra      %[temp17], %[temp8],  8                  \n\t"
 506     "sra      %[temp15], %[temp8],  31                 \n\t"
 507     "sra      %[temp12], %[temp11], 8                  \n\t"
 508     "sra      %[temp10], %[temp11], 31                 \n\t"
 509     "sra      %[temp9],  %[temp16], 8                  \n\t"
 510     "sra      %[temp3],  %[temp16], 31                 \n\t"
 511     "beqz     %[temp18], 13f                           \n\t"
 512     "xor      %[temp5],  %[temp5],  %[temp5]           \n\t"
 513     "movz     %[temp5],  %[temp6],  %[temp1]           \n\t"
 514   "13:                                                 \n\t"
 515     "beqz     %[temp17], 14f                           \n\t"
 516     "xor      %[temp8],  %[temp8],  %[temp8]           \n\t"
 517     "movz     %[temp8],  %[temp6],  %[temp15]          \n\t"
 518   "14:                                                 \n\t"
 519     "beqz     %[temp12], 15f                           \n\t"
 520     "xor      %[temp11], %[temp11], %[temp11]          \n\t"
 521     "movz     %[temp11], %[temp6],  %[temp10]          \n\t"
 522   "15:                                                 \n\t"
 523     "beqz     %[temp9],  16f                           \n\t"
 524     "xor      %[temp16], %[temp16], %[temp16]          \n\t"
 525     "movz     %[temp16], %[temp6],  %[temp3]           \n\t"
 526   "16:                                                 \n\t"
 527     "sb       %[temp5],  96(%[dst])                    \n\t"
 528     "sb       %[temp8],  97(%[dst])                    \n\t"
 529     "sb       %[temp11], 98(%[dst])                    \n\t"
 530     "sb       %[temp16], 99(%[dst])                    \n\t"
 531
 532     : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
 533       [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
 534       [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
 535       [temp9]"=&r"(temp9), [temp10]"=&r"(temp10), [temp11]"=&r"(temp11),
 536       [temp12]"=&r"(temp12), [temp13]"=&r"(temp13), [temp14]"=&r"(temp14),
 537       [temp15]"=&r"(temp15), [temp16]"=&r"(temp16), [temp17]"=&r"(temp17),
 538       [temp18]"=&r"(temp18)
 539     : [in]"r"(p_in), [kC1]"r"(kC1), [kC2]"r"(kC2), [dst]"r"(dst)
 540     : "memory", "hi", "lo"
 541   );
 542 }
 543
 544 static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
 545   TransformOne(in, dst);
 546   if (do_two) {
 547     TransformOne(in + 16, dst + 4);
 548   }
 549 }
 550
 551 #endif  // WEBP_USE_MIPS32
 552
 553 //------------------------------------------------------------------------------
 554 // Entry point
 555
 556 extern void VP8DspInitMIPS32(void);
 557
 558 void VP8DspInitMIPS32(void) {
 559 #if defined(WEBP_USE_MIPS32)
 560   VP8InitClipTables();
 561
 562   VP8Transform = TransformTwo;
 563
 564   VP8VFilter16 = VFilter16;
 565   VP8HFilter16 = HFilter16;
 566   VP8VFilter8 = VFilter8;
 567   VP8HFilter8 = HFilter8;
 568   VP8VFilter16i = VFilter16i;
 569   VP8HFilter16i = HFilter16i;
 570   VP8VFilter8i = VFilter8i;
 571   VP8HFilter8i = HFilter8i;
 572
 573   VP8SimpleVFilter16 = SimpleVFilter16;
 574   VP8SimpleHFilter16 = SimpleHFilter16;
 575   VP8SimpleVFilter16i = SimpleVFilter16i;
 576   VP8SimpleHFilter16i = SimpleHFilter16i;
 577 #endif  // WEBP_USE_MIPS32
 578 }