package/libfreeimage/0003-LibWebP-fix-compilation-issue-with-GCC-5.x-C-11.patch

   1 From 44bce1b66c1cdd5308ac3ac773ea0a53d83790fd Mon Sep 17 00:00:00 2001
   2 From: Peter Korsgaard <peter@korsgaard.com>
   3 Date: Tue, 24 Nov 2015 21:16:39 +0100
   4 Subject: [PATCH] LibWebP: fix compilation issue with GCC 5.x / C++11
   5
   6 GCC 5.1 / C++11 gets confused about the "#<TEXT>" in the inline assembly
   7 code, and dies with errors like:
   8
   9 Source/LibWebP/./src/dsp/dsp.upsampling_mips_dsp_r2.c:37:34: error: invalid
  10 character ' ' in raw string delimiter
  11
  12 Fix it by introducing white space around the string literals like it has
  13 been done in upstream webp:
  14
  15 https://chromium.googlesource.com/webm/libwebp/+/eebaf97f5a1cb713d81d311308d8a48c124e5aef
  16
  17 Discussed upstream:
  18 http://sourceforge.net/p/freeimage/discussion/36110/thread/605ef8e4/
  19
  20 [Scripted by sed -i 's/"\(#[A-Z0-9]*\)"/" \1 "/g' *.c]
  21 Signed-off-by: Peter Korsgaard <peter@korsgaard.com>
  22 ---
  23  Source/LibWebP/src/dsp/dsp.dec_mips_dsp_r2.c       |  28 +-
  24  Source/LibWebP/src/dsp/dsp.enc_mips32.c            | 314 ++++++++++-----------
  25  Source/LibWebP/src/dsp/dsp.enc_mips_dsp_r2.c       | 288 +++++++++----------
  26  Source/LibWebP/src/dsp/dsp.filters_mips_dsp_r2.c   |  10 +-
  27  Source/LibWebP/src/dsp/dsp.lossless_mips32.c       |  34 +--
  28  Source/LibWebP/src/dsp/dsp.lossless_mips_dsp_r2.c  |   8 +-
  29  .../LibWebP/src/dsp/dsp.upsampling_mips_dsp_r2.c   |  18 +-
  30  Source/LibWebP/src/dsp/dsp.yuv_mips_dsp_r2.c       |  10 +-
  31  8 files changed, 355 insertions(+), 355 deletions(-)
  32
  33 diff --git a/Source/LibWebP/src/dsp/dsp.dec_mips_dsp_r2.c b/Source/LibWebP/src/dsp/dsp.dec_mips_dsp_r2.c
  34 index dac2c93..aaa8111 100644
  35 --- a/Source/LibWebP/src/dsp/dsp.dec_mips_dsp_r2.c
  36 +++ b/Source/LibWebP/src/dsp/dsp.dec_mips_dsp_r2.c
  37 @@ -548,10 +548,10 @@ static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
  38  // TEMP3 = SRC[D + D1 * BPS]
  39  #define LOAD_4_BYTES(TEMP0, TEMP1, TEMP2, TEMP3,                               \
  40                       A, A1, B, B1, C, C1, D, D1, SRC)                          \
  41 -  "lbu          %["#TEMP0"],   "#A"+"#A1"*"XSTR(BPS)"(%["#SRC"])     \n\t"     \
  42 -  "lbu          %["#TEMP1"],   "#B"+"#B1"*"XSTR(BPS)"(%["#SRC"])     \n\t"     \
  43 -  "lbu          %["#TEMP2"],   "#C"+"#C1"*"XSTR(BPS)"(%["#SRC"])     \n\t"     \
  44 -  "lbu          %["#TEMP3"],   "#D"+"#D1"*"XSTR(BPS)"(%["#SRC"])     \n\t"     \
  45 +  "lbu          %[" #TEMP0 "],   " #A "+" #A1 "*"XSTR(BPS)"(%[" #SRC "])     \n\t"     \
  46 +  "lbu          %[" #TEMP1 "],   " #B "+" #B1 "*"XSTR(BPS)"(%[" #SRC "])     \n\t"     \
  47 +  "lbu          %[" #TEMP2 "],   " #C "+" #C1 "*"XSTR(BPS)"(%[" #SRC "])     \n\t"     \
  48 +  "lbu          %[" #TEMP3 "],   " #D "+" #D1 "*"XSTR(BPS)"(%[" #SRC "])     \n\t"     \
  49
  50  static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
  51    int i;
  52 @@ -623,8 +623,8 @@ static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
  53  // DST[A * BPS]     = TEMP0
  54  // DST[B + C * BPS] = TEMP1
  55  #define STORE_8_BYTES(TEMP0, TEMP1, A, B, C, DST)                              \
  56 -  "usw          %["#TEMP0"],   "#A"*"XSTR(BPS)"(%["#DST"])         \n\t"       \
  57 -  "usw          %["#TEMP1"],   "#B"+"#C"*"XSTR(BPS)"(%["#DST"])    \n\t"
  58 +  "usw          %[" #TEMP0 "],   " #A "*"XSTR(BPS)"(%[" #DST "])         \n\t"       \
  59 +  "usw          %[" #TEMP1 "],   " #B "+" #C "*"XSTR(BPS)"(%[" #DST "])    \n\t"
  60
  61  static void VE4(uint8_t* dst) {    // vertical
  62    const uint8_t* top = dst - BPS;
  63 @@ -725,8 +725,8 @@ static void RD4(uint8_t* dst) {   // Down-right
  64  // TEMP0 = SRC[A * BPS]
  65  // TEMP1 = SRC[B + C * BPS]
  66  #define LOAD_8_BYTES(TEMP0, TEMP1, A, B, C, SRC)                               \
  67 -  "ulw          %["#TEMP0"],   "#A"*"XSTR(BPS)"(%["#SRC"])         \n\t"       \
  68 -  "ulw          %["#TEMP1"],   "#B"+"#C"*"XSTR(BPS)"(%["#SRC"])    \n\t"
  69 +  "ulw          %[" #TEMP0 "],   " #A "*"XSTR(BPS)"(%[" #SRC "])         \n\t"       \
  70 +  "ulw          %[" #TEMP1 "],   " #B "+" #C "*"XSTR(BPS)"(%[" #SRC "])    \n\t"
  71
  72  static void LD4(uint8_t* dst) {   // Down-Left
  73    int temp0, temp1, temp2, temp3, temp4;
  74 @@ -873,24 +873,24 @@ static void DC8uvNoTop(uint8_t* dst) {  // DC with no top samples
  75  #define CLIPPING(SIZE)                                                         \
  76    "preceu.ph.qbl   %[temp2],   %[temp0]                  \n\t"                 \
  77    "preceu.ph.qbr   %[temp0],   %[temp0]                  \n\t"                 \
  78 -".if "#SIZE" == 8                                        \n\t"                 \
  79 +".if " #SIZE " == 8                                        \n\t"                 \
  80    "preceu.ph.qbl   %[temp3],   %[temp1]                  \n\t"                 \
  81    "preceu.ph.qbr   %[temp1],   %[temp1]                  \n\t"                 \
  82  ".endif                                                  \n\t"                 \
  83    "addu.ph         %[temp2],   %[temp2],   %[dst_1]      \n\t"                 \
  84    "addu.ph         %[temp0],   %[temp0],   %[dst_1]      \n\t"                 \
  85 -".if "#SIZE" == 8                                        \n\t"                 \
  86 +".if " #SIZE " == 8                                        \n\t"                 \
  87    "addu.ph         %[temp3],   %[temp3],   %[dst_1]      \n\t"                 \
  88    "addu.ph         %[temp1],   %[temp1],   %[dst_1]      \n\t"                 \
  89  ".endif                                                  \n\t"                 \
  90    "shll_s.ph       %[temp2],   %[temp2],   7             \n\t"                 \
  91    "shll_s.ph       %[temp0],   %[temp0],   7             \n\t"                 \
  92 -".if "#SIZE" == 8                                        \n\t"                 \
  93 +".if " #SIZE " == 8                                        \n\t"                 \
  94    "shll_s.ph       %[temp3],   %[temp3],   7             \n\t"                 \
  95    "shll_s.ph       %[temp1],   %[temp1],   7             \n\t"                 \
  96  ".endif                                                  \n\t"                 \
  97    "precrqu_s.qb.ph %[temp0],   %[temp2],   %[temp0]      \n\t"                 \
  98 -".if "#SIZE" == 8                                        \n\t"                 \
  99 +".if " #SIZE " == 8                                        \n\t"                 \
 100    "precrqu_s.qb.ph %[temp1],   %[temp3],   %[temp1]      \n\t"                 \
 101  ".endif                                                  \n\t"
 102
 103 @@ -899,7 +899,7 @@ static void DC8uvNoTop(uint8_t* dst) {  // DC with no top samples
 104    int dst_1 = ((int)(DST)[-1] << 16) + (DST)[-1];                              \
 105    int temp0, temp1, temp2, temp3;                                              \
 106    __asm__ volatile (                                                           \
 107 -  ".if "#SIZE" < 8                                       \n\t"                 \
 108 +  ".if " #SIZE " < 8                                       \n\t"                 \
 109      "ulw             %[temp0],   0(%[top])               \n\t"                 \
 110      "subu.ph         %[dst_1],   %[dst_1],    %[top_1]   \n\t"                 \
 111      CLIPPING(4)                                                                \
 112 @@ -911,7 +911,7 @@ static void DC8uvNoTop(uint8_t* dst) {  // DC with no top samples
 113      CLIPPING(8)                                                                \
 114      "usw             %[temp0],   0(%[dst])               \n\t"                 \
 115      "usw             %[temp1],   4(%[dst])               \n\t"                 \
 116 -  ".if "#SIZE" == 16                                     \n\t"                 \
 117 +  ".if " #SIZE " == 16                                     \n\t"                 \
 118      "ulw             %[temp0],   8(%[top])               \n\t"                 \
 119      "ulw             %[temp1],   12(%[top])              \n\t"                 \
 120      CLIPPING(8)                                                                \
 121 diff --git a/Source/LibWebP/src/dsp/dsp.enc_mips32.c b/Source/LibWebP/src/dsp/dsp.enc_mips32.c
 122 index 545aa3a..bf1c16d 100644
 123 --- a/Source/LibWebP/src/dsp/dsp.enc_mips32.c
 124 +++ b/Source/LibWebP/src/dsp/dsp.enc_mips32.c
 125 @@ -31,26 +31,26 @@ static const int kC2 = 35468;
 126  // TEMP0..TEMP3 - registers for corresponding tmp elements
 127  // TEMP4..TEMP5 - temporary registers
 128  #define VERTICAL_PASS(A, B, C, D, TEMP4, TEMP0, TEMP1, TEMP2, TEMP3)        \
 129 -  "lh      %[temp16],      "#A"(%[temp20])                 \n\t"            \
 130 -  "lh      %[temp18],      "#B"(%[temp20])                 \n\t"            \
 131 -  "lh      %[temp17],      "#C"(%[temp20])                 \n\t"            \
 132 -  "lh      %[temp19],      "#D"(%[temp20])                 \n\t"            \
 133 -  "addu    %["#TEMP4"],    %[temp16],      %[temp18]       \n\t"            \
 134 +  "lh      %[temp16],      " #A "(%[temp20])                 \n\t"            \
 135 +  "lh      %[temp18],      " #B "(%[temp20])                 \n\t"            \
 136 +  "lh      %[temp17],      " #C "(%[temp20])                 \n\t"            \
 137 +  "lh      %[temp19],      " #D "(%[temp20])                 \n\t"            \
 138 +  "addu    %[" #TEMP4 "],    %[temp16],      %[temp18]       \n\t"            \
 139    "subu    %[temp16],      %[temp16],      %[temp18]       \n\t"            \
 140 -  "mul     %["#TEMP0"],    %[temp17],      %[kC2]          \n\t"            \
 141 +  "mul     %[" #TEMP0 "],    %[temp17],      %[kC2]          \n\t"            \
 142    "mul     %[temp18],      %[temp19],      %[kC1]          \n\t"            \
 143    "mul     %[temp17],      %[temp17],      %[kC1]          \n\t"            \
 144    "mul     %[temp19],      %[temp19],      %[kC2]          \n\t"            \
 145 -  "sra     %["#TEMP0"],    %["#TEMP0"],    16              \n\n"            \
 146 +  "sra     %[" #TEMP0 "],    %[" #TEMP0 "],    16              \n\n"            \
 147    "sra     %[temp18],      %[temp18],      16              \n\n"            \
 148    "sra     %[temp17],      %[temp17],      16              \n\n"            \
 149    "sra     %[temp19],      %[temp19],      16              \n\n"            \
 150 -  "subu    %["#TEMP2"],    %["#TEMP0"],    %[temp18]       \n\t"            \
 151 -  "addu    %["#TEMP3"],    %[temp17],      %[temp19]       \n\t"            \
 152 -  "addu    %["#TEMP0"],    %["#TEMP4"],    %["#TEMP3"]     \n\t"            \
 153 -  "addu    %["#TEMP1"],    %[temp16],      %["#TEMP2"]     \n\t"            \
 154 -  "subu    %["#TEMP2"],    %[temp16],      %["#TEMP2"]     \n\t"            \
 155 -  "subu    %["#TEMP3"],    %["#TEMP4"],    %["#TEMP3"]     \n\t"
 156 +  "subu    %[" #TEMP2 "],    %[" #TEMP0 "],    %[temp18]       \n\t"            \
 157 +  "addu    %[" #TEMP3 "],    %[temp17],      %[temp19]       \n\t"            \
 158 +  "addu    %[" #TEMP0 "],    %[" #TEMP4 "],    %[" #TEMP3 "]     \n\t"            \
 159 +  "addu    %[" #TEMP1 "],    %[temp16],      %[" #TEMP2 "]     \n\t"            \
 160 +  "subu    %[" #TEMP2 "],    %[temp16],      %[" #TEMP2 "]     \n\t"            \
 161 +  "subu    %[" #TEMP3 "],    %[" #TEMP4 "],    %[" #TEMP3 "]     \n\t"
 162
 163  // macro for one horizontal pass in ITransformOne
 164  // MUL and STORE macros inlined
 165 @@ -59,58 +59,58 @@ static const int kC2 = 35468;
 166  // A - offset in bytes to load from ref and store to dst buffer
 167  // TEMP0, TEMP4, TEMP8 and TEMP12 - registers for corresponding tmp elements
 168  #define HORIZONTAL_PASS(A, TEMP0, TEMP4, TEMP8, TEMP12)                     \
 169 -  "addiu   %["#TEMP0"],    %["#TEMP0"],    4               \n\t"            \
 170 -  "addu    %[temp16],      %["#TEMP0"],    %["#TEMP8"]     \n\t"            \
 171 -  "subu    %[temp17],      %["#TEMP0"],    %["#TEMP8"]     \n\t"            \
 172 -  "mul     %["#TEMP0"],    %["#TEMP4"],    %[kC2]          \n\t"            \
 173 -  "mul     %["#TEMP8"],    %["#TEMP12"],   %[kC1]          \n\t"            \
 174 -  "mul     %["#TEMP4"],    %["#TEMP4"],    %[kC1]          \n\t"            \
 175 -  "mul     %["#TEMP12"],   %["#TEMP12"],   %[kC2]          \n\t"            \
 176 -  "sra     %["#TEMP0"],    %["#TEMP0"],    16              \n\t"            \
 177 -  "sra     %["#TEMP8"],    %["#TEMP8"],    16              \n\t"            \
 178 -  "sra     %["#TEMP4"],    %["#TEMP4"],    16              \n\t"            \
 179 -  "sra     %["#TEMP12"],   %["#TEMP12"],   16              \n\t"            \
 180 -  "subu    %[temp18],      %["#TEMP0"],    %["#TEMP8"]     \n\t"            \
 181 -  "addu    %[temp19],      %["#TEMP4"],    %["#TEMP12"]    \n\t"            \
 182 -  "addu    %["#TEMP0"],    %[temp16],      %[temp19]       \n\t"            \
 183 -  "addu    %["#TEMP4"],    %[temp17],      %[temp18]       \n\t"            \
 184 -  "subu    %["#TEMP8"],    %[temp17],      %[temp18]       \n\t"            \
 185 -  "subu    %["#TEMP12"],   %[temp16],      %[temp19]       \n\t"            \
 186 +  "addiu   %[" #TEMP0 "],    %[" #TEMP0 "],    4               \n\t"            \
 187 +  "addu    %[temp16],      %[" #TEMP0 "],    %[" #TEMP8 "]     \n\t"            \
 188 +  "subu    %[temp17],      %[" #TEMP0 "],    %[" #TEMP8 "]     \n\t"            \
 189 +  "mul     %[" #TEMP0 "],    %[" #TEMP4 "],    %[kC2]          \n\t"            \
 190 +  "mul     %[" #TEMP8 "],    %[" #TEMP12 "],   %[kC1]          \n\t"            \
 191 +  "mul     %[" #TEMP4 "],    %[" #TEMP4 "],    %[kC1]          \n\t"            \
 192 +  "mul     %[" #TEMP12 "],   %[" #TEMP12 "],   %[kC2]          \n\t"            \
 193 +  "sra     %[" #TEMP0 "],    %[" #TEMP0 "],    16              \n\t"            \
 194 +  "sra     %[" #TEMP8 "],    %[" #TEMP8 "],    16              \n\t"            \
 195 +  "sra     %[" #TEMP4 "],    %[" #TEMP4 "],    16              \n\t"            \
 196 +  "sra     %[" #TEMP12 "],   %[" #TEMP12 "],   16              \n\t"            \
 197 +  "subu    %[temp18],      %[" #TEMP0 "],    %[" #TEMP8 "]     \n\t"            \
 198 +  "addu    %[temp19],      %[" #TEMP4 "],    %[" #TEMP12 "]    \n\t"            \
 199 +  "addu    %[" #TEMP0 "],    %[temp16],      %[temp19]       \n\t"            \
 200 +  "addu    %[" #TEMP4 "],    %[temp17],      %[temp18]       \n\t"            \
 201 +  "subu    %[" #TEMP8 "],    %[temp17],      %[temp18]       \n\t"            \
 202 +  "subu    %[" #TEMP12 "],   %[temp16],      %[temp19]       \n\t"            \
 203    "lw      %[temp20],      0(%[args])                      \n\t"            \
 204 -  "sra     %["#TEMP0"],    %["#TEMP0"],    3               \n\t"            \
 205 -  "sra     %["#TEMP4"],    %["#TEMP4"],    3               \n\t"            \
 206 -  "sra     %["#TEMP8"],    %["#TEMP8"],    3               \n\t"            \
 207 -  "sra     %["#TEMP12"],   %["#TEMP12"],   3               \n\t"            \
 208 -  "lbu     %[temp16],      0+"XSTR(BPS)"*"#A"(%[temp20])   \n\t"            \
 209 -  "lbu     %[temp17],      1+"XSTR(BPS)"*"#A"(%[temp20])   \n\t"            \
 210 -  "lbu     %[temp18],      2+"XSTR(BPS)"*"#A"(%[temp20])   \n\t"            \
 211 -  "lbu     %[temp19],      3+"XSTR(BPS)"*"#A"(%[temp20])   \n\t"            \
 212 -  "addu    %["#TEMP0"],    %[temp16],      %["#TEMP0"]     \n\t"            \
 213 -  "addu    %["#TEMP4"],    %[temp17],      %["#TEMP4"]     \n\t"            \
 214 -  "addu    %["#TEMP8"],    %[temp18],      %["#TEMP8"]     \n\t"            \
 215 -  "addu    %["#TEMP12"],   %[temp19],      %["#TEMP12"]    \n\t"            \
 216 -  "slt     %[temp16],      %["#TEMP0"],    $zero           \n\t"            \
 217 -  "slt     %[temp17],      %["#TEMP4"],    $zero           \n\t"            \
 218 -  "slt     %[temp18],      %["#TEMP8"],    $zero           \n\t"            \
 219 -  "slt     %[temp19],      %["#TEMP12"],   $zero           \n\t"            \
 220 -  "movn    %["#TEMP0"],    $zero,          %[temp16]       \n\t"            \
 221 -  "movn    %["#TEMP4"],    $zero,          %[temp17]       \n\t"            \
 222 -  "movn    %["#TEMP8"],    $zero,          %[temp18]       \n\t"            \
 223 -  "movn    %["#TEMP12"],   $zero,          %[temp19]       \n\t"            \
 224 +  "sra     %[" #TEMP0 "],    %[" #TEMP0 "],    3               \n\t"            \
 225 +  "sra     %[" #TEMP4 "],    %[" #TEMP4 "],    3               \n\t"            \
 226 +  "sra     %[" #TEMP8 "],    %[" #TEMP8 "],    3               \n\t"            \
 227 +  "sra     %[" #TEMP12 "],   %[" #TEMP12 "],   3               \n\t"            \
 228 +  "lbu     %[temp16],      0+"XSTR(BPS)"*" #A "(%[temp20])   \n\t"            \
 229 +  "lbu     %[temp17],      1+"XSTR(BPS)"*" #A "(%[temp20])   \n\t"            \
 230 +  "lbu     %[temp18],      2+"XSTR(BPS)"*" #A "(%[temp20])   \n\t"            \
 231 +  "lbu     %[temp19],      3+"XSTR(BPS)"*" #A "(%[temp20])   \n\t"            \
 232 +  "addu    %[" #TEMP0 "],    %[temp16],      %[" #TEMP0 "]     \n\t"            \
 233 +  "addu    %[" #TEMP4 "],    %[temp17],      %[" #TEMP4 "]     \n\t"            \
 234 +  "addu    %[" #TEMP8 "],    %[temp18],      %[" #TEMP8 "]     \n\t"            \
 235 +  "addu    %[" #TEMP12 "],   %[temp19],      %[" #TEMP12 "]    \n\t"            \
 236 +  "slt     %[temp16],      %[" #TEMP0 "],    $zero           \n\t"            \
 237 +  "slt     %[temp17],      %[" #TEMP4 "],    $zero           \n\t"            \
 238 +  "slt     %[temp18],      %[" #TEMP8 "],    $zero           \n\t"            \
 239 +  "slt     %[temp19],      %[" #TEMP12 "],   $zero           \n\t"            \
 240 +  "movn    %[" #TEMP0 "],    $zero,          %[temp16]       \n\t"            \
 241 +  "movn    %[" #TEMP4 "],    $zero,          %[temp17]       \n\t"            \
 242 +  "movn    %[" #TEMP8 "],    $zero,          %[temp18]       \n\t"            \
 243 +  "movn    %[" #TEMP12 "],   $zero,          %[temp19]       \n\t"            \
 244    "addiu   %[temp20],      $zero,          255             \n\t"            \
 245 -  "slt     %[temp16],      %["#TEMP0"],    %[temp20]       \n\t"            \
 246 -  "slt     %[temp17],      %["#TEMP4"],    %[temp20]       \n\t"            \
 247 -  "slt     %[temp18],      %["#TEMP8"],    %[temp20]       \n\t"            \
 248 -  "slt     %[temp19],      %["#TEMP12"],   %[temp20]       \n\t"            \
 249 -  "movz    %["#TEMP0"],    %[temp20],      %[temp16]       \n\t"            \
 250 -  "movz    %["#TEMP4"],    %[temp20],      %[temp17]       \n\t"            \
 251 +  "slt     %[temp16],      %[" #TEMP0 "],    %[temp20]       \n\t"            \
 252 +  "slt     %[temp17],      %[" #TEMP4 "],    %[temp20]       \n\t"            \
 253 +  "slt     %[temp18],      %[" #TEMP8 "],    %[temp20]       \n\t"            \
 254 +  "slt     %[temp19],      %[" #TEMP12 "],   %[temp20]       \n\t"            \
 255 +  "movz    %[" #TEMP0 "],    %[temp20],      %[temp16]       \n\t"            \
 256 +  "movz    %[" #TEMP4 "],    %[temp20],      %[temp17]       \n\t"            \
 257    "lw      %[temp16],      8(%[args])                      \n\t"            \
 258 -  "movz    %["#TEMP8"],    %[temp20],      %[temp18]       \n\t"            \
 259 -  "movz    %["#TEMP12"],   %[temp20],      %[temp19]       \n\t"            \
 260 -  "sb      %["#TEMP0"],    0+"XSTR(BPS)"*"#A"(%[temp16])   \n\t"            \
 261 -  "sb      %["#TEMP4"],    1+"XSTR(BPS)"*"#A"(%[temp16])   \n\t"            \
 262 -  "sb      %["#TEMP8"],    2+"XSTR(BPS)"*"#A"(%[temp16])   \n\t"            \
 263 -  "sb      %["#TEMP12"],   3+"XSTR(BPS)"*"#A"(%[temp16])   \n\t"
 264 +  "movz    %[" #TEMP8 "],    %[temp20],      %[temp18]       \n\t"            \
 265 +  "movz    %[" #TEMP12 "],   %[temp20],      %[temp19]       \n\t"            \
 266 +  "sb      %[" #TEMP0 "],    0+"XSTR(BPS)"*" #A "(%[temp16])   \n\t"            \
 267 +  "sb      %[" #TEMP4 "],    1+"XSTR(BPS)"*" #A "(%[temp16])   \n\t"            \
 268 +  "sb      %[" #TEMP8 "],    2+"XSTR(BPS)"*" #A "(%[temp16])   \n\t"            \
 269 +  "sb      %[" #TEMP12 "],   3+"XSTR(BPS)"*" #A "(%[temp16])   \n\t"
 270
 271  // Does one or two inverse transforms.
 272  static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
 273 @@ -161,9 +161,9 @@ static void ITransform(const uint8_t* ref, const int16_t* in,
 274  // K - offset in bytes (kZigzag[n] * 4)
 275  // N - offset in bytes (n * 2)
 276  #define QUANTIZE_ONE(J, K, N)                                               \
 277 -  "lh           %[temp0],       "#J"(%[ppin])                       \n\t"   \
 278 -  "lhu          %[temp1],       "#J"(%[ppsharpen])                  \n\t"   \
 279 -  "lw           %[temp2],       "#K"(%[ppzthresh])                  \n\t"   \
 280 +  "lh           %[temp0],       " #J "(%[ppin])                       \n\t"   \
 281 +  "lhu          %[temp1],       " #J "(%[ppsharpen])                  \n\t"   \
 282 +  "lw           %[temp2],       " #K "(%[ppzthresh])                  \n\t"   \
 283    "sra          %[sign],        %[temp0],           15              \n\t"   \
 284    "xor          %[coeff],       %[temp0],           %[sign]         \n\t"   \
 285    "subu         %[coeff],       %[coeff],           %[sign]         \n\t"   \
 286 @@ -172,9 +172,9 @@ static void ITransform(const uint8_t* ref, const int16_t* in,
 287    "addiu        %[temp5],       $zero,              0               \n\t"   \
 288    "addiu        %[level],       $zero,              0               \n\t"   \
 289    "beqz         %[temp4],       2f                                  \n\t"   \
 290 -  "lhu          %[temp1],       "#J"(%[ppiq])                       \n\t"   \
 291 -  "lw           %[temp2],       "#K"(%[ppbias])                     \n\t"   \
 292 -  "lhu          %[temp3],       "#J"(%[ppq])                        \n\t"   \
 293 +  "lhu          %[temp1],       " #J "(%[ppiq])                       \n\t"   \
 294 +  "lw           %[temp2],       " #K "(%[ppbias])                     \n\t"   \
 295 +  "lhu          %[temp3],       " #J "(%[ppq])                        \n\t"   \
 296    "mul          %[level],       %[coeff],           %[temp1]        \n\t"   \
 297    "addu         %[level],       %[level],           %[temp2]        \n\t"   \
 298    "sra          %[level],       %[level],           17              \n\t"   \
 299 @@ -184,8 +184,8 @@ static void ITransform(const uint8_t* ref, const int16_t* in,
 300    "subu         %[level],       %[level],           %[sign]         \n\t"   \
 301    "mul          %[temp5],       %[level],           %[temp3]        \n\t"   \
 302  "2:                                                                 \n\t"   \
 303 -  "sh           %[temp5],       "#J"(%[ppin])                       \n\t"   \
 304 -  "sh           %[level],       "#N"(%[pout])                       \n\t"
 305 +  "sh           %[temp5],       " #J "(%[ppin])                       \n\t"   \
 306 +  "sh           %[level],       " #N "(%[pout])                       \n\t"
 307
 308  static int QuantizeBlock(int16_t in[16], int16_t out[16],
 309                           const VP8Matrix* const mtx) {
 310 @@ -254,14 +254,14 @@ static int Quantize2Blocks(int16_t in[32], int16_t out[32],
 311  // E..H - offsets in bytes to store first results to tmp buffer
 312  // E1..H1 - offsets in bytes to store second results to tmp buffer
 313  #define HORIZONTAL_PASS(A, E, F, G, H, E1, F1, G1, H1)              \
 314 -  "lbu    %[temp0],  0+"XSTR(BPS)"*"#A"(%[a])  \n\t"                \
 315 -  "lbu    %[temp1],  1+"XSTR(BPS)"*"#A"(%[a])  \n\t"                \
 316 -  "lbu    %[temp2],  2+"XSTR(BPS)"*"#A"(%[a])  \n\t"                \
 317 -  "lbu    %[temp3],  3+"XSTR(BPS)"*"#A"(%[a])  \n\t"                \
 318 -  "lbu    %[temp4],  0+"XSTR(BPS)"*"#A"(%[b])  \n\t"                \
 319 -  "lbu    %[temp5],  1+"XSTR(BPS)"*"#A"(%[b])  \n\t"                \
 320 -  "lbu    %[temp6],  2+"XSTR(BPS)"*"#A"(%[b])  \n\t"                \
 321 -  "lbu    %[temp7],  3+"XSTR(BPS)"*"#A"(%[b])  \n\t"                \
 322 +  "lbu    %[temp0],  0+"XSTR(BPS)"*" #A "(%[a])  \n\t"                \
 323 +  "lbu    %[temp1],  1+"XSTR(BPS)"*" #A "(%[a])  \n\t"                \
 324 +  "lbu    %[temp2],  2+"XSTR(BPS)"*" #A "(%[a])  \n\t"                \
 325 +  "lbu    %[temp3],  3+"XSTR(BPS)"*" #A "(%[a])  \n\t"                \
 326 +  "lbu    %[temp4],  0+"XSTR(BPS)"*" #A "(%[b])  \n\t"                \
 327 +  "lbu    %[temp5],  1+"XSTR(BPS)"*" #A "(%[b])  \n\t"                \
 328 +  "lbu    %[temp6],  2+"XSTR(BPS)"*" #A "(%[b])  \n\t"                \
 329 +  "lbu    %[temp7],  3+"XSTR(BPS)"*" #A "(%[b])  \n\t"                \
 330    "addu   %[temp8],  %[temp0],    %[temp2]     \n\t"                \
 331    "subu   %[temp0],  %[temp0],    %[temp2]     \n\t"                \
 332    "addu   %[temp2],  %[temp1],    %[temp3]     \n\t"                \
 333 @@ -278,14 +278,14 @@ static int Quantize2Blocks(int16_t in[32], int16_t out[32],
 334    "subu   %[temp3],  %[temp3],    %[temp6]     \n\t"                \
 335    "addu   %[temp6],  %[temp4],    %[temp5]     \n\t"                \
 336    "subu   %[temp4],  %[temp4],    %[temp5]     \n\t"                \
 337 -  "sw     %[temp7],  "#E"(%[tmp])              \n\t"                \
 338 -  "sw     %[temp2],  "#H"(%[tmp])              \n\t"                \
 339 -  "sw     %[temp8],  "#F"(%[tmp])              \n\t"                \
 340 -  "sw     %[temp0],  "#G"(%[tmp])              \n\t"                \
 341 -  "sw     %[temp1],  "#E1"(%[tmp])             \n\t"                \
 342 -  "sw     %[temp3],  "#H1"(%[tmp])             \n\t"                \
 343 -  "sw     %[temp6],  "#F1"(%[tmp])             \n\t"                \
 344 -  "sw     %[temp4],  "#G1"(%[tmp])             \n\t"
 345 +  "sw     %[temp7],  " #E "(%[tmp])              \n\t"                \
 346 +  "sw     %[temp2],  " #H "(%[tmp])              \n\t"                \
 347 +  "sw     %[temp8],  " #F "(%[tmp])              \n\t"                \
 348 +  "sw     %[temp0],  " #G "(%[tmp])              \n\t"                \
 349 +  "sw     %[temp1],  " #E1 "(%[tmp])             \n\t"                \
 350 +  "sw     %[temp3],  " #H1 "(%[tmp])             \n\t"                \
 351 +  "sw     %[temp6],  " #F1 "(%[tmp])             \n\t"                \
 352 +  "sw     %[temp4],  " #G1 "(%[tmp])             \n\t"
 353
 354  // macro for one vertical pass in Disto4x4 (TTransform)
 355  // two calls of function TTransform are merged into single one
 356 @@ -300,10 +300,10 @@ static int Quantize2Blocks(int16_t in[32], int16_t out[32],
 357  // A1..D1 - offsets in bytes to load second results from tmp buffer
 358  // E..H - offsets in bytes to load from w buffer
 359  #define VERTICAL_PASS(A, B, C, D, A1, B1, C1, D1, E, F, G, H)     \
 360 -  "lw     %[temp0],  "#A1"(%[tmp])           \n\t"                \
 361 -  "lw     %[temp1],  "#C1"(%[tmp])           \n\t"                \
 362 -  "lw     %[temp2],  "#B1"(%[tmp])           \n\t"                \
 363 -  "lw     %[temp3],  "#D1"(%[tmp])           \n\t"                \
 364 +  "lw     %[temp0],  " #A1 "(%[tmp])           \n\t"                \
 365 +  "lw     %[temp1],  " #C1 "(%[tmp])           \n\t"                \
 366 +  "lw     %[temp2],  " #B1 "(%[tmp])           \n\t"                \
 367 +  "lw     %[temp3],  " #D1 "(%[tmp])           \n\t"                \
 368    "addu   %[temp8],  %[temp0],    %[temp1]   \n\t"                \
 369    "subu   %[temp0],  %[temp0],    %[temp1]   \n\t"                \
 370    "addu   %[temp1],  %[temp2],    %[temp3]   \n\t"                \
 371 @@ -324,18 +324,18 @@ static int Quantize2Blocks(int16_t in[32], int16_t out[32],
 372    "subu   %[temp1],  %[temp1],    %[temp5]   \n\t"                \
 373    "subu   %[temp0],  %[temp0],    %[temp6]   \n\t"                \
 374    "subu   %[temp8],  %[temp8],    %[temp7]   \n\t"                \
 375 -  "lhu    %[temp4],  "#E"(%[w])              \n\t"                \
 376 -  "lhu    %[temp5],  "#F"(%[w])              \n\t"                \
 377 -  "lhu    %[temp6],  "#G"(%[w])              \n\t"                \
 378 -  "lhu    %[temp7],  "#H"(%[w])              \n\t"                \
 379 +  "lhu    %[temp4],  " #E "(%[w])              \n\t"                \
 380 +  "lhu    %[temp5],  " #F "(%[w])              \n\t"                \
 381 +  "lhu    %[temp6],  " #G "(%[w])              \n\t"                \
 382 +  "lhu    %[temp7],  " #H "(%[w])              \n\t"                \
 383    "madd   %[temp4],  %[temp3]                \n\t"                \
 384    "madd   %[temp5],  %[temp1]                \n\t"                \
 385    "madd   %[temp6],  %[temp0]                \n\t"                \
 386    "madd   %[temp7],  %[temp8]                \n\t"                \
 387 -  "lw     %[temp0],  "#A"(%[tmp])            \n\t"                \
 388 -  "lw     %[temp1],  "#C"(%[tmp])            \n\t"                \
 389 -  "lw     %[temp2],  "#B"(%[tmp])            \n\t"                \
 390 -  "lw     %[temp3],  "#D"(%[tmp])            \n\t"                \
 391 +  "lw     %[temp0],  " #A "(%[tmp])            \n\t"                \
 392 +  "lw     %[temp1],  " #C "(%[tmp])            \n\t"                \
 393 +  "lw     %[temp2],  " #B "(%[tmp])            \n\t"                \
 394 +  "lw     %[temp3],  " #D "(%[tmp])            \n\t"                \
 395    "addu   %[temp8],  %[temp0],    %[temp1]   \n\t"                \
 396    "subu   %[temp0],  %[temp0],    %[temp1]   \n\t"                \
 397    "addu   %[temp1],  %[temp2],    %[temp3]   \n\t"                \
 398 @@ -413,70 +413,70 @@ static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
 399  // A - offset in bytes to load from src and ref buffers
 400  // TEMP0..TEMP3 - registers for corresponding tmp elements
 401  #define HORIZONTAL_PASS(A, TEMP0, TEMP1, TEMP2, TEMP3)            \
 402 -  "lw     %["#TEMP1"],  0(%[args])                       \n\t"    \
 403 -  "lw     %["#TEMP2"],  4(%[args])                       \n\t"    \
 404 -  "lbu    %[temp16],    0+"XSTR(BPS)"*"#A"(%["#TEMP1"])  \n\t"    \
 405 -  "lbu    %[temp17],    0+"XSTR(BPS)"*"#A"(%["#TEMP2"])  \n\t"    \
 406 -  "lbu    %[temp18],    1+"XSTR(BPS)"*"#A"(%["#TEMP1"])  \n\t"    \
 407 -  "lbu    %[temp19],    1+"XSTR(BPS)"*"#A"(%["#TEMP2"])  \n\t"    \
 408 +  "lw     %[" #TEMP1 "],  0(%[args])                       \n\t"    \
 409 +  "lw     %[" #TEMP2 "],  4(%[args])                       \n\t"    \
 410 +  "lbu    %[temp16],    0+"XSTR(BPS)"*" #A "(%[" #TEMP1 "])  \n\t"    \
 411 +  "lbu    %[temp17],    0+"XSTR(BPS)"*" #A "(%[" #TEMP2 "])  \n\t"    \
 412 +  "lbu    %[temp18],    1+"XSTR(BPS)"*" #A "(%[" #TEMP1 "])  \n\t"    \
 413 +  "lbu    %[temp19],    1+"XSTR(BPS)"*" #A "(%[" #TEMP2 "])  \n\t"    \
 414    "subu   %[temp20],    %[temp16],    %[temp17]          \n\t"    \
 415 -  "lbu    %[temp16],    2+"XSTR(BPS)"*"#A"(%["#TEMP1"])  \n\t"    \
 416 -  "lbu    %[temp17],    2+"XSTR(BPS)"*"#A"(%["#TEMP2"])  \n\t"    \
 417 -  "subu   %["#TEMP0"],  %[temp18],    %[temp19]          \n\t"    \
 418 -  "lbu    %[temp18],    3+"XSTR(BPS)"*"#A"(%["#TEMP1"])  \n\t"    \
 419 -  "lbu    %[temp19],    3+"XSTR(BPS)"*"#A"(%["#TEMP2"])  \n\t"    \
 420 -  "subu   %["#TEMP1"],  %[temp16],    %[temp17]          \n\t"    \
 421 -  "subu   %["#TEMP2"],  %[temp18],    %[temp19]          \n\t"    \
 422 -  "addu   %["#TEMP3"],  %[temp20],    %["#TEMP2"]        \n\t"    \
 423 -  "subu   %["#TEMP2"],  %[temp20],    %["#TEMP2"]        \n\t"    \
 424 -  "addu   %[temp20],    %["#TEMP0"],  %["#TEMP1"]        \n\t"    \
 425 -  "subu   %["#TEMP0"],  %["#TEMP0"],  %["#TEMP1"]        \n\t"    \
 426 -  "mul    %[temp16],    %["#TEMP2"],  %[c5352]           \n\t"    \
 427 -  "mul    %[temp17],    %["#TEMP2"],  %[c2217]           \n\t"    \
 428 -  "mul    %[temp18],    %["#TEMP0"],  %[c5352]           \n\t"    \
 429 -  "mul    %[temp19],    %["#TEMP0"],  %[c2217]           \n\t"    \
 430 -  "addu   %["#TEMP1"],  %["#TEMP3"],  %[temp20]          \n\t"    \
 431 -  "subu   %[temp20],    %["#TEMP3"],  %[temp20]          \n\t"    \
 432 -  "sll    %["#TEMP0"],  %["#TEMP1"],  3                  \n\t"    \
 433 -  "sll    %["#TEMP2"],  %[temp20],    3                  \n\t"    \
 434 +  "lbu    %[temp16],    2+"XSTR(BPS)"*" #A "(%[" #TEMP1 "])  \n\t"    \
 435 +  "lbu    %[temp17],    2+"XSTR(BPS)"*" #A "(%[" #TEMP2 "])  \n\t"    \
 436 +  "subu   %[" #TEMP0 "],  %[temp18],    %[temp19]          \n\t"    \
 437 +  "lbu    %[temp18],    3+"XSTR(BPS)"*" #A "(%[" #TEMP1 "])  \n\t"    \
 438 +  "lbu    %[temp19],    3+"XSTR(BPS)"*" #A "(%[" #TEMP2 "])  \n\t"    \
 439 +  "subu   %[" #TEMP1 "],  %[temp16],    %[temp17]          \n\t"    \
 440 +  "subu   %[" #TEMP2 "],  %[temp18],    %[temp19]          \n\t"    \
 441 +  "addu   %[" #TEMP3 "],  %[temp20],    %[" #TEMP2 "]        \n\t"    \
 442 +  "subu   %[" #TEMP2 "],  %[temp20],    %[" #TEMP2 "]        \n\t"    \
 443 +  "addu   %[temp20],    %[" #TEMP0 "],  %[" #TEMP1 "]        \n\t"    \
 444 +  "subu   %[" #TEMP0 "],  %[" #TEMP0 "],  %[" #TEMP1 "]        \n\t"    \
 445 +  "mul    %[temp16],    %[" #TEMP2 "],  %[c5352]           \n\t"    \
 446 +  "mul    %[temp17],    %[" #TEMP2 "],  %[c2217]           \n\t"    \
 447 +  "mul    %[temp18],    %[" #TEMP0 "],  %[c5352]           \n\t"    \
 448 +  "mul    %[temp19],    %[" #TEMP0 "],  %[c2217]           \n\t"    \
 449 +  "addu   %[" #TEMP1 "],  %[" #TEMP3 "],  %[temp20]          \n\t"    \
 450 +  "subu   %[temp20],    %[" #TEMP3 "],  %[temp20]          \n\t"    \
 451 +  "sll    %[" #TEMP0 "],  %[" #TEMP1 "],  3                  \n\t"    \
 452 +  "sll    %[" #TEMP2 "],  %[temp20],    3                  \n\t"    \
 453    "addiu  %[temp16],    %[temp16],    1812               \n\t"    \
 454    "addiu  %[temp17],    %[temp17],    937                \n\t"    \
 455    "addu   %[temp16],    %[temp16],    %[temp19]          \n\t"    \
 456    "subu   %[temp17],    %[temp17],    %[temp18]          \n\t"    \
 457 -  "sra    %["#TEMP1"],  %[temp16],    9                  \n\t"    \
 458 -  "sra    %["#TEMP3"],  %[temp17],    9                  \n\t"
 459 +  "sra    %[" #TEMP1 "],  %[temp16],    9                  \n\t"    \
 460 +  "sra    %[" #TEMP3 "],  %[temp17],    9                  \n\t"
 461
 462  // macro for one vertical pass in FTransform
 463  // temp0..temp15 holds tmp[0]..tmp[15]
 464  // A..D - offsets in bytes to store to out buffer
 465  // TEMP0, TEMP4, TEMP8 and TEMP12 - registers for corresponding tmp elements
 466  #define VERTICAL_PASS(A, B, C, D, TEMP0, TEMP4, TEMP8, TEMP12)  \
 467 -  "addu   %[temp16],    %["#TEMP0"],  %["#TEMP12"]     \n\t"    \
 468 -  "subu   %[temp19],    %["#TEMP0"],  %["#TEMP12"]     \n\t"    \
 469 -  "addu   %[temp17],    %["#TEMP4"],  %["#TEMP8"]      \n\t"    \
 470 -  "subu   %[temp18],    %["#TEMP4"],  %["#TEMP8"]      \n\t"    \
 471 -  "mul    %["#TEMP8"],  %[temp19],    %[c2217]         \n\t"    \
 472 -  "mul    %["#TEMP12"], %[temp18],    %[c2217]         \n\t"    \
 473 -  "mul    %["#TEMP4"],  %[temp19],    %[c5352]         \n\t"    \
 474 +  "addu   %[temp16],    %[" #TEMP0 "],  %[" #TEMP12 "]     \n\t"    \
 475 +  "subu   %[temp19],    %[" #TEMP0 "],  %[" #TEMP12 "]     \n\t"    \
 476 +  "addu   %[temp17],    %[" #TEMP4 "],  %[" #TEMP8 "]      \n\t"    \
 477 +  "subu   %[temp18],    %[" #TEMP4 "],  %[" #TEMP8 "]      \n\t"    \
 478 +  "mul    %[" #TEMP8 "],  %[temp19],    %[c2217]         \n\t"    \
 479 +  "mul    %[" #TEMP12 "], %[temp18],    %[c2217]         \n\t"    \
 480 +  "mul    %[" #TEMP4 "],  %[temp19],    %[c5352]         \n\t"    \
 481    "mul    %[temp18],    %[temp18],    %[c5352]         \n\t"    \
 482    "addiu  %[temp16],    %[temp16],    7                \n\t"    \
 483 -  "addu   %["#TEMP0"],  %[temp16],    %[temp17]        \n\t"    \
 484 -  "sra    %["#TEMP0"],  %["#TEMP0"],  4                \n\t"    \
 485 -  "addu   %["#TEMP12"], %["#TEMP12"], %["#TEMP4"]      \n\t"    \
 486 -  "subu   %["#TEMP4"],  %[temp16],    %[temp17]        \n\t"    \
 487 -  "sra    %["#TEMP4"],  %["#TEMP4"],  4                \n\t"    \
 488 -  "addiu  %["#TEMP8"],  %["#TEMP8"],  30000            \n\t"    \
 489 -  "addiu  %["#TEMP12"], %["#TEMP12"], 12000            \n\t"    \
 490 -  "addiu  %["#TEMP8"],  %["#TEMP8"],  21000            \n\t"    \
 491 -  "subu   %["#TEMP8"],  %["#TEMP8"],  %[temp18]        \n\t"    \
 492 -  "sra    %["#TEMP12"], %["#TEMP12"], 16               \n\t"    \
 493 -  "sra    %["#TEMP8"],  %["#TEMP8"],  16               \n\t"    \
 494 -  "addiu  %[temp16],    %["#TEMP12"], 1                \n\t"    \
 495 -  "movn   %["#TEMP12"], %[temp16],    %[temp19]        \n\t"    \
 496 -  "sh     %["#TEMP0"],  "#A"(%[temp20])                \n\t"    \
 497 -  "sh     %["#TEMP4"],  "#C"(%[temp20])                \n\t"    \
 498 -  "sh     %["#TEMP8"],  "#D"(%[temp20])                \n\t"    \
 499 -  "sh     %["#TEMP12"], "#B"(%[temp20])                \n\t"
 500 +  "addu   %[" #TEMP0 "],  %[temp16],    %[temp17]        \n\t"    \
 501 +  "sra    %[" #TEMP0 "],  %[" #TEMP0 "],  4                \n\t"    \
 502 +  "addu   %[" #TEMP12 "], %[" #TEMP12 "], %[" #TEMP4 "]      \n\t"    \
 503 +  "subu   %[" #TEMP4 "],  %[temp16],    %[temp17]        \n\t"    \
 504 +  "sra    %[" #TEMP4 "],  %[" #TEMP4 "],  4                \n\t"    \
 505 +  "addiu  %[" #TEMP8 "],  %[" #TEMP8 "],  30000            \n\t"    \
 506 +  "addiu  %[" #TEMP12 "], %[" #TEMP12 "], 12000            \n\t"    \
 507 +  "addiu  %[" #TEMP8 "],  %[" #TEMP8 "],  21000            \n\t"    \
 508 +  "subu   %[" #TEMP8 "],  %[" #TEMP8 "],  %[temp18]        \n\t"    \
 509 +  "sra    %[" #TEMP12 "], %[" #TEMP12 "], 16               \n\t"    \
 510 +  "sra    %[" #TEMP8 "],  %[" #TEMP8 "],  16               \n\t"    \
 511 +  "addiu  %[temp16],    %[" #TEMP12 "], 1                \n\t"    \
 512 +  "movn   %[" #TEMP12 "], %[temp16],    %[temp19]        \n\t"    \
 513 +  "sh     %[" #TEMP0 "],  " #A "(%[temp20])                \n\t"    \
 514 +  "sh     %[" #TEMP4 "],  " #C "(%[temp20])                \n\t"    \
 515 +  "sh     %[" #TEMP8 "],  " #D "(%[temp20])                \n\t"    \
 516 +  "sh     %[" #TEMP12 "], " #B "(%[temp20])                \n\t"
 517
 518  static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
 519    int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
 520 @@ -516,14 +516,14 @@ static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
 521  #if !defined(WORK_AROUND_GCC)
 522
 523  #define GET_SSE_INNER(A, B, C, D)                               \
 524 -  "lbu     %[temp0],    "#A"(%[a])                   \n\t"      \
 525 -  "lbu     %[temp1],    "#A"(%[b])                   \n\t"      \
 526 -  "lbu     %[temp2],    "#B"(%[a])                   \n\t"      \
 527 -  "lbu     %[temp3],    "#B"(%[b])                   \n\t"      \
 528 -  "lbu     %[temp4],    "#C"(%[a])                   \n\t"      \
 529 -  "lbu     %[temp5],    "#C"(%[b])                   \n\t"      \
 530 -  "lbu     %[temp6],    "#D"(%[a])                   \n\t"      \
 531 -  "lbu     %[temp7],    "#D"(%[b])                   \n\t"      \
 532 +  "lbu     %[temp0],    " #A "(%[a])                   \n\t"      \
 533 +  "lbu     %[temp1],    " #A "(%[b])                   \n\t"      \
 534 +  "lbu     %[temp2],    " #B "(%[a])                   \n\t"      \
 535 +  "lbu     %[temp3],    " #B "(%[b])                   \n\t"      \
 536 +  "lbu     %[temp4],    " #C "(%[a])                   \n\t"      \
 537 +  "lbu     %[temp5],    " #C "(%[b])                   \n\t"      \
 538 +  "lbu     %[temp6],    " #D "(%[a])                   \n\t"      \
 539 +  "lbu     %[temp7],    " #D "(%[b])                   \n\t"      \
 540    "subu    %[temp0],    %[temp0],     %[temp1]       \n\t"      \
 541    "subu    %[temp2],    %[temp2],     %[temp3]       \n\t"      \
 542    "subu    %[temp4],    %[temp4],     %[temp5]       \n\t"      \
 543 diff --git a/Source/LibWebP/src/dsp/dsp.enc_mips_dsp_r2.c b/Source/LibWebP/src/dsp/dsp.enc_mips_dsp_r2.c
 544 index ec58efe..1a3f968 100644
 545 --- a/Source/LibWebP/src/dsp/dsp.enc_mips_dsp_r2.c
 546 +++ b/Source/LibWebP/src/dsp/dsp.enc_mips_dsp_r2.c
 547 @@ -27,25 +27,25 @@ static const int kC2 = 35468;
 548  // I - input (macro doesn't change it)
 549  #define ADD_SUB_HALVES_X4(O0, O1, O2, O3, O4, O5, O6, O7,                      \
 550                            I0, I1, I2, I3, I4, I5, I6, I7)                      \
 551 -  "addq.ph          %["#O0"],   %["#I0"],  %["#I1"]           \n\t"            \
 552 -  "subq.ph          %["#O1"],   %["#I0"],  %["#I1"]           \n\t"            \
 553 -  "addq.ph          %["#O2"],   %["#I2"],  %["#I3"]           \n\t"            \
 554 -  "subq.ph          %["#O3"],   %["#I2"],  %["#I3"]           \n\t"            \
 555 -  "addq.ph          %["#O4"],   %["#I4"],  %["#I5"]           \n\t"            \
 556 -  "subq.ph          %["#O5"],   %["#I4"],  %["#I5"]           \n\t"            \
 557 -  "addq.ph          %["#O6"],   %["#I6"],  %["#I7"]           \n\t"            \
 558 -  "subq.ph          %["#O7"],   %["#I6"],  %["#I7"]           \n\t"
 559 +  "addq.ph          %[" #O0 "],   %[" #I0 "],  %[" #I1 "]           \n\t"            \
 560 +  "subq.ph          %[" #O1 "],   %[" #I0 "],  %[" #I1 "]           \n\t"            \
 561 +  "addq.ph          %[" #O2 "],   %[" #I2 "],  %[" #I3 "]           \n\t"            \
 562 +  "subq.ph          %[" #O3 "],   %[" #I2 "],  %[" #I3 "]           \n\t"            \
 563 +  "addq.ph          %[" #O4 "],   %[" #I4 "],  %[" #I5 "]           \n\t"            \
 564 +  "subq.ph          %[" #O5 "],   %[" #I4 "],  %[" #I5 "]           \n\t"            \
 565 +  "addq.ph          %[" #O6 "],   %[" #I6 "],  %[" #I7 "]           \n\t"            \
 566 +  "subq.ph          %[" #O7 "],   %[" #I6 "],  %[" #I7 "]           \n\t"
 567
 568  // IO - input/output
 569  #define ABS_X8(IO0, IO1, IO2, IO3, IO4, IO5, IO6, IO7)                         \
 570 -  "absq_s.ph        %["#IO0"],   %["#IO0"]                    \n\t"            \
 571 -  "absq_s.ph        %["#IO1"],   %["#IO1"]                    \n\t"            \
 572 -  "absq_s.ph        %["#IO2"],   %["#IO2"]                    \n\t"            \
 573 -  "absq_s.ph        %["#IO3"],   %["#IO3"]                    \n\t"            \
 574 -  "absq_s.ph        %["#IO4"],   %["#IO4"]                    \n\t"            \
 575 -  "absq_s.ph        %["#IO5"],   %["#IO5"]                    \n\t"            \
 576 -  "absq_s.ph        %["#IO6"],   %["#IO6"]                    \n\t"            \
 577 -  "absq_s.ph        %["#IO7"],   %["#IO7"]                    \n\t"
 578 +  "absq_s.ph        %[" #IO0 "],   %[" #IO0 "]                    \n\t"            \
 579 +  "absq_s.ph        %[" #IO1 "],   %[" #IO1 "]                    \n\t"            \
 580 +  "absq_s.ph        %[" #IO2 "],   %[" #IO2 "]                    \n\t"            \
 581 +  "absq_s.ph        %[" #IO3 "],   %[" #IO3 "]                    \n\t"            \
 582 +  "absq_s.ph        %[" #IO4 "],   %[" #IO4 "]                    \n\t"            \
 583 +  "absq_s.ph        %[" #IO5 "],   %[" #IO5 "]                    \n\t"            \
 584 +  "absq_s.ph        %[" #IO6 "],   %[" #IO6 "]                    \n\t"            \
 585 +  "absq_s.ph        %[" #IO7 "],   %[" #IO7 "]                    \n\t"
 586
 587  // dpa.w.ph $ac0 temp0 ,temp1
 588  //  $ac += temp0[31..16] * temp1[31..16] + temp0[15..0] * temp1[15..0]
 589 @@ -56,15 +56,15 @@ static const int kC2 = 35468;
 590  #define MUL_HALF(O0, I0, I1, I2, I3, I4, I5, I6, I7,                           \
 591                   I8, I9, I10, I11, I12, I13, I14, I15)                         \
 592      "mult            $ac0,      $zero,     $zero              \n\t"            \
 593 -    "dpa.w.ph        $ac0,      %["#I2"],  %["#I0"]           \n\t"            \
 594 -    "dpax.w.ph       $ac0,      %["#I5"],  %["#I6"]           \n\t"            \
 595 -    "dpa.w.ph        $ac0,      %["#I8"],  %["#I9"]           \n\t"            \
 596 -    "dpax.w.ph       $ac0,      %["#I11"], %["#I4"]           \n\t"            \
 597 -    "dpa.w.ph        $ac0,      %["#I12"], %["#I7"]           \n\t"            \
 598 -    "dpax.w.ph       $ac0,      %["#I13"], %["#I1"]           \n\t"            \
 599 -    "dpa.w.ph        $ac0,      %["#I14"], %["#I3"]           \n\t"            \
 600 -    "dpax.w.ph       $ac0,      %["#I15"], %["#I10"]          \n\t"            \
 601 -    "mflo            %["#O0"],  $ac0                          \n\t"
 602 +    "dpa.w.ph        $ac0,      %[" #I2 "],  %[" #I0 "]           \n\t"            \
 603 +    "dpax.w.ph       $ac0,      %[" #I5 "],  %[" #I6 "]           \n\t"            \
 604 +    "dpa.w.ph        $ac0,      %[" #I8 "],  %[" #I9 "]           \n\t"            \
 605 +    "dpax.w.ph       $ac0,      %[" #I11 "], %[" #I4 "]           \n\t"            \
 606 +    "dpa.w.ph        $ac0,      %[" #I12 "], %[" #I7 "]           \n\t"            \
 607 +    "dpax.w.ph       $ac0,      %[" #I13 "], %[" #I1 "]           \n\t"            \
 608 +    "dpa.w.ph        $ac0,      %[" #I14 "], %[" #I3 "]           \n\t"            \
 609 +    "dpax.w.ph       $ac0,      %[" #I15 "], %[" #I10 "]          \n\t"            \
 610 +    "mflo            %[" #O0 "],  $ac0                          \n\t"
 611
 612  #define OUTPUT_EARLY_CLOBBER_REGS_17()                                         \
 613    OUTPUT_EARLY_CLOBBER_REGS_10(),                                              \
 614 @@ -77,69 +77,69 @@ static const int kC2 = 35468;
 615  // A - offset in bytes to load from src and ref buffers
 616  // TEMP0..TEMP3 - registers for corresponding tmp elements
 617  #define HORIZONTAL_PASS(A, TEMP0, TEMP1, TEMP2, TEMP3)                         \
 618 -  "lw              %["#TEMP0"],   0(%[args])                        \n\t"      \
 619 -  "lw              %["#TEMP1"],   4(%[args])                        \n\t"      \
 620 -  "lw              %["#TEMP2"],   "XSTR(BPS)"*"#A"(%["#TEMP0"])     \n\t"      \
 621 -  "lw              %["#TEMP3"],   "XSTR(BPS)"*"#A"(%["#TEMP1"])     \n\t"      \
 622 -  "preceu.ph.qbl   %["#TEMP0"],   %["#TEMP2"]                       \n\t"      \
 623 -  "preceu.ph.qbl   %["#TEMP1"],   %["#TEMP3"]                       \n\t"      \
 624 -  "preceu.ph.qbr   %["#TEMP2"],   %["#TEMP2"]                       \n\t"      \
 625 -  "preceu.ph.qbr   %["#TEMP3"],   %["#TEMP3"]                       \n\t"      \
 626 -  "subq.ph         %["#TEMP0"],   %["#TEMP0"],   %["#TEMP1"]        \n\t"      \
 627 -  "subq.ph         %["#TEMP2"],   %["#TEMP2"],   %["#TEMP3"]        \n\t"      \
 628 -  "rotr            %["#TEMP0"],   %["#TEMP0"],   16                 \n\t"      \
 629 -  "addq.ph         %["#TEMP1"],   %["#TEMP2"],   %["#TEMP0"]        \n\t"      \
 630 -  "subq.ph         %["#TEMP3"],   %["#TEMP2"],   %["#TEMP0"]        \n\t"      \
 631 -  "seh             %["#TEMP0"],   %["#TEMP1"]                       \n\t"      \
 632 -  "sra             %[temp16],     %["#TEMP1"],   16                 \n\t"      \
 633 -  "seh             %[temp19],     %["#TEMP3"]                       \n\t"      \
 634 -  "sra             %["#TEMP3"],   %["#TEMP3"],   16                 \n\t"      \
 635 -  "subu            %["#TEMP2"],   %["#TEMP0"],   %[temp16]          \n\t"      \
 636 -  "addu            %["#TEMP0"],   %["#TEMP0"],   %[temp16]          \n\t"      \
 637 +  "lw              %[" #TEMP0 "],   0(%[args])                        \n\t"      \
 638 +  "lw              %[" #TEMP1 "],   4(%[args])                        \n\t"      \
 639 +  "lw              %[" #TEMP2 "],   "XSTR(BPS)"*" #A "(%[" #TEMP0 "])     \n\t"      \
 640 +  "lw              %[" #TEMP3 "],   "XSTR(BPS)"*" #A "(%[" #TEMP1 "])     \n\t"      \
 641 +  "preceu.ph.qbl   %[" #TEMP0 "],   %[" #TEMP2 "]                       \n\t"      \
 642 +  "preceu.ph.qbl   %[" #TEMP1 "],   %[" #TEMP3 "]                       \n\t"      \
 643 +  "preceu.ph.qbr   %[" #TEMP2 "],   %[" #TEMP2 "]                       \n\t"      \
 644 +  "preceu.ph.qbr   %[" #TEMP3 "],   %[" #TEMP3 "]                       \n\t"      \
 645 +  "subq.ph         %[" #TEMP0 "],   %[" #TEMP0 "],   %[" #TEMP1 "]        \n\t"      \
 646 +  "subq.ph         %[" #TEMP2 "],   %[" #TEMP2 "],   %[" #TEMP3 "]        \n\t"      \
 647 +  "rotr            %[" #TEMP0 "],   %[" #TEMP0 "],   16                 \n\t"      \
 648 +  "addq.ph         %[" #TEMP1 "],   %[" #TEMP2 "],   %[" #TEMP0 "]        \n\t"      \
 649 +  "subq.ph         %[" #TEMP3 "],   %[" #TEMP2 "],   %[" #TEMP0 "]        \n\t"      \
 650 +  "seh             %[" #TEMP0 "],   %[" #TEMP1 "]                       \n\t"      \
 651 +  "sra             %[temp16],     %[" #TEMP1 "],   16                 \n\t"      \
 652 +  "seh             %[temp19],     %[" #TEMP3 "]                       \n\t"      \
 653 +  "sra             %[" #TEMP3 "],   %[" #TEMP3 "],   16                 \n\t"      \
 654 +  "subu            %[" #TEMP2 "],   %[" #TEMP0 "],   %[temp16]          \n\t"      \
 655 +  "addu            %[" #TEMP0 "],   %[" #TEMP0 "],   %[temp16]          \n\t"      \
 656    "mul             %[temp17],     %[temp19],     %[c2217]           \n\t"      \
 657 -  "mul             %[temp18],     %["#TEMP3"],   %[c5352]           \n\t"      \
 658 -  "mul             %["#TEMP1"],   %[temp19],     %[c5352]           \n\t"      \
 659 -  "mul             %[temp16],     %["#TEMP3"],   %[c2217]           \n\t"      \
 660 -  "sll             %["#TEMP2"],   %["#TEMP2"],   3                  \n\t"      \
 661 -  "sll             %["#TEMP0"],   %["#TEMP0"],   3                  \n\t"      \
 662 -  "subu            %["#TEMP3"],   %[temp17],     %[temp18]          \n\t"      \
 663 -  "addu            %["#TEMP1"],   %[temp16],     %["#TEMP1"]        \n\t"      \
 664 -  "addiu           %["#TEMP3"],   %["#TEMP3"],   937                \n\t"      \
 665 -  "addiu           %["#TEMP1"],   %["#TEMP1"],   1812               \n\t"      \
 666 -  "sra             %["#TEMP3"],   %["#TEMP3"],   9                  \n\t"      \
 667 -  "sra             %["#TEMP1"],   %["#TEMP1"],   9                  \n\t"
 668 +  "mul             %[temp18],     %[" #TEMP3 "],   %[c5352]           \n\t"      \
 669 +  "mul             %[" #TEMP1 "],   %[temp19],     %[c5352]           \n\t"      \
 670 +  "mul             %[temp16],     %[" #TEMP3 "],   %[c2217]           \n\t"      \
 671 +  "sll             %[" #TEMP2 "],   %[" #TEMP2 "],   3                  \n\t"      \
 672 +  "sll             %[" #TEMP0 "],   %[" #TEMP0 "],   3                  \n\t"      \
 673 +  "subu            %[" #TEMP3 "],   %[temp17],     %[temp18]          \n\t"      \
 674 +  "addu            %[" #TEMP1 "],   %[temp16],     %[" #TEMP1 "]        \n\t"      \
 675 +  "addiu           %[" #TEMP3 "],   %[" #TEMP3 "],   937                \n\t"      \
 676 +  "addiu           %[" #TEMP1 "],   %[" #TEMP1 "],   1812               \n\t"      \
 677 +  "sra             %[" #TEMP3 "],   %[" #TEMP3 "],   9                  \n\t"      \
 678 +  "sra             %[" #TEMP1 "],   %[" #TEMP1 "],   9                  \n\t"
 679
 680  // macro for one vertical pass in FTransform
 681  // temp0..temp15 holds tmp[0]..tmp[15]
 682  // A..D - offsets in bytes to store to out buffer
 683  // TEMP0, TEMP4, TEMP8 and TEMP12 - registers for corresponding tmp elements
 684  #define VERTICAL_PASS(A, B, C, D, TEMP0, TEMP4, TEMP8, TEMP12)                 \
 685 -  "addu            %[temp16],     %["#TEMP0"],   %["#TEMP12"] \n\t"            \
 686 -  "subu            %[temp19],     %["#TEMP0"],   %["#TEMP12"] \n\t"            \
 687 -  "addu            %[temp17],     %["#TEMP4"],   %["#TEMP8"]  \n\t"            \
 688 -  "subu            %[temp18],     %["#TEMP4"],   %["#TEMP8"]  \n\t"            \
 689 -  "mul             %["#TEMP8"],   %[temp19],     %[c2217]     \n\t"            \
 690 -  "mul             %["#TEMP12"],  %[temp18],     %[c2217]     \n\t"            \
 691 -  "mul             %["#TEMP4"],   %[temp19],     %[c5352]     \n\t"            \
 692 +  "addu            %[temp16],     %[" #TEMP0 "],   %[" #TEMP12 "] \n\t"            \
 693 +  "subu            %[temp19],     %[" #TEMP0 "],   %[" #TEMP12 "] \n\t"            \
 694 +  "addu            %[temp17],     %[" #TEMP4 "],   %[" #TEMP8 "]  \n\t"            \
 695 +  "subu            %[temp18],     %[" #TEMP4 "],   %[" #TEMP8 "]  \n\t"            \
 696 +  "mul             %[" #TEMP8 "],   %[temp19],     %[c2217]     \n\t"            \
 697 +  "mul             %[" #TEMP12 "],  %[temp18],     %[c2217]     \n\t"            \
 698 +  "mul             %[" #TEMP4 "],   %[temp19],     %[c5352]     \n\t"            \
 699    "mul             %[temp18],     %[temp18],     %[c5352]     \n\t"            \
 700    "addiu           %[temp16],     %[temp16],     7            \n\t"            \
 701 -  "addu            %["#TEMP0"],   %[temp16],     %[temp17]    \n\t"            \
 702 -  "sra             %["#TEMP0"],   %["#TEMP0"],   4            \n\t"            \
 703 -  "addu            %["#TEMP12"],  %["#TEMP12"],  %["#TEMP4"]  \n\t"            \
 704 -  "subu            %["#TEMP4"],   %[temp16],     %[temp17]    \n\t"            \
 705 -  "sra             %["#TEMP4"],   %["#TEMP4"],   4            \n\t"            \
 706 -  "addiu           %["#TEMP8"],   %["#TEMP8"],   30000        \n\t"            \
 707 -  "addiu           %["#TEMP12"],  %["#TEMP12"],  12000        \n\t"            \
 708 -  "addiu           %["#TEMP8"],   %["#TEMP8"],   21000        \n\t"            \
 709 -  "subu            %["#TEMP8"],   %["#TEMP8"],   %[temp18]    \n\t"            \
 710 -  "sra             %["#TEMP12"],  %["#TEMP12"],  16           \n\t"            \
 711 -  "sra             %["#TEMP8"],   %["#TEMP8"],   16           \n\t"            \
 712 -  "addiu           %[temp16],     %["#TEMP12"],  1            \n\t"            \
 713 -  "movn            %["#TEMP12"],  %[temp16],     %[temp19]    \n\t"            \
 714 -  "sh              %["#TEMP0"],   "#A"(%[temp20])             \n\t"            \
 715 -  "sh              %["#TEMP4"],   "#C"(%[temp20])             \n\t"            \
 716 -  "sh              %["#TEMP8"],   "#D"(%[temp20])             \n\t"            \
 717 -  "sh              %["#TEMP12"],  "#B"(%[temp20])             \n\t"
 718 +  "addu            %[" #TEMP0 "],   %[temp16],     %[temp17]    \n\t"            \
 719 +  "sra             %[" #TEMP0 "],   %[" #TEMP0 "],   4            \n\t"            \
 720 +  "addu            %[" #TEMP12 "],  %[" #TEMP12 "],  %[" #TEMP4 "]  \n\t"            \
 721 +  "subu            %[" #TEMP4 "],   %[temp16],     %[temp17]    \n\t"            \
 722 +  "sra             %[" #TEMP4 "],   %[" #TEMP4 "],   4            \n\t"            \
 723 +  "addiu           %[" #TEMP8 "],   %[" #TEMP8 "],   30000        \n\t"            \
 724 +  "addiu           %[" #TEMP12 "],  %[" #TEMP12 "],  12000        \n\t"            \
 725 +  "addiu           %[" #TEMP8 "],   %[" #TEMP8 "],   21000        \n\t"            \
 726 +  "subu            %[" #TEMP8 "],   %[" #TEMP8 "],   %[temp18]    \n\t"            \
 727 +  "sra             %[" #TEMP12 "],  %[" #TEMP12 "],  16           \n\t"            \
 728 +  "sra             %[" #TEMP8 "],   %[" #TEMP8 "],   16           \n\t"            \
 729 +  "addiu           %[temp16],     %[" #TEMP12 "],  1            \n\t"            \
 730 +  "movn            %[" #TEMP12 "],  %[temp16],     %[temp19]    \n\t"            \
 731 +  "sh              %[" #TEMP0 "],   " #A "(%[temp20])             \n\t"            \
 732 +  "sh              %[" #TEMP4 "],   " #C "(%[temp20])             \n\t"            \
 733 +  "sh              %[" #TEMP8 "],   " #D "(%[temp20])             \n\t"            \
 734 +  "sh              %[" #TEMP12 "],  " #B "(%[temp20])             \n\t"
 735
 736  static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
 737    const int c2217 = 2217;
 738 @@ -329,11 +329,11 @@ static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
 739  // Intra predictions
 740
 741  #define FILL_PART(J, SIZE)                                          \
 742 -    "usw        %[value],  0+"#J"*"XSTR(BPS)"(%[dst])    \n\t"      \
 743 -    "usw        %[value],  4+"#J"*"XSTR(BPS)"(%[dst])    \n\t"      \
 744 -  ".if "#SIZE" == 16                                     \n\t"      \
 745 -    "usw        %[value],  8+"#J"*"XSTR(BPS)"(%[dst])    \n\t"      \
 746 -    "usw        %[value], 12+"#J"*"XSTR(BPS)"(%[dst])    \n\t"      \
 747 +    "usw        %[value],  0+" #J "*"XSTR(BPS)"(%[dst])    \n\t"      \
 748 +    "usw        %[value],  4+" #J "*"XSTR(BPS)"(%[dst])    \n\t"      \
 749 +  ".if " #SIZE " == 16                                     \n\t"      \
 750 +    "usw        %[value],  8+" #J "*"XSTR(BPS)"(%[dst])    \n\t"      \
 751 +    "usw        %[value], 12+" #J "*"XSTR(BPS)"(%[dst])    \n\t"      \
 752    ".endif                                                \n\t"
 753
 754  #define FILL_8_OR_16(DST, VALUE, SIZE) do {                         \
 755 @@ -348,7 +348,7 @@ static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
 756      FILL_PART( 5, SIZE)                                             \
 757      FILL_PART( 6, SIZE)                                             \
 758      FILL_PART( 7, SIZE)                                             \
 759 -  ".if "#SIZE" == 16                                     \n\t"      \
 760 +  ".if " #SIZE " == 16                                     \n\t"      \
 761      FILL_PART( 8, 16)                                               \
 762      FILL_PART( 9, 16)                                               \
 763      FILL_PART(10, 16)                                               \
 764 @@ -425,7 +425,7 @@ HORIZONTAL_PRED(dst, left, 16)
 765      CLIPPING()                                                                 \
 766      "usw             %[temp0],   0(%[dst])               \n\t"                 \
 767      "usw             %[temp1],   4(%[dst])               \n\t"                 \
 768 -  ".if "#SIZE" == 16                                     \n\t"                 \
 769 +  ".if " #SIZE " == 16                                     \n\t"                 \
 770      "ulw             %[temp0],   8(%[top])               \n\t"                 \
 771      "ulw             %[temp1],   12(%[top])              \n\t"                 \
 772      CLIPPING()                                                                 \
 773 @@ -1060,8 +1060,8 @@ static void Intra4Preds(uint8_t* dst, const uint8_t* top) {
 774  #if !defined(WORK_AROUND_GCC)
 775
 776  #define GET_SSE_INNER(A)                                                  \
 777 -  "lw               %[temp0],    "#A"(%[a])                    \n\t"      \
 778 -  "lw               %[temp1],    "#A"(%[b])                    \n\t"      \
 779 +  "lw               %[temp0],    " #A "(%[a])                    \n\t"      \
 780 +  "lw               %[temp1],    " #A "(%[b])                    \n\t"      \
 781    "preceu.ph.qbr    %[temp2],    %[temp0]                      \n\t"      \
 782    "preceu.ph.qbl    %[temp0],    %[temp0]                      \n\t"      \
 783    "preceu.ph.qbr    %[temp3],    %[temp1]                      \n\t"      \
 784 @@ -1185,28 +1185,28 @@ static int SSE4x4(const uint8_t* a, const uint8_t* b) {
 785  // N - offset in bytes (n * 2)
 786  // N1 - offset in bytes ((n + 1) * 2)
 787  #define QUANTIZE_ONE(J, K, N, N1)                                         \
 788 -  "ulw         %[temp1],     "#J"(%[ppin])                   \n\t"        \
 789 -  "ulw         %[temp2],     "#J"(%[ppsharpen])              \n\t"        \
 790 -  "lhu         %[temp3],     "#K"(%[ppzthresh])              \n\t"        \
 791 -  "lhu         %[temp6],     "#K"+4(%[ppzthresh])            \n\t"        \
 792 +  "ulw         %[temp1],     " #J "(%[ppin])                   \n\t"        \
 793 +  "ulw         %[temp2],     " #J "(%[ppsharpen])              \n\t"        \
 794 +  "lhu         %[temp3],     " #K "(%[ppzthresh])              \n\t"        \
 795 +  "lhu         %[temp6],     " #K "+4(%[ppzthresh])            \n\t"        \
 796    "absq_s.ph   %[temp4],     %[temp1]                        \n\t"        \
 797    "ins         %[temp3],     %[temp6],         16,       16  \n\t"        \
 798    "addu.ph     %[coeff],     %[temp4],         %[temp2]      \n\t"        \
 799    "shra.ph     %[sign],      %[temp1],         15            \n\t"        \
 800    "li          %[level],     0x10001                         \n\t"        \
 801    "cmp.lt.ph   %[temp3],     %[coeff]                        \n\t"        \
 802 -  "lhu         %[temp1],     "#J"(%[ppiq])                   \n\t"        \
 803 +  "lhu         %[temp1],     " #J "(%[ppiq])                   \n\t"        \
 804    "pick.ph     %[temp5],     %[level],         $0            \n\t"        \
 805 -  "lw          %[temp2],     "#K"(%[ppbias])                 \n\t"        \
 806 +  "lw          %[temp2],     " #K "(%[ppbias])                 \n\t"        \
 807    "beqz        %[temp5],     0f                              \n\t"        \
 808 -  "lhu         %[temp3],     "#J"(%[ppq])                    \n\t"        \
 809 +  "lhu         %[temp3],     " #J "(%[ppq])                    \n\t"        \
 810    "beq         %[temp5],     %[level],         1f            \n\t"        \
 811    "andi        %[temp5],     %[temp5],         0x1           \n\t"        \
 812    "andi        %[temp4],     %[coeff],         0xffff        \n\t"        \
 813    "beqz        %[temp5],     2f                              \n\t"        \
 814    "mul         %[level],     %[temp4],         %[temp1]      \n\t"        \
 815 -  "sh          $0,           "#J"+2(%[ppin])                 \n\t"        \
 816 -  "sh          $0,           "#N1"(%[pout])                  \n\t"        \
 817 +  "sh          $0,           " #J "+2(%[ppin])                 \n\t"        \
 818 +  "sh          $0,           " #N1 "(%[pout])                  \n\t"        \
 819    "addu        %[level],     %[level],         %[temp2]      \n\t"        \
 820    "sra         %[level],     %[level],         17            \n\t"        \
 821    "slt         %[temp4],     %[max_level],     %[level]      \n\t"        \
 822 @@ -1216,15 +1216,15 @@ static int SSE4x4(const uint8_t* a, const uint8_t* b) {
 823    "subu        %[level],     %[level],         %[temp6]      \n\t"        \
 824    "mul         %[temp5],     %[level],         %[temp3]      \n\t"        \
 825    "or          %[ret],       %[ret],           %[level]      \n\t"        \
 826 -  "sh          %[level],     "#N"(%[pout])                   \n\t"        \
 827 -  "sh          %[temp5],     "#J"(%[ppin])                   \n\t"        \
 828 +  "sh          %[level],     " #N "(%[pout])                   \n\t"        \
 829 +  "sh          %[temp5],     " #J "(%[ppin])                   \n\t"        \
 830    "j           3f                                            \n\t"        \
 831  "2:                                                          \n\t"        \
 832 -  "lhu         %[temp1],     "#J"+2(%[ppiq])                 \n\t"        \
 833 +  "lhu         %[temp1],     " #J "+2(%[ppiq])                 \n\t"        \
 834    "srl         %[temp5],     %[coeff],         16            \n\t"        \
 835    "mul         %[level],     %[temp5],         %[temp1]      \n\t"        \
 836 -  "lw          %[temp2],     "#K"+4(%[ppbias])               \n\t"        \
 837 -  "lhu         %[temp3],     "#J"+2(%[ppq])                  \n\t"        \
 838 +  "lw          %[temp2],     " #K "+4(%[ppbias])               \n\t"        \
 839 +  "lhu         %[temp3],     " #J "+2(%[ppq])                  \n\t"        \
 840    "addu        %[level],     %[level],         %[temp2]      \n\t"        \
 841    "sra         %[level],     %[level],         17            \n\t"        \
 842    "srl         %[temp6],     %[sign],          16            \n\t"        \
 843 @@ -1233,20 +1233,20 @@ static int SSE4x4(const uint8_t* a, const uint8_t* b) {
 844    "xor         %[level],     %[level],         %[temp6]      \n\t"        \
 845    "subu        %[level],     %[level],         %[temp6]      \n\t"        \
 846    "mul         %[temp5],     %[level],         %[temp3]      \n\t"        \
 847 -  "sh          $0,           "#J"(%[ppin])                   \n\t"        \
 848 -  "sh          $0,           "#N"(%[pout])                   \n\t"        \
 849 +  "sh          $0,           " #J "(%[ppin])                   \n\t"        \
 850 +  "sh          $0,           " #N "(%[pout])                   \n\t"        \
 851    "or          %[ret],       %[ret],           %[level]      \n\t"        \
 852 -  "sh          %[temp5],     "#J"+2(%[ppin])                 \n\t"        \
 853 -  "sh          %[level],     "#N1"(%[pout])                  \n\t"        \
 854 +  "sh          %[temp5],     " #J "+2(%[ppin])                 \n\t"        \
 855 +  "sh          %[level],     " #N1 "(%[pout])                  \n\t"        \
 856    "j           3f                                            \n\t"        \
 857  "1:                                                          \n\t"        \
 858 -  "lhu         %[temp1],     "#J"(%[ppiq])                   \n\t"        \
 859 -  "lw          %[temp2],     "#K"(%[ppbias])                 \n\t"        \
 860 -  "ulw         %[temp3],     "#J"(%[ppq])                    \n\t"        \
 861 +  "lhu         %[temp1],     " #J "(%[ppiq])                   \n\t"        \
 862 +  "lw          %[temp2],     " #K "(%[ppbias])                 \n\t"        \
 863 +  "ulw         %[temp3],     " #J "(%[ppq])                    \n\t"        \
 864    "andi        %[temp5],     %[coeff],         0xffff        \n\t"        \
 865    "srl         %[temp0],     %[coeff],         16            \n\t"        \
 866 -  "lhu         %[temp6],     "#J"+2(%[ppiq])                 \n\t"        \
 867 -  "lw          %[coeff],     "#K"+4(%[ppbias])               \n\t"        \
 868 +  "lhu         %[temp6],     " #J "+2(%[ppiq])                 \n\t"        \
 869 +  "lw          %[coeff],     " #K "+4(%[ppbias])               \n\t"        \
 870    "mul         %[level],     %[temp5],         %[temp1]      \n\t"        \
 871    "mul         %[temp4],     %[temp0],         %[temp6]      \n\t"        \
 872    "addu        %[level],     %[level],         %[temp2]      \n\t"        \
 873 @@ -1259,15 +1259,15 @@ static int SSE4x4(const uint8_t* a, const uint8_t* b) {
 874    "subu.ph     %[level],     %[level],         %[sign]       \n\t"        \
 875    "mul.ph      %[temp3],     %[level],         %[temp3]      \n\t"        \
 876    "or          %[ret],       %[ret],           %[level]      \n\t"        \
 877 -  "sh          %[level],     "#N"(%[pout])                   \n\t"        \
 878 +  "sh          %[level],     " #N "(%[pout])                   \n\t"        \
 879    "srl         %[level],     %[level],         16            \n\t"        \
 880 -  "sh          %[level],     "#N1"(%[pout])                  \n\t"        \
 881 -  "usw         %[temp3],     "#J"(%[ppin])                   \n\t"        \
 882 +  "sh          %[level],     " #N1 "(%[pout])                  \n\t"        \
 883 +  "usw         %[temp3],     " #J "(%[ppin])                   \n\t"        \
 884    "j           3f                                            \n\t"        \
 885  "0:                                                          \n\t"        \
 886 -  "sh          $0,           "#N"(%[pout])                   \n\t"        \
 887 -  "sh          $0,           "#N1"(%[pout])                  \n\t"        \
 888 -  "usw         $0,           "#J"(%[ppin])                   \n\t"        \
 889 +  "sh          $0,           " #N "(%[pout])                   \n\t"        \
 890 +  "sh          $0,           " #N1 "(%[pout])                  \n\t"        \
 891 +  "usw         $0,           " #J "(%[ppin])                   \n\t"        \
 892  "3:                                                          \n\t"
 893
 894  static int QuantizeBlock(int16_t in[16], int16_t out[16],
 895 @@ -1326,37 +1326,37 @@ static int Quantize2Blocks(int16_t in[32], int16_t out[32],
 896  // A, B, C, D - offset in bytes to load from in buffer
 897  // TEMP0, TEMP1 - registers for corresponding tmp elements
 898  #define HORIZONTAL_PASS_WHT(A, B, C, D, TEMP0, TEMP1)                          \
 899 -  "lh              %["#TEMP0"],  "#A"(%[in])                \n\t"              \
 900 -  "lh              %["#TEMP1"],  "#B"(%[in])                \n\t"              \
 901 -  "lh              %[temp8],     "#C"(%[in])                \n\t"              \
 902 -  "lh              %[temp9],     "#D"(%[in])                \n\t"              \
 903 -  "ins             %["#TEMP1"],  %["#TEMP0"],  16,  16      \n\t"              \
 904 +  "lh              %[" #TEMP0 "],  " #A "(%[in])                \n\t"              \
 905 +  "lh              %[" #TEMP1 "],  " #B "(%[in])                \n\t"              \
 906 +  "lh              %[temp8],     " #C "(%[in])                \n\t"              \
 907 +  "lh              %[temp9],     " #D "(%[in])                \n\t"              \
 908 +  "ins             %[" #TEMP1 "],  %[" #TEMP0 "],  16,  16      \n\t"              \
 909    "ins             %[temp9],     %[temp8],     16,  16      \n\t"              \
 910 -  "subq.ph         %[temp8],     %["#TEMP1"],  %[temp9]     \n\t"              \
 911 -  "addq.ph         %[temp9],     %["#TEMP1"],  %[temp9]     \n\t"              \
 912 -  "precrq.ph.w     %["#TEMP0"],  %[temp8],     %[temp9]     \n\t"              \
 913 +  "subq.ph         %[temp8],     %[" #TEMP1 "],  %[temp9]     \n\t"              \
 914 +  "addq.ph         %[temp9],     %[" #TEMP1 "],  %[temp9]     \n\t"              \
 915 +  "precrq.ph.w     %[" #TEMP0 "],  %[temp8],     %[temp9]     \n\t"              \
 916    "append          %[temp8],     %[temp9],     16           \n\t"              \
 917 -  "subq.ph         %["#TEMP1"],  %["#TEMP0"],  %[temp8]     \n\t"              \
 918 -  "addq.ph         %["#TEMP0"],  %["#TEMP0"],  %[temp8]     \n\t"              \
 919 -  "rotr            %["#TEMP1"],  %["#TEMP1"],  16           \n\t"
 920 +  "subq.ph         %[" #TEMP1 "],  %[" #TEMP0 "],  %[temp8]     \n\t"              \
 921 +  "addq.ph         %[" #TEMP0 "],  %[" #TEMP0 "],  %[temp8]     \n\t"              \
 922 +  "rotr            %[" #TEMP1 "],  %[" #TEMP1 "],  16           \n\t"
 923
 924  // macro for one vertical pass in FTransformWHT
 925  // temp0..temp7 holds tmp[0]..tmp[15]
 926  // A, B, C, D - offsets in bytes to store to out buffer
 927  // TEMP0, TEMP2, TEMP4 and TEMP6 - registers for corresponding tmp elements
 928  #define VERTICAL_PASS_WHT(A, B, C, D, TEMP0, TEMP2, TEMP4, TEMP6)              \
 929 -  "addq.ph         %[temp8],     %["#TEMP0"],  %["#TEMP4"]  \n\t"              \
 930 -  "addq.ph         %[temp9],     %["#TEMP2"],  %["#TEMP6"]  \n\t"              \
 931 -  "subq.ph         %["#TEMP2"],  %["#TEMP2"],  %["#TEMP6"]  \n\t"              \
 932 -  "subq.ph         %["#TEMP6"],  %["#TEMP0"],  %["#TEMP4"]  \n\t"              \
 933 -  "addqh.ph        %["#TEMP0"],  %[temp8],     %[temp9]     \n\t"              \
 934 -  "subqh.ph        %["#TEMP4"],  %["#TEMP6"],  %["#TEMP2"]  \n\t"              \
 935 -  "addqh.ph        %["#TEMP2"],  %["#TEMP2"],  %["#TEMP6"]  \n\t"              \
 936 -  "subqh.ph        %["#TEMP6"],  %[temp8],     %[temp9]     \n\t"              \
 937 -  "usw             %["#TEMP0"],  "#A"(%[out])               \n\t"              \
 938 -  "usw             %["#TEMP2"],  "#B"(%[out])               \n\t"              \
 939 -  "usw             %["#TEMP4"],  "#C"(%[out])               \n\t"              \
 940 -  "usw             %["#TEMP6"],  "#D"(%[out])               \n\t"
 941 +  "addq.ph         %[temp8],     %[" #TEMP0 "],  %[" #TEMP4 "]  \n\t"              \
 942 +  "addq.ph         %[temp9],     %[" #TEMP2 "],  %[" #TEMP6 "]  \n\t"              \
 943 +  "subq.ph         %[" #TEMP2 "],  %[" #TEMP2 "],  %[" #TEMP6 "]  \n\t"              \
 944 +  "subq.ph         %[" #TEMP6 "],  %[" #TEMP0 "],  %[" #TEMP4 "]  \n\t"              \
 945 +  "addqh.ph        %[" #TEMP0 "],  %[temp8],     %[temp9]     \n\t"              \
 946 +  "subqh.ph        %[" #TEMP4 "],  %[" #TEMP6 "],  %[" #TEMP2 "]  \n\t"              \
 947 +  "addqh.ph        %[" #TEMP2 "],  %[" #TEMP2 "],  %[" #TEMP6 "]  \n\t"              \
 948 +  "subqh.ph        %[" #TEMP6 "],  %[temp8],     %[temp9]     \n\t"              \
 949 +  "usw             %[" #TEMP0 "],  " #A "(%[out])               \n\t"              \
 950 +  "usw             %[" #TEMP2 "],  " #B "(%[out])               \n\t"              \
 951 +  "usw             %[" #TEMP4 "],  " #C "(%[out])               \n\t"              \
 952 +  "usw             %[" #TEMP6 "],  " #D "(%[out])               \n\t"
 953
 954  static void FTransformWHT(const int16_t* in, int16_t* out) {
 955    int temp0, temp1, temp2, temp3, temp4;
 956 @@ -1385,10 +1385,10 @@ static void FTransformWHT(const int16_t* in, int16_t* out) {
 957  // convert 8 coeffs at time
 958  // A, B, C, D - offsets in bytes to load from out buffer
 959  #define CONVERT_COEFFS_TO_BIN(A, B, C, D)                                      \
 960 -  "ulw        %[temp0],  "#A"(%[out])                  \n\t"                   \
 961 -  "ulw        %[temp1],  "#B"(%[out])                  \n\t"                   \
 962 -  "ulw        %[temp2],  "#C"(%[out])                  \n\t"                   \
 963 -  "ulw        %[temp3],  "#D"(%[out])                  \n\t"                   \
 964 +  "ulw        %[temp0],  " #A "(%[out])                  \n\t"                   \
 965 +  "ulw        %[temp1],  " #B "(%[out])                  \n\t"                   \
 966 +  "ulw        %[temp2],  " #C "(%[out])                  \n\t"                   \
 967 +  "ulw        %[temp3],  " #D "(%[out])                  \n\t"                   \
 968    "absq_s.ph  %[temp0],  %[temp0]                      \n\t"                   \
 969    "absq_s.ph  %[temp1],  %[temp1]                      \n\t"                   \
 970    "absq_s.ph  %[temp2],  %[temp2]                      \n\t"                   \
 971 diff --git a/Source/LibWebP/src/dsp/dsp.filters_mips_dsp_r2.c b/Source/LibWebP/src/dsp/dsp.filters_mips_dsp_r2.c
 972 index 6c34efb..6a1f8f4 100644
 973 --- a/Source/LibWebP/src/dsp/dsp.filters_mips_dsp_r2.c
 974 +++ b/Source/LibWebP/src/dsp/dsp.filters_mips_dsp_r2.c
 975 @@ -48,7 +48,7 @@
 976        "srl       %[temp0],    %[length],    0x2         \n\t"                  \
 977        "beqz      %[temp0],    4f                        \n\t"                  \
 978        " andi     %[temp6],    %[length],    0x3         \n\t"                  \
 979 -    ".if "#INVERSE"                                     \n\t"                  \
 980 +    ".if " #INVERSE "                                     \n\t"                  \
 981        "lbu       %[temp1],    -1(%[src])                \n\t"                  \
 982      "1:                                                 \n\t"                  \
 983        "lbu       %[temp2],    0(%[src])                 \n\t"                  \
 984 @@ -84,7 +84,7 @@
 985        "lbu       %[temp1],    -1(%[src])                \n\t"                  \
 986        "lbu       %[temp2],    0(%[src])                 \n\t"                  \
 987        "addiu     %[src],      %[src],       1           \n\t"                  \
 988 -    ".if "#INVERSE"                                     \n\t"                  \
 989 +    ".if " #INVERSE "                                     \n\t"                  \
 990        "addu      %[temp3],    %[temp1],     %[temp2]    \n\t"                  \
 991        "sb        %[temp3],    -1(%[src])                \n\t"                  \
 992      ".else                                              \n\t"                  \
 993 @@ -131,7 +131,7 @@ static WEBP_INLINE void PredictLine(const uint8_t* src, uint8_t* dst,
 994        "ulw       %[temp3],    4(%[src])                 \n\t"                  \
 995        "ulw       %[temp4],    4(%[pred])                \n\t"                  \
 996        "addiu     %[src],      %[src],       8           \n\t"                  \
 997 -    ".if "#INVERSE"                                     \n\t"                  \
 998 +    ".if " #INVERSE "                                     \n\t"                  \
 999        "addu.qb   %[temp5],    %[temp1],     %[temp2]    \n\t"                  \
1000        "addu.qb   %[temp6],    %[temp3],     %[temp4]    \n\t"                  \
1001      ".else                                              \n\t"                  \
1002 @@ -152,7 +152,7 @@ static WEBP_INLINE void PredictLine(const uint8_t* src, uint8_t* dst,
1003        "lbu       %[temp2],    0(%[pred])                \n\t"                  \
1004        "addiu     %[src],      %[src],       1           \n\t"                  \
1005        "addiu     %[pred],     %[pred],      1           \n\t"                  \
1006 -    ".if "#INVERSE"                                     \n\t"                  \
1007 +    ".if " #INVERSE "                                     \n\t"                  \
1008        "addu      %[temp3],    %[temp1],     %[temp2]    \n\t"                  \
1009      ".else                                              \n\t"                  \
1010        "subu      %[temp3],    %[temp1],     %[temp2]    \n\t"                  \
1011 @@ -177,7 +177,7 @@ static WEBP_INLINE void PredictLine(const uint8_t* src, uint8_t* dst,
1012      __asm__ volatile (                                                         \
1013        "lbu       %[temp1],   0(%[src])               \n\t"                     \
1014        "lbu       %[temp2],   0(%[pred])              \n\t"                     \
1015 -    ".if "#INVERSE"                                  \n\t"                     \
1016 +    ".if " #INVERSE "                                  \n\t"                     \
1017        "addu      %[temp3],   %[temp1],   %[temp2]    \n\t"                     \
1018      ".else                                           \n\t"                     \
1019        "subu      %[temp3],   %[temp1],   %[temp2]    \n\t"                     \
1020 diff --git a/Source/LibWebP/src/dsp/dsp.lossless_mips32.c b/Source/LibWebP/src/dsp/dsp.lossless_mips32.c
1021 index 68fbe85..abe97c1 100644
1022 --- a/Source/LibWebP/src/dsp/dsp.lossless_mips32.c
1023 +++ b/Source/LibWebP/src/dsp/dsp.lossless_mips32.c
1024 @@ -285,28 +285,28 @@ static VP8LStreaks HuffmanCostCombinedCount(const uint32_t* X,
1025  // literal_ and successive histograms could be unaligned
1026  // so we must use ulw and usw
1027  #define ADD_TO_OUT(A, B, C, D, E, P0, P1, P2)           \
1028 -    "ulw    %[temp0], "#A"(%["#P0"])        \n\t"       \
1029 -    "ulw    %[temp1], "#B"(%["#P0"])        \n\t"       \
1030 -    "ulw    %[temp2], "#C"(%["#P0"])        \n\t"       \
1031 -    "ulw    %[temp3], "#D"(%["#P0"])        \n\t"       \
1032 -    "ulw    %[temp4], "#A"(%["#P1"])        \n\t"       \
1033 -    "ulw    %[temp5], "#B"(%["#P1"])        \n\t"       \
1034 -    "ulw    %[temp6], "#C"(%["#P1"])        \n\t"       \
1035 -    "ulw    %[temp7], "#D"(%["#P1"])        \n\t"       \
1036 +    "ulw    %[temp0], " #A "(%[" #P0 "])        \n\t"       \
1037 +    "ulw    %[temp1], " #B "(%[" #P0 "])        \n\t"       \
1038 +    "ulw    %[temp2], " #C "(%[" #P0 "])        \n\t"       \
1039 +    "ulw    %[temp3], " #D "(%[" #P0 "])        \n\t"       \
1040 +    "ulw    %[temp4], " #A "(%[" #P1 "])        \n\t"       \
1041 +    "ulw    %[temp5], " #B "(%[" #P1 "])        \n\t"       \
1042 +    "ulw    %[temp6], " #C "(%[" #P1 "])        \n\t"       \
1043 +    "ulw    %[temp7], " #D "(%[" #P1 "])        \n\t"       \
1044      "addu   %[temp4], %[temp4],   %[temp0]  \n\t"       \
1045      "addu   %[temp5], %[temp5],   %[temp1]  \n\t"       \
1046      "addu   %[temp6], %[temp6],   %[temp2]  \n\t"       \
1047      "addu   %[temp7], %[temp7],   %[temp3]  \n\t"       \
1048 -    "addiu  %["#P0"],  %["#P0"],  16        \n\t"       \
1049 -  ".if "#E" == 1                            \n\t"       \
1050 -    "addiu  %["#P1"],  %["#P1"],  16        \n\t"       \
1051 +    "addiu  %[" #P0 "],  %[" #P0 "],  16        \n\t"       \
1052 +  ".if " #E " == 1                            \n\t"       \
1053 +    "addiu  %[" #P1 "],  %[" #P1 "],  16        \n\t"       \
1054    ".endif                                   \n\t"       \
1055 -    "usw    %[temp4], "#A"(%["#P2"])        \n\t"       \
1056 -    "usw    %[temp5], "#B"(%["#P2"])        \n\t"       \
1057 -    "usw    %[temp6], "#C"(%["#P2"])        \n\t"       \
1058 -    "usw    %[temp7], "#D"(%["#P2"])        \n\t"       \
1059 -    "addiu  %["#P2"], %["#P2"],   16        \n\t"       \
1060 -    "bne    %["#P0"], %[LoopEnd], 1b        \n\t"       \
1061 +    "usw    %[temp4], " #A "(%[" #P2 "])        \n\t"       \
1062 +    "usw    %[temp5], " #B "(%[" #P2 "])        \n\t"       \
1063 +    "usw    %[temp6], " #C "(%[" #P2 "])        \n\t"       \
1064 +    "usw    %[temp7], " #D "(%[" #P2 "])        \n\t"       \
1065 +    "addiu  %[" #P2 "], %[" #P2 "],   16        \n\t"       \
1066 +    "bne    %[" #P0 "], %[LoopEnd], 1b        \n\t"       \
1067      ".set   pop                             \n\t"       \
1068
1069  #define ASM_END_COMMON_0                                \
1070 diff --git a/Source/LibWebP/src/dsp/dsp.lossless_mips_dsp_r2.c b/Source/LibWebP/src/dsp/dsp.lossless_mips_dsp_r2.c
1071 index 821cda9..31ac181 100644
1072 --- a/Source/LibWebP/src/dsp/dsp.lossless_mips_dsp_r2.c
1073 +++ b/Source/LibWebP/src/dsp/dsp.lossless_mips_dsp_r2.c
1074 @@ -29,14 +29,14 @@ static void FUNC_NAME(const TYPE* src,                                         \
1075      for (x = 0; x < (width >> 2); ++x) {                                       \
1076        int tmp1, tmp2, tmp3, tmp4;                                              \
1077        __asm__ volatile (                                                       \
1078 -      ".ifc        "#TYPE",  uint8_t                    \n\t"                  \
1079 +      ".ifc        " #TYPE ",  uint8_t                    \n\t"                  \
1080          "lbu       %[tmp1],  0(%[src])                  \n\t"                  \
1081          "lbu       %[tmp2],  1(%[src])                  \n\t"                  \
1082          "lbu       %[tmp3],  2(%[src])                  \n\t"                  \
1083          "lbu       %[tmp4],  3(%[src])                  \n\t"                  \
1084          "addiu     %[src],   %[src],      4             \n\t"                  \
1085        ".endif                                           \n\t"                  \
1086 -      ".ifc        "#TYPE",  uint32_t                   \n\t"                  \
1087 +      ".ifc        " #TYPE ",  uint32_t                   \n\t"                  \
1088          "lw        %[tmp1],  0(%[src])                  \n\t"                  \
1089          "lw        %[tmp2],  4(%[src])                  \n\t"                  \
1090          "lw        %[tmp3],  8(%[src])                  \n\t"                  \
1091 @@ -55,7 +55,7 @@ static void FUNC_NAME(const TYPE* src,                                         \
1092          "lwx       %[tmp2],  %[tmp2](%[color_map])      \n\t"                  \
1093          "lwx       %[tmp3],  %[tmp3](%[color_map])      \n\t"                  \
1094          "lwx       %[tmp4],  %[tmp4](%[color_map])      \n\t"                  \
1095 -      ".ifc        "#TYPE",  uint8_t                    \n\t"                  \
1096 +      ".ifc        " #TYPE ",  uint8_t                    \n\t"                  \
1097          "ext       %[tmp1],  %[tmp1],     8,        8   \n\t"                  \
1098          "ext       %[tmp2],  %[tmp2],     8,        8   \n\t"                  \
1099          "ext       %[tmp3],  %[tmp3],     8,        8   \n\t"                  \
1100 @@ -66,7 +66,7 @@ static void FUNC_NAME(const TYPE* src,                                         \
1101          "sb        %[tmp4],  3(%[dst])                  \n\t"                  \
1102          "addiu     %[dst],   %[dst],      4             \n\t"                  \
1103        ".endif                                           \n\t"                  \
1104 -      ".ifc        "#TYPE",  uint32_t                   \n\t"                  \
1105 +      ".ifc        " #TYPE ",  uint32_t                   \n\t"                  \
1106          "sw        %[tmp1],  0(%[dst])                  \n\t"                  \
1107          "sw        %[tmp2],  4(%[dst])                  \n\t"                  \
1108          "sw        %[tmp3],  8(%[dst])                  \n\t"                  \
1109 diff --git a/Source/LibWebP/src/dsp/dsp.upsampling_mips_dsp_r2.c b/Source/LibWebP/src/dsp/dsp.upsampling_mips_dsp_r2.c
1110 index a7864a0..cb3adfe 100644
1111 --- a/Source/LibWebP/src/dsp/dsp.upsampling_mips_dsp_r2.c
1112 +++ b/Source/LibWebP/src/dsp/dsp.upsampling_mips_dsp_r2.c
1113 @@ -34,15 +34,15 @@
1114      G = G - t2 + kGCst;                                                        \
1115      B = B + kBCst;                                                             \
1116      __asm__ volatile (                                                         \
1117 -      "shll_s.w         %["#R"],      %["#R"],        9              \n\t"     \
1118 -      "shll_s.w         %["#G"],      %["#G"],        9              \n\t"     \
1119 -      "shll_s.w         %["#B"],      %["#B"],        9              \n\t"     \
1120 -      "precrqu_s.qb.ph  %["#R"],      %["#R"],        $zero          \n\t"     \
1121 -      "precrqu_s.qb.ph  %["#G"],      %["#G"],        $zero          \n\t"     \
1122 -      "precrqu_s.qb.ph  %["#B"],      %["#B"],        $zero          \n\t"     \
1123 -      "srl              %["#R"],      %["#R"],        24             \n\t"     \
1124 -      "srl              %["#G"],      %["#G"],        24             \n\t"     \
1125 -      "srl              %["#B"],      %["#B"],        24             \n\t"     \
1126 +      "shll_s.w         %[" #R "],      %[" #R "],        9              \n\t"     \
1127 +      "shll_s.w         %[" #G "],      %[" #G "],        9              \n\t"     \
1128 +      "shll_s.w         %[" #B "],      %[" #B "],        9              \n\t"     \
1129 +      "precrqu_s.qb.ph  %[" #R "],      %[" #R "],        $zero          \n\t"     \
1130 +      "precrqu_s.qb.ph  %[" #G "],      %[" #G "],        $zero          \n\t"     \
1131 +      "precrqu_s.qb.ph  %[" #B "],      %[" #B "],        $zero          \n\t"     \
1132 +      "srl              %[" #R "],      %[" #R "],        24             \n\t"     \
1133 +      "srl              %[" #G "],      %[" #G "],        24             \n\t"     \
1134 +      "srl              %[" #B "],      %[" #B "],        24             \n\t"     \
1135        : [R]"+r"(R), [G]"+r"(G), [B]"+r"(B)                                     \
1136        :                                                                        \
1137      );                                                                         \
1138 diff --git a/Source/LibWebP/src/dsp/dsp.yuv_mips_dsp_r2.c b/Source/LibWebP/src/dsp/dsp.yuv_mips_dsp_r2.c
1139 index 66adde5..51cbe9e 100644
1140 --- a/Source/LibWebP/src/dsp/dsp.yuv_mips_dsp_r2.c
1141 +++ b/Source/LibWebP/src/dsp/dsp.yuv_mips_dsp_r2.c
1142 @@ -39,12 +39,12 @@
1143    "addu             %[temp5],   %[temp0],       %[temp1]        \n\t"          \
1144    "subu             %[temp6],   %[temp0],       %[temp2]        \n\t"          \
1145    "addu             %[temp7],   %[temp0],       %[temp4]        \n\t"          \
1146 -".if "#K"                                                       \n\t"          \
1147 +".if " #K "                                                       \n\t"          \
1148    "lbu              %[temp0],   1(%[y])                         \n\t"          \
1149  ".endif                                                         \n\t"          \
1150    "shll_s.w         %[temp5],   %[temp5],       9               \n\t"          \
1151    "shll_s.w         %[temp6],   %[temp6],       9               \n\t"          \
1152 -".if "#K"                                                       \n\t"          \
1153 +".if " #K "                                                       \n\t"          \
1154    "mul              %[temp0],   %[t_con_5],     %[temp0]        \n\t"          \
1155  ".endif                                                         \n\t"          \
1156    "shll_s.w         %[temp7],   %[temp7],       9               \n\t"          \
1157 @@ -54,9 +54,9 @@
1158    "srl              %[temp5],   %[temp5],       24              \n\t"          \
1159    "srl              %[temp6],   %[temp6],       24              \n\t"          \
1160    "srl              %[temp7],   %[temp7],       24              \n\t"          \
1161 -  "sb               %[temp5],   "#R"(%[dst])                    \n\t"          \
1162 -  "sb               %[temp6],   "#G"(%[dst])                    \n\t"          \
1163 -  "sb               %[temp7],   "#B"(%[dst])                    \n\t"          \
1164 +  "sb               %[temp5],   " #R "(%[dst])                    \n\t"          \
1165 +  "sb               %[temp6],   " #G "(%[dst])                    \n\t"          \
1166 +  "sb               %[temp7],   " #B "(%[dst])                    \n\t"          \
1167
1168  #define ASM_CLOBBER_LIST()                                                     \
1169    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),             \
1170 --
1171 2.1.4
1172