1 From 44bce1b66c1cdd5308ac3ac773ea0a53d83790fd Mon Sep 17 00:00:00 2001
2 From: Peter Korsgaard <peter@korsgaard.com>
3 Date: Tue, 24 Nov 2015 21:16:39 +0100
4 Subject: [PATCH] LibWebP: fix compilation issue with GCC 5.x / C++11
6 GCC 5.1 / C++11 gets confused about the "#<TEXT>" in the inline assembly
7 code, and dies with errors like:
9 Source/LibWebP/./src/dsp/dsp.upsampling_mips_dsp_r2.c:37:34: error: invalid
10 character ' ' in raw string delimiter
12 Fix it by introducing white space around the string literals like it has
13 been done in upstream webp:
15 https://chromium.googlesource.com/webm/libwebp/+/eebaf97f5a1cb713d81d311308d8a48c124e5aef
18 http://sourceforge.net/p/freeimage/discussion/36110/thread/605ef8e4/
20 [Scripted by sed -i 's/"\(#[A-Z0-9]*\)"/" \1 "/g' *.c]
21 Signed-off-by: Peter Korsgaard <peter@korsgaard.com>
23 Source/LibWebP/src/dsp/dsp.dec_mips_dsp_r2.c | 28 +-
24 Source/LibWebP/src/dsp/dsp.enc_mips32.c | 314 ++++++++++-----------
25 Source/LibWebP/src/dsp/dsp.enc_mips_dsp_r2.c | 288 +++++++++----------
26 Source/LibWebP/src/dsp/dsp.filters_mips_dsp_r2.c | 10 +-
27 Source/LibWebP/src/dsp/dsp.lossless_mips32.c | 34 +--
28 Source/LibWebP/src/dsp/dsp.lossless_mips_dsp_r2.c | 8 +-
29 .../LibWebP/src/dsp/dsp.upsampling_mips_dsp_r2.c | 18 +-
30 Source/LibWebP/src/dsp/dsp.yuv_mips_dsp_r2.c | 10 +-
31 8 files changed, 355 insertions(+), 355 deletions(-)
33 diff --git a/Source/LibWebP/src/dsp/dsp.dec_mips_dsp_r2.c b/Source/LibWebP/src/dsp/dsp.dec_mips_dsp_r2.c
34 index dac2c93..aaa8111 100644
35 --- a/Source/LibWebP/src/dsp/dsp.dec_mips_dsp_r2.c
36 +++ b/Source/LibWebP/src/dsp/dsp.dec_mips_dsp_r2.c
37 @@ -548,10 +548,10 @@ static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
38 // TEMP3 = SRC[D + D1 * BPS]
39 #define LOAD_4_BYTES(TEMP0, TEMP1, TEMP2, TEMP3, \
40 A, A1, B, B1, C, C1, D, D1, SRC) \
41 - "lbu %["#TEMP0"], "#A"+"#A1"*"XSTR(BPS)"(%["#SRC"]) \n\t" \
42 - "lbu %["#TEMP1"], "#B"+"#B1"*"XSTR(BPS)"(%["#SRC"]) \n\t" \
43 - "lbu %["#TEMP2"], "#C"+"#C1"*"XSTR(BPS)"(%["#SRC"]) \n\t" \
44 - "lbu %["#TEMP3"], "#D"+"#D1"*"XSTR(BPS)"(%["#SRC"]) \n\t" \
45 + "lbu %[" #TEMP0 "], " #A "+" #A1 "*"XSTR(BPS)"(%[" #SRC "]) \n\t" \
46 + "lbu %[" #TEMP1 "], " #B "+" #B1 "*"XSTR(BPS)"(%[" #SRC "]) \n\t" \
47 + "lbu %[" #TEMP2 "], " #C "+" #C1 "*"XSTR(BPS)"(%[" #SRC "]) \n\t" \
48 + "lbu %[" #TEMP3 "], " #D "+" #D1 "*"XSTR(BPS)"(%[" #SRC "]) \n\t" \
50 static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
52 @@ -623,8 +623,8 @@ static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
53 // DST[A * BPS] = TEMP0
54 // DST[B + C * BPS] = TEMP1
55 #define STORE_8_BYTES(TEMP0, TEMP1, A, B, C, DST) \
56 - "usw %["#TEMP0"], "#A"*"XSTR(BPS)"(%["#DST"]) \n\t" \
57 - "usw %["#TEMP1"], "#B"+"#C"*"XSTR(BPS)"(%["#DST"]) \n\t"
58 + "usw %[" #TEMP0 "], " #A "*"XSTR(BPS)"(%[" #DST "]) \n\t" \
59 + "usw %[" #TEMP1 "], " #B "+" #C "*"XSTR(BPS)"(%[" #DST "]) \n\t"
61 static void VE4(uint8_t* dst) { // vertical
62 const uint8_t* top = dst - BPS;
63 @@ -725,8 +725,8 @@ static void RD4(uint8_t* dst) { // Down-right
64 // TEMP0 = SRC[A * BPS]
65 // TEMP1 = SRC[B + C * BPS]
66 #define LOAD_8_BYTES(TEMP0, TEMP1, A, B, C, SRC) \
67 - "ulw %["#TEMP0"], "#A"*"XSTR(BPS)"(%["#SRC"]) \n\t" \
68 - "ulw %["#TEMP1"], "#B"+"#C"*"XSTR(BPS)"(%["#SRC"]) \n\t"
69 + "ulw %[" #TEMP0 "], " #A "*"XSTR(BPS)"(%[" #SRC "]) \n\t" \
70 + "ulw %[" #TEMP1 "], " #B "+" #C "*"XSTR(BPS)"(%[" #SRC "]) \n\t"
72 static void LD4(uint8_t* dst) { // Down-Left
73 int temp0, temp1, temp2, temp3, temp4;
74 @@ -873,24 +873,24 @@ static void DC8uvNoTop(uint8_t* dst) { // DC with no top samples
75 #define CLIPPING(SIZE) \
76 "preceu.ph.qbl %[temp2], %[temp0] \n\t" \
77 "preceu.ph.qbr %[temp0], %[temp0] \n\t" \
78 -".if "#SIZE" == 8 \n\t" \
79 +".if " #SIZE " == 8 \n\t" \
80 "preceu.ph.qbl %[temp3], %[temp1] \n\t" \
81 "preceu.ph.qbr %[temp1], %[temp1] \n\t" \
83 "addu.ph %[temp2], %[temp2], %[dst_1] \n\t" \
84 "addu.ph %[temp0], %[temp0], %[dst_1] \n\t" \
85 -".if "#SIZE" == 8 \n\t" \
86 +".if " #SIZE " == 8 \n\t" \
87 "addu.ph %[temp3], %[temp3], %[dst_1] \n\t" \
88 "addu.ph %[temp1], %[temp1], %[dst_1] \n\t" \
90 "shll_s.ph %[temp2], %[temp2], 7 \n\t" \
91 "shll_s.ph %[temp0], %[temp0], 7 \n\t" \
92 -".if "#SIZE" == 8 \n\t" \
93 +".if " #SIZE " == 8 \n\t" \
94 "shll_s.ph %[temp3], %[temp3], 7 \n\t" \
95 "shll_s.ph %[temp1], %[temp1], 7 \n\t" \
97 "precrqu_s.qb.ph %[temp0], %[temp2], %[temp0] \n\t" \
98 -".if "#SIZE" == 8 \n\t" \
99 +".if " #SIZE " == 8 \n\t" \
100 "precrqu_s.qb.ph %[temp1], %[temp3], %[temp1] \n\t" \
103 @@ -899,7 +899,7 @@ static void DC8uvNoTop(uint8_t* dst) { // DC with no top samples
104 int dst_1 = ((int)(DST)[-1] << 16) + (DST)[-1]; \
105 int temp0, temp1, temp2, temp3; \
107 - ".if "#SIZE" < 8 \n\t" \
108 + ".if " #SIZE " < 8 \n\t" \
109 "ulw %[temp0], 0(%[top]) \n\t" \
110 "subu.ph %[dst_1], %[dst_1], %[top_1] \n\t" \
112 @@ -911,7 +911,7 @@ static void DC8uvNoTop(uint8_t* dst) { // DC with no top samples
114 "usw %[temp0], 0(%[dst]) \n\t" \
115 "usw %[temp1], 4(%[dst]) \n\t" \
116 - ".if "#SIZE" == 16 \n\t" \
117 + ".if " #SIZE " == 16 \n\t" \
118 "ulw %[temp0], 8(%[top]) \n\t" \
119 "ulw %[temp1], 12(%[top]) \n\t" \
121 diff --git a/Source/LibWebP/src/dsp/dsp.enc_mips32.c b/Source/LibWebP/src/dsp/dsp.enc_mips32.c
122 index 545aa3a..bf1c16d 100644
123 --- a/Source/LibWebP/src/dsp/dsp.enc_mips32.c
124 +++ b/Source/LibWebP/src/dsp/dsp.enc_mips32.c
125 @@ -31,26 +31,26 @@ static const int kC2 = 35468;
126 // TEMP0..TEMP3 - registers for corresponding tmp elements
127 // TEMP4..TEMP5 - temporary registers
128 #define VERTICAL_PASS(A, B, C, D, TEMP4, TEMP0, TEMP1, TEMP2, TEMP3) \
129 - "lh %[temp16], "#A"(%[temp20]) \n\t" \
130 - "lh %[temp18], "#B"(%[temp20]) \n\t" \
131 - "lh %[temp17], "#C"(%[temp20]) \n\t" \
132 - "lh %[temp19], "#D"(%[temp20]) \n\t" \
133 - "addu %["#TEMP4"], %[temp16], %[temp18] \n\t" \
134 + "lh %[temp16], " #A "(%[temp20]) \n\t" \
135 + "lh %[temp18], " #B "(%[temp20]) \n\t" \
136 + "lh %[temp17], " #C "(%[temp20]) \n\t" \
137 + "lh %[temp19], " #D "(%[temp20]) \n\t" \
138 + "addu %[" #TEMP4 "], %[temp16], %[temp18] \n\t" \
139 "subu %[temp16], %[temp16], %[temp18] \n\t" \
140 - "mul %["#TEMP0"], %[temp17], %[kC2] \n\t" \
141 + "mul %[" #TEMP0 "], %[temp17], %[kC2] \n\t" \
142 "mul %[temp18], %[temp19], %[kC1] \n\t" \
143 "mul %[temp17], %[temp17], %[kC1] \n\t" \
144 "mul %[temp19], %[temp19], %[kC2] \n\t" \
145 - "sra %["#TEMP0"], %["#TEMP0"], 16 \n\n" \
146 + "sra %[" #TEMP0 "], %[" #TEMP0 "], 16 \n\n" \
147 "sra %[temp18], %[temp18], 16 \n\n" \
148 "sra %[temp17], %[temp17], 16 \n\n" \
149 "sra %[temp19], %[temp19], 16 \n\n" \
150 - "subu %["#TEMP2"], %["#TEMP0"], %[temp18] \n\t" \
151 - "addu %["#TEMP3"], %[temp17], %[temp19] \n\t" \
152 - "addu %["#TEMP0"], %["#TEMP4"], %["#TEMP3"] \n\t" \
153 - "addu %["#TEMP1"], %[temp16], %["#TEMP2"] \n\t" \
154 - "subu %["#TEMP2"], %[temp16], %["#TEMP2"] \n\t" \
155 - "subu %["#TEMP3"], %["#TEMP4"], %["#TEMP3"] \n\t"
156 + "subu %[" #TEMP2 "], %[" #TEMP0 "], %[temp18] \n\t" \
157 + "addu %[" #TEMP3 "], %[temp17], %[temp19] \n\t" \
158 + "addu %[" #TEMP0 "], %[" #TEMP4 "], %[" #TEMP3 "] \n\t" \
159 + "addu %[" #TEMP1 "], %[temp16], %[" #TEMP2 "] \n\t" \
160 + "subu %[" #TEMP2 "], %[temp16], %[" #TEMP2 "] \n\t" \
161 + "subu %[" #TEMP3 "], %[" #TEMP4 "], %[" #TEMP3 "] \n\t"
163 // macro for one horizontal pass in ITransformOne
164 // MUL and STORE macros inlined
165 @@ -59,58 +59,58 @@ static const int kC2 = 35468;
166 // A - offset in bytes to load from ref and store to dst buffer
167 // TEMP0, TEMP4, TEMP8 and TEMP12 - registers for corresponding tmp elements
168 #define HORIZONTAL_PASS(A, TEMP0, TEMP4, TEMP8, TEMP12) \
169 - "addiu %["#TEMP0"], %["#TEMP0"], 4 \n\t" \
170 - "addu %[temp16], %["#TEMP0"], %["#TEMP8"] \n\t" \
171 - "subu %[temp17], %["#TEMP0"], %["#TEMP8"] \n\t" \
172 - "mul %["#TEMP0"], %["#TEMP4"], %[kC2] \n\t" \
173 - "mul %["#TEMP8"], %["#TEMP12"], %[kC1] \n\t" \
174 - "mul %["#TEMP4"], %["#TEMP4"], %[kC1] \n\t" \
175 - "mul %["#TEMP12"], %["#TEMP12"], %[kC2] \n\t" \
176 - "sra %["#TEMP0"], %["#TEMP0"], 16 \n\t" \
177 - "sra %["#TEMP8"], %["#TEMP8"], 16 \n\t" \
178 - "sra %["#TEMP4"], %["#TEMP4"], 16 \n\t" \
179 - "sra %["#TEMP12"], %["#TEMP12"], 16 \n\t" \
180 - "subu %[temp18], %["#TEMP0"], %["#TEMP8"] \n\t" \
181 - "addu %[temp19], %["#TEMP4"], %["#TEMP12"] \n\t" \
182 - "addu %["#TEMP0"], %[temp16], %[temp19] \n\t" \
183 - "addu %["#TEMP4"], %[temp17], %[temp18] \n\t" \
184 - "subu %["#TEMP8"], %[temp17], %[temp18] \n\t" \
185 - "subu %["#TEMP12"], %[temp16], %[temp19] \n\t" \
186 + "addiu %[" #TEMP0 "], %[" #TEMP0 "], 4 \n\t" \
187 + "addu %[temp16], %[" #TEMP0 "], %[" #TEMP8 "] \n\t" \
188 + "subu %[temp17], %[" #TEMP0 "], %[" #TEMP8 "] \n\t" \
189 + "mul %[" #TEMP0 "], %[" #TEMP4 "], %[kC2] \n\t" \
190 + "mul %[" #TEMP8 "], %[" #TEMP12 "], %[kC1] \n\t" \
191 + "mul %[" #TEMP4 "], %[" #TEMP4 "], %[kC1] \n\t" \
192 + "mul %[" #TEMP12 "], %[" #TEMP12 "], %[kC2] \n\t" \
193 + "sra %[" #TEMP0 "], %[" #TEMP0 "], 16 \n\t" \
194 + "sra %[" #TEMP8 "], %[" #TEMP8 "], 16 \n\t" \
195 + "sra %[" #TEMP4 "], %[" #TEMP4 "], 16 \n\t" \
196 + "sra %[" #TEMP12 "], %[" #TEMP12 "], 16 \n\t" \
197 + "subu %[temp18], %[" #TEMP0 "], %[" #TEMP8 "] \n\t" \
198 + "addu %[temp19], %[" #TEMP4 "], %[" #TEMP12 "] \n\t" \
199 + "addu %[" #TEMP0 "], %[temp16], %[temp19] \n\t" \
200 + "addu %[" #TEMP4 "], %[temp17], %[temp18] \n\t" \
201 + "subu %[" #TEMP8 "], %[temp17], %[temp18] \n\t" \
202 + "subu %[" #TEMP12 "], %[temp16], %[temp19] \n\t" \
203 "lw %[temp20], 0(%[args]) \n\t" \
204 - "sra %["#TEMP0"], %["#TEMP0"], 3 \n\t" \
205 - "sra %["#TEMP4"], %["#TEMP4"], 3 \n\t" \
206 - "sra %["#TEMP8"], %["#TEMP8"], 3 \n\t" \
207 - "sra %["#TEMP12"], %["#TEMP12"], 3 \n\t" \
208 - "lbu %[temp16], 0+"XSTR(BPS)"*"#A"(%[temp20]) \n\t" \
209 - "lbu %[temp17], 1+"XSTR(BPS)"*"#A"(%[temp20]) \n\t" \
210 - "lbu %[temp18], 2+"XSTR(BPS)"*"#A"(%[temp20]) \n\t" \
211 - "lbu %[temp19], 3+"XSTR(BPS)"*"#A"(%[temp20]) \n\t" \
212 - "addu %["#TEMP0"], %[temp16], %["#TEMP0"] \n\t" \
213 - "addu %["#TEMP4"], %[temp17], %["#TEMP4"] \n\t" \
214 - "addu %["#TEMP8"], %[temp18], %["#TEMP8"] \n\t" \
215 - "addu %["#TEMP12"], %[temp19], %["#TEMP12"] \n\t" \
216 - "slt %[temp16], %["#TEMP0"], $zero \n\t" \
217 - "slt %[temp17], %["#TEMP4"], $zero \n\t" \
218 - "slt %[temp18], %["#TEMP8"], $zero \n\t" \
219 - "slt %[temp19], %["#TEMP12"], $zero \n\t" \
220 - "movn %["#TEMP0"], $zero, %[temp16] \n\t" \
221 - "movn %["#TEMP4"], $zero, %[temp17] \n\t" \
222 - "movn %["#TEMP8"], $zero, %[temp18] \n\t" \
223 - "movn %["#TEMP12"], $zero, %[temp19] \n\t" \
224 + "sra %[" #TEMP0 "], %[" #TEMP0 "], 3 \n\t" \
225 + "sra %[" #TEMP4 "], %[" #TEMP4 "], 3 \n\t" \
226 + "sra %[" #TEMP8 "], %[" #TEMP8 "], 3 \n\t" \
227 + "sra %[" #TEMP12 "], %[" #TEMP12 "], 3 \n\t" \
228 + "lbu %[temp16], 0+"XSTR(BPS)"*" #A "(%[temp20]) \n\t" \
229 + "lbu %[temp17], 1+"XSTR(BPS)"*" #A "(%[temp20]) \n\t" \
230 + "lbu %[temp18], 2+"XSTR(BPS)"*" #A "(%[temp20]) \n\t" \
231 + "lbu %[temp19], 3+"XSTR(BPS)"*" #A "(%[temp20]) \n\t" \
232 + "addu %[" #TEMP0 "], %[temp16], %[" #TEMP0 "] \n\t" \
233 + "addu %[" #TEMP4 "], %[temp17], %[" #TEMP4 "] \n\t" \
234 + "addu %[" #TEMP8 "], %[temp18], %[" #TEMP8 "] \n\t" \
235 + "addu %[" #TEMP12 "], %[temp19], %[" #TEMP12 "] \n\t" \
236 + "slt %[temp16], %[" #TEMP0 "], $zero \n\t" \
237 + "slt %[temp17], %[" #TEMP4 "], $zero \n\t" \
238 + "slt %[temp18], %[" #TEMP8 "], $zero \n\t" \
239 + "slt %[temp19], %[" #TEMP12 "], $zero \n\t" \
240 + "movn %[" #TEMP0 "], $zero, %[temp16] \n\t" \
241 + "movn %[" #TEMP4 "], $zero, %[temp17] \n\t" \
242 + "movn %[" #TEMP8 "], $zero, %[temp18] \n\t" \
243 + "movn %[" #TEMP12 "], $zero, %[temp19] \n\t" \
244 "addiu %[temp20], $zero, 255 \n\t" \
245 - "slt %[temp16], %["#TEMP0"], %[temp20] \n\t" \
246 - "slt %[temp17], %["#TEMP4"], %[temp20] \n\t" \
247 - "slt %[temp18], %["#TEMP8"], %[temp20] \n\t" \
248 - "slt %[temp19], %["#TEMP12"], %[temp20] \n\t" \
249 - "movz %["#TEMP0"], %[temp20], %[temp16] \n\t" \
250 - "movz %["#TEMP4"], %[temp20], %[temp17] \n\t" \
251 + "slt %[temp16], %[" #TEMP0 "], %[temp20] \n\t" \
252 + "slt %[temp17], %[" #TEMP4 "], %[temp20] \n\t" \
253 + "slt %[temp18], %[" #TEMP8 "], %[temp20] \n\t" \
254 + "slt %[temp19], %[" #TEMP12 "], %[temp20] \n\t" \
255 + "movz %[" #TEMP0 "], %[temp20], %[temp16] \n\t" \
256 + "movz %[" #TEMP4 "], %[temp20], %[temp17] \n\t" \
257 "lw %[temp16], 8(%[args]) \n\t" \
258 - "movz %["#TEMP8"], %[temp20], %[temp18] \n\t" \
259 - "movz %["#TEMP12"], %[temp20], %[temp19] \n\t" \
260 - "sb %["#TEMP0"], 0+"XSTR(BPS)"*"#A"(%[temp16]) \n\t" \
261 - "sb %["#TEMP4"], 1+"XSTR(BPS)"*"#A"(%[temp16]) \n\t" \
262 - "sb %["#TEMP8"], 2+"XSTR(BPS)"*"#A"(%[temp16]) \n\t" \
263 - "sb %["#TEMP12"], 3+"XSTR(BPS)"*"#A"(%[temp16]) \n\t"
264 + "movz %[" #TEMP8 "], %[temp20], %[temp18] \n\t" \
265 + "movz %[" #TEMP12 "], %[temp20], %[temp19] \n\t" \
266 + "sb %[" #TEMP0 "], 0+"XSTR(BPS)"*" #A "(%[temp16]) \n\t" \
267 + "sb %[" #TEMP4 "], 1+"XSTR(BPS)"*" #A "(%[temp16]) \n\t" \
268 + "sb %[" #TEMP8 "], 2+"XSTR(BPS)"*" #A "(%[temp16]) \n\t" \
269 + "sb %[" #TEMP12 "], 3+"XSTR(BPS)"*" #A "(%[temp16]) \n\t"
271 // Does one or two inverse transforms.
272 static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
273 @@ -161,9 +161,9 @@ static void ITransform(const uint8_t* ref, const int16_t* in,
274 // K - offset in bytes (kZigzag[n] * 4)
275 // N - offset in bytes (n * 2)
276 #define QUANTIZE_ONE(J, K, N) \
277 - "lh %[temp0], "#J"(%[ppin]) \n\t" \
278 - "lhu %[temp1], "#J"(%[ppsharpen]) \n\t" \
279 - "lw %[temp2], "#K"(%[ppzthresh]) \n\t" \
280 + "lh %[temp0], " #J "(%[ppin]) \n\t" \
281 + "lhu %[temp1], " #J "(%[ppsharpen]) \n\t" \
282 + "lw %[temp2], " #K "(%[ppzthresh]) \n\t" \
283 "sra %[sign], %[temp0], 15 \n\t" \
284 "xor %[coeff], %[temp0], %[sign] \n\t" \
285 "subu %[coeff], %[coeff], %[sign] \n\t" \
286 @@ -172,9 +172,9 @@ static void ITransform(const uint8_t* ref, const int16_t* in,
287 "addiu %[temp5], $zero, 0 \n\t" \
288 "addiu %[level], $zero, 0 \n\t" \
289 "beqz %[temp4], 2f \n\t" \
290 - "lhu %[temp1], "#J"(%[ppiq]) \n\t" \
291 - "lw %[temp2], "#K"(%[ppbias]) \n\t" \
292 - "lhu %[temp3], "#J"(%[ppq]) \n\t" \
293 + "lhu %[temp1], " #J "(%[ppiq]) \n\t" \
294 + "lw %[temp2], " #K "(%[ppbias]) \n\t" \
295 + "lhu %[temp3], " #J "(%[ppq]) \n\t" \
296 "mul %[level], %[coeff], %[temp1] \n\t" \
297 "addu %[level], %[level], %[temp2] \n\t" \
298 "sra %[level], %[level], 17 \n\t" \
299 @@ -184,8 +184,8 @@ static void ITransform(const uint8_t* ref, const int16_t* in,
300 "subu %[level], %[level], %[sign] \n\t" \
301 "mul %[temp5], %[level], %[temp3] \n\t" \
303 - "sh %[temp5], "#J"(%[ppin]) \n\t" \
304 - "sh %[level], "#N"(%[pout]) \n\t"
305 + "sh %[temp5], " #J "(%[ppin]) \n\t" \
306 + "sh %[level], " #N "(%[pout]) \n\t"
308 static int QuantizeBlock(int16_t in[16], int16_t out[16],
309 const VP8Matrix* const mtx) {
310 @@ -254,14 +254,14 @@ static int Quantize2Blocks(int16_t in[32], int16_t out[32],
311 // E..H - offsets in bytes to store first results to tmp buffer
312 // E1..H1 - offsets in bytes to store second results to tmp buffer
313 #define HORIZONTAL_PASS(A, E, F, G, H, E1, F1, G1, H1) \
314 - "lbu %[temp0], 0+"XSTR(BPS)"*"#A"(%[a]) \n\t" \
315 - "lbu %[temp1], 1+"XSTR(BPS)"*"#A"(%[a]) \n\t" \
316 - "lbu %[temp2], 2+"XSTR(BPS)"*"#A"(%[a]) \n\t" \
317 - "lbu %[temp3], 3+"XSTR(BPS)"*"#A"(%[a]) \n\t" \
318 - "lbu %[temp4], 0+"XSTR(BPS)"*"#A"(%[b]) \n\t" \
319 - "lbu %[temp5], 1+"XSTR(BPS)"*"#A"(%[b]) \n\t" \
320 - "lbu %[temp6], 2+"XSTR(BPS)"*"#A"(%[b]) \n\t" \
321 - "lbu %[temp7], 3+"XSTR(BPS)"*"#A"(%[b]) \n\t" \
322 + "lbu %[temp0], 0+"XSTR(BPS)"*" #A "(%[a]) \n\t" \
323 + "lbu %[temp1], 1+"XSTR(BPS)"*" #A "(%[a]) \n\t" \
324 + "lbu %[temp2], 2+"XSTR(BPS)"*" #A "(%[a]) \n\t" \
325 + "lbu %[temp3], 3+"XSTR(BPS)"*" #A "(%[a]) \n\t" \
326 + "lbu %[temp4], 0+"XSTR(BPS)"*" #A "(%[b]) \n\t" \
327 + "lbu %[temp5], 1+"XSTR(BPS)"*" #A "(%[b]) \n\t" \
328 + "lbu %[temp6], 2+"XSTR(BPS)"*" #A "(%[b]) \n\t" \
329 + "lbu %[temp7], 3+"XSTR(BPS)"*" #A "(%[b]) \n\t" \
330 "addu %[temp8], %[temp0], %[temp2] \n\t" \
331 "subu %[temp0], %[temp0], %[temp2] \n\t" \
332 "addu %[temp2], %[temp1], %[temp3] \n\t" \
333 @@ -278,14 +278,14 @@ static int Quantize2Blocks(int16_t in[32], int16_t out[32],
334 "subu %[temp3], %[temp3], %[temp6] \n\t" \
335 "addu %[temp6], %[temp4], %[temp5] \n\t" \
336 "subu %[temp4], %[temp4], %[temp5] \n\t" \
337 - "sw %[temp7], "#E"(%[tmp]) \n\t" \
338 - "sw %[temp2], "#H"(%[tmp]) \n\t" \
339 - "sw %[temp8], "#F"(%[tmp]) \n\t" \
340 - "sw %[temp0], "#G"(%[tmp]) \n\t" \
341 - "sw %[temp1], "#E1"(%[tmp]) \n\t" \
342 - "sw %[temp3], "#H1"(%[tmp]) \n\t" \
343 - "sw %[temp6], "#F1"(%[tmp]) \n\t" \
344 - "sw %[temp4], "#G1"(%[tmp]) \n\t"
345 + "sw %[temp7], " #E "(%[tmp]) \n\t" \
346 + "sw %[temp2], " #H "(%[tmp]) \n\t" \
347 + "sw %[temp8], " #F "(%[tmp]) \n\t" \
348 + "sw %[temp0], " #G "(%[tmp]) \n\t" \
349 + "sw %[temp1], " #E1 "(%[tmp]) \n\t" \
350 + "sw %[temp3], " #H1 "(%[tmp]) \n\t" \
351 + "sw %[temp6], " #F1 "(%[tmp]) \n\t" \
352 + "sw %[temp4], " #G1 "(%[tmp]) \n\t"
354 // macro for one vertical pass in Disto4x4 (TTransform)
355 // two calls of function TTransform are merged into single one
356 @@ -300,10 +300,10 @@ static int Quantize2Blocks(int16_t in[32], int16_t out[32],
357 // A1..D1 - offsets in bytes to load second results from tmp buffer
358 // E..H - offsets in bytes to load from w buffer
359 #define VERTICAL_PASS(A, B, C, D, A1, B1, C1, D1, E, F, G, H) \
360 - "lw %[temp0], "#A1"(%[tmp]) \n\t" \
361 - "lw %[temp1], "#C1"(%[tmp]) \n\t" \
362 - "lw %[temp2], "#B1"(%[tmp]) \n\t" \
363 - "lw %[temp3], "#D1"(%[tmp]) \n\t" \
364 + "lw %[temp0], " #A1 "(%[tmp]) \n\t" \
365 + "lw %[temp1], " #C1 "(%[tmp]) \n\t" \
366 + "lw %[temp2], " #B1 "(%[tmp]) \n\t" \
367 + "lw %[temp3], " #D1 "(%[tmp]) \n\t" \
368 "addu %[temp8], %[temp0], %[temp1] \n\t" \
369 "subu %[temp0], %[temp0], %[temp1] \n\t" \
370 "addu %[temp1], %[temp2], %[temp3] \n\t" \
371 @@ -324,18 +324,18 @@ static int Quantize2Blocks(int16_t in[32], int16_t out[32],
372 "subu %[temp1], %[temp1], %[temp5] \n\t" \
373 "subu %[temp0], %[temp0], %[temp6] \n\t" \
374 "subu %[temp8], %[temp8], %[temp7] \n\t" \
375 - "lhu %[temp4], "#E"(%[w]) \n\t" \
376 - "lhu %[temp5], "#F"(%[w]) \n\t" \
377 - "lhu %[temp6], "#G"(%[w]) \n\t" \
378 - "lhu %[temp7], "#H"(%[w]) \n\t" \
379 + "lhu %[temp4], " #E "(%[w]) \n\t" \
380 + "lhu %[temp5], " #F "(%[w]) \n\t" \
381 + "lhu %[temp6], " #G "(%[w]) \n\t" \
382 + "lhu %[temp7], " #H "(%[w]) \n\t" \
383 "madd %[temp4], %[temp3] \n\t" \
384 "madd %[temp5], %[temp1] \n\t" \
385 "madd %[temp6], %[temp0] \n\t" \
386 "madd %[temp7], %[temp8] \n\t" \
387 - "lw %[temp0], "#A"(%[tmp]) \n\t" \
388 - "lw %[temp1], "#C"(%[tmp]) \n\t" \
389 - "lw %[temp2], "#B"(%[tmp]) \n\t" \
390 - "lw %[temp3], "#D"(%[tmp]) \n\t" \
391 + "lw %[temp0], " #A "(%[tmp]) \n\t" \
392 + "lw %[temp1], " #C "(%[tmp]) \n\t" \
393 + "lw %[temp2], " #B "(%[tmp]) \n\t" \
394 + "lw %[temp3], " #D "(%[tmp]) \n\t" \
395 "addu %[temp8], %[temp0], %[temp1] \n\t" \
396 "subu %[temp0], %[temp0], %[temp1] \n\t" \
397 "addu %[temp1], %[temp2], %[temp3] \n\t" \
398 @@ -413,70 +413,70 @@ static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
399 // A - offset in bytes to load from src and ref buffers
400 // TEMP0..TEMP3 - registers for corresponding tmp elements
401 #define HORIZONTAL_PASS(A, TEMP0, TEMP1, TEMP2, TEMP3) \
402 - "lw %["#TEMP1"], 0(%[args]) \n\t" \
403 - "lw %["#TEMP2"], 4(%[args]) \n\t" \
404 - "lbu %[temp16], 0+"XSTR(BPS)"*"#A"(%["#TEMP1"]) \n\t" \
405 - "lbu %[temp17], 0+"XSTR(BPS)"*"#A"(%["#TEMP2"]) \n\t" \
406 - "lbu %[temp18], 1+"XSTR(BPS)"*"#A"(%["#TEMP1"]) \n\t" \
407 - "lbu %[temp19], 1+"XSTR(BPS)"*"#A"(%["#TEMP2"]) \n\t" \
408 + "lw %[" #TEMP1 "], 0(%[args]) \n\t" \
409 + "lw %[" #TEMP2 "], 4(%[args]) \n\t" \
410 + "lbu %[temp16], 0+"XSTR(BPS)"*" #A "(%[" #TEMP1 "]) \n\t" \
411 + "lbu %[temp17], 0+"XSTR(BPS)"*" #A "(%[" #TEMP2 "]) \n\t" \
412 + "lbu %[temp18], 1+"XSTR(BPS)"*" #A "(%[" #TEMP1 "]) \n\t" \
413 + "lbu %[temp19], 1+"XSTR(BPS)"*" #A "(%[" #TEMP2 "]) \n\t" \
414 "subu %[temp20], %[temp16], %[temp17] \n\t" \
415 - "lbu %[temp16], 2+"XSTR(BPS)"*"#A"(%["#TEMP1"]) \n\t" \
416 - "lbu %[temp17], 2+"XSTR(BPS)"*"#A"(%["#TEMP2"]) \n\t" \
417 - "subu %["#TEMP0"], %[temp18], %[temp19] \n\t" \
418 - "lbu %[temp18], 3+"XSTR(BPS)"*"#A"(%["#TEMP1"]) \n\t" \
419 - "lbu %[temp19], 3+"XSTR(BPS)"*"#A"(%["#TEMP2"]) \n\t" \
420 - "subu %["#TEMP1"], %[temp16], %[temp17] \n\t" \
421 - "subu %["#TEMP2"], %[temp18], %[temp19] \n\t" \
422 - "addu %["#TEMP3"], %[temp20], %["#TEMP2"] \n\t" \
423 - "subu %["#TEMP2"], %[temp20], %["#TEMP2"] \n\t" \
424 - "addu %[temp20], %["#TEMP0"], %["#TEMP1"] \n\t" \
425 - "subu %["#TEMP0"], %["#TEMP0"], %["#TEMP1"] \n\t" \
426 - "mul %[temp16], %["#TEMP2"], %[c5352] \n\t" \
427 - "mul %[temp17], %["#TEMP2"], %[c2217] \n\t" \
428 - "mul %[temp18], %["#TEMP0"], %[c5352] \n\t" \
429 - "mul %[temp19], %["#TEMP0"], %[c2217] \n\t" \
430 - "addu %["#TEMP1"], %["#TEMP3"], %[temp20] \n\t" \
431 - "subu %[temp20], %["#TEMP3"], %[temp20] \n\t" \
432 - "sll %["#TEMP0"], %["#TEMP1"], 3 \n\t" \
433 - "sll %["#TEMP2"], %[temp20], 3 \n\t" \
434 + "lbu %[temp16], 2+"XSTR(BPS)"*" #A "(%[" #TEMP1 "]) \n\t" \
435 + "lbu %[temp17], 2+"XSTR(BPS)"*" #A "(%[" #TEMP2 "]) \n\t" \
436 + "subu %[" #TEMP0 "], %[temp18], %[temp19] \n\t" \
437 + "lbu %[temp18], 3+"XSTR(BPS)"*" #A "(%[" #TEMP1 "]) \n\t" \
438 + "lbu %[temp19], 3+"XSTR(BPS)"*" #A "(%[" #TEMP2 "]) \n\t" \
439 + "subu %[" #TEMP1 "], %[temp16], %[temp17] \n\t" \
440 + "subu %[" #TEMP2 "], %[temp18], %[temp19] \n\t" \
441 + "addu %[" #TEMP3 "], %[temp20], %[" #TEMP2 "] \n\t" \
442 + "subu %[" #TEMP2 "], %[temp20], %[" #TEMP2 "] \n\t" \
443 + "addu %[temp20], %[" #TEMP0 "], %[" #TEMP1 "] \n\t" \
444 + "subu %[" #TEMP0 "], %[" #TEMP0 "], %[" #TEMP1 "] \n\t" \
445 + "mul %[temp16], %[" #TEMP2 "], %[c5352] \n\t" \
446 + "mul %[temp17], %[" #TEMP2 "], %[c2217] \n\t" \
447 + "mul %[temp18], %[" #TEMP0 "], %[c5352] \n\t" \
448 + "mul %[temp19], %[" #TEMP0 "], %[c2217] \n\t" \
449 + "addu %[" #TEMP1 "], %[" #TEMP3 "], %[temp20] \n\t" \
450 + "subu %[temp20], %[" #TEMP3 "], %[temp20] \n\t" \
451 + "sll %[" #TEMP0 "], %[" #TEMP1 "], 3 \n\t" \
452 + "sll %[" #TEMP2 "], %[temp20], 3 \n\t" \
453 "addiu %[temp16], %[temp16], 1812 \n\t" \
454 "addiu %[temp17], %[temp17], 937 \n\t" \
455 "addu %[temp16], %[temp16], %[temp19] \n\t" \
456 "subu %[temp17], %[temp17], %[temp18] \n\t" \
457 - "sra %["#TEMP1"], %[temp16], 9 \n\t" \
458 - "sra %["#TEMP3"], %[temp17], 9 \n\t"
459 + "sra %[" #TEMP1 "], %[temp16], 9 \n\t" \
460 + "sra %[" #TEMP3 "], %[temp17], 9 \n\t"
462 // macro for one vertical pass in FTransform
463 // temp0..temp15 holds tmp[0]..tmp[15]
464 // A..D - offsets in bytes to store to out buffer
465 // TEMP0, TEMP4, TEMP8 and TEMP12 - registers for corresponding tmp elements
466 #define VERTICAL_PASS(A, B, C, D, TEMP0, TEMP4, TEMP8, TEMP12) \
467 - "addu %[temp16], %["#TEMP0"], %["#TEMP12"] \n\t" \
468 - "subu %[temp19], %["#TEMP0"], %["#TEMP12"] \n\t" \
469 - "addu %[temp17], %["#TEMP4"], %["#TEMP8"] \n\t" \
470 - "subu %[temp18], %["#TEMP4"], %["#TEMP8"] \n\t" \
471 - "mul %["#TEMP8"], %[temp19], %[c2217] \n\t" \
472 - "mul %["#TEMP12"], %[temp18], %[c2217] \n\t" \
473 - "mul %["#TEMP4"], %[temp19], %[c5352] \n\t" \
474 + "addu %[temp16], %[" #TEMP0 "], %[" #TEMP12 "] \n\t" \
475 + "subu %[temp19], %[" #TEMP0 "], %[" #TEMP12 "] \n\t" \
476 + "addu %[temp17], %[" #TEMP4 "], %[" #TEMP8 "] \n\t" \
477 + "subu %[temp18], %[" #TEMP4 "], %[" #TEMP8 "] \n\t" \
478 + "mul %[" #TEMP8 "], %[temp19], %[c2217] \n\t" \
479 + "mul %[" #TEMP12 "], %[temp18], %[c2217] \n\t" \
480 + "mul %[" #TEMP4 "], %[temp19], %[c5352] \n\t" \
481 "mul %[temp18], %[temp18], %[c5352] \n\t" \
482 "addiu %[temp16], %[temp16], 7 \n\t" \
483 - "addu %["#TEMP0"], %[temp16], %[temp17] \n\t" \
484 - "sra %["#TEMP0"], %["#TEMP0"], 4 \n\t" \
485 - "addu %["#TEMP12"], %["#TEMP12"], %["#TEMP4"] \n\t" \
486 - "subu %["#TEMP4"], %[temp16], %[temp17] \n\t" \
487 - "sra %["#TEMP4"], %["#TEMP4"], 4 \n\t" \
488 - "addiu %["#TEMP8"], %["#TEMP8"], 30000 \n\t" \
489 - "addiu %["#TEMP12"], %["#TEMP12"], 12000 \n\t" \
490 - "addiu %["#TEMP8"], %["#TEMP8"], 21000 \n\t" \
491 - "subu %["#TEMP8"], %["#TEMP8"], %[temp18] \n\t" \
492 - "sra %["#TEMP12"], %["#TEMP12"], 16 \n\t" \
493 - "sra %["#TEMP8"], %["#TEMP8"], 16 \n\t" \
494 - "addiu %[temp16], %["#TEMP12"], 1 \n\t" \
495 - "movn %["#TEMP12"], %[temp16], %[temp19] \n\t" \
496 - "sh %["#TEMP0"], "#A"(%[temp20]) \n\t" \
497 - "sh %["#TEMP4"], "#C"(%[temp20]) \n\t" \
498 - "sh %["#TEMP8"], "#D"(%[temp20]) \n\t" \
499 - "sh %["#TEMP12"], "#B"(%[temp20]) \n\t"
500 + "addu %[" #TEMP0 "], %[temp16], %[temp17] \n\t" \
501 + "sra %[" #TEMP0 "], %[" #TEMP0 "], 4 \n\t" \
502 + "addu %[" #TEMP12 "], %[" #TEMP12 "], %[" #TEMP4 "] \n\t" \
503 + "subu %[" #TEMP4 "], %[temp16], %[temp17] \n\t" \
504 + "sra %[" #TEMP4 "], %[" #TEMP4 "], 4 \n\t" \
505 + "addiu %[" #TEMP8 "], %[" #TEMP8 "], 30000 \n\t" \
506 + "addiu %[" #TEMP12 "], %[" #TEMP12 "], 12000 \n\t" \
507 + "addiu %[" #TEMP8 "], %[" #TEMP8 "], 21000 \n\t" \
508 + "subu %[" #TEMP8 "], %[" #TEMP8 "], %[temp18] \n\t" \
509 + "sra %[" #TEMP12 "], %[" #TEMP12 "], 16 \n\t" \
510 + "sra %[" #TEMP8 "], %[" #TEMP8 "], 16 \n\t" \
511 + "addiu %[temp16], %[" #TEMP12 "], 1 \n\t" \
512 + "movn %[" #TEMP12 "], %[temp16], %[temp19] \n\t" \
513 + "sh %[" #TEMP0 "], " #A "(%[temp20]) \n\t" \
514 + "sh %[" #TEMP4 "], " #C "(%[temp20]) \n\t" \
515 + "sh %[" #TEMP8 "], " #D "(%[temp20]) \n\t" \
516 + "sh %[" #TEMP12 "], " #B "(%[temp20]) \n\t"
518 static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
519 int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
520 @@ -516,14 +516,14 @@ static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
521 #if !defined(WORK_AROUND_GCC)
523 #define GET_SSE_INNER(A, B, C, D) \
524 - "lbu %[temp0], "#A"(%[a]) \n\t" \
525 - "lbu %[temp1], "#A"(%[b]) \n\t" \
526 - "lbu %[temp2], "#B"(%[a]) \n\t" \
527 - "lbu %[temp3], "#B"(%[b]) \n\t" \
528 - "lbu %[temp4], "#C"(%[a]) \n\t" \
529 - "lbu %[temp5], "#C"(%[b]) \n\t" \
530 - "lbu %[temp6], "#D"(%[a]) \n\t" \
531 - "lbu %[temp7], "#D"(%[b]) \n\t" \
532 + "lbu %[temp0], " #A "(%[a]) \n\t" \
533 + "lbu %[temp1], " #A "(%[b]) \n\t" \
534 + "lbu %[temp2], " #B "(%[a]) \n\t" \
535 + "lbu %[temp3], " #B "(%[b]) \n\t" \
536 + "lbu %[temp4], " #C "(%[a]) \n\t" \
537 + "lbu %[temp5], " #C "(%[b]) \n\t" \
538 + "lbu %[temp6], " #D "(%[a]) \n\t" \
539 + "lbu %[temp7], " #D "(%[b]) \n\t" \
540 "subu %[temp0], %[temp0], %[temp1] \n\t" \
541 "subu %[temp2], %[temp2], %[temp3] \n\t" \
542 "subu %[temp4], %[temp4], %[temp5] \n\t" \
543 diff --git a/Source/LibWebP/src/dsp/dsp.enc_mips_dsp_r2.c b/Source/LibWebP/src/dsp/dsp.enc_mips_dsp_r2.c
544 index ec58efe..1a3f968 100644
545 --- a/Source/LibWebP/src/dsp/dsp.enc_mips_dsp_r2.c
546 +++ b/Source/LibWebP/src/dsp/dsp.enc_mips_dsp_r2.c
547 @@ -27,25 +27,25 @@ static const int kC2 = 35468;
548 // I - input (macro doesn't change it)
549 #define ADD_SUB_HALVES_X4(O0, O1, O2, O3, O4, O5, O6, O7, \
550 I0, I1, I2, I3, I4, I5, I6, I7) \
551 - "addq.ph %["#O0"], %["#I0"], %["#I1"] \n\t" \
552 - "subq.ph %["#O1"], %["#I0"], %["#I1"] \n\t" \
553 - "addq.ph %["#O2"], %["#I2"], %["#I3"] \n\t" \
554 - "subq.ph %["#O3"], %["#I2"], %["#I3"] \n\t" \
555 - "addq.ph %["#O4"], %["#I4"], %["#I5"] \n\t" \
556 - "subq.ph %["#O5"], %["#I4"], %["#I5"] \n\t" \
557 - "addq.ph %["#O6"], %["#I6"], %["#I7"] \n\t" \
558 - "subq.ph %["#O7"], %["#I6"], %["#I7"] \n\t"
559 + "addq.ph %[" #O0 "], %[" #I0 "], %[" #I1 "] \n\t" \
560 + "subq.ph %[" #O1 "], %[" #I0 "], %[" #I1 "] \n\t" \
561 + "addq.ph %[" #O2 "], %[" #I2 "], %[" #I3 "] \n\t" \
562 + "subq.ph %[" #O3 "], %[" #I2 "], %[" #I3 "] \n\t" \
563 + "addq.ph %[" #O4 "], %[" #I4 "], %[" #I5 "] \n\t" \
564 + "subq.ph %[" #O5 "], %[" #I4 "], %[" #I5 "] \n\t" \
565 + "addq.ph %[" #O6 "], %[" #I6 "], %[" #I7 "] \n\t" \
566 + "subq.ph %[" #O7 "], %[" #I6 "], %[" #I7 "] \n\t"
569 #define ABS_X8(IO0, IO1, IO2, IO3, IO4, IO5, IO6, IO7) \
570 - "absq_s.ph %["#IO0"], %["#IO0"] \n\t" \
571 - "absq_s.ph %["#IO1"], %["#IO1"] \n\t" \
572 - "absq_s.ph %["#IO2"], %["#IO2"] \n\t" \
573 - "absq_s.ph %["#IO3"], %["#IO3"] \n\t" \
574 - "absq_s.ph %["#IO4"], %["#IO4"] \n\t" \
575 - "absq_s.ph %["#IO5"], %["#IO5"] \n\t" \
576 - "absq_s.ph %["#IO6"], %["#IO6"] \n\t" \
577 - "absq_s.ph %["#IO7"], %["#IO7"] \n\t"
578 + "absq_s.ph %[" #IO0 "], %[" #IO0 "] \n\t" \
579 + "absq_s.ph %[" #IO1 "], %[" #IO1 "] \n\t" \
580 + "absq_s.ph %[" #IO2 "], %[" #IO2 "] \n\t" \
581 + "absq_s.ph %[" #IO3 "], %[" #IO3 "] \n\t" \
582 + "absq_s.ph %[" #IO4 "], %[" #IO4 "] \n\t" \
583 + "absq_s.ph %[" #IO5 "], %[" #IO5 "] \n\t" \
584 + "absq_s.ph %[" #IO6 "], %[" #IO6 "] \n\t" \
585 + "absq_s.ph %[" #IO7 "], %[" #IO7 "] \n\t"
587 // dpa.w.ph $ac0 temp0 ,temp1
588 // $ac += temp0[31..16] * temp1[31..16] + temp0[15..0] * temp1[15..0]
589 @@ -56,15 +56,15 @@ static const int kC2 = 35468;
590 #define MUL_HALF(O0, I0, I1, I2, I3, I4, I5, I6, I7, \
591 I8, I9, I10, I11, I12, I13, I14, I15) \
592 "mult $ac0, $zero, $zero \n\t" \
593 - "dpa.w.ph $ac0, %["#I2"], %["#I0"] \n\t" \
594 - "dpax.w.ph $ac0, %["#I5"], %["#I6"] \n\t" \
595 - "dpa.w.ph $ac0, %["#I8"], %["#I9"] \n\t" \
596 - "dpax.w.ph $ac0, %["#I11"], %["#I4"] \n\t" \
597 - "dpa.w.ph $ac0, %["#I12"], %["#I7"] \n\t" \
598 - "dpax.w.ph $ac0, %["#I13"], %["#I1"] \n\t" \
599 - "dpa.w.ph $ac0, %["#I14"], %["#I3"] \n\t" \
600 - "dpax.w.ph $ac0, %["#I15"], %["#I10"] \n\t" \
601 - "mflo %["#O0"], $ac0 \n\t"
602 + "dpa.w.ph $ac0, %[" #I2 "], %[" #I0 "] \n\t" \
603 + "dpax.w.ph $ac0, %[" #I5 "], %[" #I6 "] \n\t" \
604 + "dpa.w.ph $ac0, %[" #I8 "], %[" #I9 "] \n\t" \
605 + "dpax.w.ph $ac0, %[" #I11 "], %[" #I4 "] \n\t" \
606 + "dpa.w.ph $ac0, %[" #I12 "], %[" #I7 "] \n\t" \
607 + "dpax.w.ph $ac0, %[" #I13 "], %[" #I1 "] \n\t" \
608 + "dpa.w.ph $ac0, %[" #I14 "], %[" #I3 "] \n\t" \
609 + "dpax.w.ph $ac0, %[" #I15 "], %[" #I10 "] \n\t" \
610 + "mflo %[" #O0 "], $ac0 \n\t"
612 #define OUTPUT_EARLY_CLOBBER_REGS_17() \
613 OUTPUT_EARLY_CLOBBER_REGS_10(), \
614 @@ -77,69 +77,69 @@ static const int kC2 = 35468;
615 // A - offset in bytes to load from src and ref buffers
616 // TEMP0..TEMP3 - registers for corresponding tmp elements
617 #define HORIZONTAL_PASS(A, TEMP0, TEMP1, TEMP2, TEMP3) \
618 - "lw %["#TEMP0"], 0(%[args]) \n\t" \
619 - "lw %["#TEMP1"], 4(%[args]) \n\t" \
620 - "lw %["#TEMP2"], "XSTR(BPS)"*"#A"(%["#TEMP0"]) \n\t" \
621 - "lw %["#TEMP3"], "XSTR(BPS)"*"#A"(%["#TEMP1"]) \n\t" \
622 - "preceu.ph.qbl %["#TEMP0"], %["#TEMP2"] \n\t" \
623 - "preceu.ph.qbl %["#TEMP1"], %["#TEMP3"] \n\t" \
624 - "preceu.ph.qbr %["#TEMP2"], %["#TEMP2"] \n\t" \
625 - "preceu.ph.qbr %["#TEMP3"], %["#TEMP3"] \n\t" \
626 - "subq.ph %["#TEMP0"], %["#TEMP0"], %["#TEMP1"] \n\t" \
627 - "subq.ph %["#TEMP2"], %["#TEMP2"], %["#TEMP3"] \n\t" \
628 - "rotr %["#TEMP0"], %["#TEMP0"], 16 \n\t" \
629 - "addq.ph %["#TEMP1"], %["#TEMP2"], %["#TEMP0"] \n\t" \
630 - "subq.ph %["#TEMP3"], %["#TEMP2"], %["#TEMP0"] \n\t" \
631 - "seh %["#TEMP0"], %["#TEMP1"] \n\t" \
632 - "sra %[temp16], %["#TEMP1"], 16 \n\t" \
633 - "seh %[temp19], %["#TEMP3"] \n\t" \
634 - "sra %["#TEMP3"], %["#TEMP3"], 16 \n\t" \
635 - "subu %["#TEMP2"], %["#TEMP0"], %[temp16] \n\t" \
636 - "addu %["#TEMP0"], %["#TEMP0"], %[temp16] \n\t" \
637 + "lw %[" #TEMP0 "], 0(%[args]) \n\t" \
638 + "lw %[" #TEMP1 "], 4(%[args]) \n\t" \
639 + "lw %[" #TEMP2 "], "XSTR(BPS)"*" #A "(%[" #TEMP0 "]) \n\t" \
640 + "lw %[" #TEMP3 "], "XSTR(BPS)"*" #A "(%[" #TEMP1 "]) \n\t" \
641 + "preceu.ph.qbl %[" #TEMP0 "], %[" #TEMP2 "] \n\t" \
642 + "preceu.ph.qbl %[" #TEMP1 "], %[" #TEMP3 "] \n\t" \
643 + "preceu.ph.qbr %[" #TEMP2 "], %[" #TEMP2 "] \n\t" \
644 + "preceu.ph.qbr %[" #TEMP3 "], %[" #TEMP3 "] \n\t" \
645 + "subq.ph %[" #TEMP0 "], %[" #TEMP0 "], %[" #TEMP1 "] \n\t" \
646 + "subq.ph %[" #TEMP2 "], %[" #TEMP2 "], %[" #TEMP3 "] \n\t" \
647 + "rotr %[" #TEMP0 "], %[" #TEMP0 "], 16 \n\t" \
648 + "addq.ph %[" #TEMP1 "], %[" #TEMP2 "], %[" #TEMP0 "] \n\t" \
649 + "subq.ph %[" #TEMP3 "], %[" #TEMP2 "], %[" #TEMP0 "] \n\t" \
650 + "seh %[" #TEMP0 "], %[" #TEMP1 "] \n\t" \
651 + "sra %[temp16], %[" #TEMP1 "], 16 \n\t" \
652 + "seh %[temp19], %[" #TEMP3 "] \n\t" \
653 + "sra %[" #TEMP3 "], %[" #TEMP3 "], 16 \n\t" \
654 + "subu %[" #TEMP2 "], %[" #TEMP0 "], %[temp16] \n\t" \
655 + "addu %[" #TEMP0 "], %[" #TEMP0 "], %[temp16] \n\t" \
656 "mul %[temp17], %[temp19], %[c2217] \n\t" \
657 - "mul %[temp18], %["#TEMP3"], %[c5352] \n\t" \
658 - "mul %["#TEMP1"], %[temp19], %[c5352] \n\t" \
659 - "mul %[temp16], %["#TEMP3"], %[c2217] \n\t" \
660 - "sll %["#TEMP2"], %["#TEMP2"], 3 \n\t" \
661 - "sll %["#TEMP0"], %["#TEMP0"], 3 \n\t" \
662 - "subu %["#TEMP3"], %[temp17], %[temp18] \n\t" \
663 - "addu %["#TEMP1"], %[temp16], %["#TEMP1"] \n\t" \
664 - "addiu %["#TEMP3"], %["#TEMP3"], 937 \n\t" \
665 - "addiu %["#TEMP1"], %["#TEMP1"], 1812 \n\t" \
666 - "sra %["#TEMP3"], %["#TEMP3"], 9 \n\t" \
667 - "sra %["#TEMP1"], %["#TEMP1"], 9 \n\t"
668 + "mul %[temp18], %[" #TEMP3 "], %[c5352] \n\t" \
669 + "mul %[" #TEMP1 "], %[temp19], %[c5352] \n\t" \
670 + "mul %[temp16], %[" #TEMP3 "], %[c2217] \n\t" \
671 + "sll %[" #TEMP2 "], %[" #TEMP2 "], 3 \n\t" \
672 + "sll %[" #TEMP0 "], %[" #TEMP0 "], 3 \n\t" \
673 + "subu %[" #TEMP3 "], %[temp17], %[temp18] \n\t" \
674 + "addu %[" #TEMP1 "], %[temp16], %[" #TEMP1 "] \n\t" \
675 + "addiu %[" #TEMP3 "], %[" #TEMP3 "], 937 \n\t" \
676 + "addiu %[" #TEMP1 "], %[" #TEMP1 "], 1812 \n\t" \
677 + "sra %[" #TEMP3 "], %[" #TEMP3 "], 9 \n\t" \
678 + "sra %[" #TEMP1 "], %[" #TEMP1 "], 9 \n\t"
680 // macro for one vertical pass in FTransform
681 // temp0..temp15 holds tmp[0]..tmp[15]
682 // A..D - offsets in bytes to store to out buffer
683 // TEMP0, TEMP4, TEMP8 and TEMP12 - registers for corresponding tmp elements
684 #define VERTICAL_PASS(A, B, C, D, TEMP0, TEMP4, TEMP8, TEMP12) \
685 - "addu %[temp16], %["#TEMP0"], %["#TEMP12"] \n\t" \
686 - "subu %[temp19], %["#TEMP0"], %["#TEMP12"] \n\t" \
687 - "addu %[temp17], %["#TEMP4"], %["#TEMP8"] \n\t" \
688 - "subu %[temp18], %["#TEMP4"], %["#TEMP8"] \n\t" \
689 - "mul %["#TEMP8"], %[temp19], %[c2217] \n\t" \
690 - "mul %["#TEMP12"], %[temp18], %[c2217] \n\t" \
691 - "mul %["#TEMP4"], %[temp19], %[c5352] \n\t" \
692 + "addu %[temp16], %[" #TEMP0 "], %[" #TEMP12 "] \n\t" \
693 + "subu %[temp19], %[" #TEMP0 "], %[" #TEMP12 "] \n\t" \
694 + "addu %[temp17], %[" #TEMP4 "], %[" #TEMP8 "] \n\t" \
695 + "subu %[temp18], %[" #TEMP4 "], %[" #TEMP8 "] \n\t" \
696 + "mul %[" #TEMP8 "], %[temp19], %[c2217] \n\t" \
697 + "mul %[" #TEMP12 "], %[temp18], %[c2217] \n\t" \
698 + "mul %[" #TEMP4 "], %[temp19], %[c5352] \n\t" \
699 "mul %[temp18], %[temp18], %[c5352] \n\t" \
700 "addiu %[temp16], %[temp16], 7 \n\t" \
701 - "addu %["#TEMP0"], %[temp16], %[temp17] \n\t" \
702 - "sra %["#TEMP0"], %["#TEMP0"], 4 \n\t" \
703 - "addu %["#TEMP12"], %["#TEMP12"], %["#TEMP4"] \n\t" \
704 - "subu %["#TEMP4"], %[temp16], %[temp17] \n\t" \
705 - "sra %["#TEMP4"], %["#TEMP4"], 4 \n\t" \
706 - "addiu %["#TEMP8"], %["#TEMP8"], 30000 \n\t" \
707 - "addiu %["#TEMP12"], %["#TEMP12"], 12000 \n\t" \
708 - "addiu %["#TEMP8"], %["#TEMP8"], 21000 \n\t" \
709 - "subu %["#TEMP8"], %["#TEMP8"], %[temp18] \n\t" \
710 - "sra %["#TEMP12"], %["#TEMP12"], 16 \n\t" \
711 - "sra %["#TEMP8"], %["#TEMP8"], 16 \n\t" \
712 - "addiu %[temp16], %["#TEMP12"], 1 \n\t" \
713 - "movn %["#TEMP12"], %[temp16], %[temp19] \n\t" \
714 - "sh %["#TEMP0"], "#A"(%[temp20]) \n\t" \
715 - "sh %["#TEMP4"], "#C"(%[temp20]) \n\t" \
716 - "sh %["#TEMP8"], "#D"(%[temp20]) \n\t" \
717 - "sh %["#TEMP12"], "#B"(%[temp20]) \n\t"
718 + "addu %[" #TEMP0 "], %[temp16], %[temp17] \n\t" \
719 + "sra %[" #TEMP0 "], %[" #TEMP0 "], 4 \n\t" \
720 + "addu %[" #TEMP12 "], %[" #TEMP12 "], %[" #TEMP4 "] \n\t" \
721 + "subu %[" #TEMP4 "], %[temp16], %[temp17] \n\t" \
722 + "sra %[" #TEMP4 "], %[" #TEMP4 "], 4 \n\t" \
723 + "addiu %[" #TEMP8 "], %[" #TEMP8 "], 30000 \n\t" \
724 + "addiu %[" #TEMP12 "], %[" #TEMP12 "], 12000 \n\t" \
725 + "addiu %[" #TEMP8 "], %[" #TEMP8 "], 21000 \n\t" \
726 + "subu %[" #TEMP8 "], %[" #TEMP8 "], %[temp18] \n\t" \
727 + "sra %[" #TEMP12 "], %[" #TEMP12 "], 16 \n\t" \
728 + "sra %[" #TEMP8 "], %[" #TEMP8 "], 16 \n\t" \
729 + "addiu %[temp16], %[" #TEMP12 "], 1 \n\t" \
730 + "movn %[" #TEMP12 "], %[temp16], %[temp19] \n\t" \
731 + "sh %[" #TEMP0 "], " #A "(%[temp20]) \n\t" \
732 + "sh %[" #TEMP4 "], " #C "(%[temp20]) \n\t" \
733 + "sh %[" #TEMP8 "], " #D "(%[temp20]) \n\t" \
734 + "sh %[" #TEMP12 "], " #B "(%[temp20]) \n\t"
736 static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
737 const int c2217 = 2217;
738 @@ -329,11 +329,11 @@ static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
741 #define FILL_PART(J, SIZE) \
742 - "usw %[value], 0+"#J"*"XSTR(BPS)"(%[dst]) \n\t" \
743 - "usw %[value], 4+"#J"*"XSTR(BPS)"(%[dst]) \n\t" \
744 - ".if "#SIZE" == 16 \n\t" \
745 - "usw %[value], 8+"#J"*"XSTR(BPS)"(%[dst]) \n\t" \
746 - "usw %[value], 12+"#J"*"XSTR(BPS)"(%[dst]) \n\t" \
747 + "usw %[value], 0+" #J "*"XSTR(BPS)"(%[dst]) \n\t" \
748 + "usw %[value], 4+" #J "*"XSTR(BPS)"(%[dst]) \n\t" \
749 + ".if " #SIZE " == 16 \n\t" \
750 + "usw %[value], 8+" #J "*"XSTR(BPS)"(%[dst]) \n\t" \
751 + "usw %[value], 12+" #J "*"XSTR(BPS)"(%[dst]) \n\t" \
754 #define FILL_8_OR_16(DST, VALUE, SIZE) do { \
755 @@ -348,7 +348,7 @@ static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
756 FILL_PART( 5, SIZE) \
757 FILL_PART( 6, SIZE) \
758 FILL_PART( 7, SIZE) \
759 - ".if "#SIZE" == 16 \n\t" \
760 + ".if " #SIZE " == 16 \n\t" \
764 @@ -425,7 +425,7 @@ HORIZONTAL_PRED(dst, left, 16)
766 "usw %[temp0], 0(%[dst]) \n\t" \
767 "usw %[temp1], 4(%[dst]) \n\t" \
768 - ".if "#SIZE" == 16 \n\t" \
769 + ".if " #SIZE " == 16 \n\t" \
770 "ulw %[temp0], 8(%[top]) \n\t" \
771 "ulw %[temp1], 12(%[top]) \n\t" \
773 @@ -1060,8 +1060,8 @@ static void Intra4Preds(uint8_t* dst, const uint8_t* top) {
774 #if !defined(WORK_AROUND_GCC)
776 #define GET_SSE_INNER(A) \
777 - "lw %[temp0], "#A"(%[a]) \n\t" \
778 - "lw %[temp1], "#A"(%[b]) \n\t" \
779 + "lw %[temp0], " #A "(%[a]) \n\t" \
780 + "lw %[temp1], " #A "(%[b]) \n\t" \
781 "preceu.ph.qbr %[temp2], %[temp0] \n\t" \
782 "preceu.ph.qbl %[temp0], %[temp0] \n\t" \
783 "preceu.ph.qbr %[temp3], %[temp1] \n\t" \
784 @@ -1185,28 +1185,28 @@ static int SSE4x4(const uint8_t* a, const uint8_t* b) {
785 // N - offset in bytes (n * 2)
786 // N1 - offset in bytes ((n + 1) * 2)
787 #define QUANTIZE_ONE(J, K, N, N1) \
788 - "ulw %[temp1], "#J"(%[ppin]) \n\t" \
789 - "ulw %[temp2], "#J"(%[ppsharpen]) \n\t" \
790 - "lhu %[temp3], "#K"(%[ppzthresh]) \n\t" \
791 - "lhu %[temp6], "#K"+4(%[ppzthresh]) \n\t" \
792 + "ulw %[temp1], " #J "(%[ppin]) \n\t" \
793 + "ulw %[temp2], " #J "(%[ppsharpen]) \n\t" \
794 + "lhu %[temp3], " #K "(%[ppzthresh]) \n\t" \
795 + "lhu %[temp6], " #K "+4(%[ppzthresh]) \n\t" \
796 "absq_s.ph %[temp4], %[temp1] \n\t" \
797 "ins %[temp3], %[temp6], 16, 16 \n\t" \
798 "addu.ph %[coeff], %[temp4], %[temp2] \n\t" \
799 "shra.ph %[sign], %[temp1], 15 \n\t" \
800 "li %[level], 0x10001 \n\t" \
801 "cmp.lt.ph %[temp3], %[coeff] \n\t" \
802 - "lhu %[temp1], "#J"(%[ppiq]) \n\t" \
803 + "lhu %[temp1], " #J "(%[ppiq]) \n\t" \
804 "pick.ph %[temp5], %[level], $0 \n\t" \
805 - "lw %[temp2], "#K"(%[ppbias]) \n\t" \
806 + "lw %[temp2], " #K "(%[ppbias]) \n\t" \
807 "beqz %[temp5], 0f \n\t" \
808 - "lhu %[temp3], "#J"(%[ppq]) \n\t" \
809 + "lhu %[temp3], " #J "(%[ppq]) \n\t" \
810 "beq %[temp5], %[level], 1f \n\t" \
811 "andi %[temp5], %[temp5], 0x1 \n\t" \
812 "andi %[temp4], %[coeff], 0xffff \n\t" \
813 "beqz %[temp5], 2f \n\t" \
814 "mul %[level], %[temp4], %[temp1] \n\t" \
815 - "sh $0, "#J"+2(%[ppin]) \n\t" \
816 - "sh $0, "#N1"(%[pout]) \n\t" \
817 + "sh $0, " #J "+2(%[ppin]) \n\t" \
818 + "sh $0, " #N1 "(%[pout]) \n\t" \
819 "addu %[level], %[level], %[temp2] \n\t" \
820 "sra %[level], %[level], 17 \n\t" \
821 "slt %[temp4], %[max_level], %[level] \n\t" \
822 @@ -1216,15 +1216,15 @@ static int SSE4x4(const uint8_t* a, const uint8_t* b) {
823 "subu %[level], %[level], %[temp6] \n\t" \
824 "mul %[temp5], %[level], %[temp3] \n\t" \
825 "or %[ret], %[ret], %[level] \n\t" \
826 - "sh %[level], "#N"(%[pout]) \n\t" \
827 - "sh %[temp5], "#J"(%[ppin]) \n\t" \
828 + "sh %[level], " #N "(%[pout]) \n\t" \
829 + "sh %[temp5], " #J "(%[ppin]) \n\t" \
832 - "lhu %[temp1], "#J"+2(%[ppiq]) \n\t" \
833 + "lhu %[temp1], " #J "+2(%[ppiq]) \n\t" \
834 "srl %[temp5], %[coeff], 16 \n\t" \
835 "mul %[level], %[temp5], %[temp1] \n\t" \
836 - "lw %[temp2], "#K"+4(%[ppbias]) \n\t" \
837 - "lhu %[temp3], "#J"+2(%[ppq]) \n\t" \
838 + "lw %[temp2], " #K "+4(%[ppbias]) \n\t" \
839 + "lhu %[temp3], " #J "+2(%[ppq]) \n\t" \
840 "addu %[level], %[level], %[temp2] \n\t" \
841 "sra %[level], %[level], 17 \n\t" \
842 "srl %[temp6], %[sign], 16 \n\t" \
843 @@ -1233,20 +1233,20 @@ static int SSE4x4(const uint8_t* a, const uint8_t* b) {
844 "xor %[level], %[level], %[temp6] \n\t" \
845 "subu %[level], %[level], %[temp6] \n\t" \
846 "mul %[temp5], %[level], %[temp3] \n\t" \
847 - "sh $0, "#J"(%[ppin]) \n\t" \
848 - "sh $0, "#N"(%[pout]) \n\t" \
849 + "sh $0, " #J "(%[ppin]) \n\t" \
850 + "sh $0, " #N "(%[pout]) \n\t" \
851 "or %[ret], %[ret], %[level] \n\t" \
852 - "sh %[temp5], "#J"+2(%[ppin]) \n\t" \
853 - "sh %[level], "#N1"(%[pout]) \n\t" \
854 + "sh %[temp5], " #J "+2(%[ppin]) \n\t" \
855 + "sh %[level], " #N1 "(%[pout]) \n\t" \
858 - "lhu %[temp1], "#J"(%[ppiq]) \n\t" \
859 - "lw %[temp2], "#K"(%[ppbias]) \n\t" \
860 - "ulw %[temp3], "#J"(%[ppq]) \n\t" \
861 + "lhu %[temp1], " #J "(%[ppiq]) \n\t" \
862 + "lw %[temp2], " #K "(%[ppbias]) \n\t" \
863 + "ulw %[temp3], " #J "(%[ppq]) \n\t" \
864 "andi %[temp5], %[coeff], 0xffff \n\t" \
865 "srl %[temp0], %[coeff], 16 \n\t" \
866 - "lhu %[temp6], "#J"+2(%[ppiq]) \n\t" \
867 - "lw %[coeff], "#K"+4(%[ppbias]) \n\t" \
868 + "lhu %[temp6], " #J "+2(%[ppiq]) \n\t" \
869 + "lw %[coeff], " #K "+4(%[ppbias]) \n\t" \
870 "mul %[level], %[temp5], %[temp1] \n\t" \
871 "mul %[temp4], %[temp0], %[temp6] \n\t" \
872 "addu %[level], %[level], %[temp2] \n\t" \
873 @@ -1259,15 +1259,15 @@ static int SSE4x4(const uint8_t* a, const uint8_t* b) {
874 "subu.ph %[level], %[level], %[sign] \n\t" \
875 "mul.ph %[temp3], %[level], %[temp3] \n\t" \
876 "or %[ret], %[ret], %[level] \n\t" \
877 - "sh %[level], "#N"(%[pout]) \n\t" \
878 + "sh %[level], " #N "(%[pout]) \n\t" \
879 "srl %[level], %[level], 16 \n\t" \
880 - "sh %[level], "#N1"(%[pout]) \n\t" \
881 - "usw %[temp3], "#J"(%[ppin]) \n\t" \
882 + "sh %[level], " #N1 "(%[pout]) \n\t" \
883 + "usw %[temp3], " #J "(%[ppin]) \n\t" \
886 - "sh $0, "#N"(%[pout]) \n\t" \
887 - "sh $0, "#N1"(%[pout]) \n\t" \
888 - "usw $0, "#J"(%[ppin]) \n\t" \
889 + "sh $0, " #N "(%[pout]) \n\t" \
890 + "sh $0, " #N1 "(%[pout]) \n\t" \
891 + "usw $0, " #J "(%[ppin]) \n\t" \
894 static int QuantizeBlock(int16_t in[16], int16_t out[16],
895 @@ -1326,37 +1326,37 @@ static int Quantize2Blocks(int16_t in[32], int16_t out[32],
896 // A, B, C, D - offset in bytes to load from in buffer
897 // TEMP0, TEMP1 - registers for corresponding tmp elements
898 #define HORIZONTAL_PASS_WHT(A, B, C, D, TEMP0, TEMP1) \
899 - "lh %["#TEMP0"], "#A"(%[in]) \n\t" \
900 - "lh %["#TEMP1"], "#B"(%[in]) \n\t" \
901 - "lh %[temp8], "#C"(%[in]) \n\t" \
902 - "lh %[temp9], "#D"(%[in]) \n\t" \
903 - "ins %["#TEMP1"], %["#TEMP0"], 16, 16 \n\t" \
904 + "lh %[" #TEMP0 "], " #A "(%[in]) \n\t" \
905 + "lh %[" #TEMP1 "], " #B "(%[in]) \n\t" \
906 + "lh %[temp8], " #C "(%[in]) \n\t" \
907 + "lh %[temp9], " #D "(%[in]) \n\t" \
908 + "ins %[" #TEMP1 "], %[" #TEMP0 "], 16, 16 \n\t" \
909 "ins %[temp9], %[temp8], 16, 16 \n\t" \
910 - "subq.ph %[temp8], %["#TEMP1"], %[temp9] \n\t" \
911 - "addq.ph %[temp9], %["#TEMP1"], %[temp9] \n\t" \
912 - "precrq.ph.w %["#TEMP0"], %[temp8], %[temp9] \n\t" \
913 + "subq.ph %[temp8], %[" #TEMP1 "], %[temp9] \n\t" \
914 + "addq.ph %[temp9], %[" #TEMP1 "], %[temp9] \n\t" \
915 + "precrq.ph.w %[" #TEMP0 "], %[temp8], %[temp9] \n\t" \
916 "append %[temp8], %[temp9], 16 \n\t" \
917 - "subq.ph %["#TEMP1"], %["#TEMP0"], %[temp8] \n\t" \
918 - "addq.ph %["#TEMP0"], %["#TEMP0"], %[temp8] \n\t" \
919 - "rotr %["#TEMP1"], %["#TEMP1"], 16 \n\t"
920 + "subq.ph %[" #TEMP1 "], %[" #TEMP0 "], %[temp8] \n\t" \
921 + "addq.ph %[" #TEMP0 "], %[" #TEMP0 "], %[temp8] \n\t" \
922 + "rotr %[" #TEMP1 "], %[" #TEMP1 "], 16 \n\t"
924 // macro for one vertical pass in FTransformWHT
925 // temp0..temp7 holds tmp[0]..tmp[15]
926 // A, B, C, D - offsets in bytes to store to out buffer
927 // TEMP0, TEMP2, TEMP4 and TEMP6 - registers for corresponding tmp elements
928 #define VERTICAL_PASS_WHT(A, B, C, D, TEMP0, TEMP2, TEMP4, TEMP6) \
929 - "addq.ph %[temp8], %["#TEMP0"], %["#TEMP4"] \n\t" \
930 - "addq.ph %[temp9], %["#TEMP2"], %["#TEMP6"] \n\t" \
931 - "subq.ph %["#TEMP2"], %["#TEMP2"], %["#TEMP6"] \n\t" \
932 - "subq.ph %["#TEMP6"], %["#TEMP0"], %["#TEMP4"] \n\t" \
933 - "addqh.ph %["#TEMP0"], %[temp8], %[temp9] \n\t" \
934 - "subqh.ph %["#TEMP4"], %["#TEMP6"], %["#TEMP2"] \n\t" \
935 - "addqh.ph %["#TEMP2"], %["#TEMP2"], %["#TEMP6"] \n\t" \
936 - "subqh.ph %["#TEMP6"], %[temp8], %[temp9] \n\t" \
937 - "usw %["#TEMP0"], "#A"(%[out]) \n\t" \
938 - "usw %["#TEMP2"], "#B"(%[out]) \n\t" \
939 - "usw %["#TEMP4"], "#C"(%[out]) \n\t" \
940 - "usw %["#TEMP6"], "#D"(%[out]) \n\t"
941 + "addq.ph %[temp8], %[" #TEMP0 "], %[" #TEMP4 "] \n\t" \
942 + "addq.ph %[temp9], %[" #TEMP2 "], %[" #TEMP6 "] \n\t" \
943 + "subq.ph %[" #TEMP2 "], %[" #TEMP2 "], %[" #TEMP6 "] \n\t" \
944 + "subq.ph %[" #TEMP6 "], %[" #TEMP0 "], %[" #TEMP4 "] \n\t" \
945 + "addqh.ph %[" #TEMP0 "], %[temp8], %[temp9] \n\t" \
946 + "subqh.ph %[" #TEMP4 "], %[" #TEMP6 "], %[" #TEMP2 "] \n\t" \
947 + "addqh.ph %[" #TEMP2 "], %[" #TEMP2 "], %[" #TEMP6 "] \n\t" \
948 + "subqh.ph %[" #TEMP6 "], %[temp8], %[temp9] \n\t" \
949 + "usw %[" #TEMP0 "], " #A "(%[out]) \n\t" \
950 + "usw %[" #TEMP2 "], " #B "(%[out]) \n\t" \
951 + "usw %[" #TEMP4 "], " #C "(%[out]) \n\t" \
952 + "usw %[" #TEMP6 "], " #D "(%[out]) \n\t"
954 static void FTransformWHT(const int16_t* in, int16_t* out) {
955 int temp0, temp1, temp2, temp3, temp4;
956 @@ -1385,10 +1385,10 @@ static void FTransformWHT(const int16_t* in, int16_t* out) {
957 // convert 8 coeffs at time
958 // A, B, C, D - offsets in bytes to load from out buffer
959 #define CONVERT_COEFFS_TO_BIN(A, B, C, D) \
960 - "ulw %[temp0], "#A"(%[out]) \n\t" \
961 - "ulw %[temp1], "#B"(%[out]) \n\t" \
962 - "ulw %[temp2], "#C"(%[out]) \n\t" \
963 - "ulw %[temp3], "#D"(%[out]) \n\t" \
964 + "ulw %[temp0], " #A "(%[out]) \n\t" \
965 + "ulw %[temp1], " #B "(%[out]) \n\t" \
966 + "ulw %[temp2], " #C "(%[out]) \n\t" \
967 + "ulw %[temp3], " #D "(%[out]) \n\t" \
968 "absq_s.ph %[temp0], %[temp0] \n\t" \
969 "absq_s.ph %[temp1], %[temp1] \n\t" \
970 "absq_s.ph %[temp2], %[temp2] \n\t" \
971 diff --git a/Source/LibWebP/src/dsp/dsp.filters_mips_dsp_r2.c b/Source/LibWebP/src/dsp/dsp.filters_mips_dsp_r2.c
972 index 6c34efb..6a1f8f4 100644
973 --- a/Source/LibWebP/src/dsp/dsp.filters_mips_dsp_r2.c
974 +++ b/Source/LibWebP/src/dsp/dsp.filters_mips_dsp_r2.c
976 "srl %[temp0], %[length], 0x2 \n\t" \
977 "beqz %[temp0], 4f \n\t" \
978 " andi %[temp6], %[length], 0x3 \n\t" \
979 - ".if "#INVERSE" \n\t" \
980 + ".if " #INVERSE " \n\t" \
981 "lbu %[temp1], -1(%[src]) \n\t" \
983 "lbu %[temp2], 0(%[src]) \n\t" \
985 "lbu %[temp1], -1(%[src]) \n\t" \
986 "lbu %[temp2], 0(%[src]) \n\t" \
987 "addiu %[src], %[src], 1 \n\t" \
988 - ".if "#INVERSE" \n\t" \
989 + ".if " #INVERSE " \n\t" \
990 "addu %[temp3], %[temp1], %[temp2] \n\t" \
991 "sb %[temp3], -1(%[src]) \n\t" \
993 @@ -131,7 +131,7 @@ static WEBP_INLINE void PredictLine(const uint8_t* src, uint8_t* dst,
994 "ulw %[temp3], 4(%[src]) \n\t" \
995 "ulw %[temp4], 4(%[pred]) \n\t" \
996 "addiu %[src], %[src], 8 \n\t" \
997 - ".if "#INVERSE" \n\t" \
998 + ".if " #INVERSE " \n\t" \
999 "addu.qb %[temp5], %[temp1], %[temp2] \n\t" \
1000 "addu.qb %[temp6], %[temp3], %[temp4] \n\t" \
1002 @@ -152,7 +152,7 @@ static WEBP_INLINE void PredictLine(const uint8_t* src, uint8_t* dst,
1003 "lbu %[temp2], 0(%[pred]) \n\t" \
1004 "addiu %[src], %[src], 1 \n\t" \
1005 "addiu %[pred], %[pred], 1 \n\t" \
1006 - ".if "#INVERSE" \n\t" \
1007 + ".if " #INVERSE " \n\t" \
1008 "addu %[temp3], %[temp1], %[temp2] \n\t" \
1010 "subu %[temp3], %[temp1], %[temp2] \n\t" \
1011 @@ -177,7 +177,7 @@ static WEBP_INLINE void PredictLine(const uint8_t* src, uint8_t* dst,
1012 __asm__ volatile ( \
1013 "lbu %[temp1], 0(%[src]) \n\t" \
1014 "lbu %[temp2], 0(%[pred]) \n\t" \
1015 - ".if "#INVERSE" \n\t" \
1016 + ".if " #INVERSE " \n\t" \
1017 "addu %[temp3], %[temp1], %[temp2] \n\t" \
1019 "subu %[temp3], %[temp1], %[temp2] \n\t" \
1020 diff --git a/Source/LibWebP/src/dsp/dsp.lossless_mips32.c b/Source/LibWebP/src/dsp/dsp.lossless_mips32.c
1021 index 68fbe85..abe97c1 100644
1022 --- a/Source/LibWebP/src/dsp/dsp.lossless_mips32.c
1023 +++ b/Source/LibWebP/src/dsp/dsp.lossless_mips32.c
1024 @@ -285,28 +285,28 @@ static VP8LStreaks HuffmanCostCombinedCount(const uint32_t* X,
1025 // literal_ and successive histograms could be unaligned
1026 // so we must use ulw and usw
1027 #define ADD_TO_OUT(A, B, C, D, E, P0, P1, P2) \
1028 - "ulw %[temp0], "#A"(%["#P0"]) \n\t" \
1029 - "ulw %[temp1], "#B"(%["#P0"]) \n\t" \
1030 - "ulw %[temp2], "#C"(%["#P0"]) \n\t" \
1031 - "ulw %[temp3], "#D"(%["#P0"]) \n\t" \
1032 - "ulw %[temp4], "#A"(%["#P1"]) \n\t" \
1033 - "ulw %[temp5], "#B"(%["#P1"]) \n\t" \
1034 - "ulw %[temp6], "#C"(%["#P1"]) \n\t" \
1035 - "ulw %[temp7], "#D"(%["#P1"]) \n\t" \
1036 + "ulw %[temp0], " #A "(%[" #P0 "]) \n\t" \
1037 + "ulw %[temp1], " #B "(%[" #P0 "]) \n\t" \
1038 + "ulw %[temp2], " #C "(%[" #P0 "]) \n\t" \
1039 + "ulw %[temp3], " #D "(%[" #P0 "]) \n\t" \
1040 + "ulw %[temp4], " #A "(%[" #P1 "]) \n\t" \
1041 + "ulw %[temp5], " #B "(%[" #P1 "]) \n\t" \
1042 + "ulw %[temp6], " #C "(%[" #P1 "]) \n\t" \
1043 + "ulw %[temp7], " #D "(%[" #P1 "]) \n\t" \
1044 "addu %[temp4], %[temp4], %[temp0] \n\t" \
1045 "addu %[temp5], %[temp5], %[temp1] \n\t" \
1046 "addu %[temp6], %[temp6], %[temp2] \n\t" \
1047 "addu %[temp7], %[temp7], %[temp3] \n\t" \
1048 - "addiu %["#P0"], %["#P0"], 16 \n\t" \
1049 - ".if "#E" == 1 \n\t" \
1050 - "addiu %["#P1"], %["#P1"], 16 \n\t" \
1051 + "addiu %[" #P0 "], %[" #P0 "], 16 \n\t" \
1052 + ".if " #E " == 1 \n\t" \
1053 + "addiu %[" #P1 "], %[" #P1 "], 16 \n\t" \
1055 - "usw %[temp4], "#A"(%["#P2"]) \n\t" \
1056 - "usw %[temp5], "#B"(%["#P2"]) \n\t" \
1057 - "usw %[temp6], "#C"(%["#P2"]) \n\t" \
1058 - "usw %[temp7], "#D"(%["#P2"]) \n\t" \
1059 - "addiu %["#P2"], %["#P2"], 16 \n\t" \
1060 - "bne %["#P0"], %[LoopEnd], 1b \n\t" \
1061 + "usw %[temp4], " #A "(%[" #P2 "]) \n\t" \
1062 + "usw %[temp5], " #B "(%[" #P2 "]) \n\t" \
1063 + "usw %[temp6], " #C "(%[" #P2 "]) \n\t" \
1064 + "usw %[temp7], " #D "(%[" #P2 "]) \n\t" \
1065 + "addiu %[" #P2 "], %[" #P2 "], 16 \n\t" \
1066 + "bne %[" #P0 "], %[LoopEnd], 1b \n\t" \
1069 #define ASM_END_COMMON_0 \
1070 diff --git a/Source/LibWebP/src/dsp/dsp.lossless_mips_dsp_r2.c b/Source/LibWebP/src/dsp/dsp.lossless_mips_dsp_r2.c
1071 index 821cda9..31ac181 100644
1072 --- a/Source/LibWebP/src/dsp/dsp.lossless_mips_dsp_r2.c
1073 +++ b/Source/LibWebP/src/dsp/dsp.lossless_mips_dsp_r2.c
1074 @@ -29,14 +29,14 @@ static void FUNC_NAME(const TYPE* src, \
1075 for (x = 0; x < (width >> 2); ++x) { \
1076 int tmp1, tmp2, tmp3, tmp4; \
1077 __asm__ volatile ( \
1078 - ".ifc "#TYPE", uint8_t \n\t" \
1079 + ".ifc " #TYPE ", uint8_t \n\t" \
1080 "lbu %[tmp1], 0(%[src]) \n\t" \
1081 "lbu %[tmp2], 1(%[src]) \n\t" \
1082 "lbu %[tmp3], 2(%[src]) \n\t" \
1083 "lbu %[tmp4], 3(%[src]) \n\t" \
1084 "addiu %[src], %[src], 4 \n\t" \
1086 - ".ifc "#TYPE", uint32_t \n\t" \
1087 + ".ifc " #TYPE ", uint32_t \n\t" \
1088 "lw %[tmp1], 0(%[src]) \n\t" \
1089 "lw %[tmp2], 4(%[src]) \n\t" \
1090 "lw %[tmp3], 8(%[src]) \n\t" \
1091 @@ -55,7 +55,7 @@ static void FUNC_NAME(const TYPE* src, \
1092 "lwx %[tmp2], %[tmp2](%[color_map]) \n\t" \
1093 "lwx %[tmp3], %[tmp3](%[color_map]) \n\t" \
1094 "lwx %[tmp4], %[tmp4](%[color_map]) \n\t" \
1095 - ".ifc "#TYPE", uint8_t \n\t" \
1096 + ".ifc " #TYPE ", uint8_t \n\t" \
1097 "ext %[tmp1], %[tmp1], 8, 8 \n\t" \
1098 "ext %[tmp2], %[tmp2], 8, 8 \n\t" \
1099 "ext %[tmp3], %[tmp3], 8, 8 \n\t" \
1100 @@ -66,7 +66,7 @@ static void FUNC_NAME(const TYPE* src, \
1101 "sb %[tmp4], 3(%[dst]) \n\t" \
1102 "addiu %[dst], %[dst], 4 \n\t" \
1104 - ".ifc "#TYPE", uint32_t \n\t" \
1105 + ".ifc " #TYPE ", uint32_t \n\t" \
1106 "sw %[tmp1], 0(%[dst]) \n\t" \
1107 "sw %[tmp2], 4(%[dst]) \n\t" \
1108 "sw %[tmp3], 8(%[dst]) \n\t" \
1109 diff --git a/Source/LibWebP/src/dsp/dsp.upsampling_mips_dsp_r2.c b/Source/LibWebP/src/dsp/dsp.upsampling_mips_dsp_r2.c
1110 index a7864a0..cb3adfe 100644
1111 --- a/Source/LibWebP/src/dsp/dsp.upsampling_mips_dsp_r2.c
1112 +++ b/Source/LibWebP/src/dsp/dsp.upsampling_mips_dsp_r2.c
1114 G = G - t2 + kGCst; \
1116 __asm__ volatile ( \
1117 - "shll_s.w %["#R"], %["#R"], 9 \n\t" \
1118 - "shll_s.w %["#G"], %["#G"], 9 \n\t" \
1119 - "shll_s.w %["#B"], %["#B"], 9 \n\t" \
1120 - "precrqu_s.qb.ph %["#R"], %["#R"], $zero \n\t" \
1121 - "precrqu_s.qb.ph %["#G"], %["#G"], $zero \n\t" \
1122 - "precrqu_s.qb.ph %["#B"], %["#B"], $zero \n\t" \
1123 - "srl %["#R"], %["#R"], 24 \n\t" \
1124 - "srl %["#G"], %["#G"], 24 \n\t" \
1125 - "srl %["#B"], %["#B"], 24 \n\t" \
1126 + "shll_s.w %[" #R "], %[" #R "], 9 \n\t" \
1127 + "shll_s.w %[" #G "], %[" #G "], 9 \n\t" \
1128 + "shll_s.w %[" #B "], %[" #B "], 9 \n\t" \
1129 + "precrqu_s.qb.ph %[" #R "], %[" #R "], $zero \n\t" \
1130 + "precrqu_s.qb.ph %[" #G "], %[" #G "], $zero \n\t" \
1131 + "precrqu_s.qb.ph %[" #B "], %[" #B "], $zero \n\t" \
1132 + "srl %[" #R "], %[" #R "], 24 \n\t" \
1133 + "srl %[" #G "], %[" #G "], 24 \n\t" \
1134 + "srl %[" #B "], %[" #B "], 24 \n\t" \
1135 : [R]"+r"(R), [G]"+r"(G), [B]"+r"(B) \
1138 diff --git a/Source/LibWebP/src/dsp/dsp.yuv_mips_dsp_r2.c b/Source/LibWebP/src/dsp/dsp.yuv_mips_dsp_r2.c
1139 index 66adde5..51cbe9e 100644
1140 --- a/Source/LibWebP/src/dsp/dsp.yuv_mips_dsp_r2.c
1141 +++ b/Source/LibWebP/src/dsp/dsp.yuv_mips_dsp_r2.c
1143 "addu %[temp5], %[temp0], %[temp1] \n\t" \
1144 "subu %[temp6], %[temp0], %[temp2] \n\t" \
1145 "addu %[temp7], %[temp0], %[temp4] \n\t" \
1147 +".if " #K " \n\t" \
1148 "lbu %[temp0], 1(%[y]) \n\t" \
1150 "shll_s.w %[temp5], %[temp5], 9 \n\t" \
1151 "shll_s.w %[temp6], %[temp6], 9 \n\t" \
1153 +".if " #K " \n\t" \
1154 "mul %[temp0], %[t_con_5], %[temp0] \n\t" \
1156 "shll_s.w %[temp7], %[temp7], 9 \n\t" \
1158 "srl %[temp5], %[temp5], 24 \n\t" \
1159 "srl %[temp6], %[temp6], 24 \n\t" \
1160 "srl %[temp7], %[temp7], 24 \n\t" \
1161 - "sb %[temp5], "#R"(%[dst]) \n\t" \
1162 - "sb %[temp6], "#G"(%[dst]) \n\t" \
1163 - "sb %[temp7], "#B"(%[dst]) \n\t" \
1164 + "sb %[temp5], " #R "(%[dst]) \n\t" \
1165 + "sb %[temp6], " #G "(%[dst]) \n\t" \
1166 + "sb %[temp7], " #B "(%[dst]) \n\t" \
1168 #define ASM_CLOBBER_LIST() \
1169 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), \