2 * x86-64 optimized assembler MD5 implementation
4 * Author: Marc Bevand, 2004
6 * This code was placed in the public domain by the author. The original
7 * publication can be found at:
9 * https://www.zorinaq.com/papers/md5-amd64.html
12 * No modifications were made aside from changing the function and file names.
13 * The MD5_CTX structure as expected here (from OpenSSL) is binary compatible
14 * with the md_context used by rsync, for the fields accessed.
16 * Benchmarks (in MB/s), C implementation vs. this ASM implementation:
17 * - Intel Atom D2700: C 302, ASM 334
18 * - Intel i7-7700HQ: C 351, ASM 376
19 * - AMD Threadripper 2950X: C 728, ASM 784
21 * The original code was also incorporated into OpenSSL. It has since been
22 * modified there. Those changes have not been made here due to licensing
23 * incompatibilities. Benchmarks of those changes on the above CPUs did not
24 * show any significant difference in performance, though.
28 #include "md-defines.h"
30 #ifdef USE_MD5_ASM /* { */
33 #define md5_process_asm _md5_process_asm
/*
 * md5_process_asm: entry and setup.
 *
 * Arguments arrive in %rdi (ctx), %rsi (ptr) and %rdx (nbr), as listed in
 * the comments below.  The setup code:
 *   - keeps the ctx pointer in %rbp, which frees %rdi;
 *   - converts nbr from blocks to bytes (shift left by 6 = times 64, i.e.
 *     sixteen 4-byte words per block) and forms end = ptr + nbr*64 in %rdi;
 *   - loads the four 32-bit state words from ctx offsets 0, 4, 8 and 12
 *     into %eax, %ebx, %ecx and %edx (A, B, C, D);
 *   - jumps forward to local label 1 when ptr == end, i.e. when nbr is 0.
 *
 * NOTE(review): the `md5_process_asm:` label itself, the local label 1,
 * and any save of %rbp / %rbx (both overwritten below) are not visible in
 * this view.  The argument registers suggest the System V AMD64 ABI, where
 * %rbp and %rbx are callee-saved -- confirm against the full file.
 */
39 .globl md5_process_asm
44 push %r13 # not really useful (r13 is unused)
48 # rdi = arg #1 (ctx, MD5_CTX pointer)
49 # rsi = arg #2 (ptr, data pointer)
50 # rdx = arg #3 (nbr, number of 16-word blocks to process)
51 mov %rdi, %rbp # rbp = ctx
52 shl $6, %rdx # rdx = nbr in bytes
53 lea (%rsi,%rdx), %rdi # rdi = end
54 mov 0*4(%rbp), %eax # eax = ctx->A
55 mov 1*4(%rbp), %ebx # ebx = ctx->B
56 mov 2*4(%rbp), %ecx # ecx = ctx->C
57 mov 3*4(%rbp), %edx # edx = ctx->D
65 cmp %rdi, %rsi # cmp end with ptr
66 je 1f # jmp if ptr == end
/*
 * Loop head and round 1 (16 steps).
 *
 * Local label 2 is the top of the per-block loop; %rsi points at the
 * current 64-byte block.  Each step computes
 *     dst = x + rol(dst + F(x,y,z) + X[k] + Const, s)
 * with F(x,y,z) = z ^ (x & (y ^ z)) built up in %r11d.  That expression
 * is bitwise-equivalent to (x & y) | (~x & z).
 *
 * %eax, %edx, %ecx and %ebx take the dst role in turn (the other three
 * act as x, y, z).  X[k] is read from k*4(%rsi) into %r10d one step
 * before it is consumed (the "NEXT STEP" comments), and the lea folds
 * Const + dst + X[k] into dst in a single instruction.  The constants are
 * written as signed 32-bit decimals (e.g. -680876936 is 0xd76aa478).
 *
 * Words are consumed in order X[0]..X[15]; rotate counts cycle through
 * 7, 12, 17, 22.  The final step still issues its look-ahead (X[0] into
 * %r10d, %edx into %r11d); the instructions right after this round load
 * %r10d and %r11d again before they are used.
 *
 * NOTE(review): the label comment mentions saving the old A, B, C, D, but
 * no instructions doing so are visible in this view -- confirm against
 * the full file.
 */
68 # BEGIN of loop over 16-word blocks
69 2: # save old values of A, B, C, D
74 mov 0*4(%rsi), %r10d /* (NEXT STEP) X[0] */
75 mov %edx, %r11d /* (NEXT STEP) z' = %edx */
/* Round 1, steps 1-4: consume X[0], X[1], X[2], X[3]. */
76 xor %ecx, %r11d /* y ^ ... */
77 lea -680876936(%eax,%r10d),%eax /* Const + dst + ... */
78 and %ebx, %r11d /* x & ... */
79 xor %edx, %r11d /* z ^ ... */
80 mov 1*4(%rsi),%r10d /* (NEXT STEP) X[1] */
81 add %r11d, %eax /* dst += ... */
82 rol $7, %eax /* dst <<< s */
83 mov %ecx, %r11d /* (NEXT STEP) z' = %ecx */
84 add %ebx, %eax /* dst += x */
85 xor %ebx, %r11d /* y ^ ... */
86 lea -389564586(%edx,%r10d),%edx /* Const + dst + ... */
87 and %eax, %r11d /* x & ... */
88 xor %ecx, %r11d /* z ^ ... */
89 mov 2*4(%rsi),%r10d /* (NEXT STEP) X[2] */
90 add %r11d, %edx /* dst += ... */
91 rol $12, %edx /* dst <<< s */
92 mov %ebx, %r11d /* (NEXT STEP) z' = %ebx */
93 add %eax, %edx /* dst += x */
94 xor %eax, %r11d /* y ^ ... */
95 lea 606105819(%ecx,%r10d),%ecx /* Const + dst + ... */
96 and %edx, %r11d /* x & ... */
97 xor %ebx, %r11d /* z ^ ... */
98 mov 3*4(%rsi),%r10d /* (NEXT STEP) X[3] */
99 add %r11d, %ecx /* dst += ... */
100 rol $17, %ecx /* dst <<< s */
101 mov %eax, %r11d /* (NEXT STEP) z' = %eax */
102 add %edx, %ecx /* dst += x */
103 xor %edx, %r11d /* y ^ ... */
104 lea -1044525330(%ebx,%r10d),%ebx /* Const + dst + ... */
105 and %ecx, %r11d /* x & ... */
106 xor %eax, %r11d /* z ^ ... */
107 mov 4*4(%rsi),%r10d /* (NEXT STEP) X[4] */
108 add %r11d, %ebx /* dst += ... */
109 rol $22, %ebx /* dst <<< s */
110 mov %edx, %r11d /* (NEXT STEP) z' = %edx */
111 add %ecx, %ebx /* dst += x */
/* Round 1, steps 5-8: consume X[4], X[5], X[6], X[7]. */
112 xor %ecx, %r11d /* y ^ ... */
113 lea -176418897(%eax,%r10d),%eax /* Const + dst + ... */
114 and %ebx, %r11d /* x & ... */
115 xor %edx, %r11d /* z ^ ... */
116 mov 5*4(%rsi),%r10d /* (NEXT STEP) X[5] */
117 add %r11d, %eax /* dst += ... */
118 rol $7, %eax /* dst <<< s */
119 mov %ecx, %r11d /* (NEXT STEP) z' = %ecx */
120 add %ebx, %eax /* dst += x */
121 xor %ebx, %r11d /* y ^ ... */
122 lea 1200080426(%edx,%r10d),%edx /* Const + dst + ... */
123 and %eax, %r11d /* x & ... */
124 xor %ecx, %r11d /* z ^ ... */
125 mov 6*4(%rsi),%r10d /* (NEXT STEP) X[6] */
126 add %r11d, %edx /* dst += ... */
127 rol $12, %edx /* dst <<< s */
128 mov %ebx, %r11d /* (NEXT STEP) z' = %ebx */
129 add %eax, %edx /* dst += x */
130 xor %eax, %r11d /* y ^ ... */
131 lea -1473231341(%ecx,%r10d),%ecx /* Const + dst + ... */
132 and %edx, %r11d /* x & ... */
133 xor %ebx, %r11d /* z ^ ... */
134 mov 7*4(%rsi),%r10d /* (NEXT STEP) X[7] */
135 add %r11d, %ecx /* dst += ... */
136 rol $17, %ecx /* dst <<< s */
137 mov %eax, %r11d /* (NEXT STEP) z' = %eax */
138 add %edx, %ecx /* dst += x */
139 xor %edx, %r11d /* y ^ ... */
140 lea -45705983(%ebx,%r10d),%ebx /* Const + dst + ... */
141 and %ecx, %r11d /* x & ... */
142 xor %eax, %r11d /* z ^ ... */
143 mov 8*4(%rsi),%r10d /* (NEXT STEP) X[8] */
144 add %r11d, %ebx /* dst += ... */
145 rol $22, %ebx /* dst <<< s */
146 mov %edx, %r11d /* (NEXT STEP) z' = %edx */
147 add %ecx, %ebx /* dst += x */
/* Round 1, steps 9-12: consume X[8], X[9], X[10], X[11]. */
148 xor %ecx, %r11d /* y ^ ... */
149 lea 1770035416(%eax,%r10d),%eax /* Const + dst + ... */
150 and %ebx, %r11d /* x & ... */
151 xor %edx, %r11d /* z ^ ... */
152 mov 9*4(%rsi),%r10d /* (NEXT STEP) X[9] */
153 add %r11d, %eax /* dst += ... */
154 rol $7, %eax /* dst <<< s */
155 mov %ecx, %r11d /* (NEXT STEP) z' = %ecx */
156 add %ebx, %eax /* dst += x */
157 xor %ebx, %r11d /* y ^ ... */
158 lea -1958414417(%edx,%r10d),%edx /* Const + dst + ... */
159 and %eax, %r11d /* x & ... */
160 xor %ecx, %r11d /* z ^ ... */
161 mov 10*4(%rsi),%r10d /* (NEXT STEP) X[10] */
162 add %r11d, %edx /* dst += ... */
163 rol $12, %edx /* dst <<< s */
164 mov %ebx, %r11d /* (NEXT STEP) z' = %ebx */
165 add %eax, %edx /* dst += x */
166 xor %eax, %r11d /* y ^ ... */
167 lea -42063(%ecx,%r10d),%ecx /* Const + dst + ... */
168 and %edx, %r11d /* x & ... */
169 xor %ebx, %r11d /* z ^ ... */
170 mov 11*4(%rsi),%r10d /* (NEXT STEP) X[11] */
171 add %r11d, %ecx /* dst += ... */
172 rol $17, %ecx /* dst <<< s */
173 mov %eax, %r11d /* (NEXT STEP) z' = %eax */
174 add %edx, %ecx /* dst += x */
175 xor %edx, %r11d /* y ^ ... */
176 lea -1990404162(%ebx,%r10d),%ebx /* Const + dst + ... */
177 and %ecx, %r11d /* x & ... */
178 xor %eax, %r11d /* z ^ ... */
179 mov 12*4(%rsi),%r10d /* (NEXT STEP) X[12] */
180 add %r11d, %ebx /* dst += ... */
181 rol $22, %ebx /* dst <<< s */
182 mov %edx, %r11d /* (NEXT STEP) z' = %edx */
183 add %ecx, %ebx /* dst += x */
/* Round 1, steps 13-16: consume X[12], X[13], X[14], X[15]. */
184 xor %ecx, %r11d /* y ^ ... */
185 lea 1804603682(%eax,%r10d),%eax /* Const + dst + ... */
186 and %ebx, %r11d /* x & ... */
187 xor %edx, %r11d /* z ^ ... */
188 mov 13*4(%rsi),%r10d /* (NEXT STEP) X[13] */
189 add %r11d, %eax /* dst += ... */
190 rol $7, %eax /* dst <<< s */
191 mov %ecx, %r11d /* (NEXT STEP) z' = %ecx */
192 add %ebx, %eax /* dst += x */
193 xor %ebx, %r11d /* y ^ ... */
194 lea -40341101(%edx,%r10d),%edx /* Const + dst + ... */
195 and %eax, %r11d /* x & ... */
196 xor %ecx, %r11d /* z ^ ... */
197 mov 14*4(%rsi),%r10d /* (NEXT STEP) X[14] */
198 add %r11d, %edx /* dst += ... */
199 rol $12, %edx /* dst <<< s */
200 mov %ebx, %r11d /* (NEXT STEP) z' = %ebx */
201 add %eax, %edx /* dst += x */
202 xor %eax, %r11d /* y ^ ... */
203 lea -1502002290(%ecx,%r10d),%ecx /* Const + dst + ... */
204 and %edx, %r11d /* x & ... */
205 xor %ebx, %r11d /* z ^ ... */
206 mov 15*4(%rsi),%r10d /* (NEXT STEP) X[15] */
207 add %r11d, %ecx /* dst += ... */
208 rol $17, %ecx /* dst <<< s */
209 mov %eax, %r11d /* (NEXT STEP) z' = %eax */
210 add %edx, %ecx /* dst += x */
211 xor %edx, %r11d /* y ^ ... */
212 lea 1236535329(%ebx,%r10d),%ebx /* Const + dst + ... */
213 and %ecx, %r11d /* x & ... */
214 xor %eax, %r11d /* z ^ ... */
215 mov 0*4(%rsi),%r10d /* (NEXT STEP) X[0] */
216 add %r11d, %ebx /* dst += ... */
217 rol $22, %ebx /* dst <<< s */
218 mov %edx, %r11d /* (NEXT STEP) z' = %edx */
219 add %ecx, %ebx /* dst += x */
/*
 * Round 2 (16 steps).
 *
 * Each step computes
 *     dst = x + rol(dst + G(x,y,z) + X[k] + Const, s)
 * with G(x,y,z) = (x & z) | (y & ~z).  Two scratch registers are used:
 * %r11d receives a copy of z, is inverted and AND-ed with y; %r12d
 * receives a copy of z and is AND-ed with x; the two halves are OR-ed
 * into %r12d, which is then added to dst.
 *
 * The first three instructions prime the pipeline: X[1] into %r10d and
 * z (%edx) into both %r11d and %r12d.  As in the other rounds, %r10d is
 * loaded one step ahead of its use and the lea folds Const + dst + X[k].
 *
 * Word order: X[1], X[6], X[11], X[0], X[5], X[10], X[15], X[4],
 *             X[9], X[14], X[3], X[8], X[13], X[2], X[7], X[12].
 * Rotate counts cycle through 5, 9, 14, 20.
 * The final step's look-ahead loads X[0] and copies %edx to %r11d/%r12d.
 *
 * NOTE(review): %r12 is written throughout this round.  If this routine
 * follows the System V AMD64 ABI, %r12 is callee-saved; no save/restore
 * of it is visible in this view -- confirm against the full file.
 */
220 mov 1*4(%rsi), %r10d /* (NEXT STEP) X[1] */
221 mov %edx, %r11d /* (NEXT STEP) z' = %edx */
222 mov %edx, %r12d /* (NEXT STEP) z' = %edx */
/* Round 2, steps 1-4: consume X[1], X[6], X[11], X[0]. */
223 not %r11d /* not z */
224 lea -165796510(%eax,%r10d),%eax /* Const + dst + ... */
225 and %ebx, %r12d /* x & z */
226 and %ecx, %r11d /* y & (not z) */
227 mov 6*4(%rsi),%r10d /* (NEXT STEP) X[6] */
228 or %r11d, %r12d /* (y & (not z)) | (x & z) */
229 mov %ecx, %r11d /* (NEXT STEP) z' = %ecx */
230 add %r12d, %eax /* dst += ... */
231 mov %ecx, %r12d /* (NEXT STEP) z' = %ecx */
232 rol $5, %eax /* dst <<< s */
233 add %ebx, %eax /* dst += x */
234 not %r11d /* not z */
235 lea -1069501632(%edx,%r10d),%edx /* Const + dst + ... */
236 and %eax, %r12d /* x & z */
237 and %ebx, %r11d /* y & (not z) */
238 mov 11*4(%rsi),%r10d /* (NEXT STEP) X[11] */
239 or %r11d, %r12d /* (y & (not z)) | (x & z) */
240 mov %ebx, %r11d /* (NEXT STEP) z' = %ebx */
241 add %r12d, %edx /* dst += ... */
242 mov %ebx, %r12d /* (NEXT STEP) z' = %ebx */
243 rol $9, %edx /* dst <<< s */
244 add %eax, %edx /* dst += x */
245 not %r11d /* not z */
246 lea 643717713(%ecx,%r10d),%ecx /* Const + dst + ... */
247 and %edx, %r12d /* x & z */
248 and %eax, %r11d /* y & (not z) */
249 mov 0*4(%rsi),%r10d /* (NEXT STEP) X[0] */
250 or %r11d, %r12d /* (y & (not z)) | (x & z) */
251 mov %eax, %r11d /* (NEXT STEP) z' = %eax */
252 add %r12d, %ecx /* dst += ... */
253 mov %eax, %r12d /* (NEXT STEP) z' = %eax */
254 rol $14, %ecx /* dst <<< s */
255 add %edx, %ecx /* dst += x */
256 not %r11d /* not z */
257 lea -373897302(%ebx,%r10d),%ebx /* Const + dst + ... */
258 and %ecx, %r12d /* x & z */
259 and %edx, %r11d /* y & (not z) */
260 mov 5*4(%rsi),%r10d /* (NEXT STEP) X[5] */
261 or %r11d, %r12d /* (y & (not z)) | (x & z) */
262 mov %edx, %r11d /* (NEXT STEP) z' = %edx */
263 add %r12d, %ebx /* dst += ... */
264 mov %edx, %r12d /* (NEXT STEP) z' = %edx */
265 rol $20, %ebx /* dst <<< s */
266 add %ecx, %ebx /* dst += x */
/* Round 2, steps 5-8: consume X[5], X[10], X[15], X[4]. */
267 not %r11d /* not z */
268 lea -701558691(%eax,%r10d),%eax /* Const + dst + ... */
269 and %ebx, %r12d /* x & z */
270 and %ecx, %r11d /* y & (not z) */
271 mov 10*4(%rsi),%r10d /* (NEXT STEP) X[10] */
272 or %r11d, %r12d /* (y & (not z)) | (x & z) */
273 mov %ecx, %r11d /* (NEXT STEP) z' = %ecx */
274 add %r12d, %eax /* dst += ... */
275 mov %ecx, %r12d /* (NEXT STEP) z' = %ecx */
276 rol $5, %eax /* dst <<< s */
277 add %ebx, %eax /* dst += x */
278 not %r11d /* not z */
279 lea 38016083(%edx,%r10d),%edx /* Const + dst + ... */
280 and %eax, %r12d /* x & z */
281 and %ebx, %r11d /* y & (not z) */
282 mov 15*4(%rsi),%r10d /* (NEXT STEP) X[15] */
283 or %r11d, %r12d /* (y & (not z)) | (x & z) */
284 mov %ebx, %r11d /* (NEXT STEP) z' = %ebx */
285 add %r12d, %edx /* dst += ... */
286 mov %ebx, %r12d /* (NEXT STEP) z' = %ebx */
287 rol $9, %edx /* dst <<< s */
288 add %eax, %edx /* dst += x */
289 not %r11d /* not z */
290 lea -660478335(%ecx,%r10d),%ecx /* Const + dst + ... */
291 and %edx, %r12d /* x & z */
292 and %eax, %r11d /* y & (not z) */
293 mov 4*4(%rsi),%r10d /* (NEXT STEP) X[4] */
294 or %r11d, %r12d /* (y & (not z)) | (x & z) */
295 mov %eax, %r11d /* (NEXT STEP) z' = %eax */
296 add %r12d, %ecx /* dst += ... */
297 mov %eax, %r12d /* (NEXT STEP) z' = %eax */
298 rol $14, %ecx /* dst <<< s */
299 add %edx, %ecx /* dst += x */
300 not %r11d /* not z */
301 lea -405537848(%ebx,%r10d),%ebx /* Const + dst + ... */
302 and %ecx, %r12d /* x & z */
303 and %edx, %r11d /* y & (not z) */
304 mov 9*4(%rsi),%r10d /* (NEXT STEP) X[9] */
305 or %r11d, %r12d /* (y & (not z)) | (x & z) */
306 mov %edx, %r11d /* (NEXT STEP) z' = %edx */
307 add %r12d, %ebx /* dst += ... */
308 mov %edx, %r12d /* (NEXT STEP) z' = %edx */
309 rol $20, %ebx /* dst <<< s */
310 add %ecx, %ebx /* dst += x */
/* Round 2, steps 9-12: consume X[9], X[14], X[3], X[8]. */
311 not %r11d /* not z */
312 lea 568446438(%eax,%r10d),%eax /* Const + dst + ... */
313 and %ebx, %r12d /* x & z */
314 and %ecx, %r11d /* y & (not z) */
315 mov 14*4(%rsi),%r10d /* (NEXT STEP) X[14] */
316 or %r11d, %r12d /* (y & (not z)) | (x & z) */
317 mov %ecx, %r11d /* (NEXT STEP) z' = %ecx */
318 add %r12d, %eax /* dst += ... */
319 mov %ecx, %r12d /* (NEXT STEP) z' = %ecx */
320 rol $5, %eax /* dst <<< s */
321 add %ebx, %eax /* dst += x */
322 not %r11d /* not z */
323 lea -1019803690(%edx,%r10d),%edx /* Const + dst + ... */
324 and %eax, %r12d /* x & z */
325 and %ebx, %r11d /* y & (not z) */
326 mov 3*4(%rsi),%r10d /* (NEXT STEP) X[3] */
327 or %r11d, %r12d /* (y & (not z)) | (x & z) */
328 mov %ebx, %r11d /* (NEXT STEP) z' = %ebx */
329 add %r12d, %edx /* dst += ... */
330 mov %ebx, %r12d /* (NEXT STEP) z' = %ebx */
331 rol $9, %edx /* dst <<< s */
332 add %eax, %edx /* dst += x */
333 not %r11d /* not z */
334 lea -187363961(%ecx,%r10d),%ecx /* Const + dst + ... */
335 and %edx, %r12d /* x & z */
336 and %eax, %r11d /* y & (not z) */
337 mov 8*4(%rsi),%r10d /* (NEXT STEP) X[8] */
338 or %r11d, %r12d /* (y & (not z)) | (x & z) */
339 mov %eax, %r11d /* (NEXT STEP) z' = %eax */
340 add %r12d, %ecx /* dst += ... */
341 mov %eax, %r12d /* (NEXT STEP) z' = %eax */
342 rol $14, %ecx /* dst <<< s */
343 add %edx, %ecx /* dst += x */
344 not %r11d /* not z */
345 lea 1163531501(%ebx,%r10d),%ebx /* Const + dst + ... */
346 and %ecx, %r12d /* x & z */
347 and %edx, %r11d /* y & (not z) */
348 mov 13*4(%rsi),%r10d /* (NEXT STEP) X[13] */
349 or %r11d, %r12d /* (y & (not z)) | (x & z) */
350 mov %edx, %r11d /* (NEXT STEP) z' = %edx */
351 add %r12d, %ebx /* dst += ... */
352 mov %edx, %r12d /* (NEXT STEP) z' = %edx */
353 rol $20, %ebx /* dst <<< s */
354 add %ecx, %ebx /* dst += x */
/* Round 2, steps 13-16: consume X[13], X[2], X[7], X[12]. */
355 not %r11d /* not z */
356 lea -1444681467(%eax,%r10d),%eax /* Const + dst + ... */
357 and %ebx, %r12d /* x & z */
358 and %ecx, %r11d /* y & (not z) */
359 mov 2*4(%rsi),%r10d /* (NEXT STEP) X[2] */
360 or %r11d, %r12d /* (y & (not z)) | (x & z) */
361 mov %ecx, %r11d /* (NEXT STEP) z' = %ecx */
362 add %r12d, %eax /* dst += ... */
363 mov %ecx, %r12d /* (NEXT STEP) z' = %ecx */
364 rol $5, %eax /* dst <<< s */
365 add %ebx, %eax /* dst += x */
366 not %r11d /* not z */
367 lea -51403784(%edx,%r10d),%edx /* Const + dst + ... */
368 and %eax, %r12d /* x & z */
369 and %ebx, %r11d /* y & (not z) */
370 mov 7*4(%rsi),%r10d /* (NEXT STEP) X[7] */
371 or %r11d, %r12d /* (y & (not z)) | (x & z) */
372 mov %ebx, %r11d /* (NEXT STEP) z' = %ebx */
373 add %r12d, %edx /* dst += ... */
374 mov %ebx, %r12d /* (NEXT STEP) z' = %ebx */
375 rol $9, %edx /* dst <<< s */
376 add %eax, %edx /* dst += x */
377 not %r11d /* not z */
378 lea 1735328473(%ecx,%r10d),%ecx /* Const + dst + ... */
379 and %edx, %r12d /* x & z */
380 and %eax, %r11d /* y & (not z) */
381 mov 12*4(%rsi),%r10d /* (NEXT STEP) X[12] */
382 or %r11d, %r12d /* (y & (not z)) | (x & z) */
383 mov %eax, %r11d /* (NEXT STEP) z' = %eax */
384 add %r12d, %ecx /* dst += ... */
385 mov %eax, %r12d /* (NEXT STEP) z' = %eax */
386 rol $14, %ecx /* dst <<< s */
387 add %edx, %ecx /* dst += x */
388 not %r11d /* not z */
389 lea -1926607734(%ebx,%r10d),%ebx /* Const + dst + ... */
390 and %ecx, %r12d /* x & z */
391 and %edx, %r11d /* y & (not z) */
392 mov 0*4(%rsi),%r10d /* (NEXT STEP) X[0] */
393 or %r11d, %r12d /* (y & (not z)) | (x & z) */
394 mov %edx, %r11d /* (NEXT STEP) z' = %edx */
395 add %r12d, %ebx /* dst += ... */
396 mov %edx, %r12d /* (NEXT STEP) z' = %edx */
397 rol $20, %ebx /* dst <<< s */
398 add %ecx, %ebx /* dst += x */
/*
 * Round 3 (16 steps).
 *
 * Each step computes
 *     dst = x + rol(dst + H(x,y,z) + X[k] + Const, s)
 * with H(x,y,z) = x ^ y ^ z.  %r11d is seeded with y one step ahead
 * (the "y'" comments), then XOR-ed with z and with x before being added
 * to dst.
 *
 * The first two instructions prime the pipeline: X[5] into %r10d and
 * y (%ecx) into %r11d.  %r10d is reloaded with the next word right after
 * the lea has consumed the current one.
 *
 * Word order: X[5], X[8], X[11], X[14], X[1], X[4], X[7], X[10],
 *             X[13], X[0], X[3], X[6], X[9], X[12], X[15], X[2].
 * Rotate counts cycle through 4, 11, 16, 23.
 * The final step's look-ahead loads X[0] and copies %ecx to %r11d.
 */
399 mov 5*4(%rsi), %r10d /* (NEXT STEP) X[5] */
400 mov %ecx, %r11d /* (NEXT STEP) y' = %ecx */
/* Round 3, steps 1-4: consume X[5], X[8], X[11], X[14]. */
401 lea -378558(%eax,%r10d),%eax /* Const + dst + ... */
402 mov 8*4(%rsi),%r10d /* (NEXT STEP) X[8] */
403 xor %edx, %r11d /* z ^ ... */
404 xor %ebx, %r11d /* x ^ ... */
405 add %r11d, %eax /* dst += ... */
406 rol $4, %eax /* dst <<< s */
407 mov %ebx, %r11d /* (NEXT STEP) y' = %ebx */
408 add %ebx, %eax /* dst += x */
409 lea -2022574463(%edx,%r10d),%edx /* Const + dst + ... */
410 mov 11*4(%rsi),%r10d /* (NEXT STEP) X[11] */
411 xor %ecx, %r11d /* z ^ ... */
412 xor %eax, %r11d /* x ^ ... */
413 add %r11d, %edx /* dst += ... */
414 rol $11, %edx /* dst <<< s */
415 mov %eax, %r11d /* (NEXT STEP) y' = %eax */
416 add %eax, %edx /* dst += x */
417 lea 1839030562(%ecx,%r10d),%ecx /* Const + dst + ... */
418 mov 14*4(%rsi),%r10d /* (NEXT STEP) X[14] */
419 xor %ebx, %r11d /* z ^ ... */
420 xor %edx, %r11d /* x ^ ... */
421 add %r11d, %ecx /* dst += ... */
422 rol $16, %ecx /* dst <<< s */
423 mov %edx, %r11d /* (NEXT STEP) y' = %edx */
424 add %edx, %ecx /* dst += x */
425 lea -35309556(%ebx,%r10d),%ebx /* Const + dst + ... */
426 mov 1*4(%rsi),%r10d /* (NEXT STEP) X[1] */
427 xor %eax, %r11d /* z ^ ... */
428 xor %ecx, %r11d /* x ^ ... */
429 add %r11d, %ebx /* dst += ... */
430 rol $23, %ebx /* dst <<< s */
431 mov %ecx, %r11d /* (NEXT STEP) y' = %ecx */
432 add %ecx, %ebx /* dst += x */
/* Round 3, steps 5-8: consume X[1], X[4], X[7], X[10]. */
433 lea -1530992060(%eax,%r10d),%eax /* Const + dst + ... */
434 mov 4*4(%rsi),%r10d /* (NEXT STEP) X[4] */
435 xor %edx, %r11d /* z ^ ... */
436 xor %ebx, %r11d /* x ^ ... */
437 add %r11d, %eax /* dst += ... */
438 rol $4, %eax /* dst <<< s */
439 mov %ebx, %r11d /* (NEXT STEP) y' = %ebx */
440 add %ebx, %eax /* dst += x */
441 lea 1272893353(%edx,%r10d),%edx /* Const + dst + ... */
442 mov 7*4(%rsi),%r10d /* (NEXT STEP) X[7] */
443 xor %ecx, %r11d /* z ^ ... */
444 xor %eax, %r11d /* x ^ ... */
445 add %r11d, %edx /* dst += ... */
446 rol $11, %edx /* dst <<< s */
447 mov %eax, %r11d /* (NEXT STEP) y' = %eax */
448 add %eax, %edx /* dst += x */
449 lea -155497632(%ecx,%r10d),%ecx /* Const + dst + ... */
450 mov 10*4(%rsi),%r10d /* (NEXT STEP) X[10] */
451 xor %ebx, %r11d /* z ^ ... */
452 xor %edx, %r11d /* x ^ ... */
453 add %r11d, %ecx /* dst += ... */
454 rol $16, %ecx /* dst <<< s */
455 mov %edx, %r11d /* (NEXT STEP) y' = %edx */
456 add %edx, %ecx /* dst += x */
457 lea -1094730640(%ebx,%r10d),%ebx /* Const + dst + ... */
458 mov 13*4(%rsi),%r10d /* (NEXT STEP) X[13] */
459 xor %eax, %r11d /* z ^ ... */
460 xor %ecx, %r11d /* x ^ ... */
461 add %r11d, %ebx /* dst += ... */
462 rol $23, %ebx /* dst <<< s */
463 mov %ecx, %r11d /* (NEXT STEP) y' = %ecx */
464 add %ecx, %ebx /* dst += x */
/* Round 3, steps 9-12: consume X[13], X[0], X[3], X[6]. */
465 lea 681279174(%eax,%r10d),%eax /* Const + dst + ... */
466 mov 0*4(%rsi),%r10d /* (NEXT STEP) X[0] */
467 xor %edx, %r11d /* z ^ ... */
468 xor %ebx, %r11d /* x ^ ... */
469 add %r11d, %eax /* dst += ... */
470 rol $4, %eax /* dst <<< s */
471 mov %ebx, %r11d /* (NEXT STEP) y' = %ebx */
472 add %ebx, %eax /* dst += x */
473 lea -358537222(%edx,%r10d),%edx /* Const + dst + ... */
474 mov 3*4(%rsi),%r10d /* (NEXT STEP) X[3] */
475 xor %ecx, %r11d /* z ^ ... */
476 xor %eax, %r11d /* x ^ ... */
477 add %r11d, %edx /* dst += ... */
478 rol $11, %edx /* dst <<< s */
479 mov %eax, %r11d /* (NEXT STEP) y' = %eax */
480 add %eax, %edx /* dst += x */
481 lea -722521979(%ecx,%r10d),%ecx /* Const + dst + ... */
482 mov 6*4(%rsi),%r10d /* (NEXT STEP) X[6] */
483 xor %ebx, %r11d /* z ^ ... */
484 xor %edx, %r11d /* x ^ ... */
485 add %r11d, %ecx /* dst += ... */
486 rol $16, %ecx /* dst <<< s */
487 mov %edx, %r11d /* (NEXT STEP) y' = %edx */
488 add %edx, %ecx /* dst += x */
489 lea 76029189(%ebx,%r10d),%ebx /* Const + dst + ... */
490 mov 9*4(%rsi),%r10d /* (NEXT STEP) X[9] */
491 xor %eax, %r11d /* z ^ ... */
492 xor %ecx, %r11d /* x ^ ... */
493 add %r11d, %ebx /* dst += ... */
494 rol $23, %ebx /* dst <<< s */
495 mov %ecx, %r11d /* (NEXT STEP) y' = %ecx */
496 add %ecx, %ebx /* dst += x */
/* Round 3, steps 13-16: consume X[9], X[12], X[15], X[2]. */
497 lea -640364487(%eax,%r10d),%eax /* Const + dst + ... */
498 mov 12*4(%rsi),%r10d /* (NEXT STEP) X[12] */
499 xor %edx, %r11d /* z ^ ... */
500 xor %ebx, %r11d /* x ^ ... */
501 add %r11d, %eax /* dst += ... */
502 rol $4, %eax /* dst <<< s */
503 mov %ebx, %r11d /* (NEXT STEP) y' = %ebx */
504 add %ebx, %eax /* dst += x */
505 lea -421815835(%edx,%r10d),%edx /* Const + dst + ... */
506 mov 15*4(%rsi),%r10d /* (NEXT STEP) X[15] */
507 xor %ecx, %r11d /* z ^ ... */
508 xor %eax, %r11d /* x ^ ... */
509 add %r11d, %edx /* dst += ... */
510 rol $11, %edx /* dst <<< s */
511 mov %eax, %r11d /* (NEXT STEP) y' = %eax */
512 add %eax, %edx /* dst += x */
513 lea 530742520(%ecx,%r10d),%ecx /* Const + dst + ... */
514 mov 2*4(%rsi),%r10d /* (NEXT STEP) X[2] */
515 xor %ebx, %r11d /* z ^ ... */
516 xor %edx, %r11d /* x ^ ... */
517 add %r11d, %ecx /* dst += ... */
518 rol $16, %ecx /* dst <<< s */
519 mov %edx, %r11d /* (NEXT STEP) y' = %edx */
520 add %edx, %ecx /* dst += x */
521 lea -995338651(%ebx,%r10d),%ebx /* Const + dst + ... */
522 mov 0*4(%rsi),%r10d /* (NEXT STEP) X[0] */
523 xor %eax, %r11d /* z ^ ... */
524 xor %ecx, %r11d /* x ^ ... */
525 add %r11d, %ebx /* dst += ... */
526 rol $23, %ebx /* dst <<< s */
527 mov %ecx, %r11d /* (NEXT STEP) y' = %ecx */
528 add %ecx, %ebx /* dst += x */
/*
 * Round 4 (16 steps).
 *
 * Each step computes
 *     dst = x + rol(dst + I(x,y,z) + X[k] + Const, s)
 * with I(x,y,z) = y ^ (x | ~z).  ~z is produced one step ahead by
 * loading 0xffffffff into %r11d and XOR-ing it with z; the step then
 * ORs in x, XORs in y and adds the result to dst.
 *
 * The first three instructions prime the pipeline: X[0] into %r10d and
 * ~%edx into %r11d.
 *
 * Word order: X[0], X[7], X[14], X[5], X[12], X[3], X[10], X[1],
 *             X[8], X[15], X[6], X[13], X[4], X[11], X[2], X[9].
 * Rotate counts cycle through 6, 10, 15, 21.
 * The final step still performs its look-ahead (X[0] into %r10d, ~%edx
 * into %r11d) although no further step in this block consumes it.
 */
529 mov 0*4(%rsi), %r10d /* (NEXT STEP) X[0] */
530 mov $0xffffffff, %r11d
531 xor %edx, %r11d /* (NEXT STEP) not z' = not %edx*/
/* Round 4, steps 1-4: consume X[0], X[7], X[14], X[5]. */
532 lea -198630844(%eax,%r10d),%eax /* Const + dst + ... */
533 or %ebx, %r11d /* x | ... */
534 xor %ecx, %r11d /* y ^ ... */
535 add %r11d, %eax /* dst += ... */
536 mov 7*4(%rsi),%r10d /* (NEXT STEP) X[7] */
537 mov $0xffffffff, %r11d
538 rol $6, %eax /* dst <<< s */
539 xor %ecx, %r11d /* (NEXT STEP) not z' = not %ecx */
540 add %ebx, %eax /* dst += x */
541 lea 1126891415(%edx,%r10d),%edx /* Const + dst + ... */
542 or %eax, %r11d /* x | ... */
543 xor %ebx, %r11d /* y ^ ... */
544 add %r11d, %edx /* dst += ... */
545 mov 14*4(%rsi),%r10d /* (NEXT STEP) X[14] */
546 mov $0xffffffff, %r11d
547 rol $10, %edx /* dst <<< s */
548 xor %ebx, %r11d /* (NEXT STEP) not z' = not %ebx */
549 add %eax, %edx /* dst += x */
550 lea -1416354905(%ecx,%r10d),%ecx /* Const + dst + ... */
551 or %edx, %r11d /* x | ... */
552 xor %eax, %r11d /* y ^ ... */
553 add %r11d, %ecx /* dst += ... */
554 mov 5*4(%rsi),%r10d /* (NEXT STEP) X[5] */
555 mov $0xffffffff, %r11d
556 rol $15, %ecx /* dst <<< s */
557 xor %eax, %r11d /* (NEXT STEP) not z' = not %eax */
558 add %edx, %ecx /* dst += x */
559 lea -57434055(%ebx,%r10d),%ebx /* Const + dst + ... */
560 or %ecx, %r11d /* x | ... */
561 xor %edx, %r11d /* y ^ ... */
562 add %r11d, %ebx /* dst += ... */
563 mov 12*4(%rsi),%r10d /* (NEXT STEP) X[12] */
564 mov $0xffffffff, %r11d
565 rol $21, %ebx /* dst <<< s */
566 xor %edx, %r11d /* (NEXT STEP) not z' = not %edx */
567 add %ecx, %ebx /* dst += x */
/* Round 4, steps 5-8: consume X[12], X[3], X[10], X[1]. */
568 lea 1700485571(%eax,%r10d),%eax /* Const + dst + ... */
569 or %ebx, %r11d /* x | ... */
570 xor %ecx, %r11d /* y ^ ... */
571 add %r11d, %eax /* dst += ... */
572 mov 3*4(%rsi),%r10d /* (NEXT STEP) X[3] */
573 mov $0xffffffff, %r11d
574 rol $6, %eax /* dst <<< s */
575 xor %ecx, %r11d /* (NEXT STEP) not z' = not %ecx */
576 add %ebx, %eax /* dst += x */
577 lea -1894986606(%edx,%r10d),%edx /* Const + dst + ... */
578 or %eax, %r11d /* x | ... */
579 xor %ebx, %r11d /* y ^ ... */
580 add %r11d, %edx /* dst += ... */
581 mov 10*4(%rsi),%r10d /* (NEXT STEP) X[10] */
582 mov $0xffffffff, %r11d
583 rol $10, %edx /* dst <<< s */
584 xor %ebx, %r11d /* (NEXT STEP) not z' = not %ebx */
585 add %eax, %edx /* dst += x */
586 lea -1051523(%ecx,%r10d),%ecx /* Const + dst + ... */
587 or %edx, %r11d /* x | ... */
588 xor %eax, %r11d /* y ^ ... */
589 add %r11d, %ecx /* dst += ... */
590 mov 1*4(%rsi),%r10d /* (NEXT STEP) X[1] */
591 mov $0xffffffff, %r11d
592 rol $15, %ecx /* dst <<< s */
593 xor %eax, %r11d /* (NEXT STEP) not z' = not %eax */
594 add %edx, %ecx /* dst += x */
595 lea -2054922799(%ebx,%r10d),%ebx /* Const + dst + ... */
596 or %ecx, %r11d /* x | ... */
597 xor %edx, %r11d /* y ^ ... */
598 add %r11d, %ebx /* dst += ... */
599 mov 8*4(%rsi),%r10d /* (NEXT STEP) X[8] */
600 mov $0xffffffff, %r11d
601 rol $21, %ebx /* dst <<< s */
602 xor %edx, %r11d /* (NEXT STEP) not z' = not %edx */
603 add %ecx, %ebx /* dst += x */
/* Round 4, steps 9-12: consume X[8], X[15], X[6], X[13]. */
604 lea 1873313359(%eax,%r10d),%eax /* Const + dst + ... */
605 or %ebx, %r11d /* x | ... */
606 xor %ecx, %r11d /* y ^ ... */
607 add %r11d, %eax /* dst += ... */
608 mov 15*4(%rsi),%r10d /* (NEXT STEP) X[15] */
609 mov $0xffffffff, %r11d
610 rol $6, %eax /* dst <<< s */
611 xor %ecx, %r11d /* (NEXT STEP) not z' = not %ecx */
612 add %ebx, %eax /* dst += x */
613 lea -30611744(%edx,%r10d),%edx /* Const + dst + ... */
614 or %eax, %r11d /* x | ... */
615 xor %ebx, %r11d /* y ^ ... */
616 add %r11d, %edx /* dst += ... */
617 mov 6*4(%rsi),%r10d /* (NEXT STEP) X[6] */
618 mov $0xffffffff, %r11d
619 rol $10, %edx /* dst <<< s */
620 xor %ebx, %r11d /* (NEXT STEP) not z' = not %ebx */
621 add %eax, %edx /* dst += x */
622 lea -1560198380(%ecx,%r10d),%ecx /* Const + dst + ... */
623 or %edx, %r11d /* x | ... */
624 xor %eax, %r11d /* y ^ ... */
625 add %r11d, %ecx /* dst += ... */
626 mov 13*4(%rsi),%r10d /* (NEXT STEP) X[13] */
627 mov $0xffffffff, %r11d
628 rol $15, %ecx /* dst <<< s */
629 xor %eax, %r11d /* (NEXT STEP) not z' = not %eax */
630 add %edx, %ecx /* dst += x */
631 lea 1309151649(%ebx,%r10d),%ebx /* Const + dst + ... */
632 or %ecx, %r11d /* x | ... */
633 xor %edx, %r11d /* y ^ ... */
634 add %r11d, %ebx /* dst += ... */
635 mov 4*4(%rsi),%r10d /* (NEXT STEP) X[4] */
636 mov $0xffffffff, %r11d
637 rol $21, %ebx /* dst <<< s */
638 xor %edx, %r11d /* (NEXT STEP) not z' = not %edx */
639 add %ecx, %ebx /* dst += x */
/* Round 4, steps 13-16: consume X[4], X[11], X[2], X[9]. */
640 lea -145523070(%eax,%r10d),%eax /* Const + dst + ... */
641 or %ebx, %r11d /* x | ... */
642 xor %ecx, %r11d /* y ^ ... */
643 add %r11d, %eax /* dst += ... */
644 mov 11*4(%rsi),%r10d /* (NEXT STEP) X[11] */
645 mov $0xffffffff, %r11d
646 rol $6, %eax /* dst <<< s */
647 xor %ecx, %r11d /* (NEXT STEP) not z' = not %ecx */
648 add %ebx, %eax /* dst += x */
649 lea -1120210379(%edx,%r10d),%edx /* Const + dst + ... */
650 or %eax, %r11d /* x | ... */
651 xor %ebx, %r11d /* y ^ ... */
652 add %r11d, %edx /* dst += ... */
653 mov 2*4(%rsi),%r10d /* (NEXT STEP) X[2] */
654 mov $0xffffffff, %r11d
655 rol $10, %edx /* dst <<< s */
656 xor %ebx, %r11d /* (NEXT STEP) not z' = not %ebx */
657 add %eax, %edx /* dst += x */
658 lea 718787259(%ecx,%r10d),%ecx /* Const + dst + ... */
659 or %edx, %r11d /* x | ... */
660 xor %eax, %r11d /* y ^ ... */
661 add %r11d, %ecx /* dst += ... */
662 mov 9*4(%rsi),%r10d /* (NEXT STEP) X[9] */
663 mov $0xffffffff, %r11d
664 rol $15, %ecx /* dst <<< s */
665 xor %eax, %r11d /* (NEXT STEP) not z' = not %eax */
666 add %edx, %ecx /* dst += x */
667 lea -343485551(%ebx,%r10d),%ebx /* Const + dst + ... */
668 or %ecx, %r11d /* x | ... */
669 xor %edx, %r11d /* y ^ ... */
670 add %r11d, %ebx /* dst += ... */
671 mov 0*4(%rsi),%r10d /* (NEXT STEP) X[0] */
672 mov $0xffffffff, %r11d
673 rol $21, %ebx /* dst <<< s */
674 xor %edx, %r11d /* (NEXT STEP) not z' = not %edx */
675 add %ecx, %ebx /* dst += x */
/*
 * Loop control, state write-back and epilogue.
 *
 * ptr (%rsi) is advanced by one 64-byte block and the loop repeats from
 * local label 2 while ptr < end (%rdi); jb makes this an unsigned
 * comparison.  After the loop the four working registers are stored back
 * to ctx (still addressed through %rbp) at offsets 0, 4, 8 and 12, and
 * %r13 is restored to balance the push at function entry.
 *
 * NOTE(review): the first comment below announces adding the old A, B,
 * C, D back in, but the add instructions are not visible in this view.
 * Likewise the local label 1 targeted by the earlier `je 1f`, any other
 * register restores and the final ret are not shown -- confirm against
 * the full file.
 */
676 # add old values of A, B, C, D
683 add $64, %rsi # ptr += 64
684 cmp %rdi, %rsi # cmp end with ptr
685 jb 2b # jmp if ptr < end
686 # END of loop over 16-word blocks
688 mov %eax, 0*4(%rbp) # ctx->A = A
689 mov %ebx, 1*4(%rbp) # ctx->B = B
690 mov %ecx, 2*4(%rbp) # ctx->C = C
691 mov %edx, 3*4(%rbp) # ctx->D = D
695 pop %r13 # not really useful (r13 is unused)
701 #endif /* } USE_MD5_ASM */