clarify the purpose of this project
[nyanglibc.git] / crypt / sha512.s
blob85c2a8b5db7d728188867a9e1ad0a38bf9a1026d
/*
 * __sha512_init_ctx(ctx = %rdi)
 *
 * ABI: System V AMD64, AT&T syntax.  Leaf function, no stack use.
 *
 * Resets the context that %rdi points to:
 *   ctx+0  .. ctx+56  eight 64-bit state words, loaded with fixed constants
 *   ctx+64 .. ctx+72  two quadwords cleared to zero (used elsewhere in this
 *                     file as a 128-bit byte counter)
 *   ctx+80            quadword cleared to zero (used elsewhere in this file
 *                     as the number of bytes waiting in the staging buffer)
 *
 * Clobbers: %rax.
 */
	.text
	.p2align 4,,15
	.globl	__sha512_init_ctx
	.type	__sha512_init_ctx, @function
__sha512_init_ctx:
	# Counters and buffered-length field start at zero.
	movq	$0, 64(%rdi)
	movq	$0, 72(%rdi)
	movq	$0, 80(%rdi)

	# State words 0..7, in ascending address order.  A 64-bit immediate
	# cannot be stored straight to memory, hence the trip through %rax.
	movabsq	$7640891576956012808, %rax
	movq	%rax, (%rdi)
	movabsq	$-4942790177534073029, %rax
	movq	%rax, 8(%rdi)
	movabsq	$4354685564936845355, %rax
	movq	%rax, 16(%rdi)
	movabsq	$-6534734903238641935, %rax
	movq	%rax, 24(%rdi)
	movabsq	$5840696475078001361, %rax
	movq	%rax, 32(%rdi)
	movabsq	$-7276294671716946913, %rax
	movq	%rax, 40(%rdi)
	movabsq	$2270897969802886507, %rax
	movq	%rax, 48(%rdi)
	movabsq	$6620516959819538809, %rax
	movq	%rax, 56(%rdi)
	ret
	.size	__sha512_init_ctx, .-__sha512_init_ctx
/*
 * __sha512_process_block(buffer = %rdi, len = %rsi, ctx = %rdx)
 *
 * ABI: System V AMD64, AT&T syntax.  Makes no calls, so it also keeps
 * scalars in the 128-byte red zone below %rsp.
 *
 * Consumes len bytes from buffer in 128-byte blocks and folds each block
 * into the eight 64-bit state words at ctx+0..ctx+56.  len is also added
 * to the 128-bit counter held at ctx+64 (low) / ctx+72 (high).
 *
 * The outer loop counts len/8 down in steps of 16 and exits only on an
 * exact zero, so len must be a multiple of 128.
 *
 * Frame after "subq $624, %rsp":
 *   -120(%rsp) .. -64(%rsp)   working copies of the eight state words
 *                             (slots: a=-96 b=-88 c=-80 d=-104
 *                                     e=-72 f=-112 g=-64 h=-120)
 *   -56(%rsp)                 input cursor
 *   -48(%rsp)                 64-bit words still to process
 *   -40(%rsp)                 end sentinel for the schedule loop (W + 512)
 *   -32(%rsp)                 saved ctx pointer
 *   -24(%rsp) .. 615(%rsp)    W[0..79], the 80-entry message schedule
 *
 * Register roles inside the round loop (.L8/.L17):
 *   %r8=a %r11=b %r10=c %r13=d %rdi=e %rbp=f %rbx=g %rsi=h
 *   %r12 = round constant for this round, %r9 = byte offset into W and K
 *   %r14 = &W[0], %r15 = &K[0]
 *
 * Clobbers: %rax, %rcx, %rdx, %rsi, %rdi, %r8-%r11, flags.
 */
	.p2align 4,,15
	.globl	__sha512_process_block
	.type	__sha512_process_block, @function
__sha512_process_block:
	pushq	%r15
	pushq	%r14
	pushq	%r13
	pushq	%r12
	pushq	%rbp
	pushq	%rbx
	movq	%rdx, %rbx			# rbx = ctx (rdx is reused below)
	subq	$624, %rsp
	movq	(%rdx), %rax
	movq	%rdi, -56(%rsp)			# input cursor
	movq	%rsi, %rdi
	movq	%rdx, -32(%rsp)			# ctx, reloaded at .L4
	shrq	$3, %rdi			# rdi = len / 8 = 64-bit word count
	movq	%rax, -96(%rsp)			# a
	movq	8(%rdx), %rax
	movq	%rdi, -48(%rsp)			# words remaining
	movq	%rax, -88(%rsp)			# b
	movq	16(%rdx), %rax
	movq	%rax, -80(%rsp)			# c
	movq	24(%rdx), %rax
	movq	%rax, -104(%rsp)		# d
	movq	32(%rdx), %rax
	movq	%rax, -72(%rsp)			# e
	movq	40(%rdx), %rax
	movq	%rax, -112(%rsp)		# f
	movq	48(%rdx), %rax
	movq	%rax, -64(%rsp)			# g
	movq	56(%rdx), %rax
	xorl	%edx, %edx
	addq	%rsi, 64(%rbx)			# 128-bit counter += len (low half)
	adcq	%rdx, 72(%rbx)			# propagate the carry into the high half
	testq	%rdi, %rdi
	movq	%rax, -120(%rsp)		# h (mov leaves the flags from testq intact)
	je	.L4				# nothing to hash
	leaq	-24(%rsp), %r14			# r14 = &W[0]
	leaq	K(%rip), %r15			# r15 = &K[0]
	leaq	512(%r14), %rax
	movq	%rax, -40(%rsp)			# schedule loop stops when cursor == W + 512
	.p2align 4,,10
	.p2align 3
.L9:
	# ---- one 128-byte block -------------------------------------------
	movq	-56(%rsp), %rcx
	xorl	%eax, %eax
	.p2align 4,,10
	.p2align 3
.L5:
	# W[0..15] = the 16 input quadwords, byte-swapped.
	movq	(%rcx,%rax), %rdx
	bswap	%rdx
	movq	%rdx, (%r14,%rax)
	addq	$8, %rax
	cmpq	$128, %rax
	jne	.L5
	subq	$-128, -56(%rsp)		# cursor += 128 (-128 fits an imm8, +128 does not)
	movq	-40(%rsp), %rdi
	movq	%r14, %rsi
	.p2align 4,,10
	.p2align 3
.L6:
	# Schedule expansion, t = 16..79, with rsi = &W[t-16] on entry:
	#   W[t] = s1(W[t-2]) + W[t-7] + s0(W[t-15]) + W[t-16]
	#   s1(x) = ror(x,19) ^ ror(x,61) ^ (x >> 6)
	#   s0(x) = ror(x,1)  ^ ror(x,8)  ^ (x >> 7)
	movq	112(%rsi), %rax			# W[t-2]
	movq	8(%rsi), %rcx			# W[t-15]
	addq	$8, %rsi
	movq	%rax, %rdx
	movq	%rax, %r8
	shrq	$6, %rax
	rolq	$3, %r8				# rol 3 == ror 61
	rorq	$19, %rdx
	xorq	%r8, %rdx
	movq	%rcx, %r8
	xorq	%rax, %rdx			# rdx = s1(W[t-2])
	movq	-8(%rsi), %rax			# W[t-16]
	addq	64(%rsi), %rax			# + W[t-7]
	rorq	$8, %r8
	addq	%rax, %rdx
	movq	%rcx, %rax
	shrq	$7, %rcx
	rorq	%rax				# ror 1
	xorq	%r8, %rax
	xorq	%rcx, %rax			# rax = s0(W[t-15])
	addq	%rdx, %rax
	movq	%rax, 120(%rsi)			# W[t]
	cmpq	%rsi, %rdi
	jne	.L6

	# Load the working variables; the first round constant is preloaded
	# as an immediate (same value as the first entry of K).
	movq	-120(%rsp), %rax
	movq	-64(%rsp), %rbx			# g
	xorl	%r9d, %r9d			# round byte offset = 0
	movq	-112(%rsp), %rbp		# f
	movq	-72(%rsp), %rdi			# e
	movabsq	$4794697086780616226, %r12
	movq	-104(%rsp), %r13		# d
	movq	-80(%rsp), %r10			# c
	movq	-88(%rsp), %r11			# b
	movq	-96(%rsp), %r8			# a
	movq	%rax, %rsi			# h
	jmp	.L8
	.p2align 4,,10
	.p2align 3
.L17:
	# Rotate the working variables for the next round and fetch its K.
	movq	(%r15,%r9), %r12		# K[t]
	movq	%r10, %r13			# d = c
	movq	%rbx, %rsi			# h = g
	movq	%r11, %r10			# c = b
	movq	%rbp, %rbx			# g = f
	movq	%r8, %r11			# b = a
	movq	%rdi, %rbp			# f = e
	movq	%rax, %r8			# a = T1 + T2
	movq	%rcx, %rdi			# e = d + T1
.L8:
	# One round:
	#   T1 = h + S1(e) + Ch(e,f,g) + K[t] + W[t]
	#   T2 = S0(a) + Maj(a,b,c)
	#   S1(e) = ror(e,14) ^ ror(e,18) ^ ror(e,41)
	#   S0(a) = ror(a,28) ^ ror(a,34) ^ ror(a,39)
	movq	%rdi, %rax
	movq	%rdi, %rdx
	movq	%rdi, %rcx
	rorq	$18, %rdx
	rorq	$14, %rax
	andq	%rbp, %rcx			# e & f
	xorq	%rdx, %rax
	movq	%rdi, %rdx
	rolq	$23, %rdx			# rol 23 == ror 41
	xorq	%rax, %rdx			# rdx = S1(e)
	movq	%rdi, %rax
	notq	%rax
	andq	%rbx, %rax			# ~e & g
	xorq	%rcx, %rax			# rax = Ch(e,f,g)
	movq	%r8, %rcx
	addq	%rdx, %rax
	movq	%r8, %rdx
	addq	(%r14,%r9), %rax		# + W[t]
	rolq	$30, %rcx			# rol 30 == ror 34
	rorq	$28, %rdx
	addq	$8, %r9				# advance to the next round
	xorq	%rcx, %rdx
	movq	%r8, %rcx
	rolq	$25, %rcx			# rol 25 == ror 39
	xorq	%rdx, %rcx			# rcx = S0(a)
	movq	%r11, %rdx
	addq	%rsi, %rax			# + h
	xorq	%r10, %rdx
	movq	%r11, %rsi
	addq	%r12, %rax			# + K[t]  -> rax = T1
	andq	%r8, %rdx			# a & (b ^ c)
	andq	%r10, %rsi			# b & c
	xorq	%rsi, %rdx			# rdx = Maj(a,b,c)
	addq	%rcx, %rdx			# rdx = T2
	leaq	(%rax,%r13), %rcx		# new e = d + T1
	addq	%rdx, %rax			# new a = T1 + T2
	cmpq	$640, %r9			# 80 rounds * 8 bytes
	jne	.L17

	# Fold the round results into the running state.  The last rotation
	# was not performed, so the values sit one register "early".
	addq	%rax, -96(%rsp)			# a
	addq	%r8, -88(%rsp)			# b
	addq	%r11, -80(%rsp)			# c
	addq	%r10, -104(%rsp)		# d
	addq	%rcx, -72(%rsp)			# e
	addq	%rdi, -112(%rsp)		# f
	addq	%rbp, -64(%rsp)			# g
	addq	%rbx, -120(%rsp)		# h
	subq	$16, -48(%rsp)			# 16 words consumed
	jne	.L9
.L4:
	# Write the state words back to ctx.
	movq	-32(%rsp), %rax
	movq	-96(%rsp), %rbx
	movq	%rbx, (%rax)
	movq	-88(%rsp), %rbx
	movq	%rbx, 8(%rax)
	movq	-80(%rsp), %rbx
	movq	%rbx, 16(%rax)
	movq	-104(%rsp), %rbx
	movq	%rbx, 24(%rax)
	movq	-72(%rsp), %rbx
	movq	%rbx, 32(%rax)
	movq	-112(%rsp), %rbx
	movq	%rbx, 40(%rax)
	movq	-64(%rsp), %rbx
	movq	%rbx, 48(%rax)
	movq	-120(%rsp), %rbx
	movq	%rbx, 56(%rax)
	addq	$624, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	popq	%r14
	popq	%r15
	ret					# was missing: control fell into the next function
	.size	__sha512_process_block, .-__sha512_process_block
/*
 * __sha512_finish_ctx(ctx = %rdi, resbuf = %rsi)  ->  %rax = resbuf
 *
 * ABI: System V AMD64, AT&T syntax.  Five pushes plus the return address
 * leave %rsp 16-byte aligned at both call sites.
 *
 * Pads whatever is left in the staging buffer at ctx+88, appends the
 * 128-bit message length in bits (byte-swapped, high quadword first),
 * runs the final one or two blocks through __sha512_process_block, and
 * then writes the eight state words, byte-swapped, to resbuf (64 bytes).
 *
 * Context fields touched:
 *   ctx+64/72  128-bit byte counter; the buffered byte count is added here
 *   ctx+80     number of bytes currently buffered
 *   ctx+88     staging buffer (up to 256 bytes are used here)
 *
 * Registers held across the calls:
 *   %rbx = ctx, %rbp = resbuf
 *   %r12 = padded length handed to process_block (128 or 256)
 *   %r13 = quadword index of the high length word (14 or 30)
 *   %r14 = quadword index of the low length word  (15 or 31)
 */
	.p2align 4,,15
	.globl	__sha512_finish_ctx
	.type	__sha512_finish_ctx, @function
__sha512_finish_ctx:
	pushq	%r14
	pushq	%r13
	pushq	%r12
	pushq	%rbp
	movq	%rsi, %rbp			# rbp = resbuf
	pushq	%rbx
	movq	80(%rdi), %rax			# rax = buffered byte count
	movq	%rdi, %rbx			# rbx = ctx
	xorl	%edi, %edi
	addq	%rax, 64(%rbx)			# 128-bit counter += buffered bytes
	adcq	%rdi, 72(%rbx)
	cmpq	$111, %rax
	jbe	.L19				# length fits in the current 128-byte block
	# 112 or more bytes buffered: the length field spills into a second block.
	movl	$240, %edx
	movl	$256, %r12d
	movl	$30, %r13d
	subq	%rax, %rdx			# pad = 240 - buffered
	movl	$31, %r14d
.L20:
	leaq	88(%rbx,%rax), %rdi		# first free byte of the staging buffer
	leaq	fillbuf(%rip), %rsi		# 0x80 followed by zeros
	call	memcpy@PLT			# rdx = pad length
	movq	64(%rbx), %rdx
	leaq	88(%rbx), %rdi			# arg0 for process_block: the buffer
	movq	%r12, %rsi			# arg1: padded length
	leaq	0(,%rdx,8), %rax		# low quadword of the bit count
	shrq	$61, %rdx			# bits that carry into the high quadword
	bswap	%rax
	movq	%rax, 88(%rbx,%r14,8)		# last quadword of the padded data
	movq	72(%rbx), %rax
	salq	$3, %rax
	orq	%rdx, %rax			# high quadword of the bit count
	movq	%rbx, %rdx			# arg2: ctx
	bswap	%rax
	movq	%rax, 88(%rbx,%r13,8)		# second-to-last quadword
	call	__sha512_process_block
	xorl	%eax, %eax
	.p2align 4,,10
	.p2align 3
.L21:
	# Emit the eight state words, byte-swapped, into resbuf.
	movq	(%rbx,%rax), %rdx
	bswap	%rdx
	movq	%rdx, 0(%rbp,%rax)
	addq	$8, %rax
	cmpq	$64, %rax
	jne	.L21
	popq	%rbx
	movq	%rbp, %rax			# return resbuf
	popq	%rbp
	popq	%r12
	popq	%r13
	popq	%r14
	ret					# was missing: control fell into .L19 and looped
	.p2align 4,,10
	.p2align 3
.L19:
	# Fewer than 112 bytes buffered: a single 128-byte block is enough.
	movl	$112, %edx
	movl	$128, %r12d
	movl	$14, %r13d
	subq	%rax, %rdx			# pad = 112 - buffered
	movl	$15, %r14d
	jmp	.L20
	.size	__sha512_finish_ctx, .-__sha512_finish_ctx
/*
 * __sha512_process_bytes(buffer = %rdi, len = %rsi, ctx = %rdx)
 *
 * ABI: System V AMD64, AT&T syntax.  Six pushes, the 8-byte adjustment
 * and the return address leave %rsp 16-byte aligned at every call site.
 *
 * Feeds an arbitrary number of bytes into the hash:
 *   1. If bytes are already buffered (ctx+80 != 0), top the staging
 *      buffer at ctx+88 up from the input (at most 256 - buffered bytes).
 *      If it then holds more than 128 bytes, hash the whole 128-byte
 *      multiples and move the tail to the front of the buffer.      (.L54/.L60)
 *   2. Hash all remaining whole 128-byte blocks straight from the
 *      caller's buffer.                                             (.L55)
 *   3. Stash the last len % 128 bytes in the staging buffer; if that
 *      brings it to 128 or more, hash one block and move the rest
 *      down.                                                        (.L56/.L59)
 *
 * Registers held across calls:
 *   %r12 = input cursor, %rbx = bytes still to consume, %rbp = ctx
 *   %r13 = size of the chunk being handled, %r14 = buffered count on entry
 *   %r15 = &ctx->buffer (ctx+88) in the .L60 path
 *
 * The short copies at .L56/.L33/.L57/.L58 and .L27/.L61 are inlined
 * memcpy expansions: they copy the first and last 1/2/4/8 bytes with
 * overlapping unaligned moves, and for 8 bytes or more fill the middle
 * with "rep movsq" on an 8-byte-aligned destination.
 */
	.p2align 4,,15
	.globl	__sha512_process_bytes
	.type	__sha512_process_bytes, @function
__sha512_process_bytes:
	pushq	%r15
	pushq	%r14
	pushq	%r13
	pushq	%r12
	movq	%rdi, %r12			# r12 = input cursor
	pushq	%rbp
	pushq	%rbx
	movq	%rdx, %rbp			# rbp = ctx
	movq	%rsi, %rbx			# rbx = bytes remaining
	subq	$8, %rsp			# keep rsp 16-byte aligned for the calls
	movq	80(%rdx), %r14			# r14 = bytes already buffered
	testq	%r14, %r14
	jne	.L54				# step 1: top up the staging buffer first
.L25:
	cmpq	$127, %rbx
	ja	.L55				# step 2: at least one whole block left
.L31:
	testq	%rbx, %rbx
	jne	.L56				# step 3: stash the tail
	addq	$8, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	popq	%r14
	popq	%r15
	ret					# was missing: control fell into .L56
	.p2align 4,,10
	.p2align 3
.L56:
	# Copy the remaining rbx (< 128) bytes to buffer + buffered count.
	movq	80(%rbp), %r8			# r8 = buffered count
	cmpl	$8, %ebx
	leaq	88(%rbp,%r8), %rdx		# rdx = destination
	jnb	.L33				# 8 bytes or more
	testb	$4, %bl
	jne	.L57				# 4..7 bytes
	testl	%ebx, %ebx
	je	.L34
	movzbl	(%r12), %eax			# 1..3 bytes: first byte ...
	testb	$2, %bl
	movb	%al, (%rdx)
	jne	.L58				# ... plus the trailing word when 2 or 3
.L34:
	addq	%r8, %rbx			# rbx = new buffered count
	cmpq	$127, %rbx
	ja	.L59				# a full block is now available
.L37:
	movq	%rbx, 80(%rbp)			# store the buffered count
	addq	$8, %rsp
	popq	%rbx
	popq	%rbp
	popq	%r12
	popq	%r13
	popq	%r14
	popq	%r15
	ret					# was missing: control fell into .L55
	.p2align 4,,10
	.p2align 3
.L55:
	# Hash every whole 128-byte block directly from the caller's buffer.
	movq	%rbx, %r13
	movq	%r12, %rdi
	movq	%rbp, %rdx
	andq	$-128, %r13			# r13 = len rounded down to 128
	andl	$127, %ebx			# rbx = leftover tail
	movq	%r13, %rsi
	addq	%r13, %r12			# advance the cursor past the blocks
	call	__sha512_process_block
	jmp	.L31
	.p2align 4,,10
	.p2align 3
.L54:
	# add = min(256 - buffered, len); append that much to the staging buffer.
	movl	$256, %r13d
	leaq	88(%rdx,%r14), %rdi
	subq	%r14, %r13
	cmpq	%rsi, %r13
	cmova	%rsi, %r13			# unsigned min
	movq	%r12, %rsi
	movq	%r13, %rdx
	call	memcpy@PLT
	movq	80(%rbp), %rsi
	addq	%r13, %rsi
	cmpq	$128, %rsi
	movq	%rsi, 80(%rbp)			# buffered += add (mov keeps the flags)
	ja	.L60				# more than one block buffered
.L26:
	addq	%r13, %r12			# consume the bytes just buffered
	subq	%r13, %rbx
	jmp	.L25
	.p2align 4,,10
	.p2align 3
.L33:
	# 8..127 bytes: head and tail quadwords, then the aligned middle.
	movq	(%r12), %rax
	leaq	8(%rdx), %rdi
	movq	%r12, %rsi
	andq	$-8, %rdi			# first 8-byte-aligned address past the head
	movq	%rax, (%rdx)
	movl	%ebx, %eax
	movq	-8(%r12,%rax), %rcx
	movq	%rcx, -8(%rdx,%rax)
	subq	%rdi, %rdx			# rdx = -(bytes skipped to reach alignment)
	leal	(%rbx,%rdx), %ecx
	addq	%r8, %rbx			# rbx = new buffered count
	subq	%rdx, %rsi			# move the source forward by the same skew
	shrl	$3, %ecx			# quadwords to move
	cmpq	$127, %rbx
	rep movsq				# does not modify flags
	jbe	.L37
.L59:
	# Staging buffer holds 128 bytes or more: hash one block, keep the rest.
	leaq	88(%rbp), %r12
	movq	%rbp, %rdx
	movl	$128, %esi
	addq	$-128, %rbx
	movq	%r12, %rdi
	call	__sha512_process_block
	leaq	216(%rbp), %rsi			# buffer + 128
	movq	%rbx, %rdx
	movq	%r12, %rdi
	call	memcpy@PLT			# slide the remainder to the front
	jmp	.L37
	.p2align 4,,10
	.p2align 3
.L60:
	# Hash the whole blocks held in the staging buffer, then move the
	# remaining (buffered & 127) bytes to its start.
	leaq	88(%rbp), %r15
	andq	$-128, %rsi			# bytes to hash
	movq	%rbp, %rdx
	addq	%r13, %r14
	movq	%r15, %rdi
	andq	$-128, %r14			# offset of the unhashed tail
	call	__sha512_process_block
	movq	80(%rbp), %rax
	leaq	88(%rbp,%r14), %rsi		# source = tail inside the buffer
	andl	$127, %eax
	cmpl	$8, %eax
	movq	%rax, 80(%rbp)			# buffered &= 127
	jnb	.L27
	testb	$4, %al
	jne	.L61
	testl	%eax, %eax
	je	.L26
	movzbl	(%rsi), %edx
	testb	$2, %al
	movb	%dl, 88(%rbp)
	je	.L26
	movl	%eax, %eax
	movzwl	-2(%rsi,%rax), %edx
	movw	%dx, -2(%r15,%rax)
	jmp	.L26
	.p2align 4,,10
	.p2align 3
.L27:
	# Tail of 8 bytes or more: head and tail quadwords, then the aligned middle.
	movq	(%rsi), %rdx
	leaq	96(%rbp), %rdi
	andq	$-8, %rdi
	movq	%rdx, 88(%rbp)
	movl	%eax, %edx
	movq	-8(%rsi,%rdx), %rcx
	movq	%rcx, -8(%r15,%rdx)
	subq	%rdi, %r15
	leal	(%rax,%r15), %ecx
	subq	%r15, %rsi
	movl	%ecx, %eax
	shrl	$3, %eax
	movl	%eax, %ecx
	rep movsq
	jmp	.L26
.L61:
	# Tail of 4..7 bytes: two overlapping doubleword moves.
	movl	(%rsi), %edx
	movl	%eax, %eax
	movl	%edx, 88(%rbp)
	movl	-4(%rsi,%rax), %edx
	movl	%edx, -4(%r15,%rax)
	jmp	.L26
	.p2align 4,,10
	.p2align 3
.L57:
	# 4..7 input bytes: two overlapping doubleword moves.
	movl	(%r12), %eax
	movl	%eax, (%rdx)
	movl	%ebx, %eax
	movl	-4(%r12,%rax), %ecx
	movl	%ecx, -4(%rdx,%rax)
	jmp	.L34
.L58:
	# 2..3 input bytes: the first byte is already stored; add the trailing word.
	movl	%ebx, %eax
	movzwl	-2(%r12,%rax), %ecx
	movw	%cx, -2(%rdx,%rax)
	jmp	.L34
	.size	__sha512_process_bytes, .-__sha512_process_bytes
/*
 * K: table of 80 64-bit round constants (640 bytes, file-local).
 * __sha512_process_block indexes it by round (byte offset 0..632); the
 * first entry is also embedded there as an immediate.
 *
 * The "K:" label was missing, leaving the symbol undefined even though
 * .type/.size and the code all refer to it.
 */
	.section	.rodata
	.align 32
	.type	K, @object
	.size	K, 640
K:
	.quad	4794697086780616226
	.quad	8158064640168781261
	.quad	-5349999486874862801
	.quad	-1606136188198331460
	.quad	4131703408338449720
	.quad	6480981068601479193
	.quad	-7908458776815382629
	.quad	-6116909921290321640
	.quad	-2880145864133508542
	.quad	1334009975649890238
	.quad	2608012711638119052
	.quad	6128411473006802146
	.quad	8268148722764581231
	.quad	-9160688886553864527
	.quad	-7215885187991268811
	.quad	-4495734319001033068
	.quad	-1973867731355612462
	.quad	-1171420211273849373
	.quad	1135362057144423861
	.quad	2597628984639134821
	.quad	3308224258029322869
	.quad	5365058923640841347
	.quad	6679025012923562964
	.quad	8573033837759648693
	.quad	-7476448914759557205
	.quad	-6327057829258317296
	.quad	-5763719355590565569
	.quad	-4658551843659510044
	.quad	-4116276920077217854
	.quad	-3051310485924567259
	.quad	489312712824947311
	.quad	1452737877330783856
	.quad	2861767655752347644
	.quad	3322285676063803686
	.quad	5560940570517711597
	.quad	5996557281743188959
	.quad	7280758554555802590
	.quad	8532644243296465576
	.quad	-9096487096722542874
	.quad	-7894198246740708037
	.quad	-6719396339535248540
	.quad	-6333637450476146687
	.quad	-4446306890439682159
	.quad	-4076793802049405392
	.quad	-3345356375505022440
	.quad	-2983346525034927856
	.quad	-860691631967231958
	.quad	1182934255886127544
	.quad	1847814050463011016
	.quad	2177327727835720531
	.quad	2830643537854262169
	.quad	3796741975233480872
	.quad	4115178125766777443
	.quad	5681478168544905931
	.quad	6601373596472566643
	.quad	7507060721942968483
	.quad	8399075790359081724
	.quad	8693463985226723168
	.quad	-8878714635349349518
	.quad	-8302665154208450068
	.quad	-8016688836872298968
	.quad	-6606660893046293015
	.quad	-4685533653050689259
	.quad	-4147400797238176981
	.quad	-3880063495543823972
	.quad	-3348786107499101689
	.quad	-1523767162380948706
	.quad	-757361751448694408
	.quad	500013540394364858
	.quad	748580250866718886
	.quad	1242879168328830382
	.quad	1977374033974150939
	.quad	2944078676154940804
	.quad	3659926193048069267
	.quad	4368137639120453308
	.quad	4836135668995329356
	.quad	5532061633213252278
	.quad	6448918945643986474
	.quad	6902733635092675308
	.quad	7801388544844847127
/*
 * fillbuf: 128-byte padding template (file-local, 32-byte aligned).
 * A single 0x80 marker byte followed by 127 zero bytes;
 * __sha512_finish_ctx copies a prefix of it after the buffered data.
 */
	.align 32
	.type	fillbuf, @object
	.size	fillbuf, 128
fillbuf:
	.byte	0x80
	.zero	127