# some tweaks
# [mkp224o.git] / ed25519 / amd64-51-30k / heap_rootreplaced_3limbs.s
# blob e5aeda95d622db25f2bdf0c2e5064a8fd065ead1
# qhasm: int64 hp

# qhasm: int64 hlen

# qhasm: int64 sp

# qhasm: int64 pp

# qhasm: input hp

# qhasm: input hlen

# qhasm: input sp

# qhasm: int64 prc

# qhasm: int64 plc

# qhasm: int64 pc

# qhasm: int64 d

# qhasm: int64 spp

# qhasm: int64 sprc

# qhasm: int64 spc

# qhasm: int64 c0

# qhasm: int64 c1

# qhasm: int64 c2

# qhasm: int64 c3

# qhasm: int64 t0

# qhasm: int64 t1

# qhasm: int64 t2

# qhasm: int64 t3

# qhasm: int64 p0

# qhasm: int64 p1

# qhasm: int64 p2

# qhasm: int64 p3

# qhasm: int64 caller1

# qhasm: int64 caller2

# qhasm: int64 caller3

# qhasm: int64 caller4

# qhasm: int64 caller5

# qhasm: int64 caller6

# qhasm: int64 caller7

# qhasm: caller caller1

# qhasm: caller caller2

# qhasm: caller caller3

# qhasm: caller caller4

# qhasm: caller caller5

# qhasm: caller caller6

# qhasm: caller caller7

# qhasm: stack64 caller1_stack

# qhasm: stack64 caller2_stack

# qhasm: stack64 caller3_stack

# qhasm: stack64 caller4_stack

# qhasm: stack64 caller5_stack

# qhasm: stack64 caller6_stack

# qhasm: stack64 caller7_stack
# qhasm: enter crypto_sign_ed25519_amd64_51_30k_batch_heap_rootreplaced_3limbs
#-----------------------------------------------------------------------
# crypto_sign_ed25519_amd64_51_30k_batch_heap_rootreplaced_3limbs(hp, hlen, sp)
# ABI:   SysV AMD64 — rdi = hp (array of uint64 indices forming a heap),
#        rsi = hlen (number of heap entries), rdx = sp (base of the
#        scalar array; each scalar occupies 32 bytes, hence the shl $5
#        index scaling below).
# Re-establishes the heap order after the root entry has been replaced:
# first sifts the root down (each parent/child comparison uses only the
# low 3 limbs — a subq/sbbq/sbbq borrow chain over offsets 0/8/16 of the
# 32-byte scalars), then sifts the final position back up until the
# parent no longer compares greater.
# Callee-saved r12-r15, rbx, rbp are spilled to the aligned frame built
# in the prologue and restored at ._end; the frame size lives in r11
# (saved at 0(%rsp)) so the epilogue can undo the alignment adjustment.
# NOTE(review): restored from a scraped listing with fused line numbers;
# instructions, labels and qhasm annotations are otherwise unchanged.
#-----------------------------------------------------------------------
.text
.p2align 5
.globl _crypto_sign_ed25519_amd64_51_30k_batch_heap_rootreplaced_3limbs
.globl crypto_sign_ed25519_amd64_51_30k_batch_heap_rootreplaced_3limbs
_crypto_sign_ed25519_amd64_51_30k_batch_heap_rootreplaced_3limbs:
crypto_sign_ed25519_amd64_51_30k_batch_heap_rootreplaced_3limbs:
mov %rsp,%r11
and $31,%r11
add $64,%r11
sub %r11,%rsp

# qhasm: caller1_stack = caller1
# asm 1: movq <caller1=int64#9,>caller1_stack=stack64#1
# asm 2: movq <caller1=%r11,>caller1_stack=0(%rsp)
movq %r11,0(%rsp)

# qhasm: caller2_stack = caller2
# asm 1: movq <caller2=int64#10,>caller2_stack=stack64#2
# asm 2: movq <caller2=%r12,>caller2_stack=8(%rsp)
movq %r12,8(%rsp)

# qhasm: caller3_stack = caller3
# asm 1: movq <caller3=int64#11,>caller3_stack=stack64#3
# asm 2: movq <caller3=%r13,>caller3_stack=16(%rsp)
movq %r13,16(%rsp)

# qhasm: caller4_stack = caller4
# asm 1: movq <caller4=int64#12,>caller4_stack=stack64#4
# asm 2: movq <caller4=%r14,>caller4_stack=24(%rsp)
movq %r14,24(%rsp)

# qhasm: caller5_stack = caller5
# asm 1: movq <caller5=int64#13,>caller5_stack=stack64#5
# asm 2: movq <caller5=%r15,>caller5_stack=32(%rsp)
movq %r15,32(%rsp)

# qhasm: caller6_stack = caller6
# asm 1: movq <caller6=int64#14,>caller6_stack=stack64#6
# asm 2: movq <caller6=%rbx,>caller6_stack=40(%rsp)
movq %rbx,40(%rsp)

# qhasm: caller7_stack = caller7
# asm 1: movq <caller7=int64#15,>caller7_stack=stack64#7
# asm 2: movq <caller7=%rbp,>caller7_stack=48(%rsp)
movq %rbp,48(%rsp)

# qhasm: pp = 0
# asm 1: mov $0,>pp=int64#4
# asm 2: mov $0,>pp=%rcx
mov $0,%rcx

# qhasm: siftdownloop:
._siftdownloop:

# qhasm: prc = pp
# asm 1: mov <pp=int64#4,>prc=int64#5
# asm 2: mov <pp=%rcx,>prc=%r8
mov %rcx,%r8

# qhasm: prc *= 2
# asm 1: imulq $2,<prc=int64#5,>prc=int64#5
# asm 2: imulq $2,<prc=%r8,>prc=%r8
imulq $2,%r8,%r8

# qhasm: pc = prc
# asm 1: mov <prc=int64#5,>pc=int64#6
# asm 2: mov <prc=%r8,>pc=%r9
mov %r8,%r9

# qhasm: prc += 2
# asm 1: add $2,<prc=int64#5
# asm 2: add $2,<prc=%r8
add $2,%r8

# qhasm: pc += 1
# asm 1: add $1,<pc=int64#6
# asm 2: add $1,<pc=%r9
add $1,%r9

# qhasm: unsigned>? hlen - prc
# asm 1: cmp <prc=int64#5,<hlen=int64#2
# asm 2: cmp <prc=%r8,<hlen=%rsi
cmp %r8,%rsi
# comment:fp stack unchanged by jump

# qhasm: goto siftuploop if !unsigned>
jbe ._siftuploop

# qhasm: sprc = *(uint64 *)(hp + prc * 8)
# asm 1: movq (<hp=int64#1,<prc=int64#5,8),>sprc=int64#7
# asm 2: movq (<hp=%rdi,<prc=%r8,8),>sprc=%rax
movq (%rdi,%r8,8),%rax

# qhasm: sprc <<= 5
# asm 1: shl $5,<sprc=int64#7
# asm 2: shl $5,<sprc=%rax
shl $5,%rax

# qhasm: sprc += sp
# asm 1: add <sp=int64#3,<sprc=int64#7
# asm 2: add <sp=%rdx,<sprc=%rax
add %rdx,%rax

# qhasm: spc = *(uint64 *)(hp + pc * 8)
# asm 1: movq (<hp=int64#1,<pc=int64#6,8),>spc=int64#8
# asm 2: movq (<hp=%rdi,<pc=%r9,8),>spc=%r10
movq (%rdi,%r9,8),%r10

# qhasm: spc <<= 5
# asm 1: shl $5,<spc=int64#8
# asm 2: shl $5,<spc=%r10
shl $5,%r10

# qhasm: spc += sp
# asm 1: add <sp=int64#3,<spc=int64#8
# asm 2: add <sp=%rdx,<spc=%r10
add %rdx,%r10

# qhasm: c0 = *(uint64 *)(spc + 0)
# asm 1: movq 0(<spc=int64#8),>c0=int64#9
# asm 2: movq 0(<spc=%r10),>c0=%r11
movq 0(%r10),%r11

# qhasm: c1 = *(uint64 *)(spc + 8)
# asm 1: movq 8(<spc=int64#8),>c1=int64#10
# asm 2: movq 8(<spc=%r10),>c1=%r12
movq 8(%r10),%r12

# qhasm: c2 = *(uint64 *)(spc + 16)
# asm 1: movq 16(<spc=int64#8),>c2=int64#11
# asm 2: movq 16(<spc=%r10),>c2=%r13
movq 16(%r10),%r13

# qhasm: carry? c0 -= *(uint64 *)(sprc + 0)
# asm 1: subq 0(<sprc=int64#7),<c0=int64#9
# asm 2: subq 0(<sprc=%rax),<c0=%r11
subq 0(%rax),%r11

# qhasm: carry? c1 -= *(uint64 *)(sprc + 8) - carry
# asm 1: sbbq 8(<sprc=int64#7),<c1=int64#10
# asm 2: sbbq 8(<sprc=%rax),<c1=%r12
sbbq 8(%rax),%r12

# qhasm: carry? c2 -= *(uint64 *)(sprc + 16) - carry
# asm 1: sbbq 16(<sprc=int64#7),<c2=int64#11
# asm 2: sbbq 16(<sprc=%rax),<c2=%r13
sbbq 16(%rax),%r13

# qhasm: pc = prc if carry
# asm 1: cmovc <prc=int64#5,<pc=int64#6
# asm 2: cmovc <prc=%r8,<pc=%r9
cmovc %r8,%r9

# qhasm: spc = sprc if carry
# asm 1: cmovc <sprc=int64#7,<spc=int64#8
# asm 2: cmovc <sprc=%rax,<spc=%r10
cmovc %rax,%r10

# qhasm: spc -= sp
# asm 1: sub <sp=int64#3,<spc=int64#8
# asm 2: sub <sp=%rdx,<spc=%r10
sub %rdx,%r10

# qhasm: (uint64) spc >>= 5
# asm 1: shr $5,<spc=int64#8
# asm 2: shr $5,<spc=%r10
shr $5,%r10

# qhasm: spp = *(uint64 *)(hp + pp * 8)
# asm 1: movq (<hp=int64#1,<pp=int64#4,8),>spp=int64#5
# asm 2: movq (<hp=%rdi,<pp=%rcx,8),>spp=%r8
movq (%rdi,%rcx,8),%r8

# qhasm: *(uint64 *)(hp + pp * 8) = spc
# asm 1: movq <spc=int64#8,(<hp=int64#1,<pp=int64#4,8)
# asm 2: movq <spc=%r10,(<hp=%rdi,<pp=%rcx,8)
movq %r10,(%rdi,%rcx,8)

# qhasm: *(uint64 *)(hp + pc * 8) = spp
# asm 1: movq <spp=int64#5,(<hp=int64#1,<pc=int64#6,8)
# asm 2: movq <spp=%r8,(<hp=%rdi,<pc=%r9,8)
movq %r8,(%rdi,%r9,8)

# qhasm: pp = pc
# asm 1: mov <pc=int64#6,>pp=int64#4
# asm 2: mov <pc=%r9,>pp=%rcx
mov %r9,%rcx
# comment:fp stack unchanged by jump

# qhasm: goto siftdownloop
jmp ._siftdownloop

# qhasm: siftuploop:
._siftuploop:

# qhasm: pc = pp
# asm 1: mov <pp=int64#4,>pc=int64#2
# asm 2: mov <pp=%rcx,>pc=%rsi
mov %rcx,%rsi

# qhasm: pp -= 1
# asm 1: sub $1,<pp=int64#4
# asm 2: sub $1,<pp=%rcx
sub $1,%rcx

# qhasm: (uint64) pp >>= 1
# asm 1: shr $1,<pp=int64#4
# asm 2: shr $1,<pp=%rcx
shr $1,%rcx

# qhasm: unsigned>? pc - 0
# asm 1: cmp $0,<pc=int64#2
# asm 2: cmp $0,<pc=%rsi
cmp $0,%rsi
# comment:fp stack unchanged by jump

# qhasm: goto end if !unsigned>
jbe ._end

# qhasm: spp = *(uint64 *)(hp + pp * 8)
# asm 1: movq (<hp=int64#1,<pp=int64#4,8),>spp=int64#5
# asm 2: movq (<hp=%rdi,<pp=%rcx,8),>spp=%r8
movq (%rdi,%rcx,8),%r8

# qhasm: spc = *(uint64 *)(hp + pc * 8)
# asm 1: movq (<hp=int64#1,<pc=int64#2,8),>spc=int64#6
# asm 2: movq (<hp=%rdi,<pc=%rsi,8),>spc=%r9
movq (%rdi,%rsi,8),%r9

# qhasm: spp <<= 5
# asm 1: shl $5,<spp=int64#5
# asm 2: shl $5,<spp=%r8
shl $5,%r8

# qhasm: spc <<= 5
# asm 1: shl $5,<spc=int64#6
# asm 2: shl $5,<spc=%r9
shl $5,%r9

# qhasm: spc += sp
# asm 1: add <sp=int64#3,<spc=int64#6
# asm 2: add <sp=%rdx,<spc=%r9
add %rdx,%r9

# qhasm: spp += sp
# asm 1: add <sp=int64#3,<spp=int64#5
# asm 2: add <sp=%rdx,<spp=%r8
add %rdx,%r8

# qhasm: c0 = *(uint64 *)(spc + 0)
# asm 1: movq 0(<spc=int64#6),>c0=int64#7
# asm 2: movq 0(<spc=%r9),>c0=%rax
movq 0(%r9),%rax

# qhasm: c1 = *(uint64 *)(spc + 8)
# asm 1: movq 8(<spc=int64#6),>c1=int64#8
# asm 2: movq 8(<spc=%r9),>c1=%r10
movq 8(%r9),%r10

# qhasm: c2 = *(uint64 *)(spc + 16)
# asm 1: movq 16(<spc=int64#6),>c2=int64#9
# asm 2: movq 16(<spc=%r9),>c2=%r11
movq 16(%r9),%r11

# qhasm: carry? c0 -= *(uint64 *)(spp + 0)
# asm 1: subq 0(<spp=int64#5),<c0=int64#7
# asm 2: subq 0(<spp=%r8),<c0=%rax
subq 0(%r8),%rax

# qhasm: carry? c1 -= *(uint64 *)(spp + 8) - carry
# asm 1: sbbq 8(<spp=int64#5),<c1=int64#8
# asm 2: sbbq 8(<spp=%r8),<c1=%r10
sbbq 8(%r8),%r10

# qhasm: carry? c2 -= *(uint64 *)(spp + 16) - carry
# asm 1: sbbq 16(<spp=int64#5),<c2=int64#9
# asm 2: sbbq 16(<spp=%r8),<c2=%r11
sbbq 16(%r8),%r11
# comment:fp stack unchanged by jump

# qhasm: goto end if carry
jc ._end

# qhasm: spc -= sp
# asm 1: sub <sp=int64#3,<spc=int64#6
# asm 2: sub <sp=%rdx,<spc=%r9
sub %rdx,%r9

# qhasm: (uint64) spc >>= 5
# asm 1: shr $5,<spc=int64#6
# asm 2: shr $5,<spc=%r9
shr $5,%r9

# qhasm: spp -= sp
# asm 1: sub <sp=int64#3,<spp=int64#5
# asm 2: sub <sp=%rdx,<spp=%r8
sub %rdx,%r8

# qhasm: (uint64) spp >>= 5
# asm 1: shr $5,<spp=int64#5
# asm 2: shr $5,<spp=%r8
shr $5,%r8

# qhasm: *(uint64 *)(hp + pp * 8) = spc
# asm 1: movq <spc=int64#6,(<hp=int64#1,<pp=int64#4,8)
# asm 2: movq <spc=%r9,(<hp=%rdi,<pp=%rcx,8)
movq %r9,(%rdi,%rcx,8)

# qhasm: *(uint64 *)(hp + pc * 8) = spp
# asm 1: movq <spp=int64#5,(<hp=int64#1,<pc=int64#2,8)
# asm 2: movq <spp=%r8,(<hp=%rdi,<pc=%rsi,8)
movq %r8,(%rdi,%rsi,8)
# comment:fp stack unchanged by jump

# qhasm: goto siftuploop
jmp ._siftuploop

# qhasm: end:
._end:

# qhasm: caller1 = caller1_stack
# asm 1: movq <caller1_stack=stack64#1,>caller1=int64#9
# asm 2: movq <caller1_stack=0(%rsp),>caller1=%r11
movq 0(%rsp),%r11

# qhasm: caller2 = caller2_stack
# asm 1: movq <caller2_stack=stack64#2,>caller2=int64#10
# asm 2: movq <caller2_stack=8(%rsp),>caller2=%r12
movq 8(%rsp),%r12

# qhasm: caller3 = caller3_stack
# asm 1: movq <caller3_stack=stack64#3,>caller3=int64#11
# asm 2: movq <caller3_stack=16(%rsp),>caller3=%r13
movq 16(%rsp),%r13

# qhasm: caller4 = caller4_stack
# asm 1: movq <caller4_stack=stack64#4,>caller4=int64#12
# asm 2: movq <caller4_stack=24(%rsp),>caller4=%r14
movq 24(%rsp),%r14

# qhasm: caller5 = caller5_stack
# asm 1: movq <caller5_stack=stack64#5,>caller5=int64#13
# asm 2: movq <caller5_stack=32(%rsp),>caller5=%r15
movq 32(%rsp),%r15

# qhasm: caller6 = caller6_stack
# asm 1: movq <caller6_stack=stack64#6,>caller6=int64#14
# asm 2: movq <caller6_stack=40(%rsp),>caller6=%rbx
movq 40(%rsp),%rbx

# qhasm: caller7 = caller7_stack
# asm 1: movq <caller7_stack=stack64#7,>caller7=int64#15
# asm 2: movq <caller7_stack=48(%rsp),>caller7=%rbp
movq 48(%rsp),%rbp

# qhasm: leave
add %r11,%rsp
mov %rdi,%rax
mov %rsi,%rdx