8322 nl: misleading-indentation
[unleashed/tickless.git] / usr / src / common / bignum / sun4u / mont_mulf_kernel_v9.s
blob4080b43f8233a703cfba9f36f243fb16ad080171
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
27 * This file is mostly a result of compiling the mont_mulf.c file to generate an
28 * assembly output and then hand-editing that output to replace the
29 * compiler-generated loop for the 512-bit case (nlen == 16) in the
30 * mont_mulf_noconv routine with a hand-crafted version. This file also
31 * has big_savefp() and big_restorefp() routines added by hand.
34 #include <sys/asm_linkage.h>
35 #include <sys/trap.h>
36 #include <sys/stack.h>
37 #include <sys/privregs.h>
38 #include <sys/regset.h>
39 #include <sys/vis.h>
40 #include <sys/machthread.h>
41 #include <sys/machtrap.h>
42 #include <sys/machsig.h>
44 #if defined(lint) || defined(__lint)
45 #include <sys/types.h>
/*
 * Lint-only C stand-ins for the routines that the non-lint branch of this
 * file implements in assembly.  They are compiled only when lint/__lint is
 * defined and carry no real logic: double2uint64_t() simply returns 0 and
 * the void routines do nothing (hence the ARGSUSED markers).
 * NOTE(review): presumably these exist so lint can check callers against
 * the C signatures -- confirm against the build rules.
 */
47 /* ARGSUSED */
48 uint64_t
49 double2uint64_t(double* d)
51 return (0ULL);
54 /* ARGSUSED */
55 void
56 conv_d16_to_i32(uint32_t *i32, double *d16, int64_t *tmp, int ilen)
60 /* ARGSUSED */
61 void
62 conv_i32_to_d32(double *d32, uint32_t *i32, int len)
66 /* ARGSUSED */
67 void
68 conv_i32_to_d16(double *d16, uint32_t *i32, int len)
72 /* ARGSUSED */
73 void
74 mont_mulf_noconv(uint32_t *result, double *dm1, double *dm2, double *dt,
75 double *dn, uint32_t *nint, int nlen, double dn0)
79 #else /* lint || __lint */
81 .section ".text",#alloc,#execinstr
82 .file "mont_mulf.c"
84 .section ".bss",#alloc,#write
85 Bbss.bss:
87 .section ".data",#alloc,#write
88 Ddata.data:
/*
 * Read-only floating point constants.  Each object is 8 bytes, emitted as
 * two 32-bit words: the first .word is the most significant half of an
 * IEEE 754 double and the second is the least significant half.  All five
 * symbols are exported with .global.
 */
90 .section ".rodata",#alloc
92 ! CONSTANT POOL
94 Drodata.rodata:
95 .global TwoTo16
96 .align 8
98 ! CONSTANT POOL
100 .global TwoTo16
101 TwoTo16:
/* 1089470464 == 0x40F00000: the double 65536.0 (2^16) */
102 .word 1089470464
103 .word 0
104 .type TwoTo16,#object
105 .size TwoTo16,8
106 .global TwoToMinus16
108 ! CONSTANT POOL
110 .global TwoToMinus16
111 TwoToMinus16:
/* 1055916032 == 0x3EF00000: the double 1.0/65536.0 (2^-16) */
112 .word 1055916032
113 .word 0
114 .type TwoToMinus16,#object
115 .size TwoToMinus16,8
116 .global Zero
118 ! CONSTANT POOL
120 .global Zero
121 Zero:
/* all-zero bit pattern: the double 0.0 */
122 .word 0
123 .word 0
124 .type Zero,#object
125 .size Zero,8
126 .global TwoTo32
128 ! CONSTANT POOL
130 .global TwoTo32
131 TwoTo32:
/* 1106247680 == 0x41F00000: the double 4294967296.0 (2^32) */
132 .word 1106247680
133 .word 0
134 .type TwoTo32,#object
135 .size TwoTo32,8
136 .global TwoToMinus32
138 ! CONSTANT POOL
140 .global TwoToMinus32
141 TwoToMinus32:
/* 1039138816 == 0x3DF00000: the double 1.0/4294967296.0 (2^-32) */
142 .word 1039138816
143 .word 0
144 .type TwoToMinus32,#object
145 .size TwoToMinus32,8
147 .section ".text",#alloc,#execinstr
148 /* 000000 0 */ .register %g3,#scratch
149 /* 000000 */ .register %g2,#scratch
150 /* 000000 0 */ .align 32
151 ! FILE mont_mulf.c
153 ! 1 !/*
154 ! 2 ! * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
155 ! 3 ! * Use is subject to license terms.
156 ! 4 ! */
157 ! 6 !#pragma ident "@(#)mont_mulf.c 1.2 01/09/24 SMI"
158 ! 9 !/*
159 ! 10 ! * If compiled without -DRF_INLINE_MACROS then needs -lm at link time
160 ! 11 ! * If compiled with -DRF_INLINE_MACROS then needs conv.il at compile time
161 ! 12 ! * (i.e. cc <compiler_flags> -DRF_INLINE_MACROS conv.il mont_mulf.c )
162 ! 13 ! */
163 ! 15 !#include <sys/types.h>
164 ! 16 !#include <math.h>
165 ! 18 !static const double TwoTo16 = 65536.0;
166 ! 19 !static const double TwoToMinus16 = 1.0/65536.0;
167 ! 20 !static const double Zero = 0.0;
168 ! 21 !static const double TwoTo32 = 65536.0 * 65536.0;
169 ! 22 !static const double TwoToMinus32 = 1.0 / (65536.0 * 65536.0);
170 ! 24 !#ifdef RF_INLINE_MACROS
171 ! 26 !double upper32(double);
172 ! 27 !double lower32(double, double);
173 ! 28 !double mod(double, double, double);
174 ! 30 !#else
175 ! 32 !static double
176 ! 33 !upper32(double x)
177 ! 34 !{
178 ! 35 ! return (floor(x * TwoToMinus32));
179 ! 36 !}
180 ! 39 !/* ARGSUSED */
181 ! 40 !static double
182 ! 41 !lower32(double x, double y)
183 ! 42 !{
184 ! 43 ! return (x - TwoTo32 * floor(x * TwoToMinus32));
185 ! 44 !}
186 ! 46 !static double
187 ! 47 !mod(double x, double oneoverm, double m)
188 ! 48 !{
189 ! 49 ! return (x - m * floor(x * oneoverm));
190 ! 50 !}
191 ! 52 !#endif
192 ! 55 !static void
193 ! 56 !cleanup(double *dt, int from, int tlen)
194 ! 57 !{
197 ! SUBROUTINE cleanup
199 ! OFFSET SOURCE LINE LABEL INSTRUCTION
/*
 * cleanup(dt, from, tlen)
 *
 * File-local leaf routine (no .global, no register window; returns with
 * retl).  Arguments arrive in %o0 = dt, %o1 = from, %o2 = tlen.
 *
 * Per the interleaved C listing, for i = 2*from; i < 2*tlen; i += 2 it
 * rewrites the pair dt[i], dt[i+1]: each element becomes its low 32 bits
 * plus the upper part carried over from the matching element of the
 * previous pair (the carries start out as Zero).
 *
 * Register use inside the loop:
 *   %g3  = i                    %g2  = 2*tlen - 1 (loop bound)
 *   %g1  = &dt[i]               %f8  = Zero
 *   %f18 = tmp  (carry into dt[i])
 *   %f16 = tmp1 (carry into dt[i+1])
 */
201 cleanup:
202 /* 000000 57 */ sra %o1,0,%o4
203 /* 0x0004 */ sra %o2,0,%o5
205 ! 58 ! int i;
206 ! 59 ! double tmp, tmp1, x, x1;
207 ! 61 ! tmp = tmp1 = Zero;
/* %g5 = 2 * tlen */
209 /* 0x0008 61 */ sll %o5,1,%g5
211 ! 63 ! for (i = 2 * from; i < 2 * tlen; i += 2) {
/* %g3 = 2 * from; nothing to do when that is already >= 2 * tlen */
213 /* 0x000c 63 */ sll %o4,1,%g3
214 /* 0x0010 */ cmp %g3,%g5
215 /* 0x0014 */ bge,pn %icc,.L77000188
/* delay slot: executed on both paths, starts forming the address of Zero */
216 /* 0x0018 0 */ sethi %hi(Zero),%o3
217 .L77000197:
218 /* 0x001c 63 */ ldd [%o3+%lo(Zero)],%f8
219 /* 0x0020 */ sra %g3,0,%o1
220 /* 0x0024 */ sub %g5,1,%g2
/* %g4 = byte offset of dt[i] (index * 8) */
221 /* 0x0028 */ sllx %o1,3,%g4
223 ! 64 ! x = dt[i];
225 /* 0x002c 64 */ ldd [%g4+%o0],%f10
226 /* 0x0030 63 */ add %g4,%o0,%g1
/* tmp = tmp1 = Zero */
227 /* 0x0034 */ fmovd %f8,%f18
228 /* 0x0038 */ fmovd %f8,%f16
230 ! 65 ! x1 = dt[i + 1];
231 ! 66 ! dt[i] = lower32(x, Zero) + tmp;
/* loop top: %f10 already holds x = dt[i] */
233 .L900000110:
/* x as a 64-bit integer in %f0/%f1 */
234 /* 0x003c 66 */ fdtox %f10,%f0
235 /* 0x0040 65 */ ldd [%g1+8],%f12
237 ! 67 ! dt[i + 1] = lower32(x1, Zero) + tmp1;
238 ! 68 ! tmp = upper32(x);
239 ! 69 ! tmp1 = upper32(x1);
241 /* 0x0044 69 */ add %g3,2,%g3
242 /* 0x0048 */ cmp %g3,%g2
243 /* 0x004c 67 */ fdtox %f12,%f2
/* keep a copy of the full integer so its upper word can become tmp */
244 /* 0x0050 68 */ fmovd %f0,%f4
/*
 * lower32(): overwrite the upper 32-bit word of each integer with the
 * (all-zero) upper word of Zero, then convert the remaining low 32 bits
 * back to double.
 */
245 /* 0x0054 66 */ fmovs %f8,%f0
246 /* 0x0058 67 */ fmovs %f8,%f2
247 /* 0x005c 66 */ fxtod %f0,%f0
248 /* 0x0060 67 */ fxtod %f2,%f2
249 /* 0x0064 69 */ fdtox %f12,%f6
250 /* 0x0068 66 */ faddd %f0,%f18,%f10
251 /* 0x006c */ std %f10,[%g1]
252 /* 0x0070 67 */ faddd %f2,%f16,%f14
253 /* 0x0074 */ std %f14,[%g1+8]
/* upper32(): convert the upper 32-bit word of the saved integer to double */
254 /* 0x0078 68 */ fitod %f4,%f18
255 /* 0x007c 69 */ add %g1,16,%g1
256 /* 0x0080 */ fitod %f6,%f16
/*
 * Loop while i <= 2*tlen - 1.  The branch is annulled (",a"), so the load
 * of the next dt[i] in the delay slot only happens when the loop repeats.
 */
257 /* 0x0084 */ ble,a,pt %icc,.L900000110
258 /* 0x0088 64 */ ldd [%g1],%f10
259 .L77000188:
260 /* 0x008c 69 */ retl ! Result =
261 /* 0x0090 */ nop
262 /* 0x0094 0 */ .type cleanup,2
263 /* 0x0094 0 */ .size cleanup,(.-cleanup)
265 .section ".text",#alloc,#execinstr
266 /* 000000 0 */ .align 8
267 /* 000000 */ .skip 24
268 /* 0x0018 */ .align 32
270 ! 70 ! }
271 ! 71 !}
272 ! 75 !#ifdef _KERNEL
273 ! 76 !/*
274 ! 77 ! * This only works if 0 <= d < 2^53
275 ! 78 ! */
276 ! 79 !uint64_t
277 ! 80 !double2uint64_t(double* d)
278 ! 81 !{
279 ! 82 ! uint64_t x;
280 ! 83 ! uint64_t exp;
281 ! 84 ! uint64_t man;
282 ! 86 ! x = *((uint64_t *)d);
285 ! SUBROUTINE double2uint64_t
287 ! OFFSET SOURCE LINE LABEL INSTRUCTION
/*
 * uint64_t double2uint64_t(double *d)
 *
 * Integer-only conversion of *d to a 64-bit unsigned value; no floating
 * point instruction is used.  %o0 = d on entry, result in %o0.  Leaf
 * routine (retl, no register window).
 *
 * The raw 64 bits of *d are loaded as an integer.  A zero bit pattern
 * returns 0.  Otherwise the 52 fraction bits are isolated, the implicit
 * leading 1 (bit 52) is OR-ed in, and the result is shifted right by
 * 52 - ((x >> 52) - 1023).  The embedded C listing notes this only works
 * for 0 <= d < 2^53.
 */
289 .global double2uint64_t
290 double2uint64_t:
/* %o2 = x, the raw bit pattern of *d */
291 /* 000000 86 */ ldx [%o0],%o2
293 ! 87 ! if (x == 0) {
295 /* 0x0004 87 */ cmp %o2,0
296 /* 0x0008 */ bne,pn %xcc,.L900000206
/* delay slot: first step of building the sign/exponent mask */
297 /* 0x000c 94 */ sethi %hi(0xfff00000),%o5
298 .L77000202:
299 /* 0x0010 94 */ retl ! Result = %o0
301 ! 88 ! return (0ULL);
/* delay slot of retl: return value 0 */
303 /* 0x0014 88 */ or %g0,0,%o0
305 ! 89 ! }
306 ! 90 ! exp = (x >> 52) - 1023;
307 ! 91 ! man = (x & 0xfffffffffffffULL) | 0x10000000000000ULL;
308 ! 92 ! x = man >> (52 - exp);
309 ! 94 ! return (x);
311 .L900000206:
/* %o4 = 0xfff0000000000000 (sign + exponent bits) */
312 /* 0x0018 94 */ sllx %o5,32,%o4
/* %o0 = x >> 52 (biased exponent, sign assumed clear) */
313 /* 0x001c */ srlx %o2,52,%o0
314 /* 0x0020 */ sethi %hi(0x40000000),%o1
315 /* 0x0024 */ or %g0,1023,%g5
/* %g4 = 0x40000000 << 22 = 2^52, the implicit leading mantissa bit */
316 /* 0x0028 */ sllx %o1,22,%g4
/* %o3 = ~%o4 = 0x000fffffffffffff (fraction mask) */
317 /* 0x002c */ xor %o4,-1,%o3
/* %g3 = 1023 - (x >> 52) = -exp */
318 /* 0x0030 */ sub %g5,%o0,%g3
319 /* 0x0034 */ and %o2,%o3,%g2
/* %o5 = man */
320 /* 0x0038 */ or %g2,%g4,%o5
/* %g1 = 52 - exp, the right-shift count */
321 /* 0x003c */ add %g3,52,%g1
322 /* 0x0040 */ retl ! Result = %o0
/* delay slot of retl: result = man >> (52 - exp) */
323 /* 0x0044 */ srlx %o5,%g1,%o0
324 /* 0x0048 0 */ .type double2uint64_t,2
325 /* 0x0048 0 */ .size double2uint64_t,(.-double2uint64_t)
327 .section ".text",#alloc,#execinstr
328 /* 000000 0 */ .align 8
329 /* 000000 */ .skip 24
330 /* 0x0018 */ .align 32
332 ! 95 !}
333 ! 96 !#else
334 ! 97 !/*
335 ! 98 ! * This only works if 0 <= d < 2^63
336 ! 99 ! */
337 ! 100 !uint64_t
338 ! 101 !double2uint64_t(double* d)
339 ! 102 !{
340 ! 103 ! return ((int64_t)(*d));
341 ! 104 !}
342 ! 105 !#endif
343 ! 107 !/* ARGSUSED */
344 ! 108 !void
345 ! 109 !conv_d16_to_i32(uint32_t *i32, double *d16, int64_t *tmp, int ilen)
346 ! 110 !{
349 ! SUBROUTINE conv_d16_to_i32
351 ! OFFSET SOURCE LINE LABEL INSTRUCTION
/*
 * void conv_d16_to_i32(uint32_t *i32, double *d16, int64_t *tmp, int ilen)
 *
 * After the save: %i0 = i32, %i1 = d16, %i2 = tmp, %i3 = ilen.  The tmp
 * argument is never read; %i2 is reused as a scratch register right away.
 *
 * Per the interleaved C listing, the routine walks d16[] two doubles at a
 * time, converts each double to an integer (the double2uint64_t() logic is
 * expanded inline four times rather than called), and combines each pair
 * (a, b) as a + ((b & 0xffff) << 16) plus the running carry into one
 * 32-bit output word, carrying the overflow into the next word.  It
 * stores ilen words into i32[].
 *
 * Long-lived registers:
 *   %i4 = a      %i2 = b      %l1 = c      %o2 = d
 *   %o3 = t1 (carry into the current word)
 *   %l4 = i      %l5 = ilen - 2 (loop bound)
 *   %g2 = index of c in d16[] (2*i + 2)      %g1 = &i32[i]
 *   %l7 = 0x000fffffffffffff (fraction mask) %l3 = 2^52 (implicit bit)
 *   %l6 = 0xffffffff                         %l2 = 0xffff
 */
353 .global conv_d16_to_i32
354 conv_d16_to_i32:
355 /* 000000 110 */ save %sp,-176,%sp
357 ! 111 ! int i;
358 ! 112 ! int64_t t, t1, /* using int64_t and not uint64_t */
359 ! 113 ! a, b, c, d; /* because more efficient code is */
360 ! 114 ! /* generated this way, and there */
361 ! 115 ! /* is no overflow */
362 ! 116 ! t1 = 0;
363 ! 117 ! a = double2uint64_t(&(d16[0]));
/* raw bit patterns of d16[0] (%o0) and d16[1] (%i2) */
365 /* 0x0004 117 */ ldx [%i1],%o0
366 /* 0x0008 118 */ ldx [%i1+8],%i2
367 /* 0x000c 117 */ cmp %o0,0
368 /* 0x0010 */ bne,pn %xcc,.L77000216
/* delay slot: a = 0, kept if d16[0] is all-zero bits */
369 /* 0x0014 */ or %g0,0,%i4
370 .L77000215:
371 /* 0x0018 117 */ ba .L900000316
372 /* 0x001c 118 */ cmp %i2,0
/* inline double2uint64_t(&d16[0]) -> %i4 (a) */
373 .L77000216:
374 /* 0x0020 117 */ srlx %o0,52,%o5
375 /* 0x0024 */ sethi %hi(0xfff00000),%i4
376 /* 0x0028 */ sllx %i4,32,%o2
377 /* 0x002c */ sethi %hi(0x40000000),%o7
378 /* 0x0030 */ sllx %o7,22,%o3
379 /* 0x0034 */ or %g0,1023,%o4
380 /* 0x0038 */ xor %o2,-1,%g5
381 /* 0x003c */ sub %o4,%o5,%l0
382 /* 0x0040 */ and %o0,%g5,%o1
383 /* 0x0044 */ add %l0,52,%l1
384 /* 0x0048 */ or %o1,%o3,%g4
386 ! 118 ! b = double2uint64_t(&(d16[1]));
388 /* 0x004c 118 */ cmp %i2,0
389 /* 0x0050 117 */ srlx %g4,%l1,%i4
/* condition codes here reflect "d16[1] bits == 0" on both incoming paths */
390 .L900000316:
391 /* 0x0054 118 */ bne,pn %xcc,.L77000222
/* delay slot: %l3 = ilen - 1 */
392 /* 0x0058 134 */ sub %i3,1,%l3
393 .L77000221:
394 /* 0x005c 118 */ or %g0,0,%i2
395 /* 0x0060 */ ba .L900000315
/* delay slot: t1 = 0 */
396 /* 0x0064 116 */ or %g0,0,%o3
/* inline double2uint64_t(&d16[1]) -> %i2 (b) */
397 .L77000222:
398 /* 0x0068 118 */ srlx %i2,52,%l6
399 /* 0x006c */ sethi %hi(0xfff00000),%g4
400 /* 0x0070 */ sllx %g4,32,%i5
401 /* 0x0074 */ sethi %hi(0x40000000),%l5
402 /* 0x0078 */ xor %i5,-1,%l4
403 /* 0x007c */ or %g0,1023,%l2
404 /* 0x0080 */ and %i2,%l4,%l7
405 /* 0x0084 */ sllx %l5,22,%i2
406 /* 0x0088 */ sub %l2,%l6,%g1
407 /* 0x008c */ or %l7,%i2,%g3
408 /* 0x0090 */ add %g1,52,%g2
409 /* 0x0094 116 */ or %g0,0,%o3
410 /* 0x0098 118 */ srlx %g3,%g2,%i2
412 ! 119 ! for (i = 0; i < ilen - 1; i++) {
414 .L900000315:
/* skip the loop when ilen - 1 <= 0 */
415 /* 0x009c 119 */ cmp %l3,0
416 /* 0x00a0 */ ble,pn %icc,.L77000210
/* delay slot: i = 0 */
417 /* 0x00a4 */ or %g0,0,%l4
/* loop prologue: build the masks and bounds listed in the header comment */
418 .L77000245:
419 /* 0x00a8 118 */ sethi %hi(0xfff00000),%l7
420 /* 0x00ac */ or %g0,-1,%l6
421 /* 0x00b0 */ sllx %l7,32,%l3
422 /* 0x00b4 */ srl %l6,0,%l6
423 /* 0x00b8 */ sethi %hi(0x40000000),%l1
424 /* 0x00bc */ sethi %hi(0xfc00),%l2
425 /* 0x00c0 */ xor %l3,-1,%l7
426 /* 0x00c4 */ sllx %l1,22,%l3
427 /* 0x00c8 */ sub %i3,2,%l5
428 /* 0x00cc */ add %l2,1023,%l2
429 /* 0x00d0 */ or %g0,2,%g2
430 /* 0x00d4 */ or %g0,%i0,%g1
432 ! 120 ! c = double2uint64_t(&(d16[2 * i + 2]));
/* loop top */
434 .L77000208:
435 /* 0x00d8 120 */ sra %g2,0,%g3
436 /* 0x00dc 123 */ add %g2,1,%o2
437 /* 0x00e0 120 */ sllx %g3,3,%i3
439 ! 121 ! t1 += a & 0xffffffff;
440 ! 122 ! t = (a >> 32);
441 ! 123 ! d = double2uint64_t(&(d16[2 * i + 3]));
443 /* 0x00e4 123 */ sra %o2,0,%g5
/* %o5 = raw bits of d16[2*i + 2] */
444 /* 0x00e8 120 */ ldx [%i1+%i3],%o5
445 /* 0x00ec 123 */ sllx %g5,3,%o0
/* %g4 = a & 0xffffffff */
446 /* 0x00f0 121 */ and %i4,%l6,%g4
/* %i3 = raw bits of d16[2*i + 3] */
447 /* 0x00f4 123 */ ldx [%i1+%o0],%i3
448 /* 0x00f8 120 */ cmp %o5,0
449 /* 0x00fc */ bne,pn %xcc,.L77000228
/* delay slot: %i5 = b & 0xffff */
450 /* 0x0100 124 */ and %i2,%l2,%i5
451 .L77000227:
452 /* 0x0104 120 */ or %g0,0,%l1
453 /* 0x0108 */ ba .L900000314
/* delay slot: %o0 = t1 + (a & 0xffffffff) */
454 /* 0x010c 121 */ add %o3,%g4,%o0
/* inline double2uint64_t for c -> %l1 */
455 .L77000228:
456 /* 0x0110 120 */ srlx %o5,52,%o7
457 /* 0x0114 */ and %o5,%l7,%o5
458 /* 0x0118 */ or %g0,52,%l0
459 /* 0x011c */ sub %o7,1023,%o4
460 /* 0x0120 */ or %o5,%l3,%l1
461 /* 0x0124 */ sub %l0,%o4,%o1
462 /* 0x0128 */ srlx %l1,%o1,%l1
463 /* 0x012c 121 */ add %o3,%g4,%o0
464 .L900000314:
/* %g3 = t = a >> 32 */
465 /* 0x0130 122 */ srax %i4,32,%g3
466 /* 0x0134 123 */ cmp %i3,0
467 /* 0x0138 */ bne,pn %xcc,.L77000234
/* delay slot: %g5 = (b & 0xffff) << 16 */
468 /* 0x013c 124 */ sllx %i5,16,%g5
469 .L77000233:
470 /* 0x0140 123 */ or %g0,0,%o2
471 /* 0x0144 */ ba .L900000313
/* delay slot: %o7 = t1 after adding (b & 0xffff) << 16 */
472 /* 0x0148 124 */ add %o0,%g5,%o7
/* inline double2uint64_t for d -> %o2 */
473 .L77000234:
474 /* 0x014c 123 */ srlx %i3,52,%o2
475 /* 0x0150 */ and %i3,%l7,%i4
476 /* 0x0154 */ sub %o2,1023,%o1
477 /* 0x0158 */ or %g0,52,%g4
478 /* 0x015c */ sub %g4,%o1,%i5
479 /* 0x0160 */ or %i4,%l3,%i3
480 /* 0x0164 */ srlx %i3,%i5,%o2
482 ! 124 ! t1 += (b & 0xffff) << 16;
484 /* 0x0168 124 */ add %o0,%g5,%o7
486 ! 125 ! t += (b >> 16) + (t1 >> 32);
488 .L900000313:
489 /* 0x016c 125 */ srax %i2,16,%l0
490 /* 0x0170 */ srax %o7,32,%o4
491 /* 0x0174 */ add %l0,%o4,%o3
493 ! 126 ! i32[i] = t1 & 0xffffffff;
494 ! 127 ! t1 = t;
495 ! 128 ! a = c;
496 ! 129 ! b = d;
498 /* 0x0178 129 */ add %l4,1,%l4
499 /* 0x017c 126 */ and %o7,%l6,%o5
/* %o3 = new t1 = (a >> 32) + (b >> 16) + (t1 >> 32) */
500 /* 0x0180 125 */ add %g3,%o3,%o3
501 /* 0x0184 126 */ st %o5,[%g1]
502 /* 0x0188 128 */ or %g0,%l1,%i4
503 /* 0x018c 129 */ or %g0,%o2,%i2
504 /* 0x0190 */ add %g2,2,%g2
/* repeat while i <= ilen - 2; the delay slot advances the output pointer */
505 /* 0x0194 */ cmp %l4,%l5
506 /* 0x0198 */ ble,pt %icc,.L77000208
507 /* 0x019c */ add %g1,4,%g1
509 ! 130 ! }
510 ! 131 ! t1 += a & 0xffffffff;
511 ! 132 ! t = (a >> 32);
512 ! 133 ! t1 += (b & 0xffff) << 16;
513 ! 134 ! i32[i] = t1 & 0xffffffff;
/*
 * Final word: i32[i] = t1 + a + ((b & 0xffff) << 16), stored with a
 * 32-bit st so only the low 32 bits of the sum reach memory.
 */
515 .L77000210:
516 /* 0x01a0 134 */ sra %l4,0,%l4
517 /* 0x01a4 */ sethi %hi(0xfc00),%i1
518 /* 0x01a8 */ add %o3,%i4,%l2
519 /* 0x01ac */ add %i1,1023,%i5
520 /* 0x01b0 */ and %i2,%i5,%l5
521 /* 0x01b4 */ sllx %l4,2,%i2
522 /* 0x01b8 */ sllx %l5,16,%l6
523 /* 0x01bc */ add %l2,%l6,%l7
524 /* 0x01c0 */ st %l7,[%i0+%i2]
525 /* 0x01c4 129 */ ret ! Result =
526 /* 0x01c8 */ restore %g0,%g0,%g0
527 /* 0x01cc 0 */ .type conv_d16_to_i32,2
528 /* 0x01cc 0 */ .size conv_d16_to_i32,(.-conv_d16_to_i32)
530 .section ".text",#alloc,#execinstr
531 /* 000000 0 */ .align 8
533 ! CONSTANT POOL
/*
 * Constants for the integer-to-double conversion in conv_i32_to_d32.
 * 1127219200 == 0x43300000 is the upper word of the double 2^52.
 *   offset 0: the full double 2^52 (upper word 0x43300000, lower word 0)
 *   offset 8: the upper word 0x43300000 alone, loaded as a single word
 */
535 ___const_seg_900000401:
536 /* 000000 0 */ .word 1127219200,0
537 /* 0x0008 */ .word 1127219200
538 /* 0x000c 0 */ .type ___const_seg_900000401,1
539 /* 0x000c 0 */ .size ___const_seg_900000401,(.-___const_seg_900000401)
540 /* 0x000c 0 */ .align 8
541 /* 0x0010 */ .skip 24
542 /* 0x0028 */ .align 32
544 ! 135 !}
545 ! 138 !void
546 ! 139 !conv_i32_to_d32(double *d32, uint32_t *i32, int len)
547 ! 140 !{
550 ! SUBROUTINE conv_i32_to_d32
552 ! OFFSET SOURCE LINE LABEL INSTRUCTION
/*
 * void conv_i32_to_d32(double *d32, uint32_t *i32, int len)
 *
 * Leaf routine: %o0 = d32, %o1 = i32, %o2 = len.  Per the interleaved C
 * listing it stores d32[i] = (double)i32[i] for 0 <= i < len, and returns
 * immediately when len <= 0.
 *
 * Conversion technique used throughout: a register pair is filled with
 * the word 0x43300000 in its upper half and the 32-bit input in its lower
 * half, which is the bit pattern of the double 2^52 + input.  Subtracting
 * the double 2^52 (%f16) then leaves the input value as a double.
 *
 * Structure:
 *   len < 10  -> simple one-element-per-iteration loop (.L900000409)
 *   otherwise -> prefetching prologue, an unrolled loop that stores eight
 *                doubles per iteration (.L900000405), a two-element drain
 *                (.L900000408), then the simple loop for what is left.
 *
 * %o3 = len - 1, %g5 = element counter, %o2 = output pointer.
 */
554 .global conv_i32_to_d32
555 conv_i32_to_d32:
/* sets the condition codes from len without changing it */
556 /* 000000 140 */ orcc %g0,%o2,%o2
558 ! 141 ! int i;
559 ! 143 !#pragma pipeloop(0)
560 ! 144 ! for (i = 0; i < len; i++)
562 /* 0x0004 144 */ ble,pn %icc,.L77000254
/* delay slot: %o3 = len - 1 */
563 /* 0x0008 */ sub %o2,1,%o3
564 .L77000263:
565 /* 0x000c 140 */ or %g0,%o0,%o2
567 ! 145 ! d32[i] = (double)(i32[i]);
/* %o5 = len; short inputs go straight to the simple loop */
569 /* 0x0010 145 */ add %o3,1,%o5
570 /* 0x0014 144 */ or %g0,0,%g5
571 /* 0x0018 145 */ cmp %o5,10
572 /* 0x001c */ bl,pn %icc,.L77000261
573 /* 0x0020 */ sethi %hi(___const_seg_900000401),%g4
/* unrolled path prologue: issue prefetches and load the first two inputs */
574 .L900000407:
575 /* 0x0024 145 */ prefetch [%o1],0
576 /* 0x0028 */ prefetch [%o0],22
577 /* 0x002c */ sethi %hi(___const_seg_900000401+8),%o4
578 /* 0x0030 */ or %g0,%o0,%o2
579 /* 0x0034 */ prefetch [%o1+64],0
/* from here on %o0 is the input read pointer (i32 + 8 bytes) */
580 /* 0x0038 */ add %o1,8,%o0
/* %o5 = len - 8, bound for the unrolled loop */
581 /* 0x003c */ sub %o3,7,%o5
582 /* 0x0040 */ prefetch [%o2+64],22
/* two elements are already in flight */
583 /* 0x0044 */ or %g0,2,%g5
584 /* 0x0048 */ prefetch [%o2+128],22
585 /* 0x004c */ prefetch [%o2+192],22
586 /* 0x0050 */ prefetch [%o1+128],0
/* %f2 = 0x43300000 (upper word), %f16 = 2^52 */
587 /* 0x0054 */ ld [%o4+%lo(___const_seg_900000401+8)],%f2
588 /* 0x0058 */ ldd [%g4+%lo(___const_seg_900000401)],%f16
589 /* 0x005c */ fmovs %f2,%f0
590 /* 0x0060 */ prefetch [%o2+256],22
591 /* 0x0064 */ prefetch [%o2+320],22
/* i32[0] and i32[1] into the lower words of %f2 and %f0 */
592 /* 0x0068 */ ld [%o1],%f3
593 /* 0x006c */ prefetch [%o1+192],0
594 /* 0x0070 */ ld [%o1+4],%f1
/*
 * Unrolled loop: each pass converts and stores eight doubles while loading
 * the inputs for the following conversions.
 */
595 .L900000405:
596 /* 0x0074 145 */ prefetch [%o0+188],0
597 /* 0x0078 */ fsubd %f2,%f16,%f22
598 /* 0x007c */ add %g5,8,%g5
599 /* 0x0080 */ add %o0,32,%o0
600 /* 0x0084 */ ld [%o4+%lo(___const_seg_900000401+8)],%f4
601 /* 0x0088 */ std %f22,[%o2]
602 /* 0x008c */ cmp %g5,%o5
603 /* 0x0090 */ ld [%o0-32],%f5
604 /* 0x0094 */ fsubd %f0,%f16,%f24
605 /* 0x0098 */ add %o2,64,%o2
606 /* 0x009c */ fmovs %f4,%f0
607 /* 0x00a0 */ std %f24,[%o2-56]
608 /* 0x00a4 */ ld [%o0-28],%f1
609 /* 0x00a8 */ fsubd %f4,%f16,%f26
610 /* 0x00ac */ fmovs %f0,%f6
611 /* 0x00b0 */ prefetch [%o2+312],22
612 /* 0x00b4 */ std %f26,[%o2-48]
613 /* 0x00b8 */ ld [%o0-24],%f7
614 /* 0x00bc */ fsubd %f0,%f16,%f28
615 /* 0x00c0 */ fmovs %f6,%f8
616 /* 0x00c4 */ std %f28,[%o2-40]
617 /* 0x00c8 */ ld [%o0-20],%f9
618 /* 0x00cc */ fsubd %f6,%f16,%f30
619 /* 0x00d0 */ fmovs %f8,%f10
620 /* 0x00d4 */ std %f30,[%o2-32]
621 /* 0x00d8 */ ld [%o0-16],%f11
622 /* 0x00dc */ prefetch [%o2+344],22
623 /* 0x00e0 */ fsubd %f8,%f16,%f48
624 /* 0x00e4 */ fmovs %f10,%f12
625 /* 0x00e8 */ std %f48,[%o2-24]
626 /* 0x00ec */ ld [%o0-12],%f13
627 /* 0x00f0 */ fsubd %f10,%f16,%f50
628 /* 0x00f4 */ fmovs %f12,%f2
629 /* 0x00f8 */ std %f50,[%o2-16]
630 /* 0x00fc */ ld [%o0-8],%f3
631 /* 0x0100 */ fsubd %f12,%f16,%f52
632 /* 0x0104 */ fmovs %f2,%f0
633 /* 0x0108 */ std %f52,[%o2-8]
/* the delay-slot load is not annulled, so it runs on loop exit as well */
634 /* 0x010c */ ble,pt %icc,.L900000405
635 /* 0x0110 */ ld [%o0-4],%f1
/* drain: convert and store the two elements still held in %f2 and %f0 */
636 .L900000408:
637 /* 0x0114 145 */ fsubd %f2,%f16,%f18
638 /* 0x0118 */ add %o2,16,%o2
639 /* 0x011c */ cmp %g5,%o3
640 /* 0x0120 */ std %f18,[%o2-16]
641 /* 0x0124 */ fsubd %f0,%f16,%f20
/* hand the read pointer back to %o1 for the simple loop */
642 /* 0x0128 */ or %g0,%o0,%o1
/* done when the counter has passed len - 1 */
643 /* 0x012c */ bg,pn %icc,.L77000254
644 /* 0x0130 */ std %f20,[%o2-8]
/* simple loop: one element per iteration */
645 .L77000261:
646 /* 0x0134 145 */ ld [%o1],%f15
647 .L900000409:
648 /* 0x0138 145 */ sethi %hi(___const_seg_900000401+8),%o4
649 /* 0x013c */ ldd [%g4+%lo(___const_seg_900000401)],%f16
650 /* 0x0140 */ add %g5,1,%g5
651 /* 0x0144 */ ld [%o4+%lo(___const_seg_900000401+8)],%f14
652 /* 0x0148 */ add %o1,4,%o1
653 /* 0x014c */ cmp %g5,%o3
654 /* 0x0150 */ fsubd %f14,%f16,%f54
655 /* 0x0154 */ std %f54,[%o2]
656 /* 0x0158 */ add %o2,8,%o2
/* annulled branch: the next input is loaded only when the loop repeats */
657 /* 0x015c */ ble,a,pt %icc,.L900000409
658 /* 0x0160 */ ld [%o1],%f15
659 .L77000254:
660 /* 0x0164 145 */ retl ! Result =
661 /* 0x0168 */ nop
662 /* 0x016c 0 */ .type conv_i32_to_d32,2
663 /* 0x016c 0 */ .size conv_i32_to_d32,(.-conv_i32_to_d32)
665 .section ".text",#alloc,#execinstr
666 /* 000000 0 */ .align 8
668 ! CONSTANT POOL
/*
 * Constants for the integer-to-double conversion in conv_i32_to_d16.
 * 1127219200 == 0x43300000 is the upper word of the double 2^52.
 *   offset 0: the full double 2^52 (upper word 0x43300000, lower word 0)
 *   offset 8: the upper word 0x43300000 alone, loaded as a single word
 */
670 ___const_seg_900000501:
671 /* 000000 0 */ .word 1127219200,0
672 /* 0x0008 */ .word 1127219200
673 /* 0x000c 0 */ .type ___const_seg_900000501,1
674 /* 0x000c 0 */ .size ___const_seg_900000501,(.-___const_seg_900000501)
675 /* 0x000c 0 */ .align 8
676 /* 0x0010 */ .skip 24
677 /* 0x0028 */ .align 32
679 ! 146 !}
680 ! 149 !void
681 ! 150 !conv_i32_to_d16(double *d16, uint32_t *i32, int len)
682 ! 151 !{
685 ! SUBROUTINE conv_i32_to_d16
687 ! OFFSET SOURCE LINE LABEL INSTRUCTION
/*
 * void conv_i32_to_d16(double *d16, uint32_t *i32, int len)
 *
 * After the save: %i0 = d16, %i1 = i32, %i2 = len.  Per the interleaved C
 * listing, for 0 <= i < len it stores
 *     d16[2*i]     = (double)(i32[i] & 0xffff)
 *     d16[2*i + 1] = (double)(i32[i] >> 16)
 * and returns immediately when len <= 0.
 *
 * Each 16-bit half is computed in an integer register, written to a
 * scratch slot in the stack frame with st, and read back with ld into the
 * lower word of an FP register pair whose upper word is 0x43300000.  That
 * pair is the double 2^52 + value; subtracting the double 2^52 (%f20)
 * leaves the value as a double.
 * NOTE(review): the large %sp offsets (2239..2399) presumably include the
 * 64-bit ABI stack bias -- confirm.
 *
 * Structure:
 *   len < 4   -> simple one-input-per-iteration loop (.L900000510)
 *   otherwise -> prologue, a loop handling two inputs (four doubles) per
 *                iteration (.L900000506), a drain that stores the four
 *                values still pending (.L900000509), then the simple loop
 *                for what is left.
 *
 * %l6 = len - 1, %i4 = 0xffff, %l7 = input pointer, %i5 = input counter,
 * %i3 = output index (in doubles), %i2 = d16 base.
 */
689 .global conv_i32_to_d16
690 conv_i32_to_d16:
691 /* 000000 151 */ save %sp,-368,%sp
/* sets the condition codes from len without changing it */
692 /* 0x0004 */ orcc %g0,%i2,%i2
694 ! 152 ! int i;
695 ! 153 ! uint32_t a;
696 ! 155 !#pragma pipeloop(0)
697 ! 156 ! for (i = 0; i < len; i++) {
699 /* 0x0008 156 */ ble,pn %icc,.L77000272
/* delay slot: %l6 = len - 1 */
700 /* 0x000c */ sub %i2,1,%l6
701 .L77000281:
702 /* 0x0010 156 */ sethi %hi(0xfc00),%i3
704 ! 157 ! a = i32[i];
706 /* 0x0014 157 */ or %g0,%i2,%l1
/* %i4 = 0xfc00 + 1023 = 0xffff */
707 /* 0x0018 156 */ add %i3,1023,%i4
708 /* 0x001c 157 */ cmp %i2,4
709 /* 0x0020 151 */ or %g0,%i1,%l7
710 /* 0x0024 */ or %g0,%i0,%i2
711 /* 0x0028 156 */ or %g0,0,%i5
712 /* 0x002c */ or %g0,0,%i3
/* fewer than four inputs: use the simple loop only */
713 /* 0x0030 157 */ bl,pn %icc,.L77000279
714 /* 0x0034 0 */ sethi %hi(___const_seg_900000501),%i1
/* pipelined path prologue: prefetch the output and stage inputs 0 and 1 */
715 .L900000508:
716 /* 0x0038 157 */ prefetch [%i0+8],22
717 /* 0x003c */ prefetch [%i0+72],22
718 /* 0x0040 */ or %g0,%i0,%l2
720 ! 158 ! d16[2 * i] = (double)(a & 0xffff);
722 /* 0x0044 158 */ sethi %hi(___const_seg_900000501+8),%l1
723 /* 0x0048 157 */ prefetch [%i0+136],22
/* %i0 = len - 2, bound for the two-input loop */
724 /* 0x004c */ sub %l6,1,%i0
725 /* 0x0050 */ or %g0,0,%i3
726 /* 0x0054 */ prefetch [%i2+200],22
727 /* 0x0058 */ or %g0,2,%i5
728 /* 0x005c */ prefetch [%i2+264],22
729 /* 0x0060 */ prefetch [%i2+328],22
730 /* 0x0064 */ prefetch [%i2+392],22
731 /* 0x0068 */ ld [%l7],%l3
732 /* 0x006c */ ld [%l7+4],%l4
/* %f20 = 2^52 */
733 /* 0x0070 158 */ ldd [%i1+%lo(___const_seg_900000501)],%f20
735 ! 159 ! d16[2 * i + 1] = (double)(a >> 16);
/* split both inputs into low/high halves and park them on the stack */
737 /* 0x0074 159 */ srl %l3,16,%o1
738 /* 0x0078 158 */ and %l3,%i4,%o3
739 /* 0x007c */ st %o3,[%sp+2335]
740 /* 0x0080 159 */ srl %l4,16,%g4
741 /* 0x0084 158 */ and %l4,%i4,%o0
742 /* 0x0088 */ st %o0,[%sp+2303]
743 /* 0x008c 159 */ add %l7,8,%l7
744 /* 0x0090 */ st %o1,[%sp+2271]
745 /* 0x0094 */ st %g4,[%sp+2239]
746 /* 0x0098 157 */ prefetch [%i2+456],22
747 /* 0x009c */ prefetch [%i2+520],22
/*
 * Main loop: converts the halves parked by the previous pass (or by the
 * prologue) while splitting and parking the next two inputs.
 */
748 .L900000506:
749 /* 0x00a0 157 */ prefetch [%l2+536],22
750 /* 0x00a4 159 */ add %i5,2,%i5
751 /* 0x00a8 157 */ add %l2,32,%l2
752 /* 0x00ac */ ld [%l7],%g2
753 /* 0x00b0 159 */ cmp %i5,%i0
754 /* 0x00b4 */ add %l7,8,%l7
755 /* 0x00b8 158 */ ld [%sp+2335],%f9
756 /* 0x00bc 159 */ add %i3,4,%i3
757 /* 0x00c0 158 */ ld [%l1+%lo(___const_seg_900000501+8)],%f8
758 /* 0x00c4 159 */ ld [%sp+2271],%f11
759 /* 0x00c8 158 */ and %g2,%i4,%g3
760 /* 0x00cc 159 */ fmovs %f8,%f10
761 /* 0x00d0 158 */ st %g3,[%sp+2335]
762 /* 0x00d4 */ fsubd %f8,%f20,%f28
763 /* 0x00d8 */ std %f28,[%l2-32]
764 /* 0x00dc 159 */ srl %g2,16,%g1
765 /* 0x00e0 */ st %g1,[%sp+2271]
766 /* 0x00e4 */ fsubd %f10,%f20,%f30
767 /* 0x00e8 */ std %f30,[%l2-24]
768 /* 0x00ec 157 */ ld [%l7-4],%l0
769 /* 0x00f0 158 */ ld [%sp+2303],%f13
770 /* 0x00f4 */ ld [%l1+%lo(___const_seg_900000501+8)],%f12
771 /* 0x00f8 159 */ ld [%sp+2239],%f15
772 /* 0x00fc 158 */ and %l0,%i4,%l5
773 /* 0x0100 159 */ fmovs %f12,%f14
774 /* 0x0104 158 */ st %l5,[%sp+2303]
775 /* 0x0108 */ fsubd %f12,%f20,%f44
776 /* 0x010c */ std %f44,[%l2-16]
777 /* 0x0110 159 */ srl %l0,16,%o5
778 /* 0x0114 */ st %o5,[%sp+2239]
779 /* 0x0118 */ fsubd %f14,%f20,%f46
/* the delay-slot store is not annulled, so it runs on loop exit as well */
780 /* 0x011c */ ble,pt %icc,.L900000506
781 /* 0x0120 */ std %f46,[%l2-8]
/* drain: convert and store the four halves still parked on the stack */
782 .L900000509:
783 /* 0x0124 158 */ ld [%l1+%lo(___const_seg_900000501+8)],%f0
784 /* 0x0128 159 */ cmp %i5,%l6
785 /* 0x012c */ add %i3,4,%i3
786 /* 0x0130 158 */ ld [%sp+2335],%f1
787 /* 0x0134 */ ld [%sp+2303],%f5
788 /* 0x0138 159 */ fmovs %f0,%f2
789 /* 0x013c */ ld [%sp+2271],%f3
790 /* 0x0140 158 */ fmovs %f0,%f4
791 /* 0x0144 159 */ ld [%sp+2239],%f7
792 /* 0x0148 */ fmovs %f0,%f6
793 /* 0x014c 158 */ fsubd %f0,%f20,%f22
794 /* 0x0150 */ std %f22,[%l2]
795 /* 0x0154 159 */ fsubd %f2,%f20,%f24
796 /* 0x0158 */ std %f24,[%l2+8]
797 /* 0x015c 158 */ fsubd %f4,%f20,%f26
798 /* 0x0160 */ std %f26,[%l2+16]
799 /* 0x0164 159 */ fsubd %f6,%f20,%f20
/* done when the counter has passed len - 1 */
800 /* 0x0168 */ bg,pn %icc,.L77000272
801 /* 0x016c */ std %f20,[%l2+24]
/* simple loop: one input (two doubles) per iteration */
802 .L77000279:
803 /* 0x0170 157 */ ld [%l7],%l2
804 .L900000510:
805 /* 0x0174 158 */ and %l2,%i4,%o4
806 /* 0x0178 */ st %o4,[%sp+2399]
807 /* 0x017c 159 */ srl %l2,16,%o2
808 /* 0x0180 */ st %o2,[%sp+2367]
809 /* 0x0184 158 */ sethi %hi(___const_seg_900000501+8),%l1
810 /* 0x0188 */ sra %i3,0,%i0
811 /* 0x018c */ ld [%l1+%lo(___const_seg_900000501+8)],%f16
/* %o1 = byte offset of d16[2*i] */
812 /* 0x0190 */ sllx %i0,3,%o1
813 /* 0x0194 159 */ add %i3,1,%o3
814 /* 0x0198 158 */ ldd [%i1+%lo(___const_seg_900000501)],%f20
815 /* 0x019c 159 */ sra %o3,0,%l3
816 /* 0x01a0 */ add %i5,1,%i5
817 /* 0x01a4 158 */ ld [%sp+2399],%f17
/* %o0 = byte offset of d16[2*i + 1] */
818 /* 0x01a8 159 */ sllx %l3,3,%o0
819 /* 0x01ac */ add %l7,4,%l7
820 /* 0x01b0 */ fmovs %f16,%f18
821 /* 0x01b4 */ cmp %i5,%l6
822 /* 0x01b8 */ add %i3,2,%i3
823 /* 0x01bc 158 */ fsubd %f16,%f20,%f48
824 /* 0x01c0 */ std %f48,[%i2+%o1]
825 /* 0x01c4 159 */ ld [%sp+2367],%f19
826 /* 0x01c8 */ fsubd %f18,%f20,%f50
827 /* 0x01cc */ std %f50,[%i2+%o0]
/* annulled branch: the next input is loaded only when the loop repeats */
828 /* 0x01d0 */ ble,a,pt %icc,.L900000510
829 /* 0x01d4 157 */ ld [%l7],%l2
830 .L77000272:
831 /* 0x01d8 159 */ ret ! Result =
832 /* 0x01dc */ restore %g0,%g0,%g0
833 /* 0x01e0 0 */ .type conv_i32_to_d16,2
834 /* 0x01e0 0 */ .size conv_i32_to_d16,(.-conv_i32_to_d16)
836 .section ".text",#alloc,#execinstr
837 /* 000000 0 */ .align 8
839 ! CONSTANT POOL
/*
 * Constants for the integer-to-double conversion in
 * conv_i32_to_d32_and_d16.
 * 1127219200 == 0x43300000 is the upper word of the double 2^52.
 *   offset 0: the full double 2^52 (upper word 0x43300000, lower word 0)
 *   offset 8: the upper word 0x43300000 alone, loaded as a single word
 */
841 ___const_seg_900000601:
842 /* 000000 0 */ .word 1127219200,0
843 /* 0x0008 */ .word 1127219200
844 /* 0x000c 0 */ .type ___const_seg_900000601,1
845 /* 0x000c 0 */ .size ___const_seg_900000601,(.-___const_seg_900000601)
846 /* 0x000c 0 */ .align 8
847 /* 0x0010 */ .skip 24
848 /* 0x0028 */ .align 32
850 ! 160 ! }
851 ! 161 !}
852 ! 163 !#ifdef RF_INLINE_MACROS
853 ! 165 !void
854 ! 166 !i16_to_d16_and_d32x4(const double *, /* 1/(2^16) */
855 ! 167 ! const double *, /* 2^16 */
856 ! 168 ! const double *, /* 0 */
857 ! 169 ! double *, /* result16 */
858 ! 170 ! double *, /* result32 */
859 ! 171 ! float *); /* source - should be unsigned int* */
860 ! 172 ! /* converted to float* */
861 ! 174 !#else
862 ! 177 !/* ARGSUSED */
863 ! 178 !static void
864 ! 179 !i16_to_d16_and_d32x4(const double *dummy1, /* 1/(2^16) */
865 ! 180 ! const double *dummy2, /* 2^16 */
866 ! 181 ! const double *dummy3, /* 0 */
867 ! 182 ! double *result16,
868 ! 183 ! double *result32,
869 ! 184 ! float *src) /* source - should be unsigned int* */
870 ! 185 ! /* converted to float* */
871 ! 186 !{
872 ! 187 ! uint32_t *i32;
873 ! 188 ! uint32_t a, b, c, d;
874 ! 190 ! i32 = (uint32_t *)src;
875 ! 191 ! a = i32[0];
876 ! 192 ! b = i32[1];
877 ! 193 ! c = i32[2];
878 ! 194 ! d = i32[3];
879 ! 195 ! result16[0] = (double)(a & 0xffff);
880 ! 196 ! result16[1] = (double)(a >> 16);
881 ! 197 ! result32[0] = (double)a;
882 ! 198 ! result16[2] = (double)(b & 0xffff);
883 ! 199 ! result16[3] = (double)(b >> 16);
884 ! 200 ! result32[1] = (double)b;
885 ! 201 ! result16[4] = (double)(c & 0xffff);
886 ! 202 ! result16[5] = (double)(c >> 16);
887 ! 203 ! result32[2] = (double)c;
888 ! 204 ! result16[6] = (double)(d & 0xffff);
889 ! 205 ! result16[7] = (double)(d >> 16);
890 ! 206 ! result32[3] = (double)d;
891 ! 207 !}
892 ! 209 !#endif
893 ! 212 !void
894 ! 213 !conv_i32_to_d32_and_d16(double *d32, double *d16, uint32_t *i32, int len)
895 ! 214 !{
898 ! SUBROUTINE conv_i32_to_d32_and_d16
900 ! OFFSET SOURCE LINE LABEL INSTRUCTION
902 .global conv_i32_to_d32_and_d16
903 conv_i32_to_d32_and_d16:
904 /* 000000 214 */ save %sp,-368,%sp
906 ! 215 ! int i;
907 ! 216 ! uint32_t a;
908 ! 218 !#pragma pipeloop(0)
909 ! 219 ! for (i = 0; i < len - 3; i += 4) {
910 ! 220 ! i16_to_d16_and_d32x4(&TwoToMinus16, &TwoTo16, &Zero,
911 ! 221 ! &(d16[2*i]), &(d32[i]),
912 ! 222 ! (float *)(&(i32[i])));
913 ! 223 ! }
914 ! 224 ! for (; i < len; i++) {
915 ! 225 ! a = i32[i];
916 ! 226 ! d32[i] = (double)(i32[i]);
917 ! 227 ! d16[2 * i] = (double)(a & 0xffff);
918 ! 228 ! d16[2 * i + 1] = (double)(a >> 16);
920 /* 0x0004 228 */ sub %i3,3,%i4
921 /* 0x0008 219 */ cmp %i4,0
922 /* 0x000c */ ble,pn %icc,.L77000289
923 /* 0x0010 */ or %g0,0,%i5
924 .L77000306:
925 /* 0x0014 222 */ sethi %hi(Zero),%g3
926 /* 0x0018 */ sethi %hi(TwoToMinus16),%g2
927 /* 0x001c */ sethi %hi(TwoTo16),%o5
928 /* 0x0020 */ ldd [%g3+%lo(Zero)],%f2
929 /* 0x0024 219 */ sub %i3,4,%o4
930 /* 0x0028 */ or %g0,0,%o3
931 /* 0x002c */ or %g0,%i0,%l6
932 /* 0x0030 */ or %g0,%i2,%l5
933 .L900000615:
934 /* 0x0034 222 */ fmovd %f2,%f26
935 /* 0x0038 */ ld [%l5],%f27
936 /* 0x003c */ sra %o3,0,%o0
937 /* 0x0040 */ add %i5,4,%i5
938 /* 0x0044 */ fmovd %f2,%f28
939 /* 0x0048 */ ld [%l5+4],%f29
940 /* 0x004c */ sllx %o0,3,%g5
941 /* 0x0050 */ cmp %i5,%o4
942 /* 0x0054 */ fmovd %f2,%f30
943 /* 0x0058 */ ld [%l5+8],%f31
944 /* 0x005c */ add %i1,%g5,%g4
945 /* 0x0060 */ add %o3,8,%o3
946 /* 0x0064 */ ld [%l5+12],%f3
947 /* 0x0068 */ fxtod %f26,%f26
948 /* 0x006c */ ldd [%g2+%lo(TwoToMinus16)],%f32
949 /* 0x0070 */ fxtod %f28,%f28
950 /* 0x0074 */ add %l5,16,%l5
951 /* 0x0078 */ fxtod %f30,%f30
952 /* 0x007c */ ldd [%o5+%lo(TwoTo16)],%f34
953 /* 0x0080 */ fxtod %f2,%f2
954 /* 0x0084 */ std %f2,[%l6+24]
955 /* 0x0088 */ fmuld %f32,%f26,%f36
956 /* 0x008c */ std %f26,[%l6]
957 /* 0x0090 */ fmuld %f32,%f28,%f38
958 /* 0x0094 */ std %f28,[%l6+8]
959 /* 0x0098 */ fmuld %f32,%f30,%f40
960 /* 0x009c */ std %f30,[%l6+16]
961 /* 0x00a0 */ fmuld %f32,%f2,%f42
962 /* 0x00a4 */ add %l6,32,%l6
963 /* 0x00a8 */ fdtox %f36,%f36
964 /* 0x00ac */ fdtox %f38,%f38
965 /* 0x00b0 */ fdtox %f40,%f40
966 /* 0x00b4 */ fdtox %f42,%f42
967 /* 0x00b8 */ fxtod %f36,%f36
968 /* 0x00bc */ std %f36,[%g4+8]
969 /* 0x00c0 */ fxtod %f38,%f38
970 /* 0x00c4 */ std %f38,[%g4+24]
971 /* 0x00c8 */ fxtod %f40,%f40
972 /* 0x00cc */ std %f40,[%g4+40]
973 /* 0x00d0 */ fxtod %f42,%f42
974 /* 0x00d4 */ std %f42,[%g4+56]
975 /* 0x00d8 */ fmuld %f36,%f34,%f36
976 /* 0x00dc */ fmuld %f38,%f34,%f38
977 /* 0x00e0 */ fmuld %f40,%f34,%f40
978 /* 0x00e4 */ fmuld %f42,%f34,%f42
979 /* 0x00e8 */ fsubd %f26,%f36,%f36
980 /* 0x00ec */ std %f36,[%i1+%g5]
981 /* 0x00f0 */ fsubd %f28,%f38,%f38
982 /* 0x00f4 */ std %f38,[%g4+16]
983 /* 0x00f8 */ fsubd %f30,%f40,%f40
984 /* 0x00fc */ std %f40,[%g4+32]
985 /* 0x0100 */ fsubd %f2,%f42,%f42
986 /* 0x0104 */ std %f42,[%g4+48]
987 /* 0x0108 */ ble,a,pt %icc,.L900000615
988 /* 0x010c */ ldd [%g3+%lo(Zero)],%f2
989 .L77000289:
990 /* 0x0110 224 */ cmp %i5,%i3
991 /* 0x0114 */ bge,pn %icc,.L77000294
992 /* 0x0118 */ sethi %hi(0xfc00),%l0
993 .L77000307:
994 /* 0x011c 224 */ sra %i5,0,%l2
995 /* 0x0120 */ sll %i5,1,%i4
996 /* 0x0124 */ sllx %l2,3,%l1
997 /* 0x0128 */ sllx %l2,2,%o1
998 /* 0x012c 225 */ sub %i3,%i5,%l3
999 /* 0x0130 224 */ add %l0,1023,%l0
1000 /* 0x0134 */ add %l1,%i0,%l1
1001 /* 0x0138 */ add %o1,%i2,%i2
1002 /* 0x013c 225 */ cmp %l3,5
1003 /* 0x0140 */ bl,pn %icc,.L77000291
1004 /* 0x0144 0 */ sethi %hi(___const_seg_900000601),%l7
1005 .L900000612:
1006 /* 0x0148 225 */ prefetch [%l1],22
1007 /* 0x014c */ prefetch [%l1+64],22
1008 /* 0x0150 */ sra %i4,0,%l6
1009 /* 0x0154 226 */ sethi %hi(___const_seg_900000601+8),%l2
1010 /* 0x0158 225 */ prefetch [%l1+128],22
1011 /* 0x015c */ add %l6,-2,%l5
1012 /* 0x0160 */ sub %i3,3,%i0
1013 /* 0x0164 */ prefetch [%l1+192],22
1014 /* 0x0168 */ sllx %l5,3,%o4
1015 /* 0x016c 228 */ add %i5,1,%i5
1016 /* 0x0170 225 */ add %i1,%o4,%o3
1017 /* 0x0174 */ or %g0,%i3,%g1
1018 /* 0x0178 */ ld [%i2],%l4
1019 /* 0x017c */ prefetch [%o3+16],22
1020 /* 0x0180 */ add %o3,16,%l3
1021 /* 0x0184 228 */ add %i2,4,%i2
1022 /* 0x0188 225 */ prefetch [%o3+80],22
1023 /* 0x018c 228 */ srl %l4,16,%o1
1024 /* 0x0190 227 */ and %l4,%l0,%o0
1025 /* 0x0194 225 */ prefetch [%o3+144],22
1026 /* 0x0198 228 */ st %o1,[%sp+2271]
1027 /* 0x019c 227 */ st %o0,[%sp+2239]
1028 /* 0x01a0 226 */ ldd [%l7+%lo(___const_seg_900000601)],%f32
1029 /* 0x01a4 228 */ ld [%l2+%lo(___const_seg_900000601+8)],%f0
1030 /* 0x01a8 225 */ prefetch [%o3+208],22
1031 /* 0x01ac */ prefetch [%o3+272],22
1032 /* 0x01b0 */ prefetch [%o3+336],22
1033 .L900000610:
1034 /* 0x01b4 225 */ prefetch [%l1+192],22
1035 /* 0x01b8 228 */ add %i5,4,%i5
1036 /* 0x01bc 225 */ add %l3,64,%l3
1037 /* 0x01c0 227 */ ld [%l2+%lo(___const_seg_900000601+8)],%f8
1038 /* 0x01c4 228 */ cmp %i5,%i0
1039 /* 0x01c8 225 */ ld [%i2],%g5
1040 /* 0x01cc 228 */ add %i2,16,%i2
1041 /* 0x01d0 */ add %l1,32,%l1
1042 /* 0x01d4 */ add %i4,8,%i4
1043 /* 0x01d8 226 */ ld [%i2-20],%f7
1044 /* 0x01dc 228 */ srl %g5,16,%i3
1045 /* 0x01e0 226 */ fmovs %f8,%f6
1046 /* 0x01e4 228 */ st %i3,[%sp+2335]
1047 /* 0x01e8 227 */ and %g5,%l0,%g4
1048 /* 0x01ec */ st %g4,[%sp+2303]
1049 /* 0x01f0 226 */ fsubd %f6,%f32,%f40
1050 /* 0x01f4 227 */ ld [%sp+2239],%f9
1051 /* 0x01f8 228 */ ld [%sp+2271],%f1
1052 /* 0x01fc */ fmovs %f8,%f12
1053 /* 0x0200 226 */ std %f40,[%l1-32]
1054 /* 0x0204 227 */ fsubd %f8,%f32,%f42
1055 /* 0x0208 */ std %f42,[%l3-64]
1056 /* 0x020c 228 */ fsubd %f0,%f32,%f44
1057 /* 0x0210 */ std %f44,[%l3-56]
1058 /* 0x0214 227 */ fmovs %f12,%f10
1059 /* 0x0218 225 */ ld [%i2-12],%g2
1060 /* 0x021c 226 */ ld [%i2-16],%f1
1061 /* 0x0220 228 */ srl %g2,16,%g3
1062 /* 0x0224 226 */ fmovs %f12,%f0
1063 /* 0x0228 225 */ prefetch [%l3+320],22
1064 /* 0x022c 228 */ st %g3,[%sp+2271]
1065 /* 0x0230 227 */ and %g2,%l0,%l6
1066 /* 0x0234 */ st %l6,[%sp+2239]
1067 /* 0x0238 226 */ fsubd %f0,%f32,%f46
1068 /* 0x023c 227 */ ld [%sp+2303],%f11
1069 /* 0x0240 228 */ ld [%sp+2335],%f13
1070 /* 0x0244 */ fmovs %f12,%f18
1071 /* 0x0248 226 */ std %f46,[%l1-24]
1072 /* 0x024c 227 */ fsubd %f10,%f32,%f48
1073 /* 0x0250 */ std %f48,[%l3-48]
1074 /* 0x0254 228 */ fsubd %f12,%f32,%f50
1075 /* 0x0258 */ std %f50,[%l3-40]
1076 /* 0x025c 227 */ fmovs %f18,%f16
1077 /* 0x0260 225 */ ld [%i2-8],%o5
1078 /* 0x0264 226 */ ld [%i2-12],%f15
1079 /* 0x0268 228 */ srl %o5,16,%l5
1080 /* 0x026c 226 */ fmovs %f18,%f14
1081 /* 0x0270 228 */ st %l5,[%sp+2335]
1082 /* 0x0274 227 */ and %o5,%l0,%o4
1083 /* 0x0278 */ st %o4,[%sp+2303]
1084 /* 0x027c 226 */ fsubd %f14,%f32,%f52
1085 /* 0x0280 227 */ ld [%sp+2239],%f17
1086 /* 0x0284 228 */ ld [%sp+2271],%f19
1087 /* 0x0288 225 */ prefetch [%l3+352],22
1088 /* 0x028c 228 */ fmovs %f18,%f24
1089 /* 0x0290 226 */ std %f52,[%l1-16]
1090 /* 0x0294 227 */ fsubd %f16,%f32,%f54
1091 /* 0x0298 */ std %f54,[%l3-32]
1092 /* 0x029c 228 */ fsubd %f18,%f32,%f56
1093 /* 0x02a0 */ std %f56,[%l3-24]
1094 /* 0x02a4 227 */ fmovs %f24,%f22
1095 /* 0x02a8 225 */ ld [%i2-4],%l4
1096 /* 0x02ac 226 */ ld [%i2-8],%f21
1097 /* 0x02b0 228 */ srl %l4,16,%o3
1098 /* 0x02b4 226 */ fmovs %f24,%f20
1099 /* 0x02b8 228 */ st %o3,[%sp+2271]
1100 /* 0x02bc 227 */ and %l4,%l0,%o2
1101 /* 0x02c0 */ st %o2,[%sp+2239]
1102 /* 0x02c4 226 */ fsubd %f20,%f32,%f58
1103 /* 0x02c8 227 */ ld [%sp+2303],%f23
1104 /* 0x02cc 228 */ ld [%sp+2335],%f25
1105 /* 0x02d0 */ fmovs %f24,%f0
1106 /* 0x02d4 226 */ std %f58,[%l1-8]
1107 /* 0x02d8 227 */ fsubd %f22,%f32,%f60
1108 /* 0x02dc */ std %f60,[%l3-16]
1109 /* 0x02e0 228 */ fsubd %f24,%f32,%f62
1110 /* 0x02e4 */ bl,pt %icc,.L900000610
1111 /* 0x02e8 */ std %f62,[%l3-8]
1112 .L900000613:
1113 /* 0x02ec 227 */ ld [%l2+%lo(___const_seg_900000601+8)],%f4
1114 /* 0x02f0 228 */ add %l1,8,%l1
1115 /* 0x02f4 */ cmp %i5,%g1
1116 /* 0x02f8 226 */ ld [%i2-4],%f3
1117 /* 0x02fc 225 */ or %g0,%g1,%i3
1118 /* 0x0300 228 */ add %i4,2,%i4
1119 /* 0x0304 227 */ ld [%sp+2239],%f5
1120 /* 0x0308 226 */ fmovs %f4,%f2
1121 /* 0x030c 228 */ ld [%sp+2271],%f1
1122 /* 0x0310 226 */ fsubd %f2,%f32,%f34
1123 /* 0x0314 */ std %f34,[%l1-8]
1124 /* 0x0318 227 */ fsubd %f4,%f32,%f36
1125 /* 0x031c */ std %f36,[%l3]
1126 /* 0x0320 228 */ fsubd %f0,%f32,%f38
1127 /* 0x0324 */ bge,pn %icc,.L77000294
1128 /* 0x0328 */ std %f38,[%l3+8]
1129 .L77000291:
1130 /* 0x032c 225 */ ld [%i2],%o2
1131 .L900000614:
1132 /* 0x0330 226 */ ldd [%l7+%lo(___const_seg_900000601)],%f32
1133 /* 0x0334 228 */ srl %o2,16,%l3
1134 /* 0x0338 227 */ sra %i4,0,%i0
1135 /* 0x033c 228 */ st %l3,[%sp+2367]
1136 /* 0x0340 227 */ and %o2,%l0,%g1
1137 /* 0x0344 226 */ sethi %hi(___const_seg_900000601+8),%l2
1138 /* 0x0348 227 */ st %g1,[%sp+2399]
1139 /* 0x034c */ sllx %i0,3,%o0
1140 /* 0x0350 228 */ add %i4,1,%l4
1141 /* 0x0354 226 */ ld [%l2+%lo(___const_seg_900000601+8)],%f4
1142 /* 0x0358 228 */ sra %l4,0,%o1
1143 /* 0x035c */ add %i5,1,%i5
1144 /* 0x0360 226 */ ld [%i2],%f5
1145 /* 0x0364 228 */ sllx %o1,3,%g5
1146 /* 0x0368 */ cmp %i5,%i3
1147 /* 0x036c */ ld [%sp+2367],%f9
1148 /* 0x0370 */ add %i2,4,%i2
1149 /* 0x0374 */ add %i4,2,%i4
1150 /* 0x0378 227 */ fmovs %f4,%f6
1151 /* 0x037c 226 */ fsubd %f4,%f32,%f44
1152 /* 0x0380 */ std %f44,[%l1]
1153 /* 0x0384 227 */ ld [%sp+2399],%f7
1154 /* 0x0388 228 */ fmovs %f6,%f8
1155 /* 0x038c */ add %l1,8,%l1
1156 /* 0x0390 */ fsubd %f8,%f32,%f48
1157 /* 0x0394 227 */ fsubd %f6,%f32,%f46
1158 /* 0x0398 */ std %f46,[%i1+%o0]
1159 /* 0x039c 228 */ std %f48,[%i1+%g5]
1160 /* 0x03a0 */ bl,a,pt %icc,.L900000614
1161 /* 0x03a4 225 */ ld [%i2],%o2
1162 .L77000294:
1163 /* 0x03a8 222 */ ret ! Result =
1164 /* 0x03ac */ restore %g0,%g0,%g0
1165 /* 0x03b0 0 */ .type conv_i32_to_d32_and_d16,2
1166 /* 0x03b0 0 */ .size conv_i32_to_d32_and_d16,(.-conv_i32_to_d32_and_d16)
1168 .section ".text",#alloc,#execinstr
1169 /* 000000 0 */ .align 32
1171 ! 229 ! }
1172 ! 230 !}
1173 ! 232 !extern long long c1, c2, c3, c4;
1174 ! 234 !static void
1175 ! 235 !adjust_montf_result(uint32_t *i32, uint32_t *nint, int len)
1176 ! 236 !{
1179 ! SUBROUTINE adjust_montf_result
1181 ! OFFSET SOURCE LINE LABEL INSTRUCTION
/*
 * adjust_montf_result(uint32_t *i32, uint32_t *nint, int len)
 *
 * Compiler-generated leaf routine (no save/restore; returns via retl) for
 * the C source quoted in the "!" listing comments below.
 *
 * Behaviour, as implemented here:
 *   - If i32[len] is non-zero, or if, scanning from index len-1 down to 0,
 *     either every word of i32 equals the matching word of nint or the
 *     first differing word is larger (unsigned) in i32, then nint[0..len-1]
 *     is subtracted from i32[0..len-1] in place.
 *   - Otherwise (first differing word is smaller or equal in i32) the
 *     routine returns without modifying memory.
 *   - Nothing is stored when len <= 0.
 *
 * The subtraction keeps a 64-bit running value: each step stores its low
 * 32 bits with "st" and carries the rest forward with "srax ...,32"
 * (arithmetic shift, so a borrow propagates as a negative carry).
 *
 * Register usage:
 *   entry : %o0 = i32, %o1 = nint, %o2 = len
 *   %o4   = i32 base, later the walking i32 pointer of the tail loop
 *   %o5   = nint base; reused as the unrolled-loop bound (len-8) and then
 *           as the walking nint pointer of the tail loop
 *   %g2   = sign-extended len; reused as the carry once the subtraction
 *           starts
 *   %g3   = len-1 (upper index bound of the tail loop)
 *   %o0   = scratch for loaded words, then the index counter
 *
 * NOTE(review): "ld" is assumed to be the zero-extending 32-bit load
 * (lduw) of SPARC V9, matching the (uint64_t) casts in the C source.
 * The meaning of prefetch function code 22 is implementation-defined and
 * is not interpreted here.
 */
1183 adjust_montf_result:
/* %g2 = (int64) len; %o4 keeps the i32 base because %o0 is overwritten. */
1184 /* 000000 236 */ sra %o2,0,%g2
1185 /* 0x0004 */ or %g0,%o0,%o4
1187 ! 237 ! int64_t acc;
1188 ! 238 ! int i;
1189 ! 240 ! if (i32[len] > 0) {
/* %o0 = i32[len]; "bleu" against 0 is taken only when the word is zero. */
1191 /* 0x0008 240 */ sllx %g2,2,%g3
1192 /* 0x000c */ ld [%o0+%g3],%o0
1193 /* 0x0010 */ cmp %o0,0
1194 /* 0x0014 */ bleu,pn %icc,.L77000316
/* Delay slot (always executed): %o5 = nint base. */
1195 /* 0x0018 236 */ or %g0,%o1,%o5
1197 ! 241 ! i = -1;
/*
 * i32[len] != 0: skip the comparison scan and go straight to the
 * "len <= 0" test at .L900000712. %g3 = len-1; the delay-slot cmp
 * provides the condition codes that .L900000712 branches on.
 */
1199 .L77000315:
1200 /* 0x001c 241 */ sub %g2,1,%g3
1201 /* 0x0020 */ ba .L900000712
1202 /* 0x0024 249 */ cmp %g2,0
1204 ! 242 ! } else {
1205 ! 243 ! for (i = len - 1; i >= 0; i--) {
/*
 * i32[len] == 0: scan from the top word down for the first difference.
 * %g3 = %o3 = len-1 (i). If that is already negative the scan is empty
 * and control goes to .L77000340.
 */
1207 .L77000316:
1208 /* 0x0028 243 */ subcc %g2,1,%g3
1209 /* 0x002c */ bneg,pn %icc,.L77000340
1210 /* 0x0030 */ or %g0,%g3,%o3
/* Scan setup: %o2 = &i32[i], %o1 = &nint[i], %g4 = nint[i]. */
1211 .L77000348:
1212 /* 0x0034 243 */ sra %g3,0,%o1
1213 /* 0x0038 */ sllx %o1,2,%g1
1215 ! 244 ! if (i32[i] != nint[i]) break;
1217 /* 0x003c 244 */ ld [%g1+%o5],%g4
1218 /* 0x0040 243 */ add %g1,%o4,%o2
1219 /* 0x0044 */ add %g1,%o5,%o1
/*
 * Scan loop: compare i32[i] (%o0) with nint[i] (%g4); leave through
 * .L77000324 on the first mismatch. The delay slot steps the i32
 * pointer down one word on every pass.
 */
1220 .L900000713:
1221 /* 0x0048 244 */ ld [%o2],%o0
1222 /* 0x004c */ cmp %o0,%g4
1223 /* 0x0050 */ bne,pn %icc,.L77000324
1224 /* 0x0054 */ sub %o2,4,%o2
/*
 * Words equal: step the nint pointer, decrement i (%o3) and loop while
 * i >= 0. The branch is annulled (",a"), so the load of the next nint
 * word in its delay slot only executes when the branch is taken.
 */
1225 .L77000320:
1226 /* 0x0058 244 */ sub %o1,4,%o1
1227 /* 0x005c */ subcc %o3,1,%o3
1228 /* 0x0060 */ bpos,a,pt %icc,.L900000713
1229 /* 0x0064 */ ld [%o1],%g4
/* Scan exhausted with all words equal (i < 0): proceed to subtract. */
1230 .L900000706:
1231 /* 0x0068 244 */ ba .L900000712
1232 /* 0x006c 249 */ cmp %g2,0
/*
 * Mismatch at index i (%o3): reload both words and return without
 * subtracting when i32[i] <= nint[i] (unsigned); otherwise fall through
 * to the subtraction.
 */
1233 .L77000324:
1234 /* 0x0070 244 */ sra %o3,0,%o0
1235 /* 0x0074 */ sllx %o0,2,%g1
1236 /* 0x0078 */ ld [%o5+%g1],%o3
1237 /* 0x007c */ ld [%o4+%g1],%g5
1238 /* 0x0080 */ cmp %g5,%o3
1239 /* 0x0084 */ bleu,pt %icc,.L77000332
1240 /* 0x0088 */ nop
1242 ! 245 ! }
1243 ! 246 ! }
1244 ! 247 ! if ((i < 0) || (i32[i] > nint[i])) {
1245 ! 248 ! acc = 0;
1246 ! 249 ! for (i = 0; i < len; i++) {
/*
 * Subtraction entry. Return immediately when len <= 0; the delay slot
 * copies len into %o3 for the size test that follows.
 */
1248 .L77000340:
1249 /* 0x008c 249 */ cmp %g2,0
1250 .L900000712:
1251 /* 0x0090 249 */ ble,pn %icc,.L77000332
1252 /* 0x0094 250 */ or %g0,%g2,%o3
/*
 * i (%o0) = 0. Fewer than 10 words: use only the one-word-at-a-time loop
 * at .L77000341. The delay slot clears %g2, which from here on holds the
 * carry instead of len.
 */
1253 .L77000347:
1254 /* 0x0098 249 */ or %g0,0,%o0
1256 ! 250 ! acc = acc + (uint64_t)(i32[i]) - (uint64_t)(nint[i]);
1258 /* 0x009c 250 */ cmp %o3,10
1259 /* 0x00a0 */ bl,pn %icc,.L77000341
1260 /* 0x00a4 249 */ or %g0,0,%g2
/*
 * len >= 10: prologue of the 8x unrolled loop.
 *   %o1 = nint+4, %o2 = i32+8, %o5 = len-8 (loop bound), %o0 = 2.
 * Word 0 is handled here (i32[0] - nint[0] stored, carry in %g4) and
 * i32[1] is preloaded into %g5 for the first loop step.
 */
1261 .L900000709:
1262 /* 0x00a8 250 */ prefetch [%o4],22
1263 /* 0x00ac */ prefetch [%o4+64],22
1265 ! 251 ! i32[i] = acc & 0xffffffff;
1266 ! 252 ! acc = acc >> 32;
1268 /* 0x00b0 252 */ add %o5,4,%o1
1269 /* 0x00b4 */ add %o4,8,%o2
1270 /* 0x00b8 250 */ prefetch [%o4+128],22
1271 /* 0x00bc */ sub %o3,8,%o5
1272 /* 0x00c0 */ or %g0,2,%o0
1273 /* 0x00c4 */ prefetch [%o4+192],22
1274 /* 0x00c8 */ prefetch [%o4+256],22
1275 /* 0x00cc */ prefetch [%o4+320],22
1276 /* 0x00d0 */ prefetch [%o4+384],22
1277 /* 0x00d4 */ ld [%o2-4],%g5
1278 /* 0x00d8 */ prefetch [%o2+440],22
1279 /* 0x00dc */ prefetch [%o2+504],22
1280 /* 0x00e0 */ ld [%o4],%g4
1281 /* 0x00e4 */ ld [%o1-4],%o4
1282 /* 0x00e8 */ sub %g4,%o4,%o3
1283 /* 0x00ec 251 */ st %o3,[%o2-8]
1284 /* 0x00f0 252 */ srax %o3,32,%g4
/*
 * Unrolled loop: eight words per pass. Both pointers advance by 32 bytes
 * and %o0 by 8 at the top, so the negative offsets below address the
 * eight words being retired. Each step is
 *     diff = i32 word - nint word; sum = diff + carry;
 *     st sum (low 32 bits); carry = sum >> 32 (arithmetic).
 * The i32 word for the following step is loaded one step ahead; the last
 * load of a pass ("ld [%o2-4],%g5") preloads the word consumed either by
 * the next pass or by the epilogue at .L900000710.
 * Loops while %o0 <= len-8; the final srax sits in the branch delay slot.
 */
1285 .L900000707:
1286 /* 0x00f4 252 */ add %o0,8,%o0
1287 /* 0x00f8 */ add %o2,32,%o2
1288 /* 0x00fc 250 */ ld [%o1],%g1
1289 /* 0x0100 */ prefetch [%o2+496],22
1290 /* 0x0104 252 */ cmp %o0,%o5
1291 /* 0x0108 */ add %o1,32,%o1
1292 /* 0x010c 250 */ sub %g5,%g1,%g5
1293 /* 0x0110 */ add %g5,%g4,%o4
1294 /* 0x0114 */ ld [%o2-32],%g4
1295 /* 0x0118 251 */ st %o4,[%o2-36]
1296 /* 0x011c 252 */ srax %o4,32,%g1
1297 /* 0x0120 250 */ ld [%o1-28],%o3
1298 /* 0x0124 */ sub %g4,%o3,%g2
1299 /* 0x0128 */ add %g2,%g1,%g5
1300 /* 0x012c */ ld [%o2-28],%o3
1301 /* 0x0130 251 */ st %g5,[%o2-32]
1302 /* 0x0134 252 */ srax %g5,32,%g4
1303 /* 0x0138 250 */ ld [%o1-24],%o4
1304 /* 0x013c */ sub %o3,%o4,%g1
1305 /* 0x0140 */ add %g1,%g4,%g2
1306 /* 0x0144 */ ld [%o2-24],%o3
1307 /* 0x0148 251 */ st %g2,[%o2-28]
1308 /* 0x014c 252 */ srax %g2,32,%g5
1309 /* 0x0150 250 */ ld [%o1-20],%o4
1310 /* 0x0154 */ sub %o3,%o4,%g4
1311 /* 0x0158 */ add %g4,%g5,%g1
1312 /* 0x015c */ ld [%o2-20],%o4
1313 /* 0x0160 251 */ st %g1,[%o2-24]
1314 /* 0x0164 252 */ srax %g1,32,%o3
1315 /* 0x0168 250 */ ld [%o1-16],%g2
1316 /* 0x016c */ sub %o4,%g2,%g5
1317 /* 0x0170 */ add %g5,%o3,%g1
1318 /* 0x0174 */ ld [%o2-16],%g4
1319 /* 0x0178 251 */ st %g1,[%o2-20]
1320 /* 0x017c 252 */ srax %g1,32,%o4
1321 /* 0x0180 250 */ ld [%o1-12],%g2
1322 /* 0x0184 */ sub %g4,%g2,%o3
1323 /* 0x0188 */ add %o3,%o4,%g5
1324 /* 0x018c */ ld [%o2-12],%g2
1325 /* 0x0190 251 */ st %g5,[%o2-16]
1326 /* 0x0194 252 */ srax %g5,32,%g4
1327 /* 0x0198 250 */ ld [%o1-8],%g1
1328 /* 0x019c */ sub %g2,%g1,%o4
1329 /* 0x01a0 */ add %o4,%g4,%o3
1330 /* 0x01a4 */ ld [%o2-8],%g2
1331 /* 0x01a8 251 */ st %o3,[%o2-12]
1332 /* 0x01ac 252 */ srax %o3,32,%g5
1333 /* 0x01b0 250 */ ld [%o1-4],%g1
1334 /* 0x01b4 */ sub %g2,%g1,%g4
1335 /* 0x01b8 */ add %g4,%g5,%o4
1336 /* 0x01bc */ ld [%o2-4],%g5
1337 /* 0x01c0 251 */ st %o4,[%o2-8]
1338 /* 0x01c4 252 */ ble,pt %icc,.L900000707
1339 /* 0x01c8 */ srax %o4,32,%g4
/*
 * Epilogue of the unrolled loop: retire the preloaded word in %g5, then
 * hand the pointers to the tail loop (%o4 = i32 pointer, %o5 = nint
 * pointer). Return when %o0 > len-1; the delay slot leaves the carry in
 * %g2, where the tail loop expects it.
 */
1340 .L900000710:
1341 /* 0x01cc 250 */ ld [%o1],%o3
1342 /* 0x01d0 252 */ add %o1,4,%o5
1343 /* 0x01d4 250 */ or %g0,%o2,%o4
1344 /* 0x01d8 252 */ cmp %o0,%g3
1345 /* 0x01dc 250 */ sub %g5,%o3,%g2
1346 /* 0x01e0 */ add %g2,%g4,%g1
1347 /* 0x01e4 251 */ st %g1,[%o2-4]
1348 /* 0x01e8 252 */ bg,pn %icc,.L77000332
1349 /* 0x01ec */ srax %g1,32,%g2
/*
 * Tail loop, one word per pass:
 *     sum = carry (%g2) + i32 word (%g5) - nint word (%o2);
 *     store low 32 bits to [%o4]; carry = sum >> 32 (arithmetic).
 * Runs while %o0 <= len-1. The branch is annulled, so the load of the
 * next i32 word in its delay slot only executes when the loop continues.
 */
1350 .L77000341:
1351 /* 0x01f0 250 */ ld [%o4],%g5
1352 .L900000711:
1353 /* 0x01f4 250 */ ld [%o5],%o2
1354 /* 0x01f8 */ add %g2,%g5,%g4
1355 /* 0x01fc 252 */ add %o0,1,%o0
1356 /* 0x0200 */ cmp %o0,%g3
1357 /* 0x0204 */ add %o5,4,%o5
1358 /* 0x0208 250 */ sub %g4,%o2,%o1
1359 /* 0x020c 251 */ st %o1,[%o4]
1360 /* 0x0210 252 */ srax %o1,32,%g2
1361 /* 0x0214 */ add %o4,4,%o4
1362 /* 0x0218 */ ble,a,pt %icc,.L900000711
1363 /* 0x021c 250 */ ld [%o4],%g5
/* Common exit: leaf return, nothing in the delay slot. */
1364 .L77000332:
1365 /* 0x0220 252 */ retl ! Result =
1366 /* 0x0224 */ nop
1367 /* 0x0228 0 */ .type adjust_montf_result,2
1368 /* 0x0228 0 */ .size adjust_montf_result,(.-adjust_montf_result)
1370 .section ".text",#alloc,#execinstr
1371 /* 000000 0 */ .align 32
1373 ! 253 ! }
1374 ! 254 ! }
1375 ! 255 !}
1376 ! 257 !/*************
1377 ! 258 !static void
1378 ! 259 !adjust_montf_result_bad(uint32_t *i32, uint32_t *nint, int len)
1379 ! 260 !{
1380 ! 261 ! int64_t acc;
1381 ! 262 ! int i;
1382 ! 264 ! c4++;
1383 ! 265 !
1384 ! 266 ! if (i32[len] > 0) {
1385 ! 267 ! i = -1;
1386 ! 268 ! c1++;
1387 ! 269 ! } else {
1388 ! 270 ! for (i = len - 1; i >= 0; i++) {
1389 ! 271 ! if (i32[i] != nint[i]) break;
1390 ! 272 ! c2++;
1391 ! 273 ! }
1392 ! 274 ! }
1393 ! 275 ! if ((i < 0) || (i32[i] > nint[i])) {
1394 ! 276 ! c3++;
1395 ! 277 ! acc = 0;
1396 ! 278 ! for (i = 0; i < len; i++) {
1397 ! 279 ! acc = acc + (uint64_t)(i32[i]) - (uint64_t)(nint[i]);
1398 ! 280 ! i32[i] = acc & 0xffffffff;
1399 ! 281 ! acc = acc >> 32;
1400 ! 282 ! }
1401 ! 283 ! }
1402 ! 284 !}
1403 ! 285 !uint32_t saveresult[1000];
1404 ! 286 !void printarray(char *name, uint32_t *arr, int len)
1405 ! 287 !{
1406 ! 288 ! int i, j;
1407 ! 289 ! uint64_t tmp;
1408 ! 291 ! printf("uint64_t %s[%d] =\n{\n",name,(len+1)/2);
1409 ! 292 ! for(i=j=0; i<len; i+=2,j+=2){
1410 ! 293 ! if(j == 6){
1411 ! 294 ! printf("\n");
1412 ! 295 ! j=0;
1413 ! 296 ! }
1414 ! 297 ! tmp = (((uint64_t)arr[i])<<32) | ((uint64_t)arr[i+1]);
1415 ! 298 ! printf("0x%016llx",tmp);
1416 ! 299 ! if((i/2)!=(((len+1)/2)-1))printf(",");
1417 ! 300 ! if(j!=4)printf(" ");
1418 ! 301 ! }
1419 ! 302 ! if(j!=0) printf("\n");
1420 ! 303 ! printf("};\n");
1421 ! 304 !}
1422 ! 305 !**************/
1423 ! 308 !/*
1424 ! 309 ! * the lengths of the input arrays should be at least the following:
1425 ! 310 ! * result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen]
1426 ! 311 ! * all of them should be different from one another
1427 ! 312 ! */
1428 ! 313 !void mont_mulf_noconv(uint32_t *result,
1429 ! 314 ! double *dm1, double *dm2, double *dt,
1430 ! 315 ! double *dn, uint32_t *nint,
1431 ! 316 ! int nlen, double dn0)
1432 ! 317 !{
1435 ! SUBROUTINE mont_mulf_noconv
1437 ! OFFSET SOURCE LINE LABEL INSTRUCTION
1439 .global mont_mulf_noconv
1440 mont_mulf_noconv:
1441 /* 000000 317 */ save %sp,-176,%sp
1442 /* 0x0004 */ ldx [%fp+2223],%g1
1443 /* 0x0008 0 */ sethi %hi(Zero),%l5
1444 /* 0x000c 317 */ or %g0,%i2,%l0
1446 ! 318 ! int i, j, jj;
1447 ! 319 ! double digit, m2j, a, b;
1448 ! 320 ! double *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0;
1449 ! 322 ! pdm1 = &(dm1[0]);
1450 ! 323 ! pdm2 = &(dm2[0]);
1451 ! 324 ! pdn = &(dn[0]);
1452 ! 325 ! pdm2[2 * nlen] = Zero;
1454 /* 0x0010 325 */ ldd [%l5+%lo(Zero)],%f0
1455 /* 0x0014 317 */ or %g0,%i0,%i2
1456 /* 0x0018 325 */ sll %g1,1,%o3
1458 ! 327 ! if (nlen != 16) {
1460 /* 0x001c 327 */ cmp %g1,16
1461 /* 0x0020 325 */ sra %o3,0,%i0
1462 /* 0x0024 */ sllx %i0,3,%o0
1463 /* 0x0028 317 */ or %g0,%i5,%i0
1464 /* 0x002c 327 */ bne,pn %icc,.L77000476
1465 /* 0x0030 325 */ std %f0,[%l0+%o0]
1466 .L77000488:
1467 /* 0x0034 0 */ sethi %hi(TwoToMinus16),%o2
1468 /* 0x0038 0 */ sethi %hi(TwoTo16),%l3
1470 ! 328 ! for (i = 0; i < 4 * nlen + 2; i++)
1471 ! 329 ! dt[i] = Zero;
1472 ! 330 ! a = dt[0] = pdm1[0] * pdm2[0];
1473 ! 331 ! digit = mod(lower32(a, Zero) * dn0, TwoToMinus16, TwoTo16);
1474 ! 333 ! pdtj = &(dt[0]);
1475 ! 334 ! for (j = jj = 0; j < 2 * nlen; j++, jj++, pdtj++) {
1476 ! 335 ! m2j = pdm2[j];
1477 ! 336 ! a = pdtj[0] + pdn[0] * digit;
1478 ! 337 ! b = pdtj[1] + pdm1[0] * pdm2[j + 1] + a * TwoToMinus16;
1479 ! 338 ! pdtj[1] = b;
1480 ! 340 !#pragma pipeloop(0)
1481 ! 341 ! for (i = 1; i < nlen; i++) {
1482 ! 342 ! pdtj[2 * i] += pdm1[i] * m2j + pdn[i] * digit;
1483 ! 343 ! }
1484 ! 344 ! if (jj == 15) {
1485 ! 345 ! cleanup(dt, j / 2 + 1, 2 * nlen + 1);
1486 ! 346 ! jj = 0;
1487 ! 347 ! }
1488 ! 349 ! digit = mod(lower32(b, Zero) * dn0,
1489 ! 350 ! TwoToMinus16, TwoTo16);
1490 ! 351 ! }
1491 ! 352 ! } else {
1492 ! 353 ! a = dt[0] = pdm1[0] * pdm2[0];
1494 /* 0x003c 353 */ ldd [%i1],%f40
1496 ! 355 ! dt[65] = dt[64] = dt[63] = dt[62] = dt[61] = dt[60] =
1497 ! 356 ! dt[59] = dt[58] = dt[57] = dt[56] = dt[55] =
1498 ! 357 ! dt[54] = dt[53] = dt[52] = dt[51] = dt[50] =
1499 ! 358 ! dt[49] = dt[48] = dt[47] = dt[46] = dt[45] =
1500 ! 359 ! dt[44] = dt[43] = dt[42] = dt[41] = dt[40] =
1501 ! 360 ! dt[39] = dt[38] = dt[37] = dt[36] = dt[35] =
1502 ! 361 ! dt[34] = dt[33] = dt[32] = dt[31] = dt[30] =
1503 ! 362 ! dt[29] = dt[28] = dt[27] = dt[26] = dt[25] =
1504 ! 363 ! dt[24] = dt[23] = dt[22] = dt[21] = dt[20] =
1505 ! 364 ! dt[19] = dt[18] = dt[17] = dt[16] = dt[15] =
1506 ! 365 ! dt[14] = dt[13] = dt[12] = dt[11] = dt[10] =
1507 ! 366 ! dt[9] = dt[8] = dt[7] = dt[6] = dt[5] = dt[4] =
1508 ! 367 ! dt[3] = dt[2] = dt[1] = Zero;
1509 ! 369 ! pdn_0 = pdn[0];
1510 ! 370 ! pdm1_0 = pdm1[0];
1511 ! 372 ! digit = mod(lower32(a, Zero) * dn0, TwoToMinus16, TwoTo16);
1512 ! 373 ! pdtj = &(dt[0]);
1514 /* 0x0040 373 */ or %g0,%i3,%o3
1516 ! 375 ! for (j = 0; j < 32; j++, pdtj++) {
1518 /* 0x0044 375 */ or %g0,0,%l1
1519 /* 0x0048 353 */ ldd [%l0],%f42
1520 /* 0x004c 372 */ ldd [%o2+%lo(TwoToMinus16)],%f44
1521 /* 0x0050 */ ldd [%l3+%lo(TwoTo16)],%f46
1522 /* 0x0054 367 */ std %f0,[%i3+8]
1523 /* 0x0058 353 */ fmuld %f40,%f42,%f38
1524 /* 0x005c */ std %f38,[%i3]
1525 /* 0x0060 367 */ std %f0,[%i3+16]
1526 /* 0x0064 */ std %f0,[%i3+24]
1527 /* 0x0068 */ std %f0,[%i3+32]
1528 /* 0x006c 372 */ fdtox %f38,%f4
1529 /* 0x0070 367 */ std %f0,[%i3+40]
1530 /* 0x0074 */ std %f0,[%i3+48]
1531 /* 0x0078 */ std %f0,[%i3+56]
1532 /* 0x007c 372 */ fmovs %f0,%f4
1533 /* 0x0080 367 */ std %f0,[%i3+64]
1534 /* 0x0084 */ std %f0,[%i3+72]
1535 /* 0x0088 372 */ fxtod %f4,%f52
1536 /* 0x008c 367 */ std %f0,[%i3+80]
1537 /* 0x0090 */ std %f0,[%i3+88]
1538 /* 0x0094 */ std %f0,[%i3+96]
1539 /* 0x0098 */ std %f0,[%i3+104]
1540 /* 0x009c 372 */ fmuld %f52,%f14,%f60
1541 /* 0x00a0 367 */ std %f0,[%i3+112]
1542 /* 0x00a4 */ std %f0,[%i3+120]
1543 /* 0x00a8 */ std %f0,[%i3+128]
1544 /* 0x00ac */ std %f0,[%i3+136]
1545 /* 0x00b0 372 */ fmuld %f60,%f44,%f62
1546 /* 0x00b4 367 */ std %f0,[%i3+144]
1547 /* 0x00b8 */ std %f0,[%i3+152]
1548 /* 0x00bc */ std %f0,[%i3+160]
1549 /* 0x00c0 */ std %f0,[%i3+168]
1550 /* 0x00c4 372 */ fdtox %f62,%f32
1551 /* 0x00c8 367 */ std %f0,[%i3+176]
1552 /* 0x00cc */ std %f0,[%i3+184]
1553 /* 0x00d0 */ std %f0,[%i3+192]
1554 /* 0x00d4 */ std %f0,[%i3+200]
1555 /* 0x00d8 372 */ fxtod %f32,%f50
1556 /* 0x00dc 367 */ std %f0,[%i3+208]
1557 /* 0x00e0 */ std %f0,[%i3+216]
1558 /* 0x00e4 */ std %f0,[%i3+224]
1559 /* 0x00e8 */ std %f0,[%i3+232]
1560 /* 0x00ec 372 */ fmuld %f50,%f46,%f34
1561 /* 0x00f0 367 */ std %f0,[%i3+240]
1562 /* 0x00f4 */ std %f0,[%i3+248]
1563 /* 0x00f8 */ std %f0,[%i3+256]
1564 /* 0x00fc */ std %f0,[%i3+264]
1565 /* 0x0100 372 */ fsubd %f60,%f34,%f40
1566 /* 0x0104 367 */ std %f0,[%i3+272]
1567 /* 0x0108 */ std %f0,[%i3+280]
1568 /* 0x010c */ std %f0,[%i3+288]
1569 /* 0x0110 */ std %f0,[%i3+296]
1570 /* 0x0114 */ std %f0,[%i3+304]
1571 /* 0x0118 */ std %f0,[%i3+312]
1572 /* 0x011c */ std %f0,[%i3+320]
1573 /* 0x0120 */ std %f0,[%i3+328]
1574 /* 0x0124 */ std %f0,[%i3+336]
1575 /* 0x0128 */ std %f0,[%i3+344]
1576 /* 0x012c */ std %f0,[%i3+352]
1577 /* 0x0130 */ std %f0,[%i3+360]
1578 /* 0x0134 */ std %f0,[%i3+368]
1579 /* 0x0138 375 */ sub %g1,1,%l3
1580 /* 0x013c */ add %i3,8,%o7
1581 /* 0x0140 367 */ std %f0,[%i3+376]
1582 /* 0x0144 */ std %f0,[%i3+384]
1583 /* 0x0148 */ std %f0,[%i3+392]
1584 /* 0x014c */ std %f0,[%i3+400]
1585 /* 0x0150 */ std %f0,[%i3+408]
1586 /* 0x0154 */ std %f0,[%i3+416]
1587 /* 0x0158 */ std %f0,[%i3+424]
1588 /* 0x015c */ std %f0,[%i3+432]
1589 /* 0x0160 */ std %f0,[%i3+440]
1590 /* 0x0164 */ std %f0,[%i3+448]
1591 /* 0x0168 */ std %f0,[%i3+456]
1592 /* 0x016c */ std %f0,[%i3+464]
1593 /* 0x0170 */ std %f0,[%i3+472]
1594 /* 0x0174 */ std %f0,[%i3+480]
1595 /* 0x0178 */ std %f0,[%i3+488]
1596 /* 0x017c */ std %f0,[%i3+496]
1597 /* 0x0180 */ std %f0,[%i3+504]
1598 /* 0x0184 */ std %f0,[%i3+512]
1599 /* 0x0188 */ std %f0,[%i3+520]
1601 !BEGIN HAND CODED PART
1603 ! cheetah schedule, no even-odd trick
1606 add %i3,%g0,%o5
1608 fmovd %f40,%f0
1609 fmovd %f14,%f2
1610 fmovd %f44,%f8
1611 sethi %hi(TwoTo32),%l5
1612 fmovd %f46,%f10
1613 sethi %hi(TwoToMinus32),%g5
1614 ldd [%i3],%f6
1615 ldd [%l0],%f4
1617 ldd [%i1],%f40
1618 ldd [%i1+8],%f42
1619 ldd [%i1+16],%f52
1620 ldd [%i1+48],%f54
1621 ldd [%i1+56],%f36
1622 ldd [%i1+64],%f56
1623 ldd [%i1+104],%f48
1624 ldd [%i1+112],%f58
1626 ldd [%i4],%f44
1627 ldd [%i4+8],%f46
1628 ldd [%i4+104],%f50
1629 ldd [%i4+112],%f60
1632 .L99999999:
1634 ldd [%i1+24],%f20
1635 fmuld %f0,%f44,%f12
1637 ldd [%i4+24],%f22
1638 fmuld %f42,%f4,%f16
1640 ldd [%i1+40],%f24
1641 fmuld %f46,%f0,%f18
1643 ldd [%i4+40],%f26
1644 fmuld %f20,%f4,%f20
1646 ldd [%l0+8],%f38
1647 faddd %f12,%f6,%f12
1648 fmuld %f22,%f0,%f22
1650 add %l0,8,%l0
1651 ldd [%i4+56],%f30
1652 fmuld %f24,%f4,%f24
1654 ldd [%i1+72],%f32
1655 faddd %f16,%f18,%f16
1656 fmuld %f26,%f0,%f26
1658 ldd [%i3+16],%f18
1659 fmuld %f40,%f38,%f14
1661 ldd [%i4+72],%f34
1662 faddd %f20,%f22,%f20
1663 fmuld %f8,%f12,%f12
1665 ldd [%i3+48],%f22
1666 fmuld %f36,%f4,%f28
1668 ldd [%i3+8],%f6
1669 faddd %f16,%f18,%f16
1670 fmuld %f30,%f0,%f30
1672 std %f16,[%i3+16]
1673 faddd %f24,%f26,%f24
1674 fmuld %f32,%f4,%f32
1676 ldd [%i3+80],%f26
1677 faddd %f12,%f14,%f12
1678 fmuld %f34,%f0,%f34
1680 ldd [%i1+88],%f16
1681 faddd %f20,%f22,%f20
1683 ldd [%i4+88],%f18
1684 faddd %f28,%f30,%f28
1686 ldd [%i3+112],%f30
1687 faddd %f32,%f34,%f32
1689 ldd [%i3+144],%f34
1690 faddd %f12,%f6,%f6
1691 fmuld %f16,%f4,%f16
1693 std %f20,[%i3+48]
1694 faddd %f24,%f26,%f24
1695 fmuld %f18,%f0,%f18
1697 std %f24,[%i3+80]
1698 faddd %f28,%f30,%f28
1699 fmuld %f48,%f4,%f20
1701 std %f28,[%i3+112]
1702 faddd %f32,%f34,%f32
1703 fmuld %f50,%f0,%f22
1705 ldd [%i1+120],%f24
1706 fdtox %f6,%f12
1708 std %f32,[%i3+144]
1709 faddd %f16,%f18,%f16
1711 ldd [%i4+120],%f26
1713 ldd [%i3+176],%f18
1714 faddd %f20,%f22,%f20
1715 fmuld %f24,%f4,%f24
1717 ldd [%i4+16],%f30
1718 fmovs %f11,%f12
1720 ldd [%i1+32],%f32
1721 fmuld %f26,%f0,%f26
1723 ldd [%i4+32],%f34
1724 fmuld %f52,%f4,%f28
1726 ldd [%i3+208],%f22
1727 faddd %f16,%f18,%f16
1728 fmuld %f30,%f0,%f30
1730 std %f16,[%i3+176]
1731 fxtod %f12,%f12
1732 fmuld %f32,%f4,%f32
1734 ldd [%i4+48],%f18
1735 faddd %f24,%f26,%f24
1736 fmuld %f34,%f0,%f34
1738 ldd [%i3+240],%f26
1739 faddd %f20,%f22,%f20
1741 std %f20,[%i3+208]
1742 faddd %f28,%f30,%f28
1743 fmuld %f54,%f4,%f16
1745 ldd [%i3+32],%f30
1746 fmuld %f12,%f2,%f14
1748 ldd [%i4+64],%f22
1749 faddd %f32,%f34,%f32
1750 fmuld %f18,%f0,%f18
1752 ldd [%i3+64],%f34
1753 faddd %f24,%f26,%f24
1755 std %f24,[%i3+240]
1756 faddd %f28,%f30,%f28
1757 fmuld %f56,%f4,%f20
1759 std %f28,[%i3+32]
1760 fmuld %f14,%f8,%f12
1762 ldd [%i1+80],%f24
1763 faddd %f32,%f34,%f34 ! yes, tmp52!
1764 fmuld %f22,%f0,%f22
1766 ldd [%i4+80],%f26
1767 faddd %f16,%f18,%f16
1769 ldd [%i1+96],%f28
1770 fmuld %f58,%f4,%f32
1772 ldd [%i4+96],%f30
1773 fdtox %f12,%f12
1774 fmuld %f24,%f4,%f24
1776 std %f34,[%i3+64] ! yes, tmp52!
1777 faddd %f20,%f22,%f20
1778 fmuld %f26,%f0,%f26
1780 ldd [%i3+96],%f18
1781 fmuld %f28,%f4,%f28
1783 ldd [%i3+128],%f22
1784 fmovd %f38,%f4
1785 fmuld %f30,%f0,%f30
1787 fxtod %f12,%f12
1788 fmuld %f60,%f0,%f34
1790 add %i3,8,%i3
1791 faddd %f24,%f26,%f24
1793 ldd [%i3+160-8],%f26
1794 faddd %f16,%f18,%f16
1796 std %f16,[%i3+96-8]
1797 faddd %f28,%f30,%f28
1799 ldd [%i3+192-8],%f30
1800 faddd %f32,%f34,%f32
1801 fmuld %f12,%f10,%f12
1803 ldd [%i3+224-8],%f34
1804 faddd %f20,%f22,%f20
1806 std %f20,[%i3+128-8]
1807 faddd %f24,%f26,%f24
1809 add %l1,1,%l1
1810 std %f24,[%i3+160-8]
1811 faddd %f28,%f30,%f28
1813 cmp %l1,15
1814 std %f28,[%i3+192-8]
1815 fsubd %f14,%f12,%f0
1817 faddd %f32,%f34,%f32
1818 ble,pt %icc,.L99999999
1819 std %f32,[%i3+224-8]
1823 ldd [%g5+%lo(TwoToMinus32)],%f8
1825 ldd [%i3+8],%f16
1827 ldd [%i3+16],%f20
1829 fmuld %f8,%f16,%f18
1830 ldd [%i3+24],%f24
1832 fmuld %f8,%f20,%f22
1833 ldd [%i3+32],%f28
1835 fmuld %f8,%f24,%f26
1836 ldd [%l5+%lo(TwoTo32)],%f10
1838 fmuld %f8,%f28,%f30
1840 fdtox %f18,%f18
1842 fdtox %f22,%f22
1844 fdtox %f26,%f26
1845 ldd [%i3+40],%f32
1847 fdtox %f30,%f30
1848 ldd [%i3+48],%f56
1850 fxtod %f18,%f18
1851 fmuld %f8,%f32,%f34
1852 ldd [%i3+56],%f36
1854 fxtod %f22,%f22
1855 fmuld %f8,%f56,%f58
1856 ldd [%i3+64],%f38
1858 fxtod %f26,%f26
1859 fmuld %f8,%f36,%f60
1861 fxtod %f30,%f30
1862 fmuld %f8,%f38,%f62
1864 fdtox %f34,%f34
1865 fmuld %f10,%f18,%f40
1867 fdtox %f58,%f58
1868 fmuld %f10,%f22,%f42
1870 fdtox %f60,%f60
1871 fmuld %f10,%f26,%f44
1873 fdtox %f62,%f62
1874 fmuld %f10,%f30,%f46
1876 fxtod %f34,%f34
1878 fxtod %f58,%f58
1880 fxtod %f60,%f60
1882 fxtod %f62,%f62
1884 fsubd %f16,%f40,%f40
1885 fmuld %f10,%f34,%f48
1887 fsubd %f20,%f42,%f42
1888 fmuld %f10,%f58,%f50
1890 fsubd %f24,%f44,%f44
1891 fmuld %f10,%f60,%f52
1893 fsubd %f28,%f46,%f46
1894 fmuld %f10,%f62,%f54
1896 std %f40,[%i3+8]
1898 std %f42,[%i3+16]
1900 faddd %f18,%f44,%f44
1901 std %f44,[%i3+24]
1903 faddd %f22,%f46,%f46
1904 std %f46,[%i3+32]
1909 fsubd %f32,%f48,%f48
1910 ldd [%i3+64+8],%f16
1912 fsubd %f56,%f50,%f50
1913 ldd [%i3+64+16],%f20
1915 fsubd %f36,%f52,%f52
1916 ldd [%i3+64+24],%f24
1918 fsubd %f38,%f54,%f54
1919 ldd [%i3+64+32],%f28
1921 faddd %f26,%f48,%f48
1922 fmuld %f8,%f16,%f18
1923 std %f48,[%i3+40]
1925 faddd %f30,%f50,%f50
1926 fmuld %f8,%f20,%f22
1927 std %f50,[%i3+48]
1929 faddd %f34,%f52,%f52
1930 fmuld %f8,%f24,%f26
1931 std %f52,[%i3+56]
1933 faddd %f58,%f54,%f54
1934 fmuld %f8,%f28,%f30
1935 std %f54,[%i3+64]
1939 fdtox %f18,%f18
1941 fdtox %f22,%f22
1943 fdtox %f26,%f26
1944 ldd [%i3+64+40],%f32
1946 fdtox %f30,%f30
1947 ldd [%i3+64+48],%f56
1949 fxtod %f18,%f18
1950 fmuld %f8,%f32,%f34
1951 ldd [%i3+64+56],%f36
1953 fxtod %f22,%f22
1954 fmuld %f8,%f56,%f58
1955 ldd [%i3+64+64],%f38
1957 fxtod %f26,%f26
1958 fmuld %f8,%f36,%f12
1960 fxtod %f30,%f30
1961 fmuld %f8,%f38,%f14
1963 fdtox %f34,%f34
1964 fmuld %f10,%f18,%f40
1966 fdtox %f58,%f58
1967 fmuld %f10,%f22,%f42
1969 fdtox %f12,%f12
1970 fmuld %f10,%f26,%f44
1972 fdtox %f14,%f14
1973 fmuld %f10,%f30,%f46
1975 fxtod %f34,%f34
1977 fxtod %f58,%f58
1979 fxtod %f12,%f12
1981 fxtod %f14,%f14
1983 fsubd %f16,%f40,%f40
1984 fmuld %f10,%f34,%f48
1986 fsubd %f20,%f42,%f42
1987 fmuld %f10,%f58,%f50
1989 fsubd %f24,%f44,%f44
1990 fmuld %f10,%f12,%f52
1992 fsubd %f28,%f46,%f46
1993 fmuld %f10,%f14,%f54
1995 faddd %f60,%f40,%f40
1996 std %f40,[%i3+64+8]
1998 faddd %f62,%f42,%f42
1999 std %f42,[%i3+64+16]
2001 faddd %f18,%f44,%f44
2002 std %f44,[%i3+64+24]
2004 faddd %f22,%f46,%f46
2005 std %f46,[%i3+64+32]
2010 fsubd %f32,%f48,%f48
2011 ldd [%i3+64+64+8],%f16
2013 fsubd %f56,%f50,%f50
2014 ldd [%i3+64+64+16],%f20
2016 fsubd %f36,%f52,%f52
2017 ldd [%i3+64+64+24],%f24
2019 fsubd %f38,%f54,%f54
2020 ldd [%i3+64+64+32],%f28
2022 faddd %f26,%f48,%f48
2023 fmuld %f8,%f16,%f18
2024 std %f48,[%i3+64+40]
2026 faddd %f30,%f50,%f50
2027 fmuld %f8,%f20,%f22
2028 std %f50,[%i3+64+48]
2030 faddd %f34,%f52,%f52
2031 fmuld %f8,%f24,%f26
2032 std %f52,[%i3+64+56]
2034 faddd %f58,%f54,%f54
2035 fmuld %f8,%f28,%f30
2036 std %f54,[%i3+64+64]
2041 fdtox %f18,%f18
2043 fdtox %f22,%f22
2045 fdtox %f26,%f26
2046 ldd [%i3+64+64+40],%f32
2048 fdtox %f30,%f30
2049 ldd [%i3+64+64+48],%f56
2051 fxtod %f18,%f18
2052 fmuld %f8,%f32,%f34
2053 ldd [%i3+64+64+56],%f36
2055 fxtod %f22,%f22
2056 fmuld %f8,%f56,%f58
2057 ldd [%i3+64+64+64],%f38
2059 fxtod %f26,%f26
2060 fmuld %f8,%f36,%f60
2062 fxtod %f30,%f30
2063 fmuld %f8,%f38,%f62
2065 fdtox %f34,%f34
2066 fmuld %f10,%f18,%f40
2068 fdtox %f58,%f58
2069 fmuld %f10,%f22,%f42
2071 fdtox %f60,%f60
2072 fmuld %f10,%f26,%f44
2074 fdtox %f62,%f62
2075 fmuld %f10,%f30,%f46
2077 fxtod %f34,%f34
2079 fxtod %f58,%f58
2081 fxtod %f60,%f60
2083 fxtod %f62,%f62
2085 fsubd %f16,%f40,%f40
2086 fmuld %f10,%f34,%f48
2088 fsubd %f20,%f42,%f42
2089 fmuld %f10,%f58,%f50
2091 fsubd %f24,%f44,%f44
2092 fmuld %f10,%f60,%f52
2094 fsubd %f28,%f46,%f46
2095 fmuld %f10,%f62,%f54
2097 faddd %f12,%f40,%f40
2098 std %f40,[%i3+64+64+8]
2100 faddd %f14,%f42,%f42
2101 std %f42,[%i3+64+64+16]
2103 faddd %f18,%f44,%f44
2104 std %f44,[%i3+64+64+24]
2106 faddd %f22,%f46,%f46
2107 std %f46,[%i3+64+64+32]
2111 fsubd %f32,%f48,%f48
2112 ldd [%i3+64+64+64+8],%f16
2114 fsubd %f56,%f50,%f50
2115 ldd [%i3+64+64+64+16],%f20
2117 fsubd %f36,%f52,%f52
2118 ldd [%i3+64+64+64+24],%f24
2120 fsubd %f38,%f54,%f54
2121 ldd [%i3+64+64+64+32],%f28
2123 faddd %f26,%f48,%f48
2124 fmuld %f8,%f16,%f18
2125 std %f48,[%i3+64+64+40]
2127 faddd %f30,%f50,%f50
2128 fmuld %f8,%f20,%f22
2129 std %f50,[%i3+64+64+48]
2131 faddd %f34,%f52,%f52
2132 fmuld %f8,%f24,%f26
2133 std %f52,[%i3+64+64+56]
2135 faddd %f58,%f54,%f54
2136 fmuld %f8,%f28,%f30
2137 std %f54,[%i3+64+64+64]
2141 fdtox %f18,%f18
2143 fdtox %f22,%f22
2145 fdtox %f26,%f26
2146 ldd [%i3+64+64+64+40],%f32
2148 fdtox %f30,%f30
2149 ldd [%i3+64+64+64+48],%f56
2151 fxtod %f18,%f18
2152 fmuld %f8,%f32,%f34
2153 ldd [%i3+64+64+64+56],%f36
2155 fxtod %f22,%f22
2156 fmuld %f8,%f56,%f58
2157 ldd [%i3+64+64+64+64],%f38
2159 fxtod %f26,%f26
2160 fmuld %f8,%f36,%f12
2162 fxtod %f30,%f30
2163 fmuld %f8,%f38,%f14
2165 fdtox %f34,%f34
2166 fmuld %f10,%f18,%f40
2168 fdtox %f58,%f58
2169 fmuld %f10,%f22,%f42
2171 fdtox %f12,%f12
2172 fmuld %f10,%f26,%f44
2174 fdtox %f14,%f14
2175 fmuld %f10,%f30,%f46
2177 sethi %hi(TwoToMinus16),%g5
2178 fxtod %f34,%f34
2180 sethi %hi(TwoTo16),%l5
2181 fxtod %f58,%f58
2183 fxtod %f12,%f12
2185 fxtod %f14,%f14
2187 fsubd %f16,%f40,%f16
2188 fmuld %f10,%f34,%f48
2189 ldd [%g5+%lo(TwoToMinus16)],%f8
2191 fsubd %f20,%f42,%f20
2192 fmuld %f10,%f58,%f50
2193 ldd [%i1],%f40 ! should be %f40
2195 fsubd %f24,%f44,%f24
2196 fmuld %f10,%f12,%f52
2197 ldd [%i1+8],%f42 ! should be %f42
2199 fsubd %f28,%f46,%f28
2200 fmuld %f10,%f14,%f54
2201 ldd [%i4],%f44 ! should be %f44
2203 faddd %f60,%f16,%f16
2204 std %f16,[%i3+64+64+64+8]
2206 faddd %f62,%f20,%f20
2207 std %f20,[%i3+64+64+64+16]
2209 faddd %f18,%f24,%f24
2210 std %f24,[%i3+64+64+64+24]
2212 faddd %f22,%f28,%f28
2213 std %f28,[%i3+64+64+64+32]
2215 fsubd %f32,%f48,%f32
2216 ldd [%i4+8],%f46 ! should be %f46
2218 fsubd %f56,%f50,%f56
2219 ldd [%i1+104],%f48 ! should be %f48
2221 fsubd %f36,%f52,%f36
2222 ldd [%i4+104],%f50 ! should be %f50
2224 fsubd %f38,%f54,%f38
2225 ldd [%i1+16],%f52 ! should be %f52
2227 faddd %f26,%f32,%f32
2228 std %f32,[%i3+64+64+64+40]
2230 faddd %f30,%f56,%f56
2231 std %f56,[%i3+64+64+64+48]
2233 faddd %f34,%f36,%f36
2234 std %f36,[%i3+64+64+64+56]
2236 faddd %f58,%f38,%f38
2237 std %f38,[%i3+64+64+64+64]
2239 std %f12,[%i3+64+64+64+64+8]
2241 std %f14,[%i3+64+64+64+64+16]
2244 ldd [%l5+%lo(TwoTo16)],%f10
2245 ldd [%i1+48],%f54
2246 ldd [%i1+56],%f36
2247 ldd [%i1+64],%f56
2248 ldd [%i1+112],%f58
2250 ldd [%i4+104],%f50
2251 ldd [%i4+112],%f60
2254 .L99999998:
2256 ldd [%i1+24],%f20
2257 fmuld %f0,%f44,%f12
2259 ldd [%i4+24],%f22
2260 fmuld %f42,%f4,%f16
2262 ldd [%i1+40],%f24
2263 fmuld %f46,%f0,%f18
2265 ldd [%i4+40],%f26
2266 fmuld %f20,%f4,%f20
2268 ldd [%l0+8],%f38
2269 faddd %f12,%f6,%f12
2270 fmuld %f22,%f0,%f22
2272 add %l0,8,%l0
2273 ldd [%i4+56],%f30
2274 fmuld %f24,%f4,%f24
2276 ldd [%i1+72],%f32
2277 faddd %f16,%f18,%f16
2278 fmuld %f26,%f0,%f26
2280 ldd [%i3+16],%f18
2281 fmuld %f40,%f38,%f14
2283 ldd [%i4+72],%f34
2284 faddd %f20,%f22,%f20
2285 fmuld %f8,%f12,%f12
2287 ldd [%i3+48],%f22
2288 fmuld %f36,%f4,%f28
2290 ldd [%i3+8],%f6
2291 faddd %f16,%f18,%f16
2292 fmuld %f30,%f0,%f30
2294 std %f16,[%i3+16]
2295 faddd %f24,%f26,%f24
2296 fmuld %f32,%f4,%f32
2298 ldd [%i3+80],%f26
2299 faddd %f12,%f14,%f12
2300 fmuld %f34,%f0,%f34
2302 ldd [%i1+88],%f16
2303 faddd %f20,%f22,%f20
2305 ldd [%i4+88],%f18
2306 faddd %f28,%f30,%f28
2308 ldd [%i3+112],%f30
2309 faddd %f32,%f34,%f32
2311 ldd [%i3+144],%f34
2312 faddd %f12,%f6,%f6
2313 fmuld %f16,%f4,%f16
2315 std %f20,[%i3+48]
2316 faddd %f24,%f26,%f24
2317 fmuld %f18,%f0,%f18
2319 std %f24,[%i3+80]
2320 faddd %f28,%f30,%f28
2321 fmuld %f48,%f4,%f20
2323 std %f28,[%i3+112]
2324 faddd %f32,%f34,%f32
2325 fmuld %f50,%f0,%f22
2327 ldd [%i1+120],%f24
2328 fdtox %f6,%f12
2330 std %f32,[%i3+144]
2331 faddd %f16,%f18,%f16
2333 ldd [%i4+120],%f26
2335 ldd [%i3+176],%f18
2336 faddd %f20,%f22,%f20
2337 fmuld %f24,%f4,%f24
2339 ldd [%i4+16],%f30
2340 fmovs %f11,%f12
2342 ldd [%i1+32],%f32
2343 fmuld %f26,%f0,%f26
2345 ldd [%i4+32],%f34
2346 fmuld %f52,%f4,%f28
2348 ldd [%i3+208],%f22
2349 faddd %f16,%f18,%f16
2350 fmuld %f30,%f0,%f30
2352 std %f16,[%i3+176]
2353 fxtod %f12,%f12
2354 fmuld %f32,%f4,%f32
2356 ldd [%i4+48],%f18
2357 faddd %f24,%f26,%f24
2358 fmuld %f34,%f0,%f34
2360 ldd [%i3+240],%f26
2361 faddd %f20,%f22,%f20
2363 std %f20,[%i3+208]
2364 faddd %f28,%f30,%f28
2365 fmuld %f54,%f4,%f16
2367 ldd [%i3+32],%f30
2368 fmuld %f12,%f2,%f14
2370 ldd [%i4+64],%f22
2371 faddd %f32,%f34,%f32
2372 fmuld %f18,%f0,%f18
2374 ldd [%i3+64],%f34
2375 faddd %f24,%f26,%f24
2377 std %f24,[%i3+240]
2378 faddd %f28,%f30,%f28
2379 fmuld %f56,%f4,%f20
2381 std %f28,[%i3+32]
2382 fmuld %f14,%f8,%f12
2384 ldd [%i1+80],%f24
2385 faddd %f32,%f34,%f34 ! yes, tmp52!
2386 fmuld %f22,%f0,%f22
2388 ldd [%i4+80],%f26
2389 faddd %f16,%f18,%f16
2391 ldd [%i1+96],%f28
2392 fmuld %f58,%f4,%f32
2394 ldd [%i4+96],%f30
2395 fdtox %f12,%f12
2396 fmuld %f24,%f4,%f24
2398 std %f34,[%i3+64] ! yes, tmp52!
2399 faddd %f20,%f22,%f20
2400 fmuld %f26,%f0,%f26
2402 ldd [%i3+96],%f18
2403 fmuld %f28,%f4,%f28
2405 ldd [%i3+128],%f22
2406 fmovd %f38,%f4
2407 fmuld %f30,%f0,%f30
2409 fxtod %f12,%f12
2410 fmuld %f60,%f0,%f34
2412 add %i3,8,%i3
2413 faddd %f24,%f26,%f24
2415 ldd [%i3+160-8],%f26
2416 faddd %f16,%f18,%f16
2418 std %f16,[%i3+96-8]
2419 faddd %f28,%f30,%f28
2421 ldd [%i3+192-8],%f30
2422 faddd %f32,%f34,%f32
2423 fmuld %f12,%f10,%f12
2425 ldd [%i3+224-8],%f34
2426 faddd %f20,%f22,%f20
2428 std %f20,[%i3+128-8]
2429 faddd %f24,%f26,%f24
2431 add %l1,1,%l1
2432 std %f24,[%i3+160-8]
2433 faddd %f28,%f30,%f28
2435 cmp %l1,31
2436 std %f28,[%i3+192-8]
2437 fsubd %f14,%f12,%f0
2439 faddd %f32,%f34,%f32
2440 ble,pt %icc,.L99999998
2441 std %f32,[%i3+224-8]
2443 std %f6,[%i3]
2445 add %o5,%g0,%i3
2448 !END HAND CODED PART
2449 .L900000828:
2450 /* 0x03e4 405 */ ba .L900000852
2451 /* 0x03e8 409 */ ldx [%i3+%o0],%l1
2453 ! 406 ! }
2454 ! 407 ! }
2455 ! 409 ! conv_d16_to_i32(result, dt + 2 * nlen, (int64_t *)dt, nlen + 1);
2456 ! 411 !/*for(i=0;i<nlen+1;i++) saveresult[i]=result[i];*/
2457 ! 413 ! adjust_montf_result(result, nint, nlen);
2459 .L77000476:
2460 /* 0x03ec 413 */ sll %g1,2,%l3
2461 /* 0x03f0 0 */ sethi %hi(TwoTo16),%g5
2462 /* 0x03f4 413 */ add %l3,2,%l2
2463 /* 0x03f8 328 */ cmp %l2,0
2464 /* 0x03fc */ ble,pn %icc,.L77000482
2465 /* 0x0400 0 */ sethi %hi(TwoToMinus16),%o2
2466 .L77000514:
2467 /* 0x0404 329 */ add %l3,2,%l2
2468 /* 0x0408 328 */ add %l3,1,%o4
2469 /* 0x040c */ or %g0,0,%l3
2470 /* 0x0410 329 */ cmp %l2,8
2471 /* 0x0414 */ bl,pn %icc,.L77000477
2472 /* 0x0418 328 */ or %g0,%i3,%l1
2473 .L900000831:
2474 /* 0x041c 329 */ prefetch [%i3],22
2475 /* 0x0420 */ sub %o4,7,%l4
2476 /* 0x0424 */ or %g0,0,%l3
2477 /* 0x0428 */ or %g0,%i3,%l1
2478 .L900000829:
2479 /* 0x042c 329 */ prefetch [%l1+528],22
2480 /* 0x0430 */ std %f0,[%l1]
2481 /* 0x0434 */ add %l3,8,%l3
2482 /* 0x0438 */ add %l1,64,%l1
2483 /* 0x043c */ std %f0,[%l1-56]
2484 /* 0x0440 */ cmp %l3,%l4
2485 /* 0x0444 */ std %f0,[%l1-48]
2486 /* 0x0448 */ std %f0,[%l1-40]
2487 /* 0x044c */ prefetch [%l1+496],22
2488 /* 0x0450 */ std %f0,[%l1-32]
2489 /* 0x0454 */ std %f0,[%l1-24]
2490 /* 0x0458 */ std %f0,[%l1-16]
2491 /* 0x045c */ ble,pt %icc,.L900000829
2492 /* 0x0460 */ std %f0,[%l1-8]
2493 .L900000832:
2494 /* 0x0464 329 */ cmp %l3,%o4
2495 /* 0x0468 */ bg,pn %icc,.L77000482
2496 /* 0x046c */ nop
2497 .L77000477:
2498 /* 0x0470 329 */ add %l3,1,%l3
2499 .L900000851:
2500 /* 0x0474 329 */ std %f0,[%l1]
2501 /* 0x0478 */ cmp %l3,%o4
2502 /* 0x047c */ add %l1,8,%l1
2503 /* 0x0480 */ ble,pt %icc,.L900000851
2504 /* 0x0484 */ add %l3,1,%l3
2505 .L77000482:
2506 /* 0x0488 330 */ ldd [%i1],%f40
2507 /* 0x048c 334 */ cmp %o3,0
2508 /* 0x0490 */ sub %g1,1,%l3
2509 /* 0x0494 330 */ ldd [%l0],%f42
2510 /* 0x0498 331 */ ldd [%o2+%lo(TwoToMinus16)],%f36
2511 /* 0x049c */ ldd [%g5+%lo(TwoTo16)],%f38
2512 /* 0x04a0 330 */ fmuld %f40,%f42,%f52
2513 /* 0x04a4 331 */ fdtox %f52,%f8
2514 /* 0x04a8 */ fmovs %f0,%f8
2515 /* 0x04ac */ fxtod %f8,%f62
2516 /* 0x04b0 */ fmuld %f62,%f14,%f60
2517 /* 0x04b4 */ fmuld %f60,%f36,%f32
2518 /* 0x04b8 */ fdtox %f32,%f50
2519 /* 0x04bc */ fxtod %f50,%f34
2520 /* 0x04c0 */ fmuld %f34,%f38,%f46
2521 /* 0x04c4 */ fsubd %f60,%f46,%f40
2522 /* 0x04c8 334 */ ble,pn %icc,.L77000378
2523 /* 0x04cc 330 */ std %f52,[%i3]
2524 .L77000509:
2525 /* 0x04d0 345 */ add %o3,1,%g5
2526 /* 0x04d4 */ sll %g5,1,%o2
2527 /* 0x04d8 */ or %g0,0,%l1
2528 /* 0x04dc 337 */ ldd [%i4],%f42
2529 /* 0x04e0 345 */ sub %o3,1,%o3
2530 /* 0x04e4 */ or %g0,0,%o5
2531 /* 0x04e8 */ or %g0,%i3,%l2
2532 /* 0x04ec */ add %i4,8,%o1
2533 /* 0x04f0 */ add %i1,8,%g5
2534 .L900000848:
2535 /* 0x04f4 337 */ fmuld %f40,%f42,%f34
2536 /* 0x04f8 */ ldd [%l0+8],%f32
2537 /* 0x04fc 341 */ cmp %g1,1
2538 /* 0x0500 337 */ ldd [%i1],%f50
2539 /* 0x0504 */ ldd [%l2],%f46
2540 /* 0x0508 */ ldd [%l2+8],%f44
2541 /* 0x050c */ fmuld %f50,%f32,%f60
2542 /* 0x0510 335 */ ldd [%l0],%f42
2543 /* 0x0514 337 */ faddd %f46,%f34,%f48
2544 /* 0x0518 */ faddd %f44,%f60,%f58
2545 /* 0x051c */ fmuld %f36,%f48,%f54
2546 /* 0x0520 */ faddd %f58,%f54,%f34
2547 /* 0x0524 341 */ ble,pn %icc,.L77000368
2548 /* 0x0528 338 */ std %f34,[%l2+8]
2549 .L77000507:
2550 /* 0x052c 341 */ or %g0,1,%l5
2551 /* 0x0530 */ or %g0,2,%l4
2552 /* 0x0534 */ or %g0,%g5,%g4
2553 /* 0x0538 342 */ cmp %l3,12
2554 /* 0x053c */ bl,pn %icc,.L77000481
2555 /* 0x0540 341 */ or %g0,%o1,%g3
2556 .L900000839:
2557 /* 0x0544 342 */ prefetch [%i1+8],0
2558 /* 0x0548 */ prefetch [%i1+72],0
2559 /* 0x054c */ add %i4,40,%l6
2560 /* 0x0550 */ add %i1,40,%l7
2561 /* 0x0554 */ prefetch [%l2+16],0
2562 /* 0x0558 */ or %g0,%l2,%o7
2563 /* 0x055c */ sub %l3,7,%i5
2564 /* 0x0560 */ prefetch [%l2+80],0
2565 /* 0x0564 */ add %l2,80,%g2
2566 /* 0x0568 */ or %g0,2,%l4
2567 /* 0x056c */ prefetch [%i1+136],0
2568 /* 0x0570 */ or %g0,5,%l5
2569 /* 0x0574 */ prefetch [%i1+200],0
2570 /* 0x0578 */ prefetch [%l2+144],0
2571 /* 0x057c */ ldd [%i4+8],%f52
2572 /* 0x0580 */ ldd [%i4+16],%f44
2573 /* 0x0584 */ ldd [%i4+24],%f56
2574 /* 0x0588 */ fmuld %f40,%f52,%f48
2575 /* 0x058c */ fmuld %f40,%f44,%f46
2576 /* 0x0590 */ fmuld %f40,%f56,%f44
2577 /* 0x0594 */ ldd [%l2+48],%f56
2578 /* 0x0598 */ prefetch [%l2+208],0
2579 /* 0x059c */ prefetch [%l2+272],0
2580 /* 0x05a0 */ prefetch [%l2+336],0
2581 /* 0x05a4 */ prefetch [%l2+400],0
2582 /* 0x05a8 */ ldd [%i1+8],%f32
2583 /* 0x05ac */ ldd [%i1+16],%f60
2584 /* 0x05b0 */ ldd [%i1+24],%f50
2585 /* 0x05b4 */ fmuld %f42,%f32,%f62
2586 /* 0x05b8 */ ldd [%i1+32],%f32
2587 /* 0x05bc */ fmuld %f42,%f60,%f58
2588 /* 0x05c0 */ ldd [%l2+16],%f52
2589 /* 0x05c4 */ ldd [%l2+32],%f54
2590 /* 0x05c8 */ faddd %f62,%f48,%f60
2591 /* 0x05cc */ fmuld %f42,%f50,%f48
2592 /* 0x05d0 */ faddd %f58,%f46,%f62
2593 /* 0x05d4 */ ldd [%i4+32],%f46
2594 /* 0x05d8 */ ldd [%l2+64],%f58
2595 .L900000837:
2596 /* 0x05dc 342 */ prefetch [%l7+192],0
2597 /* 0x05e0 */ fmuld %f40,%f46,%f46
2598 /* 0x05e4 */ faddd %f60,%f52,%f60
2599 /* 0x05e8 */ ldd [%l6],%f52
2600 /* 0x05ec */ std %f60,[%g2-64]
2601 /* 0x05f0 */ fmuld %f42,%f32,%f50
2602 /* 0x05f4 */ add %l5,8,%l5
2603 /* 0x05f8 */ ldd [%l7],%f60
2604 /* 0x05fc */ faddd %f48,%f44,%f48
2605 /* 0x0600 */ cmp %l5,%i5
2606 /* 0x0604 */ ldd [%g2],%f32
2607 /* 0x0608 */ add %g2,128,%g2
2608 /* 0x060c */ prefetch [%g2+256],0
2609 /* 0x0610 */ fmuld %f40,%f52,%f52
2610 /* 0x0614 */ faddd %f62,%f54,%f44
2611 /* 0x0618 */ ldd [%l6+8],%f54
2612 /* 0x061c */ std %f44,[%g2-176]
2613 /* 0x0620 */ fmuld %f42,%f60,%f44
2614 /* 0x0624 */ add %l6,64,%l6
2615 /* 0x0628 */ ldd [%l7+8],%f60
2616 /* 0x062c */ faddd %f50,%f46,%f50
2617 /* 0x0630 */ add %l7,64,%l7
2618 /* 0x0634 */ add %l4,16,%l4
2619 /* 0x0638 */ ldd [%g2-112],%f46
2620 /* 0x063c */ fmuld %f40,%f54,%f54
2621 /* 0x0640 */ faddd %f48,%f56,%f62
2622 /* 0x0644 */ ldd [%l6-48],%f56
2623 /* 0x0648 */ std %f62,[%g2-160]
2624 /* 0x064c */ fmuld %f42,%f60,%f48
2625 /* 0x0650 */ ldd [%l7-48],%f60
2626 /* 0x0654 */ faddd %f44,%f52,%f52
2627 /* 0x0658 */ ldd [%g2-96],%f30
2628 /* 0x065c */ prefetch [%g2+288],0
2629 /* 0x0660 */ fmuld %f40,%f56,%f56
2630 /* 0x0664 */ faddd %f50,%f58,%f62
2631 /* 0x0668 */ ldd [%l6-40],%f58
2632 /* 0x066c */ std %f62,[%g2-144]
2633 /* 0x0670 */ fmuld %f42,%f60,%f50
2634 /* 0x0674 */ ldd [%l7-40],%f62
2635 /* 0x0678 */ faddd %f48,%f54,%f54
2636 /* 0x067c */ ldd [%g2-80],%f28
2637 /* 0x0680 */ prefetch [%l7+160],0
2638 /* 0x0684 */ fmuld %f40,%f58,%f48
2639 /* 0x0688 */ faddd %f52,%f32,%f44
2640 /* 0x068c */ ldd [%l6-32],%f58
2641 /* 0x0690 */ std %f44,[%g2-128]
2642 /* 0x0694 */ fmuld %f42,%f62,%f44
2643 /* 0x0698 */ ldd [%l7-32],%f60
2644 /* 0x069c */ faddd %f50,%f56,%f56
2645 /* 0x06a0 */ ldd [%g2-64],%f52
2646 /* 0x06a4 */ prefetch [%g2+320],0
2647 /* 0x06a8 */ fmuld %f40,%f58,%f50
2648 /* 0x06ac */ faddd %f54,%f46,%f32
2649 /* 0x06b0 */ ldd [%l6-24],%f62
2650 /* 0x06b4 */ std %f32,[%g2-112]
2651 /* 0x06b8 */ fmuld %f42,%f60,%f46
2652 /* 0x06bc */ ldd [%l7-24],%f60
2653 /* 0x06c0 */ faddd %f44,%f48,%f48
2654 /* 0x06c4 */ ldd [%g2-48],%f54
2655 /* 0x06c8 */ fmuld %f40,%f62,%f26
2656 /* 0x06cc */ faddd %f56,%f30,%f32
2657 /* 0x06d0 */ ldd [%l6-16],%f58
2658 /* 0x06d4 */ std %f32,[%g2-96]
2659 /* 0x06d8 */ fmuld %f42,%f60,%f30
2660 /* 0x06dc */ ldd [%l7-16],%f32
2661 /* 0x06e0 */ faddd %f46,%f50,%f60
2662 /* 0x06e4 */ ldd [%g2-32],%f56
2663 /* 0x06e8 */ prefetch [%g2+352],0
2664 /* 0x06ec */ fmuld %f40,%f58,%f44
2665 /* 0x06f0 */ faddd %f48,%f28,%f62
2666 /* 0x06f4 */ ldd [%l6-8],%f46
2667 /* 0x06f8 */ std %f62,[%g2-80]
2668 /* 0x06fc */ fmuld %f42,%f32,%f48
2669 /* 0x0700 */ ldd [%l7-8],%f32
2670 /* 0x0704 */ faddd %f30,%f26,%f62
2671 /* 0x0708 */ ble,pt %icc,.L900000837
2672 /* 0x070c */ ldd [%g2-16],%f58
2673 .L900000840:
2674 /* 0x0710 342 */ fmuld %f40,%f46,%f46
2675 /* 0x0714 */ faddd %f62,%f54,%f62
2676 /* 0x0718 */ std %f62,[%g2-48]
2677 /* 0x071c */ cmp %l5,%l3
2678 /* 0x0720 */ fmuld %f42,%f32,%f50
2679 /* 0x0724 */ faddd %f48,%f44,%f48
2680 /* 0x0728 */ or %g0,%l7,%g4
2681 /* 0x072c */ or %g0,%l6,%g3
2682 /* 0x0730 */ faddd %f60,%f52,%f60
2683 /* 0x0734 */ std %f60,[%g2-64]
2684 /* 0x0738 */ or %g0,%o7,%l2
2685 /* 0x073c */ add %l4,8,%l4
2686 /* 0x0740 */ faddd %f50,%f46,%f54
2687 /* 0x0744 */ faddd %f48,%f56,%f56
2688 /* 0x0748 */ std %f56,[%g2-32]
2689 /* 0x074c */ faddd %f54,%f58,%f58
2690 /* 0x0750 */ bg,pn %icc,.L77000368
2691 /* 0x0754 */ std %f58,[%g2-16]
2692 .L77000481:
2693 /* 0x0758 342 */ ldd [%g4],%f44
2694 .L900000850:
2695 /* 0x075c 342 */ ldd [%g3],%f48
2696 /* 0x0760 */ fmuld %f42,%f44,%f58
2697 /* 0x0764 */ sra %l4,0,%l7
2698 /* 0x0768 */ add %l5,1,%l5
2699 /* 0x076c */ sllx %l7,3,%g2
2700 /* 0x0770 */ add %g4,8,%g4
2701 /* 0x0774 */ ldd [%l2+%g2],%f56
2702 /* 0x0778 */ cmp %l5,%l3
2703 /* 0x077c */ add %l4,2,%l4
2704 /* 0x0780 */ fmuld %f40,%f48,%f54
2705 /* 0x0784 */ add %g3,8,%g3
2706 /* 0x0788 */ faddd %f58,%f54,%f52
2707 /* 0x078c */ faddd %f52,%f56,%f62
2708 /* 0x0790 */ std %f62,[%l2+%g2]
2709 /* 0x0794 */ ble,a,pt %icc,.L900000850
2710 /* 0x0798 */ ldd [%g4],%f44
2711 .L77000368:
2712 /* 0x079c 344 */ cmp %o5,15
2713 /* 0x07a0 */ bne,pn %icc,.L77000483
2714 /* 0x07a4 345 */ srl %l1,31,%g4
2715 .L77000478:
2716 /* 0x07a8 345 */ add %l1,%g4,%l4
2717 /* 0x07ac */ sra %l4,1,%o7
2718 /* 0x07b0 */ add %o7,1,%o4
2719 /* 0x07b4 */ sll %o4,1,%l6
2720 /* 0x07b8 */ cmp %l6,%o2
2721 /* 0x07bc */ bge,pn %icc,.L77000392
2722 /* 0x07c0 */ fmovd %f0,%f42
2723 .L77000508:
2724 /* 0x07c4 345 */ sra %l6,0,%l4
2725 /* 0x07c8 */ sllx %l4,3,%g2
2726 /* 0x07cc */ fmovd %f0,%f32
2727 /* 0x07d0 */ sub %o2,1,%l5
2728 /* 0x07d4 */ ldd [%g2+%i3],%f40
2729 /* 0x07d8 */ add %g2,%i3,%g3
2730 .L900000849:
2731 /* 0x07dc 345 */ fdtox %f40,%f10
2732 /* 0x07e0 */ ldd [%g3+8],%f52
2733 /* 0x07e4 */ add %l6,2,%l6
2734 /* 0x07e8 */ cmp %l6,%l5
2735 /* 0x07ec */ fdtox %f52,%f2
2736 /* 0x07f0 */ fmovd %f10,%f30
2737 /* 0x07f4 */ fmovs %f0,%f10
2738 /* 0x07f8 */ fmovs %f0,%f2
2739 /* 0x07fc */ fxtod %f10,%f10
2740 /* 0x0800 */ fxtod %f2,%f2
2741 /* 0x0804 */ fdtox %f52,%f28
2742 /* 0x0808 */ faddd %f10,%f32,%f56
2743 /* 0x080c */ std %f56,[%g3]
2744 /* 0x0810 */ faddd %f2,%f42,%f62
2745 /* 0x0814 */ std %f62,[%g3+8]
2746 /* 0x0818 */ fitod %f30,%f32
2747 /* 0x081c */ add %g3,16,%g3
2748 /* 0x0820 */ fitod %f28,%f42
2749 /* 0x0824 */ ble,a,pt %icc,.L900000849
2750 /* 0x0828 */ ldd [%g3],%f40
2751 .L77000392:
2752 /* 0x082c 346 */ or %g0,0,%o5
2753 .L77000483:
2754 /* 0x0830 350 */ fdtox %f34,%f6
2755 /* 0x0834 */ add %l1,1,%l1
2756 /* 0x0838 */ cmp %l1,%o3
2757 /* 0x083c */ add %o5,1,%o5
2758 /* 0x0840 */ add %l2,8,%l2
2759 /* 0x0844 */ add %l0,8,%l0
2760 /* 0x0848 */ fmovs %f0,%f6
2761 /* 0x084c */ fxtod %f6,%f46
2762 /* 0x0850 */ fmuld %f46,%f14,%f56
2763 /* 0x0854 */ fmuld %f56,%f36,%f44
2764 /* 0x0858 */ fdtox %f44,%f48
2765 /* 0x085c */ fxtod %f48,%f58
2766 /* 0x0860 */ fmuld %f58,%f38,%f54
2767 /* 0x0864 */ fsubd %f56,%f54,%f40
2768 /* 0x0868 */ ble,a,pt %icc,.L900000848
2769 /* 0x086c 337 */ ldd [%i4],%f42
2770 .L77000378:
2771 /* 0x0870 409 */ ldx [%i3+%o0],%l1
2772 .L900000852:
2773 /* 0x0874 409 */ add %i3,%o0,%l4
2774 /* 0x0878 */ ldx [%l4+8],%i1
2775 /* 0x087c */ cmp %l1,0
2776 /* 0x0880 */ bne,pn %xcc,.L77000403
2777 /* 0x0884 */ or %g0,0,%g5
2778 .L77000402:
2779 /* 0x0888 409 */ or %g0,0,%i3
2780 /* 0x088c */ ba .L900000847
2781 /* 0x0890 */ cmp %i1,0
2782 .L77000403:
2783 /* 0x0894 409 */ srlx %l1,52,%o5
2784 /* 0x0898 */ sethi %hi(0xfff00000),%i3
2785 /* 0x089c */ sllx %i3,32,%o2
2786 /* 0x08a0 */ sethi %hi(0x40000000),%o0
2787 /* 0x08a4 */ sllx %o0,22,%o4
2788 /* 0x08a8 */ or %g0,1023,%l0
2789 /* 0x08ac */ xor %o2,-1,%o3
2790 /* 0x08b0 */ sub %l0,%o5,%o7
2791 /* 0x08b4 */ and %l1,%o3,%l1
2792 /* 0x08b8 */ add %o7,52,%i4
2793 /* 0x08bc */ or %l1,%o4,%o1
2794 /* 0x08c0 */ cmp %i1,0
2795 /* 0x08c4 */ srlx %o1,%i4,%i3
2796 .L900000847:
2797 /* 0x08c8 409 */ bne,pn %xcc,.L77000409
2798 /* 0x08cc */ or %g0,0,%o7
2799 .L77000408:
2800 /* 0x08d0 409 */ ba .L900000846
2801 /* 0x08d4 350 */ cmp %g1,0
2802 .L77000409:
2803 /* 0x08d8 409 */ srlx %i1,52,%l2
2804 /* 0x08dc */ sethi %hi(0xfff00000),%o7
2805 /* 0x08e0 */ sllx %o7,32,%i4
2806 /* 0x08e4 */ sethi %hi(0x40000000),%i5
2807 /* 0x08e8 */ sllx %i5,22,%l6
2808 /* 0x08ec */ or %g0,1023,%l5
2809 /* 0x08f0 */ xor %i4,-1,%o1
2810 /* 0x08f4 */ sub %l5,%l2,%g2
2811 /* 0x08f8 */ and %i1,%o1,%l7
2812 /* 0x08fc */ add %g2,52,%g3
2813 /* 0x0900 */ or %l7,%l6,%g4
2814 /* 0x0904 350 */ cmp %g1,0
2815 /* 0x0908 409 */ srlx %g4,%g3,%o7
2816 .L900000846:
2817 /* 0x090c 350 */ ble,pn %icc,.L77000397
2818 /* 0x0910 */ or %g0,0,%l5
2819 .L77000510:
2820 /* 0x0914 409 */ sethi %hi(0xfff00000),%g4
2821 /* 0x0918 */ sllx %g4,32,%o0
2822 /* 0x091c 0 */ or %g0,-1,%i5
2823 /* 0x0920 409 */ srl %i5,0,%l7
2824 /* 0x0924 */ sethi %hi(0x40000000),%i1
2825 /* 0x0928 */ sllx %i1,22,%l6
2826 /* 0x092c */ sethi %hi(0xfc00),%i4
2827 /* 0x0930 */ xor %o0,-1,%g2
2828 /* 0x0934 */ add %i4,1023,%l2
2829 /* 0x0938 */ or %g0,2,%g4
2830 /* 0x093c */ or %g0,%i2,%g3
2831 .L77000395:
2832 /* 0x0940 409 */ sra %g4,0,%o2
2833 /* 0x0944 */ add %g4,1,%o3
2834 /* 0x0948 */ sllx %o2,3,%o0
2835 /* 0x094c */ sra %o3,0,%o5
2836 /* 0x0950 */ ldx [%l4+%o0],%o4
2837 /* 0x0954 */ sllx %o5,3,%l0
2838 /* 0x0958 */ and %i3,%l7,%o1
2839 /* 0x095c */ ldx [%l4+%l0],%i4
2840 /* 0x0960 */ cmp %o4,0
2841 /* 0x0964 */ bne,pn %xcc,.L77000415
2842 /* 0x0968 350 */ and %o7,%l2,%i5
2843 .L77000414:
2844 /* 0x096c 409 */ or %g0,0,%l1
2845 /* 0x0970 */ ba .L900000845
2846 /* 0x0974 */ add %g5,%o1,%i1
2847 .L77000415:
2848 /* 0x0978 409 */ srlx %o4,52,%o3
2849 /* 0x097c */ and %o4,%g2,%l1
2850 /* 0x0980 */ or %g0,52,%o0
2851 /* 0x0984 */ sub %o3,1023,%l0
2852 /* 0x0988 */ or %l1,%l6,%o4
2853 /* 0x098c */ sub %o0,%l0,%o5
2854 /* 0x0990 */ srlx %o4,%o5,%l1
2855 /* 0x0994 */ add %g5,%o1,%i1
2856 .L900000845:
2857 /* 0x0998 409 */ srax %i3,32,%g5
2858 /* 0x099c */ cmp %i4,0
2859 /* 0x09a0 */ bne,pn %xcc,.L77000421
2860 /* 0x09a4 350 */ sllx %i5,16,%o2
2861 .L77000420:
2862 /* 0x09a8 409 */ or %g0,0,%o4
2863 /* 0x09ac */ ba .L900000844
2864 /* 0x09b0 350 */ add %i1,%o2,%o5
2865 .L77000421:
2866 /* 0x09b4 409 */ srlx %i4,52,%o4
2867 /* 0x09b8 */ or %g0,52,%o0
2868 /* 0x09bc */ sub %o4,1023,%o3
2869 /* 0x09c0 */ and %i4,%g2,%i3
2870 /* 0x09c4 */ or %i3,%l6,%o5
2871 /* 0x09c8 */ sub %o0,%o3,%l0
2872 /* 0x09cc */ srlx %o5,%l0,%o4
2873 /* 0x09d0 350 */ add %i1,%o2,%o5
2874 .L900000844:
2875 /* 0x09d4 350 */ srax %o7,16,%i4
2876 /* 0x09d8 */ srax %o5,32,%i5
2877 /* 0x09dc */ add %i4,%i5,%o1
2878 /* 0x09e0 */ add %l5,1,%l5
2879 /* 0x09e4 */ and %o5,%l7,%i1
2880 /* 0x09e8 */ add %g5,%o1,%g5
2881 /* 0x09ec */ st %i1,[%g3]
2882 /* 0x09f0 */ or %g0,%l1,%i3
2883 /* 0x09f4 */ or %g0,%o4,%o7
2884 /* 0x09f8 */ add %g4,2,%g4
2885 /* 0x09fc */ cmp %l5,%l3
2886 /* 0x0a00 */ ble,pt %icc,.L77000395
2887 /* 0x0a04 */ add %g3,4,%g3
2888 .L77000397:
2889 /* 0x0a08 409 */ sethi %hi(0xfc00),%l4
2890 /* 0x0a0c */ sra %l5,0,%i5
2891 /* 0x0a10 */ add %l4,1023,%i1
2892 /* 0x0a14 */ add %g5,%i3,%l5
2893 /* 0x0a18 */ and %o7,%i1,%g5
2894 /* 0x0a1c */ sllx %g5,16,%l2
2895 /* 0x0a20 */ sllx %i5,2,%l7
2896 /* 0x0a24 413 */ sra %g1,0,%g2
2897 /* 0x0a28 409 */ add %l5,%l2,%l6
2898 /* 0x0a2c */ st %l6,[%i2+%l7]
2899 /* 0x0a30 413 */ sllx %g2,2,%g3
2900 /* 0x0a34 */ ld [%i2+%g3],%g4
2901 /* 0x0a38 */ cmp %g4,0
2902 /* 0x0a3c */ bgu,pn %icc,.L77000486
2903 /* 0x0a40 */ cmp %l3,0
2904 .L77000427:
2905 /* 0x0a44 413 */ bl,pn %icc,.L77000486
2906 /* 0x0a48 */ or %g0,%l3,%i5
2907 .L77000512:
2908 /* 0x0a4c 413 */ sra %l3,0,%o5
2909 /* 0x0a50 */ sllx %o5,2,%l7
2910 /* 0x0a54 */ ld [%l7+%i0],%o5
2911 /* 0x0a58 */ add %l7,%i2,%o1
2912 /* 0x0a5c */ add %l7,%i0,%i4
2913 .L900000843:
2914 /* 0x0a60 413 */ ld [%o1],%i1
2915 /* 0x0a64 */ cmp %i1,%o5
2916 /* 0x0a68 */ bne,pn %icc,.L77000435
2917 /* 0x0a6c */ sub %o1,4,%o1
2918 .L77000431:
2919 /* 0x0a70 413 */ sub %i4,4,%i4
2920 /* 0x0a74 */ subcc %i5,1,%i5
2921 /* 0x0a78 */ bpos,a,pt %icc,.L900000843
2922 /* 0x0a7c */ ld [%i4],%o5
2923 .L900000827:
2924 /* 0x0a80 413 */ ba .L900000842
2925 /* 0x0a84 350 */ cmp %g1,0
2926 .L77000435:
2927 /* 0x0a88 413 */ sra %i5,0,%o0
2928 /* 0x0a8c */ sllx %o0,2,%l1
2929 /* 0x0a90 */ ld [%i0+%l1],%i3
2930 /* 0x0a94 */ ld [%i2+%l1],%l0
2931 /* 0x0a98 */ cmp %l0,%i3
2932 /* 0x0a9c */ bleu,pt %icc,.L77000379
2933 /* 0x0aa0 */ nop
2934 .L77000486:
2935 /* 0x0aa4 350 */ cmp %g1,0
2936 .L900000842:
2937 /* 0x0aa8 350 */ ble,pn %icc,.L77000379
2938 /* 0x0aac */ add %l3,1,%g3
2939 .L77000511:
2940 /* 0x0ab0 350 */ or %g0,0,%l5
2941 /* 0x0ab4 */ cmp %g3,10
2942 /* 0x0ab8 */ bl,pn %icc,.L77000487
2943 /* 0x0abc */ or %g0,0,%g1
2944 .L900000835:
2945 /* 0x0ac0 350 */ prefetch [%i2],22
2946 /* 0x0ac4 */ add %i0,4,%l2
2947 /* 0x0ac8 */ prefetch [%i2+64],22
2948 /* 0x0acc */ add %i2,8,%o5
2949 /* 0x0ad0 */ sub %l3,7,%i0
2950 /* 0x0ad4 */ prefetch [%i2+128],22
2951 /* 0x0ad8 */ or %g0,2,%l5
2952 /* 0x0adc */ prefetch [%i2+192],22
2953 /* 0x0ae0 */ prefetch [%i2+256],22
2954 /* 0x0ae4 */ prefetch [%i2+320],22
2955 /* 0x0ae8 */ prefetch [%i2+384],22
2956 /* 0x0aec */ ld [%l2-4],%l7
2957 /* 0x0af0 */ ld [%o5-4],%l6
2958 /* 0x0af4 */ prefetch [%o5+440],22
2959 /* 0x0af8 */ prefetch [%o5+504],22
2960 /* 0x0afc */ ld [%i2],%i2
2961 /* 0x0b00 */ sub %i2,%l7,%g3
2962 /* 0x0b04 */ st %g3,[%o5-8]
2963 /* 0x0b08 */ srax %g3,32,%l7
2964 .L900000833:
2965 /* 0x0b0c 350 */ add %l5,8,%l5
2966 /* 0x0b10 */ add %o5,32,%o5
2967 /* 0x0b14 */ ld [%l2],%i5
2968 /* 0x0b18 */ prefetch [%o5+496],22
2969 /* 0x0b1c */ cmp %l5,%i0
2970 /* 0x0b20 */ add %l2,32,%l2
2971 /* 0x0b24 */ sub %l6,%i5,%g5
2972 /* 0x0b28 */ add %g5,%l7,%o0
2973 /* 0x0b2c */ ld [%o5-32],%l4
2974 /* 0x0b30 */ st %o0,[%o5-36]
2975 /* 0x0b34 */ srax %o0,32,%i3
2976 /* 0x0b38 */ ld [%l2-28],%i1
2977 /* 0x0b3c */ sub %l4,%i1,%i4
2978 /* 0x0b40 */ add %i4,%i3,%o1
2979 /* 0x0b44 */ ld [%o5-28],%o3
2980 /* 0x0b48 */ st %o1,[%o5-32]
2981 /* 0x0b4c */ srax %o1,32,%l1
2982 /* 0x0b50 */ ld [%l2-24],%o2
2983 /* 0x0b54 */ sub %o3,%o2,%g2
2984 /* 0x0b58 */ add %g2,%l1,%o7
2985 /* 0x0b5c */ ld [%o5-24],%l0
2986 /* 0x0b60 */ st %o7,[%o5-28]
2987 /* 0x0b64 */ srax %o7,32,%l6
2988 /* 0x0b68 */ ld [%l2-20],%o4
2989 /* 0x0b6c */ sub %l0,%o4,%g1
2990 /* 0x0b70 */ add %g1,%l6,%l7
2991 /* 0x0b74 */ ld [%o5-20],%i2
2992 /* 0x0b78 */ st %l7,[%o5-24]
2993 /* 0x0b7c */ srax %l7,32,%g4
2994 /* 0x0b80 */ ld [%l2-16],%g3
2995 /* 0x0b84 */ sub %i2,%g3,%i5
2996 /* 0x0b88 */ add %i5,%g4,%g5
2997 /* 0x0b8c */ ld [%o5-16],%i1
2998 /* 0x0b90 */ st %g5,[%o5-20]
2999 /* 0x0b94 */ srax %g5,32,%l4
3000 /* 0x0b98 */ ld [%l2-12],%o0
3001 /* 0x0b9c */ sub %i1,%o0,%i3
3002 /* 0x0ba0 */ add %i3,%l4,%i4
3003 /* 0x0ba4 */ ld [%o5-12],%o2
3004 /* 0x0ba8 */ st %i4,[%o5-16]
3005 /* 0x0bac */ srax %i4,32,%o3
3006 /* 0x0bb0 */ ld [%l2-8],%o1
3007 /* 0x0bb4 */ sub %o2,%o1,%l1
3008 /* 0x0bb8 */ add %l1,%o3,%g2
3009 /* 0x0bbc */ ld [%o5-8],%o4
3010 /* 0x0bc0 */ st %g2,[%o5-12]
3011 /* 0x0bc4 */ srax %g2,32,%l0
3012 /* 0x0bc8 */ ld [%l2-4],%o7
3013 /* 0x0bcc */ sub %o4,%o7,%l6
3014 /* 0x0bd0 */ add %l6,%l0,%g1
3015 /* 0x0bd4 */ ld [%o5-4],%l6
3016 /* 0x0bd8 */ st %g1,[%o5-8]
3017 /* 0x0bdc */ ble,pt %icc,.L900000833
3018 /* 0x0be0 */ srax %g1,32,%l7
3019 .L900000836:
3020 /* 0x0be4 350 */ ld [%l2],%l0
3021 /* 0x0be8 */ add %l2,4,%i0
3022 /* 0x0bec */ or %g0,%o5,%i2
3023 /* 0x0bf0 */ cmp %l5,%l3
3024 /* 0x0bf4 */ sub %l6,%l0,%l6
3025 /* 0x0bf8 */ add %l6,%l7,%g1
3026 /* 0x0bfc */ st %g1,[%o5-4]
3027 /* 0x0c00 */ bg,pn %icc,.L77000379
3028 /* 0x0c04 */ srax %g1,32,%g1
3029 .L77000487:
3030 /* 0x0c08 350 */ ld [%i2],%o4
3031 .L900000841:
3032 /* 0x0c0c 350 */ ld [%i0],%i3
3033 /* 0x0c10 */ add %g1,%o4,%l0
3034 /* 0x0c14 */ add %l5,1,%l5
3035 /* 0x0c18 */ cmp %l5,%l3
3036 /* 0x0c1c */ add %i0,4,%i0
3037 /* 0x0c20 */ sub %l0,%i3,%l6
3038 /* 0x0c24 */ st %l6,[%i2]
3039 /* 0x0c28 */ srax %l6,32,%g1
3040 /* 0x0c2c */ add %i2,4,%i2
3041 /* 0x0c30 */ ble,a,pt %icc,.L900000841
3042 /* 0x0c34 */ ld [%i2],%o4
3043 .L77000379:
3044 /* 0x0c38 405 */ ret ! Result =
3045 /* 0x0c3c */ restore %g0,%g0,%g0
3046 /* 0x0c40 0 */ .type mont_mulf_noconv,2
3047 /* 0x0c40 0 */ .size mont_mulf_noconv,(.-mont_mulf_noconv)
/*
 * Compiler-emitted bookkeeping only: this span contains directives, no
 * instructions.  The two .xstabs ".stab.index" entries carry the compiler
 * version string and (what looks like) the build directory followed by the
 * cc command line that compiled mont_mulf.c.  The .ident entries carry the
 * ident strings of mont_mulf.c, of the header files named in them, and of
 * the compiler components (acomp, iropt, cg).
 */
3049 ! Begin Disassembling Debug Info
3050 .xstabs ".stab.index","V=10.0;DBG_GEN=4.14.14;cd;backend;Xa;O;R=Sun C 5.5 Patch 112760-07 2004/02/03",60,0,0,0
3051 .xstabs ".stab.index","/workspace/ferenc/algorithms/bignum/unified/mont_mulf; /ws/onnv-tools/SUNWspro/SOS8/prod/bin/cc -D_KERNEL -DRF_INLINE_MACROS -fast -xarch=v9 -xO5 -xstrconst -xdepend -Xa -xchip=ultra3 -xcode=abs32 -Wc,-Qrm-Qd -Wc,-Qrm-Qf -Wc,-assembly -V -W0,-xp -c conv_v9.il -o mont_mulf.o mont_mulf.c",52,0,0,0
3053 ! End Disassembling Debug Info
3055 ! Begin Disassembling Ident
3056 .ident "cg: Sun Compiler Common 7.1 Patch 112763-10 2004/01/27" ! (NO SOURCE LINE)
3057 .ident "@(#)mont_mulf.c\t1.2\t01/09/24 SMI" ! (/tmp/acompAAApja4Fx:8)
3058 .ident "@(#)types.h\t1.74\t03/08/07 SMI" ! (/tmp/acompAAApja4Fx:9)
3059 .ident "@(#)isa_defs.h\t1.20\t99/05/04 SMI" ! (/tmp/acompAAApja4Fx:10)
3060 .ident "@(#)feature_tests.h\t1.18\t99/07/26 SMI" ! (/tmp/acompAAApja4Fx:11)
3061 .ident "@(#)machtypes.h\t1.13\t99/05/04 SMI" ! (/tmp/acompAAApja4Fx:12)
3062 .ident "@(#)inttypes.h\t1.2\t98/01/16 SMI" ! (/tmp/acompAAApja4Fx:13)
3063 .ident "@(#)int_types.h\t1.6\t97/08/20 SMI" ! (/tmp/acompAAApja4Fx:14)
3064 .ident "@(#)int_limits.h\t1.6\t99/08/06 SMI" ! (/tmp/acompAAApja4Fx:15)
3065 .ident "@(#)int_const.h\t1.2\t96/07/08 SMI" ! (/tmp/acompAAApja4Fx:16)
3066 .ident "@(#)int_fmtio.h\t1.2\t96/07/08 SMI" ! (/tmp/acompAAApja4Fx:17)
3067 .ident "@(#)types32.h\t1.4\t98/02/13 SMI" ! (/tmp/acompAAApja4Fx:18)
3068 .ident "@(#)select.h\t1.17\t01/08/15 SMI" ! (/tmp/acompAAApja4Fx:19)
3069 .ident "@(#)math.h\t2.11\t00/09/07 SMI" ! (/tmp/acompAAApja4Fx:20)
3070 .ident "@(#)math_iso.h\t1.2\t00/09/07 SMI" ! (/tmp/acompAAApja4Fx:21)
3071 .ident "@(#)floatingpoint.h\t2.5\t99/06/22 SMI" ! (/tmp/acompAAApja4Fx:22)
3072 .ident "@(#)stdio_tag.h\t1.3\t98/04/20 SMI" ! (/tmp/acompAAApja4Fx:23)
3073 .ident "@(#)ieeefp.h\t2.8 99/10/29" ! (/tmp/acompAAApja4Fx:24)
3074 .ident "acomp: Sun C 5.5 Patch 112760-07 2004/02/03" ! (/tmp/acompAAApja4Fx:57)
3075 .ident "iropt: Sun Compiler Common 7.1 Patch 112763-10 2004/01/27" ! (/tmp/acompAAApja4Fx:58)
3076 .ident "cg: Sun Compiler Common 7.1 Patch 112763-10 2004/01/27" ! (NO SOURCE LINE)
3077 ! End Disassembling Ident
/*
 * FZERO: write zero to the 32 even-numbered floating-point registers
 * %f0 through %f62.
 *
 * %f0 and %f2 are cleared directly with fzero.  Each of the remaining
 * registers %f4 .. %f62 is then written with either %f0 + %f2 (faddd)
 * or %f0 * %f2 (fmuld), i.e. 0 + 0 or 0 * 0, alternating from one
 * register to the next.
 * NOTE(review): the faddd/fmuld alternation presumably lets the FP adder
 * and multiplier issue side by side -- confirm against the target CPU.
 *
 * The instructions are separated by ';' because the macro expands onto a
 * single line; for the same reason no '!' comment may be placed inside it.
 * big_restorefp uses this macro ("zero out to avoid leaks").
 */
3079 #define FZERO \
3080 fzero %f0 ;\
3081 fzero %f2 ;\
3082 faddd %f0, %f2, %f4 ;\
3083 fmuld %f0, %f2, %f6 ;\
3084 faddd %f0, %f2, %f8 ;\
3085 fmuld %f0, %f2, %f10 ;\
3086 faddd %f0, %f2, %f12 ;\
3087 fmuld %f0, %f2, %f14 ;\
3088 faddd %f0, %f2, %f16 ;\
3089 fmuld %f0, %f2, %f18 ;\
3090 faddd %f0, %f2, %f20 ;\
3091 fmuld %f0, %f2, %f22 ;\
3092 faddd %f0, %f2, %f24 ;\
3093 fmuld %f0, %f2, %f26 ;\
3094 faddd %f0, %f2, %f28 ;\
3095 fmuld %f0, %f2, %f30 ;\
3096 faddd %f0, %f2, %f32 ;\
3097 fmuld %f0, %f2, %f34 ;\
3098 faddd %f0, %f2, %f36 ;\
3099 fmuld %f0, %f2, %f38 ;\
3100 faddd %f0, %f2, %f40 ;\
3101 fmuld %f0, %f2, %f42 ;\
3102 faddd %f0, %f2, %f44 ;\
3103 fmuld %f0, %f2, %f46 ;\
3104 faddd %f0, %f2, %f48 ;\
3105 fmuld %f0, %f2, %f50 ;\
3106 faddd %f0, %f2, %f52 ;\
3107 fmuld %f0, %f2, %f54 ;\
3108 faddd %f0, %f2, %f56 ;\
3109 fmuld %f0, %f2, %f58 ;\
3110 faddd %f0, %f2, %f60 ;\
3111 fmuld %f0, %f2, %f62
3113 #include "assym.h"
3115 /*
3116 * In the routine below, we check/set the FPRS_FEF bit since
3117 * we don't want to take an fp_disabled trap. We need not
3118 * check/set the PSTATE_PEF bit as it is done early during boot.
3119 */
/*
 * big_savefp: record the current floating-point state in the save area
 * addressed by %o0.
 *
 *   in:   %o0  base address of the save area
 *   uses: %o2  holds the %fprs value read on entry
 *         %o4  second argument handed to BSTORE_FPREGS
 *              (presumably a scratch register -- confirm in the macro)
 *
 * %fprs is stored at offset FPU_FPRS and %fsr at offset FPU_FSR on both
 * paths.  The FP registers themselves are stored with BSTORE_FPREGS only
 * when FPRS_FEF was already set on entry.  When it was clear, FPRS_FEF is
 * switched on first so that the %fsr store does not take an fp_disabled
 * trap, and the registers are not stored.
 *
 * Every branch and retl carries an explicit nop in its delay slot.  Without
 * them the "wr ... %fprs" would sit in the annulled delay slot of the bnz
 * (and so never run on the fall-through path it is meant for), and each
 * retl would execute whatever instruction happens to follow it.
 */
3120 ENTRY(big_savefp)
3121 rd %fprs, %o2
3122 st %o2, [%o0 + FPU_FPRS]
3123 andcc %o2, FPRS_FEF, %g0 ! is FPRS_FEF set?
3124 bnz,a,pt %icc, .fregs_save ! yes, go to save
3125 nop ! delay slot
3126 wr %g0, FPRS_FEF, %fprs ! else, set the bit
3127 stx %fsr, [%o0 + FPU_FSR] ! store %fsr
3128 retl
3129 nop ! delay slot
3130 .fregs_save:
3131 BSTORE_FPREGS(%o0, %o4)
3132 stx %fsr, [%o0 + FPU_FSR] ! store %fsr
3133 retl
3134 nop ! delay slot
3135 SET_SIZE(big_savefp)
/*
 * big_restorefp: reinstate the floating-point state held in the save area
 * addressed by %o0.
 *
 *   in:   %o0  base address of the save area
 *   uses: %o1  holds the %fprs value loaded from offset FPU_FPRS
 *         %o2  second argument handed to BLOAD_FPREGS
 *              (presumably a scratch register -- confirm in the macro)
 *
 * %fsr is reloaded from offset FPU_FSR first.  If the saved %fprs value
 * has FPRS_FEF set, the FP registers are reloaded with BLOAD_FPREGS and
 * %fprs is set to the saved value.  Otherwise the registers are cleared
 * with FZERO, so their contents do not leak, and %fprs is written with 0.
 *
 * Every branch and retl carries an explicit nop in its delay slot.  Without
 * them the first instruction of FZERO would run in the bnz delay slot on
 * both paths, the first retl would execute the start of BLOAD_FPREGS after
 * %fprs had been cleared, and the last retl would execute whatever follows
 * this routine.
 */
3138 ENTRY(big_restorefp)
3139 ldx [%o0 + FPU_FSR], %fsr ! restore %fsr
3140 ld [%o0 + FPU_FPRS], %o1
3141 andcc %o1, FPRS_FEF, %g0 ! is FPRS_FEF set in saved %fprs?
3142 bnz,pt %icc, .fregs_restore ! yes, go to restore
3143 nop ! delay slot
3144 FZERO ! zero out to avoid leaks
3145 wr %g0, 0, %fprs
3146 retl
3147 nop ! delay slot
3148 .fregs_restore:
3149 BLOAD_FPREGS(%o0, %o2)
3150 wr %o1, 0, %fprs
3151 retl
3152 nop ! delay slot
3153 SET_SIZE(big_restorefp)
3155 #endif /* lint || __lint */