dmake: do not set MAKEFLAGS=k
[unleashed/tickless.git] / usr / src / common / bignum / sun4u / mont_mulf_kernel_v9.s
blob40672c478b06db36bb95c4dbba055bc44204bbff
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
27 * This file is mostly a result of compiling the mont_mulf.c file to generate an
28 * assembly output and then hand-editing that output to replace the
29 * compiler-generated loop for the 512-bit case (nlen == 16) in the
30 * mont_mulf_noconv routine with a hand-crafted version. This file also
31 * has big_savefp() and big_restorefp() routines added by hand.
34 #include <sys/asm_linkage.h>
35 #include <sys/trap.h>
36 #include <sys/stack.h>
37 #include <sys/privregs.h>
38 #include <sys/regset.h>
39 #include <sys/vis.h>
40 #include <sys/machthread.h>
41 #include <sys/machtrap.h>
42 #include <sys/machsig.h>
45 .section ".text",#alloc,#execinstr
46 .file "mont_mulf.c"
48 .section ".bss",#alloc,#write
49 Bbss.bss:
51 .section ".data",#alloc,#write
52 Ddata.data:
54 .section ".rodata",#alloc
56 ! CONSTANT POOL
/*
 * Read-only double-precision constants used by the routines below.
 * Each constant is emitted as two 32-bit .word values: the first word is
 * the upper half of the IEEE-754 double (sign, exponent, top of mantissa)
 * and the second word is the lower half.  The values correspond to the
 * static const doubles shown in the interleaved mont_mulf.c listing.
 */
58 Drodata.rodata:
59 .global TwoTo16
60 .align 8
62 ! CONSTANT POOL
/* TwoTo16 = 65536.0 (2^16); upper word 1089470464 = 0x40f00000. */
64 .global TwoTo16
65 TwoTo16:
66 .word 1089470464
67 .word 0
68 .type TwoTo16,#object
69 .size TwoTo16,8
70 .global TwoToMinus16
72 ! CONSTANT POOL
/* TwoToMinus16 = 1.0/65536.0 (2^-16); upper word 1055916032 = 0x3ef00000. */
74 .global TwoToMinus16
75 TwoToMinus16:
76 .word 1055916032
77 .word 0
78 .type TwoToMinus16,#object
79 .size TwoToMinus16,8
80 .global Zero
82 ! CONSTANT POOL
/* Zero = 0.0; both words are zero, so either half can serve as a 32-bit 0. */
84 .global Zero
85 Zero:
86 .word 0
87 .word 0
88 .type Zero,#object
89 .size Zero,8
90 .global TwoTo32
92 ! CONSTANT POOL
/* TwoTo32 = 65536.0 * 65536.0 (2^32); upper word 1106247680 = 0x41f00000. */
94 .global TwoTo32
95 TwoTo32:
96 .word 1106247680
97 .word 0
98 .type TwoTo32,#object
99 .size TwoTo32,8
100 .global TwoToMinus32
102 ! CONSTANT POOL
/* TwoToMinus32 = 1.0 / 2^32; upper word 1039138816 = 0x3df00000. */
104 .global TwoToMinus32
105 TwoToMinus32:
106 .word 1039138816
107 .word 0
108 .type TwoToMinus32,#object
109 .size TwoToMinus32,8
111 .section ".text",#alloc,#execinstr
112 /* 000000 0 */ .register %g3,#scratch
113 /* 000000 */ .register %g2,#scratch
114 /* 000000 0 */ .align 32
115 ! FILE mont_mulf.c
117 ! 1 !/*
118 ! 2 ! * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
119 ! 3 ! * Use is subject to license terms.
120 ! 4 ! */
121 ! 6 !#pragma ident "@(#)mont_mulf.c 1.2 01/09/24 SMI"
122 ! 9 !/*
123 ! 10 ! * If compiled without -DRF_INLINE_MACROS then needs -lm at link time
124 ! 11 ! * If compiled with -DRF_INLINE_MACROS then needs conv.il at compile time
125 ! 12 ! * (i.e. cc <compiler_flags> -DRF_INLINE_MACROS conv.il mont_mulf.c )
126 ! 13 ! */
127 ! 15 !#include <sys/types.h>
128 ! 16 !#include <math.h>
129 ! 18 !static const double TwoTo16 = 65536.0;
130 ! 19 !static const double TwoToMinus16 = 1.0/65536.0;
131 ! 20 !static const double Zero = 0.0;
132 ! 21 !static const double TwoTo32 = 65536.0 * 65536.0;
133 ! 22 !static const double TwoToMinus32 = 1.0 / (65536.0 * 65536.0);
134 ! 24 !#ifdef RF_INLINE_MACROS
135 ! 26 !double upper32(double);
136 ! 27 !double lower32(double, double);
137 ! 28 !double mod(double, double, double);
138 ! 30 !#else
139 ! 32 !static double
140 ! 33 !upper32(double x)
141 ! 34 !{
142 ! 35 ! return (floor(x * TwoToMinus32));
143 ! 36 !}
144 ! 39 !/* ARGSUSED */
145 ! 40 !static double
146 ! 41 !lower32(double x, double y)
147 ! 42 !{
148 ! 43 ! return (x - TwoTo32 * floor(x * TwoToMinus32));
149 ! 44 !}
150 ! 46 !static double
151 ! 47 !mod(double x, double oneoverm, double m)
152 ! 48 !{
153 ! 49 ! return (x - m * floor(x * oneoverm));
154 ! 50 !}
155 ! 52 !#endif
156 ! 55 !static void
157 ! 56 !cleanup(double *dt, int from, int tlen)
158 ! 57 !{
161 ! SUBROUTINE cleanup
163 ! OFFSET SOURCE LINE LABEL INSTRUCTION
/*
 * cleanup(double *dt, int from, int tlen)  -- file-local leaf routine.
 *
 * For every pair dt[i], dt[i+1] with i = 2*from, 2*from+2, ... < 2*tlen,
 * replace each element by its low 32 bits plus the carry (upper 32 bits)
 * taken from the element two positions earlier, as in the C listing.
 *
 * Register use:
 *   %o0 = dt            %o1 = from (in)       %o2 = tlen (in)
 *   %g3 = i             %g5 = 2 * tlen        %g2 = 2 * tlen - 1 (loop bound)
 *   %g1 = &dt[i]        %f8 = Zero            %f10 = x, %f12 = x1
 *   %f18 = tmp          %f16 = tmp1
 *
 * lower32()/upper32() are done without floor(): the double is converted
 * to a 64-bit integer held in an FP register pair, and the two 32-bit
 * halves of that pair are then handled as single-precision registers.
 */
165 cleanup:
/* Sign-extend the two int arguments. */
166 /* 000000 57 */ sra %o1,0,%o4
167 /* 0x0004 */ sra %o2,0,%o5
169 ! 58 ! int i;
170 ! 59 ! double tmp, tmp1, x, x1;
171 ! 61 ! tmp = tmp1 = Zero;
/* %g5 = 2 * tlen */
173 /* 0x0008 61 */ sll %o5,1,%g5
175 ! 63 ! for (i = 2 * from; i < 2 * tlen; i += 2) {
/*
 * %g3 = i = 2 * from; return immediately when i >= 2 * tlen.
 * The sethi in the delay slot runs on both paths.
 */
177 /* 0x000c 63 */ sll %o4,1,%g3
178 /* 0x0010 */ cmp %g3,%g5
179 /* 0x0014 */ bge,pn %icc,.L77000188
180 /* 0x0018 0 */ sethi %hi(Zero),%o3
181 .L77000197:
/*
 * Loop preamble: load Zero, compute the inclusive loop bound
 * (2 * tlen - 1), the byte offset of dt[i], and load the first x.
 */
182 /* 0x001c 63 */ ldd [%o3+%lo(Zero)],%f8
183 /* 0x0020 */ sra %g3,0,%o1
184 /* 0x0024 */ sub %g5,1,%g2
185 /* 0x0028 */ sllx %o1,3,%g4
187 ! 64 ! x = dt[i];
189 /* 0x002c 64 */ ldd [%g4+%o0],%f10
190 /* 0x0030 63 */ add %g4,%o0,%g1
/* tmp = tmp1 = Zero */
191 /* 0x0034 */ fmovd %f8,%f18
192 /* 0x0038 */ fmovd %f8,%f16
194 ! 65 ! x1 = dt[i + 1];
195 ! 66 ! dt[i] = lower32(x, Zero) + tmp;
197 .L900000110:
/*
 * Loop body.  %f0 and %f2 receive x and x1 as 64-bit integers; %f4 and
 * %f6 receive copies whose upper 32-bit words are later converted with
 * fitod to give upper32().  fmovs %f8 (a zero word) clears the upper
 * word of %f0/%f2 so that fxtod yields lower32().
 */
198 /* 0x003c 66 */ fdtox %f10,%f0
199 /* 0x0040 65 */ ldd [%g1+8],%f12
201 ! 67 ! dt[i + 1] = lower32(x1, Zero) + tmp1;
202 ! 68 ! tmp = upper32(x);
203 ! 69 ! tmp1 = upper32(x1);
/* i += 2 and compare against the bound early; the result is used by the
 * ble at the bottom of the loop. */
205 /* 0x0044 69 */ add %g3,2,%g3
206 /* 0x0048 */ cmp %g3,%g2
207 /* 0x004c 67 */ fdtox %f12,%f2
208 /* 0x0050 68 */ fmovd %f0,%f4
209 /* 0x0054 66 */ fmovs %f8,%f0
210 /* 0x0058 67 */ fmovs %f8,%f2
211 /* 0x005c 66 */ fxtod %f0,%f0
212 /* 0x0060 67 */ fxtod %f2,%f2
213 /* 0x0064 69 */ fdtox %f12,%f6
/* dt[i] = lower32(x) + tmp; dt[i + 1] = lower32(x1) + tmp1 */
214 /* 0x0068 66 */ faddd %f0,%f18,%f10
215 /* 0x006c */ std %f10,[%g1]
216 /* 0x0070 67 */ faddd %f2,%f16,%f14
217 /* 0x0074 */ std %f14,[%g1+8]
/* tmp = upper32(x); advance the pointer; tmp1 = upper32(x1) */
218 /* 0x0078 68 */ fitod %f4,%f18
219 /* 0x007c 69 */ add %g1,16,%g1
220 /* 0x0080 */ fitod %f6,%f16
/*
 * Continue while i <= 2 * tlen - 1.  The branch is annulled, so the
 * load of the next x in the delay slot only happens when looping.
 */
221 /* 0x0084 */ ble,a,pt %icc,.L900000110
222 /* 0x0088 64 */ ldd [%g1],%f10
223 .L77000188:
224 /* 0x008c 69 */ retl ! Result =
225 /* 0x0090 */ nop
226 /* 0x0094 0 */ .type cleanup,2
227 /* 0x0094 0 */ .size cleanup,(.-cleanup)
229 .section ".text",#alloc,#execinstr
230 /* 000000 0 */ .align 8
231 /* 000000 */ .skip 24
232 /* 0x0018 */ .align 32
234 ! 70 ! }
235 ! 71 !}
236 ! 75 !#ifdef _KERNEL
237 ! 76 !/*
238 ! 77 ! * This only works if 0 <= d < 2^53
239 ! 78 ! */
240 ! 79 !uint64_t
241 ! 80 !double2uint64_t(double* d)
242 ! 81 !{
243 ! 82 ! uint64_t x;
244 ! 83 ! uint64_t exp;
245 ! 84 ! uint64_t man;
246 ! 86 ! x = *((uint64_t *)d);
249 ! SUBROUTINE double2uint64_t
251 ! OFFSET SOURCE LINE LABEL INSTRUCTION
/*
 * uint64_t double2uint64_t(double *d)  -- leaf routine (_KERNEL variant).
 *
 * Converts *d to an integer using only integer instructions, by pulling
 * the exponent and mantissa out of the raw 64-bit pattern.  Per the C
 * listing this is only valid for 0 <= *d < 2^53.
 *
 * In:  %o0 = d
 * Out: %o0 = converted value (0 when the raw bit pattern is all zero)
 * Scratch: %o1-%o5, %g1-%g5
 */
253 .global double2uint64_t
254 double2uint64_t:
/* %o2 = raw 64-bit image of *d */
255 /* 000000 86 */ ldx [%o0],%o2
257 ! 87 ! if (x == 0) {
/*
 * Non-zero input goes to the conversion path.  The sethi in the delay
 * slot starts building the exponent mask and runs on both paths.
 */
259 /* 0x0004 87 */ cmp %o2,0
260 /* 0x0008 */ bne,pn %xcc,.L900000206
261 /* 0x000c 94 */ sethi %hi(0xfff00000),%o5
262 .L77000202:
/* Zero input: return 0 (set in the delay slot of retl). */
263 /* 0x0010 94 */ retl ! Result = %o0
265 ! 88 ! return (0ULL);
267 /* 0x0014 88 */ or %g0,0,%o0
269 ! 89 ! }
270 ! 90 ! exp = (x >> 52) - 1023;
271 ! 91 ! man = (x & 0xfffffffffffffULL) | 0x10000000000000ULL;
272 ! 92 ! x = man >> (52 - exp);
273 ! 94 ! return (x);
275 .L900000206:
/*
 * %o4 = 0xfff0000000000000 (sign + exponent field)
 * %o0 = x >> 52            (biased exponent)
 * %g4 = 1 << 52            (implicit leading mantissa bit)
 * %o3 = ~%o4               (52-bit mantissa mask)
 * %g1 = (1023 - (x >> 52)) + 52, i.e. 52 - exp
 * Result = ((x & mask) | (1 << 52)) >> %g1, computed in the retl delay slot.
 */
276 /* 0x0018 94 */ sllx %o5,32,%o4
277 /* 0x001c */ srlx %o2,52,%o0
278 /* 0x0020 */ sethi %hi(0x40000000),%o1
279 /* 0x0024 */ or %g0,1023,%g5
280 /* 0x0028 */ sllx %o1,22,%g4
281 /* 0x002c */ xor %o4,-1,%o3
282 /* 0x0030 */ sub %g5,%o0,%g3
283 /* 0x0034 */ and %o2,%o3,%g2
284 /* 0x0038 */ or %g2,%g4,%o5
285 /* 0x003c */ add %g3,52,%g1
286 /* 0x0040 */ retl ! Result = %o0
287 /* 0x0044 */ srlx %o5,%g1,%o0
288 /* 0x0048 0 */ .type double2uint64_t,2
289 /* 0x0048 0 */ .size double2uint64_t,(.-double2uint64_t)
291 .section ".text",#alloc,#execinstr
292 /* 000000 0 */ .align 8
293 /* 000000 */ .skip 24
294 /* 0x0018 */ .align 32
296 ! 95 !}
297 ! 96 !#else
298 ! 97 !/*
299 ! 98 ! * This only works if 0 <= d < 2^63
300 ! 99 ! */
301 ! 100 !uint64_t
302 ! 101 !double2uint64_t(double* d)
303 ! 102 !{
304 ! 103 ! return ((int64_t)(*d));
305 ! 104 !}
306 ! 105 !#endif
307 ! 107 !/* ARGSUSED */
308 ! 108 !void
309 ! 109 !conv_d16_to_i32(uint32_t *i32, double *d16, int64_t *tmp, int ilen)
310 ! 110 !{
313 ! SUBROUTINE conv_d16_to_i32
315 ! OFFSET SOURCE LINE LABEL INSTRUCTION
/*
 * void conv_d16_to_i32(uint32_t *i32, double *d16, int64_t *tmp, int ilen)
 *
 * Packs an array of doubles holding 16-bit-radix digits (d16) into 32-bit
 * words (i32), propagating carries, as in the C listing.  The calls to
 * double2uint64_t() shown in the listing are expanded inline here (there
 * is no call instruction); each expansion tests the raw 64-bit pattern
 * for zero and otherwise extracts mantissa >> (52 - exp).
 *
 * In:  %i0 = i32, %i1 = d16, %i2 = tmp (not read; the register is reused
 *      for b), %i3 = ilen
 *
 * Main register roles:
 *   %i4 = a      %i2 = b      %l1 = c      %o2 = d
 *   %o3 = t1 (running carry)  %g3 = t = a >> 32
 *   %l4 = i      %l5 = ilen - 2 (inclusive loop bound)
 *   %g2 = d16 index 2*i + 2   %g1 = &i32[i]
 *   %l7 = 0x000fffffffffffff  %l3 = 1 << 52 (inside the loop)
 *   %l6 = 0xffffffff          %l2 = 0xffff
 */
317 .global conv_d16_to_i32
318 conv_d16_to_i32:
319 /* 000000 110 */ save %sp,-176,%sp
321 ! 111 ! int i;
322 ! 112 ! int64_t t, t1, /* using int64_t and not uint64_t */
323 ! 113 ! a, b, c, d; /* because more efficient code is */
324 ! 114 ! /* generated this way, and there */
325 ! 115 ! /* is no overflow */
326 ! 116 ! t1 = 0;
327 ! 117 ! a = double2uint64_t(&(d16[0]));
/*
 * Load the raw bit patterns of d16[0] and d16[1].  If d16[0] is all
 * zero, a = 0 (set in the delay slot) and the conversion is skipped.
 */
329 /* 0x0004 117 */ ldx [%i1],%o0
330 /* 0x0008 118 */ ldx [%i1+8],%i2
331 /* 0x000c 117 */ cmp %o0,0
332 /* 0x0010 */ bne,pn %xcc,.L77000216
333 /* 0x0014 */ or %g0,0,%i4
334 .L77000215:
/* a == 0: go test d16[1]; the compare for it is done in the delay slot. */
335 /* 0x0018 117 */ ba .L900000316
336 /* 0x001c 118 */ cmp %i2,0
337 .L77000216:
/* Inline double2uint64_t(&d16[0]) -> %i4 (a). */
338 /* 0x0020 117 */ srlx %o0,52,%o5
339 /* 0x0024 */ sethi %hi(0xfff00000),%i4
340 /* 0x0028 */ sllx %i4,32,%o2
341 /* 0x002c */ sethi %hi(0x40000000),%o7
342 /* 0x0030 */ sllx %o7,22,%o3
343 /* 0x0034 */ or %g0,1023,%o4
344 /* 0x0038 */ xor %o2,-1,%g5
345 /* 0x003c */ sub %o4,%o5,%l0
346 /* 0x0040 */ and %o0,%g5,%o1
347 /* 0x0044 */ add %l0,52,%l1
348 /* 0x0048 */ or %o1,%o3,%g4
350 ! 118 ! b = double2uint64_t(&(d16[1]));
352 /* 0x004c 118 */ cmp %i2,0
353 /* 0x0050 117 */ srlx %g4,%l1,%i4
354 .L900000316:
/*
 * Branch on the raw pattern of d16[1].  The delay slot computes
 * %l3 = ilen - 1 on both paths (used for the loop-entry test below).
 */
355 /* 0x0054 */ bne,pn %xcc,.L77000222
356 /* 0x0058 134 */ sub %i3,1,%l3
357 .L77000221:
/* d16[1] is zero: b = 0, t1 = 0. */
358 /* 0x005c 118 */ or %g0,0,%i2
359 /* 0x0060 */ ba .L900000315
360 /* 0x0064 116 */ or %g0,0,%o3
361 .L77000222:
/* Inline double2uint64_t(&d16[1]) -> %i2 (b); also t1 = 0 in %o3. */
362 /* 0x0068 118 */ srlx %i2,52,%l6
363 /* 0x006c */ sethi %hi(0xfff00000),%g4
364 /* 0x0070 */ sllx %g4,32,%i5
365 /* 0x0074 */ sethi %hi(0x40000000),%l5
366 /* 0x0078 */ xor %i5,-1,%l4
367 /* 0x007c */ or %g0,1023,%l2
368 /* 0x0080 */ and %i2,%l4,%l7
369 /* 0x0084 */ sllx %l5,22,%i2
370 /* 0x0088 */ sub %l2,%l6,%g1
371 /* 0x008c */ or %l7,%i2,%g3
372 /* 0x0090 */ add %g1,52,%g2
373 /* 0x0094 116 */ or %g0,0,%o3
374 /* 0x0098 118 */ srlx %g3,%g2,%i2
376 ! 119 ! for (i = 0; i < ilen - 1; i++) {
378 .L900000315:
/* Skip the loop when ilen - 1 <= 0; i = 0 is set in the delay slot. */
379 /* 0x009c 119 */ cmp %l3,0
380 /* 0x00a0 */ ble,pn %icc,.L77000210
381 /* 0x00a4 */ or %g0,0,%l4
382 .L77000245:
/*
 * Loop-invariant setup: mantissa mask (%l7), 1 << 52 (%l3, which
 * replaces the earlier ilen - 1 value), 0xffffffff (%l6), 0xffff (%l2),
 * loop bound ilen - 2 (%l5), first d16 index 2 (%g2), output pointer (%g1).
 */
383 /* 0x00a8 118 */ sethi %hi(0xfff00000),%l7
384 /* 0x00ac */ or %g0,-1,%l6
385 /* 0x00b0 */ sllx %l7,32,%l3
386 /* 0x00b4 */ srl %l6,0,%l6
387 /* 0x00b8 */ sethi %hi(0x40000000),%l1
388 /* 0x00bc */ sethi %hi(0xfc00),%l2
389 /* 0x00c0 */ xor %l3,-1,%l7
390 /* 0x00c4 */ sllx %l1,22,%l3
391 /* 0x00c8 */ sub %i3,2,%l5
392 /* 0x00cc */ add %l2,1023,%l2
393 /* 0x00d0 */ or %g0,2,%g2
394 /* 0x00d4 */ or %g0,%i0,%g1
396 ! 120 ! c = double2uint64_t(&(d16[2 * i + 2]));
398 .L77000208:
/*
 * Loop body.  Load the raw patterns of d16[2i+2] (%o5) and d16[2i+3]
 * (%i3; ilen in %i3 is no longer needed), and start the carry math:
 * %g4 = a & 0xffffffff, %i5 = b & 0xffff (in the branch delay slot).
 */
399 /* 0x00d8 120 */ sra %g2,0,%g3
400 /* 0x00dc 123 */ add %g2,1,%o2
401 /* 0x00e0 120 */ sllx %g3,3,%i3
403 ! 121 ! t1 += a & 0xffffffff;
404 ! 122 ! t = (a >> 32);
405 ! 123 ! d = double2uint64_t(&(d16[2 * i + 3]));
407 /* 0x00e4 123 */ sra %o2,0,%g5
408 /* 0x00e8 120 */ ldx [%i1+%i3],%o5
409 /* 0x00ec 123 */ sllx %g5,3,%o0
410 /* 0x00f0 121 */ and %i4,%l6,%g4
411 /* 0x00f4 123 */ ldx [%i1+%o0],%i3
412 /* 0x00f8 120 */ cmp %o5,0
413 /* 0x00fc */ bne,pn %xcc,.L77000228
414 /* 0x0100 124 */ and %i2,%l2,%i5
415 .L77000227:
/* d16[2i+2] is zero: c = 0; %o0 = t1 + (a & 0xffffffff). */
416 /* 0x0104 120 */ or %g0,0,%l1
417 /* 0x0108 */ ba .L900000314
418 /* 0x010c 121 */ add %o3,%g4,%o0
419 .L77000228:
/* Inline conversion of d16[2i+2] -> %l1 (c); then the same t1 update. */
420 /* 0x0110 120 */ srlx %o5,52,%o7
421 /* 0x0114 */ and %o5,%l7,%o5
422 /* 0x0118 */ or %g0,52,%l0
423 /* 0x011c */ sub %o7,1023,%o4
424 /* 0x0120 */ or %o5,%l3,%l1
425 /* 0x0124 */ sub %l0,%o4,%o1
426 /* 0x0128 */ srlx %l1,%o1,%l1
427 /* 0x012c 121 */ add %o3,%g4,%o0
428 .L900000314:
/*
 * %g3 = t = a >> 32.  Branch on the raw pattern of d16[2i+3]; the delay
 * slot computes %g5 = (b & 0xffff) << 16 on both paths.
 */
429 /* 0x0130 122 */ srax %i4,32,%g3
430 /* 0x0134 123 */ cmp %i3,0
431 /* 0x0138 */ bne,pn %xcc,.L77000234
432 /* 0x013c 124 */ sllx %i5,16,%g5
433 .L77000233:
/* d16[2i+3] is zero: d = 0; %o7 = t1 + ((b & 0xffff) << 16). */
434 /* 0x0140 123 */ or %g0,0,%o2
435 /* 0x0144 */ ba .L900000313
436 /* 0x0148 124 */ add %o0,%g5,%o7
437 .L77000234:
/* Inline conversion of d16[2i+3] -> %o2 (d); then the same t1 update. */
438 /* 0x014c 123 */ srlx %i3,52,%o2
439 /* 0x0150 */ and %i3,%l7,%i4
440 /* 0x0154 */ sub %o2,1023,%o1
441 /* 0x0158 */ or %g0,52,%g4
442 /* 0x015c */ sub %g4,%o1,%i5
443 /* 0x0160 */ or %i4,%l3,%i3
444 /* 0x0164 */ srlx %i3,%i5,%o2
446 ! 124 ! t1 += (b & 0xffff) << 16;
448 /* 0x0168 124 */ add %o0,%g5,%o7
450 ! 125 ! t += (b >> 16) + (t1 >> 32);
452 .L900000313:
/*
 * %o3 = t + (b >> 16) + (t1 >> 32) becomes the next t1;
 * i32[i] = t1 & 0xffffffff; then a = c, b = d, i++, index += 2.
 */
453 /* 0x016c 125 */ srax %i2,16,%l0
454 /* 0x0170 */ srax %o7,32,%o4
455 /* 0x0174 */ add %l0,%o4,%o3
457 ! 126 ! i32[i] = t1 & 0xffffffff;
458 ! 127 ! t1 = t;
459 ! 128 ! a = c;
460 ! 129 ! b = d;
462 /* 0x0178 129 */ add %l4,1,%l4
463 /* 0x017c 126 */ and %o7,%l6,%o5
464 /* 0x0180 125 */ add %g3,%o3,%o3
465 /* 0x0184 126 */ st %o5,[%g1]
466 /* 0x0188 128 */ or %g0,%l1,%i4
467 /* 0x018c 129 */ or %g0,%o2,%i2
468 /* 0x0190 */ add %g2,2,%g2
/* Loop while i <= ilen - 2; the delay slot advances the output pointer. */
469 /* 0x0194 */ cmp %l4,%l5
470 /* 0x0198 */ ble,pt %icc,.L77000208
471 /* 0x019c */ add %g1,4,%g1
473 ! 130 ! }
474 ! 131 ! t1 += a & 0xffffffff;
475 ! 132 ! t = (a >> 32);
476 ! 133 ! t1 += (b & 0xffff) << 16;
477 ! 134 ! i32[i] = t1 & 0xffffffff;
479 .L77000210:
/*
 * Final word: i32[i] = t1 + a + ((b & 0xffff) << 16).  No explicit
 * 32-bit masking is done here; st writes a 32-bit word.
 */
480 /* 0x01a0 134 */ sra %l4,0,%l4
481 /* 0x01a4 */ sethi %hi(0xfc00),%i1
482 /* 0x01a8 */ add %o3,%i4,%l2
483 /* 0x01ac */ add %i1,1023,%i5
484 /* 0x01b0 */ and %i2,%i5,%l5
485 /* 0x01b4 */ sllx %l4,2,%i2
486 /* 0x01b8 */ sllx %l5,16,%l6
487 /* 0x01bc */ add %l2,%l6,%l7
488 /* 0x01c0 */ st %l7,[%i0+%i2]
489 /* 0x01c4 129 */ ret ! Result =
490 /* 0x01c8 */ restore %g0,%g0,%g0
491 /* 0x01cc 0 */ .type conv_d16_to_i32,2
492 /* 0x01cc 0 */ .size conv_d16_to_i32,(.-conv_d16_to_i32)
494 .section ".text",#alloc,#execinstr
495 /* 000000 0 */ .align 8
497 ! CONSTANT POOL
/*
 * Constants for conv_i32_to_d32.  1127219200 = 0x43300000 is the upper
 * word of the double 2^52.
 *   offset 0: the double 2^52 (upper word, then a zero lower word)
 *   offset 8: the upper word alone; the routine pairs it with a 32-bit
 *             integer as the lower word to form the double 2^52 + value.
 */
499 ___const_seg_900000401:
500 /* 000000 0 */ .word 1127219200,0
501 /* 0x0008 */ .word 1127219200
502 /* 0x000c 0 */ .type ___const_seg_900000401,1
503 /* 0x000c 0 */ .size ___const_seg_900000401,(.-___const_seg_900000401)
504 /* 0x000c 0 */ .align 8
505 /* 0x0010 */ .skip 24
506 /* 0x0028 */ .align 32
508 ! 135 !}
509 ! 138 !void
510 ! 139 !conv_i32_to_d32(double *d32, uint32_t *i32, int len)
511 ! 140 !{
514 ! SUBROUTINE conv_i32_to_d32
516 ! OFFSET SOURCE LINE LABEL INSTRUCTION
/*
 * void conv_i32_to_d32(double *d32, uint32_t *i32, int len)  -- leaf.
 *
 * d32[i] = (double)i32[i] for 0 <= i < len.
 *
 * Each conversion is done in the FP unit: the 32-bit value is loaded
 * into the odd (lower) half of a double register whose even (upper)
 * half holds 0x43300000, giving the double 2^52 + value; subtracting
 * 2^52 (%f16) leaves the value as a double.
 *
 * In:  %o0 = d32, %o1 = i32, %o2 = len
 * Register roles: %o3 = len - 1, %g5 = i, %o2 = output pointer,
 *   %o1/%o0 = input pointer, %g4/%o4 = %hi() of the constant segment.
 *
 * For len >= 10 an 8-way unrolled, software-pipelined loop is used,
 * followed by a one-element-per-iteration loop for the remainder; for
 * len < 10 only the latter runs.
 */
518 .global conv_i32_to_d32
519 conv_i32_to_d32:
/* Set condition codes from len. */
520 /* 000000 140 */ orcc %g0,%o2,%o2
522 ! 141 ! int i;
523 ! 143 !#pragma pipeloop(0)
524 ! 144 ! for (i = 0; i < len; i++)
/* Return if len <= 0; %o3 = len - 1 is computed in the delay slot. */
526 /* 0x0004 144 */ ble,pn %icc,.L77000254
527 /* 0x0008 */ sub %o2,1,%o3
528 .L77000263:
529 /* 0x000c 140 */ or %g0,%o0,%o2
531 ! 145 ! d32[i] = (double)(i32[i]);
/* %o5 = len, i = 0; fewer than 10 elements go straight to the simple loop. */
533 /* 0x0010 145 */ add %o3,1,%o5
534 /* 0x0014 144 */ or %g0,0,%g5
535 /* 0x0018 145 */ cmp %o5,10
536 /* 0x001c */ bl,pn %icc,.L77000261
537 /* 0x0020 */ sethi %hi(___const_seg_900000401),%g4
538 .L900000407:
/*
 * Unrolled-loop prologue: issue prefetches on the input and output
 * arrays, set %o0 = i32 + 8 bytes, %o5 = len - 8 (loop bound), i = 2,
 * load the constants, and preload i32[0] into %f2:%f3 and i32[1] into
 * %f0:%f1 (upper halves = 0x43300000).
 */
539 /* 0x0024 145 */ prefetch [%o1],0
540 /* 0x0028 */ prefetch [%o0],22
541 /* 0x002c */ sethi %hi(___const_seg_900000401+8),%o4
542 /* 0x0030 */ or %g0,%o0,%o2
543 /* 0x0034 */ prefetch [%o1+64],0
544 /* 0x0038 */ add %o1,8,%o0
545 /* 0x003c */ sub %o3,7,%o5
546 /* 0x0040 */ prefetch [%o2+64],22
547 /* 0x0044 */ or %g0,2,%g5
548 /* 0x0048 */ prefetch [%o2+128],22
549 /* 0x004c */ prefetch [%o2+192],22
550 /* 0x0050 */ prefetch [%o1+128],0
551 /* 0x0054 */ ld [%o4+%lo(___const_seg_900000401+8)],%f2
552 /* 0x0058 */ ldd [%g4+%lo(___const_seg_900000401)],%f16
553 /* 0x005c */ fmovs %f2,%f0
554 /* 0x0060 */ prefetch [%o2+256],22
555 /* 0x0064 */ prefetch [%o2+320],22
556 /* 0x0068 */ ld [%o1],%f3
557 /* 0x006c */ prefetch [%o1+192],0
558 /* 0x0070 */ ld [%o1+4],%f1
559 .L900000405:
/*
 * Unrolled loop: each iteration stores eight doubles (the output pointer
 * advances 64 bytes, the input pointer 32 bytes, i by 8).  Loads of the
 * next input words are interleaved with the fsubd/std of earlier ones;
 * fmovs copies the 0x43300000 upper word into the next register pair.
 */
560 /* 0x0074 145 */ prefetch [%o0+188],0
561 /* 0x0078 */ fsubd %f2,%f16,%f22
562 /* 0x007c */ add %g5,8,%g5
563 /* 0x0080 */ add %o0,32,%o0
564 /* 0x0084 */ ld [%o4+%lo(___const_seg_900000401+8)],%f4
565 /* 0x0088 */ std %f22,[%o2]
566 /* 0x008c */ cmp %g5,%o5
567 /* 0x0090 */ ld [%o0-32],%f5
568 /* 0x0094 */ fsubd %f0,%f16,%f24
569 /* 0x0098 */ add %o2,64,%o2
570 /* 0x009c */ fmovs %f4,%f0
571 /* 0x00a0 */ std %f24,[%o2-56]
572 /* 0x00a4 */ ld [%o0-28],%f1
573 /* 0x00a8 */ fsubd %f4,%f16,%f26
574 /* 0x00ac */ fmovs %f0,%f6
575 /* 0x00b0 */ prefetch [%o2+312],22
576 /* 0x00b4 */ std %f26,[%o2-48]
577 /* 0x00b8 */ ld [%o0-24],%f7
578 /* 0x00bc */ fsubd %f0,%f16,%f28
579 /* 0x00c0 */ fmovs %f6,%f8
580 /* 0x00c4 */ std %f28,[%o2-40]
581 /* 0x00c8 */ ld [%o0-20],%f9
582 /* 0x00cc */ fsubd %f6,%f16,%f30
583 /* 0x00d0 */ fmovs %f8,%f10
584 /* 0x00d4 */ std %f30,[%o2-32]
585 /* 0x00d8 */ ld [%o0-16],%f11
586 /* 0x00dc */ prefetch [%o2+344],22
587 /* 0x00e0 */ fsubd %f8,%f16,%f48
588 /* 0x00e4 */ fmovs %f10,%f12
589 /* 0x00e8 */ std %f48,[%o2-24]
590 /* 0x00ec */ ld [%o0-12],%f13
591 /* 0x00f0 */ fsubd %f10,%f16,%f50
592 /* 0x00f4 */ fmovs %f12,%f2
593 /* 0x00f8 */ std %f50,[%o2-16]
594 /* 0x00fc */ ld [%o0-8],%f3
595 /* 0x0100 */ fsubd %f12,%f16,%f52
596 /* 0x0104 */ fmovs %f2,%f0
597 /* 0x0108 */ std %f52,[%o2-8]
/* Loop while i <= len - 8 (compare issued earlier in the body). */
598 /* 0x010c */ ble,pt %icc,.L900000405
599 /* 0x0110 */ ld [%o0-4],%f1
600 .L900000408:
/*
 * Pipeline drain: convert and store the two values still held in
 * %f2:%f3 and %f0:%f1, hand the input pointer back to %o1, and return
 * if i > len - 1 (nothing left for the simple loop).
 */
601 /* 0x0114 145 */ fsubd %f2,%f16,%f18
602 /* 0x0118 */ add %o2,16,%o2
603 /* 0x011c */ cmp %g5,%o3
604 /* 0x0120 */ std %f18,[%o2-16]
605 /* 0x0124 */ fsubd %f0,%f16,%f20
606 /* 0x0128 */ or %g0,%o0,%o1
607 /* 0x012c */ bg,pn %icc,.L77000254
608 /* 0x0130 */ std %f20,[%o2-8]
609 .L77000261:
/* Simple loop: one element per iteration. */
610 /* 0x0134 145 */ ld [%o1],%f15
611 .L900000409:
612 /* 0x0138 145 */ sethi %hi(___const_seg_900000401+8),%o4
613 /* 0x013c */ ldd [%g4+%lo(___const_seg_900000401)],%f16
614 /* 0x0140 */ add %g5,1,%g5
615 /* 0x0144 */ ld [%o4+%lo(___const_seg_900000401+8)],%f14
616 /* 0x0148 */ add %o1,4,%o1
617 /* 0x014c */ cmp %g5,%o3
618 /* 0x0150 */ fsubd %f14,%f16,%f54
619 /* 0x0154 */ std %f54,[%o2]
620 /* 0x0158 */ add %o2,8,%o2
/* Loop while i <= len - 1; the annulled delay slot loads the next word
 * only when the branch is taken. */
621 /* 0x015c */ ble,a,pt %icc,.L900000409
622 /* 0x0160 */ ld [%o1],%f15
623 .L77000254:
624 /* 0x0164 145 */ retl ! Result =
625 /* 0x0168 */ nop
626 /* 0x016c 0 */ .type conv_i32_to_d32,2
627 /* 0x016c 0 */ .size conv_i32_to_d32,(.-conv_i32_to_d32)
629 .section ".text",#alloc,#execinstr
630 /* 000000 0 */ .align 8
632 ! CONSTANT POOL
/*
 * Constants for conv_i32_to_d16.  1127219200 = 0x43300000 is the upper
 * word of the double 2^52.
 *   offset 0: the double 2^52 (upper word, then a zero lower word)
 *   offset 8: the upper word alone; the routine pairs it with a 16-bit
 *             value as the lower word to form the double 2^52 + value.
 */
634 ___const_seg_900000501:
635 /* 000000 0 */ .word 1127219200,0
636 /* 0x0008 */ .word 1127219200
637 /* 0x000c 0 */ .type ___const_seg_900000501,1
638 /* 0x000c 0 */ .size ___const_seg_900000501,(.-___const_seg_900000501)
639 /* 0x000c 0 */ .align 8
640 /* 0x0010 */ .skip 24
641 /* 0x0028 */ .align 32
643 ! 146 !}
644 ! 149 !void
645 ! 150 !conv_i32_to_d16(double *d16, uint32_t *i32, int len)
646 ! 151 !{
649 ! SUBROUTINE conv_i32_to_d16
651 ! OFFSET SOURCE LINE LABEL INSTRUCTION
/*
 * void conv_i32_to_d16(double *d16, uint32_t *i32, int len)
 *
 * For each 32-bit word a = i32[i]:
 *     d16[2*i]     = (double)(a & 0xffff)
 *     d16[2*i + 1] = (double)(a >> 16)
 *
 * The 16-bit halves are produced in integer registers, written to
 * scratch slots in this routine's stack frame, and reloaded into the
 * odd (lower) half of an FP double register whose even (upper) half is
 * 0x43300000.  That forms 2^52 + value; subtracting 2^52 (%f20) gives
 * the value as a double.
 *
 * In:  %i0 = d16, %i1 = i32, %i2 = len
 * Register roles: %l6 = len - 1, %i4 = 0xffff, %l7 = input pointer,
 *   %i2 = d16 base, %l2 = output pointer (unrolled loop), %i5 = i,
 *   %i3 = d16 index, %i1/%l1 = %hi() of the constant segment.
 *
 * For len >= 4 a software-pipelined loop handling two words per
 * iteration is used, followed by a one-word-per-iteration loop for
 * any remainder; for len < 4 only the latter runs.
 */
653 .global conv_i32_to_d16
654 conv_i32_to_d16:
655 /* 000000 151 */ save %sp,-368,%sp
656 /* 0x0004 */ orcc %g0,%i2,%i2
658 ! 152 ! int i;
659 ! 153 ! uint32_t a;
660 ! 155 !#pragma pipeloop(0)
661 ! 156 ! for (i = 0; i < len; i++) {
/* Return if len <= 0; %l6 = len - 1 is computed in the delay slot. */
663 /* 0x0008 156 */ ble,pn %icc,.L77000272
664 /* 0x000c */ sub %i2,1,%l6
665 .L77000281:
/*
 * Common setup: %i4 = 0xffff, %l7 = i32, %i2 = d16, i = 0, d16 index = 0.
 * Fewer than 4 words go straight to the simple loop.
 */
666 /* 0x0010 156 */ sethi %hi(0xfc00),%i3
668 ! 157 ! a = i32[i];
670 /* 0x0014 157 */ or %g0,%i2,%l1
671 /* 0x0018 156 */ add %i3,1023,%i4
672 /* 0x001c 157 */ cmp %i2,4
673 /* 0x0020 151 */ or %g0,%i1,%l7
674 /* 0x0024 */ or %g0,%i0,%i2
675 /* 0x0028 156 */ or %g0,0,%i5
676 /* 0x002c */ or %g0,0,%i3
677 /* 0x0030 157 */ bl,pn %icc,.L77000279
678 /* 0x0034 0 */ sethi %hi(___const_seg_900000501),%i1
679 .L900000508:
/*
 * Pipelined-loop prologue: prefetch the output array, set the loop
 * bound %i0 = len - 2 and i = 2, load i32[0] and i32[1], split each into
 * low/high 16-bit halves and park them in four stack scratch slots:
 *   [%sp+2335] = low16(i32[0])    [%sp+2271] = high16(i32[0])
 *   [%sp+2303] = low16(i32[1])    [%sp+2239] = high16(i32[1])
 */
680 /* 0x0038 157 */ prefetch [%i0+8],22
681 /* 0x003c */ prefetch [%i0+72],22
682 /* 0x0040 */ or %g0,%i0,%l2
684 ! 158 ! d16[2 * i] = (double)(a & 0xffff);
686 /* 0x0044 158 */ sethi %hi(___const_seg_900000501+8),%l1
687 /* 0x0048 157 */ prefetch [%i0+136],22
688 /* 0x004c */ sub %l6,1,%i0
689 /* 0x0050 */ or %g0,0,%i3
690 /* 0x0054 */ prefetch [%i2+200],22
691 /* 0x0058 */ or %g0,2,%i5
692 /* 0x005c */ prefetch [%i2+264],22
693 /* 0x0060 */ prefetch [%i2+328],22
694 /* 0x0064 */ prefetch [%i2+392],22
695 /* 0x0068 */ ld [%l7],%l3
696 /* 0x006c */ ld [%l7+4],%l4
697 /* 0x0070 158 */ ldd [%i1+%lo(___const_seg_900000501)],%f20
699 ! 159 ! d16[2 * i + 1] = (double)(a >> 16);
701 /* 0x0074 159 */ srl %l3,16,%o1
702 /* 0x0078 158 */ and %l3,%i4,%o3
703 /* 0x007c */ st %o3,[%sp+2335]
704 /* 0x0080 159 */ srl %l4,16,%g4
705 /* 0x0084 158 */ and %l4,%i4,%o0
706 /* 0x0088 */ st %o0,[%sp+2303]
707 /* 0x008c 159 */ add %l7,8,%l7
708 /* 0x0090 */ st %o1,[%sp+2271]
709 /* 0x0094 */ st %g4,[%sp+2239]
710 /* 0x0098 157 */ prefetch [%i2+456],22
711 /* 0x009c */ prefetch [%i2+520],22
712 .L900000506:
/*
 * Pipelined loop: each iteration reloads the four parked halves into FP
 * registers, converts and stores four doubles (output pointer += 32),
 * and meanwhile loads the next two input words and parks their halves
 * in the same stack slots.  i += 2, d16 index += 4.
 */
713 /* 0x00a0 157 */ prefetch [%l2+536],22
714 /* 0x00a4 159 */ add %i5,2,%i5
715 /* 0x00a8 157 */ add %l2,32,%l2
716 /* 0x00ac */ ld [%l7],%g2
717 /* 0x00b0 159 */ cmp %i5,%i0
718 /* 0x00b4 */ add %l7,8,%l7
719 /* 0x00b8 158 */ ld [%sp+2335],%f9
720 /* 0x00bc 159 */ add %i3,4,%i3
721 /* 0x00c0 158 */ ld [%l1+%lo(___const_seg_900000501+8)],%f8
722 /* 0x00c4 159 */ ld [%sp+2271],%f11
723 /* 0x00c8 158 */ and %g2,%i4,%g3
724 /* 0x00cc 159 */ fmovs %f8,%f10
725 /* 0x00d0 158 */ st %g3,[%sp+2335]
726 /* 0x00d4 */ fsubd %f8,%f20,%f28
727 /* 0x00d8 */ std %f28,[%l2-32]
728 /* 0x00dc 159 */ srl %g2,16,%g1
729 /* 0x00e0 */ st %g1,[%sp+2271]
730 /* 0x00e4 */ fsubd %f10,%f20,%f30
731 /* 0x00e8 */ std %f30,[%l2-24]
732 /* 0x00ec 157 */ ld [%l7-4],%l0
733 /* 0x00f0 158 */ ld [%sp+2303],%f13
734 /* 0x00f4 */ ld [%l1+%lo(___const_seg_900000501+8)],%f12
735 /* 0x00f8 159 */ ld [%sp+2239],%f15
736 /* 0x00fc 158 */ and %l0,%i4,%l5
737 /* 0x0100 159 */ fmovs %f12,%f14
738 /* 0x0104 158 */ st %l5,[%sp+2303]
739 /* 0x0108 */ fsubd %f12,%f20,%f44
740 /* 0x010c */ std %f44,[%l2-16]
741 /* 0x0110 159 */ srl %l0,16,%o5
742 /* 0x0114 */ st %o5,[%sp+2239]
743 /* 0x0118 */ fsubd %f14,%f20,%f46
/* Loop while i <= len - 2 (compare issued earlier in the body). */
744 /* 0x011c */ ble,pt %icc,.L900000506
745 /* 0x0120 */ std %f46,[%l2-8]
746 .L900000509:
/*
 * Pipeline drain: convert and store the four halves still parked in the
 * stack slots (two input words), bump the d16 index by 4, and return if
 * i > len - 1 (nothing left for the simple loop).
 */
747 /* 0x0124 158 */ ld [%l1+%lo(___const_seg_900000501+8)],%f0
748 /* 0x0128 159 */ cmp %i5,%l6
749 /* 0x012c */ add %i3,4,%i3
750 /* 0x0130 158 */ ld [%sp+2335],%f1
751 /* 0x0134 */ ld [%sp+2303],%f5
752 /* 0x0138 159 */ fmovs %f0,%f2
753 /* 0x013c */ ld [%sp+2271],%f3
754 /* 0x0140 158 */ fmovs %f0,%f4
755 /* 0x0144 159 */ ld [%sp+2239],%f7
756 /* 0x0148 */ fmovs %f0,%f6
757 /* 0x014c 158 */ fsubd %f0,%f20,%f22
758 /* 0x0150 */ std %f22,[%l2]
759 /* 0x0154 159 */ fsubd %f2,%f20,%f24
760 /* 0x0158 */ std %f24,[%l2+8]
761 /* 0x015c 158 */ fsubd %f4,%f20,%f26
762 /* 0x0160 */ std %f26,[%l2+16]
763 /* 0x0164 159 */ fsubd %f6,%f20,%f20
764 /* 0x0168 */ bg,pn %icc,.L77000272
765 /* 0x016c */ std %f20,[%l2+24]
766 .L77000279:
/*
 * Simple loop: one input word per iteration.  The two 16-bit halves go
 * through the stack slots [%sp+2399] (low) and [%sp+2367] (high) and are
 * stored at d16[index] and d16[index + 1]; i++, index += 2.
 */
767 /* 0x0170 157 */ ld [%l7],%l2
768 .L900000510:
769 /* 0x0174 158 */ and %l2,%i4,%o4
770 /* 0x0178 */ st %o4,[%sp+2399]
771 /* 0x017c 159 */ srl %l2,16,%o2
772 /* 0x0180 */ st %o2,[%sp+2367]
773 /* 0x0184 158 */ sethi %hi(___const_seg_900000501+8),%l1
774 /* 0x0188 */ sra %i3,0,%i0
775 /* 0x018c */ ld [%l1+%lo(___const_seg_900000501+8)],%f16
776 /* 0x0190 */ sllx %i0,3,%o1
777 /* 0x0194 159 */ add %i3,1,%o3
778 /* 0x0198 158 */ ldd [%i1+%lo(___const_seg_900000501)],%f20
779 /* 0x019c 159 */ sra %o3,0,%l3
780 /* 0x01a0 */ add %i5,1,%i5
781 /* 0x01a4 158 */ ld [%sp+2399],%f17
782 /* 0x01a8 159 */ sllx %l3,3,%o0
783 /* 0x01ac */ add %l7,4,%l7
784 /* 0x01b0 */ fmovs %f16,%f18
785 /* 0x01b4 */ cmp %i5,%l6
786 /* 0x01b8 */ add %i3,2,%i3
787 /* 0x01bc 158 */ fsubd %f16,%f20,%f48
788 /* 0x01c0 */ std %f48,[%i2+%o1]
789 /* 0x01c4 159 */ ld [%sp+2367],%f19
790 /* 0x01c8 */ fsubd %f18,%f20,%f50
791 /* 0x01cc */ std %f50,[%i2+%o0]
/* Loop while i <= len - 1; the annulled delay slot loads the next word
 * only when the branch is taken. */
792 /* 0x01d0 */ ble,a,pt %icc,.L900000510
793 /* 0x01d4 157 */ ld [%l7],%l2
794 .L77000272:
795 /* 0x01d8 159 */ ret ! Result =
796 /* 0x01dc */ restore %g0,%g0,%g0
797 /* 0x01e0 0 */ .type conv_i32_to_d16,2
798 /* 0x01e0 0 */ .size conv_i32_to_d16,(.-conv_i32_to_d16)
800 .section ".text",#alloc,#execinstr
801 /* 000000 0 */ .align 8
803 ! CONSTANT POOL
/*
 * Constants referenced by conv_i32_to_d32_and_d16.  1127219200 =
 * 0x43300000 is the upper word of the double 2^52.
 *   offset 0: the double 2^52 (upper word, then a zero lower word)
 *   offset 8: the upper word alone, loaded into the even half of an FP
 *             register pair whose odd half receives an integer value.
 */
805 ___const_seg_900000601:
806 /* 000000 0 */ .word 1127219200,0
807 /* 0x0008 */ .word 1127219200
808 /* 0x000c 0 */ .type ___const_seg_900000601,1
809 /* 0x000c 0 */ .size ___const_seg_900000601,(.-___const_seg_900000601)
810 /* 0x000c 0 */ .align 8
811 /* 0x0010 */ .skip 24
812 /* 0x0028 */ .align 32
814 ! 160 ! }
815 ! 161 !}
816 ! 163 !#ifdef RF_INLINE_MACROS
817 ! 165 !void
818 ! 166 !i16_to_d16_and_d32x4(const double *, /* 1/(2^16) */
819 ! 167 ! const double *, /* 2^16 */
820 ! 168 ! const double *, /* 0 */
821 ! 169 ! double *, /* result16 */
822 ! 170 ! double *, /* result32 */
823 ! 171 ! float *); /* source - should be unsigned int* */
824 ! 172 ! /* converted to float* */
825 ! 174 !#else
826 ! 177 !/* ARGSUSED */
827 ! 178 !static void
828 ! 179 !i16_to_d16_and_d32x4(const double *dummy1, /* 1/(2^16) */
829 ! 180 ! const double *dummy2, /* 2^16 */
830 ! 181 ! const double *dummy3, /* 0 */
831 ! 182 ! double *result16,
832 ! 183 ! double *result32,
833 ! 184 ! float *src) /* source - should be unsigned int* */
834 ! 185 ! /* converted to float* */
835 ! 186 !{
836 ! 187 ! uint32_t *i32;
837 ! 188 ! uint32_t a, b, c, d;
838 ! 190 ! i32 = (uint32_t *)src;
839 ! 191 ! a = i32[0];
840 ! 192 ! b = i32[1];
841 ! 193 ! c = i32[2];
842 ! 194 ! d = i32[3];
843 ! 195 ! result16[0] = (double)(a & 0xffff);
844 ! 196 ! result16[1] = (double)(a >> 16);
845 ! 197 ! result32[0] = (double)a;
846 ! 198 ! result16[2] = (double)(b & 0xffff);
847 ! 199 ! result16[3] = (double)(b >> 16);
848 ! 200 ! result32[1] = (double)b;
849 ! 201 ! result16[4] = (double)(c & 0xffff);
850 ! 202 ! result16[5] = (double)(c >> 16);
851 ! 203 ! result32[2] = (double)c;
852 ! 204 ! result16[6] = (double)(d & 0xffff);
853 ! 205 ! result16[7] = (double)(d >> 16);
854 ! 206 ! result32[3] = (double)d;
855 ! 207 !}
856 ! 209 !#endif
857 ! 212 !void
858 ! 213 !conv_i32_to_d32_and_d16(double *d32, double *d16, uint32_t *i32, int len)
859 ! 214 !{
862 ! SUBROUTINE conv_i32_to_d32_and_d16
864 ! OFFSET SOURCE LINE LABEL INSTRUCTION
866 .global conv_i32_to_d32_and_d16
867 conv_i32_to_d32_and_d16:
868 /* 000000 214 */ save %sp,-368,%sp
870 ! 215 ! int i;
871 ! 216 ! uint32_t a;
872 ! 218 !#pragma pipeloop(0)
873 ! 219 ! for (i = 0; i < len - 3; i += 4) {
874 ! 220 ! i16_to_d16_and_d32x4(&TwoToMinus16, &TwoTo16, &Zero,
875 ! 221 ! &(d16[2*i]), &(d32[i]),
876 ! 222 ! (float *)(&(i32[i])));
877 ! 223 ! }
878 ! 224 ! for (; i < len; i++) {
879 ! 225 ! a = i32[i];
880 ! 226 ! d32[i] = (double)(i32[i]);
881 ! 227 ! d16[2 * i] = (double)(a & 0xffff);
882 ! 228 ! d16[2 * i + 1] = (double)(a >> 16);
884 /* 0x0004 228 */ sub %i3,3,%i4
885 /* 0x0008 219 */ cmp %i4,0
886 /* 0x000c */ ble,pn %icc,.L77000289
887 /* 0x0010 */ or %g0,0,%i5
888 .L77000306:
889 /* 0x0014 222 */ sethi %hi(Zero),%g3
890 /* 0x0018 */ sethi %hi(TwoToMinus16),%g2
891 /* 0x001c */ sethi %hi(TwoTo16),%o5
892 /* 0x0020 */ ldd [%g3+%lo(Zero)],%f2
893 /* 0x0024 219 */ sub %i3,4,%o4
894 /* 0x0028 */ or %g0,0,%o3
895 /* 0x002c */ or %g0,%i0,%l6
896 /* 0x0030 */ or %g0,%i2,%l5
897 .L900000615:
898 /* 0x0034 222 */ fmovd %f2,%f26
899 /* 0x0038 */ ld [%l5],%f27
900 /* 0x003c */ sra %o3,0,%o0
901 /* 0x0040 */ add %i5,4,%i5
902 /* 0x0044 */ fmovd %f2,%f28
903 /* 0x0048 */ ld [%l5+4],%f29
904 /* 0x004c */ sllx %o0,3,%g5
905 /* 0x0050 */ cmp %i5,%o4
906 /* 0x0054 */ fmovd %f2,%f30
907 /* 0x0058 */ ld [%l5+8],%f31
908 /* 0x005c */ add %i1,%g5,%g4
909 /* 0x0060 */ add %o3,8,%o3
910 /* 0x0064 */ ld [%l5+12],%f3
911 /* 0x0068 */ fxtod %f26,%f26
912 /* 0x006c */ ldd [%g2+%lo(TwoToMinus16)],%f32
913 /* 0x0070 */ fxtod %f28,%f28
914 /* 0x0074 */ add %l5,16,%l5
915 /* 0x0078 */ fxtod %f30,%f30
916 /* 0x007c */ ldd [%o5+%lo(TwoTo16)],%f34
917 /* 0x0080 */ fxtod %f2,%f2
918 /* 0x0084 */ std %f2,[%l6+24]
919 /* 0x0088 */ fmuld %f32,%f26,%f36
920 /* 0x008c */ std %f26,[%l6]
921 /* 0x0090 */ fmuld %f32,%f28,%f38
922 /* 0x0094 */ std %f28,[%l6+8]
923 /* 0x0098 */ fmuld %f32,%f30,%f40
924 /* 0x009c */ std %f30,[%l6+16]
925 /* 0x00a0 */ fmuld %f32,%f2,%f42
926 /* 0x00a4 */ add %l6,32,%l6
927 /* 0x00a8 */ fdtox %f36,%f36
928 /* 0x00ac */ fdtox %f38,%f38
929 /* 0x00b0 */ fdtox %f40,%f40
930 /* 0x00b4 */ fdtox %f42,%f42
931 /* 0x00b8 */ fxtod %f36,%f36
932 /* 0x00bc */ std %f36,[%g4+8]
933 /* 0x00c0 */ fxtod %f38,%f38
934 /* 0x00c4 */ std %f38,[%g4+24]
935 /* 0x00c8 */ fxtod %f40,%f40
936 /* 0x00cc */ std %f40,[%g4+40]
937 /* 0x00d0 */ fxtod %f42,%f42
938 /* 0x00d4 */ std %f42,[%g4+56]
939 /* 0x00d8 */ fmuld %f36,%f34,%f36
940 /* 0x00dc */ fmuld %f38,%f34,%f38
941 /* 0x00e0 */ fmuld %f40,%f34,%f40
942 /* 0x00e4 */ fmuld %f42,%f34,%f42
943 /* 0x00e8 */ fsubd %f26,%f36,%f36
944 /* 0x00ec */ std %f36,[%i1+%g5]
945 /* 0x00f0 */ fsubd %f28,%f38,%f38
946 /* 0x00f4 */ std %f38,[%g4+16]
947 /* 0x00f8 */ fsubd %f30,%f40,%f40
948 /* 0x00fc */ std %f40,[%g4+32]
949 /* 0x0100 */ fsubd %f2,%f42,%f42
950 /* 0x0104 */ std %f42,[%g4+48]
951 /* 0x0108 */ ble,a,pt %icc,.L900000615
952 /* 0x010c */ ldd [%g3+%lo(Zero)],%f2
953 .L77000289:
954 /* 0x0110 224 */ cmp %i5,%i3
955 /* 0x0114 */ bge,pn %icc,.L77000294
956 /* 0x0118 */ sethi %hi(0xfc00),%l0
957 .L77000307:
958 /* 0x011c 224 */ sra %i5,0,%l2
959 /* 0x0120 */ sll %i5,1,%i4
960 /* 0x0124 */ sllx %l2,3,%l1
961 /* 0x0128 */ sllx %l2,2,%o1
962 /* 0x012c 225 */ sub %i3,%i5,%l3
963 /* 0x0130 224 */ add %l0,1023,%l0
964 /* 0x0134 */ add %l1,%i0,%l1
965 /* 0x0138 */ add %o1,%i2,%i2
966 /* 0x013c 225 */ cmp %l3,5
967 /* 0x0140 */ bl,pn %icc,.L77000291
968 /* 0x0144 0 */ sethi %hi(___const_seg_900000601),%l7
969 .L900000612:
970 /* 0x0148 225 */ prefetch [%l1],22
971 /* 0x014c */ prefetch [%l1+64],22
972 /* 0x0150 */ sra %i4,0,%l6
973 /* 0x0154 226 */ sethi %hi(___const_seg_900000601+8),%l2
974 /* 0x0158 225 */ prefetch [%l1+128],22
975 /* 0x015c */ add %l6,-2,%l5
976 /* 0x0160 */ sub %i3,3,%i0
977 /* 0x0164 */ prefetch [%l1+192],22
978 /* 0x0168 */ sllx %l5,3,%o4
979 /* 0x016c 228 */ add %i5,1,%i5
980 /* 0x0170 225 */ add %i1,%o4,%o3
981 /* 0x0174 */ or %g0,%i3,%g1
982 /* 0x0178 */ ld [%i2],%l4
983 /* 0x017c */ prefetch [%o3+16],22
984 /* 0x0180 */ add %o3,16,%l3
985 /* 0x0184 228 */ add %i2,4,%i2
986 /* 0x0188 225 */ prefetch [%o3+80],22
987 /* 0x018c 228 */ srl %l4,16,%o1
988 /* 0x0190 227 */ and %l4,%l0,%o0
989 /* 0x0194 225 */ prefetch [%o3+144],22
990 /* 0x0198 228 */ st %o1,[%sp+2271]
991 /* 0x019c 227 */ st %o0,[%sp+2239]
992 /* 0x01a0 226 */ ldd [%l7+%lo(___const_seg_900000601)],%f32
993 /* 0x01a4 228 */ ld [%l2+%lo(___const_seg_900000601+8)],%f0
994 /* 0x01a8 225 */ prefetch [%o3+208],22
995 /* 0x01ac */ prefetch [%o3+272],22
996 /* 0x01b0 */ prefetch [%o3+336],22
997 .L900000610:
998 /* 0x01b4 225 */ prefetch [%l1+192],22
999 /* 0x01b8 228 */ add %i5,4,%i5
1000 /* 0x01bc 225 */ add %l3,64,%l3
1001 /* 0x01c0 227 */ ld [%l2+%lo(___const_seg_900000601+8)],%f8
1002 /* 0x01c4 228 */ cmp %i5,%i0
1003 /* 0x01c8 225 */ ld [%i2],%g5
1004 /* 0x01cc 228 */ add %i2,16,%i2
1005 /* 0x01d0 */ add %l1,32,%l1
1006 /* 0x01d4 */ add %i4,8,%i4
1007 /* 0x01d8 226 */ ld [%i2-20],%f7
1008 /* 0x01dc 228 */ srl %g5,16,%i3
1009 /* 0x01e0 226 */ fmovs %f8,%f6
1010 /* 0x01e4 228 */ st %i3,[%sp+2335]
1011 /* 0x01e8 227 */ and %g5,%l0,%g4
1012 /* 0x01ec */ st %g4,[%sp+2303]
1013 /* 0x01f0 226 */ fsubd %f6,%f32,%f40
1014 /* 0x01f4 227 */ ld [%sp+2239],%f9
1015 /* 0x01f8 228 */ ld [%sp+2271],%f1
1016 /* 0x01fc */ fmovs %f8,%f12
1017 /* 0x0200 226 */ std %f40,[%l1-32]
1018 /* 0x0204 227 */ fsubd %f8,%f32,%f42
1019 /* 0x0208 */ std %f42,[%l3-64]
1020 /* 0x020c 228 */ fsubd %f0,%f32,%f44
1021 /* 0x0210 */ std %f44,[%l3-56]
1022 /* 0x0214 227 */ fmovs %f12,%f10
1023 /* 0x0218 225 */ ld [%i2-12],%g2
1024 /* 0x021c 226 */ ld [%i2-16],%f1
1025 /* 0x0220 228 */ srl %g2,16,%g3
1026 /* 0x0224 226 */ fmovs %f12,%f0
1027 /* 0x0228 225 */ prefetch [%l3+320],22
1028 /* 0x022c 228 */ st %g3,[%sp+2271]
1029 /* 0x0230 227 */ and %g2,%l0,%l6
1030 /* 0x0234 */ st %l6,[%sp+2239]
1031 /* 0x0238 226 */ fsubd %f0,%f32,%f46
1032 /* 0x023c 227 */ ld [%sp+2303],%f11
1033 /* 0x0240 228 */ ld [%sp+2335],%f13
1034 /* 0x0244 */ fmovs %f12,%f18
1035 /* 0x0248 226 */ std %f46,[%l1-24]
1036 /* 0x024c 227 */ fsubd %f10,%f32,%f48
1037 /* 0x0250 */ std %f48,[%l3-48]
1038 /* 0x0254 228 */ fsubd %f12,%f32,%f50
1039 /* 0x0258 */ std %f50,[%l3-40]
1040 /* 0x025c 227 */ fmovs %f18,%f16
1041 /* 0x0260 225 */ ld [%i2-8],%o5
1042 /* 0x0264 226 */ ld [%i2-12],%f15
1043 /* 0x0268 228 */ srl %o5,16,%l5
1044 /* 0x026c 226 */ fmovs %f18,%f14
1045 /* 0x0270 228 */ st %l5,[%sp+2335]
1046 /* 0x0274 227 */ and %o5,%l0,%o4
1047 /* 0x0278 */ st %o4,[%sp+2303]
1048 /* 0x027c 226 */ fsubd %f14,%f32,%f52
1049 /* 0x0280 227 */ ld [%sp+2239],%f17
1050 /* 0x0284 228 */ ld [%sp+2271],%f19
1051 /* 0x0288 225 */ prefetch [%l3+352],22
1052 /* 0x028c 228 */ fmovs %f18,%f24
1053 /* 0x0290 226 */ std %f52,[%l1-16]
1054 /* 0x0294 227 */ fsubd %f16,%f32,%f54
1055 /* 0x0298 */ std %f54,[%l3-32]
1056 /* 0x029c 228 */ fsubd %f18,%f32,%f56
1057 /* 0x02a0 */ std %f56,[%l3-24]
1058 /* 0x02a4 227 */ fmovs %f24,%f22
1059 /* 0x02a8 225 */ ld [%i2-4],%l4
1060 /* 0x02ac 226 */ ld [%i2-8],%f21
1061 /* 0x02b0 228 */ srl %l4,16,%o3
1062 /* 0x02b4 226 */ fmovs %f24,%f20
1063 /* 0x02b8 228 */ st %o3,[%sp+2271]
1064 /* 0x02bc 227 */ and %l4,%l0,%o2
1065 /* 0x02c0 */ st %o2,[%sp+2239]
1066 /* 0x02c4 226 */ fsubd %f20,%f32,%f58
1067 /* 0x02c8 227 */ ld [%sp+2303],%f23
1068 /* 0x02cc 228 */ ld [%sp+2335],%f25
1069 /* 0x02d0 */ fmovs %f24,%f0
1070 /* 0x02d4 226 */ std %f58,[%l1-8]
1071 /* 0x02d8 227 */ fsubd %f22,%f32,%f60
1072 /* 0x02dc */ std %f60,[%l3-16]
1073 /* 0x02e0 228 */ fsubd %f24,%f32,%f62
1074 /* 0x02e4 */ bl,pt %icc,.L900000610
1075 /* 0x02e8 */ std %f62,[%l3-8]
1076 .L900000613:
1077 /* 0x02ec 227 */ ld [%l2+%lo(___const_seg_900000601+8)],%f4
1078 /* 0x02f0 228 */ add %l1,8,%l1
1079 /* 0x02f4 */ cmp %i5,%g1
1080 /* 0x02f8 226 */ ld [%i2-4],%f3
1081 /* 0x02fc 225 */ or %g0,%g1,%i3
1082 /* 0x0300 228 */ add %i4,2,%i4
1083 /* 0x0304 227 */ ld [%sp+2239],%f5
1084 /* 0x0308 226 */ fmovs %f4,%f2
1085 /* 0x030c 228 */ ld [%sp+2271],%f1
1086 /* 0x0310 226 */ fsubd %f2,%f32,%f34
1087 /* 0x0314 */ std %f34,[%l1-8]
1088 /* 0x0318 227 */ fsubd %f4,%f32,%f36
1089 /* 0x031c */ std %f36,[%l3]
1090 /* 0x0320 228 */ fsubd %f0,%f32,%f38
1091 /* 0x0324 */ bge,pn %icc,.L77000294
1092 /* 0x0328 */ std %f38,[%l3+8]
1093 .L77000291:
1094 /* 0x032c 225 */ ld [%i2],%o2
1095 .L900000614:
1096 /* 0x0330 226 */ ldd [%l7+%lo(___const_seg_900000601)],%f32
1097 /* 0x0334 228 */ srl %o2,16,%l3
1098 /* 0x0338 227 */ sra %i4,0,%i0
1099 /* 0x033c 228 */ st %l3,[%sp+2367]
1100 /* 0x0340 227 */ and %o2,%l0,%g1
1101 /* 0x0344 226 */ sethi %hi(___const_seg_900000601+8),%l2
1102 /* 0x0348 227 */ st %g1,[%sp+2399]
1103 /* 0x034c */ sllx %i0,3,%o0
1104 /* 0x0350 228 */ add %i4,1,%l4
1105 /* 0x0354 226 */ ld [%l2+%lo(___const_seg_900000601+8)],%f4
1106 /* 0x0358 228 */ sra %l4,0,%o1
1107 /* 0x035c */ add %i5,1,%i5
1108 /* 0x0360 226 */ ld [%i2],%f5
1109 /* 0x0364 228 */ sllx %o1,3,%g5
1110 /* 0x0368 */ cmp %i5,%i3
1111 /* 0x036c */ ld [%sp+2367],%f9
1112 /* 0x0370 */ add %i2,4,%i2
1113 /* 0x0374 */ add %i4,2,%i4
1114 /* 0x0378 227 */ fmovs %f4,%f6
1115 /* 0x037c 226 */ fsubd %f4,%f32,%f44
1116 /* 0x0380 */ std %f44,[%l1]
1117 /* 0x0384 227 */ ld [%sp+2399],%f7
1118 /* 0x0388 228 */ fmovs %f6,%f8
1119 /* 0x038c */ add %l1,8,%l1
1120 /* 0x0390 */ fsubd %f8,%f32,%f48
1121 /* 0x0394 227 */ fsubd %f6,%f32,%f46
1122 /* 0x0398 */ std %f46,[%i1+%o0]
1123 /* 0x039c 228 */ std %f48,[%i1+%g5]
1124 /* 0x03a0 */ bl,a,pt %icc,.L900000614
1125 /* 0x03a4 225 */ ld [%i2],%o2
1126 .L77000294:
1127 /* 0x03a8 222 */ ret ! Result =
1128 /* 0x03ac */ restore %g0,%g0,%g0
1129 /* 0x03b0 0 */ .type conv_i32_to_d32_and_d16,2
1130 /* 0x03b0 0 */ .size conv_i32_to_d32_and_d16,(.-conv_i32_to_d32_and_d16)
1132 .section ".text",#alloc,#execinstr
1133 /* 000000 0 */ .align 32
1135 ! 229 ! }
1136 ! 230 !}
1137 ! 232 !extern long long c1, c2, c3, c4;
1138 ! 234 !static void
1139 ! 235 !adjust_montf_result(uint32_t *i32, uint32_t *nint, int len)
1140 ! 236 !{
1143 ! SUBROUTINE adjust_montf_result
1145 ! OFFSET SOURCE LINE LABEL INSTRUCTION
/*
 * adjust_montf_result(i32, nint, len)
 *
 * Compiler-generated leaf routine for the C function whose source is
 * interleaved below in the "!" comments: conditionally subtract nint[]
 * from i32[] in place.
 *
 * nint[0..len-1] is subtracted from i32[0..len-1] (borrow carried in a
 * 64-bit accumulator) when either
 *   - i32[len] is non-zero, or
 *   - scanning from index len-1 down to 0, all words are equal, or the
 *     first differing word has i32[i] > nint[i] (unsigned compare).
 * Otherwise, or when len <= 0, the routine returns without storing
 * anything.  i32[len] itself is only read, never written.
 *
 * Register use:
 *   entry      %o0 = i32, %o1 = nint, %o2 = len
 *   %o4, %o5   i32 and nint pointers (copied from %o0 and %o1)
 *   %g2        len (sign-extended); reused as the accumulator "acc"
 *              once the subtraction starts
 *   %g3        len - 1
 *   %o0        scratch for loaded words, later the subtraction index i
 *
 * No register window is allocated (no save/restore); return is via retl.
 * The instruction that follows each branch is its delay slot.
 */
1147 adjust_montf_result:
/* %g2 = len sign-extended to 64 bits; %o4 = i32 (frees %o0 for the load). */
1148 /* 000000 236 */ sra %o2,0,%g2
1149 /* 0x0004 */ or %g0,%o0,%o4
1151 ! 237 ! int64_t acc;
1152 ! 238 ! int i;
1153 ! 240 ! if (i32[len] > 0) {
/* %g3 = len * 4 (byte offset); %o0 = i32[len]. */
1155 /* 0x0008 240 */ sllx %g2,2,%g3
1156 /* 0x000c */ ld [%o0+%g3],%o0
/* Unsigned "<= 0" holds only for 0: branch to the else-part if i32[len] == 0. */
1157 /* 0x0010 */ cmp %o0,0
1158 /* 0x0014 */ bleu,pn %icc,.L77000316
/* Delay slot: %o5 = nint. */
1159 /* 0x0018 236 */ or %g0,%o1,%o5
1161 ! 241 ! i = -1;
/*
 * i32[len] != 0: no comparison needed, go straight to the subtraction.
 * %g3 = len - 1 is the bound used by the subtraction loops; the cmp in
 * the delay slot sets the condition codes tested at .L900000712.
 */
1163 .L77000315:
1164 /* 0x001c 241 */ sub %g2,1,%g3
1165 /* 0x0020 */ ba .L900000712
1166 /* 0x0024 249 */ cmp %g2,0
1168 ! 242 ! } else {
1169 ! 243 ! for (i = len - 1; i >= 0; i--) {
/*
 * %g3 = len - 1 with condition codes set; if it is negative there is
 * nothing to compare.  Delay slot: %o3 = i = len - 1.
 */
1171 .L77000316:
1172 /* 0x0028 243 */ subcc %g2,1,%g3
1173 /* 0x002c */ bneg,pn %icc,.L77000340
1174 /* 0x0030 */ or %g0,%g3,%o3
/* %g1 = i * 4; preload %g4 = nint[i]; %o2 = &i32[i]; %o1 = &nint[i]. */
1175 .L77000348:
1176 /* 0x0034 243 */ sra %g3,0,%o1
1177 /* 0x0038 */ sllx %o1,2,%g1
1179 ! 244 ! if (i32[i] != nint[i]) break;
1181 /* 0x003c 244 */ ld [%g1+%o5],%g4
1182 /* 0x0040 243 */ add %g1,%o4,%o2
1183 /* 0x0044 */ add %g1,%o5,%o1
/*
 * Compare loop: exit to .L77000324 at the first index where the words
 * differ.  The i32 pointer is stepped down in the branch delay slot.
 */
1184 .L900000713:
1185 /* 0x0048 244 */ ld [%o2],%o0
1186 /* 0x004c */ cmp %o0,%g4
1187 /* 0x0050 */ bne,pn %icc,.L77000324
1188 /* 0x0054 */ sub %o2,4,%o2
/*
 * Words equal: step the nint pointer and i down and loop while i >= 0.
 * The branch is annulling (",a"), so the load of the next nint word in
 * its delay slot executes only when the branch is taken.
 */
1189 .L77000320:
1190 /* 0x0058 244 */ sub %o1,4,%o1
1191 /* 0x005c */ subcc %o3,1,%o3
1192 /* 0x0060 */ bpos,a,pt %icc,.L900000713
1193 /* 0x0064 */ ld [%o1],%g4
/*
 * All len words were equal (i < 0): go subtract.  The delay-slot cmp
 * sets the condition codes tested at .L900000712.
 */
1194 .L900000706:
1195 /* 0x0068 244 */ ba .L900000712
1196 /* 0x006c 249 */ cmp %g2,0
/*
 * Words differ at index %o3 (not yet decremented for this pass).
 * Reload nint[i] and i32[i]; return unless i32[i] > nint[i] (unsigned).
 * Falling through continues into the subtraction.
 */
1197 .L77000324:
1198 /* 0x0070 244 */ sra %o3,0,%o0
1199 /* 0x0074 */ sllx %o0,2,%g1
1200 /* 0x0078 */ ld [%o5+%g1],%o3
1201 /* 0x007c */ ld [%o4+%g1],%g5
1202 /* 0x0080 */ cmp %g5,%o3
1203 /* 0x0084 */ bleu,pt %icc,.L77000332
1204 /* 0x0088 */ nop
1206 ! 245 ! }
1207 ! 246 ! }
1208 ! 247 ! if ((i < 0) || (i32[i] > nint[i])) {
1209 ! 248 ! acc = 0;
1210 ! 249 ! for (i = 0; i < len; i++) {
/* Compare len with 0 for the loop-entry test that follows. */
1212 .L77000340:
1213 /* 0x008c 249 */ cmp %g2,0
/* Return if len <= 0.  Delay slot: %o3 = len. */
1214 .L900000712:
1215 /* 0x0090 249 */ ble,pn %icc,.L77000332
1216 /* 0x0094 250 */ or %g0,%g2,%o3
/*
 * i (%o0) = 0.  Lengths below 10 use the one-word-per-pass loop at
 * .L77000341.  Delay slot: acc (%g2) = 0.
 */
1217 .L77000347:
1218 /* 0x0098 249 */ or %g0,0,%o0
1220 ! 250 ! acc = acc + (uint64_t)(i32[i]) - (uint64_t)(nint[i]);
1222 /* 0x009c 250 */ cmp %o3,10
1223 /* 0x00a0 */ bl,pn %icc,.L77000341
1224 /* 0x00a4 249 */ or %g0,0,%g2
/*
 * len >= 10: unrolled path, 8 words per loop iteration.
 * Set-up: %o1 = nint + 4 bytes, %o2 = i32 + 8 bytes, %o5 = len - 8
 * (loop bound), %o0 = 2.  Word 0 is subtracted and stored here and
 * i32[1] is preloaded into %g5; %g4 receives the 64-bit difference
 * shifted right arithmetically by 32, i.e. the borrow for word 1.
 * The prefetches touch addresses ahead of the i32 pointer.
 */
1225 .L900000709:
1226 /* 0x00a8 250 */ prefetch [%o4],22
1227 /* 0x00ac */ prefetch [%o4+64],22
1229 ! 251 ! i32[i] = acc & 0xffffffff;
1230 ! 252 ! acc = acc >> 32;
1232 /* 0x00b0 252 */ add %o5,4,%o1
1233 /* 0x00b4 */ add %o4,8,%o2
1234 /* 0x00b8 250 */ prefetch [%o4+128],22
1235 /* 0x00bc */ sub %o3,8,%o5
1236 /* 0x00c0 */ or %g0,2,%o0
1237 /* 0x00c4 */ prefetch [%o4+192],22
1238 /* 0x00c8 */ prefetch [%o4+256],22
1239 /* 0x00cc */ prefetch [%o4+320],22
1240 /* 0x00d0 */ prefetch [%o4+384],22
1241 /* 0x00d4 */ ld [%o2-4],%g5
1242 /* 0x00d8 */ prefetch [%o2+440],22
1243 /* 0x00dc */ prefetch [%o2+504],22
1244 /* 0x00e0 */ ld [%o4],%g4
1245 /* 0x00e4 */ ld [%o1-4],%o4
1246 /* 0x00e8 */ sub %g4,%o4,%o3
1247 /* 0x00ec 251 */ st %o3,[%o2-8]
1248 /* 0x00f0 252 */ srax %o3,32,%g4
/*
 * Unrolled body.  Each ld/sub/add/st/srax group handles one word:
 * difference = i32[k] - nint[k] + borrow, the low 32 bits are stored
 * back and the next borrow is shifted out.  Temporaries rotate among
 * %g1, %g2, %g4, %g5, %o3 and %o4 (so %o4 is not the i32 pointer here).
 * Both pointers and %o0 advance by 8 words at the top; the final ld
 * preloads the i32 word for the next group.  The loop repeats while
 * %o0 <= len - 8 (cmp near the top, branch at the bottom, last srax in
 * the delay slot).
 */
1249 .L900000707:
1250 /* 0x00f4 252 */ add %o0,8,%o0
1251 /* 0x00f8 */ add %o2,32,%o2
1252 /* 0x00fc 250 */ ld [%o1],%g1
1253 /* 0x0100 */ prefetch [%o2+496],22
1254 /* 0x0104 252 */ cmp %o0,%o5
1255 /* 0x0108 */ add %o1,32,%o1
1256 /* 0x010c 250 */ sub %g5,%g1,%g5
1257 /* 0x0110 */ add %g5,%g4,%o4
1258 /* 0x0114 */ ld [%o2-32],%g4
1259 /* 0x0118 251 */ st %o4,[%o2-36]
1260 /* 0x011c 252 */ srax %o4,32,%g1
1261 /* 0x0120 250 */ ld [%o1-28],%o3
1262 /* 0x0124 */ sub %g4,%o3,%g2
1263 /* 0x0128 */ add %g2,%g1,%g5
1264 /* 0x012c */ ld [%o2-28],%o3
1265 /* 0x0130 251 */ st %g5,[%o2-32]
1266 /* 0x0134 252 */ srax %g5,32,%g4
1267 /* 0x0138 250 */ ld [%o1-24],%o4
1268 /* 0x013c */ sub %o3,%o4,%g1
1269 /* 0x0140 */ add %g1,%g4,%g2
1270 /* 0x0144 */ ld [%o2-24],%o3
1271 /* 0x0148 251 */ st %g2,[%o2-28]
1272 /* 0x014c 252 */ srax %g2,32,%g5
1273 /* 0x0150 250 */ ld [%o1-20],%o4
1274 /* 0x0154 */ sub %o3,%o4,%g4
1275 /* 0x0158 */ add %g4,%g5,%g1
1276 /* 0x015c */ ld [%o2-20],%o4
1277 /* 0x0160 251 */ st %g1,[%o2-24]
1278 /* 0x0164 252 */ srax %g1,32,%o3
1279 /* 0x0168 250 */ ld [%o1-16],%g2
1280 /* 0x016c */ sub %o4,%g2,%g5
1281 /* 0x0170 */ add %g5,%o3,%g1
1282 /* 0x0174 */ ld [%o2-16],%g4
1283 /* 0x0178 251 */ st %g1,[%o2-20]
1284 /* 0x017c 252 */ srax %g1,32,%o4
1285 /* 0x0180 250 */ ld [%o1-12],%g2
1286 /* 0x0184 */ sub %g4,%g2,%o3
1287 /* 0x0188 */ add %o3,%o4,%g5
1288 /* 0x018c */ ld [%o2-12],%g2
1289 /* 0x0190 251 */ st %g5,[%o2-16]
1290 /* 0x0194 252 */ srax %g5,32,%g4
1291 /* 0x0198 250 */ ld [%o1-8],%g1
1292 /* 0x019c */ sub %g2,%g1,%o4
1293 /* 0x01a0 */ add %o4,%g4,%o3
1294 /* 0x01a4 */ ld [%o2-8],%g2
1295 /* 0x01a8 251 */ st %o3,[%o2-12]
1296 /* 0x01ac 252 */ srax %o3,32,%g5
1297 /* 0x01b0 250 */ ld [%o1-4],%g1
1298 /* 0x01b4 */ sub %g2,%g1,%g4
1299 /* 0x01b8 */ add %g4,%g5,%o4
1300 /* 0x01bc */ ld [%o2-4],%g5
1301 /* 0x01c0 251 */ st %o4,[%o2-8]
1302 /* 0x01c4 252 */ ble,pt %icc,.L900000707
1303 /* 0x01c8 */ srax %o4,32,%g4
/*
 * Unrolled loop done: finish the word preloaded in %g5, rebuild
 * %o5 and %o4 as the nint and i32 pointers to the next unprocessed
 * word (index %o0), and return if %o0 > len - 1.  Delay slot: %g2 =
 * borrow carried into the remainder loop.
 */
1304 .L900000710:
1305 /* 0x01cc 250 */ ld [%o1],%o3
1306 /* 0x01d0 252 */ add %o1,4,%o5
1307 /* 0x01d4 250 */ or %g0,%o2,%o4
1308 /* 0x01d8 252 */ cmp %o0,%g3
1309 /* 0x01dc 250 */ sub %g5,%o3,%g2
1310 /* 0x01e0 */ add %g2,%g4,%g1
1311 /* 0x01e4 251 */ st %g1,[%o2-4]
1312 /* 0x01e8 252 */ bg,pn %icc,.L77000332
1313 /* 0x01ec */ srax %g1,32,%g2
/*
 * Short / remainder loop, one word per pass:
 *   %o1 = acc + i32[i] - nint[i]; store its low 32 bits to i32[i];
 *   acc (%g2) = %o1 >> 32 (arithmetic); advance both pointers.
 * Repeats while the incremented index %o0 <= len - 1.  The annulled
 * delay slot loads the next i32 word only when the branch is taken.
 */
1314 .L77000341:
1315 /* 0x01f0 250 */ ld [%o4],%g5
1316 .L900000711:
1317 /* 0x01f4 250 */ ld [%o5],%o2
1318 /* 0x01f8 */ add %g2,%g5,%g4
1319 /* 0x01fc 252 */ add %o0,1,%o0
1320 /* 0x0200 */ cmp %o0,%g3
1321 /* 0x0204 */ add %o5,4,%o5
1322 /* 0x0208 250 */ sub %g4,%o2,%o1
1323 /* 0x020c 251 */ st %o1,[%o4]
1324 /* 0x0210 252 */ srax %o1,32,%g2
1325 /* 0x0214 */ add %o4,4,%o4
1326 /* 0x0218 */ ble,a,pt %icc,.L900000711
1327 /* 0x021c 250 */ ld [%o4],%g5
/* Common exit: leaf return to the caller, nothing useful in the delay slot. */
1328 .L77000332:
1329 /* 0x0220 252 */ retl ! Result =
1330 /* 0x0224 */ nop
1331 /* 0x0228 0 */ .type adjust_montf_result,2
1332 /* 0x0228 0 */ .size adjust_montf_result,(.-adjust_montf_result)
1334 .section ".text",#alloc,#execinstr
1335 /* 000000 0 */ .align 32
1337 ! 253 ! }
1338 ! 254 ! }
1339 ! 255 !}
1340 ! 257 !/*************
1341 ! 258 !static void
1342 ! 259 !adjust_montf_result_bad(uint32_t *i32, uint32_t *nint, int len)
1343 ! 260 !{
1344 ! 261 ! int64_t acc;
1345 ! 262 ! int i;
1346 ! 264 ! c4++;
1347 ! 265 !
1348 ! 266 ! if (i32[len] > 0) {
1349 ! 267 ! i = -1;
1350 ! 268 ! c1++;
1351 ! 269 ! } else {
1352 ! 270 ! for (i = len - 1; i >= 0; i++) {
1353 ! 271 ! if (i32[i] != nint[i]) break;
1354 ! 272 ! c2++;
1355 ! 273 ! }
1356 ! 274 ! }
1357 ! 275 ! if ((i < 0) || (i32[i] > nint[i])) {
1358 ! 276 ! c3++;
1359 ! 277 ! acc = 0;
1360 ! 278 ! for (i = 0; i < len; i++) {
1361 ! 279 ! acc = acc + (uint64_t)(i32[i]) - (uint64_t)(nint[i]);
1362 ! 280 ! i32[i] = acc & 0xffffffff;
1363 ! 281 ! acc = acc >> 32;
1364 ! 282 ! }
1365 ! 283 ! }
1366 ! 284 !}
1367 ! 285 !uint32_t saveresult[1000];
1368 ! 286 !void printarray(char *name, uint32_t *arr, int len)
1369 ! 287 !{
1370 ! 288 ! int i, j;
1371 ! 289 ! uint64_t tmp;
1372 ! 291 ! printf("uint64_t %s[%d] =\n{\n",name,(len+1)/2);
1373 ! 292 ! for(i=j=0; i<len; i+=2,j+=2){
1374 ! 293 ! if(j == 6){
1375 ! 294 ! printf("\n");
1376 ! 295 ! j=0;
1377 ! 296 ! }
1378 ! 297 ! tmp = (((uint64_t)arr[i])<<32) | ((uint64_t)arr[i+1]);
1379 ! 298 ! printf("0x%016llx",tmp);
1380 ! 299 ! if((i/2)!=(((len+1)/2)-1))printf(",");
1381 ! 300 ! if(j!=4)printf(" ");
1382 ! 301 ! }
1383 ! 302 ! if(j!=0) printf("\n");
1384 ! 303 ! printf("};\n");
1385 ! 304 !}
1386 ! 305 !**************/
1387 ! 308 !/*
1388 ! 309 ! * the lengths of the input arrays should be at least the following:
1389 ! 310 ! * result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen]
1390 ! 311 ! * all of them should be different from one another
1391 ! 312 ! */
1392 ! 313 !void mont_mulf_noconv(uint32_t *result,
1393 ! 314 ! double *dm1, double *dm2, double *dt,
1394 ! 315 ! double *dn, uint32_t *nint,
1395 ! 316 ! int nlen, double dn0)
1396 ! 317 !{
1399 ! SUBROUTINE mont_mulf_noconv
1401 ! OFFSET SOURCE LINE LABEL INSTRUCTION
1403 .global mont_mulf_noconv
1404 mont_mulf_noconv:
1405 /* 000000 317 */ save %sp,-176,%sp
1406 /* 0x0004 */ ldx [%fp+2223],%g1
1407 /* 0x0008 0 */ sethi %hi(Zero),%l5
1408 /* 0x000c 317 */ or %g0,%i2,%l0
1410 ! 318 ! int i, j, jj;
1411 ! 319 ! double digit, m2j, a, b;
1412 ! 320 ! double *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0;
1413 ! 322 ! pdm1 = &(dm1[0]);
1414 ! 323 ! pdm2 = &(dm2[0]);
1415 ! 324 ! pdn = &(dn[0]);
1416 ! 325 ! pdm2[2 * nlen] = Zero;
1418 /* 0x0010 325 */ ldd [%l5+%lo(Zero)],%f0
1419 /* 0x0014 317 */ or %g0,%i0,%i2
1420 /* 0x0018 325 */ sll %g1,1,%o3
1422 ! 327 ! if (nlen != 16) {
1424 /* 0x001c 327 */ cmp %g1,16
1425 /* 0x0020 325 */ sra %o3,0,%i0
1426 /* 0x0024 */ sllx %i0,3,%o0
1427 /* 0x0028 317 */ or %g0,%i5,%i0
1428 /* 0x002c 327 */ bne,pn %icc,.L77000476
1429 /* 0x0030 325 */ std %f0,[%l0+%o0]
1430 .L77000488:
1431 /* 0x0034 0 */ sethi %hi(TwoToMinus16),%o2
1432 /* 0x0038 0 */ sethi %hi(TwoTo16),%l3
1434 ! 328 ! for (i = 0; i < 4 * nlen + 2; i++)
1435 ! 329 ! dt[i] = Zero;
1436 ! 330 ! a = dt[0] = pdm1[0] * pdm2[0];
1437 ! 331 ! digit = mod(lower32(a, Zero) * dn0, TwoToMinus16, TwoTo16);
1438 ! 333 ! pdtj = &(dt[0]);
1439 ! 334 ! for (j = jj = 0; j < 2 * nlen; j++, jj++, pdtj++) {
1440 ! 335 ! m2j = pdm2[j];
1441 ! 336 ! a = pdtj[0] + pdn[0] * digit;
1442 ! 337 ! b = pdtj[1] + pdm1[0] * pdm2[j + 1] + a * TwoToMinus16;
1443 ! 338 ! pdtj[1] = b;
1444 ! 340 !#pragma pipeloop(0)
1445 ! 341 ! for (i = 1; i < nlen; i++) {
1446 ! 342 ! pdtj[2 * i] += pdm1[i] * m2j + pdn[i] * digit;
1447 ! 343 ! }
1448 ! 344 ! if (jj == 15) {
1449 ! 345 ! cleanup(dt, j / 2 + 1, 2 * nlen + 1);
1450 ! 346 ! jj = 0;
1451 ! 347 ! }
1452 ! 349 ! digit = mod(lower32(b, Zero) * dn0,
1453 ! 350 ! TwoToMinus16, TwoTo16);
1454 ! 351 ! }
1455 ! 352 ! } else {
1456 ! 353 ! a = dt[0] = pdm1[0] * pdm2[0];
1458 /* 0x003c 353 */ ldd [%i1],%f40
1460 ! 355 ! dt[65] = dt[64] = dt[63] = dt[62] = dt[61] = dt[60] =
1461 ! 356 ! dt[59] = dt[58] = dt[57] = dt[56] = dt[55] =
1462 ! 357 ! dt[54] = dt[53] = dt[52] = dt[51] = dt[50] =
1463 ! 358 ! dt[49] = dt[48] = dt[47] = dt[46] = dt[45] =
1464 ! 359 ! dt[44] = dt[43] = dt[42] = dt[41] = dt[40] =
1465 ! 360 ! dt[39] = dt[38] = dt[37] = dt[36] = dt[35] =
1466 ! 361 ! dt[34] = dt[33] = dt[32] = dt[31] = dt[30] =
1467 ! 362 ! dt[29] = dt[28] = dt[27] = dt[26] = dt[25] =
1468 ! 363 ! dt[24] = dt[23] = dt[22] = dt[21] = dt[20] =
1469 ! 364 ! dt[19] = dt[18] = dt[17] = dt[16] = dt[15] =
1470 ! 365 ! dt[14] = dt[13] = dt[12] = dt[11] = dt[10] =
1471 ! 366 ! dt[9] = dt[8] = dt[7] = dt[6] = dt[5] = dt[4] =
1472 ! 367 ! dt[3] = dt[2] = dt[1] = Zero;
1473 ! 369 ! pdn_0 = pdn[0];
1474 ! 370 ! pdm1_0 = pdm1[0];
1475 ! 372 ! digit = mod(lower32(a, Zero) * dn0, TwoToMinus16, TwoTo16);
1476 ! 373 ! pdtj = &(dt[0]);
1478 /* 0x0040 373 */ or %g0,%i3,%o3
1480 ! 375 ! for (j = 0; j < 32; j++, pdtj++) {
1482 /* 0x0044 375 */ or %g0,0,%l1
1483 /* 0x0048 353 */ ldd [%l0],%f42
1484 /* 0x004c 372 */ ldd [%o2+%lo(TwoToMinus16)],%f44
1485 /* 0x0050 */ ldd [%l3+%lo(TwoTo16)],%f46
1486 /* 0x0054 367 */ std %f0,[%i3+8]
1487 /* 0x0058 353 */ fmuld %f40,%f42,%f38
1488 /* 0x005c */ std %f38,[%i3]
1489 /* 0x0060 367 */ std %f0,[%i3+16]
1490 /* 0x0064 */ std %f0,[%i3+24]
1491 /* 0x0068 */ std %f0,[%i3+32]
1492 /* 0x006c 372 */ fdtox %f38,%f4
1493 /* 0x0070 367 */ std %f0,[%i3+40]
1494 /* 0x0074 */ std %f0,[%i3+48]
1495 /* 0x0078 */ std %f0,[%i3+56]
1496 /* 0x007c 372 */ fmovs %f0,%f4
1497 /* 0x0080 367 */ std %f0,[%i3+64]
1498 /* 0x0084 */ std %f0,[%i3+72]
1499 /* 0x0088 372 */ fxtod %f4,%f52
1500 /* 0x008c 367 */ std %f0,[%i3+80]
1501 /* 0x0090 */ std %f0,[%i3+88]
1502 /* 0x0094 */ std %f0,[%i3+96]
1503 /* 0x0098 */ std %f0,[%i3+104]
1504 /* 0x009c 372 */ fmuld %f52,%f14,%f60
1505 /* 0x00a0 367 */ std %f0,[%i3+112]
1506 /* 0x00a4 */ std %f0,[%i3+120]
1507 /* 0x00a8 */ std %f0,[%i3+128]
1508 /* 0x00ac */ std %f0,[%i3+136]
1509 /* 0x00b0 372 */ fmuld %f60,%f44,%f62
1510 /* 0x00b4 367 */ std %f0,[%i3+144]
1511 /* 0x00b8 */ std %f0,[%i3+152]
1512 /* 0x00bc */ std %f0,[%i3+160]
1513 /* 0x00c0 */ std %f0,[%i3+168]
1514 /* 0x00c4 372 */ fdtox %f62,%f32
1515 /* 0x00c8 367 */ std %f0,[%i3+176]
1516 /* 0x00cc */ std %f0,[%i3+184]
1517 /* 0x00d0 */ std %f0,[%i3+192]
1518 /* 0x00d4 */ std %f0,[%i3+200]
1519 /* 0x00d8 372 */ fxtod %f32,%f50
1520 /* 0x00dc 367 */ std %f0,[%i3+208]
1521 /* 0x00e0 */ std %f0,[%i3+216]
1522 /* 0x00e4 */ std %f0,[%i3+224]
1523 /* 0x00e8 */ std %f0,[%i3+232]
1524 /* 0x00ec 372 */ fmuld %f50,%f46,%f34
1525 /* 0x00f0 367 */ std %f0,[%i3+240]
1526 /* 0x00f4 */ std %f0,[%i3+248]
1527 /* 0x00f8 */ std %f0,[%i3+256]
1528 /* 0x00fc */ std %f0,[%i3+264]
1529 /* 0x0100 372 */ fsubd %f60,%f34,%f40
1530 /* 0x0104 367 */ std %f0,[%i3+272]
1531 /* 0x0108 */ std %f0,[%i3+280]
1532 /* 0x010c */ std %f0,[%i3+288]
1533 /* 0x0110 */ std %f0,[%i3+296]
1534 /* 0x0114 */ std %f0,[%i3+304]
1535 /* 0x0118 */ std %f0,[%i3+312]
1536 /* 0x011c */ std %f0,[%i3+320]
1537 /* 0x0120 */ std %f0,[%i3+328]
1538 /* 0x0124 */ std %f0,[%i3+336]
1539 /* 0x0128 */ std %f0,[%i3+344]
1540 /* 0x012c */ std %f0,[%i3+352]
1541 /* 0x0130 */ std %f0,[%i3+360]
1542 /* 0x0134 */ std %f0,[%i3+368]
1543 /* 0x0138 375 */ sub %g1,1,%l3
1544 /* 0x013c */ add %i3,8,%o7
1545 /* 0x0140 367 */ std %f0,[%i3+376]
1546 /* 0x0144 */ std %f0,[%i3+384]
1547 /* 0x0148 */ std %f0,[%i3+392]
1548 /* 0x014c */ std %f0,[%i3+400]
1549 /* 0x0150 */ std %f0,[%i3+408]
1550 /* 0x0154 */ std %f0,[%i3+416]
1551 /* 0x0158 */ std %f0,[%i3+424]
1552 /* 0x015c */ std %f0,[%i3+432]
1553 /* 0x0160 */ std %f0,[%i3+440]
1554 /* 0x0164 */ std %f0,[%i3+448]
1555 /* 0x0168 */ std %f0,[%i3+456]
1556 /* 0x016c */ std %f0,[%i3+464]
1557 /* 0x0170 */ std %f0,[%i3+472]
1558 /* 0x0174 */ std %f0,[%i3+480]
1559 /* 0x0178 */ std %f0,[%i3+488]
1560 /* 0x017c */ std %f0,[%i3+496]
1561 /* 0x0180 */ std %f0,[%i3+504]
1562 /* 0x0184 */ std %f0,[%i3+512]
1563 /* 0x0188 */ std %f0,[%i3+520]
1565 !BEGIN HAND CODED PART
1567 ! cheetah schedule, no even-odd trick
1570 add %i3,%g0,%o5
1572 fmovd %f40,%f0
1573 fmovd %f14,%f2
1574 fmovd %f44,%f8
1575 sethi %hi(TwoTo32),%l5
1576 fmovd %f46,%f10
1577 sethi %hi(TwoToMinus32),%g5
1578 ldd [%i3],%f6
1579 ldd [%l0],%f4
1581 ldd [%i1],%f40
1582 ldd [%i1+8],%f42
1583 ldd [%i1+16],%f52
1584 ldd [%i1+48],%f54
1585 ldd [%i1+56],%f36
1586 ldd [%i1+64],%f56
1587 ldd [%i1+104],%f48
1588 ldd [%i1+112],%f58
1590 ldd [%i4],%f44
1591 ldd [%i4+8],%f46
1592 ldd [%i4+104],%f50
1593 ldd [%i4+112],%f60
1596 .L99999999:
1598 ldd [%i1+24],%f20
1599 fmuld %f0,%f44,%f12
1601 ldd [%i4+24],%f22
1602 fmuld %f42,%f4,%f16
1604 ldd [%i1+40],%f24
1605 fmuld %f46,%f0,%f18
1607 ldd [%i4+40],%f26
1608 fmuld %f20,%f4,%f20
1610 ldd [%l0+8],%f38
1611 faddd %f12,%f6,%f12
1612 fmuld %f22,%f0,%f22
1614 add %l0,8,%l0
1615 ldd [%i4+56],%f30
1616 fmuld %f24,%f4,%f24
1618 ldd [%i1+72],%f32
1619 faddd %f16,%f18,%f16
1620 fmuld %f26,%f0,%f26
1622 ldd [%i3+16],%f18
1623 fmuld %f40,%f38,%f14
1625 ldd [%i4+72],%f34
1626 faddd %f20,%f22,%f20
1627 fmuld %f8,%f12,%f12
1629 ldd [%i3+48],%f22
1630 fmuld %f36,%f4,%f28
1632 ldd [%i3+8],%f6
1633 faddd %f16,%f18,%f16
1634 fmuld %f30,%f0,%f30
1636 std %f16,[%i3+16]
1637 faddd %f24,%f26,%f24
1638 fmuld %f32,%f4,%f32
1640 ldd [%i3+80],%f26
1641 faddd %f12,%f14,%f12
1642 fmuld %f34,%f0,%f34
1644 ldd [%i1+88],%f16
1645 faddd %f20,%f22,%f20
1647 ldd [%i4+88],%f18
1648 faddd %f28,%f30,%f28
1650 ldd [%i3+112],%f30
1651 faddd %f32,%f34,%f32
1653 ldd [%i3+144],%f34
1654 faddd %f12,%f6,%f6
1655 fmuld %f16,%f4,%f16
1657 std %f20,[%i3+48]
1658 faddd %f24,%f26,%f24
1659 fmuld %f18,%f0,%f18
1661 std %f24,[%i3+80]
1662 faddd %f28,%f30,%f28
1663 fmuld %f48,%f4,%f20
1665 std %f28,[%i3+112]
1666 faddd %f32,%f34,%f32
1667 fmuld %f50,%f0,%f22
1669 ldd [%i1+120],%f24
1670 fdtox %f6,%f12
1672 std %f32,[%i3+144]
1673 faddd %f16,%f18,%f16
1675 ldd [%i4+120],%f26
1677 ldd [%i3+176],%f18
1678 faddd %f20,%f22,%f20
1679 fmuld %f24,%f4,%f24
1681 ldd [%i4+16],%f30
1682 fmovs %f11,%f12
1684 ldd [%i1+32],%f32
1685 fmuld %f26,%f0,%f26
1687 ldd [%i4+32],%f34
1688 fmuld %f52,%f4,%f28
1690 ldd [%i3+208],%f22
1691 faddd %f16,%f18,%f16
1692 fmuld %f30,%f0,%f30
1694 std %f16,[%i3+176]
1695 fxtod %f12,%f12
1696 fmuld %f32,%f4,%f32
1698 ldd [%i4+48],%f18
1699 faddd %f24,%f26,%f24
1700 fmuld %f34,%f0,%f34
1702 ldd [%i3+240],%f26
1703 faddd %f20,%f22,%f20
1705 std %f20,[%i3+208]
1706 faddd %f28,%f30,%f28
1707 fmuld %f54,%f4,%f16
1709 ldd [%i3+32],%f30
1710 fmuld %f12,%f2,%f14
1712 ldd [%i4+64],%f22
1713 faddd %f32,%f34,%f32
1714 fmuld %f18,%f0,%f18
1716 ldd [%i3+64],%f34
1717 faddd %f24,%f26,%f24
1719 std %f24,[%i3+240]
1720 faddd %f28,%f30,%f28
1721 fmuld %f56,%f4,%f20
1723 std %f28,[%i3+32]
1724 fmuld %f14,%f8,%f12
1726 ldd [%i1+80],%f24
1727 faddd %f32,%f34,%f34 ! yes, tmp52!
1728 fmuld %f22,%f0,%f22
1730 ldd [%i4+80],%f26
1731 faddd %f16,%f18,%f16
1733 ldd [%i1+96],%f28
1734 fmuld %f58,%f4,%f32
1736 ldd [%i4+96],%f30
1737 fdtox %f12,%f12
1738 fmuld %f24,%f4,%f24
1740 std %f34,[%i3+64] ! yes, tmp52!
1741 faddd %f20,%f22,%f20
1742 fmuld %f26,%f0,%f26
1744 ldd [%i3+96],%f18
1745 fmuld %f28,%f4,%f28
1747 ldd [%i3+128],%f22
1748 fmovd %f38,%f4
1749 fmuld %f30,%f0,%f30
1751 fxtod %f12,%f12
1752 fmuld %f60,%f0,%f34
1754 add %i3,8,%i3
1755 faddd %f24,%f26,%f24
1757 ldd [%i3+160-8],%f26
1758 faddd %f16,%f18,%f16
1760 std %f16,[%i3+96-8]
1761 faddd %f28,%f30,%f28
1763 ldd [%i3+192-8],%f30
1764 faddd %f32,%f34,%f32
1765 fmuld %f12,%f10,%f12
1767 ldd [%i3+224-8],%f34
1768 faddd %f20,%f22,%f20
1770 std %f20,[%i3+128-8]
1771 faddd %f24,%f26,%f24
1773 add %l1,1,%l1
1774 std %f24,[%i3+160-8]
1775 faddd %f28,%f30,%f28
1777 cmp %l1,15
1778 std %f28,[%i3+192-8]
1779 fsubd %f14,%f12,%f0
1781 faddd %f32,%f34,%f32
1782 ble,pt %icc,.L99999999
1783 std %f32,[%i3+224-8]
1787 ldd [%g5+%lo(TwoToMinus32)],%f8
1789 ldd [%i3+8],%f16
1791 ldd [%i3+16],%f20
1793 fmuld %f8,%f16,%f18
1794 ldd [%i3+24],%f24
1796 fmuld %f8,%f20,%f22
1797 ldd [%i3+32],%f28
1799 fmuld %f8,%f24,%f26
1800 ldd [%l5+%lo(TwoTo32)],%f10
1802 fmuld %f8,%f28,%f30
1804 fdtox %f18,%f18
1806 fdtox %f22,%f22
1808 fdtox %f26,%f26
1809 ldd [%i3+40],%f32
1811 fdtox %f30,%f30
1812 ldd [%i3+48],%f56
1814 fxtod %f18,%f18
1815 fmuld %f8,%f32,%f34
1816 ldd [%i3+56],%f36
1818 fxtod %f22,%f22
1819 fmuld %f8,%f56,%f58
1820 ldd [%i3+64],%f38
1822 fxtod %f26,%f26
1823 fmuld %f8,%f36,%f60
1825 fxtod %f30,%f30
1826 fmuld %f8,%f38,%f62
1828 fdtox %f34,%f34
1829 fmuld %f10,%f18,%f40
1831 fdtox %f58,%f58
1832 fmuld %f10,%f22,%f42
1834 fdtox %f60,%f60
1835 fmuld %f10,%f26,%f44
1837 fdtox %f62,%f62
1838 fmuld %f10,%f30,%f46
1840 fxtod %f34,%f34
1842 fxtod %f58,%f58
1844 fxtod %f60,%f60
1846 fxtod %f62,%f62
1848 fsubd %f16,%f40,%f40
1849 fmuld %f10,%f34,%f48
1851 fsubd %f20,%f42,%f42
1852 fmuld %f10,%f58,%f50
1854 fsubd %f24,%f44,%f44
1855 fmuld %f10,%f60,%f52
1857 fsubd %f28,%f46,%f46
1858 fmuld %f10,%f62,%f54
1860 std %f40,[%i3+8]
1862 std %f42,[%i3+16]
1864 faddd %f18,%f44,%f44
1865 std %f44,[%i3+24]
1867 faddd %f22,%f46,%f46
1868 std %f46,[%i3+32]
1873 fsubd %f32,%f48,%f48
1874 ldd [%i3+64+8],%f16
1876 fsubd %f56,%f50,%f50
1877 ldd [%i3+64+16],%f20
1879 fsubd %f36,%f52,%f52
1880 ldd [%i3+64+24],%f24
1882 fsubd %f38,%f54,%f54
1883 ldd [%i3+64+32],%f28
1885 faddd %f26,%f48,%f48
1886 fmuld %f8,%f16,%f18
1887 std %f48,[%i3+40]
1889 faddd %f30,%f50,%f50
1890 fmuld %f8,%f20,%f22
1891 std %f50,[%i3+48]
1893 faddd %f34,%f52,%f52
1894 fmuld %f8,%f24,%f26
1895 std %f52,[%i3+56]
1897 faddd %f58,%f54,%f54
1898 fmuld %f8,%f28,%f30
1899 std %f54,[%i3+64]
1903 fdtox %f18,%f18
1905 fdtox %f22,%f22
1907 fdtox %f26,%f26
1908 ldd [%i3+64+40],%f32
1910 fdtox %f30,%f30
1911 ldd [%i3+64+48],%f56
1913 fxtod %f18,%f18
1914 fmuld %f8,%f32,%f34
1915 ldd [%i3+64+56],%f36
1917 fxtod %f22,%f22
1918 fmuld %f8,%f56,%f58
1919 ldd [%i3+64+64],%f38
1921 fxtod %f26,%f26
1922 fmuld %f8,%f36,%f12
1924 fxtod %f30,%f30
1925 fmuld %f8,%f38,%f14
1927 fdtox %f34,%f34
1928 fmuld %f10,%f18,%f40
1930 fdtox %f58,%f58
1931 fmuld %f10,%f22,%f42
1933 fdtox %f12,%f12
1934 fmuld %f10,%f26,%f44
1936 fdtox %f14,%f14
1937 fmuld %f10,%f30,%f46
1939 fxtod %f34,%f34
1941 fxtod %f58,%f58
1943 fxtod %f12,%f12
1945 fxtod %f14,%f14
1947 fsubd %f16,%f40,%f40
1948 fmuld %f10,%f34,%f48
1950 fsubd %f20,%f42,%f42
1951 fmuld %f10,%f58,%f50
1953 fsubd %f24,%f44,%f44
1954 fmuld %f10,%f12,%f52
1956 fsubd %f28,%f46,%f46
1957 fmuld %f10,%f14,%f54
1959 faddd %f60,%f40,%f40
1960 std %f40,[%i3+64+8]
1962 faddd %f62,%f42,%f42
1963 std %f42,[%i3+64+16]
1965 faddd %f18,%f44,%f44
1966 std %f44,[%i3+64+24]
1968 faddd %f22,%f46,%f46
1969 std %f46,[%i3+64+32]
1974 fsubd %f32,%f48,%f48
1975 ldd [%i3+64+64+8],%f16
1977 fsubd %f56,%f50,%f50
1978 ldd [%i3+64+64+16],%f20
1980 fsubd %f36,%f52,%f52
1981 ldd [%i3+64+64+24],%f24
1983 fsubd %f38,%f54,%f54
1984 ldd [%i3+64+64+32],%f28
1986 faddd %f26,%f48,%f48
1987 fmuld %f8,%f16,%f18
1988 std %f48,[%i3+64+40]
1990 faddd %f30,%f50,%f50
1991 fmuld %f8,%f20,%f22
1992 std %f50,[%i3+64+48]
1994 faddd %f34,%f52,%f52
1995 fmuld %f8,%f24,%f26
1996 std %f52,[%i3+64+56]
1998 faddd %f58,%f54,%f54
1999 fmuld %f8,%f28,%f30
2000 std %f54,[%i3+64+64]
2005 fdtox %f18,%f18
2007 fdtox %f22,%f22
2009 fdtox %f26,%f26
2010 ldd [%i3+64+64+40],%f32
2012 fdtox %f30,%f30
2013 ldd [%i3+64+64+48],%f56
2015 fxtod %f18,%f18
2016 fmuld %f8,%f32,%f34
2017 ldd [%i3+64+64+56],%f36
2019 fxtod %f22,%f22
2020 fmuld %f8,%f56,%f58
2021 ldd [%i3+64+64+64],%f38
2023 fxtod %f26,%f26
2024 fmuld %f8,%f36,%f60
2026 fxtod %f30,%f30
2027 fmuld %f8,%f38,%f62
2029 fdtox %f34,%f34
2030 fmuld %f10,%f18,%f40
2032 fdtox %f58,%f58
2033 fmuld %f10,%f22,%f42
2035 fdtox %f60,%f60
2036 fmuld %f10,%f26,%f44
2038 fdtox %f62,%f62
2039 fmuld %f10,%f30,%f46
2041 fxtod %f34,%f34
2043 fxtod %f58,%f58
2045 fxtod %f60,%f60
2047 fxtod %f62,%f62
2049 fsubd %f16,%f40,%f40
2050 fmuld %f10,%f34,%f48
2052 fsubd %f20,%f42,%f42
2053 fmuld %f10,%f58,%f50
2055 fsubd %f24,%f44,%f44
2056 fmuld %f10,%f60,%f52
2058 fsubd %f28,%f46,%f46
2059 fmuld %f10,%f62,%f54
2061 faddd %f12,%f40,%f40
2062 std %f40,[%i3+64+64+8]
2064 faddd %f14,%f42,%f42
2065 std %f42,[%i3+64+64+16]
2067 faddd %f18,%f44,%f44
2068 std %f44,[%i3+64+64+24]
2070 faddd %f22,%f46,%f46
2071 std %f46,[%i3+64+64+32]
2075 fsubd %f32,%f48,%f48
2076 ldd [%i3+64+64+64+8],%f16
2078 fsubd %f56,%f50,%f50
2079 ldd [%i3+64+64+64+16],%f20
2081 fsubd %f36,%f52,%f52
2082 ldd [%i3+64+64+64+24],%f24
2084 fsubd %f38,%f54,%f54
2085 ldd [%i3+64+64+64+32],%f28
2087 faddd %f26,%f48,%f48
2088 fmuld %f8,%f16,%f18
2089 std %f48,[%i3+64+64+40]
2091 faddd %f30,%f50,%f50
2092 fmuld %f8,%f20,%f22
2093 std %f50,[%i3+64+64+48]
2095 faddd %f34,%f52,%f52
2096 fmuld %f8,%f24,%f26
2097 std %f52,[%i3+64+64+56]
2099 faddd %f58,%f54,%f54
2100 fmuld %f8,%f28,%f30
2101 std %f54,[%i3+64+64+64]
2105 fdtox %f18,%f18
2107 fdtox %f22,%f22
2109 fdtox %f26,%f26
2110 ldd [%i3+64+64+64+40],%f32
2112 fdtox %f30,%f30
2113 ldd [%i3+64+64+64+48],%f56
2115 fxtod %f18,%f18
2116 fmuld %f8,%f32,%f34
2117 ldd [%i3+64+64+64+56],%f36
2119 fxtod %f22,%f22
2120 fmuld %f8,%f56,%f58
2121 ldd [%i3+64+64+64+64],%f38
2123 fxtod %f26,%f26
2124 fmuld %f8,%f36,%f12
2126 fxtod %f30,%f30
2127 fmuld %f8,%f38,%f14
2129 fdtox %f34,%f34
2130 fmuld %f10,%f18,%f40
2132 fdtox %f58,%f58
2133 fmuld %f10,%f22,%f42
2135 fdtox %f12,%f12
2136 fmuld %f10,%f26,%f44
2138 fdtox %f14,%f14
2139 fmuld %f10,%f30,%f46
2141 sethi %hi(TwoToMinus16),%g5
2142 fxtod %f34,%f34
2144 sethi %hi(TwoTo16),%l5
2145 fxtod %f58,%f58
2147 fxtod %f12,%f12
2149 fxtod %f14,%f14
2151 fsubd %f16,%f40,%f16
2152 fmuld %f10,%f34,%f48
2153 ldd [%g5+%lo(TwoToMinus16)],%f8
2155 fsubd %f20,%f42,%f20
2156 fmuld %f10,%f58,%f50
2157 ldd [%i1],%f40 ! should be %f40
2159 fsubd %f24,%f44,%f24
2160 fmuld %f10,%f12,%f52
2161 ldd [%i1+8],%f42 ! should be %f42
2163 fsubd %f28,%f46,%f28
2164 fmuld %f10,%f14,%f54
2165 ldd [%i4],%f44 ! should be %f44
2167 faddd %f60,%f16,%f16
2168 std %f16,[%i3+64+64+64+8]
2170 faddd %f62,%f20,%f20
2171 std %f20,[%i3+64+64+64+16]
2173 faddd %f18,%f24,%f24
2174 std %f24,[%i3+64+64+64+24]
2176 faddd %f22,%f28,%f28
2177 std %f28,[%i3+64+64+64+32]
2179 fsubd %f32,%f48,%f32
2180 ldd [%i4+8],%f46 ! should be %f46
2182 fsubd %f56,%f50,%f56
2183 ldd [%i1+104],%f48 ! should be %f48
2185 fsubd %f36,%f52,%f36
2186 ldd [%i4+104],%f50 ! should be %f50
2188 fsubd %f38,%f54,%f38
2189 ldd [%i1+16],%f52 ! should be %f52
2191 faddd %f26,%f32,%f32
2192 std %f32,[%i3+64+64+64+40]
2194 faddd %f30,%f56,%f56
2195 std %f56,[%i3+64+64+64+48]
2197 faddd %f34,%f36,%f36
2198 std %f36,[%i3+64+64+64+56]
2200 faddd %f58,%f38,%f38
2201 std %f38,[%i3+64+64+64+64]
2203 std %f12,[%i3+64+64+64+64+8]
2205 std %f14,[%i3+64+64+64+64+16]
2208 ldd [%l5+%lo(TwoTo16)],%f10
2209 ldd [%i1+48],%f54
2210 ldd [%i1+56],%f36
2211 ldd [%i1+64],%f56
2212 ldd [%i1+112],%f58
2214 ldd [%i4+104],%f50
2215 ldd [%i4+112],%f60
2218 .L99999998:
2220 ldd [%i1+24],%f20
2221 fmuld %f0,%f44,%f12
2223 ldd [%i4+24],%f22
2224 fmuld %f42,%f4,%f16
2226 ldd [%i1+40],%f24
2227 fmuld %f46,%f0,%f18
2229 ldd [%i4+40],%f26
2230 fmuld %f20,%f4,%f20
2232 ldd [%l0+8],%f38
2233 faddd %f12,%f6,%f12
2234 fmuld %f22,%f0,%f22
2236 add %l0,8,%l0
2237 ldd [%i4+56],%f30
2238 fmuld %f24,%f4,%f24
2240 ldd [%i1+72],%f32
2241 faddd %f16,%f18,%f16
2242 fmuld %f26,%f0,%f26
2244 ldd [%i3+16],%f18
2245 fmuld %f40,%f38,%f14
2247 ldd [%i4+72],%f34
2248 faddd %f20,%f22,%f20
2249 fmuld %f8,%f12,%f12
2251 ldd [%i3+48],%f22
2252 fmuld %f36,%f4,%f28
2254 ldd [%i3+8],%f6
2255 faddd %f16,%f18,%f16
2256 fmuld %f30,%f0,%f30
2258 std %f16,[%i3+16]
2259 faddd %f24,%f26,%f24
2260 fmuld %f32,%f4,%f32
2262 ldd [%i3+80],%f26
2263 faddd %f12,%f14,%f12
2264 fmuld %f34,%f0,%f34
2266 ldd [%i1+88],%f16
2267 faddd %f20,%f22,%f20
2269 ldd [%i4+88],%f18
2270 faddd %f28,%f30,%f28
2272 ldd [%i3+112],%f30
2273 faddd %f32,%f34,%f32
2275 ldd [%i3+144],%f34
2276 faddd %f12,%f6,%f6
2277 fmuld %f16,%f4,%f16
2279 std %f20,[%i3+48]
2280 faddd %f24,%f26,%f24
2281 fmuld %f18,%f0,%f18
2283 std %f24,[%i3+80]
2284 faddd %f28,%f30,%f28
2285 fmuld %f48,%f4,%f20
2287 std %f28,[%i3+112]
2288 faddd %f32,%f34,%f32
2289 fmuld %f50,%f0,%f22
2291 ldd [%i1+120],%f24
2292 fdtox %f6,%f12
2294 std %f32,[%i3+144]
2295 faddd %f16,%f18,%f16
2297 ldd [%i4+120],%f26
2299 ldd [%i3+176],%f18
2300 faddd %f20,%f22,%f20
2301 fmuld %f24,%f4,%f24
2303 ldd [%i4+16],%f30
2304 fmovs %f11,%f12
2306 ldd [%i1+32],%f32
2307 fmuld %f26,%f0,%f26
2309 ldd [%i4+32],%f34
2310 fmuld %f52,%f4,%f28
2312 ldd [%i3+208],%f22
2313 faddd %f16,%f18,%f16
2314 fmuld %f30,%f0,%f30
2316 std %f16,[%i3+176]
2317 fxtod %f12,%f12
2318 fmuld %f32,%f4,%f32
2320 ldd [%i4+48],%f18
2321 faddd %f24,%f26,%f24
2322 fmuld %f34,%f0,%f34
2324 ldd [%i3+240],%f26
2325 faddd %f20,%f22,%f20
2327 std %f20,[%i3+208]
2328 faddd %f28,%f30,%f28
2329 fmuld %f54,%f4,%f16
2331 ldd [%i3+32],%f30
2332 fmuld %f12,%f2,%f14
2334 ldd [%i4+64],%f22
2335 faddd %f32,%f34,%f32
2336 fmuld %f18,%f0,%f18
2338 ldd [%i3+64],%f34
2339 faddd %f24,%f26,%f24
2341 std %f24,[%i3+240]
2342 faddd %f28,%f30,%f28
2343 fmuld %f56,%f4,%f20
2345 std %f28,[%i3+32]
2346 fmuld %f14,%f8,%f12
2348 ldd [%i1+80],%f24
2349 faddd %f32,%f34,%f34 ! yes, tmp52!
2350 fmuld %f22,%f0,%f22
2352 ldd [%i4+80],%f26
2353 faddd %f16,%f18,%f16
2355 ldd [%i1+96],%f28
2356 fmuld %f58,%f4,%f32
2358 ldd [%i4+96],%f30
2359 fdtox %f12,%f12
2360 fmuld %f24,%f4,%f24
2362 std %f34,[%i3+64] ! yes, tmp52!
2363 faddd %f20,%f22,%f20
2364 fmuld %f26,%f0,%f26
2366 ldd [%i3+96],%f18
2367 fmuld %f28,%f4,%f28
2369 ldd [%i3+128],%f22
2370 fmovd %f38,%f4
2371 fmuld %f30,%f0,%f30
2373 fxtod %f12,%f12
2374 fmuld %f60,%f0,%f34
2376 add %i3,8,%i3
2377 faddd %f24,%f26,%f24
2379 ldd [%i3+160-8],%f26
2380 faddd %f16,%f18,%f16
2382 std %f16,[%i3+96-8]
2383 faddd %f28,%f30,%f28
2385 ldd [%i3+192-8],%f30
2386 faddd %f32,%f34,%f32
2387 fmuld %f12,%f10,%f12
2389 ldd [%i3+224-8],%f34
2390 faddd %f20,%f22,%f20
2392 std %f20,[%i3+128-8]
2393 faddd %f24,%f26,%f24
2395 add %l1,1,%l1
2396 std %f24,[%i3+160-8]
2397 faddd %f28,%f30,%f28
2399 cmp %l1,31
2400 std %f28,[%i3+192-8]
2401 fsubd %f14,%f12,%f0
2403 faddd %f32,%f34,%f32
2404 ble,pt %icc,.L99999998
2405 std %f32,[%i3+224-8]
2407 std %f6,[%i3]
2409 add %o5,%g0,%i3
2412 !END HAND CODED PART
2413 .L900000828:
2414 /* 0x03e4 405 */ ba .L900000852
2415 /* 0x03e8 409 */ ldx [%i3+%o0],%l1
2417 ! 406 ! }
2418 ! 407 ! }
2419 ! 409 ! conv_d16_to_i32(result, dt + 2 * nlen, (int64_t *)dt, nlen + 1);
2420 ! 411 !/*for(i=0;i<nlen+1;i++) saveresult[i]=result[i];*/
2421 ! 413 ! adjust_montf_result(result, nint, nlen);
2423 .L77000476:
2424 /* 0x03ec 413 */ sll %g1,2,%l3
2425 /* 0x03f0 0 */ sethi %hi(TwoTo16),%g5
2426 /* 0x03f4 413 */ add %l3,2,%l2
2427 /* 0x03f8 328 */ cmp %l2,0
2428 /* 0x03fc */ ble,pn %icc,.L77000482
2429 /* 0x0400 0 */ sethi %hi(TwoToMinus16),%o2
2430 .L77000514:
2431 /* 0x0404 329 */ add %l3,2,%l2
2432 /* 0x0408 328 */ add %l3,1,%o4
2433 /* 0x040c */ or %g0,0,%l3
2434 /* 0x0410 329 */ cmp %l2,8
2435 /* 0x0414 */ bl,pn %icc,.L77000477
2436 /* 0x0418 328 */ or %g0,%i3,%l1
2437 .L900000831:
2438 /* 0x041c 329 */ prefetch [%i3],22
2439 /* 0x0420 */ sub %o4,7,%l4
2440 /* 0x0424 */ or %g0,0,%l3
2441 /* 0x0428 */ or %g0,%i3,%l1
2442 .L900000829:
2443 /* 0x042c 329 */ prefetch [%l1+528],22
2444 /* 0x0430 */ std %f0,[%l1]
2445 /* 0x0434 */ add %l3,8,%l3
2446 /* 0x0438 */ add %l1,64,%l1
2447 /* 0x043c */ std %f0,[%l1-56]
2448 /* 0x0440 */ cmp %l3,%l4
2449 /* 0x0444 */ std %f0,[%l1-48]
2450 /* 0x0448 */ std %f0,[%l1-40]
2451 /* 0x044c */ prefetch [%l1+496],22
2452 /* 0x0450 */ std %f0,[%l1-32]
2453 /* 0x0454 */ std %f0,[%l1-24]
2454 /* 0x0458 */ std %f0,[%l1-16]
2455 /* 0x045c */ ble,pt %icc,.L900000829
2456 /* 0x0460 */ std %f0,[%l1-8]
2457 .L900000832:
2458 /* 0x0464 329 */ cmp %l3,%o4
2459 /* 0x0468 */ bg,pn %icc,.L77000482
2460 /* 0x046c */ nop
2461 .L77000477:
2462 /* 0x0470 329 */ add %l3,1,%l3
2463 .L900000851:
2464 /* 0x0474 329 */ std %f0,[%l1]
2465 /* 0x0478 */ cmp %l3,%o4
2466 /* 0x047c */ add %l1,8,%l1
2467 /* 0x0480 */ ble,pt %icc,.L900000851
2468 /* 0x0484 */ add %l3,1,%l3
2469 .L77000482:
2470 /* 0x0488 330 */ ldd [%i1],%f40
2471 /* 0x048c 334 */ cmp %o3,0
2472 /* 0x0490 */ sub %g1,1,%l3
2473 /* 0x0494 330 */ ldd [%l0],%f42
2474 /* 0x0498 331 */ ldd [%o2+%lo(TwoToMinus16)],%f36
2475 /* 0x049c */ ldd [%g5+%lo(TwoTo16)],%f38
2476 /* 0x04a0 330 */ fmuld %f40,%f42,%f52
2477 /* 0x04a4 331 */ fdtox %f52,%f8
2478 /* 0x04a8 */ fmovs %f0,%f8
2479 /* 0x04ac */ fxtod %f8,%f62
2480 /* 0x04b0 */ fmuld %f62,%f14,%f60
2481 /* 0x04b4 */ fmuld %f60,%f36,%f32
2482 /* 0x04b8 */ fdtox %f32,%f50
2483 /* 0x04bc */ fxtod %f50,%f34
2484 /* 0x04c0 */ fmuld %f34,%f38,%f46
2485 /* 0x04c4 */ fsubd %f60,%f46,%f40
2486 /* 0x04c8 334 */ ble,pn %icc,.L77000378
2487 /* 0x04cc 330 */ std %f52,[%i3]
2488 .L77000509:
2489 /* 0x04d0 345 */ add %o3,1,%g5
2490 /* 0x04d4 */ sll %g5,1,%o2
2491 /* 0x04d8 */ or %g0,0,%l1
2492 /* 0x04dc 337 */ ldd [%i4],%f42
2493 /* 0x04e0 345 */ sub %o3,1,%o3
2494 /* 0x04e4 */ or %g0,0,%o5
2495 /* 0x04e8 */ or %g0,%i3,%l2
2496 /* 0x04ec */ add %i4,8,%o1
2497 /* 0x04f0 */ add %i1,8,%g5
2498 .L900000848:
2499 /* 0x04f4 337 */ fmuld %f40,%f42,%f34
2500 /* 0x04f8 */ ldd [%l0+8],%f32
2501 /* 0x04fc 341 */ cmp %g1,1
2502 /* 0x0500 337 */ ldd [%i1],%f50
2503 /* 0x0504 */ ldd [%l2],%f46
2504 /* 0x0508 */ ldd [%l2+8],%f44
2505 /* 0x050c */ fmuld %f50,%f32,%f60
2506 /* 0x0510 335 */ ldd [%l0],%f42
2507 /* 0x0514 337 */ faddd %f46,%f34,%f48
2508 /* 0x0518 */ faddd %f44,%f60,%f58
2509 /* 0x051c */ fmuld %f36,%f48,%f54
2510 /* 0x0520 */ faddd %f58,%f54,%f34
2511 /* 0x0524 341 */ ble,pn %icc,.L77000368
2512 /* 0x0528 338 */ std %f34,[%l2+8]
2513 .L77000507:
2514 /* 0x052c 341 */ or %g0,1,%l5
2515 /* 0x0530 */ or %g0,2,%l4
2516 /* 0x0534 */ or %g0,%g5,%g4
2517 /* 0x0538 342 */ cmp %l3,12
2518 /* 0x053c */ bl,pn %icc,.L77000481
2519 /* 0x0540 341 */ or %g0,%o1,%g3
2520 .L900000839:
2521 /* 0x0544 342 */ prefetch [%i1+8],0
2522 /* 0x0548 */ prefetch [%i1+72],0
2523 /* 0x054c */ add %i4,40,%l6
2524 /* 0x0550 */ add %i1,40,%l7
2525 /* 0x0554 */ prefetch [%l2+16],0
2526 /* 0x0558 */ or %g0,%l2,%o7
2527 /* 0x055c */ sub %l3,7,%i5
2528 /* 0x0560 */ prefetch [%l2+80],0
2529 /* 0x0564 */ add %l2,80,%g2
2530 /* 0x0568 */ or %g0,2,%l4
2531 /* 0x056c */ prefetch [%i1+136],0
2532 /* 0x0570 */ or %g0,5,%l5
2533 /* 0x0574 */ prefetch [%i1+200],0
2534 /* 0x0578 */ prefetch [%l2+144],0
2535 /* 0x057c */ ldd [%i4+8],%f52
2536 /* 0x0580 */ ldd [%i4+16],%f44
2537 /* 0x0584 */ ldd [%i4+24],%f56
2538 /* 0x0588 */ fmuld %f40,%f52,%f48
2539 /* 0x058c */ fmuld %f40,%f44,%f46
2540 /* 0x0590 */ fmuld %f40,%f56,%f44
2541 /* 0x0594 */ ldd [%l2+48],%f56
2542 /* 0x0598 */ prefetch [%l2+208],0
2543 /* 0x059c */ prefetch [%l2+272],0
2544 /* 0x05a0 */ prefetch [%l2+336],0
2545 /* 0x05a4 */ prefetch [%l2+400],0
2546 /* 0x05a8 */ ldd [%i1+8],%f32
2547 /* 0x05ac */ ldd [%i1+16],%f60
2548 /* 0x05b0 */ ldd [%i1+24],%f50
2549 /* 0x05b4 */ fmuld %f42,%f32,%f62
2550 /* 0x05b8 */ ldd [%i1+32],%f32
2551 /* 0x05bc */ fmuld %f42,%f60,%f58
2552 /* 0x05c0 */ ldd [%l2+16],%f52
2553 /* 0x05c4 */ ldd [%l2+32],%f54
2554 /* 0x05c8 */ faddd %f62,%f48,%f60
2555 /* 0x05cc */ fmuld %f42,%f50,%f48
2556 /* 0x05d0 */ faddd %f58,%f46,%f62
2557 /* 0x05d4 */ ldd [%i4+32],%f46
2558 /* 0x05d8 */ ldd [%l2+64],%f58
2559 .L900000837:
2560 /* 0x05dc 342 */ prefetch [%l7+192],0
2561 /* 0x05e0 */ fmuld %f40,%f46,%f46
2562 /* 0x05e4 */ faddd %f60,%f52,%f60
2563 /* 0x05e8 */ ldd [%l6],%f52
2564 /* 0x05ec */ std %f60,[%g2-64]
2565 /* 0x05f0 */ fmuld %f42,%f32,%f50
2566 /* 0x05f4 */ add %l5,8,%l5
2567 /* 0x05f8 */ ldd [%l7],%f60
2568 /* 0x05fc */ faddd %f48,%f44,%f48
2569 /* 0x0600 */ cmp %l5,%i5
2570 /* 0x0604 */ ldd [%g2],%f32
2571 /* 0x0608 */ add %g2,128,%g2
2572 /* 0x060c */ prefetch [%g2+256],0
2573 /* 0x0610 */ fmuld %f40,%f52,%f52
2574 /* 0x0614 */ faddd %f62,%f54,%f44
2575 /* 0x0618 */ ldd [%l6+8],%f54
2576 /* 0x061c */ std %f44,[%g2-176]
2577 /* 0x0620 */ fmuld %f42,%f60,%f44
2578 /* 0x0624 */ add %l6,64,%l6
2579 /* 0x0628 */ ldd [%l7+8],%f60
2580 /* 0x062c */ faddd %f50,%f46,%f50
2581 /* 0x0630 */ add %l7,64,%l7
2582 /* 0x0634 */ add %l4,16,%l4
2583 /* 0x0638 */ ldd [%g2-112],%f46
2584 /* 0x063c */ fmuld %f40,%f54,%f54
2585 /* 0x0640 */ faddd %f48,%f56,%f62
2586 /* 0x0644 */ ldd [%l6-48],%f56
2587 /* 0x0648 */ std %f62,[%g2-160]
2588 /* 0x064c */ fmuld %f42,%f60,%f48
2589 /* 0x0650 */ ldd [%l7-48],%f60
2590 /* 0x0654 */ faddd %f44,%f52,%f52
2591 /* 0x0658 */ ldd [%g2-96],%f30
2592 /* 0x065c */ prefetch [%g2+288],0
2593 /* 0x0660 */ fmuld %f40,%f56,%f56
2594 /* 0x0664 */ faddd %f50,%f58,%f62
2595 /* 0x0668 */ ldd [%l6-40],%f58
2596 /* 0x066c */ std %f62,[%g2-144]
2597 /* 0x0670 */ fmuld %f42,%f60,%f50
2598 /* 0x0674 */ ldd [%l7-40],%f62
2599 /* 0x0678 */ faddd %f48,%f54,%f54
2600 /* 0x067c */ ldd [%g2-80],%f28
2601 /* 0x0680 */ prefetch [%l7+160],0
2602 /* 0x0684 */ fmuld %f40,%f58,%f48
2603 /* 0x0688 */ faddd %f52,%f32,%f44
2604 /* 0x068c */ ldd [%l6-32],%f58
2605 /* 0x0690 */ std %f44,[%g2-128]
2606 /* 0x0694 */ fmuld %f42,%f62,%f44
2607 /* 0x0698 */ ldd [%l7-32],%f60
2608 /* 0x069c */ faddd %f50,%f56,%f56
2609 /* 0x06a0 */ ldd [%g2-64],%f52
2610 /* 0x06a4 */ prefetch [%g2+320],0
2611 /* 0x06a8 */ fmuld %f40,%f58,%f50
2612 /* 0x06ac */ faddd %f54,%f46,%f32
2613 /* 0x06b0 */ ldd [%l6-24],%f62
2614 /* 0x06b4 */ std %f32,[%g2-112]
2615 /* 0x06b8 */ fmuld %f42,%f60,%f46
2616 /* 0x06bc */ ldd [%l7-24],%f60
2617 /* 0x06c0 */ faddd %f44,%f48,%f48
2618 /* 0x06c4 */ ldd [%g2-48],%f54
2619 /* 0x06c8 */ fmuld %f40,%f62,%f26
2620 /* 0x06cc */ faddd %f56,%f30,%f32
2621 /* 0x06d0 */ ldd [%l6-16],%f58
2622 /* 0x06d4 */ std %f32,[%g2-96]
2623 /* 0x06d8 */ fmuld %f42,%f60,%f30
2624 /* 0x06dc */ ldd [%l7-16],%f32
2625 /* 0x06e0 */ faddd %f46,%f50,%f60
2626 /* 0x06e4 */ ldd [%g2-32],%f56
2627 /* 0x06e8 */ prefetch [%g2+352],0
2628 /* 0x06ec */ fmuld %f40,%f58,%f44
2629 /* 0x06f0 */ faddd %f48,%f28,%f62
2630 /* 0x06f4 */ ldd [%l6-8],%f46
2631 /* 0x06f8 */ std %f62,[%g2-80]
2632 /* 0x06fc */ fmuld %f42,%f32,%f48
2633 /* 0x0700 */ ldd [%l7-8],%f32
2634 /* 0x0704 */ faddd %f30,%f26,%f62
2635 /* 0x0708 */ ble,pt %icc,.L900000837
2636 /* 0x070c */ ldd [%g2-16],%f58
2637 .L900000840:
2638 /* 0x0710 342 */ fmuld %f40,%f46,%f46
2639 /* 0x0714 */ faddd %f62,%f54,%f62
2640 /* 0x0718 */ std %f62,[%g2-48]
2641 /* 0x071c */ cmp %l5,%l3
2642 /* 0x0720 */ fmuld %f42,%f32,%f50
2643 /* 0x0724 */ faddd %f48,%f44,%f48
2644 /* 0x0728 */ or %g0,%l7,%g4
2645 /* 0x072c */ or %g0,%l6,%g3
2646 /* 0x0730 */ faddd %f60,%f52,%f60
2647 /* 0x0734 */ std %f60,[%g2-64]
2648 /* 0x0738 */ or %g0,%o7,%l2
2649 /* 0x073c */ add %l4,8,%l4
2650 /* 0x0740 */ faddd %f50,%f46,%f54
2651 /* 0x0744 */ faddd %f48,%f56,%f56
2652 /* 0x0748 */ std %f56,[%g2-32]
2653 /* 0x074c */ faddd %f54,%f58,%f58
2654 /* 0x0750 */ bg,pn %icc,.L77000368
2655 /* 0x0754 */ std %f58,[%g2-16]
2656 .L77000481:
2657 /* 0x0758 342 */ ldd [%g4],%f44
2658 .L900000850:
2659 /* 0x075c 342 */ ldd [%g3],%f48
2660 /* 0x0760 */ fmuld %f42,%f44,%f58
2661 /* 0x0764 */ sra %l4,0,%l7
2662 /* 0x0768 */ add %l5,1,%l5
2663 /* 0x076c */ sllx %l7,3,%g2
2664 /* 0x0770 */ add %g4,8,%g4
2665 /* 0x0774 */ ldd [%l2+%g2],%f56
2666 /* 0x0778 */ cmp %l5,%l3
2667 /* 0x077c */ add %l4,2,%l4
2668 /* 0x0780 */ fmuld %f40,%f48,%f54
2669 /* 0x0784 */ add %g3,8,%g3
2670 /* 0x0788 */ faddd %f58,%f54,%f52
2671 /* 0x078c */ faddd %f52,%f56,%f62
2672 /* 0x0790 */ std %f62,[%l2+%g2]
2673 /* 0x0794 */ ble,a,pt %icc,.L900000850
2674 /* 0x0798 */ ldd [%g4],%f44
2675 .L77000368:
2676 /* 0x079c 344 */ cmp %o5,15
2677 /* 0x07a0 */ bne,pn %icc,.L77000483
2678 /* 0x07a4 345 */ srl %l1,31,%g4
2679 .L77000478:
2680 /* 0x07a8 345 */ add %l1,%g4,%l4
2681 /* 0x07ac */ sra %l4,1,%o7
2682 /* 0x07b0 */ add %o7,1,%o4
2683 /* 0x07b4 */ sll %o4,1,%l6
2684 /* 0x07b8 */ cmp %l6,%o2
2685 /* 0x07bc */ bge,pn %icc,.L77000392
2686 /* 0x07c0 */ fmovd %f0,%f42
2687 .L77000508:
2688 /* 0x07c4 345 */ sra %l6,0,%l4
2689 /* 0x07c8 */ sllx %l4,3,%g2
2690 /* 0x07cc */ fmovd %f0,%f32
2691 /* 0x07d0 */ sub %o2,1,%l5
2692 /* 0x07d4 */ ldd [%g2+%i3],%f40
2693 /* 0x07d8 */ add %g2,%i3,%g3
2694 .L900000849:
2695 /* 0x07dc 345 */ fdtox %f40,%f10
2696 /* 0x07e0 */ ldd [%g3+8],%f52
2697 /* 0x07e4 */ add %l6,2,%l6
2698 /* 0x07e8 */ cmp %l6,%l5
2699 /* 0x07ec */ fdtox %f52,%f2
2700 /* 0x07f0 */ fmovd %f10,%f30
2701 /* 0x07f4 */ fmovs %f0,%f10
2702 /* 0x07f8 */ fmovs %f0,%f2
2703 /* 0x07fc */ fxtod %f10,%f10
2704 /* 0x0800 */ fxtod %f2,%f2
2705 /* 0x0804 */ fdtox %f52,%f28
2706 /* 0x0808 */ faddd %f10,%f32,%f56
2707 /* 0x080c */ std %f56,[%g3]
2708 /* 0x0810 */ faddd %f2,%f42,%f62
2709 /* 0x0814 */ std %f62,[%g3+8]
2710 /* 0x0818 */ fitod %f30,%f32
2711 /* 0x081c */ add %g3,16,%g3
2712 /* 0x0820 */ fitod %f28,%f42
2713 /* 0x0824 */ ble,a,pt %icc,.L900000849
2714 /* 0x0828 */ ldd [%g3],%f40
2715 .L77000392:
2716 /* 0x082c 346 */ or %g0,0,%o5
2717 .L77000483:
2718 /* 0x0830 350 */ fdtox %f34,%f6
2719 /* 0x0834 */ add %l1,1,%l1
2720 /* 0x0838 */ cmp %l1,%o3
2721 /* 0x083c */ add %o5,1,%o5
2722 /* 0x0840 */ add %l2,8,%l2
2723 /* 0x0844 */ add %l0,8,%l0
2724 /* 0x0848 */ fmovs %f0,%f6
2725 /* 0x084c */ fxtod %f6,%f46
2726 /* 0x0850 */ fmuld %f46,%f14,%f56
2727 /* 0x0854 */ fmuld %f56,%f36,%f44
2728 /* 0x0858 */ fdtox %f44,%f48
2729 /* 0x085c */ fxtod %f48,%f58
2730 /* 0x0860 */ fmuld %f58,%f38,%f54
2731 /* 0x0864 */ fsubd %f56,%f54,%f40
2732 /* 0x0868 */ ble,a,pt %icc,.L900000848
2733 /* 0x086c 337 */ ldd [%i4],%f42
2734 .L77000378:
2735 /* 0x0870 409 */ ldx [%i3+%o0],%l1
2736 .L900000852:
2737 /* 0x0874 409 */ add %i3,%o0,%l4
2738 /* 0x0878 */ ldx [%l4+8],%i1
2739 /* 0x087c */ cmp %l1,0
2740 /* 0x0880 */ bne,pn %xcc,.L77000403
2741 /* 0x0884 */ or %g0,0,%g5
2742 .L77000402:
2743 /* 0x0888 409 */ or %g0,0,%i3
2744 /* 0x088c */ ba .L900000847
2745 /* 0x0890 */ cmp %i1,0
2746 .L77000403:
2747 /* 0x0894 409 */ srlx %l1,52,%o5
2748 /* 0x0898 */ sethi %hi(0xfff00000),%i3
2749 /* 0x089c */ sllx %i3,32,%o2
2750 /* 0x08a0 */ sethi %hi(0x40000000),%o0
2751 /* 0x08a4 */ sllx %o0,22,%o4
2752 /* 0x08a8 */ or %g0,1023,%l0
2753 /* 0x08ac */ xor %o2,-1,%o3
2754 /* 0x08b0 */ sub %l0,%o5,%o7
2755 /* 0x08b4 */ and %l1,%o3,%l1
2756 /* 0x08b8 */ add %o7,52,%i4
2757 /* 0x08bc */ or %l1,%o4,%o1
2758 /* 0x08c0 */ cmp %i1,0
2759 /* 0x08c4 */ srlx %o1,%i4,%i3
2760 .L900000847:
2761 /* 0x08c8 409 */ bne,pn %xcc,.L77000409
2762 /* 0x08cc */ or %g0,0,%o7
2763 .L77000408:
2764 /* 0x08d0 409 */ ba .L900000846
2765 /* 0x08d4 350 */ cmp %g1,0
2766 .L77000409:
2767 /* 0x08d8 409 */ srlx %i1,52,%l2
2768 /* 0x08dc */ sethi %hi(0xfff00000),%o7
2769 /* 0x08e0 */ sllx %o7,32,%i4
2770 /* 0x08e4 */ sethi %hi(0x40000000),%i5
2771 /* 0x08e8 */ sllx %i5,22,%l6
2772 /* 0x08ec */ or %g0,1023,%l5
2773 /* 0x08f0 */ xor %i4,-1,%o1
2774 /* 0x08f4 */ sub %l5,%l2,%g2
2775 /* 0x08f8 */ and %i1,%o1,%l7
2776 /* 0x08fc */ add %g2,52,%g3
2777 /* 0x0900 */ or %l7,%l6,%g4
2778 /* 0x0904 350 */ cmp %g1,0
2779 /* 0x0908 409 */ srlx %g4,%g3,%o7
2780 .L900000846:
2781 /* 0x090c 350 */ ble,pn %icc,.L77000397
2782 /* 0x0910 */ or %g0,0,%l5
2783 .L77000510:
2784 /* 0x0914 409 */ sethi %hi(0xfff00000),%g4
2785 /* 0x0918 */ sllx %g4,32,%o0
2786 /* 0x091c 0 */ or %g0,-1,%i5
2787 /* 0x0920 409 */ srl %i5,0,%l7
2788 /* 0x0924 */ sethi %hi(0x40000000),%i1
2789 /* 0x0928 */ sllx %i1,22,%l6
2790 /* 0x092c */ sethi %hi(0xfc00),%i4
2791 /* 0x0930 */ xor %o0,-1,%g2
2792 /* 0x0934 */ add %i4,1023,%l2
2793 /* 0x0938 */ or %g0,2,%g4
2794 /* 0x093c */ or %g0,%i2,%g3
2795 .L77000395:
2796 /* 0x0940 409 */ sra %g4,0,%o2
2797 /* 0x0944 */ add %g4,1,%o3
2798 /* 0x0948 */ sllx %o2,3,%o0
2799 /* 0x094c */ sra %o3,0,%o5
2800 /* 0x0950 */ ldx [%l4+%o0],%o4
2801 /* 0x0954 */ sllx %o5,3,%l0
2802 /* 0x0958 */ and %i3,%l7,%o1
2803 /* 0x095c */ ldx [%l4+%l0],%i4
2804 /* 0x0960 */ cmp %o4,0
2805 /* 0x0964 */ bne,pn %xcc,.L77000415
2806 /* 0x0968 350 */ and %o7,%l2,%i5
2807 .L77000414:
2808 /* 0x096c 409 */ or %g0,0,%l1
2809 /* 0x0970 */ ba .L900000845
2810 /* 0x0974 */ add %g5,%o1,%i1
2811 .L77000415:
2812 /* 0x0978 409 */ srlx %o4,52,%o3
2813 /* 0x097c */ and %o4,%g2,%l1
2814 /* 0x0980 */ or %g0,52,%o0
2815 /* 0x0984 */ sub %o3,1023,%l0
2816 /* 0x0988 */ or %l1,%l6,%o4
2817 /* 0x098c */ sub %o0,%l0,%o5
2818 /* 0x0990 */ srlx %o4,%o5,%l1
2819 /* 0x0994 */ add %g5,%o1,%i1
2820 .L900000845:
2821 /* 0x0998 409 */ srax %i3,32,%g5
2822 /* 0x099c */ cmp %i4,0
2823 /* 0x09a0 */ bne,pn %xcc,.L77000421
2824 /* 0x09a4 350 */ sllx %i5,16,%o2
2825 .L77000420:
2826 /* 0x09a8 409 */ or %g0,0,%o4
2827 /* 0x09ac */ ba .L900000844
2828 /* 0x09b0 350 */ add %i1,%o2,%o5
2829 .L77000421:
2830 /* 0x09b4 409 */ srlx %i4,52,%o4
2831 /* 0x09b8 */ or %g0,52,%o0
2832 /* 0x09bc */ sub %o4,1023,%o3
2833 /* 0x09c0 */ and %i4,%g2,%i3
2834 /* 0x09c4 */ or %i3,%l6,%o5
2835 /* 0x09c8 */ sub %o0,%o3,%l0
2836 /* 0x09cc */ srlx %o5,%l0,%o4
2837 /* 0x09d0 350 */ add %i1,%o2,%o5
2838 .L900000844:
2839 /* 0x09d4 350 */ srax %o7,16,%i4
2840 /* 0x09d8 */ srax %o5,32,%i5
2841 /* 0x09dc */ add %i4,%i5,%o1
2842 /* 0x09e0 */ add %l5,1,%l5
2843 /* 0x09e4 */ and %o5,%l7,%i1
2844 /* 0x09e8 */ add %g5,%o1,%g5
2845 /* 0x09ec */ st %i1,[%g3]
2846 /* 0x09f0 */ or %g0,%l1,%i3
2847 /* 0x09f4 */ or %g0,%o4,%o7
2848 /* 0x09f8 */ add %g4,2,%g4
2849 /* 0x09fc */ cmp %l5,%l3
2850 /* 0x0a00 */ ble,pt %icc,.L77000395
2851 /* 0x0a04 */ add %g3,4,%g3
2852 .L77000397:
2853 /* 0x0a08 409 */ sethi %hi(0xfc00),%l4
2854 /* 0x0a0c */ sra %l5,0,%i5
2855 /* 0x0a10 */ add %l4,1023,%i1
2856 /* 0x0a14 */ add %g5,%i3,%l5
2857 /* 0x0a18 */ and %o7,%i1,%g5
2858 /* 0x0a1c */ sllx %g5,16,%l2
2859 /* 0x0a20 */ sllx %i5,2,%l7
2860 /* 0x0a24 413 */ sra %g1,0,%g2
2861 /* 0x0a28 409 */ add %l5,%l2,%l6
2862 /* 0x0a2c */ st %l6,[%i2+%l7]
2863 /* 0x0a30 413 */ sllx %g2,2,%g3
2864 /* 0x0a34 */ ld [%i2+%g3],%g4
2865 /* 0x0a38 */ cmp %g4,0
2866 /* 0x0a3c */ bgu,pn %icc,.L77000486
2867 /* 0x0a40 */ cmp %l3,0
2868 .L77000427:
2869 /* 0x0a44 413 */ bl,pn %icc,.L77000486
2870 /* 0x0a48 */ or %g0,%l3,%i5
2871 .L77000512:
2872 /* 0x0a4c 413 */ sra %l3,0,%o5
2873 /* 0x0a50 */ sllx %o5,2,%l7
2874 /* 0x0a54 */ ld [%l7+%i0],%o5
2875 /* 0x0a58 */ add %l7,%i2,%o1
2876 /* 0x0a5c */ add %l7,%i0,%i4
2877 .L900000843:
2878 /* 0x0a60 413 */ ld [%o1],%i1
2879 /* 0x0a64 */ cmp %i1,%o5
2880 /* 0x0a68 */ bne,pn %icc,.L77000435
2881 /* 0x0a6c */ sub %o1,4,%o1
2882 .L77000431:
2883 /* 0x0a70 413 */ sub %i4,4,%i4
2884 /* 0x0a74 */ subcc %i5,1,%i5
2885 /* 0x0a78 */ bpos,a,pt %icc,.L900000843
2886 /* 0x0a7c */ ld [%i4],%o5
2887 .L900000827:
2888 /* 0x0a80 413 */ ba .L900000842
2889 /* 0x0a84 350 */ cmp %g1,0
2890 .L77000435:
2891 /* 0x0a88 413 */ sra %i5,0,%o0
2892 /* 0x0a8c */ sllx %o0,2,%l1
2893 /* 0x0a90 */ ld [%i0+%l1],%i3
2894 /* 0x0a94 */ ld [%i2+%l1],%l0
2895 /* 0x0a98 */ cmp %l0,%i3
2896 /* 0x0a9c */ bleu,pt %icc,.L77000379
2897 /* 0x0aa0 */ nop
2898 .L77000486:
2899 /* 0x0aa4 350 */ cmp %g1,0
2900 .L900000842:
2901 /* 0x0aa8 350 */ ble,pn %icc,.L77000379
2902 /* 0x0aac */ add %l3,1,%g3
2903 .L77000511:
2904 /* 0x0ab0 350 */ or %g0,0,%l5
2905 /* 0x0ab4 */ cmp %g3,10
2906 /* 0x0ab8 */ bl,pn %icc,.L77000487
2907 /* 0x0abc */ or %g0,0,%g1
2908 .L900000835:
2909 /* 0x0ac0 350 */ prefetch [%i2],22
2910 /* 0x0ac4 */ add %i0,4,%l2
2911 /* 0x0ac8 */ prefetch [%i2+64],22
2912 /* 0x0acc */ add %i2,8,%o5
2913 /* 0x0ad0 */ sub %l3,7,%i0
2914 /* 0x0ad4 */ prefetch [%i2+128],22
2915 /* 0x0ad8 */ or %g0,2,%l5
2916 /* 0x0adc */ prefetch [%i2+192],22
2917 /* 0x0ae0 */ prefetch [%i2+256],22
2918 /* 0x0ae4 */ prefetch [%i2+320],22
2919 /* 0x0ae8 */ prefetch [%i2+384],22
2920 /* 0x0aec */ ld [%l2-4],%l7
2921 /* 0x0af0 */ ld [%o5-4],%l6
2922 /* 0x0af4 */ prefetch [%o5+440],22
2923 /* 0x0af8 */ prefetch [%o5+504],22
2924 /* 0x0afc */ ld [%i2],%i2
2925 /* 0x0b00 */ sub %i2,%l7,%g3
2926 /* 0x0b04 */ st %g3,[%o5-8]
2927 /* 0x0b08 */ srax %g3,32,%l7
2928 .L900000833:
2929 /* 0x0b0c 350 */ add %l5,8,%l5
2930 /* 0x0b10 */ add %o5,32,%o5
2931 /* 0x0b14 */ ld [%l2],%i5
2932 /* 0x0b18 */ prefetch [%o5+496],22
2933 /* 0x0b1c */ cmp %l5,%i0
2934 /* 0x0b20 */ add %l2,32,%l2
2935 /* 0x0b24 */ sub %l6,%i5,%g5
2936 /* 0x0b28 */ add %g5,%l7,%o0
2937 /* 0x0b2c */ ld [%o5-32],%l4
2938 /* 0x0b30 */ st %o0,[%o5-36]
2939 /* 0x0b34 */ srax %o0,32,%i3
2940 /* 0x0b38 */ ld [%l2-28],%i1
2941 /* 0x0b3c */ sub %l4,%i1,%i4
2942 /* 0x0b40 */ add %i4,%i3,%o1
2943 /* 0x0b44 */ ld [%o5-28],%o3
2944 /* 0x0b48 */ st %o1,[%o5-32]
2945 /* 0x0b4c */ srax %o1,32,%l1
2946 /* 0x0b50 */ ld [%l2-24],%o2
2947 /* 0x0b54 */ sub %o3,%o2,%g2
2948 /* 0x0b58 */ add %g2,%l1,%o7
2949 /* 0x0b5c */ ld [%o5-24],%l0
2950 /* 0x0b60 */ st %o7,[%o5-28]
2951 /* 0x0b64 */ srax %o7,32,%l6
2952 /* 0x0b68 */ ld [%l2-20],%o4
2953 /* 0x0b6c */ sub %l0,%o4,%g1
2954 /* 0x0b70 */ add %g1,%l6,%l7
2955 /* 0x0b74 */ ld [%o5-20],%i2
2956 /* 0x0b78 */ st %l7,[%o5-24]
2957 /* 0x0b7c */ srax %l7,32,%g4
2958 /* 0x0b80 */ ld [%l2-16],%g3
2959 /* 0x0b84 */ sub %i2,%g3,%i5
2960 /* 0x0b88 */ add %i5,%g4,%g5
2961 /* 0x0b8c */ ld [%o5-16],%i1
2962 /* 0x0b90 */ st %g5,[%o5-20]
2963 /* 0x0b94 */ srax %g5,32,%l4
2964 /* 0x0b98 */ ld [%l2-12],%o0
2965 /* 0x0b9c */ sub %i1,%o0,%i3
2966 /* 0x0ba0 */ add %i3,%l4,%i4
2967 /* 0x0ba4 */ ld [%o5-12],%o2
2968 /* 0x0ba8 */ st %i4,[%o5-16]
2969 /* 0x0bac */ srax %i4,32,%o3
2970 /* 0x0bb0 */ ld [%l2-8],%o1
2971 /* 0x0bb4 */ sub %o2,%o1,%l1
2972 /* 0x0bb8 */ add %l1,%o3,%g2
2973 /* 0x0bbc */ ld [%o5-8],%o4
2974 /* 0x0bc0 */ st %g2,[%o5-12]
2975 /* 0x0bc4 */ srax %g2,32,%l0
2976 /* 0x0bc8 */ ld [%l2-4],%o7
2977 /* 0x0bcc */ sub %o4,%o7,%l6
2978 /* 0x0bd0 */ add %l6,%l0,%g1
2979 /* 0x0bd4 */ ld [%o5-4],%l6
2980 /* 0x0bd8 */ st %g1,[%o5-8]
2981 /* 0x0bdc */ ble,pt %icc,.L900000833
2982 /* 0x0be0 */ srax %g1,32,%l7
2983 .L900000836:
2984 /* 0x0be4 350 */ ld [%l2],%l0
2985 /* 0x0be8 */ add %l2,4,%i0
2986 /* 0x0bec */ or %g0,%o5,%i2
2987 /* 0x0bf0 */ cmp %l5,%l3
2988 /* 0x0bf4 */ sub %l6,%l0,%l6
2989 /* 0x0bf8 */ add %l6,%l7,%g1
2990 /* 0x0bfc */ st %g1,[%o5-4]
2991 /* 0x0c00 */ bg,pn %icc,.L77000379
2992 /* 0x0c04 */ srax %g1,32,%g1
2993 .L77000487:
2994 /* 0x0c08 350 */ ld [%i2],%o4
2995 .L900000841:
2996 /* 0x0c0c 350 */ ld [%i0],%i3
2997 /* 0x0c10 */ add %g1,%o4,%l0
2998 /* 0x0c14 */ add %l5,1,%l5
2999 /* 0x0c18 */ cmp %l5,%l3
3000 /* 0x0c1c */ add %i0,4,%i0
3001 /* 0x0c20 */ sub %l0,%i3,%l6
3002 /* 0x0c24 */ st %l6,[%i2]
3003 /* 0x0c28 */ srax %l6,32,%g1
3004 /* 0x0c2c */ add %i2,4,%i2
3005 /* 0x0c30 */ ble,a,pt %icc,.L900000841
3006 /* 0x0c34 */ ld [%i2],%o4
3007 .L77000379:
3008 /* 0x0c38 405 */ ret ! Result =
3009 /* 0x0c3c */ restore %g0,%g0,%g0
3010 /* 0x0c40 0 */ .type mont_mulf_noconv,2
3011 /* 0x0c40 0 */ .size mont_mulf_noconv,(.-mont_mulf_noconv)
/*
 * Compiler-generated metadata carried over from the assembly output of
 * mont_mulf.c.  The two .xstabs records hold the compiler version string
 * and the working directory plus cc command line used for that compile;
 * the .ident records name the version of each compiler component and
 * header file involved.  This region contains assembler directives only,
 * no instructions.
 */
3013 ! Begin Disassembling Debug Info
3014 .xstabs ".stab.index","V=10.0;DBG_GEN=4.14.14;cd;backend;Xa;O;R=Sun C 5.5 Patch 112760-07 2004/02/03",60,0,0,0
3015 .xstabs ".stab.index","/workspace/ferenc/algorithms/bignum/unified/mont_mulf; /ws/onnv-tools/SUNWspro/SOS8/prod/bin/cc -D_KERNEL -DRF_INLINE_MACROS -fast -xarch=v9 -xO5 -xstrconst -xdepend -Xa -xchip=ultra3 -xcode=abs32 -Wc,-Qrm-Qd -Wc,-Qrm-Qf -Wc,-assembly -V -W0,-xp -c conv_v9.il -o mont_mulf.o mont_mulf.c",52,0,0,0
3017 ! End Disassembling Debug Info
3019 ! Begin Disassembling Ident
3020 .ident "cg: Sun Compiler Common 7.1 Patch 112763-10 2004/01/27" ! (NO SOURCE LINE)
3021 .ident "@(#)mont_mulf.c\t1.2\t01/09/24 SMI" ! (/tmp/acompAAApja4Fx:8)
3022 .ident "@(#)types.h\t1.74\t03/08/07 SMI" ! (/tmp/acompAAApja4Fx:9)
3023 .ident "@(#)isa_defs.h\t1.20\t99/05/04 SMI" ! (/tmp/acompAAApja4Fx:10)
3024 .ident "@(#)feature_tests.h\t1.18\t99/07/26 SMI" ! (/tmp/acompAAApja4Fx:11)
3025 .ident "@(#)machtypes.h\t1.13\t99/05/04 SMI" ! (/tmp/acompAAApja4Fx:12)
3026 .ident "@(#)inttypes.h\t1.2\t98/01/16 SMI" ! (/tmp/acompAAApja4Fx:13)
3027 .ident "@(#)int_types.h\t1.6\t97/08/20 SMI" ! (/tmp/acompAAApja4Fx:14)
3028 .ident "@(#)int_limits.h\t1.6\t99/08/06 SMI" ! (/tmp/acompAAApja4Fx:15)
3029 .ident "@(#)int_const.h\t1.2\t96/07/08 SMI" ! (/tmp/acompAAApja4Fx:16)
3030 .ident "@(#)int_fmtio.h\t1.2\t96/07/08 SMI" ! (/tmp/acompAAApja4Fx:17)
3031 .ident "@(#)types32.h\t1.4\t98/02/13 SMI" ! (/tmp/acompAAApja4Fx:18)
3032 .ident "@(#)select.h\t1.17\t01/08/15 SMI" ! (/tmp/acompAAApja4Fx:19)
3033 .ident "@(#)math.h\t2.11\t00/09/07 SMI" ! (/tmp/acompAAApja4Fx:20)
3034 .ident "@(#)math_iso.h\t1.2\t00/09/07 SMI" ! (/tmp/acompAAApja4Fx:21)
3035 .ident "@(#)floatingpoint.h\t2.5\t99/06/22 SMI" ! (/tmp/acompAAApja4Fx:22)
3036 .ident "@(#)stdio_tag.h\t1.3\t98/04/20 SMI" ! (/tmp/acompAAApja4Fx:23)
3037 .ident "@(#)ieeefp.h\t2.8 99/10/29" ! (/tmp/acompAAApja4Fx:24)
3038 .ident "acomp: Sun C 5.5 Patch 112760-07 2004/02/03" ! (/tmp/acompAAApja4Fx:57)
3039 .ident "iropt: Sun Compiler Common 7.1 Patch 112763-10 2004/01/27" ! (/tmp/acompAAApja4Fx:58)
3040 .ident "cg: Sun Compiler Common 7.1 Patch 112763-10 2004/01/27" ! (NO SOURCE LINE)
3041 ! End Disassembling Ident
/*
 * FZERO: clear all 32 double-precision FP registers, %f0 through %f62.
 *
 * %f0 and %f2 are zeroed directly with fzero.  Every remaining
 * even-numbered register is then written with either %f0 + %f2 (faddd)
 * or %f0 * %f2 (fmuld), both of which evaluate to zero, so each of those
 * instructions depends only on the first two.
 *
 * The macro expands to one logical line of ';'-separated instructions,
 * so it takes no operands and must not contain '!' comments (they would
 * swallow the rest of the expansion).  It is invoked by big_restorefp to
 * scrub the register file.
 *
 * NOTE(review): the strict faddd/fmuld alternation is presumably there to
 * spread the work over separate FP add and multiply pipelines -- confirm
 * before reordering or replacing these with fzero.
 */
3043 #define FZERO \
3044 fzero %f0 ;\
3045 fzero %f2 ;\
3046 faddd %f0, %f2, %f4 ;\
3047 fmuld %f0, %f2, %f6 ;\
3048 faddd %f0, %f2, %f8 ;\
3049 fmuld %f0, %f2, %f10 ;\
3050 faddd %f0, %f2, %f12 ;\
3051 fmuld %f0, %f2, %f14 ;\
3052 faddd %f0, %f2, %f16 ;\
3053 fmuld %f0, %f2, %f18 ;\
3054 faddd %f0, %f2, %f20 ;\
3055 fmuld %f0, %f2, %f22 ;\
3056 faddd %f0, %f2, %f24 ;\
3057 fmuld %f0, %f2, %f26 ;\
3058 faddd %f0, %f2, %f28 ;\
3059 fmuld %f0, %f2, %f30 ;\
3060 faddd %f0, %f2, %f32 ;\
3061 fmuld %f0, %f2, %f34 ;\
3062 faddd %f0, %f2, %f36 ;\
3063 fmuld %f0, %f2, %f38 ;\
3064 faddd %f0, %f2, %f40 ;\
3065 fmuld %f0, %f2, %f42 ;\
3066 faddd %f0, %f2, %f44 ;\
3067 fmuld %f0, %f2, %f46 ;\
3068 faddd %f0, %f2, %f48 ;\
3069 fmuld %f0, %f2, %f50 ;\
3070 faddd %f0, %f2, %f52 ;\
3071 fmuld %f0, %f2, %f54 ;\
3072 faddd %f0, %f2, %f56 ;\
3073 fmuld %f0, %f2, %f58 ;\
3074 faddd %f0, %f2, %f60 ;\
3075 fmuld %f0, %f2, %f62
3077 #include "assym.h"
/*
 * big_savefp -- leaf routine that records the current floating-point
 * state in the save area addressed by %o0.
 *
 * In:       %o0 = base of the save area.  The routine stores the 32-bit
 *           %fprs value at offset FPU_FPRS and the 64-bit %fsr at offset
 *           FPU_FSR, and passes the same base to BSTORE_FPREGS when the
 *           FPU was already enabled.
 * Clobbers: %o2, the integer condition codes, and whatever BSTORE_FPREGS
 *           uses (%o4 is handed to it).
 *
 * When FPRS_FEF is clear on entry the FP registers are not stored;
 * FPRS_FEF is written to %fprs instead, so the store of %fsr that
 * follows runs with the FPU enabled.
 *
 * NOTE(review): FPU_FPRS/FPU_FSR presumably come from assym.h, and
 * BSTORE_FPREGS presumably block-stores the FP register file using its
 * second argument as scratch -- confirm against their definitions.
 *
 * Every control transfer below is followed by an explicit nop.  SPARC
 * executes the instruction after a branch or retl in its delay slot, and
 * with bnz,a that slot runs only when the branch is taken; without the
 * nop the wr would run on the wrong path and each retl would drag in the
 * next instruction in the text section.
 */
3079 /*
3080 * In the routine below, we check/set FPRS_FEF bit since
3081 * we don't want to take a fp_disabled trap. We need not
3082 * check/set PSTATE_PEF bit as it is done early during boot.
3083 */
3084 ENTRY(big_savefp)
3085 rd %fprs, %o2
3086 st %o2, [%o0 + FPU_FPRS]
3087 andcc %o2, FPRS_FEF, %g0 ! is FPRS_FEF set?
3088 bnz,a,pt %icc, .fregs_save ! yes, go to save
3089 nop ! branch delay slot
3090 wr %g0, FPRS_FEF, %fprs ! else, set the bit
3091 stx %fsr, [%o0 + FPU_FSR] ! store %fsr
3092 retl
3093 nop ! retl delay slot
3094 .fregs_save:
3095 BSTORE_FPREGS(%o0, %o4)
3096 stx %fsr, [%o0 + FPU_FSR] ! store %fsr
3097 retl
3098 nop ! retl delay slot
3099 SET_SIZE(big_savefp)
/*
 * big_restorefp -- leaf routine that reinstates the floating-point state
 * held in the save area addressed by %o0 (the layout written by
 * big_savefp: %fsr at offset FPU_FSR, %fprs at offset FPU_FPRS).
 *
 * In:       %o0 = base of the save area.
 * Clobbers: %o1, the integer condition codes, and whatever BLOAD_FPREGS
 *           uses (%o2 is handed to it).
 *
 * %fsr is always reloaded.  If FPRS_FEF is set in the saved %fprs, the
 * FP registers are reloaded through BLOAD_FPREGS and the saved %fprs
 * value is written back.  Otherwise every FP register is cleared with
 * FZERO, so no values are left behind, and %fprs is written with 0.
 *
 * NOTE(review): the initial load of %fsr needs the FPU enabled on entry;
 * presumably that holds because big_savefp leaves FPRS_FEF set -- confirm
 * against the callers.  BLOAD_FPREGS presumably block-loads the FP
 * register file using its second argument as scratch.
 *
 * Every control transfer below is followed by an explicit nop so that
 * neither a multi-instruction macro (FZERO, BLOAD_FPREGS) nor the code
 * following the routine ends up in a SPARC delay slot.
 */
3102 ENTRY(big_restorefp)
3103 ldx [%o0 + FPU_FSR], %fsr ! restore %fsr
3104 ld [%o0 + FPU_FPRS], %o1
3105 andcc %o1, FPRS_FEF, %g0 ! is FPRS_FEF set in saved %fprs?
3106 bnz,pt %icc, .fregs_restore ! yes, go to restore
3107 nop ! branch delay slot
3108 FZERO ! zero out to avoid leaks
3109 wr %g0, 0, %fprs
3110 retl
3111 nop ! retl delay slot
3112 .fregs_restore:
3113 BLOAD_FPREGS(%o0, %o2)
3114 wr %o1, 0, %fprs
3115 retl
3116 nop ! retl delay slot
3117 SET_SIZE(big_restorefp)