1 #if defined(__SUNPRO_C) && defined(__sparcv9)
2 # define ABI64 /* They've said -xarch=v9 at command line */
3 #elif defined(__GNUC__) && defined(__arch64__)
4 # define ABI64 /* They've said -m64 at command line */
19 .global OPENSSL_wipe_cpu
20 .type OPENSSL_wipe_cpu,#function
21 ! Keep in mind that this does not excuse us from wiping the stack!
22 ! This routine wipes registers, but not the backing store [which
23 ! resides on the stack, toward lower addresses]. To facilitate
24 ! stack wiping, I return a pointer to the top of stack of the *caller*.
! NOTE: several instructions below are emitted as raw .word encodings;
! the trailing comment on each such line names the intended instruction.
41 ! Following is V9 "rd %ccr,%o0" instruction. However! V8
42 ! specification says that it ("rd %asr2,%o0" in V8 terms) does
43 ! not cause illegal_instruction trap. It therefore can be used
44 ! to determine if the CPU the code is executing on is V8- or
45 ! V9-compliant, as V9 returns a distinct value of 0x99,
46 ! "negative" and "borrow" bits set in both %icc and %xcc.
47 .word 0x91408000 !rd %ccr,%o0
51 ! Even though we do not use %fp register bank,
52 ! we wipe it as memcpy might have used it...
53 .word 0xbfa00040 !fmovd %f0,%f62
68 .word 0x83a00040 !fmovd %f0,%f32
126 add %fp,BIAS,%i0 ! return pointer to caller's top of stack
! walk_reg_wins is declared global inside the extent that the .size
! directive below attributes to OPENSSL_wipe_cpu.
136 .global walk_reg_wins
137 .type walk_reg_wins,#function
145 cmp %o7,0 ! compiler never cleans %o7...
146 be 1f ! could have been a leaf function...
163 add %o0,1,%i0 ! used for debugging
166 .size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
168 .global OPENSSL_atomic_add
169 .type OPENSSL_atomic_add,#function
! Atomic add on the word at [%o0]. The visible tail applies cas to [%o0],
! adds %o1 to the value cas fetched and returns that sum in %o0.
! NOTE(review): the retry/spin conditions are not visible in this excerpt.
174 .word 0x95408000 !rd %ccr,%o2; V8/V9 probe, see the rd %ccr comment in OPENSSL_wipe_cpu
182 ! Note that you do not have to link with libthread to call thr_yield,
183 ! as libc provides a stub, which is overloaded the moment you link
184 ! with *either* libpthread or libthread...
185 #define YIELD_CPU thr_yield
187 ! applies at least to Linux and FreeBSD... Feedback expected...
188 #define YIELD_CPU sched_yield
190 .spin: call YIELD_CPU ! thr_yield or sched_yield, per the #define in effect
209 .word 0xd7e2100a !cas [%o0],%o2,%o3, compare [%o0] with %o2 and swap %o3
212 mov %o3,%o2 ! cas is always fetching to dest. register
213 add %o1,%o2,%o0 ! OpenSSL expects the new value
215 sra %o0,%g0,%o0 ! we return signed int, remember? (on V9 a zero-count sra sign-extends bit 31 into the upper half)
216 .size OPENSSL_atomic_add,.-OPENSSL_atomic_add
! _sparcv9_rdtick: reads the %tick register into %o0; the srlx then leaves
! the upper 32 bits of that value in %o1.
218 .global _sparcv9_rdtick
222 .word 0x91408000 !rd %ccr,%o0 (presumably the V8/V9 probe described in OPENSSL_wipe_cpu -- confirm)
226 .word 0x91410000 !rd %tick,%o0
228 .word 0x93323020 !srlx %o0,32,%o1
232 .type _sparcv9_rdtick,#function
233 .size _sparcv9_rdtick,.-_sparcv9_rdtick
! _sparcv9_vis1_probe: executes two hand-encoded instructions, an ldda
! through ASI_FP16_P and an fxor. Presumably the caller infers VIS1
! support from whether they execute without trapping -- TODO confirm.
235 .global _sparcv9_vis1_probe
239 .word 0xc19a5a40 !ldda [%o1]ASI_FP16_P,%f0
241 .word 0x81b00d80 !fxor %f0,%f0,%f0
242 .type _sparcv9_vis1_probe,#function
243 .size _sparcv9_vis1_probe,.-_sparcv9_vis1_probe
245 ! Probe and instrument VIS1 instructions. Output is the number of cycles
246 ! it takes to execute rdtick and a pair of VIS1 instructions. As the US-Tx
247 ! VIS unit is slow (documented to be 6 cycles on T2) and the core is
248 ! in-order single-issue, it should be possible to distinguish Tx reliably...
249 ! Observed return values are:
255 ! Numbers for T2 and SPARC64 V-VII are more than welcome.
257 ! It would be possible to detect specifically US-T1 by instrumenting
258 ! fmul8ulx16, which is emulated on T1 and as such accounts for quite
259 ! a lot of %tick-s, a couple of thousand on Linux...
260 .global _sparcv9_vis1_instrument
262 _sparcv9_vis1_instrument:
! Take five %tick samples (%o0..%o4) with a pair of fxor instructions
! between each two consecutive samples, i.e. four timed intervals.
263 .word 0x91410000 !rd %tick,%o0
264 .word 0x81b00d80 !fxor %f0,%f0,%f0
265 .word 0x85b08d82 !fxor %f2,%f2,%f2
266 .word 0x93410000 !rd %tick,%o1
267 .word 0x81b00d80 !fxor %f0,%f0,%f0
268 .word 0x85b08d82 !fxor %f2,%f2,%f2
269 .word 0x95410000 !rd %tick,%o2
270 .word 0x81b00d80 !fxor %f0,%f0,%f0
271 .word 0x85b08d82 !fxor %f2,%f2,%f2
272 .word 0x97410000 !rd %tick,%o3
273 .word 0x81b00d80 !fxor %f0,%f0,%f0
274 .word 0x85b08d82 !fxor %f2,%f2,%f2
275 .word 0x99410000 !rd %tick,%o4
277 ! calculate intervals
! Each "bgu,a %xcc,.+8" targets the instruction right after its own delay
! slot; with the annul bit set, the delay-slot instruction executes only
! when the unsigned-greater condition holds. The compares and delay-slot
! instructions are not visible here -- presumably they select among the
! measured intervals; confirm against the full source.
285 .word 0x38680002 !bgu,a %xcc,.+8
288 .word 0x38680002 !bgu,a %xcc,.+8
291 .word 0x38680002 !bgu,a %xcc,.+8
296 .type _sparcv9_vis1_instrument,#function
297 .size _sparcv9_vis1_instrument,.-_sparcv9_vis1_instrument
! _sparcv9_vis2_probe: executes a hand-encoded bshuffle. Presumably the
! caller infers VIS2 support from whether it executes without trapping
! -- TODO confirm against the caller.
299 .global _sparcv9_vis2_probe
303 .word 0x81b00980 !bshuffle %f0,%f0,%f0
304 .type _sparcv9_vis2_probe,#function
305 .size _sparcv9_vis2_probe,.-_sparcv9_vis2_probe
! _sparcv9_fmadd_probe: clears %f0 and %f2 with fxor, then executes a
! hand-encoded fmaddd on them. Presumably the caller infers fused
! multiply-add support from whether the fmaddd executes without trapping
! -- TODO confirm against the caller.
307 .global _sparcv9_fmadd_probe
309 _sparcv9_fmadd_probe:
310 .word 0x81b00d80 !fxor %f0,%f0,%f0
311 .word 0x85b08d82 !fxor %f2,%f2,%f2
313 .word 0x81b80440 !fmaddd %f0,%f0,%f2,%f0
314 .type _sparcv9_fmadd_probe,#function
315 .size _sparcv9_fmadd_probe,.-_sparcv9_fmadd_probe
317 .global OPENSSL_cleanse
344 ! V8/V9 probe; see the rd %ccr explanation in OPENSSL_wipe_cpu
345 .word 0x83408000 !rd %ccr,%g1
351 .v9lot: andcc %o0,7,%g0 ! test the low three bits of %o0 (8-byte alignment)
360 .word 0xc0720000 !stx %g0,[%o0] (64-bit store of zero)
! The same backward branch to .v9aligned is encoded twice, once against
! %xcc and once against %icc; presumably only one is assembled, selected
! by ABI64 (the surrounding conditional is not visible here).
364 .word 0x126ffffd !bnz %xcc,.v9aligned
366 .word 0x124ffffd !bnz %icc,.v9aligned
376 .v8lot: andcc %o0,3,%g0 ! test the low two bits of %o0 (4-byte alignment)
397 .type OPENSSL_cleanse,#function
398 .size OPENSSL_cleanse,.-OPENSSL_cleanse
! Emit a call to OPENSSL_cpuid_setup into the ".init" section (allocated,
! executable), presumably so that it runs with the object's initialization
! code. NOTE(review): the call's delay slot is not visible in this excerpt.
400 .section ".init",#alloc,#execinstr
401 call OPENSSL_cpuid_setup