IB/mthca: Query port fix
[linux-2.6/verdex.git] / include / asm-xtensa / xtensa / coreasm.h
bloba8cfb54c20a15e89de7cdd9ae23947979bee54dd
1 #ifndef XTENSA_COREASM_H
2 #define XTENSA_COREASM_H
4 /*
5 * THIS FILE IS GENERATED -- DO NOT MODIFY BY HAND
7 * include/asm-xtensa/xtensa/coreasm.h -- assembler-specific
8 * definitions that depend on CORE configuration.
10 * Source for configuration-independent binaries (which link in a
11 * configuration-specific HAL library) must NEVER include this file.
12 * It is perfectly normal, however, for the HAL itself to include this
13 * file.
15 * This file must NOT include xtensa/config/system.h. Any assembler
16 * header file that depends on system information should likely go in
17 * a new systemasm.h (or sysasm.h) header file.
19 * NOTE: macro beqi32 is NOT configuration-dependent, and is placed
20 * here til we will have configuration-independent header file.
22 * This file is subject to the terms and conditions of the GNU General
23 * Public License. See the file "COPYING" in the main directory of
24 * this archive for more details.
26 * Copyright (C) 2002 Tensilica Inc.
30 #include <xtensa/config/core.h>
31 #include <xtensa/config/specreg.h>
34 * Assembly-language specific definitions (assembly macros, etc.).
37 /*----------------------------------------------------------------------
38 * find_ms_setbit
40 * This macro finds the most significant bit that is set in <as>
41 * and return its index + <base> in <ad>, or <base> - 1 if <as> is zero.
42 * The index counts starting at zero for the lsbit, so the return
43 * value ranges from <base>-1 (no bit set) to <base>+31 (msbit set).
45 * Parameters:
46 * <ad> destination address register (any register)
47 * <as> source address register
48 * <at> temporary address register (must be different than <as>)
49 * <base> constant value added to result (usually 0 or 1)
50 * On entry:
51 * <ad> = undefined if different than <as>
52 * <as> = value whose most significant set bit is to be found
53 * <at> = undefined
54 * no other registers are used by this macro.
55 * On exit:
56 * <ad> = <base> + index of msbit set in original <as>,
57 * = <base> - 1 if original <as> was zero.
58 * <as> clobbered (if not <ad>)
59 * <at> clobbered (if not <ad>)
60 * Example:
61 * find_ms_setbit a0, a4, a0, 0 -- return in a0 index of msbit set in a4
64 .macro find_ms_setbit ad, as, at, base
65 #if XCHAL_HAVE_NSA
66 movi \at, 31+\base
67 nsau \as, \as // get index of \as, numbered from msbit (32 if absent)
68 sub \ad, \at, \as // get numbering from lsbit (0..31, -1 if absent)
69 #else /* XCHAL_HAVE_NSA */
70 movi \at, \base // start with result of 0 (point to lsbit of 32)
72 beqz \as, 2f // special case for zero argument: return -1
73 bltui \as, 0x10000, 1f // is it one of the 16 lsbits? (if so, check lower 16 bits)
74 addi \at, \at, 16 // no, increment result to upper 16 bits (of 32)
75 //srli \as, \as, 16 // check upper half (shift right 16 bits)
76 extui \as, \as, 16, 16 // check upper half (shift right 16 bits)
77 1: bltui \as, 0x100, 1f // is it one of the 8 lsbits? (if so, check lower 8 bits)
78 addi \at, \at, 8 // no, increment result to upper 8 bits (of 16)
79 srli \as, \as, 8 // shift right to check upper 8 bits
80 1: bltui \as, 0x10, 1f // is it one of the 4 lsbits? (if so, check lower 4 bits)
81 addi \at, \at, 4 // no, increment result to upper 4 bits (of 8)
82 srli \as, \as, 4 // shift right 4 bits to check upper half
83 1: bltui \as, 0x4, 1f // is it one of the 2 lsbits? (if so, check lower 2 bits)
84 addi \at, \at, 2 // no, increment result to upper 2 bits (of 4)
85 srli \as, \as, 2 // shift right 2 bits to check upper half
86 1: bltui \as, 0x2, 1f // is it the lsbit?
87 addi \at, \at, 2 // no, increment result to upper bit (of 2)
88 2: addi \at, \at, -1 // (from just above: add 1; from beqz: return -1)
89 //srli \as, \as, 1
90 1: // done! \at contains index of msbit set (or -1 if none set)
91 .if 0x\ad - 0x\at // destination different than \at ? (works because regs are a0-a15)
92 mov \ad, \at // then move result to \ad
93 .endif
94 #endif /* XCHAL_HAVE_NSA */
95 .endm // find_ms_setbit
97 /*----------------------------------------------------------------------
98 * find_ls_setbit
100 * This macro finds the least significant bit that is set in <as>,
101 * and return its index in <ad>.
102 * Usage is the same as for the find_ms_setbit macro.
103 * Example:
104 * find_ls_setbit a0, a4, a0, 0 -- return in a0 index of lsbit set in a4
107 .macro find_ls_setbit ad, as, at, base
108 neg \at, \as // keep only the least-significant bit that is set...
109 and \as, \at, \as // ... in \as
110 find_ms_setbit \ad, \as, \at, \base
111 .endm // find_ls_setbit
113 /*----------------------------------------------------------------------
114 * find_ls_one
116 * Same as find_ls_setbit with base zero.
117 * Source (as) and destination (ad) registers must be different.
118 * Provided for backward compatibility.
121 .macro find_ls_one ad, as
122 find_ls_setbit \ad, \as, \ad, 0
123 .endm // find_ls_one
125 /*----------------------------------------------------------------------
126 * floop, floopnez, floopgtz, floopend
128 * These macros are used for fast inner loops that
129 * work whether or not the Loops options is configured.
130 * If the Loops option is configured, they simply use
131 * the zero-overhead LOOP instructions; otherwise
132 * they use explicit decrement and branch instructions.
134 * They are used in pairs, with floop, floopnez or floopgtz
135 * at the beginning of the loop, and floopend at the end.
137 * Each pair of loop macro calls must be given the loop count
138 * address register and a unique label for that loop.
140 * Example:
142 * movi a3, 16 // loop 16 times
143 * floop a3, myloop1
145 * bnez a7, end1 // exit loop if a7 != 0
147 * floopend a3, myloop1
148 * end1:
150 * Like the LOOP instructions, these macros cannot be
151 * nested, must include at least one instruction,
152 * cannot call functions inside the loop, etc.
153 * The loop can be exited by jumping to the instruction
154 * following floopend (or elsewhere outside the loop),
155 * or continued by jumping to a NOP instruction placed
156 * immediately before floopend.
158 * Unlike LOOP instructions, the register passed to floop*
159 * cannot be used inside the loop, because it is used as
160 * the loop counter if the Loops option is not configured.
161 * And its value is undefined after exiting the loop.
162 * And because the loop counter register is active inside
163 * the loop, you can't easily use this construct to loop
164 * across a register file using ROTW as you might with LOOP
165 * instructions, unless you copy the loop register along.
168 /* Named label version of the macros: */
170 .macro floop ar, endlabel
171 floop_ \ar, .Lfloopstart_\endlabel, .Lfloopend_\endlabel
172 .endm
174 .macro floopnez ar, endlabel
175 floopnez_ \ar, .Lfloopstart_\endlabel, .Lfloopend_\endlabel
176 .endm
178 .macro floopgtz ar, endlabel
179 floopgtz_ \ar, .Lfloopstart_\endlabel, .Lfloopend_\endlabel
180 .endm
182 .macro floopend ar, endlabel
183 floopend_ \ar, .Lfloopstart_\endlabel, .Lfloopend_\endlabel
184 .endm
186 /* Numbered local label version of the macros: */
187 #if 0 /*UNTESTED*/
188 .macro floop89 ar
189 floop_ \ar, 8, 9f
190 .endm
192 .macro floopnez89 ar
193 floopnez_ \ar, 8, 9f
194 .endm
196 .macro floopgtz89 ar
197 floopgtz_ \ar, 8, 9f
198 .endm
200 .macro floopend89 ar
201 floopend_ \ar, 8b, 9
202 .endm
203 #endif /*0*/
205 /* Underlying version of the macros: */
207 .macro floop_ ar, startlabel, endlabelref
208 .ifdef _infloop_
209 .if _infloop_
210 .err // Error: floop cannot be nested
211 .endif
212 .endif
213 .set _infloop_, 1
214 #if XCHAL_HAVE_LOOPS
215 loop \ar, \endlabelref
216 #else /* XCHAL_HAVE_LOOPS */
217 \startlabel:
218 addi \ar, \ar, -1
219 #endif /* XCHAL_HAVE_LOOPS */
220 .endm // floop_
222 .macro floopnez_ ar, startlabel, endlabelref
223 .ifdef _infloop_
224 .if _infloop_
225 .err // Error: floopnez cannot be nested
226 .endif
227 .endif
228 .set _infloop_, 1
229 #if XCHAL_HAVE_LOOPS
230 loopnez \ar, \endlabelref
231 #else /* XCHAL_HAVE_LOOPS */
232 beqz \ar, \endlabelref
233 \startlabel:
234 addi \ar, \ar, -1
235 #endif /* XCHAL_HAVE_LOOPS */
236 .endm // floopnez_
238 .macro floopgtz_ ar, startlabel, endlabelref
239 .ifdef _infloop_
240 .if _infloop_
241 .err // Error: floopgtz cannot be nested
242 .endif
243 .endif
244 .set _infloop_, 1
245 #if XCHAL_HAVE_LOOPS
246 loopgtz \ar, \endlabelref
247 #else /* XCHAL_HAVE_LOOPS */
248 bltz \ar, \endlabelref
249 beqz \ar, \endlabelref
250 \startlabel:
251 addi \ar, \ar, -1
252 #endif /* XCHAL_HAVE_LOOPS */
253 .endm // floopgtz_
256 .macro floopend_ ar, startlabelref, endlabel
257 .ifndef _infloop_
258 .err // Error: floopend without matching floopXXX
259 .endif
260 .ifeq _infloop_
261 .err // Error: floopend without matching floopXXX
262 .endif
263 .set _infloop_, 0
264 #if ! XCHAL_HAVE_LOOPS
265 bnez \ar, \startlabelref
266 #endif /* XCHAL_HAVE_LOOPS */
267 \endlabel:
268 .endm // floopend_
270 /*----------------------------------------------------------------------
271 * crsil -- conditional RSIL (read/set interrupt level)
273 * Executes the RSIL instruction if it exists, else just reads PS.
274 * The RSIL instruction does not exist in the new exception architecture
275 * if the interrupt option is not selected.
278 .macro crsil ar, newlevel
279 #if XCHAL_HAVE_OLD_EXC_ARCH || XCHAL_HAVE_INTERRUPTS
280 rsil \ar, \newlevel
281 #else
282 rsr \ar, PS
283 #endif
284 .endm // crsil
286 /*----------------------------------------------------------------------
287 * window_spill{4,8,12}
289 * These macros spill callers' register windows to the stack.
290 * They work for both privileged and non-privileged tasks.
291 * Must be called from a windowed ABI context, eg. within
292 * a windowed ABI function (ie. valid stack frame, window
293 * exceptions enabled, not in exception mode, etc).
295 * This macro requires a single invocation of the window_spill_common
296 * macro in the same assembly unit and section.
298 * Note that using window_spill{4,8,12} macros is more efficient
299 * than calling a function implemented using window_spill_function,
300 * because the latter needs extra code to figure out the size of
301 * the call to the spilling function.
303 * Example usage:
305 * .text
306 * .align 4
307 * .global some_function
308 * .type some_function,@function
309 * some_function:
310 * entry a1, 16
314 * window_spill4 // spill windows of some_function's callers; preserves a0..a3 only;
315 * // to use window_spill{8,12} in this example function we'd have
316 * // to increase space allocated by the entry instruction, because
317 * // 16 bytes only allows call4; 32 or 48 bytes (+locals) are needed
318 * // for call8/window_spill8 or call12/window_spill12 respectively.
321 * retw
323 * window_spill_common // instantiates code used by window_spill4
326 * On entry:
327 * none (if window_spill4)
328 * stack frame has enough space allocated for call8 (if window_spill8)
329 * stack frame has enough space allocated for call12 (if window_spill12)
330 * On exit:
331 * a4..a15 clobbered (if window_spill4)
332 * a8..a15 clobbered (if window_spill8)
333 * a12..a15 clobbered (if window_spill12)
334 * no caller windows are in live registers
337 .macro window_spill4
338 #if XCHAL_HAVE_WINDOWED
339 # if XCHAL_NUM_AREGS == 16
340 movi a15, 0 // for 16-register files, no need to call to reach the end
341 # elif XCHAL_NUM_AREGS == 32
342 call4 .L__wdwspill_assist28 // call deep enough to clear out any live callers
343 # elif XCHAL_NUM_AREGS == 64
344 call4 .L__wdwspill_assist60 // call deep enough to clear out any live callers
345 # endif
346 #endif
347 .endm // window_spill4
349 .macro window_spill8
350 #if XCHAL_HAVE_WINDOWED
351 # if XCHAL_NUM_AREGS == 16
352 movi a15, 0 // for 16-register files, no need to call to reach the end
353 # elif XCHAL_NUM_AREGS == 32
354 call8 .L__wdwspill_assist24 // call deep enough to clear out any live callers
355 # elif XCHAL_NUM_AREGS == 64
356 call8 .L__wdwspill_assist56 // call deep enough to clear out any live callers
357 # endif
358 #endif
359 .endm // window_spill8
361 .macro window_spill12
362 #if XCHAL_HAVE_WINDOWED
363 # if XCHAL_NUM_AREGS == 16
364 movi a15, 0 // for 16-register files, no need to call to reach the end
365 # elif XCHAL_NUM_AREGS == 32
366 call12 .L__wdwspill_assist20 // call deep enough to clear out any live callers
367 # elif XCHAL_NUM_AREGS == 64
368 call12 .L__wdwspill_assist52 // call deep enough to clear out any live callers
369 # endif
370 #endif
371 .endm // window_spill12
373 /*----------------------------------------------------------------------
374 * window_spill_function
376 * This macro outputs a function that will spill its caller's callers'
377 * register windows to the stack. Eg. it could be used to implement
378 * a version of xthal_window_spill() that works in non-privileged tasks.
379 * This works for both privileged and non-privileged tasks.
381 * Typical usage:
383 * .text
384 * .align 4
385 * .global my_spill_function
386 * .type my_spill_function,@function
387 * my_spill_function:
388 * window_spill_function
390 * On entry to resulting function:
391 * none
392 * On exit from resulting function:
393 * none (no caller windows are in live registers)
396 .macro window_spill_function
397 #if XCHAL_HAVE_WINDOWED
398 # if XCHAL_NUM_AREGS == 32
399 entry sp, 48
400 bbci.l a0, 31, 1f // branch if called with call4
401 bbsi.l a0, 30, 2f // branch if called with call12
402 call8 .L__wdwspill_assist16 // called with call8, only need another 8
403 retw
404 1: call12 .L__wdwspill_assist16 // called with call4, only need another 12
405 retw
406 2: call4 .L__wdwspill_assist16 // called with call12, only need another 4
407 retw
408 # elif XCHAL_NUM_AREGS == 64
409 entry sp, 48
410 bbci.l a0, 31, 1f // branch if called with call4
411 bbsi.l a0, 30, 2f // branch if called with call12
412 call4 .L__wdwspill_assist52 // called with call8, only need a call4
413 retw
414 1: call8 .L__wdwspill_assist52 // called with call4, only need a call8
415 retw
416 2: call12 .L__wdwspill_assist40 // called with call12, can skip a call12
417 retw
418 # elif XCHAL_NUM_AREGS == 16
419 entry sp, 16
420 bbci.l a0, 31, 1f // branch if called with call4
421 bbsi.l a0, 30, 2f // branch if called with call12
422 movi a7, 0 // called with call8
423 retw
424 1: movi a11, 0 // called with call4
425 2: retw // if called with call12, everything already spilled
427 // movi a15, 0 // trick to spill all but the direct caller
428 // j 1f
429 // // The entry instruction is magical in the assembler (gets auto-aligned)
430 // // so we have to jump to it to avoid falling through the padding.
431 // // We need entry/retw to know where to return.
432 //1: entry sp, 16
433 // retw
434 # else
435 # error "unrecognized address register file size"
436 # endif
437 #endif /* XCHAL_HAVE_WINDOWED */
438 window_spill_common
439 .endm // window_spill_function
441 /*----------------------------------------------------------------------
442 * window_spill_common
444 * Common code used by any number of invocations of the window_spill##
445 * and window_spill_function macros.
447 * Must be instantiated exactly once within a given assembly unit,
448 * within call/j range of and same section as window_spill##
449 * macro invocations for that assembly unit.
450 * (Is automatically instantiated by the window_spill_function macro.)
453 .macro window_spill_common
454 #if XCHAL_HAVE_WINDOWED && (XCHAL_NUM_AREGS == 32 || XCHAL_NUM_AREGS == 64)
455 .ifndef .L__wdwspill_defined
456 # if XCHAL_NUM_AREGS >= 64
457 .L__wdwspill_assist60:
458 entry sp, 32
459 call8 .L__wdwspill_assist52
460 retw
461 .L__wdwspill_assist56:
462 entry sp, 16
463 call4 .L__wdwspill_assist52
464 retw
465 .L__wdwspill_assist52:
466 entry sp, 48
467 call12 .L__wdwspill_assist40
468 retw
469 .L__wdwspill_assist40:
470 entry sp, 48
471 call12 .L__wdwspill_assist28
472 retw
473 # endif
474 .L__wdwspill_assist28:
475 entry sp, 48
476 call12 .L__wdwspill_assist16
477 retw
478 .L__wdwspill_assist24:
479 entry sp, 32
480 call8 .L__wdwspill_assist16
481 retw
482 .L__wdwspill_assist20:
483 entry sp, 16
484 call4 .L__wdwspill_assist16
485 retw
486 .L__wdwspill_assist16:
487 entry sp, 16
488 movi a15, 0
489 retw
490 .set .L__wdwspill_defined, 1
491 .endif
492 #endif /* XCHAL_HAVE_WINDOWED with 32 or 64 aregs */
493 .endm // window_spill_common
495 /*----------------------------------------------------------------------
496 * beqi32
498 * macro implements version of beqi for arbitrary 32-bit immidiate value
500 * beqi32 ax, ay, imm32, label
502 * Compares value in register ax with imm32 value and jumps to label if
503 * equal. Clobberes register ay if needed
506 .macro beqi32 ax, ay, imm, label
507 .ifeq ((\imm-1) & ~7) // 1..8 ?
508 beqi \ax, \imm, \label
509 .else
510 .ifeq (\imm+1) // -1 ?
511 beqi \ax, \imm, \label
512 .else
513 .ifeq (\imm) // 0 ?
514 beqz \ax, \label
515 .else
516 // We could also handle immediates 10,12,16,32,64,128,256
517 // but it would be a long macro...
518 movi \ay, \imm
519 beq \ax, \ay, \label
520 .endif
521 .endif
522 .endif
523 .endm // beqi32
525 #endif /*XTENSA_COREASM_H*/