1 /* SPDX-License-Identifier: GPL-2.0-or-later */
3 * Copyright (C) 2003-2013 Altera Corporation
8 #include <linux/linkage.h>
15 * Explicitly allow the use of r1 (the assembler temporary register)
16 * within this code. This register is normally reserved for the use of
20 ENTRY(instruction_trap)
21 ldw r1, PT_R1(sp) // Restore registers
39 ldw et, PT_ESTATUS(sp)
42 ldw et, PT_SP(sp) /* backup sp in et */
44 addi sp, sp, PT_REGS_SIZE
46 /* INSTRUCTION EMULATION
47 * ---------------------
49 * Nios II processors generate exceptions for unimplemented instructions.
50 * The routines below emulate these instructions. Depending on the
51 * processor core, the only instructions that might need to be emulated
52 * are div, divu, mul, muli, mulxss, mulxsu, and mulxuu.
54 * The emulations match the instructions, except for the following
57 * 1) The emulation routines do not emulate the use of the exception
58 * temporary register (et) as a source operand because the exception
59 * handler already has modified it.
61 * 2) The routines do not emulate the use of the stack pointer (sp) or
62 * the exception return address register (ea) as a destination because
63 * modifying these registers crashes the exception handler or the
64 * interrupted routine.
69 * The emulation routines expect the contents of integer registers r0-r31
70 * to be on the stack at addresses sp, 4(sp), 8(sp), ... 124(sp). The
71 * routines retrieve source operands from the stack and modify the
72 * destination register's value on the stack prior to the end of the
73 * exception handler. Then all registers except the destination register
74 * are restored to their previous values.
76 * The instruction that causes the exception is found at address -4(ea).
77 * The instruction's OP and OPX fields identify the operation to be
80 * One instruction, muli, is an I-type instruction that is identified by
81 * an OP field of 0x24.
83 * muli AAAAA,BBBBB,IIIIIIIIIIIIIIII,-0x24-
84 * 27 22 6 0 <-- LSB of field
86 * The remaining emulated instructions are R-type and have an OP field
87 * of 0x3a. Their OPX fields identify them.
89 * R-type AAAAA,BBBBB,CCCCC,XXXXXX,NNNNN,-0x3a-
90 * 27 22 17 11 6 0 <-- LSB of field
93 * Opcode Encoding. muli is identified by its OP value. Then OPX & 0x02
94 * is used to differentiate between the division opcodes and the
95 * remaining multiplication opcodes.
97 * Instruction OP OPX OPX & 0x02
98 * ----------- ---- ---- ----------
103 * mulxuu 0x3a 0x07 != 0
104 * mulxsu 0x3a 0x17 != 0
105 * mulxss 0x3a 0x1f != 0
110 * Save everything on the stack to make it easy for the emulation
111 * routines to retrieve the source register operands.
115 stw zero, 0(sp) /* Save zero on stack to avoid special case for r0. */
139 /* Don't bother to save et. It's already been changed. */
144 stw et, 108(sp) /* et contains previous sp value. */
151 * Split the instruction into its fields. We need 4*A, 4*B, and 4*C as
152 * offsets to the stack pointer for access to the stored register values.
/*
 * Decode by rotation: each roli parks one 5-bit register field in
 * bits 6..2, so the "andi ..., 0x7c" masks below yield 4 * register
 * number, which is directly usable as a byte offset from sp.
 */
154 ldw r2,-4(ea) /* r2 = instruction that trapped: AAAAA,BBBBB,IIIIIIIIIIIIIIII,PPPPPP */
155 roli r3, r2, 7 /* r3 = BBB,IIIIIIIIIIIIIIII,PPPPPP,AAAAA,BB */
156 roli r4, r3, 3 /* r4 = IIIIIIIIIIIIIIII,PPPPPP,AAAAA,BBBBB */
157 roli r5, r4, 2 /* r5 = IIIIIIIIIIIIII,PPPPPP,AAAAA,BBBBB,II (taken before srai overwrites r4) */
158 srai r4, r4, 16 /* r4 = (sign-extended) IMM16 */
159 roli r6, r5, 5 /* r6 = XXXX,NNNNN,PPPPPP,AAAAA,BBBBB,CCCCC,XX */
160 andi r2, r2, 0x3f /* r2 = 00000000000000000000000000,PPPPPP (OP field) */
161 andi r3, r3, 0x7c /* r3 = 0000000000000000000000000,AAAAA,00 (= 4 * A) */
162 andi r5, r5, 0x7c /* r5 = 0000000000000000000000000,BBBBB,00 (= 4 * B) */
163 andi r6, r6, 0x7c /* r6 = 0000000000000000000000000,CCCCC,00 (= 4 * C, R-type format only) */
168 * r4 = IMM16 (sign extended)
176 * It is necessary to check for muli because it uses an I-type
177 * instruction format, while the other instructions have an R-type
180 * Prepare for either multiplication or division loop.
181 * They both loop 32 times.
/* Fetch the source operands from the register image saved on the stack. */
185 add r3, r3, sp /* r3 = sp + 4 * A = address of A-operand. */
186 ldw r3, 0(r3) /* r3 = A-operand (the offset in r3 is replaced by the value). */
187 movi r7, 0x24 /* muli opcode (I-type instruction format) */
188 beq r2, r7, mul_immed /* r2 = OP field; muli doesn't use the B register as a source */
190 add r5, r5, sp /* r5 = sp + 4 * B = address of B-operand. */
191 ldw r5, 0(r5) /* r5 = B-operand. */
192 /* r4 = SSSSSSSSSSSSSSSS,-----IMM16------ */
193 /* IMM16 not needed, align OPX portion */
194 /* r4 = SSSSSSSSSSSSSSSS,CCCCC,-OPX--,00000 */
195 srli r4, r4, 5 /* r4 = 00000,SSSSSSSSSSSSSSSS,CCCCC,-OPX-- */
196 andi r4, r4, 0x3f /* r4 = 00000000000000000000000000,-OPX-- */
202 * r4 = OPX (no longer can be muli)
208 * Multiply or Divide?
210 andi r7, r4, 0x02 /* For R-type multiply instructions,
212 bne r7, zero, multiply
217 * Divide an unsigned dividend by an unsigned divisor using
218 * a shift-and-subtract algorithm. The example below shows
219 * 43 div 7 = 6 for 8-bit integers. This classic algorithm uses a
220 * single register to store both the dividend and the quotient,
221 * allowing both values to be shifted with a single instruction.
223 * remainder dividend:quotient
224 * --------- -----------------
225 * initialize 00000000 00101011:
226 * shift 00000000 0101011:_
227 * remainder >= divisor? no 00000000 0101011:0
228 * shift 00000000 101011:0_
229 * remainder >= divisor? no 00000000 101011:00
230 * shift 00000001 01011:00_
231 * remainder >= divisor? no 00000001 01011:000
232 * shift 00000010 1011:000_
233 * remainder >= divisor? no 00000010 1011:0000
234 * shift 00000101 011:0000_
235 * remainder >= divisor? no 00000101 011:00000
236 * shift 00001010 11:00000_
237 * remainder >= divisor? yes 00001010 11:000001
238 * remainder -= divisor - 00000111
241 * shift 00000111 1:000001_
242 * remainder >= divisor? yes 00000111 1:0000011
243 * remainder -= divisor - 00000111
246 * shift 00000001 :0000011_
247 * remainder >= divisor? no 00000001 :00000110
249 * The quotient is 00000110.
254 * Prepare for division by assuming the result
255 * is unsigned, and storing its "sign" as 0.
260 /* Which division opcode? */
261 xori r7, r4, 0x25 /* OPX of div */
262 bne r7, zero, unsigned_division
266 * OPX is div. Determine and store the sign of the quotient.
267 * Then take the absolute value of both operands.
269 xor r17, r3, r5 /* MSB contains sign of quotient: set only when the operand signs differ */
270 bge r3,zero,dividend_is_nonnegative /* dividend >= 0: keep it as is */
271 sub r3, zero, r3 /* r3 = -r3, the magnitude of the dividend */
272 dividend_is_nonnegative:
273 bge r5, zero, divisor_is_nonnegative /* divisor >= 0: keep it as is */
274 sub r5, zero, r5 /* r5 = -r5, the magnitude of the divisor */
275 divisor_is_nonnegative:
279 /* Initialize the unsigned-division loop. */
280 movi r13, 0 /* remainder = 0 */
283 * r3 = dividend : quotient
284 * r4 = 0x25 for div, 0x24 for divu
287 * r14 = loop counter (already initialized to 32)
288 * r17 = MSB contains sign of quotient
293 * for (count = 32; count > 0; --count)
301 * (remainder:dividend:quotient) <<= 1;
304 cmplt r7, r3, zero /* r7 = MSB of r3 */
310 * if (remainder >= divisor)
312 * set LSB of quotient
313 * remainder -= divisor;
316 bltu r13, r5, div_skip
325 bne r14, zero, divide_loop
330 * r4 = 0x25 for div, 0x24 for divu
332 * r17 = MSB contains sign of quotient
337 * Conditionally negate signed quotient. If quotient is unsigned,
338 * the sign already is initialized to 0.
340 bge r17, zero, quotient_is_nonnegative
341 sub r3, zero, r3 /* -r3 */
342 quotient_is_nonnegative:
346 * Final quotient is in r3.
349 stw r3, 0(r6) /* write quotient to stack */
357 * A "product" is the number that one gets by summing a "multiplicand"
358 * several times. The "multiplier" specifies the number of copies of the
359 * multiplicand that are summed.
361 * Actual multiplication algorithms don't use repeated addition, however.
362 * Shift-and-add algorithms get the same answer as repeated addition, and
363 * they are faster. To compute the lower half of a product (pppp below)
364 * one shifts the product left before adding in each of the partial
365 * products (a * mmmm) through (d * mmmm).
367 * To compute the upper half of a product (PPPP below), one adds in the
368 * partial products (d * mmmm) through (a * mmmm), each time following
369 * the add by a right shift of the product.
381 * The example above shows 4 partial products. Computing actual Nios II
382 * products requires 32 partials.
384 * It is possible to compute the result of mulxsu from the result of
385 * mulxuu because the only difference between the results of these two
386 * opcodes is the value of the partial product associated with the sign
389 * mulxsu = mulxuu - ((rA < 0) ? rB : 0);
391 * It is possible to compute the result of mulxss from the result of
392 * mulxsu because the only difference between the results of these two
393 * opcodes is the value of the partial product associated with the sign
396 * mulxss = mulxsu - ((rB < 0) ? rA : 0);
401 /* Opcode is muli. Change it into mul for remainder of algorithm. */
402 mov r6, r5 /* Field B is dest register, not field C: r6 = 4 * B (r5 still holds the offset here). */
403 mov r5, r4 /* Field IMM16 is src2, not field B: r5 = sign-extended IMM16. */
404 movi r4, 0x27 /* OPX of mul is 0x27, so the later OPX-based result selection treats this as mul. */
407 /* Initialize the multiplication loop. */
408 movi r9, 0 /* mul_product = 0 */
409 movi r10, 0 /* mulxuu_product = 0 */
410 mov r11, r5 /* save original multiplier for mulxsu and mulxss */
411 mov r12, r5 /* mulxuu_multiplier (will be shifted) */
412 movi r16, 1 /* used to create "rori B,A,1" from "ror B,A,r16" */
416 * r5 = mul_multiplier
417 * r6 = 4 * dest_register (used later as offset to sp)
420 * r10 = mulxuu_product
421 * r11 = original multiplier
422 * r12 = mulxuu_multiplier
423 * r14 = loop counter (already initialized)
429 * for (count = 32; count > 0; --count)
436 * lsb = multiplier & 1;
444 * mulxuu_product += multiplicand;
447 beq r7, zero, mulx_skip
449 cmpltu r7, r10, r3 /* Save the carry from the MSB of mulxuu_product. */
450 ror r7, r7, r16 /* r7 = 0x80000000 on carry, or else 0x00000000 */
454 * if (MSB of mul_multiplier == 1)
456 * mul_product += multiplicand;
459 bge r5, zero, mul_skip
464 * mulxuu_product >>= 1; logical shift
465 * mul_multiplier <<= 1; done with MSB
466 * mulxuu_multiplier >>= 1; done with LSB
469 or r10, r10, r7 /* OR in the saved carry bit. */
478 bne r14, zero, multiply_loop
482 * Multiply emulation loop done.
488 * r6 = 4 * dest_register (used later as offset to sp)
491 * r10 = mulxuu_product
492 * r11 = original multiplier
496 /* Calculate address for result from 4 * dest_register */
501 * Select/compute the result based on OPX.
505 /* OPX == mul? Then store. */
507 beq r7, zero, store_product
509 /* It's one of the mulx.. opcodes. Move over the result. */
512 /* OPX == mulxuu? Then store. */
514 beq r7, zero, store_product
518 * mulxsu = mulxuu - ((rA < 0) ? rB : 0);
520 bge r3, zero, mulxsu_skip
524 /* OPX == mulxsu? Then store. */
526 beq r7, zero, store_product
530 * mulxss = mulxsu - ((rB < 0) ? rA : 0);
532 bge r11,zero,mulxss_skip
535 /* At this point, assume that OPX is mulxss, so store. */
543 /* No need to restore r0. */
570 /* Does not need to restore et */
576 ldw sp, 108(sp) /* last restore sp */