1 /*
2 * Architecture-specific unaligned trap handling.
4 * Copyright (C) 1999-2002, 2004 Hewlett-Packard Co
5 * Stephane Eranian <eranian@hpl.hp.com>
6 * David Mosberger-Tang <davidm@hpl.hp.com>
8 * 2002/12/09 Fix rotating register handling (off-by-1 error, missing fr-rotation). Fix
9 * get_rse_reg() to not leak kernel bits to user-level (reading an out-of-frame
10 * stacked register returns an undefined value; it does NOT trigger a
11 * "rsvd register fault").
12 * 2001/10/11 Fix unaligned access to rotating registers in s/w pipelined loops.
13 * 2001/08/13 Correct size of extended floats (float_fsz) from 16 to 10 bytes.
14 * 2001/01/17 Add support for emulation of unaligned kernel accesses.
16 #include <linux/kernel.h>
17 #include <linux/sched.h>
18 #include <linux/tty.h>
20 #include <asm/intrinsics.h>
21 #include <asm/processor.h>
22 #include <asm/rse.h>
23 #include <asm/uaccess.h>
24 #include <asm/unaligned.h>
26 extern int die_if_kernel(char *str, struct pt_regs *regs, long err);
28 #undef DEBUG_UNALIGNED_TRAP
30 #ifdef DEBUG_UNALIGNED_TRAP
32 # define DPRINT(a...) do { printk("%s %u: ", __func__, __LINE__); printk (a); } while (0)
36 # define DDUMP(str,vp,len) dump(str, vp, len)
38 static void
39 dump (const char *str, void *vp, size_t len)
41 unsigned char *cp = vp;
42 int i;
44 printk("%s", str);
45 for (i = 0; i < len; ++i)
46 printk (" %02x", *cp++);
47 printk("\n");
49 #else
50 # define DPRINT(a...)
51 # define DDUMP(str,vp,len)
52 #endif
54 #define IA64_FIRST_STACKED_GR 32
55 #define IA64_FIRST_ROTATING_FR 32
56 #define SIGN_EXT9 0xffffffffffffff00ul
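/*
 * Worked example of SIGN_EXT9 (illustrative, based on how it is used in
 * emulate_load_updates() and emulate_store_int() below): the 9-bit update
 * immediate is reassembled from the instruction fields as
 *
 *	imm = ld.x << 7 | ld.imm;	// low 8 bits (ld.r1 in the store form)
 *	if (ld.m)			// the sign bit occupies the m position
 *		imm |= SIGN_EXT9;
 *
 * so an encoded imm9 of -16 (s=1, low bits 0xf0) becomes
 * 0xfffffffffffffff0, i.e. the correct 64-bit value -16.
 */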
59 * sysctl settable hook which tells the kernel whether to honor the
60 * IA64_THREAD_UAC_NOPRINT prctl. Because this is user settable, we want
61 * to allow the super user to enable/disable this for security reasons
62 * (i.e. don't allow an attacker to fill up the logs with unaligned accesses).
64 int no_unaligned_warning;
65 static int noprint_warning;
68 * For M-unit:
70 * opcode | m | x6 |
71 * --------|------|---------|
72 * [40-37] | [36] | [35:30] |
73 * --------|------|---------|
74 * 4 | 1 | 6 | = 11 bits
75 * --------------------------
76 * However bits [31:30] are not directly useful to distinguish between
77 * load/store so we can use [35:32] instead, which gives the following
78 * mask ([40:32]) using 9 bits. The 'e' comes from the fact that we defer
79 * checking the m-bit until later in the load/store emulation.
81 #define IA64_OPCODE_MASK 0x1ef
82 #define IA64_OPCODE_SHIFT 32
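/*
 * Illustrative decode, assuming the standard ld8 encoding (major opcode 4,
 * x6_op 0, x6_sz 3): for a plain "ld8 r1=[r3]" the 9-bit value
 *
 *	opcode = (slot >> IA64_OPCODE_SHIFT) & IA64_OPCODE_MASK;
 *
 * is 0x080 == LD_OP (the masked-out bit 4 is the deferred m-bit), while
 * ld.x6_sz (bits [31:30]) is read separately and gives
 * len = 1 << 3 = 8 bytes in emulate_load_int().
 */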
85 * Table C-28 Integer Load/Store
87 * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
89 * ld8.fill, st8.fill MUST be aligned because the RNATs are based on
90 * the address (bits [8:3]), so we must fail.
92 #define LD_OP 0x080
93 #define LDS_OP 0x081
94 #define LDA_OP 0x082
95 #define LDSA_OP 0x083
96 #define LDBIAS_OP 0x084
97 #define LDACQ_OP 0x085
98 /* 0x086, 0x087 are not relevant */
99 #define LDCCLR_OP 0x088
100 #define LDCNC_OP 0x089
101 #define LDCCLRACQ_OP 0x08a
102 #define ST_OP 0x08c
103 #define STREL_OP 0x08d
104 /* 0x08e,0x8f are not relevant */
107 * Table C-29 Integer Load +Reg
109 * we use the ld->m (bit [36:36]) field to determine whether or not we have
110 * a load/store of this form.
114 * Table C-30 Integer Load/Store +Imm
116 * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
118 * ld8.fill, st8.fill must be aligned because the NaT bits are based on
119 * the address, so we must fail and the program must be fixed.
121 #define LD_IMM_OP 0x0a0
122 #define LDS_IMM_OP 0x0a1
123 #define LDA_IMM_OP 0x0a2
124 #define LDSA_IMM_OP 0x0a3
125 #define LDBIAS_IMM_OP 0x0a4
126 #define LDACQ_IMM_OP 0x0a5
127 /* 0x0a6, 0xa7 are not relevant */
128 #define LDCCLR_IMM_OP 0x0a8
129 #define LDCNC_IMM_OP 0x0a9
130 #define LDCCLRACQ_IMM_OP 0x0aa
131 #define ST_IMM_OP 0x0ac
132 #define STREL_IMM_OP 0x0ad
133 /* 0x0ae,0xaf are not relevant */
136 * Table C-32 Floating-point Load/Store
138 #define LDF_OP 0x0c0
139 #define LDFS_OP 0x0c1
140 #define LDFA_OP 0x0c2
141 #define LDFSA_OP 0x0c3
142 /* 0x0c6 is irrelevant */
143 #define LDFCCLR_OP 0x0c8
144 #define LDFCNC_OP 0x0c9
145 /* 0x0cb is irrelevant */
146 #define STF_OP 0x0cc
149 * Table C-33 Floating-point Load +Reg
151 * we use the ld->m (bit [36:36]) field to determine whether or not we have
152 * a load/store of this form.
156 * Table C-34 Floating-point Load/Store +Imm
158 #define LDF_IMM_OP 0x0e0
159 #define LDFS_IMM_OP 0x0e1
160 #define LDFA_IMM_OP 0x0e2
161 #define LDFSA_IMM_OP 0x0e3
162 /* 0x0e6 is irrelevant */
163 #define LDFCCLR_IMM_OP 0x0e8
164 #define LDFCNC_IMM_OP 0x0e9
165 #define STF_IMM_OP 0x0ec
167 typedef struct {
168 unsigned long qp:6; /* [0:5] */
169 unsigned long r1:7; /* [6:12] */
170 unsigned long imm:7; /* [13:19] */
171 unsigned long r3:7; /* [20:26] */
172 unsigned long x:1; /* [27:27] */
173 unsigned long hint:2; /* [28:29] */
174 unsigned long x6_sz:2; /* [30:31] */
175 unsigned long x6_op:4; /* [32:35], x6 = x6_sz|x6_op */
176 unsigned long m:1; /* [36:36] */
177 unsigned long op:4; /* [37:40] */
178 unsigned long pad:23; /* [41:63] */
179 } load_store_t;
182 typedef enum {
183 UPD_IMMEDIATE, /* ldXZ r1=[r3],imm(9) */
184 UPD_REG /* ldXZ r1=[r3],r2 */
185 } update_t;
188 * We use tables to keep track of the offsets of registers in the saved state.
189 * This way we avoid having big switch/case statements.
191 * We use bit 0 to indicate switch_stack or pt_regs.
192 * The offset is simply shifted by 1 bit.
193 * A 2-byte value should be enough to hold any kind of offset
195 * In case the calling convention changes (and thus pt_regs/switch_stack)
196 * simply use RSW instead of RPT or vice-versa.
199 #define RPO(x) ((size_t) &((struct pt_regs *)0)->x)
200 #define RSO(x) ((size_t) &((struct switch_stack *)0)->x)
202 #define RPT(x) (RPO(x) << 1)
203 #define RSW(x) (1| RSO(x)<<1)
205 #define GR_OFFS(x) (gr_info[x]>>1)
206 #define GR_IN_SW(x) (gr_info[x] & 0x1)
208 #define FR_OFFS(x) (fr_info[x]>>1)
209 #define FR_IN_SW(x) (fr_info[x] & 0x1)
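/*
 * Example of the encoding (illustrative): RPT(r8) stores
 * (offsetof(struct pt_regs, r8) << 1) with bit 0 clear, so GR_IN_SW(8) == 0
 * and GR_OFFS(8) recovers the byte offset within pt_regs.  RSW(r4) stores
 * (1 | offsetof(struct switch_stack, r4) << 1), so GR_IN_SW(4) == 1 and the
 * offset is relative to switch_stack instead.  setreg()/getreg() below pick
 * the base pointer (regs or sw) from that single tag bit.
 */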
211 static u16 gr_info[32]={
212 0, /* r0 is read-only : WE SHOULD NEVER GET THIS */
214 RPT(r1), RPT(r2), RPT(r3),
216 RSW(r4), RSW(r5), RSW(r6), RSW(r7),
218 RPT(r8), RPT(r9), RPT(r10), RPT(r11),
219 RPT(r12), RPT(r13), RPT(r14), RPT(r15),
221 RPT(r16), RPT(r17), RPT(r18), RPT(r19),
222 RPT(r20), RPT(r21), RPT(r22), RPT(r23),
223 RPT(r24), RPT(r25), RPT(r26), RPT(r27),
224 RPT(r28), RPT(r29), RPT(r30), RPT(r31)
227 static u16 fr_info[32]={
228 0, /* constant : WE SHOULD NEVER GET THIS */
229 0, /* constant : WE SHOULD NEVER GET THIS */
231 RSW(f2), RSW(f3), RSW(f4), RSW(f5),
233 RPT(f6), RPT(f7), RPT(f8), RPT(f9),
234 RPT(f10), RPT(f11),
236 RSW(f12), RSW(f13), RSW(f14),
237 RSW(f15), RSW(f16), RSW(f17), RSW(f18), RSW(f19),
238 RSW(f20), RSW(f21), RSW(f22), RSW(f23), RSW(f24),
239 RSW(f25), RSW(f26), RSW(f27), RSW(f28), RSW(f29),
240 RSW(f30), RSW(f31)
243 /* Invalidate ALAT entry for integer register REGNO. */
244 static void
245 invala_gr (int regno)
247 # define F(reg) case reg: ia64_invala_gr(reg); break
249 switch (regno) {
250 F( 0); F( 1); F( 2); F( 3); F( 4); F( 5); F( 6); F( 7);
251 F( 8); F( 9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15);
252 F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23);
253 F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31);
254 F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39);
255 F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47);
256 F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55);
257 F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63);
258 F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71);
259 F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79);
260 F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87);
261 F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95);
262 F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103);
263 F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111);
264 F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119);
265 F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127);
267 # undef F
270 /* Invalidate ALAT entry for floating-point register REGNO. */
271 static void
272 invala_fr (int regno)
274 # define F(reg) case reg: ia64_invala_fr(reg); break
276 switch (regno) {
277 F( 0); F( 1); F( 2); F( 3); F( 4); F( 5); F( 6); F( 7);
278 F( 8); F( 9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15);
279 F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23);
280 F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31);
281 F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39);
282 F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47);
283 F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55);
284 F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63);
285 F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71);
286 F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79);
287 F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87);
288 F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95);
289 F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103);
290 F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111);
291 F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119);
292 F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127);
294 # undef F
297 static inline unsigned long
298 rotate_reg (unsigned long sor, unsigned long rrb, unsigned long reg)
300 reg += rrb;
301 if (reg >= sor)
302 reg -= sor;
303 return reg;
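/*
 * Worked example (illustrative): set_rse_reg()/get_rse_reg() below decode
 * cr.ifs as sof = ifs & 0x7f, sor = 8*((ifs >> 14) & 0xf) and
 * rrb.gr = (ifs >> 18) & 0x7f.  With sof=48, sor=16 and rrb.gr=5, a write to
 * logical r35 gives ridx=3, which is below sor and therefore rotates to
 * physical slot rotate_reg(16, 5, 3) = 8; logical r46 gives ridx=14, which
 * wraps to (14+5)-16 = 3; logical r50 (ridx=18 >= sor) is not rotated at all.
 */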
306 static void
307 set_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long val, int nat)
309 struct switch_stack *sw = (struct switch_stack *) regs - 1;
310 unsigned long *bsp, *bspstore, *addr, *rnat_addr, *ubs_end;
311 unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
312 unsigned long rnats, nat_mask;
313 unsigned long on_kbs;
314 long sof = (regs->cr_ifs) & 0x7f;
315 long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
316 long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
317 long ridx = r1 - 32;
319 if (ridx >= sof) {
320 /* this should never happen, as the "rsvd register fault" has higher priority */
321 DPRINT("ignoring write to r%lu; only %lu registers are allocated!\n", r1, sof);
322 return;
325 if (ridx < sor)
326 ridx = rotate_reg(sor, rrb_gr, ridx);
328 DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n",
329 r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx);
331 on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore);
332 addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx);
333 if (addr >= kbs) {
334 /* the register is on the kernel backing store: easy... */
335 rnat_addr = ia64_rse_rnat_addr(addr);
336 if ((unsigned long) rnat_addr >= sw->ar_bspstore)
337 rnat_addr = &sw->ar_rnat;
338 nat_mask = 1UL << ia64_rse_slot_num(addr);
340 *addr = val;
341 if (nat)
342 *rnat_addr |= nat_mask;
343 else
344 *rnat_addr &= ~nat_mask;
345 return;
348 if (!user_stack(current, regs)) {
349 DPRINT("ignoring kernel write to r%lu; register isn't on the kernel RBS!", r1);
350 return;
353 bspstore = (unsigned long *)regs->ar_bspstore;
354 ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
355 bsp = ia64_rse_skip_regs(ubs_end, -sof);
356 addr = ia64_rse_skip_regs(bsp, ridx);
358 DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr);
360 ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val);
362 rnat_addr = ia64_rse_rnat_addr(addr);
364 ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats);
365 DPRINT("rnat @%p = 0x%lx nat=%d old nat=%ld\n",
366 (void *) rnat_addr, rnats, nat, (rnats >> ia64_rse_slot_num(addr)) & 1);
368 nat_mask = 1UL << ia64_rse_slot_num(addr);
369 if (nat)
370 rnats |= nat_mask;
371 else
372 rnats &= ~nat_mask;
373 ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, rnats);
375 DPRINT("rnat changed to @%p = 0x%lx\n", (void *) rnat_addr, rnats);
379 static void
380 get_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long *val, int *nat)
382 struct switch_stack *sw = (struct switch_stack *) regs - 1;
383 unsigned long *bsp, *addr, *rnat_addr, *ubs_end, *bspstore;
384 unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
385 unsigned long rnats, nat_mask;
386 unsigned long on_kbs;
387 long sof = (regs->cr_ifs) & 0x7f;
388 long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
389 long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
390 long ridx = r1 - 32;
392 if (ridx >= sof) {
393 /* read of out-of-frame register returns an undefined value; 0 in our case. */
394 DPRINT("ignoring read from r%lu; only %lu registers are allocated!\n", r1, sof);
395 goto fail;
398 if (ridx < sor)
399 ridx = rotate_reg(sor, rrb_gr, ridx);
401 DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n",
402 r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx);
404 on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore);
405 addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx);
406 if (addr >= kbs) {
407 /* the register is on the kernel backing store: easy... */
408 *val = *addr;
409 if (nat) {
410 rnat_addr = ia64_rse_rnat_addr(addr);
411 if ((unsigned long) rnat_addr >= sw->ar_bspstore)
412 rnat_addr = &sw->ar_rnat;
413 nat_mask = 1UL << ia64_rse_slot_num(addr);
414 *nat = (*rnat_addr & nat_mask) != 0;
416 return;
419 if (!user_stack(current, regs)) {
420 DPRINT("ignoring kernel read of r%lu; register isn't on the RBS!", r1);
421 goto fail;
424 bspstore = (unsigned long *)regs->ar_bspstore;
425 ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
426 bsp = ia64_rse_skip_regs(ubs_end, -sof);
427 addr = ia64_rse_skip_regs(bsp, ridx);
429 DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr);
431 ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val);
433 if (nat) {
434 rnat_addr = ia64_rse_rnat_addr(addr);
435 nat_mask = 1UL << ia64_rse_slot_num(addr);
437 DPRINT("rnat @%p = 0x%lx\n", (void *) rnat_addr, rnats);
439 ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats);
440 *nat = (rnats & nat_mask) != 0;
442 return;
444 fail:
445 *val = 0;
446 if (nat)
447 *nat = 0;
448 return;
452 static void
453 setreg (unsigned long regnum, unsigned long val, int nat, struct pt_regs *regs)
455 struct switch_stack *sw = (struct switch_stack *) regs - 1;
456 unsigned long addr;
457 unsigned long bitmask;
458 unsigned long *unat;
461 * First takes care of stacked registers
463 if (regnum >= IA64_FIRST_STACKED_GR) {
464 set_rse_reg(regs, regnum, val, nat);
465 return;
469 * Using r0 as a target raises a General Exception fault which has higher priority
470 * than the Unaligned Reference fault.
474 * Now look at registers in [0-31] range and init correct UNAT
476 if (GR_IN_SW(regnum)) {
477 addr = (unsigned long)sw;
478 unat = &sw->ar_unat;
479 } else {
480 addr = (unsigned long)regs;
481 unat = &sw->caller_unat;
483 DPRINT("tmp_base=%lx switch_stack=%s offset=%d\n",
484 addr, unat==&sw->ar_unat ? "yes":"no", GR_OFFS(regnum));
486 * add offset from base of struct
487 * and do it !
489 addr += GR_OFFS(regnum);
491 *(unsigned long *)addr = val;
494 * We need to clear the corresponding UNAT bit to fully emulate the load
495 * UNAT bit_pos = GR[r3]{8:3} from EAS-2.4
497 bitmask = 1UL << (addr >> 3 & 0x3f);
498 DPRINT("*0x%lx=0x%lx NaT=%d prev_unat @%p=%lx\n", addr, val, nat, (void *) unat, *unat);
499 if (nat) {
500 *unat |= bitmask;
501 } else {
502 *unat &= ~bitmask;
504 DPRINT("*0x%lx=0x%lx NaT=%d new unat: %p=%lx\n", addr, val, nat, (void *) unat,*unat);
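/*
 * Illustrative example of the UNAT indexing above: the bit that tracks a
 * spilled GR is bits {8:3} of the address it was saved at, so a register
 * stored at an address ending in 0x48 uses UNAT bit (0x48 >> 3) & 0x3f = 9,
 * and registers saved 8 bytes apart land on adjacent UNAT bits.
 */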
508 * Return the (rotated) index for floating point register REGNUM (REGNUM must be in the
509 * range from 32-127, result is in the range from 0-95.
511 static inline unsigned long
512 fph_index (struct pt_regs *regs, long regnum)
514 unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f;
515 return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR));
518 static void
519 setfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
521 struct switch_stack *sw = (struct switch_stack *)regs - 1;
522 unsigned long addr;
525 * From EAS-2.5: FPDisableFault has higher priority than Unaligned
526 * Fault. Thus, when we get here, we know the partition is enabled.
527 * To update f32-f127, there are three choices:
529 * (1) save f32-f127 to thread.fph and update the values there
530 * (2) use a gigantic switch statement to directly access the registers
531 * (3) generate code on the fly to update the desired register
533 * For now, we are using approach (1).
535 if (regnum >= IA64_FIRST_ROTATING_FR) {
536 ia64_sync_fph(current);
537 current->thread.fph[fph_index(regs, regnum)] = *fpval;
538 } else {
540 * pt_regs or switch_stack ?
542 if (FR_IN_SW(regnum)) {
543 addr = (unsigned long)sw;
544 } else {
545 addr = (unsigned long)regs;
548 DPRINT("tmp_base=%lx offset=%d\n", addr, FR_OFFS(regnum));
550 addr += FR_OFFS(regnum);
551 *(struct ia64_fpreg *)addr = *fpval;
554 * mark the low partition as being used now
556 * It is highly unlikely that this bit is not already set, but
557 * let's do it for safety.
559 regs->cr_ipsr |= IA64_PSR_MFL;
564 * These two inline functions generate the spilled versions of the constant floating point
565 * registers which can be used with stfX
567 static inline void
568 float_spill_f0 (struct ia64_fpreg *final)
570 ia64_stf_spill(final, 0);
573 static inline void
574 float_spill_f1 (struct ia64_fpreg *final)
576 ia64_stf_spill(final, 1);
579 static void
580 getfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
582 struct switch_stack *sw = (struct switch_stack *) regs - 1;
583 unsigned long addr;
586 * From EAS-2.5: FPDisableFault has higher priority than
587 * Unaligned Fault. Thus, when we get here, we know the partition is
588 * enabled.
590 * When regnum > 31, the register is still live and we need to force a save
591 * to current->thread.fph to get access to it. See discussion in setfpreg()
592 * for reasons and other ways of doing this.
594 if (regnum >= IA64_FIRST_ROTATING_FR) {
595 ia64_flush_fph(current);
596 *fpval = current->thread.fph[fph_index(regs, regnum)];
597 } else {
599 * f0 = 0.0, f1= 1.0. Those registers are constant and are thus
600 * not saved, we must generate their spilled form on the fly
602 switch(regnum) {
603 case 0:
604 float_spill_f0(fpval);
605 break;
606 case 1:
607 float_spill_f1(fpval);
608 break;
609 default:
611 * pt_regs or switch_stack ?
613 addr = FR_IN_SW(regnum) ? (unsigned long)sw
614 : (unsigned long)regs;
616 DPRINT("is_sw=%d tmp_base=%lx offset=0x%x\n",
617 FR_IN_SW(regnum), addr, FR_OFFS(regnum));
619 addr += FR_OFFS(regnum);
620 *fpval = *(struct ia64_fpreg *)addr;
626 static void
627 getreg (unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs)
629 struct switch_stack *sw = (struct switch_stack *) regs - 1;
630 unsigned long addr, *unat;
632 if (regnum >= IA64_FIRST_STACKED_GR) {
633 get_rse_reg(regs, regnum, val, nat);
634 return;
638 * take care of r0 (read-only always evaluate to 0)
640 if (regnum == 0) {
641 *val = 0;
642 if (nat)
643 *nat = 0;
644 return;
648 * Now look at registers in [0-31] range and init correct UNAT
650 if (GR_IN_SW(regnum)) {
651 addr = (unsigned long)sw;
652 unat = &sw->ar_unat;
653 } else {
654 addr = (unsigned long)regs;
655 unat = &sw->caller_unat;
658 DPRINT("addr_base=%lx offset=0x%x\n", addr, GR_OFFS(regnum));
660 addr += GR_OFFS(regnum);
662 *val = *(unsigned long *)addr;
665 * do it only when requested
667 if (nat)
668 *nat = (*unat >> (addr >> 3 & 0x3f)) & 0x1UL;
671 static void
672 emulate_load_updates (update_t type, load_store_t ld, struct pt_regs *regs, unsigned long ifa)
675 * IMPORTANT:
676 * Given the way we handle unaligned speculative loads, we should
677 * not get to this point in the code but we keep this sanity check,
678 * just in case.
680 if (ld.x6_op == 1 || ld.x6_op == 3) {
682 printk(KERN_ERR "%s: register update on speculative load, error\n", __func__);
686 if (die_if_kernel("unaligned reference on speculative load with register update\n",
687 regs, 30))
688 return;
693 * at this point, we know that the base register to update is valid i.e.,
694 * it's not r0
696 if (type == UPD_IMMEDIATE) {
697 unsigned long imm;
700 * Load +Imm: ldXZ r1=[r3],imm(9)
703 * form imm9: [13:19] contain the first 7 bits
705 imm = ld.x << 7 | ld.imm;
708 * sign extend (1+8bits) if m set
710 if (ld.m) imm |= SIGN_EXT9;
713 * ifa == r3 and we know that the NaT bit on r3 was clear so
714 * we can directly use ifa.
716 ifa += imm;
718 setreg(ld.r3, ifa, 0, regs);
720 DPRINT("ld.x=%d ld.m=%d imm=%ld r3=0x%lx\n", ld.x, ld.m, imm, ifa);
722 } else if (ld.m) {
723 unsigned long r2;
724 int nat_r2;
727 * Load +Reg Opcode: ldXZ r1=[r3],r2
729 * Note that we update r3 even in the case of ldfX.a
730 * (where the load does not happen)
732 * The way the load algorithm works, we know that r3 does not
733 * have its NaT bit set (would have gotten NaT consumption
734 * before getting the unaligned fault). So we can use ifa
735 * which equals r3 at this point.
737 * IMPORTANT:
738 * The above statement holds ONLY because we know that we
739 * never reach this code when trying to do a ldX.s.
740 * If we ever make it to here on an ldfX.s then
742 getreg(ld.imm, &r2, &nat_r2, regs);
744 ifa += r2;
747 * propagate Nat r2 -> r3
749 setreg(ld.r3, ifa, nat_r2, regs);
751 DPRINT("imm=%d r2=%ld r3=0x%lx nat_r2=%d\n",ld.imm, r2, ifa, nat_r2);
756 static int
757 emulate_load_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
759 unsigned int len = 1 << ld.x6_sz;
760 unsigned long val = 0;
763 * r0, as target, doesn't need to be checked because Illegal Instruction
764 * faults have higher priority than unaligned faults.
766 * r0 cannot be found as the base as it would never generate an
767 * unaligned reference.
771 * For ldX.a we will emulate the load and also invalidate the ALAT entry.
772 * See comment below for explanation on how we handle ldX.a
775 if (len != 2 && len != 4 && len != 8) {
776 DPRINT("unknown size: x6=%d\n", ld.x6_sz);
777 return -1;
779 /* this assumes little-endian byte-order: */
780 if (copy_from_user(&val, (void __user *) ifa, len))
781 return -1;
782 setreg(ld.r1, val, 0, regs);
785 * check for updates on any kind of loads
787 if (ld.op == 0x5 || ld.m)
788 emulate_load_updates(ld.op == 0x5 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa);
791 * handling of various loads (based on EAS2.4):
793 * ldX.acq (ordered load):
794 * - acquire semantics would have been used, so force fence instead.
796 * ldX.c.clr (check load and clear):
797 * - if we get to this handler, it's because the entry was not in the ALAT.
798 * Therefore the operation reverts to a normal load
800 * ldX.c.nc (check load no clear):
801 * - same as previous one
803 * ldX.c.clr.acq (ordered check load and clear):
804 * - same as above for c.clr part. The load needs to have acquire semantics. So
805 * we use the fence semantics which is stronger and thus ensures correctness.
807 * ldX.a (advanced load):
808 * - suppose ldX.a r1=[r3]. If we get to the unaligned trap it's because the
809 * address doesn't match requested size alignment. This means that we would
810 * possibly need more than one load to get the result.
812 * The load part can be handled just like a normal load, however the difficult
813 * part is to get the right thing into the ALAT. The critical piece of information
814 * is the base address of the load & its size. To do that, a ld.a must be executed,
815 * clearly any address can be pushed into the table by using ld1.a r1=[r3]. Now
816 * if we use the same target register, we will be okay for the check.a instruction.
817 * If we look at the store, basically a stX [r3]=r1 checks the ALAT for any entry
818 * which would overlap within [r3,r3+X] (the size of the load was stored in the
819 * ALAT). If such an entry is found the entry is invalidated. But this is not good
820 * enough, take the following example:
821 * r3=3
822 * ld4.a r1=[r3]
824 * Could be emulated by doing:
825 * ld1.a r1=[r3],1
826 * store to temporary;
827 * ld1.a r1=[r3],1
828 * store & shift to temporary;
829 * ld1.a r1=[r3],1
830 * store & shift to temporary;
831 * ld1.a r1=[r3]
832 * store & shift to temporary;
833 * r1=temporary
835 * So in this case, you would get the right value in r1 but the wrong info in
836 * the ALAT. Notice that you could do it in reverse to finish with address 3
837 * but you would still get the size wrong. To get the size right, one needs to
838 * execute exactly the same kind of load. You could do it from an aligned
839 * temporary location, but you would get the address wrong.
841 * So no matter what, it is not possible to emulate an advanced load
842 * correctly. But is that really critical?
844 * We will always convert ld.a into a normal load with ALAT invalidated. This
845 * will enable the compiler to do optimizations where certain code paths after ld.a
846 * are not required to have ld.c/chk.a, e.g., code paths with no intervening stores.
848 * If there is a store after the advanced load, one must either do a ld.c.* or
849 * chk.a.* to reuse the value stored in the ALAT. Both can "fail" (meaning no
850 * entry found in ALAT), and that's perfectly ok because:
852 * - ld.c.*, if the entry is not present a normal load is executed
853 * - chk.a.*, if the entry is not present, execution jumps to recovery code
855 * In either case, the load can be potentially retried in another form.
857 * ALAT must be invalidated for the register (so that chk.a or ld.c don't pick
858 * up a stale entry later). The register base update MUST also be performed.
862 * when the load has the .acq completer then
863 * use ordering fence.
865 if (ld.x6_op == 0x5 || ld.x6_op == 0xa)
866 mb();
869 * invalidate ALAT entry in case of advanced load
871 if (ld.x6_op == 0x2)
872 invala_gr(ld.r1);
874 return 0;
877 static int
878 emulate_store_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
880 unsigned long r2;
881 unsigned int len = 1 << ld.x6_sz;
884 * if we get to this handler, Nat bits on both r3 and r2 have already
885 * been checked, so we don't need to do it again.
887 * extract the value to be stored
889 getreg(ld.imm, &r2, NULL, regs);
892 * we rely on the macros in unaligned.h for now i.e.,
893 * we let the compiler figure out how to read memory gracefully.
895 * We need this switch/case because of the way the inline function
896 * works. The code is optimized by the compiler and looks like
897 * a single switch/case.
899 DPRINT("st%d [%lx]=%lx\n", len, ifa, r2);
901 if (len != 2 && len != 4 && len != 8) {
902 DPRINT("unknown size: x6=%d\n", ld.x6_sz);
903 return -1;
906 /* this assumes little-endian byte-order: */
907 if (copy_to_user((void __user *) ifa, &r2, len))
908 return -1;
911 * stX [r3]=r2,imm(9)
913 * NOTE:
914 * ld.r3 can never be r0, because r0 would not generate an
915 * unaligned access.
917 if (ld.op == 0x5) {
918 unsigned long imm;
921 * form imm9: [12:6] contain the first 7 bits
923 imm = ld.x << 7 | ld.r1;
925 * sign extend (8bits) if m set
927 if (ld.m) imm |= SIGN_EXT9;
929 * ifa == r3 (NaT is necessarily cleared)
931 ifa += imm;
933 DPRINT("imm=%lx r3=%lx\n", imm, ifa);
935 setreg(ld.r3, ifa, 0, regs);
938 * we don't have alat_invalidate_multiple() so we need
939 * to do the complete flush :-<<
941 ia64_invala();
944 * stX.rel: use fence instead of release
946 if (ld.x6_op == 0xd)
947 mb();
949 return 0;
953 * floating point operations sizes in bytes
955 static const unsigned char float_fsz[4]={
956 10, /* extended precision (e) */
957 8, /* integer (8) */
958 4, /* single precision (s) */
959 8 /* double precision (d) */
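/*
 * Sketch of the conversion round-trip implemented below (illustrative): for
 * an unaligned "ldfs f7=[r3]", emulate_load_float() copies float_fsz[2] = 4
 * bytes from user memory into an aligned fpr_init buffer, mem2float_single()
 * runs that through scratch register f6 (ldfs, then stf.spill) to produce
 * the 16-byte register spill image in fpr_final, and setfpreg() writes that
 * image into the saved context for f7.  The float2mem_*() helpers perform
 * the reverse trip for stores.
 */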
962 static inline void
963 mem2float_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
965 ia64_ldfe(6, init);
966 ia64_stop();
967 ia64_stf_spill(final, 6);
970 static inline void
971 mem2float_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
973 ia64_ldf8(6, init);
974 ia64_stop();
975 ia64_stf_spill(final, 6);
978 static inline void
979 mem2float_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
981 ia64_ldfs(6, init);
982 ia64_stop();
983 ia64_stf_spill(final, 6);
986 static inline void
987 mem2float_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
989 ia64_ldfd(6, init);
990 ia64_stop();
991 ia64_stf_spill(final, 6);
994 static inline void
995 float2mem_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
997 ia64_ldf_fill(6, init);
998 ia64_stop();
999 ia64_stfe(final, 6);
1002 static inline void
1003 float2mem_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
1005 ia64_ldf_fill(6, init);
1006 ia64_stop();
1007 ia64_stf8(final, 6);
1010 static inline void
1011 float2mem_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
1013 ia64_ldf_fill(6, init);
1014 ia64_stop();
1015 ia64_stfs(final, 6);
1018 static inline void
1019 float2mem_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
1021 ia64_ldf_fill(6, init);
1022 ia64_stop();
1023 ia64_stfd(final, 6);
1026 static int
1027 emulate_load_floatpair (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
1029 struct ia64_fpreg fpr_init[2];
1030 struct ia64_fpreg fpr_final[2];
1031 unsigned long len = float_fsz[ld.x6_sz];
1034 * fr0 & fr1 don't need to be checked because Illegal Instruction faults have
1035 * higher priority than unaligned faults.
1037 * r0 cannot be found as the base as it would never generate an unaligned
1038 * reference.
1042 * make sure we get clean buffers
1044 memset(&fpr_init, 0, sizeof(fpr_init));
1045 memset(&fpr_final, 0, sizeof(fpr_final));
1048 * ldfpX.a: we don't try to emulate anything but we must
1049 * invalidate the ALAT entry and execute updates, if any.
1051 if (ld.x6_op != 0x2) {
1053 * This assumes little-endian byte-order. Note that there is no "ldfpe"
1054 * instruction:
1056 if (copy_from_user(&fpr_init[0], (void __user *) ifa, len)
1057 || copy_from_user(&fpr_init[1], (void __user *) (ifa + len), len))
1058 return -1;
1060 DPRINT("ld.r1=%d ld.imm=%d x6_sz=%d\n", ld.r1, ld.imm, ld.x6_sz);
1061 DDUMP("frp_init =", &fpr_init, 2*len);
1063 * XXX fixme
1064 * Could optimize inlines by using ldfpX & 2 spills
1066 switch( ld.x6_sz ) {
1067 case 0:
1068 mem2float_extended(&fpr_init[0], &fpr_final[0]);
1069 mem2float_extended(&fpr_init[1], &fpr_final[1]);
1070 break;
1071 case 1:
1072 mem2float_integer(&fpr_init[0], &fpr_final[0]);
1073 mem2float_integer(&fpr_init[1], &fpr_final[1]);
1074 break;
1075 case 2:
1076 mem2float_single(&fpr_init[0], &fpr_final[0]);
1077 mem2float_single(&fpr_init[1], &fpr_final[1]);
1078 break;
1079 case 3:
1080 mem2float_double(&fpr_init[0], &fpr_final[0]);
1081 mem2float_double(&fpr_init[1], &fpr_final[1]);
1082 break;
1084 DDUMP("fpr_final =", &fpr_final, 2*len);
1086 * XXX fixme
1088 * A possible optimization would be to drop fpr_final and directly
1089 * use the storage from the saved context i.e., the actual final
1090 * destination (pt_regs, switch_stack or thread structure).
1092 setfpreg(ld.r1, &fpr_final[0], regs);
1093 setfpreg(ld.imm, &fpr_final[1], regs);
1097 * Check for updates: only immediate updates are available for this
1098 * instruction.
1100 if (ld.m) {
1102 * the immediate is implicit given the ldsz of the operation:
1103 * single: 8 (2x4) and for all others it's 16 (2x8)
1105 ifa += len<<1;
1108 * IMPORTANT:
1109 * the fact that we force the NaT of r3 to zero is ONLY valid
1110 * as long as we don't come here with a ldfpX.s.
1111 * For this reason we keep this sanity check
1113 if (ld.x6_op == 1 || ld.x6_op == 3)
1114 printk(KERN_ERR "%s: register update on speculative load pair, error\n",
1116 __func__);
1121 setreg(ld.r3, ifa, 0, regs);
1125 * Invalidate ALAT entries, if any, for both registers.
1127 if (ld.x6_op == 0x2) {
1128 invala_fr(ld.r1);
1129 invala_fr(ld.imm);
1131 return 0;
1135 static int
1136 emulate_load_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
1138 struct ia64_fpreg fpr_init;
1139 struct ia64_fpreg fpr_final;
1140 unsigned long len = float_fsz[ld.x6_sz];
1143 * fr0 & fr1 don't need to be checked because Illegal Instruction
1144 * faults have higher priority than unaligned faults.
1146 * r0 cannot be found as the base as it would never generate an
1147 * unaligned reference.
1151 * make sure we get clean buffers
1153 memset(&fpr_init,0, sizeof(fpr_init));
1154 memset(&fpr_final,0, sizeof(fpr_final));
1157 * For ldfX.a we don't try to emulate anything but we must
1158 * invalidate the ALAT entry.
1159 * See comments in ldX for descriptions on how the various loads are handled.
1161 if (ld.x6_op != 0x2) {
1162 if (copy_from_user(&fpr_init, (void __user *) ifa, len))
1163 return -1;
1165 DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz);
1166 DDUMP("fpr_init =", &fpr_init, len);
1168 * we only do something for x6_op={0,8,9}
1170 switch( ld.x6_sz ) {
1171 case 0:
1172 mem2float_extended(&fpr_init, &fpr_final);
1173 break;
1174 case 1:
1175 mem2float_integer(&fpr_init, &fpr_final);
1176 break;
1177 case 2:
1178 mem2float_single(&fpr_init, &fpr_final);
1179 break;
1180 case 3:
1181 mem2float_double(&fpr_init, &fpr_final);
1182 break;
1184 DDUMP("fpr_final =", &fpr_final, len);
1186 * XXX fixme
1188 * A possible optimization would be to drop fpr_final and directly
1189 * use the storage from the saved context i.e., the actual final
1190 * destination (pt_regs, switch_stack or thread structure).
1192 setfpreg(ld.r1, &fpr_final, regs);
1196 * check for updates on any loads
1198 if (ld.op == 0x7 || ld.m)
1199 emulate_load_updates(ld.op == 0x7 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa);
1202 * invalidate ALAT entry in case of advanced floating point loads
1204 if (ld.x6_op == 0x2)
1205 invala_fr(ld.r1);
1207 return 0;
1211 static int
1212 emulate_store_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
1214 struct ia64_fpreg fpr_init;
1215 struct ia64_fpreg fpr_final;
1216 unsigned long len = float_fsz[ld.x6_sz];
1219 * make sure we get clean buffers
1221 memset(&fpr_init,0, sizeof(fpr_init));
1222 memset(&fpr_final,0, sizeof(fpr_final));
1225 * if we get to this handler, Nat bits on both r3 and r2 have already
1226 * been checked, so we don't need to do it again.
1228 * extract the value to be stored
1230 getfpreg(ld.imm, &fpr_init, regs);
1232 * during this step, we extract the spilled registers from the saved
1233 * context i.e., we refill. Then we store (no spill) to a temporary
1234 * aligned location
1236 switch( ld.x6_sz ) {
1237 case 0:
1238 float2mem_extended(&fpr_init, &fpr_final);
1239 break;
1240 case 1:
1241 float2mem_integer(&fpr_init, &fpr_final);
1242 break;
1243 case 2:
1244 float2mem_single(&fpr_init, &fpr_final);
1245 break;
1246 case 3:
1247 float2mem_double(&fpr_init, &fpr_final);
1248 break;
1250 DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz);
1251 DDUMP("fpr_init =", &fpr_init, len);
1252 DDUMP("fpr_final =", &fpr_final, len);
1254 if (copy_to_user((void __user *) ifa, &fpr_final, len))
1255 return -1;
1258 * stfX [r3]=r2,imm(9)
1260 * NOTE:
1261 * ld.r3 can never be r0, because r0 would not generate an
1262 * unaligned access.
1264 if (ld.op == 0x7) {
1265 unsigned long imm;
1268 * form imm9: [12:6] contain the first 7 bits
1270 imm = ld.x << 7 | ld.r1;
1272 * sign extend (8bits) if m set
1274 if (ld.m)
1275 imm |= SIGN_EXT9;
1277 * ifa == r3 (NaT is necessarily cleared)
1279 ifa += imm;
1281 DPRINT("imm=%lx r3=%lx\n", imm, ifa);
1283 setreg(ld.r3, ifa, 0, regs);
1286 * we don't have alat_invalidate_multiple() so we need
1287 * to do the complete flush :-<<
1289 ia64_invala();
1291 return 0;
1295 * Make sure we log the unaligned access, so that user/sysadmin can notice it and
1296 * eventually fix the program. However, we don't want to do that for every access so we
1297 * pace it with jiffies. This isn't really MP-safe, but it doesn't really have to be
1298 * either...
1300 static int
1301 within_logging_rate_limit (void)
1303 static unsigned long count, last_time;
1305 if (jiffies - last_time > 5*HZ)
1306 count = 0;
1307 if (count < 5) {
1308 last_time = jiffies;
1309 count++;
1310 return 1;
1312 return 0;
1316 void
1317 ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
1319 struct ia64_psr *ipsr = ia64_psr(regs);
1320 mm_segment_t old_fs = get_fs();
1321 unsigned long bundle[2];
1322 unsigned long opcode;
1323 struct siginfo si;
1324 const struct exception_table_entry *eh = NULL;
1325 union {
1326 unsigned long l;
1327 load_store_t insn;
1328 } u;
1329 int ret = -1;
1331 if (ia64_psr(regs)->be) {
1332 /* we don't support big-endian accesses */
1333 if (die_if_kernel("big-endian unaligned accesses are not supported", regs, 0))
1334 return;
1335 goto force_sigbus;
1339 * Treat kernel accesses for which there is an exception handler entry the same as
1340 * user-level unaligned accesses. Otherwise, a clever program could trick this
1341 * handler into reading arbitrary kernel addresses...
1343 if (!user_mode(regs))
1344 eh = search_exception_tables(regs->cr_iip + ia64_psr(regs)->ri);
1345 if (user_mode(regs) || eh) {
1346 if ((current->thread.flags & IA64_THREAD_UAC_SIGBUS) != 0)
1347 goto force_sigbus;
1349 if (!no_unaligned_warning &&
1350 !(current->thread.flags & IA64_THREAD_UAC_NOPRINT) &&
1351 within_logging_rate_limit())
1353 char buf[200]; /* comm[] is at most 16 bytes... */
1354 size_t len;
1356 len = sprintf(buf, "%s(%d): unaligned access to 0x%016lx, "
1357 "ip=0x%016lx\n\r", current->comm,
1358 task_pid_nr(current),
1359 ifa, regs->cr_iip + ipsr->ri);
1361 * Don't call tty_write_message() if we're in the kernel; we might
1362 * be holding locks...
1364 if (user_mode(regs))
1365 tty_write_message(current->signal->tty, buf);
1366 buf[len-1] = '\0'; /* drop '\r' */
1367 /* watch for command names containing %s */
1368 printk(KERN_WARNING "%s", buf);
1369 } else {
1370 if (no_unaligned_warning && !noprint_warning) {
1371 noprint_warning = 1;
1372 printk(KERN_WARNING "%s(%d) encountered an "
1373 "unaligned exception which required\n"
1374 "kernel assistance, which degrades "
1375 "the performance of the application.\n"
1376 "Unaligned exception warnings have "
1377 "been disabled by the system "
1378 "administrator\n"
1379 "echo 0 > /proc/sys/kernel/ignore-"
1380 "unaligned-usertrap to re-enable\n",
1381 current->comm, task_pid_nr(current));
1384 } else {
1385 if (within_logging_rate_limit())
1386 printk(KERN_WARNING "kernel unaligned access to 0x%016lx, ip=0x%016lx\n",
1387 ifa, regs->cr_iip + ipsr->ri);
1388 set_fs(KERNEL_DS);
1391 DPRINT("iip=%lx ifa=%lx isr=%lx (ei=%d, sp=%d)\n",
1392 regs->cr_iip, ifa, regs->cr_ipsr, ipsr->ri, ipsr->it);
1394 if (__copy_from_user(bundle, (void __user *) regs->cr_iip, 16))
1395 goto failure;
1398 * extract the instruction from the bundle given the slot number
1400 switch (ipsr->ri) {
1401 case 0: u.l = (bundle[0] >> 5); break;
1402 case 1: u.l = (bundle[0] >> 46) | (bundle[1] << 18); break;
1403 case 2: u.l = (bundle[1] >> 23); break;
1405 opcode = (u.l >> IA64_OPCODE_SHIFT) & IA64_OPCODE_MASK;
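/*
 * Layout being decoded above (illustrative): an IA-64 bundle is 128 bits,
 * a 5-bit template in bits [4:0] followed by three 41-bit instruction slots
 * at bits [45:5], [86:46] and [127:87].  With the bundle read as two 64-bit
 * words, slot 1 straddles both words, hence
 * (bundle[0] >> 46) | (bundle[1] << 18), and slot 2 is bundle[1] >> 23.
 */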
1407 DPRINT("opcode=%lx ld.qp=%d ld.r1=%d ld.imm=%d ld.r3=%d ld.x=%d ld.hint=%d "
1408 "ld.x6=0x%x ld.m=%d ld.op=%d\n", opcode, u.insn.qp, u.insn.r1, u.insn.imm,
1409 u.insn.r3, u.insn.x, u.insn.hint, u.insn.x6_sz, u.insn.m, u.insn.op);
1412 * IMPORTANT:
1413 * Notice that the switch statement DOES not cover all possible instructions
1414 * that DO generate unaligned references. This is done on purpose because for some
1415 * instructions it DOES NOT make sense to try and emulate the access. Sometimes it
1416 * is WRONG to try and emulate. Here is a list of instructions we don't emulate i.e.,
1417 * the program will get a signal and die:
1419 * load/store:
1420 * - ldX.spill
1421 * - stX.spill
1422 * Reason: RNATs are based on addresses
1423 * - ld16
1424 * - st16
1425 * Reason: ld16 and st16 are supposed to occur in a single
1426 * memory op
1428 * synchronization:
1429 * - cmpxchg
1430 * - fetchadd
1431 * - xchg
1432 * Reason: ATOMIC operations cannot be emulated properly using multiple
1433 * instructions.
1435 * speculative loads:
1436 * - ldX.sZ
1437 * Reason: side effects, code must be ready to deal with failure so simpler
1438 * to let the load fail.
1439 * ---------------------------------------------------------------------------------
1440 * XXX fixme
1442 * I would like to get rid of this switch case and do something
1443 * more elegant.
1445 switch (opcode) {
1446 case LDS_OP:
1447 case LDSA_OP:
1448 if (u.insn.x)
1449 /* oops, really a semaphore op (cmpxchg, etc) */
1450 goto failure;
1451 /* no break */
1452 case LDS_IMM_OP:
1453 case LDSA_IMM_OP:
1454 case LDFS_OP:
1455 case LDFSA_OP:
1456 case LDFS_IMM_OP:
1458 * The instruction will be retried with deferred exceptions turned on, and
1459 * we should get the NaT bit installed
1461 * IMPORTANT: When PSR_ED is set, the register & immediate update forms
1462 * are actually executed even though the operation failed. So we don't
1463 * need to take care of this.
1465 DPRINT("forcing PSR_ED\n");
1466 regs->cr_ipsr |= IA64_PSR_ED;
1467 goto done;
1469 case LD_OP:
1470 case LDA_OP:
1471 case LDBIAS_OP:
1472 case LDACQ_OP:
1473 case LDCCLR_OP:
1474 case LDCNC_OP:
1475 case LDCCLRACQ_OP:
1476 if (u.insn.x)
1477 /* oops, really a semaphore op (cmpxchg, etc) */
1478 goto failure;
1479 /* no break */
1480 case LD_IMM_OP:
1481 case LDA_IMM_OP:
1482 case LDBIAS_IMM_OP:
1483 case LDACQ_IMM_OP:
1484 case LDCCLR_IMM_OP:
1485 case LDCNC_IMM_OP:
1486 case LDCCLRACQ_IMM_OP:
1487 ret = emulate_load_int(ifa, u.insn, regs);
1488 break;
1490 case ST_OP:
1491 case STREL_OP:
1492 if (u.insn.x)
1493 /* oops, really a semaphore op (cmpxchg, etc) */
1494 goto failure;
1495 /* no break */
1496 case ST_IMM_OP:
1497 case STREL_IMM_OP:
1498 ret = emulate_store_int(ifa, u.insn, regs);
1499 break;
1501 case LDF_OP:
1502 case LDFA_OP:
1503 case LDFCCLR_OP:
1504 case LDFCNC_OP:
1505 if (u.insn.x)
1506 ret = emulate_load_floatpair(ifa, u.insn, regs);
1507 else
1508 ret = emulate_load_float(ifa, u.insn, regs);
1509 break;
1511 case LDF_IMM_OP:
1512 case LDFA_IMM_OP:
1513 case LDFCCLR_IMM_OP:
1514 case LDFCNC_IMM_OP:
1515 ret = emulate_load_float(ifa, u.insn, regs);
1516 break;
1518 case STF_OP:
1519 case STF_IMM_OP:
1520 ret = emulate_store_float(ifa, u.insn, regs);
1521 break;
1523 default:
1524 goto failure;
1526 DPRINT("ret=%d\n", ret);
1527 if (ret)
1528 goto failure;
1530 if (ipsr->ri == 2)
1532 * given today's architecture this case is not likely to happen because a
1533 * memory access instruction (M) can never be in the last slot of a
1534 * bundle. But let's keep it for now.
1536 regs->cr_iip += 16;
1537 ipsr->ri = (ipsr->ri + 1) & 0x3;
1539 DPRINT("ipsr->ri=%d iip=%lx\n", ipsr->ri, regs->cr_iip);
1540 done:
1541 set_fs(old_fs); /* restore original address limit */
1542 return;
1544 failure:
1545 /* something went wrong... */
1546 if (!user_mode(regs)) {
1547 if (eh) {
1548 ia64_handle_exception(regs, eh);
1549 goto done;
1551 if (die_if_kernel("error during unaligned kernel access\n", regs, ret))
1552 return;
1553 /* NOT_REACHED */
1555 force_sigbus:
1556 si.si_signo = SIGBUS;
1557 si.si_errno = 0;
1558 si.si_code = BUS_ADRALN;
1559 si.si_addr = (void __user *) ifa;
1560 si.si_flags = 0;
1561 si.si_isr = 0;
1562 si.si_imm = 0;
1563 force_sig_info(SIGBUS, &si, current);
1564 goto done;