 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#ifndef	_CHEETAHASM_H
#define	_CHEETAHASM_H

#pragma ident	"%Z%%M%	%I%	%E%	SMI"

#define	ASM_LD(reg, symbol)						\
	sethi	%hi(symbol), reg;					\
	ld	[reg + %lo(symbol)], reg;

#define	ASM_LDX(reg, symbol)						\
	sethi	%hi(symbol), reg;					\
	ldx	[reg + %lo(symbol)], reg;

#define	ASM_JMP(reg, symbol)						\
	sethi	%hi(symbol), reg;					\
	jmp	reg + %lo(symbol);					\
	nop

/*
 * Macro for computing an address at a given offset from the 'cpu_private'
 * ptr.  The 'cpu_private' ptr is in the machcpu structure.
 *	off_reg: Register offset from 'cpu_private' ptr.
 *	scr1:	 Scratch, ptr is returned in this register.
 *	scr2:	 Scratch
 *	label:	 Label to branch to if the cpu_private ptr is null/zero.
 */
#define	GET_CPU_PRIVATE_PTR(off_reg, scr1, scr2, label)			\
	CPU_ADDR(scr1, scr2);						\
	ldn	[scr1 + CPU_PRIVATE], scr1;				\
	tst	scr1;							\
	bz	label;							\
	nop;								\
	add	scr1, off_reg, scr1
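
/*
 * A hypothetical use (the offset symbol here is illustrative, not
 * defined in this file):
 *	GET_CPU_PRIVATE_PTR(CHPR_SCRUB_MISC, %g2, %g3, 1f);
 * would leave the address of that field of this CPU's cpu_private area
 * in %g2, or branch to 1f if cpu_private has not been set up yet.
 */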

/*
 * Macro version of get_dcache_dtag.  We use this macro in the
 * CPU logout code.  Since the Dcache is virtually indexed, only
 * bits [12:5] of the AFAR can be used, so we need to search through
 * 8 indexes (4 ways + bit 13) in order to find the tag we want.
 *	afar:	input AFAR, not modified.
 *	datap:	input ptr to ch_dc_data_t, at end pts to end of ch_dc_data_t.
 *	scr1:	scratch.
 *	scr2:	scratch, will hold tag to look for.
 *	scr3:	used for Dcache index, loops through 4 ways.
 */
#define	GET_DCACHE_DTAG(afar, datap, scr1, scr2, scr3)			\
	set	CH_DCACHE_IDX_MASK, scr3;				\
	and	afar, scr3, scr3;					\
	srlx	afar, CH_DCTAG_PA_SHIFT, scr2;				\
	b	1f;							\
	or	scr2, CH_DCTAG_VALID_BIT, scr2;	/* tag we want */	\
	.align	128;							\
1:									\
	ldxa	[scr3]ASI_DC_TAG, scr1;		/* read tag */		\
	cmp	scr1, scr2;						\
	bne	4f;				/* not found? */	\
	nop;								\
	stxa	scr3, [datap + CH_DC_IDX]%asi;	/* store index */	\
	stxa	scr1, [datap + CH_DC_TAG]%asi;	/* store tag */		\
	membar	#Sync;				/* Cheetah PRM 10.6.3 */ \
	ldxa	[scr3]ASI_DC_UTAG, scr1;	/* read utag */		\
	membar	#Sync;				/* Cheetah PRM 10.6.3 */ \
	stxa	scr1, [datap + CH_DC_UTAG]%asi;				\
	ldxa	[scr3]ASI_DC_SNP_TAG, scr1;	/* read snoop tag */	\
	stxa	scr1, [datap + CH_DC_SNTAG]%asi;			\
	add	datap, CH_DC_DATA, datap;				\
	clr	scr2;							\
2:									\
	membar	#Sync;				/* Cheetah PRM 10.6.1 */ \
	ldxa	[scr3 + scr2]ASI_DC_DATA, scr1;	/* read data */		\
	membar	#Sync;				/* Cheetah PRM 10.6.1 */ \
	stxa	scr1, [datap]%asi;					\
	add	datap, 8, datap;					\
	cmp	scr2, CH_DC_DATA_REG_SIZE - 8;				\
	blt	2b;							\
	add	scr2, 8, scr2;						\
	GET_CPU_IMPL(scr2);	/* Parity bits are elsewhere for */	\
	cmp	scr2, PANTHER_IMPL;	/* panther processors. */	\
	bne,a	5f;		/* Done if not panther. */		\
	add	datap, 8, datap; /* Skip to the end of the struct. */	\
	clr	scr2;							\
	add	datap, 7, datap; /* offset of the last parity byte */	\
	mov	1, scr1;						\
	sll	scr1, PN_DC_DATA_PARITY_BIT_SHIFT, scr1;		\
	or	scr3, scr1, scr3; /* add DC_data_parity bit to index */	\
3:									\
	membar	#Sync;				/* Cheetah PRM 10.6.1 */ \
	ldxa	[scr3 + scr2]ASI_DC_DATA, scr1;	/* read parity bits */	\
	membar	#Sync;				/* Cheetah PRM 10.6.1 */ \
	stba	scr1, [datap]%asi;					\
	dec	datap;							\
	cmp	scr2, CH_DC_DATA_REG_SIZE - 8;				\
	blt	3b;							\
	add	scr2, 8, scr2;						\
	b	6f;							\
	add	datap, 5, datap; /* set pointer to end of our struct */ \
4:									\
	set	CH_DCACHE_IDX_INCR, scr1;	/* incr. idx (scr3) */	\
	add	scr3, scr1, scr3;					\
	set	CH_DCACHE_IDX_LIMIT, scr1;	/* done? */		\
	cmp	scr3, scr1;						\
	blt	1b;							\
	nop;								\
	add	datap, CH_DC_DATA_SIZE, datap;				\
5:									\
6:
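
/*
 * Example of the search above (values illustrative; the real geometry
 * comes from the CH_DCACHE_* constants): if AFAR bits [12:5] hold 0x3,
 * the first probe index is 0x3 << 5 = 0x60, and the macro then steps
 * through the remaining combinations of the way bits and bit 13 via
 * CH_DCACHE_IDX_INCR until CH_DCACHE_IDX_LIMIT is reached, eight
 * probes in all.
 */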
/*
 * Macro version of get_icache_dtag.  We use this macro in the CPU
 * logout code.  If the Icache is on, we don't want to capture the data.
 *	afar:	input AFAR, not modified.
 *	datap:	input ptr to ch_ic_data_t, at end pts to end of ch_ic_data_t.
 *	scr1:	scratch.
 *	scr2:	scratch, will hold tag to look for.
 *	scr3:	used for Icache index, loops through 4 ways.
 * Note: For Panther, the Icache is virtually indexed and increases in
 * size to 64KB (instead of 32KB) with a line size of 64 bytes (instead
 * of 32).  This means the IC_addr index bits[14:7] for Panther now
 * correspond to VA bits[13:6].  But since it is virtually indexed, we
 * still mask out only bits[12:5] from the AFAR (we have to manually
 * check bit 13).  In order to make this code work for all processors,
 * we end up checking twice as many indexes (8 instead of 4) as required
 * for non-Panther CPUs and saving off twice as much data (16 instructions
 * instead of just 8).
 */
#define	GET_ICACHE_DTAG(afar, datap, scr1, scr2, scr3)			\
	ldxa	[%g0]ASI_DCU, scr1;					\
	btst	DCU_IC, scr1;		/* is Icache enabled? */	\
	bne,a	6f;			/* yes, don't capture */	\
	add	datap, CH_IC_DATA_SIZE, datap;	/* annul if no branch */ \
	GET_CPU_IMPL(scr2);	/* Panther only uses VA[13:6] */	\
	cmp	scr2, PANTHER_IMPL;	/* and we also want to mask */	\
	be	1f;			/* out bit 13 since the */	\
	nop;				/* Panther I$ is VIPT. */	\
	set	CH_ICACHE_IDX_MASK, scr3;				\
	b	2f;							\
	nop;								\
1:									\
	set	PN_ICACHE_VA_IDX_MASK, scr3;				\
2:									\
	and	afar, scr3, scr3;					\
	sllx	scr3, CH_ICACHE_IDX_SHIFT, scr3;			\
	srlx	afar, CH_ICPATAG_SHIFT, scr2;	/* pa tag we want */	\
	andn	scr2, CH_ICPATAG_LBITS, scr2;	/* mask off lower */	\
1:									\
	ldxa	[scr3]ASI_IC_TAG, scr1;		/* read pa tag */	\
	andn	scr1, CH_ICPATAG_LBITS, scr1;	/* mask off lower */	\
	cmp	scr1, scr2;						\
	bne	5f;				/* not found? */	\
	nop;								\
	stxa	scr3, [datap + CH_IC_IDX]%asi;	/* store index */	\
	stxa	scr1, [datap + CH_IC_PATAG]%asi; /* store pa tag */	\
	add	scr3, CH_ICTAG_UTAG, scr3;	/* read utag */		\
	ldxa	[scr3]ASI_IC_TAG, scr1;					\
	add	scr3, (CH_ICTAG_UPPER - CH_ICTAG_UTAG), scr3;		\
	stxa	scr1, [datap + CH_IC_UTAG]%asi;				\
	ldxa	[scr3]ASI_IC_TAG, scr1;		/* read upper tag */	\
	add	scr3, (CH_ICTAG_LOWER - CH_ICTAG_UPPER), scr3;		\
	stxa	scr1, [datap + CH_IC_UPPER]%asi;			\
	ldxa	[scr3]ASI_IC_TAG, scr1;		/* read lower tag */	\
	andn	scr3, CH_ICTAG_TMASK, scr3;				\
	stxa	scr1, [datap + CH_IC_LOWER]%asi;			\
	ldxa	[scr3]ASI_IC_SNP_TAG, scr1;	/* read snoop tag */	\
	stxa	scr1, [datap + CH_IC_SNTAG]%asi;			\
	add	datap, CH_IC_DATA, datap;				\
	clr	scr2;							\
3:									\
	ldxa	[scr3 + scr2]ASI_IC_DATA, scr1;	/* read ins. data */	\
	stxa	scr1, [datap]%asi;					\
	add	datap, 8, datap;					\
	cmp	scr2, PN_IC_DATA_REG_SIZE - 8;				\
	blt	3b;							\
	add	scr2, 8, scr2;						\
	b	6f;							\
	nop;								\
5:									\
	set	CH_ICACHE_IDX_INCR, scr1;	/* incr. idx (scr3) */	\
	add	scr3, scr1, scr3;					\
	set	PN_ICACHE_IDX_LIMIT, scr1;	/* done? */		\
	cmp	scr3, scr1;						\
	blt	1b;							\
	nop;								\
	add	datap, CH_IC_DATA_SIZE, datap;				\
6:
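
/*
 * Concrete example of the Panther note above (illustrative; the exact
 * field positions come from the PN_ICACHE_* constants): a Panther line
 * with VA[13:6] = 0x41 lives at IC_addr index bits [14:7] = 0x41, but
 * the AFAR only supplies bits [12:5], so the macro must probe both
 * possible values of VA bit 13, doubling the indexes searched.
 */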

#if defined(JALAPENO) || defined(SERRANO)
/*
 * Macro version of get_ecache_dtag.  We use this macro in the
 * CPU logout code.
 *	afar:	input AFAR, not modified.
 *	datap:	Ptr to ch_ec_data_t, at end pts just past ch_ec_data_t.
 *	ec_way:	Constant value (way number)
 *	scr1:	scratch
 *	scr2:	scratch
 *	scr3:	scratch
 */
#define	GET_ECACHE_DTAG(afar, datap, ec_way, scr1, scr2, scr3)		\
	mov	ec_way, scr1;						\
	and	scr1, JP_ECACHE_NWAY - 1, scr1; /* mask E$ way bits */	\
	sllx	scr1, JP_EC_TAG_DATA_WAY_SHIFT, scr1;			\
	set	((JP_ECACHE_MAX_SIZE / JP_ECACHE_NWAY) - 1), scr2;	\
	and	afar, scr2, scr3;		/* get set offset */	\
	andn	scr3, (JP_ECACHE_MAX_LSIZE - 1), scr3; /* VA<5:0>=0 */	\
	or	scr3, scr1, scr3;		/* or WAY bits */	\
2:									\
	stxa	scr3, [datap + CH_EC_IDX]%asi;	/* store E$ index */	\
	JP_EC_DIAG_ACCESS_MEMBAR;					\
	ldxa	[scr3]ASI_EC_DIAG, scr1;	/* get E$ tag */	\
	JP_EC_DIAG_ACCESS_MEMBAR;					\
	stxa	scr1, [datap + CH_EC_TAG]%asi;				\
	add	datap, CH_EC_DATA, datap;				\
	ldxa	[scr3]ASI_EC_R, %g0;	/* ld E$ stging regs */		\
	clr	scr1;							\
3:					/* loop thru 5 regs */		\
	ldxa	[scr1]ASI_EC_DATA, scr2;				\
	stxa	scr2, [datap]%asi;					\
	add	datap, 8, datap;					\
	cmp	scr1, CH_ECACHE_STGREG_TOTALSIZE - 8;			\
	bne	3b;							\
	add	scr1, 8, scr1;						\
	btst	CH_ECACHE_STGREG_SIZE, scr3;	/* done? */		\
	bz,a	2b;							\
	add	scr3, CH_ECACHE_STGREG_SIZE, scr3

#define	GET_ECACHE_DTAGS(afar, datap, scr1, scr2, scr3)			\
	GET_ECACHE_DTAG(afar, datap, 0, scr1, scr2, scr3);		\
	GET_ECACHE_DTAG(afar, datap, 1, scr1, scr2, scr3);		\
	GET_ECACHE_DTAG(afar, datap, 2, scr1, scr2, scr3);		\
	GET_ECACHE_DTAG(afar, datap, 3, scr1, scr2, scr3);		\
	add	datap, (CHD_EC_DATA_SETS-4)*CH_EC_DATA_SIZE, datap;	\
	add	datap, CH_EC_DATA_SIZE * PN_L2_NWAYS, datap

/*
 * Jalapeno is not a multi-core processor, so these macros are null.
 */
#define	PARK_SIBLING_CORE(dcucr_reg, scr1, scr2)
#define	UNPARK_SIBLING_CORE(dcucr_reg, scr1, scr2)

#if defined(JALAPENO)
/*
 * Jalapeno gets primary AFSR and AFAR.  All bits in the AFSR except
 * the fatal error bits are cleared.
 *	datap:	pointer to cpu logout structure.
 *	afar:	returned primary AFAR value.
 *	scr1:	scratch
 *	scr2:	scratch
 */
#define	GET_AFSR_AFAR(datap, afar, scr1, scr2)				\
	ldxa	[%g0]ASI_AFAR, afar;					\
	stxa	afar, [datap + (CH_CLO_DATA + CH_CHD_AFAR)]%asi;	\
	ldxa	[%g0]ASI_AFSR, scr2;					\
	stxa	scr2, [datap + (CH_CLO_DATA + CH_CHD_AFSR)]%asi;	\
	sethi	%hh(C_AFSR_FATAL_ERRS), scr1;				\
	sllx	scr1, 32, scr1;						\
	bclr	scr1, scr2;	/* Clear fatal error bits here, so */	\
	stxa	scr2, [%g0]ASI_AFSR; /* they're left as is in AFSR */	\
	membar	#Sync

/*
 * Jalapeno has no shadow AFAR, null operation.
 */
#define	GET_SHADOW_DATA(afar, datap, scr1, scr2, scr3)

#elif defined(SERRANO)
/*
 * Serrano gets primary AFSR and AFAR.  All bits in the AFSR except
 * the fatal error bits are cleared.  For Serrano, we also save the
 * AFAR2 register.
 *	datap:	pointer to cpu logout structure.
 *	afar:	returned primary AFAR value.
 *	scr1:	scratch
 *	scr2:	scratch
 */
#define	GET_AFSR_AFAR(datap, afar, scr1, scr2)				\
	set	ASI_MCU_AFAR2_VA, scr1;					\
	ldxa	[scr1]ASI_MCU_CTRL, afar;				\
	stxa	afar, [datap + (CH_CLO_DATA + CH_CHD_AFAR2)]%asi;	\
	ldxa	[%g0]ASI_AFAR, afar;					\
	stxa	afar, [datap + (CH_CLO_DATA + CH_CHD_AFAR)]%asi;	\
	ldxa	[%g0]ASI_AFSR, scr2;					\
	stxa	scr2, [datap + (CH_CLO_DATA + CH_CHD_AFSR)]%asi;	\
	sethi	%hh(C_AFSR_FATAL_ERRS), scr1;				\
	sllx	scr1, 32, scr1;						\
	bclr	scr1, scr2;	/* Clear fatal error bits here, so */	\
	stxa	scr2, [%g0]ASI_AFSR; /* they're left as is in AFSR */	\
	membar	#Sync

/*
 * Serrano needs to capture E$, D$ and I$ lines associated with afar2.
 *	afar:	scratch, holds afar2.
 *	datap:	pointer to cpu logout structure.
 *	scr1:	scratch
 *	scr2:	scratch
 *	scr3:	scratch
 */
#define	GET_SHADOW_DATA(afar, datap, scr1, scr2, scr3)			\
	ldxa	[datap + (CH_CLO_DATA + CH_CHD_AFAR2)]%asi, afar;	\
	add	datap, CH_CLO_SDW_DATA + CH_CHD_EC_DATA, datap;		\
	GET_ECACHE_DTAGS(afar, datap, scr1, scr2, scr3);		\
	GET_DCACHE_DTAG(afar, datap, scr1, scr2, scr3);			\
	GET_ICACHE_DTAG(afar, datap, scr1, scr2, scr3);			\
	sub	datap, CH_CPU_LOGOUT_SIZE, datap

#elif defined(CHEETAH_PLUS)
/*
 * Macro version of get_ecache_dtag.  We use this macro in the
 * CPU logout code.
 *	afar:	input AFAR, not modified.
 *	datap:	Ptr to ch_ec_data_t, at end pts just past ch_ec_data_t.
 *	pn_way:	ecache way for panther (value = 0-3).  For non-panther
 *		cpus, this macro will be called with pn_way = 0.
 *	scr1:	scratch
 *	scr2:	scratch
 *	scr3:	scratch
 */
#define	GET_ECACHE_DTAG(afar, datap, pn_way, scr1, scr2, scr3)		\
	mov	afar, scr3;						\
	andn	scr3, (CH_ECACHE_SUBBLK_SIZE - 1), scr3; /* VA<5:0>=0 */ \
	set	(CH_ECACHE_8M_SIZE - 1), scr2;				\
	and	scr3, scr2, scr3;		/* VA<63:23>=0 */	\
	mov	pn_way, scr1;	/* panther L3$ is 4-way so we ... */	\
	sllx	scr1, PN_L3_WAY_SHIFT, scr1;	/* need to mask... */	\
	or	scr3, scr1, scr3;	/* in the way bits <24:23>. */	\
2:									\
	stxa	scr3, [datap + CH_EC_IDX]%asi;	/* store E$ index */	\
	ldxa	[scr3]ASI_EC_DIAG, scr1;	/* get E$ tag */	\
	stxa	scr1, [datap + CH_EC_TAG]%asi;				\
	set	CHP_ECACHE_IDX_TAG_ECC, scr1;				\
	or	scr3, scr1, scr1;					\
	ldxa	[scr1]ASI_EC_DIAG, scr1;	/* get E$ tag ECC */	\
	stxa	scr1, [datap + CH_EC_TAG_ECC]%asi;			\
	add	datap, CH_EC_DATA, datap;				\
	ldxa	[scr3]ASI_EC_R, %g0;	/* ld E$ stging regs */		\
	clr	scr1;							\
3:					/* loop thru 5 regs */		\
	ldxa	[scr1]ASI_EC_DATA, scr2;				\
	stxa	scr2, [datap]%asi;					\
	add	datap, 8, datap;					\
	cmp	scr1, CH_ECACHE_STGREG_TOTALSIZE - 8;			\
	bne	3b;							\
	add	scr1, 8, scr1;						\
	btst	CH_ECACHE_STGREG_SIZE, scr3;	/* done? */		\
	bz,a	2b;							\
	add	scr3, CH_ECACHE_STGREG_SIZE, scr3

/*
 * If this is a panther, we need to make sure the sibling core is
 * parked so that we avoid any race conditions during diagnostic
 * accesses to the shared L2 and L3 caches.
 *	dcucr_reg: This register will be used to keep track of whether
 *		   or not we need to unpark the core later.
 *		   It just so happens that we also use this same register
 *		   to keep track of our saved DCUCR value so we only touch
 *		   bit 4 of the register (which is a "reserved" bit in the
 *		   DCUCR) for keeping track of core parking.
 *	scr1:	Scratch register.
 *	scr2:	Scratch register.
 */
#define	PARK_SIBLING_CORE(dcucr_reg, scr1, scr2)			\
	GET_CPU_IMPL(scr1);						\
	cmp	scr1, PANTHER_IMPL;	/* only park for panthers */	\
	bne,a	%xcc, 2f;						\
	andn	dcucr_reg, PN_PARKED_OTHER_CORE, dcucr_reg;		\
	set	ASI_CORE_RUNNING_STATUS, scr1; /* check other core */	\
	ldxa	[scr1]ASI_CMP_SHARED, scr2;	/* is it running? */	\
	cmp	scr2, PN_BOTH_CORES_RUNNING;				\
	bne,a	%xcc, 2f;	/* if not running, we are done */	\
	andn	dcucr_reg, PN_PARKED_OTHER_CORE, dcucr_reg;		\
	or	dcucr_reg, PN_PARKED_OTHER_CORE, dcucr_reg;		\
	set	ASI_CORE_ID, scr1;					\
	ldxa	[scr1]ASI_CMP_PER_CORE, scr2;				\
	and	scr2, COREID_MASK, scr2;				\
	or	%g0, 1, scr1;		/* find out which core... */	\
	sll	scr1, scr2, scr2;	/* ... we need to park... */	\
1:									\
	set	ASI_CORE_RUNNING_RW, scr1;				\
	ldxa	[scr1]ASI_CMP_SHARED, scr1;	/* ...but are we? */	\
	btst	scr1, scr2;	/* check our own parked status */	\
	bz	%xcc, 1b;	/* if we are then go round again */	\
	nop;								\
	set	ASI_CORE_RUNNING_RW, scr1;	/* else proceed... */	\
	stxa	scr2, [scr1]ASI_CMP_SHARED;	/* ... and park it. */	\
1:									\
	set	ASI_CORE_RUNNING_STATUS, scr1;	/* spin until... */	\
	ldxa	[scr1]ASI_CMP_SHARED, scr1;	/* ... the other... */	\
	cmp	scr1, scr2;	/* ...core is parked according to... */	\
	bne,a	%xcc, 1b;	/* ...the core running status reg. */	\
	nop;								\
2:

/*
 * The core running this code will unpark its sibling core if the
 * sibling core had been parked by the current core earlier in this
 * trap handler.
 *	dcucr_reg: This register is used to keep track of whether or not
 *		   we need to unpark our sibling core.
 *		   It just so happens that we also use this same register
 *		   to keep track of our saved DCUCR value so we only touch
 *		   bit 4 of the register (which is a "reserved" bit in the
 *		   DCUCR) for keeping track of core parking.
 *	scr1:	Scratch register.
 *	scr2:	Scratch register.
 */
#define	UNPARK_SIBLING_CORE(dcucr_reg, scr1, scr2)			\
	btst	PN_PARKED_OTHER_CORE, dcucr_reg;			\
	bz,pt	%xcc, 1f;	/* if nothing to unpark, we are done */	\
	andn	dcucr_reg, PN_PARKED_OTHER_CORE, dcucr_reg;		\
	set	ASI_CORE_RUNNING_RW, scr1;				\
	set	PN_BOTH_CORES_RUNNING, scr2;	/* we want both... */	\
	stxa	scr2, [scr1]ASI_CMP_SHARED;	/* ...cores running. */	\
1:
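
/*
 * Worked example of the parking handshake, assuming core IDs 0 and 1
 * and PN_BOTH_CORES_RUNNING == 0x3: if core 0 runs this code, it
 * computes scr2 = (1 << 0) = 0x1 and stores that to the core running
 * register, leaving only itself running; the status register is then
 * polled until it reads back 0x1, i.e. until core 1 is parked.
 * UNPARK_SIBLING_CORE simply stores 0x3 to run both cores again.
 */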

/*
 * Cheetah+ and Jaguar get both primary and secondary AFSR/AFAR.  All bits
 * in the primary AFSR are cleared except the fatal error bits.  For Panther,
 * we also have to read and clear the AFSR_EXT, again leaving the fatal
 * error bits alone.
 *	datap:	pointer to cpu logout structure.
 *	afar:	returned primary AFAR value.
 *	scr1:	scratch
 *	scr2:	scratch
 */
#define	GET_AFSR_AFAR(datap, afar, scr1, scr2)				\
	set	ASI_SHADOW_REG_VA, scr1;				\
	ldxa	[scr1]ASI_AFAR, scr2;					\
	stxa	scr2, [datap + (CH_CLO_SDW_DATA + CH_CHD_AFAR)]%asi;	\
	ldxa	[scr1]ASI_AFSR, scr2;					\
	stxa	scr2, [datap + (CH_CLO_SDW_DATA + CH_CHD_AFSR)]%asi;	\
	ldxa	[%g0]ASI_AFAR, afar;					\
	stxa	afar, [datap + (CH_CLO_DATA + CH_CHD_AFAR)]%asi;	\
	ldxa	[%g0]ASI_AFSR, scr2;					\
	stxa	scr2, [datap + (CH_CLO_DATA + CH_CHD_AFSR)]%asi;	\
	sethi	%hh(C_AFSR_FATAL_ERRS), scr1;				\
	sllx	scr1, 32, scr1;						\
	bclr	scr1, scr2;	/* Clear fatal error bits here, so */	\
	stxa	scr2, [%g0]ASI_AFSR; /* they're left as is in AFSR */	\
	membar	#Sync;							\
	GET_CPU_IMPL(scr1);						\
	cmp	scr1, PANTHER_IMPL;					\
	bne	%xcc, 1f;						\
	nop;								\
	set	ASI_SHADOW_AFSR_EXT_VA, scr1;	/* shadow AFSR_EXT */	\
	ldxa	[scr1]ASI_AFSR, scr2;					\
	stxa	scr2, [datap + (CH_CLO_SDW_DATA + CH_CHD_AFSR_EXT)]%asi; \
	set	ASI_AFSR_EXT_VA, scr1;		/* primary AFSR_EXT */	\
	ldxa	[scr1]ASI_AFSR, scr2;					\
	stxa	scr2, [datap + (CH_CLO_DATA + CH_CHD_AFSR_EXT)]%asi;	\
	set	C_AFSR_EXT_FATAL_ERRS, scr1;				\
	bclr	scr1, scr2;	/* Clear fatal error bits here, */	\
	set	ASI_AFSR_EXT_VA, scr1;		/* so they're left */	\
	stxa	scr2, [scr1]ASI_AFSR;		/* as is in AFSR_EXT */	\
	membar	#Sync;							\
1:

/*
 * This macro is used in the CPU logout code to capture diagnostic
 * information from the L2 cache on panther processors.
 *	afar:	input AFAR, not modified.
 *	datap:	Ptr to pn_l2_data_t, at end pts just past pn_l2_data_t.
 *	scr1:	scratch
 *	scr2:	scratch
 *	scr3:	scratch
 */
#define	GET_PN_L2_CACHE_DTAGS(afar, datap, scr1, scr2, scr3)		\
	mov	afar, scr3;						\
	set	PN_L2_INDEX_MASK, scr1;					\
	and	scr3, scr1, scr3;					\
	b	1f;	/* code to read tags and data should be ... */	\
	nop;		/* ...on the same cache line if possible. */	\
	.align	128;	/* update this line if you add lines below. */	\
1:									\
	stxa	scr3, [datap + CH_EC_IDX]%asi;	/* store L2$ index */	\
	ldxa	[scr3]ASI_L2_TAG, scr1;		/* read the L2$ tag */	\
	stxa	scr1, [datap + CH_EC_TAG]%asi;				\
	add	datap, CH_EC_DATA, datap;				\
	clr	scr1;							\
2:									\
	ldxa	[scr3 + scr1]ASI_L2_DATA, scr2;	/* loop through */	\
	stxa	scr2, [datap]%asi;		/* <511:256> of L2 */	\
	add	datap, 8, datap;		/* data and record */	\
	cmp	scr1, (PN_L2_LINESIZE / 2) - 8;	/* it in the cpu */	\
	bne	2b;				/* logout struct. */	\
	add	scr1, 8, scr1;						\
	set	PN_L2_DATA_ECC_SEL, scr2;	/* ECC_sel bit. */	\
	ldxa	[scr3 + scr2]ASI_L2_DATA, scr2;	/* Read and record */	\
	stxa	scr2, [datap]%asi;		/* ecc of <511:256> */	\
	add	datap, 8, datap;					\
3:									\
	ldxa	[scr3 + scr1]ASI_L2_DATA, scr2;	/* loop through */	\
	stxa	scr2, [datap]%asi;		/* <255:0> of L2 */	\
	add	datap, 8, datap;		/* data and record */	\
	cmp	scr1, PN_L2_LINESIZE - 8;	/* it in the cpu */	\
	bne	3b;				/* logout struct. */	\
	add	scr1, 8, scr1;						\
	set	PN_L2_DATA_ECC_SEL, scr2;	/* ECC_sel bit. */	\
	add	scr2, PN_L2_ECC_LO_REG, scr2;				\
	ldxa	[scr3 + scr2]ASI_L2_DATA, scr2;	/* Read and record */	\
	stxa	scr2, [datap]%asi;		/* ecc of <255:0>. */	\
	add	datap, 8, datap;		/* Advance pointer */	\
	set	PN_L2_SET_SIZE, scr2;					\
	set	PN_L2_MAX_SET, scr1;					\
	cmp	scr1, scr3;	/* more ways to try for this line? */	\
	bg,a	%xcc, 1b;	/* if so, start over with next way */	\
	add	scr3, scr2, scr3
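
/*
 * Sizing note for the loops above: a Panther L2 line is PN_L2_LINESIZE
 * bytes, captured as two halves.  Assuming a 64-byte line, each half is
 * four 8-byte ASI_L2_DATA reads (<511:256>, then <255:0>), and each
 * half is followed by one ECC word, i.e. ten stores per way per line.
 */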

/*
 * Cheetah+ assumes E$ is 2-way and grabs both E$ lines associated with afar.
 *	afar:	AFAR from access.
 *	datap:	pointer to cpu logout structure.
 *	scr1:	scratch
 *	scr2:	scratch
 *	scr3:	scratch
 */
#define	GET_ECACHE_DTAGS(afar, datap, scr1, scr2, scr3)			\
	GET_CPU_IMPL(scr1);						\
	cmp	scr1, PANTHER_IMPL;					\
	bne	%xcc, 4f;						\
	nop;								\
	GET_ECACHE_DTAG(afar, datap, 0, scr1, scr2, scr3);		\
	GET_ECACHE_DTAG(afar, datap, 1, scr1, scr2, scr3);		\
	GET_ECACHE_DTAG(afar, datap, 2, scr1, scr2, scr3);		\
	GET_ECACHE_DTAG(afar, datap, 3, scr1, scr2, scr3);		\
	add	datap, (CHD_EC_DATA_SETS-4)*CH_EC_DATA_SIZE, datap;	\
	GET_PN_L2_CACHE_DTAGS(afar, datap, scr1, scr2, scr3);		\
	ba	5f;							\
	nop;								\
4:									\
	GET_ECACHE_DTAG(afar, datap, 0, scr1, scr2, scr3);		\
	GET_ECACHE_WAY_BIT(scr1, scr2);					\
	xor	afar, scr1, afar;					\
	GET_ECACHE_DTAG(afar, datap, 0, scr1, scr2, scr3);		\
	GET_ECACHE_WAY_BIT(scr1, scr2);		/* restore AFAR */	\
	xor	afar, scr1, afar;					\
	add	datap, (CHD_EC_DATA_SETS-2)*CH_EC_DATA_SIZE, datap;	\
	add	datap, CH_EC_DATA_SIZE * PN_L2_NWAYS, datap;		\
5:

/*
 * Cheetah+ needs to capture E$, D$ and I$ lines associated with
 * the shadow afar.
 *	afar:	scratch, holds shadow afar.
 *	datap:	pointer to cpu logout structure.
 *	scr1:	scratch
 *	scr2:	scratch
 *	scr3:	scratch
 */
#define	GET_SHADOW_DATA(afar, datap, scr1, scr2, scr3)			\
	ldxa	[datap + (CH_CLO_SDW_DATA + CH_CHD_AFAR)]%asi, afar;	\
	add	datap, CH_CLO_SDW_DATA + CH_CHD_EC_DATA, datap;		\
	GET_ECACHE_DTAGS(afar, datap, scr1, scr2, scr3);		\
	GET_DCACHE_DTAG(afar, datap, scr1, scr2, scr3);			\
	GET_ICACHE_DTAG(afar, datap, scr1, scr2, scr3);			\
	sub	datap, CH_CPU_LOGOUT_SIZE, datap

/*
 * Compute the "Way" bit for 2-way Ecache for Cheetah+.
 */
#define	GET_ECACHE_WAY_BIT(scr1, scr2)					\
	CPU_INDEX(scr1, scr2);						\
	mulx	scr1, CPU_NODE_SIZE, scr1;				\
	add	scr1, ECACHE_SIZE, scr1;				\
	set	cpunodes, scr2;						\
	ld	[scr1 + scr2], scr1;					\
	srlx	scr1, 1, scr1	/* way bit = ecache size / 2 */
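
/*
 * Example, assuming ecache_size holds the total 2-way capacity: for an
 * 8MB E$, each way spans 4MB, so the way-select bit is bit 22
 * (0x400000).  XORing the AFAR with that value, as GET_ECACHE_DTAGS
 * does above, converts an index in one way into the same index in the
 * other way.
 */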

#else	/* CHEETAH_PLUS */
/*
 * Macro version of get_ecache_dtag.  We use this macro in the
 * CPU logout code.
 *	afar:	input AFAR, not modified.
 *	datap:	Ptr to ch_ec_data_t, at end pts just past ch_ec_data_t.
 *	scr1:	scratch
 *	scr2:	scratch
 *	scr3:	scratch
 */
#define	GET_ECACHE_DTAG(afar, datap, scr1, scr2, scr3)			\
	mov	afar, scr3;						\
	andn	scr3, (CH_ECACHE_SUBBLK_SIZE - 1), scr3; /* VA<5:0>=0 */ \
	set	(CH_ECACHE_8M_SIZE - 1), scr2;				\
	and	scr3, scr2, scr3;		/* VA<63:23>=0 */	\
2:									\
	stxa	scr3, [datap + CH_EC_IDX]%asi;	/* store E$ index */	\
	ldxa	[scr3]ASI_EC_DIAG, scr1;	/* get E$ tag */	\
	stxa	scr1, [datap + CH_EC_TAG]%asi;				\
	add	datap, CH_EC_DATA, datap;				\
	ldxa	[scr3]ASI_EC_R, %g0;	/* ld E$ stging regs */		\
	clr	scr1;							\
3:					/* loop thru 5 regs */		\
	ldxa	[scr1]ASI_EC_DATA, scr2;				\
	stxa	scr2, [datap]%asi;					\
	add	datap, 8, datap;					\
	cmp	scr1, CH_ECACHE_STGREG_TOTALSIZE - 8;			\
	bne	3b;							\
	add	scr1, 8, scr1;						\
	btst	CH_ECACHE_STGREG_SIZE, scr3;	/* done? */		\
	bz,a	2b;							\
	add	scr3, CH_ECACHE_STGREG_SIZE, scr3

/*
 * Cheetah is not a multi-core processor, so these macros are null.
 */
#define	PARK_SIBLING_CORE(dcucr_reg, scr1, scr2)
#define	UNPARK_SIBLING_CORE(dcucr_reg, scr1, scr2)

/*
 * Cheetah gets primary AFSR and AFAR and clears the AFSR, except for the
 * fatal error bits.
 *	datap:	pointer to cpu logout structure.
 *	afar:	returned primary AFAR value.
 *	scr1:	scratch
 *	scr2:	scratch
 */
#define	GET_AFSR_AFAR(datap, afar, scr1, scr2)				\
	ldxa	[%g0]ASI_AFAR, afar;					\
	stxa	afar, [datap + (CH_CLO_DATA + CH_CHD_AFAR)]%asi;	\
	ldxa	[%g0]ASI_AFSR, scr2;					\
	stxa	scr2, [datap + (CH_CLO_DATA + CH_CHD_AFSR)]%asi;	\
	sethi	%hh(C_AFSR_FATAL_ERRS), scr1;				\
	sllx	scr1, 32, scr1;						\
	bclr	scr1, scr2;	/* Clear fatal error bits here, so */	\
	stxa	scr2, [%g0]ASI_AFSR; /* they're left as is in AFSR */	\
	membar	#Sync

/*
 * Cheetah E$ is direct-mapped, so we grab the line data and skip the
 * second line.
 *	afar:	AFAR from access.
 *	datap:	pointer to cpu logout structure.
 *	scr1:	scratch
 *	scr2:	scratch
 *	scr3:	scratch
 */
#define	GET_ECACHE_DTAGS(afar, datap, scr1, scr2, scr3)			\
	GET_ECACHE_DTAG(afar, datap, scr1, scr2, scr3);			\
	add	datap, (CHD_EC_DATA_SETS-1)*CH_EC_DATA_SIZE, datap;	\
	add	datap, CH_EC_DATA_SIZE * PN_L2_NWAYS, datap

/*
 * Cheetah has no shadow AFAR, null operation.
 */
#define	GET_SHADOW_DATA(afar, datap, scr1, scr2, scr3)

#endif	/* CHEETAH_PLUS */

/*
 * Cheetah/(Cheetah+ Jaguar Panther)/Jalapeno Macro for capturing CPU
 * logout data at TL>0.  r_val is a register that returns the "failure count"
 * to the caller, and may be used as a scratch register until the end of
 * the macro.  afar is used to return the primary AFAR value to the caller
 * and it too can be used as a scratch register until the end.  r_or_s is
 * a reg or symbol that has the offset within the "cpu_private" data area
 * to deposit the logout data.  t_flags is a register that has the
 * trap-type/trap-level/CEEN info.  This t_flags register may be used after
 * the GET_AFSR_AFAR macro.
 *
 * The CPU logout operation will fail (r_val > 0) if the logout
 * structure in question is already being used.  Otherwise, the CPU
 * logout operation will succeed (r_val = 0).  For failures, r_val
 * returns the busy count (# of times we tried using this CPU logout
 * structure when it was busy.)
 *
 * Register usage:
 *	%asi:	Must be set to either ASI_MEM if the address in datap
 *		is a physical address or to ASI_N if the address in
 *		datap is a virtual address.
 *	r_val:	This register is the return value which tells the
 *		caller whether or not the LOGOUT operation was successful.
 *		For failures, r_val returns the fail count (i.e. the number
 *		of times we have tried to use this logout structure when
 *		it was already being used.)
 *	afar:	output: contains AFAR on exit
 *	t_flags: input trap type info, may be used as scratch after stored
 *		to cpu log out structure.
 *	datap:	Points to log out data area.
 *	scr1:	Scratch
 *	scr2:	Scratch (may be r_val)
 *	scr3:	Scratch (may be t_flags)
 */
#define	DO_TL1_CPU_LOGOUT(r_val, afar, t_flags, datap, scr1, scr2, scr3) \
	setx	LOGOUT_INVALID, scr2, scr1;				\
	ldxa	[datap + (CH_CLO_DATA + CH_CHD_AFAR)]%asi, scr2;	\
	cmp	scr2, scr1;		/* logout area in use? */	\
	bne	8f;							\
	nop;								\
	stxa	t_flags, [datap + CH_CLO_FLAGS]%asi;			\
	GET_AFSR_AFAR(datap, afar, scr1, scr2);				\
	add	datap, CH_CLO_DATA + CH_CHD_EC_DATA, datap;		\
	GET_ECACHE_DTAGS(afar, datap, scr1, scr2, scr3);		\
	GET_DCACHE_DTAG(afar, datap, scr1, scr2, scr3);			\
	GET_ICACHE_DTAG(afar, datap, scr1, scr2, scr3);			\
	sub	datap, CH_CLO_DATA + CH_DIAG_DATA_SIZE, datap;		\
	GET_SHADOW_DATA(afar, datap, scr1, scr2, scr3);			\
	ldxa	[datap + (CH_CLO_DATA + CH_CHD_AFAR)]%asi, afar;	\
	set	0, r_val;	/* return value for success */		\
	ba	9f;							\
	nop;								\
8:									\
	ldxa	[%g0]ASI_AFAR, afar;					\
	ldxa	[datap + CH_CLO_NEST_CNT]%asi, r_val;			\
	inc	r_val;		/* return value for failure */		\
	stxa	r_val, [datap + CH_CLO_NEST_CNT]%asi;			\
9:

/*
 * Cheetah/(Cheetah+ Jaguar Panther)/Jalapeno Macro for capturing CPU
 * logout data.  Uses the DO_TL1_CPU_LOGOUT macro defined above, sets
 * up the expected data pointer in the scr1 register, and sets the %asi
 * register to ASI_N for kernel virtual addresses instead of ASI_MEM as
 * is used for physical addresses.
 *
 * The CPU logout operation will fail (r_val > 0) if the logout
 * structure in question is already being used.  Otherwise, the CPU
 * logout operation will succeed (r_val = 0).  For failures, r_val
 * returns the busy count (# of times we tried using this CPU logout
 * structure when it was busy.)
 *
 * Register usage:
 *	r_val:	This register is the return value which tells the
 *		caller whether or not the LOGOUT operation was successful.
 *		For failures, r_val returns the fail count (i.e. the number
 *		of times we have tried to use this logout structure when
 *		it was already being used.)
 *	afar:	returns AFAR, used internally as afar value.
 *		output: if the cpu_private struct has not been initialized,
 *		then we return the t_flags value listed below.
 *	r_or_s:	input offset, either register or constant (symbol).  It's
 *		OK for r_or_s to be a register as long as it's not scr1 or
 *		scr3.
 *	t_flags: input trap type info, may be used as scratch after stored
 *		to cpu log out structure.
 *	scr1:	Scratch, points to log out data area.
 *	scr2:	Scratch (may be r_or_s)
 *	scr3:	Scratch (may be r_val)
 *	scr4:	Scratch (may be t_flags)
 */
#define	DO_CPU_LOGOUT(r_val, afar, r_or_s, t_flags, scr1, scr2, scr3, scr4) \
	GET_CPU_PRIVATE_PTR(r_or_s, scr1, scr3, 7f); /* can't use scr2/4 */ \
	wr	%g0, ASI_N, %asi;					\
	DO_TL1_CPU_LOGOUT(r_val, afar, t_flags, scr1, scr2, scr3, scr4) \
	ba	1f;							\
	nop;								\
7:									\
	mov	t_flags, afar;		/* depends on afar = %g2 */	\
	set	0, r_val;		/* success in this case. */	\
1:
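
/*
 * A hypothetical call site (the register choice is illustrative except
 * for afar, which the 7f path above requires to be %g2, and the offset
 * symbol is made up for the example):
 *	DO_CPU_LOGOUT(%g3, %g2, CHPR_SOME_LOGOUT_OFF, %g4, %g1, %g5, %g6, %g7)
 * On return, %g3 is zero on success or the busy count on failure.
 */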

/*
 * The P$ is flushed as a side effect of writing to the Primary
 * or Secondary Context Register.  After writing to a context
 * register, every line of the P$ in the Valid state is invalidated,
 * regardless of which context it belongs to.
 * This routine simply touches the Primary context register by
 * reading the current value and writing it back.  The Primary
 * context is not changed.
 */
#define	PCACHE_FLUSHALL(tmp1, tmp2, tmp3)				\
	sethi	%hi(FLUSH_ADDR), tmp1;					\
	set	MMU_PCONTEXT, tmp2;					\
	ldxa	[tmp2]ASI_DMMU, tmp3;					\
	stxa	tmp3, [tmp2]ASI_DMMU;					\
	flush	tmp1	/* See Cheetah PRM 8.10.2 */

/*
 * Macro that flushes the entire Dcache.
 *
 *	arg1 = dcache size
 *	arg2 = dcache linesize
 */
#define	CH_DCACHE_FLUSHALL(arg1, arg2, tmp1)				\
	sub	arg1, arg2, tmp1;					\
1:									\
	stxa	%g0, [tmp1]ASI_DC_TAG;					\
	membar	#Sync;							\
	cmp	%g0, tmp1;						\
	bne,pt	%icc, 1b;						\
	sub	tmp1, arg2, tmp1;

/*
 * Macro that flushes the entire Icache.
 *
 * Note that we cannot access ASI 0x67 (ASI_IC_TAG) with the Icache on,
 * because accesses to ASI 0x67 interfere with Icache coherency.  We
 * must make sure the Icache is off, then turn it back on after the entire
 * cache has been invalidated.  If the Icache is originally off, we'll just
 * clear the tags but not turn the Icache on.
 *
 *	arg1 = icache size
 *	arg2 = icache linesize
 */
#define	CH_ICACHE_FLUSHALL(arg1, arg2, tmp1, tmp2)			\
	ldxa	[%g0]ASI_DCU, tmp2;					\
	andn	tmp2, DCU_IC, tmp1;					\
	stxa	tmp1, [%g0]ASI_DCU;					\
	flush	%g0;	/* flush required after changing the IC bit */	\
	sllx	arg2, 1, arg2;		/* arg2 = linesize * 2 */	\
	sllx	arg1, 1, arg1;		/* arg1 = size * 2 */		\
	sub	arg1, arg2, arg1;					\
	or	arg1, CH_ICTAG_LOWER, arg1;	/* "write" tag */	\
1:									\
	stxa	%g0, [arg1]ASI_IC_TAG;					\
	membar	#Sync;				/* Cheetah PRM 8.9.3 */	\
	cmp	arg1, CH_ICTAG_LOWER;					\
	bne,pt	%icc, 1b;						\
	sub	arg1, arg2, arg1;					\
	stxa	tmp2, [%g0]ASI_DCU;					\
	flush	%g0;	/* flush required after changing the IC bit */

#if defined(JALAPENO) || defined(SERRANO)

/*
 * ASI access to the L2 tag or L2 flush can hang the cpu when interacting
 * with combinations of L2 snoops, victims and stores.
 *
 * A possible workaround is to surround each L2 ASI access with membars
 * and make sure that the code is hitting in the Icache.  This requires
 * aligning the code sequence at an E$ boundary and forcing I$ fetch by
 * jumping to selected offsets so that we don't take any I$ misses
 * during ASI access to the L2 tag or L2 flush.  This also requires
 * making sure that we don't take any interrupts or traps (such as
 * fast ECC trap, I$/D$ tag parity error) which can result in eviction
 * of this code sequence from I$, thus causing a miss.
 *
 * Because of the complexity/risk, we have decided to do a partial fix
 * of adding a membar around each ASI access to the L2 tag or L2 flush.
 */
#define	JP_EC_DIAG_ACCESS_MEMBAR	\
	membar	#Sync

/*
 * Jalapeno version of macro that flushes the entire Ecache.
 *
 * Uses the Jalapeno displacement flush feature of ASI_EC_DIAG.
 *
 *	arg1 = ecache size
 *	arg2 = ecache linesize - not modified; can be an immediate constant.
 */
#define	ECACHE_FLUSHALL(arg1, arg2, tmp1, tmp2)				\
	CPU_INDEX(tmp1, tmp2);						\
	set	JP_ECACHE_IDX_DISP_FLUSH, tmp2;				\
	sllx	tmp1, JP_ECFLUSH_PORTID_SHIFT, tmp1;			\
	or	tmp1, tmp2, tmp1;					\
	srlx	arg1, JP_EC_TO_SET_SIZE_SHIFT, tmp2;			\
1:									\
	subcc	tmp2, arg2, tmp2;					\
	JP_EC_DIAG_ACCESS_MEMBAR;					\
	ldxa	[tmp1 + tmp2]ASI_EC_DIAG, %g0;				\
	JP_EC_DIAG_ACCESS_MEMBAR;					\
	bg,pt	%xcc, 1b;						\
	nop;								\
	mov	1, tmp2;						\
	sllx	tmp2, JP_ECFLUSH_EC_WAY_SHIFT, tmp2;			\
	add	tmp1, tmp2, tmp1;					\
	mov	(JP_ECACHE_NWAY-1), tmp2;				\
	sllx	tmp2, JP_ECFLUSH_EC_WAY_SHIFT, tmp2;			\
	andcc	tmp1, tmp2, tmp2;					\
	bnz,pt	%xcc, 1b;						\
	srlx	arg1, JP_EC_TO_SET_SIZE_SHIFT, tmp2

#else	/* JALAPENO || SERRANO */

/*
 * Cheetah version of macro that flushes the entire Ecache.
 *
 * Need to displacement flush 2x the ecache size from the Ecache flush area.
 *
 *	arg1 = ecache size
 *	arg2 = ecache linesize
 *	arg3 = ecache flush address - for cheetah only
 */
#define	CH_ECACHE_FLUSHALL(arg1, arg2, arg3)				\
	sllx	arg1, 1, arg1;						\
1:									\
	subcc	arg1, arg2, arg1;					\
	bg,pt	%xcc, 1b;						\
	ldxa	[arg1 + arg3]ASI_MEM, %g0;
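
/*
 * For example, assuming an 8MB E$ with 64-byte lines, the loop above
 * issues (2 * 8MB) / 64 = 262,144 loads from the flush area, enough to
 * displace every line of the direct-mapped E$.
 */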

/*
 * Cheetah+ version of macro that flushes the entire Ecache.
 *
 * Uses the displacement flush feature.
 *
 *	arg1 = ecache size
 *	arg2 = ecache linesize
 *	impl = CPU implementation as returned from GET_CPU_IMPL();
 *	       the value in this register is destroyed during execution
 *	       of the macro.
 */
#if defined(CHEETAH_PLUS)
#define	CHP_ECACHE_FLUSHALL(arg1, arg2, impl)				\
	cmp	impl, PANTHER_IMPL;					\
	bne	%xcc, 1f;						\
	nop;								\
	set	PN_L3_IDX_DISP_FLUSH, impl;				\
	b	2f;							\
	nop;								\
1:									\
	set	CHP_ECACHE_IDX_DISP_FLUSH, impl;			\
2:									\
	subcc	arg1, arg2, arg1;					\
	bg,pt	%xcc, 2b;						\
	ldxa	[arg1 + impl]ASI_EC_DIAG, %g0;
#else	/* CHEETAH_PLUS */
#define	CHP_ECACHE_FLUSHALL(arg1, arg2, impl)
#endif	/* CHEETAH_PLUS */

/*
 * Macro that flushes the entire Ecache.
 *
 *	arg1 = ecache size
 *	arg2 = ecache linesize
 *	arg3 = ecache flush address - for cheetah only
 */
#define	ECACHE_FLUSHALL(arg1, arg2, arg3, tmp1)				\
	GET_CPU_IMPL(tmp1);						\
	cmp	tmp1, CHEETAH_IMPL;					\
	bne	%xcc, 2f;						\
	nop;								\
	CH_ECACHE_FLUSHALL(arg1, arg2, arg3);				\
	ba	3f;							\
	nop;								\
2:									\
	CHP_ECACHE_FLUSHALL(arg1, arg2, tmp1);				\
3:

#endif	/* JALAPENO || SERRANO */

/*
 * Macro that flushes the Panther L2 cache.
 */
#if defined(CHEETAH_PLUS)
#define	PN_L2_FLUSHALL(scr1, scr2, scr3)				\
	GET_CPU_IMPL(scr3);						\
	cmp	scr3, PANTHER_IMPL;					\
	bne	%xcc, 1f;						\
	nop;								\
	set	PN_L2_SIZE, scr1;					\
	set	PN_L2_LINESIZE, scr2;					\
	set	PN_L2_IDX_DISP_FLUSH, scr3;				\
2:									\
	subcc	scr1, scr2, scr1;					\
	bg,pt	%xcc, 2b;						\
	ldxa	[scr1 + scr3]ASI_L2_TAG, %g0;				\
1:
#else	/* CHEETAH_PLUS */
#define	PN_L2_FLUSHALL(scr1, scr2, scr3)
#endif	/* CHEETAH_PLUS */
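
/*
 * Illustrative cost of the flush above: assuming a 2MB L2 with 64-byte
 * lines, the loop performs 2MB / 64 = 32,768 displacement-flush loads.
 */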

/*
 * Given a VA and page size (page size as encoded in ASI_MMU_TAG_ACCESS_EXT),
 * this macro returns the TLB index for that mapping based on a 512 entry
 * (2-way set associative) TLB.  Aside from the 16 entry fully associative
 * TLBs, all TLBs in Panther are 512 entry, 2-way set associative.
 *
 * To find the index, we shift the VA right by 13 + (3 * pg_sz) and then
 * mask out all but the lower 8 bits because:
 *
 *	ASI_[D|I]MMU_TAG_ACCESS_EXT.PgSz = 0 for 8K
 *	ASI_[D|I]MMU_TAG_ACCESS_EXT.PgSz = 1 for 64K
 *	ASI_[D|I]MMU_TAG_ACCESS_EXT.PgSz = 2 for 512K
 *	ASI_[D|I]MMU_TAG_ACCESS_EXT.PgSz = 3 for 4M
 *	ASI_[D|I]MMU_TAG_ACCESS_EXT.PgSz = 4 for 32M
 *	ASI_[D|I]MMU_TAG_ACCESS_EXT.PgSz = 5 for 256M
 *
 * which gives us the following:
 *
 *	array index for   8K pages = VA[20:13]
 *	array index for  64K pages = VA[23:16]
 *	array index for 512K pages = VA[26:19]
 *	array index for   4M pages = VA[29:22]
 *	array index for  32M pages = VA[32:25]
 *	array index for 256M pages = VA[35:28]
 *
 * Input:	Virtual address in which we are interested.
 * Output:	TLB index value.
 *	pg_sz:	Register.  Page Size of the TLB in question as encoded
 *		in the ASI_[D|I]MMU_TAG_ACCESS_EXT register.
 */
#if defined(CHEETAH_PLUS)
#define	PN_GET_TLB_INDEX(va, pg_sz)					\
	srlx	va, 13, va;	/* first shift by 13 bits, then */	\
	srlx	va, pg_sz, va;	/* shift by pg_sz three times. */	\
	srlx	va, pg_sz, va;						\
	srlx	va, pg_sz, va;						\
	and	va, 0xff, va;	/* mask out all but the lower 8 bits */
#endif	/* CHEETAH_PLUS */
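
/*
 * Worked example: for a 64K-page mapping (pg_sz == 1) of the
 * hypothetical address va == 0x12345678, the macro computes
 * 0x12345678 >> (13 + 3*1) == 0x1234 and masks it to 0x34, i.e.
 * the index is VA[23:16] as tabulated above.
 */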

/*
 * The following macros are for error traps at TL>0.
 * The issue with error traps at TL>0 is that there are no safely
 * available global registers.  So we use the trick of generating a
 * software trap, then using the %tpc, %tnpc and %tstate registers to
 * temporarily save the values of %g1 and %g2.
 */

/*
 * Macro to generate an 8-instruction trap table entry for TL>0 trap handlers.
 * Does the following steps:
 *	1. membar #Sync - required for USIII family errors.
 *	2. Specified software trap.
 * NB: Must be 8 instructions or less to fit in the trap table and the code
 * must be relocatable.
 */
#define	CH_ERR_TL1_TRAPENTRY(trapno)					\
	membar	#Sync;							\
	ta	trapno;							\
	nop; nop; nop; nop; nop; nop
1061 * We save the values of %g1 and %g2 in %tpc, %tnpc and %tstate (since
1062 * the low-order two bits of %tpc/%tnpc are reserved and read as zero,
1063 * we need to put the low-order two bits of %g1 and %g2 in %tstate).
1064 * Note that %tstate has a reserved hole from bits 3-7, so we put the
1065 * low-order two bits of %g1 in bits 0-1 and the low-order two bits of
1066 * %g2 in bits 10-11 (insuring bits 8-9 are zero for use by the D$/I$
1067 * state bits). Note that we must do a jmp instruction, since this
1068 * is moved into the trap table entry.
1069 * NB: Must be 8 instructions or less to fit in trap table and code must
1072 #define CH_ERR_TL1_SWTRAPENTRY(label) \
1076 sllx %g2, CH_ERR_G2_TO_TSTATE_SHFT, %g2; \
1078 sethi %hi(label), %g1; \
1079 jmp %g1+%lo(label); \

/*
 * Macro to get ptr to ch_err_tl1_data.
 * reg1 will either point to a physaddr with ASI_MEM in %asi OR it
 * will point to a kernel nucleus virtual address with ASI_N in %asi.
 * This allows us to:
 *	1. Avoid getting MMU misses.  We may have gotten the original
 *	   Fast ECC error in an MMU handler and if we get an MMU trap
 *	   in the TL>0 handlers, we'll scribble on the MMU regs.
 *	2. Allows us to use the same code in the TL>0 handlers whether
 *	   we're accessing kernel nucleus virtual addresses or physical
 *	   addresses.
 * Pseudo-code:
 *	reg1 <- ch_err_tl1_paddrs[CPUID];
 *	if (reg1 == NULL) {
 *		reg1 <- &ch_err_tl1_data;
 *	} else {
 *		reg1 <- reg1 + offset +
 *		    sizeof (ch_err_tl1_data) * (%tl - 3);
 *	}
 */
#define	GET_CH_ERR_TL1_PTR(reg1, reg2, offset)				\
	CPU_INDEX(reg1, reg2);						\
	sllx	reg1, 3, reg1;						\
	set	ch_err_tl1_paddrs, reg2;				\
	ldx	[reg1+reg2], reg1;					\
	brnz	reg1, 1f;						\
	add	reg1, offset, reg1;					\
	set	ch_err_tl1_data, reg1;					\
	ba	2f;							\
	wr	%g0, ASI_N, %asi;					\
1:	rdpr	%tl, reg2;						\
	sub	reg2, 3, reg2;						\
	mulx	reg2, CH_ERR_TL1_DATA_SIZE, reg2;			\
	add	reg1, reg2, reg1;					\
	wr	%g0, ASI_MEM, %asi;					\
2:

/*
 * Macro to generate entry code for TL>0 error handlers.
 * At the end of this macro, %g1 will point to the ch_err_tl1_data
 * structure, %g2 will have the original flags in the ch_err_tl1_data
 * structure, and %g5 will have the value of %tstate where the Fast ECC
 * routines will save the state of the D$ in Bit2 CH_ERR_TSTATE_DC_ON.
 * All %g registers except for %g1, %g2 and %g5 will be available after
 * this macro.
 * Does the following steps:
 *	1. Compute physical address of per-cpu/per-tl save area using
 *	   only %g1+%g2 (which we've saved in %tpc, %tnpc, %tstate),
 *	   leaving address in %g1 and updating the %asi register.
 *	   If there is no data area available, we branch to label.
 *	2. Save %g3-%g7 in save area.
 *	3. Save %tpc->%g3, %tnpc->%g4, %tstate->%g5, which contain
 *	   original %g1+%g2 values (because we're going to change %tl).
 *	4. set %tl <- %tl - 1.  We do this ASAP to make the window of
 *	   running at %tl+1 as small as possible.
 *	5. Reconstitute %g1+%g2 from %tpc (%g3), %tnpc (%g4),
 *	   %tstate (%g5) and save in save area, carefully preserving %g5
 *	   because it has the CH_ERR_TSTATE_DC_ON value.
 *	6. Load existing ch_err_tl1_data flags in %g2.
 *	7. Compute the new flags.
 *	8. If %g2 is non-zero (the structure was busy), shift the new
 *	   flags by CH_ERR_ME_SHIFT and or them with the old flags.
 *	9. Store the updated flags into ch_err_tl1_data flags.
 *	10. If %g2 is non-zero, read the %tpc and store it in
 *	    ch_err_tl1_tpc.
 */
#define	CH_ERR_TL1_ENTER(flags)						\
	GET_CH_ERR_TL1_PTR(%g1, %g2, CHPR_TL1_ERR_DATA);		\
	stxa	%g3, [%g1 + CH_ERR_TL1_G3]%asi;				\
	stxa	%g4, [%g1 + CH_ERR_TL1_G4]%asi;				\
	stxa	%g5, [%g1 + CH_ERR_TL1_G5]%asi;				\
	stxa	%g6, [%g1 + CH_ERR_TL1_G6]%asi;				\
	stxa	%g7, [%g1 + CH_ERR_TL1_G7]%asi;				\
	rdpr	%tpc, %g3;						\
	rdpr	%tnpc, %g4;						\
	rdpr	%tstate, %g5;						\
	rdpr	%tl, %g6;						\
	sub	%g6, 1, %g6;						\
	wrpr	%g6, %tl;						\
	and	%g5, 3, %g6;						\
	or	%g3, %g6, %g3;						\
	stxa	%g3, [%g1 + CH_ERR_TL1_G1]%asi;				\
	srlx	%g5, CH_ERR_G2_TO_TSTATE_SHFT, %g6;			\
	and	%g6, 3, %g6;						\
	or	%g4, %g6, %g4;						\
	stxa	%g4, [%g1 + CH_ERR_TL1_G2]%asi;				\
	ldxa	[%g1 + CH_ERR_TL1_FLAGS]%asi, %g2;			\
	set	flags | CH_ERR_TL, %g3;					\
	brz	%g2, 9f;						\
	sllx	%g3, CH_ERR_ME_SHIFT, %g4;				\
	or	%g4, %g2, %g3;						\
9:	stxa	%g3, [%g1 + CH_ERR_TL1_FLAGS]%asi;			\
	brz	%g2, 8f;						\
	rdpr	%tpc, %g4;						\
	stxa	%g4, [%g1 + CH_ERR_TL1_TPC]%asi;			\
8:

/*
 * Turns off D$/I$ and saves the state of DCU_DC+DCU_IC in %tstate Bits 8+9
 * (CH_ERR_TSTATE_DC_ON/CH_ERR_TSTATE_IC_ON).  This is invoked on Fast ECC
 * at TL>0 handlers because the D$ may have corrupted data and we need to
 * turn off the I$ to allow for diagnostic accesses.  We then invoke
 * the normal entry macro and after it is done we save the values of
 * the original D$/I$ state, which is in %g5 bits CH_ERR_TSTATE_DC_ON/
 * CH_ERR_TSTATE_IC_ON, in ch_err_tl1_tmp.
 */
#define	CH_ERR_TL1_FECC_ENTER						\
	ldxa	[%g0]ASI_DCU, %g1;					\
	andn	%g1, DCU_DC + DCU_IC, %g2;				\
	stxa	%g2, [%g0]ASI_DCU;					\
	flush	%g0;	/* DCU_IC need flush */				\
	rdpr	%tstate, %g2;						\
	and	%g1, DCU_DC + DCU_IC, %g1;				\
	sllx	%g1, CH_ERR_DCU_TO_TSTATE_SHFT, %g1;			\
	or	%g1, %g2, %g2;						\
	wrpr	%g2, %tstate;						\
	CH_ERR_TL1_ENTER(CH_ERR_FECC);					\
	and	%g5, CH_ERR_TSTATE_DC_ON + CH_ERR_TSTATE_IC_ON, %g5;	\
	stxa	%g5, [%g1 + CH_ERR_TL1_TMP]%asi

/*
 * Macro to generate exit code for TL>0 error handlers.
 * We fall into this macro if we've successfully logged the error in
 * the ch_err_tl1_data structure and want the PIL15 softint to pick
 * it up.
 * Does the following steps:
 *	1. Set pending flag for this cpu in ch_err_tl1_pending.
 *	2. Write %set_softint with (1<<pil) to cause a pil level trap.
 *	3. Restore registers from ch_err_tl1_data, which is pointed to
 *	   by %g1; the last register to restore is %g1 since it's pointing
 *	   to the save area.
 *	4. Execute retry.
 */
#define	CH_ERR_TL1_EXIT							\
	CPU_INDEX(%g2, %g3);						\
	set	ch_err_tl1_pending, %g3;				\
	mov	1, %g4;							\
	stb	%g4, [%g2 + %g3];					\
	mov	1, %g2;							\
	sll	%g2, PIL_15, %g2;					\
	wr	%g2, SET_SOFTINT;					\
	ldxa	[%g1 + CH_ERR_TL1_G7]%asi, %g7;				\
	ldxa	[%g1 + CH_ERR_TL1_G6]%asi, %g6;				\
	ldxa	[%g1 + CH_ERR_TL1_G5]%asi, %g5;				\
	ldxa	[%g1 + CH_ERR_TL1_G4]%asi, %g4;				\
	ldxa	[%g1 + CH_ERR_TL1_G3]%asi, %g3;				\
	ldxa	[%g1 + CH_ERR_TL1_G2]%asi, %g2;				\
	ldxa	[%g1 + CH_ERR_TL1_G1]%asi, %g1;				\
	retry

/*
 * Generates the unrecoverable error label for TL>0 handlers.
 * At label (Unrecoverable error routine):
 *	1. Sets flags in ch_err_tl1_data and leaves them in %g2 (first
 *	   argument to cpu_tl1_err_panic).
 *	2. Calls cpu_tl1_err_panic via systrap at PIL 15.
 */
#define	CH_ERR_TL1_PANIC_EXIT(label)					\
label:	ldxa	[%g1 + CH_ERR_TL1_FLAGS]%asi, %g2;			\
	or	%g2, CH_ERR_TL | CH_ERR_PANIC, %g2;			\
	stxa	%g2, [%g1 + CH_ERR_TL1_FLAGS]%asi;			\
	set	cpu_tl1_err_panic, %g1;					\
	ba	sys_trap;						\
	mov	PIL_15, %g4

#endif	/* _CHEETAHASM_H */