1 /* SPDX-License-Identifier: GPL-2.0-or-later */
4 * Intel Pentium L2 Cache initialization.
5 * This code was developed by reverse engineering
6 * the BIOS. Where the code accesses documented
7 * registers I have added comments as best I can.
8 * Some undocumented registers on the Pentium II are
9 * used so some of the documentation is incomplete.
12 * Intel Architecture Software Developer's Manual
13 * Volume 3B: System Programming Guide, Part 2 (#253669)
17 /* This code is ported from coreboot v1.
18 * The L2 cache initialization sequence here only applies to SECC/SECC2 P6 family
19 * CPUs with Klamath (63x), Deschutes (65x) and Katmai (67x) cores.
20 * It is not required for Coppermine (68x) and Tualatin (6bx) cores.
21 * It is currently not known if Celerons with Mendocino (66x) core require the
22 * special initialization.
23 * Covington-core Celerons do not have L2 cache.
26 #include <console/console.h>
28 #include <cpu/intel/l2_cache.h>
29 #include <cpu/x86/cache.h>
30 #include <cpu/x86/msr.h>
34 struct latency_entry
{
39 Latency maps for Deschutes and Katmai.
40 No such mapping is available for Klamath.
43 be written to L2 -----++++
45 0000 xx 00 -----> 000 cccc 0
48 |||| 01 133MHz (Katmai "B" only)
49 ++++------ CPU frequency multiplier
69 static const struct latency_entry latency_650_t0
[] = {
70 {0x10, 0x02}, {0x50, 0x02}, {0x20, 0x04}, {0x60, 0x06},
71 {0x00, 0x08}, {0x40, 0x0C}, {0x12, 0x06}, {0x52, 0x0A},
72 {0x22, 0x0E}, {0x62, 0x10}, {0x02, 0x10}, {0xFF, 0x00}
75 static const struct latency_entry latency_650_t1
[] = {
76 {0x12, 0x14}, {0x52, 0x16}, {0x22, 0x16}, {0x62, 0x16},
80 static const struct latency_entry latency_670_t0
[] = {
81 {0x60, 0x06}, {0x00, 0x08}, {0x12, 0x06}, {0x52, 0x0A},
82 {0x22, 0x0E}, {0x62, 0x10}, {0x02, 0x10}, {0x42, 0x02},
83 {0x11, 0x0E}, {0x51, 0x0C}, {0x21, 0x02}, {0x61, 0x10},
84 {0x01, 0x10}, {0x41, 0x02}, {0xFF, 0x00}
87 static const struct latency_entry latency_670_t1
[] = {
88 {0x22, 0x18}, {0x62, 0x18}, {0x02, 0x1A}, {0x11, 0x18},
92 static const struct latency_entry latency_670_t2
[] = {
93 {0x22, 0x12}, {0x62, 0x14}, {0x02, 0x16}, {0x42, 0x1E},
94 {0x11, 0x12}, {0x51, 0x16}, {0x21, 0x1E}, {0x61, 0x14},
95 {0x01, 0x16}, {0x41, 0x1E}, {0xFF, 0x00}
98 /* Latency tables for 650 model/type */
99 static const struct latency_entry
*latency_650
[] = {
100 latency_650_t0
, latency_650_t1
, latency_650_t1
103 /* Latency tables for 670 model/type */
104 static const struct latency_entry
*latency_670
[] = {
105 latency_670_t0
, latency_670_t1
, latency_670_t2
108 int calculate_l2_latency(void)
110 u32 eax
, l
, signature
;
111 const struct latency_entry
*latency_table
, *le
;
114 /* First, attempt to get cache latency value from
115 IA32_PLATFORM_ID[56:53]. (L2 Cache Latency Read)
117 msr
= rdmsr(IA32_PLATFORM_ID
);
119 printk(BIOS_DEBUG
, "rdmsr(IA32_PLATFORM_ID) = %x:%x\n", msr
.hi
, msr
.lo
);
121 l
= (msr
.hi
>> 20) & 0x1e;
124 /* If latency value isn't available from
125 IA32_PLATFORM_ID[56:53], read it from
126 L2 control register 0 for lookup from
130 /* The raw code is read from L2 register 0, bits [7:4]. */
146 printk(BIOS_DEBUG
, "L2 latency type = %x\n", t
);
148 /* Get CPUID family/model */
149 signature
= cpuid_eax(1) & 0xfff0;
151 /* Read EBL_CR_POWERON */
152 msr
= rdmsr(EBL_CR_POWERON
);
153 /* Get clock multiplier and FSB frequency.
154 * Multiplier is in [25:22].
155 * FSB is in [19:18] in Katmai, [19] in Deschutes ([18] is zero
159 if (signature
== 0x650) {
161 latency_table
= latency_650
[t
];
162 } else if (signature
== 0x670) {
164 latency_table
= latency_670
[t
];
168 /* Search table for matching entry */
169 for (le
= latency_table
; le
->key
!= eax
; le
++) {
170 /* Fail if we get to the end of the table */
171 if (le
->key
== 0xff) {
173 "Could not find key %02x in latency table\n",
182 printk(BIOS_DEBUG
, "L2 Cache latency is %d\n", l
/ 2);
184 /* Writes the calculated latency in BBL_CR_CTL3[4:1]. */
185 msr
= rdmsr(BBL_CR_CTL3
);
186 msr
.lo
&= 0xffffffe1;
188 wrmsr(BBL_CR_CTL3
, msr
);
193 /* Setup address, data_high:data_low into the L2
194 * control registers and then issue command with correct cache way
196 int signal_l2(u32 address
, u32 data_high
, u32 data_low
, int way
, u8 command
)
201 /* Write L2 Address to BBL_CR_ADDR */
204 wrmsr(BBL_CR_ADDR
, msr
);
206 /* Write data to BBL_CR_D{0..3} */
209 for (i
= BBL_CR_D0
; i
<= BBL_CR_D3
; i
++)
212 /* Put the command and way into BBL_CR_CTL */
213 msr
= rdmsr(BBL_CR_CTL
);
214 msr
.lo
= (msr
.lo
& 0xfffffce0) | command
| (way
<< 8);
215 wrmsr(BBL_CR_CTL
, msr
);
217 /* Trigger L2 controller */
220 wrmsr(BBL_CR_TRIG
, msr
);
222 /* Poll the controller to see when done */
223 for (i
= 0; i
< 0x100; i
++) {
224 /* Read BBL_CR_BUSY */
225 msr
= rdmsr(BBL_CR_BUSY
);
226 /* If not busy then return */
227 if ((msr
.lo
& 1) == 0)
231 /* Return timeout code */
235 /* Read the L2 Cache controller register at given address */
236 int read_l2(u32 address
)
240 /* Send a L2 Control Register Read to L2 controller */
241 if (signal_l2(address
<< 5, 0, 0, 0, L2CMD_CR
) != 0)
244 /* If OK then get the result from BBL_CR_ADDR */
245 msr
= rdmsr(BBL_CR_ADDR
);
246 return (msr
.lo
>> 0x15);
249 /* Write data into the L2 controller register at address */
250 int write_l2(u32 address
, u32 data
)
262 if ((v1
& 0x20) == 0) {
268 /* This write has to be replicated to a number of places. Not sure what.
271 for (i
= 0; i
< v2
; i
++) {
276 // address = 00aaaaaa
277 // Final address signaled:
278 // 000fffff fff000c0 000dcaaa aaa00000
281 data2
= (i
<< 11) & 0x1800;
287 /* Signal L2 controller */
288 if (signal_l2((address
<< 5) | data1
, 0, 0, 0, 3))
294 /* Write data_high:data_low into the cache at address1. Test address2
295 * to see if the same data is returned. Return 0 if the data matches.
296 * Return the lower 16 bits of the mismatched data on mismatch. Return -1
299 int test_l2_address_alias(u32 address1
, u32 address2
,
300 u32 data_high
, u32 data_low
)
305 /* Tag Write with Data Write for L2 */
306 if (signal_l2(address1
, data_high
, data_low
, 0, L2CMD_TWW
))
309 /* Tag Read with Data Read for L2 */
310 if (signal_l2(address2
, 0, 0, 0, L2CMD_TRR
))
313 /* Read data from BBL_CR_D[0-3] */
314 for (d
= BBL_CR_D0
; d
<= BBL_CR_D3
; d
++) {
316 if (msr
.lo
!= data_low
|| msr
.hi
!= data_high
)
317 return (msr
.lo
& 0xffff);
323 /* Calculates the L2 cache size.
325 * Reference: Intel(R) 64 and IA-32 Architectures Software Developer's Manual
326 * Volume 3B: System Programming Guide, Part 2, Intel pub. 253669,
330 int calculate_l2_cache_size(void)
335 u32 address
, size
, eax
, bblcr3
;
340 if ((v
& 0x20) == 0) {
341 msr
= rdmsr(BBL_CR_CTL3
);
342 bblcr3
= msr
.lo
& ~BBLCR3_L2_SIZE
;
344 * Successively write in all the possible cache size per bank
345 * into BBL_CR_CTL3[17:13], starting from 256KB (00001) to 4MB
346 * (10000), and read the last value written and accepted by the
349 * No idea why these bits are writable at all.
351 for (cache_setting
= BBLCR3_L2_SIZE_256K
;
352 cache_setting
<= BBLCR3_L2_SIZE_4M
; cache_setting
<<= 1) {
353 eax
= bblcr3
| cache_setting
;
355 wrmsr(BBL_CR_CTL3
, msr
);
356 msr
= rdmsr(BBL_CR_CTL3
);
358 /* Value not accepted */
363 /* Backtrack to the last value that worked... */
366 /* and write it into BBL_CR_CTL3 */
367 msr
.lo
&= ~BBLCR3_L2_SIZE
;
368 msr
.lo
|= (cache_setting
& BBLCR3_L2_SIZE
);
370 wrmsr(BBL_CR_CTL3
, msr
);
372 printk(BIOS_DEBUG
, "Maximum cache mask is %x\n", cache_setting
);
374 /* For now, BBL_CR_CTL3 has the highest cache "size" that
375 * register will accept. Now we'll ping the cache and see where
379 /* Write aaaaaaaa:aaaaaaaa to address 0 in the l2 cache.
380 * If this "alias test" returns an "address", it means the
381 * cache cannot be written to properly, and we have a problem.
383 v
= test_l2_address_alias(0, 0, 0xaaaaaaaa, 0xaaaaaaaa);
387 /* Start with 32K wrap point (256KB actually) */
392 v
= test_l2_address_alias(address
, 0, 0x55555555,
404 if (address
> 0x200000)
411 /* Shift to [17:13] */
414 /* Set this into BBL_CR_CTL3 */
415 msr
= rdmsr(BBL_CR_CTL3
);
416 msr
.lo
&= ~BBLCR3_L2_SIZE
;
418 wrmsr(BBL_CR_CTL3
, msr
);
420 printk(BIOS_DEBUG
, "L2 Cache Mask is %x\n", size
);
430 printk(BIOS_DEBUG
, "L2(2): %x ", v
);
434 /* Shift size right by v */
437 /* Or in this size */
440 printk(BIOS_DEBUG
, "-> %x\n", v
);
442 if (write_l2(2, v
) != 0)
445 // Some cache size information is available from L2 registers.
451 printk(BIOS_DEBUG
, "L2(2) = %x\n", v
);
456 // L2 register 2 bitmap: cc---bbb
464 printk(BIOS_DEBUG
, "Calculated a = %x\n", v
);
472 /* Write this size into BBL_CR_CTL3 */
473 msr
= rdmsr(BBL_CR_CTL3
);
474 msr
.lo
&= ~BBLCR3_L2_SIZE
;
476 wrmsr(BBL_CR_CTL3
, msr
);
482 // L2 physical address range can be found from L2 control register 3,
484 int calculate_l2_physical_address_range(void)
502 printk(BIOS_DEBUG
, "L2 Physical Address Range is %dM\n",
505 /* Shift into [22:20] to be saved into BBL_CR_CTL3. */
508 msr
= rdmsr(BBL_CR_CTL3
);
509 msr
.lo
&= ~BBLCR3_L2_PHYSICAL_RANGE
;
511 wrmsr(BBL_CR_CTL3
, msr
);
519 const u32 data1
= 0xaa55aa55;
520 const u32 data2
= 0xaaaaaaaa;
523 /* Set User Supplied ECC in BBL_CR_CTL */
524 msr
= rdmsr(BBL_CR_CTL
);
525 msr
.lo
|= BBLCR3_L2_SUPPLIED_ECC
;
526 wrmsr(BBL_CR_CTL
, msr
);
528 /* Write a value into the L2 Data ECC register BBL_CR_DECC */
531 wrmsr(BBL_CR_DECC
, msr
);
533 if (test_l2_address_alias(0, 0, data2
, data2
) < 0)
536 /* Read back ECC from BBL_CR_DECC */
537 msr
= rdmsr(BBL_CR_DECC
);
541 printk(BIOS_DEBUG
, "L2 ECC Checking is enabled\n");
543 /* Set ECC Check Enable in BBL_CR_CTL3 */
544 msr
= rdmsr(BBL_CR_CTL3
);
545 msr
.lo
|= BBLCR3_L2_ECC_CHECK_ENABLE
;
546 wrmsr(BBL_CR_CTL3
, msr
);
549 /* Clear User Supplied ECC in BBL_CR_CTL */
550 msr
= rdmsr(BBL_CR_CTL
);
551 msr
.lo
&= ~BBLCR3_L2_SUPPLIED_ECC
;
552 wrmsr(BBL_CR_CTL
, msr
);
558 * This is the function called from CPU initialization
559 * driver to set up P6 family L2 cache.
562 int p6_configure_l2_cache(void)
567 int cache_size
, bank
;
568 int result
, calc_eax
;
571 int badclk1
, badclk2
, clkratio
;
574 printk(BIOS_INFO
, "Configuring L2 cache... ");
576 /* Read BBL_CR_CTL3 */
577 bblctl3
= rdmsr(BBL_CR_CTL3
);
578 /* If bit 23 (L2 Hardware disable) is set then done */
579 /* These would be Covington core Celerons with no L2 cache */
580 if (bblctl3
.lo
& BBLCR3_L2_NOT_PRESENT
) {
581 printk(BIOS_INFO
, "hardware disabled\n");
585 signature
= cpuid_eax(1) & 0xfff0;
587 /* Klamath-specific bit settings for certain
590 if (signature
== 0x630) {
591 clkratio
= 0x1c00000;
595 clkratio
= 0x3c00000;
601 /* Read EBL_CR_POWERON */
602 msr
= rdmsr(EBL_CR_POWERON
);
604 /* Mask out [22-25] Clock frequency ratio */
606 if (eax
== badclk1
|| eax
== badclk2
) {
607 printk(BIOS_ERR
, "Incorrect clock frequency ratio %x\n", eax
);
613 /* Mask out from BBL_CR_CTL3:
615 * [5] ECC Check Enable
616 * [6] Address Parity Check Enable
617 * [7] CRTN Parity Check Enable
619 * [12:11] Number of L2 banks
620 * [17:13] Cache size per bank
622 * [22:20] L2 Physical Address Range Support
624 bblctl3
.lo
&= 0xff88061e;
626 * [17:13] = 00010 = 512Kbyte Cache size per bank (63x)
627 * [17:13] = 00000 = 128Kbyte Cache size per bank (all others)
628 * [18] Cache state error checking enable
630 bblctl3
.lo
|= crctl3_or
;
632 /* Write BBL_CR_CTL3 */
633 wrmsr(BBL_CR_CTL3
, bblctl3
);
635 if (signature
!= 0x630) {
638 /* Set the l2 latency in BBL_CR_CTL3 */
639 if (calculate_l2_latency() != 0)
642 /* Read the new latency values back */
643 bblctl3
= rdmsr(BBL_CR_CTL3
);
644 calc_eax
= bblctl3
.lo
;
646 /* Write back the original default value */
648 wrmsr(BBL_CR_CTL3
, bblctl3
);
650 /* Write BBL_CR_CTL3[27:26] (reserved??) to bits [1:0] of L2
651 * register 4. Apparently all other bits must be preserved,
655 v
= (calc_eax
>> 26) & 0x3;
657 printk(BIOS_DEBUG
, "write_l2(4, %x)\n", v
);
664 /* a now contains result code from write_l2() */
669 /* Restore the correct latency value into BBL_CR_CTL3 */
670 bblctl3
.lo
= calc_eax
;
671 wrmsr(BBL_CR_CTL3
, bblctl3
);
674 /* Read L2 register 0 */
677 /* If L2(0)[5] set (and can be read properly), enable CRTN and address
680 if (v
>= 0 && (v
& 0x20)) {
681 bblctl3
= rdmsr(BBL_CR_CTL3
);
682 bblctl3
.lo
|= (BBLCR3_L2_ADDR_PARITY_ENABLE
|
683 BBLCR3_L2_CRTN_PARITY_ENABLE
);
684 wrmsr(BBL_CR_CTL3
, bblctl3
);
687 /* If something goes wrong at L2 ECC setup, cache ECC
688 * will just remain disabled.
692 if (calculate_l2_physical_address_range() != 0) {
694 "Failed to calculate L2 physical address range");
698 if (calculate_l2_cache_size() != 0) {
699 printk(BIOS_ERR
, "Failed to calculate L2 cache size");
703 /* Turn on cache. Only L1 is active at this time. */
706 /* Get the calculated cache size from BBL_CR_CTL3[17:13] */
707 bblctl3
= rdmsr(BBL_CR_CTL3
);
708 cache_size
= (bblctl3
.lo
& BBLCR3_L2_SIZE
);
711 cache_size
= cache_size
<< 3;
713 /* TODO: Cache size above is per bank. We're supposed to get
714 * the number of banks from BBL_CR_CTL3[12:11].
715 * Confirm that this still provides the correct answer.
717 bank
= (bblctl3
.lo
>> 11) & 0x3;
721 printk(BIOS_INFO
, "size %dK... ", cache_size
* bank
* 4 / 1024);
723 /* Write to all cache lines to initialize */
725 while (cache_size
> 0) {
726 /* Each cache line is 32 bytes. */
729 /* Update each way */
731 /* We're supposed to get L2 associativity from
732 * BBL_CR_CTL3[10:9]. But this code only applies to certain
733 * members of the P6 processor family and since all P6
734 * processors have 4-way L2 cache, we can safely assume
735 * 4 way for all cache operations.
738 for (v
= 0; v
< 4; v
++) {
739 /* Send Tag Write w/Data Write (TWW) to L2 controller
742 if (signal_l2(cache_size
, 0, 0, v
, L2CMD_TWW
743 | L2CMD_MESI_I
) != 0) {
745 "Failed on signal_l2(%x, %x)\n",
751 printk(BIOS_DEBUG
, "L2 Cache lines initialized\n");
756 /* Set L2 cache configured in BBL_CR_CTL3 */
757 bblctl3
= rdmsr(BBL_CR_CTL3
);
758 bblctl3
.lo
|= BBLCR3_L2_CONFIGURED
;
759 wrmsr(BBL_CR_CTL3
, bblctl3
);
761 /* Invalidate cache and discard unsaved writes */
762 asm volatile ("invd");
764 /* Write 0 to L2 control register 5 */
765 if (write_l2(5, 0) != 0) {
766 printk(BIOS_ERR
, "write_l2(5, 0) failed\n");
770 bblctl3
= rdmsr(BBL_CR_CTL3
);
771 if (signature
== 0x650) {
772 /* Change the L2 latency to 0101 then back to
773 * original value. I don't know why this is needed - dpd
776 bblctl3
.lo
&= ~BBLCR3_L2_LATENCY
;
778 wrmsr(BBL_CR_CTL3
, bblctl3
);
780 wrmsr(BBL_CR_CTL3
, bblctl3
);
783 /* Enable L2 in BBL_CR_CTL3 */
784 bblctl3
.lo
|= BBLCR3_L2_ENABLED
;
785 wrmsr(BBL_CR_CTL3
, bblctl3
);
787 /* Turn on cache. Both L1 and L2 are now active. Wahoo! */
794 printk(BIOS_INFO
, "done.\n");