northbridge/intel/sandybridge: Enable x86_64 for mrc.bin
src/northbridge/intel/sandybridge/raminit_common.c (blob 2e4967532efe9d18a1e8b423ead18f852cb6780c)
1 /* SPDX-License-Identifier: GPL-2.0-only */
3 #include <assert.h>
4 #include <commonlib/helpers.h>
5 #include <console/console.h>
6 #include <cpu/intel/model_206ax/model_206ax.h>
7 #include <device/mmio.h>
8 #include <device/pci_ops.h>
9 #include <northbridge/intel/sandybridge/chip.h>
10 #include <device/pci_def.h>
11 #include <delay.h>
12 #include <types.h>
14 #include "raminit_common.h"
15 #include "raminit_tables.h"
16 #include "sandybridge.h"
18 /* FIXME: no support for 3-channel chipsets */
20 static void sfence(void)
22 asm volatile ("sfence");
25 /* Toggle IO reset bit */
26 static void toggle_io_reset(void)
28 u32 r32 = mchbar_read32(MC_INIT_STATE_G);
29 mchbar_write32(MC_INIT_STATE_G, r32 | (1 << 5));
30 udelay(1);
31 mchbar_write32(MC_INIT_STATE_G, r32 & ~(1 << 5));
32 udelay(1);
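/*
 * Crossover (xover) enable helpers. As the shifts and masks below suggest,
 * get_XOVER_CLK() places the four rank-enable bits of the rankmap into bits 24-27 of
 * GDCRCKPICODE, enabling the clock crossover for each populated rank, while
 * get_XOVER_CMD() sets bit 14 to enable the command crossover and bits 17/26 to enable
 * the control crossover for DIMM 0 (ranks 0/1) and DIMM 1 (ranks 2/3) respectively.
 */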
35 static u32 get_XOVER_CLK(u8 rankmap)
37 return rankmap << 24;
40 static u32 get_XOVER_CMD(u8 rankmap)
42 u32 reg;
44 /* Enable xover cmd */
45 reg = 1 << 14;
47 /* Enable xover ctl */
48 if (rankmap & 0x03)
49 reg |= (1 << 17);
51 if (rankmap & 0x0c)
52 reg |= (1 << 26);
54 return reg;
57 void dram_find_common_params(ramctr_timing *ctrl)
59 size_t valid_dimms;
60 int channel, slot;
61 dimm_info *dimms = &ctrl->info;
63 ctrl->cas_supported = (1 << (MAX_CAS - MIN_CAS + 1)) - 1;
64 valid_dimms = 0;
66 FOR_ALL_CHANNELS for (slot = 0; slot < 2; slot++) {
68 const struct dimm_attr_ddr3_st *dimm = &dimms->dimm[channel][slot];
69 if (dimm->dram_type != SPD_MEMORY_TYPE_SDRAM_DDR3)
70 continue;
72 valid_dimms++;
74 /* Find all possible CAS combinations */
75 ctrl->cas_supported &= dimm->cas_supported;
77 /* Find the smallest common latencies supported by all DIMMs */
78 ctrl->tCK = MAX(ctrl->tCK, dimm->tCK);
79 ctrl->tAA = MAX(ctrl->tAA, dimm->tAA);
80 ctrl->tWR = MAX(ctrl->tWR, dimm->tWR);
81 ctrl->tRCD = MAX(ctrl->tRCD, dimm->tRCD);
82 ctrl->tRRD = MAX(ctrl->tRRD, dimm->tRRD);
83 ctrl->tRP = MAX(ctrl->tRP, dimm->tRP);
84 ctrl->tRAS = MAX(ctrl->tRAS, dimm->tRAS);
85 ctrl->tRFC = MAX(ctrl->tRFC, dimm->tRFC);
86 ctrl->tWTR = MAX(ctrl->tWTR, dimm->tWTR);
87 ctrl->tRTP = MAX(ctrl->tRTP, dimm->tRTP);
88 ctrl->tFAW = MAX(ctrl->tFAW, dimm->tFAW);
89 ctrl->tCWL = MAX(ctrl->tCWL, dimm->tCWL);
90 ctrl->tCMD = MAX(ctrl->tCMD, dimm->tCMD);
93 if (!ctrl->cas_supported)
94 die("Unsupported DIMM combination. DIMMS do not support common CAS latency");
96 if (!valid_dimms)
97 die("No valid DIMMs found");
100 void dram_xover(ramctr_timing *ctrl)
102 u32 reg;
103 int channel;
105 FOR_ALL_CHANNELS {
106 /* Enable xover clk */
107 reg = get_XOVER_CLK(ctrl->rankmap[channel]);
108 printram("XOVER CLK [%x] = %x\n", GDCRCKPICODE_ch(channel), reg);
109 mchbar_write32(GDCRCKPICODE_ch(channel), reg);
111 /* Enable xover ctl & xover cmd */
112 reg = get_XOVER_CMD(ctrl->rankmap[channel]);
113 printram("XOVER CMD [%x] = %x\n", GDCRCMDPICODING_ch(channel), reg);
114 mchbar_write32(GDCRCMDPICODING_ch(channel), reg);
118 static void dram_odt_stretch(ramctr_timing *ctrl, int channel)
120 u32 addr, stretch;
122 stretch = ctrl->ref_card_offset[channel];
124 * ODT stretch:
125 * Delay the ODT signal by the stretch value. Useful for multi-DIMM setups on the same channel.
127 if (IS_SANDY_CPU(ctrl->cpu) && IS_SANDY_CPU_C(ctrl->cpu)) {
128 if (stretch == 2)
129 stretch = 3;
131 addr = SCHED_SECOND_CBIT_ch(channel);
132 mchbar_clrsetbits32(addr, 0xf << 10, stretch << 12 | stretch << 10);
133 printk(RAM_DEBUG, "OTHP Workaround [%x] = %x\n", addr, mchbar_read32(addr));
134 } else {
135 addr = TC_OTHP_ch(channel);
136 union tc_othp_reg tc_othp = {
137 .raw = mchbar_read32(addr),
139 tc_othp.odt_delay_d0 = stretch;
140 tc_othp.odt_delay_d1 = stretch;
141 mchbar_write32(addr, tc_othp.raw);
142 printk(RAM_DEBUG, "OTHP [%x] = %x\n", addr, mchbar_read32(addr));
146 void dram_timing_regs(ramctr_timing *ctrl)
148 int channel;
150 /* BIN parameters */
151 const union tc_dbp_reg tc_dbp = {
152 .tRCD = ctrl->tRCD,
153 .tRP = ctrl->tRP,
154 .tAA = ctrl->CAS,
155 .tCWL = ctrl->CWL,
156 .tRAS = ctrl->tRAS,
159 /* Regular access parameters */
160 const union tc_rap_reg tc_rap = {
161 .tRRD = ctrl->tRRD,
162 .tRTP = ctrl->tRTP,
163 .tCKE = ctrl->tCKE,
164 .tWTR = ctrl->tWTR,
165 .tFAW = ctrl->tFAW,
166 .tWR = ctrl->tWR,
167 .tCMD = 3,
170 /* Other parameters */
171 const union tc_othp_reg tc_othp = {
172 .tXPDLL = MIN(ctrl->tXPDLL, 31),
173 .tXP = MIN(ctrl->tXP, 7),
174 .tAONPD = ctrl->tAONPD,
175 .tCPDED = 2,
176 .tPRPDEN = 1,
180 * If tXP and tXPDLL are very high, they no longer fit in the bitfields
181 * of the TC_OTHP register. If so, we set bits in TC_DTP to compensate.
182 * This can only happen on Ivy Bridge, and when overclocking the RAM.
184 const union tc_dtp_reg tc_dtp = {
185 .overclock_tXP = ctrl->tXP >= 8,
186 .overclock_tXPDLL = ctrl->tXPDLL >= 32,
190 * TC-Refresh timing parameters:
191 * The tREFIx9 field should be programmed to the minimum of 8.9 * tREFI (to allow
192 * for possible delays from ZQ or isoc) and tRASmax (70us), divided by 1024.
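/*
 * Worked example (assuming tCK is in units of 1/256 ns, as the TCK_* constants suggest,
 * and tREFI/tRFC are counted in DCLKs): at an 800 MHz DCLK (DDR3-1600), tCK = 320, so
 * (70000 << 8) / tCK = 56000 DCLKs, i.e. 70 us. With tREFI = 6240 (7.8 us),
 * 8.9 * tREFI = 55536 is the smaller value, and tREFIx9 = 55536 / 1024 = 54.
 */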
194 const u32 val32 = MIN((ctrl->tREFI * 89) / 10, (70000 << 8) / ctrl->tCK);
196 const union tc_rftp_reg tc_rftp = {
197 .tREFI = ctrl->tREFI,
198 .tRFC = ctrl->tRFC,
199 .tREFIx9 = val32 / 1024,
202 /* Self-refresh timing parameters */
203 const union tc_srftp_reg tc_srftp = {
204 .tXSDLL = tDLLK,
205 .tXS_offset = ctrl->tXSOffset,
206 .tZQOPER = tDLLK - ctrl->tXSOffset,
207 .tMOD = ctrl->tMOD - 8,
210 FOR_ALL_CHANNELS {
211 printram("DBP [%x] = %x\n", TC_DBP_ch(channel), tc_dbp.raw);
212 mchbar_write32(TC_DBP_ch(channel), tc_dbp.raw);
214 printram("RAP [%x] = %x\n", TC_RAP_ch(channel), tc_rap.raw);
215 mchbar_write32(TC_RAP_ch(channel), tc_rap.raw);
217 printram("OTHP [%x] = %x\n", TC_OTHP_ch(channel), tc_othp.raw);
218 mchbar_write32(TC_OTHP_ch(channel), tc_othp.raw);
220 if (IS_IVY_CPU(ctrl->cpu)) {
221 /* Debug parameters - only applies to Ivy Bridge */
222 mchbar_write32(TC_DTP_ch(channel), tc_dtp.raw);
225 dram_odt_stretch(ctrl, channel);
227 printram("REFI [%x] = %x\n", TC_RFTP_ch(channel), tc_rftp.raw);
228 mchbar_write32(TC_RFTP_ch(channel), tc_rftp.raw);
230 union tc_rfp_reg tc_rfp = {
231 .raw = mchbar_read32(TC_RFP_ch(channel)),
233 tc_rfp.oref_ri = 0xff;
234 mchbar_write32(TC_RFP_ch(channel), tc_rfp.raw);
236 printram("SRFTP [%x] = %x\n", TC_SRFTP_ch(channel), tc_srftp.raw);
237 mchbar_write32(TC_SRFTP_ch(channel), tc_srftp.raw);
241 void dram_dimm_mapping(ramctr_timing *ctrl)
243 int channel;
244 dimm_info *info = &ctrl->info;
246 FOR_ALL_CHANNELS {
247 struct dimm_attr_ddr3_st *dimmA, *dimmB;
248 u32 reg = 0;
250 if (info->dimm[channel][0].size_mb >= info->dimm[channel][1].size_mb) {
251 dimmA = &info->dimm[channel][0];
252 dimmB = &info->dimm[channel][1];
253 reg |= (0 << 16);
254 } else {
255 dimmA = &info->dimm[channel][1];
256 dimmB = &info->dimm[channel][0];
257 reg |= (1 << 16);
260 if (dimmA && (dimmA->ranks > 0)) {
261 reg |= (dimmA->size_mb / 256) << 0;
262 reg |= (dimmA->ranks - 1) << 17;
263 reg |= (dimmA->width / 8 - 1) << 19;
266 if (dimmB && (dimmB->ranks > 0)) {
267 reg |= (dimmB->size_mb / 256) << 8;
268 reg |= (dimmB->ranks - 1) << 18;
269 reg |= (dimmB->width / 8 - 1) << 20;
273 * Rank interleave: Bit 16 of the physical address space sets
274 * the rank to use in a dual single-rank DIMM configuration.
275 * That results in every 64KiB being interleaved between two ranks.
277 reg |= 1 << 21;
278 /* Enhanced interleave */
279 reg |= 1 << 22;
281 if ((dimmA && (dimmA->ranks > 0)) || (dimmB && (dimmB->ranks > 0))) {
282 ctrl->mad_dimm[channel] = reg;
283 } else {
284 ctrl->mad_dimm[channel] = 0;
289 void dram_dimm_set_mapping(ramctr_timing *ctrl, int training)
291 int channel;
292 u32 ecc;
294 if (ctrl->ecc_enabled)
295 ecc = training ? (1 << 24) : (3 << 24);
296 else
297 ecc = 0;
299 FOR_ALL_CHANNELS {
300 mchbar_write32(MAD_DIMM(channel), ctrl->mad_dimm[channel] | ecc);
303 if (ctrl->ecc_enabled)
304 udelay(10);
307 void dram_zones(ramctr_timing *ctrl, int training)
309 u32 reg, ch0size, ch1size;
310 u8 val;
311 reg = 0;
312 val = 0;
314 if (training) {
315 ch0size = ctrl->channel_size_mb[0] ? 256 : 0;
316 ch1size = ctrl->channel_size_mb[1] ? 256 : 0;
317 } else {
318 ch0size = ctrl->channel_size_mb[0];
319 ch1size = ctrl->channel_size_mb[1];
322 if (ch0size >= ch1size) {
323 reg = mchbar_read32(MAD_ZR);
324 val = ch1size / 256;
325 reg = (reg & ~0xff000000) | val << 24;
326 reg = (reg & ~0x00ff0000) | (2 * val) << 16;
327 mchbar_write32(MAD_ZR, reg);
328 mchbar_write32(MAD_CHNL, 0x24);
330 } else {
331 reg = mchbar_read32(MAD_ZR);
332 val = ch0size / 256;
333 reg = (reg & ~0xff000000) | val << 24;
334 reg = (reg & ~0x00ff0000) | (2 * val) << 16;
335 mchbar_write32(MAD_ZR, reg);
336 mchbar_write32(MAD_CHNL, 0x21);
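/*
 * Note: the MAD_CHNL values appear to select the virtual-to-physical channel order in
 * 2-bit fields: 0x24 maps channel A = 0, B = 1, C = 2, while 0x21 maps channel A = 1,
 * B = 0, C = 2, so the larger channel is always placed first.
 */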
341 * Returns the ECC mode the NB is running at. It takes precedence over ECC capability.
342 * The ME/PCU/.. has the ability to change this.
343 * Return 0: ECC is optional
344 * Return 1: ECC is forced
346 bool get_host_ecc_forced(void)
348 /* read Capabilities A Register */
349 const u32 reg32 = pci_read_config32(HOST_BRIDGE, CAPID0_A);
350 return !!(reg32 & (1 << 24));
354 * Returns the ECC capability.
355 * The ME/PCU/.. has the ability to change this.
356 * Return 0: ECC is disabled
357 * Return 1: ECC is possible
359 bool get_host_ecc_cap(void)
361 /* read Capabilities A Register */
362 const u32 reg32 = pci_read_config32(HOST_BRIDGE, CAPID0_A);
363 return !(reg32 & (1 << 25));
366 #define DEFAULT_PCI_MMIO_SIZE 2048
368 void dram_memorymap(ramctr_timing *ctrl, int me_uma_size)
370 u32 reg, val, reclaim, tom, gfxstolen, gttsize;
371 size_t tsegbase, toludbase, remapbase, gfxstolenbase, mmiosize, gttbase;
372 size_t tsegsize, touudbase, remaplimit, mestolenbase, tsegbasedelta;
373 uint16_t ggc;
375 mmiosize = DEFAULT_PCI_MMIO_SIZE;
377 ggc = pci_read_config16(HOST_BRIDGE, GGC);
378 if (!(ggc & 2)) {
379 gfxstolen = ((ggc >> 3) & 0x1f) * 32;
380 gttsize = ((ggc >> 8) & 0x3);
381 } else {
382 gfxstolen = 0;
383 gttsize = 0;
386 tsegsize = CONFIG_SMM_TSEG_SIZE >> 20;
388 tom = ctrl->channel_size_mb[0] + ctrl->channel_size_mb[1];
390 mestolenbase = tom - me_uma_size;
392 toludbase = MIN(4096 - mmiosize + gfxstolen + gttsize + tsegsize, tom - me_uma_size);
394 gfxstolenbase = toludbase - gfxstolen;
395 gttbase = gfxstolenbase - gttsize;
397 tsegbase = gttbase - tsegsize;
399 /* Round tsegbase down to nearest address aligned to tsegsize */
400 tsegbasedelta = tsegbase & (tsegsize - 1);
401 tsegbase &= ~(tsegsize - 1);
403 gttbase -= tsegbasedelta;
404 gfxstolenbase -= tsegbasedelta;
405 toludbase -= tsegbasedelta;
407 /* Test if it is possible to reclaim a hole in the RAM addressing */
408 if (tom - me_uma_size > toludbase) {
409 /* Reclaim is possible */
410 reclaim = 1;
411 remapbase = MAX(4096, tom - me_uma_size);
412 remaplimit = remapbase + MIN(4096, tom - me_uma_size) - toludbase - 1;
413 touudbase = remaplimit + 1;
414 } else {
415 /* Reclaim not possible */
416 reclaim = 0;
417 touudbase = tom - me_uma_size;
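/*
 * Worked example with hypothetical values (all in MiB): two 4096 MiB channels, no ME UMA,
 * gfxstolen = 32, gttsize = 2, tsegsize = 8, mmiosize = 2048. Then tom = 8192 and
 * toludbase = MIN(4096 - 2048 + 32 + 2 + 8, 8192) = 2090, gfxstolenbase = 2058,
 * gttbase = 2056, tsegbase = 2048 (already aligned to tsegsize). Since 8192 > 2090,
 * reclaim is possible: remapbase = MAX(4096, 8192) = 8192, remaplimit =
 * 8192 + 4096 - 2090 - 1 = 10197 and touudbase = 10198, which remaps the 2006 MiB of
 * DRAM hidden below 4 GiB to above the 8 GiB mark.
 */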
420 /* Update memory map in PCIe configuration space */
421 printk(BIOS_DEBUG, "Update PCI-E configuration space:\n");
423 /* TOM (top of memory) */
424 reg = pci_read_config32(HOST_BRIDGE, TOM);
425 val = tom & 0xfff;
426 reg = (reg & ~0xfff00000) | (val << 20);
427 printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", TOM, reg);
428 pci_write_config32(HOST_BRIDGE, TOM, reg);
430 reg = pci_read_config32(HOST_BRIDGE, TOM + 4);
431 val = tom & 0xfffff000;
432 reg = (reg & ~0x000fffff) | (val >> 12);
433 printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", TOM + 4, reg);
434 pci_write_config32(HOST_BRIDGE, TOM + 4, reg);
436 /* TOLUD (Top Of Low Usable DRAM) */
437 reg = pci_read_config32(HOST_BRIDGE, TOLUD);
438 val = toludbase & 0xfff;
439 reg = (reg & ~0xfff00000) | (val << 20);
440 printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", TOLUD, reg);
441 pci_write_config32(HOST_BRIDGE, TOLUD, reg);
443 /* TOUUD LSB (Top Of Upper Usable DRAM) */
444 reg = pci_read_config32(HOST_BRIDGE, TOUUD);
445 val = touudbase & 0xfff;
446 reg = (reg & ~0xfff00000) | (val << 20);
447 printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", TOUUD, reg);
448 pci_write_config32(HOST_BRIDGE, TOUUD, reg);
450 /* TOUUD MSB */
451 reg = pci_read_config32(HOST_BRIDGE, TOUUD + 4);
452 val = touudbase & 0xfffff000;
453 reg = (reg & ~0x000fffff) | (val >> 12);
454 printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", TOUUD + 4, reg);
455 pci_write_config32(HOST_BRIDGE, TOUUD + 4, reg);
457 if (reclaim) {
458 /* REMAP BASE */
459 pci_write_config32(HOST_BRIDGE, REMAPBASE, remapbase << 20);
460 pci_write_config32(HOST_BRIDGE, REMAPBASE + 4, remapbase >> 12);
462 /* REMAP LIMIT */
463 pci_write_config32(HOST_BRIDGE, REMAPLIMIT, remaplimit << 20);
464 pci_write_config32(HOST_BRIDGE, REMAPLIMIT + 4, remaplimit >> 12);
466 /* TSEG */
467 reg = pci_read_config32(HOST_BRIDGE, TSEGMB);
468 val = tsegbase & 0xfff;
469 reg = (reg & ~0xfff00000) | (val << 20);
470 printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", TSEGMB, reg);
471 pci_write_config32(HOST_BRIDGE, TSEGMB, reg);
473 /* GFX stolen memory */
474 reg = pci_read_config32(HOST_BRIDGE, BDSM);
475 val = gfxstolenbase & 0xfff;
476 reg = (reg & ~0xfff00000) | (val << 20);
477 printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", BDSM, reg);
478 pci_write_config32(HOST_BRIDGE, BDSM, reg);
480 /* GTT stolen memory */
481 reg = pci_read_config32(HOST_BRIDGE, BGSM);
482 val = gttbase & 0xfff;
483 reg = (reg & ~0xfff00000) | (val << 20);
484 printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", BGSM, reg);
485 pci_write_config32(HOST_BRIDGE, BGSM, reg);
487 if (me_uma_size) {
488 reg = pci_read_config32(HOST_BRIDGE, MESEG_MASK + 4);
489 val = (0x80000 - me_uma_size) & 0xfffff000;
490 reg = (reg & ~0x000fffff) | (val >> 12);
491 printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", MESEG_MASK + 4, reg);
492 pci_write_config32(HOST_BRIDGE, MESEG_MASK + 4, reg);
494 /* ME base */
495 reg = pci_read_config32(HOST_BRIDGE, MESEG_BASE);
496 val = mestolenbase & 0xfff;
497 reg = (reg & ~0xfff00000) | (val << 20);
498 printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", MESEG_BASE, reg);
499 pci_write_config32(HOST_BRIDGE, MESEG_BASE, reg);
501 reg = pci_read_config32(HOST_BRIDGE, MESEG_BASE + 4);
502 val = mestolenbase & 0xfffff000;
503 reg = (reg & ~0x000fffff) | (val >> 12);
504 printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", MESEG_BASE + 4, reg);
505 pci_write_config32(HOST_BRIDGE, MESEG_BASE + 4, reg);
507 /* ME mask */
508 reg = pci_read_config32(HOST_BRIDGE, MESEG_MASK);
509 val = (0x80000 - me_uma_size) & 0xfff;
510 reg = (reg & ~0xfff00000) | (val << 20);
511 reg = reg | ME_STLEN_EN; /* Set ME memory enable */
512 reg = reg | MELCK; /* Set lock bit on ME mem */
513 printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", MESEG_MASK, reg);
514 pci_write_config32(HOST_BRIDGE, MESEG_MASK, reg);
518 static void write_reset(ramctr_timing *ctrl)
520 int channel, slotrank;
522 /* Choose a populated channel */
523 channel = (ctrl->rankmap[0]) ? 0 : 1;
525 wait_for_iosav(channel);
527 /* Choose a populated rank */
528 slotrank = (ctrl->rankmap[channel] & 1) ? 0 : 2;
530 iosav_write_zqcs_sequence(channel, slotrank, 3, 8, 0);
532 /* This is actually using the IOSAV state machine as a timer */
533 iosav_run_queue(channel, 1, 1);
535 wait_for_iosav(channel);
538 void dram_jedecreset(ramctr_timing *ctrl)
540 u32 reg;
541 int channel;
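/*
 * Wait for RCOMP to complete (bit 16 appears to be the "RCOMP done" flag) and for the
 * channel 0 IOSAV status to report bit 2 or bit 4 set before starting the JEDEC reset
 * sequence.
 */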
543 while (!(mchbar_read32(RCOMP_TIMER) & (1 << 16)))
544 ;
545 do {
546 reg = mchbar_read32(IOSAV_STATUS_ch(0));
547 } while ((reg & 0x14) == 0);
549 /* Set state of memory controller */
550 reg = 0x112;
551 mchbar_write32(MC_INIT_STATE_G, reg);
552 mchbar_write32(MC_INIT_STATE, 0);
553 reg |= 2; /* DDR reset */
554 mchbar_write32(MC_INIT_STATE_G, reg);
556 /* Assert DIMM reset signal */
557 mchbar_clrbits32(MC_INIT_STATE_G, 1 << 1);
559 /* Wait 200us */
560 udelay(200);
562 /* Deassert DIMM reset signal */
563 mchbar_setbits32(MC_INIT_STATE_G, 1 << 1);
565 /* Wait 500us */
566 udelay(500);
568 /* Enable DCLK */
569 mchbar_setbits32(MC_INIT_STATE_G, 1 << 2);
571 /* XXX Wait 20ns */
572 udelay(1);
574 FOR_ALL_CHANNELS {
575 /* Set valid rank CKE */
576 reg = ctrl->rankmap[channel];
577 mchbar_write32(MC_INIT_STATE_ch(channel), reg);
579 /* Wait 10ns for ranks to settle */
580 // udelay(0.01);
582 reg = (reg & ~0xf0) | (ctrl->rankmap[channel] << 4);
583 mchbar_write32(MC_INIT_STATE_ch(channel), reg);
585 /* Write reset using a NOP */
586 write_reset(ctrl);
591 * DDR3 Rank1 address mirroring swaps the following pins:
592 * A3<->A4, A5<->A6, A7<->A8, BA0<->BA1
594 static void ddr3_mirror_mrreg(int *bank, u32 *addr)
596 *bank = ((*bank >> 1) & 1) | ((*bank << 1) & 2);
597 *addr = (*addr & ~0x1f8) | ((*addr >> 1) & 0xa8) | ((*addr & 0xa8) << 1);
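/*
 * The bank swap exchanges BA0 and BA1. For the address, mask 0xa8 selects bits 3, 5
 * and 7: shifting the masked value left by one and the unmasked bits 4, 6 and 8 right
 * by one swaps the A3/A4, A5/A6 and A7/A8 pairs, while ~0x1f8 preserves all other bits.
 */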
600 static void write_mrreg(ramctr_timing *ctrl, int channel, int slotrank, int reg, u32 val)
602 wait_for_iosav(channel);
604 if (ctrl->rank_mirror[channel][slotrank])
605 ddr3_mirror_mrreg(&reg, &val);
607 const struct iosav_ssq sequence[] = {
608 /* DRAM command MRS */
609 [0] = {
610 .sp_cmd_ctrl = {
611 .command = IOSAV_MRS,
613 .subseq_ctrl = {
614 .cmd_executions = 1,
615 .cmd_delay_gap = 4,
616 .post_ssq_wait = 4,
617 .data_direction = SSQ_NA,
619 .sp_cmd_addr = {
620 .address = val,
621 .rowbits = 6,
622 .bank = reg,
623 .rank = slotrank,
626 /* DRAM command MRS */
627 [1] = {
628 .sp_cmd_ctrl = {
629 .command = IOSAV_MRS,
630 .ranksel_ap = 1,
632 .subseq_ctrl = {
633 .cmd_executions = 1,
634 .cmd_delay_gap = 4,
635 .post_ssq_wait = 4,
636 .data_direction = SSQ_NA,
638 .sp_cmd_addr = {
639 .address = val,
640 .rowbits = 6,
641 .bank = reg,
642 .rank = slotrank,
645 /* DRAM command MRS */
646 [2] = {
647 .sp_cmd_ctrl = {
648 .command = IOSAV_MRS,
650 .subseq_ctrl = {
651 .cmd_executions = 1,
652 .cmd_delay_gap = 4,
653 .post_ssq_wait = ctrl->tMOD,
654 .data_direction = SSQ_NA,
656 .sp_cmd_addr = {
657 .address = val,
658 .rowbits = 6,
659 .bank = reg,
660 .rank = slotrank,
664 iosav_write_sequence(channel, sequence, ARRAY_SIZE(sequence));
666 iosav_run_once_and_wait(channel);
669 /* Obtain optimal power down mode for current configuration */
670 static enum power_down_mode get_power_down_mode(ramctr_timing *ctrl, int channel)
672 int slotrank;
674 if (ctrl->tXP > 8)
675 return PDM_NONE;
677 if (ctrl->tXPDLL > 32)
678 return PDM_PPD;
680 FOR_ALL_POPULATED_RANKS
681 if (!ctrl->info.dimm[channel][slotrank >> 1].flags.dll_off_mode)
682 return PDM_APD_PPD;
684 if (CONFIG(RAMINIT_ALWAYS_ALLOW_DLL_OFF) || get_platform_type() == PLATFORM_MOBILE)
685 return PDM_DLL_OFF;
687 return PDM_APD_PPD;
690 static u32 make_mr0(ramctr_timing *ctrl, int channel, u8 rank)
692 u16 mr0reg, mch_cas, mch_wr;
693 static const u8 mch_wr_t[12] = { 1, 2, 3, 4, 0, 5, 0, 6, 0, 7, 0, 0 };
695 const enum power_down_mode power_down = get_power_down_mode(ctrl, channel);
697 const bool slow_exit = power_down == PDM_DLL_OFF || power_down == PDM_APD_DLL_OFF;
699 /* Convert CAS to MCH register friendly */
700 if (ctrl->CAS < 12) {
701 mch_cas = (u16)((ctrl->CAS - 4) << 1);
702 } else {
703 mch_cas = (u16)(ctrl->CAS - 12);
704 mch_cas = ((mch_cas << 1) | 0x1);
707 /* Convert tWR to MCH register friendly */
708 mch_wr = mch_wr_t[ctrl->tWR - 5];
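/*
 * These values follow the JEDEC DDR3 MR0 layout: mch_cas bit 0 lands in A2 and bits 3:1
 * in A6:A4, while mch_wr lands in A11:A9 (see the shifts below). Worked example:
 * CAS = 9 gives mch_cas = (9 - 4) << 1 = 0b1010 (A6:A4 = 101, A2 = 0), and tWR = 10
 * gives mch_wr = mch_wr_t[5] = 5.
 */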
710 /* DLL Reset - self clearing - set after CLK frequency has been changed */
711 mr0reg = 1 << 8;
713 mr0reg |= (mch_cas & 0x1) << 2;
714 mr0reg |= (mch_cas & 0xe) << 3;
715 mr0reg |= mch_wr << 9;
717 /* Precharge PD - Use slow exit when DLL-off is used - mostly power-saving feature */
718 mr0reg |= !slow_exit << 12;
719 return mr0reg;
722 static void dram_mr0(ramctr_timing *ctrl, u8 rank, int channel)
724 write_mrreg(ctrl, channel, rank, 0, make_mr0(ctrl, channel, rank));
727 static odtmap get_ODT(ramctr_timing *ctrl, int channel)
729 /* Get ODT based on rankmap */
730 int dimms_per_ch = (ctrl->rankmap[channel] & 1) + ((ctrl->rankmap[channel] >> 2) & 1);
732 if (dimms_per_ch == 1) {
733 return (const odtmap){60, 60};
734 } else {
735 return (const odtmap){120, 30};
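/*
 * The values returned below follow the DDR3 MR1 Rtt_Nom encoding (bits A9, A6, A2) with
 * RZQ = 240 ohm: RZQ/4 = 60 ohm, RZQ/2 = 120 ohm, RZQ/8 = 30 ohm.
 */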
739 static u32 encode_odt(u32 odt)
741 switch (odt) {
742 case 30:
743 return (1 << 9) | (1 << 2); /* RZQ/8, RZQ/4 */
744 case 60:
745 return (1 << 2); /* RZQ/4 */
746 case 120:
747 return (1 << 6); /* RZQ/2 */
748 default:
749 case 0:
750 return 0;
754 static u32 make_mr1(ramctr_timing *ctrl, u8 rank, int channel)
756 odtmap odt;
757 u32 mr1reg;
759 odt = get_ODT(ctrl, channel);
760 mr1reg = 2;
762 mr1reg |= encode_odt(odt.rttnom);
764 return mr1reg;
767 static void dram_mr1(ramctr_timing *ctrl, u8 rank, int channel)
769 u16 mr1reg;
771 mr1reg = make_mr1(ctrl, rank, channel);
773 write_mrreg(ctrl, channel, rank, 1, mr1reg);
776 static void dram_mr2(ramctr_timing *ctrl, u8 rank, int channel)
778 const u16 pasr = 0;
779 const u16 cwl = ctrl->CWL - 5;
780 const odtmap odt = get_ODT(ctrl, channel);
782 int srt = 0;
783 if (IS_IVY_CPU(ctrl->cpu) && ctrl->tCK >= TCK_1066MHZ)
784 srt = ctrl->extended_temperature_range && !ctrl->auto_self_refresh;
786 u16 mr2reg = 0;
787 mr2reg |= pasr;
788 mr2reg |= cwl << 3;
789 mr2reg |= ctrl->auto_self_refresh << 6;
790 mr2reg |= srt << 7;
791 mr2reg |= (odt.rttwr / 60) << 9;
793 write_mrreg(ctrl, channel, rank, 2, mr2reg);
795 /* Program MR2 shadow */
796 u32 reg32 = mchbar_read32(TC_MR2_SHADOW_ch(channel));
798 reg32 &= 3 << 14 | 3 << 6;
800 reg32 |= mr2reg & ~(3 << 6);
802 if (srt)
803 reg32 |= 1 << (rank / 2 + 6);
805 if (ctrl->rank_mirror[channel][rank])
806 reg32 |= 1 << (rank / 2 + 14);
808 mchbar_write32(TC_MR2_SHADOW_ch(channel), reg32);
811 static void dram_mr3(ramctr_timing *ctrl, u8 rank, int channel)
813 write_mrreg(ctrl, channel, rank, 3, 0);
816 void dram_mrscommands(ramctr_timing *ctrl)
818 u8 slotrank;
819 int channel;
821 FOR_ALL_POPULATED_CHANNELS {
822 FOR_ALL_POPULATED_RANKS {
823 /* MR2 */
824 dram_mr2(ctrl, slotrank, channel);
826 /* MR3 */
827 dram_mr3(ctrl, slotrank, channel);
829 /* MR1 */
830 dram_mr1(ctrl, slotrank, channel);
832 /* MR0 */
833 dram_mr0(ctrl, slotrank, channel);
837 const struct iosav_ssq zqcl_sequence[] = {
838 /* DRAM command NOP (without ODT nor chip selects) */
839 [0] = {
840 .sp_cmd_ctrl = {
841 .command = IOSAV_NOP & ~(0xff << 8),
843 .subseq_ctrl = {
844 .cmd_executions = 1,
845 .cmd_delay_gap = 4,
846 .post_ssq_wait = 15,
847 .data_direction = SSQ_NA,
849 .sp_cmd_addr = {
850 .address = 2,
851 .rowbits = 6,
852 .bank = 0,
853 .rank = 0,
856 /* DRAM command ZQCL */
857 [1] = {
858 .sp_cmd_ctrl = {
859 .command = IOSAV_ZQCS,
860 .ranksel_ap = 1,
862 .subseq_ctrl = {
863 .cmd_executions = 1,
864 .cmd_delay_gap = 4,
865 .post_ssq_wait = 400,
866 .data_direction = SSQ_NA,
868 .sp_cmd_addr = {
869 .address = 1 << 10,
870 .rowbits = 6,
871 .bank = 0,
872 .rank = 0,
874 .addr_update = {
875 .inc_rank = 1,
876 .addr_wrap = 20,
880 iosav_write_sequence(BROADCAST_CH, zqcl_sequence, ARRAY_SIZE(zqcl_sequence));
882 iosav_run_queue(BROADCAST_CH, 4, 0);
884 FOR_ALL_CHANNELS {
885 wait_for_iosav(channel);
888 /* Refresh enable */
889 mchbar_setbits32(MC_INIT_STATE_G, 1 << 3);
891 FOR_ALL_POPULATED_CHANNELS {
892 mchbar_clrbits32(SCHED_CBIT_ch(channel), 1 << 21);
894 wait_for_iosav(channel);
896 slotrank = (ctrl->rankmap[channel] & 1) ? 0 : 2;
898 wait_for_iosav(channel);
900 iosav_write_zqcs_sequence(channel, slotrank, 4, 101, 31);
902 iosav_run_once_and_wait(channel);
906 static const u32 lane_base[] = {
907 LANEBASE_B0, LANEBASE_B1, LANEBASE_B2, LANEBASE_B3,
908 LANEBASE_B4, LANEBASE_B5, LANEBASE_B6, LANEBASE_B7,
909 LANEBASE_ECC
912 /* Maximum delay for command, control, clock */
913 #define CCC_MAX_PI (2 * QCLK_PI - 1)
915 void program_timings(ramctr_timing *ctrl, int channel)
917 u32 reg_roundtrip_latency, reg_io_latency;
918 int lane;
919 int slotrank, slot;
921 u32 ctl_delay[NUM_SLOTS] = { 0 };
922 int cmd_delay = 0;
924 /* Enable CLK XOVER */
925 u32 clk_pi_coding = get_XOVER_CLK(ctrl->rankmap[channel]);
926 u32 clk_logic_dly = 0;
929 * Compute command timing as abs() of the most negative PI code
930 * across all ranks. Use zero if none of the values is negative.
932 FOR_ALL_POPULATED_RANKS {
933 cmd_delay = MAX(cmd_delay, -ctrl->timings[channel][slotrank].pi_coding);
935 if (cmd_delay > CCC_MAX_PI) {
936 printk(BIOS_ERR, "C%d command delay overflow: %d\n", channel, cmd_delay);
937 cmd_delay = CCC_MAX_PI;
940 for (slot = 0; slot < NUM_SLOTS; slot++) {
941 const int pi_coding_0 = ctrl->timings[channel][2 * slot + 0].pi_coding;
942 const int pi_coding_1 = ctrl->timings[channel][2 * slot + 1].pi_coding;
944 const u8 slot_map = (ctrl->rankmap[channel] >> (2 * slot)) & 3;
946 if (slot_map & 1)
947 ctl_delay[slot] += pi_coding_0 + cmd_delay;
949 if (slot_map & 2)
950 ctl_delay[slot] += pi_coding_1 + cmd_delay;
952 /* If both ranks in a slot are populated, use the average */
953 if (slot_map == 3)
954 ctl_delay[slot] /= 2;
956 if (ctl_delay[slot] > CCC_MAX_PI) {
957 printk(BIOS_ERR, "C%dS%d control delay overflow: %d\n",
958 channel, slot, ctl_delay[slot]);
959 ctl_delay[slot] = CCC_MAX_PI;
962 FOR_ALL_POPULATED_RANKS {
963 int clk_delay = ctrl->timings[channel][slotrank].pi_coding + cmd_delay;
966 * Clock is a differential signal, whereas command and control are not.
967 * This affects its timing, and it is also why it needs a magic offset.
969 clk_delay += ctrl->pi_code_offset;
971 /* Can never happen with valid values */
972 if (clk_delay < 0) {
973 printk(BIOS_ERR, "C%dR%d clock delay underflow: %d\n",
974 channel, slotrank, clk_delay);
975 clk_delay = 0;
978 /* Clock can safely wrap around because it is a periodic signal */
979 clk_delay %= CCC_MAX_PI + 1;
981 clk_pi_coding |= (clk_delay % QCLK_PI) << (6 * slotrank);
982 clk_logic_dly |= (clk_delay / QCLK_PI) << slotrank;
985 /* Enable CMD XOVER */
986 union gdcr_cmd_pi_coding_reg cmd_pi_coding = {
987 .raw = get_XOVER_CMD(ctrl->rankmap[channel]),
989 cmd_pi_coding.cmd_pi_code = cmd_delay % QCLK_PI;
990 cmd_pi_coding.cmd_logic_delay = cmd_delay / QCLK_PI;
992 cmd_pi_coding.ctl_pi_code_d0 = ctl_delay[0] % QCLK_PI;
993 cmd_pi_coding.ctl_pi_code_d1 = ctl_delay[1] % QCLK_PI;
994 cmd_pi_coding.ctl_logic_delay_d0 = ctl_delay[0] / QCLK_PI;
995 cmd_pi_coding.ctl_logic_delay_d1 = ctl_delay[1] / QCLK_PI;
997 mchbar_write32(GDCRCMDPICODING_ch(channel), cmd_pi_coding.raw);
999 mchbar_write32(GDCRCKPICODE_ch(channel), clk_pi_coding);
1000 mchbar_write32(GDCRCKLOGICDELAY_ch(channel), clk_logic_dly);
1002 reg_io_latency = mchbar_read32(SC_IO_LATENCY_ch(channel));
1003 reg_io_latency &= ~0xffff;
1005 reg_roundtrip_latency = 0;
1007 FOR_ALL_POPULATED_RANKS {
1008 reg_io_latency |= ctrl->timings[channel][slotrank].io_latency << (4 * slotrank);
1010 reg_roundtrip_latency |=
1011 ctrl->timings[channel][slotrank].roundtrip_latency << (8 * slotrank);
1013 FOR_ALL_LANES {
1014 const u16 rcven = ctrl->timings[channel][slotrank].lanes[lane].rcven;
1015 const u8 dqs_p = ctrl->timings[channel][slotrank].lanes[lane].rx_dqs_p;
1016 const u8 dqs_n = ctrl->timings[channel][slotrank].lanes[lane].rx_dqs_n;
1017 const union gdcr_rx_reg gdcr_rx = {
1018 .rcven_pi_code = rcven % QCLK_PI,
1019 .rx_dqs_p_pi_code = dqs_p,
1020 .rcven_logic_delay = rcven / QCLK_PI,
1021 .rx_dqs_n_pi_code = dqs_n,
1023 mchbar_write32(lane_base[lane] + GDCRRX(channel, slotrank),
1024 gdcr_rx.raw);
1026 const u16 tx_dqs = ctrl->timings[channel][slotrank].lanes[lane].tx_dqs;
1027 const int tx_dq = ctrl->timings[channel][slotrank].lanes[lane].tx_dq;
1028 const union gdcr_tx_reg gdcr_tx = {
1029 .tx_dq_pi_code = tx_dq % QCLK_PI,
1030 .tx_dqs_pi_code = tx_dqs % QCLK_PI,
1031 .tx_dqs_logic_delay = tx_dqs / QCLK_PI,
1032 .tx_dq_logic_delay = tx_dq / QCLK_PI,
1034 mchbar_write32(lane_base[lane] + GDCRTX(channel, slotrank),
1035 gdcr_tx.raw);
1038 mchbar_write32(SC_ROUNDT_LAT_ch(channel), reg_roundtrip_latency);
1039 mchbar_write32(SC_IO_LATENCY_ch(channel), reg_io_latency);
1042 static void test_rcven(ramctr_timing *ctrl, int channel, int slotrank)
1044 wait_for_iosav(channel);
1046 /* Send a burst of 16 back-to-back read commands (4 DCLK apart) */
1047 iosav_write_read_mpr_sequence(channel, slotrank, ctrl->tMOD, 1, 3, 15, ctrl->CAS + 36);
1049 iosav_run_once_and_wait(channel);
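/*
 * The receive-enable training result for a lane is a bitmap spread across the two
 * GDCRTRAININGRESULT registers; bit (rcven % 64) indicates whether the lane passed at
 * the currently programmed receive-enable delay.
 */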
1052 static int does_lane_work(ramctr_timing *ctrl, int channel, int slotrank, int lane)
1054 u32 rcven = ctrl->timings[channel][slotrank].lanes[lane].rcven;
1056 return (mchbar_read32(lane_base[lane] +
1057 GDCRTRAININGRESULT(channel, (rcven / 32) & 1)) >> (rcven % 32)) & 1;
1060 struct run {
1061 int middle;
1062 int end;
1063 int start;
1064 int all;
1065 int length;
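/*
 * Find the longest run of zeroes in seq[], treating it as circular (hence the scan over
 * 2 * sz). The returned start, end and middle indices are reduced modulo sz; "all" is
 * set when the whole sequence is zero, in which case the middle of the array is used.
 */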
1068 static struct run get_longest_zero_run(int *seq, int sz)
1070 int i, ls;
1071 int bl = 0, bs = 0;
1072 struct run ret;
1074 ls = 0;
1075 for (i = 0; i < 2 * sz; i++)
1076 if (seq[i % sz]) {
1077 if (i - ls > bl) {
1078 bl = i - ls;
1079 bs = ls;
1081 ls = i + 1;
1083 if (bl == 0) {
1084 ret.middle = sz / 2;
1085 ret.start = 0;
1086 ret.end = sz;
1087 ret.length = sz;
1088 ret.all = 1;
1089 return ret;
1092 ret.start = bs % sz;
1093 ret.end = (bs + bl - 1) % sz;
1094 ret.middle = (bs + (bl - 1) / 2) % sz;
1095 ret.length = bl;
1096 ret.all = 0;
1098 return ret;
1101 #define RCVEN_COARSE_PI_LENGTH (2 * QCLK_PI)
1103 static void find_rcven_pi_coarse(ramctr_timing *ctrl, int channel, int slotrank, int *upperA)
1105 int rcven;
1106 int statistics[NUM_LANES][RCVEN_COARSE_PI_LENGTH];
1107 int lane;
1109 for (rcven = 0; rcven < RCVEN_COARSE_PI_LENGTH; rcven++) {
1110 FOR_ALL_LANES {
1111 ctrl->timings[channel][slotrank].lanes[lane].rcven = rcven;
1113 program_timings(ctrl, channel);
1115 test_rcven(ctrl, channel, slotrank);
1117 FOR_ALL_LANES {
1118 statistics[lane][rcven] =
1119 !does_lane_work(ctrl, channel, slotrank, lane);
1122 FOR_ALL_LANES {
1123 struct run rn = get_longest_zero_run(statistics[lane], RCVEN_COARSE_PI_LENGTH);
1124 ctrl->timings[channel][slotrank].lanes[lane].rcven = rn.middle;
1125 upperA[lane] = rn.end;
1126 if (upperA[lane] < rn.middle)
1127 upperA[lane] += 2 * QCLK_PI;
1129 printram("rcven: %d, %d, %d: % 4d-% 4d-% 4d\n",
1130 channel, slotrank, lane, rn.start, rn.middle, rn.end);
1134 static void fine_tune_rcven_pi(ramctr_timing *ctrl, int channel, int slotrank, int *upperA)
1136 int rcven_delta;
1137 int statistics[NUM_LANES][51] = {0};
1138 int lane, i;
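/*
 * Sweep rcven in a +/-25 PI tick window around upperA + QCLK_PI and run the MPR read
 * test 100 times per step, counting how often each lane passes. The final rcven is the
 * midpoint between the last offset with zero passes and the first offset with 100
 * passes, taken relative to upperA.
 */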
1140 for (rcven_delta = -25; rcven_delta <= 25; rcven_delta++) {
1142 FOR_ALL_LANES {
1143 ctrl->timings[channel][slotrank].lanes[lane].rcven
1144 = upperA[lane] + rcven_delta + QCLK_PI;
1146 program_timings(ctrl, channel);
1148 for (i = 0; i < 100; i++) {
1149 test_rcven(ctrl, channel, slotrank);
1150 FOR_ALL_LANES {
1151 statistics[lane][rcven_delta + 25] +=
1152 does_lane_work(ctrl, channel, slotrank, lane);
1156 FOR_ALL_LANES {
1157 int last_zero, first_all;
1159 for (last_zero = -25; last_zero <= 25; last_zero++)
1160 if (statistics[lane][last_zero + 25])
1161 break;
1163 last_zero--;
1164 for (first_all = -25; first_all <= 25; first_all++)
1165 if (statistics[lane][first_all + 25] == 100)
1166 break;
1168 printram("lane %d: %d, %d\n", lane, last_zero, first_all);
1170 ctrl->timings[channel][slotrank].lanes[lane].rcven =
1171 (last_zero + first_all) / 2 + upperA[lane];
1173 printram("Aval: %d, %d, %d: % 4d\n", channel, slotrank,
1174 lane, ctrl->timings[channel][slotrank].lanes[lane].rcven);
1179 * Once the DQS high phase has been found (for each DRAM) the next stage
1180 * is to find out the round trip latency, by locating the preamble cycle.
1181 * This is achieved by trying smaller and smaller roundtrip values until
1182 * the strobe sampling is done on the preamble cycle.
1184 static int find_roundtrip_latency(ramctr_timing *ctrl, int channel, int slotrank, int *upperA)
1186 int works[NUM_LANES];
1187 int lane;
1189 while (1) {
1190 int all_works = 1, some_works = 0;
1192 program_timings(ctrl, channel);
1193 test_rcven(ctrl, channel, slotrank);
1195 FOR_ALL_LANES {
1196 works[lane] = !does_lane_work(ctrl, channel, slotrank, lane);
1198 if (works[lane])
1199 some_works = 1;
1200 else
1201 all_works = 0;
1204 /* If every lane is working, exit */
1205 if (all_works)
1206 return 0;
1209 * If all bits are one (everyone is failing), decrement
1210 * the roundtrip value by two, and do another iteration.
1212 if (!some_works) {
1213 /* Guard against roundtrip latency underflow */
1214 if (ctrl->timings[channel][slotrank].roundtrip_latency < 2) {
1215 printk(BIOS_EMERG, "Roundtrip latency underflow: %d, %d\n",
1216 channel, slotrank);
1217 return MAKE_ERR;
1219 ctrl->timings[channel][slotrank].roundtrip_latency -= 2;
1220 printram("4024 -= 2;\n");
1221 continue;
1225 * Else (if some lanes are failing), increase the rank's
1226 * I/O latency by 2, and increase rcven logic delay by 2
1227 * on the working lanes, then perform another iteration.
1229 ctrl->timings[channel][slotrank].io_latency += 2;
1230 printram("4028 += 2;\n");
1232 /* Guard against I/O latency overflow */
1233 if (ctrl->timings[channel][slotrank].io_latency >= 16) {
1234 printk(BIOS_EMERG, "I/O latency overflow: %d, %d\n",
1235 channel, slotrank);
1236 return MAKE_ERR;
1238 FOR_ALL_LANES if (works[lane]) {
1239 ctrl->timings[channel][slotrank].lanes[lane].rcven += 2 * QCLK_PI;
1240 upperA[lane] += 2 * QCLK_PI;
1241 printram("increment %d, %d, %d\n", channel, slotrank, lane);
1244 return 0;
1247 static int get_logic_delay_delta(ramctr_timing *ctrl, int channel, int slotrank)
1249 int lane;
1250 u16 logic_delay_min = 7;
1251 u16 logic_delay_max = 0;
1253 FOR_ALL_LANES {
1254 const u16 logic_delay = ctrl->timings[channel][slotrank].lanes[lane].rcven >> 6;
1256 logic_delay_min = MIN(logic_delay_min, logic_delay);
1257 logic_delay_max = MAX(logic_delay_max, logic_delay);
1260 if (logic_delay_max < logic_delay_min) {
1261 printk(BIOS_EMERG, "Logic delay max < min (%u < %u): %d, %d\n",
1262 logic_delay_max, logic_delay_min, channel, slotrank);
1265 assert(logic_delay_max >= logic_delay_min);
1267 return logic_delay_max - logic_delay_min;
1270 static int align_rt_io_latency(ramctr_timing *ctrl, int channel, int slotrank, int prev)
1272 int latency_offset = 0;
1274 /* Get changed maxima */
1275 const int post = get_logic_delay_delta(ctrl, channel, slotrank);
1277 if (prev < post)
1278 latency_offset = +1;
1280 else if (prev > post)
1281 latency_offset = -1;
1283 else
1284 latency_offset = 0;
1286 ctrl->timings[channel][slotrank].io_latency += latency_offset;
1287 ctrl->timings[channel][slotrank].roundtrip_latency += latency_offset;
1288 printram("4024 += %d;\n", latency_offset);
1289 printram("4028 += %d;\n", latency_offset);
1291 return post;
1294 static void compute_final_logic_delay(ramctr_timing *ctrl, int channel, int slotrank)
1296 u16 logic_delay_min = 7;
1297 int lane;
1299 FOR_ALL_LANES {
1300 const u16 logic_delay = ctrl->timings[channel][slotrank].lanes[lane].rcven >> 6;
1302 logic_delay_min = MIN(logic_delay_min, logic_delay);
1305 if (logic_delay_min >= 2) {
1306 printk(BIOS_WARNING, "Logic delay %u greater than 1: %d %d\n",
1307 logic_delay_min, channel, slotrank);
1310 FOR_ALL_LANES {
1311 ctrl->timings[channel][slotrank].lanes[lane].rcven -= logic_delay_min << 6;
1313 ctrl->timings[channel][slotrank].io_latency -= logic_delay_min;
1314 printram("4028 -= %d;\n", logic_delay_min);
1317 int receive_enable_calibration(ramctr_timing *ctrl)
1319 int channel, slotrank, lane;
1320 int err;
1322 FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS {
1323 int all_high, some_high;
1324 int upperA[NUM_LANES];
1325 int prev;
1327 wait_for_iosav(channel);
1329 iosav_write_prea_sequence(channel, slotrank, ctrl->tRP, 0);
1331 iosav_run_once_and_wait(channel);
1333 const union gdcr_training_mod_reg training_mod = {
1334 .receive_enable_mode = 1,
1335 .training_rank_sel = slotrank,
1336 .odt_always_on = 1,
1338 mchbar_write32(GDCRTRAININGMOD, training_mod.raw);
1340 ctrl->timings[channel][slotrank].io_latency = 4;
1341 ctrl->timings[channel][slotrank].roundtrip_latency = 55;
1342 program_timings(ctrl, channel);
1344 find_rcven_pi_coarse(ctrl, channel, slotrank, upperA);
1346 all_high = 1;
1347 some_high = 0;
1348 FOR_ALL_LANES {
1349 if (ctrl->timings[channel][slotrank].lanes[lane].rcven >= QCLK_PI)
1350 some_high = 1;
1351 else
1352 all_high = 0;
1355 if (all_high) {
1356 ctrl->timings[channel][slotrank].io_latency--;
1357 printram("4028--;\n");
1358 FOR_ALL_LANES {
1359 ctrl->timings[channel][slotrank].lanes[lane].rcven -= QCLK_PI;
1360 upperA[lane] -= QCLK_PI;
1363 } else if (some_high) {
1364 ctrl->timings[channel][slotrank].roundtrip_latency++;
1365 ctrl->timings[channel][slotrank].io_latency++;
1366 printram("4024++;\n");
1367 printram("4028++;\n");
1370 program_timings(ctrl, channel);
1372 prev = get_logic_delay_delta(ctrl, channel, slotrank);
1374 err = find_roundtrip_latency(ctrl, channel, slotrank, upperA);
1375 if (err)
1376 return err;
1378 prev = align_rt_io_latency(ctrl, channel, slotrank, prev);
1380 fine_tune_rcven_pi(ctrl, channel, slotrank, upperA);
1382 prev = align_rt_io_latency(ctrl, channel, slotrank, prev);
1384 compute_final_logic_delay(ctrl, channel, slotrank);
1386 align_rt_io_latency(ctrl, channel, slotrank, prev);
1388 printram("4/8: %d, %d, % 4d, % 4d\n", channel, slotrank,
1389 ctrl->timings[channel][slotrank].roundtrip_latency,
1390 ctrl->timings[channel][slotrank].io_latency);
1392 printram("final results:\n");
1393 FOR_ALL_LANES
1394 printram("Aval: %d, %d, %d: % 4d\n", channel, slotrank, lane,
1395 ctrl->timings[channel][slotrank].lanes[lane].rcven);
1397 mchbar_write32(GDCRTRAININGMOD, 0);
1399 toggle_io_reset();
1402 FOR_ALL_POPULATED_CHANNELS {
1403 program_timings(ctrl, channel);
1406 return 0;
1409 static void test_tx_dq(ramctr_timing *ctrl, int channel, int slotrank)
1411 int lane;
1413 FOR_ALL_LANES {
1414 mchbar_write32(IOSAV_By_ERROR_COUNT_ch(channel, lane), 0);
1415 mchbar_read32(IOSAV_By_BW_SERROR_C_ch(channel, lane));
1418 wait_for_iosav(channel);
1420 iosav_write_misc_write_sequence(ctrl, channel, slotrank,
1421 MAX(ctrl->tRRD, (ctrl->tFAW >> 2) + 1), 4, 4, 500, 18);
1423 iosav_run_once_and_wait(channel);
1425 iosav_write_prea_act_read_sequence(ctrl, channel, slotrank);
1427 iosav_run_once_and_wait(channel);
1430 static void tx_dq_threshold_process(int *data, const int count)
1432 int min = data[0];
1433 int max = min;
1434 int i;
1435 for (i = 1; i < count; i++) {
1436 if (min > data[i])
1437 min = data[i];
1439 if (max < data[i])
1440 max = data[i];
1442 int threshold = min / 2 + max / 2;
1443 for (i = 0; i < count; i++)
1444 data[i] = data[i] > threshold;
1446 printram("threshold=%d min=%d max=%d\n", threshold, min, max);
1449 static int tx_dq_write_leveling(ramctr_timing *ctrl, int channel, int slotrank)
1451 int tx_dq;
1452 int stats[NUM_LANES][MAX_TX_DQ + 1];
1453 int lane;
1455 wait_for_iosav(channel);
1457 iosav_write_prea_sequence(channel, slotrank, ctrl->tRP, 18);
1459 iosav_run_once_and_wait(channel);
1461 for (tx_dq = 0; tx_dq <= MAX_TX_DQ; tx_dq++) {
1462 FOR_ALL_LANES ctrl->timings[channel][slotrank].lanes[lane].tx_dq = tx_dq;
1463 program_timings(ctrl, channel);
1465 test_tx_dq(ctrl, channel, slotrank);
1467 FOR_ALL_LANES {
1468 stats[lane][tx_dq] = mchbar_read32(
1469 IOSAV_By_ERROR_COUNT_ch(channel, lane));
1472 FOR_ALL_LANES {
1473 struct run rn = get_longest_zero_run(stats[lane], ARRAY_SIZE(stats[lane]));
1475 if (rn.all || rn.length < 8) {
1476 printk(BIOS_EMERG, "tx_dq write leveling failed: %d, %d, %d\n",
1477 channel, slotrank, lane);
1479 * With command training not being done yet, the lane can be erroneous.
1480 * Take the average as reference and try again to find a run.
1482 tx_dq_threshold_process(stats[lane], ARRAY_SIZE(stats[lane]));
1483 rn = get_longest_zero_run(stats[lane], ARRAY_SIZE(stats[lane]));
1485 if (rn.all || rn.length < 8) {
1486 printk(BIOS_EMERG, "tx_dq recovery failed\n");
1487 return MAKE_ERR;
1490 ctrl->timings[channel][slotrank].lanes[lane].tx_dq = rn.middle;
1491 printram("tx_dq: %d, %d, %d: % 4d-% 4d-% 4d\n",
1492 channel, slotrank, lane, rn.start, rn.middle, rn.end);
1494 return 0;
1497 static int get_precedening_channels(ramctr_timing *ctrl, int target_channel)
1499 int channel, ret = 0;
1501 FOR_ALL_POPULATED_CHANNELS if (channel < target_channel)
1502 ret++;
1504 return ret;
1507 /* Each cacheline is 64 bits long */
1508 static void program_wdb_pattern_length(int channel, const unsigned int num_cachelines)
1510 mchbar_write8(IOSAV_DATA_CTL_ch(channel), num_cachelines / 8 - 1);
1513 static void fill_pattern0(ramctr_timing *ctrl, int channel, u32 a, u32 b)
1515 unsigned int j;
1516 unsigned int channel_offset = get_precedening_channels(ctrl, channel) * 64;
1517 uintptr_t addr;
1519 for (j = 0; j < 16; j++) {
1520 addr = 0x04000000 + channel_offset + 4 * j;
1521 write32p(addr, j & 2 ? b : a);
1524 sfence();
1526 program_wdb_pattern_length(channel, 8);
1529 static int num_of_channels(const ramctr_timing *ctrl)
1531 int ret = 0;
1532 int channel;
1533 FOR_ALL_POPULATED_CHANNELS ret++;
1534 return ret;
1537 static void fill_pattern1(ramctr_timing *ctrl, int channel)
1539 unsigned int j;
1540 unsigned int channel_offset = get_precedening_channels(ctrl, channel) * 64;
1541 unsigned int channel_step = 64 * num_of_channels(ctrl);
1542 uintptr_t addr;
1544 for (j = 0; j < 16; j++) {
1545 addr = 0x04000000 + channel_offset + j * 4;
1546 write32p(addr, 0xffffffff);
1548 for (j = 0; j < 16; j++) {
1549 addr = 0x04000000 + channel_offset + channel_step + j * 4;
1550 write32p(addr, 0);
1552 sfence();
1554 program_wdb_pattern_length(channel, 16);
1557 #define TX_DQS_PI_LENGTH (2 * QCLK_PI)
1559 static int write_level_rank(ramctr_timing *ctrl, int channel, int slotrank)
1561 int tx_dqs;
1562 int statistics[NUM_LANES][TX_DQS_PI_LENGTH];
1563 int lane;
1565 const union gdcr_training_mod_reg training_mod = {
1566 .write_leveling_mode = 1,
1567 .training_rank_sel = slotrank,
1568 .enable_dqs_wl = 5,
1569 .odt_always_on = 1,
1570 .force_drive_enable = 1,
1572 mchbar_write32(GDCRTRAININGMOD, training_mod.raw);
1574 u32 mr1reg = make_mr1(ctrl, slotrank, channel) | 1 << 7;
1575 int bank = 1;
1577 if (ctrl->rank_mirror[channel][slotrank])
1578 ddr3_mirror_mrreg(&bank, &mr1reg);
1580 wait_for_iosav(channel);
1582 iosav_write_jedec_write_leveling_sequence(ctrl, channel, slotrank, bank, mr1reg);
1584 for (tx_dqs = 0; tx_dqs < TX_DQS_PI_LENGTH; tx_dqs++) {
1585 FOR_ALL_LANES {
1586 ctrl->timings[channel][slotrank].lanes[lane].tx_dqs = tx_dqs;
1588 program_timings(ctrl, channel);
1590 iosav_run_once_and_wait(channel);
1592 FOR_ALL_LANES {
1593 statistics[lane][tx_dqs] = !((mchbar_read32(lane_base[lane] +
1594 GDCRTRAININGRESULT(channel, (tx_dqs / 32) & 1)) >>
1595 (tx_dqs % 32)) & 1);
1598 FOR_ALL_LANES {
1599 struct run rn = get_longest_zero_run(statistics[lane], TX_DQS_PI_LENGTH);
1601 * tx_dq is a direct function of tx_dqs's 6 LSBs. Some tests increment the value
1602 * of tx_dqs by a small value, which might cause the 6-bit value to overflow if
1603 * it's close to 0x3f. Increment the value by a small offset if it's likely
1604 * to overflow, to make sure it won't overflow while running tests and bricks
1605 * the system due to a non matching tx_dq.
1607 * TODO: find out why some tests (edge write discovery) increment tx_dqs.
1609 if ((rn.start & 0x3f) == 0x3e)
1610 rn.start += 2;
1611 else if ((rn.start & 0x3f) == 0x3f)
1612 rn.start += 1;
1614 ctrl->timings[channel][slotrank].lanes[lane].tx_dqs = rn.start;
1615 if (rn.all) {
1616 printk(BIOS_EMERG, "JEDEC write leveling failed: %d, %d, %d\n",
1617 channel, slotrank, lane);
1619 return MAKE_ERR;
1621 printram("tx_dqs: %d, %d, %d: % 4d-% 4d-% 4d\n",
1622 channel, slotrank, lane, rn.start, rn.middle, rn.end);
1624 return 0;
1627 static int get_dqs_flyby_adjust(u64 val)
1629 int i;
1630 /* DQS is good enough */
1631 if (val == 0xffffffffffffffffLL)
1632 return 0;
1633 if (val >= 0xf000000000000000LL) {
1634 /* DQS is late, needs negative adjustment */
1635 for (i = 0; i < 8; i++)
1636 if (val << (8 * (7 - i) + 4))
1637 return -i;
1638 } else {
1639 /* DQS is early, needs positive adjustment */
1640 for (i = 0; i < 8; i++)
1641 if (val >> (8 * (7 - i) + 4))
1642 return i;
1644 return 8;
1647 static void train_write_flyby(ramctr_timing *ctrl)
1649 int channel, slotrank, lane, old;
1651 const union gdcr_training_mod_reg training_mod = {
1652 .dq_dqs_training_res = 1,
1654 mchbar_write32(GDCRTRAININGMOD, training_mod.raw);
1656 FOR_ALL_POPULATED_CHANNELS {
1657 fill_pattern1(ctrl, channel);
1659 FOR_ALL_POPULATED_CHANNELS FOR_ALL_POPULATED_RANKS {
1661 /* Reset read and write WDB pointers */
1662 mchbar_write32(IOSAV_DATA_CTL_ch(channel), 0x10001);
1664 wait_for_iosav(channel);
1666 iosav_write_misc_write_sequence(ctrl, channel, slotrank, 3, 1, 3, 3, 31);
1668 iosav_run_once_and_wait(channel);
1670 const struct iosav_ssq rd_sequence[] = {
1671 /* DRAM command PREA */
1672 [0] = {
1673 .sp_cmd_ctrl = {
1674 .command = IOSAV_PRE,
1675 .ranksel_ap = 1,
1677 .subseq_ctrl = {
1678 .cmd_executions = 1,
1679 .cmd_delay_gap = 3,
1680 .post_ssq_wait = ctrl->tRP,
1681 .data_direction = SSQ_NA,
1683 .sp_cmd_addr = {
1684 .address = 1 << 10,
1685 .rowbits = 6,
1686 .bank = 0,
1687 .rank = slotrank,
1689 .addr_update = {
1690 .addr_wrap = 18,
1693 /* DRAM command ACT */
1694 [1] = {
1695 .sp_cmd_ctrl = {
1696 .command = IOSAV_ACT,
1697 .ranksel_ap = 1,
1699 .subseq_ctrl = {
1700 .cmd_executions = 1,
1701 .cmd_delay_gap = 3,
1702 .post_ssq_wait = ctrl->tRCD,
1703 .data_direction = SSQ_NA,
1705 .sp_cmd_addr = {
1706 .address = 0,
1707 .rowbits = 6,
1708 .bank = 0,
1709 .rank = slotrank,
1712 /* DRAM command RDA */
1713 [2] = {
1714 .sp_cmd_ctrl = {
1715 .command = IOSAV_RD,
1716 .ranksel_ap = 3,
1718 .subseq_ctrl = {
1719 .cmd_executions = 1,
1720 .cmd_delay_gap = 3,
1721 .post_ssq_wait = ctrl->tRP +
1722 ctrl->timings[channel][slotrank].roundtrip_latency +
1723 ctrl->timings[channel][slotrank].io_latency,
1724 .data_direction = SSQ_RD,
1726 .sp_cmd_addr = {
1727 .address = 8,
1728 .rowbits = 6,
1729 .bank = 0,
1730 .rank = slotrank,
1734 iosav_write_sequence(channel, rd_sequence, ARRAY_SIZE(rd_sequence));
1736 iosav_run_once_and_wait(channel);
1738 FOR_ALL_LANES {
1739 u64 res = mchbar_read32(lane_base[lane] + GDCRTRAININGRESULT1(channel));
1740 res |= ((u64)mchbar_read32(lane_base[lane] +
1741 GDCRTRAININGRESULT2(channel))) << 32;
1743 old = ctrl->timings[channel][slotrank].lanes[lane].tx_dqs;
1744 ctrl->timings[channel][slotrank].lanes[lane].tx_dqs +=
1745 get_dqs_flyby_adjust(res) * QCLK_PI;
1747 printram("High adjust %d:%016llx\n", lane, res);
1748 printram("Bval+: %d, %d, %d, % 4d -> % 4d\n", channel, slotrank, lane,
1749 old, ctrl->timings[channel][slotrank].lanes[lane].tx_dqs);
1752 mchbar_write32(GDCRTRAININGMOD, 0);
1755 static void disable_refresh_machine(ramctr_timing *ctrl)
1757 int channel;
1759 FOR_ALL_POPULATED_CHANNELS {
1760 /* choose an existing rank */
1761 const int slotrank = !(ctrl->rankmap[channel] & 1) ? 2 : 0;
1763 iosav_write_zqcs_sequence(channel, slotrank, 4, 4, 31);
1765 iosav_run_once_and_wait(channel);
1767 mchbar_setbits32(SCHED_CBIT_ch(channel), 1 << 21);
1770 /* Refresh disable */
1771 mchbar_clrbits32(MC_INIT_STATE_G, 1 << 3);
1773 FOR_ALL_POPULATED_CHANNELS {
1774 /* Execute the same command queue */
1775 iosav_run_once_and_wait(channel);
1780 * Compensate the skew between CMD/ADDR/CLK and DQ/DQS lanes.
1782 * Since DDR3 uses a fly-by topology, the data and strobe signals reach the chips at different
1783 * times with respect to command, address and clock signals. By delaying either all DQ/DQS or
1784 * all CMD/ADDR/CLK signals, a full phase shift can be introduced. It is assumed that the
1785 * CLK/ADDR/CMD signals have the same routing delay.
1787 * To find the required phase shift the DRAM is placed in "write leveling" mode. In this mode,
1788 * the DRAM chip samples the CLK on every DQS edge and feeds back the sampled value on the data
1789 * lanes (DQ).
1791 static int jedec_write_leveling(ramctr_timing *ctrl)
1793 int channel, slotrank;
1795 disable_refresh_machine(ctrl);
1797 /* Enable write leveling on all ranks
1798 Disable all DQ outputs
1799 Only NOP is allowed in this mode */
1800 FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS
1801 write_mrreg(ctrl, channel, slotrank, 1,
1802 make_mr1(ctrl, slotrank, channel) | 1 << 12 | 1 << 7);
1804 /* Needs to be programmed before I/O reset below */
1805 const union gdcr_training_mod_reg training_mod = {
1806 .write_leveling_mode = 1,
1807 .enable_dqs_wl = 5,
1808 .odt_always_on = 1,
1809 .force_drive_enable = 1,
1811 mchbar_write32(GDCRTRAININGMOD, training_mod.raw);
1813 toggle_io_reset();
1815 /* Set any valid value for tx_dqs, it gets corrected later */
1816 FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS {
1817 const int err = write_level_rank(ctrl, channel, slotrank);
1818 if (err)
1819 return err;
1822 /* Disable write leveling on all ranks */
1823 FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS
1824 write_mrreg(ctrl, channel, slotrank, 1, make_mr1(ctrl, slotrank, channel));
1826 mchbar_write32(GDCRTRAININGMOD, 0);
1828 FOR_ALL_POPULATED_CHANNELS
1829 wait_for_iosav(channel);
1831 /* Refresh enable */
1832 mchbar_setbits32(MC_INIT_STATE_G, 1 << 3);
1834 FOR_ALL_POPULATED_CHANNELS {
1835 mchbar_clrbits32(SCHED_CBIT_ch(channel), 1 << 21);
1836 mchbar_read32(IOSAV_STATUS_ch(channel));
1837 wait_for_iosav(channel);
1839 iosav_write_zqcs_sequence(channel, 0, 4, 101, 31);
1841 iosav_run_once_and_wait(channel);
1844 toggle_io_reset();
1846 return 0;
1849 int write_training(ramctr_timing *ctrl)
1851 int channel, slotrank;
1852 int err;
1855 * Set the DEC_WRD bit, required for the write flyby algorithm.
1856 * Needs to be done before starting the write training procedure.
1858 FOR_ALL_POPULATED_CHANNELS
1859 mchbar_setbits32(TC_RWP_ch(channel), 1 << 27);
1861 printram("CPE\n");
1863 err = jedec_write_leveling(ctrl);
1864 if (err)
1865 return err;
1867 printram("CPF\n");
1869 FOR_ALL_POPULATED_CHANNELS {
1870 fill_pattern0(ctrl, channel, 0xaaaaaaaa, 0x55555555);
1873 FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS {
1874 err = tx_dq_write_leveling(ctrl, channel, slotrank);
1875 if (err)
1876 return err;
1879 FOR_ALL_POPULATED_CHANNELS
1880 program_timings(ctrl, channel);
1882 /* measure and adjust tx_dqs timings */
1883 train_write_flyby(ctrl);
1885 FOR_ALL_POPULATED_CHANNELS
1886 program_timings(ctrl, channel);
1888 return 0;
1891 static int test_command_training(ramctr_timing *ctrl, int channel, int slotrank)
1893 struct ram_rank_timings saved_rt = ctrl->timings[channel][slotrank];
1894 int tx_dq_delta;
1895 int lanes_ok = 0;
1896 int ctr = 0;
1897 int lane;
1899 for (tx_dq_delta = -5; tx_dq_delta <= 5; tx_dq_delta++) {
1900 FOR_ALL_LANES {
1901 ctrl->timings[channel][slotrank].lanes[lane].tx_dq =
1902 saved_rt.lanes[lane].tx_dq + tx_dq_delta;
1904 program_timings(ctrl, channel);
1905 FOR_ALL_LANES {
1906 mchbar_write32(IOSAV_By_ERROR_COUNT(lane), 0);
1909 /* Reset read WDB pointer */
1910 mchbar_write32(IOSAV_DATA_CTL_ch(channel), 0x1f);
1912 wait_for_iosav(channel);
1914 iosav_write_command_training_sequence(ctrl, channel, slotrank, ctr);
1916 /* Program LFSR for the RD/WR subsequences */
1917 mchbar_write32(IOSAV_n_ADDRESS_LFSR_ch(channel, 1), 0x389abcd);
1918 mchbar_write32(IOSAV_n_ADDRESS_LFSR_ch(channel, 2), 0x389abcd);
1920 iosav_run_once_and_wait(channel);
1922 FOR_ALL_LANES {
1923 u32 r32 = mchbar_read32(IOSAV_By_ERROR_COUNT_ch(channel, lane));
1925 if (r32 == 0)
1926 lanes_ok |= 1 << lane;
1928 ctr++;
1929 if (lanes_ok == ((1 << ctrl->lanes) - 1))
1930 break;
1933 ctrl->timings[channel][slotrank] = saved_rt;
1935 return lanes_ok != ((1 << ctrl->lanes) - 1);
1938 static void fill_pattern5(ramctr_timing *ctrl, int channel, int patno)
1940 unsigned int i, j;
1941 unsigned int offset = get_precedening_channels(ctrl, channel) * 64;
1942 unsigned int step = 64 * num_of_channels(ctrl);
1943 uintptr_t addr;
1945 if (patno) {
1946 u8 base8 = 0x80 >> ((patno - 1) % 8);
1947 u32 base = base8 | (base8 << 8) | (base8 << 16) | (base8 << 24);
1948 for (i = 0; i < 32; i++) {
1949 for (j = 0; j < 16; j++) {
1950 u32 val = use_base[patno - 1][i] & (1 << (j / 2)) ? base : 0;
1952 if (invert[patno - 1][i] & (1 << (j / 2)))
1953 val = ~val;
1955 addr = (1 << 26) + offset + i * step + j * 4;
1956 write32p(addr, val);
1959 } else {
1960 for (i = 0; i < ARRAY_SIZE(pattern); i++) {
1961 for (j = 0; j < 16; j++) {
1962 const u32 val = pattern[i][j];
1963 addr = (1 << 26) + offset + i * step + j * 4;
1964 write32p(addr, val);
1967 sfence();
1970 program_wdb_pattern_length(channel, 256);
1973 static void reprogram_320c(ramctr_timing *ctrl)
1975 disable_refresh_machine(ctrl);
1977 /* JEDEC reset */
1978 dram_jedecreset(ctrl);
1980 /* MRS commands */
1981 dram_mrscommands(ctrl);
1983 toggle_io_reset();
1986 #define CT_MIN_PI (-CCC_MAX_PI)
1987 #define CT_MAX_PI (+CCC_MAX_PI + 1)
1988 #define CT_PI_LENGTH (CT_MAX_PI - CT_MIN_PI + 1)
1990 #define MIN_C320C_LEN 13
1992 static int try_cmd_stretch(ramctr_timing *ctrl, int channel, int cmd_stretch)
1994 struct ram_rank_timings saved_timings[NUM_CHANNELS][NUM_SLOTRANKS];
1995 int slotrank;
1996 int command_pi;
1997 int stat[NUM_SLOTRANKS][CT_PI_LENGTH];
1998 int delta = 0;
2000 printram("Trying cmd_stretch %d on channel %d\n", cmd_stretch, channel);
2002 FOR_ALL_POPULATED_RANKS {
2003 saved_timings[channel][slotrank] = ctrl->timings[channel][slotrank];
2006 ctrl->cmd_stretch[channel] = cmd_stretch;
2008 const union tc_rap_reg tc_rap = {
2009 .tRRD = ctrl->tRRD,
2010 .tRTP = ctrl->tRTP,
2011 .tCKE = ctrl->tCKE,
2012 .tWTR = ctrl->tWTR,
2013 .tFAW = ctrl->tFAW,
2014 .tWR = ctrl->tWR,
2015 .tCMD = ctrl->cmd_stretch[channel],
2017 mchbar_write32(TC_RAP_ch(channel), tc_rap.raw);
2019 if (ctrl->cmd_stretch[channel] == 2)
2020 delta = 2;
2021 else if (ctrl->cmd_stretch[channel] == 0)
2022 delta = 4;
2024 FOR_ALL_POPULATED_RANKS {
2025 ctrl->timings[channel][slotrank].roundtrip_latency -= delta;
2028 for (command_pi = CT_MIN_PI; command_pi < CT_MAX_PI; command_pi++) {
2029 FOR_ALL_POPULATED_RANKS {
2030 ctrl->timings[channel][slotrank].pi_coding = command_pi;
2032 program_timings(ctrl, channel);
2033 reprogram_320c(ctrl);
2034 FOR_ALL_POPULATED_RANKS {
2035 stat[slotrank][command_pi - CT_MIN_PI] =
2036 test_command_training(ctrl, channel, slotrank);
2039 FOR_ALL_POPULATED_RANKS {
2040 struct run rn = get_longest_zero_run(stat[slotrank], CT_PI_LENGTH - 1);
2042 ctrl->timings[channel][slotrank].pi_coding = rn.middle + CT_MIN_PI;
2043 printram("cmd_stretch: %d, %d: % 4d-% 4d-% 4d\n",
2044 channel, slotrank, rn.start, rn.middle, rn.end);
2046 if (rn.all || rn.length < MIN_C320C_LEN) {
2047 FOR_ALL_POPULATED_RANKS {
2048 ctrl->timings[channel][slotrank] =
2049 saved_timings[channel][slotrank];
2051 return MAKE_ERR;
2055 return 0;
2059 * Adjust CMD phase shift and try multiple command rates.
2060 * A command rate of 2T doubles the time needed for address and command decode.
2062 int command_training(ramctr_timing *ctrl)
2064 int channel;
2066 FOR_ALL_POPULATED_CHANNELS {
2067 fill_pattern5(ctrl, channel, 0);
2070 FOR_ALL_POPULATED_CHANNELS {
2071 int cmdrate, err;
2074 * Dual DIMM per channel:
2075 * Issue:
2076 * While command training seems to succeed, raminit will fail in write training.
2078 * Workaround:
2079 * Skip 1T in dual DIMM mode, that's only supported by a few DIMMs.
2080 * Only try 1T mode for XMP DIMMs that request it in dual DIMM mode.
2082 * Single DIMM per channel:
2083 * Try command rate 1T and 2T
2085 cmdrate = ((ctrl->rankmap[channel] & 0x5) == 0x5);
2086 if (ctrl->tCMD)
2087 /* XMP gives the CMD rate in clock ticks, not ns */
2088 cmdrate = MIN(DIV_ROUND_UP(ctrl->tCMD, 256) - 1, 1);
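/*
 * Example (hypothetical XMP values): tCMD appears to be stored in 1/256 clock-tick
 * units, so tCMD = 256 (1T) yields DIV_ROUND_UP(256, 256) - 1 = 0 and tCMD = 512 (2T)
 * yields 1; the MIN() caps the requested command rate at 2T.
 */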
2090 for (; cmdrate < 2; cmdrate++) {
2091 err = try_cmd_stretch(ctrl, channel, cmdrate << 1);
2093 if (!err)
2094 break;
2097 if (err) {
2098 printk(BIOS_EMERG, "Command training failed: %d\n", channel);
2099 return err;
2102 printram("Using CMD rate %uT on channel %u\n", cmdrate + 1, channel);
2105 FOR_ALL_POPULATED_CHANNELS
2106 program_timings(ctrl, channel);
2108 reprogram_320c(ctrl);
2109 return 0;
2112 static int find_read_mpr_margin(ramctr_timing *ctrl, int channel, int slotrank, int *edges)
2114 int dqs_pi;
2115 int stats[NUM_LANES][MAX_EDGE_TIMING + 1];
2116 int lane;
2118 for (dqs_pi = 0; dqs_pi <= MAX_EDGE_TIMING; dqs_pi++) {
2119 FOR_ALL_LANES {
2120 ctrl->timings[channel][slotrank].lanes[lane].rx_dqs_p = dqs_pi;
2121 ctrl->timings[channel][slotrank].lanes[lane].rx_dqs_n = dqs_pi;
2123 program_timings(ctrl, channel);
2125 FOR_ALL_LANES {
2126 mchbar_write32(IOSAV_By_ERROR_COUNT_ch(channel, lane), 0);
2127 mchbar_read32(IOSAV_By_BW_SERROR_C_ch(channel, lane));
2130 wait_for_iosav(channel);
2132 iosav_write_read_mpr_sequence(
2133 channel, slotrank, ctrl->tMOD, 500, 4, 1, ctrl->CAS + 8);
2135 iosav_run_once_and_wait(channel);
2137 FOR_ALL_LANES {
2138 stats[lane][dqs_pi] = mchbar_read32(
2139 IOSAV_By_ERROR_COUNT_ch(channel, lane));
2143 FOR_ALL_LANES {
2144 struct run rn = get_longest_zero_run(stats[lane], MAX_EDGE_TIMING + 1);
2145 edges[lane] = rn.middle;
2147 if (rn.all) {
2148 printk(BIOS_EMERG, "Read MPR training failed: %d, %d, %d\n", channel,
2149 slotrank, lane);
2150 return MAKE_ERR;
2152 printram("eval %d, %d, %d: % 4d\n", channel, slotrank, lane, edges[lane]);
2154 return 0;
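/*
 * Summary of the routine below, read from its body: the MPR pattern is
 * sampled at two fixed rx_dqs settings (16 and 48), and the inverted per-lane
 * IOSAV_By_BW_SERROR result (low 8 bits) is then stored in IOSAV_By_BW_MASK.
 * The measured values themselves are not checked, as the XXX notes point out.
 */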
2157 static void find_predefined_pattern(ramctr_timing *ctrl, const int channel)
2159 int slotrank, lane;
2161 fill_pattern0(ctrl, channel, 0, 0);
2162 FOR_ALL_LANES {
2163 mchbar_write32(IOSAV_By_BW_MASK_ch(channel, lane), 0);
2164 mchbar_read32(IOSAV_By_BW_SERROR_C_ch(channel, lane));
2167 FOR_ALL_POPULATED_RANKS FOR_ALL_LANES {
2168 ctrl->timings[channel][slotrank].lanes[lane].rx_dqs_n = 16;
2169 ctrl->timings[channel][slotrank].lanes[lane].rx_dqs_p = 16;
2172 program_timings(ctrl, channel);
2174 FOR_ALL_POPULATED_RANKS {
2175 wait_for_iosav(channel);
2177 iosav_write_read_mpr_sequence(
2178 channel, slotrank, ctrl->tMOD, 3, 4, 1, ctrl->CAS + 8);
2180 iosav_run_once_and_wait(channel);
2183 /* XXX: should we check any measured value here? */
2185 FOR_ALL_POPULATED_RANKS FOR_ALL_LANES {
2186 ctrl->timings[channel][slotrank].lanes[lane].rx_dqs_n = 48;
2187 ctrl->timings[channel][slotrank].lanes[lane].rx_dqs_p = 48;
2190 program_timings(ctrl, channel);
2192 FOR_ALL_POPULATED_RANKS {
2193 wait_for_iosav(channel);
2195 iosav_write_read_mpr_sequence(
2196 channel, slotrank, ctrl->tMOD, 3, 4, 1, ctrl->CAS + 8);
2198 iosav_run_once_and_wait(channel);
2201 /* XXX: should we check any measured value here? */
2203 FOR_ALL_LANES {
2204 mchbar_write32(IOSAV_By_BW_MASK_ch(channel, lane),
2205 ~mchbar_read32(IOSAV_By_BW_SERROR_ch(channel, lane)) & 0xff);
2209 int read_mpr_training(ramctr_timing *ctrl)
2211 int falling_edges[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
2212 int rising_edges[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
2213 int channel, slotrank, lane;
2214 int err;
2216 mchbar_write32(GDCRTRAININGMOD, 0);
2218 toggle_io_reset();
2220 FOR_ALL_POPULATED_CHANNELS {
2221 find_predefined_pattern(ctrl, channel);
2223 fill_pattern0(ctrl, channel, 0, 0xffffffff);
2227 * FIXME: Under some conditions, the vendor BIOS sets both edges to the same value and
2228 * uses a single loop. This appears to be a debugging configuration.
2230 mchbar_write32(IOSAV_DC_MASK, 3 << 8);
2231 printram("discover falling edges:\n[%x] = %x\n", IOSAV_DC_MASK, 3 << 8);
2233 FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS {
2234 err = find_read_mpr_margin(ctrl, channel, slotrank,
2235 falling_edges[channel][slotrank]);
2236 if (err)
2237 return err;
2240 mchbar_write32(IOSAV_DC_MASK, 2 << 8);
2241 printram("discover rising edges:\n[%x] = %x\n", IOSAV_DC_MASK, 2 << 8);
2243 FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS {
2244 err = find_read_mpr_margin(ctrl, channel, slotrank,
2245 rising_edges[channel][slotrank]);
2246 if (err)
2247 return err;
2250 mchbar_write32(IOSAV_DC_MASK, 0);
2252 FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS FOR_ALL_LANES {
2253 ctrl->timings[channel][slotrank].lanes[lane].rx_dqs_n =
2254 falling_edges[channel][slotrank][lane];
2255 ctrl->timings[channel][slotrank].lanes[lane].rx_dqs_p =
2256 rising_edges[channel][slotrank][lane];
2259 FOR_ALL_POPULATED_CHANNELS {
2260 program_timings(ctrl, channel);
2263 FOR_ALL_POPULATED_CHANNELS FOR_ALL_LANES {
2264 mchbar_write32(IOSAV_By_BW_MASK_ch(channel, lane), 0);
2266 return 0;
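/*
 * Outline of the routine below: for every read Vref offset and test pattern
 * the passing rx_dqs window is measured, shrunk by edge_offset[i] on both
 * sides and intersected across runs; the final per-lane value is the midpoint
 * of that intersection, and training fails if the window collapses.
 */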
2269 static int find_agrsv_read_margin(ramctr_timing *ctrl, int channel, int slotrank, int *edges)
2271 const int rd_vref_offsets[] = { 0, 0xc, 0x2c };
2273 u32 raw_stats[MAX_EDGE_TIMING + 1];
2274 int lower[NUM_LANES];
2275 int upper[NUM_LANES];
2276 int lane, i, read_pi, pat;
2278 FOR_ALL_LANES {
2279 lower[lane] = 0;
2280 upper[lane] = MAX_EDGE_TIMING;
2283 for (i = 0; i < ARRAY_SIZE(rd_vref_offsets); i++) {
2284 const union gdcr_training_mod_reg training_mod = {
2285 .vref_gen_ctl = rd_vref_offsets[i],
2287 mchbar_write32(GDCRTRAININGMOD_ch(channel), training_mod.raw);
2288 printram("[%x] = 0x%08x\n", GDCRTRAININGMOD_ch(channel), training_mod.raw);
2290 for (pat = 0; pat < NUM_PATTERNS; pat++) {
2291 fill_pattern5(ctrl, channel, pat);
2292 printram("using pattern %d\n", pat);
2294 for (read_pi = 0; read_pi <= MAX_EDGE_TIMING; read_pi++) {
2295 FOR_ALL_LANES {
2296 ctrl->timings[channel][slotrank].lanes[lane]
2297 .rx_dqs_p = read_pi;
2298 ctrl->timings[channel][slotrank].lanes[lane]
2299 .rx_dqs_n = read_pi;
2301 program_timings(ctrl, channel);
2303 FOR_ALL_LANES {
2304 mchbar_write32(IOSAV_By_ERROR_COUNT_ch(channel, lane),
2306 mchbar_read32(IOSAV_By_BW_SERROR_C_ch(channel, lane));
2308 wait_for_iosav(channel);
2310 iosav_write_data_write_sequence(ctrl, channel, slotrank);
2312 iosav_run_once_and_wait(channel);
2314 FOR_ALL_LANES {
2315 mchbar_read32(IOSAV_By_ERROR_COUNT_ch(channel, lane));
2318 /* FIXME: This register only exists on Ivy Bridge */
2319 raw_stats[read_pi] = mchbar_read32(
2320 IOSAV_BYTE_SERROR_C_ch(channel));
2323 FOR_ALL_LANES {
2324 int stats[MAX_EDGE_TIMING + 1];
2325 struct run rn;
2327 for (read_pi = 0; read_pi <= MAX_EDGE_TIMING; read_pi++)
2328 stats[read_pi] = !!(raw_stats[read_pi] & (1 << lane));
2330 rn = get_longest_zero_run(stats, MAX_EDGE_TIMING + 1);
2332 printram("edges: %d, %d, %d: % 4d-% 4d-% 4d, "
2333 "% 4d-% 4d\n", channel, slotrank, i, rn.start,
2334 rn.middle, rn.end, rn.start + ctrl->edge_offset[i],
2335 rn.end - ctrl->edge_offset[i]);
2337 lower[lane] = MAX(rn.start + ctrl->edge_offset[i], lower[lane]);
2338 upper[lane] = MIN(rn.end - ctrl->edge_offset[i], upper[lane]);
2340 edges[lane] = (lower[lane] + upper[lane]) / 2;
2341 if (rn.all || (lower[lane] > upper[lane])) {
2342 printk(BIOS_EMERG, "Aggressive read training failed: "
2343 "%d, %d, %d\n", channel, slotrank, lane);
2345 return MAKE_ERR;
2351 /* Restore nominal Vref after training */
2352 mchbar_write32(GDCRTRAININGMOD_ch(channel), 0);
2353 printram("CPA\n");
2354 return 0;
2357 int aggressive_read_training(ramctr_timing *ctrl)
2359 int falling_edges[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
2360 int rising_edges[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
2361 int channel, slotrank, lane, err;
2364 * FIXME: Under some conditions, the vendor BIOS sets both edges to the same value and
2365 * uses a single loop. This appears to be a debugging configuration.
2367 mchbar_write32(IOSAV_DC_MASK, 3 << 8);
2368 printram("discover falling edges aggressive:\n[%x] = %x\n", IOSAV_DC_MASK, 3 << 8);
2370 FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS {
2371 err = find_agrsv_read_margin(ctrl, channel, slotrank,
2372 falling_edges[channel][slotrank]);
2373 if (err)
2374 return err;
2377 mchbar_write32(IOSAV_DC_MASK, 2 << 8);
2378 printram("discover rising edges aggressive:\n[%x] = %x\n", IOSAV_DC_MASK, 2 << 8);
2380 FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS {
2381 err = find_agrsv_read_margin(ctrl, channel, slotrank,
2382 rising_edges[channel][slotrank]);
2383 if (err)
2384 return err;
2387 mchbar_write32(IOSAV_DC_MASK, 0);
2389 FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS FOR_ALL_LANES {
2390 ctrl->timings[channel][slotrank].lanes[lane].rx_dqs_n =
2391 falling_edges[channel][slotrank][lane];
2393 ctrl->timings[channel][slotrank].lanes[lane].rx_dqs_p =
2394 rising_edges[channel][slotrank][lane];
2397 FOR_ALL_POPULATED_CHANNELS
2398 program_timings(ctrl, channel);
2400 return 0;
2403 static void test_aggressive_write(ramctr_timing *ctrl, int channel, int slotrank)
2405 wait_for_iosav(channel);
2407 iosav_write_aggressive_write_read_sequence(ctrl, channel, slotrank);
2409 iosav_run_once_and_wait(channel);
2412 static void set_write_vref(const int channel, const u8 wr_vref)
2414 mchbar_clrsetbits32(GDCRCMDDEBUGMUXCFG_Cz_S(channel), 0x3f << 24, wr_vref << 24);
2415 udelay(2);
2418 int aggressive_write_training(ramctr_timing *ctrl)
2420 const u8 wr_vref_offsets[3] = { 0, 0x0f, 0x2f };
2421 int i, pat;
2423 int lower[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
2424 int upper[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
2425 int channel, slotrank, lane;
2427 /* Changing the write Vref is only supported on some Ivy Bridge SKUs */
2428 if (!IS_IVY_CPU(ctrl->cpu))
2429 return 0;
2431 if (!(pci_read_config32(HOST_BRIDGE, CAPID0_A) & CAPID_WRTVREF))
2432 return 0;
2434 FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS FOR_ALL_LANES {
2435 lower[channel][slotrank][lane] = 0;
2436 upper[channel][slotrank][lane] = MAX_TX_DQ;
2439 /* Only enable IOSAV_n_SPECIAL_COMMAND_ADDR optimization on later steppings */
2440 const bool enable_iosav_opt = IS_IVY_CPU_D(ctrl->cpu) || IS_IVY_CPU_E(ctrl->cpu);
2442 if (enable_iosav_opt)
2443 mchbar_write32(MCMNTS_SPARE, 1);
2445 printram("Aggressive write training:\n");
2447 for (i = 0; i < ARRAY_SIZE(wr_vref_offsets); i++) {
2448 FOR_ALL_POPULATED_CHANNELS {
2449 set_write_vref(channel, wr_vref_offsets[i]);
2451 for (pat = 0; pat < NUM_PATTERNS; pat++) {
2452 FOR_ALL_POPULATED_RANKS {
2453 int tx_dq;
2454 u32 raw_stats[MAX_TX_DQ + 1];
2455 int stats[MAX_TX_DQ + 1];
2457 /* Make sure rn.start < rn.end */
2458 stats[MAX_TX_DQ] = 1;
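/*
 * The sentinel above guarantees one failing sample at the end of stats[], so
 * the zero run found later cannot extend past MAX_TX_DQ; this is presumably
 * what the "rn.start < rn.end" remark refers to.
 */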
2460 fill_pattern5(ctrl, channel, pat);
2462 for (tx_dq = 0; tx_dq < MAX_TX_DQ; tx_dq++) {
2463 FOR_ALL_LANES {
2464 ctrl->timings[channel][slotrank]
2465 .lanes[lane].tx_dq = tx_dq;
2467 program_timings(ctrl, channel);
2469 test_aggressive_write(ctrl, channel, slotrank);
2471 raw_stats[tx_dq] = mchbar_read32(
2472 IOSAV_BYTE_SERROR_C_ch(channel));
2474 FOR_ALL_LANES {
2475 struct run rn;
2476 for (tx_dq = 0; tx_dq < MAX_TX_DQ; tx_dq++) {
2477 stats[tx_dq] = !!(raw_stats[tx_dq]
2478 & (1 << lane));
2481 rn = get_longest_zero_run(stats, MAX_TX_DQ + 1);
2482 if (rn.all) {
2483 printk(BIOS_EMERG, "Aggressive "
2484 "write training failed: "
2485 "%d, %d, %d\n", channel,
2486 slotrank, lane);
2488 return MAKE_ERR;
2490 printram("tx_dq: %d, %d, %d: "
2491 "% 4d-% 4d-% 4d, "
2492 "% 4d-% 4d\n", channel, slotrank,
2493 i, rn.start, rn.middle, rn.end,
2494 rn.start + ctrl->tx_dq_offset[i],
2495 rn.end - ctrl->tx_dq_offset[i]);
2497 lower[channel][slotrank][lane] =
2498 MAX(rn.start + ctrl->tx_dq_offset[i],
2499 lower[channel][slotrank][lane]);
2501 upper[channel][slotrank][lane] =
2502 MIN(rn.end - ctrl->tx_dq_offset[i],
2503 upper[channel][slotrank][lane]);
2511 FOR_ALL_CHANNELS {
2512 /* Restore nominal write Vref after training */
2513 set_write_vref(channel, 0);
2516 /* Disable IOSAV_n_SPECIAL_COMMAND_ADDR optimization */
2517 if (enable_iosav_opt)
2518 mchbar_write32(MCMNTS_SPARE, 0);
2520 printram("CPB\n");
2522 FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS FOR_ALL_LANES {
2523 printram("tx_dq %d, %d, %d: % 4d\n", channel, slotrank, lane,
2524 (lower[channel][slotrank][lane] +
2525 upper[channel][slotrank][lane]) / 2);
2527 ctrl->timings[channel][slotrank].lanes[lane].tx_dq =
2528 (lower[channel][slotrank][lane] +
2529 upper[channel][slotrank][lane]) / 2;
2531 FOR_ALL_POPULATED_CHANNELS {
2532 program_timings(ctrl, channel);
2534 return 0;
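/*
 * Worked example for the normalization below (numbers are illustrative only):
 * if the largest per-lane rcven of a rank is 0x1c0, then mat >> 6 = 7; with an
 * io_latency of 5 the delta is 2, and both the roundtrip latency and the IO
 * latency of that rank are increased by 2.
 */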
2537 void normalize_training(ramctr_timing *ctrl)
2539 int channel, slotrank, lane;
2540 int mat;
2542 FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS {
2543 int delta;
2544 mat = 0;
2545 FOR_ALL_LANES mat =
2546 MAX(ctrl->timings[channel][slotrank].lanes[lane].rcven, mat);
2547 printram("normalize %d, %d, %d: mat %d\n",
2548 channel, slotrank, lane, mat);
2550 delta = (mat >> 6) - ctrl->timings[channel][slotrank].io_latency;
2551 printram("normalize %d, %d, %d: delta %d\n",
2552 channel, slotrank, lane, delta);
2554 ctrl->timings[channel][slotrank].roundtrip_latency += delta;
2555 ctrl->timings[channel][slotrank].io_latency += delta;
2558 FOR_ALL_POPULATED_CHANNELS {
2559 program_timings(ctrl, channel);
2563 int channel_test(ramctr_timing *ctrl)
2565 int channel, slotrank, lane;
2567 slotrank = 0;
2568 FOR_ALL_POPULATED_CHANNELS
2569 if (mchbar_read32(MC_INIT_STATE_ch(channel)) & 0xa000) {
2570 printk(BIOS_EMERG, "Mini channel test failed (1): %d\n", channel);
2571 return MAKE_ERR;
2573 FOR_ALL_POPULATED_CHANNELS {
2574 fill_pattern0(ctrl, channel, 0x12345678, 0x98765432);
2577 for (slotrank = 0; slotrank < 4; slotrank++)
2578 FOR_ALL_CHANNELS
2579 if (ctrl->rankmap[channel] & (1 << slotrank)) {
2580 FOR_ALL_LANES {
2581 mchbar_write32(IOSAV_By_ERROR_COUNT(lane), 0);
2582 mchbar_write32(IOSAV_By_BW_SERROR_C(lane), 0);
2584 wait_for_iosav(channel);
2586 iosav_write_memory_test_sequence(ctrl, channel, slotrank);
2588 iosav_run_once_and_wait(channel);
2590 FOR_ALL_LANES
2591 if (mchbar_read32(IOSAV_By_ERROR_COUNT_ch(channel, lane))) {
2592 printk(BIOS_EMERG, "Mini channel test failed (2): %d, %d, %d\n",
2593 channel, slotrank, lane);
2594 return MAKE_ERR;
2597 return 0;
2600 void channel_scrub(ramctr_timing *ctrl)
2602 int channel, slotrank, row, rowsize;
2603 u8 bank;
2605 FOR_ALL_POPULATED_CHANNELS {
2606 wait_for_iosav(channel);
2607 fill_pattern0(ctrl, channel, 0, 0);
2611 * During runtime the "scrubber" will periodically scan through the memory in the
2612 * physical address space to identify and fix CRC errors.
2613 * The following loop writes to every DRAM address, setting the ECC bits to the
2614 * correct value. A read from this location will no longer return a CRC error,
2615 * except when a bit has toggled due to external events.
2616 * The same could be achieved by writing to the physical memory map, but it's
2617 * much more difficult due to SMM remapping, ME stolen memory, GFX stolen memory,
2618 * and firmware running in x86_32.
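/*
 * Rough size of one scrub step, taken from the WR subsequence below:
 * 129 writes * 8 (burst length) * 8 (bus width) = 8256 bytes per activated
 * row. The queue is run 16 times per iteration, which presumably matches the
 * "row += 16" stride of the loop.
 */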
2620 FOR_ALL_POPULATED_CHANNELS FOR_ALL_POPULATED_RANKS {
2621 rowsize = 1 << ctrl->info.dimm[channel][slotrank >> 1].row_bits;
2622 for (bank = 0; bank < 8; bank++) {
2623 for (row = 0; row < rowsize; row += 16) {
2625 u8 gap = MAX((ctrl->tFAW >> 2) + 1, ctrl->tRRD);
2626 const struct iosav_ssq sequence[] = {
2628 * DRAM command ACT
2629 * Opens the row for writing.
2631 [0] = {
2632 .sp_cmd_ctrl = {
2633 .command = IOSAV_ACT,
2634 .ranksel_ap = 1,
2636 .subseq_ctrl = {
2637 .cmd_executions = 1,
2638 .cmd_delay_gap = gap,
2639 .post_ssq_wait = ctrl->tRCD,
2640 .data_direction = SSQ_NA,
2642 .sp_cmd_addr = {
2643 .address = row,
2644 .rowbits = 6,
2645 .bank = bank,
2646 .rank = slotrank,
2648 .addr_update = {
2649 .inc_addr_1 = 1,
2650 .addr_wrap = 18,
2654 * DRAM command WR
2655 * Writes (128 + 1) * 8 (burst length) * 8 (bus width)
2656 * bytes.
2658 [1] = {
2659 .sp_cmd_ctrl = {
2660 .command = IOSAV_WR,
2661 .ranksel_ap = 1,
2663 .subseq_ctrl = {
2664 .cmd_executions = 129,
2665 .cmd_delay_gap = 4,
2666 .post_ssq_wait = ctrl->tWTR +
2667 ctrl->CWL + 8,
2668 .data_direction = SSQ_WR,
2670 .sp_cmd_addr = {
2671 .address = row,
2672 .rowbits = 0,
2673 .bank = bank,
2674 .rank = slotrank,
2676 .addr_update = {
2677 .inc_addr_8 = 1,
2678 .addr_wrap = 9,
2682 * DRAM command PRE
2683 * Closes the row.
2685 [2] = {
2686 .sp_cmd_ctrl = {
2687 .command = IOSAV_PRE,
2688 .ranksel_ap = 1,
2690 .subseq_ctrl = {
2691 .cmd_executions = 1,
2692 .cmd_delay_gap = 4,
2693 .post_ssq_wait = ctrl->tRP,
2694 .data_direction = SSQ_NA,
2696 .sp_cmd_addr = {
2697 .address = 0,
2698 .rowbits = 6,
2699 .bank = bank,
2700 .rank = slotrank,
2702 .addr_update = {
2703 .addr_wrap = 18,
2707 iosav_write_sequence(channel, sequence, ARRAY_SIZE(sequence));
2709 iosav_run_queue(channel, 16, 0);
2711 wait_for_iosav(channel);
2717 void set_scrambling_seed(ramctr_timing *ctrl)
2719 int channel;
2721 /* FIXME: we hardcode seeds. Do we need to use some PRNG for them? I don't think so. */
2722 static u32 seeds[NUM_CHANNELS][3] = {
2723 {0x00009a36, 0xbafcfdcf, 0x46d1ab68},
2724 {0x00028bfa, 0x53fe4b49, 0x19ed5483}
2726 FOR_ALL_POPULATED_CHANNELS {
2727 mchbar_clrbits32(SCHED_CBIT_ch(channel), 1 << 28);
2728 mchbar_write32(SCRAMBLING_SEED_1_ch(channel), seeds[channel][0]);
2729 mchbar_write32(SCRAMBLING_SEED_2_HI_ch(channel), seeds[channel][1]);
2730 mchbar_write32(SCRAMBLING_SEED_2_LO_ch(channel), seeds[channel][2]);
2734 void set_wmm_behavior(const u32 cpu)
2736 if (IS_SANDY_CPU(cpu) && (IS_SANDY_CPU_D0(cpu) || IS_SANDY_CPU_D1(cpu))) {
2737 mchbar_write32(SC_WDBWM, 0x141d1519);
2738 } else {
2739 mchbar_write32(SC_WDBWM, 0x551d1519);
2743 void prepare_training(ramctr_timing *ctrl)
2745 int channel;
2747 FOR_ALL_POPULATED_CHANNELS {
2748 /* Always drive command bus */
2749 mchbar_setbits32(TC_RAP_ch(channel), 1 << 29);
2752 udelay(1);
2754 FOR_ALL_POPULATED_CHANNELS {
2755 wait_for_iosav(channel);
2759 void set_read_write_timings(ramctr_timing *ctrl)
2761 /* Use a larger delay when running fast to improve stability */
2762 const u32 tRWDRDD_inc = ctrl->tCK <= TCK_1066MHZ ? 4 : 2;
2764 int channel, slotrank;
2766 FOR_ALL_POPULATED_CHANNELS {
2767 int min_pi = 10000;
2768 int max_pi = -10000;
2770 FOR_ALL_POPULATED_RANKS {
2771 max_pi = MAX(ctrl->timings[channel][slotrank].pi_coding, max_pi);
2772 min_pi = MIN(ctrl->timings[channel][slotrank].pi_coding, min_pi);
2775 const u32 tWRDRDD = (max_pi - min_pi > 51) ? 0 : ctrl->ref_card_offset[channel];
2777 const u32 val = (ctrl->pi_coding_threshold < max_pi - min_pi) ? 3 : 2;
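/*
 * Illustration of the two decisions above (numbers invented for clarity):
 * with per-rank command PI codes of 40 and 100 the spread is 60, so tWRDRDD
 * is forced to 0 (spread > 51); if pi_coding_threshold is below 60, the
 * same-DIMM turnaround fields tRRDD/tWWDR/tWWDD use 3 instead of 2.
 */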
2779 dram_odt_stretch(ctrl, channel);
2781 const union tc_rwp_reg tc_rwp = {
2782 .tRRDR = 0,
2783 .tRRDD = val,
2784 .tWWDR = val,
2785 .tWWDD = val,
2786 .tRWDRDD = ctrl->ref_card_offset[channel] + tRWDRDD_inc,
2787 .tWRDRDD = tWRDRDD,
2788 .tRWSR = 2,
2789 .dec_wrd = 1,
2791 mchbar_write32(TC_RWP_ch(channel), tc_rwp.raw);
2795 void set_normal_operation(ramctr_timing *ctrl)
2797 int channel;
2798 FOR_ALL_POPULATED_CHANNELS {
2799 mchbar_write32(MC_INIT_STATE_ch(channel), 1 << 12 | ctrl->rankmap[channel]);
2800 mchbar_clrbits32(TC_RAP_ch(channel), 1 << 29);
2804 /* Encode the watermark latencies in a suitable format for graphics driver consumption */
2805 static int encode_wm(int ns)
2807 return (ns + 499) / 500;
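/* Example: encode_wm(1200) = (1200 + 499) / 500 = 3, i.e. three 500 ns units. */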
2810 /* FIXME: values in this function should be hardware revision-dependent */
2811 void final_registers(ramctr_timing *ctrl)
2813 int channel;
2814 int t1_cycles = 0, t1_ns = 0, t2_ns;
2815 int t3_ns;
2816 u32 r32;
2818 /* FIXME: This register only exists on Ivy Bridge */
2819 mchbar_write32(WMM_READ_CONFIG, 0x46);
2821 FOR_ALL_CHANNELS {
2822 union tc_othp_reg tc_othp = {
2823 .raw = mchbar_read32(TC_OTHP_ch(channel)),
2825 tc_othp.tCPDED = 1;
2826 mchbar_write32(TC_OTHP_ch(channel), tc_othp.raw);
2828 /* 64 DCLKs until idle, decision per rank */
2829 r32 = get_power_down_mode(ctrl, channel) << 8 | 64;
2830 mchbar_write32(PM_PDWN_CONFIG_ch(channel), r32);
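/*
 * Composition of the value written above: bits 7:0 carry the 64 DCLK idle
 * timer and the power-down mode returned by get_power_down_mode() is placed
 * at bit 8 (read directly from the expression; field widths not verified).
 */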
2832 mchbar_write32(PM_TRML_M_CONFIG_ch(channel), 0x00000aaa);
2835 mchbar_write32(PM_BW_LIMIT_CONFIG, 0x5f7003ff);
2836 mchbar_write32(PM_DLL_CONFIG, 0x00073000 | ctrl->mdll_wake_delay);
2838 FOR_ALL_CHANNELS {
2839 switch (ctrl->rankmap[channel]) {
2840 /* Unpopulated channel */
2841 case 0:
2842 mchbar_write32(PM_CMD_PWR_ch(channel), 0);
2843 break;
2844 /* Only single-ranked DIMMs */
2845 case 1:
2846 case 4:
2847 case 5:
2848 mchbar_write32(PM_CMD_PWR_ch(channel), 0x00373131);
2849 break;
2850 /* Dual-ranked DIMMs present */
2851 default:
2852 mchbar_write32(PM_CMD_PWR_ch(channel), 0x009b6ea1);
2853 break;
2857 mchbar_write32(MEM_TRML_ESTIMATION_CONFIG, 0xca9171e5);
2858 mchbar_clrsetbits32(MEM_TRML_THRESHOLDS_CONFIG, 0x00ffffff, 0x00e4d5d0);
2859 mchbar_clrbits32(MEM_TRML_INTERRUPT, 0x1f);
2861 FOR_ALL_CHANNELS {
2862 union tc_rfp_reg tc_rfp = {
2863 .raw = mchbar_read32(TC_RFP_ch(channel)),
2865 tc_rfp.refresh_2x_control = 1;
2866 mchbar_write32(TC_RFP_ch(channel), tc_rfp.raw);
2869 mchbar_setbits32(MC_INIT_STATE_G, 1 << 0);
2870 mchbar_setbits32(MC_INIT_STATE_G, 1 << 7);
2871 mchbar_write32(BANDTIMERS_SNB, 0xfa);
2873 /* Find a populated channel */
2874 FOR_ALL_POPULATED_CHANNELS
2875 break;
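/*
 * The loop above consists of a single 'break', so it merely leaves 'channel'
 * pointing at the first populated channel for the register reads below.
 */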
2877 t1_cycles = (mchbar_read32(TC_ZQCAL_ch(channel)) >> 8) & 0xff;
2878 r32 = mchbar_read32(PM_DLL_CONFIG);
2879 if (r32 & (1 << 17))
2880 t1_cycles += (r32 & 0xfff);
2881 t1_cycles += mchbar_read32(TC_SRFTP_ch(channel)) & 0xfff;
2882 t1_ns = t1_cycles * ctrl->tCK / 256 + 544;
2883 if (!(r32 & (1 << 17)))
2884 t1_ns += 500;
2886 t2_ns = 10 * ((mchbar_read32(SAPMTIMERS) >> 8) & 0xfff);
2887 if (mchbar_read32(SAPMCTL) & 8) {
2888 t3_ns = 10 * ((mchbar_read32(BANDTIMERS_IVB) >> 8) & 0xfff);
2889 t3_ns += 10 * (mchbar_read32(SAPMTIMERS2_IVB) & 0xff);
2890 } else {
2891 t3_ns = 500;
2894 /* The graphics driver will use these watermark values */
2895 printk(BIOS_DEBUG, "t123: %d, %d, %d\n", t1_ns, t2_ns, t3_ns);
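/*
 * SSKPD packing, as read from the expression below: only the low six bits of
 * each byte are updated (mask 0x3f3f3f3f). Byte 1 holds WM(t1), byte 2 holds
 * WM(t1) + WM(t2), byte 3 holds WM(t1) + WM(t2) + WM(t3), and the lowest byte
 * keeps the fixed value 0x0c. All watermarks are in 500 ns units.
 */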
2896 mchbar_clrsetbits32(SSKPD, 0x3f3f3f3f,
2897 ((encode_wm(t1_ns) + encode_wm(t2_ns)) << 16) | (encode_wm(t1_ns) << 8) |
2898 ((encode_wm(t3_ns) + encode_wm(t2_ns) + encode_wm(t1_ns)) << 24) | 0x0c);
2901 void restore_timings(ramctr_timing *ctrl)
2903 int channel;
2905 FOR_ALL_POPULATED_CHANNELS {
2906 const union tc_rap_reg tc_rap = {
2907 .tRRD = ctrl->tRRD,
2908 .tRTP = ctrl->tRTP,
2909 .tCKE = ctrl->tCKE,
2910 .tWTR = ctrl->tWTR,
2911 .tFAW = ctrl->tFAW,
2912 .tWR = ctrl->tWR,
2913 .tCMD = ctrl->cmd_stretch[channel],
2915 mchbar_write32(TC_RAP_ch(channel), tc_rap.raw);
2918 udelay(1);
2920 FOR_ALL_POPULATED_CHANNELS {
2921 wait_for_iosav(channel);
2924 FOR_ALL_POPULATED_CHANNELS
2925 mchbar_setbits32(TC_RWP_ch(channel), 1 << 27);
2927 FOR_ALL_POPULATED_CHANNELS {
2928 udelay(1);
2929 mchbar_setbits32(SCHED_CBIT_ch(channel), 1 << 21);
2932 printram("CPE\n");
2934 mchbar_write32(GDCRTRAININGMOD, 0);
2935 mchbar_write32(IOSAV_DC_MASK, 0);
2937 printram("CP5b\n");
2939 FOR_ALL_POPULATED_CHANNELS {
2940 program_timings(ctrl, channel);
2943 u32 reg, addr;
2945 /* Poll for RCOMP */
2946 while (!(mchbar_read32(RCOMP_TIMER) & (1 << 16)))
2949 do {
2950 reg = mchbar_read32(IOSAV_STATUS_ch(0));
2951 } while ((reg & 0x14) == 0);
2953 /* Set state of memory controller */
2954 mchbar_write32(MC_INIT_STATE_G, 0x116);
2955 mchbar_write32(MC_INIT_STATE, 0);
2957 /* Wait 500us */
2958 udelay(500);
2960 FOR_ALL_CHANNELS {
2961 /* Set valid rank CKE */
2962 reg = 0;
2963 reg = (reg & ~0x0f) | ctrl->rankmap[channel];
2964 addr = MC_INIT_STATE_ch(channel);
2965 mchbar_write32(addr, reg);
2967 /* Wait 10ns for ranks to settle */
2968 // udelay(0.01);
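/*
 * udelay() takes whole microseconds, which is presumably why the 10 ns settle
 * time is only noted as a commented-out hint rather than an actual delay.
 */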
2970 reg = (reg & ~0xf0) | (ctrl->rankmap[channel] << 4);
2971 mchbar_write32(addr, reg);
2973 /* Write reset using a NOP */
2974 write_reset(ctrl);
2977 /* MRS commands */
2978 dram_mrscommands(ctrl);
2980 printram("CP5c\n");
2982 mchbar_write32(GDCRTRAININGMOD_ch(0), 0);
2984 FOR_ALL_CHANNELS {
2985 mchbar_clrbits32(GDCRCMDDEBUGMUXCFG_Cz_S(channel), 0x3f << 24);
2986 udelay(2);