2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
6 * Copyright (C) 1999,2001-2006 Silicon Graphics, Inc. All rights reserved.
9 #include <linux/module.h>
10 #include <linux/init.h>
11 #include <linux/delay.h>
12 #include <linux/kernel.h>
13 #include <linux/kdev_t.h>
14 #include <linux/string.h>
15 #include <linux/tty.h>
16 #include <linux/console.h>
17 #include <linux/timex.h>
18 #include <linux/sched.h>
19 #include <linux/ioport.h>
21 #include <linux/serial.h>
22 #include <linux/irq.h>
23 #include <linux/bootmem.h>
24 #include <linux/mmzone.h>
25 #include <linux/interrupt.h>
26 #include <linux/acpi.h>
27 #include <linux/compiler.h>
28 #include <linux/sched.h>
29 #include <linux/root_dev.h>
30 #include <linux/nodemask.h>
32 #include <linux/efi.h>
36 #include <asm/machvec.h>
37 #include <asm/system.h>
38 #include <asm/processor.h>
40 #include <asm/sn/arch.h>
41 #include <asm/sn/addrs.h>
42 #include <asm/sn/pda.h>
43 #include <asm/sn/nodepda.h>
44 #include <asm/sn/sn_cpuid.h>
45 #include <asm/sn/simulator.h>
46 #include <asm/sn/leds.h>
47 #include <asm/sn/bte.h>
48 #include <asm/sn/shub_mmr.h>
49 #include <asm/sn/clksupport.h>
50 #include <asm/sn/sn_sal.h>
51 #include <asm/sn/geo.h>
52 #include <asm/sn/sn_feature_sets.h>
53 #include "xtalk/xwidgetdev.h"
54 #include "xtalk/hubdev.h"
55 #include <asm/sn/klconfig.h>
58 DEFINE_PER_CPU(struct pda_s
, pda_percpu
);
60 #define MAX_PHYS_MEMORY (1UL << IA64_MAX_PHYS_BITS) /* Max physical address supported */
62 extern void bte_init_node(nodepda_t
*, cnodeid_t
);
64 extern void sn_timer_init(void);
65 extern unsigned long last_time_offset
;
66 extern void (*ia64_mark_idle
) (int);
67 extern void snidle(int);
68 extern unsigned char acpi_kbd_controller_present
;
69 extern unsigned long long (*ia64_printk_clock
)(void);
71 unsigned long sn_rtc_cycles_per_second
;
72 EXPORT_SYMBOL(sn_rtc_cycles_per_second
);
74 DEFINE_PER_CPU(struct sn_hub_info_s
, __sn_hub_info
);
75 EXPORT_PER_CPU_SYMBOL(__sn_hub_info
);
77 DEFINE_PER_CPU(short, __sn_cnodeid_to_nasid
[MAX_COMPACT_NODES
]);
78 EXPORT_PER_CPU_SYMBOL(__sn_cnodeid_to_nasid
);
80 DEFINE_PER_CPU(struct nodepda_s
*, __sn_nodepda
);
81 EXPORT_PER_CPU_SYMBOL(__sn_nodepda
);
83 char sn_system_serial_number_string
[128];
84 EXPORT_SYMBOL(sn_system_serial_number_string
);
85 u64 sn_partition_serial_number
;
86 EXPORT_SYMBOL(sn_partition_serial_number
);
88 EXPORT_SYMBOL(sn_partition_id
);
90 EXPORT_SYMBOL(sn_system_size
);
91 u8 sn_sharing_domain_size
;
92 EXPORT_SYMBOL(sn_sharing_domain_size
);
94 EXPORT_SYMBOL(sn_coherency_id
);
96 EXPORT_SYMBOL(sn_region_size
);
97 int sn_prom_type
; /* 0=hardware, 1=medusa/realprom, 2=medusa/fakeprom */
99 short physical_node_map
[MAX_NUMALINK_NODES
];
100 static unsigned long sn_prom_features
[MAX_PROM_FEATURE_SETS
];
102 EXPORT_SYMBOL(physical_node_map
);
106 static void sn_init_pdas(char **);
107 static void build_cnode_tables(void);
109 static nodepda_t
*nodepdaindr
[MAX_COMPACT_NODES
];
112 * The format of "screen_info" is strange, and due to early i386-setup
113 * code. This is just enough to make the console code think we're on a
116 struct screen_info sn_screen_info
= {
119 .orig_video_mode
= 3,
120 .orig_video_cols
= 80,
121 .orig_video_ega_bx
= 3,
122 .orig_video_lines
= 25,
123 .orig_video_isVGA
= 1,
124 .orig_video_points
= 16
128 * This routine can only be used during init, since
129 * smp_boot_data is an init data structure.
130 * We have to use smp_boot_data.cpu_phys_id to find
131 * the physical id of the processor because the normal
132 * cpu_physical_id() relies on data structures that
133 * may not be initialized yet.
136 static int __init
pxm_to_nasid(int pxm
)
141 nid
= pxm_to_node(pxm
);
142 for (i
= 0; i
< num_node_memblks
; i
++) {
143 if (node_memblk
[i
].nid
== nid
) {
144 return NASID_GET(node_memblk
[i
].start_paddr
);
151 * early_sn_setup - early setup routine for SN platforms
153 * Sets up an initial console to aid debugging. Intended primarily
154 * for bringup. See start_kernel() in init/main.c.
157 void __init
early_sn_setup(void)
159 efi_system_table_t
*efi_systab
;
160 efi_config_table_t
*config_tables
;
161 struct ia64_sal_systab
*sal_systab
;
162 struct ia64_sal_desc_entry_point
*ep
;
167 * Parse enough of the SAL tables to locate the SAL entry point. Since console
168 * IO on SN2 is done via SAL calls, early_printk won't work without this.
170 * This code duplicates some of the ACPI table parsing that is in efi.c & sal.c.
171 * Any changes to those files may have to be made here as well.
173 efi_systab
= (efi_system_table_t
*) __va(ia64_boot_param
->efi_systab
);
174 config_tables
= __va(efi_systab
->tables
);
175 for (i
= 0; i
< efi_systab
->nr_tables
; i
++) {
176 if (efi_guidcmp(config_tables
[i
].guid
, SAL_SYSTEM_TABLE_GUID
) ==
178 sal_systab
= __va(config_tables
[i
].table
);
179 p
= (char *)(sal_systab
+ 1);
180 for (j
= 0; j
< sal_systab
->entry_count
; j
++) {
181 if (*p
== SAL_DESC_ENTRY_POINT
) {
182 ep
= (struct ia64_sal_desc_entry_point
184 ia64_sal_handler_init(__va
189 p
+= SAL_DESC_SIZE(*p
);
193 /* Uh-oh, SAL not available?? */
194 printk(KERN_ERR
"failed to find SAL entry point\n");
197 extern int platform_intr_list
[];
198 static int __initdata shub_1_1_found
;
203 * Set flag for enabling shub specific wars
206 static inline int __init
is_shub_1_1(int nasid
)
213 id
= REMOTE_HUB_L(nasid
, SH1_SHUB_ID
);
214 rev
= (id
& SH1_SHUB_ID_REVISION_MASK
) >> SH1_SHUB_ID_REVISION_SHFT
;
218 static void __init
sn_check_for_wars(void)
225 for_each_online_node(cnode
) {
226 if (is_shub_1_1(cnodeid_to_nasid(cnode
)))
233 * Scan the EFI PCDP table (if it exists) for an acceptable VGA console
234 * output device. If one exists, pick it and set sn_legacy_{io,mem} to
235 * reflect the bus offsets needed to address it.
237 * Since PCDP is not supported on SN in the 2.4 kernel (or at least
238 * the one lbs is based on), just declare the needed structs here.
240 * Reference spec http://www.dig64.org/specifications/DIG64_PCDPv20.pdf
242 * Returns 0 if no acceptable vga is found, !0 otherwise.
244 * Note: This stuff is duped here because Altix requires the PCDP to
245 * locate a usable VGA device due to lack of proper ACPI support. Structures
246 * could be used from drivers/firmware/pcdp.h, but it was decided that moving
247 * this file to a more public location just for Altix use was undesirable.
250 struct hcdp_uart_desc
{
255 u8 signature
[4]; /* should be 'HCDP' */
257 u8 rev
; /* should be >=3 for pcdp, <3 for hcdp */
265 struct hcdp_uart_desc uart
[0]; /* num_type0 of these */
266 /* pcdp descriptors follow */
267 } __attribute__((packed
));
269 struct pcdp_device_desc
{
274 /* interconnect specific structure follows */
275 /* device specific structure follows that */
276 } __attribute__((packed
));
278 struct pcdp_interface_pci
{
279 u8 type
; /* 1 == pci */
293 } __attribute__((packed
));
295 struct pcdp_vga_device
{
297 /* ACPI Extended Address Space Desc follows */
298 } __attribute__((packed
));
300 /* from pcdp_device_desc.primary */
301 #define PCDP_PRIMARY_CONSOLE 0x01
303 /* from pcdp_device_desc.type */
304 #define PCDP_CONSOLE_INOUT 0x0
305 #define PCDP_CONSOLE_DEBUG 0x1
306 #define PCDP_CONSOLE_OUT 0x2
307 #define PCDP_CONSOLE_IN 0x3
308 #define PCDP_CONSOLE_TYPE_VGA 0x8
310 #define PCDP_CONSOLE_VGA (PCDP_CONSOLE_TYPE_VGA | PCDP_CONSOLE_OUT)
312 /* from pcdp_interface_pci.type */
313 #define PCDP_IF_PCI 1
315 /* from pcdp_interface_pci.translation */
316 #define PCDP_PCI_TRANS_IOPORT 0x02
317 #define PCDP_PCI_TRANS_MMIO 0x01
319 #if defined(CONFIG_VT) && defined(CONFIG_VGA_CONSOLE)
325 struct pcdp_device_desc device
;
326 struct pcdp_interface_pci if_pci
;
327 extern struct efi efi
;
329 if (efi
.hcdp
== EFI_INVALID_TABLE_ADDR
)
330 return; /* no hcdp/pcdp table */
332 pcdp
= __va(efi
.hcdp
);
335 return; /* only support PCDP (rev >= 3) */
337 for (bp
= (u8
*)&pcdp
->uart
[pcdp
->num_type0
];
338 bp
< (u8
*)pcdp
+ pcdp
->length
;
339 bp
+= device
.length
) {
340 memcpy(&device
, bp
, sizeof(device
));
341 if (! (device
.primary
& PCDP_PRIMARY_CONSOLE
))
342 continue; /* not primary console */
344 if (device
.type
!= PCDP_CONSOLE_VGA
)
345 continue; /* not VGA descriptor */
347 memcpy(&if_pci
, bp
+sizeof(device
), sizeof(if_pci
));
348 if (if_pci
.type
!= PCDP_IF_PCI
)
349 continue; /* not PCI interconnect */
351 if (if_pci
.translation
& PCDP_PCI_TRANS_IOPORT
)
353 if_pci
.ioport_tra
| __IA64_UNCACHED_OFFSET
;
355 if (if_pci
.translation
& PCDP_PCI_TRANS_MMIO
)
356 vga_console_membase
=
357 if_pci
.mmio_tra
| __IA64_UNCACHED_OFFSET
;
359 break; /* once we find the primary, we're done */
364 static unsigned long sn2_rtc_initial
;
366 static unsigned long long ia64_sn2_printk_clock(void)
368 unsigned long rtc_now
= rtc_time();
370 return (rtc_now
- sn2_rtc_initial
) *
371 (1000000000 / sn_rtc_cycles_per_second
);
375 * sn_setup - SN platform setup routine
376 * @cmdline_p: kernel command line
378 * Handles platform setup for SN machines. This includes determining
379 * the RTC frequency (via a SAL call), initializing secondary CPUs, and
380 * setting up per-node data areas. The console is also initialized here.
382 void __init
sn_setup(char **cmdline_p
)
384 long status
, ticks_per_sec
, drift
;
385 u32 version
= sn_sal_rev();
386 extern void sn_cpu_init(void);
388 sn2_rtc_initial
= rtc_time();
389 ia64_sn_plat_set_error_handling_features(); // obsolete
390 ia64_sn_set_os_feature(OSF_MCA_SLV_TO_OS_INIT_SLV
);
391 ia64_sn_set_os_feature(OSF_FEAT_LOG_SBES
);
394 #if defined(CONFIG_VT) && defined(CONFIG_VGA_CONSOLE)
396 * Handle SN vga console.
398 * SN systems do not have enough ACPI table information
399 * being passed from prom to identify VGA adapters and the legacy
400 * addresses to access them. Until that is done, SN systems rely
401 * on the PCDP table to identify the primary VGA console if one
404 * However, kernel PCDP support is optional, and even if it is built
405 * into the kernel, it will not be used if the boot cmdline contains
406 * console= directives.
408 * So, to work around this mess, we duplicate some of the PCDP code
409 * here so that the primary VGA console (as defined by PCDP) will
410 * work on SN systems even if a different console (e.g. serial) is
411 * selected on the boot line (or CONFIG_EFI_PCDP is off).
414 if (! vga_console_membase
)
417 if (vga_console_membase
) {
418 /* usable vga ... make tty0 the preferred default console */
419 if (!strstr(*cmdline_p
, "console="))
420 add_preferred_console("tty", 0, NULL
);
422 printk(KERN_DEBUG
"SGI: Disabling VGA console\n");
423 if (!strstr(*cmdline_p
, "console="))
424 add_preferred_console("ttySG", 0, NULL
);
425 #ifdef CONFIG_DUMMY_CONSOLE
426 conswitchp
= &dummy_con
;
429 #endif /* CONFIG_DUMMY_CONSOLE */
431 #endif /* def(CONFIG_VT) && def(CONFIG_VGA_CONSOLE) */
433 MAX_DMA_ADDRESS
= PAGE_OFFSET
+ MAX_PHYS_MEMORY
;
436 * Build the tables for managing cnodes.
438 build_cnode_tables();
441 ia64_sal_freq_base(SAL_FREQ_BASE_REALTIME_CLOCK
, &ticks_per_sec
,
443 if (status
!= 0 || ticks_per_sec
< 100000) {
445 "unable to determine platform RTC clock frequency, guessing.\n");
446 /* PROM gives wrong value for clock freq. so guess */
447 sn_rtc_cycles_per_second
= 1000000000000UL / 30000UL;
449 sn_rtc_cycles_per_second
= ticks_per_sec
;
451 platform_intr_list
[ACPI_INTERRUPT_CPEI
] = IA64_CPE_VECTOR
;
453 ia64_printk_clock
= ia64_sn2_printk_clock
;
456 * Old PROMs do not provide an ACPI FADT. Disable legacy keyboard
457 * support here so we don't have to listen to failed keyboard probe
460 if (is_shub1() && version
<= 0x0209 && acpi_kbd_controller_present
) {
461 printk(KERN_INFO
"Disabling legacy keyboard support as prom "
462 "is too old and doesn't provide FADT\n");
463 acpi_kbd_controller_present
= 0;
466 printk("SGI SAL version %x.%02x\n", version
>> 8, version
& 0x00FF);
469 * we set the default root device to /dev/hda1 (Root_HDA1)
470 * to make simulation easy
472 ROOT_DEV
= Root_HDA1
;
475 * Create the PDAs and NODEPDAs for all the cpus.
477 sn_init_pdas(cmdline_p
);
479 ia64_mark_idle
= &snidle
;
482 * For the bootcpu, we do this here. All other cpus will make the
483 * call as part of cpu_init in slave cpu initialization.
490 screen_info
= sn_screen_info
;
495 * set pm_power_off to a SAL call to allow
496 * sn machines to power off. The SAL call can be replaced
497 * by an ACPI interface call when ACPI is fully implemented
500 pm_power_off
= ia64_sn_power_down
;
501 current
->thread
.flags
|= IA64_THREAD_MIGRATION
;
505 * sn_init_pdas - setup node data areas
507 * One time setup for Node Data Area. Called by sn_setup().
509 static void __init
sn_init_pdas(char **cmdline_p
)
514 * Allocate & initialize the nodepda for each node.
516 for_each_online_node(cnode
) {
518 alloc_bootmem_node(NODE_DATA(cnode
), sizeof(nodepda_t
));
519 memset(nodepdaindr
[cnode
], 0, sizeof(nodepda_t
));
520 memset(nodepdaindr
[cnode
]->phys_cpuid
, -1,
521 sizeof(nodepdaindr
[cnode
]->phys_cpuid
));
522 spin_lock_init(&nodepdaindr
[cnode
]->ptc_lock
);
526 * Allocate & initialize nodepda for TIOs. For now, put them on node 0.
528 for (cnode
= num_online_nodes(); cnode
< num_cnodes
; cnode
++) {
530 alloc_bootmem_node(NODE_DATA(0), sizeof(nodepda_t
));
531 memset(nodepdaindr
[cnode
], 0, sizeof(nodepda_t
));
535 * Now copy the array of nodepda pointers to each nodepda.
537 for (cnode
= 0; cnode
< num_cnodes
; cnode
++)
538 memcpy(nodepdaindr
[cnode
]->pernode_pdaindr
, nodepdaindr
,
539 sizeof(nodepdaindr
));
542 * Set up IO related platform-dependent nodepda fields.
543 * The following routine actually sets up the hubinfo struct
546 for_each_online_node(cnode
) {
547 bte_init_node(nodepdaindr
[cnode
], cnode
);
551 * Initialize the per node hubdev. This includes IO Nodes and
552 * headless/memless nodes.
554 for (cnode
= 0; cnode
< num_cnodes
; cnode
++) {
555 hubdev_init_node(nodepdaindr
[cnode
], cnode
);
560 * sn_cpu_init - initialize per-cpu data areas
561 * Takes no arguments; the calling cpu's id is read with smp_processor_id().
563 * Called during cpu initialization on each cpu as it starts.
564 * Currently, initializes the per-cpu data area for SNIA.
565 * Also sets up a few fields in the nodepda. Also known as
566 * platform_cpu_init() by the ia64 machvec code.
568 void __init
sn_cpu_init(void)
577 static int wars_have_been_checked
;
579 cpuid
= smp_processor_id();
580 if (cpuid
== 0 && IS_MEDUSA()) {
581 if (ia64_sn_is_fake_prom())
585 printk(KERN_INFO
"Running on medusa with %s PROM\n",
586 (sn_prom_type
== 1) ? "real" : "fake");
589 memset(pda
, 0, sizeof(pda
));
590 if (ia64_sn_get_sn_info(0, &sn_hub_info
->shub2
,
591 &sn_hub_info
->nasid_bitmask
,
592 &sn_hub_info
->nasid_shift
,
593 &sn_system_size
, &sn_sharing_domain_size
,
594 &sn_partition_id
, &sn_coherency_id
,
597 sn_hub_info
->as_shift
= sn_hub_info
->nasid_shift
- 2;
600 * Don't check status. The SAL call is not supported on all PROMs
601 * but a failure is harmless.
603 (void) ia64_sn_set_cpu_number(cpuid
);
606 * The boot cpu makes this call again after platform initialization is
609 if (nodepdaindr
[0] == NULL
)
612 for (i
= 0; i
< MAX_PROM_FEATURE_SETS
; i
++)
613 if (ia64_sn_get_prom_feature_set(i
, &sn_prom_features
[i
]) != 0)
616 cpuphyid
= get_sapicid();
618 if (ia64_sn_get_sapic_info(cpuphyid
, &nasid
, &subnode
, &slice
))
621 for (i
=0; i
< MAX_NUMNODES
; i
++) {
622 if (nodepdaindr
[i
]) {
623 nodepdaindr
[i
]->phys_cpuid
[cpuid
].nasid
= nasid
;
624 nodepdaindr
[i
]->phys_cpuid
[cpuid
].slice
= slice
;
625 nodepdaindr
[i
]->phys_cpuid
[cpuid
].subnode
= subnode
;
629 cnode
= nasid_to_cnodeid(nasid
);
631 sn_nodepda
= nodepdaindr
[cnode
];
634 (typeof(pda
->led_address
)) (LED0
+ (slice
<< LED_CPU_SHIFT
));
635 pda
->led_state
= LED_ALWAYS_SET
;
636 pda
->hb_count
= HZ
/ 2;
641 /* copy cpu 0's sn_cnodeid_to_nasid table to this cpu's */
642 memcpy(sn_cnodeid_to_nasid
,
643 (&per_cpu(__sn_cnodeid_to_nasid
, 0)),
644 sizeof(__ia64_per_cpu_var(__sn_cnodeid_to_nasid
)));
649 * Only needs to be done once, on BSP.
650 * Has to be done after loop above, because it uses this cpu's
651 * sn_cnodeid_to_nasid table which was just initialized if this
653 * Has to be done before assignment below.
655 if (!wars_have_been_checked
) {
657 wars_have_been_checked
= 1;
659 sn_hub_info
->shub_1_1_found
= shub_1_1_found
;
662 * Set up addresses of PIO/MEM write status registers.
665 u64 pio1
[] = {SH1_PIO_WRITE_STATUS_0
, 0, SH1_PIO_WRITE_STATUS_1
, 0};
666 u64 pio2
[] = {SH2_PIO_WRITE_STATUS_0
, SH2_PIO_WRITE_STATUS_2
,
667 SH2_PIO_WRITE_STATUS_1
, SH2_PIO_WRITE_STATUS_3
};
669 pio
= is_shub1() ? pio1
: pio2
;
670 pda
->pio_write_status_addr
=
671 (volatile unsigned long *)GLOBAL_MMR_ADDR(nasid
, pio
[slice
]);
672 pda
->pio_write_status_val
= is_shub1() ? SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK
: 0;
676 * WAR addresses for SHUB 1.x.
678 if (local_node_data
->active_cpu_count
++ == 0 && is_shub1()) {
681 cnodeid_to_nasid(numa_node_id() ==
682 num_online_nodes() - 1 ? 0 : numa_node_id() + 1);
683 pda
->pio_shub_war_cam_addr
=
684 (volatile unsigned long *)GLOBAL_MMR_ADDR(nasid
,
690 * Build tables for converting between NASIDs and cnodes.
692 static inline int __init
board_needs_cnode(int type
)
694 return (type
== KLTYPE_SNIA
|| type
== KLTYPE_TIO
);
697 void __init
build_cnode_tables(void)
703 memset(physical_node_map
, -1, sizeof(physical_node_map
));
704 memset(sn_cnodeid_to_nasid
, -1,
705 sizeof(__ia64_per_cpu_var(__sn_cnodeid_to_nasid
)));
708 * First populate the tables with C/M bricks. This ensures that
709 * cnode == node for all C & M bricks.
711 for_each_online_node(node
) {
712 nasid
= pxm_to_nasid(node_to_pxm(node
));
713 sn_cnodeid_to_nasid
[node
] = nasid
;
714 physical_node_map
[nasid
] = node
;
718 * num_cnodes is total number of C/M/TIO bricks. Because of the 256 node
719 * limit on the number of nodes, we can't use the generic node numbers
720 * for this. Note that num_cnodes is incremented below as TIOs or
721 * headless/memoryless nodes are discovered.
723 num_cnodes
= num_online_nodes();
725 /* fakeprom does not support klgraph */
726 if (IS_RUNNING_ON_FAKE_PROM())
729 /* Find TIOs & headless/memoryless nodes and add them to the tables */
730 for_each_online_node(node
) {
731 kl_config_hdr_t
*klgraph_header
;
732 nasid
= cnodeid_to_nasid(node
);
733 klgraph_header
= ia64_sn_get_klconfig_addr(nasid
);
734 if (klgraph_header
== NULL
)
736 brd
= NODE_OFFSET_TO_LBOARD(nasid
, klgraph_header
->ch_board_info
);
738 if (board_needs_cnode(brd
->brd_type
) && physical_node_map
[brd
->brd_nasid
] < 0) {
739 sn_cnodeid_to_nasid
[num_cnodes
] = brd
->brd_nasid
;
740 physical_node_map
[brd
->brd_nasid
] = num_cnodes
++;
742 brd
= find_lboard_next(brd
);
748 nasid_slice_to_cpuid(int nasid
, int slice
)
752 for (cpu
= 0; cpu
< NR_CPUS
; cpu
++)
753 if (cpuid_to_nasid(cpu
) == nasid
&&
754 cpuid_to_slice(cpu
) == slice
)
760 int sn_prom_feature_available(int id
)
762 if (id
>= BITS_PER_LONG
* MAX_PROM_FEATURE_SETS
)
764 return test_bit(id
, sn_prom_features
);
766 EXPORT_SYMBOL(sn_prom_feature_available
);