/*
 * Copyright 2018 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
25 #include "amdgpu_discovery.h"
26 #include "soc15_common.h"
27 #include "soc15_hw_ip.h"
28 #include "nbio/nbio_2_3_offset.h"
29 #include "discovery.h"
/*
 * Register offsets used by the discovery code.
 *
 * In the visible part of this file only mmRCC_CONFIG_MEMSIZE is referenced:
 * amdgpu_discovery_read_binary() reads it with RREG32() and shifts the value
 * left by 20 to obtain the VRAM size (so the register presumably reports the
 * size in MiB - TODO confirm). No use of mmMM_INDEX / mmMM_INDEX_HI is visible
 * in this view.
 *
 * NOTE(review): the decimal number in front of each define looks like
 * line-number residue from a source-browser export rather than a C token;
 * confirm against the pristine file.
 */
31 #define mmRCC_CONFIG_MEMSIZE 0xde3
32 #define mmMM_INDEX 0x0
33 #define mmMM_INDEX_HI 0x6
/*
 * hw_id_names - printable name for each hardware ID.
 *
 * The table has HW_ID_MAX slots and is filled with designated initializers
 * keyed by the *_HWID constants. amdgpu_discovery_reg_base_init() indexes it
 * with the hw_id read from the discovery table when it prints debug and info
 * messages.
 *
 * Slots without an initializer are NULL (standard C zero-initialization of
 * the remaining elements), so a lookup for an unlisted ID yields a NULL
 * string pointer.
 *
 * NOTE(review): the embedded line numbers have gaps, so this view probably
 * does not show every entry of the real table, and the closing brace is not
 * visible here either.
 */
37 const char *hw_id_names
[HW_ID_MAX
] = {
41 [SMUIO_HWID
] = "SMUIO",
47 [AUDIO_AZ_HWID
] = "AUDIO_AZ",
54 [DCEAZ_HWID
] = "DCEAZ",
56 [SDPMUX_HWID
] = "SDPMUX",
59 [L2IMU_HWID
] = "L2IMU",
61 [MMHUB_HWID
] = "MMHUB",
62 [ATHUB_HWID
] = "ATHUB",
63 [DBGU_NBIO_HWID
] = "DBGU_NBIO",
65 [DBGU0_HWID
] = "DBGU0",
66 [DBGU1_HWID
] = "DBGU1",
67 [OSSSYS_HWID
] = "OSSSYS",
69 [SDMA0_HWID
] = "SDMA0",
70 [SDMA1_HWID
] = "SDMA1",
72 [DBGU_IO_HWID
] = "DBGU_IO",
76 [DFX_DAP_HWID
] = "DFX_DAP",
77 [L1IMU_PCIE_HWID
] = "L1IMU_PCIE",
78 [L1IMU_NBIF_HWID
] = "L1IMU_NBIF",
79 [L1IMU_IOAGR_HWID
] = "L1IMU_IOAGR",
80 [L1IMU3_HWID
] = "L1IMU3",
81 [L1IMU4_HWID
] = "L1IMU4",
82 [L1IMU5_HWID
] = "L1IMU5",
83 [L1IMU6_HWID
] = "L1IMU6",
84 [L1IMU7_HWID
] = "L1IMU7",
85 [L1IMU8_HWID
] = "L1IMU8",
86 [L1IMU9_HWID
] = "L1IMU9",
87 [L1IMU10_HWID
] = "L1IMU10",
88 [L1IMU11_HWID
] = "L1IMU11",
89 [L1IMU12_HWID
] = "L1IMU12",
90 [L1IMU13_HWID
] = "L1IMU13",
91 [L1IMU14_HWID
] = "L1IMU14",
92 [L1IMU15_HWID
] = "L1IMU15",
93 [WAFLC_HWID
] = "WAFLC",
94 [FCH_USB_PD_HWID
] = "FCH_USB_PD",
99 [IOAGR_HWID
] = "IOAGR",
100 [NBIF_HWID
] = "NBIF",
101 [IOAPIC_HWID
] = "IOAPIC",
102 [SYSTEMHUB_HWID
] = "SYSTEMHUB",
103 [NTBCCP_HWID
] = "NTBCCP",
105 [SATA_HWID
] = "SATA",
107 [CCXSEC_HWID
] = "CCXSEC",
108 [XGMI_HWID
] = "XGMI",
109 [XGBE_HWID
] = "XGBE",
/*
 * hw_id_map - driver IP block index (*_HWIP) to discovery hardware ID (*_HWID).
 *
 * amdgpu_discovery_reg_base_init() scans all MAX_HWIP slots of this table and
 * compares each value with the hw_id of a discovery entry; every slot that
 * matches receives that entry's register base.
 *
 * Points worth knowing when editing the table:
 *   - NBIO_HWIP and NBIF_HWIP both map to NBIF_HWID, so one discovery entry
 *     can populate two reg_offset rows;
 *   - DCE_HWIP maps to DMU_HWID and CLK_HWIP maps to CLKA_HWID, i.e. the
 *     names on both sides do not always correspond one to one;
 *   - slots without an initializer hold 0 (standard C zero-initialization).
 *     NOTE(review): such slots would match a discovery entry whose hw_id is
 *     0 - confirm that this ID cannot occur.
 *
 * NOTE(review): the embedded line numbers have gaps, so some entries and the
 * closing brace of the real table are not visible in this view.
 */
113 static int hw_id_map
[MAX_HWIP
] = {
115 [HDP_HWIP
] = HDP_HWID
,
116 [SDMA0_HWIP
] = SDMA0_HWID
,
117 [SDMA1_HWIP
] = SDMA1_HWID
,
118 [MMHUB_HWIP
] = MMHUB_HWID
,
119 [ATHUB_HWIP
] = ATHUB_HWID
,
120 [NBIO_HWIP
] = NBIF_HWID
,
121 [MP0_HWIP
] = MP0_HWID
,
122 [MP1_HWIP
] = MP1_HWID
,
123 [UVD_HWIP
] = UVD_HWID
,
124 [VCE_HWIP
] = VCE_HWID
,
126 [DCE_HWIP
] = DMU_HWID
,
127 [OSSSYS_HWIP
] = OSSSYS_HWID
,
128 [SMUIO_HWIP
] = SMUIO_HWID
,
129 [PWR_HWIP
] = PWR_HWID
,
130 [NBIF_HWIP
] = NBIF_HWID
,
131 [THM_HWIP
] = THM_HWID
,
132 [CLK_HWIP
] = CLKA_HWID
,
/*
 * amdgpu_discovery_read_binary - copy the discovery blob out of VRAM
 * @adev: device to read from
 * @binary: destination buffer; DISCOVERY_TMR_SIZE bytes are requested
 *
 * The VRAM size is computed as RREG32(mmRCC_CONFIG_MEMSIZE) << 20 and the
 * read starts DISCOVERY_TMR_SIZE bytes below that size, i.e. it covers the
 * last DISCOVERY_TMR_SIZE bytes of VRAM. The copy itself is delegated to
 * amdgpu_device_vram_access() with false as the last argument (presumably
 * selecting a read rather than a write - confirm against that helper).
 *
 * NOTE(review): the return statement is not visible in this view.
 * NOTE(review): pos is an unsigned subtraction; if the register ever reads
 * back a size smaller than DISCOVERY_TMR_SIZE the offset wraps around. No
 * guard against that is visible here.
 */
135 static int amdgpu_discovery_read_binary(struct amdgpu_device
*adev
, uint8_t *binary
)
/* The cast to 64 bits happens before the shift, so large sizes do not overflow. */
137 uint64_t vram_size
= (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE
) << 20;
/* Start offset of the discovery region at the very top of VRAM. */
138 uint64_t pos
= vram_size
- DISCOVERY_TMR_SIZE
;
140 amdgpu_device_vram_access(adev
, pos
, (uint32_t *)binary
, DISCOVERY_TMR_SIZE
, false);
/*
 * amdgpu_discovery_calculate_checksum - 16-bit checksum over a byte range
 * @data: first byte of the range
 * @size: number of bytes to visit
 *
 * The accumulator starts at 0 and the loop visits indices 0 .. size - 1.
 *
 * NOTE(review): the loop body and the return statement are not visible in
 * this view; presumably each byte is added to the accumulator and the
 * accumulator is returned - confirm against the pristine file.
 * NOTE(review): the declaration of the loop index is not visible either.
 * Because size is a uint32_t, an unsigned 32-bit index avoids a
 * signed/unsigned comparison in the loop condition.
 */
144 static uint16_t amdgpu_discovery_calculate_checksum(uint8_t *data
, uint32_t size
)
146 uint16_t checksum
= 0;
149 for (i
= 0; i
< size
; i
++)
/*
 * amdgpu_discovery_verify_checksum - compare a computed checksum with a stored one
 * @data: first byte of the range to check
 * @size: number of bytes in the range
 *
 * Returns true when amdgpu_discovery_calculate_checksum(data, size) equals
 * the expected value, false otherwise. The double negation only normalizes
 * the comparison result, which is already 0 or 1.
 *
 * NOTE(review): the declaration of the third parameter (named expected in
 * the return expression) is not visible in this view.
 */
155 static inline bool amdgpu_discovery_verify_checksum(uint8_t *data
, uint32_t size
,
158 return !!(amdgpu_discovery_calculate_checksum(data
, size
) == expected
);
/*
 * amdgpu_discovery_init - load and validate the IP discovery binary
 * @adev: device whose discovery buffer is populated
 *
 * Allocates DISCOVERY_TMR_SIZE zeroed bytes into adev->discovery, fills the
 * buffer through amdgpu_discovery_read_binary() and then validates, in order:
 *   1. the binary header signature against BINARY_SIGNATURE;
 *   2. the checksum of the binary, taken over the bytes that follow the
 *      binary_checksum field;
 *   3. the signature (DISCOVERY_TABLE_SIGNATURE) and the checksum of the
 *      IP_DISCOVERY table;
 *   4. the checksum of the GC table.
 * Every failed step logs a DRM_ERROR. The tail of the function frees the
 * buffer and resets adev->discovery to NULL, which looks like the shared
 * failure exit.
 *
 * NOTE(review): several lines of this function are not visible in this view
 * (declarations of offset, size, checksum and r, the error codes and the
 * jumps to the cleanup code), so exact return values are not documented here.
 */
161 int amdgpu_discovery_init(struct amdgpu_device
*adev
)
163 struct table_info
*info
;
164 struct binary_header
*bhdr
;
165 struct ip_discovery_header
*ihdr
;
166 struct gpu_info_header
*ghdr
;
/* Zeroed buffer that holds the whole discovery region for later lookups. */
172 adev
->discovery
= kzalloc(DISCOVERY_TMR_SIZE
, GFP_KERNEL
);
173 if (!adev
->discovery
)
176 r
= amdgpu_discovery_read_binary(adev
, adev
->discovery
);
178 DRM_ERROR("failed to read ip discovery binary\n");
/* The binary header sits at the very start of the buffer. */
182 bhdr
= (struct binary_header
*)adev
->discovery
;
184 if (le32_to_cpu(bhdr
->binary_signature
) != BINARY_SIGNATURE
) {
185 DRM_ERROR("invalid ip discovery binary signature\n");
/* Skip everything up to and including the checksum field itself. */
190 offset
= offsetof(struct binary_header
, binary_checksum
) +
191 sizeof(bhdr
->binary_checksum
);
/*
 * NOTE(review): binary_size and binary_checksum are read without an
 * le*_to_cpu() conversion, unlike the table_list fields further down.
 * Confirm whether this is correct on big-endian hosts.
 */
192 size
= bhdr
->binary_size
- offset
;
193 checksum
= bhdr
->binary_checksum
;
195 if (!amdgpu_discovery_verify_checksum(adev
->discovery
+ offset
,
197 DRM_ERROR("invalid ip discovery binary checksum\n");
/* IP discovery table: located through its table_list entry. */
202 info
= &bhdr
->table_list
[IP_DISCOVERY
];
203 offset
= le16_to_cpu(info
->offset
);
204 checksum
= le16_to_cpu(info
->checksum
);
205 ihdr
= (struct ip_discovery_header
*)(adev
->discovery
+ offset
);
207 if (le32_to_cpu(ihdr
->signature
) != DISCOVERY_TABLE_SIGNATURE
) {
208 DRM_ERROR("invalid ip discovery data table signature\n");
/*
 * NOTE(review): ihdr->size is passed without an endianness conversion, and
 * no check that offset plus size stays inside the buffer is visible here.
 */
213 if (!amdgpu_discovery_verify_checksum(adev
->discovery
+ offset
,
214 ihdr
->size
, checksum
)) {
215 DRM_ERROR("invalid ip discovery data table checksum\n");
/* GC table: same lookup; only the checksum is verified, no signature. */
220 info
= &bhdr
->table_list
[GC
];
221 offset
= le16_to_cpu(info
->offset
);
222 checksum
= le16_to_cpu(info
->checksum
);
223 ghdr
= (struct gpu_info_header
*)(adev
->discovery
+ offset
);
/* NOTE(review): ghdr->size is likewise used without endianness conversion. */
225 if (!amdgpu_discovery_verify_checksum(adev
->discovery
+ offset
,
226 ghdr
->size
, checksum
)) {
227 DRM_ERROR("invalid gc data table checksum\n");
/* Cleanup: drop the buffer and clear the pointer so later users see NULL. */
235 kfree(adev
->discovery
);
236 adev
->discovery
= NULL
;
/*
 * amdgpu_discovery_fini - release the discovery buffer
 * @adev: device whose discovery buffer is released
 *
 * Frees adev->discovery and resets the pointer to NULL, so the other
 * functions in this file report the discovery data as uninitialized
 * afterwards. kfree() accepts NULL, so calling this when no buffer is
 * present is harmless.
 */
241 void amdgpu_discovery_fini(struct amdgpu_device
*adev
)
243 kfree(adev
->discovery
);
244 adev
->discovery
= NULL
;
/*
 * amdgpu_discovery_reg_base_init - fill adev->reg_offset from the IP table
 * @adev: device with a previously loaded discovery buffer
 *
 * Logs an error when adev->discovery is NULL. Otherwise it walks every die
 * listed in the IP discovery header and every IP entry on that die:
 *   - the die_id stored in each die header must equal the loop index,
 *     otherwise an error is logged;
 *   - the base addresses of each IP are converted from little endian in
 *     place, inside the discovery buffer, so that later readers of
 *     adev->reg_offset need no conversion;
 *   - for every hw_ip slot of hw_id_map whose value equals the hw_id of the
 *     entry, adev->reg_offset[hw_ip][number_instance] is assigned (the
 *     assigned value is not visible in this view);
 *   - the cursor then advances by sizeof(*ip) + 4 * (num_base_address - 1),
 *     which presumably means struct ip already contains one 32-bit base
 *     address slot - TODO confirm against the struct definition.
 *
 * NOTE(review): hw_id taken from the table indexes hw_id_names and
 * number_instance indexes adev->reg_offset; no range check against
 * HW_ID_MAX or the instance dimension is visible in this view.
 * NOTE(review): because the conversion happens in place, running this
 * function twice on the same buffer would appear to swap the addresses
 * twice on a big-endian host - confirm that it is only called once.
 * NOTE(review): return statements and several local declarations are not
 * visible in this view.
 */
247 int amdgpu_discovery_reg_base_init(struct amdgpu_device
*adev
)
249 struct binary_header
*bhdr
;
250 struct ip_discovery_header
*ihdr
;
251 struct die_header
*dhdr
;
257 uint8_t num_base_address
;
261 if (!adev
->discovery
) {
262 DRM_ERROR("ip discovery uninitialized\n");
/* Locate the IP discovery header through the binary header table list. */
266 bhdr
= (struct binary_header
*)adev
->discovery
;
267 ihdr
= (struct ip_discovery_header
*)(adev
->discovery
+
268 le16_to_cpu(bhdr
->table_list
[IP_DISCOVERY
].offset
));
269 num_dies
= le16_to_cpu(ihdr
->num_dies
);
271 DRM_DEBUG("number of dies: %d\n", num_dies
);
273 for (i
= 0; i
< num_dies
; i
++) {
274 die_offset
= le16_to_cpu(ihdr
->die_info
[i
].die_offset
);
275 dhdr
= (struct die_header
*)(adev
->discovery
+ die_offset
);
276 num_ips
= le16_to_cpu(dhdr
->num_ips
);
/* IP entries start right after the die header. */
277 ip_offset
= die_offset
+ sizeof(*dhdr
);
/* Dies are expected to be numbered in table order. */
279 if (le16_to_cpu(dhdr
->die_id
) != i
) {
280 DRM_ERROR("invalid die id %d, expected %d\n",
281 le16_to_cpu(dhdr
->die_id
), i
);
285 DRM_DEBUG("number of hardware IPs on die%d: %d\n",
286 le16_to_cpu(dhdr
->die_id
), num_ips
);
288 for (j
= 0; j
< num_ips
; j
++) {
289 ip
= (struct ip
*)(adev
->discovery
+ ip_offset
);
290 num_base_address
= ip
->num_base_address
;
292 DRM_DEBUG("%s(%d) #%d v%d.%d.%d:\n",
293 hw_id_names
[le16_to_cpu(ip
->hw_id
)],
294 le16_to_cpu(ip
->hw_id
),
296 ip
->major
, ip
->minor
,
299 for (k
= 0; k
< num_base_address
; k
++) {
301 * convert the endianness of base addresses in place,
302 * so that we don't need to convert them when accessing adev->reg_offset.
304 ip
->base_address
[k
] = le32_to_cpu(ip
->base_address
[k
]);
305 DRM_DEBUG("\t0x%08x\n", ip
->base_address
[k
]);
308 for (hw_ip
= 0; hw_ip
< MAX_HWIP
; hw_ip
++) {
309 if (hw_id_map
[hw_ip
] == le16_to_cpu(ip
->hw_id
)) {
310 DRM_INFO("set register base offset for %s\n",
311 hw_id_names
[le16_to_cpu(ip
->hw_id
)]);
312 adev
->reg_offset
[hw_ip
][ip
->number_instance
] =
318 ip_offset
+= sizeof(*ip
) + 4 * (ip
->num_base_address
- 1);
/*
 * amdgpu_discovery_get_ip_version - look up the version of one hardware IP
 * @adev: device with a previously loaded discovery buffer
 * @hw_id: hardware ID to search for; compared with le16_to_cpu(ip->hw_id)
 * @major: output pointer (the store is not visible in this view)
 * @minor: output pointer (the store is not visible in this view)
 * @revision: receives ip->revision of the matching entry
 *
 * Logs an error when adev->discovery is NULL. Otherwise it walks the dies
 * and their IP entries in the same way as amdgpu_discovery_reg_base_init():
 * entries begin right after each die header and the cursor advances by
 * sizeof(*ip) + 4 * (num_base_address - 1) per entry.
 *
 * NOTE(review): the lines that store major and minor, any NULL checks on
 * the output pointers and all return statements are not visible in this
 * view, so the return contract is not documented here.
 */
325 int amdgpu_discovery_get_ip_version(struct amdgpu_device
*adev
, int hw_id
,
326 int *major
, int *minor
, int *revision
)
328 struct binary_header
*bhdr
;
329 struct ip_discovery_header
*ihdr
;
330 struct die_header
*dhdr
;
338 if (!adev
->discovery
) {
339 DRM_ERROR("ip discovery uninitialized\n");
/* Locate the IP discovery header through the binary header table list. */
343 bhdr
= (struct binary_header
*)adev
->discovery
;
344 ihdr
= (struct ip_discovery_header
*)(adev
->discovery
+
345 le16_to_cpu(bhdr
->table_list
[IP_DISCOVERY
].offset
));
346 num_dies
= le16_to_cpu(ihdr
->num_dies
);
348 for (i
= 0; i
< num_dies
; i
++) {
349 die_offset
= le16_to_cpu(ihdr
->die_info
[i
].die_offset
);
350 dhdr
= (struct die_header
*)(adev
->discovery
+ die_offset
);
351 num_ips
= le16_to_cpu(dhdr
->num_ips
);
/* IP entries start right after the die header. */
352 ip_offset
= die_offset
+ sizeof(*dhdr
);
354 for (j
= 0; j
< num_ips
; j
++) {
355 ip
= (struct ip
*)(adev
->discovery
+ ip_offset
);
357 if (le16_to_cpu(ip
->hw_id
) == hw_id
) {
363 *revision
= ip
->revision
;
/* Entries are variable length: step over the extra base addresses. */
366 ip_offset
+= sizeof(*ip
) + 4 * (ip
->num_base_address
- 1);
/*
 * amdgpu_discovery_get_gfx_info - copy GC table values into the gfx config
 * @adev: device with a previously loaded discovery buffer
 *
 * Logs an error when adev->discovery is NULL. Otherwise the GC table is
 * located through bhdr->table_list[GC].offset, interpreted as a
 * struct gc_info_v1_0, and its little-endian 32-bit fields are copied into
 * adev->gfx.config and adev->gfx.cu_info.
 *
 * Most fields are copied one to one. Two values are derived:
 *   - max_cu_per_sh = 2 * (gc_num_wgp0_per_sa + gc_num_wgp1_per_sa)
 *     (the factor 2 presumably reflects two CUs per WGP - TODO confirm);
 *   - num_sc_per_sh = gc_num_sc_per_se / gc_num_sa_per_se.
 *
 * NOTE(review): the division has no visible guard; a table that reports
 * gc_num_sa_per_se as 0 would divide by zero.
 * NOTE(review): the end of the function (return statements, closing brace)
 * is not visible in this view.
 */
373 int amdgpu_discovery_get_gfx_info(struct amdgpu_device
*adev
)
375 struct binary_header
*bhdr
;
376 struct gc_info_v1_0
*gc_info
;
378 if (!adev
->discovery
) {
379 DRM_ERROR("ip discovery uninitialized\n");
/* Locate the GC table through the binary header table list. */
383 bhdr
= (struct binary_header
*)adev
->discovery
;
384 gc_info
= (struct gc_info_v1_0
*)(adev
->discovery
+
385 le16_to_cpu(bhdr
->table_list
[GC
].offset
));
387 adev
->gfx
.config
.max_shader_engines
= le32_to_cpu(gc_info
->gc_num_se
);
/* Derived value: twice the sum of both WGP counts per shader array. */
388 adev
->gfx
.config
.max_cu_per_sh
= 2 * (le32_to_cpu(gc_info
->gc_num_wgp0_per_sa
) +
389 le32_to_cpu(gc_info
->gc_num_wgp1_per_sa
));
390 adev
->gfx
.config
.max_sh_per_se
= le32_to_cpu(gc_info
->gc_num_sa_per_se
);
391 adev
->gfx
.config
.max_backends_per_se
= le32_to_cpu(gc_info
->gc_num_rb_per_se
);
392 adev
->gfx
.config
.max_texture_channel_caches
= le32_to_cpu(gc_info
->gc_num_gl2c
);
393 adev
->gfx
.config
.max_gprs
= le32_to_cpu(gc_info
->gc_num_gprs
);
394 adev
->gfx
.config
.max_gs_threads
= le32_to_cpu(gc_info
->gc_num_max_gs_thds
);
395 adev
->gfx
.config
.gs_vgt_table_depth
= le32_to_cpu(gc_info
->gc_gs_table_depth
);
396 adev
->gfx
.config
.gs_prim_buffer_depth
= le32_to_cpu(gc_info
->gc_gsprim_buff_depth
);
397 adev
->gfx
.config
.double_offchip_lds_buf
= le32_to_cpu(gc_info
->gc_double_offchip_lds_buffer
);
398 adev
->gfx
.cu_info
.wave_front_size
= le32_to_cpu(gc_info
->gc_wave_size
);
399 adev
->gfx
.cu_info
.max_waves_per_simd
= le32_to_cpu(gc_info
->gc_max_waves_per_simd
);
400 adev
->gfx
.cu_info
.max_scratch_slots_per_cu
= le32_to_cpu(gc_info
->gc_max_scratch_slots_per_cu
);
401 adev
->gfx
.cu_info
.lds_size
= le32_to_cpu(gc_info
->gc_lds_size
);
/* Derived value: per-SE count divided by the number of shader arrays per SE. */
402 adev
->gfx
.config
.num_sc_per_sh
= le32_to_cpu(gc_info
->gc_num_sc_per_se
) /
403 le32_to_cpu(gc_info
->gc_num_sa_per_se
);
404 adev
->gfx
.config
.num_packer_per_sc
= le32_to_cpu(gc_info
->gc_num_packer_per_sc
);