2 * Copyright 2014 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/pci.h>
26 #include <linux/errno.h>
27 #include <linux/acpi.h>
28 #include <linux/hash.h>
29 #include <linux/cpufreq.h>
30 #include <linux/log2.h>
34 #include "kfd_topology.h"
/* Head of the list on which every kfd_topology_device is linked (via ->list). */
36 static struct list_head topology_device_list
;
/* Cleared in kfd_topology_init() and set to 1 once the CRAT table was parsed. */
37 static int topology_crat_parsed
;
/* System-wide properties and the top-level sysfs kobjects of the topology. */
38 static struct kfd_system_properties sys_props
;
/*
 * Taken for reading by the lookup helpers and for writing by the code that
 * adds/removes topology devices or rebuilds the sysfs tree.
 */
40 static DECLARE_RWSEM(topology_lock
);
42 struct kfd_dev
*kfd_device_by_id(uint32_t gpu_id
)
44 struct kfd_topology_device
*top_dev
;
45 struct kfd_dev
*device
= NULL
;
47 down_read(&topology_lock
);
49 list_for_each_entry(top_dev
, &topology_device_list
, list
)
50 if (top_dev
->gpu_id
== gpu_id
) {
51 device
= top_dev
->gpu
;
55 up_read(&topology_lock
);
60 struct kfd_dev
*kfd_device_by_pci_dev(const struct pci_dev
*pdev
)
62 struct kfd_topology_device
*top_dev
;
63 struct kfd_dev
*device
= NULL
;
65 down_read(&topology_lock
);
67 list_for_each_entry(top_dev
, &topology_device_list
, list
)
68 if (top_dev
->gpu
->pdev
== pdev
) {
69 device
= top_dev
->gpu
;
73 up_read(&topology_lock
);
78 static int kfd_topology_get_crat_acpi(void *crat_image
, size_t *size
)
80 struct acpi_table_header
*crat_table
;
87 * Fetch the CRAT table from ACPI
89 status
= acpi_get_table(CRAT_SIGNATURE
, 0, &crat_table
);
90 if (status
== AE_NOT_FOUND
) {
91 pr_warn("CRAT table not found\n");
93 } else if (ACPI_FAILURE(status
)) {
94 const char *err
= acpi_format_exception(status
);
96 pr_err("CRAT table error: %s\n", err
);
100 if (*size
>= crat_table
->length
&& crat_image
!= NULL
)
101 memcpy(crat_image
, crat_table
, crat_table
->length
);
103 *size
= crat_table
->length
;
108 static void kfd_populated_cu_info_cpu(struct kfd_topology_device
*dev
,
109 struct crat_subtype_computeunit
*cu
)
114 dev
->node_props
.cpu_cores_count
= cu
->num_cpu_cores
;
115 dev
->node_props
.cpu_core_id_base
= cu
->processor_id_low
;
116 if (cu
->hsa_capability
& CRAT_CU_FLAGS_IOMMU_PRESENT
)
117 dev
->node_props
.capability
|= HSA_CAP_ATS_PRESENT
;
119 pr_info("CU CPU: cores=%d id_base=%d\n", cu
->num_cpu_cores
,
120 cu
->processor_id_low
);
123 static void kfd_populated_cu_info_gpu(struct kfd_topology_device
*dev
,
124 struct crat_subtype_computeunit
*cu
)
129 dev
->node_props
.simd_id_base
= cu
->processor_id_low
;
130 dev
->node_props
.simd_count
= cu
->num_simd_cores
;
131 dev
->node_props
.lds_size_in_kb
= cu
->lds_size_in_kb
;
132 dev
->node_props
.max_waves_per_simd
= cu
->max_waves_simd
;
133 dev
->node_props
.wave_front_size
= cu
->wave_front_size
;
134 dev
->node_props
.mem_banks_count
= cu
->num_banks
;
135 dev
->node_props
.array_count
= cu
->num_arrays
;
136 dev
->node_props
.cu_per_simd_array
= cu
->num_cu_per_array
;
137 dev
->node_props
.simd_per_cu
= cu
->num_simd_per_cu
;
138 dev
->node_props
.max_slots_scratch_cu
= cu
->max_slots_scatch_cu
;
139 if (cu
->hsa_capability
& CRAT_CU_FLAGS_HOT_PLUGGABLE
)
140 dev
->node_props
.capability
|= HSA_CAP_HOT_PLUGGABLE
;
141 pr_info("CU GPU: simds=%d id_base=%d\n", cu
->num_simd_cores
,
142 cu
->processor_id_low
);
145 /* kfd_parse_subtype_cu is called when the topology mutex is already acquired */
146 static int kfd_parse_subtype_cu(struct crat_subtype_computeunit
*cu
)
148 struct kfd_topology_device
*dev
;
153 pr_info("Found CU entry in CRAT table with proximity_domain=%d caps=%x\n",
154 cu
->proximity_domain
, cu
->hsa_capability
);
155 list_for_each_entry(dev
, &topology_device_list
, list
) {
156 if (cu
->proximity_domain
== i
) {
157 if (cu
->flags
& CRAT_CU_FLAGS_CPU_PRESENT
)
158 kfd_populated_cu_info_cpu(dev
, cu
);
160 if (cu
->flags
& CRAT_CU_FLAGS_GPU_PRESENT
)
161 kfd_populated_cu_info_gpu(dev
, cu
);
171 * kfd_parse_subtype_mem is called when the topology mutex is
174 static int kfd_parse_subtype_mem(struct crat_subtype_memory
*mem
)
176 struct kfd_mem_properties
*props
;
177 struct kfd_topology_device
*dev
;
182 pr_info("Found memory entry in CRAT table with proximity_domain=%d\n",
183 mem
->promixity_domain
);
184 list_for_each_entry(dev
, &topology_device_list
, list
) {
185 if (mem
->promixity_domain
== i
) {
186 props
= kfd_alloc_struct(props
);
190 if (dev
->node_props
.cpu_cores_count
== 0)
191 props
->heap_type
= HSA_MEM_HEAP_TYPE_FB_PRIVATE
;
193 props
->heap_type
= HSA_MEM_HEAP_TYPE_SYSTEM
;
195 if (mem
->flags
& CRAT_MEM_FLAGS_HOT_PLUGGABLE
)
196 props
->flags
|= HSA_MEM_FLAGS_HOT_PLUGGABLE
;
197 if (mem
->flags
& CRAT_MEM_FLAGS_NON_VOLATILE
)
198 props
->flags
|= HSA_MEM_FLAGS_NON_VOLATILE
;
200 props
->size_in_bytes
=
201 ((uint64_t)mem
->length_high
<< 32) +
203 props
->width
= mem
->width
;
205 dev
->mem_bank_count
++;
206 list_add_tail(&props
->list
, &dev
->mem_props
);
217 * kfd_parse_subtype_cache is called when the topology mutex
218 * is already acquired
220 static int kfd_parse_subtype_cache(struct crat_subtype_cache
*cache
)
222 struct kfd_cache_properties
*props
;
223 struct kfd_topology_device
*dev
;
228 id
= cache
->processor_id_low
;
230 pr_info("Found cache entry in CRAT table with processor_id=%d\n", id
);
231 list_for_each_entry(dev
, &topology_device_list
, list
)
232 if (id
== dev
->node_props
.cpu_core_id_base
||
233 id
== dev
->node_props
.simd_id_base
) {
234 props
= kfd_alloc_struct(props
);
238 props
->processor_id_low
= id
;
239 props
->cache_level
= cache
->cache_level
;
240 props
->cache_size
= cache
->cache_size
;
241 props
->cacheline_size
= cache
->cache_line_size
;
242 props
->cachelines_per_tag
= cache
->lines_per_tag
;
243 props
->cache_assoc
= cache
->associativity
;
244 props
->cache_latency
= cache
->cache_latency
;
246 if (cache
->flags
& CRAT_CACHE_FLAGS_DATA_CACHE
)
247 props
->cache_type
|= HSA_CACHE_TYPE_DATA
;
248 if (cache
->flags
& CRAT_CACHE_FLAGS_INST_CACHE
)
249 props
->cache_type
|= HSA_CACHE_TYPE_INSTRUCTION
;
250 if (cache
->flags
& CRAT_CACHE_FLAGS_CPU_CACHE
)
251 props
->cache_type
|= HSA_CACHE_TYPE_CPU
;
252 if (cache
->flags
& CRAT_CACHE_FLAGS_SIMD_CACHE
)
253 props
->cache_type
|= HSA_CACHE_TYPE_HSACU
;
256 dev
->node_props
.caches_count
++;
257 list_add_tail(&props
->list
, &dev
->cache_props
);
266 * kfd_parse_subtype_iolink is called when the topology mutex
267 * is already acquired
269 static int kfd_parse_subtype_iolink(struct crat_subtype_iolink
*iolink
)
271 struct kfd_iolink_properties
*props
;
272 struct kfd_topology_device
*dev
;
279 id_from
= iolink
->proximity_domain_from
;
280 id_to
= iolink
->proximity_domain_to
;
282 pr_info("Found IO link entry in CRAT table with id_from=%d\n", id_from
);
283 list_for_each_entry(dev
, &topology_device_list
, list
) {
285 props
= kfd_alloc_struct(props
);
289 props
->node_from
= id_from
;
290 props
->node_to
= id_to
;
291 props
->ver_maj
= iolink
->version_major
;
292 props
->ver_min
= iolink
->version_minor
;
295 * weight factor (derived from CDIR), currently always 1
299 props
->min_latency
= iolink
->minimum_latency
;
300 props
->max_latency
= iolink
->maximum_latency
;
301 props
->min_bandwidth
= iolink
->minimum_bandwidth_mbs
;
302 props
->max_bandwidth
= iolink
->maximum_bandwidth_mbs
;
303 props
->rec_transfer_size
=
304 iolink
->recommended_transfer_size
;
306 dev
->io_link_count
++;
307 dev
->node_props
.io_links_count
++;
308 list_add_tail(&props
->list
, &dev
->io_link_props
);
318 static int kfd_parse_subtype(struct crat_subtype_generic
*sub_type_hdr
)
320 struct crat_subtype_computeunit
*cu
;
321 struct crat_subtype_memory
*mem
;
322 struct crat_subtype_cache
*cache
;
323 struct crat_subtype_iolink
*iolink
;
326 BUG_ON(!sub_type_hdr
);
328 switch (sub_type_hdr
->type
) {
329 case CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY
:
330 cu
= (struct crat_subtype_computeunit
*)sub_type_hdr
;
331 ret
= kfd_parse_subtype_cu(cu
);
333 case CRAT_SUBTYPE_MEMORY_AFFINITY
:
334 mem
= (struct crat_subtype_memory
*)sub_type_hdr
;
335 ret
= kfd_parse_subtype_mem(mem
);
337 case CRAT_SUBTYPE_CACHE_AFFINITY
:
338 cache
= (struct crat_subtype_cache
*)sub_type_hdr
;
339 ret
= kfd_parse_subtype_cache(cache
);
341 case CRAT_SUBTYPE_TLB_AFFINITY
:
343 * For now, nothing to do here
345 pr_info("Found TLB entry in CRAT table (not processing)\n");
347 case CRAT_SUBTYPE_CCOMPUTE_AFFINITY
:
349 * For now, nothing to do here
351 pr_info("Found CCOMPUTE entry in CRAT table (not processing)\n");
353 case CRAT_SUBTYPE_IOLINK_AFFINITY
:
354 iolink
= (struct crat_subtype_iolink
*)sub_type_hdr
;
355 ret
= kfd_parse_subtype_iolink(iolink
);
358 pr_warn("Unknown subtype (%d) in CRAT\n",
365 static void kfd_release_topology_device(struct kfd_topology_device
*dev
)
367 struct kfd_mem_properties
*mem
;
368 struct kfd_cache_properties
*cache
;
369 struct kfd_iolink_properties
*iolink
;
373 list_del(&dev
->list
);
375 while (dev
->mem_props
.next
!= &dev
->mem_props
) {
376 mem
= container_of(dev
->mem_props
.next
,
377 struct kfd_mem_properties
, list
);
378 list_del(&mem
->list
);
382 while (dev
->cache_props
.next
!= &dev
->cache_props
) {
383 cache
= container_of(dev
->cache_props
.next
,
384 struct kfd_cache_properties
, list
);
385 list_del(&cache
->list
);
389 while (dev
->io_link_props
.next
!= &dev
->io_link_props
) {
390 iolink
= container_of(dev
->io_link_props
.next
,
391 struct kfd_iolink_properties
, list
);
392 list_del(&iolink
->list
);
398 sys_props
.num_devices
--;
401 static void kfd_release_live_view(void)
403 struct kfd_topology_device
*dev
;
405 while (topology_device_list
.next
!= &topology_device_list
) {
406 dev
= container_of(topology_device_list
.next
,
407 struct kfd_topology_device
, list
);
408 kfd_release_topology_device(dev
);
411 memset(&sys_props
, 0, sizeof(sys_props
));
414 static struct kfd_topology_device
*kfd_create_topology_device(void)
416 struct kfd_topology_device
*dev
;
418 dev
= kfd_alloc_struct(dev
);
420 pr_err("No memory to allocate a topology device");
424 INIT_LIST_HEAD(&dev
->mem_props
);
425 INIT_LIST_HEAD(&dev
->cache_props
);
426 INIT_LIST_HEAD(&dev
->io_link_props
);
428 list_add_tail(&dev
->list
, &topology_device_list
);
429 sys_props
.num_devices
++;
434 static int kfd_parse_crat_table(void *crat_image
)
436 struct kfd_topology_device
*top_dev
;
437 struct crat_subtype_generic
*sub_type_hdr
;
440 struct crat_header
*crat_table
= (struct crat_header
*)crat_image
;
447 num_nodes
= crat_table
->num_domains
;
448 image_len
= crat_table
->length
;
450 pr_info("Parsing CRAT table with %d nodes\n", num_nodes
);
452 for (node_id
= 0; node_id
< num_nodes
; node_id
++) {
453 top_dev
= kfd_create_topology_device();
455 kfd_release_live_view();
460 sys_props
.platform_id
=
461 (*((uint64_t *)crat_table
->oem_id
)) & CRAT_OEMID_64BIT_MASK
;
462 sys_props
.platform_oem
= *((uint64_t *)crat_table
->oem_table_id
);
463 sys_props
.platform_rev
= crat_table
->revision
;
465 sub_type_hdr
= (struct crat_subtype_generic
*)(crat_table
+1);
466 while ((char *)sub_type_hdr
+ sizeof(struct crat_subtype_generic
) <
467 ((char *)crat_image
) + image_len
) {
468 if (sub_type_hdr
->flags
& CRAT_SUBTYPE_FLAGS_ENABLED
) {
469 ret
= kfd_parse_subtype(sub_type_hdr
);
471 kfd_release_live_view();
476 sub_type_hdr
= (typeof(sub_type_hdr
))((char *)sub_type_hdr
+
477 sub_type_hdr
->length
);
480 sys_props
.generation_count
++;
481 topology_crat_parsed
= 1;
/*
 * Helpers that append one formatted line to a sysfs show buffer.
 *
 * The buffer must hold a NUL-terminated string (callers start with
 * buffer[0] = 0). The new text is written at the current end of that string
 * rather than re-printing the buffer into itself, because passing the
 * destination of snprintf() as one of its source arguments is undefined.
 *
 * Each macro evaluates to the total length of the string in the buffer
 * after the append; output never exceeds PAGE_SIZE.
 */
#define sysfs_show_gen_prop(buffer, fmt, ...)				\
({									\
	char *__buf = (buffer);						\
	size_t __used = strnlen(__buf, PAGE_SIZE - 1);			\
	(int)(__used + scnprintf(__buf + __used, PAGE_SIZE - __used,	\
				 fmt, __VA_ARGS__));			\
})
/* "<name> <32-bit unsigned value>\n" */
#define sysfs_show_32bit_prop(buffer, name, value) \
		sysfs_show_gen_prop(buffer, "%s %u\n", name, value)
/* "<name> <64-bit unsigned value>\n" */
#define sysfs_show_64bit_prop(buffer, name, value) \
		sysfs_show_gen_prop(buffer, "%s %llu\n", name, value)
/* "<32-bit unsigned value>\n" */
#define sysfs_show_32bit_val(buffer, value) \
		sysfs_show_gen_prop(buffer, "%u\n", value)
/* "<string>\n" */
#define sysfs_show_str_val(buffer, value) \
		sysfs_show_gen_prop(buffer, "%s\n", value)
498 static ssize_t
sysprops_show(struct kobject
*kobj
, struct attribute
*attr
,
503 /* Making sure that the buffer is an empty string */
506 if (attr
== &sys_props
.attr_genid
) {
507 ret
= sysfs_show_32bit_val(buffer
, sys_props
.generation_count
);
508 } else if (attr
== &sys_props
.attr_props
) {
509 sysfs_show_64bit_prop(buffer
, "platform_oem",
510 sys_props
.platform_oem
);
511 sysfs_show_64bit_prop(buffer
, "platform_id",
512 sys_props
.platform_id
);
513 ret
= sysfs_show_64bit_prop(buffer
, "platform_rev",
514 sys_props
.platform_rev
);
522 static const struct sysfs_ops sysprops_ops
= {
523 .show
= sysprops_show
,
526 static struct kobj_type sysprops_type
= {
527 .sysfs_ops
= &sysprops_ops
,
530 static ssize_t
iolink_show(struct kobject
*kobj
, struct attribute
*attr
,
534 struct kfd_iolink_properties
*iolink
;
536 /* Making sure that the buffer is an empty string */
539 iolink
= container_of(attr
, struct kfd_iolink_properties
, attr
);
540 sysfs_show_32bit_prop(buffer
, "type", iolink
->iolink_type
);
541 sysfs_show_32bit_prop(buffer
, "version_major", iolink
->ver_maj
);
542 sysfs_show_32bit_prop(buffer
, "version_minor", iolink
->ver_min
);
543 sysfs_show_32bit_prop(buffer
, "node_from", iolink
->node_from
);
544 sysfs_show_32bit_prop(buffer
, "node_to", iolink
->node_to
);
545 sysfs_show_32bit_prop(buffer
, "weight", iolink
->weight
);
546 sysfs_show_32bit_prop(buffer
, "min_latency", iolink
->min_latency
);
547 sysfs_show_32bit_prop(buffer
, "max_latency", iolink
->max_latency
);
548 sysfs_show_32bit_prop(buffer
, "min_bandwidth", iolink
->min_bandwidth
);
549 sysfs_show_32bit_prop(buffer
, "max_bandwidth", iolink
->max_bandwidth
);
550 sysfs_show_32bit_prop(buffer
, "recommended_transfer_size",
551 iolink
->rec_transfer_size
);
552 ret
= sysfs_show_32bit_prop(buffer
, "flags", iolink
->flags
);
557 static const struct sysfs_ops iolink_ops
= {
561 static struct kobj_type iolink_type
= {
562 .sysfs_ops
= &iolink_ops
,
565 static ssize_t
mem_show(struct kobject
*kobj
, struct attribute
*attr
,
569 struct kfd_mem_properties
*mem
;
571 /* Making sure that the buffer is an empty string */
574 mem
= container_of(attr
, struct kfd_mem_properties
, attr
);
575 sysfs_show_32bit_prop(buffer
, "heap_type", mem
->heap_type
);
576 sysfs_show_64bit_prop(buffer
, "size_in_bytes", mem
->size_in_bytes
);
577 sysfs_show_32bit_prop(buffer
, "flags", mem
->flags
);
578 sysfs_show_32bit_prop(buffer
, "width", mem
->width
);
579 ret
= sysfs_show_32bit_prop(buffer
, "mem_clk_max", mem
->mem_clk_max
);
584 static const struct sysfs_ops mem_ops
= {
588 static struct kobj_type mem_type
= {
589 .sysfs_ops
= &mem_ops
,
592 static ssize_t
kfd_cache_show(struct kobject
*kobj
, struct attribute
*attr
,
597 struct kfd_cache_properties
*cache
;
599 /* Making sure that the buffer is an empty string */
602 cache
= container_of(attr
, struct kfd_cache_properties
, attr
);
603 sysfs_show_32bit_prop(buffer
, "processor_id_low",
604 cache
->processor_id_low
);
605 sysfs_show_32bit_prop(buffer
, "level", cache
->cache_level
);
606 sysfs_show_32bit_prop(buffer
, "size", cache
->cache_size
);
607 sysfs_show_32bit_prop(buffer
, "cache_line_size", cache
->cacheline_size
);
608 sysfs_show_32bit_prop(buffer
, "cache_lines_per_tag",
609 cache
->cachelines_per_tag
);
610 sysfs_show_32bit_prop(buffer
, "association", cache
->cache_assoc
);
611 sysfs_show_32bit_prop(buffer
, "latency", cache
->cache_latency
);
612 sysfs_show_32bit_prop(buffer
, "type", cache
->cache_type
);
613 snprintf(buffer
, PAGE_SIZE
, "%ssibling_map ", buffer
);
614 for (i
= 0; i
< KFD_TOPOLOGY_CPU_SIBLINGS
; i
++)
615 ret
= snprintf(buffer
, PAGE_SIZE
, "%s%d%s",
616 buffer
, cache
->sibling_map
[i
],
617 (i
== KFD_TOPOLOGY_CPU_SIBLINGS
-1) ?
623 static const struct sysfs_ops cache_ops
= {
624 .show
= kfd_cache_show
,
627 static struct kobj_type cache_type
= {
628 .sysfs_ops
= &cache_ops
,
631 static ssize_t
node_show(struct kobject
*kobj
, struct attribute
*attr
,
634 struct kfd_topology_device
*dev
;
635 char public_name
[KFD_TOPOLOGY_PUBLIC_NAME_SIZE
];
637 uint32_t log_max_watch_addr
;
639 /* Making sure that the buffer is an empty string */
642 if (strcmp(attr
->name
, "gpu_id") == 0) {
643 dev
= container_of(attr
, struct kfd_topology_device
,
645 return sysfs_show_32bit_val(buffer
, dev
->gpu_id
);
648 if (strcmp(attr
->name
, "name") == 0) {
649 dev
= container_of(attr
, struct kfd_topology_device
,
651 for (i
= 0; i
< KFD_TOPOLOGY_PUBLIC_NAME_SIZE
; i
++) {
653 (char)dev
->node_props
.marketing_name
[i
];
654 if (dev
->node_props
.marketing_name
[i
] == 0)
657 public_name
[KFD_TOPOLOGY_PUBLIC_NAME_SIZE
-1] = 0x0;
658 return sysfs_show_str_val(buffer
, public_name
);
661 dev
= container_of(attr
, struct kfd_topology_device
,
663 sysfs_show_32bit_prop(buffer
, "cpu_cores_count",
664 dev
->node_props
.cpu_cores_count
);
665 sysfs_show_32bit_prop(buffer
, "simd_count",
666 dev
->node_props
.simd_count
);
668 if (dev
->mem_bank_count
< dev
->node_props
.mem_banks_count
) {
669 pr_warn("kfd: mem_banks_count truncated from %d to %d\n",
670 dev
->node_props
.mem_banks_count
,
671 dev
->mem_bank_count
);
672 sysfs_show_32bit_prop(buffer
, "mem_banks_count",
673 dev
->mem_bank_count
);
675 sysfs_show_32bit_prop(buffer
, "mem_banks_count",
676 dev
->node_props
.mem_banks_count
);
679 sysfs_show_32bit_prop(buffer
, "caches_count",
680 dev
->node_props
.caches_count
);
681 sysfs_show_32bit_prop(buffer
, "io_links_count",
682 dev
->node_props
.io_links_count
);
683 sysfs_show_32bit_prop(buffer
, "cpu_core_id_base",
684 dev
->node_props
.cpu_core_id_base
);
685 sysfs_show_32bit_prop(buffer
, "simd_id_base",
686 dev
->node_props
.simd_id_base
);
687 sysfs_show_32bit_prop(buffer
, "max_waves_per_simd",
688 dev
->node_props
.max_waves_per_simd
);
689 sysfs_show_32bit_prop(buffer
, "lds_size_in_kb",
690 dev
->node_props
.lds_size_in_kb
);
691 sysfs_show_32bit_prop(buffer
, "gds_size_in_kb",
692 dev
->node_props
.gds_size_in_kb
);
693 sysfs_show_32bit_prop(buffer
, "wave_front_size",
694 dev
->node_props
.wave_front_size
);
695 sysfs_show_32bit_prop(buffer
, "array_count",
696 dev
->node_props
.array_count
);
697 sysfs_show_32bit_prop(buffer
, "simd_arrays_per_engine",
698 dev
->node_props
.simd_arrays_per_engine
);
699 sysfs_show_32bit_prop(buffer
, "cu_per_simd_array",
700 dev
->node_props
.cu_per_simd_array
);
701 sysfs_show_32bit_prop(buffer
, "simd_per_cu",
702 dev
->node_props
.simd_per_cu
);
703 sysfs_show_32bit_prop(buffer
, "max_slots_scratch_cu",
704 dev
->node_props
.max_slots_scratch_cu
);
705 sysfs_show_32bit_prop(buffer
, "vendor_id",
706 dev
->node_props
.vendor_id
);
707 sysfs_show_32bit_prop(buffer
, "device_id",
708 dev
->node_props
.device_id
);
709 sysfs_show_32bit_prop(buffer
, "location_id",
710 dev
->node_props
.location_id
);
714 __ilog2_u32(dev
->gpu
->device_info
->num_of_watch_points
);
716 if (log_max_watch_addr
) {
717 dev
->node_props
.capability
|=
718 HSA_CAP_WATCH_POINTS_SUPPORTED
;
720 dev
->node_props
.capability
|=
721 ((log_max_watch_addr
<<
722 HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT
) &
723 HSA_CAP_WATCH_POINTS_TOTALBITS_MASK
);
726 sysfs_show_32bit_prop(buffer
, "max_engine_clk_fcompute",
727 dev
->gpu
->kfd2kgd
->get_max_engine_clock_in_mhz(
730 sysfs_show_64bit_prop(buffer
, "local_mem_size",
731 (unsigned long long int) 0);
733 sysfs_show_32bit_prop(buffer
, "fw_version",
734 dev
->gpu
->kfd2kgd
->get_fw_version(
737 sysfs_show_32bit_prop(buffer
, "capability",
738 dev
->node_props
.capability
);
741 return sysfs_show_32bit_prop(buffer
, "max_engine_clk_ccompute",
742 cpufreq_quick_get_max(0)/1000);
745 static const struct sysfs_ops node_ops
= {
749 static struct kobj_type node_type
= {
750 .sysfs_ops
= &node_ops
,
/*
 * Remove the sysfs file @attr from @kobj, then delete @kobj and drop a
 * reference on it.
 */
static void kfd_remove_sysfs_file(struct kobject *kobj, struct attribute *attr)
{
	sysfs_remove_file(kobj, attr);

	kobject_del(kobj);
	kobject_put(kobj);
}
760 static void kfd_remove_sysfs_node_entry(struct kfd_topology_device
*dev
)
762 struct kfd_iolink_properties
*iolink
;
763 struct kfd_cache_properties
*cache
;
764 struct kfd_mem_properties
*mem
;
768 if (dev
->kobj_iolink
) {
769 list_for_each_entry(iolink
, &dev
->io_link_props
, list
)
771 kfd_remove_sysfs_file(iolink
->kobj
,
775 kobject_del(dev
->kobj_iolink
);
776 kobject_put(dev
->kobj_iolink
);
777 dev
->kobj_iolink
= NULL
;
780 if (dev
->kobj_cache
) {
781 list_for_each_entry(cache
, &dev
->cache_props
, list
)
783 kfd_remove_sysfs_file(cache
->kobj
,
787 kobject_del(dev
->kobj_cache
);
788 kobject_put(dev
->kobj_cache
);
789 dev
->kobj_cache
= NULL
;
793 list_for_each_entry(mem
, &dev
->mem_props
, list
)
795 kfd_remove_sysfs_file(mem
->kobj
, &mem
->attr
);
798 kobject_del(dev
->kobj_mem
);
799 kobject_put(dev
->kobj_mem
);
800 dev
->kobj_mem
= NULL
;
803 if (dev
->kobj_node
) {
804 sysfs_remove_file(dev
->kobj_node
, &dev
->attr_gpuid
);
805 sysfs_remove_file(dev
->kobj_node
, &dev
->attr_name
);
806 sysfs_remove_file(dev
->kobj_node
, &dev
->attr_props
);
807 kobject_del(dev
->kobj_node
);
808 kobject_put(dev
->kobj_node
);
809 dev
->kobj_node
= NULL
;
813 static int kfd_build_sysfs_node_entry(struct kfd_topology_device
*dev
,
816 struct kfd_iolink_properties
*iolink
;
817 struct kfd_cache_properties
*cache
;
818 struct kfd_mem_properties
*mem
;
825 * Creating the sysfs folders
827 BUG_ON(dev
->kobj_node
);
828 dev
->kobj_node
= kfd_alloc_struct(dev
->kobj_node
);
832 ret
= kobject_init_and_add(dev
->kobj_node
, &node_type
,
833 sys_props
.kobj_nodes
, "%d", id
);
837 dev
->kobj_mem
= kobject_create_and_add("mem_banks", dev
->kobj_node
);
841 dev
->kobj_cache
= kobject_create_and_add("caches", dev
->kobj_node
);
842 if (!dev
->kobj_cache
)
845 dev
->kobj_iolink
= kobject_create_and_add("io_links", dev
->kobj_node
);
846 if (!dev
->kobj_iolink
)
850 * Creating sysfs files for node properties
852 dev
->attr_gpuid
.name
= "gpu_id";
853 dev
->attr_gpuid
.mode
= KFD_SYSFS_FILE_MODE
;
854 sysfs_attr_init(&dev
->attr_gpuid
);
855 dev
->attr_name
.name
= "name";
856 dev
->attr_name
.mode
= KFD_SYSFS_FILE_MODE
;
857 sysfs_attr_init(&dev
->attr_name
);
858 dev
->attr_props
.name
= "properties";
859 dev
->attr_props
.mode
= KFD_SYSFS_FILE_MODE
;
860 sysfs_attr_init(&dev
->attr_props
);
861 ret
= sysfs_create_file(dev
->kobj_node
, &dev
->attr_gpuid
);
864 ret
= sysfs_create_file(dev
->kobj_node
, &dev
->attr_name
);
867 ret
= sysfs_create_file(dev
->kobj_node
, &dev
->attr_props
);
872 list_for_each_entry(mem
, &dev
->mem_props
, list
) {
873 mem
->kobj
= kzalloc(sizeof(struct kobject
), GFP_KERNEL
);
876 ret
= kobject_init_and_add(mem
->kobj
, &mem_type
,
877 dev
->kobj_mem
, "%d", i
);
881 mem
->attr
.name
= "properties";
882 mem
->attr
.mode
= KFD_SYSFS_FILE_MODE
;
883 sysfs_attr_init(&mem
->attr
);
884 ret
= sysfs_create_file(mem
->kobj
, &mem
->attr
);
891 list_for_each_entry(cache
, &dev
->cache_props
, list
) {
892 cache
->kobj
= kzalloc(sizeof(struct kobject
), GFP_KERNEL
);
895 ret
= kobject_init_and_add(cache
->kobj
, &cache_type
,
896 dev
->kobj_cache
, "%d", i
);
900 cache
->attr
.name
= "properties";
901 cache
->attr
.mode
= KFD_SYSFS_FILE_MODE
;
902 sysfs_attr_init(&cache
->attr
);
903 ret
= sysfs_create_file(cache
->kobj
, &cache
->attr
);
910 list_for_each_entry(iolink
, &dev
->io_link_props
, list
) {
911 iolink
->kobj
= kzalloc(sizeof(struct kobject
), GFP_KERNEL
);
914 ret
= kobject_init_and_add(iolink
->kobj
, &iolink_type
,
915 dev
->kobj_iolink
, "%d", i
);
919 iolink
->attr
.name
= "properties";
920 iolink
->attr
.mode
= KFD_SYSFS_FILE_MODE
;
921 sysfs_attr_init(&iolink
->attr
);
922 ret
= sysfs_create_file(iolink
->kobj
, &iolink
->attr
);
931 static int kfd_build_sysfs_node_tree(void)
933 struct kfd_topology_device
*dev
;
937 list_for_each_entry(dev
, &topology_device_list
, list
) {
938 ret
= kfd_build_sysfs_node_entry(dev
, i
);
947 static void kfd_remove_sysfs_node_tree(void)
949 struct kfd_topology_device
*dev
;
951 list_for_each_entry(dev
, &topology_device_list
, list
)
952 kfd_remove_sysfs_node_entry(dev
);
955 static int kfd_topology_update_sysfs(void)
959 pr_info("Creating topology SYSFS entries\n");
960 if (sys_props
.kobj_topology
== NULL
) {
961 sys_props
.kobj_topology
=
962 kfd_alloc_struct(sys_props
.kobj_topology
);
963 if (!sys_props
.kobj_topology
)
966 ret
= kobject_init_and_add(sys_props
.kobj_topology
,
967 &sysprops_type
, &kfd_device
->kobj
,
972 sys_props
.kobj_nodes
= kobject_create_and_add("nodes",
973 sys_props
.kobj_topology
);
974 if (!sys_props
.kobj_nodes
)
977 sys_props
.attr_genid
.name
= "generation_id";
978 sys_props
.attr_genid
.mode
= KFD_SYSFS_FILE_MODE
;
979 sysfs_attr_init(&sys_props
.attr_genid
);
980 ret
= sysfs_create_file(sys_props
.kobj_topology
,
981 &sys_props
.attr_genid
);
985 sys_props
.attr_props
.name
= "system_properties";
986 sys_props
.attr_props
.mode
= KFD_SYSFS_FILE_MODE
;
987 sysfs_attr_init(&sys_props
.attr_props
);
988 ret
= sysfs_create_file(sys_props
.kobj_topology
,
989 &sys_props
.attr_props
);
994 kfd_remove_sysfs_node_tree();
996 return kfd_build_sysfs_node_tree();
999 static void kfd_topology_release_sysfs(void)
1001 kfd_remove_sysfs_node_tree();
1002 if (sys_props
.kobj_topology
) {
1003 sysfs_remove_file(sys_props
.kobj_topology
,
1004 &sys_props
.attr_genid
);
1005 sysfs_remove_file(sys_props
.kobj_topology
,
1006 &sys_props
.attr_props
);
1007 if (sys_props
.kobj_nodes
) {
1008 kobject_del(sys_props
.kobj_nodes
);
1009 kobject_put(sys_props
.kobj_nodes
);
1010 sys_props
.kobj_nodes
= NULL
;
1012 kobject_del(sys_props
.kobj_topology
);
1013 kobject_put(sys_props
.kobj_topology
);
1014 sys_props
.kobj_topology
= NULL
;
1018 int kfd_topology_init(void)
1020 void *crat_image
= NULL
;
1021 size_t image_size
= 0;
1025 * Initialize the head for the topology device list
1027 INIT_LIST_HEAD(&topology_device_list
);
1028 init_rwsem(&topology_lock
);
1029 topology_crat_parsed
= 0;
1031 memset(&sys_props
, 0, sizeof(sys_props
));
1034 * Get the CRAT image from the ACPI
1036 ret
= kfd_topology_get_crat_acpi(crat_image
, &image_size
);
1037 if (ret
== 0 && image_size
> 0) {
1038 pr_info("Found CRAT image with size=%zd\n", image_size
);
1039 crat_image
= kmalloc(image_size
, GFP_KERNEL
);
1042 pr_err("No memory for allocating CRAT image\n");
1045 ret
= kfd_topology_get_crat_acpi(crat_image
, &image_size
);
1048 down_write(&topology_lock
);
1049 ret
= kfd_parse_crat_table(crat_image
);
1051 ret
= kfd_topology_update_sysfs();
1052 up_write(&topology_lock
);
1054 pr_err("Couldn't get CRAT table size from ACPI\n");
1057 } else if (ret
== -ENODATA
) {
1060 pr_err("Couldn't get CRAT table size from ACPI\n");
1064 pr_info("Finished initializing topology ret=%d\n", ret
);
/*
 * Tear the topology down: the sysfs tree goes first, then the in-memory
 * device list is released.
 */
void kfd_topology_shutdown(void)
{
	/* sysfs entries reference the devices, so they must go first. */
	kfd_topology_release_sysfs();

	kfd_release_live_view();
}
1074 static void kfd_debug_print_topology(void)
1076 struct kfd_topology_device
*dev
;
1079 pr_info("DEBUG PRINT OF TOPOLOGY:");
1080 list_for_each_entry(dev
, &topology_device_list
, list
) {
1081 pr_info("Node: %d\n", i
);
1082 pr_info("\tGPU assigned: %s\n", (dev
->gpu
? "yes" : "no"));
1083 pr_info("\tCPU count: %d\n", dev
->node_props
.cpu_cores_count
);
1084 pr_info("\tSIMD count: %d", dev
->node_props
.simd_count
);
1089 static uint32_t kfd_generate_gpu_id(struct kfd_dev
*gpu
)
1098 buf
[0] = gpu
->pdev
->devfn
;
1099 buf
[1] = gpu
->pdev
->subsystem_vendor
;
1100 buf
[2] = gpu
->pdev
->subsystem_device
;
1101 buf
[3] = gpu
->pdev
->device
;
1102 buf
[4] = gpu
->pdev
->bus
->number
;
1103 buf
[5] = (uint32_t)(gpu
->kfd2kgd
->get_vmem_size(gpu
->kgd
)
1105 buf
[6] = (uint32_t)(gpu
->kfd2kgd
->get_vmem_size(gpu
->kgd
) >> 32);
1107 for (i
= 0, hashout
= 0; i
< 7; i
++)
1108 hashout
^= hash_32(buf
[i
], KFD_GPU_ID_HASH_WIDTH
);
1113 static struct kfd_topology_device
*kfd_assign_gpu(struct kfd_dev
*gpu
)
1115 struct kfd_topology_device
*dev
;
1116 struct kfd_topology_device
*out_dev
= NULL
;
1120 list_for_each_entry(dev
, &topology_device_list
, list
)
1121 if (dev
->gpu
== NULL
&& dev
->node_props
.simd_count
> 0) {
1130 static void kfd_notify_gpu_change(uint32_t gpu_id
, int arrival
)
1133 * TODO: Generate an event for thunk about the arrival/removal
1138 int kfd_topology_add_device(struct kfd_dev
*gpu
)
1141 struct kfd_topology_device
*dev
;
1146 gpu_id
= kfd_generate_gpu_id(gpu
);
1148 pr_debug("kfd: Adding new GPU (ID: 0x%x) to topology\n", gpu_id
);
1150 down_write(&topology_lock
);
1152 * Try to assign the GPU to existing topology device (generated from
1155 dev
= kfd_assign_gpu(gpu
);
1157 pr_info("GPU was not found in the current topology. Extending.\n");
1158 kfd_debug_print_topology();
1159 dev
= kfd_create_topology_device();
1167 * TODO: Make a call to retrieve topology information from the
1172 * Update the SYSFS tree, since we added another topology device
1174 if (kfd_topology_update_sysfs() < 0)
1175 kfd_topology_release_sysfs();
1179 dev
->gpu_id
= gpu_id
;
1181 dev
->node_props
.vendor_id
= gpu
->pdev
->vendor
;
1182 dev
->node_props
.device_id
= gpu
->pdev
->device
;
1183 dev
->node_props
.location_id
= (gpu
->pdev
->bus
->number
<< 24) +
1184 (gpu
->pdev
->devfn
& 0xffffff);
1186 * TODO: Retrieve max engine clock values from KGD
1189 if (dev
->gpu
->device_info
->asic_family
== CHIP_CARRIZO
) {
1190 dev
->node_props
.capability
|= HSA_CAP_DOORBELL_PACKET_TYPE
;
1191 pr_info("amdkfd: adding doorbell packet type capability\n");
1197 up_write(&topology_lock
);
1200 kfd_notify_gpu_change(gpu_id
, 1);
1205 int kfd_topology_remove_device(struct kfd_dev
*gpu
)
1207 struct kfd_topology_device
*dev
;
1213 down_write(&topology_lock
);
1215 list_for_each_entry(dev
, &topology_device_list
, list
)
1216 if (dev
->gpu
== gpu
) {
1217 gpu_id
= dev
->gpu_id
;
1218 kfd_remove_sysfs_node_entry(dev
);
1219 kfd_release_topology_device(dev
);
1221 if (kfd_topology_update_sysfs() < 0)
1222 kfd_topology_release_sysfs();
1226 up_write(&topology_lock
);
1229 kfd_notify_gpu_change(gpu_id
, 0);
1235 * When idx is out of bounds, the function will return NULL
1237 struct kfd_dev
*kfd_topology_enum_kfd_devices(uint8_t idx
)
1240 struct kfd_topology_device
*top_dev
;
1241 struct kfd_dev
*device
= NULL
;
1242 uint8_t device_idx
= 0;
1244 down_read(&topology_lock
);
1246 list_for_each_entry(top_dev
, &topology_device_list
, list
) {
1247 if (device_idx
== idx
) {
1248 device
= top_dev
->gpu
;
1255 up_read(&topology_lock
);