/* arch/x86/events/intel/uncore.c - Linux 4.6-rc6 (linux/fpc-iii.git) */
#include "uncore.h"

static struct intel_uncore_type *empty_uncore[] = { NULL, };
struct intel_uncore_type **uncore_msr_uncores = empty_uncore;
struct intel_uncore_type **uncore_pci_uncores = empty_uncore;

static bool pcidrv_registered;
struct pci_driver *uncore_pci_driver;
/* pci bus to socket mapping */
DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
struct pci_extra_dev *uncore_extra_pci_dev;
static int max_packages;

/* mask of cpus that collect uncore events */
static cpumask_t uncore_cpu_mask;

/* constraint for the fixed counter */
static struct event_constraint uncore_constraint_fixed =
        EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL);
struct event_constraint uncore_constraint_empty =
        EVENT_CONSTRAINT(0, 0, 0);
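
/*
 * Translate a PCI bus to the physical package (socket) id recorded in
 * pci2phy_map_head at setup time. Returns -1 if no mapping exists for
 * the bus' segment and bus number.
 */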
static int uncore_pcibus_to_physid(struct pci_bus *bus)
{
        struct pci2phy_map *map;
        int phys_id = -1;

        raw_spin_lock(&pci2phy_map_lock);
        list_for_each_entry(map, &pci2phy_map_head, list) {
                if (map->segment == pci_domain_nr(bus)) {
                        phys_id = map->pbus_to_physid[bus->number];
                        break;
                }
        }
        raw_spin_unlock(&pci2phy_map_lock);

        return phys_id;
}

static void uncore_free_pcibus_map(void)
{
        struct pci2phy_map *map, *tmp;

        list_for_each_entry_safe(map, tmp, &pci2phy_map_head, list) {
                list_del(&map->list);
                kfree(map);
        }
}

struct pci2phy_map *__find_pci2phy_map(int segment)
{
        struct pci2phy_map *map, *alloc = NULL;
        int i;

        lockdep_assert_held(&pci2phy_map_lock);

lookup:
        list_for_each_entry(map, &pci2phy_map_head, list) {
                if (map->segment == segment)
                        goto end;
        }

        if (!alloc) {
                raw_spin_unlock(&pci2phy_map_lock);
                alloc = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL);
                raw_spin_lock(&pci2phy_map_lock);

                if (!alloc)
                        return NULL;

                goto lookup;
        }

        map = alloc;
        alloc = NULL;
        map->segment = segment;
        for (i = 0; i < 256; i++)
                map->pbus_to_physid[i] = -1;
        list_add_tail(&map->list, &pci2phy_map_head);

end:
        kfree(alloc);
        return map;
}
ssize_t uncore_event_show(struct kobject *kobj,
                          struct kobj_attribute *attr, char *buf)
{
        struct uncore_event_desc *event =
                container_of(attr, struct uncore_event_desc, attr);
        return sprintf(buf, "%s", event->config);
}

struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
{
        return pmu->boxes[topology_logical_package_id(cpu)];
}

u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
{
        u64 count;

        rdmsrl(event->hw.event_base, count);

        return count;
}
/*
 * generic get constraint function for shared match/mask registers.
 */
struct event_constraint *
uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
{
        struct intel_uncore_extra_reg *er;
        struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
        struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
        unsigned long flags;
        bool ok = false;

        /*
         * reg->alloc can be set due to existing state, so for fake box we
         * need to ignore this, otherwise we might fail to allocate proper
         * fake state for this extra reg constraint.
         */
        if (reg1->idx == EXTRA_REG_NONE ||
            (!uncore_box_is_fake(box) && reg1->alloc))
                return NULL;

        er = &box->shared_regs[reg1->idx];
        raw_spin_lock_irqsave(&er->lock, flags);
        if (!atomic_read(&er->ref) ||
            (er->config1 == reg1->config && er->config2 == reg2->config)) {
                atomic_inc(&er->ref);
                er->config1 = reg1->config;
                er->config2 = reg2->config;
                ok = true;
        }
        raw_spin_unlock_irqrestore(&er->lock, flags);

        if (ok) {
                if (!uncore_box_is_fake(box))
                        reg1->alloc = 1;
                return NULL;
        }

        return &uncore_constraint_empty;
}
void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
{
        struct intel_uncore_extra_reg *er;
        struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;

        /*
         * Only put the constraint if the extra reg was actually allocated.
         * This also takes care of events which do not use an extra shared
         * reg.
         *
         * Also, if this is a fake box we shouldn't touch any event state
         * (reg->alloc) and we don't care about leaving inconsistent box
         * state either since it will be thrown out.
         */
        if (uncore_box_is_fake(box) || !reg1->alloc)
                return;

        er = &box->shared_regs[reg1->idx];
        atomic_dec(&er->ref);
        reg1->alloc = 0;
}
u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx)
{
        struct intel_uncore_extra_reg *er;
        unsigned long flags;
        u64 config;

        er = &box->shared_regs[idx];

        raw_spin_lock_irqsave(&er->lock, flags);
        config = er->config;
        raw_spin_unlock_irqrestore(&er->lock, flags);

        return config;
}

static void uncore_assign_hw_event(struct intel_uncore_box *box,
                                   struct perf_event *event, int idx)
{
        struct hw_perf_event *hwc = &event->hw;

        hwc->idx = idx;
        hwc->last_tag = ++box->tags[idx];

        if (hwc->idx == UNCORE_PMC_IDX_FIXED) {
                hwc->event_base = uncore_fixed_ctr(box);
                hwc->config_base = uncore_fixed_ctl(box);
                return;
        }

        hwc->config_base = uncore_event_ctl(box, hwc->idx);
        hwc->event_base = uncore_perf_ctr(box, hwc->idx);
}
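
/*
 * Read the counter and accumulate the delta since the last read into
 * event->count. The hardware counters are narrower than 64 bit, so both
 * the old and the new raw values are shifted left by (64 - width) before
 * the subtraction and the delta is shifted back, which discards the unused
 * high bits and makes counter wraparound come out right.
 */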
void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event)
{
        u64 prev_count, new_count, delta;
        int shift;

        if (event->hw.idx >= UNCORE_PMC_IDX_FIXED)
                shift = 64 - uncore_fixed_ctr_bits(box);
        else
                shift = 64 - uncore_perf_ctr_bits(box);

        /* the hrtimer might modify the previous event value */
again:
        prev_count = local64_read(&event->hw.prev_count);
        new_count = uncore_read_counter(box, event);
        if (local64_xchg(&event->hw.prev_count, new_count) != prev_count)
                goto again;

        delta = (new_count << shift) - (prev_count << shift);
        delta >>= shift;

        local64_add(delta, &event->count);
}
/*
 * The overflow interrupt is unavailable for SandyBridge-EP and broken for
 * SandyBridge, so we use an hrtimer to periodically poll the counters and
 * avoid overflow.
 */
static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
{
        struct intel_uncore_box *box;
        struct perf_event *event;
        unsigned long flags;
        int bit;

        box = container_of(hrtimer, struct intel_uncore_box, hrtimer);
        if (!box->n_active || box->cpu != smp_processor_id())
                return HRTIMER_NORESTART;
        /*
         * disable local interrupts to prevent uncore_pmu_event_start/stop
         * from interrupting the update process
         */
        local_irq_save(flags);

        /*
         * handle boxes with an active event list as opposed to active
         * counters
         */
        list_for_each_entry(event, &box->active_list, active_entry) {
                uncore_perf_event_update(box, event);
        }

        for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
                uncore_perf_event_update(box, box->events[bit]);

        local_irq_restore(flags);

        hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration));
        return HRTIMER_RESTART;
}

void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
{
        hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration),
                      HRTIMER_MODE_REL_PINNED);
}

void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
{
        hrtimer_cancel(&box->hrtimer);
}

static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
{
        hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        box->hrtimer.function = uncore_pmu_hrtimer;
}
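
/*
 * Allocate a box together with its trailing array of num_shared_regs
 * extra registers on the requested NUMA node. The box starts out unbound
 * (cpu, pci_phys_id and pkgid of -1) with the default hrtimer interval.
 */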
static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
                                                 int node)
{
        int i, size, numshared = type->num_shared_regs;
        struct intel_uncore_box *box;

        size = sizeof(*box) + numshared * sizeof(struct intel_uncore_extra_reg);

        box = kzalloc_node(size, GFP_KERNEL, node);
        if (!box)
                return NULL;

        for (i = 0; i < numshared; i++)
                raw_spin_lock_init(&box->shared_regs[i].lock);

        uncore_pmu_init_hrtimer(box);
        box->cpu = -1;
        box->pci_phys_id = -1;
        box->pkgid = -1;

        /* set default hrtimer timeout */
        box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;

        INIT_LIST_HEAD(&box->active_list);

        return box;
}

/*
 * Using uncore_pmu_event_init pmu event_init callback
 * as a detection point for uncore events.
 */
static int uncore_pmu_event_init(struct perf_event *event);

static bool is_uncore_event(struct perf_event *event)
{
        return event->pmu->event_init == uncore_pmu_event_init;
}
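
/*
 * Collect the group leader and, if dogrp is set, its uncore siblings into
 * box->event_list, starting at the current box->n_events. Returns the new
 * number of collected events, or -EINVAL if the box runs out of counters.
 */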
static int
uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader,
                      bool dogrp)
{
        struct perf_event *event;
        int n, max_count;

        max_count = box->pmu->type->num_counters;
        if (box->pmu->type->fixed_ctl)
                max_count++;

        if (box->n_events >= max_count)
                return -EINVAL;

        n = box->n_events;

        if (is_uncore_event(leader)) {
                box->event_list[n] = leader;
                n++;
        }

        if (!dogrp)
                return n;

        list_for_each_entry(event, &leader->sibling_list, group_entry) {
                if (!is_uncore_event(event) ||
                    event->state <= PERF_EVENT_STATE_OFF)
                        continue;

                if (n >= max_count)
                        return -EINVAL;

                box->event_list[n] = event;
                n++;
        }
        return n;
}

static struct event_constraint *
uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
{
        struct intel_uncore_type *type = box->pmu->type;
        struct event_constraint *c;

        if (type->ops->get_constraint) {
                c = type->ops->get_constraint(box, event);
                if (c)
                        return c;
        }

        if (event->attr.config == UNCORE_FIXED_EVENT)
                return &uncore_constraint_fixed;

        if (type->constraints) {
                for_each_event_constraint(c, type->constraints) {
                        if ((event->hw.config & c->cmask) == c->code)
                                return c;
                }
        }

        return &type->unconstrainted;
}

static void uncore_put_event_constraint(struct intel_uncore_box *box,
                                        struct perf_event *event)
{
        if (box->pmu->type->ops->put_constraint)
                box->pmu->type->ops->put_constraint(box, event);
}
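
/*
 * Assign counters to the first n events in box->event_list. The fast path
 * keeps events on the counter they already occupy when the constraint
 * still allows it; otherwise perf_assign_events() is run over the
 * collected constraints. Returns 0 on success, -EINVAL if no valid
 * assignment exists.
 */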
static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
{
        unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
        struct event_constraint *c;
        int i, wmin, wmax, ret = 0;
        struct hw_perf_event *hwc;

        bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);

        for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
                c = uncore_get_event_constraint(box, box->event_list[i]);
                box->event_constraint[i] = c;
                wmin = min(wmin, c->weight);
                wmax = max(wmax, c->weight);
        }

        /* fastpath, try to reuse previous register */
        for (i = 0; i < n; i++) {
                hwc = &box->event_list[i]->hw;
                c = box->event_constraint[i];

                /* never assigned */
                if (hwc->idx == -1)
                        break;

                /* constraint still honored */
                if (!test_bit(hwc->idx, c->idxmsk))
                        break;

                /* not already used */
                if (test_bit(hwc->idx, used_mask))
                        break;

                __set_bit(hwc->idx, used_mask);
                if (assign)
                        assign[i] = hwc->idx;
        }

        /* slow path */
        if (i != n)
                ret = perf_assign_events(box->event_constraint, n,
                                         wmin, wmax, n, assign);

        if (!assign || ret) {
                for (i = 0; i < n; i++)
                        uncore_put_event_constraint(box, box->event_list[i]);
        }

        return ret ? -EINVAL : 0;
}
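
/*
 * ->start()/->stop() callbacks. The first event to become active enables
 * the box and arms the polling hrtimer; the last event to stop disables
 * the box and cancels the timer again.
 */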
static void uncore_pmu_event_start(struct perf_event *event, int flags)
{
        struct intel_uncore_box *box = uncore_event_to_box(event);
        int idx = event->hw.idx;

        if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
                return;

        if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
                return;

        event->hw.state = 0;
        box->events[idx] = event;
        box->n_active++;
        __set_bit(idx, box->active_mask);

        local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
        uncore_enable_event(box, event);

        if (box->n_active == 1) {
                uncore_enable_box(box);
                uncore_pmu_start_hrtimer(box);
        }
}

static void uncore_pmu_event_stop(struct perf_event *event, int flags)
{
        struct intel_uncore_box *box = uncore_event_to_box(event);
        struct hw_perf_event *hwc = &event->hw;

        if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
                uncore_disable_event(box, event);
                box->n_active--;
                box->events[hwc->idx] = NULL;
                WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
                hwc->state |= PERF_HES_STOPPED;

                if (box->n_active == 0) {
                        uncore_disable_box(box);
                        uncore_pmu_cancel_hrtimer(box);
                }
        }

        if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
                /*
                 * Drain the remaining delta count out of an event
                 * that we are disabling:
                 */
                uncore_perf_event_update(box, event);
                hwc->state |= PERF_HES_UPTODATE;
        }
}
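
/*
 * ->add() callback: collect the new event into the box, rerun the counter
 * assignment, stop any events that have to move to a different counter and
 * (re)start everything that is not explicitly kept stopped.
 */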
static int uncore_pmu_event_add(struct perf_event *event, int flags)
{
        struct intel_uncore_box *box = uncore_event_to_box(event);
        struct hw_perf_event *hwc = &event->hw;
        int assign[UNCORE_PMC_IDX_MAX];
        int i, n, ret;

        if (!box)
                return -ENODEV;

        ret = n = uncore_collect_events(box, event, false);
        if (ret < 0)
                return ret;

        hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
        if (!(flags & PERF_EF_START))
                hwc->state |= PERF_HES_ARCH;

        ret = uncore_assign_events(box, assign, n);
        if (ret)
                return ret;

        /* save events moving to new counters */
        for (i = 0; i < box->n_events; i++) {
                event = box->event_list[i];
                hwc = &event->hw;

                if (hwc->idx == assign[i] &&
                    hwc->last_tag == box->tags[assign[i]])
                        continue;
                /*
                 * Ensure we don't accidentally enable a stopped
                 * counter simply because we rescheduled.
                 */
                if (hwc->state & PERF_HES_STOPPED)
                        hwc->state |= PERF_HES_ARCH;

                uncore_pmu_event_stop(event, PERF_EF_UPDATE);
        }

        /* reprogram moved events into new counters */
        for (i = 0; i < n; i++) {
                event = box->event_list[i];
                hwc = &event->hw;

                if (hwc->idx != assign[i] ||
                    hwc->last_tag != box->tags[assign[i]])
                        uncore_assign_hw_event(box, event, assign[i]);
                else if (i < box->n_events)
                        continue;

                if (hwc->state & PERF_HES_ARCH)
                        continue;

                uncore_pmu_event_start(event, 0);
        }
        box->n_events = n;

        return 0;
}
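
/*
 * ->del() callback: stop the event, drop its constraint and remove it
 * from box->event_list, compacting the list in place.
 */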
static void uncore_pmu_event_del(struct perf_event *event, int flags)
{
        struct intel_uncore_box *box = uncore_event_to_box(event);
        int i;

        uncore_pmu_event_stop(event, PERF_EF_UPDATE);

        for (i = 0; i < box->n_events; i++) {
                if (event == box->event_list[i]) {
                        uncore_put_event_constraint(box, event);

                        for (++i; i < box->n_events; i++)
                                box->event_list[i - 1] = box->event_list[i];

                        --box->n_events;
                        break;
                }
        }

        event->hw.idx = -1;
        event->hw.last_tag = ~0ULL;
}

void uncore_pmu_event_read(struct perf_event *event)
{
        struct intel_uncore_box *box = uncore_event_to_box(event);
        uncore_perf_event_update(box, event);
}

/*
 * validation ensures the group can be loaded onto the
 * PMU if it was the only group available.
 */
static int uncore_validate_group(struct intel_uncore_pmu *pmu,
                                 struct perf_event *event)
{
        struct perf_event *leader = event->group_leader;
        struct intel_uncore_box *fake_box;
        int ret = -EINVAL, n;

        fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
        if (!fake_box)
                return -ENOMEM;

        fake_box->pmu = pmu;
        /*
         * the event is not yet connected with its
         * siblings therefore we must first collect
         * existing siblings, then add the new event
         * before we can simulate the scheduling
         */
        n = uncore_collect_events(fake_box, leader, true);
        if (n < 0)
                goto out;

        fake_box->n_events = n;
        n = uncore_collect_events(fake_box, event, false);
        if (n < 0)
                goto out;

        fake_box->n_events = n;

        ret = uncore_assign_events(fake_box, NULL, n);
out:
        kfree(fake_box);
        return ret;
}
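
/*
 * ->event_init() callback: reject events this PMU cannot handle (wrong
 * type, exclude bits, sampling, no device for the package), bind the event
 * to the CPU that collects uncore events for its package and apply the
 * type specific config/hw_config handling.
 */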
static int uncore_pmu_event_init(struct perf_event *event)
{
        struct intel_uncore_pmu *pmu;
        struct intel_uncore_box *box;
        struct hw_perf_event *hwc = &event->hw;
        int ret;

        if (event->attr.type != event->pmu->type)
                return -ENOENT;

        pmu = uncore_event_to_pmu(event);
        /* no device found for this pmu */
        if (pmu->func_id < 0)
                return -ENOENT;

        /*
         * The uncore PMU measures at all privilege levels all the time,
         * so it doesn't make sense to specify any exclude bits.
         */
        if (event->attr.exclude_user || event->attr.exclude_kernel ||
            event->attr.exclude_hv || event->attr.exclude_idle)
                return -EINVAL;

        /* Sampling not supported yet */
        if (hwc->sample_period)
                return -EINVAL;

        /*
         * Place all uncore events for a particular physical package
         * onto a single cpu
         */
        if (event->cpu < 0)
                return -EINVAL;
        box = uncore_pmu_to_box(pmu, event->cpu);
        if (!box || box->cpu < 0)
                return -EINVAL;
        event->cpu = box->cpu;
        event->pmu_private = box;

        event->hw.idx = -1;
        event->hw.last_tag = ~0ULL;
        event->hw.extra_reg.idx = EXTRA_REG_NONE;
        event->hw.branch_reg.idx = EXTRA_REG_NONE;

        if (event->attr.config == UNCORE_FIXED_EVENT) {
                /* no fixed counter */
                if (!pmu->type->fixed_ctl)
                        return -EINVAL;
                /*
                 * if there is only one fixed counter, only the first pmu
                 * can access the fixed counter
                 */
                if (pmu->type->single_fixed && pmu->pmu_idx > 0)
                        return -EINVAL;

                /* fixed counters have event field hardcoded to zero */
                hwc->config = 0ULL;
        } else {
                hwc->config = event->attr.config & pmu->type->event_mask;
                if (pmu->type->ops->hw_config) {
                        ret = pmu->type->ops->hw_config(box, event);
                        if (ret)
                                return ret;
                }
        }

        if (event->group_leader != event)
                ret = uncore_validate_group(pmu, event);
        else
                ret = 0;

        return ret;
}
static ssize_t uncore_get_attr_cpumask(struct device *dev,
                                       struct device_attribute *attr, char *buf)
{
        return cpumap_print_to_pagebuf(true, buf, &uncore_cpu_mask);
}

static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);

static struct attribute *uncore_pmu_attrs[] = {
        &dev_attr_cpumask.attr,
        NULL,
};

static struct attribute_group uncore_pmu_attr_group = {
        .attrs = uncore_pmu_attrs,
};

static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
{
        int ret;

        if (!pmu->type->pmu) {
                pmu->pmu = (struct pmu) {
                        .attr_groups = pmu->type->attr_groups,
                        .task_ctx_nr = perf_invalid_context,
                        .event_init = uncore_pmu_event_init,
                        .add = uncore_pmu_event_add,
                        .del = uncore_pmu_event_del,
                        .start = uncore_pmu_event_start,
                        .stop = uncore_pmu_event_stop,
                        .read = uncore_pmu_event_read,
                };
        } else {
                pmu->pmu = *pmu->type->pmu;
                pmu->pmu.attr_groups = pmu->type->attr_groups;
        }

        if (pmu->type->num_boxes == 1) {
                if (strlen(pmu->type->name) > 0)
                        sprintf(pmu->name, "uncore_%s", pmu->type->name);
                else
                        sprintf(pmu->name, "uncore");
        } else {
                sprintf(pmu->name, "uncore_%s_%d", pmu->type->name,
                        pmu->pmu_idx);
        }

        ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
        if (!ret)
                pmu->registered = true;
        return ret;
}

static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu)
{
        if (!pmu->registered)
                return;
        perf_pmu_unregister(&pmu->pmu);
        pmu->registered = false;
}

static void __init __uncore_exit_boxes(struct intel_uncore_type *type, int cpu)
{
        struct intel_uncore_pmu *pmu = type->pmus;
        struct intel_uncore_box *box;
        int i, pkg;

        if (pmu) {
                pkg = topology_physical_package_id(cpu);
                for (i = 0; i < type->num_boxes; i++, pmu++) {
                        box = pmu->boxes[pkg];
                        if (box)
                                uncore_box_exit(box);
                }
        }
}
static void __init uncore_exit_boxes(void *dummy)
{
        struct intel_uncore_type **types;

        for (types = uncore_msr_uncores; *types; types++)
                __uncore_exit_boxes(*types, smp_processor_id());
}
static void uncore_free_boxes(struct intel_uncore_pmu *pmu)
{
        int pkg;

        for (pkg = 0; pkg < max_packages; pkg++)
                kfree(pmu->boxes[pkg]);
        kfree(pmu->boxes);
}

static void __init uncore_type_exit(struct intel_uncore_type *type)
{
        struct intel_uncore_pmu *pmu = type->pmus;
        int i;

        if (pmu) {
                for (i = 0; i < type->num_boxes; i++, pmu++) {
                        uncore_pmu_unregister(pmu);
                        uncore_free_boxes(pmu);
                }
                kfree(type->pmus);
                type->pmus = NULL;
        }
        kfree(type->events_group);
        type->events_group = NULL;
}

static void __init uncore_types_exit(struct intel_uncore_type **types)
{
        for (; *types; types++)
                uncore_type_exit(*types);
}
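
/*
 * Per-type init: allocate one intel_uncore_pmu per box, the per-package
 * box pointer arrays, the "events" attribute group built from
 * type->event_descs, and the default (unconstrained) event constraint.
 */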
static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
{
        struct intel_uncore_pmu *pmus;
        struct attribute_group *attr_group;
        struct attribute **attrs;
        size_t size;
        int i, j;

        pmus = kzalloc(sizeof(*pmus) * type->num_boxes, GFP_KERNEL);
        if (!pmus)
                return -ENOMEM;

        size = max_packages * sizeof(struct intel_uncore_box *);

        for (i = 0; i < type->num_boxes; i++) {
                pmus[i].func_id = setid ? i : -1;
                pmus[i].pmu_idx = i;
                pmus[i].type = type;
                pmus[i].boxes = kzalloc(size, GFP_KERNEL);
                if (!pmus[i].boxes)
                        return -ENOMEM;
        }

        type->pmus = pmus;
        type->unconstrainted = (struct event_constraint)
                __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
                                   0, type->num_counters, 0, 0);

        if (type->event_descs) {
                for (i = 0; type->event_descs[i].attr.attr.name; i++);

                attr_group = kzalloc(sizeof(struct attribute *) * (i + 1) +
                                     sizeof(*attr_group), GFP_KERNEL);
                if (!attr_group)
                        return -ENOMEM;

                attrs = (struct attribute **)(attr_group + 1);
                attr_group->name = "events";
                attr_group->attrs = attrs;

                for (j = 0; j < i; j++)
                        attrs[j] = &type->event_descs[j].attr.attr;

                type->events_group = attr_group;
        }

        type->pmu_group = &uncore_pmu_attr_group;
        return 0;
}

static int __init
uncore_types_init(struct intel_uncore_type **types, bool setid)
{
        int ret;

        for (; *types; types++) {
                ret = uncore_type_init(*types, setid);
                if (ret)
                        return ret;
        }
        return 0;
}
/*
 * add a pci uncore device
 */
static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
        struct intel_uncore_type *type;
        struct intel_uncore_pmu *pmu;
        struct intel_uncore_box *box;
        int phys_id, pkg, ret;

        phys_id = uncore_pcibus_to_physid(pdev->bus);
        if (phys_id < 0)
                return -ENODEV;

        pkg = topology_phys_to_logical_pkg(phys_id);
        if (WARN_ON_ONCE(pkg < 0))
                return -EINVAL;

        if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
                int idx = UNCORE_PCI_DEV_IDX(id->driver_data);

                uncore_extra_pci_dev[pkg].dev[idx] = pdev;
                pci_set_drvdata(pdev, NULL);
                return 0;
        }

        type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
        /*
         * For performance monitoring units with multiple boxes,
         * each box has a different function id.
         */
        pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
        /*
         * Knights Landing uses a common PCI device ID for multiple instances
         * of an uncore PMU device type. There is only one entry per device
         * type in the knl_uncore_pci_ids table despite multiple devices being
         * present for some device types, so the PCI device idx is 0 for all
         * devices. Advance the pmu pointer to an unused array element instead.
         */
        if (boot_cpu_data.x86_model == 87) {
                while (pmu->func_id >= 0)
                        pmu++;
        }

        if (WARN_ON_ONCE(pmu->boxes[pkg] != NULL))
                return -EINVAL;

        box = uncore_alloc_box(type, NUMA_NO_NODE);
        if (!box)
                return -ENOMEM;

        if (pmu->func_id < 0)
                pmu->func_id = pdev->devfn;
        else
                WARN_ON_ONCE(pmu->func_id != pdev->devfn);

        atomic_inc(&box->refcnt);
        box->pci_phys_id = phys_id;
        box->pkgid = pkg;
        box->pci_dev = pdev;
        box->pmu = pmu;
        uncore_box_init(box);
        pci_set_drvdata(pdev, box);

        pmu->boxes[pkg] = box;
        if (atomic_inc_return(&pmu->activeboxes) > 1)
                return 0;

        /* First active box registers the pmu */
        ret = uncore_pmu_register(pmu);
        if (ret) {
                pci_set_drvdata(pdev, NULL);
                pmu->boxes[pkg] = NULL;
                uncore_box_exit(box);
                kfree(box);
        }
        return ret;
}

static void uncore_pci_remove(struct pci_dev *pdev)
{
        struct intel_uncore_box *box = pci_get_drvdata(pdev);
        struct intel_uncore_pmu *pmu;
        int i, phys_id, pkg;

        phys_id = uncore_pcibus_to_physid(pdev->bus);
        pkg = topology_phys_to_logical_pkg(phys_id);

        box = pci_get_drvdata(pdev);
        if (!box) {
                for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
                        if (uncore_extra_pci_dev[pkg].dev[i] == pdev) {
                                uncore_extra_pci_dev[pkg].dev[i] = NULL;
                                break;
                        }
                }
                WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX);
                return;
        }

        pmu = box->pmu;
        if (WARN_ON_ONCE(phys_id != box->pci_phys_id))
                return;

        pci_set_drvdata(pdev, NULL);
        pmu->boxes[pkg] = NULL;
        if (atomic_dec_return(&pmu->activeboxes) == 0)
                uncore_pmu_unregister(pmu);
        uncore_box_exit(box);
        kfree(box);
}
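
/*
 * Select the model specific PCI uncore support, allocate the per-package
 * extra-device table and register the uncore PCI driver. On failure, all
 * PCI uncore state is torn down and uncore_pci_uncores reset to empty.
 */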
static int __init uncore_pci_init(void)
{
        size_t size;
        int ret;

        switch (boot_cpu_data.x86_model) {
        case 45: /* Sandy Bridge-EP */
                ret = snbep_uncore_pci_init();
                break;
        case 62: /* Ivy Bridge-EP */
                ret = ivbep_uncore_pci_init();
                break;
        case 63: /* Haswell-EP */
                ret = hswep_uncore_pci_init();
                break;
        case 79: /* BDX-EP */
        case 86: /* BDX-DE */
                ret = bdx_uncore_pci_init();
                break;
        case 42: /* Sandy Bridge */
                ret = snb_uncore_pci_init();
                break;
        case 58: /* Ivy Bridge */
                ret = ivb_uncore_pci_init();
                break;
        case 60: /* Haswell */
        case 69: /* Haswell Celeron */
                ret = hsw_uncore_pci_init();
                break;
        case 61: /* Broadwell */
                ret = bdw_uncore_pci_init();
                break;
        case 87: /* Knights Landing */
                ret = knl_uncore_pci_init();
                break;
        case 94: /* SkyLake */
                ret = skl_uncore_pci_init();
                break;
        default:
                return -ENODEV;
        }

        if (ret)
                return ret;

        size = max_packages * sizeof(struct pci_extra_dev);
        uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL);
        if (!uncore_extra_pci_dev) {
                ret = -ENOMEM;
                goto err;
        }

        ret = uncore_types_init(uncore_pci_uncores, false);
        if (ret)
                goto errtype;

        uncore_pci_driver->probe = uncore_pci_probe;
        uncore_pci_driver->remove = uncore_pci_remove;

        ret = pci_register_driver(uncore_pci_driver);
        if (ret)
                goto errtype;

        pcidrv_registered = true;
        return 0;

errtype:
        uncore_types_exit(uncore_pci_uncores);
        kfree(uncore_extra_pci_dev);
        uncore_extra_pci_dev = NULL;
        uncore_free_pcibus_map();
err:
        uncore_pci_uncores = empty_uncore;
        return ret;
}

static void __init uncore_pci_exit(void)
{
        if (pcidrv_registered) {
                pcidrv_registered = false;
                pci_unregister_driver(uncore_pci_driver);
                uncore_types_exit(uncore_pci_uncores);
                kfree(uncore_extra_pci_dev);
                uncore_free_pcibus_map();
        }
}
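
/*
 * CPU hotplug handling: boxes are reference counted per package. The last
 * CPU of a package going away exits its boxes, the first CPU coming up
 * (re)initializes them, and uncore_cpu_prepare() allocates boxes for a
 * package seen for the first time.
 */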
static void uncore_cpu_dying(int cpu)
{
        struct intel_uncore_type *type, **types = uncore_msr_uncores;
        struct intel_uncore_pmu *pmu;
        struct intel_uncore_box *box;
        int i, pkg;

        pkg = topology_logical_package_id(cpu);
        for (; *types; types++) {
                type = *types;
                pmu = type->pmus;
                for (i = 0; i < type->num_boxes; i++, pmu++) {
                        box = pmu->boxes[pkg];
                        if (box && atomic_dec_return(&box->refcnt) == 0)
                                uncore_box_exit(box);
                }
        }
}

static void uncore_cpu_starting(int cpu, bool init)
{
        struct intel_uncore_type *type, **types = uncore_msr_uncores;
        struct intel_uncore_pmu *pmu;
        struct intel_uncore_box *box;
        int i, pkg, ncpus = 1;

        if (init) {
                /*
                 * On init we get the number of online cpus in the package
                 * and set refcount for all of them.
                 */
                ncpus = cpumask_weight(topology_core_cpumask(cpu));
        }

        pkg = topology_logical_package_id(cpu);
        for (; *types; types++) {
                type = *types;
                pmu = type->pmus;
                for (i = 0; i < type->num_boxes; i++, pmu++) {
                        box = pmu->boxes[pkg];
                        if (!box)
                                continue;
                        /* The first cpu on a package activates the box */
                        if (atomic_add_return(ncpus, &box->refcnt) == ncpus)
                                uncore_box_init(box);
                }
        }
}

static int uncore_cpu_prepare(int cpu)
{
        struct intel_uncore_type *type, **types = uncore_msr_uncores;
        struct intel_uncore_pmu *pmu;
        struct intel_uncore_box *box;
        int i, pkg;

        pkg = topology_logical_package_id(cpu);
        for (; *types; types++) {
                type = *types;
                pmu = type->pmus;
                for (i = 0; i < type->num_boxes; i++, pmu++) {
                        if (pmu->boxes[pkg])
                                continue;
                        /* First cpu of a package allocates the box */
                        box = uncore_alloc_box(type, cpu_to_node(cpu));
                        if (!box)
                                return -ENOMEM;
                        box->pmu = pmu;
                        box->pkgid = pkg;
                        pmu->boxes[pkg] = box;
                }
        }
        return 0;
}
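
/*
 * Move the uncore event collection duty for one package from old_cpu to
 * new_cpu, migrating the perf context so running events keep counting.
 * old_cpu < 0 means the package had no collector yet; new_cpu < 0 means
 * the package loses its collector.
 */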
static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu,
                                   int new_cpu)
{
        struct intel_uncore_pmu *pmu = type->pmus;
        struct intel_uncore_box *box;
        int i, pkg;

        pkg = topology_logical_package_id(old_cpu < 0 ? new_cpu : old_cpu);
        for (i = 0; i < type->num_boxes; i++, pmu++) {
                box = pmu->boxes[pkg];
                if (!box)
                        continue;

                if (old_cpu < 0) {
                        WARN_ON_ONCE(box->cpu != -1);
                        box->cpu = new_cpu;
                        continue;
                }

                WARN_ON_ONCE(box->cpu != old_cpu);
                box->cpu = -1;
                if (new_cpu < 0)
                        continue;

                uncore_pmu_cancel_hrtimer(box);
                perf_pmu_migrate_context(&pmu->pmu, old_cpu, new_cpu);
                box->cpu = new_cpu;
        }
}

static void uncore_change_context(struct intel_uncore_type **uncores,
                                  int old_cpu, int new_cpu)
{
        for (; *uncores; uncores++)
                uncore_change_type_ctx(*uncores, old_cpu, new_cpu);
}

static void uncore_event_exit_cpu(int cpu)
{
        int target;

        /* Check if exiting cpu is used for collecting uncore events */
        if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
                return;

        /* Find a new cpu to collect uncore events */
        target = cpumask_any_but(topology_core_cpumask(cpu), cpu);

        /* Migrate uncore events to the new target */
        if (target < nr_cpu_ids)
                cpumask_set_cpu(target, &uncore_cpu_mask);
        else
                target = -1;

        uncore_change_context(uncore_msr_uncores, cpu, target);
        uncore_change_context(uncore_pci_uncores, cpu, target);
}

static void uncore_event_init_cpu(int cpu)
{
        int target;

        /*
         * Check if there is an online cpu in the package
         * which collects uncore events already.
         */
        target = cpumask_any_and(&uncore_cpu_mask, topology_core_cpumask(cpu));
        if (target < nr_cpu_ids)
                return;

        cpumask_set_cpu(cpu, &uncore_cpu_mask);

        uncore_change_context(uncore_msr_uncores, -1, cpu);
        uncore_change_context(uncore_pci_uncores, -1, cpu);
}
static int uncore_cpu_notifier(struct notifier_block *self,
                               unsigned long action, void *hcpu)
{
        unsigned int cpu = (long)hcpu;

        switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_UP_PREPARE:
                return notifier_from_errno(uncore_cpu_prepare(cpu));

        case CPU_STARTING:
                uncore_cpu_starting(cpu, false);
        case CPU_DOWN_FAILED:
                uncore_event_init_cpu(cpu);
                break;

        case CPU_UP_CANCELED:
        case CPU_DYING:
                uncore_cpu_dying(cpu);
                break;

        case CPU_DOWN_PREPARE:
                uncore_event_exit_cpu(cpu);
                break;
        }
        return NOTIFY_OK;
}

static struct notifier_block uncore_cpu_nb = {
        .notifier_call = uncore_cpu_notifier,
        /*
         * to migrate uncore events, our notifier should be executed
         * before perf core's notifier.
         */
        .priority = CPU_PRI_PERF + 1,
};
static int __init type_pmu_register(struct intel_uncore_type *type)
{
        int i, ret;

        for (i = 0; i < type->num_boxes; i++) {
                ret = uncore_pmu_register(&type->pmus[i]);
                if (ret)
                        return ret;
        }
        return 0;
}

static int __init uncore_msr_pmus_register(void)
{
        struct intel_uncore_type **types = uncore_msr_uncores;
        int ret;

        for (; *types; types++) {
                ret = type_pmu_register(*types);
                if (ret)
                        return ret;
        }
        return 0;
}

static int __init uncore_cpu_init(void)
{
        int ret;

        switch (boot_cpu_data.x86_model) {
        case 26: /* Nehalem */
        case 30:
        case 37: /* Westmere */
        case 44:
                nhm_uncore_cpu_init();
                break;
        case 42: /* Sandy Bridge */
        case 58: /* Ivy Bridge */
        case 60: /* Haswell */
        case 69: /* Haswell */
        case 70: /* Haswell */
        case 61: /* Broadwell */
        case 71: /* Broadwell */
                snb_uncore_cpu_init();
                break;
        case 45: /* Sandy Bridge-EP */
                snbep_uncore_cpu_init();
                break;
        case 46: /* Nehalem-EX */
        case 47: /* Westmere-EX aka. Xeon E7 */
                nhmex_uncore_cpu_init();
                break;
        case 62: /* Ivy Bridge-EP */
                ivbep_uncore_cpu_init();
                break;
        case 63: /* Haswell-EP */
                hswep_uncore_cpu_init();
                break;
        case 79: /* BDX-EP */
        case 86: /* BDX-DE */
                bdx_uncore_cpu_init();
                break;
        case 87: /* Knights Landing */
                knl_uncore_cpu_init();
                break;
        default:
                return -ENODEV;
        }

        ret = uncore_types_init(uncore_msr_uncores, true);
        if (ret)
                goto err;

        ret = uncore_msr_pmus_register();
        if (ret)
                goto err;
        return 0;
err:
        uncore_types_exit(uncore_msr_uncores);
        uncore_msr_uncores = empty_uncore;
        return ret;
}

static void __init uncore_cpu_setup(void *dummy)
{
        uncore_cpu_starting(smp_processor_id(), true);
}

/* Lazy to avoid allocation of a few bytes for the normal case */
static __initdata DECLARE_BITMAP(packages, MAX_LOCAL_APIC);

static int __init uncore_cpumask_init(bool msr)
{
        unsigned int cpu;

        for_each_online_cpu(cpu) {
                unsigned int pkg = topology_logical_package_id(cpu);
                int ret;

                if (test_and_set_bit(pkg, packages))
                        continue;
                /*
                 * The first online cpu of each package allocates and takes
                 * the refcounts for all other online cpus in that package.
                 * If msrs are not enabled no allocation is required.
                 */
                if (msr) {
                        ret = uncore_cpu_prepare(cpu);
                        if (ret)
                                return ret;
                }
                uncore_event_init_cpu(cpu);
                smp_call_function_single(cpu, uncore_cpu_setup, NULL, 1);
        }
        __register_cpu_notifier(&uncore_cpu_nb);
        return 0;
}
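
/*
 * Driver entry point: bail out on non-Intel CPUs and under hypervisors,
 * probe the PCI and MSR based uncore PMUs and set up the per-package
 * event collecting CPUs. Succeeds if at least one of the two probes worked.
 */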
static int __init intel_uncore_init(void)
{
        int pret, cret, ret;

        if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
                return -ENODEV;

        if (cpu_has_hypervisor)
                return -ENODEV;

        max_packages = topology_max_packages();

        pret = uncore_pci_init();
        cret = uncore_cpu_init();

        if (cret && pret)
                return -ENODEV;

        cpu_notifier_register_begin();
        ret = uncore_cpumask_init(!cret);
        if (ret)
                goto err;
        cpu_notifier_register_done();
        return 0;

err:
        /* Undo box->init_box() */
        on_each_cpu_mask(&uncore_cpu_mask, uncore_exit_boxes, NULL, 1);
        uncore_types_exit(uncore_msr_uncores);
        uncore_pci_exit();
        cpu_notifier_register_done();
        return ret;
}
device_initcall(intel_uncore_init);