8322 nl: misleading-indentation
[unleashed/tickless.git] / usr / src / uts / common / os / pg.c
blob835ae3d322c2704cfb5b10e97e380999d3a28e47
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 #include <sys/systm.h>
27 #include <sys/types.h>
28 #include <sys/param.h>
29 #include <sys/thread.h>
30 #include <sys/cpuvar.h>
31 #include <sys/cpupart.h>
32 #include <sys/kmem.h>
33 #include <sys/cmn_err.h>
34 #include <sys/kstat.h>
35 #include <sys/processor.h>
36 #include <sys/disp.h>
37 #include <sys/group.h>
38 #include <sys/pg.h>
41 * Processor groups
43 * With the introduction of Chip Multi-Threaded (CMT) processor architectures,
44 * it is no longer necessarily true that a given physical processor module
45 * will present itself as a single schedulable entity (cpu_t). Rather, each
46 * chip and/or processor core may present itself as one or more "logical" CPUs.
48 * The logical CPUs presented may share physical components such as caches,
49 * data pipes, execution pipelines, FPUs, etc. It is advantageous to have the
50 * kernel be aware of the relationships existing between logical CPUs so that
51 * the appropriate optimizations may be employed.
53 * The processor group abstraction represents a set of logical CPUs that
54 * generally share some sort of physical or characteristic relationship.
56 * In the case of a physical sharing relationship, the CPUs in the group may
57 * share a pipeline, cache or floating point unit. In the case of a logical
58 * relationship, a PG may represent the set of CPUs in a processor set, or the
59 * set of CPUs running at a particular clock speed.
61 * The generic processor group structure, pg_t, contains the elements generic
62 * to a group of CPUs. Depending on the nature of the CPU relationship
63 * (LOGICAL or PHYSICAL), a pointer to a pg may be recast to a "view" of that
64 * PG where more specific data is represented.
66 * As an example, a PG representing a PHYSICAL relationship, may be recast to
67 * a pghw_t, where data further describing the hardware sharing relationship
68 * is maintained. See pghw.c and pghw.h for details on physical PGs.
70 * At this time a more specialized casting of a PG representing a LOGICAL
71 * relationship has not been implemented, but the architecture allows for this
72 * in the future.
74 * Processor Group Classes
76 * Processor group consumers may wish to maintain and associate specific
77 * data with the PGs they create. For this reason, a mechanism for creating
78 * class specific PGs exists. Classes may overload the default functions for
79 * creating, destroying, and associating CPUs with PGs, and may also register
80 * class specific callbacks to be invoked when the CPU related system
81 * configuration changes. Class specific data is stored/associated with
82 * PGs by incorporating the pg_t (or pghw_t, as appropriate), as the first
83 * element of a class specific PG object. In memory, such a structure may look
84 * like:
86 * ----------------------- - - -
87 * | common | | | | <--(pg_t *)
88 * ----------------------- | | -
89 * | HW specific | | | <-----(pghw_t *)
90 * ----------------------- | -
91 * | class specific | | <-------(pg_cmt_t *)
92 * ----------------------- -
94 * Access to the PG class specific data can be had by casting a pointer to
95 * its class specific view.
/*
 * Forward declarations for the default class operations and the no-op
 * callback, all of which are defined later in this file.
 */
static pg_t		*pg_alloc_default(pg_class_t);
static void		pg_free_default(pg_t *);
static void		pg_null_op();
/*
 * Bootstrap CPU specific PG data
 * See pg_cpu_bootstrap()
 */
static cpu_pg_t		bootstrap_pg_data;

/*
 * Bitset of allocated PG ids (they are sequential)
 * and the next free id in the set.
 */
static bitset_t		pg_id_set;

/*
 * The ID space starts at 1, on the assumption that the root has ID 0.
 */
static pgid_t		pg_id_next = 1;
/*
 * Default and externed PG ops vectors
 *
 * The default class only supplies alloc and free; every other operation
 * is left NULL. The PG_* macros in this file test each operation for NULL
 * before invoking it.
 */
static struct pg_ops pg_ops_default = {
	pg_alloc_default,	/* alloc */
	pg_free_default,	/* free */
	NULL,			/* cpu_init */
	NULL,			/* cpu_fini */
	NULL,			/* cpu_active */
	NULL,			/* cpu_inactive */
	NULL,			/* cpupart_in */
	NULL,			/* cpupart_out */
	NULL,			/* cpupart_move */
	NULL,			/* cpu_belongs */
	NULL,			/* policy_name */
};
/*
 * Default event callbacks: both entries point at pg_null_op(), so the
 * pg_ev_thread_*() routines can call through pg_cb without a NULL check.
 * Copied into each new PG by pg_callback_set_defaults().
 */
static struct pg_cb_ops pg_cb_ops_default = {
	pg_null_op,		/* thread_swtch */
	pg_null_op,		/* thread_remain */
};
/*
 * Class specific PG allocation callbacks
 *
 * PG_ALLOC(class) calls the class's alloc operation if it has one, and
 * the default class's alloc operation otherwise. PG_FREE(pg) does the
 * same for the free operation of the class the PG belongs to.
 *
 * Both macros are expressions and evaluate their argument more than once,
 * so the argument must be free of side effects.
 */
#define	PG_ALLOC(class)							\
	(pg_classes[class].pgc_ops->alloc ?				\
	    pg_classes[class].pgc_ops->alloc() :			\
	    pg_classes[pg_default_cid].pgc_ops->alloc())

#define	PG_FREE(pg)							\
	((pg)->pg_class->pgc_ops->free ?				\
	    (pg)->pg_class->pgc_ops->free(pg) :				\
	    pg_classes[pg_default_cid].pgc_ops->free(pg))
/*
 * Class specific PG policy name
 *
 * Evaluates to the string returned by the class's policy_name operation,
 * or to NULL if the class does not provide one. The argument is evaluated
 * more than once.
 */
#define	PG_POLICY_NAME(pg)						\
	((pg)->pg_class->pgc_ops->policy_name ?				\
	    (pg)->pg_class->pgc_ops->policy_name(pg) : NULL)
/*
 * Class specific membership test callback
 *
 * Evaluates to the result of the class's cpu_belongs operation, or to 0
 * if the class does not provide one. The "pg" argument is evaluated more
 * than once.
 */
#define	PG_CPU_BELONGS(pg, cp)						\
	((pg)->pg_class->pgc_ops->cpu_belongs ?				\
	    (pg)->pg_class->pgc_ops->cpu_belongs(pg, cp) : 0)
/*
 * CPU configuration callbacks
 *
 * Each macro invokes the corresponding operation of class "class" if the
 * class provides it, and does nothing otherwise. The bodies are wrapped
 * in do { } while (0) so that each use behaves as a single statement and
 * stays safe inside an unbraced if/else.
 */
#define	PG_CPU_INIT(class, cp, cpu_pg)					\
do {									\
	if (pg_classes[class].pgc_ops->cpu_init)			\
		pg_classes[class].pgc_ops->cpu_init(cp, cpu_pg);	\
} while (0)

#define	PG_CPU_FINI(class, cp, cpu_pg)					\
do {									\
	if (pg_classes[class].pgc_ops->cpu_fini)			\
		pg_classes[class].pgc_ops->cpu_fini(cp, cpu_pg);	\
} while (0)

#define	PG_CPU_ACTIVE(class, cp)					\
do {									\
	if (pg_classes[class].pgc_ops->cpu_active)			\
		pg_classes[class].pgc_ops->cpu_active(cp);		\
} while (0)

#define	PG_CPU_INACTIVE(class, cp)					\
do {									\
	if (pg_classes[class].pgc_ops->cpu_inactive)			\
		pg_classes[class].pgc_ops->cpu_inactive(cp);		\
} while (0)
/*
 * CPU / cpupart configuration callbacks
 *
 * Each macro invokes the corresponding operation of class "class" if the
 * class provides it, and does nothing otherwise. The bodies are wrapped
 * in do { } while (0) so that each use behaves as a single statement and
 * stays safe inside an unbraced if/else.
 */
#define	PG_CPUPART_IN(class, cp, pp)					\
do {									\
	if (pg_classes[class].pgc_ops->cpupart_in)			\
		pg_classes[class].pgc_ops->cpupart_in(cp, pp);		\
} while (0)

#define	PG_CPUPART_OUT(class, cp, pp)					\
do {									\
	if (pg_classes[class].pgc_ops->cpupart_out)			\
		pg_classes[class].pgc_ops->cpupart_out(cp, pp);		\
} while (0)

#define	PG_CPUPART_MOVE(class, cp, old, new)				\
do {									\
	if (pg_classes[class].pgc_ops->cpupart_move)			\
		pg_classes[class].pgc_ops->cpupart_move(cp, old, new);	\
} while (0)
/*
 * Table of registered PG classes, indexed by class id, and the number of
 * entries in it. Both are updated by pg_class_register() under cpu_lock.
 */
static pg_class_t	*pg_classes;
static int		pg_nclasses;

/* Class id of the "default" class, assigned in pg_init() */
static pg_cid_t		pg_default_cid;
225 * Initialze common PG subsystem.
227 void
228 pg_init(void)
230 extern void pg_cmt_class_init();
231 extern void pg_cmt_cpu_startup();
233 pg_default_cid =
234 pg_class_register("default", &pg_ops_default, PGR_LOGICAL);
237 * Initialize classes to allow them to register with the framework
239 pg_cmt_class_init();
241 pg_cpu0_init();
242 pg_cmt_cpu_startup(CPU);
246 * Perform CPU 0 initialization
248 void
249 pg_cpu0_init(void)
251 extern void pghw_physid_create();
254 * Create the physical ID cache for the boot CPU
256 pghw_physid_create(CPU);
259 * pg_cpu_* require that cpu_lock be held
261 mutex_enter(&cpu_lock);
263 (void) pg_cpu_init(CPU, B_FALSE);
264 pg_cpupart_in(CPU, &cp_default);
265 pg_cpu_active(CPU);
267 mutex_exit(&cpu_lock);
271 * Invoked when topology for CPU0 changes
272 * post pg_cpu0_init().
274 * Currently happens as a result of null_proc_lpa
275 * on Starcat.
277 void
278 pg_cpu0_reinit(void)
280 mutex_enter(&cpu_lock);
281 pg_cpu_inactive(CPU);
282 pg_cpupart_out(CPU, &cp_default);
283 pg_cpu_fini(CPU, NULL);
285 (void) pg_cpu_init(CPU, B_FALSE);
286 pg_cpupart_in(CPU, &cp_default);
287 pg_cpu_active(CPU);
288 mutex_exit(&cpu_lock);
292 * Register a new PG class
294 pg_cid_t
295 pg_class_register(char *name, struct pg_ops *ops, pg_relation_t relation)
297 pg_class_t *newclass;
298 pg_class_t *classes_old;
299 id_t cid;
301 mutex_enter(&cpu_lock);
304 * Allocate a new pg_class_t in the pg_classes array
306 if (pg_nclasses == 0) {
307 pg_classes = kmem_zalloc(sizeof (pg_class_t), KM_SLEEP);
308 } else {
309 classes_old = pg_classes;
310 pg_classes =
311 kmem_zalloc(sizeof (pg_class_t) * (pg_nclasses + 1),
312 KM_SLEEP);
313 (void) kcopy(classes_old, pg_classes,
314 sizeof (pg_class_t) * pg_nclasses);
315 kmem_free(classes_old, sizeof (pg_class_t) * pg_nclasses);
318 cid = pg_nclasses++;
319 newclass = &pg_classes[cid];
321 (void) strncpy(newclass->pgc_name, name, PG_CLASS_NAME_MAX);
322 newclass->pgc_id = cid;
323 newclass->pgc_ops = ops;
324 newclass->pgc_relation = relation;
326 mutex_exit(&cpu_lock);
328 return (cid);
332 * Try to find an existing pg in set in which to place cp.
333 * Returns the pg if found, and NULL otherwise.
334 * In the event that the CPU could belong to multiple
335 * PGs in the set, the first matching PG will be returned.
337 pg_t *
338 pg_cpu_find_pg(cpu_t *cp, group_t *set)
340 pg_t *pg;
341 group_iter_t i;
343 group_iter_init(&i);
344 while ((pg = group_iterate(set, &i)) != NULL) {
346 * Ask the class if the CPU belongs here
348 if (PG_CPU_BELONGS(pg, cp))
349 return (pg);
351 return (NULL);
355 * Iterate over the CPUs in a PG after initializing
356 * the iterator with PG_CPU_ITR_INIT()
358 cpu_t *
359 pg_cpu_next(pg_cpu_itr_t *itr)
361 cpu_t *cpu;
362 pg_t *pg = itr->pg;
364 cpu = group_iterate(&pg->pg_cpus, &itr->position);
365 return (cpu);
369 * Test if a given PG contains a given CPU
371 boolean_t
372 pg_cpu_find(pg_t *pg, cpu_t *cp)
374 if (group_find(&pg->pg_cpus, cp) == (uint_t)-1)
375 return (B_FALSE);
377 return (B_TRUE);
381 * Set the PGs callbacks to the default
383 void
384 pg_callback_set_defaults(pg_t *pg)
386 bcopy(&pg_cb_ops_default, &pg->pg_cb, sizeof (struct pg_cb_ops));
390 * Create a PG of a given class.
391 * This routine may block.
393 pg_t *
394 pg_create(pg_cid_t cid)
396 pg_t *pg;
397 pgid_t id;
399 ASSERT(MUTEX_HELD(&cpu_lock));
402 * Call the class specific PG allocation routine
404 pg = PG_ALLOC(cid);
405 pg->pg_class = &pg_classes[cid];
406 pg->pg_relation = pg->pg_class->pgc_relation;
409 * Find the next free sequential pg id
411 do {
412 if (pg_id_next >= bitset_capacity(&pg_id_set))
413 bitset_resize(&pg_id_set, pg_id_next + 1);
414 id = pg_id_next++;
415 } while (bitset_in_set(&pg_id_set, id));
417 pg->pg_id = id;
418 bitset_add(&pg_id_set, pg->pg_id);
421 * Create the PG's CPU group
423 group_create(&pg->pg_cpus);
426 * Initialize the events ops vector
428 pg_callback_set_defaults(pg);
430 return (pg);
434 * Destroy a PG.
435 * This routine may block.
437 void
438 pg_destroy(pg_t *pg)
440 ASSERT(MUTEX_HELD(&cpu_lock));
442 group_destroy(&pg->pg_cpus);
445 * Unassign the pg_id
447 if (pg_id_next > pg->pg_id)
448 pg_id_next = pg->pg_id;
449 bitset_del(&pg_id_set, pg->pg_id);
452 * Invoke the class specific de-allocation routine
454 PG_FREE(pg);
458 * Add the CPU "cp" to processor group "pg"
459 * This routine may block.
461 void
462 pg_cpu_add(pg_t *pg, cpu_t *cp, cpu_pg_t *cpu_pg)
464 int err;
466 ASSERT(MUTEX_HELD(&cpu_lock));
468 /* This adds the CPU to the PG's CPU group */
469 err = group_add(&pg->pg_cpus, cp, GRP_RESIZE);
470 ASSERT(err == 0);
473 * The CPU should be referencing the bootstrap PG data still
474 * at this point, since this routine may block causing us to
475 * enter the dispatcher.
477 ASSERT(pg_cpu_is_bootstrapped(cp));
479 /* This adds the PG to the CPUs PG group */
480 err = group_add(&cpu_pg->pgs, pg, GRP_RESIZE);
481 ASSERT(err == 0);
485 * Remove "cp" from "pg".
486 * This routine may block.
488 void
489 pg_cpu_delete(pg_t *pg, cpu_t *cp, cpu_pg_t *cpu_pg)
491 int err;
493 ASSERT(MUTEX_HELD(&cpu_lock));
495 /* Remove the CPU from the PG */
496 err = group_remove(&pg->pg_cpus, cp, GRP_RESIZE);
497 ASSERT(err == 0);
500 * The CPU should be referencing the bootstrap PG data still
501 * at this point, since this routine may block causing us to
502 * enter the dispatcher.
504 ASSERT(pg_cpu_is_bootstrapped(cp));
506 /* Remove the PG from the CPU's PG group */
507 err = group_remove(&cpu_pg->pgs, pg, GRP_RESIZE);
508 ASSERT(err == 0);
512 * Allocate a CPU's PG data. This hangs off struct cpu at cpu_pg
514 static cpu_pg_t *
515 pg_cpu_data_alloc(void)
517 cpu_pg_t *pgd;
519 pgd = kmem_zalloc(sizeof (cpu_pg_t), KM_SLEEP);
520 group_create(&pgd->pgs);
521 group_create(&pgd->cmt_pgs);
523 return (pgd);
527 * Free the CPU's PG data.
529 static void
530 pg_cpu_data_free(cpu_pg_t *pgd)
532 group_destroy(&pgd->pgs);
533 group_destroy(&pgd->cmt_pgs);
534 kmem_free(pgd, sizeof (cpu_pg_t));
538 * Called when either a new CPU is coming into the system (either
539 * via booting or DR) or when the CPU's PG data is being recalculated.
540 * Allocate its PG data, and notify all registered classes about
541 * the new CPU.
543 * If "deferred_init" is B_TRUE, the CPU's PG data will be allocated
544 * and returned, but the "bootstrap" structure will be left in place.
545 * The deferred_init option is used when all CPUs in the system are
546 * using the bootstrap structure as part of the process of recalculating
547 * all PG data. The caller must replace the bootstrap structure with the
548 * allocated PG data before pg_cpu_active is called.
550 * This routine may block.
552 cpu_pg_t *
553 pg_cpu_init(cpu_t *cp, boolean_t deferred_init)
555 pg_cid_t i;
556 cpu_pg_t *cpu_pg;
558 ASSERT(MUTEX_HELD(&cpu_lock));
561 * Allocate and size the per CPU pg data
563 * The CPU's PG data will be populated by the various
564 * PG classes during the invocation of the PG_CPU_INIT()
565 * callback below.
567 * Since the we could block and enter the dispatcher during
568 * this process, the CPU will continue to reference the bootstrap
569 * PG data until all the initialization completes.
571 ASSERT(pg_cpu_is_bootstrapped(cp));
573 cpu_pg = pg_cpu_data_alloc();
576 * Notify all registered classes about the new CPU
578 for (i = 0; i < pg_nclasses; i++)
579 PG_CPU_INIT(i, cp, cpu_pg);
582 * The CPU's PG data is now ready to use.
584 if (deferred_init == B_FALSE)
585 cp->cpu_pg = cpu_pg;
587 return (cpu_pg);
591 * Either this CPU is being deleted from the system or its PG data is
592 * being recalculated. Notify the classes and free up the CPU's PG data.
594 * If "cpu_pg_deferred" is non-NULL, it points to the CPU's PG data and
595 * serves to indicate that this CPU is already using the bootstrap
596 * stucture. Used as part of the process to recalculate the PG data for
597 * all CPUs in the system.
599 void
600 pg_cpu_fini(cpu_t *cp, cpu_pg_t *cpu_pg_deferred)
602 pg_cid_t i;
603 cpu_pg_t *cpu_pg;
605 ASSERT(MUTEX_HELD(&cpu_lock));
607 if (cpu_pg_deferred == NULL) {
608 cpu_pg = cp->cpu_pg;
611 * This can happen if the CPU coming into the system
612 * failed to power on.
614 if (cpu_pg == NULL || pg_cpu_is_bootstrapped(cp))
615 return;
618 * Have the CPU reference the bootstrap PG data to survive
619 * the dispatcher should it block from here on out.
621 pg_cpu_bootstrap(cp);
622 } else {
623 ASSERT(pg_cpu_is_bootstrapped(cp));
624 cpu_pg = cpu_pg_deferred;
627 for (i = 0; i < pg_nclasses; i++)
628 PG_CPU_FINI(i, cp, cpu_pg);
630 pg_cpu_data_free(cpu_pg);
634 * This CPU is becoming active (online)
635 * This routine may not block as it is called from paused CPUs
636 * context.
638 void
639 pg_cpu_active(cpu_t *cp)
641 pg_cid_t i;
643 ASSERT(MUTEX_HELD(&cpu_lock));
646 * Notify all registered classes about the new CPU
648 for (i = 0; i < pg_nclasses; i++)
649 PG_CPU_ACTIVE(i, cp);
653 * This CPU is going inactive (offline)
654 * This routine may not block, as it is called from paused
655 * CPUs context.
657 void
658 pg_cpu_inactive(cpu_t *cp)
660 pg_cid_t i;
662 ASSERT(MUTEX_HELD(&cpu_lock));
665 * Notify all registered classes about the new CPU
667 for (i = 0; i < pg_nclasses; i++)
668 PG_CPU_INACTIVE(i, cp);
672 * Invoked when the CPU is about to move into the partition
673 * This routine may block.
675 void
676 pg_cpupart_in(cpu_t *cp, cpupart_t *pp)
678 int i;
680 ASSERT(MUTEX_HELD(&cpu_lock));
683 * Notify all registered classes that the
684 * CPU is about to enter the CPU partition
686 for (i = 0; i < pg_nclasses; i++)
687 PG_CPUPART_IN(i, cp, pp);
691 * Invoked when the CPU is about to move out of the partition
692 * This routine may block.
694 /*ARGSUSED*/
695 void
696 pg_cpupart_out(cpu_t *cp, cpupart_t *pp)
698 int i;
700 ASSERT(MUTEX_HELD(&cpu_lock));
703 * Notify all registered classes that the
704 * CPU is about to leave the CPU partition
706 for (i = 0; i < pg_nclasses; i++)
707 PG_CPUPART_OUT(i, cp, pp);
711 * Invoked when the CPU is *moving* partitions.
713 * This routine may not block, as it is called from paused CPUs
714 * context.
716 void
717 pg_cpupart_move(cpu_t *cp, cpupart_t *oldpp, cpupart_t *newpp)
719 int i;
721 ASSERT(MUTEX_HELD(&cpu_lock));
724 * Notify all registered classes that the
725 * CPU is about to leave the CPU partition
727 for (i = 0; i < pg_nclasses; i++)
728 PG_CPUPART_MOVE(i, cp, oldpp, newpp);
732 * Return a class specific string describing a policy implemented
733 * across this PG
735 char *
736 pg_policy_name(pg_t *pg)
738 char *str;
739 if ((str = PG_POLICY_NAME(pg)) != NULL)
740 return (str);
742 return ("N/A");
746 * Provide the specified CPU a bootstrap pg
747 * This is needed to allow sane behaviour if any PG consuming
748 * code needs to deal with a partially initialized CPU
750 void
751 pg_cpu_bootstrap(cpu_t *cp)
753 cp->cpu_pg = &bootstrap_pg_data;
757 * Return non-zero if the specified CPU is bootstrapped,
758 * which means it's CPU specific PG data has not yet been
759 * fully constructed.
762 pg_cpu_is_bootstrapped(cpu_t *cp)
764 return (cp->cpu_pg == &bootstrap_pg_data);
767 /*ARGSUSED*/
768 static pg_t *
769 pg_alloc_default(pg_class_t class)
771 return (kmem_zalloc(sizeof (pg_t), KM_SLEEP));
774 /*ARGSUSED*/
775 static void
776 pg_free_default(struct pg *pg)
778 kmem_free(pg, sizeof (pg_t));
/*
 * Do-nothing callback. Used to fill every slot of pg_cb_ops_default, so
 * PG event callbacks can always be invoked without testing for NULL.
 */
static void
pg_null_op()
{
	/* Intentionally empty */
}
787 * Invoke the "thread switch" callback for each of the CPU's PGs
788 * This is invoked from the dispatcher swtch() routine, which is called
789 * when a thread running an a CPU should switch to another thread.
790 * "cp" is the CPU on which the thread switch is happening
791 * "now" is an unscaled hrtime_t timestamp taken in swtch()
792 * "old" and "new" are the outgoing and incoming threads, respectively.
794 void
795 pg_ev_thread_swtch(struct cpu *cp, hrtime_t now, kthread_t *old, kthread_t *new)
797 int i, sz;
798 group_t *grp;
799 pg_t *pg;
801 grp = &cp->cpu_pg->pgs;
802 sz = GROUP_SIZE(grp);
803 for (i = 0; i < sz; i++) {
804 pg = GROUP_ACCESS(grp, i);
805 pg->pg_cb.thread_swtch(pg, cp, now, old, new);
810 * Invoke the "thread remain" callback for each of the CPU's PGs.
811 * This is called from the dispatcher's swtch() routine when a thread
812 * running on the CPU "cp" is switching to itself, which can happen as an
813 * artifact of the thread's timeslice expiring.
815 void
816 pg_ev_thread_remain(struct cpu *cp, kthread_t *t)
818 int i, sz;
819 group_t *grp;
820 pg_t *pg;
822 grp = &cp->cpu_pg->pgs;
823 sz = GROUP_SIZE(grp);
824 for (i = 0; i < sz; i++) {
825 pg = GROUP_ACCESS(grp, i);
826 pg->pg_cb.thread_remain(pg, cp, t);