/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * DR memory support routines.
 */
#include <sys/debug.h>
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/dditypes.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/ddi_impldefs.h>
#include <sys/ndi_impldefs.h>
#include <sys/sysmacros.h>
#include <sys/machsystm.h>
#include <sys/spitregs.h>
#include <sys/cpuvar.h>
#include <sys/promif.h>
#include <vm/seg_kmem.h>
#include <sys/platform_module.h>
#include <sys/dr_util.h>
extern struct memlist	*phys_install;

/* TODO: push this reference below drmach line */
extern int		kcage_on;

/* for the DR*INTERNAL_ERROR macros.  see sys/dr.h. */
static char *dr_ie_fmt = "dr_mem.c %d";
static int	dr_post_detach_mem_unit(dr_mem_unit_t *mp);
static int	dr_reserve_mem_spans(memhandle_t *mhp, struct memlist *mlist);
static int	dr_select_mem_target(dr_handle_t *hp, dr_mem_unit_t *mp,
		    struct memlist *ml);
static void	dr_init_mem_unit_data(dr_mem_unit_t *mp);
static int	memlist_canfit(struct memlist *s_mlist,
		    struct memlist *t_mlist);
/*
 * dr_mem_unit_t.sbm_flags
 */
#define	DR_MFLAG_RESERVED	0x01	/* mem unit reserved for delete */
#define	DR_MFLAG_SOURCE		0x02	/* source brd of copy/rename op */
#define	DR_MFLAG_TARGET		0x04	/* target brd of copy/rename op */
#define	DR_MFLAG_MEMUPSIZE	0x08	/* move from small to big board */
#define	DR_MFLAG_MEMDOWNSIZE	0x10	/* move from big to small board */
#define	DR_MFLAG_MEMRESIZE	0x18	/* move to different size board */
#define	DR_MFLAG_RELOWNER	0x20	/* memory release (delete) owner */
#define	DR_MFLAG_RELDONE	0x40	/* memory release (delete) done */
#define	_ptob64(p) ((uint64_t)(p) << PAGESHIFT)
#define	_b64top(b) ((pgcnt_t)((b) >> PAGESHIFT))
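/*
 * Illustrative only: on sun4u (8 KB pages, PAGESHIFT == 13),
 * _ptob64(0x100) yields 0x200000 (2 MB) and _b64top(0x200000) yields
 * 0x100 pages again.  The uint64_t cast in _ptob64 matters: page
 * counts are widened before the shift so PAs above 4 GB are not
 * truncated on the way to a byte address.
 */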
static struct memlist *
dr_get_memlist(dr_mem_unit_t *mp)
{
	struct memlist	*mlist = NULL;
	sbd_error_t	*err;
	static fn_t	f = "dr_get_memlist";

	PR_MEM("%s for %s...\n", f, mp->sbm_cm.sbdev_path);

	/*
	 * Return cached memlist, if present.
	 * This memlist will be present following an
	 * unconfigure (a.k.a: detach) of this memunit.
	 * It should only be used in the case where a configure
	 * is bringing this memunit back in without going
	 * through the disconnect and connect states.
	 */
	if (mp->sbm_mlist) {
		PR_MEM("%s: found cached memlist\n", f);

		mlist = memlist_dup(mp->sbm_mlist);
	} else {
		uint64_t basepa = _ptob64(mp->sbm_basepfn);

		/* attempt to construct a memlist using phys_install */

		/* round down to slice base address */
		basepa &= ~(mp->sbm_slice_size - 1);

		/* get a copy of phys_install to edit */
		memlist_read_lock();
		mlist = memlist_dup(phys_install);
		memlist_read_unlock();

		/* trim lower irrelevant span */
		if (mlist)
			mlist = memlist_del_span(mlist, 0ull, basepa);

		/* trim upper irrelevant span */
		if (mlist) {
			uint64_t endpa;

			basepa += mp->sbm_slice_size;
			endpa = _ptob64(physmax + 1);
			if (endpa > basepa)
				mlist = memlist_del_span(
				    mlist, basepa, endpa - basepa);
		}

		if (mlist) {
			/* successfully built a memlist */
			PR_MEM("%s: derived memlist from phys_install\n", f);
		}

		/* if no mlist yet, try platform layer */
		if (!mlist) {
			err = drmach_mem_get_memlist(
			    mp->sbm_cm.sbdev_id, &mlist);
			if (err) {
				DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
				mlist = NULL; /* paranoia */
			}
		}
	}

	PR_MEM("%s: memlist for %s\n", f, mp->sbm_cm.sbdev_path);
	PR_MEMLIST_DUMP(mlist);

	return (mlist);
}
typedef struct {
	kcondvar_t	cond;
	kmutex_t	lock;
	int		error;
	int		done;
} dr_release_mem_sync_t;
/*
 * Memory has been logically removed by the time this routine is called.
 */
static void
dr_mem_del_done(void *arg, int error)
{
	dr_release_mem_sync_t *ds = arg;

	mutex_enter(&ds->lock);
	ds->error = error;
	ds->done = 1;
	cv_signal(&ds->cond);
	mutex_exit(&ds->lock);
}
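/*
 * In outline, the handshake with the kphysm delete engine works like
 * this: dr_release_mem() below zeroes a dr_release_mem_sync_t, hands
 * dr_mem_del_done() to kphysm_del_start() as the completion callback,
 * and sleeps on rms.cond.  When the drain finishes (or is cancelled),
 * the callback above records the result in ds->error, marks ds->done,
 * and wakes the waiter.
 */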
/*
 * When we reach here the memory being drained should have
 * already been reserved in dr_pre_release_mem().
 * Our only task here is to kick off the "drain" and wait
 * for it to finish.
 */
void
dr_release_mem(dr_common_unit_t *cp)
{
	dr_mem_unit_t	*mp = (dr_mem_unit_t *)cp;
	int		err;
	dr_release_mem_sync_t rms;
	static fn_t	f = "dr_release_mem";

	/* check that this memory unit has been reserved */
	if (!(mp->sbm_flags & DR_MFLAG_RELOWNER)) {
		DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
		return;
	}

	bzero((void *) &rms, sizeof (rms));

	mutex_init(&rms.lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&rms.cond, NULL, CV_DRIVER, NULL);

	mutex_enter(&rms.lock);
	err = kphysm_del_start(mp->sbm_memhandle, dr_mem_del_done,
	    (void *) &rms);
	if (err == KPHYSM_OK) {
		/* wait for completion or interrupt */
		while (!rms.done) {
			if (cv_wait_sig(&rms.cond, &rms.lock) == 0) {
				/* then there is a pending UNIX signal */
				(void) kphysm_del_cancel(mp->sbm_memhandle);

				/* wait for completion */
				while (!rms.done)
					cv_wait(&rms.cond, &rms.lock);
			}
		}

		/* get the result of the memory delete operation */
		err = rms.error;
	}
	mutex_exit(&rms.lock);

	cv_destroy(&rms.cond);
	mutex_destroy(&rms.lock);

	if (err != KPHYSM_OK) {
		int e_code;

		switch (err) {
		case KPHYSM_ENOWORK:
			e_code = ESBD_NOERROR;
			break;

		case KPHYSM_EHANDLE:
		case KPHYSM_ESEQUENCE:
			e_code = ESBD_INTERNAL;
			break;

		case KPHYSM_ENOTVIABLE:
			e_code = ESBD_MEM_NOTVIABLE;
			break;

		case KPHYSM_EREFUSED:
			e_code = ESBD_MEM_REFUSED;
			break;

		case KPHYSM_ENONRELOC:
			e_code = ESBD_MEM_NONRELOC;
			break;

		case KPHYSM_ECANCELLED:
			e_code = ESBD_MEM_CANCELLED;
			break;

		case KPHYSM_ERESOURCE:
			e_code = ESBD_MEMFAIL;
			break;

		default:
			cmn_err(CE_WARN,
			    "%s: unexpected kphysm error code %d,"
			    " id 0x%p",
			    f, err, mp->sbm_cm.sbdev_id);

			e_code = ESBD_IO;
			break;
		}

		if (e_code != ESBD_NOERROR) {
			dr_dev_err(CE_IGNORE, &mp->sbm_cm, e_code);
		}
	}
}
void
dr_attach_mem(dr_handle_t *hp, dr_common_unit_t *cp)
{
	sbd_error_t	*err;
	dr_mem_unit_t	*mp = (dr_mem_unit_t *)cp;
	struct memlist	*ml, *mc;
	static fn_t	f = "dr_attach_mem";

	PR_MEM("%s...\n", f);

	dr_lock_status(hp->h_bd);
	err = drmach_configure(cp->sbdev_id, 0);
	dr_unlock_status(hp->h_bd);
	if (err) {
		DRERR_SET_C(&cp->sbdev_error, &err);
		return;
	}

	ml = dr_get_memlist(mp);
	for (mc = ml; mc; mc = mc->ml_next) {
		int	rv;

		rv = kphysm_add_memory_dynamic(
		    (pfn_t)(mc->ml_address >> PAGESHIFT),
		    (pgcnt_t)(mc->ml_size >> PAGESHIFT));
		if (rv != KPHYSM_OK) {
			/*
			 * translate kphysm error and
			 * store in devlist error
			 */
			switch (rv) {
			case KPHYSM_ERESOURCE:
				rv = ESBD_NOMEM;
				break;

			case KPHYSM_EFAULT:
				rv = ESBD_FAULT;
				break;

			default:
				rv = ESBD_INTERNAL;
				break;
			}

			if (rv == ESBD_INTERNAL) {
				DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
			} else
				dr_dev_err(CE_WARN, &mp->sbm_cm, rv);
			break;
		}

		err = drmach_mem_add_span(
		    mp->sbm_cm.sbdev_id, mc->ml_address, mc->ml_size);
		if (err) {
			DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
			break;
		}
	}

	memlist_delete(ml);

	/* back out if configure failed */
	if (mp->sbm_cm.sbdev_error != NULL) {
		dr_lock_status(hp->h_bd);
		err = drmach_unconfigure(cp->sbdev_id,
		    DEVI_BRANCH_DESTROY);
		if (err)
			sbd_err_clear(&err);
		dr_unlock_status(hp->h_bd);
	}
}
#define	DR_SCRUB_VALUE	0x0d0e0a0d0b0e0e0fULL
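/*
 * DR_SCRUB_VALUE is an arbitrary, recognizable bit pattern; any value
 * would do.  Writing it through stdphys() below pulls each cacheline of
 * the deleted span into the local cpu's ecache, where the subsequent
 * flush can retire it.
 */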
static void
dr_mem_ecache_scrub(dr_mem_unit_t *mp, struct memlist *mlist)
{
#ifdef DEBUG
	clock_t		stime = ddi_get_lbolt();
#endif /* DEBUG */

	struct memlist	*ml;
	uint64_t	dst_pa;
	uint64_t	nbytes;
	uint64_t	scrub_value = DR_SCRUB_VALUE;
	processorid_t	cpuid;
	static fn_t	f = "dr_mem_ecache_scrub";

	cpuid = drmach_mem_cpu_affinity(mp->sbm_cm.sbdev_id);
	affinity_set(cpuid);

	PR_MEM("%s: using proc %d, memlist...\n", f,
	    (cpuid == CPU_CURRENT) ? CPU->cpu_id : cpuid);
	PR_MEMLIST_DUMP(mlist);

	for (ml = mlist; ml; ml = ml->ml_next) {
		/* calculate the destination physical address */
		dst_pa = ml->ml_address;
		if (ml->ml_address & PAGEOFFSET)
			cmn_err(CE_WARN,
			    "%s: address (0x%lx) not on "
			    "page boundary", f, ml->ml_address);

		nbytes = ml->ml_size;
		if (ml->ml_size & PAGEOFFSET)
			cmn_err(CE_WARN,
			    "%s: size (0x%lx) not on "
			    "page boundary", f, ml->ml_size);

		while (nbytes > 0) {
			/* write 64 bits to dst_pa */
			stdphys(dst_pa, scrub_value);

			/* increment/decrement by cacheline sizes */
			dst_pa += DRMACH_COHERENCY_UNIT;
			nbytes -= DRMACH_COHERENCY_UNIT;
		}
	}

	/*
	 * flush this cpu's ecache and take care to ensure
	 * that all of its bus transactions have retired.
	 */
	drmach_cpu_flush_ecache_sync();

	affinity_clear();

#ifdef DEBUG
	stime = ddi_get_lbolt() - stime;
	PR_MEM("%s: scrub ticks = %ld (%ld secs)\n", f, stime, stime / hz);
#endif /* DEBUG */
}
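/*
 * Worked example (illustrative numbers): if DRMACH_COHERENCY_UNIT is
 * 64 bytes, scrubbing one 8 KB page costs 8192 / 64 = 128 stdphys()
 * stores, so a 1 GB deleted span costs roughly 16M stores plus the
 * single ecache flush at the end.  The DEBUG elapsed-time trace above
 * exists because this loop is bounded purely by memory bandwidth.
 */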
static int
dr_move_memory(dr_handle_t *hp, dr_mem_unit_t *s_mp, dr_mem_unit_t *t_mp)
{
	clock_t		 copytime;
	drmachid_t	 cr_id;
	dr_sr_handle_t	*srhp;
	struct memlist	*c_ml, *d_ml;
	sbd_error_t	*err;
	static fn_t	 f = "dr_move_memory";

	PR_MEM("%s: (INLINE) moving memory from %s to %s\n",
	    f,
	    s_mp->sbm_cm.sbdev_path,
	    t_mp->sbm_cm.sbdev_path);

	ASSERT(s_mp->sbm_flags & DR_MFLAG_SOURCE);
	ASSERT(s_mp->sbm_peer == t_mp);
	ASSERT(s_mp->sbm_mlist);

	ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET);
	ASSERT(t_mp->sbm_peer == s_mp);

	/*
	 * create a memlist of spans to copy by removing
	 * the spans that have been deleted, if any, from
	 * the full source board memlist. s_mp->sbm_del_mlist
	 * will be NULL if there were no spans deleted from
	 * the source board.
	 */
	c_ml = memlist_dup(s_mp->sbm_mlist);
	d_ml = s_mp->sbm_del_mlist;
	while (d_ml != NULL) {
		c_ml = memlist_del_span(c_ml, d_ml->ml_address, d_ml->ml_size);
		d_ml = d_ml->ml_next;
	}

	affinity_set(drmach_mem_cpu_affinity(t_mp->sbm_cm.sbdev_id));

	err = drmach_copy_rename_init(
	    t_mp->sbm_cm.sbdev_id, _ptob64(t_mp->sbm_slice_offset),
	    s_mp->sbm_cm.sbdev_id, c_ml, &cr_id);
	if (err) {
		DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
		affinity_clear();
		return (-1);
	}

	srhp = dr_get_sr_handle(hp);
	ASSERT(srhp);

	copytime = ddi_get_lbolt();

	/* Quiesce the OS.  */
	if (dr_suspend(srhp)) {
		cmn_err(CE_WARN, "%s: failed to quiesce OS"
		    " for copy-rename", f);

		dr_release_sr_handle(srhp);
		err = drmach_copy_rename_fini(cr_id);
		if (err) {
			/*
			 * no error is expected since the program has
			 * not yet run.
			 */

			/* catch this in debug kernels */
			ASSERT(0);

			sbd_err_clear(&err);
		}

		/* suspend error reached via hp */
		s_mp->sbm_cm.sbdev_error = hp->h_err;
		hp->h_err = NULL;

		affinity_clear();
		return (-1);
	}

	/*
	 * Rename memory for lgroup.
	 * Source and target board numbers are packaged in arg.
	 */
	{
		dr_board_t	*t_bp, *s_bp;

		s_bp = s_mp->sbm_cm.sbdev_bp;
		t_bp = t_mp->sbm_cm.sbdev_bp;

		lgrp_plat_config(LGRP_CONFIG_MEM_RENAME,
		    (uintptr_t)(s_bp->b_num | (t_bp->b_num << 16)));
	}

	drmach_copy_rename(cr_id);

	/* Resume the OS.  */
	dr_resume(srhp);

	copytime = ddi_get_lbolt() - copytime;

	dr_release_sr_handle(srhp);
	err = drmach_copy_rename_fini(cr_id);
	if (err)
		DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);

	affinity_clear();

	PR_MEM("%s: copy-rename elapsed time = %ld ticks (%ld secs)\n",
	    f, copytime, copytime / hz);

	/* return -1 if dr_suspend or copy/rename recorded an error */
	return (err == NULL ? 0 : -1);
}
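/*
 * Note on ordering in the routine above: drmach_copy_rename_init()
 * runs before the OS is quiesced, the actual drmach_copy_rename()
 * runs while everything is suspended, and drmach_copy_rename_fini()
 * runs after dr_resume().  Only the middle step executes with the
 * system frozen, which is what keeps the quiesce window short.
 */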
/*
 * If detaching node contains memory that is "non-permanent"
 * then the memory adr's are simply cleared. If the memory
 * is non-relocatable, then do a copy-rename.
 */
int
dr_detach_mem(dr_handle_t *hp, dr_common_unit_t *cp)
{
	int		rv = 0;
	dr_mem_unit_t	*s_mp = (dr_mem_unit_t *)cp;
	dr_mem_unit_t	*t_mp;
	dr_state_t	state;
	static fn_t	f = "dr_detach_mem";

	PR_MEM("%s...\n", f);

	/* lookup target mem unit and target board structure, if any */
	if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
		t_mp = s_mp->sbm_peer;
		ASSERT(t_mp != NULL);
		ASSERT(t_mp->sbm_peer == s_mp);
	} else {
		t_mp = NULL;
	}

	/* verify mem unit's state is UNREFERENCED */
	state = s_mp->sbm_cm.sbdev_state;
	if (state != DR_STATE_UNREFERENCED) {
		dr_dev_err(CE_IGNORE, &s_mp->sbm_cm, ESBD_STATE);
		return (-1);
	}

	/* verify target mem unit's state is UNREFERENCED, if any */
	if (t_mp != NULL) {
		state = t_mp->sbm_cm.sbdev_state;
		if (state != DR_STATE_UNREFERENCED) {
			dr_dev_err(CE_IGNORE, &t_mp->sbm_cm, ESBD_STATE);
			return (-1);
		}
	}

	/*
	 * Scrub deleted memory.  This will cause all cachelines
	 * referencing the memory to only be in the local cpu's
	 * ecache.
	 */
	if (s_mp->sbm_flags & DR_MFLAG_RELDONE) {
		/* no del mlist for src<=dst mem size copy/rename */
		if (s_mp->sbm_del_mlist)
			dr_mem_ecache_scrub(s_mp, s_mp->sbm_del_mlist);
	}
	if (t_mp != NULL && (t_mp->sbm_flags & DR_MFLAG_RELDONE)) {
		ASSERT(t_mp->sbm_del_mlist);
		dr_mem_ecache_scrub(t_mp, t_mp->sbm_del_mlist);
	}

	/*
	 * If there is no target board (no copy/rename was needed), then
	 * simply disable the outgoing memory node.  Otherwise, move the
	 * memory to its new home.
	 */
	if (t_mp == NULL) {
		sbd_error_t	*err;

		/*
		 * Reprogram interconnect hardware and disable
		 * memory controllers for memory node that's going away.
		 */

		err = drmach_mem_disable(s_mp->sbm_cm.sbdev_id);
		if (err) {
			DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
			rv = -1;
		}
	} else {
		rv = dr_move_memory(hp, s_mp, t_mp);
		PR_MEM("%s: %s memory COPY-RENAME (board %d -> %d)\n",
		    f,
		    rv ? "FAILED" : "COMPLETED",
		    s_mp->sbm_cm.sbdev_bp->b_num,
		    t_mp->sbm_cm.sbdev_bp->b_num);

		if (rv != 0)
			(void) dr_cancel_mem(s_mp);
	}

	if (rv == 0) {
		sbd_error_t	*err;

		dr_lock_status(hp->h_bd);
		err = drmach_unconfigure(s_mp->sbm_cm.sbdev_id,
		    DEVI_BRANCH_DESTROY);
		dr_unlock_status(hp->h_bd);
		if (err)
			sbd_err_clear(&err);
	}

	return (rv);
}
#ifndef _STARFIRE
/*
 * XXX workaround for certain lab configurations (see also starcat drmach.c)
 * Temporary code to get around observed incorrect results from
 * kphysm_del_span_query when the queried span contains address spans
 * not occupied by memory in between spans that do have memory.
 * This routine acts as a wrapper to kphysm_del_span_query. It builds
 * a memlist from phys_install of spans that exist between base and
 * base + npages, inclusively. Kphysm_del_span_query is called for each
 * node in the memlist with the results accumulated in *mp.
 */
static int
dr_del_span_query(pfn_t base, pgcnt_t npages, memquery_t *mp)
{
	uint64_t	 pa = _ptob64(base);
	uint64_t	 sm = ~ (137438953472ull - 1);	/* 2^37 = 128 GB slice */
	uint64_t	 sa = pa & sm;
	struct memlist	*mlist, *ml;
	int		 rv = 0;

	npages = npages; /* silence lint */
	memlist_read_lock();
	mlist = memlist_dup(phys_install);
	memlist_read_unlock();

	/* drop spans that fall outside the slice containing base */
	for (ml = mlist; ml; ml = ml->ml_next) {
		if ((ml->ml_address & sm) != sa) {
			mlist = memlist_del_span(mlist,
			    ml->ml_address, ml->ml_size);
		}
	}

	mp->phys_pages = 0;
	mp->managed = 0;
	mp->nonrelocatable = 0;
	mp->first_nonrelocatable = (pfn_t)-1;	/* XXX */
	mp->last_nonrelocatable = 0;

	for (ml = mlist; ml; ml = ml->ml_next) {
		memquery_t mq;

		rv = kphysm_del_span_query(
		    _b64top(ml->ml_address), _b64top(ml->ml_size), &mq);
		if (rv)
			break;

		mp->phys_pages += mq.phys_pages;
		mp->managed += mq.managed;
		mp->nonrelocatable += mq.nonrelocatable;

		if (mq.nonrelocatable != 0) {
			if (mq.first_nonrelocatable < mp->first_nonrelocatable)
				mp->first_nonrelocatable =
				    mq.first_nonrelocatable;
			if (mq.last_nonrelocatable > mp->last_nonrelocatable)
				mp->last_nonrelocatable =
				    mq.last_nonrelocatable;
		}
	}

	if (mp->nonrelocatable == 0)
		mp->first_nonrelocatable = 0;	/* XXX */

	memlist_delete(mlist);
	return (rv);
}

#define	kphysm_del_span_query	dr_del_span_query
#endif /* _STARFIRE */
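/*
 * With the #define above in place, every kphysm_del_span_query() call
 * from this point down in the file is transparently routed through
 * dr_del_span_query(), which queries each populated span separately
 * and accumulates the results.
 */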
/*
 * NOTE: This routine is only partially smart about multiple
 *	 mem-units.  Need to make mem-status structure smart
 *	 about them also.
 */
int
dr_mem_status(dr_handle_t *hp, dr_devset_t devset, sbd_dev_stat_t *dsp)
{
	int		m, mix;
	memdelstat_t	mdst;
	memquery_t	mq;
	dr_board_t	*bp;
	dr_mem_unit_t	*mp;
	sbd_mem_stat_t	*msp;
	static fn_t	f = "dr_mem_status";

	bp = hp->h_bd;
	devset &= DR_DEVS_PRESENT(bp);

	for (m = mix = 0; m < MAX_MEM_UNITS_PER_BOARD; m++) {
		int		rv;
		sbd_error_t	*err;
		drmach_status_t	pstat;
		dr_mem_unit_t	*p_mp;

		if (DEVSET_IN_SET(devset, SBD_COMP_MEM, m) == 0)
			continue;

		mp = dr_get_mem_unit(bp, m);

		if (mp->sbm_cm.sbdev_state == DR_STATE_EMPTY) {
			/* present, but not fully initialized */
			continue;
		}

		if (mp->sbm_cm.sbdev_id == (drmachid_t)0)
			continue;

		/* fetch platform status */
		err = drmach_status(mp->sbm_cm.sbdev_id, &pstat);
		if (err) {
			DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
			continue;
		}

		msp = &dsp->d_mem;
		bzero((caddr_t)msp, sizeof (*msp));

		(void) strncpy(msp->ms_cm.c_id.c_name, pstat.type,
		    sizeof (msp->ms_cm.c_id.c_name));
		msp->ms_cm.c_id.c_type = mp->sbm_cm.sbdev_type;
		msp->ms_cm.c_id.c_unit = SBD_NULL_UNIT;
		msp->ms_cm.c_cond = mp->sbm_cm.sbdev_cond;
		msp->ms_cm.c_busy = mp->sbm_cm.sbdev_busy | pstat.busy;
		msp->ms_cm.c_time = mp->sbm_cm.sbdev_time;
		msp->ms_cm.c_ostate = mp->sbm_cm.sbdev_ostate;

		msp->ms_totpages = mp->sbm_npages;
		msp->ms_basepfn = mp->sbm_basepfn;
		msp->ms_pageslost = mp->sbm_pageslost;
		msp->ms_cage_enabled = kcage_on;

		if (mp->sbm_flags & DR_MFLAG_RESERVED)
			p_mp = mp->sbm_peer;
		else
			p_mp = NULL;

		if (p_mp == NULL) {
			msp->ms_peer_is_target = 0;
			msp->ms_peer_ap_id[0] = '\0';
		} else if (p_mp->sbm_flags & DR_MFLAG_RESERVED) {
			char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
			char *minor;

			/*
			 * b_dip doesn't have to be held for ddi_pathname()
			 * because the board struct (dr_board_t) will be
			 * destroyed before b_dip detaches.
			 */
			(void) ddi_pathname(bp->b_dip, path);
			minor = strchr(p_mp->sbm_cm.sbdev_path, ':');

			(void) snprintf(msp->ms_peer_ap_id,
			    sizeof (msp->ms_peer_ap_id), "%s%s",
			    path, (minor == NULL) ? "" : minor);

			kmem_free(path, MAXPATHLEN);

			if (p_mp->sbm_flags & DR_MFLAG_TARGET)
				msp->ms_peer_is_target = 1;
		}

		if (mp->sbm_flags & DR_MFLAG_RELOWNER)
			rv = kphysm_del_status(mp->sbm_memhandle, &mdst);
		else
			rv = KPHYSM_EHANDLE;	/* force 'if' to fail */

		if (rv == KPHYSM_OK) {
			/*
			 * Any pages above managed is "free",
			 * i.e. it's collected.
			 */
			msp->ms_detpages += (uint_t)(mdst.collected +
			    mdst.phys_pages - mdst.managed);
		} else {
			/*
			 * If we're UNREFERENCED or UNCONFIGURED,
			 * then the number of detached pages is
			 * however many pages are on the board.
			 * I.e. detached = not in use by OS.
			 */
			switch (msp->ms_cm.c_ostate) {
			/*
			 * changed to use cfgadm states
			 *
			 * was:
			 *	case DR_STATE_UNREFERENCED:
			 *	case DR_STATE_UNCONFIGURED:
			 */
			case SBD_STAT_UNCONFIGURED:
				msp->ms_detpages = msp->ms_totpages;
				break;

			default:
				break;
			}
		}

		/*
		 * kphysm_del_span_query can report non-reloc pages = total
		 * pages for memory that is not yet configured
		 */
		if (mp->sbm_cm.sbdev_state != DR_STATE_UNCONFIGURED) {

			rv = kphysm_del_span_query(mp->sbm_basepfn,
			    mp->sbm_npages, &mq);

			if (rv == KPHYSM_OK) {
				msp->ms_managed_pages = mq.managed;
				msp->ms_noreloc_pages = mq.nonrelocatable;
				msp->ms_noreloc_first =
				    mq.first_nonrelocatable;
				msp->ms_noreloc_last =
				    mq.last_nonrelocatable;
				msp->ms_cm.c_sflags = 0;
				if (mq.nonrelocatable) {
					SBD_SET_SUSPEND(SBD_CMD_UNCONFIGURE,
					    msp->ms_cm.c_sflags);
				}
			} else {
				PR_MEM("%s: kphysm_del_span_query() = %d\n",
				    f, rv);
			}
		}

		/*
		 * Check source unit state during copy-rename
		 */
		if ((mp->sbm_flags & DR_MFLAG_SOURCE) &&
		    (mp->sbm_cm.sbdev_state == DR_STATE_UNREFERENCED ||
		    mp->sbm_cm.sbdev_state == DR_STATE_RELEASE))
			msp->ms_cm.c_ostate = SBD_STAT_CONFIGURED;

		mix++;
		dsp++;
	}

	return (mix);
}
int
dr_pre_attach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
{
	_NOTE(ARGUNUSED(hp))

	int		err_flag = 0;
	int		d;
	sbd_error_t	*err;
	static fn_t	f = "dr_pre_attach_mem";

	PR_MEM("%s...\n", f);

	for (d = 0; d < devnum; d++) {
		dr_mem_unit_t	*mp = (dr_mem_unit_t *)devlist[d];
		dr_state_t	state;

		cmn_err(CE_CONT, "OS configure %s", mp->sbm_cm.sbdev_path);

		state = mp->sbm_cm.sbdev_state;
		switch (state) {
		case DR_STATE_UNCONFIGURED:
			PR_MEM("%s: recovering from UNCONFIG for %s\n",
			    f,
			    mp->sbm_cm.sbdev_path);

			/* use memlist cached by dr_post_detach_mem_unit */
			ASSERT(mp->sbm_mlist != NULL);
			PR_MEM("%s: re-configuring cached memlist for %s:\n",
			    f, mp->sbm_cm.sbdev_path);
			PR_MEMLIST_DUMP(mp->sbm_mlist);

			/* kphysm del handle should have been freed */
			ASSERT((mp->sbm_flags & DR_MFLAG_RELOWNER) == 0);

			/*FALLTHROUGH*/

		case DR_STATE_CONNECTED:
			PR_MEM("%s: reprogramming mem hardware on %s\n",
			    f, mp->sbm_cm.sbdev_bp->b_path);

			PR_MEM("%s: enabling %s\n",
			    f, mp->sbm_cm.sbdev_path);

			err = drmach_mem_enable(mp->sbm_cm.sbdev_id);
			if (err) {
				DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
				err_flag = 1;
			}
			break;

		default:
			dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_STATE);
			err_flag = 1;
			break;
		}

		/* exit for loop if error encountered */
		if (err_flag)
			break;
	}

	return (err_flag ? -1 : 0);
}
int
dr_post_attach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
{
	_NOTE(ARGUNUSED(hp))

	int		d;
	static fn_t	f = "dr_post_attach_mem";

	PR_MEM("%s...\n", f);

	for (d = 0; d < devnum; d++) {
		dr_mem_unit_t	*mp = (dr_mem_unit_t *)devlist[d];
		struct memlist	*mlist, *ml;

		mlist = dr_get_memlist(mp);
		if (mlist == NULL) {
			dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_MEMFAIL);
			continue;
		}

		/*
		 * Verify the memory really did successfully attach
		 * by checking for its existence in phys_install.
		 */
		memlist_read_lock();
		if (memlist_intersect(phys_install, mlist) == 0) {
			memlist_read_unlock();

			DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);

			PR_MEM("%s: %s memlist not in phys_install",
			    f, mp->sbm_cm.sbdev_path);

			memlist_delete(mlist);
			continue;
		}
		memlist_read_unlock();

		for (ml = mlist; ml != NULL; ml = ml->ml_next) {
			sbd_error_t *err;

			err = drmach_mem_add_span(
			    mp->sbm_cm.sbdev_id,
			    ml->ml_address,
			    ml->ml_size);
			if (err)
				DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
		}

		memlist_delete(mlist);

		/*
		 * Destroy cached memlist, if any.
		 * There will be a cached memlist in sbm_mlist if
		 * this board is being configured directly after
		 * an unconfigure.
		 * To support this transition, dr_post_detach_mem
		 * left a copy of the last known memlist in sbm_mlist.
		 * This memlist could differ from any derived from
		 * hardware if while this memunit was last configured
		 * the system detected and deleted bad pages from
		 * phys_install.  The location of those bad pages
		 * will be reflected in the cached memlist.
		 */
		if (mp->sbm_mlist) {
			memlist_delete(mp->sbm_mlist);
			mp->sbm_mlist = NULL;
		}

		/*
		 * TODO: why is this call to dr_init_mem_unit_data here?
		 * this has been done at discovery or connect time, so this is
		 * probably redundant and unnecessary.
		 */
		dr_init_mem_unit_data(mp);
	}

	return (0);
}
int
dr_pre_detach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
{
	_NOTE(ARGUNUSED(hp))

	int d;

	for (d = 0; d < devnum; d++) {
		dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d];

		cmn_err(CE_CONT, "OS unconfigure %s", mp->sbm_cm.sbdev_path);
	}

	return (0);
}
int
dr_post_detach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
{
	_NOTE(ARGUNUSED(hp))

	int		d, rv;
	static fn_t	f = "dr_post_detach_mem";

	PR_MEM("%s...\n", f);

	rv = 0;
	for (d = 0; d < devnum; d++) {
		dr_mem_unit_t	*mp = (dr_mem_unit_t *)devlist[d];

		ASSERT(mp->sbm_cm.sbdev_bp == hp->h_bd);

		if (dr_post_detach_mem_unit(mp))
			rv = -1;
	}

	return (rv);
}
static void
dr_add_memory_spans(dr_mem_unit_t *mp, struct memlist *ml)
{
	static fn_t	f = "dr_add_memory_spans";

	PR_MEM("%s...", f);
	PR_MEMLIST_DUMP(ml);

#ifdef DEBUG
	memlist_read_lock();
	if (memlist_intersect(phys_install, ml)) {
		PR_MEM("%s:WARNING: memlist intersects with phys_install\n",
		    f);
	}
	memlist_read_unlock();
#endif

	for (; ml; ml = ml->ml_next) {
		pfn_t		 base;
		pgcnt_t		 npgs;
		int		 rv;
		sbd_error_t	*err;

		base = _b64top(ml->ml_address);
		npgs = _b64top(ml->ml_size);

		rv = kphysm_add_memory_dynamic(base, npgs);

		err = drmach_mem_add_span(
		    mp->sbm_cm.sbdev_id,
		    ml->ml_address,
		    ml->ml_size);

		if (err)
			DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);

		if (rv != KPHYSM_OK) {
			cmn_err(CE_WARN, "%s:"
			    " unexpected kphysm_add_memory_dynamic"
			    " return value %d;"
			    " basepfn=0x%lx, npages=%ld\n",
			    f, rv, base, npgs);

			continue;
		}
	}
}
static int
dr_post_detach_mem_unit(dr_mem_unit_t *s_mp)
{
	uint64_t	sz = s_mp->sbm_slice_size;
	uint64_t	sm = sz - 1;
	/* old and new below refer to PAs before and after copy-rename */
	uint64_t	s_old_basepa, s_new_basepa;
	uint64_t	t_old_basepa, t_new_basepa;
	uint64_t	t_new_smallsize = 0;
	dr_mem_unit_t	*t_mp, *x_mp;
	struct memlist	*ml;
	int		rv;
	sbd_error_t	*err;
	static fn_t	f = "dr_post_detach_mem_unit";

	PR_MEM("%s...\n", f);

	/* s_mp->sbm_del_mlist could be NULL, meaning no deleted spans */
	PR_MEM("%s: %s: deleted memlist (EMPTY maybe okay):\n",
	    f, s_mp->sbm_cm.sbdev_path);
	PR_MEMLIST_DUMP(s_mp->sbm_del_mlist);

	/* sanity check */
	ASSERT(s_mp->sbm_del_mlist == NULL ||
	    (s_mp->sbm_flags & DR_MFLAG_RELDONE) != 0);

	if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
		t_mp = s_mp->sbm_peer;
		ASSERT(t_mp != NULL);
		ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET);
		ASSERT(t_mp->sbm_peer == s_mp);

		ASSERT(t_mp->sbm_flags & DR_MFLAG_RELDONE);
		ASSERT(t_mp->sbm_del_mlist);

		PR_MEM("%s: target %s: deleted memlist:\n",
		    f, t_mp->sbm_cm.sbdev_path);
		PR_MEMLIST_DUMP(t_mp->sbm_del_mlist);
	} else {
		/* this is no target unit */
		t_mp = NULL;
	}

	/*
	 * Verify the memory really did successfully detach
	 * by checking for its non-existence in phys_install.
	 */
	rv = 0;
	memlist_read_lock();
	if (s_mp->sbm_flags & DR_MFLAG_RELDONE) {
		x_mp = s_mp;
		rv = memlist_intersect(phys_install, x_mp->sbm_del_mlist);
	}
	if (rv == 0 && t_mp && (t_mp->sbm_flags & DR_MFLAG_RELDONE)) {
		x_mp = t_mp;
		rv = memlist_intersect(phys_install, x_mp->sbm_del_mlist);
	}
	memlist_read_unlock();

	if (rv) {
		/* error: memlist still in phys_install */
		DR_DEV_INTERNAL_ERROR(&x_mp->sbm_cm);
	}

	/*
	 * clean mem unit state and bail out if an error has been recorded.
	 */
	rv = 0;
	if (s_mp->sbm_cm.sbdev_error) {
		PR_MEM("%s: %s flags=%x", f,
		    s_mp->sbm_cm.sbdev_path, s_mp->sbm_flags);
		DR_DEV_CLR_UNREFERENCED(&s_mp->sbm_cm);
		DR_DEV_CLR_RELEASED(&s_mp->sbm_cm);
		dr_device_transition(&s_mp->sbm_cm, DR_STATE_CONFIGURED);
		rv = -1;
	}
	if (t_mp != NULL && t_mp->sbm_cm.sbdev_error != NULL) {
		PR_MEM("%s: %s flags=%x", f,
		    s_mp->sbm_cm.sbdev_path, s_mp->sbm_flags);
		DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm);
		DR_DEV_CLR_RELEASED(&t_mp->sbm_cm);
		dr_device_transition(&t_mp->sbm_cm, DR_STATE_CONFIGURED);
		rv = -1;
	}

	if (rv)
		goto cleanup;

	s_old_basepa = _ptob64(s_mp->sbm_basepfn);
	err = drmach_mem_get_base_physaddr(s_mp->sbm_cm.sbdev_id,
	    &s_new_basepa);
	ASSERT(err == NULL);

	PR_MEM("%s:s_old_basepa: 0x%lx\n", f, s_old_basepa);
	PR_MEM("%s:s_new_basepa: 0x%lx\n", f, s_new_basepa);

	if (t_mp != NULL) {
		struct memlist	*s_copy_mlist;

		t_old_basepa = _ptob64(t_mp->sbm_basepfn);
		err = drmach_mem_get_base_physaddr(t_mp->sbm_cm.sbdev_id,
		    &t_new_basepa);
		ASSERT(err == NULL);

		PR_MEM("%s:t_old_basepa: 0x%lx\n", f, t_old_basepa);
		PR_MEM("%s:t_new_basepa: 0x%lx\n", f, t_new_basepa);

		/*
		 * Construct copy list with original source addresses.
		 * Used to add back excess target mem.
		 */
		s_copy_mlist = memlist_dup(s_mp->sbm_mlist);
		for (ml = s_mp->sbm_del_mlist; ml; ml = ml->ml_next) {
			s_copy_mlist = memlist_del_span(s_copy_mlist,
			    ml->ml_address, ml->ml_size);
		}

		PR_MEM("%s: source copy list:\n:", f);
		PR_MEMLIST_DUMP(s_copy_mlist);

		/*
		 * We had to swap mem-units, so update
		 * memlists accordingly with new base
		 * addresses.
		 */
		for (ml = t_mp->sbm_mlist; ml; ml = ml->ml_next) {
			ml->ml_address -= t_old_basepa;
			ml->ml_address += t_new_basepa;
		}

		/*
		 * There is no need to explicitly rename the target delete
		 * memlist, because sbm_del_mlist and sbm_mlist always
		 * point to the same memlist for a copy/rename operation.
		 */
		ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);

		PR_MEM("%s: renamed target memlist and delete memlist:\n", f);
		PR_MEMLIST_DUMP(t_mp->sbm_mlist);

		for (ml = s_mp->sbm_mlist; ml; ml = ml->ml_next) {
			ml->ml_address -= s_old_basepa;
			ml->ml_address += s_new_basepa;
		}

		PR_MEM("%s: renamed source memlist:\n", f);
		PR_MEMLIST_DUMP(s_mp->sbm_mlist);

		/*
		 * Keep track of dynamically added segments
		 * since they cannot be split if we need to delete
		 * excess source memory later for this board.
		 */
		if (t_mp->sbm_dyn_segs)
			memlist_delete(t_mp->sbm_dyn_segs);
		t_mp->sbm_dyn_segs = s_mp->sbm_dyn_segs;
		s_mp->sbm_dyn_segs = NULL;

		/*
		 * If the target memory range with the new target base PA
		 * extends beyond the usable slice, prevent any "target excess"
		 * from being added back after this copy/rename and
		 * calculate the new smaller size of the target board
		 * to be set as part of target cleanup. The base + npages
		 * must only include the range of memory up to the end of
		 * this slice. This will only be used after a category 4
		 * large-to-small target type copy/rename - see comments
		 * in dr_select_mem_target.
		 */
		if (((t_new_basepa & sm) + _ptob64(t_mp->sbm_npages)) > sz) {
			t_new_smallsize = sz - (t_new_basepa & sm);
		}
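
		/*
		 * Worked example (illustrative numbers): with a 128 GB
		 * slice, sz == 0x2000000000 and sm == 0x1FFFFFFFFF.
		 * If (t_new_basepa & sm) == 0x1800000000 (a 96 GB
		 * offset) and the target holds 64 GB (0x1000000000
		 * bytes), the sum 0x2800000000 exceeds sz, so
		 * t_new_smallsize == 0x800000000: only the low 32 GB
		 * of the unit remain usable within the slice.
		 */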
		if (s_mp->sbm_flags & DR_MFLAG_MEMRESIZE &&
		    t_new_smallsize == 0) {
			struct memlist	*t_excess_mlist;

			/*
			 * Add back excess target memory.
			 * Subtract out the portion of the target memory
			 * node that was taken over by the source memory
			 * node.
			 */
			t_excess_mlist = memlist_dup(t_mp->sbm_mlist);
			for (ml = s_copy_mlist; ml; ml = ml->ml_next) {
				t_excess_mlist =
				    memlist_del_span(t_excess_mlist,
				    ml->ml_address, ml->ml_size);
			}

			/*
			 * Update dynamically added segs
			 */
			for (ml = s_mp->sbm_del_mlist; ml; ml = ml->ml_next) {
				t_mp->sbm_dyn_segs =
				    memlist_del_span(t_mp->sbm_dyn_segs,
				    ml->ml_address, ml->ml_size);
			}
			for (ml = t_excess_mlist; ml; ml = ml->ml_next) {
				t_mp->sbm_dyn_segs =
				    memlist_cat_span(t_mp->sbm_dyn_segs,
				    ml->ml_address, ml->ml_size);
			}
			PR_MEM("%s: %s: updated dynamic seg list:\n",
			    f, t_mp->sbm_cm.sbdev_path);
			PR_MEMLIST_DUMP(t_mp->sbm_dyn_segs);

			PR_MEM("%s: adding back remaining portion"
			    " of %s, memlist:\n",
			    f, t_mp->sbm_cm.sbdev_path);
			PR_MEMLIST_DUMP(t_excess_mlist);

			dr_add_memory_spans(s_mp, t_excess_mlist);
			memlist_delete(t_excess_mlist);
		}
		memlist_delete(s_copy_mlist);

#ifdef DEBUG
		/*
		 * Renaming s_mp->sbm_del_mlist is not necessary.  This
		 * list is not used beyond this point, and in fact, is
		 * disposed of at the end of this function.
		 */
		for (ml = s_mp->sbm_del_mlist; ml; ml = ml->ml_next) {
			ml->ml_address -= s_old_basepa;
			ml->ml_address += s_new_basepa;
		}

		PR_MEM("%s: renamed source delete memlist", f);
		PR_MEMLIST_DUMP(s_mp->sbm_del_mlist);
#endif

	}

	if (t_mp != NULL) {
		/* delete target's entire address space */
		err = drmach_mem_del_span(t_mp->sbm_cm.sbdev_id,
		    t_old_basepa & ~ sm, sz);
		if (err)
			DRERR_SET_C(&t_mp->sbm_cm.sbdev_error, &err);
		ASSERT(err == NULL);

		/*
		 * After the copy/rename, the original address space
		 * for the source board (which is now located on the
		 * target board) may now have some excess to be deleted.
		 * The amount is calculated by masking the slice
		 * info and keeping the slice offset from t_new_basepa.
		 */
		err = drmach_mem_del_span(s_mp->sbm_cm.sbdev_id,
		    s_old_basepa & ~ sm, t_new_basepa & sm);
		if (err)
			DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
		ASSERT(err == NULL);

	} else {
		/* delete board's entire address space */
		err = drmach_mem_del_span(s_mp->sbm_cm.sbdev_id,
		    s_old_basepa & ~ sm, sz);
		if (err)
			DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
		ASSERT(err == NULL);
	}

cleanup:
	/* clean up target mem unit */
	if (t_mp != NULL) {
		memlist_delete(t_mp->sbm_del_mlist);
		/* no need to delete sbm_mlist, it shares sbm_del_mlist */

		t_mp->sbm_del_mlist = NULL;
		t_mp->sbm_mlist = NULL;
		t_mp->sbm_peer = NULL;
		t_mp->sbm_flags = 0;
		t_mp->sbm_cm.sbdev_busy = 0;
		dr_init_mem_unit_data(t_mp);

		/* reduce target size if new PAs go past end of usable slice */
		if (t_new_smallsize > 0) {
			t_mp->sbm_npages = _b64top(t_new_smallsize);
			PR_MEM("%s: target new size 0x%lx bytes\n",
			    f, t_new_smallsize);
		}
	}
	if (t_mp != NULL && t_mp->sbm_cm.sbdev_error == NULL) {
		/*
		 * now that copy/rename has completed, undo this
		 * work that was done in dr_release_mem_done.
		 */
		DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm);
		DR_DEV_CLR_RELEASED(&t_mp->sbm_cm);
		dr_device_transition(&t_mp->sbm_cm, DR_STATE_CONFIGURED);
	}

	/*
	 * clean up (source) board's mem unit structure.
	 * NOTE: sbm_mlist is retained if no error has been recorded (in other
	 * words, when s_mp->sbm_cm.sbdev_error is NULL). This memlist is
	 * referred to elsewhere as the cached memlist.  The cached memlist
	 * is used to re-attach (configure back in) this memunit from the
	 * unconfigured state.  The memlist is retained because it may
	 * represent bad pages that were detected while the memory was
	 * configured into the OS.  The OS deletes bad pages from
	 * phys_install.  Those deletes, if any, will be represented in
	 * the cached mlist.
	 */
	if (s_mp->sbm_del_mlist && s_mp->sbm_del_mlist != s_mp->sbm_mlist)
		memlist_delete(s_mp->sbm_del_mlist);

	if (s_mp->sbm_cm.sbdev_error && s_mp->sbm_mlist) {
		memlist_delete(s_mp->sbm_mlist);
		s_mp->sbm_mlist = NULL;
	}

	if (s_mp->sbm_dyn_segs != NULL && s_mp->sbm_cm.sbdev_error == 0) {
		memlist_delete(s_mp->sbm_dyn_segs);
		s_mp->sbm_dyn_segs = NULL;
	}

	s_mp->sbm_del_mlist = NULL;
	s_mp->sbm_peer = NULL;
	s_mp->sbm_flags = 0;
	s_mp->sbm_cm.sbdev_busy = 0;
	dr_init_mem_unit_data(s_mp);

	PR_MEM("%s: cached memlist for %s:", f, s_mp->sbm_cm.sbdev_path);
	PR_MEMLIST_DUMP(s_mp->sbm_mlist);

	return (rv);
}
/*
 * Successful return from this function will have the memory
 * handle in bp->b_dev[..mem-unit...].sbm_memhandle allocated
 * and waiting.  This routine's job is to select the memory that
 * actually has to be released (detached) which may not necessarily
 * be the same memory node that came in in devlist[],
 * i.e. a copy-rename is needed.
 */
int
dr_pre_release_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
{
	int		d;
	int		err_flag = 0;
	static fn_t	f = "dr_pre_release_mem";

	PR_MEM("%s...\n", f);

	for (d = 0; d < devnum; d++) {
		dr_mem_unit_t	*mp = (dr_mem_unit_t *)devlist[d];
		int		rv;
		memquery_t	mq;
		struct memlist	*ml;

		if (mp->sbm_cm.sbdev_error) {
			err_flag = 1;
			continue;
		} else if (!kcage_on) {
			dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_KCAGE_OFF);
			err_flag = 1;
			continue;
		}

		if (mp->sbm_flags & DR_MFLAG_RESERVED) {
			/*
			 * Board is currently involved in a delete
			 * memory operation. Can't detach this guy until
			 * that operation completes.
			 */
			dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_INVAL);
			err_flag = 1;
			break;
		}

		/*
		 * Check whether the detaching memory requires a
		 * copy-rename.
		 */
		ASSERT(mp->sbm_npages != 0);
		rv = kphysm_del_span_query(mp->sbm_basepfn, mp->sbm_npages,
		    &mq);
		if (rv != KPHYSM_OK) {
			DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
			err_flag = 1;
			break;
		}

		if (mq.nonrelocatable != 0) {
			if (!(dr_cmd_flags(hp) &
			    (SBD_FLAG_FORCE | SBD_FLAG_QUIESCE_OKAY))) {
				/* caller wasn't prompted for a suspend */
				dr_dev_err(CE_WARN, &mp->sbm_cm,
				    ESBD_QUIESCE_REQD);
				err_flag = 1;
				break;
			}
		}

		/* flags should be clean at this time */
		ASSERT(mp->sbm_flags == 0);

		ASSERT(mp->sbm_mlist == NULL);		/* should be null */
		ASSERT(mp->sbm_del_mlist == NULL);	/* should be null */
		if (mp->sbm_mlist != NULL) {
			memlist_delete(mp->sbm_mlist);
			mp->sbm_mlist = NULL;
		}

		ml = dr_get_memlist(mp);
		if (ml == NULL) {
			err_flag = 1;
			PR_MEM("%s: no memlist found for %s\n",
			    f, mp->sbm_cm.sbdev_path);
			continue;
		}

		/* allocate a kphysm handle */
		rv = kphysm_del_gethandle(&mp->sbm_memhandle);
		if (rv != KPHYSM_OK) {
			memlist_delete(ml);

			DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
			err_flag = 1;
			break;
		}
		mp->sbm_flags |= DR_MFLAG_RELOWNER;

		if ((mq.nonrelocatable != 0) ||
		    dr_reserve_mem_spans(&mp->sbm_memhandle, ml)) {
			/*
			 * Either the detaching memory node contains
			 * non-reloc memory or we failed to reserve the
			 * detaching memory node (which did _not_ have
			 * any non-reloc memory, i.e. some non-reloc mem
			 * got onboard).
			 */

			if (dr_select_mem_target(hp, mp, ml)) {
				int rv;

				/*
				 * We had no luck locating a target
				 * memory node to be the recipient of
				 * the non-reloc memory on the node
				 * we're trying to detach.
				 * Clean up by disposing the mem handle
				 * and the mem list.
				 */
				rv = kphysm_del_release(mp->sbm_memhandle);
				if (rv != KPHYSM_OK) {
					/*
					 * can do nothing but complain
					 * and hope helpful for debug
					 */
					cmn_err(CE_WARN, "%s: unexpected"
					    " kphysm_del_release return"
					    " value %d",
					    f, rv);
				}
				mp->sbm_flags &= ~DR_MFLAG_RELOWNER;

				memlist_delete(ml);

				/* make sure sbm_flags is clean */
				ASSERT(mp->sbm_flags == 0);

				dr_dev_err(CE_WARN, &mp->sbm_cm,
				    ESBD_NO_TARGET);

				err_flag = 1;
				break;
			}

			/*
			 * ml is not memlist_delete'd here because
			 * it has been assigned to mp->sbm_mlist
			 * by dr_select_mem_target.
			 */
		} else {
			/* no target needed to detach this board */
			mp->sbm_flags |= DR_MFLAG_RESERVED;
			mp->sbm_peer = NULL;
			mp->sbm_del_mlist = ml;
			mp->sbm_mlist = ml;
			mp->sbm_cm.sbdev_busy = 1;
		}

		ASSERT(mp->sbm_mlist != NULL);

		if (mp->sbm_flags & DR_MFLAG_SOURCE) {
			PR_MEM("%s: release of %s requires copy/rename;"
			    " selected target board %s\n",
			    f,
			    mp->sbm_cm.sbdev_path,
			    mp->sbm_peer->sbm_cm.sbdev_path);
		} else {
			PR_MEM("%s: copy/rename not required to release %s\n",
			    f, mp->sbm_cm.sbdev_path);
		}

		ASSERT(mp->sbm_flags & DR_MFLAG_RELOWNER);
		ASSERT(mp->sbm_flags & DR_MFLAG_RESERVED);
	}

	return (err_flag ? -1 : 0);
}
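/*
 * Flag lifecycle recap (as implemented above): a successful pass leaves
 * the unit holding DR_MFLAG_RELOWNER (it owns the kphysm handle) and
 * DR_MFLAG_RESERVED (its spans are scheduled for delete); a copy/rename
 * case additionally gets DR_MFLAG_SOURCE here and DR_MFLAG_TARGET on
 * the peer, both set inside dr_select_mem_target().
 */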
void
dr_release_mem_done(dr_common_unit_t *cp)
{
	dr_mem_unit_t	*s_mp = (dr_mem_unit_t *)cp;
	dr_mem_unit_t	*t_mp, *mp;
	int		rv;
	static fn_t	f = "dr_release_mem_done";

	/*
	 * This unit will be flagged with DR_MFLAG_SOURCE, if it
	 * has a target unit.
	 */
	if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
		t_mp = s_mp->sbm_peer;
		ASSERT(t_mp != NULL);
		ASSERT(t_mp->sbm_peer == s_mp);
		ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET);
		ASSERT(t_mp->sbm_flags & DR_MFLAG_RESERVED);
	} else {
		/* this is no target unit */
		t_mp = NULL;
	}

	/* free delete handle */
	ASSERT(s_mp->sbm_flags & DR_MFLAG_RELOWNER);
	ASSERT(s_mp->sbm_flags & DR_MFLAG_RESERVED);
	rv = kphysm_del_release(s_mp->sbm_memhandle);
	if (rv != KPHYSM_OK) {
		/*
		 * can do nothing but complain
		 * and hope helpful for debug
		 */
		cmn_err(CE_WARN, "%s: unexpected kphysm_del_release"
		    " return value %d", f, rv);
	}
	s_mp->sbm_flags &= ~DR_MFLAG_RELOWNER;

	/*
	 * If an error was encountered during release, clean up
	 * the source (and target, if present) unit data.
	 */
/* XXX Can we know that sbdev_error was encountered during release? */
	if (s_mp->sbm_cm.sbdev_error != NULL) {
		PR_MEM("%s: %s: error %d noted\n",
		    f,
		    s_mp->sbm_cm.sbdev_path,
		    s_mp->sbm_cm.sbdev_error->e_code);

		if (t_mp != NULL) {
			ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
			t_mp->sbm_del_mlist = NULL;

			if (t_mp->sbm_mlist != NULL) {
				memlist_delete(t_mp->sbm_mlist);
				t_mp->sbm_mlist = NULL;
			}

			t_mp->sbm_peer = NULL;
			t_mp->sbm_flags = 0;
			t_mp->sbm_cm.sbdev_busy = 0;
		}

		if (s_mp->sbm_del_mlist != s_mp->sbm_mlist)
			memlist_delete(s_mp->sbm_del_mlist);
		s_mp->sbm_del_mlist = NULL;

		if (s_mp->sbm_mlist != NULL) {
			memlist_delete(s_mp->sbm_mlist);
			s_mp->sbm_mlist = NULL;
		}

		s_mp->sbm_peer = NULL;
		s_mp->sbm_flags = 0;
		s_mp->sbm_cm.sbdev_busy = 0;

		/* bail out */
		return;
	}

	DR_DEV_SET_RELEASED(&s_mp->sbm_cm);
	dr_device_transition(&s_mp->sbm_cm, DR_STATE_RELEASE);

	if (t_mp != NULL) {
		/*
		 * the kphysm delete operation that drained the source
		 * board also drained this target board.  Since the source
		 * board drain is now known to have succeeded, we know this
		 * target board is drained too.
		 *
		 * because DR_DEV_SET_RELEASED and dr_device_transition
		 * is done here, the dr_release_dev_done should not
		 * fail.
		 */
		DR_DEV_SET_RELEASED(&t_mp->sbm_cm);
		dr_device_transition(&t_mp->sbm_cm, DR_STATE_RELEASE);

		/*
		 * NOTE: do not transition target's board state,
		 * even if the mem-unit was the last configure
		 * unit of the board.  When copy/rename completes
		 * this mem-unit will transitioned back to
		 * the configured state.  In the meantime, the
		 * board's must remain as is.
		 */
	}

	/* if board(s) had deleted memory, verify it is gone */
	rv = 0;
	memlist_read_lock();
	if (s_mp->sbm_del_mlist != NULL) {
		mp = s_mp;
		rv = memlist_intersect(phys_install, mp->sbm_del_mlist);
	}
	if (rv == 0 && t_mp && t_mp->sbm_del_mlist != NULL) {
		mp = t_mp;
		rv = memlist_intersect(phys_install, mp->sbm_del_mlist);
	}
	memlist_read_unlock();
	if (rv) {
		cmn_err(CE_WARN, "%s: %smem-unit (%d.%d): "
		    "deleted memory still found in phys_install",
		    f,
		    (mp == t_mp ? "target " : ""),
		    mp->sbm_cm.sbdev_bp->b_num,
		    mp->sbm_cm.sbdev_unum);

		DR_DEV_INTERNAL_ERROR(&s_mp->sbm_cm);
		return;
	}

	s_mp->sbm_flags |= DR_MFLAG_RELDONE;
	if (t_mp != NULL)
		t_mp->sbm_flags |= DR_MFLAG_RELDONE;

	/* this should not fail */
	if (dr_release_dev_done(&s_mp->sbm_cm) != 0) {
		/* catch this in debug kernels */
		ASSERT(0);
		return;
	}

	PR_MEM("%s: marking %s release DONE\n",
	    f, s_mp->sbm_cm.sbdev_path);

	s_mp->sbm_cm.sbdev_ostate = SBD_STAT_UNCONFIGURED;

	if (t_mp != NULL) {
		/* should not fail */
		rv = dr_release_dev_done(&t_mp->sbm_cm);
		if (rv != 0) {
			/* catch this in debug kernels */
			ASSERT(0);
			return;
		}

		PR_MEM("%s: marking %s release DONE\n",
		    f, t_mp->sbm_cm.sbdev_path);

		t_mp->sbm_cm.sbdev_ostate = SBD_STAT_UNCONFIGURED;
	}
}
int
dr_disconnect_mem(dr_mem_unit_t *mp)
{
	static fn_t	f = "dr_disconnect_mem";
	update_membounds_t umb;

#ifdef DEBUG
	int state = mp->sbm_cm.sbdev_state;
	ASSERT(state == DR_STATE_CONNECTED || state == DR_STATE_UNCONFIGURED);
#endif

	PR_MEM("%s...\n", f);

	if (mp->sbm_del_mlist && mp->sbm_del_mlist != mp->sbm_mlist)
		memlist_delete(mp->sbm_del_mlist);
	mp->sbm_del_mlist = NULL;

	if (mp->sbm_mlist) {
		memlist_delete(mp->sbm_mlist);
		mp->sbm_mlist = NULL;
	}

	/*
	 * Remove memory from lgroup
	 * For now, only board info is required.
	 */
	umb.u_board = mp->sbm_cm.sbdev_bp->b_num;
	umb.u_base = (uint64_t)-1;
	umb.u_len = (uint64_t)-1;

	lgrp_plat_config(LGRP_CONFIG_MEM_DEL, (uintptr_t)&umb);

	return (0);
}
int
dr_cancel_mem(dr_mem_unit_t *s_mp)
{
	dr_mem_unit_t	*t_mp;
	dr_state_t	state;
	static fn_t	f = "dr_cancel_mem";

	state = s_mp->sbm_cm.sbdev_state;

	if (s_mp->sbm_flags & DR_MFLAG_TARGET) {
		/* must cancel source board, not target board */
		/* TODO: set error */
		return (-1);
	} else if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
		t_mp = s_mp->sbm_peer;
		ASSERT(t_mp != NULL);
		ASSERT(t_mp->sbm_peer == s_mp);

		/* must always match the source board's state */
		/* TODO: is this assertion correct? */
		ASSERT(t_mp->sbm_cm.sbdev_state == state);
	} else {
		/* this is no target unit */
		t_mp = NULL;
	}

	switch (state) {
	case DR_STATE_UNREFERENCED:	/* state set by dr_release_dev_done */
		ASSERT((s_mp->sbm_flags & DR_MFLAG_RELOWNER) == 0);

		if (t_mp != NULL && t_mp->sbm_del_mlist != NULL) {
			PR_MEM("%s: undoing target %s memory delete\n",
			    f, t_mp->sbm_cm.sbdev_path);
			dr_add_memory_spans(t_mp, t_mp->sbm_del_mlist);

			DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm);
		}

		if (s_mp->sbm_del_mlist != NULL) {
			PR_MEM("%s: undoing %s memory delete\n",
			    f, s_mp->sbm_cm.sbdev_path);

			dr_add_memory_spans(s_mp, s_mp->sbm_del_mlist);
		}

		/*FALLTHROUGH*/

	/* TODO: should no longer be possible to see the release state here */
	case DR_STATE_RELEASE:	/* state set by dr_release_mem_done */

		ASSERT((s_mp->sbm_flags & DR_MFLAG_RELOWNER) == 0);

		if (t_mp != NULL) {
			ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
			t_mp->sbm_del_mlist = NULL;

			if (t_mp->sbm_mlist != NULL) {
				memlist_delete(t_mp->sbm_mlist);
				t_mp->sbm_mlist = NULL;
			}

			t_mp->sbm_peer = NULL;
			t_mp->sbm_flags = 0;
			t_mp->sbm_cm.sbdev_busy = 0;
			dr_init_mem_unit_data(t_mp);

			DR_DEV_CLR_RELEASED(&t_mp->sbm_cm);

			dr_device_transition(&t_mp->sbm_cm,
			    DR_STATE_CONFIGURED);
		}

		if (s_mp->sbm_del_mlist != s_mp->sbm_mlist)
			memlist_delete(s_mp->sbm_del_mlist);
		s_mp->sbm_del_mlist = NULL;

		if (s_mp->sbm_mlist != NULL) {
			memlist_delete(s_mp->sbm_mlist);
			s_mp->sbm_mlist = NULL;
		}

		s_mp->sbm_peer = NULL;
		s_mp->sbm_flags = 0;
		s_mp->sbm_cm.sbdev_busy = 0;
		dr_init_mem_unit_data(s_mp);

		return (0);

	default:
		PR_MEM("%s: WARNING unexpected state (%d) for %s\n",
		    f, (int)state, s_mp->sbm_cm.sbdev_path);

		return (-1);
	}
	/*NOTREACHED*/
}
void
dr_init_mem_unit(dr_mem_unit_t *mp)
{
	dr_state_t	new_state;

	if (DR_DEV_IS_ATTACHED(&mp->sbm_cm)) {
		new_state = DR_STATE_CONFIGURED;
		mp->sbm_cm.sbdev_cond = SBD_COND_OK;
	} else if (DR_DEV_IS_PRESENT(&mp->sbm_cm)) {
		new_state = DR_STATE_CONNECTED;
		mp->sbm_cm.sbdev_cond = SBD_COND_OK;
	} else if (mp->sbm_cm.sbdev_id != (drmachid_t)0) {
		new_state = DR_STATE_OCCUPIED;
	} else {
		new_state = DR_STATE_EMPTY;
	}

	if (DR_DEV_IS_PRESENT(&mp->sbm_cm))
		dr_init_mem_unit_data(mp);

	/* delay transition until fully initialized */
	dr_device_transition(&mp->sbm_cm, new_state);
}
static void
dr_init_mem_unit_data(dr_mem_unit_t *mp)
{
	drmachid_t	id = mp->sbm_cm.sbdev_id;
	uint64_t	bytes;
	sbd_error_t	*err;
	static fn_t	f = "dr_init_mem_unit_data";
	update_membounds_t umb;

	PR_MEM("%s...\n", f);

	/* a little sanity checking */
	ASSERT(mp->sbm_peer == NULL);
	ASSERT(mp->sbm_flags == 0);

	/* get basepfn of mem unit */
	err = drmach_mem_get_base_physaddr(id, &bytes);
	if (err) {
		DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
		mp->sbm_basepfn = (pfn_t)-1;
	} else
		mp->sbm_basepfn = _b64top(bytes);

	/* attempt to get number of pages from PDA */
	err = drmach_mem_get_size(id, &bytes);
	if (err) {
		DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
		mp->sbm_npages = 0;
	} else
		mp->sbm_npages = _b64top(bytes);

	/* if didn't work, calculate using memlist */
	if (mp->sbm_npages == 0) {
		struct memlist	*ml, *mlist;

		/*
		 * Either we couldn't open the PDA or our
		 * PDA has garbage in it.  We must have the
		 * page count consistent and whatever the
		 * OS states has precedence over the PDA
		 * so let's check the kernel.
		 */
/* TODO: curious comment. it suggests pda query should happen if this fails */
		PR_MEM("%s: PDA query failed for npages."
		    " Checking memlist for %s\n",
		    f, mp->sbm_cm.sbdev_path);

		mlist = dr_get_memlist(mp);
		for (ml = mlist; ml; ml = ml->ml_next)
			mp->sbm_npages += btop(ml->ml_size);
		memlist_delete(mlist);
	}

	err = drmach_mem_get_alignment(id, &bytes);
	if (err) {
		DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
		mp->sbm_alignment_mask = 0;
	} else
		mp->sbm_alignment_mask = _b64top(bytes);

	err = drmach_mem_get_slice_size(id, &bytes);
	if (err) {
		DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
		mp->sbm_slice_size = 0; /* paranoia */
	} else
		mp->sbm_slice_size = bytes;

	/*
	 * Add memory to lgroup
	 */
	umb.u_board = mp->sbm_cm.sbdev_bp->b_num;
	umb.u_base = (uint64_t)mp->sbm_basepfn << MMU_PAGESHIFT;
	umb.u_len = (uint64_t)mp->sbm_npages << MMU_PAGESHIFT;

	lgrp_plat_config(LGRP_CONFIG_MEM_ADD, (uintptr_t)&umb);

	PR_MEM("%s: %s (basepfn = 0x%lx, npgs = %ld)\n",
	    f, mp->sbm_cm.sbdev_path, mp->sbm_basepfn, mp->sbm_npages);
}
static int
dr_reserve_mem_spans(memhandle_t *mhp, struct memlist *ml)
{
	struct memlist	*mc;
	int		err;
	pfn_t		base;
	pgcnt_t		npgs;
	static fn_t	f = "dr_reserve_mem_spans";

	PR_MEM("%s...\n", f);

	/*
	 * Walk the supplied memlist scheduling each span for removal
	 * with kphysm_del_span.  It is possible that a span may intersect
	 * an area occupied by the cage.
	 */
	for (mc = ml; mc != NULL; mc = mc->ml_next) {
		base = _b64top(mc->ml_address);
		npgs = _b64top(mc->ml_size);

		err = kphysm_del_span(*mhp, base, npgs);
		if (err != KPHYSM_OK) {
			cmn_err(CE_WARN, "%s memory reserve failed."
			    " unexpected kphysm_del_span return value %d;"
			    " basepfn=0x%lx npages=%ld",
			    f, err, base, npgs);

			return (-1);
		}
	}

	return (0);
}
/* debug counters */
int dr_smt_realigned;
int dr_smt_preference[4];

#ifdef DEBUG
uint_t dr_ignore_board; /* if bit[bnum-1] set, board won't be candidate */
#endif
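/*
 * dr_smt_preference[] indices correspond to the four candidate
 * categories described below: 0 = same size, 1 = larger, 2 = smaller,
 * 3 = smaller with a clipped (reduced) usable range.
 */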
/*
 * Find and reserve a copy/rename target board suitable for the
 * given source board.
 * All boards in the system are examined and categorized in relation to
 * their memory size versus the source board's memory size.  Order of
 * preference is:
 *	1st: board has same memory size
 *	2nd: board has larger memory size
 *	3rd: board has smaller memory size
 *	4th: board has smaller memory size, available memory will be reduced.
 * Boards in category 3 and 4 will have their MC's reprogrammed to locate the
 * span to which the MC responds to address span that appropriately covers
 * the nonrelocatable span of the source board.
 */
static int
dr_select_mem_target(dr_handle_t *hp,
	dr_mem_unit_t *s_mp, struct memlist *s_ml)
{
	pgcnt_t		 sz = _b64top(s_mp->sbm_slice_size);
	pgcnt_t		 sm = sz - 1; /* mem_slice_mask */
	pfn_t		 s_phi, t_phi;

	int		 n_sets = 4; /* same, larger, smaller, clipped */
	int		 preference; /* lower value is higher preference */
	int		 n_units_per_set;
	int		 idx;
	dr_mem_unit_t	**sets;

	int		 t_bd;
	int		 t_unit;
	int		 rv;
	int		 allow_src_memrange_modify;
	int		 allow_targ_memrange_modify;
	drmachid_t	 t_id;

	dr_board_t	*s_bp, *t_bp;
	dr_mem_unit_t	*t_mp, *c_mp;
	struct memlist	*d_ml, *t_ml, *x_ml;
	memquery_t	 s_mq = {0};
	static fn_t	 f = "dr_select_mem_target";

	PR_MEM("%s...\n", f);

	ASSERT(s_ml != NULL);

	n_units_per_set = MAX_BOARDS * MAX_MEM_UNITS_PER_BOARD;
	sets = GETSTRUCT(dr_mem_unit_t *, n_units_per_set * n_sets);

	s_bp = hp->h_bd;
	/* calculate the offset into the slice of the last source board pfn */
	ASSERT(s_mp->sbm_npages != 0);
	s_phi = (s_mp->sbm_basepfn + s_mp->sbm_npages - 1) & sm;

	allow_src_memrange_modify = drmach_allow_memrange_modify(s_bp->b_id);

	/*
	 * Make one pass through all memory units on all boards
	 * and categorize them with respect to the source board.
	 */
	for (t_bd = 0; t_bd < MAX_BOARDS; t_bd++) {
		/*
		 * The board structs are a contiguous array
		 * so we take advantage of that to find the
		 * correct board struct pointer for a given
		 * board number.
		 */
		t_bp = dr_lookup_board(t_bd);

		/* source board can not be its own target */
		if (s_bp->b_num == t_bp->b_num)
			continue;

		for (t_unit = 0; t_unit < MAX_MEM_UNITS_PER_BOARD; t_unit++) {

			t_mp = dr_get_mem_unit(t_bp, t_unit);

			/* this memory node must be attached */
			if (!DR_DEV_IS_ATTACHED(&t_mp->sbm_cm))
				continue;

			/* source unit can not be its own target */
			if (s_mp == t_mp) {
				/* catch this in debug kernels */
				ASSERT(0);
				continue;
			}

			/*
			 * this memory node must not already be reserved
			 * by some other memory delete operation.
			 */
			if (t_mp->sbm_flags & DR_MFLAG_RESERVED)
				continue;

			/*
			 * categorize the memory node
			 * If this is a smaller memory node, create a
			 * temporary, edited copy of the source board's
			 * memlist containing only the span of the non-
			 * relocatable pages.
			 */
			t_phi = (t_mp->sbm_basepfn + t_mp->sbm_npages - 1) &
			    sm;
			t_id = t_mp->sbm_cm.sbdev_bp->b_id;
			allow_targ_memrange_modify =
			    drmach_allow_memrange_modify(t_id);
			if (t_mp->sbm_npages == s_mp->sbm_npages &&
			    t_phi == s_phi) {
				preference = 0;
				t_mp->sbm_slice_offset = 0;
			} else if (t_mp->sbm_npages > s_mp->sbm_npages &&
			    t_phi >= s_phi) {
				/*
				 * Selecting this target will require modifying
				 * the source and/or target physical address
				 * ranges.  Skip if not supported by platform.
				 */
				if (!allow_src_memrange_modify ||
				    !allow_targ_memrange_modify) {
					PR_MEM("%s: skip target %s, memory "
					    "range relocation not supported "
					    "by platform\n", f,
					    t_mp->sbm_cm.sbdev_path);
					continue;
				}
				preference = 1;
				t_mp->sbm_slice_offset = 0;
			} else {
				pfn_t pfn = 0;

				/*
				 * Selecting this target will require modifying
				 * the source and/or target physical address
				 * ranges.  Skip if not supported by platform.
				 */
				if (!allow_src_memrange_modify ||
				    !allow_targ_memrange_modify) {
					PR_MEM("%s: skip target %s, memory "
					    "range relocation not supported "
					    "by platform\n", f,
					    t_mp->sbm_cm.sbdev_path);
					continue;
				}

				/*
				 * Check if its mc can be programmed to relocate
				 * the active address range to match the
				 * nonrelocatable span of the source board.
				 */
				preference = 2;

				if (s_mq.phys_pages == 0) {
					/*
					 * find non-relocatable span on
					 * source board.
					 */
					rv = kphysm_del_span_query(
					    s_mp->sbm_basepfn,
					    s_mp->sbm_npages, &s_mq);
					if (rv != KPHYSM_OK) {
						PR_MEM("%s: %s: unexpected"
						    " kphysm_del_span_query"
						    " return value %d;"
						    " basepfn 0x%lx,"
						    " npages %ld\n",
						    f,
						    s_mp->sbm_cm.sbdev_path,
						    rv,
						    s_mp->sbm_basepfn,
						    s_mp->sbm_npages);

						/* paranoia */
						s_mq.phys_pages = 0;

						continue;
					}

					/* more paranoia */
					ASSERT(s_mq.phys_pages != 0);
					ASSERT(s_mq.nonrelocatable != 0);

					/*
					 * this should not happen
					 * if it does, it simply means that
					 * we can not proceed with qualifying
					 * this target candidate.
					 */
					if (s_mq.nonrelocatable == 0)
						continue;

					PR_MEM("%s: %s: nonrelocatable"
					    " span (0x%lx..0x%lx)\n",
					    f,
					    s_mp->sbm_cm.sbdev_path,
					    s_mq.first_nonrelocatable,
					    s_mq.last_nonrelocatable);
				}

				/*
				 * Round down the starting pfn of the
				 * nonrelocatable span on the source board
				 * to nearest programmable boundary possible
				 * with this target candidate.
				 */
				pfn = s_mq.first_nonrelocatable &
				    ~t_mp->sbm_alignment_mask;

				/* skip candidate if memory is too small */
				if (pfn + t_mp->sbm_npages <
				    s_mq.last_nonrelocatable)
					continue;

				/*
				 * reprogramming an mc to relocate its
				 * active address range means the beginning
				 * address to which the DIMMS respond will
				 * be somewhere above the slice boundary
				 * address.  The larger the size of memory
				 * on this unit, the more likely part of it
				 * will exist beyond the end of the slice.
				 * The portion of the memory that does is
				 * unavailable to the system until the mc
				 * is reprogrammed to a more favorable base
				 * address.
				 * An attempt is made to avoid the loss by
				 * recalculating the mc base address relative
				 * to the end of the slice.  This may produce
				 * a more favorable result.  If not, we lower
				 * the board's preference rating so that it
				 * is one of the last candidate boards to be
				 * selected.
				 */
				if ((pfn + t_mp->sbm_npages) & ~sm) {
					pfn_t p;

					ASSERT(sz >= t_mp->sbm_npages);

					/*
					 * calculate an alternative starting
					 * address relative to the end of the
					 * slice's address space.
					 */
					p = pfn & ~sm;
					p = p + (sz - t_mp->sbm_npages);
					p = p & ~t_mp->sbm_alignment_mask;

					if ((p > s_mq.first_nonrelocatable) ||
					    (p + t_mp->sbm_npages <
					    s_mq.last_nonrelocatable)) {

						/*
						 * alternative starting addr
						 * won't work. Lower preference
						 * rating of this board, since
						 * some number of pages will be
						 * unavailable for use.
						 */
						preference = 3;
					} else {
						dr_smt_realigned++;
						pfn = p;
					}
				}

				/*
				 * translate calculated pfn to an offset
				 * relative to the slice boundary.  If the
				 * candidate board is selected, this offset
				 * will be used to calculate the values
				 * programmed into the mc.
				 */
				t_mp->sbm_slice_offset = pfn & sm;
				PR_MEM("%s: %s:"
				    " proposed mc offset 0x%lx\n",
				    f,
				    t_mp->sbm_cm.sbdev_path,
				    t_mp->sbm_slice_offset);
			}

			dr_smt_preference[preference]++;

			/* calculate index to start of preference set */
			idx  = n_units_per_set * preference;
			/* calculate offset to respective element */
			idx += t_bd * MAX_MEM_UNITS_PER_BOARD + t_unit;

			ASSERT(idx < n_units_per_set * n_sets);
			sets[idx] = t_mp;
		}
	}

	/*
	 * NOTE: this would be a good place to sort each candidate
	 * set into some desired order, e.g. memory size in ascending
	 * order.  Without an additional sorting step here, the order
	 * within a set is ascending board number order.
	 */
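	/*
	 * A minimal sketch of such a sort (not part of this code), run
	 * once per preference set over its n_units_per_set slots, keyed
	 * on sbm_npages; "set" is a hypothetical pointer to the first
	 * slot of one set, and NULL (unused) slots sink to the end:
	 *
	 *	int i, j;
	 *	for (i = 1; i < n_units_per_set; i++) {
	 *		dr_mem_unit_t *key = set[i];
	 *		if (key == NULL)
	 *			continue;
	 *		for (j = i - 1; j >= 0 && (set[j] == NULL ||
	 *		    set[j]->sbm_npages > key->sbm_npages); j--)
	 *			set[j + 1] = set[j];
	 *		set[j + 1] = key;
	 *	}
	 */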
	c_mp = NULL;
	x_ml = NULL;
	t_ml = NULL;
	for (idx = 0; idx < n_units_per_set * n_sets; idx++) {
		memquery_t	mq;

		/* cleanup t_ml after previous pass */
		if (t_ml != NULL) {
			memlist_delete(t_ml);
			t_ml = NULL;
		}

		/* get candidate target board mem unit */
		t_mp = sets[idx];
		if (t_mp == NULL)
			continue;

		/* get target board memlist */
		t_ml = dr_get_memlist(t_mp);
		if (t_ml == NULL) {
			cmn_err(CE_WARN, "%s: no memlist for"
			    " mem-unit %d, board %d",
			    f,
			    t_mp->sbm_cm.sbdev_bp->b_num,
			    t_mp->sbm_cm.sbdev_unum);

			continue;
		}

		/* get appropriate source board memlist */
		t_phi = (t_mp->sbm_basepfn + t_mp->sbm_npages - 1) & sm;
		if (t_mp->sbm_npages < s_mp->sbm_npages || t_phi < s_phi) {
			spgcnt_t excess;

			/*
			 * make a copy of the source board memlist
			 * then edit it to remove the spans that
			 * are outside the calculated span of
			 * [pfn..s_mq.last_nonrelocatable].
			 */
			if (x_ml != NULL)
				memlist_delete(x_ml);

			x_ml = memlist_dup(s_ml);
			if (x_ml == NULL) {
				PR_MEM("%s: memlist_dup failed\n", f);
				/* TODO: should abort */
				continue;
			}

			/* trim off lower portion */
			excess = t_mp->sbm_slice_offset -
			    (s_mp->sbm_basepfn & sm);

			if (excess > 0) {
				x_ml = memlist_del_span(
				    x_ml,
				    _ptob64(s_mp->sbm_basepfn),
				    _ptob64(excess));
			}
			ASSERT(x_ml);

			/*
			 * Since this candidate target board is smaller
			 * than the source board, s_mq must have been
			 * initialized in previous loop while processing
			 * this or some other candidate board.
			 * FIXME: this is weak.
			 */
			ASSERT(s_mq.phys_pages != 0);

			/* trim off upper portion */
			excess = (s_mp->sbm_basepfn + s_mp->sbm_npages)
			    - (s_mq.last_nonrelocatable + 1);
			if (excess > 0) {
				pfn_t p;

				p  = s_mq.last_nonrelocatable + 1;
				x_ml = memlist_del_span(
				    x_ml,
				    _ptob64(p),
				    _ptob64(excess));
			}

			PR_MEM("%s: %s: edited source memlist:\n",
			    f, s_mp->sbm_cm.sbdev_path);
			PR_MEMLIST_DUMP(x_ml);

			/* sanity check memlist */
			d_ml = x_ml;
			while (d_ml->ml_next != NULL)
				d_ml = d_ml->ml_next;

			ASSERT(d_ml->ml_address + d_ml->ml_size ==
			    _ptob64(s_mq.last_nonrelocatable + 1));

			/*
			 * x_ml now describes only the portion of the
			 * source board that will be moved during the
			 * copy/rename operation.
			 */
			d_ml = x_ml;
		} else {
			/* use original memlist; all spans will be moved */
			d_ml = s_ml;
		}

		/* verify target can support source memory spans. */
		if (memlist_canfit(d_ml, t_ml) == 0) {
			PR_MEM("%s: source memlist won't"
			    " fit in target memlist\n", f);
			PR_MEM("%s: source memlist:\n", f);
			PR_MEMLIST_DUMP(d_ml);
			PR_MEM("%s: target memlist:\n", f);
			PR_MEMLIST_DUMP(t_ml);

			continue;
		}

		/* NOTE: the value of d_ml is not used beyond this point */

		PR_MEM("%s: checking for no-reloc in %s, "
		    " basepfn=0x%lx, npages=%ld\n",
		    f,
		    t_mp->sbm_cm.sbdev_path,
		    t_mp->sbm_basepfn,
		    t_mp->sbm_npages);

		rv = kphysm_del_span_query(
		    t_mp->sbm_basepfn, t_mp->sbm_npages, &mq);
		if (rv != KPHYSM_OK) {
			PR_MEM("%s: kphysm_del_span_query:"
			    " unexpected return value %d\n", f, rv);

			continue;
		}

		if (mq.nonrelocatable != 0) {
			PR_MEM("%s: candidate %s has"
			    " nonrelocatable span [0x%lx..0x%lx]\n",
			    f,
			    t_mp->sbm_cm.sbdev_path,
			    mq.first_nonrelocatable,
			    mq.last_nonrelocatable);

			continue;
		}

#ifdef DEBUG
		/*
		 * This is a debug tool for excluding certain boards
		 * from being selected as a target board candidate.
		 * dr_ignore_board is only tested by this driver.
		 * It must be set with adb, obp, /etc/system or your
		 * favorite debugger.
		 */
		if (dr_ignore_board &
		    (1 << (t_mp->sbm_cm.sbdev_bp->b_num - 1))) {
			PR_MEM("%s: dr_ignore_board flag set,"
			    " ignoring %s as candidate\n",
			    f, t_mp->sbm_cm.sbdev_path);
			continue;
		}
#endif

		/*
		 * Reserve excess source board memory, if any.
		 *
		 * When the number of pages on the candidate target
		 * board is less than the number of pages on the source,
		 * then some spans (clearly) of the source board's address
		 * space will not be covered by physical memory after the
		 * copy/rename completes.  The following code block
		 * schedules those spans to be deleted.
		 */
		if (t_mp->sbm_npages < s_mp->sbm_npages || t_phi < s_phi) {
			pfn_t		pfn;
			uint64_t	s_del_pa;
			struct memlist	*ml;

			d_ml = memlist_dup(s_ml);
			if (d_ml == NULL) {
				PR_MEM("%s: cant dup src brd memlist\n", f);
				/* TODO: should abort */
				continue;
			}

			/* calculate base pfn relative to target board */
			pfn  = s_mp->sbm_basepfn & ~sm;
			pfn += t_mp->sbm_slice_offset;

			/*
			 * cannot split dynamically added segment
			 */
			s_del_pa = _ptob64(pfn + t_mp->sbm_npages);
			PR_MEM("%s: proposed src delete pa=0x%lx\n", f,
			    s_del_pa);
			PR_MEM("%s: checking for split of dyn seg list:\n", f);
			PR_MEMLIST_DUMP(s_mp->sbm_dyn_segs);
			for (ml = s_mp->sbm_dyn_segs; ml; ml = ml->ml_next) {
				if (s_del_pa > ml->ml_address &&
				    s_del_pa < ml->ml_address + ml->ml_size) {
					s_del_pa = ml->ml_address;
					break;
				}
			}

			/* remove span that will reside on candidate board */
			d_ml = memlist_del_span(d_ml, _ptob64(pfn),
			    s_del_pa - _ptob64(pfn));

			PR_MEM("%s: %s: reserving src brd memlist:\n",
			    f, s_mp->sbm_cm.sbdev_path);
			PR_MEMLIST_DUMP(d_ml);

			/* reserve excess spans */
			if (dr_reserve_mem_spans(&s_mp->sbm_memhandle, d_ml)
			    != 0) {

				/* likely more non-reloc pages appeared */
				/* TODO: restart from top? */
				continue;
			}
		} else {
			/* no excess source board memory */
			d_ml = NULL;
		}

		s_mp->sbm_flags |= DR_MFLAG_RESERVED;

		/*
		 * reserve all memory on target board.
		 * NOTE: source board's memhandle is used.
		 *
		 * If this succeeds (eq 0), then target selection is
		 * complete and all unwanted memory spans, both source and
		 * target, have been reserved.  Loop is terminated.
		 */
		if (dr_reserve_mem_spans(&s_mp->sbm_memhandle, t_ml) == 0) {
			PR_MEM("%s: %s: target board memory reserved\n",
			    f, t_mp->sbm_cm.sbdev_path);

			/* a candidate target board is now reserved */
			t_mp->sbm_flags |= DR_MFLAG_RESERVED;
			c_mp = t_mp;

			/* *** EXITING LOOP *** */
			break;
		}

		/* did not successfully reserve the target board. */
		PR_MEM("%s: could not reserve target %s\n",
		    f, t_mp->sbm_cm.sbdev_path);

		/*
		 * NOTE: an undo of the dr_reserve_mem_span work
		 * will happen automatically when the memhandle
		 * (s_mp->sbm_memhandle) is kphysm_del_release'd.
		 */

		s_mp->sbm_flags &= ~DR_MFLAG_RESERVED;
	}

	/* clean up after memlist editing logic */
	if (x_ml != NULL)
		memlist_delete(x_ml);

	FREESTRUCT(sets, dr_mem_unit_t *, n_units_per_set * n_sets);

	/*
	 * c_mp will be NULL when the entire sets[] array
	 * has been searched without reserving a target board.
	 */
	if (c_mp == NULL) {
		PR_MEM("%s: %s: target selection failed.\n",
		    f, s_mp->sbm_cm.sbdev_path);

		if (t_ml != NULL)
			memlist_delete(t_ml);

		return (-1);
	}

	PR_MEM("%s: found target %s for source %s\n",
	    f,
	    c_mp->sbm_cm.sbdev_path,
	    s_mp->sbm_cm.sbdev_path);

	s_mp->sbm_peer = c_mp;
	s_mp->sbm_flags |= DR_MFLAG_SOURCE;
	s_mp->sbm_del_mlist = d_ml;	/* spans to be deleted, if any */
	s_mp->sbm_mlist = s_ml;
	s_mp->sbm_cm.sbdev_busy = 1;

	c_mp->sbm_peer = s_mp;
	c_mp->sbm_flags |= DR_MFLAG_TARGET;
	c_mp->sbm_del_mlist = t_ml;	/* spans to be deleted */
	c_mp->sbm_mlist = t_ml;
	c_mp->sbm_cm.sbdev_busy = 1;

	s_mp->sbm_flags &= ~DR_MFLAG_MEMRESIZE;
	if (c_mp->sbm_npages > s_mp->sbm_npages) {
		s_mp->sbm_flags |= DR_MFLAG_MEMUPSIZE;
		PR_MEM("%s: upsize detected (source=%ld < target=%ld)\n",
		    f, s_mp->sbm_npages, c_mp->sbm_npages);
	} else if (c_mp->sbm_npages < s_mp->sbm_npages) {
		s_mp->sbm_flags |= DR_MFLAG_MEMDOWNSIZE;
		PR_MEM("%s: downsize detected (source=%ld > target=%ld)\n",
		    f, s_mp->sbm_npages, c_mp->sbm_npages);
	}

	return (0);
}
/*
 * Determine whether the source memlist (s_mlist) will
 * fit into the target memlist (t_mlist) in terms of
 * size and holes (i.e. based on same relative base address).
 */
static int
memlist_canfit(struct memlist *s_mlist, struct memlist *t_mlist)
{
	int		rv = 0;
	uint64_t	s_basepa, t_basepa;
	struct memlist	*s_ml, *t_ml;

	if ((s_mlist == NULL) || (t_mlist == NULL))
		return (0);

	/*
	 * Base both memlists on common base address (0).
	 */
	s_basepa = s_mlist->ml_address;
	t_basepa = t_mlist->ml_address;

	for (s_ml = s_mlist; s_ml; s_ml = s_ml->ml_next)
		s_ml->ml_address -= s_basepa;

	for (t_ml = t_mlist; t_ml; t_ml = t_ml->ml_next)
		t_ml->ml_address -= t_basepa;

	s_ml = s_mlist;
	for (t_ml = t_mlist; t_ml && s_ml; t_ml = t_ml->ml_next) {
		uint64_t	s_start, s_end;
		uint64_t	t_start, t_end;

		t_start = t_ml->ml_address;
		t_end = t_start + t_ml->ml_size;

		for (; s_ml; s_ml = s_ml->ml_next) {
			s_start = s_ml->ml_address;
			s_end = s_start + s_ml->ml_size;

			if ((s_start < t_start) || (s_end > t_end))
				break;
		}
	}

	/*
	 * If we ran out of source memlist chunks that means
	 * we found a home for all of them.
	 */
	if (s_ml == NULL)
		rv = 1;

	/*
	 * Need to add base addresses back since memlists
	 * are probably in use by caller.
	 */
	for (s_ml = s_mlist; s_ml; s_ml = s_ml->ml_next)
		s_ml->ml_address += s_basepa;

	for (t_ml = t_mlist; t_ml; t_ml = t_ml->ml_next)
		t_ml->ml_address += t_basepa;

	return (rv);
}
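/*
 * Example of the fit semantics (illustrative numbers): a source list
 * {[0x1000, 0x2000)} fits a target list {[0x8000, 0xA000)} because both
 * are rebased to 0, and the lone source chunk [0x0, 0x1000) lies inside
 * the target chunk [0x0, 0x2000), so memlist_canfit() returns 1.  A
 * source chunk straddling a hole in the target would trip the
 * (s_start < t_start) || (s_end > t_end) test and the result would be 0.
 */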