1 /* $NetBSD: rf_reconmap.c,v 1.30 2007/03/12 18:18:31 ad Exp $ */
3 * Copyright (c) 1995 Carnegie-Mellon University.
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
18 * Carnegie Mellon requests users of this software to return to
20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
29 /*************************************************************************
32 * code to maintain a map of what sectors have/have not been reconstructed
34 *************************************************************************/
36 #include <sys/cdefs.h>
37 __KERNEL_RCSID(0, "$NetBSD: rf_reconmap.c,v 1.30 2007/03/12 18:18:31 ad Exp $");
41 #include "rf_general.h"
44 /* special pointer values indicating that a reconstruction unit
45 * has been either totally reconstructed or not at all. Both
46 * are illegal pointer values, so you have to be careful not to
47 * dereference through them. RU_NOTHING must be zero, since
48 * MakeReconMap uses memset to initialize the structure. These are used
49 * only at the head of the list.
51 #define RU_ALL ((RF_ReconMapListElem_t *) -1)
52 #define RU_NOTHING ((RF_ReconMapListElem_t *) 0)
54 /* For most reconstructs we need at most 3 RF_ReconMapListElem_t's.
55 * Bounding the number we need is quite difficult, as it depends on how
56 * badly the sectors to be reconstructed get divided up. In the current
57 * code, the reconstructed sectors appeared aligned on stripe boundaries,
58 * and are always presented in stripe width units, so we're probably
59 * allocating quite a bit more than we'll ever need.
61 #define RF_NUM_RECON_POOL_ELEM 100
64 compact_stat_entry(RF_Raid_t
*, RF_ReconMap_t
*, int, int);
65 static void crunch_list(RF_ReconMap_t
*, RF_ReconMapListElem_t
*);
66 static RF_ReconMapListElem_t
*
67 MakeReconMapListElem(RF_ReconMap_t
*, RF_SectorNum_t
, RF_SectorNum_t
,
68 RF_ReconMapListElem_t
*);
70 FreeReconMapListElem(RF_ReconMap_t
*mapPtr
, RF_ReconMapListElem_t
* p
);
72 /*---------------------------------------------------------------------------
74 * Creates and initializes new Reconstruction map
76 * ru_sectors - size of reconstruction unit in sectors
77 * disk_sectors - size of disk in sectors
78 * spareUnitsPerDisk - zero unless distributed sparing
79 *-------------------------------------------------------------------------*/
82 rf_MakeReconMap(RF_Raid_t
*raidPtr
, RF_SectorCount_t ru_sectors
,
83 RF_SectorCount_t disk_sectors
,
84 RF_ReconUnitCount_t spareUnitsPerDisk
)
86 RF_RaidLayout_t
*layoutPtr
= &raidPtr
->Layout
;
87 RF_ReconUnitCount_t num_rus
= layoutPtr
->stripeUnitsPerDisk
/ layoutPtr
->SUsPerRU
;
90 RF_Malloc(p
, sizeof(RF_ReconMap_t
), (RF_ReconMap_t
*));
91 p
->sectorsPerReconUnit
= ru_sectors
;
92 p
->sectorsInDisk
= disk_sectors
;
94 p
->totalRUs
= num_rus
;
95 p
->spareRUs
= spareUnitsPerDisk
;
96 p
->unitsLeft
= num_rus
- spareUnitsPerDisk
;
98 p
->status_size
= RF_RECONMAP_SIZE
;
99 p
->high_ru
= p
->status_size
- 1;
102 RF_Malloc(p
->status
, p
->status_size
* sizeof(RF_ReconMapListElem_t
*), (RF_ReconMapListElem_t
**));
103 RF_ASSERT(p
->status
!= (RF_ReconMapListElem_t
**) NULL
);
105 (void) memset((char *) p
->status
, 0,
106 p
->status_size
* sizeof(RF_ReconMapListElem_t
*));
108 pool_init(&p
->elem_pool
, sizeof(RF_ReconMapListElem_t
), 0,
109 0, 0, "raidreconpl", NULL
, IPL_BIO
);
110 pool_prime(&p
->elem_pool
, RF_NUM_RECON_POOL_ELEM
);
112 rf_mutex_init(&p
->mutex
);
117 /*---------------------------------------------------------------------------
119 * marks a new set of sectors as reconstructed. All the possible
120 * mergings get complicated. To simplify matters, the approach I take
121 * is to just dump something into the list, and then clean it up
122 * (i.e. merge elements and eliminate redundant ones) in a second pass
123 * over the list (compact_stat_entry()). Not 100% efficient, since a
124 * structure can be allocated and then immediately freed, but it keeps
125 * this code from becoming (more of) a nightmare of special cases.
126 * The only thing that compact_stat_entry() assumes is that the list
127 * is sorted by startSector, and so this is the only condition I
128 * maintain here. (MCH)
130 * This code now uses a pool instead of the previous malloc/free
132 *-------------------------------------------------------------------------*/
135 rf_ReconMapUpdate(RF_Raid_t
*raidPtr
, RF_ReconMap_t
*mapPtr
,
136 RF_SectorNum_t startSector
, RF_SectorNum_t stopSector
)
138 RF_SectorCount_t sectorsPerReconUnit
= mapPtr
->sectorsPerReconUnit
;
139 RF_SectorNum_t i
, first_in_RU
, last_in_RU
, ru
;
140 RF_ReconMapListElem_t
*p
, *pt
;
142 RF_LOCK_MUTEX(mapPtr
->mutex
);
143 while(mapPtr
->lock
) {
144 ltsleep(&mapPtr
->lock
, PRIBIO
, "reconupdate", 0,
148 RF_UNLOCK_MUTEX(mapPtr
->mutex
);
149 RF_ASSERT(startSector
>= 0 && stopSector
< mapPtr
->sectorsInDisk
&&
150 stopSector
>= startSector
);
152 while (startSector
<= stopSector
) {
153 i
= startSector
/ mapPtr
->sectorsPerReconUnit
;
154 first_in_RU
= i
* sectorsPerReconUnit
;
155 last_in_RU
= first_in_RU
+ sectorsPerReconUnit
- 1;
157 /* do we need to move the queue? */
158 while (i
> mapPtr
->high_ru
) {
160 if (mapPtr
->status
[mapPtr
->head
]!=RU_ALL
) {
161 printf("\nraid%d: reconmap incorrect -- working on i %" PRIu64
"\n",
163 printf("raid%d: ru %" PRIu64
" not completed!!!\n",
164 raidPtr
->raidid
, mapPtr
->head
);
166 printf("raid%d: low: %" PRIu64
" high: %" PRIu64
"\n",
167 raidPtr
->raidid
, mapPtr
->low_ru
, mapPtr
->high_ru
);
169 panic("reconmap incorrect");
174 /* initialize "highest" RU status entry, which
175 will take over the current head postion */
176 mapPtr
->status
[mapPtr
->head
]=RU_NOTHING
;
180 if (mapPtr
->head
>= mapPtr
->status_size
)
185 ru
= i
- mapPtr
->low_ru
+ mapPtr
->head
;
186 if (ru
>= mapPtr
->status_size
)
187 ru
= ru
- mapPtr
->status_size
;
189 if ((ru
< 0) || (ru
>= mapPtr
->status_size
)) {
190 printf("raid%d: ru is bogus %" PRIu64
"%" PRIu64
"%" PRIu64
"%" PRIu64
"%" PRIu64
"\n",
191 raidPtr
->raidid
, i
, ru
, mapPtr
->head
, mapPtr
->low_ru
, mapPtr
->high_ru
);
192 panic("bogus ru in reconmap");
195 p
= mapPtr
->status
[ru
];
197 if (p
== RU_NOTHING
|| p
->startSector
> startSector
) {
198 /* insert at front of list */
200 mapPtr
->status
[ru
] = MakeReconMapListElem(mapPtr
,startSector
, RF_MIN(stopSector
, last_in_RU
), (p
== RU_NOTHING
) ? NULL
: p
);
202 } else {/* general case */
203 do { /* search for place to insert */
206 } while (p
&& (p
->startSector
< startSector
));
207 pt
->next
= MakeReconMapListElem(mapPtr
,startSector
, RF_MIN(stopSector
, last_in_RU
), p
);
210 compact_stat_entry(raidPtr
, mapPtr
, i
, ru
);
212 startSector
= RF_MIN(stopSector
, last_in_RU
) + 1;
214 RF_LOCK_MUTEX(mapPtr
->mutex
);
216 wakeup(&mapPtr
->lock
);
217 RF_UNLOCK_MUTEX(mapPtr
->mutex
);
222 /*---------------------------------------------------------------------------
224 * performs whatever list compactions can be done, and frees any space
225 * that is no longer necessary. Assumes only that the list is sorted
226 * by startSector. crunch_list() compacts a single list as much as
227 * possible, and the second block of code deletes the entire list if
228 * possible. crunch_list() is also called from
229 * MakeReconMapAccessList().
231 * When a recon unit is detected to be fully reconstructed, we set the
232 * corresponding bit in the parity stripe map so that the head follow
233 * code will not select this parity stripe again. This is redundant
234 * (but harmless) when compact_stat_entry is called from the
235 * reconstruction code, but necessary when called from the user-write
238 *-------------------------------------------------------------------------*/
241 compact_stat_entry(RF_Raid_t
*raidPtr
, RF_ReconMap_t
*mapPtr
, int i
, int j
)
243 RF_SectorCount_t sectorsPerReconUnit
= mapPtr
->sectorsPerReconUnit
;
244 RF_ReconMapListElem_t
*p
= mapPtr
->status
[j
];
246 crunch_list(mapPtr
, p
);
248 if ((p
->startSector
== i
* sectorsPerReconUnit
) &&
249 (p
->stopSector
== i
* sectorsPerReconUnit
+
250 sectorsPerReconUnit
- 1)) {
251 mapPtr
->status
[j
] = RU_ALL
;
253 FreeReconMapListElem(mapPtr
, p
);
259 crunch_list(RF_ReconMap_t
*mapPtr
, RF_ReconMapListElem_t
*listPtr
)
261 RF_ReconMapListElem_t
*pt
, *p
= listPtr
;
268 if (pt
->stopSector
>= p
->startSector
- 1) {
269 pt
->stopSector
= RF_MAX(pt
->stopSector
, p
->stopSector
);
271 FreeReconMapListElem(mapPtr
, p
);
279 /*---------------------------------------------------------------------------
281 * Allocate and fill a new list element
283 *-------------------------------------------------------------------------*/
285 static RF_ReconMapListElem_t
*
286 MakeReconMapListElem(RF_ReconMap_t
*mapPtr
, RF_SectorNum_t startSector
,
287 RF_SectorNum_t stopSector
, RF_ReconMapListElem_t
*next
)
289 RF_ReconMapListElem_t
*p
;
291 p
= pool_get(&mapPtr
->elem_pool
, PR_WAITOK
);
292 p
->startSector
= startSector
;
293 p
->stopSector
= stopSector
;
297 /*---------------------------------------------------------------------------
299 * Free a list element
301 *-------------------------------------------------------------------------*/
304 FreeReconMapListElem(RF_ReconMap_t
*mapPtr
, RF_ReconMapListElem_t
*p
)
306 pool_put(&mapPtr
->elem_pool
, p
);
308 /*---------------------------------------------------------------------------
310 * Free an entire status structure. Inefficient, but can be called at
313 *-------------------------------------------------------------------------*/
315 rf_FreeReconMap(RF_ReconMap_t
*mapPtr
)
317 RF_ReconMapListElem_t
*p
, *q
;
318 RF_ReconUnitCount_t numRUs
;
321 numRUs
= mapPtr
->sectorsInDisk
/ mapPtr
->sectorsPerReconUnit
;
322 if (mapPtr
->sectorsInDisk
% mapPtr
->sectorsPerReconUnit
)
325 for (i
= 0; i
< mapPtr
->status_size
; i
++) {
326 p
= mapPtr
->status
[i
];
327 while (p
!= RU_NOTHING
&& p
!= RU_ALL
) {
330 RF_Free(q
, sizeof(*q
));
334 pool_destroy(&mapPtr
->elem_pool
);
335 RF_Free(mapPtr
->status
, mapPtr
->status_size
*
336 sizeof(RF_ReconMapListElem_t
*));
337 RF_Free(mapPtr
, sizeof(RF_ReconMap_t
));
339 /*---------------------------------------------------------------------------
341 * returns nonzero if the indicated RU has been reconstructed already
343 *-------------------------------------------------------------------------*/
346 rf_CheckRUReconstructed(RF_ReconMap_t
*mapPtr
, RF_SectorNum_t startSector
)
351 i
= startSector
/ mapPtr
->sectorsPerReconUnit
;
353 if (i
< mapPtr
->low_ru
)
355 else if (i
> mapPtr
->high_ru
)
358 i
= i
- mapPtr
->low_ru
+ mapPtr
->head
;
359 if (i
>= mapPtr
->status_size
)
360 i
= i
- mapPtr
->status_size
;
361 if (mapPtr
->status
[i
] == RU_ALL
)
371 rf_UnitsLeftToReconstruct(RF_ReconMap_t
*mapPtr
)
373 RF_ASSERT(mapPtr
!= NULL
);
374 return (mapPtr
->unitsLeft
);
379 rf_PrintReconSchedule(RF_ReconMap_t
*mapPtr
, struct timeval
*starttime
)
381 static int old_pctg
= -1;
382 struct timeval tv
, diff
;
385 new_pctg
= 100 - (rf_UnitsLeftToReconstruct(mapPtr
) *
386 100 / mapPtr
->totalRUs
);
387 if (new_pctg
!= old_pctg
) {
389 RF_TIMEVAL_DIFF(starttime
, &tv
, &diff
);
390 printf("%d %d.%06d\n", (int) new_pctg
, (int) diff
.tv_sec
,