/*	$NetBSD: rf_reconbuffer.c,v 1.23 2005/12/11 12:23:37 christos Exp $	*/
/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/***************************************************
 *
 * rf_reconbuffer.c -- reconstruction buffer manager
 *
 ***************************************************/

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: rf_reconbuffer.c,v 1.23 2005/12/11 12:23:37 christos Exp $");

#include "rf_raid.h"
#include "rf_reconbuffer.h"
#include "rf_acctrace.h"
#include "rf_etimer.h"
#include "rf_general.h"
#include "rf_revent.h"
#include "rf_reconutil.h"
#include "rf_nwayxor.h"

#if RF_DEBUG_RECON

#define Dprintf1(s,a) if (rf_reconbufferDebug) printf(s,a)
#define Dprintf2(s,a,b) if (rf_reconbufferDebug) printf(s,a,b)
#define Dprintf3(s,a,b,c) if (rf_reconbufferDebug) printf(s,a,b,c)
#define Dprintf4(s,a,b,c,d) if (rf_reconbufferDebug) printf(s,a,b,c,d)
#define Dprintf5(s,a,b,c,d,e) if (rf_reconbufferDebug) printf(s,a,b,c,d,e)

#else /* RF_DEBUG_RECON */

#define Dprintf1(s,a) {}
#define Dprintf2(s,a,b) {}
#define Dprintf3(s,a,b,c) {}
#define Dprintf4(s,a,b,c,d) {}
#define Dprintf5(s,a,b,c,d,e) {}

#endif /* RF_DEBUG_RECON */

/*****************************************************************************
 *
 * Submit a reconstruction buffer to the manager for XOR.  We can only
 * submit a buffer if (1) we can xor into an existing buffer, which
 * means we don't have to acquire a new one, (2) we can acquire a
 * floating recon buffer, or (3) the caller has indicated that we are
 * allowed to keep the submitted buffer.
 *
 * Returns non-zero if and only if we were not able to submit.
 * In this case, we append the current disk ID to the wait list on the
 * indicated RU, so that it will be re-enabled when we acquire a buffer
 * for this RU.
 *
 ****************************************************************************/
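
/*
 * In outline, the three cases above map onto rf_SubmitReconBufferBasic()
 * below roughly as follows: (1) a destination buffer already exists and
 * enough submissions have accumulated, so the buffer is folded in with an
 * immediate multi-way XOR; (2) a buffer "t" is taken from the committed or
 * floating lists and the submitted data is swapped into it; (3) keep_it is
 * set, so the submitted buffer itself becomes "t".  If none of these apply,
 * the submitting column is parked on bufferWaitList and non-zero is
 * returned.
 */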

/*
 * nWayXorFuncs[i] is a pointer to a function that will xor "i"
 * bufs into the accumulating sum.
 */
static const RF_VoidFuncPtr nWayXorFuncs[] = {
	NULL,
	(RF_VoidFuncPtr) rf_nWayXor1,
	(RF_VoidFuncPtr) rf_nWayXor2,
	(RF_VoidFuncPtr) rf_nWayXor3,
	(RF_VoidFuncPtr) rf_nWayXor4,
	(RF_VoidFuncPtr) rf_nWayXor5,
	(RF_VoidFuncPtr) rf_nWayXor6,
	(RF_VoidFuncPtr) rf_nWayXor7,
	(RF_VoidFuncPtr) rf_nWayXor8,
	(RF_VoidFuncPtr) rf_nWayXor9
};
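
/*
 * Dispatch sketch (illustrative; it mirrors the call in
 * rf_MultiWayReconXor() below): with numBufs source buffers accumulated,
 * the XOR is invoked as
 *
 *	nWayXorFuncs[numBufs](pssPtr->rbufsForXor, targetRbuf,
 *	    numBytes / sizeof(long));
 *
 * so slot 0 of the table is never used and stays NULL.
 */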

/*
 * rbuf          - the recon buffer to submit
 * keep_it       - whether we can keep this buffer or we have to return it
 * use_committed - whether to use a committed or an available recon buffer
 */
int
rf_SubmitReconBuffer(RF_ReconBuffer_t *rbuf, int keep_it, int use_committed)
{
	const RF_LayoutSW_t *lp;
	int     rc;

	lp = rbuf->raidPtr->Layout.map;
	rc = lp->SubmitReconBuffer(rbuf, keep_it, use_committed);
	return (rc);
}
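
/*
 * Note: rf_SubmitReconBuffer() is only a dispatcher; the per-layout switch
 * (Layout.map) supplies the actual handler.  rf_SubmitReconBufferBasic()
 * below is a generic implementation that a layout's switch entry can point
 * at.
 */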

/*
 * rbuf          - the recon buffer to submit
 * keep_it       - whether we can keep this buffer or we have to return it
 * use_committed - whether to use a committed or an available recon buffer
 */
int
rf_SubmitReconBufferBasic(RF_ReconBuffer_t *rbuf, int keep_it,
			  int use_committed)
{
	RF_Raid_t *raidPtr = rbuf->raidPtr;
	RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
	RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl;
	RF_ReconParityStripeStatus_t *pssPtr;
	RF_ReconBuffer_t *targetRbuf, *t = NULL;	/* temporary rbuf
							 * pointers */
	void   *ta;		/* temporary data buffer pointer */
	RF_CallbackDesc_t *cb, *p;
	int     retcode = 0;
	RF_Etimer_t timer;

	/* makes no sense to have a submission from the failed disk */
	RF_ASSERT(rbuf->col != reconCtrlPtr->fcol);

	Dprintf4("RECON: submission by col %d for psid %ld ru %d (failed offset %ld)\n",
	    rbuf->col, (long) rbuf->parityStripeID, rbuf->which_ru, (long) rbuf->failedDiskSectorOffset);

	RF_LOCK_PSS_MUTEX(raidPtr, rbuf->parityStripeID);

	RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);
	while(reconCtrlPtr->rb_lock) {
		ltsleep(&reconCtrlPtr->rb_lock, PRIBIO, "reconctlcnmhs", 0, &reconCtrlPtr->rb_mutex);
	}
	reconCtrlPtr->rb_lock = 1;
	RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
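
	/*
	 * The rb_mutex/rb_lock pair above is a hand-rolled sleep lock: the
	 * mutex only protects the rb_lock flag, ltsleep() parks us until the
	 * current holder clears the flag and wakeup()s it, and the matching
	 * release (rb_lock = 0 plus wakeup()) is performed on each of the
	 * function's return paths below.
	 */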

	pssPtr = rf_LookupRUStatus(raidPtr, reconCtrlPtr->pssTable, rbuf->parityStripeID, rbuf->which_ru, RF_PSS_NONE, NULL);
	RF_ASSERT(pssPtr);	/* if it didn't exist, we wouldn't have gotten
				 * an rbuf for it */

	/* check to see if enough buffers have accumulated to do an XOR.  If
	 * so, there's no need to acquire a floating rbuf.  Before we can do
	 * any XORing, we must have acquired a destination buffer.  If we
	 * have, then we can go ahead and do the XOR if (1) including this
	 * buffer, enough bufs have accumulated, or (2) this is the last
	 * submission for this stripe.  Otherwise, we have to go acquire a
	 * floating rbuf. */
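
	/*
	 * Worked example (hypothetical numbers): with rf_numBufsToAccumulate
	 * == 4 and numDataCol == 10, condition (1) fires on the submission
	 * that raises xorBufCount from 3 to 4, and condition (2) fires when
	 * targetRbuf->count + xorBufCount + 1 == 10, i.e. when this buffer
	 * is the last one still missing from the destination.
	 */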

	targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;
	if ((targetRbuf != NULL) &&
	    ((pssPtr->xorBufCount == rf_numBufsToAccumulate - 1) || (targetRbuf->count + pssPtr->xorBufCount + 1 == layoutPtr->numDataCol))) {
		pssPtr->rbufsForXor[pssPtr->xorBufCount++] = rbuf;	/* install this buffer */
		Dprintf2("RECON: col %d invoking a %d-way XOR\n", rbuf->col, pssPtr->xorBufCount);
		RF_ETIMER_START(timer);
		rf_MultiWayReconXor(raidPtr, pssPtr);
		RF_ETIMER_STOP(timer);
		RF_ETIMER_EVAL(timer);
		raidPtr->accumXorTimeUs += RF_ETIMER_VAL_US(timer);

		raidPtr->recon_tracerecs[rbuf->col].xor_us = RF_ETIMER_VAL_US(timer);
		RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
		RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
		raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us +=
		    RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
		RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer);

		rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]);

		rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol);

		/* if use_committed is on, we _must_ consume a buffer off the
		 * committed list. */
		if (use_committed) {
			t = reconCtrlPtr->committedRbufs;
			reconCtrlPtr->committedRbufs = t->next;
			rf_ReleaseFloatingReconBuffer(raidPtr, t);
		}
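
		/*
		 * Rationale (follows from rf_ReleaseFloatingReconBuffer()
		 * below): a committed buffer was reserved for this column
		 * when it was woken with RF_REVENT_BUFCLEAR, so even though
		 * the XOR above needed no new buffer, the reservation must
		 * still be taken off the committed list and the buffer put
		 * back into circulation.
		 */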
		if (keep_it) {
			RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->parityStripeID);
			RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);
			reconCtrlPtr->rb_lock = 0;
			wakeup(&reconCtrlPtr->rb_lock);
			RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
			rf_FreeReconBuffer(rbuf);
			return (retcode);
		}
		goto out;
	}
	/* set the value of "t", which we'll use as the rbuf from here on */
	if (keep_it) {
		t = rbuf;
	} else {
		if (use_committed) {	/* if a buffer has been committed to
					 * us, use it */
			t = reconCtrlPtr->committedRbufs;
			reconCtrlPtr->committedRbufs = t->next;
		} else
			if (reconCtrlPtr->floatingRbufs) {
				t = reconCtrlPtr->floatingRbufs;
				reconCtrlPtr->floatingRbufs = t->next;
			}
	}

	/* If we weren't able to acquire a buffer, append to the end of the
	 * buf list in the recon ctrl struct. */
	if (!t) {
		RF_ASSERT(!keep_it && !use_committed);
		Dprintf1("RECON: col %d failed to acquire floating rbuf\n", rbuf->col);

		raidPtr->procsInBufWait++;
		if ((raidPtr->procsInBufWait == raidPtr->numCol - 1) && (raidPtr->numFullReconBuffers == 0)) {
			printf("Buffer wait deadlock detected. Exiting.\n");
			rf_PrintPSStatusTable(raidPtr);
			RF_PANIC();
		}
		pssPtr->flags |= RF_PSS_BUFFERWAIT;
		cb = rf_AllocCallbackDesc();	/* append to buf wait list in
						 * recon ctrl structure */
		cb->col = rbuf->col;
		cb->callbackArg.v = rbuf->parityStripeID;
		cb->next = NULL;
		if (!reconCtrlPtr->bufferWaitList)
			reconCtrlPtr->bufferWaitList = cb;
		else {		/* might want to maintain head/tail pointers
				 * here rather than search for end of list */
			for (p = reconCtrlPtr->bufferWaitList; p->next; p = p->next);
			p->next = cb;
		}
		retcode = 1;
		goto out;
	}
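
	/*
	 * A column parked on bufferWaitList this way is re-enabled from
	 * rf_ReleaseFloatingReconBuffer() below: when a buffer frees up it
	 * is moved to the committed list and the waiter receives an
	 * RF_REVENT_BUFCLEAR event (arg 1, "we've committed a buffer"),
	 * which is presumably what the use_committed path above consumes on
	 * the retry.
	 */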
	Dprintf1("RECON: col %d acquired rbuf\n", rbuf->col);
	RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
	RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
	raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us +=
	    RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
	RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer);

	rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]);

	/* initialize the buffer */
	if (t != rbuf) {
		t->col = reconCtrlPtr->fcol;
		t->parityStripeID = rbuf->parityStripeID;
		t->which_ru = rbuf->which_ru;
		t->failedDiskSectorOffset = rbuf->failedDiskSectorOffset;
		t->spCol = rbuf->spCol;
		t->spOffset = rbuf->spOffset;

		ta = t->buffer;
		t->buffer = rbuf->buffer;
		rbuf->buffer = ta;	/* swap buffers */
	}
	/* the first installation always gets installed as the destination
	 * buffer.  subsequent installations get stacked up to allow for
	 * multi-way XOR */
	if (!pssPtr->rbuf) {
		pssPtr->rbuf = t;
		t->count = 1;
	} else
		pssPtr->rbufsForXor[pssPtr->xorBufCount++] = t;	/* install this buffer */

	rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol);	/* the buffer is full if
											 * G=2 */
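
	/*
	 * Illustrative numbers (hypothetical): with numDataCol == 4, the
	 * destination buffer is promoted to the full-buffer list by
	 * rf_CheckForFullRbuf() once its count reaches 4, i.e. once
	 * contributions for this RU from numDataCol surviving columns have
	 * been accumulated.
	 */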

out:
	RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->parityStripeID);
	RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);
	reconCtrlPtr->rb_lock = 0;
	wakeup(&reconCtrlPtr->rb_lock);
	RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
	return (retcode);
}

/* pssPtr - the pss descriptor for this parity stripe */
int
rf_MultiWayReconXor(RF_Raid_t *raidPtr, RF_ReconParityStripeStatus_t *pssPtr)
{
	int     i, numBufs = pssPtr->xorBufCount;
	int     numBytes = rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU);
	RF_ReconBuffer_t **rbufs = (RF_ReconBuffer_t **) pssPtr->rbufsForXor;
	RF_ReconBuffer_t *targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;

	RF_ASSERT(pssPtr->rbuf != NULL);
	RF_ASSERT(numBufs > 0 && numBufs < RF_PS_MAX_BUFS);
#ifdef _KERNEL
#ifndef __NetBSD__
	thread_block();		/* yield the processor before doing a big XOR */
#endif
#endif				/* _KERNEL */
	/*
	 * What if more than 9 bufs?
	 */
	nWayXorFuncs[numBufs] (pssPtr->rbufsForXor, targetRbuf, numBytes / sizeof(long));

	/* release all the reconstruction buffers except the last one, which
	 * belongs to the disk whose submission caused this XOR to take place */
	for (i = 0; i < numBufs - 1; i++) {
		if (rbufs[i]->type == RF_RBUF_TYPE_FLOATING)
			rf_ReleaseFloatingReconBuffer(raidPtr, rbufs[i]);
		else
			if (rbufs[i]->type == RF_RBUF_TYPE_FORCED)
				rf_FreeReconBuffer(rbufs[i]);
	}
	targetRbuf->count += pssPtr->xorBufCount;
	pssPtr->xorBufCount = 0;
	return (0);
}

/* removes one full buffer from one of the full-buffer lists and returns it.
 *
 * ASSUMES THE RB_MUTEX IS UNLOCKED AT ENTRY.
 */
RF_ReconBuffer_t *
rf_GetFullReconBuffer(RF_ReconCtrl_t *reconCtrlPtr)
{
	RF_ReconBuffer_t *p;

	RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);
	while(reconCtrlPtr->rb_lock) {
		ltsleep(&reconCtrlPtr->rb_lock, PRIBIO, "reconctlcnmhs", 0, &reconCtrlPtr->rb_mutex);
	}
	reconCtrlPtr->rb_lock = 1;
	RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);

	if ((p = reconCtrlPtr->fullBufferList) != NULL) {
		reconCtrlPtr->fullBufferList = p->next;
		p->next = NULL;
	}
	RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);
	reconCtrlPtr->rb_lock = 0;
	wakeup(&reconCtrlPtr->rb_lock);
	RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
	return (p);
}


/* if the reconstruction buffer is full, move it to the full list,
 * which is maintained sorted by failed disk sector offset
 *
 * ASSUMES THE RB_MUTEX IS LOCKED AT ENTRY.  */
int
rf_CheckForFullRbuf(RF_Raid_t *raidPtr, RF_ReconCtrl_t *reconCtrl,
		    RF_ReconParityStripeStatus_t *pssPtr, int numDataCol)
{
	RF_ReconBuffer_t *p, *pt, *rbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;

	if (rbuf->count == numDataCol) {
		raidPtr->numFullReconBuffers++;
		Dprintf2("RECON: rbuf for psid %ld ru %d has filled\n",
		    (long) rbuf->parityStripeID, rbuf->which_ru);
		if (!reconCtrl->fullBufferList || (rbuf->failedDiskSectorOffset < reconCtrl->fullBufferList->failedDiskSectorOffset)) {
			Dprintf2("RECON: rbuf for psid %ld ru %d is head of list\n",
			    (long) rbuf->parityStripeID, rbuf->which_ru);
			rbuf->next = reconCtrl->fullBufferList;
			reconCtrl->fullBufferList = rbuf;
		} else {
			for (pt = reconCtrl->fullBufferList, p = pt->next; p && p->failedDiskSectorOffset < rbuf->failedDiskSectorOffset; pt = p, p = p->next);
			rbuf->next = p;
			pt->next = rbuf;
			Dprintf2("RECON: rbuf for psid %ld ru %d is in list\n",
			    (long) rbuf->parityStripeID, rbuf->which_ru);
		}
		rbuf->pssPtr = pssPtr;
		rf_CauseReconEvent(raidPtr, rbuf->col, NULL, RF_REVENT_BUFREADY);
	}
	return (0);
}


/* release a floating recon buffer for someone else to use.
 * assumes the rb_mutex is LOCKED at entry
 */
void
rf_ReleaseFloatingReconBuffer(RF_Raid_t *raidPtr, RF_ReconBuffer_t *rbuf)
{
	RF_ReconCtrl_t *rcPtr = raidPtr->reconControl;
	RF_CallbackDesc_t *cb;

	Dprintf2("RECON: releasing rbuf for psid %ld ru %d\n",
	    (long) rbuf->parityStripeID, rbuf->which_ru);

	/* if anyone is waiting on buffers, wake one of them up. They will
	 * subsequently wake up anyone else waiting on their RU */
	if (rcPtr->bufferWaitList) {
		rbuf->next = rcPtr->committedRbufs;
		rcPtr->committedRbufs = rbuf;
		cb = rcPtr->bufferWaitList;
		rcPtr->bufferWaitList = cb->next;
		rf_CauseReconEvent(raidPtr, cb->col, (void *) 1, RF_REVENT_BUFCLEAR);	/* arg==1 => we've
											 * committed a buffer */
		rf_FreeCallbackDesc(cb);
		raidPtr->procsInBufWait--;
	} else {
		rbuf->next = rcPtr->floatingRbufs;
		rcPtr->floatingRbufs = rbuf;
	}
}