1 /* libbdev - IPC and recovery functions */
3 #include <minix/drivers.h>
4 #include <minix/bdev.h>
11 static void bdev_cancel(dev_t dev
)
13 /* Recovering the driver for the given device has failed repeatedly. Mark it as
14 * permanently unusable, and clean up any associated calls and resources.
16 bdev_call_t
*call
, *next
;
18 printf("bdev: giving up on major %d\n", major(dev
));
20 /* Cancel all pending asynchronous requests. */
23 while ((call
= bdev_call_iter_maj(dev
, call
, &next
)) != NULL
)
24 bdev_callback_asyn(call
, EDEADSRCDST
);
26 /* Mark the driver as unusable. */
27 bdev_driver_clear(dev
);
30 static int bdev_recover(dev_t dev
, int update_endpt
)
32 /* The IPC subsystem has signaled an error communicating to the driver
33 * associated with the given device. Try to recover. If 'update_endpt' is set,
34 * we need to find the new endpoint of the driver first. Return TRUE iff
35 * recovery has been successful.
37 bdev_call_t
*call
, *next
;
39 int r
, active
, nr_tries
;
41 /* Only print output if there is something to recover. Some drivers may be
42 * shut down and later restarted legitimately, and if they were not in use
43 * while that happened, there is no need to flood the console with messages.
45 active
= bdev_minor_is_open(dev
) || bdev_call_iter_maj(dev
, NULL
, &next
);
48 printf("bdev: recovering from a driver restart on major %d\n",
51 for (nr_tries
= 0; nr_tries
< RECOVER_TRIES
; nr_tries
++) {
52 /* First update the endpoint, if necessary. */
54 (void) bdev_driver_update(dev
);
56 if ((endpt
= bdev_driver_get(dev
)) == NONE
)
59 /* If anything goes wrong, update the endpoint again next time. */
62 /* Reopen all minor devices on the new driver. */
63 if ((r
= bdev_minor_reopen(dev
)) != OK
) {
64 /* If the driver died again, we may give it another try. */
68 /* If another error occurred, we cannot continue using the
69 * driver as is, but we also cannot force it to restart.
74 /* Resend all asynchronous requests. */
77 while ((call
= bdev_call_iter_maj(dev
, call
, &next
)) != NULL
) {
78 /* It is not strictly necessary that we manage to reissue all
79 * asynchronous requests successfully. We can fail them on an
80 * individual basis here, without affecting the overall
81 * recovery. Note that we will never get new IPC failures here.
83 if ((r
= bdev_restart_asyn(call
)) != OK
)
84 bdev_callback_asyn(call
, r
);
87 /* Recovery seems successful. We can now reissue the current
88 * synchronous request (if any), and continue normal operation.
91 printf("bdev: recovery successful, new driver at %d\n", endpt
);
96 /* Recovery failed repeatedly. Give up on this driver. */
102 void bdev_update(dev_t dev
, char *label
)
104 /* Set the endpoint for a driver. Perform recovery if necessary.
106 endpoint_t endpt
, old_endpt
;
108 old_endpt
= bdev_driver_get(dev
);
110 endpt
= bdev_driver_set(dev
, label
);
112 /* If updating the driver causes an endpoint change, we need to perform
113 * recovery, but not update the endpoint yet again.
115 if (old_endpt
!= NONE
&& old_endpt
!= endpt
)
116 bdev_recover(dev
, FALSE
/*update_endpt*/);
119 int bdev_senda(dev_t dev
, const message
*m_orig
, bdev_id_t id
)
121 /* Send an asynchronous request for the given device. This function will never
122 * get any new IPC errors sending to the driver. If sending an asynchronous
123 * request fails, we will find out through other ways later.
129 /* If we have no usable driver endpoint, fail instantly. */
130 if ((endpt
= bdev_driver_get(dev
)) == NONE
)
134 m
.m_lbdev_lblockdriver_msg
.id
= id
;
136 r
= asynsend(endpt
, &m
);
139 printf("bdev: asynsend to driver (%d) failed (%d)\n", endpt
, r
);
144 int bdev_sendrec(dev_t dev
, const message
*m_orig
)
146 /* Send a synchronous request for the given device, and wait for the reply.
147 * Return ERESTART if the caller should try to reissue the request.
153 /* If we have no usable driver endpoint, fail instantly. */
154 if ((endpt
= bdev_driver_get(dev
)) == NONE
)
157 /* Send the request and block until we receive a reply. */
159 m
.m_lbdev_lblockdriver_msg
.id
= NO_ID
;
161 r
= ipc_sendrec(endpt
, &m
);
163 /* If communication failed, the driver has died. We assume it will be
164 * restarted soon after, so we attempt recovery. Upon success, we let the
165 * caller reissue the synchronous request.
167 if (r
== EDEADSRCDST
) {
168 if (!bdev_recover(dev
, TRUE
/*update_endpt*/))
175 printf("bdev: IPC to driver (%d) failed (%d)\n", endpt
, r
);
179 if (m
.m_type
!= BDEV_REPLY
) {
180 printf("bdev: driver (%d) sent weird response (%d)\n",
185 /* The protocol contract states that no asynchronous reply can satisfy a
186 * synchronous SENDREC call, so we can never get an asynchronous reply here.
188 if (m
.m_lblockdriver_lbdev_reply
.id
!= NO_ID
) {
189 printf("bdev: driver (%d) sent invalid ID (%d)\n", endpt
,
190 m
.m_lblockdriver_lbdev_reply
.id
);
194 /* Unless the caller is misusing libbdev, we will only get ERESTART if we
195 * have managed to resend a raw block I/O request to the driver after a
196 * restart, but before VFS has had a chance to reopen the associated device
197 * first. This is highly exceptional, and hard to deal with correctly. We
198 * take the easiest route: sleep for a while so that VFS can reopen the
199 * device, and then resend the request. If the call keeps failing, the caller
200 * will eventually give up.
202 if (m
.m_lblockdriver_lbdev_reply
.status
== ERESTART
) {
203 printf("bdev: got ERESTART from driver (%d), sleeping for reopen\n",
211 /* Return the result of our request. */
212 return m
.m_lblockdriver_lbdev_reply
.status
;
215 static int bdev_receive(dev_t dev
, message
*m
)
217 /* Receive one valid message.
223 /* Retrieve and check the driver endpoint on every try, as it will
224 * change with each driver restart.
226 if ((endpt
= bdev_driver_get(dev
)) == NONE
)
229 r
= sef_receive(endpt
, m
);
231 if (r
== EDEADSRCDST
) {
232 /* If we reached the maximum number of retries, give up. */
233 if (++nr_tries
== DRIVER_TRIES
)
236 /* Attempt recovery. If successful, all asynchronous requests
237 * will have been resent, and we can retry receiving a reply.
239 if (!bdev_recover(dev
, TRUE
/*update_endpt*/))
246 printf("bdev: IPC to driver (%d) failed (%d)\n", endpt
, r
);
251 if (m
->m_type
!= BDEV_REPLY
) {
252 printf("bdev: driver (%d) sent weird response (%d)\n",
257 /* The caller is responsible for checking the ID and status. */
261 /* All tries failed, even though all recovery attempts succeeded. In this
262 * case, we let the caller recheck whether it wants to keep calling us,
263 * returning ERESTART to indicate we can be called again but did not actually
269 void bdev_reply_asyn(message
*m
)
271 /* A reply has come in from a disk driver.
278 /* This is a requirement for the caller. */
279 assert(m
->m_type
== BDEV_REPLY
);
281 /* Get the corresponding asynchronous call structure. */
282 id
= m
->m_lblockdriver_lbdev_reply
.id
;
284 if ((call
= bdev_call_get(id
)) == NULL
) {
285 printf("bdev: driver (%d) replied to unknown request (%d)\n",
286 m
->m_source
, m
->m_lblockdriver_lbdev_reply
.id
);
290 /* Make sure the reply was sent from the right endpoint. */
291 endpt
= bdev_driver_get(call
->dev
);
293 if (m
->m_source
!= endpt
) {
294 /* If the endpoint is NONE, this may be a stray reply. */
296 printf("bdev: driver (%d) replied to request not sent to it\n",
301 /* See the ERESTART comment in bdev_sendrec(). */
302 if (m
->m_lblockdriver_lbdev_reply
.status
== ERESTART
) {
303 printf("bdev: got ERESTART from driver (%d), sleeping for reopen\n",
308 if ((r
= bdev_restart_asyn(call
)) != OK
)
309 bdev_callback_asyn(call
, r
);
314 bdev_callback_asyn(call
, m
->m_lblockdriver_lbdev_reply
.status
);
317 int bdev_wait_asyn(bdev_id_t id
)
319 /* Wait for an asynchronous request to complete.
326 if ((call
= bdev_call_get(id
)) == NULL
)
332 if ((r
= bdev_receive(dev
, &m
)) != OK
&& r
!= ERESTART
)
335 /* Processing the reply will free up the call structure as a side
336 * effect. If we repeatedly get ERESTART, we will repeatedly resend the
337 * asynchronous request, which will then eventually hit the retry limit
338 * and we will break out of the loop.
343 } while (bdev_call_get(id
) != NULL
);