minix/lib/libbdev/ipc.c

   1 /* libbdev - IPC and recovery functions */
   2
   3 #include <minix/drivers.h>
   4 #include <minix/bdev.h>
   5 #include <assert.h>
   6
   7 #include "const.h"
   8 #include "type.h"
   9 #include "proto.h"
  10
  11 static void bdev_cancel(dev_t dev)
  12 {
  13 /* Recovering the driver for the given device has failed repeatedly. Mark it as
  14  * permanently unusable, and clean up any associated calls and resources.
  15  */
  16   bdev_call_t *call, *next;
  17
  18   printf("bdev: giving up on major %d\n", major(dev));
  19
  20   /* Cancel all pending asynchronous requests. */
  21   call = NULL;
  22
  23   while ((call = bdev_call_iter_maj(dev, call, &next)) != NULL)
  24         bdev_callback_asyn(call, EDEADSRCDST);
  25
  26   /* Mark the driver as unusable. */
  27   bdev_driver_clear(dev);
  28 }
  29
  30 static int bdev_recover(dev_t dev, int update_endpt)
  31 {
  32 /* The IPC subsystem has signaled an error communicating to the driver
  33  * associated with the given device. Try to recover. If 'update_endpt' is set,
  34  * we need to find the new endpoint of the driver first. Return TRUE iff
  35  * recovery has been successful.
  36  */
  37   bdev_call_t *call, *next;
  38   endpoint_t endpt;
  39   int r, active, nr_tries;
  40
  41   /* Only print output if there is something to recover. Some drivers may be
  42    * shut down and later restarted legitimately, and if they were not in use
  43    * while that happened, there is no need to flood the console with messages.
  44    */
  45   active = bdev_minor_is_open(dev) || bdev_call_iter_maj(dev, NULL, &next);
  46
  47   if (active)
  48         printf("bdev: recovering from a driver restart on major %d\n",
  49                 major(dev));
  50
  51   for (nr_tries = 0; nr_tries < RECOVER_TRIES; nr_tries++) {
  52         /* First update the endpoint, if necessary. */
  53         if (update_endpt)
  54                 (void) bdev_driver_update(dev);
  55
  56         if ((endpt = bdev_driver_get(dev)) == NONE)
  57                 break;
  58
  59         /* If anything goes wrong, update the endpoint again next time. */
  60         update_endpt = TRUE;
  61
  62         /* Reopen all minor devices on the new driver. */
  63         if ((r = bdev_minor_reopen(dev)) != OK) {
  64                 /* If the driver died again, we may give it another try. */
  65                 if (r == EDEADSRCDST)
  66                         continue;
  67
  68                 /* If another error occurred, we cannot continue using the
  69                  * driver as is, but we also cannot force it to restart.
  70                  */
  71                 break;
  72         }
  73
  74         /* Resend all asynchronous requests. */
  75         call = NULL;
  76
  77         while ((call = bdev_call_iter_maj(dev, call, &next)) != NULL) {
  78                 /* It is not strictly necessary that we manage to reissue all
  79                  * asynchronous requests successfully. We can fail them on an
  80                  * individual basis here, without affecting the overall
  81                  * recovery. Note that we will never get new IPC failures here.
  82                  */
  83                 if ((r = bdev_restart_asyn(call)) != OK)
  84                         bdev_callback_asyn(call, r);
  85         }
  86
  87         /* Recovery seems successful. We can now reissue the current
  88          * synchronous request (if any), and continue normal operation.
  89          */
  90         if (active)
  91                 printf("bdev: recovery successful, new driver at %d\n", endpt);
  92
  93         return TRUE;
  94   }
  95
  96   /* Recovery failed repeatedly. Give up on this driver. */
  97   bdev_cancel(dev);
  98
  99   return FALSE;
 100 }
 101
 102 void bdev_update(dev_t dev, char *label)
 103 {
 104 /* Set the endpoint for a driver. Perform recovery if necessary.
 105  */
 106   endpoint_t endpt, old_endpt;
 107
 108   old_endpt = bdev_driver_get(dev);
 109
 110   endpt = bdev_driver_set(dev, label);
 111
 112   /* If updating the driver causes an endpoint change, we need to perform
 113    * recovery, but not update the endpoint yet again.
 114    */
 115   if (old_endpt != NONE && old_endpt != endpt)
 116         bdev_recover(dev, FALSE /*update_endpt*/);
 117 }
 118
 119 int bdev_senda(dev_t dev, const message *m_orig, bdev_id_t id)
 120 {
 121 /* Send an asynchronous request for the given device. This function will never
 122  * get any new IPC errors sending to the driver. If sending an asynchronous
 123  * request fails, we will find out through other ways later.
 124  */
 125   endpoint_t endpt;
 126   message m;
 127   int r;
 128
 129   /* If we have no usable driver endpoint, fail instantly. */
 130   if ((endpt = bdev_driver_get(dev)) == NONE)
 131         return EDEADSRCDST;
 132
 133   m = *m_orig;
 134   m.m_lbdev_lblockdriver_msg.id = id;
 135
 136   r = asynsend(endpt, &m);
 137
 138   if (r != OK)
 139         printf("bdev: asynsend to driver (%d) failed (%d)\n", endpt, r);
 140
 141   return r;
 142 }
 143
 144 int bdev_sendrec(dev_t dev, const message *m_orig)
 145 {
 146 /* Send a synchronous request for the given device, and wait for the reply.
 147  * Return ERESTART if the caller should try to reissue the request.
 148  */
 149   endpoint_t endpt;
 150   message m;
 151   int r;
 152
 153   /* If we have no usable driver endpoint, fail instantly. */
 154   if ((endpt = bdev_driver_get(dev)) == NONE)
 155         return EDEADSRCDST;
 156
 157   /* Send the request and block until we receive a reply. */
 158   m = *m_orig;
 159   m.m_lbdev_lblockdriver_msg.id = NO_ID;
 160
 161   r = ipc_sendrec(endpt, &m);
 162
 163   /* If communication failed, the driver has died. We assume it will be
 164    * restarted soon after, so we attempt recovery. Upon success, we let the
 165    * caller reissue the synchronous request.
 166    */
 167   if (r == EDEADSRCDST) {
 168         if (!bdev_recover(dev, TRUE /*update_endpt*/))
 169                 return EDEADSRCDST;
 170
 171         return ERESTART;
 172   }
 173
 174   if (r != OK) {
 175         printf("bdev: IPC to driver (%d) failed (%d)\n", endpt, r);
 176         return r;
 177   }
 178
 179   if (m.m_type != BDEV_REPLY) {
 180         printf("bdev: driver (%d) sent weird response (%d)\n",
 181                 endpt, m.m_type);
 182         return EINVAL;
 183   }
 184
 185   /* The protocol contract states that no asynchronous reply can satisfy a
 186    * synchronous SENDREC call, so we can never get an asynchronous reply here.
 187    */
 188   if (m.m_lblockdriver_lbdev_reply.id != NO_ID) {
 189         printf("bdev: driver (%d) sent invalid ID (%d)\n", endpt,
 190                 m.m_lblockdriver_lbdev_reply.id);
 191         return EINVAL;
 192   }
 193
 194   /* Unless the caller is misusing libbdev, we will only get ERESTART if we
 195    * have managed to resend a raw block I/O request to the driver after a
 196    * restart, but before VFS has had a chance to reopen the associated device
 197    * first. This is highly exceptional, and hard to deal with correctly. We
 198    * take the easiest route: sleep for a while so that VFS can reopen the
 199    * device, and then resend the request. If the call keeps failing, the caller
 200    * will eventually give up.
 201    */
 202   if (m.m_lblockdriver_lbdev_reply.status == ERESTART) {
 203         printf("bdev: got ERESTART from driver (%d), sleeping for reopen\n",
 204                 endpt);
 205
 206         micro_delay(1000);
 207
 208         return ERESTART;
 209   }
 210
 211   /* Return the result of our request. */
 212   return m.m_lblockdriver_lbdev_reply.status;
 213 }
 214
 215 static int bdev_receive(dev_t dev, message *m)
 216 {
 217 /* Receive one valid message.
 218  */
 219   endpoint_t endpt;
 220   int r, nr_tries = 0;
 221
 222   for (;;) {
 223         /* Retrieve and check the driver endpoint on every try, as it will
 224          * change with each driver restart.
 225          */
 226         if ((endpt = bdev_driver_get(dev)) == NONE)
 227                 return EDEADSRCDST;
 228
 229         r = sef_receive(endpt, m);
 230
 231         if (r == EDEADSRCDST) {
 232                 /* If we reached the maximum number of retries, give up. */
 233                 if (++nr_tries == DRIVER_TRIES)
 234                         break;
 235
 236                 /* Attempt recovery. If successful, all asynchronous requests
 237                  * will have been resent, and we can retry receiving a reply.
 238                  */
 239                 if (!bdev_recover(dev, TRUE /*update_endpt*/))
 240                         return EDEADSRCDST;
 241
 242                 continue;
 243         }
 244
 245         if (r != OK) {
 246                 printf("bdev: IPC to driver (%d) failed (%d)\n", endpt, r);
 247
 248                 return r;
 249         }
 250
 251         if (m->m_type != BDEV_REPLY) {
 252                 printf("bdev: driver (%d) sent weird response (%d)\n",
 253                         endpt, m->m_type);
 254                 return EINVAL;
 255         }
 256
 257         /* The caller is responsible for checking the ID and status. */
 258         return OK;
 259   }
 260
 261   /* All tries failed, even though all recovery attempts succeeded. In this
 262    * case, we let the caller recheck whether it wants to keep calling us,
 263    * returning ERESTART to indicate we can be called again but did not actually
 264    * receive a message.
 265    */
 266   return ERESTART;
 267 }
 268
 269 void bdev_reply_asyn(message *m)
 270 {
 271 /* A reply has come in from a disk driver.
 272  */
 273   bdev_call_t *call;
 274   endpoint_t endpt;
 275   bdev_id_t id;
 276   int r;
 277
 278   /* This is a requirement for the caller. */
 279   assert(m->m_type == BDEV_REPLY);
 280
 281   /* Get the corresponding asynchronous call structure. */
 282   id = m->m_lblockdriver_lbdev_reply.id;
 283
 284   if ((call = bdev_call_get(id)) == NULL) {
 285         printf("bdev: driver (%d) replied to unknown request (%d)\n",
 286                 m->m_source, m->m_lblockdriver_lbdev_reply.id);
 287         return;
 288   }
 289
 290   /* Make sure the reply was sent from the right endpoint. */
 291   endpt = bdev_driver_get(call->dev);
 292
 293   if (m->m_source != endpt) {
 294         /* If the endpoint is NONE, this may be a stray reply. */
 295         if (endpt != NONE)
 296                 printf("bdev: driver (%d) replied to request not sent to it\n",
 297                         m->m_source);
 298         return;
 299   }
 300
 301   /* See the ERESTART comment in bdev_sendrec(). */
 302   if (m->m_lblockdriver_lbdev_reply.status == ERESTART) {
 303         printf("bdev: got ERESTART from driver (%d), sleeping for reopen\n",
 304                 endpt);
 305
 306         micro_delay(1000);
 307
 308         if ((r = bdev_restart_asyn(call)) != OK)
 309                 bdev_callback_asyn(call, r);
 310
 311         return;
 312   }
 313
 314   bdev_callback_asyn(call, m->m_lblockdriver_lbdev_reply.status);
 315 }
 316
 317 int bdev_wait_asyn(bdev_id_t id)
 318 {
 319 /* Wait for an asynchronous request to complete.
 320  */
 321   bdev_call_t *call;
 322   dev_t dev;
 323   message m;
 324   int r;
 325
 326   if ((call = bdev_call_get(id)) == NULL)
 327         return ENOENT;
 328
 329   dev = call->dev;
 330
 331   do {
 332         if ((r = bdev_receive(dev, &m)) != OK && r != ERESTART)
 333                 return r;
 334
 335         /* Processing the reply will free up the call structure as a side
 336          * effect. If we repeatedly get ERESTART, we will repeatedly resend the
 337          * asynchronous request, which will then eventually hit the retry limit
 338          * and we will break out of the loop.
 339          */
 340         if (r == OK)
 341                 bdev_reply_asyn(&m);
 342
 343   } while (bdev_call_get(id) != NULL);
 344
 345   return OK;
 346 }