* added 0.99 linux version
[mascara-docs.git] / i386 / linux / linux-2.3.21 / drivers / block / nbd.c
blobab18b55b00a09e91c8005766c3ba0259e9d201e4
1 /*
2 * Network block device - make block devices work over TCP
4 * Note that you can not swap over this thing, yet. Seems to work but
5 * deadlocks sometimes - you can not swap over TCP in general.
6 *
7 * Copyright 1997 Pavel Machek <pavel@atrey.karlin.mff.cuni.cz>
8 *
9 * (part of code stolen from loop.c)
11 * 97-3-25 compiled 0-th version, not yet tested it
12 * (it did not work, BTW) (later that day) HEY! it works!
13 * (bit later) hmm, not that much... 2:00am next day:
14 * yes, it works, but it gives something like 50kB/sec
15 * 97-4-01 complete rewrite to make it possible for many requests at
16 * once to be processed
17 * 97-4-11 Making protocol independent of endianness etc.
18 * 97-9-13 Cosmetic changes
19 * 98-5-13 Attempt to make 64-bit-clean on 64-bit machines
20 * 99-1-11 Attempt to make 64-bit-clean on 32-bit machines <ankry@mif.pg.gda.pl>
22 * possible FIXME: make set_sock / set_blksize / set_size / do_it one syscall
23 * why not: would need verify_area and friends, would share yet another
24 * structure with userland
27 #define PARANOIA
28 #include <linux/major.h>
30 #include <linux/module.h>
32 #include <linux/sched.h>
33 #include <linux/fs.h>
34 #include <linux/stat.h>
35 #include <linux/errno.h>
36 #include <linux/file.h>
37 #include <linux/ioctl.h>
39 #include <asm/segment.h>
40 #include <asm/uaccess.h>
41 #include <asm/types.h>
43 #define MAJOR_NR NBD_MAJOR
44 #include <linux/nbd.h>
46 #define LO_MAGIC 0x68797548
48 static int nbd_blksizes[MAX_NBD];
49 static int nbd_blksize_bits[MAX_NBD];
50 static int nbd_sizes[MAX_NBD];
51 static u64 nbd_bytesizes[MAX_NBD];
53 static struct nbd_device nbd_dev[MAX_NBD];
55 #define DEBUG( s )
56 /* #define DEBUG( s ) printk( s )
59 #ifdef PARANOIA
60 static int requests_in;
61 static int requests_out;
62 #endif
64 static int nbd_open(struct inode *inode, struct file *file)
66 int dev;
67 struct nbd_device *nbdev;
69 if (!inode)
70 return -EINVAL;
71 dev = MINOR(inode->i_rdev);
72 if (dev >= MAX_NBD)
73 return -ENODEV;
75 nbdev = &nbd_dev[dev];
76 nbd_dev[dev].refcnt++;
77 if (!(nbdev->flags & NBD_INITIALISED)) {
78 init_MUTEX(&nbdev->queue_lock);
79 nbdev->flags |= NBD_INITIALISED;
81 MOD_INC_USE_COUNT;
82 return 0;
86 * Send or receive packet.
88 static int nbd_xmit(int send, struct socket *sock, char *buf, int size)
90 mm_segment_t oldfs;
91 int result;
92 struct msghdr msg;
93 struct iovec iov;
94 unsigned long flags;
95 sigset_t oldset;
97 oldfs = get_fs();
98 set_fs(get_ds());
100 spin_lock_irqsave(&current->sigmask_lock, flags);
101 oldset = current->blocked;
102 sigfillset(&current->blocked);
103 recalc_sigpending(current);
104 spin_unlock_irqrestore(&current->sigmask_lock, flags);
107 do {
108 iov.iov_base = buf;
109 iov.iov_len = size;
110 msg.msg_name = NULL;
111 msg.msg_namelen = 0;
112 msg.msg_iov = &iov;
113 msg.msg_iovlen = 1;
114 msg.msg_control = NULL;
115 msg.msg_controllen = 0;
116 msg.msg_namelen = 0;
117 msg.msg_flags = 0;
119 if (send)
120 result = sock_sendmsg(sock, &msg, size);
121 else
122 result = sock_recvmsg(sock, &msg, size, 0);
124 if (result <= 0) {
125 #ifdef PARANOIA
126 printk(KERN_ERR "NBD: %s - sock=%ld at buf=%ld, size=%d returned %d.\n",
127 send ? "send" : "receive", (long) sock, (long) buf, size, result);
128 #endif
129 break;
131 size -= result;
132 buf += result;
133 } while (size > 0);
135 spin_lock_irqsave(&current->sigmask_lock, flags);
136 current->blocked = oldset;
137 recalc_sigpending(current);
138 spin_unlock_irqrestore(&current->sigmask_lock, flags);
140 set_fs(oldfs);
141 return result;
144 #define FAIL( s ) { printk( KERN_ERR "NBD: " s "(result %d)\n", result ); goto error_out; }
146 void nbd_send_req(struct socket *sock, struct request *req)
148 int result;
149 struct nbd_request request;
151 DEBUG("NBD: sending control, ");
152 request.magic = htonl(NBD_REQUEST_MAGIC);
153 request.type = htonl(req->cmd);
154 request.from = cpu_to_be64( (u64) req->sector << 9);
155 request.len = htonl(req->current_nr_sectors << 9);
156 memcpy(request.handle, &req, sizeof(req));
158 result = nbd_xmit(1, sock, (char *) &request, sizeof(request));
159 if (result <= 0)
160 FAIL("Sendmsg failed for control.");
162 if (req->cmd == WRITE) {
163 DEBUG("data, ");
164 result = nbd_xmit(1, sock, req->buffer, req->current_nr_sectors << 9);
165 if (result <= 0)
166 FAIL("Send data failed.");
168 return;
170 error_out:
171 req->errors++;
174 #define HARDFAIL( s ) { printk( KERN_ERR "NBD: " s "(result %d)\n", result ); lo->harderror = result; return NULL; }
175 struct request *nbd_read_stat(struct nbd_device *lo)
176 /* NULL returned = something went wrong, inform userspace */
178 int result;
179 struct nbd_reply reply;
180 struct request *xreq, *req;
182 DEBUG("reading control, ");
183 reply.magic = 0;
184 result = nbd_xmit(0, lo->sock, (char *) &reply, sizeof(reply));
185 req = lo->tail;
186 if (result <= 0)
187 HARDFAIL("Recv control failed.");
188 memcpy(&xreq, reply.handle, sizeof(xreq));
190 if (xreq != req)
191 FAIL("Unexpected handle received.\n");
193 DEBUG("ok, ");
194 if (ntohl(reply.magic) != NBD_REPLY_MAGIC)
195 HARDFAIL("Not enough magic.");
196 if (ntohl(reply.error))
197 FAIL("Other side returned error.");
198 if (req->cmd == READ) {
199 DEBUG("data, ");
200 result = nbd_xmit(0, lo->sock, req->buffer, req->current_nr_sectors << 9);
201 if (result <= 0)
202 HARDFAIL("Recv data failed.");
204 DEBUG("done.\n");
205 return req;
207 /* Can we get here? Yes, if other side returns error */
208 error_out:
209 req->errors++;
210 return req;
213 void nbd_do_it(struct nbd_device *lo)
215 struct request *req;
217 while (1) {
218 req = nbd_read_stat(lo);
219 if (!req)
220 return;
221 down (&lo->queue_lock);
222 #ifdef PARANOIA
223 if (req != lo->tail) {
224 printk(KERN_ALERT "NBD: I have problem...\n");
226 if (lo != &nbd_dev[MINOR(req->rq_dev)]) {
227 printk(KERN_ALERT "NBD: request corrupted!\n");
228 goto next;
230 if (lo->magic != LO_MAGIC) {
231 printk(KERN_ALERT "NBD: nbd_dev[] corrupted: Not enough magic\n");
232 up (&lo->queue_lock);
233 return;
235 #endif
236 nbd_end_request(req);
237 if (lo->tail == lo->head) {
238 #ifdef PARANOIA
239 if (lo->tail->next)
240 printk(KERN_ERR "NBD: I did not expect this\n");
241 #endif
242 lo->head = NULL;
244 lo->tail = lo->tail->next;
245 next:
246 up (&lo->queue_lock);
250 void nbd_clear_que(struct nbd_device *lo)
252 struct request *req;
254 while (1) {
255 req = lo->tail;
256 if (!req)
257 return;
258 #ifdef PARANOIA
259 if (lo != &nbd_dev[MINOR(req->rq_dev)]) {
260 printk(KERN_ALERT "NBD: request corrupted when clearing!\n");
261 continue;
263 if (lo->magic != LO_MAGIC) {
264 printk(KERN_ERR "NBD: nbd_dev[] corrupted: Not enough magic when clearing!\n");
265 return;
267 #endif
268 req->errors++;
269 nbd_end_request(req);
270 if (lo->tail == lo->head) {
271 #ifdef PARANOIA
272 if (lo->tail->next)
273 printk(KERN_ERR "NBD: I did not assume this\n");
274 #endif
275 lo->head = NULL;
277 lo->tail = lo->tail->next;
282 * We always wait for result of write, for now. It would be nice to make it optional
283 * in future
284 * if ((req->cmd == WRITE) && (lo->flags & NBD_WRITE_NOCHK))
285 * { printk( "Warning: Ignoring result!\n"); nbd_end_request( req ); }
288 #undef FAIL
289 #define FAIL( s ) { printk( KERN_ERR "NBD, minor %d: " s "\n", dev ); goto error_out; }
291 static void do_nbd_request(void)
293 struct request *req;
294 int dev;
295 struct nbd_device *lo;
297 while (CURRENT) {
298 req = CURRENT;
299 dev = MINOR(req->rq_dev);
300 #ifdef PARANOIA
301 if (dev >= MAX_NBD)
302 FAIL("Minor too big."); /* Probably can not happen */
303 #endif
304 lo = &nbd_dev[dev];
305 if (!lo->file)
306 FAIL("Request when not-ready.");
307 if ((req->cmd == WRITE) && (lo->flags & NBD_READ_ONLY))
308 FAIL("Write on read-only");
309 #ifdef PARANOIA
310 if (lo->magic != LO_MAGIC)
311 FAIL("nbd[] is not magical!");
312 requests_in++;
313 #endif
314 req->errors = 0;
315 CURRENT = CURRENT->next;
316 req->next = NULL;
318 spin_unlock_irq(&io_request_lock);
319 down (&lo->queue_lock);
320 if (lo->head == NULL) {
321 lo->head = req;
322 lo->tail = req;
323 } else {
324 lo->head->next = req;
325 lo->head = req;
328 nbd_send_req(lo->sock, req); /* Why does this block? */
329 up (&lo->queue_lock);
330 spin_lock_irq(&io_request_lock);
331 continue;
333 error_out:
334 req->errors++;
335 nbd_end_request(req);
336 CURRENT = CURRENT->next;
338 return;
341 static int nbd_ioctl(struct inode *inode, struct file *file,
342 unsigned int cmd, unsigned long arg)
344 struct nbd_device *lo;
345 int dev, error, temp;
347 /* Anyone capable of this syscall can do *real bad* things */
349 if (!capable(CAP_SYS_ADMIN))
350 return -EPERM;
351 if (!inode)
352 return -EINVAL;
353 dev = MINOR(inode->i_rdev);
354 if (dev >= MAX_NBD)
355 return -ENODEV;
357 lo = &nbd_dev[dev];
358 switch (cmd) {
359 case NBD_CLEAR_SOCK:
360 nbd_clear_que(lo);
361 if (lo->head || lo->tail) {
362 printk(KERN_ERR "nbd: Some requests are in progress -> can not turn off.\n");
363 return -EBUSY;
365 file = lo->file;
366 if (!file)
367 return -EINVAL;
368 lo->file = NULL;
369 lo->sock = NULL;
370 fput(file);
371 return 0;
372 case NBD_SET_SOCK:
373 if (lo->file)
374 return -EBUSY;
375 error = -EINVAL;
376 file = fget(arg);
377 if (file) {
378 inode = file->f_dentry->d_inode;
379 /* N.B. Should verify that it's a socket */
380 lo->file = file;
381 lo->sock = &inode->u.socket_i;
382 error = 0;
384 return error;
385 case NBD_SET_BLKSIZE:
386 if ((arg & (arg-1)) || (arg < 512) || (arg > PAGE_SIZE))
387 return -EINVAL;
388 nbd_blksizes[dev] = arg;
389 temp = arg >> 9;
390 nbd_blksize_bits[dev] = 9;
391 while (temp > 1) {
392 nbd_blksize_bits[dev]++;
393 temp >>= 1;
395 nbd_sizes[dev] = nbd_bytesizes[dev] >> nbd_blksize_bits[dev];
396 nbd_bytesizes[dev] = nbd_sizes[dev] << nbd_blksize_bits[dev];
397 return 0;
398 case NBD_SET_SIZE:
399 nbd_sizes[dev] = arg >> nbd_blksize_bits[dev];
400 nbd_bytesizes[dev] = nbd_sizes[dev] << nbd_blksize_bits[dev];
401 return 0;
402 case NBD_SET_SIZE_BLOCKS:
403 nbd_sizes[dev] = arg;
404 nbd_bytesizes[dev] = ((u64) arg) << nbd_blksize_bits[dev];
405 return 0;
406 case NBD_DO_IT:
407 if (!lo->file)
408 return -EINVAL;
409 nbd_do_it(lo);
410 return lo->harderror;
411 case NBD_CLEAR_QUE:
412 nbd_clear_que(lo);
413 return 0;
414 #ifdef PARANOIA
415 case NBD_PRINT_DEBUG:
416 printk(KERN_INFO "NBD device %d: head = %lx, tail = %lx. Global: in %d, out %d\n",
417 dev, (long) lo->head, (long) lo->tail, requests_in, requests_out);
418 return 0;
419 #endif
420 case BLKGETSIZE:
421 return put_user(nbd_bytesizes[dev] >> 9, (long *) arg);
423 return -EINVAL;
426 static int nbd_release(struct inode *inode, struct file *file)
428 struct nbd_device *lo;
429 int dev;
431 if (!inode)
432 return -ENODEV;
433 dev = MINOR(inode->i_rdev);
434 if (dev >= MAX_NBD)
435 return -ENODEV;
436 fsync_dev(inode->i_rdev);
437 invalidate_buffers(inode->i_rdev);
438 lo = &nbd_dev[dev];
439 if (lo->refcnt <= 0)
440 printk(KERN_ALERT "nbd_release: refcount(%d) <= 0\n", lo->refcnt);
441 lo->refcnt--;
442 /* N.B. Doesn't lo->file need an fput?? */
443 MOD_DEC_USE_COUNT;
444 return 0;
447 static struct file_operations nbd_fops =
449 NULL, /* lseek - default */
450 block_read, /* read - general block-dev read */
451 block_write, /* write - general block-dev write */
452 NULL, /* readdir - bad */
453 NULL, /* select */
454 nbd_ioctl, /* ioctl */
455 NULL, /* mmap */
456 nbd_open, /* open */
457 NULL, /* flush */
458 nbd_release /* release */
462 * And here should be modules and kernel interface
463 * (Just smiley confuses emacs :-)
466 #ifdef MODULE
467 #define nbd_init init_module
468 #endif
470 int nbd_init(void)
472 int i;
474 if (sizeof(struct nbd_request) != 28) {
475 printk(KERN_CRIT "Sizeof nbd_request needs to be 28 in order to work!\n" );
476 return -EIO;
479 if (register_blkdev(MAJOR_NR, "nbd", &nbd_fops)) {
480 printk("Unable to get major number %d for NBD\n",
481 MAJOR_NR);
482 return -EIO;
484 #ifdef MODULE
485 printk("nbd: registered device at major %d\n", MAJOR_NR);
486 #endif
487 blksize_size[MAJOR_NR] = nbd_blksizes;
488 blk_size[MAJOR_NR] = nbd_sizes;
489 blk_dev[MAJOR_NR].request_fn = do_nbd_request;
490 for (i = 0; i < MAX_NBD; i++) {
491 nbd_dev[i].refcnt = 0;
492 nbd_dev[i].file = NULL;
493 nbd_dev[i].magic = LO_MAGIC;
494 nbd_dev[i].flags = 0;
495 nbd_blksizes[i] = 1024;
496 nbd_blksize_bits[i] = 10;
497 nbd_bytesizes[i] = 0x7ffffc00; /* 2GB */
498 nbd_sizes[i] = nbd_bytesizes[i] >> nbd_blksize_bits[i];
500 return 0;
#ifdef MODULE
/*
 * Module exit point.
 *
 * Unregisters the block major and detaches everything nbd_init() hooked
 * into the block layer, so that no table entry is left pointing at this
 * module's functions or static arrays.
 */
void cleanup_module(void)
{
	if (unregister_blkdev(MAJOR_NR, "nbd") != 0)
		printk("nbd: cleanup_module failed\n");
	else
		printk("nbd: module cleaned up.\n");

	/* Undo the hooks installed by nbd_init(). */
	blk_dev[MAJOR_NR].request_fn = NULL;
	blksize_size[MAJOR_NR] = NULL;
	blk_size[MAJOR_NR] = NULL;
}
#endif