BPicture: Fix archive constructor.
[haiku.git] / src / add-ons / kernel / network / stack / net_socket.cpp
blob5582351ef7dd00f6786f235631801e4c7c1e7bce
1 /*
2 * Copyright 2006-2010, Haiku, Inc. All Rights Reserved.
3 * Distributed under the terms of the MIT License.
5 * Authors:
6 * Axel Dörfler, axeld@pinc-software.de
7 */
10 #include "stack_private.h"
12 #include <stdlib.h>
13 #include <string.h>
14 #include <sys/ioctl.h>
15 #include <sys/time.h>
17 #include <new>
19 #include <Drivers.h>
20 #include <KernelExport.h>
21 #include <Select.h>
23 #include <AutoDeleter.h>
24 #include <team.h>
25 #include <util/AutoLock.h>
26 #include <util/list.h>
27 #include <WeakReferenceable.h>
29 #include <fs/select_sync_pool.h>
30 #include <kernel.h>
32 #include <net_protocol.h>
33 #include <net_stack.h>
34 #include <net_stat.h>
36 #include "ancillary_data.h"
37 #include "utility.h"
40 //#define TRACE_SOCKET
41 #ifdef TRACE_SOCKET
42 # define TRACE(x...) dprintf(STACK_DEBUG_PREFIX x)
43 #else
44 # define TRACE(x...) ;
45 #endif
48 struct net_socket_private;
49 typedef DoublyLinkedList<net_socket_private> SocketList;
51 struct net_socket_private : net_socket,
52 DoublyLinkedListLinkImpl<net_socket_private>,
53 BWeakReferenceable {
54 net_socket_private();
55 ~net_socket_private();
57 void RemoveFromParent();
59 BWeakReference<net_socket_private> parent;
60 team_id owner;
61 uint32 max_backlog;
62 uint32 child_count;
63 SocketList pending_children;
64 SocketList connected_children;
66 struct select_sync_pool* select_pool;
67 mutex lock;
69 bool is_connected;
70 bool is_in_socket_list;
74 int socket_bind(net_socket* socket, const struct sockaddr* address,
75 socklen_t addressLength);
76 int socket_setsockopt(net_socket* socket, int level, int option,
77 const void* value, int length);
78 ssize_t socket_read_avail(net_socket* socket);
80 static SocketList sSocketList;
81 static mutex sSocketLock;
84 net_socket_private::net_socket_private()
86 owner(-1),
87 max_backlog(0),
88 child_count(0),
89 select_pool(NULL),
90 is_connected(false),
91 is_in_socket_list(false)
93 first_protocol = NULL;
94 first_info = NULL;
95 options = 0;
96 linger = 0;
97 bound_to_device = 0;
98 error = 0;
100 address.ss_len = 0;
101 peer.ss_len = 0;
103 mutex_init(&lock, "socket");
105 // set defaults (may be overridden by the protocols)
106 send.buffer_size = 65535;
107 send.low_water_mark = 1;
108 send.timeout = B_INFINITE_TIMEOUT;
109 receive.buffer_size = 65535;
110 receive.low_water_mark = 1;
111 receive.timeout = B_INFINITE_TIMEOUT;
115 net_socket_private::~net_socket_private()
117 TRACE("delete net_socket %p\n", this);
119 if (parent != NULL)
120 panic("socket still has a parent!");
122 if (is_in_socket_list) {
123 MutexLocker _(sSocketLock);
124 sSocketList.Remove(this);
127 mutex_lock(&lock);
129 // also delete all children of this socket
130 while (net_socket_private* child = pending_children.RemoveHead()) {
131 child->RemoveFromParent();
133 while (net_socket_private* child = connected_children.RemoveHead()) {
134 child->RemoveFromParent();
137 mutex_unlock(&lock);
139 put_domain_protocols(this);
141 mutex_destroy(&lock);
145 void
146 net_socket_private::RemoveFromParent()
148 ASSERT(!is_in_socket_list && parent != NULL);
150 parent = NULL;
152 mutex_lock(&sSocketLock);
153 sSocketList.Add(this);
154 mutex_unlock(&sSocketLock);
156 is_in_socket_list = true;
158 ReleaseReference();
162 // #pragma mark -
165 static size_t
166 compute_user_iovec_length(iovec* userVec, uint32 count)
168 size_t length = 0;
170 for (uint32 i = 0; i < count; i++) {
171 iovec vec;
172 if (user_memcpy(&vec, userVec + i, sizeof(iovec)) < B_OK)
173 return 0;
175 length += vec.iov_len;
178 return length;
182 static status_t
183 create_socket(int family, int type, int protocol, net_socket_private** _socket)
185 struct net_socket_private* socket = new(std::nothrow) net_socket_private;
186 if (socket == NULL)
187 return B_NO_MEMORY;
188 status_t status = socket->InitCheck();
189 if (status != B_OK) {
190 delete socket;
191 return status;
194 socket->family = family;
195 socket->type = type;
196 socket->protocol = protocol;
198 status = get_domain_protocols(socket);
199 if (status != B_OK) {
200 delete socket;
201 return status;
204 TRACE("create net_socket %p (%u.%u.%u):\n", socket, socket->family,
205 socket->type, socket->protocol);
207 #ifdef TRACE_SOCKET
208 net_protocol* current = socket->first_protocol;
209 for (int i = 0; current != NULL; current = current->next, i++)
210 TRACE(" [%d] %p %s\n", i, current, current->module->info.name);
211 #endif
213 *_socket = socket;
214 return B_OK;
218 static status_t
219 add_ancillary_data(net_socket* socket, ancillary_data_container* container,
220 void* data, size_t dataLen)
222 cmsghdr* header = (cmsghdr*)data;
224 while (dataLen > 0) {
225 if (header->cmsg_len < sizeof(cmsghdr) || header->cmsg_len > dataLen)
226 return B_BAD_VALUE;
228 if (socket->first_info->add_ancillary_data == NULL)
229 return B_NOT_SUPPORTED;
231 status_t status = socket->first_info->add_ancillary_data(
232 socket->first_protocol, container, header);
233 if (status != B_OK)
234 return status;
236 dataLen -= _ALIGN(header->cmsg_len);
237 header = (cmsghdr*)((uint8*)header + _ALIGN(header->cmsg_len));
240 return B_OK;
244 static status_t
245 process_ancillary_data(net_socket* socket, ancillary_data_container* container,
246 msghdr* messageHeader)
248 uint8* dataBuffer = (uint8*)messageHeader->msg_control;
249 int dataBufferLen = messageHeader->msg_controllen;
251 if (container == NULL || dataBuffer == NULL) {
252 messageHeader->msg_controllen = 0;
253 return B_OK;
256 ancillary_data_header header;
257 void* data = NULL;
259 while ((data = next_ancillary_data(container, data, &header)) != NULL) {
260 if (socket->first_info->process_ancillary_data == NULL)
261 return B_NOT_SUPPORTED;
263 ssize_t bytesWritten = socket->first_info->process_ancillary_data(
264 socket->first_protocol, &header, data, dataBuffer, dataBufferLen);
265 if (bytesWritten < 0)
266 return bytesWritten;
268 dataBuffer += bytesWritten;
269 dataBufferLen -= bytesWritten;
272 messageHeader->msg_controllen -= dataBufferLen;
274 return B_OK;
278 static status_t
279 process_ancillary_data(net_socket* socket,
280 net_buffer* buffer, msghdr* messageHeader)
282 void *dataBuffer = messageHeader->msg_control;
283 ssize_t bytesWritten;
285 if (dataBuffer == NULL) {
286 messageHeader->msg_controllen = 0;
287 return B_OK;
290 if (socket->first_info->process_ancillary_data_no_container == NULL)
291 return B_NOT_SUPPORTED;
293 bytesWritten = socket->first_info->process_ancillary_data_no_container(
294 socket->first_protocol, buffer, dataBuffer,
295 messageHeader->msg_controllen);
296 if (bytesWritten < 0)
297 return bytesWritten;
298 messageHeader->msg_controllen = bytesWritten;
300 return B_OK;
304 static ssize_t
305 socket_receive_no_buffer(net_socket* socket, msghdr* header, void* data,
306 size_t length, int flags)
308 iovec stackVec = { data, length };
309 iovec* vecs = header ? header->msg_iov : &stackVec;
310 int vecCount = header ? header->msg_iovlen : 1;
311 sockaddr* address = header ? (sockaddr*)header->msg_name : NULL;
312 socklen_t* addressLen = header ? &header->msg_namelen : NULL;
314 ancillary_data_container* ancillaryData = NULL;
315 ssize_t bytesRead = socket->first_info->read_data_no_buffer(
316 socket->first_protocol, vecs, vecCount, &ancillaryData, address,
317 addressLen);
318 if (bytesRead < 0)
319 return bytesRead;
321 CObjectDeleter<ancillary_data_container> ancillaryDataDeleter(ancillaryData,
322 &delete_ancillary_data_container);
324 // process ancillary data
325 if (header != NULL) {
326 status_t status = process_ancillary_data(socket, ancillaryData, header);
327 if (status != B_OK)
328 return status;
330 header->msg_flags = 0;
333 return bytesRead;
337 #if ENABLE_DEBUGGER_COMMANDS
340 static void
341 print_socket_line(net_socket_private* socket, const char* prefix)
343 BReference<net_socket_private> parent = socket->parent.GetReference();
344 kprintf("%s%p %2d.%2d.%2d %6" B_PRId32 " %p %p %p%s\n", prefix, socket,
345 socket->family, socket->type, socket->protocol, socket->owner,
346 socket->first_protocol, socket->first_info, parent.Get(),
347 parent.Get() != NULL ? socket->is_connected ? " (c)" : " (p)" : "");
351 static int
352 dump_socket(int argc, char** argv)
354 if (argc < 2) {
355 kprintf("usage: %s [address]\n", argv[0]);
356 return 0;
359 net_socket_private* socket = (net_socket_private*)parse_expression(argv[1]);
361 kprintf("SOCKET %p\n", socket);
362 kprintf(" family.type.protocol: %d.%d.%d\n",
363 socket->family, socket->type, socket->protocol);
364 BReference<net_socket_private> parent = socket->parent.GetReference();
365 kprintf(" parent: %p\n", parent.Get());
366 kprintf(" first protocol: %p\n", socket->first_protocol);
367 kprintf(" first module_info: %p\n", socket->first_info);
368 kprintf(" options: %x\n", socket->options);
369 kprintf(" linger: %d\n", socket->linger);
370 kprintf(" bound to device: %" B_PRIu32 "\n", socket->bound_to_device);
371 kprintf(" owner: %" B_PRId32 "\n", socket->owner);
372 kprintf(" max backlog: %" B_PRId32 "\n", socket->max_backlog);
373 kprintf(" is connected: %d\n", socket->is_connected);
374 kprintf(" child_count: %" B_PRIu32 "\n", socket->child_count);
376 if (socket->child_count == 0)
377 return 0;
379 kprintf(" pending children:\n");
380 SocketList::Iterator iterator = socket->pending_children.GetIterator();
381 while (net_socket_private* child = iterator.Next()) {
382 print_socket_line(child, " ");
385 kprintf(" connected children:\n");
386 iterator = socket->connected_children.GetIterator();
387 while (net_socket_private* child = iterator.Next()) {
388 print_socket_line(child, " ");
391 return 0;
395 static int
396 dump_sockets(int argc, char** argv)
398 kprintf("address kind owner protocol module_info parent\n");
400 SocketList::Iterator iterator = sSocketList.GetIterator();
401 while (net_socket_private* socket = iterator.Next()) {
402 print_socket_line(socket, "");
404 SocketList::Iterator childIterator
405 = socket->pending_children.GetIterator();
406 while (net_socket_private* child = childIterator.Next()) {
407 print_socket_line(child, " ");
410 childIterator = socket->connected_children.GetIterator();
411 while (net_socket_private* child = childIterator.Next()) {
412 print_socket_line(child, " ");
416 return 0;
420 #endif // ENABLE_DEBUGGER_COMMANDS
423 // #pragma mark -
426 status_t
427 socket_open(int family, int type, int protocol, net_socket** _socket)
429 net_socket_private* socket;
430 status_t status = create_socket(family, type, protocol, &socket);
431 if (status != B_OK)
432 return status;
434 status = socket->first_info->open(socket->first_protocol);
435 if (status != B_OK) {
436 delete socket;
437 return status;
440 socket->owner = team_get_current_team_id();
441 socket->is_in_socket_list = true;
443 mutex_lock(&sSocketLock);
444 sSocketList.Add(socket);
445 mutex_unlock(&sSocketLock);
447 *_socket = socket;
448 return B_OK;
452 status_t
453 socket_close(net_socket* _socket)
455 net_socket_private* socket = (net_socket_private*)_socket;
456 return socket->first_info->close(socket->first_protocol);
460 void
461 socket_free(net_socket* _socket)
463 net_socket_private* socket = (net_socket_private*)_socket;
464 socket->first_info->free(socket->first_protocol);
465 socket->ReleaseReference();
469 status_t
470 socket_readv(net_socket* socket, const iovec* vecs, size_t vecCount,
471 size_t* _length)
473 return -1;
477 status_t
478 socket_writev(net_socket* socket, const iovec* vecs, size_t vecCount,
479 size_t* _length)
481 if (socket->peer.ss_len == 0)
482 return ECONNRESET;
484 if (socket->address.ss_len == 0) {
485 // try to bind first
486 status_t status = socket_bind(socket, NULL, 0);
487 if (status != B_OK)
488 return status;
491 // TODO: useful, maybe even computed header space!
492 net_buffer* buffer = gNetBufferModule.create(256);
493 if (buffer == NULL)
494 return ENOBUFS;
496 // copy data into buffer
498 for (uint32 i = 0; i < vecCount; i++) {
499 if (gNetBufferModule.append(buffer, vecs[i].iov_base,
500 vecs[i].iov_len) < B_OK) {
501 gNetBufferModule.free(buffer);
502 return ENOBUFS;
506 memcpy(buffer->source, &socket->address, socket->address.ss_len);
507 memcpy(buffer->destination, &socket->peer, socket->peer.ss_len);
508 size_t size = buffer->size;
510 ssize_t bytesWritten = socket->first_info->send_data(socket->first_protocol,
511 buffer);
512 if (bytesWritten < B_OK) {
513 if (buffer->size != size) {
514 // this appears to be a partial write
515 *_length = size - buffer->size;
517 gNetBufferModule.free(buffer);
518 return bytesWritten;
521 *_length = bytesWritten;
522 return B_OK;
526 status_t
527 socket_control(net_socket* socket, int32 op, void* data, size_t length)
529 switch (op) {
530 case FIONBIO:
532 if (data == NULL)
533 return B_BAD_VALUE;
535 int value;
536 if (is_syscall()) {
537 if (!IS_USER_ADDRESS(data)
538 || user_memcpy(&value, data, sizeof(int)) != B_OK) {
539 return B_BAD_ADDRESS;
541 } else
542 value = *(int*)data;
544 return socket_setsockopt(socket, SOL_SOCKET, SO_NONBLOCK, &value,
545 sizeof(int));
548 case FIONREAD:
550 if (data == NULL)
551 return B_BAD_VALUE;
553 int available = (int)socket_read_avail(socket);
554 if (available < 0)
555 return available;
557 if (is_syscall()) {
558 if (!IS_USER_ADDRESS(data)
559 || user_memcpy(data, &available, sizeof(available))
560 != B_OK) {
561 return B_BAD_ADDRESS;
563 } else
564 *(int*)data = available;
566 return B_OK;
569 case B_SET_BLOCKING_IO:
570 case B_SET_NONBLOCKING_IO:
572 int value = op == B_SET_NONBLOCKING_IO;
573 return socket_setsockopt(socket, SOL_SOCKET, SO_NONBLOCK, &value,
574 sizeof(int));
578 return socket->first_info->control(socket->first_protocol,
579 LEVEL_DRIVER_IOCTL, op, data, &length);
583 ssize_t
584 socket_read_avail(net_socket* socket)
586 return socket->first_info->read_avail(socket->first_protocol);
590 ssize_t
591 socket_send_avail(net_socket* socket)
593 return socket->first_info->send_avail(socket->first_protocol);
597 status_t
598 socket_send_data(net_socket* socket, net_buffer* buffer)
600 return socket->first_info->send_data(socket->first_protocol,
601 buffer);
605 status_t
606 socket_receive_data(net_socket* socket, size_t length, uint32 flags,
607 net_buffer** _buffer)
609 status_t status = socket->first_info->read_data(socket->first_protocol,
610 length, flags, _buffer);
611 if (status != B_OK)
612 return status;
614 if (*_buffer && length < (*_buffer)->size) {
615 // discard any data behind the amount requested
616 gNetBufferModule.trim(*_buffer, length);
619 return status;
623 status_t
624 socket_get_next_stat(uint32* _cookie, int family, struct net_stat* stat)
626 MutexLocker locker(sSocketLock);
628 net_socket_private* socket = NULL;
629 SocketList::Iterator iterator = sSocketList.GetIterator();
630 uint32 cookie = *_cookie;
631 uint32 count = 0;
633 while (true) {
634 socket = iterator.Next();
635 if (socket == NULL)
636 return B_ENTRY_NOT_FOUND;
638 // TODO: also traverse the pending connections
639 if (count == cookie)
640 break;
642 if (family == -1 || family == socket->family)
643 count++;
646 *_cookie = count + 1;
648 stat->family = socket->family;
649 stat->type = socket->type;
650 stat->protocol = socket->protocol;
651 stat->owner = socket->owner;
652 stat->state[0] = '\0';
653 memcpy(&stat->address, &socket->address, sizeof(struct sockaddr_storage));
654 memcpy(&stat->peer, &socket->peer, sizeof(struct sockaddr_storage));
655 stat->receive_queue_size = 0;
656 stat->send_queue_size = 0;
658 // fill in protocol specific data (if supported by the protocol)
659 size_t length = sizeof(net_stat);
660 socket->first_info->control(socket->first_protocol, socket->protocol,
661 NET_STAT_SOCKET, stat, &length);
663 return B_OK;
667 // #pragma mark - connections
670 bool
671 socket_acquire(net_socket* _socket)
673 net_socket_private* socket = (net_socket_private*)_socket;
675 // During destruction, the socket might still be accessible over its
676 // endpoint protocol. We need to make sure the endpoint cannot acquire the
677 // socket anymore -- while not obvious, the endpoint protocol is responsible
678 // for the proper locking here.
679 if (socket->CountReferences() == 0)
680 return false;
682 socket->AcquireReference();
683 return true;
687 bool
688 socket_release(net_socket* _socket)
690 net_socket_private* socket = (net_socket_private*)_socket;
691 return socket->ReleaseReference();
695 status_t
696 socket_spawn_pending(net_socket* _parent, net_socket** _socket)
698 net_socket_private* parent = (net_socket_private*)_parent;
700 TRACE("%s(%p)\n", __FUNCTION__, parent);
702 MutexLocker locker(parent->lock);
704 // We actually accept more pending connections to compensate for those
705 // that never complete, and also make sure at least a single connection
706 // can always be accepted
707 if (parent->child_count > 3 * parent->max_backlog / 2)
708 return ENOBUFS;
710 net_socket_private* socket;
711 status_t status = create_socket(parent->family, parent->type,
712 parent->protocol, &socket);
713 if (status != B_OK)
714 return status;
716 // inherit parent's properties
717 socket->send = parent->send;
718 socket->receive = parent->receive;
719 socket->options = parent->options & ~SO_ACCEPTCONN;
720 socket->linger = parent->linger;
721 socket->owner = parent->owner;
722 memcpy(&socket->address, &parent->address, parent->address.ss_len);
723 memcpy(&socket->peer, &parent->peer, parent->peer.ss_len);
725 // add to the parent's list of pending connections
726 parent->pending_children.Add(socket);
727 socket->parent = parent;
728 parent->child_count++;
730 *_socket = socket;
731 return B_OK;
735 /*! Dequeues a connected child from a parent socket.
736 It also returns a reference with the child socket.
738 status_t
739 socket_dequeue_connected(net_socket* _parent, net_socket** _socket)
741 net_socket_private* parent = (net_socket_private*)_parent;
743 mutex_lock(&parent->lock);
745 net_socket_private* socket = parent->connected_children.RemoveHead();
746 if (socket != NULL) {
747 socket->AcquireReference();
748 socket->RemoveFromParent();
749 parent->child_count--;
750 *_socket = socket;
753 mutex_unlock(&parent->lock);
755 if (socket == NULL)
756 return B_ENTRY_NOT_FOUND;
758 return B_OK;
762 ssize_t
763 socket_count_connected(net_socket* _parent)
765 net_socket_private* parent = (net_socket_private*)_parent;
767 MutexLocker _(parent->lock);
768 return parent->connected_children.Count();
772 status_t
773 socket_set_max_backlog(net_socket* _socket, uint32 backlog)
775 net_socket_private* socket = (net_socket_private*)_socket;
777 // we enforce an upper limit of connections waiting to be accepted
778 if (backlog > 256)
779 backlog = 256;
781 MutexLocker _(socket->lock);
783 // first remove the pending connections, then the already connected
784 // ones as needed
785 net_socket_private* child;
786 while (socket->child_count > backlog
787 && (child = socket->pending_children.RemoveTail()) != NULL) {
788 child->RemoveFromParent();
789 socket->child_count--;
791 while (socket->child_count > backlog
792 && (child = socket->connected_children.RemoveTail()) != NULL) {
793 child->RemoveFromParent();
794 socket->child_count--;
797 socket->max_backlog = backlog;
798 return B_OK;
802 /*! Returns whether or not this socket has a parent. The parent might not be
803 valid anymore, though.
805 bool
806 socket_has_parent(net_socket* _socket)
808 net_socket_private* socket = (net_socket_private*)_socket;
809 return socket->parent != NULL;
813 /*! The socket has been connected. It will be moved to the connected queue
814 of its parent socket.
816 status_t
817 socket_connected(net_socket* _socket)
819 net_socket_private* socket = (net_socket_private*)_socket;
821 TRACE("socket_connected(%p)\n", socket);
823 BReference<net_socket_private> parent = socket->parent.GetReference();
824 if (parent.Get() == NULL)
825 return B_BAD_VALUE;
827 MutexLocker _(parent->lock);
829 parent->pending_children.Remove(socket);
830 parent->connected_children.Add(socket);
831 socket->is_connected = true;
833 // notify parent
834 if (parent->select_pool)
835 notify_select_event_pool(parent->select_pool, B_SELECT_READ);
837 return B_OK;
841 /*! The socket has been aborted. Steals the parent's reference, and releases
844 status_t
845 socket_aborted(net_socket* _socket)
847 net_socket_private* socket = (net_socket_private*)_socket;
849 TRACE("socket_aborted(%p)\n", socket);
851 BReference<net_socket_private> parent = socket->parent.GetReference();
852 if (parent.Get() == NULL)
853 return B_BAD_VALUE;
855 MutexLocker _(parent->lock);
857 if (socket->is_connected)
858 parent->connected_children.Remove(socket);
859 else
860 parent->pending_children.Remove(socket);
862 parent->child_count--;
863 socket->RemoveFromParent();
865 return B_OK;
869 // #pragma mark - notifications
872 status_t
873 socket_request_notification(net_socket* _socket, uint8 event, selectsync* sync)
875 net_socket_private* socket = (net_socket_private*)_socket;
877 mutex_lock(&socket->lock);
879 status_t status = add_select_sync_pool_entry(&socket->select_pool, sync,
880 event);
882 mutex_unlock(&socket->lock);
884 if (status != B_OK)
885 return status;
887 // check if the event is already present
888 // TODO: add support for poll() types
890 switch (event) {
891 case B_SELECT_READ:
893 ssize_t available = socket_read_avail(socket);
894 if ((ssize_t)socket->receive.low_water_mark <= available
895 || available < B_OK)
896 notify_select_event(sync, event);
897 break;
899 case B_SELECT_WRITE:
901 ssize_t available = socket_send_avail(socket);
902 if ((ssize_t)socket->send.low_water_mark <= available
903 || available < B_OK)
904 notify_select_event(sync, event);
905 break;
907 case B_SELECT_ERROR:
908 if (socket->error != B_OK)
909 notify_select_event(sync, event);
910 break;
913 return B_OK;
917 status_t
918 socket_cancel_notification(net_socket* _socket, uint8 event, selectsync* sync)
920 net_socket_private* socket = (net_socket_private*)_socket;
922 MutexLocker _(socket->lock);
923 return remove_select_sync_pool_entry(&socket->select_pool, sync, event);
927 status_t
928 socket_notify(net_socket* _socket, uint8 event, int32 value)
930 net_socket_private* socket = (net_socket_private*)_socket;
931 bool notify = true;
933 switch (event) {
934 case B_SELECT_READ:
935 if ((ssize_t)socket->receive.low_water_mark > value
936 && value >= B_OK)
937 notify = false;
938 break;
940 case B_SELECT_WRITE:
941 if ((ssize_t)socket->send.low_water_mark > value && value >= B_OK)
942 notify = false;
943 break;
945 case B_SELECT_ERROR:
946 socket->error = value;
947 break;
950 MutexLocker _(socket->lock);
952 if (notify && socket->select_pool != NULL) {
953 notify_select_event_pool(socket->select_pool, event);
955 if (event == B_SELECT_ERROR) {
956 // always notify read/write on error
957 notify_select_event_pool(socket->select_pool, B_SELECT_READ);
958 notify_select_event_pool(socket->select_pool, B_SELECT_WRITE);
962 return B_OK;
966 // #pragma mark - standard socket API
970 socket_accept(net_socket* socket, struct sockaddr* address,
971 socklen_t* _addressLength, net_socket** _acceptedSocket)
973 if ((socket->options & SO_ACCEPTCONN) == 0)
974 return B_BAD_VALUE;
976 net_socket* accepted;
977 status_t status = socket->first_info->accept(socket->first_protocol,
978 &accepted);
979 if (status != B_OK)
980 return status;
982 if (address && *_addressLength > 0) {
983 memcpy(address, &accepted->peer, min_c(*_addressLength,
984 min_c(accepted->peer.ss_len, sizeof(sockaddr_storage))));
985 *_addressLength = accepted->peer.ss_len;
988 *_acceptedSocket = accepted;
989 return B_OK;
994 socket_bind(net_socket* socket, const struct sockaddr* address,
995 socklen_t addressLength)
997 sockaddr empty;
998 if (address == NULL) {
999 // special - try to bind to an empty address, like INADDR_ANY
1000 memset(&empty, 0, sizeof(sockaddr));
1001 empty.sa_len = sizeof(sockaddr);
1002 empty.sa_family = socket->family;
1004 address = &empty;
1005 addressLength = sizeof(sockaddr);
1008 if (socket->address.ss_len != 0) {
1009 status_t status = socket->first_info->unbind(socket->first_protocol,
1010 (sockaddr*)&socket->address);
1011 if (status != B_OK)
1012 return status;
1015 memcpy(&socket->address, address, sizeof(sockaddr));
1016 socket->address.ss_len = sizeof(sockaddr_storage);
1018 status_t status = socket->first_info->bind(socket->first_protocol,
1019 (sockaddr*)address);
1020 if (status != B_OK) {
1021 // clear address again, as binding failed
1022 socket->address.ss_len = 0;
1025 return status;
1030 socket_connect(net_socket* socket, const struct sockaddr* address,
1031 socklen_t addressLength)
1033 if (address == NULL || addressLength == 0)
1034 return ENETUNREACH;
1036 if (socket->address.ss_len == 0) {
1037 // try to bind first
1038 status_t status = socket_bind(socket, NULL, 0);
1039 if (status != B_OK)
1040 return status;
1043 return socket->first_info->connect(socket->first_protocol, address);
1048 socket_getpeername(net_socket* socket, struct sockaddr* address,
1049 socklen_t* _addressLength)
1051 if (socket->peer.ss_len == 0)
1052 return ENOTCONN;
1054 memcpy(address, &socket->peer, min_c(*_addressLength, socket->peer.ss_len));
1055 *_addressLength = socket->peer.ss_len;
1056 return B_OK;
1061 socket_getsockname(net_socket* socket, struct sockaddr* address,
1062 socklen_t* _addressLength)
1064 if (socket->address.ss_len == 0)
1065 return ENOTCONN;
1067 memcpy(address, &socket->address, min_c(*_addressLength,
1068 socket->address.ss_len));
1069 *_addressLength = socket->address.ss_len;
1070 return B_OK;
1074 status_t
1075 socket_get_option(net_socket* socket, int level, int option, void* value,
1076 int* _length)
1078 if (level != SOL_SOCKET)
1079 return ENOPROTOOPT;
1081 switch (option) {
1082 case SO_SNDBUF:
1084 uint32* size = (uint32*)value;
1085 *size = socket->send.buffer_size;
1086 *_length = sizeof(uint32);
1087 return B_OK;
1090 case SO_RCVBUF:
1092 uint32* size = (uint32*)value;
1093 *size = socket->receive.buffer_size;
1094 *_length = sizeof(uint32);
1095 return B_OK;
1098 case SO_SNDLOWAT:
1100 uint32* size = (uint32*)value;
1101 *size = socket->send.low_water_mark;
1102 *_length = sizeof(uint32);
1103 return B_OK;
1106 case SO_RCVLOWAT:
1108 uint32* size = (uint32*)value;
1109 *size = socket->receive.low_water_mark;
1110 *_length = sizeof(uint32);
1111 return B_OK;
1114 case SO_RCVTIMEO:
1115 case SO_SNDTIMEO:
1117 if (*_length < (int)sizeof(struct timeval))
1118 return B_BAD_VALUE;
1120 bigtime_t timeout;
1121 if (option == SO_SNDTIMEO)
1122 timeout = socket->send.timeout;
1123 else
1124 timeout = socket->receive.timeout;
1125 if (timeout == B_INFINITE_TIMEOUT)
1126 timeout = 0;
1128 struct timeval* timeval = (struct timeval*)value;
1129 timeval->tv_sec = timeout / 1000000LL;
1130 timeval->tv_usec = timeout % 1000000LL;
1132 *_length = sizeof(struct timeval);
1133 return B_OK;
1136 case SO_NONBLOCK:
1138 int32* _set = (int32*)value;
1139 *_set = socket->receive.timeout == 0 && socket->send.timeout == 0;
1140 *_length = sizeof(int32);
1141 return B_OK;
1144 case SO_ACCEPTCONN:
1145 case SO_BROADCAST:
1146 case SO_DEBUG:
1147 case SO_DONTROUTE:
1148 case SO_KEEPALIVE:
1149 case SO_OOBINLINE:
1150 case SO_REUSEADDR:
1151 case SO_REUSEPORT:
1152 case SO_USELOOPBACK:
1154 int32* _set = (int32*)value;
1155 *_set = (socket->options & option) != 0;
1156 *_length = sizeof(int32);
1157 return B_OK;
1160 case SO_TYPE:
1162 int32* _set = (int32*)value;
1163 *_set = socket->type;
1164 *_length = sizeof(int32);
1165 return B_OK;
1168 case SO_ERROR:
1170 int32* _set = (int32*)value;
1171 *_set = socket->error;
1172 *_length = sizeof(int32);
1174 socket->error = B_OK;
1175 // clear error upon retrieval
1176 return B_OK;
1179 default:
1180 break;
1183 dprintf("socket_getsockopt: unknown option %d\n", option);
1184 return ENOPROTOOPT;
1189 socket_getsockopt(net_socket* socket, int level, int option, void* value,
1190 int* _length)
1192 return socket->first_protocol->module->getsockopt(socket->first_protocol,
1193 level, option, value, _length);
1198 socket_listen(net_socket* socket, int backlog)
1200 status_t status = socket->first_info->listen(socket->first_protocol,
1201 backlog);
1202 if (status == B_OK)
1203 socket->options |= SO_ACCEPTCONN;
1205 return status;
1209 ssize_t
1210 socket_receive(net_socket* socket, msghdr* header, void* data, size_t length,
1211 int flags)
1213 // If the protocol sports read_data_no_buffer() we use it.
1214 if (socket->first_info->read_data_no_buffer != NULL)
1215 return socket_receive_no_buffer(socket, header, data, length, flags);
1217 size_t totalLength = length;
1218 net_buffer* buffer;
1219 int i;
1221 // the convention to this function is that have header been
1222 // present, { data, length } would have been iovec[0] and is
1223 // always considered like that
1225 if (header) {
1226 // calculate the length considering all of the extra buffers
1227 for (i = 1; i < header->msg_iovlen; i++)
1228 totalLength += header->msg_iov[i].iov_len;
1231 status_t status = socket->first_info->read_data(
1232 socket->first_protocol, totalLength, flags, &buffer);
1233 if (status != B_OK)
1234 return status;
1236 // process ancillary data
1237 if (header != NULL) {
1238 if (buffer != NULL && header->msg_control != NULL) {
1239 ancillary_data_container* container
1240 = gNetBufferModule.get_ancillary_data(buffer);
1241 if (container != NULL)
1242 status = process_ancillary_data(socket, container, header);
1243 else
1244 status = process_ancillary_data(socket, buffer, header);
1245 if (status != B_OK) {
1246 gNetBufferModule.free(buffer);
1247 return status;
1249 } else
1250 header->msg_controllen = 0;
1253 // TODO: - returning a NULL buffer when received 0 bytes
1254 // may not make much sense as we still need the address
1255 // - gNetBufferModule.read() uses memcpy() instead of user_memcpy
1257 size_t nameLen = 0;
1259 if (header) {
1260 // TODO: - consider the control buffer options
1261 nameLen = header->msg_namelen;
1262 header->msg_namelen = 0;
1263 header->msg_flags = 0;
1266 if (buffer == NULL)
1267 return 0;
1269 size_t bytesReceived = buffer->size, bytesCopied = 0;
1271 length = min_c(bytesReceived, length);
1272 if (gNetBufferModule.read(buffer, 0, data, length) < B_OK) {
1273 gNetBufferModule.free(buffer);
1274 return ENOBUFS;
1277 // if first copy was a success, proceed to following
1278 // copies as required
1279 bytesCopied += length;
1281 if (header) {
1282 // we only start considering at iovec[1]
1283 // as { data, length } is iovec[0]
1284 for (i = 1; i < header->msg_iovlen && bytesCopied < bytesReceived; i++) {
1285 iovec& vec = header->msg_iov[i];
1286 size_t toRead = min_c(bytesReceived - bytesCopied, vec.iov_len);
1287 if (gNetBufferModule.read(buffer, bytesCopied, vec.iov_base,
1288 toRead) < B_OK) {
1289 break;
1292 bytesCopied += toRead;
1295 if (header->msg_name != NULL) {
1296 header->msg_namelen = min_c(nameLen, buffer->source->sa_len);
1297 memcpy(header->msg_name, buffer->source, header->msg_namelen);
1301 gNetBufferModule.free(buffer);
1303 if (bytesCopied < bytesReceived) {
1304 if (header)
1305 header->msg_flags = MSG_TRUNC;
1307 if (flags & MSG_TRUNC)
1308 return bytesReceived;
1311 return bytesCopied;
1315 ssize_t
1316 socket_send(net_socket* socket, msghdr* header, const void* data, size_t length,
1317 int flags)
1319 const sockaddr* address = NULL;
1320 socklen_t addressLength = 0;
1321 size_t bytesLeft = length;
1323 if (length > SSIZE_MAX)
1324 return B_BAD_VALUE;
1326 ancillary_data_container* ancillaryData = NULL;
1327 CObjectDeleter<ancillary_data_container> ancillaryDataDeleter(NULL,
1328 &delete_ancillary_data_container);
1330 if (header != NULL) {
1331 address = (const sockaddr*)header->msg_name;
1332 addressLength = header->msg_namelen;
1334 // get the ancillary data
1335 if (header->msg_control != NULL) {
1336 ancillaryData = create_ancillary_data_container();
1337 if (ancillaryData == NULL)
1338 return B_NO_MEMORY;
1339 ancillaryDataDeleter.SetTo(ancillaryData);
1341 status_t status = add_ancillary_data(socket, ancillaryData,
1342 (cmsghdr*)header->msg_control, header->msg_controllen);
1343 if (status != B_OK)
1344 return status;
1348 if (addressLength == 0)
1349 address = NULL;
1350 else if (address == NULL)
1351 return B_BAD_VALUE;
1353 if (socket->peer.ss_len != 0) {
1354 if (address != NULL)
1355 return EISCONN;
1357 // socket is connected, we use that address
1358 address = (struct sockaddr*)&socket->peer;
1359 addressLength = socket->peer.ss_len;
1362 if (address == NULL || addressLength == 0) {
1363 // don't know where to send to:
1364 return EDESTADDRREQ;
1367 if ((socket->first_info->flags & NET_PROTOCOL_ATOMIC_MESSAGES) != 0
1368 && bytesLeft > socket->send.buffer_size)
1369 return EMSGSIZE;
1371 if (socket->address.ss_len == 0) {
1372 // try to bind first
1373 status_t status = socket_bind(socket, NULL, 0);
1374 if (status != B_OK)
1375 return status;
1378 // If the protocol has a send_data_no_buffer() hook, we use that one.
1379 if (socket->first_info->send_data_no_buffer != NULL) {
1380 iovec stackVec = { (void*)data, length };
1381 iovec* vecs = header ? header->msg_iov : &stackVec;
1382 int vecCount = header ? header->msg_iovlen : 1;
1384 ssize_t written = socket->first_info->send_data_no_buffer(
1385 socket->first_protocol, vecs, vecCount, ancillaryData, address,
1386 addressLength);
1387 if (written > 0)
1388 ancillaryDataDeleter.Detach();
1389 return written;
1392 // By convention, if a header is given, the (data, length) equals the first
1393 // iovec. So drop the header, if it is the only iovec. Otherwise compute
1394 // the size of the remaining ones.
1395 if (header != NULL) {
1396 if (header->msg_iovlen <= 1)
1397 header = NULL;
1398 else {
1399 // TODO: The iovecs have already been copied to kernel space. Simplify!
1400 bytesLeft += compute_user_iovec_length(header->msg_iov + 1,
1401 header->msg_iovlen - 1);
1405 ssize_t bytesSent = 0;
1406 size_t vecOffset = 0;
1407 uint32 vecIndex = 0;
1409 while (bytesLeft > 0) {
1410 // TODO: useful, maybe even computed header space!
1411 net_buffer* buffer = gNetBufferModule.create(256);
1412 if (buffer == NULL)
1413 return ENOBUFS;
1415 while (buffer->size < socket->send.buffer_size
1416 && buffer->size < bytesLeft) {
1417 if (vecIndex > 0 && vecOffset == 0) {
1418 // retrieve next iovec buffer from header
1419 iovec vec;
1420 if (user_memcpy(&vec, header->msg_iov + vecIndex, sizeof(iovec))
1421 < B_OK) {
1422 gNetBufferModule.free(buffer);
1423 return B_BAD_ADDRESS;
1426 data = vec.iov_base;
1427 length = vec.iov_len;
1430 size_t bytes = length;
1431 if (buffer->size + bytes > socket->send.buffer_size)
1432 bytes = socket->send.buffer_size - buffer->size;
1434 if (gNetBufferModule.append(buffer, data, bytes) < B_OK) {
1435 gNetBufferModule.free(buffer);
1436 return ENOBUFS;
1439 if (bytes != length) {
1440 // partial send
1441 vecOffset = bytes;
1442 length -= vecOffset;
1443 data = (uint8*)data + vecOffset;
1444 } else if (header != NULL) {
1445 // proceed with next buffer, if any
1446 vecOffset = 0;
1447 vecIndex++;
1449 if (vecIndex >= (uint32)header->msg_iovlen)
1450 break;
1454 // attach ancillary data to the first buffer
1455 status_t status = B_OK;
1456 if (ancillaryData != NULL) {
1457 gNetBufferModule.set_ancillary_data(buffer, ancillaryData);
1458 ancillaryDataDeleter.Detach();
1459 ancillaryData = NULL;
1462 size_t bufferSize = buffer->size;
1463 buffer->flags = flags;
1464 memcpy(buffer->source, &socket->address, socket->address.ss_len);
1465 memcpy(buffer->destination, address, addressLength);
1466 buffer->destination->sa_len = addressLength;
1468 if (status == B_OK) {
1469 status = socket->first_info->send_data(socket->first_protocol,
1470 buffer);
1472 if (status != B_OK) {
1473 size_t sizeAfterSend = buffer->size;
1474 gNetBufferModule.free(buffer);
1476 if ((sizeAfterSend != bufferSize || bytesSent > 0)
1477 && (status == B_INTERRUPTED || status == B_WOULD_BLOCK)) {
1478 // this appears to be a partial write
1479 return bytesSent + (bufferSize - sizeAfterSend);
1481 return status;
1484 bytesLeft -= bufferSize;
1485 bytesSent += bufferSize;
1488 return bytesSent;
1492 status_t
1493 socket_set_option(net_socket* socket, int level, int option, const void* value,
1494 int length)
1496 if (level != SOL_SOCKET)
1497 return ENOPROTOOPT;
1499 TRACE("%s(socket %p, option %d\n", __FUNCTION__, socket, option);
1501 switch (option) {
1502 // TODO: implement other options!
1503 case SO_LINGER:
1505 if (length < (int)sizeof(struct linger))
1506 return B_BAD_VALUE;
1508 struct linger* linger = (struct linger*)value;
1509 if (linger->l_onoff) {
1510 socket->options |= SO_LINGER;
1511 socket->linger = linger->l_linger;
1512 } else {
1513 socket->options &= ~SO_LINGER;
1514 socket->linger = 0;
1516 return B_OK;
1519 case SO_SNDBUF:
1520 if (length != sizeof(uint32))
1521 return B_BAD_VALUE;
1523 socket->send.buffer_size = *(const uint32*)value;
1524 return B_OK;
1526 case SO_RCVBUF:
1527 if (length != sizeof(uint32))
1528 return B_BAD_VALUE;
1530 socket->receive.buffer_size = *(const uint32*)value;
1531 return B_OK;
1533 case SO_SNDLOWAT:
1534 if (length != sizeof(uint32))
1535 return B_BAD_VALUE;
1537 socket->send.low_water_mark = *(const uint32*)value;
1538 return B_OK;
1540 case SO_RCVLOWAT:
1541 if (length != sizeof(uint32))
1542 return B_BAD_VALUE;
1544 socket->receive.low_water_mark = *(const uint32*)value;
1545 return B_OK;
1547 case SO_RCVTIMEO:
1548 case SO_SNDTIMEO:
1550 if (length != sizeof(struct timeval))
1551 return B_BAD_VALUE;
1553 const struct timeval* timeval = (const struct timeval*)value;
1554 bigtime_t timeout = timeval->tv_sec * 1000000LL + timeval->tv_usec;
1555 if (timeout == 0)
1556 timeout = B_INFINITE_TIMEOUT;
1558 if (option == SO_SNDTIMEO)
1559 socket->send.timeout = timeout;
1560 else
1561 socket->receive.timeout = timeout;
1562 return B_OK;
1565 case SO_NONBLOCK:
1566 if (length != sizeof(int32))
1567 return B_BAD_VALUE;
1569 if (*(const int32*)value) {
1570 socket->send.timeout = 0;
1571 socket->receive.timeout = 0;
1572 } else {
1573 socket->send.timeout = B_INFINITE_TIMEOUT;
1574 socket->receive.timeout = B_INFINITE_TIMEOUT;
1576 return B_OK;
1578 case SO_BROADCAST:
1579 case SO_DEBUG:
1580 case SO_DONTROUTE:
1581 case SO_KEEPALIVE:
1582 case SO_OOBINLINE:
1583 case SO_REUSEADDR:
1584 case SO_REUSEPORT:
1585 case SO_USELOOPBACK:
1586 if (length != sizeof(int32))
1587 return B_BAD_VALUE;
1589 if (*(const int32*)value)
1590 socket->options |= option;
1591 else
1592 socket->options &= ~option;
1593 return B_OK;
1595 case SO_BINDTODEVICE:
1597 if (length != sizeof(uint32))
1598 return B_BAD_VALUE;
1600 // TODO: we might want to check if the device exists at all
1601 // (although it doesn't really harm when we don't)
1602 socket->bound_to_device = *(const uint32*)value;
1603 return B_OK;
1606 default:
1607 break;
1610 dprintf("socket_setsockopt: unknown option %d\n", option);
1611 return ENOPROTOOPT;
1616 socket_setsockopt(net_socket* socket, int level, int option, const void* value,
1617 int length)
1619 return socket->first_protocol->module->setsockopt(socket->first_protocol,
1620 level, option, value, length);
1625 socket_shutdown(net_socket* socket, int direction)
1627 return socket->first_info->shutdown(socket->first_protocol, direction);
1631 status_t
1632 socket_socketpair(int family, int type, int protocol, net_socket* sockets[2])
1634 sockets[0] = NULL;
1635 sockets[1] = NULL;
1637 // create sockets
1638 status_t error = socket_open(family, type, protocol, &sockets[0]);
1639 if (error != B_OK)
1640 return error;
1642 if (error == B_OK)
1643 error = socket_open(family, type, protocol, &sockets[1]);
1645 // bind one
1646 if (error == B_OK)
1647 error = socket_bind(sockets[0], NULL, 0);
1649 // start listening
1650 if (error == B_OK)
1651 error = socket_listen(sockets[0], 1);
1653 // connect them
1654 if (error == B_OK) {
1655 error = socket_connect(sockets[1], (sockaddr*)&sockets[0]->address,
1656 sockets[0]->address.ss_len);
1659 // accept a socket
1660 net_socket* acceptedSocket = NULL;
1661 if (error == B_OK)
1662 error = socket_accept(sockets[0], NULL, NULL, &acceptedSocket);
1664 if (error == B_OK) {
1665 // everything worked: close the listener socket
1666 socket_close(sockets[0]);
1667 socket_free(sockets[0]);
1668 sockets[0] = acceptedSocket;
1669 } else {
1670 // close sockets on error
1671 for (int i = 0; i < 2; i++) {
1672 if (sockets[i] != NULL) {
1673 socket_close(sockets[i]);
1674 socket_free(sockets[i]);
1675 sockets[i] = NULL;
1680 return error;
1684 // #pragma mark -
1687 static status_t
1688 socket_std_ops(int32 op, ...)
1690 switch (op) {
1691 case B_MODULE_INIT:
1693 new (&sSocketList) SocketList;
1694 mutex_init(&sSocketLock, "socket list");
1696 #if ENABLE_DEBUGGER_COMMANDS
1697 add_debugger_command("sockets", dump_sockets, "lists all sockets");
1698 add_debugger_command("socket", dump_socket, "dumps a socket");
1699 #endif
1700 return B_OK;
1702 case B_MODULE_UNINIT:
1703 ASSERT(sSocketList.IsEmpty());
1704 mutex_destroy(&sSocketLock);
1706 #if ENABLE_DEBUGGER_COMMANDS
1707 remove_debugger_command("socket", dump_socket);
1708 remove_debugger_command("sockets", dump_sockets);
1709 #endif
1710 return B_OK;
1712 default:
1713 return B_ERROR;
// The socket module's descriptor. The initializers are positional, so each
// entry is assigned to the member of net_socket_module_info at the same
// position; entries must therefore stay in the declaration order of that
// structure (declared outside this section -- verify before reordering or
// inserting hooks).
1718 net_socket_module_info gNetSocketModule = {
// module name and the standard hook handling B_MODULE_INIT/B_MODULE_UNINIT
1720 NET_SOCKET_MODULE_NAME,
1722 socket_std_ops
// socket hooks defined in this file
1724 socket_open,
1725 socket_close,
1726 socket_free,
1728 socket_readv,
1729 socket_writev,
1730 socket_control,
1732 socket_read_avail,
1733 socket_send_avail,
1735 socket_send_data,
1736 socket_receive_data,
// SOL_SOCKET level options stored in the net_socket itself
1738 socket_get_option,
1739 socket_set_option,
1741 socket_get_next_stat,
1743 // connections
1744 socket_acquire,
1745 socket_release,
1746 socket_spawn_pending,
1747 socket_dequeue_connected,
1748 socket_count_connected,
1749 socket_set_max_backlog,
1750 socket_has_parent,
1751 socket_connected,
1752 socket_aborted,
1754 // notifications
1755 socket_request_notification,
1756 socket_cancel_notification,
1757 socket_notify,
1759 // standard socket API
1760 socket_accept,
1761 socket_bind,
1762 socket_connect,
1763 socket_getpeername,
1764 socket_getsockname,
1765 socket_getsockopt,
1766 socket_listen,
1767 socket_receive,
1768 socket_send,
1769 socket_setsockopt,
1770 socket_shutdown,
1771 socket_socketpair