2 * Copyright 2006-2010, Haiku, Inc. All Rights Reserved.
3 * Distributed under the terms of the MIT License.
6 * Axel Dörfler, axeld@pinc-software.de
10 #include "stack_private.h"
14 #include <sys/ioctl.h>
20 #include <KernelExport.h>
23 #include <AutoDeleter.h>
25 #include <util/AutoLock.h>
26 #include <util/list.h>
27 #include <WeakReferenceable.h>
29 #include <fs/select_sync_pool.h>
32 #include <net_protocol.h>
33 #include <net_stack.h>
36 #include "ancillary_data.h"
40 //#define TRACE_SOCKET
42 # define TRACE(x...) dprintf(STACK_DEBUG_PREFIX x)
44 # define TRACE(x...) ;
48 struct net_socket_private
;
49 typedef DoublyLinkedList
<net_socket_private
> SocketList
;
51 struct net_socket_private
: net_socket
,
52 DoublyLinkedListLinkImpl
<net_socket_private
>,
55 ~net_socket_private();
57 void RemoveFromParent();
59 BWeakReference
<net_socket_private
> parent
;
63 SocketList pending_children
;
64 SocketList connected_children
;
66 struct select_sync_pool
* select_pool
;
70 bool is_in_socket_list
;
74 int socket_bind(net_socket
* socket
, const struct sockaddr
* address
,
75 socklen_t addressLength
);
76 int socket_setsockopt(net_socket
* socket
, int level
, int option
,
77 const void* value
, int length
);
78 ssize_t
socket_read_avail(net_socket
* socket
);
80 static SocketList sSocketList
;
81 static mutex sSocketLock
;
84 net_socket_private::net_socket_private()
91 is_in_socket_list(false)
93 first_protocol
= NULL
;
103 mutex_init(&lock
, "socket");
105 // set defaults (may be overridden by the protocols)
106 send
.buffer_size
= 65535;
107 send
.low_water_mark
= 1;
108 send
.timeout
= B_INFINITE_TIMEOUT
;
109 receive
.buffer_size
= 65535;
110 receive
.low_water_mark
= 1;
111 receive
.timeout
= B_INFINITE_TIMEOUT
;
115 net_socket_private::~net_socket_private()
117 TRACE("delete net_socket %p\n", this);
120 panic("socket still has a parent!");
122 if (is_in_socket_list
) {
123 MutexLocker
_(sSocketLock
);
124 sSocketList
.Remove(this);
129 // also delete all children of this socket
130 while (net_socket_private
* child
= pending_children
.RemoveHead()) {
131 child
->RemoveFromParent();
133 while (net_socket_private
* child
= connected_children
.RemoveHead()) {
134 child
->RemoveFromParent();
139 put_domain_protocols(this);
141 mutex_destroy(&lock
);
146 net_socket_private::RemoveFromParent()
148 ASSERT(!is_in_socket_list
&& parent
!= NULL
);
152 mutex_lock(&sSocketLock
);
153 sSocketList
.Add(this);
154 mutex_unlock(&sSocketLock
);
156 is_in_socket_list
= true;
166 compute_user_iovec_length(iovec
* userVec
, uint32 count
)
170 for (uint32 i
= 0; i
< count
; i
++) {
172 if (user_memcpy(&vec
, userVec
+ i
, sizeof(iovec
)) < B_OK
)
175 length
+= vec
.iov_len
;
183 create_socket(int family
, int type
, int protocol
, net_socket_private
** _socket
)
185 struct net_socket_private
* socket
= new(std::nothrow
) net_socket_private
;
188 status_t status
= socket
->InitCheck();
189 if (status
!= B_OK
) {
194 socket
->family
= family
;
196 socket
->protocol
= protocol
;
198 status
= get_domain_protocols(socket
);
199 if (status
!= B_OK
) {
204 TRACE("create net_socket %p (%u.%u.%u):\n", socket
, socket
->family
,
205 socket
->type
, socket
->protocol
);
208 net_protocol
* current
= socket
->first_protocol
;
209 for (int i
= 0; current
!= NULL
; current
= current
->next
, i
++)
210 TRACE(" [%d] %p %s\n", i
, current
, current
->module
->info
.name
);
219 add_ancillary_data(net_socket
* socket
, ancillary_data_container
* container
,
220 void* data
, size_t dataLen
)
222 cmsghdr
* header
= (cmsghdr
*)data
;
224 while (dataLen
> 0) {
225 if (header
->cmsg_len
< sizeof(cmsghdr
) || header
->cmsg_len
> dataLen
)
228 if (socket
->first_info
->add_ancillary_data
== NULL
)
229 return B_NOT_SUPPORTED
;
231 status_t status
= socket
->first_info
->add_ancillary_data(
232 socket
->first_protocol
, container
, header
);
236 dataLen
-= _ALIGN(header
->cmsg_len
);
237 header
= (cmsghdr
*)((uint8
*)header
+ _ALIGN(header
->cmsg_len
));
245 process_ancillary_data(net_socket
* socket
, ancillary_data_container
* container
,
246 msghdr
* messageHeader
)
248 uint8
* dataBuffer
= (uint8
*)messageHeader
->msg_control
;
249 int dataBufferLen
= messageHeader
->msg_controllen
;
251 if (container
== NULL
|| dataBuffer
== NULL
) {
252 messageHeader
->msg_controllen
= 0;
256 ancillary_data_header header
;
259 while ((data
= next_ancillary_data(container
, data
, &header
)) != NULL
) {
260 if (socket
->first_info
->process_ancillary_data
== NULL
)
261 return B_NOT_SUPPORTED
;
263 ssize_t bytesWritten
= socket
->first_info
->process_ancillary_data(
264 socket
->first_protocol
, &header
, data
, dataBuffer
, dataBufferLen
);
265 if (bytesWritten
< 0)
268 dataBuffer
+= bytesWritten
;
269 dataBufferLen
-= bytesWritten
;
272 messageHeader
->msg_controllen
-= dataBufferLen
;
279 process_ancillary_data(net_socket
* socket
,
280 net_buffer
* buffer
, msghdr
* messageHeader
)
282 void *dataBuffer
= messageHeader
->msg_control
;
283 ssize_t bytesWritten
;
285 if (dataBuffer
== NULL
) {
286 messageHeader
->msg_controllen
= 0;
290 if (socket
->first_info
->process_ancillary_data_no_container
== NULL
)
291 return B_NOT_SUPPORTED
;
293 bytesWritten
= socket
->first_info
->process_ancillary_data_no_container(
294 socket
->first_protocol
, buffer
, dataBuffer
,
295 messageHeader
->msg_controllen
);
296 if (bytesWritten
< 0)
298 messageHeader
->msg_controllen
= bytesWritten
;
305 socket_receive_no_buffer(net_socket
* socket
, msghdr
* header
, void* data
,
306 size_t length
, int flags
)
308 iovec stackVec
= { data
, length
};
309 iovec
* vecs
= header
? header
->msg_iov
: &stackVec
;
310 int vecCount
= header
? header
->msg_iovlen
: 1;
311 sockaddr
* address
= header
? (sockaddr
*)header
->msg_name
: NULL
;
312 socklen_t
* addressLen
= header
? &header
->msg_namelen
: NULL
;
314 ancillary_data_container
* ancillaryData
= NULL
;
315 ssize_t bytesRead
= socket
->first_info
->read_data_no_buffer(
316 socket
->first_protocol
, vecs
, vecCount
, &ancillaryData
, address
,
321 CObjectDeleter
<ancillary_data_container
> ancillaryDataDeleter(ancillaryData
,
322 &delete_ancillary_data_container
);
324 // process ancillary data
325 if (header
!= NULL
) {
326 status_t status
= process_ancillary_data(socket
, ancillaryData
, header
);
330 header
->msg_flags
= 0;
337 #if ENABLE_DEBUGGER_COMMANDS
341 print_socket_line(net_socket_private
* socket
, const char* prefix
)
343 BReference
<net_socket_private
> parent
= socket
->parent
.GetReference();
344 kprintf("%s%p %2d.%2d.%2d %6" B_PRId32
" %p %p %p%s\n", prefix
, socket
,
345 socket
->family
, socket
->type
, socket
->protocol
, socket
->owner
,
346 socket
->first_protocol
, socket
->first_info
, parent
.Get(),
347 parent
.Get() != NULL
? socket
->is_connected
? " (c)" : " (p)" : "");
352 dump_socket(int argc
, char** argv
)
355 kprintf("usage: %s [address]\n", argv
[0]);
359 net_socket_private
* socket
= (net_socket_private
*)parse_expression(argv
[1]);
361 kprintf("SOCKET %p\n", socket
);
362 kprintf(" family.type.protocol: %d.%d.%d\n",
363 socket
->family
, socket
->type
, socket
->protocol
);
364 BReference
<net_socket_private
> parent
= socket
->parent
.GetReference();
365 kprintf(" parent: %p\n", parent
.Get());
366 kprintf(" first protocol: %p\n", socket
->first_protocol
);
367 kprintf(" first module_info: %p\n", socket
->first_info
);
368 kprintf(" options: %x\n", socket
->options
);
369 kprintf(" linger: %d\n", socket
->linger
);
370 kprintf(" bound to device: %" B_PRIu32
"\n", socket
->bound_to_device
);
371 kprintf(" owner: %" B_PRId32
"\n", socket
->owner
);
372 kprintf(" max backlog: %" B_PRId32
"\n", socket
->max_backlog
);
373 kprintf(" is connected: %d\n", socket
->is_connected
);
374 kprintf(" child_count: %" B_PRIu32
"\n", socket
->child_count
);
376 if (socket
->child_count
== 0)
379 kprintf(" pending children:\n");
380 SocketList::Iterator iterator
= socket
->pending_children
.GetIterator();
381 while (net_socket_private
* child
= iterator
.Next()) {
382 print_socket_line(child
, " ");
385 kprintf(" connected children:\n");
386 iterator
= socket
->connected_children
.GetIterator();
387 while (net_socket_private
* child
= iterator
.Next()) {
388 print_socket_line(child
, " ");
396 dump_sockets(int argc
, char** argv
)
398 kprintf("address kind owner protocol module_info parent\n");
400 SocketList::Iterator iterator
= sSocketList
.GetIterator();
401 while (net_socket_private
* socket
= iterator
.Next()) {
402 print_socket_line(socket
, "");
404 SocketList::Iterator childIterator
405 = socket
->pending_children
.GetIterator();
406 while (net_socket_private
* child
= childIterator
.Next()) {
407 print_socket_line(child
, " ");
410 childIterator
= socket
->connected_children
.GetIterator();
411 while (net_socket_private
* child
= childIterator
.Next()) {
412 print_socket_line(child
, " ");
420 #endif // ENABLE_DEBUGGER_COMMANDS
427 socket_open(int family
, int type
, int protocol
, net_socket
** _socket
)
429 net_socket_private
* socket
;
430 status_t status
= create_socket(family
, type
, protocol
, &socket
);
434 status
= socket
->first_info
->open(socket
->first_protocol
);
435 if (status
!= B_OK
) {
440 socket
->owner
= team_get_current_team_id();
441 socket
->is_in_socket_list
= true;
443 mutex_lock(&sSocketLock
);
444 sSocketList
.Add(socket
);
445 mutex_unlock(&sSocketLock
);
453 socket_close(net_socket
* _socket
)
455 net_socket_private
* socket
= (net_socket_private
*)_socket
;
456 return socket
->first_info
->close(socket
->first_protocol
);
461 socket_free(net_socket
* _socket
)
463 net_socket_private
* socket
= (net_socket_private
*)_socket
;
464 socket
->first_info
->free(socket
->first_protocol
);
465 socket
->ReleaseReference();
470 socket_readv(net_socket
* socket
, const iovec
* vecs
, size_t vecCount
,
478 socket_writev(net_socket
* socket
, const iovec
* vecs
, size_t vecCount
,
481 if (socket
->peer
.ss_len
== 0)
484 if (socket
->address
.ss_len
== 0) {
486 status_t status
= socket_bind(socket
, NULL
, 0);
491 // TODO: useful, maybe even computed header space!
492 net_buffer
* buffer
= gNetBufferModule
.create(256);
496 // copy data into buffer
498 for (uint32 i
= 0; i
< vecCount
; i
++) {
499 if (gNetBufferModule
.append(buffer
, vecs
[i
].iov_base
,
500 vecs
[i
].iov_len
) < B_OK
) {
501 gNetBufferModule
.free(buffer
);
506 memcpy(buffer
->source
, &socket
->address
, socket
->address
.ss_len
);
507 memcpy(buffer
->destination
, &socket
->peer
, socket
->peer
.ss_len
);
508 size_t size
= buffer
->size
;
510 ssize_t bytesWritten
= socket
->first_info
->send_data(socket
->first_protocol
,
512 if (bytesWritten
< B_OK
) {
513 if (buffer
->size
!= size
) {
514 // this appears to be a partial write
515 *_length
= size
- buffer
->size
;
517 gNetBufferModule
.free(buffer
);
521 *_length
= bytesWritten
;
527 socket_control(net_socket
* socket
, int32 op
, void* data
, size_t length
)
537 if (!IS_USER_ADDRESS(data
)
538 || user_memcpy(&value
, data
, sizeof(int)) != B_OK
) {
539 return B_BAD_ADDRESS
;
544 return socket_setsockopt(socket
, SOL_SOCKET
, SO_NONBLOCK
, &value
,
553 int available
= (int)socket_read_avail(socket
);
558 if (!IS_USER_ADDRESS(data
)
559 || user_memcpy(data
, &available
, sizeof(available
))
561 return B_BAD_ADDRESS
;
564 *(int*)data
= available
;
569 case B_SET_BLOCKING_IO
:
570 case B_SET_NONBLOCKING_IO
:
572 int value
= op
== B_SET_NONBLOCKING_IO
;
573 return socket_setsockopt(socket
, SOL_SOCKET
, SO_NONBLOCK
, &value
,
578 return socket
->first_info
->control(socket
->first_protocol
,
579 LEVEL_DRIVER_IOCTL
, op
, data
, &length
);
584 socket_read_avail(net_socket
* socket
)
586 return socket
->first_info
->read_avail(socket
->first_protocol
);
591 socket_send_avail(net_socket
* socket
)
593 return socket
->first_info
->send_avail(socket
->first_protocol
);
598 socket_send_data(net_socket
* socket
, net_buffer
* buffer
)
600 return socket
->first_info
->send_data(socket
->first_protocol
,
606 socket_receive_data(net_socket
* socket
, size_t length
, uint32 flags
,
607 net_buffer
** _buffer
)
609 status_t status
= socket
->first_info
->read_data(socket
->first_protocol
,
610 length
, flags
, _buffer
);
614 if (*_buffer
&& length
< (*_buffer
)->size
) {
615 // discard any data behind the amount requested
616 gNetBufferModule
.trim(*_buffer
, length
);
624 socket_get_next_stat(uint32
* _cookie
, int family
, struct net_stat
* stat
)
626 MutexLocker
locker(sSocketLock
);
628 net_socket_private
* socket
= NULL
;
629 SocketList::Iterator iterator
= sSocketList
.GetIterator();
630 uint32 cookie
= *_cookie
;
634 socket
= iterator
.Next();
636 return B_ENTRY_NOT_FOUND
;
638 // TODO: also traverse the pending connections
642 if (family
== -1 || family
== socket
->family
)
646 *_cookie
= count
+ 1;
648 stat
->family
= socket
->family
;
649 stat
->type
= socket
->type
;
650 stat
->protocol
= socket
->protocol
;
651 stat
->owner
= socket
->owner
;
652 stat
->state
[0] = '\0';
653 memcpy(&stat
->address
, &socket
->address
, sizeof(struct sockaddr_storage
));
654 memcpy(&stat
->peer
, &socket
->peer
, sizeof(struct sockaddr_storage
));
655 stat
->receive_queue_size
= 0;
656 stat
->send_queue_size
= 0;
658 // fill in protocol specific data (if supported by the protocol)
659 size_t length
= sizeof(net_stat
);
660 socket
->first_info
->control(socket
->first_protocol
, socket
->protocol
,
661 NET_STAT_SOCKET
, stat
, &length
);
667 // #pragma mark - connections
671 socket_acquire(net_socket
* _socket
)
673 net_socket_private
* socket
= (net_socket_private
*)_socket
;
675 // During destruction, the socket might still be accessible over its
676 // endpoint protocol. We need to make sure the endpoint cannot acquire the
677 // socket anymore -- while not obvious, the endpoint protocol is responsible
678 // for the proper locking here.
679 if (socket
->CountReferences() == 0)
682 socket
->AcquireReference();
688 socket_release(net_socket
* _socket
)
690 net_socket_private
* socket
= (net_socket_private
*)_socket
;
691 return socket
->ReleaseReference();
696 socket_spawn_pending(net_socket
* _parent
, net_socket
** _socket
)
698 net_socket_private
* parent
= (net_socket_private
*)_parent
;
700 TRACE("%s(%p)\n", __FUNCTION__
, parent
);
702 MutexLocker
locker(parent
->lock
);
704 // We actually accept more pending connections to compensate for those
705 // that never complete, and also make sure at least a single connection
706 // can always be accepted
707 if (parent
->child_count
> 3 * parent
->max_backlog
/ 2)
710 net_socket_private
* socket
;
711 status_t status
= create_socket(parent
->family
, parent
->type
,
712 parent
->protocol
, &socket
);
716 // inherit parent's properties
717 socket
->send
= parent
->send
;
718 socket
->receive
= parent
->receive
;
719 socket
->options
= parent
->options
& ~SO_ACCEPTCONN
;
720 socket
->linger
= parent
->linger
;
721 socket
->owner
= parent
->owner
;
722 memcpy(&socket
->address
, &parent
->address
, parent
->address
.ss_len
);
723 memcpy(&socket
->peer
, &parent
->peer
, parent
->peer
.ss_len
);
725 // add to the parent's list of pending connections
726 parent
->pending_children
.Add(socket
);
727 socket
->parent
= parent
;
728 parent
->child_count
++;
735 /*! Dequeues a connected child from a parent socket.
736 It also acquires a reference to the child socket for the caller.
739 socket_dequeue_connected(net_socket
* _parent
, net_socket
** _socket
)
741 net_socket_private
* parent
= (net_socket_private
*)_parent
;
743 mutex_lock(&parent
->lock
);
745 net_socket_private
* socket
= parent
->connected_children
.RemoveHead();
746 if (socket
!= NULL
) {
747 socket
->AcquireReference();
748 socket
->RemoveFromParent();
749 parent
->child_count
--;
753 mutex_unlock(&parent
->lock
);
756 return B_ENTRY_NOT_FOUND
;
763 socket_count_connected(net_socket
* _parent
)
765 net_socket_private
* parent
= (net_socket_private
*)_parent
;
767 MutexLocker
_(parent
->lock
);
768 return parent
->connected_children
.Count();
773 socket_set_max_backlog(net_socket
* _socket
, uint32 backlog
)
775 net_socket_private
* socket
= (net_socket_private
*)_socket
;
777 // we enforce an upper limit of connections waiting to be accepted
781 MutexLocker
_(socket
->lock
);
783 // first remove the pending connections, then the already connected
785 net_socket_private
* child
;
786 while (socket
->child_count
> backlog
787 && (child
= socket
->pending_children
.RemoveTail()) != NULL
) {
788 child
->RemoveFromParent();
789 socket
->child_count
--;
791 while (socket
->child_count
> backlog
792 && (child
= socket
->connected_children
.RemoveTail()) != NULL
) {
793 child
->RemoveFromParent();
794 socket
->child_count
--;
797 socket
->max_backlog
= backlog
;
802 /*! Returns whether or not this socket has a parent. The parent might not be
803 valid anymore, though.
806 socket_has_parent(net_socket
* _socket
)
808 net_socket_private
* socket
= (net_socket_private
*)_socket
;
809 return socket
->parent
!= NULL
;
813 /*! The socket has been connected. It will be moved to the connected queue
814 of its parent socket.
817 socket_connected(net_socket
* _socket
)
819 net_socket_private
* socket
= (net_socket_private
*)_socket
;
821 TRACE("socket_connected(%p)\n", socket
);
823 BReference
<net_socket_private
> parent
= socket
->parent
.GetReference();
824 if (parent
.Get() == NULL
)
827 MutexLocker
_(parent
->lock
);
829 parent
->pending_children
.Remove(socket
);
830 parent
->connected_children
.Add(socket
);
831 socket
->is_connected
= true;
834 if (parent
->select_pool
)
835 notify_select_event_pool(parent
->select_pool
, B_SELECT_READ
);
841 /*! The socket has been aborted. Steals the parent's reference, and releases
845 socket_aborted(net_socket
* _socket
)
847 net_socket_private
* socket
= (net_socket_private
*)_socket
;
849 TRACE("socket_aborted(%p)\n", socket
);
851 BReference
<net_socket_private
> parent
= socket
->parent
.GetReference();
852 if (parent
.Get() == NULL
)
855 MutexLocker
_(parent
->lock
);
857 if (socket
->is_connected
)
858 parent
->connected_children
.Remove(socket
);
860 parent
->pending_children
.Remove(socket
);
862 parent
->child_count
--;
863 socket
->RemoveFromParent();
869 // #pragma mark - notifications
873 socket_request_notification(net_socket
* _socket
, uint8 event
, selectsync
* sync
)
875 net_socket_private
* socket
= (net_socket_private
*)_socket
;
877 mutex_lock(&socket
->lock
);
879 status_t status
= add_select_sync_pool_entry(&socket
->select_pool
, sync
,
882 mutex_unlock(&socket
->lock
);
887 // check if the event is already present
888 // TODO: add support for poll() types
893 ssize_t available
= socket_read_avail(socket
);
894 if ((ssize_t
)socket
->receive
.low_water_mark
<= available
896 notify_select_event(sync
, event
);
901 ssize_t available
= socket_send_avail(socket
);
902 if ((ssize_t
)socket
->send
.low_water_mark
<= available
904 notify_select_event(sync
, event
);
908 if (socket
->error
!= B_OK
)
909 notify_select_event(sync
, event
);
918 socket_cancel_notification(net_socket
* _socket
, uint8 event
, selectsync
* sync
)
920 net_socket_private
* socket
= (net_socket_private
*)_socket
;
922 MutexLocker
_(socket
->lock
);
923 return remove_select_sync_pool_entry(&socket
->select_pool
, sync
, event
);
928 socket_notify(net_socket
* _socket
, uint8 event
, int32 value
)
930 net_socket_private
* socket
= (net_socket_private
*)_socket
;
935 if ((ssize_t
)socket
->receive
.low_water_mark
> value
941 if ((ssize_t
)socket
->send
.low_water_mark
> value
&& value
>= B_OK
)
946 socket
->error
= value
;
950 MutexLocker
_(socket
->lock
);
952 if (notify
&& socket
->select_pool
!= NULL
) {
953 notify_select_event_pool(socket
->select_pool
, event
);
955 if (event
== B_SELECT_ERROR
) {
956 // always notify read/write on error
957 notify_select_event_pool(socket
->select_pool
, B_SELECT_READ
);
958 notify_select_event_pool(socket
->select_pool
, B_SELECT_WRITE
);
966 // #pragma mark - standard socket API
970 socket_accept(net_socket
* socket
, struct sockaddr
* address
,
971 socklen_t
* _addressLength
, net_socket
** _acceptedSocket
)
973 if ((socket
->options
& SO_ACCEPTCONN
) == 0)
976 net_socket
* accepted
;
977 status_t status
= socket
->first_info
->accept(socket
->first_protocol
,
982 if (address
&& *_addressLength
> 0) {
983 memcpy(address
, &accepted
->peer
, min_c(*_addressLength
,
984 min_c(accepted
->peer
.ss_len
, sizeof(sockaddr_storage
))));
985 *_addressLength
= accepted
->peer
.ss_len
;
988 *_acceptedSocket
= accepted
;
994 socket_bind(net_socket
* socket
, const struct sockaddr
* address
,
995 socklen_t addressLength
)
998 if (address
== NULL
) {
999 // special - try to bind to an empty address, like INADDR_ANY
1000 memset(&empty
, 0, sizeof(sockaddr
));
1001 empty
.sa_len
= sizeof(sockaddr
);
1002 empty
.sa_family
= socket
->family
;
1005 addressLength
= sizeof(sockaddr
);
1008 if (socket
->address
.ss_len
!= 0) {
1009 status_t status
= socket
->first_info
->unbind(socket
->first_protocol
,
1010 (sockaddr
*)&socket
->address
);
1015 memcpy(&socket
->address
, address
, sizeof(sockaddr
));
1016 socket
->address
.ss_len
= sizeof(sockaddr_storage
);
1018 status_t status
= socket
->first_info
->bind(socket
->first_protocol
,
1019 (sockaddr
*)address
);
1020 if (status
!= B_OK
) {
1021 // clear address again, as binding failed
1022 socket
->address
.ss_len
= 0;
1030 socket_connect(net_socket
* socket
, const struct sockaddr
* address
,
1031 socklen_t addressLength
)
1033 if (address
== NULL
|| addressLength
== 0)
1036 if (socket
->address
.ss_len
== 0) {
1037 // try to bind first
1038 status_t status
= socket_bind(socket
, NULL
, 0);
1043 return socket
->first_info
->connect(socket
->first_protocol
, address
);
1048 socket_getpeername(net_socket
* socket
, struct sockaddr
* address
,
1049 socklen_t
* _addressLength
)
1051 if (socket
->peer
.ss_len
== 0)
1054 memcpy(address
, &socket
->peer
, min_c(*_addressLength
, socket
->peer
.ss_len
));
1055 *_addressLength
= socket
->peer
.ss_len
;
1061 socket_getsockname(net_socket
* socket
, struct sockaddr
* address
,
1062 socklen_t
* _addressLength
)
1064 if (socket
->address
.ss_len
== 0)
1067 memcpy(address
, &socket
->address
, min_c(*_addressLength
,
1068 socket
->address
.ss_len
));
1069 *_addressLength
= socket
->address
.ss_len
;
1075 socket_get_option(net_socket
* socket
, int level
, int option
, void* value
,
1078 if (level
!= SOL_SOCKET
)
1084 uint32
* size
= (uint32
*)value
;
1085 *size
= socket
->send
.buffer_size
;
1086 *_length
= sizeof(uint32
);
1092 uint32
* size
= (uint32
*)value
;
1093 *size
= socket
->receive
.buffer_size
;
1094 *_length
= sizeof(uint32
);
1100 uint32
* size
= (uint32
*)value
;
1101 *size
= socket
->send
.low_water_mark
;
1102 *_length
= sizeof(uint32
);
1108 uint32
* size
= (uint32
*)value
;
1109 *size
= socket
->receive
.low_water_mark
;
1110 *_length
= sizeof(uint32
);
1117 if (*_length
< (int)sizeof(struct timeval
))
1121 if (option
== SO_SNDTIMEO
)
1122 timeout
= socket
->send
.timeout
;
1124 timeout
= socket
->receive
.timeout
;
1125 if (timeout
== B_INFINITE_TIMEOUT
)
1128 struct timeval
* timeval
= (struct timeval
*)value
;
1129 timeval
->tv_sec
= timeout
/ 1000000LL;
1130 timeval
->tv_usec
= timeout
% 1000000LL;
1132 *_length
= sizeof(struct timeval
);
1138 int32
* _set
= (int32
*)value
;
1139 *_set
= socket
->receive
.timeout
== 0 && socket
->send
.timeout
== 0;
1140 *_length
= sizeof(int32
);
1152 case SO_USELOOPBACK
:
1154 int32
* _set
= (int32
*)value
;
1155 *_set
= (socket
->options
& option
) != 0;
1156 *_length
= sizeof(int32
);
1162 int32
* _set
= (int32
*)value
;
1163 *_set
= socket
->type
;
1164 *_length
= sizeof(int32
);
1170 int32
* _set
= (int32
*)value
;
1171 *_set
= socket
->error
;
1172 *_length
= sizeof(int32
);
1174 socket
->error
= B_OK
;
1175 // clear error upon retrieval
1183 dprintf("socket_getsockopt: unknown option %d\n", option
);
1189 socket_getsockopt(net_socket
* socket
, int level
, int option
, void* value
,
1192 return socket
->first_protocol
->module
->getsockopt(socket
->first_protocol
,
1193 level
, option
, value
, _length
);
1198 socket_listen(net_socket
* socket
, int backlog
)
1200 status_t status
= socket
->first_info
->listen(socket
->first_protocol
,
1203 socket
->options
|= SO_ACCEPTCONN
;
1210 socket_receive(net_socket
* socket
, msghdr
* header
, void* data
, size_t length
,
1213 // If the protocol sports read_data_no_buffer() we use it.
1214 if (socket
->first_info
->read_data_no_buffer
!= NULL
)
1215 return socket_receive_no_buffer(socket
, header
, data
, length
, flags
);
1217 size_t totalLength
= length
;
1221 // The convention for this function is that, when a header is
1222 // present, { data, length } corresponds to iovec[0] and is
1223 // always treated as such.
1226 // calculate the length considering all of the extra buffers
1227 for (i
= 1; i
< header
->msg_iovlen
; i
++)
1228 totalLength
+= header
->msg_iov
[i
].iov_len
;
1231 status_t status
= socket
->first_info
->read_data(
1232 socket
->first_protocol
, totalLength
, flags
, &buffer
);
1236 // process ancillary data
1237 if (header
!= NULL
) {
1238 if (buffer
!= NULL
&& header
->msg_control
!= NULL
) {
1239 ancillary_data_container
* container
1240 = gNetBufferModule
.get_ancillary_data(buffer
);
1241 if (container
!= NULL
)
1242 status
= process_ancillary_data(socket
, container
, header
);
1244 status
= process_ancillary_data(socket
, buffer
, header
);
1245 if (status
!= B_OK
) {
1246 gNetBufferModule
.free(buffer
);
1250 header
->msg_controllen
= 0;
1253 // TODO: - returning a NULL buffer when received 0 bytes
1254 // may not make much sense as we still need the address
1255 // - gNetBufferModule.read() uses memcpy() instead of user_memcpy
1260 // TODO: - consider the control buffer options
1261 nameLen
= header
->msg_namelen
;
1262 header
->msg_namelen
= 0;
1263 header
->msg_flags
= 0;
1269 size_t bytesReceived
= buffer
->size
, bytesCopied
= 0;
1271 length
= min_c(bytesReceived
, length
);
1272 if (gNetBufferModule
.read(buffer
, 0, data
, length
) < B_OK
) {
1273 gNetBufferModule
.free(buffer
);
1277 // if first copy was a success, proceed to following
1278 // copies as required
1279 bytesCopied
+= length
;
1282 // we only start considering at iovec[1]
1283 // as { data, length } is iovec[0]
1284 for (i
= 1; i
< header
->msg_iovlen
&& bytesCopied
< bytesReceived
; i
++) {
1285 iovec
& vec
= header
->msg_iov
[i
];
1286 size_t toRead
= min_c(bytesReceived
- bytesCopied
, vec
.iov_len
);
1287 if (gNetBufferModule
.read(buffer
, bytesCopied
, vec
.iov_base
,
1292 bytesCopied
+= toRead
;
1295 if (header
->msg_name
!= NULL
) {
1296 header
->msg_namelen
= min_c(nameLen
, buffer
->source
->sa_len
);
1297 memcpy(header
->msg_name
, buffer
->source
, header
->msg_namelen
);
1301 gNetBufferModule
.free(buffer
);
1303 if (bytesCopied
< bytesReceived
) {
1305 header
->msg_flags
= MSG_TRUNC
;
1307 if (flags
& MSG_TRUNC
)
1308 return bytesReceived
;
1316 socket_send(net_socket
* socket
, msghdr
* header
, const void* data
, size_t length
,
1319 const sockaddr
* address
= NULL
;
1320 socklen_t addressLength
= 0;
1321 size_t bytesLeft
= length
;
1323 if (length
> SSIZE_MAX
)
1326 ancillary_data_container
* ancillaryData
= NULL
;
1327 CObjectDeleter
<ancillary_data_container
> ancillaryDataDeleter(NULL
,
1328 &delete_ancillary_data_container
);
1330 if (header
!= NULL
) {
1331 address
= (const sockaddr
*)header
->msg_name
;
1332 addressLength
= header
->msg_namelen
;
1334 // get the ancillary data
1335 if (header
->msg_control
!= NULL
) {
1336 ancillaryData
= create_ancillary_data_container();
1337 if (ancillaryData
== NULL
)
1339 ancillaryDataDeleter
.SetTo(ancillaryData
);
1341 status_t status
= add_ancillary_data(socket
, ancillaryData
,
1342 (cmsghdr
*)header
->msg_control
, header
->msg_controllen
);
1348 if (addressLength
== 0)
1350 else if (address
== NULL
)
1353 if (socket
->peer
.ss_len
!= 0) {
1354 if (address
!= NULL
)
1357 // socket is connected, we use that address
1358 address
= (struct sockaddr
*)&socket
->peer
;
1359 addressLength
= socket
->peer
.ss_len
;
1362 if (address
== NULL
|| addressLength
== 0) {
1363 // don't know where to send to:
1364 return EDESTADDRREQ
;
1367 if ((socket
->first_info
->flags
& NET_PROTOCOL_ATOMIC_MESSAGES
) != 0
1368 && bytesLeft
> socket
->send
.buffer_size
)
1371 if (socket
->address
.ss_len
== 0) {
1372 // try to bind first
1373 status_t status
= socket_bind(socket
, NULL
, 0);
1378 // If the protocol has a send_data_no_buffer() hook, we use that one.
1379 if (socket
->first_info
->send_data_no_buffer
!= NULL
) {
1380 iovec stackVec
= { (void*)data
, length
};
1381 iovec
* vecs
= header
? header
->msg_iov
: &stackVec
;
1382 int vecCount
= header
? header
->msg_iovlen
: 1;
1384 ssize_t written
= socket
->first_info
->send_data_no_buffer(
1385 socket
->first_protocol
, vecs
, vecCount
, ancillaryData
, address
,
1388 ancillaryDataDeleter
.Detach();
1392 // By convention, if a header is given, the (data, length) equals the first
1393 // iovec. So drop the header, if it is the only iovec. Otherwise compute
1394 // the size of the remaining ones.
1395 if (header
!= NULL
) {
1396 if (header
->msg_iovlen
<= 1)
1399 // TODO: The iovecs have already been copied to kernel space. Simplify!
1400 bytesLeft
+= compute_user_iovec_length(header
->msg_iov
+ 1,
1401 header
->msg_iovlen
- 1);
1405 ssize_t bytesSent
= 0;
1406 size_t vecOffset
= 0;
1407 uint32 vecIndex
= 0;
1409 while (bytesLeft
> 0) {
1410 // TODO: useful, maybe even computed header space!
1411 net_buffer
* buffer
= gNetBufferModule
.create(256);
1415 while (buffer
->size
< socket
->send
.buffer_size
1416 && buffer
->size
< bytesLeft
) {
1417 if (vecIndex
> 0 && vecOffset
== 0) {
1418 // retrieve next iovec buffer from header
1420 if (user_memcpy(&vec
, header
->msg_iov
+ vecIndex
, sizeof(iovec
))
1422 gNetBufferModule
.free(buffer
);
1423 return B_BAD_ADDRESS
;
1426 data
= vec
.iov_base
;
1427 length
= vec
.iov_len
;
1430 size_t bytes
= length
;
1431 if (buffer
->size
+ bytes
> socket
->send
.buffer_size
)
1432 bytes
= socket
->send
.buffer_size
- buffer
->size
;
1434 if (gNetBufferModule
.append(buffer
, data
, bytes
) < B_OK
) {
1435 gNetBufferModule
.free(buffer
);
1439 if (bytes
!= length
) {
1442 length
-= vecOffset
;
1443 data
= (uint8
*)data
+ vecOffset
;
1444 } else if (header
!= NULL
) {
1445 // proceed with next buffer, if any
1449 if (vecIndex
>= (uint32
)header
->msg_iovlen
)
1454 // attach ancillary data to the first buffer
1455 status_t status
= B_OK
;
1456 if (ancillaryData
!= NULL
) {
1457 gNetBufferModule
.set_ancillary_data(buffer
, ancillaryData
);
1458 ancillaryDataDeleter
.Detach();
1459 ancillaryData
= NULL
;
1462 size_t bufferSize
= buffer
->size
;
1463 buffer
->flags
= flags
;
1464 memcpy(buffer
->source
, &socket
->address
, socket
->address
.ss_len
);
1465 memcpy(buffer
->destination
, address
, addressLength
);
1466 buffer
->destination
->sa_len
= addressLength
;
1468 if (status
== B_OK
) {
1469 status
= socket
->first_info
->send_data(socket
->first_protocol
,
1472 if (status
!= B_OK
) {
1473 size_t sizeAfterSend
= buffer
->size
;
1474 gNetBufferModule
.free(buffer
);
1476 if ((sizeAfterSend
!= bufferSize
|| bytesSent
> 0)
1477 && (status
== B_INTERRUPTED
|| status
== B_WOULD_BLOCK
)) {
1478 // this appears to be a partial write
1479 return bytesSent
+ (bufferSize
- sizeAfterSend
);
1484 bytesLeft
-= bufferSize
;
1485 bytesSent
+= bufferSize
;
1493 socket_set_option(net_socket
* socket
, int level
, int option
, const void* value
,
1496 if (level
!= SOL_SOCKET
)
1499 TRACE("%s(socket %p, option %d\n", __FUNCTION__
, socket
, option
);
1502 // TODO: implement other options!
1505 if (length
< (int)sizeof(struct linger
))
1508 struct linger
* linger
= (struct linger
*)value
;
1509 if (linger
->l_onoff
) {
1510 socket
->options
|= SO_LINGER
;
1511 socket
->linger
= linger
->l_linger
;
1513 socket
->options
&= ~SO_LINGER
;
1520 if (length
!= sizeof(uint32
))
1523 socket
->send
.buffer_size
= *(const uint32
*)value
;
1527 if (length
!= sizeof(uint32
))
1530 socket
->receive
.buffer_size
= *(const uint32
*)value
;
1534 if (length
!= sizeof(uint32
))
1537 socket
->send
.low_water_mark
= *(const uint32
*)value
;
1541 if (length
!= sizeof(uint32
))
1544 socket
->receive
.low_water_mark
= *(const uint32
*)value
;
1550 if (length
!= sizeof(struct timeval
))
1553 const struct timeval
* timeval
= (const struct timeval
*)value
;
1554 bigtime_t timeout
= timeval
->tv_sec
* 1000000LL + timeval
->tv_usec
;
1556 timeout
= B_INFINITE_TIMEOUT
;
1558 if (option
== SO_SNDTIMEO
)
1559 socket
->send
.timeout
= timeout
;
1561 socket
->receive
.timeout
= timeout
;
1566 if (length
!= sizeof(int32
))
1569 if (*(const int32
*)value
) {
1570 socket
->send
.timeout
= 0;
1571 socket
->receive
.timeout
= 0;
1573 socket
->send
.timeout
= B_INFINITE_TIMEOUT
;
1574 socket
->receive
.timeout
= B_INFINITE_TIMEOUT
;
1585 case SO_USELOOPBACK
:
1586 if (length
!= sizeof(int32
))
1589 if (*(const int32
*)value
)
1590 socket
->options
|= option
;
1592 socket
->options
&= ~option
;
1595 case SO_BINDTODEVICE
:
1597 if (length
!= sizeof(uint32
))
1600 // TODO: we might want to check if the device exists at all
1601 // (although it doesn't really harm when we don't)
1602 socket
->bound_to_device
= *(const uint32
*)value
;
1610 dprintf("socket_setsockopt: unknown option %d\n", option
);
1616 socket_setsockopt(net_socket
* socket
, int level
, int option
, const void* value
,
1619 return socket
->first_protocol
->module
->setsockopt(socket
->first_protocol
,
1620 level
, option
, value
, length
);
1625 socket_shutdown(net_socket
* socket
, int direction
)
1627 return socket
->first_info
->shutdown(socket
->first_protocol
, direction
);
1632 socket_socketpair(int family
, int type
, int protocol
, net_socket
* sockets
[2])
1638 status_t error
= socket_open(family
, type
, protocol
, &sockets
[0]);
1643 error
= socket_open(family
, type
, protocol
, &sockets
[1]);
1647 error
= socket_bind(sockets
[0], NULL
, 0);
1651 error
= socket_listen(sockets
[0], 1);
1654 if (error
== B_OK
) {
1655 error
= socket_connect(sockets
[1], (sockaddr
*)&sockets
[0]->address
,
1656 sockets
[0]->address
.ss_len
);
1660 net_socket
* acceptedSocket
= NULL
;
1662 error
= socket_accept(sockets
[0], NULL
, NULL
, &acceptedSocket
);
1664 if (error
== B_OK
) {
1665 // everything worked: close the listener socket
1666 socket_close(sockets
[0]);
1667 socket_free(sockets
[0]);
1668 sockets
[0] = acceptedSocket
;
1670 // close sockets on error
1671 for (int i
= 0; i
< 2; i
++) {
1672 if (sockets
[i
] != NULL
) {
1673 socket_close(sockets
[i
]);
1674 socket_free(sockets
[i
]);
1688 socket_std_ops(int32 op
, ...)
1693 new (&sSocketList
) SocketList
;
1694 mutex_init(&sSocketLock
, "socket list");
1696 #if ENABLE_DEBUGGER_COMMANDS
1697 add_debugger_command("sockets", dump_sockets
, "lists all sockets");
1698 add_debugger_command("socket", dump_socket
, "dumps a socket");
1702 case B_MODULE_UNINIT
:
1703 ASSERT(sSocketList
.IsEmpty());
1704 mutex_destroy(&sSocketLock
);
1706 #if ENABLE_DEBUGGER_COMMANDS
1707 remove_debugger_command("socket", dump_socket
);
1708 remove_debugger_command("sockets", dump_sockets
);
1718 net_socket_module_info gNetSocketModule
= {
1720 NET_SOCKET_MODULE_NAME
,
1736 socket_receive_data
,
1741 socket_get_next_stat
,
1746 socket_spawn_pending
,
1747 socket_dequeue_connected
,
1748 socket_count_connected
,
1749 socket_set_max_backlog
,
1755 socket_request_notification
,
1756 socket_cancel_notification
,
1759 // standard socket API