/*
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *  This program is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU General Public License version 2 as
 *  published by the Free Software Foundation.
 */

#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/random.h>
#include <net/checksum.h>

#include <net/inet_sock.h>
#include <net/sock.h>
#include <net/xfrm.h>

#include <asm/ioctls.h>
#include <asm/semaphore.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/delay.h>
#include <linux/poll.h>

#include "ccid.h"
#include "dccp.h"
#include "feat.h"

DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

atomic_t dccp_orphan_count = ATOMIC_INIT(0);

EXPORT_SYMBOL_GPL(dccp_orphan_count);

struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
        .lhash_lock  = RW_LOCK_UNLOCKED,
        .lhash_users = ATOMIC_INIT(0),
        .lhash_wait  = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
};

EXPORT_SYMBOL_GPL(dccp_hashinfo);

/* the maximum queue length for tx in packets. 0 is no limit */
int sysctl_dccp_tx_qlen __read_mostly = 5;

void dccp_set_state(struct sock *sk, const int state)
{
        const int oldstate = sk->sk_state;

        dccp_pr_debug("%s(%p) %s --> %s\n", dccp_role(sk), sk,
                      dccp_state_name(oldstate), dccp_state_name(state));
        WARN_ON(state == oldstate);

        switch (state) {
        case DCCP_OPEN:
                if (oldstate != DCCP_OPEN)
                        DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
                break;

        case DCCP_CLOSED:
                if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
                    oldstate == DCCP_CLOSING)
                        DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

                sk->sk_prot->unhash(sk);
                if (inet_csk(sk)->icsk_bind_hash != NULL &&
                    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
                        inet_put_port(&dccp_hashinfo, sk);
                /* fall through */
        default:
                if (oldstate == DCCP_OPEN)
                        DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
        }

        /* Change state AFTER socket is unhashed to avoid closed
         * socket sitting in hash tables.
         */
        sk->sk_state = state;
}

EXPORT_SYMBOL_GPL(dccp_set_state);

static void dccp_finish_passive_close(struct sock *sk)
{
        switch (sk->sk_state) {
        case DCCP_PASSIVE_CLOSE:
                /* Node (client or server) has received Close packet. */
                dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
                dccp_set_state(sk, DCCP_CLOSED);
                break;
        case DCCP_PASSIVE_CLOSEREQ:
                /*
                 * Client received CloseReq. We set the `active' flag so that
                 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
                 */
                dccp_send_close(sk, 1);
                dccp_set_state(sk, DCCP_CLOSING);
        }
}

void dccp_done(struct sock *sk)
{
        dccp_set_state(sk, DCCP_CLOSED);
        dccp_clear_xmit_timers(sk);

        sk->sk_shutdown = SHUTDOWN_MASK;

        if (!sock_flag(sk, SOCK_DEAD))
                sk->sk_state_change(sk);
        else
                inet_csk_destroy_sock(sk);
}

EXPORT_SYMBOL_GPL(dccp_done);

const char *dccp_packet_name(const int type)
{
        static const char *dccp_packet_names[] = {
                [DCCP_PKT_REQUEST]  = "REQUEST",
                [DCCP_PKT_RESPONSE] = "RESPONSE",
                [DCCP_PKT_DATA]     = "DATA",
                [DCCP_PKT_ACK]      = "ACK",
                [DCCP_PKT_DATAACK]  = "DATAACK",
                [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
                [DCCP_PKT_CLOSE]    = "CLOSE",
                [DCCP_PKT_RESET]    = "RESET",
                [DCCP_PKT_SYNC]     = "SYNC",
                [DCCP_PKT_SYNCACK]  = "SYNCACK",
        };

        if (type >= DCCP_NR_PKT_TYPES)
                return "INVALID";
        else
                return dccp_packet_names[type];
}

EXPORT_SYMBOL_GPL(dccp_packet_name);

const char *dccp_state_name(const int state)
{
        static char *dccp_state_names[] = {
        [DCCP_OPEN]             = "OPEN",
        [DCCP_REQUESTING]       = "REQUESTING",
        [DCCP_PARTOPEN]         = "PARTOPEN",
        [DCCP_LISTEN]           = "LISTEN",
        [DCCP_RESPOND]          = "RESPOND",
        [DCCP_CLOSING]          = "CLOSING",
        [DCCP_ACTIVE_CLOSEREQ]  = "CLOSEREQ",
        [DCCP_PASSIVE_CLOSE]    = "PASSIVE_CLOSE",
        [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
        [DCCP_TIME_WAIT]        = "TIME_WAIT",
        [DCCP_CLOSED]           = "CLOSED",
        };

        if (state >= DCCP_MAX_STATES)
                return "INVALID STATE!";
        else
                return dccp_state_names[state];
}

EXPORT_SYMBOL_GPL(dccp_state_name);

void dccp_hash(struct sock *sk)
{
        inet_hash(&dccp_hashinfo, sk);
}

EXPORT_SYMBOL_GPL(dccp_hash);

void dccp_unhash(struct sock *sk)
{
        inet_unhash(&dccp_hashinfo, sk);
}

EXPORT_SYMBOL_GPL(dccp_unhash);

int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct dccp_minisock *dmsk = dccp_msk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);

        dccp_minisock_init(&dp->dccps_minisock);

        icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
        icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
        sk->sk_state            = DCCP_CLOSED;
        sk->sk_write_space      = dccp_write_space;
        icsk->icsk_sync_mss     = dccp_sync_mss;
        dp->dccps_mss_cache     = 536;
        dp->dccps_rate_last     = jiffies;
        dp->dccps_role          = DCCP_ROLE_UNDEFINED;
        dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
        dp->dccps_l_ack_ratio   = dp->dccps_r_ack_ratio = 1;

        dccp_init_xmit_timers(sk);

        /*
         * FIXME: We're hardcoding the CCID, and doing this at this point makes
         * the listening (master) sock get CCID control blocks, which is not
         * necessary, but for now, to not mess with the test userspace apps,
         * lets leave it here, later the real solution is to do this in a
         * setsockopt(CCIDs-I-want/accept). -acme
         */
        if (likely(ctl_sock_initialized)) {
                int rc = dccp_feat_init(dmsk);

                if (rc)
                        return rc;

                if (dmsk->dccpms_send_ack_vector) {
                        dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
                        if (dp->dccps_hc_rx_ackvec == NULL)
                                return -ENOMEM;
                }
                dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
                                                      sk, GFP_KERNEL);
                dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
                                                      sk, GFP_KERNEL);
                if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
                             dp->dccps_hc_tx_ccid == NULL)) {
                        ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
                        ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
                        if (dmsk->dccpms_send_ack_vector) {
                                dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
                                dp->dccps_hc_rx_ackvec = NULL;
                        }
                        dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
                        return -ENOMEM;
                }
        } else {
                /* control socket doesn't need feat nego */
                INIT_LIST_HEAD(&dmsk->dccpms_pending);
                INIT_LIST_HEAD(&dmsk->dccpms_conf);
        }

        return 0;
}

EXPORT_SYMBOL_GPL(dccp_init_sock);

int dccp_destroy_sock(struct sock *sk)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct dccp_minisock *dmsk = dccp_msk(sk);

        /*
         * DCCP doesn't use sk_write_queue, just sk_send_head
         * for retransmissions
         */
        if (sk->sk_send_head != NULL) {
                kfree_skb(sk->sk_send_head);
                sk->sk_send_head = NULL;
        }

        /* Clean up a referenced DCCP bind bucket. */
        if (inet_csk(sk)->icsk_bind_hash != NULL)
                inet_put_port(&dccp_hashinfo, sk);

        kfree(dp->dccps_service_list);
        dp->dccps_service_list = NULL;

        if (dmsk->dccpms_send_ack_vector) {
                dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
                dp->dccps_hc_rx_ackvec = NULL;
        }
        ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
        ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
        dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;

        /* clean up feature negotiation state */
        dccp_feat_clean(dmsk);

        return 0;
}

EXPORT_SYMBOL_GPL(dccp_destroy_sock);

static inline int dccp_listen_start(struct sock *sk, int backlog)
{
        struct dccp_sock *dp = dccp_sk(sk);

        dp->dccps_role = DCCP_ROLE_LISTEN;
        return inet_csk_listen_start(sk, backlog);
}

static inline int dccp_need_reset(int state)
{
        return state != DCCP_CLOSED && state != DCCP_LISTEN &&
               state != DCCP_REQUESTING;
}

int dccp_disconnect(struct sock *sk, int flags)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct inet_sock *inet = inet_sk(sk);
        int err = 0;
        const int old_state = sk->sk_state;

        if (old_state != DCCP_CLOSED)
                dccp_set_state(sk, DCCP_CLOSED);

        /*
         * This corresponds to the ABORT function of RFC793, sec. 3.8
         * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
         */
        if (old_state == DCCP_LISTEN) {
                inet_csk_listen_stop(sk);
        } else if (dccp_need_reset(old_state)) {
                dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
                sk->sk_err = ECONNRESET;
        } else if (old_state == DCCP_REQUESTING)
                sk->sk_err = ECONNRESET;

        dccp_clear_xmit_timers(sk);
        __skb_queue_purge(&sk->sk_receive_queue);
        if (sk->sk_send_head != NULL) {
                __kfree_skb(sk->sk_send_head);
                sk->sk_send_head = NULL;
        }

        inet->dport = 0;

        if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
                inet_reset_saddr(sk);

        sk->sk_shutdown = 0;
        sock_reset_flag(sk, SOCK_DONE);

        icsk->icsk_backoff = 0;
        inet_csk_delack_init(sk);
        __sk_dst_reset(sk);

        BUG_TRAP(!inet->num || icsk->icsk_bind_hash);

        sk->sk_error_report(sk);
        return err;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);

/*
 *      Wait for a DCCP event.
 *
 *      Note that we don't need to lock the socket, as the upper poll layers
 *      take care of normal races (between the test and the event) and we don't
 *      go look at any of the socket buffers directly.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
                       poll_table *wait)
{
        unsigned int mask;
        struct sock *sk = sock->sk;

        poll_wait(file, sk->sk_sleep, wait);
        if (sk->sk_state == DCCP_LISTEN)
                return inet_csk_listen_poll(sk);

        /* Socket is not locked. We are protected from async events
           by poll logic and correct handling of state changes
           made by other threads is impossible in any case.
         */

        mask = 0;
        if (sk->sk_err)
                mask = POLLERR;

        if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
                mask |= POLLHUP;
        if (sk->sk_shutdown & RCV_SHUTDOWN)
                mask |= POLLIN | POLLRDNORM | POLLRDHUP;

        /* Connected? */
        if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
                if (atomic_read(&sk->sk_rmem_alloc) > 0)
                        mask |= POLLIN | POLLRDNORM;

                if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
                        if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
                                mask |= POLLOUT | POLLWRNORM;
                        } else {  /* send SIGIO later */
                                set_bit(SOCK_ASYNC_NOSPACE,
                                        &sk->sk_socket->flags);
                                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

                                /* Race breaker. If space is freed after
                                 * wspace test but before the flags are set,
                                 * IO signal will be lost.
                                 */
                                if (sk_stream_wspace(sk) >=
                                    sk_stream_min_wspace(sk))
                                        mask |= POLLOUT | POLLWRNORM;
                        }
                }
        }
        return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll);

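/*
 * Example (illustrative sketch, not part of this file's kernel code): from
 * userspace, a poll(2) on a connected DCCP socket fd ends up in dccp_poll()
 * above, so the revents bits map directly onto the mask computed there:
 *
 *      struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLOUT };
 *
 *      if (poll(&pfd, 1, 1000) > 0) {
 *              if (pfd.revents & POLLIN)
 *                      ;       // at least one packet queued for reading
 *              if (pfd.revents & POLLOUT)
 *                      ;       // enough write space for another packet
 *      }
 */
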
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
        int rc = -ENOTCONN;

        lock_sock(sk);

        if (sk->sk_state == DCCP_LISTEN)
                goto out;

        switch (cmd) {
        case SIOCINQ: {
                struct sk_buff *skb;
                unsigned long amount = 0;

                skb = skb_peek(&sk->sk_receive_queue);
                if (skb != NULL) {
                        /*
                         * We will only return the amount of this packet since
                         * that is all that will be read.
                         */
                        amount = skb->len;
                }
                rc = put_user(amount, (int __user *)arg);
        }
                break;
        default:
                rc = -ENOIOCTLCMD;
                break;
        }
out:
        release_sock(sk);
        return rc;
}

EXPORT_SYMBOL_GPL(dccp_ioctl);

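/*
 * Example (illustrative sketch, not part of this file's kernel code): since
 * a single read never spans packets, SIOCINQ reports only the length of the
 * packet at the head of the receive queue:
 *
 *      int pending;
 *
 *      if (ioctl(fd, SIOCINQ, &pending) == 0)
 *              printf("next read returns at most %d bytes\n", pending);
 */
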
static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
                                   char __user *optval, int optlen)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct dccp_service_list *sl = NULL;

        if (service == DCCP_SERVICE_INVALID_VALUE ||
            optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
                return -EINVAL;

        if (optlen > sizeof(service)) {
                sl = kmalloc(optlen, GFP_KERNEL);
                if (sl == NULL)
                        return -ENOMEM;

                sl->dccpsl_nr = optlen / sizeof(u32) - 1;
                if (copy_from_user(sl->dccpsl_list,
                                   optval + sizeof(service),
                                   optlen - sizeof(service)) ||
                    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
                        kfree(sl);
                        return -EFAULT;
                }
        }

        lock_sock(sk);
        dp->dccps_service = service;

        kfree(dp->dccps_service_list);

        dp->dccps_service_list = sl;
        release_sock(sk);
        return 0;
}

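/*
 * Example (illustrative sketch, not part of this file's kernel code): the
 * option value parsed above is one __be32 service code, optionally followed
 * by further codes, all in network byte order; the service code 42 here is
 * an arbitrary placeholder:
 *
 *      uint32_t codes[2] = { htonl(42), htonl(43) };
 *
 *      setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
 *                 codes, sizeof(codes));
 */
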
/* byte 1 is the feature, the rest is the preference list */
static int dccp_setsockopt_change(struct sock *sk, int type,
                                  struct dccp_so_feat __user *optval)
{
        struct dccp_so_feat opt;
        u8 *val;
        int rc;

        if (copy_from_user(&opt, optval, sizeof(opt)))
                return -EFAULT;

        val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
        if (!val)
                return -ENOMEM;

        if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
                rc = -EFAULT;
                goto out_free_val;
        }

        rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
                              val, opt.dccpsf_len, GFP_KERNEL);
        if (rc)
                goto out_free_val;

out:
        return rc;

out_free_val:
        kfree(val);
        goto out;
}

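/*
 * Example (illustrative sketch, not part of this file's kernel code): a
 * struct dccp_so_feat as consumed above names a feature and carries its
 * preference list, e.g. proposing CCID3 then CCID2 for the local
 * half-connection (assuming the DCCPF_CCID feature number from
 * <linux/dccp.h>):
 *
 *      uint8_t prefs[] = { 3, 2 };
 *      struct dccp_so_feat fval = {
 *              .dccpsf_feat = DCCPF_CCID,
 *              .dccpsf_val  = prefs,
 *              .dccpsf_len  = sizeof(prefs),
 *      };
 *
 *      setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_CHANGE_L, &fval, sizeof(fval));
 */
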
static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
                              char __user *optval, int optlen)
{
        struct dccp_sock *dp = dccp_sk(sk);
        int val, err = 0;

        if (optlen < sizeof(int))
                return -EINVAL;

        if (get_user(val, (int __user *)optval))
                return -EFAULT;

        if (optname == DCCP_SOCKOPT_SERVICE)
                return dccp_setsockopt_service(sk, val, optval, optlen);

        lock_sock(sk);
        switch (optname) {
        case DCCP_SOCKOPT_PACKET_SIZE:
                DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
                err = 0;
                break;
        case DCCP_SOCKOPT_CHANGE_L:
                if (optlen != sizeof(struct dccp_so_feat))
                        err = -EINVAL;
                else
                        err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
                                                     (struct dccp_so_feat __user *)
                                                     optval);
                break;
        case DCCP_SOCKOPT_CHANGE_R:
                if (optlen != sizeof(struct dccp_so_feat))
                        err = -EINVAL;
                else
                        err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
                                                     (struct dccp_so_feat __user *)
                                                     optval);
                break;
        case DCCP_SOCKOPT_SERVER_TIMEWAIT:
                if (dp->dccps_role != DCCP_ROLE_SERVER)
                        err = -EOPNOTSUPP;
                else
                        dp->dccps_server_timewait = (val != 0);
                break;
        case DCCP_SOCKOPT_SEND_CSCOV:   /* sender side, RFC 4340, sec. 9.2 */
                if (val < 0 || val > 15)
                        err = -EINVAL;
                else
                        dp->dccps_pcslen = val;
                break;
        case DCCP_SOCKOPT_RECV_CSCOV:   /* receiver side, RFC 4340 sec. 9.2.1 */
                if (val < 0 || val > 15)
                        err = -EINVAL;
                else {
                        dp->dccps_pcrlen = val;
                        /* FIXME: add feature negotiation,
                         * ChangeL(MinimumChecksumCoverage, val) */
                }
                break;
        default:
                err = -ENOPROTOOPT;
                break;
        }

        release_sock(sk);
        return err;
}

int dccp_setsockopt(struct sock *sk, int level, int optname,
                    char __user *optval, int optlen)
{
        if (level != SOL_DCCP)
                return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
                                                             optname, optval,
                                                             optlen);
        return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_setsockopt);

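/*
 * Example (illustrative sketch, not part of this file's kernel code):
 * limiting the sender's checksum coverage via the option handled above;
 * val = 1 covers the headers only, larger values (up to 15) cover
 * increasing initial parts of the payload (RFC 4340, sec. 9.2):
 *
 *      int cscov = 1;
 *
 *      setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SEND_CSCOV,
 *                 &cscov, sizeof(cscov));
 */
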
#ifdef CONFIG_COMPAT
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
                           char __user *optval, int optlen)
{
        if (level != SOL_DCCP)
                return inet_csk_compat_setsockopt(sk, level, optname,
                                                  optval, optlen);
        return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif

static int dccp_getsockopt_service(struct sock *sk, int len,
                                   __be32 __user *optval,
                                   int __user *optlen)
{
        const struct dccp_sock *dp = dccp_sk(sk);
        const struct dccp_service_list *sl;
        int err = -ENOENT, slen = 0, total_len = sizeof(u32);

        lock_sock(sk);
        if ((sl = dp->dccps_service_list) != NULL) {
                slen = sl->dccpsl_nr * sizeof(u32);
                total_len += slen;
        }

        err = -EINVAL;
        if (total_len > len)
                goto out;

        err = 0;
        if (put_user(total_len, optlen) ||
            put_user(dp->dccps_service, optval) ||
            (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
                err = -EFAULT;
out:
        release_sock(sk);
        return err;
}

static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
                              char __user *optval, int __user *optlen)
{
        struct dccp_sock *dp;
        int val, len;

        if (get_user(len, optlen))
                return -EFAULT;

        if (len < (int)sizeof(int))
                return -EINVAL;

        dp = dccp_sk(sk);

        switch (optname) {
        case DCCP_SOCKOPT_PACKET_SIZE:
                DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
                return 0;
        case DCCP_SOCKOPT_SERVICE:
                return dccp_getsockopt_service(sk, len,
                                               (__be32 __user *)optval, optlen);
        case DCCP_SOCKOPT_GET_CUR_MPS:
                val = dp->dccps_mss_cache;
                break;
        case DCCP_SOCKOPT_SERVER_TIMEWAIT:
                val = dp->dccps_server_timewait;
                break;
        case DCCP_SOCKOPT_SEND_CSCOV:
                val = dp->dccps_pcslen;
                break;
        case DCCP_SOCKOPT_RECV_CSCOV:
                val = dp->dccps_pcrlen;
                break;
        case 128 ... 191:
                return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
                                             len, (u32 __user *)optval, optlen);
        case 192 ... 255:
                return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
                                             len, (u32 __user *)optval, optlen);
        default:
                return -ENOPROTOOPT;
        }

        len = sizeof(val);
        if (put_user(len, optlen) || copy_to_user(optval, &val, len))
                return -EFAULT;

        return 0;
}

int dccp_getsockopt(struct sock *sk, int level, int optname,
                    char __user *optval, int __user *optlen)
{
        if (level != SOL_DCCP)
                return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
                                                             optname, optval,
                                                             optlen);
        return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_getsockopt);

#ifdef CONFIG_COMPAT
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
                           char __user *optval, int __user *optlen)
{
        if (level != SOL_DCCP)
                return inet_csk_compat_getsockopt(sk, level, optname,
                                                  optval, optlen);
        return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif

int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len)
{
        const struct dccp_sock *dp = dccp_sk(sk);
        const int flags = msg->msg_flags;
        const int noblock = flags & MSG_DONTWAIT;
        struct sk_buff *skb;
        int rc, size;
        long timeo;

        if (len > dp->dccps_mss_cache)
                return -EMSGSIZE;

        lock_sock(sk);

        if (sysctl_dccp_tx_qlen &&
            (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
                rc = -EAGAIN;
                goto out_release;
        }

        timeo = sock_sndtimeo(sk, noblock);

        /*
         * We have to use sk_stream_wait_connect here to set sk_write_pending,
         * so that the trick in dccp_rcv_request_sent_state_process() works.
         */
        /* Wait for a connection to finish. */
        if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
                if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
                        goto out_release;

        size = sk->sk_prot->max_header + len;
        release_sock(sk);
        skb = sock_alloc_send_skb(sk, size, noblock, &rc);
        lock_sock(sk);
        if (skb == NULL)
                goto out_release;

        skb_reserve(skb, sk->sk_prot->max_header);
        rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
        if (rc != 0)
                goto out_discard;

        skb_queue_tail(&sk->sk_write_queue, skb);
        dccp_write_xmit(sk, 0);
out_release:
        release_sock(sk);
        return rc ?: len;
out_discard:
        kfree_skb(skb);
        goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);

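/*
 * Example (illustrative sketch, not part of this file's kernel code): DCCP
 * is packet-oriented, so each send(2) becomes one DCCP-Data packet and must
 * fit within the current MPS; anything longer fails with EMSGSIZE, and a
 * full tx queue (sysctl_dccp_tx_qlen packets) yields EAGAIN, as above:
 *
 *      char buf[256];
 *      ssize_t n = send(fd, buf, sizeof(buf), 0);
 *
 *      if (n < 0 && errno == EAGAIN)
 *              ;       // tx queue is full, try again later
 */
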
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len, int nonblock, int flags, int *addr_len)
{
        const struct dccp_hdr *dh;
        long timeo;

        lock_sock(sk);

        if (sk->sk_state == DCCP_LISTEN) {
                len = -ENOTCONN;
                goto out;
        }

        timeo = sock_rcvtimeo(sk, nonblock);

        do {
                struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

                if (skb == NULL)
                        goto verify_sock_status;

                dh = dccp_hdr(skb);

                switch (dh->dccph_type) {
                case DCCP_PKT_DATA:
                case DCCP_PKT_DATAACK:
                        goto found_ok_skb;

                case DCCP_PKT_CLOSE:
                case DCCP_PKT_CLOSEREQ:
                        if (!(flags & MSG_PEEK))
                                dccp_finish_passive_close(sk);
                        /* fall through */
                case DCCP_PKT_RESET:
                        dccp_pr_debug("found fin (%s) ok!\n",
                                      dccp_packet_name(dh->dccph_type));
                        len = 0;
                        goto found_fin_ok;
                default:
                        dccp_pr_debug("packet_type=%s\n",
                                      dccp_packet_name(dh->dccph_type));
                        sk_eat_skb(sk, skb, 0);
                }
verify_sock_status:
                if (sock_flag(sk, SOCK_DONE)) {
                        len = 0;
                        break;
                }

                if (sk->sk_err) {
                        len = sock_error(sk);
                        break;
                }

                if (sk->sk_shutdown & RCV_SHUTDOWN) {
                        len = 0;
                        break;
                }

                if (sk->sk_state == DCCP_CLOSED) {
                        if (!sock_flag(sk, SOCK_DONE)) {
                                /* This occurs when user tries to read
                                 * from never connected socket.
                                 */
                                len = -ENOTCONN;
                                break;
                        }
                        len = 0;
                        break;
                }

                if (!timeo) {
                        len = -EAGAIN;
                        break;
                }

                if (signal_pending(current)) {
                        len = sock_intr_errno(timeo);
                        break;
                }

                sk_wait_data(sk, &timeo);
                continue;
        found_ok_skb:
                if (len > skb->len)
                        len = skb->len;
                else if (len < skb->len)
                        msg->msg_flags |= MSG_TRUNC;

                if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
                        /* Exception. Bailout! */
                        len = -EFAULT;
                        break;
                }
        found_fin_ok:
                if (!(flags & MSG_PEEK))
                        sk_eat_skb(sk, skb, 0);
                break;
        } while (1);
out:
        release_sock(sk);
        return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);

int inet_dccp_listen(struct socket *sock, int backlog)
{
        struct sock *sk = sock->sk;
        unsigned char old_state;
        int err;

        lock_sock(sk);

        err = -EINVAL;
        if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
                goto out;

        old_state = sk->sk_state;
        if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
                goto out;

        /* Really, if the socket is already in listen state
         * we can only allow the backlog to be adjusted.
         */
        if (old_state != DCCP_LISTEN) {
                /*
                 * FIXME: here it probably should be sk->sk_prot->listen_start
                 * see tcp_listen_start
                 */
                err = dccp_listen_start(sk, backlog);
                if (err)
                        goto out;
        }
        sk->sk_max_ack_backlog = backlog;
        err = 0;

out:
        release_sock(sk);
        return err;
}

EXPORT_SYMBOL_GPL(inet_dccp_listen);

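/*
 * Example (illustrative sketch, not part of this file's kernel code): the
 * listen path above is reached from a userspace server along the usual
 * lines of (service code 42 and the addr setup are placeholders):
 *
 *      int fd = socket(AF_INET, SOCK_DCCP, IPPROTO_DCCP);
 *      uint32_t service = htonl(42);
 *
 *      setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
 *                 &service, sizeof(service));
 *      bind(fd, (struct sockaddr *)&addr, sizeof(addr));
 *      listen(fd, 5);  // ends up in inet_dccp_listen()
 */
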
static void dccp_terminate_connection(struct sock *sk)
{
        u8 next_state = DCCP_CLOSED;

        switch (sk->sk_state) {
        case DCCP_PASSIVE_CLOSE:
        case DCCP_PASSIVE_CLOSEREQ:
                dccp_finish_passive_close(sk);
                break;
        case DCCP_PARTOPEN:
                dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
                inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
                /* fall through */
        case DCCP_OPEN:
                dccp_send_close(sk, 1);

                if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
                    !dccp_sk(sk)->dccps_server_timewait)
                        next_state = DCCP_ACTIVE_CLOSEREQ;
                else
                        next_state = DCCP_CLOSING;
                /* fall through */
        default:
                dccp_set_state(sk, next_state);
        }
}

void dccp_close(struct sock *sk, long timeout)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct sk_buff *skb;
        u32 data_was_unread = 0;
        int state;

        lock_sock(sk);

        sk->sk_shutdown = SHUTDOWN_MASK;

        if (sk->sk_state == DCCP_LISTEN) {
                dccp_set_state(sk, DCCP_CLOSED);

                /* Special case. */
                inet_csk_listen_stop(sk);

                goto adjudge_to_death;
        }

        sk_stop_timer(sk, &dp->dccps_xmit_timer);

        /*
         * We need to flush the recv. buffs.  We do this only on the
         * descriptor close, not protocol-sourced closes, because the
         * reader process may not have drained the data yet!
         */
        while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                data_was_unread += skb->len;
                __kfree_skb(skb);
        }

        if (data_was_unread) {
                /* Unread data was tossed, send an appropriate Reset Code */
                DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
                dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
                dccp_set_state(sk, DCCP_CLOSED);
        } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
                /* Check zero linger _after_ checking for unread data. */
                sk->sk_prot->disconnect(sk, 0);
        } else if (sk->sk_state != DCCP_CLOSED) {
                dccp_terminate_connection(sk);
        }

        sk_stream_wait_close(sk, timeout);

adjudge_to_death:
        state = sk->sk_state;
        sock_hold(sk);
        sock_orphan(sk);
        atomic_inc(sk->sk_prot->orphan_count);

        /*
         * It is the last release_sock in its life. It will remove backlog.
         */
        release_sock(sk);
        /*
         * Now socket is owned by kernel and we acquire BH lock
         * to finish close. No need to check for user refs.
         */
        local_bh_disable();
        bh_lock_sock(sk);
        BUG_TRAP(!sock_owned_by_user(sk));

        /* Have we already been destroyed by a softirq or backlog? */
        if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
                goto out;

        if (sk->sk_state == DCCP_CLOSED)
                inet_csk_destroy_sock(sk);

        /* Otherwise, socket is reprieved until protocol close. */

out:
        bh_unlock_sock(sk);
        local_bh_enable();
        sock_put(sk);
}

EXPORT_SYMBOL_GPL(dccp_close);

void dccp_shutdown(struct sock *sk, int how)
{
        dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);

static int __init dccp_mib_init(void)
{
        int rc = -ENOMEM;

        dccp_statistics[0] = alloc_percpu(struct dccp_mib);
        if (dccp_statistics[0] == NULL)
                goto out;

        dccp_statistics[1] = alloc_percpu(struct dccp_mib);
        if (dccp_statistics[1] == NULL)
                goto out_free_one;

        rc = 0;
out:
        return rc;
out_free_one:
        free_percpu(dccp_statistics[0]);
        dccp_statistics[0] = NULL;
        goto out;
}

static void dccp_mib_exit(void)
{
        free_percpu(dccp_statistics[0]);
        free_percpu(dccp_statistics[1]);
        dccp_statistics[0] = dccp_statistics[1] = NULL;
}

static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
int dccp_debug;
module_param(dccp_debug, bool, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif

static int __init dccp_init(void)
{
        unsigned long goal;
        int ehash_order, bhash_order, i;
        int rc = -ENOBUFS;

        dccp_hashinfo.bind_bucket_cachep =
                kmem_cache_create("dccp_bind_bucket",
                                  sizeof(struct inet_bind_bucket), 0,
                                  SLAB_HWCACHE_ALIGN, NULL);
        if (!dccp_hashinfo.bind_bucket_cachep)
                goto out;

        /*
         * Size and allocate the main established and bind bucket
         * hash tables.
         *
         * The methodology is similar to that of the buffer cache.
         */
        if (num_physpages >= (128 * 1024))
                goal = num_physpages >> (21 - PAGE_SHIFT);
        else
                goal = num_physpages >> (23 - PAGE_SHIFT);

        if (thash_entries)
                goal = (thash_entries *
                        sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
        for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
                ;
        do {
                dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
                                        sizeof(struct inet_ehash_bucket);
                /* round the table size down to a power of two */
                while (dccp_hashinfo.ehash_size &
                       (dccp_hashinfo.ehash_size - 1))
                        dccp_hashinfo.ehash_size--;
                dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
                        __get_free_pages(GFP_ATOMIC, ehash_order);
        } while (!dccp_hashinfo.ehash && --ehash_order > 0);

        if (!dccp_hashinfo.ehash) {
                DCCP_CRIT("Failed to allocate DCCP established hash table");
                goto out_free_bind_bucket_cachep;
        }

        for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
                INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
                INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
        }

        if (inet_ehash_locks_alloc(&dccp_hashinfo))
                goto out_free_dccp_ehash;

        bhash_order = ehash_order;

        do {
                dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
                                        sizeof(struct inet_bind_hashbucket);
                if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
                    bhash_order > 0)
                        continue;
                dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
                        __get_free_pages(GFP_ATOMIC, bhash_order);
        } while (!dccp_hashinfo.bhash && --bhash_order >= 0);

        if (!dccp_hashinfo.bhash) {
                DCCP_CRIT("Failed to allocate DCCP bind hash table");
                goto out_free_dccp_locks;
        }

        for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
                spin_lock_init(&dccp_hashinfo.bhash[i].lock);
                INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
        }

        rc = dccp_mib_init();
        if (rc)
                goto out_free_dccp_bhash;

        rc = dccp_ackvec_init();
        if (rc)
                goto out_free_dccp_mib;

        rc = dccp_sysctl_init();
        if (rc)
                goto out_ackvec_exit;

        dccp_timestamping_init();
out:
        return rc;
out_ackvec_exit:
        dccp_ackvec_exit();
out_free_dccp_mib:
        dccp_mib_exit();
out_free_dccp_bhash:
        free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
        dccp_hashinfo.bhash = NULL;
out_free_dccp_locks:
        inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
        free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
        dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        dccp_hashinfo.bind_bucket_cachep = NULL;
        goto out;
}

static void __exit dccp_fini(void)
{
        dccp_mib_exit();
        free_pages((unsigned long)dccp_hashinfo.bhash,
                   get_order(dccp_hashinfo.bhash_size *
                             sizeof(struct inet_bind_hashbucket)));
        free_pages((unsigned long)dccp_hashinfo.ehash,
                   get_order(dccp_hashinfo.ehash_size *
                             sizeof(struct inet_ehash_bucket)));
        inet_ehash_locks_free(&dccp_hashinfo);
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        dccp_ackvec_exit();
        dccp_sysctl_exit();
}

module_init(dccp_init);
module_exit(dccp_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");