/*
 *  net/dccp/proto.c
 *
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *	This program is free software; you can redistribute it and/or modify it
 *	under the terms of the GNU General Public License version 2 as
 *	published by the Free Software Foundation.
 */

#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/random.h>
#include <net/checksum.h>

#include <net/inet_sock.h>
#include <net/sock.h>
#include <net/xfrm.h>

#include <asm/ioctls.h>
#include <asm/semaphore.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/delay.h>
#include <linux/poll.h>

#include "ccid.h"
#include "dccp.h"
#include "feat.h"

DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

atomic_t dccp_orphan_count = ATOMIC_INIT(0);

EXPORT_SYMBOL_GPL(dccp_orphan_count);

struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
	.lhash_lock	= RW_LOCK_UNLOCKED,
	.lhash_users	= ATOMIC_INIT(0),
	.lhash_wait	= __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
};

EXPORT_SYMBOL_GPL(dccp_hashinfo);

/* the maximum queue length for tx in packets. 0 is no limit */
int sysctl_dccp_tx_qlen __read_mostly = 5;

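/*
 * Usage sketch: sysctl_dccp_tx_qlen is exposed through sysctl (registered
 * as "tx_qlen" under net.dccp.default in net/dccp/sysctl.c), so the cap
 * can be tuned at runtime, e.g.:
 *
 *	sysctl -w net.dccp.default.tx_qlen=10
 */
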
void dccp_set_state(struct sock *sk, const int state)
{
	const int oldstate = sk->sk_state;

	dccp_pr_debug("%s(%p) %s --> %s\n", dccp_role(sk), sk,
		      dccp_state_name(oldstate), dccp_state_name(state));
	WARN_ON(state == oldstate);

	switch (state) {
	case DCCP_OPEN:
		if (oldstate != DCCP_OPEN)
			DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
		break;

	case DCCP_CLOSED:
		if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
		    oldstate == DCCP_CLOSING)
			DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

		sk->sk_prot->unhash(sk);
		if (inet_csk(sk)->icsk_bind_hash != NULL &&
		    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
			inet_put_port(sk);
		/* fall through */
	default:
		if (oldstate == DCCP_OPEN)
			DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
	}

	/* Change state AFTER socket is unhashed to avoid closed
	 * socket sitting in hash tables.
	 */
	sk->sk_state = state;
}

EXPORT_SYMBOL_GPL(dccp_set_state);

static void dccp_finish_passive_close(struct sock *sk)
{
	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
		/* Node (client or server) has received Close packet. */
		dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
		dccp_set_state(sk, DCCP_CLOSED);
		break;
	case DCCP_PASSIVE_CLOSEREQ:
		/*
		 * Client received CloseReq. We set the `active' flag so that
		 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
		 */
		dccp_send_close(sk, 1);
		dccp_set_state(sk, DCCP_CLOSING);
	}
}

void dccp_done(struct sock *sk)
{
	dccp_set_state(sk, DCCP_CLOSED);
	dccp_clear_xmit_timers(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_state_change(sk);
	else
		inet_csk_destroy_sock(sk);
}

EXPORT_SYMBOL_GPL(dccp_done);

const char *dccp_packet_name(const int type)
{
	static const char *dccp_packet_names[] = {
		[DCCP_PKT_REQUEST]  = "REQUEST",
		[DCCP_PKT_RESPONSE] = "RESPONSE",
		[DCCP_PKT_DATA]	    = "DATA",
		[DCCP_PKT_ACK]	    = "ACK",
		[DCCP_PKT_DATAACK]  = "DATAACK",
		[DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
		[DCCP_PKT_CLOSE]    = "CLOSE",
		[DCCP_PKT_RESET]    = "RESET",
		[DCCP_PKT_SYNC]	    = "SYNC",
		[DCCP_PKT_SYNCACK]  = "SYNCACK",
	};

	if (type >= DCCP_NR_PKT_TYPES)
		return "INVALID";
	else
		return dccp_packet_names[type];
}

EXPORT_SYMBOL_GPL(dccp_packet_name);

const char *dccp_state_name(const int state)
{
	static char *dccp_state_names[] = {
	[DCCP_OPEN]		= "OPEN",
	[DCCP_REQUESTING]	= "REQUESTING",
	[DCCP_PARTOPEN]		= "PARTOPEN",
	[DCCP_LISTEN]		= "LISTEN",
	[DCCP_RESPOND]		= "RESPOND",
	[DCCP_CLOSING]		= "CLOSING",
	[DCCP_ACTIVE_CLOSEREQ]	= "CLOSEREQ",
	[DCCP_PASSIVE_CLOSE]	= "PASSIVE_CLOSE",
	[DCCP_PASSIVE_CLOSEREQ]	= "PASSIVE_CLOSEREQ",
	[DCCP_TIME_WAIT]	= "TIME_WAIT",
	[DCCP_CLOSED]		= "CLOSED",
	};

	if (state >= DCCP_MAX_STATES)
		return "INVALID STATE!";
	else
		return dccp_state_names[state];
}

EXPORT_SYMBOL_GPL(dccp_state_name);

int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_minisock *dmsk = dccp_msk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	dccp_minisock_init(&dp->dccps_minisock);

	icsk->icsk_rto		= DCCP_TIMEOUT_INIT;
	icsk->icsk_syn_retries	= sysctl_dccp_request_retries;
	sk->sk_state		= DCCP_CLOSED;
	sk->sk_write_space	= dccp_write_space;
	icsk->icsk_sync_mss	= dccp_sync_mss;
	dp->dccps_mss_cache	= 536;
	dp->dccps_rate_last	= jiffies;
	dp->dccps_role		= DCCP_ROLE_UNDEFINED;
	dp->dccps_service	= DCCP_SERVICE_CODE_IS_ABSENT;
	dp->dccps_l_ack_ratio	= dp->dccps_r_ack_ratio = 1;

	dccp_init_xmit_timers(sk);

	/*
	 * FIXME: We're hardcoding the CCID, and doing this at this point makes
	 * the listening (master) sock get CCID control blocks, which is not
	 * necessary, but for now, to not mess with the test userspace apps,
	 * lets leave it here, later the real solution is to do this in a
	 * setsockopt(CCIDs-I-want/accept). -acme
	 */
	if (likely(ctl_sock_initialized)) {
		int rc = dccp_feat_init(dmsk);

		if (rc)
			return rc;

		if (dmsk->dccpms_send_ack_vector) {
			dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
			if (dp->dccps_hc_rx_ackvec == NULL)
				return -ENOMEM;
		}
		dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
						      sk, GFP_KERNEL);
		dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
						      sk, GFP_KERNEL);
		if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
			     dp->dccps_hc_tx_ccid == NULL)) {
			ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
			ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
			if (dmsk->dccpms_send_ack_vector) {
				dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
				dp->dccps_hc_rx_ackvec = NULL;
			}
			dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
			return -ENOMEM;
		}
	} else {
		/* control socket doesn't need feat nego */
		INIT_LIST_HEAD(&dmsk->dccpms_pending);
		INIT_LIST_HEAD(&dmsk->dccpms_conf);
	}

	return 0;
}

EXPORT_SYMBOL_GPL(dccp_init_sock);

int dccp_destroy_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_minisock *dmsk = dccp_msk(sk);

	/*
	 * DCCP doesn't use sk_write_queue, just sk_send_head
	 * for retransmissions
	 */
	if (sk->sk_send_head != NULL) {
		kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	/* Clean up a referenced DCCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash != NULL)
		inet_put_port(sk);

	kfree(dp->dccps_service_list);
	dp->dccps_service_list = NULL;

	if (dmsk->dccpms_send_ack_vector) {
		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
		dp->dccps_hc_rx_ackvec = NULL;
	}
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
	dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;

	/* clean up feature negotiation state */
	dccp_feat_clean(dmsk);

	return 0;
}

EXPORT_SYMBOL_GPL(dccp_destroy_sock);

static inline int dccp_listen_start(struct sock *sk, int backlog)
{
	struct dccp_sock *dp = dccp_sk(sk);

	dp->dccps_role = DCCP_ROLE_LISTEN;
	return inet_csk_listen_start(sk, backlog);
}

static inline int dccp_need_reset(int state)
{
	return state != DCCP_CLOSED && state != DCCP_LISTEN &&
	       state != DCCP_REQUESTING;
}

int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	int err = 0;
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/*
	 * This corresponds to the ABORT function of RFC793, sec. 3.8
	 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
	 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
	} else if (dccp_need_reset(old_state)) {
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		sk->sk_err = ECONNRESET;
	} else if (old_state == DCCP_REQUESTING)
		sk->sk_err = ECONNRESET;

	dccp_clear_xmit_timers(sk);
	__skb_queue_purge(&sk->sk_receive_queue);
	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	inet->dport = 0;

	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);
	__sk_dst_reset(sk);

	BUG_TRAP(!inet->num || icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return err;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);

/*
 *	Wait for a DCCP event.
 *
 *	Note that we don't need to lock the socket, as the upper poll layers
 *	take care of normal races (between the test and the event) and we don't
 *	go look at any of the socket buffers directly.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
		       poll_table *wait)
{
	unsigned int mask;
	struct sock *sk = sock->sk;

	poll_wait(file, sk->sk_sleep, wait);
	if (sk->sk_state == DCCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	   by poll logic and correct handling of state changes
	   made by other threads is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = POLLERR;

	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLIN | POLLRDNORM | POLLRDHUP;

	/* Connected? */
	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
		if (atomic_read(&sk->sk_rmem_alloc) > 0)
			mask |= POLLIN | POLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
				mask |= POLLOUT | POLLWRNORM;
			} else {  /* send SIGIO later */
				set_bit(SOCK_ASYNC_NOSPACE,
					&sk->sk_socket->flags);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
					mask |= POLLOUT | POLLWRNORM;
			}
		}
	}
	return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll);

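/*
 * Userspace sketch of driving dccp_poll() via poll(2); it assumes a
 * connected DCCP socket created with socket(AF_INET, SOCK_DCCP,
 * IPPROTO_DCCP) and elides error handling:
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLOUT };
 *
 *	if (poll(&pfd, 1, 1000) > 0) {
 *		if (pfd.revents & POLLIN)
 *			;	// at least one whole packet is queued
 *		if (pfd.revents & POLLOUT)
 *			;	// write space is above the low-water mark
 *	}
 */
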
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	int rc = -ENOTCONN;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN)
		goto out;

	switch (cmd) {
	case SIOCINQ: {
		struct sk_buff *skb;
		unsigned long amount = 0;

		skb = skb_peek(&sk->sk_receive_queue);
		if (skb != NULL) {
			/*
			 * We will only return the amount of this packet since
			 * that is all that will be read.
			 */
			amount = skb->len;
		}
		rc = put_user(amount, (int __user *)arg);
	}
		break;
	default:
		rc = -ENOIOCTLCMD;
		break;
	}
out:
	release_sock(sk);
	return rc;
}

EXPORT_SYMBOL_GPL(dccp_ioctl);

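/*
 * Userspace sketch for the SIOCINQ branch above, assuming <sys/ioctl.h>
 * and a connected DCCP socket fd. Unlike TCP, the value reported is the
 * length of the packet at the head of the receive queue, not the total
 * number of queued bytes:
 *
 *	int pending = 0;
 *
 *	if (ioctl(fd, SIOCINQ, &pending) == 0)
 *		printf("next read returns at most %d bytes\n", pending);
 */
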
static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
				   char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_service_list *sl = NULL;

	if (service == DCCP_SERVICE_INVALID_VALUE ||
	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
		return -EINVAL;

	if (optlen > sizeof(service)) {
		sl = kmalloc(optlen, GFP_KERNEL);
		if (sl == NULL)
			return -ENOMEM;

		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
		if (copy_from_user(sl->dccpsl_list,
				   optval + sizeof(service),
				   optlen - sizeof(service)) ||
		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
			kfree(sl);
			return -EFAULT;
		}
	}

	lock_sock(sk);
	dp->dccps_service = service;

	kfree(dp->dccps_service_list);

	dp->dccps_service_list = sl;
	release_sock(sk);
	return 0;
}

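/*
 * Userspace sketch for DCCP_SOCKOPT_SERVICE, assuming the SOL_DCCP and
 * DCCP_SOCKOPT_SERVICE definitions from <linux/dccp.h>. The service code
 * (RFC 4340, sec. 8.1.2) has to be set before connect() resp. listen();
 * the option value is one or more 32-bit codes in network byte order:
 *
 *	uint32_t service = htonl(42);
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
 *		   &service, sizeof(service));
 */
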
/* byte 1 is feature.  the rest is the preference list */
static int dccp_setsockopt_change(struct sock *sk, int type,
				  struct dccp_so_feat __user *optval)
{
	struct dccp_so_feat opt;
	u8 *val;
	int rc;

	if (copy_from_user(&opt, optval, sizeof(opt)))
		return -EFAULT;
	/*
	 * rfc4340: 6.1. Change Options
	 */
	if (opt.dccpsf_len < 1)
		return -EINVAL;

	val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
	if (!val)
		return -ENOMEM;

	if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
		rc = -EFAULT;
		goto out_free_val;
	}

	rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
			      val, opt.dccpsf_len, GFP_KERNEL);
	if (rc)
		goto out_free_val;

out:
	return rc;

out_free_val:
	kfree(val);
	goto out;
}

static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
			      char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	int val, err = 0;

	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	if (optname == DCCP_SOCKOPT_SERVICE)
		return dccp_setsockopt_service(sk, val, optval, optlen);

	lock_sock(sk);
	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		err = 0;
		break;
	case DCCP_SOCKOPT_CHANGE_L:
		if (optlen != sizeof(struct dccp_so_feat))
			err = -EINVAL;
		else
			err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
						     (struct dccp_so_feat __user *)
						     optval);
		break;
	case DCCP_SOCKOPT_CHANGE_R:
		if (optlen != sizeof(struct dccp_so_feat))
			err = -EINVAL;
		else
			err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
						     (struct dccp_so_feat __user *)
						     optval);
		break;
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		if (dp->dccps_role != DCCP_ROLE_SERVER)
			err = -EOPNOTSUPP;
		else
			dp->dccps_server_timewait = (val != 0);
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:	/* sender side, RFC 4340, sec. 9.2 */
		if (val < 0 || val > 15)
			err = -EINVAL;
		else
			dp->dccps_pcslen = val;
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:	/* receiver side, RFC 4340 sec. 9.2.1 */
		if (val < 0 || val > 15)
			err = -EINVAL;
		else {
			dp->dccps_pcrlen = val;
			/* FIXME: add feature negotiation,
			 * ChangeL(MinimumChecksumCoverage, val) */
		}
		break;
	default:
		err = -ENOPROTOOPT;
		break;
	}

	release_sock(sk);
	return err;
}

int dccp_setsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int optlen)
{
	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
							     optname, optval,
							     optlen);
	return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_setsockopt);

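/*
 * Userspace sketch for the partial-checksum options handled above
 * (RFC 4340, sec. 9.2), assuming SOL_DCCP and DCCP_SOCKOPT_SEND_CSCOV
 * from <linux/dccp.h>. A coverage value of 0 checksums the whole packet;
 * values 1..15 cover the headers plus the initial (value - 1) * 4 bytes
 * of payload:
 *
 *	int cscov = 1;	// checksum protects the headers only
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SEND_CSCOV,
 *		   &cscov, sizeof(cscov));
 */
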
#ifdef CONFIG_COMPAT
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int optlen)
{
	if (level != SOL_DCCP)
		return inet_csk_compat_setsockopt(sk, level, optname,
						  optval, optlen);
	return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif

static int dccp_getsockopt_service(struct sock *sk, int len,
				   __be32 __user *optval,
				   int __user *optlen)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const struct dccp_service_list *sl;
	int err = -ENOENT, slen = 0, total_len = sizeof(u32);

	lock_sock(sk);
	if ((sl = dp->dccps_service_list) != NULL) {
		slen = sl->dccpsl_nr * sizeof(u32);
		total_len += slen;
	}

	err = -EINVAL;
	if (total_len > len)
		goto out;

	err = 0;
	if (put_user(total_len, optlen) ||
	    put_user(dp->dccps_service, optval) ||
	    (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
		err = -EFAULT;
out:
	release_sock(sk);
	return err;
}

static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
			      char __user *optval, int __user *optlen)
{
	struct dccp_sock *dp;
	int val, len;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < (int)sizeof(int))
		return -EINVAL;

	dp = dccp_sk(sk);

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_SERVICE:
		return dccp_getsockopt_service(sk, len,
					       (__be32 __user *)optval, optlen);
	case DCCP_SOCKOPT_GET_CUR_MPS:
		val = dp->dccps_mss_cache;
		break;
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		val = dp->dccps_server_timewait;
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		val = dp->dccps_pcslen;
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		val = dp->dccps_pcrlen;
		break;
	case 128 ... 191:
		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	case 192 ... 255:
		return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	default:
		return -ENOPROTOOPT;
	}

	len = sizeof(val);
	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
		return -EFAULT;

	return 0;
}

int dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
							     optname, optval,
							     optlen);
	return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_getsockopt);

#ifdef CONFIG_COMPAT
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	if (level != SOL_DCCP)
		return inet_csk_compat_getsockopt(sk, level, optname,
						  optval, optlen);
	return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif

int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;

	if (len > dp->dccps_mss_cache)
		return -EMSGSIZE;

	lock_sock(sk);

	if (sysctl_dccp_tx_qlen &&
	    (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
		rc = -EAGAIN;
		goto out_release;
	}

	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
	 * so that the trick in dccp_rcv_request_sent_state_process works.
	 */
	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (rc != 0)
		goto out_discard;

	skb_queue_tail(&sk->sk_write_queue, skb);
	dccp_write_xmit(sk, 0);
out_release:
	release_sock(sk);
	return rc ? : len;
out_discard:
	kfree_skb(skb);
	goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);

int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len, int nonblock, int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		switch (dh->dccph_type) {
		case DCCP_PKT_DATA:
		case DCCP_PKT_DATAACK:
			goto found_ok_skb;

		case DCCP_PKT_CLOSE:
		case DCCP_PKT_CLOSEREQ:
			if (!(flags & MSG_PEEK))
				dccp_finish_passive_close(sk);
			/* fall through */
		case DCCP_PKT_RESET:
			dccp_pr_debug("found fin (%s) ok!\n",
				      dccp_packet_name(dh->dccph_type));
			len = 0;
			goto found_fin_ok;
		default:
			dccp_pr_debug("packet_type=%s\n",
				      dccp_packet_name(dh->dccph_type));
			sk_eat_skb(sk, skb, 0);
		}
verify_sock_status:
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when user tries to read
				 * from never connected socket.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		sk_wait_data(sk, &timeo);
		continue;
	found_ok_skb:
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
	found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb, 0);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);

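/*
 * A minimal client-side sketch tying dccp_sendmsg()/dccp_recvmsg()
 * together, with error handling elided; the service code 42 and the
 * peer address in `addr' are placeholders. Each send() becomes at most
 * one DCCP-Data packet (capped at dccps_mss_cache bytes) and each recv()
 * returns at most one packet:
 *
 *	int fd = socket(AF_INET, SOCK_DCCP, IPPROTO_DCCP);
 *	uint32_t service = htonl(42);
 *	char buf[1400];
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
 *		   &service, sizeof(service));
 *	connect(fd, (struct sockaddr *)&addr, sizeof(addr));
 *	send(fd, "hello", 5, 0);
 *	recv(fd, buf, sizeof(buf), 0);
 */
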
int inet_dccp_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	unsigned char old_state;
	int err;

	lock_sock(sk);

	err = -EINVAL;
	if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
		goto out;

	old_state = sk->sk_state;
	if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
		goto out;

	/* Really, if the socket is already in listen state
	 * we can only allow the backlog to be adjusted.
	 */
	if (old_state != DCCP_LISTEN) {
		/*
		 * FIXME: here it probably should be sk->sk_prot->listen_start
		 * see tcp_listen_start
		 */
		err = dccp_listen_start(sk, backlog);
		if (err)
			goto out;
	}
	sk->sk_max_ack_backlog = backlog;
	err = 0;

out:
	release_sock(sk);
	return err;
}

EXPORT_SYMBOL_GPL(inet_dccp_listen);

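/*
 * Server-side counterpart to the client sketch above, under the same
 * AF_INET/SOCK_DCCP/IPPROTO_DCCP and service-code assumptions; the
 * listen() call below lands in inet_dccp_listen():
 *
 *	int fd = socket(AF_INET, SOCK_DCCP, IPPROTO_DCCP);
 *	uint32_t service = htonl(42);
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
 *		   &service, sizeof(service));
 *	bind(fd, (struct sockaddr *)&addr, sizeof(addr));
 *	listen(fd, 5);
 *	int conn = accept(fd, NULL, NULL);
 */
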
static void dccp_terminate_connection(struct sock *sk)
{
	u8 next_state = DCCP_CLOSED;

	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
	case DCCP_PASSIVE_CLOSEREQ:
		dccp_finish_passive_close(sk);
		break;
	case DCCP_PARTOPEN:
		dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
		inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
		/* fall through */
	case DCCP_OPEN:
		dccp_send_close(sk, 1);

		if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
		    !dccp_sk(sk)->dccps_server_timewait)
			next_state = DCCP_ACTIVE_CLOSEREQ;
		else
			next_state = DCCP_CLOSING;
		/* fall through */
	default:
		dccp_set_state(sk, next_state);
	}
}

void dccp_close(struct sock *sk, long timeout)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct sk_buff *skb;
	u32 data_was_unread = 0;
	int state;

	lock_sock(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == DCCP_LISTEN) {
		dccp_set_state(sk, DCCP_CLOSED);

		/* Special case. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	sk_stop_timer(sk, &dp->dccps_xmit_timer);

	/*
	 * We need to flush the recv. buffs. We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		data_was_unread += skb->len;
		__kfree_skb(skb);
	}

	if (data_was_unread) {
		/* Unread data was tossed, send an appropriate Reset Code */
		DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		dccp_set_state(sk, DCCP_CLOSED);
	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (sk->sk_state != DCCP_CLOSED) {
		dccp_terminate_connection(sk);
	}

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	state = sk->sk_state;
	sock_hold(sk);
	sock_orphan(sk);
	atomic_inc(sk->sk_prot->orphan_count);

	/*
	 * It is the last release_sock in its life. It will remove backlog.
	 */
	release_sock(sk);
	/*
	 * Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	BUG_TRAP(!sock_owned_by_user(sk));

	/* Have we already been destroyed by a softirq or backlog? */
	if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
		goto out;

	if (sk->sk_state == DCCP_CLOSED)
		inet_csk_destroy_sock(sk);

	/* Otherwise, socket is reprieved until protocol close. */

out:
	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}

EXPORT_SYMBOL_GPL(dccp_close);

void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);

static int __init dccp_mib_init(void)
{
	int rc = -ENOMEM;

	dccp_statistics[0] = alloc_percpu(struct dccp_mib);
	if (dccp_statistics[0] == NULL)
		goto out;

	dccp_statistics[1] = alloc_percpu(struct dccp_mib);
	if (dccp_statistics[1] == NULL)
		goto out_free_one;

	rc = 0;
out:
	return rc;
out_free_one:
	free_percpu(dccp_statistics[0]);
	dccp_statistics[0] = NULL;
	goto out;
}

static void dccp_mib_exit(void)
{
	free_percpu(dccp_statistics[0]);
	free_percpu(dccp_statistics[1]);
	dccp_statistics[0] = dccp_statistics[1] = NULL;
}

static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
int dccp_debug;
module_param(dccp_debug, bool, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif

static int __init dccp_init(void)
{
	unsigned long goal;
	int ehash_order, bhash_order, i;
	int rc = -ENOBUFS;

	BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
		     FIELD_SIZEOF(struct sk_buff, cb));

	dccp_hashinfo.bind_bucket_cachep =
		kmem_cache_create("dccp_bind_bucket",
				  sizeof(struct inet_bind_bucket), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!dccp_hashinfo.bind_bucket_cachep)
		goto out;

	/*
	 * Size and allocate the main established and bind bucket
	 * hash tables.
	 *
	 * The methodology is similar to that of the buffer cache.
	 */
	if (num_physpages >= (128 * 1024))
		goal = num_physpages >> (21 - PAGE_SHIFT);
	else
		goal = num_physpages >> (23 - PAGE_SHIFT);

	if (thash_entries)
		goal = (thash_entries *
			sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
	for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
		;
	do {
		dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
					sizeof(struct inet_ehash_bucket);
		while (dccp_hashinfo.ehash_size &
		       (dccp_hashinfo.ehash_size - 1))
			dccp_hashinfo.ehash_size--;
		dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
			__get_free_pages(GFP_ATOMIC, ehash_order);
	} while (!dccp_hashinfo.ehash && --ehash_order > 0);

	if (!dccp_hashinfo.ehash) {
		DCCP_CRIT("Failed to allocate DCCP established hash table");
		goto out_free_bind_bucket_cachep;
	}

	for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
		INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
		INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
	}

	if (inet_ehash_locks_alloc(&dccp_hashinfo))
		goto out_free_dccp_ehash;

	bhash_order = ehash_order;

	do {
		dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
					sizeof(struct inet_bind_hashbucket);
		if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
		    bhash_order > 0)
			continue;
		dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
			__get_free_pages(GFP_ATOMIC, bhash_order);
	} while (!dccp_hashinfo.bhash && --bhash_order >= 0);

	if (!dccp_hashinfo.bhash) {
		DCCP_CRIT("Failed to allocate DCCP bind hash table");
		goto out_free_dccp_locks;
	}

	for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
		spin_lock_init(&dccp_hashinfo.bhash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
	}

	rc = dccp_mib_init();
	if (rc)
		goto out_free_dccp_bhash;

	rc = dccp_ackvec_init();
	if (rc)
		goto out_free_dccp_mib;

	rc = dccp_sysctl_init();
	if (rc)
		goto out_ackvec_exit;

	dccp_timestamping_init();
out:
	return rc;
out_ackvec_exit:
	dccp_ackvec_exit();
out_free_dccp_mib:
	dccp_mib_exit();
out_free_dccp_bhash:
	free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
	dccp_hashinfo.bhash = NULL;
out_free_dccp_locks:
	inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
	free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
	dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_hashinfo.bind_bucket_cachep = NULL;
	goto out;
}

static void __exit dccp_fini(void)
{
	dccp_mib_exit();
	free_pages((unsigned long)dccp_hashinfo.bhash,
		   get_order(dccp_hashinfo.bhash_size *
			     sizeof(struct inet_bind_hashbucket)));
	free_pages((unsigned long)dccp_hashinfo.ehash,
		   get_order(dccp_hashinfo.ehash_size *
			     sizeof(struct inet_ehash_bucket)));
	inet_ehash_locks_free(&dccp_hashinfo);
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_ackvec_exit();
	dccp_sysctl_exit();
}

module_init(dccp_init);
module_exit(dccp_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");