/*
 *  net/dccp/proto.c
 *
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *  This program is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU General Public License version 2 as
 *  published by the Free Software Foundation.
 */
#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/random.h>
#include <net/checksum.h>

#include <net/inet_sock.h>
#include <net/sock.h>
#include <net/xfrm.h>

#include <asm/ioctls.h>
#include <asm/semaphore.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/delay.h>
#include <linux/poll.h>

#include "ccid.h"
#include "dccp.h"
#include "feat.h"
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

atomic_t dccp_orphan_count = ATOMIC_INIT(0);

EXPORT_SYMBOL_GPL(dccp_orphan_count);
struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
        .lhash_lock     = RW_LOCK_UNLOCKED,
        .lhash_users    = ATOMIC_INIT(0),
        .lhash_wait     = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
};

EXPORT_SYMBOL_GPL(dccp_hashinfo);
/* the maximum queue length for tx in packets. 0 is no limit */
int sysctl_dccp_tx_qlen __read_mostly = 5;
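/*
 * Illustrative note (not part of the original source): this limit is
 * exported as a sysctl, so it can be tuned at runtime without rebuilding
 * the module; on kernels of this vintage the knob should live under
 * net.dccp.default, e.g.:
 *
 *      # sysctl -w net.dccp.default.tx_qlen=10
 */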
void dccp_set_state(struct sock *sk, const int state)
{
        const int oldstate = sk->sk_state;

        dccp_pr_debug("%s(%p) %s --> %s\n", dccp_role(sk), sk,
                      dccp_state_name(oldstate), dccp_state_name(state));
        WARN_ON(state == oldstate);

        switch (state) {
        case DCCP_OPEN:
                if (oldstate != DCCP_OPEN)
                        DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
                break;

        case DCCP_CLOSED:
                if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
                    oldstate == DCCP_CLOSING)
                        DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

                sk->sk_prot->unhash(sk);
                if (inet_csk(sk)->icsk_bind_hash != NULL &&
                    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
                        inet_put_port(&dccp_hashinfo, sk);
                /* fall through */
        default:
                if (oldstate == DCCP_OPEN)
                        DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
        }

        /* Change state AFTER socket is unhashed to avoid closed
         * socket sitting in hash tables.
         */
        sk->sk_state = state;
}

EXPORT_SYMBOL_GPL(dccp_set_state);
static void dccp_finish_passive_close(struct sock *sk)
{
        switch (sk->sk_state) {
        case DCCP_PASSIVE_CLOSE:
                /* Node (client or server) has received Close packet. */
                dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
                dccp_set_state(sk, DCCP_CLOSED);
                break;
        case DCCP_PASSIVE_CLOSEREQ:
                /*
                 * Client received CloseReq. We set the `active' flag so that
                 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
                 */
                dccp_send_close(sk, 1);
                dccp_set_state(sk, DCCP_CLOSING);
        }
}
void dccp_done(struct sock *sk)
{
        dccp_set_state(sk, DCCP_CLOSED);
        dccp_clear_xmit_timers(sk);

        sk->sk_shutdown = SHUTDOWN_MASK;

        if (!sock_flag(sk, SOCK_DEAD))
                sk->sk_state_change(sk);
        else
                inet_csk_destroy_sock(sk);
}

EXPORT_SYMBOL_GPL(dccp_done);
const char *dccp_packet_name(const int type)
{
        static const char *dccp_packet_names[] = {
                [DCCP_PKT_REQUEST]  = "REQUEST",
                [DCCP_PKT_RESPONSE] = "RESPONSE",
                [DCCP_PKT_DATA]     = "DATA",
                [DCCP_PKT_ACK]      = "ACK",
                [DCCP_PKT_DATAACK]  = "DATAACK",
                [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
                [DCCP_PKT_CLOSE]    = "CLOSE",
                [DCCP_PKT_RESET]    = "RESET",
                [DCCP_PKT_SYNC]     = "SYNC",
                [DCCP_PKT_SYNCACK]  = "SYNCACK",
        };

        if (type >= DCCP_NR_PKT_TYPES)
                return "INVALID";
        else
                return dccp_packet_names[type];
}

EXPORT_SYMBOL_GPL(dccp_packet_name);
const char *dccp_state_name(const int state)
{
        static char *dccp_state_names[] = {
                [DCCP_OPEN]             = "OPEN",
                [DCCP_REQUESTING]       = "REQUESTING",
                [DCCP_PARTOPEN]         = "PARTOPEN",
                [DCCP_LISTEN]           = "LISTEN",
                [DCCP_RESPOND]          = "RESPOND",
                [DCCP_CLOSING]          = "CLOSING",
                [DCCP_ACTIVE_CLOSEREQ]  = "CLOSEREQ",
                [DCCP_PASSIVE_CLOSE]    = "PASSIVE_CLOSE",
                [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
                [DCCP_TIME_WAIT]        = "TIME_WAIT",
                [DCCP_CLOSED]           = "CLOSED",
        };

        if (state >= DCCP_MAX_STATES)
                return "INVALID STATE!";
        else
                return dccp_state_names[state];
}

EXPORT_SYMBOL_GPL(dccp_state_name);
void dccp_hash(struct sock *sk)
{
        inet_hash(&dccp_hashinfo, sk);
}

EXPORT_SYMBOL_GPL(dccp_hash);

void dccp_unhash(struct sock *sk)
{
        inet_unhash(&dccp_hashinfo, sk);
}

EXPORT_SYMBOL_GPL(dccp_unhash);
int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct dccp_minisock *dmsk = dccp_msk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);

        dccp_minisock_init(&dp->dccps_minisock);

        icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
        icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
        sk->sk_state            = DCCP_CLOSED;
        sk->sk_write_space      = dccp_write_space;
        icsk->icsk_sync_mss     = dccp_sync_mss;
        dp->dccps_mss_cache     = 536;
        dp->dccps_rate_last     = jiffies;
        dp->dccps_role          = DCCP_ROLE_UNDEFINED;
        dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
        dp->dccps_l_ack_ratio   = dp->dccps_r_ack_ratio = 1;

        dccp_init_xmit_timers(sk);

        /*
         * FIXME: We're hardcoding the CCID, and doing this at this point makes
         * the listening (master) sock get CCID control blocks, which is not
         * necessary, but for now, to not mess with the test userspace apps,
         * lets leave it here, later the real solution is to do this in a
         * setsockopt(CCIDs-I-want/accept). -acme
         */
        if (likely(ctl_sock_initialized)) {
                int rc = dccp_feat_init(dmsk);

                if (rc)
                        return rc;

                if (dmsk->dccpms_send_ack_vector) {
                        dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
                        if (dp->dccps_hc_rx_ackvec == NULL)
                                return -ENOMEM;
                }
                dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
                                                      sk, GFP_KERNEL);
                dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
                                                      sk, GFP_KERNEL);
                if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
                             dp->dccps_hc_tx_ccid == NULL)) {
                        ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
                        ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
                        if (dmsk->dccpms_send_ack_vector) {
                                dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
                                dp->dccps_hc_rx_ackvec = NULL;
                        }
                        dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
                        return -ENOMEM;
                }
        } else {
                /* control socket doesn't need feat nego */
                INIT_LIST_HEAD(&dmsk->dccpms_pending);
                INIT_LIST_HEAD(&dmsk->dccpms_conf);
        }

        return 0;
}

EXPORT_SYMBOL_GPL(dccp_init_sock);
int dccp_destroy_sock(struct sock *sk)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct dccp_minisock *dmsk = dccp_msk(sk);

        /*
         * DCCP doesn't use sk_write_queue, just sk_send_head
         * for retransmissions
         */
        if (sk->sk_send_head != NULL) {
                kfree_skb(sk->sk_send_head);
                sk->sk_send_head = NULL;
        }

        /* Clean up a referenced DCCP bind bucket. */
        if (inet_csk(sk)->icsk_bind_hash != NULL)
                inet_put_port(&dccp_hashinfo, sk);

        kfree(dp->dccps_service_list);
        dp->dccps_service_list = NULL;

        if (dmsk->dccpms_send_ack_vector) {
                dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
                dp->dccps_hc_rx_ackvec = NULL;
        }
        ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
        ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
        dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;

        /* clean up feature negotiation state */
        dccp_feat_clean(dmsk);

        return 0;
}

EXPORT_SYMBOL_GPL(dccp_destroy_sock);
static inline int dccp_listen_start(struct sock *sk, int backlog)
{
        struct dccp_sock *dp = dccp_sk(sk);

        dp->dccps_role = DCCP_ROLE_LISTEN;
        return inet_csk_listen_start(sk, backlog);
}

static inline int dccp_need_reset(int state)
{
        return state != DCCP_CLOSED && state != DCCP_LISTEN &&
               state != DCCP_REQUESTING;
}
int dccp_disconnect(struct sock *sk, int flags)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct inet_sock *inet = inet_sk(sk);
        int err = 0;
        const int old_state = sk->sk_state;

        if (old_state != DCCP_CLOSED)
                dccp_set_state(sk, DCCP_CLOSED);

        /*
         * This corresponds to the ABORT function of RFC793, sec. 3.8
         * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
         */
        if (old_state == DCCP_LISTEN) {
                inet_csk_listen_stop(sk);
        } else if (dccp_need_reset(old_state)) {
                dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
                sk->sk_err = ECONNRESET;
        } else if (old_state == DCCP_REQUESTING)
                sk->sk_err = ECONNRESET;

        dccp_clear_xmit_timers(sk);
        __skb_queue_purge(&sk->sk_receive_queue);
        if (sk->sk_send_head != NULL) {
                __kfree_skb(sk->sk_send_head);
                sk->sk_send_head = NULL;
        }

        inet->dport = 0;

        if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
                inet_reset_saddr(sk);

        sk->sk_shutdown = 0;
        sock_reset_flag(sk, SOCK_DONE);

        icsk->icsk_backoff = 0;
        inet_csk_delack_init(sk);
        __sk_dst_reset(sk);

        BUG_TRAP(!inet->num || icsk->icsk_bind_hash);

        sk->sk_error_report(sk);
        return err;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);
/*
 * Wait for a DCCP event.
 *
 * Note that we don't need to lock the socket, as the upper poll layers
 * take care of normal races (between the test and the event) and we don't
 * go look at any of the socket buffers directly.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
                       poll_table *wait)
{
        unsigned int mask;
        struct sock *sk = sock->sk;

        poll_wait(file, sk->sk_sleep, wait);
        if (sk->sk_state == DCCP_LISTEN)
                return inet_csk_listen_poll(sk);

        /* Socket is not locked. We are protected from async events
         * by poll logic and correct handling of state changes
         * made by other threads is impossible in any case.
         */

        mask = 0;
        if (sk->sk_err)
                mask = POLLERR;

        if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
                mask |= POLLHUP;
        if (sk->sk_shutdown & RCV_SHUTDOWN)
                mask |= POLLIN | POLLRDNORM | POLLRDHUP;

        /* Connected? */
        if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
                if (atomic_read(&sk->sk_rmem_alloc) > 0)
                        mask |= POLLIN | POLLRDNORM;

                if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
                        if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
                                mask |= POLLOUT | POLLWRNORM;
                        } else {  /* send SIGIO later */
                                set_bit(SOCK_ASYNC_NOSPACE,
                                        &sk->sk_socket->flags);
                                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

                                /* Race breaker. If space is freed after
                                 * wspace test but before the flags are set,
                                 * IO signal will be lost.
                                 */
                                if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
                                        mask |= POLLOUT | POLLWRNORM;
                        }
                }
        }
        return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll);
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
        int rc = -ENOTCONN;

        lock_sock(sk);

        if (sk->sk_state == DCCP_LISTEN)
                goto out;

        switch (cmd) {
        case SIOCINQ: {
                struct sk_buff *skb;
                unsigned long amount = 0;

                skb = skb_peek(&sk->sk_receive_queue);
                if (skb != NULL) {
                        /*
                         * We will only return the amount of this packet since
                         * that is all that will be read.
                         */
                        amount = skb->len;
                }
                rc = put_user(amount, (int __user *)arg);
        }
                break;
        default:
                rc = -ENOIOCTLCMD;
                break;
        }
out:
        release_sock(sk);
        return rc;
}

EXPORT_SYMBOL_GPL(dccp_ioctl);
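/*
 * Illustrative userspace sketch (not part of the original source): SIOCINQ
 * on a DCCP socket reports the length of the packet at the head of the
 * receive queue, not the total number of queued bytes, so it tells the
 * caller exactly how much the next read() will return:
 *
 *      int pending;
 *
 *      if (ioctl(fd, SIOCINQ, &pending) == 0)
 *              printf("next read returns %d bytes\n", pending);
 */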
static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
                                   char __user *optval, int optlen)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct dccp_service_list *sl = NULL;

        if (service == DCCP_SERVICE_INVALID_VALUE ||
            optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
                return -EINVAL;

        if (optlen > sizeof(service)) {
                sl = kmalloc(optlen, GFP_KERNEL);
                if (sl == NULL)
                        return -ENOMEM;

                sl->dccpsl_nr = optlen / sizeof(u32) - 1;
                if (copy_from_user(sl->dccpsl_list,
                                   optval + sizeof(service),
                                   optlen - sizeof(service)) ||
                    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
                        kfree(sl);
                        return -EFAULT;
                }
        }

        lock_sock(sk);
        dp->dccps_service = service;

        kfree(dp->dccps_service_list);

        dp->dccps_service_list = sl;
        release_sock(sk);
        return 0;
}
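/*
 * Illustrative userspace sketch (not part of the original source): the
 * optval for DCCP_SOCKOPT_SERVICE is an array of one or more __be32
 * service codes; the first is the code used on the connection, and any
 * further entries form the service list parsed above. A client would
 * typically set it before connect(), here with the hypothetical code 42:
 *
 *      __be32 service = htonl(42);
 *
 *      setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
 *                 &service, sizeof(service));
 */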
/* byte 1 is the feature number; the rest is the preference list */
static int dccp_setsockopt_change(struct sock *sk, int type,
                                  struct dccp_so_feat __user *optval)
{
        struct dccp_so_feat opt;
        u8 *val;
        int rc;

        if (copy_from_user(&opt, optval, sizeof(opt)))
                return -EFAULT;

        val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
        if (!val)
                return -ENOMEM;

        if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
                rc = -EFAULT;
                goto out_free_val;
        }

        rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
                              val, opt.dccpsf_len, GFP_KERNEL);
        if (rc)
                goto out_free_val;

out:
        return rc;

out_free_val:
        kfree(val);
        goto out;
}
static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
                              char __user *optval, int optlen)
{
        struct dccp_sock *dp = dccp_sk(sk);
        int val, err = 0;

        if (optlen < sizeof(int))
                return -EINVAL;

        if (get_user(val, (int __user *)optval))
                return -EFAULT;

        if (optname == DCCP_SOCKOPT_SERVICE)
                return dccp_setsockopt_service(sk, val, optval, optlen);

        lock_sock(sk);
        switch (optname) {
        case DCCP_SOCKOPT_PACKET_SIZE:
                DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
                err = 0;
                break;
        case DCCP_SOCKOPT_CHANGE_L:
                if (optlen != sizeof(struct dccp_so_feat))
                        err = -EINVAL;
                else
                        err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
                                                     (struct dccp_so_feat __user *)
                                                     optval);
                break;
        case DCCP_SOCKOPT_CHANGE_R:
                if (optlen != sizeof(struct dccp_so_feat))
                        err = -EINVAL;
                else
                        err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
                                                     (struct dccp_so_feat __user *)
                                                     optval);
                break;
        case DCCP_SOCKOPT_SERVER_TIMEWAIT:
                if (dp->dccps_role != DCCP_ROLE_SERVER)
                        err = -EOPNOTSUPP;
                else
                        dp->dccps_server_timewait = (val != 0);
                break;
        case DCCP_SOCKOPT_SEND_CSCOV:   /* sender side, RFC 4340, sec. 9.2 */
                if (val < 0 || val > 15)
                        err = -EINVAL;
                else
                        dp->dccps_pcslen = val;
                break;
        case DCCP_SOCKOPT_RECV_CSCOV:   /* receiver side, RFC 4340 sec. 9.2.1 */
                if (val < 0 || val > 15)
                        err = -EINVAL;
                else {
                        dp->dccps_pcrlen = val;
                        /* FIXME: add feature negotiation,
                         * ChangeL(MinimumChecksumCoverage, val) */
                }
                break;
        default:
                err = -ENOPROTOOPT;
                break;
        }

        release_sock(sk);
        return err;
}
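/*
 * Illustrative userspace sketch (not part of the original source): partial
 * checksum coverage per RFC 4340, 9.2. A CsCov value k in 1..15 means the
 * checksum covers the header plus the initial (k - 1) * 4 bytes of payload,
 * while 0 restores full coverage; this is what dccps_pcslen stores above.
 *
 *      int cscov = 4;  hypothetical value: header + 12 payload bytes
 *
 *      setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SEND_CSCOV,
 *                 &cscov, sizeof(cscov));
 */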
int dccp_setsockopt(struct sock *sk, int level, int optname,
                    char __user *optval, int optlen)
{
        if (level != SOL_DCCP)
                return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
                                                             optname, optval,
                                                             optlen);
        return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_setsockopt);

#ifdef CONFIG_COMPAT
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
                           char __user *optval, int optlen)
{
        if (level != SOL_DCCP)
                return inet_csk_compat_setsockopt(sk, level, optname,
                                                  optval, optlen);
        return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif
static int dccp_getsockopt_service(struct sock *sk, int len,
                                   __be32 __user *optval,
                                   int __user *optlen)
{
        const struct dccp_sock *dp = dccp_sk(sk);
        const struct dccp_service_list *sl;
        int err = -ENOENT, slen = 0, total_len = sizeof(u32);

        lock_sock(sk);
        if ((sl = dp->dccps_service_list) != NULL) {
                slen = sl->dccpsl_nr * sizeof(u32);
                total_len += slen;
        }

        err = -EINVAL;
        if (total_len > len)
                goto out;

        err = 0;
        if (put_user(total_len, optlen) ||
            put_user(dp->dccps_service, optval) ||
            (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
                err = -EFAULT;
out:
        release_sock(sk);
        return err;
}
static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
                              char __user *optval, int __user *optlen)
{
        struct dccp_sock *dp;
        int val, len;

        if (get_user(len, optlen))
                return -EFAULT;

        if (len < (int)sizeof(int))
                return -EINVAL;

        dp = dccp_sk(sk);

        switch (optname) {
        case DCCP_SOCKOPT_PACKET_SIZE:
                DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
                return 0;
        case DCCP_SOCKOPT_SERVICE:
                return dccp_getsockopt_service(sk, len,
                                               (__be32 __user *)optval, optlen);
        case DCCP_SOCKOPT_GET_CUR_MPS:
                val = dp->dccps_mss_cache;
                break;
        case DCCP_SOCKOPT_SERVER_TIMEWAIT:
                val = dp->dccps_server_timewait;
                break;
        case DCCP_SOCKOPT_SEND_CSCOV:
                val = dp->dccps_pcslen;
                break;
        case DCCP_SOCKOPT_RECV_CSCOV:
                val = dp->dccps_pcrlen;
                break;
        case 128 ... 191:
                return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
                                             len, (u32 __user *)optval, optlen);
        case 192 ... 255:
                return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
                                             len, (u32 __user *)optval, optlen);
        default:
                return -ENOPROTOOPT;
        }

        len = sizeof(val);
        if (put_user(len, optlen) || copy_to_user(optval, &val, len))
                return -EFAULT;

        return 0;
}
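/*
 * Illustrative note (not part of the original source): option names
 * 128..191 are forwarded verbatim to the RX CCID and 192..255 to the TX
 * CCID above, so each congestion-control module can expose its own
 * options (CCID-3, for instance, exports its rate information this way)
 * without touching this generic socket glue.
 */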
int dccp_getsockopt(struct sock *sk, int level, int optname,
                    char __user *optval, int __user *optlen)
{
        if (level != SOL_DCCP)
                return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
                                                             optname, optval,
                                                             optlen);
        return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_getsockopt);

#ifdef CONFIG_COMPAT
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
                           char __user *optval, int __user *optlen)
{
        if (level != SOL_DCCP)
                return inet_csk_compat_getsockopt(sk, level, optname,
                                                  optval, optlen);
        return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len)
{
        const struct dccp_sock *dp = dccp_sk(sk);
        const int flags = msg->msg_flags;
        const int noblock = flags & MSG_DONTWAIT;
        struct sk_buff *skb;
        int rc, size;
        long timeo;

        if (len > dp->dccps_mss_cache)
                return -EMSGSIZE;

        lock_sock(sk);

        if (sysctl_dccp_tx_qlen &&
            (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
                rc = -EAGAIN;
                goto out_release;
        }

        timeo = sock_sndtimeo(sk, noblock);

        /*
         * We have to use sk_stream_wait_connect here to set sk_write_pending,
         * so that the trick in dccp_rcv_request_sent_state_process() works.
         */
        /* Wait for a connection to finish. */
        if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
                if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
                        goto out_release;

        size = sk->sk_prot->max_header + len;
        release_sock(sk);
        skb = sock_alloc_send_skb(sk, size, noblock, &rc);
        lock_sock(sk);
        if (skb == NULL)
                goto out_release;

        skb_reserve(skb, sk->sk_prot->max_header);
        rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
        if (rc != 0)
                goto out_discard;

        skb_queue_tail(&sk->sk_write_queue, skb);
        dccp_write_xmit(sk, 0);
out_release:
        release_sock(sk);
        return rc ? : len;
out_discard:
        kfree_skb(skb);
        goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);
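/*
 * Illustrative note (not part of the original source): DCCP is
 * datagram-oriented, so each send() produces at most one packet and must
 * fit in the current maximum packet size, otherwise dccp_sendmsg() above
 * returns -EMSGSIZE. A userspace sender therefore sizes its writes to the
 * value reported by DCCP_SOCKOPT_GET_CUR_MPS:
 *
 *      int mps;
 *      socklen_t len = sizeof(mps);
 *
 *      getsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_GET_CUR_MPS, &mps, &len);
 *      send(fd, buf, n > mps ? mps : n, 0);
 */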
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len, int nonblock, int flags, int *addr_len)
{
        const struct dccp_hdr *dh;
        long timeo;

        lock_sock(sk);

        if (sk->sk_state == DCCP_LISTEN) {
                len = -ENOTCONN;
                goto out;
        }

        timeo = sock_rcvtimeo(sk, nonblock);

        do {
                struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

                if (skb == NULL)
                        goto verify_sock_status;

                dh = dccp_hdr(skb);

                switch (dh->dccph_type) {
                case DCCP_PKT_DATA:
                case DCCP_PKT_DATAACK:
                        goto found_ok_skb;

                case DCCP_PKT_CLOSE:
                case DCCP_PKT_CLOSEREQ:
                        if (!(flags & MSG_PEEK))
                                dccp_finish_passive_close(sk);
                        /* fall through */
                case DCCP_PKT_RESET:
                        dccp_pr_debug("found fin (%s) ok!\n",
                                      dccp_packet_name(dh->dccph_type));
                        len = 0;
                        goto found_fin_ok;
                default:
                        dccp_pr_debug("packet_type=%s\n",
                                      dccp_packet_name(dh->dccph_type));
                        sk_eat_skb(sk, skb, 0);
                }
verify_sock_status:
                if (sock_flag(sk, SOCK_DONE)) {
                        len = 0;
                        break;
                }

                if (sk->sk_err) {
                        len = sock_error(sk);
                        break;
                }

                if (sk->sk_shutdown & RCV_SHUTDOWN) {
                        len = 0;
                        break;
                }

                if (sk->sk_state == DCCP_CLOSED) {
                        if (!sock_flag(sk, SOCK_DONE)) {
                                /* This occurs when user tries to read
                                 * from never connected socket.
                                 */
                                len = -ENOTCONN;
                                break;
                        }
                        len = 0;
                        break;
                }

                if (!timeo) {
                        len = -EAGAIN;
                        break;
                }

                if (signal_pending(current)) {
                        len = sock_intr_errno(timeo);
                        break;
                }

                sk_wait_data(sk, &timeo);
                continue;
        found_ok_skb:
                if (len > skb->len)
                        len = skb->len;
                else if (len < skb->len)
                        msg->msg_flags |= MSG_TRUNC;

                if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
                        /* Exception. Bailout! */
                        len = -EFAULT;
                        break;
                }
        found_fin_ok:
                if (!(flags & MSG_PEEK))
                        sk_eat_skb(sk, skb, 0);
                break;
        } while (1);
out:
        release_sock(sk);
        return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);
int inet_dccp_listen(struct socket *sock, int backlog)
{
        struct sock *sk = sock->sk;
        unsigned char old_state;
        int err;

        lock_sock(sk);

        err = -EINVAL;
        if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
                goto out;

        old_state = sk->sk_state;
        if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
                goto out;

        /* Really, if the socket is already in listen state
         * we can only allow the backlog to be adjusted.
         */
        if (old_state != DCCP_LISTEN) {
                /*
                 * FIXME: here it probably should be sk->sk_prot->listen_start
                 * see tcp_listen_start
                 */
                err = dccp_listen_start(sk, backlog);
                if (err)
                        goto out;
        }
        sk->sk_max_ack_backlog = backlog;
        err = 0;

out:
        release_sock(sk);
        return err;
}

EXPORT_SYMBOL_GPL(inet_dccp_listen);
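/*
 * Illustrative userspace sketch (not part of the original source): a
 * minimal DCCP server reaches the listen path above through the ordinary
 * sockets API:
 *
 *      int fd = socket(AF_INET, SOCK_DCCP, IPPROTO_DCCP);
 *
 *      bind(fd, (struct sockaddr *)&addr, sizeof(addr));
 *      listen(fd, 5);
 *      client = accept(fd, NULL, NULL);
 */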
static void dccp_terminate_connection(struct sock *sk)
{
        u8 next_state = DCCP_CLOSED;

        switch (sk->sk_state) {
        case DCCP_PASSIVE_CLOSE:
        case DCCP_PASSIVE_CLOSEREQ:
                dccp_finish_passive_close(sk);
                break;
        case DCCP_PARTOPEN:
                dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
                inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
                /* fall through */
        case DCCP_OPEN:
                dccp_send_close(sk, 1);

                if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
                    !dccp_sk(sk)->dccps_server_timewait)
                        next_state = DCCP_ACTIVE_CLOSEREQ;
                else
                        next_state = DCCP_CLOSING;
                /* fall through */
        default:
                dccp_set_state(sk, next_state);
        }
}
void dccp_close(struct sock *sk, long timeout)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct sk_buff *skb;
        u32 data_was_unread = 0;
        int state;

        lock_sock(sk);

        sk->sk_shutdown = SHUTDOWN_MASK;

        if (sk->sk_state == DCCP_LISTEN) {
                dccp_set_state(sk, DCCP_CLOSED);

                /* Special case. */
                inet_csk_listen_stop(sk);

                goto adjudge_to_death;
        }

        sk_stop_timer(sk, &dp->dccps_xmit_timer);

        /*
         * We need to flush the recv. buffs. We do this only on the
         * descriptor close, not protocol-sourced closes, because the
         * reader process may not have drained the data yet!
         */
        while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                data_was_unread += skb->len;
                __kfree_skb(skb);
        }

        if (data_was_unread) {
                /* Unread data was tossed, send an appropriate Reset Code */
                DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
                dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
                dccp_set_state(sk, DCCP_CLOSED);
        } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
                /* Check zero linger _after_ checking for unread data. */
                sk->sk_prot->disconnect(sk, 0);
        } else if (sk->sk_state != DCCP_CLOSED) {
                dccp_terminate_connection(sk);
        }

        sk_stream_wait_close(sk, timeout);

adjudge_to_death:
        state = sk->sk_state;
        sock_hold(sk);
        sock_orphan(sk);
        atomic_inc(sk->sk_prot->orphan_count);

        /*
         * It is the last release_sock in its life. It will remove backlog.
         */
        release_sock(sk);
        /*
         * Now socket is owned by kernel and we acquire BH lock
         * to finish close. No need to check for user refs.
         */
        local_bh_disable();
        bh_lock_sock(sk);
        BUG_TRAP(!sock_owned_by_user(sk));

        /* Have we already been destroyed by a softirq or backlog? */
        if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
                goto out;

        if (sk->sk_state == DCCP_CLOSED)
                inet_csk_destroy_sock(sk);

        /* Otherwise, socket is reprieved until protocol close. */

out:
        bh_unlock_sock(sk);
        local_bh_enable();
        sock_put(sk);
}

EXPORT_SYMBOL_GPL(dccp_close);
void dccp_shutdown(struct sock *sk, int how)
{
        dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);
static int __init dccp_mib_init(void)
{
        int rc = -ENOMEM;

        dccp_statistics[0] = alloc_percpu(struct dccp_mib);
        if (dccp_statistics[0] == NULL)
                goto out;

        dccp_statistics[1] = alloc_percpu(struct dccp_mib);
        if (dccp_statistics[1] == NULL)
                goto out_free_one;

        rc = 0;
out:
        return rc;
out_free_one:
        free_percpu(dccp_statistics[0]);
        dccp_statistics[0] = NULL;
        goto out;
}

static void dccp_mib_exit(void)
{
        free_percpu(dccp_statistics[0]);
        free_percpu(dccp_statistics[1]);
        dccp_statistics[0] = dccp_statistics[1] = NULL;
}
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
int dccp_debug;
module_param(dccp_debug, bool, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif
static int __init dccp_init(void)
{
        unsigned long goal;
        int ehash_order, bhash_order, i;
        int rc = -ENOBUFS;

        dccp_hashinfo.bind_bucket_cachep =
                kmem_cache_create("dccp_bind_bucket",
                                  sizeof(struct inet_bind_bucket), 0,
                                  SLAB_HWCACHE_ALIGN, NULL);
        if (!dccp_hashinfo.bind_bucket_cachep)
                goto out;

        /*
         * Size and allocate the main established and bind bucket
         * hash tables.
         *
         * The methodology is similar to that of the buffer cache.
         */
        if (num_physpages >= (128 * 1024))
                goal = num_physpages >> (21 - PAGE_SHIFT);
        else
                goal = num_physpages >> (23 - PAGE_SHIFT);
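        /*
         * Worked example (illustrative, not from the original source):
         * with 4 KiB pages (PAGE_SHIFT = 12) and 512 MiB of RAM,
         * num_physpages is 131072 >= 128 * 1024, so
         * goal = 131072 >> (21 - 12) = 256 pages, i.e. 1 MiB for the
         * established hash before any thash_entries override below.
         */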
        if (thash_entries)
                goal = (thash_entries *
                        sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
        for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
                ;
        do {
                dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
                                        sizeof(struct inet_ehash_bucket);
                while (dccp_hashinfo.ehash_size &
                       (dccp_hashinfo.ehash_size - 1))
                        dccp_hashinfo.ehash_size--;
                dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
                        __get_free_pages(GFP_ATOMIC, ehash_order);
        } while (!dccp_hashinfo.ehash && --ehash_order > 0);

        if (!dccp_hashinfo.ehash) {
                DCCP_CRIT("Failed to allocate DCCP established hash table");
                goto out_free_bind_bucket_cachep;
        }

        for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
                INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
                INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
        }

        if (inet_ehash_locks_alloc(&dccp_hashinfo))
                goto out_free_dccp_ehash;

        bhash_order = ehash_order;

        do {
                dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
                                        sizeof(struct inet_bind_hashbucket);
                if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
                    bhash_order > 0)
                        continue;
                dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
                        __get_free_pages(GFP_ATOMIC, bhash_order);
        } while (!dccp_hashinfo.bhash && --bhash_order >= 0);

        if (!dccp_hashinfo.bhash) {
                DCCP_CRIT("Failed to allocate DCCP bind hash table");
                goto out_free_dccp_locks;
        }

        for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
                spin_lock_init(&dccp_hashinfo.bhash[i].lock);
                INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
        }

        rc = dccp_mib_init();
        if (rc)
                goto out_free_dccp_bhash;

        rc = dccp_ackvec_init();
        if (rc)
                goto out_free_dccp_mib;

        rc = dccp_sysctl_init();
        if (rc)
                goto out_ackvec_exit;

        dccp_timestamping_init();
out:
        return rc;
out_ackvec_exit:
        dccp_ackvec_exit();
out_free_dccp_mib:
        dccp_mib_exit();
out_free_dccp_bhash:
        free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
        dccp_hashinfo.bhash = NULL;
out_free_dccp_locks:
        inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
        free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
        dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        dccp_hashinfo.bind_bucket_cachep = NULL;
        goto out;
}
static void __exit dccp_fini(void)
{
        dccp_mib_exit();
        free_pages((unsigned long)dccp_hashinfo.bhash,
                   get_order(dccp_hashinfo.bhash_size *
                             sizeof(struct inet_bind_hashbucket)));
        free_pages((unsigned long)dccp_hashinfo.ehash,
                   get_order(dccp_hashinfo.ehash_size *
                             sizeof(struct inet_ehash_bucket)));
        inet_ehash_locks_free(&dccp_hashinfo);
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        dccp_ackvec_exit();
        dccp_sysctl_exit();
}
module_init(dccp_init);
module_exit(dccp_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");