net/ipv4/ipvs/ip_vs_ctl.c
/*
 * IPVS         An implementation of the IP virtual server support for the
 *              LINUX operating system.  IPVS is now implemented as a module
 *              over the NetFilter framework. IPVS can be used to build a
 *              high-performance and highly available server based on a
 *              cluster of servers.
 *
 * Version:     $Id: ip_vs_ctl.c,v 1.36 2003/06/08 09:31:19 wensong Exp $
 *
 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
 *              Peter Kese <peter.kese@ijs.si>
 *              Julian Anastasov <ja@ssi.bg>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Changes:
 *
 */
#include <linux/module.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/fs.h>
#include <linux/sysctl.h>
#include <linux/proc_fs.h>
#include <linux/workqueue.h>
#include <linux/swap.h>
#include <linux/seq_file.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/mutex.h>

#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/sock.h>

#include <asm/uaccess.h>

#include <net/ip_vs.h>
/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
static DEFINE_MUTEX(__ip_vs_mutex);

/* lock for service table */
static DEFINE_RWLOCK(__ip_vs_svc_lock);

/* lock for table with the real services */
static DEFINE_RWLOCK(__ip_vs_rs_lock);

/* lock for state and timeout tables */
static DEFINE_RWLOCK(__ip_vs_securetcp_lock);

/* lock for drop entry handling */
static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);

/* lock for drop packet handling */
static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);

/* 1/rate drop and drop-entry variables */
int ip_vs_drop_rate = 0;
int ip_vs_drop_counter = 0;
static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);

/* number of virtual services */
static int ip_vs_num_services = 0;

/* sysctl variables */
static int sysctl_ip_vs_drop_entry = 0;
static int sysctl_ip_vs_drop_packet = 0;
static int sysctl_ip_vs_secure_tcp = 0;
static int sysctl_ip_vs_amemthresh = 1024;
static int sysctl_ip_vs_am_droprate = 10;
int sysctl_ip_vs_cache_bypass = 0;
int sysctl_ip_vs_expire_nodest_conn = 0;
int sysctl_ip_vs_expire_quiescent_template = 0;
int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
int sysctl_ip_vs_nat_icmp_send = 0;
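/*
 * Note: these knobs are exported to userspace through the vs_vars sysctl
 * table registered later in this file (via vs_root_table), i.e. as files
 * under /proc/sys/net/ipv4/vs/.  A minimal sketch of how an administrator
 * would tune them from a shell (illustrative only, not part of this file):
 *
 *	echo 1 > /proc/sys/net/ipv4/vs/drop_entry
 *	echo 2048 > /proc/sys/net/ipv4/vs/amemthresh
 */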
#ifdef CONFIG_IP_VS_DEBUG
static int sysctl_ip_vs_debug_level = 0;

int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
#endif
/*
 *	update_defense_level is called from keventd and from sysctl,
 *	so it needs to protect itself from softirqs
 */
static void update_defense_level(void)
{
	struct sysinfo i;
	static int old_secure_tcp = 0;
	int availmem;
	int nomem;
	int to_change = -1;

	/* we only count free and buffered memory (in pages) */
	si_meminfo(&i);
	availmem = i.freeram + i.bufferram;
	/* however in linux 2.5 the i.bufferram is total page cache size,
	   we need adjust it */
	/* si_swapinfo(&i); */
	/* availmem = availmem - (i.totalswap - i.freeswap); */

	nomem = (availmem < sysctl_ip_vs_amemthresh);

	local_bh_disable();

	/* drop_entry */
	spin_lock(&__ip_vs_dropentry_lock);
	switch (sysctl_ip_vs_drop_entry) {
	case 0:
		atomic_set(&ip_vs_dropentry, 0);
		break;
	case 1:
		if (nomem) {
			atomic_set(&ip_vs_dropentry, 1);
			sysctl_ip_vs_drop_entry = 2;
		} else {
			atomic_set(&ip_vs_dropentry, 0);
		}
		break;
	case 2:
		if (nomem) {
			atomic_set(&ip_vs_dropentry, 1);
		} else {
			atomic_set(&ip_vs_dropentry, 0);
			sysctl_ip_vs_drop_entry = 1;
		}
		break;
	case 3:
		atomic_set(&ip_vs_dropentry, 1);
		break;
	}
	spin_unlock(&__ip_vs_dropentry_lock);

	/* drop_packet */
	spin_lock(&__ip_vs_droppacket_lock);
	switch (sysctl_ip_vs_drop_packet) {
	case 0:
		ip_vs_drop_rate = 0;
		break;
	case 1:
		if (nomem) {
			ip_vs_drop_rate = ip_vs_drop_counter
				= sysctl_ip_vs_amemthresh /
				(sysctl_ip_vs_amemthresh-availmem);
			sysctl_ip_vs_drop_packet = 2;
		} else {
			ip_vs_drop_rate = 0;
		}
		break;
	case 2:
		if (nomem) {
			ip_vs_drop_rate = ip_vs_drop_counter
				= sysctl_ip_vs_amemthresh /
				(sysctl_ip_vs_amemthresh-availmem);
		} else {
			ip_vs_drop_rate = 0;
			sysctl_ip_vs_drop_packet = 1;
		}
		break;
	case 3:
		ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
		break;
	}
	spin_unlock(&__ip_vs_droppacket_lock);

	/* secure_tcp */
	write_lock(&__ip_vs_securetcp_lock);
	switch (sysctl_ip_vs_secure_tcp) {
	case 0:
		if (old_secure_tcp >= 2)
			to_change = 0;
		break;
	case 1:
		if (nomem) {
			if (old_secure_tcp < 2)
				to_change = 1;
			sysctl_ip_vs_secure_tcp = 2;
		} else {
			if (old_secure_tcp >= 2)
				to_change = 0;
		}
		break;
	case 2:
		if (nomem) {
			if (old_secure_tcp < 2)
				to_change = 1;
		} else {
			if (old_secure_tcp >= 2)
				to_change = 0;
			sysctl_ip_vs_secure_tcp = 1;
		}
		break;
	case 3:
		if (old_secure_tcp < 2)
			to_change = 1;
		break;
	}
	old_secure_tcp = sysctl_ip_vs_secure_tcp;
	if (to_change >= 0)
		ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
	write_unlock(&__ip_vs_securetcp_lock);

	local_bh_enable();
}
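/*
 * The three defense sysctls above share a small state machine: mode 0 is
 * "never defend", mode 3 is "always defend", and mode 1 arms the defense
 * automatically when available memory drops below amemthresh.  Once
 * triggered, the handler latches itself into the internal mode 2 until
 * memory recovers, then falls back to 1.  So writing 1 to drop_entry,
 * drop_packet or secure_tcp enables memory-pressure-triggered defense
 * without keeping it permanently on.
 */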
/*
 *	Timer for checking the defense
 */
#define DEFENSE_TIMER_PERIOD	1*HZ
static void defense_work_handler(struct work_struct *work);
static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);

static void defense_work_handler(struct work_struct *work)
{
	update_defense_level();
	if (atomic_read(&ip_vs_dropentry))
		ip_vs_random_dropentry();

	schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
}
int
ip_vs_use_count_inc(void)
{
	return try_module_get(THIS_MODULE);
}

void
ip_vs_use_count_dec(void)
{
	module_put(THIS_MODULE);
}
/*
 *	Hash table: for virtual service lookups
 */
#define IP_VS_SVC_TAB_BITS 8
#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)

/* the service table hashed by <protocol, addr, port> */
static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
/* the service table hashed by fwmark */
static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];

/*
 *	Hash table: for real service lookups
 */
#define IP_VS_RTAB_BITS 4
#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)

static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];

/*
 *	Trash for destinations
 */
static LIST_HEAD(ip_vs_dest_trash);

/*
 *	FTP & NULL virtual service counters
 */
static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
/*
 *	Returns hash value for virtual service
 */
static __inline__ unsigned
ip_vs_svc_hashkey(unsigned proto, __be32 addr, __be16 port)
{
	register unsigned porth = ntohs(port);

	return (proto^ntohl(addr)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
		& IP_VS_SVC_TAB_MASK;
}
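/*
 * Worked example (illustrative): for TCP (proto 6) on 192.168.0.1:80,
 * ntohl(addr) is 0xc0a80001 and porth is 80 (0x50), so the key is
 * (6 ^ 0xc0a80001 ^ (80 >> 8) ^ 80) & 0xff = 0xc0a80057 & 0xff = 0x57,
 * i.e. bucket 87 of the 256-entry service table.
 */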
/*
 *	Returns hash value of fwmark for virtual service lookup
 */
static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
{
	return fwmark & IP_VS_SVC_TAB_MASK;
}
/*
 *	Hashes a service in the ip_vs_svc_table by <proto,addr,port>
 *	or in the ip_vs_svc_fwm_table by fwmark.
 *	Should be called with locked tables.
 */
static int ip_vs_svc_hash(struct ip_vs_service *svc)
{
	unsigned hash;

	if (svc->flags & IP_VS_SVC_F_HASHED) {
		IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
			  "called from %p\n", __builtin_return_address(0));
		return 0;
	}

	if (svc->fwmark == 0) {
		/*
		 *  Hash it by <protocol,addr,port> in ip_vs_svc_table
		 */
		hash = ip_vs_svc_hashkey(svc->protocol, svc->addr, svc->port);
		list_add(&svc->s_list, &ip_vs_svc_table[hash]);
	} else {
		/*
		 *  Hash it by fwmark in ip_vs_svc_fwm_table
		 */
		hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
		list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
	}

	svc->flags |= IP_VS_SVC_F_HASHED;
	/* increase its refcnt because it is referenced by the svc table */
	atomic_inc(&svc->refcnt);
	return 1;
}
/*
 *	Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
 *	Should be called with locked tables.
 */
static int ip_vs_svc_unhash(struct ip_vs_service *svc)
{
	if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
		IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
			  "called from %p\n", __builtin_return_address(0));
		return 0;
	}

	if (svc->fwmark == 0) {
		/* Remove it from the ip_vs_svc_table table */
		list_del(&svc->s_list);
	} else {
		/* Remove it from the ip_vs_svc_fwm_table table */
		list_del(&svc->f_list);
	}

	svc->flags &= ~IP_VS_SVC_F_HASHED;
	atomic_dec(&svc->refcnt);
	return 1;
}
/*
 *	Get service by {proto,addr,port} in the service table.
 */
static __inline__ struct ip_vs_service *
__ip_vs_service_get(__u16 protocol, __be32 vaddr, __be16 vport)
{
	unsigned hash;
	struct ip_vs_service *svc;

	/* Check for "full" addressed entries */
	hash = ip_vs_svc_hashkey(protocol, vaddr, vport);

	list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
		if ((svc->addr == vaddr)
		    && (svc->port == vport)
		    && (svc->protocol == protocol)) {
			/* HIT */
			atomic_inc(&svc->usecnt);
			return svc;
		}
	}

	return NULL;
}
/*
 *	Get service by {fwmark} in the service table.
 */
static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark)
{
	unsigned hash;
	struct ip_vs_service *svc;

	/* Check for fwmark addressed entries */
	hash = ip_vs_svc_fwm_hashkey(fwmark);

	list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
		if (svc->fwmark == fwmark) {
			/* HIT */
			atomic_inc(&svc->usecnt);
			return svc;
		}
	}

	return NULL;
}
struct ip_vs_service *
ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport)
{
	struct ip_vs_service *svc;

	read_lock(&__ip_vs_svc_lock);

	/*
	 *	Check the table hashed by fwmark first
	 */
	if (fwmark && (svc = __ip_vs_svc_fwm_get(fwmark)))
		goto out;

	/*
	 *	Check the table hashed by <protocol,addr,port>
	 *	for "full" addressed entries
	 */
	svc = __ip_vs_service_get(protocol, vaddr, vport);

	if (svc == NULL
	    && protocol == IPPROTO_TCP
	    && atomic_read(&ip_vs_ftpsvc_counter)
	    && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
		/*
		 * Check if ftp service entry exists, the packet
		 * might belong to FTP data connections.
		 */
		svc = __ip_vs_service_get(protocol, vaddr, FTPPORT);
	}

	if (svc == NULL
	    && atomic_read(&ip_vs_nullsvc_counter)) {
		/*
		 * Check if the catch-all port (port zero) exists
		 */
		svc = __ip_vs_service_get(protocol, vaddr, 0);
	}

  out:
	read_unlock(&__ip_vs_svc_lock);

	IP_VS_DBG(9, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n",
		  fwmark, ip_vs_proto_name(protocol),
		  NIPQUAD(vaddr), ntohs(vport),
		  svc?"hit":"not hit");

	return svc;
}
static inline void
__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
{
	atomic_inc(&svc->refcnt);
	dest->svc = svc;
}

static inline void
__ip_vs_unbind_svc(struct ip_vs_dest *dest)
{
	struct ip_vs_service *svc = dest->svc;

	dest->svc = NULL;
	if (atomic_dec_and_test(&svc->refcnt))
		kfree(svc);
}
/*
 *	Returns hash value for real service
 */
static __inline__ unsigned ip_vs_rs_hashkey(__be32 addr, __be16 port)
{
	register unsigned porth = ntohs(port);

	return (ntohl(addr)^(porth>>IP_VS_RTAB_BITS)^porth)
		& IP_VS_RTAB_MASK;
}

/*
 *	Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
 *	should be called with locked tables.
 */
static int ip_vs_rs_hash(struct ip_vs_dest *dest)
{
	unsigned hash;

	if (!list_empty(&dest->d_list)) {
		return 0;
	}

	/*
	 *	Hash by proto,addr,port,
	 *	which are the parameters of the real service.
	 */
	hash = ip_vs_rs_hashkey(dest->addr, dest->port);
	list_add(&dest->d_list, &ip_vs_rtable[hash]);

	return 1;
}
/*
 *	UNhashes ip_vs_dest from ip_vs_rtable.
 *	should be called with locked tables.
 */
static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
{
	/*
	 * Remove it from the ip_vs_rtable table.
	 */
	if (!list_empty(&dest->d_list)) {
		list_del(&dest->d_list);
		INIT_LIST_HEAD(&dest->d_list);
	}

	return 1;
}
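/*
 * Note: both helpers above use list_empty(&dest->d_list) as the "is this
 * dest currently hashed in ip_vs_rtable?" test, which is why unhashing
 * re-runs INIT_LIST_HEAD() on d_list instead of leaving the deleted node
 * in a poisoned state.
 */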
/*
 *	Lookup real service by <proto,addr,port> in the real service table.
 */
struct ip_vs_dest *
ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport)
{
	unsigned hash;
	struct ip_vs_dest *dest;

	/*
	 *	Check for "full" addressed entries
	 *	Return the first found entry
	 */
	hash = ip_vs_rs_hashkey(daddr, dport);

	read_lock(&__ip_vs_rs_lock);
	list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
		if ((dest->addr == daddr)
		    && (dest->port == dport)
		    && ((dest->protocol == protocol) ||
			dest->vfwmark)) {
			/* HIT */
			read_unlock(&__ip_vs_rs_lock);
			return dest;
		}
	}
	read_unlock(&__ip_vs_rs_lock);

	return NULL;
}
/*
 *	Lookup destination by {addr,port} in the given service
 */
static struct ip_vs_dest *
ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
{
	struct ip_vs_dest *dest;

	/*
	 * Find the destination for the given service
	 */
	list_for_each_entry(dest, &svc->destinations, n_list) {
		if ((dest->addr == daddr) && (dest->port == dport)) {
			/* HIT */
			return dest;
		}
	}

	return NULL;
}
/*
 *	Lookup dest by {svc,addr,port} in the destination trash.
 *	The destination trash is used to hold the destinations that are removed
 *	from the service table but are still referenced by some conn entries.
 *	The trash exists because, when a dest is temporarily down (taken down
 *	by the administrator or by a monitor program), it can be picked back
 *	from the trash: the remaining connections to it can continue, and its
 *	counters are still useful for scheduling.
 */
static struct ip_vs_dest *
ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
{
	struct ip_vs_dest *dest, *nxt;

	/*
	 * Find the destination in trash
	 */
	list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
		IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, "
			  "dest->refcnt=%d\n",
			  dest->vfwmark,
			  NIPQUAD(dest->addr), ntohs(dest->port),
			  atomic_read(&dest->refcnt));
		if (dest->addr == daddr &&
		    dest->port == dport &&
		    dest->vfwmark == svc->fwmark &&
		    dest->protocol == svc->protocol &&
		    (svc->fwmark ||
		     (dest->vaddr == svc->addr &&
		      dest->vport == svc->port))) {
			/* HIT */
			return dest;
		}

		/*
		 * Try to purge the destination from trash if not referenced
		 */
		if (atomic_read(&dest->refcnt) == 1) {
			IP_VS_DBG(3, "Removing destination %u/%u.%u.%u.%u:%u "
				  "from trash\n",
				  dest->vfwmark,
				  NIPQUAD(dest->addr), ntohs(dest->port));
			list_del(&dest->n_list);
			ip_vs_dst_reset(dest);
			__ip_vs_unbind_svc(dest);
			kfree(dest);
		}
	}

	return NULL;
}
/*
 *	Clean up all the destinations in the trash
 *	Called by the ip_vs_control_cleanup()
 *
 *	When ip_vs_control_cleanup is invoked at ipvs module exit,
 *	the service tables must have been flushed and all the connections
 *	are expired, and the refcnt of each destination in the trash must
 *	be 1, so we simply release them here.
 */
static void ip_vs_trash_cleanup(void)
{
	struct ip_vs_dest *dest, *nxt;

	list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
		list_del(&dest->n_list);
		ip_vs_dst_reset(dest);
		__ip_vs_unbind_svc(dest);
		kfree(dest);
	}
}
static void
ip_vs_zero_stats(struct ip_vs_stats *stats)
{
	spin_lock_bh(&stats->lock);
	memset(stats, 0, (char *)&stats->lock - (char *)stats);
	spin_unlock_bh(&stats->lock);
	ip_vs_zero_estimator(stats);
}
/*
 *	Update a destination in the given service
 */
static void
__ip_vs_update_dest(struct ip_vs_service *svc,
		    struct ip_vs_dest *dest, struct ip_vs_dest_user *udest)
{
	int conn_flags;

	/* set the weight and the flags */
	atomic_set(&dest->weight, udest->weight);
	conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;

	/* check if local node and update the flags */
	if (inet_addr_type(udest->addr) == RTN_LOCAL) {
		conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
			| IP_VS_CONN_F_LOCALNODE;
	}

	/* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
	if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
		conn_flags |= IP_VS_CONN_F_NOOUTPUT;
	} else {
		/*
		 *    Put the real service in ip_vs_rtable if not present.
		 *    For now only for NAT!
		 */
		write_lock_bh(&__ip_vs_rs_lock);
		ip_vs_rs_hash(dest);
		write_unlock_bh(&__ip_vs_rs_lock);
	}
	atomic_set(&dest->conn_flags, conn_flags);

	/* bind the service */
	if (!dest->svc) {
		__ip_vs_bind_svc(dest, svc);
	} else {
		if (dest->svc != svc) {
			__ip_vs_unbind_svc(dest);
			ip_vs_zero_stats(&dest->stats);
			__ip_vs_bind_svc(dest, svc);
		}
	}

	/* set the dest status flags */
	dest->flags |= IP_VS_DEST_F_AVAILABLE;

	if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
		dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
	dest->u_threshold = udest->u_threshold;
	dest->l_threshold = udest->l_threshold;
}
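/*
 * A short note on the thresholds (hedged, based on how the rest of IPVS
 * consumes them): u_threshold is the upper bound on connections a real
 * server should carry before the connection code marks it with
 * IP_VS_DEST_F_OVERLOAD, and l_threshold is the lower bound at which the
 * flag is cleared again; u_threshold == 0 means "no limit", which is why
 * the code above drops the overload flag whenever the limit is removed
 * or raised.
 */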
/*
 *	Create a destination for the given service
 */
static int
ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest,
	       struct ip_vs_dest **dest_p)
{
	struct ip_vs_dest *dest;
	unsigned atype;

	EnterFunction(2);

	atype = inet_addr_type(udest->addr);
	if (atype != RTN_LOCAL && atype != RTN_UNICAST)
		return -EINVAL;

	dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
	if (dest == NULL) {
		IP_VS_ERR("ip_vs_new_dest: kzalloc failed.\n");
		return -ENOMEM;
	}

	dest->protocol = svc->protocol;
	dest->vaddr = svc->addr;
	dest->vport = svc->port;
	dest->vfwmark = svc->fwmark;
	dest->addr = udest->addr;
	dest->port = udest->port;

	atomic_set(&dest->activeconns, 0);
	atomic_set(&dest->inactconns, 0);
	atomic_set(&dest->persistconns, 0);
	atomic_set(&dest->refcnt, 0);

	INIT_LIST_HEAD(&dest->d_list);
	spin_lock_init(&dest->dst_lock);
	spin_lock_init(&dest->stats.lock);
	__ip_vs_update_dest(svc, dest, udest);
	ip_vs_new_estimator(&dest->stats);

	*dest_p = dest;

	LeaveFunction(2);
	return 0;
}
/*
 *	Add a destination into an existing service
 */
static int
ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
{
	struct ip_vs_dest *dest;
	__be32 daddr = udest->addr;
	__be16 dport = udest->port;
	int ret;

	EnterFunction(2);

	if (udest->weight < 0) {
		IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
		return -ERANGE;
	}

	if (udest->l_threshold > udest->u_threshold) {
		IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than "
			  "upper threshold\n");
		return -ERANGE;
	}

	/*
	 * Check if the dest already exists in the list
	 */
	dest = ip_vs_lookup_dest(svc, daddr, dport);
	if (dest != NULL) {
		IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
		return -EEXIST;
	}

	/*
	 * Check if the dest already exists in the trash and
	 * is from the same service
	 */
	dest = ip_vs_trash_get_dest(svc, daddr, dport);
	if (dest != NULL) {
		IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
			  "dest->refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
			  NIPQUAD(daddr), ntohs(dport),
			  atomic_read(&dest->refcnt),
			  dest->vfwmark,
			  NIPQUAD(dest->vaddr),
			  ntohs(dest->vport));
		__ip_vs_update_dest(svc, dest, udest);

		/*
		 * Get the destination from the trash
		 */
		list_del(&dest->n_list);

		ip_vs_new_estimator(&dest->stats);

		write_lock_bh(&__ip_vs_svc_lock);

		/*
		 * Wait until all other svc users go away.
		 */
		IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);

		list_add(&dest->n_list, &svc->destinations);
		svc->num_dests++;

		/* call the update_service function of its scheduler */
		svc->scheduler->update_service(svc);

		write_unlock_bh(&__ip_vs_svc_lock);
		return 0;
	}

	/*
	 * Allocate and initialize the dest structure
	 */
	ret = ip_vs_new_dest(svc, udest, &dest);
	if (ret) {
		return ret;
	}

	/*
	 * Add the dest entry into the list
	 */
	atomic_inc(&dest->refcnt);

	write_lock_bh(&__ip_vs_svc_lock);

	/*
	 * Wait until all other svc users go away.
	 */
	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);

	list_add(&dest->n_list, &svc->destinations);
	svc->num_dests++;

	/* call the update_service function of its scheduler */
	svc->scheduler->update_service(svc);

	write_unlock_bh(&__ip_vs_svc_lock);

	LeaveFunction(2);

	return 0;
}
/*
 *	Edit a destination in the given service
 */
static int
ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
{
	struct ip_vs_dest *dest;
	__be32 daddr = udest->addr;
	__be16 dport = udest->port;

	EnterFunction(2);

	if (udest->weight < 0) {
		IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
		return -ERANGE;
	}

	if (udest->l_threshold > udest->u_threshold) {
		IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than "
			  "upper threshold\n");
		return -ERANGE;
	}

	/*
	 *  Lookup the destination list
	 */
	dest = ip_vs_lookup_dest(svc, daddr, dport);
	if (dest == NULL) {
		IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
		return -ENOENT;
	}

	__ip_vs_update_dest(svc, dest, udest);

	write_lock_bh(&__ip_vs_svc_lock);

	/* Wait until all other svc users go away */
	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);

	/* call the update_service, because server weight may be changed */
	svc->scheduler->update_service(svc);

	write_unlock_bh(&__ip_vs_svc_lock);

	LeaveFunction(2);

	return 0;
}
/*
 *	Delete a destination (must be already unlinked from the service)
 */
static void __ip_vs_del_dest(struct ip_vs_dest *dest)
{
	ip_vs_kill_estimator(&dest->stats);

	/*
	 *  Remove it from the d-linked list with the real services.
	 */
	write_lock_bh(&__ip_vs_rs_lock);
	ip_vs_rs_unhash(dest);
	write_unlock_bh(&__ip_vs_rs_lock);

	/*
	 *  Decrease the refcnt of the dest, and free the dest
	 *  if nobody refers to it (refcnt=0). Otherwise, throw
	 *  the destination into the trash.
	 */
	if (atomic_dec_and_test(&dest->refcnt)) {
		ip_vs_dst_reset(dest);
		/* simply decrease svc->refcnt here, let the caller check
		   and release the service if nobody refers to it.
		   Only user context can release destination and service,
		   and only one user context can update virtual service at a
		   time, so the operation here is OK */
		atomic_dec(&dest->svc->refcnt);
		kfree(dest);
	} else {
		IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, "
			  "dest->refcnt=%d\n",
			  NIPQUAD(dest->addr), ntohs(dest->port),
			  atomic_read(&dest->refcnt));
		list_add(&dest->n_list, &ip_vs_dest_trash);
		atomic_inc(&dest->refcnt);
	}
}
/*
 *	Unlink a destination from the given service
 */
static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
				struct ip_vs_dest *dest,
				int svcupd)
{
	dest->flags &= ~IP_VS_DEST_F_AVAILABLE;

	/*
	 *  Remove it from the d-linked destination list.
	 */
	list_del(&dest->n_list);
	svc->num_dests--;
	if (svcupd) {
		/*
		 *  Call the update_service function of its scheduler
		 */
		svc->scheduler->update_service(svc);
	}
}
/*
 *	Delete a destination server in the given service
 */
static int
ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
{
	struct ip_vs_dest *dest;
	__be32 daddr = udest->addr;
	__be16 dport = udest->port;

	EnterFunction(2);

	dest = ip_vs_lookup_dest(svc, daddr, dport);
	if (dest == NULL) {
		IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
		return -ENOENT;
	}

	write_lock_bh(&__ip_vs_svc_lock);

	/*
	 *	Wait until all other svc users go away.
	 */
	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);

	/*
	 *	Unlink dest from the service
	 */
	__ip_vs_unlink_dest(svc, dest, 1);

	write_unlock_bh(&__ip_vs_svc_lock);

	/*
	 *	Delete the destination
	 */
	__ip_vs_del_dest(dest);

	LeaveFunction(2);

	return 0;
}
/*
 *	Add a service into the service hash table
 */
static int
ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p)
{
	int ret = 0;
	struct ip_vs_scheduler *sched = NULL;
	struct ip_vs_service *svc = NULL;

	/* increase the module use count */
	ip_vs_use_count_inc();

	/* Lookup the scheduler by 'u->sched_name' */
	sched = ip_vs_scheduler_get(u->sched_name);
	if (sched == NULL) {
		IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
			   u->sched_name);
		ret = -ENOENT;
		goto out_mod_dec;
	}

	svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
	if (svc == NULL) {
		IP_VS_DBG(1, "ip_vs_add_service: kzalloc failed.\n");
		ret = -ENOMEM;
		goto out_err;
	}

	/* I'm the first user of the service */
	atomic_set(&svc->usecnt, 1);
	atomic_set(&svc->refcnt, 0);

	svc->protocol = u->protocol;
	svc->addr = u->addr;
	svc->port = u->port;
	svc->fwmark = u->fwmark;
	svc->flags = u->flags;
	svc->timeout = u->timeout * HZ;
	svc->netmask = u->netmask;

	INIT_LIST_HEAD(&svc->destinations);
	rwlock_init(&svc->sched_lock);
	spin_lock_init(&svc->stats.lock);

	/* Bind the scheduler */
	ret = ip_vs_bind_scheduler(svc, sched);
	if (ret)
		goto out_err;
	sched = NULL;

	/* Update the virtual service counters */
	if (svc->port == FTPPORT)
		atomic_inc(&ip_vs_ftpsvc_counter);
	else if (svc->port == 0)
		atomic_inc(&ip_vs_nullsvc_counter);

	ip_vs_new_estimator(&svc->stats);
	ip_vs_num_services++;

	/* Hash the service into the service table */
	write_lock_bh(&__ip_vs_svc_lock);
	ip_vs_svc_hash(svc);
	write_unlock_bh(&__ip_vs_svc_lock);

	*svc_p = svc;
	return 0;

  out_err:
	if (svc != NULL) {
		if (svc->scheduler)
			ip_vs_unbind_scheduler(svc);
		if (svc->inc) {
			local_bh_disable();
			ip_vs_app_inc_put(svc->inc);
			local_bh_enable();
		}
		kfree(svc);
	}
	ip_vs_scheduler_put(sched);

  out_mod_dec:
	/* decrease the module use count */
	ip_vs_use_count_dec();

	return ret;
}
/*
 *	Edit a service and bind it with a new scheduler
 */
static int
ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user *u)
{
	struct ip_vs_scheduler *sched, *old_sched;
	int ret = 0;

	/*
	 * Lookup the scheduler, by 'u->sched_name'
	 */
	sched = ip_vs_scheduler_get(u->sched_name);
	if (sched == NULL) {
		IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
			   u->sched_name);
		return -ENOENT;
	}
	old_sched = sched;

	write_lock_bh(&__ip_vs_svc_lock);

	/*
	 * Wait until all other svc users go away.
	 */
	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);

	/*
	 * Set the flags and timeout value
	 */
	svc->flags = u->flags | IP_VS_SVC_F_HASHED;
	svc->timeout = u->timeout * HZ;
	svc->netmask = u->netmask;

	old_sched = svc->scheduler;
	if (sched != old_sched) {
		/*
		 * Unbind the old scheduler
		 */
		if ((ret = ip_vs_unbind_scheduler(svc))) {
			old_sched = sched;
			goto out;
		}

		/*
		 * Bind the new scheduler
		 */
		if ((ret = ip_vs_bind_scheduler(svc, sched))) {
			/*
			 * If ip_vs_bind_scheduler fails, restore the old
			 * scheduler.
			 * The main reason of failure is out of memory.
			 *
			 * The question is if the old scheduler can be
			 * restored all the time. TODO: if it cannot be
			 * restored some time, we must delete the service,
			 * otherwise the system may crash.
			 */
			ip_vs_bind_scheduler(svc, old_sched);
			old_sched = sched;
			goto out;
		}
	}

  out:
	write_unlock_bh(&__ip_vs_svc_lock);

	if (old_sched)
		ip_vs_scheduler_put(old_sched);

	return ret;
}
/*
 *	Delete a service from the service list
 *	- The service must be unlinked, unlocked and not referenced!
 *	- We are called under _bh lock
 */
static void __ip_vs_del_service(struct ip_vs_service *svc)
{
	struct ip_vs_dest *dest, *nxt;
	struct ip_vs_scheduler *old_sched;

	ip_vs_num_services--;
	ip_vs_kill_estimator(&svc->stats);

	/* Unbind scheduler */
	old_sched = svc->scheduler;
	ip_vs_unbind_scheduler(svc);
	if (old_sched)
		ip_vs_scheduler_put(old_sched);

	/* Unbind app inc */
	if (svc->inc) {
		ip_vs_app_inc_put(svc->inc);
		svc->inc = NULL;
	}

	/*
	 *    Unlink the whole destination list
	 */
	list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
		__ip_vs_unlink_dest(svc, dest, 0);
		__ip_vs_del_dest(dest);
	}

	/*
	 *    Update the virtual service counters
	 */
	if (svc->port == FTPPORT)
		atomic_dec(&ip_vs_ftpsvc_counter);
	else if (svc->port == 0)
		atomic_dec(&ip_vs_nullsvc_counter);

	/*
	 *    Free the service if nobody refers to it
	 */
	if (atomic_read(&svc->refcnt) == 0)
		kfree(svc);

	/* decrease the module use count */
	ip_vs_use_count_dec();
}
/*
 *	Delete a service from the service list
 */
static int ip_vs_del_service(struct ip_vs_service *svc)
{
	if (svc == NULL)
		return -EEXIST;

	/*
	 * Unhash it from the service table
	 */
	write_lock_bh(&__ip_vs_svc_lock);

	ip_vs_svc_unhash(svc);

	/*
	 * Wait until all the svc users go away.
	 */
	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);

	__ip_vs_del_service(svc);

	write_unlock_bh(&__ip_vs_svc_lock);

	return 0;
}
/*
 *	Flush all the virtual services
 */
static int ip_vs_flush(void)
{
	int idx;
	struct ip_vs_service *svc, *nxt;

	/*
	 * Flush the service table hashed by <protocol,addr,port>
	 */
	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
			write_lock_bh(&__ip_vs_svc_lock);
			ip_vs_svc_unhash(svc);
			/*
			 * Wait until all the svc users go away.
			 */
			IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
			__ip_vs_del_service(svc);
			write_unlock_bh(&__ip_vs_svc_lock);
		}
	}

	/*
	 * Flush the service table hashed by fwmark
	 */
	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		list_for_each_entry_safe(svc, nxt,
					 &ip_vs_svc_fwm_table[idx], f_list) {
			write_lock_bh(&__ip_vs_svc_lock);
			ip_vs_svc_unhash(svc);
			/*
			 * Wait until all the svc users go away.
			 */
			IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
			__ip_vs_del_service(svc);
			write_unlock_bh(&__ip_vs_svc_lock);
		}
	}

	return 0;
}
/*
 *	Zero counters in a service or all services
 */
static int ip_vs_zero_service(struct ip_vs_service *svc)
{
	struct ip_vs_dest *dest;

	write_lock_bh(&__ip_vs_svc_lock);
	list_for_each_entry(dest, &svc->destinations, n_list) {
		ip_vs_zero_stats(&dest->stats);
	}
	ip_vs_zero_stats(&svc->stats);
	write_unlock_bh(&__ip_vs_svc_lock);
	return 0;
}

static int ip_vs_zero_all(void)
{
	int idx;
	struct ip_vs_service *svc;

	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
			ip_vs_zero_service(svc);
		}
	}

	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
			ip_vs_zero_service(svc);
		}
	}

	ip_vs_zero_stats(&ip_vs_stats);
	return 0;
}
static int
proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
		     void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int *valp = table->data;
	int val = *valp;
	int rc;

	rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
	if (write && (*valp != val)) {
		if ((*valp < 0) || (*valp > 3)) {
			/* Restore the correct value */
			*valp = val;
		} else {
			update_defense_level();
		}
	}
	return rc;
}
static int
proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
		       void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int *valp = table->data;
	int val[2];
	int rc;

	/* backup the value first */
	memcpy(val, valp, sizeof(val));

	rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
	if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
		/* Restore the correct value */
		memcpy(valp, val, sizeof(val));
	}
	return rc;
}
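/*
 * Example (illustrative): the handler above accepts a write such as
 *
 *	echo "3 50" > /proc/sys/net/ipv4/vs/sync_threshold
 *
 * but silently restores the previous pair for an invalid write like
 * "50 3", since the first value must be non-negative and strictly
 * smaller than the second.
 */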
/*
 *	IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
 */

static struct ctl_table vs_vars[] = {
	{
		.ctl_name	= NET_IPV4_VS_AMEMTHRESH,
		.procname	= "amemthresh",
		.data		= &sysctl_ip_vs_amemthresh,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
#ifdef CONFIG_IP_VS_DEBUG
	{
		.ctl_name	= NET_IPV4_VS_DEBUG_LEVEL,
		.procname	= "debug_level",
		.data		= &sysctl_ip_vs_debug_level,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
#endif
	{
		.ctl_name	= NET_IPV4_VS_AMDROPRATE,
		.procname	= "am_droprate",
		.data		= &sysctl_ip_vs_am_droprate,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= NET_IPV4_VS_DROP_ENTRY,
		.procname	= "drop_entry",
		.data		= &sysctl_ip_vs_drop_entry,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_do_defense_mode,
	},
	{
		.ctl_name	= NET_IPV4_VS_DROP_PACKET,
		.procname	= "drop_packet",
		.data		= &sysctl_ip_vs_drop_packet,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_do_defense_mode,
	},
	{
		.ctl_name	= NET_IPV4_VS_SECURE_TCP,
		.procname	= "secure_tcp",
		.data		= &sysctl_ip_vs_secure_tcp,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_do_defense_mode,
	},
#if 0
	{
		.ctl_name	= NET_IPV4_VS_TO_ES,
		.procname	= "timeout_established",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_VS_TO_SS,
		.procname	= "timeout_synsent",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_VS_TO_SR,
		.procname	= "timeout_synrecv",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_VS_TO_FW,
		.procname	= "timeout_finwait",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_VS_TO_TW,
		.procname	= "timeout_timewait",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_VS_TO_CL,
		.procname	= "timeout_close",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_VS_TO_CW,
		.procname	= "timeout_closewait",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_VS_TO_LA,
		.procname	= "timeout_lastack",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_VS_TO_LI,
		.procname	= "timeout_listen",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_VS_TO_SA,
		.procname	= "timeout_synack",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_VS_TO_UDP,
		.procname	= "timeout_udp",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_VS_TO_ICMP,
		.procname	= "timeout_icmp",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
#endif
	{
		.ctl_name	= NET_IPV4_VS_CACHE_BYPASS,
		.procname	= "cache_bypass",
		.data		= &sysctl_ip_vs_cache_bypass,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= NET_IPV4_VS_EXPIRE_NODEST_CONN,
		.procname	= "expire_nodest_conn",
		.data		= &sysctl_ip_vs_expire_nodest_conn,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= NET_IPV4_VS_EXPIRE_QUIESCENT_TEMPLATE,
		.procname	= "expire_quiescent_template",
		.data		= &sysctl_ip_vs_expire_quiescent_template,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= NET_IPV4_VS_SYNC_THRESHOLD,
		.procname	= "sync_threshold",
		.data		= &sysctl_ip_vs_sync_threshold,
		.maxlen		= sizeof(sysctl_ip_vs_sync_threshold),
		.mode		= 0644,
		.proc_handler	= &proc_do_sync_threshold,
	},
	{
		.ctl_name	= NET_IPV4_VS_NAT_ICMP_SEND,
		.procname	= "nat_icmp_send",
		.data		= &sysctl_ip_vs_nat_icmp_send,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{ .ctl_name = 0 }
};
static ctl_table vs_table[] = {
	{
		.ctl_name	= NET_IPV4_VS,
		.procname	= "vs",
		.mode		= 0555,
		.child		= vs_vars
	},
	{ .ctl_name = 0 }
};

static ctl_table ipvs_ipv4_table[] = {
	{
		.ctl_name	= NET_IPV4,
		.procname	= "ipv4",
		.mode		= 0555,
		.child		= vs_table,
	},
	{ .ctl_name = 0 }
};

static ctl_table vs_root_table[] = {
	{
		.ctl_name	= CTL_NET,
		.procname	= "net",
		.mode		= 0555,
		.child		= ipvs_ipv4_table,
	},
	{ .ctl_name = 0 }
};

static struct ctl_table_header * sysctl_header;
#ifdef CONFIG_PROC_FS

struct ip_vs_iter {
	struct list_head *table;
	int bucket;
};

/*
 *	Write the contents of the VS rule table to a PROCfs file.
 *	(It is kept just for backward compatibility)
 */
static inline const char *ip_vs_fwd_name(unsigned flags)
{
	switch (flags & IP_VS_CONN_F_FWD_MASK) {
	case IP_VS_CONN_F_LOCALNODE:
		return "Local";
	case IP_VS_CONN_F_TUNNEL:
		return "Tunnel";
	case IP_VS_CONN_F_DROUTE:
		return "Route";
	default:
		return "Masq";
	}
}
/* Get the Nth entry in the two lists */
static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
{
	struct ip_vs_iter *iter = seq->private;
	int idx;
	struct ip_vs_service *svc;

	/* look in hash by protocol */
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
			if (pos-- == 0){
				iter->table = ip_vs_svc_table;
				iter->bucket = idx;
				return svc;
			}
		}
	}

	/* keep looking in fwmark */
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
			if (pos-- == 0) {
				iter->table = ip_vs_svc_fwm_table;
				iter->bucket = idx;
				return svc;
			}
		}
	}

	return NULL;
}
static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
{
	read_lock_bh(&__ip_vs_svc_lock);
	return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
}
static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct list_head *e;
	struct ip_vs_iter *iter;
	struct ip_vs_service *svc;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ip_vs_info_array(seq,0);

	svc = v;
	iter = seq->private;

	if (iter->table == ip_vs_svc_table) {
		/* next service in table hashed by protocol */
		if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
			return list_entry(e, struct ip_vs_service, s_list);

		while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
			list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
					    s_list) {
				return svc;
			}
		}

		iter->table = ip_vs_svc_fwm_table;
		iter->bucket = -1;
		goto scan_fwmark;
	}

	/* next service in hashed by fwmark */
	if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
		return list_entry(e, struct ip_vs_service, f_list);

 scan_fwmark:
	while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
		list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
				    f_list)
			return svc;
	}

	return NULL;
}
static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
{
	read_unlock_bh(&__ip_vs_svc_lock);
}
static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN) {
		seq_printf(seq,
			"IP Virtual Server version %d.%d.%d (size=%d)\n",
			NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
		seq_puts(seq,
			 "Prot LocalAddress:Port Scheduler Flags\n");
		seq_puts(seq,
			 "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
	} else {
		const struct ip_vs_service *svc = v;
		const struct ip_vs_iter *iter = seq->private;
		const struct ip_vs_dest *dest;

		if (iter->table == ip_vs_svc_table)
			seq_printf(seq, "%s  %08X:%04X %s ",
				   ip_vs_proto_name(svc->protocol),
				   ntohl(svc->addr),
				   ntohs(svc->port),
				   svc->scheduler->name);
		else
			seq_printf(seq, "FWM  %08X %s ",
				   svc->fwmark, svc->scheduler->name);

		if (svc->flags & IP_VS_SVC_F_PERSISTENT)
			seq_printf(seq, "persistent %d %08X\n",
				svc->timeout,
				ntohl(svc->netmask));
		else
			seq_putc(seq, '\n');

		list_for_each_entry(dest, &svc->destinations, n_list) {
			seq_printf(seq,
				   "  -> %08X:%04X      %-7s %-6d %-10d %-10d\n",
				   ntohl(dest->addr), ntohs(dest->port),
				   ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
				   atomic_read(&dest->weight),
				   atomic_read(&dest->activeconns),
				   atomic_read(&dest->inactconns));
		}
	}
	return 0;
}
static const struct seq_operations ip_vs_info_seq_ops = {
	.start = ip_vs_info_seq_start,
	.next  = ip_vs_info_seq_next,
	.stop  = ip_vs_info_seq_stop,
	.show  = ip_vs_info_seq_show,
};

static int ip_vs_info_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &ip_vs_info_seq_ops,
			sizeof(struct ip_vs_iter));
}

static const struct file_operations ip_vs_info_fops = {
	.owner	 = THIS_MODULE,
	.open    = ip_vs_info_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_private,
};

#endif
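/*
 * For reference, the seq_file above renders /proc/net/ip_vs roughly as
 * follows (sample values and spacing are illustrative only):
 *
 *	IP Virtual Server version 1.2.1 (size=4096)
 *	Prot LocalAddress:Port Scheduler Flags
 *	  -> RemoteAddress:Port Forward Weight ActiveConn InActConn
 *	TCP  C0A80001:0050 rr
 *	  -> C0A80102:0050      Masq    1      0          0
 */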
struct ip_vs_stats ip_vs_stats;
#ifdef CONFIG_PROC_FS
static int ip_vs_stats_show(struct seq_file *seq, void *v)
{

/*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
	seq_puts(seq,
		 "   Total Incoming Outgoing         Incoming         Outgoing\n");
	seq_printf(seq,
		   "   Conns  Packets  Packets            Bytes            Bytes\n");

	spin_lock_bh(&ip_vs_stats.lock);
	seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.conns,
		   ip_vs_stats.inpkts, ip_vs_stats.outpkts,
		   (unsigned long long) ip_vs_stats.inbytes,
		   (unsigned long long) ip_vs_stats.outbytes);

/*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
	seq_puts(seq,
		 " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
	seq_printf(seq,"%8X %8X %8X %16X %16X\n",
			ip_vs_stats.cps,
			ip_vs_stats.inpps,
			ip_vs_stats.outpps,
			ip_vs_stats.inbps,
			ip_vs_stats.outbps);
	spin_unlock_bh(&ip_vs_stats.lock);

	return 0;
}

static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open(file, ip_vs_stats_show, NULL);
}

static const struct file_operations ip_vs_stats_fops = {
	.owner = THIS_MODULE,
	.open = ip_vs_stats_seq_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

#endif
/*
 *	Set timeout values for tcp tcpfin udp in the timeout_table.
 */
static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
{
	IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
		  u->tcp_timeout,
		  u->tcp_fin_timeout,
		  u->udp_timeout);

#ifdef CONFIG_IP_VS_PROTO_TCP
	if (u->tcp_timeout) {
		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
			= u->tcp_timeout * HZ;
	}

	if (u->tcp_fin_timeout) {
		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
			= u->tcp_fin_timeout * HZ;
	}
#endif

#ifdef CONFIG_IP_VS_PROTO_UDP
	if (u->udp_timeout) {
		ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
			= u->udp_timeout * HZ;
	}
#endif
	return 0;
}
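/*
 * Minimal userspace sketch (illustrative, not part of this file) of
 * driving the handler above through the netfilter sockopt interface;
 * a zero in any field leaves that timeout unchanged, and the caller
 * needs CAP_NET_ADMIN:
 *
 *	struct ip_vs_timeout_user to = {
 *		.tcp_timeout     = 900,   (seconds)
 *		.tcp_fin_timeout = 120,
 *		.udp_timeout     = 300,
 *	};
 *	int fd = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
 *	setsockopt(fd, IPPROTO_IP, IP_VS_SO_SET_TIMEOUT, &to, sizeof(to));
 */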
#define SET_CMDID(cmd)		(cmd - IP_VS_BASE_CTL)
#define SERVICE_ARG_LEN		(sizeof(struct ip_vs_service_user))
#define SVCDEST_ARG_LEN		(sizeof(struct ip_vs_service_user) +	\
				 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN		(sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN		(sizeof(struct ip_vs_daemon_user))
#define MAX_ARG_LEN		SVCDEST_ARG_LEN

static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
	[SET_CMDID(IP_VS_SO_SET_ADD)]		= SERVICE_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_EDIT)]		= SERVICE_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_DEL)]		= SERVICE_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_FLUSH)]		= 0,
	[SET_CMDID(IP_VS_SO_SET_ADDDEST)]	= SVCDEST_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_DELDEST)]	= SVCDEST_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_EDITDEST)]	= SVCDEST_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_TIMEOUT)]	= TIMEOUT_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_STARTDAEMON)]	= DAEMON_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_STOPDAEMON)]	= DAEMON_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_ZERO)]		= SERVICE_ARG_LEN,
};
static int
do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
{
	int ret;
	unsigned char arg[MAX_ARG_LEN];
	struct ip_vs_service_user *usvc;
	struct ip_vs_service *svc;
	struct ip_vs_dest_user *udest;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (len != set_arglen[SET_CMDID(cmd)]) {
		IP_VS_ERR("set_ctl: len %u != %u\n",
			  len, set_arglen[SET_CMDID(cmd)]);
		return -EINVAL;
	}

	if (copy_from_user(arg, user, len) != 0)
		return -EFAULT;

	/* increase the module use count */
	ip_vs_use_count_inc();

	if (mutex_lock_interruptible(&__ip_vs_mutex)) {
		ret = -ERESTARTSYS;
		goto out_dec;
	}

	if (cmd == IP_VS_SO_SET_FLUSH) {
		/* Flush the virtual service */
		ret = ip_vs_flush();
		goto out_unlock;
	} else if (cmd == IP_VS_SO_SET_TIMEOUT) {
		/* Set timeout values for (tcp tcpfin udp) */
		ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
		goto out_unlock;
	} else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
		struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
		ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
		goto out_unlock;
	} else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
		struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
		ret = stop_sync_thread(dm->state);
		goto out_unlock;
	}

	usvc = (struct ip_vs_service_user *)arg;
	udest = (struct ip_vs_dest_user *)(usvc + 1);

	if (cmd == IP_VS_SO_SET_ZERO) {
		/* if no service address is set, zero counters in all */
		if (!usvc->fwmark && !usvc->addr && !usvc->port) {
			ret = ip_vs_zero_all();
			goto out_unlock;
		}
	}

	/* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
	if (usvc->protocol!=IPPROTO_TCP && usvc->protocol!=IPPROTO_UDP) {
		IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
			  usvc->protocol, NIPQUAD(usvc->addr),
			  ntohs(usvc->port), usvc->sched_name);
		ret = -EFAULT;
		goto out_unlock;
	}

	/* Lookup the exact service by <protocol, addr, port> or fwmark */
	if (usvc->fwmark == 0)
		svc = __ip_vs_service_get(usvc->protocol,
					  usvc->addr, usvc->port);
	else
		svc = __ip_vs_svc_fwm_get(usvc->fwmark);

	if (cmd != IP_VS_SO_SET_ADD
	    && (svc == NULL || svc->protocol != usvc->protocol)) {
		ret = -ESRCH;
		goto out_unlock;
	}

	switch (cmd) {
	case IP_VS_SO_SET_ADD:
		if (svc != NULL)
			ret = -EEXIST;
		else
			ret = ip_vs_add_service(usvc, &svc);
		break;
	case IP_VS_SO_SET_EDIT:
		ret = ip_vs_edit_service(svc, usvc);
		break;
	case IP_VS_SO_SET_DEL:
		ret = ip_vs_del_service(svc);
		if (!ret)
			goto out_unlock;
		break;
	case IP_VS_SO_SET_ZERO:
		ret = ip_vs_zero_service(svc);
		break;
	case IP_VS_SO_SET_ADDDEST:
		ret = ip_vs_add_dest(svc, udest);
		break;
	case IP_VS_SO_SET_EDITDEST:
		ret = ip_vs_edit_dest(svc, udest);
		break;
	case IP_VS_SO_SET_DELDEST:
		ret = ip_vs_del_dest(svc, udest);
		break;
	default:
		ret = -EINVAL;
	}

	if (svc)
		ip_vs_service_put(svc);

  out_unlock:
	mutex_unlock(&__ip_vs_mutex);
  out_dec:
	/* decrease the module use count */
	ip_vs_use_count_dec();

	return ret;
}
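/*
 * Minimal userspace sketch (illustrative; this is essentially what
 * ipvsadm does) of adding a TCP virtual service through the set_ctl
 * path above.  CAP_NET_ADMIN is required:
 *
 *	struct ip_vs_service_user svc = {
 *		.protocol   = IPPROTO_TCP,
 *		.addr       = inet_addr("192.168.0.1"),
 *		.port       = htons(80),
 *		.sched_name = "rr",
 *	};
 *	int fd = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
 *	if (setsockopt(fd, IPPROTO_IP, IP_VS_SO_SET_ADD,
 *		       &svc, sizeof(svc)) < 0)
 *		perror("IP_VS_SO_SET_ADD");
 */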
static void
ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
{
	spin_lock_bh(&src->lock);
	memcpy(dst, src, (char*)&src->lock - (char*)src);
	spin_unlock_bh(&src->lock);
}

static void
ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
{
	dst->protocol = src->protocol;
	dst->addr = src->addr;
	dst->port = src->port;
	dst->fwmark = src->fwmark;
	strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
	dst->flags = src->flags;
	dst->timeout = src->timeout / HZ;
	dst->netmask = src->netmask;
	dst->num_dests = src->num_dests;
	ip_vs_copy_stats(&dst->stats, &src->stats);
}
static inline int
__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
			    struct ip_vs_get_services __user *uptr)
{
	int idx, count=0;
	struct ip_vs_service *svc;
	struct ip_vs_service_entry entry;
	int ret = 0;

	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
			if (count >= get->num_services)
				goto out;
			memset(&entry, 0, sizeof(entry));
			ip_vs_copy_service(&entry, svc);
			if (copy_to_user(&uptr->entrytable[count],
					 &entry, sizeof(entry))) {
				ret = -EFAULT;
				goto out;
			}
			count++;
		}
	}

	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
			if (count >= get->num_services)
				goto out;
			memset(&entry, 0, sizeof(entry));
			ip_vs_copy_service(&entry, svc);
			if (copy_to_user(&uptr->entrytable[count],
					 &entry, sizeof(entry))) {
				ret = -EFAULT;
				goto out;
			}
			count++;
		}
	}
  out:
	return ret;
}
static inline int
__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
			 struct ip_vs_get_dests __user *uptr)
{
	struct ip_vs_service *svc;
	int ret = 0;

	if (get->fwmark)
		svc = __ip_vs_svc_fwm_get(get->fwmark);
	else
		svc = __ip_vs_service_get(get->protocol,
					  get->addr, get->port);
	if (svc) {
		int count = 0;
		struct ip_vs_dest *dest;
		struct ip_vs_dest_entry entry;

		list_for_each_entry(dest, &svc->destinations, n_list) {
			if (count >= get->num_dests)
				break;

			entry.addr = dest->addr;
			entry.port = dest->port;
			entry.conn_flags = atomic_read(&dest->conn_flags);
			entry.weight = atomic_read(&dest->weight);
			entry.u_threshold = dest->u_threshold;
			entry.l_threshold = dest->l_threshold;
			entry.activeconns = atomic_read(&dest->activeconns);
			entry.inactconns = atomic_read(&dest->inactconns);
			entry.persistconns = atomic_read(&dest->persistconns);
			ip_vs_copy_stats(&entry.stats, &dest->stats);
			if (copy_to_user(&uptr->entrytable[count],
					 &entry, sizeof(entry))) {
				ret = -EFAULT;
				break;
			}
			count++;
		}
		ip_vs_service_put(svc);
	} else
		ret = -ESRCH;
	return ret;
}
static inline void
__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
{
#ifdef CONFIG_IP_VS_PROTO_TCP
	u->tcp_timeout =
		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
	u->tcp_fin_timeout =
		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
#endif
#ifdef CONFIG_IP_VS_PROTO_UDP
	u->udp_timeout =
		ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
#endif
}
#define GET_CMDID(cmd)		(cmd - IP_VS_BASE_CTL)
#define GET_INFO_ARG_LEN	(sizeof(struct ip_vs_getinfo))
#define GET_SERVICES_ARG_LEN	(sizeof(struct ip_vs_get_services))
#define GET_SERVICE_ARG_LEN	(sizeof(struct ip_vs_service_entry))
#define GET_DESTS_ARG_LEN	(sizeof(struct ip_vs_get_dests))
#define GET_TIMEOUT_ARG_LEN	(sizeof(struct ip_vs_timeout_user))
#define GET_DAEMON_ARG_LEN	(sizeof(struct ip_vs_daemon_user) * 2)

static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
	[GET_CMDID(IP_VS_SO_GET_VERSION)]	= 64,
	[GET_CMDID(IP_VS_SO_GET_INFO)]		= GET_INFO_ARG_LEN,
	[GET_CMDID(IP_VS_SO_GET_SERVICES)]	= GET_SERVICES_ARG_LEN,
	[GET_CMDID(IP_VS_SO_GET_SERVICE)]	= GET_SERVICE_ARG_LEN,
	[GET_CMDID(IP_VS_SO_GET_DESTS)]		= GET_DESTS_ARG_LEN,
	[GET_CMDID(IP_VS_SO_GET_TIMEOUT)]	= GET_TIMEOUT_ARG_LEN,
	[GET_CMDID(IP_VS_SO_GET_DAEMON)]	= GET_DAEMON_ARG_LEN,
};
static int
do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
	unsigned char arg[128];
	int ret = 0;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (*len < get_arglen[GET_CMDID(cmd)]) {
		IP_VS_ERR("get_ctl: len %u < %u\n",
			  *len, get_arglen[GET_CMDID(cmd)]);
		return -EINVAL;
	}

	if (copy_from_user(arg, user, get_arglen[GET_CMDID(cmd)]) != 0)
		return -EFAULT;

	if (mutex_lock_interruptible(&__ip_vs_mutex))
		return -ERESTARTSYS;

	switch (cmd) {
	case IP_VS_SO_GET_VERSION:
	{
		char buf[64];

		sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
			NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
		if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
			ret = -EFAULT;
			goto out;
		}
		*len = strlen(buf)+1;
	}
	break;

	case IP_VS_SO_GET_INFO:
	{
		struct ip_vs_getinfo info;
		info.version = IP_VS_VERSION_CODE;
		info.size = IP_VS_CONN_TAB_SIZE;
		info.num_services = ip_vs_num_services;
		if (copy_to_user(user, &info, sizeof(info)) != 0)
			ret = -EFAULT;
	}
	break;

	case IP_VS_SO_GET_SERVICES:
	{
		struct ip_vs_get_services *get;
		int size;

		get = (struct ip_vs_get_services *)arg;
		size = sizeof(*get) +
			sizeof(struct ip_vs_service_entry) * get->num_services;
		if (*len != size) {
			IP_VS_ERR("length: %u != %u\n", *len, size);
			ret = -EINVAL;
			goto out;
		}
		ret = __ip_vs_get_service_entries(get, user);
	}
	break;

	case IP_VS_SO_GET_SERVICE:
	{
		struct ip_vs_service_entry *entry;
		struct ip_vs_service *svc;

		entry = (struct ip_vs_service_entry *)arg;
		if (entry->fwmark)
			svc = __ip_vs_svc_fwm_get(entry->fwmark);
		else
			svc = __ip_vs_service_get(entry->protocol,
						  entry->addr, entry->port);
		if (svc) {
			ip_vs_copy_service(entry, svc);
			if (copy_to_user(user, entry, sizeof(*entry)) != 0)
				ret = -EFAULT;
			ip_vs_service_put(svc);
		} else
			ret = -ESRCH;
	}
	break;

	case IP_VS_SO_GET_DESTS:
	{
		struct ip_vs_get_dests *get;
		int size;

		get = (struct ip_vs_get_dests *)arg;
		size = sizeof(*get) +
			sizeof(struct ip_vs_dest_entry) * get->num_dests;
		if (*len != size) {
			IP_VS_ERR("length: %u != %u\n", *len, size);
			ret = -EINVAL;
			goto out;
		}
		ret = __ip_vs_get_dest_entries(get, user);
	}
	break;

	case IP_VS_SO_GET_TIMEOUT:
	{
		struct ip_vs_timeout_user t;

		__ip_vs_get_timeouts(&t);
		if (copy_to_user(user, &t, sizeof(t)) != 0)
			ret = -EFAULT;
	}
	break;

	case IP_VS_SO_GET_DAEMON:
	{
		struct ip_vs_daemon_user d[2];

		memset(&d, 0, sizeof(d));
		if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
			d[0].state = IP_VS_STATE_MASTER;
			strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
			d[0].syncid = ip_vs_master_syncid;
		}
		if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
			d[1].state = IP_VS_STATE_BACKUP;
			strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
			d[1].syncid = ip_vs_backup_syncid;
		}
		if (copy_to_user(user, &d, sizeof(d)) != 0)
			ret = -EFAULT;
	}
	break;

	default:
		ret = -EINVAL;
	}

  out:
	mutex_unlock(&__ip_vs_mutex);
	return ret;
}
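/*
 * Minimal userspace sketch (illustrative, not part of this file) of the
 * read side handled above, querying the version banner; GET_VERSION
 * requires a buffer of at least 64 bytes, per get_arglen:
 *
 *	char buf[64];
 *	socklen_t len = sizeof(buf);
 *	int fd = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
 *	if (getsockopt(fd, IPPROTO_IP, IP_VS_SO_GET_VERSION,
 *		       buf, &len) == 0)
 *		printf("%s\n", buf);
 */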
static struct nf_sockopt_ops ip_vs_sockopts = {
	.pf		= PF_INET,
	.set_optmin	= IP_VS_BASE_CTL,
	.set_optmax	= IP_VS_SO_SET_MAX+1,
	.set		= do_ip_vs_set_ctl,
	.get_optmin	= IP_VS_BASE_CTL,
	.get_optmax	= IP_VS_SO_GET_MAX+1,
	.get		= do_ip_vs_get_ctl,
	.owner		= THIS_MODULE,
};
int ip_vs_control_init(void)
{
	int ret;
	int idx;

	EnterFunction(2);

	ret = nf_register_sockopt(&ip_vs_sockopts);
	if (ret) {
		IP_VS_ERR("cannot register sockopt.\n");
		return ret;
	}

	proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
	proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);

	sysctl_header = register_sysctl_table(vs_root_table);

	/* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
		INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
		INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
	}
	for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++)  {
		INIT_LIST_HEAD(&ip_vs_rtable[idx]);
	}

	memset(&ip_vs_stats, 0, sizeof(ip_vs_stats));
	spin_lock_init(&ip_vs_stats.lock);
	ip_vs_new_estimator(&ip_vs_stats);

	/* Hook the defense timer */
	schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);

	LeaveFunction(2);
	return 0;
}
void ip_vs_control_cleanup(void)
{
	EnterFunction(2);
	ip_vs_trash_cleanup();
	cancel_rearming_delayed_work(&defense_work);
	cancel_work_sync(&defense_work.work);
	ip_vs_kill_estimator(&ip_vs_stats);
	unregister_sysctl_table(sysctl_header);
	proc_net_remove(&init_net, "ip_vs_stats");
	proc_net_remove(&init_net, "ip_vs");
	nf_unregister_sockopt(&ip_vs_sockopts);
	LeaveFunction(2);
}