2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
8 * Version: $Id: ip_vs_ctl.c,v 1.36 2003/06/08 09:31:19 wensong Exp $
10 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
11 * Peter Kese <peter.kese@ijs.si>
12 * Julian Anastasov <ja@ssi.bg>
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version
17 * 2 of the License, or (at your option) any later version.
23 #include <linux/module.h>
24 #include <linux/init.h>
25 #include <linux/types.h>
26 #include <linux/capability.h>
28 #include <linux/sysctl.h>
29 #include <linux/proc_fs.h>
30 #include <linux/workqueue.h>
31 #include <linux/swap.h>
32 #include <linux/proc_fs.h>
33 #include <linux/seq_file.h>
35 #include <linux/netfilter.h>
36 #include <linux/netfilter_ipv4.h>
39 #include <net/route.h>
42 #include <asm/uaccess.h>
44 #include <net/ip_vs.h>
/*
 * Module-global locks, counters and sysctl-backed tunables for the IPVS
 * control path.
 * NOTE(review): garbled extract — each declaration is split across lines
 * and the stray leading numbers are original file line numbers fused into
 * the text; tokens preserved verbatim, comments only added.
 */
46 /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
47 static DECLARE_MUTEX(__ip_vs_mutex
);
49 /* lock for service table */
50 static DEFINE_RWLOCK(__ip_vs_svc_lock
);
52 /* lock for table with the real services */
53 static DEFINE_RWLOCK(__ip_vs_rs_lock
);
55 /* lock for state and timeout tables */
56 static DEFINE_RWLOCK(__ip_vs_securetcp_lock
);
58 /* lock for drop entry handling */
59 static DEFINE_SPINLOCK(__ip_vs_dropentry_lock
);
61 /* lock for drop packet handling */
62 static DEFINE_SPINLOCK(__ip_vs_droppacket_lock
);
64 /* 1/rate drop and drop-entry variables */
/* non-static: read by the packet-dropping defense path elsewhere in IPVS */
65 int ip_vs_drop_rate
= 0;
66 int ip_vs_drop_counter
= 0;
67 static atomic_t ip_vs_dropentry
= ATOMIC_INIT(0);
69 /* number of virtual services */
70 static int ip_vs_num_services
= 0;
72 /* sysctl variables */
73 static int sysctl_ip_vs_drop_entry
= 0;
74 static int sysctl_ip_vs_drop_packet
= 0;
75 static int sysctl_ip_vs_secure_tcp
= 0;
/* available-memory threshold (pages) used by update_defense_level() */
76 static int sysctl_ip_vs_amemthresh
= 1024;
77 static int sysctl_ip_vs_am_droprate
= 10;
78 int sysctl_ip_vs_cache_bypass
= 0;
79 int sysctl_ip_vs_expire_nodest_conn
= 0;
80 int sysctl_ip_vs_expire_quiescent_template
= 0;
/* [0]=sync threshold, [1]=sync period; validated in proc_do_sync_threshold */
81 int sysctl_ip_vs_sync_threshold
[2] = { 3, 50 };
82 int sysctl_ip_vs_nat_icmp_send
= 0;
/*
 * Debug-level accessor, compiled only with CONFIG_IP_VS_DEBUG.
 * NOTE(review): garbled extract; function body lines (braces) are missing
 * from this view. Tokens preserved verbatim.
 */
85 #ifdef CONFIG_IP_VS_DEBUG
86 static int sysctl_ip_vs_debug_level
= 0;
88 int ip_vs_get_debug_level(void)
90 return sysctl_ip_vs_debug_level
;
/*
 * Recompute the three DoS-defense strategies (drop_entry, drop_packet,
 * secure_tcp) from current free memory and the sysctl mode values.
 * Serialized per-strategy by the three locks taken below.
 * NOTE(review): garbled extract — the switch case labels, braces and the
 * local declarations (availmem, nomem, the sysinfo struct) are missing
 * from this view; code preserved verbatim, comments only added.
 */
95 * update_defense_level is called from keventd and from sysctl,
96 * so it needs to protect itself from softirqs
98 static void update_defense_level(void)
101 static int old_secure_tcp
= 0;
106 /* we only count free and buffered memory (in pages) */
108 availmem
= i
.freeram
+ i
.bufferram
;
109 /* however in linux 2.5 the i.bufferram is total page cache size,
111 /* si_swapinfo(&i); */
112 /* availmem = availmem - (i.totalswap - i.freeswap); */
/* nomem: below the amemthresh watermark -> defenses may engage */
114 nomem
= (availmem
< sysctl_ip_vs_amemthresh
);
119 spin_lock(&__ip_vs_dropentry_lock
);
/* mode 0..3: off / auto (self-latching to 2) / on-when-nomem / always */
120 switch (sysctl_ip_vs_drop_entry
) {
122 atomic_set(&ip_vs_dropentry
, 0);
126 atomic_set(&ip_vs_dropentry
, 1);
127 sysctl_ip_vs_drop_entry
= 2;
129 atomic_set(&ip_vs_dropentry
, 0);
134 atomic_set(&ip_vs_dropentry
, 1);
136 atomic_set(&ip_vs_dropentry
, 0);
137 sysctl_ip_vs_drop_entry
= 1;
141 atomic_set(&ip_vs_dropentry
, 1);
144 spin_unlock(&__ip_vs_dropentry_lock
);
147 spin_lock(&__ip_vs_droppacket_lock
);
148 switch (sysctl_ip_vs_drop_packet
) {
/* drop 1 of every N packets, N scaled by memory pressure */
154 ip_vs_drop_rate
= ip_vs_drop_counter
155 = sysctl_ip_vs_amemthresh
/
156 (sysctl_ip_vs_amemthresh
-availmem
);
157 sysctl_ip_vs_drop_packet
= 2;
164 ip_vs_drop_rate
= ip_vs_drop_counter
165 = sysctl_ip_vs_amemthresh
/
166 (sysctl_ip_vs_amemthresh
-availmem
);
169 sysctl_ip_vs_drop_packet
= 1;
173 ip_vs_drop_rate
= sysctl_ip_vs_am_droprate
;
176 spin_unlock(&__ip_vs_droppacket_lock
);
179 write_lock(&__ip_vs_securetcp_lock
);
180 switch (sysctl_ip_vs_secure_tcp
) {
182 if (old_secure_tcp
>= 2)
187 if (old_secure_tcp
< 2)
189 sysctl_ip_vs_secure_tcp
= 2;
191 if (old_secure_tcp
>= 2)
197 if (old_secure_tcp
< 2)
200 if (old_secure_tcp
>= 2)
202 sysctl_ip_vs_secure_tcp
= 1;
206 if (old_secure_tcp
< 2)
210 old_secure_tcp
= sysctl_ip_vs_secure_tcp
;
/* switch protocol state-machine timeouts to/from the DoS table */
212 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp
>1);
213 write_unlock(&__ip_vs_securetcp_lock
);
/*
 * Periodic (1 s) deferred work: refresh the defense levels and randomly
 * drop connection entries while the drop_entry defense is active, then
 * re-arm itself.
 * NOTE(review): garbled extract; code preserved verbatim.
 */
220 * Timer for checking the defense
222 #define DEFENSE_TIMER_PERIOD 1*HZ
223 static void defense_work_handler(void *data
);
224 static DECLARE_WORK(defense_work
, defense_work_handler
, NULL
);
226 static void defense_work_handler(void *data
)
228 update_defense_level();
229 if (atomic_read(&ip_vs_dropentry
))
230 ip_vs_random_dropentry();
/* self-rearming: schedules itself again after DEFENSE_TIMER_PERIOD */
232 schedule_delayed_work(&defense_work
, DEFENSE_TIMER_PERIOD
);
/*
 * Module reference helpers: pin/unpin this module while user-space holds
 * IPVS objects. Return types are missing from this garbled view.
 */
236 ip_vs_use_count_inc(void)
238 return try_module_get(THIS_MODULE
);
242 ip_vs_use_count_dec(void)
244 module_put(THIS_MODULE
);
/*
 * Lookup tables: virtual services hashed by <proto,addr,port> and by
 * fwmark (256 buckets each), real services hashed by <addr,port>
 * (16 buckets), plus the destination trash list and the FTP/null
 * service counters consulted by ip_vs_service_get().
 * NOTE(review): garbled extract; code preserved verbatim.
 */
249 * Hash table: for virtual service lookups
251 #define IP_VS_SVC_TAB_BITS 8
252 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
253 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
255 /* the service table hashed by <protocol, addr, port> */
256 static struct list_head ip_vs_svc_table
[IP_VS_SVC_TAB_SIZE
];
257 /* the service table hashed by fwmark */
258 static struct list_head ip_vs_svc_fwm_table
[IP_VS_SVC_TAB_SIZE
];
261 * Hash table: for real service lookups
263 #define IP_VS_RTAB_BITS 4
264 #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
265 #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
267 static struct list_head ip_vs_rtable
[IP_VS_RTAB_SIZE
];
270 * Trash for destinations
272 static LIST_HEAD(ip_vs_dest_trash
);
275 * FTP & NULL virtual service counters
277 static atomic_t ip_vs_ftpsvc_counter
= ATOMIC_INIT(0);
278 static atomic_t ip_vs_nullsvc_counter
= ATOMIC_INIT(0);
282 * Returns hash value for virtual service
284 static __inline__
unsigned
285 ip_vs_svc_hashkey(unsigned proto
, __u32 addr
, __u16 port
)
287 register unsigned porth
= ntohs(port
);
289 return (proto
^ntohl(addr
)^(porth
>>IP_VS_SVC_TAB_BITS
)^porth
)
290 & IP_VS_SVC_TAB_MASK
;
294 * Returns hash value of fwmark for virtual service lookup
296 static __inline__
unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark
)
298 return fwmark
& IP_VS_SVC_TAB_MASK
;
/*
 * Link a service into ip_vs_svc_table (non-fwmark services, keyed by
 * <proto,addr,port>) or ip_vs_svc_fwm_table (fwmark services), set
 * IP_VS_SVC_F_HASHED and take a table reference on the service.
 * Caller must hold __ip_vs_svc_lock for writing.
 * NOTE(review): garbled extract — braces, the `else`, the `unsigned hash`
 * declaration and return statements are missing from this view; code
 * preserved verbatim.
 */
302 * Hashes a service in the ip_vs_svc_table by <proto,addr,port>
303 * or in the ip_vs_svc_fwm_table by fwmark.
304 * Should be called with locked tables.
306 static int ip_vs_svc_hash(struct ip_vs_service
*svc
)
310 if (svc
->flags
& IP_VS_SVC_F_HASHED
) {
311 IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
312 "called from %p\n", __builtin_return_address(0));
316 if (svc
->fwmark
== 0) {
318 * Hash it by <protocol,addr,port> in ip_vs_svc_table
320 hash
= ip_vs_svc_hashkey(svc
->protocol
, svc
->addr
, svc
->port
);
321 list_add(&svc
->s_list
, &ip_vs_svc_table
[hash
]);
324 * Hash it by fwmark in ip_vs_svc_fwm_table
326 hash
= ip_vs_svc_fwm_hashkey(svc
->fwmark
);
327 list_add(&svc
->f_list
, &ip_vs_svc_fwm_table
[hash
]);
330 svc
->flags
|= IP_VS_SVC_F_HASHED
;
331 /* increase its refcnt because it is referenced by the svc table */
332 atomic_inc(&svc
->refcnt
);
/*
 * Inverse of ip_vs_svc_hash(): unlink the service from whichever table
 * holds it, clear IP_VS_SVC_F_HASHED and drop the table reference.
 * Caller must hold __ip_vs_svc_lock for writing.
 * NOTE(review): garbled extract; code preserved verbatim.
 */
338 * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
339 * Should be called with locked tables.
341 static int ip_vs_svc_unhash(struct ip_vs_service
*svc
)
343 if (!(svc
->flags
& IP_VS_SVC_F_HASHED
)) {
344 IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
345 "called from %p\n", __builtin_return_address(0));
349 if (svc
->fwmark
== 0) {
350 /* Remove it from the ip_vs_svc_table table */
351 list_del(&svc
->s_list
);
353 /* Remove it from the ip_vs_svc_fwm_table table */
354 list_del(&svc
->f_list
);
357 svc
->flags
&= ~IP_VS_SVC_F_HASHED
;
358 atomic_dec(&svc
->refcnt
);
/*
 * Bucket scan of ip_vs_svc_table for an exact <proto,addr,port> match;
 * on a hit the service usecnt is bumped before it is handed back.
 * Caller must hold __ip_vs_svc_lock (reader).
 * NOTE(review): garbled extract — the return statements and closing
 * braces are missing from this view; code preserved verbatim.
 */
364 * Get service by {proto,addr,port} in the service table.
366 static __inline__
struct ip_vs_service
*
367 __ip_vs_service_get(__u16 protocol
, __u32 vaddr
, __u16 vport
)
370 struct ip_vs_service
*svc
;
372 /* Check for "full" addressed entries */
373 hash
= ip_vs_svc_hashkey(protocol
, vaddr
, vport
);
375 list_for_each_entry(svc
, &ip_vs_svc_table
[hash
], s_list
){
376 if ((svc
->addr
== vaddr
)
377 && (svc
->port
== vport
)
378 && (svc
->protocol
== protocol
)) {
/* HIT: take a use reference before returning the entry */
380 atomic_inc(&svc
->usecnt
);
/*
 * Bucket scan of ip_vs_svc_fwm_table for an exact fwmark match; bumps
 * usecnt on a hit. Caller must hold __ip_vs_svc_lock (reader).
 * NOTE(review): garbled extract; code preserved verbatim.
 */
390 * Get service by {fwmark} in the service table.
392 static __inline__
struct ip_vs_service
*__ip_vs_svc_fwm_get(__u32 fwmark
)
395 struct ip_vs_service
*svc
;
397 /* Check for fwmark addressed entries */
398 hash
= ip_vs_svc_fwm_hashkey(fwmark
);
400 list_for_each_entry(svc
, &ip_vs_svc_fwm_table
[hash
], f_list
) {
401 if (svc
->fwmark
== fwmark
) {
403 atomic_inc(&svc
->usecnt
);
/*
 * Public service lookup: try fwmark first, then the exact
 * <proto,addr,port> entry, then (for TCP with FTP services registered
 * and an unprivileged/FTPDATA port) the FTP control-port entry, and
 * finally the catch-all port-zero entry if any null services exist.
 * Takes/releases __ip_vs_svc_lock around the whole lookup.
 * NOTE(review): garbled extract — `if (svc == NULL ...)` heads, gotos
 * and the final return are missing from this view; code preserved
 * verbatim.
 */
411 struct ip_vs_service
*
412 ip_vs_service_get(__u32 fwmark
, __u16 protocol
, __u32 vaddr
, __u16 vport
)
414 struct ip_vs_service
*svc
;
416 read_lock(&__ip_vs_svc_lock
);
419 * Check the table hashed by fwmark first
421 if (fwmark
&& (svc
= __ip_vs_svc_fwm_get(fwmark
)))
425 * Check the table hashed by <protocol,addr,port>
426 * for "full" addressed entries
428 svc
= __ip_vs_service_get(protocol
, vaddr
, vport
);
431 && protocol
== IPPROTO_TCP
432 && atomic_read(&ip_vs_ftpsvc_counter
)
433 && (vport
== FTPDATA
|| ntohs(vport
) >= PROT_SOCK
)) {
435 * Check if ftp service entry exists, the packet
436 * might belong to FTP data connections.
438 svc
= __ip_vs_service_get(protocol
, vaddr
, FTPPORT
);
442 && atomic_read(&ip_vs_nullsvc_counter
)) {
444 * Check if the catch-all port (port zero) exists
446 svc
= __ip_vs_service_get(protocol
, vaddr
, 0);
450 read_unlock(&__ip_vs_svc_lock
);
452 IP_VS_DBG(9, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n",
453 fwmark
, ip_vs_proto_name(protocol
),
454 NIPQUAD(vaddr
), ntohs(vport
),
455 svc
?"hit":"not hit");
/*
 * Bind/unbind a destination to its owning service: bind takes a service
 * reference (the dest->svc assignment is missing from this garbled
 * view); unbind drops it and — per the truncated `if` — releases the
 * service when the refcnt reaches zero.
 * NOTE(review): code preserved verbatim.
 */
462 __ip_vs_bind_svc(struct ip_vs_dest
*dest
, struct ip_vs_service
*svc
)
464 atomic_inc(&svc
->refcnt
);
469 __ip_vs_unbind_svc(struct ip_vs_dest
*dest
)
471 struct ip_vs_service
*svc
= dest
->svc
;
474 if (atomic_dec_and_test(&svc
->refcnt
))
480 * Returns hash value for real service
482 static __inline__
unsigned ip_vs_rs_hashkey(__u32 addr
, __u16 port
)
484 register unsigned porth
= ntohs(port
);
486 return (ntohl(addr
)^(porth
>>IP_VS_RTAB_BITS
)^porth
)
/*
 * Insert a destination into the real-service table ip_vs_rtable; a
 * non-empty d_list means it is already hashed (early-out branch whose
 * body is missing from this garbled view). Caller holds __ip_vs_rs_lock
 * for writing. Code preserved verbatim.
 */
491 * Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
492 * should be called with locked tables.
494 static int ip_vs_rs_hash(struct ip_vs_dest
*dest
)
498 if (!list_empty(&dest
->d_list
)) {
503 * Hash by proto,addr,port,
504 * which are the parameters of the real service.
506 hash
= ip_vs_rs_hashkey(dest
->addr
, dest
->port
);
507 list_add(&dest
->d_list
, &ip_vs_rtable
[hash
]);
/*
 * Remove a destination from ip_vs_rtable if it is hashed; d_list is
 * re-initialized so list_empty() reports "not hashed" afterwards.
 * Caller holds __ip_vs_rs_lock for writing.
 * NOTE(review): garbled extract; code preserved verbatim.
 */
513 * UNhashes ip_vs_dest from ip_vs_rtable.
514 * should be called with locked tables.
516 static int ip_vs_rs_unhash(struct ip_vs_dest
*dest
)
519 * Remove it from the ip_vs_rtable table.
521 if (!list_empty(&dest
->d_list
)) {
522 list_del(&dest
->d_list
);
523 INIT_LIST_HEAD(&dest
->d_list
);
/*
 * Find the first real server matching <proto,addr,port> in ip_vs_rtable
 * under the __ip_vs_rs_lock reader lock; the second unlock is the
 * not-found path. The matched-entry return and part of the protocol
 * condition are missing from this garbled view; code preserved verbatim.
 */
530 * Lookup real service by <proto,addr,port> in the real service table.
533 ip_vs_lookup_real_service(__u16 protocol
, __u32 daddr
, __u16 dport
)
536 struct ip_vs_dest
*dest
;
539 * Check for "full" addressed entries
540 * Return the first found entry
542 hash
= ip_vs_rs_hashkey(daddr
, dport
);
544 read_lock(&__ip_vs_rs_lock
);
545 list_for_each_entry(dest
, &ip_vs_rtable
[hash
], d_list
) {
546 if ((dest
->addr
== daddr
)
547 && (dest
->port
== dport
)
548 && ((dest
->protocol
== protocol
) ||
551 read_unlock(&__ip_vs_rs_lock
);
555 read_unlock(&__ip_vs_rs_lock
);
/*
 * Linear scan of svc->destinations for an <addr,port> match; locking is
 * the caller's responsibility. Hit/miss returns are missing from this
 * garbled view; code preserved verbatim.
 */
561 * Lookup destination by {addr,port} in the given service
563 static struct ip_vs_dest
*
564 ip_vs_lookup_dest(struct ip_vs_service
*svc
, __u32 daddr
, __u16 dport
)
566 struct ip_vs_dest
*dest
;
569 * Find the destination for the given service
571 list_for_each_entry(dest
, &svc
->destinations
, n_list
) {
572 if ((dest
->addr
== daddr
) && (dest
->port
== dport
)) {
/*
 * Search the destination trash for a dest matching this service and
 * <addr,port>, so a re-added server resumes its old connections and
 * counters. While walking, trash entries whose refcnt has dropped to 1
 * (only the trash holds them) are purged: unlinked, their cached route
 * reset, and their service reference dropped.
 * NOTE(review): garbled extract — match return, purge free() and final
 * return are missing from this view; code preserved verbatim.
 */
583 * Lookup dest by {svc,addr,port} in the destination trash.
584 * The destination trash is used to hold the destinations that are removed
585 * from the service table but are still referenced by some conn entries.
586 * The reason to add the destination trash is when the dest is temporary
587 * down (either by administrator or by monitor program), the dest can be
588 * picked back from the trash, the remaining connections to the dest can
589 * continue, and the counting information of the dest is also useful for
592 static struct ip_vs_dest
*
593 ip_vs_trash_get_dest(struct ip_vs_service
*svc
, __u32 daddr
, __u16 dport
)
595 struct ip_vs_dest
*dest
, *nxt
;
598 * Find the destination in trash
600 list_for_each_entry_safe(dest
, nxt
, &ip_vs_dest_trash
, n_list
) {
601 IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, "
604 NIPQUAD(dest
->addr
), ntohs(dest
->port
),
605 atomic_read(&dest
->refcnt
));
606 if (dest
->addr
== daddr
&&
607 dest
->port
== dport
&&
608 dest
->vfwmark
== svc
->fwmark
&&
609 dest
->protocol
== svc
->protocol
&&
611 (dest
->vaddr
== svc
->addr
&&
612 dest
->vport
== svc
->port
))) {
618 * Try to purge the destination from trash if not referenced
620 if (atomic_read(&dest
->refcnt
) == 1) {
621 IP_VS_DBG(3, "Removing destination %u/%u.%u.%u.%u:%u "
624 NIPQUAD(dest
->addr
), ntohs(dest
->port
));
625 list_del(&dest
->n_list
);
626 ip_vs_dst_reset(dest
);
627 __ip_vs_unbind_svc(dest
);
/*
 * Module-exit teardown of the destination trash: every entry is
 * unlinked, its cached route dropped and its service reference
 * released (the final kfree is missing from this garbled view).
 * Code preserved verbatim.
 */
637 * Clean up all the destinations in the trash
638 * Called by the ip_vs_control_cleanup()
640 * When the ip_vs_control_clearup is activated by ipvs module exit,
641 * the service tables must have been flushed and all the connections
642 * are expired, and the refcnt of each destination in the trash must
643 * be 1, so we simply release them here.
645 static void ip_vs_trash_cleanup(void)
647 struct ip_vs_dest
*dest
, *nxt
;
649 list_for_each_entry_safe(dest
, nxt
, &ip_vs_dest_trash
, n_list
) {
650 list_del(&dest
->n_list
);
651 ip_vs_dst_reset(dest
);
652 __ip_vs_unbind_svc(dest
);
/*
 * Zero an ip_vs_stats block under its own bottom-half spinlock. The
 * memset length ends at the embedded lock member, so only the counters
 * that precede `lock` in the struct are cleared; the estimator state is
 * reset separately.
 * NOTE(review): garbled extract; code preserved verbatim.
 */
659 ip_vs_zero_stats(struct ip_vs_stats
*stats
)
661 spin_lock_bh(&stats
->lock
);
662 memset(stats
, 0, (char *)&stats
->lock
- (char *)stats
);
663 spin_unlock_bh(&stats
->lock
);
664 ip_vs_zero_estimator(stats
);
/*
 * Apply user-supplied parameters (udest) to a destination: weight,
 * connection-forwarding flags (forced to LOCALNODE for local addresses,
 * NOOUTPUT for any non-NAT method), real-service hashing, service
 * (re)binding with stats reset on a service change, availability flag
 * and the upper/lower connection thresholds.
 * NOTE(review): garbled extract — the NAT-only ip_vs_rs_hash() call
 * between the rs_lock lines and several condition heads are missing
 * from this view; code preserved verbatim.
 */
668 * Update a destination in the given service
671 __ip_vs_update_dest(struct ip_vs_service
*svc
,
672 struct ip_vs_dest
*dest
, struct ip_vs_dest_user
*udest
)
676 /* set the weight and the flags */
677 atomic_set(&dest
->weight
, udest
->weight
);
678 conn_flags
= udest
->conn_flags
| IP_VS_CONN_F_INACTIVE
;
680 /* check if local node and update the flags */
681 if (inet_addr_type(udest
->addr
) == RTN_LOCAL
) {
682 conn_flags
= (conn_flags
& ~IP_VS_CONN_F_FWD_MASK
)
683 | IP_VS_CONN_F_LOCALNODE
;
686 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
687 if ((conn_flags
& IP_VS_CONN_F_FWD_MASK
) != 0) {
688 conn_flags
|= IP_VS_CONN_F_NOOUTPUT
;
691 * Put the real service in ip_vs_rtable if not present.
692 * For now only for NAT!
694 write_lock_bh(&__ip_vs_rs_lock
);
696 write_unlock_bh(&__ip_vs_rs_lock
);
698 atomic_set(&dest
->conn_flags
, conn_flags
);
700 /* bind the service */
702 __ip_vs_bind_svc(dest
, svc
);
/* already bound to a different service: rebind and restart its stats */
704 if (dest
->svc
!= svc
) {
705 __ip_vs_unbind_svc(dest
);
706 ip_vs_zero_stats(&dest
->stats
);
707 __ip_vs_bind_svc(dest
, svc
);
711 /* set the dest status flags */
712 dest
->flags
|= IP_VS_DEST_F_AVAILABLE
;
/* raising/removing the upper threshold clears any overload state */
714 if (udest
->u_threshold
== 0 || udest
->u_threshold
> dest
->u_threshold
)
715 dest
->flags
&= ~IP_VS_DEST_F_OVERLOAD
;
716 dest
->u_threshold
= udest
->u_threshold
;
717 dest
->l_threshold
= udest
->l_threshold
;
/*
 * Allocate and initialize a new destination for svc from the
 * user-supplied parameters: validates the address is local/unicast,
 * zero-fills the struct, copies the virtual-service identity and the
 * real <addr,port>, zeroes all connection counters, initializes locks,
 * applies udest via __ip_vs_update_dest() and registers a rate
 * estimator. Result is returned through *dest_p (assignment missing
 * from this garbled view).
 * NOTE(review): error-return lines are missing from this view; code
 * preserved verbatim.
 */
722 * Create a destination for the given service
725 ip_vs_new_dest(struct ip_vs_service
*svc
, struct ip_vs_dest_user
*udest
,
726 struct ip_vs_dest
**dest_p
)
728 struct ip_vs_dest
*dest
;
733 atype
= inet_addr_type(udest
->addr
);
734 if (atype
!= RTN_LOCAL
&& atype
!= RTN_UNICAST
)
/* GFP_ATOMIC: allocation happens with a lock held / no sleeping allowed */
737 dest
= kmalloc(sizeof(struct ip_vs_dest
), GFP_ATOMIC
);
739 IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
742 memset(dest
, 0, sizeof(struct ip_vs_dest
));
744 dest
->protocol
= svc
->protocol
;
745 dest
->vaddr
= svc
->addr
;
746 dest
->vport
= svc
->port
;
747 dest
->vfwmark
= svc
->fwmark
;
748 dest
->addr
= udest
->addr
;
749 dest
->port
= udest
->port
;
751 atomic_set(&dest
->activeconns
, 0);
752 atomic_set(&dest
->inactconns
, 0);
753 atomic_set(&dest
->persistconns
, 0);
754 atomic_set(&dest
->refcnt
, 0);
756 INIT_LIST_HEAD(&dest
->d_list
);
757 spin_lock_init(&dest
->dst_lock
);
758 spin_lock_init(&dest
->stats
.lock
);
759 __ip_vs_update_dest(svc
, dest
, udest
);
760 ip_vs_new_estimator(&dest
->stats
);
/*
 * ipvsadm "add real server": validates weight and thresholds, rejects a
 * duplicate, resurrects a matching entry from the trash (updating it and
 * relinking under the svc write lock) or, failing that, allocates a new
 * dest via ip_vs_new_dest() and links it. Both link paths wait for all
 * other service users, add to svc->destinations and notify the
 * scheduler via update_service().
 * NOTE(review): garbled extract — error returns, `if (dest != NULL)`
 * heads and closing braces are missing from this view; code preserved
 * verbatim.
 */
770 * Add a destination into an existing service
773 ip_vs_add_dest(struct ip_vs_service
*svc
, struct ip_vs_dest_user
*udest
)
775 struct ip_vs_dest
*dest
;
776 __u32 daddr
= udest
->addr
;
777 __u16 dport
= udest
->port
;
782 if (udest
->weight
< 0) {
783 IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
787 if (udest
->l_threshold
> udest
->u_threshold
) {
788 IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than "
789 "upper threshold\n");
794 * Check if the dest already exists in the list
796 dest
= ip_vs_lookup_dest(svc
, daddr
, dport
);
798 IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
803 * Check if the dest already exists in the trash and
804 * is from the same service
806 dest
= ip_vs_trash_get_dest(svc
, daddr
, dport
);
808 IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
809 "dest->refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
810 NIPQUAD(daddr
), ntohs(dport
),
811 atomic_read(&dest
->refcnt
),
813 NIPQUAD(dest
->vaddr
),
815 __ip_vs_update_dest(svc
, dest
, udest
);
818 * Get the destination from the trash
820 list_del(&dest
->n_list
);
822 ip_vs_new_estimator(&dest
->stats
);
824 write_lock_bh(&__ip_vs_svc_lock
);
827 * Wait until all other svc users go away.
829 IP_VS_WAIT_WHILE(atomic_read(&svc
->usecnt
) > 1);
831 list_add(&dest
->n_list
, &svc
->destinations
);
834 /* call the update_service function of its scheduler */
835 svc
->scheduler
->update_service(svc
);
837 write_unlock_bh(&__ip_vs_svc_lock
);
842 * Allocate and initialize the dest structure
844 ret
= ip_vs_new_dest(svc
, udest
, &dest
);
850 * Add the dest entry into the list
852 atomic_inc(&dest
->refcnt
);
854 write_lock_bh(&__ip_vs_svc_lock
);
857 * Wait until all other svc users go away.
859 IP_VS_WAIT_WHILE(atomic_read(&svc
->usecnt
) > 1);
861 list_add(&dest
->n_list
, &svc
->destinations
);
864 /* call the update_service function of its scheduler */
865 svc
->scheduler
->update_service(svc
);
867 write_unlock_bh(&__ip_vs_svc_lock
);
/*
 * ipvsadm "edit real server": same validation as ip_vs_add_dest, then
 * updates an existing dest in place and pokes the scheduler.
 * NOTE(review): the `while (...) {};` at original line 912 is a raw
 * busy-wait on usecnt, unlike the IP_VS_WAIT_WHILE used elsewhere —
 * presumably equivalent, but worth confirming against the macro.
 * Garbled extract; code preserved verbatim.
 */
876 * Edit a destination in the given service
879 ip_vs_edit_dest(struct ip_vs_service
*svc
, struct ip_vs_dest_user
*udest
)
881 struct ip_vs_dest
*dest
;
882 __u32 daddr
= udest
->addr
;
883 __u16 dport
= udest
->port
;
887 if (udest
->weight
< 0) {
888 IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
892 if (udest
->l_threshold
> udest
->u_threshold
) {
893 IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than "
894 "upper threshold\n");
899 * Lookup the destination list
901 dest
= ip_vs_lookup_dest(svc
, daddr
, dport
);
903 IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
907 __ip_vs_update_dest(svc
, dest
, udest
);
909 write_lock_bh(&__ip_vs_svc_lock
);
911 /* Wait until all other svc users go away */
912 while (atomic_read(&svc
->usecnt
) > 1) {};
914 /* call the update_service, because server weight may be changed */
915 svc
->scheduler
->update_service(svc
);
917 write_unlock_bh(&__ip_vs_svc_lock
);
/*
 * Final disposal of an already-unlinked destination: stop its estimator,
 * unhash it from the real-service table, then either free it (refcnt hit
 * zero — svc refcnt dropped here, service release left to the caller) or
 * park it in the trash with an extra reference so lingering connections
 * keep working.
 * NOTE(review): garbled extract — the kfree/else structure is missing
 * from this view; code preserved verbatim.
 */
926 * Delete a destination (must be already unlinked from the service)
928 static void __ip_vs_del_dest(struct ip_vs_dest
*dest
)
930 ip_vs_kill_estimator(&dest
->stats
);
933 * Remove it from the d-linked list with the real services.
935 write_lock_bh(&__ip_vs_rs_lock
);
936 ip_vs_rs_unhash(dest
);
937 write_unlock_bh(&__ip_vs_rs_lock
);
940 * Decrease the refcnt of the dest, and free the dest
941 * if nobody refers to it (refcnt=0). Otherwise, throw
942 * the destination into the trash.
944 if (atomic_dec_and_test(&dest
->refcnt
)) {
945 ip_vs_dst_reset(dest
);
946 /* simply decrease svc->refcnt here, let the caller check
947 and release the service if nobody refers to it.
948 Only user context can release destination and service,
949 and only one user context can update virtual service at a
950 time, so the operation here is OK */
951 atomic_dec(&dest
->svc
->refcnt
);
954 IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, "
956 NIPQUAD(dest
->addr
), ntohs(dest
->port
),
957 atomic_read(&dest
->refcnt
));
958 list_add(&dest
->n_list
, &ip_vs_dest_trash
);
959 atomic_inc(&dest
->refcnt
);
/*
 * Detach a destination from its service: mark it unavailable, unlink it
 * from svc->destinations and notify the scheduler. A third parameter
 * (visible in callers as 0/1, presumably a "notify scheduler" flag) is
 * cut off in this garbled view — confirm against the original.
 * Code preserved verbatim.
 */
965 * Unlink a destination from the given service
967 static void __ip_vs_unlink_dest(struct ip_vs_service
*svc
,
968 struct ip_vs_dest
*dest
,
971 dest
->flags
&= ~IP_VS_DEST_F_AVAILABLE
;
974 * Remove it from the d-linked destination list.
976 list_del(&dest
->n_list
);
980 * Call the update_service function of its scheduler
982 svc
->scheduler
->update_service(svc
);
/*
 * ipvsadm "delete real server": find the dest, quiesce other service
 * users under the svc write lock, unlink it, then dispose of it via
 * __ip_vs_del_dest() (free or trash).
 * NOTE(review): garbled extract — not-found return and final return
 * missing from this view; code preserved verbatim.
 */
988 * Delete a destination server in the given service
991 ip_vs_del_dest(struct ip_vs_service
*svc
,struct ip_vs_dest_user
*udest
)
993 struct ip_vs_dest
*dest
;
994 __u32 daddr
= udest
->addr
;
995 __u16 dport
= udest
->port
;
999 dest
= ip_vs_lookup_dest(svc
, daddr
, dport
);
1001 IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
1005 write_lock_bh(&__ip_vs_svc_lock
);
1008 * Wait until all other svc users go away.
1010 IP_VS_WAIT_WHILE(atomic_read(&svc
->usecnt
) > 1);
1013 * Unlink dest from the service
1015 __ip_vs_unlink_dest(svc
, dest
, 1);
1017 write_unlock_bh(&__ip_vs_svc_lock
);
1020 * Delete the destination
1022 __ip_vs_del_dest(dest
);
/*
 * ipvsadm "add virtual service": pin the module, resolve the scheduler
 * by name, allocate and populate the service, bind the scheduler,
 * bump the FTP/null counters, start an estimator and hash the service
 * into the tables. The tail lines are the error-unwind ladder
 * (unbind scheduler, put app inc, put scheduler, unpin module).
 * NOTE(review): garbled extract — goto labels, error returns and the
 * *svc_p assignment are missing from this view; code preserved
 * verbatim.
 */
1031 * Add a service into the service hash table
1034 ip_vs_add_service(struct ip_vs_service_user
*u
, struct ip_vs_service
**svc_p
)
1037 struct ip_vs_scheduler
*sched
= NULL
;
1038 struct ip_vs_service
*svc
= NULL
;
1040 /* increase the module use count */
1041 ip_vs_use_count_inc();
1043 /* Lookup the scheduler by 'u->sched_name' */
1044 sched
= ip_vs_scheduler_get(u
->sched_name
);
1045 if (sched
== NULL
) {
1046 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1052 svc
= (struct ip_vs_service
*)
1053 kmalloc(sizeof(struct ip_vs_service
), GFP_ATOMIC
);
1055 IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
1059 memset(svc
, 0, sizeof(struct ip_vs_service
));
1061 /* I'm the first user of the service */
1062 atomic_set(&svc
->usecnt
, 1);
1063 atomic_set(&svc
->refcnt
, 0);
1065 svc
->protocol
= u
->protocol
;
1066 svc
->addr
= u
->addr
;
1067 svc
->port
= u
->port
;
1068 svc
->fwmark
= u
->fwmark
;
1069 svc
->flags
= u
->flags
;
/* user supplies seconds; stored internally in jiffies */
1070 svc
->timeout
= u
->timeout
* HZ
;
1071 svc
->netmask
= u
->netmask
;
1073 INIT_LIST_HEAD(&svc
->destinations
);
1074 rwlock_init(&svc
->sched_lock
);
1075 spin_lock_init(&svc
->stats
.lock
);
1077 /* Bind the scheduler */
1078 ret
= ip_vs_bind_scheduler(svc
, sched
);
1083 /* Update the virtual service counters */
1084 if (svc
->port
== FTPPORT
)
1085 atomic_inc(&ip_vs_ftpsvc_counter
);
1086 else if (svc
->port
== 0)
1087 atomic_inc(&ip_vs_nullsvc_counter
);
1089 ip_vs_new_estimator(&svc
->stats
);
1090 ip_vs_num_services
++;
1092 /* Hash the service into the service table */
1093 write_lock_bh(&__ip_vs_svc_lock
);
1094 ip_vs_svc_hash(svc
);
1095 write_unlock_bh(&__ip_vs_svc_lock
);
/* ---- error unwind paths below ---- */
1103 ip_vs_unbind_scheduler(svc
);
1106 ip_vs_app_inc_put(svc
->inc
);
1111 ip_vs_scheduler_put(sched
);
1114 /* decrease the module use count */
1115 ip_vs_use_count_dec();
/*
 * ipvsadm "edit virtual service": resolve the (possibly new) scheduler,
 * quiesce other users under the svc write lock, update flags/timeout/
 * netmask, and if the scheduler changed, unbind the old one and bind the
 * new one — attempting to rebind the old scheduler if the new bind
 * fails.
 * NOTE(review): garbled extract — goto labels and returns are missing
 * from this view; code preserved verbatim.
 */
1122 * Edit a service and bind it with a new scheduler
1125 ip_vs_edit_service(struct ip_vs_service
*svc
, struct ip_vs_service_user
*u
)
1127 struct ip_vs_scheduler
*sched
, *old_sched
;
1131 * Lookup the scheduler, by 'u->sched_name'
1133 sched
= ip_vs_scheduler_get(u
->sched_name
);
1134 if (sched
== NULL
) {
1135 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1141 write_lock_bh(&__ip_vs_svc_lock
);
1144 * Wait until all other svc users go away.
1146 IP_VS_WAIT_WHILE(atomic_read(&svc
->usecnt
) > 1);
1149 * Set the flags and timeout value
1151 svc
->flags
= u
->flags
| IP_VS_SVC_F_HASHED
;
1152 svc
->timeout
= u
->timeout
* HZ
;
1153 svc
->netmask
= u
->netmask
;
1155 old_sched
= svc
->scheduler
;
1156 if (sched
!= old_sched
) {
1158 * Unbind the old scheduler
1160 if ((ret
= ip_vs_unbind_scheduler(svc
))) {
1166 * Bind the new scheduler
1168 if ((ret
= ip_vs_bind_scheduler(svc
, sched
))) {
1170 * If ip_vs_bind_scheduler fails, restore the old
1172 * The main reason of failure is out of memory.
1174 * The question is if the old scheduler can be
1175 * restored all the time. TODO: if it cannot be
1176 * restored some time, we must delete the service,
1177 * otherwise the system may crash.
1179 ip_vs_bind_scheduler(svc
, old_sched
);
1186 write_unlock_bh(&__ip_vs_svc_lock
);
1189 ip_vs_scheduler_put(old_sched
);
/*
 * Tear down an unlinked service: stop its estimator, unbind and release
 * the scheduler and app incarnation, delete every destination, adjust
 * the FTP/null counters, free the struct once unreferenced, and unpin
 * the module. Runs under the caller's _bh write lock on the svc table.
 * NOTE(review): garbled extract — the kfree and some condition heads
 * are missing from this view; code preserved verbatim.
 */
1196 * Delete a service from the service list
1197 * - The service must be unlinked, unlocked and not referenced!
1198 * - We are called under _bh lock
1200 static void __ip_vs_del_service(struct ip_vs_service
*svc
)
1202 struct ip_vs_dest
*dest
, *nxt
;
1203 struct ip_vs_scheduler
*old_sched
;
1205 ip_vs_num_services
--;
1206 ip_vs_kill_estimator(&svc
->stats
);
1208 /* Unbind scheduler */
1209 old_sched
= svc
->scheduler
;
1210 ip_vs_unbind_scheduler(svc
);
1212 ip_vs_scheduler_put(old_sched
);
1214 /* Unbind app inc */
1216 ip_vs_app_inc_put(svc
->inc
);
1221 * Unlink the whole destination list
1223 list_for_each_entry_safe(dest
, nxt
, &svc
->destinations
, n_list
) {
1224 __ip_vs_unlink_dest(svc
, dest
, 0);
1225 __ip_vs_del_dest(dest
);
1229 * Update the virtual service counters
1231 if (svc
->port
== FTPPORT
)
1232 atomic_dec(&ip_vs_ftpsvc_counter
);
1233 else if (svc
->port
== 0)
1234 atomic_dec(&ip_vs_nullsvc_counter
);
1237 * Free the service if nobody refers to it
1239 if (atomic_read(&svc
->refcnt
) == 0)
1242 /* decrease the module use count */
1243 ip_vs_use_count_dec();
/*
 * Public delete: unhash the service, wait for other users to drain,
 * then tear it down via __ip_vs_del_service(), all under the svc
 * write lock.
 * NOTE(review): garbled extract; code preserved verbatim.
 */
1247 * Delete a service from the service list
1249 static int ip_vs_del_service(struct ip_vs_service
*svc
)
1255 * Unhash it from the service table
1257 write_lock_bh(&__ip_vs_svc_lock
);
1259 ip_vs_svc_unhash(svc
);
1262 * Wait until all the svc users go away.
1264 IP_VS_WAIT_WHILE(atomic_read(&svc
->usecnt
) > 1);
1266 __ip_vs_del_service(svc
);
1268 write_unlock_bh(&__ip_vs_svc_lock
);
/*
 * Delete every virtual service: walk both hash tables bucket by bucket,
 * unhashing and tearing down each service under the svc write lock.
 * Note this path waits for usecnt > 0 (not > 1) — the flusher itself
 * holds no use reference on the entries it destroys.
 * NOTE(review): garbled extract; code preserved verbatim.
 */
1275 * Flush all the virtual services
1277 static int ip_vs_flush(void)
1280 struct ip_vs_service
*svc
, *nxt
;
1283 * Flush the service table hashed by <protocol,addr,port>
1285 for(idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
1286 list_for_each_entry_safe(svc
, nxt
, &ip_vs_svc_table
[idx
], s_list
) {
1287 write_lock_bh(&__ip_vs_svc_lock
);
1288 ip_vs_svc_unhash(svc
);
1290 * Wait until all the svc users go away.
1292 IP_VS_WAIT_WHILE(atomic_read(&svc
->usecnt
) > 0);
1293 __ip_vs_del_service(svc
);
1294 write_unlock_bh(&__ip_vs_svc_lock
);
1299 * Flush the service table hashed by fwmark
1301 for(idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
1302 list_for_each_entry_safe(svc
, nxt
,
1303 &ip_vs_svc_fwm_table
[idx
], f_list
) {
1304 write_lock_bh(&__ip_vs_svc_lock
);
1305 ip_vs_svc_unhash(svc
);
1307 * Wait until all the svc users go away.
1309 IP_VS_WAIT_WHILE(atomic_read(&svc
->usecnt
) > 0);
1310 __ip_vs_del_service(svc
);
1311 write_unlock_bh(&__ip_vs_svc_lock
);
/*
 * Zero the statistics of one service and of all its destinations,
 * under the svc write lock.
 * NOTE(review): garbled extract; code preserved verbatim.
 */
1320 * Zero counters in a service or all services
1322 static int ip_vs_zero_service(struct ip_vs_service
*svc
)
1324 struct ip_vs_dest
*dest
;
1326 write_lock_bh(&__ip_vs_svc_lock
);
1327 list_for_each_entry(dest
, &svc
->destinations
, n_list
) {
1328 ip_vs_zero_stats(&dest
->stats
);
1330 ip_vs_zero_stats(&svc
->stats
);
1331 write_unlock_bh(&__ip_vs_svc_lock
);
/*
 * Zero statistics of every service in both hash tables, plus the
 * global ip_vs_stats aggregate.
 * NOTE(review): garbled extract; code preserved verbatim.
 */
1335 static int ip_vs_zero_all(void)
1338 struct ip_vs_service
*svc
;
1340 for(idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
1341 list_for_each_entry(svc
, &ip_vs_svc_table
[idx
], s_list
) {
1342 ip_vs_zero_service(svc
);
1346 for(idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
1347 list_for_each_entry(svc
, &ip_vs_svc_fwm_table
[idx
], f_list
) {
1348 ip_vs_zero_service(svc
);
1352 ip_vs_zero_stats(&ip_vs_stats
);
/*
 * sysctl handler for the three defense-mode knobs: run the normal
 * integer handler, then on a changed write either restore the previous
 * value when out of the 0..3 range, or re-run update_defense_level().
 * NOTE(review): garbled extract — the `int val = *valp` backup and the
 * restore assignment are missing from this view; code preserved
 * verbatim.
 */
1358 proc_do_defense_mode(ctl_table
*table
, int write
, struct file
* filp
,
1359 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
)
1361 int *valp
= table
->data
;
1365 rc
= proc_dointvec(table
, write
, filp
, buffer
, lenp
, ppos
);
1366 if (write
&& (*valp
!= val
)) {
1367 if ((*valp
< 0) || (*valp
> 3)) {
1368 /* Restore the correct value */
1371 update_defense_level();
/*
 * sysctl handler for sync_threshold (a 2-int vector): snapshot the
 * current pair, run proc_dointvec, and restore the snapshot when the
 * written values are invalid (negative, or threshold >= period).
 * NOTE(review): garbled extract — the `int val[2]` declaration and
 * return are missing from this view; code preserved verbatim.
 */
1379 proc_do_sync_threshold(ctl_table
*table
, int write
, struct file
*filp
,
1380 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
)
1382 int *valp
= table
->data
;
1386 /* backup the value first */
1387 memcpy(val
, valp
, sizeof(val
));
1389 rc
= proc_dointvec(table
, write
, filp
, buffer
, lenp
, ppos
);
1390 if (write
&& (valp
[0] < 0 || valp
[1] < 0 || valp
[0] >= valp
[1])) {
1391 /* Restore the correct value */
1392 memcpy(valp
, val
, sizeof(val
));
/*
 * /proc/sys/net/ipv4/vs/* definitions: defense knobs route through
 * proc_do_defense_mode, the DoS-mode protocol timeouts through
 * proc_dointvec_jiffies into vs_timeout_table_dos, sync_threshold
 * through its paired validator, everything else plain proc_dointvec.
 * NOTE(review): garbled extract — entry braces, .mode fields and the
 * table terminator are missing from this view; code preserved verbatim.
 */
1399 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1402 static struct ctl_table vs_vars
[] = {
1404 .ctl_name
= NET_IPV4_VS_AMEMTHRESH
,
1405 .procname
= "amemthresh",
1406 .data
= &sysctl_ip_vs_amemthresh
,
1407 .maxlen
= sizeof(int),
1409 .proc_handler
= &proc_dointvec
,
1411 #ifdef CONFIG_IP_VS_DEBUG
1413 .ctl_name
= NET_IPV4_VS_DEBUG_LEVEL
,
1414 .procname
= "debug_level",
1415 .data
= &sysctl_ip_vs_debug_level
,
1416 .maxlen
= sizeof(int),
1418 .proc_handler
= &proc_dointvec
,
1422 .ctl_name
= NET_IPV4_VS_AMDROPRATE
,
1423 .procname
= "am_droprate",
1424 .data
= &sysctl_ip_vs_am_droprate
,
1425 .maxlen
= sizeof(int),
1427 .proc_handler
= &proc_dointvec
,
1430 .ctl_name
= NET_IPV4_VS_DROP_ENTRY
,
1431 .procname
= "drop_entry",
1432 .data
= &sysctl_ip_vs_drop_entry
,
1433 .maxlen
= sizeof(int),
1435 .proc_handler
= &proc_do_defense_mode
,
1438 .ctl_name
= NET_IPV4_VS_DROP_PACKET
,
1439 .procname
= "drop_packet",
1440 .data
= &sysctl_ip_vs_drop_packet
,
1441 .maxlen
= sizeof(int),
1443 .proc_handler
= &proc_do_defense_mode
,
1446 .ctl_name
= NET_IPV4_VS_SECURE_TCP
,
1447 .procname
= "secure_tcp",
1448 .data
= &sysctl_ip_vs_secure_tcp
,
1449 .maxlen
= sizeof(int),
1451 .proc_handler
= &proc_do_defense_mode
,
1455 .ctl_name
= NET_IPV4_VS_TO_ES
,
1456 .procname
= "timeout_established",
1457 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_ESTABLISHED
],
1458 .maxlen
= sizeof(int),
1460 .proc_handler
= &proc_dointvec_jiffies
,
1463 .ctl_name
= NET_IPV4_VS_TO_SS
,
1464 .procname
= "timeout_synsent",
1465 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_SYN_SENT
],
1466 .maxlen
= sizeof(int),
1468 .proc_handler
= &proc_dointvec_jiffies
,
1471 .ctl_name
= NET_IPV4_VS_TO_SR
,
1472 .procname
= "timeout_synrecv",
1473 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_SYN_RECV
],
1474 .maxlen
= sizeof(int),
1476 .proc_handler
= &proc_dointvec_jiffies
,
1479 .ctl_name
= NET_IPV4_VS_TO_FW
,
1480 .procname
= "timeout_finwait",
1481 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_FIN_WAIT
],
1482 .maxlen
= sizeof(int),
1484 .proc_handler
= &proc_dointvec_jiffies
,
1487 .ctl_name
= NET_IPV4_VS_TO_TW
,
1488 .procname
= "timeout_timewait",
1489 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_TIME_WAIT
],
1490 .maxlen
= sizeof(int),
1492 .proc_handler
= &proc_dointvec_jiffies
,
1495 .ctl_name
= NET_IPV4_VS_TO_CL
,
1496 .procname
= "timeout_close",
1497 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_CLOSE
],
1498 .maxlen
= sizeof(int),
1500 .proc_handler
= &proc_dointvec_jiffies
,
1503 .ctl_name
= NET_IPV4_VS_TO_CW
,
1504 .procname
= "timeout_closewait",
1505 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_CLOSE_WAIT
],
1506 .maxlen
= sizeof(int),
1508 .proc_handler
= &proc_dointvec_jiffies
,
1511 .ctl_name
= NET_IPV4_VS_TO_LA
,
1512 .procname
= "timeout_lastack",
1513 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_LAST_ACK
],
1514 .maxlen
= sizeof(int),
1516 .proc_handler
= &proc_dointvec_jiffies
,
1519 .ctl_name
= NET_IPV4_VS_TO_LI
,
1520 .procname
= "timeout_listen",
1521 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_LISTEN
],
1522 .maxlen
= sizeof(int),
1524 .proc_handler
= &proc_dointvec_jiffies
,
1527 .ctl_name
= NET_IPV4_VS_TO_SA
,
1528 .procname
= "timeout_synack",
1529 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_SYNACK
],
1530 .maxlen
= sizeof(int),
1532 .proc_handler
= &proc_dointvec_jiffies
,
1535 .ctl_name
= NET_IPV4_VS_TO_UDP
,
1536 .procname
= "timeout_udp",
1537 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_UDP
],
1538 .maxlen
= sizeof(int),
1540 .proc_handler
= &proc_dointvec_jiffies
,
1543 .ctl_name
= NET_IPV4_VS_TO_ICMP
,
1544 .procname
= "timeout_icmp",
1545 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_ICMP
],
1546 .maxlen
= sizeof(int),
1548 .proc_handler
= &proc_dointvec_jiffies
,
1552 .ctl_name
= NET_IPV4_VS_CACHE_BYPASS
,
1553 .procname
= "cache_bypass",
1554 .data
= &sysctl_ip_vs_cache_bypass
,
1555 .maxlen
= sizeof(int),
1557 .proc_handler
= &proc_dointvec
,
1560 .ctl_name
= NET_IPV4_VS_EXPIRE_NODEST_CONN
,
1561 .procname
= "expire_nodest_conn",
1562 .data
= &sysctl_ip_vs_expire_nodest_conn
,
1563 .maxlen
= sizeof(int),
1565 .proc_handler
= &proc_dointvec
,
1568 .ctl_name
= NET_IPV4_VS_EXPIRE_QUIESCENT_TEMPLATE
,
1569 .procname
= "expire_quiescent_template",
1570 .data
= &sysctl_ip_vs_expire_quiescent_template
,
1571 .maxlen
= sizeof(int),
1573 .proc_handler
= &proc_dointvec
,
1576 .ctl_name
= NET_IPV4_VS_SYNC_THRESHOLD
,
1577 .procname
= "sync_threshold",
1578 .data
= &sysctl_ip_vs_sync_threshold
,
1579 .maxlen
= sizeof(sysctl_ip_vs_sync_threshold
),
1581 .proc_handler
= &proc_do_sync_threshold
,
1584 .ctl_name
= NET_IPV4_VS_NAT_ICMP_SEND
,
1585 .procname
= "nat_icmp_send",
1586 .data
= &sysctl_ip_vs_nat_icmp_send
,
1587 .maxlen
= sizeof(int),
1589 .proc_handler
= &proc_dointvec
,
1594 static ctl_table vs_table
[] = {
1596 .ctl_name
= NET_IPV4_VS
,
1604 static ctl_table ipvs_ipv4_table
[] = {
1606 .ctl_name
= NET_IPV4
,
1614 static ctl_table vs_root_table
[] = {
1616 .ctl_name
= CTL_NET
,
1619 .child
= ipvs_ipv4_table
,
/* Handle returned by register_sysctl_table(); used for unregistration
 * in ip_vs_control_cleanup(). */
static struct ctl_table_header * sysctl_header;
1626 #ifdef CONFIG_PROC_FS
1629 struct list_head
*table
;
1634 * Write the contents of the VS rule table to a PROCfs file.
1635 * (It is kept just for backward compatibility)
1637 static inline const char *ip_vs_fwd_name(unsigned flags
)
1639 switch (flags
& IP_VS_CONN_F_FWD_MASK
) {
1640 case IP_VS_CONN_F_LOCALNODE
:
1642 case IP_VS_CONN_F_TUNNEL
:
1644 case IP_VS_CONN_F_DROUTE
:
1652 /* Get the Nth entry in the two lists */
1653 static struct ip_vs_service
*ip_vs_info_array(struct seq_file
*seq
, loff_t pos
)
1655 struct ip_vs_iter
*iter
= seq
->private;
1657 struct ip_vs_service
*svc
;
1659 /* look in hash by protocol */
1660 for (idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
1661 list_for_each_entry(svc
, &ip_vs_svc_table
[idx
], s_list
) {
1663 iter
->table
= ip_vs_svc_table
;
1670 /* keep looking in fwmark */
1671 for (idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
1672 list_for_each_entry(svc
, &ip_vs_svc_fwm_table
[idx
], f_list
) {
1674 iter
->table
= ip_vs_svc_fwm_table
;
1684 static void *ip_vs_info_seq_start(struct seq_file
*seq
, loff_t
*pos
)
1687 read_lock_bh(&__ip_vs_svc_lock
);
1688 return *pos
? ip_vs_info_array(seq
, *pos
- 1) : SEQ_START_TOKEN
;
1692 static void *ip_vs_info_seq_next(struct seq_file
*seq
, void *v
, loff_t
*pos
)
1694 struct list_head
*e
;
1695 struct ip_vs_iter
*iter
;
1696 struct ip_vs_service
*svc
;
1699 if (v
== SEQ_START_TOKEN
)
1700 return ip_vs_info_array(seq
,0);
1703 iter
= seq
->private;
1705 if (iter
->table
== ip_vs_svc_table
) {
1706 /* next service in table hashed by protocol */
1707 if ((e
= svc
->s_list
.next
) != &ip_vs_svc_table
[iter
->bucket
])
1708 return list_entry(e
, struct ip_vs_service
, s_list
);
1711 while (++iter
->bucket
< IP_VS_SVC_TAB_SIZE
) {
1712 list_for_each_entry(svc
,&ip_vs_svc_table
[iter
->bucket
],
1718 iter
->table
= ip_vs_svc_fwm_table
;
1723 /* next service in hashed by fwmark */
1724 if ((e
= svc
->f_list
.next
) != &ip_vs_svc_fwm_table
[iter
->bucket
])
1725 return list_entry(e
, struct ip_vs_service
, f_list
);
1728 while (++iter
->bucket
< IP_VS_SVC_TAB_SIZE
) {
1729 list_for_each_entry(svc
, &ip_vs_svc_fwm_table
[iter
->bucket
],
1737 static void ip_vs_info_seq_stop(struct seq_file
*seq
, void *v
)
1739 read_unlock_bh(&__ip_vs_svc_lock
);
1743 static int ip_vs_info_seq_show(struct seq_file
*seq
, void *v
)
1745 if (v
== SEQ_START_TOKEN
) {
1747 "IP Virtual Server version %d.%d.%d (size=%d)\n",
1748 NVERSION(IP_VS_VERSION_CODE
), IP_VS_CONN_TAB_SIZE
);
1750 "Prot LocalAddress:Port Scheduler Flags\n");
1752 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1754 const struct ip_vs_service
*svc
= v
;
1755 const struct ip_vs_iter
*iter
= seq
->private;
1756 const struct ip_vs_dest
*dest
;
1758 if (iter
->table
== ip_vs_svc_table
)
1759 seq_printf(seq
, "%s %08X:%04X %s ",
1760 ip_vs_proto_name(svc
->protocol
),
1763 svc
->scheduler
->name
);
1765 seq_printf(seq
, "FWM %08X %s ",
1766 svc
->fwmark
, svc
->scheduler
->name
);
1768 if (svc
->flags
& IP_VS_SVC_F_PERSISTENT
)
1769 seq_printf(seq
, "persistent %d %08X\n",
1771 ntohl(svc
->netmask
));
1773 seq_putc(seq
, '\n');
1775 list_for_each_entry(dest
, &svc
->destinations
, n_list
) {
1777 " -> %08X:%04X %-7s %-6d %-10d %-10d\n",
1778 ntohl(dest
->addr
), ntohs(dest
->port
),
1779 ip_vs_fwd_name(atomic_read(&dest
->conn_flags
)),
1780 atomic_read(&dest
->weight
),
1781 atomic_read(&dest
->activeconns
),
1782 atomic_read(&dest
->inactconns
));
1788 static struct seq_operations ip_vs_info_seq_ops
= {
1789 .start
= ip_vs_info_seq_start
,
1790 .next
= ip_vs_info_seq_next
,
1791 .stop
= ip_vs_info_seq_stop
,
1792 .show
= ip_vs_info_seq_show
,
1795 static int ip_vs_info_open(struct inode
*inode
, struct file
*file
)
1797 struct seq_file
*seq
;
1799 struct ip_vs_iter
*s
= kmalloc(sizeof(*s
), GFP_KERNEL
);
1804 rc
= seq_open(file
, &ip_vs_info_seq_ops
);
1808 seq
= file
->private_data
;
1810 memset(s
, 0, sizeof(*s
));
1818 static struct file_operations ip_vs_info_fops
= {
1819 .owner
= THIS_MODULE
,
1820 .open
= ip_vs_info_open
,
1822 .llseek
= seq_lseek
,
1823 .release
= seq_release_private
,
1828 struct ip_vs_stats ip_vs_stats
;
1830 #ifdef CONFIG_PROC_FS
1831 static int ip_vs_stats_show(struct seq_file
*seq
, void *v
)
1834 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1836 " Total Incoming Outgoing Incoming Outgoing\n");
1838 " Conns Packets Packets Bytes Bytes\n");
1840 spin_lock_bh(&ip_vs_stats
.lock
);
1841 seq_printf(seq
, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats
.conns
,
1842 ip_vs_stats
.inpkts
, ip_vs_stats
.outpkts
,
1843 (unsigned long long) ip_vs_stats
.inbytes
,
1844 (unsigned long long) ip_vs_stats
.outbytes
);
1846 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1848 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
1849 seq_printf(seq
,"%8X %8X %8X %16X %16X\n",
1854 ip_vs_stats
.outbps
);
1855 spin_unlock_bh(&ip_vs_stats
.lock
);
1860 static int ip_vs_stats_seq_open(struct inode
*inode
, struct file
*file
)
1862 return single_open(file
, ip_vs_stats_show
, NULL
);
1865 static struct file_operations ip_vs_stats_fops
= {
1866 .owner
= THIS_MODULE
,
1867 .open
= ip_vs_stats_seq_open
,
1869 .llseek
= seq_lseek
,
1870 .release
= single_release
,
1876 * Set timeout values for tcp tcpfin udp in the timeout_table.
1878 static int ip_vs_set_timeout(struct ip_vs_timeout_user
*u
)
1880 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
1885 #ifdef CONFIG_IP_VS_PROTO_TCP
1886 if (u
->tcp_timeout
) {
1887 ip_vs_protocol_tcp
.timeout_table
[IP_VS_TCP_S_ESTABLISHED
]
1888 = u
->tcp_timeout
* HZ
;
1891 if (u
->tcp_fin_timeout
) {
1892 ip_vs_protocol_tcp
.timeout_table
[IP_VS_TCP_S_FIN_WAIT
]
1893 = u
->tcp_fin_timeout
* HZ
;
1897 #ifdef CONFIG_IP_VS_PROTO_UDP
1898 if (u
->udp_timeout
) {
1899 ip_vs_protocol_udp
.timeout_table
[IP_VS_UDP_S_NORMAL
]
1900 = u
->udp_timeout
* HZ
;
/* setsockopt command ids are offsets from IP_VS_BASE_CTL; the *_ARG_LEN
 * macros give the exact payload size each command must pass. */
#define SET_CMDID(cmd)		(cmd - IP_VS_BASE_CTL)
#define SERVICE_ARG_LEN		(sizeof(struct ip_vs_service_user))
#define SVCDEST_ARG_LEN		(sizeof(struct ip_vs_service_user) +	\
				 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN		(sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN		(sizeof(struct ip_vs_daemon_user))
#define MAX_ARG_LEN		SVCDEST_ARG_LEN
1915 static const unsigned char set_arglen
[SET_CMDID(IP_VS_SO_SET_MAX
)+1] = {
1916 [SET_CMDID(IP_VS_SO_SET_ADD
)] = SERVICE_ARG_LEN
,
1917 [SET_CMDID(IP_VS_SO_SET_EDIT
)] = SERVICE_ARG_LEN
,
1918 [SET_CMDID(IP_VS_SO_SET_DEL
)] = SERVICE_ARG_LEN
,
1919 [SET_CMDID(IP_VS_SO_SET_FLUSH
)] = 0,
1920 [SET_CMDID(IP_VS_SO_SET_ADDDEST
)] = SVCDEST_ARG_LEN
,
1921 [SET_CMDID(IP_VS_SO_SET_DELDEST
)] = SVCDEST_ARG_LEN
,
1922 [SET_CMDID(IP_VS_SO_SET_EDITDEST
)] = SVCDEST_ARG_LEN
,
1923 [SET_CMDID(IP_VS_SO_SET_TIMEOUT
)] = TIMEOUT_ARG_LEN
,
1924 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON
)] = DAEMON_ARG_LEN
,
1925 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON
)] = DAEMON_ARG_LEN
,
1926 [SET_CMDID(IP_VS_SO_SET_ZERO
)] = SERVICE_ARG_LEN
,
1930 do_ip_vs_set_ctl(struct sock
*sk
, int cmd
, void __user
*user
, unsigned int len
)
1933 unsigned char arg
[MAX_ARG_LEN
];
1934 struct ip_vs_service_user
*usvc
;
1935 struct ip_vs_service
*svc
;
1936 struct ip_vs_dest_user
*udest
;
1938 if (!capable(CAP_NET_ADMIN
))
1941 if (len
!= set_arglen
[SET_CMDID(cmd
)]) {
1942 IP_VS_ERR("set_ctl: len %u != %u\n",
1943 len
, set_arglen
[SET_CMDID(cmd
)]);
1947 if (copy_from_user(arg
, user
, len
) != 0)
1950 /* increase the module use count */
1951 ip_vs_use_count_inc();
1953 if (down_interruptible(&__ip_vs_mutex
)) {
1958 if (cmd
== IP_VS_SO_SET_FLUSH
) {
1959 /* Flush the virtual service */
1960 ret
= ip_vs_flush();
1962 } else if (cmd
== IP_VS_SO_SET_TIMEOUT
) {
1963 /* Set timeout values for (tcp tcpfin udp) */
1964 ret
= ip_vs_set_timeout((struct ip_vs_timeout_user
*)arg
);
1966 } else if (cmd
== IP_VS_SO_SET_STARTDAEMON
) {
1967 struct ip_vs_daemon_user
*dm
= (struct ip_vs_daemon_user
*)arg
;
1968 ret
= start_sync_thread(dm
->state
, dm
->mcast_ifn
, dm
->syncid
);
1970 } else if (cmd
== IP_VS_SO_SET_STOPDAEMON
) {
1971 struct ip_vs_daemon_user
*dm
= (struct ip_vs_daemon_user
*)arg
;
1972 ret
= stop_sync_thread(dm
->state
);
1976 usvc
= (struct ip_vs_service_user
*)arg
;
1977 udest
= (struct ip_vs_dest_user
*)(usvc
+ 1);
1979 if (cmd
== IP_VS_SO_SET_ZERO
) {
1980 /* if no service address is set, zero counters in all */
1981 if (!usvc
->fwmark
&& !usvc
->addr
&& !usvc
->port
) {
1982 ret
= ip_vs_zero_all();
1987 /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
1988 if (usvc
->protocol
!=IPPROTO_TCP
&& usvc
->protocol
!=IPPROTO_UDP
) {
1989 IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
1990 usvc
->protocol
, NIPQUAD(usvc
->addr
),
1991 ntohs(usvc
->port
), usvc
->sched_name
);
1996 /* Lookup the exact service by <protocol, addr, port> or fwmark */
1997 if (usvc
->fwmark
== 0)
1998 svc
= __ip_vs_service_get(usvc
->protocol
,
1999 usvc
->addr
, usvc
->port
);
2001 svc
= __ip_vs_svc_fwm_get(usvc
->fwmark
);
2003 if (cmd
!= IP_VS_SO_SET_ADD
2004 && (svc
== NULL
|| svc
->protocol
!= usvc
->protocol
)) {
2010 case IP_VS_SO_SET_ADD
:
2014 ret
= ip_vs_add_service(usvc
, &svc
);
2016 case IP_VS_SO_SET_EDIT
:
2017 ret
= ip_vs_edit_service(svc
, usvc
);
2019 case IP_VS_SO_SET_DEL
:
2020 ret
= ip_vs_del_service(svc
);
2024 case IP_VS_SO_SET_ZERO
:
2025 ret
= ip_vs_zero_service(svc
);
2027 case IP_VS_SO_SET_ADDDEST
:
2028 ret
= ip_vs_add_dest(svc
, udest
);
2030 case IP_VS_SO_SET_EDITDEST
:
2031 ret
= ip_vs_edit_dest(svc
, udest
);
2033 case IP_VS_SO_SET_DELDEST
:
2034 ret
= ip_vs_del_dest(svc
, udest
);
2041 ip_vs_service_put(svc
);
2046 /* decrease the module use count */
2047 ip_vs_use_count_dec();
2054 ip_vs_copy_stats(struct ip_vs_stats_user
*dst
, struct ip_vs_stats
*src
)
2056 spin_lock_bh(&src
->lock
);
2057 memcpy(dst
, src
, (char*)&src
->lock
- (char*)src
);
2058 spin_unlock_bh(&src
->lock
);
2062 ip_vs_copy_service(struct ip_vs_service_entry
*dst
, struct ip_vs_service
*src
)
2064 dst
->protocol
= src
->protocol
;
2065 dst
->addr
= src
->addr
;
2066 dst
->port
= src
->port
;
2067 dst
->fwmark
= src
->fwmark
;
2068 strlcpy(dst
->sched_name
, src
->scheduler
->name
, sizeof(dst
->sched_name
));
2069 dst
->flags
= src
->flags
;
2070 dst
->timeout
= src
->timeout
/ HZ
;
2071 dst
->netmask
= src
->netmask
;
2072 dst
->num_dests
= src
->num_dests
;
2073 ip_vs_copy_stats(&dst
->stats
, &src
->stats
);
2077 __ip_vs_get_service_entries(const struct ip_vs_get_services
*get
,
2078 struct ip_vs_get_services __user
*uptr
)
2081 struct ip_vs_service
*svc
;
2082 struct ip_vs_service_entry entry
;
2085 for (idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
2086 list_for_each_entry(svc
, &ip_vs_svc_table
[idx
], s_list
) {
2087 if (count
>= get
->num_services
)
2089 memset(&entry
, 0, sizeof(entry
));
2090 ip_vs_copy_service(&entry
, svc
);
2091 if (copy_to_user(&uptr
->entrytable
[count
],
2092 &entry
, sizeof(entry
))) {
2100 for (idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
2101 list_for_each_entry(svc
, &ip_vs_svc_fwm_table
[idx
], f_list
) {
2102 if (count
>= get
->num_services
)
2104 memset(&entry
, 0, sizeof(entry
));
2105 ip_vs_copy_service(&entry
, svc
);
2106 if (copy_to_user(&uptr
->entrytable
[count
],
2107 &entry
, sizeof(entry
))) {
2119 __ip_vs_get_dest_entries(const struct ip_vs_get_dests
*get
,
2120 struct ip_vs_get_dests __user
*uptr
)
2122 struct ip_vs_service
*svc
;
2126 svc
= __ip_vs_svc_fwm_get(get
->fwmark
);
2128 svc
= __ip_vs_service_get(get
->protocol
,
2129 get
->addr
, get
->port
);
2132 struct ip_vs_dest
*dest
;
2133 struct ip_vs_dest_entry entry
;
2135 list_for_each_entry(dest
, &svc
->destinations
, n_list
) {
2136 if (count
>= get
->num_dests
)
2139 entry
.addr
= dest
->addr
;
2140 entry
.port
= dest
->port
;
2141 entry
.conn_flags
= atomic_read(&dest
->conn_flags
);
2142 entry
.weight
= atomic_read(&dest
->weight
);
2143 entry
.u_threshold
= dest
->u_threshold
;
2144 entry
.l_threshold
= dest
->l_threshold
;
2145 entry
.activeconns
= atomic_read(&dest
->activeconns
);
2146 entry
.inactconns
= atomic_read(&dest
->inactconns
);
2147 entry
.persistconns
= atomic_read(&dest
->persistconns
);
2148 ip_vs_copy_stats(&entry
.stats
, &dest
->stats
);
2149 if (copy_to_user(&uptr
->entrytable
[count
],
2150 &entry
, sizeof(entry
))) {
2156 ip_vs_service_put(svc
);
2163 __ip_vs_get_timeouts(struct ip_vs_timeout_user
*u
)
2165 #ifdef CONFIG_IP_VS_PROTO_TCP
2167 ip_vs_protocol_tcp
.timeout_table
[IP_VS_TCP_S_ESTABLISHED
] / HZ
;
2168 u
->tcp_fin_timeout
=
2169 ip_vs_protocol_tcp
.timeout_table
[IP_VS_TCP_S_FIN_WAIT
] / HZ
;
2171 #ifdef CONFIG_IP_VS_PROTO_UDP
2173 ip_vs_protocol_udp
.timeout_table
[IP_VS_UDP_S_NORMAL
] / HZ
;
/* getsockopt command ids are offsets from IP_VS_BASE_CTL; the *_ARG_LEN
 * macros give the minimum buffer each query command requires. */
#define GET_CMDID(cmd)		(cmd - IP_VS_BASE_CTL)
#define GET_INFO_ARG_LEN	(sizeof(struct ip_vs_getinfo))
#define GET_SERVICES_ARG_LEN	(sizeof(struct ip_vs_get_services))
#define GET_SERVICE_ARG_LEN	(sizeof(struct ip_vs_service_entry))
#define GET_DESTS_ARG_LEN	(sizeof(struct ip_vs_get_dests))
#define GET_TIMEOUT_ARG_LEN	(sizeof(struct ip_vs_timeout_user))
#define GET_DAEMON_ARG_LEN	(sizeof(struct ip_vs_daemon_user) * 2)
2186 static const unsigned char get_arglen
[GET_CMDID(IP_VS_SO_GET_MAX
)+1] = {
2187 [GET_CMDID(IP_VS_SO_GET_VERSION
)] = 64,
2188 [GET_CMDID(IP_VS_SO_GET_INFO
)] = GET_INFO_ARG_LEN
,
2189 [GET_CMDID(IP_VS_SO_GET_SERVICES
)] = GET_SERVICES_ARG_LEN
,
2190 [GET_CMDID(IP_VS_SO_GET_SERVICE
)] = GET_SERVICE_ARG_LEN
,
2191 [GET_CMDID(IP_VS_SO_GET_DESTS
)] = GET_DESTS_ARG_LEN
,
2192 [GET_CMDID(IP_VS_SO_GET_TIMEOUT
)] = GET_TIMEOUT_ARG_LEN
,
2193 [GET_CMDID(IP_VS_SO_GET_DAEMON
)] = GET_DAEMON_ARG_LEN
,
2197 do_ip_vs_get_ctl(struct sock
*sk
, int cmd
, void __user
*user
, int *len
)
2199 unsigned char arg
[128];
2202 if (!capable(CAP_NET_ADMIN
))
2205 if (*len
< get_arglen
[GET_CMDID(cmd
)]) {
2206 IP_VS_ERR("get_ctl: len %u < %u\n",
2207 *len
, get_arglen
[GET_CMDID(cmd
)]);
2211 if (copy_from_user(arg
, user
, get_arglen
[GET_CMDID(cmd
)]) != 0)
2214 if (down_interruptible(&__ip_vs_mutex
))
2215 return -ERESTARTSYS
;
2218 case IP_VS_SO_GET_VERSION
:
2222 sprintf(buf
, "IP Virtual Server version %d.%d.%d (size=%d)",
2223 NVERSION(IP_VS_VERSION_CODE
), IP_VS_CONN_TAB_SIZE
);
2224 if (copy_to_user(user
, buf
, strlen(buf
)+1) != 0) {
2228 *len
= strlen(buf
)+1;
2232 case IP_VS_SO_GET_INFO
:
2234 struct ip_vs_getinfo info
;
2235 info
.version
= IP_VS_VERSION_CODE
;
2236 info
.size
= IP_VS_CONN_TAB_SIZE
;
2237 info
.num_services
= ip_vs_num_services
;
2238 if (copy_to_user(user
, &info
, sizeof(info
)) != 0)
2243 case IP_VS_SO_GET_SERVICES
:
2245 struct ip_vs_get_services
*get
;
2248 get
= (struct ip_vs_get_services
*)arg
;
2249 size
= sizeof(*get
) +
2250 sizeof(struct ip_vs_service_entry
) * get
->num_services
;
2252 IP_VS_ERR("length: %u != %u\n", *len
, size
);
2256 ret
= __ip_vs_get_service_entries(get
, user
);
2260 case IP_VS_SO_GET_SERVICE
:
2262 struct ip_vs_service_entry
*entry
;
2263 struct ip_vs_service
*svc
;
2265 entry
= (struct ip_vs_service_entry
*)arg
;
2267 svc
= __ip_vs_svc_fwm_get(entry
->fwmark
);
2269 svc
= __ip_vs_service_get(entry
->protocol
,
2270 entry
->addr
, entry
->port
);
2272 ip_vs_copy_service(entry
, svc
);
2273 if (copy_to_user(user
, entry
, sizeof(*entry
)) != 0)
2275 ip_vs_service_put(svc
);
2281 case IP_VS_SO_GET_DESTS
:
2283 struct ip_vs_get_dests
*get
;
2286 get
= (struct ip_vs_get_dests
*)arg
;
2287 size
= sizeof(*get
) +
2288 sizeof(struct ip_vs_dest_entry
) * get
->num_dests
;
2290 IP_VS_ERR("length: %u != %u\n", *len
, size
);
2294 ret
= __ip_vs_get_dest_entries(get
, user
);
2298 case IP_VS_SO_GET_TIMEOUT
:
2300 struct ip_vs_timeout_user t
;
2302 __ip_vs_get_timeouts(&t
);
2303 if (copy_to_user(user
, &t
, sizeof(t
)) != 0)
2308 case IP_VS_SO_GET_DAEMON
:
2310 struct ip_vs_daemon_user d
[2];
2312 memset(&d
, 0, sizeof(d
));
2313 if (ip_vs_sync_state
& IP_VS_STATE_MASTER
) {
2314 d
[0].state
= IP_VS_STATE_MASTER
;
2315 strlcpy(d
[0].mcast_ifn
, ip_vs_master_mcast_ifn
, sizeof(d
[0].mcast_ifn
));
2316 d
[0].syncid
= ip_vs_master_syncid
;
2318 if (ip_vs_sync_state
& IP_VS_STATE_BACKUP
) {
2319 d
[1].state
= IP_VS_STATE_BACKUP
;
2320 strlcpy(d
[1].mcast_ifn
, ip_vs_backup_mcast_ifn
, sizeof(d
[1].mcast_ifn
));
2321 d
[1].syncid
= ip_vs_backup_syncid
;
2323 if (copy_to_user(user
, &d
, sizeof(d
)) != 0)
2338 static struct nf_sockopt_ops ip_vs_sockopts
= {
2340 .set_optmin
= IP_VS_BASE_CTL
,
2341 .set_optmax
= IP_VS_SO_SET_MAX
+1,
2342 .set
= do_ip_vs_set_ctl
,
2343 .get_optmin
= IP_VS_BASE_CTL
,
2344 .get_optmax
= IP_VS_SO_GET_MAX
+1,
2345 .get
= do_ip_vs_get_ctl
,
2349 int ip_vs_control_init(void)
2356 ret
= nf_register_sockopt(&ip_vs_sockopts
);
2358 IP_VS_ERR("cannot register sockopt.\n");
2362 proc_net_fops_create("ip_vs", 0, &ip_vs_info_fops
);
2363 proc_net_fops_create("ip_vs_stats",0, &ip_vs_stats_fops
);
2365 sysctl_header
= register_sysctl_table(vs_root_table
, 0);
2367 /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
2368 for(idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
2369 INIT_LIST_HEAD(&ip_vs_svc_table
[idx
]);
2370 INIT_LIST_HEAD(&ip_vs_svc_fwm_table
[idx
]);
2372 for(idx
= 0; idx
< IP_VS_RTAB_SIZE
; idx
++) {
2373 INIT_LIST_HEAD(&ip_vs_rtable
[idx
]);
2376 memset(&ip_vs_stats
, 0, sizeof(ip_vs_stats
));
2377 spin_lock_init(&ip_vs_stats
.lock
);
2378 ip_vs_new_estimator(&ip_vs_stats
);
2380 /* Hook the defense timer */
2381 schedule_delayed_work(&defense_work
, DEFENSE_TIMER_PERIOD
);
2388 void ip_vs_control_cleanup(void)
2391 ip_vs_trash_cleanup();
2392 cancel_rearming_delayed_work(&defense_work
);
2393 ip_vs_kill_estimator(&ip_vs_stats
);
2394 unregister_sysctl_table(sysctl_header
);
2395 proc_net_remove("ip_vs_stats");
2396 proc_net_remove("ip_vs");
2397 nf_unregister_sockopt(&ip_vs_sockopts
);