/*
 * IPVS         An implementation of the IP virtual server support for the
 *              LINUX operating system.  IPVS is now implemented as a module
 *              over the NetFilter framework. IPVS can be used to build a
 *              high-performance and highly available server based on a
 *              cluster of servers.
 *
 * Version:     $Id: ip_vs_sync.c,v 1.13 2003/06/08 09:31:19 wensong Exp $
 *
 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
 *
 * ip_vs_sync:  sync connection info from master load balancer to backups
 *              through multicast
 *
 * Changes:
 *	Alexandre Cassen	:	Added master & backup support at a time.
 *	Alexandre Cassen	:	Added SyncID support for incoming sync
 *					messages filtering.
 *	Justin Ossevoort	:	Fix endian problem on sync message size.
 */
22 #include <linux/module.h>
23 #include <linux/slab.h>
24 #include <linux/inetdevice.h>
25 #include <linux/net.h>
26 #include <linux/completion.h>
27 #include <linux/delay.h>
28 #include <linux/skbuff.h>
30 #include <linux/igmp.h> /* for ip_mc_join_group */
31 #include <linux/udp.h>
35 #include <asm/uaccess.h> /* for get_fs and set_fs */
37 #include <net/ip_vs.h>
/* Multicast endpoint used by the sync daemons (host byte order here). */
#define IP_VS_SYNC_GROUP 0xe0000051    /* multicast addr - 224.0.0.81 */
#define IP_VS_SYNC_PORT  8848          /* multicast port */
44 * IPVS sync connection entry
46 struct ip_vs_sync_conn
{
49 /* Protocol, addresses and port numbers */
50 __u8 protocol
; /* Which protocol (TCP/UDP) */
54 __be32 caddr
; /* client address */
55 __be32 vaddr
; /* virtual address */
56 __be32 daddr
; /* destination address */
58 /* Flags and state transition */
59 __be16 flags
; /* status flags */
60 __be16 state
; /* state info */
62 /* The sequence options start here */
65 struct ip_vs_sync_conn_options
{
66 struct ip_vs_seq in_seq
; /* incoming seq. struct */
67 struct ip_vs_seq out_seq
; /* outgoing seq. struct */
/*
 * Start-up parameters handed from start_sync_thread() to sync_thread().
 * Allocated by start_sync_thread() and freed by sync_thread() on exit.
 */
struct ip_vs_sync_thread_data {
	struct completion *startup;	/* signalled once the thread is running */
	int state;			/* sync state the thread was started for */
};
/* Size of one sync entry without / with the sequence-option trailer. */
#define SIMPLE_CONN_SIZE  (sizeof(struct ip_vs_sync_conn))
#define FULL_CONN_SIZE  \
(sizeof(struct ip_vs_sync_conn) + sizeof(struct ip_vs_sync_conn_options))
/*
  The master multicasts messages to the backup load balancers in the
  following format.

       0                   1                   2                   3
       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
      |  Count Conns  |    SyncID     |            Size               |
      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
      |                                                               |
      |                    IPVS Sync Connection (1)                   |
      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
      |                            .                                  |
      |                            .                                  |
      |                            .                                  |
      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
      |                                                               |
      |                    IPVS Sync Connection (n)                   |
      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*/
101 #define SYNC_MESG_HEADER_LEN 4
103 struct ip_vs_sync_mesg
{
108 /* ip_vs_sync_conn entries start here */
/*
 * the maximum length of sync (sending/receiving) message;
 * both are computed from the interface MTU in set_sync_mesg_maxlen()
 */
static int sync_send_mesg_maxlen;
static int sync_recv_mesg_maxlen;
115 struct ip_vs_sync_buff
{
116 struct list_head list
;
117 unsigned long firstuse
;
119 /* pointers for the message data */
120 struct ip_vs_sync_mesg
*mesg
;
126 /* the sync_buff list head and the lock */
127 static LIST_HEAD(ip_vs_sync_queue
);
128 static DEFINE_SPINLOCK(ip_vs_sync_lock
);
130 /* current sync_buff for accepting new conn entries */
131 static struct ip_vs_sync_buff
*curr_sb
= NULL
;
132 static DEFINE_SPINLOCK(curr_sb_lock
);
134 /* ipvs sync daemon state */
135 volatile int ip_vs_sync_state
= IP_VS_STATE_NONE
;
136 volatile int ip_vs_master_syncid
= 0;
137 volatile int ip_vs_backup_syncid
= 0;
139 /* multicast interface name */
140 char ip_vs_master_mcast_ifn
[IP_VS_IFNAME_MAXLEN
];
141 char ip_vs_backup_mcast_ifn
[IP_VS_IFNAME_MAXLEN
];
144 static struct sockaddr_in mcast_addr
;
147 static inline void sb_queue_tail(struct ip_vs_sync_buff
*sb
)
149 spin_lock(&ip_vs_sync_lock
);
150 list_add_tail(&sb
->list
, &ip_vs_sync_queue
);
151 spin_unlock(&ip_vs_sync_lock
);
154 static inline struct ip_vs_sync_buff
* sb_dequeue(void)
156 struct ip_vs_sync_buff
*sb
;
158 spin_lock_bh(&ip_vs_sync_lock
);
159 if (list_empty(&ip_vs_sync_queue
)) {
162 sb
= list_entry(ip_vs_sync_queue
.next
,
163 struct ip_vs_sync_buff
,
167 spin_unlock_bh(&ip_vs_sync_lock
);
172 static inline struct ip_vs_sync_buff
* ip_vs_sync_buff_create(void)
174 struct ip_vs_sync_buff
*sb
;
176 if (!(sb
=kmalloc(sizeof(struct ip_vs_sync_buff
), GFP_ATOMIC
)))
179 if (!(sb
->mesg
=kmalloc(sync_send_mesg_maxlen
, GFP_ATOMIC
))) {
183 sb
->mesg
->nr_conns
= 0;
184 sb
->mesg
->syncid
= ip_vs_master_syncid
;
186 sb
->head
= (unsigned char *)sb
->mesg
+ 4;
187 sb
->end
= (unsigned char *)sb
->mesg
+ sync_send_mesg_maxlen
;
188 sb
->firstuse
= jiffies
;
192 static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff
*sb
)
199 * Get the current sync buffer if it has been created for more
200 * than the specified time or the specified time is zero.
202 static inline struct ip_vs_sync_buff
*
203 get_curr_sync_buff(unsigned long time
)
205 struct ip_vs_sync_buff
*sb
;
207 spin_lock_bh(&curr_sb_lock
);
208 if (curr_sb
&& (time
== 0 ||
209 time_before(jiffies
- curr_sb
->firstuse
, time
))) {
214 spin_unlock_bh(&curr_sb_lock
);
220 * Add an ip_vs_conn information into the current sync_buff.
221 * Called by ip_vs_in.
223 void ip_vs_sync_conn(struct ip_vs_conn
*cp
)
225 struct ip_vs_sync_mesg
*m
;
226 struct ip_vs_sync_conn
*s
;
229 spin_lock(&curr_sb_lock
);
231 if (!(curr_sb
=ip_vs_sync_buff_create())) {
232 spin_unlock(&curr_sb_lock
);
233 IP_VS_ERR("ip_vs_sync_buff_create failed.\n");
238 len
= (cp
->flags
& IP_VS_CONN_F_SEQ_MASK
) ? FULL_CONN_SIZE
:
241 s
= (struct ip_vs_sync_conn
*)curr_sb
->head
;
244 s
->protocol
= cp
->protocol
;
245 s
->cport
= cp
->cport
;
246 s
->vport
= cp
->vport
;
247 s
->dport
= cp
->dport
;
248 s
->caddr
= cp
->caddr
;
249 s
->vaddr
= cp
->vaddr
;
250 s
->daddr
= cp
->daddr
;
251 s
->flags
= htons(cp
->flags
& ~IP_VS_CONN_F_HASHED
);
252 s
->state
= htons(cp
->state
);
253 if (cp
->flags
& IP_VS_CONN_F_SEQ_MASK
) {
254 struct ip_vs_sync_conn_options
*opt
=
255 (struct ip_vs_sync_conn_options
*)&s
[1];
256 memcpy(opt
, &cp
->in_seq
, sizeof(*opt
));
261 curr_sb
->head
+= len
;
263 /* check if there is a space for next one */
264 if (curr_sb
->head
+FULL_CONN_SIZE
> curr_sb
->end
) {
265 sb_queue_tail(curr_sb
);
268 spin_unlock(&curr_sb_lock
);
270 /* synchronize its controller if it has */
272 ip_vs_sync_conn(cp
->control
);
277 * Process received multicast message and create the corresponding
278 * ip_vs_conn entries.
280 static void ip_vs_process_message(const char *buffer
, const size_t buflen
)
282 struct ip_vs_sync_mesg
*m
= (struct ip_vs_sync_mesg
*)buffer
;
283 struct ip_vs_sync_conn
*s
;
284 struct ip_vs_sync_conn_options
*opt
;
285 struct ip_vs_conn
*cp
;
286 struct ip_vs_protocol
*pp
;
287 struct ip_vs_dest
*dest
;
291 /* Convert size back to host byte order */
292 m
->size
= ntohs(m
->size
);
294 if (buflen
!= m
->size
) {
295 IP_VS_ERR("bogus message\n");
299 /* SyncID sanity check */
300 if (ip_vs_backup_syncid
!= 0 && m
->syncid
!= ip_vs_backup_syncid
) {
301 IP_VS_DBG(7, "Ignoring incoming msg with syncid = %d\n",
306 p
= (char *)buffer
+ sizeof(struct ip_vs_sync_mesg
);
307 for (i
=0; i
<m
->nr_conns
; i
++) {
308 unsigned flags
, state
;
310 s
= (struct ip_vs_sync_conn
*)p
;
311 flags
= ntohs(s
->flags
) | IP_VS_CONN_F_SYNC
;
312 state
= ntohs(s
->state
);
313 if (!(flags
& IP_VS_CONN_F_TEMPLATE
))
314 cp
= ip_vs_conn_in_get(s
->protocol
,
318 cp
= ip_vs_ct_in_get(s
->protocol
,
323 * Find the appropriate destination for the connection.
324 * If it is not found the connection will remain unbound
327 dest
= ip_vs_find_dest(s
->daddr
, s
->dport
,
330 /* Set the approprite ativity flag */
331 if (s
->protocol
== IPPROTO_TCP
) {
332 if (state
!= IP_VS_TCP_S_ESTABLISHED
)
333 flags
|= IP_VS_CONN_F_INACTIVE
;
335 flags
&= ~IP_VS_CONN_F_INACTIVE
;
337 cp
= ip_vs_conn_new(s
->protocol
,
343 atomic_dec(&dest
->refcnt
);
345 IP_VS_ERR("ip_vs_conn_new failed\n");
349 } else if (!cp
->dest
) {
350 dest
= ip_vs_try_bind_dest(cp
);
352 /* it is an unbound entry created by
354 cp
->flags
= flags
| IP_VS_CONN_F_HASHED
;
356 atomic_dec(&dest
->refcnt
);
357 } else if ((cp
->dest
) && (cp
->protocol
== IPPROTO_TCP
) &&
358 (cp
->state
!= state
)) {
359 /* update active/inactive flag for the connection */
361 if (!(cp
->flags
& IP_VS_CONN_F_INACTIVE
) &&
362 (state
!= IP_VS_TCP_S_ESTABLISHED
)) {
363 atomic_dec(&dest
->activeconns
);
364 atomic_inc(&dest
->inactconns
);
365 cp
->flags
|= IP_VS_CONN_F_INACTIVE
;
366 } else if ((cp
->flags
& IP_VS_CONN_F_INACTIVE
) &&
367 (state
== IP_VS_TCP_S_ESTABLISHED
)) {
368 atomic_inc(&dest
->activeconns
);
369 atomic_dec(&dest
->inactconns
);
370 cp
->flags
&= ~IP_VS_CONN_F_INACTIVE
;
374 if (flags
& IP_VS_CONN_F_SEQ_MASK
) {
375 opt
= (struct ip_vs_sync_conn_options
*)&s
[1];
376 memcpy(&cp
->in_seq
, opt
, sizeof(*opt
));
379 p
+= SIMPLE_CONN_SIZE
;
381 atomic_set(&cp
->in_pkts
, sysctl_ip_vs_sync_threshold
[0]);
383 pp
= ip_vs_proto_get(s
->protocol
);
384 cp
->timeout
= pp
->timeout_table
[cp
->state
];
387 if (p
> buffer
+buflen
) {
388 IP_VS_ERR("bogus message\n");
396 * Setup loopback of outgoing multicasts on a sending socket
398 static void set_mcast_loop(struct sock
*sk
, u_char loop
)
400 struct inet_sock
*inet
= inet_sk(sk
);
402 /* setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &loop, sizeof(loop)); */
404 inet
->mc_loop
= loop
? 1 : 0;
409 * Specify TTL for outgoing multicasts on a sending socket
411 static void set_mcast_ttl(struct sock
*sk
, u_char ttl
)
413 struct inet_sock
*inet
= inet_sk(sk
);
415 /* setsockopt(sock, SOL_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl)); */
422 * Specifiy default interface for outgoing multicasts
424 static int set_mcast_if(struct sock
*sk
, char *ifname
)
426 struct net_device
*dev
;
427 struct inet_sock
*inet
= inet_sk(sk
);
429 if ((dev
= __dev_get_by_name(&init_net
, ifname
)) == NULL
)
432 if (sk
->sk_bound_dev_if
&& dev
->ifindex
!= sk
->sk_bound_dev_if
)
436 inet
->mc_index
= dev
->ifindex
;
437 /* inet->mc_addr = 0; */
445 * Set the maximum length of sync message according to the
446 * specified interface's MTU.
448 static int set_sync_mesg_maxlen(int sync_state
)
450 struct net_device
*dev
;
453 if (sync_state
== IP_VS_STATE_MASTER
) {
454 if ((dev
= __dev_get_by_name(&init_net
, ip_vs_master_mcast_ifn
)) == NULL
)
457 num
= (dev
->mtu
- sizeof(struct iphdr
) -
458 sizeof(struct udphdr
) -
459 SYNC_MESG_HEADER_LEN
- 20) / SIMPLE_CONN_SIZE
;
460 sync_send_mesg_maxlen
=
461 SYNC_MESG_HEADER_LEN
+ SIMPLE_CONN_SIZE
* num
;
462 IP_VS_DBG(7, "setting the maximum length of sync sending "
463 "message %d.\n", sync_send_mesg_maxlen
);
464 } else if (sync_state
== IP_VS_STATE_BACKUP
) {
465 if ((dev
= __dev_get_by_name(&init_net
, ip_vs_backup_mcast_ifn
)) == NULL
)
468 sync_recv_mesg_maxlen
= dev
->mtu
-
469 sizeof(struct iphdr
) - sizeof(struct udphdr
);
470 IP_VS_DBG(7, "setting the maximum length of sync receiving "
471 "message %d.\n", sync_recv_mesg_maxlen
);
479 * Join a multicast group.
480 * the group is specified by a class D multicast address 224.0.0.0/8
481 * in the in_addr structure passed in as a parameter.
484 join_mcast_group(struct sock
*sk
, struct in_addr
*addr
, char *ifname
)
486 struct ip_mreqn mreq
;
487 struct net_device
*dev
;
490 memset(&mreq
, 0, sizeof(mreq
));
491 memcpy(&mreq
.imr_multiaddr
, addr
, sizeof(struct in_addr
));
493 if ((dev
= __dev_get_by_name(&init_net
, ifname
)) == NULL
)
495 if (sk
->sk_bound_dev_if
&& dev
->ifindex
!= sk
->sk_bound_dev_if
)
498 mreq
.imr_ifindex
= dev
->ifindex
;
501 ret
= ip_mc_join_group(sk
, &mreq
);
508 static int bind_mcastif_addr(struct socket
*sock
, char *ifname
)
510 struct net_device
*dev
;
512 struct sockaddr_in sin
;
514 if ((dev
= __dev_get_by_name(&init_net
, ifname
)) == NULL
)
517 addr
= inet_select_addr(dev
, 0, RT_SCOPE_UNIVERSE
);
519 IP_VS_ERR("You probably need to specify IP address on "
520 "multicast interface.\n");
522 IP_VS_DBG(7, "binding socket with (%s) %u.%u.%u.%u\n",
523 ifname
, NIPQUAD(addr
));
525 /* Now bind the socket with the address of multicast interface */
526 sin
.sin_family
= AF_INET
;
527 sin
.sin_addr
.s_addr
= addr
;
530 return sock
->ops
->bind(sock
, (struct sockaddr
*)&sin
, sizeof(sin
));
534 * Set up sending multicast socket over UDP
536 static struct socket
* make_send_sock(void)
540 /* First create a socket */
541 if (sock_create_kern(PF_INET
, SOCK_DGRAM
, IPPROTO_UDP
, &sock
) < 0) {
542 IP_VS_ERR("Error during creation of socket; terminating\n");
546 if (set_mcast_if(sock
->sk
, ip_vs_master_mcast_ifn
) < 0) {
547 IP_VS_ERR("Error setting outbound mcast interface\n");
551 set_mcast_loop(sock
->sk
, 0);
552 set_mcast_ttl(sock
->sk
, 1);
554 if (bind_mcastif_addr(sock
, ip_vs_master_mcast_ifn
) < 0) {
555 IP_VS_ERR("Error binding address of the mcast interface\n");
559 if (sock
->ops
->connect(sock
,
560 (struct sockaddr
*)&mcast_addr
,
561 sizeof(struct sockaddr
), 0) < 0) {
562 IP_VS_ERR("Error connecting to the multicast addr\n");
575 * Set up receiving multicast socket over UDP
577 static struct socket
* make_receive_sock(void)
581 /* First create a socket */
582 if (sock_create_kern(PF_INET
, SOCK_DGRAM
, IPPROTO_UDP
, &sock
) < 0) {
583 IP_VS_ERR("Error during creation of socket; terminating\n");
587 /* it is equivalent to the REUSEADDR option in user-space */
588 sock
->sk
->sk_reuse
= 1;
590 if (sock
->ops
->bind(sock
,
591 (struct sockaddr
*)&mcast_addr
,
592 sizeof(struct sockaddr
)) < 0) {
593 IP_VS_ERR("Error binding to the multicast addr\n");
597 /* join the multicast group */
598 if (join_mcast_group(sock
->sk
,
599 (struct in_addr
*)&mcast_addr
.sin_addr
,
600 ip_vs_backup_mcast_ifn
) < 0) {
601 IP_VS_ERR("Error joining to the multicast group\n");
614 ip_vs_send_async(struct socket
*sock
, const char *buffer
, const size_t length
)
616 struct msghdr msg
= {.msg_flags
= MSG_DONTWAIT
|MSG_NOSIGNAL
};
621 iov
.iov_base
= (void *)buffer
;
622 iov
.iov_len
= length
;
624 len
= kernel_sendmsg(sock
, &msg
, &iov
, 1, (size_t)(length
));
631 ip_vs_send_sync_msg(struct socket
*sock
, struct ip_vs_sync_mesg
*msg
)
637 /* Put size in network byte order */
638 msg
->size
= htons(msg
->size
);
640 if (ip_vs_send_async(sock
, (char *)msg
, msize
) != msize
)
641 IP_VS_ERR("ip_vs_send_async error\n");
645 ip_vs_receive(struct socket
*sock
, char *buffer
, const size_t buflen
)
647 struct msghdr msg
= {NULL
,};
653 /* Receive a packet */
654 iov
.iov_base
= buffer
;
655 iov
.iov_len
= (size_t)buflen
;
657 len
= kernel_recvmsg(sock
, &msg
, &iov
, 1, buflen
, 0);
667 static DECLARE_WAIT_QUEUE_HEAD(sync_wait
);
668 static pid_t sync_master_pid
= 0;
669 static pid_t sync_backup_pid
= 0;
671 static DECLARE_WAIT_QUEUE_HEAD(stop_sync_wait
);
672 static int stop_master_sync
= 0;
673 static int stop_backup_sync
= 0;
675 static void sync_master_loop(void)
678 struct ip_vs_sync_buff
*sb
;
680 /* create the sending multicast socket */
681 sock
= make_send_sock();
685 IP_VS_INFO("sync thread started: state = MASTER, mcast_ifn = %s, "
687 ip_vs_master_mcast_ifn
, ip_vs_master_syncid
);
690 while ((sb
=sb_dequeue())) {
691 ip_vs_send_sync_msg(sock
, sb
->mesg
);
692 ip_vs_sync_buff_release(sb
);
695 /* check if entries stay in curr_sb for 2 seconds */
696 if ((sb
= get_curr_sync_buff(2*HZ
))) {
697 ip_vs_send_sync_msg(sock
, sb
->mesg
);
698 ip_vs_sync_buff_release(sb
);
701 if (stop_master_sync
)
704 msleep_interruptible(1000);
707 /* clean up the sync_buff queue */
708 while ((sb
=sb_dequeue())) {
709 ip_vs_sync_buff_release(sb
);
712 /* clean up the current sync_buff */
713 if ((sb
= get_curr_sync_buff(0))) {
714 ip_vs_sync_buff_release(sb
);
717 /* release the sending multicast socket */
722 static void sync_backup_loop(void)
728 if (!(buf
= kmalloc(sync_recv_mesg_maxlen
, GFP_ATOMIC
))) {
729 IP_VS_ERR("sync_backup_loop: kmalloc error\n");
733 /* create the receiving multicast socket */
734 sock
= make_receive_sock();
738 IP_VS_INFO("sync thread started: state = BACKUP, mcast_ifn = %s, "
740 ip_vs_backup_mcast_ifn
, ip_vs_backup_syncid
);
743 /* do you have data now? */
744 while (!skb_queue_empty(&(sock
->sk
->sk_receive_queue
))) {
746 ip_vs_receive(sock
, buf
,
747 sync_recv_mesg_maxlen
)) <= 0) {
748 IP_VS_ERR("receiving message error\n");
751 /* disable bottom half, because it accessed the data
752 shared by softirq while getting/creating conns */
754 ip_vs_process_message(buf
, len
);
758 if (stop_backup_sync
)
761 msleep_interruptible(1000);
764 /* release the sending multicast socket */
772 static void set_sync_pid(int sync_state
, pid_t sync_pid
)
774 if (sync_state
== IP_VS_STATE_MASTER
)
775 sync_master_pid
= sync_pid
;
776 else if (sync_state
== IP_VS_STATE_BACKUP
)
777 sync_backup_pid
= sync_pid
;
780 static void set_stop_sync(int sync_state
, int set
)
782 if (sync_state
== IP_VS_STATE_MASTER
)
783 stop_master_sync
= set
;
784 else if (sync_state
== IP_VS_STATE_BACKUP
)
785 stop_backup_sync
= set
;
787 stop_master_sync
= set
;
788 stop_backup_sync
= set
;
792 static int sync_thread(void *startup
)
794 DECLARE_WAITQUEUE(wait
, current
);
798 struct ip_vs_sync_thread_data
*tinfo
= startup
;
800 /* increase the module use count */
801 ip_vs_use_count_inc();
803 if (ip_vs_sync_state
& IP_VS_STATE_MASTER
&& !sync_master_pid
) {
804 state
= IP_VS_STATE_MASTER
;
805 name
= "ipvs_syncmaster";
806 } else if (ip_vs_sync_state
& IP_VS_STATE_BACKUP
&& !sync_backup_pid
) {
807 state
= IP_VS_STATE_BACKUP
;
808 name
= "ipvs_syncbackup";
811 ip_vs_use_count_dec();
820 /* Block all signals */
821 spin_lock_irq(¤t
->sighand
->siglock
);
822 siginitsetinv(¤t
->blocked
, 0);
824 spin_unlock_irq(¤t
->sighand
->siglock
);
826 /* set the maximum length of sync message */
827 set_sync_mesg_maxlen(state
);
829 /* set up multicast address */
830 mcast_addr
.sin_family
= AF_INET
;
831 mcast_addr
.sin_port
= htons(IP_VS_SYNC_PORT
);
832 mcast_addr
.sin_addr
.s_addr
= htonl(IP_VS_SYNC_GROUP
);
834 add_wait_queue(&sync_wait
, &wait
);
836 set_sync_pid(state
, task_pid_nr(current
));
837 complete(tinfo
->startup
);
840 * once we call the completion queue above, we should
841 * null out that reference, since its allocated on the
842 * stack of the creating kernel thread
844 tinfo
->startup
= NULL
;
846 /* processing master/backup loop here */
847 if (state
== IP_VS_STATE_MASTER
)
849 else if (state
== IP_VS_STATE_BACKUP
)
853 remove_wait_queue(&sync_wait
, &wait
);
858 * If we weren't explicitly stopped, then we
859 * exited in error, and should undo our state
861 if ((!stop_master_sync
) && (!stop_backup_sync
))
862 ip_vs_sync_state
-= tinfo
->state
;
864 set_sync_pid(state
, 0);
865 IP_VS_INFO("sync thread stopped!\n");
869 /* decrease the module use count */
870 ip_vs_use_count_dec();
872 set_stop_sync(state
, 0);
873 wake_up(&stop_sync_wait
);
876 * we need to free the structure that was allocated
877 * for us in start_sync_thread
884 static int fork_sync_thread(void *startup
)
888 /* fork the sync thread here, then the parent process of the
889 sync thread is the init process after this thread exits. */
891 if ((pid
= kernel_thread(sync_thread
, startup
, 0)) < 0) {
892 IP_VS_ERR("could not create sync_thread due to %d... "
894 msleep_interruptible(1000);
902 int start_sync_thread(int state
, char *mcast_ifn
, __u8 syncid
)
904 DECLARE_COMPLETION_ONSTACK(startup
);
906 struct ip_vs_sync_thread_data
*tinfo
;
908 if ((state
== IP_VS_STATE_MASTER
&& sync_master_pid
) ||
909 (state
== IP_VS_STATE_BACKUP
&& sync_backup_pid
))
913 * Note that tinfo will be freed in sync_thread on exit
915 tinfo
= kmalloc(sizeof(struct ip_vs_sync_thread_data
), GFP_KERNEL
);
919 IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__
, task_pid_nr(current
));
920 IP_VS_DBG(7, "Each ip_vs_sync_conn entry need %Zd bytes\n",
921 sizeof(struct ip_vs_sync_conn
));
923 ip_vs_sync_state
|= state
;
924 if (state
== IP_VS_STATE_MASTER
) {
925 strlcpy(ip_vs_master_mcast_ifn
, mcast_ifn
,
926 sizeof(ip_vs_master_mcast_ifn
));
927 ip_vs_master_syncid
= syncid
;
929 strlcpy(ip_vs_backup_mcast_ifn
, mcast_ifn
,
930 sizeof(ip_vs_backup_mcast_ifn
));
931 ip_vs_backup_syncid
= syncid
;
934 tinfo
->state
= state
;
935 tinfo
->startup
= &startup
;
938 if ((pid
= kernel_thread(fork_sync_thread
, tinfo
, 0)) < 0) {
939 IP_VS_ERR("could not create fork_sync_thread due to %d... "
941 msleep_interruptible(1000);
945 wait_for_completion(&startup
);
951 int stop_sync_thread(int state
)
953 DECLARE_WAITQUEUE(wait
, current
);
955 if ((state
== IP_VS_STATE_MASTER
&& !sync_master_pid
) ||
956 (state
== IP_VS_STATE_BACKUP
&& !sync_backup_pid
))
959 IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__
, task_pid_nr(current
));
960 IP_VS_INFO("stopping sync thread %d ...\n",
961 (state
== IP_VS_STATE_MASTER
) ?
962 sync_master_pid
: sync_backup_pid
);
964 __set_current_state(TASK_UNINTERRUPTIBLE
);
965 add_wait_queue(&stop_sync_wait
, &wait
);
966 set_stop_sync(state
, 1);
967 ip_vs_sync_state
-= state
;
970 __set_current_state(TASK_RUNNING
);
971 remove_wait_queue(&stop_sync_wait
, &wait
);
973 /* Note: no need to reap the sync thread, because its parent
974 process is the init process */
976 if ((state
== IP_VS_STATE_MASTER
&& stop_master_sync
) ||
977 (state
== IP_VS_STATE_BACKUP
&& stop_backup_sync
))