2 * Copyright (c) 2016 Mohamed Aslan <maslan@sce.carleton.ca>
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
29 #include <net/if_arp.h>
30 #include <netinet/in.h>
31 #include <netinet/if_ether.h>
32 #include <arpa/inet.h>
33 #include <netinet/ip.h>
34 #include <sys/queue.h>
40 #include <netinet/ether.h>
44 when a packet arrives:
45 (1) learn (in port and src mac)
46 (2) check whether it's an arp or ip packet
59 #define DPID(dp) (be64toh(((struct of10_switch_features *)((dp)->sw_features))->datapath_id))
61 /* mactable's key entry */
64 uint8_t mac_addr
[ETHER_ADDR_LEN
];
67 static SLIST_HEAD(, dp_ptr
) dataplane_list
;
69 struct of_dataplane
*dp
;
70 SLIST_ENTRY(dp_ptr
) next
;
73 static SLIST_HEAD(, server
) server_list
;
75 struct in_addr ip_addr
;
76 struct ether_addr mac_addr
;
81 SLIST_ENTRY(server
) next
;
84 static struct actl_ctx
*ctx
;
85 static struct hashtab mactable
;
86 static struct in_addr vserver_ip
; /* virtual server's IP */
87 static struct ether_addr vserver_mac
; /* virtual server's MAC */
88 static int n_servers
= 0;
89 static int auto_arp
= 0;
90 static uint16_t ttl_idle
= DEFAULT_TIMEOUT
;
91 static uint16_t ttl_hard
= 0;
93 static struct option longopts
[] = {
94 { "ip_addr", required_argument
, NULL
, 'i' },
95 { "mac_addr", required_argument
, NULL
, 'm' },
96 { "servers", required_argument
, NULL
, 's' },
97 { "arp_servers", required_argument
, NULL
, 'a'},
98 { "poll", required_argument
, NULL
, 'p'},
99 { "kpi", required_argument
, NULL
, 'k'},
100 { "ttl_idle", required_argument
, NULL
, 't'},
101 { "ttl_hard", required_argument
, NULL
, 'h'},
/*
 * learn(): L2 learning step for a packet-in.
 *   dp   - dataplane the packet arrived on; its datapath id becomes part of the key
 *   p_in - packet-in message; p_in->data is read as an Ethernet header
 * Records p_in->in_port in mactable under the key (dpid, source MAC).
 * The port is stored exactly as it appears in the message (the debug print
 * below converts it with ntohs(), so it is kept in network byte order).
 */
108 learn(struct of_dataplane
*dp
, struct of10_packet_in
*p_in
)
110 struct ether_header
*eh
;
112 struct ether_addr broadcast_addr
;
114 eh
= (struct ether_header
*)(p_in
->data
);
/* build ff:ff:ff:ff:ff:ff to compare the source address against */
115 memset(&broadcast_addr
, 0xff, ETHER_ADDR_LEN
);
117 /* skip the load-balancing address from the learning process */
118 if (!memcmp(eh
->ether_shost
, &vserver_mac
, ETHER_ADDR_LEN
))
120 /* skip broadcast address */
121 if (!memcmp(eh
->ether_shost
, &broadcast_addr
, ETHER_ADDR_LEN
))
124 /* L2 learning (update mactable) */
/*
 * NOTE(review): the key is filled field by field but hashed over
 * sizeof(struct entry). If struct entry contains padding, those bytes must be
 * zeroed for later lookups to match -- the initialisation of `e` is not
 * visible here; confirm.
 */
125 e
.dpid
= be64toh(((struct of10_switch_features
*)(dp
->sw_features
))->datapath_id
);
126 memcpy(e
.mac_addr
, eh
->ether_shost
, ETHER_ADDR_LEN
);
127 hashtab_put(&mactable
, &e
, sizeof(struct entry
), &p_in
->in_port
, sizeof(uint16_t));
/*
 * NOTE(review): ether_ntoa() is handed the ether_shost byte array directly
 * rather than a struct ether_addr *; presumably this relies on the two having
 * the same layout -- confirm. %016llx is fed the result of be64toh(); verify
 * the argument type matches unsigned long long on the target.
 */
129 printf("[dpid=0x%016llx] learned that %s is at port %hu.\n", DPID(dp
), ether_ntoa(eh
->ether_shost
), ntohs(p_in
->in_port
));
/*
 * lookup(): query mactable for the port behind a MAC address on a given switch.
 *   dp   - dataplane whose datapath id forms the first half of the key
 *   ea   - Ethernet address to look up
 *   port - output parameter (how it is filled is not visible in this fragment)
 * Callers in this file treat a non-zero return as "found"
 * (see the `if (lookup(...))` / `if (!lookup(...))` call sites).
 */
134 lookup(struct of_dataplane
*dp
, struct ether_addr
*ea
, uint16_t *port
)
/* same (dpid, MAC) key layout that learn() stores under */
140 e
.dpid
= be64toh(((struct of10_switch_features
*)(dp
->sw_features
))->datapath_id
);
141 memcpy(e
.mac_addr
, ea
, ETHER_ADDR_LEN
);
143 printf("[dpid=0x%016llx] looking up %s.\n", DPID(dp
), ether_ntoa(ea
));
/* hashtab_get() hands back the stored value through p and its size through len */
145 if (hashtab_get(&mactable
, &e
, sizeof(struct entry
), &p
, &len
)) {
153 flood(struct of_controller
*ctl
, struct of_dataplane
*dp
, struct of10_packet_in
*p_in
)
155 struct of10_packet_out
*msg_out
;
156 struct of10_action_output
*action
;
158 msg_out
= (struct of10_packet_out
*)malloc(sizeof(struct of10_packet_out
) + sizeof(struct of10_action_output
));
161 msg_out
->hdr
.version
= OFP_VERSION_10
;
162 msg_out
->hdr
.type
= OFPT10_PACKET_OUT
;
163 msg_out
->hdr
.length
= htons(sizeof(struct of10_packet_out
) + sizeof(struct of10_action_output
));
164 msg_out
->hdr
.xid
= 0;
165 msg_out
->buffer_id
= p_in
->buffer_id
;
166 msg_out
->in_port
= p_in
->in_port
;
167 msg_out
->actions_len
= htons(sizeof(struct of10_action_output
));
168 action
= (struct of10_action_output
*)(msg_out
->actions
);
169 action
->type
= htons(OFPAT10_OUTPUT
);
170 action
->len
= htons(sizeof(struct of10_action_output
));
171 action
->port
= htons(OFPP10_FLOOD
);
173 ctl
->send(ctl
, dp
, (struct ofp_header
*)msg_out
);
176 printf("packet flooded.\n");
181 forward_install(struct of_controller
*ctl
, struct of_dataplane
*dp
, struct of10_packet_in
*p_in
, uint16_t out_port
)
183 struct ether_header
*eh
;
184 struct of10_flow_mod
*msg_mod
;
185 struct of10_action_output
*action
;
187 eh
= (struct ether_header
*)(p_in
->data
);
188 msg_mod
= (struct of10_flow_mod
*)malloc(sizeof(struct of10_flow_mod
) + sizeof(struct of10_action_output
));
192 msg_mod
->hdr
.version
= OFP_VERSION_10
;
193 msg_mod
->hdr
.type
= OFPT10_FLOW_MOD
;
194 msg_mod
->hdr
.length
= htons(sizeof(struct of10_flow_mod
) + sizeof(struct of10_action_output
));
195 msg_mod
->hdr
.xid
= 0;
197 msg_mod
->match
.wildcards
= htonl(OFPFW10_ALL
& ~(OFPFW10_DL_DST
));
198 memcpy(msg_mod
->match
.dl_dst
, eh
->ether_dhost
, OFP10_ETH_ALEN
);
201 msg_mod
->command
= htons(OFPFC10_ADD
);
202 msg_mod
->idle_timeout
= htons(ttl_idle
);
203 msg_mod
->hard_timeout
= htons(ttl_hard
);
204 msg_mod
->priority
= htons(0xffff);
205 msg_mod
->buffer_id
= p_in
->buffer_id
;
206 msg_mod
->out_port
= out_port
; /* already in net byte order */
209 action
= (struct of10_action_output
*)(msg_mod
->actions
);
210 action
->type
= htons(OFPAT10_OUTPUT
);
211 action
->len
= htons(sizeof(struct of10_action_output
));
212 action
->port
= out_port
; /* already in net byte order */
214 ctl
->send(ctl
, dp
, (struct ofp_header
*)msg_mod
);
/*
 * rewrite_install(): install the pair of flow entries that steer one client
 * to one backend server, rewriting addresses in both directions.
 *   ctl      - controller used to send the messages (ctl->send)
 *   dp       - dataplane that receives the flow-mods
 *   p_in     - packet-in that triggered the decision; supplies the MACs to
 *              match on, the buffer id and the client's ingress port
 *   srv      - chosen backend server (ip_addr and mac_addr are written into actions)
 *   out_port - port towards the server, already in network byte order
 *
 * Flow 1 (source -> server): match dl_src/dl_dst of the packet, set nw_dst and
 * dl_dst to the server's addresses, output to out_port.
 * Flow 2 (server -> source): match server MAC -> client MAC, set nw_src and
 * dl_src to the virtual server's addresses, output to p_in->in_port.
 * Both messages are built in the same buffer and sent with ctl->send().
 */
219 rewrite_install(struct of_controller
*ctl
, struct of_dataplane
*dp
, struct of10_packet_in
*p_in
, struct server
*srv
, uint16_t out_port
)
221 struct ether_header
*eh
;
222 struct of10_flow_mod
*msg_mod
;
223 struct of10_action_nw_addr
*action0
;
224 struct of10_action_dl_addr
*action1
;
225 struct of10_action_output
*action2
;
227 eh
= (struct ether_header
*)(p_in
->data
);
/*
 * NOTE(review): the buffer is sized for three of10_action_output structs, but
 * hdr.length below (and the actions actually written) use
 * of10_action_nw_addr + of10_action_dl_addr + of10_action_output. If these
 * structs follow the OpenFlow 1.0 wire sizes (output 8, nw_addr 8,
 * dl_addr 16 bytes) the allocation is 8 bytes short and the writes through
 * action2 run past the end of the buffer -- confirm the struct sizes and size
 * the allocation with the same expression as hdr.length.
 */
228 msg_mod
= (struct of10_flow_mod
*)malloc(sizeof(struct of10_flow_mod
) + 3 * sizeof(struct of10_action_output
));
232 /* source -> server */
234 msg_mod
->hdr
.version
= OFP_VERSION_10
;
235 msg_mod
->hdr
.type
= OFPT10_FLOW_MOD
;
236 msg_mod
->hdr
.length
= htons(sizeof(struct of10_flow_mod
) + sizeof(struct of10_action_nw_addr
) +
237 sizeof(struct of10_action_dl_addr
) + sizeof(struct of10_action_output
));
238 msg_mod
->hdr
.xid
= 0;
/* match only on the Ethernet source and destination; every other field is wildcarded */
240 msg_mod
->match
.wildcards
= htonl(OFPFW10_ALL
& ~(OFPFW10_DL_SRC
| OFPFW10_DL_DST
));
241 memcpy(msg_mod
->match
.dl_src
, eh
->ether_shost
, OFP10_ETH_ALEN
);
242 memcpy(msg_mod
->match
.dl_dst
, eh
->ether_dhost
, OFP10_ETH_ALEN
);
245 msg_mod
->command
= htons(OFPFC10_ADD
);
246 msg_mod
->idle_timeout
= htons(ttl_idle
);
247 msg_mod
->hard_timeout
= htons(ttl_hard
);
248 msg_mod
->priority
= htons(0xffff);
/* reuse the switch-side buffer of the triggering packet for the first flow */
249 msg_mod
->buffer_id
= p_in
->buffer_id
;
250 msg_mod
->out_port
= out_port
; /* already in net byte order */
253 /* rewrite ip address */
254 action0
= (struct of10_action_nw_addr
*)(msg_mod
->actions
);
255 action0
->type
= htons(OFPAT10_SET_NW_DST
);
256 action0
->len
= htons(sizeof(struct of10_action_nw_addr
));
257 action0
->nw_addr
= srv
->ip_addr
.s_addr
; /* already in net byte order */
258 /* rewrite ether address */
/* ++action0 steps past one of10_action_nw_addr, i.e. to the next action slot */
259 action1
= (struct of10_action_dl_addr
*)(++action0
);
260 action1
->type
= htons(OFPAT10_SET_DL_DST
);
261 action1
->len
= htons(sizeof(struct of10_action_dl_addr
));
262 memcpy(&action1
->dl_addr
, &srv
->mac_addr
, OFP10_ETH_ALEN
);
/* third action: forward the rewritten packet towards the server */
264 action2
= (struct of10_action_output
*)(++action1
);
265 action2
->type
= htons(OFPAT10_OUTPUT
);
266 action2
->len
= htons(sizeof(struct of10_action_output
));
267 action2
->port
= out_port
; /* already in net byte order */
268 action2
->max_len
= 0;
269 /* send flow mod message */
270 ctl
->send(ctl
, dp
, (struct ofp_header
*)msg_mod
);
272 printf("[LB] rewrite and install (1).\n");
275 /* server -> source */
/*
 * The same buffer is reused for the reverse flow. Only the fields assigned
 * below change; header, command, timeouts, priority, msg_mod->out_port and the
 * len fields of all three actions (plus action2's type and max_len) keep the
 * values written for the first flow.
 */
277 memcpy(msg_mod
->match
.dl_src
, &srv
->mac_addr
, OFP10_ETH_ALEN
);
278 memcpy(msg_mod
->match
.dl_dst
, eh
->ether_shost
, OFP10_ETH_ALEN
);
/* no buffered packet is attached to the reverse flow */
279 msg_mod
->buffer_id
= htonl(-1);
280 /* rewrite ip address */
281 action0
= (struct of10_action_nw_addr
*)(msg_mod
->actions
);
282 action0
->type
= htons(OFPAT10_SET_NW_SRC
);
283 action0
->nw_addr
= vserver_ip
.s_addr
; /* already in net byte order */
284 /* rewrite ether address */
285 action1
= (struct of10_action_dl_addr
*)(++action0
);
286 action1
->type
= htons(OFPAT10_SET_DL_SRC
);
287 memcpy(&action1
->dl_addr
, &vserver_mac
, OFP10_ETH_ALEN
);
/* replies leave through the port the client's packet came in on (copied unconverted from p_in) */
289 action2
= (struct of10_action_output
*)(++action1
);
290 action2
->port
= p_in
->in_port
;
291 /* send flow mod message */
292 ctl
->send(ctl
, dp
, (struct ofp_header
*)msg_mod
);
295 printf("[LB] rewrite and install (2).\n");
/*
 * loadbalance(): pick the least-loaded resolved server and install the
 * rewrite flows towards it.
 *   ctl  - controller handle, passed through to rewrite_install()
 *   dp   - dataplane the packet arrived on
 *   p_in - packet-in destined to the virtual server
 * Servers whose ip_resolved flag is 0 are skipped. If the chosen server's MAC
 * is not in the mactable, the output port falls back to OFPP10_FLOOD.
 */
300 loadbalance(struct of_controller
*ctl
, struct of_dataplane
*dp
, struct of10_packet_in
*p_in
)
304 struct server
*srv
, *llsrv
= NULL
;
309 /* find least-loaded server */
/*
 * NOTE(review): SLIST_FIRST() yields NULL on an empty list and srv->load is
 * read right after it with no visible check; with no servers configured this
 * dereferences NULL -- confirm a guard exists in the lines not shown.
 */
310 srv
= SLIST_FIRST(&server_list
);
311 least_load
= srv
->load
;
312 SLIST_FOREACH(srv
, &server_list
, next
) {
/*
 * NOTE(review): load is printed with %lld here but formatted/parsed with %llu
 * elsewhere in this file; confirm the declared type of server.load.
 */
313 printf("[LB] server %s -> load is %lld bytes/sec (status is %d).\n", inet_ntoa(srv
->ip_addr
), srv
->load
, srv
->ip_resolved
);
314 if (!srv
->ip_resolved
)
315 continue; /* next server */
/* the first resolved server always wins (llsrv == NULL); later ones only if strictly less loaded */
316 if ((llsrv
== NULL
) || (srv
->load
< least_load
)) {
317 least_load
= srv
->load
;
322 return; /* TODO: drop packet */
324 printf("[LB] least-loaded server is %s (with load = %lld bytes/sec).\n", inet_ntoa(llsrv
->ip_addr
), llsrv
->load
);
/* unknown server port: flood instead (value kept in network byte order like lookup()'s callers expect) */
325 if (!lookup(dp
, &llsrv
->mac_addr
, &llsrv_port
))
326 llsrv_port
= htons(OFPP10_FLOOD
);
327 printf("[LB] least-loaded server's port is %hu.\n", ntohs(llsrv_port
));
328 rewrite_install(ctl
, dp
, p_in
, llsrv
, llsrv_port
);
/*
 * learn_servers(): L3 learning from an ARP packet.
 *   arp - ARP payload whose sender hardware/protocol addresses are inspected
 * For every configured server whose IP equals the ARP sender IP, copies the
 * sender MAC into srv->mac_addr and sets srv->ip_resolved to 1.
 */
332 learn_servers(struct ether_arp
*arp
)
334 struct ether_addr
*arp_sha
;
335 struct in_addr
*arp_spa
;
/*
 * NOTE(review): arp_sha/arp_spa are byte arrays inside struct ether_arp that
 * are reinterpreted through struct pointers; reading arp_spa->s_addr this way
 * presumably assumes the field is suitably aligned -- confirm, or copy with
 * memcpy() as done for the MAC below.
 */
338 arp_sha
= (struct ether_addr
*)arp
->arp_sha
;
339 arp_spa
= (struct in_addr
*)arp
->arp_spa
;
341 /* check it's one of the servers (L3 learning) */
342 SLIST_FOREACH(srv
, &server_list
, next
) {
343 if (arp_spa
->s_addr
== srv
->ip_addr
.s_addr
) {
344 memcpy(&srv
->mac_addr
, arp_sha
, ETHER_ADDR_LEN
);
/* mark the server usable for load balancing (loadbalance() skips unresolved servers) */
345 srv
->ip_resolved
= 1;
346 printf("learned (L3) that server %s is at %s.\n", inet_ntoa(srv
->ip_addr
), ether_ntoa(&srv
->mac_addr
));
353 handle_arp(struct of_controller
*ctl
, struct of_dataplane
*dp
, struct of10_packet_in
*ph_in
)
355 struct ether_header
*eh_in
, *eh_out
;
356 struct ether_arp
*arp_in
, *arp_out
;
357 struct of10_packet_out
*msg_out
;
358 struct of10_action_output
*action
;
359 struct in_addr
*arp_in_tpa
;
362 eh_in
= (struct ether_header
*)(ph_in
->data
);
363 arp_in
= (struct ether_arp
*)(ph_in
->data
+ ETHER_HDR_LEN
);
365 /* check if it's valid or not */
366 if (!(arp_in
->ea_hdr
.ar_hrd
== htons(ARPHRD_ETHER
)) ||
367 !(arp_in
->ea_hdr
.ar_pro
== htons(ETHERTYPE_IP
)))
369 /* check if it's an arp reply */
370 if (arp_in
->ea_hdr
.ar_op
== htons(ARPOP_REPLY
)) {
371 printf("[ARP] %u.%u.%u.%u is at %02x:%02x:%02x:%02x:%02x:%02x\n",
372 arp_in
->arp_spa
[0], arp_in
->arp_spa
[1], arp_in
->arp_spa
[2], arp_in
->arp_spa
[3],
373 arp_in
->arp_sha
[0], arp_in
->arp_sha
[1], arp_in
->arp_sha
[2], arp_in
->arp_sha
[3],
374 arp_in
->arp_sha
[4], arp_in
->arp_sha
[5]);
375 learn_servers(arp_in
); /* L3 learn about the servers */
376 flood(ctl
, dp
, ph_in
); /* XXX: lookup() */
380 /* now, ensure it's an arp request */
381 if (arp_in
->ea_hdr
.ar_op
!= htons(ARPOP_REQUEST
))
384 printf("[ARP] who has %u.%u.%u.%u tell %u.%u.%u.%u\n",
385 arp_in
->arp_tpa
[0], arp_in
->arp_tpa
[1], arp_in
->arp_tpa
[2], arp_in
->arp_tpa
[3],
386 arp_in
->arp_spa
[0], arp_in
->arp_spa
[1], arp_in
->arp_spa
[2], arp_in
->arp_spa
[3]);
388 /* check if it's destined to vserver or not */
389 arp_in_tpa
= (struct in_addr
*)arp_in
->arp_tpa
;
390 if (arp_in_tpa
->s_addr
!= vserver_ip
.s_addr
) {
391 printf("[ARP] arp is not for a load balanced host.\n");
392 flood(ctl
, dp
, ph_in
);
395 printf("[ARP] arp is for a load balanced host.\n");
397 /* fake arp response */
398 msg_out
= (struct of10_packet_out
*)
399 malloc(sizeof(struct of10_packet_out
) + sizeof(struct of10_action_output
)
400 + ETHER_HDR_LEN
+ sizeof(struct ether_arp
));
404 eh_out = (struct ether_header *)(msg_out + sizeof(struct of10_packet_out) +
405 sizeof(struct of10_action_output));
406 arp_out = (struct ether_arp *)(msg_out + sizeof(struct of10_packet_out) +
407 sizeof(struct of10_action_output) + ETHER_HDR_LEN);
410 p
= (uint8_t *)(msg_out
) + sizeof(struct of10_packet_out
) +
411 sizeof(struct of10_action_output
);
412 eh_out
= (struct ether_header
*)p
;
414 arp_out
= (struct ether_arp
*)p
;
417 arp_out
->ea_hdr
.ar_hrd
= htons(ARPHRD_ETHER
);
418 arp_out
->ea_hdr
.ar_pro
= htons(ETHERTYPE_IP
);
419 arp_out
->ea_hdr
.ar_op
= htons(ARPOP_REPLY
);
420 arp_out
->ea_hdr
.ar_hln
= arp_in
->ea_hdr
.ar_hln
;
421 arp_out
->ea_hdr
.ar_pln
= arp_in
->ea_hdr
.ar_pln
;
422 memcpy(arp_out
->arp_tha
, arp_in
->arp_sha
, sizeof(arp_out
->arp_tha
));
423 memcpy(arp_out
->arp_tpa
, arp_in
->arp_spa
, sizeof(arp_out
->arp_tpa
));
424 memcpy(arp_out
->arp_sha
, &vserver_mac
, sizeof(arp_out
->arp_sha
));
425 memcpy(arp_out
->arp_spa
, &vserver_ip
, sizeof(arp_out
->arp_spa
));
427 eh_out
->ether_type
= ntohs(ETHERTYPE_ARP
);
428 memcpy(eh_out
->ether_dhost
, eh_in
->ether_shost
, sizeof(eh_out
->ether_dhost
));
429 memcpy(eh_out
->ether_shost
, &vserver_mac
, sizeof(eh_out
->ether_shost
));
431 msg_out
->hdr
.version
= OFP_VERSION_10
;
432 msg_out
->hdr
.type
= OFPT10_PACKET_OUT
;
433 msg_out
->hdr
.length
= htons(sizeof(struct of10_packet_out
) + sizeof(struct of10_action_output
) + ETHER_HDR_LEN
+ sizeof(struct ether_arp
));
434 msg_out
->hdr
.xid
= 0;
435 msg_out
->buffer_id
= htonl(-1);
436 msg_out
->in_port
= ph_in
->in_port
;
437 msg_out
->actions_len
= htons(sizeof(struct of10_action_output
));
438 action
= (struct of10_action_output
*)(msg_out
->actions
);
439 action
->type
= htons(OFPAT10_OUTPUT
);
440 action
->len
= htons(sizeof(struct of10_action_output
));
441 action
->port
= htons(OFPP10_IN_PORT
);
443 ctl
->send(ctl
, dp
, (struct ofp_header
*)msg_out
);
/*
 * handle_ip(): dispatch an IPv4 packet-in.
 *   ctl  - controller handle
 *   dp   - dataplane the packet arrived on
 *   p_in - packet-in; p_in->data is read as Ethernet header followed by the IP header
 * Packets addressed to the virtual server IP go to loadbalance(). Otherwise
 * the destination MAC is looked up: a hit installs a forwarding flow, a miss
 * floods the packet.
 */
448 handle_ip(struct of_controller
*ctl
, struct of_dataplane
*dp
, struct of10_packet_in
*p_in
)
450 struct ether_header
*eh_in
;
454 eh_in
= (struct ether_header
*)(p_in
->data
);
/*
 * NOTE(review): the cast names struct ether_arp, yet ip_in is read through
 * ip_src/ip_dst below, which are IP-header fields. The cast looks like a
 * copy/paste slip and should presumably match ip_in's declared type
 * (declaration not visible here) -- confirm.
 */
455 ip_in
= (struct ether_arp
*)(p_in
->data
+ ETHER_HDR_LEN
);
/*
 * inet_ntoa() is called twice, so each result is duplicated before the next call.
 * NOTE(review): the strdup() results are not checked for NULL, and no matching
 * free() is visible in this fragment -- confirm they are released.
 */
457 char *x
= strdup(inet_ntoa(ip_in
->ip_src
));
458 char *y
= strdup(inet_ntoa(ip_in
->ip_dst
));
459 printf("[IP] from %s to %s\n", x
, y
);
463 /* check if it's destined to vserver or not */
464 if (ip_in
->ip_dst
.s_addr
== vserver_ip
.s_addr
) {
465 printf("[IP] packet is for a load balanced host.\n");
466 loadbalance(ctl
, dp
, p_in
);
/* ordinary traffic: plain L2 forwarding based on the learned MAC table */
470 if (lookup(dp
, (struct ether_addr
*)eh_in
->ether_dhost
, &out_port
)) {
471 printf("[IP] forward (port = %hu) and install flow rule.\n", ntohs(out_port
));
472 forward_install(ctl
, dp
, p_in
, out_port
);
/* NOTE(review): ether_ntoa() receives the byte array without the cast used for lookup() above */
475 printf("[IP] flood (%s not found).\n", ether_ntoa(eh_in
->ether_dhost
));
476 flood(ctl
, dp
, p_in
);
/*
 * handle_stats(): process a stats reply from a switch.
 *   ctl - controller handle (not referenced in the visible lines)
 *   dp  - dataplane that sent the reply
 *   msg - OpenFlow message, reinterpreted as struct of10_stats_reply
 * Only OFPST10_PORT replies are processed. The reply's tx_bytes counter is
 * matched to a configured server through the static topology (dpid + port ->
 * host IP); the server's load is recomputed as a byte rate and published
 * through ctx->put() under the server's IP string.
 */
481 handle_stats(struct of_controller
*ctl
, struct of_dataplane
*dp
, struct ofp_header
*msg
)
484 struct of10_stats_reply
*sts
;
485 struct of10_port_stats
*psts
;
487 struct host_info
**hosts
;
492 sts
= (struct of10_stats_reply
*)msg
;
494 printf("[DEBUG] stats received type=0x%x, size=0x%x.\n", ntohs(sts
->type
), ntohs(sts
->hdr
.length
));
496 /* process only port stats for now */
497 if (ntohs(sts
->type
) != OFPST10_PORT
)
/*
 * NOTE(review): psts is declared as struct of10_port_stats * but the body is
 * cast to struct of10_flow_stats *; the cast should presumably name
 * of10_port_stats. Only the first entry of the body is read in the visible
 * lines -- confirm whether multi-port replies need iterating.
 */
499 psts
= (struct of10_flow_stats
*)sts
->body
;
503 printf("[DEBUG] dpid=0x%016llx,port=%u load is now %llu bytes.\n", DPID(dp
), ntohs(psts
->port_no
), be64toh(psts
->tx_bytes
));
505 /* read static topology information */
506 ctx
->topo(ctx
, &hosts
, &n
);
507 for (i
= 0 ; i
< n
; i
++) {
/* find the host attached to the (switch, port) this reply describes */
508 if ((DPID(dp
) == hosts
[i
]->dpid
) && (ntohs(psts
->port_no
) == hosts
[i
]->port
)) {
509 SLIST_FOREACH(srv
, &server_list
, next
) {
510 if (srv
->ip_addr
.s_addr
== hosts
[i
]->ip_addr
.s_addr
) {
/*
 * rate = (bytes since previous sample) / (now - timestamp of previous sample)
 * NOTE(review): if `now` equals srv->load_ts (e.g. two replies within the same
 * clock tick) the divisor is zero; how `now` is obtained and whether load_ts is
 * refreshed is not visible here -- confirm a guard exists.
 */
512 srv
->load
= (uint64_t)(be64toh(psts
->tx_bytes
) - srv
->load_bytes
) / (now
- srv
->load_ts
);
513 srv
->load_bytes
= (uint64_t)be64toh(psts
->tx_bytes
);
515 printf("[INFO] server %s load is now %llu bytes/sec.\n", inet_ntoa(srv
->ip_addr
), srv
->load
);
/* publish the load as a decimal string keyed by the server's dotted-quad IP */
/* NOTE(review): asprintf()'s return value is discarded; the condition guarding the errx() below is not visible */
522 (void)asprintf(&loadstr
, "%llu", srv
->load
);
524 errx(1, "failed to allocate memory");
/*
 * NOTE(review): `n` also holds the host count that bounds the `i < n` loop
 * above; if this assignment executes inside that loop it overwrites the loop
 * bound with put()'s result -- confirm nesting, or use a separate variable.
 * Release of loadstr is not visible in this fragment.
 */
525 n
= ctx
->put(ctx
, inet_ntoa(srv
->ip_addr
), loadstr
);
527 printf("[INSERT] put(%s, %s) failed.\n", inet_ntoa(srv
->ip_addr
), loadstr
);
529 printf("[INSERT] put(%s, %s) succeeded.\n", inet_ntoa(srv
->ip_addr
), loadstr
);
534 handle_packet_in(struct of_controller
*ctl
, struct of_dataplane
*dp
, struct ofp_header
*msg_in
)
536 struct ether_header
*eh
;
537 struct of10_packet_in
*p_in
;
540 p_in
= (struct of10_packet_in
*)msg_in
;
541 eh
= (struct ether_header
*)(p_in
->data
);
544 printf("in_port: %hu, ", ntohs(p_in
->in_port
));
545 printf("%02x:%02x:%02x:%02x:%02x:%02x ->", eh
->ether_shost
[0], eh
->ether_shost
[1],
546 eh
->ether_shost
[2], eh
->ether_shost
[3], eh
->ether_shost
[4],
548 printf(" %02x:%02x:%02x:%02x:%02x:%02x, ", eh
->ether_dhost
[0], eh
->ether_dhost
[1],
549 eh
->ether_dhost
[2], eh
->ether_dhost
[3], eh
->ether_dhost
[4],
551 printf("type: 0x%hx ", ntohs(eh
->ether_type
));
552 if (ntohs(eh
->ether_type
) == ETHERTYPE_ARP
)
554 else if (ntohs(eh
->ether_type
) == ETHERTYPE_IP
)
557 printf("packet_in reason: 0x%x\n", (unsigned int)p_in
->reason
);
563 /* check if it's an arp packet */
564 if (ntohs(eh
->ether_type
) == ETHERTYPE_ARP
)
565 handle_arp(ctl
, dp
, p_in
);
566 /* check if it's an ip packet */
567 else if (ntohs(eh
->ether_type
) == ETHERTYPE_IP
)
568 handle_ip(ctl
, dp
, p_in
);
572 handle_ofp_messages(struct of_controller
*ctl
, struct of_dataplane
*dp
, struct ofp_header
*msg_in
)
574 printf("ofp_messages [dpid = 0x%016llx, type=0x%x].\n", DPID(dp
), (unsigned int)msg_in
->type
);
575 if (msg_in
->type
== OFPT10_PACKET_IN
)
576 handle_packet_in(ctl
, dp
, msg_in
);
577 else if (msg_in
->type
== OFPT10_STATS_REPLY
)
578 handle_stats(ctl
, dp
, msg_in
);
581 static struct of10_packet_out
*
582 arp_request(struct in_addr addr
)
584 struct ether_header
*eh
;
585 struct ether_arp
*arp
;
586 struct of10_packet_out
*msg
;
587 struct of10_action_output
*action
;
590 msg
= (struct of10_packet_out
*)
591 malloc(sizeof(struct of10_packet_out
) + sizeof(struct of10_action_output
)
592 + ETHER_HDR_LEN
+ sizeof(struct ether_arp
));
596 p
= (uint8_t *)(msg
) + sizeof(struct of10_packet_out
) +
597 sizeof(struct of10_action_output
);
598 eh
= (struct ether_header
*)p
;
600 arp
= (struct ether_arp
*)p
;
603 arp
->ea_hdr
.ar_hrd
= htons(ARPHRD_ETHER
);
604 arp
->ea_hdr
.ar_pro
= htons(ETHERTYPE_IP
);
605 arp
->ea_hdr
.ar_op
= htons(ARPOP_REQUEST
);
606 arp
->ea_hdr
.ar_hln
= ETHER_ADDR_LEN
;
607 arp
->ea_hdr
.ar_pln
= sizeof(in_addr_t
);
608 memset(arp
->arp_tha
, 0xff, ETHER_ADDR_LEN
);
609 memcpy(arp
->arp_tpa
, &addr
.s_addr
, sizeof(arp
->arp_tpa
));
610 memcpy(arp
->arp_sha
, &vserver_mac
, ETHER_ADDR_LEN
);
611 memcpy(arp
->arp_spa
, &vserver_ip
.s_addr
, sizeof(arp
->arp_spa
));
613 eh
->ether_type
= ntohs(ETHERTYPE_ARP
);
614 memset(eh
->ether_dhost
, 0xff, ETHER_ADDR_LEN
);
615 memcpy(eh
->ether_shost
, &vserver_mac
, ETHER_ADDR_LEN
);
617 msg
->hdr
.version
= OFP_VERSION_10
;
618 msg
->hdr
.type
= OFPT10_PACKET_OUT
;
619 msg
->hdr
.length
= htons(sizeof(struct of10_packet_out
) + sizeof(struct of10_action_output
) + ETHER_HDR_LEN
+ sizeof(struct ether_arp
));
621 msg
->buffer_id
= htonl(-1);
622 msg
->in_port
= 0xffff; /* XXX: OFPP10_NONE */
623 msg
->actions_len
= htons(sizeof(struct of10_action_output
));
624 action
= (struct of10_action_output
*)(msg
->actions
);
625 action
->type
= htons(OFPAT10_OUTPUT
);
626 action
->len
= htons(sizeof(struct of10_action_output
));
627 action
->port
= htons(OFPP10_FLOOD
);
633 vserver_stats_request(struct of_controller
*ctl
, struct of_dataplane
*dp
, uint16_t port
)
635 struct of10_stats_request
*msg
;
636 struct of10_port_stats_request
*body
;
638 msg
= (struct of10_stats_request
*)malloc(sizeof(struct of10_stats_request
) + sizeof(struct of10_port_stats_request
));
642 msg
->hdr
.version
= OFP_VERSION_10
;
643 msg
->hdr
.type
= OFPT10_STATS_REQUEST
;
644 msg
->hdr
.length
= htons(sizeof(struct of10_stats_request
) + sizeof(struct of10_port_stats_request
));
647 msg
->type
= htons(OFPST10_PORT
);
651 body
= (struct of10_port_stats_request
*)(msg
->body
);
653 body
->port_no
= htons(port
);
655 ctl
->send(ctl
, dp
, (struct ofp_header
*)msg
);
659 static struct of_dataplane
*
660 dpid2ptr(uint64_t dpid
)
664 SLIST_FOREACH(dptr
, &dataplane_list
, next
)
665 if ((dptr
->dp
) && (DPID(dptr
->dp
) == dpid
))
/*
 * poll(): periodic load refresh (registered as the timer callback in main()).
 *   ctl - controller used to send stats requests
 * For each configured server, walks the static topology; when a host entry
 * has the server's IP, a port-stats request is sent to that host's switch and
 * port. The visible lines also query ctx->get() with the server's IP string
 * and parse the returned decimal string into srv->load.
 * NOTE(review): the name matches POSIX poll(2); presumably <poll.h> is never
 * pulled in by this translation unit -- confirm.
 */
671 poll(struct of_controller
*ctl
)
674 int i
, n
, ret
, local
;
675 struct host_info
**hosts
;
676 struct of_dataplane
*dp
;
680 /* read static topology information */
681 ctx
->topo(ctx
, &hosts
, &n
);
683 /* XXX: poll local servers only */
684 SLIST_FOREACH(srv
, &server_list
, next
) {
685 if (!srv
->ip_resolved
)
688 for (i
= 0 ; i
< n
; i
++) {
689 if (srv
->ip_addr
.s_addr
== hosts
[i
]->ip_addr
.s_addr
) {
/*
 * NOTE(review): dp is dereferenced by DPID(dp) and passed on below; if
 * dpid2ptr() can return NULL for a switch that is not connected, a check is
 * needed -- none is visible in this fragment; confirm.
 */
691 dp
= dpid2ptr(hosts
[i
]->dpid
);
695 printf("[DEBUG] poll(): server %s at 0x%016llx.\n", inet_ntoa(srv
->ip_addr
), DPID(dp
));
/* hosts[i]->port is passed as-is; vserver_stats_request() applies htons() to it */
697 vserver_stats_request(ctl
, dp
, hosts
[i
]->port
);
698 break; /* XXX: support multi-connections */
701 /* check dht for remote servers */
703 ret
= ctx
->get(ctx
, inet_ntoa(srv
->ip_addr
), &loadstr
);
705 printf("[QUERY] get(%s) failed.\n", inet_ntoa(srv
->ip_addr
));
708 printf("[QUERY] get(%s) = %s succeeded.\n", inet_ntoa(srv
->ip_addr
), loadstr
);
/* NOTE(review): sscanf()'s result is not checked and release of loadstr is not visible here */
709 sscanf(loadstr
, "%llu", &srv
->load
);
716 handle_connection_up(struct of_controller
*ctl
, struct of_dataplane
*dp
)
718 struct ofp_header
*msg
;
724 printf("[INFO] connection_up [dpid = 0x%016llx].\n", dpid
);
727 printf("[INFO] auto_arp is ON.\n");
728 SLIST_FOREACH(srv
, &server_list
, next
) {
729 printf("[INFO] learning about server %s.\n", inet_ntoa(srv
->ip_addr
));
730 msg
= arp_request(srv
->ip_addr
);
731 ctl
->send(ctl
, dp
, (struct ofp_header
*)msg
);
735 dptr
= (struct dp_ptr
*)malloc(sizeof(struct dp_ptr
));
737 SLIST_INSERT_HEAD(&dataplane_list
, dptr
, next
);
738 printf("[INFO] switch [dpid = 0x%016llx] is ready.\n", dpid
);
742 init(struct actl_ctx
*c
)
745 printf("l3_lb initialized.\n");
750 main(int argc
, char **argv
)
752 int i
, ch
, poll_period
= DEFAULT_POLL_PERIOD
;
755 struct ether_addr
*ea
;
760 for (i
= 0 ; i
< argc
; i
++) {
761 printf("arg[%d] = %s.\n", i
, argv
[i
]);
765 SLIST_INIT(&dataplane_list
);
766 SLIST_INIT(&server_list
);
767 while ((ch
= getopt_long(argc
, argv
, "i:m:s:a:p:k:t:h:", longopts
, NULL
)) != -1) {
770 if (!inet_aton(optarg
, &vserver_ip
))
771 errx(1, "invalid IP address");
774 if ((ea
= ether_aton(optarg
)) == NULL
)
775 errx(1, "invalid MAC address");
779 srv
= (struct server
*)malloc(sizeof(struct server
));
780 if (!inet_aton(optarg
, &srv
->ip_addr
))
781 errx(1, "invalid IP address");
782 srv
->ip_resolved
= 0;
785 srv
->load_ts
= time(NULL
);
786 SLIST_INSERT_HEAD(&server_list
, srv
, next
);
790 if (!strcmp(optarg
, "yes"))
792 else if (!strcmp(optarg
, "no"))
795 errx(1, "incorrect option for argument '-a'");
798 poll_period
= strtonum(optarg
, 1, 60, &errstr
);
800 errx(1, "invalid polling period %s: %s", errstr
, optarg
);
803 tar_kpi
= (double)strtonum(optarg
, 1, 100000, &errstr
);
805 errx(1, "invalid target KPI %s:%s", errstr
, optarg
);
806 ctx
->adapt(ctx
, tar_kpi
);
809 ttl_idle
= (uint16_t)strtonum(optarg
, 0, 1024, &errstr
);
811 errx(1, "flow idle ttl out-of-range %s:%s", errstr
, optarg
);
814 ttl_hard
= (uint16_t)strtonum(optarg
, 0, 1024, &errstr
);
816 errx(1, "flow hard ttl out-of-range %s:%s", errstr
, optarg
);
819 /* TODO: default -> usage() */
824 /* initialize the mactable */
825 if (!hashtab_init(&mactable
, 12, NULL
))
828 printf("Virtual Server IP: %s\n", inet_ntoa(vserver_ip
));
829 printf("Virtual Server MAC: %s\n", ether_ntoa(&vserver_mac
));
830 printf("Number of Physical Servers = %d.\n", n_servers
);
832 SLIST_FOREACH(srv
, &server_list
, next
)
833 printf("Server%d IP: %s, MAC: %s\n", ++i
, inet_ntoa(srv
->ip_addr
), ether_ntoa(&srv
->mac_addr
));
834 printf("Target KPI = %lf.\n", tar_kpi
);
835 printf("TTL IDLE = %hu, HARD = %hu.\n", ttl_idle
, ttl_hard
);
837 printf("L3 LB initialized.\n");
840 ctx
->cntl
->timer(ctx
->cntl
, poll_period
* 1000, poll
);
846 handler(struct of_event
*ev
)
849 printf("l3_lb_handler.\n");
851 if (ev
->type
== OFEV_CONNECTION_UP
)
852 handle_connection_up(ctx
->cntl
, ev
->dp
);
853 else if (ev
->type
== OFEV_PROTO_MESSAGE
)
854 handle_ofp_messages(ctx
->cntl
, ev
->dp
, ev
->ofp_hdr
);
860 double avg
, stddev
= 0;
866 printf("l3_lb_kpi.\n");
872 srv
= SLIST_FIRST(&server_list
);
873 SLIST_FOREACH(srv
, &server_list
, next
) {
874 if (!srv
->ip_resolved
)
883 srv
= SLIST_FIRST(&server_list
);
884 SLIST_FOREACH(srv
, &server_list
, next
) {
885 if (!srv
->ip_resolved
)
887 stddev
+= (srv
->load
- avg
) * (srv
->load
- avg
);
890 stddev
= sqrt(stddev
);
892 printf("l3_lb_kpi: KPI = %lf.\n", stddev
);