convert kmeans_incr threshold
[actl.git] / app-l3_lb / l3_lb.c
blobac249769b8f304c58583d2035ca540610e85213a
/*
 * Copyright (c) 2016 Mohamed Aslan <maslan@sce.carleton.ca>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #include <math.h>
21 #include <getopt.h>
22 #include <err.h>
23 #include <time.h>
25 #include <libof.h>
26 #include <of10.h>
27 #include <hashtab.h>
29 #include <net/if_arp.h>
30 #include <netinet/in.h>
31 #include <netinet/if_ether.h>
32 #include <arpa/inet.h>
33 #include <netinet/ip.h>
34 #include <sys/queue.h>
36 #include <actl.h>
37 #include "l3_lb.h"
39 #ifdef __linux__
40 #include <netinet/ether.h>
41 #endif
/*
 * when a packet arrives:
 *	(1) learn (in port and src mac)
 *	(2) check whether it's an arp or ip packet
 *		a. in case of arp:
 *			i. if dst is vserver:
 *				fake arp response
 *			ii. otherwise:
 *				flood
 *		b. in case of ip:
 *			i. if dst is vserver:
 *				loadbalance
 *			ii. otherwise:
 *				flood or forward
 */
/*
 * DPID(dp): datapath id of the dataplane `dp`, read from dp->sw_features
 * (interpreted as a struct of10_switch_features) and converted to host
 * byte order with be64toh().
 */
#define DPID(dp) (be64toh(((struct of10_switch_features *)((dp)->sw_features))->datapath_id))
/*
 * mactable's key entry: (switch datapath id, MAC address).
 *
 * The table is keyed on the raw bytes of this struct (sizeof(struct entry)),
 * which includes any trailing padding after mac_addr, so a key must be
 * zeroed before its fields are filled in.
 */
struct entry {
	uint64_t	dpid;
	uint8_t		mac_addr[ETHER_ADDR_LEN];
};
/* singly-linked list of the dataplanes registered by handle_connection_up() */
static SLIST_HEAD(, dp_ptr) dataplane_list;
struct dp_ptr {
	struct of_dataplane	*dp;	/* dataplane handle (not owned by the list) */
	SLIST_ENTRY(dp_ptr)	 next;
};
/* singly-linked list of the physical servers behind the virtual server */
static SLIST_HEAD(, server) server_list;
struct server {
	struct in_addr		ip_addr;	/* server IP (network byte order) */
	struct ether_addr	mac_addr;	/* valid only once ip_resolved is set */
	int			ip_resolved;	/* non-zero after the MAC was learned via ARP */
	uint64_t		load;		/* current load estimate in bytes/sec */
	uint64_t		load_bytes;	/* tx byte counter at the last stats reply */
	time_t			load_ts;	/* time of the last stats reply */
	SLIST_ENTRY(server)	next;
};
84 static struct actl_ctx *ctx;
85 static struct hashtab mactable;
86 static struct in_addr vserver_ip; /* virtual server's IP */
87 static struct ether_addr vserver_mac; /* virtual server's MAC */
88 static int n_servers = 0;
89 static int auto_arp = 0;
90 static uint16_t ttl_idle = DEFAULT_TIMEOUT;
91 static uint16_t ttl_hard = 0;
/* long command-line options; each maps onto the short option parsed in main() */
static struct option longopts[] = {
	{ "ip_addr",		required_argument,	NULL,	'i' },
	{ "mac_addr",		required_argument,	NULL,	'm' },
	{ "servers",		required_argument,	NULL,	's' },
	{ "arp_servers",	required_argument,	NULL,	'a' },
	{ "poll",		required_argument,	NULL,	'p' },
	{ "kpi",		required_argument,	NULL,	'k' },
	{ "ttl_idle",		required_argument,	NULL,	't' },
	{ "ttl_hard",		required_argument,	NULL,	'h' },
	{ NULL,			0,			NULL,	0 }
};
107 static void
108 learn(struct of_dataplane *dp, struct of10_packet_in *p_in)
110 struct ether_header *eh;
111 struct entry e;
112 struct ether_addr broadcast_addr;
114 eh = (struct ether_header *)(p_in->data);
115 memset(&broadcast_addr, 0xff, ETHER_ADDR_LEN);
117 /* skip the load-balacing address from the learning process */
118 if (!memcmp(eh->ether_shost, &vserver_mac, ETHER_ADDR_LEN))
119 return;
120 /* skip broadcast address */
121 if (!memcmp(eh->ether_shost, &broadcast_addr, ETHER_ADDR_LEN))
122 return;
124 /* L2 learning (update mactable) */
125 e.dpid = be64toh(((struct of10_switch_features *)(dp->sw_features))->datapath_id);
126 memcpy(e.mac_addr, eh->ether_shost, ETHER_ADDR_LEN);
127 hashtab_put(&mactable, &e, sizeof(struct entry), &p_in->in_port, sizeof(uint16_t));
128 #ifdef DEBUG
129 printf("[dpid=0x%016llx] learned that %s is at port %hu.\n", DPID(dp), ether_ntoa(eh->ether_shost), ntohs(p_in->in_port));
130 #endif
133 static int
134 lookup(struct of_dataplane *dp, struct ether_addr *ea, uint16_t *port)
136 uint16_t *p;
137 size_t len;
138 struct entry e;
140 e.dpid = be64toh(((struct of10_switch_features *)(dp->sw_features))->datapath_id);
141 memcpy(e.mac_addr, ea, ETHER_ADDR_LEN);
142 #ifdef DEBUG
143 printf("[dpid=0x%016llx] looking up %s.\n", DPID(dp), ether_ntoa(ea));
144 #endif
145 if (hashtab_get(&mactable, &e, sizeof(struct entry), &p, &len)) {
146 *port = *p;
147 return 1;
149 return 0;
152 static void
153 flood(struct of_controller *ctl, struct of_dataplane *dp, struct of10_packet_in *p_in)
155 struct of10_packet_out *msg_out;
156 struct of10_action_output *action;
158 msg_out = (struct of10_packet_out *)malloc(sizeof(struct of10_packet_out) + sizeof(struct of10_action_output));
159 if (msg_out == NULL)
160 return;
161 msg_out->hdr.version = OFP_VERSION_10;
162 msg_out->hdr.type = OFPT10_PACKET_OUT;
163 msg_out->hdr.length = htons(sizeof(struct of10_packet_out) + sizeof(struct of10_action_output));
164 msg_out->hdr.xid = 0;
165 msg_out->buffer_id = p_in->buffer_id;
166 msg_out->in_port = p_in->in_port;
167 msg_out->actions_len = htons(sizeof(struct of10_action_output));
168 action = (struct of10_action_output *)(msg_out->actions);
169 action->type = htons(OFPAT10_OUTPUT);
170 action->len = htons(sizeof(struct of10_action_output));
171 action->port = htons(OFPP10_FLOOD);
172 action->max_len = 0;
173 ctl->send(ctl, dp, (struct ofp_header *)msg_out);
174 free(msg_out);
175 #ifdef DEBUG
176 printf("packet flooded.\n");
177 #endif
180 static void
181 forward_install(struct of_controller *ctl, struct of_dataplane *dp, struct of10_packet_in *p_in, uint16_t out_port)
183 struct ether_header *eh;
184 struct of10_flow_mod *msg_mod;
185 struct of10_action_output *action;
187 eh = (struct ether_header *)(p_in->data);
188 msg_mod = (struct of10_flow_mod *)malloc(sizeof(struct of10_flow_mod) + sizeof(struct of10_action_output));
189 if (msg_mod == NULL)
190 return;
191 /* header */
192 msg_mod->hdr.version = OFP_VERSION_10;
193 msg_mod->hdr.type = OFPT10_FLOW_MOD;
194 msg_mod->hdr.length = htons(sizeof(struct of10_flow_mod) + sizeof(struct of10_action_output));
195 msg_mod->hdr.xid = 0;
196 /* match */
197 msg_mod->match.wildcards = htonl(OFPFW10_ALL & ~(OFPFW10_DL_DST));
198 memcpy(msg_mod->match.dl_dst, eh->ether_dhost, OFP10_ETH_ALEN);
199 /* others */
200 msg_mod->cookie = 0;
201 msg_mod->command = htons(OFPFC10_ADD);
202 msg_mod->idle_timeout = htons(ttl_idle);
203 msg_mod->hard_timeout = htons(ttl_hard);
204 msg_mod->priority = htons(0xffff);
205 msg_mod->buffer_id = p_in->buffer_id;
206 msg_mod->out_port = out_port; /* already in net byte order */
207 msg_mod->flags = 0;
208 /* action */
209 action = (struct of10_action_output *)(msg_mod->actions);
210 action->type = htons(OFPAT10_OUTPUT);
211 action->len = htons(sizeof(struct of10_action_output));
212 action->port = out_port; /* already in net byte order */
213 action->max_len = 0;
214 ctl->send(ctl, dp, (struct ofp_header *)msg_mod);
215 free(msg_mod);
218 static void
219 rewrite_install(struct of_controller *ctl, struct of_dataplane *dp, struct of10_packet_in *p_in, struct server *srv, uint16_t out_port)
221 struct ether_header *eh;
222 struct of10_flow_mod *msg_mod;
223 struct of10_action_nw_addr *action0;
224 struct of10_action_dl_addr *action1;
225 struct of10_action_output *action2;
227 eh = (struct ether_header *)(p_in->data);
228 msg_mod = (struct of10_flow_mod *)malloc(sizeof(struct of10_flow_mod) + 3 * sizeof(struct of10_action_output));
229 if (msg_mod == NULL)
230 return;
232 /* source -> server */
233 /* header */
234 msg_mod->hdr.version = OFP_VERSION_10;
235 msg_mod->hdr.type = OFPT10_FLOW_MOD;
236 msg_mod->hdr.length = htons(sizeof(struct of10_flow_mod) + sizeof(struct of10_action_nw_addr) +
237 sizeof(struct of10_action_dl_addr) + sizeof(struct of10_action_output));
238 msg_mod->hdr.xid = 0;
239 /* match */
240 msg_mod->match.wildcards = htonl(OFPFW10_ALL & ~(OFPFW10_DL_SRC | OFPFW10_DL_DST));
241 memcpy(msg_mod->match.dl_src, eh->ether_shost, OFP10_ETH_ALEN);
242 memcpy(msg_mod->match.dl_dst, eh->ether_dhost, OFP10_ETH_ALEN);
243 /* others */
244 msg_mod->cookie = 0;
245 msg_mod->command = htons(OFPFC10_ADD);
246 msg_mod->idle_timeout = htons(ttl_idle);
247 msg_mod->hard_timeout = htons(ttl_hard);
248 msg_mod->priority = htons(0xffff);
249 msg_mod->buffer_id = p_in->buffer_id;
250 msg_mod->out_port = out_port; /* already in net byte order */
251 msg_mod->flags = 0;
252 /* actions */
253 /* rewrite ip address */
254 action0 = (struct of10_action_nw_addr *)(msg_mod->actions);
255 action0->type = htons(OFPAT10_SET_NW_DST);
256 action0->len = htons(sizeof(struct of10_action_nw_addr));
257 action0->nw_addr = srv->ip_addr.s_addr; /* already in net byte order */
258 /* rewrite ether address */
259 action1 = (struct of10_action_dl_addr *)(++action0);
260 action1->type = htons(OFPAT10_SET_DL_DST);
261 action1->len = htons(sizeof(struct of10_action_dl_addr));
262 memcpy(&action1->dl_addr, &srv->mac_addr, OFP10_ETH_ALEN);
263 /* forward */
264 action2 = (struct of10_action_output *)(++action1);
265 action2->type = htons(OFPAT10_OUTPUT);
266 action2->len = htons(sizeof(struct of10_action_output));
267 action2->port = out_port; /* already in net byte order */
268 action2->max_len = 0;
269 /* send flow mod message */
270 ctl->send(ctl, dp, (struct ofp_header *)msg_mod);
271 #ifdef DEBUG
272 printf("[LB] rewrite and install (1).\n");
273 #endif
275 /* server -> source */
276 /* reuse msg_mod */
277 memcpy(msg_mod->match.dl_src, &srv->mac_addr, OFP10_ETH_ALEN);
278 memcpy(msg_mod->match.dl_dst, eh->ether_shost, OFP10_ETH_ALEN);
279 msg_mod->buffer_id = htonl(-1);
280 /* rewrite ip address */
281 action0 = (struct of10_action_nw_addr *)(msg_mod->actions);
282 action0->type = htons(OFPAT10_SET_NW_SRC);
283 action0->nw_addr = vserver_ip.s_addr; /* already in net byte order */
284 /* rewrite ether address */
285 action1 = (struct of10_action_dl_addr *)(++action0);
286 action1->type = htons(OFPAT10_SET_DL_SRC);
287 memcpy(&action1->dl_addr, &vserver_mac, OFP10_ETH_ALEN);
288 /* forward */
289 action2 = (struct of10_action_output *)(++action1);
290 action2->port = p_in->in_port;
291 /* send flow mod message */
292 ctl->send(ctl, dp, (struct ofp_header *)msg_mod);
293 free(msg_mod);
294 #ifdef DEBUG
295 printf("[LB] rewrite and install (2).\n");
296 #endif
299 static void
300 loadbalance(struct of_controller *ctl, struct of_dataplane *dp, struct of10_packet_in *p_in)
302 uint64_t least_load;
303 uint16_t llsrv_port;
304 struct server *srv, *llsrv = NULL;
306 if (!n_servers)
307 return;
309 /* find least-loaded server */
310 srv = SLIST_FIRST(&server_list);
311 least_load = srv->load;
312 SLIST_FOREACH(srv, &server_list, next) {
313 printf("[LB] server %s -> load is %lld bytes/sec (status is %d).\n", inet_ntoa(srv->ip_addr), srv->load, srv->ip_resolved);
314 if (!srv->ip_resolved)
315 continue; /* next server */
316 if ((llsrv == NULL) || (srv->load < least_load)) {
317 least_load = srv->load;
318 llsrv = srv;
321 if (llsrv == NULL)
322 return; /* TODO: drop packet */
323 done:
324 printf("[LB] least-loaded server is %s (with load = %lld bytes/sec).\n", inet_ntoa(llsrv->ip_addr), llsrv->load);
325 if (!lookup(dp, &llsrv->mac_addr, &llsrv_port))
326 llsrv_port = htons(OFPP10_FLOOD);
327 printf("[LB] least-loaded server's port is %hu.\n", ntohs(llsrv_port));
328 rewrite_install(ctl, dp, p_in, llsrv, llsrv_port);
331 static void
332 learn_servers(struct ether_arp *arp)
334 struct ether_addr *arp_sha;
335 struct in_addr *arp_spa;
336 struct server *srv;
338 arp_sha = (struct ether_addr *)arp->arp_sha;
339 arp_spa = (struct in_addr *)arp->arp_spa;
341 /* check it's one of the servers (L3 learning) */
342 SLIST_FOREACH(srv, &server_list, next) {
343 if (arp_spa->s_addr == srv->ip_addr.s_addr) {
344 memcpy(&srv->mac_addr, arp_sha, ETHER_ADDR_LEN);
345 srv->ip_resolved = 1;
346 printf("learned (L3) that server %s is at %s.\n", inet_ntoa(srv->ip_addr), ether_ntoa(&srv->mac_addr));
347 break;
352 static void
353 handle_arp(struct of_controller *ctl, struct of_dataplane *dp, struct of10_packet_in *ph_in)
355 struct ether_header *eh_in, *eh_out;
356 struct ether_arp *arp_in, *arp_out;
357 struct of10_packet_out *msg_out;
358 struct of10_action_output *action;
359 struct in_addr *arp_in_tpa;
360 uint8_t *p;
362 eh_in = (struct ether_header *)(ph_in->data);
363 arp_in = (struct ether_arp *)(ph_in->data + ETHER_HDR_LEN);
365 /* check if it's valid or not */
366 if (!(arp_in->ea_hdr.ar_hrd == htons(ARPHRD_ETHER)) ||
367 !(arp_in->ea_hdr.ar_pro == htons(ETHERTYPE_IP)))
368 return;
369 /* check if it's an arp reply */
370 if (arp_in->ea_hdr.ar_op == htons(ARPOP_REPLY)) {
371 printf("[ARP] %u.%u.%u.%u is at %02x:%02x:%02x:%02x:%02x:%02x\n",
372 arp_in->arp_spa[0], arp_in->arp_spa[1], arp_in->arp_spa[2], arp_in->arp_spa[3],
373 arp_in->arp_sha[0], arp_in->arp_sha[1], arp_in->arp_sha[2], arp_in->arp_sha[3],
374 arp_in->arp_sha[4], arp_in->arp_sha[5]);
375 learn_servers(arp_in); /* L3 learn about the servers */
376 flood(ctl, dp, ph_in); /* XXX: lookup() */
377 return;
380 /* now, ensure it's an arp request */
381 if (arp_in->ea_hdr.ar_op != htons(ARPOP_REQUEST))
382 return;
383 /* okay */
384 printf("[ARP] who has %u.%u.%u.%u tell %u.%u.%u.%u\n",
385 arp_in->arp_tpa[0], arp_in->arp_tpa[1], arp_in->arp_tpa[2], arp_in->arp_tpa[3],
386 arp_in->arp_spa[0], arp_in->arp_spa[1], arp_in->arp_spa[2], arp_in->arp_spa[3]);
388 /* check if it's destined to vserver or not */
389 arp_in_tpa = (struct in_addr *)arp_in->arp_tpa;
390 if (arp_in_tpa->s_addr != vserver_ip.s_addr) {
391 printf("[ARP] arp is not for a load balanced host.\n");
392 flood(ctl, dp, ph_in);
393 return;
395 printf("[ARP] arp is for a load balanced host.\n");
397 /* fake arp response */
398 msg_out = (struct of10_packet_out *)
399 malloc(sizeof(struct of10_packet_out) + sizeof(struct of10_action_output)
400 + ETHER_HDR_LEN + sizeof(struct ether_arp));
401 if (msg_out == NULL)
402 return;
404 eh_out = (struct ether_header *)(msg_out + sizeof(struct of10_packet_out) +
405 sizeof(struct of10_action_output));
406 arp_out = (struct ether_arp *)(msg_out + sizeof(struct of10_packet_out) +
407 sizeof(struct of10_action_output) + ETHER_HDR_LEN);
410 p = (uint8_t *)(msg_out) + sizeof(struct of10_packet_out) +
411 sizeof(struct of10_action_output);
412 eh_out = (struct ether_header *)p;
413 p += ETHER_HDR_LEN;
414 arp_out = (struct ether_arp *)p;
416 /* ether_arp */
417 arp_out->ea_hdr.ar_hrd = htons(ARPHRD_ETHER);
418 arp_out->ea_hdr.ar_pro = htons(ETHERTYPE_IP);
419 arp_out->ea_hdr.ar_op = htons(ARPOP_REPLY);
420 arp_out->ea_hdr.ar_hln = arp_in->ea_hdr.ar_hln;
421 arp_out->ea_hdr.ar_pln = arp_in->ea_hdr.ar_pln;
422 memcpy(arp_out->arp_tha, arp_in->arp_sha, sizeof(arp_out->arp_tha));
423 memcpy(arp_out->arp_tpa, arp_in->arp_spa, sizeof(arp_out->arp_tpa));
424 memcpy(arp_out->arp_sha, &vserver_mac, sizeof(arp_out->arp_sha));
425 memcpy(arp_out->arp_spa, &vserver_ip, sizeof(arp_out->arp_spa));
426 /* ether_header */
427 eh_out->ether_type = ntohs(ETHERTYPE_ARP);
428 memcpy(eh_out->ether_dhost, eh_in->ether_shost, sizeof(eh_out->ether_dhost));
429 memcpy(eh_out->ether_shost, &vserver_mac, sizeof(eh_out->ether_shost));
430 /* packet_out */
431 msg_out->hdr.version = OFP_VERSION_10;
432 msg_out->hdr.type = OFPT10_PACKET_OUT;
433 msg_out->hdr.length = htons(sizeof(struct of10_packet_out) + sizeof(struct of10_action_output) + ETHER_HDR_LEN + sizeof(struct ether_arp));
434 msg_out->hdr.xid = 0;
435 msg_out->buffer_id = htonl(-1);
436 msg_out->in_port = ph_in->in_port;
437 msg_out->actions_len = htons(sizeof(struct of10_action_output));
438 action = (struct of10_action_output *)(msg_out->actions);
439 action->type = htons(OFPAT10_OUTPUT);
440 action->len = htons(sizeof(struct of10_action_output));
441 action->port = htons(OFPP10_IN_PORT);
442 action->max_len = 0;
443 ctl->send(ctl, dp, (struct ofp_header *)msg_out);
444 free(msg_out);
447 static void
448 handle_ip(struct of_controller *ctl, struct of_dataplane *dp, struct of10_packet_in *p_in)
450 struct ether_header *eh_in;
451 struct ip *ip_in;
452 uint16_t out_port;
454 eh_in = (struct ether_header *)(p_in->data);
455 ip_in = (struct ether_arp *)(p_in->data + ETHER_HDR_LEN);
457 char *x = strdup(inet_ntoa(ip_in->ip_src));
458 char *y = strdup(inet_ntoa(ip_in->ip_dst));
459 printf("[IP] from %s to %s\n", x, y);
460 free(x);
461 free(y);
463 /* check if it's destined to vserver or not */
464 if (ip_in->ip_dst.s_addr == vserver_ip.s_addr) {
465 printf("[IP] packet is for a load balanced host.\n");
466 loadbalance(ctl, dp, p_in);
467 return;
470 if (lookup(dp, (struct ether_addr *)eh_in->ether_dhost, &out_port)) {
471 printf("[IP] forward (port = %hu) and install flow rule.\n", ntohs(out_port));
472 forward_install(ctl, dp, p_in, out_port);
474 else{
475 printf("[IP] flood (%s not found).\n", ether_ntoa(eh_in->ether_dhost));
476 flood(ctl, dp, p_in);
480 static void
481 handle_stats(struct of_controller *ctl, struct of_dataplane *dp, struct ofp_header *msg)
483 int i, n;
484 struct of10_stats_reply *sts;
485 struct of10_port_stats *psts;
486 struct server *srv;
487 struct host_info **hosts;
488 time_t now;
489 char *loadstr;
492 sts = (struct of10_stats_reply *)msg;
493 #ifdef DEBUG
494 printf("[DEBUG] stats received type=0x%x, size=0x%x.\n", ntohs(sts->type), ntohs(sts->hdr.length));
495 #endif
496 /* process only port stats for now */
497 if (ntohs(sts->type) != OFPST10_PORT)
498 return;
499 psts = (struct of10_flow_stats *)sts->body;
500 if (psts == NULL)
501 return;
502 #ifdef DEBUG
503 printf("[DEBUG] dpid=0x%016llx,port=%u load is now %llu bytes.\n", DPID(dp), ntohs(psts->port_no), be64toh(psts->tx_bytes));
504 #endif
505 /* read static topology information */
506 ctx->topo(ctx, &hosts, &n);
507 for (i = 0 ; i < n ; i++) {
508 if ((DPID(dp) == hosts[i]->dpid) && (ntohs(psts->port_no) == hosts[i]->port)) {
509 SLIST_FOREACH(srv, &server_list, next) {
510 if (srv->ip_addr.s_addr == hosts[i]->ip_addr.s_addr) {
511 now = time(NULL);
512 srv->load = (uint64_t)(be64toh(psts->tx_bytes) - srv->load_bytes) / (now - srv->load_ts);
513 srv->load_bytes = (uint64_t)be64toh(psts->tx_bytes);
514 srv->load_ts = now;
515 printf("[INFO] server %s load is now %llu bytes/sec.\n", inet_ntoa(srv->ip_addr), srv->load);
516 break;
522 (void)asprintf(&loadstr, "%llu", srv->load);
523 if (!loadstr)
524 errx(1, "failed to allocate memory");
525 n = ctx->put(ctx, inet_ntoa(srv->ip_addr), loadstr);
526 if (!n)
527 printf("[INSERT] put(%s, %s) failed.\n", inet_ntoa(srv->ip_addr), loadstr);
528 else
529 printf("[INSERT] put(%s, %s) succeeded.\n", inet_ntoa(srv->ip_addr), loadstr);
530 free(loadstr);
533 static void
534 handle_packet_in(struct of_controller *ctl, struct of_dataplane *dp, struct ofp_header *msg_in)
536 struct ether_header *eh;
537 struct of10_packet_in *p_in;
540 p_in = (struct of10_packet_in *)msg_in;
541 eh = (struct ether_header *)(p_in->data);
543 #ifdef DEBUG
544 printf("in_port: %hu, ", ntohs(p_in->in_port));
545 printf("%02x:%02x:%02x:%02x:%02x:%02x ->", eh->ether_shost[0], eh->ether_shost[1],
546 eh->ether_shost[2], eh->ether_shost[3], eh->ether_shost[4],
547 eh->ether_shost[5]);
548 printf(" %02x:%02x:%02x:%02x:%02x:%02x, ", eh->ether_dhost[0], eh->ether_dhost[1],
549 eh->ether_dhost[2], eh->ether_dhost[3], eh->ether_dhost[4],
550 eh->ether_dhost[5]);
551 printf("type: 0x%hx ", ntohs(eh->ether_type));
552 if (ntohs(eh->ether_type) == ETHERTYPE_ARP)
553 printf("[ARP]");
554 else if (ntohs(eh->ether_type) == ETHERTYPE_IP)
555 printf("[IP4]");
556 printf("\n");
557 printf("packet_in reason: 0x%x\n", (unsigned int)p_in->reason);
558 #endif
560 /* learn */
561 learn(dp, p_in);
563 /* check if it's an arp packet */
564 if (ntohs(eh->ether_type) == ETHERTYPE_ARP)
565 handle_arp(ctl, dp, p_in);
566 /* check if it's an ip packet */
567 else if (ntohs(eh->ether_type) == ETHERTYPE_IP)
568 handle_ip(ctl, dp, p_in);
571 static void
572 handle_ofp_messages(struct of_controller *ctl, struct of_dataplane *dp, struct ofp_header *msg_in)
574 printf("ofp_messages [dpid = 0x%016llx, type=0x%x].\n", DPID(dp), (unsigned int)msg_in->type);
575 if (msg_in->type == OFPT10_PACKET_IN)
576 handle_packet_in(ctl, dp, msg_in);
577 else if (msg_in->type == OFPT10_STATS_REPLY)
578 handle_stats(ctl, dp, msg_in);
581 static struct of10_packet_out *
582 arp_request(struct in_addr addr)
584 struct ether_header *eh;
585 struct ether_arp *arp;
586 struct of10_packet_out *msg;
587 struct of10_action_output *action;
588 uint8_t *p;
590 msg = (struct of10_packet_out *)
591 malloc(sizeof(struct of10_packet_out) + sizeof(struct of10_action_output)
592 + ETHER_HDR_LEN + sizeof(struct ether_arp));
593 if (msg == NULL)
594 return NULL;
596 p = (uint8_t *)(msg) + sizeof(struct of10_packet_out) +
597 sizeof(struct of10_action_output);
598 eh = (struct ether_header *)p;
599 p += ETHER_HDR_LEN;
600 arp = (struct ether_arp *)p;
602 /* ether_arp */
603 arp->ea_hdr.ar_hrd = htons(ARPHRD_ETHER);
604 arp->ea_hdr.ar_pro = htons(ETHERTYPE_IP);
605 arp->ea_hdr.ar_op = htons(ARPOP_REQUEST);
606 arp->ea_hdr.ar_hln = ETHER_ADDR_LEN;
607 arp->ea_hdr.ar_pln = sizeof(in_addr_t);
608 memset(arp->arp_tha, 0xff, ETHER_ADDR_LEN);
609 memcpy(arp->arp_tpa, &addr.s_addr, sizeof(arp->arp_tpa));
610 memcpy(arp->arp_sha, &vserver_mac, ETHER_ADDR_LEN);
611 memcpy(arp->arp_spa, &vserver_ip.s_addr, sizeof(arp->arp_spa));
612 /* ether_header */
613 eh->ether_type = ntohs(ETHERTYPE_ARP);
614 memset(eh->ether_dhost, 0xff, ETHER_ADDR_LEN);
615 memcpy(eh->ether_shost, &vserver_mac, ETHER_ADDR_LEN);
616 /* packet_out */
617 msg->hdr.version = OFP_VERSION_10;
618 msg->hdr.type = OFPT10_PACKET_OUT;
619 msg->hdr.length = htons(sizeof(struct of10_packet_out) + sizeof(struct of10_action_output) + ETHER_HDR_LEN + sizeof(struct ether_arp));
620 msg->hdr.xid = 0;
621 msg->buffer_id = htonl(-1);
622 msg->in_port = 0xffff; /* XXX: OFPP10_NONE */
623 msg->actions_len = htons(sizeof(struct of10_action_output));
624 action = (struct of10_action_output *)(msg->actions);
625 action->type = htons(OFPAT10_OUTPUT);
626 action->len = htons(sizeof(struct of10_action_output));
627 action->port = htons(OFPP10_FLOOD);
628 action->max_len = 0;
629 return msg;
632 static void
633 vserver_stats_request(struct of_controller *ctl, struct of_dataplane *dp, uint16_t port)
635 struct of10_stats_request *msg;
636 struct of10_port_stats_request *body;
638 msg = (struct of10_stats_request *)malloc(sizeof(struct of10_stats_request) + sizeof(struct of10_port_stats_request));
639 if (msg == NULL)
640 return;
641 /* header */
642 msg->hdr.version = OFP_VERSION_10;
643 msg->hdr.type = OFPT10_STATS_REQUEST;
644 msg->hdr.length = htons(sizeof(struct of10_stats_request) + sizeof(struct of10_port_stats_request));
645 msg->hdr.xid = 0;
646 /* type */
647 msg->type = htons(OFPST10_PORT);
648 /* flags */
649 msg->flags = 0;
650 /* body */
651 body = (struct of10_port_stats_request *)(msg->body);
652 /* port */
653 body->port_no = htons(port);
654 /* send message */
655 ctl->send(ctl, dp, (struct ofp_header *)msg);
656 free(msg);
659 static struct of_dataplane *
660 dpid2ptr(uint64_t dpid)
662 struct dp_ptr *dptr;
664 SLIST_FOREACH(dptr, &dataplane_list, next)
665 if ((dptr->dp) && (DPID(dptr->dp) == dpid))
666 return dptr->dp;
667 return NULL;
670 static void
671 poll(struct of_controller *ctl)
673 char *loadstr;
674 int i, n, ret, local;
675 struct host_info **hosts;
676 struct of_dataplane *dp;
677 struct server *srv;
680 /* read static topology information */
681 ctx->topo(ctx, &hosts, &n);
683 /* XXX: poll local servers only */
684 SLIST_FOREACH(srv, &server_list, next) {
685 if (!srv->ip_resolved)
686 continue;
687 local = 0;
688 for (i = 0 ; i < n ; i++) {
689 if (srv->ip_addr.s_addr == hosts[i]->ip_addr.s_addr) {
690 local = 1;
691 dp = dpid2ptr(hosts[i]->dpid);
692 if (!dp)
693 continue;
694 #ifdef DEBUG
695 printf("[DEBUG] poll(): server %s at 0x%016llx.\n", inet_ntoa(srv->ip_addr), DPID(dp));
696 #endif
697 vserver_stats_request(ctl, dp, hosts[i]->port);
698 break; /* XXX: support multi-connections */
701 /* check dht for remote servers */
702 if (!local) {
703 ret = ctx->get(ctx, inet_ntoa(srv->ip_addr), &loadstr);
704 if (!ret) {
705 printf("[QUERY] get(%s) failed.\n", inet_ntoa(srv->ip_addr));
707 else {
708 printf("[QUERY] get(%s) = %s succeeded.\n", inet_ntoa(srv->ip_addr), loadstr);
709 sscanf(loadstr, "%llu", &srv->load);
715 static void
716 handle_connection_up(struct of_controller *ctl, struct of_dataplane *dp)
718 struct ofp_header *msg;
719 struct server *srv;
720 struct dp_ptr *dptr;
721 uint64_t dpid;
723 dpid = DPID(dp);
724 printf("[INFO] connection_up [dpid = 0x%016llx].\n", dpid);
726 if (auto_arp) {
727 printf("[INFO] auto_arp is ON.\n");
728 SLIST_FOREACH(srv, &server_list, next) {
729 printf("[INFO] learning about server %s.\n", inet_ntoa(srv->ip_addr));
730 msg = arp_request(srv->ip_addr);
731 ctl->send(ctl, dp, (struct ofp_header *)msg);
732 free(msg);
735 dptr = (struct dp_ptr *)malloc(sizeof(struct dp_ptr));
736 dptr->dp = dp;
737 SLIST_INSERT_HEAD(&dataplane_list, dptr, next);
738 printf("[INFO] switch [dpid = 0x%016llx] is ready.\n", dpid);
742 init(struct actl_ctx *c)
744 ctx = c;
745 printf("l3_lb initialized.\n");
746 return 0;
750 main(int argc, char **argv)
752 int i, ch, poll_period = DEFAULT_POLL_PERIOD;
753 double tar_kpi;
754 const char *errstr;
755 struct ether_addr *ea;
756 struct server *srv;
759 #ifdef DEBUG
760 for (i = 0 ; i < argc ; i++) {
761 printf("arg[%d] = %s.\n", i, argv[i]);
763 #endif
765 SLIST_INIT(&dataplane_list);
766 SLIST_INIT(&server_list);
767 while ((ch = getopt_long(argc, argv, "i:m:s:a:p:k:t:h:", longopts, NULL)) != -1) {
768 switch (ch) {
769 case 'i':
770 if (!inet_aton(optarg, &vserver_ip))
771 errx(1, "invalid IP address");
772 break;
773 case 'm':
774 if ((ea = ether_aton(optarg)) == NULL)
775 errx(1, "invalid MAC address");
776 vserver_mac = *ea;
777 break;
778 case 's':
779 srv = (struct server *)malloc(sizeof(struct server));
780 if (!inet_aton(optarg, &srv->ip_addr))
781 errx(1, "invalid IP address");
782 srv->ip_resolved = 0;
783 srv->load = 0;
784 srv->load_bytes = 0;
785 srv->load_ts = time(NULL);
786 SLIST_INSERT_HEAD(&server_list, srv, next);
787 ++n_servers;
788 break;
789 case 'a':
790 if (!strcmp(optarg, "yes"))
791 auto_arp = 1;
792 else if (!strcmp(optarg, "no"))
793 auto_arp = 0;
794 else
795 errx(1, "incorrect option for argument '-a'");
796 break;
797 case 'p':
798 poll_period = strtonum(optarg, 1, 60, &errstr);
799 if (errstr != NULL)
800 errx(1, "invalid polling period %s: %s", errstr, optarg);
801 break;
802 case 'k':
803 tar_kpi = (double)strtonum(optarg, 1, 100000, &errstr);
804 if (errstr)
805 errx(1, "invalid target KPI %s:%s", errstr, optarg);
806 ctx->adapt(ctx, tar_kpi);
807 break;
808 case 't':
809 ttl_idle = (uint16_t)strtonum(optarg, 0, 1024, &errstr);
810 if (errstr)
811 errx(1, "flow idle ttl out-of-range %s:%s", errstr, optarg);
812 break;
813 case 'h':
814 ttl_hard = (uint16_t)strtonum(optarg, 0, 1024, &errstr);
815 if (errstr)
816 errx(1, "flow hard ttl out-of-range %s:%s", errstr, optarg);
817 break;
819 /* TODO: default -> usage() */
821 argc -= optind;
822 argv += optind;
824 /* initialize the mactable */
825 if (!hashtab_init(&mactable, 12, NULL))
826 return 1;
828 printf("Virtual Server IP: %s\n", inet_ntoa(vserver_ip));
829 printf("Virtual Server MAC: %s\n", ether_ntoa(&vserver_mac));
830 printf("Number of Physical Servers = %d.\n", n_servers);
831 i = 0;
832 SLIST_FOREACH(srv, &server_list, next)
833 printf("Server%d IP: %s, MAC: %s\n", ++i, inet_ntoa(srv->ip_addr), ether_ntoa(&srv->mac_addr));
834 printf("Target KPI = %lf.\n", tar_kpi);
835 printf("TTL IDLE = %hu, HARD = %hu.\n", ttl_idle, ttl_hard);
837 printf("L3 LB initialized.\n");
839 /* polling timer */
840 ctx->cntl->timer(ctx->cntl, poll_period * 1000, poll);
842 return 0;
845 void
846 handler(struct of_event *ev)
848 #ifdef DEBUG
849 printf("l3_lb_handler.\n");
850 #endif
851 if (ev->type == OFEV_CONNECTION_UP)
852 handle_connection_up(ctx->cntl, ev->dp);
853 else if (ev->type == OFEV_PROTO_MESSAGE)
854 handle_ofp_messages(ctx->cntl, ev->dp, ev->ofp_hdr);
857 double
858 kpi()
860 double avg, stddev = 0;
861 int n = 0;
862 struct server *srv;
863 uint64_t load = 0;
865 #ifdef DEBUG
866 printf("l3_lb_kpi.\n");
867 #endif
868 if (!n_servers)
869 return 0;
871 /* mean */
872 srv = SLIST_FIRST(&server_list);
873 SLIST_FOREACH(srv, &server_list, next) {
874 if (!srv->ip_resolved)
875 continue;
876 load += srv->load;
877 n++;
879 if (!n)
880 return 0;
881 avg = load / n;
882 /* stddev */
883 srv = SLIST_FIRST(&server_list);
884 SLIST_FOREACH(srv, &server_list, next) {
885 if (!srv->ip_resolved)
886 continue;
887 stddev += (srv->load - avg) * (srv->load - avg);
889 stddev /= n;
890 stddev = sqrt(stddev);
891 #ifdef DEBUG
892 printf("l3_lb_kpi: KPI = %lf.\n", stddev);
893 #endif
894 return stddev;