[PATCH] w1: fix CRC calculation on bigendian platforms.
[linux-2.6/verdex.git] / net / ipv4 / fib_trie.c
blob4be234c7d8c3c7b9838691c2c40e41379925e27d
1 /*
2 * This program is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU General Public License
4 * as published by the Free Software Foundation; either version
5 * 2 of the License, or (at your option) any later version.
7 * Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet
8 * & Swedish University of Agricultural Sciences.
10 * Jens Laas <jens.laas@data.slu.se> Swedish University of
11 * Agricultural Sciences.
13 * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet
15 * This work is based on the LPC-trie which is originally descibed in:
17 * An experimental study of compression methods for dynamic tries
18 * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
19 * http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/
22 * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
23 * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
25 * Version: $Id: fib_trie.c,v 1.3 2005/06/08 14:20:01 robert Exp $
28 * Code from fib_hash has been reused which includes the following header:
31 * INET An implementation of the TCP/IP protocol suite for the LINUX
32 * operating system. INET is implemented using the BSD Socket
33 * interface as the means of communication with the user level.
35 * IPv4 FIB: lookup engine and maintenance routines.
38 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
40 * This program is free software; you can redistribute it and/or
41 * modify it under the terms of the GNU General Public License
42 * as published by the Free Software Foundation; either version
43 * 2 of the License, or (at your option) any later version.
46 #define VERSION "0.325"
48 #include <linux/config.h>
49 #include <asm/uaccess.h>
50 #include <asm/system.h>
51 #include <asm/bitops.h>
52 #include <linux/types.h>
53 #include <linux/kernel.h>
54 #include <linux/sched.h>
55 #include <linux/mm.h>
56 #include <linux/string.h>
57 #include <linux/socket.h>
58 #include <linux/sockios.h>
59 #include <linux/errno.h>
60 #include <linux/in.h>
61 #include <linux/inet.h>
62 #include <linux/netdevice.h>
63 #include <linux/if_arp.h>
64 #include <linux/proc_fs.h>
65 #include <linux/skbuff.h>
66 #include <linux/netlink.h>
67 #include <linux/init.h>
68 #include <linux/list.h>
69 #include <net/ip.h>
70 #include <net/protocol.h>
71 #include <net/route.h>
72 #include <net/tcp.h>
73 #include <net/sock.h>
74 #include <net/ip_fib.h>
75 #include "fib_lookup.h"
77 #undef CONFIG_IP_FIB_TRIE_STATS
78 #define MAX_CHILDS 16384
80 #define EXTRACT(p, n, str) ((str)<<(p)>>(32-(n)))
81 #define KEYLENGTH (8*sizeof(t_key))
82 #define MASK_PFX(k, l) (((l)==0)?0:(k >> (KEYLENGTH-l)) << (KEYLENGTH-l))
83 #define TKEY_GET_MASK(offset, bits) (((bits)==0)?0:((t_key)(-1) << (KEYLENGTH - bits) >> offset))
85 static DEFINE_RWLOCK(fib_lock);
87 typedef unsigned int t_key;
89 #define T_TNODE 0
90 #define T_LEAF 1
91 #define NODE_TYPE_MASK 0x1UL
92 #define NODE_PARENT(_node) \
93 ((struct tnode *)((_node)->_parent & ~NODE_TYPE_MASK))
94 #define NODE_SET_PARENT(_node, _ptr) \
95 ((_node)->_parent = (((unsigned long)(_ptr)) | \
96 ((_node)->_parent & NODE_TYPE_MASK)))
97 #define NODE_INIT_PARENT(_node, _type) \
98 ((_node)->_parent = (_type))
99 #define NODE_TYPE(_node) \
100 ((_node)->_parent & NODE_TYPE_MASK)
102 #define IS_TNODE(n) (!(n->_parent & T_LEAF))
103 #define IS_LEAF(n) (n->_parent & T_LEAF)
105 struct node {
106 t_key key;
107 unsigned long _parent;
110 struct leaf {
111 t_key key;
112 unsigned long _parent;
113 struct hlist_head list;
116 struct leaf_info {
117 struct hlist_node hlist;
118 int plen;
119 struct list_head falh;
122 struct tnode {
123 t_key key;
124 unsigned long _parent;
125 unsigned short pos:5; /* 2log(KEYLENGTH) bits needed */
126 unsigned short bits:5; /* 2log(KEYLENGTH) bits needed */
127 unsigned short full_children; /* KEYLENGTH bits needed */
128 unsigned short empty_children; /* KEYLENGTH bits needed */
129 struct node *child[0];
132 #ifdef CONFIG_IP_FIB_TRIE_STATS
133 struct trie_use_stats {
134 unsigned int gets;
135 unsigned int backtrack;
136 unsigned int semantic_match_passed;
137 unsigned int semantic_match_miss;
138 unsigned int null_node_hit;
139 unsigned int resize_node_skipped;
141 #endif
143 struct trie_stat {
144 unsigned int totdepth;
145 unsigned int maxdepth;
146 unsigned int tnodes;
147 unsigned int leaves;
148 unsigned int nullpointers;
149 unsigned int nodesizes[MAX_CHILDS];
152 struct trie {
153 struct node *trie;
154 #ifdef CONFIG_IP_FIB_TRIE_STATS
155 struct trie_use_stats stats;
156 #endif
157 int size;
158 unsigned int revision;
161 static int trie_debug = 0;
163 static int tnode_full(struct tnode *tn, struct node *n);
164 static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n);
165 static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull);
166 static int tnode_child_length(struct tnode *tn);
167 static struct node *resize(struct trie *t, struct tnode *tn);
168 static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err);
169 static struct tnode *halve(struct trie *t, struct tnode *tn, int *err);
170 static void tnode_free(struct tnode *tn);
171 static void trie_dump_seq(struct seq_file *seq, struct trie *t);
172 extern struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio);
173 extern int fib_detect_death(struct fib_info *fi, int order,
174 struct fib_info **last_resort, int *last_idx, int *dflt);
176 extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa, int z, int tb_id,
177 struct nlmsghdr *n, struct netlink_skb_parms *req);
179 static kmem_cache_t *fn_alias_kmem;
180 static struct trie *trie_local = NULL, *trie_main = NULL;
182 static void trie_bug(char *err)
184 printk("Trie Bug: %s\n", err);
185 BUG();
188 static inline struct node *tnode_get_child(struct tnode *tn, int i)
190 if (i >= 1<<tn->bits)
191 trie_bug("tnode_get_child");
193 return tn->child[i];
196 static inline int tnode_child_length(struct tnode *tn)
198 return 1<<tn->bits;
202 _________________________________________________________________
203 | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
204 ----------------------------------------------------------------
205 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
207 _________________________________________________________________
208 | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
209 -----------------------------------------------------------------
210 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
212 tp->pos = 7
213 tp->bits = 3
214 n->pos = 15
215 n->bits=4
216 KEYLENGTH=32
219 static inline t_key tkey_extract_bits(t_key a, int offset, int bits)
221 if (offset < KEYLENGTH)
222 return ((t_key)(a << offset)) >> (KEYLENGTH - bits);
223 else
224 return 0;
227 static inline int tkey_equals(t_key a, t_key b)
229 return a == b;
232 static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b)
234 if (bits == 0 || offset >= KEYLENGTH)
235 return 1;
236 bits = bits > KEYLENGTH ? KEYLENGTH : bits;
237 return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0;
240 static inline int tkey_mismatch(t_key a, int offset, t_key b)
242 t_key diff = a ^ b;
243 int i = offset;
245 if(!diff)
246 return 0;
247 while((diff << i) >> (KEYLENGTH-1) == 0)
248 i++;
249 return i;
252 /* Candiate for fib_semantics */
254 static void fn_free_alias(struct fib_alias *fa)
256 fib_release_info(fa->fa_info);
257 kmem_cache_free(fn_alias_kmem, fa);
261 To understand this stuff, an understanding of keys and all their bits is
262 necessary. Every node in the trie has a key associated with it, but not
263 all of the bits in that key are significant.
265 Consider a node 'n' and its parent 'tp'.
267 If n is a leaf, every bit in its key is significant. Its presence is
268 necessitaded by path compression, since during a tree traversal (when
269 searching for a leaf - unless we are doing an insertion) we will completely
270 ignore all skipped bits we encounter. Thus we need to verify, at the end of
271 a potentially successful search, that we have indeed been walking the
272 correct key path.
274 Note that we can never "miss" the correct key in the tree if present by
275 following the wrong path. Path compression ensures that segments of the key
276 that are the same for all keys with a given prefix are skipped, but the
277 skipped part *is* identical for each node in the subtrie below the skipped
278 bit! trie_insert() in this implementation takes care of that - note the
279 call to tkey_sub_equals() in trie_insert().
281 if n is an internal node - a 'tnode' here, the various parts of its key
282 have many different meanings.
284 Example:
285 _________________________________________________________________
286 | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
287 -----------------------------------------------------------------
288 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
290 _________________________________________________________________
291 | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
292 -----------------------------------------------------------------
293 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
295 tp->pos = 7
296 tp->bits = 3
297 n->pos = 15
298 n->bits=4
300 First, let's just ignore the bits that come before the parent tp, that is
301 the bits from 0 to (tp->pos-1). They are *known* but at this point we do
302 not use them for anything.
304 The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
305 index into the parent's child array. That is, they will be used to find
306 'n' among tp's children.
308 The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits
309 for the node n.
311 All the bits we have seen so far are significant to the node n. The rest
312 of the bits are really not needed or indeed known in n->key.
314 The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
315 n's child array, and will of course be different for each child.
317 The rest of the bits, from (n->pos + n->bits) onward, are completely unknown
318 at this point.
322 static void check_tnode(struct tnode *tn)
324 if(tn && tn->pos+tn->bits > 32) {
325 printk("TNODE ERROR tn=%p, pos=%d, bits=%d\n", tn, tn->pos, tn->bits);
329 static int halve_threshold = 25;
330 static int inflate_threshold = 50;
332 static struct leaf *leaf_new(void)
334 struct leaf *l = kmalloc(sizeof(struct leaf), GFP_KERNEL);
335 if(l) {
336 NODE_INIT_PARENT(l, T_LEAF);
337 INIT_HLIST_HEAD(&l->list);
339 return l;
342 static struct leaf_info *leaf_info_new(int plen)
344 struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL);
345 if(li) {
346 li->plen = plen;
347 INIT_LIST_HEAD(&li->falh);
349 return li;
352 static inline void free_leaf(struct leaf *l)
354 kfree(l);
357 static inline void free_leaf_info(struct leaf_info *li)
359 kfree(li);
362 static struct tnode *tnode_alloc(unsigned int size)
364 if (size <= PAGE_SIZE) {
365 return kmalloc(size, GFP_KERNEL);
366 } else {
367 return (struct tnode *)
368 __get_free_pages(GFP_KERNEL, get_order(size));
372 static void __tnode_free(struct tnode *tn)
374 unsigned int size = sizeof(struct tnode) +
375 (1<<tn->bits) * sizeof(struct node *);
377 if (size <= PAGE_SIZE)
378 kfree(tn);
379 else
380 free_pages((unsigned long)tn, get_order(size));
383 static struct tnode* tnode_new(t_key key, int pos, int bits)
385 int nchildren = 1<<bits;
386 int sz = sizeof(struct tnode) + nchildren * sizeof(struct node *);
387 struct tnode *tn = tnode_alloc(sz);
389 if(tn) {
390 memset(tn, 0, sz);
391 NODE_INIT_PARENT(tn, T_TNODE);
392 tn->pos = pos;
393 tn->bits = bits;
394 tn->key = key;
395 tn->full_children = 0;
396 tn->empty_children = 1<<bits;
398 if(trie_debug > 0)
399 printk("AT %p s=%u %u\n", tn, (unsigned int) sizeof(struct tnode),
400 (unsigned int) (sizeof(struct node) * 1<<bits));
401 return tn;
404 static void tnode_free(struct tnode *tn)
406 if(!tn) {
407 trie_bug("tnode_free\n");
409 if(IS_LEAF(tn)) {
410 free_leaf((struct leaf *)tn);
411 if(trie_debug > 0 )
412 printk("FL %p \n", tn);
414 else if(IS_TNODE(tn)) {
415 __tnode_free(tn);
416 if(trie_debug > 0 )
417 printk("FT %p \n", tn);
419 else {
420 trie_bug("tnode_free\n");
425 * Check whether a tnode 'n' is "full", i.e. it is an internal node
426 * and no bits are skipped. See discussion in dyntree paper p. 6
429 static inline int tnode_full(struct tnode *tn, struct node *n)
431 if(n == NULL || IS_LEAF(n))
432 return 0;
434 return ((struct tnode *) n)->pos == tn->pos + tn->bits;
437 static inline void put_child(struct trie *t, struct tnode *tn, int i, struct node *n)
439 tnode_put_child_reorg(tn, i, n, -1);
443 * Add a child at position i overwriting the old value.
444 * Update the value of full_children and empty_children.
447 static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull)
449 struct node *chi;
450 int isfull;
452 if(i >= 1<<tn->bits) {
453 printk("bits=%d, i=%d\n", tn->bits, i);
454 trie_bug("tnode_put_child_reorg bits");
456 write_lock_bh(&fib_lock);
457 chi = tn->child[i];
459 /* update emptyChildren */
460 if (n == NULL && chi != NULL)
461 tn->empty_children++;
462 else if (n != NULL && chi == NULL)
463 tn->empty_children--;
465 /* update fullChildren */
466 if (wasfull == -1)
467 wasfull = tnode_full(tn, chi);
469 isfull = tnode_full(tn, n);
470 if (wasfull && !isfull)
471 tn->full_children--;
473 else if (!wasfull && isfull)
474 tn->full_children++;
475 if(n)
476 NODE_SET_PARENT(n, tn);
478 tn->child[i] = n;
479 write_unlock_bh(&fib_lock);
482 static struct node *resize(struct trie *t, struct tnode *tn)
484 int i;
485 int err = 0;
487 if (!tn)
488 return NULL;
490 if(trie_debug)
491 printk("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
492 tn, inflate_threshold, halve_threshold);
494 /* No children */
495 if (tn->empty_children == tnode_child_length(tn)) {
496 tnode_free(tn);
497 return NULL;
499 /* One child */
500 if (tn->empty_children == tnode_child_length(tn) - 1)
501 for (i = 0; i < tnode_child_length(tn); i++) {
503 write_lock_bh(&fib_lock);
504 if (tn->child[i] != NULL) {
506 /* compress one level */
507 struct node *n = tn->child[i];
508 if(n)
509 NODE_INIT_PARENT(n, NODE_TYPE(n));
511 write_unlock_bh(&fib_lock);
512 tnode_free(tn);
513 return n;
515 write_unlock_bh(&fib_lock);
518 * Double as long as the resulting node has a number of
519 * nonempty nodes that are above the threshold.
523 * From "Implementing a dynamic compressed trie" by Stefan Nilsson of
524 * the Helsinki University of Technology and Matti Tikkanen of Nokia
525 * Telecommunications, page 6:
526 * "A node is doubled if the ratio of non-empty children to all
527 * children in the *doubled* node is at least 'high'."
529 * 'high' in this instance is the variable 'inflate_threshold'. It
530 * is expressed as a percentage, so we multiply it with
531 * tnode_child_length() and instead of multiplying by 2 (since the
532 * child array will be doubled by inflate()) and multiplying
533 * the left-hand side by 100 (to handle the percentage thing) we
534 * multiply the left-hand side by 50.
536 * The left-hand side may look a bit weird: tnode_child_length(tn)
537 * - tn->empty_children is of course the number of non-null children
538 * in the current node. tn->full_children is the number of "full"
539 * children, that is non-null tnodes with a skip value of 0.
540 * All of those will be doubled in the resulting inflated tnode, so
541 * we just count them one extra time here.
543 * A clearer way to write this would be:
545 * to_be_doubled = tn->full_children;
546 * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children -
547 * tn->full_children;
549 * new_child_length = tnode_child_length(tn) * 2;
551 * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) /
552 * new_child_length;
553 * if (new_fill_factor >= inflate_threshold)
555 * ...and so on, tho it would mess up the while() loop.
557 * anyway,
558 * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >=
559 * inflate_threshold
561 * avoid a division:
562 * 100 * (not_to_be_doubled + 2*to_be_doubled) >=
563 * inflate_threshold * new_child_length
565 * expand not_to_be_doubled and to_be_doubled, and shorten:
566 * 100 * (tnode_child_length(tn) - tn->empty_children +
567 * tn->full_children ) >= inflate_threshold * new_child_length
569 * expand new_child_length:
570 * 100 * (tnode_child_length(tn) - tn->empty_children +
571 * tn->full_children ) >=
572 * inflate_threshold * tnode_child_length(tn) * 2
574 * shorten again:
575 * 50 * (tn->full_children + tnode_child_length(tn) -
576 * tn->empty_children ) >= inflate_threshold *
577 * tnode_child_length(tn)
581 check_tnode(tn);
583 err = 0;
584 while ((tn->full_children > 0 &&
585 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >=
586 inflate_threshold * tnode_child_length(tn))) {
588 tn = inflate(t, tn, &err);
590 if(err) {
591 #ifdef CONFIG_IP_FIB_TRIE_STATS
592 t->stats.resize_node_skipped++;
593 #endif
594 break;
598 check_tnode(tn);
601 * Halve as long as the number of empty children in this
602 * node is above threshold.
605 err = 0;
606 while (tn->bits > 1 &&
607 100 * (tnode_child_length(tn) - tn->empty_children) <
608 halve_threshold * tnode_child_length(tn)) {
610 tn = halve(t, tn, &err);
612 if(err) {
613 #ifdef CONFIG_IP_FIB_TRIE_STATS
614 t->stats.resize_node_skipped++;
615 #endif
616 break;
621 /* Only one child remains */
623 if (tn->empty_children == tnode_child_length(tn) - 1)
624 for (i = 0; i < tnode_child_length(tn); i++) {
626 write_lock_bh(&fib_lock);
627 if (tn->child[i] != NULL) {
628 /* compress one level */
629 struct node *n = tn->child[i];
631 if(n)
632 NODE_INIT_PARENT(n, NODE_TYPE(n));
634 write_unlock_bh(&fib_lock);
635 tnode_free(tn);
636 return n;
638 write_unlock_bh(&fib_lock);
641 return (struct node *) tn;
644 static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err)
646 struct tnode *inode;
647 struct tnode *oldtnode = tn;
648 int olen = tnode_child_length(tn);
649 int i;
651 if(trie_debug)
652 printk("In inflate\n");
654 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1);
656 if (!tn) {
657 *err = -ENOMEM;
658 return oldtnode;
662 * Preallocate and store tnodes before the actual work so we
663 * don't get into an inconsistent state if memory allocation
664 * fails. In case of failure we return the oldnode and inflate
665 * of tnode is ignored.
668 for(i = 0; i < olen; i++) {
669 struct tnode *inode = (struct tnode *) tnode_get_child(oldtnode, i);
671 if (inode &&
672 IS_TNODE(inode) &&
673 inode->pos == oldtnode->pos + oldtnode->bits &&
674 inode->bits > 1) {
675 struct tnode *left, *right;
677 t_key m = TKEY_GET_MASK(inode->pos, 1);
679 left = tnode_new(inode->key&(~m), inode->pos + 1,
680 inode->bits - 1);
682 if(!left) {
683 *err = -ENOMEM;
684 break;
687 right = tnode_new(inode->key|m, inode->pos + 1,
688 inode->bits - 1);
690 if(!right) {
691 *err = -ENOMEM;
692 break;
695 put_child(t, tn, 2*i, (struct node *) left);
696 put_child(t, tn, 2*i+1, (struct node *) right);
700 if(*err) {
701 int size = tnode_child_length(tn);
702 int j;
704 for(j = 0; j < size; j++)
705 if( tn->child[j])
706 tnode_free((struct tnode *)tn->child[j]);
708 tnode_free(tn);
710 *err = -ENOMEM;
711 return oldtnode;
714 for(i = 0; i < olen; i++) {
715 struct node *node = tnode_get_child(oldtnode, i);
717 /* An empty child */
718 if (node == NULL)
719 continue;
721 /* A leaf or an internal node with skipped bits */
723 if(IS_LEAF(node) || ((struct tnode *) node)->pos >
724 tn->pos + tn->bits - 1) {
725 if(tkey_extract_bits(node->key, oldtnode->pos + oldtnode->bits,
726 1) == 0)
727 put_child(t, tn, 2*i, node);
728 else
729 put_child(t, tn, 2*i+1, node);
730 continue;
733 /* An internal node with two children */
734 inode = (struct tnode *) node;
736 if (inode->bits == 1) {
737 put_child(t, tn, 2*i, inode->child[0]);
738 put_child(t, tn, 2*i+1, inode->child[1]);
740 tnode_free(inode);
743 /* An internal node with more than two children */
744 else {
745 struct tnode *left, *right;
746 int size, j;
748 /* We will replace this node 'inode' with two new
749 * ones, 'left' and 'right', each with half of the
750 * original children. The two new nodes will have
751 * a position one bit further down the key and this
752 * means that the "significant" part of their keys
753 * (see the discussion near the top of this file)
754 * will differ by one bit, which will be "0" in
755 * left's key and "1" in right's key. Since we are
756 * moving the key position by one step, the bit that
757 * we are moving away from - the bit at position
758 * (inode->pos) - is the one that will differ between
759 * left and right. So... we synthesize that bit in the
760 * two new keys.
761 * The mask 'm' below will be a single "one" bit at
762 * the position (inode->pos)
765 /* Use the old key, but set the new significant
766 * bit to zero.
769 left = (struct tnode *) tnode_get_child(tn, 2*i);
770 put_child(t, tn, 2*i, NULL);
772 if(!left)
773 BUG();
775 right = (struct tnode *) tnode_get_child(tn, 2*i+1);
776 put_child(t, tn, 2*i+1, NULL);
778 if(!right)
779 BUG();
781 size = tnode_child_length(left);
782 for(j = 0; j < size; j++) {
783 put_child(t, left, j, inode->child[j]);
784 put_child(t, right, j, inode->child[j + size]);
786 put_child(t, tn, 2*i, resize(t, left));
787 put_child(t, tn, 2*i+1, resize(t, right));
789 tnode_free(inode);
792 tnode_free(oldtnode);
793 return tn;
796 static struct tnode *halve(struct trie *t, struct tnode *tn, int *err)
798 struct tnode *oldtnode = tn;
799 struct node *left, *right;
800 int i;
801 int olen = tnode_child_length(tn);
803 if(trie_debug) printk("In halve\n");
805 tn=tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1);
807 if (!tn) {
808 *err = -ENOMEM;
809 return oldtnode;
813 * Preallocate and store tnodes before the actual work so we
814 * don't get into an inconsistent state if memory allocation
815 * fails. In case of failure we return the oldnode and halve
816 * of tnode is ignored.
819 for(i = 0; i < olen; i += 2) {
820 left = tnode_get_child(oldtnode, i);
821 right = tnode_get_child(oldtnode, i+1);
823 /* Two nonempty children */
824 if( left && right) {
825 struct tnode *newBinNode =
826 tnode_new(left->key, tn->pos + tn->bits, 1);
828 if(!newBinNode) {
829 *err = -ENOMEM;
830 break;
832 put_child(t, tn, i/2, (struct node *)newBinNode);
836 if(*err) {
837 int size = tnode_child_length(tn);
838 int j;
840 for(j = 0; j < size; j++)
841 if( tn->child[j])
842 tnode_free((struct tnode *)tn->child[j]);
844 tnode_free(tn);
846 *err = -ENOMEM;
847 return oldtnode;
850 for(i = 0; i < olen; i += 2) {
851 left = tnode_get_child(oldtnode, i);
852 right = tnode_get_child(oldtnode, i+1);
854 /* At least one of the children is empty */
855 if (left == NULL) {
856 if (right == NULL) /* Both are empty */
857 continue;
858 put_child(t, tn, i/2, right);
859 } else if (right == NULL)
860 put_child(t, tn, i/2, left);
862 /* Two nonempty children */
863 else {
864 struct tnode *newBinNode =
865 (struct tnode *) tnode_get_child(tn, i/2);
866 put_child(t, tn, i/2, NULL);
868 if(!newBinNode)
869 BUG();
871 put_child(t, newBinNode, 0, left);
872 put_child(t, newBinNode, 1, right);
873 put_child(t, tn, i/2, resize(t, newBinNode));
876 tnode_free(oldtnode);
877 return tn;
880 static void *trie_init(struct trie *t)
882 if(t) {
883 t->size = 0;
884 t->trie = NULL;
885 t->revision = 0;
886 #ifdef CONFIG_IP_FIB_TRIE_STATS
887 memset(&t->stats, 0, sizeof(struct trie_use_stats));
888 #endif
890 return t;
893 static struct leaf_info *find_leaf_info(struct hlist_head *head, int plen)
895 struct hlist_node *node;
896 struct leaf_info *li;
898 hlist_for_each_entry(li, node, head, hlist) {
900 if ( li->plen == plen )
901 return li;
903 return NULL;
906 static inline struct list_head * get_fa_head(struct leaf *l, int plen)
908 struct list_head *fa_head=NULL;
909 struct leaf_info *li = find_leaf_info(&l->list, plen);
911 if(li)
912 fa_head = &li->falh;
914 return fa_head;
917 static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
919 struct leaf_info *li=NULL, *last=NULL;
920 struct hlist_node *node, *tmp;
922 write_lock_bh(&fib_lock);
924 if(hlist_empty(head))
925 hlist_add_head(&new->hlist, head);
926 else {
927 hlist_for_each_entry_safe(li, node, tmp, head, hlist) {
929 if (new->plen > li->plen)
930 break;
932 last = li;
934 if(last)
935 hlist_add_after(&last->hlist, &new->hlist);
936 else
937 hlist_add_before(&new->hlist, &li->hlist);
939 write_unlock_bh(&fib_lock);
942 static struct leaf *
943 fib_find_node(struct trie *t, u32 key)
945 int pos;
946 struct tnode *tn;
947 struct node *n;
949 pos = 0;
950 n=t->trie;
952 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
953 tn = (struct tnode *) n;
955 check_tnode(tn);
957 if(tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
958 pos=tn->pos + tn->bits;
959 n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
961 else
962 break;
964 /* Case we have found a leaf. Compare prefixes */
966 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
967 struct leaf *l = (struct leaf *) n;
968 return l;
970 return NULL;
973 static struct node *trie_rebalance(struct trie *t, struct tnode *tn)
975 int i = 0;
976 int wasfull;
977 t_key cindex, key;
978 struct tnode *tp = NULL;
980 if(!tn)
981 BUG();
983 key = tn->key;
984 i = 0;
986 while (tn != NULL && NODE_PARENT(tn) != NULL) {
988 if( i > 10 ) {
989 printk("Rebalance tn=%p \n", tn);
990 if(tn) printk("tn->parent=%p \n", NODE_PARENT(tn));
992 printk("Rebalance tp=%p \n", tp);
993 if(tp) printk("tp->parent=%p \n", NODE_PARENT(tp));
996 if( i > 12 ) BUG();
997 i++;
999 tp = NODE_PARENT(tn);
1000 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1001 wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
1002 tn = (struct tnode *) resize (t, (struct tnode *)tn);
1003 tnode_put_child_reorg((struct tnode *)tp, cindex,(struct node*)tn, wasfull);
1005 if(!NODE_PARENT(tn))
1006 break;
1008 tn = NODE_PARENT(tn);
1010 /* Handle last (top) tnode */
1011 if (IS_TNODE(tn))
1012 tn = (struct tnode*) resize(t, (struct tnode *)tn);
1014 return (struct node*) tn;
1017 static struct list_head *
1018 fib_insert_node(struct trie *t, int *err, u32 key, int plen)
1020 int pos, newpos;
1021 struct tnode *tp = NULL, *tn = NULL;
1022 struct node *n;
1023 struct leaf *l;
1024 int missbit;
1025 struct list_head *fa_head=NULL;
1026 struct leaf_info *li;
1027 t_key cindex;
1029 pos = 0;
1030 n=t->trie;
1032 /* If we point to NULL, stop. Either the tree is empty and we should
1033 * just put a new leaf in if, or we have reached an empty child slot,
1034 * and we should just put our new leaf in that.
1035 * If we point to a T_TNODE, check if it matches our key. Note that
1036 * a T_TNODE might be skipping any number of bits - its 'pos' need
1037 * not be the parent's 'pos'+'bits'!
1039 * If it does match the current key, get pos/bits from it, extract
1040 * the index from our key, push the T_TNODE and walk the tree.
1042 * If it doesn't, we have to replace it with a new T_TNODE.
1044 * If we point to a T_LEAF, it might or might not have the same key
1045 * as we do. If it does, just change the value, update the T_LEAF's
1046 * value, and return it.
1047 * If it doesn't, we need to replace it with a T_TNODE.
1050 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
1051 tn = (struct tnode *) n;
1053 check_tnode(tn);
1055 if(tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
1056 tp = tn;
1057 pos=tn->pos + tn->bits;
1058 n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
1060 if(n && NODE_PARENT(n) != tn) {
1061 printk("BUG tn=%p, n->parent=%p\n", tn, NODE_PARENT(n));
1062 BUG();
1065 else
1066 break;
1070 * n ----> NULL, LEAF or TNODE
1072 * tp is n's (parent) ----> NULL or TNODE
1075 if(tp && IS_LEAF(tp))
1076 BUG();
1079 /* Case 1: n is a leaf. Compare prefixes */
1081 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
1082 struct leaf *l = ( struct leaf *) n;
1084 li = leaf_info_new(plen);
1086 if(! li) {
1087 *err = -ENOMEM;
1088 goto err;
1091 fa_head = &li->falh;
1092 insert_leaf_info(&l->list, li);
1093 goto done;
1095 t->size++;
1096 l = leaf_new();
1098 if(! l) {
1099 *err = -ENOMEM;
1100 goto err;
1103 l->key = key;
1104 li = leaf_info_new(plen);
1106 if(! li) {
1107 tnode_free((struct tnode *) l);
1108 *err = -ENOMEM;
1109 goto err;
1112 fa_head = &li->falh;
1113 insert_leaf_info(&l->list, li);
1115 /* Case 2: n is NULL, and will just insert a new leaf */
1116 if (t->trie && n == NULL) {
1118 NODE_SET_PARENT(l, tp);
1120 if (!tp)
1121 BUG();
1123 else {
1124 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1125 put_child(t, (struct tnode *)tp, cindex, (struct node *)l);
1128 /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
1129 else {
1131 * Add a new tnode here
1132 * first tnode need some special handling
1135 if (tp)
1136 pos=tp->pos+tp->bits;
1137 else
1138 pos=0;
1139 if(n) {
1140 newpos = tkey_mismatch(key, pos, n->key);
1141 tn = tnode_new(n->key, newpos, 1);
1143 else {
1144 newpos = 0;
1145 tn = tnode_new(key, newpos, 1); /* First tnode */
1148 if(!tn) {
1149 free_leaf_info(li);
1150 tnode_free((struct tnode *) l);
1151 *err = -ENOMEM;
1152 goto err;
1155 NODE_SET_PARENT(tn, tp);
1157 missbit=tkey_extract_bits(key, newpos, 1);
1158 put_child(t, tn, missbit, (struct node *)l);
1159 put_child(t, tn, 1-missbit, n);
1161 if(tp) {
1162 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1163 put_child(t, (struct tnode *)tp, cindex, (struct node *)tn);
1165 else {
1166 t->trie = (struct node*) tn; /* First tnode */
1167 tp = tn;
1170 if(tp && tp->pos+tp->bits > 32) {
1171 printk("ERROR tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
1172 tp, tp->pos, tp->bits, key, plen);
1174 /* Rebalance the trie */
1175 t->trie = trie_rebalance(t, tp);
1176 done:
1177 t->revision++;
1178 err:;
1179 return fa_head;
1182 static int
1183 fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1184 struct nlmsghdr *nlhdr, struct netlink_skb_parms *req)
1186 struct trie *t = (struct trie *) tb->tb_data;
1187 struct fib_alias *fa, *new_fa;
1188 struct list_head *fa_head=NULL;
1189 struct fib_info *fi;
1190 int plen = r->rtm_dst_len;
1191 int type = r->rtm_type;
1192 u8 tos = r->rtm_tos;
1193 u32 key, mask;
1194 int err;
1195 struct leaf *l;
1197 if (plen > 32)
1198 return -EINVAL;
1200 key = 0;
1201 if (rta->rta_dst)
1202 memcpy(&key, rta->rta_dst, 4);
1204 key = ntohl(key);
1206 if(trie_debug)
1207 printk("Insert table=%d %08x/%d\n", tb->tb_id, key, plen);
1209 mask = ntohl( inet_make_mask(plen) );
1211 if(key & ~mask)
1212 return -EINVAL;
1214 key = key & mask;
1216 if ((fi = fib_create_info(r, rta, nlhdr, &err)) == NULL)
1217 goto err;
1219 l = fib_find_node(t, key);
1220 fa = NULL;
1222 if(l) {
1223 fa_head = get_fa_head(l, plen);
1224 fa = fib_find_alias(fa_head, tos, fi->fib_priority);
1227 /* Now fa, if non-NULL, points to the first fib alias
1228 * with the same keys [prefix,tos,priority], if such key already
1229 * exists or to the node before which we will insert new one.
1231 * If fa is NULL, we will need to allocate a new one and
1232 * insert to the head of f.
1234 * If f is NULL, no fib node matched the destination key
1235 * and we need to allocate a new one of those as well.
1238 if (fa &&
1239 fa->fa_info->fib_priority == fi->fib_priority) {
1240 struct fib_alias *fa_orig;
1242 err = -EEXIST;
1243 if (nlhdr->nlmsg_flags & NLM_F_EXCL)
1244 goto out;
1246 if (nlhdr->nlmsg_flags & NLM_F_REPLACE) {
1247 struct fib_info *fi_drop;
1248 u8 state;
1250 write_lock_bh(&fib_lock);
1252 fi_drop = fa->fa_info;
1253 fa->fa_info = fi;
1254 fa->fa_type = type;
1255 fa->fa_scope = r->rtm_scope;
1256 state = fa->fa_state;
1257 fa->fa_state &= ~FA_S_ACCESSED;
1259 write_unlock_bh(&fib_lock);
1261 fib_release_info(fi_drop);
1262 if (state & FA_S_ACCESSED)
1263 rt_cache_flush(-1);
1265 goto succeeded;
1267 /* Error if we find a perfect match which
1268 * uses the same scope, type, and nexthop
1269 * information.
1271 fa_orig = fa;
1272 list_for_each_entry(fa, fa_orig->fa_list.prev, fa_list) {
1273 if (fa->fa_tos != tos)
1274 break;
1275 if (fa->fa_info->fib_priority != fi->fib_priority)
1276 break;
1277 if (fa->fa_type == type &&
1278 fa->fa_scope == r->rtm_scope &&
1279 fa->fa_info == fi) {
1280 goto out;
1283 if (!(nlhdr->nlmsg_flags & NLM_F_APPEND))
1284 fa = fa_orig;
1286 err = -ENOENT;
1287 if (!(nlhdr->nlmsg_flags&NLM_F_CREATE))
1288 goto out;
1290 err = -ENOBUFS;
1291 new_fa = kmem_cache_alloc(fn_alias_kmem, SLAB_KERNEL);
1292 if (new_fa == NULL)
1293 goto out;
1295 new_fa->fa_info = fi;
1296 new_fa->fa_tos = tos;
1297 new_fa->fa_type = type;
1298 new_fa->fa_scope = r->rtm_scope;
1299 new_fa->fa_state = 0;
1300 #if 0
1301 new_fa->dst = NULL;
1302 #endif
1304 * Insert new entry to the list.
1307 if(!fa_head) {
1308 fa_head = fib_insert_node(t, &err, key, plen);
1309 err = 0;
1310 if(err)
1311 goto out_free_new_fa;
1314 write_lock_bh(&fib_lock);
1316 list_add_tail(&new_fa->fa_list,
1317 (fa ? &fa->fa_list : fa_head));
1319 write_unlock_bh(&fib_lock);
1321 rt_cache_flush(-1);
1322 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, nlhdr, req);
1323 succeeded:
1324 return 0;
1326 out_free_new_fa:
1327 kmem_cache_free(fn_alias_kmem, new_fa);
1328 out:
1329 fib_release_info(fi);
1330 err:;
1331 return err;
1334 static inline int check_leaf(struct trie *t, struct leaf *l, t_key key, int *plen, const struct flowi *flp,
1335 struct fib_result *res, int *err)
1337 int i;
1338 t_key mask;
1339 struct leaf_info *li;
1340 struct hlist_head *hhead = &l->list;
1341 struct hlist_node *node;
1343 hlist_for_each_entry(li, node, hhead, hlist) {
1345 i = li->plen;
1346 mask = ntohl(inet_make_mask(i));
1347 if (l->key != (key & mask))
1348 continue;
1350 if (((*err) = fib_semantic_match(&li->falh, flp, res, l->key, mask, i)) == 0) {
1351 *plen = i;
1352 #ifdef CONFIG_IP_FIB_TRIE_STATS
1353 t->stats.semantic_match_passed++;
1354 #endif
1355 return 1;
1357 #ifdef CONFIG_IP_FIB_TRIE_STATS
1358 t->stats.semantic_match_miss++;
1359 #endif
1361 return 0;
1364 static int
1365 fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
1367 struct trie *t = (struct trie *) tb->tb_data;
1368 int plen, ret = 0;
1369 struct node *n;
1370 struct tnode *pn;
1371 int pos, bits;
1372 t_key key=ntohl(flp->fl4_dst);
1373 int chopped_off;
1374 t_key cindex = 0;
1375 int current_prefix_length = KEYLENGTH;
1376 n = t->trie;
1378 read_lock(&fib_lock);
1379 if(!n)
1380 goto failed;
1382 #ifdef CONFIG_IP_FIB_TRIE_STATS
1383 t->stats.gets++;
1384 #endif
1386 /* Just a leaf? */
1387 if (IS_LEAF(n)) {
1388 if( check_leaf(t, (struct leaf *)n, key, &plen, flp, res, &ret) )
1389 goto found;
1390 goto failed;
1392 pn = (struct tnode *) n;
1393 chopped_off = 0;
1395 while (pn) {
1397 pos = pn->pos;
1398 bits = pn->bits;
1400 if(!chopped_off)
1401 cindex = tkey_extract_bits(MASK_PFX(key, current_prefix_length), pos, bits);
1403 n = tnode_get_child(pn, cindex);
1405 if (n == NULL) {
1406 #ifdef CONFIG_IP_FIB_TRIE_STATS
1407 t->stats.null_node_hit++;
1408 #endif
1409 goto backtrace;
1412 if (IS_TNODE(n)) {
1413 #define HL_OPTIMIZE
1414 #ifdef HL_OPTIMIZE
1415 struct tnode *cn = (struct tnode *)n;
1416 t_key node_prefix, key_prefix, pref_mismatch;
1417 int mp;
1420 * It's a tnode, and we can do some extra checks here if we
1421 * like, to avoid descending into a dead-end branch.
1422 * This tnode is in the parent's child array at index
1423 * key[p_pos..p_pos+p_bits] but potentially with some bits
1424 * chopped off, so in reality the index may be just a
1425 * subprefix, padded with zero at the end.
1426 * We can also take a look at any skipped bits in this
1427 * tnode - everything up to p_pos is supposed to be ok,
1428 * and the non-chopped bits of the index (se previous
1429 * paragraph) are also guaranteed ok, but the rest is
1430 * considered unknown.
1432 * The skipped bits are key[pos+bits..cn->pos].
1435 /* If current_prefix_length < pos+bits, we are already doing
1436 * actual prefix matching, which means everything from
1437 * pos+(bits-chopped_off) onward must be zero along some
1438 * branch of this subtree - otherwise there is *no* valid
1439 * prefix present. Here we can only check the skipped
1440 * bits. Remember, since we have already indexed into the
1441 * parent's child array, we know that the bits we chopped of
1442 * *are* zero.
1445 /* NOTA BENE: CHECKING ONLY SKIPPED BITS FOR THE NEW NODE HERE */
1447 if (current_prefix_length < pos+bits) {
1448 if (tkey_extract_bits(cn->key, current_prefix_length,
1449 cn->pos - current_prefix_length) != 0 ||
1450 !(cn->child[0]))
1451 goto backtrace;
1455 * If chopped_off=0, the index is fully validated and we
1456 * only need to look at the skipped bits for this, the new,
1457 * tnode. What we actually want to do is to find out if
1458 * these skipped bits match our key perfectly, or if we will
1459 * have to count on finding a matching prefix further down,
1460 * because if we do, we would like to have some way of
1461 * verifying the existence of such a prefix at this point.
1464 /* The only thing we can do at this point is to verify that
1465 * any such matching prefix can indeed be a prefix to our
1466 * key, and if the bits in the node we are inspecting that
1467 * do not match our key are not ZERO, this cannot be true.
1468 * Thus, find out where there is a mismatch (before cn->pos)
1469 * and verify that all the mismatching bits are zero in the
1470 * new tnode's key.
1473 /* Note: We aren't very concerned about the piece of the key
1474 * that precede pn->pos+pn->bits, since these have already been
1475 * checked. The bits after cn->pos aren't checked since these are
1476 * by definition "unknown" at this point. Thus, what we want to
1477 * see is if we are about to enter the "prefix matching" state,
1478 * and in that case verify that the skipped bits that will prevail
1479 * throughout this subtree are zero, as they have to be if we are
1480 * to find a matching prefix.
1483 node_prefix = MASK_PFX(cn->key, cn->pos);
1484 key_prefix = MASK_PFX(key, cn->pos);
1485 pref_mismatch = key_prefix^node_prefix;
1486 mp = 0;
1488 /* In short: If skipped bits in this node do not match the search
1489 * key, enter the "prefix matching" state.directly.
1491 if (pref_mismatch) {
1492 while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) {
1493 mp++;
1494 pref_mismatch = pref_mismatch <<1;
1496 key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp);
1498 if (key_prefix != 0)
1499 goto backtrace;
1501 if (current_prefix_length >= cn->pos)
1502 current_prefix_length=mp;
1504 #endif
1505 pn = (struct tnode *)n; /* Descend */
1506 chopped_off = 0;
1507 continue;
1509 if (IS_LEAF(n)) {
1510 if( check_leaf(t, (struct leaf *)n, key, &plen, flp, res, &ret))
1511 goto found;
1513 backtrace:
1514 chopped_off++;
1516 /* As zero don't change the child key (cindex) */
1517 while ((chopped_off <= pn->bits) && !(cindex & (1<<(chopped_off-1)))) {
1518 chopped_off++;
1521 /* Decrease current_... with bits chopped off */
1522 if (current_prefix_length > pn->pos + pn->bits - chopped_off)
1523 current_prefix_length = pn->pos + pn->bits - chopped_off;
1526 * Either we do the actual chop off according or if we have
1527 * chopped off all bits in this tnode walk up to our parent.
1530 if(chopped_off <= pn->bits)
1531 cindex &= ~(1 << (chopped_off-1));
1532 else {
1533 if( NODE_PARENT(pn) == NULL)
1534 goto failed;
1536 /* Get Child's index */
1537 cindex = tkey_extract_bits(pn->key, NODE_PARENT(pn)->pos, NODE_PARENT(pn)->bits);
1538 pn = NODE_PARENT(pn);
1539 chopped_off = 0;
1541 #ifdef CONFIG_IP_FIB_TRIE_STATS
1542 t->stats.backtrack++;
1543 #endif
1544 goto backtrace;
1547 failed:
1548 ret = 1;
1549 found:
1550 read_unlock(&fib_lock);
1551 return ret;
1554 static int trie_leaf_remove(struct trie *t, t_key key)
1556 t_key cindex;
1557 struct tnode *tp = NULL;
1558 struct node *n = t->trie;
1559 struct leaf *l;
1561 if(trie_debug)
1562 printk("entering trie_leaf_remove(%p)\n", n);
1564 /* Note that in the case skipped bits, those bits are *not* checked!
1565 * When we finish this, we will have NULL or a T_LEAF, and the
1566 * T_LEAF may or may not match our key.
1569 while (n != NULL && IS_TNODE(n)) {
1570 struct tnode *tn = (struct tnode *) n;
1571 check_tnode(tn);
1572 n = tnode_get_child(tn ,tkey_extract_bits(key, tn->pos, tn->bits));
1574 if(n && NODE_PARENT(n) != tn) {
1575 printk("BUG tn=%p, n->parent=%p\n", tn, NODE_PARENT(n));
1576 BUG();
1579 l = (struct leaf *) n;
1581 if(!n || !tkey_equals(l->key, key))
1582 return 0;
1585 * Key found.
1586 * Remove the leaf and rebalance the tree
1589 t->revision++;
1590 t->size--;
1592 tp = NODE_PARENT(n);
1593 tnode_free((struct tnode *) n);
1595 if(tp) {
1596 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1597 put_child(t, (struct tnode *)tp, cindex, NULL);
1598 t->trie = trie_rebalance(t, tp);
1600 else
1601 t->trie = NULL;
1603 return 1;
1606 static int
1607 fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1608 struct nlmsghdr *nlhdr, struct netlink_skb_parms *req)
1610 struct trie *t = (struct trie *) tb->tb_data;
1611 u32 key, mask;
1612 int plen = r->rtm_dst_len;
1613 u8 tos = r->rtm_tos;
1614 struct fib_alias *fa, *fa_to_delete;
1615 struct list_head *fa_head;
1616 struct leaf *l;
1618 if (plen > 32)
1619 return -EINVAL;
1621 key = 0;
1622 if (rta->rta_dst)
1623 memcpy(&key, rta->rta_dst, 4);
1625 key = ntohl(key);
1626 mask = ntohl( inet_make_mask(plen) );
1628 if(key & ~mask)
1629 return -EINVAL;
1631 key = key & mask;
1632 l = fib_find_node(t, key);
1634 if(!l)
1635 return -ESRCH;
1637 fa_head = get_fa_head(l, plen);
1638 fa = fib_find_alias(fa_head, tos, 0);
1640 if (!fa)
1641 return -ESRCH;
1643 if (trie_debug)
1644 printk("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
1646 fa_to_delete = NULL;
1647 fa_head = fa->fa_list.prev;
1648 list_for_each_entry(fa, fa_head, fa_list) {
1649 struct fib_info *fi = fa->fa_info;
1651 if (fa->fa_tos != tos)
1652 break;
1654 if ((!r->rtm_type ||
1655 fa->fa_type == r->rtm_type) &&
1656 (r->rtm_scope == RT_SCOPE_NOWHERE ||
1657 fa->fa_scope == r->rtm_scope) &&
1658 (!r->rtm_protocol ||
1659 fi->fib_protocol == r->rtm_protocol) &&
1660 fib_nh_match(r, nlhdr, rta, fi) == 0) {
1661 fa_to_delete = fa;
1662 break;
1666 if (fa_to_delete) {
1667 int kill_li = 0;
1668 struct leaf_info *li;
1670 fa = fa_to_delete;
1671 rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req);
1673 l = fib_find_node(t, key);
1674 li = find_leaf_info(&l->list, plen);
1676 write_lock_bh(&fib_lock);
1678 list_del(&fa->fa_list);
1680 if(list_empty(fa_head)) {
1681 hlist_del(&li->hlist);
1682 kill_li = 1;
1684 write_unlock_bh(&fib_lock);
1686 if(kill_li)
1687 free_leaf_info(li);
1689 if(hlist_empty(&l->list))
1690 trie_leaf_remove(t, key);
1692 if (fa->fa_state & FA_S_ACCESSED)
1693 rt_cache_flush(-1);
1695 fn_free_alias(fa);
1696 return 0;
1698 return -ESRCH;
1701 static int trie_flush_list(struct trie *t, struct list_head *head)
1703 struct fib_alias *fa, *fa_node;
1704 int found = 0;
1706 list_for_each_entry_safe(fa, fa_node, head, fa_list) {
1707 struct fib_info *fi = fa->fa_info;
1709 if (fi && (fi->fib_flags&RTNH_F_DEAD)) {
1711 write_lock_bh(&fib_lock);
1712 list_del(&fa->fa_list);
1713 write_unlock_bh(&fib_lock);
1715 fn_free_alias(fa);
1716 found++;
1719 return found;
1722 static int trie_flush_leaf(struct trie *t, struct leaf *l)
1724 int found = 0;
1725 struct hlist_head *lih = &l->list;
1726 struct hlist_node *node, *tmp;
1727 struct leaf_info *li = NULL;
1729 hlist_for_each_entry_safe(li, node, tmp, lih, hlist) {
1731 found += trie_flush_list(t, &li->falh);
1733 if (list_empty(&li->falh)) {
1735 write_lock_bh(&fib_lock);
1736 hlist_del(&li->hlist);
1737 write_unlock_bh(&fib_lock);
1739 free_leaf_info(li);
1742 return found;
1745 static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf)
1747 struct node *c = (struct node *) thisleaf;
1748 struct tnode *p;
1749 int idx;
1751 if(c == NULL) {
1752 if(t->trie == NULL)
1753 return NULL;
1755 if (IS_LEAF(t->trie)) /* trie w. just a leaf */
1756 return (struct leaf *) t->trie;
1758 p = (struct tnode*) t->trie; /* Start */
1760 else
1761 p = (struct tnode *) NODE_PARENT(c);
1762 while (p) {
1763 int pos, last;
1765 /* Find the next child of the parent */
1766 if(c)
1767 pos = 1 + tkey_extract_bits(c->key, p->pos, p->bits);
1768 else
1769 pos = 0;
1771 last = 1 << p->bits;
1772 for(idx = pos; idx < last ; idx++) {
1773 if( p->child[idx]) {
1775 /* Decend if tnode */
1777 while (IS_TNODE(p->child[idx])) {
1778 p = (struct tnode*) p->child[idx];
1779 idx = 0;
1781 /* Rightmost non-NULL branch */
1782 if( p && IS_TNODE(p) )
1783 while ( p->child[idx] == NULL && idx < (1 << p->bits) ) idx++;
1785 /* Done with this tnode? */
1786 if( idx >= (1 << p->bits) || p->child[idx] == NULL )
1787 goto up;
1789 return (struct leaf*) p->child[idx];
1793 /* No more children go up one step */
1794 c = (struct node*) p;
1795 p = (struct tnode *) NODE_PARENT(p);
1797 return NULL; /* Ready. Root of trie */
1800 static int fn_trie_flush(struct fib_table *tb)
1802 struct trie *t = (struct trie *) tb->tb_data;
1803 struct leaf *ll = NULL, *l = NULL;
1804 int found = 0, h;
1806 t->revision++;
1808 for (h=0; (l = nextleaf(t, l)) != NULL; h++) {
1809 found += trie_flush_leaf(t, l);
1811 if (ll && hlist_empty(&ll->list))
1812 trie_leaf_remove(t, ll->key);
1813 ll = l;
1816 if (ll && hlist_empty(&ll->list))
1817 trie_leaf_remove(t, ll->key);
1819 if(trie_debug)
1820 printk("trie_flush found=%d\n", found);
1821 return found;
1824 static int trie_last_dflt=-1;
1826 static void
1827 fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
1829 struct trie *t = (struct trie *) tb->tb_data;
1830 int order, last_idx;
1831 struct fib_info *fi = NULL;
1832 struct fib_info *last_resort;
1833 struct fib_alias *fa = NULL;
1834 struct list_head *fa_head;
1835 struct leaf *l;
1837 last_idx = -1;
1838 last_resort = NULL;
1839 order = -1;
1841 read_lock(&fib_lock);
1843 l = fib_find_node(t, 0);
1844 if(!l)
1845 goto out;
1847 fa_head = get_fa_head(l, 0);
1848 if(!fa_head)
1849 goto out;
1851 if (list_empty(fa_head))
1852 goto out;
1854 list_for_each_entry(fa, fa_head, fa_list) {
1855 struct fib_info *next_fi = fa->fa_info;
1857 if (fa->fa_scope != res->scope ||
1858 fa->fa_type != RTN_UNICAST)
1859 continue;
1861 if (next_fi->fib_priority > res->fi->fib_priority)
1862 break;
1863 if (!next_fi->fib_nh[0].nh_gw ||
1864 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
1865 continue;
1866 fa->fa_state |= FA_S_ACCESSED;
1868 if (fi == NULL) {
1869 if (next_fi != res->fi)
1870 break;
1871 } else if (!fib_detect_death(fi, order, &last_resort,
1872 &last_idx, &trie_last_dflt)) {
1873 if (res->fi)
1874 fib_info_put(res->fi);
1875 res->fi = fi;
1876 atomic_inc(&fi->fib_clntref);
1877 trie_last_dflt = order;
1878 goto out;
1880 fi = next_fi;
1881 order++;
1883 if (order <= 0 || fi == NULL) {
1884 trie_last_dflt = -1;
1885 goto out;
1888 if (!fib_detect_death(fi, order, &last_resort, &last_idx, &trie_last_dflt)) {
1889 if (res->fi)
1890 fib_info_put(res->fi);
1891 res->fi = fi;
1892 atomic_inc(&fi->fib_clntref);
1893 trie_last_dflt = order;
1894 goto out;
1896 if (last_idx >= 0) {
1897 if (res->fi)
1898 fib_info_put(res->fi);
1899 res->fi = last_resort;
1900 if (last_resort)
1901 atomic_inc(&last_resort->fib_clntref);
1903 trie_last_dflt = last_idx;
1904 out:;
1905 read_unlock(&fib_lock);
1908 static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fib_table *tb,
1909 struct sk_buff *skb, struct netlink_callback *cb)
1911 int i, s_i;
1912 struct fib_alias *fa;
1914 u32 xkey=htonl(key);
1916 s_i=cb->args[3];
1917 i = 0;
1919 list_for_each_entry(fa, fah, fa_list) {
1920 if (i < s_i) {
1921 i++;
1922 continue;
1924 if (fa->fa_info->fib_nh == NULL) {
1925 printk("Trie error _fib_nh=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen);
1926 i++;
1927 continue;
1929 if (fa->fa_info == NULL) {
1930 printk("Trie error fa_info=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen);
1931 i++;
1932 continue;
1935 if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
1936 cb->nlh->nlmsg_seq,
1937 RTM_NEWROUTE,
1938 tb->tb_id,
1939 fa->fa_type,
1940 fa->fa_scope,
1941 &xkey,
1942 plen,
1943 fa->fa_tos,
1944 fa->fa_info, 0) < 0) {
1945 cb->args[3] = i;
1946 return -1;
1948 i++;
1950 cb->args[3]=i;
1951 return skb->len;
1954 static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, struct sk_buff *skb,
1955 struct netlink_callback *cb)
1957 int h, s_h;
1958 struct list_head *fa_head;
1959 struct leaf *l = NULL;
1960 s_h=cb->args[2];
1962 for (h=0; (l = nextleaf(t, l)) != NULL; h++) {
1964 if (h < s_h)
1965 continue;
1966 if (h > s_h)
1967 memset(&cb->args[3], 0,
1968 sizeof(cb->args) - 3*sizeof(cb->args[0]));
1970 fa_head = get_fa_head(l, plen);
1972 if(!fa_head)
1973 continue;
1975 if(list_empty(fa_head))
1976 continue;
1978 if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) {
1979 cb->args[2]=h;
1980 return -1;
1983 cb->args[2]=h;
1984 return skb->len;
1987 static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb)
1989 int m, s_m;
1990 struct trie *t = (struct trie *) tb->tb_data;
1992 s_m = cb->args[1];
1994 read_lock(&fib_lock);
1995 for (m=0; m<=32; m++) {
1997 if (m < s_m)
1998 continue;
1999 if (m > s_m)
2000 memset(&cb->args[2], 0,
2001 sizeof(cb->args) - 2*sizeof(cb->args[0]));
2003 if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) {
2004 cb->args[1] = m;
2005 goto out;
2008 read_unlock(&fib_lock);
2009 cb->args[1] = m;
2010 return skb->len;
2011 out:
2012 read_unlock(&fib_lock);
2013 return -1;
2016 /* Fix more generic FIB names for init later */
2018 #ifdef CONFIG_IP_MULTIPLE_TABLES
2019 struct fib_table * fib_hash_init(int id)
2020 #else
2021 struct fib_table * __init fib_hash_init(int id)
2022 #endif
2024 struct fib_table *tb;
2025 struct trie *t;
2027 if (fn_alias_kmem == NULL)
2028 fn_alias_kmem = kmem_cache_create("ip_fib_alias",
2029 sizeof(struct fib_alias),
2030 0, SLAB_HWCACHE_ALIGN,
2031 NULL, NULL);
2033 tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie),
2034 GFP_KERNEL);
2035 if (tb == NULL)
2036 return NULL;
2038 tb->tb_id = id;
2039 tb->tb_lookup = fn_trie_lookup;
2040 tb->tb_insert = fn_trie_insert;
2041 tb->tb_delete = fn_trie_delete;
2042 tb->tb_flush = fn_trie_flush;
2043 tb->tb_select_default = fn_trie_select_default;
2044 tb->tb_dump = fn_trie_dump;
2045 memset(tb->tb_data, 0, sizeof(struct trie));
2047 t = (struct trie *) tb->tb_data;
2049 trie_init(t);
2051 if (id == RT_TABLE_LOCAL)
2052 trie_local=t;
2053 else if (id == RT_TABLE_MAIN)
2054 trie_main=t;
2056 if (id == RT_TABLE_LOCAL)
2057 printk("IPv4 FIB: Using LC-trie version %s\n", VERSION);
2059 return tb;
2062 /* Trie dump functions */
2064 static void putspace_seq(struct seq_file *seq, int n)
2066 while (n--) seq_printf(seq, " ");
2069 static void printbin_seq(struct seq_file *seq, unsigned int v, int bits)
2071 while (bits--)
2072 seq_printf(seq, "%s", (v & (1<<bits))?"1":"0");
2075 static void printnode_seq(struct seq_file *seq, int indent, struct node *n,
2076 int pend, int cindex, int bits)
2078 putspace_seq(seq, indent);
2079 if (IS_LEAF(n))
2080 seq_printf(seq, "|");
2081 else
2082 seq_printf(seq, "+");
2083 if (bits) {
2084 seq_printf(seq, "%d/", cindex);
2085 printbin_seq(seq, cindex, bits);
2086 seq_printf(seq, ": ");
2088 else
2089 seq_printf(seq, "<root>: ");
2090 seq_printf(seq, "%s:%p ", IS_LEAF(n)?"Leaf":"Internal node", n);
2092 if (IS_LEAF(n))
2093 seq_printf(seq, "key=%d.%d.%d.%d\n",
2094 n->key >> 24, (n->key >> 16) % 256, (n->key >> 8) % 256, n->key % 256);
2095 else {
2096 int plen=((struct tnode *)n)->pos;
2097 t_key prf=MASK_PFX(n->key, plen);
2098 seq_printf(seq, "key=%d.%d.%d.%d/%d\n",
2099 prf >> 24, (prf >> 16) % 256, (prf >> 8) % 256, prf % 256, plen);
2101 if (IS_LEAF(n)) {
2102 struct leaf *l=(struct leaf *)n;
2103 struct fib_alias *fa;
2104 int i;
2105 for (i=32; i>=0; i--)
2106 if(find_leaf_info(&l->list, i)) {
2108 struct list_head *fa_head = get_fa_head(l, i);
2110 if(!fa_head)
2111 continue;
2113 if(list_empty(fa_head))
2114 continue;
2116 putspace_seq(seq, indent+2);
2117 seq_printf(seq, "{/%d...dumping}\n", i);
2120 list_for_each_entry(fa, fa_head, fa_list) {
2121 putspace_seq(seq, indent+2);
2122 if (fa->fa_info->fib_nh == NULL) {
2123 seq_printf(seq, "Error _fib_nh=NULL\n");
2124 continue;
2126 if (fa->fa_info == NULL) {
2127 seq_printf(seq, "Error fa_info=NULL\n");
2128 continue;
2131 seq_printf(seq, "{type=%d scope=%d TOS=%d}\n",
2132 fa->fa_type,
2133 fa->fa_scope,
2134 fa->fa_tos);
2138 else if (IS_TNODE(n)) {
2139 struct tnode *tn=(struct tnode *)n;
2140 putspace_seq(seq, indent); seq_printf(seq, "| ");
2141 seq_printf(seq, "{key prefix=%08x/", tn->key&TKEY_GET_MASK(0, tn->pos));
2142 printbin_seq(seq, tkey_extract_bits(tn->key, 0, tn->pos), tn->pos);
2143 seq_printf(seq, "}\n");
2144 putspace_seq(seq, indent); seq_printf(seq, "| ");
2145 seq_printf(seq, "{pos=%d", tn->pos);
2146 seq_printf(seq, " (skip=%d bits)", tn->pos - pend);
2147 seq_printf(seq, " bits=%d (%u children)}\n", tn->bits, (1 << tn->bits));
2148 putspace_seq(seq, indent); seq_printf(seq, "| ");
2149 seq_printf(seq, "{empty=%d full=%d}\n", tn->empty_children, tn->full_children);
2153 static void trie_dump_seq(struct seq_file *seq, struct trie *t)
2155 struct node *n=t->trie;
2156 int cindex=0;
2157 int indent=1;
2158 int pend=0;
2159 int depth = 0;
2161 read_lock(&fib_lock);
2163 seq_printf(seq, "------ trie_dump of t=%p ------\n", t);
2164 if (n) {
2165 printnode_seq(seq, indent, n, pend, cindex, 0);
2166 if (IS_TNODE(n)) {
2167 struct tnode *tn=(struct tnode *)n;
2168 pend = tn->pos+tn->bits;
2169 putspace_seq(seq, indent); seq_printf(seq, "\\--\n");
2170 indent += 3;
2171 depth++;
2173 while (tn && cindex < (1 << tn->bits)) {
2174 if (tn->child[cindex]) {
2176 /* Got a child */
2178 printnode_seq(seq, indent, tn->child[cindex], pend, cindex, tn->bits);
2179 if (IS_LEAF(tn->child[cindex])) {
2180 cindex++;
2183 else {
2185 * New tnode. Decend one level
2188 depth++;
2189 n=tn->child[cindex];
2190 tn=(struct tnode *)n;
2191 pend=tn->pos+tn->bits;
2192 putspace_seq(seq, indent); seq_printf(seq, "\\--\n");
2193 indent+=3;
2194 cindex=0;
2197 else
2198 cindex++;
2201 * Test if we are done
2204 while (cindex >= (1 << tn->bits)) {
2207 * Move upwards and test for root
2208 * pop off all traversed nodes
2211 if (NODE_PARENT(tn) == NULL) {
2212 tn = NULL;
2213 n = NULL;
2214 break;
2216 else {
2217 cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits);
2218 tn = NODE_PARENT(tn);
2219 cindex++;
2220 n=(struct node *)tn;
2221 pend=tn->pos+tn->bits;
2222 indent-=3;
2223 depth--;
2228 else n = NULL;
2230 else seq_printf(seq, "------ trie is empty\n");
2232 read_unlock(&fib_lock);
2235 static struct trie_stat *trie_stat_new(void)
2237 struct trie_stat *s = kmalloc(sizeof(struct trie_stat), GFP_KERNEL);
2238 int i;
2240 if(s) {
2241 s->totdepth = 0;
2242 s->maxdepth = 0;
2243 s->tnodes = 0;
2244 s->leaves = 0;
2245 s->nullpointers = 0;
2247 for(i=0; i< MAX_CHILDS; i++)
2248 s->nodesizes[i] = 0;
2250 return s;
2253 static struct trie_stat *trie_collect_stats(struct trie *t)
2255 struct node *n=t->trie;
2256 struct trie_stat *s = trie_stat_new();
2257 int cindex = 0;
2258 int indent = 1;
2259 int pend = 0;
2260 int depth = 0;
2262 read_lock(&fib_lock);
2264 if (s) {
2265 if (n) {
2266 if (IS_TNODE(n)) {
2267 struct tnode *tn = (struct tnode *)n;
2268 pend=tn->pos+tn->bits;
2269 indent += 3;
2270 s->nodesizes[tn->bits]++;
2271 depth++;
2273 while (tn && cindex < (1 << tn->bits)) {
2274 if (tn->child[cindex]) {
2275 /* Got a child */
2277 if (IS_LEAF(tn->child[cindex])) {
2278 cindex++;
2280 /* stats */
2281 if (depth > s->maxdepth)
2282 s->maxdepth = depth;
2283 s->totdepth += depth;
2284 s->leaves++;
2287 else {
2289 * New tnode. Decend one level
2292 s->tnodes++;
2293 s->nodesizes[tn->bits]++;
2294 depth++;
2296 n = tn->child[cindex];
2297 tn = (struct tnode *)n;
2298 pend = tn->pos+tn->bits;
2300 indent += 3;
2301 cindex = 0;
2304 else {
2305 cindex++;
2306 s->nullpointers++;
2310 * Test if we are done
2313 while (cindex >= (1 << tn->bits)) {
2316 * Move upwards and test for root
2317 * pop off all traversed nodes
2321 if (NODE_PARENT(tn) == NULL) {
2322 tn = NULL;
2323 n = NULL;
2324 break;
2326 else {
2327 cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits);
2328 tn = NODE_PARENT(tn);
2329 cindex++;
2330 n = (struct node *)tn;
2331 pend=tn->pos+tn->bits;
2332 indent -= 3;
2333 depth--;
2338 else n = NULL;
2342 read_unlock(&fib_lock);
2343 return s;
2346 #ifdef CONFIG_PROC_FS
2348 static struct fib_alias *fib_triestat_get_first(struct seq_file *seq)
2350 return NULL;
2353 static struct fib_alias *fib_triestat_get_next(struct seq_file *seq)
2355 return NULL;
2358 static void *fib_triestat_seq_start(struct seq_file *seq, loff_t *pos)
2360 void *v = NULL;
2362 if (ip_fib_main_table)
2363 v = *pos ? fib_triestat_get_next(seq) : SEQ_START_TOKEN;
2364 return v;
2367 static void *fib_triestat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2369 ++*pos;
2370 return v == SEQ_START_TOKEN ? fib_triestat_get_first(seq) : fib_triestat_get_next(seq);
2373 static void fib_triestat_seq_stop(struct seq_file *seq, void *v)
2379 * This outputs /proc/net/fib_triestats
2381 * It always works in backward compatibility mode.
2382 * The format of the file is not supposed to be changed.
2385 static void collect_and_show(struct trie *t, struct seq_file *seq)
2387 int bytes = 0; /* How many bytes are used, a ref is 4 bytes */
2388 int i, max, pointers;
2389 struct trie_stat *stat;
2390 int avdepth;
2392 stat = trie_collect_stats(t);
2394 bytes=0;
2395 seq_printf(seq, "trie=%p\n", t);
2397 if (stat) {
2398 if (stat->leaves)
2399 avdepth=stat->totdepth*100 / stat->leaves;
2400 else
2401 avdepth=0;
2402 seq_printf(seq, "Aver depth: %d.%02d\n", avdepth / 100, avdepth % 100 );
2403 seq_printf(seq, "Max depth: %4d\n", stat->maxdepth);
2405 seq_printf(seq, "Leaves: %d\n", stat->leaves);
2406 bytes += sizeof(struct leaf) * stat->leaves;
2407 seq_printf(seq, "Internal nodes: %d\n", stat->tnodes);
2408 bytes += sizeof(struct tnode) * stat->tnodes;
2410 max = MAX_CHILDS-1;
2412 while (max >= 0 && stat->nodesizes[max] == 0)
2413 max--;
2414 pointers = 0;
2416 for (i = 1; i <= max; i++)
2417 if (stat->nodesizes[i] != 0) {
2418 seq_printf(seq, " %d: %d", i, stat->nodesizes[i]);
2419 pointers += (1<<i) * stat->nodesizes[i];
2421 seq_printf(seq, "\n");
2422 seq_printf(seq, "Pointers: %d\n", pointers);
2423 bytes += sizeof(struct node *) * pointers;
2424 seq_printf(seq, "Null ptrs: %d\n", stat->nullpointers);
2425 seq_printf(seq, "Total size: %d kB\n", bytes / 1024);
2427 kfree(stat);
2430 #ifdef CONFIG_IP_FIB_TRIE_STATS
2431 seq_printf(seq, "Counters:\n---------\n");
2432 seq_printf(seq,"gets = %d\n", t->stats.gets);
2433 seq_printf(seq,"backtracks = %d\n", t->stats.backtrack);
2434 seq_printf(seq,"semantic match passed = %d\n", t->stats.semantic_match_passed);
2435 seq_printf(seq,"semantic match miss = %d\n", t->stats.semantic_match_miss);
2436 seq_printf(seq,"null node hit= %d\n", t->stats.null_node_hit);
2437 seq_printf(seq,"skipped node resize = %d\n", t->stats.resize_node_skipped);
2438 #ifdef CLEAR_STATS
2439 memset(&(t->stats), 0, sizeof(t->stats));
2440 #endif
2441 #endif /* CONFIG_IP_FIB_TRIE_STATS */
2444 static int fib_triestat_seq_show(struct seq_file *seq, void *v)
2446 char bf[128];
2448 if (v == SEQ_START_TOKEN) {
2449 seq_printf(seq, "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n",
2450 sizeof(struct leaf), sizeof(struct tnode));
2451 if (trie_local)
2452 collect_and_show(trie_local, seq);
2454 if (trie_main)
2455 collect_and_show(trie_main, seq);
2457 else {
2458 snprintf(bf, sizeof(bf),
2459 "*\t%08X\t%08X", 200, 400);
2461 seq_printf(seq, "%-127s\n", bf);
2463 return 0;
2466 static struct seq_operations fib_triestat_seq_ops = {
2467 .start = fib_triestat_seq_start,
2468 .next = fib_triestat_seq_next,
2469 .stop = fib_triestat_seq_stop,
2470 .show = fib_triestat_seq_show,
2473 static int fib_triestat_seq_open(struct inode *inode, struct file *file)
2475 struct seq_file *seq;
2476 int rc = -ENOMEM;
2478 rc = seq_open(file, &fib_triestat_seq_ops);
2479 if (rc)
2480 goto out_kfree;
2482 seq = file->private_data;
2483 out:
2484 return rc;
2485 out_kfree:
2486 goto out;
2489 static struct file_operations fib_triestat_seq_fops = {
2490 .owner = THIS_MODULE,
2491 .open = fib_triestat_seq_open,
2492 .read = seq_read,
2493 .llseek = seq_lseek,
2494 .release = seq_release_private,
2497 int __init fib_stat_proc_init(void)
2499 if (!proc_net_fops_create("fib_triestat", S_IRUGO, &fib_triestat_seq_fops))
2500 return -ENOMEM;
2501 return 0;
2504 void __init fib_stat_proc_exit(void)
2506 proc_net_remove("fib_triestat");
2509 static struct fib_alias *fib_trie_get_first(struct seq_file *seq)
2511 return NULL;
2514 static struct fib_alias *fib_trie_get_next(struct seq_file *seq)
2516 return NULL;
2519 static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
2521 void *v = NULL;
2523 if (ip_fib_main_table)
2524 v = *pos ? fib_trie_get_next(seq) : SEQ_START_TOKEN;
2525 return v;
2528 static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2530 ++*pos;
2531 return v == SEQ_START_TOKEN ? fib_trie_get_first(seq) : fib_trie_get_next(seq);
2534 static void fib_trie_seq_stop(struct seq_file *seq, void *v)
2540 * This outputs /proc/net/fib_trie.
2542 * It always works in backward compatibility mode.
2543 * The format of the file is not supposed to be changed.
2546 static int fib_trie_seq_show(struct seq_file *seq, void *v)
2548 char bf[128];
2550 if (v == SEQ_START_TOKEN) {
2551 if (trie_local)
2552 trie_dump_seq(seq, trie_local);
2554 if (trie_main)
2555 trie_dump_seq(seq, trie_main);
2558 else {
2559 snprintf(bf, sizeof(bf),
2560 "*\t%08X\t%08X", 200, 400);
2561 seq_printf(seq, "%-127s\n", bf);
2564 return 0;
2567 static struct seq_operations fib_trie_seq_ops = {
2568 .start = fib_trie_seq_start,
2569 .next = fib_trie_seq_next,
2570 .stop = fib_trie_seq_stop,
2571 .show = fib_trie_seq_show,
2574 static int fib_trie_seq_open(struct inode *inode, struct file *file)
2576 struct seq_file *seq;
2577 int rc = -ENOMEM;
2579 rc = seq_open(file, &fib_trie_seq_ops);
2580 if (rc)
2581 goto out_kfree;
2583 seq = file->private_data;
2584 out:
2585 return rc;
2586 out_kfree:
2587 goto out;
2590 static struct file_operations fib_trie_seq_fops = {
2591 .owner = THIS_MODULE,
2592 .open = fib_trie_seq_open,
2593 .read = seq_read,
2594 .llseek = seq_lseek,
2595 .release = seq_release_private,
2598 int __init fib_proc_init(void)
2600 if (!proc_net_fops_create("fib_trie", S_IRUGO, &fib_trie_seq_fops))
2601 return -ENOMEM;
2602 return 0;
2605 void __init fib_proc_exit(void)
2607 proc_net_remove("fib_trie");
2610 #endif /* CONFIG_PROC_FS */