3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2 of the License, or
6 * (at your option) any later version.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License along
14 * with this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16 * http://www.gnu.org/copyleft/gpl.html
19 * @author Brandon Black <blblack@gmail.com>
23 * Matches IP addresses against a set of CIDR specifications
26 * // At startup, calculate the optimized data structure for the set:
27 * $ipset = new IPSet( $wgSquidServersNoPurge );
28 * // runtime check against cached set (returns bool):
29 * $allowme = $ipset->match( $ip );
31 * In rough benchmarking, this takes about 80% more time than
32 * in_array() checks on a short (a couple hundred at most) array
33 * of addresses. It's fast either way at those levels, though,
34 * and IPSet would scale better than in_array if the array were
37 * For mixed-family CIDR sets, however, this code gives well over
38 * 100x speedup vs iterating IP::isInRange() over an array
41 * The basic implementation is two separate binary trees
42 * (IPv4 and IPv6) as nested php arrays with keys named 0 and 1.
43 * The values false and true are terminal match-fail and match-success,
44 * otherwise the value is a deeper node in the tree.
46 * A simple depth-compression scheme is also implemented: whole-byte
47 * tree compression at whole-byte boundaries only, where no branching
48 * occurs during that whole byte of depth. A compressed node has
49 * keys 'comp' (the byte to compare) and 'next' (the next node to
50 * recurse into if 'comp' matched successfully).
52 * For example, given these inputs:
56 * The v4 tree would look like:
68 * (multi-byte compression nodes were attempted as well, but were
69 * a net loss in my test scenarios due to additional match complexity)
74 /** @var array $root4: the root of the IPv4 matching tree */
75 private $root4 = array( false, false );
77 /** @var array $root6: the root of the IPv6 matching tree */
78 private $root6 = array( false, false );
81 * __construct() instantiate the object from an array of CIDR specs
83 * @param array $cfg array of IPv[46] CIDR specs as strings
84 * @return IPSet new IPSet object
86 * Invalid input network/mask values in $cfg will result in issuing
87 * E_WARNING and/or E_USER_WARNING and the bad values being ignored.
89 public function __construct( array $cfg ) {
90 foreach ( $cfg as $cidr ) {
91 $this->addCidr( $cidr );
94 self
::recOptimize( $this->root4
);
95 self
::recCompress( $this->root4
, 0, 24 );
96 self
::recOptimize( $this->root6
);
97 self
::recCompress( $this->root6
, 0, 120 );
101 * Add a single CIDR spec to the internal matching trees
103 * @param string $cidr string CIDR spec, IPv[46], optional /mask (def all-1's)
105 private function addCidr( $cidr ) {
107 if ( strpos( $cidr, ':' ) === false ) {
108 $node =& $this->root4
;
111 $node =& $this->root6
;
115 // Default to all-1's mask if no netmask in the input
116 if ( strpos( $cidr, '/' ) === false ) {
120 list( $net, $mask ) = explode( '/', $cidr, 2 );
121 if ( !ctype_digit( $mask ) ||
intval( $mask ) > $defMask ) {
122 trigger_error( "IPSet: Bad mask '$mask' from '$cidr', ignored", E_USER_WARNING
);
126 $mask = intval( $mask ); // explicit integer convert, checked above
128 // convert $net to an array of integer bytes, length 4 or 16:
129 $raw = inet_pton( $net );
130 if ( $raw === false ) {
131 return; // inet_pton() sends an E_WARNING for us
133 $rawOrd = array_map( 'ord', str_split( $raw ) );
135 // special-case: zero mask overwrites the whole tree with a pair of terminal successes
137 $node = array( true, true );
141 // iterate the bits of the address while walking the tree structure for inserts
144 $maskShift = 7 - ( $curBit & 7 );
145 $node =& $node[( $rawOrd[$curBit >> 3] & ( 1 << $maskShift ) ) >> $maskShift];
147 if ( $node === true ) {
148 // already added a larger supernet, no need to go deeper
150 } elseif ( $curBit == $mask ) {
151 // this may wipe out deeper subnets from earlier
154 } elseif ( $node === false ) {
155 // create new subarray to go deeper
156 $node = array( false, false );
162 * Match an IP address against the set
164 * @param string $ip string IPv[46] address
165 * @return boolean true is match success, false is match failure
167 * If $ip is unparseable, inet_pton may issue an E_WARNING to that effect
169 public function match( $ip ) {
170 $raw = inet_pton( $ip );
171 if ( $raw === false ) {
172 return false; // inet_pton() sends an E_WARNING for us
175 $rawOrd = array_map( 'ord', str_split( $raw ) );
176 if ( count( $rawOrd ) == 4 ) {
177 $node =& $this->root4
;
179 $node =& $this->root6
;
184 if ( isset( $node['comp'] ) ) {
185 // compressed node, matches 1 whole byte on a byte boundary
186 if ( $rawOrd[$curBit >> 3] != $node['comp'] ) {
190 $node =& $node['next'];
192 // uncompressed node, walk in the correct direction for the current bit-value
193 $maskShift = 7 - ( $curBit & 7 );
194 $node =& $node[( $rawOrd[$curBit >> 3] & ( 1 << $maskShift ) ) >> $maskShift];
198 if ( $node === true ||
$node === false ) {
205 * Recursively merges adjacent nets into larger supernets
207 * @param array &$node Tree node to optimize, by-reference
209 * e.g.: 8.0.0.0/8 + 9.0.0.0/8 -> 8.0.0.0/7
211 private static function recOptimize( &$node ) {
212 if ( $node[0] !== false && $node[0] !== true && self
::recOptimize( $node[0] ) ) {
215 if ( $node[1] !== false && $node[1] !== true && self
::recOptimize( $node[1] ) ) {
218 if ( $node[0] === true && $node[1] === true ) {
225 * Recursively compresses a tree
227 * @param array &$node Tree node to compress, by-reference
228 * @param integer $curBit current depth in the tree
229 * @param integer $maxCompStart maximum depth at which compression can start, family-specific
231 * This is a very simplistic compression scheme: if we go through a whole
232 * byte of address starting at a byte boundary with no real branching
233 * other than immediate false-vs-(node|true), compress that subtree down to a single
234 * byte-matching node.
235 * The $maxCompStart check elides recursing the final 7 levels of depth (family-dependent)
237 private static function recCompress( &$node, $curBit, $maxCompStart ) {
238 if ( !( $curBit & 7 ) ) { // byte boundary, check for depth-8 single path(s)
243 if ( $cnode[0] === false ) {
246 } elseif ( $cnode[1] === false ) {
249 // partial-byte branching, give up
253 if ( $i == -1 ) { // means we did not exit the while() via break
259 if ( $cnode !== true ) {
260 self
::recCompress( $cnode, $curBit, $maxCompStart );
267 if ( $curBit <= $maxCompStart ) {
268 if ( $node[0] !== false && $node[0] !== true ) {
269 self
::recCompress( $node[0], $curBit, $maxCompStart );
271 if ( $node[1] !== false && $node[1] !== true ) {
272 self
::recCompress( $node[1], $curBit, $maxCompStart );