ae593785f335e7e0f4e1f204cba32739d94c74cf
[lhc/web/wiklou.git] / includes / libs / IPSet.php
1 <?php
2 /**
3 * @section LICENSE
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License along
15 * with this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 * http://www.gnu.org/copyleft/gpl.html
18 *
19 * @file
20 * @author Brandon Black <blblack@gmail.com>
21 */
22
23 /**
24 * Matches IP addresses against a set of CIDR specifications
25 *
26 * Usage:
27 * // At startup, calculate the optimized data structure for the set:
28 * $ipset = new IPSet( $wgSquidServersNoPurge );
29 * // runtime check against cached set (returns bool):
30 * $allowme = $ipset->match( $ip );
31 *
32 * In rough benchmarking, this takes about 80% more time than
33 * in_array() checks on a short (a couple hundred at most) array
34 * of addresses. It's fast either way at those levels, though,
35 * and IPSet would scale better than in_array if the array were
36 * much larger.
37 *
38 * For mixed-family CIDR sets, however, this code gives well over
39 * 100x speedup vs iterating IP::isInRange() over an array
40 * of CIDR specs.
41 *
42 * The basic implementation is two separate binary trees
43 * (IPv4 and IPv6) as nested php arrays with keys named 0 and 1.
44 * The values false and true are terminal match-fail and match-success,
45 * otherwise the value is a deeper node in the tree.
46 *
47 * A simple depth-compression scheme is also implemented: whole-byte
48 * tree compression at whole-byte boundaries only, where no branching
49 * occurs during that whole byte of depth. A compressed node has
50 * keys 'comp' (the byte to compare) and 'next' (the next node to
51 * recurse into if 'comp' matched successfully).
52 *
53 * For example, given these inputs:
54 * 25.0.0.0/9
55 * 25.192.0.0/10
56 *
57 * The v4 tree would look like:
58 * root4 => array(
59 * 'comp' => 25,
60 * 'next' => array(
61 * 0 => true,
62 * 1 => array(
63 * 0 => false,
64 * 1 => true,
65 * ),
66 * ),
67 * );
68 *
69 * (multi-byte compression nodes were attempted as well, but were
70 * a net loss in my test scenarios due to additional match complexity)
71 *
72 * @since 1.24
73 */
74 class IPSet {
75 /** @var array $root4: the root of the IPv4 matching tree */
76 private $root4 = array( false, false );
77
78 /** @var array $root6: the root of the IPv6 matching tree */
79 private $root6 = array( false, false );
80
81 /**
82 * __construct() instantiate the object from an array of CIDR specs
83 *
84 * @param array $cfg array of IPv[46] CIDR specs as strings
85 * @return IPSet new IPSet object
86 *
87 * Invalid input network/mask values in $cfg will result in issuing
88 * E_WARNING and/or E_USER_WARNING and the bad values being ignored.
89 */
90 public function __construct( array $cfg ) {
91 foreach ( $cfg as $cidr ) {
92 $this->addCidr( $cidr );
93 }
94
95 self::recOptimize( $this->root4 );
96 self::recCompress( $this->root4, 0, 24 );
97 self::recOptimize( $this->root6 );
98 self::recCompress( $this->root6, 0, 120 );
99 }
100
101 /**
102 * Add a single CIDR spec to the internal matching trees
103 *
104 * @param string $cidr string CIDR spec, IPv[46], optional /mask (def all-1's)
105 */
106 private function addCidr( $cidr ) {
107 // v4 or v6 check
108 if ( strpos( $cidr, ':' ) === false ) {
109 $node =& $this->root4;
110 $defMask = '32';
111 } else {
112 $node =& $this->root6;
113 $defMask = '128';
114 }
115
116 // Default to all-1's mask if no netmask in the input
117 if ( strpos( $cidr, '/' ) === false ) {
118 $net = $cidr;
119 $mask = $defMask;
120 } else {
121 list( $net, $mask ) = explode( '/', $cidr, 2 );
122 if ( !ctype_digit( $mask ) || intval( $mask ) > $defMask ) {
123 trigger_error( "IPSet: Bad mask '$mask' from '$cidr', ignored", E_USER_WARNING );
124 return;
125 }
126 }
127 $mask = intval( $mask ); // explicit integer convert, checked above
128
129 // convert $net to an array of integer bytes, length 4 or 16:
130 $raw = inet_pton( $net );
131 if ( $raw === false ) {
132 return; // inet_pton() sends an E_WARNING for us
133 }
134 $rawOrd = array_map( 'ord', str_split( $raw ) );
135
136 // special-case: zero mask overwrites the whole tree with a pair of terminal successes
137 if ( $mask == 0 ) {
138 $node = array( true, true );
139 return;
140 }
141
142 // iterate the bits of the address while walking the tree structure for inserts
143 $curBit = 0;
144 while ( 1 ) {
145 $maskShift = 7 - ( $curBit & 7 );
146 $node =& $node[( $rawOrd[$curBit >> 3] & ( 1 << $maskShift ) ) >> $maskShift];
147 ++$curBit;
148 if ( $node === true ) {
149 // already added a larger supernet, no need to go deeper
150 return;
151 } elseif ( $curBit == $mask ) {
152 // this may wipe out deeper subnets from earlier
153 $node = true;
154 return;
155 } elseif ( $node === false ) {
156 // create new subarray to go deeper
157 $node = array( false, false );
158 }
159 }
160 }
161
162 /**
163 * Match an IP address against the set
164 *
165 * @param string $ip string IPv[46] address
166 * @return boolean true is match success, false is match failure
167 *
168 * If $ip is unparseable, inet_pton may issue an E_WARNING to that effect
169 */
170 public function match( $ip ) {
171 $raw = inet_pton( $ip );
172 if ( $raw === false ) {
173 return false; // inet_pton() sends an E_WARNING for us
174 }
175
176 $rawOrd = array_map( 'ord', str_split( $raw ) );
177 if ( count( $rawOrd ) == 4 ) {
178 $node =& $this->root4;
179 } else {
180 $node =& $this->root6;
181 }
182
183 $curBit = 0;
184 while ( 1 ) {
185 if ( isset( $node['comp'] ) ) {
186 // compressed node, matches 1 whole byte on a byte boundary
187 if ( $rawOrd[$curBit >> 3] != $node['comp'] ) {
188 return false;
189 }
190 $curBit += 8;
191 $node =& $node['next'];
192 } else {
193 // uncompressed node, walk in the correct direction for the current bit-value
194 $maskShift = 7 - ( $curBit & 7 );
195 $node =& $node[( $rawOrd[$curBit >> 3] & ( 1 << $maskShift ) ) >> $maskShift];
196 ++$curBit;
197 }
198
199 if ( $node === true || $node === false ) {
200 return $node;
201 }
202 }
203 }
204
205 /**
206 * Recursively merges adjacent nets into larger supernets
207 *
208 * @param array &$node Tree node to optimize, by-reference
209 *
210 * e.g.: 8.0.0.0/8 + 9.0.0.0/8 -> 8.0.0.0/7
211 */
212 private static function recOptimize( &$node ) {
213 if ( $node[0] !== false && $node[0] !== true && self::recOptimize( $node[0] ) ) {
214 $node[0] = true;
215 }
216 if ( $node[1] !== false && $node[1] !== true && self::recOptimize( $node[1] ) ) {
217 $node[1] = true;
218 }
219 if ( $node[0] === true && $node[1] === true ) {
220 return true;
221 }
222 return false;
223 }
224
225 /**
226 * Recursively compresses a tree
227 *
228 * @param array &$node Tree node to compress, by-reference
229 * @param integer $curBit current depth in the tree
230 * @param integer $maxCompStart maximum depth at which compression can start, family-specific
231 *
232 * This is a very simplistic compression scheme: if we go through a whole
233 * byte of address starting at a byte boundary with no real branching
234 * other than immediate false-vs-(node|true), compress that subtree down to a single
235 * byte-matching node.
236 * The $maxCompStart check elides recursing the final 7 levels of depth (family-dependent)
237 */
238 private static function recCompress( &$node, $curBit, $maxCompStart ) {
239 if ( !( $curBit & 7 ) ) { // byte boundary, check for depth-8 single path(s)
240 $byte = 0;
241 $cnode =& $node;
242 $i = 8;
243 while ( $i-- ) {
244 if ( $cnode[0] === false ) {
245 $byte |= 1 << $i;
246 $cnode =& $cnode[1];
247 } elseif ( $cnode[1] === false ) {
248 $cnode =& $cnode[0];
249 } else {
250 // partial-byte branching, give up
251 break;
252 }
253 }
254 if ( $i == -1 ) { // means we did not exit the while() via break
255 $node = array(
256 'comp' => $byte,
257 'next' => &$cnode,
258 );
259 $curBit += 8;
260 if ( $cnode !== true ) {
261 self::recCompress( $cnode, $curBit, $maxCompStart );
262 }
263 return;
264 }
265 }
266
267 ++$curBit;
268 if ( $curBit <= $maxCompStart ) {
269 if ( $node[0] !== false && $node[0] !== true ) {
270 self::recCompress( $node[0], $curBit, $maxCompStart );
271 }
272 if ( $node[1] !== false && $node[1] !== true ) {
273 self::recCompress( $node[1], $curBit, $maxCompStart );
274 }
275 }
276 }
277 }