(bug 19195) Make user IDs more readily available with the API
[lhc/web/wiklou.git] / includes / libs / IEUrlExtension.php
1 <?php
2
3 /**
4 * Internet Explorer derives a cache filename from a URL, and then in certain
5 * circumstances, uses the extension of the resulting file to determine the
6 * content type of the data, ignoring the Content-Type header.
7 *
8 * This can be a problem, especially when non-HTML content is sent by MediaWiki,
9 * and Internet Explorer interprets it as HTML, exposing an XSS vulnerability.
10 *
11 * Usually the script filename (e.g. api.php) is present in the URL, and this
12 * makes Internet Explorer think the extension is a harmless script extension.
13 * But Internet Explorer 6 and earlier allows the script extension to be
14 * obscured by encoding the dot as "%2E".
15 *
16 * This class contains functions which help in detecting and dealing with this
17 * situation.
18 *
19 * Checking the URL for a bad extension is somewhat complicated due to the fact
20 * that CGI doesn't provide a standard method to determine the URL. Instead it
21 * is necessary to pass a subset of $_SERVER variables, which we then attempt
22 * to use to guess parts of the URL.
23 */
class IEUrlExtension {
	/**
	 * Check a subset of $_SERVER (or the whole of $_SERVER if you like)
	 * to see if it indicates that the request was sent with a bad file
	 * extension. Returns true if the request should be denied or modified,
	 * false otherwise. The relevant $_SERVER elements are:
	 *
	 *   - SERVER_SOFTWARE
	 *   - REQUEST_URI
	 *   - QUERY_STRING
	 *   - PATH_INFO
	 *
	 * If a variable is unset in $_SERVER, it should be unset in $vars.
	 *
	 * REQUEST_URI is only trusted when haveUndecodedRequestUri() recognises
	 * the server software; otherwise QUERY_STRING is checked instead.
	 *
	 * @param $vars array A subset of $_SERVER.
	 * @param $extWhitelist array Extensions which are allowed, assumed harmless.
	 * @return bool
	 */
	public static function areServerVarsBad( $vars, $extWhitelist = array() ) {
		// Check QUERY_STRING or REQUEST_URI
		if ( isset( $vars['SERVER_SOFTWARE'] )
			&& isset( $vars['REQUEST_URI'] )
			&& self::haveUndecodedRequestUri( $vars['SERVER_SOFTWARE'] ) )
		{
			$urlPart = $vars['REQUEST_URI'];
		} elseif ( isset( $vars['QUERY_STRING'] ) ) {
			$urlPart = $vars['QUERY_STRING'];
		} else {
			$urlPart = '';
		}

		if ( self::isUrlExtensionBad( $urlPart, $extWhitelist ) ) {
			return true;
		}

		// Some servers have PATH_INFO but not REQUEST_URI, so we check both
		// to be on the safe side.
		if ( isset( $vars['PATH_INFO'] )
			&& self::isUrlExtensionBad( $vars['PATH_INFO'], $extWhitelist ) )
		{
			return true;
		}

		// All checks passed
		return false;
	}

	/**
	 * Given a right-hand portion of a URL, determine whether IE would detect
	 * a potentially harmful file extension.
	 *
	 * An extension is considered harmless when it is empty, is one of the
	 * script extensions "php" / "php5", appears in $extWhitelist, or contains
	 * characters outside [a-zA-Z0-9_-].
	 *
	 * @param $urlPart string The right-hand portion of a URL
	 * @param $extWhitelist array An array of file extensions (strings) which
	 *   may occur in this URL, and which should be allowed.
	 * @return bool
	 */
	public static function isUrlExtensionBad( $urlPart, $extWhitelist = array() ) {
		if ( strval( $urlPart ) === '' ) {
			return false;
		}

		$extension = self::findIE6Extension( $urlPart );
		if ( strval( $extension ) === '' ) {
			// No extension or empty extension
			return false;
		}

		// Strict comparison: the extension is always a string here, and loose
		// comparison would let numeric-looking strings match each other.
		if ( in_array( $extension, array( 'php', 'php5' ), true ) ) {
			// Script extension, OK
			return false;
		}
		if ( in_array( $extension, $extWhitelist, true ) ) {
			// Whitelisted extension
			return false;
		}

		if ( !preg_match( '/^[a-zA-Z0-9_-]+$/', $extension ) ) {
			// Non-alphanumeric extension, unlikely to be registered.
			//
			// The regex above is known to match all registered file extensions
			// in a default Windows XP installation. It's important to allow
			// extensions with ampersands and percent signs, since that reduces
			// the number of false positives substantially.
			return false;
		}

		// Possibly bad extension
		return true;
	}

	/**
	 * Returns a variant of $url which will pass isUrlExtensionBad() but has the
	 * same GET parameters, or false if it can't figure one out.
	 *
	 * The variant is built by percent-encoding any question mark after the
	 * first one and appending "&*" to the query string.
	 *
	 * @param $url string
	 * @param $extWhitelist array
	 * @return bool|string
	 */
	public static function fixUrlForIE6( $url, $extWhitelist = array() ) {
		$questionPos = strpos( $url, '?' );
		if ( $questionPos === false ) {
			$beforeQuery = $url . '?';
			$query = '';
		} elseif ( $questionPos === strlen( $url ) - 1 ) {
			$beforeQuery = $url;
			$query = '';
		} else {
			$beforeQuery = substr( $url, 0, $questionPos + 1 );
			$query = substr( $url, $questionPos + 1 );
		}

		// Multiple question marks cause problems. Encode the second and
		// subsequent question mark. Note that "?" is %3F; %3E would be ">"
		// and would silently change the parameter value.
		$query = str_replace( '?', '%3F', $query );
		// Append an invalid path character so that IE6 won't see the end of the
		// query string as an extension
		$query .= '&*';
		// Put the URL back together
		$url = $beforeQuery . $query;
		if ( self::isUrlExtensionBad( $url, $extWhitelist ) ) {
			// Avoid a redirect loop
			return false;
		}
		return $url;
	}

	/**
	 * Determine what extension IE6 will infer from a certain query string.
	 * If the URL has an extension before the question mark, IE6 will use
	 * that and ignore the query string, but as the class comment explains,
	 * we don't have a reliable way to determine the URL, so
	 * areServerVarsBad() may pass in just the query string for $url.
	 * All entry points have safe extensions (php, php5) anyway, so
	 * checking the query string is possibly overly paranoid but never
	 * insecure.
	 *
	 * The criteria for finding an extension are as follows:
	 *   - a possible extension is a dot followed by one or more characters not
	 *     in <>\"/:|?*.#
	 *   - if we find a possible extension followed by the end of the string or
	 *     a #, that's our extension
	 *   - if we find a possible extension followed by a ?, that's our extension
	 *       - UNLESS it's exe, dll or cgi, in which case we ignore it and continue
	 *         searching for another possible extension
	 *   - if we find a possible extension followed by a dot or another illegal
	 *     character, we ignore it and continue searching
	 *
	 * @param $url string URL
	 * @return mixed Detected extension (string), or false if none found
	 */
	public static function findIE6Extension( $url ) {
		$pos = 0;
		// Everything from the first "#" onwards is ignored
		$hashPos = strpos( $url, '#' );
		if ( $hashPos !== false ) {
			$urlLength = $hashPos;
		} else {
			$urlLength = strlen( $url );
		}
		$remainingLength = $urlLength;
		while ( $remainingLength > 0 ) {
			// Skip ahead to the next dot
			$pos += strcspn( $url, '.', $pos, $remainingLength );
			if ( $pos >= $urlLength ) {
				// End of string, we're done
				return false;
			}

			// We found a dot. Skip past it
			$pos++;
			$remainingLength = $urlLength - $pos;

			// Check for illegal characters in our prospective extension,
			// or for another dot
			$nextPos = $pos + strcspn( $url, "<>\\\"/:|?*.", $pos, $remainingLength );
			if ( $nextPos >= $urlLength ) {
				// No illegal character or next dot
				// We have our extension
				return substr( $url, $pos, $urlLength - $pos );
			}
			if ( $url[$nextPos] === '?' ) {
				// We've found a legal extension followed by a question mark
				// If the extension is NOT exe, dll or cgi, return it
				$extension = substr( $url, $pos, $nextPos - $pos );
				if ( strcasecmp( $extension, 'exe' ) && strcasecmp( $extension, 'dll' ) &&
					strcasecmp( $extension, 'cgi' ) )
				{
					return $extension;
				}
				// Else continue looking
			}
			// We found an illegal character or another dot.
			// Resume the search AT that character, not after it: if it is a
			// dot, it starts the next candidate extension, and stepping past
			// it would make "x.y.z" report no extension instead of "z".
			$pos = $nextPos;
			$remainingLength = $urlLength - $pos;
		}
		return false;
	}

	/**
	 * When passed the value of $_SERVER['SERVER_SOFTWARE'], this function
	 * returns true if that server is known to have a REQUEST_URI variable
	 * with %2E not decoded to ".". On such a server, it is possible to detect
	 * whether the script filename has been obscured.
	 *
	 * The function returns false if the server is not known to have this
	 * behaviour. Microsoft IIS in particular is known to decode escaped script
	 * filenames.
	 *
	 * SERVER_SOFTWARE typically contains either a plain string such as "Zeus",
	 * or a specification in the style of a User-Agent header, such as
	 * "Apache/1.3.34 (Unix) mod_ssl/2.8.25 OpenSSL/0.9.8a PHP/4.4.2"
	 *
	 * Only the leading product name (the text before the first "/" or space)
	 * is compared, case-sensitively, against the list of known servers.
	 *
	 * @param $serverSoftware string
	 * @return bool
	 */
	public static function haveUndecodedRequestUri( $serverSoftware ) {
		static $whitelist = array(
			'Apache',
			'Zeus',
			'LiteSpeed' );
		if ( preg_match( '/^(.*?)($|\/| )/', $serverSoftware, $m ) ) {
			return in_array( $m[1], $whitelist, true );
		} else {
			return false;
		}
	}

}