(bug 39672) Fixing quote.
[lhc/web/wiklou.git] / includes / Uri.php
1 <?php
2 /**
3 * Class for simple URI parsing and manipulation.
4 * Intended to simplify things that were using wfParseUrl and
5 * had to do manual concatenation for various needs.
6 * Built to match our JS mw.Uri in naming patterns.
7 * @file
8 * @author Daniel Friesen
9 * @since 1.20
10 */
11
class Uri {

	/**
	 * The parsed components of the URI, keyed by canonical component name
	 * (see self::$validComponents).
	 */
	protected $components;

	/** Canonical component names accepted by the getters and setters. */
	protected static $validComponents = array( 'scheme', 'delimiter', 'host', 'port', 'user', 'pass', 'path', 'query', 'fragment' );

	/** Alternative component names, mapped to their canonical name. */
	protected static $componentAliases = array( 'protocol' => 'scheme', 'password' => 'pass' );

	/**
	 * parse_url() work-alike, but non-broken. Differences:
	 *
	 * 1) Does not raise warnings on bad URLs (just returns false)
	 * 2) Handles protocols that don't use :// (e.g., mailto: and news: , as well as protocol-relative URLs) correctly
	 * 3) Adds a "delimiter" element to the array, either '://', ':' or '//' (see (2))
	 *
	 * A URL whose scheme is not listed in $wgUrlProtocols (neither as "scheme://"
	 * nor as "scheme:") is rejected.
	 *
	 * @param $url String: a URL to parse
	 * @return Array|bool: bits of the URL in an associative array, per PHP docs,
	 *   or false if the URL could not be parsed or uses an unknown scheme
	 */
	protected static function parseUri( $url ) {
		global $wgUrlProtocols; // Allow all protocols defined in DefaultSettings/LocalSettings.php

		// Protocol-relative URLs are handled really badly by parse_url(). It's so bad that the easiest
		// way to handle them is to just prepend 'http:' and strip the protocol out later
		$wasRelative = substr( $url, 0, 2 ) === '//';
		if ( $wasRelative ) {
			$url = "http:$url";
		}
		wfSuppressWarnings();
		$bits = parse_url( $url );
		wfRestoreWarnings();
		// parse_url() returns an array without scheme for some invalid URLs, e.g.
		// parse_url("%0Ahttp://example.com") == array( 'host' => '%0Ahttp', 'path' => 'example.com' )
		if ( !$bits ||
			( !isset( $bits['scheme'] ) && strpos( $url, "://" ) !== false )
		) {
			wfDebug( __METHOD__ . ": Invalid URL: $url" );
			return false;
		}
		$scheme = isset( $bits['scheme'] ) ? $bits['scheme'] : null;

		// most of the protocols are followed by ://, but mailto: and sometimes news: not, check for it
		if ( in_array( $scheme . '://', $wgUrlProtocols, true ) ) {
			$bits['delimiter'] = '://';
		} elseif ( !is_null( $scheme ) ) {
			if ( !in_array( $scheme . ':', $wgUrlProtocols, true ) ) {
				// The scheme is known neither with '://' nor with ':'.
				wfDebug( __METHOD__ . ": Invalid scheme in URL: $scheme" );
				return false;
			}
			$bits['delimiter'] = ':';
			// parse_url detects for news: and mailto: the host part of an url as path
			// We have to correct this wrong detection
			if ( isset( $bits['path'] ) ) {
				$bits['host'] = $bits['path'];
				$bits['path'] = '';
			}
		}

		/* Provide an empty host for eg. file:/// urls (see bug 28627) */
		if ( !isset( $bits['host'] ) && $scheme === "file" ) {
			$bits['host'] = '';

			/* parse_url loses the third / for file:///c:/ urls (but not on variants) */
			if ( isset( $bits['path'] ) && substr( $bits['path'], 0, 1 ) !== '/' ) {
				$bits['path'] = '/' . $bits['path'];
			}
		}

		// If the URL was protocol-relative, fix scheme and delimiter
		if ( $wasRelative ) {
			$bits['scheme'] = '';
			$bits['delimiter'] = '//';
		}
		return $bits;
	}

	/**
	 * Create a Uri from a string, a component array or another Uri.
	 *
	 * Note that a string which cannot be parsed leaves the new object with
	 * no components; setUri()'s false return value is not propagated here.
	 *
	 * @param $uri mixed URI string or array
	 */
	public function __construct( $uri ) {
		$this->components = array();
		$this->setUri( $uri );
	}

	/**
	 * Set the Uri to the value of some other URI.
	 *
	 * @param $uri mixed URI string, component array or Uri object
	 * @return bool|null false if a string was given and could not be parsed;
	 *   no value otherwise
	 * @throws MWException if $uri is not a string, array or Uri
	 */
	public function setUri( $uri ) {
		if ( is_string( $uri ) ) {
			$parsed = self::parseUri( $uri );
			if ( $parsed === false ) {
				return false;
			}
			$this->setComponents( $parsed );
		} elseif ( is_array( $uri ) ) {
			$this->setComponents( $uri );
		} elseif ( $uri instanceof Uri ) {
			$this->setComponents( $uri->getComponents() );
		} else {
			// Report the type rather than the value: interpolating an arbitrary
			// object into a string would itself fail before the exception is built.
			$type = is_object( $uri ) ? get_class( $uri ) : gettype( $uri );
			throw new MWException( __METHOD__ . ": $type is not a valid type for a URI." );
		}
	}

	/**
	 * Set the components of this Uri, replacing any existing ones.
	 * Aliases are converted to their canonical name (and logged via wfDebug);
	 * when both an alias and its canonical name are given, the alias value wins.
	 *
	 * @param $components Array The components to set on this Uri.
	 * @throws MWException if a key is neither a valid component nor an alias;
	 *   in that case the current components are left untouched
	 */
	public function setComponents( array $components ) {
		foreach ( $components as $name => $value ) {
			if ( isset( self::$componentAliases[$name] ) ) {
				$canonical = self::$componentAliases[$name];
				wfDebug( __METHOD__ . ": Converting alias $name to canonical $canonical." );
				$components[$canonical] = $value;
				unset( $components[$name] );
			} elseif ( !in_array( $name, self::$validComponents, true ) ) {
				// Strict comparison: an integer key such as 0 must not pass as a component name.
				throw new MWException( __METHOD__ . ": $name is not a valid component." );
			}
		}

		$this->components = $components;
	}

	/**
	 * Return the components for this Uri
	 * @return Array
	 */
	public function getComponents() {
		return $this->components;
	}

	/**
	 * Return the value of a specific component
	 *
	 * @param $name string The name (or alias) of the component to return
	 * @return mixed|null The component value, or null when the component is
	 *   unset, null, false, an empty string or an empty array. Values such as
	 *   "0" and 0 are real values and are returned as-is.
	 * @throws MWException if $name is not a valid component or alias
	 */
	public function getComponent( $name ) {
		if ( isset( self::$componentAliases[$name] ) ) {
			// Component is an alias. Get the actual name.
			$alias = $name;
			$name = self::$componentAliases[$name];
			wfDebug( __METHOD__ . ": Converting alias $alias to canonical $name." );
		}

		if ( !in_array( $name, self::$validComponents, true ) ) {
			// Component is invalid
			throw new MWException( __METHOD__ . ": $name is not a valid component." );
		}

		$value = isset( $this->components[$name] ) ? $this->components[$name] : null;
		if ( $value === null || $value === '' || $value === false || $value === array() ) {
			// Component is empty
			return null;
		}
		// Component is valid and has a value. empty() is deliberately not used
		// here because it would also discard "0" and 0.
		return $value;
	}

	/**
	 * Set a component for this Uri
	 * @param $name string The name (or alias) of the component to set
	 * @param $value string|null The value to set
	 * @throws MWException if $name is not a valid component or alias
	 */
	public function setComponent( $name, $value ) {
		if ( isset( self::$componentAliases[$name] ) ) {
			$alias = $name;
			$name = self::$componentAliases[$name];
			wfDebug( __METHOD__ . ": Converting alias $alias to canonical $name." );
		} elseif ( !in_array( $name, self::$validComponents, true ) ) {
			throw new MWException( __METHOD__ . ": $name is not a valid component." );
		}
		$this->components[$name] = $value;
	}

	// Convenience getters: each returns the component value or null when empty.
	public function getProtocol() { return $this->getComponent( 'scheme' ); }
	public function getUser() { return $this->getComponent( 'user' ); }
	public function getPassword() { return $this->getComponent( 'pass' ); }
	public function getHost() { return $this->getComponent( 'host' ); }
	public function getPort() { return $this->getComponent( 'port' ); }
	public function getPath() { return $this->getComponent( 'path' ); }
	public function getQueryString() { return $this->getComponent( 'query' ); }
	public function getFragment() { return $this->getComponent( 'fragment' ); }

	// Convenience setters for the individual components.
	public function setProtocol( $scheme ) { $this->setComponent( 'scheme', $scheme ); }
	public function setUser( $user ) { $this->setComponent( 'user', $user ); }
	public function setPassword( $pass ) { $this->setComponent( 'pass', $pass ); }
	public function setHost( $host ) { $this->setComponent( 'host', $host ); }
	public function setPort( $port ) { $this->setComponent( 'port', $port ); }
	public function setPath( $path ) { $this->setComponent( 'path', $path ); }
	public function setFragment( $fragment ) { $this->setComponent( 'fragment', $fragment ); }

	/**
	 * Gets the protocol-authority delimiter of a URI (e.g. "://", ":" or "//").
	 * @return string|null The explicitly set delimiter; otherwise "://" when
	 *   both a protocol and an authority are present; otherwise null
	 */
	public function getDelimiter() {
		$delimiter = $this->getComponent( 'delimiter' );
		if ( $delimiter !== null ) {
			// A specific delimiter is set, so return it.
			return $delimiter;
		}
		if ( $this->getAuthority() !== null && $this->getProtocol() !== null ) {
			// If the URI has a protocol and a body (i.e., some sort of host, etc.)
			// the default delimiter is "://", e.g., "http://test.com".
			return '://';
		}
		return null;
	}

	/**
	 * Gets query portion of a URI in array format.
	 * @return Array The query string as converted by wfCgiToArray()
	 */
	public function getQuery() {
		return wfCgiToArray( $this->getQueryString() );
	}

	/**
	 * Sets query portion of a URI.
	 * @param string|array $query Query string, or an array of parameters
	 *   which is converted with wfArrayToCGI()
	 */
	public function setQuery( $query ) {
		if ( is_array( $query ) ) {
			$query = wfArrayToCGI( $query );
		}
		$this->setComponent( 'query', $query );
	}

	/**
	 * Extend the query -- supply query parameters to override or add to ours
	 * @param Array|string $parameters query parameters to override or add
	 * @return Uri this URI object
	 */
	public function extendQuery( $parameters ) {
		if ( !is_array( $parameters ) ) {
			$parameters = wfCgiToArray( $parameters );
		}

		// Assign key by key so that existing keys are overridden in place and
		// numeric keys are not renumbered.
		$query = $this->getQuery();
		foreach ( $parameters as $key => $value ) {
			$query[$key] = $value;
		}

		$this->setQuery( $query );
		return $this;
	}

	/**
	 * Returns user and password portion of a URI.
	 * @return string|null "user:pass" when a password is set, otherwise just
	 *   the user (null if there is none)
	 */
	public function getUserInfo() {
		$user = $this->getComponent( 'user' );
		$pass = $this->getComponent( 'pass' );
		return $pass !== null ? "$user:$pass" : $user;
	}

	/**
	 * Gets host and port portion of a URI.
	 * @return string|null "host:port" when a port is set, otherwise just the
	 *   host (null if there is none)
	 */
	public function getHostPort() {
		$host = $this->getComponent( 'host' );
		$port = $this->getComponent( 'port' );
		return $port !== null ? "$host:$port" : $host;
	}

	/**
	 * Returns the userInfo and host and port portion of the URI.
	 * In most real-world URLs, this is simply the hostname, but it is more general.
	 * @return string|null
	 */
	public function getAuthority() {
		$userinfo = $this->getUserInfo();
		$hostinfo = $this->getHostPort();
		return $userinfo !== null ? "$userinfo@$hostinfo" : $hostinfo;
	}

	/**
	 * Returns everything after the authority section of the URI
	 * (path, then "?query" and "#fragment" when present).
	 * @return String|null null when path, query and fragment are all empty
	 */
	public function getRelativePath() {
		$path = $this->getComponent( 'path' );
		$query = $this->getComponent( 'query' );
		$fragment = $this->getComponent( 'fragment' );

		$retval = $path;
		if ( $query !== null ) {
			$retval .= "?$query";
		}
		if ( $fragment !== null ) {
			$retval .= "#$fragment";
		}
		return $retval;
	}

	/**
	 * Gets the entire URI string. May not be precisely the same as input due to order of query arguments.
	 * @return String the URI string
	 */
	public function toString() {
		return $this->getComponent( 'scheme' ) . $this->getDelimiter() . $this->getAuthority() . $this->getRelativePath();
	}

	/**
	 * Gets the entire URI string. May not be precisely the same as input due to order of query arguments.
	 * @return String the URI string
	 */
	public function __toString() {
		return $this->toString();
	}

}