A little refactoring of the input splitting/expansion:
[lhc/web/wiklou.git] / includes / Xml.php
1 <?php
2
3 /**
4 * Module of static functions for generating XML
5 */
6
class Xml {
	/**
	 * Format an XML element with given attributes and, optionally, text content.
	 * Element and attribute names are assumed to be ready for literal inclusion.
	 * Strings are assumed to not contain XML-illegal characters; special
	 * characters (<, >, &) are escaped but illegals are not touched.
	 *
	 * @param $element String: element name, used verbatim
	 * @param $attribs Array: Name=>value pairs. Values will be escaped.
	 * @param $contents String: NULL to make an open tag only; '' for a contentless closed tag (default)
	 * @return string
	 */
	public static function element( $element, $attribs = null, $contents = '' ) {
		// expandAttributes() returns null for null input, which concatenates as ''.
		$out = '<' . $element . self::expandAttributes( $attribs );
		if( is_null( $contents ) ) {
			// Open tag only; the caller is responsible for closing it.
			return $out . '>';
		}
		if( $contents === '' ) {
			// Strict comparison on purpose: only the empty string self-closes.
			return $out . ' />';
		}
		return $out . '>' . htmlspecialchars( $contents ) . "</$element>";
	}

	/**
	 * Given an array of ('attributename' => 'value'), it generates the code
	 * to set the XML attributes : attributename="value".
	 * The values are passed to Sanitizer::encodeAttribute; names are used as-is.
	 *
	 * @param $attribs Array of attributes for an XML element, or null
	 * @return string with a leading space before each attribute ('' for an
	 *   empty array), or null if null was given
	 * @throws MWException if $attribs is neither null nor an array
	 */
	private static function expandAttributes( $attribs ) {
		if( is_null( $attribs ) ) {
			return null;
		}
		if( !is_array( $attribs ) ) {
			throw new MWException( 'Expected attribute array, got something else in ' . __METHOD__ );
		}
		$out = '';
		foreach( $attribs as $name => $val ) {
			$out .= " {$name}=\"" . Sanitizer::encodeAttribute( $val ) . '"';
		}
		return $out;
	}

	/**
	 * Format an XML element as with self::element(), but run text through the
	 * UtfNormal::cleanUp() validator first to ensure that no invalid UTF-8
	 * is passed.
	 *
	 * @param $element String: element name, used verbatim
	 * @param $attribs Array: Name=>value pairs. Values will be cleaned, then escaped.
	 * @param $contents String: NULL to make an open tag only; '' for a contentless closed tag (default)
	 * @return string
	 */
	public static function elementClean( $element, $attribs = array(), $contents = '' ) {
		if( $attribs ) {
			// array_map() with a single array keeps the attribute names as keys.
			$attribs = array_map( array( 'UtfNormal', 'cleanUp' ), $attribs );
		}
		if( $contents ) {
			// Falsy contents (null, '', '0') are passed through without cleanup.
			wfProfileIn( __METHOD__ . '-norm' );
			$contents = UtfNormal::cleanUp( $contents );
			wfProfileOut( __METHOD__ . '-norm' );
		}
		return self::element( $element, $attribs, $contents );
	}

	/**
	 * Build the opening tag of an XML element.
	 *
	 * @param $element String: element name, used verbatim
	 * @param $attribs Array: Name=>value pairs (values will be escaped), or null
	 * @return string
	 */
	public static function openElement( $element, $attribs = null ) {
		return '<' . $element . self::expandAttributes( $attribs ) . '>';
	}

	/**
	 * Build the closing tag of an XML element.
	 *
	 * @param $element String: element name, used verbatim
	 * @return string
	 */
	public static function closeElement( $element ) {
		return "</$element>";
	}

	/**
	 * Same as self::element(), but does not escape contents. Handy when the
	 * content you have is already valid xml.
	 *
	 * $contents now has a default so that the optional $attribs parameter is
	 * no longer followed by a required one (deprecated in PHP 8); existing
	 * three-argument calls behave exactly as before.
	 *
	 * @param $element String: element name, used verbatim
	 * @param $attribs Array: Name=>value pairs (values will be escaped), or null
	 * @param $contents String: raw markup placed between the tags, NOT escaped
	 * @return string
	 */
	public static function tags( $element, $attribs = null, $contents = '' ) {
		return self::openElement( $element, $attribs ) . $contents . self::closeElement( $element );
	}

	/**
	 * Build a drop-down box for selecting a namespace
	 *
	 * @param mixed $selected Namespace which should be pre-selected
	 * @param mixed $all Value of an item denoting all namespaces, or null to omit
	 * @param bool $hidden Include hidden namespaces? Currently unused by this method.
	 * @param string $element_name Value of the select's name attribute (the id is always 'namespace')
	 * @return string
	 */
	public static function namespaceSelector( $selected = '', $all = null, $hidden = false, $element_name = 'namespace' ) {
		global $wgContLang;
		$namespaces = $wgContLang->getFormattedNamespaces();
		$options = array();

		// Selected namespaces frequently arrive as strings (e.g. from request
		// data). Blanks, nulls and "all" markers must not match namespace 0,
		// so convert *just* digit-only values to clean ints. The is_scalar()
		// guard keeps null and arrays away from preg_match().
		if( is_scalar( $selected ) && preg_match( '/^\d+$/', $selected ) ) {
			$selected = intval( $selected );
		}

		if( !is_null( $all ) ) {
			$namespaces = array( $all => wfMsg( 'namespacesall' ) ) + $namespaces;
		}
		foreach( $namespaces as $index => $name ) {
			// Skip negative namespace indexes. Only integer keys are compared:
			// on PHP 8 a non-numeric string key such as '' compares as less
			// than 0, which would silently drop the "all" entry.
			if( is_int( $index ) && $index < NS_MAIN ) {
				continue;
			}
			if( $index === 0 ) {
				$name = wfMsg( 'blanknamespace' );
			}
			$options[] = self::option( $name, $index, $index === $selected );
		}

		return self::openElement( 'select', array( 'id' => 'namespace', 'name' => $element_name,
			'class' => 'namespaceselector' ) )
			. "\n"
			. implode( "\n", $options )
			. "\n"
			. self::closeElement( 'select' );
	}

	/**
	 * Create a month selector
	 *
	 * @param $selected Mixed: the month which should be selected, default ''.
	 *   Digit-only strings such as '3' are treated as the matching month number.
	 * @param $allmonths String: value of a special item denoting all month. Null to not include (default)
	 * @param string $id Element identifier
	 * @return String: Html string containing the month selector
	 */
	public static function monthSelector( $selected = '', $allmonths = null, $id = 'month' ) {
		global $wgLang;
		$options = array();
		if( is_null( $selected ) ) {
			$selected = '';
		}

		// Decide on the "all months" item against the caller's original value,
		// before any normalisation below can change its type.
		$allSelected = !is_null( $allmonths ) && $selected === $allmonths;
		if( !is_null( $allmonths ) ) {
			$options[] = self::option( wfMsg( 'monthsall' ), $allmonths, $allSelected );
		}

		// Months are compared strictly against ints, so a digit string such as
		// '3' would otherwise never be pre-selected (same normalisation as
		// namespaceSelector()).
		if( !$allSelected && is_string( $selected ) && preg_match( '/^\d+$/', $selected ) ) {
			$selected = intval( $selected );
		}

		for( $i = 1; $i < 13; $i++ ) {
			$options[] = self::option( $wgLang->getMonthName( $i ), $i, $selected === $i );
		}
		return self::openElement( 'select', array( 'id' => $id, 'name' => 'month' ) )
			. implode( "\n", $options )
			. self::closeElement( 'select' );
	}

	/**
	 * Build a label and a drop-down box for selecting a language.
	 *
	 * @param $selected The language code of the selected language
	 * @param $customisedOnly If true only languages which have some content are listed
	 * @return array of label and select
	 */
	public static function languageSelector( $selected, $customisedOnly = true ) {
		global $wgContLanguageCode;
		/**
		 * Make sure the site language is in the list; a custom language code
		 * might not have a defined name...
		 */
		$languages = Language::getLanguageNames( $customisedOnly );
		if( !array_key_exists( $wgContLanguageCode, $languages ) ) {
			$languages[$wgContLanguageCode] = $wgContLanguageCode;
		}
		ksort( $languages );

		/**
		 * If a bogus value is set, default to the content language.
		 * Otherwise, no default is selected and the user ends up
		 * with the first language in the list.
		 */
		if( !isset( $languages[$selected] ) ) {
			$selected = $wgContLanguageCode;
		}
		$options = "\n";
		foreach( $languages as $code => $name ) {
			$options .= self::option( "$code - $name", $code, ( $code == $selected ) ) . "\n";
		}

		return array(
			self::label( wfMsg( 'yourlanguage' ), 'wpUserLanguage' ),
			self::tags( 'select',
				array( 'id' => 'wpUserLanguage', 'name' => 'wpUserLanguage' ),
				$options
			)
		);
	}

	/**
	 * Convenience function to build an HTML span with a class.
	 *
	 * @param $text String: text content, will be escaped
	 * @param $class String: value of the class attribute; wins over any 'class' in $attribs
	 * @param $attribs Array: optional additional attributes
	 * @return string HTML
	 */
	public static function span( $text, $class, $attribs = array() ) {
		return self::element( 'span', array( 'class' => $class ) + $attribs, $text );
	}

	/**
	 * Convenience function to build an HTML text input field
	 *
	 * @param $name String: value of the name attribute
	 * @param $size Mixed: value of the size attribute; omitted when falsy (default)
	 * @param $value Mixed: value of the value attribute; omitted when false (default)
	 * @param $attribs Array: optional additional attributes; cannot override
	 *   name, size or value once those are set here
	 * @return string HTML
	 */
	public static function input( $name, $size = false, $value = false, $attribs = array() ) {
		$attributes = array( 'name' => $name );
		// Do not emit empty size=""/value="" attributes for unset arguments.
		if( $size ) {
			$attributes['size'] = $size;
		}
		if( $value !== false ) {
			$attributes['value'] = $value;
		}
		return self::element( 'input', $attributes + $attribs );
	}

	/**
	 * Convenience function to build an HTML password input field
	 *
	 * @param $name String: value of the name attribute
	 * @param $size Mixed: value of the size attribute, or false to omit
	 * @param $value Mixed: value of the value attribute, or false to omit
	 * @param $attribs Array: optional additional attributes; 'type' is always forced to 'password'
	 * @return string HTML
	 */
	public static function password( $name, $size = false, $value = false, $attribs = array() ) {
		$attribs = array_merge( $attribs, array( 'type' => 'password' ) );
		return self::input( $name, $size, $value, $attribs );
	}

	/**
	 * Internal function for use in checkboxes and radio buttons and such.
	 *
	 * @param $name String: name of the boolean attribute
	 * @param $present Boolean: whether the attribute should be set
	 * @return array ( $name => $name ) if $present, an empty array otherwise
	 */
	public static function attrib( $name, $present = true ) {
		if( $present ) {
			return array( $name => $name );
		}
		return array();
	}

	/**
	 * Convenience function to build an HTML checkbox
	 *
	 * @param $name String: value of the name attribute
	 * @param $checked Boolean: whether the box is initially checked
	 * @param $attribs Array: optional additional attributes; merged last, so
	 *   they override the defaults set here (name, type, value, checked)
	 * @return string HTML
	 */
	public static function check( $name, $checked = false, $attribs = array() ) {
		$defaults = array(
			'name' => $name,
			'type' => 'checkbox',
			'value' => 1 );
		return self::element( 'input',
			array_merge( $defaults, self::attrib( 'checked', $checked ), $attribs ) );
	}

	/**
	 * Convenience function to build an HTML radio button
	 *
	 * @param $name String: value of the name attribute
	 * @param $value String: value of the value attribute
	 * @param $checked Boolean: whether the button is initially selected
	 * @param $attribs Array: optional additional attributes; combined with the
	 *   array union operator, so they do NOT override name, type, value or checked
	 * @return string HTML
	 */
	public static function radio( $name, $value, $checked = false, $attribs = array() ) {
		$defaults = array(
			'name' => $name,
			'type' => 'radio',
			'value' => $value );
		return self::element( 'input', $defaults + self::attrib( 'checked', $checked ) + $attribs );
	}

	/**
	 * Convenience function to build an HTML form label
	 *
	 * @param $label String: label text, will be escaped
	 * @param $id String: id of the form element the label is for
	 * @return string HTML
	 */
	public static function label( $label, $id ) {
		return self::element( 'label', array( 'for' => $id ), $label );
	}

	/**
	 * Convenience function to build an HTML text input field with a label
	 *
	 * @param $label String: label text, will be escaped
	 * @param $name String: value of the input's name attribute
	 * @param $id String: id of the input, also used as the label target
	 * @param $size Mixed: value of the size attribute, or false to omit
	 * @param $value Mixed: value of the value attribute, or false to omit
	 * @param $attribs Array: optional additional attributes for the input
	 * @return string HTML
	 */
	public static function inputLabel( $label, $name, $id, $size = false, $value = false, $attribs = array() ) {
		return self::label( $label, $id ) .
			'&nbsp;' .
			self::input( $name, $size, $value, array( 'id' => $id ) + $attribs );
	}

	/**
	 * Convenience function to build an HTML checkbox with a label
	 *
	 * @param $label String: label text, will be escaped
	 * @param $name String: value of the checkbox's name attribute
	 * @param $id String: id of the checkbox, also used as the label target
	 * @param $checked Boolean: whether the box is initially checked
	 * @param $attribs Array: optional additional attributes for the checkbox
	 * @return string HTML
	 */
	public static function checkLabel( $label, $name, $id, $checked = false, $attribs = array() ) {
		return self::check( $name, $checked, array( 'id' => $id ) + $attribs ) .
			'&nbsp;' .
			self::label( $label, $id );
	}

	/**
	 * Convenience function to build an HTML radio button with a label
	 *
	 * @param $label String: label text, will be escaped
	 * @param $name String: value of the radio button's name attribute
	 * @param $value String: value of the radio button's value attribute
	 * @param $id String: id of the radio button, also used as the label target
	 * @param $checked Boolean: whether the button is initially selected
	 * @param $attribs Array: optional additional attributes for the radio button
	 * @return string HTML
	 */
	public static function radioLabel( $label, $name, $value, $id, $checked = false, $attribs = array() ) {
		return self::radio( $name, $value, $checked, array( 'id' => $id ) + $attribs ) .
			'&nbsp;' .
			self::label( $label, $id );
	}

	/**
	 * Convenience function to build an HTML submit button
	 *
	 * @param $value String: label text for the button
	 * @param $attribs Array: optional custom attributes
	 * @return string HTML
	 */
	public static function submitButton( $value, $attribs = array() ) {
		return self::element( 'input', array( 'type' => 'submit', 'value' => $value ) + $attribs );
	}

	/**
	 * Convenience function to build an HTML hidden form field.
	 *
	 * @param $name String: value of the name attribute
	 * @param $value String: value of the value attribute
	 * @param $attribs Array: optional custom attributes
	 * @return string HTML
	 */
	public static function hidden( $name, $value, $attribs = array() ) {
		$defaults = array(
			'name' => $name,
			'type' => 'hidden',
			'value' => $value );
		return self::element( 'input', $defaults + $attribs );
	}

	/**
	 * Convenience function to build an HTML drop-down list item.
	 *
	 * @param $text String: text for this item
	 * @param $value String: form submission value; if null, no value attribute is set
	 * @param $selected boolean: if true, will be the default selected item
	 * @param $attribs array: optional additional HTML attributes
	 * @return string HTML
	 */
	public static function option( $text, $value = null, $selected = false,
			$attribs = array() ) {
		if( !is_null( $value ) ) {
			$attribs['value'] = $value;
		}
		if( $selected ) {
			$attribs['selected'] = 'selected';
		}
		return self::element( 'option', $attribs, $text );
	}

	/**
	 * Returns an escaped string suitable for inclusion in a string literal
	 * for JavaScript source code.
	 * Illegal control characters are assumed not to be present.
	 *
	 * @param string $string
	 * @return string
	 */
	public static function escapeJsString( $string ) {
		// See ECMA 262 section 7.8.4 for string literal format
		$pairs = array(
			"\\" => "\\\\",
			"\"" => "\\\"",
			'\'' => '\\\'',
			"\n" => "\\n",
			"\r" => "\\r",

			# To avoid closing the element or CDATA section
			"<" => "\\x3c",
			">" => "\\x3e",

			# To avoid any complaints about bad entity refs
			"&" => "\\x26",

			# Work around https://bugzilla.mozilla.org/show_bug.cgi?id=274152
			# Encode certain Unicode formatting chars so affected
			# versions of Gecko don't misinterpret our strings;
			# this is a common problem with Farsi text.
			"\xe2\x80\x8c" => "\\u200c", // ZERO WIDTH NON-JOINER
			"\xe2\x80\x8d" => "\\u200d", // ZERO WIDTH JOINER

			# U+2028 and U+2029 are ECMAScript line terminators, so like
			# \n and \r they would end the string literal if left raw.
			"\xe2\x80\xa8" => "\\u2028", // LINE SEPARATOR
			"\xe2\x80\xa9" => "\\u2029", // PARAGRAPH SEPARATOR
		);
		return strtr( $string, $pairs );
	}

	/**
	 * Encode a variable of unknown type to JavaScript.
	 * Arrays are converted to JS arrays (keys are discarded), objects are
	 * converted to JS associative arrays (objects). So cast your PHP
	 * associative arrays to objects before passing them to here.
	 *
	 * Booleans and null become the matching JS keywords and integers are
	 * returned unchanged. Every other value (strings, floats, ...) is emitted
	 * as a double-quoted, escaped JS string literal.
	 *
	 * @param $value Mixed: the value to encode
	 * @return Mixed: JavaScript source for the value (the int itself for integers)
	 */
	public static function encodeJsVar( $value ) {
		if( is_bool( $value ) ) {
			return $value ? 'true' : 'false';
		}
		if( is_null( $value ) ) {
			return 'null';
		}
		if( is_int( $value ) ) {
			return $value;
		}
		if( is_array( $value ) ) {
			$items = array();
			foreach( $value as $elt ) {
				$items[] = self::encodeJsVar( $elt );
			}
			return '[' . implode( ', ', $items ) . ']';
		}
		if( is_object( $value ) ) {
			$members = array();
			foreach( (array)$value as $name => $elt ) {
				$members[] = '"' . self::escapeJsString( $name ) . '": ' .
					self::encodeJsVar( $elt );
			}
			return '{' . implode( ', ', $members ) . '}';
		}
		return '"' . self::escapeJsString( $value ) . '"';
	}

	/**
	 * Check if a string is well-formed XML.
	 * Must include the surrounding tag.
	 *
	 * @param $text String: string to test.
	 * @return bool
	 *
	 * @todo Error position reporting return
	 */
	public static function isWellFormed( $text ) {
		$parser = xml_parser_create( "UTF-8" );

		# case folding violates XML standard, turn it off
		xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false );

		// Parse the whole text as one final chunk, then release the parser
		// on a single path regardless of the outcome.
		$ok = xml_parse( $parser, $text, true );
		xml_parser_free( $parser );
		return (bool)$ok;
	}

	/**
	 * Check if a string is a well-formed XML fragment.
	 * Wraps fragment in an \<html\> bit and doctype, so it can be a fragment
	 * and can use HTML named entities.
	 *
	 * @param $text String: the fragment to test
	 * @return bool
	 */
	public static function isWellFormedXmlFragment( $text ) {
		$html =
			Sanitizer::hackDocType() .
			'<html>' .
			$text .
			'</html>';
		return self::isWellFormed( $html );
	}

	/**
	 * Replace " > and < with their respective HTML entities ( &quot;,
	 * &gt;, &lt;). Ampersands are left untouched.
	 *
	 * @param $in String: text that might contain HTML tags.
	 * @return string Escaped string
	 */
	public static function escapeTagsOnly( $in ) {
		return str_replace(
			array( '"', '>', '<' ),
			array( '&quot;', '&gt;', '&lt;' ),
			$in );
	}
}
463