Reverted r111188: backport conflict fodder
[lhc/web/wiklou.git] / includes / ConfEditor.php
1 <?php
2
3 /**
4 * This is a state machine style parser with two internal stacks:
5 * * A next state stack, which determines the state the machine will progress to next
6 * * A path stack, which keeps track of the logical location in the file.
7 *
8 * Reference grammar:
9 *
10 * file = T_OPEN_TAG *statement
11 * statement = T_VARIABLE "=" expression ";"
12 * expression = array / scalar / T_VARIABLE
13 * array = T_ARRAY "(" [ element *( "," element ) [ "," ] ] ")"
14 * element = assoc-element / expression
15 * assoc-element = scalar T_DOUBLE_ARROW expression
16 * scalar = T_LNUMBER / T_DNUMBER / T_STRING / T_CONSTANT_ENCAPSED_STRING
17 */
18 class ConfEditor {
/** Source text that is parsed and edited */
public $text;

/** Token array returned by token_get_all() */
public $tokens;

/** Index of the current token in the token array */
public $pos;

/** 1-based line number of the current position */
public $lineNum;

/** 1-based column number of the current position */
public $colNum;

/** 0-based byte offset of the current position */
public $byteNum;

/** ConfEditorToken object at the current position */
public $currentToken;

/** ConfEditorToken object that preceded the current one */
public $prevToken;

/**
 * The state machine stack: an array of state name strings. The topmost
 * element is popped off and becomes the next parser state.
 */
public $stateStack;


/**
 * The path stack: a stack of associative arrays with these elements:
 *    name              The name of top level of the path
 *    level             The level (number of elements) of the path
 *    startByte         The byte offset of the start of the path
 *    startToken        The token offset of the start
 *    endByte           The byte offset of the end, plus one
 *    endToken          The token offset of the end, plus one
 *    valueStartToken   The start token offset of the value part
 *    valueStartByte    The start byte offset of the value part
 *    valueEndToken     The end token offset of the value part, plus one
 *    valueEndByte      The end byte offset of the value part, plus one
 *    nextArrayIndex    The next numeric array index at this level
 *    hasComma          True if the array element ends with a comma
 *    arrowByte         The byte offset of the "=>", or false if there isn't one
 */
public $pathStack;

/**
 * Snapshot of the top pathStack element for every path encountered,
 * indexed by slash-separated path.
 */
public $pathInfo;

/**
 * Next serial number used for whitespace placeholder paths (\@extra-N)
 */
public $serial;

/**
 * Editor state: the internal copy/insert operations which are applied to
 * the source string to obtain the destination string.
 */
public $edits;
84
/**
 * Minimal entry point for command-line testing: parses the text and
 * reports the outcome.
 *
 * @param $text string
 *
 * @return string "OK" when the text parses; otherwise the parse error
 *   message followed by the highlighted source line
 */
static function test( $text ) {
	$editor = new self( $text );
	try {
		$editor->parse();
		return "OK";
	} catch ( ConfEditorParseError $parseError ) {
		return $parseError->getMessage() . "\n" . $parseError->highlight( $text );
	}
}
101
/**
 * Create a parser/editor for the given source text. Nothing is parsed
 * until parse(), edit() or getVars() is called.
 *
 * @param $text string The text to parse
 */
public function __construct( $text ) {
	$this->text = $text;
}
108
/**
 * Edit the text. Returns the edited text.
 *
 * The source is parsed first, then each operation is translated into
 * copy/insert entries in $this->edits. The edits are applied to build the
 * output, $this->text is replaced by that output, and the output is parsed
 * again as a sanity check.
 *
 * @param $ops Array of operations.
 *
 * Operations are given as an associative array, with members:
 *    type:     One of delete, set, append or insert (required)
 *    path:     The path to operate on (required)
 *    key:      The array key to insert/append, with PHP quotes
 *    value:    The value, with PHP quotes
 *
 * delete
 *    Deletes an array element or statement with the specified path.
 *    e.g.
 *        array('type' => 'delete', 'path' => '$foo/bar/baz' )
 *    is equivalent to the runtime PHP code:
 *        unset( $foo['bar']['baz'] );
 *
 * set
 *    Sets the value of an array element. If the element doesn't exist, it
 *    is appended to the array. If it does exist, the value is set, with
 *    comments and indenting preserved.
 *
 * append
 *    Appends a new element to the end of the array. Adds a trailing comma.
 *    e.g.
 *        array( 'type' => 'append', 'path' => '$foo/bar',
 *            'key' => 'baz', 'value' => "'x'" )
 *    is like the PHP code:
 *        $foo['bar']['baz'] = 'x';
 *
 * insert
 *    Insert a new element at the start of the array.
 *
 * @return string
 * @throws MWException if the operation type is unrecognised, a required
 *   path or array element cannot be found, or the edited text fails the
 *   final sanity parse. A ConfEditorParseError from the initial parse is
 *   not caught here.
 */
public function edit( $ops ) {
	$this->parse();

	// Start with a single edit which copies the whole source unchanged
	$this->edits = array(
		array( 'copy', 0, strlen( $this->text ) )
	);
	foreach ( $ops as $op ) {
		$type = $op['type'];
		$path = $op['path'];
		// 'value' and 'key' are optional, they default to null
		$value = isset( $op['value'] ) ? $op['value'] : null;
		$key = isset( $op['key'] ) ? $op['key'] : null;

		switch ( $type ) {
		case 'delete':
			// Passing false as the new text removes the region without inserting anything
			list( $start, $end ) = $this->findDeletionRegion( $path );
			$this->replaceSourceRegion( $start, $end, false );
			break;
		case 'set':
			if ( isset( $this->pathInfo[$path] ) ) {
				list( $start, $end ) = $this->findValueRegion( $path );
				// The value is inserted exactly as given, it is not passed through var_export()
				$encValue = $value;
				$this->replaceSourceRegion( $start, $end, $encValue );
				break;
			}
			// No existing path, fall through to append.
			// The last path component becomes the (quoted) key, the rest
			// becomes the path of the array to append to.
			$slashPos = strrpos( $path, '/' );
			$key = var_export( substr( $path, $slashPos + 1 ), true );
			$path = substr( $path, 0, $slashPos );
			// Fall through
		case 'append':
			// Find the last array element
			$lastEltPath = $this->findLastArrayElement( $path );
			if ( $lastEltPath === false ) {
				throw new MWException( "Can't find any element of array \"$path\"" );
			}
			$lastEltInfo = $this->pathInfo[$lastEltPath];

			// Has it got a comma already?
			// (\@extra placeholder elements are skipped, they have no value to terminate)
			if ( strpos( $lastEltPath, '@extra' ) === false && !$lastEltInfo['hasComma'] ) {
				// No comma, insert one after the value region
				list( , $end ) = $this->findValueRegion( $lastEltPath );
				// NOTE(review): the insertion point is one byte before the recorded
				// value end - confirm this is right when no whitespace follows the value
				$this->replaceSourceRegion( $end - 1, $end - 1, ',' );
			}

			// Make the text to insert
			list( $start, $end ) = $this->findDeletionRegion( $lastEltPath );

			if ( $key === null ) {
				list( $indent, ) = $this->getIndent( $start );
				$textToInsert = "$indent$value,";
			} else {
				// Pass the arrow position of the last element so that getIndent()
				// can compute matching padding before the "=>"
				list( $indent, $arrowIndent ) =
					$this->getIndent( $start, $key, $lastEltInfo['arrowByte'] );
				$textToInsert = "$indent$key$arrowIndent=> $value,";
			}
			// getIndent() gives false for the indent when the region does not
			// begin at the start of a line; use a space instead of a line break then
			$textToInsert .= ( $indent === false ? ' ' : "\n" );

			// Insert the item
			$this->replaceSourceRegion( $end, $end, $textToInsert );
			break;
		case 'insert':
			// Find first array element
			$firstEltPath = $this->findFirstArrayElement( $path );
			if ( $firstEltPath === false ) {
				throw new MWException( "Can't find array element of \"$path\"" );
			}
			list( $start, ) = $this->findDeletionRegion( $firstEltPath );
			$info = $this->pathInfo[$firstEltPath];

			// Make the text to insert
			if ( $key === null ) {
				list( $indent, ) = $this->getIndent( $start );
				$textToInsert = "$indent$value,";
			} else {
				list( $indent, $arrowIndent ) =
					$this->getIndent( $start, $key, $info['arrowByte'] );
				$textToInsert = "$indent$key$arrowIndent=> $value,";
			}
			$textToInsert .= ( $indent === false ? ' ' : "\n" );

			// Insert the item
			$this->replaceSourceRegion( $start, $start, $textToInsert );
			break;
		default:
			throw new MWException( "Unrecognised operation: \"$type\"" );
		}
	}

	// Do the edits
	$out = '';
	foreach ( $this->edits as $edit ) {
		if ( $edit[0] == 'copy' ) {
			// array( 'copy', start, end ): copy source bytes start..end-1
			$out .= substr( $this->text, $edit[1], $edit[2] - $edit[1] );
		} else { // if ( $edit[0] == 'insert' )
			// array( 'insert', text ): emit the new text
			$out .= $edit[1];
		}
	}

	// Do a second parse as a sanity check
	$this->text = $out;
	try {
		$this->parse();
	} catch ( ConfEditorParseError $e ) {
		throw new MWException(
			"Sorry, ConfEditor broke the file during editing and it won't parse anymore: " .
			$e->getMessage() );
	}
	return $out;
}
253
/**
 * Parse the text and collect the variables it defines.
 *
 * Every recorded path which starts with "$" and whose own name is not an
 * \@-placeholder is stored, via setVar(), under its parent path with the
 * value text run through parseScalar().
 *
 * @return array( varname => value )
 */
function getVars() {
	$this->parse();
	$vars = array();
	foreach ( $this->pathInfo as $fullPath => $info ) {
		if ( $fullPath[0] != '$' ) {
			continue;
		}
		$name = $info['name'];
		if ( $name[0] == '@' ) {
			continue;
		}
		if ( $name[0] == '$' ) {
			$name = substr( $name, 1 );
		}

		// Path without the leading "$", then without the trailing name
		$relativePath = substr( $fullPath, 1 );
		$parentLength = strlen( $relativePath ) - strlen( $name );
		$parentPath = substr( $relativePath, 0, $parentLength );
		if ( substr( $parentPath, -1 ) == '/' ) {
			$parentPath = substr( $parentPath, 0, -1 );
		}

		$valueStart = $info['valueStartByte'];
		$rawValue = substr( $this->text, $valueStart, $info['valueEndByte'] - $valueStart );
		$this->setVar( $vars, $parentPath, $name, $this->parseScalar( $rawValue ) );
	}
	return $vars;
}
283
/**
 * Store a value in a nested array unless that slot is set already.
 * For example setVar( $arr, 'foo/bar', 'baz', 3 ) results in
 * $arr['foo']['bar']['baz'] = 3, creating the intermediate arrays
 * as needed.
 *
 * @param $array array Array to modify (by reference)
 * @param $path string slash-delimited path, '' for the top level
 * @param $key mixed Key
 * @param $value mixed Value
 */
function setVar( &$array, $path, $key, $value ) {
	$node =& $array;
	$segments = ( $path !== '' ) ? explode( '/', $path ) : array();
	foreach ( $segments as $segment ) {
		if ( !isset( $node[$segment] ) ) {
			$node[$segment] = array();
		}
		$node =& $node[$segment];
	}
	if ( isset( $node[$key] ) ) {
		// Already set, the existing value wins
		return;
	}
	$node[$key] = $value;
}
306
/**
 * Parse a scalar value in PHP
 *
 * Quoted strings are unquoted and unescaped. Text starting with the
 * keywords true, false or null gives the corresponding PHP value; the
 * keywords are matched case-insensitively, as PHP itself does (TRUE,
 * False, NULL, ...). Anything else is returned unchanged as a string.
 *
 * @param $str string Source text of the scalar
 * @return mixed Parsed value
 */
function parseScalar( $str ) {
	if ( $str !== '' && $str[0] == '\'' ) {
		// Single-quoted string
		// @todo FIXME: trim() call is due to mystery bug where whitespace gets
		// appended to the token; without it we ended up reading in the
		// extra quote on the end!
		return strtr( substr( trim( $str ), 1, -1 ),
			array( '\\\'' => '\'', '\\\\' => '\\' ) );
	}
	if ( $str !== '' && $str[0] == '"' ) {
		// Double-quoted string
		// @todo FIXME: trim() call is due to mystery bug where whitespace gets
		// appended to the token; without it we ended up reading in the
		// extra quote on the end!
		return stripcslashes( substr( trim( $str ), 1, -1 ) );
	}
	// Only the leading characters are compared, so trailing text (such as
	// the stray whitespace mentioned above) does not prevent a match.
	if ( strncasecmp( $str, 'true', 4 ) === 0 ) {
		return true;
	}
	if ( strncasecmp( $str, 'false', 5 ) === 0 ) {
		return false;
	}
	if ( strncasecmp( $str, 'null', 4 ) === 0 ) {
		return null;
	}
	// Must be some kind of numeric value, so let PHP's weak typing
	// be useful for a change
	return $str;
}
335
/**
 * Replace the source byte range $start..$end (end exclusive) with $newText.
 * Nothing is changed in the text itself; the 'copy' entries of
 * $this->edits which contain the range are split, and an 'insert' entry
 * is placed between the two halves unless $newText is false.
 *
 * @param $start int Start byte offset
 * @param $end int Past-the-end byte offset
 * @param $newText string|bool Replacement text, or false to just delete
 * @throws MWException if the range only partially overlaps a copy entry
 */
function replaceSourceRegion( $start, $end, $newText = false ) {
	$rebuilt = array();
	foreach ( $this->edits as $op ) {
		if ( $op[0] !== 'copy' ) {
			// Inserts are carried over untouched
			$rebuilt[] = $op;
			continue;
		}
		list( , $from, $to ) = $op;
		$touchesCopy = ( $start < $to && $end > $from );
		if ( !$touchesCopy ) {
			// The region lies entirely outside this copy
			$rebuilt[] = $op;
			continue;
		}
		$crossesFrom = ( $start < $from && $end > $from );
		$crossesTo = ( $start < $to && $end > $to );
		if ( $crossesFrom || $crossesTo ) {
			throw new MWException( "Overlapping regions found, can't do the edit" );
		}
		// Cut the copy in two around the region
		$rebuilt[] = array( 'copy', $from, $start );
		if ( $newText !== false ) {
			$rebuilt[] = array( 'insert', $newText );
		}
		$rebuilt[] = array( 'copy', $end, $to );
	}
	$this->edits = $rebuilt;
}
370
/**
 * Finds the source byte region which you would want to delete, if $pathName
 * was to be deleted. Includes the leading spaces and tabs, the trailing line
 * break, and any comments in between.
 *
 * This repositions the token cursor: it restarts from the first token and
 * advances to the end token of the path.
 *
 * @param $pathName string Slash-separated path, a key of $this->pathInfo
 * @return array Two elements: the start byte offset and the past-the-end
 *   byte offset of the region
 * @throws MWException if the path is unknown
 */
function findDeletionRegion( $pathName ) {
	if ( !isset( $this->pathInfo[$pathName] ) ) {
		throw new MWException( "Can't find path \"$pathName\"" );
	}
	$path = $this->pathInfo[$pathName];
	// Find the start
	$this->firstToken();
	while ( $this->pos != $path['startToken'] ) {
		$this->nextToken();
	}
	$regionStart = $path['startByte'];
	// Walk backwards over the skippable (whitespace/comment) tokens that
	// precede the start token, extending the region towards the line start
	for ( $offset = -1; $offset >= -$this->pos; $offset-- ) {
		$token = $this->getTokenAhead( $offset );
		if ( !$token->isSkip() ) {
			// If there is other content on the same line, don't move the start point
			// back, because that will cause the regions to overlap.
			$regionStart = $path['startByte'];
			break;
		}
		$lfPos = strrpos( $token->text, "\n" );
		if ( $lfPos === false ) {
			// No line break in this token, take all of it
			$regionStart -= strlen( $token->text );
		} else {
			// The line start does not include the LF
			$regionStart -= strlen( $token->text ) - $lfPos - 1;
			break;
		}
	}
	// Find the end
	while ( $this->pos != $path['endToken'] ) {
		$this->nextToken();
	}
	$regionEnd = $path['endByte']; // past the end
	// Walk forwards over the skippable tokens that follow, extending the
	// region up to and including the first line break
	for ( $offset = 0; $offset < count( $this->tokens ) - $this->pos; $offset++ ) {
		$token = $this->getTokenAhead( $offset );
		if ( !$token->isSkip() ) {
			break;
		}
		$lfPos = strpos( $token->text, "\n" );
		if ( $lfPos === false ) {
			$regionEnd += strlen( $token->text );
		} else {
			// This should point past the LF
			$regionEnd += $lfPos + 1;
			break;
		}
	}
	return array( $regionStart, $regionEnd );
}
426
/**
 * Look up the byte region of the value part of a path, as recorded
 * during parsing. The region includes the quotes but not the trailing
 * comma or semicolon.
 *
 * The end position is the past-the-end (end + 1) value as per convention.
 *
 * @param $pathName string Slash-separated path
 * @return array Start byte offset and past-the-end byte offset
 * @throws MWException if the path is unknown or has no value region
 */
function findValueRegion( $pathName ) {
	if ( !isset( $this->pathInfo[$pathName] ) ) {
		throw new MWException( "Can't find path \"$pathName\"" );
	}
	$info = $this->pathInfo[$pathName];
	$valueStart = $info['valueStartByte'];
	$valueEnd = $info['valueEndByte'];
	if ( $valueStart !== false && $valueEnd !== false ) {
		return array( $valueStart, $valueEnd );
	}
	throw new MWException( "Can't find value region for path \"$pathName\"" );
}
445
/**
 * Find the path name of the last element in the array.
 * If the array is empty, this will return the \@extra interstitial element.
 * If the specified path is not found or is not an array, it will return false.
 *
 * @param $path string Path of the array
 * @return bool|int|string
 */
function findLastArrayElement( $path ) {
	$prefix = "$path/";
	$prefixLength = strlen( $path ) + 1;

	// First pass: real elements only. Candidates whose name part starts
	// with "@" are ignored; the scan stops at the first other candidate
	// after the run of matches.
	$lastReal = false;
	foreach ( $this->pathInfo as $candidate => $unused ) {
		if ( substr( $candidate, $prefixLength, 1 ) == '@' ) {
			continue;
		}
		if ( substr( $candidate, 0, $prefixLength ) == $prefix ) {
			$lastReal = $candidate;
			continue;
		}
		if ( $lastReal !== false ) {
			break;
		}
	}
	if ( $lastReal !== false ) {
		return $lastReal;
	}

	// Second pass: accept interstitial elements too
	$lastAny = false;
	foreach ( $this->pathInfo as $candidate => $unused ) {
		if ( substr( $candidate, 0, $prefixLength ) == $prefix ) {
			$lastAny = $candidate;
			continue;
		}
		if ( $lastAny !== false ) {
			break;
		}
	}
	return $lastAny;
}
482
/**
 * Find the path name of first element in the array.
 * If the array is empty, this will return the \@extra interstitial element.
 * If the specified path is not found or is not an array, it will return false.
 *
 * @param $path string Path of the array
 * @return bool|int|string
 */
function findFirstArrayElement( $path ) {
	$prefix = "$path/";
	$prefixLength = strlen( $path ) + 1;

	// First pass: the first ordinary (non-"@") element
	foreach ( $this->pathInfo as $candidate => $unused ) {
		$matchesPrefix = ( substr( $candidate, 0, $prefixLength ) == $prefix );
		if ( $matchesPrefix && substr( $candidate, $prefixLength, 1 ) != '@' ) {
			return $candidate;
		}
	}

	// Second pass: the first element of any kind, interstitial included
	foreach ( $this->pathInfo as $candidate => $unused ) {
		if ( substr( $candidate, 0, $prefixLength ) == $prefix ) {
			return $candidate;
		}
	}
	return false;
}
508
/**
 * Get the indent string (spaces and tabs) which sits after a given start
 * position, plus the padding to put between a key and its "=>".
 *
 * @param $pos int Byte offset in the text
 * @param $key string|bool Key text whose length is subtracted from the
 *   arrow padding
 * @param $arrowPos int|bool Byte offset of an existing "=>" to align
 *   with, or false
 * @return array Two elements: the indent string (false if $pos is not at
 *   the start of a line) and the arrow padding (a single space unless a
 *   positive padding width can be computed)
 */
function getIndent( $pos, $key = false, $arrowPos = false ) {
	$atLineStart = ( $pos == 0 || $this->text[$pos-1] == "\n" );
	if ( !$atLineStart ) {
		return array( false, ' ' );
	}

	$indentWidth = strspn( $this->text, " \t", $pos );
	$indent = substr( $this->text, $pos, $indentWidth );

	$arrowIndent = ' ';
	if ( $indent !== false && $arrowPos !== false ) {
		$padWidth = $arrowPos - $pos - $indentWidth - strlen( $key );
		if ( $padWidth > 0 ) {
			$arrowIndent = str_repeat( ' ', $padWidth );
		}
	}
	return array( $indent, $arrowIndent );
}
530
/**
 * Run the parser on the text. Throws an exception if the string does not
 * match our defined subset of PHP syntax.
 *
 * Each loop iteration pops one state name from the state stack and runs
 * its handler; handlers consume tokens and push the states which should
 * follow. Path information is recorded in $this->pathInfo as paths end.
 *
 * @throws ConfEditorParseError via error() and expect()
 */
public function parse() {
	$this->initParse();
	$this->pushState( 'file' );
	// The top level starts with a placeholder path
	$this->pushPath( '@extra-' . ($this->serial++) );
	$token = $this->firstToken();

	while ( !$token->isEnd() ) {
		$state = $this->popState();
		if ( !$state ) {
			$this->error( 'internal error: empty state stack' );
		}

		switch ( $state ) {
		case 'file':
			// Start of file: the opening tag, then statements if there are any
			$this->expect( T_OPEN_TAG );
			$token = $this->skipSpace();
			if ( $token->isEnd() ) {
				// Leave the while loop, not just the switch
				break 2;
			}
			// 'statement' is pushed last, so it runs first and 'file 2' follows
			$this->pushState( 'statement', 'file 2' );
			break;
		case 'file 2':
			// After a statement: either the end of the file or another statement
			$token = $this->skipSpace();
			if ( $token->isEnd() ) {
				break 2;
			}
			$this->pushState( 'statement', 'file 2' );
			break;
		case 'statement':
			// T_VARIABLE [ "[" scalar "]" ] "=" expression
			$token = $this->skipSpace();
			if ( !$this->validatePath( $token->text ) ) {
				$this->error( "Invalid variable name \"{$token->text}\"" );
			}
			$this->nextPath( $token->text );
			$this->expect( T_VARIABLE );
			$this->skipSpace();
			$arrayAssign = false;
			if ( $this->currentToken()->type == '[' ) {
				// Assignment to an array element, $var[key] = ...
				$this->nextToken();
				$token = $this->skipSpace();
				if ( !$token->isScalar() ) {
					$this->error( "expected a string or number for the array key" );
				}
				if ( $token->type == T_CONSTANT_ENCAPSED_STRING ) {
					// Quoted key, use its unquoted value as the path name
					$text = $this->parseScalar( $token->text );
				} else {
					$text = $token->text;
				}
				if ( !$this->validatePath( $text ) ) {
					$this->error( "Invalid associative array name \"$text\"" );
				}
				// The key becomes an extra path level below the variable
				$this->pushPath( $text );
				$this->nextToken();
				$this->skipSpace();
				$this->expect( ']' );
				$this->skipSpace();
				$arrayAssign = true;
			}
			$this->expect( '=' );
			$this->skipSpace();
			$this->startPathValue();
			if ( $arrayAssign )
				$this->pushState( 'expression', 'array assign end' );
			else
				$this->pushState( 'expression', 'statement end' );
			break;
		case 'array assign end':
		case 'statement end':
			$this->endPathValue();
			// Drop the extra path level which was pushed for the array key
			if ( $state == 'array assign end' )
				$this->popPath();
			$this->skipSpace();
			$this->expect( ';' );
			// Whatever follows the statement belongs to a new placeholder path
			$this->nextPath( '@extra-' . ($this->serial++) );
			break;
		case 'expression':
			// array / scalar / T_VARIABLE
			$token = $this->skipSpace();
			if ( $token->type == T_ARRAY ) {
				$this->pushState( 'array' );
			} elseif ( $token->isScalar() ) {
				$this->nextToken();
			} elseif ( $token->type == T_VARIABLE ) {
				$this->nextToken();
			} else {
				$this->error( "expected simple expression" );
			}
			break;
		case 'array':
			// T_ARRAY "(" followed by the elements, if any
			$this->skipSpace();
			$this->expect( T_ARRAY );
			$this->skipSpace();
			$this->expect( '(' );
			$this->skipSpace();
			// New path level for the array contents, starting with a placeholder
			$this->pushPath( '@extra-' . ($this->serial++) );
			if ( $this->isAhead( ')' ) ) {
				// Empty array
				$this->pushState( 'array end' );
			} else {
				$this->pushState( 'element', 'array end' );
			}
			break;
		case 'array end':
			$this->skipSpace();
			$this->popPath();
			$this->expect( ')' );
			break;
		case 'element':
			$token = $this->skipSpace();
			// Look ahead to find the double arrow
			if ( $token->isScalar() && $this->isAhead( T_DOUBLE_ARROW, 1 ) ) {
				// Found associative element
				$this->pushState( 'assoc-element', 'element end' );
			} else {
				// Not associative
				// '@next' makes nextPath() use the next numeric index as the name
				$this->nextPath( '@next' );
				$this->startPathValue();
				$this->pushState( 'expression', 'element end' );
			}
			break;
		case 'element end':
			$token = $this->skipSpace();
			if ( $token->type == ',' ) {
				$this->endPathValue();
				$this->markComma();
				$this->nextToken();
				$this->nextPath( '@extra-' . ($this->serial++) );
				// Look ahead to find ending bracket
				if ( $this->isAhead( ")" ) ) {
					// Found ending bracket, no continuation
					$this->skipSpace();
				} else {
					// No ending bracket, continue to next element
					$this->pushState( 'element' );
				}
			} elseif ( $token->type == ')' ) {
				// End array
				$this->endPathValue();
			} else {
				$this->error( "expected the next array element or the end of the array" );
			}
			break;
		case 'assoc-element':
			// scalar T_DOUBLE_ARROW expression
			$token = $this->skipSpace();
			if ( !$token->isScalar() ) {
				$this->error( "expected a string or number for the array key" );
			}
			if ( $token->type == T_CONSTANT_ENCAPSED_STRING ) {
				$text = $this->parseScalar( $token->text );
			} else {
				$text = $token->text;
			}
			if ( !$this->validatePath( $text ) ) {
				$this->error( "Invalid associative array name \"$text\"" );
			}
			$this->nextPath( $text );
			$this->nextToken();
			$this->skipSpace();
			// Record the byte offset of the "=>" before consuming it
			$this->markArrow();
			$this->expect( T_DOUBLE_ARROW );
			$this->skipSpace();
			$this->startPathValue();
			$this->pushState( 'expression' );
			break;
		}
	}
	// States left on the stack mean the input ended in the middle of a construct
	if ( count( $this->stateStack ) ) {
		$this->error( 'unexpected end of file' );
	}
	$this->popPath();
}
705
/**
 * Reset all parser state and tokenize the text, ready for a new parse.
 */
protected function initParse() {
	// Empty stacks and bookkeeping
	$this->stateStack = array();
	$this->pathStack = array();
	$this->pathInfo = array();
	$this->serial = 1;

	// Tokenize, then put the cursor on the first token
	$this->tokens = token_get_all( $this->text );
	$this->firstToken();
}
717
/**
 * Move the parse position and refresh the current token object. A position
 * past the last token gives the END token.
 *
 * Do not call this except from firstToken() and nextToken(), there is
 * more to update than just the position.
 *
 * @param $pos int Index into the token array
 * @return ConfEditorToken The new current token
 */
protected function setPos( $pos ) {
	$this->pos = $pos;
	$this->currentToken = ( $pos < count( $this->tokens ) )
		? $this->newTokenObj( $this->tokens[$pos] )
		: ConfEditorToken::newEnd();
	return $this->currentToken;
}
731
/**
 * Wrap an element of the token_get_all() result in a ConfEditorToken.
 * Array elements supply the type and text as their first two members;
 * any other element is used as both the type and the text.
 *
 * @param $internalToken array|string
 * @return \ConfEditorToken
 */
function newTokenObj( $internalToken ) {
	if ( !is_array( $internalToken ) ) {
		return new ConfEditorToken( $internalToken, $internalToken );
	}
	list( $type, $text ) = $internalToken;
	return new ConfEditorToken( $type, $text );
}
743
/**
 * Rewind to the first token and reset the line, column and byte counters.
 *
 * @return ConfEditorToken The first token
 */
function firstToken() {
	$this->lineNum = 1;
	$this->colNum = 1;
	$this->byteNum = 0;
	$this->prevToken = ConfEditorToken::newEnd();
	return $this->setPos( 0 );
}
755
/**
 * Accessor for the token at the current parse position.
 *
 * @return ConfEditorToken
 */
function currentToken() {
	$token = $this->currentToken;
	return $token;
}
762
/**
 * Step to the next token. The line, column and byte counters are advanced
 * past the text of the token being left, which then becomes the previous
 * token.
 *
 * @return ConfEditorToken The new current token
 */
function nextToken() {
	$leaving = $this->currentToken;
	if ( $leaving ) {
		$text = $leaving->text;
		$length = strlen( $text );
		$lastLf = strrpos( $text, "\n" );
		if ( $lastLf === false ) {
			// Same line, just move right
			$this->colNum += $length;
		} else {
			// One or more line breaks; the column restarts after the last one
			$this->lineNum += substr_count( $text, "\n" );
			$this->colNum = $length - $lastLf;
		}
		$this->byteNum += $length;
	}
	$this->prevToken = $leaving;
	return $this->setPos( $this->pos + 1 );
}
782
/**
 * Peek at the token $offset steps away from the current position without
 * moving. $offset may be negative, to get tokens behind the current
 * position. Positions outside the token array give the END token.
 *
 * @param $offset int
 * @return \ConfEditorToken
 */
function getTokenAhead( $offset ) {
	$index = $this->pos + $offset;
	$inRange = ( $index >= 0 && $index < count( $this->tokens ) );
	return $inRange
		? $this->newTokenObj( $this->tokens[$index] )
		: ConfEditorToken::newEnd();
}
796
/**
 * Move forward until the current token is not whitespace or a comment.
 *
 * @return ConfEditorToken The current token after skipping
 */
function skipSpace() {
	while ( true ) {
		$token = $this->currentToken;
		if ( !$token || !$token->isSkip() ) {
			return $token;
		}
		$this->nextToken();
	}
}
806
/**
 * Require the current token to be of the given type and step past it.
 * Any other token type is reported through error().
 *
 * @param $type int|string Expected token type
 * @return ConfEditorToken The token after the expected one
 */
function expect( $type ) {
	$token = $this->currentToken;
	if ( $token && $token->type == $type ) {
		return $this->nextToken();
	}
	$wanted = $this->getTypeName( $type );
	$actual = $this->getTypeName( $this->currentToken->type );
	$this->error( "expected " . $wanted . ", got " . $actual );
}
819
/**
 * Schedule one or two states. $nextState ends up on top of the stack, so
 * it is popped first; $stateAfterThat, if given, is popped after it.
 *
 * @param $nextState string
 * @param $stateAfterThat string|null
 */
function pushState( $nextState, $stateAfterThat = null ) {
	if ( $stateAfterThat === null ) {
		$this->stateStack[] = $nextState;
		return;
	}
	array_push( $this->stateStack, $stateAfterThat, $nextState );
}
829
/**
 * Remove and return the state on top of the state stack.
 *
 * @return mixed The state name, or null if the stack is empty
 */
function popState() {
	$state = array_pop( $this->stateStack );
	return $state;
}
837
/**
 * Check a user-supplied path component: it must not contain "/" and must
 * not begin with "@".
 * This exists to allow "/" and "@" to be reserved for string path keys
 *
 * @param $path string
 * @return bool
 */
function validatePath( $path ) {
	if ( strpos( $path, '/' ) !== false ) {
		return false;
	}
	return substr( $path, 0, 1 ) != '@';
}
846
/**
 * Internal function which closes the current path region: the top path
 * stack entry gets its end byte/token set and is stored in
 * $this->pathInfo under the slash-joined names of the whole stack. Do
 * not call except from popPath() or nextPath().
 */
function endPath() {
	$key = '';
	$top = array();
	// After the loop $top holds a copy of the topmost entry
	foreach ( $this->pathStack as $top ) {
		$key = ( $key === '' ? '' : "$key/" ) . $top['name'];
	}
	$top['endByte'] = $this->byteNum;
	$top['endToken'] = $this->pos;
	$this->pathInfo[$key] = $top;
}
863
/**
 * Go up to a new path level, for example at the start of an array.
 * The new entry starts at the current byte/token position and has no
 * value region, comma or arrow recorded yet.
 *
 * @param $path string Name of the new level
 */
function pushPath( $path ) {
	$depth = count( $this->pathStack ) + 1;
	$entry = array(
		'name' => $path,
		'level' => $depth,
		'startByte' => $this->byteNum,
		'startToken' => $this->pos,
		'valueStartToken' => false,
		'valueStartByte' => false,
		'valueEndToken' => false,
		'valueEndByte' => false,
		'nextArrayIndex' => 0,
		'hasComma' => false,
		'arrowByte' => false
	);
	$this->pathStack[] = $entry;
}
882
/**
 * Go down a path level, for example at the end of an array. The current
 * path is recorded with endPath() before its entry is removed.
 */
function popPath() {
	$this->endPath();
	array_pop( $this->pathStack );
}
890
/**
 * Go to the next path on the same level. This ends the current path and
 * starts a new one. If $path is \@next, the new path is set to the next
 * numeric array element.
 *
 * @param $path string Name of the new path, or '@next'
 */
function nextPath( $path ) {
	$this->endPath();

	$top = count( $this->pathStack ) - 1;
	$entry = $this->pathStack[$top];
	if ( $path == '@next' ) {
		// Take the running numeric index as the name and advance it
		$entry['name'] = $entry['nextArrayIndex'];
		$entry['nextArrayIndex']++;
	} else {
		$entry['name'] = $path;
	}

	// Fresh position data; the remaining members (name, level,
	// nextArrayIndex) are carried over from the old entry
	$fresh = array(
		'startByte' => $this->byteNum,
		'startToken' => $this->pos,
		'valueStartToken' => false,
		'valueStartByte' => false,
		'valueEndToken' => false,
		'valueEndByte' => false,
		'hasComma' => false,
		'arrowByte' => false,
	);
	$this->pathStack[$top] = $fresh + $entry;
}
918
/**
 * Record the current token and byte position as the start of the value
 * part of the topmost path.
 */
function startPathValue() {
	$top = count( $this->pathStack ) - 1;
	$this->pathStack[$top]['valueStartToken'] = $this->pos;
	$this->pathStack[$top]['valueStartByte'] = $this->byteNum;
}
927
/**
 * Record the current token and byte position as the end of the value
 * part of the topmost path.
 */
function endPathValue() {
	$top = count( $this->pathStack ) - 1;
	$this->pathStack[$top]['valueEndToken'] = $this->pos;
	$this->pathStack[$top]['valueEndByte'] = $this->byteNum;
}
936
/**
 * Flag the topmost path as an array element which ends with a comma.
 */
function markComma() {
	$top = count( $this->pathStack ) - 1;
	$this->pathStack[$top]['hasComma'] = true;
}
944
/**
 * Record the current byte position as the location of the arrow separator
 * of the topmost path (an associative array element).
 */
function markArrow() {
	$top = count( $this->pathStack ) - 1;
	$this->pathStack[$top]['arrowByte'] = $this->byteNum;
}
952
/**
 * Abort parsing with a parse error which carries the current position.
 *
 * @param $msg string Description of the problem
 * @throws ConfEditorParseError always
 */
function error( $msg ) {
	$parseError = new ConfEditorParseError( $this, $msg );
	throw $parseError;
}
959
/**
 * Get a readable name for the given token type: the token_name() of an
 * integer type, or any other type wrapped in double quotes.
 *
 * @param $type int|string
 * @return string
 */
function getTypeName( $type ) {
	return is_int( $type ) ? token_name( $type ) : "\"$type\"";
}
971
/**
 * Test whether the first non-skippable token at or after the current
 * position plus $offset is of the given type. Whitespace and comments in
 * between are skipped; running off the end of the tokens gives false.
 *
 * @param $type int|string Token type to look for
 * @param $offset int Where to start looking, relative to the current position
 * @return bool
 */
function isAhead( $type, $offset = 0 ) {
	for ( $ahead = $offset; ; $ahead++ ) {
		$token = $this->getTokenAhead( $ahead );
		if ( $token->isEnd() ) {
			return false;
		}
		if ( !$token->isSkip() ) {
			// First real token decides the answer
			return $token->type == $type;
		}
	}
}
996
/**
 * Accessor for the token which preceded the current one.
 *
 * @return ConfEditorToken
 */
function prevToken() {
	$token = $this->prevToken;
	return $token;
}
1003
/**
 * Echo a reasonably readable, HTML-escaped listing of the tokenizer array:
 * one line per token with its type name and its text, control characters
 * escaped.
 */
function dumpTokens() {
	$rows = array();
	foreach ( $this->tokens as $rawToken ) {
		$tokenObj = $this->newTokenObj( $rawToken );
		$typeName = $this->getTypeName( $tokenObj->type );
		$escapedText = addcslashes( $tokenObj->text, "\0..\37" );
		$rows[] = sprintf( "%-28s %s\n", $typeName, $escapedText );
	}
	echo "<pre>" . htmlspecialchars( implode( '', $rows ) ) . "</pre>";
}
1017 }
1018
/**
 * Exception for parse errors. Remembers the line and column at which the
 * editor stood when the error was raised.
 */
class ConfEditorParseError extends MWException {
	public $lineNum;
	public $colNum;

	/**
	 * @param $editor ConfEditor Editor whose position is recorded
	 * @param $msg string Description of the problem
	 */
	function __construct( $editor, $msg ) {
		$line = $editor->lineNum;
		$col = $editor->colNum;
		$this->lineNum = $line;
		$this->colNum = $col;
		parent::__construct( "Parse error on line $line col $col: $msg" );
	}

	/**
	 * Produce the offending line of $text followed by a line with a caret
	 * under the error column.
	 *
	 * @param $text string The text that was parsed
	 * @return string|null Nothing is returned if the line is not found
	 */
	function highlight( $text ) {
		$wantedIndex = $this->lineNum - 1;
		foreach ( StringUtils::explode( "\n", $text ) as $index => $line ) {
			if ( $index == $wantedIndex ) {
				$pointer = str_repeat( ' ', $this->colNum - 1 ) . "^\n";
				return "$line\n" . $pointer;
			}
		}
	}

}
1041
/**
 * Class to wrap a token from the tokenizer.
 *
 * The type is either an integer T_* constant, the literal text of a
 * single-character token, or the string 'END' for the end marker. Types
 * are compared strictly, so a string type is never confused with an
 * integer token constant.
 */
class ConfEditorToken {
	var $type, $text;

	/** Token types accepted as scalar values and array keys */
	static $scalarTypes = array( T_LNUMBER, T_DNUMBER, T_STRING, T_CONSTANT_ENCAPSED_STRING );
	/** Token types which the parser skips over */
	static $skipTypes = array( T_WHITESPACE, T_COMMENT, T_DOC_COMMENT );

	/**
	 * Create the marker token used for positions outside the token array.
	 * @return ConfEditorToken
	 */
	static function newEnd() {
		return new self( 'END', '' );
	}

	/**
	 * @param $type int|string Token type
	 * @param $text string Source text of the token
	 */
	function __construct( $type, $text ) {
		$this->type = $type;
		$this->text = $text;
	}

	/**
	 * @return bool True for whitespace and comment tokens
	 */
	function isSkip() {
		return in_array( $this->type, self::$skipTypes, true );
	}

	/**
	 * @return bool True for number, string and bare word tokens
	 */
	function isScalar() {
		return in_array( $this->type, self::$scalarTypes, true );
	}

	/**
	 * @return bool True for the end marker created by newEnd()
	 */
	function isEnd() {
		return $this->type === 'END';
	}
}
1072