Merge "prop=duplicatefiles does not show duplicates under same name"
[lhc/web/wiklou.git] / includes / ConfEditor.php
1 <?php
2 /**
3 * Configuration file editor.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 */
22
23 /**
24 * This is a state machine style parser with two internal stacks:
25 * * A next state stack, which determines the state the machine will progress to next
26 * * A path stack, which keeps track of the logical location in the file.
27 *
28 * Reference grammar:
29 *
30 * file = T_OPEN_TAG *statement
31 * statement = T_VARIABLE "=" expression ";"
32 * expression = array / scalar / T_VARIABLE
33 * array = T_ARRAY "(" [ element *( "," element ) [ "," ] ] ")"
34 * element = assoc-element / expression
35 * assoc-element = scalar T_DOUBLE_ARROW expression
36 * scalar = T_LNUMBER / T_DNUMBER / T_STRING / T_CONSTANT_ENCAPSED_STRING
37 */
38 class ConfEditor {
/** Source text that is parsed and edited */
public $text;

/** Token list as returned by token_get_all() */
public $tokens;

/** Index of the current token within $tokens */
public $pos;

/** Line number of the current token, counted from 1 */
public $lineNum;

/** Column number of the current token, counted from 1 */
public $colNum;

/** Byte offset of the current token, counted from 0 */
public $byteNum;

/** ConfEditorToken object for the current position */
public $currentToken;

/** ConfEditorToken object for the position before the current one */
public $prevToken;

/**
 * Stack of pending parser states. It is an array of strings; the last
 * element is popped and becomes the next state to run.
 */
public $stateStack;

/**
 * Stack describing the logical location in the file. Each entry is an
 * associative array with the following elements:
 *    name              The name of top level of the path
 *    level             The level (number of elements) of the path
 *    startByte         The byte offset of the start of the path
 *    startToken        The token offset of the start
 *    endByte           The byte offset just past the end of the path
 *    endToken          The token offset of the end, plus one
 *    valueStartToken   The start token offset of the value part
 *    valueStartByte    The start byte offset of the value part
 *    valueEndToken     The end token offset of the value part, plus one
 *    valueEndByte      The end byte offset of the value part, plus one
 *    nextArrayIndex    The next numeric array index at this level
 *    hasComma          True if the array element ends with a comma
 *    arrowByte         The byte offset of the "=>", or false if there isn't one
 */
public $pathStack;

/**
 * Snapshot of the top pathStack entry for every path that has been
 * completed, keyed by the slash-separated path.
 */
public $pathInfo;

/**
 * Counter used to number the whitespace placeholder paths (\@extra-N)
 */
public $serial;

/**
 * Pending edit list: the copy/insert operations which, applied in order
 * to the source string, produce the edited string.
 */
public $edits;
104
/**
 * Convenience entry point for trying the parser from the command line.
 *
 * @param $text string PHP source to parse
 *
 * @return string "OK" when the text parses; otherwise the parse error
 *   message followed by the output of ConfEditorParseError::highlight()
 */
static function test( $text ) {
	$editor = new self( $text );
	try {
		$editor->parse();
	} catch ( ConfEditorParseError $err ) {
		return $err->getMessage() . "\n" . $err->highlight( $text );
	}
	return "OK";
}
121
/**
 * Create an editor for the given source text. The constructor only stores
 * the text; tokenising and parsing happen in parse().
 *
 * @param $text string The PHP source to operate on
 */
public function __construct( $text ) {
	$this->text = $text;
}
128
/**
 * Edit the text. Returns the edited text.
 * @param $ops Array of operations.
 *
 * Operations are given as an associative array, with members:
 *    type:     One of delete, set, append or insert (required)
 *    path:     The path to operate on (required)
 *    key:      The array key to insert/append, with PHP quotes
 *    value:    The value, with PHP quotes
 *
 * delete
 *    Deletes an array element or statement with the specified path.
 *    e.g.
 *        array('type' => 'delete', 'path' => '$foo/bar/baz' )
 *    is equivalent to the runtime PHP code:
 *        unset( $foo['bar']['baz'] );
 *
 * set
 *    Sets the value of an array element. If the element doesn't exist, it
 *    is appended to the parent array, using the last path component as a
 *    quoted string key. If it does exist, the value is set, with
 *    comments and indenting preserved.
 *
 * append
 *    Appends a new element to the end of the array. Adds a trailing comma.
 *    e.g.
 *        array( 'type' => 'append', 'path' => '$foo/bar',
 *        'key' => 'baz', 'value' => "'x'" )
 *    is like the PHP code:
 *        $foo['bar']['baz'] = 'x';
 *
 * insert
 *    Insert a new element at the start of the array.
 *
 * All operations are resolved against the parse of the original text; the
 * result is re-parsed once at the end as a sanity check.
 *
 * @throws MWException if a path cannot be found, the operation type is
 *    unknown, or the edited text no longer parses
 * @return string
 */
public function edit( $ops ) {
	$this->parse();

	// Start with a single operation which copies the whole source
	$this->edits = array(
		array( 'copy', 0, strlen( $this->text ) )
	);
	foreach ( $ops as $op ) {
		$type = $op['type'];
		$path = $op['path'];
		$value = isset( $op['value'] ) ? $op['value'] : null;
		$key = isset( $op['key'] ) ? $op['key'] : null;

		switch ( $type ) {
			case 'delete':
				list( $start, $end ) = $this->findDeletionRegion( $path );
				$this->replaceSourceRegion( $start, $end, false );
				break;
			case 'set':
				if ( isset( $this->pathInfo[$path] ) ) {
					// The value is used verbatim; callers supply PHP source
					list( $start, $end ) = $this->findValueRegion( $path );
					$this->replaceSourceRegion( $start, $end, $value );
					break;
				}
				// No existing path: turn this into an append on the parent array
				$slashPos = strrpos( $path, '/' );
				if ( $slashPos === false ) {
					// Without a "/" there is no parent array to append to. Fail
					// with a clear message instead of mangling the path.
					throw new MWException(
						"Can't set \"$path\": the path does not exist and has no parent array" );
				}
				$key = var_export( substr( $path, $slashPos + 1 ), true );
				$path = substr( $path, 0, $slashPos );
				// Fall through
			case 'append':
				// Find the last array element
				$lastEltPath = $this->findLastArrayElement( $path );
				if ( $lastEltPath === false ) {
					throw new MWException( "Can't find any element of array \"$path\"" );
				}
				$lastEltInfo = $this->pathInfo[$lastEltPath];

				// Has it got a comma already?
				if ( strpos( $lastEltPath, '@extra' ) === false && !$lastEltInfo['hasComma'] ) {
					// No comma, insert one after the value region
					list( , $end ) = $this->findValueRegion( $lastEltPath );
					$this->replaceSourceRegion( $end - 1, $end - 1, ',' );
				}

				// The new element copies the layout of the last one and is
				// inserted directly after it
				list( $start, $end ) = $this->findDeletionRegion( $lastEltPath );
				$textToInsert = $this->formatNewElement(
					$start, $key, $value, $lastEltInfo['arrowByte'] );
				$this->replaceSourceRegion( $end, $end, $textToInsert );
				break;
			case 'insert':
				// Find first array element
				$firstEltPath = $this->findFirstArrayElement( $path );
				if ( $firstEltPath === false ) {
					throw new MWException( "Can't find array element of \"$path\"" );
				}
				list( $start, ) = $this->findDeletionRegion( $firstEltPath );
				$info = $this->pathInfo[$firstEltPath];

				// The new element copies the layout of the first one and is
				// inserted directly before it
				$textToInsert = $this->formatNewElement(
					$start, $key, $value, $info['arrowByte'] );
				$this->replaceSourceRegion( $start, $start, $textToInsert );
				break;
			default:
				throw new MWException( "Unrecognised operation: \"$type\"" );
		}
	}

	// Do the edits
	$out = '';
	foreach ( $this->edits as $edit ) {
		if ( $edit[0] === 'copy' ) {
			$out .= substr( $this->text, $edit[1], $edit[2] - $edit[1] );
		} else { // 'insert'
			$out .= $edit[1];
		}
	}

	// Do a second parse as a sanity check
	$this->text = $out;
	try {
		$this->parse();
	} catch ( ConfEditorParseError $e ) {
		throw new MWException(
			"Sorry, ConfEditor broke the file during editing and it won't parse anymore: " .
			$e->getMessage() );
	}
	return $out;
}

/**
 * Build the source text for a new array element, imitating the indentation
 * and arrow alignment of the existing element which starts at $pos.
 *
 * @param $pos int Byte offset of the start of the existing element's region
 * @param $key string|null Quoted key, or null for a non-associative element
 * @param $value string Value, as PHP source
 * @param $arrowByte int|bool Byte offset of the existing element's "=>", or false
 * @return string Element text with a trailing comma, terminated by a line
 *    break if $pos is at the start of a line and by a space otherwise
 */
protected function formatNewElement( $pos, $key, $value, $arrowByte ) {
	if ( $key === null ) {
		list( $indent, ) = $this->getIndent( $pos );
		$text = "$indent$value,";
	} else {
		list( $indent, $arrowIndent ) = $this->getIndent( $pos, $key, $arrowByte );
		$text = "$indent$key$arrowIndent=> $value,";
	}
	return $text . ( $indent === false ? ' ' : "\n" );
}
273
/**
 * Get the variables defined in the text.
 *
 * Every path recorded by the parser that starts with a variable is turned
 * into a nested array entry. Values are converted with parseScalar(), so
 * anything which is not a quoted string, true, false or null is returned
 * as its source text. Placeholder (\@extra-N) paths are skipped.
 *
 * @return array( varname => value )
 */
function getVars() {
	$vars = array();
	$this->parse();
	foreach ( $this->pathInfo as $path => $data ) {
		$path = (string)$path;
		if ( substr( $path, 0, 1 ) != '$' ) {
			continue;
		}
		$trimmedPath = substr( $path, 1 );
		// Numeric array elements have integer names, so normalise to a
		// string before looking at individual characters.
		$name = (string)$data['name'];
		if ( substr( $name, 0, 1 ) == '@' ) {
			continue;
		}
		// Only top-level names are variables. An array key which happens to
		// start with "$" must keep it, or the parent path computed below
		// would be wrong.
		if ( $data['level'] == 1 && substr( $name, 0, 1 ) == '$' ) {
			$name = substr( $name, 1 );
		}
		$parentPath = substr( $trimmedPath, 0,
			strlen( $trimmedPath ) - strlen( $name ) );
		if ( substr( $parentPath, -1 ) == '/' ) {
			$parentPath = substr( $parentPath, 0, -1 );
		}

		$value = substr( $this->text, $data['valueStartByte'],
			$data['valueEndByte'] - $data['valueStartByte']
		);
		$this->setVar( $vars, $parentPath, $name,
			$this->parseScalar( $value ) );
	}
	return $vars;
}
303
/**
 * Store a value at a nested position of an array, creating intermediate
 * arrays as required. An entry for which isset() is already true is left
 * untouched. For instance,
 *    setVar( $arr, 'foo/bar', 'baz', 3 );
 * results in
 *    $arr['foo']['bar']['baz'] = 3;
 *
 * @param $array array Array to modify, passed by reference
 * @param $path string Slash-delimited path of the parent, '' for the top level
 * @param $key mixed Key
 * @param $value mixed Value
 */
function setVar( &$array, $path, $key, $value ) {
	$segments = ( $path !== '' ) ? explode( '/', $path ) : array();

	// Walk down to the parent, creating missing levels on the way
	$node =& $array;
	foreach ( $segments as $segment ) {
		if ( !isset( $node[$segment] ) ) {
			$node[$segment] = array();
		}
		$node =& $node[$segment];
	}

	if ( isset( $node[$key] ) ) {
		return;
	}
	$node[$key] = $value;
}
326
/**
 * Parse the PHP source of a scalar value.
 *
 * Quoted strings are unquoted and unescaped. The keywords true, false and
 * null are recognised in any letter case, as in PHP itself. Anything else
 * (numbers, constants, other expressions) is returned unchanged as a string.
 *
 * @param $str string Source text of the value
 * @return mixed Parsed value
 */
function parseScalar( $str ) {
	// The source region passed in may carry trailing whitespace (the value
	// region of an array element is closed only after whitespace has been
	// skipped), so trim before stripping the quotes.
	if ( $str !== '' && $str[0] == '\'' ) {
		// Single-quoted string: only \' and \\ are escape sequences
		return strtr( substr( trim( $str ), 1, -1 ),
			array( '\\\'' => '\'', '\\\\' => '\\' ) );
	}
	if ( $str !== '' && $str[0] == '"' ) {
		// Double-quoted string
		return stripcslashes( substr( trim( $str ), 1, -1 ) );
	}
	// Keywords are matched by prefix so that trailing whitespace is
	// tolerated, and case-insensitively so that TRUE/FALSE/NULL are not
	// returned as (truthy) strings.
	if ( strncasecmp( $str, 'true', 4 ) === 0 ) {
		return true;
	}
	if ( strncasecmp( $str, 'false', 5 ) === 0 ) {
		return false;
	}
	if ( strncasecmp( $str, 'null', 4 ) === 0 ) {
		return null;
	}
	// Must be some kind of numeric value, so let PHP's weak typing
	// be useful for a change
	return $str;
}
355
/**
 * Replace the bytes [$start, $end) of the source with $newText, or delete
 * them if $newText is false. Nothing is changed in the text itself: the
 * copy operation in $this->edits which covers the region is split in two,
 * with an insert operation in between when there is text to insert.
 *
 * A region which only touches the boundary of a copy operation is treated
 * as lying outside it.
 *
 * @param $start int Start byte offset in the original source
 * @param $end int Past-the-end byte offset in the original source
 * @param $newText string|bool Replacement text, or false for none
 * @throws MWException if the region straddles the edge of a copy operation
 */
function replaceSourceRegion( $start, $end, $newText = false ) {
	$rebuilt = array();
	foreach ( $this->edits as $edit ) {
		// Inserts are never split, and neither are copies the region misses
		$isCopy = ( $edit[0] === 'copy' );
		if ( !$isCopy || !( $start < $edit[2] && $end > $edit[1] ) ) {
			$rebuilt[] = $edit;
			continue;
		}

		$copyStart = $edit[1];
		$copyEnd = $edit[2];
		// The region intersects this copy, so it must lie fully inside it
		if ( $start < $copyStart || $end > $copyEnd ) {
			throw new MWException( "Overlapping regions found, can't do the edit" );
		}

		// Split the copy around the region
		$rebuilt[] = array( 'copy', $copyStart, $start );
		if ( $newText !== false ) {
			$rebuilt[] = array( 'insert', $newText );
		}
		$rebuilt[] = array( 'copy', $end, $copyEnd );
	}
	$this->edits = $rebuilt;
}
390
/**
 * Work out which bytes of the source should be removed in order to delete
 * $pathName. On top of the path itself, the region takes in the spaces and
 * tabs leading up to it on the same line, and the whitespace and comments
 * following it up to and including the next line break.
 *
 * This repositions the token cursor.
 *
 * @param $pathName string
 * @throws MWException if the path is unknown
 * @return array Start and past-the-end byte offsets
 */
function findDeletionRegion( $pathName ) {
	if ( !isset( $this->pathInfo[$pathName] ) ) {
		throw new MWException( "Can't find path \"$pathName\"" );
	}
	$info = $this->pathInfo[$pathName];

	// Move the cursor to the first token of the path
	$this->firstToken();
	while ( $this->pos != $info['startToken'] ) {
		$this->nextToken();
	}

	// Extend the start backwards over skippable tokens on the same line
	$regionStart = $info['startByte'];
	$back = -1;
	while ( $back >= -$this->pos ) {
		$tok = $this->getTokenAhead( $back );
		if ( !$tok->isSkip() ) {
			// There is other content on this line. Leave the start where it
			// was, otherwise the regions of neighbours would overlap.
			$regionStart = $info['startByte'];
			break;
		}
		$tokLength = strlen( $tok->text );
		$lastLf = strrpos( $tok->text, "\n" );
		if ( $lastLf !== false ) {
			// Stop just after the line break, which is not part of the region
			$regionStart -= $tokLength - $lastLf - 1;
			break;
		}
		$regionStart -= $tokLength;
		$back--;
	}

	// Move the cursor to the token following the path
	while ( $this->pos != $info['endToken'] ) {
		$this->nextToken();
	}

	// Extend the end forwards over skippable tokens up to the line break
	$regionEnd = $info['endByte']; // past the end
	$remaining = count( $this->tokens ) - $this->pos;
	for ( $ahead = 0; $ahead < $remaining; $ahead++ ) {
		$tok = $this->getTokenAhead( $ahead );
		if ( !$tok->isSkip() ) {
			break;
		}
		$firstLf = strpos( $tok->text, "\n" );
		if ( $firstLf !== false ) {
			// The region ends just past the line break
			$regionEnd += $firstLf + 1;
			break;
		}
		$regionEnd += strlen( $tok->text );
	}
	return array( $regionStart, $regionEnd );
}
446
/**
 * Look up the byte region of the source which holds the value of a path.
 * Quotes are part of the region, the comma or semicolon which follows is
 * not.
 *
 * As usual the end offset is past-the-end (end + 1).
 *
 * @param $pathName string
 * @throws MWException if the path is unknown or has no recorded value
 * @return array Start and past-the-end byte offsets
 */
function findValueRegion( $pathName ) {
	if ( !isset( $this->pathInfo[$pathName] ) ) {
		throw new MWException( "Can't find path \"$pathName\"" );
	}
	$info = $this->pathInfo[$pathName];
	$from = $info['valueStartByte'];
	$to = $info['valueEndByte'];
	if ( $from === false || $to === false ) {
		throw new MWException( "Can't find value region for path \"$pathName\"" );
	}
	return array( $from, $to );
}
465
/**
 * Find the path name of the last element of the array at $path.
 * For an array without real elements the \@extra interstitial element is
 * returned instead. False is returned if no recorded path lies below $path.
 *
 * The scan relies on the order of $this->pathInfo: it stops at the first
 * entry outside the array once an entry inside it has been seen.
 *
 * @param $path string
 * @return bool|int|string
 */
function findLastArrayElement( $path ) {
	$prefix = "$path/";
	$prefixLength = strlen( $prefix );

	// First pass: real elements only
	$match = false;
	foreach ( $this->pathInfo as $candidate => $unused ) {
		if ( substr( $candidate, $prefixLength, 1 ) == '@' ) {
			// Placeholder, ignore
			continue;
		}
		if ( substr( $candidate, 0, $prefixLength ) == $prefix ) {
			$match = $candidate;
		} elseif ( $match !== false ) {
			break;
		}
	}
	if ( $match !== false ) {
		return $match;
	}

	// Second pass: accept interstitial elements as well
	foreach ( $this->pathInfo as $candidate => $unused ) {
		if ( substr( $candidate, 0, $prefixLength ) == $prefix ) {
			$match = $candidate;
		} elseif ( $match !== false ) {
			break;
		}
	}
	return $match;
}
502
/**
 * Find the path name of the first element of the array at $path.
 * For an array without real elements the \@extra interstitial element is
 * returned instead. False is returned if no recorded path lies below $path.
 *
 * @param $path string
 * @return bool|int|string
 */
function findFirstArrayElement( $path ) {
	$prefix = "$path/";
	$prefixLength = strlen( $prefix );

	// Real elements take precedence over interstitial ones
	$firstExtra = false;
	foreach ( $this->pathInfo as $candidate => $unused ) {
		if ( substr( $candidate, 0, $prefixLength ) != $prefix ) {
			continue;
		}
		if ( substr( $candidate, $prefixLength, 1 ) != '@' ) {
			return $candidate;
		}
		if ( $firstExtra === false ) {
			// Remember the earliest placeholder as a fallback
			$firstExtra = $candidate;
		}
	}
	return $firstExtra;
}
528
/**
 * Get the indentation found at a byte position, plus the padding needed to
 * line up a "=>" with an existing one.
 *
 * @param $pos int Byte offset in the source
 * @param $key string|bool Quoted key of the new element
 * @param $arrowPos int|bool Byte offset of the "=>" to align with, or false
 * @return array Two elements: the run of spaces and tabs at $pos, or false
 *    if $pos is not at the start of a line; and the spacing to put between
 *    the key and the arrow, which is a single space unless alignment needs more
 */
function getIndent( $pos, $key = false, $arrowPos = false ) {
	$atLineStart = ( $pos == 0 || $this->text[$pos-1] == "\n" );
	if ( !$atLineStart ) {
		return array( false, ' ' );
	}

	$indentLength = strspn( $this->text, " \t", $pos );
	$indent = substr( $this->text, $pos, $indentLength );

	$arrowIndent = ' ';
	if ( $indent !== false && $arrowPos !== false ) {
		// Number of columns between the end of the new key and the arrow
		$padding = $arrowPos - $pos - $indentLength - strlen( $key );
		if ( $padding > 0 ) {
			$arrowIndent = str_repeat( ' ', $padding );
		}
	}
	return array( $indent, $arrowIndent );
}
550
/**
 * Run the parser over $this->text, filling in $this->pathInfo.
 *
 * Each iteration pops one state from the state stack and runs it; states
 * push their successors. A ConfEditorParseError is thrown (via error())
 * when the text does not match the supported subset of PHP syntax.
 */
public function parse() {
	$this->initParse();
	$this->pushState( 'file' );
	$this->pushPath( '@extra-' . ( $this->serial++ ) );
	$tok = $this->firstToken();

	while ( !$tok->isEnd() ) {
		$state = $this->popState();
		if ( !$state ) {
			$this->error( 'internal error: empty state stack' );
		}

		switch ( $state ) {
			case 'file':
				$this->expect( T_OPEN_TAG );
				// Fall through: the rest is the same as between statements
			case 'file 2':
				$tok = $this->skipSpace();
				if ( $tok->isEnd() ) {
					// Clean end of input, leave the main loop
					break 2;
				}
				$this->pushState( 'statement', 'file 2' );
				break;

			case 'statement':
				$tok = $this->skipSpace();
				if ( !$this->validatePath( $tok->text ) ) {
					$this->error( "Invalid variable name \"{$tok->text}\"" );
				}
				$this->nextPath( $tok->text );
				$this->expect( T_VARIABLE );
				$this->skipSpace();
				// Statements of the form $var['key'] = ... get a nested path
				$isArrayAssign = ( $this->currentToken()->type == '[' );
				if ( $isArrayAssign ) {
					$this->nextToken();
					$tok = $this->skipSpace();
					if ( !$tok->isScalar() ) {
						$this->error( "expected a string or number for the array key" );
					}
					$text = ( $tok->type == T_CONSTANT_ENCAPSED_STRING )
						? $this->parseScalar( $tok->text )
						: $tok->text;
					if ( !$this->validatePath( $text ) ) {
						$this->error( "Invalid associative array name \"$text\"" );
					}
					$this->pushPath( $text );
					$this->nextToken();
					$this->skipSpace();
					$this->expect( ']' );
					$this->skipSpace();
				}
				$this->expect( '=' );
				$this->skipSpace();
				$this->startPathValue();
				$this->pushState( 'expression',
					$isArrayAssign ? 'array assign end' : 'statement end' );
				break;

			case 'array assign end':
			case 'statement end':
				$this->endPathValue();
				if ( $state == 'array assign end' ) {
					// Leave the nested path opened for the array key
					$this->popPath();
				}
				$this->skipSpace();
				$this->expect( ';' );
				$this->nextPath( '@extra-' . ( $this->serial++ ) );
				break;

			case 'expression':
				$tok = $this->skipSpace();
				if ( $tok->type == T_ARRAY ) {
					$this->pushState( 'array' );
				} elseif ( $tok->isScalar() || $tok->type == T_VARIABLE ) {
					// Single-token expression, just consume it
					$this->nextToken();
				} else {
					$this->error( "expected simple expression" );
				}
				break;

			case 'array':
				$this->skipSpace();
				$this->expect( T_ARRAY );
				$this->skipSpace();
				$this->expect( '(' );
				$this->skipSpace();
				$this->pushPath( '@extra-' . ( $this->serial++ ) );
				$isEmptyArray = $this->isAhead( ')' );
				$this->pushState( 'array end' );
				if ( !$isEmptyArray ) {
					// Runs before 'array end', being pushed last
					$this->pushState( 'element' );
				}
				break;

			case 'array end':
				$this->skipSpace();
				$this->popPath();
				$this->expect( ')' );
				break;

			case 'element':
				$tok = $this->skipSpace();
				// Look ahead to find the double arrow
				$isAssoc = $tok->isScalar() && $this->isAhead( T_DOUBLE_ARROW, 1 );
				if ( !$isAssoc ) {
					// Plain element, keyed by the next numeric index
					$this->nextPath( '@next' );
					$this->startPathValue();
					$this->pushState( 'expression', 'element end' );
				} else {
					$this->pushState( 'assoc-element', 'element end' );
				}
				break;

			case 'element end':
				$tok = $this->skipSpace();
				if ( $tok->type == ',' ) {
					$this->endPathValue();
					$this->markComma();
					$this->nextToken();
					$this->nextPath( '@extra-' . ( $this->serial++ ) );
					// Look ahead to find ending bracket
					if ( !$this->isAhead( ")" ) ) {
						// More elements follow
						$this->pushState( 'element' );
					} else {
						// Trailing comma, the array ends here
						$this->skipSpace();
					}
				} elseif ( $tok->type == ')' ) {
					// End array
					$this->endPathValue();
				} else {
					$this->error( "expected the next array element or the end of the array" );
				}
				break;

			case 'assoc-element':
				$tok = $this->skipSpace();
				if ( !$tok->isScalar() ) {
					$this->error( "expected a string or number for the array key" );
				}
				$text = ( $tok->type == T_CONSTANT_ENCAPSED_STRING )
					? $this->parseScalar( $tok->text )
					: $tok->text;
				if ( !$this->validatePath( $text ) ) {
					$this->error( "Invalid associative array name \"$text\"" );
				}
				$this->nextPath( $text );
				$this->nextToken();
				$this->skipSpace();
				$this->markArrow();
				$this->expect( T_DOUBLE_ARROW );
				$this->skipSpace();
				$this->startPathValue();
				$this->pushState( 'expression' );
				break;
		}
	}

	if ( count( $this->stateStack ) ) {
		$this->error( 'unexpected end of file' );
	}
	$this->popPath();
}
725
/**
 * Reset all parser state and tokenise $this->text, leaving the cursor on
 * the first token.
 */
protected function initParse() {
	$this->stateStack = array();
	$this->pathStack = array();
	$this->pathInfo = array();
	$this->serial = 1;

	$this->tokens = token_get_all( $this->text );
	$this->firstToken();
}
737
/**
 * Move the cursor to a token index and refresh the current token. Only
 * firstToken() and nextToken() should call this, since they also maintain
 * the line, column and byte counters.
 *
 * @param $pos int Index into $this->tokens
 * @return ConfEditorToken The new current token; an END token if $pos is
 *    beyond the last token
 */
protected function setPos( $pos ) {
	$this->pos = $pos;
	$this->currentToken = ( $pos < count( $this->tokens ) )
		? $this->newTokenObj( $this->tokens[$pos] )
		: ConfEditorToken::newEnd();
	return $this->currentToken;
}
751
/**
 * Wrap one element of the token_get_all() result in a ConfEditorToken.
 *
 * @param $internalToken array|string Either an array holding the token
 *    type and text in its first two elements, or a plain string which is
 *    used as both type and text
 * @return ConfEditorToken
 */
function newTokenObj( $internalToken ) {
	if ( !is_array( $internalToken ) ) {
		return new ConfEditorToken( $internalToken, $internalToken );
	}
	$type = $internalToken[0];
	$text = $internalToken[1];
	return new ConfEditorToken( $type, $text );
}
763
/**
 * Rewind the cursor to the first token and reset the previous token and
 * the line, column and byte counters.
 *
 * @return ConfEditorToken The first token
 */
function firstToken() {
	$this->prevToken = ConfEditorToken::newEnd();
	$this->lineNum = 1;
	$this->colNum = 1;
	$this->byteNum = 0;
	return $this->setPos( 0 );
}
775
/**
 * Accessor for the token at the cursor.
 *
 * @return ConfEditorToken
 */
function currentToken() {
	return $this->currentToken;
}
782
/**
 * Step the cursor forward by one token. The line, column and byte
 * counters are advanced by the text of the token being left.
 *
 * @return ConfEditorToken The new current token
 */
function nextToken() {
	$leaving = $this->currentToken;
	if ( $leaving ) {
		$text = $leaving->text;
		$length = strlen( $text );
		$lastLf = strrpos( $text, "\n" );
		if ( $lastLf === false ) {
			// Still on the same line
			$this->colNum += $length;
		} else {
			// The column restarts after the last line break in the token
			$this->lineNum += substr_count( $text, "\n" );
			$this->colNum = $length - $lastLf;
		}
		$this->byteNum += $length;
	}
	$this->prevToken = $leaving;
	return $this->setPos( $this->pos + 1 );
}
802
/**
 * Peek at the token $offset steps away from the cursor without moving it.
 * A negative $offset looks behind the cursor.
 *
 * @param $offset int
 * @return ConfEditorToken An END token if the position is outside the
 *    token array
 */
function getTokenAhead( $offset ) {
	$index = $this->pos + $offset;
	if ( $index >= 0 && $index < count( $this->tokens ) ) {
		return $this->newTokenObj( $this->tokens[$index] );
	}
	return ConfEditorToken::newEnd();
}
816
/**
 * Move the cursor forward until it rests on a token which is neither
 * whitespace nor a comment.
 *
 * @return ConfEditorToken The token the cursor ends up on
 */
function skipSpace() {
	$tok = $this->currentToken;
	while ( $tok && $tok->isSkip() ) {
		$tok = $this->nextToken();
	}
	return $tok;
}
826
/**
 * Require the current token to be of the given type and step past it.
 * A parse error is raised via error() if the type does not match.
 *
 * @param $type int|string Token type
 * @return ConfEditorToken The token following the expected one
 */
function expect( $type ) {
	$current = $this->currentToken;
	if ( !$current || $current->type != $type ) {
		$this->error( "expected " . $this->getTypeName( $type ) .
			", got " . $this->getTypeName( $this->currentToken->type ) );
	} else {
		return $this->nextToken();
	}
}
839
/**
 * Schedule one or two states on the state stack. $nextState ends up on
 * top and therefore runs first, followed by $stateAfterThat if given.
 *
 * @param $nextState string
 * @param $stateAfterThat string|null
 */
function pushState( $nextState, $stateAfterThat = null ) {
	if ( $stateAfterThat === null ) {
		$this->stateStack[] = $nextState;
		return;
	}
	// Pushed in reverse order of execution
	$this->stateStack[] = $stateAfterThat;
	$this->stateStack[] = $nextState;
}
849
/**
 * Remove the topmost state from the state stack and return it.
 *
 * @return mixed The state name, or null if the stack is empty
 */
function popState() {
	$state = array_pop( $this->stateStack );
	return $state;
}
857
/**
 * Check whether a name taken from the input may be used as a path
 * component. Names containing "/" or starting with "@" are refused, since
 * those characters are reserved for path keys built by the parser.
 *
 * @param $path string
 * @return bool
 */
function validatePath( $path ) {
	if ( strpos( $path, '/' ) !== false ) {
		return false;
	}
	return substr( $path, 0, 1 ) != '@';
}
866
/**
 * Record the end of the current path: the top entry of the path stack is
 * stamped with the current byte and token position and stored in
 * $this->pathInfo under the slash-separated names of the whole stack.
 * Only popPath() and nextPath() should call this.
 */
function endPath() {
	$key = '';
	$top = null;
	foreach ( $this->pathStack as $entry ) {
		$key = ( $key === '' )
			? $key . $entry['name']
			: $key . '/' . $entry['name'];
		$top = $entry;
	}
	// $top is a copy, so the stack entry itself is not modified
	$top['endByte'] = $this->byteNum;
	$top['endToken'] = $this->pos;
	$this->pathInfo[$key] = $top;
}
883
/**
 * Open a new, deeper path level starting at the current position, for
 * example at the start of an array.
 *
 * @param $path string Name of the first path on the new level
 */
function pushPath( $path ) {
	$entry = array(
		'name' => $path,
		'level' => count( $this->pathStack ) + 1,
		'startByte' => $this->byteNum,
		'startToken' => $this->pos,
		'valueStartToken' => false,
		'valueStartByte' => false,
		'valueEndToken' => false,
		'valueEndByte' => false,
		'nextArrayIndex' => 0,
		'hasComma' => false,
		'arrowByte' => false
	);
	$this->pathStack[] = $entry;
}
902
/**
 * Close the current path and return to the enclosing path level, for
 * example at the end of an array.
 */
function popPath() {
	// Record the path before its stack entry is discarded
	$this->endPath();
	array_pop( $this->pathStack );
}
910
/**
 * Close the current path and open a sibling on the same level, starting
 * at the current position. The special name \@next stands for the next
 * numeric array index of this level.
 *
 * @param $path string Name of the new path, or \@next
 */
function nextPath( $path ) {
	$this->endPath();
	$top = count( $this->pathStack ) - 1;

	if ( $path == '@next' ) {
		$index = $this->pathStack[$top]['nextArrayIndex'];
		$this->pathStack[$top]['name'] = $index;
		$this->pathStack[$top]['nextArrayIndex'] = $index + 1;
	} else {
		$this->pathStack[$top]['name'] = $path;
	}

	// Reset the per-path fields. With "+" the left operand wins, so name,
	// level and nextArrayIndex are carried over from the old entry.
	$fresh = array(
		'startByte' => $this->byteNum,
		'startToken' => $this->pos,
		'valueStartToken' => false,
		'valueStartByte' => false,
		'valueEndToken' => false,
		'valueEndByte' => false,
		'hasComma' => false,
		'arrowByte' => false,
	);
	$this->pathStack[$top] = $fresh + $this->pathStack[$top];
}
938
/**
 * Record the current position as the start of the value of the current path.
 */
function startPathValue() {
	$top = count( $this->pathStack ) - 1;
	$this->pathStack[$top]['valueStartToken'] = $this->pos;
	$this->pathStack[$top]['valueStartByte'] = $this->byteNum;
}
947
/**
 * Record the current position as the (past-the-end) end of the value of
 * the current path.
 */
function endPathValue() {
	$top = count( $this->pathStack ) - 1;
	$this->pathStack[$top]['valueEndToken'] = $this->pos;
	$this->pathStack[$top]['valueEndByte'] = $this->byteNum;
}
956
/**
 * Flag the current path as an array element which is followed by a comma.
 */
function markComma() {
	$top = count( $this->pathStack ) - 1;
	$this->pathStack[$top]['hasComma'] = true;
}
964
/**
 * Record the current byte position as the location of the "=>" of the
 * current associative array element.
 */
function markArrow() {
	$top = count( $this->pathStack ) - 1;
	$this->pathStack[$top]['arrowByte'] = $this->byteNum;
}
972
/**
 * Abort parsing with a ConfEditorParseError. The exception is given this
 * editor so that it can report the current line and column.
 *
 * @param $msg string Description of the problem
 * @throws ConfEditorParseError always
 */
function error( $msg ) {
	$exception = new ConfEditorParseError( $this, $msg );
	throw $exception;
}
979
/**
 * Turn a token type into something readable for messages: the token_name()
 * of integer types, and the type itself in double quotes otherwise.
 *
 * @param $type int|string
 * @return string
 */
function getTypeName( $type ) {
	return is_int( $type ) ? token_name( $type ) : "\"$type\"";
}
991
/**
 * Test whether the first significant token at or after the cursor
 * position plus $offset is of the given type. Whitespace and comments in
 * between are passed over; the cursor itself does not move.
 *
 * @param $type int|string Token type to look for
 * @param $offset int Number of tokens after the cursor to start at
 * @return bool False also when the end of the input is reached first
 */
function isAhead( $type, $offset = 0 ) {
	for ( $ahead = $offset; ; $ahead++ ) {
		$tok = $this->getTokenAhead( $ahead );
		if ( $tok->isEnd() ) {
			return false;
		}
		if ( !$tok->isSkip() ) {
			// First significant token decides the outcome
			return $tok->type == $type;
		}
	}
}
1016
/**
 * Accessor for the token which was current before the last nextToken().
 *
 * @return ConfEditorToken
 */
function prevToken() {
	return $this->prevToken;
}
1023
/**
 * Echo the token array as HTML-escaped preformatted text, one token per
 * line: the type name followed by the text with control characters escaped.
 */
function dumpTokens() {
	$dump = '';
	foreach ( $this->tokens as $rawToken ) {
		$tok = $this->newTokenObj( $rawToken );
		$typeName = $this->getTypeName( $tok->type );
		$escapedText = addcslashes( $tok->text, "\0..\37" );
		$dump .= sprintf( "%-28s %s\n", $typeName, $escapedText );
	}
	echo "<pre>" . htmlspecialchars( $dump ) . "</pre>";
}
1037 }
1038
/**
 * Exception thrown by ConfEditor when the text cannot be parsed. It
 * remembers the line and column the editor had reached.
 */
class ConfEditorParseError extends MWException {
	public $lineNum;
	public $colNum;

	/**
	 * @param $editor ConfEditor The editor which hit the error
	 * @param $msg string Description of the problem
	 */
	function __construct( $editor, $msg ) {
		$this->lineNum = $editor->lineNum;
		$this->colNum = $editor->colNum;
		parent::__construct(
			"Parse error on line {$this->lineNum} col {$this->colNum}: $msg" );
	}

	/**
	 * Show where in the text the error is.
	 *
	 * @param $text string The text which was parsed
	 * @return string|null The offending line followed by a line with a
	 *    caret under the error column; null if the text has no such line
	 */
	function highlight( $text ) {
		$wantedIndex = $this->lineNum - 1;
		foreach ( StringUtils::explode( "\n", $text ) as $index => $line ) {
			if ( $index == $wantedIndex ) {
				$pointer = str_repeat( ' ', $this->colNum - 1 ) . "^\n";
				return "$line\n" . $pointer;
			}
		}
	}

}
1061
/**
 * Class to wrap a token from the tokenizer.
 *
 * The type is either one of the integer T_* constants, the token text
 * itself for single-character tokens, or the string 'END' for the
 * artificial token which marks positions outside the token array.
 */
class ConfEditorToken {
	/** Token type: integer T_* constant, or a string (see class comment) */
	public $type;

	/** Source text of the token */
	public $text;

	/** Token types which may be used as a scalar value or array key */
	public static $scalarTypes = array( T_LNUMBER, T_DNUMBER, T_STRING, T_CONSTANT_ENCAPSED_STRING );

	/** Token types which carry no meaning for the parser */
	public static $skipTypes = array( T_WHITESPACE, T_COMMENT, T_DOC_COMMENT );

	/**
	 * Create the artificial end token.
	 * @return ConfEditorToken
	 */
	static function newEnd() {
		return new self( 'END', '' );
	}

	/**
	 * @param $type int|string Token type
	 * @param $text string Source text of the token
	 */
	function __construct( $type, $text ) {
		$this->type = $type;
		$this->text = $text;
	}

	/**
	 * Whether this is whitespace or a comment.
	 * @return bool
	 */
	function isSkip() {
		// Types mix integers and strings, so compare strictly to avoid
		// accidental matches through type juggling
		return in_array( $this->type, self::$skipTypes, true );
	}

	/**
	 * Whether this is a number, a string literal or a bare word.
	 * @return bool
	 */
	function isScalar() {
		return in_array( $this->type, self::$scalarTypes, true );
	}

	/**
	 * Whether this is the artificial end token.
	 * @return bool
	 */
	function isEnd() {
		return $this->type === 'END';
	}
}
1092