(bug 33911) SiteStats::isSane() should not check admins anymore
[lhc/web/wiklou.git] / includes / ConfEditor.php
1 <?php
2
3 /**
4 * This is a state machine style parser with two internal stacks:
5 * * A next state stack, which determines the state the machine will progress to next
6 * * A path stack, which keeps track of the logical location in the file.
7 *
8 * Reference grammar:
9 *
10 * file = T_OPEN_TAG *statement
11 * statement = T_VARIABLE "=" expression ";"
12 * expression = array / scalar / T_VARIABLE
13 * array = T_ARRAY "(" [ element *( "," element ) [ "," ] ] ")"
14 * element = assoc-element / expression
15 * assoc-element = scalar T_DOUBLE_ARROW expression
16 * scalar = T_LNUMBER / T_DNUMBER / T_STRING / T_CONSTANT_ENCAPSED_STRING
17 */
18 class ConfEditor {
19 /** The text to parse */
20 var $text;
21
22 /** The token array from token_get_all() */
23 var $tokens;
24
25 /** The current position in the token array */
26 var $pos;
27
28 /** The current 1-based line number */
29 var $lineNum;
30
31 /** The current 1-based column number */
32 var $colNum;
33
34 /** The current 0-based byte number */
35 var $byteNum;
36
37 /** The current ConfEditorToken object */
38 var $currentToken;
39
40 /** The previous ConfEditorToken object */
41 var $prevToken;
42
43 /**
44 * The state machine stack. This is an array of strings where the topmost
45 * element will be popped off and become the next parser state.
46 */
47 var $stateStack;
48
49
50 /**
51 * The path stack is a stack of associative arrays with the following elements:
52 * name The name of top level of the path
53 * level The level (number of elements) of the path
54 * startByte The byte offset of the start of the path
55 * startToken The token offset of the start
56 * endByte The byte offset of the end, plus one
57 * endToken The token offset of the end, plus one
58 * valueStartToken The start token offset of the value part
59 * valueStartByte The start byte offset of the value part
60 * valueEndToken The end token offset of the value part, plus one
61 * valueEndByte The end byte offset of the value part, plus one
62 * nextArrayIndex The next numeric array index at this level
63 * hasComma True if the array element ends with a comma
64 * arrowByte The byte offset of the "=>", or false if there isn't one
65 */
66 var $pathStack;
67
68 /**
69 * The elements of the top of the pathStack for every path encountered, indexed
70 * by slash-separated path.
71 */
72 var $pathInfo;
73
74 /**
75 * Next serial number for whitespace placeholder paths (\@extra-N)
76 */
77 var $serial;
78
79 /**
80 * Editor state. This consists of the internal copy/insert operations which
81 * are applied to the source string to obtain the destination string.
82 */
83 var $edits;
84
85 /**
86 * Simple entry point for command-line testing
87 *
88 * @param $text string
89 *
90 * @return string
91 */
static function test( $text ) {
	// A clean parse reports "OK"; a parse failure reports the error
	// message followed by the offending line with a caret under the column.
	$editor = new self( $text );
	try {
		$editor->parse();
		$result = "OK";
	} catch ( ConfEditorParseError $parseError ) {
		$result = $parseError->getMessage() . "\n" . $parseError->highlight( $text );
	}
	return $result;
}
101
102 /**
103 * Construct a new parser
104 */
public function __construct( $text ) {
	// Only the source text is stored here; nothing is tokenised yet.
	$this->text = $text;
}
108
109 /**
110 * Edit the text. Returns the edited text.
111 * @param $ops Array of operations.
112 *
113 * Operations are given as an associative array, with members:
114 * type: One of delete, set, append or insert (required)
115 * path: The path to operate on (required)
116 * key: The array key to insert/append, with PHP quotes
117 * value: The value, with PHP quotes
118 *
119 * delete
120 * Deletes an array element or statement with the specified path.
121 * e.g.
122 * array('type' => 'delete', 'path' => '$foo/bar/baz' )
123 * is equivalent to the runtime PHP code:
124 * unset( $foo['bar']['baz'] );
125 *
126 * set
127 * Sets the value of an array element. If the element doesn't exist, it
128 * is appended to the array. If it does exist, the value is set, with
129 * comments and indenting preserved.
130 *
131 * append
132 * Appends a new element to the end of the array. Adds a trailing comma.
133 * e.g.
134 * array( 'type' => 'append', 'path' => '$foo/bar',
135 * 'key' => 'baz', 'value' => "'x'" )
136 * is like the PHP code:
137 * $foo['bar']['baz'] = 'x';
138 *
139 * insert
140 * Insert a new element at the start of the array.
141 *
142 */
public function edit( $ops ) {
	$this->parse();

	// Start from one operation that copies the whole source verbatim; each
	// requested op then splits it through replaceSourceRegion().
	$this->edits = array(
		array( 'copy', 0, strlen( $this->text ) )
	);
	foreach ( $ops as $op ) {
		$type = $op['type'];
		$path = $op['path'];
		$value = isset( $op['value'] ) ? $op['value'] : null;
		$key = isset( $op['key'] ) ? $op['key'] : null;

		switch ( $type ) {
			case 'delete':
				list( $start, $end ) = $this->findDeletionRegion( $path );
				$this->replaceSourceRegion( $start, $end, false );
				break;
			case 'set':
				if ( isset( $this->pathInfo[$path] ) ) {
					// Existing element: replace only the value text. The value
					// is used as given, so it must already be PHP-quoted.
					list( $start, $end ) = $this->findValueRegion( $path );
					$this->replaceSourceRegion( $start, $end, $value );
					break;
				}
				// No existing path: turn the request into an append of the
				// last path component to its parent array.
				$slashPos = strrpos( $path, '/' );
				if ( $slashPos === false ) {
					// Without a slash there is no parent array to append to.
					// Previously this fell through with an empty parent path
					// and failed with a misleading message.
					throw new MWException(
						"Can't set \"$path\": the path does not exist and has no parent array" );
				}
				$key = var_export( substr( $path, $slashPos + 1 ), true );
				$path = substr( $path, 0, $slashPos );
				// Fall through
			case 'append':
				// Find the last array element
				$lastEltPath = $this->findLastArrayElement( $path );
				if ( $lastEltPath === false ) {
					throw new MWException( "Can't find any element of array \"$path\"" );
				}
				$lastEltInfo = $this->pathInfo[$lastEltPath];

				// Has it got a comma already?
				if ( strpos( $lastEltPath, '@extra' ) === false && !$lastEltInfo['hasComma'] ) {
					// No comma, insert one after the value region
					list( , $end ) = $this->findValueRegion( $lastEltPath );
					$this->replaceSourceRegion( $end - 1, $end - 1, ',' );
				}

				// The new element goes directly after the last element's region
				list( $start, $end ) = $this->findDeletionRegion( $lastEltPath );
				$textToInsert = $this->makeElementText(
					$start, $key, $value, $lastEltInfo['arrowByte'] );
				$this->replaceSourceRegion( $end, $end, $textToInsert );
				break;
			case 'insert':
				// Find first array element
				$firstEltPath = $this->findFirstArrayElement( $path );
				if ( $firstEltPath === false ) {
					throw new MWException( "Can't find array element of \"$path\"" );
				}
				list( $start, ) = $this->findDeletionRegion( $firstEltPath );
				$info = $this->pathInfo[$firstEltPath];

				// The new element goes directly before the first element's region
				$textToInsert = $this->makeElementText(
					$start, $key, $value, $info['arrowByte'] );
				$this->replaceSourceRegion( $start, $start, $textToInsert );
				break;
			default:
				throw new MWException( "Unrecognised operation: \"$type\"" );
		}
	}

	// Do the edits: concatenate copied source slices and inserted text
	$out = '';
	foreach ( $this->edits as $edit ) {
		if ( $edit[0] == 'copy' ) {
			$out .= substr( $this->text, $edit[1], $edit[2] - $edit[1] );
		} else { // if ( $edit[0] == 'insert' )
			$out .= $edit[1];
		}
	}

	// Do a second parse as a sanity check
	$this->text = $out;
	try {
		$this->parse();
	} catch ( ConfEditorParseError $e ) {
		throw new MWException(
			"Sorry, ConfEditor broke the file during editing and it won't parse anymore: " .
			$e->getMessage() );
	}
	return $out;
}

/**
 * Build the source text of a new array element, modelled on the layout of
 * a neighbouring element.
 *
 * @param $start int Byte offset where the neighbouring element's region starts
 * @param $key string|null PHP-quoted key, or null for a non-associative element
 * @param $value string PHP-quoted value
 * @param $arrowByte int|false Byte offset of the neighbour's "=>", or false
 * @return string Element text with a trailing comma, ending in a line break
 *   when the neighbour starts its line, otherwise in a single space
 */
private function makeElementText( $start, $key, $value, $arrowByte ) {
	if ( $key === null ) {
		list( $indent, ) = $this->getIndent( $start );
		$text = "$indent$value,";
	} else {
		list( $indent, $arrowIndent ) = $this->getIndent( $start, $key, $arrowByte );
		$text = "$indent$key$arrowIndent=> $value,";
	}
	return $text . ( $indent === false ? ' ' : "\n" );
}
252
253 /**
254 * Get the variables defined in the text
255 * @return array( varname => value )
256 */
function getVars() {
	$vars = array();
	$this->parse();
	foreach ( $this->pathInfo as $path => $data ) {
		// Only paths rooted at a variable are of interest; this skips the
		// top-level "@extra-N" placeholder paths.
		$path = (string)$path;
		if ( substr( $path, 0, 1 ) !== '$' ) {
			continue;
		}
		$trimmedPath = substr( $path, 1 );

		// Non-associative array elements are named with an integer index by
		// nextPath(). Cast to string before looking at the first character,
		// because offset access on an integer raises a notice/warning on
		// PHP 7.4 and later.
		$name = (string)$data['name'];
		$firstChar = substr( $name, 0, 1 );
		if ( $firstChar === '@' ) {
			// Placeholder element inside an array
			continue;
		}
		if ( $firstChar === '$' ) {
			$name = substr( $name, 1 );
		}

		// Everything before the final component is the parent path
		$parentPath = substr( $trimmedPath, 0,
			strlen( $trimmedPath ) - strlen( $name ) );
		if ( substr( $parentPath, -1 ) == '/' ) {
			$parentPath = substr( $parentPath, 0, -1 );
		}

		$value = substr( $this->text, $data['valueStartByte'],
			$data['valueEndByte'] - $data['valueStartByte']
		);
		// setVar() never overwrites, so an array's own source text does not
		// replace entries already stored for its children.
		$this->setVar( $vars, $parentPath, $name,
			$this->parseScalar( $value ) );
	}
	return $vars;
}
282
283 /**
284 * Set a value in an array, unless it's set already. For instance,
285 * setVar( $arr, 'foo/bar', 'baz', 3 ); will set
286 * $arr['foo']['bar']['baz'] = 3;
287 * @param $array array
288 * @param $path string slash-delimited path
289 * @param $key mixed Key
290 * @param $value mixed Value
291 */
function setVar( &$array, $path, $key, $value ) {
	// Walk down the slash-separated path, creating missing levels on the way
	$node =& $array;
	$segments = ( $path !== '' ) ? explode( '/', $path ) : array();
	foreach ( $segments as $segment ) {
		if ( !isset( $node[$segment] ) ) {
			$node[$segment] = array();
		}
		$node =& $node[$segment];
	}

	// An existing entry wins; only fill the slot when it is not set yet
	if ( isset( $node[$key] ) ) {
		return;
	}
	$node[$key] = $value;
}
305
306 /**
307 * Parse a scalar value in PHP
308 * @return mixed Parsed value
309 */
function parseScalar( $str ) {
	$first = ( $str !== '' ) ? $str[0] : '';

	if ( $first == '\'' ) {
		// Single-quoted string: strip the quotes and undo the two escapes
		// that PHP recognises inside single quotes.
		// @todo FIXME: the trim() is a workaround for whitespace that
		// sometimes arrives appended to the token; without it the closing
		// quote would be kept.
		$inner = substr( trim( $str ), 1, -1 );
		return strtr( $inner, array( '\\\'' => '\'', '\\\\' => '\\' ) );
	}

	if ( $first == '"' ) {
		// Double-quoted string: same trim() workaround as above, then
		// decode C-style escapes.
		$inner = substr( trim( $str ), 1, -1 );
		return stripcslashes( $inner );
	}

	// Keywords are matched by prefix only
	$lead4 = substr( $str, 0, 4 );
	if ( $lead4 == 'true' ) {
		return true;
	}
	if ( substr( $str, 0, 5 ) == 'false' ) {
		return false;
	}
	if ( $lead4 == 'null' ) {
		return null;
	}

	// Anything else is handed back untouched as a string, leaving numeric
	// interpretation to PHP's weak typing.
	return $str;
}
334
335 /**
336 * Replace the byte offset region of the source with $newText.
337 * Works by adding elements to the $this->edits array.
338 */
function replaceSourceRegion( $start, $end, $newText = false ) {
	// Rebuild the edit list, splitting whichever copy operation contains
	// the region [$start, $end).
	// NOTE(review): a zero-length region lying exactly on a copy boundary
	// ($start == $end == a copy's start or end) matches no copy operation
	// and is passed over without inserting anything - confirm this is intended.
	$rebuilt = array();
	foreach ( $this->edits as $op ) {
		// Inserts, and copies that do not touch the region, pass through
		if ( $op[0] !== 'copy' || $start >= $op[2] || $end <= $op[1] ) {
			$rebuilt[] = $op;
			continue;
		}

		list( , $copyStart, $copyEnd ) = $op;
		$straddlesStart = ( $start < $copyStart && $end > $copyStart );
		$straddlesEnd = ( $start < $copyEnd && $end > $copyEnd );
		if ( $straddlesStart || $straddlesEnd ) {
			throw new MWException( "Overlapping regions found, can't do the edit" );
		}

		// Split the copy around the region, putting the new text (if any)
		// in the gap.
		$rebuilt[] = array( 'copy', $copyStart, $start );
		if ( $newText !== false ) {
			$rebuilt[] = array( 'insert', $newText );
		}
		$rebuilt[] = array( 'copy', $end, $copyEnd );
	}
	$this->edits = $rebuilt;
}
369
370 /**
371 * Finds the source byte region which you would want to delete, if $pathName
372 * was to be deleted. Includes the leading spaces and tabs, the trailing line
373 * break, and any comments in between.
374 */
function findDeletionRegion( $pathName ) {
	if ( !isset( $this->pathInfo[$pathName] ) ) {
		throw new MWException( "Can't find path \"$pathName\"" );
	}
	$info = $this->pathInfo[$pathName];

	// Rewind, then advance to the first token of the path
	$this->firstToken();
	while ( $this->pos != $info['startToken'] ) {
		$this->nextToken();
	}

	// Walk backwards over whitespace/comments to pull the start back to
	// the beginning of the line.
	$regionStart = $info['startByte'];
	$offset = -1;
	while ( $offset >= -$this->pos ) {
		$token = $this->getTokenAhead( $offset );
		if ( !$token->isSkip() ) {
			// Other content shares this line: keep the original start so
			// that regions do not overlap.
			$regionStart = $info['startByte'];
			break;
		}
		$length = strlen( $token->text );
		$lfPos = strrpos( $token->text, "\n" );
		if ( $lfPos !== false ) {
			// Take only the part after the last LF; the LF itself stays
			$regionStart -= $length - $lfPos - 1;
			break;
		}
		$regionStart -= $length;
		$offset--;
	}

	// Advance to the token just past the end of the path
	while ( $this->pos != $info['endToken'] ) {
		$this->nextToken();
	}

	// Walk forwards over whitespace/comments up to and including the
	// first line break.
	$regionEnd = $info['endByte']; // past the end
	$remaining = count( $this->tokens ) - $this->pos;
	for ( $offset = 0; $offset < $remaining; $offset++ ) {
		$token = $this->getTokenAhead( $offset );
		if ( !$token->isSkip() ) {
			break;
		}
		$lfPos = strpos( $token->text, "\n" );
		if ( $lfPos !== false ) {
			// Point just past the LF
			$regionEnd += $lfPos + 1;
			break;
		}
		$regionEnd += strlen( $token->text );
	}

	return array( $regionStart, $regionEnd );
}
424
425 /**
426 * Find the byte region in the source corresponding to the value part.
427 * This includes the quotes, but does not include the trailing comma
428 * or semicolon.
429 *
430 * The end position is the past-the-end (end + 1) value as per convention.
431 */
function findValueRegion( $pathName ) {
	if ( !isset( $this->pathInfo[$pathName] ) ) {
		throw new MWException( "Can't find path \"$pathName\"" );
	}
	$info = $this->pathInfo[$pathName];
	$from = $info['valueStartByte'];
	$to = $info['valueEndByte'];
	// Paths that never had a value marked keep both offsets at false
	if ( $from === false || $to === false ) {
		throw new MWException( "Can't find value region for path \"$pathName\"" );
	}
	return array( $from, $to );
}
442
443 /**
444 * Find the path name of the last element in the array.
445 * If the array is empty, this will return the \@extra interstitial element.
446 * If the specified path is not found or is not an array, it will return false.
447 */
function findLastArrayElement( $path ) {
	$prefix = "$path/";
	$prefixLen = strlen( $path ) + 1;

	// First pass: the last real element, ignoring "@..." placeholders.
	// Scanning stops at the first unrelated path once a match has been seen.
	$found = false;
	foreach ( $this->pathInfo as $candidatePath => $info ) {
		if ( substr( $candidatePath, $prefixLen, 1 ) == '@' ) {
			continue;
		}
		if ( substr( $candidatePath, 0, $prefixLen ) == $prefix ) {
			$found = $candidatePath;
			continue;
		}
		if ( $found !== false ) {
			break;
		}
	}
	if ( $found !== false ) {
		return $found;
	}

	// Second pass: accept anything under the path, placeholders included
	foreach ( $this->pathInfo as $candidatePath => $info ) {
		if ( substr( $candidatePath, 0, $prefixLen ) == $prefix ) {
			$found = $candidatePath;
		} elseif ( $found !== false ) {
			break;
		}
	}
	return $found;
}
478
479 /**
480 * Find the path name of first element in the array.
481 * If the array is empty, this will return the \@extra interstitial element.
482 * If the specified path is not found or is not an array, it will return false.
483 */
function findFirstArrayElement( $path ) {
	$prefix = "$path/";
	$prefixLen = strlen( $path ) + 1;

	// Single scan: return the first real element as soon as it is seen,
	// remembering the first placeholder ("@...") in case there is none.
	$fallback = false;
	foreach ( $this->pathInfo as $candidatePath => $info ) {
		if ( substr( $candidatePath, 0, $prefixLen ) != $prefix ) {
			continue;
		}
		if ( substr( $candidatePath, $prefixLen, 1 ) != '@' ) {
			return $candidatePath;
		}
		if ( $fallback === false ) {
			$fallback = $candidatePath;
		}
	}
	return $fallback;
}
503
504 /**
505 * Get the indent string which sits after a given start position.
506 * Returns false if the position is not at the start of the line.
507 */
function getIndent( $pos, $key = false, $arrowPos = false ) {
	// Only a position at the very start of a line has an indent
	$atLineStart = ( $pos == 0 || $this->text[$pos-1] == "\n" );
	if ( !$atLineStart ) {
		return array( false, ' ' );
	}

	// The indent is the run of spaces and tabs beginning at $pos
	$indentLength = strspn( $this->text, " \t", $pos );
	$indent = substr( $this->text, $pos, $indentLength );

	// Default to one space before "=>"; when the reference arrow position
	// is known, pad so that the new arrow lands in the same column.
	$arrowIndent = ' ';
	if ( $indent !== false && $arrowPos !== false ) {
		$padding = $arrowPos - $pos - $indentLength - strlen( $key );
		if ( $padding > 0 ) {
			$arrowIndent = str_repeat( ' ', $padding );
		}
	}
	return array( $indent, $arrowIndent );
}
524
525 /**
526 * Run the parser on the text. Throws an exception if the string does not
527 * match our defined subset of PHP syntax.
528 */
public function parse() {
	$this->initParse();
	$this->pushState( 'file' );
	$this->pushPath( '@extra-' . ($this->serial++) );
	$tok = $this->firstToken();

	// Each iteration pops one state and runs its handler; handlers push
	// whichever states must follow.
	while ( !$tok->isEnd() ) {
		$current = $this->popState();
		if ( !$current ) {
			$this->error( 'internal error: empty state stack' );
		}

		switch ( $current ) {
			case 'file':
				// The very first token must be the open tag; after that the
				// handling is the same as between statements.
				$this->expect( T_OPEN_TAG );
				// Fall through
			case 'file 2':
				$tok = $this->skipSpace();
				if ( $tok->isEnd() ) {
					break 2;
				}
				$this->pushState( 'statement', 'file 2' );
				break;

			case 'statement':
				$tok = $this->skipSpace();
				if ( !$this->validatePath( $tok->text ) ) {
					$this->error( "Invalid variable name \"{$tok->text}\"" );
				}
				$this->nextPath( $tok->text );
				$this->expect( T_VARIABLE );
				$this->skipSpace();

				// Optional single subscript: $var[key] = ...
				$arrayAssign = false;
				if ( $this->currentToken()->type == '[' ) {
					$this->nextToken();
					$tok = $this->skipSpace();
					if ( !$tok->isScalar() ) {
						$this->error( "expected a string or number for the array key" );
					}
					if ( $tok->type == T_CONSTANT_ENCAPSED_STRING ) {
						$keyName = $this->parseScalar( $tok->text );
					} else {
						$keyName = $tok->text;
					}
					if ( !$this->validatePath( $keyName ) ) {
						$this->error( "Invalid associative array name \"$keyName\"" );
					}
					$this->pushPath( $keyName );
					$this->nextToken();
					$this->skipSpace();
					$this->expect( ']' );
					$this->skipSpace();
					$arrayAssign = true;
				}

				$this->expect( '=' );
				$this->skipSpace();
				$this->startPathValue();
				$this->pushState(
					'expression',
					$arrayAssign ? 'array assign end' : 'statement end'
				);
				break;

			case 'array assign end':
			case 'statement end':
				$this->endPathValue();
				if ( $current == 'array assign end' ) {
					// Leave the path level opened for the subscript
					$this->popPath();
				}
				$this->skipSpace();
				$this->expect( ';' );
				$this->nextPath( '@extra-' . ($this->serial++) );
				break;

			case 'expression':
				$tok = $this->skipSpace();
				if ( $tok->type == T_ARRAY ) {
					$this->pushState( 'array' );
				} elseif ( $tok->isScalar() || $tok->type == T_VARIABLE ) {
					// Scalars and variables are single-token expressions
					$this->nextToken();
				} else {
					$this->error( "expected simple expression" );
				}
				break;

			case 'array':
				$this->skipSpace();
				$this->expect( T_ARRAY );
				$this->skipSpace();
				$this->expect( '(' );
				$this->skipSpace();
				$this->pushPath( '@extra-' . ($this->serial++) );
				if ( $this->isAhead( ')' ) ) {
					// Empty array
					$this->pushState( 'array end' );
				} else {
					$this->pushState( 'element', 'array end' );
				}
				break;

			case 'array end':
				$this->skipSpace();
				$this->popPath();
				$this->expect( ')' );
				break;

			case 'element':
				$tok = $this->skipSpace();
				// A scalar followed by "=>" makes this an associative element
				$isAssoc = $tok->isScalar() && $this->isAhead( T_DOUBLE_ARROW, 1 );
				if ( $isAssoc ) {
					$this->pushState( 'assoc-element', 'element end' );
				} else {
					$this->nextPath( '@next' );
					$this->startPathValue();
					$this->pushState( 'expression', 'element end' );
				}
				break;

			case 'element end':
				$tok = $this->skipSpace();
				if ( $tok->type == ')' ) {
					// End of the array; the bracket itself is consumed by
					// the 'array end' state.
					$this->endPathValue();
				} elseif ( $tok->type == ',' ) {
					$this->endPathValue();
					$this->markComma();
					$this->nextToken();
					$this->nextPath( '@extra-' . ($this->serial++) );
					if ( $this->isAhead( ")" ) ) {
						// Trailing comma: nothing more to parse in this array
						$this->skipSpace();
					} else {
						// Another element follows
						$this->pushState( 'element' );
					}
				} else {
					$this->error( "expected the next array element or the end of the array" );
				}
				break;

			case 'assoc-element':
				$tok = $this->skipSpace();
				if ( !$tok->isScalar() ) {
					$this->error( "expected a string or number for the array key" );
				}
				if ( $tok->type == T_CONSTANT_ENCAPSED_STRING ) {
					$keyName = $this->parseScalar( $tok->text );
				} else {
					$keyName = $tok->text;
				}
				if ( !$this->validatePath( $keyName ) ) {
					$this->error( "Invalid associative array name \"$keyName\"" );
				}
				$this->nextPath( $keyName );
				$this->nextToken();
				$this->skipSpace();
				$this->markArrow();
				$this->expect( T_DOUBLE_ARROW );
				$this->skipSpace();
				$this->startPathValue();
				$this->pushState( 'expression' );
				break;
		}
	}

	// States left over mean the input ended in the middle of a construct
	if ( count( $this->stateStack ) ) {
		$this->error( 'unexpected end of file' );
	}
	$this->popPath();
}
699
700 /**
701 * Initialise a parse.
702 */
protected function initParse() {
	// Tokenise first: firstToken() reads from $this->tokens
	$this->tokens = token_get_all( $this->text );

	// Reset both stacks and the collected path data
	$this->stateStack = array();
	$this->pathStack = array();
	$this->pathInfo = array();
	$this->serial = 1;

	$this->firstToken();
}
711
712 /**
713 * Set the parse position. Do not call this except from firstToken() and
714 * nextToken(), there is more to update than just the position.
715 */
protected function setPos( $pos ) {
	$this->pos = $pos;
	// Past the last token, the current token becomes the END marker
	$this->currentToken = ( $pos < count( $this->tokens ) )
		? $this->newTokenObj( $this->tokens[$pos] )
		: ConfEditorToken::newEnd();
	return $this->currentToken;
}
725
726 /**
727 * Create a ConfEditorToken from an element of token_get_all()
728 */
function newTokenObj( $internalToken ) {
	// Non-array tokens use the same value as both type and text
	if ( !is_array( $internalToken ) ) {
		return new ConfEditorToken( $internalToken, $internalToken );
	}
	// Array tokens carry the type at index 0 and the text at index 1
	list( $type, $text ) = $internalToken;
	return new ConfEditorToken( $type, $text );
}
736
737 /**
738 * Reset the parse position
739 */
function firstToken() {
	// Position counters go back to the start of the text
	$this->lineNum = 1;
	$this->colNum = 1;
	$this->byteNum = 0;
	// There is no previous token yet, so use the END marker
	$this->prevToken = ConfEditorToken::newEnd();
	return $this->setPos( 0 );
}
748
749 /**
750 * Get the current token
751 */
function currentToken() {
	// Plain accessor; does not move the parse position
	$token = $this->currentToken;
	return $token;
}
755
756 /**
757 * Advance the current position and return the resulting next token
758 */
function nextToken() {
	$leaving = $this->currentToken;
	if ( $leaving ) {
		// Account for the text of the token being left behind
		$text = $leaving->text;
		$length = strlen( $text );
		$lastLf = strrpos( $text, "\n" );
		if ( $lastLf === false ) {
			$this->colNum += $length;
		} else {
			// The column restarts after the last line break in the token
			$this->lineNum += substr_count( $text, "\n" );
			$this->colNum = $length - $lastLf;
		}
		$this->byteNum += $length;
	}
	$this->prevToken = $leaving;
	return $this->setPos( $this->pos + 1 );
}
775
776 /**
777 * Get the token $offset steps ahead of the current position.
778 * $offset may be negative, to get tokens behind the current position.
779 */
function getTokenAhead( $offset ) {
	$index = $this->pos + $offset;
	// Anything outside the token array is reported as the END marker
	$inRange = ( $index >= 0 && $index < count( $this->tokens ) );
	return $inRange
		? $this->newTokenObj( $this->tokens[$index] )
		: ConfEditorToken::newEnd();
}
788
789 /**
790 * Advances the current position past any whitespace or comments
791 */
function skipSpace() {
	// Step forward until the current token is not whitespace or a comment
	while ( true ) {
		$token = $this->currentToken;
		if ( !$token || !$token->isSkip() ) {
			return $token;
		}
		$this->nextToken();
	}
}
798
799 /**
800 * Throws an error if the current token is not of the given type, and
801 * then advances to the next position.
802 */
function expect( $type ) {
	$token = $this->currentToken;
	if ( $token && $token->type == $type ) {
		// Matched: consume it and hand back the following token
		return $this->nextToken();
	}
	$expected = $this->getTypeName( $type );
	$actual = $this->getTypeName( $this->currentToken->type );
	$this->error( "expected " . $expected . ", got " . $actual );
}
811
812 /**
813 * Push a state or two on to the state stack.
814 */
function pushState( $nextState, $stateAfterThat = null ) {
	// The later state is pushed first so that it is popped last
	$pending = ( $stateAfterThat === null )
		? array( $nextState )
		: array( $stateAfterThat, $nextState );
	foreach ( $pending as $state ) {
		$this->stateStack[] = $state;
	}
}
821
822 /**
823 * Pop a state from the state stack.
824 */
function popState() {
	// array_pop() yields null when the stack is empty
	$state = array_pop( $this->stateStack );
	return $state;
}
828
829 /**
830 * Returns true if the user input path is valid.
831 * This exists to allow "/" and "@" to be reserved for string path keys
832 */
function validatePath( $path ) {
	// "/" is the path separator, so it may not appear anywhere
	if ( strpos( $path, '/' ) !== false ) {
		return false;
	}
	// A leading "@" is reserved for internal placeholder names
	return substr( $path, 0, 1 ) != '@';
}
836
837 /**
838 * Internal function to update some things at the end of a path region. Do
839 * not call except from popPath() or nextPath().
840 */
function endPath() {
	// Join the names of every stack level into the slash-separated key.
	// After the loop $frame holds the topmost level.
	$fullPath = '';
	foreach ( $this->pathStack as $frame ) {
		$fullPath .= ( $fullPath === '' ? '' : '/' ) . $frame['name'];
	}
	// Stamp the end position and record the finished entry
	$frame['endByte'] = $this->byteNum;
	$frame['endToken'] = $this->pos;
	$this->pathInfo[$fullPath] = $frame;
}
853
854 /**
855 * Go up to a new path level, for example at the start of an array.
856 */
function pushPath( $path ) {
	// A fresh level starts at the current position with no value marked
	$frame = array(
		'name' => $path,
		'level' => count( $this->pathStack ) + 1,
		'startByte' => $this->byteNum,
		'startToken' => $this->pos,
		'valueStartToken' => false,
		'valueStartByte' => false,
		'valueEndToken' => false,
		'valueEndByte' => false,
		'nextArrayIndex' => 0,
		'hasComma' => false,
		'arrowByte' => false
	);
	$this->pathStack[] = $frame;
}
872
873 /**
874 * Go down a path level, for example at the end of an array.
875 */
function popPath() {
	// Record the finished path in pathInfo before dropping its level
	$this->endPath();
	array_pop( $this->pathStack );
}
880
881 /**
882 * Go to the next path on the same level. This ends the current path and
883 * starts a new one. If $path is \@next, the new path is set to the next
884 * numeric array element.
885 */
function nextPath( $path ) {
	$this->endPath();

	$top = count( $this->pathStack ) - 1;
	$frame = $this->pathStack[$top];
	if ( $path == '@next' ) {
		// Non-associative element: take the running numeric index
		$frame['name'] = $frame['nextArrayIndex'];
		$frame['nextArrayIndex']++;
	} else {
		$frame['name'] = $path;
	}

	// Position and value markers start afresh; array union keeps the left
	// operand's entries, so only the remaining keys (name, level,
	// nextArrayIndex) are carried over from the old level.
	$reset = array(
		'startByte' => $this->byteNum,
		'startToken' => $this->pos,
		'valueStartToken' => false,
		'valueStartByte' => false,
		'valueEndToken' => false,
		'valueEndByte' => false,
		'hasComma' => false,
		'arrowByte' => false,
	);
	$this->pathStack[$top] = $reset + $frame;
}
908
909 /**
910 * Mark the start of the value part of a path.
911 */
function startPathValue() {
	// The value of the topmost path begins at the current position
	$top = count( $this->pathStack ) - 1;
	$this->pathStack[$top]['valueStartToken'] = $this->pos;
	$this->pathStack[$top]['valueStartByte'] = $this->byteNum;
}
917
918 /**
919 * Mark the end of the value part of a path.
920 */
function endPathValue() {
	// The value of the topmost path ends at the current position
	$top = count( $this->pathStack ) - 1;
	$this->pathStack[$top]['valueEndToken'] = $this->pos;
	$this->pathStack[$top]['valueEndByte'] = $this->byteNum;
}
926
927 /**
928 * Mark the comma separator in an array element
929 */
function markComma() {
	// Flag the topmost path as being followed by a comma
	$top = count( $this->pathStack ) - 1;
	$this->pathStack[$top]['hasComma'] = true;
}
934
935 /**
936 * Mark the arrow separator in an associative array element
937 */
function markArrow() {
	// Remember the current byte offset as the "=>" position of the topmost path
	$top = count( $this->pathStack ) - 1;
	$this->pathStack[$top]['arrowByte'] = $this->byteNum;
}
942
943 /**
944 * Generate a parse error
945 */
function error( $msg ) {
	// The exception reads the current line and column from this editor
	$parseError = new ConfEditorParseError( $this, $msg );
	throw $parseError;
}
949
950 /**
951 * Get a readable name for the given token type.
952 */
function getTypeName( $type ) {
	// Integer types are tokenizer constants; anything else is shown as a
	// double-quoted literal.
	return is_int( $type )
		? token_name( $type )
		: ( '"' . $type . '"' );
}
960
961 /**
962 * Looks ahead to see if the given type is the next token type, starting
963 * from the current position plus the given offset. Skips any intervening
964 * whitespace.
965 */
function isAhead( $type, $offset = 0 ) {
	// Scan forward from the offset; the first token that is neither
	// whitespace nor a comment decides the answer.
	for ( $ahead = $offset; ; $ahead++ ) {
		$token = $this->getTokenAhead( $ahead );
		if ( $token->isEnd() ) {
			// Ran out of tokens without finding anything significant
			return false;
		}
		if ( !$token->isSkip() ) {
			return $token->type == $type;
		}
	}
}
984
985 /**
986 * Get the previous token object
987 */
function prevToken() {
	// Plain accessor for the token that was current before the last advance
	$token = $this->prevToken;
	return $token;
}
991
992 /**
993 * Echo a reasonably readable representation of the tokenizer array.
994 */
function dumpTokens() {
	// One row per token: padded type name, then the text with control
	// characters escaped.
	$rows = array();
	foreach ( $this->tokens as $rawToken ) {
		$token = $this->newTokenObj( $rawToken );
		$rows[] = sprintf( "%-28s %s\n",
			$this->getTypeName( $token->type ),
			addcslashes( $token->text, "\0..\37" ) );
	}
	echo "<pre>" . htmlspecialchars( implode( '', $rows ) ) . "</pre>";
}
1005 }
1006
1007 /**
1008 * Exception class for parse errors
1009 */
class ConfEditorParseError extends MWException {
	/** 1-based line number at which parsing failed */
	public $lineNum;

	/** 1-based column number at which parsing failed */
	public $colNum;

	/**
	 * @param $editor ConfEditor The editor whose current position is reported
	 * @param $msg string Description of the problem
	 */
	function __construct( $editor, $msg ) {
		$this->lineNum = $editor->lineNum;
		$this->colNum = $editor->colNum;
		parent::__construct( "Parse error on line {$editor->lineNum} " .
			"col {$editor->colNum}: $msg" );
	}

	/**
	 * Return the offending line of $text followed by a line with a caret
	 * under the error column.
	 *
	 * @param $text string The text that was being parsed
	 * @return string The two-line highlight, or an empty string when $text
	 *   has no such line
	 */
	function highlight( $text ) {
		$lines = StringUtils::explode( "\n", $text );
		foreach ( $lines as $lineNum => $line ) {
			if ( $lineNum == $this->lineNum - 1 ) {
				// Clamp the padding so an out-of-range column cannot hand
				// str_repeat() a negative count.
				$padding = str_repeat( ' ', max( 0, $this->colNum - 1 ) );
				return "$line\n" . $padding . "^\n";
			}
		}
		// The line is not in $text: return an explicit empty string rather
		// than falling off the end and returning null.
		return '';
	}

}
1029
1030 /**
1031 * Class to wrap a token from the tokenizer.
1032 */
class ConfEditorToken {
	/** Token type: an integer T_* constant, a literal string, or 'END' */
	public $type;

	/** Source text of the token */
	public $text;

	/** Token types accepted as scalar values and array keys */
	static $scalarTypes = array( T_LNUMBER, T_DNUMBER, T_STRING, T_CONSTANT_ENCAPSED_STRING );

	/** Token types skipped over as whitespace or comments */
	static $skipTypes = array( T_WHITESPACE, T_COMMENT, T_DOC_COMMENT );

	/**
	 * Create the marker token used for positions outside the token array.
	 * @return ConfEditorToken
	 */
	static function newEnd() {
		return new self( 'END', '' );
	}

	/**
	 * @param $type int|string Token type
	 * @param $text string Token text
	 */
	function __construct( $type, $text ) {
		$this->type = $type;
		$this->text = $text;
	}

	/**
	 * True for whitespace and comments.
	 *
	 * The comparison is strict so that a string type is never matched
	 * against the integer T_* constants through type juggling.
	 * @return bool
	 */
	function isSkip() {
		return in_array( $this->type, self::$skipTypes, true );
	}

	/**
	 * True for number, bare-word and quoted-string tokens (strict comparison).
	 * @return bool
	 */
	function isScalar() {
		return in_array( $this->type, self::$scalarTypes, true );
	}

	/**
	 * True only for the marker created by newEnd(). Strict comparison keeps
	 * an integer type from ever comparing equal to the string 'END'.
	 * @return bool
	 */
	function isEnd() {
		return $this->type === 'END';
	}
}
1060