docs: Fix Doxygen parsing of @var descriptions with $var names
authorTimo Tijhof <krinklemail@gmail.com>
Thu, 5 Sep 2019 18:21:59 +0000 (19:21 +0100)
committerTimo Tijhof <krinklemail@gmail.com>
Thu, 5 Sep 2019 18:24:14 +0000 (19:24 +0100)
It was wrongly assuming that if the description started with
a dollar sign, that that first word would be the variable name
it was documenting, thus leading to some oddly documented
class members, such as from:

 > SimpleCallbacks.php:
 > /** @var (string|string[])[] $_GET/$_POST data */

Fix this by capturing the first word and actually comparing
it to the variable name we're about to document.

Keep the overall behaviour the same as before, that is,
if the description does indeed start with the correct variable
name, then don't modify it in any way (already covered by test).

Change-Id: I48ed20cf75f146a30d5976fcad3a72d5a9a4906b

maintenance/includes/MWDoxygenFilter.php
tests/phpunit/maintenance/MWDoxygenFilterTest.php

index 30cb97d..287a927 100644 (file)
  * Based on
  * <https://virtualtee.blogspot.co.uk/2012/03/tip-for-using-doxygen-for-php-code.html>
  *
- * Improved to resolve various bugs and better MediaWiki PHPDoc conventions:
+ * It has been adapted for MediaWiki to resolve various bugs we experienced
+ * from using Doxygen with our coding conventions:
  *
- * - Insert variable name after typehint instead of at end of line so that
- *   documentation text may follow after `@var Type`.
- * - Insert typehint into source code before $variable instead of inside the comment
- *   so that Doxygen interprets it.
- * - Strip the text after `@var` from the output to avoid Doxygen warnings about bogus
- *   symbols being documented but not declared or defined.
+ * - We want to allow documenting class members on a single line by documenting
+ *   them as `/** @var SomeType Description here.`, and in long-form as
+ *   `/**\n * Description here.\n * @var SomeType`.
+ *
+ * - PHP does not support native type-hinting of class members. Instead, we document
+ *   that using `@var` in the doc blocks above it. However, Doxygen only supports
+ *   parsing this from executable code. We achieve this by having the below filter
+ *   take the typehint from the doc block and insert it into the source code in
+ *   front of `$myvar`, like `protected SomeType $myvar`. This result is technically
+ *   invalid PHP code, but Doxygen understands it this way.
  *
  * @internal For use by maintenance/mwdoc-filter.php
  * @ingroup Maintenance
@@ -48,15 +53,15 @@ class MWDoxygenFilter {
         */
        public static function filter( $source ) {
                $tokens = token_get_all( $source );
-               $buffer = $bufferType = null;
+               $buffer = null;
                $output = '';
                foreach ( $tokens as $token ) {
                        if ( is_string( $token ) ) {
                                if ( $buffer !== null && $token === ';' ) {
                                        // If we still have a buffer and the statement has ended,
                                        // flush it and move on.
-                                       $output .= $buffer;
-                                       $buffer = $bufferType = null;
+                                       $output .= $buffer['raw'];
+                                       $buffer = null;
                                }
                                $output .= $token;
                                continue;
@@ -67,14 +72,16 @@ class MWDoxygenFilter {
                                        // Escape slashes so that references to namespaces are not
                                        // wrongly interpreted as a Doxygen "\command".
                                        $content = addcslashes( $content, '\\' );
-                                       // Look for instances of "@var Type" not followed by $name.
-                                       if ( preg_match( '#@var\s+([^\s]+)\s+([^\$]+)#s', $content ) ) {
-                                               $buffer = preg_replace_callback(
-                                                       // Strip the "@var Type" part and remember the type
-                                                       '#(@var\s+)([^\s]+)#s',
-                                                       function ( $matches ) use ( &$bufferType ) {
-                                                               $bufferType = $matches[2];
-                                                               return '';
+                                       // Look for instances of "@var SomeType".
+                                       if ( preg_match( '#@var\s+\S+#s', $content ) ) {
+                                               $buffer = [ 'raw' => $content, 'desc' => null, 'type' => null, 'name' => null ];
+                                               $buffer['desc'] = preg_replace_callback(
+                                                       // Strip "@var SomeType" part, but remember the type and optional name
+                                                       '#@var\s+(\S+)(\s+)?(\S+)?#s',
+                                                       function ( $matches ) use ( &$buffer ) {
+                                                               $buffer['type'] = $matches[1];
+                                                               $buffer['name'] = $matches[3] ?? null;
+                                                               return ( $matches[2] ?? '' ) . ( $matches[3] ?? '' );
                                                        },
                                                        $content
                                                );
@@ -84,10 +91,38 @@ class MWDoxygenFilter {
                                        break;
 
                                case T_VARIABLE:
+                                       // Doxygen requires class members to be documented in one of two ways:
+                                       //
+                                       // 1. Fully qualified:
+                                       //    /** @var SomeType $name Description here. */
+                                       //
+                                       //    These result in the creation of a new virtual node called $name
+                                       //    with the specified type and description. The real code doesn't
+                                       //    even need to exist in this case.
+                                       //
+                                       // 2. Contextual:
+                                       //    /** Description here. */
+                                       //    private SomeType? $name;
+                                       //
+                                       // In MediaWiki, we are mostly like #1 but without the name repeated:
+                                       //    /** @var SomeType Description here. */
+                                       //    private $name;
+                                       //
+                                       // These emit a warning in Doxygen because they are missing a variable name.
+                                       // Convert these to the "Contextual" kind by stripping ""@var", injecting
+                                       // type into the code, and leaving the description in-place.
                                        if ( $buffer !== null ) {
-                                               $output .= $buffer;
-                                               $output .= "$bufferType $content";
-                                               $buffer = $bufferType = null;
+                                               if ( $buffer['name'] === $content ) {
+                                                       // Fully qualitied "@var" comment, leave as-is.
+                                                       $output .= $buffer['raw'];
+                                                       $output .= $content;
+                                               } else {
+                                                       // MW-style "@var" comment. Keep only the description and transplant
+                                                       // the type into the code.
+                                                       $output .= $buffer['desc'];
+                                                       $output .= "{$buffer['type']} $content";
+                                               }
+                                               $buffer = null;
                                        } else {
                                                $output .= $content;
                                        }
@@ -95,7 +130,8 @@ class MWDoxygenFilter {
 
                                default:
                                        if ( $buffer !== null ) {
-                                               $buffer .= $content;
+                                               $buffer['raw'] .= $content;
+                                               $buffer['desc'] .= $content;
                                        } else {
                                                $output .= $content;
                                        }
index 2e06d6f..22b5938 100644 (file)
@@ -45,6 +45,21 @@ CODE
 CODE
                ];
 
+               yield 'One-line var with type and description that starts like a variable name' => [
+                       <<<'CODE'
+<?php class MyClass {
+       /** @var array $_GET data from some thing */
+       protected $name;
+}
+CODE
+                       , <<<'CODE'
+<?php class MyClass {
+       /**  $_GET data from some thing */
+       protected array $name;
+}
+CODE
+               ];
+
                yield 'One-line var with type, name, and description' => [
                        // In this full form, Doxygen understands it just fine.
                        // No changes made.