docs: Fix Doxygen parsing of @var descriptions with $var names
[lhc/web/wiklou.git] / maintenance / includes / MWDoxygenFilter.php
index 30cb97d..287a927 100644 (file)
  * Based on
  * <https://virtualtee.blogspot.co.uk/2012/03/tip-for-using-doxygen-for-php-code.html>
  *
- * Improved to resolve various bugs and better MediaWiki PHPDoc conventions:
+ * It has been adapted for MediaWiki to resolve various bugs we experienced
+ * from using Doxygen with our coding conventions:
  *
- * - Insert variable name after typehint instead of at end of line so that
- *   documentation text may follow after `@var Type`.
- * - Insert typehint into source code before $variable instead of inside the comment
- *   so that Doxygen interprets it.
- * - Strip the text after `@var` from the output to avoid Doxygen warnings about bogus
- *   symbols being documented but not declared or defined.
+ * - We want to allow documenting class members on a single line by documenting
+ *   them as `/** @var SomeType Description here.`, and in long-form as
+ *   `/**\n * Description here.\n * @var SomeType`.
+ *
+ * - PHP does not support native type-hinting of class members. Instead, we document
+ *   that using `@var` in the doc blocks above it. However, Doxygen only supports
+ *   parsing this from executable code. We achieve this by having the below filter
+ *   take the typehint from the doc block and insert it into the source code in
+ *   front of `$myvar`, like `protected SomeType $myvar`. This result is technically
+ *   invalid PHP code, but Doxygen understands it this way.
  *
  * @internal For use by maintenance/mwdoc-filter.php
  * @ingroup Maintenance
@@ -48,15 +53,15 @@ class MWDoxygenFilter {
         */
        public static function filter( $source ) {
                $tokens = token_get_all( $source );
-               $buffer = $bufferType = null;
+               $buffer = null;
                $output = '';
                foreach ( $tokens as $token ) {
                        if ( is_string( $token ) ) {
                                if ( $buffer !== null && $token === ';' ) {
                                        // If we still have a buffer and the statement has ended,
                                        // flush it and move on.
-                                       $output .= $buffer;
-                                       $buffer = $bufferType = null;
+                                       $output .= $buffer['raw'];
+                                       $buffer = null;
                                }
                                $output .= $token;
                                continue;
@@ -67,14 +72,16 @@ class MWDoxygenFilter {
                                        // Escape slashes so that references to namespaces are not
                                        // wrongly interpreted as a Doxygen "\command".
                                        $content = addcslashes( $content, '\\' );
-                                       // Look for instances of "@var Type" not followed by $name.
-                                       if ( preg_match( '#@var\s+([^\s]+)\s+([^\$]+)#s', $content ) ) {
-                                               $buffer = preg_replace_callback(
-                                                       // Strip the "@var Type" part and remember the type
-                                                       '#(@var\s+)([^\s]+)#s',
-                                                       function ( $matches ) use ( &$bufferType ) {
-                                                               $bufferType = $matches[2];
-                                                               return '';
+                                       // Look for instances of "@var SomeType".
+                                       if ( preg_match( '#@var\s+\S+#s', $content ) ) {
+                                               $buffer = [ 'raw' => $content, 'desc' => null, 'type' => null, 'name' => null ];
+                                               $buffer['desc'] = preg_replace_callback(
+                                                       // Strip "@var SomeType" part, but remember the type and optional name
+                                                       '#@var\s+(\S+)(\s+)?(\S+)?#s',
+                                                       function ( $matches ) use ( &$buffer ) {
+                                                               $buffer['type'] = $matches[1];
+                                                               $buffer['name'] = $matches[3] ?? null;
+                                                               return ( $matches[2] ?? '' ) . ( $matches[3] ?? '' );
                                                        },
                                                        $content
                                                );
@@ -84,10 +91,38 @@ class MWDoxygenFilter {
                                        break;
 
                                case T_VARIABLE:
+                                       // Doxygen requires class members to be documented in one of two ways:
+                                       //
+                                       // 1. Fully qualified:
+                                       //    /** @var SomeType $name Description here. */
+                                       //
+                                       //    These result in the creation of a new virtual node called $name
+                                       //    with the specified type and description. The real code doesn't
+                                       //    even need to exist in this case.
+                                       //
+                                       // 2. Contextual:
+                                       //    /** Description here. */
+                                       //    private SomeType? $name;
+                                       //
+                                       // In MediaWiki, we are mostly like #1 but without the name repeated:
+                                       //    /** @var SomeType Description here. */
+                                       //    private $name;
+                                       //
+                                       // These emit a warning in Doxygen because they are missing a variable name.
+                                       // Convert these to the "Contextual" kind by stripping ""@var", injecting
+                                       // type into the code, and leaving the description in-place.
                                        if ( $buffer !== null ) {
-                                               $output .= $buffer;
-                                               $output .= "$bufferType $content";
-                                               $buffer = $bufferType = null;
+                                               if ( $buffer['name'] === $content ) {
+                                                       // Fully qualitied "@var" comment, leave as-is.
+                                                       $output .= $buffer['raw'];
+                                                       $output .= $content;
+                                               } else {
+                                                       // MW-style "@var" comment. Keep only the description and transplant
+                                                       // the type into the code.
+                                                       $output .= $buffer['desc'];
+                                                       $output .= "{$buffer['type']} $content";
+                                               }
+                                               $buffer = null;
                                        } else {
                                                $output .= $content;
                                        }
@@ -95,7 +130,8 @@ class MWDoxygenFilter {
 
                                default:
                                        if ( $buffer !== null ) {
-                                               $buffer .= $content;
+                                               $buffer['raw'] .= $content;
+                                               $buffer['desc'] .= $content;
                                        } else {
                                                $output .= $content;
                                        }