mediawiki.Uri: Use extended RegExps through templates
author Bartosz Dziewoński <matma.rex@gmail.com>
Tue, 25 Aug 2015 07:43:38 +0000 (09:43 +0200)
committer Bartosz Dziewoński <matma.rex@gmail.com>
Tue, 1 Sep 2015 12:19:39 +0000 (14:19 +0200)
JavaScript does not support the 'x' RegExp option for 'extended'
RegExps, or named capturing groups, which makes it impossible to write
large RegExps in a readable way. OR DOES IT?!?

Cons:
* Small runtime performance loss to convert to regular RegExp
* Worse minification due to unminifiable whitespace
    (Both of these problems could probably be resolved
     if we moved the processing server-side somehow,
     but there's no support for it in ResourceLoader now)

Pros:
* You can actually somewhat understand the bloody things if you care
  to, unlike before.

YMMV:
* The actual regexps are in different files

Change-Id: I41630bf20cdb5c2936d85d9f831f2aa7bb8cfef1

resources/Resources.php
resources/src/mediawiki/mediawiki.Uri.js
resources/src/mediawiki/mediawiki.Uri.loose.regexp [new file with mode: 0644]
resources/src/mediawiki/mediawiki.Uri.strict.regexp [new file with mode: 0644]
resources/src/mediawiki/mediawiki.template.regexp.js [new file with mode: 0644]

index 2293984..cac87b9 100644 (file)
@@ -849,6 +849,10 @@ return array(
                ),
                'targets' => array( 'desktop', 'mobile' ),
        ),
+       'mediawiki.template.regexp' => array(
+               'scripts' => 'resources/src/mediawiki/mediawiki.template.regexp.js',
+               'targets' => array( 'desktop', 'mobile' ),
+       ),
        'mediawiki.apipretty' => array(
                'styles' => 'resources/src/mediawiki/mediawiki.apipretty.css',
                'targets' => array( 'desktop', 'mobile' ),
@@ -1155,6 +1159,10 @@ return array(
        ),
        'mediawiki.Uri' => array(
                'scripts' => 'resources/src/mediawiki/mediawiki.Uri.js',
+               'templates' => array(
+                       'strict.regexp' => 'resources/src/mediawiki/mediawiki.Uri.strict.regexp',
+                       'loose.regexp' => 'resources/src/mediawiki/mediawiki.Uri.loose.regexp',
+               ),
                'dependencies' => 'mediawiki.util',
                'targets' => array( 'desktop', 'mobile' ),
        ),
index 07d8900..ac6c583 100644 (file)
        /**
         * Regular expressions to parse many common URIs.
         *
+        * As they are gnarly, they have been moved to separate files to allow us to format them in the
+        * 'extended' regular expression format (which JavaScript normally doesn't support). The subset of
+        * features handled is minimal, but just the free whitespace gives us a lot.
+        *
         * @private
         * @static
         * @property {Object} parser
         */
        var parser = {
-               strict: /^(?:([^:\/?#]+):)?(?:\/\/(?:(?:([^:@\/?#]*)(?::([^:@\/?#]*))?)?@)?([^:\/?#]*)(?::(\d*))?)?((?:[^?#\/]*\/)*[^?#]*)(?:\?([^#]*))?(?:#(.*))?/,
-               loose:  /^(?:(?![^:@]+:[^:@\/]*@)([^:\/?#.]+):)?(?:\/\/)?(?:(?:([^:@\/?#]*)(?::([^:@\/?#]*))?)?@)?([^:\/?#]*)(?::(\d*))?((?:\/(?:[^?#](?![^?#\/]*\.[^?#\/.]+(?:[?#]|$)))*\/?)?[^?#\/]*)(?:\?([^#]*))?(?:#(.*))?/
+               strict: mw.template.get( 'mediawiki.Uri', 'strict.regexp' ).render(),
+               loose: mw.template.get( 'mediawiki.Uri', 'loose.regexp' ).render()
        },
 
        /**
diff --git a/resources/src/mediawiki/mediawiki.Uri.loose.regexp b/resources/src/mediawiki/mediawiki.Uri.loose.regexp
new file mode 100644 (file)
index 0000000..300ab3b
--- /dev/null
@@ -0,0 +1,22 @@
+^
+(?:
+       (?![^:@]+:[^:@/]*@)
+       (?<protocol>[^:/?#.]+):
+)?
+(?://)?
+(?:(?:
+       (?<user>[^:@/?#]*)
+       (?::(?<password>[^:@/?#]*))?
+)?@)?
+(?<host>[^:/?#]*)
+(?::(?<port>\d*))?
+(?<path>
+       (?:/
+               (?:[^?#]
+                       (?![^?#/]*\.[^?#/.]+(?:[?#]|$))
+               )*/?
+       )?
+       [^?#/]*
+)
+(?:\?(?<query>[^#]*))?
+(?:\#(?<fragment>.*))?
diff --git a/resources/src/mediawiki/mediawiki.Uri.strict.regexp b/resources/src/mediawiki/mediawiki.Uri.strict.regexp
new file mode 100644 (file)
index 0000000..2ac7d2f
--- /dev/null
@@ -0,0 +1,13 @@
+^
+(?:(?<protocol>[^:/?#]+):)?
+(?://(?:
+       (?:
+               (?<user>[^:@/?#]*)
+               (?::(?<password>[^:@/?#]*))?
+       )?@)?
+       (?<host>[^:/?#]*)
+       (?::(?<port>\d*))?
+)?
+(?<path>(?:[^?#/]*/)*[^?#]*)
+(?:\?(?<query>[^#]*))?
+(?:\#(?<fragment>.*))?
diff --git a/resources/src/mediawiki/mediawiki.template.regexp.js b/resources/src/mediawiki/mediawiki.template.regexp.js
new file mode 100644 (file)
index 0000000..3ec0a1f
--- /dev/null
@@ -0,0 +1,15 @@
+mediaWiki.template.registerCompiler( 'regexp', {
+       compile: function ( src ) {
+               return {
+                       render: function () {
+                               return new RegExp(
+                                       src
+                                               // Extended format: whitespace is only layout, so remove all of it
+                                               .replace( /\s+/g, '' )
+                                               // Drop group names, turning each (?<name>...) into a plain capturing group (...)
+                                               .replace( /\?<\w+?>/g, '' )
+                               );
+                       }
+               };
+       }
+} );