French space armoring: Use unicode regex instead of octal escapes
author C. Scott Ananian <cscott@cscott.net>
Thu, 21 Jun 2018 18:53:04 +0000 (14:53 -0400)
committer C. Scott Ananian <cscott@cscott.net>
Fri, 22 Jun 2018 19:49:01 +0000 (15:49 -0400)
Follow up to Id8cdb887182f346acab2d108836ce201626848af

Change-Id: I35f7b35746e63a98a115a0dbc9d5869b691c3a9c

includes/parser/Sanitizer.php

index 89a7c96..21498f8 100644 (file)
@@ -1155,9 +1155,9 @@ class Sanitizer {
                $fixtags = [
                        # French spaces, last one Guillemet-left
                        # only if there is something before the space
-                       '/(.) (?=\\?|:|;|!|%|\\302\\273)/' => "\\1$space",
+                       '/(.) (?=[?:;!%»])/u' => "\\1$space",
                        # French spaces, Guillemet-right
-                       '/(\\302\\253) /' => "\\1$space",
+                       '/(«) /u' => "\\1$space",
                ];
                return preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text );
        }