From be266087b47cc5a6dee48bffc89e58d232e59890 Mon Sep 17 00:00:00 2001 From: "C. Scott Ananian" Date: Thu, 21 Jun 2018 11:36:07 -0400 Subject: [PATCH] Don't armor french spaces before punctuation followed by word characters This makes Sanitizer::armorFrenchSpaces() more selective about where it inserts &nbsp;, avoiding the need to protect common "not actually French" cases like `color: red !important` and `foo :bar`. We also added the single-guillemet to the rules, to accommodate Swiss French. Bug: T197902 Change-Id: I42e747f17c17c1513fec96cdd2d3285da7da05a4 --- includes/parser/Sanitizer.php | 5 +- tests/parser/parserTests.txt | 89 ++++++++++++++++++++++++----------- 2 files changed, 65 insertions(+), 29 deletions(-) diff --git a/includes/parser/Sanitizer.php b/includes/parser/Sanitizer.php index 21498f89eb..7a0d5f6e88 100644 --- a/includes/parser/Sanitizer.php +++ b/includes/parser/Sanitizer.php @@ -1155,9 +1155,10 @@ class Sanitizer { $fixtags = [ # French spaces, last one Guillemet-left # only if there is something before the space - '/(.) (?=[?:;!%»])/u' => "\\1$space", + # and a non-word character after the punctuation. + '/(\S) (?=[?:;!%»›](?!\w))/u' => "\\1$space", # French spaces, Guillemet-right - '/(«) /u' => "\\1$space", + '/([«‹]) /u' => "\\1$space", ]; return preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text ); } diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt index e06a732000..d19a397553 100644 --- a/tests/parser/parserTests.txt +++ b/tests/parser/parserTests.txt @@ -3880,7 +3880,7 @@ Simple definition !! wikitext ;name :Definition !! html -
name 
+
name
Definition
!! end @@ -3909,7 +3909,7 @@ Definition list with URL link !! wikitext ;http://example.com/ :definition !! html -
http://example.com/ 
+
http://example.com/
definition
!! end @@ -3940,7 +3940,7 @@ Definition list with news link containing colon !! wikitext ;news:alt.wikipedia.rox :This isn't even a real newsgroup! !! html/php -
news:alt.wikipedia.rox 
+
news:alt.wikipedia.rox
This isn't even a real newsgroup!
!! html/parsoid @@ -3961,7 +3961,7 @@ Definition lists: colon in external link text !! wikitext ;[http://www.wikipedia2.org/ Wikipedia :The Next Generation] :OK, I made that up !! html -
Wikipedia :The Next Generation 
+
Wikipedia :The Next Generation
OK, I made that up
!! end @@ -3980,7 +3980,7 @@ Definition lists: self-closed tag !! wikitext ;one
two :two-line fun !! html -
one
two 
+
one
two
two-line fun
!! end @@ -4000,7 +4000,7 @@ Definition lists: excess closed tags !! wikitext ;onetwo :bad tag fun !! html/php+tidy -
onetwo 
+
onetwo
bad tag fun
!! html/parsoid
@@ -4037,7 +4037,7 @@ Definition and unordered list using wiki syntax nested in unordered list using h !! html
  • -
    term 
    +
    term
    description
    • unordered
@@ -4437,9 +4437,9 @@ Definition Lists: Mixed Lists: Test 4 *;d1 :d2 *;d3 :d4 !! html -
  • d1 
    +
    • d1
      d2
      -
      d3 
      +
      d3
      d4
    !! end @@ -4498,7 +4498,7 @@ Definition Lists: Mixed Lists: Test 9 !! wikitext *;foo :bar !! html -
    • foo 
      +
      • foo
        bar
      !! end @@ -4509,7 +4509,7 @@ Definition Lists: Mixed Lists: Test 10 !! wikitext *#;foo :bar !! html -
        1. foo 
          +
            1. foo
              bar
          !! end @@ -4542,15 +4542,15 @@ Definition Lists: Mixed Lists: Test 12 *#*#;*;;foo :bar *#*#;boo :baz !! html/php -
                1. foo 
                  +
                        1. foo
                          • bar
                  -
                  boo 
                  +
                  boo
                  baz
          !! html/php+tidy -
                1. foo 
                  +
                        1. foo
                          • bar
                      -
                      boo 
                      +
                      boo
                      baz
                  !! html/parsoid
                    @@ -4568,12 +4568,12 @@ Definition Lists: Mixed Lists: Test 12
                    -
                    foo 
                    +
                    foo
                    bar
                  -
                  boo 
                  +
                  boo
                  baz
              • @@ -4582,6 +4582,18 @@ Definition Lists: Mixed Lists: Test 12
              !! end +!! test +Definition Lists: Mixed Lists: Test 13 +!! wikitext +*#*#;*;;foo : bar +*#*#;boo : baz +!! html+tidy +
                    1. foo 
                      +
                      • bar
                  +
                  boo 
                  +
                  baz
              +!! end + # FIXME: Maybe get rid of this test? # From whitelist: # * The test is wrong, there are two colons where there should be :; @@ -4591,7 +4603,7 @@ Definition Lists: Weird Ones: Test 1 !! wikitext *#;*::;;foo :bar (who uses this?) !! html/php+tidy -
                1. foo 
                  +
                    1. foo
                      • bar (who uses this?)
                  !! html/parsoid
                    @@ -4609,7 +4621,7 @@ Definition Lists: Weird Ones: Test 1
                    -
                    foo 
                    +
                    foo
                    bar (who uses this?)
                    @@ -6716,7 +6728,7 @@ Element attributes with double ! should not be broken up by !! html/php -
                    hi
                    +
                    hi
                    !! html/parsoid @@ -6737,7 +6749,7 @@ parsoid=wt2html !! html/php - +
                    style="color: red !important;" data-contrived="put this herestyle="color: red !important;" data-contrived="put this here foo
                    @@ -18758,7 +18770,7 @@ Punctuation: CSS !important (T13874) !! wikitext
                    important
                    !! html -
                    important
                    +
                    important
                    !!end @@ -21536,16 +21548,16 @@ Definition list code coverage ;title :def ;title:def !! html/php -
                    title  
                    +
                    title
                    def
                    -
                    title 
                    +
                    title
                    def
                    title
                    def
                    !! html/parsoid -
                    title  
                    def
                    -
                    title 
                    def
                    +
                    title
                    def
                    +
                    title
                    def
                    title
                    def
                    !! end @@ -24050,7 +24062,7 @@ Play a bit with r67090 and T5158
                     
                     
                    !! html/php -
                     
                    +
                     
                     
                     
                     
                    @@ -24063,6 +24075,29 @@ Play a bit with r67090 and T5158 !! end +!! test +French spaces in wikitext +!! wikitext +foo ! bar ? bat 50 % is less than 75 %. + +Hello : this ; is « something ‹ else › again » +!! html +

                    foo ! bar ? bat 50 % is less than 75 %. +

                    Hello : this ; is « something ‹ else › again » +

                    +!! end + +# It would be reasonable for Parsoid and PHP to differ here. +# The PHP behavior is arguably a bug. +!! test +Corner case: french spaces in definition list +!! wikitext +;foo : bar +!! html+tidy +
                    foo 
                    +
                    bar
                    +!! end + !! test T5158: Test for French spaces in attributes !! wikitext -- 2.20.1