From ee90bd4c5c4fc2100386fa31ae9324cc13eaf919 Mon Sep 17 00:00:00 2001 From: "C. Scott Ananian" Date: Sun, 1 Apr 2018 23:49:19 -0400 Subject: [PATCH] Make LanguageConverter roman-numeral cases consistent Add a look-ahead to ensure that the regex intended to match roman numerals doesn't also match the empty string. Tweak the regular expressions slightly to ensure that Sr/Ku/Crh all have identical regular expressions. Change-Id: If43bf99a21c42c6c5050f814c0bc99edec353228 --- languages/classes/LanguageCrh.php | 3 ++- languages/classes/LanguageKu.php | 3 ++- languages/classes/LanguageSr.php | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/languages/classes/LanguageCrh.php b/languages/classes/LanguageCrh.php index d5418b9a6d..01a5a793c0 100644 --- a/languages/classes/LanguageCrh.php +++ b/languages/classes/LanguageCrh.php @@ -219,7 +219,8 @@ class CrhConverter extends LanguageConverter { } // check for roman numbers like VII, XIX... - $roman = '/^M{0,3}(C[DM]|D{0,1}C{0,3})(X[LC]|L{0,1}X{0,3})(I[VX]|V{0,1}I{0,3})$/u'; + // Lookahead assertion ensures $roman doesn't match the empty string + $roman = '/^(?=[MDCLXVI])M{0,4}(C[DM]|D?C{0,3})(X[LC]|L?X{0,3})(I[VX]|V?I{0,3})$/u'; # match any sub-string of the relevant letters and convert it $matches = preg_split( '/(\b|^)[^' . $letters . ']+(\b|$)/u', diff --git a/languages/classes/LanguageKu.php b/languages/classes/LanguageKu.php index ef77775385..b90ca41369 100644 --- a/languages/classes/LanguageKu.php +++ b/languages/classes/LanguageKu.php @@ -192,7 +192,8 @@ class KuConverter extends LanguageConverter { /* From Kazakh interface, maybe we need it later $breaks = '[^\w\x80-\xff]'; // regexp for roman numbers - $roman = 'M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})'; + // Lookahead assertion ensures $roman doesn't match the empty string + $roman = '(?=[MDCLXVI])M{0,4}(C[DM]|D?C{0,3})(X[LC]|L?X{0,3})(I[VX]|V?I{0,3})'; $roman = ''; $reg = '/^'.$roman.'$|^'.$roman.$breaks.'|'.$breaks.$roman.'$|'.$breaks.$roman.$breaks.'/'; diff --git a/languages/classes/LanguageSr.php b/languages/classes/LanguageSr.php index e0ff5e1676..0ad7860fc0 100644 --- a/languages/classes/LanguageSr.php +++ b/languages/classes/LanguageSr.php @@ -115,7 +115,8 @@ class SrConverter extends LanguageConverter { $breaks = '[^\w\x80-\xff]'; // regexp for roman numbers - $roman = 'M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})'; + // Lookahead assertion ensures $roman doesn't match the empty string + $roman = '(?=[MDCLXVI])M{0,4}(C[DM]|D?C{0,3})(X[LC]|L?X{0,3})(I[VX]|V?I{0,3})'; $reg = '/^' . $roman . '$|^' . $roman . $breaks . '|' . $breaks . $roman . '$|' . $breaks . $roman . $breaks . '/'; -- 2.20.1