<?php
/**
+ * Cantonese (粵語) specific code.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Language
+ */
+
+/**
+ * Cantonese (粵語)
+ *
* @ingroup Language
*/
class LanguageYue extends Language {
- function stripForSearch( $string ) {
- wfProfileIn( __METHOD__ );
- global $wgSearchType;
- $s = $string;
+ /**
+ * Word-break indicator for this language.
+ *
+ * This implementation unconditionally answers false.
+ *
+ * @return bool Always false.
+ */
+ function hasWordBreaks() {
+ return false;
+ }
- // Double-width roman characters: ff00-ff5f ~= 0020-007f
- $s = preg_replace( '/\xef\xbc([\x80-\xbf])/e', 'chr((ord("$1") & 0x3f) + 0x20)', $s );
- $s = preg_replace( '/\xef\xbd([\x80-\x99])/e', 'chr((ord("$1") & 0x3f) + 0x60)', $s );
+ /**
+ * Eventually this should be a word segmentation;
+ * for now just treat each character as a word.
+ *
+ * The pattern matches one byte in the range 0xC0-0xFF followed by any
+ * number of bytes in the range 0x80-0xBF, i.e. a single multi-byte
+ * UTF-8 sequence (lead byte plus continuation bytes). Plain ASCII
+ * bytes are not matched. The pattern and the input are handed to
+ * insertSpace(), which is defined outside this file.
+ * NOTE(review): presumably insertSpace() pads each match with spaces
+ * and collapses runs of spaces, as the previous inline code did; confirm
+ * against its definition.
+ *
+ * @todo FIXME: Only do this for Han characters...
+ *
+ * @param string $string Text to segment.
+ * @return string Whatever insertSpace() returns for the input and pattern.
+ */
+ function segmentByWord( $string ) {
+ $reg = "/([\\xc0-\\xff][\\x80-\\xbf]*)/";
+ $s = self::insertSpace( $string, $reg );
+ return $s;
+ }
- if ( $wgSearchType != 'LuceneSearch' ) {
- // eventually this should be a word segmentation;
- // for now just treat each character as a word.
- // Not for LuceneSearch, because LSearch will
- // split the text to words itself.
- // @todo Fixme: only do this for Han characters...
- $s = preg_replace(
- "/([\\xc0-\\xff][\\x80-\\xbf]*)/",
- " $1 ", $s);
- $s = preg_replace( '/ +/', ' ', $s );
- }
+ /**
+ * Normalize a string for search.
+ *
+ * Steps, in order: pass the input through convertDoubleWidth(), trim
+ * leading and trailing whitespace, then delegate to the parent class's
+ * normalizeForSearch(). The whole call is bracketed by
+ * wfProfileIn()/wfProfileOut() using the method name as the key.
+ * NOTE(review): convertDoubleWidth() is defined outside this file;
+ * presumably it maps double-width roman characters to their ASCII
+ * counterparts; confirm against its definition.
+ *
+ * @param string $string Text to normalize.
+ * @return string The value returned by the parent normalizeForSearch().
+ */
+ function normalizeForSearch( $string ) {
+ wfProfileIn( __METHOD__ );
+ // Double-width roman characters
+ $s = self::convertDoubleWidth( $string );
$s = trim( $s );
+ $s = parent::normalizeForSearch( $s );
- // Do general case folding and UTF-8 armoring
- $s = parent::stripForSearch( $s );
wfProfileOut( __METHOD__ );
return $s;
}