Merge "Improve docs for Title::getInternalURL/getCanonicalURL"
[lhc/web/wiklou.git] / includes / collation / Collation.php
1 <?php
2 /**
3 * Database row sorting.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 */
22
23 use MediaWiki\MediaWikiServices;
24
/**
 * Base class for collations: strategies that turn a string into a binary
 * sortkey and into the "first letter" heading it should be grouped under.
 *
 * Concrete implementations are obtained by name through factory(), or through
 * singleton() for the collation named by $wgCategoryCollation.
 *
 * @since 1.16.3
 * @author Tim Starling
 */
abstract class Collation {
	/** @var Collation|null Lazily created instance handed out by singleton() */
	private static $instance;

	/**
	 * Get the shared collation named by $wgCategoryCollation.
	 *
	 * The instance is built on the first call and reused by later calls.
	 *
	 * @since 1.16.3
	 * @throws MWException If $wgCategoryCollation names an unknown collation
	 * @return Collation
	 */
	public static function singleton() {
		if ( !self::$instance ) {
			global $wgCategoryCollation;
			self::$instance = self::factory( $wgCategoryCollation );
		}
		return self::$instance;
	}

	/**
	 * Create a collation object from its name.
	 *
	 * Resolution order:
	 *  - the built-in names listed in the switch below;
	 *  - any other name of the form "uca-<locale>", which is handed to
	 *    IcuCollation with "<locale>" as its argument;
	 *  - the 'Collation::factory' hook, which receives the name and a
	 *    by-reference variable that a handler may set to a Collation instance.
	 *
	 * @since 1.16.3
	 * @throws MWException If none of the above yields a Collation
	 * @param string $collationName
	 * @return Collation
	 */
	public static function factory( $collationName ) {
		switch ( $collationName ) {
			case 'uppercase':
				return new UppercaseCollation;
			case 'numeric':
				return new NumericUppercaseCollation(
					MediaWikiServices::getInstance()->getContentLanguage() );
			case 'identity':
				return new IdentityCollation;
			case 'uca-default':
				return new IcuCollation( 'root' );
			case 'uca-default-u-kn':
				return new IcuCollation( 'root-u-kn' );
			case 'xx-uca-ckb':
				return new CollationCkb;
			case 'uppercase-ab':
				return new AbkhazUppercaseCollation;
			case 'uppercase-ba':
				return new BashkirUppercaseCollation;
			default:
				// Generic "uca-<locale>" names map straight onto IcuCollation.
				$match = [];
				if ( preg_match( '/^uca-([A-Za-z@=-]+)$/', $collationName, $match ) ) {
					return new IcuCollation( $match[1] );
				}

				// Provide a mechanism for extensions to hook in.
				$collationObject = null;
				Hooks::run( 'Collation::factory', [ $collationName, &$collationObject ] );

				// Only accept what the hook produced if it really is a Collation.
				if ( $collationObject instanceof self ) {
					return $collationObject;
				}

				// If all else fails...
				throw new MWException( __METHOD__ . ": unknown collation type \"$collationName\"" );
		}
	}

	/**
	 * Given a string, convert it to a (hopefully short) key that can be used
	 * for efficient sorting. A binary sort according to the sortkeys
	 * corresponds to a logical sort of the corresponding strings. Current
	 * code expects that a line feed character should sort before all others, but
	 * has no other particular expectations (and that one can be changed if
	 * necessary).
	 *
	 * @since 1.16.3
	 *
	 * @param string $string UTF-8 string
	 * @return string Binary sortkey
	 */
	abstract public function getSortKey( $string );

	/**
	 * Given a string, return the logical "first letter" to be used for
	 * grouping on category pages and so on. This has to be coordinated
	 * carefully with getSortKey(), or else the sorted list might jump
	 * back and forth between the same "initial letters" or other pathological
	 * behavior. For instance, if you just return the first character, but "a"
	 * sorts the same as "A" based on getSortKey(), then you might get a
	 * list like
	 *
	 * == A ==
	 * * [[Aardvark]]
	 *
	 * == a ==
	 * * [[antelope]]
	 *
	 * == A ==
	 * * [[Ape]]
	 *
	 * etc., assuming for the sake of argument that $wgCapitalLinks is false.
	 *
	 * @since 1.16.3
	 *
	 * @param string $string UTF-8 string
	 * @return string UTF-8 string corresponding to the first letter of input
	 */
	abstract public function getFirstLetter( $string );

}