Follow-up I5b02aa914916f64492c85ce6dcc3272b6406551a (#4335), also put a link in the...
[lhc/web/wiklou.git] / includes / normal / Utf8CaseGenerate.php
1 <?php
2 /**
3 * This script generates Utf8Case.php from the Unicode Character Database
4 * and supplementary files.
5 *
6 * Copyright © 2004,2008 Brion Vibber <brion@pobox.com>
7 * http://www.mediawiki.org/
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write to the Free Software Foundation, Inc.,
21 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 * http://www.gnu.org/copyleft/gpl.html
23 *
24 * @file
25 * @ingroup UtfNormal
26 */
27
// This generator is a maintenance script: refuse to run under any SAPI
// other than the command-line one.
if ( PHP_SAPI !== 'cli' ) {
	die( "Run me from the command line please.\n" );
}
31
32 require_once 'UtfNormalDefines.php';
33 require_once 'UtfNormalUtil.php';
34
// Read UnicodeData.txt and collect the simple (1:1) case mappings.
// Each record is one line of semicolon-separated fields; this script uses
// field 0 (code point, hex), field 12 (simple uppercase mapping, hex) and
// field 13 (simple lowercase mapping, hex).
$in = fopen( "UnicodeData.txt", "rt" );
if ( !$in ) {
	print "Can't open UnicodeData.txt for reading.\n";
	print "If necessary, fetch this file from the internet:\n";
	print "http://www.unicode.org/Public/UNIDATA/UnicodeData.txt\n";
	exit( -1 );
}

// Both tables map a UTF-8 encoded source character to its UTF-8 encoded
// replacement character.
$wikiUpperChars = array();
$wikiLowerChars = array();

print "Reading character definitions...\n";
while ( false !== ( $line = fgets( $in ) ) ) {
	$columns = explode( ';', $line );
	if ( count( $columns ) < 14 ) {
		// Blank or truncated line: fields 12/13 do not exist, so there is
		// nothing to read here. Skip it rather than index past the end.
		continue;
	}

	$codepoint = $columns[0];
	// trim() guards against a trailing line terminator when field 13
	// happens to be the last field on the line.
	$simpleUpper = trim( $columns[12] );
	$simpleLower = trim( $columns[13] );

	$source = codepointToUtf8( hexdec( $codepoint ) );
	if ( $simpleUpper !== '' ) {
		$wikiUpperChars[$source] = codepointToUtf8( hexdec( $simpleUpper ) );
	}
	if ( $simpleLower !== '' ) {
		$wikiLowerChars[$source] = codepointToUtf8( hexdec( $simpleLower ) );
	}
}
fclose( $in );
62
// Write Utf8Case.php, a PHP file that defines $wikiUpperChars and
// $wikiLowerChars as array literals built from the tables collected above.
$out = fopen( "Utf8Case.php", "wt" );
if ( !$out ) {
	print "Can't create file Utf8Case.php\n";
	exit( -1 );
}

$outUpperChars = escapeArray( $wikiUpperChars );
$outLowerChars = escapeArray( $wikiLowerChars );

// The PHP open tag is assembled from two pieces (presumably so that this
// script's own source never contains a second literal open tag).
$outdata = "<" . "?php
/**
 * Simple 1:1 upper/lowercase switching arrays for utf-8 text.
 * Won't get context-sensitive things yet.
 *
 * Hack for bugs in ucfirst() and company
 *
 * These are pulled from memcached if possible, as this is faster than filling
 * up a big array manually.
 *
 * @file
 * @ingroup Language
 */

/**
 * Translation array to get upper case character
 */
\$wikiUpperChars = $outUpperChars;

/**
 * Translation array to get lower case character
 */
\$wikiLowerChars = $outLowerChars;\n";

// fwrite() returns false, or a count smaller than the data length, when the
// write fails; in that case do not report success.
$written = fwrite( $out, $outdata );
fclose( $out );
if ( $written === false || $written < strlen( $outdata ) ) {
	print "Can't write to file Utf8Case.php\n";
	exit( -1 );
}
print "Wrote out Utf8Case.php\n";
97
98
/**
 * Render a translation table as PHP source code for an array literal.
 *
 * Every key/value pair is formatted with escapeLine(); the resulting lines
 * are joined with ",\n" and wrapped in "array(\n" ... "\n)".
 *
 * @param $arr Array: map of source string => replacement string
 * @return String: PHP source text of the array literal
 */
function escapeArray( $arr ) {
	$entries = array();
	foreach ( $arr as $source => $target ) {
		$entries[] = escapeLine( $source, $target );
	}
	return "array(\n" . implode( ",\n", $entries ) . "\n)";
}
107
/**
 * Format one key/value pair as a single entry line of a PHP array literal.
 *
 * Both strings are passed through escapeSingleString() and then placed
 * inside single quotes. The line starts with a tab and carries no trailing
 * comma or newline.
 *
 * @param $key String: array key
 * @param $val String: array value
 * @return String: text of the form "\t'key' => 'value'"
 */
function escapeLine( $key, $val ) {
	return sprintf(
		"\t'%s' => '%s'",
		escapeSingleString( $key ),
		escapeSingleString( $val )
	);
}