Revert r35848 per Brion's WONTFIX of bug 14536: "This would just mean that there...
[lhc/web/wiklou.git] / includes / normal / Utf8CaseGenerate.php
1 <?php
2 # Copyright (C) 2004,2008 Brion Vibber <brion@pobox.com>
3 # http://www.mediawiki.org/
4 #
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; either version 2 of the License, or
8 # (at your option) any later version.
9 #
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License along
16 # with this program; if not, write to the Free Software Foundation, Inc.,
17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 # http://www.gnu.org/copyleft/gpl.html
19
20 /**
21 * This script generates Utf8Case.php from the Unicode Character Database
22 * and supplementary files.
23 *
24 * @ingroup UtfNormal
25 * @access private
26 */
27
28 /** */
29
// This generator is a maintenance tool: bail out under any SAPI other than
// the command-line one.
if ( 'cli' != php_sapi_name() ) {
	exit( "Run me from the command line please.\n" );
}
33
34 require_once 'UtfNormalUtil.php';
35
// Open the Unicode Character Database table from the current directory.
$in = fopen( "UnicodeData.txt", "rt" );
if ( !$in ) {
	print "Can't open UnicodeData.txt for reading.\n";
	print "If necessary, fetch this file from the internet:\n";
	print "http://www.unicode.org/Public/UNIDATA/UnicodeData.txt\n";
	exit( -1 );
}

// Maps of UTF-8 source character => UTF-8 upper/lower-case counterpart.
$wikiUpperChars = array();
$wikiLowerChars = array();

print "Reading character definitions...\n";
while ( false !== ( $line = fgets( $in ) ) ) {
	// explode() replaces split(), which was removed in PHP 7. The delimiter
	// is a literal ';', so the resulting fields are the same.
	$columns = explode( ';', $line );
	if ( count( $columns ) < 14 ) {
		// Blank or truncated line: the mapping fields (12 and 13) are absent.
		continue;
	}

	$codepoint = $columns[0];
	// Fields 12 and 13 hold the hex code points of the simple upper-case
	// and lower-case mappings; an empty field means "no mapping".
	$simpleUpper = $columns[12];
	$simpleLower = $columns[13];

	$source = codepointToUtf8( hexdec( $codepoint ) );
	if ( $simpleUpper !== '' ) {
		$wikiUpperChars[$source] = codepointToUtf8( hexdec( $simpleUpper ) );
	}
	if ( $simpleLower !== '' ) {
		$wikiLowerChars[$source] = codepointToUtf8( hexdec( $simpleLower ) );
	}
}
fclose( $in );
63
// Write the collected case maps out as a PHP source file in the current
// directory.
$out = fopen( "Utf8Case.php", "wt" );
if ( !$out ) {
	print "Can't create file Utf8Case.php\n";
	exit( -1 );
}

// Render both maps as PHP array(...) source text for interpolation below.
$outUpperChars = escapeArray( $wikiUpperChars );
$outLowerChars = escapeArray( $wikiLowerChars );

// The open tag is assembled from two pieces, presumably so that this file's
// own source never contains a literal PHP open tag inside a string.
$outdata = "<" . "?php
/**
* Simple 1:1 upper/lowercase switching arrays for utf-8 text
* Won't get context-sensitive things yet
*
* Hack for bugs in ucfirst() and company
*
* These are pulled from memcached if possible, as this is faster than filling
* up a big array manually.
* @ingroup Language
*/

/*
* Translation array to get upper case character
*/

\$wikiUpperChars = $outUpperChars;

/*
* Translation array to get lower case character
*/
\$wikiLowerChars = $outLowerChars;\n";

// Check the write result so that a failed write (e.g. a full disk) is not
// reported as success.
if ( fwrite( $out, $outdata ) === false ) {
	fclose( $out );
	print "Error writing to Utf8Case.php\n";
	exit( -1 );
}
fclose( $out );
print "Wrote out Utf8Case.php\n";
97
98
/**
 * Render an associative array as the source text of a PHP array() literal.
 *
 * Each key/value pair is formatted by escapeLine(); the resulting lines are
 * joined with ",\n" and wrapped in "array(\n" ... "\n)".
 *
 * @param array $arr Map to render
 * @return string PHP source text for the array literal
 */
function escapeArray( $arr ) {
	$lines = array();
	foreach ( $arr as $key => $val ) {
		$lines[] = escapeLine( $key, $val );
	}
	return "array(\n" . implode( ",\n", $lines ) . "\n)";
}
107
/**
 * Format one key/value pair as a tab-indented "'key' => 'value'" entry for a
 * generated array literal.
 *
 * Both parts are passed through escapeSingleString(), which is not defined
 * in this file (presumably provided by UtfNormalUtil.php - confirm).
 *
 * @param string $key Array key
 * @param string $val Array value
 * @return string One line of PHP source, without a trailing comma or newline
 */
function escapeLine( $key, $val ) {
	return "\t'" . escapeSingleString( $key ) . "' => '" . escapeSingleString( $val ) . "'";
}