- $char = mb_convert_encoding( '&#' . $i . ';', 'UTF-8', 'HTML-ENTITIES' );
- $phpUpper = mb_strtoupper( $char );
- $jsUpper = $jsUpperChars[$i];
- if ( $jsUpper !== $phpUpper ) {
- $data[$char] = $phpUpper;
+
+ public function execute() {
+ global $wgContLang, $IP;
+
+ $data = [];
+
+ $result = Shell::command(
+ [ 'node', $IP . '/maintenance/mediawiki.Title/generateJsToUpperCaseList.js' ]
+ )
+ // Node allocates lots of memory
+ ->limits( [ 'memory' => 1024 * 1024 ] )
+ ->execute();
+
+ if ( $result->getExitcode() !== 0 ) {
+ $this->output( $result->getStderr() );
+ return;
+ }
+
+ $jsUpperChars = json_decode( $result->getStdout() );
+
+ for ( $i = 0; $i <= 0x10ffff; $i++ ) {
+ if ( $i >= 0xd800 && $i <= 0xdfff ) {
+ // Skip surrogate code points (U+D800..U+DFFF); they are not valid characters on their own
+ continue;
+ }
+ $char = \UtfNormal\Utils::codepointToUtf8( $i );
+ $phpUpper = $wgContLang->ucfirst( $char );
+ $jsUpper = $jsUpperChars[$i];
+ if ( $jsUpper !== $phpUpper ) {
+ if ( $char === $phpUpper ) {
+ // Optimisation: Use the empty string to signal "leave character unchanged".
+ // Reduces the transfer size by ~50%. Reduces browser memory cost as well.
+ $data[$char] = '';
+ } else {
+ $data[$char] = $phpUpper;
+ }
+ }
+ }
+
+ $mappingJson = str_replace( ' ', "\t",
+ json_encode( $data, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE )
+ ) . "\n";
+ $outputPath = '/resources/src/mediawiki.Title/phpCharToUpper.json';
+ $file = fopen( $IP . $outputPath, 'w' );
+ if ( !$file ) {
+ $this->fatalError( "Unable to write file \"$IP$outputPath\"" );
+ }
+ fwrite( $file, $mappingJson );
+
+ $this->output( count( $data ) . " differences found.\n" );
+ $this->output( "Written to $outputPath\n" );