- $char = mb_convert_encoding( '&#' . $i . ';', 'UTF-8', 'HTML-ENTITIES' );
- $phpUpper = mb_strtoupper( $char );
- $jsUpper = $jsUpperChars[$i];
- if ( $jsUpper !== $phpUpper ) {
- $data[$char] = $phpUpper;
+
+ public function execute() {
+ global $wgContLang;
+
+ $data = [];
+
+ $result = Shell::command( [ 'node', __DIR__ . '/generateJsToUpperCaseList.js' ] )
+ // Node allocates lots of memory
+ ->limits( [ 'memory' => 1024 * 1024 ] )
+ ->execute();
+
+ if ( $result->getExitcode() !== 0 ) {
+ $this->output( $result->getStderr() );
+ return;
+ }
+
+ $jsUpperChars = json_decode( $result->getStdout() );
+
+ for ( $i = 0; $i <= 0x10ffff; $i++ ) {
+ if ( $i >= 0xd800 && $i <= 0xdfff ) {
+ // Skip surrogate code points (U+D800..U+DFFF); they are not valid characters on their own
+ continue;
+ }
+ $char = \UtfNormal\Utils::codepointToUtf8( $i );
+ $phpUpper = $wgContLang->ucfirst( $char );
+ $jsUpper = $jsUpperChars[$i];
+ if ( $jsUpper !== $phpUpper ) {
+ $data[$char] = $phpUpper;
+ }
+ }
+
+ $this->output( str_replace( ' ', "\t",
+ json_encode( $data, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE )
+ ) . "\n" );