<?php
/**
+ * Statistics about the localisation.
+ *
* @package MediaWiki
* @subpackage Maintenance
*
* @author Ævar Arnfjörð Bjarmason <avarab@gmail.com>
* @author Ashar Voultoiz <thoane@altern.org>
- * @bug 2499
*
* Output is posted from time to time on:
* http://meta.wikimedia.org/wiki/Localization_statistics
*/
-/** */
-require_once('commandLine.inc');
-if( isset($options['help']) ) { usage(); die(); }
-// default output is WikiText
-if( !isset($options['output']) ) { $options['output']='wiki'; }
+require_once( 'commandLine.inc' );
+require_once( 'languages.inc' );
+if ( isset( $options['help'] ) ) {
+ showUsage();
+}
+# Default output is WikiText
+if ( !isset( $options['output'] ) ) {
+ $options['output'] = 'wiki';
+}
/** Print a usage message and exit. */
-function usage() {
-print <<<END
-Usage: php transstat.php [--help] [--output:csv|text|wiki] [--noredundant]
- --help : this helpful message
- --output : select an output engine one of:
- * 'csv' : Comma Separated Values.
- * 'wiki' : MediaWiki syntax.
- * 'text' : Text with tabs.
- Default output is 'wiki'
- --noredundant : do NOT calculate redundant (takes some time)
-
+function showUsage() {
+ print <<<END
+Usage: php transstat.php [--help] [--output=csv|text|wiki]
+ --help : this helpful message
+ --output : select an output engine one of:
+ * 'csv' : Comma Separated Values.
+ * 'wiki' : MediaWiki syntax (default).
+ * 'metawiki' : MediaWiki syntax used for Meta-Wiki.
+ * 'text' : Text with tabs.
+Example: php maintenance/transstat.php --output=text
END;
+ exit();
}
-
/** A general output object. Needs to be overridden. */
class statsOutput {
- var $output; // buffer that contain the text
- function statsOutput() { $this->output='';}
- function getContent() { return $this->output;}
-
- function formatPercent($subset, $total, $revert=false, $accuracy=2) {
+ function formatPercent( $subset, $total, $revert = false, $accuracy = 2 ) {
return @sprintf( '%.' . $accuracy . 'f%%', 100 * $subset / $total );
}
- // Override the next methods
- function heading() {}
- function footer() {}
- function blockstart() {}
- function blockend() {}
- function element($in, $heading=false) {}
+ # Override the following methods
+ function heading() {
+ }
+ function footer() {
+ }
+ function blockstart() {
+ }
+ function blockend() {
+ }
+ function element( $in, $heading = false ) {
+ }
}
-
/** Outputs WikiText */
class wikiStatsOutput extends statsOutput {
function heading() {
- $this->output .= "{| border=2 cellpadding=4 cellspacing=0 style=\"background: #f9f9f9; border: 1px #aaa solid; border-collapse: collapse;\" width=100%\n";
+ global $IP;
+ $version = SpecialVersion::getVersion( $IP );
+ echo "'''Statistics based on:''' <tt>" . $version . "</tt>\n\n";
+ echo "'''Note:''' These statistics can be generated by running <code>php maintenance/transstat.php</code>.\n\n";
+ echo "For additional information on specific languages (the message names, the actual problems, etc.), run <code>php maintenance/checkLanguage.php --lang=foo</code>.\n\n";
+ echo "{| border=2 cellpadding=4 cellspacing=0 style=\"background: #f9f9f9; border: 1px #aaa solid; border-collapse: collapse;\" width=100%\n";
}
- function footer() { $this->output .= "|}\n"; }
- function blockstart() { $this->output .= "|-\n"; }
- function blockend() { $this->output .= ''; }
- function element($in, $heading = false) {
- $this->output .= ($heading ? '!' : '|') . " $in\n";
+ function footer() {
+ echo "|}\n";
}
- function formatPercent($subset, $total, $revert=false, $accuracy=2) {
+ function blockstart() {
+ echo "|-\n";
+ }
+ function blockend() {
+ echo '';
+ }
+ function element( $in, $heading = false ) {
+ echo ($heading ? '!' : '|') . " $in\n";
+ }
+ function formatPercent( $subset, $total, $revert = false, $accuracy = 2 ) {
$v = @round(255 * $subset / $total);
- if($revert) $v = 255 - $v;
- if($v < 128) {
- // red to yellow
+ if ( $revert ) {
+ $v = 255 - $v;
+ }
+ if ( $v < 128 ) {
+ # Red to Yellow
$red = 'FF';
- $green = sprintf('%02X', 2*$v);
+ $green = sprintf( '%02X', 2 * $v );
} else {
- // yellow to green
- $red = sprintf('%02X', 2*(255 -$v) );
+ # Yellow to Green
+ $red = sprintf('%02X', 2 * ( 255 - $v ) );
$green = 'FF';
}
- $blue = '00';
- $color = $red.$green.$blue;
+ $blue = '00';
+ $color = $red . $green . $blue;
- $percent = statsOutput::formatPercent($subset, $total, $revert, $accuracy);
- return 'bgcolor="#'.$color.'" | '.$percent;
+ $percent = statsOutput::formatPercent( $subset, $total, $revert, $accuracy );
+ return 'bgcolor="#'. $color .'" | '. $percent;
+ }
+}
+
+/** Outputs WikiText and appends a category that is only used on Meta-Wiki. */
+class metawikiStatsOutput extends wikiStatsOutput {
+ function footer() {
+ echo "|}\n\n";
+ echo "[[Category:Localisation|Statistics]]\n";
}
}
/** Outputs tab-separated text, e.g. for use on a terminal. */
class textStatsOutput extends statsOutput {
- function element($in, $heading = false) {
- $this->output .= $in."\t";
+ function element( $in, $heading = false ) {
+ echo $in."\t";
+ }
+ function blockend() {
+ echo "\n";
}
- function blockend(){ $this->output .="\n";}
}
/** CSV output. Some people love Excel. */
class csvStatsOutput extends statsOutput {
- function element($in, $heading = false) {
- $this->output .= $in.";";
+ function element( $in, $heading = false ) {
+ echo $in . ";";
}
- function blockend(){ $this->output .="\n";}
-}
-
-
-/** FIXME: This takes an obscene amount of time */
-if(isset($options['noredundant'])) {
- function redundant(&$arr) { return 'NC'; }
-} else {
- function redundant(&$arr) {
- global $wgAllMessagesEn;
-
- $redundant = 0;
- foreach(array_keys($arr) as $key) {
- if ( ! array_key_exists( $key, $wgAllMessagesEn) )
- ++$redundant;
- }
- return $redundant;
+ function blockend() {
+ echo "\n";
}
}
-// Select an output engine
-switch ($options['output']) {
- case 'csv':
- $out = new csvStatsOutput(); break;
- case 'text':
- $out = new textStatsOutput(); break;
+# Select an output engine
+switch ( $options['output'] ) {
case 'wiki':
- $out = new wikiStatsOutput(); break;
+ $wgOut = new wikiStatsOutput();
+ break;
+ case 'metawiki':
+ $wgOut = new metawikiStatsOutput();
+ break;
+ case 'text':
+ $wgOut = new textStatsOutput();
+ break;
+ case 'csv':
+ $wgOut = new csvStatsOutput();
+ break;
default:
- usage(); die();
- break;
+ showUsage();
}
-// available language files
-$langs = array();
-$dir = opendir("$IP/languages");
-while ($file = readdir($dir)) {
- if (preg_match("/Language(.*?)\.php$/", $file, $m)) {
- $langs[] = $m[1];
+# Languages
+$wgLanguages = new languages();
+
+# Header
+$wgOut->heading();
+$wgOut->blockstart();
+$wgOut->element( 'Language', true );
+$wgOut->element( 'Translated', true );
+$wgOut->element( '%', true );
+$wgOut->element( 'Obsolete', true );
+$wgOut->element( '%', true );
+$wgOut->element( 'Problematic', true );
+$wgOut->element( '%', true );
+$wgOut->blockend();
+
+$wgGeneralMessages = $wgLanguages->getGeneralMessages();
+$wgRequiredMessagesNumber = count( $wgGeneralMessages['required'] );
+
+foreach ( $wgLanguages->getLanguages() as $code ) {
+ # Don't check English or RTL English
+ if ( $code == 'en' || $code == 'enRTL' ) {
+ continue;
}
-}
-sort($langs);
-// Cleanup file list
-foreach($langs as $key => $lang) {
- if ($lang == 'Utf8' || $lang == '' || $lang == 'Converter')
- unset($langs[$key]);
+ # Calculate the numbers
+ $language = $wgContLang->getLanguageName( $code );
+ $messages = $wgLanguages->getMessages( $code );
+ $messagesNumber = count( $messages['translated'] );
+ $requiredMessagesNumber = count( $messages['required'] );
+ $requiredMessagesPercent = $wgOut->formatPercent( $requiredMessagesNumber, $wgRequiredMessagesNumber );
+ $obsoleteMessagesNumber = count( $messages['obsolete'] );
+ $obsoleteMessagesPercent = $wgOut->formatPercent( $obsoleteMessagesNumber, $messagesNumber, true );
+ $messagesWithoutVariables = $wgLanguages->getMessagesWithoutVariables( $code );
+ $emptyMessages = $wgLanguages->getEmptyMessages( $code );
+ $messagesWithWhitespace = $wgLanguages->getMessagesWithWhitespace( $code );
+ $nonXHTMLMessages = $wgLanguages->getNonXHTMLMessages( $code );
+ $messagesWithWrongChars = $wgLanguages->getMessagesWithWrongChars( $code );
+ $problematicMessagesNumber = count( array_unique( array_merge( $messagesWithoutVariables, $emptyMessages, $messagesWithWhitespace, $nonXHTMLMessages, $messagesWithWrongChars ) ) );
+ $problematicMessagesPercent = $wgOut->formatPercent( $problematicMessagesNumber, $messagesNumber, true );
+
+ # Output them
+ $wgOut->blockstart();
+ $wgOut->element( "$language ($code)" );
+ $wgOut->element( "$requiredMessagesNumber/$wgRequiredMessagesNumber" );
+ $wgOut->element( $requiredMessagesPercent );
+ $wgOut->element( "$obsoleteMessagesNumber/$messagesNumber" );
+ $wgOut->element( $obsoleteMessagesPercent );
+ $wgOut->element( "$problematicMessagesNumber/$messagesNumber" );
+ $wgOut->element( $problematicMessagesPercent );
+ $wgOut->blockend();
}
-// Load message and compute stuff
-$msgs = array();
-foreach($langs as $lang) {
- // Since they aren't loaded by default..
- require_once( 'languages/Language' . $lang . '.php' );
- $arr = 'wgAllMessages' . $lang;
- if (@is_array($$arr)) { // Some of them don't have a message array
- $msgs[$wgContLang->lcfirst($lang)] = array(
- 'total' => count($$arr),
- 'redundant' => redundant($$arr),
- );
- } else {
- $msgs[$wgContLang->lcfirst($lang)] = array(
- 'total' => 0,
- 'redundant' => 0,
- );
- }
-}
-
-// Top entry
-$out->heading();
-$out->blockstart();
-$out->element('Language', true);
-$out->element('Translated', true);
-$out->element('%', true);
-$out->element('Untranslated', true);
-$out->element('%', true);
-$out->element('Redundant', true);
-$out->element('%', true);
-$out->blockend();
-
-// Generate rows
-foreach($msgs as $lang => $stats) {
- $out->blockstart();
- $out->element($wgContLang->getLanguageName(strtr($lang, '_', '-')) . " ($lang)"); // Language
- $out->element($stats['total'] . '/' . $msgs['en']['total']); // Translated
- $out->element($out->formatPercent($stats['total'], $msgs['en']['total'])); // % Translated
- $out->element($msgs['en']['total'] - $stats['total']); // Untranslated
- $out->element($out->formatPercent($msgs['en']['total'] - $stats['total'], $msgs['en']['total'], true)); // % Untranslated
- if($stats['redundant'] =='NC') {
- $out->element('NC');
- $out->element('NC');
- } else {
- $out->element($stats['redundant'] . '/' . $stats['total']); // Redundant
- $out->element($out->formatPercent($stats['redundant'], $stats['total'],true)); // % Redundant
- }
- $out->blockend();
-}
-$out->footer();
+# Footer
+$wgOut->footer();
-// Final output
-echo $out->getContent();
?>