Add benchmarkTidy.php, to benchmark tidy drivers
author Tim Starling <tstarling@wikimedia.org>
Thu, 23 Mar 2017 01:51:43 +0000 (12:51 +1100)
committer Krinkle <krinklemail@gmail.com>
Fri, 21 Apr 2017 01:02:22 +0000 (01:02 +0000)
Plus representative input file

Change-Id: I254793fc55c57a98c07ae1e4c27e6005965c9a20

autoload.php
includes/parser/MWTidy.php
maintenance/benchmarks/README
maintenance/benchmarks/australia-untidy.html.gz [new file with mode: 0644]
maintenance/benchmarks/benchmarkTidy.php [new file with mode: 0644]

index ba5b774..27c27c5 100644 (file)
@@ -192,6 +192,7 @@ $wgAutoloadLocalClasses = [
        'BenchmarkHooks' => __DIR__ . '/maintenance/benchmarks/benchmarkHooks.php',
        'BenchmarkParse' => __DIR__ . '/maintenance/benchmarks/benchmarkParse.php',
        'BenchmarkPurge' => __DIR__ . '/maintenance/benchmarks/benchmarkPurge.php',
+       'BenchmarkTidy' => __DIR__ . '/maintenance/benchmarks/benchmarkTidy.php',
        'Benchmarker' => __DIR__ . '/maintenance/benchmarks/Benchmarker.php',
        'BitmapHandler' => __DIR__ . '/includes/media/Bitmap.php',
        'BitmapHandler_ClientOnly' => __DIR__ . '/includes/media/Bitmap_ClientOnly.php',
index 01bf2d0..ffc884e 100644 (file)
@@ -83,7 +83,7 @@ class MWTidy {
        /**
         * @return bool|\MediaWiki\Tidy\TidyDriverBase
         */
-       protected static function singleton() {
+       public static function singleton() {
                global $wgUseTidy, $wgTidyInternal, $wgTidyConf, $wgDebugTidy, $wgTidyConfig,
                        $wgTidyBin, $wgTidyOpts;
 
index c021abd..27da9de 100644 (file)
@@ -5,3 +5,6 @@ To get somehow accurate result, you might want to bound the PHP process
 to a specific CPU with `taskset` and raise its priority with `nice`. Example:
 
  $ taskset 1 nice -n-10 php bench_wfIsWindows.php
+
+australia-untidy.html.gz contains representative input text for
+benchmarkTidy.php. It needs to be decompressed before use.
diff --git a/maintenance/benchmarks/australia-untidy.html.gz b/maintenance/benchmarks/australia-untidy.html.gz
new file mode 100644 (file)
index 0000000..148481d
Binary files /dev/null and b/maintenance/benchmarks/australia-untidy.html.gz differ
diff --git a/maintenance/benchmarks/benchmarkTidy.php b/maintenance/benchmarks/benchmarkTidy.php
new file mode 100644 (file)
index 0000000..1479174
--- /dev/null
@@ -0,0 +1,78 @@
+<?php
+
+require __DIR__ . '/../Maintenance.php';
+
+/**
+ * Maintenance script that benchmarks a Tidy driver against an input HTML file.
+ *
+ * The driver is built from the --driver / --tidy-config options when either is
+ * given; otherwise the instance returned by MWTidy::singleton() is used.
+ */
+class BenchmarkTidy extends Maintenance {
+       /**
+        * Register the command line options understood by this script.
+        */
+       public function __construct() {
+               parent::__construct();
+               $this->addOption( 'file', 'A filename which contains the input text', true, true );
+               $this->addOption( 'driver',
+                       'The Tidy driver name. If neither this nor --tidy-config is given, ' .
+                       'the configured instance is used',
+                       false, true );
+               $this->addOption( 'tidy-config', 'JSON encoded value for the tidy configuration array',
+                       false, true );
+       }
+
+       /**
+        * Read the input file, select the Tidy driver and run the benchmark on it.
+        */
+       public function execute() {
+               $html = file_get_contents( $this->getOption( 'file' ) );
+               if ( $html === false ) {
+                       // NOTE(review): the second argument is presumably a non-zero exit status
+                       // that makes error() terminate the script -- confirm in Maintenance
+                       $this->error( "Unable to open input file", 1 );
+               }
+               if ( $this->hasOption( 'driver' ) || $this->hasOption( 'tidy-config' ) ) {
+                       // An explicit configuration was requested. '{}' decodes to an empty array,
+                       // so --driver on its own is enough.
+                       $config = json_decode( $this->getOption( 'tidy-config', '{}' ), true );
+                       if ( !is_array( $config ) ) {
+                               $this->error( "Invalid JSON tidy config", 1 );
+                       }
+                       // Array union keeps existing keys: a 'driver' entry in the JSON config wins
+                       // over --driver, and 'RemexHtml' is used when neither names a driver.
+                       $config += [ 'driver' => $this->getOption( 'driver', 'RemexHtml' ) ];
+                       $driver = MWTidy::factory( $config );
+               } else {
+                       $driver = MWTidy::singleton();
+                       if ( !$driver ) {
+                               $this->error( "Tidy disabled or not installed", 1 );
+                       }
+               }
+
+               $this->benchmark( $driver, $html );
+       }
+
+       /**
+        * Time repeated tidy() calls and print per-run and summary statistics.
+        *
+        * Performs $outerCount timed runs of $innerCount tidy() calls each. For every
+        * run the memory usage after each call and the mean time per call (in
+        * milliseconds) are printed, followed by the minimum, median, mean and maximum
+        * over all runs and the current and peak memory usage.
+        *
+        * @param object $driver Driver object providing a tidy( $html ) method
+        * @param string $html Input text passed to every tidy() call
+        */
+       private function benchmark( $driver, $html ) {
+               global $wgContLang;
+
+               $times = [];
+               $innerCount = 10;
+               $outerCount = 10;
+               for ( $j = 1; $j <= $outerCount; $j++ ) {
+                       $memory = [];
+                       $t = microtime( true );
+                       for ( $i = 0; $i < $innerCount; $i++ ) {
+                               $driver->tidy( $html );
+                               // Only sample the raw byte count here. Formatting and printing are
+                               // deferred until the timer has been read, so that output I/O is not
+                               // counted as tidy time.
+                               $memory[] = memory_get_usage( true );
+                       }
+                       // Mean time per tidy() call in this run, converted from seconds to ms
+                       $t = ( ( microtime( true ) - $t ) / $innerCount ) * 1000;
+                       $times[] = $t;
+                       foreach ( $memory as $bytes ) {
+                               print $wgContLang->formatSize( $bytes ) . "\n";
+                       }
+                       print "Run $j: $t\n";
+               }
+               print "\n";
+
+               sort( $times, SORT_NUMERIC );
+               $n = $outerCount;
+               $min = $times[0];
+               $max = end( $times );
+               if ( $n % 2 ) {
+                       // Odd number of samples: the middle element is the median
+                       $median = $times[ ( $n - 1 ) / 2 ];
+               } else {
+                       // Even number of samples: average the two middle elements
+                       $median = ( $times[$n / 2] + $times[$n / 2 - 1] ) / 2;
+               }
+               $mean = array_sum( $times ) / $n;
+
+               print "Minimum: $min ms\n";
+               print "Median: $median ms\n";
+               print "Mean: $mean ms\n";
+               print "Maximum: $max ms\n";
+               print "Memory usage: " .
+                       $wgContLang->formatSize( memory_get_usage( true ) ) . "\n";
+               print "Peak memory usage: " .
+                       $wgContLang->formatSize( memory_get_peak_usage( true ) ) . "\n";
+       }
+}
+
+// Name the class defined above in $maintClass, then include the file named by the
+// RUN_MAINTENANCE_IF_MAIN constant.
+// NOTE(review): that file is presumably the runner which instantiates $maintClass when
+// this script is the entry point; it is defined outside this file -- confirm there.
+$maintClass = 'BenchmarkTidy';
+require RUN_MAINTENANCE_IF_MAIN;