Merge "For consistency of the interface, don't do expensive checks on page view."
[lhc/web/wiklou.git] / maintenance / benchmarks / bench_utf8_title_check.php
1 <?php
2 /**
3 * @file
4 * @ingroup Benchmark
5 */
6
7 require_once( dirname( __FILE__ ) . '/Benchmarker.php' );
8
/**
 * Benchmark comparing ways of checking that a string is UTF-8.
 *
 * Times the regexp used in Language->checkTitleEncoding(), two variants of
 * it (non-capturing group, once-only group), and mb_check_encoding(), over
 * a small set of sample titles. The benchmark only runs when
 * mb_check_encoding() is available.
 */
class bench_utf8_title_check extends Benchmarker {

	/** @var bool Whether mb_check_encoding() exists, i.e. whether the benchmark can run. */
	private $canRun;

	/** @var array Sample titles, stored URL-encoded; they are decoded in execute(). */
	private $data;

	/**
	 * @var mixed Result of the most recent check. Written by every benchmark
	 * method; never read inside this class.
	 */
	private $isutf8;

	/**
	 * Prepares the sample titles, detects mb_check_encoding() and sets the
	 * script description accordingly.
	 */
	public function __construct() {
		parent::__construct();

		$this->data = array(
			"",
			"United States of America", // 7bit ASCII
			"S%C3%A9rie%20t%C3%A9l%C3%A9vis%C3%A9e",
			"Acteur%7CAlbert%20Robbins%7CAnglais%7CAnn%20Donahue%7CAnthony%20E.%20Zuiker%7CCarol%20Mendelsohn",
			// This comes from bug 36839
			"Acteur%7CAlbert%20Robbins%7CAnglais%7CAnn%20Donahue%7CAnthony%20E.%20Zuiker%7CCarol%20Mendelsohn%7C"
			. "Catherine%20Willows%7CDavid%20Hodges%7CDavid%20Phillips%7CGil%20Grissom%7CGreg%20Sanders%7CHodges%7C"
			. "Internet%20Movie%20Database%7CJim%20Brass%7CLady%20Heather%7C"
			. "Les%20Experts%20(s%C3%A9rie%20t%C3%A9l%C3%A9vis%C3%A9e)%7CLes%20Experts%20:%20Manhattan%7C"
			. "Les%20Experts%20:%20Miami%7CListe%20des%20personnages%20des%20Experts%7C"
			. "Liste%20des%20%C3%A9pisodes%20des%20Experts%7CMod%C3%A8le%20discussion:Palette%20Les%20Experts%7C"
			. "Nick%20Stokes%7CPersonnage%20de%20fiction%7CPersonnage%20fictif%7CPersonnage%20de%20fiction%7C"
			. "Personnages%20r%C3%A9currents%20dans%20Les%20Experts%7CRaymond%20Langston%7CRiley%20Adams%7C"
			. "Saison%201%20des%20Experts%7CSaison%2010%20des%20Experts%7CSaison%2011%20des%20Experts%7C"
			. "Saison%2012%20des%20Experts%7CSaison%202%20des%20Experts%7CSaison%203%20des%20Experts%7C"
			. "Saison%204%20des%20Experts%7CSaison%205%20des%20Experts%7CSaison%206%20des%20Experts%7C"
			. "Saison%207%20des%20Experts%7CSaison%208%20des%20Experts%7CSaison%209%20des%20Experts%7C"
			. "Sara%20Sidle%7CSofia%20Curtis%7CS%C3%A9rie%20t%C3%A9l%C3%A9vis%C3%A9e%7CWallace%20Langham%7C"
			. "Warrick%20Brown%7CWendy%20Simms%7C%C3%89tats-Unis"
		);

		$this->canRun = function_exists( 'mb_check_encoding' );

		if ( $this->canRun ) {
			$this->mDescription = "Benchmark for using a regexp vs. mb_check_encoding to check for UTF-8 encoding.";
			mb_internal_encoding( 'UTF-8' );
		} else {
			$this->mDescription = "CANNOT RUN benchmark using mb_check_encoding: function not available.";
		}
	}

	/**
	 * Runs every checking method against every sample title and prints the
	 * formatted results.
	 *
	 * When mb_check_encoding() is unavailable nothing is measured; the
	 * description set by the constructor (which states the reason) is
	 * printed instead, so the run does not end without any output.
	 */
	public function execute() {
		if ( !$this->canRun ) {
			print $this->mDescription . "\n";
			return;
		}

		// Benchmark methods, in the order they are queued for each sample.
		$methods = array(
			'use_regexp',
			'use_regexp_non_capturing',
			'use_regexp_once_only',
			'use_mb_check_encoding',
		);

		$benchmarks = array();
		foreach ( $this->data as $encoded ) {
			// Decode once per sample; all four methods receive the same raw string.
			$decoded = rawurldecode( $encoded );
			foreach ( $methods as $method ) {
				$benchmarks[] = array(
					'function' => array( $this, $method ),
					'args' => array( $decoded ),
				);
			}
		}

		$this->bench( $benchmarks );
		print $this->getFormattedResults();
	}

	/**
	 * Checks $s with the regexp using a capturing group.
	 *
	 * The pattern accepts one or more of: a byte 0x00-0x7f, or a lead byte
	 * 0xc0-0xdf / 0xe0-0xef / 0xf0-0xf7 followed by one / two / three bytes
	 * in 0x80-0xbf. Because of the "+", the empty string does not match.
	 *
	 * @param string $s String to check
	 */
	public function use_regexp( $s ) {
		$this->isutf8 = preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
			'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/', $s );
	}

	/**
	 * Same check as use_regexp(), with a non-capturing subgroup "(?:".
	 *
	 * @param string $s String to check
	 */
	public function use_regexp_non_capturing( $s ) {
		$this->isutf8 = preg_match( '/^(?:[\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
			'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/', $s );
	}

	/**
	 * Same check as use_regexp(), with a once-only subgroup "(?>".
	 *
	 * @param string $s String to check
	 */
	public function use_regexp_once_only( $s ) {
		$this->isutf8 = preg_match( '/^(?>[\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
			'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/', $s );
	}

	/**
	 * Checks $s with mb_check_encoding() against 'UTF-8'.
	 *
	 * @param string $s String to check
	 */
	public function use_mb_check_encoding( $s ) {
		$this->isutf8 = mb_check_encoding( $s, 'UTF-8' );
	}

}
105
// Name the class defined in this file, then include the file that
// RUN_MAINTENANCE_IF_MAIN points to. NOTE(review): that constant is defined
// outside this file; presumably the included script runs $maintClass when
// this file is the entry point - confirm against the maintenance bootstrap.
$maintClass = 'bench_utf8_title_check';
require_once RUN_MAINTENANCE_IF_MAIN;