Tweaks for the "Whole HTML Validation" feature:
[lhc/web/wiklou.git] / includes / OutputHandler.php
1 <?php
2
/**
 * Standard output handler for use with ob_start.
 *
 * Processes the buffered output in this order:
 *  - mangles flash cross-domain policy tags (wfMangleFlashPolicy),
 *  - if $wgValidateAllHtml is set and no Content-Type header marks the
 *    response as something other than HTML/XHTML, runs it through
 *    wfHtmlValidationHandler,
 *  - unless $wgDisableOutputCompression or zlib.output_compression is on,
 *    gzips the output (skipped when MW_NO_OUTPUT_COMPRESSION is defined)
 *    and, when no output_handler ini setting is configured, emits a
 *    Content-Length header via wfDoContentLength.
 *
 * @param $s String: the buffered output
 * @return String: the output to send
 */
function wfOutputHandler( $s ) {
	global $wgDisableOutputCompression, $wgValidateAllHtml;

	$s = wfMangleFlashPolicy( $s );

	if ( $wgValidateAllHtml ) {
		// headers_list() is used instead of apache_response_headers() because
		// the latter is not available under every SAPI, which would make this
		// handler die with an undefined function error.
		// If no Content-Type header has been set at all, the response is
		// treated as HTML.
		$isHTML = true;
		foreach ( headers_list() as $header ) {
			// Each entry is a raw "Name: value" line
			$parts = explode( ':', $header, 2 );
			if ( count( $parts ) !== 2 ) {
				continue;
			}
			$name = strtolower( trim( $parts[0] ) );
			$value = $parts[1];
			if ( $name == 'content-type'
				&& strpos( $value, 'text/html' ) === false
				&& strpos( $value, 'application/xhtml+xml' ) === false
			) {
				$isHTML = false;
				break;
			}
		}
		if ( $isHTML ) {
			$s = wfHtmlValidationHandler( $s );
		}
	}

	if ( !$wgDisableOutputCompression && !ini_get( 'zlib.output_compression' ) ) {
		if ( !defined( 'MW_NO_OUTPUT_COMPRESSION' ) ) {
			$s = wfGzipHandler( $s );
		}
		if ( !ini_get( 'output_handler' ) ) {
			// Length of the final (possibly compressed) body
			wfDoContentLength( strlen( $s ) );
		}
	}
	return $s;
}
32
/**
 * Work out the "file extension" of the currently-requested URL, the way
 * some client applications guess it from the path.
 *
 * Lives here rather than on WebRequest because it is needed before things
 * are initialized.
 *
 * @return String: lower-cased text from the last '.' of the path to its end
 *   (including the dot), or '' if the path has no '.' or is unavailable
 * @private
 */
function wfRequestExtension() {
	/// @fixme -- this sort of dupes some code in WebRequest::getRequestUrl()
	if ( isset( $_SERVER['REQUEST_URI'] ) ) {
		// Keep only what precedes the query string
		$pieces = explode( '?', $_SERVER['REQUEST_URI'], 2 );
		$urlPath = $pieces[0];
	} elseif ( isset( $_SERVER['SCRIPT_NAME'] ) ) {
		// Probably IIS, where QUERY_STRING is supplied separately
		$urlPath = $_SERVER['SCRIPT_NAME'];
	} else {
		// The server gave us nothing to work with
		return '';
	}

	// strrchr() yields the tail starting at the last '.', or false if none
	$suffix = strrchr( $urlPath, '.' );
	return $suffix === false ? '' : strtolower( $suffix );
}
58
/**
 * Handler that compresses data with gzip if allowed by the Accept header.
 * Unlike ob_gzhandler, it works for HEAD requests too.
 *
 * Does nothing when gzencode() is unavailable, when headers have already
 * been sent, or when the request path ends in .gz or .tgz. Otherwise it
 * compresses the data if the Accept-Encoding request header lists "gzip",
 * and adds Vary / X-Vary-Options headers unless a Vary header already exists.
 *
 * @param $s String: data to send
 * @return String: the data, gzip-compressed when applicable
 */
function wfGzipHandler( $s ) {
	if ( !function_exists( 'gzencode' ) || headers_sent() ) {
		return $s;
	}

	$ext = wfRequestExtension();
	if ( $ext == '.gz' || $ext == '.tgz' ) {
		// Don't do gzip compression if the URL path ends in .gz or .tgz
		// This confuses Safari and triggers a download of the page,
		// even though it's pretty clearly labeled as viewable HTML.
		// Bad Safari! Bad!
		return $s;
	}

	if ( isset( $_SERVER['HTTP_ACCEPT_ENCODING'] ) ) {
		// Note: any "gzip" token counts, q-values are not interpreted
		$tokens = preg_split( '/[,; ]/', $_SERVER['HTTP_ACCEPT_ENCODING'] );
		if ( in_array( 'gzip', $tokens, true ) ) {
			$compressed = gzencode( $s, 3 );
			// Only announce gzip once compression has actually succeeded,
			// otherwise the client would get plain data labelled as gzip.
			if ( $compressed !== false ) {
				header( 'Content-Encoding: gzip' );
				$s = $compressed;
			}
		}
	}

	// Set vary header if it hasn't been set already.
	// Header names are case-insensitive, so "vary:" must match too;
	// otherwise header() below would replace the existing one.
	$foundVary = false;
	foreach ( headers_list() as $header ) {
		if ( strncasecmp( $header, 'Vary:', 5 ) === 0 ) {
			$foundVary = true;
			break;
		}
	}
	if ( !$foundVary ) {
		header( 'Vary: Accept-Encoding' );
		header( 'X-Vary-Options: Accept-Encoding;list-contains=gzip' );
	}
	return $s;
}
100
/**
 * Neutralise flash cross-domain policy tags, which open up the site to
 * XSS attacks, by renaming every <cross-domain-policy> opening tag.
 *
 * @param $s String: text to filter
 * @return String: the text with each matching tag replaced by
 *   <NOT-cross-domain-policy>
 */
function wfMangleFlashPolicy( $s ) {
	$policyTag = '/\<\s*cross-domain-policy\s*\>/i';

	# Check for a match first and only then run the replacement: this
	# avoids weird excessive memory usage in PCRE on big articles
	if ( !preg_match( $policyTag, $s ) ) {
		return $s;
	}
	return preg_replace( $policyTag, '<NOT-cross-domain-policy>', $s );
}
112
/**
 * Add a Content-Length header if possible. This makes it cooperate with squid better.
 *
 * The header is only emitted when headers have not been sent yet and the
 * request was made with HTTP/1.0.
 *
 * @param $length Integer: body length in bytes
 */
function wfDoContentLength( $length ) {
	// SERVER_PROTOCOL may be missing (e.g. outside a web request); check it
	// exists before reading it to avoid an undefined index notice.
	if ( !headers_sent()
		&& isset( $_SERVER['SERVER_PROTOCOL'] )
		&& $_SERVER['SERVER_PROTOCOL'] == 'HTTP/1.0'
	) {
		header( "Content-Length: $length" );
	}
}
121
/**
 * Replace the output with an error if the HTML is not valid.
 *
 * The text is checked with tidy: the internal tidy class when
 * $wgTidyInternal is set, otherwise the external $wgTidyBin binary run
 * through proc_open. If tidy reports no problem, or the external check
 * cannot be run at all, the text is returned untouched. Otherwise a
 * "Cache-Control: no-cache" header is sent and an error page is returned,
 * listing tidy's messages followed by the original source line by line,
 * with the lines tidy complained about highlighted.
 *
 * @param $s String: the HTML to validate
 * @return String: $s itself, or the HTML of the error page
 */
function wfHtmlValidationHandler( $s ) {
	global $wgTidyInternal, $wgTidyConf;

	if ( $wgTidyInternal ) {
		$tidy = new tidy;

		$tidy->parseString( $s, $wgTidyConf, 'utf8' );
		if ( $tidy->getStatus() == 0 ) {
			return $s;
		}

		$errors = $tidy->errorBuffer;
	} else {
		// Copied from Parser::externalTidy();
		global $wgTidyBin, $wgTidyOpts;

		if ( !function_exists( 'proc_open' ) ) {
			return $s;
		}

		$opts = ' -utf8';

		// stdin: the HTML; stdout: discarded; stderr: tidy's messages
		$descriptorspec = array(
			0 => array( 'pipe', 'r' ),
			1 => array( 'file', wfGetNull(), 'a' ),
			2 => array( 'pipe', 'w' )
		);
		$pipes = array();
		$process = proc_open( "$wgTidyBin -config $wgTidyConf $wgTidyOpts$opts", $descriptorspec, $pipes );
		if ( !is_resource( $process ) ) {
			return $s;
		}

		fwrite( $pipes[0], $s );
		fclose( $pipes[0] );
		// Must start out as a string before being appended to
		$errors = '';
		while ( !feof( $pipes[2] ) ) {
			$errors .= fgets( $pipes[2], 1024 );
		}
		fclose( $pipes[2] );
		$ret = proc_close( $process );
		if ( ( $ret < 0 && $errors == '' ) || $ret == 0 ) {
			return $s;
		}
	}

	header( 'Cache-Control: no-cache' );

	$out = <<<EOT
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en" dir="ltr">
<head>
<title>HTML validation error</title>
<style>
.highlight { background-color: #ffc }
li { white-space: pre }
</style>
</head>
<body>
<h1>HTML validation error</h1>
<ul>
EOT;

	// One linked list item per tidy message that names a source line
	$badLines = array();
	foreach ( explode( "\n", $errors ) as $error ) {
		if ( preg_match( '/^line (\d+)/', $error, $m ) ) {
			$lineNum = intval( $m[1] );
			$badLines[$lineNum] = true;
			$out .= "<li><a href=\"#line-{$lineNum}\">" . htmlspecialchars( $error ) . "</a></li>\n";
		}
	}

	$out .= '</ul>';
	$out .= '<pre>' . htmlspecialchars( $errors ) . '</pre>';
	$out .= '<ol>';

	// Split with explode() rather than strtok(): strtok() silently skips
	// empty lines, which would shift the numbering away from the line
	// numbers tidy reports and highlight the wrong lines.
	$lines = explode( "\n", $s );
	if ( end( $lines ) === '' ) {
		// A trailing newline (or empty input) does not start another line
		array_pop( $lines );
	}
	foreach ( $lines as $index => $line ) {
		$i = $index + 1;
		if ( isset( $badLines[$i] ) ) {
			$out .= "<li class=\"highlight\" id=\"line-$i\">";
		} else {
			$out .= '<li>';
		}
		$out .= htmlspecialchars( $line ) . '</li>';
	}

	$out .= '</ol></body></html>';
	return $out;
}
215 }