- // Only some HTML tags are understood with params by MediaWiki, the rest are ignored.
- // List the tags that accept params below, as well as what those params are.
- public static $data = array(
- "B" => array("CLASS", "ID", "STYLE", "lang", "dir", "title"),
- "CAPTION" => array("CLASS", "ID", "STYLE", "align", "lang", "dir", "title"),
- "CENTER" => array("CLASS", "STYLE", "ID", "lang", "dir", "title"),
- "DIV" => array("CLASS", "STYLE", "ID", "align", "lang", "dir", "title"),
- "FONT" => array("CLASS", "STYLE", "ID", "lang", "dir", "title", "face", "size", "color"),
- "H1" => array("STYLE", "CLASS", "ID", "align", "lang", "dir", "title"),
- "H2" => array("STYLE", "CLASS", "ID", "align", "lang", "dir", "title"),
- "HR" => array("STYLE", "CLASS", "ID", "WIDTH", "lang", "dir", "title", "size", "noshade"),
- "LI" => array("CLASS", "ID", "STYLE", "lang", "dir", "title", "type", "value"),
- "TABLE" => array("STYLE", "CLASS", "ID", "BGCOLOR", "WIDTH", "ALIGN", "BORDER", "CELLPADDING",
- "CELLSPACING", "lang", "dir", "title", "summary", "frame", "rules"),
- "TD" => array("STYLE", "CLASS", "ID", "BGCOLOR", "WIDTH", "ALIGN", "COLSPAN", "ROWSPAN",
- "VALIGN", "abbr", "axis", "headers", "scope", "nowrap", "height", "lang",
- "dir", "title", "char", "charoff"),
- "TH" => array("STYLE", "CLASS", "ID", "BGCOLOR", "WIDTH", "ALIGN", "COLSPAN", "ROWSPAN",
- "VALIGN", "abbr", "axis", "headers", "scope", "nowrap", "height", "lang",
- "dir", "title", "char", "charoff"),
- "TR" => array("CLASS", "STYLE", "ID", "BGCOLOR", "ALIGN", "VALIGN", "lang", "dir", "title", "char", "charoff"),
- "UL" => array("CLASS", "STYLE", "ID", "lang", "dir", "title", "type"),
- "P" => array("style", "class", "id", "align", "lang", "dir", "title"),
- "blockquote" => array("CLASS", "ID", "STYLE", "lang", "dir", "title", "cite"),
- "span" => array("CLASS", "ID", "STYLE", "align", "lang", "dir", "title"),
- "code" => array("CLASS", "ID", "STYLE", "lang", "dir", "title"),
- "tt" => array("CLASS", "ID", "STYLE", "lang", "dir", "title"),
- "small" => array("CLASS", "ID", "STYLE", "lang", "dir", "title"),
- "big" => array("CLASS", "ID", "STYLE", "lang", "dir", "title"),
- "s" => array("CLASS", "ID", "STYLE", "lang", "dir", "title"),
- "u" => array("CLASS", "ID", "STYLE", "lang", "dir", "title"),
- "del" => array("CLASS", "ID", "STYLE", "lang", "dir", "title", "datetime", "cite"),
- "ins" => array("CLASS", "ID", "STYLE", "lang", "dir", "title", "datetime", "cite"),
- "sub" => array("CLASS", "ID", "STYLE", "lang", "dir", "title"),
- "sup" => array("CLASS", "ID", "STYLE", "lang", "dir", "title"),
- "ol" => array("CLASS", "ID", "STYLE", "lang", "dir", "title", "type", "start"),
- "br" => array("CLASS", "ID", "STYLE", "title", "clear"),
- "cite" => array("CLASS", "ID", "STYLE", "lang", "dir", "title"),
- "var" => array("CLASS", "ID", "STYLE", "lang", "dir", "title"),
- "dl" => array("CLASS", "ID", "STYLE", "lang", "dir", "title"),
- "ruby" => array("CLASS", "ID", "STYLE", "lang", "dir", "title"),
- "rt" => array("CLASS", "ID", "STYLE", "lang", "dir", "title"),
- "rp" => array("CLASS", "ID", "STYLE", "lang", "dir", "title"),
- "dt" => array("CLASS", "ID", "STYLE", "lang", "dir", "title"),
- "dl" => array("CLASS", "ID", "STYLE", "lang", "dir", "title"),
- "em" => array("CLASS", "ID", "STYLE", "lang", "dir", "title"),
- "strong" => array("CLASS", "ID", "STYLE", "lang", "dir", "title"),
- "i" => array("CLASS", "ID", "STYLE", "lang", "dir", "title"),
- "thead" => array("CLASS", "ID", "STYLE", "lang", "dir", "title", 'align', 'char', 'charoff', 'valign'),
- "tfoot" => array("CLASS", "ID", "STYLE", "lang", "dir", "title", 'align', 'char', 'charoff', 'valign'),
- "tbody" => array("CLASS", "ID", "STYLE", "lang", "dir", "title", 'align', 'char', 'charoff', 'valign'),
- "colgroup" => array("CLASS", "ID", "STYLE", "lang", "dir", "title", 'align', 'char', 'charoff', 'valign', 'span', 'width'),
- "col" => array("CLASS", "ID", "STYLE", "lang", "dir", "title", 'align', 'char', 'charoff', 'valign', 'span', 'width'),
- "pre" => array("CLASS", "ID", "STYLE", "lang", "dir", "title", "width"),
-
- // extension tags that accept parameters:
- "sort" => array("order", "class"),
- "ref" => array("name"),
- "categorytree" => array("hideroot", "mode", "style"),
- "chemform" => array("link", "wikilink", "query"),
- "section" => array("begin", "new"),
-
- // older MW transclusion.
- "transclude" => array("page"),
- );
-
- // The types of the HTML that we will be testing were defined above
- // Note: this needs to be initialized later to be equal to: array_keys(wikiFuzz::$data);
- // as such, it also needs to also be publicly modifiable.
- public static $types;
-
-
- // Some attribute values.
- static private $other = array("&","=",":","?","\"","\n","%n%n%n%n%n%n%n%n%n%n%n%n","\\");
- static private $ints = array(
- // various numbers
- "0","-1","127","-7897","89000","808080","90928345",
- "0xfffffff","ffff",
-
- // Different ways of saying: '
- "'", // Long UTF-8 Unicode encoding
- "'", // dec version.
- "'", // hex version.
- "§", // malformed hex variant, MSB not zero.
-
- // Different ways of saying: "
- """, // Long UTF-8 Unicode encoding
- """,
- """, // hex version.
- "¢", // malformed hex variant, MSB not zero.
-
- // Different ways of saying: <
- "<",
- "<", // Long UTF-8 Unicode encoding without semicolon (Mediawiki wants the colon)
- "<", // Long UTF-8 Unicode encoding with semicolon
- "<",
- "<", // hex version.
- "¼", // malformed hex variant, MSB not zero.
- "<", // mid-length hex version
- "<", // slightly longer hex version, with capital "X"
-
- // Different ways of saying: >
- ">",
- ">", // Long UTF-8 Unicode encoding
- ">",
- ">", // hex version.
- "¾", // malformed variant, MSB not zero.
-
- // Different ways of saying: [
- "[", // Long UTF-8 Unicode encoding
- "[",
- "[", // hex version.
-
- // Different ways of saying: {{
- "{{", // Long UTF-8 Unicode encoding
- "{{",
- "{{", // hex version.
-
- // Different ways of saying: |
- "|", // Long UTF-8 Unicode encoding
- "|",
- "|", // hex version.
- "ü", // malformed hex variant, MSB not zero.
-
- // a "lignature" - http://www.robinlionheart.com/stds/html4/spchars#ligature
- "‌"
- );
-
- // Defines various wiki-related bits of syntax, that can potentially cause
- // MediaWiki to do something other than just print that literal text.
- static private $ext = array(
- // links, templates, parameters.
- "[[", "]]", "{{", "}}", "|", "[", "]", "{{{", "}}}", "|]]",
-
- // wiki tables.
- "\n{|", "\n|}",
- "!",
- "\n!",
- "!!",
- "||",
- "\n|-", "| ", "\n|",
-
- // section headings.
- "=", "==", "===", "====", "=====", "======",
-
- // lists (ordered and unordered) and indentation.
- "\n*", "*", "\n:", ":",
- "\n#", "#",
-
- // definition lists (dl, dt, dd), newline, and newline with pre, and a tab.
- "\n;", ";", "\n ",
-
- // Whitespace: newline, tab, space.
- "\n", "\t", " ",
-
- // Some XSS attack vectors from http://ha.ckers.org/xss.html
- "	", // tab
- "
", // newline
- "
", // carriage return
- "\0", // null character
- "  ", // spaces and meta characters
- "'';!--\"<XSS>=&{()}", // compact injection of XSS & SQL tester
-
- // various NULL fields
- "%00",
- "�",
- "\0",
-
- // horizontal rule.
- "-----", "\n-----",
-
- // signature, redirect, bold, italics.
- "~~~~", "#REDIRECT [[", "'''", "''",
-
- // comments.
- "<!--", "-->",
-
- // quotes.
- "\"", "'",
-
- // tag start and tag end.
- "<", ">",
-
- // implicit link creation on URIs.
- "http://",
- "https://",
- "ftp://",
- "irc://",
- "news:",
- 'gopher://',
- 'telnet://',
- 'nntp://',
- 'worldwind://',
- 'mailto:',
-
- // images.
- "[[image:",
- ".gif",
- ".png",
- ".jpg",
- ".jpeg",
- 'thumbnail=',
- 'thumbnail',
- 'thumb=',
- 'thumb',
- 'right',
- 'none',
- 'left',
- 'framed',
- 'frame',
- 'enframed',
- 'centre',
- 'center',
- "Image:",
- "[[:Image",
- 'px',
- 'upright=',
- 'border',
-
- // misc stuff to throw at the Parser.
- '%08X',
- '/',
- ":x{|",
- "\n|+",
- "<noinclude>",
- "</noinclude>",
- " \302\273",
- " :",
- " !",
- " ;",
- "\302\253",
- "[[category:",
- "?=",
- "(",
- ")",
- "]]]",
- "../",
- "{{{{",
- "}}}}",
- "[[Special:",
- "<includeonly>",
- "</includeonly>",
- "<!--MWTEMPLATESECTION=",
- '<!--MWTOC-->',
-
- // implicit link creation on booknum, RFC, and PubMed ID usage (both with and without IDs)
- "ISBN 2",
- "RFC 000",
- "PMID 000",
- "ISBN ",
- "RFC ",
- "PMID ",
-
- // magic words:
- '__NOTOC__',
- '__FORCETOC__',
- '__NOEDITSECTION__',
- '__START__',
- '__NOTITLECONVERT__',
- '__NOCONTENTCONVERT__',
- '__END__',
- '__TOC__',
- '__NOTC__',
- '__NOCC__',
- "__FORCETOC__",
- "__NEWSECTIONLINK__",
- "__NOGALLERY__",
-
- // more magic words / internal templates.
- '{{PAGENAME}}',
- '{{PAGENAMEE}}',
- '{{NAMESPACE}}',
- "{{MSG:",
- "}}",
- "{{MSGNW:",
- "}}",
- "{{INT:",
- "}}",
- '{{SITENAME}}',
- "{{NS:",
- "}}",
- "{{LOCALURL:",
- "}}",
- "{{LOCALURLE:",
- "}}",
- "{{SCRIPTPATH}}",
- "{{GRAMMAR:gentiv|",
- "}}",
- "{{REVISIONID}}",
- "{{SUBPAGENAME}}",
- "{{SUBPAGENAMEE}}",
- "{{ns:0}}",
- "{{fullurle:",
- "}}",
- "{{subst::",
- "}}",
- "{{UCFIRST:",
- "}}",
- "{{UC:",
- '{{SERVERNAME}}',
- '{{SERVER}}',
- "{{RAW:",
- "}}",
- "{{PLURAL:",
- "}}",
- "{{LCFIRST:",
- "}}",
- "{{LC:",
- "}}",
- '{{CURRENTWEEK}}',
- '{{CURRENTDOW}}',
- "{{INT:{{LC:contribs-showhideminor}}|",
- "}}",
- "{{INT:googlesearch|",
- "}}",
- "{{BASEPAGENAME}}",
- "{{CONTENTLANGUAGE}}",
- "{{PAGESINNAMESPACE:}}",
- "{{#language:",
- "}}",
- "{{#special:",
- "}}",
- "{{#special:emailuser",
- "}}",
-
- // Some raw link for magic words.
- "{{NUMBEROFPAGES:R",
- "}}",
- "{{NUMBEROFUSERS:R",
- "}}",
- "{{NUMBEROFARTICLES:R",
- "}}",
- "{{NUMBEROFFILES:R",
- "}}",
- "{{NUMBEROFADMINS:R",
- "}}",
- "{{padleft:",
- "}}",
- "{{padright:",
- "}}",
- "{{DEFAULTSORT:",
- "}}",
-
- // internal Math "extension":
- "<math>",
- "</math>",
-
- // Parser extension functions:
- "{{#expr:",
- "{{#if:",
- "{{#ifeq:",
- "{{#ifexist:",
- "{{#ifexpr:",
- "{{#switch:",
- "{{#time:",
- "}}",
-
- // references table for the Cite extension.
- "<references/>",
-
- // Internal Parser tokens - try inserting some of these.
- "UNIQ25f46b0524f13e67NOPARSE",
- "UNIQ17197916557e7cd6-HTMLCommentStrip46238afc3bb0cf5f00000002",
- "\x07UNIQ17197916557e7cd6-HTMLCommentStrip46238afc3bb0cf5f00000002-QINU",
-
- // Inputbox extension:
- "<inputbox>\ntype=search\nsearchbuttonlabel=\n",
- "</inputbox>",
-
- // charInsert extension:
- "<charInsert>",
- "</charInsert>",
-
- // wikiHiero extension:
- "<hiero>",
- "</hiero>",
-
- // Image gallery:
- "<gallery>",
- "</gallery>",
-
- // FixedImage extension.
- "<fundraising/>",
-
- // Timeline extension: currently untested.
-
- // Nowiki:
- "<nOwIkI>",
- "</nowiki>",
-
- // an external image to test the external image displaying code
- "http://debian.org/Pics/debian.png",
-
- // LabeledSectionTransclusion extension.
- "{{#lstx:",
- "}}",
- "{{#lst:",
- "}}",
- "{{#lst:Main Page|",
- "}}"
- );
-
- /**
- ** Randomly returns one element of the input array.
- */
- static public function chooseInput(array $input) {
- $randindex = wikiFuzz::randnum(count($input) - 1);
- return $input[$randindex];
- }
-
- // Max number of parameters for HTML attributes.
- static private $maxparams = 10;
-
- /**
- ** Returns random number between finish and start.
- */
- static public function randnum($finish,$start=0) {
- return mt_rand($start,$finish);
- }
-
- /**
- ** Returns a mix of random text and random wiki syntax.
- */
- static private function randstring() {
- $thestring = "";
-
- for ($i=0; $i<40; $i++) {
- $what = wikiFuzz::randnum(1);
-
- if ($what == 0) { // include some random wiki syntax
- $which = wikiFuzz::randnum(count(wikiFuzz::$ext) - 1);
- $thestring .= wikiFuzz::$ext[$which];
- }
- else { // include some random text
- $char = INCLUDE_BINARY
- // Decimal version:
- // "&#" . wikiFuzz::randnum(255) . ";"
- // Hex version:
- ? "&#x" . str_pad(dechex(wikiFuzz::randnum(255)), wikiFuzz::randnum(2, 7), "0", STR_PAD_LEFT) . ";"
- // A truly binary version:
- // ? chr(wikiFuzz::randnum(0,255))
- : chr(wikiFuzz::randnum(126,32));
-
- $length = wikiFuzz::randnum(8);
- $thestring .= str_repeat ($char, $length);
- }
- }
- return $thestring;
- }
-
- /**
- ** Returns either random text, or random wiki syntax, or random data from "ints",
- ** or random data from "other".
- */
- static private function makestring() {
- $what = wikiFuzz::randnum(2);
- if ($what == 0) {
- return wikiFuzz::randstring();
- }
- elseif ($what == 1) {
- return wikiFuzz::$ints[wikiFuzz::randnum(count(wikiFuzz::$ints) - 1)];
- }
- else {
- return wikiFuzz::$other[wikiFuzz::randnum(count(wikiFuzz::$other) - 1)];
- }
- }
-
-
- /**
- ** Strips out the stuff that Mediawiki balks at in a page's title.
- ** Implementation copied/pasted from cleanupTable.inc & cleanupImages.php
- */
- static public function makeTitleSafe($str) {
- $legalTitleChars = " %!\"$&'()*,\\-.\\/0-9:;=?@A-Z\\\\^_`a-z~\\x80-\\xFF";
- return preg_replace_callback(
- "/([^$legalTitleChars])/",
- create_function(
- // single quotes are essential here,
- // or alternative escape all $ as \$
- '$matches',
- 'return sprintf( "\\x%02x", ord( $matches[1] ) );'
- ),
- $str );
- }
-
- /**
- ** Returns a string of fuzz text.
- */
- static private function loop() {
- switch ( wikiFuzz::randnum(3) ) {
- case 1: // an opening tag, with parameters.
- $string = "";
- $i = wikiFuzz::randnum(count(wikiFuzz::$types) - 1);
- $t = wikiFuzz::$types[$i];
- $arr = wikiFuzz::$data[$t];
- $string .= "<" . $t . " ";
- $num_params = min(wikiFuzz::$maxparams, count($arr));
- for ($z=0; $z<$num_params; $z++) {
- $badparam = $arr[wikiFuzz::randnum(count($arr) - 1)];
- $badstring = wikiFuzz::makestring();
- $string .= $badparam . "=" . wikiFuzz::getRandQuote() . $badstring . wikiFuzz::getRandQuote() . " ";
- }
- $string .= ">\n";
- return $string;
- case 2: // a closing tag.
- $i = wikiFuzz::randnum(count(wikiFuzz::$types) - 1);
- return "</". wikiFuzz::$types[$i] . ">";
- case 3: // a random string, between tags.
- return wikiFuzz::makeString();
- }
- return ""; // catch-all, should never be called.
- }
-
- /**
- ** Returns one of the three styles of random quote: ', ", and nothing.
- */
- static private function getRandQuote() {
- switch ( wikiFuzz::randnum(3) ) {
- case 1 : return "'";
- case 2 : return "\"";
- default: return "";
- }
- }
-
- /**
- ** Returns fuzz text, with the parameter indicating approximately how many lines of text you want.
- */
- static public function makeFuzz($maxtypes = 2) {
- $page = "";
- for ($k=0; $k<$maxtypes; $k++) {
- $page .= wikiFuzz::loop();
- }
- return $page;
- }
-}
-
-
-//////// MEDIAWIKI PAGES TO TEST, AND HOW TO TEST THEM ///////
+ // Only some HTML tags are understood with params by MediaWiki, the rest are ignored.
+ // List the tags that accept params below, as well as what those params are.
+ // Keys are tag names and values are the attribute names tried on that tag; loop()
+ // writes both into the generated markup exactly as spelled here.
+ public static $data = array(
+ 	"B" => array( "CLASS", "ID", "STYLE", "lang", "dir", "title" ),
+ 	"CAPTION" => array( "CLASS", "ID", "STYLE", "align", "lang", "dir", "title" ),
+ 	"CENTER" => array( "CLASS", "STYLE", "ID", "lang", "dir", "title" ),
+ 	"DIV" => array( "CLASS", "STYLE", "ID", "align", "lang", "dir", "title" ),
+ 	"FONT" => array( "CLASS", "STYLE", "ID", "lang", "dir", "title", "face", "size", "color" ),
+ 	"H1" => array( "STYLE", "CLASS", "ID", "align", "lang", "dir", "title" ),
+ 	"H2" => array( "STYLE", "CLASS", "ID", "align", "lang", "dir", "title" ),
+ 	"HR" => array( "STYLE", "CLASS", "ID", "WIDTH", "lang", "dir", "title", "size", "noshade" ),
+ 	"LI" => array( "CLASS", "ID", "STYLE", "lang", "dir", "title", "type", "value" ),
+ 	"TABLE" => array( "STYLE", "CLASS", "ID", "BGCOLOR", "WIDTH", "ALIGN", "BORDER", "CELLPADDING",
+ 		"CELLSPACING", "lang", "dir", "title", "summary", "frame", "rules" ),
+ 	"TD" => array( "STYLE", "CLASS", "ID", "BGCOLOR", "WIDTH", "ALIGN", "COLSPAN", "ROWSPAN",
+ 		"VALIGN", "abbr", "axis", "headers", "scope", "nowrap", "height", "lang",
+ 		"dir", "title", "char", "charoff" ),
+ 	"TH" => array( "STYLE", "CLASS", "ID", "BGCOLOR", "WIDTH", "ALIGN", "COLSPAN", "ROWSPAN",
+ 		"VALIGN", "abbr", "axis", "headers", "scope", "nowrap", "height", "lang",
+ 		"dir", "title", "char", "charoff" ),
+ 	"TR" => array( "CLASS", "STYLE", "ID", "BGCOLOR", "ALIGN", "VALIGN", "lang", "dir", "title", "char", "charoff" ),
+ 	"UL" => array( "CLASS", "STYLE", "ID", "lang", "dir", "title", "type" ),
+ 	"P" => array( "style", "class", "id", "align", "lang", "dir", "title" ),
+ 	"blockquote" => array( "CLASS", "ID", "STYLE", "lang", "dir", "title", "cite" ),
+ 	"span" => array( "CLASS", "ID", "STYLE", "align", "lang", "dir", "title" ),
+ 	"code" => array( "CLASS", "ID", "STYLE", "lang", "dir", "title" ),
+ 	"tt" => array( "CLASS", "ID", "STYLE", "lang", "dir", "title" ),
+ 	"small" => array( "CLASS", "ID", "STYLE", "lang", "dir", "title" ),
+ 	"big" => array( "CLASS", "ID", "STYLE", "lang", "dir", "title" ),
+ 	"s" => array( "CLASS", "ID", "STYLE", "lang", "dir", "title" ),
+ 	"u" => array( "CLASS", "ID", "STYLE", "lang", "dir", "title" ),
+ 	"del" => array( "CLASS", "ID", "STYLE", "lang", "dir", "title", "datetime", "cite" ),
+ 	"ins" => array( "CLASS", "ID", "STYLE", "lang", "dir", "title", "datetime", "cite" ),
+ 	"sub" => array( "CLASS", "ID", "STYLE", "lang", "dir", "title" ),
+ 	"sup" => array( "CLASS", "ID", "STYLE", "lang", "dir", "title" ),
+ 	"ol" => array( "CLASS", "ID", "STYLE", "lang", "dir", "title", "type", "start" ),
+ 	"br" => array( "CLASS", "ID", "STYLE", "title", "clear" ),
+ 	"cite" => array( "CLASS", "ID", "STYLE", "lang", "dir", "title" ),
+ 	"var" => array( "CLASS", "ID", "STYLE", "lang", "dir", "title" ),
+ 	"dl" => array( "CLASS", "ID", "STYLE", "lang", "dir", "title" ),
+ 	"ruby" => array( "CLASS", "ID", "STYLE", "lang", "dir", "title" ),
+ 	"rt" => array( "CLASS", "ID", "STYLE", "lang", "dir", "title" ),
+ 	"rp" => array( "CLASS", "ID", "STYLE", "lang", "dir", "title" ),
+ 	"dt" => array( "CLASS", "ID", "STYLE", "lang", "dir", "title" ),
+ 	// "dd" replaces a second "dl" key: PHP keeps only one entry per key, so the
+ 	// repeated key added nothing and the dd tag was never exercised.
+ 	"dd" => array( "CLASS", "ID", "STYLE", "lang", "dir", "title" ),
+ 	"em" => array( "CLASS", "ID", "STYLE", "lang", "dir", "title" ),
+ 	"strong" => array( "CLASS", "ID", "STYLE", "lang", "dir", "title" ),
+ 	"i" => array( "CLASS", "ID", "STYLE", "lang", "dir", "title" ),
+ 	"thead" => array( "CLASS", "ID", "STYLE", "lang", "dir", "title", 'align', 'char', 'charoff', 'valign' ),
+ 	"tfoot" => array( "CLASS", "ID", "STYLE", "lang", "dir", "title", 'align', 'char', 'charoff', 'valign' ),
+ 	"tbody" => array( "CLASS", "ID", "STYLE", "lang", "dir", "title", 'align', 'char', 'charoff', 'valign' ),
+ 	"colgroup" => array( "CLASS", "ID", "STYLE", "lang", "dir", "title", 'align', 'char', 'charoff', 'valign', 'span', 'width' ),
+ 	"col" => array( "CLASS", "ID", "STYLE", "lang", "dir", "title", 'align', 'char', 'charoff', 'valign', 'span', 'width' ),
+ 	"pre" => array( "CLASS", "ID", "STYLE", "lang", "dir", "title", "width" ),
+
+ 	// extension tags that accept parameters:
+ 	"sort" => array( "order", "class" ),
+ 	"ref" => array( "name" ),
+ 	"categorytree" => array( "hideroot", "mode", "style" ),
+ 	"chemform" => array( "link", "wikilink", "query" ),
+ 	"section" => array( "begin", "new" ),
+
+ 	// older MW transclusion.
+ 	"transclude" => array( "page" ),
+ );
+
+ // Names of the tags to fuzz, i.e. the keys of wikiFuzz::$data defined above.
+ // Note: no value is assigned here; this must be set later to
+ // array_keys(wikiFuzz::$data), which is why it has to be publicly modifiable.
+ public static $types;
+
+
+ // Some attribute values: single punctuation characters, a newline, a backslash and a
+ // run of "%n" sequences. makestring() returns one of these, picked at random, in
+ // one of its three branches.
+ static private $other = array( "&", "=", ":", "?", "\"", "\n", "%n%n%n%n%n%n%n%n%n%n%n%n", "\\" );
+ // Numeric-looking values plus several HTML character-reference spellings of
+ // characters that are significant to HTML or wiki markup. makestring() returns one
+ // of these, picked at random, in one of its three branches.
+ // NOTE(review): the references are written out as plain ASCII ("&#...;") so that each
+ // spelling stays a distinct test vector and the file stays parseable. The number of
+ // leading zeros in the "long" forms is a reviewer choice - confirm against upstream.
+ static private $ints = array(
+ 	// various numbers
+ 	"0", "-1", "127", "-7897", "89000", "808080", "90928345",
+ 	"0xfffffff", "ffff",
+
+ 	// Different ways of saying: '
+ 	"&#0000039;", // Long UTF-8 Unicode encoding
+ 	"&#39;", // dec version.
+ 	"&#x27;", // hex version.
+ 	"&#xA7;", // malformed hex variant, MSB not zero.
+
+ 	// Different ways of saying: "
+ 	"&#0000034;", // Long UTF-8 Unicode encoding
+ 	"&#34;",
+ 	"&#x22;", // hex version.
+ 	"&#xA2;", // malformed hex variant, MSB not zero.
+
+ 	// Different ways of saying: <
+ 	"<",
+ 	"&#0000060", // Long UTF-8 Unicode encoding without semicolon (MediaWiki wants the semicolon)
+ 	"&#0000060;", // Long UTF-8 Unicode encoding with semicolon
+ 	"&#60;",
+ 	"&#x3C;", // hex version.
+ 	"&#xBC;", // malformed hex variant, MSB not zero.
+ 	"&#x0003C;", // mid-length hex version
+ 	"&#X00003C;", // slightly longer hex version, with capital "X"
+
+ 	// Different ways of saying: >
+ 	">",
+ 	"&#0000062;", // Long UTF-8 Unicode encoding
+ 	"&#62;",
+ 	"&#x3E;", // hex version.
+ 	"&#xBE;", // malformed variant, MSB not zero.
+
+ 	// Different ways of saying: [
+ 	"&#0000091;", // Long UTF-8 Unicode encoding
+ 	"&#91;",
+ 	"&#x5B;", // hex version.
+
+ 	// Different ways of saying: {{
+ 	"&#0000123;&#0000123;", // Long UTF-8 Unicode encoding
+ 	"&#123;&#123;",
+ 	"&#x7B;&#x7B;", // hex version.
+
+ 	// Different ways of saying: |
+ 	"&#0000124;", // Long UTF-8 Unicode encoding
+ 	"&#124;",
+ 	"&#x7C;", // hex version.
+ 	"&#xFC;", // malformed hex variant, MSB not zero.
+
+ 	// a "ligature" control character - http://www.robinlionheart.com/stds/html4/spchars#ligature
+ 	// decimal reference 8204 is the zero-width non-joiner (named entity: zwnj)
+ 	"&#8204;"
+ );
+
+ // Defines various wiki-related bits of syntax, that can potentially cause
+ // MediaWiki to do something other than just print that literal text.
+ // randstring() picks entries from this list at random; an entry that appears more
+ // than once (such as "}}") is picked proportionally more often.
+ // NOTE(review): the encoded XSS vectors and the encoded NULL field are written as
+ // ASCII character references rather than raw control characters, so they remain
+ // distinct from the "\t", "\n" and "\0" entries - confirm the spellings upstream.
+ static private $ext = array(
+ 	// links, templates, parameters.
+ 	"[[", "]]", "{{", "}}", "|", "[", "]", "{{{", "}}}", "|]]",
+
+ 	// wiki tables.
+ 	"\n{|", "\n|}",
+ 	"!",
+ 	"\n!",
+ 	"!!",
+ 	"||",
+ 	"\n|-", "| ", "\n|",
+
+ 	// section headings.
+ 	"=", "==", "===", "====", "=====", "======",
+
+ 	// lists (ordered and unordered) and indentation.
+ 	"\n*", "*", "\n:", ":",
+ 	"\n#", "#",
+
+ 	// definition lists (dl, dt, dd), newline, and newline with pre, and a tab.
+ 	"\n;", ";", "\n ",
+
+ 	// Whitespace: newline, tab, space.
+ 	"\n", "\t", " ",
+
+ 	// Some XSS attack vectors from http://ha.ckers.org/xss.html
+ 	"&#x09;", // tab
+ 	"&#x0A;", // newline
+ 	"&#x0D;", // carriage return
+ 	"\0", // null character
+ 	" &#14; ", // spaces and meta characters
+ 	"'';!--\"<XSS>=&{()}", // compact injection of XSS & SQL tester
+
+ 	// various NULL fields
+ 	"%00",
+ 	"&#00;",
+ 	"\0",
+
+ 	// horizontal rule.
+ 	"-----", "\n-----",
+
+ 	// signature, redirect, bold, italics.
+ 	"~~~~", "#REDIRECT [[", "'''", "''",
+
+ 	// comments.
+ 	"<!--", "-->",
+
+ 	// quotes.
+ 	"\"", "'",
+
+ 	// tag start and tag end.
+ 	"<", ">",
+
+ 	// implicit link creation on URIs.
+ 	"http://",
+ 	"https://",
+ 	"ftp://",
+ 	"irc://",
+ 	"news:",
+ 	'gopher://',
+ 	'telnet://',
+ 	'nntp://',
+ 	'worldwind://',
+ 	'mailto:',
+
+ 	// images.
+ 	"[[image:",
+ 	".gif",
+ 	".png",
+ 	".jpg",
+ 	".jpeg",
+ 	'thumbnail=',
+ 	'thumbnail',
+ 	'thumb=',
+ 	'thumb',
+ 	'right',
+ 	'none',
+ 	'left',
+ 	'framed',
+ 	'frame',
+ 	'enframed',
+ 	'centre',
+ 	'center',
+ 	"Image:",
+ 	"[[:Image",
+ 	'px',
+ 	'upright=',
+ 	'border',
+
+ 	// misc stuff to throw at the Parser.
+ 	'%08X',
+ 	'/',
+ 	":x{|",
+ 	"\n|+",
+ 	"<noinclude>",
+ 	"</noinclude>",
+ 	" \302\273",
+ 	" :",
+ 	" !",
+ 	" ;",
+ 	"\302\253",
+ 	"[[category:",
+ 	"?=",
+ 	"(",
+ 	")",
+ 	"]]]",
+ 	"../",
+ 	"{{{{",
+ 	"}}}}",
+ 	"[[Special:",
+ 	"<includeonly>",
+ 	"</includeonly>",
+ 	"<!--MWTEMPLATESECTION=",
+ 	'<!--MWTOC-->',
+
+ 	// implicit link creation on booknum, RFC, and PubMed ID usage (both with and without IDs)
+ 	"ISBN 2",
+ 	"RFC 000",
+ 	"PMID 000",
+ 	"ISBN ",
+ 	"RFC ",
+ 	"PMID ",
+
+ 	// magic words:
+ 	'__NOTOC__',
+ 	'__FORCETOC__',
+ 	'__NOEDITSECTION__',
+ 	'__START__',
+ 	'__NOTITLECONVERT__',
+ 	'__NOCONTENTCONVERT__',
+ 	'__END__',
+ 	'__TOC__',
+ 	'__NOTC__',
+ 	'__NOCC__',
+ 	"__FORCETOC__",
+ 	"__NEWSECTIONLINK__",
+ 	"__NOGALLERY__",
+
+ 	// more magic words / internal templates.
+ 	'{{PAGENAME}}',
+ 	'{{PAGENAMEE}}',
+ 	'{{NAMESPACE}}',
+ 	"{{MSG:",
+ 	"}}",
+ 	"{{MSGNW:",
+ 	"}}",
+ 	"{{INT:",
+ 	"}}",
+ 	'{{SITENAME}}',
+ 	"{{NS:",
+ 	"}}",
+ 	"{{LOCALURL:",
+ 	"}}",
+ 	"{{LOCALURLE:",
+ 	"}}",
+ 	"{{SCRIPTPATH}}",
+ 	"{{GRAMMAR:gentiv|",
+ 	"}}",
+ 	"{{REVISIONID}}",
+ 	"{{SUBPAGENAME}}",
+ 	"{{SUBPAGENAMEE}}",
+ 	"{{ns:0}}",
+ 	"{{fullurle:",
+ 	"}}",
+ 	"{{subst::",
+ 	"}}",
+ 	"{{UCFIRST:",
+ 	"}}",
+ 	"{{UC:",
+ 	'{{SERVERNAME}}',
+ 	'{{SERVER}}',
+ 	"{{RAW:",
+ 	"}}",
+ 	"{{PLURAL:",
+ 	"}}",
+ 	"{{LCFIRST:",
+ 	"}}",
+ 	"{{LC:",
+ 	"}}",
+ 	'{{CURRENTWEEK}}',
+ 	'{{CURRENTDOW}}',
+ 	"{{INT:{{LC:contribs-showhideminor}}|",
+ 	"}}",
+ 	"{{INT:googlesearch|",
+ 	"}}",
+ 	"{{BASEPAGENAME}}",
+ 	"{{CONTENTLANGUAGE}}",
+ 	"{{PAGESINNAMESPACE:}}",
+ 	"{{#language:",
+ 	"}}",
+ 	"{{#special:",
+ 	"}}",
+ 	"{{#special:emailuser",
+ 	"}}",
+
+ 	// Some raw link for magic words.
+ 	"{{NUMBEROFPAGES:R",
+ 	"}}",
+ 	"{{NUMBEROFUSERS:R",
+ 	"}}",
+ 	"{{NUMBEROFARTICLES:R",
+ 	"}}",
+ 	"{{NUMBEROFFILES:R",
+ 	"}}",
+ 	"{{NUMBEROFADMINS:R",
+ 	"}}",
+ 	"{{padleft:",
+ 	"}}",
+ 	"{{padright:",
+ 	"}}",
+ 	"{{DEFAULTSORT:",
+ 	"}}",
+
+ 	// internal Math "extension":
+ 	"<math>",
+ 	"</math>",
+
+ 	// Parser extension functions:
+ 	"{{#expr:",
+ 	"{{#if:",
+ 	"{{#ifeq:",
+ 	"{{#ifexist:",
+ 	"{{#ifexpr:",
+ 	"{{#switch:",
+ 	"{{#time:",
+ 	"}}",
+
+ 	// references table for the Cite extension.
+ 	"<references/>",
+
+ 	// Internal Parser tokens - try inserting some of these.
+ 	"UNIQ25f46b0524f13e67NOPARSE",
+ 	"UNIQ17197916557e7cd6-HTMLCommentStrip46238afc3bb0cf5f00000002",
+ 	"\x07UNIQ17197916557e7cd6-HTMLCommentStrip46238afc3bb0cf5f00000002-QINU",
+
+ 	// Inputbox extension:
+ 	"<inputbox>\ntype=search\nsearchbuttonlabel=\n",
+ 	"</inputbox>",
+
+ 	// charInsert extension:
+ 	"<charInsert>",
+ 	"</charInsert>",
+
+ 	// wikiHiero extension:
+ 	"<hiero>",
+ 	"</hiero>",
+
+ 	// Image gallery:
+ 	"<gallery>",
+ 	"</gallery>",
+
+ 	// FixedImage extension.
+ 	"<fundraising/>",
+
+ 	// Timeline extension: currently untested.
+
+ 	// Nowiki:
+ 	"<nOwIkI>",
+ 	"</nowiki>",
+
+ 	// an external image to test the external image displaying code
+ 	"http://debian.org/Pics/debian.png",
+
+ 	// LabeledSectionTransclusion extension.
+ 	"{{#lstx:",
+ 	"}}",
+ 	"{{#lst:",
+ 	"}}",
+ 	"{{#lst:Main Page|",
+ 	"}}"
+ );
+
+ /**
+  * Returns one randomly picked element of the given array.
+  * The element is looked up by integer position, so the array is expected to be
+  * indexed 0 .. count - 1.
+  * @param $input array
+  * @return mixed
+  */
+ static public function chooseInput( array $input ) {
+ 	$lastIndex = count( $input ) - 1;
+ 	return $input[wikiFuzz::randnum( $lastIndex )];
+ }
+
+ // Max number of parameters for HTML attributes: loop() emits at most this many
+ // name=value pairs on a generated opening tag.
+ static private $maxparams = 10;
+
+ /**
+  * Returns a random integer in the inclusive range spanned by $start and $finish.
+  *
+  * Note the parameter order: the upper bound comes first. Because that order is easy
+  * to get backwards, the two bounds are accepted in either order.
+  * @param $finish int One end of the range (conventionally the upper bound).
+  * @param $start int The other end of the range; defaults to 0.
+  * @return int
+  */
+ static public function randnum( $finish, $start = 0 ) {
+ 	// mt_rand() requires min <= max, so sort the bounds before handing them over.
+ 	return mt_rand( min( $start, $finish ), max( $start, $finish ) );
+ }
+
+ /**
+  * Returns a mix of random text and random wiki syntax.
+  *
+  * The result is built from 40 pieces. Each piece is, with equal odds, either one
+  * entry of wikiFuzz::$ext or a single "character" repeated 0 to 8 times. When the
+  * INCLUDE_BINARY constant is true that character is a hexadecimal character
+  * reference for a value 0..255; otherwise it is one byte in the range 32..126.
+  * @return string
+  */
+ static private function randstring() {
+ 	$thestring = "";
+
+ 	for ( $i = 0; $i < 40; $i++ ) {
+ 		$what = wikiFuzz::randnum( 1 );
+
+ 		if ( $what == 0 ) { // include some random wiki syntax
+ 			$which = wikiFuzz::randnum( count( wikiFuzz::$ext ) - 1 );
+ 			$thestring .= wikiFuzz::$ext[$which];
+ 		} else { // include some random text
+ 			if ( INCLUDE_BINARY ) {
+ 				// Hex character reference, zero-padded to between 2 and 7 digits.
+ 				// (Alternatives considered: a decimal reference, or a raw byte via chr().)
+ 				$hex = dechex( wikiFuzz::randnum( 255 ) );
+ 				// randnum() takes ( $finish, $start ): the upper bound 7 goes first.
+ 				$width = wikiFuzz::randnum( 7, 2 );
+ 				$char = "&#x" . str_pad( $hex, $width, "0", STR_PAD_LEFT ) . ";";
+ 			} else {
+ 				$char = chr( wikiFuzz::randnum( 126, 32 ) );
+ 			}
+
+ 			$length = wikiFuzz::randnum( 8 );
+ 			$thestring .= str_repeat( $char, $length );
+ 		}
+ 	}
+ 	return $thestring;
+ }
+
+ /**
+  * Returns one fuzz value, chosen with equal odds from three sources: a freshly
+  * built randstring(), a random entry of the "ints" list, or a random entry of the
+  * "other" list.
+  * @return string
+  */
+ static private function makestring() {
+ 	switch ( wikiFuzz::randnum( 2 ) ) {
+ 		case 0:
+ 			return wikiFuzz::randstring();
+ 		case 1:
+ 			$pool = wikiFuzz::$ints;
+ 			break;
+ 		default:
+ 			$pool = wikiFuzz::$other;
+ 			break;
+ 	}
+ 	return $pool[wikiFuzz::randnum( count( $pool ) - 1 )];
+ }
+
+ /**
+  * Callback helper for makeTitleSafe(): turns the captured character into a
+  * backslash-x escape with two lowercase hex digits, as in a C string.
+  * @param $matches array Regex match data; element 1 holds the captured character.
+  * @return string
+  */
+ static private function stringEscape( $matches ) {
+ 	$byteValue = ord( $matches[1] );
+ 	return sprintf( '\\x%02x', $byteValue );
+ }
+
+ /**
+  * Makes a string usable as a page title by replacing every character outside the
+  * legal title set with the escape produced by stringEscape().
+  * Implementation copied/pasted from cleanupTable.inc & cleanupImages.php
+  * @param $str string
+  * @return string
+  */
+ static public function makeTitleSafe( $str ) {
+ 	$legalTitleChars = " %!\"$&'()*,\\-.\\/0-9:;=?@A-Z\\\\^_`a-z~\\x80-\\xFF";
+ 	// Negated character class: matches exactly one character that is not legal.
+ 	$illegalCharPattern = "/([^" . $legalTitleChars . "])/";
+ 	return preg_replace_callback( $illegalCharPattern, 'wikiFuzz::stringEscape', $str );
+ }
+
+ /**
+  * Returns one piece of fuzz text: an opening tag with random attributes, a
+  * closing tag, or a random string, each chosen with equal odds.
+  * @return string
+  */
+ static private function loop() {
+ 	// $types is declared without a value and is meant to be set to the keys of
+ 	// $data; fall back to exactly that when nobody has set it yet.
+ 	if ( wikiFuzz::$types === null ) {
+ 		wikiFuzz::$types = array_keys( wikiFuzz::$data );
+ 	}
+ 	$lastType = count( wikiFuzz::$types ) - 1;
+
+ 	// Draw from 1..3 so that every draw lands on a case below. Drawing from 0..3
+ 	// sent a quarter of the calls to the catch-all and returned an empty string.
+ 	switch ( wikiFuzz::randnum( 3, 1 ) ) {
+ 		case 1: // an opening tag, with parameters.
+ 			$t = wikiFuzz::$types[wikiFuzz::randnum( $lastType )];
+ 			$arr = wikiFuzz::$data[$t];
+ 			$string = "<" . $t . " ";
+ 			// Attribute names are drawn independently, so the same one may repeat.
+ 			$num_params = min( wikiFuzz::$maxparams, count( $arr ) );
+ 			for ( $z = 0; $z < $num_params; $z++ ) {
+ 				$badparam = $arr[wikiFuzz::randnum( count( $arr ) - 1 )];
+ 				$badstring = wikiFuzz::makestring();
+ 				// The two quotes are drawn independently and may not match.
+ 				$string .= $badparam . "=" . wikiFuzz::getRandQuote() . $badstring . wikiFuzz::getRandQuote() . " ";
+ 			}
+ 			$string .= ">\n";
+ 			return $string;
+ 		case 2: // a closing tag.
+ 			return "</" . wikiFuzz::$types[wikiFuzz::randnum( $lastType )] . ">";
+ 		case 3: // a random string, between tags.
+ 			return wikiFuzz::makestring();
+ 	}
+ 	return ""; // catch-all, not reached with the 1..3 draw above.
+ }
+
+ /**
+  * Returns a random quote style: a single quote, a double quote, or an empty
+  * string. Two of the four possible draws map to the empty string.
+  * @return string
+  */
+ static private function getRandQuote() {
+ 	$quoteByDraw = array( 1 => "'", 2 => "\"" );
+ 	$draw = wikiFuzz::randnum( 3 );
+ 	if ( isset( $quoteByDraw[$draw] ) ) {
+ 		return $quoteByDraw[$draw];
+ 	}
+ 	return "";
+ }
+
+ /**
+  * Returns fuzz text made of $maxtypes consecutive loop() results, so the
+  * parameter indicates approximately how many lines of text you want.
+  * @param $maxtypes int
+  * @return string
+  */
+ static public function makeFuzz( $maxtypes = 2 ) {
+ 	$pieces = array();
+ 	for ( $made = 0; $made < $maxtypes; $made++ ) {
+ 		$pieces[] = wikiFuzz::loop();
+ 	}
+ 	return implode( "", $pieces );
+ }
+}
+
+
+// ////// MEDIAWIKI PAGES TO TEST, AND HOW TO TEST THEM ///////