b3d816cee6accb1e8920dddbb13982d6e25219a8
[lhc/web/wiklou.git] / includes / ParserXML.php
1 <?
2 require_once ( "Parser.php" ) ;
3
4 /**
5 * This should one day become the XML->(X)HTML parser
6 * Based on work by Jan Hidders and Magnus Manske
7 * @package MediaWiki
8 * @subpackage Experimental
9 */
10
/**
 * One node of the element tree built from the XML form of an article.
 *
 * A node stores the tag name it was created from, that tag's attributes and
 * an ordered list of children. A child is either a plain string (character
 * data) or another element. makeXHTML() compares names and attribute keys in
 * upper case ("ARTICLE", "LEVEL", ...).
 */
class element {
	var $name = '';
	var $attrs = array();
	var $children = array();

	/**
	 * Render all children, optionally wrapped in an XHTML tag.
	 *
	 * String children are character data whose entities have already been
	 * decoded, so they are escaped again before being put into the markup.
	 * Element children are rendered recursively through makeXHTML().
	 *
	 * @param mixed  $parser passed through unchanged to the children
	 * @param string $tag    XHTML tag to wrap the children in, "" for none
	 * @param string $attr   ready-made attribute string for the opening tag;
	 *                       inserted verbatim, the caller must escape it
	 * @return string
	 */
	function sub_makeXHTML ( &$parser , $tag = "" , $attr = "" )
	{
		$wrap = ( $tag != "" ) ;
		$ret = "" ;
		if ( $wrap ) {
			$ret .= "<" . $tag ;
			if ( $attr != "" ) $ret .= " " . $attr ;
			$ret .= ">" ;
		}
		foreach ( $this->children as $child ) {
			if ( is_string ( $child ) ) {
				# ENT_NOQUOTES: text content only needs &, < and > escaped
				$ret .= htmlspecialchars ( $child , ENT_NOQUOTES ) ;
			} else {
				$ret .= $child->makeXHTML ( $parser ) ;
			}
		}
		if ( $wrap ) $ret .= "</" . $tag . ">\n" ;
		return $ret ;
	}

	/**
	 * Heading level taken from the LEVEL attribute, forced into 1..6.
	 *
	 * The value ends up inside a tag name ("h" . level), so it is reduced to
	 * an integer in the range XHTML knows; a missing attribute yields 1.
	 *
	 * @return int
	 */
	function headingLevel ()
	{
		$level = isset ( $this->attrs["LEVEL"] ) ? intval ( $this->attrs["LEVEL"] ) : 1 ;
		if ( $level < 1 ) return 1 ;
		if ( $level > 6 ) return 6 ;
		return $level ;
	}

	/**
	 * Convert this element and everything below it to XHTML.
	 *
	 * @param mixed $parser passed through to sub_makeXHTML()
	 * @return string
	 */
	function makeXHTML ( &$parser )
	{
		$n = $this->name ; # Shortcut

		# Elements that map one-to-one onto an XHTML tag
		$simple = array (
			"PARAGRAPH" => "p" ,
			"BOLD" => "strong" ,
			"ITALICS" => "em" ,
			"TABLE" => "table" ,
			"TABLEROW" => "tr" ,
			"TABLECELL" => "td" ,
			"LISTITEM" => "li" ,
		) ;
		if ( isset ( $simple[$n] ) )
			return $this->sub_makeXHTML ( $parser , $simple[$n] ) ;

		switch ( $n ) {
			case "ARTICLE" :
				# The document root adds no markup of its own
				return $this->sub_makeXHTML ( $parser ) ;

			case "HEADING" :
				return $this->sub_makeXHTML ( $parser , "h" . $this->headingLevel () ) ;

			case "EXTENSION" :
				# TODO: extensions (selected by the NAME attribute) are not
				# rendered yet; the element and its content produce no output.
				return "" ;

			case "LIST" :
				# Ordered list unless TYPE says "bullet"; TYPE may be absent
				$bullet = isset ( $this->attrs["TYPE"] ) && $this->attrs["TYPE"] == "bullet" ;
				return $this->sub_makeXHTML ( $parser , $bullet ? "ul" : "ol" ) ;
		}

		# Unknown element: show its tag as visible text around the content
		$shown = htmlspecialchars ( $n , ENT_NOQUOTES ) ;
		return "&lt;" . $shown . "&gt;"
			. $this->sub_makeXHTML ( $parser )
			. "&lt;/" . $shown . "&gt;" ;
	}

	/**
	 * Debug dump of the subtree as nested HTML lists.
	 *
	 * Names, attributes and text are escaped so they are displayed literally.
	 *
	 * @return string
	 */
	function myPrint() {
		$ret = "<ul>\n";
		$ret .= "<li> <b> Name: </b> " . htmlspecialchars ( $this->name , ENT_NOQUOTES ) . " </li>\n";
		// print attributes
		$ret .= '<li> <b> Attributes: </b>';
		foreach ($this->attrs as $name => $value) {
			$ret .= htmlspecialchars ( $name , ENT_NOQUOTES ) . " => "
				. htmlspecialchars ( $value , ENT_NOQUOTES ) . "; " ;
		}
		$ret .= " </li>\n";
		// print children
		foreach ($this->children as $child) {
			if ( is_string($child) ) {
				$ret .= "<li> " . htmlspecialchars ( $child , ENT_NOQUOTES ) . " </li>\n";
			} else {
				$ret .= $child->myPrint();
			}
		}
		$ret .= "</ul>\n";
		return $ret;
	}
}
115
/**
 * Stack of ancestor elements: the elements whose start tag has been seen
 * but whose end tag has not. Starts out empty.
 */
$ancStack = array();
117
118 // Three global functions needed for parsing, sorry guys
/**
 * Start-tag handler: create an element for the tag that was just opened
 * and push it on the global ancestor stack.
 *
 * @param mixed  $parser the XML parser invoking the handler (unused)
 * @param string $name   tag name
 * @param array  $attrs  attributes of the tag, name => value
 */
function wgXMLstartElement($parser, $name, $attrs) {
	global $ancStack;

	$opened = new element;
	$opened->attrs = $attrs;
	$opened->name = $name;

	// The new element stays on the stack until its end tag arrives
	array_push($ancStack, $opened);
}
128
/**
 * End-tag handler: take the finished element off the global ancestor stack
 * and either attach it to its parent or, when no ancestor is left, store it
 * in the global $rootElem.
 *
 * @param mixed  $parser the XML parser invoking the handler (unused)
 * @param string $name   tag name (unused, the stack top is the closed element)
 */
function wgXMLendElement($parser, $name) {
	global $ancStack, $rootElem;

	$closed = array_pop ($ancStack);

	if (count ($ancStack) != 0) {
		// The new stack top is the parent of the element just closed
		$parentIndex = count ($ancStack) - 1;
		array_push ($ancStack[$parentIndex]->children, $closed);
		return;
	}

	// Nothing left on the stack: this was the outermost element
	$rootElem = $closed;
}
139
/**
 * Character-data handler: append a piece of text to the element that is
 * currently open (the top of the global ancestor stack).
 *
 * The XML parser may deliver one run of text in several calls, e.g. split
 * at entity references, so a chunk must not be trimmed: doing so would glue
 * words together ("cool ", "&", " stuff" would become "cool&stuff").
 * Only chunks consisting of nothing but whitespace are dropped.
 *
 * @param mixed  $parser the XML parser invoking the handler (unused)
 * @param string $data   the text chunk
 */
function wgXMLcharacterData($parser, $data) {
	global $ancStack;

	// Text outside of any element has no parent to attach to
	if ( !$ancStack ) {
		return;
	}
	// Don't add blank lines, they're no use...
	if ( trim ($data) === "" ) {
		return;
	}
	array_push ($ancStack[count($ancStack)-1]->children, $data);
}
148
149
/**
 * Builds an element tree from XML, read from a file or from a string.
 *
 * Parsing is driven by the global handler functions wgXMLstartElement,
 * wgXMLendElement and wgXMLcharacterData, which communicate through the
 * globals $ancStack and $rootElem. Both scan methods terminate the script
 * with die() when the input cannot be read or is not well-formed.
 */
class xml2php {

	/**
	 * Reset the shared parse state and create a parser wired to the
	 * global handler functions.
	 *
	 * @return mixed the new XML parser
	 */
	function _createParser() {
		global $ancStack, $rootElem;
		$ancStack = array();
		// Forget the result of an earlier run so it can never be returned again
		$rootElem = null;

		$xml_parser = xml_parser_create();
		xml_set_element_handler ($xml_parser, 'wgXMLstartElement', 'wgXMLendElement');
		xml_set_character_data_handler ($xml_parser, 'wgXMLcharacterData');
		return $xml_parser;
	}

	/**
	 * Describe the parser's current error.
	 *
	 * @param mixed $xml_parser parser on which xml_parse() just failed
	 * @return string
	 */
	function _errorMessage( $xml_parser ) {
		return sprintf("XML error: %s at line %d",
			xml_error_string(xml_get_error_code($xml_parser)),
			xml_get_current_line_number($xml_parser));
	}

	/**
	 * Parse an XML file and return the root of the resulting tree.
	 *
	 * The file is fed to the parser in 4096 byte chunks. The end of the
	 * document is signalled with a separate, empty final call, so it is
	 * signalled whatever the file size and truncated input is reported.
	 *
	 * @param string $filename path of the XML file
	 * @return mixed root element
	 */
	function &scanFile( $filename ) {
		global $rootElem;

		$xml_parser = $this->_createParser();
		if (!($fp = fopen($filename, 'r'))) {
			die('could not open XML input');
		}

		$failure = '';
		while (!feof($fp)) {
			$data = fread($fp, 4096);
			if ($data === false || $data === '') {
				// Read error or nothing left; stop instead of spinning
				break;
			}
			if (!xml_parse($xml_parser, $data, false)) {
				$failure = $this->_errorMessage($xml_parser);
				break;
			}
		}
		fclose($fp);

		// Tell the parser the document is complete
		if ($failure === '' && !xml_parse($xml_parser, '', true)) {
			$failure = $this->_errorMessage($xml_parser);
		}
		xml_parser_free($xml_parser);
		if ($failure !== '') {
			die($failure);
		}

		// $rootElem was filled in by wgXMLendElement when the outermost tag closed
		return $rootElem;
	}

	/**
	 * Parse XML given as a string and return the root of the resulting tree.
	 *
	 * @param string $input the complete XML document
	 * @return mixed root element
	 */
	function scanString ( $input ) {
		global $rootElem;

		$xml_parser = $this->_createParser();
		if (!xml_parse ($xml_parser, $input, true)) {
			$failure = $this->_errorMessage($xml_parser);
			xml_parser_free ($xml_parser);
			die ($failure);
		}
		xml_parser_free ($xml_parser);

		// $rootElem was filled in by wgXMLendElement when the outermost tag closed
		return $rootElem;
	}

}
198
199 /* Example code:
200
201 $w = new xml2php;
202 $filename = 'sample.xml';
203 $result = $w->scanFile( $filename );
204 print $result->myPrint();
205 */
206
/**
 * Hard-coded sample article in the XML format. The string is assembled from
 * one piece per top-level child of <article>; the resulting value is a
 * single line without any added whitespace.
 */
$dummytext =
	"<article>"
	. "<heading level='2'> R-type </heading>"
	. "<paragraph><link><linktarget>image:a.jpg</linktarget><linkoption>1</linkoption><linkoption>2</linkoption><linkoption>3</linkoption><linkoption>text</linkoption></link></paragraph>"
	. "<paragraph>The <link><linktarget>video game</linktarget><linkoption>computer game</linkoption></link> <bold>R-type</bold> is <extension name='nowiki'>cool &amp; stuff</extension> because:</paragraph>"
	. "<list type='bullet'><listitem>it's nice</listitem><listitem>it's fast</listitem><listitem>it has:<list type='bullet'><listitem>graphics</listitem><listitem>sound</listitem></list></listitem></list>"
	. "<table><tablerow><tablecell>Version 1 </tablecell><tablecell>not bad</tablecell></tablerow><tablerow><tablecell>Version 2 </tablecell><tablecell>much better </tablecell></tablerow></table>"
	. "<paragraph>This is a || token in the middle of text.</paragraph>"
	. "</article>" ;
208
/**
 * Experimental parser that renders the XML form of an article as XHTML.
 *
 * NOTE: parse() does not process the text it is given yet; it always renders
 * the hard-coded sample document in the global $dummytext.
 */
class ParserXML extends Parser
{
	/**#@+
	 * @access private
	 */
	# Persistent:
	var $mTagHooks;

	# Cleared with clearState():
	var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
	var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;

	# Temporary:
	var $mOptions, $mTitle, $mOutputType,
		$mTemplates,	// cache of already loaded templates, avoids
				// multiple SQL queries for the same string
		$mTemplatePath;	// stores an unsorted hash of all the templates already loaded
				// in this path. Used for loop detection.

	/**#@-*/

	/**
	 * Constructor
	 *
	 * Declared as __construct() because PHP 8 no longer treats a method
	 * named after its class as the constructor.
	 *
	 * @access public
	 */
	function __construct() {
		$this->mTemplates = array();
		$this->mTemplatePath = array();
		$this->mTagHooks = array();
		$this->clearState();
	}

	/**
	 * Old-style constructor name, kept for code that calls it directly and
	 * for PHP versions that only know class-named constructors.
	 *
	 * @access public
	 */
	function ParserXML() {
		$this->__construct();
	}

	/**
	 * Clear Parser state
	 *
	 * Replaces the output object and resets the per-parse counters, flags
	 * and stacks to their initial values.
	 *
	 * @access private
	 */
	function clearState() {
		$this->mOutput = new ParserOutput;
		$this->mAutonumber = 0;
		$this->mLastSection = "";
		$this->mDTopen = false;
		$this->mVariables = false;
		$this->mIncludeCount = array();
		$this->mStripState = array();
		$this->mArgStack = array();
		$this->mInPre = false;
	}

	/**
	 * Render the sample document and return it wrapped in the output object.
	 *
	 * The generated text consists of three parts separated by <hr>: the XML
	 * source, its XHTML rendering and a debug dump of the element tree.
	 *
	 * @param string $text       ignored for now, replaced by $dummytext
	 * @param mixed  $title      unused here
	 * @param mixed  $options    unused here
	 * @param bool   $linestart  unused here
	 * @param bool   $clearState whether to reset the parser state first, so
	 *                           that an output object handed out by an earlier
	 *                           call is not modified by this one
	 * @return ParserOutput
	 */
	function parse( $text, &$title, $options, $linestart = true, $clearState = true ) {
		global $dummytext ;

		if ( $clearState ) {
			$this->clearState();
		}

		# Placeholder input until real XML is passed in
		$text = $dummytext ;

		$w = new xml2php;
		$result = $w->scanString( $text );
		$text .= "<hr>" . $result->makeXHTML ( $this );
		$text .= "<hr>" . $result->myPrint();

		$this->mOutput->setText ( $text ) ;
		return $this->mOutput;
	}

}

?>
273
274 ?>