Broken initial XML parser. Just getting it in here so we can preserve debug stuff.
[isso.git] / xml.php
1 <?php
2 /*=====================================================================*\
3 || ################################################################### ||
4 || # Iris Studios Shared Object Framework [#]version[#]
5 || # --------------------------------------------------------------- # ||
6 || # All parts of this file are ©2003-[#]year[#] Iris Studios, Inc. No # ||
7 || # part of this file may be reproduced in any way: part or whole. # ||
8 || # --------------------------------------------------------------- # ||
9 || # ©2003 - [#]year[#] Iris Studios, Inc. | http://www.iris-studios.com # ||
10 || ################################################################### ||
11 \*=====================================================================*/
12
// Descriptor values for this file: display name, class name and short handle.
// NOTE(review): presumably picked up by whatever includes this file -- confirm against the loader.
list($OBJECT, $CLASS, $OBJ) = array('XML Parser', 'XML_Parser', 'xml');
16
17 /**
18 * XML Parser
19 *
20 * This framework is a wrapper for a robust XML parser.
21 *
22 * @author Iris Studios, Inc.
23 * @copyright Copyright ©2003 - [#]year[#], Iris Studios, Inc.
24 * @version $Revision$
25 *
26 */
class XML_Parser
{
    /**
    * Parser state and configuration
    *
    * @var	parser		Current XML parser resource/object (set by parse())
    * @var	taginfo		Collected character data, indexed [tag name][parent][tag ID]
    * @var	taghandler	Map of tag name => name of a function to run on that tag's data
    * @var	trimdata	Strip leading/trailing whitespace from collected data?
    * @var	attrdata	Attributes of every opened tag, indexed by tag ID
    * @var	tagid		ID of the most recently opened tag (-1 before the first tag)
    * @var	tagname		Name of the most recently opened tag
    * @var	tree		List of tag IDs in the order they were opened
    * @var	parent		Parent index computed for the most recently opened tag
    */
    var $parser = null;
    var $taginfo = array();
    var $taghandler = array();
    var $trimdata = true;
    var $attrdata = array();
    var $tagid = -1;
    var $tagname = '';
    var $tree = array();
    var $parent = -1;

    /**
    * Parse a complete XML document
    *
    * On a parse error a message is printed and the script exits. State is
    * not reset between calls, so a second call on the same instance keeps
    * adding to $taginfo and $attrdata.
    *
    * @param	str		XML file data (the whole document)
    *
    * @return	array	The collected tag data ($this->taginfo)
    */
    function parse($data)
    {
        $this->parser = xml_parser_create();
        xml_set_object($this->parser, $this);
        // keep tag and attribute names in their original case
        xml_parser_set_option($this->parser, XML_OPTION_CASE_FOLDING, 0);
        xml_set_element_handler($this->parser, '_tag_start', '_tag_end');
        xml_set_character_data_handler($this->parser, '_cdata');

        // the whole document is handed over in one piece, so flag it as the
        // final chunk; otherwise a truncated document is not reported
        if (!xml_parse($this->parser, $data, true))
        {
            $error = array();
            $error['code'] = xml_get_error_code($this->parser);
            $error['string'] = xml_error_string($error['code']);
            $error['line'] = xml_get_current_line_number($this->parser);
            $error['column'] = xml_get_current_column_number($this->parser);
            print(sprintf("XML Error: %s (%d) at line %d column %d", $error['string'], $error['code'], $error['line'], $error['column']));
            exit;
        }

        xml_parser_free($this->parser);

        return $this->taginfo;
    }

    /**
    * Process the opening location of an XML tag
    *
    * Assigns the next tag ID, stores the (trimmed) attributes, works out the
    * parent index and creates an empty data slot for the tag.
    *
    * @param	res		XML parser
    * @param	str		Tag name
    * @param	array	Tag attributes
    */
    function _tag_start($parser, $name, $attributes)
    {
        // we're opening a new tag
        $this->tagid++;
        $this->tagname = $name;

        // copy all tag attributes, trimmed; array_map() is required here
        // because trim() returns its result instead of modifying in place
        $this->attrdata[ $this->tagid ] = array_map('trim', $attributes);

        // current() yields false while the tree is still empty (root tag)
        $this->parent = current($this->tree);
        if ($this->parent === false)
        {
            $this->parent = -1;
        }
        // advance the parent counter because the tree is yet to be updated
        else
        {
            $this->parent++;
        }

        // initialize the data set
        $this->taginfo["$name"][ $this->parent ][ $this->tagid ] = '';

        // add the current tag into the tree
        // NOTE(review): nothing ever removes entries from the tree -- confirm
        // whether closed tags are supposed to be popped in _tag_end()
        $this->tree[] = $this->tagid;
    }

    /**
    * Process XML CDATA
    *
    * Appends the received text to the data slot of the most recently opened
    * tag. May be called several times for one text node.
    *
    * @param	res		XML parser
    * @param	str		CDATA from tag
    */
    function _cdata($parser, $data)
    {
        global $_isso;
        // only used by the disabled debug block below
        static $count;

        /*if (preg_replace('#(^[[:space:]]+|[[:space:]]+$)#', '', trim($data)) == '')
        {
            $count++;
            $datanew = str_replace(array("\t", "\n", "\r", " "), array("{t}", "{n}", "{r}", "{s}"), $data);
            $_isso->debug("strangechars[" . $this->tagid . "] = '$datanew'");
            $data = $datanew . "||$count||";
            //return;
        }*/

        $_isso->debug("cdata[] = '$data'");

        // when the parent index equals the tag ID the data is filed one
        // parent level lower; otherwise the parent index is used as is
        $parent = $this->parent;
        if ($this->parent == $this->tagid)
        {
            $parent = $this->parent - 1;
        }

        // the slot may not exist yet, or may have been removed by _tag_end()
        if (!isset($this->taginfo[ $this->tagname ][ $parent ][ $this->tagid ]))
        {
            $this->taginfo[ $this->tagname ][ $parent ][ $this->tagid ] = '';
        }

        // read in the CDATA
        $this->taginfo[ $this->tagname ][ $parent ][ $this->tagid ] .= $data;
    }

    /**
    * Process the closing of an XML tag
    *
    * Runs the optional tag handler over the collected data, trims it if
    * requested and drops the slot entirely when nothing but whitespace is
    * left.
    *
    * NOTE(review): the slot is looked up through $this->tagname, $this->parent
    * and $this->tagid (the most recently OPENED tag); $name is not consulted.
    * Confirm whether that is intended for enclosing tags that close after
    * their children.
    *
    * @param	res		XML parser
    * @param	str		Tag name
    */
    function _tag_end($parser, $name)
    {
        global $_isso;

        // fetch the data; nothing to do when the slot does not exist
        if (!isset($this->taginfo[ $this->tagname ][ $this->parent ][ $this->tagid ]))
        {
            return;
        }
        $data = $this->taginfo[ $this->tagname ][ $this->parent ][ $this->tagid ];

        // debug note only: processing still continues for blank data
        if (trim($data) == '')
        {
            $_isso->debug("not going to bother with '" . $this->tagid . "'");
        }

        // if we have a data handler, operate it now
        if (isset($this->taghandler[ $this->tagname ]))
        {
            $_isso->debug("handler: " . $this->taghandler[ $this->tagname ]);
            if (function_exists($this->taghandler[ $this->tagname ]))
            {
                $data = $this->taghandler[ $this->tagname ]($data);
            }
        }

        // check for necessary trims
        if ($this->trimdata)
        {
            // trim() doesn't take off the edge for more than a few white characters, preg_replace does; don't use [:space:] either...
            //$data = preg_replace('#(^\s+|\s+$)#', '', $data);
            //$data = preg_replace('#(^[[:space:]]+|[[:space:]]+$)#', '', trim($data));
            $data = preg_replace('#^\s+#', '', $data);
            $data = preg_replace('#\s+$#', '', $data);
            $_isso->debug("trimming data ('" . $this->tagid . "'): '" . $data . "'");
            //$data = trim($data);
        }

        // don't want blank fields
        if (trim($data) != '')
        {
            $this->taginfo[ $this->tagname ][ $this->parent ][ $this->tagid ] = $data;
        }
        else
        {
            unset($this->taginfo[ $this->tagname ][ $this->parent ][ $this->tagid ]);
        }

        $_isso->debug("tree[] = " . current($this->tree));

        // make sure that we're always at the end of the tree
        end($this->tree);
    }
}
217
218 /*=====================================================================*\
219 || ###################################################################
220 || # $HeadURL$
221 || # $Id$
222 || ###################################################################
223 \*=====================================================================*/
224 ?>