<?php
// |          Tomas V.V.Cox                                               |
// |          Stephan Schmidt                                             |
// +----------------------------------------------------------------------+
//
// $Id$

/**
 * XML Parser class.
 *
 * This is an XML parser based on PHP's "xml" extension,
 * based on the bundled expat library.
 *
 * @category XML
 * @package XML_Parser
 * @author  Stig Bakken
 * @author  Tomas V.V.Cox
 * @author  Stephan Schmidt
 */

/**
 * uses PEAR's error handling
 */
require_once 'PEAR.php';

/**
 * resource could not be created
 */
define('XML_PARSER_ERROR_NO_RESOURCE', 200);

/**
 * unsupported mode
 */
define('XML_PARSER_ERROR_UNSUPPORTED_MODE', 201);

/**
 * invalid encoding was given
 */
define('XML_PARSER_ERROR_INVALID_ENCODING', 202);

/**
 * specified file could not be read
 */
define('XML_PARSER_ERROR_FILE_NOT_READABLE', 203);

/**
 * invalid input
 */
define('XML_PARSER_ERROR_INVALID_INPUT', 204);

/**
 * remote file cannot be retrieved in safe mode
 */
define('XML_PARSER_ERROR_REMOTE', 205);

/**
 * XML Parser class.
 *
 * This is an XML parser based on PHP's "xml" extension,
 * based on the bundled expat library.
 *
 * Notes:
 * - It requires PHP 4.0.4pl1 or greater
 * - From revision 1.17, the function names used by the 'func' mode
 *   are in the format "xmltag_$elem", for example: use "xmltag_name"
 *   to handle the <name></name> tags of your xml file.
 *
 * @category XML
 * @package XML_Parser
 * @author  Stig Bakken
 * @author  Tomas V.V.Cox
 * @author  Stephan Schmidt
 * @todo    create XML_Parser_Namespace to parse documents with namespaces
 * @todo    create XML_Parser_Pull
 * @todo    Tests that need to be made:
 *          - mixing character encodings
 *          - a test using all expat handlers
 *          - options (folding, output charset)
 *          - different parsing modes
 */
class XML_Parser extends PEAR
{
    // {{{ properties

    /**
     * XML parser handle
     *
     * @var  resource
     * @see  xml_parser_create()
     */
    var $parser;

    /**
     * File handle if parsing from a file; holds the raw XML string
     * instead when the input was set from a string.
     *
     * @var  resource|string
     */
    var $fp;

    /**
     * Whether to do case folding
     *
     * If set to true, all tag and attribute names will
     * be converted to UPPER CASE.
     *
     * @var  boolean
     */
    var $folding = true;

    /**
     * Mode of operation, one of "event" or "func"
     *
     * @var  string
     */
    var $mode;

    /**
     * Mapping from expat handler function to class method.
     *
     * The key is the suffix of the matching xml_set_*() function.
     *
     * @var  array
     */
    var $handler = array(
        'character_data_handler'            => 'cdataHandler',
        'default_handler'                   => 'defaultHandler',
        'processing_instruction_handler'    => 'piHandler',
        'unparsed_entity_decl_handler'      => 'unparsedHandler',
        'notation_decl_handler'             => 'notationHandler',
        'external_entity_ref_handler'       => 'entityrefHandler'
    );

    /**
     * source encoding
     *
     * @var string
     */
    var $srcenc;

    /**
     * target encoding
     *
     * @var string
     */
    var $tgtenc;

    /**
     * handler object
     *
     * @var object
     */
    var $_handlerObj;

    /**
     * valid encodings
     *
     * @var array
     */
    var $_validEncodings = array('ISO-8859-1', 'UTF-8', 'US-ASCII');

    // }}}
    // {{{ constructor

    /**
     * PHP5 constructor
     *
     * Only stores the settings; the parser handle itself is created
     * later by _create().
     *
     * @param string $srcenc source charset encoding, use NULL (default) to use
     *                       whatever the document specifies
     * @param string $mode   how this parser object should work, "event" for
     *                       startelement/endelement-type events, "func"
     *                       to have it call functions named after elements
     * @param string $tgtenc a valid target encoding
     */
    function __construct($srcenc = null, $mode = 'event', $tgtenc = null)
    {
        $this->PEAR('XML_Parser_Error');

        $this->mode   = $mode;
        $this->srcenc = $srcenc;
        $this->tgtenc = $tgtenc;
    }

    /**
     * Creates an XML parser.
     *
     * This is needed for PHP4 compatibility, it will
     * call the constructor, when a new instance is created.
     *
     * @param string $srcenc source charset encoding, use NULL (default) to use
     *                       whatever the document specifies
     * @param string $mode   how this parser object should work, "event" for
     *                       startelement/endelement-type events, "func"
     *                       to have it call functions named after elements
     * @param string $tgtenc a valid target encoding
     */
    function XML_Parser($srcenc = null, $mode = 'event', $tgtenc = null)
    {
        XML_Parser::__construct($srcenc, $mode, $tgtenc);
    }

    // }}}
    // {{{ setMode()

    /**
     * Sets the mode of the parser.
     *
     * Possible modes are:
     * - func
     * - event
     *
     * You can set the mode using the second parameter
     * in the constructor.
     *
     * This method is only needed, when switching to a new
     * mode at a later point.
     *
     * @access  public
     * @param   string          mode, either 'func' or 'event'
     * @return  boolean|object  true on success, PEAR_Error otherwise
     */
    function setMode($mode)
    {
        if ($mode != 'func' && $mode != 'event') {
            // hand the error back and leave the current mode untouched
            return $this->raiseError('Unsupported mode given', XML_PARSER_ERROR_UNSUPPORTED_MODE);
        }

        $this->mode = $mode;
        return true;
    }

    // }}}
    // {{{ setHandlerObj()

    /**
     * Sets the object, that will handle the XML events
     *
     * This allows you to create a handler object independent of the
     * parser object that you are using and easily switch the underlying
     * parser.
     *
     * If no object will be set, XML_Parser assumes that you
     * extend this class and handle the events in $this.
     *
     * @access  public
     * @param   object      object to handle the events
     * @return  boolean     will always return true
     * @since   v1.2.0beta3
     */
    function setHandlerObj(&$obj)
    {
        $this->_handlerObj = &$obj;
        return true;
    }

    // }}}
    // {{{ _isParserHandle()

    /**
     * Tells whether a value is a usable expat parser handle.
     *
     * xml_parser_create() returns a resource before PHP 8 and an
     * XMLParser object from PHP 8 on; both are accepted.
     *
     * @access  private
     * @param   mixed   $handle  value to inspect
     * @return  boolean
     */
    function _isParserHandle($handle)
    {
        return is_resource($handle)
            || (is_object($handle) && is_a($handle, 'XMLParser'));
    }

    // }}}
    // {{{ _initHandlers()

    /**
     * Init the element handlers
     *
     * @access  private
     * @return  boolean|object  true on success, false if no parser has been
     *                          created yet, PEAR_Error on an unsupported mode
     */
    function _initHandlers()
    {
        if (!$this->_isParserHandle($this->parser)) {
            return false;
        }

        // without an explicit handler object the events go to this instance
        if (!is_object($this->_handlerObj)) {
            $this->_handlerObj = &$this;
        }

        switch ($this->mode) {

            case 'func':
                // the element callbacks live in this class and dispatch
                // to the xmltag_* methods of the handler object
                xml_set_object($this->parser, $this->_handlerObj);
                xml_set_element_handler($this->parser, array(&$this, 'funcStartHandler'), array(&$this, 'funcEndHandler'));
                break;

            case 'event':
                xml_set_object($this->parser, $this->_handlerObj);
                xml_set_element_handler($this->parser, 'startHandler', 'endHandler');
                break;

            default:
                return $this->raiseError('Unsupported mode given', XML_PARSER_ERROR_UNSUPPORTED_MODE);
        }

        /**
         * set additional handlers for character data, entities, etc.
         */
        foreach ($this->handler as $xml_func => $method) {
            if (method_exists($this->_handlerObj, $method)) {
                $xml_func = 'xml_set_' . $xml_func;
                $xml_func($this->parser, $method);
            }
        }

        return true;
    }

    // }}}
    // {{{ _create()

    /**
     * create the XML parser resource
     *
     * Has been moved from the constructor to avoid
     * problems with object references.
     *
     * Furthermore it allows us returning an error
     * if something fails.
     *
     * @access  private
     * @return  boolean|object  true on success, PEAR_Error otherwise
     *
     * @see     xml_parser_create
     */
    function _create()
    {
        if ($this->srcenc === null) {
            $xp = @xml_parser_create();
        } else {
            $xp = @xml_parser_create($this->srcenc);
        }

        if (!$this->_isParserHandle($xp)) {
            // only blame the encoding when one was actually requested
            if ($this->srcenc !== null
                && !in_array(strtoupper($this->srcenc), $this->_validEncodings)) {
                return $this->raiseError('invalid source encoding', XML_PARSER_ERROR_INVALID_ENCODING);
            }
            return $this->raiseError('Unable to create XML parser resource.', XML_PARSER_ERROR_NO_RESOURCE);
        }

        if ($this->tgtenc !== null) {
            if (!@xml_parser_set_option($xp, XML_OPTION_TARGET_ENCODING, $this->tgtenc)) {
                // do not leak the freshly created parser on failure
                xml_parser_free($xp);
                return $this->raiseError('invalid target encoding', XML_PARSER_ERROR_INVALID_ENCODING);
            }
        }

        $this->parser = $xp;

        $result = $this->_initHandlers();
        if ($this->isError($result)) {
            return $result;
        }

        xml_parser_set_option($xp, XML_OPTION_CASE_FOLDING, $this->folding);
        return true;
    }

    // }}}
    // {{{ reset()

    /**
     * Reset the parser.
     *
     * This allows you to use one parser instance
     * to parse multiple XML documents.
     *
     * @access   public
     * @return   boolean|object     true on success, PEAR_Error otherwise
     */
    function reset()
    {
        $result = $this->_create();
        if ($this->isError($result)) {
            return $result;
        }
        return true;
    }

    // }}}
    // {{{ setInputFile()

    /**
     * Sets the input xml file to be parsed
     *
     * @param    string      Filename (full path)
     * @return   resource    fopen handle of the given file
     * @throws   XML_Parser_Error
     * @see      setInput(), setInputString(), parse()
     * @access   public
     */
    function setInputFile($file)
    {
        /**
         * check, if file is a remote file
         */
        if (preg_match('#^(http|ftp)://#i', substr($file, 0, 10))) {
            if (!ini_get('allow_url_fopen')) {
                return $this->raiseError('Remote files cannot be parsed, as safe mode is enabled.', XML_PARSER_ERROR_REMOTE);
            }
        }

        $fp = @fopen($file, 'rb');
        if (is_resource($fp)) {
            $this->fp = $fp;
            return $fp;
        }
        return $this->raiseError('File could not be opened.', XML_PARSER_ERROR_FILE_NOT_READABLE);
    }

    // }}}
    // {{{ setInputString()

    /**
     * XML_Parser::setInputString()
     *
     * Sets the xml input from a string
     *
     * @param string $data a string containing the XML document
     * @return null
     **/
    function setInputString($data)
    {
        $this->fp = $data;
        return null;
    }

    // }}}
    // {{{ setInput()

    /**
     * Sets the file handle to use with parse().
     *
     * You should use setInputFile() or setInputString() if you
     * pass a string
     *
     * @param    mixed  $fp  Can be either a resource returned from fopen(),
     *                       a URL, a local filename or a string.
     * @return   mixed       true (or the handle returned by setInputFile())
     *                       on success, PEAR_Error if a file or URL could
     *                       not be opened
     * @access   public
     * @see      parse()
     * @uses     setInputString(), setInputFile()
     */
    function setInput($fp)
    {
        if (is_resource($fp)) {
            $this->fp = $fp;
            return true;
        }

        // see if it's an absolute URL (has a scheme at the beginning)
        if (preg_match('#^[a-z]+://#i', substr($fp, 0, 10))) {
            return $this->setInputFile($fp);
        }

        // see if it's a local file
        if (file_exists($fp)) {
            return $this->setInputFile($fp);
        }

        // it must be a string
        $this->fp = $fp;
        return true;
    }

    // }}}
    // {{{ parse()

    /**
     * Central parsing function.
     *
     * Creates a fresh parser, feeds it the configured input and frees
     * all resources afterwards.
     *
     * @return   true|object PEAR error     returns true on success, or a PEAR_Error otherwise
     * @access   public
     */
    function parse()
    {
        /**
         * reset the parser
         */
        $result = $this->reset();
        if ($this->isError($result)) {
            return $result;
        }

        if (is_resource($this->fp)) {
            // $this->fp was fopened previously: feed it in chunks.
            // Nothing has been fed yet, so nothing needs finalising.
            $finalized = true;

            // compare strictly, a chunk consisting of "0" is valid data
            while (($data = fread($this->fp, 4096)) !== false && $data !== '') {
                $finalized = feof($this->fp);
                if (!$this->_parseString($data, $finalized)) {
                    return $this->_raiseParseError();
                }
            }

            // EOF fell exactly on a chunk boundary, so the last chunk was
            // not flagged as final: tell expat that the document is complete
            if (!$finalized && !$this->_parseString('', true)) {
                return $this->_raiseParseError();
            }
        } else {
            // otherwise, $this->fp must be a string
            if (!$this->_parseString($this->fp, true)) {
                return $this->_raiseParseError();
            }
        }

        $this->free();

        return true;
    }

    // }}}
    // {{{ _raiseParseError()

    /**
     * Builds the error for the parser's last failure and releases
     * the parser and input resources.
     *
     * @access  private
     * @return  XML_Parser_Error
     */
    function _raiseParseError()
    {
        // the error reads its details from the parser handle,
        // so it has to be created before free() discards the handle
        $error = $this->raiseError();
        $this->free();
        return $error;
    }

    // }}}
    // {{{ _parseString()

    /**
     * XML_Parser::_parseString()
     *
     * Thin wrapper around xml_parse() for the current parser handle.
     *
     * @param string $data
     * @param boolean $eof
     * @return bool
     * @access private
     * @see parseString()
     **/
    function _parseString($data, $eof = false)
    {
        return xml_parse($this->parser, $data, $eof);
    }

    // }}}
    // {{{ parseString()

    /**
     * XML_Parser::parseString()
     *
     * Parses a string.
     *
     * @param    string  $data XML data
     * @param    boolean $eof  If set and TRUE, data is the last piece of data sent in this parser
     * @throws   XML_Parser_Error
     * @return   Pear Error|true   true on success or a PEAR Error
     * @see      _parseString()
     */
    function parseString($data, $eof = false)
    {
        if (!isset($this->parser) || !$this->_isParserHandle($this->parser)) {
            // without a parser there is nothing to feed, so report the failure
            $result = $this->reset();
            if ($this->isError($result)) {
                return $result;
            }
        }

        if (!$this->_parseString($data, $eof)) {
            return $this->_raiseParseError();
        }

        if ($eof === true) {
            $this->free();
        }
        return true;
    }

    // }}}
    // {{{ free()

    /**
     * XML_Parser::free()
     *
     * Free the internal resources associated with the parser
     *
     * @return null
     **/
    function free()
    {
        if (isset($this->parser) && $this->_isParserHandle($this->parser)) {
            xml_parser_free($this->parser);
            unset($this->parser);
        }
        if (isset($this->fp) && is_resource($this->fp)) {
            fclose($this->fp);
        }
        unset($this->fp);
        return null;
    }

    // }}}
    // {{{ raiseError()

    /**
     * XML_Parser::raiseError()
     *
     * Throws a XML_Parser_Error
     *
     * If no message is given, the parser handle is passed on instead so
     * that the error can read the last parser error from it.
     *
     * @param string  $msg   the error message
     * @param integer $ecode the error message code
     * @return XML_Parser_Error
     **/
    function raiseError($msg = null, $ecode = 0)
    {
        $msg = !is_null($msg) ? $msg : $this->parser;
        $err = new XML_Parser_Error($msg, $ecode);
        return parent::raiseError($err);
    }

    // }}}
    // {{{ funcStartHandler()

    /**
     * Start-element callback used in 'func' mode.
     *
     * Calls xmltag_$elem() on the handler object ('.', '-' and ':' in the
     * element name are replaced by '_'), or xmltag() if no such method exists.
     *
     * @param resource $xp      parser handle
     * @param string   $elem    element name
     * @param array    $attribs element attributes
     */
    function funcStartHandler($xp, $elem, $attribs)
    {
        $func = 'xmltag_' . $elem;
        $func = str_replace(array('.', '-', ':'), '_', $func);
        if (method_exists($this->_handlerObj, $func)) {
            call_user_func(array(&$this->_handlerObj, $func), $xp, $elem, $attribs);
        } elseif (method_exists($this->_handlerObj, 'xmltag')) {
            call_user_func(array(&$this->_handlerObj, 'xmltag'), $xp, $elem, $attribs);
        }
    }

    // }}}
    // {{{ funcEndHandler()

    /**
     * End-element callback used in 'func' mode.
     *
     * Calls xmltag_$elem_() on the handler object ('.', '-' and ':' in the
     * element name are replaced by '_'), or xmltag_() if no such method exists.
     *
     * @param resource $xp   parser handle
     * @param string   $elem element name
     */
    function funcEndHandler($xp, $elem)
    {
        $func = 'xmltag_' . $elem . '_';
        $func = str_replace(array('.', '-', ':'), '_', $func);
        if (method_exists($this->_handlerObj, $func)) {
            call_user_func(array(&$this->_handlerObj, $func), $xp, $elem);
        } elseif (method_exists($this->_handlerObj, 'xmltag_')) {
            call_user_func(array(&$this->_handlerObj, 'xmltag_'), $xp, $elem);
        }
    }

    // }}}
    // {{{ startHandler()

    /**
     * Start-element callback used in 'event' mode; does nothing here.
     *
     * @abstract
     */
    function startHandler($xp, $elem, &$attribs)
    {
        return NULL;
    }

    // }}}
    // {{{ endHandler()

    /**
     * End-element callback used in 'event' mode; does nothing here.
     *
     * @abstract
     */
    function endHandler($xp, $elem)
    {
        return NULL;
    }

    // }}}
}

/**
 * error class, replaces PEAR_Error
 *
 * An instance of this class will be returned
 * if an error occurs inside XML_Parser.
 *
 * There are three advantages over using the standard PEAR_Error:
 * - All messages will be prefixed
 * - check for XML_Parser error, using is_a( $error, 'XML_Parser_Error' )
 * - messages can be generated from the xml_parser resource
 *
 * @package XML_Parser
 * @access  public
 * @see     PEAR_Error
 */
class XML_Parser_Error extends PEAR_Error
{
    // {{{ properties

    /**
     * prefix for all messages
     *
     * @var      string
     */
    var $error_message_prefix = 'XML_Parser: ';

    // }}}
    // {{{ constructor()

    /**
     * construct a new error instance
     *
     * You may either pass a message or an xml_parser resource as first
     * parameter. If a resource has been passed, the last error that
     * happened will be retrieved and returned.
     *
     * @access   public
     * @param    string|resource     message or parser resource
     * @param    integer             error code
     * @param    integer             error handling
     * @param    integer             error level
     */
    function __construct($msgorparser = 'unknown error', $code = 0, $mode = PEAR_ERROR_RETURN, $level = E_USER_NOTICE)
    {
        // a parser handle is a resource before PHP 8 and an XMLParser object since
        $isParser = is_resource($msgorparser)
            || (is_object($msgorparser) && is_a($msgorparser, 'XMLParser'));

        if ($isParser) {
            $code        = xml_get_error_code($msgorparser);
            $msgorparser = sprintf('%s at XML input line %d:%d',
                                   xml_error_string($code),
                                   xml_get_current_line_number($msgorparser),
                                   xml_get_current_column_number($msgorparser));
        }
        $this->PEAR_Error($msgorparser, $code, $mode, $level);
    }

    /**
     * PHP4 style constructor, forwards to __construct()
     *
     * @access   public
     * @param    string|resource     message or parser resource
     * @param    integer             error code
     * @param    integer             error handling
     * @param    integer             error level
     */
    function XML_Parser_Error($msgorparser = 'unknown error', $code = 0, $mode = PEAR_ERROR_RETURN, $level = E_USER_NOTICE)
    {
        XML_Parser_Error::__construct($msgorparser, $code, $mode, $level);
    }

    // }}}
}
?>