Source code for sos.xmlparser

# -*- coding: utf-8 -*-
"""
XMLParser module, includes a xml parser factory and a XML parser abstract base class
"""
import abc
from PyQt4.QtXml import QDomDocument, QDomNode, QDomElement

__all__ = ['XMLParserFactory', 'XMLParser']

[docs]class XMLParserFactory () : """ XML parsers factory. """ _parsers = None @classmethod
[docs] def getInstance (self, tagname, preffix=""): """ :param tagname: XML tag name :type tagname: str :param preffix: Class name prefix :type preffix: str :return: XMLParser :raise: NotImplementedError """ if self._parsers == None: self._parsers = dict() for cls in XMLParser.__subclasses__(): self._parsers[cls.__name__] = cls tagname = preffix + tagname + "Parser" try: return self._parsers[tagname] except KeyError: raise NotImplementedError(tagname)
[docs]class XMLParser (object): """ XML parser base class """ __metadata__ = abc.ABCMeta @abc.abstractmethod
[docs] def parse (self, xml=None): """ :param xml: XML to parse :type xml: QDomElement or str """ if isinstance (xml, QDomElement): return xml doc = QDomDocument() (ok, errorMsg, errorLine, errorCol) = doc.setContent(xml, True) if ok: return doc.documentElement() else: raise ValueError ("{} in line {}, column {}".format(errorMsg, errorLine, errorCol))
@staticmethod
[docs] def searchFirst (xml, query): """ :param xml: XML to parse :type xml: QDomNode :param query: :type query: str :return: QDomNode, str """ for node, value in XMLParser.search (xml, query): return node, value return None, None
@staticmethod
[docs] def search (xml, query): """ :param xml: XML to parse :type xml: QDomNode :param query: :type query: str :return: QDomNode, str generator """ def _text (node, attr=None): if attr: return unicode (node.attribute(attr)) if node.firstChild().isText(): return unicode (node.firstChild().nodeValue()) return unicode (node.localName()) if not isinstance (xml, QDomNode): raise TypeError ("xml must be a QDomNode") if not isinstance (query, str): raise TypeError ("query must be a string") tag = query.split("@")[ 0] attr = query.split("@")[-1] if "@" in query else "" val = attr.split ("=")[-1] if "=" in attr else "" attr = attr.split ("=")[ 0] for tag in tag.split("/"): if tag == "*": xml = xml.firstChildElement () elif tag <> "": xml = xml.firstChildElement (tag) while not xml.isNull(): if attr <> "": if val == "": yield xml, _text(xml, attr) elif val == xml.attribute(attr): yield xml, _text(xml) else: yield xml, _text(xml) xml = xml.nextSiblingElement (tag if tag <> "*" else "")