home *** CD-ROM | disk | FTP | other *** search
/ Chip 2004 July / CMCD0704.ISO / Software / Shareware / Comunicatii / jyte / jyte.exe / pulldom.py < prev    next >
Text File  |  2003-04-24  |  12KB  |  352 lines

  1. import xml.sax
  2. import xml.sax.handler
  3. import types
  4.  
  5. try:
  6.     _StringTypes = [types.StringType, types.UnicodeType]
  7. except AttributeError:
  8.     _StringTypes = [types.StringType]
  9.  
# Event-type tokens.  Every event queued by PullDOM and returned by
# DOMEventStream.getEvent() is a (token, node) pair whose first item is one
# of these strings.
START_ELEMENT = "START_ELEMENT"
END_ELEMENT = "END_ELEMENT"
COMMENT = "COMMENT"
START_DOCUMENT = "START_DOCUMENT"
END_DOCUMENT = "END_DOCUMENT"
PROCESSING_INSTRUCTION = "PROCESSING_INSTRUCTION"
IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE"
CHARACTERS = "CHARACTERS"
  18.  
  19. class PullDOM(xml.sax.ContentHandler):
  20.     _locator = None
  21.     document = None
  22.  
  23.     def __init__(self, documentFactory=None):
  24.         from xml.dom import XML_NAMESPACE
  25.         self.documentFactory = documentFactory
  26.         self.firstEvent = [None, None]
  27.         self.lastEvent = self.firstEvent
  28.         self.elementStack = []
  29.         self.push = self.elementStack.append
  30.         try:
  31.             self.pop = self.elementStack.pop
  32.         except AttributeError:
  33.             # use class' pop instead
  34.             pass
  35.         self._ns_contexts = [{XML_NAMESPACE:'xml'}] # contains uri -> prefix dicts
  36.         self._current_context = self._ns_contexts[-1]
  37.         self.pending_events = []
  38.  
  39.     def pop(self):
  40.         result = self.elementStack[-1]
  41.         del self.elementStack[-1]
  42.         return result
  43.  
  44.     def setDocumentLocator(self, locator):
  45.         self._locator = locator
  46.  
  47.     def startPrefixMapping(self, prefix, uri):
  48.         if not hasattr(self, '_xmlns_attrs'):
  49.             self._xmlns_attrs = []
  50.         self._xmlns_attrs.append((prefix or 'xmlns', uri))
  51.         self._ns_contexts.append(self._current_context.copy())
  52.         self._current_context[uri] = prefix or None
  53.  
  54.     def endPrefixMapping(self, prefix):
  55.         self._current_context = self._ns_contexts.pop()
  56.  
  57.     def startElementNS(self, name, tagName , attrs):
  58.         # Retrieve xml namespace declaration attributes.
  59.         xmlns_uri = 'http://www.w3.org/2000/xmlns/'
  60.         xmlns_attrs = getattr(self, '_xmlns_attrs', None)
  61.         if xmlns_attrs is not None:
  62.             for aname, value in xmlns_attrs:
  63.                 attrs._attrs[(xmlns_uri, aname)] = value
  64.             self._xmlns_attrs = []
  65.         uri, localname = name
  66.         if uri:
  67.             # When using namespaces, the reader may or may not
  68.             # provide us with the original name. If not, create
  69.             # *a* valid tagName from the current context.
  70.             if tagName is None:
  71.                 prefix = self._current_context[uri]
  72.                 if prefix:
  73.                     tagName = prefix + ":" + localname
  74.                 else:
  75.                     tagName = localname
  76.             if self.document:
  77.                 node = self.document.createElementNS(uri, tagName)
  78.             else:
  79.                 node = self.buildDocument(uri, tagName)
  80.         else:
  81.             # When the tagname is not prefixed, it just appears as
  82.             # localname
  83.             if self.document:
  84.                 node = self.document.createElement(localname)
  85.             else:
  86.                 node = self.buildDocument(None, localname)
  87.  
  88.         for aname,value in attrs.items():
  89.             a_uri, a_localname = aname
  90.             if a_uri == xmlns_uri:
  91.                 if a_localname == 'xmlns':
  92.                     qname = a_localname
  93.                 else:
  94.                     qname = 'xmlns:' + a_localname
  95.                 attr = self.document.createAttributeNS(a_uri, qname)
  96.                 node.setAttributeNodeNS(attr)
  97.             elif a_uri:
  98.                 prefix = self._current_context[a_uri]
  99.                 if prefix:
  100.                     qname = prefix + ":" + a_localname
  101.                 else:
  102.                     qname = a_localname
  103.                 attr = self.document.createAttributeNS(a_uri, qname)
  104.                 node.setAttributeNodeNS(attr)
  105.             else:
  106.                 attr = self.document.createAttribute(a_localname)
  107.                 node.setAttributeNode(attr)
  108.             attr.value = value
  109.  
  110.         self.lastEvent[1] = [(START_ELEMENT, node), None]
  111.         self.lastEvent = self.lastEvent[1]
  112.         self.push(node)
  113.  
  114.     def endElementNS(self, name, tagName):
  115.         self.lastEvent[1] = [(END_ELEMENT, self.pop()), None]
  116.         self.lastEvent = self.lastEvent[1]
  117.  
  118.     def startElement(self, name, attrs):
  119.         if self.document:
  120.             node = self.document.createElement(name)
  121.         else:
  122.             node = self.buildDocument(None, name)
  123.  
  124.         for aname,value in attrs.items():
  125.             attr = self.document.createAttribute(aname)
  126.             attr.value = value
  127.             node.setAttributeNode(attr)
  128.  
  129.         self.lastEvent[1] = [(START_ELEMENT, node), None]
  130.         self.lastEvent = self.lastEvent[1]
  131.         self.push(node)
  132.  
  133.     def endElement(self, name):
  134.         self.lastEvent[1] = [(END_ELEMENT, self.pop()), None]
  135.         self.lastEvent = self.lastEvent[1]
  136.  
  137.     def comment(self, s):
  138.         if self.document:
  139.             node = self.document.createComment(s)
  140.             self.lastEvent[1] = [(COMMENT, node), None]
  141.             self.lastEvent = self.lastEvent[1]
  142.         else:
  143.             event = [(COMMENT, s), None]
  144.             self.pending_events.append(event)
  145.  
  146.     def processingInstruction(self, target, data):
  147.         if self.document:
  148.             node = self.document.createProcessingInstruction(target, data)
  149.             self.lastEvent[1] = [(PROCESSING_INSTRUCTION, node), None]
  150.             self.lastEvent = self.lastEvent[1]
  151.         else:
  152.             event = [(PROCESSING_INSTRUCTION, target, data), None]
  153.             self.pending_events.append(event)
  154.  
  155.     def ignorableWhitespace(self, chars):
  156.         node = self.document.createTextNode(chars)
  157.         self.lastEvent[1] = [(IGNORABLE_WHITESPACE, node), None]
  158.         self.lastEvent = self.lastEvent[1]
  159.  
  160.     def characters(self, chars):
  161.         node = self.document.createTextNode(chars)
  162.         self.lastEvent[1] = [(CHARACTERS, node), None]
  163.         self.lastEvent = self.lastEvent[1]
  164.  
  165.     def startDocument(self):
  166.         if self.documentFactory is None:
  167.             import xml.dom.minidom
  168.             self.documentFactory = xml.dom.minidom.Document.implementation
  169.  
  170.     def buildDocument(self, uri, tagname):
  171.         # Can't do that in startDocument, since we need the tagname
  172.         # XXX: obtain DocumentType
  173.         node = self.documentFactory.createDocument(uri, tagname, None)
  174.         self.document = node
  175.         self.lastEvent[1] = [(START_DOCUMENT, node), None]
  176.         self.lastEvent = self.lastEvent[1]
  177.         self.push(node)
  178.         # Put everything we have seen so far into the document
  179.         for e in self.pending_events:
  180.             if e[0][0] == PROCESSING_INSTRUCTION:
  181.                 _,target,data = e[0]
  182.                 n = self.document.createProcessingInstruction(target, data)
  183.                 e[0] = (PROCESSING_INSTRUCTION, n)
  184.             elif e[0][0] == COMMENT:
  185.                 n = self.document.createComment(e[0][1])
  186.                 e[0] = (COMMENT, n)
  187.             else:
  188.                 raise AssertionError("Unknown pending event ",e[0][0])
  189.             self.lastEvent[1] = e
  190.             self.lastEvent = e
  191.         self.pending_events = None
  192.         return node.firstChild
  193.  
  194.     def endDocument(self):
  195.         self.lastEvent[1] = [(END_DOCUMENT, self.document), None]
  196.         self.pop()
  197.  
  198.     def clear(self):
  199.         "clear(): Explicitly release parsing structures"
  200.         self.document = None
  201.  
  202. class ErrorHandler:
  203.     def warning(self, exception):
  204.         print exception
  205.     def error(self, exception):
  206.         raise exception
  207.     def fatalError(self, exception):
  208.         raise exception
  209.  
  210. class DOMEventStream:
  211.     def __init__(self, stream, parser, bufsize):
  212.         self.stream = stream
  213.         self.parser = parser
  214.         self.bufsize = bufsize
  215.         if not hasattr(self.parser, 'feed'):
  216.             self.getEvent = self._slurp
  217.         self.reset()
  218.  
  219.     def reset(self):
  220.         self.pulldom = PullDOM()
  221.         # This content handler relies on namespace support
  222.         self.parser.setFeature(xml.sax.handler.feature_namespaces, 1)
  223.         self.parser.setContentHandler(self.pulldom)
  224.  
  225.     def __getitem__(self, pos):
  226.         rc = self.getEvent()
  227.         if rc:
  228.             return rc
  229.         raise IndexError
  230.  
  231.     def next(self):
  232.         rc = self.getEvent()
  233.         if rc:
  234.             return rc
  235.         raise StopIteration
  236.  
  237.     def __iter__(self):
  238.         return self
  239.  
  240.     def expandNode(self, node):
  241.         event = self.getEvent()
  242.         parents = [node]
  243.         while event:
  244.             token, cur_node = event
  245.             if cur_node is node:
  246.                 return
  247.             if token != END_ELEMENT:
  248.                 parents[-1].appendChild(cur_node)
  249.             if token == START_ELEMENT:
  250.                 parents.append(cur_node)
  251.             elif token == END_ELEMENT:
  252.                 del parents[-1]
  253.             event = self.getEvent()
  254.  
  255.     def getEvent(self):
  256.         # use IncrementalParser interface, so we get the desired
  257.         # pull effect
  258.         if not self.pulldom.firstEvent[1]:
  259.             self.pulldom.lastEvent = self.pulldom.firstEvent
  260.         while not self.pulldom.firstEvent[1]:
  261.             buf = self.stream.read(self.bufsize)
  262.             if not buf:
  263.                 self.parser.close()
  264.                 return None
  265.             self.parser.feed(buf)
  266.         rc = self.pulldom.firstEvent[1][0]
  267.         self.pulldom.firstEvent[1] = self.pulldom.firstEvent[1][1]
  268.         return rc
  269.  
  270.     def _slurp(self):
  271.         """ Fallback replacement for getEvent() using the
  272.             standard SAX2 interface, which means we slurp the
  273.             SAX events into memory (no performance gain, but
  274.             we are compatible to all SAX parsers).
  275.         """
  276.         self.parser.parse(self.stream)
  277.         self.getEvent = self._emit
  278.         return self._emit()
  279.  
  280.     def _emit(self):
  281.         """ Fallback replacement for getEvent() that emits
  282.             the events that _slurp() read previously.
  283.         """
  284.         rc = self.pulldom.firstEvent[1][0]
  285.         self.pulldom.firstEvent[1] = self.pulldom.firstEvent[1][1]
  286.         return rc
  287.  
  288.     def clear(self):
  289.         """clear(): Explicitly release parsing objects"""
  290.         self.pulldom.clear()
  291.         del self.pulldom
  292.         self.parser = None
  293.         self.stream = None
  294.  
  295. class SAX2DOM(PullDOM):
  296.  
  297.     def startElementNS(self, name, tagName , attrs):
  298.         PullDOM.startElementNS(self, name, tagName, attrs)
  299.         curNode = self.elementStack[-1]
  300.         parentNode = self.elementStack[-2]
  301.         parentNode.appendChild(curNode)
  302.  
  303.     def startElement(self, name, attrs):
  304.         PullDOM.startElement(self, name, attrs)
  305.         curNode = self.elementStack[-1]
  306.         parentNode = self.elementStack[-2]
  307.         parentNode.appendChild(curNode)
  308.  
  309.     def processingInstruction(self, target, data):
  310.         PullDOM.processingInstruction(self, target, data)
  311.         node = self.lastEvent[0][1]
  312.         parentNode = self.elementStack[-1]
  313.         parentNode.appendChild(node)
  314.  
  315.     def ignorableWhitespace(self, chars):
  316.         PullDOM.ignorableWhitespace(self, chars)
  317.         node = self.lastEvent[0][1]
  318.         parentNode = self.elementStack[-1]
  319.         parentNode.appendChild(node)
  320.  
  321.     def characters(self, chars):
  322.         PullDOM.characters(self, chars)
  323.         node = self.lastEvent[0][1]
  324.         parentNode = self.elementStack[-1]
  325.         parentNode.appendChild(node)
  326.  
  327.  
# Chunk size parse() hands to DOMEventStream when the caller gives no
# bufsize: 2**14 minus 20.
# NOTE(review): the reason for staying 20 below 16 KiB is not visible in
# this file.
default_bufsize = (2 ** 14) - 20
  329.  
  330. def parse(stream_or_string, parser=None, bufsize=None):
  331.     if bufsize is None:
  332.         bufsize = default_bufsize
  333.     if type(stream_or_string) in _StringTypes:
  334.         stream = open(stream_or_string)
  335.     else:
  336.         stream = stream_or_string
  337.     if not parser:
  338.         parser = xml.sax.make_parser()
  339.     return DOMEventStream(stream, parser, bufsize)
  340.  
  341. def parseString(string, parser=None):
  342.     try:
  343.         from cStringIO import StringIO
  344.     except ImportError:
  345.         from StringIO import StringIO
  346.  
  347.     bufsize = len(string)
  348.     buf = StringIO(string)
  349.     if not parser:
  350.         parser = xml.sax.make_parser()
  351.     return DOMEventStream(buf, parser, bufsize)
  352.