home *** CD-ROM | disk | FTP | other *** search
/ Komputer for Alle 2004 #2 / K-CD-2-2004.ISO / OpenOffice Sv / f_0397 / python-core-2.2.2 / lib / xml / dom / pulldom.py < prev   
Encoding:
Python Source  |  2003-07-18  |  11.5 KB  |  342 lines

  1. import xml.sax
  2. import xml.sax.handler
  3. import types
  4.  
  5. try:
  6.     _StringTypes = [types.StringType, types.UnicodeType]
  7. except AttributeError:
  8.     _StringTypes = [types.StringType]
  9.  
  10. START_ELEMENT = "START_ELEMENT"
  11. END_ELEMENT = "END_ELEMENT"
  12. COMMENT = "COMMENT"
  13. START_DOCUMENT = "START_DOCUMENT"
  14. END_DOCUMENT = "END_DOCUMENT"
  15. PROCESSING_INSTRUCTION = "PROCESSING_INSTRUCTION"
  16. IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE"
  17. CHARACTERS = "CHARACTERS"
  18.  
  19. class PullDOM(xml.sax.ContentHandler):
  20.     _locator = None
  21.     document = None
  22.  
  23.     def __init__(self, documentFactory=None):
  24.         self.documentFactory = documentFactory
  25.         self.firstEvent = [None, None]
  26.         self.lastEvent = self.firstEvent
  27.         self.elementStack = []
  28.         self.push = self.elementStack.append
  29.         try:
  30.             self.pop = self.elementStack.pop
  31.         except AttributeError:
  32.             # use class' pop instead
  33.             pass
  34.         self._ns_contexts = [{}] # contains uri -> prefix dicts
  35.         self._current_context = self._ns_contexts[-1]
  36.         self.pending_events = []
  37.  
  38.     def pop(self):
  39.         result = self.elementStack[-1]
  40.         del self.elementStack[-1]
  41.         return result
  42.  
  43.     def setDocumentLocator(self, locator):
  44.         self._locator = locator
  45.  
  46.     def startPrefixMapping(self, prefix, uri):
  47.         if not hasattr(self, '_xmlns_attrs'):
  48.             self._xmlns_attrs = []
  49.         self._xmlns_attrs.append((prefix or 'xmlns', uri))
  50.         self._ns_contexts.append(self._current_context.copy())
  51.         self._current_context[uri] = prefix or None
  52.  
  53.     def endPrefixMapping(self, prefix):
  54.         self._current_context = self._ns_contexts.pop()
  55.  
  56.     def startElementNS(self, name, tagName , attrs):
  57.         # Retrieve xml namespace declaration attributes.
  58.         xmlns_uri = 'http://www.w3.org/2000/xmlns/'
  59.         xmlns_attrs = getattr(self, '_xmlns_attrs', None)
  60.         if xmlns_attrs is not None:
  61.             for aname, value in xmlns_attrs:
  62.                 attrs._attrs[(xmlns_uri, aname)] = value
  63.             self._xmlns_attrs = []
  64.         uri, localname = name
  65.         if uri:
  66.             # When using namespaces, the reader may or may not
  67.             # provide us with the original name. If not, create
  68.             # *a* valid tagName from the current context.
  69.             if tagName is None:
  70.                 prefix = self._current_context[uri]
  71.                 if prefix:
  72.                     tagName = prefix + ":" + localname
  73.                 else:
  74.                     tagName = localname
  75.             if self.document:
  76.                 node = self.document.createElementNS(uri, tagName)
  77.             else:
  78.                 node = self.buildDocument(uri, tagName)
  79.         else:
  80.             # When the tagname is not prefixed, it just appears as
  81.             # localname
  82.             if self.document:
  83.                 node = self.document.createElement(localname)
  84.             else:
  85.                 node = self.buildDocument(None, localname)
  86.  
  87.         for aname,value in attrs.items():
  88.             a_uri, a_localname = aname
  89.             if a_uri == xmlns_uri:
  90.                 if a_localname == 'xmlns':
  91.                     qname = a_localname
  92.                 else:
  93.                     qname = 'xmlns:' + a_localname
  94.                 attr = self.document.createAttributeNS(a_uri, qname)
  95.                 node.setAttributeNodeNS(attr)
  96.             elif a_uri:
  97.                 prefix = self._current_context[a_uri]
  98.                 if prefix:
  99.                     qname = prefix + ":" + a_localname
  100.                 else:
  101.                     qname = a_localname
  102.                 attr = self.document.createAttributeNS(a_uri, qname)
  103.                 node.setAttributeNodeNS(attr)
  104.             else:
  105.                 attr = self.document.createAttribute(a_localname)
  106.                 node.setAttributeNode(attr)
  107.             attr.value = value
  108.  
  109.         self.lastEvent[1] = [(START_ELEMENT, node), None]
  110.         self.lastEvent = self.lastEvent[1]
  111.         self.push(node)
  112.  
  113.     def endElementNS(self, name, tagName):
  114.         self.lastEvent[1] = [(END_ELEMENT, self.pop()), None]
  115.         self.lastEvent = self.lastEvent[1]
  116.  
  117.     def startElement(self, name, attrs):
  118.         if self.document:
  119.             node = self.document.createElement(name)
  120.         else:
  121.             node = self.buildDocument(None, name)
  122.  
  123.         for aname,value in attrs.items():
  124.             attr = self.document.createAttribute(aname)
  125.             attr.value = value
  126.             node.setAttributeNode(attr)
  127.  
  128.         self.lastEvent[1] = [(START_ELEMENT, node), None]
  129.         self.lastEvent = self.lastEvent[1]
  130.         self.push(node)
  131.  
  132.     def endElement(self, name):
  133.         self.lastEvent[1] = [(END_ELEMENT, self.pop()), None]
  134.         self.lastEvent = self.lastEvent[1]
  135.  
  136.     def comment(self, s):
  137.         if self.document:
  138.             node = self.document.createComment(s)
  139.             self.lastEvent[1] = [(COMMENT, node), None]
  140.             self.lastEvent = self.lastEvent[1]
  141.         else:
  142.             event = [(COMMENT, s), None]
  143.             self.pending_events.append(event)
  144.  
  145.     def processingInstruction(self, target, data):
  146.         if self.document:
  147.             node = self.document.createProcessingInstruction(target, data)
  148.             self.lastEvent[1] = [(PROCESSING_INSTRUCTION, node), None]
  149.             self.lastEvent = self.lastEvent[1]
  150.         else:
  151.             event = [(PROCESSING_INSTRUCTION, target, data), None]
  152.             self.pending_events.append(event)
  153.  
  154.     def ignorableWhitespace(self, chars):
  155.         node = self.document.createTextNode(chars)
  156.         self.lastEvent[1] = [(IGNORABLE_WHITESPACE, node), None]
  157.         self.lastEvent = self.lastEvent[1]
  158.  
  159.     def characters(self, chars):
  160.         node = self.document.createTextNode(chars)
  161.         self.lastEvent[1] = [(CHARACTERS, node), None]
  162.         self.lastEvent = self.lastEvent[1]
  163.  
  164.     def startDocument(self):
  165.         if self.documentFactory is None:
  166.             import xml.dom.minidom
  167.             self.documentFactory = xml.dom.minidom.Document.implementation
  168.  
  169.     def buildDocument(self, uri, tagname):
  170.         # Can't do that in startDocument, since we need the tagname
  171.         # XXX: obtain DocumentType
  172.         node = self.documentFactory.createDocument(uri, tagname, None)
  173.         self.document = node
  174.         self.lastEvent[1] = [(START_DOCUMENT, node), None]
  175.         self.lastEvent = self.lastEvent[1]
  176.         self.push(node)
  177.         # Put everything we have seen so far into the document
  178.         for e in self.pending_events:
  179.             if e[0][0] == PROCESSING_INSTRUCTION:
  180.                 _,target,data = e[0]
  181.                 n = self.document.createProcessingInstruction(target, data)
  182.                 e[0] = (PROCESSING_INSTRUCTION, n)
  183.             elif e[0][0] == COMMENT:
  184.                 n = self.document.createComment(e[0][1])
  185.                 e[0] = (COMMENT, n)
  186.             else:
  187.                 raise AssertionError("Unknown pending event ",e[0][0])
  188.             self.lastEvent[1] = e
  189.             self.lastEvent = e
  190.         self.pending_events = None
  191.         return node.firstChild
  192.  
  193.     def endDocument(self):
  194.         self.lastEvent[1] = [(END_DOCUMENT, self.document), None]
  195.         self.pop()
  196.  
  197.     def clear(self):
  198.         "clear(): Explicitly release parsing structures"
  199.         self.document = None
  200.  
  201. class ErrorHandler:
  202.     def warning(self, exception):
  203.         print exception
  204.     def error(self, exception):
  205.         raise exception
  206.     def fatalError(self, exception):
  207.         raise exception
  208.  
  209. class DOMEventStream:
  210.     def __init__(self, stream, parser, bufsize):
  211.         self.stream = stream
  212.         self.parser = parser
  213.         self.bufsize = bufsize
  214.         if not hasattr(self.parser, 'feed'):
  215.             self.getEvent = self._slurp
  216.         self.reset()
  217.  
  218.     def reset(self):
  219.         self.pulldom = PullDOM()
  220.         # This content handler relies on namespace support
  221.         self.parser.setFeature(xml.sax.handler.feature_namespaces, 1)
  222.         self.parser.setContentHandler(self.pulldom)
  223.  
  224.     def __getitem__(self, pos):
  225.         rc = self.getEvent()
  226.         if rc:
  227.             return rc
  228.         raise IndexError
  229.  
  230.     def expandNode(self, node):
  231.         event = self.getEvent()
  232.         parents = [node]
  233.         while event:
  234.             token, cur_node = event
  235.             if cur_node is node:
  236.                 return
  237.             if token != END_ELEMENT:
  238.                 parents[-1].appendChild(cur_node)
  239.             if token == START_ELEMENT:
  240.                 parents.append(cur_node)
  241.             elif token == END_ELEMENT:
  242.                 del parents[-1]
  243.             event = self.getEvent()
  244.  
  245.     def getEvent(self):
  246.         # use IncrementalParser interface, so we get the desired
  247.         # pull effect
  248.         if not self.pulldom.firstEvent[1]:
  249.             self.pulldom.lastEvent = self.pulldom.firstEvent
  250.         while not self.pulldom.firstEvent[1]:
  251.             buf = self.stream.read(self.bufsize)
  252.             if not buf:
  253.                 self.parser.close()
  254.                 return None
  255.             self.parser.feed(buf)
  256.         rc = self.pulldom.firstEvent[1][0]
  257.         self.pulldom.firstEvent[1] = self.pulldom.firstEvent[1][1]
  258.         return rc
  259.  
  260.     def _slurp(self):
  261.         """ Fallback replacement for getEvent() using the
  262.             standard SAX2 interface, which means we slurp the
  263.             SAX events into memory (no performance gain, but
  264.             we are compatible to all SAX parsers).
  265.         """
  266.         self.parser.parse(self.stream)
  267.         self.getEvent = self._emit
  268.         return self._emit()
  269.  
  270.     def _emit(self):
  271.         """ Fallback replacement for getEvent() that emits
  272.             the events that _slurp() read previously.
  273.         """
  274.         rc = self.pulldom.firstEvent[1][0]
  275.         self.pulldom.firstEvent[1] = self.pulldom.firstEvent[1][1]
  276.         return rc
  277.  
  278.     def clear(self):
  279.         """clear(): Explicitly release parsing objects"""
  280.         self.pulldom.clear()
  281.         del self.pulldom
  282.         self.parser = None
  283.         self.stream = None
  284.  
  285. class SAX2DOM(PullDOM):
  286.  
  287.     def startElementNS(self, name, tagName , attrs):
  288.         PullDOM.startElementNS(self, name, tagName, attrs)
  289.         curNode = self.elementStack[-1]
  290.         parentNode = self.elementStack[-2]
  291.         parentNode.appendChild(curNode)
  292.  
  293.     def startElement(self, name, attrs):
  294.         PullDOM.startElement(self, name, attrs)
  295.         curNode = self.elementStack[-1]
  296.         parentNode = self.elementStack[-2]
  297.         parentNode.appendChild(curNode)
  298.  
  299.     def processingInstruction(self, target, data):
  300.         PullDOM.processingInstruction(self, target, data)
  301.         node = self.lastEvent[0][1]
  302.         parentNode = self.elementStack[-1]
  303.         parentNode.appendChild(node)
  304.  
  305.     def ignorableWhitespace(self, chars):
  306.         PullDOM.ignorableWhitespace(self, chars)
  307.         node = self.lastEvent[0][1]
  308.         parentNode = self.elementStack[-1]
  309.         parentNode.appendChild(node)
  310.  
  311.     def characters(self, chars):
  312.         PullDOM.characters(self, chars)
  313.         node = self.lastEvent[0][1]
  314.         parentNode = self.elementStack[-1]
  315.         parentNode.appendChild(node)
  316.  
  317.  
  318. default_bufsize = (2 ** 14) - 20
  319.  
  320. def parse(stream_or_string, parser=None, bufsize=None):
  321.     if bufsize is None:
  322.         bufsize = default_bufsize
  323.     if type(stream_or_string) in _StringTypes:
  324.         stream = open(stream_or_string)
  325.     else:
  326.         stream = stream_or_string
  327.     if not parser:
  328.         parser = xml.sax.make_parser()
  329.     return DOMEventStream(stream, parser, bufsize)
  330.  
  331. def parseString(string, parser=None):
  332.     try:
  333.         from cStringIO import StringIO
  334.     except ImportError:
  335.         from StringIO import StringIO
  336.  
  337.     bufsize = len(string)
  338.     buf = StringIO(string)
  339.     if not parser:
  340.         parser = xml.sax.make_parser()
  341.     return DOMEventStream(buf, parser, bufsize)
  342.