home *** CD-ROM | disk | FTP | other *** search
/ PC World 2001 April / PCWorld_2001-04_cd.bin / Software / TemaCD / webclean / !!!python!!! / BeOpen-Python-2.0.exe / PULLDOM.PY < prev    next >
Encoding:
Python Source  |  2000-10-13  |  8.2 KB  |  239 lines

  1. import minidom
  2. import xml.sax,xml.sax.handler
  3.  
# Event type tokens.  Each event queued by PullDOM is an
# (event_type, node) tuple whose first item is one of these strings.
START_ELEMENT = "START_ELEMENT"
END_ELEMENT = "END_ELEMENT"
COMMENT = "COMMENT"
START_DOCUMENT = "START_DOCUMENT"
END_DOCUMENT = "END_DOCUMENT"
PROCESSING_INSTRUCTION = "PROCESSING_INSTRUCTION"
IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE"
CHARACTERS = "CHARACTERS"
  12.  
  13. class PullDOM(xml.sax.ContentHandler):
  14.     def __init__(self):
  15.         self.firstEvent = [None, None]
  16.         self.lastEvent = self.firstEvent
  17.         self._ns_contexts = [{}] # contains uri -> prefix dicts
  18.         self._current_context = self._ns_contexts[-1]
  19.  
  20.     def setDocumentLocator(self, locator): pass
  21.  
  22.     def startPrefixMapping(self, prefix, uri):
  23.         self._ns_contexts.append(self._current_context.copy())
  24.         self._current_context[uri] = prefix
  25.  
  26.     def endPrefixMapping(self, prefix):
  27.         del self._ns_contexts[-1]
  28.  
  29.     def startElementNS(self, name, tagName , attrs):
  30.         uri,localname = name
  31.         if uri:
  32.             # When using namespaces, the reader may or may not
  33.             # provide us with the original name. If not, create
  34.             # *a* valid tagName from the current context.
  35.             if tagName is None:
  36.                 tagName = self._current_context[uri] + ":" + localname
  37.             node = self.document.createElementNS(uri, tagName)
  38.         else:
  39.             # When the tagname is not prefixed, it just appears as
  40.             # localname
  41.             node = self.document.createElement(localname)
  42.  
  43.         for aname,value in attrs.items():
  44.             a_uri, a_localname = aname
  45.             if a_uri:
  46.                 qname = self._current_context[a_uri] + ":" + a_localname
  47.                 attr = self.document.createAttributeNS(a_uri, qname)
  48.             else:
  49.                 attr = self.document.createAttribute(a_localname)
  50.             attr.value = value
  51.             node.setAttributeNode(attr)
  52.         
  53.         parent = self.curNode
  54.         node.parentNode = parent
  55.         self.curNode = node
  56.  
  57.         self.lastEvent[1] = [(START_ELEMENT, node), None]
  58.         self.lastEvent = self.lastEvent[1]
  59.         #self.events.append((START_ELEMENT, node))
  60.  
  61.     def endElementNS(self, name, tagName):
  62.         node = self.curNode
  63.         self.lastEvent[1] = [(END_ELEMENT, node), None]
  64.         self.lastEvent = self.lastEvent[1]
  65.         #self.events.append((END_ELEMENT, node))
  66.         self.curNode = node.parentNode
  67.  
  68.     def startElement(self, name, attrs):
  69.         node = self.document.createElement(name)
  70.  
  71.         for aname,value in attrs.items():
  72.             attr = self.document.createAttribute(aname)
  73.             attr.value = value
  74.             node.setAttributeNode(attr)
  75.         
  76.         parent = self.curNode
  77.         node.parentNode = parent
  78.         self.curNode = node
  79.  
  80.         self.lastEvent[1] = [(START_ELEMENT, node), None]
  81.         self.lastEvent = self.lastEvent[1]
  82.         #self.events.append((START_ELEMENT, node))
  83.  
  84.     def endElement(self, name):
  85.         node = self.curNode
  86.         self.lastEvent[1] = [(END_ELEMENT, node), None]
  87.         self.lastEvent = self.lastEvent[1]
  88.         #self.events.append((END_ELEMENT, node))
  89.         self.curNode = node.parentNode
  90.         
  91.     def comment(self, s):
  92.         node = self.document.createComment(s)
  93.         parent = self.curNode
  94.         node.parentNode = parent
  95.         self.lastEvent[1] = [(COMMENT, node), None]
  96.         self.lastEvent = self.lastEvent[1]
  97.         #self.events.append((COMMENT, node))
  98.  
  99.     def processingInstruction(self, target, data):
  100.         node = self.document.createProcessingInstruction(target, data)
  101.         
  102.         parent = self.curNode
  103.         node.parentNode = parent
  104.         self.lastEvent[1] = [(PROCESSING_INSTRUCTION, node), None]
  105.         self.lastEvent = self.lastEvent[1]
  106.         #self.events.append((PROCESSING_INSTRUCTION, node))
  107.  
  108.     def ignorableWhitespace(self, chars):
  109.         node = self.document.createTextNode(chars[start:start + length])
  110.         parent = self.curNode
  111.         node.parentNode = parent
  112.         self.lastEvent[1] = [(IGNORABLE_WHITESPACE, node), None]
  113.         self.lastEvent = self.lastEvent[1]
  114.         #self.events.append((IGNORABLE_WHITESPACE, node))
  115.  
  116.     def characters(self, chars):
  117.         node = self.document.createTextNode(chars)
  118.         parent = self.curNode
  119.         node.parentNode = parent
  120.         self.lastEvent[1] = [(CHARACTERS, node), None]
  121.         self.lastEvent = self.lastEvent[1]
  122.  
  123.     def startDocument(self):
  124.         node = self.curNode = self.document = minidom.Document()
  125.         node.parentNode = None
  126.         self.lastEvent[1] = [(START_DOCUMENT, node), None]
  127.         self.lastEvent = self.lastEvent[1]
  128.         #self.events.append((START_DOCUMENT, node))
  129.  
  130.     def endDocument(self):
  131.         assert not self.curNode.parentNode
  132.         for node in self.curNode.childNodes:
  133.             if node.nodeType == node.ELEMENT_NODE:
  134.                 self.document.documentElement = node
  135.         #if not self.document.documentElement:
  136.         #    raise Error, "No document element"
  137.  
  138.         self.lastEvent[1] = [(END_DOCUMENT, node), None]
  139.         #self.events.append((END_DOCUMENT, self.curNode))
  140.  
  141. class ErrorHandler:
  142.     def warning(self, exception):
  143.         print exception
  144.     def error(self, exception):
  145.         raise exception 
  146.     def fatalError(self, exception):
  147.         raise exception 
  148.  
  149. class DOMEventStream:
  150.     def __init__(self, stream, parser, bufsize):
  151.         self.stream = stream
  152.         self.parser = parser
  153.         self.bufsize = bufsize
  154.         self.reset()
  155.  
  156.     def reset(self):
  157.         self.pulldom = PullDOM()
  158.         # This content handler relies on namespace support
  159.         self.parser.setFeature(xml.sax.handler.feature_namespaces,1)
  160.         self.parser.setContentHandler(self.pulldom)
  161.  
  162.     def __getitem__(self, pos):
  163.         rc = self.getEvent()
  164.         if rc:
  165.             return rc
  166.         raise IndexError
  167.  
  168.     def expandNode(self, node):
  169.         event = self.getEvent()
  170.         while event:
  171.             token, cur_node = event
  172.             if cur_node is node:
  173.                 return
  174.             if token != END_ELEMENT:
  175.                 cur_node.parentNode.appendChild(cur_node)
  176.             event = self.getEvent()
  177.  
  178.     def getEvent(self):
  179.         if not self.pulldom.firstEvent[1]:
  180.             self.pulldom.lastEvent = self.pulldom.firstEvent
  181.         while not self.pulldom.firstEvent[1]:
  182.             buf=self.stream.read(self.bufsize)
  183.             if not buf:
  184.                 #FIXME: why doesn't Expat close work?
  185.                 #self.parser.close()
  186.                 return None
  187.             self.parser.feed(buf)
  188.         rc = self.pulldom.firstEvent[1][0]
  189.         self.pulldom.firstEvent[1] = self.pulldom.firstEvent[1][1]
  190.         return rc
  191.  
  192. class SAX2DOM(PullDOM):
  193.  
  194.     def startElementNS(self, name, tagName , attrs):
  195.         PullDOM.startElementNS(self, name, tagName, attrs)
  196.         self.curNode.parentNode.appendChild(self.curNode)
  197.  
  198.     def startElement(self, name, attrs):
  199.         PullDOM.startElement(self, name, attrs)
  200.         self.curNode.parentNode.appendChild(self.curNode)
  201.  
  202.     def processingInstruction(self, target, data):
  203.         PullDOM.processingInstruction(self, target, data)
  204.         node = self.lastEvent[0][1]
  205.         node.parentNode.appendChild(node)        
  206.  
  207.     def ignorableWhitespace(self, chars):
  208.         PullDOM.ignorableWhitespace(self, chars)
  209.         node = self.lastEvent[0][1]
  210.         node.parentNode.appendChild(node)        
  211.  
  212.     def characters(self, chars):
  213.         PullDOM.characters(self, chars)
  214.         node = self.lastEvent[0][1]
  215.         node.parentNode.appendChild(node)        
  216.     
# Default chunk size handed to stream.read() when parse() is called
# without an explicit bufsize: 16384 - 20.
# NOTE(review): the reason for the 20 subtracted is not stated in this
# file -- confirm before changing.
default_bufsize = (2 ** 14) - 20
  218.  
  219. def parse(stream_or_string, parser=None, bufsize=default_bufsize):
  220.     if type(stream_or_string) is type(""):
  221.         stream = open(stream_or_string)
  222.     else:
  223.         stream = stream_or_string
  224.     if not parser: 
  225.         parser = xml.sax.make_parser()
  226.     return DOMEventStream(stream, parser, bufsize)
  227.  
  228. def parseString(string, parser=None):
  229.     try:
  230.         from cStringIO import StringIO
  231.     except ImportError:
  232.         from StringIO import StringIO
  233.         
  234.     bufsize = len(string)
  235.     buf = StringIO(string)
  236.     if not parser:
  237.         parser = xml.sax.make_parser()
  238.     return DOMEventStream(buf, parser, bufsize)
  239.