home *** CD-ROM | disk | FTP | other *** search
- #
- # ElementTree
- # $Id: ElementPath.py 1858 2004-06-17 21:31:41Z Fredrik $
- #
- # limited xpath support for element trees
- #
- # history:
- # 2003-05-23 fl created
- # 2003-05-28 fl added support for // etc
- # 2003-08-27 fl fixed parsing of periods in element names
- #
- # Copyright (c) 2003-2004 by Fredrik Lundh. All rights reserved.
- #
- # fredrik@pythonware.com
- # http://www.pythonware.com
- #
- # --------------------------------------------------------------------
- # The ElementTree toolkit is
- #
- # Copyright (c) 1999-2004 by Fredrik Lundh
- #
- # By obtaining, using, and/or copying this software and/or its
- # associated documentation, you agree that you have read, understood,
- # and will comply with the following terms and conditions:
- #
- # Permission to use, copy, modify, and distribute this software and
- # its associated documentation for any purpose and without fee is
- # hereby granted, provided that the above copyright notice appears in
- # all copies, and that both that copyright notice and this permission
- # notice appear in supporting documentation, and that the name of
- # Secret Labs AB or the author not be used in advertising or publicity
- # pertaining to distribution of the software without specific, written
- # prior permission.
- #
- # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
- # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
- # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
- # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
- # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
- # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
- # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
- # OF THIS SOFTWARE.
- # --------------------------------------------------------------------
-
- # Licensed to PSF under a Contributor Agreement.
- # See http://www.python.org/2.4/license for licensing details.
-
- ##
- # Implementation module for XPath support. There's usually no reason
- # to import this module directly; the <b>ElementTree</b> does this for
- # you, if needed.
- ##
-
- import re
-
- xpath_tokenizer = re.compile(
- "(::|\.\.|\(\)|[/.*:\[\]\(\)@=])|((?:\{[^}]+\})?[^/:\[\]\(\)@=\s]+)|\s+"
- ).findall
-
- class xpath_descendant_or_self:
- pass
-
- ##
- # Wrapper for a compiled XPath.
-
- class Path:
-
- ##
- # Create an Path instance from an XPath expression.
-
- def __init__(self, path):
- tokens = xpath_tokenizer(path)
- # the current version supports 'path/path'-style expressions only
- self.path = []
- self.tag = None
- if tokens and tokens[0][0] == "/":
- raise SyntaxError("cannot use absolute path on element")
- while tokens:
- op, tag = tokens.pop(0)
- if tag or op == "*":
- self.path.append(tag or op)
- elif op == ".":
- pass
- elif op == "/":
- self.path.append(xpath_descendant_or_self())
- continue
- else:
- raise SyntaxError("unsupported path syntax (%s)" % op)
- if tokens:
- op, tag = tokens.pop(0)
- if op != "/":
- raise SyntaxError(
- "expected path separator (%s)" % (op or tag)
- )
- if self.path and isinstance(self.path[-1], xpath_descendant_or_self):
- raise SyntaxError("path cannot end with //")
- if len(self.path) == 1 and isinstance(self.path[0], type("")):
- self.tag = self.path[0]
-
- ##
- # Find first matching object.
-
- def find(self, element):
- tag = self.tag
- if tag is None:
- nodeset = self.findall(element)
- if not nodeset:
- return None
- return nodeset[0]
- for elem in element:
- if elem.tag == tag:
- return elem
- return None
-
- ##
- # Find text for first matching object.
-
- def findtext(self, element, default=None):
- tag = self.tag
- if tag is None:
- nodeset = self.findall(element)
- if not nodeset:
- return default
- return nodeset[0].text or ""
- for elem in element:
- if elem.tag == tag:
- return elem.text or ""
- return default
-
- ##
- # Find all matching objects.
-
- def findall(self, element):
- nodeset = [element]
- index = 0
- while 1:
- try:
- path = self.path[index]
- index = index + 1
- except IndexError:
- return nodeset
- set = []
- if isinstance(path, xpath_descendant_or_self):
- try:
- tag = self.path[index]
- if not isinstance(tag, type("")):
- tag = None
- else:
- index = index + 1
- except IndexError:
- tag = None # invalid path
- for node in nodeset:
- new = list(node.getiterator(tag))
- if new and new[0] is node:
- set.extend(new[1:])
- else:
- set.extend(new)
- else:
- for node in nodeset:
- for node in node:
- if path == "*" or node.tag == path:
- set.append(node)
- if not set:
- return []
- nodeset = set
-
- _cache = {}
-
- ##
- # (Internal) Compile path.
-
- def _compile(path):
- p = _cache.get(path)
- if p is not None:
- return p
- p = Path(path)
- if len(_cache) >= 100:
- _cache.clear()
- _cache[path] = p
- return p
-
- ##
- # Find first matching object.
-
- def find(element, path):
- return _compile(path).find(element)
-
- ##
- # Find text for first matching object.
-
- def findtext(element, path, default=None):
- return _compile(path).findtext(element, default)
-
- ##
- # Find all matching objects.
-
- def findall(element, path):
- return _compile(path).findall(element)
-