home *** CD-ROM | disk | FTP | other *** search
- """Parse a Python module and describe its classes and methods.
-
- Parse enough of a Python file to recognize imports and class and
- method definitions, and to find out the superclasses of a class.
-
- The interface consists of a single function:
- readmodule_ex(module [, path])
- where module is the name of a Python module, and path is an optional
- list of directories where the module is to be searched. If present,
- path is prepended to the system search path sys.path. The return
- value is a dictionary. The keys of the dictionary are the names of
- the classes and top-level functions defined in the module (including
- names that are brought in via the from XXX import YYY construct). The
- values are instances of the classes Class and Function defined here.
- One special key/value pair
- is present for packages: the key '__path__' has a list as its value
- which contains the package search path.
-
- A class is described by the class Class in this module. Instances
- of this class have the following instance variables:
- module -- the module name
- name -- the name of the class
- super -- a list of super classes (Class instances)
- methods -- a dictionary of methods
- file -- the file in which the class was defined
- lineno -- the line in the file on which the class statement occurred
- The dictionary of methods uses the method names as keys and the line
- numbers on which the method was defined as values.
- If the name of a super class is not recognized, the corresponding
- entry in the list of super classes is not a class instance but a
- string giving the name of the super class. Since import statements
- are recognized and imported modules are scanned as well, this
- shouldn't happen often.
-
- A function is described by the class Function in this module.
- Instances of this class have the following instance variables:
- module -- the module name
- name -- the name of the function
- file -- the file in which the function was defined
- lineno -- the line in the file on which the def statement occurred
- """
-
- import sys
- import imp
- import tokenize
- from token import NAME, DEDENT, OP
- from operator import itemgetter
-
# Public names of this module.
__all__ = ["readmodule", "readmodule_ex", "Class", "Function"]

# Maps a (possibly dotted) module name to the dictionary that
# _readmodule() built for it, so that each module is scanned only once.
_modules = {} # cache of modules we've seen
-
- # each Python class is represented by an instance of this class
class Class:
    '''Describe one Python class found while scanning a module.

    Instance variables:
    module  -- name of the module the class belongs to
    name    -- name of the class
    super   -- list of superclasses (a new empty list when None is given)
    methods -- dictionary mapping method names to line numbers
    file    -- file in which the class was defined
    lineno  -- line number of the class statement
    '''
    def __init__(self, module, name, super, file, lineno):
        if super is None:
            bases = []
        else:
            bases = super
        self.module, self.name = module, name
        self.super = bases
        self.methods = {}
        self.file, self.lineno = file, lineno

    def _addmethod(self, name, lineno):
        # Record the line on which method NAME is defined; recording the
        # same name again replaces the earlier line number.
        self.methods[name] = lineno
-
class Function:
    '''Describe one top-level Python function found while scanning a module.

    Instance variables:
    module -- name of the module the function belongs to
    name   -- name of the function
    file   -- file in which the function was defined
    lineno -- line number of the def statement
    '''
    def __init__(self, module, name, file, lineno):
        self.module, self.name = module, name
        self.file, self.lineno = file, lineno
-
def readmodule(module, path=None):
    '''Backwards compatible interface.

    Scan MODULE the same way readmodule_ex() does, but keep only the
    entries of the resulting dictionary whose value is a Class object;
    every other entry (Function objects, the '__path__' list of a
    package) is left out.'''

    scanned = _readmodule(module, path or [])
    return dict((name, obj) for name, obj in scanned.items()
                if isinstance(obj, Class))
-
def readmodule_ex(module, path=None):
    '''Read a module file and return a dictionary describing it.

    Search for MODULE in PATH and sys.path, read and parse the
    module and return a dictionary with one entry for each class and
    each top-level function found in the module.  For a package the
    dictionary also has a '__path__' entry.
    '''
    if not path:
        path = []
    return _readmodule(module, path)
-
def _readmodule(module, path, inpackage=None):
    '''Do the hard work for readmodule[_ex].

    If INPACKAGE is given, it must be the dotted name of the package in
    which we are searching for a submodule, and then PATH must be the
    package search path; otherwise, we are searching for a top-level
    module, and PATH is combined with sys.path.

    Return a dictionary mapping names to Class and Function objects
    (plus a '__path__' entry for packages).  The dictionary is cached
    in _modules under the full dotted module name, so a module is
    scanned only once.

    Raise ImportError if the module cannot be found (as reported by
    imp.find_module) or if a dotted name passes through something that
    is not a package.
    '''
    # Compute the full module name (prepending inpackage if set)
    if inpackage is not None:
        fullmodule = "%s.%s" % (inpackage, module)
    else:
        fullmodule = module

    # Check in the cache
    if fullmodule in _modules:
        return _modules[fullmodule]

    # Initialize the dictionary for this module's contents
    contents = {}

    # Check if it is a built-in module; we don't do much for these
    if module in sys.builtin_module_names and inpackage is None:
        _modules[module] = contents
        return contents

    # Check for a dotted module name
    i = module.rfind('.')
    if i >= 0:
        package = module[:i]
        submodule = module[i+1:]
        parent = _readmodule(package, path, inpackage)
        if inpackage is not None:
            package = "%s.%s" % (inpackage, package)
        # A plain module has no '__path__'; report that as an import
        # failure rather than letting a KeyError escape.
        if '__path__' not in parent:
            raise ImportError('No package named %s' % package)
        return _readmodule(submodule, parent['__path__'], package)

    # Search the path for the module
    if inpackage is not None:
        f, fname, (_s, _m, ty) = imp.find_module(module, path)
    else:
        f, fname, (_s, _m, ty) = imp.find_module(module, path + sys.path)
    if ty == imp.PKG_DIRECTORY:
        contents['__path__'] = [fname]
        path = [fname] + path
        f, fname, (_s, _m, ty) = imp.find_module('__init__', [fname])
    # Register the (still empty) dictionary before scanning so that
    # circular imports find it in the cache instead of recursing forever.
    _modules[fullmodule] = contents
    if ty != imp.PY_SOURCE:
        # not Python source, can't do anything with this module
        if f is not None:
            # imp.find_module returns no file object for modules that
            # do not live in a file.
            f.close()
        return contents

    stack = [] # stack of (class, indent) pairs

    # The nested try blocks (instead of try/except/finally) keep this
    # valid on old interpreters; the finally clause guarantees the file
    # is closed even when the tokenizer rejects the source.
    try:
        g = tokenize.generate_tokens(f.readline)
        try:
            for tokentype, token, start, _end, _line in g:
                if tokentype == DEDENT:
                    lineno, thisindent = start
                    # close nested classes and defs
                    while stack and stack[-1][1] >= thisindent:
                        del stack[-1]
                elif token == 'def':
                    lineno, thisindent = start
                    # close previous nested classes and defs
                    while stack and stack[-1][1] >= thisindent:
                        del stack[-1]
                    tokentype, meth_name, start = g.next()[0:3]
                    if tokentype != NAME:
                        continue # Syntax error
                    if stack:
                        cur_class = stack[-1][0]
                        if isinstance(cur_class, Class):
                            # it's a method
                            cur_class._addmethod(meth_name, lineno)
                        # else it's a nested def
                    else:
                        # it's a function
                        contents[meth_name] = Function(fullmodule, meth_name,
                                                       fname, lineno)
                    stack.append((None, thisindent)) # Marker for nested fns
                elif token == 'class':
                    lineno, thisindent = start
                    # close previous nested classes and defs
                    while stack and stack[-1][1] >= thisindent:
                        del stack[-1]
                    tokentype, class_name, start = g.next()[0:3]
                    if tokentype != NAME:
                        continue # Syntax error
                    # parse what follows the class name
                    tokentype, token, start = g.next()[0:3]
                    inherit = None
                    if token == '(':
                        names = [] # List of superclasses
                        # there's a list of superclasses
                        level = 1
                        base_tokens = [] # Tokens making up current superclass
                        while True:
                            tokentype, token, start = g.next()[0:3]
                            if token in (')', ',') and level == 1:
                                n = "".join(base_tokens)
                                if n in contents:
                                    # we know this super class
                                    n = contents[n]
                                else:
                                    c = n.split('.')
                                    if len(c) > 1:
                                        # super class is of the form
                                        # module.class: look in module for
                                        # class
                                        m = c[-2]
                                        c = c[-1]
                                        if m in _modules:
                                            d = _modules[m]
                                            if c in d:
                                                n = d[c]
                                # "class C():", a trailing comma or a base
                                # that is not a dotted name leaves nothing
                                # collected; don't record '' as a base.
                                if n:
                                    names.append(n)
                                base_tokens = []
                            if token == '(':
                                level += 1
                            elif token == ')':
                                level -= 1
                                if level == 0:
                                    break
                            elif token == ',' and level == 1:
                                pass
                            # only use NAME and OP (== dot) tokens for type name
                            elif tokentype in (NAME, OP) and level == 1:
                                base_tokens.append(token)
                            # expressions in the base list are not supported
                        inherit = names
                    cur_class = Class(fullmodule, class_name, inherit,
                                      fname, lineno)
                    if not stack:
                        contents[class_name] = cur_class
                    stack.append((cur_class, thisindent))
                elif token == 'import' and start[1] == 0:
                    modules = _getnamelist(g)
                    for mod, _mod2 in modules:
                        try:
                            # Recursively read the imported module
                            if inpackage is None:
                                _readmodule(mod, path)
                            else:
                                try:
                                    _readmodule(mod, path, inpackage)
                                except ImportError:
                                    _readmodule(mod, [])
                        except Exception:
                            # If we can't find or parse the imported module,
                            # too bad -- don't die here.  (Exception, not a
                            # bare except, so that KeyboardInterrupt and
                            # SystemExit still get through.)
                            pass
                elif token == 'from' and start[1] == 0:
                    mod, token = _getname(g)
                    if not mod or token != "import":
                        continue
                    names = _getnamelist(g)
                    try:
                        # Recursively read the imported module
                        d = _readmodule(mod, path, inpackage)
                    except Exception:
                        # If we can't find or parse the imported module,
                        # too bad -- don't die here.
                        continue
                    # add any names that were defined in the imported module
                    # to our name space if they were mentioned in the list
                    for n, n2 in names:
                        if n in d:
                            contents[n2 or n] = d[n]
                        elif n == '*':
                            # don't add names that start with _
                            for public in d:
                                if public[0] != '_':
                                    contents[public] = d[public]
        except StopIteration:
            # A helper ran off the end of the token stream in the middle
            # of a statement; keep what has been collected so far.
            pass
    finally:
        f.close()

    return contents
-
def _getnamelist(g):
    # Helper that reads a comma-separated list of dotted names, each
    # optionally followed by an 'as' clause, from the token source g.
    # Return a list of (name, asname) pairs; asname is None when the
    # entry has no 'as' clause.
    pairs = []
    dotted, tok = _getname(g)
    while dotted:
        alias = None
        if tok == 'as':
            alias, tok = _getname(g)
        pairs.append((dotted, alias))
        # Skip whatever is left of this entry, up to the separating
        # comma or the token that ends the line.
        while not (tok == "," or "\n" in tok):
            tok = g.next()[1]
        if tok != ",":
            break
        dotted, tok = _getname(g)
    return pairs
-
def _getname(g):
    # Helper that reads one dotted name (or a lone '*') from the token
    # source g.  Return a pair (name, token): name is the dotted name,
    # or None if the first token cannot start one, and token is the
    # text of the last token that was read.
    kind, text = g.next()[:2]
    if not (kind == NAME or text == '*'):
        return (None, text)
    pieces = [text]
    kind, text = g.next()[:2]
    while text == '.':
        kind, text = g.next()[:2]
        if kind != NAME:
            # a dot that is not followed by a name ends the dotted name
            break
        pieces.append(text)
        kind, text = g.next()[:2]
    return (".".join(pieces), text)
-
def _main():
    # Main program for testing.
    #
    # sys.argv[1] is either a module name or the path of a source file.
    # The module is scanned with readmodule_ex() and every class (with
    # its methods) and top-level function found is printed, ordered by
    # line number.
    import os
    mod = sys.argv[1]
    if os.path.exists(mod):
        # A file path: search its directory and strip a ".py" suffix.
        path = [os.path.dirname(mod)]
        mod = os.path.basename(mod)
        if mod.lower().endswith(".py"):
            mod = mod[:-3]
    else:
        path = []
    found = readmodule_ex(mod, path)
    # Entries without a lineno attribute (the '__path__' list of a
    # package) sort first with a key of 0.
    objs = sorted(found.values(), key=lambda obj: getattr(obj, 'lineno', 0))
    for obj in objs:
        # Each line is formatted first and printed as a single value, so
        # the statement means the same with or without print being a
        # function.
        if isinstance(obj, Class):
            print("class %s %s %s" % (obj.name, obj.super, obj.lineno))
            methods = sorted(obj.methods.items(), key=itemgetter(1))
            for name, lineno in methods:
                if name != "__path__":
                    print(" def %s %s" % (name, lineno))
        elif isinstance(obj, Function):
            print("def %s %s" % (obj.name, obj.lineno))
-
# Run the test driver only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    _main()
-