home *** CD-ROM | disk | FTP | other *** search
/ PC World 2001 April / PCWorld_2001-04_cd.bin / Software / TemaCD / webclean / !!!python!!! / BeOpen-Python-2.0.exe / SHLEX.PY < prev    next >
Encoding:
Python Source  |  2000-09-28  |  7.1 KB  |  191 lines

  1. """A lexical analyzer class for simple shell-like syntaxes."""
  2.  
  3. # Module and documentation by Eric S. Raymond, 21 Dec 1998 
  4. # Input stacking and error message cleanup added by ESR, March 2000
  5.  
  6. import os.path
  7. import sys
  8.  
  9.  
  10. class shlex:
  11.     "A lexical analyzer class for simple shell-like syntaxes." 
  12.     def __init__(self, instream=None, infile=None):
  13.         if instream:
  14.             self.instream = instream
  15.             self.infile = infile
  16.         else:
  17.             self.instream = sys.stdin
  18.             self.infile = None
  19.         self.commenters = '#'
  20.         self.wordchars = ('abcdfeghijklmnopqrstuvwxyz'
  21.                           'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_')
  22.         self.whitespace = ' \t\r\n'
  23.         self.quotes = '\'"'
  24.         self.state = ' '
  25.         self.pushback = [];
  26.         self.lineno = 1
  27.         self.debug = 0
  28.         self.token = ''
  29.         self.filestack = []
  30.         self.source = None
  31.         if self.debug:
  32.             print 'shlex: reading from %s, line %d' \
  33.                   % (self.instream, self.lineno)
  34.  
  35.     def push_token(self, tok):
  36.         "Push a token onto the stack popped by the get_token method"
  37.         if self.debug >= 1:
  38.             print "shlex: pushing token " + `tok`
  39.         self.pushback = [tok] + self.pushback;
  40.  
  41.     def get_token(self):
  42.         "Get a token from the input stream (or from stack if it's nonempty)"
  43.         if self.pushback:
  44.             tok = self.pushback[0]
  45.             self.pushback = self.pushback[1:]
  46.             if self.debug >= 1:
  47.                 print "shlex: popping token " + `tok`
  48.             return tok
  49.         # No pushback.  Get a token.
  50.         raw = self.read_token()
  51.         # Handle inclusions
  52.         while raw == self.source:
  53.             (newfile, newstream) = self.sourcehook(self.read_token())
  54.             self.filestack.insert(0, (self.infile, self.instream, self.lineno))
  55.             self.infile = newfile
  56.             self.instream = newstream
  57.             self.lineno = 1
  58.             if self.debug:
  59.                 print 'shlex: pushing to file %s' % (self.infile,)
  60.             raw = self.get_token()
  61.         # Maybe we got EOF instead?
  62.         while raw == "":
  63.             if len(self.filestack) == 0:
  64.                 return ""
  65.             else:
  66.                 self.instream.close()
  67.                 (self.infile, self.instream, self.lineno) = self.filestack[0]
  68.                 self.filestack = self.filestack[1:]
  69.                 if self.debug:
  70.                     print 'shlex: popping to %s, line %d' \
  71.                           % (self.instream, self.lineno)
  72.                 self.state = ' '
  73.                 raw = self.get_token()
  74.          # Neither inclusion nor EOF
  75.         if self.debug >= 1:
  76.             if raw:
  77.                 print "shlex: token=" + `raw`
  78.             else:
  79.                 print "shlex: token=EOF"
  80.         return raw
  81.  
  82.     def read_token(self):
  83.         "Read a token from the input stream (no pushback or inclusions)"
  84.         tok = ''
  85.         while 1:
  86.             nextchar = self.instream.read(1);
  87.             if nextchar == '\n':
  88.                 self.lineno = self.lineno + 1
  89.             if self.debug >= 3:
  90.                 print "shlex: in state", repr(self.state), \
  91.                       "I see character:", repr(nextchar) 
  92.             if self.state is None:
  93.                 self.token = '';        # past end of file
  94.                 break
  95.             elif self.state == ' ':
  96.                 if not nextchar:
  97.                     self.state = None;  # end of file
  98.                     break
  99.                 elif nextchar in self.whitespace:
  100.                     if self.debug >= 2:
  101.                         print "shlex: I see whitespace in whitespace state"
  102.                     if self.token:
  103.                         break   # emit current token
  104.                     else:
  105.                         continue
  106.                 elif nextchar in self.commenters:
  107.                     self.instream.readline()
  108.                     self.lineno = self.lineno + 1
  109.                 elif nextchar in self.wordchars:
  110.                     self.token = nextchar
  111.                     self.state = 'a'
  112.                 elif nextchar in self.quotes:
  113.                     self.token = nextchar
  114.                     self.state = nextchar
  115.                 else:
  116.                     self.token = nextchar
  117.                     if self.token:
  118.                         break   # emit current token
  119.                     else:
  120.                         continue
  121.             elif self.state in self.quotes:
  122.                 self.token = self.token + nextchar
  123.                 if nextchar == self.state:
  124.                     self.state = ' '
  125.                     break
  126.             elif self.state == 'a':
  127.                 if not nextchar:
  128.                     self.state = None;  # end of file
  129.                     break
  130.                 elif nextchar in self.whitespace:
  131.                     if self.debug >= 2:
  132.                         print "shlex: I see whitespace in word state"
  133.                     self.state = ' '
  134.                     if self.token:
  135.                         break   # emit current token
  136.                     else:
  137.                         continue
  138.                 elif nextchar in self.commenters:
  139.                     self.instream.readline()
  140.                     self.lineno = self.lineno + 1
  141.                 elif nextchar in self.wordchars or nextchar in self.quotes:
  142.                     self.token = self.token + nextchar
  143.                 else:
  144.                     self.pushback = [nextchar] + self.pushback
  145.                     if self.debug >= 2:
  146.                         print "shlex: I see punctuation in word state"
  147.                     self.state = ' '
  148.                     if self.token:
  149.                         break   # emit current token
  150.                     else:
  151.                         continue
  152.         result = self.token
  153.         self.token = ''
  154.         if self.debug > 1:
  155.             if result:
  156.                 print "shlex: raw token=" + `result`
  157.             else:
  158.                 print "shlex: raw token=EOF"
  159.         return result
  160.  
  161.     def sourcehook(self, newfile):
  162.         "Hook called on a filename to be sourced."
  163.         if newfile[0] == '"':
  164.             newfile = newfile[1:-1]
  165.         # This implements cpp-like semantics for relative-path inclusion.
  166.         if type(self.infile) == type("") and not os.path.isabs(newfile):
  167.             newfile = os.path.join(os.path.dirname(self.infile), newfile)
  168.         return (newfile, open(newfile, "r"))
  169.  
  170.     def error_leader(self, infile=None, lineno=None):
  171.         "Emit a C-compiler-like, Emacs-friendly error-message leader."
  172.         if not infile:
  173.             infile = self.infile
  174.         if not lineno:
  175.             lineno = self.lineno
  176.         return "\"%s\", line %d: " % (infile, lineno)
  177.  
  178.  
  179. if __name__ == '__main__': 
  180.     if len(sys.argv) == 1:
  181.         lexer = shlex()
  182.     else:
  183.         file = sys.argv[1]
  184.         lexer = shlex(open(file), file)
  185.     while 1:
  186.         tt = lexer.get_token()
  187.         if tt:
  188.             print "Token: " + repr(tt)
  189.         else:
  190.             break
  191.