home *** CD-ROM | disk | FTP | other *** search
/ PC World 2001 August / PCWorld_2001-08_cd.bin / Komunikace / phptriad / phptriadsetup2-11.exe / php / pear / PHPDoc / parser / PhpdocParserRegExp.php < prev    next >
PHP Script  |  2001-02-18  |  28KB  |  492 lines

  1. <?php
  2. /**
  3. * Defines all regular expressions.
  4. *
  5. * This class defines all regular expressions. To make the 
  6. * configuration and customization of PHPDoc as simple as
  7. * possible I decided to define all regular expressions in one class.
  8. * From a programming point of view there's no need to do so. 
  9. *
  10. * @version  $Id: PhpdocParserRegExp.php,v 1.5 2001/02/18 14:45:28 uw Exp $
  11. */
  12. class PhpdocParserRegExp extends PhpdocObject {
  13.  
  14.     /**
  15.     * Array of phpdoc tags, indexed by the tagname.
  16.     *
  17.     * ... grepping information is really not a parser. Don't 
  18.     * change the order the tags are listed. If you introduce
  19.     * new tags write the long variant of the tagname (parameter)
  20.     * in front of the shortcut (param).
  21.     *
  22.     * @var  array   List of all PHPDoc documentation tags.
  23.     */
  24.     var $PHPDOC_TAGS = array(
  25.                                 "@parameter"    => '@param[eter] (object objectname|type) [$varname] [description]',
  26.                                 "@param"        => '@param[eter] (object objectname|type) [$varname] [description]',
  27.                                 
  28.                                 "@return"       => '@return    (object objectname|type) [$varname] [description]',
  29.                                 
  30.                                 "@access"       => '@access',
  31.                                 "@abstract"     => '@abstract',
  32.                                 "@static"       => '@static',
  33.                                 "@final"        => '@final',
  34.                                 
  35.                                 "@throws"       => '@throws exception [, exception]',
  36.                                 
  37.                                 "@see"          => '@see (function()|$varname|(module|class)(function()|$varname)) [, (funtion()|$varname|(module|class)(function()|$varname))]',
  38.                                 "@link"         => '@link URL [description]',
  39.                                 
  40.                                 "@var"          => '@var     (object objectname|type) [$varname]',
  41.                                 "@global"       => '@global (object objectname|type) $varname [description]',
  42.                                 
  43.                                 "@constant"     => '@const[ant] label [description]',
  44.                                 "@const"        => '@const[ant] label [description]',
  45.                                 
  46.                                 "@author"       => '@author Name [<email>] [, Name [<email>]',
  47.                                 "@copyright"    => '@copyright description',
  48.                                 
  49.                                 "@version"      => '@version label',
  50.                                 "@since"        => '@since label',
  51.     
  52.                                 "@deprecated"   => '@deprec[ated] description',                                                        
  53.                                 "@deprec"       => '@deprec[ated] description',
  54.                                 
  55.                                 "@brother"      => '@(brother|sister) (function()|$varname)',
  56.                                 "@sister"       => '@(brother|sister) (function()|$varname)',
  57.                                                                                         
  58.                                 "@include"      => '@include description',    
  59.                                 
  60.                                 "@exclude"      => '@exclude label',
  61.                                 
  62.                                 "@modulegroup"  => '@modulegroup label',
  63.                                 "@module"       => '@module label',
  64.                                 
  65.                                 "@package"      => '@package label',
  66.                                 
  67.                                 "@magic"        => '@magic description',
  68.                                 "@todo"         => '@todo description'
  69.                             );
  70.  
  71.     /**
  72.     * Basis regular expressions used to compose complex expressions to grep doc comments.
  73.     *
  74.     * PHPDoc tries to compose all complex regular expressions
  75.     * from a list of basic ones. This array contains all expressions
  76.     * used grep complex doc comments and the surrounding keywords.
  77.     *
  78.     * @var  array   List of basic regular expressions matching parts of doc comments: 
  79.     *               module names, module separator, vartypes, accesstypes.
  80.     * @final
  81.     * @see    buildComplexRegExps(), $C_COMPLEX
  82.     */
  83.     var $C_BASE = array(
  84.                         #"block"            => '/\*\*((?:(?!\*).)*(?:\n(?!\s*\*/)\s*\*(?:(?!\*/).)*)*)\*/',
  85.                         "module"            => "[^\s]+",
  86.                         "module_separator"  => "::",
  87.                         "module_tags"       => "(@modulegroup|@module)",
  88.                                                
  89.                         "vartype"           => "(string|integer|int|long|real|double|float|boolean|bool|mixed|array|object)",
  90.                         "access"            => "(private|public)"
  91.                     );
  92.  
  93.     /**
  94.     * List of regular expressions used to grep complex doc comments.
  95.     * 
  96.     * As with $PHP_COMPLEX all complex expressions are build using basic
  97.     * ones in buildComplexRegExps().
  98.     *
  99.     * @var    array Regular expressions matching see and optional objectnames.
  100.     * @final
  101.     * @see    buildComplexRegexps(), $C_BASE
  102.     */                                                            
  103.     var $C_COMPLEX = array(                                                            
  104.                             "objectname_optional"       => "",
  105.                                             
  106.                             "see_var"                   => "",
  107.                             "see_function"              => "",
  108.                             "see_moduleclass"           => "",
  109.                             
  110.                             "module_doc"                => "",
  111.                             "module_tags"               => "",
  112.                             "module_separator"          => "",
  113.                             "module_separator_len"      => 0,
  114.                             "module_separator_len_neg"  => 0
  115.                             
  116.                         );
  117.     
  118.     /**
  119.     * Basic RegExps used to analyse PHP Code.
  120.     *
  121.     * PHPDoc tries to compose all complex regular expressions
  122.     * from some basic expressions. This array contains
  123.     * all expressions used to build $PHP_COMPLEX. 
  124.     * There're some differences to the RegExps in zend-scanner.l, 
  125.     * e.g. I decided to write "\s+" instead of "[ \n\r\t]+" which
  126.     * should be identical as long as perl compatible regular 
  127.     * expressions are used. Another point is that I did not break 
  128.     * down numbers to LNUM/DNUM.
  129.     * 
  130.     * @var      array   List of basis regular expressions matching php code elements:
  131.     *                   spaces, optional spaces, linebreaks, labels, use (include and friends),
  132.     *                   optional argument assignment, boolean, several variable types.
  133.     * @final
  134.     * @see     $PHP_COMPLEX
  135.     */
  136.     var $PHP_BASE = array (
  137.  
  138.                             "space"                 => "\s+",
  139.                             "space_optional"        => "\s*",
  140.                             "break"                 => "[\n\r]",
  141.                             
  142.                             "php_open_long"         => "<\?php\s", # zend_scanner.l use {WHITESPACE} (space in our case) eighter. Might be slightly faster.
  143.                             "php_open_short"        => "<\?",
  144.                             "php_open_asp"          => "<%",
  145.                             "php_open_short_print"  => "<\?=",
  146.                             "php_open_asp_print"    => "<%=",
  147.                             
  148.                              # do not change the single quotes to double ones
  149.                             "label"                 => '[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\xzf-\xff]*', 
  150.                             "use"                   => "(include_once|include|require_once|require)",
  151.                             "assignment"            => "\s*([,=])\s*",
  152.                             
  153.                             "boolean"               => "(true|false)",
  154.                             
  155.                             "string"                => "[^\s]+",
  156.                             "string_enclosed"       => "(['\"])(?:\\\\\\1|[^\\1])*?\\1",
  157.  
  158.                             "int_oct"               => "[+-]?\s*0[0-7]+",
  159.                             "int_hex"               => "[+-]?\s*0[xX][0-9A-Fa-f]+",
  160.                             
  161.                             "float"                 => "[+-]?\s*\d*\.\d+",
  162.                             "float_exponent"        => "[+-]?\s*\d*(?:\.\d+)*[eE][+-]?\d+",
  163.                             
  164.                             "number"                => "[+-]?\s*\d+",
  165.                             
  166.                             "array"                 => "array\s*\(",
  167.                             "empty_array"           => "array\s*\(\s*\)\s*"
  168.                         );
  169.  
  170.     /**
  171.     * List of regular expressions used to grep complex php code elements.
  172.     *
  173.     * The RegExp of the variable types is slightly changed to that
  174.     * one in $PHP_BASE, getVariableTypeAndValue() needs this.
  175.     * "undoc_*" is used to grep all keywords those who have a doc 
  176.     * comment in front and those without. See getPhodocParagraphs() 
  177.     * for more details on this.
  178.     *
  179.     * @var  array   RegExps to match: variablenames, functionnames, classnames,
  180.     *               class variable declarations, function declarations,
  181.     *               class declarations, defines, uses (include and friends), 
  182.     *               function arguments, several variables types. 
  183.     * @see    buildComplexRegExps(), getVariableTypeAndValue(), getPhpdocParagraphs(), $PHP_BASE
  184.     */                                                                    
  185.     var $PHP_COMPLEX = array (
  186.                                 "varname"               => "",
  187.                                 "functionname"          => "",
  188.                                 "classname"             => "",
  189.                                 
  190.                                 "php_open_script"       => "",
  191.                         
  192.                                 "var"                   => "",
  193.                                 "undoc_var"             => "",
  194.                                                 
  195.                                 "function"              => "",
  196.                                 "undoc_function"        => "",
  197.                                                 
  198.                                 "class"                 => "",
  199.                                 "undoc_class"           => "",
  200.                                                 
  201.                                 "class_extends"         => "",
  202.                                 "undoc_class_extends"   => "",
  203.                                                 
  204.                                 "const"                 => "",
  205.                                 "undoc_const"           => "",
  206.                                                 
  207.                                 "use"                   => "",
  208.                                 "undoc_use"             => "",
  209.                                             
  210.                                 "argument"              => "",
  211.                                 
  212.                                 "type_boolean"          => "",
  213.                                 
  214.                                 "type_string"                        => "",
  215.                                 "type_string_enclosed"  => "",
  216.                                 
  217.                                 "type_int_oct"          => "",
  218.                                 "type_int_hex"          => "",
  219.                                 
  220.                                 "type_float"            => "",
  221.                                 "type_float_exponent"   => "",
  222.                                 
  223.                                 "type_number"           => "",
  224.                                 
  225.                                 "type_array"            => "",
  226.                                 "type_empty_array"      => ""
  227.                             );                                                                    
  228.  
  229.     /**
  230.     * Array of RegExp matching the syntax of several complex tags.
  231.     *
  232.     * The array is filled by the constructor.
  233.     *
  234.     * @var  array   Used to analyse return, var, param, 
  235.     *               global, see and to find tags in general
  236.     * @see  PhpdocParserObject()
  237.     */
  238.     var $TAGS = array ( 
  239.                         "return"        => "", 
  240.                         "var"           => "", # @var, @param
  241.                         "global"        => "", 
  242.                         "access"        => "", 
  243.                         
  244.                         "module"        => "", # @module, @modulegroup
  245.                         
  246.                         "const"         => "", # @const, @constant
  247.                         
  248.                         "see_var"       => "", # @see
  249.                         "see_function"  => "", # @see
  250.                         "see_class"     => "", # @see
  251.                         "see_module"    => "", # @see
  252.                         
  253.                         "link"          => "@([^\s]+)(.*)@is", # @link
  254.                         
  255.                         "brother"       => "",
  256.                         
  257.                         "author"        => "<\s*([a-z]([-a-z0-9_.])*@([-a-z0-9_]*\.)+[a-z]{2,})\s*>", # @author <email> part
  258.                         
  259.                         "all"           => ""     # list of all known tags
  260.                     );
  261.  
  262.     /**
  263.     * Builds complex regular expressions for the parser.
  264.     *
  265.     * PHPDoc has a small set of basic regular expressions. All complex
  266.     * regular expressions are made out of the basic ones. The composition 
  267.     * in done in this method. Note: every derived class must 
  268.     * call this method in it's constructor!
  269.     * @see    $PHP_BASE, $PHP_COMPLEX, $C_BASE, $C_COMPLEX
  270.     */                                                    
  271.     function buildComplexRegExps() {
  272.     
  273.         //
  274.         // Do not change the order of the variable initializations there're dependencies.
  275.         // It starts with some php names.
  276.         // 
  277.         
  278.         // some names
  279.         $this->PHP_COMPLEX["varname"] = sprintf("[&]?[$]%s", $this->PHP_BASE["label"] );
  280.         $this->PHP_COMPLEX["functionname"] = sprintf("[&]?%s", $this->PHP_BASE["label"]    );
  281.         $this->PHP_COMPLEX["classname"] = $this->PHP_BASE["label"];                    
  282.         
  283.         // 
  284.         // Now build all regexps used to grep doc comment elements.
  285.         // 
  286.         
  287.         // optional object name
  288.          $this->C_COMPLEX["objectname_optional"] = sprintf("(?:object%s%s)?", 
  289.                                                             $this->PHP_BASE["space"],
  290.                                                             $this->PHP_COMPLEX["classname"] 
  291.                                                         );
  292.         
  293.         $this->C_COMPLEX["module_separator"] = sprintf("(?:%s)", $this->C_BASE["module_separator"]);
  294.         $this->C_COMPLEX["module_separator_len"] = strlen($this->C_BASE["module_separator"]);
  295.         $this->C_COMPLEX["module_separator_len_neg"] = -1*strlen($this->C_BASE["module_separator"]);
  296.  
  297.         // References to other elements
  298.         $this->C_COMPLEX["see_var"] = sprintf("(%s%s)?([$][^:]%s)",
  299.                                                 $this->C_BASE["module"],
  300.                                                 $this->C_COMPLEX["module_separator"],
  301.                                                 $this->PHP_BASE["label"]
  302.                                             );
  303.                                                                                         
  304.         $this->C_COMPLEX["see_function"] = sprintf("(%s%s)?([^:]%s\(%s\))",
  305.                                                     $this->C_BASE["module"],
  306.                                                     $this->C_COMPLEX["module_separator"],
  307.                                                     $this->PHP_BASE["label"],
  308.                                                     $this->PHP_BASE["space_optional"]
  309.                                                 );
  310.  
  311.         $this->C_COMPLEX["see_moduleclass"] = sprintf("(%s)",  $this->C_BASE["module"]    );
  312.  
  313.         //
  314.         // RegExps used to grep certain php code elements.
  315.         //
  316.         
  317.         // var statements
  318.         $this->PHP_COMPLEX["var"] =  sprintf("|^%svar%s([$]%s)%s(=?)|is",
  319.                                                 $this->PHP_BASE["space_optional"],
  320.                                                 $this->PHP_BASE["space"],
  321.                                                 $this->PHP_BASE["label"],
  322.                                                 $this->PHP_BASE["space_optional"],
  323.                                                 $this->PHP_BASE["space_optional"]
  324.                                         );    
  325.         $this->PHP_COMPLEX["undoc_var"] = sprintf("|%s|isS", substr($this->PHP_COMPLEX["var"], 2, -3) );
  326.  
  327.         // function statements
  328.         $this->PHP_COMPLEX["function"] = sprintf("|^%sfunction%s(%s)%s\(|is",
  329.                                                 $this->PHP_BASE["space_optional"],
  330.                                                 $this->PHP_BASE["space"],
  331.                                                 $this->PHP_COMPLEX["functionname"],
  332.                                                 $this->PHP_BASE["space_optional"]
  333.                                             );                                                                         
  334.         $this->PHP_COMPLEX["undoc_function"] = sprintf("|%s|isS",    substr($this->PHP_COMPLEX["function"], 2, -3) );
  335.  
  336.         // class statements
  337.         $this->PHP_COMPLEX["class"] = sprintf("|^%sclass%s(%s)%s{|is",
  338.                                                 $this->PHP_BASE["space_optional"],
  339.                                                 $this->PHP_BASE["space"],
  340.                                                 $this->PHP_COMPLEX["classname"],
  341.                                                 $this->PHP_BASE["space_optional"]
  342.                                             );                                    
  343.         $this->PHP_COMPLEX["undoc_class"] = sprintf("|%s|isS", substr($this->PHP_COMPLEX["class"], 2, -3) );
  344.         
  345.         $this->PHP_COMPLEX["class_extends"] = sprintf("|^%sclass%s(%s)%sextends%s(%s)%s{|is",
  346.                                                         $this->PHP_BASE["space_optional"],    
  347.                                                         $this->PHP_BASE["space"],
  348.                                                         $this->PHP_COMPLEX["classname"],
  349.                                                         $this->PHP_BASE["space"],
  350.                                                         $this->PHP_BASE["space"],
  351.                                                         $this->PHP_COMPLEX["classname"],
  352.                                                         $this->PHP_BASE["space_optional"]
  353.                                                     );        
  354.         $this->PHP_COMPLEX["undoc_class_extends"] = sprintf("|%s|isS", substr($this->PHP_COMPLEX["class_extends"], 2, -3) );
  355.         
  356.         // 
  357.         // RegExp used to grep define statements.
  358.         // NOTE: the backticks do not allow the usage of $this->PHP_BASE
  359.         //
  360.         $this->PHP_COMPLEX["const"] = sprintf("@^%sdefine%s\(%s(%s)%s,%s(%s)%s(?:,%s(%s))?%s\)%s;@is", 
  361.                                                 $this->PHP_BASE["space_optional"],
  362.                                                 $this->PHP_BASE["space_optional"],
  363.                                                 $this->PHP_BASE["space_optional"],
  364.                                                 "[$]?\w[\w-_]*|(['\"])(?:\\\\\\2|[^\\2])*?\\2",
  365.                                                 $this->PHP_BASE["space_optional"],
  366.                                                 $this->PHP_BASE["space_optional"],
  367.                                                 "(['\"])(?:\\\\\\4|[^\\4])*?\\4|(?:true|false)|[+-]?\s*0[0-7]+|[+-]?\s*0[xX][0-9A-Fa-f]+|[+-]?\s*\d*(?:\.\d+)*[eE][+-]?\d+|[+-]?\s*\d*\.\d+|[+-]?\s*\d+|&?[$]?\w[\w-_]*",
  368.                                                 $this->PHP_BASE["space_optional"],
  369.                                                 $this->PHP_BASE["space_optional"],
  370.                                                 "(?:true|false)|[+-]?\s*0[0-7]+|[+-]?\s*0[xX][0-9A-Fa-f]+|[+-]?\s*\d*(?:\.\d+)*[eE][+-]?\d+|[+-]?\s*\d*\.\d+|[+-]?\s*\d+|&?[$]?\w[\w-_]*|(['])(?:\\\\\\6|[^\\6])*?\\6",
  371.                                                 $this->PHP_BASE["space_optional"],
  372.                                                 $this->PHP_BASE["space_optional"]
  373.                                             );        
  374.         $this->PHP_COMPLEX["undoc_const"] = sprintf("@%s@isS", substr($this->PHP_COMPLEX["const"], 2, -3) );
  375.         
  376.         //
  377.         // include, include_once, require, require_once and friends 
  378.         //
  379. // ? removed!
  380.         $this->PHP_COMPLEX["use"] = sprintf("@^%s%s[\(]%s((['\"])((?:\\\\\\3|[^\\3])*?)\\3|([^\s]+))%s[\)]%s;@is",
  381.                                                     $this->PHP_BASE["use"],
  382.                                                     $this->PHP_BASE["space_optional"],
  383.                                                     $this->PHP_BASE["space_optional"],
  384.                                                     $this->PHP_BASE["space_optional"],
  385.                                                     $this->PHP_BASE["space_optional"]
  386.                                             );
  387.         $this->PHP_COMPLEX["undoc_use"] = sprintf("@%s@isS", substr($this->PHP_COMPLEX["use"], 2, -3) );
  388.                         
  389.         //                                        
  390.         // Variable name with an optional assignment operator. This one is used
  391.         // to analyse function heads [parameter lists] as well as class variable
  392.         // declarations.
  393.         //
  394.         $this->PHP_COMPLEX["argument"] = sprintf("|(%s)(%s)?|s", 
  395.                                                     $this->PHP_COMPLEX["varname"],
  396.                                                     $this->PHP_BASE["assignment"]
  397.                                             );
  398.  
  399.  
  400.         //
  401.         // <script language="php"> syntax
  402.         //                                                                
  403.         $this->PHP_COMPLEX["php_open_script"] = sprintf("<script%slanguage%s=%s[\"']php[\"']%s>",
  404.                                                         $this->PHP_BASE["space"],
  405.                                                         $this->PHP_BASE["space_optional"],
  406.                                                         $this->PHP_BASE["space_optional"],
  407.                                                         $this->PHP_BASE["space_optional"]
  408.                                                     );
  409.  
  410.         $this->PHP_COMPLEX["php_open_all"] = sprintf("(?:%s|%s|%s|%s|%s|%s)",
  411.                                                     $this->PHP_BASE["php_open_long"],
  412.                                                     $this->PHP_BASE["php_open_short"],
  413.                                                     $this->PHP_BASE["php_open_asp"],
  414.                                                     $this->PHP_BASE["php_open_short_print"],
  415.                                                     $this->PHP_BASE["php_open_asp_print"],
  416.                                                     $this->PHP_COMPLEX["php_open_script"]
  417.                                                 );
  418.  
  419.         $this->C_COMPLEX["module_doc"] = sprintf("@^%s%s%s/\*\*@is", 
  420.                                                     $this->PHP_BASE["space_optional"],
  421.                                                     $this->PHP_COMPLEX["php_open_all"],
  422.                                                     $this->PHP_BASE["space_optional"]
  423.                                                 );
  424.  
  425.         $this->C_COMPLEX["module_tags"] = sprintf("/%s/is", $this->C_BASE["module_tags"] );
  426.  
  427.         //
  428.         // RegExp used to grep variables types
  429.         //
  430.         $elements = array( 
  431.                             "boolean", "string", "string_enclosed", 
  432.                             "int_oct", "int_hex", "float", "float_exponent", 
  433.                             "number", "array", "empty_array" 
  434.                         );
  435.         reset($elements);
  436.         while (list($key, $name)=each($elements)) 
  437.             $this->PHP_COMPLEX["type_".$name] = sprintf("@^%s@", $this->PHP_BASE[$name]);
  438.                                                                             
  439.         // 
  440.         // Regular expressions used to analyse phpdoc tags.
  441.         // 
  442.         $this->TAGS["var"] = sprintf("/%s(?:%s(%s))?(?:%s(%s))?%s(.*)?/is",
  443.                                         $this->C_BASE["vartype"],
  444.                                         $this->PHP_BASE["space"],
  445.                                         $this->PHP_BASE["label"],
  446.                                         $this->PHP_BASE["space"],
  447.                                         $this->PHP_COMPLEX["varname"],
  448.                                         $this->PHP_BASE["space_optional"]
  449.                                     );    
  450.         $this->TAGS["return"] = $this->TAGS["var"];            
  451.                                                         
  452.         $this->TAGS["global"] = sprintf("/%s%s(%s)%s(%s)%s(.*)/is",
  453.                                         $this->C_BASE["vartype"],
  454.                                         $this->PHP_BASE["space_optional"],
  455.                                         $this->C_COMPLEX["objectname_optional"],
  456.                                         $this->PHP_BASE["space"],
  457.                                         $this->PHP_COMPLEX["varname"],
  458.                                         $this->PHP_BASE["space_optional"]
  459.                                     );    
  460.                                                         
  461.         $this->TAGS["brother"] = sprintf("/(%s\(\)|\$%s)/is", 
  462.                                         $this->PHP_BASE["label"],
  463.                                         $this->PHP_BASE["label"]
  464.                                     );
  465.         
  466.         $this->TAGS["const"] = sprintf("/(%s)%s(.*)?/is",
  467.                                         $this->PHP_BASE["label"],
  468.                                         $this->PHP_BASE["space_optional"]
  469.                                     );
  470.                                                         
  471.         $this->TAGS["access"] = sprintf("/%s/is", $this->C_BASE["access"]);
  472.         $this->TAGS["module"] = sprintf("/%s/is", $this->PHP_BASE["label"]);
  473.         
  474.         $this->TAGS["author"] = sprintf("/%s/is", $this->TAGS["author"]);
  475.         
  476.         $all_tags = "";                                            
  477.         reset($this->PHPDOC_TAGS);                                                        
  478.         while (list($tag, $v)=each($this->PHPDOC_TAGS))
  479.             $all_tags.= substr($tag, 1)."|";
  480.         $all_tags = substr($all_tags, 0, -1);
  481.         
  482.         $this->TAGS["all"] = "/@($all_tags)/is";
  483.         
  484.         $elements = array ( "see_function", "see_var", "see_moduleclass" );
  485.         reset($elements);
  486.         while (list($k, $index)=each($elements))
  487.             $this->TAGS[$index] = sprintf("/%s/is", $this->C_COMPLEX[$index]);
  488.  
  489.     } // end func buildComplexRegExps
  490.     
  491. } // end class PhpdocParserRegExp
  492. ?>