home *** CD-ROM | disk | FTP | other *** search
/ IRIX Base Documentation 2002 November / SGI IRIX Base Documentation 2002 November.iso / usr / share / catman / p_man / cat3w / wsregexp.z / wsregexp
Encoding:
Text File  |  2002-10-03  |  22.1 KB  |  327 lines

  1.  
  2.  
  3.  
  4. wwwwssssrrrreeeeggggeeeexxxxpppp((((3333WWWW))))                                                      wwwwssssrrrreeeeggggeeeexxxxpppp((((3333WWWW))))
  5.  
  6.  
  7.  
  8. NAME
  9.      wsregexp:  wsrecompile, wsrestep, wsrematch, wsreerr - Wide character
  10.      based regular expression compile and match routines
  11.  
  12. SYNOPSIS
  13.      #include <wsregexp.h>
  14.      #include <widec.h>
  15.      long wsrecompile(struct rexdata *prex, long *expbuf,
  16.                          long *endbuf, wchar_t eof);
  17.      int  wsrestep(struct rexdata *prex, wchar_t *wstr, long *expbuf);
  18.      int  wsrematch(struct rexdata *prex, wchar_t *wstr, long *expbuf);
  19.      char *wsreerr(int err);
  20. DDDDEEEESSSSCCCCRRRRIIIIPPPPTTTTIIIIOOOONNNN
  21.      These functions are general purpose internationalized regular expression
  22.      matching routines to be used in programs that perform regular expression
  23.      matching.  These functions are defined by the _wwww_ssss_rrrr_eeee_gggg_eeee_xxxx_pppp_...._hhhh header file.
  24.  
  25.      The function _wwww_ssss_rrrr_eeee_cccc_oooo_mmmm_pppp_iiii_llll_eeee takes as input an internationalized regular
  26.      expression as defined below (apart from the normal regular expressions as
  27.      defined by _rrrr_eeee_gggg_eeee_xxxx_pppp) and produces a compiled expression that can be used
  28.      with _wwww_ssss_rrrr_eeee_ssss_tttt_eeee_pppp or _wwww_ssss_rrrr_eeee_mmmm_aaaa_tttt_cccc_hhhh.
  29.           struct rexdata {
  30.                short     sed;      /* flag for sed */
  31.                wchar_t   *str;     /* regular expression */
  32.                int       err;      /* returned error code, 0 = no error */
  33.                wchar_t   *loc1;
  34.                wchar_t   *loc2;
  35.                int       circf;
  36.                ...
  37.           };
  38.  
  39.      The first parameter, _p_r_e_x, is a pointer to the specification of the
  40.      regular expression. _p_r_e_x->_s_e_d should be non-zero if sed style delimiter
  41.      syntax is to be adopted. _p_r_e_x->_s_t_r should point to the regular expression
  42.      that needs to be compiled. The regular expression string should be in
  43.      wide character format. _p_r_e_x->_e_r_r indicates any error during the
  44.      compilation and use of this regular expression.  _e_x_p_b_u_f points to the
  45.      place where the compiled regular expression will be placed. _e_n_d_b_u_f points
  46.      to the first long after the space where the compiled regular expression
  47.      may be placed.  (_e_n_d_b_u_f-_e_x_p_b_u_f) should be large enough for the compiled
  48.      regular expression to fit.  _e_o_f is the wide character which marks the end
  49.      of the regular expression.  This character is usually a _//// (slash).
  50.  
  51.      If _wwww_ssss_rrrr_eeee_cccc_oooo_mmmm_pppp_iiii_llll_eeee was successful, it returns the pointer to the end of the
  52.      regular expression, _e_n_d_b_u_f. Otherwise, 0 is returned and the error code
  53.      is set in _p_r_e_x->_e_r_r.
  54.  
  55.  
  56.  
  57.  
  58.                                                                         PPPPaaaaggggeeee 1111
  59.  
  60.  
  61.  
  62.  
  63.  
  64.  
  65. wwwwssssrrrreeeeggggeeeexxxxpppp((((3333WWWW))))                                                      wwwwssssrrrreeeeggggeeeexxxxpppp((((3333WWWW))))
  66.  
  67.  
  68.  
  69.      The functions _wwww_ssss_rrrr_eeee_ssss_tttt_eeee_pppp and _wwww_ssss_rrrr_eeee_mmmm_aaaa_tttt_cccc_hhhh do pattern matching given a null
  70.      terminated wide character string _w_s_t_r and a compiled regular expression
  71.      _e_x_p_b_u_f as input. _e_x_p_b_u_f for these functions should be the compiled
  72.      regular expression which was obtained by a call to the function
  73.      _wwww_ssss_rrrr_eeee_cccc_oooo_mmmm_pppp_iiii_llll_eeee.
  74.  
  75.      The function _wwww_ssss_rrrr_eeee_ssss_tttt_eeee_pppp returns non-zero if some substring of _w_s_t_r matches
  76.      the regular expression in _e_x_p_b_u_f and zero if there is no match.  The
  77.      function _wwww_ssss_rrrr_eeee_mmmm_aaaa_tttt_cccc_hhhh returns non-zero if a substring of _w_s_t_r starting from
  78.      the beginning matches the regular expression in _e_x_p_b_u_f and zero if there
  79.      is no match.  If there is a match, _p_r_e_x->_l_o_c_1 and _p_r_e_x->_l_o_c_2 are set.
  80.      _p_r_e_x->_l_o_c_1 points to the first wide character that matched the regular
  81.      expression; _p_r_e_x->_l_o_c_2 points to the wide character after the last wide
  82.      character that matches the regular expression.  Thus if the regular
  83.      expression matches the entire input string, _p_r_e_x->_l_o_c_1 will point to the
  84.      first wide character of _w_s_t_r and _p_r_e_x->_l_o_c_2 will point to the null at the
  85.      end of _w_s_t_r.
  86.  
  87.      _wwww_ssss_rrrr_eeee_ssss_tttt_eeee_pppp uses the variable _c_i_r_c_f of _s_t_r_u_c_t _r_e_x_d_a_t_a which is set by
  88.      _wwww_ssss_rrrr_eeee_cccc_oooo_mmmm_pppp_iiii_llll_eeee if the regular expression begins with _^^^^ (caret). If this is
  89.      set then _wwww_ssss_rrrr_eeee_ssss_tttt_eeee_pppp will try to match the regular expression to the
  90.      beginning of the string only. If more than one regular expression is to
  91.      be compiled before the first is executed, the value of _p_r_e_x->_c_i_r_c_f should
  92.      be saved for each compiled expression and should be set to that saved
  93.      value before each call to _wwww_ssss_rrrr_eeee_ssss_tttt_eeee_pppp.
  94.  
  95.      _wwww_ssss_rrrr_eeee_eeee_rrrr_rrrr returns the error message corresponding to the error code in the
  96.      language of the current locale. The error code _e_r_r should be one returned
  97.      by the wsregexp functions in the _e_r_r variable of _s_t_r_u_c_t _r_e_x_d_a_t_a.
  98.  
  99.      The internationalized regular expressions available for use with the
  100.      wsregexp functions are constructed as follows:
  101.  
  102.      _E_x_p_r_e_s_s_i_o_n  _M_e_a_n_i_n_g
  103.  
  104.      _c           the character _c where _c is not a special character.
  105.  
  106.      _[[[[_[[[[_::::_c_l_a_s_s_::::_]]]]_]]]] _c_l_a_s_s is any character type as defined by the _L_C___C_T_Y_P_E locale
  107.                  category. _c_l_a_s_s can be one of the following
  108.  
  109.                  _a_l_p_h_a   a letter
  110.  
  111.                  _u_p_p_e_r   an upper-case letter
  112.  
  113.                  _l_o_w_e_r   a lower-case letter
  114.  
  115.                  _d_i_g_i_t   a decimal digit
  116.  
  117.                  _x_d_i_g_i_t  a hexadecimal digit
  118.  
  119.  
  120.  
  121.  
  122.  
  123.  
  124.                                                                         PPPPaaaaggggeeee 2222
  125.  
  126.  
  127.  
  128.  
  129.  
  130.  
  131. wwwwssssrrrreeeeggggeeeexxxxpppp((((3333WWWW))))                                                      wwwwssssrrrreeeeggggeeeexxxxpppp((((3333WWWW))))
  132.  
  133.  
  134.  
  135.                  _a_l_n_u_m   an alphanumeric character
  136.  
  137.                  _s_p_a_c_e   any whitespace character
  138.  
  139.                  _p_u_n_c_t   a punctuation character
  140.  
  141.                  _p_r_i_n_t   a printable character
  142.  
  143.                  _g_r_a_p_h   a character that has a visible representation
  144.  
  145.                  _c_n_t_r_l   a control character
  146.  
  147.      _[[[[_[[[[_====_c_====_]]]]_]]]]     An equivalence class, or, any collation element defined as
  148.                  having the same relative order in the current collation
  149.                  sequence as _c.  As an example, if _AAAA and _aaaa belong to the same
  150.                  equivalence class, then both [[=_A=]_b] and [[=_a=]_b] are
  151.                  equivalent to [_A_a_b].
  152.  
  153.      _[[[[_[[[[_...._c_c_...._]]]]_]]]]    This represents a multi-character collating symbol.  Multi-
  154.                  character collating elements must be represented as collating
  155.                  symbols to distinguish them from single-character collating
  156.                  elements. As an example, if the string _a_b is a valid
  157.                  collating element, then [[._a_b.]] will be treated as an
  158.                  element and will match the same string of characters, while
  159.                  _a_b will match the list of characters _a and _b. If the multi-
  160.                  character collating symbol is not a valid collating element
  161.                  in the current collating sequence definition, the symbol will
  162.                  be treated as an invalid expression.
  163.  
  164.      _[[[[_[[[[_c_----_c_]]]]_]]]]     Any collation element in the character expression range _c-_c,
  165.                  where _c can identify a collating symbol or an equivalence
  166.                  class.  If the character _---- (hyphen) appears immediately after
  167.                  an opening square bracket, _e._g. [-_c], or immediately prior to
  168.                  a closing square bracket, _e._g. [_c-], it has no special
  169.                  meaning.
  170.  
  171.      Immediately following an opening square bracket ^ means the complement
  172.      of, _e._g. [^_c]. Otherwise, it has no special meaning.
  173.  
  174.      Within square brackets, a _.... that is not part of a [[._c_c.]]  sequence, or
  175.      a _:::: that is not part of a [[:_c_l_a_s_s:]] sequence, matches itself.
  176.  
  177. SSSSEEEEEEEE AAAALLLLSSSSOOOO
  178.      regexp(5)
  179.  
  180. DDDDIIIIAAAAGGGGNNNNOOOOSSSSTTTTIIIICCCCSSSS
  181.      Errors are:
  182.  
  183.           _EEEE_RRRR_RRRR______NNNN_OOOO_RRRR_MMMM_BBBB_RRRR               no remembered search string
  184.  
  185.  
  186.  
  187.  
  188.  
  189.  
  190.                                                                         PPPPaaaaggggeeee 3333
  191.  
  192.  
  193.  
  194.  
  195.  
  196.  
  197. wwwwssssrrrreeeeggggeeeexxxxpppp((((3333WWWW))))                                                      wwwwssssrrrreeeeggggeeeexxxxpppp((((3333WWWW))))
  198.  
  199.  
  200.  
  201.           _EEEE_RRRR_RRRR______RRRR_EEEE_OOOO_VVVV_FFFF_LLLL_OOOO_WWWW             regexp overflow
  202.                                    This happens when _wwww_ssss_rrrr_eeee_cccc_oooo_mmmm_pppp_iiii_llll_eeee cannot fit the
  203.                                    compiled regular expression in (_e_n_d_b_u_f-
  204.                                    _e_x_p_b_u_f).
  205.  
  206.           _EEEE_RRRR_RRRR______BBBB_RRRR_AAAA                  _(((( _)))) imbalance
  207.  
  208.           _EEEE_RRRR_RRRR______DDDD_EEEE_LLLL_IIII_MMMM                illegal or missing delimiter.
  209.  
  210.           _EEEE_RRRR_RRRR______NNNN_BBBB_RRRR                  bad number in _{{{{ _}}}}
  211.  
  212.           _EEEE_RRRR_RRRR______2222_MMMM_NNNN_BBBB_RRRR                more than 2 numbers given in _{{{{ _}}}}
  213.  
  214.           _EEEE_RRRR_RRRR______DDDD_IIII_GGGG_IIII_TTTT                _dddd_iiii_gggg_iiii_tttt out of range
  215.  
  216.           _EEEE_RRRR_RRRR______2222_MMMM_LLLL_BBBB_RRRR_AAAA               too many _((((
  217.  
  218.           _EEEE_RRRR_RRRR______RRRR_AAAA_NNNN_GGGG_EEEE                range number too large
  219.  
  220.           _EEEE_RRRR_RRRR______MMMM_IIII_SSSS_SSSS_BBBB                _}}}} expected after _\\\\
  221.  
  222.           _EEEE_RRRR_RRRR______BBBB_AAAA_DDDD_RRRR_NNNN_GGGG               first number exceeds second in _{{{{ _}}}}.
  223.  
  224.           _EEEE_RRRR_RRRR______SSSS_IIII_MMMM_BBBB_AAAA_LLLL               _[[[[ _]]]] imbalance.
  225.  
  226.           _EEEE_RRRR_RRRR______SSSS_YYYY_NNNN_TTTT_AAAA_XXXX               illegal regular expression
  227.  
  228.           _EEEE_RRRR_RRRR______IIII_LLLL_LLLL_CCCC_LLLL_AAAA_SSSS_SSSS             illegal _[[[[_::::_c_l_a_s_s_::::_]]]]
  229.  
  230.           _EEEE_RRRR_RRRR______EEEE_QQQQ_UUUU_IIII_LLLL                illegal _[[[[_====_c_l_a_s_s_====_]]]]
  231.  
  232.           _EEEE_RRRR_RRRR______CCCC_OOOO_LLLL_LLLL                 illegal _[[[[_...._c_c_...._]]]]
  233.  
  234. EEEEXXXXAAAAMMMMPPPPLLLLEEEE
  235.      The following is an example of how the regular expression macros and
  236.      calls might be defined by an application program:
  237.  
  238.           #include <wsregexp.h>
  239.           #include <widec.h>
  240.            . . .
  241.           struct rexdata rex;
  242.           long expbuf [BUFSIZ];      /* Buffer for the compiled RE */
  243.  
  244.           /* Define a RE to identify a capitalized word */
  245.           char *regexp = "[[:space:]][[:upper:]]";
  246.           wchar_t wregexp [512];
  247.           wchar_t weof;              /* The end of regular expression */
  248.           char eof = '\0';
  249.  
  250.           wchar_t linebuf [BUFSIZ];  /* Buffer for the input string */
  251.            . . .
  252.           (void) mbstowcs(wregexp, regexp, strlen(regexp)+1);
  253.  
  254.  
  255.                                                                         PPPPaaaaggggeeee 4444
  256.  
  257.  
  258.  
  259.  
  260.  
  261.  
  262. wwwwssssrrrreeeeggggeeeexxxxpppp((((3333WWWW))))                                                      wwwwssssrrrreeeeggggeeeexxxxpppp((((3333WWWW))))
  263.  
  264.  
  265.  
  266.           (void) mbtowc(&weof, &eof, 1);
  267.           rex.str = wregexp;
  268.           rex.sed = 0;
  269.           rex.err = 0;
  270.           if (!wsrecompile(&rex, expbuf, &expbuf[BUFSIZ], weof))
  271.                   fprintf(stderr, "%s\n", wsreerr(rex.err));
  272.            . . .
  273.           if (wsrestep(&rex, linebuf, expbuf))
  274.                   succeed;
  275.  
  276.  
  277.  
  278.  
  279.  
  280.  
  281.  
  282.  
  283.  
  284.  
  285.  
  286.  
  287.  
  288.  
  289.  
  290.  
  291.  
  292.  
  293.  
  294.  
  295.  
  296.  
  297.  
  298.  
  299.  
  300.  
  301.  
  302.  
  303.  
  304.  
  305.  
  306.  
  307.  
  308.  
  309.  
  310.  
  311.  
  312.  
  313.  
  314.  
  315.  
  316.  
  317.  
  318.  
  319.  
  320.  
  321.                                                                         PPPPaaaaggggeeee 5555
  322.  
  323.  
  324.  
  325.