home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
PC World Komputer 1999 mARCH
/
PCWK3A99.iso
/
Linux
/
DDD331
/
DDD-3_1_.000
/
DDD-3_1_
/
ddd-3.1.1
/
ddd
/
rxclass.C
< prev
next >
Wrap
C/C++ Source or Header
|
1998-03-25
|
7KB
|
302 lines
// $Id: rxclass.C,v 1.18 1998/03/25 12:46:27 zeller Exp $ -*- C++ -*-
// Regular expression class, based on POSIX regcomp()/regexec() interface
// Copyright (C) 1996 Technische Universitaet Braunschweig, Germany.
// Written by Andreas Zeller <zeller@ips.cs.tu-bs.de>.
//
// This file is part of DDD.
//
// DDD is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation; either
// version 2 of the License, or (at your option) any later version.
//
// DDD is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public
// License along with DDD -- see the file COPYING.
// If not, write to the Free Software Foundation, Inc.,
// 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
// DDD is the data display debugger.
// For details, see the DDD World-Wide-Web page,
// `http://www.cs.tu-bs.de/softech/ddd/',
// or send a mail to the DDD developers <ddd@ips.cs.tu-bs.de>.
// RCS identification string for this source file.  It is defined as a
// global array so that the `$Id$' keyword text ends up in the object file.
char regex_rcsid[] =
"$Id: rxclass.C,v 1.18 1998/03/25 12:46:27 zeller Exp $";
#ifdef __GNUG__
#pragma implementation
#endif
#include "config.h"
#include "bool.h"
#include "strclass.h"
#include "assert.h"
#include "misc.h"
#include "cook.h"
#include <stdlib.h>
#include <iostream.h>
#include <ctype.h>
#include <string.h> // strncmp()
#if WITH_RUNTIME_REGEX
// Return true iff the regex operator OP, when it directly follows an
// atom, means that the atom cannot be used as a mandatory literal
// prefix character (it may be optional, repeated, or end an alternative).
static bool rx_modifies_atom(char op)
{
    switch (op)
    {
    case '+':
    case '*':
    case '?':
    case '|':
    case '{':
        return true;

    default:
        return false;
    }
}

// Get a prefix character from T; let T point at the next prefix character.
//
// T points into an extended regular expression.  If the atom at T is a
// single literal character that every match must contain at this place,
// return that character and advance T past the atom.  Otherwise return
// '\0'; the caller must then stop collecting prefix characters.
//
// Being too cautious here only shortens the prefix (an optimization);
// returning a character that is not mandatory would make search() miss
// matches, so every doubtful case yields '\0'.
char regex::get_prefix(const char *& t, int flags)
{
    if (!(flags & REG_EXTENDED))
    {
        // FIXME: give some rules for ordinary regexps
        return '\0';
    }

    char atom = '\0';

    switch (*t)
    {
    case '\0':
        // End of pattern.  Do not step past the terminator.
        return '\0';

    case '.':
    case '(':
    case ')':
    case '^':
    case '$':
    case '|':
    case '*':
    case '+':
    case '?':
    case '{':
        // Operators never are literal prefix characters.
        t++;
        return '\0';

    case '[':
        // Only a one-character class such as `[x]' denotes a literal.
        if (t[1] != ']' && t[1] != '\0' && t[1] != '^' && t[2] == ']')
        {
            atom = t[1];
            t += 3;
            break;
        }
        t++;
        return '\0';

    case '\\':
        // An escaped punctuation character is a literal.  A trailing
        // backslash is not, and neither are escaped letters and digits
        // or `\<', `\>', `\`', `\'', which GNU regex treats as operators
        // (`\w', `\b', back-references, word and buffer boundaries).
        if (t[1] == '\0'
            || isalnum((unsigned char)t[1])
            || strchr("<>`'", t[1]) != 0)
        {
            t++;
            return '\0';
        }
        atom = t[1];
        t += 2;
        break;

    default:
        // Ordinary character
        atom = *t++;
        break;
    }

    // Whatever the form of the atom, a following quantifier or
    // alternation means it is not a mandatory literal.
    if (rx_modifies_atom(*t))
        return '\0';

    return atom;
}
// Report the regcomp()/regexec() error ERRCODE on cerr and abort().
// SRC, if non-null, is the offending expression and is shown quoted.
// Does nothing if ERRCODE is zero.
void regex::fatal(int errcode, const char *src)
{
    if (errcode != 0)
    {
        // Ask regerror() for the required buffer size, then fetch the text.
        const size_t needed = regerror(errcode, &compiled, (char *)0, 0);
        char *message = new char[needed];
        regerror(errcode, &compiled, message, needed);

        cerr << "regex ";
        if (src != 0)
            cerr << quote(src) << ": ";
        cerr << "error " << errcode;
        if (*message != '\0')
            cerr << " - " << message;
        cerr << "\n";

#if !defined(REGCOMP_BROKEN) && !defined(GNU_LIBrx_USED)
        cerr << "As a workaround, link with GNU librx - "
            "in `config.h', #define REGCOMP_BROKEN.\n";
#endif

        delete[] message;
        abort();
    }
}
// Compile the regular expression T with the regcomp() flags FLAGS.
// The expression is compiled with a leading `^', so it only matches at
// the start of the string handed to regexec().  On a compile error,
// fatal() reports the error and aborts.
//
// Besides compiling, collect in PREFIX the literal characters every
// match must start with; search() uses them to skip regexec() calls.
regex::regex(const char* t, int flags)
    : exprs(0), matcher(0), data(0)
{
    string rx = "^" + string(t);
    int errcode = regcomp(&compiled, rx, flags);
    if (errcode)
        fatal(errcode, rx.chars());

    exprs = new regmatch_t[nexprs()];

    // Mark all subexpressions as `no match', such that match_info()
    // gives a defined result before the first successful match.
    for (unsigned int k = 0; k < nexprs(); k++)
        exprs[k].rm_so = exprs[k].rm_eo = -1;

    // The prefix is compared case-sensitively, so it cannot be used
    // with REG_ICASE.  It also only describes the first alternative:
    // as soon as the pattern contains a `|', a match may start with
    // something else.  In both cases, use an empty prefix, which makes
    // search() hand every position to regexec().
    const bool use_prefix = !(flags & REG_ICASE) && strchr(t, '|') == 0;

    unsigned int i = 0;
    if (use_prefix)
    {
        const char *s = t;
        while (i < sizeof(prefix) - 1)
        {
            const char c = get_prefix(s, flags);
            if (c == '\0')
                break;
            prefix[i++] = c;
        }
    }
    prefix[i] = '\0';
}
#endif // WITH_RUNTIME_REGEX
// Create a regex that delegates all matching to the procedure P.
// D is stored and passed to P as its first argument on every call.
// No expression is compiled; with runtime regex support, there is
// no match array and the search prefix is empty.
regex::regex(rxmatchproc p, void *d)
    : matcher(p), data(d)
{
#if WITH_RUNTIME_REGEX
    exprs = 0;
    *prefix = '\0';
#endif
}
// Release the compiled expression (only present when no matcher
// procedure is used) and the match array.
regex::~regex()
{
#if WITH_RUNTIME_REGEX
    const bool has_compiled_rx = (matcher == 0);
    if (has_compiled_rx)
    {
        regfree(&compiled);
    }

    delete[] exprs;
#endif // WITH_RUNTIME_REGEX
}
// Search T in S; return position of first occurrence.
// If STARTPOS is positive, start search from that position.
// If STARTPOS is negative, perform reverse search from that position
// and return last occurrence.
// MATCHLEN contains the length of the matched expression.
// If T is not found, return -1.
//
// LEN is the length of S.  If S is not followed by a null character
// at S[LEN], a null-terminated copy is searched instead.
// If a matcher procedure is set, it is called for each candidate
// position and its non-negative result is used as MATCHLEN; the
// regexec() match array is not touched in that case.
int regex::search(const char* s, int len, int& matchlen, int startpos) const
{
    string substr;
    int direction = +1;

    if (startpos < 0)
    {
        startpos += len;
        direction = -1;
    }
    if (startpos < 0 || startpos > len)
        return -1;

    if (s[len] != '\0')
    {
        // regexec() and the matcher need a null-terminated string
        substr = string(s, len);
        s = (char *)substr;
    }
    assert(s[len] == '\0');

#if WITH_RUNTIME_REGEX
    int prefix_len = strlen(prefix);
#endif

    for (; startpos >= 0 && startpos < len; startpos += direction)
    {
        if (matcher != 0)
        {
            matchlen = matcher(data, s, len, startpos);
            if (matchlen >= 0)
                break;
        }
#if WITH_RUNTIME_REGEX
        else
        {
            char *t = (char *)s + startpos;

            // Only bother regexec() if the literal prefix is present.
            // Near the end of S, compare just the remaining characters.
            if (strncmp(t, prefix, min(prefix_len, len - startpos)) == 0)
            {
                int errcode =
                    regexec((regex_t *)&compiled, t, nexprs(), exprs, 0);
                if (errcode == 0)
                    break;
            }
        }
#endif // WITH_RUNTIME_REGEX
    }

    if (startpos < 0 || startpos >= len)
        return -1;              // Ran off either end: not found

#if WITH_RUNTIME_REGEX
    if (matcher == 0)
    {
        // Only compiled expressions have a match array.  A regex using
        // a matcher procedure has EXPRS == 0 and has already stored
        // its result in MATCHLEN.
        if (exprs[0].rm_so < 0)
        {
            matchlen = 0;
            return -1;
        }

        matchlen = exprs[0].rm_eo - exprs[0].rm_so;
        return exprs[0].rm_so + startpos;
    }
#endif // WITH_RUNTIME_REGEX

    return startpos;
}
// Return length of matched string iff T matches S at POS,
// -1 otherwise.  LEN is the length of S.
//
// If a matcher procedure is set, S, LEN and POS are passed to it
// unchanged and its result is returned.  Otherwise a negative POS
// counts from the end of S; a POS outside of S yields -1.
// If S is not followed by a null character at S[LEN], a
// null-terminated copy is matched instead.
int regex::match(const char *s, int len, int pos) const
{
    if (matcher != 0)
        return matcher(data, s, len, pos);

#if WITH_RUNTIME_REGEX
    string substr;

    if (pos < 0)
        pos += len;

    // POS may still be negative if it pointed before the start of S;
    // using it would make regexec() read in front of the buffer.
    if (pos < 0 || pos > len)
        return -1;

    if (s[len] != '\0')
    {
        substr = string(s, len);
        s = (char *)substr;
    }
    assert(s[len] == '\0');

    int errcode = regexec((regex_t *)&compiled, (char *)s + pos,
                          nexprs(), exprs, 0);

    if (errcode == 0 && exprs[0].rm_so >= 0)
        return exprs[0].rm_eo - exprs[0].rm_so;
#endif

    return -1;
}
#if WITH_RUNTIME_REGEX
bool regex::match_info(int& start, int& length, int nth) const
{
if ((unsigned)(nth) >= nexprs())
return false;
else
{
start = exprs[nth].rm_so;
length = exprs[nth].rm_eo - start;
return start >= 0 && length >= 0;
}
}
#endif
// Representation invariant.  Always returns true; a violated
// invariant triggers an assertion failure.
bool regex::OK() const
{
#if WITH_RUNTIME_REGEX
    // A compiled expression must have its match array.  A regex using
    // a matcher procedure is constructed with EXPRS == 0, so it must
    // not be held to that requirement.
    assert(matcher != 0 || exprs != 0);
#endif

    return true;
}
#if WITH_RUNTIME_REGEX && RUNTIME_REGEX
// Built-in regular expressions
// Each pattern is compiled at static-initialization time by the
// regex(const char *, int) constructor, which prepends `^'.
// NOTE(review): the flags come from a default argument declared
// elsewhere; the patterns use `+', `?', `|' and grouping, so they
// presumably rely on REG_EXTENDED -- confirm in the class declaration.

// One or more whitespace characters (space, newline, tab, CR, VT, FF)
const regex rxwhite("[ \n\t\r\v\f]+");

// Decimal integer with optional leading minus sign
const regex rxint("-?[0-9]+");

// Floating-point number: optional minus sign; digits with optional
// fraction, or a fraction alone; optional exponent.  In the exponent,
// `[---+]' is the range `-' to `-' plus `+', i.e. an optional sign.
const regex rxdouble("-?(([0-9]+\\.[0-9]*)|([0-9]+)|(\\.[0-9]+))"
"([eE][---+]?[0-9]+)?");

// One or more ASCII letters
const regex rxalpha("[A-Za-z]+");

// One or more ASCII lowercase letters
const regex rxlowercase("[a-z]+");

// One or more ASCII uppercase letters
const regex rxuppercase("[A-Z]+");

// One or more ASCII letters or digits
const regex rxalphanum("[0-9A-Za-z]+");

// Identifier: letter, `_' or `$', followed by letters, digits, `_' or `$'
const regex rxidentifier("[A-Za-z_$][A-Za-z0-9_$]*");
#endif // WITH_RUNTIME_REGEX && RUNTIME_REGEX