home *** CD-ROM | disk | FTP | other *** search
- /*
- XXX support range parameter on search
- XXX support mstop parameter on search
- */
-
- /***********************************************************
- Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
- The Netherlands.
-
- All Rights Reserved
-
- Permission to use, copy, modify, and distribute this software and its
- documentation for any purpose and without fee is hereby granted,
- provided that the above copyright notice appear in all copies and that
- both that copyright notice and this permission notice appear in
- supporting documentation, and that the names of Stichting Mathematisch
- Centrum or CWI or Corporation for National Research Initiatives or
- CNRI not be used in advertising or publicity pertaining to
- distribution of the software without specific, written prior
- permission.
-
- While CWI is the initial source for this software, a modified version
- is made available by the Corporation for National Research Initiatives
- (CNRI) at the Internet address ftp://ftp.python.org.
-
- STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
- REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
- MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
- CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
- DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
- PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
- TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
- PERFORMANCE OF THIS SOFTWARE.
-
- ******************************************************************/
-
- /* Regular expression objects */
- /* This uses Tatu Ylonen's copyleft-free reimplementation of
- GNU regular expressions */
-
- #include <ctype.h>
- #include "Python.h"
-
- #include "regexpr.h"
-
- static PyObject *RegexError; /* Exception */
-
- typedef struct {
- PyObject_HEAD
- struct re_pattern_buffer re_patbuf; /* The compiled expression */
- struct re_registers re_regs; /* The registers from the last match */
- char re_fastmap[256]; /* Storage for fastmap */
- PyObject *re_translate; /* String object for translate table */
- PyObject *re_lastok; /* String object last matched/searched */
- PyObject *re_groupindex; /* Group name to index dictionary */
- PyObject *re_givenpat; /* Pattern with symbolic groups */
- PyObject *re_realpat; /* Pattern without symbolic groups */
- } regexobject;
-
- #include "protos/regexmodule_protos.h"
-
- /* Regex object methods */
-
- static void
- reg_dealloc(re)
- regexobject *re;
- {
- PyMem_XDEL(re->re_patbuf.buffer);
- Py_XDECREF(re->re_translate);
- Py_XDECREF(re->re_lastok);
- Py_XDECREF(re->re_groupindex);
- Py_XDECREF(re->re_givenpat);
- Py_XDECREF(re->re_realpat);
- PyMem_DEL(re);
- }
-
- static PyObject *
- makeresult(regs)
- struct re_registers *regs;
- {
- PyObject *v;
- int i;
- static PyObject *filler = NULL;
- if (filler == NULL) {
- filler = Py_BuildValue("(ii)", -1, -1);
- if (filler == NULL)
- return NULL;
- }
- v = PyTuple_New(RE_NREGS);
- if (v == NULL)
- return NULL;
- for (i = 0; i < RE_NREGS; i++) {
- int lo = regs->start[i];
- int hi = regs->end[i];
- PyObject *w;
- if (lo == -1 && hi == -1) {
- w = filler;
- Py_INCREF(w);
- }
- else
- w = Py_BuildValue("(ii)", lo, hi);
- if (w == NULL) {
- Py_XDECREF(v);
- return NULL;
- }
- PyTuple_SetItem(v, i, w);
- }
- return v;
- }
-
- static PyObject *
- reg_match(re, args)
- regexobject *re;
- PyObject *args;
- {
- PyObject *argstring;
- char *buffer;
- int size;
- int offset;
- int result;
- if (PyArg_Parse(args, "S", &argstring)) {
- offset = 0;
- }
- else {
- PyErr_Clear();
- if (!PyArg_Parse(args, "(Si)", &argstring, &offset))
- return NULL;
- }
- buffer = PyString_AsString(argstring);
- size = PyString_Size(argstring);
- if (offset < 0 || offset > size) {
- PyErr_SetString(RegexError, "match offset out of range");
- return NULL;
- }
- Py_XDECREF(re->re_lastok);
- re->re_lastok = NULL;
- result = re_match(&re->re_patbuf, buffer, size, offset, &re->re_regs);
- if (result < -1) {
- /* Failure like stack overflow */
- PyErr_SetString(RegexError, "match failure");
- return NULL;
- }
- if (result >= 0) {
- Py_INCREF(argstring);
- re->re_lastok = argstring;
- }
- return PyInt_FromLong((long)result); /* Length of the match or -1 */
- }
-
- static PyObject *
- reg_search(re, args)
- regexobject *re;
- PyObject *args;
- {
- PyObject *argstring;
- char *buffer;
- int size;
- int offset;
- int range;
- int result;
-
- if (PyArg_Parse(args, "S", &argstring)) {
- offset = 0;
- }
- else {
- PyErr_Clear();
- if (!PyArg_Parse(args, "(Si)", &argstring, &offset))
- return NULL;
- }
- buffer = PyString_AsString(argstring);
- size = PyString_Size(argstring);
- if (offset < 0 || offset > size) {
- PyErr_SetString(RegexError, "search offset out of range");
- return NULL;
- }
- /* NB: In Emacs 18.57, the documentation for re_search[_2] and
- the implementation don't match: the documentation states that
- |range| positions are tried, while the code tries |range|+1
- positions. It seems more productive to believe the code! */
- range = size - offset;
- Py_XDECREF(re->re_lastok);
- re->re_lastok = NULL;
- result = re_search(&re->re_patbuf, buffer, size, offset, range,
- &re->re_regs);
- if (result < -1) {
- /* Failure like stack overflow */
- PyErr_SetString(RegexError, "match failure");
- return NULL;
- }
- if (result >= 0) {
- Py_INCREF(argstring);
- re->re_lastok = argstring;
- }
- return PyInt_FromLong((long)result); /* Position of the match or -1 */
- }
-
- static PyObject *
- reg_group(re, args)
- regexobject *re;
- PyObject *args;
- {
- int i, a, b;
- if (args != NULL && PyTuple_Check(args)) {
- int n = PyTuple_Size(args);
- PyObject *res = PyTuple_New(n);
- if (res == NULL)
- return NULL;
- for (i = 0; i < n; i++) {
- PyObject *v = reg_group(re, PyTuple_GetItem(args, i));
- if (v == NULL) {
- Py_DECREF(res);
- return NULL;
- }
- PyTuple_SetItem(res, i, v);
- }
- return res;
- }
- if (!PyArg_Parse(args, "i", &i)) {
- PyObject *n;
- PyErr_Clear();
- if (!PyArg_Parse(args, "S", &n))
- return NULL;
- else {
- PyObject *index;
- if (re->re_groupindex == NULL)
- index = NULL;
- else
- index = PyDict_GetItem(re->re_groupindex, n);
- if (index == NULL) {
- PyErr_SetString(RegexError, "group() group name doesn't exist");
- return NULL;
- }
- i = PyInt_AsLong(index);
- }
- }
- if (i < 0 || i >= RE_NREGS) {
- PyErr_SetString(RegexError, "group() index out of range");
- return NULL;
- }
- if (re->re_lastok == NULL) {
- PyErr_SetString(RegexError,
- "group() only valid after successful match/search");
- return NULL;
- }
- a = re->re_regs.start[i];
- b = re->re_regs.end[i];
- if (a < 0 || b < 0) {
- Py_INCREF(Py_None);
- return Py_None;
- }
- return PyString_FromStringAndSize(PyString_AsString(re->re_lastok)+a, b-a);
- }
-
- static struct PyMethodDef reg_methods[] = {
- {"match", (PyCFunction)reg_match},
- {"search", (PyCFunction)reg_search},
- {"group", (PyCFunction)reg_group},
- {NULL, NULL} /* sentinel */
- };
-
- static PyObject *
- reg_getattr(re, name)
- regexobject *re;
- char *name;
- {
- if (strcmp(name, "regs") == 0) {
- if (re->re_lastok == NULL) {
- Py_INCREF(Py_None);
- return Py_None;
- }
- return makeresult(&re->re_regs);
- }
- if (strcmp(name, "last") == 0) {
- if (re->re_lastok == NULL) {
- Py_INCREF(Py_None);
- return Py_None;
- }
- Py_INCREF(re->re_lastok);
- return re->re_lastok;
- }
- if (strcmp(name, "translate") == 0) {
- if (re->re_translate == NULL) {
- Py_INCREF(Py_None);
- return Py_None;
- }
- Py_INCREF(re->re_translate);
- return re->re_translate;
- }
- if (strcmp(name, "groupindex") == 0) {
- if (re->re_groupindex == NULL) {
- Py_INCREF(Py_None);
- return Py_None;
- }
- Py_INCREF(re->re_groupindex);
- return re->re_groupindex;
- }
- if (strcmp(name, "realpat") == 0) {
- if (re->re_realpat == NULL) {
- Py_INCREF(Py_None);
- return Py_None;
- }
- Py_INCREF(re->re_realpat);
- return re->re_realpat;
- }
- if (strcmp(name, "givenpat") == 0) {
- if (re->re_givenpat == NULL) {
- Py_INCREF(Py_None);
- return Py_None;
- }
- Py_INCREF(re->re_givenpat);
- return re->re_givenpat;
- }
- if (strcmp(name, "__members__") == 0) {
- PyObject *list = PyList_New(6);
- if (list) {
- PyList_SetItem(list, 0, PyString_FromString("last"));
- PyList_SetItem(list, 1, PyString_FromString("regs"));
- PyList_SetItem(list, 2, PyString_FromString("translate"));
- PyList_SetItem(list, 3, PyString_FromString("groupindex"));
- PyList_SetItem(list, 4, PyString_FromString("realpat"));
- PyList_SetItem(list, 5, PyString_FromString("givenpat"));
- if (PyErr_Occurred()) {
- Py_DECREF(list);
- list = NULL;
- }
- }
- return list;
- }
- return Py_FindMethod(reg_methods, (PyObject *)re, name);
- }
-
- static PyTypeObject Regextype = {
- PyObject_HEAD_INIT(&PyType_Type)
- 0, /*ob_size*/
- "regex", /*tp_name*/
- sizeof(regexobject), /*tp_size*/
- 0, /*tp_itemsize*/
- /* methods */
- (destructor)reg_dealloc, /*tp_dealloc*/
- 0, /*tp_print*/
- (getattrfunc)reg_getattr, /*tp_getattr*/
- 0, /*tp_setattr*/
- 0, /*tp_compare*/
- 0, /*tp_repr*/
- };
-
- static PyObject *
- newregexobject(pattern, translate, givenpat, groupindex)
- PyObject *pattern;
- PyObject *translate;
- PyObject *givenpat;
- PyObject *groupindex;
- {
- regexobject *re;
- char *pat = PyString_AsString(pattern);
- int size = PyString_Size(pattern);
-
- if (translate != NULL && PyString_Size(translate) != 256) {
- PyErr_SetString(RegexError,
- "translation table must be 256 bytes");
- return NULL;
- }
- re = PyObject_NEW(regexobject, &Regextype);
- if (re != NULL) {
- char *error;
- re->re_patbuf.buffer = NULL;
- re->re_patbuf.allocated = 0;
- re->re_patbuf.fastmap = re->re_fastmap;
- if (translate)
- re->re_patbuf.translate = PyString_AsString(translate);
- else
- re->re_patbuf.translate = NULL;
- Py_XINCREF(translate);
- re->re_translate = translate;
- re->re_lastok = NULL;
- re->re_groupindex = groupindex;
- Py_INCREF(pattern);
- re->re_realpat = pattern;
- Py_INCREF(givenpat);
- re->re_givenpat = givenpat;
- error = re_compile_pattern(pat, size, &re->re_patbuf);
- if (error != NULL) {
- PyErr_SetString(RegexError, error);
- Py_DECREF(re);
- re = NULL;
- }
- }
- return (PyObject *)re;
- }
-
- static PyObject *
- regex_compile(self, args)
- PyObject *self;
- PyObject *args;
- {
- PyObject *pat = NULL;
- PyObject *tran = NULL;
- if (!PyArg_Parse(args, "S", &pat)) {
- PyErr_Clear();
- if (!PyArg_Parse(args, "(SS)", &pat, &tran))
- return NULL;
- }
- return newregexobject(pat, tran, pat, NULL);
- }
-
- static PyObject *
- symcomp(pattern, gdict)
- PyObject *pattern;
- PyObject *gdict;
- {
- char *opat = PyString_AsString(pattern);
- char *oend = opat + PyString_Size(pattern);
- int group_count = 0;
- int escaped = 0;
- char *o = opat;
- char *n;
- char name_buf[128];
- char *g;
- PyObject *npattern;
- int require_escape = re_syntax & RE_NO_BK_PARENS ? 0 : 1;
-
- if (oend == opat) {
- Py_INCREF(pattern);
- return pattern;
- }
-
- npattern = PyString_FromStringAndSize((char*)NULL, PyString_Size(pattern));
- if (npattern == NULL)
- return NULL;
- n = PyString_AsString(npattern);
-
- while (o < oend) {
- if (*o == '(' && escaped == require_escape) {
- char *backtrack;
- escaped = 0;
- ++group_count;
- *n++ = *o;
- if (++o >= oend || *o != '<')
- continue;
- /* *o == '<' */
- if (o+1 < oend && *(o+1) == '>')
- continue;
- backtrack = o;
- g = name_buf;
- for (++o; o < oend;) {
- if (*o == '>') {
- PyObject *group_name = NULL;
- PyObject *group_index = NULL;
- *g++ = '\0';
- group_name = PyString_FromString(name_buf);
- group_index = PyInt_FromLong(group_count);
- if (group_name == NULL || group_index == NULL
- || PyDict_SetItem(gdict, group_name, group_index) != 0) {
- Py_XDECREF(group_name);
- Py_XDECREF(group_index);
- Py_XDECREF(npattern);
- return NULL;
- }
- ++o; /* eat the '>' */
- break;
- }
- if (!isalnum(Py_CHARMASK(*o)) && *o != '_') {
- o = backtrack;
- break;
- }
- *g++ = *o++;
- }
- }
- else if (*o == '[' && !escaped) {
- *n++ = *o;
- ++o; /* eat the char following '[' */
- *n++ = *o;
- while (o < oend && *o != ']') {
- ++o;
- *n++ = *o;
- }
- if (o < oend)
- ++o;
- }
- else if (*o == '\\') {
- escaped = 1;
- *n++ = *o;
- ++o;
- }
- else {
- escaped = 0;
- *n++ = *o;
- ++o;
- }
- }
-
- if (_PyString_Resize(&npattern, n - PyString_AsString(npattern)) == 0)
- return npattern;
- else {
- return NULL;
- }
-
- }
-
- static PyObject *
- regex_symcomp(self, args)
- PyObject *self;
- PyObject *args;
- {
- PyObject *pattern;
- PyObject *tran = NULL;
- PyObject *gdict = NULL;
- PyObject *npattern;
- if (!PyArg_Parse(args, "S", &pattern)) {
- PyErr_Clear();
- if (!PyArg_Parse(args, "(SS)", &pattern, &tran))
- return NULL;
- }
- gdict = PyDict_New();
- if (gdict == NULL
- || (npattern = symcomp(pattern, gdict)) == NULL) {
- Py_DECREF(gdict);
- Py_DECREF(pattern);
- return NULL;
- }
- return newregexobject(npattern, tran, pattern, gdict);
- }
-
-
- static PyObject *cache_pat;
- static PyObject *cache_prog;
-
- static int
- update_cache(pat)
- PyObject *pat;
- {
- if (pat != cache_pat) {
- Py_XDECREF(cache_pat);
- cache_pat = NULL;
- Py_XDECREF(cache_prog);
- cache_prog = regex_compile((PyObject *)NULL, pat);
- if (cache_prog == NULL)
- return -1;
- cache_pat = pat;
- Py_INCREF(cache_pat);
- }
- return 0;
- }
-
- static PyObject *
- regex_match(self, args)
- PyObject *self;
- PyObject *args;
- {
- PyObject *pat, *string;
- if (!PyArg_Parse(args, "(SS)", &pat, &string))
- return NULL;
- if (update_cache(pat) < 0)
- return NULL;
- return reg_match((regexobject *)cache_prog, string);
- }
-
- static PyObject *
- regex_search(self, args)
- PyObject *self;
- PyObject *args;
- {
- PyObject *pat, *string;
- if (!PyArg_Parse(args, "(SS)", &pat, &string))
- return NULL;
- if (update_cache(pat) < 0)
- return NULL;
- return reg_search((regexobject *)cache_prog, string);
- }
-
- static PyObject *
- regex_set_syntax(self, args)
- PyObject *self, *args;
- {
- int syntax;
- if (!PyArg_Parse(args, "i", &syntax))
- return NULL;
- syntax = re_set_syntax(syntax);
- return PyInt_FromLong((long)syntax);
- }
-
- static struct PyMethodDef regex_global_methods[] = {
- {"compile", regex_compile, 0},
- {"symcomp", regex_symcomp, 0},
- {"match", regex_match, 0},
- {"search", regex_search, 0},
- {"set_syntax", regex_set_syntax, 0},
- {NULL, NULL} /* sentinel */
- };
-
- void
- initregex()
- {
- PyObject *m, *d, *v;
-
- m = Py_InitModule("regex", regex_global_methods);
- d = PyModule_GetDict(m);
-
- /* Initialize regex.error exception */
- RegexError = PyString_FromString("regex.error");
- if (RegexError == NULL || PyDict_SetItemString(d, "error", RegexError) != 0)
- Py_FatalError("can't define regex.error");
-
- /* Initialize regex.casefold constant */
- v = PyString_FromStringAndSize((char *)NULL, 256);
- if (v != NULL) {
- int i;
- char *s = PyString_AsString(v);
- for (i = 0; i < 256; i++) {
- if (isupper(i))
- s[i] = tolower(i);
- else
- s[i] = i;
- }
- PyDict_SetItemString(d, "casefold", v);
- Py_DECREF(v);
- }
- }
-