maildrop: remove old, manual, pre-PCRE regexp engine.

author: Sam Varshavchik 2013-08-25 22:20:03 -0400
committer: Sam Varshavchik 2013-08-28 21:07:40 -0400
commit: 064186c841dee4f58e9ef3577e7550fbc761ab48 (patch)
tree: 31c3b82045d0867d156bb6a3edc7b0e4be78fe39
parent: 9bb1a8d85390653f702e8ad5556a2cd3793acbfe (diff)
download: courier-libs-064186c841dee4f58e9ef3577e7550fbc761ab48.tar.bz2
5 files changed, 103 insertions, 1294 deletions
diff --git a/maildrop/Makefile.am b/maildrop/Makefile.am
index f256edd..9cb53a8 100644
--- a/maildrop/Makefile.am
+++ b/maildrop/Makefile.am
@@ -39,7 +39,7 @@ maildrop_SOURCES=deliver.C deliverdotlock.C deliverdotlock.h \
 	dotlockmaildrop.C filelock.C filelock.h filter.C formatmbox.C \
 	formatmbox.h globaltimer.C globaltimer.h lexer.C lexer.h log.C log.h \
 	maildir.C maildir.h main.C message.C message.h messageinfo.C \
-	messageinfo.h mytime.h mywait.h pipefds.C pipefds.h re.C re.h \
+	messageinfo.h mytime.h mywait.h pipefds.C pipefds.h \
 	recipe.C recipe.h recipenode.C recipenode.h recipeparse.C reeval.C \
 	reeval.h regexpnode.h rematch.C rematch.h rematchmsg.C rematchmsg.h \
 	rematchstr.C rematchstr.h search.C search.h token.C \
diff --git a/maildrop/re.C b/maildrop/re.C
deleted file mode 100644
index f2ab94e..0000000
--- a/maildrop/re.C
+++ /dev/null
@@ -1,888 +0,0 @@
-#include	"config.h"
-#include	"re.h"
-#include	"mio.h"
-#include	"regexpnode.h"
-#include	"rematch.h"
-#include	"funcs.h"
-#include	"buffer.h"
-#include	<ctype.h>
-
-
-//////////////////////////////////////////////////////////////////////////////
-//
-// Create sets for the [:is....:] codes.
-//
-
-static void mk_alnum(unsigned char *p)
-{
-register unsigned i;
-
-	for (i=0; i<256; i++)
-		if (isalnum(i))
-			p[i/8] |= 1 << (i % 8);
-}
-
-static void mk_alpha(unsigned char *p)
-{
-register unsigned i;
-
-	for (i=0; i<256; i++)
-		if (isalpha(i))
-			p[i/8] |= 1 << (i % 8);
-}
-
-static void mk_cntrl(unsigned char *p)
-{
-register unsigned i;
-
-	for (i=0; i<256; i++)
-		if (iscntrl(i))
-			p[i/8] |= 1 << (i % 8);
-}
-
-static void mk_digit(unsigned char *p)
-{
-register unsigned i;
-
-	for (i=0; i<256; i++)
-		if (isdigit(i))
-			p[i/8] |= 1 << (i % 8);
-}
-
-static void mk_graph(unsigned char *p)
-{
-register unsigned i;
-
-	for (i=0; i<256; i++)
-		if (isgraph(i))
-			p[i/8] |= 1 << (i % 8);
-}
-
-static void mk_lower(unsigned char *p)
-{
-register unsigned i;
-
-	for (i=0; i<256; i++)
-		if (islower(i))
-			p[i/8] |= 1 << (i % 8);
-}
-
-static void mk_print(unsigned char *p)
-{
-register unsigned i;
-
-	for (i=0; i<256; i++)
-		if (isprint(i))
-			p[i/8] |= 1 << (i % 8);
-}
-
-static void mk_punct(unsigned char *p)
-{
-register unsigned i;
-
-	for (i=0; i<256; i++)
-		if (ispunct(i))
-			p[i/8] |= 1 << (i % 8);
-}
-
-static void mk_space(unsigned char *p)
-{
-register unsigned i;
-
-	for (i=0; i<256; i++)
-		if (isspace(i))
-			p[i/8] |= 1 << (i % 8);
-}
-
-static void mk_upper(unsigned char *p)
-{
-register unsigned i;
-
-	for (i=0; i<256; i++)
-		if (isupper(i))
-			p[i/8] |= 1 << (i % 8);
-}
-
-static void mk_xdigit(unsigned char *p)
-{
-register unsigned i;
-
-	for (i=0; i<256; i++)
-		if (isxdigit(i))
-			p[i/8] |= 1 << (i % 8);
-}
-
-static void mk_wbreak(unsigned char *p)
-{
-register unsigned i;
-
-	for (i=0; i<256; i++)
-		if (!isalnum(i) && i != '_')
-			p[i/8] |= 1 << (i % 8);
-}
-
-static const char *const is_setname[]={
-	":alnum:",
-	":alpha:",
-	":cntrl:",
-	":digit:",
-	":graph:",
-	":lower:",
-	":print:",
-	":punct:",
-	":space:",
-	":upper:",
-	":xdigit:",
-	":wbreak:"};
-
-static void (*is_setfunc[])(unsigned char *)={
-	mk_alnum,
-	mk_alpha,
-	mk_cntrl,
-	mk_digit,
-	mk_graph,
-	mk_lower,
-	mk_print,
-	mk_punct,
-	mk_space,
-	mk_upper,
-	mk_xdigit,
-	mk_wbreak};
-
-Re::Re() : chainedre(0), prevre(0), nodes(0), first(0), isCaret(0)
-{
-}
-
-Re::~Re()
-{
-	init();
-}
-
-void Re::init()
-{
-	if (chainedre)	delete chainedre;
-	chainedre=0;
-
-RegExpNode *n;
-
-	while ((n=nodes) != 0)
-	{
-		nodes=n->next;
-		delete n;
-	}
-}
-
-inline RegExpNode	*Re::allocnode()
-{
-RegExpNode	*n;
-
-	if ((n=new RegExpNode(nextid++)) == 0)
-		outofmem();
-	n->next=nodes; nodes=n; return(n);
-}
-
-int Re::Compile(const char *ptr, int caseflag, int &errindex)
-{
-	if (*ptr == '^')
-	{
-		if (CompileS(ptr+1, caseflag, errindex))	return (-1);
-		isCaret=1;
-		return (0);
-	}
-
-	if (CompileS("[.\n]*", 1, errindex) < 0)	return (-1);
-	if ((chainedre=new Re) == 0)
-		outofmem();
-	isDummy=1;
-	chainedre->prevre=this;
-	return (chainedre->CompileS(ptr, caseflag, errindex));
-}
-
-int Re::CompileS(const char *ptr, int caseflag, int &errindex)
-{
-	expr=ptr;
-	origexpr=expr;
-	init();
-	nextid=0;
-	first=0;
-	isCaret=0;
-	isDummy=0;
-	casesensitive=caseflag;
-	matchFull=0;
-
-int	rc=0;
-
-	try
-	{
-	RegExpNode **p=CompileOrClause(&first);
-
-		if (*expr == '!')
-		{
-		int dummy;
-
-			++expr;
-			if ((chainedre=new Re) == 0)
-				outofmem();
-			if ( chainedre->CompileS(expr, caseflag, dummy) < 0)
-			{
-				expr += dummy;
-				throw -1;
-			}
-			chainedre->prevre=this;
-			if (VerboseLevel() > 7)
-				merr.write("\n*** CHAINED TO ***\n");
-
-		} else if (curchar())	throw -1;
-
-		final=*p=allocnode();
-		final->thechar=REFINAL;
-	}
-	catch (int n)
-	{
-		init();
-		errindex=expr-origexpr;
-		rc= n;
-	}
-	if (rc == 0 && VerboseLevel() > 7)
-	{
-	RegExpNode *n;
-	Buffer	b;
-
-		if (first)
-		{
-			b="Start node: ";
-			b.append( (unsigned long)first->id );
-			b += "\n\n";
-			b += '\0';
-			merr.write(b);
-		}
-		for (n=nodes; n; n=n->next)
-		{
-			b="Node ";
-			b.append( (unsigned long)n->id );
-			b += ": ";
-			switch (n->thechar)	{
-			case RENULL:
-				b  += "null";
-				break;
-			case RESET:
-				b += "[set] ";
-				{
-				int i,j=0;
-
-					for (i=0; i<256; i=j)
-					{
-						j=i+1;
-						if ((n->reset[i/8] &
-							(1 << (i % 8))) == 0)
-							continue;
-						for (j=i; j<256; j++)
-							if ((n->reset[j/8] &
-								(1 << (j % 8)))
-								== 0)
-							break;
-						if (i < ' ' || i > 127)
-						{
-							b += '#';
-							b.append((unsigned long)
-								i);
-						}
-						else
-						{
-							if (i == '#'
-								|| i == '-'
-								|| i == '\\')
-								b += '\\';
-							b += (char)i;
-						}
-						if (i+1 == j)	continue;
-						b += ('-');
-						--j;
-					}
-				}
-				break;
-			case REFINAL:
-				b += "final";
-				break;
-			default:
-				if (n->thechar >= ' ' && n->thechar < 127)
-				{
-					b += '\'';
-					b += (char)n->thechar;
-					b += '\'';
-				}
-				else
-				{
-					b += "chr(";
-					b.append((unsigned long)n->thechar);
-					b += ')';
-				}
-			}
-			b += '\n';
-			b += '\0';
-			merr.write( b );
-			if (n->next1)
-			{
-				b="    transition to ";
-				b.append((unsigned long)n->next1->id);
-				b += '\n';
-				b += '\0';
-				merr.write(b);
-			}
-
-			if (n->next2)
-			{
-				b="    transition to ";
-				b.append((unsigned long)n->next2->id);
-				b += '\n';
-				b += '\0';
-				merr.write(b);
-			}
-			merr.write("\n");
-		}
-	}
-	return (rc);
-}
-
-RegExpNode **Re::CompileOrClause(RegExpNode **ptr)
-{
-RegExpNode **finish=CompileAtomString(ptr);
-
-	if ( curchar() != '|')	return (finish);
-
-RegExpNode *realfinish=allocnode();
-
-	realfinish->thechar=RENULL;
-	*finish=realfinish;
-
-	while ( curchar() == '|' )
-	{
-		nextchar();
-
-	RegExpNode *newstart=allocnode();
-
-		newstart->thechar=RENULL;
-		newstart->next1= *ptr;
-		*ptr=newstart;
-
-		finish=CompileAtomString(&newstart->next2);
-		*finish=realfinish;
-	}
-	return (&realfinish->next1);
-}
-
-RegExpNode **Re::CompileAtomString(RegExpNode **ptr)
-{
-int	c;
-
-	for (;;)
-	{
-		c=curchar();
-		if (c == 0 || c == '|' || c == ')' || c == '!')
-			break;
-		ptr=CompileElement(ptr);
-	}
-	return (ptr);
-}
-
-RegExpNode **Re::CompileElement(RegExpNode **start)
-{
-RegExpNode **finish;
-
-	if (curchar() != '$')
-	{
-		finish=CompileAtom(start);
-	}
-	else
-	{
-		nextchar();
-		if (curchar() == 0)
-		{
-			matchFull=1;
-			return (start);
-		}
-		(*start)=allocnode();
-		(*start)->thechar='$';
-		finish= & (*start)->next1;
-	}
-
-	switch (curchar())	{
-	case '+':
-		(*finish)=allocnode();
-		(*finish)->thechar=RENULL;
-		(*finish)->next1=(*start);
-		finish= &(*finish)->next2;
-		nextchar();
-		break;
-	case '*':
-		(*finish)=allocnode();
-		(*finish)->thechar=RENULL;
-		(*finish)->next1=(*start);
-		(*start)=(*finish);
-		finish= &(*finish)->next2;
-		nextchar();
-		break;
-	case '?':
-
-		{
-		RegExpNode *newstart=allocnode();
-
-			newstart->thechar=RENULL;
-			(*finish)=allocnode();
-			(*finish)->thechar=RENULL;
-			newstart->next1= *start;
-			newstart->next2= *finish;
-			*start=newstart;
-			finish= &(*finish)->next1;
-			nextchar();
-		}
-		break;
-	}
-	return (finish);
-}
-
-RegExpNode **Re::CompileAtom(RegExpNode **ptr)
-{
-int	c=curchar();
-
-	if (c == '(')	// Subexpression
-	{
-		nextchar();
-		ptr=CompileOrClause(ptr);
-		if ( curchar() != ')')	throw -1;
-		nextchar();
-		return (ptr);
-	}
-
-	(*ptr)=allocnode();
-
-	if (c == '[' || c == '.')
-	{
-	int	i, complement=0;
-
-		if ( ((*ptr)->reset=new unsigned char[256/8]) == 0)
-			outofmem();
-		for (i=0; i<256/8; i++)
-			(*ptr)->reset[i]=0;
-
-		if ( c == '.' )
-		{
-			(*ptr)->reset[ '\n' / 8 ] |= 1 << ('\n' % 8);
-			complement=1;
-		}
-		else
-		{
-			nextchar();
-			if ( curchar() == '^')
-			{
-				complement=1;
-				nextchar();
-			}
-
-			is_sets(*ptr);
-		}
-		nextchar();
-		if (complement)
-			for (i=0; i<256/8; i++)
-				(*ptr)->reset[i] ^= ~0;
-		c=RESET;
-	}
-	else c=parsechar();
-
-	(*ptr)->thechar=c;
-	return (&(*ptr)->next1);
-}
-
-void Re::is_sets(RegExpNode *p)
-{
-Buffer	buf;
-int	c=curchar();
-int	call_parsechar=1;
-
-	if (c == ':')
-	{
-		do
-		{
-			buf += c;
-			nextchar();
-		} while ( (c=curchar()) >= 0 && isalpha(c));
-
-		if (c == ':')
-		{
-			buf += c;
-			nextchar();
-			c=curchar();
-			if (c == ']')
-			{
-				buf += '\0';
-
-			const char *q=(const char *)buf;
-			unsigned i;
-
-				for (i=0; i<sizeof(is_setname)/
-					sizeof(is_setname[0]); i++)
-				{
-					if (strcmp(is_setname[i], q) == 0)
-					{
-						(*is_setfunc[i])(p->reset);
-						return;
-					}
-				}
-			}
-		}
-
-	int	i=0;
-
-		for (i=0; i<buf.Length(); i++)
-		{
-			c=(int)(unsigned char)((const char*)buf)[i];
-			p->reset[ c / 8 ] |= 1 << (c % 8);
-		}
-		// In case the next character is '-', leave 'c' the way it
-		// is.
-		call_parsechar=0;
-		if (curchar() == ']')	return;
-	}
-
-	do
-	{
-	int	c2;
-
-		if (c == 0)	throw -1;
-
-		if (c == '.')
-		{
-			for (c2=0; c2 < 256/8; c2++)
-				if (c2 != '\n' / 8)
-					p->reset[c2]= ~0;
-				else
-					p->reset[c2] |= ~(1 << ('\n' % 8));
-			if (call_parsechar)
-				nextchar();
-			call_parsechar=1;
-			continue;
-		}
-		if (call_parsechar)
-			c=parsechar();
-		c2=c;
-		call_parsechar=1;
-
-		if (curchar() == '-')
-		{
-			nextchar();
-			c2=parsechar();
-		}
-		while ( c <= c2 )
-		{
-			p->reset[ c / 8 ] |= 1 << (c % 8);
-			++c;
-		}
-	} while ((c=curchar()) != ']');
-}
-
-int Re::parsechar()
-{
-int	c;
-
-	c=curchar();
-	if (c == 0)	throw -1;
-	nextchar();
-	if (c != '\\') return (c);
-	c=curchar();
-
-	if (c == 0)
-		throw -1;
-	else if (c >= '0' && c <= '7')
-	{
-	unsigned char uc=0;
-
-		while ( c >= '0' && c <= '7' )
-		{
-			uc = uc * 8 + (c-'0');
-			nextchar();
-			c=curchar();
-		}
-		c=uc;
-	}
-	else
-	{
-		c=backslash_char(c);
-		nextchar();
-	}
-	return (c);
-}
-
-/////////////////////////////////////////////////////////////////////////////
-
-int Re::Match(ReMatch &string)
-{
-	matched= -1;
-	matchedpos=0;
-
-	charsmatched=0;
-	state1.init(nextid);
-	state2.init(nextid);
-
-	curstate= &state1;
-	nextstate= &state2;
-
-	curstate->nodes[0]=first;
-	curstate->numnodes=1;
-	curstate->nodenums[first->id]=0;
-
-	final_id=final->id;
-
-	if (VerboseLevel() > 8)
-	{
-		merr.write("*** MATCH START ***\n");
-	}
-
-	for (;;)
-	{
-	// Compute null closure
-
-	unsigned n;
-
-		for (n=0; n<curstate->numnodes; n++)
-		{
-		RegExpNode *p=curstate->nodes[n];
-
-			if (p->thechar != RENULL)	continue;
-
-		RegExpNode *q=p->next1;
-
-			if (q && curstate->nodenums[q->id] != charsmatched)
-			{
-				curstate->nodes[curstate->numnodes++]=q;
-				curstate->nodenums[q->id]=charsmatched;
-				if (VerboseLevel() > 8)
-				{
-				Buffer b;
-
-					b="  Transition to state ";
-					b.append((unsigned long)q->id);
-					b += '\n';
-					b += '\0';
-					merr.write(b);
-				}
-			}
-
-			q=p->next2;
-			if (q && curstate->nodenums[q->id] != charsmatched)
-			{
-				curstate->nodes[curstate->numnodes++]=q;
-				curstate->nodenums[q->id]=charsmatched;
-				if (VerboseLevel() > 8)
-				{
-				Buffer b;
-
-					b="  Transition to state ";
-					b.append((unsigned long)q->id);
-					b += '\n';
-					b += '\0';
-					merr.write(b);
-				}
-			}
-		}
-
-	int	nextChar;
-
-		if (curstate->nodenums[final_id] == charsmatched)
-		{
-		off_t	pos=string.GetCurrentPos();
-
-			if (VerboseLevel() > 8)
-				merr.write("**Final node.\n");
-			if (chainedre)
-			{
-			unsigned long saved_matched_chainedre=
-					chainedre->charsmatched;
-
-				// On subsequent passes, charsmatched gets
-				// reset.  If, previously, we had a match,
-				// don't forget # of characters matched!
-
-				if (VerboseLevel() > 8)
-					merr.write(
-					"**Final node - checking subexpr.\n");
-				if (chainedre->Match(string) == 0)
-				{
-					if (VerboseLevel() > 8)
-					{
-					Buffer	buf;
-
-						buf="**Subexpr matched after ";
-						buf.append( (unsigned long)
-							charsmatched);
-						buf += " characters.\n";
-						buf += '\0';
-						merr.write(buf);
-					}
-					matched=0;
-					matchedpos=charsmatched;
-					if (isDummy)	// Don't need to
-							// look for max matches
-							// for the dummy block
-					{
-						return (0);
-					}
-				}
-				else
-				{
-					if (VerboseLevel() > 8)
-						merr.write(
-						"**Subexpr didn't match.\n");
-					chainedre->charsmatched=
-						saved_matched_chainedre;
-				}
-				string.SetCurrentPos(pos);
-				nextChar=string.NextChar();
-			}
-			else
-			{
-				if (!matchFull)	// We don't need to match full
-						// string.
-				{
-					if (VerboseLevel() > 8)
-					{
-					Buffer	buf;
-
-						buf="Matched ";
-						buf.append( (unsigned long)
-							charsmatched);
-						buf += " characters.\n";
-						buf += '\0';
-						merr.write(buf);
-					}
-					matched=0;
-					matchedpos=charsmatched;
-				}
-
-				nextChar=string.NextChar();
-				if ( nextChar < 0)
-				{
-					if (VerboseLevel() > 8)
-					{
-					Buffer	buf;
-
-						buf="Matched ";
-						buf.append( (unsigned long)
-							charsmatched);
-						buf += " characters.\n";
-						buf += '\0';
-						merr.write(buf);
-					}
-					return (0);	// Matched everything
-				}
-			}
-		}
-		else nextChar=string.NextChar();
-
-		if (nextChar < 0)
-		{
-			if (VerboseLevel() > 8)
-				merr.write(
-					"Failed - End of matching string.\n");
-			charsmatched=matchedpos;
-			return (matched);
-		}
-		if (curstate->numnodes == 0)	// No sense to continue
-		{
-			if (VerboseLevel() > 8)
-				merr.write(
-					"Failed - out of states.\n");
-			charsmatched=matchedpos;
-			return (matched);
-		}
-
-		if (VerboseLevel() > 8)
-		{
-		Buffer	b;
-
-			b="Matching character: ";
-
-			if (nextChar <= ' ' || nextChar > 127)
-			{
-				b += '#';
-				b.append((unsigned long)nextChar);
-			}
-			else	b += (char)nextChar;
-			b += '\n';
-			b += '\0';
-			merr.write(b);
-		}
-		++charsmatched;
-
-		if (!casesensitive)
-			nextChar=tolower(nextChar);
-
-		nextstate->numnodes=0;
-
-		for (n=0; n<curstate->numnodes; n++)
-		{
-		RegExpNode *p=curstate->nodes[n];
-
-			if (p->thechar == RESET)
-			{
-				if ((p->reset[nextChar / 8] &
-						(1 << (nextChar % 8))) == 0)
-				{
-					if (casesensitive)	continue;
-
-				int	uchar=toupper(nextChar);
-					if ((p->reset[uchar / 8] &
-						(1 << (uchar % 8))) == 0)
-							continue;
-				}
-			}
-			else
-			{
-				if (p->thechar != nextChar)
-				{
-					if (casesensitive)	continue;
-				int	uchar=toupper(nextChar);
-					if (p->thechar != uchar)
-						continue;
-				}
-			}
-
-		RegExpNode *q=p->next1;
-
-			if (q && nextstate->nodenums[q->id] != charsmatched)
-			{
-				nextstate->nodes[nextstate->numnodes++]=q;
-				nextstate->nodenums[q->id]=charsmatched;
-				if (VerboseLevel() > 8)
-				{
-				Buffer b;
-
-					b="  Transition to state ";
-					b.append((unsigned long)q->id);
-					b += '\n';
-					b += '\0';
-					merr.write(b);
-				}
-			}
-
-			q=p->next2;
-
-			if (q && nextstate->nodenums[q->id] != charsmatched)
-			{
-				nextstate->nodes[nextstate->numnodes++]=q;
-				nextstate->nodenums[q->id]=charsmatched;
-				if (VerboseLevel() > 8)
-				{
-				Buffer b;
-
-					b="  Transition to state ";
-					b.append((unsigned long)q->id);
-					b += '\n';
-					b += '\0';
-					merr.write(b);
-				}
-			}
-		}
-
-	ReEval *swap=curstate; curstate=nextstate; nextstate=swap;
-	}
-}
diff --git a/maildrop/re.h b/maildrop/re.h
deleted file mode 100644
index 1c2d3ab..0000000
--- a/maildrop/re.h
+++ /dev/null
@@ -1,100 +0,0 @@
-#ifndef	re_h
-#define	re_h
-
-
-#include	"config.h"
-#include	<sys/types.h>
-#include	"funcs.h"
-#include	"reeval.h"
-
-class ReMatch;
-
-///////////////////////////////////////////////////////////////////////////
-//
-//  The Re class represents a regular expression.   The regular expression
-//  is translated into a non-deterministic automaton, stored as a list
-//  of RegExpNodes.
-//
-//  Then, one or more strings are matched against the regular expression.
-//
-//  The Re object may dynamically allocate another Re object in order to
-//  implement the ! operator.  Each ! operator introduces a dynamically-
-//  allocated Re object, which contains the next chained regular expression.
-//  Another ! operator causes another object to be allocated.
-//
-//  The ^ and $ anchors are implemented here.  The ABSENCE of a ^ anchor
-//  causes a dummy "[.\n]*" expression to be created in the first Re object,
-//  with the real expression being parsed in the 2nd Re object.
-//
-//  When a string is matched against a regular expression, when the current
-//  state includes a FINAL state, and there is a chained Re object, the
-//  remainder of the string gets matched against the chained Re object.
-//  If the chained matched succeeds, the entire match succeeds, otherwise,
-//  we continue matching the original string.
-//
-//  If a match is succesfull, MatchCount() may be called to return the number
-//  of characters that were matched.  If an ! operator is used, the optional
-//  argument to MatchCount(), if not null, can be used to call MatchCount()
-//  to return the count that the next expression matched.
-//
-///////////////////////////////////////////////////////////////////////////
-
-class	RegExpNode;
-
-class Re {
-
-	Re	*chainedre;		// Chained regular expression
-	Re	*prevre;
-	RegExpNode *nodes;		// Singly-linked list of nodes
-	RegExpNode *first;		// Starting node
-	RegExpNode *final;		// Final node
-	unsigned nextid;		// When creating, next ID to assign
-
-	RegExpNode	*allocnode();
-	const	char *expr, *origexpr;
-
-	// When matching:
-	int	matched;
-	off_t matchedpos;
-	ReEval	*curstate, *nextstate;
-	unsigned final_id;
-
-	int	curchar() { return ((int)(unsigned char)*expr); }
-	void	nextchar() { ++expr; }
-	int	casesensitive;
-	int	matchFull;
-	int	isCaret;
-	int	isDummy;
-public:
-	Re();
-	~Re();
-
-	int Compile(const char *, int, int &);
-			// Compile regular expression
-private:
-	int CompileS(const char *, int, int &);
-
-
-	void init();
-	RegExpNode **CompileAtom(RegExpNode **);
-	RegExpNode **CompileAtomString(RegExpNode **);
-	RegExpNode **CompileOrClause(RegExpNode **);
-	RegExpNode **CompileElement(RegExpNode **);
-	void is_sets(RegExpNode *);
-
-	int	parsechar();
-
-// Evaluation
-
-	ReEval	state1, state2;
-	unsigned charsmatched;
-public:
-	int	Match(ReMatch &);
-	unsigned MatchCount(Re **p =0) {
-					if (p) *p=chainedre;
-					return (charsmatched); }
-	int	IsDummy()	{ return (isDummy); }
-	int	IsAnchorStart()	{ return (isCaret); }
-} ;
-
-#endif
diff --git a/maildrop/search.C b/maildrop/search.C
index 33a5792..d2756bb 100644
--- a/maildrop/search.C
+++ b/maildrop/search.C
@@ -32,8 +32,6 @@ void Search::cleanup()
 
 int	Search::init(const char *expr, const char *opts)
 {
-	int	dummy;
-
 	match_header=0;
 	match_body=0;
 	weight1=1;
@@ -49,84 +47,67 @@ int	Search::init(const char *expr, const char *opts)
 		if (strchr(opts, 'w'))	match_body=1;
 	}
 
-	Buffer b;
-
-	b="MAILDROP_OLD_REGEXP";
-
-	const char *p=GetVarStr(b);
-
-	if (atoi(p ? p:"0") == 0)
-	{
-		const char *errptr;
-
-		cleanup();
+	const char *errptr;
 
-		if (strchr(opts, 'w'))
-		{
-			b="Pattern option 'w' is valid only when MAILDROP_OLD_REGEXP is set\n";
-			b += '\0';
-			merr.write(b);
-			return -1;
-		}
+	cleanup();
 
-		int errindex;
+	int errindex;
 
-		pcre_regexp=pcre_compile(expr,
-					 strchr(opts, 'D') ? 0:PCRE_CASELESS,
-					 &errptr,
-					 &errindex, 0);
+	pcre_regexp=pcre_compile(expr,
+				 strchr(opts, 'D') ? 0:PCRE_CASELESS,
+				 &errptr,
+				 &errindex, 0);
 
-		if (!pcre_regexp)
-		{
-			b="Invalid regular expression, offset ";
-			b.append((unsigned long)errindex);
-			b += " of: ";
-			b += expr;
-			b += ": ";
-			b += errptr;
-			b += "\n";
-			b += '\0';
-			merr.write(b);
-			return -1;
-		}
+	if (!pcre_regexp)
+	{
+		Buffer b;
+
+		b="Invalid regular expression, offset ";
+		b.append((unsigned long)errindex);
+		b += " of: ";
+		b += expr;
+		b += ": ";
+		b += errptr;
+		b += "\n";
+		b += '\0';
+		merr.write(b);
+		return -1;
+	}
 
-		pcre_regexp_extra=pcre_study(pcre_regexp, 0,
-					     &errptr);
+	pcre_regexp_extra=pcre_study(pcre_regexp, 0, &errptr);
 
-		if (errptr)
-		{
-			b="Error parsing regular expression: ";
-			b += expr;
-			b += ": ";
-			b += errptr;
-			b += "\n";
-			b += '\0';
-			merr.write(b);
-			return -1;
-		}
+	if (errptr)
+	{
+		Buffer b;
+
+		b="Error parsing regular expression: ";
+		b += expr;
+		b += ": ";
+		b += errptr;
+		b += "\n";
+		b += '\0';
+		merr.write(b);
+		return -1;
+	}
 
-		int cnt=0;
+	int cnt=0;
 
-		pcre_fullinfo(pcre_regexp, pcre_regexp_extra,
-			      PCRE_INFO_CAPTURECOUNT, &cnt);
+	pcre_fullinfo(pcre_regexp, pcre_regexp_extra,
+		      PCRE_INFO_CAPTURECOUNT, &cnt);
 
-		pcre_vector_count=(cnt+1)*3;
+	pcre_vector_count=(cnt+1)*3;
 
-		pcre_vectors=(int *)malloc(pcre_vector_count*sizeof(int));
+	pcre_vectors=(int *)malloc(pcre_vector_count*sizeof(int));
 
-		if (!pcre_vectors)
-		{
-			b=strerror(errno);
-			b += "\n";
-			b += '\0';
-			merr.write(b);
-			return -1;
-		}
-	}				
-	else
+	if (!pcre_vectors)
 	{
-		if (regexp.Compile(expr, strchr(opts, 'D') ? 1:0, dummy))
-			return (-1);
+		Buffer b;
+
+		b=strerror(errno);
+		b += "\n";
+		b += '\0';
+		merr.write(b);
+		return -1;
 	}
 
 	while (*opts)
@@ -157,8 +138,7 @@ int Search::find(Message &msg, MessageInfo &,
 	if (init(expr, opts))	return (-1);
 
 	msg.Rewind();
-	return (strchr(opts, 'w') ? findinsection(msg, expr, foreachp):
-		findinline(msg, expr, foreachp));
+	return (findinline(msg, expr, foreachp));
 }
 
 int Search::find(const char *str, const char *expr, const char *opts,
@@ -185,57 +165,26 @@ int Search::find(const char *str, const char *expr, const char *opts,
 
 	for (;;)
 	{
-		if (pcre_regexp)
-		{
-			match_count=pcre_exec(pcre_regexp, pcre_regexp_extra,
-					      orig_str, strlen(orig_str),
-					      startoffset,
-					      0,
-					      pcre_vectors,
-					      pcre_vector_count);
-			if (match_count <= 0)
-				break;
-			startoffset=pcre_vectors[1];
-
-			score += weight1;
-			weight1 *= weight2;
-
-			if (!scoring_match || foreachp)
-			{
-				init_match_vars(orig_str, match_count,
-						pcre_vectors, foreachp);
-				if (!foreachp)
-					break;
-			}
-			continue;
-		}
-
-		ReMatchStr match(str);
-
-		if ( regexp.Match(match))	break;
+		match_count=pcre_exec(pcre_regexp, pcre_regexp_extra,
+				      orig_str, strlen(orig_str),
+				      startoffset,
+				      0,
+				      pcre_vectors,
+				      pcre_vector_count);
+		if (match_count <= 0)
+			break;
+		startoffset=pcre_vectors[1];
 
 		score += weight1;
 		weight1 *= weight2;
 
 		if (!scoring_match || foreachp)
 		{
-			match.SetCurrentPos(0);
-			init_match_vars(match, foreachp);
+			init_match_vars(orig_str, match_count,
+					pcre_vectors, foreachp);
 			if (!foreachp)
-				break;	// No need for more.
-		}
-
-	Re *p;
-	off_t	c=0;
-
-		for (p= &regexp; p; )
-			c += p->MatchCount( &p );
-		if (c == 0)
-		{
-			if (!*str)	break;
-			++c;
+				break;
 		}
-		str += c;
 	}
 	return (0);
 }
@@ -291,85 +240,6 @@ int	eof;
 				merr.write(msg);
 			}
 
-			if (pcre_regexp)
-			{
-				const char *orig_str=current_line;
-				int match_count;
-
-				match_count=pcre_exec(pcre_regexp,
-						      pcre_regexp_extra,
-						      orig_str,
-						      strlen(orig_str),
-						      0,
-						      0,
-						      pcre_vectors,
-						      pcre_vector_count);
-
-				if (match_count > 0)
-				{
-					score += weight1;
-					weight1 *= weight2;
-
-					if (!scoring_match || foreachp)
-					{
-						init_match_vars(orig_str,
-								match_count,
-								pcre_vectors,
-								foreachp);
-						if (!foreachp)
-							return (0);
-					}
-				}
-				else	if (VerboseLevel() > 2)
-					merr.write("Not matched.\n");
-			}
-			else
-			{
-				ReMatchStr match(current_line);
-
-				if (regexp.Match(match) == 0)
-				{
-					score += weight1;
-					weight1 *= weight2;
-					if (!scoring_match || foreachp)
-					{
-						match.SetCurrentPos(0);
-						init_match_vars(match,
-								foreachp);
-						if (!foreachp)
-							return (0);
-					}
-				}
-				else	if (VerboseLevel() > 2)
-					merr.write("Not matched.\n");
-			}
-		}
-		if ( c == '\n')	break;
-		current_line=next_line;
-	}
-	if (!match_body || eof)	return (0);
-
-	while (current_line.reset(), msg.appendline(current_line) == 0)
-	{
-		current_line.pop();
-		current_line += '\0';
-
-		if (VerboseLevel() > 2)
-		{
-		Buffer	msg;
-
-			msg="Matching /";
-			msg.append(expr);
-			msg.append("/ against ");
-			msg += current_line;
-			msg.pop();	// Trailing null byte.
-			msg += '\n';
-			msg += '\0';
-			merr.write(msg);
-		}
-
-		if (pcre_regexp)
-		{
 			const char *orig_str=current_line;
 			int match_count;
 
@@ -399,93 +269,61 @@ int	eof;
 			}
 			else	if (VerboseLevel() > 2)
 				merr.write("Not matched.\n");
+		}
+		if ( c == '\n')	break;
+		current_line=next_line;
+	}
+	if (!match_body || eof)	return (0);
 
-			continue;
+	while (current_line.reset(), msg.appendline(current_line) == 0)
+	{
+		current_line.pop();
+		current_line += '\0';
+
+		if (VerboseLevel() > 2)
+		{
+		Buffer	msg;
+
+			msg="Matching /";
+			msg.append(expr);
+			msg.append("/ against ");
+			msg += current_line;
+			msg.pop();	// Trailing null byte.
+			msg += '\n';
+			msg += '\0';
+			merr.write(msg);
 		}
 
-		ReMatchStr match(current_line);
+		const char *orig_str=current_line;
+		int match_count;
 
-		if (regexp.Match(match) == 0)
+		match_count=pcre_exec(pcre_regexp,
+				      pcre_regexp_extra,
+				      orig_str,
+				      strlen(orig_str),
+				      0,
+				      0,
+				      pcre_vectors,
+				      pcre_vector_count);
+
+		if (match_count > 0)
 		{
 			score += weight1;
 			weight1 *= weight2;
+
 			if (!scoring_match || foreachp)
 			{
-				match.SetCurrentPos(0);
-				init_match_vars(match, foreachp);
+				init_match_vars(orig_str,
+						match_count,
+						pcre_vectors,
+						foreachp);
 				if (!foreachp)
 					return (0);
 			}
 		}
 		else	if (VerboseLevel() > 2)
-				merr.write("Not matched.\n");
-	}
-	return (0);
-}
-
-///////////////////////////////////////////////////////////////////////////
-//
-// Search anchored in the entire message.
-//
-///////////////////////////////////////////////////////////////////////////
+			merr.write("Not matched.\n");
 
-int Search::findinsection(Message &msg, const char *expr, Buffer *foreachp)
-{
-	if (!match_header && !match_body)	return (0);	// Huh?
-
-	if (VerboseLevel() > 2)
-	{
-	Buffer	m;
-
-		m="Matching /";
-		m.append(expr);
-		m.append("/ against");
-		if (match_header)
-			m.append(" header");
-		if (match_body)
-			m.append(" body");
-		m += '\n';
-		m += '\0';
-		merr.write(m);
-	}
-
-	if (!match_header)
-	{
-	Buffer	dummy;
-
-		do
-		{
-			dummy.reset();
-			if (msg.appendline(dummy) < 0)	return (0);
-						// No message body, give up.
-		} while (dummy.Length() != 1 ||
-				*(const char *)dummy != '\n');
-	}
-
-off_t start_pos=msg.tell();
-ReMatchMsg	match_msg(&msg, !match_body, match_header);
-
-	while ( match_msg.CurrentChar() >= 0 && regexp.Match(match_msg) == 0)
-	{
-		score += weight1;
-		weight1 *= weight2;
-
-		if (!scoring_match || foreachp)
-		{
-			match_msg.SetCurrentPos(start_pos);
-			init_match_vars(match_msg, foreachp);
-			if (!foreachp)
-				break;	// No need for more.
-		}
-
-	Re *p;
-	off_t c=0;
-
-		for (p= &regexp; p; )
-			c += p->MatchCount( &p );
-		if (c == 0)	++c;
-		start_pos += c;
-		match_msg.SetCurrentPos(start_pos);
 	}
 	return (0);
 }
@@ -533,41 +371,3 @@ void Search::init_match_vars(const char *str, int nranges, int *offsets,
 		SetVar(varname, v);
 	}
 }
-
-void Search::init_match_vars(ReMatch &m, Buffer *foreachp)
-{
-Re	*p;
-Buffer	buf;
-Buffer	varname;
-unsigned long varnamecount=1;
-
-	varname="MATCH";
-	for (p= &regexp; p; )
-	{
-	Re	*q=p;
-	unsigned	count=p->MatchCount(&p);
-
-		buf.reset();
-		while (count)
-		{
-			buf.push( m.NextChar() );
-			count--;
-		}
-
-		if ( !q->IsDummy())
-		{
-			if (foreachp)
-			{
-				*foreachp += buf;
-				*foreachp += '\0';
-			}
-			else
-			{
-				SetVar(varname, buf);
-				++varnamecount;
-				varname="MATCH";
-				varname.append(varnamecount);
-			}
-		}
-	}
-}
diff --git a/maildrop/search.h b/maildrop/search.h
index 1eda5aa..2ba726d 100644
--- a/maildrop/search.h
+++ b/maildrop/search.h
@@ -2,7 +2,6 @@
 #define	search_h
 
 
-#include	"re.h"
 #include	"buffer.h"
 
 #if	HAVE_PCRE_H
@@ -48,7 +47,6 @@ class Search {
 	int	*pcre_vectors;
 	size_t	pcre_vector_count;
 
-	Re	regexp;
 	Buffer	current_line;
 	Buffer	next_line;
 
@@ -73,7 +71,6 @@ public:
 private:
 	int findinline(Message &, const char *, Buffer *);
 	int findinsection(Message &, const char *, Buffer *);
-	void init_match_vars(ReMatch &, Buffer *);
 	void init_match_vars(const char *, int, int *, Buffer *);
 } ;
 #endif
author	Sam Varshavchik	2013-08-25 22:20:03 -0400
committer	Sam Varshavchik	2013-08-28 21:07:40 -0400
commit	064186c841dee4f58e9ef3577e7550fbc761ab48 (patch)
tree	31c3b82045d0867d156bb6a3edc7b0e4be78fe39
parent	9bb1a8d85390653f702e8ad5556a2cd3793acbfe (diff)
download	courier-libs-064186c841dee4f58e9ef3577e7550fbc761ab48.tar.bz2