1 files changed, 888 insertions, 0 deletions
diff --git a/maildrop/re.C b/maildrop/re.C
new file mode 100644
index 0000000..f2ab94e
--- /dev/null
+++ b/maildrop/re.C
@@ -0,0 +1,888 @@
+#include	"config.h"
+#include	"re.h"
+#include	"mio.h"
+#include	"regexpnode.h"
+#include	"rematch.h"
+#include	"funcs.h"
+#include	"buffer.h"
+#include	<ctype.h>
+
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// Create sets for the [:is....:] codes.
+//
+
+static void mk_alnum(unsigned char *p)
+{
+register unsigned i;
+
+	for (i=0; i<256; i++)
+		if (isalnum(i))
+			p[i/8] |= 1 << (i % 8);
+}
+
+static void mk_alpha(unsigned char *p)
+{
+register unsigned i;
+
+	for (i=0; i<256; i++)
+		if (isalpha(i))
+			p[i/8] |= 1 << (i % 8);
+}
+
+static void mk_cntrl(unsigned char *p)
+{
+register unsigned i;
+
+	for (i=0; i<256; i++)
+		if (iscntrl(i))
+			p[i/8] |= 1 << (i % 8);
+}
+
+static void mk_digit(unsigned char *p)
+{
+register unsigned i;
+
+	for (i=0; i<256; i++)
+		if (isdigit(i))
+			p[i/8] |= 1 << (i % 8);
+}
+
+static void mk_graph(unsigned char *p)
+{
+register unsigned i;
+
+	for (i=0; i<256; i++)
+		if (isgraph(i))
+			p[i/8] |= 1 << (i % 8);
+}
+
+static void mk_lower(unsigned char *p)
+{
+register unsigned i;
+
+	for (i=0; i<256; i++)
+		if (islower(i))
+			p[i/8] |= 1 << (i % 8);
+}
+
+static void mk_print(unsigned char *p)
+{
+register unsigned i;
+
+	for (i=0; i<256; i++)
+		if (isprint(i))
+			p[i/8] |= 1 << (i % 8);
+}
+
+static void mk_punct(unsigned char *p)
+{
+register unsigned i;
+
+	for (i=0; i<256; i++)
+		if (ispunct(i))
+			p[i/8] |= 1 << (i % 8);
+}
+
+static void mk_space(unsigned char *p)
+{
+register unsigned i;
+
+	for (i=0; i<256; i++)
+		if (isspace(i))
+			p[i/8] |= 1 << (i % 8);
+}
+
+static void mk_upper(unsigned char *p)
+{
+register unsigned i;
+
+	for (i=0; i<256; i++)
+		if (isupper(i))
+			p[i/8] |= 1 << (i % 8);
+}
+
+static void mk_xdigit(unsigned char *p)
+{
+register unsigned i;
+
+	for (i=0; i<256; i++)
+		if (isxdigit(i))
+			p[i/8] |= 1 << (i % 8);
+}
+
+static void mk_wbreak(unsigned char *p)
+{
+register unsigned i;
+
+	for (i=0; i<256; i++)
+		if (!isalnum(i) && i != '_')
+			p[i/8] |= 1 << (i % 8);
+}
+
+static const char *const is_setname[]={
+	":alnum:",
+	":alpha:",
+	":cntrl:",
+	":digit:",
+	":graph:",
+	":lower:",
+	":print:",
+	":punct:",
+	":space:",
+	":upper:",
+	":xdigit:",
+	":wbreak:"};
+
+static void (*is_setfunc[])(unsigned char *)={
+	mk_alnum,
+	mk_alpha,
+	mk_cntrl,
+	mk_digit,
+	mk_graph,
+	mk_lower,
+	mk_print,
+	mk_punct,
+	mk_space,
+	mk_upper,
+	mk_xdigit,
+	mk_wbreak};
+
+Re::Re() : chainedre(0), prevre(0), nodes(0), first(0), isCaret(0)
+{
+}
+
+Re::~Re()
+{
+	init();
+}
+
+void Re::init()
+{
+	if (chainedre)	delete chainedre;
+	chainedre=0;
+
+RegExpNode *n;
+
+	while ((n=nodes) != 0)
+	{
+		nodes=n->next;
+		delete n;
+	}
+}
+
+inline RegExpNode	*Re::allocnode()
+{
+RegExpNode	*n;
+
+	if ((n=new RegExpNode(nextid++)) == 0)
+		outofmem();
+	n->next=nodes; nodes=n; return(n);
+}
+
+int Re::Compile(const char *ptr, int caseflag, int &errindex)
+{
+	if (*ptr == '^')
+	{
+		if (CompileS(ptr+1, caseflag, errindex))	return (-1);
+		isCaret=1;
+		return (0);
+	}
+
+	if (CompileS("[.\n]*", 1, errindex) < 0)	return (-1);
+	if ((chainedre=new Re) == 0)
+		outofmem();
+	isDummy=1;
+	chainedre->prevre=this;
+	return (chainedre->CompileS(ptr, caseflag, errindex));
+}
+
+int Re::CompileS(const char *ptr, int caseflag, int &errindex)
+{
+	expr=ptr;
+	origexpr=expr;
+	init();
+	nextid=0;
+	first=0;
+	isCaret=0;
+	isDummy=0;
+	casesensitive=caseflag;
+	matchFull=0;
+
+int	rc=0;
+
+	try
+	{
+	RegExpNode **p=CompileOrClause(&first);
+
+		if (*expr == '!')
+		{
+		int dummy;
+
+			++expr;
+			if ((chainedre=new Re) == 0)
+				outofmem();
+			if ( chainedre->CompileS(expr, caseflag, dummy) < 0)
+			{
+				expr += dummy;
+				throw -1;
+			}
+			chainedre->prevre=this;
+			if (VerboseLevel() > 7)
+				merr.write("\n*** CHAINED TO ***\n");
+
+		} else if (curchar())	throw -1;
+
+		final=*p=allocnode();
+		final->thechar=REFINAL;
+	}
+	catch (int n)
+	{
+		init();
+		errindex=expr-origexpr;
+		rc= n;
+	}
+	if (rc == 0 && VerboseLevel() > 7)
+	{
+	RegExpNode *n;
+	Buffer	b;
+
+		if (first)
+		{
+			b="Start node: ";
+			b.append( (unsigned long)first->id );
+			b += "\n\n";
+			b += '\0';
+			merr.write(b);
+		}
+		for (n=nodes; n; n=n->next)
+		{
+			b="Node ";
+			b.append( (unsigned long)n->id );
+			b += ": ";
+			switch (n->thechar)	{
+			case RENULL:
+				b  += "null";
+				break;
+			case RESET:
+				b += "[set] ";
+				{
+				int i,j=0;
+
+					for (i=0; i<256; i=j)
+					{
+						j=i+1;
+						if ((n->reset[i/8] &
+							(1 << (i % 8))) == 0)
+							continue;
+						for (j=i; j<256; j++)
+							if ((n->reset[j/8] &
+								(1 << (j % 8)))
+								== 0)
+							break;
+						if (i < ' ' || i > 127)
+						{
+							b += '#';
+							b.append((unsigned long)
+								i);
+						}
+						else
+						{
+							if (i == '#'
+								|| i == '-'
+								|| i == '\\')
+								b += '\\';
+							b += (char)i;
+						}
+						if (i+1 == j)	continue;
+						b += ('-');
+						--j;
+					}
+				}
+				break;
+			case REFINAL:
+				b += "final";
+				break;
+			default:
+				if (n->thechar >= ' ' && n->thechar < 127)
+				{
+					b += '\'';
+					b += (char)n->thechar;
+					b += '\'';
+				}
+				else
+				{
+					b += "chr(";
+					b.append((unsigned long)n->thechar);
+					b += ')';
+				}
+			}
+			b += '\n';
+			b += '\0';
+			merr.write( b );
+			if (n->next1)
+			{
+				b="    transition to ";
+				b.append((unsigned long)n->next1->id);
+				b += '\n';
+				b += '\0';
+				merr.write(b);
+			}
+
+			if (n->next2)
+			{
+				b="    transition to ";
+				b.append((unsigned long)n->next2->id);
+				b += '\n';
+				b += '\0';
+				merr.write(b);
+			}
+			merr.write("\n");
+		}
+	}
+	return (rc);
+}
+
+RegExpNode **Re::CompileOrClause(RegExpNode **ptr)
+{
+RegExpNode **finish=CompileAtomString(ptr);
+
+	if ( curchar() != '|')	return (finish);
+
+RegExpNode *realfinish=allocnode();
+
+	realfinish->thechar=RENULL;
+	*finish=realfinish;
+
+	while ( curchar() == '|' )
+	{
+		nextchar();
+
+	RegExpNode *newstart=allocnode();
+
+		newstart->thechar=RENULL;
+		newstart->next1= *ptr;
+		*ptr=newstart;
+
+		finish=CompileAtomString(&newstart->next2);
+		*finish=realfinish;
+	}
+	return (&realfinish->next1);
+}
+
+RegExpNode **Re::CompileAtomString(RegExpNode **ptr)
+{
+int	c;
+
+	for (;;)
+	{
+		c=curchar();
+		if (c == 0 || c == '|' || c == ')' || c == '!')
+			break;
+		ptr=CompileElement(ptr);
+	}
+	return (ptr);
+}
+
+RegExpNode **Re::CompileElement(RegExpNode **start)
+{
+RegExpNode **finish;
+
+	if (curchar() != '$')
+	{
+		finish=CompileAtom(start);
+	}
+	else
+	{
+		nextchar();
+		if (curchar() == 0)
+		{
+			matchFull=1;
+			return (start);
+		}
+		(*start)=allocnode();
+		(*start)->thechar='$';
+		finish= & (*start)->next1;
+	}
+
+	switch (curchar())	{
+	case '+':
+		(*finish)=allocnode();
+		(*finish)->thechar=RENULL;
+		(*finish)->next1=(*start);
+		finish= &(*finish)->next2;
+		nextchar();
+		break;
+	case '*':
+		(*finish)=allocnode();
+		(*finish)->thechar=RENULL;
+		(*finish)->next1=(*start);
+		(*start)=(*finish);
+		finish= &(*finish)->next2;
+		nextchar();
+		break;
+	case '?':
+
+		{
+		RegExpNode *newstart=allocnode();
+
+			newstart->thechar=RENULL;
+			(*finish)=allocnode();
+			(*finish)->thechar=RENULL;
+			newstart->next1= *start;
+			newstart->next2= *finish;
+			*start=newstart;
+			finish= &(*finish)->next1;
+			nextchar();
+		}
+		break;
+	}
+	return (finish);
+}
+
+RegExpNode **Re::CompileAtom(RegExpNode **ptr)
+{
+int	c=curchar();
+
+	if (c == '(')	// Subexpression
+	{
+		nextchar();
+		ptr=CompileOrClause(ptr);
+		if ( curchar() != ')')	throw -1;
+		nextchar();
+		return (ptr);
+	}
+
+	(*ptr)=allocnode();
+
+	if (c == '[' || c == '.')
+	{
+	int	i, complement=0;
+
+		if ( ((*ptr)->reset=new unsigned char[256/8]) == 0)
+			outofmem();
+		for (i=0; i<256/8; i++)
+			(*ptr)->reset[i]=0;
+
+		if ( c == '.' )
+		{
+			(*ptr)->reset[ '\n' / 8 ] |= 1 << ('\n' % 8);
+			complement=1;
+		}
+		else
+		{
+			nextchar();
+			if ( curchar() == '^')
+			{
+				complement=1;
+				nextchar();
+			}
+
+			is_sets(*ptr);
+		}
+		nextchar();
+		if (complement)
+			for (i=0; i<256/8; i++)
+				(*ptr)->reset[i] ^= ~0;
+		c=RESET;
+	}
+	else c=parsechar();
+
+	(*ptr)->thechar=c;
+	return (&(*ptr)->next1);
+}
+
+void Re::is_sets(RegExpNode *p)
+{
+Buffer	buf;
+int	c=curchar();
+int	call_parsechar=1;
+
+	if (c == ':')
+	{
+		do
+		{
+			buf += c;
+			nextchar();
+		} while ( (c=curchar()) >= 0 && isalpha(c));
+
+		if (c == ':')
+		{
+			buf += c;
+			nextchar();
+			c=curchar();
+			if (c == ']')
+			{
+				buf += '\0';
+
+			const char *q=(const char *)buf;
+			unsigned i;
+
+				for (i=0; i<sizeof(is_setname)/
+					sizeof(is_setname[0]); i++)
+				{
+					if (strcmp(is_setname[i], q) == 0)
+					{
+						(*is_setfunc[i])(p->reset);
+						return;
+					}
+				}
+			}
+		}
+
+	int	i=0;
+
+		for (i=0; i<buf.Length(); i++)
+		{
+			c=(int)(unsigned char)((const char*)buf)[i];
+			p->reset[ c / 8 ] |= 1 << (c % 8);
+		}
+		// In case the next character is '-', leave 'c' the way it
+		// is.
+		call_parsechar=0;
+		if (curchar() == ']')	return;
+	}
+
+	do
+	{
+	int	c2;
+
+		if (c == 0)	throw -1;
+
+		if (c == '.')
+		{
+			for (c2=0; c2 < 256/8; c2++)
+				if (c2 != '\n' / 8)
+					p->reset[c2]= ~0;
+				else
+					p->reset[c2] |= ~(1 << ('\n' % 8));
+			if (call_parsechar)
+				nextchar();
+			call_parsechar=1;
+			continue;
+		}
+		if (call_parsechar)
+			c=parsechar();
+		c2=c;
+		call_parsechar=1;
+
+		if (curchar() == '-')
+		{
+			nextchar();
+			c2=parsechar();
+		}
+		while ( c <= c2 )
+		{
+			p->reset[ c / 8 ] |= 1 << (c % 8);
+			++c;
+		}
+	} while ((c=curchar()) != ']');
+}
+
+int Re::parsechar()
+{
+int	c;
+
+	c=curchar();
+	if (c == 0)	throw -1;
+	nextchar();
+	if (c != '\\') return (c);
+	c=curchar();
+
+	if (c == 0)
+		throw -1;
+	else if (c >= '0' && c <= '7')
+	{
+	unsigned char uc=0;
+
+		while ( c >= '0' && c <= '7' )
+		{
+			uc = uc * 8 + (c-'0');
+			nextchar();
+			c=curchar();
+		}
+		c=uc;
+	}
+	else
+	{
+		c=backslash_char(c);
+		nextchar();
+	}
+	return (c);
+}
+
+/////////////////////////////////////////////////////////////////////////////
+
+int Re::Match(ReMatch &string)
+{
+	matched= -1;
+	matchedpos=0;
+
+	charsmatched=0;
+	state1.init(nextid);
+	state2.init(nextid);
+
+	curstate= &state1;
+	nextstate= &state2;
+
+	curstate->nodes[0]=first;
+	curstate->numnodes=1;
+	curstate->nodenums[first->id]=0;
+
+	final_id=final->id;
+
+	if (VerboseLevel() > 8)
+	{
+		merr.write("*** MATCH START ***\n");
+	}
+
+	for (;;)
+	{
+	// Compute null closure
+
+	unsigned n;
+
+		for (n=0; n<curstate->numnodes; n++)
+		{
+		RegExpNode *p=curstate->nodes[n];
+
+			if (p->thechar != RENULL)	continue;
+
+		RegExpNode *q=p->next1;
+
+			if (q && curstate->nodenums[q->id] != charsmatched)
+			{
+				curstate->nodes[curstate->numnodes++]=q;
+				curstate->nodenums[q->id]=charsmatched;
+				if (VerboseLevel() > 8)
+				{
+				Buffer b;
+
+					b="  Transition to state ";
+					b.append((unsigned long)q->id);
+					b += '\n';
+					b += '\0';
+					merr.write(b);
+				}
+			}
+
+			q=p->next2;
+			if (q && curstate->nodenums[q->id] != charsmatched)
+			{
+				curstate->nodes[curstate->numnodes++]=q;
+				curstate->nodenums[q->id]=charsmatched;
+				if (VerboseLevel() > 8)
+				{
+				Buffer b;
+
+					b="  Transition to state ";
+					b.append((unsigned long)q->id);
+					b += '\n';
+					b += '\0';
+					merr.write(b);
+				}
+			}
+		}
+
+	int	nextChar;
+
+		if (curstate->nodenums[final_id] == charsmatched)
+		{
+		off_t	pos=string.GetCurrentPos();
+
+			if (VerboseLevel() > 8)
+				merr.write("**Final node.\n");
+			if (chainedre)
+			{
+			unsigned long saved_matched_chainedre=
+					chainedre->charsmatched;
+
+				// On subsequent passes, charsmatched gets
+				// reset.  If, previously, we had a match,
+				// don't forget # of characters matched!
+
+				if (VerboseLevel() > 8)
+					merr.write(
+					"**Final node - checking subexpr.\n");
+				if (chainedre->Match(string) == 0)
+				{
+					if (VerboseLevel() > 8)
+					{
+					Buffer	buf;
+
+						buf="**Subexpr matched after ";
+						buf.append( (unsigned long)
+							charsmatched);
+						buf += " characters.\n";
+						buf += '\0';
+						merr.write(buf);
+					}
+					matched=0;
+					matchedpos=charsmatched;
+					if (isDummy)	// Don't need to
+							// look for max matches
+							// for the dummy block
+					{
+						return (0);
+					}
+				}
+				else
+				{
+					if (VerboseLevel() > 8)
+						merr.write(
+						"**Subexpr didn't match.\n");
+					chainedre->charsmatched=
+						saved_matched_chainedre;
+				}
+				string.SetCurrentPos(pos);
+				nextChar=string.NextChar();
+			}
+			else
+			{
+				if (!matchFull)	// We don't need to match full
+						// string.
+				{
+					if (VerboseLevel() > 8)
+					{
+					Buffer	buf;
+
+						buf="Matched ";
+						buf.append( (unsigned long)
+							charsmatched);
+						buf += " characters.\n";
+						buf += '\0';
+						merr.write(buf);
+					}
+					matched=0;
+					matchedpos=charsmatched;
+				}
+
+				nextChar=string.NextChar();
+				if ( nextChar < 0)
+				{
+					if (VerboseLevel() > 8)
+					{
+					Buffer	buf;
+
+						buf="Matched ";
+						buf.append( (unsigned long)
+							charsmatched);
+						buf += " characters.\n";
+						buf += '\0';
+						merr.write(buf);
+					}
+					return (0);	// Matched everything
+				}
+			}
+		}
+		else nextChar=string.NextChar();
+
+		if (nextChar < 0)
+		{
+			if (VerboseLevel() > 8)
+				merr.write(
+					"Failed - End of matching string.\n");
+			charsmatched=matchedpos;
+			return (matched);
+		}
+		if (curstate->numnodes == 0)	// No sense to continue
+		{
+			if (VerboseLevel() > 8)
+				merr.write(
+					"Failed - out of states.\n");
+			charsmatched=matchedpos;
+			return (matched);
+		}
+
+		if (VerboseLevel() > 8)
+		{
+		Buffer	b;
+
+			b="Matching character: ";
+
+			if (nextChar <= ' ' || nextChar > 127)
+			{
+				b += '#';
+				b.append((unsigned long)nextChar);
+			}
+			else	b += (char)nextChar;
+			b += '\n';
+			b += '\0';
+			merr.write(b);
+		}
+		++charsmatched;
+
+		if (!casesensitive)
+			nextChar=tolower(nextChar);
+
+		nextstate->numnodes=0;
+
+		for (n=0; n<curstate->numnodes; n++)
+		{
+		RegExpNode *p=curstate->nodes[n];
+
+			if (p->thechar == RESET)
+			{
+				if ((p->reset[nextChar / 8] &
+						(1 << (nextChar % 8))) == 0)
+				{
+					if (casesensitive)	continue;
+
+				int	uchar=toupper(nextChar);
+					if ((p->reset[uchar / 8] &
+						(1 << (uchar % 8))) == 0)
+							continue;
+				}
+			}
+			else
+			{
+				if (p->thechar != nextChar)
+				{
+					if (casesensitive)	continue;
+				int	uchar=toupper(nextChar);
+					if (p->thechar != uchar)
+						continue;
+				}
+			}
+
+		RegExpNode *q=p->next1;
+
+			if (q && nextstate->nodenums[q->id] != charsmatched)
+			{
+				nextstate->nodes[nextstate->numnodes++]=q;
+				nextstate->nodenums[q->id]=charsmatched;
+				if (VerboseLevel() > 8)
+				{
+				Buffer b;
+
+					b="  Transition to state ";
+					b.append((unsigned long)q->id);
+					b += '\n';
+					b += '\0';
+					merr.write(b);
+				}
+			}
+
+			q=p->next2;
+
+			if (q && nextstate->nodenums[q->id] != charsmatched)
+			{
+				nextstate->nodes[nextstate->numnodes++]=q;
+				nextstate->nodenums[q->id]=charsmatched;
+				if (VerboseLevel() > 8)
+				{
+				Buffer b;
+
+					b="  Transition to state ";
+					b.append((unsigned long)q->id);
+					b += '\n';
+					b += '\0';
+					merr.write(b);
+				}
+			}
+		}
+
+	ReEval *swap=curstate; curstate=nextstate; nextstate=swap;
+	}
+}