diff options
Diffstat (limited to 'maildrop/re.h')
| -rw-r--r-- | maildrop/re.h | 100 | 
1 files changed, 100 insertions, 0 deletions
| diff --git a/maildrop/re.h b/maildrop/re.h new file mode 100644 index 0000000..1c2d3ab --- /dev/null +++ b/maildrop/re.h @@ -0,0 +1,100 @@ +#ifndef	re_h +#define	re_h + + +#include	"config.h" +#include	<sys/types.h> +#include	"funcs.h" +#include	"reeval.h" + +class ReMatch; + +/////////////////////////////////////////////////////////////////////////// +// +//  The Re class represents a regular expression.   The regular expression +//  is translated into a non-deterministic automaton, stored as a list +//  of RegExpNodes. +// +//  Then, one or more strings are matched against the regular expression. +// +//  The Re object may dynamically allocate another Re object in order to +//  implement the ! operator.  Each ! operator introduces a dynamically- +//  allocated Re object, which contains the next chained regular expression. +//  Another ! operator causes another object to be allocated. +// +//  The ^ and $ anchors are implemented here.  The ABSENCE of a ^ anchor +//  causes a dummy "[.\n]*" expression to be created in the first Re object, +//  with the real expression being parsed in the 2nd Re object. +// +//  When a string is matched against a regular expression, when the current +//  state includes a FINAL state, and there is a chained Re object, the +//  remainder of the string gets matched against the chained Re object. +//  If the chained matched succeeds, the entire match succeeds, otherwise, +//  we continue matching the original string. +// +//  If a match is succesfull, MatchCount() may be called to return the number +//  of characters that were matched.  If an ! operator is used, the optional +//  argument to MatchCount(), if not null, can be used to call MatchCount() +//  to return the count that the next expression matched. +// +/////////////////////////////////////////////////////////////////////////// + +class	RegExpNode; + +class Re { + +	Re	*chainedre;		// Chained regular expression +	Re	*prevre; +	RegExpNode *nodes;		// Singly-linked list of nodes +	RegExpNode *first;		// Starting node +	RegExpNode *final;		// Final node +	unsigned nextid;		// When creating, next ID to assign + +	RegExpNode	*allocnode(); +	const	char *expr, *origexpr; + +	// When matching: +	int	matched; +	off_t matchedpos; +	ReEval	*curstate, *nextstate; +	unsigned final_id; + +	int	curchar() { return ((int)(unsigned char)*expr); } +	void	nextchar() { ++expr; } +	int	casesensitive; +	int	matchFull; +	int	isCaret; +	int	isDummy; +public: +	Re(); +	~Re(); + +	int Compile(const char *, int, int &); +			// Compile regular expression +private: +	int CompileS(const char *, int, int &); + + +	void init(); +	RegExpNode **CompileAtom(RegExpNode **); +	RegExpNode **CompileAtomString(RegExpNode **); +	RegExpNode **CompileOrClause(RegExpNode **); +	RegExpNode **CompileElement(RegExpNode **); +	void is_sets(RegExpNode *); + +	int	parsechar(); + +// Evaluation + +	ReEval	state1, state2; +	unsigned charsmatched; +public: +	int	Match(ReMatch &); +	unsigned MatchCount(Re **p =0) { +					if (p) *p=chainedre; +					return (charsmatched); } +	int	IsDummy()	{ return (isDummy); } +	int	IsAnchorStart()	{ return (isCaret); } +} ; + +#endif | 
