summaryrefslogtreecommitdiffstats
path: root/maildrop/re.h
diff options
context:
space:
mode:
Diffstat (limited to 'maildrop/re.h')
-rw-r--r--maildrop/re.h100
1 files changed, 100 insertions, 0 deletions
diff --git a/maildrop/re.h b/maildrop/re.h
new file mode 100644
index 0000000..1c2d3ab
--- /dev/null
+++ b/maildrop/re.h
@@ -0,0 +1,100 @@
+#ifndef re_h
+#define re_h
+
+
+#include "config.h"
+#include <sys/types.h>
+#include "funcs.h"
+#include "reeval.h"
+
+class ReMatch;
+
+///////////////////////////////////////////////////////////////////////////
+//
+// The Re class represents a regular expression. The regular expression
+// is translated into a non-deterministic automaton, stored as a list
+// of RegExpNodes.
+//
+// Then, one or more strings are matched against the regular expression.
+//
+// The Re object may dynamically allocate another Re object in order to
+// implement the ! operator. Each ! operator introduces a dynamically-
+// allocated Re object, which contains the next chained regular expression.
+// Another ! operator causes another object to be allocated.
+//
+// The ^ and $ anchors are implemented here. The ABSENCE of a ^ anchor
+// causes a dummy "[.\n]*" expression to be created in the first Re object,
+// with the real expression being parsed in the 2nd Re object.
+//
+// When a string is matched against a regular expression, when the current
+// state includes a FINAL state, and there is a chained Re object, the
+// remainder of the string gets matched against the chained Re object.
+// If the chained matched succeeds, the entire match succeeds, otherwise,
+// we continue matching the original string.
+//
+// If a match is succesfull, MatchCount() may be called to return the number
+// of characters that were matched. If an ! operator is used, the optional
+// argument to MatchCount(), if not null, can be used to call MatchCount()
+// to return the count that the next expression matched.
+//
+///////////////////////////////////////////////////////////////////////////
+
+class RegExpNode;
+
+class Re {
+
+ Re *chainedre; // Chained regular expression
+ Re *prevre;
+ RegExpNode *nodes; // Singly-linked list of nodes
+ RegExpNode *first; // Starting node
+ RegExpNode *final; // Final node
+ unsigned nextid; // When creating, next ID to assign
+
+ RegExpNode *allocnode();
+ const char *expr, *origexpr;
+
+ // When matching:
+ int matched;
+ off_t matchedpos;
+ ReEval *curstate, *nextstate;
+ unsigned final_id;
+
+ int curchar() { return ((int)(unsigned char)*expr); }
+ void nextchar() { ++expr; }
+ int casesensitive;
+ int matchFull;
+ int isCaret;
+ int isDummy;
+public:
+ Re();
+ ~Re();
+
+ int Compile(const char *, int, int &);
+ // Compile regular expression
+private:
+ int CompileS(const char *, int, int &);
+
+
+ void init();
+ RegExpNode **CompileAtom(RegExpNode **);
+ RegExpNode **CompileAtomString(RegExpNode **);
+ RegExpNode **CompileOrClause(RegExpNode **);
+ RegExpNode **CompileElement(RegExpNode **);
+ void is_sets(RegExpNode *);
+
+ int parsechar();
+
+// Evaluation
+
+ ReEval state1, state2;
+ unsigned charsmatched;
+public:
+ int Match(ReMatch &);
+ unsigned MatchCount(Re **p =0) {
+ if (p) *p=chainedre;
+ return (charsmatched); }
+ int IsDummy() { return (isDummy); }
+ int IsAnchorStart() { return (isCaret); }
+} ;
+
+#endif