diff options
Diffstat (limited to 'maildrop/lexer.C')
| -rw-r--r-- | maildrop/lexer.C | 519 | 
1 files changed, 519 insertions, 0 deletions
| diff --git a/maildrop/lexer.C b/maildrop/lexer.C new file mode 100644 index 0000000..99732dd --- /dev/null +++ b/maildrop/lexer.C @@ -0,0 +1,519 @@ +#include "config.h" +#include	"lexer.h" +#include	"funcs.h" +#include	"varlist.h" +#include	<ctype.h> + + +int	Lexer::Open(const char *filename_arg) +{ +	linenum=1; +	lasttokentype=Token::semicolon; + +int	fd; + +	if ((fd=file.Open(filename_arg, O_RDONLY)) < 0) +		return (-1); +	filename=filename_arg; +	return (fd); +} + +void	Lexer::error(const char *errmsg) +{ +	merr.write(errmsg); +} + +void	Lexer::token(Token &t) +{ +	if ( file.fd() < 0) +		t.Type( Token::eof); +	else +	{ +		token2(t); +		if (t.Type() == Token::eof) +			file.Close(); +	} + +	lasttokentype=t.Type(); +	if (maildrop.embedded_mode) +		switch (lasttokentype)	{ +		case Token::tokento: +		case Token::tokencc: +		case Token::btstring: +		case Token::tokenxfilter: +		case Token::dotlock: +		case Token::flock: +		case Token::logfile: +		case Token::log: +			{ +			Buffer	errmsg; + +				errmsg="maildrop: '"; +				errmsg += t.Name(); +				errmsg += "' disabled in embedded mode.\n"; +				errmsg += '\0'; +				error((const char *)errmsg); +				t.Type( Token::error ); +				break; +			} +		default: +			break; +		} + +	if (VerboseLevel() > 8) +	{ +	Buffer	debug; + +		debug="Tokenized "; +		debug += t.Name(); +		debug += '\n'; +		debug += '\0'; +		error((const char *)debug); +	} +} + +void	Lexer::token2(Token &t) +{ +int	c; + +	t.Type(Token::error); + +	// Eat whitespace & comments + +	for (;;) +	{ +		while ((c=curchar()) >= 0 && isspace(c)) +		{ +			nextchar(); +			if (c == '\n' || c == '\r')	// Treat as semicolon +			{ +				t.Type(Token::semicolon); +				return; +			} +		} +		if (c == '\\')	// Continued line? +		{ +			nextchar(); +			c=curchar(); +			if (c < 0 || !isspace(c)) +			{ +				return;	// Error +			} +			while (c >= 0 && c != '\n') +			{ +				nextchar(); +				c=curchar(); +			} +			if (c == '\n')	nextchar(); +			continue; +		} + +		if (c != '#')	break; +		while ( (c=nextchar()) >= 0 && c != '\n') +			; +		if (c == '\n') +		{ +			t.Type(Token::semicolon); +			return; +		} +	} + +	if (c < 0) +	{ +		t.Type(lasttokentype == Token::semicolon ? Token::eof +			: Token::semicolon); +		return; +	} + +	// String, quoted by ", ', or ` + + +Buffer	&pattern=t.String(); +	pattern.reset(); + +	if (c == '\'' || c == '"' || c == '`') +	{ +	Token::tokentype ttype=Token::qstring; +	int quote_char=c; + +		if (c == '\'')	ttype=Token::sqstring; +		if (c == '`')	ttype=Token::btstring; + +		nextchar(); + +	int	q; + +		// Grab string until matching close is found. + +		while ((q=curchar()) != c) +		{ +			if (q < 0 || q == '\n' || q == '\r') +			{ +missquote: +				error("maildrop: Missing ', \", or `.\n"); +				return; +			} + +			// Backslash escape + +			if (q != '\\') +			{ +				nextchar(); +				pattern.push(q); +				continue; +			} +			nextchar(); + +			// Look what's after the backslash. +			// If it's whitespace, we may have a continuation +			// on the next line. + +		int	qq=curchar(); + +			if (qq < 0)	goto missquote; +			if (!isspace(qq) && qq != '\r' && qq != '\n') +			{ +				if (qq != quote_char && qq != '\\') +					pattern.push('\\'); +				pattern.push(qq); +				nextchar(); +				continue; +			} + +			// If it's not a continuation, we need to dutifully +			// save the characters as the string.  So, save the +			// current length of the string, and backtrack if +			// necessary. + +		int	l=pattern.Length(); +			pattern.push('\\'); + +			// Collect all whitespace after the backslash, +			// not including newline characters. + +			while ((q=curchar()) >= 0 && isspace(q) && +				q != '\r' && q != '\n') +			{ +				pattern.push(q); +				nextchar(); +			} +			if (q < 0)	goto missquote; + +			// If the next character is a newline char, or +			// a comment, we have a continuation. + +			if (q != '#' && q != '\r' && q != '\n')	continue; +			pattern.Length(l);	// Discard padding +			while (q != '\n') +			{ +				if (q < 0)	goto missquote; +				nextchar(); +				q=curchar(); +			} +			// Discard all whitespace at the beginning of the +			// next line. +			nextchar(); +			while ( (q=curchar()) >= 0 && isspace(q)) +				nextchar(); +			if (q < 0)	goto missquote; +		} +		nextchar(); +		t.Type(ttype); +		return; +	} + +	// A pattern - "/", then arbitrary text, terminated by "/" + +	if (c == '/' && lasttokentype != Token::equals && +		lasttokentype != Token::tokento && +		lasttokentype != Token::tokencc) +	{ +		pattern.push(c); +		nextchar(); +		c=curchar(); +		if (c == '\r' || c == '\n' || c < 0 || isspace(c)) +		{ +			t.Type(Token::divi); +			return; +		} + +		while ( (c=curchar()) != '/') +		{ +			if (c < 0 || c == '\r' || c == '\n') +				return;	// Error token - let parser throw +					// an error +			if (c == '\\') +			{ +				pattern.push(c); +				nextchar(); +				c=curchar(); +				if (c < 0 || c == '\r' || c == '\n') +					return; +			} + +			pattern.push(c); +			nextchar(); +		} +		pattern.push(c); +		nextchar(); +		if ((c=curchar()) == ':') +		{ +			pattern.push(c); +			nextchar(); +			while ( (c=curchar()) >= 0 && (isalnum(c) || +				c == '-' || c == '+' || c == '.' || c == ',')) +			{ +				pattern.push(c); +				nextchar(); +			} +		} +		t.Type(Token::regexpr); +		return; +	} + +// Letters, digits, -, ., :, /, can be in an unquoted string + +#define	ISUNQSTRING(x)	(x >= 0 && (isalnum(x) || (x) == '_' || x == '-' || \ +	(x) == '@' || (x) == '.' || x == ':' || x == SLASH_CHAR || x == '$' || \ +        x == '{' || x == '}')) + +// Unquoted string may not begin with {} + +#define	ISLUNQSTRING(x)	(x >= 0 && (isalnum(x) || (x) == '_' || x == '-' || \ +	(x) == '@' || (x) == '.' || x == ':' || x == SLASH_CHAR || x == '$')) + +	if (ISLUNQSTRING(c)) +	{ +		do +		{ +			nextchar(); +			pattern.push(c); +			c=curchar(); +		} while ( ISUNQSTRING(c) ); + +		while ( c >= 0 && isspace(c) && c != '\r' && c != '\n') +		{ +			nextchar(); +			c=curchar(); +		} +		if (pattern.Length() == 2) +		{ +		int	n= ((int)(unsigned char)*(const char *)pattern) << 8 +				| (unsigned char)((const char *)pattern)[1]; + +			switch (n)	{ +			case (('l' << 8) | 't'): +				t.Type(Token::slt); +				return; +			case (('l' << 8) | 'e'): +				t.Type(Token::sle); +				return; +			case (('g' << 8) | 't'): +				t.Type(Token::sgt); +				return; +			case (('g' << 8) | 'e'): +				t.Type(Token::sge); +				return; +			case (('e' << 8) | 'q'): +				t.Type(Token::seq); +				return; +			case (('n' << 8) | 'e'): +				t.Type(Token::sne); +				return; +			case (('t' << 8) | 'o'): +				t.Type(Token::tokento); +				return; +			case (('c' << 8) | 'c'): +				t.Type(Token::tokencc); +				return; +			} +		} +		if (pattern == "length") +			t.Type(Token::length); +		else if (pattern == "substr") +			t.Type(Token::substr); +		else if (pattern == "if") +			t.Type(Token::tokenif); +		else if (pattern == "elsif") +			t.Type(Token::tokenelsif); +		else if (pattern == "else") +			t.Type(Token::tokenelse); +		else if (pattern == "while") +			t.Type(Token::tokenwhile); +		else if (pattern == "exception") +			t.Type(Token::exception); +		else if (pattern == "echo") +			t.Type(Token::echo); +		else if (pattern == "xfilter") +			t.Type(Token::tokenxfilter); +		else if (pattern == "dotlock") +			t.Type(Token::dotlock); +		else if (pattern == "flock") +			t.Type(Token::flock); +		else if (pattern == "logfile") +			t.Type(Token::logfile); +		else if (pattern == "log") +			t.Type(Token::log); +		else if (pattern == "include") +			t.Type(Token::include); +		else if (pattern == "exit") +			t.Type(Token::exit); +		else if (pattern == "foreach") +			t.Type(Token::foreach); +		else if (pattern == "getaddr") +			t.Type(Token::getaddr); +		else if (pattern == "lookup") +			t.Type(Token::lookup); +		else if (pattern == "escape") +			t.Type(Token::escape); +		else if (pattern == "tolower") +			t.Type(Token::to_lower); +		else if (pattern == "toupper") +			t.Type(Token::to_upper); +		else if (pattern == "hasaddr") +			t.Type(Token::hasaddr); +		else if (pattern == "gdbmopen") +			t.Type(Token::gdbmopen); +		else if (pattern == "gdbmclose") +			t.Type(Token::gdbmclose); +		else if (pattern == "gdbmfetch") +			t.Type(Token::gdbmfetch); +		else if (pattern == "gdbmstore") +			t.Type(Token::gdbmstore); +		else if (pattern == "time") +			t.Type(Token::timetoken); +		else if (pattern == "import") +			t.Type(Token::importtoken); +		else if (pattern == "-")		// Hack +			t.Type(Token::minus); +		else if (pattern == "unset") +			t.Type(Token::unset); +		else +			t.Type(Token::qstring); +		return; +	} +	switch (c)	{ +	case '&': +		nextchar(); +		if ( curchar() == '&') +		{ +			t.Type(Token::land); +			nextchar(); +			return; +		} +		t.Type(Token::band); +		return; +	case '|': +		nextchar(); +		if ( curchar() == '|') +		{ +			t.Type(Token::lor); +			nextchar(); +			return; +		} +		t.Type(Token::bor); +		return; +	case '{': +		t.Type(Token::lbrace); +		nextchar(); +		return; +	case '}': +		t.Type(Token::rbrace); +		nextchar(); +		return; +	case '(': +		t.Type(Token::lparen); +		nextchar(); +		return; +	case ')': +		t.Type(Token::rparen); +		nextchar(); +		return; +	case ';': +		t.Type(Token::semicolon); +		nextchar(); +		return; +	case '+': +		t.Type(Token::plus); +		nextchar(); +		return; +	case '*': +		t.Type(Token::mult); +		nextchar(); +		return; +	case '~': +		t.Type(Token::bitwisenot); +		nextchar(); +		return; +	case '<': +		nextchar(); +		if ( curchar() == '=') +		{ +			nextchar(); +			t.Type(Token::le); +			return; +		} +		t.Type(Token::lt); +		return; +	case '>': +		nextchar(); +		if ( curchar() == '=') +		{ +			nextchar(); +			t.Type(Token::ge); +			return; +		} +		t.Type(Token::gt); +		return; +	case '=': +		nextchar(); +		if ( curchar() == '~') +		{ +			nextchar(); +			t.Type(Token::strregexp); +			return; +		} +		if ( curchar() != '=') +		{ +			t.Type(Token::equals); +			return; +		} +		nextchar(); +		t.Type(Token::eq); +		return; +	case '!': +		nextchar(); +		if ( curchar() != '=') +		{ +			t.Type(Token::logicalnot); +			return; +		} +		nextchar(); +		t.Type(Token::ne); +		return; +	case ',': +		nextchar(); +		t.Type(Token::comma); +		return; +	} +	nextchar(); +	// Let the parser throw an error. +} + +void	Lexer::errmsg(const char *emsg) +{ +	errmsg(linenum, emsg); +} + +void	Lexer::errmsg(unsigned long lnum, const char *emsg) +{ +Buffer	errbuf; + +	errbuf=filename; +	errbuf += "("; +	errbuf.append(lnum); +	errbuf += "): "; +	errbuf += emsg; +	errbuf += "\n"; +	merr << errbuf; +} | 
