diff options
| author | Sam Varshavchik | 2018-07-29 22:58:53 -0400 | 
|---|---|---|
| committer | Sam Varshavchik | 2018-07-29 23:15:23 -0400 | 
| commit | 7ccfca0cf50348039815532273d0aaf2d4318474 (patch) | |
| tree | d6bb17ccc88fbd674eb804bf80f4fc0bec41a964 | |
| parent | 6c2fe8b0d5be19cdd579bb5b86cec95cab62c320 (diff) | |
| download | courier-libs-7ccfca0cf50348039815532273d0aaf2d4318474.tar.bz2 | |
Implement RFC 6533 address encoding and decoding.
| -rw-r--r-- | rfc2045/.gitignore | 1 | ||||
| -rw-r--r-- | rfc2045/Makefile.am | 11 | ||||
| -rw-r--r-- | rfc2045/rfc2045.h | 24 | ||||
| -rw-r--r-- | rfc2045/rfc6533.c | 283 | ||||
| -rw-r--r-- | rfc2045/testrfc6533parser.c | 64 | 
5 files changed, 381 insertions, 2 deletions
diff --git a/rfc2045/.gitignore b/rfc2045/.gitignore
index 2fefef8..28aefd4 100644
--- a/rfc2045/.gitignore
+++ b/rfc2045/.gitignore
@@ -11,3 +11,4 @@
 /rfc2045_config.h.in
 /rfc2045charset.h
 /testrfc3676parser
+/testrfc6533parser
diff --git a/rfc2045/Makefile.am b/rfc2045/Makefile.am
index 1fe66e6..2847a7c 100644
--- a/rfc2045/Makefile.am
+++ b/rfc2045/Makefile.am
@@ -1,9 +1,10 @@
 #
-# Copyright 1998 - 2014 Double Precision, Inc.  See COPYING for
+# Copyright 1998 - 2018 Double Precision, Inc.  See COPYING for
 # distribution information.
 
 noinst_LTLIBRARIES=librfc2045.la
-noinst_PROGRAMS=reformime makemime headercheck testrfc3676parser
+noinst_PROGRAMS=reformime makemime headercheck testrfc3676parser \
+	testrfc6533parser
 
 AM_CXXFLAGS=@COURIER_UNICODE_CXXFLAGS@
 
@@ -29,6 +30,7 @@ librfc2045_la_SOURCES=rfc2045.c rfc2045.h rfc2045src.h \
 		     rfc2045decodemsgtoutf8.c \
 		     rfc2231.c rfc2231encode.c \
 		     rfc3676parser.h rfc3676parser.c rfc3676parsercpp.C \
+                     rfc6533.c \
 		     base64.c base64.h
 
 reformime_SOURCES=reformime.c
@@ -51,6 +53,10 @@ testrfc3676parser_SOURCES=testrfc3676parser.c
 testrfc3676parser_DEPENDENCIES=librfc2045.la
 testrfc3676parser_LDADD=$(testrfc3676parser_DEPENDENCIES) -lcourier-unicode
 
+testrfc6533parser_SOURCES=testrfc6533parser.c
+testrfc6533parser_DEPENDENCIES=librfc2045.la
+testrfc6533parser_LDADD=$(testrfc6533parser_DEPENDENCIES) -lcourier-unicode
+
 if HAVE_SGML
 reformime.html: reformime.sgml ../docbook/sgml2html
 	../docbook/sgml2html reformime.sgml reformime.html
@@ -81,3 +87,4 @@ check-am:
 	@SHELL@ $(srcdir)/testsuite | cmp -s - $(srcdir)/$(TESTSUITE)
 	@SHELL@ $(srcdir)/testsuitemm | cmp -s - $(srcdir)/testsuitemm.txt
 	@SHELL@ $(srcdir)/testrfc3676parsersuite | diff -U 3 $(srcdir)/testrfc3676parsersuite.txt -
+	./testrfc6533parser
diff --git a/rfc2045/rfc2045.h b/rfc2045/rfc2045.h
index 4aec67b..71c0121 100644
--- a/rfc2045/rfc2045.h
+++ b/rfc2045/rfc2045.h
@@ -683,6 +683,30 @@ void rfc2231_paramDecode(struct rfc2231param *paramList,
 			 int *langLen,
 			 int *textLen);
 
+/*
+** Encode an E-mail address as utf-8 address type specified in RFC 6533.
+** The e-mail address parameter must be encoded in UTF-8.
+**
+** The E-mail address is encoded as "rfc822" address type if it has only
+** ASCII characters, or if use_rfc822 is set to non0.
+**
+** A malloc-ed address gets returned.
+*/
+
+char *rfc6533_encode(const char *address, int use_rfc822);
+
+/*
+** Decode a utf-8 or an rfc-822 address type. Returns a malloc-ed buffer,
+** or NULL if the address cannot be decoded.
+**
+** Assumes valid UTF-8 coding, and does not verify it.
+**
+** Does verify, for both rfc-822 and utf-8 formats, that the returned address
+** does not contain control characters.
+*/
+
+char *rfc6533_decode(const char *address);
+
 #if 0
 {
 #endif
diff --git a/rfc2045/rfc6533.c b/rfc2045/rfc6533.c
new file mode 100644
index 0000000..529da45
--- /dev/null
+++ b/rfc2045/rfc6533.c
@@ -0,0 +1,283 @@
+/*
+** Copyright 2018 Double Precision, Inc.  See COPYING for
+** distribution information.
+*/
+
+/* Encoding and decoding of the "rfc822;" and "utf-8;" address types.
+*/
+
+#if    HAVE_CONFIG_H
+#include "rfc2045_config.h"
+#endif
+#include	"rfc2045.h"
+#include	<courier-unicode.h>
+#include	<string.h>
+#include	<stdlib.h>
+
+static const char xdigit[]="0123456789ABCDEF";
+
+static void count(const char *s, size_t n, void *ptr)
+{
+	(*(size_t *)ptr) += n;
+}
+
+static void save(const char *s, size_t n, void *ptr)
+{
+	char **p=(char **)ptr;
+
+	memcpy(*p, s, n);
+	*p += n;
+}
+
+static void encode_rfc822(const char *address,
+			  void (*callback)(const char *, size_t, void *),
+			  void *arg)
+{
+	callback("rfc822;", 7, arg);
+
+	while (*address)
+	{
+		size_t i;
+
+		for (i=0; address[i]; ++i)
+		{
+			if (address[i] < '!' || address[i] > '~')
+				break;
+			if (address[i] == '+' || address[i] == '=')
+				break;
+		}
+
+		if (i == 0)
+		{
+			(*callback)("+", 1, arg);
+
+			(*callback)(xdigit + ((*address >> 4) & 15), 1, arg);
+			(*callback)(xdigit + (*address & 15), 1, arg);
+			++address;
+			continue;
+		}
+
+		(*callback)(address, i, arg);
+		address += i;
+	}
+	(*callback)("", 1, arg);
+}
+
+static void encode_rfc6533(const char *address,
+			   void (*callback)(const char *, size_t, void *),
+			   void *arg)
+{
+	callback("utf-8;", 6, arg);
+
+	while (*address)
+	{
+		size_t i;
+
+		for (i=0; address[i]; ++i)
+		{
+			if ((unsigned char)address[i] <= ' ')
+				break;
+
+			if (address[i] == '\\' ||
+			    address[i] == '+' ||
+			    address[i] == '=' ||
+			    address[i] == 0x7f)
+				break;
+		}
+
+		if (i == 0)
+		{
+			static const char xdigit[]="0123456789ABCDEF";
+
+			(*callback)("\\x{", 3, arg);
+			(*callback)(xdigit + ((*address >> 4) & 15), 1, arg);
+			(*callback)(xdigit + (*address & 15), 1, arg);
+			(*callback)("}", 1, arg);
+			++address;
+			continue;
+		}
+
+		(*callback)(address, i, arg);
+		address += i;
+	}
+	(*callback)("", 1, arg);
+}
+
+char *rfc6533_encode(const char *address, int use_rfc822)
+{
+	size_t l=0;
+	char *buffer;
+	char *p;
+	const char *cp;
+
+	for (cp=address; *cp; ++cp)
+		if (*cp & 0x80)
+			break;
+
+	if (!*cp || use_rfc822)
+	{
+		encode_rfc822(address, count, &l);
+
+		if ((buffer=malloc(l)) == NULL)
+			abort();
+
+		p=buffer;
+		encode_rfc822(address, save, &p);
+		return buffer;
+	}
+
+	encode_rfc6533(address, count, &l);
+
+	if ((buffer=malloc(l)) == NULL)
+		abort();
+	p=buffer;
+	encode_rfc6533(address, save, &p);
+	return buffer;
+}
+
+static int decode_rfc6533(const char *address,
+			  void (*callback)(const char *, size_t, void *),
+			  void *arg)
+{
+	while (*address)
+	{
+		size_t i;
+		char32_t c;
+		char *p;
+		size_t ignore1;
+		int err;
+
+		for (i=0; address[i]; ++i)
+		{
+			if (address[i] == '\\')
+				break;
+		}
+
+		if (i)
+		{
+			(*callback)(address, i, arg);
+
+			address += i;
+			continue;
+		}
+
+		if (address[1] != 'x' ||
+		    address[2] != '{')
+			return -1;
+
+		c=0;
+
+		address += 3;
+
+		while (*address != '}')
+		{
+			const char *p;
+
+			if (!*address)
+				return -1;
+
+			p=strchr(xdigit, *address);
+			if (!p || c > 0x10FFFF) /* bad digit, or c would wrap */
+				return -1;
+			c <<= 4;
+			c |= (p-xdigit);
+			++address;
+		}
+		++address;
+		if (c == 0 || c > 0x10FFFF) /* NUL, or past Unicode range */
+			return -1;
+
+		err=0;
+		if (unicode_convert_fromu_tobuf(&c, 1, "utf-8",
+						&p, &ignore1, &err))
+			return -1; /* NULL here is 0, i.e. "success" */
+
+		if (err)
+		{
+			free(p);
+			return -1; /* nonzero so that the caller bails out */
+		}
+		(*callback)(p, strlen(p), arg);
+		free(p);
+	}
+	(*callback)("", 1, arg);
+	return 0;
+}
+
+char *rfc6533_decode(const char *address)
+{
+	size_t l;
+	char *buf;
+	char *p;
+
+	if (strncasecmp(address, "rfc822;", 7) == 0)
+	{
+		buf=malloc(strlen(address));
+
+		if (!buf)
+			abort();
+
+		p=buf;
+
+		address += 7;
+
+		while (*address)
+		{
+			const char *hi, *lo;
+
+			if (*address != '+')
+			{
+				*p++ = *address++;
+				continue;
+			}
+
+			++address;
+
+			if (*address)
+			{
+				hi=strchr(xdigit, *address);
+				++address;
+				if (*address)
+				{
+					lo=strchr(xdigit, *address);
+					++address;
+
+					if (hi && lo)
+					{
+						char n= (char)
+							((hi-xdigit) * 16
+							 +(lo-xdigit));
+
+						if (n)
+							*p++=n;
+					}
+				}
+			}
+		}
+		*p=0;
+	}
+	else
+	{
+		if (strncasecmp(address, "utf-8;", 6))
+			return NULL;
+
+		l=0;
+
+		if (decode_rfc6533(address+6, count, &l))
+			return NULL;
+
+		if ((buf=malloc(l)) == NULL)
+			abort();
+
+		p=buf;
+		decode_rfc6533(address+6, save, &p);
+	}
+
+	for (p=buf; *p; ++p)
+		if ((unsigned char)*p <= ' ')
+		{
+			free(buf);
+			return NULL;
+		}
+	return buf;
+}
diff --git a/rfc2045/testrfc6533parser.c b/rfc2045/testrfc6533parser.c
new file mode 100644
index 0000000..0dbf249
--- /dev/null
+++ b/rfc2045/testrfc6533parser.c
@@ -0,0 +1,64 @@
+/*
+** Copyright 2018 Double Precision, Inc.
+** See COPYING for distribution information.
+*/
+
+#include	"rfc2045.h"
+#include	<string.h>
+#include	<stdlib.h>
+#include	<stdio.h>
+
+static const struct {
+	const char *address;
+	int use_rfc822;
+	const char *result;
+} encode_tests[]={
+		  {"nobody@example.com", 0, "rfc822;nobody@example.com"},
+		  {"nobody+=me@example.com", 0,
+		   "rfc822;nobody+2B+3Dme@example.com"},
+		  {"\xd0\xb8\xd1\x81\xd0\xbf\xd1\x8b\xd1\x82\xd0\xb0\xd0\xbd\xd0\xb8\xd0\xb5@example.com", 0, "utf-8;\xd0\xb8\xd1\x81\xd0\xbf\xd1\x8b\xd1\x82\xd0\xb0\xd0\xbd\xd0\xb8\xd0\xb5@example.com"},
+		  {"\xd0\xb8\xd1\x81\xd0\xbf\xd1\x8b\xd1\x82\xd0\xb0\xd0\xbd\xd0\xb8\xd0\xb5@example.com", 1, "rfc822;+D0+B8+D1+81+D0+BF+D1+8B+D1+82+D0+B0+D0+BD+D0+B8+D0+B5@example.com"},
+		  {"\xd0\xb8\xd1\x81\xd0\xbf\xd1\x8b\xd1\x82\xd0\xb0\xd0\xbd\xd0\xb8\xd0\xb5+=\\me@example.com", 0, "utf-8;\xd0\xb8\xd1\x81\xd0\xbf\xd1\x8b\xd1\x82\xd0\xb0\xd0\xbd\xd0\xb8\xd0\xb5\\x{2B}\\x{3D}\\x{5C}me@example.com"},
+
+};
+
+int main(int argc, char **argv)
+{
+	size_t i;
+
+	for (i=0; i<sizeof(encode_tests)/sizeof(encode_tests[0]); ++i)
+	{
+		char *p=rfc6533_encode(encode_tests[i].address,
+				       encode_tests[i].use_rfc822);
+		char *q;
+
+		if (strcmp(p, encode_tests[i].result))
+		{
+			fprintf(stderr, "Expected to encode %s as %s, got %s\n",
+				encode_tests[i].address,
+				encode_tests[i].result,
+				p);
+			exit(1);
+		}
+		q=rfc6533_decode(p);
+
+		if (!q)
+		{
+			fprintf(stderr, "Could not decode %s\n", p);
+			exit(1);
+		}
+
+		if (strcmp(q, encode_tests[i].address))
+		{
+			fprintf(stderr, "Expected to decode %s as %s, got %s\n",
+				p,
+				encode_tests[i].address,
+				q);
+			exit(1);
+		}
+		free(p);
+		free(q);
+	}
+
+	exit(0);
+}
