diff options
| -rw-r--r-- | unicode/ChangeLog | 4 | ||||
| -rw-r--r-- | unicode/Makefile.am | 3 | ||||
| -rw-r--r-- | unicode/courier-unicode.h.in | 15 | ||||
| -rw-r--r-- | unicode/unicode.c | 321 | ||||
| -rw-r--r-- | unicode/unicodetest.c | 6 | 
5 files changed, 345 insertions, 4 deletions
| diff --git a/unicode/ChangeLog b/unicode/ChangeLog index 42278a8..f29176e 100644 --- a/unicode/ChangeLog +++ b/unicode/ChangeLog @@ -1,3 +1,7 @@ +2018-07-11  Sam Varshavchik  <mrsam@courier-mta.com> + +	* unicode.c: Implement unicode_x_smap_modutf8 pseudo-encoding. +  2018-04-27  Sam Varshavchik  <mrsam@courier-mta.com>  	* gcc 8 update, fix assertions. libtool and toolchain updates. diff --git a/unicode/Makefile.am b/unicode/Makefile.am index a7a0783..bb46ae6 100644 --- a/unicode/Makefile.am +++ b/unicode/Makefile.am @@ -167,6 +167,9 @@ check-am: unicodetest  	test "`./unicodetest 'foobааааааr'`" = "foob&BDAEMAQwBDAEMAQw-r"  	test "`./unicodetest 'foobаaаr'`" = "foob&BDA-a&BDA-r"  	test "`./unicodetest 'foobааaааr'`" = "foob&BDAEMA-a&BDAEMA-r" +	test "`./unicodetest --smaputf8 'hello world'`" = 'hello\040world' +	test "`./unicodetest --smaputf8 'hello\\\\world'`" = 'hello\134\134world' +	test "`./unicodetest --smaputf8 ':hello:world:'`" = '\072hello\072world\072'  	n="aaaaaaaa"; n="$$n$$n$$n$$n"; n="$$n$$n$$n$$n"; n="$$n$$n$$n$$n"; n="$$n$$n"; n="`echo $$n | cut -c1-1023`"; test "`./unicodetest $$n`" = "$$n"  	n="aaaaaaaa"; n="$$n$$n$$n$$n"; n="$$n$$n$$n$$n"; n="$$n$$n$$n$$n"; n="$$n$$n"; test "`./unicodetest $$n`" = "$$n"  	n="aaaaaaaa"; n="$$n$$n$$n$$n"; n="$$n$$n$$n$$n"; n="$$n$$n$$n$$n"; n="$$n$$n"; test "`./unicodetest a$$n`" = "a$$n" diff --git a/unicode/courier-unicode.h.in b/unicode/courier-unicode.h.in index 6bf9c17..7c6a00d 100644 --- a/unicode/courier-unicode.h.in +++ b/unicode/courier-unicode.h.in @@ -2,7 +2,7 @@  #define	courier_unicode_h  /* -** Copyright 2000-2017 Double Precision, Inc. +** Copyright 2000-2018 Double Precision, Inc.  ** See COPYING for distribution information.  **  */ @@ -987,6 +987,19 @@ extern const char unicode_u_ucs2_native[];  #define unicode_x_imap_modutf7 "x-imap-modutf7" +/* +** EAI-capable Courier-IMAP does not use modified-UTF7, and uses UTF-8. +** +** However, to support SMAP we will still need to encode/decode some +** special characters. +** +** The characters U+0000-U+0020 (including space), and ./~:\ +** +** They are encoded as a backslash followed by three octal digits. +*/ + +#define unicode_x_smap_modutf8 "x-smap-modutf8" +  #if 0  {  #endif diff --git a/unicode/unicode.c b/unicode/unicode.c index 71e6439..2cf5856 100644 --- a/unicode/unicode.c +++ b/unicode/unicode.c @@ -1,5 +1,5 @@  /* -** Copyright 2000-2011 Double Precision, Inc. +** Copyright 2000-2018 Double Precision, Inc.  ** See COPYING for distribution information.  **  */ @@ -239,6 +239,42 @@ static int do_convert_toutf7(const char *text, size_t cnt, void *arg);  static int convert_utf7_handler(void *ptr, const char *text, size_t cnt);  /* +** Conversion wrapper for converting to modified-utf8 SMAP encoding. +** +** This is done by converting to UTF-8, then stacking on a module that +** takes that and converts UTF-8 to modified-UTF8. +** +** init_nottosmaputf8() returns an opaque stack for converting to modified +** UTF-8. +*/ + +static unicode_convert_handle_t +init_nottosmaputf8(const char *src_chset, +		   const char *dst_chset, +		   int (*output_func)(const char *, size_t, void *), +		   void *convert_arg); + +/* +** The to modified UTF8 module +*/ + +struct unicode_convert_tosmaputf8 { + +	struct unicode_convert_hdr hdr; + +	int errflag; + +	/* Remembered output function */ + +	int (*output_func)(const char *, size_t, void *); + +	/* Remembered arg to the output function */ +	void *convert_arg; +}; + + + +/*  ** Create a conversion module stack  */ @@ -417,6 +453,129 @@ static int deinit_toimaputf7(void *ptr, int *errptr)  	return rc;  } +/* +** Convert to unicode_x_smap_modutf8. +*/ + +static int deinit_tosmaputf8(void *ptr, int *errptr); +static int do_convert_tosmaputf8(const char *text, size_t cnt, void *arg); +static int convert_utf8_handler(void *ptr, const char *text, size_t cnt); + +static unicode_convert_handle_t +init_nottoimaputf7(const char *src_chset, +		   const char *dst_chset, +		   int (*output_func)(const char *, size_t, void *), +		   void *convert_arg) +{ +	struct unicode_convert_tosmaputf8 *toutf8; +	unicode_convert_handle_t h; + +	if (strcmp(dst_chset, unicode_x_smap_modutf8)) +		return init_nottosmaputf8(src_chset, dst_chset, +					  output_func, +					  convert_arg); + +	toutf8=malloc(sizeof(struct unicode_convert_tosmaputf8)); + +	if (!toutf8) +		return NULL; + +	memset(toutf8, 0, sizeof(*toutf8)); + +	h=init_nottosmaputf8(src_chset, "utf-8", +			     do_convert_tosmaputf8, +			     toutf8); +	if (!h) +	{ +		free(toutf8); +		return (NULL); +	} + +	toutf8->output_func=output_func; +	toutf8->convert_arg=convert_arg; + +	toutf8->hdr.convert_handler=convert_utf8_handler; +	toutf8->hdr.deinit_handler=deinit_tosmaputf8; +	toutf8->hdr.ptr=toutf8; +	toutf8->hdr.next=h; +	return &toutf8->hdr; +} + +static int deinit_tosmaputf8(void *ptr, int *errptr) +{ +	int rc; + +	struct unicode_convert_tosmaputf8 *toutf8= +		(struct unicode_convert_tosmaputf8 *)ptr; + +	/* Flush out the downstream stack */ +	rc=(*toutf8->hdr.next->deinit_handler)(toutf8->hdr.next->ptr, errptr); + +	free(toutf8); +	return rc; +} + +static int do_convert_tosmaputf8(const char *text, size_t cnt, void *arg) +{ +	struct unicode_convert_tosmaputf8 *toutf8= +		(struct unicode_convert_tosmaputf8 *)arg; +	int rc; +	size_t i; +	char octal[4]; + +	while (cnt) +	{ +		if (toutf8->errflag) +			return toutf8->errflag; + +		for (i=0; i<cnt; ++i) +			if (strchr(" ./~:\\", text[i])) +				break; +		if (i) +		{ +			rc= (*toutf8->output_func)(text, i, +						   toutf8->convert_arg); + +			if (rc) +			{ +				toutf8->errflag=rc; +				return rc; +			} +			text += i; +			cnt -= i; +		} + +		if (cnt) +		{ +			char c= *text; + +			octal[0]='\\'; +			octal[3]= (c & 7)+'0'; c /= 8; +			octal[2]= (c & 7)+'0'; c /= 8; +			octal[1]= (c & 7)+'0'; +			rc= (*toutf8->output_func)(octal, 4, +						   toutf8->convert_arg); +			if (rc) +			{ +				toutf8->errflag=rc; +				return rc; +			} +			++text; +			--cnt; +		} +	} +	return 0; +} + +static int convert_utf8_handler(void *ptr, const char *text, size_t cnt) +{ +	struct unicode_convert_tosmaputf8 *toutf8= +		(struct unicode_convert_tosmaputf8 *)ptr; + +	return (*toutf8->hdr.next->convert_handler)(toutf8->hdr.next->ptr, +						    text, cnt); +} +  /************/  /* @@ -483,7 +642,7 @@ static int convert_fromutf7(void *ptr,  static int deinit_fromutf7(void *ptr, int *errptr);  static unicode_convert_handle_t -init_nottoimaputf7(const char *src_chset, +init_nottosmaputf8(const char *src_chset,  		   const char *dst_chset,  		   int (*output_func)(const char *, size_t, void *),  		   void *convert_arg) @@ -648,6 +807,162 @@ static int deinit_fromutf7(void *ptr, int *errptr)  /************/ +/* +** Convert from modified-utf8 SMAP encoding. +** +** This module converts it to UTF-8, then this is attached to a stack that +** converts UTF-8 to the requested charset. +*/ + +static unicode_convert_handle_t +init_notfromsmaputf8(const char *src_chset, +		     const char *dst_chset, +		     int (*output_func)(const char *, size_t, void *), +		     void *convert_arg); + +struct unicode_convert_fromsmaputf8 { + +	struct unicode_convert_hdr hdr; + +	/* Convert a backslash escape */ + +	int in_escape; + +	/* The escaped character */ + +	unsigned char escape_char; + +	int errflag; +	int converr; +}; + +static int convert_fromutf8(void *ptr, +			    const char *text, size_t cnt); +static int deinit_fromutf8(void *ptr, int *errptr); + +static unicode_convert_handle_t +init_notfromimaputf7(const char *src_chset, +		     const char *dst_chset, +		     int (*output_func)(const char *, size_t, void *), +		     void *convert_arg) +{ +	struct unicode_convert_fromsmaputf8 *fromutf8; +	unicode_convert_handle_t h; + +	if (strcmp(src_chset, unicode_x_smap_modutf8)) +		return init_notfromsmaputf8(src_chset, dst_chset, +					    output_func, convert_arg); + +	fromutf8=(struct unicode_convert_fromsmaputf8 *) +		malloc(sizeof(struct unicode_convert_fromsmaputf8)); + +	if (!fromutf8) +		return NULL; + +	memset(fromutf8, 0, sizeof(*fromutf8)); + +	/* Create a stack for converting UTF-8 to the dest charset */ + +	h=init_notfromimaputf7("utf-8", dst_chset, +			       output_func, convert_arg); + +	if (!h) +	{ +		free(fromutf8); +		return (NULL); +	} + +	fromutf8->hdr.next=h; +	fromutf8->hdr.convert_handler=convert_fromutf8; +	fromutf8->hdr.deinit_handler=deinit_fromutf8; +	fromutf8->hdr.ptr=fromutf8; +	return &fromutf8->hdr; +} + +static int convert_fromutf8(void *ptr, +			    const char *text, size_t cnt) +{ +	struct unicode_convert_fromsmaputf8 *fromutf8= +		(struct unicode_convert_fromsmaputf8 *)ptr; +	size_t i; + +	while (cnt) +	{ +		if (fromutf8->errflag) +			return fromutf8->errflag; + +		if (fromutf8->in_escape) +		{ +			if (*text < '0' || *text > '7') +			{ +				errno=EILSEQ; +				return fromutf8->errflag=-1; +			} +			fromutf8->escape_char <<= 3; +			fromutf8->escape_char |= *text - '0'; +			if (--fromutf8->in_escape == 0) +			{ +				fromutf8->errflag=(*fromutf8->hdr.next +						   ->convert_handler) +					(fromutf8->hdr.next->ptr, +					 (const char *)&fromutf8->escape_char, +					 1); +			} +			++text; +			--cnt; +			continue; +		} + +		for (i=0; i<cnt; ++i) +			if (text[i] == '\\') +				break; + +		if (i) +		{ +			fromutf8->errflag=(*fromutf8->hdr.next +					   ->convert_handler) +				(fromutf8->hdr.next->ptr, text, i); +			text += i; +			cnt -= i; +		} + +		if (cnt) +		{ +			fromutf8->escape_char=0; +			fromutf8->in_escape=3; +			++text; +			--cnt; +		} +	} +	return 0; +} + +static int deinit_fromutf8(void *ptr, int *errptr) +{ +	struct unicode_convert_fromsmaputf8 *fromutf8= +		(struct unicode_convert_fromsmaputf8 *)ptr; +	int rc; + +	if (fromutf8->in_escape) +	{ +		fromutf8->errflag= -1; +		errno=EILSEQ; +	} + +	rc=fromutf8->hdr.next->deinit_handler(fromutf8->hdr.next->ptr, errptr); + +	if (fromutf8->errflag && rc == 0) +		rc=fromutf8->errflag; + +	if (errptr && fromutf8->converr) +		*errptr=1; + +	free(fromutf8); +	return rc; +} + +/************/ +  /* A real conversion module, via iconv */  struct unicode_convert_iconv { @@ -674,7 +989,7 @@ static int init_iconv(struct unicode_convert_iconv *h,  		      void *convert_arg);  static unicode_convert_handle_t -init_notfromimaputf7(const char *src_chset, +init_notfromsmaputf8(const char *src_chset,  		     const char *dst_chset,  		     int (*output_func)(const char *, size_t, void *),  		     void *convert_arg) diff --git a/unicode/unicodetest.c b/unicode/unicodetest.c index b59fab3..b309ad6 100644 --- a/unicode/unicodetest.c +++ b/unicode/unicodetest.c @@ -134,6 +134,12 @@ int main(int argc, char **argv)  		++argn;  	} +	if (argn < argc && strcmp(argv[argn], "--smaputf8") == 0) +	{ +		chset=unicode_x_smap_modutf8; +		++argn; +	} +  	if (argn < argc && strcmp(argv[argn], "--totitle") == 0)  	{  		++argn; | 
