diff options
| author | Sam Varshavchik | 2018-07-11 22:16:22 -0400 |
|---|---|---|
| committer | Sam Varshavchik | 2018-07-11 22:16:22 -0400 |
| commit | 4fc91797ada09e9f8e3bd0a3cdbe0c78edf2530d (patch) | |
| tree | 0f4417dcffecd8b8e8f060a8a79ca42bc236484a | |
| parent | 03050820bf8e22adb2ec88d079cdde01d7fa6a29 (diff) | |
| download | courier-libs-4fc91797ada09e9f8e3bd0a3cdbe0c78edf2530d.tar.bz2 | |
courier-unicode: implement unicode_x_smap_modutf8 pseudo-encoding
| -rw-r--r-- | unicode/ChangeLog | 4 |
| -rw-r--r-- | unicode/Makefile.am | 3 |
| -rw-r--r-- | unicode/courier-unicode.h.in | 15 |
| -rw-r--r-- | unicode/unicode.c | 321 |
| -rw-r--r-- | unicode/unicodetest.c | 6 |
5 files changed, 345 insertions, 4 deletions
diff --git a/unicode/ChangeLog b/unicode/ChangeLog index 42278a8..f29176e 100644 --- a/unicode/ChangeLog +++ b/unicode/ChangeLog @@ -1,3 +1,7 @@ +2018-07-11 Sam Varshavchik <mrsam@courier-mta.com> + + * unicode.c: Implement unicode_x_smap_modutf8 pseudo-encoding. + 2018-04-27 Sam Varshavchik <mrsam@courier-mta.com> * gcc 8 update, fix assertions. libtool and toolchain updates. diff --git a/unicode/Makefile.am b/unicode/Makefile.am index a7a0783..bb46ae6 100644 --- a/unicode/Makefile.am +++ b/unicode/Makefile.am @@ -167,6 +167,9 @@ check-am: unicodetest test "`./unicodetest 'foobааааааr'`" = "foob&BDAEMAQwBDAEMAQw-r" test "`./unicodetest 'foobаaаr'`" = "foob&BDA-a&BDA-r" test "`./unicodetest 'foobааaааr'`" = "foob&BDAEMA-a&BDAEMA-r" + test "`./unicodetest --smaputf8 'hello world'`" = 'hello\040world' + test "`./unicodetest --smaputf8 'hello\\\\world'`" = 'hello\134\134world' + test "`./unicodetest --smaputf8 ':hello:world:'`" = '\072hello\072world\072' n="aaaaaaaa"; n="$$n$$n$$n$$n"; n="$$n$$n$$n$$n"; n="$$n$$n$$n$$n"; n="$$n$$n"; n="`echo $$n | cut -c1-1023`"; test "`./unicodetest $$n`" = "$$n" n="aaaaaaaa"; n="$$n$$n$$n$$n"; n="$$n$$n$$n$$n"; n="$$n$$n$$n$$n"; n="$$n$$n"; test "`./unicodetest $$n`" = "$$n" n="aaaaaaaa"; n="$$n$$n$$n$$n"; n="$$n$$n$$n$$n"; n="$$n$$n$$n$$n"; n="$$n$$n"; test "`./unicodetest a$$n`" = "a$$n" diff --git a/unicode/courier-unicode.h.in b/unicode/courier-unicode.h.in index 6bf9c17..7c6a00d 100644 --- a/unicode/courier-unicode.h.in +++ b/unicode/courier-unicode.h.in @@ -2,7 +2,7 @@ #define courier_unicode_h /* -** Copyright 2000-2017 Double Precision, Inc. +** Copyright 2000-2018 Double Precision, Inc. ** See COPYING for distribution information. ** */ @@ -987,6 +987,19 @@ extern const char unicode_u_ucs2_native[]; #define unicode_x_imap_modutf7 "x-imap-modutf7" +/* +** EAI-capable Courier-IMAP does not use modified-UTF7, and uses UTF-8. +** +** However, to support SMAP we will still need to encode/decode some +** special characters. 
+** +** The characters U+0000-U+0020 (including space), and ./~:\ +** +** They are encoded as a backslash followed by three octal digits. +*/ + +#define unicode_x_smap_modutf8 "x-smap-modutf8" + #if 0 { #endif diff --git a/unicode/unicode.c b/unicode/unicode.c index 71e6439..2cf5856 100644 --- a/unicode/unicode.c +++ b/unicode/unicode.c @@ -1,5 +1,5 @@ /* -** Copyright 2000-2011 Double Precision, Inc. +** Copyright 2000-2018 Double Precision, Inc. ** See COPYING for distribution information. ** */ @@ -239,6 +239,42 @@ static int do_convert_toutf7(const char *text, size_t cnt, void *arg); static int convert_utf7_handler(void *ptr, const char *text, size_t cnt); /* +** Conversion wrapper for converting to modified-utf8 SMAP encoding. +** +** This is done by converting to UTF-8, then stacking on a module that +** takes that and converts UTF-8 to modified-UTF8. +** +** init_nottosmaputf8() returns an opaque stack for converting to modified +** UTF-8. +*/ + +static unicode_convert_handle_t +init_nottosmaputf8(const char *src_chset, + const char *dst_chset, + int (*output_func)(const char *, size_t, void *), + void *convert_arg); + +/* +** The to modified UTF8 module +*/ + +struct unicode_convert_tosmaputf8 { + + struct unicode_convert_hdr hdr; + + int errflag; + + /* Remembered output function */ + + int (*output_func)(const char *, size_t, void *); + + /* Remembered arg to the output function */ + void *convert_arg; +}; + + + +/* ** Create a conversion module stack */ @@ -417,6 +453,129 @@ static int deinit_toimaputf7(void *ptr, int *errptr) return rc; } +/* +** Convert to unicode_x_smap_modutf8. 
+*/ + +static int deinit_tosmaputf8(void *ptr, int *errptr); +static int do_convert_tosmaputf8(const char *text, size_t cnt, void *arg); +static int convert_utf8_handler(void *ptr, const char *text, size_t cnt); + +static unicode_convert_handle_t +init_nottoimaputf7(const char *src_chset, + const char *dst_chset, + int (*output_func)(const char *, size_t, void *), + void *convert_arg) +{ + struct unicode_convert_tosmaputf8 *toutf8; + unicode_convert_handle_t h; + + if (strcmp(dst_chset, unicode_x_smap_modutf8)) + return init_nottosmaputf8(src_chset, dst_chset, + output_func, + convert_arg); + + toutf8=malloc(sizeof(struct unicode_convert_tosmaputf8)); + + if (!toutf8) + return NULL; + + memset(toutf8, 0, sizeof(*toutf8)); + + h=init_nottosmaputf8(src_chset, "utf-8", + do_convert_tosmaputf8, + toutf8); + if (!h) + { + free(toutf8); + return (NULL); + } + + toutf8->output_func=output_func; + toutf8->convert_arg=convert_arg; + + toutf8->hdr.convert_handler=convert_utf8_handler; + toutf8->hdr.deinit_handler=deinit_tosmaputf8; + toutf8->hdr.ptr=toutf8; + toutf8->hdr.next=h; + return &toutf8->hdr; +} + +static int deinit_tosmaputf8(void *ptr, int *errptr) +{ + int rc; + + struct unicode_convert_tosmaputf8 *toutf8= + (struct unicode_convert_tosmaputf8 *)ptr; + + /* Flush out the downstream stack */ + rc=(*toutf8->hdr.next->deinit_handler)(toutf8->hdr.next->ptr, errptr); + + free(toutf8); + return rc; +} + +static int do_convert_tosmaputf8(const char *text, size_t cnt, void *arg) +{ + struct unicode_convert_tosmaputf8 *toutf8= + (struct unicode_convert_tosmaputf8 *)arg; + int rc; + size_t i; + char octal[4]; + + while (cnt) + { + if (toutf8->errflag) + return toutf8->errflag; + + for (i=0; i<cnt; ++i) + if (strchr(" ./~:\\", text[i])) + break; + if (i) + { + rc= (*toutf8->output_func)(text, i, + toutf8->convert_arg); + + if (rc) + { + toutf8->errflag=rc; + return rc; + } + text += i; + cnt -= i; + } + + if (cnt) + { + char c= *text; + + octal[0]='\\'; + octal[3]= (c & 
7)+'0'; c /= 8; + octal[2]= (c & 7)+'0'; c /= 8; + octal[1]= (c & 7)+'0'; + rc= (*toutf8->output_func)(octal, 4, + toutf8->convert_arg); + if (rc) + { + toutf8->errflag=rc; + return rc; + } + ++text; + --cnt; + } + } + return 0; +} + +static int convert_utf8_handler(void *ptr, const char *text, size_t cnt) +{ + struct unicode_convert_tosmaputf8 *toutf8= + (struct unicode_convert_tosmaputf8 *)ptr; + + return (*toutf8->hdr.next->convert_handler)(toutf8->hdr.next->ptr, + text, cnt); +} + /************/ /* @@ -483,7 +642,7 @@ static int convert_fromutf7(void *ptr, static int deinit_fromutf7(void *ptr, int *errptr); static unicode_convert_handle_t -init_nottoimaputf7(const char *src_chset, +init_nottosmaputf8(const char *src_chset, const char *dst_chset, int (*output_func)(const char *, size_t, void *), void *convert_arg) @@ -648,6 +807,162 @@ static int deinit_fromutf7(void *ptr, int *errptr) /************/ +/* +** Convert from modified-utf8 SMAP encoding. +** +** This module converts it to UTF-8, then this is attached to a stack that +** converts UTF-8 to the requested charset. 
+*/ + +static unicode_convert_handle_t +init_notfromsmaputf8(const char *src_chset, + const char *dst_chset, + int (*output_func)(const char *, size_t, void *), + void *convert_arg); + +struct unicode_convert_fromsmaputf8 { + + struct unicode_convert_hdr hdr; + + /* Convert a backslash escape */ + + int in_escape; + + /* The escaped character */ + + unsigned char escape_char; + + int errflag; + int converr; +}; + +static int convert_fromutf8(void *ptr, + const char *text, size_t cnt); +static int deinit_fromutf8(void *ptr, int *errptr); + +static unicode_convert_handle_t +init_notfromimaputf7(const char *src_chset, + const char *dst_chset, + int (*output_func)(const char *, size_t, void *), + void *convert_arg) +{ + struct unicode_convert_fromsmaputf8 *fromutf8; + unicode_convert_handle_t h; + + if (strcmp(src_chset, unicode_x_smap_modutf8)) + return init_notfromsmaputf8(src_chset, dst_chset, + output_func, convert_arg); + + fromutf8=(struct unicode_convert_fromsmaputf8 *) + malloc(sizeof(struct unicode_convert_fromsmaputf8)); + + if (!fromutf8) + return NULL; + + memset(fromutf8, 0, sizeof(*fromutf8)); + + /* Create a stack for converting UTF-8 to the dest charset */ + + h=init_notfromimaputf7("utf-8", dst_chset, + output_func, convert_arg); + + if (!h) + { + free(fromutf8); + return (NULL); + } + + fromutf8->hdr.next=h; + fromutf8->hdr.convert_handler=convert_fromutf8; + fromutf8->hdr.deinit_handler=deinit_fromutf8; + fromutf8->hdr.ptr=fromutf8; + return &fromutf8->hdr; +} + +static int convert_fromutf8(void *ptr, + const char *text, size_t cnt) +{ + struct unicode_convert_fromsmaputf8 *fromutf8= + (struct unicode_convert_fromsmaputf8 *)ptr; + size_t i; + + while (cnt) + { + if (fromutf8->errflag) + return fromutf8->errflag; + + if (fromutf8->in_escape) + { + if (*text < '0' || *text > '7') + { + errno=EILSEQ; + return fromutf8->errflag=-1; + } + fromutf8->escape_char <<= 3; + fromutf8->escape_char |= *text - '0'; + if (--fromutf8->in_escape == 0) + { + 
fromutf8->errflag=(*fromutf8->hdr.next + ->convert_handler) + (fromutf8->hdr.next->ptr, + (const char *)&fromutf8->escape_char, + 1); + } + ++text; + --cnt; + continue; + } + + for (i=0; i<cnt; ++i) + if (text[i] == '\\') + break; + + if (i) + { + fromutf8->errflag=(*fromutf8->hdr.next + ->convert_handler) + (fromutf8->hdr.next->ptr, text, i); + text += i; + cnt -= i; + } + + if (cnt) + { + fromutf8->escape_char=0; + fromutf8->in_escape=3; + ++text; + --cnt; + } + } + return 0; +} + +static int deinit_fromutf8(void *ptr, int *errptr) +{ + struct unicode_convert_fromsmaputf8 *fromutf8= + (struct unicode_convert_fromsmaputf8 *)ptr; + int rc; + + if (fromutf8->in_escape) + { + fromutf8->errflag= -1; + errno=EILSEQ; + } + + rc=fromutf8->hdr.next->deinit_handler(fromutf8->hdr.next->ptr, errptr); + + if (fromutf8->errflag && rc == 0) + rc=fromutf8->errflag; + + if (errptr && fromutf8->converr) + *errptr=1; + + free(fromutf8); + return rc; +} + +/************/ + /* A real conversion module, via iconv */ struct unicode_convert_iconv { @@ -674,7 +989,7 @@ static int init_iconv(struct unicode_convert_iconv *h, void *convert_arg); static unicode_convert_handle_t -init_notfromimaputf7(const char *src_chset, +init_notfromsmaputf8(const char *src_chset, const char *dst_chset, int (*output_func)(const char *, size_t, void *), void *convert_arg) diff --git a/unicode/unicodetest.c b/unicode/unicodetest.c index b59fab3..b309ad6 100644 --- a/unicode/unicodetest.c +++ b/unicode/unicodetest.c @@ -134,6 +134,12 @@ int main(int argc, char **argv) ++argn; } + if (argn < argc && strcmp(argv[argn], "--smaputf8") == 0) + { + chset=unicode_x_smap_modutf8; + ++argn; + } + if (argn < argc && strcmp(argv[argn], "--totitle") == 0) { ++argn; |
