diff options
| -rw-r--r-- | rfc2045/.gitignore | 1 | ||||
| -rw-r--r-- | rfc2045/Makefile.am | 11 | ||||
| -rw-r--r-- | rfc2045/rfc2045.h | 24 | ||||
| -rw-r--r-- | rfc2045/rfc6533.c | 283 | ||||
| -rw-r--r-- | rfc2045/testrfc6533parser.c | 64 |
5 files changed, 381 insertions, 2 deletions
diff --git a/rfc2045/.gitignore b/rfc2045/.gitignore
index 2fefef8..28aefd4 100644
--- a/rfc2045/.gitignore
+++ b/rfc2045/.gitignore
@@ -11,3 +11,4 @@
 /rfc2045_config.h.in
 /rfc2045charset.h
 /testrfc3676parser
+/testrfc6533parser
diff --git a/rfc2045/Makefile.am b/rfc2045/Makefile.am
index 1fe66e6..2847a7c 100644
--- a/rfc2045/Makefile.am
+++ b/rfc2045/Makefile.am
@@ -1,9 +1,10 @@
 #
-# Copyright 1998 - 2014 Double Precision, Inc. See COPYING for
+# Copyright 1998 - 2018 Double Precision, Inc. See COPYING for
 # distribution information.
 
 noinst_LTLIBRARIES=librfc2045.la
-noinst_PROGRAMS=reformime makemime headercheck testrfc3676parser
+noinst_PROGRAMS=reformime makemime headercheck testrfc3676parser \
+	testrfc6533parser
 
 AM_CXXFLAGS=@COURIER_UNICODE_CXXFLAGS@
 
@@ -29,6 +30,7 @@ librfc2045_la_SOURCES=rfc2045.c rfc2045.h rfc2045src.h \
 	rfc2045decodemsgtoutf8.c \
 	rfc2231.c rfc2231encode.c \
 	rfc3676parser.h rfc3676parser.c rfc3676parsercpp.C \
+	rfc6533.c \
 	base64.c base64.h
 
 reformime_SOURCES=reformime.c
@@ -51,6 +53,10 @@ testrfc3676parser_SOURCES=testrfc3676parser.c
 testrfc3676parser_DEPENDENCIES=librfc2045.la
 testrfc3676parser_LDADD=$(testrfc3676parser_DEPENDENCIES) -lcourier-unicode
 
+testrfc6533parser_SOURCES=testrfc6533parser.c
+testrfc6533parser_DEPENDENCIES=librfc2045.la
+testrfc6533parser_LDADD=$(testrfc6533parser_DEPENDENCIES) -lcourier-unicode
+
 if HAVE_SGML
 reformime.html: reformime.sgml ../docbook/sgml2html
 	../docbook/sgml2html reformime.sgml reformime.html
@@ -81,3 +87,4 @@ check-am:
 	@SHELL@ $(srcdir)/testsuite | cmp -s - $(srcdir)/$(TESTSUITE)
 	@SHELL@ $(srcdir)/testsuitemm | cmp -s - $(srcdir)/testsuitemm.txt
 	@SHELL@ $(srcdir)/testrfc3676parsersuite | diff -U 3 $(srcdir)/testrfc3676parsersuite.txt -
+	./testrfc6533parser
diff --git a/rfc2045/rfc2045.h b/rfc2045/rfc2045.h
index 4aec67b..71c0121 100644
--- a/rfc2045/rfc2045.h
+++ b/rfc2045/rfc2045.h
@@ -683,6 +683,30 @@ void rfc2231_paramDecode(struct rfc2231param *paramList,
 			 int *langLen,
 			 int *textLen);
 
+/*
+** Encode an E-mail address as utf-8 address type specified in RFC 6533.
+** The e-mail address parameter must be encoded in UTF-8.
+**
+** The E-mail address is encoded as "rfc822" address type if it has only
+** ASCII characters, or if use_rfc822 is set to non-zero.
+**
+** A malloc-ed address gets returned.
+*/
+
+char *rfc6533_encode(const char *address, int use_rfc822);
+
+/*
+** Decode a utf-8 or an rfc-822 address type. Returns a malloc-ed buffer,
+** or NULL if the address cannot be decoded.
+**
+** Assumes valid UTF-8 coding, and does not verify it.
+**
+** Does verify, for both rfc-822 and utf-8 formats, that the returned address
+** does not contain control characters.
+*/
+
+char *rfc6533_decode(const char *address);
+
 #if 0
 {
 #endif
diff --git a/rfc2045/rfc6533.c b/rfc2045/rfc6533.c
new file mode 100644
index 0000000..529da45
--- /dev/null
+++ b/rfc2045/rfc6533.c
@@ -0,0 +1,283 @@
+/*
+** Copyright 2018 Double Precision, Inc. See COPYING for
+** distribution information.
+*/
+
+/* Encoding and decoding of the RFC 6533 "rfc822" and "utf-8" address types.
+*/
+
+#if HAVE_CONFIG_H
+#include "rfc2045_config.h"
+#endif
+#include "rfc2045.h"
+#include <courier-unicode.h>
+#include <string.h>
+#include <stdlib.h>
+
+static const char xdigit[]="0123456789ABCDEF";
+
+static void count(const char *s, size_t n, void *ptr)
+{
+	(*(size_t *)ptr) += n;
+}
+
+static void save(const char *s, size_t n, void *ptr)
+{
+	char **p=(char **)ptr;
+
+	memcpy(*p, s, n);
+	*p += n;
+}
+
+static void encode_rfc822(const char *address,
+			  void (*callback)(const char *, size_t, void *),
+			  void *arg)
+{
+	callback("rfc822;", 7, arg);
+
+	while (*address)
+	{
+		size_t i;
+
+		for (i=0; address[i]; ++i)
+		{
+			if (address[i] < '!' || address[i] > '~')
+				break;
+			if (address[i] == '+' || address[i] == '=')
+				break;
+		}
+
+		if (i == 0)
+		{
+			(*callback)("+", 1, arg);
+
+			(*callback)(xdigit + ((*address >> 4) & 15), 1, arg);
+			(*callback)(xdigit + (*address & 15), 1, arg);
+			++address;
+			continue;
+		}
+
+		(*callback)(address, i, arg);
+		address += i;
+	}
+	(*callback)("", 1, arg);
+}
+
+static void encode_rfc6533(const char *address,
+			   void (*callback)(const char *, size_t, void *),
+			   void *arg)
+{
+	callback("utf-8;", 6, arg);
+
+	while (*address)
+	{
+		size_t i;
+
+		for (i=0; address[i]; ++i)
+		{
+			if ((unsigned char)address[i] <= ' ')
+				break;
+
+			if (address[i] == '\\' ||
+			    address[i] == '+' ||
+			    address[i] == '=' ||
+			    address[i] == 0x7f)
+				break;
+		}
+
+		if (i == 0)
+		{
+			/* Emit the byte as an embedded \x{HH} escape. */
+
+			(*callback)("\\x{", 3, arg);
+			(*callback)(xdigit + ((*address >> 4) & 15), 1, arg);
+			(*callback)(xdigit + (*address & 15), 1, arg);
+			(*callback)("}", 1, arg);
+			++address;
+			continue;
+		}
+
+		(*callback)(address, i, arg);
+		address += i;
+	}
+	(*callback)("", 1, arg);
+}
+
+char *rfc6533_encode(const char *address, int use_rfc822)
+{
+	size_t l=0;
+	char *buffer;
+	char *p;
+	const char *cp;
+
+	for (cp=address; *cp; ++cp)
+		if (*cp & 0x80)
+			break;
+
+	if (!*cp || use_rfc822)
+	{
+		encode_rfc822(address, count, &l);
+
+		if ((buffer=malloc(l)) == NULL)
+			abort();
+
+		p=buffer;
+		encode_rfc822(address, save, &p);
+		return buffer;
+	}
+
+	encode_rfc6533(address, count, &l);
+
+	if ((buffer=malloc(l)) == NULL)
+		abort();
+	p=buffer;
+	encode_rfc6533(address, save, &p);
+	return buffer;
+}
+
+static int decode_rfc6533(const char *address,
+			  void (*callback)(const char *, size_t, void *),
+			  void *arg)
+{
+	while (*address)
+	{
+		size_t i;
+		char32_t c;
+		char *p;
+		size_t ignore1;
+		int err;
+
+		for (i=0; address[i]; ++i)
+		{
+			if (address[i] == '\\')
+				break;
+		}
+
+		if (i)
+		{
+			(*callback)(address, i, arg);
+
+			address += i;
+			continue;
+		}
+
+		if (address[1] != 'x' ||
+		    address[2] != '{')
+			return -1;
+
+		c=0;
+
+		address += 3;
+
+		while (*address != '}')
+		{
+			const char *digit;
+
+			if (!*address)
+				return -1;
+
+			digit=strchr(xdigit, *address);
+			if (!digit || c > 0x10FFFF) /* not hex, or would wrap */
+				return -1;
+			c <<= 4;
+			c |= (digit-xdigit);
+			++address;
+		}
+		++address;
+		if (c == 0 || c > 0x10FFFF) /* NUL, or beyond U+10FFFF */
+			return -1;
+
+		err=0;
+		if (unicode_convert_fromu_tobuf(&c, 1, "utf-8",
+						&p, &ignore1, &err))
+			return -1; /* was NULL, i.e. 0, which means success */
+
+		if (err)
+		{
+			free(p);
+			return -1; /* was NULL, i.e. 0, which means success */
+		}
+		(*callback)(p, strlen(p), arg);
+		free(p);
+	}
+	(*callback)("", 1, arg);
+	return 0;
+}
+
+char *rfc6533_decode(const char *address)
+{
+	size_t l;
+	char *buf;
+	char *p;
+
+	if (strncasecmp(address, "rfc822;", 7) == 0)
+	{
+		buf=malloc(strlen(address));
+
+		if (!buf)
+			abort();
+
+		p=buf;
+
+		address += 7;
+
+		while (*address)
+		{
+			const char *hi, *lo;
+
+			if (*address != '+')
+			{
+				*p++ = *address++;
+				continue;
+			}
+
+			++address;
+
+			if (*address)
+			{
+				hi=strchr(xdigit, *address);
+				++address;
+				if (*address)
+				{
+					lo=strchr(xdigit, *address);
+					++address;
+
+					if (hi && lo)
+					{
+						char n= (char)
+							((hi-xdigit) * 16
+							 +(lo-xdigit));
+
+						if (n)
+							*p++=n;
+					}
+				}
+			}
+		}
+		*p=0;
+	}
+	else
+	{
+		if (strncasecmp(address, "utf-8;", 6))
+			return NULL;
+
+		l=0;
+
+		if (decode_rfc6533(address+6, count, &l))
+			return NULL;
+
+		if ((buf=malloc(l)) == NULL)
+			abort();
+
+		p=buf;
+		decode_rfc6533(address+6, save, &p);
+	}
+
+	for (p=buf; *p; ++p)
+		if ((unsigned char)*p <= ' ')
+		{
+			free(buf);
+			return NULL;
+		}
+	return buf;
+}
diff --git a/rfc2045/testrfc6533parser.c b/rfc2045/testrfc6533parser.c
new file mode 100644
index 0000000..0dbf249
--- /dev/null
+++ b/rfc2045/testrfc6533parser.c
@@ -0,0 +1,64 @@
+/*
+** Copyright 2018 Double Precision, Inc.
+** See COPYING for distribution information.
+*/
+
+#include "rfc2045.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+
+static const struct {
+	const char *address;
+	int use_rfc822;
+	const char *result;
+} encode_tests[]={
+	{"nobody@example.com", 0, "rfc822;nobody@example.com"},
+	{"nobody+=me@example.com", 0,
+	 "rfc822;nobody+2B+3Dme@example.com"},
+	{"\xd0\xb8\xd1\x81\xd0\xbf\xd1\x8b\xd1\x82\xd0\xb0\xd0\xbd\xd0\xb8\xd0\xb5@example.com", 0, "utf-8;\xd0\xb8\xd1\x81\xd0\xbf\xd1\x8b\xd1\x82\xd0\xb0\xd0\xbd\xd0\xb8\xd0\xb5@example.com"},
+	{"\xd0\xb8\xd1\x81\xd0\xbf\xd1\x8b\xd1\x82\xd0\xb0\xd0\xbd\xd0\xb8\xd0\xb5@example.com", 1, "rfc822;+D0+B8+D1+81+D0+BF+D1+8B+D1+82+D0+B0+D0+BD+D0+B8+D0+B5@example.com"},
+	{"\xd0\xb8\xd1\x81\xd0\xbf\xd1\x8b\xd1\x82\xd0\xb0\xd0\xbd\xd0\xb8\xd0\xb5+=\\me@example.com", 0, "utf-8;\xd0\xb8\xd1\x81\xd0\xbf\xd1\x8b\xd1\x82\xd0\xb0\xd0\xbd\xd0\xb8\xd0\xb5\\x{2B}\\x{3D}\\x{5C}me@example.com"},
+
+};
+
+int main(int argc, char **argv)
+{
+	size_t i;
+
+	for (i=0; i<sizeof(encode_tests)/sizeof(encode_tests[0]); ++i)
+	{
+		char *p=rfc6533_encode(encode_tests[i].address,
+				       encode_tests[i].use_rfc822);
+		char *q;
+
+		if (strcmp(p, encode_tests[i].result))
+		{
+			fprintf(stderr, "Expected to encode %s as %s, got %s\n",
+				encode_tests[i].address,
+				encode_tests[i].result,
+				p);
+			exit(1);
+		}
+		q=rfc6533_decode(p);
+
+		if (!q)
+		{
+			fprintf(stderr, "Could not decode %s\n", p);
+			exit(1);
+		}
+
+		if (strcmp(q, encode_tests[i].address))
+		{
+			fprintf(stderr, "Expected to decode %s as %s, got %s\n",
+				p,
+				encode_tests[i].address,
+				q);
+			exit(1);
+		}
+		free(p);
+		free(q);
+	}
+
+	exit(0);
+}
