summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Sam Varshavchik 2018-07-29 22:58:53 -0400
committer Sam Varshavchik 2018-07-29 23:15:23 -0400
commit 7ccfca0cf50348039815532273d0aaf2d4318474 (patch)
tree d6bb17ccc88fbd674eb804bf80f4fc0bec41a964
parent 6c2fe8b0d5be19cdd579bb5b86cec95cab62c320 (diff)
download courier-libs-7ccfca0cf50348039815532273d0aaf2d4318474.tar.bz2
Implement RFC 6533 address encoding and decoding.
-rw-r--r-- rfc2045/.gitignore 1
-rw-r--r-- rfc2045/Makefile.am 11
-rw-r--r-- rfc2045/rfc2045.h 24
-rw-r--r-- rfc2045/rfc6533.c 283
-rw-r--r-- rfc2045/testrfc6533parser.c 64
5 files changed, 381 insertions, 2 deletions
diff --git a/rfc2045/.gitignore b/rfc2045/.gitignore
index 2fefef8..28aefd4 100644
--- a/rfc2045/.gitignore
+++ b/rfc2045/.gitignore
@@ -11,3 +11,4 @@
/rfc2045_config.h.in
/rfc2045charset.h
/testrfc3676parser
+/testrfc6533parser
diff --git a/rfc2045/Makefile.am b/rfc2045/Makefile.am
index 1fe66e6..2847a7c 100644
--- a/rfc2045/Makefile.am
+++ b/rfc2045/Makefile.am
@@ -1,9 +1,10 @@
#
-# Copyright 1998 - 2014 Double Precision, Inc. See COPYING for
+# Copyright 1998 - 2018 Double Precision, Inc. See COPYING for
# distribution information.
noinst_LTLIBRARIES=librfc2045.la
-noinst_PROGRAMS=reformime makemime headercheck testrfc3676parser
+noinst_PROGRAMS=reformime makemime headercheck testrfc3676parser \
+ testrfc6533parser
AM_CXXFLAGS=@COURIER_UNICODE_CXXFLAGS@
@@ -29,6 +30,7 @@ librfc2045_la_SOURCES=rfc2045.c rfc2045.h rfc2045src.h \
rfc2045decodemsgtoutf8.c \
rfc2231.c rfc2231encode.c \
rfc3676parser.h rfc3676parser.c rfc3676parsercpp.C \
+ rfc6533.c \
base64.c base64.h
reformime_SOURCES=reformime.c
@@ -51,6 +53,10 @@ testrfc3676parser_SOURCES=testrfc3676parser.c
testrfc3676parser_DEPENDENCIES=librfc2045.la
testrfc3676parser_LDADD=$(testrfc3676parser_DEPENDENCIES) -lcourier-unicode
+testrfc6533parser_SOURCES=testrfc6533parser.c
+testrfc6533parser_DEPENDENCIES=librfc2045.la
+testrfc6533parser_LDADD=$(testrfc6533parser_DEPENDENCIES) -lcourier-unicode
+
if HAVE_SGML
reformime.html: reformime.sgml ../docbook/sgml2html
../docbook/sgml2html reformime.sgml reformime.html
@@ -81,3 +87,4 @@ check-am:
@SHELL@ $(srcdir)/testsuite | cmp -s - $(srcdir)/$(TESTSUITE)
@SHELL@ $(srcdir)/testsuitemm | cmp -s - $(srcdir)/testsuitemm.txt
@SHELL@ $(srcdir)/testrfc3676parsersuite | diff -U 3 $(srcdir)/testrfc3676parsersuite.txt -
+ ./testrfc6533parser
diff --git a/rfc2045/rfc2045.h b/rfc2045/rfc2045.h
index 4aec67b..71c0121 100644
--- a/rfc2045/rfc2045.h
+++ b/rfc2045/rfc2045.h
@@ -683,6 +683,30 @@ void rfc2231_paramDecode(struct rfc2231param *paramList,
int *langLen,
int *textLen);
+/*
+** Encode an E-mail address as utf-8 address type specified in RFC 6533.
+** The e-mail address parameter must be encoded in UTF-8.
+**
+** The E-mail address is encoded as "rfc822" address type if it has only
+** ASCII characters, or if use_rfc822 is set to a non-zero value.
+**
+** A malloc-ed address gets returned.
+*/
+
+char *rfc6533_encode(const char *address, int use_rfc822);
+
+/*
+** Decode a utf-8 or an rfc-822 address type. Returns a malloc-ed buffer,
+** or NULL if the address cannot be decoded.
+**
+** Assumes valid UTF-8 coding, and does not verify it.
+**
+** Does verify, for both rfc-822 and utf-8 formats, that the returned address
+** does not contain control characters.
+*/
+
+char *rfc6533_decode(const char *address);
+
#if 0
{
#endif
diff --git a/rfc2045/rfc6533.c b/rfc2045/rfc6533.c
new file mode 100644
index 0000000..529da45
--- /dev/null
+++ b/rfc2045/rfc6533.c
@@ -0,0 +1,283 @@
+/*
+** Copyright 2018 Double Precision, Inc. See COPYING for
+** distribution information.
+*/
+
+/*
+*/
+
+#if HAVE_CONFIG_H
+#include "rfc2045_config.h"
+#endif
+#include "rfc2045.h"
+#include <courier-unicode.h>
+#include <string.h>
+#include <stdlib.h>
+
+/* Upper-case hexadecimal digits, indexed by nibble value (0-15). */
+static const char xdigit[]="0123456789ABCDEF";
+
+/*
+** Sizing callback: adds the length of each emitted chunk to the size_t
+** accumulator that ptr points to. The chunk's contents are not examined.
+*/
+static void count(const char *s, size_t n, void *ptr)
+{
+	size_t *total=ptr;
+
+	(void)s;
+	*total += n;
+}
+
+/*
+** Output callback: copies the n-byte chunk to the write cursor that ptr
+** points to (a char *), then advances that cursor past the copied bytes.
+** The destination must already be large enough; no bounds are checked here.
+*/
+static void save(const char *s, size_t n, void *ptr)
+{
+	char **cursor=ptr;
+	char *dest=*cursor;
+
+	memcpy(dest, s, n);
+	*cursor=dest + n;
+}
+
+/*
+** Emit address in "rfc822;" form through callback.
+**
+** Bytes in the range '!' through '~', other than '+' and '=', are passed
+** through in runs. Every other byte is emitted as '+' followed by two
+** upper-case hexadecimal digits. The output ends with a single NUL byte, so
+** the total number of bytes handed to callback includes the terminator.
+*/
+static void encode_rfc822(const char *address,
+			  void (*callback)(const char *, size_t, void *),
+			  void *arg)
+{
+	(*callback)("rfc822;", 7, arg);
+
+	while (*address)
+	{
+		size_t plain=0;
+
+		/* Measure the run of bytes that need no escaping. */
+		while (address[plain] >= '!' && address[plain] <= '~' &&
+		       address[plain] != '+' && address[plain] != '=')
+			++plain;
+
+		if (plain)
+		{
+			(*callback)(address, plain, arg);
+			address += plain;
+			continue;
+		}
+
+		/* The current byte must be escaped as +XX. */
+		(*callback)("+", 1, arg);
+		(*callback)(&xdigit[(*address >> 4) & 15], 1, arg);
+		(*callback)(&xdigit[*address & 15], 1, arg);
+		++address;
+	}
+
+	/* Trailing NUL terminator. */
+	(*callback)("", 1, arg);
+}
+
+/*
+** Emit address in "utf-8;" form through callback.
+**
+** Bytes at or below the space character, the backslash, '+', '=' and 0x7F
+** are emitted as "\x{XX}" with two upper-case hexadecimal digits. All other
+** bytes, including bytes with the high bit set, are passed through in runs.
+** The output ends with a single NUL byte.
+*/
+static void encode_rfc6533(const char *address,
+			   void (*callback)(const char *, size_t, void *),
+			   void *arg)
+{
+	static const char hexdigits[]="0123456789ABCDEF";
+
+	(*callback)("utf-8;", 6, arg);
+
+	while (*address)
+	{
+		size_t run=0;
+
+		/* Measure the run of bytes that need no escaping. */
+		for (;;)
+		{
+			unsigned char ch=(unsigned char)address[run];
+
+			if (ch <= ' ' || ch == '\\' || ch == '+' ||
+			    ch == '=' || ch == 0x7f)
+				break;
+			++run;
+		}
+
+		if (run)
+		{
+			(*callback)(address, run, arg);
+			address += run;
+			continue;
+		}
+
+		/* The current byte must be escaped as \x{XX}. */
+		(*callback)("\\x{", 3, arg);
+		(*callback)(&hexdigits[(*address >> 4) & 15], 1, arg);
+		(*callback)(&hexdigits[*address & 15], 1, arg);
+		(*callback)("}", 1, arg);
+		++address;
+	}
+
+	/* Trailing NUL terminator. */
+	(*callback)("", 1, arg);
+}
+
+/*
+** Encode address as an RFC 6533 typed address and return it in a malloc-ed,
+** NUL-terminated buffer.
+**
+** The "rfc822;" form is used when address contains no bytes with the high
+** bit set, or when use_rfc822 is non-zero; otherwise the "utf-8;" form is
+** used. The chosen encoder runs twice: once to size the buffer, once to fill
+** it. abort() is called if the allocation fails.
+*/
+char *rfc6533_encode(const char *address, int use_rfc822)
+{
+	void (*encoder)(const char *,
+			void (*)(const char *, size_t, void *),
+			void *)=encode_rfc822;
+	const char *scan=address;
+	size_t needed=0;
+	char *result;
+	char *out;
+
+	/* Find the first non-ASCII byte, if there is one. */
+	while (*scan && !(*scan & 0x80))
+		++scan;
+
+	if (*scan && !use_rfc822)
+		encoder=encode_rfc6533;
+
+	(*encoder)(address, count, &needed);
+
+	result=malloc(needed);
+	if (result == NULL)
+		abort();
+
+	out=result;
+	(*encoder)(address, save, &out);
+	return result;
+}
+
+/*
+** Decode the part of a "utf-8" typed address that follows the "utf-8;"
+** prefix, handing the decoded bytes to callback in chunks.
+**
+** Text up to the next backslash is passed through unchanged. A backslash
+** must begin a "\x{HEX}" sequence of upper-case hexadecimal digits; the
+** resulting code point is converted to UTF-8 and emitted. On success a
+** final NUL byte is emitted and 0 is returned.
+**
+** Returns -1, without emitting the final NUL, if an escape is malformed,
+** if it denotes code point 0 or a value above U+10FFFF, or if the
+** conversion to UTF-8 fails.
+*/
+static int decode_rfc6533(const char *address,
+			  void (*callback)(const char *, size_t, void *),
+			  void *arg)
+{
+	while (*address)
+	{
+		size_t i;
+		char32_t c;
+		char *p;
+		size_t ignore1;
+		int err;
+
+		for (i=0; address[i]; ++i)
+		{
+			if (address[i] == '\\')
+				break;
+		}
+
+		if (i)
+		{
+			(*callback)(address, i, arg);
+
+			address += i;
+			continue;
+		}
+
+		/* A backslash is only valid as the start of "\x{". */
+		if (address[1] != 'x' ||
+		    address[2] != '{')
+			return -1;
+
+		c=0;
+
+		address += 3;
+
+		while (*address != '}')
+		{
+			const char *digit;
+
+			/* Unterminated escape. */
+			if (!*address)
+				return -1;
+
+			digit=strchr(xdigit, *address);
+			if (!digit)
+				return -1;
+			c <<= 4;
+			c |= (char32_t)(digit-xdigit);
+
+			/*
+			** Reject values beyond the Unicode range right away,
+			** so that an over-long digit string cannot wrap
+			** around in the 32-bit accumulator.
+			*/
+			if (c > 0x10FFFF)
+				return -1;
+			++address;
+		}
+		++address;
+
+		/* Also rejects an empty "\x{}". */
+		if (c == 0)
+			return -1;
+
+		/*
+		** Failures must be reported as -1. This function returns an
+		** int, and returning NULL here would read as 0 (success),
+		** leaving the caller with an undersized, unterminated buffer.
+		*/
+		err=0;
+		if (unicode_convert_fromu_tobuf(&c, 1, "utf-8",
+						&p, &ignore1, &err))
+			return -1;
+
+		if (err)
+		{
+			free(p);
+			return -1;
+		}
+
+		/* NOTE(review): relies on p being NUL-terminated -- confirm. */
+		(*callback)(p, strlen(p), arg);
+		free(p);
+	}
+	(*callback)("", 1, arg);
+	return 0;
+}
+
+/*
+** Decode an "rfc822;" or "utf-8;" typed address (the type prefix is matched
+** without regard to case) and return the result in a malloc-ed,
+** NUL-terminated buffer.
+**
+** Returns NULL if the type prefix is not recognized, if the utf-8 form
+** cannot be decoded, or if the decoded address contains any byte at or
+** below the space character. abort() is called if an allocation fails.
+**
+** In the rfc822 form, a '+' consumes the two characters that follow it.
+** If they are not both upper-case hexadecimal digits, or if they decode to
+** a NUL byte, the escape is dropped without reporting an error. A '+' with
+** fewer than two characters after it ends the address.
+**
+** NOTE(review): POSIX declares strncasecmp() in <strings.h>; confirm that
+** the headers this file includes provide it on all supported platforms.
+*/
+char *rfc6533_decode(const char *address)
+{
+	char *decoded;
+	char *scan;
+
+	if (strncasecmp(address, "rfc822;", 7) == 0)
+	{
+		char *out;
+
+		/* The decoded text is never longer than the encoded text. */
+		decoded=malloc(strlen(address));
+		if (decoded == NULL)
+			abort();
+
+		out=decoded;
+		address += 7;
+
+		for (;;)
+		{
+			const char *hi, *lo;
+			char ch=*address;
+
+			if (!ch)
+				break;
+			++address;
+
+			if (ch != '+')
+			{
+				*out++=ch;
+				continue;
+			}
+
+			/* Truncated escape at the end of the address. */
+			if (!address[0] || !address[1])
+				break;
+
+			hi=strchr(xdigit, address[0]);
+			lo=strchr(xdigit, address[1]);
+			address += 2;
+
+			if (hi && lo)
+			{
+				char n=(char)((hi-xdigit) * 16
+					      +(lo-xdigit));
+
+				if (n)
+					*out++=n;
+			}
+		}
+		*out=0;
+	}
+	else if (strncasecmp(address, "utf-8;", 6) == 0)
+	{
+		size_t needed=0;
+		char *out;
+
+		/* First pass validates the input and sizes the buffer. */
+		if (decode_rfc6533(address+6, count, &needed) != 0)
+			return NULL;
+
+		decoded=malloc(needed);
+		if (decoded == NULL)
+			abort();
+
+		out=decoded;
+		decode_rfc6533(address+6, save, &out);
+	}
+	else
+	{
+		return NULL;
+	}
+
+	/* Refuse addresses containing spaces or control characters. */
+	for (scan=decoded; *scan; ++scan)
+	{
+		if ((unsigned char)*scan <= ' ')
+		{
+			free(decoded);
+			return NULL;
+		}
+	}
+	return decoded;
+}
diff --git a/rfc2045/testrfc6533parser.c b/rfc2045/testrfc6533parser.c
new file mode 100644
index 0000000..0dbf249
--- /dev/null
+++ b/rfc2045/testrfc6533parser.c
@@ -0,0 +1,64 @@
+/*
+** Copyright 2018 Double Precision, Inc.
+** See COPYING for distribution information.
+*/
+
+#include "rfc2045.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+/*
+** Encoding test vectors: each address is encoded with the given use_rfc822
+** flag and must produce result exactly; result must then decode back to
+** address.
+*/
+static const struct {
+ const char *address;
+ int use_rfc822;
+ const char *result;
+} encode_tests[]={
+ {"nobody@example.com", 0, "rfc822;nobody@example.com"},
+ {"nobody+=me@example.com", 0,
+ "rfc822;nobody+2B+3Dme@example.com"},
+ {"\xd0\xb8\xd1\x81\xd0\xbf\xd1\x8b\xd1\x82\xd0\xb0\xd0\xbd\xd0\xb8\xd0\xb5@example.com", 0, "utf-8;\xd0\xb8\xd1\x81\xd0\xbf\xd1\x8b\xd1\x82\xd0\xb0\xd0\xbd\xd0\xb8\xd0\xb5@example.com"},
+ {"\xd0\xb8\xd1\x81\xd0\xbf\xd1\x8b\xd1\x82\xd0\xb0\xd0\xbd\xd0\xb8\xd0\xb5@example.com", 1, "rfc822;+D0+B8+D1+81+D0+BF+D1+8B+D1+82+D0+B0+D0+BD+D0+B8+D0+B5@example.com"},
+ {"\xd0\xb8\xd1\x81\xd0\xbf\xd1\x8b\xd1\x82\xd0\xb0\xd0\xbd\xd0\xb8\xd0\xb5+=\\me@example.com", 0, "utf-8;\xd0\xb8\xd1\x81\xd0\xbf\xd1\x8b\xd1\x82\xd0\xb0\xd0\xbd\xd0\xb8\xd0\xb5\\x{2B}\\x{3D}\\x{5C}me@example.com"},
+
+};
+
+/*
+** Run every entry of encode_tests: encode the address, compare against the
+** expected result, decode the encoded form and compare against the original
+** address. The first mismatch is reported on stderr and the program exits
+** with status 1; exits with status 0 when all entries pass.
+*/
+int main(int argc, char **argv)
+{
+	const size_t ntests=sizeof(encode_tests)/sizeof(encode_tests[0]);
+	size_t n;
+
+	for (n=0; n<ntests; ++n)
+	{
+		const char *input=encode_tests[n].address;
+		const char *expected=encode_tests[n].result;
+		char *encoded=rfc6533_encode(input,
+					     encode_tests[n].use_rfc822);
+		char *decoded;
+
+		if (strcmp(encoded, expected) != 0)
+		{
+			fprintf(stderr, "Expected to encode %s as %s, got %s\n",
+				input,
+				expected,
+				encoded);
+			exit(1);
+		}
+
+		decoded=rfc6533_decode(encoded);
+
+		if (decoded == NULL)
+		{
+			fprintf(stderr, "Could not decode %s\n", encoded);
+			exit(1);
+		}
+
+		if (strcmp(decoded, input) != 0)
+		{
+			fprintf(stderr, "Expected to decode %s as %s, got %s\n",
+				encoded,
+				input,
+				decoded);
+			exit(1);
+		}
+
+		free(encoded);
+		free(decoded);
+	}
+
+	exit(0);
+}