diff options
| author | Sam Varshavchik | 2013-08-19 16:39:41 -0400 | 
|---|---|---|
| committer | Sam Varshavchik | 2013-08-25 14:43:51 -0400 | 
| commit | 9c45d9ad13fdf439d44d7443ae75da15ea0223ed (patch) | |
| tree | 7a81a04cb51efb078ee350859a64be2ebc6b8813 /unicode/unicode.c | |
| parent | a9520698b770168d1f33d6301463bb70a19655ec (diff) | |
| download | courier-libs-9c45d9ad13fdf439d44d7443ae75da15ea0223ed.tar.bz2 | |
Initial checkin
Imported from subversion report, converted to git. Updated all paths in
scripts and makefiles, reflecting the new directory hierarchy.
Diffstat (limited to 'unicode/unicode.c')
| -rw-r--r-- | unicode/unicode.c | 1643 | 
1 files changed, 1643 insertions, 0 deletions
| diff --git a/unicode/unicode.c b/unicode/unicode.c new file mode 100644 index 0000000..4ca098b --- /dev/null +++ b/unicode/unicode.c @@ -0,0 +1,1643 @@ +/* +** Copyright 2000-2011 Double Precision, Inc. +** See COPYING for distribution information. +** +*/ + +#include	"unicode_config.h" +#include	"unicode.h" +#include	"../rfc822/rfc822hdr.h" +#include	<string.h> +#include	<ctype.h> +#include	<stdlib.h> +#include	<iconv.h> +#include	<errno.h> +#if	HAVE_LOCALE_H +#if	HAVE_SETLOCALE +#include	<locale.h> +#if	USE_LIBCHARSET +#if	HAVE_LOCALCHARSET_H +#include	<localcharset.h> +#elif	HAVE_LIBCHARSET_H +#include	<libcharset.h> +#endif	/* HAVE_LOCALCHARSET_H */ +#elif	HAVE_LANGINFO_CODESET +#include	<langinfo.h> +#endif	/* USE_LIBCHARSET */ +#endif	/* HAVE_SETLOCALE */ +#endif	/* HAVE_LOCALE_H */ + +static char default_chset_buf[32]; + +static void init_default_chset() +{ +	const char *old_locale=NULL; +	const char *chset=NULL; +	char *locale_cpy=NULL; +	char buf[sizeof(default_chset_buf)]; + +	chset=getenv("MM_CHARSET"); + +	if (chset == NULL) +		chset=getenv("CHARSET"); + +	if (chset == NULL) +	{ +#if	HAVE_LOCALE_H +#if	HAVE_SETLOCALE +		old_locale=setlocale(LC_ALL, ""); +		locale_cpy=old_locale ? strdup(old_locale):NULL; +#if	USE_LIBCHARSET +		chset = locale_charset(); +#elif	HAVE_LANGINFO_CODESET +		chset=nl_langinfo(CODESET); +#endif +#endif +#endif +	} + +	memset(buf, 0, sizeof(buf)); + +	if (chset && + +	    /* Map GNU libc iconv oddity to us-ascii */ + +	    (strcmp(chset, "ANSI_X3.4") == 0 || +	     strncmp(chset, "ANSI_X3.4-", 10) == 0)) +		chset="US-ASCII"; + +	if (chset) +	{ +		strncat(buf, chset, sizeof(buf)-1); +	} +	else +	{ +		const char *p=getenv("LANG"); + +		/* LANG is xx_yy.CHARSET@modifier */ + +		if (p && *p && (p=strchr(p, '.')) != NULL) +		{ +			const char *q=strchr(++p, '@'); + +			if (!q) +				q=p+strlen(p); + +			if (q-p >= sizeof(buf)-1) +				q=p+sizeof(buf)-1; + +			memcpy(buf, p, q-p); +			buf[q-p]=0; +		} +		else +			strcpy(buf, "US-ASCII"); +	} + +	memcpy(default_chset_buf, buf, sizeof(buf)); + +#if	HAVE_LOCALE_H +#if	HAVE_SETLOCALE +	if (locale_cpy) +	{ +		setlocale(LC_ALL, locale_cpy); +		free(locale_cpy); +	} +#endif +#endif + +} + +const char *unicode_default_chset() +{ +	if (default_chset_buf[0] == 0) +		init_default_chset(); + +	return default_chset_buf; +} + + +/*****************************************************************************/ + +const char libmail_u_ucs4_native[]= +#if WORDS_BIGENDIAN +	"UCS-4BE" +#else +	"UCS-4LE" +#endif +	; + +const char libmail_u_ucs2_native[]= +#if WORDS_BIGENDIAN +	"UCS-2BE" +#else +	"UCS-2LE" +#endif +	; + +/* A stack of conversion modules */ + +struct libmail_u_convert_hdr { + +	int (*convert_handler)(void *ptr, +			       const char *text, size_t cnt); +	int (*deinit_handler)(void *ptr, int *errptr); +	void *ptr; + +	struct libmail_u_convert_hdr *next; +}; + +/* Decoding table for modified UTF7-encoding as used in imap */ + +static const char mbase64_lookup[]={ +	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,62,63,-1,-1,-1, +	52,53,54,55,56,57,58,59,60,61,-1,-1,-1,-1,-1,-1, +	-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14, +	15,16,17,18,19,20,21,22,23,24,25,-1,-1,-1,-1,-1, +	-1,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40, +	41,42,43,44,45,46,47,48,49,50,51,-1,-1,-1,-1,-1, +	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}; + +static const char mbase64[]= +	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,"; + +/* +** Conversion wrapper for converting to modified-utf7 IMAP encoding. +** +** This is done by converting to UCS2, then stacking on a module that +** takes that and converts UCS2 to modified-UTF7. +** +** init_nottoimaputf7() returns an opaque stack for converting to ucs2. +*/ + +static libmail_u_convert_handle_t +init_nottoimaputf7(const char *src_chset, +		   const char *dst_chset, +		   int (*output_func)(const char *, size_t, void *), +		   void *convert_arg); + +/* +** The to modified UTF7 module +*/ + +struct libmail_u_convert_toimaputf7 { + +	struct libmail_u_convert_hdr hdr; + +	/* Accumulated output buffer */ + +	char utf7encodebuf[1024]; +	size_t utf7encodebuf_cnt; + +	/* Accumulated bits for base64 encoding */ +	uint32_t utf7bits; + +	/* How many bits in utf7bits */ +	uint16_t utf7bitcount; + +	/* Flag: in base64mode */ +	uint16_t utfmode; + +	int errflag; + +	/* Any extra characters that should be munged */ + +	char smapmunge[16]; + +	/* Remembered output function */ + +	int (*output_func)(const char *, size_t, void *); + +	/* Remembered arg to the output function */ +	void *convert_arg; +}; + +/* Macro - flush the output buffer */ +#define toimaputf7_encode_flush(p) do {					\ +		int rc;							\ +									\ +		rc=(*(p)->output_func)((p)->utf7encodebuf,		\ +				       (p)->utf7encodebuf_cnt,		\ +				       (p)->convert_arg);		\ +		if (rc)							\ +			return ((p)->errflag=(rc));			\ +									\ +		(p)->utf7encodebuf_cnt=0;				\ +	} while (0) + +static int toimaputf7_encode_flushfinal(struct libmail_u_convert_toimaputf7 *p) +{ +	if (p->utf7encodebuf_cnt > 0) +		toimaputf7_encode_flush(p); +	return 0; +} + +/* Macro - add one char to the output buffer */ + +#define toimaputf7_encode_add(p,c) do {					\ +		if ((p)->utf7encodebuf_cnt >= sizeof((p)->utf7encodebuf)) \ +			toimaputf7_encode_flush((p));			\ +									\ +		(p)->utf7encodebuf[(p)->utf7encodebuf_cnt++]=(c);	\ +	} while (0); + +static int deinit_toimaputf7(void *ptr, int *errptr); + +static int do_convert_toutf7(const char *text, size_t cnt, void *arg); +static int convert_utf7_handler(void *ptr, const char *text, size_t cnt); + +/* +** Create a conversion module stack +*/ + +libmail_u_convert_handle_t +libmail_u_convert_init(const char *src_chset, +		       const char *dst_chset, +		       int (*output_func)(const char *, size_t, void *), +		       void *convert_arg) +{ +	struct libmail_u_convert_toimaputf7 *toutf7; +	libmail_u_convert_handle_t h; +	const char *smapmunge; +	size_t l=strlen(unicode_x_imap_modutf7); + +	if (strncmp(dst_chset, unicode_x_imap_modutf7, l) == 0 && +	    (dst_chset[l] == 0 || dst_chset[l] == ' ')) +	{ +		smapmunge=dst_chset + l; + +		if (*smapmunge) +			++smapmunge; +	} +	else +		return init_nottoimaputf7(src_chset, dst_chset, +					     output_func, +					     convert_arg); + +	toutf7=malloc(sizeof(struct libmail_u_convert_toimaputf7)); + +	if (!toutf7) +		return NULL; + +	memset(toutf7, 0, sizeof(*toutf7)); + +	h=init_nottoimaputf7(src_chset, libmail_u_ucs2_native, +			     do_convert_toutf7, toutf7); +	if (!h) +	{ +		free(toutf7); +		return (NULL); +	} + +	toutf7->output_func=output_func; +	toutf7->convert_arg=convert_arg; + +	strncat(toutf7->smapmunge, smapmunge, sizeof(toutf7->smapmunge)-1); + +	toutf7->hdr.convert_handler=convert_utf7_handler; +	toutf7->hdr.deinit_handler=deinit_toimaputf7; +	toutf7->hdr.ptr=toutf7; +	toutf7->hdr.next=h; +	return &toutf7->hdr; +} + +/* Passthrough to the wrapped stack */ + +static int convert_utf7_handler(void *ptr, const char *text, size_t cnt) +{ +	struct libmail_u_convert_toimaputf7 *toutf7= +		(struct libmail_u_convert_toimaputf7 *)ptr; + +	return (*toutf7->hdr.next->convert_handler)(toutf7->hdr.next->ptr, +						    text, cnt); +} + +static int utf7off(struct libmail_u_convert_toimaputf7 *toutf7) +{ +	if (!toutf7->utfmode) +		return 0; +	toutf7->utfmode=0; + +	if (toutf7->utf7bitcount > 0) +		toimaputf7_encode_add(toutf7, +				      mbase64[(toutf7->utf7bits +					       << (6-toutf7->utf7bitcount)) +					      & 63]); +	toimaputf7_encode_add(toutf7, '-'); +	return 0; +} + + +static int do_convert_toutf7(const char *text, size_t cnt, void *arg) +{ +	struct libmail_u_convert_toimaputf7 *toutf7= +		(struct libmail_u_convert_toimaputf7 *)arg; + +	/* We better be getting UCS-2 here! */ + +	const uint16_t *utext=(const uint16_t *)text; +	cnt /= 2; + +	while (cnt) +	{ +		if (toutf7->errflag) +			return toutf7->errflag; + +		if (*utext >= 0x20 && *utext <= 0x7F +		    && strchr( toutf7->smapmunge, (char)*utext) == NULL) + +			/* +			  && (!toutf7->smapmunge || (*utext != '.' && *utext != '/' && +			  *utext != '~' && *utext != ':'))) +			  */ +		{ +			if (utf7off(toutf7)) +				return toutf7->errflag; + +			toimaputf7_encode_add(toutf7, *utext); + +			if (*utext == '&') +				toimaputf7_encode_add(toutf7, '-'); + +			++utext; +			--cnt; +			continue; +		} + +		if (!toutf7->utfmode) +		{ +			toutf7->utfmode=1; +			toutf7->utf7bitcount=0; +			toimaputf7_encode_add(toutf7, '&'); +			continue; +		} + +		toutf7->utf7bits = (toutf7->utf7bits << 16) | +			(((uint32_t)*utext) & 0xFFFF); +		toutf7->utf7bitcount += 16; + +		++utext; +		--cnt; + +		/* If there's at least 6 bits, output base64-encoded char */ + +		while (toutf7->utf7bitcount >= 6) +		{ +			uint32_t v; +			int n; + +			if (toutf7->errflag) +				return toutf7->errflag; + +			v=toutf7->utf7bits; +			n=toutf7->utf7bitcount-6; +			toutf7->utf7bitcount -= 6; + +			if (n > 0) +				v >>= n; + +			toimaputf7_encode_add(toutf7, mbase64[v & 63]); +		} +	} + +	return 0; +} + +static int deinit_toimaputf7(void *ptr, int *errptr) +{ +	int rc; + +	struct libmail_u_convert_toimaputf7 *toutf7= +		(struct libmail_u_convert_toimaputf7 *)ptr; + +	/* Flush out the downstream stack */ +	rc=(*toutf7->hdr.next->deinit_handler)(toutf7->hdr.next->ptr, errptr); + +	/* Make sure we're out of modified base64 */ + +	if (rc == 0) +		rc=utf7off(toutf7); + +	if (rc == 0 && toutf7->utf7encodebuf_cnt > 0) +		rc=toimaputf7_encode_flushfinal(toutf7); +			 +	free(toutf7); +	return rc; +} + +/************/ + +/* +** Convert from modified-utf7 IMAP encoding. +** +** This module converts it to UCS-2, then this is attached to a stack that +** converts UCS-2 to the requested charset. +*/ + +static libmail_u_convert_handle_t +init_notfromimaputf7(const char *src_chset, +		     const char *dst_chset, +		     int (*output_func)(const char *, size_t, void *), +		     void *convert_arg); + +struct libmail_u_convert_fromimaputf7 { + +	struct libmail_u_convert_hdr hdr; + +	/* Accumulated UCS-2 stream */ +	uint16_t convbuf[512]; +	size_t convbuf_cnt; + +	/* Accumulated base64 bits */ +	uint32_t modbits; + +	/* How many bits extracted from a base64 stream */ + +	short modcnt; + +	/* Flag: seen the & */ +	char seenamp; + +	/* Flag: seen the &, and the next char wasn't - */ + +	char inmod; +	int errflag; +	int converr; +}; + +/* Flush the accumulated UCS-2 stream */ + +#define convert_fromutf7_flush(p) do {					\ +		(p)->errflag=(*(p)->hdr.next->convert_handler)		\ +			((p)->hdr.next->ptr,				\ +			 (const char *)(p)->convbuf,			\ +			 (p)->convbuf_cnt *				\ +			 sizeof((p)->convbuf[0]));			\ +		(p)->convbuf_cnt=0;					\ +	} while (0) + +/* Accumulated a UCS-2 char */ + +#define convert_fromutf7_add(p,c) do {					\ +		if ((p)->convbuf_cnt >=					\ +		    sizeof((p)->convbuf)/sizeof((p)->convbuf[0]))	\ +			convert_fromutf7_flush((p));			\ +		(p)->convbuf[(p)->convbuf_cnt++]=(c);			\ +	} while (0) + + +static int convert_fromutf7(void *ptr, +			    const char *text, size_t cnt); +static int deinit_fromutf7(void *ptr, int *errptr); + +static libmail_u_convert_handle_t +init_nottoimaputf7(const char *src_chset, +		   const char *dst_chset, +		   int (*output_func)(const char *, size_t, void *), +		   void *convert_arg) +{ +	struct libmail_u_convert_fromimaputf7 *fromutf7; +	libmail_u_convert_handle_t h; +	size_t l=strlen(unicode_x_imap_modutf7); + +	if (strncmp(src_chset, unicode_x_imap_modutf7, l) == 0 && +	    (src_chset[l] == 0 || src_chset[l] == ' ')) +		; +	else +		return init_notfromimaputf7(src_chset, dst_chset, +					    output_func, +					    convert_arg); + +	fromutf7=(struct libmail_u_convert_fromimaputf7 *) +		malloc(sizeof(struct libmail_u_convert_fromimaputf7)); + +	if (!fromutf7) +		return NULL; + +	memset(fromutf7, 0, sizeof(*fromutf7)); + +	/* Create a stack for converting UCS-2 to the dest charset */ + +	h=init_notfromimaputf7(libmail_u_ucs2_native, dst_chset, +			       output_func, convert_arg); + +	if (!h) +	{ +		free(fromutf7); +		return (NULL); +	} + +	fromutf7->hdr.next=h; +	fromutf7->hdr.convert_handler=convert_fromutf7; +	fromutf7->hdr.deinit_handler=deinit_fromutf7; +	fromutf7->hdr.ptr=fromutf7; +	return &fromutf7->hdr; +} + +static int convert_fromutf7(void *ptr, +			    const char *text, size_t cnt) +{ +	struct libmail_u_convert_fromimaputf7 *fromutf7= +		(struct libmail_u_convert_fromimaputf7 *)ptr; +	int bits; + +	while (cnt) +	{ +		if (fromutf7->errflag) +			return fromutf7->errflag; + +		if (!fromutf7->seenamp && *text == '&') +		{ +			fromutf7->seenamp=1; +			fromutf7->inmod=0; +			fromutf7->modcnt=0; +			++text; +			--cnt; +			continue; +		} + +		if (fromutf7->seenamp) +		{ +			if (*text == '-') +			{ +				convert_fromutf7_add(fromutf7, '&'); +				++text; +				--cnt; +				fromutf7->seenamp=0; +				continue; +			} +			fromutf7->seenamp=0; +			fromutf7->inmod=1; +		} + +		if (!fromutf7->inmod) +		{ +			/* Not in the base64 encoded stream */ + +			convert_fromutf7_add(fromutf7, +					     ((uint16_t)*text) & 0xFFFF); +			++text; +			--cnt; +			continue; +		} + +		if (*text == '-') +		{ +			/* End of the base64 encoded stream */ +			fromutf7->inmod=0; +			++text; +			--cnt; +			continue; +		} + +		/* Got 6 more bits */ + +		bits=mbase64_lookup[(unsigned char)*text]; + +		++text; +		--cnt; + +		if (bits < 0) +		{ +			errno=EILSEQ; +			return fromutf7->errflag=-1; +		} + +		fromutf7->modbits = (fromutf7->modbits << 6) | bits; +		fromutf7->modcnt += 6; + +		if (fromutf7->modcnt >= 16) +		{ +			/* Got a UCS-2 char */ + +			int shiftcnt=fromutf7->modcnt - 16; +			uint32_t v=fromutf7->modbits; + +			if (shiftcnt) +				v >>= shiftcnt; + +			fromutf7->modcnt -= 16; + +			convert_fromutf7_add(fromutf7, v); +		} +	} +	return 0; +} + +static int deinit_fromutf7(void *ptr, int *errptr) +{ +	struct libmail_u_convert_fromimaputf7 *fromutf7= +		(struct libmail_u_convert_fromimaputf7 *)ptr; +	int rc; + +	if (fromutf7->seenamp || fromutf7->inmod) +	{ +		if (fromutf7->errflag == 0) +		{ +			fromutf7->errflag= -1; +			errno=EILSEQ; +		} +	} + +	if (fromutf7->convbuf_cnt) +		convert_fromutf7_flush(fromutf7); + +	rc=fromutf7->hdr.next->deinit_handler(fromutf7->hdr.next->ptr, errptr); + +	if (fromutf7->errflag && rc == 0) +		rc=fromutf7->errflag; + +	if (errptr && fromutf7->converr) +		*errptr=1; + +	free(fromutf7); +	return rc; +} + +/************/ + +/* A real conversion module, via iconv */ + +struct libmail_u_convert_iconv { + +	struct libmail_u_convert_hdr hdr; + +	iconv_t h; +	int errflag; /* Accumulated errors */ + +	int (*output_func)(const char *, size_t, void *); +	void *convert_arg; + +	char buffer[1024]; /* Input buffer */ +	size_t bufcnt; /* Accumulated input in buffer */ +	char skipcnt; /* Skip this many bytes upon encountering EILSEQ */ +	char skipleft; /* How many bytes are currently left to skip */ +	char converr; /* Flag - an EILSEQ was encountered */ +} ; + +static int init_iconv(struct libmail_u_convert_iconv *h, +		      const char *src_chset, +		      const char *dst_chset, +		      int (*output_func)(const char *, size_t, void *), +		      void *convert_arg); + +static libmail_u_convert_handle_t +init_notfromimaputf7(const char *src_chset, +		     const char *dst_chset, +		     int (*output_func)(const char *, size_t, void *), +		     void *convert_arg) +{ + + +	struct libmail_u_convert_iconv *h= +		malloc(sizeof(struct libmail_u_convert_iconv)); + +	if (!h) +		return NULL; + +	memset(h, 0, sizeof(*h)); + +	if (init_iconv(h, src_chset, dst_chset, output_func, convert_arg)) +	{ +		free(h); +		return NULL; +	} +	return &h->hdr; +} + +/* Run the stack */ + +int libmail_u_convert(libmail_u_convert_handle_t h, +		      const char *text, size_t cnt) +{ +	return (*h->convert_handler)(h->ptr, text, cnt); +} + +/* Destroy the stack */ + +int libmail_u_convert_deinit(libmail_u_convert_handle_t h, int *errptr) +{ +	return (*h->deinit_handler)(h, errptr); +} + +static int deinit_iconv(void *ptr, int *errptr); +static int convert_iconv(void *ptr, +			 const char *text, size_t cnt); + +/* Initialize a single conversion module, in the stack */ + +static int init_iconv(struct libmail_u_convert_iconv *h, +		      const char *src_chset, +		      const char *dst_chset, +		      int (*output_func)(const char *, size_t, void *), +		      void *convert_arg) +{ +	if ((h->h=iconv_open(dst_chset, src_chset)) == (iconv_t)-1) +		return -1; + +	h->hdr.convert_handler=convert_iconv; +	h->hdr.deinit_handler=deinit_iconv; +	h->hdr.ptr=h; + +	h->output_func=output_func; +	h->convert_arg=convert_arg; + +	/* Heuristically determine how many octets to skip upon an EILSEQ */ + +	h->skipcnt=1; +	switch (src_chset[0]) { +	case 'u': +	case 'U': +		switch (src_chset[1]) { +		case 'c': +		case 'C': +			switch (src_chset[2]) { +			case 's': +			case 'S': +				if (src_chset[3] == '-') +					switch (src_chset[4]) { +					case '4': +						/* UCS-4 */ +						h->skipcnt=4; +						break; +					case '2': +						/* UCS-2 */ +						h->skipcnt=2; +						break; +					} +			} +			break; +		case 't': +		case 'T': +			switch (src_chset[2]) { +			case 'f': +			case 'F': +				if (src_chset[3] == '-') +					switch (src_chset[4]) { +					case '3': +						/* UTF-32 */ +						h->skipcnt=4; +						break; +					case '1': +						/* UTF-16 */ +						h->skipcnt=2; +						break; +					} +			} +		} +	} +					 +	return 0; +} + +static void convert_flush(struct libmail_u_convert_iconv *); +static void convert_flush_iconv(struct libmail_u_convert_iconv *, const char **, +				size_t *); + +/* +** iconv conversion module. Accumulate input in an input buffer. When the +** input buffer is full, invoke convert_flush(). +*/ + +static int convert_iconv(void *ptr, +			 const char *text, size_t cnt) +{ +	struct libmail_u_convert_iconv *h=(struct libmail_u_convert_iconv *)ptr; + +	while (cnt && h->errflag == 0) +	{ +		if (h->bufcnt >= sizeof(h->buffer)-1) +		{ +			convert_flush(h); + +			if (h->errflag) +				break; +		} + +		h->buffer[h->bufcnt++]= *text++; +		--cnt; +	} + +	return h->errflag; +} + +/* +** Finish an iconv conversion module. Invoke convert_flush() to flush any +** buffered input. Invoke convert_flush_iconv() to return state to the initial +** conversion state. +*/ + +static int deinit_iconv(void *ptr, int *errptr) +{ +	int rc; +	int converr; +	struct libmail_u_convert_iconv *h=(struct libmail_u_convert_iconv *)ptr; +	libmail_u_convert_handle_t next; + +	if (h->errflag == 0) +		convert_flush(h); + +	if (h->bufcnt && h->errflag == 0) +		h->converr=1; + +	if (h->errflag == 0) +		convert_flush_iconv(h, NULL, NULL); + +	rc=h->errflag; +	converr=h->converr != 0; +	iconv_close(h->h); +	next=h->hdr.next; +	free(h); +	if (errptr) +		*errptr=converr; + +	/* If there's another module in the stack, clean that up */ + +	if (next) +	{ +		int converrnext; +		int rcnext=libmail_u_convert_deinit(next, &converrnext); + +		if (converrnext && errptr && *errptr == 0) +			*errptr=converr; + +		if (rcnext && rc == 0) +			rc=rcnext; +	} +	return rc; +} + +/* +** Invoke convert_flush_iconv() to flush the input buffer. If there's +** unconverted text remaining, reposition it at the beginning of the input +** buffer. +*/ + +static void convert_flush(struct libmail_u_convert_iconv *h) +{ +	const char *p; +	size_t n; + +	if (h->bufcnt == 0 || h->errflag) +		return; + +	p=h->buffer; +	n=h->bufcnt; + +	convert_flush_iconv(h, &p, &n); + +	if (h->errflag) +		return; + +	if (h->bufcnt == n) +		n=0; /* Unexpected error, dunno what to do, punt */ + +	h->bufcnt=0; + +	while (n) +	{ +		h->buffer[h->bufcnt]= *p; + +		++h->bufcnt; +		++p; +		--n; +	} +} + +/* +** Convert text via iconv. +*/ + +static void convert_flush_iconv(struct libmail_u_convert_iconv *h, +				const char **inbuf, size_t *inbytesleft) +{ +	int save_errno; + +	while (1) +	{ +		char outbuf[1024]; +		char *outp; +		size_t outleft; +		size_t n; +		size_t origin=0; + +		if (inbytesleft) +		{ +			if ((origin=*inbytesleft) == 0) +				return; + +			if (inbuf && h->skipleft && origin) +			{ +				/* Skipping after an EILSEQ */ + +				--h->skipleft; +				--*inbytesleft; +				++*inbuf; +				continue; +			} + +		} + +		if (h->errflag) +		{ +			/* Quietly eat everything after a previous error */ + +			if (inbytesleft) +				*inbytesleft=0; + +			return; +		} + +		outp=outbuf; +		outleft=sizeof(outbuf); + +		n=iconv(h->h, (char **)inbuf, inbytesleft, &outp, &outleft); + +		save_errno=errno; + +		/* Anything produced by iconv() gets pushed down the stack */ + +		if (outp > outbuf) +		{ +			int rc=(*h->output_func)(outbuf, outp-outbuf, +						 h->convert_arg); +			if (rc) +			{ +				h->errflag=rc; +				return; +			} +		} + +		if (n != (size_t)-1) +		{ +			/* iconv(3) reason #2 */ + +			break; +		} + +		if (inbytesleft == 0) +		{ +			/* +			** An error when generating the shift sequence to +			** return to the initial state. We don't know what to +			** do, now. +			*/ + +			errno=EINVAL; +			h->errflag= -1; +			return; +		} + +		/* +		** convert_flush() gets invoked when the 1024 char input buffer +		** fills or to convert input that has been buffered when +		** convert_chset_end() gets invoked. +		** +		** A return code of EINVAL from iconv() is iconv() encountering +		** an incomplete multibyte sequence. +		** +		** If iconv() failed without consuming any input: +		** +		** - iconv(3) reason #1, EILSEQ, invalid multibyte sequence +		** that starts at the beginning of the string we wish to +		** convert. Discard one character, and try again. +		** +		** - iconv(3) reason #3, EINVAL, incomplete multibyte sequence. +		** If it's possible to have an incomplete 1024 character long +		** multibyte sequence, we're in trouble. Or we've encountered +		** an EINVAL when flushing out the remaining buffered input, +		** in convert_chset_end(). In either case, it's ok to sicard +		** one character at a time, until we either reach the end, +		** or get some other result. +		** +		** - iconv(3) reason #4, E2BIG. If the 1024 character output +		** buffer, above, is insufficient to produce the output from a +		** single converted character, we're in trouble. +		*/ + +		if (*inbytesleft == origin) +		{ +			h->skipleft=h->skipcnt; +			h->converr=1; +		} + +		/* +		** Stopped at an incomplete multibyte sequence, try again on +		** the next round. +		*/ +		else if (save_errno == EINVAL) +			break; + +		if (save_errno == EILSEQ) +			h->converr=1; /* Another possibility this can happen */ + +		/* +		** If we get here because of iconv(3) reason #4, filled out +		** the output buffer, we should continue with the conversion. +		** Otherwise, upon encountering any other error condition, +		** reset the conversion state. +		*/ +		if (save_errno != E2BIG) +			iconv(h->h, NULL, NULL, NULL, NULL); +	} +} + +/*****************************************************************************/ + +/* +** A wrapper for libmail_u_convert() that collects the converted character +** text into a buffer. This is done by passing an output function to +** libmail_u_convert() that saves converted text in a linked-list +** of buffers. +** +** Then, in the deinitialization function, the buffers get concatenated into +** the final character buffer. +*/ + +struct libmail_u_convert_cbuf { +	struct libmail_u_convert_cbuf *next; +	char *fragment; +	size_t fragment_size; +}; + +struct libmail_u_convert_tocbuf { +	struct libmail_u_convert_hdr hdr; + +	char **cbufptr_ret; +	size_t *cbufsize_ret; +	int errflag; +	size_t tot_size; +	int nullterminate; + +	struct libmail_u_convert_cbuf *first, **last; +}; + +static int save_tocbuf(const char *, size_t, void *); +static int convert_tocbuf(void *ptr, +			  const char *text, size_t cnt); +static int deinit_tocbuf(void *ptr, int *errptr); + +libmail_u_convert_handle_t +libmail_u_convert_tocbuf_init(const char *src_chset, +			      const char *dst_chset, +			      char **cbufptr_ret, +			      size_t *cbufsize_ret, +			      int nullterminate +			      ) +{ +	struct libmail_u_convert_tocbuf *p= +		malloc(sizeof(struct libmail_u_convert_tocbuf)); +	libmail_u_convert_handle_t h; + +	if (!p) +		return NULL; + +	memset(p, 0, sizeof(*p)); + +	h=libmail_u_convert_init(src_chset, dst_chset, save_tocbuf, p); + +	if (!h) +	{ +		free(p); +		return NULL; +	} + +	p->cbufptr_ret=cbufptr_ret; +	p->cbufsize_ret=cbufsize_ret; +	p->last= &p->first; +	p->nullterminate=nullterminate; +	p->hdr.next=h; +	p->hdr.convert_handler=convert_tocbuf; +	p->hdr.deinit_handler=deinit_tocbuf; +	p->hdr.ptr=p; +	return &p->hdr; +} + +/* Capture the output of the conversion stack */ + +static int save_tocbuf(const char *text, size_t cnt, void *ptr) +{ +	struct libmail_u_convert_tocbuf *p= +		(struct libmail_u_convert_tocbuf *)ptr; +	struct libmail_u_convert_cbuf *fragment= +		malloc(sizeof(struct libmail_u_convert_cbuf)+cnt); +	size_t tot_size; + +	if (!fragment) +	{ +		p->errflag=1; +		return 1; +	} + +	fragment->next=NULL; +	fragment->fragment=(char *)(fragment+1); +	if ((fragment->fragment_size=cnt) > 0) +		memcpy(fragment->fragment, text, cnt); + +	*(p->last)=fragment; +	p->last=&fragment->next; + +	tot_size=p->tot_size + cnt; /* Keep track of the total size saved */ + +	if (tot_size < p->tot_size) /* Overflow? */ +	{ +		errno=E2BIG; +		return 1; +	} +	p->tot_size=tot_size; +	return 0; +} + +/* Punt converted text down the stack */ + +static int convert_tocbuf(void *ptr, const char *text, size_t cnt) +{ +	struct libmail_u_convert_tocbuf *p= +		(struct libmail_u_convert_tocbuf *)ptr; + +	return libmail_u_convert(p->hdr.next, text, cnt); +} + +/* +** Destroy the conversion stack. Destroy the downstream, then assemble the +** final array. +*/ + +static int deinit_tocbuf(void *ptr, int *errptr) +{ +	struct libmail_u_convert_tocbuf *p= +		(struct libmail_u_convert_tocbuf *)ptr; +	int rc=libmail_u_convert_deinit(p->hdr.next, errptr); +	struct libmail_u_convert_cbuf *bufptr; + +	if (rc == 0 && p->nullterminate) +	{ +		char zero=0; + +		rc=save_tocbuf( &zero, sizeof(zero), p->hdr.ptr); +	} + +	if (rc == 0) +	{ +		if (((*p->cbufptr_ret)=malloc(p->tot_size ? p->tot_size:1)) != +		    NULL) +		{ +			size_t i=0; + +			for (bufptr=p->first; bufptr; bufptr=bufptr->next) +			{ +				if (bufptr->fragment_size) +					memcpy(&(*p->cbufptr_ret)[i], +					       bufptr->fragment, +					       bufptr->fragment_size); +				i += bufptr->fragment_size; +			} +			(*p->cbufsize_ret)=i; +		} +		else +		{ +			rc= -1; +		} +	} + +	for (bufptr=p->first; bufptr; ) +	{ +		struct libmail_u_convert_cbuf *b=bufptr; + +		bufptr=bufptr->next; + +		free(b); +	} +	free(p); + +	return rc; +} + +libmail_u_convert_handle_t +libmail_u_convert_tocbuf_toutf8_init(const char *src_chset, +				     char **cbufptr_ret, +				     size_t *cbufsize_ret, +				     int nullterminate +				     ) +{ +	return libmail_u_convert_tocbuf_init(src_chset, "utf-8", +					     cbufptr_ret, cbufsize_ret, +					     nullterminate); +} + +libmail_u_convert_handle_t +libmail_u_convert_tocbuf_fromutf8_init(const char *dst_chset, +				       char **cbufptr_ret, +				       size_t *cbufsize_ret, +				       int nullterminate +				       ) +{ +	return libmail_u_convert_tocbuf_init("utf-8", dst_chset, +					     cbufptr_ret, cbufsize_ret, +					     nullterminate); +} + +char *libmail_u_convert_toutf8(const char *text, +			       const char *charset, +			       int *error) +{ +	char *cbufptr; +	size_t cbufsize; +	libmail_u_convert_handle_t h= +		libmail_u_convert_tocbuf_toutf8_init(charset, +						     &cbufptr, +						     &cbufsize, 1); + +	if (!h) +		return NULL; + +	libmail_u_convert(h, text, strlen(text)); + +	if (libmail_u_convert_deinit(h, error) == 0) +		return cbufptr; + +	return NULL; +} + +char *libmail_u_convert_fromutf8(const char *text, +				 const char *charset, +				 int *error) +{ +	char *cbufptr; +	size_t cbufsize; +	libmail_u_convert_handle_t h= +		libmail_u_convert_tocbuf_fromutf8_init(charset, +						       &cbufptr, +						       &cbufsize, 1); + +	if (!h) +		return NULL; + +	libmail_u_convert(h, text, strlen(text)); + +	if (libmail_u_convert_deinit(h, error) == 0) +		return cbufptr; + +	return NULL; +} + +char *libmail_u_convert_tobuf(const char *text, +			      const char *charset, +			      const char *dstcharset, +			      int *error) +{ +	char *cbufptr; +	size_t cbufsize; +	libmail_u_convert_handle_t h= +		libmail_u_convert_tocbuf_init(charset, +					      dstcharset, +					      &cbufptr, +					      &cbufsize, 1); + +	if (!h) +		return NULL; + +	libmail_u_convert(h, text, strlen(text)); + +	if (libmail_u_convert_deinit(h, error) == 0) +		return cbufptr; + +	return NULL; +} + +/*****************************************************************************/ + +/* +** Convert text to unicode_chars. Same basic approach as +** libmail_u_convert_tocbuf_init(). The output character set gets specified +** as UCS-4, the final output size is divided by 4, and the output buffer gets +** typed as a unicode_char array. +*/ + +struct libmail_u_convert_buf { +	struct libmail_u_convert_buf *next; +	unicode_char *fragment; +	size_t fragment_size; +	size_t max_fragment_size; +}; + +struct libmail_u_convert_tou { +	struct libmail_u_convert_hdr hdr; + +	unicode_char **ucptr_ret; +	size_t *ucsize_ret; +	int errflag; +	size_t tot_size; +	int nullterminate; + +	struct libmail_u_convert_buf *first, *tail, **last; +}; + +static int save_unicode(const char *, size_t, void *); +static int convert_tounicode(void *ptr, +			 const char *text, size_t cnt); +static int deinit_tounicode(void *ptr, int *errptr); + +libmail_u_convert_handle_t +libmail_u_convert_tou_init(const char *src_chset, +			   unicode_char **ucptr_ret, +			   size_t *ucsize_ret, +			   int nullterminate +			   ) +{ +	struct libmail_u_convert_tou *p= +		malloc(sizeof(struct libmail_u_convert_tou)); +	libmail_u_convert_handle_t h; + +	if (!p) +		return NULL; + +	memset(p, 0, sizeof(*p)); + +	h=libmail_u_convert_init(src_chset, libmail_u_ucs4_native, +				 save_unicode, p); + +	if (!h) +	{ +		free(p); +		return NULL; +	} + +	p->ucptr_ret=ucptr_ret; +	p->ucsize_ret=ucsize_ret; +	p->last= &p->first; +	p->nullterminate=nullterminate; +	p->hdr.next=h; +	p->hdr.convert_handler=convert_tounicode; +	p->hdr.deinit_handler=deinit_tounicode; +	p->hdr.ptr=p; +	return &p->hdr; +} + +libmail_u_convert_handle_t +libmail_u_convert_fromu_init(const char *dst_chset, +			     char **cbufptr_ret, +			     size_t *csize_ret, +			     int nullterminate +			     ) +{ +	return libmail_u_convert_tocbuf_init(libmail_u_ucs4_native, +					     dst_chset, +					     cbufptr_ret, +					     csize_ret, +					     nullterminate); +} + +int libmail_u_convert_uc(libmail_u_convert_handle_t handle, +			 const unicode_char *text, +			 size_t cnt) +{ +	return libmail_u_convert(handle, (const char *)text, +				 cnt * sizeof(*text)); +} + +/* Capture the output of the conversion stack */ + +static int save_unicode(const char *text, size_t cnt, void *ptr) +{ +	struct libmail_u_convert_tou *p= +		(struct libmail_u_convert_tou *)ptr; +	struct libmail_u_convert_buf *fragment; +	size_t tot_size; + +	cnt /= sizeof(unicode_char); + +	tot_size=p->tot_size + cnt*sizeof(unicode_char); +	/* Keep track of the total size saved */ + +	if (p->tail) +	{ +		size_t n=p->tail->max_fragment_size-p->tail->fragment_size; + +		if (n > cnt) +			n=cnt; + +		if (n) +		{ +			memcpy(p->tail->fragment+p->tail->fragment_size, +			       text, n*sizeof(unicode_char)); + +			cnt -= n; +			text += n*sizeof(unicode_char); +			p->tail->fragment_size += n; +		} +	} + +	if (cnt > 0) +	{ +		size_t cnt_alloc=cnt; + +		if (cnt_alloc < 16) +			cnt_alloc=16; + +		if ((fragment=malloc(sizeof(struct libmail_u_convert_buf) +				     +cnt_alloc*sizeof(unicode_char))) +		    == NULL) +		{ +			p->errflag=1; +			return 1; +		} + +		fragment->next=NULL; +		fragment->fragment=(unicode_char *)(fragment+1); +		fragment->max_fragment_size=cnt_alloc; +		fragment->fragment_size=cnt; +		memcpy(fragment->fragment, text, cnt*sizeof(unicode_char)); + +		*(p->last)=fragment; +		p->last=&fragment->next; +		p->tail=fragment; +	} + +	if (tot_size < p->tot_size) /* Overflow? */ +	{ +		errno=E2BIG; +		return 1; +	} +	p->tot_size=tot_size; +	return 0; +} + +/* Punt converted text down the stack */ + +static int convert_tounicode(void *ptr, +			     const char *text, size_t cnt) +{ +	struct libmail_u_convert_tou *p= +		(struct libmail_u_convert_tou *)ptr; + +	return libmail_u_convert(p->hdr.next, text, cnt); +} + +/* +** Destroy the conversion stack. Destroy the downstream, then assemble the +** final array. +*/ + +static int deinit_tounicode(void *ptr, int *errptr) +{ +	struct libmail_u_convert_tou *p= +		(struct libmail_u_convert_tou *)ptr; +	int rc=libmail_u_convert_deinit(p->hdr.next, errptr); +	struct libmail_u_convert_buf *bufptr; + +	if (rc == 0 && p->nullterminate) +	{ +		unicode_char zero=0; + +		rc=save_unicode( (const char *)&zero, sizeof(zero), +				 p->hdr.ptr); +	} + +	if (rc == 0) +	{ +		if (((*p->ucptr_ret)=malloc(p->tot_size ? p->tot_size:1)) != +		    NULL) +		{ +			size_t i=0; + +			for (bufptr=p->first; bufptr; bufptr=bufptr->next) +			{ +				if (bufptr->fragment_size) +					memcpy(&(*p->ucptr_ret)[i], +					       bufptr->fragment, +					       bufptr->fragment_size +					       *sizeof(*bufptr->fragment)); +				i += bufptr->fragment_size; +			} +			(*p->ucsize_ret)=i; +		} +		else +		{ +			rc= -1; +		} +	} + +	for (bufptr=p->first; bufptr; ) +	{ +		struct libmail_u_convert_buf *b=bufptr; + +		bufptr=bufptr->next; + +		free(b); +	} +	free(p); + +	return rc; +} + +int libmail_u_convert_tou_tobuf(const char *text, +				size_t text_l, +				const char *charset, +				unicode_char **uc, +				size_t *ucsize, +				int *err) +{ +	libmail_u_convert_handle_t h; + +	if ((h=libmail_u_convert_tou_init(charset, uc, ucsize, 0)) == NULL) +		return -1; + +	if (libmail_u_convert(h, text, text_l) < 0) +	{ +		libmail_u_convert_deinit(h, NULL); +		return -1; +	} + +	if (libmail_u_convert_deinit(h, err)) +		return -1; + +	return 0; +} + +int libmail_u_convert_fromu_tobuf(const unicode_char *utext, +				  size_t utext_l, +				  const char *charset, +				  char **c, +				  size_t *csize, +				  int *err) +{ +	libmail_u_convert_handle_t h; + +	if (utext_l == (size_t)-1) +	{ +		for (utext_l=0; utext[utext_l]; ++utext_l) +		     ; +	} + +	if ((h=libmail_u_convert_fromu_init(charset, c, csize, 1)) == NULL) +		return -1; + +	if (libmail_u_convert_uc(h, utext, utext_l) < 0) +	{ +		libmail_u_convert_deinit(h, NULL); +		return -1; +	} + +	if (libmail_u_convert_deinit(h, err)) +		return -1; + +	return 0; +} + +char *libmail_u_convert_tocase(const char *str, +			       const char *charset, +			       unicode_char (*first_char_func)(unicode_char), +			       unicode_char (*char_func)(unicode_char)) +{ +	unicode_char *uc; +	size_t ucsize; +	size_t i; +	int err; +	char *c; +	size_t csize; + +	if (libmail_u_convert_tou_tobuf(str, strlen(str), +					charset, &uc, &ucsize, &err)) +		return NULL; + +	if (err) +	{ +		free(uc); +		return NULL; +	} + +	for (i=0; i<ucsize; ++i) +	{ +		uc[i]=(*first_char_func)(uc[i]); + +		if (char_func) +			first_char_func=char_func; +	} + +	if (libmail_u_convert_fromu_tobuf(uc, ucsize, +					  charset, +					  &c, &csize, &err)) +	{ +		free(uc); +		return NULL; +	} + +	free(uc); + +	if (err) +	{ +		free(c); +		return NULL; +	} + +	return c; +} | 
