diff options
| author | Sam Varshavchik | 2018-07-11 22:16:22 -0400 | 
|---|---|---|
| committer | Sam Varshavchik | 2018-07-11 22:16:22 -0400 | 
| commit | 4fc91797ada09e9f8e3bd0a3cdbe0c78edf2530d (patch) | |
| tree | 0f4417dcffecd8b8e8f060a8a79ca42bc236484a /unicode/unicode.c | |
| parent | 03050820bf8e22adb2ec88d079cdde01d7fa6a29 (diff) | |
| download | courier-libs-4fc91797ada09e9f8e3bd0a3cdbe0c78edf2530d.tar.bz2 | |
courier-unicode: implement unicode_x_smap_modutf8 pseudo-encoding
Diffstat (limited to 'unicode/unicode.c')
| -rw-r--r-- | unicode/unicode.c | 321 | 
1 files changed, 318 insertions, 3 deletions
diff --git a/unicode/unicode.c b/unicode/unicode.c
index 71e6439..2cf5856 100644
--- a/unicode/unicode.c
+++ b/unicode/unicode.c
@@ -1,5 +1,5 @@
 /*
-** Copyright 2000-2011 Double Precision, Inc.
+** Copyright 2000-2018 Double Precision, Inc.
 ** See COPYING for distribution information.
 **
 */
@@ -239,6 +239,42 @@ static int do_convert_toutf7(const char *text, size_t cnt, void *arg);
 static int convert_utf7_handler(void *ptr, const char *text, size_t cnt);
 
 /*
+** Conversion wrapper for converting to modified-utf8 SMAP encoding.
+**
+** This is done by converting to UTF-8, then stacking on a module that
+** takes that and converts UTF-8 to modified-UTF8.
+**
+** init_nottosmaputf8() returns an opaque stack for every other destination
+** character set; it is also used to produce the intermediate UTF-8.
+*/
+
+static unicode_convert_handle_t
+init_nottosmaputf8(const char *src_chset,
+		   const char *dst_chset,
+		   int (*output_func)(const char *, size_t, void *),
+		   void *convert_arg);
+
+/*
+** State of the to-modified-UTF8 module
+*/
+
+struct unicode_convert_tosmaputf8 {
+
+	struct unicode_convert_hdr hdr;
+
+	int errflag; /* First non-zero output_func result; sticky */
+
+	/* Remembered output function */
+
+	int (*output_func)(const char *, size_t, void *);
+
+	/* Remembered arg to the output function */
+	void *convert_arg;
+};
+
+
+
+/*
 ** Create a conversion module stack
 */
 
@@ -417,6 +453,129 @@ static int deinit_toimaputf7(void *ptr, int *errptr)
 	return rc;
 }
 
+/*
+** Convert to unicode_x_smap_modutf8: UTF-8 with special bytes octal-escaped.
+*/
+
+static int deinit_tosmaputf8(void *ptr, int *errptr);
+static int do_convert_tosmaputf8(const char *text, size_t cnt, void *arg);
+static int convert_utf8_handler(void *ptr, const char *text, size_t cnt);
+/* Modified-UTF8 destination: source -> UTF-8 -> do_convert_tosmaputf8() */
+static unicode_convert_handle_t
+init_nottoimaputf7(const char *src_chset,
+		   const char *dst_chset,
+		   int (*output_func)(const char *, size_t, void *),
+		   void *convert_arg)
+{
+	struct unicode_convert_tosmaputf8 *toutf8;
+	unicode_convert_handle_t h;
+
+	if (strcmp(dst_chset, unicode_x_smap_modutf8))
+		return init_nottosmaputf8(src_chset, dst_chset,
+					  output_func,
+					  convert_arg);
+
+	toutf8=malloc(sizeof(struct unicode_convert_tosmaputf8));
+
+	if (!toutf8)
+		return NULL;
+
+	memset(toutf8, 0, sizeof(*toutf8));
+
+	h=init_nottosmaputf8(src_chset, "utf-8",
+			     do_convert_tosmaputf8,
+			     toutf8);
+	if (!h)
+	{
+		free(toutf8);
+		return (NULL);
+	}
+
+	toutf8->output_func=output_func;
+	toutf8->convert_arg=convert_arg;
+
+	toutf8->hdr.convert_handler=convert_utf8_handler;
+	toutf8->hdr.deinit_handler=deinit_tosmaputf8;
+	toutf8->hdr.ptr=toutf8;
+	toutf8->hdr.next=h;
+	return &toutf8->hdr;
+}
+/* Deinitializes the downstream stack, then frees this module */
+static int deinit_tosmaputf8(void *ptr, int *errptr)
+{
+	int rc;
+
+	struct unicode_convert_tosmaputf8 *toutf8=
+		(struct unicode_convert_tosmaputf8 *)ptr;
+
+	/* Flush out the downstream stack */
+	rc=(*toutf8->hdr.next->deinit_handler)(toutf8->hdr.next->ptr, errptr);
+
+	free(toutf8);
+	return rc;
+}
+/* Output function of the UTF-8 stack: escapes special bytes as \ooo */
+static int do_convert_tosmaputf8(const char *text, size_t cnt, void *arg)
+{
+	struct unicode_convert_tosmaputf8 *toutf8=
+		(struct unicode_convert_tosmaputf8 *)arg;
+	int rc;
+	size_t i;
+	char octal[4];
+
+	while (cnt)
+	{
+		if (toutf8->errflag)
+			return toutf8->errflag;
+		/* Find the next byte to escape; strchr() also matches NUL */
+		for (i=0; i<cnt; ++i)
+			if (strchr(" ./~:\\", text[i]))
+				break;
+		if (i)
+		{
+			rc= (*toutf8->output_func)(text, i,
+						   toutf8->convert_arg);
+
+			if (rc)
+			{
+				toutf8->errflag=rc;
+				return rc;
+			}
+			text += i;
+			cnt -= i;
+		}
+
+		if (cnt)
+		{
+			unsigned char c= (unsigned char)*text;
+			/* Backslash followed by three octal digits */
+			octal[0]='\\';
+			octal[3]= (c & 7)+'0'; c /= 8;
+			octal[2]= (c & 7)+'0'; c /= 8;
+			octal[1]= (c & 7)+'0';
+			rc= (*toutf8->output_func)(octal, 4,
+						   toutf8->convert_arg);
+			if (rc)
+			{
+				toutf8->errflag=rc;
+				return rc;
+			}
+			++text;
+			--cnt;
+		}
+	}
+	return 0;
+}
+/* Entry point: passes the input on to the UTF-8 conversion stack */
+static int convert_utf8_handler(void *ptr, const char *text, size_t cnt)
+{
+	struct unicode_convert_tosmaputf8 *toutf8=
+		(struct unicode_convert_tosmaputf8 *)ptr;
+
+	return (*toutf8->hdr.next->convert_handler)(toutf8->hdr.next->ptr,
+						    text, cnt);
+}
+
 /************/
 
 /*
@@ -483,7 +642,7 @@ static int convert_fromutf7(void *ptr,
 static int deinit_fromutf7(void *ptr, int *errptr);
 
 static unicode_convert_handle_t
-init_nottoimaputf7(const char *src_chset,
+init_nottosmaputf8(const char *src_chset,
 		   const char *dst_chset,
 		   int (*output_func)(const char *, size_t, void *),
 		   void *convert_arg)
@@ -648,6 +807,162 @@ static int deinit_fromutf7(void *ptr, int *errptr)
 
 /************/
 
+/*
+** Convert from modified-utf8 SMAP encoding.
+**
+** This module converts it to UTF-8, then this is attached to a stack that
+** converts UTF-8 to the requested charset.
+*/
+
+static unicode_convert_handle_t
+init_notfromsmaputf8(const char *src_chset,
+		     const char *dst_chset,
+		     int (*output_func)(const char *, size_t, void *),
+		     void *convert_arg);
+
+struct unicode_convert_fromsmaputf8 {
+
+	struct unicode_convert_hdr hdr;
+
+	/* Octal digits still expected in a backslash escape */
+
+	int in_escape;
+
+	/* Value accumulated from the octal digits */
+
+	unsigned char escape_char;
+	/* First error seen; returned by later calls and by deinit */
+	int errflag;
+	int converr; /* NOTE(review): never set by the code in this patch */
+};
+
+static int convert_fromutf8(void *ptr,
+			    const char *text, size_t cnt);
+static int deinit_fromutf8(void *ptr, int *errptr);
+/* Modified-UTF8 source: unescape here, then UTF-8 -> dst_chset downstream */
+static unicode_convert_handle_t
+init_notfromimaputf7(const char *src_chset,
+		     const char *dst_chset,
+		     int (*output_func)(const char *, size_t, void *),
+		     void *convert_arg)
+{
+	struct unicode_convert_fromsmaputf8 *fromutf8;
+	unicode_convert_handle_t h;
+
+	if (strcmp(src_chset, unicode_x_smap_modutf8))
+		return init_notfromsmaputf8(src_chset, dst_chset,
+					    output_func, convert_arg);
+
+	fromutf8=(struct unicode_convert_fromsmaputf8 *)
+		malloc(sizeof(struct unicode_convert_fromsmaputf8));
+
+	if (!fromutf8)
+		return NULL;
+
+	memset(fromutf8, 0, sizeof(*fromutf8));
+
+	/* Create a stack for converting UTF-8 to the dest charset */
+
+	h=init_notfromsmaputf8("utf-8", dst_chset,
+			       output_func, convert_arg);
+
+	if (!h)
+	{
+		free(fromutf8);
+		return (NULL);
+	}
+
+	fromutf8->hdr.next=h;
+	fromutf8->hdr.convert_handler=convert_fromutf8;
+	fromutf8->hdr.deinit_handler=deinit_fromutf8;
+	fromutf8->hdr.ptr=fromutf8;
+	return &fromutf8->hdr;
+}
+/* Undoes \ooo escapes; an escape may be split across calls */
+static int convert_fromutf8(void *ptr,
+			    const char *text, size_t cnt)
+{
+	struct unicode_convert_fromsmaputf8 *fromutf8=
+		(struct unicode_convert_fromsmaputf8 *)ptr;
+	size_t i;
+
+	while (cnt)
+	{
+		if (fromutf8->errflag)
+			return fromutf8->errflag;
+
+		if (fromutf8->in_escape)
+		{
+			if (*text < '0' || *text > '7')
+			{
+				errno=EILSEQ;
+				return fromutf8->errflag=-1;
+			}
+			fromutf8->escape_char <<= 3;
+			fromutf8->escape_char |= *text - '0';
+			if (--fromutf8->in_escape == 0)
+			{
+				fromutf8->errflag=(*fromutf8->hdr.next
+						   ->convert_handler)
+					(fromutf8->hdr.next->ptr,
+					 (const char *)&fromutf8->escape_char,
+					 1);
+			}
+			++text;
+			--cnt;
+			continue;
+		}
+
+		for (i=0; i<cnt; ++i)
+			if (text[i] == '\\')
+				break;
+
+		if (i)
+		{
+			fromutf8->errflag=(*fromutf8->hdr.next
+					   ->convert_handler)
+				(fromutf8->hdr.next->ptr, text, i);
+			text += i;
+			cnt -= i;
+		}
+
+		if (cnt)
+		{
+			fromutf8->escape_char=0;
+			fromutf8->in_escape=3;
+			++text;
+			--cnt;
+		}
+	}
+	return fromutf8->errflag; /* Report a downstream failure right away */
+}
+/* Fails with EILSEQ on an unfinished escape; frees this module */
+static int deinit_fromutf8(void *ptr, int *errptr)
+{
+	struct unicode_convert_fromsmaputf8 *fromutf8=
+		(struct unicode_convert_fromsmaputf8 *)ptr;
+	int rc;
+
+	if (fromutf8->in_escape)
+	{
+		fromutf8->errflag= -1;
+		errno=EILSEQ;
+	}
+
+	rc=fromutf8->hdr.next->deinit_handler(fromutf8->hdr.next->ptr, errptr);
+
+	if (fromutf8->errflag && rc == 0)
+		rc=fromutf8->errflag;
+
+	if (errptr && fromutf8->converr)
+		*errptr=1;
+
+	free(fromutf8);
+	return rc;
+}
+
+/************/
+
 /* A real conversion module, via iconv */
 
 struct unicode_convert_iconv {
@@ -674,7 +989,7 @@ static int init_iconv(struct unicode_convert_iconv *h,
 		      void *convert_arg);
 
 static unicode_convert_handle_t
-init_notfromimaputf7(const char *src_chset,
+init_notfromsmaputf8(const char *src_chset,
 		     const char *dst_chset,
 		     int (*output_func)(const char *, size_t, void *),
 		     void *convert_arg)
