Implement unicode_locale_chset_l()

author: Sam Varshavchik 2015-07-31 08:30:25 -0400
committer: Sam Varshavchik 2015-07-31 08:30:25 -0400
commit: 84dd6e55bc0069a816d303e51e1f237a36b0c7a5 (patch)
tree: dde23184f469a31a49812771f1a6a0e3563e6386 /unicode/courier-unicode.h.in
parent: 228f4c614680f27acceb807074857c01c753e100 (diff)
download: courier-libs-84dd6e55bc0069a816d303e51e1f237a36b0c7a5.tar.bz2
1 files changed, 1868 insertions, 0 deletions
diff --git a/unicode/courier-unicode.h.in b/unicode/courier-unicode.h.in
new file mode 100644
index 0000000..9b58f5c
--- /dev/null
+++ b/unicode/courier-unicode.h.in
@@ -0,0 +1,1868 @@
+#ifndef	courier_unicode_h
+#define	courier_unicode_h
+
+/*
+** Copyright 2000-2015 Double Precision, Inc.
+** See COPYING for distribution information.
+**
+*/
+
+#ifdef	__cplusplus
+
+#include <string>
+#include <vector>
+#include <list>
+
+extern "C" {
+#endif
+
+#if 0
+}
+#endif
+
+#include	<stdlib.h>
+#include	<stdio.h>
+#include	<stdint.h>
+#include	<sys/types.h>
+#include	<locale.h>
+
+typedef uint32_t unicode_char;
+
+/*
+** The system default character set, from the locale.
+*/
+
+extern const char *unicode_default_chset();
+
+/*
+** The current locale character set.
+*/
+
+extern const char *unicode_locale_chset();
+
+#if @LANGINFO_L@
+extern const char *unicode_locale_chset_l(locale_t l);
+#endif
+
+/* Unicode upper/lower/title case conversion functions */
+
+extern unicode_char unicode_uc(unicode_char);
+extern unicode_char unicode_lc(unicode_char);
+extern unicode_char unicode_tc(unicode_char);
+
+/*
+** Look up HTML 4.0/XHTML entity.
+**
+** n="amp", etc...
+**
+** Returns the unicode entity value, or 0 if no such entity is defined.
+*/
+
+unicode_char unicode_html40ent_lookup(const char *n);
+
+/*
+**
+** Return "width" of unicode character.
+**
+** This is defined as follows: for characters having the F or W property in
+** tr11 (EastAsianWidth), unicode_wcwidth() returns 2.
+**
+** Otherwise, characters having the BK, CR, LF, CM, NL, WJ, and ZW line
+** breaking property as per tr14, unicode_wcwdith() returns 0. For all other
+** cases, 1.
+**
+** This provides a rough estimate of the "width" of the character if its
+** shown on a text console.
+*/
+
+extern int unicode_wcwidth(unicode_char c);
+extern size_t unicode_wcwidth_str(const unicode_char *c);
+
+/* Internal unicode table lookup functions */
+
+extern uint8_t unicode_tab_lookup(unicode_char ch,
+				  const size_t *unicode_indextab,
+				  size_t unicode_indextab_sizeof,
+				  const uint8_t (*unicode_rangetab)[2],
+				  const uint8_t *unicode_classtab,
+				  uint8_t uclass);
+
+extern uint32_t unicode_tab32_lookup(unicode_char ch,
+				     const size_t *unicode_indextab,
+				     size_t unicode_indextab_sizeof,
+				     const uint8_t (*unicode_rangetab)[2],
+				     const uint32_t *unicode_classtab,
+				     uint32_t uclass);
+
+/*
+** Look up unicode categorization, see http://unicode.org/notes/tn36/
+**
+** Returns a 32 bit value with four unicode categories encoded in the
+** bits defined by UNICODE_CATEGORY_1..4
+*/
+
+#define UNICODE_CATEGORY_1   0xFF000000
+#define UNICODE_CATEGORY_2   0x00FF0000
+#define UNICODE_CATEGORY_3   0x0000FF00
+#define UNICODE_CATEGORY_4   0x000000FF
+
+#include <courier-unicode-categories-tab.h>
+
+uint32_t unicode_category_lookup(unicode_char);
+
+/*
+** Return non-0 for TAB, and all UNICODE_CATEGORY_2_SPACE.
+*/
+
+extern int unicode_isblank(unicode_char ch);
+
+/*
+** The unicode-ish isspace(). In addition to return non-0 for
+** unicode_isblank(), this also returns non-0 for unicode characters
+** with linebreaking properties of BK, CR, LF, NL, and SP.
+*/
+extern int unicode_isspace(unicode_char ch);
+
+/*
+** Return non-0 for all UNICODE_CATEGORY_1_LETTER
+*/
+
+extern int unicode_isalpha(unicode_char ch);
+
+/*
+** Return non-0 for all UNICODE_CATEGORY_1_NUMBER | UNICODE_CATEGORY_2_DIGIT,
+** only (no third categories).
+*/
+extern int unicode_isdigit(unicode_char ch);
+
+/*
+** Return non-0 for all unicode_isalpha() or unicode_isdigit().
+*/
+
+extern int unicode_isalnum(unicode_char ch);
+
+/*
+** Returns non-0 for all codepoints above SPACE which are not
+** unicode_isspace().
+*/
+
+extern int unicode_isgraph(unicode_char ch);
+
+/*
+** Return non-0 for all UNICODE_CATEGORY_1_PUNCTUATION.
+*/
+
+extern int unicode_ispunct(unicode_char ch);
+
+/*
+** Return non-0 for all unicode_isalpha() for which the character is
+** equal to unicode_lc() of itself.
+*/
+extern int unicode_islower(unicode_char ch);
+
+/*
+** Return non-0 for all unicode_isalpha() for which the character is
+** equal to unicode_uc() of itself.
+*/
+extern int unicode_isupper(unicode_char ch);
+
+/*
+** Implementation of grapheme cluster boundary rules, as per
+** http://www.unicode.org/reports/tr29/tr29-27.html
+** including  GB9a and GB9b.
+**
+** Returns non-zero if there's a grapheme break between the two referenced
+** characters.
+*/
+
+int unicode_grapheme_break(unicode_char a, unicode_char b);
+
+typedef enum {
+
+#include <courier-unicode-script-tab.h>
+
+} unicode_script_t;
+
+/*
+** Look up the unicode script property, as per
+** http://www.unicode.org/reports/tr24/tr24-24.html
+*/
+
+unicode_script_t unicode_script(unicode_char a);
+
+/*
+** Implementation of line break rules, as per
+** http://www.unicode.org/reports/tr14/tr14-35.html
+**
+** Invoke unicode_lb_init() to initialize the linebreaking algorithm. The
+** first parameter is a callback function that gets invoked with two
+** arguments: UNICODE_LB_{MANDATORY|NONE|ALLOWED}, and a passthrough argument.
+** The second parameter to unicode_lb_init() is the opaque passthrough
+** pointer, that is passed as the second argument to the callback function
+** with no further interpretation.
+**
+** unicode_lb_init() returns an opaque handle. Invoke unicode_lb_next(),
+** passing the handle and one unicode character. Repeatedly invoke
+** unicode_lb_next() to specify the input string for the linebreaking
+** algorithm, then invoke unicode_lb_end() to finish calculating the
+** linebreaking algorithm, and deallocate the opaque linebreaking handle.
+**
+** The callback function gets invoked once for each invocation of
+** unicode_lb_next(). The contract is that before unicode_lb_end() returns,
+** the callback function will get invoked the exact number of times that
+** unicode_lb_next(), as long as each invocation of the callback function
+** returned 0; nothing more, nothing less. The first parameter to the callback
+** function will be one of the following values:
+**
+** UNICODE_LB_MANDATORY - a linebreak is MANDATORY before the corresponding
+** character.
+** UNICODE_LB_NONE - a linebreak is PROHIBITED before the corresponding
+** character.
+** UNICODE_LB_ALLOWED - a linebreak is OPTIONAL before the corresponding
+** character (the preceding character is a space, or an equivalent).
+**
+** The callback function should return 0. A non-zero value indicates an
+** error, which gets propagated up to the caller. The contract that the
+** callback function gets invoked the same number of times that
+** unicode_lb_next() gets invoked is now broken.
+*/
+
+#define UNICODE_LB_MANDATORY	-1
+#define UNICODE_LB_NONE		0
+#define UNICODE_LB_ALLOWED	1
+
+struct unicode_lb_info;
+
+typedef struct unicode_lb_info *unicode_lb_info_t;
+
+/*
+** Allocate a linebreaking handle.
+*/
+extern unicode_lb_info_t unicode_lb_init(int (*cb_func)(int, void *),
+					 void *cb_arg);
+
+/*
+** Feed the next character through the linebreaking algorithm.
+** A non-zero return code indicates that the callback function was invoked
+** and it returned a non-zero return code (which is propagated as a return
+** value). unicode_lb_end() must still be invoked, in this case.
+**
+** A zero return code indicates that if the callback function was invoked,
+** it returned 0.
+*/
+
+extern int unicode_lb_next(unicode_lb_info_t i, unicode_char ch);
+
+/*
+** Convenience function that invokes unicode_lb_next() with a list of
+** unicode chars. Returns 0 if all invocations of unicode_lb_next() returned
+** 0, or the first non-zero return value from unicode_lb_next().
+*/
+
+extern int unicode_lb_next_cnt(unicode_lb_info_t i,
+			       const unicode_char *chars,
+			       size_t cnt);
+
+/*
+** Finish the linebreaking algorithm.
+**
+** A non-zero return code indicates that the callback function was invoked
+** and it returned a non-zero return code (which is propagated as a return
+** value).
+**
+** A zero return code indicates that if the callback function was invoked,
+** it returned 0, and that the callback function was invoked exactly the same
+** number of times that unicode_lb_next() was invoked.
+**
+** In all case, the linebreak handle will no longer be valid when this
+** function returns.
+*/
+
+extern int unicode_lb_end(unicode_lb_info_t i);
+
+/*
+** An alternative linebreak API where the callback function receives the
+** original unicode character in addition to its linebreak value.
+**
+** User unicode_lbc_init(), unicode_lbc_next(), and unicode_lbc_end(), whose
+** semantics are the same as their _lb_ counterparts.
+*/
+
+struct unicode_lbc_info;
+
+typedef struct unicode_lbc_info *unicode_lbc_info_t;
+
+extern unicode_lbc_info_t unicode_lbc_init(int (*cb_func)(int, unicode_char,
+							  void *),
+					   void *cb_arg);
+extern int unicode_lbc_next(unicode_lbc_info_t i, unicode_char ch);
+extern int unicode_lbc_next_cnt(unicode_lbc_info_t i,
+				const unicode_char *chars,
+				size_t cnt);
+extern int unicode_lbc_end(unicode_lbc_info_t i);
+
+/*
+** Set linebreaking options.
+**
+** OPTIONS SUBJECT TO CHANGE.
+*/
+
+extern void unicode_lb_set_opts(unicode_lb_info_t i, int opts);
+
+extern void unicode_lbc_set_opts(unicode_lbc_info_t i, int opts);
+
+/*
+** Tailorization of LB24: Prevent pluses, as in "C++", from breaking.
+**
+** Adds the following to LB24:
+**
+**            PR x PR
+**
+**            AL x PR
+**
+**            ID x PR
+**/
+#define UNICODE_LB_OPT_PRBREAK 0x0001
+
+
+/*
+** Tailored "/" breaking rules. This prevents breaking after the "/"
+** character. And provides an exception to the "x SY" rule in LB13.
+**
+** Adds the following rule to LB13:
+**
+**            SY x EX
+**
+**            SY x AL
+**
+**            SY x ID
+**
+**            SP ÷ SY, which takes precedence over "x SY".
+*/
+#define UNICODE_LB_OPT_SYBREAK 0x0002
+
+/*
+** Tailored / breaking rules.
+**
+** This reclassifies U+2013 and U+2014 as class WJ, prohibiting breaks before
+** and after mdash and ndash.
+*/
+#define UNICODE_LB_OPT_DASHWJ 0x0004
+
+/*
+** Implemention of word break rules, as per
+** http://www.unicode.org/reports/tr29/tr29-27.html
+**
+** Invoke unicode_wb_init() to initialize the wordbreaking algorithm. The
+** first parameter is a callback function that gets invoked with two
+** arguments: an int flag, and a passthrough argument. The second parameter to
+** unicode_wb_init() is the opaque passthrough pointer, that is passed as the
+** second argument to the callback function with no further interpretation.
+**
+** unicode_wb_init() returns an opaque handle. Invoke unicode_wb_next(),
+** passing the handle and one unicode character. Repeatedly invoke
+** unicode_wb_next() to specify the input string for the wordbreaking
+** algorithm, then invoke unicode_wb_end() to finish calculating the
+** wordbreaking algorithm, and deallocate the opaque wordbreaking handle.
+**
+** The callback function gets invoked once for each invocation of
+** unicode_wb_next(). The contract is that before unicode_wb_end() returns,
+** the callback function will get invoked the exact number of times that
+** unicode_wb_next(), as long as each invocation of the callback function
+** returned 0; nothing more, nothing less. The first parameter to the callback
+** function will be an int. A non-zero value indicates that there is a word
+** break between this character and the preceding one.
+**
+** The callback function should return 0. A non-zero value indicates an
+** error, which gets propagated up to the caller. The contract that the
+** callback function gets invoked the same number of times that
+** unicode_lb_next() gets invoked is now broken.
+*/
+
+struct unicode_wb_info;
+
+typedef struct unicode_wb_info *unicode_wb_info_t;
+
+/*
+** Allocate a wordbreaking handle.
+*/
+extern unicode_wb_info_t unicode_wb_init(int (*cb_func)(int, void *),
+					 void *cb_arg);
+
+/*
+** Feed the next character through the wordbreaking algorithm.
+** A non-zero return code indicates that the callback function was invoked
+** and it returned a non-zero return code (which is propagated as a return
+** value). unicode_wb_end() must still be invoked, in this case.
+**
+** A zero return code indicates that if the callback function was invoked,
+** it returned 0.
+*/
+
+extern int unicode_wb_next(unicode_wb_info_t i, unicode_char ch);
+
+/*
+** Convenience function that invokes unicode_wb_next() with a list of
+** unicode chars. Returns 0 if all invocations of unicode_wb_next() returned
+** 0, or the first non-zero return value from unicode_wb_next().
+*/
+
+extern int unicode_wb_next_cnt(unicode_wb_info_t i,
+			       const unicode_char *chars,
+			       size_t cnt);
+
+/*
+** Finish the wordbreaking algorithm.
+**
+** A non-zero return code indicates that the callback function was invoked
+** and it returned a non-zero return code (which is propagated as a return
+** value).
+**
+** A zero return code indicates that if the callback function was invoked,
+** it returned 0, and that the callback function was invoked exactly the same
+** number of times that unicode_wb_next() was invoked.
+**
+** In all case, the wordbreak handle will no longer be valid when this
+** function returns.
+*/
+
+extern int unicode_wb_end(unicode_wb_info_t i);
+
+/*
+** Search for a word boundary.
+**
+** Obtain a handle by calling unicode_wbscan_init(), then invoke
+** unicode_wbscan_next() to provide a unicode stream, then invoke
+** unicode_wbscan_end(). unicode_wbscan_end() returns the number of unicode
+** characters from the beginning of the stream until the first word boundary.
+**
+** You may prematurely stop calling unicode_wbscan_next() once it returns a
+** non-0 value, which means that there is sufficient context to compute the
+** first word boundary, and all further calls to unicode_wbscan_next() will
+** be internal no-ops.
+*/
+
+struct unicode_wbscan_info;
+
+typedef struct unicode_wbscan_info *unicode_wbscan_info_t;
+
+unicode_wbscan_info_t unicode_wbscan_init();
+
+int unicode_wbscan_next(unicode_wbscan_info_t i, unicode_char ch);
+
+size_t unicode_wbscan_end(unicode_wbscan_info_t i);
+
+/*
+** A buffer that holds unicode characters, and dynamically grows as needed.
+*/
+
+struct unicode_buf {
+	unicode_char *ptr;	/* The unicode characters */
+	size_t size,		/* Buffer size */
+		len,		/* How many characters in ptr are initialized */
+		max;		/* Maximum size the buffer can grow to */
+};
+
+/*
+** Initialize a buffer. Constructor.
+*/
+
+void unicode_buf_init(/* Initialize this structure. ptr, size, len cleared */
+		      struct unicode_buf *p,
+
+		      /*
+		      ** Maximum size the buffer can grow to. (size_t)-1
+		      ** means unlimited.
+		      */
+		      size_t max);
+/*
+** Like unicode_buf_init, and initialize the new buffer with the contents of
+** another buffer. The maximum size of the initialized buffer is exactly the
+** number of characters in the existing buffer. This copies a buffer using
+** the minimum amount of heap space.
+*/
+
+#define unicode_buf_init_copy(a,b)				\
+	do {							\
+		unicode_buf_init((a), unicode_buf_len(b));	\
+		unicode_buf_append_buf((a),(b));		\
+	} while (0)
+
+/*
+** Deinitialize the buffer. Destructor. Frees memory.
+*/
+
+void unicode_buf_deinit(struct unicode_buf *p);
+
+/*
+** Official way to access the characters in the unicode buffer.
+*/
+#define unicode_buf_ptr(p) ((p)->ptr)
+
+/*
+** Official way of obtaining the number of characters in the unicode buffer.
+*/
+#define unicode_buf_len(p) ((p)->len)
+
+/*
+** Remove all existing characters from an initialized buffer. Sets len to 0.
+*/
+
+#define unicode_buf_clear(p) ((p)->len=0)
+
+/*
+** Append characters to the existing characters in the unicode buffer.
+** The buffer grows, if needed. If the buffer would exceed its maximum size,
+** the extra characters get truncated.
+**
+** Returns 0 if the characters were appended. -1 for a malloc failure.
+*/
+
+int unicode_buf_append(struct unicode_buf *p,	/* The buffer */
+		       const unicode_char *uc,	/* Characters to append */
+		       size_t l);		/* How many of them */
+
+/*
+** Convert an iso-8859-1 char string and invoke unicode_buf_append().
+*/
+
+void unicode_buf_append_char(struct unicode_buf *dst,
+			     const char *str,
+			     size_t cnt);
+
+/*
+** Remove some portion of the unicode buffer
+*/
+
+void unicode_buf_remove(struct unicode_buf *p, /* The buffer */
+			size_t pos, /* Offset in buffer */
+			size_t cnt); /* How many to remove */
+
+/*
+** Append the contents of an existing buffer to another one.
+*/
+
+#define unicode_buf_append_buf(a,b)					\
+	unicode_buf_append((a), unicode_buf_ptr(b), unicode_buf_len(b))
+
+
+/*
+** The equivalent of strcmp() for unicode buffers.
+*/
+
+int unicode_buf_cmp(const struct unicode_buf *a,
+		    const struct unicode_buf *b);
+
+/*
+** The equivalent of unicode_buf_cmp, except that the second buffer is an
+** iso-8859-1 string.
+*/
+
+int unicode_buf_cmp_str(const struct unicode_buf *p,
+			const char *c,	/* iso-8859-1 string */
+			size_t cl);	/* Number of chars in c */
+
+/*
+** A wrapper for iconv(3). This wrapper provides a different API for iconv(3).
+** A handle gets created by unicode_convert_init().
+** unicode_convert_init() receives a pointer to the output function
+** which receives converted character text.
+**
+** The output function receives a pointer to the converted character text, and
+** the number of characters in the converted text.
+**
+** The character text to convert gets passed, repeatedly, to
+** unicode_convert(). Each call to unicode_convert() results in
+** the output function being invoked, zero or more times, with the converted
+** text. Finally, unicode_convert_deinit() stops the conversion and
+** deallocates the conversion handle.
+**
+** Internal buffering takes place. unicode_convert_deinit() may result
+** in the output function being called one or more times, to receive the final
+** part of the converted character stream.
+**
+** The output function should return 0. A non-0 value causes
+** unicode_convert() and/or unicode_convert_deinit() returning
+** non-0.
+*/
+
+struct unicode_convert_hdr;
+
+typedef struct unicode_convert_hdr *unicode_convert_handle_t;
+
+/*
+** unicode_convert_init() returns a non-NULL handle for the requested
+** conversion, or NULL if the requested conversion is not available.
+*/
+
+unicode_convert_handle_t
+unicode_convert_init(/* Convert from this chset */
+		       const char *src_chset,
+
+		       /* Convert to this chset */
+		       const char *dst_chset,
+
+		       /* The output function */
+
+		       int (*output_func)(const char *, size_t, void *),
+
+		       /* Passthrough arg */
+		       void *convert_arg);
+
+/*
+** Repeatedly pass the character text to convert to unicode_convert().
+**
+** Returns non-0 if the output function returned non-0, or 0 if all invocations
+** of the output function returned 0.
+*/
+
+int unicode_convert(/* The conversion handle */
+		      unicode_convert_handle_t handle,
+
+		      /* Text to convert */
+		      const char *text,
+
+		      /* Number of bytes to convert */
+		      size_t cnt);
+
+/*
+** Finish character set conversion. The handle gets deallocated.
+**
+** May still result in one or more invocations of the output function.
+** Returns non-zero if any previous invocation of the output function returned
+** non-zero (this includes any invocations of the output function resulting
+** from this call, or prior unicode_convert() calls), or 0 if all
+** invocations of the output function returned 0.
+**
+** If the errptr is not NULL, *errptr is set to non-zero if there were any
+** conversion errors -- if there was any text that could not be converted to
+** the destination character text.
+*/
+
+int unicode_convert_deinit(unicode_convert_handle_t handle,
+			     int *errptr);
+
+
+/*
+** Specialization: save converted character text in a buffer.
+**
+** Implementation: call unicode_convert_tocbuf_init() instead of
+** unicode_convert_init(), then call unicode_convert() and
+** unicode_convert_deinit(), as usual.
+**
+** If unicode_convert_deinit() returns 0, *cbufptr_ret gets initialized to a
+** malloc()ed buffer, and the number of converted characters, the size of the
+** malloc()ed buffer, are placed into *csize_ret arguments, that were passed
+** to unicode_convert_tou_init().
+**
+** Note: if the converted string is an empty string, *cbufsize_ret is set to 0,
+** but *cbufptr_ptr still gets initialized (to a dummy malloced buffer).
+**
+** The optional nullterminate places a trailing \0 character after the
+** converted string (this is included in *cbufsize_ret).
+*/
+
+unicode_convert_handle_t
+unicode_convert_tocbuf_init(/* Convert from this chset */
+			      const char *src_chset,
+
+			      /* Convert to this chset */
+			      const char *dst_chset,
+
+			      /* malloced buffer */
+			      char **cbufptr_ret,
+
+			      /* size of the malloced buffer */
+			      size_t *cbufsize_ret,
+
+			      /* null terminate the resulting string */
+			      int nullterminate
+			      );
+
+
+/*
+** Specialization: convert some character text to a unicode_char array.
+**
+** This is like unicode_convert_tocbuf_init(), but converts to a unicode_char
+** array.
+**
+** The returned *ucsize_ret is initialized with the number of unicode_chars,
+** rather than the byte count.
+**
+** In all other ways, this function behaves identically to
+** unicode_convert_tocbuf_init().
+*/
+
+unicode_convert_handle_t
+unicode_convert_tou_init(/* Convert from this chset */
+			   const char *src_chset,
+
+			   /* malloc()ed buffer pointer, on exit. */
+			   unicode_char **ucptr_ret,
+
+			   /* size of the malloc()ed buffer, upon exit */
+			   size_t *ucsize_ret,
+
+			   /* If true, terminate with U+0x0000, for convenience */
+			   int nullterminate
+			   );
+
+/*
+** Specialization: convert a unicode_char array to some character text.
+**
+** This is the opposite of unicode_convert_tou_init(). Call this to
+** initialize the conversion handle, then use unicode_convert_uc()
+** instead of unicode_convert.
+*/
+
+unicode_convert_handle_t
+unicode_convert_fromu_init(/* Convert to this chset */
+			     const char *dst_chset,
+
+			     /* malloc()ed buffer pointer, on exit. */
+			     char **cbufptr_ret,
+
+			     /* size of the malloc()ed buffer, upon exit */
+			     size_t *cbufsize_ret,
+
+			     /* If true, terminate with U+0x0000, for convenience */
+			     int nullterminate
+			     );
+
+int unicode_convert_uc(/* The conversion handle */
+			 unicode_convert_handle_t handle,
+
+			 /* Text to convert */
+			 const unicode_char *text,
+
+			 /* Number of bytes to convert */
+			 size_t cnt);
+
+/*
+** Initialize conversion to UTF-8.
+**
+** This is a wrapper for unicode_convert_tocbuf_init() that specifies the
+** destination charset as UTF-8.
+*/
+
+unicode_convert_handle_t
+unicode_convert_tocbuf_toutf8_init(const char *src_chset,
+				     char **cbufptr_ret,
+				     size_t *cbufsize_ret,
+				     int nullterminate);
+
+/*
+** Initialize conversion from UTF-8.
+**
+** This is a wrapper for unicode_convert_tocbuf_init() that specifies the
+** source charset as UTF-8.
+*/
+
+unicode_convert_handle_t
+unicode_convert_tocbuf_fromutf8_init(const char *dst_chset,
+				       char **cbufptr_ret,
+				       size_t *cbufsize_ret,
+				       int nullterminate);
+
+/*
+** Convert a character string to UTF-8.
+**
+** Returns a malloc-ed buffer holding the UTF-8 string, or NULL if an
+** error occured.
+*/
+char *unicode_convert_toutf8(/* Text to convert to UTF-8 */
+			       const char *text,
+
+			       /* Character set to convert to UTF-8 */
+			       const char *charset,
+
+			       /*
+			       ** If non-NULL, and a non-NULL pointer is
+			       ** returned, *error is set to non-zero if
+			       ** a character conversion error has occured.
+			       */
+			       int *error);
+
+/*
+** Convert UTF-8 text to another character set.
+**
+** Returns a malloc-ed buffer holding the string converted to the specified
+** character set, or NULL if an error occured.
+*/
+
+char *unicode_convert_fromutf8(/* A UTF-8 string */
+				 const char *text,
+
+				 /*
+				 ** Convert the UTF-8 string to this character
+				 ** set.
+				 */
+
+				 const char *charset,
+
+				 /*
+				 ** If non-NULL, and a non-NULL pointer is
+				 ** returned, *error is set to non-zero if
+				 ** a character conversion error has occured.
+				 */
+				 int *error);
+
+/*
+** Convert one charset to another charset, placing the result in a malloc-ed
+** buffer.
+**
+** Returns a malloc-ed buffer holding the string converted to the specified
+** character set, or NULL if an error occured.
+*/
+
+char *unicode_convert_tobuf(/* A string to convert */
+			      const char *text,
+
+			      /*
+			      ** String's charset.
+			      */
+
+			      const char *charset,
+
+			      /*
+			      ** Destination charset
+			      */
+			      const char *dstcharset,
+
+			      /*
+			      ** If non-NULL, and a non-NULL pointer is
+			      ** returned, *error is set to non-zero if
+			      ** a character conversion error has occured.
+			      */
+			      int *error);
+
+/*
+** Convenience function: call unicode_convert_tou_init(), feed the
+** character string through unicode_convert(), then call
+** unicode_convert_deinit().
+**
+** If this function returns 0, *uc and *ucsize is set to a malloced buffer+size
+** holding the unicode char array.
+*/
+
+int unicode_convert_tou_tobuf(/* Character text to convert */
+				const char *text,
+
+				/* Number of characters */
+				size_t text_l,
+
+				/* text's charset */
+				const char *charset,
+
+				/*
+				** If this function returns 0, this gets
+				** initialized
+				*/
+				unicode_char **uc,
+
+				/*
+				** Size of the allocated buffer
+				*/
+				size_t *ucsize,
+
+				/*
+				** If not null and this function returns 0,
+				** this is set to non-0 if there
+				** was a conversion error (but the output
+				** buffer gets still allocated and
+				** initialized)
+				*/
+				int *err);
+
+/*
+** Convenience function: call unicode_convert_fromu_init(), feed the
+** unicode_array through unicode_convert_uc(), then call
+** unicode_convert_deinit().
+**
+** If this function returns 0, *uc and *ucsize is set to a malloced buffer+size
+** holding the converted character string
+*/
+
+int unicode_convert_fromu_tobuf(/* Unicode array to convert to a char str */
+				  const unicode_char *utext,
+
+				  /*
+				  ** Size of the unicode array.
+				  ** If this is (size_t)-1, utext is a
+				  ** 0-terminated array.
+				  */
+				  size_t utext_l,
+
+				  /*
+				  ** Convert the unicode array to this charset.
+				  */
+				  const char *charset,
+
+				  /*
+				  ** If unicode_convert_fromu_tobuf()
+				  ** returns 0, this is initialized to a
+				  ** malloced buffer with a 0-terminated
+				  ** string is kept.
+				  */
+				  char **c,
+
+				  /*
+				  ** Size of the initialized array, including
+				  ** the 0-terminator.
+				  */
+				  size_t *csize,
+
+				  /*
+				  ** If unicode_convert_fromu_tobuf()
+				  ** returns 0 and this is not NULL,
+				  ** *err is set to non-0 if there was a
+				  ** conversion error to the requested
+				  ** character set.
+				  */
+				  int *err);
+
+/*
+** Convenience function: convert a string in a given character set
+** to/from uppercase, lowercase, or something else.
+**
+** This is done by calling unicode_convert_tou_tobuf() first,
+** applying the title_func and char_func, then using
+** unicode_convert_fromu_tobuf().
+**
+** A NULL return indicates that the requested conversion cannot be performed.
+*/
+
+char *unicode_convert_tocase( /* String to convert */
+			       const char *str,
+
+			       /* String's character set */
+
+			       const char *charset,
+
+			       /*
+			       ** Conversion of the first character in
+			       ** str: unicode_uc, unicode_lc, or unicode_tc:
+			       */
+
+			       unicode_char (*first_char_func)(unicode_char),
+
+			       /*
+			       ** Conversion of the second and the remaining
+			       ** character in str. If NULL, same as
+			       ** first_char_func.
+			       */
+			       unicode_char (*char_func)(unicode_char));
+
+
+
+/* Either UCS-4BE or UCS-4LE, matching the native unicode_char endianness */
+
+extern const char unicode_u_ucs4_native[];
+
+/* Either UCS-2BE or UCS-2LE, matching the native unicode_char endianness */
+
+extern const char unicode_u_ucs2_native[];
+
+/*
+** Modified-UTF7 encoding used for IMAP folder names. Pass it for a charset
+** parameter.
+**
+** This can be followed by a " " and up to 15 characters to be escaped in
+** addition to unicode chars.
+*/
+
+#define unicode_x_imap_modutf7 "x-imap-modutf7"
+
+#if 0
+{
+#endif
+
+#ifdef	__cplusplus
+}
+
+extern size_t unicode_wcwidth(const std::vector<unicode_char> &uc);
+
+namespace unicode {
+
+#if 0
+};
+#endif
+
+/*
+** Various character sets
+*/
+extern const char ucs_4[], ucs_2[], utf_8[], iso_8859_1[];
+
+/*
+** Interface to iconv.
+**
+** Subclass converted(). Invoke begin(), then operator(), repeatedly,
+** then end().
+**
+** converted() receives the converted text.
+*/
+
+class iconvert {
+
+	unicode_convert_handle_t handle;
+
+ public:
+	iconvert();
+	virtual ~iconvert();
+
+	/* Start conversion.
+	** Returns false if the requested conversion cannot be done.
+	**/
+
+	bool begin(/* Convert from */
+		   const std::string &src_chset,
+
+		   /* Convert to */
+		   const std::string &dst_chset);
+
+	/* Feed iconv(3). Returns false if the conversion was aborted.
+	 */
+
+	bool operator()(const char *, size_t);
+
+	bool operator()(const unicode_char *, size_t);
+
+	/*
+	** Get the results here. If the subclass returns a non-0
+	** value, the conversion is aborted.
+	*/
+
+	virtual int converted(const char *, size_t);
+
+	/*
+	** End of conversion.
+	**
+	** Returns true if all calls to converted() returned 0,
+	** false if the conversion was aborted.
+	**
+	** errflag is set to true if there was a character that could
+	** not be converted, and passed to converted().
+	*/
+
+	bool end(bool &errflag)
+	{
+		return end(&errflag);
+	}
+
+	bool end()
+	{
+		return end(NULL);
+	}
+
+	/* Convert between two different charsets */
+
+	static std::string convert(const std::string &text,
+				   const std::string &charset,
+				   const std::string &dstcharset,
+				   bool &errflag);
+
+	/* Convert between two different charsets */
+
+	static std::string convert(const std::string &text,
+				   const std::string &charset,
+				   const std::string &dstcharset)
+	{
+		bool dummy;
+
+		return convert(text, charset, dstcharset, dummy);
+	}
+
+	/* Convert from unicode to a charset */
+
+	static std::string convert(const std::vector<unicode_char> &uc,
+				   const std::string &dstcharset,
+				   bool &errflag);
+
+	/* Convert from unicode to a charset */
+
+	static std::string convert(const std::vector<unicode_char> &uc,
+				   const std::string &dstcharset)
+	{
+		bool dummy;
+
+		return convert(uc, dstcharset, dummy);
+	}
+
+	/* Convert charset to unicode */
+
+	static bool convert(const std::string &text,
+			    const std::string &charset,
+			    std::vector<unicode_char> &uc);
+
+
+	/* Convert to upper/lower/title case */
+
+	static std::string
+		convert_tocase(/* Text string */
+			       const std::string &text,
+
+			       /* Its charset */
+			       const std::string &charset,
+
+			       /* First character: unicode_uc, unicode_lc, or unicode_tc */
+			       unicode_char (*first_char_func)(unicode_char),
+
+			       /* If not NULL, second and subsequent chars */
+			       unicode_char (*char_func)(unicode_char)
+			       =NULL)
+	{
+		bool dummy;
+
+		return convert_tocase(text, charset, dummy,
+				      first_char_func,
+				      char_func);
+	}
+
+	/* Convert to upper/lower/title case */
+
+	static std::string
+		convert_tocase(/* Text string */
+			       const std::string &text,
+
+			       /* Its charset */
+			       const std::string &charset,
+
+			       /* Set if there's a conversion error */
+			       bool &err,
+
+			       /* First character: unicode_uc, unicode_lc, or unicode_tc */
+			       unicode_char (*first_char_func)(unicode_char),
+
+			       /* If not NULL, second and subsequent chars */
+			       unicode_char (*char_func)(unicode_char)
+			       =NULL);
+ private:
+	bool end(bool *);
+
+ public:
+	class tou;
+	class fromu;
+};
+
+/* Convert output of iconvert to unicode_chars. */
+
+class iconvert::tou : public iconvert {
+
+ public:
+	bool begin(const std::string &chset);
+
+	virtual int converted(const unicode_char *, size_t);
+
+	using iconvert::operator();
+ private:
+	int converted(const char *ptr, size_t cnt);
+
+ public:
+	template<typename iter_t> class to_iter_class;
+
+	template<typename input_iter_t,
+		typename output_iter_t>
+		static output_iter_t convert(input_iter_t from_iter,
+					     input_iter_t to_iter,
+					     const std::string &chset,
+					     bool &flag,
+					     output_iter_t out_iter);
+
+	template<typename input_iter_t>
+		static bool convert(input_iter_t from_iter,
+				    input_iter_t to_iter,
+				    const std::string &chset,
+				    std::vector<unicode_char> &out_buf)
+	{
+		bool flag;
+
+		out_buf.clear();
+		std::back_insert_iterator<std::vector<unicode_char> >
+			insert_iter(out_buf);
+
+		convert(from_iter, to_iter, chset, flag, insert_iter);
+
+		return flag;
+	}
+
+	static std::pair<std::vector<unicode_char>, bool>
+		convert(const std::string &str,
+			const std::string &chset);
+};
+
+/* Helper class that saves unicode output into an output iterator */
+
+template<typename iter_t>
+class iconvert::tou::to_iter_class : public iconvert::tou {
+
+	iter_t iter;
+ public:
+
+ to_iter_class(iter_t iterValue)
+	 : iter(iterValue) {}
+
+	using tou::operator();
+
+	operator iter_t() const { return iter; }
+
+ private:
+	int converted(const unicode_char *ptr, size_t cnt)
+	{
+		while (cnt)
+		{
+			*iter=*ptr;
+
+			++iter;
+			++ptr;
+			--cnt;
+		}
+		return 0;
+	}
+};
+
+template<typename input_iter_t,
+	typename output_iter_t>
+	output_iter_t iconvert::tou::convert(input_iter_t from_iter,
+					     input_iter_t to_iter,
+					     const std::string &chset,
+					     bool &flag,
+					     output_iter_t out_iter)
+{
+	class to_iter_class<output_iter_t> out(out_iter);
+
+	if (!out.begin(chset))
+		return out;
+
+	std::vector<char> string;
+
+	while (from_iter != to_iter)
+	{
+		string.push_back(*from_iter++);
+
+		if (string.size() > 31)
+		{
+			out(&string[0], string.size());
+			string.clear();
+		}
+	}
+
+	if (string.size() > 0)
+		out(&string[0], string.size());
+
+	out.end(flag);
+	return out;
+}
+
+/* Convert output of iconvert from unicode_chars. */
+
+class iconvert::fromu : public iconvert {
+
+ public:
+	bool begin(const std::string &chset);
+
+	using iconvert::operator();
+
+	template<typename iter_t> class to_iter_class;
+
+	template<typename input_iter_t,
+		typename output_iter_t>
+		static output_iter_t convert(input_iter_t from_iter,
+					     input_iter_t to_iter,
+					     const std::string &chset,
+					     output_iter_t out_iter,
+					     bool &errflag);
+
+	template<typename input_iter_t>
+		static void convert(input_iter_t from_iter,
+				    input_iter_t to_iter,
+				    const std::string &chset,
+				    std::string &out_buf,
+				    bool &errflag)
+	{
+		out_buf="";
+		std::back_insert_iterator<std::string>
+			insert_iter(out_buf);
+
+		convert(from_iter, to_iter, chset, insert_iter,
+			errflag);
+	}
+
+	static std::pair<std::string, bool>
+		convert(const std::vector<unicode_char> &ubuf,
+			const std::string &chset);
+};
+
+/* Helper class that saves unicode output into an output iterator */
+
+template<typename iter_t>
+class iconvert::fromu::to_iter_class : public iconvert::fromu {
+
+	iter_t iter;
+ public:
+
+ to_iter_class(iter_t iterValue)
+	 : iter(iterValue) {}
+
+	using fromu::operator();
+
+	operator iter_t() const { return iter; }
+
+ private:
+	int converted(const char *ptr, size_t cnt)
+	{
+		while (cnt)
+		{
+			*iter=*ptr;
+
+			++iter;
+			++ptr;
+			--cnt;
+		}
+		return 0;
+	}
+};
+
+template<typename input_iter_t,
+	typename output_iter_t>
+	output_iter_t iconvert::fromu::convert(input_iter_t from_iter,
+					       input_iter_t to_iter,
+					       const std::string &chset,
+					       output_iter_t out_iter,
+					       bool &errflag)
+{
+	errflag=true;
+
+	class to_iter_class<output_iter_t> out(out_iter);
+
+	if (!out.begin(chset))
+		return out;
+
+	std::vector<unicode_char> string;
+
+	while (from_iter != to_iter)
+	{
+		string.push_back(*from_iter++);
+
+		if (string.size() > 31)
+		{
+			out(&string[0], string.size());
+			string.clear();
+		}
+	}
+
+	if (string.size() > 0)
+		out(&string[0], string.size());
+
+	out.end(errflag);
+	return out;
+}
+
+/*
+** Unicode linebreaking algorithm, tr14.
+*/
+
+extern "C" int linebreak_trampoline(int value, void *ptr);
+extern "C" int linebreakc_trampoline(int value, unicode_char ch,
+				     void *ptr);
+
+/*
+** Subclass linebreak_callback_base, implement operator()(int).
+**
+** Use operator<< or operator()(iterator, iterator) to feed
+** unicode_chars into the linebreaking algorithm. The subclass receives
+** UNICODE_LB values, as they become available.
+*/
+
+class linebreak_callback_base {
+
+	unicode_lb_info_t handle;
+
+	int opts;
+
+#if __cplusplus >= 201103L
+ public:
+	linebreak_callback_base(const linebreak_callback_base &)=delete;
+	linebreak_callback_base &operator=(const
+					   linebreak_callback_base &)=delete;
+ private:
+#else
+	linebreak_callback_base(const linebreak_callback_base &);
+	/* NOT IMPLEMENTED */
+
+	linebreak_callback_base &operator=(const
+					   linebreak_callback_base &);
+	/* NOT IMPLEMENTED */
+#endif
+ public:
+	linebreak_callback_base();
+	virtual ~linebreak_callback_base();
+
+	void finish();
+
+	void set_opts(int opts);
+
+	friend int linebreak_trampoline(int, void *);
+
+	linebreak_callback_base &operator<<(unicode_char uc);
+
+	template<typename iter_type>
+		linebreak_callback_base &operator()(iter_type beg_iter,
+						    iter_type end_iter)
+	{
+		while (beg_iter != end_iter)
+			operator<<(*beg_iter++);
+		return *this;
+	}
+
+	template<typename container_type>
+		linebreak_callback_base &operator()(const container_type &vec)
+	{
+		return operator()(vec.begin(), vec.end());
+	}
+ private:
+	virtual int callback(int);
+};
+
+class linebreak_callback_save_buf : public linebreak_callback_base {
+
+ public:
+	std::list<int> lb_buf;
+
+	linebreak_callback_save_buf();
+	~linebreak_callback_save_buf();
+
+	using linebreak_callback_base::operator<<;
+	using linebreak_callback_base::operator();
+ private:
+	int callback(int value);
+};
+
+/*
+** Convert an input iterator sequence over unicode_chars into
+** an input iterator sequence over linebreak values.
+*/
+
+template<typename input_t> class linebreak_iter
+: public std::iterator<std::input_iterator_tag, int, void>
+{
+	mutable input_t iter_value, end_iter_value;
+
+	mutable linebreak_callback_save_buf *buf;
+
+	void fill() const
+	{
+		if (buf == NULL)
+			return;
+
+		while (buf->lb_buf.empty())
+		{
+			if (iter_value == end_iter_value)
+			{
+				buf->finish();
+				if (buf->lb_buf.empty())
+				{
+					delete buf;
+					buf=NULL;
+				}
+				break;
+			}
+
+			buf->operator<<(*iter_value++);
+		}
+	}
+
+	mutable value_type bufvalue;
+
+ public:
+ linebreak_iter(const input_t &iter_valueArg,
+		const input_t &iter_endvalueArg)
+	 : iter_value(iter_valueArg),
+		end_iter_value(iter_endvalueArg),
+		buf(new linebreak_callback_save_buf)
+		{
+		}
+
+ linebreak_iter() : buf(NULL)
+	{
+	}
+
+	void set_opts(int opts)
+	{
+		if (buf)
+			buf->set_opts(opts);
+	}
+
+	~linebreak_iter()
+	{
+		if (buf)
+			delete buf;
+	}
+
+ linebreak_iter(const linebreak_iter<input_t> &v)
+	 : buf(NULL)
+	{
+		operator=(v);
+	}
+
+	linebreak_iter<input_t> &operator=(const
+					   linebreak_iter<input_t> &v)
+		{
+			if (buf)
+				delete buf;
+			buf=v.buf;
+			iter_value=v.iter_value;
+			end_iter_value=v.end_iter_value;
+			v.buf=NULL;
+			return *this;
+		}
+
+	bool operator==(const linebreak_iter<input_t> &v) const
+	{
+		fill();
+		v.fill();
+
+		return buf == NULL && v.buf == NULL;
+	}
+
+	bool operator!=(const linebreak_iter<input_t> &v) const
+	{
+		return !operator==(v);
+	}
+
+	value_type operator*() const
+	{
+		fill();
+		return buf == NULL ? UNICODE_LB_MANDATORY:
+			buf->lb_buf.front();
+	}
+
+	linebreak_iter<input_t> &operator++()
+	{
+		bufvalue=operator*();
+
+		if (buf)
+			buf->lb_buf.pop_front();
+		return *this;
+	}
+
+	const value_type *operator++(int)
+	{
+		operator++();
+		return &bufvalue;
+	}
+};
+
+/*
+** Like linebreak_callback_base, except the subclass receives both
+** the linebreaking value, and the unicode character.
+*/
+
+class linebreakc_callback_base {
+
+	unicode_lbc_info_t handle;
+
+	int opts;
+
+#if __cplusplus >= 201103L
+ public:
+	linebreakc_callback_base(const linebreakc_callback_base &)
+		=delete;
+
+	linebreakc_callback_base &operator=(const
+					    linebreakc_callback_base
+					    &)=delete;
+ private:
+#else
+	linebreakc_callback_base(const linebreakc_callback_base &);
+	/* NOT IMPLEMENTED */
+
+	linebreakc_callback_base &operator=(const
+					    linebreakc_callback_base
+					    &);
+	/* NOT IMPLEMENTED */
+#endif
+
+ public:
+	linebreakc_callback_base();
+	virtual ~linebreakc_callback_base();
+
+	void finish();
+
+	void set_opts(int opts);
+
+	friend int linebreakc_trampoline(int, unicode_char, void *);
+
+	linebreakc_callback_base &operator<<(unicode_char uc);
+
+	template<typename iter_type>
+		linebreakc_callback_base &operator()(iter_type beg_iter,
+						     iter_type end_iter)
+	{
+		while (beg_iter != end_iter)
+			operator<<(*beg_iter++);
+		return *this;
+	}
+
+	linebreakc_callback_base &operator<<(const
+					     std::vector<unicode_char>
+					     &vec)
+	{
+		return operator()(vec.begin(), vec.end());
+	}
+ private:
+	virtual int callback(int, unicode_char);
+};
+
+class linebreakc_callback_save_buf : public linebreakc_callback_base {
+
+ public:
+	std::list<std::pair<int, unicode_char> > lb_buf;
+
+	linebreakc_callback_save_buf();
+	~linebreakc_callback_save_buf();
+
+	using linebreakc_callback_base::operator<<;
+	using linebreakc_callback_base::operator();
+ private:
+	int callback(int, unicode_char);
+};
+
+
+/*
+** Convert an input iterator sequence over unicode_chars into
+** an input iterator sequence over std::pair<int, unicode_char>,
+** the original unicode character, and the linebreaking value before
+** the character.
+*/
+
+template<typename input_t> class linebreakc_iter
+: public std::iterator<std::input_iterator_tag,
+	std::pair<int, unicode_char>, void>
+{
+	mutable input_t iter_value, end_iter_value;
+
+	mutable linebreakc_callback_save_buf *buf;
+
+	void fill() const
+	{
+		if (buf == NULL)
+			return;
+
+		while (buf->lb_buf.empty())
+		{
+			if (iter_value == end_iter_value)
+			{
+				buf->finish();
+				if (buf->lb_buf.empty())
+				{
+					delete buf;
+					buf=NULL;
+				}
+				break;
+			}
+
+			buf->operator<<(*iter_value);
+			++iter_value;
+		}
+	}
+
+	mutable value_type bufvalue;
+
+ public:
+ linebreakc_iter(const input_t &iter_valueArg,
+		 const input_t &iter_endvalueArg)
+	 : iter_value(iter_valueArg),
+		end_iter_value(iter_endvalueArg),
+		buf(new linebreakc_callback_save_buf)
+		{
+		}
+
+ linebreakc_iter() : buf(NULL)
+	{
+	}
+
+	~linebreakc_iter()
+	{
+		if (buf)
+			delete buf;
+	}
+
+ linebreakc_iter(const linebreakc_iter<input_t> &v)
+	 : buf(NULL)
+	{
+		operator=(v);
+	}
+
+	linebreakc_iter<input_t> &operator=(const
+					    linebreakc_iter<input_t> &v)
+		{
+			if (buf)
+				delete buf;
+			buf=v.buf;
+			iter_value=v.iter_value;
+			end_iter_value=v.end_iter_value;
+			v.buf=NULL;
+			return *this;
+		}
+
+	bool operator==(const linebreakc_iter<input_t> &v) const
+	{
+		fill();
+		v.fill();
+
+		return buf == NULL && v.buf == NULL;
+	}
+
+	bool operator!=(const linebreakc_iter<input_t> &v) const
+	{
+		return !operator==(v);
+	}
+
+	value_type operator*() const
+	{
+		fill();
+		return buf == NULL ?
+			std::make_pair(UNICODE_LB_MANDATORY,
+				       (unicode_char)0):
+			buf->lb_buf.front();
+	}
+
+	linebreakc_iter<input_t> &operator++()
+	{
+		bufvalue=operator*();
+
+		if (buf)
+			buf->lb_buf.pop_front();
+		return *this;
+	}
+
+	const value_type *operator++(int)
+	{
+		operator++();
+		return &bufvalue;
+	}
+};
+
+
+/*
+** Subclass wordbreak_callback_base, implement operator()(int).
+**
+** Use operator<< or operator()(iterator, iterator) to feed
+** unicode_chars into the wordbreaking algorithm. The subclass receives
+** word flags, as they become available.
+*/
+
+extern "C" int wordbreak_trampoline(int value, void *ptr);
+
+class wordbreak_callback_base {
+
+	unicode_wb_info_t handle;
+
+#if __cplusplus >= 201103L
+ public:
+	wordbreak_callback_base(const wordbreak_callback_base &)=delete;
+
+	wordbreak_callback_base &operator=(const
+					   wordbreak_callback_base &)
+		=delete;
+ private:
+#else
+	wordbreak_callback_base(const wordbreak_callback_base &);
+	/* NOT IMPLEMENTED */
+
+	wordbreak_callback_base &operator=(const
+					   wordbreak_callback_base &);
+	/* NOT IMPLEMENTED */
+#endif
+ public:
+	wordbreak_callback_base();
+	virtual ~wordbreak_callback_base();
+
+	void finish();
+
+	friend int wordbreak_trampoline(int, void *);
+
+	wordbreak_callback_base &operator<<(unicode_char uc);
+
+	template<typename iter_type>
+		wordbreak_callback_base &operator()(iter_type beg_iter,
+						    iter_type end_iter)
+	{
+		while (beg_iter != end_iter)
+			operator<<(*beg_iter++);
+		return *this;
+	}
+
+	wordbreak_callback_base &operator<<(const
+					    std::vector<unicode_char>
+					    &vec)
+	{
+		return operator()(vec.begin(), vec.end());
+	}
+ private:
+	virtual int callback(bool);
+};
+
+/*
+** A C++ wrapper for unicode_wbscan.
+*/
+
+class wordbreakscan {
+
+	unicode_wbscan_info_t handle;
+
+#if __cplusplus >= 201103L
+ public:
+	wordbreakscan(const wordbreakscan &)=delete;
+	wordbreakscan &operator=(const wordbreakscan &)=delete;
+ private:
+#else
+	wordbreakscan(const wordbreakscan &);
+	/* NOT IMPLEMENTED */
+
+	wordbreakscan &operator=(const wordbreakscan &);
+	/* NOT IMPLEMENTED */
+#endif
+ public:
+
+	wordbreakscan();
+	~wordbreakscan();
+
+	bool operator<<(unicode_char uc);
+
+	size_t finish();
+};
+
+//! Convert string in unicode_default_chset() to lowercase
+
+std::string tolower(const std::string &string);
+
+//! Convert string in unicode_default_chset() to uppercase
+
+std::string toupper(const std::string &string);
+
+//! Convert string in the given character set to lowercase
+
+std::string tolower(const std::string &string,
+		    const std::string &charset);
+
+//! Convert string in the given character set to uppercase
+
+std::string toupper(const std::string &string,
+		    const std::string &charset);
+
+//! Convert unicode to lowercase
+
+std::vector<unicode_char> tolower(const std::vector<unicode_char> &u);
+
+//! Convert unicode to uppercase
+
+std::vector<unicode_char> toupper(const std::vector<unicode_char> &u);
+
+#if 0
+{
+#endif
+}
+#endif
+
+#endif
author	Sam Varshavchik	2015-07-31 08:30:25 -0400
committer	Sam Varshavchik	2015-07-31 08:30:25 -0400
commit	84dd6e55bc0069a816d303e51e1f237a36b0c7a5 (patch)
tree	dde23184f469a31a49812771f1a6a0e3563e6386 /unicode/courier-unicode.h.in
parent	228f4c614680f27acceb807074857c01c753e100 (diff)
download	courier-libs-84dd6e55bc0069a816d303e51e1f237a36b0c7a5.tar.bz2