summaryrefslogtreecommitdiffstats
path: root/unicode
diff options
context:
space:
mode:
authorSam Varshavchik2014-04-21 22:20:36 -0400
committerSam Varshavchik2014-04-21 22:20:36 -0400
commit7cdedfdfc66f7045e739dd2ec2c33f059612fd86 (patch)
treef8778d49c80e0aa044b615be5287e35870692f85 /unicode
parente10a45421d2b95babe34eb31dbd09bda68a2a590 (diff)
downloadcourier-libs-7cdedfdfc66f7045e739dd2ec2c33f059612fd86.tar.bz2
courier-unicode: Add defs for standard charsets.
Diffstat (limited to 'unicode')
-rw-r--r--unicode/Makefile.am2
-rw-r--r--unicode/book.xml21
-rw-r--r--unicode/unicode.h5
-rw-r--r--unicode/unicodecpp.C22
4 files changed, 47 insertions, 3 deletions
diff --git a/unicode/Makefile.am b/unicode/Makefile.am
index 6a2fa6b..5f4447f 100644
--- a/unicode/Makefile.am
+++ b/unicode/Makefile.am
@@ -45,7 +45,7 @@ update-www-htmlent:
lib_LTLIBRARIES=libunicode.la
include_HEADERS=unicode.h
-man_MANS=$(srcdir)/man/courier-unicode.7 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]convert.3 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]convert_tocase.3 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]fromu.3 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]tou.3 $(srcdir)/man/unicode[\:][\:]linebreak_callback_base.3 $(srcdir)/man/unicode[\:][\:]linebreak_callback_save_buf.3 $(srcdir)/man/unicode[\:][\:]linebreak_iter.3 $(srcdir)/man/unicode[\:][\:]linebreakc_callback_base.3 $(srcdir)/man/unicode[\:][\:]linebreakc_iter.3 $(srcdir)/man/unicode[\:][\:]wordbreak_callback_base.3 $(srcdir)/man/unicode_convert.3 $(srcdir)/man/unicode_convert_deinit.3 $(srcdir)/man/unicode_convert_fromu_init.3 $(srcdir)/man/unicode_convert_fromu_tobuf.3 $(srcdir)/man/unicode_convert_fromutf8.3 $(srcdir)/man/unicode_convert_init.3 $(srcdir)/man/unicode_convert_tobuf.3 $(srcdir)/man/unicode_convert_tocase.3 $(srcdir)/man/unicode_convert_tocbuf_fromutf8_init.3 $(srcdir)/man/unicode_convert_tocbuf_init.3 $(srcdir)/man/unicode_convert_tocbuf_toutf8_init.3 $(srcdir)/man/unicode_convert_tou_init.3 $(srcdir)/man/unicode_convert_tou_tobuf.3 $(srcdir)/man/unicode_convert_toutf8.3 $(srcdir)/man/unicode_convert_uc.3 $(srcdir)/man/unicode_default_chset.3 $(srcdir)/man/unicode_grapheme_break.3 $(srcdir)/man/unicode_html40ent_lookup.3 $(srcdir)/man/unicode_isspace.3 $(srcdir)/man/unicode_lb_end.3 $(srcdir)/man/unicode_lb_init.3 $(srcdir)/man/unicode_lb_next.3 $(srcdir)/man/unicode_lb_next_cnt.3 $(srcdir)/man/unicode_lb_set_opts.3 $(srcdir)/man/unicode_lbc_end.3 $(srcdir)/man/unicode_lbc_init.3 $(srcdir)/man/unicode_lbc_next.3 $(srcdir)/man/unicode_lbc_next_cnt.3 $(srcdir)/man/unicode_lbc_set_opts.3 $(srcdir)/man/unicode_lc.3 $(srcdir)/man/unicode_tc.3 $(srcdir)/man/unicode_u_ucs2_native.3 $(srcdir)/man/unicode_u_ucs4_native.3 $(srcdir)/man/unicode_uc.3 $(srcdir)/man/unicode_wb_end.3 $(srcdir)/man/unicode_wb_init.3 $(srcdir)/man/unicode_wb_next.3 $(srcdir)/man/unicode_wb_next_cnt.3 
$(srcdir)/man/unicode_wbscan_end.3 $(srcdir)/man/unicode_wbscan_init.3 $(srcdir)/man/unicode_wbscan_next.3
+man_MANS=$(srcdir)/man/courier-unicode.7 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]convert.3 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]convert_tocase.3 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]fromu.3 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]tou.3 $(srcdir)/man/unicode[\:][\:]iso_8859_1.3 $(srcdir)/man/unicode[\:][\:]linebreak_callback_base.3 $(srcdir)/man/unicode[\:][\:]linebreak_callback_save_buf.3 $(srcdir)/man/unicode[\:][\:]linebreak_iter.3 $(srcdir)/man/unicode[\:][\:]linebreakc_callback_base.3 $(srcdir)/man/unicode[\:][\:]linebreakc_iter.3 $(srcdir)/man/unicode[\:][\:]ucs_2.3 $(srcdir)/man/unicode[\:][\:]ucs_4.3 $(srcdir)/man/unicode[\:][\:]utf_8.3 $(srcdir)/man/unicode[\:][\:]wordbreak_callback_base.3 $(srcdir)/man/unicode_convert.3 $(srcdir)/man/unicode_convert_deinit.3 $(srcdir)/man/unicode_convert_fromu_init.3 $(srcdir)/man/unicode_convert_fromu_tobuf.3 $(srcdir)/man/unicode_convert_fromutf8.3 $(srcdir)/man/unicode_convert_init.3 $(srcdir)/man/unicode_convert_tobuf.3 $(srcdir)/man/unicode_convert_tocase.3 $(srcdir)/man/unicode_convert_tocbuf_fromutf8_init.3 $(srcdir)/man/unicode_convert_tocbuf_init.3 $(srcdir)/man/unicode_convert_tocbuf_toutf8_init.3 $(srcdir)/man/unicode_convert_tou_init.3 $(srcdir)/man/unicode_convert_tou_tobuf.3 $(srcdir)/man/unicode_convert_toutf8.3 $(srcdir)/man/unicode_convert_uc.3 $(srcdir)/man/unicode_default_chset.3 $(srcdir)/man/unicode_grapheme_break.3 $(srcdir)/man/unicode_html40ent_lookup.3 $(srcdir)/man/unicode_isspace.3 $(srcdir)/man/unicode_lb_end.3 $(srcdir)/man/unicode_lb_init.3 $(srcdir)/man/unicode_lb_next.3 $(srcdir)/man/unicode_lb_next_cnt.3 $(srcdir)/man/unicode_lb_set_opts.3 $(srcdir)/man/unicode_lbc_end.3 $(srcdir)/man/unicode_lbc_init.3 $(srcdir)/man/unicode_lbc_next.3 $(srcdir)/man/unicode_lbc_next_cnt.3 $(srcdir)/man/unicode_lbc_set_opts.3 $(srcdir)/man/unicode_lc.3 $(srcdir)/man/unicode_tc.3 $(srcdir)/man/unicode_u_ucs2_native.3 $(srcdir)/man/unicode_u_ucs4_native.3 $(srcdir)/man/unicode_uc.3 
$(srcdir)/man/unicode_wb_end.3 $(srcdir)/man/unicode_wb_init.3 $(srcdir)/man/unicode_wb_next.3 $(srcdir)/man/unicode_wb_next_cnt.3 $(srcdir)/man/unicode_wbscan_end.3 $(srcdir)/man/unicode_wbscan_init.3 $(srcdir)/man/unicode_wbscan_next.3
libunicode_la_SOURCES=unicode.h unicode.c unicodebuf.c \
unicodecpp.C \
diff --git a/unicode/book.xml b/unicode/book.xml
index 88fcf25..006b082 100644
--- a/unicode/book.xml
+++ b/unicode/book.xml
@@ -1453,13 +1453,22 @@ See COPYING for distribution information.
<refnamediv>
<refname>unicode::iconvert::convert</refname>
+ <refname>unicode::ucs_4</refname>
+ <refname>unicode::ucs_2</refname>
+ <refname>unicode::utf_8</refname>
+ <refname>unicode::iso_8859_1</refname>
<refpurpose>unicode character set conversion</refpurpose>
</refnamediv>
<refsynopsisdiv>
<funcsynopsis>
- <funcsynopsisinfo>#include &lt;unicode.h&gt;</funcsynopsisinfo>
+ <funcsynopsisinfo>#include &lt;unicode.h&gt;
+
+extern const char unicode::ucs_4[];
+extern const char unicode::ucs_2[];
+extern const char unicode::utf_8[];
+extern const char unicode::iso_8859_1[];</funcsynopsisinfo>
<funcprototype>
<funcdef>std::string <function>unicode::iconvert::convert</function></funcdef>
@@ -1538,6 +1547,16 @@ See COPYING for distribution information.
character set that's supported by
<citerefentry><refentrytitle>iconv</refentrytitle>
<manvolnum>3</manvolnum></citerefentry>.
+
+ Use
+ <varname>unicode::ucs_2</varname> and
+ <varname>unicode::ucs_4</varname> to specify the 16-bit and 32-bit
+ Unicode encodings (UCS-2 and UCS-4) in native byte order.
+ Use
+ <varname>unicode::utf_8</varname> and
+ <varname>unicode::iso_8859_1</varname> to specify these two
+ standard character sets.
+
The overloaded versions that pass a reference to a
<classname>bool</classname> set the flag to <literal>true</literal>
if some characters could not be converted.
diff --git a/unicode/unicode.h b/unicode/unicode.h
index b2ec0ab..bfe1256 100644
--- a/unicode/unicode.h
+++ b/unicode/unicode.h
@@ -887,6 +887,11 @@ extern size_t unicode_wcwidth(const std::vector<unicode_char> &uc);
namespace unicode {
/*
+ ** Character set names: native-endian UCS-4 and UCS-2, UTF-8, ISO-8859-1
+ */
+ extern const char ucs_4[], ucs_2[], utf_8[], iso_8859_1[];
+
+ /*
** Interface to iconv.
**
** Subclass converted(). Invoke begin(), then operator(), repeatedly,
diff --git a/unicode/unicodecpp.C b/unicode/unicodecpp.C
index 2996c54..cbe8f56 100644
--- a/unicode/unicodecpp.C
+++ b/unicode/unicodecpp.C
@@ -1,5 +1,5 @@
/*
-** Copyright 2011 Double Precision, Inc.
+** Copyright 2011-2014 Double Precision, Inc.
** See COPYING for distribution information.
**
*/
@@ -35,6 +35,26 @@ extern "C" {
}
+const char unicode::ucs_4[]=
+#if WORDS_BIGENDIAN
+ "UCS-4BE"
+#else
+ "UCS-4LE"
+#endif
+ ;
+
+const char unicode::ucs_2[]=
+#if WORDS_BIGENDIAN
+ "UCS-2BE"
+#else
+ "UCS-2LE"
+#endif
+ ;
+
+const char unicode::utf_8[]="utf-8";
+
+const char unicode::iso_8859_1[]="iso-8859-1";
+
size_t unicode_wcwidth(const std::vector<unicode_char> &uc)
{
size_t w=0;