diff options
Diffstat (limited to 'unicode')
| -rw-r--r-- | unicode/Makefile.am | 2 | ||||
| -rw-r--r-- | unicode/book.xml | 21 | ||||
| -rw-r--r-- | unicode/unicode.h | 5 | ||||
| -rw-r--r-- | unicode/unicodecpp.C | 22 | 
4 files changed, 47 insertions, 3 deletions
| diff --git a/unicode/Makefile.am b/unicode/Makefile.am index 6a2fa6b..5f4447f 100644 --- a/unicode/Makefile.am +++ b/unicode/Makefile.am @@ -45,7 +45,7 @@ update-www-htmlent:  lib_LTLIBRARIES=libunicode.la  include_HEADERS=unicode.h -man_MANS=$(srcdir)/man/courier-unicode.7 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]convert.3 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]convert_tocase.3 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]fromu.3 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]tou.3 $(srcdir)/man/unicode[\:][\:]linebreak_callback_base.3 $(srcdir)/man/unicode[\:][\:]linebreak_callback_save_buf.3 $(srcdir)/man/unicode[\:][\:]linebreak_iter.3 $(srcdir)/man/unicode[\:][\:]linebreakc_callback_base.3 $(srcdir)/man/unicode[\:][\:]linebreakc_iter.3 $(srcdir)/man/unicode[\:][\:]wordbreak_callback_base.3 $(srcdir)/man/unicode_convert.3 $(srcdir)/man/unicode_convert_deinit.3 $(srcdir)/man/unicode_convert_fromu_init.3 $(srcdir)/man/unicode_convert_fromu_tobuf.3 $(srcdir)/man/unicode_convert_fromutf8.3 $(srcdir)/man/unicode_convert_init.3 $(srcdir)/man/unicode_convert_tobuf.3 $(srcdir)/man/unicode_convert_tocase.3 $(srcdir)/man/unicode_convert_tocbuf_fromutf8_init.3 $(srcdir)/man/unicode_convert_tocbuf_init.3 $(srcdir)/man/unicode_convert_tocbuf_toutf8_init.3 $(srcdir)/man/unicode_convert_tou_init.3 $(srcdir)/man/unicode_convert_tou_tobuf.3 $(srcdir)/man/unicode_convert_toutf8.3 $(srcdir)/man/unicode_convert_uc.3 $(srcdir)/man/unicode_default_chset.3 $(srcdir)/man/unicode_grapheme_break.3 $(srcdir)/man/unicode_html40ent_lookup.3 $(srcdir)/man/unicode_isspace.3 $(srcdir)/man/unicode_lb_end.3 $(srcdir)/man/unicode_lb_init.3 $(srcdir)/man/unicode_lb_next.3 $(srcdir)/man/unicode_lb_next_cnt.3 $(srcdir)/man/unicode_lb_set_opts.3 $(srcdir)/man/unicode_lbc_end.3 $(srcdir)/man/unicode_lbc_init.3 $(srcdir)/man/unicode_lbc_next.3 $(srcdir)/man/unicode_lbc_next_cnt.3 $(srcdir)/man/unicode_lbc_set_opts.3 $(srcdir)/man/unicode_lc.3 $(srcdir)/man/unicode_tc.3 $(srcdir)/man/unicode_u_ucs2_native.3 $(srcdir)/man/unicode_u_ucs4_native.3 $(srcdir)/man/unicode_uc.3 $(srcdir)/man/unicode_wb_end.3 $(srcdir)/man/unicode_wb_init.3 $(srcdir)/man/unicode_wb_next.3 $(srcdir)/man/unicode_wb_next_cnt.3 $(srcdir)/man/unicode_wbscan_end.3 $(srcdir)/man/unicode_wbscan_init.3 $(srcdir)/man/unicode_wbscan_next.3 +man_MANS=$(srcdir)/man/courier-unicode.7 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]convert.3 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]convert_tocase.3 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]fromu.3 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]tou.3 $(srcdir)/man/unicode[\:][\:]iso_8859_1.3 $(srcdir)/man/unicode[\:][\:]linebreak_callback_base.3 $(srcdir)/man/unicode[\:][\:]linebreak_callback_save_buf.3 $(srcdir)/man/unicode[\:][\:]linebreak_iter.3 $(srcdir)/man/unicode[\:][\:]linebreakc_callback_base.3 $(srcdir)/man/unicode[\:][\:]linebreakc_iter.3 $(srcdir)/man/unicode[\:][\:]ucs_2.3 $(srcdir)/man/unicode[\:][\:]ucs_4.3 $(srcdir)/man/unicode[\:][\:]utf_8.3 $(srcdir)/man/unicode[\:][\:]wordbreak_callback_base.3 $(srcdir)/man/unicode_convert.3 $(srcdir)/man/unicode_convert_deinit.3 $(srcdir)/man/unicode_convert_fromu_init.3 $(srcdir)/man/unicode_convert_fromu_tobuf.3 $(srcdir)/man/unicode_convert_fromutf8.3 $(srcdir)/man/unicode_convert_init.3 $(srcdir)/man/unicode_convert_tobuf.3 $(srcdir)/man/unicode_convert_tocase.3 $(srcdir)/man/unicode_convert_tocbuf_fromutf8_init.3 $(srcdir)/man/unicode_convert_tocbuf_init.3 $(srcdir)/man/unicode_convert_tocbuf_toutf8_init.3 $(srcdir)/man/unicode_convert_tou_init.3 $(srcdir)/man/unicode_convert_tou_tobuf.3 $(srcdir)/man/unicode_convert_toutf8.3 $(srcdir)/man/unicode_convert_uc.3 $(srcdir)/man/unicode_default_chset.3 $(srcdir)/man/unicode_grapheme_break.3 $(srcdir)/man/unicode_html40ent_lookup.3 $(srcdir)/man/unicode_isspace.3 $(srcdir)/man/unicode_lb_end.3 $(srcdir)/man/unicode_lb_init.3 $(srcdir)/man/unicode_lb_next.3 $(srcdir)/man/unicode_lb_next_cnt.3 $(srcdir)/man/unicode_lb_set_opts.3 $(srcdir)/man/unicode_lbc_end.3 $(srcdir)/man/unicode_lbc_init.3 $(srcdir)/man/unicode_lbc_next.3 $(srcdir)/man/unicode_lbc_next_cnt.3 $(srcdir)/man/unicode_lbc_set_opts.3 $(srcdir)/man/unicode_lc.3 $(srcdir)/man/unicode_tc.3 $(srcdir)/man/unicode_u_ucs2_native.3 $(srcdir)/man/unicode_u_ucs4_native.3 $(srcdir)/man/unicode_uc.3 $(srcdir)/man/unicode_wb_end.3 $(srcdir)/man/unicode_wb_init.3 $(srcdir)/man/unicode_wb_next.3 $(srcdir)/man/unicode_wb_next_cnt.3 $(srcdir)/man/unicode_wbscan_end.3 $(srcdir)/man/unicode_wbscan_init.3 $(srcdir)/man/unicode_wbscan_next.3  libunicode_la_SOURCES=unicode.h unicode.c unicodebuf.c \  			unicodecpp.C \ diff --git a/unicode/book.xml b/unicode/book.xml index 88fcf25..006b082 100644 --- a/unicode/book.xml +++ b/unicode/book.xml @@ -1453,13 +1453,22 @@ See COPYING for distribution information.  	<refnamediv>  	  <refname>unicode::iconvert::convert</refname> +	  <refname>unicode::ucs_4</refname> +	  <refname>unicode::ucs_2</refname> +	  <refname>unicode::utf_8</refname> +	  <refname>unicode::iso_8859_1</refname>  	  <refpurpose>unicode character set conversion</refpurpose>  	</refnamediv>  	<refsynopsisdiv>  	  <funcsynopsis> -	    <funcsynopsisinfo>#include <unicode.h></funcsynopsisinfo> +	    <funcsynopsisinfo>#include <unicode.h> + +extern const char unicode::ucs_4[]; +extern const char unicode::ucs_2[]; +extern const char unicode::utf_8[]; +extern const char unicode::iso_8859_1[];</funcsynopsisinfo>  	    <funcprototype>                <funcdef>std::string <function>unicode::iconvert::convert</function></funcdef> @@ -1538,6 +1547,16 @@ See COPYING for distribution information.  	    character set that's supported by  	    <citerefentry><refentrytitle>iconv</refentrytitle>  	    <manvolnum>3</manvolnum></citerefentry>. + +	    Use +	    <varname>unicode::ucs_2</varname> and +	    <varname>unicode::ucs_4</varname> to specify the 16 and the 32 bit +	    unicode octet in native byte order. +	    Use +	    <varname>unicode::utf_8</varname> and +	    <varname>unicode::iso_8859_1</varname> to specify these two +	    standard character sets. +  	    The overloaded versions that pass a reference to a  	    <classname>bool</classname> set the flag to <literal>true</literal>  	    if some characters could not be converted. diff --git a/unicode/unicode.h b/unicode/unicode.h index b2ec0ab..bfe1256 100644 --- a/unicode/unicode.h +++ b/unicode/unicode.h @@ -887,6 +887,11 @@ extern size_t unicode_wcwidth(const std::vector<unicode_char> &uc);  namespace unicode {  	/* +	** Various character sets +	*/ +	extern const char ucs_4[], ucs_2[], utf_8[], iso_8859_1[]; + +	/*  	** Interface to iconv.  	**  	** Subclass converted(). Invoke begin(), then operator(), repeatedly, diff --git a/unicode/unicodecpp.C b/unicode/unicodecpp.C index 2996c54..cbe8f56 100644 --- a/unicode/unicodecpp.C +++ b/unicode/unicodecpp.C @@ -1,5 +1,5 @@  /* -** Copyright 2011 Double Precision, Inc. +** Copyright 2011-2014 Double Precision, Inc.  ** See COPYING for distribution information.  **  */ @@ -35,6 +35,26 @@ extern "C" {  } +const char unicode::ucs_4[]= +#if WORDS_BIGENDIAN +	"UCS-4BE" +#else +	"UCS-4LE" +#endif +	; + +const char unicode::ucs_2[]= +#if WORDS_BIGENDIAN +	"UCS-2BE" +#else +	"UCS-2LE" +#endif +	; + +const char unicode::utf_8[]="utf-8"; + +const char unicode::iso_8859_1[]="iso-8859-1"; +  size_t unicode_wcwidth(const std::vector<unicode_char> &uc)  {  	size_t w=0; | 
