diff options
| -rw-r--r-- | unicode/Makefile.am | 2 | ||||
| -rw-r--r-- | unicode/book.xml | 36 | ||||
| -rw-r--r-- | unicode/configure.ac | 6 | ||||
| -rw-r--r-- | unicode/courier-unicode.h | 22 | ||||
| -rw-r--r-- | unicode/unicode.c | 34 | ||||
| -rw-r--r-- | unicode/unicodecpp.C | 45 | 
6 files changed, 116 insertions, 29 deletions
| diff --git a/unicode/Makefile.am b/unicode/Makefile.am index da71e14..61ee291 100644 --- a/unicode/Makefile.am +++ b/unicode/Makefile.am @@ -56,7 +56,7 @@ include_HEADERS=courier-unicode.h \  	courier-unicode-categories-tab.h \  	courier-unicode-script-tab.h -man_MANS=$(srcdir)/man/courier-unicode.7 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]convert.3 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]convert_tocase.3 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]fromu.3 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]tou.3 $(srcdir)/man/unicode[\:][\:]iso_8859_1.3 $(srcdir)/man/unicode[\:][\:]linebreak_callback_base.3 $(srcdir)/man/unicode[\:][\:]linebreak_callback_save_buf.3 $(srcdir)/man/unicode[\:][\:]linebreak_iter.3 $(srcdir)/man/unicode[\:][\:]linebreakc_callback_base.3 $(srcdir)/man/unicode[\:][\:]linebreakc_iter.3 $(srcdir)/man/unicode[\:][\:]tolower.3 $(srcdir)/man/unicode[\:][\:]toupper.3 $(srcdir)/man/unicode[\:][\:]ucs_2.3 $(srcdir)/man/unicode[\:][\:]ucs_4.3 $(srcdir)/man/unicode[\:][\:]utf_8.3 $(srcdir)/man/unicode[\:][\:]wordbreak_callback_base.3 $(srcdir)/man/unicode_category_lookup.3 $(srcdir)/man/unicode_convert.3 $(srcdir)/man/unicode_convert_deinit.3 $(srcdir)/man/unicode_convert_fromu_init.3 $(srcdir)/man/unicode_convert_fromu_tobuf.3 $(srcdir)/man/unicode_convert_fromutf8.3 $(srcdir)/man/unicode_convert_init.3 $(srcdir)/man/unicode_convert_tobuf.3 $(srcdir)/man/unicode_convert_tocase.3 $(srcdir)/man/unicode_convert_tocbuf_fromutf8_init.3 $(srcdir)/man/unicode_convert_tocbuf_init.3 $(srcdir)/man/unicode_convert_tocbuf_toutf8_init.3 $(srcdir)/man/unicode_convert_tou_init.3 $(srcdir)/man/unicode_convert_tou_tobuf.3 $(srcdir)/man/unicode_convert_toutf8.3 $(srcdir)/man/unicode_convert_uc.3 $(srcdir)/man/unicode_default_chset.3 $(srcdir)/man/unicode_grapheme_break.3 $(srcdir)/man/unicode_html40ent_lookup.3 $(srcdir)/man/unicode_isalnum.3 $(srcdir)/man/unicode_isalpha.3 $(srcdir)/man/unicode_isblank.3 $(srcdir)/man/unicode_isdigit.3 $(srcdir)/man/unicode_isgraph.3 $(srcdir)/man/unicode_islower.3 $(srcdir)/man/unicode_ispunct.3 $(srcdir)/man/unicode_isspace.3 $(srcdir)/man/unicode_isupper.3 $(srcdir)/man/unicode_lb_end.3 $(srcdir)/man/unicode_lb_init.3 $(srcdir)/man/unicode_lb_next.3 $(srcdir)/man/unicode_lb_next_cnt.3 $(srcdir)/man/unicode_lb_set_opts.3 $(srcdir)/man/unicode_lbc_end.3 $(srcdir)/man/unicode_lbc_init.3 $(srcdir)/man/unicode_lbc_next.3 $(srcdir)/man/unicode_lbc_next_cnt.3 $(srcdir)/man/unicode_lbc_set_opts.3 $(srcdir)/man/unicode_lc.3 $(srcdir)/man/unicode_script.3 $(srcdir)/man/unicode_tc.3 $(srcdir)/man/unicode_u_ucs2_native.3 $(srcdir)/man/unicode_u_ucs4_native.3 $(srcdir)/man/unicode_uc.3 $(srcdir)/man/unicode_wb_end.3 $(srcdir)/man/unicode_wb_init.3 $(srcdir)/man/unicode_wb_next.3 $(srcdir)/man/unicode_wb_next_cnt.3 $(srcdir)/man/unicode_wbscan_end.3 $(srcdir)/man/unicode_wbscan_init.3 $(srcdir)/man/unicode_wbscan_next.3 +man_MANS=$(srcdir)/man/courier-unicode.7 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]convert.3 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]convert_tocase.3 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]fromu.3 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]tou.3 $(srcdir)/man/unicode[\:][\:]iso_8859_1.3 $(srcdir)/man/unicode[\:][\:]linebreak_callback_base.3 $(srcdir)/man/unicode[\:][\:]linebreak_callback_save_buf.3 $(srcdir)/man/unicode[\:][\:]linebreak_iter.3 $(srcdir)/man/unicode[\:][\:]linebreakc_callback_base.3 $(srcdir)/man/unicode[\:][\:]linebreakc_iter.3 $(srcdir)/man/unicode[\:][\:]tolower.3 $(srcdir)/man/unicode[\:][\:]toupper.3 $(srcdir)/man/unicode[\:][\:]ucs_2.3 $(srcdir)/man/unicode[\:][\:]ucs_4.3 $(srcdir)/man/unicode[\:][\:]utf_8.3 $(srcdir)/man/unicode[\:][\:]wordbreak_callback_base.3 $(srcdir)/man/unicode_category_lookup.3 $(srcdir)/man/unicode_convert.3 $(srcdir)/man/unicode_convert_deinit.3 $(srcdir)/man/unicode_convert_fromu_init.3 $(srcdir)/man/unicode_convert_fromu_tobuf.3 $(srcdir)/man/unicode_convert_fromutf8.3 $(srcdir)/man/unicode_convert_init.3 $(srcdir)/man/unicode_convert_tobuf.3 $(srcdir)/man/unicode_convert_tocase.3 $(srcdir)/man/unicode_convert_tocbuf_fromutf8_init.3 $(srcdir)/man/unicode_convert_tocbuf_init.3 $(srcdir)/man/unicode_convert_tocbuf_toutf8_init.3 $(srcdir)/man/unicode_convert_tou_init.3 $(srcdir)/man/unicode_convert_tou_tobuf.3 $(srcdir)/man/unicode_convert_toutf8.3 $(srcdir)/man/unicode_convert_uc.3 $(srcdir)/man/unicode_default_chset.3 $(srcdir)/man/unicode_grapheme_break.3 $(srcdir)/man/unicode_html40ent_lookup.3 $(srcdir)/man/unicode_isalnum.3 $(srcdir)/man/unicode_isalpha.3 $(srcdir)/man/unicode_isblank.3 $(srcdir)/man/unicode_isdigit.3 $(srcdir)/man/unicode_isgraph.3 $(srcdir)/man/unicode_islower.3 $(srcdir)/man/unicode_ispunct.3 $(srcdir)/man/unicode_isspace.3 $(srcdir)/man/unicode_isupper.3 $(srcdir)/man/unicode_lb_end.3 $(srcdir)/man/unicode_lb_init.3 $(srcdir)/man/unicode_lb_next.3 $(srcdir)/man/unicode_lb_next_cnt.3 $(srcdir)/man/unicode_lb_set_opts.3 $(srcdir)/man/unicode_lbc_end.3 $(srcdir)/man/unicode_lbc_init.3 $(srcdir)/man/unicode_lbc_next.3 $(srcdir)/man/unicode_lbc_next_cnt.3 $(srcdir)/man/unicode_lbc_set_opts.3 $(srcdir)/man/unicode_lc.3 $(srcdir)/man/unicode_locale_chset.3 $(srcdir)/man/unicode_script.3 $(srcdir)/man/unicode_tc.3 $(srcdir)/man/unicode_u_ucs2_native.3 $(srcdir)/man/unicode_u_ucs4_native.3 $(srcdir)/man/unicode_uc.3 $(srcdir)/man/unicode_wb_end.3 $(srcdir)/man/unicode_wb_init.3 $(srcdir)/man/unicode_wb_next.3 $(srcdir)/man/unicode_wb_next_cnt.3 $(srcdir)/man/unicode_wbscan_end.3 $(srcdir)/man/unicode_wbscan_init.3 $(srcdir)/man/unicode_wbscan_next.3  libcourier_unicode_la_SOURCES=courier-unicode.h \  			courier-unicode-categories-tab.h \ diff --git a/unicode/book.xml b/unicode/book.xml index f0475a3..3c5d758 100644 --- a/unicode/book.xml +++ b/unicode/book.xml @@ -620,6 +620,7 @@ See COPYING for distribution information.  	<refnamediv>  	  <refname>unicode_default_chset</refname> +	  <refname>unicode_locale_chset</refname>  	  <refpurpose>return the system character set name</refpurpose>  	</refnamediv> @@ -630,6 +631,11 @@ See COPYING for distribution information.                <funcdef>const char *<function>unicode_default_chset</function></funcdef>  	      <void />  	    </funcprototype> + +	    <funcprototype> +              <funcdef>const char *<function>unicode_locale_chset</function></funcdef> +	      <void /> +	    </funcprototype>  	  </funcsynopsis>  	</refsynopsisdiv>  	<refsect1> @@ -639,6 +645,8 @@ See COPYING for distribution information.  	    system environment character set (usually  	    <quote>nl_langinfo(CODESET)</quote>, or from some suitable environment  	    variable). +	    <function>unicode_locale_chset</function>() returns the name of the +	    current application locale's character set.  	  </para>  	</refsect1>  	<refsect1> @@ -2367,6 +2375,16 @@ std::copy(beg_iter, end_iter, std::back_insert_iterator<std::vector<int>  	    </funcprototype>  	    <funcprototype> +              <funcdef>std::vector<unicode_char> <function>unicode::tolower</function></funcdef> +	      <paramdef>const std::vector<unicode_char> &<parameter>u</parameter></paramdef> +	    </funcprototype> + +	    <funcprototype> +              <funcdef>void <function>unicode::tolower</function></funcdef> +	      <paramdef>std::vector<unicode_char> &<parameter>u</parameter></paramdef> +	    </funcprototype> + +	    <funcprototype>                <funcdef>std::string <function>unicode::toupper</function></funcdef>  	      <paramdef>const std::string &<parameter>string</parameter></paramdef>  	    </funcprototype> @@ -2376,6 +2394,16 @@ std::copy(beg_iter, end_iter, std::back_insert_iterator<std::vector<int>  	      <paramdef>const std::string &<parameter>string</parameter></paramdef>  	      <paramdef>const std::string &<parameter>charset</parameter></paramdef>  	    </funcprototype> + +	    <funcprototype> +              <funcdef>std::vector<unicode_char> <function>unicode::toupper</function></funcdef> +	      <paramdef>const std::vector<unicode_char> &<parameter>u</parameter></paramdef> +	    </funcprototype> + +	    <funcprototype> +              <funcdef>void <function>unicode::toupper</function></funcdef> +	      <paramdef>std::vector<unicode_char> &<parameter>u</parameter></paramdef> +	    </funcprototype>  	  </funcsynopsis>  	</refsynopsisdiv> @@ -2398,6 +2426,14 @@ std::copy(beg_iter, end_iter, std::back_insert_iterator<std::vector<int>  		then convert it back to the same character set, returning  		the resulting string.  	  </para> + +	  <para> +	    An alternative is to pass a +	    <classname>std::vector<unicode_char> &</classname> +	    that gets converted in-place, or a +	    <classname>const std::vector<unicode_char> &</classname> +	    that gets converted, and returned. +	  </para>  	</refsect1>  	<refsect1> diff --git a/unicode/configure.ac b/unicode/configure.ac index d04693f..d90dfc9 100644 --- a/unicode/configure.ac +++ b/unicode/configure.ac @@ -29,14 +29,12 @@ dnl Checks for libraries.  dnl Checks for header files. -AC_CHECK_HEADERS(stddef.h wchar.h locale.h) +AC_CHECK_HEADERS(stddef.h wchar.h)  dnl Checks for typedefs, structures, and compiler characteristics.  dnl Checks for library functions. -AC_CHECK_FUNCS(setlocale) -  AM_CONDITIONAL(UPDATE_UNICODE, test -f ${srcdir}/UnicodeData.txt)  AC_SYS_LARGEFILE @@ -58,8 +56,6 @@ if test "$unicode_with_libcharset" = "yes"; then    fi    AC_DEFINE_UNQUOTED(UNICODE_USE_LIBCHARSET, 1,  	[ Set this to 1 to use libcharset library. ]) -else -  AM_LANGINFO_CODESET  fi  save_LIBS="$LIBS" diff --git a/unicode/courier-unicode.h b/unicode/courier-unicode.h index 5c564a8..3d3715c 100644 --- a/unicode/courier-unicode.h +++ b/unicode/courier-unicode.h @@ -33,6 +33,12 @@ typedef uint32_t unicode_char;  extern const char *unicode_default_chset(); +/* +** The current locale character set. +*/ + +extern const char *unicode_locale_charset(); +  /* Unicode upper/lower/title case conversion functions */  extern unicode_char unicode_uc(unicode_char); @@ -1840,6 +1846,22 @@ std::string tolower(const std::string &string,  std::string toupper(const std::string &string,  		    const std::string &charset); +//! Convert unicode to lowercase + +void tolower(std::vector<unicode_char> &u); + +//! Convert unicode to lowercase + +std::vector<unicode_char> tolower(const std::vector<unicode_char> &u); + +//! Convert unicode to uppercase + +void toupper(std::vector<unicode_char> &u); + +//! Convert unicode to uppercase + +std::vector<unicode_char> toupper(const std::vector<unicode_char> &u); +  #if 0  {  #endif diff --git a/unicode/unicode.c b/unicode/unicode.c index 2dda2f2..551854d 100644 --- a/unicode/unicode.c +++ b/unicode/unicode.c @@ -11,8 +11,6 @@  #include	<stdlib.h>  #include	<iconv.h>  #include	<errno.h> -#if	HAVE_LOCALE_H -#if	HAVE_SETLOCALE  #include	<locale.h>  #if	USE_LIBCHARSET  #if	HAVE_LOCALCHARSET_H @@ -20,14 +18,21 @@  #elif	HAVE_LIBCHARSET_H  #include	<libcharset.h>  #endif	/* HAVE_LOCALCHARSET_H */ -#elif	HAVE_LANGINFO_CODESET +#else  #include	<langinfo.h>  #endif	/* USE_LIBCHARSET */ -#endif	/* HAVE_SETLOCALE */ -#endif	/* HAVE_LOCALE_H */  static char default_chset_buf[32]; +const char *unicode_locale_charset() +{ +#if	USE_LIBCHARSET +	return locale_charset(); +#else +	return nl_langinfo(CODESET); +#endif +} +  static void init_default_chset()  {  	const char *old_locale=NULL; @@ -42,17 +47,9 @@ static void init_default_chset()  	if (chset == NULL)  	{ -#if	HAVE_LOCALE_H -#if	HAVE_SETLOCALE  		old_locale=setlocale(LC_ALL, "");  		locale_cpy=old_locale ? strdup(old_locale):NULL; -#if	USE_LIBCHARSET -		chset = locale_charset(); -#elif	HAVE_LANGINFO_CODESET -		chset=nl_langinfo(CODESET); -#endif -#endif -#endif +		chset=unicode_locale_charset();  	}  	memset(buf, 0, sizeof(buf)); @@ -94,16 +91,11 @@ static void init_default_chset()  	memcpy(default_chset_buf, buf, sizeof(buf)); -#if	HAVE_LOCALE_H -#if	HAVE_SETLOCALE  	if (locale_cpy)  	{  		setlocale(LC_ALL, locale_cpy);  		free(locale_cpy);  	} -#endif -#endif -  }  const char *unicode_default_chset() @@ -427,7 +419,7 @@ static int deinit_toimaputf7(void *ptr, int *errptr)  	if (rc == 0 && toutf7->utf7encodebuf_cnt > 0)  		rc=toimaputf7_encode_flushfinal(toutf7); -			 +  	free(toutf7);  	return rc;  } @@ -793,7 +785,7 @@ static int init_iconv(struct unicode_convert_iconv *h,  			}  		}  	} -					 +  	return 0;  } diff --git a/unicode/unicodecpp.C b/unicode/unicodecpp.C index e6b31bd..214eb5d 100644 --- a/unicode/unicodecpp.C +++ b/unicode/unicodecpp.C @@ -57,6 +57,21 @@ const char unicode::utf_8[]="utf-8";  const char unicode::iso_8859_1[]="iso-8859-1"; +// Initialize unicode_default_chset() at thread startup. + +namespace unicode { + +	class init_chset { +	public: +		init_chset(); +	}; +}; + +unicode::init_chset::init_chset() +{ +	unicode_default_chset(); +} +  size_t unicode_wcwidth(const std::vector<unicode_char> &uc)  {  	size_t w=0; @@ -505,11 +520,24 @@ std::string unicode::tolower(const std::string &string,  	unicode::iconvert::convert(string, charset, uc); -	std::transform(uc.begin(), uc.end(), uc.begin(), unicode_lc); +	tolower(uc);  	return unicode::iconvert::convert(uc, charset);  } +std::vector<unicode_char> unicode::tolower(const std::vector<unicode_char> &u) +{ +	std::vector<unicode_char> copy=u; + +	tolower(copy); +	return copy; +} + +void unicode::tolower(std::vector<unicode_char> &uc) +{ +	std::transform(uc.begin(), uc.end(), uc.begin(), unicode_lc); +} +  std::string unicode::toupper(const std::string &string)  {  	return toupper(string, unicode_default_chset()); @@ -522,7 +550,20 @@ std::string unicode::toupper(const std::string &string,  	unicode::iconvert::convert(string, charset, uc); -	std::transform(uc.begin(), uc.end(), uc.begin(), unicode_uc); +	toupper(uc);  	return unicode::iconvert::convert(uc, charset);  } + +std::vector<unicode_char> unicode::toupper(const std::vector<unicode_char> &u) +{ +	std::vector<unicode_char> copy=u; + +	toupper(copy); +	return copy; +} + +void unicode::toupper(std::vector<unicode_char> &uc) +{ +	std::transform(uc.begin(), uc.end(), uc.begin(), unicode_uc); +} | 
