diff options
| author | Sam Varshavchik | 2015-07-28 08:31:30 -0400 |
|---|---|---|
| committer | Sam Varshavchik | 2015-07-28 08:31:30 -0400 |
| commit | f9e2606abe105d7183b69e50da8a156a547d65e0 (patch) | |
| tree | f0856138b2cdb568c56bd14f282476cc43556687 /unicode | |
| parent | bc4028bbe7b3692bd0d8d5309cc425abe57fb9c0 (diff) | |
| download | courier-libs-f9e2606abe105d7183b69e50da8a156a547d65e0.tar.bz2 | |
Add additional tolower/toupper overloads, unicode_locale_charset().
Diffstat (limited to 'unicode')
| -rw-r--r-- | unicode/Makefile.am | 2 | ||||
| -rw-r--r-- | unicode/book.xml | 36 | ||||
| -rw-r--r-- | unicode/configure.ac | 6 | ||||
| -rw-r--r-- | unicode/courier-unicode.h | 22 | ||||
| -rw-r--r-- | unicode/unicode.c | 34 | ||||
| -rw-r--r-- | unicode/unicodecpp.C | 45 |
6 files changed, 116 insertions, 29 deletions
diff --git a/unicode/Makefile.am b/unicode/Makefile.am index da71e14..61ee291 100644 --- a/unicode/Makefile.am +++ b/unicode/Makefile.am @@ -56,7 +56,7 @@ include_HEADERS=courier-unicode.h \ courier-unicode-categories-tab.h \ courier-unicode-script-tab.h -man_MANS=$(srcdir)/man/courier-unicode.7 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]convert.3 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]convert_tocase.3 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]fromu.3 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]tou.3 $(srcdir)/man/unicode[\:][\:]iso_8859_1.3 $(srcdir)/man/unicode[\:][\:]linebreak_callback_base.3 $(srcdir)/man/unicode[\:][\:]linebreak_callback_save_buf.3 $(srcdir)/man/unicode[\:][\:]linebreak_iter.3 $(srcdir)/man/unicode[\:][\:]linebreakc_callback_base.3 $(srcdir)/man/unicode[\:][\:]linebreakc_iter.3 $(srcdir)/man/unicode[\:][\:]tolower.3 $(srcdir)/man/unicode[\:][\:]toupper.3 $(srcdir)/man/unicode[\:][\:]ucs_2.3 $(srcdir)/man/unicode[\:][\:]ucs_4.3 $(srcdir)/man/unicode[\:][\:]utf_8.3 $(srcdir)/man/unicode[\:][\:]wordbreak_callback_base.3 $(srcdir)/man/unicode_category_lookup.3 $(srcdir)/man/unicode_convert.3 $(srcdir)/man/unicode_convert_deinit.3 $(srcdir)/man/unicode_convert_fromu_init.3 $(srcdir)/man/unicode_convert_fromu_tobuf.3 $(srcdir)/man/unicode_convert_fromutf8.3 $(srcdir)/man/unicode_convert_init.3 $(srcdir)/man/unicode_convert_tobuf.3 $(srcdir)/man/unicode_convert_tocase.3 $(srcdir)/man/unicode_convert_tocbuf_fromutf8_init.3 $(srcdir)/man/unicode_convert_tocbuf_init.3 $(srcdir)/man/unicode_convert_tocbuf_toutf8_init.3 $(srcdir)/man/unicode_convert_tou_init.3 $(srcdir)/man/unicode_convert_tou_tobuf.3 $(srcdir)/man/unicode_convert_toutf8.3 $(srcdir)/man/unicode_convert_uc.3 $(srcdir)/man/unicode_default_chset.3 $(srcdir)/man/unicode_grapheme_break.3 $(srcdir)/man/unicode_html40ent_lookup.3 $(srcdir)/man/unicode_isalnum.3 $(srcdir)/man/unicode_isalpha.3 $(srcdir)/man/unicode_isblank.3 $(srcdir)/man/unicode_isdigit.3 $(srcdir)/man/unicode_isgraph.3 $(srcdir)/man/unicode_islower.3 $(srcdir)/man/unicode_ispunct.3 $(srcdir)/man/unicode_isspace.3 $(srcdir)/man/unicode_isupper.3 $(srcdir)/man/unicode_lb_end.3 $(srcdir)/man/unicode_lb_init.3 $(srcdir)/man/unicode_lb_next.3 $(srcdir)/man/unicode_lb_next_cnt.3 $(srcdir)/man/unicode_lb_set_opts.3 $(srcdir)/man/unicode_lbc_end.3 $(srcdir)/man/unicode_lbc_init.3 $(srcdir)/man/unicode_lbc_next.3 $(srcdir)/man/unicode_lbc_next_cnt.3 $(srcdir)/man/unicode_lbc_set_opts.3 $(srcdir)/man/unicode_lc.3 $(srcdir)/man/unicode_script.3 $(srcdir)/man/unicode_tc.3 $(srcdir)/man/unicode_u_ucs2_native.3 $(srcdir)/man/unicode_u_ucs4_native.3 $(srcdir)/man/unicode_uc.3 $(srcdir)/man/unicode_wb_end.3 $(srcdir)/man/unicode_wb_init.3 $(srcdir)/man/unicode_wb_next.3 $(srcdir)/man/unicode_wb_next_cnt.3 $(srcdir)/man/unicode_wbscan_end.3 $(srcdir)/man/unicode_wbscan_init.3 $(srcdir)/man/unicode_wbscan_next.3 +man_MANS=$(srcdir)/man/courier-unicode.7 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]convert.3 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]convert_tocase.3 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]fromu.3 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]tou.3 $(srcdir)/man/unicode[\:][\:]iso_8859_1.3 $(srcdir)/man/unicode[\:][\:]linebreak_callback_base.3 $(srcdir)/man/unicode[\:][\:]linebreak_callback_save_buf.3 $(srcdir)/man/unicode[\:][\:]linebreak_iter.3 $(srcdir)/man/unicode[\:][\:]linebreakc_callback_base.3 $(srcdir)/man/unicode[\:][\:]linebreakc_iter.3 $(srcdir)/man/unicode[\:][\:]tolower.3 $(srcdir)/man/unicode[\:][\:]toupper.3 $(srcdir)/man/unicode[\:][\:]ucs_2.3 $(srcdir)/man/unicode[\:][\:]ucs_4.3 $(srcdir)/man/unicode[\:][\:]utf_8.3 $(srcdir)/man/unicode[\:][\:]wordbreak_callback_base.3 $(srcdir)/man/unicode_category_lookup.3 $(srcdir)/man/unicode_convert.3 $(srcdir)/man/unicode_convert_deinit.3 $(srcdir)/man/unicode_convert_fromu_init.3 $(srcdir)/man/unicode_convert_fromu_tobuf.3 $(srcdir)/man/unicode_convert_fromutf8.3 $(srcdir)/man/unicode_convert_init.3 $(srcdir)/man/unicode_convert_tobuf.3 $(srcdir)/man/unicode_convert_tocase.3 $(srcdir)/man/unicode_convert_tocbuf_fromutf8_init.3 $(srcdir)/man/unicode_convert_tocbuf_init.3 $(srcdir)/man/unicode_convert_tocbuf_toutf8_init.3 $(srcdir)/man/unicode_convert_tou_init.3 $(srcdir)/man/unicode_convert_tou_tobuf.3 $(srcdir)/man/unicode_convert_toutf8.3 $(srcdir)/man/unicode_convert_uc.3 $(srcdir)/man/unicode_default_chset.3 $(srcdir)/man/unicode_grapheme_break.3 $(srcdir)/man/unicode_html40ent_lookup.3 $(srcdir)/man/unicode_isalnum.3 $(srcdir)/man/unicode_isalpha.3 $(srcdir)/man/unicode_isblank.3 $(srcdir)/man/unicode_isdigit.3 $(srcdir)/man/unicode_isgraph.3 $(srcdir)/man/unicode_islower.3 $(srcdir)/man/unicode_ispunct.3 $(srcdir)/man/unicode_isspace.3 $(srcdir)/man/unicode_isupper.3 $(srcdir)/man/unicode_lb_end.3 $(srcdir)/man/unicode_lb_init.3 $(srcdir)/man/unicode_lb_next.3 $(srcdir)/man/unicode_lb_next_cnt.3 $(srcdir)/man/unicode_lb_set_opts.3 $(srcdir)/man/unicode_lbc_end.3 $(srcdir)/man/unicode_lbc_init.3 $(srcdir)/man/unicode_lbc_next.3 $(srcdir)/man/unicode_lbc_next_cnt.3 $(srcdir)/man/unicode_lbc_set_opts.3 $(srcdir)/man/unicode_lc.3 $(srcdir)/man/unicode_locale_chset.3 $(srcdir)/man/unicode_script.3 $(srcdir)/man/unicode_tc.3 $(srcdir)/man/unicode_u_ucs2_native.3 $(srcdir)/man/unicode_u_ucs4_native.3 $(srcdir)/man/unicode_uc.3 $(srcdir)/man/unicode_wb_end.3 $(srcdir)/man/unicode_wb_init.3 $(srcdir)/man/unicode_wb_next.3 $(srcdir)/man/unicode_wb_next_cnt.3 $(srcdir)/man/unicode_wbscan_end.3 $(srcdir)/man/unicode_wbscan_init.3 $(srcdir)/man/unicode_wbscan_next.3 libcourier_unicode_la_SOURCES=courier-unicode.h \ courier-unicode-categories-tab.h \ diff --git a/unicode/book.xml b/unicode/book.xml index f0475a3..3c5d758 100644 --- a/unicode/book.xml +++ b/unicode/book.xml @@ -620,6 +620,7 @@ See COPYING for distribution information. <refnamediv> <refname>unicode_default_chset</refname> + <refname>unicode_locale_chset</refname> <refpurpose>return the system character set name</refpurpose> </refnamediv> @@ -630,6 +631,11 @@ See COPYING for distribution information. <funcdef>const char *<function>unicode_default_chset</function></funcdef> <void /> </funcprototype> + + <funcprototype> + <funcdef>const char *<function>unicode_locale_chset</function></funcdef> + <void /> + </funcprototype> </funcsynopsis> </refsynopsisdiv> <refsect1> @@ -639,6 +645,8 @@ See COPYING for distribution information. system environment character set (usually <quote>nl_langinfo(CODESET)</quote>, or from some suitable environment variable). + <function>unicode_locale_chset</function>() returns the name of the + current application locale's character set. </para> </refsect1> <refsect1> @@ -2367,6 +2375,16 @@ std::copy(beg_iter, end_iter, std::back_insert_iterator<std::vector<int> </funcprototype> <funcprototype> + <funcdef>std::vector<unicode_char> <function>unicode::tolower</function></funcdef> + <paramdef>const std::vector<unicode_char> &<parameter>u</parameter></paramdef> + </funcprototype> + + <funcprototype> + <funcdef>void <function>unicode::tolower</function></funcdef> + <paramdef>std::vector<unicode_char> &<parameter>u</parameter></paramdef> + </funcprototype> + + <funcprototype> <funcdef>std::string <function>unicode::toupper</function></funcdef> <paramdef>const std::string &<parameter>string</parameter></paramdef> </funcprototype> @@ -2376,6 +2394,16 @@ std::copy(beg_iter, end_iter, std::back_insert_iterator<std::vector<int> <paramdef>const std::string &<parameter>string</parameter></paramdef> <paramdef>const std::string &<parameter>charset</parameter></paramdef> </funcprototype> + + <funcprototype> + <funcdef>std::vector<unicode_char> <function>unicode::toupper</function></funcdef> + <paramdef>const std::vector<unicode_char> &<parameter>u</parameter></paramdef> + </funcprototype> + + <funcprototype> + <funcdef>void <function>unicode::toupper</function></funcdef> + <paramdef>std::vector<unicode_char> &<parameter>u</parameter></paramdef> + </funcprototype> </funcsynopsis> </refsynopsisdiv> @@ -2398,6 +2426,14 @@ std::copy(beg_iter, end_iter, std::back_insert_iterator<std::vector<int> then convert it back to the same character set, returning the resulting string. </para> + + <para> + An alternative is to pass a + <classname>std::vector<unicode_char> &</classname> + that gets converted in-place, or a + <classname>const std::vector<unicode_char> &</classname> + that gets converted, and returned. + </para> </refsect1> <refsect1> diff --git a/unicode/configure.ac b/unicode/configure.ac index d04693f..d90dfc9 100644 --- a/unicode/configure.ac +++ b/unicode/configure.ac @@ -29,14 +29,12 @@ dnl Checks for libraries. dnl Checks for header files. -AC_CHECK_HEADERS(stddef.h wchar.h locale.h) +AC_CHECK_HEADERS(stddef.h wchar.h) dnl Checks for typedefs, structures, and compiler characteristics. dnl Checks for library functions. -AC_CHECK_FUNCS(setlocale) - AM_CONDITIONAL(UPDATE_UNICODE, test -f ${srcdir}/UnicodeData.txt) AC_SYS_LARGEFILE @@ -58,8 +56,6 @@ if test "$unicode_with_libcharset" = "yes"; then fi AC_DEFINE_UNQUOTED(UNICODE_USE_LIBCHARSET, 1, [ Set this to 1 to use libcharset library. ]) -else - AM_LANGINFO_CODESET fi save_LIBS="$LIBS" diff --git a/unicode/courier-unicode.h b/unicode/courier-unicode.h index 5c564a8..3d3715c 100644 --- a/unicode/courier-unicode.h +++ b/unicode/courier-unicode.h @@ -33,6 +33,12 @@ typedef uint32_t unicode_char; extern const char *unicode_default_chset(); +/* +** The current locale character set. +*/ + +extern const char *unicode_locale_charset(); + /* Unicode upper/lower/title case conversion functions */ extern unicode_char unicode_uc(unicode_char); @@ -1840,6 +1846,22 @@ std::string tolower(const std::string &string, std::string toupper(const std::string &string, const std::string &charset); +//! Convert unicode to lowercase + +void tolower(std::vector<unicode_char> &u); + +//! Convert unicode to lowercase + +std::vector<unicode_char> tolower(const std::vector<unicode_char> &u); + +//! Convert unicode to uppercase + +void toupper(std::vector<unicode_char> &u); + +//! Convert unicode to uppercase + +std::vector<unicode_char> toupper(const std::vector<unicode_char> &u); + #if 0 { #endif diff --git a/unicode/unicode.c b/unicode/unicode.c index 2dda2f2..551854d 100644 --- a/unicode/unicode.c +++ b/unicode/unicode.c @@ -11,8 +11,6 @@ #include <stdlib.h> #include <iconv.h> #include <errno.h> -#if HAVE_LOCALE_H -#if HAVE_SETLOCALE #include <locale.h> #if USE_LIBCHARSET #if HAVE_LOCALCHARSET_H @@ -20,14 +18,21 @@ #elif HAVE_LIBCHARSET_H #include <libcharset.h> #endif /* HAVE_LOCALCHARSET_H */ -#elif HAVE_LANGINFO_CODESET +#else #include <langinfo.h> #endif /* USE_LIBCHARSET */ -#endif /* HAVE_SETLOCALE */ -#endif /* HAVE_LOCALE_H */ static char default_chset_buf[32]; +const char *unicode_locale_charset() +{ +#if USE_LIBCHARSET + return locale_charset(); +#else + return nl_langinfo(CODESET); +#endif +} + static void init_default_chset() { const char *old_locale=NULL; @@ -42,17 +47,9 @@ static void init_default_chset() if (chset == NULL) { -#if HAVE_LOCALE_H -#if HAVE_SETLOCALE old_locale=setlocale(LC_ALL, ""); locale_cpy=old_locale ? strdup(old_locale):NULL; -#if USE_LIBCHARSET - chset = locale_charset(); -#elif HAVE_LANGINFO_CODESET - chset=nl_langinfo(CODESET); -#endif -#endif -#endif + chset=unicode_locale_charset(); } memset(buf, 0, sizeof(buf)); @@ -94,16 +91,11 @@ static void init_default_chset() memcpy(default_chset_buf, buf, sizeof(buf)); -#if HAVE_LOCALE_H -#if HAVE_SETLOCALE if (locale_cpy) { setlocale(LC_ALL, locale_cpy); free(locale_cpy); } -#endif -#endif - } const char *unicode_default_chset() @@ -427,7 +419,7 @@ static int deinit_toimaputf7(void *ptr, int *errptr) if (rc == 0 && toutf7->utf7encodebuf_cnt > 0) rc=toimaputf7_encode_flushfinal(toutf7); - + free(toutf7); return rc; } @@ -793,7 +785,7 @@ static int init_iconv(struct unicode_convert_iconv *h, } } } - + return 0; } diff --git a/unicode/unicodecpp.C b/unicode/unicodecpp.C index e6b31bd..214eb5d 100644 --- a/unicode/unicodecpp.C +++ b/unicode/unicodecpp.C @@ -57,6 +57,21 @@ const char unicode::utf_8[]="utf-8"; const char unicode::iso_8859_1[]="iso-8859-1"; +// Initialize unicode_default_chset() at thread startup. + +namespace unicode { + + class init_chset { + public: + init_chset(); + }; +}; + +unicode::init_chset::init_chset() +{ + unicode_default_chset(); +} + size_t unicode_wcwidth(const std::vector<unicode_char> &uc) { size_t w=0; @@ -505,11 +520,24 @@ std::string unicode::tolower(const std::string &string, unicode::iconvert::convert(string, charset, uc); - std::transform(uc.begin(), uc.end(), uc.begin(), unicode_lc); + tolower(uc); return unicode::iconvert::convert(uc, charset); } +std::vector<unicode_char> unicode::tolower(const std::vector<unicode_char> &u) +{ + std::vector<unicode_char> copy=u; + + tolower(copy); + return copy; +} + +void unicode::tolower(std::vector<unicode_char> &uc) +{ + std::transform(uc.begin(), uc.end(), uc.begin(), unicode_lc); +} + std::string unicode::toupper(const std::string &string) { return toupper(string, unicode_default_chset()); @@ -522,7 +550,20 @@ std::string unicode::toupper(const std::string &string, unicode::iconvert::convert(string, charset, uc); - std::transform(uc.begin(), uc.end(), uc.begin(), unicode_uc); + toupper(uc); return unicode::iconvert::convert(uc, charset); } + +std::vector<unicode_char> unicode::toupper(const std::vector<unicode_char> &u) +{ + std::vector<unicode_char> copy=u; + + toupper(copy); + return copy; +} + +void unicode::toupper(std::vector<unicode_char> &uc) +{ + std::transform(uc.begin(), uc.end(), uc.begin(), unicode_uc); +} |
