From f9e2606abe105d7183b69e50da8a156a547d65e0 Mon Sep 17 00:00:00 2001
From: Sam Varshavchik
Date: Tue, 28 Jul 2015 08:31:30 -0400
Subject: Add additional tolower/toupper overloads, unicode_locale_charset().
---
unicode/Makefile.am | 2 +-
unicode/book.xml | 36 ++++++++++++++++++++++++++++++++++++
unicode/configure.ac | 6 +-----
unicode/courier-unicode.h | 22 ++++++++++++++++++++++
unicode/unicode.c | 34 +++++++++++++---------------------
unicode/unicodecpp.C | 45 +++++++++++++++++++++++++++++++++++++++++++--
6 files changed, 116 insertions(+), 29 deletions(-)
diff --git a/unicode/Makefile.am b/unicode/Makefile.am
index da71e14..61ee291 100644
--- a/unicode/Makefile.am
+++ b/unicode/Makefile.am
@@ -56,7 +56,7 @@ include_HEADERS=courier-unicode.h \
courier-unicode-categories-tab.h \
courier-unicode-script-tab.h
-man_MANS=$(srcdir)/man/courier-unicode.7 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]convert.3 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]convert_tocase.3 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]fromu.3 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]tou.3 $(srcdir)/man/unicode[\:][\:]iso_8859_1.3 $(srcdir)/man/unicode[\:][\:]linebreak_callback_base.3 $(srcdir)/man/unicode[\:][\:]linebreak_callback_save_buf.3 $(srcdir)/man/unicode[\:][\:]linebreak_iter.3 $(srcdir)/man/unicode[\:][\:]linebreakc_callback_base.3 $(srcdir)/man/unicode[\:][\:]linebreakc_iter.3 $(srcdir)/man/unicode[\:][\:]tolower.3 $(srcdir)/man/unicode[\:][\:]toupper.3 $(srcdir)/man/unicode[\:][\:]ucs_2.3 $(srcdir)/man/unicode[\:][\:]ucs_4.3 $(srcdir)/man/unicode[\:][\:]utf_8.3 $(srcdir)/man/unicode[\:][\:]wordbreak_callback_base.3 $(srcdir)/man/unicode_category_lookup.3 $(srcdir)/man/unicode_convert.3 $(srcdir)/man/unicode_convert_deinit.3 $(srcdir)/man/unicode_convert_fromu_init.3 $(srcdir)/man/unicode_convert_fromu_tobuf.3 $(srcdir)/man/unicode_convert_fromutf8.3 $(srcdir)/man/unicode_convert_init.3 $(srcdir)/man/unicode_convert_tobuf.3 $(srcdir)/man/unicode_convert_tocase.3 $(srcdir)/man/unicode_convert_tocbuf_fromutf8_init.3 $(srcdir)/man/unicode_convert_tocbuf_init.3 $(srcdir)/man/unicode_convert_tocbuf_toutf8_init.3 $(srcdir)/man/unicode_convert_tou_init.3 $(srcdir)/man/unicode_convert_tou_tobuf.3 $(srcdir)/man/unicode_convert_toutf8.3 $(srcdir)/man/unicode_convert_uc.3 $(srcdir)/man/unicode_default_chset.3 $(srcdir)/man/unicode_grapheme_break.3 $(srcdir)/man/unicode_html40ent_lookup.3 $(srcdir)/man/unicode_isalnum.3 $(srcdir)/man/unicode_isalpha.3 $(srcdir)/man/unicode_isblank.3 $(srcdir)/man/unicode_isdigit.3 $(srcdir)/man/unicode_isgraph.3 $(srcdir)/man/unicode_islower.3 $(srcdir)/man/unicode_ispunct.3 $(srcdir)/man/unicode_isspace.3 $(srcdir)/man/unicode_isupper.3 $(srcdir)/man/unicode_lb_end.3 $(srcdir)/man/unicode_lb_init.3 $(srcdir)/man/unicode_lb_next.3 
$(srcdir)/man/unicode_lb_next_cnt.3 $(srcdir)/man/unicode_lb_set_opts.3 $(srcdir)/man/unicode_lbc_end.3 $(srcdir)/man/unicode_lbc_init.3 $(srcdir)/man/unicode_lbc_next.3 $(srcdir)/man/unicode_lbc_next_cnt.3 $(srcdir)/man/unicode_lbc_set_opts.3 $(srcdir)/man/unicode_lc.3 $(srcdir)/man/unicode_script.3 $(srcdir)/man/unicode_tc.3 $(srcdir)/man/unicode_u_ucs2_native.3 $(srcdir)/man/unicode_u_ucs4_native.3 $(srcdir)/man/unicode_uc.3 $(srcdir)/man/unicode_wb_end.3 $(srcdir)/man/unicode_wb_init.3 $(srcdir)/man/unicode_wb_next.3 $(srcdir)/man/unicode_wb_next_cnt.3 $(srcdir)/man/unicode_wbscan_end.3 $(srcdir)/man/unicode_wbscan_init.3 $(srcdir)/man/unicode_wbscan_next.3
+man_MANS=$(srcdir)/man/courier-unicode.7 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]convert.3 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]convert_tocase.3 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]fromu.3 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]tou.3 $(srcdir)/man/unicode[\:][\:]iso_8859_1.3 $(srcdir)/man/unicode[\:][\:]linebreak_callback_base.3 $(srcdir)/man/unicode[\:][\:]linebreak_callback_save_buf.3 $(srcdir)/man/unicode[\:][\:]linebreak_iter.3 $(srcdir)/man/unicode[\:][\:]linebreakc_callback_base.3 $(srcdir)/man/unicode[\:][\:]linebreakc_iter.3 $(srcdir)/man/unicode[\:][\:]tolower.3 $(srcdir)/man/unicode[\:][\:]toupper.3 $(srcdir)/man/unicode[\:][\:]ucs_2.3 $(srcdir)/man/unicode[\:][\:]ucs_4.3 $(srcdir)/man/unicode[\:][\:]utf_8.3 $(srcdir)/man/unicode[\:][\:]wordbreak_callback_base.3 $(srcdir)/man/unicode_category_lookup.3 $(srcdir)/man/unicode_convert.3 $(srcdir)/man/unicode_convert_deinit.3 $(srcdir)/man/unicode_convert_fromu_init.3 $(srcdir)/man/unicode_convert_fromu_tobuf.3 $(srcdir)/man/unicode_convert_fromutf8.3 $(srcdir)/man/unicode_convert_init.3 $(srcdir)/man/unicode_convert_tobuf.3 $(srcdir)/man/unicode_convert_tocase.3 $(srcdir)/man/unicode_convert_tocbuf_fromutf8_init.3 $(srcdir)/man/unicode_convert_tocbuf_init.3 $(srcdir)/man/unicode_convert_tocbuf_toutf8_init.3 $(srcdir)/man/unicode_convert_tou_init.3 $(srcdir)/man/unicode_convert_tou_tobuf.3 $(srcdir)/man/unicode_convert_toutf8.3 $(srcdir)/man/unicode_convert_uc.3 $(srcdir)/man/unicode_default_chset.3 $(srcdir)/man/unicode_grapheme_break.3 $(srcdir)/man/unicode_html40ent_lookup.3 $(srcdir)/man/unicode_isalnum.3 $(srcdir)/man/unicode_isalpha.3 $(srcdir)/man/unicode_isblank.3 $(srcdir)/man/unicode_isdigit.3 $(srcdir)/man/unicode_isgraph.3 $(srcdir)/man/unicode_islower.3 $(srcdir)/man/unicode_ispunct.3 $(srcdir)/man/unicode_isspace.3 $(srcdir)/man/unicode_isupper.3 $(srcdir)/man/unicode_lb_end.3 $(srcdir)/man/unicode_lb_init.3 $(srcdir)/man/unicode_lb_next.3 
$(srcdir)/man/unicode_lb_next_cnt.3 $(srcdir)/man/unicode_lb_set_opts.3 $(srcdir)/man/unicode_lbc_end.3 $(srcdir)/man/unicode_lbc_init.3 $(srcdir)/man/unicode_lbc_next.3 $(srcdir)/man/unicode_lbc_next_cnt.3 $(srcdir)/man/unicode_lbc_set_opts.3 $(srcdir)/man/unicode_lc.3 $(srcdir)/man/unicode_locale_chset.3 $(srcdir)/man/unicode_script.3 $(srcdir)/man/unicode_tc.3 $(srcdir)/man/unicode_u_ucs2_native.3 $(srcdir)/man/unicode_u_ucs4_native.3 $(srcdir)/man/unicode_uc.3 $(srcdir)/man/unicode_wb_end.3 $(srcdir)/man/unicode_wb_init.3 $(srcdir)/man/unicode_wb_next.3 $(srcdir)/man/unicode_wb_next_cnt.3 $(srcdir)/man/unicode_wbscan_end.3 $(srcdir)/man/unicode_wbscan_init.3 $(srcdir)/man/unicode_wbscan_next.3
libcourier_unicode_la_SOURCES=courier-unicode.h \
courier-unicode-categories-tab.h \
diff --git a/unicode/book.xml b/unicode/book.xml
index f0475a3..3c5d758 100644
--- a/unicode/book.xml
+++ b/unicode/book.xml
@@ -620,6 +620,7 @@ See COPYING for distribution information.
unicode_default_chset
+ unicode_locale_chset
return the system character set name
@@ -630,6 +631,11 @@ See COPYING for distribution information.
const char *unicode_default_chset
+
+
+ const char *unicode_locale_chset
+
+
@@ -639,6 +645,8 @@ See COPYING for distribution information.
system environment character set (usually
nl_langinfo(CODESET)
, or from some suitable environment
variable).
+ unicode_locale_chset() returns the name of the
+ current application locale's character set.
@@ -2366,6 +2374,16 @@ std::copy(beg_iter, end_iter, std::back_insert_iterator<std::vector<int>
const std::string &charset
+
+ std::vector<unicode_char> unicode::tolower
+ const std::vector<unicode_char> &u
+
+
+
+ void unicode::tolower
+ std::vector<unicode_char> &u
+
+
std::string unicode::toupper
const std::string &string
@@ -2376,6 +2394,16 @@ std::copy(beg_iter, end_iter, std::back_insert_iterator<std::vector<int>
const std::string &string
const std::string &charset
+
+
+ std::vector<unicode_char> unicode::toupper
+ const std::vector<unicode_char> &u
+
+
+
+ void unicode::toupper
+ std::vector<unicode_char> &u
+
@@ -2398,6 +2426,14 @@ std::copy(beg_iter, end_iter, std::back_insert_iterator<std::vector<int>
then convert it back to the same character set, returning
the resulting string.
+
+
+ An alternative is to pass a
+ std::vector<unicode_char> &
+ that gets converted in-place, or a
+ const std::vector<unicode_char> &
+ whose converted copy is returned, leaving the original unchanged.
+
diff --git a/unicode/configure.ac b/unicode/configure.ac
index d04693f..d90dfc9 100644
--- a/unicode/configure.ac
+++ b/unicode/configure.ac
@@ -29,14 +29,12 @@ dnl Checks for libraries.
dnl Checks for header files.
-AC_CHECK_HEADERS(stddef.h wchar.h locale.h)
+AC_CHECK_HEADERS(stddef.h wchar.h)
dnl Checks for typedefs, structures, and compiler characteristics.
dnl Checks for library functions.
-AC_CHECK_FUNCS(setlocale)
-
AM_CONDITIONAL(UPDATE_UNICODE, test -f ${srcdir}/UnicodeData.txt)
AC_SYS_LARGEFILE
@@ -58,8 +56,6 @@ if test "$unicode_with_libcharset" = "yes"; then
fi
AC_DEFINE_UNQUOTED(UNICODE_USE_LIBCHARSET, 1,
[ Set this to 1 to use libcharset library. ])
-else
- AM_LANGINFO_CODESET
fi
save_LIBS="$LIBS"
diff --git a/unicode/courier-unicode.h b/unicode/courier-unicode.h
index 5c564a8..3d3715c 100644
--- a/unicode/courier-unicode.h
+++ b/unicode/courier-unicode.h
@@ -33,6 +33,12 @@ typedef uint32_t unicode_char;
extern const char *unicode_default_chset();
+/*
+** Name of the current locale's character set. NOTE(review): book.xml spells it unicode_locale_chset -- confirm.
+*/
+
+extern const char *unicode_locale_charset();
+
/* Unicode upper/lower/title case conversion functions */
extern unicode_char unicode_uc(unicode_char);
@@ -1840,6 +1846,22 @@ std::string tolower(const std::string &string,
std::string toupper(const std::string &string,
const std::string &charset);
+//! Convert unicode characters to lowercase, in place
+
+void tolower(std::vector<unicode_char> &u);
+
+//! Return a lowercase copy of the unicode characters; the argument is unchanged
+
+std::vector<unicode_char> tolower(const std::vector<unicode_char> &u);
+
+//! Convert unicode characters to uppercase, in place
+
+void toupper(std::vector<unicode_char> &u);
+
+//! Return an uppercase copy of the unicode characters; the argument is unchanged
+
+std::vector<unicode_char> toupper(const std::vector<unicode_char> &u);
+
#if 0
{
#endif
diff --git a/unicode/unicode.c b/unicode/unicode.c
index 2dda2f2..551854d 100644
--- a/unicode/unicode.c
+++ b/unicode/unicode.c
@@ -11,8 +11,6 @@
#include
#include
#include
-#if HAVE_LOCALE_H
-#if HAVE_SETLOCALE
#include <locale.h>
#if USE_LIBCHARSET
#if HAVE_LOCALCHARSET_H
@@ -20,14 +18,21 @@
#elif HAVE_LIBCHARSET_H
#include <libcharset.h>
#endif /* HAVE_LOCALCHARSET_H */
-#elif HAVE_LANGINFO_CODESET
+#else
#include <langinfo.h>
#endif /* USE_LIBCHARSET */
-#endif /* HAVE_SETLOCALE */
-#endif /* HAVE_LOCALE_H */
static char default_chset_buf[32];
+const char *unicode_locale_charset() /* Returns the character set name of the locale currently in effect. */
+{
+#if USE_LIBCHARSET /* NOTE(review): configure.ac defines UNICODE_USE_LIBCHARSET -- confirm this macro gets set. */
+ return locale_charset(); /* libcharset's lookup */
+#else
+ return nl_langinfo(CODESET); /* POSIX lookup; a later setlocale() may invalidate the returned string */
+#endif
+}
+
static void init_default_chset()
{
const char *old_locale=NULL;
@@ -42,17 +47,9 @@ static void init_default_chset()
if (chset == NULL)
{
-#if HAVE_LOCALE_H
-#if HAVE_SETLOCALE
old_locale=setlocale(LC_ALL, "");
locale_cpy=old_locale ? strdup(old_locale):NULL;
-#if USE_LIBCHARSET
- chset = locale_charset();
-#elif HAVE_LANGINFO_CODESET
- chset=nl_langinfo(CODESET);
-#endif
-#endif
-#endif
+ chset=unicode_locale_charset();
}
memset(buf, 0, sizeof(buf));
@@ -94,16 +91,11 @@ static void init_default_chset()
memcpy(default_chset_buf, buf, sizeof(buf));
-#if HAVE_LOCALE_H
-#if HAVE_SETLOCALE
if (locale_cpy)
{
setlocale(LC_ALL, locale_cpy);
free(locale_cpy);
}
-#endif
-#endif
-
}
const char *unicode_default_chset()
@@ -427,7 +419,7 @@ static int deinit_toimaputf7(void *ptr, int *errptr)
if (rc == 0 && toutf7->utf7encodebuf_cnt > 0)
rc=toimaputf7_encode_flushfinal(toutf7);
-
+
free(toutf7);
return rc;
}
@@ -793,7 +785,7 @@ static int init_iconv(struct unicode_convert_iconv *h,
}
}
}
-
+
return 0;
}
diff --git a/unicode/unicodecpp.C b/unicode/unicodecpp.C
index e6b31bd..214eb5d 100644
--- a/unicode/unicodecpp.C
+++ b/unicode/unicodecpp.C
@@ -57,6 +57,21 @@ const char unicode::utf_8[]="utf-8";
const char unicode::iso_8859_1[]="iso-8859-1";
+// Initialize unicode_default_chset() at thread startup.
+
+namespace unicode {
+
+ class init_chset { // Its constructor calls unicode_default_chset(); no instance is declared in the lines shown here.
+ public:
+ init_chset();
+ };
+}; // NOTE(review): the ';' after the namespace's closing brace is redundant.
+
+unicode::init_chset::init_chset()
+{
+ unicode_default_chset(); // Return value is discarded; presumably called so the default is computed early -- confirm.
+}
+
size_t unicode_wcwidth(const std::vector &uc)
{
size_t w=0;
@@ -505,11 +520,24 @@ std::string unicode::tolower(const std::string &string,
unicode::iconvert::convert(string, charset, uc);
- std::transform(uc.begin(), uc.end(), uc.begin(), unicode_lc);
+ tolower(uc);
return unicode::iconvert::convert(uc, charset);
}
+std::vector<unicode_char> unicode::tolower(const std::vector<unicode_char> &u)
+{
+ std::vector<unicode_char> copy=u; // The argument is const: convert a copy instead.
+
+ tolower(copy); // A non-const lvalue selects the in-place overload.
+ return copy;
+}
+
+void unicode::tolower(std::vector<unicode_char> &uc)
+{
+ std::transform(uc.begin(), uc.end(), uc.begin(), unicode_lc); // Overwrite each character with unicode_lc() of itself.
+}
+
std::string unicode::toupper(const std::string &string)
{
return toupper(string, unicode_default_chset());
@@ -522,7 +550,20 @@ std::string unicode::toupper(const std::string &string,
unicode::iconvert::convert(string, charset, uc);
- std::transform(uc.begin(), uc.end(), uc.begin(), unicode_uc);
+ toupper(uc);
return unicode::iconvert::convert(uc, charset);
}
+
+std::vector<unicode_char> unicode::toupper(const std::vector<unicode_char> &u)
+{
+ std::vector<unicode_char> copy=u; // The argument is const: convert a copy instead.
+
+ toupper(copy); // A non-const lvalue selects the in-place overload.
+ return copy;
+}
+
+void unicode::toupper(std::vector<unicode_char> &uc)
+{
+ std::transform(uc.begin(), uc.end(), uc.begin(), unicode_uc); // Overwrite each character with unicode_uc() of itself.
+}
--
cgit v1.2.3