summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- unicode/Makefile.am 2
-rw-r--r-- unicode/book.xml 36
-rw-r--r-- unicode/configure.ac 6
-rw-r--r-- unicode/courier-unicode.h 22
-rw-r--r-- unicode/unicode.c 34
-rw-r--r-- unicode/unicodecpp.C 45
6 files changed, 116 insertions, 29 deletions
diff --git a/unicode/Makefile.am b/unicode/Makefile.am
index da71e14..61ee291 100644
--- a/unicode/Makefile.am
+++ b/unicode/Makefile.am
@@ -56,7 +56,7 @@ include_HEADERS=courier-unicode.h \
courier-unicode-categories-tab.h \
courier-unicode-script-tab.h
-man_MANS=$(srcdir)/man/courier-unicode.7 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]convert.3 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]convert_tocase.3 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]fromu.3 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]tou.3 $(srcdir)/man/unicode[\:][\:]iso_8859_1.3 $(srcdir)/man/unicode[\:][\:]linebreak_callback_base.3 $(srcdir)/man/unicode[\:][\:]linebreak_callback_save_buf.3 $(srcdir)/man/unicode[\:][\:]linebreak_iter.3 $(srcdir)/man/unicode[\:][\:]linebreakc_callback_base.3 $(srcdir)/man/unicode[\:][\:]linebreakc_iter.3 $(srcdir)/man/unicode[\:][\:]tolower.3 $(srcdir)/man/unicode[\:][\:]toupper.3 $(srcdir)/man/unicode[\:][\:]ucs_2.3 $(srcdir)/man/unicode[\:][\:]ucs_4.3 $(srcdir)/man/unicode[\:][\:]utf_8.3 $(srcdir)/man/unicode[\:][\:]wordbreak_callback_base.3 $(srcdir)/man/unicode_category_lookup.3 $(srcdir)/man/unicode_convert.3 $(srcdir)/man/unicode_convert_deinit.3 $(srcdir)/man/unicode_convert_fromu_init.3 $(srcdir)/man/unicode_convert_fromu_tobuf.3 $(srcdir)/man/unicode_convert_fromutf8.3 $(srcdir)/man/unicode_convert_init.3 $(srcdir)/man/unicode_convert_tobuf.3 $(srcdir)/man/unicode_convert_tocase.3 $(srcdir)/man/unicode_convert_tocbuf_fromutf8_init.3 $(srcdir)/man/unicode_convert_tocbuf_init.3 $(srcdir)/man/unicode_convert_tocbuf_toutf8_init.3 $(srcdir)/man/unicode_convert_tou_init.3 $(srcdir)/man/unicode_convert_tou_tobuf.3 $(srcdir)/man/unicode_convert_toutf8.3 $(srcdir)/man/unicode_convert_uc.3 $(srcdir)/man/unicode_default_chset.3 $(srcdir)/man/unicode_grapheme_break.3 $(srcdir)/man/unicode_html40ent_lookup.3 $(srcdir)/man/unicode_isalnum.3 $(srcdir)/man/unicode_isalpha.3 $(srcdir)/man/unicode_isblank.3 $(srcdir)/man/unicode_isdigit.3 $(srcdir)/man/unicode_isgraph.3 $(srcdir)/man/unicode_islower.3 $(srcdir)/man/unicode_ispunct.3 $(srcdir)/man/unicode_isspace.3 $(srcdir)/man/unicode_isupper.3 $(srcdir)/man/unicode_lb_end.3 $(srcdir)/man/unicode_lb_init.3 $(srcdir)/man/unicode_lb_next.3 
$(srcdir)/man/unicode_lb_next_cnt.3 $(srcdir)/man/unicode_lb_set_opts.3 $(srcdir)/man/unicode_lbc_end.3 $(srcdir)/man/unicode_lbc_init.3 $(srcdir)/man/unicode_lbc_next.3 $(srcdir)/man/unicode_lbc_next_cnt.3 $(srcdir)/man/unicode_lbc_set_opts.3 $(srcdir)/man/unicode_lc.3 $(srcdir)/man/unicode_script.3 $(srcdir)/man/unicode_tc.3 $(srcdir)/man/unicode_u_ucs2_native.3 $(srcdir)/man/unicode_u_ucs4_native.3 $(srcdir)/man/unicode_uc.3 $(srcdir)/man/unicode_wb_end.3 $(srcdir)/man/unicode_wb_init.3 $(srcdir)/man/unicode_wb_next.3 $(srcdir)/man/unicode_wb_next_cnt.3 $(srcdir)/man/unicode_wbscan_end.3 $(srcdir)/man/unicode_wbscan_init.3 $(srcdir)/man/unicode_wbscan_next.3
+man_MANS=$(srcdir)/man/courier-unicode.7 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]convert.3 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]convert_tocase.3 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]fromu.3 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]tou.3 $(srcdir)/man/unicode[\:][\:]iso_8859_1.3 $(srcdir)/man/unicode[\:][\:]linebreak_callback_base.3 $(srcdir)/man/unicode[\:][\:]linebreak_callback_save_buf.3 $(srcdir)/man/unicode[\:][\:]linebreak_iter.3 $(srcdir)/man/unicode[\:][\:]linebreakc_callback_base.3 $(srcdir)/man/unicode[\:][\:]linebreakc_iter.3 $(srcdir)/man/unicode[\:][\:]tolower.3 $(srcdir)/man/unicode[\:][\:]toupper.3 $(srcdir)/man/unicode[\:][\:]ucs_2.3 $(srcdir)/man/unicode[\:][\:]ucs_4.3 $(srcdir)/man/unicode[\:][\:]utf_8.3 $(srcdir)/man/unicode[\:][\:]wordbreak_callback_base.3 $(srcdir)/man/unicode_category_lookup.3 $(srcdir)/man/unicode_convert.3 $(srcdir)/man/unicode_convert_deinit.3 $(srcdir)/man/unicode_convert_fromu_init.3 $(srcdir)/man/unicode_convert_fromu_tobuf.3 $(srcdir)/man/unicode_convert_fromutf8.3 $(srcdir)/man/unicode_convert_init.3 $(srcdir)/man/unicode_convert_tobuf.3 $(srcdir)/man/unicode_convert_tocase.3 $(srcdir)/man/unicode_convert_tocbuf_fromutf8_init.3 $(srcdir)/man/unicode_convert_tocbuf_init.3 $(srcdir)/man/unicode_convert_tocbuf_toutf8_init.3 $(srcdir)/man/unicode_convert_tou_init.3 $(srcdir)/man/unicode_convert_tou_tobuf.3 $(srcdir)/man/unicode_convert_toutf8.3 $(srcdir)/man/unicode_convert_uc.3 $(srcdir)/man/unicode_default_chset.3 $(srcdir)/man/unicode_grapheme_break.3 $(srcdir)/man/unicode_html40ent_lookup.3 $(srcdir)/man/unicode_isalnum.3 $(srcdir)/man/unicode_isalpha.3 $(srcdir)/man/unicode_isblank.3 $(srcdir)/man/unicode_isdigit.3 $(srcdir)/man/unicode_isgraph.3 $(srcdir)/man/unicode_islower.3 $(srcdir)/man/unicode_ispunct.3 $(srcdir)/man/unicode_isspace.3 $(srcdir)/man/unicode_isupper.3 $(srcdir)/man/unicode_lb_end.3 $(srcdir)/man/unicode_lb_init.3 $(srcdir)/man/unicode_lb_next.3 
$(srcdir)/man/unicode_lb_next_cnt.3 $(srcdir)/man/unicode_lb_set_opts.3 $(srcdir)/man/unicode_lbc_end.3 $(srcdir)/man/unicode_lbc_init.3 $(srcdir)/man/unicode_lbc_next.3 $(srcdir)/man/unicode_lbc_next_cnt.3 $(srcdir)/man/unicode_lbc_set_opts.3 $(srcdir)/man/unicode_lc.3 $(srcdir)/man/unicode_locale_chset.3 $(srcdir)/man/unicode_script.3 $(srcdir)/man/unicode_tc.3 $(srcdir)/man/unicode_u_ucs2_native.3 $(srcdir)/man/unicode_u_ucs4_native.3 $(srcdir)/man/unicode_uc.3 $(srcdir)/man/unicode_wb_end.3 $(srcdir)/man/unicode_wb_init.3 $(srcdir)/man/unicode_wb_next.3 $(srcdir)/man/unicode_wb_next_cnt.3 $(srcdir)/man/unicode_wbscan_end.3 $(srcdir)/man/unicode_wbscan_init.3 $(srcdir)/man/unicode_wbscan_next.3
libcourier_unicode_la_SOURCES=courier-unicode.h \
courier-unicode-categories-tab.h \
diff --git a/unicode/book.xml b/unicode/book.xml
index f0475a3..3c5d758 100644
--- a/unicode/book.xml
+++ b/unicode/book.xml
@@ -620,6 +620,7 @@ See COPYING for distribution information.
<refnamediv>
<refname>unicode_default_chset</refname>
+ <refname>unicode_locale_chset</refname>
<refpurpose>return the system character set name</refpurpose>
</refnamediv>
@@ -630,6 +631,11 @@ See COPYING for distribution information.
<funcdef>const char *<function>unicode_default_chset</function></funcdef>
<void />
</funcprototype>
+
+ <funcprototype>
+ <funcdef>const char *<function>unicode_locale_chset</function></funcdef>
+ <void />
+ </funcprototype>
</funcsynopsis>
</refsynopsisdiv>
<refsect1>
@@ -639,6 +645,8 @@ See COPYING for distribution information.
system environment character set (usually
<quote>nl_langinfo(CODESET)</quote>, or from some suitable environment
variable).
+ <function>unicode_locale_chset</function>() returns the name of the
+ current application locale's character set.
</para>
</refsect1>
<refsect1>
@@ -2367,6 +2375,16 @@ std::copy(beg_iter, end_iter, std::back_insert_iterator&lt;std::vector&lt;int&gt
</funcprototype>
<funcprototype>
+ <funcdef>std::vector&lt;unicode_char&gt; <function>unicode::tolower</function></funcdef>
+ <paramdef>const std::vector&lt;unicode_char&gt; &amp;<parameter>u</parameter></paramdef>
+ </funcprototype>
+
+ <funcprototype>
+ <funcdef>void <function>unicode::tolower</function></funcdef>
+ <paramdef>std::vector&lt;unicode_char&gt; &amp;<parameter>u</parameter></paramdef>
+ </funcprototype>
+
+ <funcprototype>
<funcdef>std::string <function>unicode::toupper</function></funcdef>
<paramdef>const std::string &amp;<parameter>string</parameter></paramdef>
</funcprototype>
@@ -2376,6 +2394,16 @@ std::copy(beg_iter, end_iter, std::back_insert_iterator&lt;std::vector&lt;int&gt
<paramdef>const std::string &amp;<parameter>string</parameter></paramdef>
<paramdef>const std::string &amp;<parameter>charset</parameter></paramdef>
</funcprototype>
+
+ <funcprototype>
+ <funcdef>std::vector&lt;unicode_char&gt; <function>unicode::toupper</function></funcdef>
+ <paramdef>const std::vector&lt;unicode_char&gt; &amp;<parameter>u</parameter></paramdef>
+ </funcprototype>
+
+ <funcprototype>
+ <funcdef>void <function>unicode::toupper</function></funcdef>
+ <paramdef>std::vector&lt;unicode_char&gt; &amp;<parameter>u</parameter></paramdef>
+ </funcprototype>
</funcsynopsis>
</refsynopsisdiv>
@@ -2398,6 +2426,14 @@ std::copy(beg_iter, end_iter, std::back_insert_iterator&lt;std::vector&lt;int&gt
then convert it back to the same character set, returning
the resulting string.
</para>
+
+ <para>
+ An alternative is to pass a
+ <classname>std::vector&lt;unicode_char&gt; &amp;</classname>
+ that gets converted in-place, or a
+ <classname>const std::vector&lt;unicode_char&gt; &amp;</classname>
+ that gets converted, and returned.
+ </para>
</refsect1>
<refsect1>
diff --git a/unicode/configure.ac b/unicode/configure.ac
index d04693f..d90dfc9 100644
--- a/unicode/configure.ac
+++ b/unicode/configure.ac
@@ -29,14 +29,12 @@ dnl Checks for libraries.
dnl Checks for header files.
-AC_CHECK_HEADERS(stddef.h wchar.h locale.h)
+AC_CHECK_HEADERS(stddef.h wchar.h)
dnl Checks for typedefs, structures, and compiler characteristics.
dnl Checks for library functions.
-AC_CHECK_FUNCS(setlocale)
-
AM_CONDITIONAL(UPDATE_UNICODE, test -f ${srcdir}/UnicodeData.txt)
AC_SYS_LARGEFILE
@@ -58,8 +56,6 @@ if test "$unicode_with_libcharset" = "yes"; then
fi
AC_DEFINE_UNQUOTED(UNICODE_USE_LIBCHARSET, 1,
[ Set this to 1 to use libcharset library. ])
-else
- AM_LANGINFO_CODESET
fi
save_LIBS="$LIBS"
diff --git a/unicode/courier-unicode.h b/unicode/courier-unicode.h
index 5c564a8..3d3715c 100644
--- a/unicode/courier-unicode.h
+++ b/unicode/courier-unicode.h
@@ -33,6 +33,12 @@ typedef uint32_t unicode_char;
extern const char *unicode_default_chset();
+/*
+** The current locale character set.
+*/
+
+extern const char *unicode_locale_charset();
+
/* Unicode upper/lower/title case conversion functions */
extern unicode_char unicode_uc(unicode_char);
@@ -1840,6 +1846,22 @@ std::string tolower(const std::string &string,
std::string toupper(const std::string &string,
const std::string &charset);
+//! Convert unicode to lowercase
+
+void tolower(std::vector<unicode_char> &u);
+
+//! Convert unicode to lowercase
+
+std::vector<unicode_char> tolower(const std::vector<unicode_char> &u);
+
+//! Convert unicode to uppercase
+
+void toupper(std::vector<unicode_char> &u);
+
+//! Convert unicode to uppercase
+
+std::vector<unicode_char> toupper(const std::vector<unicode_char> &u);
+
#if 0
{
#endif
diff --git a/unicode/unicode.c b/unicode/unicode.c
index 2dda2f2..551854d 100644
--- a/unicode/unicode.c
+++ b/unicode/unicode.c
@@ -11,8 +11,6 @@
#include <stdlib.h>
#include <iconv.h>
#include <errno.h>
-#if HAVE_LOCALE_H
-#if HAVE_SETLOCALE
#include <locale.h>
#if USE_LIBCHARSET
#if HAVE_LOCALCHARSET_H
@@ -20,14 +18,21 @@
#elif HAVE_LIBCHARSET_H
#include <libcharset.h>
#endif /* HAVE_LOCALCHARSET_H */
-#elif HAVE_LANGINFO_CODESET
+#else
#include <langinfo.h>
#endif /* USE_LIBCHARSET */
-#endif /* HAVE_SETLOCALE */
-#endif /* HAVE_LOCALE_H */
static char default_chset_buf[32];
+const char *unicode_locale_charset()
+{
+#if USE_LIBCHARSET
+ return locale_charset();
+#else
+ return nl_langinfo(CODESET);
+#endif
+}
+
static void init_default_chset()
{
const char *old_locale=NULL;
@@ -42,17 +47,9 @@ static void init_default_chset()
if (chset == NULL)
{
-#if HAVE_LOCALE_H
-#if HAVE_SETLOCALE
old_locale=setlocale(LC_ALL, "");
locale_cpy=old_locale ? strdup(old_locale):NULL;
-#if USE_LIBCHARSET
- chset = locale_charset();
-#elif HAVE_LANGINFO_CODESET
- chset=nl_langinfo(CODESET);
-#endif
-#endif
-#endif
+ chset=unicode_locale_charset();
}
memset(buf, 0, sizeof(buf));
@@ -94,16 +91,11 @@ static void init_default_chset()
memcpy(default_chset_buf, buf, sizeof(buf));
-#if HAVE_LOCALE_H
-#if HAVE_SETLOCALE
if (locale_cpy)
{
setlocale(LC_ALL, locale_cpy);
free(locale_cpy);
}
-#endif
-#endif
-
}
const char *unicode_default_chset()
@@ -427,7 +419,7 @@ static int deinit_toimaputf7(void *ptr, int *errptr)
if (rc == 0 && toutf7->utf7encodebuf_cnt > 0)
rc=toimaputf7_encode_flushfinal(toutf7);
-
+
free(toutf7);
return rc;
}
@@ -793,7 +785,7 @@ static int init_iconv(struct unicode_convert_iconv *h,
}
}
}
-
+
return 0;
}
diff --git a/unicode/unicodecpp.C b/unicode/unicodecpp.C
index e6b31bd..214eb5d 100644
--- a/unicode/unicodecpp.C
+++ b/unicode/unicodecpp.C
@@ -57,6 +57,21 @@ const char unicode::utf_8[]="utf-8";
const char unicode::iso_8859_1[]="iso-8859-1";
+// Initialize unicode_default_chset() at thread startup.
+
+namespace unicode {
+
+ class init_chset {
+ public:
+ init_chset();
+ };
+};
+
+unicode::init_chset::init_chset()
+{
+ unicode_default_chset();
+}
+
size_t unicode_wcwidth(const std::vector<unicode_char> &uc)
{
size_t w=0;
@@ -505,11 +520,24 @@ std::string unicode::tolower(const std::string &string,
unicode::iconvert::convert(string, charset, uc);
- std::transform(uc.begin(), uc.end(), uc.begin(), unicode_lc);
+ tolower(uc);
return unicode::iconvert::convert(uc, charset);
}
+std::vector<unicode_char> unicode::tolower(const std::vector<unicode_char> &u)
+{
+ std::vector<unicode_char> copy=u;
+
+ tolower(copy);
+ return copy;
+}
+
+void unicode::tolower(std::vector<unicode_char> &uc)
+{
+ std::transform(uc.begin(), uc.end(), uc.begin(), unicode_lc);
+}
+
std::string unicode::toupper(const std::string &string)
{
return toupper(string, unicode_default_chset());
@@ -522,7 +550,20 @@ std::string unicode::toupper(const std::string &string,
unicode::iconvert::convert(string, charset, uc);
- std::transform(uc.begin(), uc.end(), uc.begin(), unicode_uc);
+ toupper(uc);
return unicode::iconvert::convert(uc, charset);
}
+
+std::vector<unicode_char> unicode::toupper(const std::vector<unicode_char> &u)
+{
+ std::vector<unicode_char> copy=u;
+
+ toupper(copy);
+ return copy;
+}
+
+void unicode::toupper(std::vector<unicode_char> &uc)
+{
+ std::transform(uc.begin(), uc.end(), uc.begin(), unicode_uc);
+}