diff options
| author | Sam Varshavchik | 2015-07-27 08:33:57 -0400 |
|---|---|---|
| committer | Sam Varshavchik | 2015-07-27 08:33:57 -0400 |
| commit | bc4028bbe7b3692bd0d8d5309cc425abe57fb9c0 (patch) | |
| tree | 50e5120dc3f531986191cae69921925d3323fdea /unicode | |
| parent | 9827d1dcf469fd4d715ad115a573c7c00218dd01 (diff) | |
| download | courier-libs-bc4028bbe7b3692bd0d8d5309cc425abe57fb9c0.tar.bz2 | |
Add tolower, toupper.
Diffstat (limited to 'unicode')
| -rw-r--r-- | unicode/Makefile.am | 2 | ||||
| -rw-r--r-- | unicode/README | 2 | ||||
| -rw-r--r-- | unicode/book.xml | 93 | ||||
| -rw-r--r-- | unicode/courier-unicode.h | 1253 | ||||
| -rw-r--r-- | unicode/unicodecpp.C | 36 |
5 files changed, 770 insertions, 616 deletions
diff --git a/unicode/Makefile.am b/unicode/Makefile.am index 8729714..da71e14 100644 --- a/unicode/Makefile.am +++ b/unicode/Makefile.am @@ -56,7 +56,7 @@ include_HEADERS=courier-unicode.h \ courier-unicode-categories-tab.h \ courier-unicode-script-tab.h -man_MANS=$(srcdir)/man/courier-unicode.7 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]convert.3 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]convert_tocase.3 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]fromu.3 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]tou.3 $(srcdir)/man/unicode[\:][\:]iso_8859_1.3 $(srcdir)/man/unicode[\:][\:]linebreak_callback_base.3 $(srcdir)/man/unicode[\:][\:]linebreak_callback_save_buf.3 $(srcdir)/man/unicode[\:][\:]linebreak_iter.3 $(srcdir)/man/unicode[\:][\:]linebreakc_callback_base.3 $(srcdir)/man/unicode[\:][\:]linebreakc_iter.3 $(srcdir)/man/unicode[\:][\:]ucs_2.3 $(srcdir)/man/unicode[\:][\:]ucs_4.3 $(srcdir)/man/unicode[\:][\:]utf_8.3 $(srcdir)/man/unicode[\:][\:]wordbreak_callback_base.3 $(srcdir)/man/unicode_category_lookup.3 $(srcdir)/man/unicode_convert.3 $(srcdir)/man/unicode_convert_deinit.3 $(srcdir)/man/unicode_convert_fromu_init.3 $(srcdir)/man/unicode_convert_fromu_tobuf.3 $(srcdir)/man/unicode_convert_fromutf8.3 $(srcdir)/man/unicode_convert_init.3 $(srcdir)/man/unicode_convert_tobuf.3 $(srcdir)/man/unicode_convert_tocase.3 $(srcdir)/man/unicode_convert_tocbuf_fromutf8_init.3 $(srcdir)/man/unicode_convert_tocbuf_init.3 $(srcdir)/man/unicode_convert_tocbuf_toutf8_init.3 $(srcdir)/man/unicode_convert_tou_init.3 $(srcdir)/man/unicode_convert_tou_tobuf.3 $(srcdir)/man/unicode_convert_toutf8.3 $(srcdir)/man/unicode_convert_uc.3 $(srcdir)/man/unicode_default_chset.3 $(srcdir)/man/unicode_grapheme_break.3 $(srcdir)/man/unicode_html40ent_lookup.3 $(srcdir)/man/unicode_isalnum.3 $(srcdir)/man/unicode_isalpha.3 $(srcdir)/man/unicode_isblank.3 $(srcdir)/man/unicode_isdigit.3 $(srcdir)/man/unicode_isgraph.3 $(srcdir)/man/unicode_islower.3 $(srcdir)/man/unicode_ispunct.3 $(srcdir)/man/unicode_isspace.3 $(srcdir)/man/unicode_isupper.3 $(srcdir)/man/unicode_lb_end.3 $(srcdir)/man/unicode_lb_init.3 $(srcdir)/man/unicode_lb_next.3 $(srcdir)/man/unicode_lb_next_cnt.3 $(srcdir)/man/unicode_lb_set_opts.3 $(srcdir)/man/unicode_lbc_end.3 $(srcdir)/man/unicode_lbc_init.3 $(srcdir)/man/unicode_lbc_next.3 $(srcdir)/man/unicode_lbc_next_cnt.3 $(srcdir)/man/unicode_lbc_set_opts.3 $(srcdir)/man/unicode_lc.3 $(srcdir)/man/unicode_script.3 $(srcdir)/man/unicode_tc.3 $(srcdir)/man/unicode_u_ucs2_native.3 $(srcdir)/man/unicode_u_ucs4_native.3 $(srcdir)/man/unicode_uc.3 $(srcdir)/man/unicode_wb_end.3 $(srcdir)/man/unicode_wb_init.3 $(srcdir)/man/unicode_wb_next.3 $(srcdir)/man/unicode_wb_next_cnt.3 $(srcdir)/man/unicode_wbscan_end.3 $(srcdir)/man/unicode_wbscan_init.3 $(srcdir)/man/unicode_wbscan_next.3 +man_MANS=$(srcdir)/man/courier-unicode.7 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]convert.3 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]convert_tocase.3 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]fromu.3 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]tou.3 $(srcdir)/man/unicode[\:][\:]iso_8859_1.3 $(srcdir)/man/unicode[\:][\:]linebreak_callback_base.3 $(srcdir)/man/unicode[\:][\:]linebreak_callback_save_buf.3 $(srcdir)/man/unicode[\:][\:]linebreak_iter.3 $(srcdir)/man/unicode[\:][\:]linebreakc_callback_base.3 $(srcdir)/man/unicode[\:][\:]linebreakc_iter.3 $(srcdir)/man/unicode[\:][\:]tolower.3 $(srcdir)/man/unicode[\:][\:]toupper.3 $(srcdir)/man/unicode[\:][\:]ucs_2.3 $(srcdir)/man/unicode[\:][\:]ucs_4.3 $(srcdir)/man/unicode[\:][\:]utf_8.3 $(srcdir)/man/unicode[\:][\:]wordbreak_callback_base.3 $(srcdir)/man/unicode_category_lookup.3 $(srcdir)/man/unicode_convert.3 $(srcdir)/man/unicode_convert_deinit.3 $(srcdir)/man/unicode_convert_fromu_init.3 $(srcdir)/man/unicode_convert_fromu_tobuf.3 $(srcdir)/man/unicode_convert_fromutf8.3 $(srcdir)/man/unicode_convert_init.3 $(srcdir)/man/unicode_convert_tobuf.3 $(srcdir)/man/unicode_convert_tocase.3 $(srcdir)/man/unicode_convert_tocbuf_fromutf8_init.3 $(srcdir)/man/unicode_convert_tocbuf_init.3 $(srcdir)/man/unicode_convert_tocbuf_toutf8_init.3 $(srcdir)/man/unicode_convert_tou_init.3 $(srcdir)/man/unicode_convert_tou_tobuf.3 $(srcdir)/man/unicode_convert_toutf8.3 $(srcdir)/man/unicode_convert_uc.3 $(srcdir)/man/unicode_default_chset.3 $(srcdir)/man/unicode_grapheme_break.3 $(srcdir)/man/unicode_html40ent_lookup.3 $(srcdir)/man/unicode_isalnum.3 $(srcdir)/man/unicode_isalpha.3 $(srcdir)/man/unicode_isblank.3 $(srcdir)/man/unicode_isdigit.3 $(srcdir)/man/unicode_isgraph.3 $(srcdir)/man/unicode_islower.3 $(srcdir)/man/unicode_ispunct.3 $(srcdir)/man/unicode_isspace.3 $(srcdir)/man/unicode_isupper.3 $(srcdir)/man/unicode_lb_end.3 $(srcdir)/man/unicode_lb_init.3 $(srcdir)/man/unicode_lb_next.3 $(srcdir)/man/unicode_lb_next_cnt.3 $(srcdir)/man/unicode_lb_set_opts.3 $(srcdir)/man/unicode_lbc_end.3 $(srcdir)/man/unicode_lbc_init.3 $(srcdir)/man/unicode_lbc_next.3 $(srcdir)/man/unicode_lbc_next_cnt.3 $(srcdir)/man/unicode_lbc_set_opts.3 $(srcdir)/man/unicode_lc.3 $(srcdir)/man/unicode_script.3 $(srcdir)/man/unicode_tc.3 $(srcdir)/man/unicode_u_ucs2_native.3 $(srcdir)/man/unicode_u_ucs4_native.3 $(srcdir)/man/unicode_uc.3 $(srcdir)/man/unicode_wb_end.3 $(srcdir)/man/unicode_wb_init.3 $(srcdir)/man/unicode_wb_next.3 $(srcdir)/man/unicode_wb_next_cnt.3 $(srcdir)/man/unicode_wbscan_end.3 $(srcdir)/man/unicode_wbscan_init.3 $(srcdir)/man/unicode_wbscan_next.3 libcourier_unicode_la_SOURCES=courier-unicode.h \ courier-unicode-categories-tab.h \ diff --git a/unicode/README b/unicode/README index 09b7438..47f2441 100644 --- a/unicode/README +++ b/unicode/README @@ -37,7 +37,7 @@ Courier Unicode Library a unicode character. Also, an adaptation of the iconv(3) API for this unicode library. - * Look up Unicode script property. + * Look up the Unicode script property. * Look up the category property. diff --git a/unicode/book.xml b/unicode/book.xml index 17d63ce..f0475a3 100644 --- a/unicode/book.xml +++ b/unicode/book.xml @@ -202,6 +202,9 @@ See COPYING for distribution information. <link linkend="unicode__iconvert__tou"> <citerefentry><refentrytitle>unicode::iconvert::tou</refentrytitle> <manvolnum>3</manvolnum></citerefentry></link>, + <link linkend="unicode__tolower"> + <citerefentry><refentrytitle>unicode::tolower</refentrytitle> + <manvolnum>3</manvolnum></citerefentry></link>, <link linkend="unicode__linebreak"> <citerefentry><refentrytitle>unicode::linebreak</refentrytitle> <manvolnum>3</manvolnum></citerefentry></link>, @@ -2329,6 +2332,96 @@ std::copy(beg_iter, end_iter, std::back_insert_iterator<std::vector<int> </refsect1> </refentry> + <refentry id="unicode__tolower"> + <info><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></info> + + <refmeta> + <refentrytitle>unicode::tolower</refentrytitle> + <manvolnum>3</manvolnum> + </refmeta> + + <refnamediv> + <refname>unicode::tolower</refname> + <refname>unicode::toupper</refname> + <refpurpose>unicode version of + <citerefentry><refentrytitle>tolower</refentrytitle> + <manvolnum>3</manvolnum></citerefentry> + and + <citerefentry><refentrytitle>toupper</refentrytitle> + <manvolnum>3</manvolnum></citerefentry> + </refpurpose> + </refnamediv> + + <refsynopsisdiv> + <funcsynopsis> + <funcsynopsisinfo>#include <courier-unicode.h></funcsynopsisinfo> + <funcprototype> + <funcdef>std::string <function>unicode::tolower</function></funcdef> + <paramdef>const std::string &<parameter>string</parameter></paramdef> + </funcprototype> + + <funcprototype> + <funcdef>std::string <function>unicode::tolower</function></funcdef> + <paramdef>const std::string &<parameter>string</parameter></paramdef> + <paramdef>const std::string &<parameter>charset</parameter></paramdef> + </funcprototype> + + <funcprototype> + <funcdef>std::string <function>unicode::toupper</function></funcdef> + <paramdef>const std::string &<parameter>string</parameter></paramdef> + </funcprototype> + + <funcprototype> + <funcdef>std::string <function>unicode::toupper</function></funcdef> + <paramdef>const std::string &<parameter>string</parameter></paramdef> + <paramdef>const std::string &<parameter>charset</parameter></paramdef> + </funcprototype> + </funcsynopsis> + </refsynopsisdiv> + + <refsect1> + <title>DESCRIPTION</title> + + <para> + These functions convert the <replaceable>string</replaceable> + parameter, in <replaceable>charset</replaceable> or + <link linkend="unicode_default_chset"> + <citerefentry><refentrytitle>unicode_default_chset</refentrytitle> + <manvolnum>3</manvolnum></citerefentry></link>, + to unicode, replace each character with + <link linkend="unicode_uc"> + <citerefentry><refentrytitle>unicode_lc</refentrytitle> + <manvolnum>3</manvolnum></citerefentry></link> or + <link linkend="unicode_uc"> + <citerefentry><refentrytitle>unicode_uc</refentrytitle> + <manvolnum>3</manvolnum></citerefentry></link>, + then convert it back to the same character set, returning + the resulting string. + </para> + </refsect1> + + <refsect1> + <title>SEE ALSO</title> + <para> + <link linkend="courier-unicode"> + <citerefentry> + <refentrytitle>courier-unicode</refentrytitle> + <manvolnum>7</manvolnum></citerefentry></link>. + </para> + </refsect1> + </refentry> + + + + + + + + + + + + <refentry id="unicode__wordbreak"> <info><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></info> diff --git a/unicode/courier-unicode.h b/unicode/courier-unicode.h index a84e230..5c564a8 100644 --- a/unicode/courier-unicode.h +++ b/unicode/courier-unicode.h @@ -973,524 +973,528 @@ extern size_t unicode_wcwidth(const std::vector<unicode_char> &uc); namespace unicode { - /* - ** Various character sets - */ - extern const char ucs_4[], ucs_2[], utf_8[], iso_8859_1[]; +#if 0 +}; +#endif - /* - ** Interface to iconv. - ** - ** Subclass converted(). Invoke begin(), then operator(), repeatedly, - ** then end(). - ** - ** converted() receives the converted text. - */ +/* +** Various character sets +*/ +extern const char ucs_4[], ucs_2[], utf_8[], iso_8859_1[]; - class iconvert { +/* +** Interface to iconv. +** +** Subclass converted(). Invoke begin(), then operator(), repeatedly, +** then end(). +** +** converted() receives the converted text. +*/ - unicode_convert_handle_t handle; +class iconvert { - public: - iconvert(); - virtual ~iconvert(); + unicode_convert_handle_t handle; - /* Start conversion. - ** Returns false if the requested conversion cannot be done. - **/ + public: + iconvert(); + virtual ~iconvert(); - bool begin(/* Convert from */ - const std::string &src_chset, + /* Start conversion. + ** Returns false if the requested conversion cannot be done. + **/ - /* Convert to */ - const std::string &dst_chset); + bool begin(/* Convert from */ + const std::string &src_chset, - /* Feed iconv(3). Returns false if the conversion was aborted. - */ + /* Convert to */ + const std::string &dst_chset); - bool operator()(const char *, size_t); + /* Feed iconv(3). Returns false if the conversion was aborted. + */ - bool operator()(const unicode_char *, size_t); + bool operator()(const char *, size_t); - /* - ** Get the results here. If the subclass returns a non-0 - ** value, the conversion is aborted. - */ + bool operator()(const unicode_char *, size_t); - virtual int converted(const char *, size_t); + /* + ** Get the results here. If the subclass returns a non-0 + ** value, the conversion is aborted. + */ - /* - ** End of conversion. - ** - ** Returns true if all calls to converted() returned 0, - ** false if the conversion was aborted. - ** - ** errflag is set to true if there was a character that could - ** not be converted, and passed to converted(). - */ + virtual int converted(const char *, size_t); - bool end(bool &errflag) - { - return end(&errflag); - } + /* + ** End of conversion. + ** + ** Returns true if all calls to converted() returned 0, + ** false if the conversion was aborted. + ** + ** errflag is set to true if there was a character that could + ** not be converted, and passed to converted(). + */ - bool end() - { - return end(NULL); - } + bool end(bool &errflag) + { + return end(&errflag); + } - /* Convert between two different charsets */ + bool end() + { + return end(NULL); + } - static std::string convert(const std::string &text, - const std::string &charset, - const std::string &dstcharset, - bool &errflag); + /* Convert between two different charsets */ - /* Convert between two different charsets */ + static std::string convert(const std::string &text, + const std::string &charset, + const std::string &dstcharset, + bool &errflag); - static std::string convert(const std::string &text, - const std::string &charset, - const std::string &dstcharset) - { - bool dummy; + /* Convert between two different charsets */ - return convert(text, charset, dstcharset, dummy); - } + static std::string convert(const std::string &text, + const std::string &charset, + const std::string &dstcharset) + { + bool dummy; - /* Convert from unicode to a charset */ + return convert(text, charset, dstcharset, dummy); + } - static std::string convert(const std::vector<unicode_char> &uc, - const std::string &dstcharset, - bool &errflag); + /* Convert from unicode to a charset */ - /* Convert from unicode to a charset */ + static std::string convert(const std::vector<unicode_char> &uc, + const std::string &dstcharset, + bool &errflag); - static std::string convert(const std::vector<unicode_char> &uc, - const std::string &dstcharset) - { - bool dummy; + /* Convert from unicode to a charset */ - return convert(uc, dstcharset, dummy); - } + static std::string convert(const std::vector<unicode_char> &uc, + const std::string &dstcharset) + { + bool dummy; - /* Convert charset to unicode */ + return convert(uc, dstcharset, dummy); + } - static bool convert(const std::string &text, - const std::string &charset, - std::vector<unicode_char> &uc); + /* Convert charset to unicode */ + static bool convert(const std::string &text, + const std::string &charset, + std::vector<unicode_char> &uc); - /* Convert to upper/lower/title case */ - static std::string - convert_tocase(/* Text string */ - const std::string &text, + /* Convert to upper/lower/title case */ - /* Its charset */ - const std::string &charset, + static std::string + convert_tocase(/* Text string */ + const std::string &text, - /* First character: unicode_uc, unicode_lc, or unicode_tc */ - unicode_char (*first_char_func)(unicode_char), + /* Its charset */ + const std::string &charset, - /* If not NULL, second and subsequent chars */ - unicode_char (*char_func)(unicode_char) - =NULL) - { - bool dummy; + /* First character: unicode_uc, unicode_lc, or unicode_tc */ + unicode_char (*first_char_func)(unicode_char), - return convert_tocase(text, charset, dummy, - first_char_func, - char_func); - } + /* If not NULL, second and subsequent chars */ + unicode_char (*char_func)(unicode_char) + =NULL) + { + bool dummy; - /* Convert to upper/lower/title case */ + return convert_tocase(text, charset, dummy, + first_char_func, + char_func); + } - static std::string - convert_tocase(/* Text string */ - const std::string &text, + /* Convert to upper/lower/title case */ - /* Its charset */ - const std::string &charset, + static std::string + convert_tocase(/* Text string */ + const std::string &text, - /* Set if there's a conversion error */ - bool &err, + /* Its charset */ + const std::string &charset, - /* First character: unicode_uc, unicode_lc, or unicode_tc */ - unicode_char (*first_char_func)(unicode_char), + /* Set if there's a conversion error */ + bool &err, - /* If not NULL, second and subsequent chars */ - unicode_char (*char_func)(unicode_char) - =NULL); - private: - bool end(bool *); + /* First character: unicode_uc, unicode_lc, or unicode_tc */ + unicode_char (*first_char_func)(unicode_char), - public: - class tou; - class fromu; - }; + /* If not NULL, second and subsequent chars */ + unicode_char (*char_func)(unicode_char) + =NULL); + private: + bool end(bool *); - /* Convert output of iconvert to unicode_chars. */ + public: + class tou; + class fromu; +}; - class iconvert::tou : public iconvert { +/* Convert output of iconvert to unicode_chars. */ - public: - bool begin(const std::string &chset); +class iconvert::tou : public iconvert { - virtual int converted(const unicode_char *, size_t); + public: + bool begin(const std::string &chset); - using iconvert::operator(); - private: - int converted(const char *ptr, size_t cnt); + virtual int converted(const unicode_char *, size_t); - public: - template<typename iter_t> class to_iter_class; + using iconvert::operator(); + private: + int converted(const char *ptr, size_t cnt); - template<typename input_iter_t, - typename output_iter_t> - static output_iter_t convert(input_iter_t from_iter, - input_iter_t to_iter, - const std::string &chset, - bool &flag, - output_iter_t out_iter); + public: + template<typename iter_t> class to_iter_class; - template<typename input_iter_t> - static bool convert(input_iter_t from_iter, - input_iter_t to_iter, - const std::string &chset, - std::vector<unicode_char> &out_buf) - { - bool flag; + template<typename input_iter_t, + typename output_iter_t> + static output_iter_t convert(input_iter_t from_iter, + input_iter_t to_iter, + const std::string &chset, + bool &flag, + output_iter_t out_iter); + + template<typename input_iter_t> + static bool convert(input_iter_t from_iter, + input_iter_t to_iter, + const std::string &chset, + std::vector<unicode_char> &out_buf) + { + bool flag; - out_buf.clear(); - std::back_insert_iterator<std::vector<unicode_char> > - insert_iter(out_buf); + out_buf.clear(); + std::back_insert_iterator<std::vector<unicode_char> > + insert_iter(out_buf); - convert(from_iter, to_iter, chset, flag, insert_iter); + convert(from_iter, to_iter, chset, flag, insert_iter); - return flag; - } + return flag; + } - static std::pair<std::vector<unicode_char>, bool> - convert(const std::string &str, - const std::string &chset); - }; + static std::pair<std::vector<unicode_char>, bool> + convert(const std::string &str, + const std::string &chset); +}; - /* Helper class that saves unicode output into an output iterator */ +/* Helper class that saves unicode output into an output iterator */ - template<typename iter_t> - class iconvert::tou::to_iter_class : public iconvert::tou { +template<typename iter_t> +class iconvert::tou::to_iter_class : public iconvert::tou { - iter_t iter; - public: + iter_t iter; + public: - to_iter_class(iter_t iterValue) - : iter(iterValue) {} + to_iter_class(iter_t iterValue) + : iter(iterValue) {} - using tou::operator(); + using tou::operator(); - operator iter_t() const { return iter; } + operator iter_t() const { return iter; } - private: - int converted(const unicode_char *ptr, size_t cnt) + private: + int converted(const unicode_char *ptr, size_t cnt) + { + while (cnt) { - while (cnt) - { - *iter=*ptr; + *iter=*ptr; - ++iter; - ++ptr; - --cnt; - } - return 0; + ++iter; + ++ptr; + --cnt; } - }; - - template<typename input_iter_t, - typename output_iter_t> - output_iter_t iconvert::tou::convert(input_iter_t from_iter, - input_iter_t to_iter, - const std::string &chset, - bool &flag, - output_iter_t out_iter) - { - class to_iter_class<output_iter_t> out(out_iter); - - if (!out.begin(chset)) - return out; + return 0; + } +}; - std::vector<char> string; +template<typename input_iter_t, + typename output_iter_t> + output_iter_t iconvert::tou::convert(input_iter_t from_iter, + input_iter_t to_iter, + const std::string &chset, + bool &flag, + output_iter_t out_iter) +{ + class to_iter_class<output_iter_t> out(out_iter); - while (from_iter != to_iter) - { - string.push_back(*from_iter++); + if (!out.begin(chset)) + return out; - if (string.size() > 31) - { - out(&string[0], string.size()); - string.clear(); - } - } + std::vector<char> string; - if (string.size() > 0) - out(&string[0], string.size()); + while (from_iter != to_iter) + { + string.push_back(*from_iter++); - out.end(flag); - return out; + if (string.size() > 31) + { + out(&string[0], string.size()); + string.clear(); } + } - /* Convert output of iconvert from unicode_chars. */ + if (string.size() > 0) + out(&string[0], string.size()); - class iconvert::fromu : public iconvert { + out.end(flag); + return out; +} - public: - bool begin(const std::string &chset); +/* Convert output of iconvert from unicode_chars. */ - using iconvert::operator(); +class iconvert::fromu : public iconvert { - template<typename iter_t> class to_iter_class; + public: + bool begin(const std::string &chset); - template<typename input_iter_t, - typename output_iter_t> - static output_iter_t convert(input_iter_t from_iter, - input_iter_t to_iter, - const std::string &chset, - output_iter_t out_iter, - bool &errflag); + using iconvert::operator(); - template<typename input_iter_t> - static void convert(input_iter_t from_iter, - input_iter_t to_iter, - const std::string &chset, - std::string &out_buf, - bool &errflag) - { - out_buf=""; - std::back_insert_iterator<std::string> - insert_iter(out_buf); + template<typename iter_t> class to_iter_class; - convert(from_iter, to_iter, chset, insert_iter, - errflag); - } + template<typename input_iter_t, + typename output_iter_t> + static output_iter_t convert(input_iter_t from_iter, + input_iter_t to_iter, + const std::string &chset, + output_iter_t out_iter, + bool &errflag); + + template<typename input_iter_t> + static void convert(input_iter_t from_iter, + input_iter_t to_iter, + const std::string &chset, + std::string &out_buf, + bool &errflag) + { + out_buf=""; + std::back_insert_iterator<std::string> + insert_iter(out_buf); - static std::pair<std::string, bool> - convert(const std::vector<unicode_char> &ubuf, - const std::string &chset); - }; + convert(from_iter, to_iter, chset, insert_iter, + errflag); + } - /* Helper class that saves unicode output into an output iterator */ + static std::pair<std::string, bool> + convert(const std::vector<unicode_char> &ubuf, + const std::string &chset); +}; - template<typename iter_t> - class iconvert::fromu::to_iter_class : public iconvert::fromu { +/* Helper class that saves unicode output into an output iterator */ - iter_t iter; - public: +template<typename iter_t> +class iconvert::fromu::to_iter_class : public iconvert::fromu { - to_iter_class(iter_t iterValue) - : iter(iterValue) {} + iter_t iter; + public: - using fromu::operator(); + to_iter_class(iter_t iterValue) + : iter(iterValue) {} - operator iter_t() const { return iter; } + using fromu::operator(); - private: - int converted(const char *ptr, size_t cnt) + operator iter_t() const { return iter; } + + private: + int converted(const char *ptr, size_t cnt) + { + while (cnt) { - while (cnt) - { - *iter=*ptr; + *iter=*ptr; - ++iter; - ++ptr; - --cnt; - } - return 0; + ++iter; + ++ptr; + --cnt; } - }; + return 0; + } +}; - template<typename input_iter_t, - typename output_iter_t> - output_iter_t iconvert::fromu::convert(input_iter_t from_iter, - input_iter_t to_iter, - const std::string &chset, - output_iter_t out_iter, - bool &errflag) - { - errflag=true; +template<typename input_iter_t, + typename output_iter_t> + output_iter_t iconvert::fromu::convert(input_iter_t from_iter, + input_iter_t to_iter, + const std::string &chset, + output_iter_t out_iter, + bool &errflag) +{ + errflag=true; - class to_iter_class<output_iter_t> out(out_iter); + class to_iter_class<output_iter_t> out(out_iter); - if (!out.begin(chset)) - return out; + if (!out.begin(chset)) + return out; - std::vector<unicode_char> string; + std::vector<unicode_char> string; - while (from_iter != to_iter) - { - string.push_back(*from_iter++); + while (from_iter != to_iter) + { + string.push_back(*from_iter++); - if (string.size() > 31) - { - out(&string[0], string.size()); - string.clear(); - } - } + if (string.size() > 31) + { + out(&string[0], string.size()); + string.clear(); + } + } - if (string.size() > 0) - out(&string[0], string.size()); + if (string.size() > 0) + out(&string[0], string.size()); - out.end(errflag); - return out; - } + out.end(errflag); + return out; +} - /* - ** Unicode linebreaking algorithm, tr14. - */ +/* +** Unicode linebreaking algorithm, tr14. +*/ - extern "C" int linebreak_trampoline(int value, void *ptr); - extern "C" int linebreakc_trampoline(int value, unicode_char ch, - void *ptr); +extern "C" int linebreak_trampoline(int value, void *ptr); +extern "C" int linebreakc_trampoline(int value, unicode_char ch, + void *ptr); - /* - ** Subclass linebreak_callback_base, implement operator()(int). - ** - ** Use operator<< or operator()(iterator, iterator) to feed - ** unicode_chars into the linebreaking algorithm. The subclass receives - ** UNICODE_LB values, as they become available. - */ +/* +** Subclass linebreak_callback_base, implement operator()(int). +** +** Use operator<< or operator()(iterator, iterator) to feed +** unicode_chars into the linebreaking algorithm. The subclass receives +** UNICODE_LB values, as they become available. +*/ - class linebreak_callback_base { +class linebreak_callback_base { - unicode_lb_info_t handle; + unicode_lb_info_t handle; - int opts; + int opts; #if __cplusplus >= 201103L - public: - linebreak_callback_base(const linebreak_callback_base &)=delete; - linebreak_callback_base &operator=(const - linebreak_callback_base &)=delete; - private: + public: + linebreak_callback_base(const linebreak_callback_base &)=delete; + linebreak_callback_base &operator=(const + linebreak_callback_base &)=delete; + private: #else - linebreak_callback_base(const linebreak_callback_base &); - /* NOT IMPLEMENTED */ + linebreak_callback_base(const linebreak_callback_base &); + /* NOT IMPLEMENTED */ - linebreak_callback_base &operator=(const - linebreak_callback_base &); - /* NOT IMPLEMENTED */ + linebreak_callback_base &operator=(const + linebreak_callback_base &); + /* NOT IMPLEMENTED */ #endif - public: - linebreak_callback_base(); - virtual ~linebreak_callback_base(); + public: + linebreak_callback_base(); + virtual ~linebreak_callback_base(); - void finish(); + void finish(); - void set_opts(int opts); + void set_opts(int opts); - friend int linebreak_trampoline(int, void *); + friend int linebreak_trampoline(int, void *); - linebreak_callback_base &operator<<(unicode_char uc); + linebreak_callback_base &operator<<(unicode_char uc); - template<typename iter_type> - linebreak_callback_base &operator()(iter_type beg_iter, - iter_type end_iter) - { - while (beg_iter != end_iter) - operator<<(*beg_iter++); - return *this; - } + template<typename iter_type> + linebreak_callback_base &operator()(iter_type beg_iter, + iter_type end_iter) + { + while (beg_iter != end_iter) + operator<<(*beg_iter++); + return *this; + } - template<typename container_type> - linebreak_callback_base &operator()(const container_type &vec) - { - return operator()(vec.begin(), vec.end()); - } - private: - virtual int callback(int); - }; + template<typename container_type> + linebreak_callback_base &operator()(const container_type &vec) + { + return operator()(vec.begin(), vec.end()); + } + private: + virtual int callback(int); +}; - class linebreak_callback_save_buf : public linebreak_callback_base { +class linebreak_callback_save_buf : public linebreak_callback_base { - public: - std::list<int> lb_buf; + public: + std::list<int> lb_buf; - linebreak_callback_save_buf(); - ~linebreak_callback_save_buf(); + linebreak_callback_save_buf(); + ~linebreak_callback_save_buf(); - using linebreak_callback_base::operator<<; - using linebreak_callback_base::operator(); - private: - int callback(int value); - }; + using linebreak_callback_base::operator<<; + using linebreak_callback_base::operator(); + private: + int callback(int value); +}; - /* - ** Convert an input iterator sequence over unicode_chars into - ** an input iterator sequence over linebreak values. - */ +/* +** Convert an input iterator sequence over unicode_chars into +** an input iterator sequence over linebreak values. +*/ - template<typename input_t> class linebreak_iter - : public std::iterator<std::input_iterator_tag, int, void> - { - mutable input_t iter_value, end_iter_value; +template<typename input_t> class linebreak_iter +: public std::iterator<std::input_iterator_tag, int, void> +{ + mutable input_t iter_value, end_iter_value; - mutable linebreak_callback_save_buf *buf; + mutable linebreak_callback_save_buf *buf; - void fill() const - { - if (buf == NULL) - return; + void fill() const + { + if (buf == NULL) + return; - while (buf->lb_buf.empty()) + while (buf->lb_buf.empty()) + { + if (iter_value == end_iter_value) { - if (iter_value == end_iter_value) + buf->finish(); + if (buf->lb_buf.empty()) { - buf->finish(); - if (buf->lb_buf.empty()) - { - delete buf; - buf=NULL; - } - break; + delete buf; + buf=NULL; } - - buf->operator<<(*iter_value++); + break; } - } - mutable value_type bufvalue; + buf->operator<<(*iter_value++); + } + } - public: - linebreak_iter(const input_t &iter_valueArg, - const input_t &iter_endvalueArg) - : iter_value(iter_valueArg), - end_iter_value(iter_endvalueArg), - buf(new linebreak_callback_save_buf) - { - } + mutable value_type bufvalue; - linebreak_iter() : buf(NULL) + public: + linebreak_iter(const input_t &iter_valueArg, + const input_t &iter_endvalueArg) + : iter_value(iter_valueArg), + end_iter_value(iter_endvalueArg), + buf(new linebreak_callback_save_buf) { } - void set_opts(int opts) - { - if (buf) - buf->set_opts(opts); - } + linebreak_iter() : buf(NULL) + { + } - ~linebreak_iter() - { - if (buf) - delete buf; - } + void set_opts(int opts) + { + if (buf) + buf->set_opts(opts); + } - linebreak_iter(const linebreak_iter<input_t> &v) - : buf(NULL) - { - operator=(v); - } + ~linebreak_iter() + { + if (buf) + delete buf; + } - linebreak_iter<input_t> &operator=(const - linebreak_iter<input_t> &v) + linebreak_iter(const linebreak_iter<input_t> &v) + : buf(NULL) + { + operator=(v); + } + + linebreak_iter<input_t> &operator=(const + linebreak_iter<input_t> &v) { if (buf) delete buf; @@ -1501,185 +1505,185 @@ namespace unicode { return *this; } - bool operator==(const linebreak_iter<input_t> &v) const - { - fill(); - v.fill(); + bool operator==(const linebreak_iter<input_t> &v) const + { + fill(); + v.fill(); - return buf == NULL && v.buf == NULL; - } + return buf == NULL && v.buf == NULL; + } - bool operator!=(const linebreak_iter<input_t> &v) const - { - return !operator==(v); - } + bool operator!=(const linebreak_iter<input_t> &v) const + { + return !operator==(v); + } - value_type operator*() const - { - fill(); - return buf == NULL ? UNICODE_LB_MANDATORY: - buf->lb_buf.front(); - } + value_type operator*() const + { + fill(); + return buf == NULL ? UNICODE_LB_MANDATORY: + buf->lb_buf.front(); + } - linebreak_iter<input_t> &operator++() - { - bufvalue=operator*(); + linebreak_iter<input_t> &operator++() + { + bufvalue=operator*(); - if (buf) - buf->lb_buf.pop_front(); - return *this; - } + if (buf) + buf->lb_buf.pop_front(); + return *this; + } - const value_type *operator++(int) - { - operator++(); - return &bufvalue; - } - }; + const value_type *operator++(int) + { + operator++(); + return &bufvalue; + } +}; - /* - ** Like linebreak_callback_base, except the subclass receives both - ** the linebreaking value, and the unicode character. - */ +/* +** Like linebreak_callback_base, except the subclass receives both +** the linebreaking value, and the unicode character. +*/ - class linebreakc_callback_base { +class linebreakc_callback_base { - unicode_lbc_info_t handle; + unicode_lbc_info_t handle; - int opts; + int opts; #if __cplusplus >= 201103L - public: - linebreakc_callback_base(const linebreakc_callback_base &) - =delete; - - linebreakc_callback_base &operator=(const - linebreakc_callback_base - &)=delete; - private: + public: + linebreakc_callback_base(const linebreakc_callback_base &) + =delete; + + linebreakc_callback_base &operator=(const + linebreakc_callback_base + &)=delete; + private: #else - linebreakc_callback_base(const linebreakc_callback_base &); - /* NOT IMPLEMENTED */ + linebreakc_callback_base(const linebreakc_callback_base &); + /* NOT IMPLEMENTED */ - linebreakc_callback_base &operator=(const - linebreakc_callback_base - &); - /* NOT IMPLEMENTED */ + linebreakc_callback_base &operator=(const + linebreakc_callback_base + &); + /* NOT IMPLEMENTED */ #endif - public: - linebreakc_callback_base(); - virtual ~linebreakc_callback_base(); + public: + linebreakc_callback_base(); + virtual ~linebreakc_callback_base(); - void finish(); + void finish(); - void set_opts(int opts); + void set_opts(int opts); - friend int linebreakc_trampoline(int, unicode_char, void *); + friend int linebreakc_trampoline(int, unicode_char, void *); - linebreakc_callback_base &operator<<(unicode_char uc); + linebreakc_callback_base &operator<<(unicode_char uc); - template<typename iter_type> - linebreakc_callback_base &operator()(iter_type beg_iter, - iter_type end_iter) - { - while (beg_iter != end_iter) - operator<<(*beg_iter++); - return *this; - } + template<typename iter_type> + linebreakc_callback_base &operator()(iter_type beg_iter, + iter_type end_iter) + { + while (beg_iter != end_iter) + operator<<(*beg_iter++); + return *this; + } + + linebreakc_callback_base &operator<<(const + std::vector<unicode_char> + &vec) + { + return operator()(vec.begin(), vec.end()); + } + private: + virtual int callback(int, unicode_char); +}; - linebreakc_callback_base &operator<<(const - std::vector<unicode_char> - &vec) - { - return operator()(vec.begin(), vec.end()); - } - private: - virtual int callback(int, unicode_char); - }; +class linebreakc_callback_save_buf : public linebreakc_callback_base { - class linebreakc_callback_save_buf : public linebreakc_callback_base { + public: + std::list<std::pair<int, unicode_char> > lb_buf; - public: - std::list<std::pair<int, unicode_char> > lb_buf; + linebreakc_callback_save_buf(); + ~linebreakc_callback_save_buf(); - linebreakc_callback_save_buf(); - ~linebreakc_callback_save_buf(); + using linebreakc_callback_base::operator<<; + using linebreakc_callback_base::operator(); + private: + int callback(int, unicode_char); +}; - using linebreakc_callback_base::operator<<; - using linebreakc_callback_base::operator(); - private: - int callback(int, unicode_char); - }; +/* +** Convert an input iterator sequence over unicode_chars into +** an input iterator sequence over std::pair<int, unicode_char>, +** the original unicode character, and the linebreaking value before +** the character. +*/ - /* - ** Convert an input iterator sequence over unicode_chars into - ** an input iterator sequence over std::pair<int, unicode_char>, - ** the original unicode character, and the linebreaking value before - ** the character. - */ +template<typename input_t> class linebreakc_iter +: public std::iterator<std::input_iterator_tag, + std::pair<int, unicode_char>, void> +{ + mutable input_t iter_value, end_iter_value; - template<typename input_t> class linebreakc_iter - : public std::iterator<std::input_iterator_tag, - std::pair<int, unicode_char>, void> - { - mutable input_t iter_value, end_iter_value; + mutable linebreakc_callback_save_buf *buf; - mutable linebreakc_callback_save_buf *buf; + void fill() const + { + if (buf == NULL) + return; - void fill() const + while (buf->lb_buf.empty()) { - if (buf == NULL) - return; - - while (buf->lb_buf.empty()) + if (iter_value == end_iter_value) { - if (iter_value == end_iter_value) + buf->finish(); + if (buf->lb_buf.empty()) { - buf->finish(); - if (buf->lb_buf.empty()) - { - delete buf; - buf=NULL; - } - break; + delete buf; + buf=NULL; } - - buf->operator<<(*iter_value); - ++iter_value; + break; } + + buf->operator<<(*iter_value); + ++iter_value; } + } - mutable value_type bufvalue; + mutable value_type bufvalue; - public: - linebreakc_iter(const input_t &iter_valueArg, - const input_t &iter_endvalueArg) - : iter_value(iter_valueArg), - end_iter_value(iter_endvalueArg), - buf(new linebreakc_callback_save_buf) - { - } - - linebreakc_iter() : buf(NULL) + public: + linebreakc_iter(const input_t &iter_valueArg, + const input_t &iter_endvalueArg) + : iter_value(iter_valueArg), + end_iter_value(iter_endvalueArg), + buf(new linebreakc_callback_save_buf) { } - ~linebreakc_iter() - { - if (buf) - delete buf; - } + linebreakc_iter() : buf(NULL) + { + } - linebreakc_iter(const linebreakc_iter<input_t> &v) - : buf(NULL) - { - operator=(v); - } + ~linebreakc_iter() + { + if (buf) + delete buf; + } - linebreakc_iter<input_t> &operator=(const - linebreakc_iter<input_t> &v) + linebreakc_iter(const linebreakc_iter<input_t> &v) + : buf(NULL) + { + operator=(v); + } + + linebreakc_iter<input_t> &operator=(const + linebreakc_iter<input_t> &v) { if (buf) delete buf; @@ -1690,134 +1694,155 @@ namespace unicode { return *this; } - bool operator==(const linebreakc_iter<input_t> &v) const - { - fill(); - v.fill(); - - return buf == NULL && v.buf == NULL; - } + bool operator==(const linebreakc_iter<input_t> &v) const + { + fill(); + v.fill(); - bool operator!=(const linebreakc_iter<input_t> &v) const - { - return !operator==(v); - } + return buf == NULL && v.buf == NULL; + } - value_type operator*() const - { - fill(); - return buf == NULL ? - std::make_pair(UNICODE_LB_MANDATORY, - (unicode_char)0): - buf->lb_buf.front(); - } + bool operator!=(const linebreakc_iter<input_t> &v) const + { + return !operator==(v); + } - linebreakc_iter<input_t> &operator++() - { - bufvalue=operator*(); + value_type operator*() const + { + fill(); + return buf == NULL ? + std::make_pair(UNICODE_LB_MANDATORY, + (unicode_char)0): + buf->lb_buf.front(); + } + + linebreakc_iter<input_t> &operator++() + { + bufvalue=operator*(); - if (buf) - buf->lb_buf.pop_front(); - return *this; - } + if (buf) + buf->lb_buf.pop_front(); + return *this; + } - const value_type *operator++(int) - { - operator++(); - return &bufvalue; - } - }; + const value_type *operator++(int) + { + operator++(); + return &bufvalue; + } +}; - /* - ** Subclass wordbreak_callback_base, implement operator()(int). - ** - ** Use operator<< or operator()(iterator, iterator) to feed - ** unicode_chars into the wordbreaking algorithm. The subclass receives - ** word flags, as they become available. - */ +/* +** Subclass wordbreak_callback_base, implement operator()(int). +** +** Use operator<< or operator()(iterator, iterator) to feed +** unicode_chars into the wordbreaking algorithm. The subclass receives +** word flags, as they become available. +*/ - extern "C" int wordbreak_trampoline(int value, void *ptr); +extern "C" int wordbreak_trampoline(int value, void *ptr); - class wordbreak_callback_base { +class wordbreak_callback_base { - unicode_wb_info_t handle; + unicode_wb_info_t handle; #if __cplusplus >= 201103L - public: - wordbreak_callback_base(const wordbreak_callback_base &)=delete; + public: + wordbreak_callback_base(const wordbreak_callback_base &)=delete; - wordbreak_callback_base &operator=(const - wordbreak_callback_base &) - =delete; - private: + wordbreak_callback_base &operator=(const + wordbreak_callback_base &) + =delete; + private: #else - wordbreak_callback_base(const wordbreak_callback_base &); - /* NOT IMPLEMENTED */ + wordbreak_callback_base(const wordbreak_callback_base &); + /* NOT IMPLEMENTED */ - wordbreak_callback_base &operator=(const - wordbreak_callback_base &); - /* NOT IMPLEMENTED */ + wordbreak_callback_base &operator=(const + wordbreak_callback_base &); + /* NOT IMPLEMENTED */ #endif - public: - wordbreak_callback_base(); - virtual ~wordbreak_callback_base(); + public: + wordbreak_callback_base(); + virtual ~wordbreak_callback_base(); - void finish(); + void finish(); - friend int wordbreak_trampoline(int, void *); + friend int wordbreak_trampoline(int, void *); - wordbreak_callback_base &operator<<(unicode_char uc); - - template<typename iter_type> - wordbreak_callback_base &operator()(iter_type beg_iter, - iter_type end_iter) - { - while (beg_iter != end_iter) - operator<<(*beg_iter++); - return *this; - } + wordbreak_callback_base &operator<<(unicode_char uc); - wordbreak_callback_base &operator<<(const - std::vector<unicode_char> - &vec) - { - return operator()(vec.begin(), vec.end()); - } - private: - virtual int callback(bool); - }; + template<typename iter_type> + wordbreak_callback_base &operator()(iter_type beg_iter, + iter_type end_iter) + { + while (beg_iter != end_iter) + operator<<(*beg_iter++); + return *this; + } + + wordbreak_callback_base &operator<<(const + std::vector<unicode_char> + &vec) + { + return operator()(vec.begin(), vec.end()); + } + private: + virtual int callback(bool); +}; - /* - ** A C++ wrapper for unicode_wbscan. - */ +/* +** A C++ wrapper for unicode_wbscan. +*/ - class wordbreakscan { +class wordbreakscan { - unicode_wbscan_info_t handle; + unicode_wbscan_info_t handle; #if __cplusplus >= 201103L - public: - wordbreakscan(const wordbreakscan &)=delete; - wordbreakscan &operator=(const wordbreakscan &)=delete; - private: + public: + wordbreakscan(const wordbreakscan &)=delete; + wordbreakscan &operator=(const wordbreakscan &)=delete; + private: #else - wordbreakscan(const wordbreakscan &); - /* NOT IMPLEMENTED */ + wordbreakscan(const wordbreakscan &); + /* NOT IMPLEMENTED */ - wordbreakscan &operator=(const wordbreakscan &); - /* NOT IMPLEMENTED */ + wordbreakscan &operator=(const wordbreakscan &); + /* NOT IMPLEMENTED */ #endif - public: + public: + + wordbreakscan(); + ~wordbreakscan(); + + bool operator<<(unicode_char uc); + + size_t finish(); +}; + +//! Convert string in unicode_default_chset() to lowercase - wordbreakscan(); - ~wordbreakscan(); +std::string tolower(const std::string &string); - bool operator<<(unicode_char uc); +//! Convert string in unicode_default_chset() to uppercase - size_t finish(); - }; +std::string toupper(const std::string &string); +//! Convert string in the given character set to lowercase + +std::string tolower(const std::string &string, + const std::string &charset); + +//! Convert string in the given character set to uppercase + +std::string toupper(const std::string &string, + const std::string &charset); + +#if 0 +{ +#endif } #endif diff --git a/unicode/unicodecpp.C b/unicode/unicodecpp.C index 8894f1e..e6b31bd 100644 --- a/unicode/unicodecpp.C +++ b/unicode/unicodecpp.C @@ -7,6 +7,8 @@ #include "unicode_config.h" #include "courier-unicode.h" +#include <algorithm> + extern "C" { static int iconv_trampoline(const char *str, size_t cnt, void *arg) @@ -490,3 +492,37 @@ size_t unicode::wordbreakscan::finish() } return n; } + +std::string unicode::tolower(const std::string &string) +{ + return tolower(string, unicode_default_chset()); +} + +std::string unicode::tolower(const std::string &string, + const std::string &charset) +{ + std::vector<unicode_char> uc; + + unicode::iconvert::convert(string, charset, uc); + + std::transform(uc.begin(), uc.end(), uc.begin(), unicode_lc); + + return unicode::iconvert::convert(uc, charset); +} + +std::string unicode::toupper(const std::string &string) +{ + return toupper(string, unicode_default_chset()); +} + +std::string unicode::toupper(const std::string &string, + const std::string &charset) +{ + std::vector<unicode_char> uc; + + unicode::iconvert::convert(string, charset, uc); + + std::transform(uc.begin(), uc.end(), uc.begin(), unicode_uc); + + return unicode::iconvert::convert(uc, charset); +} |
