diff options
| -rw-r--r-- | unicode/README | 9 | ||||
| -rw-r--r-- | unicode/book.xml | 22 | ||||
| -rw-r--r-- | unicode/linebreaktest.C | 24 | ||||
| -rw-r--r-- | unicode/unicode.h | 18 | ||||
| -rw-r--r-- | unicode/unicodecpp.C | 11 |
5 files changed, 57 insertions, 27 deletions
diff --git a/unicode/README b/unicode/README index ff899c4..55e29a5 100644 --- a/unicode/README +++ b/unicode/README @@ -1,11 +1,12 @@ Link: Courier Unicode Library (start) - Link: Installation (next) + Link: Installation and usage (next) Courier Unicode Library Next -------------------------------------------------------------------------- + Courier Unicode Library -------------------------------------------------------------------------- @@ -14,7 +15,7 @@ Courier Unicode Library Current status - Installation + Installation and usage Manual pages @@ -45,5 +46,5 @@ Current status -------------------------------------------------------------------------- - Next - Installation + Next + Installation and usage diff --git a/unicode/book.xml b/unicode/book.xml index a95d0a9..88fcf25 100644 --- a/unicode/book.xml +++ b/unicode/book.xml @@ -156,7 +156,7 @@ See COPYING for distribution information. <citerefentry><refentrytitle>unicode_html40ent_lookup</refentrytitle> <manvolnum>3</manvolnum></citerefentry></link>, <link linkend="unicode_isspace"> - <citerefentry><refentrytitle>unicode_convert</refentrytitle> + <citerefentry><refentrytitle>unicode_isspace</refentrytitle> <manvolnum>3</manvolnum></citerefentry></link>, <link linkend="unicode_grapheme_break"> <citerefentry><refentrytitle>unicode_grapheme_break</refentrytitle> @@ -1107,7 +1107,7 @@ See COPYING for distribution information. <citerefentry> <refentrytitle>courier-unicode</refentrytitle> <manvolnum>7</manvolnum></citerefentry></link>, - <link linkend="unicode_line_break"> + <link linkend="unicode__linebreak"> <citerefentry><refentrytitle>unicode::linebreak</refentrytitle> <manvolnum>3</manvolnum></citerefentry></link>, <ulink url="http://www.unicode.org/reports/tr14/tr14-32.html">TR-14</ulink> @@ -1779,11 +1779,12 @@ See COPYING for distribution information. 
<paramdef>input_iter_t <parameter>beg_iter</parameter></paramdef> <paramdef>input_iter_t <parameter>end_iter</parameter></paramdef> <paramdef>const std::string &<parameter>charset</parameter></paramdef> + <paramdef>bool &<parameter>errflag</parameter></paramdef> <paramdef>output_iter_t <parameter>output_iter</parameter></paramdef> </funcprototype> <funcprototype> - <funcdef>void <function>convert</function></funcdef> + <funcdef>bool <function>convert</function></funcdef> <paramdef>input_iter_t <parameter>beg_iter</parameter></paramdef> <paramdef>input_iter_t <parameter>end_iter</parameter></paramdef> <paramdef>const std::string &<parameter>charset</parameter></paramdef> @@ -1791,10 +1792,9 @@ See COPYING for distribution information. </funcprototype> <funcprototype> - <funcdef>void <function>convert</function></funcdef> + <funcdef>std::pair<std::vector<unicode_char>, bool> <function>convert</function></funcdef> <paramdef>const std::string &<parameter>text</parameter></paramdef> <paramdef>const std::string &<parameter>charset</parameter></paramdef> - <paramdef>std::vector<unicode_char> &<parameter>out_buf</parameter></paramdef> </funcprototype> </funcsynopsis> </refsynopsisdiv> @@ -1814,15 +1814,23 @@ See COPYING for distribution information. iterates over <classname>unicode_char</classname>s. <function>convert</function>() returns the value of the output iterator after iterating over the converted character sequence. + <parameter>errflag</parameter>, passed by reference, gets set to + <literal>true</literal> if some character could not be converted + to unicode, from the specified character set, and + <literal>false</literal> if the conversion completed without + errors. </para> <para> An overloaded <function>convert</function>() puts the unicode character sequence into a vector of <classname>unicode_char</classname>s, instead of an output - sequence. Finally, a single <classname>std::string</classname> + sequence, and returns the error flag. 
+ Finally, a single <classname>std::string</classname> specifies the character string, instead of a beginning and an - ending iterator. + ending iterator, and returns a + <classname>std::pair</classname> with the converted unicode + text in a vector, and the error flag. </para> </refsect1> diff --git a/unicode/linebreaktest.C b/unicode/linebreaktest.C index d7b93c4..0d1dffe 100644 --- a/unicode/linebreaktest.C +++ b/unicode/linebreaktest.C @@ -179,13 +179,18 @@ int main(int argc, char **argv) std::string convteststr="0000000000000000000000000000000\xe3\x82\xa2"; - std::vector<unicode_char> uc; + std::pair<std::vector<unicode_char>, bool> uc; - unicode::iconvert::tou - ::convert(convteststr, "utf-8", uc); + uc=unicode::iconvert::tou::convert(convteststr, "utf-8"); - std::vector<unicode_char>::iterator e(uc.end()), - b(std::find_if(uc.begin(), e, + if (uc.second) + { + std::cerr << "Valid UTF-8 string is invalid" << std::endl; + exit(1); + } + + std::vector<unicode_char>::iterator e(uc.first.end()), + b(std::find_if(uc.first.begin(), e, std::not1(std::bind2nd(std::equal_to<unicode_char> (), unicode_char('0'))))); @@ -197,12 +202,19 @@ int main(int argc, char **argv) exit(1); } - if (unicode::iconvert::fromu::convert(uc, "utf-8") != convteststr) + if (unicode::iconvert::fromu::convert(uc.first, "utf-8") != convteststr) { std::cerr << "unicode::iconvert::fromu::convert failed" << std::endl; exit(1); } + uc=unicode::iconvert::tou::convert("\xE3", "utf-8"); + + if (!uc.second) + { + std::cerr << "Invalid UTF-8 string is valid" << std::endl; + exit(1); + } return 0; } diff --git a/unicode/unicode.h b/unicode/unicode.h index 3789e7e..3efd9c7 100644 --- a/unicode/unicode.h +++ b/unicode/unicode.h @@ -1058,24 +1058,29 @@ namespace unicode { static output_iter_t convert(input_iter_t from_iter, input_iter_t to_iter, const std::string &chset, + bool &flag, output_iter_t out_iter); template<typename input_iter_t> - static void convert(input_iter_t from_iter, + static bool 
convert(input_iter_t from_iter, input_iter_t to_iter, const std::string &chset, std::vector<unicode_char> &out_buf) { + bool flag; + out_buf.clear(); std::back_insert_iterator<std::vector<unicode_char> > insert_iter(out_buf); - convert(from_iter, to_iter, chset, insert_iter); + convert(from_iter, to_iter, chset, flag, insert_iter); + + return flag; } - static void convert(const std::string &str, - const std::string &chset, - std::vector<unicode_char> &out_buf); + static std::pair<std::vector<unicode_char>, bool> + convert(const std::string &str, + const std::string &chset); }; /* Helper class that saves unicode output into an output iterator */ @@ -1113,6 +1118,7 @@ namespace unicode { output_iter_t iconvert::tou::convert(input_iter_t from_iter, input_iter_t to_iter, const std::string &chset, + bool &flag, output_iter_t out_iter) { class to_iter_class<output_iter_t> out(out_iter); @@ -1136,7 +1142,7 @@ namespace unicode { if (string.size() > 0) out(&string[0], string.size()); - out.end(); + out.end(flag); return out; } diff --git a/unicode/unicodecpp.C b/unicode/unicodecpp.C index 87e1cc5..485f05d 100644 --- a/unicode/unicodecpp.C +++ b/unicode/unicodecpp.C @@ -214,11 +214,14 @@ int unicode::iconvert::tou::converted(const char *ptr, size_t cnt) cnt/sizeof(unicode_char)); } -void unicode::iconvert::tou::convert(const std::string &str, - const std::string &chset, - std::vector<unicode_char> &out_buf) +std::pair<std::vector<unicode_char>, bool> +unicode::iconvert::tou::convert(const std::string &str, + const std::string &chset) { - convert(str.begin(), str.end(), chset, out_buf); + std::pair<std::vector<unicode_char>, bool> ret; + + ret.second=convert(str.begin(), str.end(), chset, ret.first); + return ret; } bool unicode::iconvert::fromu::begin(const std::string &chset) |
