summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Sam Varshavchik 2014-01-16 18:05:38 -0500
committer Sam Varshavchik 2014-01-16 18:06:02 -0500
commit b84d8ff82f5e2f5f81690913701ab54d12a1a22e (patch)
tree b4623d62b3657d501c7732a4d3c99218225a945a
parent 4be24d5017d0563ec78bd83fb8fcd8528cbd7b6b (diff)
download courier-libs-b84d8ff82f5e2f5f81690913701ab54d12a1a22e.tar.bz2
unicode: cleanup unicode::iconvert::tou API
-rw-r--r-- unicode/README 9
-rw-r--r-- unicode/book.xml 22
-rw-r--r-- unicode/linebreaktest.C 24
-rw-r--r-- unicode/unicode.h 18
-rw-r--r-- unicode/unicodecpp.C 11
5 files changed, 57 insertions, 27 deletions
diff --git a/unicode/README b/unicode/README
index ff899c4..55e29a5 100644
--- a/unicode/README
+++ b/unicode/README
@@ -1,11 +1,12 @@
Link: Courier Unicode Library (start)
- Link: Installation (next)
+ Link: Installation and usage (next)
Courier Unicode Library
     Next
--------------------------------------------------------------------------
+
Courier Unicode Library
--------------------------------------------------------------------------
@@ -14,7 +15,7 @@ Courier Unicode Library
Current status
- Installation
+ Installation and usage
Manual pages
@@ -45,5 +46,5 @@ Current status
--------------------------------------------------------------------------
-      Next
-      Installation
+      Next
+      Installation and usage
diff --git a/unicode/book.xml b/unicode/book.xml
index a95d0a9..88fcf25 100644
--- a/unicode/book.xml
+++ b/unicode/book.xml
@@ -156,7 +156,7 @@ See COPYING for distribution information.
<citerefentry><refentrytitle>unicode_html40ent_lookup</refentrytitle>
<manvolnum>3</manvolnum></citerefentry></link>,
<link linkend="unicode_isspace">
- <citerefentry><refentrytitle>unicode_convert</refentrytitle>
+ <citerefentry><refentrytitle>unicode_isspace</refentrytitle>
<manvolnum>3</manvolnum></citerefentry></link>,
<link linkend="unicode_grapheme_break">
<citerefentry><refentrytitle>unicode_grapheme_break</refentrytitle>
@@ -1107,7 +1107,7 @@ See COPYING for distribution information.
<citerefentry>
<refentrytitle>courier-unicode</refentrytitle>
<manvolnum>7</manvolnum></citerefentry></link>,
- <link linkend="unicode_line_break">
+ <link linkend="unicode__linebreak">
<citerefentry><refentrytitle>unicode::linebreak</refentrytitle>
<manvolnum>3</manvolnum></citerefentry></link>,
<ulink url="http://www.unicode.org/reports/tr14/tr14-32.html">TR-14</ulink>
@@ -1779,11 +1779,12 @@ See COPYING for distribution information.
<paramdef>input_iter_t <parameter>beg_iter</parameter></paramdef>
<paramdef>input_iter_t <parameter>end_iter</parameter></paramdef>
<paramdef>const std::string &amp;<parameter>charset</parameter></paramdef>
+ <paramdef>bool &amp;<parameter>errflag</parameter></paramdef>
<paramdef>output_iter_t <parameter>output_iter</parameter></paramdef>
</funcprototype>
<funcprototype>
- <funcdef>void <function>convert</function></funcdef>
+ <funcdef>bool <function>convert</function></funcdef>
<paramdef>input_iter_t <parameter>beg_iter</parameter></paramdef>
<paramdef>input_iter_t <parameter>end_iter</parameter></paramdef>
<paramdef>const std::string &amp;<parameter>charset</parameter></paramdef>
@@ -1791,10 +1792,9 @@ See COPYING for distribution information.
</funcprototype>
<funcprototype>
- <funcdef>void <function>convert</function></funcdef>
+ <funcdef>std::pair&lt;std::vector&lt;unicode_char&gt;, bool&gt; <function>convert</function></funcdef>
<paramdef>const std::string &amp;<parameter>text</parameter></paramdef>
<paramdef>const std::string &amp;<parameter>charset</parameter></paramdef>
- <paramdef>std::vector&lt;unicode_char&gt; &amp;<parameter>out_buf</parameter></paramdef>
</funcprototype>
</funcsynopsis>
</refsynopsisdiv>
@@ -1814,15 +1814,23 @@ See COPYING for distribution information.
iterates over <classname>unicode_char</classname>s.
<function>convert</function>() returns the value of the output
iterator after iterating over the converted character sequence.
+ <parameter>errflag</parameter>, passed by reference, gets set to
+ <literal>true</literal> if some character could not be converted
+ from the specified character set to unicode, and
+ <literal>false</literal> if the conversion completed without
+ errors.
</para>
<para>
An overloaded <function>convert</function>() puts the unicode
character sequence into a vector of
<classname>unicode_char</classname>s, instead of an output
- sequence. Finally, a single <classname>std::string</classname>
+ sequence, and returns the error flag.
+ Finally, a single <classname>std::string</classname>
specifies the character string, instead of a beginning and an
- ending iterator.
+ ending iterator; this overload returns a
+ <classname>std::pair</classname> with the converted unicode
+ text in a vector, and the error flag.
</para>
</refsect1>
diff --git a/unicode/linebreaktest.C b/unicode/linebreaktest.C
index d7b93c4..0d1dffe 100644
--- a/unicode/linebreaktest.C
+++ b/unicode/linebreaktest.C
@@ -179,13 +179,18 @@ int main(int argc, char **argv)
std::string convteststr="0000000000000000000000000000000\xe3\x82\xa2";
- std::vector<unicode_char> uc;
+ std::pair<std::vector<unicode_char>, bool> uc;
- unicode::iconvert::tou
- ::convert(convteststr, "utf-8", uc);
+ uc=unicode::iconvert::tou::convert(convteststr, "utf-8");
- std::vector<unicode_char>::iterator e(uc.end()),
- b(std::find_if(uc.begin(), e,
+ if (uc.second)
+ {
+ std::cerr << "Valid UTF-8 string is invalid" << std::endl;
+ exit(1);
+ }
+
+ std::vector<unicode_char>::iterator e(uc.first.end()),
+ b(std::find_if(uc.first.begin(), e,
std::not1(std::bind2nd(std::equal_to<unicode_char>
(),
unicode_char('0')))));
@@ -197,12 +202,19 @@ int main(int argc, char **argv)
exit(1);
}
- if (unicode::iconvert::fromu::convert(uc, "utf-8") != convteststr)
+ if (unicode::iconvert::fromu::convert(uc.first, "utf-8") != convteststr)
{
std::cerr << "unicode::iconvert::fromu::convert failed"
<< std::endl;
exit(1);
}
+ uc=unicode::iconvert::tou::convert("\xE3", "utf-8");
+
+ if (!uc.second)
+ {
+ std::cerr << "Invalid UTF-8 string is valid" << std::endl;
+ exit(1);
+ }
return 0;
}
diff --git a/unicode/unicode.h b/unicode/unicode.h
index 3789e7e..3efd9c7 100644
--- a/unicode/unicode.h
+++ b/unicode/unicode.h
@@ -1058,24 +1058,29 @@ namespace unicode {
static output_iter_t convert(input_iter_t from_iter,
input_iter_t to_iter,
const std::string &chset,
+ bool &flag,
output_iter_t out_iter);
template<typename input_iter_t>
- static void convert(input_iter_t from_iter,
+ static bool convert(input_iter_t from_iter,
input_iter_t to_iter,
const std::string &chset,
std::vector<unicode_char> &out_buf)
{
+ bool flag;
+
out_buf.clear();
std::back_insert_iterator<std::vector<unicode_char> >
insert_iter(out_buf);
- convert(from_iter, to_iter, chset, insert_iter);
+ convert(from_iter, to_iter, chset, flag, insert_iter);
+
+ return flag;
}
- static void convert(const std::string &str,
- const std::string &chset,
- std::vector<unicode_char> &out_buf);
+ static std::pair<std::vector<unicode_char>, bool>
+ convert(const std::string &str,
+ const std::string &chset);
};
/* Helper class that saves unicode output into an output iterator */
@@ -1113,6 +1118,7 @@ namespace unicode {
output_iter_t iconvert::tou::convert(input_iter_t from_iter,
input_iter_t to_iter,
const std::string &chset,
+ bool &flag,
output_iter_t out_iter)
{
class to_iter_class<output_iter_t> out(out_iter);
@@ -1136,7 +1142,7 @@ namespace unicode {
if (string.size() > 0)
out(&string[0], string.size());
- out.end();
+ out.end(flag);
return out;
}
diff --git a/unicode/unicodecpp.C b/unicode/unicodecpp.C
index 87e1cc5..485f05d 100644
--- a/unicode/unicodecpp.C
+++ b/unicode/unicodecpp.C
@@ -214,11 +214,14 @@ int unicode::iconvert::tou::converted(const char *ptr, size_t cnt)
cnt/sizeof(unicode_char));
}
-void unicode::iconvert::tou::convert(const std::string &str,
- const std::string &chset,
- std::vector<unicode_char> &out_buf)
+std::pair<std::vector<unicode_char>, bool>
+unicode::iconvert::tou::convert(const std::string &str,
+ const std::string &chset)
{
- convert(str.begin(), str.end(), chset, out_buf);
+ std::pair<std::vector<unicode_char>, bool> ret;
+
+ ret.second=convert(str.begin(), str.end(), chset, ret.first);
+ return ret;
}
bool unicode::iconvert::fromu::begin(const std::string &chset)