diff options
| author | Sam Varshavchik | 2014-01-16 18:05:38 -0500 | 
|---|---|---|
| committer | Sam Varshavchik | 2014-01-16 18:06:02 -0500 | 
| commit | b84d8ff82f5e2f5f81690913701ab54d12a1a22e (patch) | |
| tree | b4623d62b3657d501c7732a4d3c99218225a945a | |
| parent | 4be24d5017d0563ec78bd83fb8fcd8528cbd7b6b (diff) | |
| download | courier-libs-b84d8ff82f5e2f5f81690913701ab54d12a1a22e.tar.bz2 | |
unicode: cleanup unicode::iconvert::tou API
| -rw-r--r-- | unicode/README | 9 | ||||
| -rw-r--r-- | unicode/book.xml | 22 | ||||
| -rw-r--r-- | unicode/linebreaktest.C | 24 | ||||
| -rw-r--r-- | unicode/unicode.h | 18 | ||||
| -rw-r--r-- | unicode/unicodecpp.C | 11 | 
5 files changed, 57 insertions, 27 deletions
| diff --git a/unicode/README b/unicode/README index ff899c4..55e29a5 100644 --- a/unicode/README +++ b/unicode/README @@ -1,11 +1,12 @@     Link: Courier Unicode Library (start) -   Link: Installation (next) +   Link: Installation and usage (next)                              Courier Unicode Library                                                                           Next     -------------------------------------------------------------------------- +  Courier Unicode Library     -------------------------------------------------------------------------- @@ -14,7 +15,7 @@ Courier Unicode Library     Current status -   Installation +   Installation and usage     Manual pages @@ -45,5 +46,5 @@ Current status     -------------------------------------------------------------------------- -                                                                         Next -                                                                 Installation +                                                                         Next +                                                       Installation and usage diff --git a/unicode/book.xml b/unicode/book.xml index a95d0a9..88fcf25 100644 --- a/unicode/book.xml +++ b/unicode/book.xml @@ -156,7 +156,7 @@ See COPYING for distribution information.  	      <citerefentry><refentrytitle>unicode_html40ent_lookup</refentrytitle>  	      <manvolnum>3</manvolnum></citerefentry></link>,  	    <link linkend="unicode_isspace"> -	      <citerefentry><refentrytitle>unicode_convert</refentrytitle> +	      <citerefentry><refentrytitle>unicode_isspace</refentrytitle>  	      <manvolnum>3</manvolnum></citerefentry></link>,  	    <link linkend="unicode_grapheme_break">  	      <citerefentry><refentrytitle>unicode_grapheme_break</refentrytitle> @@ -1107,7 +1107,7 @@ See COPYING for distribution information.  	      <citerefentry>  		<refentrytitle>courier-unicode</refentrytitle>  		<manvolnum>7</manvolnum></citerefentry></link>, -	    <link linkend="unicode_line_break"> +	    <link linkend="unicode__linebreak">  	      <citerefentry><refentrytitle>unicode::linebreak</refentrytitle>  	      <manvolnum>3</manvolnum></citerefentry></link>,  	    <ulink url="http://www.unicode.org/reports/tr14/tr14-32.html">TR-14</ulink> @@ -1779,11 +1779,12 @@ See COPYING for distribution information.                <paramdef>input_iter_t <parameter>beg_iter</parameter></paramdef>                <paramdef>input_iter_t <parameter>end_iter</parameter></paramdef>                <paramdef>const std::string &<parameter>charset</parameter></paramdef> +	      <paramdef>bool &<parameter>errflag</parameter></paramdef>                <paramdef>output_iter_t <parameter>output_iter</parameter></paramdef>  	    </funcprototype>  	    <funcprototype> -              <funcdef>void <function>convert</function></funcdef> +              <funcdef>bool <function>convert</function></funcdef>                <paramdef>input_iter_t <parameter>beg_iter</parameter></paramdef>                <paramdef>input_iter_t <parameter>end_iter</parameter></paramdef>                <paramdef>const std::string &<parameter>charset</parameter></paramdef> @@ -1791,10 +1792,9 @@ See COPYING for distribution information.  	    </funcprototype>  	    <funcprototype> -              <funcdef>void <function>convert</function></funcdef> +              <funcdef>std::pair<std::vector<unicode_char>, bool> <function>convert</function></funcdef>                <paramdef>const std::string &<parameter>text</parameter></paramdef>                <paramdef>const std::string &<parameter>charset</parameter></paramdef> -              <paramdef>std::vector<unicode_char> &<parameter>out_buf</parameter></paramdef>  	    </funcprototype>  	  </funcsynopsis>  	</refsynopsisdiv> @@ -1814,15 +1814,23 @@ See COPYING for distribution information.  	    iterates over <classname>unicode_char</classname>s.  	    <function>convert</function>() returns the value of the output  	    iterator after iterating over the converted character sequence. +	    <parameter>errflag</parameter>, passed by reference, gets set to +	    <literal>true</literal> if some character could not be converted +	    to unicode, from the specified character set, and +	    <literal>false</literal> if the conversion completed without +	    errors.  	  </para>  	  <para>  	    An overloaded <function>convert</function>() puts the unicode  	    character sequence into a vector of  	    <classname>unicode_char</classname>s, instead of an output -	    sequence. Finally, a single <classname>std::string</classname> +	    sequence, and returned the error flag. +	    Finally, a single <classname>std::string</classname>  	    specifies the character string, instead of a beginning and an -	    ending iterator. +	    ending iterator, and returns a +	    <classname>std::pair</classname> with the converted unicode +	    text in a vector, and the error flag.  	  </para>  	</refsect1> diff --git a/unicode/linebreaktest.C b/unicode/linebreaktest.C index d7b93c4..0d1dffe 100644 --- a/unicode/linebreaktest.C +++ b/unicode/linebreaktest.C @@ -179,13 +179,18 @@ int main(int argc, char **argv)  	std::string convteststr="0000000000000000000000000000000\xe3\x82\xa2"; -	std::vector<unicode_char> uc; +	std::pair<std::vector<unicode_char>, bool> uc; -	unicode::iconvert::tou -		::convert(convteststr, "utf-8", uc); +	uc=unicode::iconvert::tou::convert(convteststr, "utf-8"); -	std::vector<unicode_char>::iterator e(uc.end()), -		b(std::find_if(uc.begin(), e, +	if (uc.second) +	{ +		std::cerr << "Valid UTF-8 string is invalid" << std::endl; +		exit(1); +	} + +	std::vector<unicode_char>::iterator e(uc.first.end()), +		b(std::find_if(uc.first.begin(), e,  			       std::not1(std::bind2nd(std::equal_to<unicode_char>  						      (),  						      unicode_char('0'))))); @@ -197,12 +202,19 @@ int main(int argc, char **argv)  		exit(1);  	} -	if (unicode::iconvert::fromu::convert(uc, "utf-8") != convteststr) +	if (unicode::iconvert::fromu::convert(uc.first, "utf-8") != convteststr)  	{  		std::cerr << "unicode::iconvert::fromu::convert failed"  			  << std::endl;  		exit(1);  	} +	uc=unicode::iconvert::tou::convert("\xE3", "utf-8"); + +	if (!uc.second) +	{ +		std::cerr << "Invalid UTF-8 string is valid" << std::endl; +		exit(1); +	}  	return 0;  } diff --git a/unicode/unicode.h b/unicode/unicode.h index 3789e7e..3efd9c7 100644 --- a/unicode/unicode.h +++ b/unicode/unicode.h @@ -1058,24 +1058,29 @@ namespace unicode {  			static output_iter_t convert(input_iter_t from_iter,  						     input_iter_t to_iter,  						     const std::string &chset, +						     bool &flag,  						     output_iter_t out_iter);  		template<typename input_iter_t> -			static void convert(input_iter_t from_iter, +			static bool convert(input_iter_t from_iter,  					    input_iter_t to_iter,  					    const std::string &chset,  					    std::vector<unicode_char> &out_buf)  		{ +			bool flag; +  			out_buf.clear();  			std::back_insert_iterator<std::vector<unicode_char> >  				insert_iter(out_buf); -			convert(from_iter, to_iter, chset, insert_iter); +			convert(from_iter, to_iter, chset, flag, insert_iter); + +			return flag;  		} -		static void convert(const std::string &str, -				    const std::string &chset, -				    std::vector<unicode_char> &out_buf); +		static std::pair<std::vector<unicode_char>, bool> +			convert(const std::string &str, +				const std::string &chset);  	};  	/* Helper class that saves unicode output into an output iterator */ @@ -1113,6 +1118,7 @@ namespace unicode {  		output_iter_t iconvert::tou::convert(input_iter_t from_iter,  						     input_iter_t to_iter,  						     const std::string &chset, +						     bool &flag,  						     output_iter_t out_iter)  		{  			class to_iter_class<output_iter_t> out(out_iter); @@ -1136,7 +1142,7 @@ namespace unicode {  			if (string.size() > 0)  				out(&string[0], string.size()); -			out.end(); +			out.end(flag);  			return out;  		} diff --git a/unicode/unicodecpp.C b/unicode/unicodecpp.C index 87e1cc5..485f05d 100644 --- a/unicode/unicodecpp.C +++ b/unicode/unicodecpp.C @@ -214,11 +214,14 @@ int unicode::iconvert::tou::converted(const char *ptr, size_t cnt)  			 cnt/sizeof(unicode_char));  } -void unicode::iconvert::tou::convert(const std::string &str, -				  const std::string &chset, -				  std::vector<unicode_char> &out_buf) +std::pair<std::vector<unicode_char>, bool> +unicode::iconvert::tou::convert(const std::string &str, +				const std::string &chset)  { -	convert(str.begin(), str.end(), chset, out_buf); +	std::pair<std::vector<unicode_char>, bool> ret; + +	ret.second=convert(str.begin(), str.end(), chset, ret.first); +	return ret;  }  bool unicode::iconvert::fromu::begin(const std::string &chset) | 
