diff options
| -rw-r--r-- | unicode/ChangeLog | 2 | ||||
| -rw-r--r-- | unicode/Makefile.am | 4 | ||||
| -rw-r--r-- | unicode/biditest2.C | 34 | ||||
| -rw-r--r-- | unicode/book.xml | 59 | ||||
| -rw-r--r-- | unicode/configure.ac | 14 | ||||
| -rw-r--r-- | unicode/courier-unicode-version.m4.in | 5 | ||||
| -rw-r--r-- | unicode/courier-unicode.h.in | 19 | ||||
| -rw-r--r-- | unicode/courier-unicode.spec.in | 37 | ||||
| -rw-r--r-- | unicode/unicode_bidi.c | 39 | ||||
| -rw-r--r-- | unicode/unicodecpp.C | 17 | 
10 files changed, 211 insertions, 19 deletions
| diff --git a/unicode/ChangeLog b/unicode/ChangeLog index 1995736..fcb1c10 100644 --- a/unicode/ChangeLog +++ b/unicode/ChangeLog @@ -1,3 +1,5 @@ +2.2.1 +  2021-02-14  Sam Varshavchik  <mrsam@courier-mta.com>  	* unicode_bidi_calc and unicode_bidi_calc_levels return a diff --git a/unicode/Makefile.am b/unicode/Makefile.am index 32380d3..5877d22 100644 --- a/unicode/Makefile.am +++ b/unicode/Makefile.am @@ -97,6 +97,7 @@ man_MANS= \          $(srcdir)/man/unicode[\:][\:]bidi_embed_paragraph_level.3 \          $(srcdir)/man/unicode[\:][\:]bidi_get_direction.3 \          $(srcdir)/man/unicode[\:][\:]bidi_logical_order.3 \ +        $(srcdir)/man/unicode[\:][\:]bidi_override.3 \          $(srcdir)/man/unicode[\:][\:]bidi_reorder.3 \          $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]convert.3 \          $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]convert_tocase.3 \ @@ -119,6 +120,7 @@ man_MANS= \          $(srcdir)/man/unicode_bidi_calc.3 \          $(srcdir)/man/unicode_bidi_calc_levels.3 \          $(srcdir)/man/unicode_bidi_calc_types.3 \ +        $(srcdir)/man/unicode_bidi_cleaned_size.3 \          $(srcdir)/man/unicode_bidi_cleanup.3 \          $(srcdir)/man/unicode_bidi_direction.3 \          $(srcdir)/man/unicode_bidi_embed.3 \ @@ -219,7 +221,7 @@ libcourier_unicode_la_SOURCES=\  			bidi_mirroring.h \  			unicode_categories.c -libcourier_unicode_la_LDFLAGS=-version-info 6:0:2 +libcourier_unicode_la_LDFLAGS=-version-info 7:0:0  EXTRA_DIST=$(noinst_SCRIPTS) $(man_MANS) $(PACKAGE).spec \  	m4/courier-unicode.m4 \ diff --git a/unicode/biditest2.C b/unicode/biditest2.C index 7787a33..6ab347b 100644 --- a/unicode/biditest2.C +++ b/unicode/biditest2.C @@ -274,6 +274,9 @@ void character_test()  				 std::reverse(b+index, b+index+n);  			 }); +		size_t cleaned_size=unicode_bidi_cleaned_size(s.c_str(), +							      s.size(), 0); +  		n=0;  		unicode::bidi_cleanup  			(s, levels, @@ -285,6 +288,17 @@ void character_test()  				 ++n;  			 }); +		if (cleaned_size != s.size()) +		{ +			std::cerr << "Regression, line " +				  << linenum +				  << ": default cleaned size" +				  << std::endl +				  << "   Expected size: " << cleaned_size +				  << ", actual size: " << s.size() +				  << std::endl; +			exit(1); +		}  		if (render_order != actual_render_order)  		{  			std::cerr << "Regression, line " @@ -408,6 +422,12 @@ void character_test()  			}  			unicode::bidi_reorder(new_string, std::get<0>(ret)); + +			cleaned_size=unicode_bidi_cleaned_size +				(new_string.c_str(), +				 new_string.size(), +				 UNICODE_BIDI_CLEANUP_CANONICAL); +  			unicode::bidi_cleanup(new_string,  					      std::get<0>(ret),  					      [] @@ -416,6 +436,20 @@ void character_test()  					      },  					      UNICODE_BIDI_CLEANUP_CANONICAL); +			if (cleaned_size != new_string.size()) +			{ +				std::cerr << "Regression, line " +					  << linenum +					  << ": canonoical cleaned size" +					  << std::endl +					  << "   Expected size: " +					  << cleaned_size +					  << ", actual size: " +					  << new_string.size() +					  << std::endl; +				exit(1); +			} +  			/* New string is now back in logical order */  			if (new_string == s && std::get<0>(ret) == levels) diff --git a/unicode/book.xml b/unicode/book.xml index 2a83033..0b45433 100644 --- a/unicode/book.xml +++ b/unicode/book.xml @@ -334,6 +334,7 @@ make install DESTDIR=/tmp/courier-unicode-instimage # For example.</programlisti  	  <refname>unicode_bidi_calc</refname>  	  <refname>unicode_bidi_reorder</refname>  	  <refname>unicode_bidi_cleanup</refname> +	  <refname>unicode_bidi_cleaned_size</refname>  	  <refname>unicode_bidi_logical_order</refname>  	  <refname>unicode_bidi_embed</refname>  	  <refname>unicode_bidi_embed_paragraph_level</refname> @@ -395,6 +396,13 @@ make install DESTDIR=/tmp/courier-unicode-instimage # For example.</programlisti  	    </funcprototype>  	    <funcprototype> +	      <funcdef>size_t <function>unicode_bidi_cleaned_size</function></funcdef> +              <paramdef>const char32_t *<parameter>string</parameter></paramdef> +              <paramdef>size_t <parameter>n</parameter></paramdef> +	      <paramdef>int <parameter>options</parameter></paramdef> +	    </funcprototype> + +	    <funcprototype>  	      <funcdef>size_t <function>unicode_bidi_logical_order</function></funcdef>                <paramdef>char32_t *<parameter>string</parameter></paramdef>                <paramdef>unicode_bidi_level_t *<parameter>levels</parameter></paramdef> @@ -538,6 +546,9 @@ make install DESTDIR=/tmp/courier-unicode-instimage # For example.</programlisti  		  to remove the characters from the string which are used  		  by the bi-directional algorithm, and are not needed for  		  rendering the text. +		  <function>unicode_bidi_cleaned_size</function>() is +		  available to determine, in advance, how many characters +		  will remain.  		</para>  	      </listitem>  	    </orderedlist> @@ -865,6 +876,11 @@ make install DESTDIR=/tmp/courier-unicode-instimage # For example.</programlisti  	      canonical rendering order for their string and embedding level  	      values.              </para> +	    <para> +	      The parameters to <function>unicode_bidi_cleaned_size</function>() +	      are a pointer to the unicode string, its size, and +	      the bitmask option to <function>unicode_bidi_cleanup</function>(). +            </para>  	  </refsect2>  	  <refsect2 id="unicode_bidi_embed"> @@ -2906,6 +2922,7 @@ make install DESTDIR=/tmp/courier-unicode-instimage # For example.</programlisti  	  <refname>unicode::bidi_embed</refname>  	  <refname>unicode::bidi_embed_paragraph_level</refname>  	  <refname>unicode::bidi_get_direction</refname> +	  <refname>unicode::bidi_override</refname>  	  <refpurpose>unicode bi-directional algorithm</refpurpose>  	</refnamediv> @@ -3035,6 +3052,13 @@ make install DESTDIR=/tmp/courier-unicode-instimage # For example.</programlisti  	      <paramdef>size_t <parameter>starting_pos</parameter>=0</paramdef>  	      <paramdef>size_t <parameter>n</parameter>=(size_t)-1</paramdef>  	    </funcprototype> + +	    <funcprototype> +	      <funcdef>std::u32string <function>bidi_override</function></funcdef> +              <paramdef>const std::u32string &<parameter>string</parameter></paramdef> +              <paramdef>unicode_bidi_level_t <parameter>direction</parameter></paramdef> +	      <paramdef>int <parameter>cleanup_options</parameter>=0</paramdef> +	    </funcprototype>            </funcsynopsis>  	</refsynopsisdiv> @@ -3190,6 +3214,41 @@ auto [levels, direction]=unicode::bidi_calc(types);  	      </para>  	    </listitem> +	    <listitem> +	      <para> +		<function>unicode::bidi_override</function> +		modifies the passed-in <parameter>string</parameter> as +		follows: +	      </para> + +	      <itemizedlist> +		<listitem> +		  <para> +		    <function>unicode::bidi_cleanup</function>() is applied +		    with the specified, or defaulted, +		    <replaceable>cleanup_options</replaceable> +		  </para> +		</listitem> + +		<listitem> +		  <para> +		    Either the <literal>LRO</literal> or an +		    <literal>RLO</literal> override marker gets prepended +		    to the Unicode string, forcing the entire string to +		    be interpreted in a single rendering direction, when +		    processed by the Unicode bi-directional algorithm. +		  </para> +		</listitem> +	      </itemizedlist> + +	      <para> +		<function>unicode::bidi_override</function> makes it +		possible to use a Unicode-aware application or algorithm +		in a context that only works with text that's always +		displayed in a fixed direction, allowing graceful handling +		of input containing bi-directional text. +	      </para> +	    </listitem>            </itemizedlist>  	  <refsect2 id="unicode_cpp_bidi_literals"> diff --git a/unicode/configure.ac b/unicode/configure.ac index 66c1b97..1cc3b76 100644 --- a/unicode/configure.ac +++ b/unicode/configure.ac @@ -117,7 +117,19 @@ fi  CFLAGS="-I.. -I$srcdir/.. $CFLAGS"  CXXFLAGS="-I.. -I$srcdir/.. $CXXFLAGS" -HVERSION="`echo $VERSION | tr -d '.'`" +set -- `echo "$VERSION" | tr '.' ' '` + +v=$1 +r=`echo "00"$2 | sed 's/.*(...)$/$1/'` +p=$3 + +if test "$p" = "" +   then p="0" +fi + +p=`echo "00"$p | sed 's/.*(...)$/$[]1/'` + +HVERSION="$v$r$p"  AC_SUBST(HVERSION)  AM_CONDITIONAL(HAVE_DOCS,[test -f $srcdir/docbook/icon.gif]) diff --git a/unicode/courier-unicode-version.m4.in b/unicode/courier-unicode-version.m4.in index d40b5b7..94b0c04 100644 --- a/unicode/courier-unicode-version.m4.in +++ b/unicode/courier-unicode-version.m4.in @@ -14,13 +14,16 @@ fi  set -- `echo "$vers" | tr '.' ' '`  v=$[]1 -r=$[]2 +r=`echo "00"$[]2 | sed 's/.*(...)$/$[]1/'` +  p=$[]3  if test "$p" = ""     then p="0"  fi +p=`echo "00"$p | sed 's/.*(...)$/$[]1/'` +  AC_TRY_COMPILE([  #include <courier-unicode.h>  #ifndef COURIER_UNICODE_VERSION diff --git a/unicode/courier-unicode.h.in b/unicode/courier-unicode.h.in index e0a5b99..a1a502c 100644 --- a/unicode/courier-unicode.h.in +++ b/unicode/courier-unicode.h.in @@ -705,6 +705,10 @@ extern size_t unicode_bidi_cleanup(char32_t *string,  				   void (*removed_callback)(size_t, void *),  				   void *); +extern size_t unicode_bidi_cleaned_size(const char32_t *string, +					size_t n, +					int options); +  extern void unicode_bidi_logical_order(char32_t *string,  				       unicode_bidi_level_t *levels,  				       size_t n, @@ -2354,14 +2358,19 @@ std::u32string bidi_embed(const std::u32string &string,  //! In order for the unicode string to have the specified default  //! paragraph embedding level. -extern char32_t bidi_embed_paragraph_level(const std::u32string &string, -					   unicode_bidi_level_t level); +char32_t bidi_embed_paragraph_level(const std::u32string &string, +				    unicode_bidi_level_t level);  //! Compute default direction of text -extern unicode_bidi_direction bidi_get_direction(const std::u32string &string, -						 size_t starting_pos=0, -						 size_t n=(size_t)-1); +unicode_bidi_direction bidi_get_direction(const std::u32string &string, +					  size_t starting_pos=0, +					  size_t n=(size_t)-1); + +//! Override bidi direction. +std::u32string bidi_override(const std::u32string &s, +			     unicode_bidi_level_t direction, +			     int cleanup_options=0);  #if 0  { diff --git a/unicode/courier-unicode.spec.in b/unicode/courier-unicode.spec.in index f7d1eb6..440d6f1 100644 --- a/unicode/courier-unicode.spec.in +++ b/unicode/courier-unicode.spec.in @@ -1,5 +1,12 @@  Summary: Courier Unicode Library +%if 0%{?compat:1} +Name: courier-unicode%(echo @VERSION@ | tr -d '.') + +%define __brp_ldconfig %{nil} + +%else  Name: courier-unicode +%endif  Version: @VERSION@  Release: 1%{?dist}%{?courier_release}  License: GPLv3 @@ -11,10 +18,14 @@ BuildRequires: perl  BuildRequires: gcc-c++  BuildRequires: %{__make} +%if 0%{?compat:1} + +%else  %package devel  Summary: Courier Unicode Library development files  Group: Development/Libraries  Requires: %{name} = 0:%{version}-%{release} +%endif  %description  This library implements several algorithms related to the Unicode @@ -24,13 +35,17 @@ This package installs only the run-time libraries needed by applications that  use this library. Install the "courier-unicode-devel" package if you want  to develop new applications using this library. +%if 0%{?compat:1} + +%else  %description devel  This package contains development files for the Courier Unicode Library.  Install this package if you want to develop applications that uses this  unicode library. +%endif  %prep -%setup -q +%setup -q -n courier-unicode-@VERSION@  %configure  %build  %{__make} -s %{?_smp_mflags} @@ -39,12 +54,27 @@ unicode library.  rm -rf $RPM_BUILD_ROOT  %{__make} install DESTDIR=$RPM_BUILD_ROOT -%post -p /sbin/ldconfig -%postun -p /sbin/ldconfig +%if 0%{?compat:1} +find $RPM_BUILD_ROOT%{_libdir} -type l -print | xargs rm -f +rm -rf $RPM_BUILD_ROOT%{_includedir} +rm -f $RPM_BUILD_ROOT%{_libdir}/*.a +rm -f $RPM_BUILD_ROOT%{_libdir}/*.la +rm -rf $RPM_BUILD_ROOT%{_datadir}/aclocal +rm -rf $RPM_BUILD_ROOT%{_mandir} +%endif  %clean  rm -rf $RPM_BUILD_ROOT +%post -p /sbin/ldconfig +%postun -p /sbin/ldconfig + +%if 0%{?compat:1} +%files +%defattr(-,root,root,-) +%{_libdir}/*.so.* + +%else  %files  %defattr(-,root,root,-) @@ -58,6 +88,7 @@ rm -rf $RPM_BUILD_ROOT  %{_libdir}/*.la  %{_libdir}/*.a  %{_datadir}/aclocal/*.m4 +%endif  %changelog  * Sun Jan 12 2014 Sam Varshavchik <mrsam@octopus.email-scan.com> - 1.0 diff --git a/unicode/unicode_bidi.c b/unicode/unicode_bidi.c index b97ec25..1aa4a88 100644 --- a/unicode/unicode_bidi.c +++ b/unicode/unicode_bidi.c @@ -2063,12 +2063,14 @@ void unicode_bidi_reorder(char32_t *p,  	level_run_layers_deinit(&layers);  } -size_t unicode_bidi_cleanup(char32_t *string, -			    unicode_bidi_level_t *levels, -			    size_t n, -			    int cleanup_options, -			    void (*removed_callback)(size_t, void *), -			    void *arg) +static size_t unicode_bidi_count_or_cleanup(const char32_t *string, +					    char32_t *dest, +					    unicode_bidi_level_t *levels, +					    size_t n, +					    int cleanup_options, +					    void (*removed_callback)(size_t, +								     void *), +					    void *arg)  {  	size_t i=0;  	for (size_t j=0; j<n; ++j) @@ -2090,13 +2092,34 @@ size_t unicode_bidi_cleanup(char32_t *string,  		if (levels)  			levels[i]=levels[j] & 1; -		string[i]=(cleanup_options & UNICODE_BIDI_CLEANUP_BNL) -			&& cl == UNICODE_BIDI_TYPE_B ? '\n' : string[j]; +		if (dest) +			dest[i]=(cleanup_options & UNICODE_BIDI_CLEANUP_BNL) +				&& cl == UNICODE_BIDI_TYPE_B ? '\n' : string[j];  		++i;  	}  	return i;  } +size_t unicode_bidi_cleanup(char32_t *string, +			    unicode_bidi_level_t *levels, +			    size_t n, +			    int cleanup_options, +			    void (*removed_callback)(size_t, void *), +			    void *arg) +{ +	return unicode_bidi_count_or_cleanup(string, string, levels, n, +					     cleanup_options, removed_callback, +					     arg); +} + +size_t unicode_bidi_cleaned_size(const char32_t *string, +				 size_t n, +				 int cleanup_options) +{ +	return unicode_bidi_count_or_cleanup(string, NULL, NULL, n, +					     cleanup_options, NULL, NULL); +} +  void unicode_bidi_logical_order(char32_t *string,  				unicode_bidi_level_t *levels,  				size_t n, diff --git a/unicode/unicodecpp.C b/unicode/unicodecpp.C index e91f756..04d9879 100644 --- a/unicode/unicodecpp.C +++ b/unicode/unicodecpp.C @@ -949,3 +949,20 @@ unicode_bidi_direction unicode::bidi_get_direction(const std::u32string &string,  	return unicode_bidi_get_direction(string.c_str()+starting_pos, n);  } + +std::u32string unicode::bidi_override(const std::u32string &s, +				      unicode_bidi_level_t direction, +				      int cleanup_options) +{ +	std::u32string ret; + +	ret.reserve(s.size()+1); + +	ret.push_back(' '); +	ret.insert(ret.end(), s.begin(), s.end()); + +	bidi_cleanup(ret, [](size_t) {}, cleanup_options); +	ret.at(0)=direction & 1 ? UNICODE_RLO : UNICODE_LRO; + +	return ret; +} | 
