From a6e6ff2e38a390e66982210eb56972f9e3c00477 Mon Sep 17 00:00:00 2001 From: Sam Varshavchik Date: Tue, 23 Feb 2021 19:18:13 -0500 Subject: courier-unicode: unicode_bidi_cleaned_size, unicode::bidi_override --- unicode/ChangeLog | 2 ++ unicode/Makefile.am | 4 ++- unicode/biditest2.C | 34 ++++++++++++++++++++ unicode/book.xml | 59 +++++++++++++++++++++++++++++++++++ unicode/configure.ac | 14 ++++++++- unicode/courier-unicode-version.m4.in | 5 ++- unicode/courier-unicode.h.in | 19 ++++++++--- unicode/courier-unicode.spec.in | 37 ++++++++++++++++++++-- unicode/unicode_bidi.c | 39 ++++++++++++++++++----- unicode/unicodecpp.C | 17 ++++++++++ 10 files changed, 211 insertions(+), 19 deletions(-) diff --git a/unicode/ChangeLog b/unicode/ChangeLog index 1995736..fcb1c10 100644 --- a/unicode/ChangeLog +++ b/unicode/ChangeLog @@ -1,3 +1,5 @@ +2.2.1 + 2021-02-14 Sam Varshavchik * unicode_bidi_calc and unicode_bidi_calc_levels return a diff --git a/unicode/Makefile.am b/unicode/Makefile.am index 32380d3..5877d22 100644 --- a/unicode/Makefile.am +++ b/unicode/Makefile.am @@ -97,6 +97,7 @@ man_MANS= \ $(srcdir)/man/unicode[\:][\:]bidi_embed_paragraph_level.3 \ $(srcdir)/man/unicode[\:][\:]bidi_get_direction.3 \ $(srcdir)/man/unicode[\:][\:]bidi_logical_order.3 \ + $(srcdir)/man/unicode[\:][\:]bidi_override.3 \ $(srcdir)/man/unicode[\:][\:]bidi_reorder.3 \ $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]convert.3 \ $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]convert_tocase.3 \ @@ -119,6 +120,7 @@ man_MANS= \ $(srcdir)/man/unicode_bidi_calc.3 \ $(srcdir)/man/unicode_bidi_calc_levels.3 \ $(srcdir)/man/unicode_bidi_calc_types.3 \ + $(srcdir)/man/unicode_bidi_cleaned_size.3 \ $(srcdir)/man/unicode_bidi_cleanup.3 \ $(srcdir)/man/unicode_bidi_direction.3 \ $(srcdir)/man/unicode_bidi_embed.3 \ @@ -219,7 +221,7 @@ libcourier_unicode_la_SOURCES=\ bidi_mirroring.h \ unicode_categories.c -libcourier_unicode_la_LDFLAGS=-version-info 6:0:2 +libcourier_unicode_la_LDFLAGS=-version-info 7:0:0 EXTRA_DIST=$(noinst_SCRIPTS) $(man_MANS) $(PACKAGE).spec \ m4/courier-unicode.m4 \ diff --git a/unicode/biditest2.C b/unicode/biditest2.C index 7787a33..6ab347b 100644 --- a/unicode/biditest2.C +++ b/unicode/biditest2.C @@ -274,6 +274,9 @@ void character_test() std::reverse(b+index, b+index+n); }); + size_t cleaned_size=unicode_bidi_cleaned_size(s.c_str(), + s.size(), 0); + n=0; unicode::bidi_cleanup (s, levels, @@ -285,6 +288,17 @@ void character_test() ++n; }); + if (cleaned_size != s.size()) + { + std::cerr << "Regression, line " + << linenum + << ": default cleaned size" + << std::endl + << " Expected size: " << cleaned_size + << ", actual size: " << s.size() + << std::endl; + exit(1); + } if (render_order != actual_render_order) { std::cerr << "Regression, line " @@ -408,6 +422,12 @@ void character_test() } unicode::bidi_reorder(new_string, std::get<0>(ret)); + + cleaned_size=unicode_bidi_cleaned_size + (new_string.c_str(), + new_string.size(), + UNICODE_BIDI_CLEANUP_CANONICAL); + unicode::bidi_cleanup(new_string, std::get<0>(ret), [] @@ -416,6 +436,20 @@ void character_test() }, UNICODE_BIDI_CLEANUP_CANONICAL); + if (cleaned_size != new_string.size()) + { + std::cerr << "Regression, line " + << linenum + << ": canonoical cleaned size" + << std::endl + << " Expected size: " + << cleaned_size + << ", actual size: " + << new_string.size() + << std::endl; + exit(1); + } + /* New string is now back in logical order */ if (new_string == s && std::get<0>(ret) == levels) diff --git a/unicode/book.xml b/unicode/book.xml index 2a83033..0b45433 100644 --- a/unicode/book.xml +++ b/unicode/book.xml @@ -334,6 +334,7 @@ make install DESTDIR=/tmp/courier-unicode-instimage # For example.unicode_bidi_calc unicode_bidi_reorder unicode_bidi_cleanup + unicode_bidi_cleaned_size unicode_bidi_logical_order unicode_bidi_embed unicode_bidi_embed_paragraph_level @@ -394,6 +395,13 @@ make install DESTDIR=/tmp/courier-unicode-instimage # For example.void *arg + + size_t unicode_bidi_cleaned_size + const char32_t *string + size_t n + int options + + size_t unicode_bidi_logical_order char32_t *string @@ -538,6 +546,9 @@ make install DESTDIR=/tmp/courier-unicode-instimage # For example.unicode_bidi_cleaned_size() is + available to determine, in advance, how many characters + will remain. @@ -865,6 +876,11 @@ make install DESTDIR=/tmp/courier-unicode-instimage # For example. + + The parameters to unicode_bidi_cleaned_size() + are a pointer to the unicode string, its size, and + the bitmask option to unicode_bidi_cleanup(). + @@ -2906,6 +2922,7 @@ make install DESTDIR=/tmp/courier-unicode-instimage # For example.unicode::bidi_embed unicode::bidi_embed_paragraph_level unicode::bidi_get_direction + unicode::bidi_override unicode bi-directional algorithm @@ -3035,6 +3052,13 @@ make install DESTDIR=/tmp/courier-unicode-instimage # For example.size_t starting_pos=0 size_t n=(size_t)-1 + + + std::u32string bidi_override + const std::u32string &string + unicode_bidi_level_t direction + int cleanup_options=0 + @@ -3190,6 +3214,41 @@ auto [levels, direction]=unicode::bidi_calc(types); + + + unicode::bidi_override + modifies the passed-in string as + follows: + + + + + + unicode::bidi_cleanup() is applied + with the specified, or defaulted, + cleanup_options + + + + + + Either the LRO or an + RLO override marker gets prepended + to the Unicode string, forcing the entire string to + be interpreted in a single rendering direction, when + processed by the Unicode bi-directional algorithm. + + + + + + unicode::bidi_override makes it + possible to use a Unicode-aware application or algorithm + in a context that only works with text that's always + displayed in a fixed direction, allowing graceful handling + of input containing bi-directional text. + + diff --git a/unicode/configure.ac b/unicode/configure.ac index 66c1b97..1cc3b76 100644 --- a/unicode/configure.ac +++ b/unicode/configure.ac @@ -117,7 +117,19 @@ fi CFLAGS="-I.. -I$srcdir/.. $CFLAGS" CXXFLAGS="-I.. -I$srcdir/.. $CXXFLAGS" -HVERSION="`echo $VERSION | tr -d '.'`" +set -- `echo "$VERSION" | tr '.' ' '` + +v=$1 +r=`echo "00"$2 | sed 's/.*(...)$/$1/'` +p=$3 + +if test "$p" = "" + then p="0" +fi + +p=`echo "00"$p | sed 's/.*(...)$/$[]1/'` + +HVERSION="$v$r$p" AC_SUBST(HVERSION) AM_CONDITIONAL(HAVE_DOCS,[test -f $srcdir/docbook/icon.gif]) diff --git a/unicode/courier-unicode-version.m4.in b/unicode/courier-unicode-version.m4.in index d40b5b7..94b0c04 100644 --- a/unicode/courier-unicode-version.m4.in +++ b/unicode/courier-unicode-version.m4.in @@ -14,13 +14,16 @@ fi set -- `echo "$vers" | tr '.' ' '` v=$[]1 -r=$[]2 +r=`echo "00"$[]2 | sed 's/.*(...)$/$[]1/'` + p=$[]3 if test "$p" = "" then p="0" fi +p=`echo "00"$p | sed 's/.*(...)$/$[]1/'` + AC_TRY_COMPILE([ #include #ifndef COURIER_UNICODE_VERSION diff --git a/unicode/courier-unicode.h.in b/unicode/courier-unicode.h.in index e0a5b99..a1a502c 100644 --- a/unicode/courier-unicode.h.in +++ b/unicode/courier-unicode.h.in @@ -705,6 +705,10 @@ extern size_t unicode_bidi_cleanup(char32_t *string, void (*removed_callback)(size_t, void *), void *); +extern size_t unicode_bidi_cleaned_size(const char32_t *string, + size_t n, + int options); + extern void unicode_bidi_logical_order(char32_t *string, unicode_bidi_level_t *levels, size_t n, @@ -2354,14 +2358,19 @@ std::u32string bidi_embed(const std::u32string &string, //! In order for the unicode string to have the specified default //! paragraph embedding level. -extern char32_t bidi_embed_paragraph_level(const std::u32string &string, - unicode_bidi_level_t level); +char32_t bidi_embed_paragraph_level(const std::u32string &string, + unicode_bidi_level_t level); //! Compute default direction of text -extern unicode_bidi_direction bidi_get_direction(const std::u32string &string, - size_t starting_pos=0, - size_t n=(size_t)-1); +unicode_bidi_direction bidi_get_direction(const std::u32string &string, + size_t starting_pos=0, + size_t n=(size_t)-1); + +//! Override bidi direction. +std::u32string bidi_override(const std::u32string &s, + unicode_bidi_level_t direction, + int cleanup_options=0); #if 0 { diff --git a/unicode/courier-unicode.spec.in b/unicode/courier-unicode.spec.in index f7d1eb6..440d6f1 100644 --- a/unicode/courier-unicode.spec.in +++ b/unicode/courier-unicode.spec.in @@ -1,5 +1,12 @@ Summary: Courier Unicode Library +%if 0%{?compat:1} +Name: courier-unicode%(echo @VERSION@ | tr -d '.') + +%define __brp_ldconfig %{nil} + +%else Name: courier-unicode +%endif Version: @VERSION@ Release: 1%{?dist}%{?courier_release} License: GPLv3 @@ -11,10 +18,14 @@ BuildRequires: perl BuildRequires: gcc-c++ BuildRequires: %{__make} +%if 0%{?compat:1} + +%else %package devel Summary: Courier Unicode Library development files Group: Development/Libraries Requires: %{name} = 0:%{version}-%{release} +%endif %description This library implements several algorithms related to the Unicode @@ -24,13 +35,17 @@ This package installs only the run-time libraries needed by applications that use this library. Install the "courier-unicode-devel" package if you want to develop new applications using this library. +%if 0%{?compat:1} + +%else %description devel This package contains development files for the Courier Unicode Library. Install this package if you want to develop applications that uses this unicode library. +%endif %prep -%setup -q +%setup -q -n courier-unicode-@VERSION@ %configure %build %{__make} -s %{?_smp_mflags} @@ -39,12 +54,27 @@ unicode library. rm -rf $RPM_BUILD_ROOT %{__make} install DESTDIR=$RPM_BUILD_ROOT -%post -p /sbin/ldconfig -%postun -p /sbin/ldconfig +%if 0%{?compat:1} +find $RPM_BUILD_ROOT%{_libdir} -type l -print | xargs rm -f +rm -rf $RPM_BUILD_ROOT%{_includedir} +rm -f $RPM_BUILD_ROOT%{_libdir}/*.a +rm -f $RPM_BUILD_ROOT%{_libdir}/*.la +rm -rf $RPM_BUILD_ROOT%{_datadir}/aclocal +rm -rf $RPM_BUILD_ROOT%{_mandir} +%endif %clean rm -rf $RPM_BUILD_ROOT +%post -p /sbin/ldconfig +%postun -p /sbin/ldconfig + +%if 0%{?compat:1} +%files +%defattr(-,root,root,-) +%{_libdir}/*.so.* + +%else %files %defattr(-,root,root,-) @@ -58,6 +88,7 @@ rm -rf $RPM_BUILD_ROOT %{_libdir}/*.la %{_libdir}/*.a %{_datadir}/aclocal/*.m4 +%endif %changelog * Sun Jan 12 2014 Sam Varshavchik - 1.0 diff --git a/unicode/unicode_bidi.c b/unicode/unicode_bidi.c index b97ec25..1aa4a88 100644 --- a/unicode/unicode_bidi.c +++ b/unicode/unicode_bidi.c @@ -2063,12 +2063,14 @@ void unicode_bidi_reorder(char32_t *p, level_run_layers_deinit(&layers); } -size_t unicode_bidi_cleanup(char32_t *string, - unicode_bidi_level_t *levels, - size_t n, - int cleanup_options, - void (*removed_callback)(size_t, void *), - void *arg) +static size_t unicode_bidi_count_or_cleanup(const char32_t *string, + char32_t *dest, + unicode_bidi_level_t *levels, + size_t n, + int cleanup_options, + void (*removed_callback)(size_t, + void *), + void *arg) { size_t i=0; for (size_t j=0; j