diff options
| author | Sam Varshavchik | 2021-02-23 19:18:13 -0500 |
|---|---|---|
| committer | Sam Varshavchik | 2021-02-23 19:18:13 -0500 |
| commit | a6e6ff2e38a390e66982210eb56972f9e3c00477 (patch) | |
| tree | da9eafa6f4f9c324c08f7cee3527cda572f611c7 | |
| parent | 8327472b2c20a2e2466e5f4224f2910dda66950c (diff) | |
| download | courier-libs-a6e6ff2e38a390e66982210eb56972f9e3c00477.tar.bz2 | |
courier-unicode: unicode_bidi_cleaned_size, unicode::bidi_override
| -rw-r--r-- | unicode/ChangeLog | 2 | ||||
| -rw-r--r-- | unicode/Makefile.am | 4 | ||||
| -rw-r--r-- | unicode/biditest2.C | 34 | ||||
| -rw-r--r-- | unicode/book.xml | 59 | ||||
| -rw-r--r-- | unicode/configure.ac | 14 | ||||
| -rw-r--r-- | unicode/courier-unicode-version.m4.in | 5 | ||||
| -rw-r--r-- | unicode/courier-unicode.h.in | 19 | ||||
| -rw-r--r-- | unicode/courier-unicode.spec.in | 37 | ||||
| -rw-r--r-- | unicode/unicode_bidi.c | 39 | ||||
| -rw-r--r-- | unicode/unicodecpp.C | 17 |
10 files changed, 211 insertions, 19 deletions
diff --git a/unicode/ChangeLog b/unicode/ChangeLog index 1995736..fcb1c10 100644 --- a/unicode/ChangeLog +++ b/unicode/ChangeLog @@ -1,3 +1,5 @@ +2.2.1 + 2021-02-14 Sam Varshavchik <mrsam@courier-mta.com> * unicode_bidi_calc and unicode_bidi_calc_levels return a diff --git a/unicode/Makefile.am b/unicode/Makefile.am index 32380d3..5877d22 100644 --- a/unicode/Makefile.am +++ b/unicode/Makefile.am @@ -97,6 +97,7 @@ man_MANS= \ $(srcdir)/man/unicode[\:][\:]bidi_embed_paragraph_level.3 \ $(srcdir)/man/unicode[\:][\:]bidi_get_direction.3 \ $(srcdir)/man/unicode[\:][\:]bidi_logical_order.3 \ + $(srcdir)/man/unicode[\:][\:]bidi_override.3 \ $(srcdir)/man/unicode[\:][\:]bidi_reorder.3 \ $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]convert.3 \ $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]convert_tocase.3 \ @@ -119,6 +120,7 @@ man_MANS= \ $(srcdir)/man/unicode_bidi_calc.3 \ $(srcdir)/man/unicode_bidi_calc_levels.3 \ $(srcdir)/man/unicode_bidi_calc_types.3 \ + $(srcdir)/man/unicode_bidi_cleaned_size.3 \ $(srcdir)/man/unicode_bidi_cleanup.3 \ $(srcdir)/man/unicode_bidi_direction.3 \ $(srcdir)/man/unicode_bidi_embed.3 \ @@ -219,7 +221,7 @@ libcourier_unicode_la_SOURCES=\ bidi_mirroring.h \ unicode_categories.c -libcourier_unicode_la_LDFLAGS=-version-info 6:0:2 +libcourier_unicode_la_LDFLAGS=-version-info 7:0:0 EXTRA_DIST=$(noinst_SCRIPTS) $(man_MANS) $(PACKAGE).spec \ m4/courier-unicode.m4 \ diff --git a/unicode/biditest2.C b/unicode/biditest2.C index 7787a33..6ab347b 100644 --- a/unicode/biditest2.C +++ b/unicode/biditest2.C @@ -274,6 +274,9 @@ void character_test() std::reverse(b+index, b+index+n); }); + size_t cleaned_size=unicode_bidi_cleaned_size(s.c_str(), + s.size(), 0); + n=0; unicode::bidi_cleanup (s, levels, @@ -285,6 +288,17 @@ void character_test() ++n; }); + if (cleaned_size != s.size()) + { + std::cerr << "Regression, line " + << linenum + << ": default cleaned size" + << std::endl + << " Expected size: " << cleaned_size + << ", actual size: " << s.size() + << std::endl; + exit(1); + } if (render_order != actual_render_order) { std::cerr << "Regression, line " @@ -408,6 +422,12 @@ void character_test() } unicode::bidi_reorder(new_string, std::get<0>(ret)); + + cleaned_size=unicode_bidi_cleaned_size + (new_string.c_str(), + new_string.size(), + UNICODE_BIDI_CLEANUP_CANONICAL); + unicode::bidi_cleanup(new_string, std::get<0>(ret), [] @@ -416,6 +436,20 @@ void character_test() }, UNICODE_BIDI_CLEANUP_CANONICAL); + if (cleaned_size != new_string.size()) + { + std::cerr << "Regression, line " + << linenum + << ": canonoical cleaned size" + << std::endl + << " Expected size: " + << cleaned_size + << ", actual size: " + << new_string.size() + << std::endl; + exit(1); + } + /* New string is now back in logical order */ if (new_string == s && std::get<0>(ret) == levels) diff --git a/unicode/book.xml b/unicode/book.xml index 2a83033..0b45433 100644 --- a/unicode/book.xml +++ b/unicode/book.xml @@ -334,6 +334,7 @@ make install DESTDIR=/tmp/courier-unicode-instimage # For example.</programlisti <refname>unicode_bidi_calc</refname> <refname>unicode_bidi_reorder</refname> <refname>unicode_bidi_cleanup</refname> + <refname>unicode_bidi_cleaned_size</refname> <refname>unicode_bidi_logical_order</refname> <refname>unicode_bidi_embed</refname> <refname>unicode_bidi_embed_paragraph_level</refname> @@ -395,6 +396,13 @@ make install DESTDIR=/tmp/courier-unicode-instimage # For example.</programlisti </funcprototype> <funcprototype> + <funcdef>size_t <function>unicode_bidi_cleaned_size</function></funcdef> + <paramdef>const char32_t *<parameter>string</parameter></paramdef> + <paramdef>size_t <parameter>n</parameter></paramdef> + <paramdef>int <parameter>options</parameter></paramdef> + </funcprototype> + + <funcprototype> <funcdef>size_t <function>unicode_bidi_logical_order</function></funcdef> <paramdef>char32_t *<parameter>string</parameter></paramdef> <paramdef>unicode_bidi_level_t *<parameter>levels</parameter></paramdef> @@ -538,6 +546,9 @@ make install DESTDIR=/tmp/courier-unicode-instimage # For example.</programlisti to remove the characters from the string which are used by the bi-directional algorithm, and are not needed for rendering the text. + <function>unicode_bidi_cleaned_size</function>() is + available to determine, in advance, how many characters + will remain. </para> </listitem> </orderedlist> @@ -865,6 +876,11 @@ make install DESTDIR=/tmp/courier-unicode-instimage # For example.</programlisti canonical rendering order for their string and embedding level values. </para> + <para> + The parameters to <function>unicode_bidi_cleaned_size</function>() + are a pointer to the unicode string, its size, and + the bitmask option to <function>unicode_bidi_cleanup</function>(). + </para> </refsect2> <refsect2 id="unicode_bidi_embed"> @@ -2906,6 +2922,7 @@ make install DESTDIR=/tmp/courier-unicode-instimage # For example.</programlisti <refname>unicode::bidi_embed</refname> <refname>unicode::bidi_embed_paragraph_level</refname> <refname>unicode::bidi_get_direction</refname> + <refname>unicode::bidi_override</refname> <refpurpose>unicode bi-directional algorithm</refpurpose> </refnamediv> @@ -3035,6 +3052,13 @@ make install DESTDIR=/tmp/courier-unicode-instimage # For example.</programlisti <paramdef>size_t <parameter>starting_pos</parameter>=0</paramdef> <paramdef>size_t <parameter>n</parameter>=(size_t)-1</paramdef> </funcprototype> + + <funcprototype> + <funcdef>std::u32string <function>bidi_override</function></funcdef> + <paramdef>const std::u32string &<parameter>string</parameter></paramdef> + <paramdef>unicode_bidi_level_t <parameter>direction</parameter></paramdef> + <paramdef>int <parameter>cleanup_options</parameter>=0</paramdef> + </funcprototype> </funcsynopsis> </refsynopsisdiv> @@ -3190,6 +3214,41 @@ auto [levels, direction]=unicode::bidi_calc(types); </para> </listitem> + <listitem> + <para> + <function>unicode::bidi_override</function> + modifies the passed-in <parameter>string</parameter> as + follows: + </para> + + <itemizedlist> + <listitem> + <para> + <function>unicode::bidi_cleanup</function>() is applied + with the specified, or defaulted, + <replaceable>cleanup_options</replaceable> + </para> + </listitem> + + <listitem> + <para> + Either the <literal>LRO</literal> or an + <literal>RLO</literal> override marker gets prepended + to the Unicode string, forcing the entire string to + be interpreted in a single rendering direction, when + processed by the Unicode bi-directional algorithm. + </para> + </listitem> + </itemizedlist> + + <para> + <function>unicode::bidi_override</function> makes it + possible to use a Unicode-aware application or algorithm + in a context that only works with text that's always + displayed in a fixed direction, allowing graceful handling + of input containing bi-directional text. + </para> + </listitem> </itemizedlist> <refsect2 id="unicode_cpp_bidi_literals"> diff --git a/unicode/configure.ac b/unicode/configure.ac index 66c1b97..1cc3b76 100644 --- a/unicode/configure.ac +++ b/unicode/configure.ac @@ -117,7 +117,19 @@ fi CFLAGS="-I.. -I$srcdir/.. $CFLAGS" CXXFLAGS="-I.. -I$srcdir/.. $CXXFLAGS" -HVERSION="`echo $VERSION | tr -d '.'`" +set -- `echo "$VERSION" | tr '.' ' '` + +v=$1 +r=`echo "00"$2 | sed 's/.*(...)$/$1/'` +p=$3 + +if test "$p" = "" + then p="0" +fi + +p=`echo "00"$p | sed 's/.*(...)$/$[]1/'` + +HVERSION="$v$r$p" AC_SUBST(HVERSION) AM_CONDITIONAL(HAVE_DOCS,[test -f $srcdir/docbook/icon.gif]) diff --git a/unicode/courier-unicode-version.m4.in b/unicode/courier-unicode-version.m4.in index d40b5b7..94b0c04 100644 --- a/unicode/courier-unicode-version.m4.in +++ b/unicode/courier-unicode-version.m4.in @@ -14,13 +14,16 @@ fi set -- `echo "$vers" | tr '.' ' '` v=$[]1 -r=$[]2 +r=`echo "00"$[]2 | sed 's/.*(...)$/$[]1/'` + p=$[]3 if test "$p" = "" then p="0" fi +p=`echo "00"$p | sed 's/.*(...)$/$[]1/'` + AC_TRY_COMPILE([ #include <courier-unicode.h> #ifndef COURIER_UNICODE_VERSION diff --git a/unicode/courier-unicode.h.in b/unicode/courier-unicode.h.in index e0a5b99..a1a502c 100644 --- a/unicode/courier-unicode.h.in +++ b/unicode/courier-unicode.h.in @@ -705,6 +705,10 @@ extern size_t unicode_bidi_cleanup(char32_t *string, void (*removed_callback)(size_t, void *), void *); +extern size_t unicode_bidi_cleaned_size(const char32_t *string, + size_t n, + int options); + extern void unicode_bidi_logical_order(char32_t *string, unicode_bidi_level_t *levels, size_t n, @@ -2354,14 +2358,19 @@ std::u32string bidi_embed(const std::u32string &string, //! In order for the unicode string to have the specified default //! paragraph embedding level. -extern char32_t bidi_embed_paragraph_level(const std::u32string &string, - unicode_bidi_level_t level); +char32_t bidi_embed_paragraph_level(const std::u32string &string, + unicode_bidi_level_t level); //! Compute default direction of text -extern unicode_bidi_direction bidi_get_direction(const std::u32string &string, - size_t starting_pos=0, - size_t n=(size_t)-1); +unicode_bidi_direction bidi_get_direction(const std::u32string &string, + size_t starting_pos=0, + size_t n=(size_t)-1); + +//! Override bidi direction. +std::u32string bidi_override(const std::u32string &s, + unicode_bidi_level_t direction, + int cleanup_options=0); #if 0 { diff --git a/unicode/courier-unicode.spec.in b/unicode/courier-unicode.spec.in index f7d1eb6..440d6f1 100644 --- a/unicode/courier-unicode.spec.in +++ b/unicode/courier-unicode.spec.in @@ -1,5 +1,12 @@ Summary: Courier Unicode Library +%if 0%{?compat:1} +Name: courier-unicode%(echo @VERSION@ | tr -d '.') + +%define __brp_ldconfig %{nil} + +%else Name: courier-unicode +%endif Version: @VERSION@ Release: 1%{?dist}%{?courier_release} License: GPLv3 @@ -11,10 +18,14 @@ BuildRequires: perl BuildRequires: gcc-c++ BuildRequires: %{__make} +%if 0%{?compat:1} + +%else %package devel Summary: Courier Unicode Library development files Group: Development/Libraries Requires: %{name} = 0:%{version}-%{release} +%endif %description This library implements several algorithms related to the Unicode @@ -24,13 +35,17 @@ This package installs only the run-time libraries needed by applications that use this library. Install the "courier-unicode-devel" package if you want to develop new applications using this library. +%if 0%{?compat:1} + +%else %description devel This package contains development files for the Courier Unicode Library. Install this package if you want to develop applications that uses this unicode library. +%endif %prep -%setup -q +%setup -q -n courier-unicode-@VERSION@ %configure %build %{__make} -s %{?_smp_mflags} @@ -39,12 +54,27 @@ unicode library. rm -rf $RPM_BUILD_ROOT %{__make} install DESTDIR=$RPM_BUILD_ROOT -%post -p /sbin/ldconfig -%postun -p /sbin/ldconfig +%if 0%{?compat:1} +find $RPM_BUILD_ROOT%{_libdir} -type l -print | xargs rm -f +rm -rf $RPM_BUILD_ROOT%{_includedir} +rm -f $RPM_BUILD_ROOT%{_libdir}/*.a +rm -f $RPM_BUILD_ROOT%{_libdir}/*.la +rm -rf $RPM_BUILD_ROOT%{_datadir}/aclocal +rm -rf $RPM_BUILD_ROOT%{_mandir} +%endif %clean rm -rf $RPM_BUILD_ROOT +%post -p /sbin/ldconfig +%postun -p /sbin/ldconfig + +%if 0%{?compat:1} +%files +%defattr(-,root,root,-) +%{_libdir}/*.so.* + +%else %files %defattr(-,root,root,-) @@ -58,6 +88,7 @@ rm -rf $RPM_BUILD_ROOT %{_libdir}/*.la %{_libdir}/*.a %{_datadir}/aclocal/*.m4 +%endif %changelog * Sun Jan 12 2014 Sam Varshavchik <mrsam@octopus.email-scan.com> - 1.0 diff --git a/unicode/unicode_bidi.c b/unicode/unicode_bidi.c index b97ec25..1aa4a88 100644 --- a/unicode/unicode_bidi.c +++ b/unicode/unicode_bidi.c @@ -2063,12 +2063,14 @@ void unicode_bidi_reorder(char32_t *p, level_run_layers_deinit(&layers); } -size_t unicode_bidi_cleanup(char32_t *string, - unicode_bidi_level_t *levels, - size_t n, - int cleanup_options, - void (*removed_callback)(size_t, void *), - void *arg) +static size_t unicode_bidi_count_or_cleanup(const char32_t *string, + char32_t *dest, + unicode_bidi_level_t *levels, + size_t n, + int cleanup_options, + void (*removed_callback)(size_t, + void *), + void *arg) { size_t i=0; for (size_t j=0; j<n; ++j) @@ -2090,13 +2092,34 @@ size_t unicode_bidi_cleanup(char32_t *string, if (levels) levels[i]=levels[j] & 1; - string[i]=(cleanup_options & UNICODE_BIDI_CLEANUP_BNL) - && cl == UNICODE_BIDI_TYPE_B ? '\n' : string[j]; + if (dest) + dest[i]=(cleanup_options & UNICODE_BIDI_CLEANUP_BNL) + && cl == UNICODE_BIDI_TYPE_B ? '\n' : string[j]; ++i; } return i; } +size_t unicode_bidi_cleanup(char32_t *string, + unicode_bidi_level_t *levels, + size_t n, + int cleanup_options, + void (*removed_callback)(size_t, void *), + void *arg) +{ + return unicode_bidi_count_or_cleanup(string, string, levels, n, + cleanup_options, removed_callback, + arg); +} + +size_t unicode_bidi_cleaned_size(const char32_t *string, + size_t n, + int cleanup_options) +{ + return unicode_bidi_count_or_cleanup(string, NULL, NULL, n, + cleanup_options, NULL, NULL); +} + void unicode_bidi_logical_order(char32_t *string, unicode_bidi_level_t *levels, size_t n, diff --git a/unicode/unicodecpp.C b/unicode/unicodecpp.C index e91f756..04d9879 100644 --- a/unicode/unicodecpp.C +++ b/unicode/unicodecpp.C @@ -949,3 +949,20 @@ unicode_bidi_direction unicode::bidi_get_direction(const std::u32string &string, return unicode_bidi_get_direction(string.c_str()+starting_pos, n); } + +std::u32string unicode::bidi_override(const std::u32string &s, + unicode_bidi_level_t direction, + int cleanup_options) +{ + std::u32string ret; + + ret.reserve(s.size()+1); + + ret.push_back(' '); + ret.insert(ret.end(), s.begin(), s.end()); + + bidi_cleanup(ret, [](size_t) {}, cleanup_options); + ret.at(0)=direction & 1 ? UNICODE_RLO : UNICODE_LRO; + + return ret; +} |
