diff options
Diffstat (limited to 'unicode')
| -rw-r--r-- | unicode/.gitignore | 1 | ||||
| -rw-r--r-- | unicode/ChangeLog | 22 | ||||
| -rw-r--r-- | unicode/Makefile.am | 60 | ||||
| -rw-r--r-- | unicode/biditest2.C | 80 | ||||
| -rw-r--r-- | unicode/book.xml | 238 | ||||
| -rw-r--r-- | unicode/configure.ac | 24 | ||||
| -rw-r--r-- | unicode/courier-unicode-version.m4.in | 45 | ||||
| -rw-r--r-- | unicode/courier-unicode.h.in | 65 | ||||
| -rw-r--r-- | unicode/courier-unicode.spec.in | 37 | ||||
| -rw-r--r-- | unicode/m4/courier-unicode.m4 | 41 | ||||
| -rw-r--r-- | unicode/unicode_bidi.c | 131 | ||||
| -rw-r--r-- | unicode/unicodecpp.C | 71 |
12 files changed, 665 insertions, 150 deletions
diff --git a/unicode/.gitignore b/unicode/.gitignore index 8905e05..e749d69 100644 --- a/unicode/.gitignore +++ b/unicode/.gitignore @@ -26,6 +26,7 @@ /config.sub /courier-unicode.h /courier-unicode.spec +/courier-unicode-version.m4 /depcomp /docs.stamp /enttest diff --git a/unicode/ChangeLog b/unicode/ChangeLog index 378fede..35cffe6 100644 --- a/unicode/ChangeLog +++ b/unicode/ChangeLog @@ -1,3 +1,25 @@ +2021-02-24 Sam Varshavchik <mrsam@courier-mta.com> + + * Implement unicode_bidi_needs_embed(), unicode_bidi_cleaned_size(), + unicode::bidi_override. + +2.2.1 + +2021-02-14 Sam Varshavchik <mrsam@courier-mta.com> + + * unicode_bidi_calc and unicode_bidi_calc_levels return a + unicode_bidi_direction object, to indicate whether the + computed paragraph embedding level was explicitly computed or + defaulted. + +2021-02-13 Sam Varshavchik <mrsam@courier-mta.com> + + * courier-unicode-version.m4: split version checking macro into its + own m4 file. + + * unicode::bidi_calc - an empty string with an explicit embedding + level should return the requested embedding level. 
+ 2.2 2020-11-05 Sam Varshavchik <mrsam@courier-mta.com> diff --git a/unicode/Makefile.am b/unicode/Makefile.am index 7ba36f1..dc502b3 100644 --- a/unicode/Makefile.am +++ b/unicode/Makefile.am @@ -24,7 +24,7 @@ noinst_PROGRAMS=unicodetest graphemetest linebreaktest wordbreaktest \ enttest scripttest biditest biditest2 aclocaldir=$(datadir)/aclocal -aclocal_DATA=m4/courier-unicode.m4 +aclocal_DATA=m4/courier-unicode.m4 courier-unicode-version.m4 update-www: @$(MAKE) update-www-unicode @@ -89,42 +89,46 @@ include_HEADERS=courier-unicode.h \ man_MANS= \ $(srcdir)/man/courier-unicode.7 \ - $(srcdir)/man/unicode\:\:bidi.3 \ - $(srcdir)/man/unicode\:\:bidi_calc.3 \ - $(srcdir)/man/unicode\:\:bidi_calc_types.3 \ - $(srcdir)/man/unicode\:\:bidi_cleanup.3 \ - $(srcdir)/man/unicode\:\:bidi_embed.3 \ - $(srcdir)/man/unicode\:\:bidi_embed_paragraph_level.3 \ - $(srcdir)/man/unicode\:\:bidi_get_direction.3 \ - $(srcdir)/man/unicode\:\:bidi_logical_order.3 \ - $(srcdir)/man/unicode\:\:bidi_reorder.3 \ - $(srcdir)/man/unicode\:\:iconvert\:\:convert.3 \ - $(srcdir)/man/unicode\:\:iconvert\:\:convert_tocase.3 \ - $(srcdir)/man/unicode\:\:iconvert\:\:fromu.3 \ - $(srcdir)/man/unicode\:\:iconvert\:\:tou.3 \ - $(srcdir)/man/unicode\:\:iso_8859_1.3 \ - $(srcdir)/man/unicode\:\:linebreak_callback_base.3 \ - $(srcdir)/man/unicode\:\:linebreak_callback_save_buf.3 \ - $(srcdir)/man/unicode\:\:linebreak_iter.3 \ - $(srcdir)/man/unicode\:\:linebreakc_callback_base.3 \ - $(srcdir)/man/unicode\:\:linebreakc_iter.3 \ - $(srcdir)/man/unicode\:\:tolower.3 \ - $(srcdir)/man/unicode\:\:toupper.3 \ - $(srcdir)/man/unicode\:\:ucs_2.3 \ - $(srcdir)/man/unicode\:\:ucs_4.3 \ - $(srcdir)/man/unicode\:\:utf_8.3 \ - $(srcdir)/man/unicode\:\:wordbreak_callback_base.3 \ + $(srcdir)/man/unicode[\:][\:]bidi.3 \ + $(srcdir)/man/unicode[\:][\:]bidi_calc.3 \ + $(srcdir)/man/unicode[\:][\:]bidi_calc_types.3 \ + $(srcdir)/man/unicode[\:][\:]bidi_cleanup.3 \ + $(srcdir)/man/unicode[\:][\:]bidi_embed.3 \ + 
$(srcdir)/man/unicode[\:][\:]bidi_embed_paragraph_level.3 \ + $(srcdir)/man/unicode[\:][\:]bidi_get_direction.3 \ + $(srcdir)/man/unicode[\:][\:]bidi_logical_order.3 \ + $(srcdir)/man/unicode[\:][\:]bidi_needs_embed.3 \ + $(srcdir)/man/unicode[\:][\:]bidi_override.3 \ + $(srcdir)/man/unicode[\:][\:]bidi_reorder.3 \ + $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]convert.3 \ + $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]convert_tocase.3 \ + $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]fromu.3 \ + $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]tou.3 \ + $(srcdir)/man/unicode[\:][\:]iso_8859_1.3 \ + $(srcdir)/man/unicode[\:][\:]linebreak_callback_base.3 \ + $(srcdir)/man/unicode[\:][\:]linebreak_callback_save_buf.3 \ + $(srcdir)/man/unicode[\:][\:]linebreak_iter.3 \ + $(srcdir)/man/unicode[\:][\:]linebreakc_callback_base.3 \ + $(srcdir)/man/unicode[\:][\:]linebreakc_iter.3 \ + $(srcdir)/man/unicode[\:][\:]tolower.3 \ + $(srcdir)/man/unicode[\:][\:]toupper.3 \ + $(srcdir)/man/unicode[\:][\:]ucs_2.3 \ + $(srcdir)/man/unicode[\:][\:]ucs_4.3 \ + $(srcdir)/man/unicode[\:][\:]utf_8.3 \ + $(srcdir)/man/unicode[\:][\:]wordbreak_callback_base.3 \ $(srcdir)/man/unicode_bidi.3 \ $(srcdir)/man/unicode_bidi_bracket_type.3 \ $(srcdir)/man/unicode_bidi_calc.3 \ $(srcdir)/man/unicode_bidi_calc_levels.3 \ $(srcdir)/man/unicode_bidi_calc_types.3 \ + $(srcdir)/man/unicode_bidi_cleaned_size.3 \ $(srcdir)/man/unicode_bidi_cleanup.3 \ $(srcdir)/man/unicode_bidi_direction.3 \ $(srcdir)/man/unicode_bidi_embed.3 \ $(srcdir)/man/unicode_bidi_embed_paragraph_level.3 \ $(srcdir)/man/unicode_bidi_logical_order.3 \ $(srcdir)/man/unicode_bidi_mirror.3 \ + $(srcdir)/man/unicode_bidi_needs_embed.3 \ $(srcdir)/man/unicode_bidi_reorder.3 \ $(srcdir)/man/unicode_bidi_setbnl.3 \ $(srcdir)/man/unicode_bidi_type.3 \ @@ -219,7 +223,7 @@ libcourier_unicode_la_SOURCES=\ bidi_mirroring.h \ unicode_categories.c -libcourier_unicode_la_LDFLAGS=-version-info 6:0:2 +libcourier_unicode_la_LDFLAGS=-version-info 7:0:0 
EXTRA_DIST=$(noinst_SCRIPTS) $(man_MANS) $(PACKAGE).spec \ m4/courier-unicode.m4 \ @@ -444,7 +448,7 @@ docs.stamp: rm -f man/*.[123456789] mv man.tmp/* man rm -rf html.tmp man.tmp - perl -e '$$f=join("",<STDIN>); $$p=join("", map { " \\\n \$$(srcdir)/$$_" } glob("man/*.[123456789]")); $$p=~s/:/\\:/g; $$f =~ s/\nman_MANS=([^\n]|\n[^\n])*/\nman_MANS=$$p/s; print $$f' <Makefile.am >Makefile.am.new + perl -e '$$f=join("",<STDIN>); $$p=join("", map { " \\\n \$$(srcdir)/$$_" } glob("man/*.[123456789]")); $$p=~s/:/\[\\:\]/g; $$f =~ s/\nman_MANS=([^\n]|\n[^\n])*/\nman_MANS=$$p/s; print $$f' <Makefile.am >Makefile.am.new cmp Makefile.am Makefile.am.new || mv -f Makefile.am.new Makefile.am; rm -f Makefile.am.new touch docs.stamp diff --git a/unicode/biditest2.C b/unicode/biditest2.C index 8e9d7da..a14b3ea 100644 --- a/unicode/biditest2.C +++ b/unicode/biditest2.C @@ -204,14 +204,14 @@ void character_test() ? unicode::bidi_calc(s, direction) : unicode::bidi_calc(s); - if (std::get<1>(ret) != paragraph_embedding_level) + if (std::get<1>(ret).direction != paragraph_embedding_level) { std::cerr << "Regression, line " << linenum << ": expected " << paragraph_embedding_level << " paragraph embedding level, got " - << (int)std::get<1>(ret) + << (int)std::get<1>(ret).direction << std::endl; exit(1); } @@ -274,6 +274,9 @@ void character_test() std::reverse(b+index, b+index+n); }); + size_t cleaned_size=unicode_bidi_cleaned_size(s.c_str(), + s.size(), 0); + n=0; unicode::bidi_cleanup (s, levels, @@ -285,6 +288,17 @@ void character_test() ++n; }); + if (cleaned_size != s.size()) + { + std::cerr << "Regression, line " + << linenum + << ": default cleaned size" + << std::endl + << " Expected size: " << cleaned_size + << ", actual size: " << s.size() + << std::endl; + exit(1); + } if (render_order != actual_render_order) { std::cerr << "Regression, line " @@ -408,6 +422,12 @@ void character_test() } unicode::bidi_reorder(new_string, std::get<0>(ret)); + + 
cleaned_size=unicode_bidi_cleaned_size + (new_string.c_str(), + new_string.size(), + UNICODE_BIDI_CLEANUP_CANONICAL); + unicode::bidi_cleanup(new_string, std::get<0>(ret), [] @@ -416,6 +436,20 @@ void character_test() }, UNICODE_BIDI_CLEANUP_CANONICAL); + if (cleaned_size != new_string.size()) + { + std::cerr << "Regression, line " + << linenum + << ": canonoical cleaned size" + << std::endl + << " Expected size: " + << cleaned_size + << ", actual size: " + << new_string.size() + << std::endl; + exit(1); + } + /* New string is now back in logical order */ if (new_string == s && std::get<0>(ret) == levels) @@ -548,34 +582,62 @@ void null_character_test() }, UNICODE_BIDI_CLEANUP_EXTRA, 0, 3); + + s=U""; + res=unicode::bidi_calc(s, UNICODE_BIDI_RL); + + if (std::get<1>(res).direction != UNICODE_BIDI_RL) + { + std::cerr << "Paragraph embedding level not honored" + << std::endl; + exit(1); + } } void direction_test() { static const struct { - const char32_t *str; + std::u32string str; unicode_bidi_level_t direction; int is_explicit; + bool needs_embed; } tests[]={ { U"Hello", UNICODE_BIDI_LR, 1, + true, }, { U" ", UNICODE_BIDI_LR, 0, + true, }, { U"", UNICODE_BIDI_LR, 0, + true, }, { U"שלום", UNICODE_BIDI_RL, 1, + true, + }, + { + U"Helloש", + UNICODE_BIDI_LR, + 1, + true, + }, + { + U"Hello" + std::u32string{unicode::literals::LRO} + + U"ש", + UNICODE_BIDI_LR, + 1, + false, }, }; @@ -589,6 +651,18 @@ void direction_test() std::cerr << "direction_test failed\n"; exit(1); } + + std::u32string s=t.str; + auto levels=std::get<0>(unicode::bidi_calc(s, t.direction)); + unicode::bidi_reorder(s, levels); + unicode::bidi_cleanup(s, levels); + + if (unicode::bidi_needs_embed(s, levels, &t.direction) + != t.needs_embed) + { + std::cerr << "needs embed failed\n"; + exit(1); + } } } diff --git a/unicode/book.xml b/unicode/book.xml index c3ebc33..4f0fd71 100644 --- a/unicode/book.xml +++ b/unicode/book.xml @@ -99,11 +99,39 @@ See COPYING for distribution information. 
<para> Download the current version of the library from <ulink url="/download.html#unicode">https://www.courier-mta.org/download.html#unicode</ulink>. - After unpacking the tarball, run the configure script, which takes - the usual options, followed by <command>make</command>, then - <command>make install</command>. + Use the downloaded tarball to create an appropriate installation + package for your operating system distribution. + The typical sequence of commands is: </para> + <blockquote> + <informalexample> + <programlisting> +./configure # Takes the default configure script options +make +make install DESTDIR=/tmp/courier-unicode-instimage # For example.</programlisting> + </informalexample> + </blockquote> + + <para> + The library uses a stock configure script, <command>make</command> + and <command>make install</command> command that respects the + <varname>DESTDIR</varname> setting to create an installation image + in the directory specified by <varname>DESTDIR</varname>. + </para> + + <note> + <para> + <command>make install</command> will not take any explicit action + to uninstall any older version of the library, or remove any files + from an older version that do not exist any more in the new version. + The library's installation image should be used to prepare an + installable package in a native package format for your operating + system distribution. Use your native system distribution's package + manager to properly install and uninstall the library's package. + </para> + </note> + <para> To use the library, <quote>#include <courier-unicode.h></quote> and link with <literal>-lcourier-unicode</literal>. @@ -306,7 +334,9 @@ See COPYING for distribution information. 
<refname>unicode_bidi_calc</refname> <refname>unicode_bidi_reorder</refname> <refname>unicode_bidi_cleanup</refname> + <refname>unicode_bidi_cleaned_size</refname> <refname>unicode_bidi_logical_order</refname> + <refname>unicode_bidi_needs_embed</refname> <refname>unicode_bidi_embed</refname> <refname>unicode_bidi_embed_paragraph_level</refname> @@ -331,7 +361,7 @@ See COPYING for distribution information. </funcprototype> <funcprototype> - <funcdef>void <function>unicode_bidi_calc_levels</function></funcdef> + <funcdef>struct unicode_bidi_direction <function>unicode_bidi_calc_levels</function></funcdef> <paramdef>const char32_t *<parameter>p</parameter></paramdef> <paramdef>const unicode_bidi_type_t *<parameter>types</parameter></paramdef> <paramdef>size_t <parameter>n</parameter></paramdef> @@ -340,7 +370,7 @@ See COPYING for distribution information. </funcprototype> <funcprototype> - <funcdef>void <function>unicode_bidi_calc</function></funcdef> + <funcdef>struct unicode_bidi_direction <function>unicode_bidi_calc</function></funcdef> <paramdef>const char32_t *<parameter>p</parameter></paramdef> <paramdef>size_t <parameter>n</parameter></paramdef> <paramdef>unicode_bidi_level_t *<parameter>levels</parameter></paramdef> @@ -367,7 +397,14 @@ See COPYING for distribution information. 
</funcprototype> <funcprototype> - <funcdef>size_t <function>unicode_bidi_logical_order</function></funcdef> + <funcdef>size_t <function>unicode_bidi_cleaned_size</function></funcdef> + <paramdef>const char32_t *<parameter>string</parameter></paramdef> + <paramdef>size_t <parameter>n</parameter></paramdef> + <paramdef>int <parameter>options</parameter></paramdef> + </funcprototype> + + <funcprototype> + <funcdef>void <function>unicode_bidi_logical_order</function></funcdef> <paramdef>char32_t *<parameter>string</parameter></paramdef> <paramdef>unicode_bidi_level_t *<parameter>levels</parameter></paramdef> <paramdef>size_t <parameter>n</parameter></paramdef> @@ -377,6 +414,14 @@ See COPYING for distribution information. </funcprototype> <funcprototype> + <funcdef>int <function>unicode_bidi_needs_embed</function></funcdef> + <paramdef>const char32_t *<parameter>string</parameter></paramdef> + <paramdef>const unicode_bidi_level_t *<parameter>levels</parameter></paramdef> + <paramdef>size_t <parameter>n</parameter></paramdef> + <paramdef>const unicode_bidi_level_t <parameter>*paragraph_embedding</parameter></paramdef> + </funcprototype> + + <funcprototype> <funcdef>size_t <function>unicode_bidi_embed</function></funcdef> <paramdef>const char32_t *<parameter>string</parameter></paramdef> <paramdef>const unicode_bidi_level_t *<parameter>levels</parameter></paramdef> @@ -510,6 +555,9 @@ See COPYING for distribution information. to remove the characters from the string which are used by the bi-directional algorithm, and are not needed for rendering the text. + <function>unicode_bidi_cleaned_size</function>() is + available to determine, in advance, how many characters + will remain. </para> </listitem> </orderedlist> @@ -617,8 +665,46 @@ See COPYING for distribution information. return the resolved paragraph direction level, which always matches the passed in level, if specified, else it - reports the - derived one. + reports the derived one. 
 These functions return a + <structname>unicode_bidi_direction</structname> structure: + </para> + + <informaltable border='0'> + <tgroup cols="3"> + <colspec colname='c1' /> + <colspec colname='c2' /> + <colspec colname='c3' /> + <tbody> + <row> + <entry namest='c1' nameend='c3'>struct <structname>unicode_bidi_direction</structname> {</entry> + </row> + <row> + <entry></entry> + <entry>unicode_bidi_level_t</entry> + <entry><varname>direction</varname>;</entry> + </row> + <row> + <entry></entry> + <entry>int</entry> + <entry><varname>is_explicit</varname>;</entry> + </row> + <row> + <entry namest='c1' nameend='c3'>};</entry> + </row> + </tbody> + </tgroup> + </informaltable> + <para> + <varname>direction</varname> gives the paragraph embedding + level, <literal>UNICODE_BIDI_LR</literal> or + <literal>UNICODE_BIDI_RL</literal>. + <varname>is_explicit</varname> indicates whether + the optional pointer to a + <literal>UNICODE_BIDI_LR</literal> or + <literal>UNICODE_BIDI_RL</literal> value was specified (and + returned in <varname>direction</varname>), or whether the + <varname>direction</varname> comes from a character with an + explicit direction indication. </para> <para> @@ -794,18 +880,25 @@ See COPYING for distribution information. with the <literal>UNICODE_BIDI_CLEANUP_CANONICAL</literal> are in <quote>canonical rendering order</quote>. - <function>unicode_bidi_logical_order</function>() and + <function>unicode_bidi_logical_order</function>(), + <function>unicode_bidi_needs_embed</function>() and <function>unicode_bidi_embed</function>() require the canonical rendering order for their string and embedding level values. </para> + <para> + The parameters to <function>unicode_bidi_cleaned_size</function>() + are a pointer to the unicode string, its size, and + the bitmask option to <function>unicode_bidi_cleanup</function>(). 
+ </para> </refsect2> <refsect2 id="unicode_bidi_embed"> <title>Embedding bi-directional markers in Unicode text strings</title> <para> - <function>unicode_bidi_logical_order</function>() and - <function>unicode_bidi_embed</function>() add various + <function>unicode_bidi_logical_order</function>() rearranges + the string from rendering to its logical order. + <function>unicode_bidi_embed</function>() adds various bi-directional markers to a Unicode string in canonical rendering order. The resulting string is not guaranteed to be identical to the @@ -819,12 +912,18 @@ See COPYING for distribution information. <function>unicode_bidi_cleanup()</function> (with the canonical option), with the same paragraph_embedding level. + <function>unicode_bidi_needs_embed</function>() attempts to + heuristically determine whether + <function>unicode_bidi_embed</function>() is required. </para> <para> <function>unicode_bidi_logical_order</function>() gets called first, followed by - <function>unicode_bidi_embed</function>(). + <function>unicode_bidi_embed</function>() + (or + <function>unicode_bidi_needs_embed</function>() in order to + determine whether bi-directional markers are required). Finally, <function>unicode_bidi_embed_paragraph_level</function>() optionally determines whether the resulting string's default paragraph embedding level matches the one used for the actual @@ -881,12 +980,12 @@ See COPYING for distribution information. <itemizedlist> <listitem> <para> - The Unicode string, and … + The Unicode string. </para> </listitem> <listitem> <para> - … the directional embedding buffer, in canonical + The directional embedding buffer, in canonical rendering order. </para> </listitem> @@ -998,6 +1097,53 @@ See COPYING for distribution information. </para> </listitem> </itemizedlist> + + <para> + <function>unicode_bidi_needs_embed</function>() attempts to + heuristically determine whether the Unicode string, in logical + order, requires bi-directional markers. 
+ The parameters to + <function>unicode_bidi_needs_embed</function>() are: + </para> + <itemizedlist> + <listitem> + <para> + The Unicode string. + </para> + </listitem> + <listitem> + <para> + The directional embedding buffer, in logical + rendering order. + </para> + </listitem> + <listitem> + <para> + The size of the string and the embedding level buffer. + </para> + </listitem> + <listitem> + <para> + A pointer to an explicit paragraph embedding level, either + <literal>UNICODE_BIDI_LR</literal> or + <literal>UNICODE_BIDI_RL</literal>; or a + <literal>NULL</literal> pointer (see + <function>unicode_bidi_calc_types</function>()'s + explanation for this parameter). + </para> + </listitem> + </itemizedlist> + + <para> + <function>unicode_bidi_needs_embed</function>() returns 0 + if the Unicode string does not need explicit directional + markers, or 1 if it does. This is done by using + <function>unicode_bidi_calc()</function>, + <function>unicode_bidi_reorder()</function>, + <function>unicode_bidi_logical_order</function> and then + checking if the end result is different from what was passed + in. + </para> </refsect2> <refsect2 id="unicode_bidi_misc"> <title>Miscellaneous utility functions</title> @@ -2837,9 +2983,11 @@ See COPYING for distribution information. <refname>unicode::bidi_reorder</refname> <refname>unicode::bidi_cleanup</refname> <refname>unicode::bidi_logical_order</refname> + <refname>unicode::bidi_needs_embed</refname> <refname>unicode::bidi_embed</refname> <refname>unicode::bidi_embed_paragraph_level</refname> <refname>unicode::bidi_get_direction</refname> + <refname>unicode::bidi_override</refname> <refpurpose>unicode bi-directional algorithm</refpurpose> </refnamediv> @@ -2871,12 +3019,12 @@ See COPYING for distribution information. 
 <funcsynopsis> <funcprototype> - <funcdef>std::tuple<std::vector<unicode_bidi_level_t>, unicode_bidi_level_t> <function>unicode::bidi_calc</function></funcdef> + <funcdef>std::tuple<std::vector<unicode_bidi_level_t>, struct unicode_bidi_direction> <function>unicode::bidi_calc</function></funcdef> <paramdef>const unicode::bidi_calc_types &<parameter>ustring</parameter></paramdef> </funcprototype> <funcprototype> - <funcdef>std::tuple<std::vector<unicode_bidi_level_t>, unicode_bidi_level_t> <function>unicode::bidi_calc</function></funcdef> + <funcdef>std::tuple<std::vector<unicode_bidi_level_t>, struct unicode_bidi_direction> <function>unicode::bidi_calc</function></funcdef> <paramdef>const unicode::bidi_calc_types &<parameter>ustring</parameter></paramdef> <paramdef>unicode_bidi_level_t <parameter>embedding_level</parameter></paramdef> </funcprototype> @@ -2943,6 +3091,15 @@ See COPYING for distribution information. </funcprototype> <funcprototype> + <funcdef>bool <function>unicode::bidi_needs_embed</function></funcdef> + <paramdef>const std::u32string &<parameter>string</parameter></paramdef> + <paramdef>const std::vector <unicode_bidi_level_t> &<parameter>levels</parameter></paramdef> + <paramdef>const unicode_bidi_level_t *<parameter>paragraph_embedding</parameter>=NULL</paramdef> + <paramdef>size_t <parameter>starting_pos</parameter>=0</paramdef> + <paramdef>size_t <parameter>n</parameter>=(size_t)-1</paramdef> + </funcprototype> + + <funcprototype> <funcdef>int <function>unicode::bidi_embed</function></funcdef> <paramdef>const std::u32string &<parameter>string</parameter></paramdef> <paramdef>const std::vector <unicode_bidi_level_t> &<parameter>levels</parameter></paramdef> @@ -2969,6 +3126,13 @@ See COPYING for distribution information. 
<paramdef>size_t <parameter>starting_pos</parameter>=0</paramdef> <paramdef>size_t <parameter>n</parameter>=(size_t)-1</paramdef> </funcprototype> + + <funcprototype> + <funcdef>std::u32string <function>bidi_override</function></funcdef> + <paramdef>const std::u32string &<parameter>string</parameter></paramdef> + <paramdef>unicode_bidi_level_t <parameter>direction</parameter></paramdef> + <paramdef>int <parameter>cleanup_options</parameter>=0</paramdef> + </funcprototype> </funcsynopsis> </refsynopsisdiv> @@ -2999,7 +3163,7 @@ See COPYING for distribution information. <programlisting><![CDATA[ std::u32string text; -auto [levels, level]=unicode::bidi_calc(text); +auto [levels, direction]=unicode::bidi_calc(text); ]]></programlisting> </informalexample> @@ -3022,7 +3186,7 @@ types.setbnl(text); // Optional // types.types is a std::vector of enum_bidi_types_t values -auto [levels, level]=unicode::bidi_calc(types); +auto [levels, direction]=unicode::bidi_calc(types); ]]></programlisting> </informalexample> @@ -3106,7 +3270,8 @@ auto [levels, level]=unicode::bidi_calc(types); <para> <function>unicode::bidi_reorder</function>, <function>unicode::bidi_cleanup</function>, - <function>unicode::bidi_logical_order</function> and + <function>unicode::bidi_logical_order</function>, + <function>unicode::bidi_needs_embed</function> and <function>unicode::bidi_get_direction</function> take two optional parameters (defaulted values or overloaded) specifying @@ -3124,6 +3289,41 @@ auto [levels, level]=unicode::bidi_calc(types); </para> </listitem> + <listitem> + <para> + <function>unicode::bidi_override</function> + modifies the passed-in <parameter>string</parameter> as + follows: + </para> + + <itemizedlist> + <listitem> + <para> + <function>unicode::bidi_cleanup</function>() is applied + with the specified, or defaulted, + <replaceable>cleanup_options</replaceable> + </para> + </listitem> + + <listitem> + <para> + Either the <literal>LRO</literal> or an + <literal>RLO</literal> 
override marker gets prepended + to the Unicode string, forcing the entire string to + be interpreted in a single rendering direction, when + processed by the Unicode bi-directional algorithm. + </para> + </listitem> + </itemizedlist> + + <para> + <function>unicode::bidi_override</function> makes it + possible to use a Unicode-aware application or algorithm + in a context that only works with text that's always + displayed in a fixed direction, allowing graceful handling + of input containing bi-directional text. + </para> + </listitem> </itemizedlist> <refsect2 id="unicode_cpp_bidi_literals"> diff --git a/unicode/configure.ac b/unicode/configure.ac index 5c366de..1cc3b76 100644 --- a/unicode/configure.ac +++ b/unicode/configure.ac @@ -1,6 +1,6 @@ dnl Process this file with autoconf to produce a configure script. -AC_INIT([courier-unicode], [2.2], [courier-users@lists.sourceforge.net]) +AC_INIT([courier-unicode], [2.2.1.20210220], [courier-users@lists.sourceforge.net]) >confdefs.h # Kill PACKAGE_ macros @@ -117,5 +117,25 @@ fi CFLAGS="-I.. -I$srcdir/.. $CFLAGS" CXXFLAGS="-I.. -I$srcdir/.. $CXXFLAGS" +set -- `echo "$VERSION" | tr '.' ' '` + +v=$1 +r=`echo "00"$2 | sed 's/.*(...)$/$1/'` +p=$3 + +if test "$p" = "" + then p="0" +fi + +p=`echo "00"$p | sed 's/.*(...)$/$[]1/'` + +HVERSION="$v$r$p" +AC_SUBST(HVERSION) + AM_CONDITIONAL(HAVE_DOCS,[test -f $srcdir/docbook/icon.gif]) -AC_OUTPUT(Makefile packaging/freebsd10/Makefile courier-unicode.spec courier-unicode.h) +AC_OUTPUT(Makefile + packaging/freebsd10/Makefile + courier-unicode.spec + courier-unicode.h + courier-unicode-version.m4 +) diff --git a/unicode/courier-unicode-version.m4.in b/unicode/courier-unicode-version.m4.in new file mode 100644 index 0000000..94b0c04 --- /dev/null +++ b/unicode/courier-unicode-version.m4.in @@ -0,0 +1,45 @@ +dnl Checks for the correct version of the courier-unicode library. 
+ +AC_DEFUN([AX_COURIER_UNICODE_VERSION],[ + +AC_MSG_CHECKING(courier-unicode library and version) + +vers="$1" + +if test "$vers" = "" +then + vers=@VERSION@ +fi + +set -- `echo "$vers" | tr '.' ' '` + +v=$[]1 +r=`echo "00"$[]2 | sed 's/.*(...)$/$[]1/'` + +p=$[]3 + +if test "$p" = "" + then p="0" +fi + +p=`echo "00"$p | sed 's/.*(...)$/$[]1/'` + +AC_TRY_COMPILE([ +#include <courier-unicode.h> +#ifndef COURIER_UNICODE_VERSION +#define COURIER_UNICODE_VERSION 0 +#endif + +#if COURIER_UNICODE_VERSION < ]$v$r$p[ +#error "courier-unicode ]$vers[ library is required" +#endif + +],[],[], +AC_MSG_ERROR([ +ERROR: The Courier Unicode Library ]$vers[ header files appear not to be installed. +You may need to upgrade the library or install a separate development +subpackage in addition to the main package.]) +) + +AC_MSG_RESULT([ok]) +]) diff --git a/unicode/courier-unicode.h.in b/unicode/courier-unicode.h.in index 57603da..2999ee3 100644 --- a/unicode/courier-unicode.h.in +++ b/unicode/courier-unicode.h.in @@ -41,7 +41,7 @@ typedef uint32_t char32_t; #endif #endif -#define COURIER_UNICODE_VERSION 220 +#define COURIER_UNICODE_VERSION @HVERSION@ /* ** The system default character set, from the locale. 
@@ -604,10 +604,10 @@ struct unicode_bidi_direction { struct unicode_bidi_direction unicode_bidi_get_direction(const char32_t *p, size_t n); -extern unicode_bidi_level_t unicode_bidi_calc(const char32_t *p, size_t n, - unicode_bidi_level_t *bufp, - const unicode_bidi_level_t * - initial_embedding_level); +struct unicode_bidi_direction unicode_bidi_calc(const char32_t *p, size_t n, + unicode_bidi_level_t *bufp, + const unicode_bidi_level_t * + initial_embedding_level); extern void unicode_bidi_reorder(char32_t *p, unicode_bidi_level_t *levels, @@ -652,13 +652,15 @@ extern void unicode_bidi_setbnl(char32_t *p, const enum_bidi_type_t *types, size_t n); -extern unicode_bidi_level_t unicode_bidi_calc_levels(const char32_t *p, - const enum_bidi_type_t - *types, - size_t n, - unicode_bidi_level_t *bufp, - const unicode_bidi_level_t - *initial_embedding_level); +extern struct unicode_bidi_direction +unicode_bidi_calc_levels(const char32_t *p, + const enum_bidi_type_t + *types, + size_t n, + unicode_bidi_level_t *bufp, + const unicode_bidi_level_t + *initial_embedding_level); + /* Bitmask options to unicode_bidi_cleanup */ /* @@ -703,6 +705,10 @@ extern size_t unicode_bidi_cleanup(char32_t *string, void (*removed_callback)(size_t, void *), void *); +extern size_t unicode_bidi_cleaned_size(const char32_t *string, + size_t n, + int options); + extern void unicode_bidi_logical_order(char32_t *string, unicode_bidi_level_t *levels, size_t n, @@ -711,6 +717,12 @@ extern void unicode_bidi_logical_order(char32_t *string, void *), void *arg); +extern int unicode_bidi_needs_embed(const char32_t *string, + const unicode_bidi_level_t *levels, + size_t n, + const unicode_bidi_level_t * + paragraph_embedding); + extern void unicode_bidi_embed(const char32_t *string, const unicode_bidi_level_t *levels, size_t n, @@ -2231,7 +2243,7 @@ struct bidi_calc_types { //! can be constructed explicitly, and then passed in directly. 
std::tuple<std::vector<unicode_bidi_level_t>, - unicode_bidi_level_t> bidi_calc(const bidi_calc_types &s); + struct unicode_bidi_direction> bidi_calc(const bidi_calc_types &s); //! Calculate bidirectional embedding levels @@ -2242,8 +2254,8 @@ std::tuple<std::vector<unicode_bidi_level_t>, //! embedding level. std::tuple<std::vector<unicode_bidi_level_t>, - unicode_bidi_level_t> bidi_calc(const bidi_calc_types &s, - unicode_bidi_level_t level); + struct unicode_bidi_direction> bidi_calc(const bidi_calc_types &s, + unicode_bidi_level_t level); //! Reorder bidirectional text @@ -2322,6 +2334,14 @@ void bidi_logical_order(std::vector<unicode_bidi_level_t> &levels, size_t starting_pos=0, size_t n=(size_t)-1); +//! Whether directional and isolation markers are needed. + +bool bidi_needs_embed(const std::u32string &string, + const std::vector<unicode_bidi_level_t> &levels, + const unicode_bidi_level_t *paragraph_embedding=0, + size_t starting_pos=0, + size_t n=(size_t)-1); + //! Embed directional and isolation markers //! Non-0 return value indicates the string and levels' sizes do not match. @@ -2352,14 +2372,19 @@ std::u32string bidi_embed(const std::u32string &string, //! In order for the unicode string to have the specified default //! paragraph embedding level. -extern char32_t bidi_embed_paragraph_level(const std::u32string &string, - unicode_bidi_level_t level); +char32_t bidi_embed_paragraph_level(const std::u32string &string, + unicode_bidi_level_t level); //! Compute default direction of text -extern unicode_bidi_direction bidi_get_direction(const std::u32string &string, - size_t starting_pos=0, - size_t n=(size_t)-1); +unicode_bidi_direction bidi_get_direction(const std::u32string &string, + size_t starting_pos=0, + size_t n=(size_t)-1); + +//! Override bidi direction. 
+std::u32string bidi_override(const std::u32string &s, + unicode_bidi_level_t direction, + int cleanup_options=0); #if 0 { diff --git a/unicode/courier-unicode.spec.in b/unicode/courier-unicode.spec.in index f7d1eb6..440d6f1 100644 --- a/unicode/courier-unicode.spec.in +++ b/unicode/courier-unicode.spec.in @@ -1,5 +1,12 @@ Summary: Courier Unicode Library +%if 0%{?compat:1} +Name: courier-unicode%(echo @VERSION@ | tr -d '.') + +%define __brp_ldconfig %{nil} + +%else Name: courier-unicode +%endif Version: @VERSION@ Release: 1%{?dist}%{?courier_release} License: GPLv3 @@ -11,10 +18,14 @@ BuildRequires: perl BuildRequires: gcc-c++ BuildRequires: %{__make} +%if 0%{?compat:1} + +%else %package devel Summary: Courier Unicode Library development files Group: Development/Libraries Requires: %{name} = 0:%{version}-%{release} +%endif %description This library implements several algorithms related to the Unicode @@ -24,13 +35,17 @@ This package installs only the run-time libraries needed by applications that use this library. Install the "courier-unicode-devel" package if you want to develop new applications using this library. +%if 0%{?compat:1} + +%else %description devel This package contains development files for the Courier Unicode Library. Install this package if you want to develop applications that uses this unicode library. +%endif %prep -%setup -q +%setup -q -n courier-unicode-@VERSION@ %configure %build %{__make} -s %{?_smp_mflags} @@ -39,12 +54,27 @@ unicode library. 
rm -rf $RPM_BUILD_ROOT %{__make} install DESTDIR=$RPM_BUILD_ROOT -%post -p /sbin/ldconfig -%postun -p /sbin/ldconfig +%if 0%{?compat:1} +find $RPM_BUILD_ROOT%{_libdir} -type l -print | xargs rm -f +rm -rf $RPM_BUILD_ROOT%{_includedir} +rm -f $RPM_BUILD_ROOT%{_libdir}/*.a +rm -f $RPM_BUILD_ROOT%{_libdir}/*.la +rm -rf $RPM_BUILD_ROOT%{_datadir}/aclocal +rm -rf $RPM_BUILD_ROOT%{_mandir} +%endif %clean rm -rf $RPM_BUILD_ROOT +%post -p /sbin/ldconfig +%postun -p /sbin/ldconfig + +%if 0%{?compat:1} +%files +%defattr(-,root,root,-) +%{_libdir}/*.so.* + +%else %files %defattr(-,root,root,-) @@ -58,6 +88,7 @@ rm -rf $RPM_BUILD_ROOT %{_libdir}/*.la %{_libdir}/*.a %{_datadir}/aclocal/*.m4 +%endif %changelog * Sun Jan 12 2014 Sam Varshavchik <mrsam@octopus.email-scan.com> - 1.0 diff --git a/unicode/m4/courier-unicode.m4 b/unicode/m4/courier-unicode.m4 index dadf321..673bc65 100644 --- a/unicode/m4/courier-unicode.m4 +++ b/unicode/m4/courier-unicode.m4 @@ -50,44 +50,3 @@ AC_MSG_ERROR([*** A compiler with C++11 Unicode support was not found]) CXXFLAGS="$save_FLAGS" AC_LANG_POP([C++]) ]) - -AC_DEFUN([AX_COURIER_UNICODE_VERSION],[ - -AC_MSG_CHECKING(courier-unicode library and version) - -v="$1" - -if test "$v" = "" -then - v=2.2 -fi - -set -- `echo "$v" | tr '.' ' '` - -v=$[]1 -r=$[]2 -p=$[]3 - -if test "$p" = "" - then p="0" -fi - -AC_TRY_COMPILE([ -#include <courier-unicode.h> -#ifndef COURIER_UNICODE_VERSION -#define COURIER_UNICODE_VERSION 0 -#endif - -#if COURIER_UNICODE_VERSION < ]$v$r$p[ -#error "courier-unicode ]$1[ library is required" -#endif - -],[],[], -AC_MSG_ERROR([ -ERROR: The Courier Unicode Library ]$1[ header files appear not to be installed. 
-You may need to upgrade the library or install a separate development -subpackage in addition to the main package.]) -) - -AC_MSG_RESULT([ok]) -]) diff --git a/unicode/unicode_bidi.c b/unicode/unicode_bidi.c index b23b833..772f9fe 100644 --- a/unicode/unicode_bidi.c +++ b/unicode/unicode_bidi.c @@ -464,7 +464,7 @@ struct directional_status_stack_entry { typedef struct { struct directional_status_stack_entry *head; - unicode_bidi_level_t paragraph_embedding_level; + struct unicode_bidi_direction paragraph_embedding_level; const char32_t *chars; enum_bidi_type_t *types; const enum_bidi_type_t *orig_types; @@ -618,7 +618,7 @@ get_enum_bidi_type_for_paragraph_embedding_level(size_t i, return p->p[i]; } -static unicode_bidi_level_t +static struct unicode_bidi_direction compute_paragraph_embedding_level_from_types(const enum_bidi_type_t *p, size_t i, size_t j) { @@ -628,7 +628,7 @@ compute_paragraph_embedding_level_from_types(const enum_bidi_type_t *p, return compute_paragraph_embedding_level (i, j, get_enum_bidi_type_for_paragraph_embedding_level, - &info).direction; + &info); } static directional_status_stack_t @@ -642,10 +642,18 @@ directional_status_stack_init(const char32_t *chars, stack=(directional_status_stack_t)calloc(1, sizeof(*stack)); - stack->paragraph_embedding_level= - initial_embedding_level - ? 
*initial_embedding_level & 1 - : compute_paragraph_embedding_level_from_types(types, 0, n); + if (initial_embedding_level) + { + stack->paragraph_embedding_level.direction= + *initial_embedding_level & 1; + stack->paragraph_embedding_level.is_explicit=1; + } + else + { + stack->paragraph_embedding_level= + compute_paragraph_embedding_level_from_types(types, + 0, n); + } stack->chars=chars; stack->orig_types=types; @@ -666,7 +674,8 @@ directional_status_stack_init(const char32_t *chars, stack->size=n; directional_status_stack_push(stack, - stack->paragraph_embedding_level, + stack->paragraph_embedding_level + .direction, do_neutral, 0); return stack; @@ -736,7 +745,7 @@ void unicode_bidi_setbnl(char32_t *p, } } -unicode_bidi_level_t +struct unicode_bidi_direction unicode_bidi_calc(const char32_t *p, size_t n, unicode_bidi_level_t *bufp, const unicode_bidi_level_t *initial_embedding_level) { @@ -748,7 +757,7 @@ unicode_bidi_calc(const char32_t *p, size_t n, unicode_bidi_level_t *bufp, unicode_bidi_calc_types(p, n, buf); - unicode_bidi_level_t level= + struct unicode_bidi_direction level= unicode_bidi_calc_levels(p, buf, n, @@ -762,7 +771,7 @@ unicode_bidi_calc(const char32_t *p, size_t n, unicode_bidi_level_t *bufp, static void unicode_bidi_cl(directional_status_stack_t stack); -unicode_bidi_level_t +struct unicode_bidi_direction unicode_bidi_calc_levels(const char32_t *p, const enum_bidi_type_t *types, size_t n, @@ -779,12 +788,12 @@ unicode_bidi_calc_levels(const char32_t *p, stack=directional_status_stack_init(p, types, n, bufp, initial_embedding_level); - unicode_bidi_level_t paragraph_embedding_level= + struct unicode_bidi_direction paragraph_embedding_level= stack->paragraph_embedding_level; #ifdef BIDI_DEBUG fprintf(DEBUGDUMP, "BIDI: START: Paragraph embedding level: %d\n", - (int)stack->paragraph_embedding_level); + (int)paragraph_embedding_level.direction); #endif unicode_bidi_cl(stack); @@ -971,7 +980,8 @@ static void 
unicode_bidi_cl(directional_status_stack_t stack) } cur_class=compute_paragraph_embedding_level_from_types - (stack->types, i+1, j) == 1 + (stack->types, i+1, j).direction + != UNICODE_BIDI_LR ? UNICODE_BIDI_TYPE_RLI : UNICODE_BIDI_TYPE_LRI; } @@ -1104,7 +1114,8 @@ static void unicode_bidi_cl(directional_status_stack_t stack) { /* X8 */ - stack->levels[i]=stack->paragraph_embedding_level; + stack->levels[i]= + stack->paragraph_embedding_level.direction; } } @@ -1203,9 +1214,9 @@ static void unicode_bidi_cl(directional_status_stack_t stack) continue; /* Edge case */ unicode_bidi_level_t before= - stack->paragraph_embedding_level; + stack->paragraph_embedding_level.direction; unicode_bidi_level_t after= - stack->paragraph_embedding_level; + stack->paragraph_embedding_level.direction; size_t first_i=beg_iter.i; @@ -1301,11 +1312,11 @@ static void unicode_bidi_cl(directional_status_stack_t stack) case UNICODE_BIDI_TYPE_PDI: if (seen_sb) stack->levels[i]= - stack->paragraph_embedding_level; + stack->paragraph_embedding_level.direction; break; case UNICODE_BIDI_TYPE_S: case UNICODE_BIDI_TYPE_B: - stack->levels[i]=stack->paragraph_embedding_level; + stack->levels[i]=stack->paragraph_embedding_level.direction; seen_sb=1; break; default: @@ -2052,12 +2063,14 @@ void unicode_bidi_reorder(char32_t *p, level_run_layers_deinit(&layers); } -size_t unicode_bidi_cleanup(char32_t *string, - unicode_bidi_level_t *levels, - size_t n, - int cleanup_options, - void (*removed_callback)(size_t, void *), - void *arg) +static size_t unicode_bidi_count_or_cleanup(const char32_t *string, + char32_t *dest, + unicode_bidi_level_t *levels, + size_t n, + int cleanup_options, + void (*removed_callback)(size_t, + void *), + void *arg) { size_t i=0; for (size_t j=0; j<n; ++j) @@ -2079,13 +2092,34 @@ size_t unicode_bidi_cleanup(char32_t *string, if (levels) levels[i]=levels[j] & 1; - string[i]=(cleanup_options & UNICODE_BIDI_CLEANUP_BNL) - && cl == UNICODE_BIDI_TYPE_B ? 
'\n' : string[j]; + if (dest) + dest[i]=(cleanup_options & UNICODE_BIDI_CLEANUP_BNL) + && cl == UNICODE_BIDI_TYPE_B ? '\n' : string[j]; ++i; } return i; } +size_t unicode_bidi_cleanup(char32_t *string, + unicode_bidi_level_t *levels, + size_t n, + int cleanup_options, + void (*removed_callback)(size_t, void *), + void *arg) +{ + return unicode_bidi_count_or_cleanup(string, string, levels, n, + cleanup_options, removed_callback, + arg); +} + +size_t unicode_bidi_cleaned_size(const char32_t *string, + size_t n, + int cleanup_options) +{ + return unicode_bidi_count_or_cleanup(string, NULL, NULL, n, + cleanup_options, NULL, NULL); +} + void unicode_bidi_logical_order(char32_t *string, unicode_bidi_level_t *levels, size_t n, @@ -2276,6 +2310,49 @@ static void emit_marker(struct bidi_embed_levelrun *p, } } +int unicode_bidi_needs_embed(const char32_t *string, + const unicode_bidi_level_t *levels, + size_t n, + const unicode_bidi_level_t *paragraph_level) +{ + char32_t *string_cpy=(char32_t *)malloc(n * sizeof(char32_t)); + unicode_bidi_level_t *levels_cpy=(unicode_bidi_level_t *) + malloc(n * sizeof(unicode_bidi_level_t)); + size_t nn; + int ret; + + if (!string_cpy || !levels_cpy) + abort(); + + memcpy(string_cpy, string, n * sizeof(char32_t)); + + struct unicode_bidi_direction direction= + unicode_bidi_calc(string_cpy, n, + levels_cpy, paragraph_level); + + unicode_bidi_reorder(string_cpy, levels_cpy, n, NULL, NULL); + nn=unicode_bidi_cleanup(string_cpy, levels_cpy, n, 0, + NULL, NULL); + + ret=0; + if (n == nn && (paragraph_level == NULL || + direction.direction == *paragraph_level)) + { + unicode_bidi_logical_order(string_cpy, levels_cpy, nn, + direction.direction, + NULL, NULL); + if (memcmp(string_cpy, string, n * sizeof(char32_t)) == 0 && + memcmp(levels_cpy, levels, n * sizeof(unicode_bidi_level_t)) + == 0) + { + ret=1; + } + } + free(string_cpy); + free(levels_cpy); + return ret; +} + void unicode_bidi_embed(const char32_t *string, const unicode_bidi_level_t 
*levels, size_t n, diff --git a/unicode/unicodecpp.C b/unicode/unicodecpp.C index babb6bb..7bb6edc 100644 --- a/unicode/unicodecpp.C +++ b/unicode/unicodecpp.C @@ -581,18 +581,29 @@ void unicode::bidi_calc_types::setbnl(std::u32string &s) unicode_bidi_setbnl(&s[0], &types[0], s.size()); } -std::tuple<std::vector<unicode_bidi_level_t>, unicode_bidi_level_t> +std::tuple<std::vector<unicode_bidi_level_t>, + struct unicode_bidi_direction> unicode::bidi_calc(const bidi_calc_types &s) { return unicode::bidi_calc(s, UNICODE_BIDI_SKIP); } -std::tuple<std::vector<unicode_bidi_level_t>, unicode_bidi_level_t> +std::tuple<std::vector<unicode_bidi_level_t>, + struct unicode_bidi_direction> unicode::bidi_calc(const bidi_calc_types &st, unicode_bidi_level_t paragraph_embedding_level) { + std::tuple<std::vector<unicode_bidi_level_t>, + struct unicode_bidi_direction> + ret; + auto &direction_ret=std::get<1>(ret); + if (st.s.size() != st.types.size()) - return { {}, UNICODE_BIDI_LR }; + { + direction_ret.direction=UNICODE_BIDI_LR; + direction_ret.is_explicit=false; + return ret; + } const unicode_bidi_level_t *initial_embedding_level=0; @@ -602,11 +613,17 @@ unicode::bidi_calc(const bidi_calc_types &st, initial_embedding_level=&paragraph_embedding_level; } - std::tuple<std::vector<unicode_bidi_level_t>, unicode_bidi_level_t> - ret; - std::get<0>(ret).resize(st.s.size()); - std::get<1>(ret)=UNICODE_BIDI_LR; + + if (initial_embedding_level) + { + direction_ret.direction=paragraph_embedding_level; + direction_ret.is_explicit=1; + } + else + { + direction_ret.direction= UNICODE_BIDI_LR; + } if (st.s.size()) { @@ -932,3 +949,43 @@ unicode_bidi_direction unicode::bidi_get_direction(const std::u32string &string, return unicode_bidi_get_direction(string.c_str()+starting_pos, n); } + +bool unicode::bidi_needs_embed(const std::u32string &string, + const std::vector<unicode_bidi_level_t> &levels, + const unicode_bidi_level_t *paragraph_embedding, + size_t starting_pos, + size_t n) +{ + if 
(string.size() != levels.size()) + return false; + + auto s=levels.size(); + + if (starting_pos >= s) + return false; + + if (n > s-starting_pos) + n=s-starting_pos; + + return unicode_bidi_needs_embed(string.c_str(), + n == 0 ? NULL : &levels[starting_pos], + n, + paragraph_embedding) != 0; +} + +std::u32string unicode::bidi_override(const std::u32string &s, + unicode_bidi_level_t direction, + int cleanup_options) +{ + std::u32string ret; + + ret.reserve(s.size()+1); + + ret.push_back(' '); + ret.insert(ret.end(), s.begin(), s.end()); + + bidi_cleanup(ret, [](size_t) {}, cleanup_options); + ret.at(0)=direction & 1 ? UNICODE_RLO : UNICODE_LRO; + + return ret; +} |
