summaryrefslogtreecommitdiffstats
path: root/unicode
diff options
context:
space:
mode:
Diffstat (limited to 'unicode')
-rw-r--r--unicode/.gitignore1
-rw-r--r--unicode/ChangeLog22
-rw-r--r--unicode/Makefile.am60
-rw-r--r--unicode/biditest2.C80
-rw-r--r--unicode/book.xml238
-rw-r--r--unicode/configure.ac24
-rw-r--r--unicode/courier-unicode-version.m4.in45
-rw-r--r--unicode/courier-unicode.h.in65
-rw-r--r--unicode/courier-unicode.spec.in37
-rw-r--r--unicode/m4/courier-unicode.m441
-rw-r--r--unicode/unicode_bidi.c131
-rw-r--r--unicode/unicodecpp.C71
12 files changed, 665 insertions, 150 deletions
diff --git a/unicode/.gitignore b/unicode/.gitignore
index 8905e05..e749d69 100644
--- a/unicode/.gitignore
+++ b/unicode/.gitignore
@@ -26,6 +26,7 @@
/config.sub
/courier-unicode.h
/courier-unicode.spec
+/courier-unicode-version.m4
/depcomp
/docs.stamp
/enttest
diff --git a/unicode/ChangeLog b/unicode/ChangeLog
index 378fede..35cffe6 100644
--- a/unicode/ChangeLog
+++ b/unicode/ChangeLog
@@ -1,3 +1,25 @@
+2021-02-24 Sam Varshavchik <mrsam@courier-mta.com>
+
+ * Implement unicode_bidi_needs_embed(), unicode_bidi_cleaned_size(),
+ and unicode::bidi_override.
+
+2.2.1
+
+2021-02-14 Sam Varshavchik <mrsam@courier-mta.com>
+
+ * unicode_bidi_calc and unicode_bidi_calc_levels return a
+ unicode_bidi_direction object, to indicate whether the
+ returned paragraph embedding level was explicitly computed or
+ defaulted.
+
+2021-02-13 Sam Varshavchik <mrsam@courier-mta.com>
+
+ * courier-unicode-version.m4: split version checking macro into its
+ own m4 file.
+
+ * unicode::bidi_calc - an empty string with an explicit embedding
+ level should return the requested embedding level.
+
2.2
2020-11-05 Sam Varshavchik <mrsam@courier-mta.com>
diff --git a/unicode/Makefile.am b/unicode/Makefile.am
index 7ba36f1..dc502b3 100644
--- a/unicode/Makefile.am
+++ b/unicode/Makefile.am
@@ -24,7 +24,7 @@ noinst_PROGRAMS=unicodetest graphemetest linebreaktest wordbreaktest \
enttest scripttest biditest biditest2
aclocaldir=$(datadir)/aclocal
-aclocal_DATA=m4/courier-unicode.m4
+aclocal_DATA=m4/courier-unicode.m4 courier-unicode-version.m4
update-www:
@$(MAKE) update-www-unicode
@@ -89,42 +89,46 @@ include_HEADERS=courier-unicode.h \
man_MANS= \
$(srcdir)/man/courier-unicode.7 \
- $(srcdir)/man/unicode\:\:bidi.3 \
- $(srcdir)/man/unicode\:\:bidi_calc.3 \
- $(srcdir)/man/unicode\:\:bidi_calc_types.3 \
- $(srcdir)/man/unicode\:\:bidi_cleanup.3 \
- $(srcdir)/man/unicode\:\:bidi_embed.3 \
- $(srcdir)/man/unicode\:\:bidi_embed_paragraph_level.3 \
- $(srcdir)/man/unicode\:\:bidi_get_direction.3 \
- $(srcdir)/man/unicode\:\:bidi_logical_order.3 \
- $(srcdir)/man/unicode\:\:bidi_reorder.3 \
- $(srcdir)/man/unicode\:\:iconvert\:\:convert.3 \
- $(srcdir)/man/unicode\:\:iconvert\:\:convert_tocase.3 \
- $(srcdir)/man/unicode\:\:iconvert\:\:fromu.3 \
- $(srcdir)/man/unicode\:\:iconvert\:\:tou.3 \
- $(srcdir)/man/unicode\:\:iso_8859_1.3 \
- $(srcdir)/man/unicode\:\:linebreak_callback_base.3 \
- $(srcdir)/man/unicode\:\:linebreak_callback_save_buf.3 \
- $(srcdir)/man/unicode\:\:linebreak_iter.3 \
- $(srcdir)/man/unicode\:\:linebreakc_callback_base.3 \
- $(srcdir)/man/unicode\:\:linebreakc_iter.3 \
- $(srcdir)/man/unicode\:\:tolower.3 \
- $(srcdir)/man/unicode\:\:toupper.3 \
- $(srcdir)/man/unicode\:\:ucs_2.3 \
- $(srcdir)/man/unicode\:\:ucs_4.3 \
- $(srcdir)/man/unicode\:\:utf_8.3 \
- $(srcdir)/man/unicode\:\:wordbreak_callback_base.3 \
+ $(srcdir)/man/unicode[\:][\:]bidi.3 \
+ $(srcdir)/man/unicode[\:][\:]bidi_calc.3 \
+ $(srcdir)/man/unicode[\:][\:]bidi_calc_types.3 \
+ $(srcdir)/man/unicode[\:][\:]bidi_cleanup.3 \
+ $(srcdir)/man/unicode[\:][\:]bidi_embed.3 \
+ $(srcdir)/man/unicode[\:][\:]bidi_embed_paragraph_level.3 \
+ $(srcdir)/man/unicode[\:][\:]bidi_get_direction.3 \
+ $(srcdir)/man/unicode[\:][\:]bidi_logical_order.3 \
+ $(srcdir)/man/unicode[\:][\:]bidi_needs_embed.3 \
+ $(srcdir)/man/unicode[\:][\:]bidi_override.3 \
+ $(srcdir)/man/unicode[\:][\:]bidi_reorder.3 \
+ $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]convert.3 \
+ $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]convert_tocase.3 \
+ $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]fromu.3 \
+ $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]tou.3 \
+ $(srcdir)/man/unicode[\:][\:]iso_8859_1.3 \
+ $(srcdir)/man/unicode[\:][\:]linebreak_callback_base.3 \
+ $(srcdir)/man/unicode[\:][\:]linebreak_callback_save_buf.3 \
+ $(srcdir)/man/unicode[\:][\:]linebreak_iter.3 \
+ $(srcdir)/man/unicode[\:][\:]linebreakc_callback_base.3 \
+ $(srcdir)/man/unicode[\:][\:]linebreakc_iter.3 \
+ $(srcdir)/man/unicode[\:][\:]tolower.3 \
+ $(srcdir)/man/unicode[\:][\:]toupper.3 \
+ $(srcdir)/man/unicode[\:][\:]ucs_2.3 \
+ $(srcdir)/man/unicode[\:][\:]ucs_4.3 \
+ $(srcdir)/man/unicode[\:][\:]utf_8.3 \
+ $(srcdir)/man/unicode[\:][\:]wordbreak_callback_base.3 \
$(srcdir)/man/unicode_bidi.3 \
$(srcdir)/man/unicode_bidi_bracket_type.3 \
$(srcdir)/man/unicode_bidi_calc.3 \
$(srcdir)/man/unicode_bidi_calc_levels.3 \
$(srcdir)/man/unicode_bidi_calc_types.3 \
+ $(srcdir)/man/unicode_bidi_cleaned_size.3 \
$(srcdir)/man/unicode_bidi_cleanup.3 \
$(srcdir)/man/unicode_bidi_direction.3 \
$(srcdir)/man/unicode_bidi_embed.3 \
$(srcdir)/man/unicode_bidi_embed_paragraph_level.3 \
$(srcdir)/man/unicode_bidi_logical_order.3 \
$(srcdir)/man/unicode_bidi_mirror.3 \
+ $(srcdir)/man/unicode_bidi_needs_embed.3 \
$(srcdir)/man/unicode_bidi_reorder.3 \
$(srcdir)/man/unicode_bidi_setbnl.3 \
$(srcdir)/man/unicode_bidi_type.3 \
@@ -219,7 +223,7 @@ libcourier_unicode_la_SOURCES=\
bidi_mirroring.h \
unicode_categories.c
-libcourier_unicode_la_LDFLAGS=-version-info 6:0:2
+libcourier_unicode_la_LDFLAGS=-version-info 7:0:0
EXTRA_DIST=$(noinst_SCRIPTS) $(man_MANS) $(PACKAGE).spec \
m4/courier-unicode.m4 \
@@ -444,7 +448,7 @@ docs.stamp:
rm -f man/*.[123456789]
mv man.tmp/* man
rm -rf html.tmp man.tmp
- perl -e '$$f=join("",<STDIN>); $$p=join("", map { " \\\n \$$(srcdir)/$$_" } glob("man/*.[123456789]")); $$p=~s/:/\\:/g; $$f =~ s/\nman_MANS=([^\n]|\n[^\n])*/\nman_MANS=$$p/s; print $$f' <Makefile.am >Makefile.am.new
+ perl -e '$$f=join("",<STDIN>); $$p=join("", map { " \\\n \$$(srcdir)/$$_" } glob("man/*.[123456789]")); $$p=~s/:/\[\\:\]/g; $$f =~ s/\nman_MANS=([^\n]|\n[^\n])*/\nman_MANS=$$p/s; print $$f' <Makefile.am >Makefile.am.new
cmp Makefile.am Makefile.am.new || mv -f Makefile.am.new Makefile.am; rm -f Makefile.am.new
touch docs.stamp
diff --git a/unicode/biditest2.C b/unicode/biditest2.C
index 8e9d7da..a14b3ea 100644
--- a/unicode/biditest2.C
+++ b/unicode/biditest2.C
@@ -204,14 +204,14 @@ void character_test()
? unicode::bidi_calc(s, direction)
: unicode::bidi_calc(s);
- if (std::get<1>(ret) != paragraph_embedding_level)
+ if (std::get<1>(ret).direction != paragraph_embedding_level)
{
std::cerr << "Regression, line "
<< linenum
<< ": expected "
<< paragraph_embedding_level
<< " paragraph embedding level, got "
- << (int)std::get<1>(ret)
+ << (int)std::get<1>(ret).direction
<< std::endl;
exit(1);
}
@@ -274,6 +274,9 @@ void character_test()
std::reverse(b+index, b+index+n);
});
+ size_t cleaned_size=unicode_bidi_cleaned_size(s.c_str(),
+ s.size(), 0);
+
n=0;
unicode::bidi_cleanup
(s, levels,
@@ -285,6 +288,17 @@ void character_test()
++n;
});
+ if (cleaned_size != s.size())
+ {
+ std::cerr << "Regression, line "
+ << linenum
+ << ": default cleaned size"
+ << std::endl
+ << " Expected size: " << cleaned_size
+ << ", actual size: " << s.size()
+ << std::endl;
+ exit(1);
+ }
if (render_order != actual_render_order)
{
std::cerr << "Regression, line "
@@ -408,6 +422,12 @@ void character_test()
}
unicode::bidi_reorder(new_string, std::get<0>(ret));
+
+ cleaned_size=unicode_bidi_cleaned_size
+ (new_string.c_str(),
+ new_string.size(),
+ UNICODE_BIDI_CLEANUP_CANONICAL);
+
unicode::bidi_cleanup(new_string,
std::get<0>(ret),
[]
@@ -416,6 +436,20 @@ void character_test()
},
UNICODE_BIDI_CLEANUP_CANONICAL);
+ if (cleaned_size != new_string.size())
+ {
+ std::cerr << "Regression, line "
+ << linenum
+ << ": canonoical cleaned size"
+ << std::endl
+ << " Expected size: "
+ << cleaned_size
+ << ", actual size: "
+ << new_string.size()
+ << std::endl;
+ exit(1);
+ }
+
/* New string is now back in logical order */
if (new_string == s && std::get<0>(ret) == levels)
@@ -548,34 +582,62 @@ void null_character_test()
},
UNICODE_BIDI_CLEANUP_EXTRA,
0, 3);
+
+ s=U"";
+ res=unicode::bidi_calc(s, UNICODE_BIDI_RL);
+
+ if (std::get<1>(res).direction != UNICODE_BIDI_RL)
+ {
+ std::cerr << "Paragraph embedding level not honored"
+ << std::endl;
+ exit(1);
+ }
}
void direction_test()
{
static const struct {
- const char32_t *str;
+ std::u32string str;
unicode_bidi_level_t direction;
int is_explicit;
+ bool needs_embed;
} tests[]={
{
U"Hello",
UNICODE_BIDI_LR,
1,
+ true,
},
{
U" ",
UNICODE_BIDI_LR,
0,
+ true,
},
{
U"",
UNICODE_BIDI_LR,
0,
+ true,
},
{
U"שלום",
UNICODE_BIDI_RL,
1,
+ true,
+ },
+ {
+ U"Helloש",
+ UNICODE_BIDI_LR,
+ 1,
+ true,
+ },
+ {
+ U"Hello" + std::u32string{unicode::literals::LRO}
+ + U"ש",
+ UNICODE_BIDI_LR,
+ 1,
+ false,
},
};
@@ -589,6 +651,18 @@ void direction_test()
std::cerr << "direction_test failed\n";
exit(1);
}
+
+ std::u32string s=t.str;
+ auto levels=std::get<0>(unicode::bidi_calc(s, t.direction));
+ unicode::bidi_reorder(s, levels);
+ unicode::bidi_cleanup(s, levels);
+
+ if (unicode::bidi_needs_embed(s, levels, &t.direction)
+ != t.needs_embed)
+ {
+ std::cerr << "needs embed failed\n";
+ exit(1);
+ }
}
}
diff --git a/unicode/book.xml b/unicode/book.xml
index c3ebc33..4f0fd71 100644
--- a/unicode/book.xml
+++ b/unicode/book.xml
@@ -99,11 +99,39 @@ See COPYING for distribution information.
<para>
Download the current version of the library from
<ulink url="/download.html#unicode">https://www.courier-mta.org/download.html#unicode</ulink>.
- After unpacking the tarball, run the configure script, which takes
- the usual options, followed by <command>make</command>, then
- <command>make install</command>.
+ Use the downloaded tarball to create an appropriate installation
+ package for your operating system distribution.
+ The typical sequence of commands is:
</para>
+ <blockquote>
+ <informalexample>
+ <programlisting>
+./configure # Takes the default configure script options
+make
+make install DESTDIR=/tmp/courier-unicode-instimage # For example.</programlisting>
+ </informalexample>
+ </blockquote>
+
+ <para>
+ The library uses a stock configure script and <command>make</command>;
+ the <command>make install</command> command respects the
+ <varname>DESTDIR</varname> setting to create an installation image
+ in the directory specified by <varname>DESTDIR</varname>.
+ </para>
+
+ <note>
+ <para>
+ <command>make install</command> will not take any explicit action
+ to uninstall any older version of the library, or remove any files
+ from an older version that do not exist any more in the new version.
+ The library's installation image should be used to prepare an
+ installable package in a native package format for your operating
+ system distribution. Use your native system distribution's package
+ manager to properly install and uninstall the library's package.
+ </para>
+ </note>
+
<para>
To use the library, <quote>#include &lt;courier-unicode.h&gt;</quote> and link
with <literal>-lcourier-unicode</literal>.
@@ -306,7 +334,9 @@ See COPYING for distribution information.
<refname>unicode_bidi_calc</refname>
<refname>unicode_bidi_reorder</refname>
<refname>unicode_bidi_cleanup</refname>
+ <refname>unicode_bidi_cleaned_size</refname>
<refname>unicode_bidi_logical_order</refname>
+ <refname>unicode_bidi_needs_embed</refname>
<refname>unicode_bidi_embed</refname>
<refname>unicode_bidi_embed_paragraph_level</refname>
@@ -331,7 +361,7 @@ See COPYING for distribution information.
</funcprototype>
<funcprototype>
- <funcdef>void <function>unicode_bidi_calc_levels</function></funcdef>
+ <funcdef>struct unicode_bidi_direction <function>unicode_bidi_calc_levels</function></funcdef>
<paramdef>const char32_t *<parameter>p</parameter></paramdef>
<paramdef>const unicode_bidi_type_t *<parameter>types</parameter></paramdef>
<paramdef>size_t <parameter>n</parameter></paramdef>
@@ -340,7 +370,7 @@ See COPYING for distribution information.
</funcprototype>
<funcprototype>
- <funcdef>void <function>unicode_bidi_calc</function></funcdef>
+ <funcdef>struct unicode_bidi_direction <function>unicode_bidi_calc</function></funcdef>
<paramdef>const char32_t *<parameter>p</parameter></paramdef>
<paramdef>size_t <parameter>n</parameter></paramdef>
<paramdef>unicode_bidi_level_t *<parameter>levels</parameter></paramdef>
@@ -367,7 +397,14 @@ See COPYING for distribution information.
</funcprototype>
<funcprototype>
- <funcdef>size_t <function>unicode_bidi_logical_order</function></funcdef>
+ <funcdef>size_t <function>unicode_bidi_cleaned_size</function></funcdef>
+ <paramdef>const char32_t *<parameter>string</parameter></paramdef>
+ <paramdef>size_t <parameter>n</parameter></paramdef>
+ <paramdef>int <parameter>options</parameter></paramdef>
+ </funcprototype>
+
+ <funcprototype>
+ <funcdef>void <function>unicode_bidi_logical_order</function></funcdef>
<paramdef>char32_t *<parameter>string</parameter></paramdef>
<paramdef>unicode_bidi_level_t *<parameter>levels</parameter></paramdef>
<paramdef>size_t <parameter>n</parameter></paramdef>
@@ -377,6 +414,14 @@ See COPYING for distribution information.
</funcprototype>
<funcprototype>
+ <funcdef>int <function>unicode_bidi_needs_embed</function></funcdef>
+ <paramdef>const char32_t *<parameter>string</parameter></paramdef>
+ <paramdef>const unicode_bidi_level_t *<parameter>levels</parameter></paramdef>
+ <paramdef>size_t <parameter>n</parameter></paramdef>
+ <paramdef>const unicode_bidi_level_t *<parameter>paragraph_embedding</parameter></paramdef>
+ </funcprototype>
+
+ <funcprototype>
<funcdef>size_t <function>unicode_bidi_embed</function></funcdef>
<paramdef>const char32_t *<parameter>string</parameter></paramdef>
<paramdef>const unicode_bidi_level_t *<parameter>levels</parameter></paramdef>
@@ -510,6 +555,9 @@ See COPYING for distribution information.
to remove the characters from the string which are used
by the bi-directional algorithm, and are not needed for
rendering the text.
+ <function>unicode_bidi_cleaned_size</function>() is
+ available to determine, in advance, how many characters
+ will remain.
</para>
</listitem>
</orderedlist>
@@ -617,8 +665,46 @@ See COPYING for distribution information.
return the resolved
paragraph direction level, which
always matches the passed in level, if specified, else it
- reports the
- derived one.
+ reports the derived one. These functions return a
+ <structname>unicode_bidi_direction</structname> structure:
+ </para>
+
+ <informaltable border='0'>
+ <tgroup cols="3">
+ <colspec colname='c1' />
+ <colspec colname='c2' />
+ <colspec colname='c3' />
+ <tbody>
+ <row>
+ <entry namest='c1' nameend='c3'>struct&nbsp;<structname>unicode_bidi_direction</structname>&nbsp;{</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>unicode_bidi_level_t</entry>
+ <entry><varname>direction</varname>;</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>int</entry>
+ <entry><varname>is_explicit</varname>;</entry>
+ </row>
+ <row>
+ <entry namest='c1' nameend='c3'>};</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ <para>
+ <varname>direction</varname> gives the paragraph embedding
+ level, <literal>UNICODE_BIDI_LR</literal> or
+ <literal>UNICODE_BIDI_RL</literal>.
+ <varname>is_explicit</varname> indicates whether the
+ <varname>direction</varname> is explicit: either the optional
+ pointer to a <literal>UNICODE_BIDI_LR</literal> or
+ <literal>UNICODE_BIDI_RL</literal> value was specified (and
+ returned in <varname>direction</varname>), or the
+ <varname>direction</varname> comes from a character with an
+ explicit direction indication.
</para>
<para>
@@ -794,18 +880,25 @@ See COPYING for distribution information.
with the <literal>UNICODE_BIDI_CLEANUP_CANONICAL</literal>
are in
<quote>canonical rendering order</quote>.
- <function>unicode_bidi_logical_order</function>() and
+ <function>unicode_bidi_logical_order</function>(),
+ <function>unicode_bidi_needs_embed</function>() and
<function>unicode_bidi_embed</function>() require the
canonical rendering order for their string and embedding level
values.
</para>
+ <para>
+ The parameters to <function>unicode_bidi_cleaned_size</function>()
+ are a pointer to the unicode string, its size, and
+ the bitmask option to <function>unicode_bidi_cleanup</function>().
+ </para>
</refsect2>
<refsect2 id="unicode_bidi_embed">
<title>Embedding bi-directional markers in Unicode text strings</title>
<para>
- <function>unicode_bidi_logical_order</function>() and
- <function>unicode_bidi_embed</function>() add various
+ <function>unicode_bidi_logical_order</function>() rearranges
+ the string from rendering to its logical order.
+ <function>unicode_bidi_embed</function>() adds various
bi-directional markers to a Unicode string in canonical rendering
order. The resulting string is not guaranteed to be
identical to the
@@ -819,12 +912,18 @@ See COPYING for distribution information.
<function>unicode_bidi_cleanup()</function>
(with the canonical option),
with the same paragraph_embedding level.
+ <function>unicode_bidi_needs_embed</function>() attempts to
+ heuristically determine whether
+ <function>unicode_bidi_embed</function>() is required.
</para>
<para>
<function>unicode_bidi_logical_order</function>() gets called
first, followed by
- <function>unicode_bidi_embed</function>().
+ <function>unicode_bidi_embed</function>()
+ (or
+ <function>unicode_bidi_needs_embed</function>() in order to
+ determine whether bi-directional markers are required).
Finally, <function>unicode_bidi_embed_paragraph_level</function>()
optionally determines whether the resulting string's default
paragraph embedding level matches the one used for the actual
@@ -881,12 +980,12 @@ See COPYING for distribution information.
<itemizedlist>
<listitem>
<para>
- The Unicode string, and &hellip;
+ The Unicode string.
</para>
</listitem>
<listitem>
<para>
- &hellip; the directional embedding buffer, in canonical
+ The directional embedding buffer, in canonical
rendering order.
</para>
</listitem>
@@ -998,6 +1097,53 @@ See COPYING for distribution information.
</para>
</listitem>
</itemizedlist>
+
+ <para>
+ <function>unicode_bidi_needs_embed</function>() attempts to
+ heuristically determine whether the Unicode string, in logical
+ order, requires bi-directional markers.
+ The parameters to
+ <function>unicode_bidi_needs_embed</function>() are:
+ </para>
+ <itemizedlist>
+ <listitem>
+ <para>
+ The Unicode string.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ The directional embedding buffer, in logical
+ rendering order.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ The size of the string and the embedding level buffer.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ A pointer to an explicit paragraph embedding level, either
+ <literal>UNICODE_BIDI_LR</literal> or
+ <literal>UNICODE_BIDI_RL</literal>; or a
+ <literal>NULL</literal> pointer (see
+ <function>unicode_bidi_calc_types</function>()'s
+ explanation for this parameter).
+ </para>
+ </listitem>
+ </itemizedlist>
+
+ <para>
+ <function>unicode_bidi_needs_embed</function>() returns 0
+ if the Unicode string does not need explicit directional
+ markers, or 1 if it does. This is done by using
+ <function>unicode_bidi_calc()</function>,
+ <function>unicode_bidi_reorder()</function>,
+ <function>unicode_bidi_logical_order()</function>, and then
+ checking if the end result is different from what was passed
+ in.
+ </para>
</refsect2>
<refsect2 id="unicode_bidi_misc">
<title>Miscellaneous utility functions</title>
@@ -2837,9 +2983,11 @@ See COPYING for distribution information.
<refname>unicode::bidi_reorder</refname>
<refname>unicode::bidi_cleanup</refname>
<refname>unicode::bidi_logical_order</refname>
+ <refname>unicode::bidi_needs_embed</refname>
<refname>unicode::bidi_embed</refname>
<refname>unicode::bidi_embed_paragraph_level</refname>
<refname>unicode::bidi_get_direction</refname>
+ <refname>unicode::bidi_override</refname>
<refpurpose>unicode bi-directional algorithm</refpurpose>
</refnamediv>
@@ -2871,12 +3019,12 @@ See COPYING for distribution information.
<funcsynopsis>
<funcprototype>
- <funcdef>std::tuple&lt;std::vector&lt;unicode_bidi_level_t&gt;, unicode_bidi_level_t&gt; <function>unicode::bidi_calc</function></funcdef>
+ <funcdef>std::tuple&lt;std::vector&lt;unicode_bidi_level_t&gt;, struct unicode_bidi_direction&gt; <function>unicode::bidi_calc</function></funcdef>
<paramdef>const unicode::bidi_calc_types &amp;<parameter>ustring</parameter></paramdef>
</funcprototype>
<funcprototype>
- <funcdef>std::tuple&lt;std::vector&lt;unicode_bidi_level_t&gt;, unicode_bidi_level_t&gt; <function>unicode::bidi_calc</function></funcdef>
+ <funcdef>std::tuple&lt;std::vector&lt;unicode_bidi_level_t&gt;, struct unicode_bidi_direction&gt; <function>unicode::bidi_calc</function></funcdef>
<paramdef>const unicode::bidi_calc_types &amp;<parameter>ustring</parameter></paramdef>
<paramdef>unicode_bidi_level_t <parameter>embedding_level</parameter></paramdef>
</funcprototype>
@@ -2943,6 +3091,15 @@ See COPYING for distribution information.
</funcprototype>
<funcprototype>
+ <funcdef>bool <function>unicode::bidi_needs_embed</function></funcdef>
+ <paramdef>const std::u32string &amp;<parameter>string</parameter></paramdef>
+ <paramdef>const std::vector &lt;unicode_bidi_level_t&gt; &amp;<parameter>levels</parameter></paramdef>
+ <paramdef>const unicode_bidi_level_t *<parameter>paragraph_embedding</parameter>=NULL</paramdef>
+ <paramdef>size_t <parameter>starting_pos</parameter>=0</paramdef>
+ <paramdef>size_t <parameter>n</parameter>=(size_t)-1</paramdef>
+ </funcprototype>
+
+ <funcprototype>
<funcdef>int <function>unicode::bidi_embed</function></funcdef>
<paramdef>const std::u32string &amp;<parameter>string</parameter></paramdef>
<paramdef>const std::vector &lt;unicode_bidi_level_t&gt; &amp;<parameter>levels</parameter></paramdef>
@@ -2969,6 +3126,13 @@ See COPYING for distribution information.
<paramdef>size_t <parameter>starting_pos</parameter>=0</paramdef>
<paramdef>size_t <parameter>n</parameter>=(size_t)-1</paramdef>
</funcprototype>
+
+ <funcprototype>
+ <funcdef>std::u32string <function>unicode::bidi_override</function></funcdef>
+ <paramdef>const std::u32string &amp;<parameter>string</parameter></paramdef>
+ <paramdef>unicode_bidi_level_t <parameter>direction</parameter></paramdef>
+ <paramdef>int <parameter>cleanup_options</parameter>=0</paramdef>
+ </funcprototype>
</funcsynopsis>
</refsynopsisdiv>
@@ -2999,7 +3163,7 @@ See COPYING for distribution information.
<programlisting><![CDATA[
std::u32string text;
-auto [levels, level]=unicode::bidi_calc(text);
+auto [levels, direction]=unicode::bidi_calc(text);
]]></programlisting>
</informalexample>
@@ -3022,7 +3186,7 @@ types.setbnl(text); // Optional
// types.types is a std::vector of enum_bidi_types_t values
-auto [levels, level]=unicode::bidi_calc(types);
+auto [levels, direction]=unicode::bidi_calc(types);
]]></programlisting>
</informalexample>
@@ -3106,7 +3270,8 @@ auto [levels, level]=unicode::bidi_calc(types);
<para>
<function>unicode::bidi_reorder</function>,
<function>unicode::bidi_cleanup</function>,
- <function>unicode::bidi_logical_order</function> and
+ <function>unicode::bidi_logical_order</function>,
+ <function>unicode::bidi_needs_embed</function> and
<function>unicode::bidi_get_direction</function>
take two optional
parameters (defaulted values or overloaded) specifying
@@ -3124,6 +3289,41 @@ auto [levels, level]=unicode::bidi_calc(types);
</para>
</listitem>
+ <listitem>
+ <para>
+ <function>unicode::bidi_override</function>
+ modifies the passed-in <parameter>string</parameter> as
+ follows:
+ </para>
+
+ <itemizedlist>
+ <listitem>
+ <para>
+ <function>unicode::bidi_cleanup</function>() is applied
+ with the specified, or defaulted,
+ <replaceable>cleanup_options</replaceable>.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Either an <literal>LRO</literal> or an
+ <literal>RLO</literal> override marker gets prepended
+ to the Unicode string, forcing the entire string to
+ be interpreted in a single rendering direction, when
+ processed by the Unicode bi-directional algorithm.
+ </para>
+ </listitem>
+ </itemizedlist>
+
+ <para>
+ <function>unicode::bidi_override</function> makes it
+ possible to use a Unicode-aware application or algorithm
+ in a context that only works with text that's always
+ displayed in a fixed direction, allowing graceful handling
+ of input containing bi-directional text.
+ </para>
+ </listitem>
</itemizedlist>
<refsect2 id="unicode_cpp_bidi_literals">
diff --git a/unicode/configure.ac b/unicode/configure.ac
index 5c366de..1cc3b76 100644
--- a/unicode/configure.ac
+++ b/unicode/configure.ac
@@ -1,6 +1,6 @@
dnl Process this file with autoconf to produce a configure script.
-AC_INIT([courier-unicode], [2.2], [courier-users@lists.sourceforge.net])
+AC_INIT([courier-unicode], [2.2.1.20210220], [courier-users@lists.sourceforge.net])
>confdefs.h # Kill PACKAGE_ macros
@@ -117,5 +117,25 @@ fi
CFLAGS="-I.. -I$srcdir/.. $CFLAGS"
CXXFLAGS="-I.. -I$srcdir/.. $CXXFLAGS"
+set -- `echo "$VERSION" | tr '.' ' '`
+
+v=$1
+r=`echo "00"$2 | sed 's/.*(...)$/$1/'`
+p=$3
+
+if test "$p" = ""
+ then p="0"
+fi
+
+p=`echo "00"$p | sed 's/.*(...)$/$[]1/'`
+
+HVERSION="$v$r$p"
+AC_SUBST(HVERSION)
+
AM_CONDITIONAL(HAVE_DOCS,[test -f $srcdir/docbook/icon.gif])
-AC_OUTPUT(Makefile packaging/freebsd10/Makefile courier-unicode.spec courier-unicode.h)
+AC_OUTPUT(Makefile
+ packaging/freebsd10/Makefile
+ courier-unicode.spec
+ courier-unicode.h
+ courier-unicode-version.m4
+)
diff --git a/unicode/courier-unicode-version.m4.in b/unicode/courier-unicode-version.m4.in
new file mode 100644
index 0000000..94b0c04
--- /dev/null
+++ b/unicode/courier-unicode-version.m4.in
@@ -0,0 +1,45 @@
+dnl Checks for the correct version of the courier-unicode library.
+
+AC_DEFUN([AX_COURIER_UNICODE_VERSION],[
+
+AC_MSG_CHECKING(courier-unicode library and version)
+
+vers="$1"
+
+if test "$vers" = ""
+then
+ vers=@VERSION@
+fi
+
+set -- `echo "$vers" | tr '.' ' '`
+
+v=$[]1
+r=`echo "00"$[]2 | sed 's/.*(...)$/$[]1/'`
+
+p=$[]3
+
+if test "$p" = ""
+ then p="0"
+fi
+
+p=`echo "00"$p | sed 's/.*(...)$/$[]1/'`
+
+AC_TRY_COMPILE([
+#include <courier-unicode.h>
+#ifndef COURIER_UNICODE_VERSION
+#define COURIER_UNICODE_VERSION 0
+#endif
+
+#if COURIER_UNICODE_VERSION < ]$v$r$p[
+#error "courier-unicode ]$vers[ library is required"
+#endif
+
+],[],[],
+AC_MSG_ERROR([
+ERROR: The Courier Unicode Library ]$vers[ header files appear not to be installed.
+You may need to upgrade the library or install a separate development
+subpackage in addition to the main package.])
+)
+
+AC_MSG_RESULT([ok])
+])
diff --git a/unicode/courier-unicode.h.in b/unicode/courier-unicode.h.in
index 57603da..2999ee3 100644
--- a/unicode/courier-unicode.h.in
+++ b/unicode/courier-unicode.h.in
@@ -41,7 +41,7 @@ typedef uint32_t char32_t;
#endif
#endif
-#define COURIER_UNICODE_VERSION 220
+#define COURIER_UNICODE_VERSION @HVERSION@
/*
** The system default character set, from the locale.
@@ -604,10 +604,10 @@ struct unicode_bidi_direction {
struct unicode_bidi_direction unicode_bidi_get_direction(const char32_t *p,
size_t n);
-extern unicode_bidi_level_t unicode_bidi_calc(const char32_t *p, size_t n,
- unicode_bidi_level_t *bufp,
- const unicode_bidi_level_t *
- initial_embedding_level);
+struct unicode_bidi_direction unicode_bidi_calc(const char32_t *p, size_t n,
+ unicode_bidi_level_t *bufp,
+ const unicode_bidi_level_t *
+ initial_embedding_level);
extern void unicode_bidi_reorder(char32_t *p,
unicode_bidi_level_t *levels,
@@ -652,13 +652,15 @@ extern void unicode_bidi_setbnl(char32_t *p,
const enum_bidi_type_t *types,
size_t n);
-extern unicode_bidi_level_t unicode_bidi_calc_levels(const char32_t *p,
- const enum_bidi_type_t
- *types,
- size_t n,
- unicode_bidi_level_t *bufp,
- const unicode_bidi_level_t
- *initial_embedding_level);
+extern struct unicode_bidi_direction
+unicode_bidi_calc_levels(const char32_t *p,
+ const enum_bidi_type_t
+ *types,
+ size_t n,
+ unicode_bidi_level_t *bufp,
+ const unicode_bidi_level_t
+ *initial_embedding_level);
+
/* Bitmask options to unicode_bidi_cleanup */
/*
@@ -703,6 +705,10 @@ extern size_t unicode_bidi_cleanup(char32_t *string,
void (*removed_callback)(size_t, void *),
void *);
+extern size_t unicode_bidi_cleaned_size(const char32_t *string,
+ size_t n,
+ int options);
+
extern void unicode_bidi_logical_order(char32_t *string,
unicode_bidi_level_t *levels,
size_t n,
@@ -711,6 +717,12 @@ extern void unicode_bidi_logical_order(char32_t *string,
void *),
void *arg);
+extern int unicode_bidi_needs_embed(const char32_t *string,
+ const unicode_bidi_level_t *levels,
+ size_t n,
+ const unicode_bidi_level_t *
+ paragraph_embedding);
+
extern void unicode_bidi_embed(const char32_t *string,
const unicode_bidi_level_t *levels,
size_t n,
@@ -2231,7 +2243,7 @@ struct bidi_calc_types {
//! can be constructed explicitly, and then passed in directly.
std::tuple<std::vector<unicode_bidi_level_t>,
- unicode_bidi_level_t> bidi_calc(const bidi_calc_types &s);
+ struct unicode_bidi_direction> bidi_calc(const bidi_calc_types &s);
//! Calculate bidirectional embedding levels
@@ -2242,8 +2254,8 @@ std::tuple<std::vector<unicode_bidi_level_t>,
//! embedding level.
std::tuple<std::vector<unicode_bidi_level_t>,
- unicode_bidi_level_t> bidi_calc(const bidi_calc_types &s,
- unicode_bidi_level_t level);
+ struct unicode_bidi_direction> bidi_calc(const bidi_calc_types &s,
+ unicode_bidi_level_t level);
//! Reorder bidirectional text
@@ -2322,6 +2334,14 @@ void bidi_logical_order(std::vector<unicode_bidi_level_t> &levels,
size_t starting_pos=0,
size_t n=(size_t)-1);
+//! Whether directional and isolation markers are needed.
+
+bool bidi_needs_embed(const std::u32string &string,
+ const std::vector<unicode_bidi_level_t> &levels,
+ const unicode_bidi_level_t *paragraph_embedding=0,
+ size_t starting_pos=0,
+ size_t n=(size_t)-1);
+
//! Embed directional and isolation markers
//! Non-0 return value indicates the string and levels' sizes do not match.
@@ -2352,14 +2372,19 @@ std::u32string bidi_embed(const std::u32string &string,
//! In order for the unicode string to have the specified default
//! paragraph embedding level.
-extern char32_t bidi_embed_paragraph_level(const std::u32string &string,
- unicode_bidi_level_t level);
+char32_t bidi_embed_paragraph_level(const std::u32string &string,
+ unicode_bidi_level_t level);
//! Compute default direction of text
-extern unicode_bidi_direction bidi_get_direction(const std::u32string &string,
- size_t starting_pos=0,
- size_t n=(size_t)-1);
+unicode_bidi_direction bidi_get_direction(const std::u32string &string,
+ size_t starting_pos=0,
+ size_t n=(size_t)-1);
+
+//! Override bidi direction.
+std::u32string bidi_override(const std::u32string &s,
+ unicode_bidi_level_t direction,
+ int cleanup_options=0);
#if 0
{
diff --git a/unicode/courier-unicode.spec.in b/unicode/courier-unicode.spec.in
index f7d1eb6..440d6f1 100644
--- a/unicode/courier-unicode.spec.in
+++ b/unicode/courier-unicode.spec.in
@@ -1,5 +1,12 @@
Summary: Courier Unicode Library
+%if 0%{?compat:1}
+Name: courier-unicode%(echo @VERSION@ | tr -d '.')
+
+%define __brp_ldconfig %{nil}
+
+%else
Name: courier-unicode
+%endif
Version: @VERSION@
Release: 1%{?dist}%{?courier_release}
License: GPLv3
@@ -11,10 +18,14 @@ BuildRequires: perl
BuildRequires: gcc-c++
BuildRequires: %{__make}
+%if 0%{?compat:1}
+
+%else
%package devel
Summary: Courier Unicode Library development files
Group: Development/Libraries
Requires: %{name} = 0:%{version}-%{release}
+%endif
%description
This library implements several algorithms related to the Unicode
@@ -24,13 +35,17 @@ This package installs only the run-time libraries needed by applications that
use this library. Install the "courier-unicode-devel" package if you want
to develop new applications using this library.
+%if 0%{?compat:1}
+
+%else
%description devel
This package contains development files for the Courier Unicode Library.
Install this package if you want to develop applications that uses this
unicode library.
+%endif
%prep
-%setup -q
+%setup -q -n courier-unicode-@VERSION@
%configure
%build
%{__make} -s %{?_smp_mflags}
@@ -39,12 +54,27 @@ unicode library.
rm -rf $RPM_BUILD_ROOT
%{__make} install DESTDIR=$RPM_BUILD_ROOT
-%post -p /sbin/ldconfig
-%postun -p /sbin/ldconfig
+%if 0%{?compat:1}
+find $RPM_BUILD_ROOT%{_libdir} -type l -print | xargs rm -f
+rm -rf $RPM_BUILD_ROOT%{_includedir}
+rm -f $RPM_BUILD_ROOT%{_libdir}/*.a
+rm -f $RPM_BUILD_ROOT%{_libdir}/*.la
+rm -rf $RPM_BUILD_ROOT%{_datadir}/aclocal
+rm -rf $RPM_BUILD_ROOT%{_mandir}
+%endif
%clean
rm -rf $RPM_BUILD_ROOT
+%post -p /sbin/ldconfig
+%postun -p /sbin/ldconfig
+
+%if 0%{?compat:1}
+%files
+%defattr(-,root,root,-)
+%{_libdir}/*.so.*
+
+%else
%files
%defattr(-,root,root,-)
@@ -58,6 +88,7 @@ rm -rf $RPM_BUILD_ROOT
%{_libdir}/*.la
%{_libdir}/*.a
%{_datadir}/aclocal/*.m4
+%endif
%changelog
* Sun Jan 12 2014 Sam Varshavchik <mrsam@octopus.email-scan.com> - 1.0
diff --git a/unicode/m4/courier-unicode.m4 b/unicode/m4/courier-unicode.m4
index dadf321..673bc65 100644
--- a/unicode/m4/courier-unicode.m4
+++ b/unicode/m4/courier-unicode.m4
@@ -50,44 +50,3 @@ AC_MSG_ERROR([*** A compiler with C++11 Unicode support was not found])
CXXFLAGS="$save_FLAGS"
AC_LANG_POP([C++])
])
-
-AC_DEFUN([AX_COURIER_UNICODE_VERSION],[
-
-AC_MSG_CHECKING(courier-unicode library and version)
-
-v="$1"
-
-if test "$v" = ""
-then
- v=2.2
-fi
-
-set -- `echo "$v" | tr '.' ' '`
-
-v=$[]1
-r=$[]2
-p=$[]3
-
-if test "$p" = ""
- then p="0"
-fi
-
-AC_TRY_COMPILE([
-#include <courier-unicode.h>
-#ifndef COURIER_UNICODE_VERSION
-#define COURIER_UNICODE_VERSION 0
-#endif
-
-#if COURIER_UNICODE_VERSION < ]$v$r$p[
-#error "courier-unicode ]$1[ library is required"
-#endif
-
-],[],[],
-AC_MSG_ERROR([
-ERROR: The Courier Unicode Library ]$1[ header files appear not to be installed.
-You may need to upgrade the library or install a separate development
-subpackage in addition to the main package.])
-)
-
-AC_MSG_RESULT([ok])
-])
diff --git a/unicode/unicode_bidi.c b/unicode/unicode_bidi.c
index b23b833..772f9fe 100644
--- a/unicode/unicode_bidi.c
+++ b/unicode/unicode_bidi.c
@@ -464,7 +464,7 @@ struct directional_status_stack_entry {
typedef struct {
struct directional_status_stack_entry *head;
- unicode_bidi_level_t paragraph_embedding_level;
+ struct unicode_bidi_direction paragraph_embedding_level;
const char32_t *chars;
enum_bidi_type_t *types;
const enum_bidi_type_t *orig_types;
@@ -618,7 +618,7 @@ get_enum_bidi_type_for_paragraph_embedding_level(size_t i,
return p->p[i];
}
-static unicode_bidi_level_t
+static struct unicode_bidi_direction
compute_paragraph_embedding_level_from_types(const enum_bidi_type_t *p,
size_t i, size_t j)
{
@@ -628,7 +628,7 @@ compute_paragraph_embedding_level_from_types(const enum_bidi_type_t *p,
return compute_paragraph_embedding_level
(i, j,
get_enum_bidi_type_for_paragraph_embedding_level,
- &info).direction;
+ &info);
}
static directional_status_stack_t
@@ -642,10 +642,18 @@ directional_status_stack_init(const char32_t *chars,
stack=(directional_status_stack_t)calloc(1, sizeof(*stack));
- stack->paragraph_embedding_level=
- initial_embedding_level
- ? *initial_embedding_level & 1
- : compute_paragraph_embedding_level_from_types(types, 0, n);
+ if (initial_embedding_level)
+ {
+ stack->paragraph_embedding_level.direction=
+ *initial_embedding_level & 1;
+ stack->paragraph_embedding_level.is_explicit=1;
+ }
+ else
+ {
+ stack->paragraph_embedding_level=
+ compute_paragraph_embedding_level_from_types(types,
+ 0, n);
+ }
stack->chars=chars;
stack->orig_types=types;
@@ -666,7 +674,8 @@ directional_status_stack_init(const char32_t *chars,
stack->size=n;
directional_status_stack_push(stack,
- stack->paragraph_embedding_level,
+ stack->paragraph_embedding_level
+ .direction,
do_neutral, 0);
return stack;
@@ -736,7 +745,7 @@ void unicode_bidi_setbnl(char32_t *p,
}
}
-unicode_bidi_level_t
+struct unicode_bidi_direction
unicode_bidi_calc(const char32_t *p, size_t n, unicode_bidi_level_t *bufp,
const unicode_bidi_level_t *initial_embedding_level)
{
@@ -748,7 +757,7 @@ unicode_bidi_calc(const char32_t *p, size_t n, unicode_bidi_level_t *bufp,
unicode_bidi_calc_types(p, n, buf);
- unicode_bidi_level_t level=
+ struct unicode_bidi_direction level=
unicode_bidi_calc_levels(p,
buf,
n,
@@ -762,7 +771,7 @@ unicode_bidi_calc(const char32_t *p, size_t n, unicode_bidi_level_t *bufp,
static void unicode_bidi_cl(directional_status_stack_t stack);
-unicode_bidi_level_t
+struct unicode_bidi_direction
unicode_bidi_calc_levels(const char32_t *p,
const enum_bidi_type_t *types,
size_t n,
@@ -779,12 +788,12 @@ unicode_bidi_calc_levels(const char32_t *p,
stack=directional_status_stack_init(p, types, n, bufp,
initial_embedding_level);
- unicode_bidi_level_t paragraph_embedding_level=
+ struct unicode_bidi_direction paragraph_embedding_level=
stack->paragraph_embedding_level;
#ifdef BIDI_DEBUG
fprintf(DEBUGDUMP, "BIDI: START: Paragraph embedding level: %d\n",
- (int)stack->paragraph_embedding_level);
+ (int)paragraph_embedding_level.direction);
#endif
unicode_bidi_cl(stack);
@@ -971,7 +980,8 @@ static void unicode_bidi_cl(directional_status_stack_t stack)
}
cur_class=compute_paragraph_embedding_level_from_types
- (stack->types, i+1, j) == 1
+ (stack->types, i+1, j).direction
+ != UNICODE_BIDI_LR
? UNICODE_BIDI_TYPE_RLI
: UNICODE_BIDI_TYPE_LRI;
}
@@ -1104,7 +1114,8 @@ static void unicode_bidi_cl(directional_status_stack_t stack)
{
/* X8 */
- stack->levels[i]=stack->paragraph_embedding_level;
+ stack->levels[i]=
+ stack->paragraph_embedding_level.direction;
}
}
@@ -1203,9 +1214,9 @@ static void unicode_bidi_cl(directional_status_stack_t stack)
continue; /* Edge case */
unicode_bidi_level_t before=
- stack->paragraph_embedding_level;
+ stack->paragraph_embedding_level.direction;
unicode_bidi_level_t after=
- stack->paragraph_embedding_level;
+ stack->paragraph_embedding_level.direction;
size_t first_i=beg_iter.i;
@@ -1301,11 +1312,11 @@ static void unicode_bidi_cl(directional_status_stack_t stack)
case UNICODE_BIDI_TYPE_PDI:
if (seen_sb)
stack->levels[i]=
- stack->paragraph_embedding_level;
+ stack->paragraph_embedding_level.direction;
break;
case UNICODE_BIDI_TYPE_S:
case UNICODE_BIDI_TYPE_B:
- stack->levels[i]=stack->paragraph_embedding_level;
+ stack->levels[i]=stack->paragraph_embedding_level.direction;
seen_sb=1;
break;
default:
@@ -2052,12 +2063,14 @@ void unicode_bidi_reorder(char32_t *p,
level_run_layers_deinit(&layers);
}
-size_t unicode_bidi_cleanup(char32_t *string,
- unicode_bidi_level_t *levels,
- size_t n,
- int cleanup_options,
- void (*removed_callback)(size_t, void *),
- void *arg)
+static size_t unicode_bidi_count_or_cleanup(const char32_t *string,
+ char32_t *dest,
+ unicode_bidi_level_t *levels,
+ size_t n,
+ int cleanup_options,
+ void (*removed_callback)(size_t,
+ void *),
+ void *arg)
{
size_t i=0;
for (size_t j=0; j<n; ++j)
@@ -2079,13 +2092,34 @@ size_t unicode_bidi_cleanup(char32_t *string,
if (levels)
levels[i]=levels[j] & 1;
- string[i]=(cleanup_options & UNICODE_BIDI_CLEANUP_BNL)
- && cl == UNICODE_BIDI_TYPE_B ? '\n' : string[j];
+ if (dest)
+ dest[i]=(cleanup_options & UNICODE_BIDI_CLEANUP_BNL)
+ && cl == UNICODE_BIDI_TYPE_B ? '\n' : string[j];
++i;
}
return i;
}
+size_t unicode_bidi_cleanup(char32_t *string,
+ unicode_bidi_level_t *levels,
+ size_t n,
+ int cleanup_options,
+ void (*removed_callback)(size_t, void *),
+ void *arg)
+{ /* In-place cleanup: "string" is handed to
+  ** unicode_bidi_count_or_cleanup() as both the source and the
+  ** destination, so the characters that are kept get written back
+  ** into the caller's own buffer. "levels", when not NULL, is
+  ** rewritten by the helper as well. The helper's return value, the
+  ** number of characters kept, is returned unchanged.
+  */
+ return unicode_bidi_count_or_cleanup(string, string, levels, n,
+ cleanup_options, removed_callback,
+ arg);
+}
+
+size_t unicode_bidi_cleaned_size(const char32_t *string,
+ size_t n,
+ int cleanup_options)
+{ /* Count-only variant of unicode_bidi_cleanup(): the helper gets a
+  ** NULL destination and NULL levels, so it stores no characters and
+  ** no levels and only counts. The return value is the number of
+  ** characters the helper reports as kept, for the same
+  ** cleanup_options. No removal callback is supplied.
+  ** NOTE(review): the part of the helper that invokes the callback is
+  ** not visible here -- confirm it tolerates a NULL callback.
+  */
+ return unicode_bidi_count_or_cleanup(string, NULL, NULL, n,
+ cleanup_options, NULL, NULL);
+}
+
void unicode_bidi_logical_order(char32_t *string,
unicode_bidi_level_t *levels,
size_t n,
@@ -2276,6 +2310,49 @@ static void emit_marker(struct bidi_embed_levelrun *p,
}
}
+/*
+** Round-trip check of a string and its embedding levels.
+**
+** A private copy of the string is run through unicode_bidi_calc(),
+** unicode_bidi_reorder(), unicode_bidi_cleanup() and
+** unicode_bidi_logical_order(); the result is then compared with the
+** original string and with the caller-supplied levels.
+**
+** string          - n characters; not modified.
+** levels          - n embedding levels to compare against. May be a null
+**                   pointer only when n is 0.
+** n               - number of characters (and levels).
+** paragraph_level - optional explicit paragraph embedding level that is
+**                   passed through to unicode_bidi_calc(); NULL for none.
+**
+** Returns 1 when unicode_bidi_cleanup() removed nothing, the computed
+** paragraph direction equals *paragraph_level (when one was given), and
+** the round-tripped copy and its levels are byte-for-byte identical to
+** the input. Returns 0 otherwise. Calls abort() if memory cannot be
+** allocated.
+**
+** NOTE(review): returning 1 for a perfect round trip looks inverted
+** relative to the function's name -- confirm the intended polarity
+** against the documented contract before relying on it.
+*/
+int unicode_bidi_needs_embed(const char32_t *string,
+			     const unicode_bidi_level_t *levels,
+			     size_t n,
+			     const unicode_bidi_level_t *paragraph_level)
+{
+	/*
+	** malloc(0) is allowed to return NULL; that must not be mistaken
+	** for an allocation failure, so always request at least one element.
+	*/
+	size_t alloc_n=n ? n:1;
+	char32_t *string_cpy=(char32_t *)malloc(alloc_n * sizeof(char32_t));
+	unicode_bidi_level_t *levels_cpy=(unicode_bidi_level_t *)
+		malloc(alloc_n * sizeof(unicode_bidi_level_t));
+	size_t nn;
+	int ret;
+
+	if (!string_cpy || !levels_cpy)
+		abort();
+
+	if (n)
+		memcpy(string_cpy, string, n * sizeof(char32_t));
+
+	struct unicode_bidi_direction direction=
+		unicode_bidi_calc(string_cpy, n,
+				  levels_cpy, paragraph_level);
+
+	unicode_bidi_reorder(string_cpy, levels_cpy, n, NULL, NULL);
+	nn=unicode_bidi_cleanup(string_cpy, levels_cpy, n, 0,
+				NULL, NULL);
+
+	ret=0;
+	if (n == nn && (paragraph_level == NULL ||
+			direction.direction == *paragraph_level))
+	{
+		unicode_bidi_logical_order(string_cpy, levels_cpy, nn,
+					   direction.direction,
+					   NULL, NULL);
+
+		/*
+		** With n == 0 there is nothing to compare, and levels may be
+		** a null pointer, which must not be handed to memcmp() even
+		** with a zero length.
+		*/
+		if (n == 0 ||
+		    (memcmp(string_cpy, string, n * sizeof(char32_t)) == 0 &&
+		     memcmp(levels_cpy, levels,
+			    n * sizeof(unicode_bidi_level_t)) == 0))
+		{
+			ret=1;
+		}
+	}
+	free(string_cpy);
+	free(levels_cpy);
+	return ret;
+}
+
void unicode_bidi_embed(const char32_t *string,
const unicode_bidi_level_t *levels,
size_t n,
diff --git a/unicode/unicodecpp.C b/unicode/unicodecpp.C
index babb6bb..7bb6edc 100644
--- a/unicode/unicodecpp.C
+++ b/unicode/unicodecpp.C
@@ -581,18 +581,29 @@ void unicode::bidi_calc_types::setbnl(std::u32string &s)
unicode_bidi_setbnl(&s[0], &types[0], s.size());
}
-std::tuple<std::vector<unicode_bidi_level_t>, unicode_bidi_level_t>
+std::tuple<std::vector<unicode_bidi_level_t>,
+ struct unicode_bidi_direction>
unicode::bidi_calc(const bidi_calc_types &s)
{
return unicode::bidi_calc(s, UNICODE_BIDI_SKIP);
}
-std::tuple<std::vector<unicode_bidi_level_t>, unicode_bidi_level_t>
+std::tuple<std::vector<unicode_bidi_level_t>,
+ struct unicode_bidi_direction>
unicode::bidi_calc(const bidi_calc_types &st,
unicode_bidi_level_t paragraph_embedding_level)
{
+ std::tuple<std::vector<unicode_bidi_level_t>,
+ struct unicode_bidi_direction>
+ ret;
+ auto &direction_ret=std::get<1>(ret);
+
if (st.s.size() != st.types.size())
- return { {}, UNICODE_BIDI_LR };
+ {
+ direction_ret.direction=UNICODE_BIDI_LR;
+ direction_ret.is_explicit=false;
+ return ret;
+ }
const unicode_bidi_level_t *initial_embedding_level=0;
@@ -602,11 +613,17 @@ unicode::bidi_calc(const bidi_calc_types &st,
initial_embedding_level=&paragraph_embedding_level;
}
- std::tuple<std::vector<unicode_bidi_level_t>, unicode_bidi_level_t>
- ret;
-
std::get<0>(ret).resize(st.s.size());
- std::get<1>(ret)=UNICODE_BIDI_LR;
+
+ if (initial_embedding_level)
+ {
+ direction_ret.direction=paragraph_embedding_level;
+ direction_ret.is_explicit=1;
+ }
+ else
+ {
+ direction_ret.direction= UNICODE_BIDI_LR;
+ }
if (st.s.size())
{
@@ -932,3 +949,43 @@ unicode_bidi_direction unicode::bidi_get_direction(const std::u32string &string,
return unicode_bidi_get_direction(string.c_str()+starting_pos, n);
}
+
+//! Run unicode_bidi_needs_embed() on a range of a string.
+//!
+//! The range starts at \c starting_pos and covers at most \c n characters;
+//! \c n is clamped to what is left of the string. \c paragraph_embedding
+//! is passed through unchanged and may be a null pointer.
+//!
+//! Returns false when \c string and \c levels differ in size, or when
+//! \c starting_pos is not less than that size. Otherwise returns whether
+//! unicode_bidi_needs_embed() returned non-0 for the range.
+
+bool unicode::bidi_needs_embed(const std::u32string &string,
+			       const std::vector<unicode_bidi_level_t> &levels,
+			       const unicode_bidi_level_t *paragraph_embedding,
+			       size_t starting_pos,
+			       size_t n)
+{
+	if (string.size() != levels.size())
+		return false;
+
+	auto s=levels.size();
+
+	if (starting_pos >= s)
+		return false;
+
+	if (n > s-starting_pos)
+		n=s-starting_pos;
+
+	// Both pointers must refer to the same range: the characters have
+	// to be offset by starting_pos exactly like their levels are.
+	return unicode_bidi_needs_embed(string.c_str()+starting_pos,
+					n == 0 ? NULL : &levels[starting_pos],
+					n,
+					paragraph_embedding) != 0;
+}
+
+//! Build a copy of \c s that starts with a directional override marker.
+//!
+//! A one-character placeholder is put in front of the copy, the copy is
+//! passed through bidi_cleanup() with \c cleanup_options, and the
+//! placeholder is then replaced by UNICODE_RLO when the low bit of
+//! \c direction is set, or by UNICODE_LRO when it is not.
+
+std::u32string unicode::bidi_override(const std::u32string &s,
+				      unicode_bidi_level_t direction,
+				      int cleanup_options)
+{
+	std::u32string overridden;
+
+	// Room for the marker plus the whole input.
+	overridden.reserve(s.size()+1);
+
+	// Placeholder that keeps slot 0 occupied during the cleanup.
+	overridden.push_back(' ');
+	overridden.insert(overridden.end(), s.begin(), s.end());
+
+	// Removal notifications are of no interest here.
+	bidi_cleanup(overridden, [](size_t) {}, cleanup_options);
+
+	if (direction & 1)
+		overridden.at(0)=UNICODE_RLO;
+	else
+		overridden.at(0)=UNICODE_LRO;
+
+	return overridden;
+}