Add additional bi-directional related algorithms.

Cleanup, remove markers, via unicode_bidi_cleanup() and unicode_bidi_extra_cleanup(). Re-embed directional markers, via unicode_bidi_logical_order(), unicode_bidi_embed() and unicode_bidi_embed_paragraph_level().
author: Sam Varshavchik 2020-07-12 09:44:24 -0400
committer: Sam Varshavchik 2020-08-02 14:56:50 -0400
commit: d2915c9cadf6fbc5ae29ffc387cce987b88dbbe0 (patch)
tree: f76c8edf36fb84c6e082f2a4ae9798b10aeda70e
parent: 51471a4d8b177adfcd40c145a809193a4ab9bd8d (diff)
download: courier-libs-d2915c9cadf6fbc5ae29ffc387cce987b88dbbe0.tar.bz2
9 files changed, 2108 insertions, 329 deletions
diff --git a/unicode/Makefile.am b/unicode/Makefile.am
index 8ac6fb1..f864e2d 100644
--- a/unicode/Makefile.am
+++ b/unicode/Makefile.am
@@ -90,6 +90,11 @@ include_HEADERS=courier-unicode.h \
 man_MANS= \
         $(srcdir)/man/courier-unicode.7 \
         $(srcdir)/man/unicode\:\:bidi_calc.3 \
+        $(srcdir)/man/unicode\:\:bidi_cleanup.3 \
+        $(srcdir)/man/unicode\:\:bidi_embed.3 \
+        $(srcdir)/man/unicode\:\:bidi_embed_paragraph_level.3 \
+        $(srcdir)/man/unicode\:\:bidi_extra_cleanup.3 \
+        $(srcdir)/man/unicode\:\:bidi_logical_order.3 \
         $(srcdir)/man/unicode\:\:bidi_reorder.3 \
         $(srcdir)/man/unicode\:\:iconvert\:\:convert.3 \
         $(srcdir)/man/unicode\:\:iconvert\:\:convert_tocase.3 \
@@ -110,8 +115,14 @@ man_MANS= \
         $(srcdir)/man/unicode_bidi.3 \
         $(srcdir)/man/unicode_bidi_bracket_type.3 \
         $(srcdir)/man/unicode_bidi_calc.3 \
+        $(srcdir)/man/unicode_bidi_cleanup.3 \
+        $(srcdir)/man/unicode_bidi_embed.3 \
+        $(srcdir)/man/unicode_bidi_embed_paragraph_level.3 \
+        $(srcdir)/man/unicode_bidi_extra_cleanup.3 \
+        $(srcdir)/man/unicode_bidi_logical_order.3 \
         $(srcdir)/man/unicode_bidi_mirror.3 \
         $(srcdir)/man/unicode_bidi_reorder.3 \
+        $(srcdir)/man/unicode_bidi_type.3 \
         $(srcdir)/man/unicode_canonical.3 \
         $(srcdir)/man/unicode_category_lookup.3 \
         $(srcdir)/man/unicode_convert.3 \
diff --git a/unicode/README b/unicode/README
index 926e004..9994cc9 100644
--- a/unicode/README
+++ b/unicode/README
@@ -16,12 +16,12 @@ Courier Unicode Library
    COPYING
 
    This library implements several algorithms related to the Unicode
-   Standard:
+   Standard, notably:
 
      * Look up uppercase, lowercase, and titlecase equivalents of a unicode
        character.
 
-     * Implementation of grapheme and work breaking rules.
+     * Implementation of grapheme and word breaking rules.
 
      * Implementation of line breaking rules.
 
diff --git a/unicode/biditest.C b/unicode/biditest.C
index 2d2a6e5..1aa2c63 100644
--- a/unicode/biditest.C
+++ b/unicode/biditest.C
@@ -8,6 +8,7 @@
 #include	<utility>
 #include	<iomanip>
 #include	<numeric>
+#include	<unistd.h>
 
 std::vector<std::string> testcase;
 
@@ -53,11 +54,11 @@ int main(int argc, char **argv)
 	{
 		buf.clear();
 
-		if (std::getline(fp, buf).eof() && buf.empty())
-			break;
+		bool iseof=std::getline(fp, buf).eof() && buf.empty();
 
-		if (++linenum >= nextlogline)
+		if (iseof || ++linenum >= nextlogline)
 		{
+			alarm(300);
 			std::cout << logmsg;
 
 			std::ostringstream o;
@@ -72,7 +73,8 @@ int main(int argc, char **argv)
 
 			nextlogline += 20000;
 		}
-
+		if (iseof)
+			break;
 		buf.erase(std::find(buf.begin(), buf.end(), '#'), buf.end());
 
 		if (buf.substr(0, 8) == "@Levels:")
@@ -334,11 +336,7 @@ int main(int argc, char **argv)
 			n >>= 1;
 		}
 	}
-
-	std::cout << logmsg;
-
-	std::fill(logmsg.begin(), logmsg.end(), ' ');
-	std::cout << logmsg << std::endl;
+	std::cout << std::endl;
 	return 0;
 }
 
diff --git a/unicode/biditest2.C b/unicode/biditest2.C
index f497bcf..cfa0e50 100644
--- a/unicode/biditest2.C
+++ b/unicode/biditest2.C
@@ -1,42 +1,110 @@
 #include	"unicode_config.h"
 #include	"courier-unicode.h"
 #include	<iostream>
+#include	<iterator>
 #include	<sstream>
 #include	<fstream>
 #include	<cstdint>
 #include	<iomanip>
+#include	<algorithm>
+#include	<unistd.h>
 
 FILE *DEBUGDUMP;
 
-int main(int argc, char **argv)
+#define BIDI_DEBUG
+
+extern "C" {
+#if 0
+}
+#endif
+
+#include "unicode_bidi.c"
+
+}
+
+void latin_test()
 {
-	std::ifstream fp("BidiCharacterTest.txt");
+	for (char32_t c=32; c<256; c++)
+	{
+		std::u32string s;
 
-	if (!fp.is_open())
+		s += c;
+
+		std::vector<unicode_bidi_level_t> levels={UNICODE_BIDI_LR};
+
+		auto new_string=unicode::bidi_embed(s, levels,
+						    UNICODE_BIDI_LR);
+
+		if (new_string != s)
+		{
+			std::cerr << "Character " << (int)c
+				  << " does not work." << std::endl;
+			exit(1);
+		}
+	}
+
+	std::u32string s;
+	std::vector<unicode_bidi_level_t> levels;
+
+	for (char32_t c=32; c<256; c++)
 	{
-		std::cerr << "Cannot open BidiCharacterTest.txt" << std::endl;
+		s += c;
+		levels.push_back(UNICODE_BIDI_LR);
+	}
+
+	auto new_string=unicode::bidi_embed(s, levels,
+					    UNICODE_BIDI_LR);
+
+	if (new_string != s)
+	{
+		std::cerr << "iso-8859-1 string does not work."
+			  << std::endl;
 		exit(1);
 	}
+}
 
-	DEBUGDUMP=fopen("/dev/null", "w");
-	if (!DEBUGDUMP)
+void character_test()
+{
+	std::ifstream fp("BidiCharacterTest.txt");
+
+	if (!fp.is_open())
 	{
-		perror("/dev/null");
+		std::cerr << "Cannot open BidiCharacterTest.txt" << std::endl;
 		exit(1);
 	}
 
 	std::string buf;
 
 	size_t linenum=0;
+	size_t nextlogline=0;
+	std::string logmsg;
 
 	while (1)
 	{
 		buf.clear();
 
-		if (std::getline(fp, buf).eof() && buf.empty())
-			break;
-		++linenum;
+		bool iseof=std::getline(fp, buf).eof() && buf.empty();
+
+		if (iseof || ++linenum >= nextlogline)
+		{
+			alarm(300);
+			std::cout << logmsg;
+
+			std::ostringstream o;
 
+			o << std::setw(6) << linenum << " lines processed... ";
+
+			logmsg=o.str();
+
+			std::cout << logmsg << std::flush;
+
+			std::fill(logmsg.begin(), logmsg.end(), '\b');
+
+			nextlogline += 20000;
+		}
+
+		if (iseof)
+			break;
 		auto p=buf.find('#');
 
 		if (p != buf.npos)
@@ -187,17 +255,202 @@ int main(int argc, char **argv)
 			std::cerr << std::endl;
 			exit(1);
 		}
-	}
-	return 0;
-}
 
-#define BIDI_DEBUG
+		std::vector<size_t> actual_render_order;
+
+		size_t n=0;
+
+		std::generate_n(std::back_inserter(actual_render_order),
+				s.size(),
+				[&] { return n++; });
+
+		unicode::bidi_reorder
+			(s, levels,
+			 [&]
+			 (size_t index,
+			  size_t n)
+			 {
+				 auto b=actual_render_order.begin();
+				 std::reverse(b+index, b+index+n);
+			 });
+
+		n=0;
+		unicode::bidi_cleanup
+			(s, levels,
+			 [&]
+			 (size_t i)
+			 {
+				 actual_render_order.erase
+					 (actual_render_order.begin()+i-n);
+				 ++n;
+			 });
+
+		if (render_order != actual_render_order)
+		{
+			std::cerr << "Regression, line "
+				  << linenum
+				  << ": render order"
+				  << std::endl
+				  << "   Expected:";
+			for (auto n:render_order)
+			{
+				std::cerr << " " << n;
+			}
+			std::cerr << std::endl
+				  << "     Actual:";
 
-extern "C" {
-#if 0
+			for (auto n:actual_render_order)
+			{
+				std::cerr << " " << n;
+			}
+			std::cerr << std::endl;
+			exit(1);
+		}
+
+		unicode::bidi_extra_cleanup(s, levels);
+
+		auto dump_ls=
+			[&]
+			(const std::u32string &s,
+			 const std::vector<unicode_bidi_level_t> &l)
+			{
+				for (size_t i=0; i<s.size(); ++i)
+				{
+					std::cerr << " " << std::hex
+						  << std::setw(4)
+						  << std::setfill('0')
+						  << s[i] << "/"
+						  << std::dec
+						  << (int)l[i];
+				}
+			};
+
+		for (int pass=0; pass<4; pass++)
+		{
+			int paragraph=pass & 1;
+			int use_default=pass & 2;
+
+			for (size_t i=0; i<s.size(); ++i)
+			{
+				/* L1 */
+				switch (unicode_bidi_type(s[i])) {
+				case UNICODE_BIDI_TYPE_S:
+				case UNICODE_BIDI_TYPE_B:
+					levels.at(i)=paragraph;
+				}
+			}
+
+			auto logical_string=s;
+			auto logical_levels=levels;
+
+			unicode::bidi_logical_order(logical_string,
+						    logical_levels,
+						    paragraph);
+
+			auto new_string=unicode::bidi_embed(logical_string,
+							    logical_levels,
+							    paragraph);
+
+			auto save_string=new_string;
+
+			if (use_default)
+			{
+				auto marker=unicode::bidi_embed_paragraph_level
+					(new_string, paragraph);
+
+				if (marker)
+					new_string.insert(0, 1, marker);
+
+				ret=unicode::bidi_calc(new_string);
+			}
+			else
+			{
+				ret=unicode::bidi_calc(new_string, paragraph);
+			}
+
+			unicode::bidi_reorder(new_string, std::get<0>(ret));
+			unicode::bidi_extra_cleanup(new_string,
+						    std::get<0>(ret));
+
+			/* New string is now back in logical order */
+
+			if (new_string == s && std::get<0>(ret) == levels)
+				continue;
+
+			fclose(DEBUGDUMP);
+			DEBUGDUMP=stderr;
+
+			std::cerr << "Regression, line "
+				  << linenum
+				  << ": embedding markers"
+				  << std::endl
+				  << "   Paragraph embedding level: "
+				  << paragraph;
+
+			if (use_default)
+				std::cerr << " (defaulted)";
+
+			std::cerr << std::endl
+				  << "String (1):";
+
+			dump_ls(s, levels);
+
+			std::cerr << std::endl << "String (2):";
+
+			dump_ls(new_string, std::get<0>(ret));
+			std::cerr << std::endl;
+
+			std::cerr << "Embedding:";
+			dump_ls(logical_string, logical_levels);
+			std::cerr << std::endl;
+
+			unicode::bidi_embed(logical_string,
+					    logical_levels,
+					    paragraph);
+
+			std::cerr << std::endl
+				  << "Embedded string:";
+
+			for (auto c:save_string)
+			{
+				std::cerr << " ";
+
+				switch (c) {
+				case LRM: std::cerr << "LRM"; break;
+				case RLM: std::cerr << "RLM"; break;
+				case RLI: std::cerr << "RLI"; break;
+				case LRI: std::cerr << "LRI"; break;
+				case RLO: std::cerr << "RLO"; break;
+				case LRO: std::cerr << "LRO"; break;
+				case PDF: std::cerr << "PDF"; break;
+				case PDI: std::cerr << "PDI"; break;
+				default:
+					std::cerr << std::hex << std::setw(4)
+						  << std::setfill('0')
+						  << c;
+					break;
+				}
+			}
+			std::cerr << std::dec << std::endl << std::flush;
+
+			ret=unicode::bidi_calc(save_string, paragraph);
+			unicode::bidi_reorder(save_string, std::get<0>(ret));
+			exit(1);
+		}
+	}
+	std::cout << std::endl;
 }
-#endif
 
-#include "unicode_bidi.c"
+int main(int argc, char **argv)
+{
+	DEBUGDUMP=fopen("/dev/null", "w");
+	if (!DEBUGDUMP)
+	{
+		perror("/dev/null");
+		exit(1);
+	}
 
+	latin_test();
+	character_test();
+	return 0;
 }
diff --git a/unicode/book.xml b/unicode/book.xml
index ad0009a..c8948ba 100644
--- a/unicode/book.xml
+++ b/unicode/book.xml
@@ -12,7 +12,7 @@
 
 <!--
 
-Copyright 2014-2017 Double Precision, Inc.
+Copyright 2014-2020 Double Precision, Inc.
 See COPYING for distribution information.
 
 -->
@@ -23,7 +23,7 @@ See COPYING for distribution information.
   <para>
     This library implements several algorithms related to the
     <ulink url="https://www.unicode.org/standard/standard.html">Unicode
-    Standard</ulink>:
+    Standard</ulink>, notably:
   </para>
 
   <itemizedlist>
@@ -36,22 +36,21 @@ See COPYING for distribution information.
     <listitem>
       <para>
 	Implementation of
-	<ulink url="https://www.unicode.org/reports/tr29/tr29-&tr29ver;.html">grapheme
-	and work breaking</ulink> rules.
+	<link linkend="unicode_grapheme_break">grapheme
+	and word breaking</link> rules.
       </para>
     </listitem>
     <listitem>
       <para>
 	Implementation of
-	<ulink url="https://www.unicode.org/reports/tr14/tr14-&tr14ver;.html">line
-	breaking</ulink> rules.
+	<link linkend="unicode_line_break">line breaking</link> rules.
       </para>
     </listitem>
     <listitem>
       <para>
 	Implementation of the
-	<ulink url="https://www.unicode.org/reports/tr9/tr9-&tr9ver;.html">bi-directional
-	algorithm</ulink>.
+	<link linkend="unicode_bidi">bi-directional
+	algorithm</link>.
       </para>
     </listitem>
     <listitem>
@@ -69,15 +68,13 @@ See COPYING for distribution information.
     </listitem>
     <listitem>
       <para>
-	Look up the
-	<ulink url="https://www.unicode.org/reports/tr24/tr24-&tr24ver;.html">Unicode
-	script property</ulink>.
+	Look up the <link linkend="unicode_script">Unicode
+	script property</link>.
       </para>
     </listitem>
     <listitem>
       <para>
-	Look up the
-	<ulink url="https://unicode.org/notes/tn36/">category</ulink>
+	Look up the <link linkend="unicode_category_lookup">category</link>
 	property.
       </para>
     </listitem>
@@ -192,7 +189,7 @@ See COPYING for distribution information.
 	  <programlisting>
 #include &lt;courier-unicode.h&gt;</programlisting>
 	</refsynopsisdiv>
-	<refsect1>
+	<refsect1 id="courier_unicode_descr">
 	  <title>DESCRIPTION</title>
 
 	  <para>
@@ -226,7 +223,7 @@ See COPYING for distribution information.
 	    with this library.
 	  </para>
 	</refsect1>
-	<refsect1>
+	<refsect1 id="courier_unicode_seealso">
 	  <title>SEE ALSO</title>
 
 	  <para>
@@ -306,16 +303,22 @@ See COPYING for distribution information.
 	  <refname>unicode_bidi</refname>
 	  <refname>unicode_bidi_calc</refname>
 	  <refname>unicode_bidi_reorder</refname>
+	  <refname>unicode_bidi_cleanup</refname>
+	  <refname>unicode_bidi_extra_cleanup</refname>
+	  <refname>unicode_bidi_logical_order</refname>
+	  <refname>unicode_bidi_embed</refname>
+	  <refname>unicode_bidi_embed_paragraph_level</refname>
+
+	  <refname>unicode_bidi_type</refname>
 	  <refname>unicode_bidi_mirror</refname>
 	  <refname>unicode_bidi_bracket_type</refname>
 
-	  <refpurpose>unicode bidirectional algorithm</refpurpose>
+	  <refpurpose>unicode bi-directional algorithm</refpurpose>
 	</refnamediv>
 
 	<refsynopsisdiv>
 	  <funcsynopsis>
-	    <funcsynopsisinfo>#include &lt;courier-unicode.h&gt;</funcsynopsisinfo>
-	    <funcsynopsisinfo>unicode_bidi_level_t lr=UNICODE_BIDI_LR;</funcsynopsisinfo>
+	    <funcsynopsisinfo>#include &lt;courier-unicode.h&gt;&#10;&#10;unicode_bidi_level_t lr=UNICODE_BIDI_LR;</funcsynopsisinfo>
 	    <funcprototype>
 	      <funcdef>void <function>unicode_bidi_calc</function></funcdef>
               <paramdef>const char32_t *<parameter>p</parameter></paramdef>
@@ -334,6 +337,51 @@ See COPYING for distribution information.
 	    </funcprototype>
 
 	    <funcprototype>
+	      <funcdef>size_t <function>unicode_bidi_cleanup</function></funcdef>
+              <paramdef>char32_t *<parameter>string</parameter></paramdef>
+              <paramdef>unicode_bidi_level_t *<parameter>levels</parameter></paramdef>
+              <paramdef>size_t <parameter>n</parameter></paramdef>
+              <paramdef>void (*<parameter>removed_callback</parameter>)(size_t, size_t, void *)</paramdef>
+	      <paramdef>void *<parameter>arg</parameter></paramdef>
+	    </funcprototype>
+
+	    <funcprototype>
+	      <funcdef>size_t <function>unicode_bidi_extra_cleanup</function></funcdef>
+              <paramdef>char32_t *<parameter>string</parameter></paramdef>
+              <paramdef>unicode_bidi_level_t *<parameter>levels</parameter></paramdef>
+              <paramdef>size_t <parameter>n</parameter></paramdef>
+              <paramdef>void (*<parameter>removed_callback</parameter>)(size_t, size_t, void *)</paramdef>
+	      <paramdef>void *<parameter>arg</parameter></paramdef>
+	    </funcprototype>
+
+	    <funcprototype>
+	      <funcdef>size_t <function>unicode_bidi_logical_order</function></funcdef>
+              <paramdef>char32_t *<parameter>string</parameter></paramdef>
+              <paramdef>unicode_bidi_level_t *<parameter>levels</parameter></paramdef>
+              <paramdef>size_t <parameter>n</parameter></paramdef>
+              <paramdef>unicode_bidi_level_t <parameter>paragraph_embedding</parameter></paramdef>
+              <paramdef>void (*<parameter>reorder_callback</parameter>)(size_t index, size_t n, void *arg)</paramdef>
+	      <paramdef>void *<parameter>arg</parameter></paramdef>
+	    </funcprototype>
+
+	    <funcprototype>
+	      <funcdef>size_t <function>unicode_bidi_embed</function></funcdef>
+              <paramdef>const char32_t *<parameter>string</parameter></paramdef>
+              <paramdef>const unicode_bidi_level_t *<parameter>levels</parameter></paramdef>
+              <paramdef>size_t <parameter>n</parameter></paramdef>
+              <paramdef>unicode_bidi_level_t <parameter>paragraph_embedding</parameter></paramdef>
+              <paramdef>void (*<parameter>emit</parameter>)(const char32_t *string, size_t n, void *arg)</paramdef>
+	      <paramdef>void *<parameter>arg</parameter></paramdef>
+	    </funcprototype>
+
+	    <funcprototype>
+	      <funcdef>char32_t <function>unicode_bidi_embed_paragraph_level</function></funcdef>
+              <paramdef>const char32_t *<parameter>string</parameter></paramdef>
+              <paramdef>size_t <parameter>n</parameter></paramdef>
+              <paramdef>unicode_bidi_level_t <parameter>paragraph_embedding</parameter></paramdef>
+	    </funcprototype>
+
+	    <funcprototype>
 	      <funcdef>char32_t <function>bidi_mirror</function></funcdef>
               <paramdef>char32_t <parameter>c</parameter></paramdef>
 	    </funcprototype>
@@ -350,63 +398,160 @@ See COPYING for distribution information.
 	    </funcprototype>
 	  </funcsynopsis>
 	</refsynopsisdiv>
-	<refsect1>
+	<refsect1 id="unicode_bidi_descr">
 	  <title>DESCRIPTION</title>
 
 	  <para>
-	    <function>unicode_bidi_calc</function>() and
-	    <function>unicode_bidi_reorder</function>() implement
-	    the
+	    These functions are related to the
 	    <ulink url="https://www.unicode.org/reports/tr9/tr9-&tr9ver;.html"> Unicode Bi-Directional algorithm</ulink>.
-	  </para>
-	  <para>
-	    The first two parameters to
-	    <function>unicode_bidi_calc</function>() are a unicode string
-	    and the number of characters in the Unicode string.
-	    <parameter>levels</parameter> points to a buffer of
-	    <classname>unicode_bidi_level_t</classname> values.
-	    The caller is responsible for allocating and deallocating this
-	    buffer, of
-	    size <parameter>n</parameter>,
-	    the same number of values as the number of characters in the
-	    Unicode string.
-	  </para>
-	  <para>
-	    <function>unicode_bidi_calc</function>() calculates the
-	    embedding level of each character and fills in the
-	    <parameter>levels</parameter> buffer (executes all steps of the
-	    bidirectional algorithm up to, and including, step L1).
-	    A <literal>NULL</literal> <parameter>initial_embedding</parameter>
-	    value calculates the default paragraph embedding value.
-	    A pointer to a <literal>UNICODE_BIDI_LR</literal> or
-	    <literal>UNICODE_BIDI_RL</literal> value explicitly sets a
-	    left-to-right or right-to-left paragraph embedding value.
+	    They implement the algorithm up to and including step L2,
+	    and provide additional functionality of returning miscellaneous
+	    bi-directional-related metadata of Unicode characters. There's
+	    also a basic algorithm that <quote>reverses</quote> the
+	    bi-directional algorithm
+	    and produces a Unicode string with bi-directional markers that
+	    results in the same bi-directional string after reapplying the
+	    algorithm.
 	  </para>
 
-	  <para>
-	    <function>unicode_bidi_calc</function>() calculates each
-	    character's directional embedding value: an even value for
-	    left-to-right text or an odd value for right-to-left text.
-	    Unicode characters with an unspecified directional embedding
-	    value are specified by the
-	    <classname>UNICODE_BIDI_SKIP</classname> embedding level value.
-	    This indicates embedding and override markers, which can be
-	    removed from the string (together with this embedding value)
-	    from the string and the embedding value itself). This can be
-	    done before or after <function>unicode_bidi_reorder</function>().
-	  </para>
+	  <refsect2 id="unicode_bidi_calc_reorder">
+	    <title>Calculating bi-directional rendering order</title>
 
-	  <refsect2>
-	    <title>Reordering text</title>
+	    <para>
+	      The following process computes the rendering order of
+	      characters according to the Unicode Bi-Directional algorithm:
+	    </para>
+
+	    <orderedlist>
+	      <listitem>
+		<para>
+		  Allocate an array of
+		  <structname>unicode_bidi_level_t</structname> that's the
+		  same size as the Unicode string.
+		</para>
+	      </listitem>
+	      <listitem>
+		<para>
+		  Use <function>unicode_bidi_calc</function>() to compute
+		  the Unicode string's characters' bi-directional embedding
+		  level (executes the Bi-Directional algorithm up to and
+		  including step L1). This populates the
+		  <structname>unicode_bidi_level_t</structname> buffer.
+		</para>
+	      </listitem>
+	      <listitem>
+		<para>
+		  Use <function>unicode_bidi_reorder</function>() to reverse
+		  any characters in the string, according to the
+		  algorithm (step L2), with an optional
+		  callback that reports which ranges of characters get
+		  reversed.
+		</para>
+	      </listitem>
+	      <listitem>
+		<para>
+		  Use <function>unicode_bidi_cleanup</function>() or
+		  <function>unicode_bidi_extra_cleanup</function>(),
+		  to remove the characters from the string which are used
+		  by the bi-directional algorithm, and are not needed for
+		  rendering the text.
+		</para>
+	      </listitem>
+	    </orderedlist>
+
+	    <para>
+	      The parameters to
+	      <function>unicode_bidi_calc</function>() are:
+	    </para>
+
+	    <itemizedlist>
+	      <listitem>
+		<para>
+		  A pointer to the Unicode string.
+		</para>
+	      </listitem>
+	      <listitem>
+		<para>
+		  Number of characters in the Unicode string.
+		</para>
+	      </listitem>
+	      <listitem>
+		<para>
+		  A pointer to an array of
+		  <structname>unicode_bidi_level_t</structname> values.
+		  The caller is
+		  responsible for allocating and deallocating this array,
+		  which has the same size as the Unicode string.
+		</para>
+	      </listitem>
+	      <listitem>
+		<para>
+		  An optional pointer to a
+		  <literal>UNICODE_BIDI_LR</literal> or
+		  <literal>UNICODE_BIDI_RL</literal> value. This sets
+		  the default paragraph direction level.
+		  A null pointer computes the default paragraph direction
+		  level based on the string, as specified by the "P" rules
+		  of the bi-directional algorithm.
+		</para>
+	      </listitem>
+	    </itemizedlist>
+
+	    <para>
+	      <function>unicode_bidi_calc</function>() fills in the
+	      <structname>unicode_bidi_level_t</structname> array with the
+	      values corresponding to the embedding level of the
+	      corresponding character,
+	      according to the Unicode Bi-Directional Algorithm (even values for
+	      left-to-right ordering, and odd values for right-to-left
+	      ordering).
+	      A value of UNICODE_BIDI_SKIP designates directional markers
+	      (from step X9).
+	    </para>
 
 	    <para>
-	      <function>unicode_bidi_reorder</function> takes the actual
+	      <function>unicode_bidi_calc</function>() returns the resolved
+	      paragraph direction level, which
+	      always matches the passed in level, if specified, else it
+	      reports the
+	      derived one.
+	    </para>
+
+	    <para>
+	      <function>unicode_bidi_reorder</function>() takes the actual
 	      unicode string together with the embedding values from
 	      <function>unicode_bidi_calc</function>, then reverses the
-	      bidirectional string, as specified by step L2 of the bidirectional
+	      bi-directional string, as specified by step L2 of the bi-directional
 	      algorithm.
+	      The parameters to
+	      <function>unicode_bidi_reorder</function>() are:
 	    </para>
+	    <itemizedlist>
+	      <listitem>
+		<para>
+		  A pointer to the Unicode string.
+		</para>
+	      </listitem>
+	      <listitem>
+		<para>
+		  A pointer to an array of
+		  <structname>unicode_bidi_level_t</structname> values.
+		</para>
+	      </listitem>
+	      <listitem>
+		<para>
+		  Number of characters in the Unicode string and the
+		  <structname>unicode_bidi_level_t</structname> array.
+		</para>
+	      </listitem>
 
+	      <listitem>
+		<para>
+		  An optional <varname>reorder_callback</varname> function
+		  pointer.
+		</para>
+	      </listitem>
+	    </itemizedlist>
 	    <para>
 	      A non-<literal>NULL</literal>
 	      <parameter>reorder_callback</parameter> gets invoked to report
@@ -434,13 +579,280 @@ See COPYING for distribution information.
 	      invokes the <parameter>reorder_callback</parameter> as if
 	      the character string, and their embedding values, were reversed.
 	    </para>
+
+	    <para>
+	      The resulting string and embedding levels are in
+	      <quote>rendering order</quote>, but still contain bi-directional
+	      embedding, override, boundary-neutral, isolate, and marker
+	      characters.
+	      <function>unicode_bidi_cleanup</function>() and
+	      <function>unicode_bidi_extra_cleanup</function>() remove these
+	      characters and directional markers from the unicode string.
+	      <function>unicode_bidi_cleanup</function>() removes only the
+	      embedding, override, and boundary-neutral characters (as
+	      specified by step X9 of the bi-directional algorithm).
+	      <function>unicode_bidi_extra_cleanup</function>()
+	      additionally removes the isolation markers, implicit markers;
+	      and all characters
+	      classified as paragraph separators get replaced by a newline.
+            </para>
+	    <para>
+	      A non-null pointer to the directional embedding level buffer,
+	      of the same size as the string, also removes the corresponding
+	      values from the buffer, and the remaining values in the
+	      embedding level buffer get reset to
+	      levels <literal>UNICODE_BIDI_LR</literal> and
+	      <literal>UNICODE_BIDI_RL</literal>, only.
+            </para>
+	    <para>
+	      The parameters to <function>unicode_bidi_cleanup</function>() and
+	      <function>unicode_bidi_extra_cleanup</function>() are:
+            </para>
+
+	    <itemizedlist>
+	      <listitem>
+		<para>
+		  The pointer to the unicode string.
+                </para>
+              </listitem>
+	      <listitem>
+		<para>
+		  The pointer to the directional embedding buffer.
+                </para>
+              </listitem>
+	      <listitem>
+		<para>
+		  The size of the unicode string and the directional embedding
+		  buffer.
+                </para>
+              </listitem>
+	      <listitem>
+		<para>
+		  A pointer to a function that gets repeatedly invoked with the
+		  index of the character that gets removed from the Unicode
+		  string.
+                </para>
+              </listitem>
+	      <listitem>
+		<para>
+		  An opaque pointer that gets forwarded to the callback.
+                </para>
+              </listitem>
+            </itemizedlist>
+	    <para>
+	      The function pointer (if not <literal>NULL</literal>)
+	      gets invoked to report the index of each
+	      removed character. The reported index is the index from the
+	      original string, and the callback gets invoked in strict order,
+	      from the first to
+	      the last removed character (if any).
+            </para>
+	    <para>
+	      Multiple calls to <function>unicode_bidi_cleanup</function>() or
+	      <function>unicode_bidi_extra_cleanup</function>() do no harm;
+	      except that <function>unicode_bidi_extra_cleanup</function>()
+	      always removes all the additional characters that
+	      <function>unicode_bidi_cleanup</function>() does not remove.
+            </para>
+	    <para>
+	      The character string and the embedding level values resulting
+	      from <function>unicode_bidi_extra_cleanup</function>() are in
+	      <quote>canonical rendering order</quote>.
+            </para>
 	  </refsect2>
-	  <refsect2>
+
+	  <refsect2 id="unicode_bidi_embed">
+	    <title>Embedding bi-directional markers in Unicode text strings</title>
+            <para>
+	      <function>unicode_bidi_logical_order</function>() and
+	      <function>unicode_bidi_embed</function>() add various
+	      bi-directional markers to a Unicode string in canonical rendering
+	      order. The resulting string is not guaranteed to be
+	      identical to the
+	      original Unicode bi-directional string. The algorithm is fairly
+	      basic,
+	      but the resulting bi-directional string produces the same
+	      canonical rendering order after applying
+	      <function>unicode_bidi_calc()</function>,
+	      <function>unicode_bidi_reorder()</function> and
+	      <function>unicode_bidi_extra_cleanup()</function>,
+	      with the same paragraph_embedding level.
+            </para>
+
+	    <para>
+	      <function>unicode_bidi_logical_order</function>() gets called
+	      first, followed by
+	      <function>unicode_bidi_embed</function>().
+	      Finally, <function>unicode_bidi_embed_paragraph_level</function>()
+	      optionally determines whether the resulting string's default
+	      paragraph embedding level matches the one used for the actual
+	      embedding direction, and if not returns a directional marker
+	      to be prepended to the Unicode character string, as a hint.
+            </para>
+	    <para>
+	      <function>unicode_bidi_logical_order</function>() factors in the
+	      characters' embedding values, and the provided paragraph
+	      embedding value
+	      (<literal>UNICODE_BIDI_LR</literal> or
+	      <literal>UNICODE_BIDI_RL</literal>), and rearranges the characters
+	      and the embedding levels in left-to-right order, while
+	      simultaneously
+	      invoking the supplied reorder_callback indicating each range of
+	      characters whose relative order gets reversed. The
+	      <function>reorder_callback</function>() receives, as
+	      parameters:
+            </para>
+	    <itemizedlist>
+	      <listitem>
+		<para>
+		  The starting index of the first reversed character, in the
+		  string.
+                </para>
+              </listitem>
+	      <listitem>
+		<para>
+		  Number of reversed characters.
+                </para>
+              </listitem>
+	      <listitem>
+		<para>
+		  Forwarded <parameter>arg</parameter> pointer value.
+                </para>
+              </listitem>
+            </itemizedlist>
+	    <para>
+	      This specifies a consecutive range of characters (and
+	      directional embedding values)
+	      that get reversed (first character in the range becomes the
+	      last character,
+	      and the last character becomes the first character).
+            </para>
+
+	    <para>
+	      After
+	      <function>unicode_bidi_logical_order</function>(),
+	      <function>unicode_bidi_embed</function>() progressively invokes
+	      the passed-in callback with
+	      the contents of a bi-directional unicode string.
+	      The parameters to <function>unicode_bidi_embed</function>() are:
+            </para>
+            <itemizedlist>
+	      <listitem>
+		<para>
+		  The Unicode string, and &hellip;
+                </para>
+              </listitem>
+	      <listitem>
+		<para>
+		  &hellip; the directional embedding buffer, in canonical
+		  rendering order.
+                </para>
+              </listitem>
+	      <listitem>
+		<para>
+		  The size of the string and the embedding level buffer.
+                </para>
+              </listitem>
+	      <listitem>
+		<para>
+		  The paragraph embedding level, either
+		  <literal>UNICODE_BIDI_LR</literal> or
+		  <literal>UNICODE_BIDI_RL</literal>.
+                </para>
+              </listitem>
+	      <listitem>
+		<para>
+		  The pointer to the callback function.
+                </para>
+              </listitem>
+	      <listitem>
+		<para>
+		  An opaque pointer argument that gets forwarded to the
+		  callback function.
+                </para>
+              </listitem>
+            </itemizedlist>
+	    <para>
+	      The callback receives pointers to
+	      various parts of the original string that gets passed to
+	      <function>unicode_bidi_embed</function>(), intermixed with
+	      bi-directional markers,
+	      overrides, and isolates. The callback's parameters are:
+            </para>
+
+            <itemizedlist>
+	      <listitem>
+		<para>
+		  The pointer to a Unicode string.
+                </para>
+		<note>
+		  <para>
+		    It is not a given that the callback receives pointers
+		    to progressively increasing positions in the original
+		    string that gets passed to
+		    <function>unicode_bidi_embed</function>().
+		    Some calls will be for individual bi-directional
+		    markers, and
+		    <function>unicode_bidi_embed</function>() also
+		    performs some additional internal reordering, on the fly,
+		    after <function>unicode_bidi_logical_order</function>()'s
+		    big hammer.
+                  </para>
+                </note>
+              </listitem>
+	      <listitem>
+		<para>
+		  Number of characters in the Unicode string.
+                </para>
+              </listitem>
+	      <listitem>
+		<para>
+		  Forwarded <parameter>arg</parameter> pointer value.
+                </para>
+              </listitem>
+            </itemizedlist>
+
+	    <para>
+	      The assembled unicode string should produce the same
+	      canonical rendering order, for the same paragraph embedding
+	      level.
+	      <function>unicode_bidi_embed_paragraph_level</function>()
+	      checks if the specified Unicode string computes the given
+	      default paragraph embedding level and returns 0 if it matches.
+	      Otherwise it returns a directional marker that should be
+	      <emphasis>prepended</emphasis> to the Unicode string to allow
+	      <function>unicode_bidi_calc</function>'s optional paragraph
+	      embedding level pointer's value to be <literal>NULL</literal>,
+	      but derive the same default embedding level.
+	      The parameters to
+	      <function>unicode_bidi_embed_paragraph_level</function>() are:
+            </para>
+            <itemizedlist>
+	      <listitem>
+		<para>
+		  The Unicode string.
+                </para>
+              </listitem>
+	      <listitem>
+		<para>
+		  The size of the string.
+                </para>
+              </listitem>
+	      <listitem>
+		<para>
+		  The paragraph embedding level, either
+		  <literal>UNICODE_BIDI_LR</literal> or
+		  <literal>UNICODE_BIDI_RL</literal>.
+                </para>
+              </listitem>
+	    </itemizedlist>
+          </refsect2>
+	  <refsect2 id="unicode_bidi_misc">
 	    <title>Miscellaneous utility functions</title>
 
 	    <para>
 	      <function>unicode_bidi_type</function>
-	      looks up each character's bidirectional character type.
+	      looks up each character's bi-directional character type.
 	    </para>
 	    <para>
 	      <function>unicode_bidi_mirror</function>
@@ -464,7 +876,7 @@ See COPYING for distribution information.
 	    </para>
 	  </refsect2>
 	</refsect1>
-	<refsect1>
+	<refsect1 id="courier_unicode_bidi_seealso">
 	  <title>SEE ALSO</title>
 	  <para>
 	    <ulink url="https://www.unicode.org/reports/tr9/tr9-&tr9ver;.html">TR-9</ulink>,
@@ -502,7 +914,7 @@ See COPYING for distribution information.
 	    </funcprototype>
 	  </funcsynopsis>
 	</refsynopsisdiv>
-	<refsect1>
+	<refsect1 id="unicode_canonical_descr">
 	  <title>DESCRIPTION</title>
 
 	  <para>
@@ -552,7 +964,7 @@ See COPYING for distribution information.
 	    equivalence.
 	  </para>
 	</refsect1>
-	<refsect1>
+	<refsect1 id="unicode_canonical_seealso">
 	  <title>SEE ALSO</title>
 	  <para>
 	    <ulink url="https://www.unicode.org/reports/tr15/tr15-&tr15ver;.html">TR-15</ulink>,
@@ -641,7 +1053,7 @@ See COPYING for distribution information.
 	    </funcprototype>
 	  </funcsynopsis>
 	</refsynopsisdiv>
-	<refsect1>
+	<refsect1 id="unicode_category_descr">
 	  <title>DESCRIPTION</title>
 
 	  <para>
@@ -783,7 +1195,7 @@ See COPYING for distribution information.
 	    </varlistentry>
 	  </variablelist>
 	</refsect1>
-	<refsect1>
+	<refsect1 id="unicode_category_seealso">
 	  <title>SEE ALSO</title>
 	  <para>
 	    <link linkend="courier-unicode">
@@ -950,7 +1362,7 @@ See COPYING for distribution information.
 
 	</refsynopsisdiv>
 
-	<refsect1>
+	<refsect1 id="unicode_convert_descr">
 	  <title>DESCRIPTION</title>
 
 	  <para>
@@ -1040,7 +1452,7 @@ See COPYING for distribution information.
 	  </para>
 
 
-	  <refsect2>
+	  <refsect2 id="unicode_convert_collect">
 	    <title>Collecting converted text into a buffer</title>
 
 	    <para>
@@ -1097,7 +1509,7 @@ See COPYING for distribution information.
 	    </para>
 	  </refsect2>
 
-	  <refsect2>
+	  <refsect2 id="unicode_convert_chset_unicode">
 	    <title>Converting between character sets and unicode</title>
 
 	    <para>
@@ -1126,7 +1538,7 @@ See COPYING for distribution information.
 	    </para>
 	  </refsect2>
 
-	  <refsect2>
+	  <refsect2 id="unicode_convert_oneshot">
 	    <title>One-shot conversions</title>
 
 	    <para>
@@ -1175,7 +1587,7 @@ See COPYING for distribution information.
 	    </para>
 	  </refsect2>
 	</refsect1>
-	<refsect1>
+	<refsect1 id="unicode_convert_seealso">
 	  <title>SEE ALSO</title>
 	  <para>
 	    <link linkend="courier-unicode">
@@ -1220,7 +1632,7 @@ See COPYING for distribution information.
 	    </funcprototype>
 	  </funcsynopsis>
 	</refsynopsisdiv>
-	<refsect1>
+	<refsect1 id="unicode_default_chset_descr">
 	  <title>DESCRIPTION</title>
 	  <para>
 	    <function>unicode_default_chset</function>() returns the name of the
@@ -1231,7 +1643,7 @@ See COPYING for distribution information.
 	    current application locale's character set.
 	  </para>
 	</refsect1>
-	<refsect1>
+	<refsect1 id="unicode_default_chset_seealso">
 	  <title>SEE ALSO</title>
 	  <para>
 	    <link linkend="courier-unicode">
@@ -1316,7 +1728,7 @@ See COPYING for distribution information.
 	    </funcprototype>
 	  </funcsynopsis>
 	</refsynopsisdiv>
-	<refsect1>
+	<refsect1 id="unicode_emoji_descr">
 	  <title>DESCRIPTION</title>
 	  <para>
 	    <function>unicode_emoji_lookup</function>() returns the
@@ -1334,7 +1746,7 @@ See COPYING for distribution information.
 	    character has the corresponding property.
 	  </para>
         </refsect1>
-	<refsect1>
+	<refsect1 id="unicode_emoji_seealso">
 	  <title>SEE ALSO</title>
 	  <para>
 	    <ulink url="https://www.unicode.org/reports/tr51/tr51-&tr51ver;.html">TR-51</ulink>,
@@ -1368,7 +1780,7 @@ See COPYING for distribution information.
 	    </funcprototype>
 	  </funcsynopsis>
 	</refsynopsisdiv>
-	<refsect1>
+	<refsect1 id="unicode_html40_descr">
 	  <title>DESCRIPTION</title>
 	  <para>
 	    <function>unicode_html40ent_lookup</function>() returns the
@@ -1392,7 +1804,7 @@ See COPYING for distribution information.
 	    a single unicode character.
 	  </para>
 	</refsect1>
-	<refsect1>
+	<refsect1 id="unicode_html40_seealso">
 	  <title>SEE ALSO</title>
 	  <para>
 	    <link linkend="courier-unicode">
@@ -1448,7 +1860,7 @@ See COPYING for distribution information.
 	    </funcprototype>
 	  </funcsynopsis>
 	</refsynopsisdiv>
-	<refsect1>
+	<refsect1 id="unicode_grapheme_descr">
 	  <title>DESCRIPTION</title>
 
 	  <para>
@@ -1489,7 +1901,7 @@ See COPYING for distribution information.
 	  </para>
 	</refsect1>
 
-	<refsect1>
+	<refsect1 id="unicode_grapheme_seealso">
 	  <title>SEE ALSO</title>
 
 	  <para>
@@ -1600,7 +2012,7 @@ See COPYING for distribution information.
 	    </funcprototype>
 	  </funcsynopsis>
 	</refsynopsisdiv>
-	<refsect1>
+	<refsect1 id="unicode_lb_descr">
 	  <title>DESCRIPTION</title>
 	  <para>
 	    These functions implement the unicode line breaking algorithm.
@@ -1730,7 +2142,7 @@ See COPYING for distribution information.
 	    line breaking handle is no longer valid.
 	  </para>
 
-	  <refsect2>
+	  <refsect2 id="unicode_lb_altcallback">
 	    <title>Alternative callback function</title>
 
 	    <para>
@@ -1745,7 +2157,7 @@ See COPYING for distribution information.
 	    </para>
 	  </refsect2>
 
-	  <refsect2>
+	  <refsect2 id="unicode_lb_altcallback_opt">
 	    <title>Options</title>
 
 	    <para>
@@ -1822,7 +2234,7 @@ See COPYING for distribution information.
 	  </refsect2>
 	</refsect1>
 
-	<refsect1>
+	<refsect1 id="unicode_lb_seealso">
 	  <title>SEE ALSO</title>
 
 	  <para>
@@ -1859,7 +2271,7 @@ See COPYING for distribution information.
 	    </funcprototype>
 	  </funcsynopsis>
 	</refsynopsisdiv>
-	<refsect1>
+	<refsect1 id="unicode_script_descr">
 	  <title>DESCRIPTION</title>
 	  <para>
 	    <function>unicode_script</function>() looks up the
@@ -1871,7 +2283,7 @@ See COPYING for distribution information.
 	  </para>
 	</refsect1>
 
-	<refsect1>
+	<refsect1 id="unicode_script_seealso">
 	  <title>SEE ALSO</title>
 
 	  <para>
@@ -1949,7 +2361,7 @@ See COPYING for distribution information.
 
 	  </funcsynopsis>
 	</refsynopsisdiv>
-	<refsect1>
+	<refsect1 id="unicode_wb_descr">
 	  <title>DESCRIPTION</title>
 	  <para>
 	    These functions implement the unicode word breaking algorithm.
@@ -2046,7 +2458,7 @@ See COPYING for distribution information.
 	    line breaking handle is no longer valid.
 	  </para>
 
-	  <refsect2>
+	  <refsect2 id="unicode_wb_scan">
 	    <title>Word scan</title>
 
 	    <para>
@@ -2075,7 +2487,7 @@ See COPYING for distribution information.
 	  </refsect2>
 	</refsect1>
 
-	<refsect1>
+	<refsect1 id="unicode_wb_seealso">
 	  <title>SEE ALSO</title>
 	  <para>
 	    <ulink url="https://www.unicode.org/reports/tr29/tr29-&tr29ver;.html">TR-29</ulink>,
@@ -2144,7 +2556,7 @@ See COPYING for distribution information.
 	    </funcprototype>
 	  </funcsynopsis>
 	</refsynopsisdiv>
-	<refsect1>
+	<refsect1 id="unicode_uc_descr">
 	  <title>DESCRIPTION</title>
 	  <para>
 	    <function>unicode_uc</function>(),
@@ -2174,7 +2586,7 @@ See COPYING for distribution information.
 	  </para>
 	</refsect1>
 
-	<refsect1>
+	<refsect1 id="unicode_uc_seealso">
 	  <title>SEE ALSO</title>
 	  <para>
 	    <link linkend="courier-unicode">
@@ -2223,94 +2635,162 @@ See COPYING for distribution information.
 	<refnamediv>
 	  <refname>unicode::bidi_calc</refname>
 	  <refname>unicode::bidi_reorder</refname>
-	  <refpurpose>unicode bidirectional algorithm</refpurpose>
+	  <refname>unicode::bidi_cleanup</refname>
+	  <refname>unicode::bidi_extra_cleanup</refname>
+	  <refname>unicode::bidi_logical_order</refname>
+	  <refname>unicode::bidi_embed</refname>
+	  <refname>unicode::bidi_embed_paragraph_level</refname>
+	  <refpurpose>unicode bi-directional algorithm</refpurpose>
 	</refnamediv>
 
 	<refsynopsisdiv>
 	  <funcsynopsis>
 	    <funcsynopsisinfo>#include &lt;courier-unicode.h&gt;</funcsynopsisinfo>
 	    <funcprototype>
-              <funcdef>std::vector&lt;unicode_bidi_level_t&gt; <function>unicode::bidi_calc</function></funcdef>
+              <funcdef>std::tuple&lt;std::vector&lt;unicode_bidi_level_t&gt;, unicode_bidi_level_t&gt; <function>unicode::bidi_calc</function></funcdef>
 	      <paramdef>const std::u32string &amp;<parameter>string</parameter></paramdef>
 	    </funcprototype>
-	  </funcsynopsis>
 
-	  <funcsynopsis>
 	    <funcprototype>
-              <funcdef>std::vector&lt;unicode_bidi_level_t&gt; <function>unicode::bidi_calc</function></funcdef>
+              <funcdef>std::tuple&lt;std::vector&lt;unicode_bidi_level_t&gt;, unicode_bidi_level_t&gt; <function>unicode::bidi_calc</function></funcdef>
 	      <paramdef>const std::u32string &amp;<parameter>string</parameter></paramdef>
 	      <paramdef>unicode_bidi_level_t <parameter>embedding_level</parameter></paramdef>
 	    </funcprototype>
-	  </funcsynopsis>
 
-	  <funcsynopsis>
 	    <funcprototype>
               <funcdef>int <function>unicode::bidi_reorder</function></funcdef>
 	      <paramdef>std::u32string &amp;<parameter>string</parameter></paramdef>
 	      <paramdef>std::vector&lt;unicode_bidi_level_t&gt; &amp;<parameter>embedding_level</parameter></paramdef>
-	      <paramdef>const std::function&lt;void (size_t, size_t)&gt; &amp;<parameter>reorder_callback</parameter></paramdef>
+	      <paramdef>const std::function&lt;void (size_t, size_t) noexcept&gt; &amp;<parameter>reorder_callback</parameter></paramdef>
 	    </funcprototype>
-	  </funcsynopsis>
 
-	  <funcsynopsis>
 	    <funcprototype>
-              <funcdef>int <function>unicode::bidi_reorder</function></funcdef>
+              <funcdef>void <function>unicode::bidi_reorder</function></funcdef>
 	      <paramdef>std::vector&lt;unicode_bidi_level_t&gt; &amp;<parameter>embedding_level</parameter></paramdef>
-	      <paramdef>const std::function&lt;void (size_t, size_t)&gt; &amp;<parameter>reorder_callback</parameter></paramdef>
+	      <paramdef>const std::function&lt;void (size_t, size_t) noexcept&gt; &amp;<parameter>reorder_callback</parameter></paramdef>
 	    </funcprototype>
-	  </funcsynopsis>
+
+	    <funcprototype>
+              <funcdef>void <function>unicode::bidi_cleanup</function></funcdef>
+	      <paramdef>std::u32string &amp;<parameter>string</parameter></paramdef>
+	      <paramdef>const std::function&lt;void (size_t) noexcept&gt; &amp;<parameter>removed_callback</parameter></paramdef>
+            </funcprototype>
+
+	    <funcprototype>
+              <funcdef>int <function>unicode::bidi_cleanup</function></funcdef>
+	      <paramdef>std::u32string &amp;<parameter>string</parameter></paramdef>
+	      <paramdef>std::vector &lt;unicode_bidi_level_t&gt; &amp;<parameter>levels</parameter></paramdef>
+	      <paramdef>const std::function&lt;void (size_t) noexcept&gt; &amp;<parameter>removed_callback</parameter></paramdef>
+            </funcprototype>
+
+	    <funcprototype>
+              <funcdef>void <function>unicode::bidi_extra_cleanup</function></funcdef>
+	      <paramdef>std::u32string &amp;<parameter>string</parameter></paramdef>
+	      <paramdef>const std::function&lt;void (size_t) noexcept&gt; &amp;<parameter>removed_callback</parameter></paramdef>
+            </funcprototype>
+
+	    <funcprototype>
+              <funcdef>int <function>unicode::bidi_extra_cleanup</function></funcdef>
+	      <paramdef>std::u32string &amp;<parameter>string</parameter></paramdef>
+	      <paramdef>std::vector &lt;unicode_bidi_level_t&gt; &amp;<parameter>levels</parameter></paramdef>
+	      <paramdef>const std::function&lt;void (size_t) noexcept&gt; &amp;<parameter>removed_callback</parameter></paramdef>
+            </funcprototype>
+
+	    <funcprototype>
+              <funcdef>int <function>unicode::bidi_logical_order</function></funcdef>
+	      <paramdef>std::u32string &amp;<parameter>string</parameter></paramdef>
+	      <paramdef>std::vector &lt;unicode_bidi_level_t&gt; &amp;<parameter>levels</parameter></paramdef>
+	      <paramdef>unicode_bidi_level_t <parameter>paragraph_embedding</parameter></paramdef>
+	      <paramdef>const std::function&lt;void (size_t, size_t) noexcept&gt; &amp;<parameter>reorder_callback</parameter></paramdef>
+            </funcprototype>
+
+	    <funcprototype>
+              <funcdef>void <function>unicode::bidi_logical_order</function></funcdef>
+	      <paramdef>std::vector &lt;unicode_bidi_level_t&gt; &amp;<parameter>levels</parameter></paramdef>
+	      <paramdef>unicode_bidi_level_t <parameter>paragraph_embedding</parameter></paramdef>
+	      <paramdef>const std::function&lt;void (size_t, size_t) noexcept&gt; &amp;<parameter>reorder_callback</parameter></paramdef>
+            </funcprototype>
+
+	    <funcprototype>
+              <funcdef>int <function>unicode::bidi_embed</function></funcdef>
+	      <paramdef>const std::u32string &amp;<parameter>string</parameter></paramdef>
+	      <paramdef>const std::vector &lt;unicode_bidi_level_t&gt; &amp;<parameter>levels</parameter></paramdef>
+	      <paramdef>unicode_bidi_level_t <parameter>paragraph_embedding</parameter></paramdef>
+	      <paramdef>const std::function&lt;void (const char32_t *, size_t) noexcept&gt; &amp;<parameter>callback</parameter></paramdef>
+            </funcprototype>
+
+	    <funcprototype>
+              <funcdef>std::u32string <function>unicode::bidi_embed</function></funcdef>
+	      <paramdef>const std::u32string &amp;<parameter>string</parameter></paramdef>
+	      <paramdef>const std::vector &lt;unicode_bidi_level_t&gt; &amp;<parameter>levels</parameter></paramdef>
+	      <paramdef>unicode_bidi_level_t <parameter>paragraph_embedding</parameter></paramdef>
+            </funcprototype>
+
+	    <funcprototype>
+	      <funcdef>char32_t <function>unicode::bidi_embed_paragraph_level</function></funcdef>
+              <paramdef>const std::u32string &amp;<parameter>string</parameter></paramdef>
+              <paramdef>unicode_bidi_level_t <parameter>paragraph_embedding</parameter></paramdef>
+	    </funcprototype>
+          </funcsynopsis>
 	</refsynopsisdiv>
 
-	<refsect1>
+	<refsect1 id="unicode_cpp_bidi_descr">
 	  <title>DESCRIPTION</title>
 
 	  <para>
 	    These functions implement the C++ interface for the
-	    <ulink url="https://www.unicode.org/reports/tr9/tr9-&tr9ver;.html"> Unicode Bi-directional algorithm</ulink>.
+	    <ulink url="https://www.unicode.org/reports/tr9/tr9-&tr9ver;.html"> Unicode Bi-Directional algorithm</ulink>.
 	    See the description of the underlying
 	    <link linkend="unicode_bidi">
 	      <citerefentry><refentrytitle>unicode_bidi</refentrytitle>
 	      <manvolnum>3</manvolnum></citerefentry></link> C library
-	      API for more information.
+	      API for more information. C++ specific notes:
 	  </para>
 
-	  <para>
-            <function>unicode::bidi_calc</function> computes and return a vector
-	    of bidirection embedding level values for the given Unicode string.
-	    An overload takes an additional parameter that override the
-	    paragraph embedding level, a <literal>UNICODE_BIDI_LR</literal> or
-            an <literal>UNICODE_BIDI_RL</literal> value.
-          </para>
-	  <para>
-            <function>unicode::bidi_reorder</function> reverses the characters
-	    in the Unicode script, according to their embedding levels (and
-	    reverses the corresponding embedding level values too).
-	    As is with the C API, an optional parameter is a callable object
-	    that gets invoked to report each range of characters that gets
-	    reversed (specified as the starting position and a number of
-	    characters).
-          </para>
-	  <para>
-	    An overloaded <function>unicode::bidi_reorder</function> without
-	    the string parameter goes through the motions, according to the
-	    embedded level vector parameter, but without actually reversing
-	    the values in the vector, but still invoking the callable object
-	    normally.
-          </para>
-	  <para>
-	    This is comparable to the C API. Also comparable with the C API:
-	    the convention that even embedding levels specify left to right
-	    text and odd embedding values specify right to left text.
-	    An embedding value of <literal>UNICODE_BIDI_SKIP</literal>
-	    indicates an embedding or an override marker that has no
-	    specified embeded value. These markers may be removed from the
-	    Unicode string (together with the
-	    <literal>UNICODE_BIDI_SKIP</literal>
-	    values from the embedding values vector) either before or after
-	    they get reordered.
-	  </para>
+	  <itemizedlist>
+	    <listitem>
+	      <para>
+                <function>unicode::bidi_calc</function> returns the
+		directional embedding value buffer and the paragraph
+		embedding level.
+              </para>
+            </listitem>
+	    <listitem>
+	      <para>
+		Several C functions provide a <quote>dry-run</quote> mode
+		by passing a <literal>NULL</literal> pointer. The C++ API
+		provides separate overloads, with and without the nullable
+		parameter.
+              </para>
+            </listitem>
+	    <listitem>
+	      <para>
+		Several C functions accept a nullable function pointer, with
+		the <literal>NULL</literal> function pointer specifying no
+		callback. The C++ functions have a
+		<classname>std::function</classname> parameter with a
+		default do-nothing closure.
+              </para>
+            </listitem>
+
+	    <listitem>
+	      <para>
+		Several C functions accept two parameters, a Unicode character
+		pointer and the embedding level buffer, and a single parameter
+		that specifies the size of both.
+		The equivalent C++ function takes two discrete parameters,
+		a <classname>std::u32string</classname> and a
+		<classname>std::vector</classname>, and returns an
+		<classname>int</classname>: a negative value if their sizes
+		differ, or 0 if their sizes match and the requested function
+		completes. The <function>unicode::bidi_embed</function> overload
+		that returns a <classname>std::u32string</classname> returns
+		an empty string in case of a mismatch.
+              </para>
+            </listitem>
+          </itemizedlist>
 	</refsect1>
-	<refsect1>
+	<refsect1 id="unicode_cpp_bidi_seealso">
 	  <title>SEE ALSO</title>
 	  <para>
 	    <link linkend="courier-unicode">
@@ -2389,7 +2869,7 @@ extern const char unicode::iso_8859_1[];</funcsynopsisinfo>
 	  </funcsynopsis>
 	</refsynopsisdiv>
 
-	<refsect1>
+	<refsect1 id="unicode_cpp_convert_descr">
 	  <title>DESCRIPTION</title>
 
 	  <para>
@@ -2447,7 +2927,7 @@ extern const char unicode::iso_8859_1[];</funcsynopsisinfo>
 	  </para>
 	</refsect1>
 
-	<refsect1>
+	<refsect1 id="unicode_cpp_convert_seealso">
 	  <title>SEE ALSO</title>
 	  <para>
 	    <link linkend="courier-unicode">
@@ -2505,7 +2985,7 @@ extern const char unicode::iso_8859_1[];</funcsynopsisinfo>
 	  </funcsynopsis>
 	</refsynopsisdiv>
 
-	<refsect1>
+	<refsect1 id="unicode_cpp_convert_tocase_descr">
 	  <title>DESCRIPTION</title>
 
 	  <para>
@@ -2537,7 +3017,7 @@ extern const char unicode::iso_8859_1[];</funcsynopsisinfo>
 	  </para>
 	</refsect1>
 
-	<refsect1>
+	<refsect1 id="unicode_cpp_convert_tocase_seealso">
 	  <title>SEE ALSO</title>
 	  <para>
 	    <link linkend="courier-unicode">
@@ -2602,7 +3082,7 @@ extern const char unicode::iso_8859_1[];</funcsynopsisinfo>
 	  </funcsynopsis>
 	</refsynopsisdiv>
 
-	<refsect1>
+	<refsect1 id="unicode_cpp_convert_fromu_descr">
 	  <title>DESCRIPTION</title>
 
 	  <para>
@@ -2634,7 +3114,7 @@ extern const char unicode::iso_8859_1[];</funcsynopsisinfo>
 	  </para>
 	</refsect1>
 
-	<refsect1>
+	<refsect1 id="unicode_cpp_convert_fromu_seealso">
 	  <title>SEE ALSO</title>
 	  <para>
 	    <link linkend="courier-unicode">
@@ -2698,7 +3178,7 @@ extern const char unicode::iso_8859_1[];</funcsynopsisinfo>
 	  </funcsynopsis>
 	</refsynopsisdiv>
 
-	<refsect1>
+	<refsect1 id="unicode_cpp_convert_tou_descr">
 	  <title>DESCRIPTION</title>
 
 	  <para>
@@ -2733,7 +3213,7 @@ extern const char unicode::iso_8859_1[];</funcsynopsisinfo>
 	  </para>
 	</refsect1>
 
-	<refsect1>
+	<refsect1 id="unicode_cpp_convert_tou_seealso">
 	  <title>SEE ALSO</title>
 	  <para>
 	    <link linkend="courier-unicode">
@@ -2846,7 +3326,7 @@ std::vector&lt;std::pair&lt;int, char32_t&gt;&gt; linebreaks;
 std::copy(beg_iter, end_iter, std::back_insert_iterator&lt;std::vector&lt;int&gt;&gt;(linebreaks));</programlisting>
 	</refsynopsisdiv>
 
-	<refsect1>
+	<refsect1 id="unicode_cpp_lb_descr">
 	  <title>DESCRIPTION</title>
 
 	  <para>
@@ -2941,7 +3421,7 @@ std::copy(beg_iter, end_iter, std::back_insert_iterator&lt;std::vector&lt;int&gt
 	  </para>
 	</refsect1>
 
-	<refsect1>
+	<refsect1 id="unicode_cpp_lb_seealso">
 	  <title>SEE ALSO</title>
 	  <para>
 	    <link linkend="courier-unicode">
@@ -3012,7 +3492,7 @@ std::copy(beg_iter, end_iter, std::back_insert_iterator&lt;std::vector&lt;int&gt
 	  </funcsynopsis>
 	</refsynopsisdiv>
 
-	<refsect1>
+	<refsect1 id="unicode_cpp_tolower_descr">
 	  <title>DESCRIPTION</title>
 
 	  <para>
@@ -3040,7 +3520,7 @@ std::copy(beg_iter, end_iter, std::back_insert_iterator&lt;std::vector&lt;int&gt
 	  </para>
 	</refsect1>
 
-	<refsect1>
+	<refsect1 id="unicode_cpp_tolower_seealso">
 	  <title>SEE ALSO</title>
 	  <para>
 	    <link linkend="courier-unicode">
@@ -3104,7 +3584,7 @@ size_t nchars=scan.finish();
 </programlisting>
 	</refsynopsisdiv>
 
-	<refsect1>
+	<refsect1 id="unicode_cpp_wb_descr">
 	  <title>DESCRIPTION</title>
 
 	  <para>
@@ -3168,7 +3648,7 @@ size_t nchars=scan.finish();
 	  </para>
 	</refsect1>
 
-	<refsect1>
+	<refsect1 id="unicode_cpp_wb_seealso">
 	  <title>SEE ALSO</title>
 	  <para>
 	    <link linkend="courier-unicode">
diff --git a/unicode/courier-unicode.h.in b/unicode/courier-unicode.h.in
index c8161ea..f6b4b8c 100644
--- a/unicode/courier-unicode.h.in
+++ b/unicode/courier-unicode.h.in
@@ -536,65 +536,6 @@ int unicode_wbscan_next(unicode_wbscan_info_t i, char32_t ch);
 
 size_t unicode_wbscan_end(unicode_wbscan_info_t i);
 
-/*
-** Unicode Bidirectional bracket and mirroring lookup
-**
-** http://www.unicode.org/reports/tr9/tr9-42.html
-**
-** unicode_bidi_mirror() returns the Bidi_Mirroring_Glyph property.
-**
-** If there is no mirroring glyph for the given character, returns the
-** same character.
-**
-** unicode_bidi_bracket_type() looks up the Bidi_Paired_Bracket and
-** Bidi_Paired_Bracket_Type properties.
-**
-** unicode_bidi_bracket_type() returns the Bidi_Paired_Bracket property
-** value. If the ret parameter is not a null pointer, the pointed-to
-** value is set to Bidi_Paired_Bracket_Type value, one of the UNICODE_BIDI
-** values.
-**
-** unicode_bidi_bracket_type() returns the same character and
-** UNICODE_BIDI_n if the given character does not have these properties.
-**
-** unicode_bidi_type() looks up the bidirectional character type of the
-** given Unicode character.
-**
-** unicode_bidi_calc() implements the Unicode Bidirectional Algorithm up to
-** step L1.
-**
-** Parameters:
-**
-** - A pointer to char32_t, the Unicode string.
-**
-** - Number of characters in the char32_t string
-**
-** - A pointer to an array of unicode_bidi_level_t values. The caller is
-** responsible for allocating and deallocating this array, which has the
-** same size as the Unicode string (the second parameter).
-**
-** - An optional pointer to a unicode_bidi_level_t value, or a null pointer.
-** A pointer to UNICODE_BIDI_LR or UNICODE_BIDI_RL sets the default paragraph
-** direction level. A null pointer calculates the default paragraph direction
-** level based on the string, as specified by the "P" rules in the algorithm.
-**
-** unicode_bidi_calc() fills in the unicode_bidi_level_t array with the
-** values corresponding to the embedding level of the corresponding character,
-** as specified in the Unicode Bidirection Algorithm (even for left-to-right,
-** and odd for right-to-left). A value of UNICODE_BIDI_SKIP designates
-** directional markers (from step X9). These characters should be removed
-** before using unicode_bidi_reorder().
-**
-** unicode_bidi_calc() returns the resolved paragraph direction level, which
-** always matches the passed in level, if specified, else it reports the
-** derived one.
-**
-** unicode_bidi_reorder() reorders the characters according to the resolved
-** embedding levels. A non-null reorder_callback gets invoked repeatedly,
-** indicating the starting index and the number of characters reversed, so
-** that any related metadata can be updated accordingly.
-*/
-
 typedef char unicode_bidi_bracket_type_t;
 
 #define UNICODE_BIDI_n  'n'
@@ -654,6 +595,40 @@ typedef enum {
 
 extern enum_bidi_type_t unicode_bidi_type(char32_t c);
 
+extern size_t unicode_bidi_cleanup(char32_t *string,
+				   unicode_bidi_level_t *levels,
+				   size_t n,
+				   void (*removed_callback)(size_t, void *),
+				   void *);
+
+extern size_t unicode_bidi_extra_cleanup(char32_t *string,
+					 unicode_bidi_level_t *levels,
+					 size_t n,
+					 void (*removed_callback)(size_t,
+								  void *),
+					 void *);
+
+extern void unicode_bidi_logical_order(char32_t *string,
+				       unicode_bidi_level_t *levels,
+				       size_t n,
+				       unicode_bidi_level_t paragraph_embedding,
+				       void (*reorder_callback)(size_t, size_t,
+								void *),
+				       void *arg);
+
+extern void unicode_bidi_embed(const char32_t *string,
+			       const unicode_bidi_level_t *levels,
+			       size_t n,
+			       unicode_bidi_level_t paragraph_embedding,
+			       void (*emit)(const char32_t *string,
+					    size_t n,
+					    void *arg),
+			       void *arg);
+
+extern char32_t unicode_bidi_embed_paragraph_level(const char32_t *str,
+						   size_t n,
+						   unicode_bidi_level_t);
+
 /*
 ** unicode_canonical() returns the canonical mapping of the given Unicode
 ** character. The returned structure specifies:
@@ -2117,24 +2092,124 @@ std::u32string tolower(const std::u32string &u);
 std::u32string toupper(const std::u32string &u);
 
 //! Calculate bidirectional embedding levels
+
+//! Returns the bidirectional embedding levels, and the paragraph
+//! embedding level.
+
 std::tuple<std::vector<unicode_bidi_level_t>,
 	   unicode_bidi_level_t> bidi_calc(const std::u32string &s);
 
 //! Calculate bidirectional embedding levels
+
+//! Overload calculates the embedding levels using a predetermined
+//! paragraph embedding level.
+//!
+//! Returns the bidirectional embedding levels, and the same paragraph
+//! embedding level.
+
 std::tuple<std::vector<unicode_bidi_level_t>,
 	   unicode_bidi_level_t> bidi_calc(const std::u32string &s,
 					   unicode_bidi_level_t level);
 
 //! Reorder bidirectional text
+
+//! Reorders the string and levels in place.
+//!
+//! Non-0 return value indicates the string and levels' sizes do not match.
+
 int bidi_reorder(std::u32string &string,
 		 std::vector<unicode_bidi_level_t> &levels,
-		 const std::function<void (size_t, size_t)> &reorder_callback=
-		 [](size_t, size_t){});
+		 const std::function<void (size_t, size_t) noexcept>
+		 &reorder_callback=[](size_t, size_t) noexcept{});
 
-//! Reorder bidirectional text
+//! Dry-run reorder bidirectional text
 void bidi_reorder(std::vector<unicode_bidi_level_t> &levels,
-		  const std::function<void (size_t, size_t)> &reorder_callback=
-		  [](size_t, size_t){});
+		  const std::function<void (size_t, size_t) noexcept>
+		  &reorder_callback=[](size_t, size_t) noexcept{});
+
+//! Remove directional markers
+
+//! Removes them from the string, in place. Optional lambda gets notified
+//! of the index (in the original string) of each removed marker.
+
+void bidi_cleanup(std::u32string &string,
+		  const std::function<void (size_t) noexcept> &removed_callback=
+		  [](size_t) noexcept {});
+
+//! Also remove them from the embedding direction level buffer.
+
+//! Returns non-0 in case of non-matching level buffer size.
+
+int bidi_cleanup(std::u32string &string,
+		 std::vector<unicode_bidi_level_t> &levels,
+		 const std::function<void (size_t) noexcept> &removed_callback=
+		  [](size_t) noexcept {});
+
+
+//! Remove directional markers and isolation markers.
+
+//! Removes them from the string, in place. Optional lambda gets notified
+//! of the index (in the original string) of each removed marker.
+
+void bidi_extra_cleanup(std::u32string &string,
+			const std::function<void (size_t) noexcept>
+			&removed_callback=
+			[](size_t) noexcept {});
+
+//! Also remove them from the embedding direction level buffer.
+
+//! Returns non-0 in case of non-matching level buffer size.
+
+int bidi_extra_cleanup(std::u32string &string,
+		       std::vector<unicode_bidi_level_t> &levels,
+		       const std::function<void (size_t) noexcept>
+		       &removed_callback=
+		       [](size_t) noexcept {});
+
+//! Convert Unicode string from canonical rendering order to logical order.
+int bidi_logical_order(std::u32string &string,
+		       std::vector<unicode_bidi_level_t> &levels,
+		       unicode_bidi_level_t paragraph_embedding,
+		       const std::function<void (size_t, size_t) noexcept>
+		       &lambda=[](size_t,size_t){});
+
+//! Dry-run conversion from canonical rendering order to logical order.
+void bidi_logical_order(std::vector<unicode_bidi_level_t> &levels,
+			unicode_bidi_level_t paragraph_embedding,
+			const std::function<void (size_t, size_t) noexcept>
+			&lambda);
+
+//! Embed directional and isolation markers
+
+//! Non-0 return value indicates the string and levels' sizes do not match.
+//!
+//! The lambda gets called repeatedly, to specify the contents of the
+//! string with embedded direction markers.
+
+int bidi_embed(const std::u32string &string,
+	       const std::vector<unicode_bidi_level_t> &levels,
+	       unicode_bidi_level_t paragraph_embedding,
+	       const std::function<void (const char32_t *string,
+					 size_t n) noexcept> &lambda);
+
+//! Embed directional and isolation markers
+
+//! \overload
+//!
+//! Provides a lambda that collects the new string, and returns it. An
+//! empty string gets returned if the string and levels' sizes do not match.
+
+std::u32string bidi_embed(const std::u32string &string,
+			  const std::vector<unicode_bidi_level_t> &levels,
+			  unicode_bidi_level_t paragraph_embedding);
+
+//! Check if a directional marker needs to be inserted
+
+//! In order for the unicode string to have the specified default
+//! paragraph embedding level.
+
+extern char32_t bidi_embed_paragraph_level(const std::u32string &string,
+					   unicode_bidi_level_t level);
 
 #if 0
 {
diff --git a/unicode/docbook/book.css b/unicode/docbook/book.css
index d1420cd..a133e82 100644
--- a/unicode/docbook/book.css
+++ b/unicode/docbook/book.css
@@ -44,7 +44,7 @@ code.computeroutput div.literallayout {
     font-weight: bold;
 }
 
-.command, .acronym, .symbol {
+.command, .acronym, .symbol, .structname {
     font-family: "liberation mono", "courier new", monospace;
     background-color: #eeeeee;
 }
diff --git a/unicode/unicode_bidi.c b/unicode/unicode_bidi.c
index 055ee89..a35e9b5 100644
--- a/unicode/unicode_bidi.c
+++ b/unicode/unicode_bidi.c
@@ -112,6 +112,17 @@ typedef enum {
 	 (c) == UNICODE_BIDI_TYPE_LRO ||		\
 	 (c) == UNICODE_BIDI_TYPE_RLO)
 
+#define is_explicit_indicator_except_b(c)	\
+	( is_isolate_initiator(c) ||		\
+	  is_embedding_initiator(c) ||		\
+	  (c) == UNICODE_BIDI_TYPE_BN ||        \
+	  (c) == UNICODE_BIDI_TYPE_PDF ||       \
+	  (c) == UNICODE_BIDI_TYPE_PDI)
+
+#define is_explicit_indicator(c)               \
+	( is_explicit_indicator_except_b(c) || \
+	  (c) == UNICODE_BIDI_TYPE_B)
+
 /* BD13 implementation */
 
 /* A level run, specified as indexes */
@@ -529,6 +540,8 @@ static void directional_status_stack_push
 		(struct directional_status_stack_entry *)
 		malloc(sizeof(struct directional_status_stack_entry));
 
+	if (!p)
+		abort();
 #ifdef BIDI_DEBUG
 	fprintf(DEBUGDUMP, "BIDI: Push level %d, override: %s, isolate: %s\n",
 		(int)embedding_level,
@@ -548,16 +561,21 @@ static void directional_status_stack_push
 }
 
 static unicode_bidi_level_t
-compute_paragraph_embedding_level(const enum_bidi_type_t *p,
-				  size_t i, size_t j)
+compute_paragraph_embedding_level(size_t i, size_t j,
+				  enum_bidi_type_t (*get)(size_t i,
+							  void *arg),
+				  void *arg)
+
 {
 	unicode_bidi_level_t in_isolation=0;
 
 	for (; i<j; ++i)
 	{
-		if (is_isolate_initiator(p[i]))
+		enum_bidi_type_t t=get(i, arg);
+
+		if (is_isolate_initiator(t))
 			++in_isolation;
-		else if (p[i] == UNICODE_BIDI_TYPE_PDI)
+		else if (t == UNICODE_BIDI_TYPE_PDI)
 		{
 			if (in_isolation)
 				--in_isolation;
@@ -565,16 +583,43 @@ compute_paragraph_embedding_level(const enum_bidi_type_t *p,
 
 		if (in_isolation == 0)
 		{
-			if (p[i] == UNICODE_BIDI_TYPE_AL ||
-			    p[i] == UNICODE_BIDI_TYPE_R)
+			if (t == UNICODE_BIDI_TYPE_AL ||
+			    t == UNICODE_BIDI_TYPE_R)
 			{
-				return 1;
+				return UNICODE_BIDI_RL;
 			}
-			if (p[i] == UNICODE_BIDI_TYPE_L)
+			if (t == UNICODE_BIDI_TYPE_L)
 				break;
 		}
 	}
-	return 0;
+	return UNICODE_BIDI_LR;
+}
+
+struct compute_paragraph_embedding_level_type_info {
+	const enum_bidi_type_t *p;
+};
+
+static enum_bidi_type_t
+get_enum_bidi_type_for_paragraph_embedding_level(size_t i,
+						 void *arg)
+{
+	struct compute_paragraph_embedding_level_type_info *p=
+		(struct compute_paragraph_embedding_level_type_info *)arg;
+
+	return p->p[i];
+}
+
+/* Paragraph embedding level computed from the types in p[i]...p[j-1]. */
+static unicode_bidi_level_t
+compute_paragraph_embedding_level_from_types(const enum_bidi_type_t *p,
+					     size_t i, size_t j)
+{
+	struct compute_paragraph_embedding_level_type_info types={p};
+
+	return compute_paragraph_embedding_level
+		(i, j,
+		 get_enum_bidi_type_for_paragraph_embedding_level,
+		 &types);
 }
 
 static directional_status_stack_t
@@ -591,7 +636,7 @@ directional_status_stack_init(const char32_t *chars,
 	stack->paragraph_embedding_level=
 		initial_embedding_level
 		? *initial_embedding_level & 1
-		: compute_paragraph_embedding_level(classes, 0, n);
+		: compute_paragraph_embedding_level_from_types(classes, 0, n);
 	stack->chars=chars;
 	stack->classes=classes;
 
@@ -676,6 +721,8 @@ unicode_bidi_calc(const char32_t *p, size_t n, unicode_bidi_level_t *bufp,
 	enum_bidi_type_t *buf=
 		(enum_bidi_type_t *)malloc(n * sizeof(enum_bidi_type_t));
 
+	if (!buf)
+		abort();
 	for (size_t i=0; i<n; ++i)
 	{
 		buf[i]=unicode_bidi_type(p[i]);
@@ -732,7 +779,7 @@ unicode_bidi_b(const char32_t *p,
 		}							\
 	} while(0)
 
-static void unicode_bidi_w(directional_status_stack_t stack,
+static void unicode_bidi_w(enum_bidi_type_t *classes,
 			   struct isolating_run_sequence_s *seq);
 static void unicode_bidi_n(directional_status_stack_t stack,
 			   struct isolating_run_sequence_s *seq);
@@ -900,7 +947,7 @@ static void unicode_bidi_cl(directional_status_stack_t stack)
 				}
 			}
 
-			cur_class=compute_paragraph_embedding_level
+			cur_class=compute_paragraph_embedding_level_from_types
 				(stack->classes, i+1, j) == 1
 				? UNICODE_BIDI_TYPE_RLI
 				: UNICODE_BIDI_TYPE_LRI;
@@ -955,24 +1002,11 @@ static void unicode_bidi_cl(directional_status_stack_t stack)
 			break;
 		}
 
-		switch (stack->orig_classes[i]) {
-		case UNICODE_BIDI_TYPE_BN:
-		case UNICODE_BIDI_TYPE_B:
-		case UNICODE_BIDI_TYPE_RLE:
-		case UNICODE_BIDI_TYPE_LRE:
-		case UNICODE_BIDI_TYPE_RLO:
-		case UNICODE_BIDI_TYPE_LRO:
-		case UNICODE_BIDI_TYPE_PDF:
-		case UNICODE_BIDI_TYPE_RLI:
-		case UNICODE_BIDI_TYPE_LRI:
-		case UNICODE_BIDI_TYPE_FSI:
-		case UNICODE_BIDI_TYPE_PDI:
-			break;
-		default:
+		if (!is_explicit_indicator(stack->orig_classes[i]))
+		{
 			/* X6 */
 			stack->levels[i]=stack->head->embedding_level;
 			RESET_CLASS(stack->classes[i],stack);
-			break;
 		}
 
 		if (stack->classes[i] == UNICODE_BIDI_TYPE_PDI)
@@ -1210,7 +1244,7 @@ static void unicode_bidi_cl(directional_status_stack_t stack)
 		dump_sequence("Contents before W", stack, p);
 #endif
 
-		unicode_bidi_w(stack, p);
+		unicode_bidi_w(stack->classes, p);
 
 #ifdef BIDI_DEBUG
 		dump_sequence("Contents after W", stack, p);
@@ -1258,7 +1292,7 @@ static void unicode_bidi_cl(directional_status_stack_t stack)
 	}
 }
 
-static void unicode_bidi_w(directional_status_stack_t stack,
+static void unicode_bidi_w(enum_bidi_type_t *classes,
 			   struct isolating_run_sequence_s *seq)
 {
 	irs_iterator iter=irs_begin(seq), end=irs_end(seq);
@@ -1268,10 +1302,10 @@ static void unicode_bidi_w(directional_status_stack_t stack,
 
 	while (irs_compare(&iter, &end))
 	{
-		if (stack->classes[iter.i] == UNICODE_BIDI_TYPE_NSM)
+		if (classes[iter.i] == UNICODE_BIDI_TYPE_NSM)
 		{
 			/* W1 */
-			stack->classes[iter.i] =
+			classes[iter.i] =
 				is_isolate_initiator(previous_type) ||
 				previous_type == UNICODE_BIDI_TYPE_PDI
 				? UNICODE_BIDI_TYPE_ON
@@ -1281,14 +1315,14 @@ static void unicode_bidi_w(directional_status_stack_t stack,
 
 		/* W2 */
 
-		if (stack->classes[iter.i] == UNICODE_BIDI_TYPE_EN &&
+		if (classes[iter.i] == UNICODE_BIDI_TYPE_EN &&
 		    strong_type == UNICODE_BIDI_TYPE_AL)
 		{
-			stack->classes[iter.i] = UNICODE_BIDI_TYPE_AN;
+			classes[iter.i] = UNICODE_BIDI_TYPE_AN;
 		}
 
 		/* W2 */
-		previous_type=stack->classes[iter.i];
+		previous_type=classes[iter.i];
 
 		switch (previous_type) {
 		case UNICODE_BIDI_TYPE_R:
@@ -1312,12 +1346,12 @@ static void unicode_bidi_w(directional_status_stack_t stack,
 	while (not_eol)
 	{
 		/* W3 */
-		if (stack->classes[iter.i] == UNICODE_BIDI_TYPE_AL)
-			stack->classes[iter.i] = UNICODE_BIDI_TYPE_R;
+		if (classes[iter.i] == UNICODE_BIDI_TYPE_AL)
+			classes[iter.i] = UNICODE_BIDI_TYPE_R;
 
 		/* W4 */
 
-		enum_bidi_type_t this_type=stack->classes[iter.i];
+		enum_bidi_type_t this_type=classes[iter.i];
 		irs_incr(&iter);
 
 		not_eol=irs_compare(&iter, &end);
@@ -1332,13 +1366,13 @@ static void unicode_bidi_w(directional_status_stack_t stack,
 		       previous_type == UNICODE_BIDI_TYPE_AN)
 		      )
 		     ) &&
-		    stack->classes[iter.i] == previous_type)
+		    classes[iter.i] == previous_type)
 		{
 			irs_iterator prev=iter;
 
 			irs_decr(&prev);
 
-			stack->classes[prev.i]=previous_type;
+			classes[prev.i]=previous_type;
 		}
 
 		if (not_eol)
@@ -1353,9 +1387,9 @@ static void unicode_bidi_w(directional_status_stack_t stack,
 
 	while (irs_compare(&iter, &end))
 	{
-		if (stack->classes[iter.i] != UNICODE_BIDI_TYPE_ET)
+		if (classes[iter.i] != UNICODE_BIDI_TYPE_ET)
 		{
-			previous_type=stack->classes[iter.i];
+			previous_type=classes[iter.i];
 			irs_incr(&iter);
 			continue;
 		}
@@ -1363,7 +1397,7 @@ static void unicode_bidi_w(directional_status_stack_t stack,
 		/* ET after EN */
 		if (previous_type == UNICODE_BIDI_TYPE_EN)
 		{
-			stack->classes[iter.i] = UNICODE_BIDI_TYPE_EN;
+			classes[iter.i] = UNICODE_BIDI_TYPE_EN;
 			irs_incr(&iter);
 			continue;
 		}
@@ -1374,7 +1408,7 @@ static void unicode_bidi_w(directional_status_stack_t stack,
 
 		while (irs_incr(&iter), irs_compare(&iter, &end))
 		{
-			previous_type=stack->classes[iter.i];
+			previous_type=classes[iter.i];
 
 			if (previous_type == UNICODE_BIDI_TYPE_ET)
 				continue;
@@ -1383,7 +1417,7 @@ static void unicode_bidi_w(directional_status_stack_t stack,
 			{
 				while (irs_compare(&start, &iter))
 				{
-					stack->classes[start.i]=
+					classes[start.i]=
 						UNICODE_BIDI_TYPE_EN;
 					irs_incr(&start);
 				}
@@ -1397,12 +1431,12 @@ static void unicode_bidi_w(directional_status_stack_t stack,
 	for (iter=irs_begin(seq);
 	     irs_compare(&iter, &end); irs_incr(&iter))
 	{
-		switch (stack->classes[iter.i]) {
+		switch (classes[iter.i]) {
 		case UNICODE_BIDI_TYPE_ET:
 		case UNICODE_BIDI_TYPE_ES:
 		case UNICODE_BIDI_TYPE_CS:
 			/* W6 */
-			stack->classes[iter.i]=UNICODE_BIDI_TYPE_ON;
+			classes[iter.i]=UNICODE_BIDI_TYPE_ON;
 			break;
 		default:
 			break;
@@ -1416,14 +1450,14 @@ static void unicode_bidi_w(directional_status_stack_t stack,
 
 	while (irs_compare(&iter, &end))
 	{
-		switch (stack->classes[iter.i]) {
+		switch (classes[iter.i]) {
 		case UNICODE_BIDI_TYPE_L:
 		case UNICODE_BIDI_TYPE_R:
-			previous_type=stack->classes[iter.i];
+			previous_type=classes[iter.i];
 			break;
 		case UNICODE_BIDI_TYPE_EN:
 			if (previous_type == UNICODE_BIDI_TYPE_L)
-				stack->classes[iter.i]=previous_type;
+				classes[iter.i]=previous_type;
 			break;
 		default:
 			break;
@@ -1573,13 +1607,13 @@ static void unicode_bidi_n(directional_status_stack_t stack,
 
 		ADJUST_EOCLASS(eoclass);
 
-#define E_CLASS (seq->embedding_level & 1 ?			\
-		 UNICODE_BIDI_TYPE_R:UNICODE_BIDI_TYPE_L)
+#define E_CLASS(level) ((level) & 1 ?					\
+			UNICODE_BIDI_TYPE_R:UNICODE_BIDI_TYPE_L)
 
-#define O_CLASS (seq->embedding_level & 1 ?			\
-		 UNICODE_BIDI_TYPE_L:UNICODE_BIDI_TYPE_R)
+#define O_CLASS(level) ((level) & 1 ?					\
+			UNICODE_BIDI_TYPE_L:UNICODE_BIDI_TYPE_R)
 
-		if (eoclass == E_CLASS)
+		if (eoclass == E_CLASS(seq->embedding_level))
 		{
 #ifdef BIDI_DEBUG
 			if (stackp)
@@ -1599,7 +1633,7 @@ static void unicode_bidi_n(directional_status_stack_t stack,
 			for (size_t i=0; i<stackp; ++i)
 				stack_iters[i]->has_e=1;
 		}
-		else if (eoclass == O_CLASS)
+		else if (eoclass == O_CLASS(seq->embedding_level))
 		{
 #ifdef BIDI_DEBUG
 			if (stackp)
@@ -1636,8 +1670,8 @@ static void unicode_bidi_n(directional_status_stack_t stack,
 				"Brackets: %d and %d: e=%s, o=%s",
 				(int)p->start.i,
 				(int)p->end.i,
-				bidi_classname(E_CLASS),
-				bidi_classname(O_CLASS));
+				bidi_classname(E_CLASS(seq->embedding_level)),
+				bidi_classname(O_CLASS(seq->embedding_level)));
 
 			fprintf(DEBUGDUMP, ", has e=%d, has o=%d\n",
 				p->has_e,
@@ -1879,6 +1913,37 @@ static void level_run_layers_add(struct level_run_layers *p)
 	level_runs_init(p->lruns + (p->n_lruns++));
 }
 
+/* Reverse [start, end) of p and levels in place, then report the range. */
+static void reverse_str(char32_t *p,
+			unicode_bidi_level_t *levels,
+			size_t start,
+			size_t end,
+			void (*reorder_callback)(size_t, size_t, void *),
+			void *arg)
+{
+	size_t lo, hi;
+
+	/* Walk inwards from both ends; a null p makes this a dry run. */
+	for (lo=start, hi=end; hi > lo; ++lo)
+	{
+		--hi;
+
+		if (!p)
+			continue;
+
+		char32_t saved_char=p[lo];
+		unicode_bidi_level_t saved_level=levels[lo];
+
+		p[lo]=p[hi];
+		p[hi]=saved_char;
+		levels[lo]=levels[hi];
+		levels[hi]=saved_level;
+	}
+
+	if (reorder_callback && end-start > 1)
+		(*reorder_callback)(start, end-start, arg);
+}
+
 void unicode_bidi_reorder(char32_t *p,
 			  unicode_bidi_level_t *levels,
 			  size_t n,
@@ -1887,6 +1952,15 @@ void unicode_bidi_reorder(char32_t *p,
 {
 	/* L2 */
 
+#ifdef BIDI_DEBUG
+	fprintf(DEBUGDUMP, "Before L2:");
+	for (size_t i=0; i<n; ++i)
+		fprintf(DEBUGDUMP, " %04x/%d",
+			(unsigned)p[i],
+			(int)levels[i]);
+	fprintf(DEBUGDUMP, "\n");
+#endif
+
 	struct level_run_layers layers;
 	unicode_bidi_level_t previous_level=0;
 
@@ -1920,39 +1994,738 @@ void unicode_bidi_reorder(char32_t *p,
 			}
 		}
 	}
-
+#ifdef BIDI_DEBUG
+	fprintf(DEBUGDUMP, "L2:\n");
+#endif
 	for (size_t i=layers.n_lruns; i; )
 	{
 		struct level_runs *runs=layers.lruns+ --i;
 
+#ifdef BIDI_DEBUG
+		if (runs->n_level_runs)
+			fprintf(DEBUGDUMP, "Reverse %d:",
+				(int)i);
+#endif
+
 		for (size_t j=0; j<runs->n_level_runs; ++j)
 		{
 			size_t start=runs->runs[j].start;
 			size_t end=runs->runs[j].end;
-			size_t right=end;
-			size_t left=start;
+#ifdef BIDI_DEBUG
+			fprintf(DEBUGDUMP, " %d-%d",
+				(int)start, (int)end-1);
+#endif
 
-			while (right > left)
+			reverse_str(p, levels, start, end,
+				    reorder_callback, arg);
+		}
+
+#ifdef BIDI_DEBUG
+		if (runs->n_level_runs)
+			fprintf(DEBUGDUMP, "\n");
+#endif
+	}
+
+	level_run_layers_deinit(&layers);
+}
+
+#define LRM	0x200E
+#define RLM	0x200F
+#define ALM	0x061C
+
+/* Drop IS_X9() characters, compacting string (and levels) in place. */
+size_t unicode_bidi_cleanup(char32_t *string,
+			    unicode_bidi_level_t *levels,
+			    size_t n,
+			    void (*removed_callback)(size_t, void *),
+			    void *arg)
+{
+	size_t i=0; /* Output position, i <= j always. Returned as new size. */
+	for (size_t j=0; j<n; ++j)
+	{
+		enum_bidi_type_t cl=unicode_bidi_type(string[j]);
+		if (IS_X9(cl))
+		{
+			if (removed_callback) /* Gets the original index */
+				(*removed_callback)(j, arg);
+			continue;
+		}
+		if (levels) /* Optional; reduced to the direction bit */
+			levels[i]=levels[j] & 1;
+		string[i++]=string[j]; /* Move the kept character down */
+	}
+	return i;
+}
+
+/* Strip markers and explicit indicators; B characters become newlines. */
+size_t unicode_bidi_extra_cleanup(char32_t *string,
+				  unicode_bidi_level_t *levels,
+				  size_t n,
+				  void (*removed_callback)(size_t, void *),
+				  void *arg)
+{
+	size_t kept=0;
+
+	for (size_t src=0; src<n; ++src)
+	{
+		char32_t c=string[src];
+		enum_bidi_type_t type=unicode_bidi_type(c);
+
+		if (!is_explicit_indicator_except_b(type) &&
+		    c != LRM && c != RLM && c != ALM)
+		{
+			string[kept]=type == UNICODE_BIDI_TYPE_B ? '\n' : c;
+			if (levels)
+				levels[kept]=levels[src] & 1;
+			++kept;
+		}
+		else if (removed_callback)
+			(*removed_callback)(src, arg);
+	}
+	return kept;
+}
+
+/* Reverse runs whose direction differs from the paragraph's. */
+void unicode_bidi_logical_order(char32_t *string,
+				unicode_bidi_level_t *levels,
+				size_t n,
+				unicode_bidi_level_t paragraph_embedding,
+				void (*reorder_callback)(size_t, size_t,
+							 void *),
+				void *arg)
+{
+	size_t pos;
+	size_t run_start;
+
+	/*
+	** First pass: a level's low bit is compared with the paragraph
+	** embedding level's low bit. Runs that differ (odd levels when
+	** paragraph_embedding is 0, even levels when it is 1) get reversed.
+	*/
+
+#define LOGICAL_FLIP(n) ( ((n) ^ paragraph_embedding) & 1)
+
+	for (pos=0; pos<n; )
+	{
+		if (!LOGICAL_FLIP(levels[pos]))
+		{
+			++pos;
+			continue;
+		}
+
+		/* Find where the run to flip ends. */
+		run_start=pos++;
+		while (pos<n && LOGICAL_FLIP(levels[pos]))
+			++pos;
+
+		reverse_str(string, levels, run_start, pos,
+			    reorder_callback, arg);
+	}
+
+	/* Second pass: an odd paragraph level reverses everything. */
+	if (paragraph_embedding & 1)
+		reverse_str(string, levels, 0, n, reorder_callback, arg);
+}
+
+/*
+** Track consecutive sequences of characters with the same embedding level.
+**
+** Linked list created in compute_bidi_embed_levelruns().
+*/
+
+struct bidi_embed_levelrun {
+	struct bidi_embed_levelrun *next;
+	size_t start;
+	size_t end;
+	unicode_bidi_level_t level;
+};
+
+/* Append a run; returns the tailp to pass in for the following run. */
+static struct bidi_embed_levelrun **
+record_bidi_embed_levelrun(struct bidi_embed_levelrun **tailp,
+			   size_t start,
+			   size_t end,
+			   unicode_bidi_level_t level)
+{
+	struct bidi_embed_levelrun *last=*tailp;
+	struct bidi_embed_levelrun *node=
+		(struct bidi_embed_levelrun *)calloc(1, sizeof(*node));
+
+	if (!node)
+		abort();
+
+	node->start=start;
+	node->end=end;
+	node->level=level;
+
+	if (!last)
+	{
+		/* Empty list: *tailp is the head pointer itself. */
+		*tailp=node;
+		return tailp;
+	}
+	last->next=node;
+	return &last->next;
+}
+
+/* Split levels into runs sharing the same low bit, appending to *tailp. */
+static void compute_bidi_embed_levelruns(const char32_t *string,
+					 const unicode_bidi_level_t *levels,
+					 size_t n,
+					 struct bidi_embed_levelrun **tailp)
+{
+	size_t run_start, run_end;
+
+	/* Only levels are consulted here, string is not read. */
+	for (run_start=0; run_start<n; run_start=run_end)
+	{
+		unicode_bidi_level_t dir=levels[run_start] & 1;
+
+		run_end=run_start+1;
+		while (run_end < n && (levels[run_end] & 1) == dir)
+			++run_end;
+		tailp=record_bidi_embed_levelrun(tailp, run_start, run_end,
+						 dir);
+	}
+}
+
+#define RLI 0x2067
+#define LRI 0x2066
+#define RLO 0x202e
+#define LRO 0x202d
+#define PDF 0x202c
+#define PDI 0x2069
+
+/*
+** Whether a directional marker and a PDI is required to be generated after
+** some subset of characters.
+*/
+
+struct need_marker_info {
+	int need_marker;
+	int need_pdi;
+};
+
+static void need_marker_info_init(struct need_marker_info *info)
+{
+	info->need_marker=0;
+	info->need_pdi=0;
+}
+
+static void need_marker_info_merge(struct need_marker_info *info,
+				   const struct need_marker_info *other_info)
+{
+	if (other_info->need_marker)
+		info->need_marker=1;
+	if (other_info->need_pdi)
+		info->need_pdi=1;
+}
+
+static void emit_bidi_embed_levelrun(const char32_t *string,
+				     enum_bidi_type_t *classes,
+				     struct bidi_embed_levelrun *run,
+				     unicode_bidi_level_t paragraph_level,
+				     unicode_bidi_level_t previous_level,
+				     unicode_bidi_level_t next_level,
+				     struct need_marker_info *need_marker,
+				     void (*emit)(const char32_t *string,
+						  size_t n,
+						  void *arg),
+				     void *arg);
+
+/* L1 */
+
+static int is_l1_on_or_after(const enum_bidi_type_t *classes,
+			     size_t n,
+			     size_t i,
+			     int atend)
+{
+	/*
+	** Rule L1 applies at position i when the whitespace run that
+	** starts there (possibly empty) is followed by an S or a B
+	** class character. Running off the end of the classes gives
+	** the caller-supplied "atend" answer instead.
+	*/
+	size_t pos;
+
+	/* Skip over the run of whitespace. */
+	for (pos=i; pos<n; ++pos)
+		if (classes[pos] != UNICODE_BIDI_TYPE_WS)
+			break;
+
+	if (pos >= n)
+		return atend;
+
+	return classes[pos] == UNICODE_BIDI_TYPE_S ||
+		classes[pos] == UNICODE_BIDI_TYPE_B;
+}
+
+
+/* Emit the run's RLM/LRM, then a PDI, as requested by info. */
+static void emit_marker(struct bidi_embed_levelrun *p,
+			struct need_marker_info *info,
+			void (*emit)(const char32_t *string,
+				     size_t n,
+				     void *arg),
+			void *arg)
+{
+	char32_t direction_mark=LRM;
+	char32_t pop_isolate=PDI;
+
+	if (p->level & 1)
+		direction_mark=RLM;
+	if (info->need_marker)
+		(*emit)(&direction_mark, 1, arg);
+	if (info->need_pdi)
+		(*emit)(&pop_isolate, 1, arg);
+}
+
+/* Pass string to emit() in pieces, interleaved with directional markers */
+/* (isolates, overrides, LRM/RLM) derived from levels and paragraph_level. */
+void unicode_bidi_embed(const char32_t *string,
+			const unicode_bidi_level_t *levels,
+			size_t n,
+			unicode_bidi_level_t paragraph_level,
+			void (*emit)(const char32_t *string,
+				     size_t n,
+				     void *arg),
+			void *arg)
+{
+	struct bidi_embed_levelrun *runs=0;
+	enum_bidi_type_t *classes=
+		(enum_bidi_type_t *)calloc(n, sizeof(enum_bidi_type_t));
+
+	if (!classes && n) /* calloc(0, ...) may return a null pointer */
+		abort();
+
+	for (size_t i=0; i<n; ++i)
+		classes[i]=unicode_bidi_type(string[i]);
+
+	compute_bidi_embed_levelruns(string, levels, n, &runs);
+
+	/*
+	** Go through the sequences of consecutive characters with the
+	** same embedding level. Keep track of the preceding and the
+	** next embedding level, which is usually the opposite from the
+	** current sequence's embedding level. Except that the first and
+	** the last sequence of characters, in the string, are bound to
+	** the paragraph_level, which may be the same.
+	*/
+
+	unicode_bidi_level_t previous_level=paragraph_level;
+
+	while (runs)
+	{
+		struct bidi_embed_levelrun *p=runs;
+
+		runs=runs->next;
+
+		unicode_bidi_level_t next_level=paragraph_level;
+
+		if (runs)
+			next_level=runs->level;
+
+#ifdef BIDI_DEBUG
+		fprintf(DEBUGDUMP, "  Range %d-%d, level %d\n",
+			(int)p->start, (int)(p->end-1), p->level);
+#endif
+
+		if (((p->level ^ paragraph_level) & 1) == 0)
+		{
+			/*
+			** Sequence in the same direction as the paragraph
+			** embedding level.
+			**
+			** We'll definitely need a directional marker if
+			** rule L1 applies after this sequence.
+			*/
+
+			struct need_marker_info need_marker;
+
+			need_marker_info_init(&need_marker);
+
+			if (classes[p->end-1] == UNICODE_BIDI_TYPE_WS)
+			{
+				need_marker.need_marker=
+					is_l1_on_or_after(classes, n,
+							  p->end,
+							  0);
+#ifdef BIDI_DEBUG
+				fprintf(DEBUGDUMP, "    need marker=%d\n",
+					need_marker.need_marker);
+#endif
+
+			}
+
+			emit_bidi_embed_levelrun(string, classes,
+						 p, paragraph_level,
+						 previous_level,
+						 next_level,
+						 &need_marker,
+						 emit, arg);
+
+			emit_marker(p, &need_marker, emit, arg);
+		}
+		else
+		{
+			struct need_marker_info need_marker;
+			size_t orig_end=p->end;
+
+			/*
+			** Sequence in the opposite direction. Because S and
+			** B reset to the paragraph level, no matter what,
+			** if we want things to render like that we will need
+			** to emit sequences on each side of S/B in reverse
+			** order. We start at the end of this sequence, then
+			** search towards the beginning, emit that sequence,
+			** emit the S and B, then go to the next sequence.
+			*/
+
+			need_marker_info_init(&need_marker);
+
+#ifdef BIDI_DEBUG
+			fprintf(DEBUGDUMP, "    need marker=%d\n",
+				need_marker.need_marker);
+#endif
+
+			while (p->start < p->end)
 			{
-				--right;
+				size_t j=p->end;
 
-				if (p)
+				int end_with_ws=
+					classes[j-1] == UNICODE_BIDI_TYPE_WS;
+				while (j > p->start)
 				{
-					char32_t c=p[left];
-					unicode_bidi_level_t l=levels[left];
+					--j;
 
-					p[left]=p[right];
-					levels[left]=levels[right];
-					p[right]=c;
-					levels[right]=l;
+					enum_bidi_type_t t=classes[j];
+
+					if (t == UNICODE_BIDI_TYPE_S ||
+					    t == UNICODE_BIDI_TYPE_B)
+					{
+						++j;
+						break;
+					}
+				}
+
+				if (j == p->end) /* Must be lone break */
+				{
+#ifdef BIDI_DEBUG
+					fprintf(DEBUGDUMP,
+						"    break: %d\n",
+						(int)j);
+#endif
+					--p->end;
+
+					previous_level=paragraph_level;
+
+					(*emit)(string+p->end, 1, arg);
+					continue;
+				}
+
+				struct need_marker_info need_marker_partial;
+
+				need_marker_info_init(&need_marker_partial);
+
+				/*
+				** Rule L1, there's going to be an S or a B
+				** after we emit this sequence.
+				*/
+
+				if (j != p->start)
+					need_marker_partial.need_marker=1;
+
+				/*
+				** To emit this sequence, we monkey-patch
+				** the run level to indicate the sub-
+				** sequence to emit.
+				*/
+				size_t i=p->start;
+
+				p->start=j;
+
+				emit_bidi_embed_levelrun
+					(string, classes, p, paragraph_level,
+					 previous_level,
+
+					 j == i
+					 /* No more, this is next */
+					 ? next_level
+					 /* We'll emit a paragraph brk */
+					 : paragraph_level,
+					 &need_marker_partial,
+					 emit, arg);
+
+				/* Continue monkey-patching. */
+
+				p->end=p->start;
+				p->start=i;
+
+				if (p->start == p->end)
+					/* Do it below */
+				{
+					if (end_with_ws)
+						need_marker.need_marker=
+							is_l1_on_or_after
+							(classes, n,
+							 orig_end,
+							 0);
+					need_marker_info_merge
+						(&need_marker,
+						 &need_marker_partial);
+				}
+				else
+				{
+					emit_marker(p, &need_marker_partial,
+						    emit, arg);
 				}
-				++left;
 			}
+			emit_marker(p, &need_marker, emit, arg);
+		}
+		free(p);
+	}
+	free(classes);
+}
+
+#define ADJUST_LR(t,e) do {					\
+		switch (t) {					\
+		case UNICODE_BIDI_TYPE_AL:			\
+			(t)=UNICODE_BIDI_TYPE_R;		\
+			break;					\
+		case UNICODE_BIDI_TYPE_ET:			\
+		case UNICODE_BIDI_TYPE_ES:			\
+		case UNICODE_BIDI_TYPE_AN:			\
+		case UNICODE_BIDI_TYPE_EN:			\
+			(t)=UNICODE_BIDI_TYPE_L;		\
+			break;					\
+		default:					\
+			break;					\
+		}						\
+	} while (0)
+
+#define ADJUST_LRSTRONG(t) do {					\
+		switch (t) {					\
+		case UNICODE_BIDI_TYPE_AL:			\
+			(t)=UNICODE_BIDI_TYPE_R;		\
+		default:					\
+			break;					\
+		}						\
+	} while (0)
+
+static void emit_bidi_embed_levelrun(const char32_t *string,
+				     enum_bidi_type_t *classes,
+				     struct bidi_embed_levelrun *run,
+				     unicode_bidi_level_t paragraph_level,
+				     unicode_bidi_level_t previous_level,
+				     unicode_bidi_level_t next_level,
+				     struct need_marker_info *need_marker,
+				     void (*emit)(const char32_t *string,
+						  size_t n,
+						  void *arg),
+				     void *arg)
+{
+	/*
+	** Our first order of business will be to apply rules W to this
+	** sequence, to resolve weak types.
+	**
+	** It's easy to simulate what unicode_bidi_w() expects.
+	*/
+
+	struct level_run lrun;
+	struct isolating_run_sequence_s seq;
+	enum_bidi_type_t e_type=E_CLASS(run->level);
+	enum_bidi_type_t o_type=O_CLASS(run->level);
+
+	if (run->start == run->end)
+		return;
+
+	memset(&seq, 0, sizeof(seq));
+
+	seq.embedding_level=run->level;
+	seq.sos=seq.eos=e_type;
+	seq.runs.runs=&lrun;
+	seq.runs.n_level_runs=1;
+	seq.runs.cap_level_runs=1;
+	lrun.start=run->start;
+	lrun.end=run->end;
+	unicode_bidi_w(classes, &seq);
+
+	/*
+	** Peek at the first character's class.
+	**
+	** If the previous sequence's embedding level was the same, it
+	** guarantees the persistence of the embedding direction. We can
+	** accept classes that default to our embedding level.
+	**
+	** Otherwise we recognize only strong classes.
+	*/
+	enum_bidi_type_t t=classes[run->start];
+
+	if (previous_level == run->level)
+	{
+		ADJUST_LR(t, E_CLASS(previous_level));
+	}
+	else
+	{
+		ADJUST_LRSTRONG(t);
+	}
+
+	/*
+	** Sequences in the opposite direction always get isolated.
+	*/
+	char32_t override_start=run->level ? RLI:LRI;
+
+	if (run->level != paragraph_level)
+		(*emit)(&override_start, 1, arg);
+
+	/*
+	** Make sure the character sequence has strong context.
+	*/
+	if (t == o_type)
+	{
+		struct need_marker_info need_marker;
+
+		need_marker_info_init(&need_marker);
+
+		need_marker.need_marker=1;
+
+		emit_marker(run, &need_marker, emit, arg);
+	}
+
+	override_start=run->level ? RLO:LRO;
+	char32_t override_end=PDF;
+
+	size_t start=run->start;
+	size_t end=run->end;
+
+	while (start < end)
+	{
+		size_t i=start;
+		size_t word_start=i;
+
+#ifdef BIDI_DEBUG
+		fprintf(DEBUGDUMP,
+			"    examining, starting at: %d\n", (int)i);
+#endif
+
+		/*
+		** Look for the next character with the opposite class.
+		** While doing that, keep an eye out for any WS or ONs,
+		** which will tell us where the most recent "word" starts,
+		** before this character.
+		*/
+		while (i < end)
+		{
+			enum_bidi_type_t t=classes[i];
+
+			ADJUST_LR(t, e_type);
+
+			if (t == o_type)
+				break;
+
+			switch (t) {
+			case UNICODE_BIDI_TYPE_WS:
+			case UNICODE_BIDI_TYPE_ON:
+				word_start=i+1;
+				break;
+			default:
+				break;
+			}
+
+			++i;
+		}
+
+		if (i < end)
+		{
+#ifdef BIDI_DEBUG
+			fprintf(DEBUGDUMP,
+				"    override needed: %d,"
+				" start of word at %d, ",
+				(int)i, (int)word_start);
+#endif
+			/*
+			** Found something to override. First, emit everything
+			** up to the start of this "word".
+			**
+			** Then emit the RLO or LRO, then look for the end
+			** of the "word", and drop the PDF there.
+			*/
+			if (word_start > start)
+				(*emit)(string+start,
+					word_start-start, arg);
+
+			(*emit)(&override_start, 1, arg);
+			while (++i < end)
+			{
+				enum_bidi_type_t t=classes[i];
 
-			if (end-start > 1 && reorder_callback)
-				(*reorder_callback)(start, end-start, arg);
+				switch (t) {
+				case UNICODE_BIDI_TYPE_WS:
+				case UNICODE_BIDI_TYPE_ON:
+					break;
+				default:
+					continue;
+				}
+				break;
+			}
+#ifdef BIDI_DEBUG
+			fprintf(DEBUGDUMP, "end of word at %d\n",
+				(int)i);
+#endif
+			(*emit)(string+word_start, i-word_start, arg);
+			(*emit)(&override_end, 1, arg);
+			start=i;
+			continue;
 		}
+		(*emit)(string+start, i-start, arg);
+		start=i;
 	}
 
-	level_run_layers_deinit(&layers);
+	/*
+	** Make sure that if a different embedding level follows we will
+	** emit a marker, to ensure strong context.
+	*/
+	t=classes[run->end-1];
+
+	if (next_level != run->level)
+	{
+		ADJUST_LRSTRONG(t);
+
+		if (e_type != t)
+			need_marker->need_marker=1;
+	}
+
+	if (run->level != paragraph_level)
+		need_marker->need_pdi=1;
+}
+
+struct compute_paragraph_embedding_level_char_info {
+	const char32_t *str;
+};
+
+static enum_bidi_type_t
+get_enum_bidi_type_for_embedding_paragraph_level(size_t i,
+						 void *arg)
+{
+	struct compute_paragraph_embedding_level_char_info *p=
+		(struct compute_paragraph_embedding_level_char_info *)arg;
+
+	return unicode_bidi_type(p->str[i]);
+}
+
+/* 0 if str already has paragraph_level's direction, else its RLM/LRM. */
+char32_t unicode_bidi_embed_paragraph_level(const char32_t *str,
+					    size_t n,
+					    unicode_bidi_level_t paragraph_level
+					    )
+{
+	struct compute_paragraph_embedding_level_char_info info;
+	info.str=str;
+
+	if (((compute_paragraph_embedding_level
+	      (0, n,
+	       get_enum_bidi_type_for_embedding_paragraph_level,
+	       &info) ^ paragraph_level) & 1) == 0) /* Low bit: direction */
+		return 0;
+	return (paragraph_level & 1) ? RLM:LRM;
 }
diff --git a/unicode/unicodecpp.C b/unicode/unicodecpp.C
index ca139cc..04d2893 100644
--- a/unicode/unicodecpp.C
+++ b/unicode/unicodecpp.C
@@ -1,5 +1,5 @@
 /*
-** Copyright 2011-2014 Double Precision, Inc.
+** Copyright 2011-2020 Double Precision, Inc.
 ** See COPYING for distribution information.
 **
 */
@@ -596,7 +596,8 @@ extern "C" {
 				     void *arg)
 	{
 		auto p=reinterpret_cast<const std::function<void (size_t,
-								  size_t)> *>
+								  size_t)
+							    noexcept> *>
 			(arg);
 
 		(*p)(i, cnt);
@@ -605,7 +606,8 @@ extern "C" {
 
 int unicode::bidi_reorder(std::u32string &string,
 			  std::vector<unicode_bidi_level_t> &levels,
-			  const std::function<void (size_t, size_t)> &lambda)
+			  const std::function<void (size_t, size_t)
+			  noexcept> &lambda)
 {
 	size_t s=string.size();
 
@@ -624,7 +626,8 @@ int unicode::bidi_reorder(std::u32string &string,
 }
 
 void unicode::bidi_reorder(std::vector<unicode_bidi_level_t> &levels,
-			   const std::function<void (size_t, size_t)> &lambda)
+			   const std::function<void (size_t, size_t)
+			   noexcept> &lambda)
 {
 	size_t s=levels.size();
 
@@ -636,3 +639,189 @@ void unicode::bidi_reorder(std::vector<unicode_bidi_level_t> &levels,
 			     (reinterpret_cast<const void *>(&lambda)));
 
 }
+
+extern "C" {
+	// C trampoline: arg carries the address of the caller's
+	// std::function; forward i to it.
+	static void removed_callback(size_t i, void *arg)
+	{
+		typedef std::function<void (size_t) noexcept> callback_t;
+
+		const callback_t &cb=*reinterpret_cast<const callback_t *>(arg);
+		cb(i);
+	}
+}
+
+// Clean up string in place, without a levels array. lambda is handed to
+// unicode_bidi_cleanup() through removed_callback; the string is then
+// resized to the length that unicode_bidi_cleanup() returns.
+void unicode::bidi_cleanup(std::u32string &string,
+			   const std::function<void (size_t) noexcept> &lambda)
+{
+	if (!string.empty())
+	{
+		void *cb_arg=const_cast<void *>
+			(reinterpret_cast<const void *>(&lambda));
+
+		string.resize(unicode_bidi_cleanup(&string[0], nullptr,
+						   string.size(),
+						   removed_callback, cb_arg));
+	}
+}
+
+// Clean up string and its parallel levels in place. Returns -1, leaving
+// both untouched, if their sizes differ; otherwise 0.
+int unicode::bidi_cleanup(std::u32string &string,
+			  std::vector<unicode_bidi_level_t> &levels,
+			  const std::function<void (size_t) noexcept> &lambda)
+{
+	if (levels.size() != string.size())
+		return -1;
+	if (string.empty())
+		return 0; // &levels[0] on an empty vector is undefined
+
+	void *arg=const_cast<void *>(reinterpret_cast<const void *>(&lambda));
+	size_t n=unicode_bidi_cleanup(&string[0], &levels[0], string.size(),
+				      removed_callback, arg);
+
+	string.resize(n);
+	levels.resize(n);
+	return 0;
+}
+
+
+// Run unicode_bidi_extra_cleanup() on string in place, with no levels
+// array. lambda is reached through removed_callback; afterwards the string
+// is resized to the length the C function returned.
+void unicode::bidi_extra_cleanup(std::u32string &string,
+				 const std::function<void (size_t) noexcept>
+				 &lambda)
+{
+	if (!string.empty())
+	{
+		void *cb_arg=const_cast<void *>
+			(reinterpret_cast<const void *>(&lambda));
+
+		string.resize(unicode_bidi_extra_cleanup
+			      (&string[0], nullptr, string.size(),
+			       removed_callback, cb_arg));
+	}
+}
+
+// Run unicode_bidi_extra_cleanup() on string and its parallel levels in
+// place. Returns -1, leaving both untouched, on a size mismatch, else 0.
+int unicode::bidi_extra_cleanup(std::u32string &string,
+				std::vector<unicode_bidi_level_t> &levels,
+				const std::function<void (size_t) noexcept>
+				&lambda)
+{
+	if (levels.size() != string.size())
+		return -1;
+	if (string.empty())
+		return 0; // &levels[0] on an empty vector is undefined
+
+	void *arg=const_cast<void *>(reinterpret_cast<const void *>(&lambda));
+	size_t n=unicode_bidi_extra_cleanup
+		(&string[0], &levels[0], string.size(), removed_callback, arg);
+
+	string.resize(n);
+	levels.resize(n);
+	return 0;
+}
+
+// Hand string and levels to unicode_bidi_logical_order(), routing its
+// callback to lambda. Returns -1 on a size mismatch, otherwise 0.
+int unicode::bidi_logical_order(std::u32string &string,
+				std::vector<unicode_bidi_level_t> &levels,
+				unicode_bidi_level_t paragraph_embedding,
+				const std::function<void (size_t, size_t)
+				noexcept> &lambda)
+{
+	const size_t n=string.size();
+
+	if (n != levels.size())
+		return -1;
+	if (n > 0)
+		unicode_bidi_logical_order
+			(&string[0], &levels[0], n, paragraph_embedding,
+			 &reorder_callback, const_cast<void *>
+			 (reinterpret_cast<const void *>(&lambda)));
+	return 0;
+}
+
+// Levels-only variant: passes a null string pointer to
+// unicode_bidi_logical_order(). Does nothing when levels is empty.
+void unicode::bidi_logical_order(std::vector<unicode_bidi_level_t> &levels,
+				 unicode_bidi_level_t paragraph_embedding,
+				 const std::function<void (size_t, size_t)
+				 noexcept> &lambda)
+{
+	if (!levels.empty())
+		unicode_bidi_logical_order
+			(nullptr, &levels[0], levels.size(),
+			 paragraph_embedding, &reorder_callback,
+			 const_cast<void *>
+			 (reinterpret_cast<const void *>(&lambda)));
+}
+
+extern "C" {
+	// C trampoline: arg carries the address of the caller's
+	// std::function; forward the (string, n) pair to it.
+	static void embed_callback(const char32_t *string, size_t n,
+				   void *arg)
+	{
+		typedef std::function<void (const char32_t *, size_t n)
+				      noexcept> callback_t;
+
+		(*reinterpret_cast<const callback_t *>(arg))(string, n);
+	}
+}
+
+// Pass string and levels to unicode_bidi_embed(); each (pointer, length)
+// callback it makes through embed_callback is forwarded to lambda. Returns
+// -1 without calling lambda if the sizes differ, otherwise 0.
+int unicode::bidi_embed(const std::u32string &string,
+			const std::vector<unicode_bidi_level_t> &levels,
+			unicode_bidi_level_t paragraph_embedding,
+			const std::function<void (const char32_t *string,
+						  size_t n) noexcept>
+			&lambda)
+{
+	const size_t n=string.size();
+
+	if (n != levels.size())
+		return -1;
+	if (n > 0)
+		unicode_bidi_embed(string.data(), levels.data(), n,
+				   paragraph_embedding, embed_callback,
+				   const_cast<void *>
+				   (reinterpret_cast<const void *>(&lambda)));
+	return 0;
+}
+
+// Convenience overload: collects everything the callback overload of
+// bidi_embed() emits into one string. Its status is discarded, so a size
+// mismatch between string and levels yields an empty result.
+std::u32string unicode::bidi_embed(const std::u32string &string,
+				   const std::vector<unicode_bidi_level_t
+				   > &levels,
+				   unicode_bidi_level_t paragraph_embedding)
+{
+	std::u32string embedded;
+
+	(void)bidi_embed(string, levels, paragraph_embedding,
+			 [&embedded]
+			 (const char32_t *chunk, size_t chunk_len)
+			 {
+				 embedded.append(chunk, chunk_len);
+			 });
+	return embedded;
+}
+
+// Thin wrapper: forwards string's buffer and size, plus level, to the C
+// function unicode_bidi_embed_paragraph_level() and returns its result.
+char32_t unicode::bidi_embed_paragraph_level(const std::u32string &string,
+					     unicode_bidi_level_t level)
+{
+	return unicode_bidi_embed_paragraph_level
+		(string.data(), string.size(), level);
+}
author	Sam Varshavchik	2020-07-12 09:44:24 -0400
committer	Sam Varshavchik	2020-08-02 14:56:50 -0400
commit	d2915c9cadf6fbc5ae29ffc387cce987b88dbbe0 (patch)
tree	f76c8edf36fb84c6e082f2a4ae9798b10aeda70e
parent	51471a4d8b177adfcd40c145a809193a4ab9bd8d (diff)
download	courier-libs-d2915c9cadf6fbc5ae29ffc387cce987b88dbbe0.tar.bz2