Documentation, C++ bindings, reorder.

author: Sam Varshavchik 2020-07-09 21:36:46 -0400
committer: Sam Varshavchik 2020-07-12 15:56:45 -0400
commit: 7a9293cd28b293b793793368237d8856cfb0eff4 (patch)
tree: 3c19854a7869103405c78a97e40503db64fac7b6
parent: 2219f725acd0dc36fa00080c846a8982273a6f61 (diff)
download: courier-libs-7a9293cd28b293b793793368237d8856cfb0eff4.tar.bz2
7 files changed, 1111 insertions, 393 deletions
diff --git a/unicode/Makefile.am b/unicode/Makefile.am
index 397987c..081965e 100644
--- a/unicode/Makefile.am
+++ b/unicode/Makefile.am
@@ -85,7 +85,87 @@ include_HEADERS=courier-unicode.h \
 	courier-unicode-categories-tab.h \
 	courier-unicode-script-tab.h
 
-man_MANS=$(srcdir)/man/courier-unicode.7 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]convert.3 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]convert_tocase.3 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]fromu.3 $(srcdir)/man/unicode[\:][\:]iconvert[\:][\:]tou.3 $(srcdir)/man/unicode[\:][\:]iso_8859_1.3 $(srcdir)/man/unicode[\:][\:]linebreak_callback_base.3 $(srcdir)/man/unicode[\:][\:]linebreak_callback_save_buf.3 $(srcdir)/man/unicode[\:][\:]linebreak_iter.3 $(srcdir)/man/unicode[\:][\:]linebreakc_callback_base.3 $(srcdir)/man/unicode[\:][\:]linebreakc_iter.3 $(srcdir)/man/unicode[\:][\:]tolower.3 $(srcdir)/man/unicode[\:][\:]toupper.3 $(srcdir)/man/unicode[\:][\:]ucs_2.3 $(srcdir)/man/unicode[\:][\:]ucs_4.3 $(srcdir)/man/unicode[\:][\:]utf_8.3 $(srcdir)/man/unicode[\:][\:]wordbreak_callback_base.3 $(srcdir)/man/unicode_category_lookup.3 $(srcdir)/man/unicode_convert.3 $(srcdir)/man/unicode_convert_deinit.3 $(srcdir)/man/unicode_convert_fromu_init.3 $(srcdir)/man/unicode_convert_fromu_tobuf.3 $(srcdir)/man/unicode_convert_fromutf8.3 $(srcdir)/man/unicode_convert_init.3 $(srcdir)/man/unicode_convert_tobuf.3 $(srcdir)/man/unicode_convert_tocase.3 $(srcdir)/man/unicode_convert_tocbuf_fromutf8_init.3 $(srcdir)/man/unicode_convert_tocbuf_init.3 $(srcdir)/man/unicode_convert_tocbuf_toutf8_init.3 $(srcdir)/man/unicode_convert_tou_init.3 $(srcdir)/man/unicode_convert_tou_tobuf.3 $(srcdir)/man/unicode_convert_toutf8.3 $(srcdir)/man/unicode_convert_uc.3 $(srcdir)/man/unicode_default_chset.3 $(srcdir)/man/unicode_grapheme_break.3 $(srcdir)/man/unicode_html40ent_lookup.3 $(srcdir)/man/unicode_isalnum.3 $(srcdir)/man/unicode_isalpha.3 $(srcdir)/man/unicode_isblank.3 $(srcdir)/man/unicode_isdigit.3 $(srcdir)/man/unicode_isgraph.3 $(srcdir)/man/unicode_islower.3 $(srcdir)/man/unicode_ispunct.3 $(srcdir)/man/unicode_isspace.3 $(srcdir)/man/unicode_isupper.3 $(srcdir)/man/unicode_lb_end.3 $(srcdir)/man/unicode_lb_init.3 $(srcdir)/man/unicode_lb_next.3 
$(srcdir)/man/unicode_lb_next_cnt.3 $(srcdir)/man/unicode_lb_set_opts.3 $(srcdir)/man/unicode_lbc_end.3 $(srcdir)/man/unicode_lbc_init.3 $(srcdir)/man/unicode_lbc_next.3 $(srcdir)/man/unicode_lbc_next_cnt.3 $(srcdir)/man/unicode_lbc_set_opts.3 $(srcdir)/man/unicode_lc.3 $(srcdir)/man/unicode_locale_chset.3 $(srcdir)/man/unicode_script.3 $(srcdir)/man/unicode_tc.3 $(srcdir)/man/unicode_u_ucs2_native.3 $(srcdir)/man/unicode_u_ucs4_native.3 $(srcdir)/man/unicode_uc.3 $(srcdir)/man/unicode_wb_end.3 $(srcdir)/man/unicode_wb_init.3 $(srcdir)/man/unicode_wb_next.3 $(srcdir)/man/unicode_wb_next_cnt.3 $(srcdir)/man/unicode_wbscan_end.3 $(srcdir)/man/unicode_wbscan_init.3 $(srcdir)/man/unicode_wbscan_next.3
+man_MANS= \
+        $(srcdir)/man/courier-unicode.7 \
+        $(srcdir)/man/unicode\:\:bidi_calc.3 \
+        $(srcdir)/man/unicode\:\:bidi_reorder.3 \
+        $(srcdir)/man/unicode\:\:iconvert\:\:convert.3 \
+        $(srcdir)/man/unicode\:\:iconvert\:\:convert_tocase.3 \
+        $(srcdir)/man/unicode\:\:iconvert\:\:fromu.3 \
+        $(srcdir)/man/unicode\:\:iconvert\:\:tou.3 \
+        $(srcdir)/man/unicode\:\:iso_8859_1.3 \
+        $(srcdir)/man/unicode\:\:linebreak_callback_base.3 \
+        $(srcdir)/man/unicode\:\:linebreak_callback_save_buf.3 \
+        $(srcdir)/man/unicode\:\:linebreak_iter.3 \
+        $(srcdir)/man/unicode\:\:linebreakc_callback_base.3 \
+        $(srcdir)/man/unicode\:\:linebreakc_iter.3 \
+        $(srcdir)/man/unicode\:\:tolower.3 \
+        $(srcdir)/man/unicode\:\:toupper.3 \
+        $(srcdir)/man/unicode\:\:ucs_2.3 \
+        $(srcdir)/man/unicode\:\:ucs_4.3 \
+        $(srcdir)/man/unicode\:\:utf_8.3 \
+        $(srcdir)/man/unicode\:\:wordbreak_callback_base.3 \
+        $(srcdir)/man/unicode_bidi.3 \
+        $(srcdir)/man/unicode_bidi_bracket_type.3 \
+        $(srcdir)/man/unicode_bidi_calc.3 \
+        $(srcdir)/man/unicode_bidi_mirror.3 \
+        $(srcdir)/man/unicode_bidi_reorder.3 \
+        $(srcdir)/man/unicode_category_lookup.3 \
+        $(srcdir)/man/unicode_convert.3 \
+        $(srcdir)/man/unicode_convert_deinit.3 \
+        $(srcdir)/man/unicode_convert_fromu_init.3 \
+        $(srcdir)/man/unicode_convert_fromu_tobuf.3 \
+        $(srcdir)/man/unicode_convert_fromutf8.3 \
+        $(srcdir)/man/unicode_convert_init.3 \
+        $(srcdir)/man/unicode_convert_tobuf.3 \
+        $(srcdir)/man/unicode_convert_tocase.3 \
+        $(srcdir)/man/unicode_convert_tocbuf_fromutf8_init.3 \
+        $(srcdir)/man/unicode_convert_tocbuf_init.3 \
+        $(srcdir)/man/unicode_convert_tocbuf_toutf8_init.3 \
+        $(srcdir)/man/unicode_convert_tou_init.3 \
+        $(srcdir)/man/unicode_convert_tou_tobuf.3 \
+        $(srcdir)/man/unicode_convert_toutf8.3 \
+        $(srcdir)/man/unicode_convert_uc.3 \
+        $(srcdir)/man/unicode_default_chset.3 \
+        $(srcdir)/man/unicode_grapheme_break.3 \
+        $(srcdir)/man/unicode_grapheme_break_deinit.3 \
+        $(srcdir)/man/unicode_grapheme_break_init.3 \
+        $(srcdir)/man/unicode_grapheme_break_next.3 \
+        $(srcdir)/man/unicode_html40ent_lookup.3 \
+        $(srcdir)/man/unicode_isalnum.3 \
+        $(srcdir)/man/unicode_isalpha.3 \
+        $(srcdir)/man/unicode_isblank.3 \
+        $(srcdir)/man/unicode_isdigit.3 \
+        $(srcdir)/man/unicode_isgraph.3 \
+        $(srcdir)/man/unicode_islower.3 \
+        $(srcdir)/man/unicode_ispunct.3 \
+        $(srcdir)/man/unicode_isspace.3 \
+        $(srcdir)/man/unicode_isupper.3 \
+        $(srcdir)/man/unicode_lb_end.3 \
+        $(srcdir)/man/unicode_lb_init.3 \
+        $(srcdir)/man/unicode_lb_next.3 \
+        $(srcdir)/man/unicode_lb_next_cnt.3 \
+        $(srcdir)/man/unicode_lb_set_opts.3 \
+        $(srcdir)/man/unicode_lbc_end.3 \
+        $(srcdir)/man/unicode_lbc_init.3 \
+        $(srcdir)/man/unicode_lbc_next.3 \
+        $(srcdir)/man/unicode_lbc_next_cnt.3 \
+        $(srcdir)/man/unicode_lbc_set_opts.3 \
+        $(srcdir)/man/unicode_lc.3 \
+        $(srcdir)/man/unicode_line_break.3 \
+        $(srcdir)/man/unicode_locale_chset.3 \
+        $(srcdir)/man/unicode_script.3 \
+        $(srcdir)/man/unicode_tc.3 \
+        $(srcdir)/man/unicode_u_ucs2_native.3 \
+        $(srcdir)/man/unicode_u_ucs4_native.3 \
+        $(srcdir)/man/unicode_uc.3 \
+        $(srcdir)/man/unicode_wb_end.3 \
+        $(srcdir)/man/unicode_wb_init.3 \
+        $(srcdir)/man/unicode_wb_next.3 \
+        $(srcdir)/man/unicode_wb_next_cnt.3 \
+        $(srcdir)/man/unicode_wbscan_end.3 \
+        $(srcdir)/man/unicode_wbscan_init.3 \
+        $(srcdir)/man/unicode_wbscan_next.3
 
 libcourier_unicode_la_SOURCES=\
 			courier-unicode-categories-tab.h \
@@ -329,7 +409,7 @@ docs.stamp:
 	rm -f man/*.[123456789]
 	mv man.tmp/* man
 	rm -rf html.tmp man.tmp
-	perl -p -e 's/:/[\\:]/g if s@^man_MANS=.*@"man_MANS=" . join(" ", map { "\$$(srcdir)/$$_" } glob("man/*.[123456789]"))@e' Makefile.am >Makefile.am.new
+	perl -e '$$f=join("",<STDIN>); $$p=join("", map { " \\\n        \$$(srcdir)/$$_" } glob("man/*.[123456789]")); $$p=~s/:/\\:/g; $$f =~ s/\nman_MANS=([^\n]|\n[^\n])*/\nman_MANS=$$p/s; print $$f' <Makefile.am >Makefile.am.new
 	cmp Makefile.am Makefile.am.new || mv -f Makefile.am.new Makefile.am; rm -f Makefile.am.new
 	touch docs.stamp
 
@@ -405,4 +485,4 @@ distrelease:
 	$(MAKE) dist
 
 www:
-	rsync -a html/. $$HOME/www/www.courier-mta.org/unicode
+	rsync -a html/. $$HOME/www/hostrocket/courier-mta.org/unicode
diff --git a/unicode/README b/unicode/README
index 2aeb1f0..926e004 100644
--- a/unicode/README
+++ b/unicode/README
@@ -25,6 +25,8 @@ Courier Unicode Library
 
      * Implementation of line breaking rules.
 
+     * Implementation of the bi-directional algorithm.
+
      * Several ancillary functions, like looking up the unicode character
        that corresponds to some HTML 4.0 entity (such as “&amp;”, for
        example), and determining the normal width or a double-width status of
@@ -40,7 +42,7 @@ Courier Unicode Library
 Current status
 
    The current release of the Courier Unicode library is based on the Unicode
-   8.0.0 standard.
+   13.0.0 standard.
 
    --------------------------------------------------------------------------
 
diff --git a/unicode/biditest.C b/unicode/biditest.C
index c58da0d..61841a1 100644
--- a/unicode/biditest.C
+++ b/unicode/biditest.C
@@ -5,7 +5,9 @@
 #include	<sstream>
 #include	<string>
 #include	<algorithm>
+#include	<utility>
 #include	<iomanip>
+#include	<numeric>
 
 std::vector<std::string> testcase;
 
@@ -43,6 +45,8 @@ int main(int argc, char **argv)
 
 	std::vector<unicode_bidi_level_t> expected_levels;
 
+	std::vector<size_t> expected_reorder;
+
 	while (1)
 	{
 		buf.clear();
@@ -99,6 +103,28 @@ int main(int argc, char **argv)
 			continue;
 		}
 
+
+
+		if (buf.substr(0, 9) == "@Reorder:")
+		{
+			expected_reorder.clear();
+
+			std::istringstream i(buf);
+
+			std::string word;
+
+			i >> word;
+
+			size_t n;
+
+			while (i >> n)
+			{
+				expected_reorder.push_back(n);
+			}
+			continue;
+		}
+
+
 		if (buf.substr(0, 1) == "@")
 			continue;
 
@@ -138,10 +164,9 @@ int main(int argc, char **argv)
 
 		std::vector<unicode_bidi_level_t> actual_levels;
 
-		std::vector<char32_t> dummy_input;
+		std::u32string dummy_input;
 
 		dummy_input.resize(testcase.size());
-		actual_levels.resize(testcase.size());
 
 		static const unicode_bidi_level_t level_0=0;
 		static const unicode_bidi_level_t level_1=1;
@@ -153,9 +178,9 @@ int main(int argc, char **argv)
 		{
 			if (n & 1)
 			{
-				unicode_bidi_calc(&dummy_input[0],
-						  testcase.size(),
-						  &actual_levels[0], level);
+				actual_levels=level ?
+					unicode::bidi_calc(dummy_input,*level)
+					: unicode::bidi_calc(dummy_input);
 
 				int matched=0;
 
@@ -220,6 +245,87 @@ int main(int argc, char **argv)
 					std::cerr << std::endl;
 					exit(1);
 				}
+
+				std::vector<size_t> actual_reorder;
+
+				actual_reorder.resize(testcase.size());
+
+				std::iota(actual_reorder.begin(),
+					  actual_reorder.end(), 0);
+
+				unicode::bidi_reorder
+					(dummy_input,
+					 actual_levels,
+					 [&]
+					 (size_t s, size_t cnt)
+					 {
+						 auto *b=&actual_reorder[s];
+						 auto *e=b+cnt;
+
+						 while (b < e)
+						 {
+							 --e;
+							 std::swap(*b, *e);
+							 ++b;
+						 }
+					 });
+
+				auto b=actual_reorder.begin(), p=b,
+					e=actual_reorder.end();
+
+				auto q=actual_levels.begin();
+
+				while (b != e)
+				{
+					if (*q != UNICODE_BIDI_SKIP)
+					{
+						*p=*b;
+						++p;
+					}
+					++b;
+					++q;
+				}
+				actual_reorder.erase(p, e);
+
+				if (actual_reorder != expected_reorder)
+				{
+					fclose(DEBUGDUMP);
+					DEBUGDUMP=stderr;
+					std::cout << std::endl
+						  << std::flush;
+					unicode_bidi_calc(&dummy_input[0],
+							  testcase.size(),
+							  &actual_levels[0],
+							  level);
+
+					std::cerr << "Regression, line "
+						  << linenum;
+
+					if (!level)
+					{
+						std::cerr << ", auto";
+					}
+					else
+					{
+						std::cerr <<
+							(*level ? ", RTL"
+							 : ", LTR");
+					}
+					std::cerr << ": expected reorder";
+
+					for (auto o:expected_reorder)
+					{
+						std::cerr << " " << o;
+					}
+					std::cerr << std::endl
+						  << "Moved: ";
+					for (auto o:actual_reorder)
+					{
+						std::cerr << " " << o;
+					}
+					std::cerr << std::endl;
+					exit(1);
+				}
 			}
 
 			n >>= 1;
diff --git a/unicode/book.xml b/unicode/book.xml
index 41b8037..ee4b5e5 100644
--- a/unicode/book.xml
+++ b/unicode/book.xml
@@ -1,7 +1,8 @@
 <?xml version="1.0" encoding="utf-8"?>
 <!DOCTYPE article PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
- "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
+ "https://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
 
+<!ENTITY tr9ver  "42">
 <!ENTITY tr14ver "45">
 <!ENTITY tr24ver "31">
 <!ENTITY tr29ver "37">
@@ -19,7 +20,7 @@ See COPYING for distribution information.
 
   <para>
     This library implements several algorithms related to the
-    <ulink url="http://www.unicode.org/standard/standard.html">Unicode
+    <ulink url="https://www.unicode.org/standard/standard.html">Unicode
     Standard</ulink>:
   </para>
 
@@ -33,25 +34,32 @@ See COPYING for distribution information.
     <listitem>
       <para>
 	Implementation of
-	<ulink url="http://www.unicode.org/reports/tr29/tr29-&tr29ver;.html">grapheme
+	<ulink url="https://www.unicode.org/reports/tr29/tr29-&tr29ver;.html">grapheme
 	and work breaking</ulink> rules.
       </para>
     </listitem>
     <listitem>
       <para>
 	Implementation of
-	<ulink url="http://www.unicode.org/reports/tr14/tr14-&tr14ver;.html">line
+	<ulink url="https://www.unicode.org/reports/tr14/tr14-&tr14ver;.html">line
 	breaking</ulink> rules.
       </para>
     </listitem>
     <listitem>
       <para>
+	Implementation of the
+	<ulink url="https://www.unicode.org/reports/tr9/tr9-&tr9ver;.html">bi-directional
+	algorithm</ulink>.
+      </para>
+    </listitem>
+    <listitem>
+      <para>
 	Several ancillary functions, like looking up
 	the unicode character that corresponds to some HTML 4.0
 	entity (such as <quote>&amp;amp;</quote>, for example), and
 	determining the normal width or a double-width status of a unicode
 	character. Also, an adaptation of the
-	<ulink url="http://manpages.courier-mta.org/htmlman3/iconv.3.html">
+	<ulink url="https://manpages.courier-mta.org/htmlman3/iconv.3.html">
 	<citerefentry><refentrytitle>iconv</refentrytitle>
 	<manvolnum>3</manvolnum></citerefentry></ulink>
 	API for this unicode library.
@@ -60,14 +68,14 @@ See COPYING for distribution information.
     <listitem>
       <para>
 	Look up the
-	<ulink url="http://www.unicode.org/reports/tr24/tr24-&tr24ver;.html">Unicode
+	<ulink url="https://www.unicode.org/reports/tr24/tr24-&tr24ver;.html">Unicode
 	script property</ulink>.
       </para>
     </listitem>
     <listitem>
       <para>
 	Look up the
-	<ulink url="http://unicode.org/notes/tn36/">category</ulink>
+	<ulink url="https://unicode.org/notes/tn36/">category</ulink>
 	property.
       </para>
     </listitem>
@@ -82,7 +90,7 @@ See COPYING for distribution information.
 
     <para>
       The current release of the Courier Unicode library is based on the
-      Unicode 8.0.0 standard.
+      Unicode 13.0.0 standard.
     </para>
   </section>
 
@@ -91,7 +99,7 @@ See COPYING for distribution information.
 
     <para>
       Download the current version of the library from
-      <ulink url="/download.html#unicode">http://www.courier-mta.org/download.html#unicode</ulink>.
+      <ulink url="/download.html#unicode">https://www.courier-mta.org/download.html#unicode</ulink>.
       After unpacking the tarball, run the configure script, which takes
       the usual options, followed by <command>make</command>, then
       <command>make install</command>.
@@ -154,7 +162,7 @@ See COPYING for distribution information.
 	  <manvolnum>7</manvolnum></citerefentry></link>.
 	  Refer to the included manual pages,
 	  and
-	  <ulink url="http://www.courier-mta.org/unicode/manpages.html"> the HTML
+	  <ulink url="https://www.courier-mta.org/unicode/manpages.html"> the HTML
 	  version of the man pages</ulink> for more information.
     </para>
   </section>
@@ -166,7 +174,7 @@ See COPYING for distribution information.
       <title>C manual pages</title>
       <refentry id="courier-unicode">
 
-	<info><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></info>
+	<refentryinfo><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></refentryinfo>
 	<refmeta>
 	  <refentrytitle>courier-unicode</refentrytitle>
 	  <manvolnum>7</manvolnum>
@@ -187,12 +195,12 @@ See COPYING for distribution information.
 
 	  <para>
 	    This library implements several algorithms related to the
-	    <ulink url="http://www.unicode.org/standard/standard.html">Unicode
+	    <ulink url="https://www.unicode.org/standard/standard.html">Unicode
 	    Standard</ulink>.
 	    This library uses
-	    <ulink url="http://manpages.courier-mta.org/htmlman3/iconv.3.html">
-	      <citerefentry><refentrytitle>iconv</refentrytitle>
-	      <manvolnum>3</manvolnum></citerefentry></ulink> to convert
+	    <ulink url="https://manpages.courier-mta.org/htmlman3/iconv.3.html"
+		   ><citerefentry><refentrytitle>iconv</refentrytitle>
+	    <manvolnum>3</manvolnum></citerefentry></ulink> to convert
 	      text in a given character set to unicode. Any character set
 	      displayed by <command>iconv --list</command> can be specified
 	      for the corresponding character set parameter. Additionally,
@@ -229,6 +237,9 @@ See COPYING for distribution information.
 	    <link linkend="unicode_html40ent_lookup">
 	      <citerefentry><refentrytitle>unicode_html40ent_lookup</refentrytitle>
 	      <manvolnum>3</manvolnum></citerefentry></link>,
+	    <link linkend="unicode_bidi">
+	      <citerefentry><refentrytitle>unicode_bidi</refentrytitle>
+	      <manvolnum>3</manvolnum></citerefentry></link>,
 	    <link linkend="unicode_category_lookup">
 	      <citerefentry><refentrytitle>unicode_category_lookup</refentrytitle>
 	      <manvolnum>3</manvolnum></citerefentry></link>,
@@ -247,6 +258,9 @@ See COPYING for distribution information.
 	    <link linkend="unicode_uc">
 	      <citerefentry><refentrytitle>unicode_uc</refentrytitle>
 	      <manvolnum>3</manvolnum></citerefentry></link>,
+	    <link linkend="unicode__bidi">
+	      <citerefentry><refentrytitle>unicode::bidi</refentrytitle>
+	      <manvolnum>3</manvolnum></citerefentry></link>,
 	    <link linkend="unicode__iconvert__convert">
 	      <citerefentry><refentrytitle>unicode::iconvert::convert</refentrytitle>
 	      <manvolnum>3</manvolnum></citerefentry></link>,
@@ -272,8 +286,409 @@ See COPYING for distribution information.
 	</refsect1>
       </refentry>
 
+      <refentry id="unicode_bidi">
+	<refentryinfo><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></refentryinfo>
+
+	<refmeta>
+	  <refentrytitle>unicode_bidi</refentrytitle>
+	  <manvolnum>3</manvolnum>
+	</refmeta>
+
+	<refnamediv>
+	  <refname>unicode_bidi</refname>
+	  <refname>unicode_bidi_calc</refname>
+	  <refname>unicode_bidi_reorder</refname>
+	  <refname>unicode_bidi_mirror</refname>
+	  <refname>unicode_bidi_bracket_type</refname>
+
+	  <refpurpose>unicode bidirectional algorithm</refpurpose>
+	</refnamediv>
+
+	<refsynopsisdiv>
+	  <funcsynopsis>
+	    <funcsynopsisinfo>#include &lt;courier-unicode.h&gt;</funcsynopsisinfo>
+	    <funcsynopsisinfo>unicode_bidi_level_t lr=UNICODE_BIDI_LR</funcsynopsisinfo>
+	    <funcprototype>
+	      <funcdef>void <function>unicode_bidi_calc</function></funcdef>
+              <paramdef>const char32_t *<parameter>p</parameter></paramdef>
+              <paramdef>size_t <parameter>n</parameter></paramdef>
+              <paramdef>unicode_bidi_level_t *<parameter>levels</parameter></paramdef>
+              <paramdef>const unicode_bidi_level_t *<parameter>initial_embedding_level</parameter></paramdef>
+	    </funcprototype>
+
+	    <funcprototype>
+	      <funcdef>void <function>unicode_bidi_reorder</function></funcdef>
+              <paramdef>char32_t *<parameter>string</parameter></paramdef>
+              <paramdef>unicode_bidi_level_t *<parameter>levels</parameter></paramdef>
+              <paramdef>size_t <parameter>n</parameter></paramdef>
+              <paramdef>void (*<parameter>reorder_callback</parameter>)(size_t, size_t, void *)</paramdef>
+	      <paramdef>void *<parameter>arg</parameter></paramdef>
+	    </funcprototype>
+
+	    <funcprototype>
+	      <funcdef>char32_t <function>unicode_bidi_mirror</function></funcdef>
+              <paramdef>char32_t <parameter>c</parameter></paramdef>
+	    </funcprototype>
+
+	    <funcprototype>
+	      <funcdef>char32_t <function>unicode_bidi_bracket_type</function></funcdef>
+              <paramdef>char32_t <parameter>c</parameter></paramdef>
+              <paramdef>unicode_bracket_type_t *<parameter>ret</parameter></paramdef>
+	    </funcprototype>
+	  </funcsynopsis>
+	</refsynopsisdiv>
+	<refsect1>
+	  <title>DESCRIPTION</title>
+
+	  <para>
+	    <function>unicode_bidi_calc</function>() and
+	    <function>unicode_bidi_reorder</function>() implement
+	    the
+	    <ulink url="https://www.unicode.org/reports/tr9/tr9-&tr9ver;.html"> Unicode Bi-directional algorithm</ulink>.
+	  </para>
+	  <para>
+	    The first two parameters to
+	    <function>unicode_bidi_calc</function>() are a unicode string
+	    and the number of characters in the Unicode string.
+	    <parameter>levels</parameter> points to a buffer of
+	    <classname>unicode_bidi_level_t</classname> values which the
+	    caller is responsible for allocating and deallocating, and has
+	    the same number of values as the number of characters in the
+	    Unicode string.
+	  </para>
+	  <para>
+	    <function>unicode_bidi_calc</function>() calculates the
+	    embedding level of each character and fills in the
+	    <parameter>levels</parameter> buffer (executes all steps of the
+	    bidirectional algorithm up to step L1).
+	    A <literal>NULL</literal> <parameter>initial_embedding_level</parameter>
+	    value calculates the default paragraph embedding value.
+	    A pointer to a <literal>UNICODE_BIDI_LR</literal> or
+	    <literal>UNICODE_BIDI_RL</literal> value explicitly sets a
+	    left-to-right or right-to-left paragraph embedding value.
+	  </para>
+
+	  <para>
+	    <function>unicode_bidi_calc</function>() calculates each
+	    character's embedding value; an even value for left-to-right text
+	    or an odd value for right-to-left text. A
+	    <classname>UNICODE_BIDI_SKIP</classname> embedding level value
+	    specifies a character whose embedding value is unspecified.
+	    This is used for embedding and override markers which can be
+	    removed from the string (together with this embedding value).
+	    This can be done before or after
+	    <function>unicode_bidi_reorder</function>.
+	  </para>
+
+	  <refsect2>
+	    <title>Reordering text</title>
+
+	    <para>
+	      <function>unicode_bidi_reorder</function> takes the actual
+	      unicode string together with the embedding values from
+	      <function>unicode_bidi_calc</function>, then reverses the
+	      bidirectional string, as specified by step L2 of the bidirectional
+	      algorithm.
+	    </para>
+
+	    <para>
+	      A non-<literal>NULL</literal>
+	      <parameter>reorder_callback</parameter> gets invoked to report
+	      each reversed character range. The callback's first parameter
+	      is the index of the first reversed character, the second parameter
+	      is the number of reversed characters.
+	      The third parameter is the <parameter>arg</parameter> passthrough
+	      parameter.
+	    </para>
+
+	    <para>
+	      <parameter>reorder_callback</parameter> gets invoked after
+	      reversing each consecutive range of values in the
+	      <parameter>string</parameter> and <parameter>levels</parameter>
+	      buffers. For example: <quote>reorder_callback(5, 2, arg)</quote>
+	      reports that character indexes #5 and #6 got reversed in the
+	      string.
+	    </para>
+
+	    <para>
+	      Specifying a NULL <parameter>string</parameter> leaves the
+	      <parameter>levels</parameter> buffer unchanged, but still
+	      invokes the <parameter>reorder_callback</parameter> as if
+	      the character string, and their values, were reversed.
+	    </para>
+	  </refsect2>
+	  <refsect2>
+	    <title>Miscellaneous utility functions</title>
+
+	    <para>
+	      <function>unicode_bidi_mirror</function>
+	      returns the glyph that's a mirror image of the parameter
+	      (i.e. an open parenthesis for a close parenthesis, and vice
+	      versa); or the same value if there is no mirror image.
+	    </para>
+
+	    <para>
+	      <function>unicode_bidi_bracket_type</function>
+	      looks up each bracket character and returns its opposite, or
+	      the same value if the character is not a bracket that has an
+	      opposing bracket character.
+	      A non-NULL <parameter>ret</parameter> gets initialized to
+	      either <literal>UNICODE_BIDI_o</literal>,
+	      <literal>UNICODE_BIDI_c</literal> or
+	      <literal>UNICODE_BIDI_n</literal>.
+	    </para>
+	  </refsect2>
+	</refsect1>
+	<refsect1>
+	  <title>SEE ALSO</title>
+	  <para>
+	    <ulink url="https://www.unicode.org/reports/tr9/tr9-&tr9ver;.html">TR-9</ulink>,
+	    <link linkend="unicode__bidi">
+	      <citerefentry><refentrytitle>unicode::bidi</refentrytitle>
+	      <manvolnum>3</manvolnum></citerefentry></link>,
+	    <link linkend="courier-unicode">
+	      <citerefentry>
+		<refentrytitle>courier-unicode</refentrytitle>
+		<manvolnum>7</manvolnum></citerefentry></link>.
+	  </para>
+	</refsect1>
+      </refentry>
+
+      <refentry id="unicode_category_lookup">
+	<refentryinfo><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></refentryinfo>
+
+	<refmeta>
+	  <refentrytitle>unicode_category_lookup</refentrytitle>
+	  <manvolnum>3</manvolnum>
+	</refmeta>
+
+	<refnamediv>
+	  <refname>unicode_category_lookup</refname>
+	  <refname>unicode_isalnum</refname>
+	  <refname>unicode_isalpha</refname>
+	  <refname>unicode_isblank</refname>
+	  <refname>unicode_isdigit</refname>
+	  <refname>unicode_isgraph</refname>
+	  <refname>unicode_islower</refname>
+	  <refname>unicode_ispunct</refname>
+	  <refname>unicode_isspace</refname>
+	  <refname>unicode_isupper</refname>
+
+	  <refpurpose>unicode character categorization</refpurpose>
+	</refnamediv>
+
+	<refsynopsisdiv>
+	  <funcsynopsis>
+	    <funcsynopsisinfo>#include &lt;courier-unicode.h&gt;</funcsynopsisinfo>
+	    <funcprototype>
+	      <funcdef>uint32_t <function>unicode_category_lookup</function></funcdef>
+              <paramdef>char32_t <parameter>c</parameter></paramdef>
+	    </funcprototype>
+
+	    <funcprototype>
+              <funcdef>int <function>unicode_isalnum</function></funcdef>
+              <paramdef>char32_t <parameter>c</parameter></paramdef>
+	    </funcprototype>
+
+	    <funcprototype>
+              <funcdef>int <function>unicode_isalpha</function></funcdef>
+              <paramdef>char32_t <parameter>c</parameter></paramdef>
+	    </funcprototype>
+
+	    <funcprototype>
+              <funcdef>int <function>unicode_isblank</function></funcdef>
+              <paramdef>char32_t <parameter>c</parameter></paramdef>
+	    </funcprototype>
+
+	    <funcprototype>
+              <funcdef>int <function>unicode_isdigit</function></funcdef>
+              <paramdef>char32_t <parameter>c</parameter></paramdef>
+	    </funcprototype>
+
+	    <funcprototype>
+              <funcdef>int <function>unicode_isgraph</function></funcdef>
+              <paramdef>char32_t <parameter>c</parameter></paramdef>
+	    </funcprototype>
+
+	    <funcprototype>
+              <funcdef>int <function>unicode_islower</function></funcdef>
+              <paramdef>char32_t <parameter>c</parameter></paramdef>
+	    </funcprototype>
+
+	    <funcprototype>
+              <funcdef>int <function>unicode_ispunct</function></funcdef>
+              <paramdef>char32_t <parameter>c</parameter></paramdef>
+	    </funcprototype>
+
+	    <funcprototype>
+              <funcdef>int <function>unicode_isspace</function></funcdef>
+              <paramdef>char32_t <parameter>c</parameter></paramdef>
+	    </funcprototype>
+
+	    <funcprototype>
+              <funcdef>int <function>unicode_isupper</function></funcdef>
+              <paramdef>char32_t <parameter>c</parameter></paramdef>
+	    </funcprototype>
+	  </funcsynopsis>
+	</refsynopsisdiv>
+	<refsect1>
+	  <title>DESCRIPTION</title>
+
+	  <para>
+	    <function>unicode_category_lookup</function>() looks up the
+	    <ulink url="https://unicode.org/notes/tn36/">unicode character's
+	    categorization</ulink>.
+	    <function>unicode_category_lookup</function>() returns a 32 bit
+	    value.
+	    The value's
+	    <symbol>UNICODE_CATEGORY_1</symbol> bits specify the first level
+	    of the unicode character's category, with
+	    <symbol>UNICODE_CATEGORY_2</symbol>,
+	    <symbol>UNICODE_CATEGORY_3</symbol>, and
+	    <symbol>UNICODE_CATEGORY_4</symbol> bits specifying the 2nd,
+	    3rd, and 4th level, if given. A value of 0 for each corresponding
+	    bit set indicates that no category is specified for this level,
+	    for this character; otherwise the possible values are defined
+	    in <filename>&lt;courier-unicode.h&gt;</filename>.
+	  </para>
+
+	  <para>
+	    The remaining functions implement comparable equivalents of
+	    their non-unicode versions in the standard C library, as follows:
+	  </para>
+
+	  <variablelist>
+	    <varlistentry>
+              <term><function>unicode_isalnum</function>()</term>
+	      <listitem>
+		<para>
+		  Returns non-0 for all
+		  <function>unicode_isalpha</function>() or
+		  <function>unicode_isdigit</function>().
+		</para>
+	      </listitem>
+	    </varlistentry>
+
+	    <varlistentry>
+              <term><function>unicode_isalpha</function>()</term>
+	      <listitem>
+		<para>
+		  Returns non-0 for all
+		  <symbol>UNICODE_CATEGORY_1_LETTER</symbol>.
+		</para>
+	      </listitem>
+	    </varlistentry>
+
+	    <varlistentry>
+              <term><function>unicode_isblank</function>()</term>
+	      <listitem>
+		<para>
+		  Returns non-0 for
+		  <symbol>TAB</symbol>, and all
+		  <symbol>UNICODE_CATEGORY_2_SPACE</symbol>.
+		</para>
+	      </listitem>
+	    </varlistentry>
+
+	    <varlistentry>
+              <term><function>unicode_isdigit</function>()</term>
+	      <listitem>
+		<para>
+		  Returns non-0 for all
+		  <symbol>UNICODE_CATEGORY_1_NUMBER</symbol>
+		  | <symbol>UNICODE_CATEGORY_2_DIGIT</symbol>,
+		  only (no third categories).
+		</para>
+	      </listitem>
+	    </varlistentry>
+
+	    <varlistentry>
+              <term><function>unicode_isgraph</function>()</term>
+	      <listitem>
+		<para>
+		  Returns non-0 for all codepoints above
+		  <symbol>SPACE</symbol> which are not
+		  <function>unicode_isspace</function>().
+		</para>
+	      </listitem>
+	    </varlistentry>
+
+	    <varlistentry>
+              <term><function>unicode_islower</function>()</term>
+	      <listitem>
+		<para>
+		  Returns non-0 for all
+		  <function>unicode_isalpha</function>() for which the
+		  character is
+		  equal to
+		  <link linkend="unicode_uc">
+		    <citerefentry><refentrytitle>unicode_lc</refentrytitle>
+		  <manvolnum>3</manvolnum></citerefentry></link>
+		  of itself.
+		</para>
+	      </listitem>
+	    </varlistentry>
+
+	    <varlistentry>
+              <term><function>unicode_ispunct</function>()</term>
+	      <listitem>
+		<para>
+		  Returns non-0 for all
+		  <symbol>UNICODE_CATEGORY_1_PUNCTUATION</symbol>.
+		</para>
+	      </listitem>
+	    </varlistentry>
+
+	    <varlistentry>
+              <term><function>unicode_isspace</function>()</term>
+	      <listitem>
+		<para>
+		  Returns non-0 for unicode_isblank() or
+		  for unicode characters
+		  with linebreaking properties of
+		  <symbol>BK</symbol>,
+		  <symbol>CR</symbol>,
+		  <symbol>LF</symbol>,
+		  <symbol>NL</symbol>,
+		  and
+		  <symbol>SP</symbol>.
+		</para>
+	      </listitem>
+	    </varlistentry>
+
+	    <varlistentry>
+              <term><function>unicode_isupper</function>()</term>
+	      <listitem>
+		<para>
+		  Returns non-0 for all
+		  <function>unicode_isalpha</function>() for which the
+		  character is
+		  equal to
+		  <link linkend="unicode_uc">
+		    <citerefentry><refentrytitle>unicode_uc</refentrytitle>
+		  <manvolnum>3</manvolnum></citerefentry></link>
+		  of itself.
+		</para>
+	      </listitem>
+	    </varlistentry>
+	  </variablelist>
+	</refsect1>
+	<refsect1>
+	  <title>SEE ALSO</title>
+	  <para>
+	    <link linkend="courier-unicode">
+	      <citerefentry>
+		<refentrytitle>courier-unicode</refentrytitle>
+		<manvolnum>7</manvolnum></citerefentry></link>,
+	    <link linkend="unicode_uc">
+	      <citerefentry><refentrytitle>unicode_convert_tocase</refentrytitle>
+	      <manvolnum>3</manvolnum></citerefentry></link>.
+	  </para>
+	</refsect1>
+      </refentry>
+
       <refentry id="unicode_convert">
-	<info><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></info>
+	<refentryinfo><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></refentryinfo>
 
 	<refmeta>
 	  <refentrytitle>unicode_convert</refentrytitle>
@@ -444,7 +859,7 @@ See COPYING for distribution information.
 	    <function>unicode_convert_init</function>(),
 	    <function>unicode_convert</function>(), and
 	    <function>unicode_convert_deinit</function>() are an adaption of th
-	    <ulink url="http://manpages.courier-mta.org/htmlman3/iconv.3.html">
+	    <ulink url="https://manpages.courier-mta.org/htmlman3/iconv.3.html">
 	      <citerefentry><refentrytitle>iconv</refentrytitle>
 	      <manvolnum>3</manvolnum></citerefentry></ulink> API that uses the same
 	      calling convention as the other algorithms in this unicode library,
@@ -668,7 +1083,7 @@ See COPYING for distribution information.
       </refentry>
 
       <refentry id="unicode_default_chset">
-	<info><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></info>
+	<refentryinfo><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></refentryinfo>
 
 	<refmeta>
 	  <refentrytitle>unicode_default_chset</refentrytitle>
@@ -721,7 +1136,7 @@ See COPYING for distribution information.
       </refentry>
 
       <refentry id="unicode_html40ent_lookup">
-	<info><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></info>
+	<refentryinfo><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></refentryinfo>
 
 	<refmeta>
 	  <refentrytitle>unicode_html40ent_lookup</refentrytitle>
@@ -780,251 +1195,18 @@ See COPYING for distribution information.
 	</refsect1>
       </refentry>
 
-      <refentry id="unicode_category_lookup">
-	<info><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></info>
-
-	<refmeta>
-	  <refentrytitle>unicode_category_lookup</refentrytitle>
-	  <manvolnum>3</manvolnum>
-	</refmeta>
-
-	<refnamediv>
-	  <refname>unicode_category_lookup</refname>
-	  <refname>unicode_isalnum</refname>
-	  <refname>unicode_isalpha</refname>
-	  <refname>unicode_isblank</refname>
-	  <refname>unicode_isdigit</refname>
-	  <refname>unicode_isgraph</refname>
-	  <refname>unicode_islower</refname>
-	  <refname>unicode_ispunct</refname>
-	  <refname>unicode_isspace</refname>
-	  <refname>unicode_isupper</refname>
-
-	  <refpurpose>unicode character categorization</refpurpose>
-	</refnamediv>
-
-	<refsynopsisdiv>
-	  <funcsynopsis>
-	    <funcsynopsisinfo>#include &lt;courier-unicode.h&gt;</funcsynopsisinfo>
-	    <funcprototype>
-	      <funcdef>uint32_t <function>unicode_category_lookup</function></funcdef>
-              <paramdef>char32_t <parameter>c</parameter></paramdef>
-	    </funcprototype>
-
-	    <funcprototype>
-              <funcdef>int <function>unicode_isalnum</function></funcdef>
-              <paramdef>char32_t <parameter>c</parameter></paramdef>
-	    </funcprototype>
-
-	    <funcprototype>
-              <funcdef>int <function>unicode_isalpha</function></funcdef>
-              <paramdef>char32_t <parameter>c</parameter></paramdef>
-	    </funcprototype>
-
-	    <funcprototype>
-              <funcdef>int <function>unicode_isblank</function></funcdef>
-              <paramdef>char32_t <parameter>c</parameter></paramdef>
-	    </funcprototype>
-
-	    <funcprototype>
-              <funcdef>int <function>unicode_isdigit</function></funcdef>
-              <paramdef>char32_t <parameter>c</parameter></paramdef>
-	    </funcprototype>
-
-	    <funcprototype>
-              <funcdef>int <function>unicode_isgraph</function></funcdef>
-              <paramdef>char32_t <parameter>c</parameter></paramdef>
-	    </funcprototype>
-
-	    <funcprototype>
-              <funcdef>int <function>unicode_islower</function></funcdef>
-              <paramdef>char32_t <parameter>c</parameter></paramdef>
-	    </funcprototype>
-
-	    <funcprototype>
-              <funcdef>int <function>unicode_ispunct</function></funcdef>
-              <paramdef>char32_t <parameter>c</parameter></paramdef>
-	    </funcprototype>
-
-	    <funcprototype>
-              <funcdef>int <function>unicode_isspace</function></funcdef>
-              <paramdef>char32_t <parameter>c</parameter></paramdef>
-	    </funcprototype>
-
-	    <funcprototype>
-              <funcdef>int <function>unicode_isupper</function></funcdef>
-              <paramdef>char32_t <parameter>c</parameter></paramdef>
-	    </funcprototype>
-	  </funcsynopsis>
-	</refsynopsisdiv>
-	<refsect1>
-	  <title>DESCRIPTION</title>
-
-	  <para>
-	    <function>unicode_category_lookup</function>() looks up the
-	    <ulink url="http://unicode.org/notes/tn36/">unicode character's
-	    categorization</ulink>.
-	    <function>unicode_category_lookup</function>() returns a 32 bit
-	    value.
-	    The value's
-	    <symbol>UNICODE_CATEGORY_1</symbol> bits specify the first level
-	    of the unicode character's category, with
-	    <symbol>UNICODE_CATEGORY_2</symbol>,
-	    <symbol>UNICODE_CATEGORY_3</symbol>, and
-	    <symbol>UNICODE_CATEGORY_4</symbol> bits specifying the 2nd,
-	    3rd, and 4th level, if given. A value of 0 for each corresponding
-	    bit set indicates that no category is specified for this level,
-	    for this character; otherwise the possible values are defined
-	    in <filename>&lt;courier-unicode.h&gt;</filename>.
-	  </para>
-
-	  <para>
-	    The remaining functions implement comparable equivalents of
-	    their non-unicode versions in the standard C library, as follows:
-	  </para>
-
-	  <variablelist>
-	    <varlistentry>
-              <term><function>unicode_isalnum</function>()</term>
-	      <listitem>
-		<para>
-		  Returns non-0 for all
-		  <function>unicode_isalpha</function>() or
-		  <function>unicode_isdigit</function>().
-		</para>
-	      </listitem>
-	    </varlistentry>
-
-	    <varlistentry>
-              <term><function>unicode_isalpha</function>()</term>
-	      <listitem>
-		<para>
-		  Returns non-0 for all
-		  <symbol>UNICODE_CATEGORY_1_LETTER</symbol>.
-		</para>
-	      </listitem>
-	    </varlistentry>
-
-	    <varlistentry>
-              <term><function>unicode_isblank</function>()</term>
-	      <listitem>
-		<para>
-		  Return non-0 for
-		  <symbol>TAB</symbol>, and all
-		  <symbol>UNICODE_CATEGORY_2_SPACE</symbol>.
-		</para>
-	      </listitem>
-	    </varlistentry>
-
-	    <varlistentry>
-              <term><function>unicode_isdigit</function>()</term>
-	      <listitem>
-		<para>
-		  Returns non-0 for all
-		  <symbol>UNICODE_CATEGORY_1_NUMBER</symbol>
-		  | <symbol>UNICODE_CATEGORY_2_DIGIT</symbol>,
-		  only (no third categories).
-		</para>
-	      </listitem>
-	    </varlistentry>
-
-	    <varlistentry>
-              <term><function>unicode_isgraph</function>()</term>
-	      <listitem>
-		<para>
-		  Returns non-0 for all codepoints above
-		  <symbol>SPACE</symbol> which are not
-		  <function>unicode_isspace</function>().
-		</para>
-	      </listitem>
-	    </varlistentry>
-
-	    <varlistentry>
-              <term><function>unicode_islower</function>()</term>
-	      <listitem>
-		<para>
-		  Returns non-0 for all
-		  <function>unicode_isalpha</function>() for which the
-		  character is
-		  equal to
-		  <link linkend="unicode_uc">
-		    <citerefentry><refentrytitle>unicode_lc</refentrytitle>
-		  <manvolnum>3</manvolnum></citerefentry></link>
-		  of itself.
-		</para>
-	      </listitem>
-	    </varlistentry>
-
-	    <varlistentry>
-              <term><function>unicode_ispunct</function>()</term>
-	      <listitem>
-		<para>
-		  Returns non-0 for all
-		  <symbol>UNICODE_CATEGORY_1_PUNCTUATION</symbol>.
-		</para>
-	      </listitem>
-	    </varlistentry>
-
-	    <varlistentry>
-              <term><function>unicode_isspace</function>()</term>
-	      <listitem>
-		<para>
-		  Returns non-0 for unicode_isblank() or
-		  for unicode characters
-		  with linebreaking properties of
-		  <symbol>BK</symbol>,
-		  <symbol>CR</symbol>,
-		  <symbol>LF</symbol>,
-		  <symbol>NL</symbol>,
-		  and
-		  <symbol>SP</symbol>.
-		</para>
-	      </listitem>
-	    </varlistentry>
-
-	    <varlistentry>
-              <term><function>unicode_isupper</function>()</term>
-	      <listitem>
-		<para>
-		  Returns non-0 for all
-		  <function>unicode_isalpha</function>() for which the
-		  character is
-		  equal to
-		  <link linkend="unicode_uc">
-		    <citerefentry><refentrytitle>unicode_uc</refentrytitle>
-		  <manvolnum>3</manvolnum></citerefentry></link>
-		  of itself.
-		</para>
-	      </listitem>
-	    </varlistentry>
-	  </variablelist>
-	</refsect1>
-	<refsect1>
-	  <title>SEE ALSO</title>
-	  <para>
-	    <link linkend="courier-unicode">
-	      <citerefentry>
-		<refentrytitle>courier-unicode</refentrytitle>
-		<manvolnum>7</manvolnum></citerefentry></link>,
-	    <link linkend="unicode_uc">
-	      <citerefentry><refentrytitle>unicode_convert_tocase</refentrytitle>
-	      <manvolnum>3</manvolnum></citerefentry></link>.
-	  </para>
-	</refsect1>
-      </refentry>
-
       <refentry id="unicode_grapheme_break">
-	<info><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></info>
+	<refentryinfo><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></refentryinfo>
 	<refmeta>
 	  <refentrytitle>unicode_grapheme_break</refentrytitle>
-	  <refentrytitle>unicode_grapheme_break_init</refentrytitle>
-	  <refentrytitle>unicode_grapheme_break_next</refentrytitle>
-	  <refentrytitle>unicode_grapheme_break_deinit</refentrytitle>
 	  <manvolnum>3</manvolnum>
 	</refmeta>
 
 	<refnamediv>
 	  <refname>unicode_grapheme_break</refname>
+	  <refname>unicode_grapheme_break_init</refname>
+	  <refname>unicode_grapheme_break_next</refname>
+	  <refname>unicode_grapheme_break_deinit</refname>
 	  <refpurpose>unicode grapheme cluster boundary rules</refpurpose>
 	</refnamediv>
 
@@ -1059,22 +1241,23 @@ See COPYING for distribution information.
 	  <title>DESCRIPTION</title>
 
 	  <para>
+	    These functions implement the unicode grapheme cluster breaking
+	    algorithm. Invoke
+	    <function>unicode_grapheme_break_init</function>() to initialize
+	    the grapheme cluster breaking algorithm.
 	    <function>unicode_grapheme_break_init</function>() returns an
-	    opaque handle for an object that computes grapheme breaks.
-	    Each call to <function>unicode_grapheme_break_next</function>()
-	    passes one character of a unicode string, and returns a non-0
-	    value if there's a grapheme break before this character, in the
+	    opaque handle. Each subsequent call to
+	    <function>unicode_grapheme_break_next</function>() passes this
+	    handle, and the next character.
+	    <function>unicode_grapheme_break_next</function>() returns a non-0
+	    value if there's a grapheme break before the character, in a
 	    sequence of Unicode characters.
 	    <function>unicode_grapheme_break_deinit</function>() releases
-	    all reosurces used by the grapheme breaking handle.
+	    all resources used by the grapheme breaking handle, and the
+	    <classname>unicode_grapheme_break_info_t</classname> handle
+	    is no longer valid after this call.
 	  </para>
 	  <para>
-	    Call
-	    <function>unicode_grapheme_break_init</function>(), then call
-	    <function>unicode_grapheme_break_next</function>() for each
-	    character,
-	    then call
-	    <function>unicode_grapheme_break_deinit</function>().
 	    The first call to <function>unicode_grapheme_break_next</function>()
 	    always returns non-0, as per the GB1 rule.
 	  </para>
@@ -1085,10 +1268,11 @@ See COPYING for distribution information.
 	    <parameter>a</parameter> and
 	    <parameter>b</parameter>.
 	    This is is equivalent to calling
-	    <function>> unicode_grapheme_break_init</function>(),
+	    <function>unicode_grapheme_break_init</function>(),
 	    followed by two calls to
 	    <function> unicode_grapheme_break_next</function>(), and finally
-	    <function>unicode_grapheme_break_deinit</function>(), and returns
+	    <function>unicode_grapheme_break_deinit</function>(), then
+	    returning
 	    the result of the second
 	    call to <function>unicode_grapheme_break_next</function>().
 	  </para>
@@ -1098,7 +1282,7 @@ See COPYING for distribution information.
 	  <title>SEE ALSO</title>
 
 	  <para>
-	    <ulink url="http://www.unicode.org/reports/tr29/tr29-&tr29ver;.html">TR-29</ulink>,
+	    <ulink url="https://www.unicode.org/reports/tr29/tr29-&tr29ver;.html">TR-29</ulink>,
 	    <link linkend="courier-unicode">
 	      <citerefentry>
 		<refentrytitle>courier-unicode</refentrytitle>
@@ -1116,60 +1300,15 @@ See COPYING for distribution information.
 	</refsect1>
       </refentry>
 
-      <refentry id="unicode_script">
-	<info><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></info>
-	<refmeta>
-	  <refentrytitle>unicode_script</refentrytitle>
-	  <manvolnum>3</manvolnum>
-	</refmeta>
-
-	<refnamediv>
-	  <refname>unicode_script</refname>
-	  <refpurpose>unicode script property</refpurpose>
-	</refnamediv>
-
-	<refsynopsisdiv>
-	  <funcsynopsis>
-	    <funcsynopsisinfo>#include &lt;courier-unicode.h&gt;</funcsynopsisinfo>
-	    <funcprototype>
-              <funcdef>unicode_script_t <function>unicode_script</function></funcdef>
-              <paramdef>char32_t <parameter>ch</parameter></paramdef>
-	    </funcprototype>
-	  </funcsynopsis>
-	</refsynopsisdiv>
-	<refsect1>
-	  <title>DESCRIPTION</title>
-	  <para>
-	    <function>unicode_script</function>() looks up the
-	    <quote>script</quote> property of the specified unicode character,
-	    and returns it. The <classname>unicode_script_t</classname>
-	    enumeration encodes possible unicode script values.
-	    <literal>unicode_script_unknown</literal> gets returned for a
-	    unicode character  with an unknown script property.
-	  </para>
-	</refsect1>
-
-	<refsect1>
-	  <title>SEE ALSO</title>
-
-	  <para>
-	    <ulink url="http://www.unicode.org/reports/tr24/tr24-&tr24ver;.html">TR-24</ulink>,
-	    <link linkend="courier-unicode">
-	      <citerefentry>
-		<refentrytitle>courier-unicode</refentrytitle>
-		<manvolnum>7</manvolnum></citerefentry></link>.
-	  </para>
-	</refsect1>
-      </refentry>
-
       <refentry id="unicode_line_break">
-	<info><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></info>
+	<refentryinfo><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></refentryinfo>
 	<refmeta>
 	  <refentrytitle>unicode_line_break</refentrytitle>
 	  <manvolnum>3</manvolnum>
 	</refmeta>
 
 	<refnamediv>
+	  <refname>unicode_line_break</refname>
 	  <refname>unicode_lb_init</refname>
 	  <refname>unicode_lb_set_opts</refname>
 	  <refname>unicode_lb_next</refname>
@@ -1483,13 +1622,59 @@ See COPYING for distribution information.
 	    <link linkend="unicode__linebreak">
 	      <citerefentry><refentrytitle>unicode::linebreak</refentrytitle>
 	      <manvolnum>3</manvolnum></citerefentry></link>,
-	    <ulink url="http://www.unicode.org/reports/tr14/tr14-&tr14ver;.html">TR-14</ulink>
+	    <ulink url="https://www.unicode.org/reports/tr14/tr14-&tr14ver;.html">TR-14</ulink>
+	  </para>
+	</refsect1>
+      </refentry>
+
+      <refentry id="unicode_script">
+	<refentryinfo><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></refentryinfo>
+	<refmeta>
+	  <refentrytitle>unicode_script</refentrytitle>
+	  <manvolnum>3</manvolnum>
+	</refmeta>
+
+	<refnamediv>
+	  <refname>unicode_script</refname>
+	  <refpurpose>unicode script property</refpurpose>
+	</refnamediv>
+
+	<refsynopsisdiv>
+	  <funcsynopsis>
+	    <funcsynopsisinfo>#include &lt;courier-unicode.h&gt;</funcsynopsisinfo>
+	    <funcprototype>
+              <funcdef>unicode_script_t <function>unicode_script</function></funcdef>
+              <paramdef>char32_t <parameter>ch</parameter></paramdef>
+	    </funcprototype>
+	  </funcsynopsis>
+	</refsynopsisdiv>
+	<refsect1>
+	  <title>DESCRIPTION</title>
+	  <para>
+	    <function>unicode_script</function>() looks up the
+	    <quote>script</quote> property of the specified unicode character,
+	    and returns it. The <classname>unicode_script_t</classname>
+	    enumeration encodes possible unicode script values.
+	    <literal>unicode_script_unknown</literal> gets returned for a
+	    unicode character  with an unknown script property.
+	  </para>
+	</refsect1>
+
+	<refsect1>
+	  <title>SEE ALSO</title>
+
+	  <para>
+	    <ulink url="https://www.unicode.org/reports/tr24/tr24-&tr24ver;.html">TR-24</ulink>,
+	    <link linkend="courier-unicode">
+	      <citerefentry>
+		<refentrytitle>courier-unicode</refentrytitle>
+		<manvolnum>7</manvolnum></citerefentry></link>.
 	  </para>
 	</refsect1>
       </refentry>
 
       <refentry id="unicode_word_break">
-	<info><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></info>
+	<refentryinfo><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></refentryinfo>
 	<refmeta>
 	  <refentrytitle>unicode_word_break</refentrytitle>
 	  <manvolnum>3</manvolnum>
@@ -1682,7 +1867,7 @@ See COPYING for distribution information.
 	<refsect1>
 	  <title>SEE ALSO</title>
 	  <para>
-	    <ulink url="http://www.unicode.org/reports/tr29/tr29-&tr29ver;.html">TR-29</ulink>,
+	    <ulink url="https://www.unicode.org/reports/tr29/tr29-&tr29ver;.html">TR-29</ulink>,
 	    <link linkend="courier-unicode">
 	      <citerefentry>
 		<refentrytitle>courier-unicode</refentrytitle>
@@ -1704,7 +1889,7 @@ See COPYING for distribution information.
       </refentry>
 
       <refentry id="unicode_uc">
-	<info><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></info>
+	<refentryinfo><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></refentryinfo>
 	<refmeta>
 	  <refentrytitle>unicode_uc</refentrytitle>
 	  <manvolnum>3</manvolnum>
@@ -1816,8 +2001,109 @@ See COPYING for distribution information.
     <section id="manpagescpp">
       <title>C++ manual pages</title>
 
+      <refentry id="unicode__bidi">
+	<refentryinfo><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></refentryinfo>
+
+	<refmeta>
+	  <refentrytitle>unicode::bidi_calc</refentrytitle>
+	  <manvolnum>3</manvolnum>
+	</refmeta>
+
+	<refnamediv>
+	  <refname>unicode::bidi_calc</refname>
+	  <refname>unicode::bidi_reorder</refname>
+	  <refpurpose>unicode bidirectional algorithm</refpurpose>
+	</refnamediv>
+
+	<refsynopsisdiv>
+	  <funcsynopsis>
+	    <funcsynopsisinfo>#include &lt;courier-unicode.h&gt;</funcsynopsisinfo>
+	    <funcprototype>
+              <funcdef>std::vector&lt;unicode_bidi_level_t&gt; <function>unicode::bidi_calc</function></funcdef>
+	      <paramdef>const std::u32string &amp;<parameter>string</parameter></paramdef>
+	    </funcprototype>
+	  </funcsynopsis>
+
+	  <funcsynopsis>
+	    <funcprototype>
+              <funcdef>std::vector&lt;unicode_bidi_level_t&gt; <function>unicode::bidi_calc</function></funcdef>
+	      <paramdef>const std::u32string &amp;<parameter>string</parameter></paramdef>
+	      <paramdef>unicode_bidi_level_t <parameter>embedding_level</parameter></paramdef>
+	    </funcprototype>
+	  </funcsynopsis>
+
+	  <funcsynopsis>
+	    <funcprototype>
+              <funcdef>int <function>unicode::bidi_reorder</function></funcdef>
+	      <paramdef>std::u32string &amp;<parameter>string</parameter></paramdef>
+	      <paramdef>std::vector&lt;unicode_bidi_level_t&gt; &amp;<parameter>embedding_level</parameter></paramdef>
+	      <paramdef>const std::function&lt;void (size_t, size_t)&gt; &amp;<parameter>reorder_callback</parameter></paramdef>
+	    </funcprototype>
+	  </funcsynopsis>
+
+	  <funcsynopsis>
+	    <funcprototype>
+              <funcdef>void <function>unicode::bidi_reorder</function></funcdef>
+	      <paramdef>std::vector&lt;unicode_bidi_level_t&gt; &amp;<parameter>embedding_level</parameter></paramdef>
+	      <paramdef>const std::function&lt;void (size_t, size_t)&gt; &amp;<parameter>reorder_callback</parameter></paramdef>
+	    </funcprototype>
+	  </funcsynopsis>
+	</refsynopsisdiv>
+
+	<refsect1>
+	  <title>DESCRIPTION</title>
+
+	  <para>
+	    These functions implement the C++ interface for the
+	    <ulink url="https://www.unicode.org/reports/tr9/tr9-&tr9ver;.html">Unicode Bi-directional algorithm</ulink>.
+	    See the description of the underlying
+	    <link linkend="unicode_bidi">
+	      <citerefentry><refentrytitle>unicode_bidi</refentrytitle>
+	      <manvolnum>3</manvolnum></citerefentry></link> C library
+	      API for more information.
+	  </para>
+
+	  <para>
+            <function>unicode::bidi_calc</function> computes and returns a vector
+	    of bidirectional embedding level values for the given Unicode string.
+	    An overload takes an additional parameter that overrides the
+	    paragraph embedding level with either <literal>UNICODE_BIDI_LR</literal> or
+            <literal>UNICODE_BIDI_RL</literal>.
+          </para>
+	  <para>
+            <function>unicode::bidi_reorder</function> reverses the characters
+	    in the Unicode string, according to their embedding levels (and
+	    reverses the corresponding embedding level values too).
+	    As with the C API, an optional parameter specifies a callable object
+	    that gets invoked to report each range of characters that gets
+	    reversed (specified as the starting position and a number of
+	    characters).
+          </para>
+	  <para>
+	    An overloaded <function>unicode::bidi_reorder</function> without
+	    the string parameter goes through the motions, according to the
+	    embedding level vector parameter. It does not actually reverse
+	    the values in the vector, but it still invokes the callable object
+	    normally.
+          </para>
+	  <para>
+	    This is comparable to the C API. Also comparable with the C API:
+	    the convention that even embedding levels specify left to right
+	    text and odd embedding values specify right to left text.
+	    An embedding value of <literal>UNICODE_BIDI_SKIP</literal>
+	    indicates an embedding or an override marker that has no
+	    specified embedding value. These markers may be removed from the
+	    Unicode string (together with the
+	    <literal>UNICODE_BIDI_SKIP</literal>
+	    values from the embedding values vector) either before or after
+	    they get reordered.
+	  </para>
+	</refsect1>
+      </refentry>
+
+
       <refentry id="unicode__iconvert__convert">
-	<info><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></info>
+	<refentryinfo><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></refentryinfo>
 
 	<refmeta>
 	  <refentrytitle>unicode::iconvert::convert</refentrytitle>
@@ -1951,7 +2237,7 @@ extern const char unicode::iso_8859_1[];</funcsynopsisinfo>
 	    <link linkend="unicode_convert">
 	      <citerefentry><refentrytitle>unicode_convert</refentrytitle>
 	      <manvolnum>3</manvolnum></citerefentry></link>,
-	      <ulink url="http://manpages.courier-mta.org/htmlman3/iconv.3.html">
+	      <ulink url="https://manpages.courier-mta.org/htmlman3/iconv.3.html">
 	      <citerefentry><refentrytitle>iconv</refentrytitle>
 	      <manvolnum>3</manvolnum></citerefentry></ulink>.
 
@@ -1960,7 +2246,7 @@ extern const char unicode::iso_8859_1[];</funcsynopsisinfo>
       </refentry>
 
       <refentry id="unicode__iconvert__convert_tocase">
-	<info><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></info>
+	<refentryinfo><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></refentryinfo>
 
 	<refmeta>
 	  <refentrytitle>unicode::iconvert::convert_tocase</refentrytitle>
@@ -2041,7 +2327,7 @@ extern const char unicode::iso_8859_1[];</funcsynopsisinfo>
 	    <link linkend="unicode_convert">
 	      <citerefentry><refentrytitle>unicode_convert</refentrytitle>
 	      <manvolnum>3</manvolnum></citerefentry></link>,
-	      <ulink url="http://manpages.courier-mta.org/htmlman3/iconv.3.html">
+	      <ulink url="https://manpages.courier-mta.org/htmlman3/iconv.3.html">
 	      <citerefentry><refentrytitle>iconv</refentrytitle>
 	      <manvolnum>3</manvolnum></citerefentry></ulink>.
 
@@ -2050,7 +2336,7 @@ extern const char unicode::iso_8859_1[];</funcsynopsisinfo>
       </refentry>
 
       <refentry id="unicode__iconvert__fromu">
-	<info><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></info>
+	<refentryinfo><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></refentryinfo>
 
 	<refmeta>
 	  <refentrytitle>unicode::iconvert::fromu</refentrytitle>
@@ -2138,7 +2424,7 @@ extern const char unicode::iso_8859_1[];</funcsynopsisinfo>
 	    <link linkend="unicode_convert">
 	      <citerefentry><refentrytitle>unicode_convert</refentrytitle>
 	      <manvolnum>3</manvolnum></citerefentry></link>,
-	      <ulink url="http://manpages.courier-mta.org/htmlman3/iconv.3.html">
+	      <ulink url="https://manpages.courier-mta.org/htmlman3/iconv.3.html">
 	      <citerefentry><refentrytitle>iconv</refentrytitle>
 	      <manvolnum>3</manvolnum></citerefentry></ulink>.
 
@@ -2147,7 +2433,7 @@ extern const char unicode::iso_8859_1[];</funcsynopsisinfo>
       </refentry>
 
       <refentry id="unicode__iconvert__tou">
-	<info><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></info>
+	<refentryinfo><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></refentryinfo>
 
 	<refmeta>
 	  <refentrytitle>unicode::iconvert::tou</refentrytitle>
@@ -2237,7 +2523,7 @@ extern const char unicode::iso_8859_1[];</funcsynopsisinfo>
 	    <link linkend="unicode_convert">
 	      <citerefentry><refentrytitle>unicode_convert</refentrytitle>
 	      <manvolnum>3</manvolnum></citerefentry></link>,
-	      <ulink url="http://manpages.courier-mta.org/htmlman3/iconv.3.html">
+	      <ulink url="https://manpages.courier-mta.org/htmlman3/iconv.3.html">
 	      <citerefentry><refentrytitle>iconv</refentrytitle>
 	      <manvolnum>3</manvolnum></citerefentry></ulink>.
 
@@ -2246,7 +2532,7 @@ extern const char unicode::iso_8859_1[];</funcsynopsisinfo>
       </refentry>
 
       <refentry id="unicode__linebreak">
-	<info><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></info>
+	<refentryinfo><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></refentryinfo>
 
 	<refmeta>
 	  <refentrytitle>unicode::linebreak</refentrytitle>
@@ -2447,7 +2733,7 @@ std::copy(beg_iter, end_iter, std::back_insert_iterator&lt;std::vector&lt;int&gt
       </refentry>
 
       <refentry id="unicode__tolower">
-	<info><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></info>
+	<refentryinfo><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></refentryinfo>
 
 	<refmeta>
 	  <refentrytitle>unicode::tolower</refentrytitle>
@@ -2542,19 +2828,8 @@ std::copy(beg_iter, end_iter, std::back_insert_iterator&lt;std::vector&lt;int&gt
 	</refsect1>
       </refentry>
 
-
-
-
-
-
-
-
-
-
-
-
       <refentry id="unicode__wordbreak">
-	<info><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></info>
+	<refentryinfo><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></refentryinfo>
 
 	<refmeta>
 	  <refentrytitle>unicode::wordbreak</refentrytitle>
diff --git a/unicode/courier-unicode.h.in b/unicode/courier-unicode.h.in
index 67f3bda..b8c88f4 100644
--- a/unicode/courier-unicode.h.in
+++ b/unicode/courier-unicode.h.in
@@ -2,7 +2,7 @@
 #define	courier_unicode_h
 
 /*
-** Copyright 2000-2018 Double Precision, Inc.
+** Copyright 2000-2020 Double Precision, Inc.
 ** See COPYING for distribution information.
 **
 */
@@ -12,6 +12,7 @@
 #include <string>
 #include <vector>
 #include <list>
+#include <functional>
 
 extern "C" {
 #endif
@@ -40,7 +41,7 @@ typedef uint32_t char32_t;
 #endif
 #endif
 
-#define COURIER_UNICODE_VERSION 210
+#define COURIER_UNICODE_VERSION 220
 
 /*
 ** The system default character set, from the locale.
@@ -605,6 +606,13 @@ extern void unicode_bidi_calc(const char32_t *p, size_t n,
 			      const unicode_bidi_level_t *
 			      initial_embedding_level);
 
+extern void unicode_bidi_reorder(char32_t *p,
+				 unicode_bidi_level_t *levels,
+				 size_t n,
+				 void (*reorder_callback)(size_t, size_t,
+							  void *),
+				 void *arg);
+
 /*
 ** A buffer that holds unicode characters, and dynamically grows as needed.
 */
@@ -2025,6 +2033,24 @@ std::u32string tolower(const std::u32string &u);
 
 std::u32string toupper(const std::u32string &u);
 
+//! Calculate bidirectional embedding levels
+std::vector<unicode_bidi_level_t> bidi_calc(const std::u32string &s);
+
+//! Calculate bidirectional embedding levels
+std::vector<unicode_bidi_level_t> bidi_calc(const std::u32string &s,
+					    unicode_bidi_level_t level);
+
+//! Reorder bidirectional text
+int bidi_reorder(std::u32string &string,
+		 std::vector<unicode_bidi_level_t> &levels,
+		 const std::function<void (size_t, size_t)> &reorder_callback=
+		 [](size_t, size_t){});
+
+//! Reorder bidirectional text
+void bidi_reorder(std::vector<unicode_bidi_level_t> &levels,
+		  const std::function<void (size_t, size_t)> &reorder_callback=
+		  [](size_t, size_t){});
+
 #if 0
 {
 #endif
diff --git a/unicode/unicode_bidi.c b/unicode/unicode_bidi.c
index 38dcb44..9e7fcf4 100644
--- a/unicode/unicode_bidi.c
+++ b/unicode/unicode_bidi.c
@@ -1,5 +1,5 @@
 /*
-** Copyright 2011-2020 Double Precision, Inc.
+** Copyright 2020 Double Precision, Inc.
 ** See COPYING for distribution information.
 **
 */
@@ -148,14 +148,56 @@ struct level_run {
 	size_t end; /* one past */
 };
 
+/* A growing list of level runs */
+
+struct level_runs {
+	struct level_run *runs; /* Heap-allocated array of level runs, null until the first one is added */
+	size_t n_level_runs;          /* How many entries of runs are in use */
+	size_t cap_level_runs;        /* How many entries runs has room for */
+};
+
+static void level_runs_init(struct level_runs *p)
+{ /* Make *p an empty list that owns no storage yet */
+	p->runs=0;
+	p->n_level_runs=0;
+	p->cap_level_runs=0;
+}
+
+static void level_runs_deinit(struct level_runs *p)
+{ /* Release the array owned by *p; the fields of *p are not reset */
+	if (p->runs)
+		free(p->runs);
+}
+
+static struct level_run *level_runs_add(struct level_runs *p)
+{ /* Append one run to *p and return it; the caller fills in start and end */
+	if (p->n_level_runs == p->cap_level_runs)
+	{ /* The array is full (or was never allocated): grow it */
+		p->cap_level_runs *= 2;
+
+		if (p->cap_level_runs == 0)
+			p->cap_level_runs=1; /* Very first allocation */
+
+		p->runs=(struct level_run *)
+			(p->runs ?
+			 realloc(p->runs,
+				 sizeof(struct level_run) *
+				 p->cap_level_runs)
+			 :malloc(sizeof(struct level_run) *
+				 p->cap_level_runs));
+		if (!p->runs)
+			abort(); /* Allocation failure is treated as fatal */
+	}
+
+	return p->runs + (p->n_level_runs++); /* Valid only until a later add reallocates the array */
+}
+
 /* An isolating run sequence */
 
 struct isolating_run_sequence_s {
 	struct isolating_run_sequence_s *prev, *next; /* Linked list */
 
-	struct level_run *level_runs; /* All level runs in the sequence */
-	size_t n_level_runs;          /* How many of them */
-	size_t cap_level_runs;        /* Capacity of the level runs */
+	struct level_runs runs; /* All level runs in the sequence */
 	unicode_bidi_level_t embedding_level; /* This seq's embedding level */
 	enum_bidi_class_t sos, eos;
 };
@@ -185,11 +227,11 @@ static irs_iterator irs_begin(struct isolating_run_sequence_s *seq)
 
 	/* Edge case, empty isolating run sequence */
 
-	while (iter.level_run_i < seq->n_level_runs)
+	while (iter.level_run_i < seq->runs.n_level_runs)
 	{
-		iter.i=seq->level_runs[iter.level_run_i].start;
+		iter.i=seq->runs.runs[iter.level_run_i].start;
 
-		if (iter.i < seq->level_runs[iter.level_run_i].end)
+		if (iter.i < seq->runs.runs[iter.level_run_i].end)
 			break;
 
 		++iter.level_run_i;
@@ -202,7 +244,7 @@ static irs_iterator irs_end(struct isolating_run_sequence_s *seq)
 	irs_iterator iter;
 
 	iter.seq=seq;
-	iter.level_run_i=seq->n_level_runs;
+	iter.level_run_i=seq->runs.n_level_runs;
 	return iter;
 }
 
@@ -214,7 +256,7 @@ static int irs_compare(const irs_iterator *a,
 	if (a->level_run_i > b->level_run_i)
 		return 1;
 
-	if (a->level_run_i == a->seq->n_level_runs)
+	if (a->level_run_i == a->seq->runs.n_level_runs)
 		return 0;
 
 	if (a->i < b->i)
@@ -227,7 +269,7 @@ static int irs_compare(const irs_iterator *a,
 
 static void irs_incr(irs_iterator *iter)
 {
-	if (iter->seq->n_level_runs == iter->level_run_i)
+	if (iter->seq->runs.n_level_runs == iter->level_run_i)
 	{
 		fprintf(stderr, "%s%s\n",
 			"Internal error: attempting to increment ",
@@ -235,10 +277,10 @@ static void irs_incr(irs_iterator *iter)
 		abort();
 	}
 
-	if (++iter->i >= iter->seq->level_runs[iter->level_run_i].end)
+	if (++iter->i >= iter->seq->runs.runs[iter->level_run_i].end)
 	{
-		if (++iter->level_run_i < iter->seq->n_level_runs)
-			iter->i=iter->seq->level_runs[iter->level_run_i].start;
+		if (++iter->level_run_i < iter->seq->runs.n_level_runs)
+			iter->i=iter->seq->runs.runs[iter->level_run_i].start;
 	}
 }
 
@@ -246,8 +288,8 @@ static void irs_decr(irs_iterator *iter)
 {
 	while (1)
 	{
-		if (iter->seq->n_level_runs > iter->level_run_i &&
-		    iter->i > iter->seq->level_runs[iter->level_run_i].start)
+		if (iter->seq->runs.n_level_runs > iter->level_run_i &&
+		    iter->i > iter->seq->runs.runs[iter->level_run_i].start)
 		{
 			--iter->i;
 			break;
@@ -261,7 +303,7 @@ static void irs_decr(irs_iterator *iter)
 			abort();
 		}
 
-		iter->i=iter->seq->level_runs[--iter->level_run_i].end;
+		iter->i=iter->seq->runs.runs[--iter->level_run_i].end;
 	}
 }
 
@@ -328,13 +370,12 @@ isolating_run_sequences_init(struct isolating_run_sequences_s *p,
 
 	if (!seq) abort();
 
-	if ((seq->level_runs=(struct level_run *)
-	     malloc(sizeof(struct level_run))) == 0) abort();
+	level_runs_init(&seq->runs);
 
-	seq->level_runs->start=i;
-	seq->level_runs->end=i;
+	struct level_run *run=level_runs_add(&seq->runs);
 
-	seq->n_level_runs=seq->cap_level_runs=1;
+	run->start=i;
+	run->end=i;
 	seq->embedding_level=embedding_level;
 
 	if (!p->head)
@@ -355,7 +396,7 @@ static void isolating_run_sequences_record(struct isolating_run_sequence_s *p,
 					   size_t i)
 {
 	struct level_run *current_level_run=
-		&p->level_runs[p->n_level_runs-1];
+		&p->runs.runs[p->runs.n_level_runs-1];
 
 	if (current_level_run->start == current_level_run->end)
 	{
@@ -375,19 +416,7 @@ static void isolating_run_sequences_record(struct isolating_run_sequence_s *p,
 	** run sequence.
 	*/
 
-	if (p->n_level_runs == p->cap_level_runs)
-	{
-		p->cap_level_runs *= 2;
-
-		p->level_runs=(struct level_run *)
-			realloc(p->level_runs,
-				sizeof(struct level_run) *
-				p->cap_level_runs);
-		if (!p->level_runs)
-			abort();
-	}
-
-	current_level_run = p->level_runs + (p->n_level_runs++);
+	current_level_run=level_runs_add(&p->runs);
 
 	current_level_run->start=i;
 	current_level_run->end=i+1;
@@ -430,7 +459,7 @@ static void isolating_run_sequences_deinit(struct isolating_run_sequences_s *p)
 
 		seq=seq->next;
 
-		free(p->level_runs);
+		level_runs_deinit(&p->runs);
 		free(p);
 	}
 
@@ -706,12 +735,12 @@ void dump_sequence_info(directional_status_stack_t stack,
 		(seq->sos == UNICODE_BIDI_CLASS_L ? 'L':'R'),
 		(seq->eos == UNICODE_BIDI_CLASS_L ? 'L':'R'));
 
-	for (size_t i=0; i<seq->n_level_runs; ++i)
+	for (size_t i=0; i<seq->runs.n_level_runs; ++i)
 	{
 		fprintf(DEBUGDUMP, "%s[%lu-%lu]",
 			i == 0 ? " ":", ",
-			(unsigned long)seq->level_runs[i].start,
-			(unsigned long)seq->level_runs[i].end-1);
+			(unsigned long)seq->runs.runs[i].start,
+			(unsigned long)seq->runs.runs[i].end-1);
 	}
 	fprintf(DEBUGDUMP, "\n");
 }
@@ -1706,3 +1735,127 @@ static void unicode_bidi_n(directional_status_stack_t stack,
 	dump_sequence("Contents after I", stack, seq);
 #endif
 }
+
+struct level_run_layers {
+	struct level_runs *lruns;     /* lruns[k]: runs of characters at embedding level k, or higher */
+	size_t n_lruns;               /* How many entries of lruns are in use */
+	size_t cap_lruns;             /* How many entries lruns has room for */
+};
+
+static void level_run_layers_init(struct level_run_layers *p)
+{ /* Make *p an empty set of layers that owns no storage yet */
+	p->lruns=0;
+	p->n_lruns=0;
+	p->cap_lruns=0;
+}
+
+static void level_run_layers_deinit(struct level_run_layers *p)
+{ /* Release every layer's run list, then the array of layers itself */
+	if (p->lruns)
+	{
+		for (size_t i=0; i<p->n_lruns; ++i)
+			level_runs_deinit(&p->lruns[i]); /* Only entries in use were initialized */
+		free(p->lruns);
+	}
+}
+
+static void level_run_layers_add(struct level_run_layers *p)
+{ /* Append one more layer, holding an empty list of level runs */
+	if (p->n_lruns == p->cap_lruns)
+	{ /* The array is full (or was never allocated): grow it */
+		p->cap_lruns *= 2;
+
+		if (p->cap_lruns == 0)
+			p->cap_lruns=1; /* Very first allocation */
+
+		p->lruns=(struct level_runs *)
+			(p->lruns ?
+			 realloc(p->lruns,
+				 sizeof(struct level_runs) *
+				 p->cap_lruns)
+			 :malloc(sizeof(struct level_runs) *
+				 p->cap_lruns));
+		if (!p->lruns)
+			abort(); /* Allocation failure is treated as fatal */
+	}
+
+	level_runs_init(p->lruns + (p->n_lruns++)); /* The new layer starts out empty */
+}
+
+void unicode_bidi_reorder(char32_t *p,
+			  unicode_bidi_level_t *levels,
+			  size_t n,
+			  void (*reorder_callback)(size_t, size_t, void *),
+			  void *arg)
+{
+	/* L2: collect the runs at each level, then reverse them from the highest level down */
+
+	struct level_run_layers layers;
+	unicode_bidi_level_t previous_level=0; /* Level applied to skipped entries; 0 until a real level is seen */
+
+	level_run_layers_init(&layers);
+
+	for (size_t i=0; i<n; ++i)
+	{
+		if (levels[i] != UNICODE_BIDI_SKIP)
+			previous_level=levels[i]; /* A skipped entry keeps the level of the entry before it */
+
+		while (layers.n_lruns <= previous_level)
+			level_run_layers_add(&layers); /* Make sure layers 0..previous_level exist */
+
+		/* We intentionally don't put anything in level 0 */
+		for (size_t j=1; j<=previous_level; ++j)
+		{ /* Character i belongs to every layer from 1 up to its own level */
+			struct level_runs *runs=layers.lruns+j;
+
+			if (runs->n_level_runs &&
+			    runs->runs[runs->n_level_runs-1].end == i)
+			{ /* i directly follows this layer's last run: extend that run */
+				++runs->runs[runs->n_level_runs-1].end;
+			}
+			else
+			{ /* Otherwise i starts a new run in this layer */
+				struct level_run *run=
+					level_runs_add(runs);
+
+				run->start=i;
+				run->end=i+1;
+			}
+		}
+	}
+
+	for (size_t i=layers.n_lruns; i; )
+	{ /* Walk the layers from the highest level down */
+		struct level_runs *runs=layers.lruns+ --i;
+
+		for (size_t j=0; j<runs->n_level_runs; ++j)
+		{
+			size_t start=runs->runs[j].start;
+			size_t end=runs->runs[j].end;
+			size_t right=end;
+			size_t left=start;
+
+			while (right > left)
+			{ /* Swap the two ends, moving towards the middle of the run */
+				--right;
+
+				if (p)
+				{ /* Without a string nothing is swapped, the levels included */
+					char32_t c=p[left];
+					unicode_bidi_level_t l=levels[left];
+
+					p[left]=p[right];
+					levels[left]=levels[right];
+					p[right]=c;
+					levels[right]=l;
+				}
+				++left;
+			}
+
+			if (end-start > 1 && reorder_callback)
+				(*reorder_callback)(start, end-start, arg); /* (start, length); one-character runs are not reported */
+		}
+	}
+
+	level_run_layers_deinit(&layers);
+}
diff --git a/unicode/unicodecpp.C b/unicode/unicodecpp.C
index 51bed3c..adb7869 100644
--- a/unicode/unicodecpp.C
+++ b/unicode/unicodecpp.C
@@ -557,3 +557,79 @@ std::u32string unicode::toupper(const std::u32string &u)
 
 	return copy;
 }
+
+std::vector<unicode_bidi_level_t>
+unicode::bidi_calc(const std::u32string &s)
+{
+	return unicode::bidi_calc(s, UNICODE_BIDI_SKIP); // Presumably neither LR nor RL, so no initial level is passed on; confirm
+}
+
+std::vector<unicode_bidi_level_t>
+unicode::bidi_calc(const std::u32string &s,
+		   unicode_bidi_level_t paragraph_embedding_level)
+{
+	const unicode_bidi_level_t *initial_embedding_level=0; // Stays null unless the caller asked for LR or RL
+
+	if (paragraph_embedding_level == UNICODE_BIDI_LR ||
+	    paragraph_embedding_level == UNICODE_BIDI_RL)
+	{
+		initial_embedding_level=&paragraph_embedding_level;
+	}
+
+	std::vector<unicode_bidi_level_t> buf; // Returned to the caller: one level per character of s
+
+	buf.resize(s.size());
+
+	if (s.size())
+	{ // &buf[0] is only taken when the buffer is not empty
+		unicode_bidi_calc(s.c_str(), s.size(), &buf[0],
+				  initial_embedding_level);
+	}
+	return buf;
+}
+
+extern "C" {
+	static void reorder_callback(size_t i, size_t cnt,
+				     void *arg)
+	{ // C-linkage trampoline handed to unicode_bidi_reorder()
+		auto p=reinterpret_cast<const std::function<void (size_t,
+								  size_t)> *>
+			(arg); // arg is the address of the caller's std::function
+
+		(*p)(i, cnt); // Forward the reported (start, count) range
+	}
+}
+
+int unicode::bidi_reorder(std::u32string &string,
+			  std::vector<unicode_bidi_level_t> &levels,
+			  const std::function<void (size_t, size_t)> &lambda)
+{
+	size_t s=string.size();
+
+	if (s != levels.size())
+		return -1; // Exactly one level is required per character
+
+	if (!s)
+		return 0; // Nothing to reorder; also avoids taking &string[0] of an empty string
+
+	unicode_bidi_reorder(&string[0], &levels[0], s,
+			     reorder_callback,
+			     const_cast<void *>
+			     (reinterpret_cast<const void *>(&lambda))); // reorder_callback casts this back to the std::function
+
+	return 0;
+}
+
+void unicode::bidi_reorder(std::vector<unicode_bidi_level_t> &levels,
+			   const std::function<void (size_t, size_t)> &lambda)
+{
+	size_t s=levels.size();
+
+	if (!s)
+		return; // Nothing to report; also avoids taking &levels[0] of an empty vector
+
+	unicode_bidi_reorder(0, &levels[0], s, reorder_callback, // Null string: ranges are only reported
+			     const_cast<void *>
+			     (reinterpret_cast<const void *>(&lambda))); // reorder_callback casts this back to the std::function
+
+}
author	Sam Varshavchik	2020-07-09 21:36:46 -0400
committer	Sam Varshavchik	2020-07-12 15:56:45 -0400
commit	7a9293cd28b293b793793368237d8856cfb0eff4 (patch)
tree	3c19854a7869103405c78a97e40503db64fac7b6
parent	2219f725acd0dc36fa00080c846a8982273a6f61 (diff)
download	courier-libs-7a9293cd28b293b793793368237d8856cfb0eff4.tar.bz2