diff options
Diffstat (limited to 'unicode/book.xml')
| -rw-r--r-- | unicode/book.xml | 1767 | 
1 files changed, 1397 insertions, 370 deletions
| diff --git a/unicode/book.xml b/unicode/book.xml index 6568762..c8948ba 100644 --- a/unicode/book.xml +++ b/unicode/book.xml @@ -1,15 +1,18 @@  <?xml version="1.0" encoding="utf-8"?>  <!DOCTYPE article PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" - "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [ - -<!ENTITY tr14ver "35"> -<!ENTITY tr24ver "24"> -<!ENTITY tr29ver "27"> + "https://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [ + +<!ENTITY tr9ver  "42"> +<!ENTITY tr14ver "45"> +<!ENTITY tr15ver "50"> +<!ENTITY tr24ver "31"> +<!ENTITY tr29ver "37"> +<!ENTITY tr51ver "18">  ]>  <!-- -Copyright 2014-2017 Double Precision, Inc. +Copyright 2014-2020 Double Precision, Inc.  See COPYING for distribution information.  --> @@ -19,8 +22,8 @@ See COPYING for distribution information.    <para>      This library implements several algorithms related to the -    <ulink url="http://www.unicode.org/standard/standard.html">Unicode -    Standard</ulink>: +    <ulink url="https://www.unicode.org/standard/standard.html">Unicode +    Standard</ulink>, notably:    </para>    <itemizedlist> @@ -33,15 +36,21 @@ See COPYING for distribution information.      <listitem>        <para>  	Implementation of -	<ulink url="http://www.unicode.org/reports/tr29/tr29-&tr29ver;.html">grapheme -	and work breaking</ulink> rules. +	<link linkend="unicode_grapheme_break">grapheme +	and word breaking</link> rules.        </para>      </listitem>      <listitem>        <para>  	Implementation of -	<ulink url="http://www.unicode.org/reports/tr14/tr14-&tr14ver;.html">line -	breaking</ulink> rules. +	<link linkend="unicode_line_break">line	breaking</link> rules. +      </para> +    </listitem> +    <listitem> +      <para> +	Implementation of the +	<link linkend="unicode_bidi">bi-directional +	algorithm</link>.        </para>      </listitem>      <listitem> @@ -51,7 +60,7 @@ See COPYING for distribution information.  	
entity (such as <quote>&amp;</quote>, for example), and  	determining the normal width or a double-width status of a unicode  	character. Also, an adaptation of the -	<ulink url="http://manpages.courier-mta.org/htmlman3/iconv.3.html"> +	<ulink url="https://manpages.courier-mta.org/htmlman3/iconv.3.html">  	<citerefentry><refentrytitle>iconv</refentrytitle>  	<manvolnum>3</manvolnum></citerefentry></ulink>  	API for this unicode library. @@ -59,15 +68,13 @@ See COPYING for distribution information.      </listitem>      <listitem>        <para> -	Look up the -	<ulink url="http://www.unicode.org/reports/tr24/tr24-&tr24ver;.html">Unicode -	script property</ulink>. +	Look up the <link linkend="unicode_script">Unicode +	script property</link>.        </para>      </listitem>      <listitem>        <para> -	Look up the -	<ulink url="http://unicode.org/notes/tn36/">category</ulink> +	Look up the <link linkend="unicode_category_lookup">category</link>  	property.        </para>      </listitem> @@ -82,7 +89,7 @@ See COPYING for distribution information.      <para>        The current release of the Courier Unicode library is based on the -      Unicode 8.0.0 standard. +      Unicode 13.0.0 standard.      </para>    </section> @@ -91,7 +98,7 @@ See COPYING for distribution information.      <para>        Download the current version of the library from -      <ulink url="/download.html#unicode">http://www.courier-mta.org/download.html#unicode</ulink>. +      <ulink url="/download.html#unicode">https://www.courier-mta.org/download.html#unicode</ulink>.        After unpacking the tarball, run the configure script, which takes        the usual options, followed by <command>make</command>, then        <command>make install</command>. @@ -154,7 +161,7 @@ See COPYING for distribution information.  	  <manvolnum>7</manvolnum></citerefentry></link>.  	  
Refer to the included manual pages,  	  and -	  <ulink url="http://www.courier-mta.org/unicode/manpages.html"> the HTML +	  <ulink url="https://www.courier-mta.org/unicode/manpages.html"> the HTML  	  version of the man pages</ulink> for more information.      </para>    </section> @@ -166,7 +173,7 @@ See COPYING for distribution information.        <title>C manual pages</title>        <refentry id="courier-unicode"> -	<info><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></info> +	<refentryinfo><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></refentryinfo>  	<refmeta>  	  <refentrytitle>courier-unicode</refentrytitle>  	  <manvolnum>7</manvolnum> @@ -182,17 +189,17 @@ See COPYING for distribution information.  	  <programlisting>  #include <courier-unicode.h></programlisting>  	</refsynopsisdiv> -	<refsect1> +	<refsect1 id="courier_unicode_descr">  	  <title>DESCRIPTION</title>  	  <para>  	    This library implements several algorithms related to the -	    <ulink url="http://www.unicode.org/standard/standard.html">Unicode +	    <ulink url="https://www.unicode.org/standard/standard.html">Unicode  	    Standard</ulink>.  	    This library uses -	    <ulink url="http://manpages.courier-mta.org/htmlman3/iconv.3.html"> -	      <citerefentry><refentrytitle>iconv</refentrytitle> -	      <manvolnum>3</manvolnum></citerefentry></ulink> to convert +	    <ulink url="https://manpages.courier-mta.org/htmlman3/iconv.3.html" +		   ><citerefentry><refentrytitle>iconv</refentrytitle> +	    <manvolnum>3</manvolnum></citerefentry></ulink> to convert  	      text in a given character set to unicode. Any character set  	      displayed by <command>iconv --list</command> can be specified  	      for the corresponding character set parameter. 
Additionally, @@ -216,22 +223,31 @@ See COPYING for distribution information.  	    with this library.  	  </para>  	</refsect1> -	<refsect1> +	<refsect1 id="courier_unicode_seealso">  	  <title>SEE ALSO</title>  	  <para> +	    <link linkend="unicode_bidi"> +	      <citerefentry><refentrytitle>unicode_bidi</refentrytitle> +	      <manvolnum>3</manvolnum></citerefentry></link>, +	    <link linkend="unicode_canonical"> +	      <citerefentry><refentrytitle>unicode_canonical</refentrytitle> +	      <manvolnum>3</manvolnum></citerefentry></link>, +	    <link linkend="unicode_category_lookup"> +	      <citerefentry><refentrytitle>unicode_category_lookup</refentrytitle> +	      <manvolnum>3</manvolnum></citerefentry></link>,  	    <link linkend="unicode_convert">  	      <citerefentry><refentrytitle>unicode_convert</refentrytitle>  	      <manvolnum>3</manvolnum></citerefentry></link>,  	    <link linkend="unicode_default_chset">  	      <citerefentry><refentrytitle>unicode_default_chset</refentrytitle>  	      <manvolnum>3</manvolnum></citerefentry></link>, +	    <link linkend="unicode_emoji_lookup"> +	      <citerefentry><refentrytitle>unicode_emoji_lookup</refentrytitle> +	      <manvolnum>3</manvolnum></citerefentry></link>,  	    <link linkend="unicode_html40ent_lookup">  	      <citerefentry><refentrytitle>unicode_html40ent_lookup</refentrytitle>  	      <manvolnum>3</manvolnum></citerefentry></link>, -	    <link linkend="unicode_category_lookup"> -	      <citerefentry><refentrytitle>unicode_category_lookup</refentrytitle> -	      <manvolnum>3</manvolnum></citerefentry></link>,  	    <link linkend="unicode_grapheme_break">  	      <citerefentry><refentrytitle>unicode_grapheme_break</refentrytitle>  	      <manvolnum>3</manvolnum></citerefentry></link>, @@ -241,11 +257,14 @@ See COPYING for distribution information.  	    
<link linkend="unicode_script">  	      <citerefentry><refentrytitle>unicode_script</refentrytitle>  	      <manvolnum>3</manvolnum></citerefentry></link>, +	    <link linkend="unicode_uc"> +	      <citerefentry><refentrytitle>unicode_uc</refentrytitle> +	      <manvolnum>3</manvolnum></citerefentry></link>,  	    <link linkend="unicode_word_break">  	      <citerefentry><refentrytitle>unicode_word_break</refentrytitle>  	      <manvolnum>3</manvolnum></citerefentry></link>, -	    <link linkend="unicode_uc"> -	      <citerefentry><refentrytitle>unicode_uc</refentrytitle> +	    <link linkend="unicode__bidi"> +	      <citerefentry><refentrytitle>unicode::bidi</refentrytitle>  	      <manvolnum>3</manvolnum></citerefentry></link>,  	    <link linkend="unicode__iconvert__convert">  	      <citerefentry><refentrytitle>unicode::iconvert::convert</refentrytitle> @@ -272,8 +291,926 @@ See COPYING for distribution information.  	</refsect1>        </refentry> +      <refentry id="unicode_bidi"> +	<refentryinfo><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></refentryinfo> + +	<refmeta> +	  <refentrytitle>unicode_bidi</refentrytitle> +	  <manvolnum>3</manvolnum> +	</refmeta> + +	<refnamediv> +	  <refname>unicode_bidi</refname> +	  <refname>unicode_bidi_calc</refname> +	  <refname>unicode_bidi_reorder</refname> +	  <refname>unicode_bidi_cleanup</refname> +	  <refname>unicode_bidi_extra_cleanup</refname> +	  <refname>unicode_bidi_logical_order</refname> +	  <refname>unicode_bidi_embed</refname> +	  <refname>unicode_bidi_embed_paragraph_level</refname> + +	  <refname>unicode_bidi_type</refname> +	  <refname>unicode_bidi_mirror</refname> +	  <refname>unicode_bidi_bracket_type</refname> + +	  <refpurpose>unicode bi-directional algorithm</refpurpose> +	</refnamediv> + +	<refsynopsisdiv> +	  <funcsynopsis> +	    <funcsynopsisinfo>#include <courier-unicode.h>

unicode_bidi_level_t lr=UNICODE_BIDI_LR;</funcsynopsisinfo> +	    <funcprototype> +	      <funcdef>void <function>unicode_bidi_calc</function></funcdef> +              <paramdef>const char32_t *<parameter>p</parameter></paramdef> +              <paramdef>size_t <parameter>n</parameter></paramdef> +              <paramdef>unicode_bidi_level_t *<parameter>levels</parameter></paramdef> +              <paramdef>const unicode_bidi_level_t *<parameter>initial_embedding_level</parameter></paramdef> +	    </funcprototype> + +	    <funcprototype> +	      <funcdef>void <function>unicode_bidi_reorder</function></funcdef> +              <paramdef>char32_t *<parameter>string</parameter></paramdef> +              <paramdef>unicode_bidi_level_t *<parameter>levels</parameter></paramdef> +              <paramdef>size_t <parameter>n</parameter></paramdef> +              <paramdef>void (*<parameter>reorder_callback</parameter>)(size_t, size_t, void *)</paramdef> +	      <paramdef>void *<parameter>arg</parameter></paramdef> +	    </funcprototype> + +	    <funcprototype> +	      <funcdef>size_t <function>unicode_bidi_cleanup</function></funcdef> +              <paramdef>char32_t *<parameter>string</parameter></paramdef> +              <paramdef>unicode_bidi_level_t *<parameter>levels</parameter></paramdef> +              <paramdef>size_t <parameter>n</parameter></paramdef> +              <paramdef>void (*<parameter>removed_callback</parameter>)(size_t, size_t, void *)</paramdef> +	      <paramdef>void *<parameter>arg</parameter></paramdef> +	    </funcprototype> + +	    <funcprototype> +	      <funcdef>size_t <function>unicode_bidi_extra_cleanup</function></funcdef> +              <paramdef>char32_t *<parameter>string</parameter></paramdef> +              <paramdef>unicode_bidi_level_t *<parameter>levels</parameter></paramdef> +              <paramdef>size_t <parameter>n</parameter></paramdef> +              <paramdef>void (*<parameter>removed_callback</parameter>)(size_t, size_t, void 
*)</paramdef> +	      <paramdef>void *<parameter>arg</parameter></paramdef> +	    </funcprototype> + +	    <funcprototype> +	      <funcdef>size_t <function>unicode_bidi_logical_order</function></funcdef> +              <paramdef>char32_t *<parameter>string</parameter></paramdef> +              <paramdef>unicode_bidi_level_t *<parameter>levels</parameter></paramdef> +              <paramdef>size_t <parameter>n</parameter></paramdef> +              <paramdef>unicode_bidi_level_t <parameter>paragraph_embedding</parameter></paramdef> +              <paramdef>void (*<parameter>reorder_callback</parameter>)(size_t index, size_t n, void *arg)</paramdef> +	      <paramdef>void *<parameter>arg</parameter></paramdef> +	    </funcprototype> + +	    <funcprototype> +	      <funcdef>size_t <function>unicode_bidi_embed</function></funcdef> +              <paramdef>const char32_t *<parameter>string</parameter></paramdef> +              <paramdef>const unicode_bidi_level_t *<parameter>levels</parameter></paramdef> +              <paramdef>size_t <parameter>n</parameter></paramdef> +              <paramdef>unicode_bidi_level_t <parameter>paragraph_embedding</parameter></paramdef> +              <paramdef>void (*<parameter>emit</parameter>)(const char32_t *string, size_t n, void *arg)</paramdef> +	      <paramdef>void *<parameter>arg</parameter></paramdef> +	    </funcprototype> + +	    <funcprototype> +	      <funcdef>char32_t <function>unicode_bidi_embed_paragraph_level</function></funcdef> +              <paramdef>const char32_t *<parameter>string</parameter></paramdef> +              <paramdef>size_t <parameter>n</parameter></paramdef> +              <paramdef>unicode_bidi_level_t <parameter>paragraph_embedding</parameter></paramdef> +	    </funcprototype> + +	    <funcprototype> +	      <funcdef>char32_t <function>unicode_bidi_mirror</function></funcdef> +              <paramdef>char32_t <parameter>c</parameter></paramdef> +	    </funcprototype> + +	    <funcprototype> +	      
<funcdef>char32_t <function>unicode_bidi_bracket_type</function></funcdef> +              <paramdef>char32_t <parameter>c</parameter></paramdef> +              <paramdef>unicode_bracket_type_t *<parameter>ret</parameter></paramdef> +	    </funcprototype> + +	    <funcprototype> +	      <funcdef>enum_bidi_type_t <function>unicode_bidi_type</function></funcdef> +              <paramdef>char32_t <parameter>c</parameter></paramdef> +	    </funcprototype> +	  </funcsynopsis> +	</refsynopsisdiv> +	<refsect1 id="unicode_bidi_descr"> +	  <title>DESCRIPTION</title> + +	  <para> +	    These functions are related to the +	    <ulink url="https://www.unicode.org/reports/tr9/tr9-&tr9ver;.html"> Unicode Bi-Directional algorithm</ulink>. +	    They implement the algorithm up to and including step L2, +	    and provide additional functionality of returning miscellaneous +	    bi-directional-related metadata of Unicode characters. There's +	    also a basic algorithm that <quote>reverses</quote> the +	    bi-directional algorithm +	    and produces a Unicode string with bi-directional markers that +	    results in the same bi-directional string after reapplying the +	    algorithm. +	  </para> + +	  <refsect2 id="unicode_bidi_calc_reorder"> +	    <title>Calculating bi-directional rendering order</title> + +	    <para> +	      The following process computes the rendering order of +	      characters according to the Unicode Bi-Directional algorithm: +	    </para> + +	    <orderedlist> +	      <listitem> +		<para> +		  Allocate an array of +		  <structname>unicode_bidi_level_t</structname> that's the +		  same size as the Unicode string. +		</para> +	      </listitem> +	      <listitem> +		<para> +		  Use <function>unicode_bidi_calc</function>() to compute +		  the Unicode string's characters' bi-directional embedding +		  level (executes the Bi-Directional algorithm up to and +		  including step L1). This populates the +		  <structname>unicode_bidi_level_t</structname> buffer. 
+		</para> +	      </listitem> +	      <listitem> +		<para> +		  Use <function>unicode_bidi_reorder</function>() to reverse +		  any characters in the string, according to the +		  algorithm (step L2), with an optional +		  callback that reports which ranges of characters get +		  reversed. +		</para> +	      </listitem> +	      <listitem> +		<para> +		  Use <function>unicode_bidi_cleanup</function>() or +		  <function>unicode_bidi_extra_cleanup</function>(), +		  to remove the characters from the string which are used +		  by the bi-directional algorithm, and are not needed for +		  rendering the text. +		</para> +	      </listitem> +	    </orderedlist> + +	    <para> +	      The parameters to +	      <function>unicode_bidi_calc</function>() are: +	    </para> + +	    <itemizedlist> +	      <listitem> +		<para> +		  A pointer to the Unicode string. +		</para> +	      </listitem> +	      <listitem> +		<para> +		  Number of characters in the Unicode string. +		</para> +	      </listitem> +	      <listitem> +		<para> +		  A pointer to an array of +		  <structname>unicode_bidi_level_t</structname> values. +		  The caller is +		  responsible for allocating and deallocating this array, +		  which has the same size as the Unicode string. +		</para> +	      </listitem> +	      <listitem> +		<para> +		  An optional pointer to a +		  <literal>UNICODE_BIDI_LR</literal> or +		  <literal>UNICODE_BIDI_RL</literal> value. This sets +		  the default paragraph direction level. +		  A null pointer computes the default paragraph direction +		  level based on the string, as specified by the "P" rules +		  of the bi-directional algorithm. 
+		</para> +	      </listitem> +	    </itemizedlist> + +	    <para> +	      <function>unicode_bidi_calc</function>() fills in the +	      <structname>unicode_bidi_level_t</structname> array with the +	      values corresponding to the embedding level of the +	      corresponding character, +	      according to the Unicode Bidirectional Algorithm (even values for +	      left-to-right ordering, and odd values for right-to-left +	      ordering). +	      A value of UNICODE_BIDI_SKIP designates directional markers +	      (from step X9). +	    </para> + +	    <para> +	      <function>unicode_bidi_calc</function>() returns the resolved +	      paragraph direction level, which +	      always matches the passed in level, if specified, else it +	      reports the +	      derived one. +	    </para> + +	    <para> +	      <function>unicode_bidi_reorder</function>() takes the actual +	      unicode string together with the embedding values from +	      <function>unicode_bidi_calc</function>, then reverses the +	      bi-directional string, as specified by step L2 of the bi-directional +	      algorithm. +	      The parameters to +	      <function>unicode_bidi_reorder</function>() are: +	    </para> +	    <itemizedlist> +	      <listitem> +		<para> +		  A pointer to the Unicode string. +		</para> +	      </listitem> +	      <listitem> +		<para> +		  A pointer to an array of +		  <structname>unicode_bidi_level_t</structname> values. +		</para> +	      </listitem> +	      <listitem> +		<para> +		  Number of characters in the Unicode string and the +		  <structname>unicode_bidi_level_t</structname> array. +		</para> +	      </listitem> + +	      <listitem> +		<para> +		  An optional <varname>reorder_callback</varname> function +		  pointer. +		</para> +	      </listitem> +	    </itemizedlist> +	    <para> +	      A non-<literal>NULL</literal> +	      <parameter>reorder_callback</parameter> gets invoked to report +	      each reversed character range. 
The callback's first parameter +	      is the index of the first reversed character, the second parameter +	      is the number of reversed characters, starting at the given +	      index of the Unicode string. +	      The third parameter is the <parameter>arg</parameter> passthrough +	      parameter. +	    </para> + +	    <para> +	      <function>unicode_bidi_reorder</function> modifies its +	      <parameter>string</parameter> and <parameter>levels</parameter>. +	      <parameter>reorder_callback</parameter> gets invoked after +	      reversing each consecutive range of values in the +	      <parameter>string</parameter> and <parameter>levels</parameter> +	      buffers. For example: <quote>reorder_callback(5, 7, arg)</quote> +	      reports that character indexes #5 through #11 got reversed. +	    </para> + +	    <para> +	      A NULL <parameter>string</parameter> pointer leaves the +	      <parameter>levels</parameter> buffer unchanged, but still +	      invokes the <parameter>reorder_callback</parameter> as if +	      the character string, and their embedding values, were reversed. +	    </para> + +	    <para> +	      The resulting string and embedding levels are in +	      <quote>rendering order</quote>, but still contain bi-directional +	      embedding, override, boundary-neutral, isolate, and marker +	      characters. +	      <function>unicode_bidi_cleanup</function>() and +	      <function>unicode_bidi_extra_cleanup</function>() remove these +	      characters and directional markers from the unicode string. +	      <function>unicode_bidi_cleanup</function> removes only the +	      embedding, override, and boundary-neutral characters (as +	      specified by step X9 of the bi-directional algorithm). +	      <function>unicode_bidi_extra_cleanup</function>() +	      additionally removes the isolation markers, implicit markers; +	      and all characters +	      classified as paragraph separators get replaced by a newline. 
+            </para> +	    <para> +	      A non-null pointer to the directional embedding level buffer, +	      of the same size as the string, also removes the corresponding +	      values from the buffer, and the remaining values in the +	      embedding level buffer get reset to +	      levels <literal>UNICODE_BIDI_LR</literal> and +	      <literal> UNICODE_BIDI_RL</literal>, only. +            </para> +	    <para> +	      The parameters to <function>unicode_bidi_cleanup</function>() and +	      <function>unicode_bidi_extra_cleanup</function>() are: +            </para> + +	    <itemizedlist> +	      <listitem> +		<para> +		  The pointer to the unicode string. +                </para> +              </listitem> +	      <listitem> +		<para> +		  The pointer to the directional embedding buffer. +                </para> +              </listitem> +	      <listitem> +		<para> +		  The size of the unicode string and the directional embedding +		  buffer. +                </para> +              </listitem> +	      <listitem> +		<para> +		  A pointer to a function that gets repeatedly invoked with the +		  index of the character that gets removed from the Unicode +		  string. +                </para> +              </listitem> +	      <listitem> +		<para> +		  An opaque pointer that gets forwarded to the callback. +                </para> +              </listitem> +            </itemizedlist> +	    <para> +	      The function pointer (if not <literal>NULL</literal>) +	      gets invoked to report the index of each +	      removed character. The reported index is the index from the +	      original string, and the callback gets invoked in strict order, +	      from the first to +	      the last removed character (if any). 
+            </para> +	    <para> +	      Multiple calls to <function>unicode_bidi_cleanup</function>() or +	      <function>unicode_bidi_extra_cleanup</function>() do no harm; +	      except that <function>unicode_bidi_extra_cleanup</function>() +	      always removes all the additional characters that +	      <function>unicode_bidi_cleanup</function>() does not remove. +            </para> +	    <para> +	      The character string and the embedding level values resulting +	      from <function>unicode_bidi_extra_cleanup</function>() are in +	      <quote>canonical rendering order</quote>. +            </para> +	  </refsect2> + +	  <refsect2 id="unicode_bidi_embed"> +	    <title>Embedding bi-directional markers in Unicode text strings</title> +            <para> +	      <function>unicode_bidi_logical_order</function>() and +	      <function>unicode_bidi_embed</function>() add various +	      bi-directional markers to a Unicode string in canonical rendering +	      order. The resulting string is not guaranteed to be +	      identical to the +	      original Unicode bi-directional string. The algorithm is fairly +	      basic, +	      but the resulting bi-directional string produces the same +	      canonical rendering order after applying +	      <function>unicode_bidi_calc()</function>, +	      <function>unicode_bidi_reorder()</function> and +	      <function>unicode_bidi_extra_cleanup()</function>, +	      with the same paragraph_embedding level. +            </para> + +	    <para> +	      <function>unicode_bidi_logical_order</function>() gets called +	      first, followed by +	      <function>unicode_bidi_embed</function>(). 
+	      Finally, <function>unicode_bidi_embed_paragraph_level</function>() +	      optionally determines whether the resulting string's default +	      paragraph embedding level matches the one used for the actual +	      embedding direction, and if not returns a directional marker +	      to be prepended to the Unicode character string, as a hint. +            </para> +	    <para> +	      <function>unicode_bidi_logical_order</function>() factors in the +	      characters' embedding values, and the provided paragraph +	      embedding value +	      (<literal>UNICODE_BIDI_LR</literal> or +	      <literal>UNICODE_BIDI_RL</literal>), and rearranges the characters +	      and the embedding levels in left-to-right order, while +	      simultaneously +	      invoking the supplied reorder_callback indicating each range of +	      characters whose relative order gets reversed. The +	      <function>reorder_callback</function>() receives, as +	      parameters: +            </para> +	    <itemizedlist> +	      <listitem> +		<para> +		  The starting index of the first reversed character, in the +		  string. +                </para> +              </listitem> +	      <listitem> +		<para> +		  Number of reversed characters. +                </para> +              </listitem> +	      <listitem> +		<para> +		  Forwarded <parameter>arg</parameter> pointer value. +                </para> +              </listitem> +            </itemizedlist> +	    <para> +	      This specifies a consecutive range of characters (and +	      directional  embedding values) +	      that get reversed (first character in the range becomes the +	      last character, +	      and the last character becomes the first character). +            </para> + +	    <para> +	      After +	      <function>unicode_bidi_logical_order</function>(), +	      <function>unicode_bidi_embed</function>() progressively invokes +	      the passed-in callback with +	      the contents of a bi-directional unicode string. 
+	      The parameters to <function>unicode_bidi_embed</function>() are: +            </para> +            <itemizedlist> +	      <listitem> +		<para> +		  The Unicode string, and … +                </para> +              </listitem> +	      <listitem> +		<para> +		  … the directional embedding buffer, in canonical +		  rendering order. +                </para> +              </listitem> +	      <listitem> +		<para> +		  The size of the string and the embedding level buffer. +                </para> +              </listitem> +	      <listitem> +		<para> +		  The paragraph embedding level, either +		  <literal>UNICODE_BIDI_LR</literal> or +		  <literal>UNICODE_BIDI_RL</literal>. +                </para> +              </listitem> +	      <listitem> +		<para> +		  The pointer to the callback function. +                </para> +              </listitem> +	      <listitem> +		<para> +		  An opaque pointer argument that gets forwarded to the +		  callback function. +                </para> +              </listitem> +            </itemizedlist> +	    <para> +	      The callback receives pointers to +	      various parts of the original string that gets passed to +	      <function>unicode_bidi_embed</function>(), intermixed with +	      bi-directional markers, +	      overrides, and isolates. The callback's parameters are: +            </para> + +            <itemizedlist> +	      <listitem> +		<para> +		  The pointer to a Unicode string. +                </para> +		<note> +		  <para> +		    It is not a given that the callback receives pointers +		    to progressively increasing pointers of the original +		    string that gets passed to +		    <function>unicode_bidi_embed</function>(). +		    Some calls will be for individual bi-directional +		    markers, and +		    <function>unicode_bidi_embed</function>() also +		    performs some additional internal reordering, on the fly, +		    after <function>unicode_bidi_logical_order</function>()'s +		    big hammer. 
+                  </para> +                </note> +              </listitem> +	      <listitem> +		<para> +		  Number of characters in the Unicode string. +                </para> +              </listitem> +	      <listitem> +		<para> +		  Forwarded <parameter>arg</parameter> pointer value. +                </para> +              </listitem> +            </itemizedlist> + +	    <para> +	      The assembled unicode string should produce the same +	      canonical rendering order, for the same paragraph embedding +	      level. +	      <function>unicode_bidi_embed_paragraph_level</function>() +	      checks if the specified Unicode string computes the given +	      default paragraph embedding level and returns 0 if it matches. +	      Otherwise it returns a directional marker that should be +	      <emphasis>prepended</emphasis> to the Unicode string to allow +	      <function>unicode_bidi_calc</function>'s optional paragraph +	      embedding level pointer's value to be <literal>NULL</literal>, +	      but derive the same default embedding level. +	      The parameters to +	      <function>unicode_bidi_embed_paragraph_level</function>() are: +            </para> +            <itemizedlist> +	      <listitem> +		<para> +		  The Unicode string. +                </para> +              </listitem> +	      <listitem> +		<para> +		  The size of the string. +                </para> +              </listitem> +	      <listitem> +		<para> +		  The paragraph embedding level, either +		  <literal>UNICODE_BIDI_LR</literal> or +		  <literal>UNICODE_BIDI_RL</literal>. +                </para> +              </listitem> +	    </itemizedlist> +          </refsect2> +	  <refsect2 id="unicode_bidi_misc"> +	    <title>Miscellaneous utility functions</title> + +	    <para> +	      <function>unicode_bidi_type</function> +	      looks up each character's bi-directional character type. 
+	    </para> +	    <para> +	      <function>unicode_bidi_mirror</function> +	      returns the glyph that's a mirror image of the parameter +	      (i.e. an open parenthesis for a close parenthesis, and vice +	      versa); or the same value if there is no mirror image +	      (this is the <literal>Bidi_Mirrored=Yes</literal> property). +	    </para> + +	    <para> +	      <function>unicode_bidi_bracket_type</function> +	      looks up each bracket character and returns its opposite, or +	      the same value if the character is not a bracket that has an +	      opposing bracket character +	      (this is the <literal>Bidi_Paired_Bracket_type</literal> +	      property). +	      A non-NULL <parameter>ret</parameter> gets initialized to +	      either <literal>UNICODE_BIDI_o</literal>, +	      <literal>UNICODE_BIDI_c</literal> or +	      <literal>UNICODE_BIDI_n</literal>. +	    </para> +	  </refsect2> +	</refsect1> +	<refsect1 id="courier_unicode_bidi_seealso"> +	  <title>SEE ALSO</title> +	  <para> +	    <ulink url="https://www.unicode.org/reports/tr9/tr9-&tr9ver;.html">TR-9</ulink>, +	    <link linkend="unicode__bidi"> +	      <citerefentry><refentrytitle>unicode::bidi</refentrytitle> +	      <manvolnum>3</manvolnum></citerefentry></link>, +	    <link linkend="courier-unicode"> +	      <citerefentry> +		<refentrytitle>courier-unicode</refentrytitle> +		<manvolnum>7</manvolnum></citerefentry></link>, +	  </para> +	</refsect1> +      </refentry> + +      <refentry id="unicode_canonical"> +	<refentryinfo><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></refentryinfo> + +	<refmeta> +	  <refentrytitle>unicode_canonical</refentrytitle> +	  <manvolnum>3</manvolnum> +	</refmeta> + +	<refnamediv> +	  <refname>unicode_canonical</refname> + +	  <refpurpose>unicode canonical character mapping</refpurpose> +	</refnamediv> + +	<refsynopsisdiv> +	  <funcsynopsis> +	    
<funcsynopsisinfo>#include <courier-unicode.h></funcsynopsisinfo> +	    <funcprototype> +	      <funcdef>unicode_canonical_t <function>unicode_canonical</function></funcdef> +              <paramdef>char32_t <parameter>c</parameter></paramdef> +	    </funcprototype> +	  </funcsynopsis> +	</refsynopsisdiv> +	<refsect1 id="unicode_canonical_descr"> +	  <title>DESCRIPTION</title> + +	  <para> +	    <function>unicode_canonical</function>() looks up the +	    character's +	    <ulink url="https://www.unicode.org/reports/tr15/tr15-&tr15ver;.html">canonical +	    and compatibility mapping</ulink>. + +	    <function>unicode_canonical</function>() returns a structure +	    with the following fields: +	  </para> + +	  <variablelist> +	    <varlistentry> +	      <term><structfield>canonical_chars</structfield></term> +	      <listitem> +		<para> +		  A pointer to the canonical or equivalent representation +		  of the character. +	        </para> +	      </listitem> +	    </varlistentry> +	    <varlistentry> +	      <term><structfield>n_canonical_chars</structfield></term> +	      <listitem> +		<para> +		  Number of characters in the +		  <structfield>canonical_chars</structfield>. +	        </para> +	      </listitem> +	    </varlistentry> +	    <varlistentry> +	      <term><structfield>format</structfield></term> +	      <listitem> +		<para> +		  The character's canonical formatting flag, if any. +	        </para> +	      </listitem> +	    </varlistentry> +	  </variablelist> + +	  <para> +	    A NULL <structfield>canonical_chars</structfield> (with a 0 +	    <structfield>n_canonical_chars</structfield>) indicates +	    that the character has no canonical or compatibility +	    equivalence. 
+	  </para> +	</refsect1> +	<refsect1 id="unicode_canonical_seealso"> +	  <title>SEE ALSO</title> +	  <para> +	    <ulink url="https://www.unicode.org/reports/tr15/tr15-&tr15ver;.html">TR-15</ulink>, +	    <link linkend="courier-unicode"> +	      <citerefentry> +		<refentrytitle>courier-unicode</refentrytitle> +		<manvolnum>7</manvolnum></citerefentry></link>. +	    </para> +	</refsect1> +      </refentry> + +      <refentry id="unicode_category_lookup"> +	<refentryinfo><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></refentryinfo> + +	<refmeta> +	  <refentrytitle>unicode_category_lookup</refentrytitle> +	  <manvolnum>3</manvolnum> +	</refmeta> + +	<refnamediv> +	  <refname>unicode_category_lookup</refname> +	  <refname>unicode_isalnum</refname> +	  <refname>unicode_isalpha</refname> +	  <refname>unicode_isblank</refname> +	  <refname>unicode_isdigit</refname> +	  <refname>unicode_isgraph</refname> +	  <refname>unicode_islower</refname> +	  <refname>unicode_ispunct</refname> +	  <refname>unicode_isspace</refname> +	  <refname>unicode_isupper</refname> + +	  <refpurpose>unicode character categorization</refpurpose> +	</refnamediv> + +	<refsynopsisdiv> +	  <funcsynopsis> +	    <funcsynopsisinfo>#include <courier-unicode.h></funcsynopsisinfo> +	    <funcprototype> +	      <funcdef>uint32_t <function>unicode_category_lookup</function></funcdef> +              <paramdef>char32_t <parameter>c</parameter></paramdef> +	    </funcprototype> + +	    <funcprototype> +              <funcdef>int <function>unicode_isalnum</function></funcdef> +              <paramdef>char32_t <parameter>c</parameter></paramdef> +	    </funcprototype> + +	    <funcprototype> +              <funcdef>int <function>unicode_isalpha</function></funcdef> +              <paramdef>char32_t <parameter>c</parameter></paramdef> +	    </funcprototype> + +	    <funcprototype> +              <funcdef>int 
<function>unicode_isblank</function></funcdef> +              <paramdef>char32_t <parameter>c</parameter></paramdef> +	    </funcprototype> + +	    <funcprototype> +              <funcdef>int <function>unicode_isdigit</function></funcdef> +              <paramdef>char32_t <parameter>c</parameter></paramdef> +	    </funcprototype> + +	    <funcprototype> +              <funcdef>int <function>unicode_isgraph</function></funcdef> +              <paramdef>char32_t <parameter>c</parameter></paramdef> +	    </funcprototype> + +	    <funcprototype> +              <funcdef>int <function>unicode_islower</function></funcdef> +              <paramdef>char32_t <parameter>c</parameter></paramdef> +	    </funcprototype> + +	    <funcprototype> +              <funcdef>int <function>unicode_ispunct</function></funcdef> +              <paramdef>char32_t <parameter>c</parameter></paramdef> +	    </funcprototype> + +	    <funcprototype> +              <funcdef>int <function>unicode_isspace</function></funcdef> +              <paramdef>char32_t <parameter>c</parameter></paramdef> +	    </funcprototype> + +	    <funcprototype> +              <funcdef>int <function>unicode_isupper</function></funcdef> +              <paramdef>char32_t <parameter>c</parameter></paramdef> +	    </funcprototype> +	  </funcsynopsis> +	</refsynopsisdiv> +	<refsect1 id="unicode_category_descr"> +	  <title>DESCRIPTION</title> + +	  <para> +	    <function>unicode_category_lookup</function>() looks up the +	    <ulink url="https://unicode.org/notes/tn36/">unicode character's +	    categorization</ulink>. +	    <function>unicode_category_lookup</function>() returns a 32 bit +	    value. +	    The value's +	    <symbol>UNICODE_CATEGORY_1</symbol> bits specify the first level +	    of the unicode character's category, with +	    <symbol>UNICODE_CATEGORY_2</symbol>, +	    <symbol>UNICODE_CATEGORY_3</symbol>, and +	    <symbol>UNICODE_CATEGORY_4</symbol> bits specifying the 2nd, +	    3rd, and 4th level, if given. 
A value of 0 for each corresponding +	    bit set indicates that no category is specified for this level, +	    for this character; otherwise the possible values are defined +	    in <filename><courier-unicode.h></filename>. +	  </para> + +	  <para> +	    The remaining functions implement comparable equivalents of +	    their non-unicode versions in the standard C library, as follows: +	  </para> + +	  <variablelist> +	    <varlistentry> +              <term><function>unicode_isalnum</function>()</term> +	      <listitem> +		<para> +		  Returns non-0 for all +		  <function>unicode_isalpha</function>() or +		  <function>unicode_isdigit</function>(). +		</para> +	      </listitem> +	    </varlistentry> + +	    <varlistentry> +              <term><function>unicode_isalpha</function>()</term> +	      <listitem> +		<para> +		  Returns non-0 for all +		  <symbol>UNICODE_CATEGORY_1_LETTER</symbol>. +		</para> +	      </listitem> +	    </varlistentry> + +	    <varlistentry> +              <term><function>unicode_isblank</function>()</term> +	      <listitem> +		<para> +		  Returns non-0 for +		  <symbol>TAB</symbol>, and all +		  <symbol>UNICODE_CATEGORY_2_SPACE</symbol>. +		</para> +	      </listitem> +	    </varlistentry> + +	    <varlistentry> +              <term><function>unicode_isdigit</function>()</term> +	      <listitem> +		<para> +		  Returns non-0 for all +		  <symbol>UNICODE_CATEGORY_1_NUMBER</symbol> +		  | <symbol>UNICODE_CATEGORY_2_DIGIT</symbol>, +		  only (no third categories). +		</para> +	      </listitem> +	    </varlistentry> + +	    <varlistentry> +              <term><function>unicode_isgraph</function>()</term> +	      <listitem> +		<para> +		  Returns non-0 for all codepoints above +		  <symbol>SPACE</symbol> which are not +		  <function>unicode_isspace</function>(). 
+		</para> +	      </listitem> +	    </varlistentry> + +	    <varlistentry> +              <term><function>unicode_islower</function>()</term> +	      <listitem> +		<para> +		  Returns non-0 for all +		  <function>unicode_isalpha</function>() for which the +		  character is +		  equal to +		  <link linkend="unicode_uc"> +		    <citerefentry><refentrytitle>unicode_lc</refentrytitle> +		  <manvolnum>3</manvolnum></citerefentry></link> +		  of itself. +		</para> +	      </listitem> +	    </varlistentry> + +	    <varlistentry> +              <term><function>unicode_ispunct</function>()</term> +	      <listitem> +		<para> +		  Returns non-0 for all +		  <symbol>UNICODE_CATEGORY_1_PUNCTUATION</symbol>. +		</para> +	      </listitem> +	    </varlistentry> + +	    <varlistentry> +              <term><function>unicode_isspace</function>()</term> +	      <listitem> +		<para> +		  Returns non-0 for unicode_isblank() or +		  for unicode characters +		  with linebreaking properties of +		  <symbol>BK</symbol>, +		  <symbol>CR</symbol>, +		  <symbol>LF</symbol>, +		  <symbol>NL</symbol>, +		  and +		  <symbol>SP</symbol>. +		</para> +	      </listitem> +	    </varlistentry> + +	    <varlistentry> +              <term><function>unicode_isupper</function>()</term> +	      <listitem> +		<para> +		  Returns non-0 for all +		  <function>unicode_isalpha</function>() for which the +		  character is +		  equal to +		  <link linkend="unicode_uc"> +		    <citerefentry><refentrytitle>unicode_uc</refentrytitle> +		  <manvolnum>3</manvolnum></citerefentry></link> +		  of itself. 
+		</para> +	      </listitem> +	    </varlistentry> +	  </variablelist> +	</refsect1> +	<refsect1 id="unicode_category_seealso"> +	  <title>SEE ALSO</title> +	  <para> +	    <link linkend="courier-unicode"> +	      <citerefentry> +		<refentrytitle>courier-unicode</refentrytitle> +		<manvolnum>7</manvolnum></citerefentry></link>, +	    <link linkend="unicode_uc"> +	      <citerefentry><refentrytitle>unicode_convert_tocase</refentrytitle> +	      <manvolnum>3</manvolnum></citerefentry></link>. +	  </para> +	</refsect1> +      </refentry> +        <refentry id="unicode_convert"> -	<info><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></info> +	<refentryinfo><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></refentryinfo>  	<refmeta>  	  <refentrytitle>unicode_convert</refentrytitle> @@ -425,7 +1362,7 @@ See COPYING for distribution information.  	</refsynopsisdiv> -	<refsect1> +	<refsect1 id="unicode_convert_descr">  	  <title>DESCRIPTION</title>  	  <para> @@ -444,7 +1381,7 @@ See COPYING for distribution information.  	    <function>unicode_convert_init</function>(),  	    <function>unicode_convert</function>(), and  	    <function>unicode_convert_deinit</function>() are an adaption of th -	    <ulink url="http://manpages.courier-mta.org/htmlman3/iconv.3.html"> +	    <ulink url="https://manpages.courier-mta.org/htmlman3/iconv.3.html">  	      <citerefentry><refentrytitle>iconv</refentrytitle>  	      <manvolnum>3</manvolnum></citerefentry></ulink> API that uses the same  	      calling convention as the other algorithms in this unicode library, @@ -515,7 +1452,7 @@ See COPYING for distribution information.  	  
</para> -	  <refsect2> +	  <refsect2 id="unicode_convert_collect">  	    <title>Collecting converted text into a buffer</title>  	    <para> @@ -572,7 +1509,7 @@ See COPYING for distribution information.  	    </para>  	  </refsect2> -	  <refsect2> +	  <refsect2 id="unicode_convert_chset_unicode">  	    <title>Converting between character sets and unicode</title>  	    <para> @@ -601,7 +1538,7 @@ See COPYING for distribution information.  	    </para>  	  </refsect2> -	  <refsect2> +	  <refsect2 id="unicode_convert_oneshot">  	    <title>One-shot conversions</title>  	    <para> @@ -650,7 +1587,7 @@ See COPYING for distribution information.  	    </para>  	  </refsect2>  	</refsect1> -	<refsect1> +	<refsect1 id="unicode_convert_seealso">  	  <title>SEE ALSO</title>  	  <para>  	    <link linkend="courier-unicode"> @@ -668,7 +1605,7 @@ See COPYING for distribution information.        </refentry>        <refentry id="unicode_default_chset"> -	<info><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></info> +	<refentryinfo><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></refentryinfo>  	<refmeta>  	  <refentrytitle>unicode_default_chset</refentrytitle> @@ -695,7 +1632,7 @@ See COPYING for distribution information.  	    </funcprototype>  	  </funcsynopsis>  	</refsynopsisdiv> -	<refsect1> +	<refsect1 id="unicode_default_chset_descr">  	  <title>DESCRIPTION</title>  	  <para>  	    <function>unicode_default_chset</function>() returns the name of the @@ -706,7 +1643,7 @@ See COPYING for distribution information.  	    current application locale's character set.  	  
</para>  	</refsect1> -	<refsect1> +	<refsect1 id="unicode_default_chset_seealso">  	  <title>SEE ALSO</title>  	  <para>  	    <link linkend="courier-unicode"> @@ -720,286 +1657,154 @@ See COPYING for distribution information.  	</refsect1>        </refentry> -      <refentry id="unicode_html40ent_lookup"> -	<info><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></info> +      <refentry id="unicode_emoji_lookup"> +	<refentryinfo><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></refentryinfo>  	<refmeta> -	  <refentrytitle>unicode_html40ent_lookup</refentrytitle> +	  <refentrytitle>unicode_emoji_lookup</refentrytitle>  	  <manvolnum>3</manvolnum>  	</refmeta>  	<refnamediv> -	  <refname>unicode_html40ent_lookup</refname> -	  <refpurpose>look up unicode character for an HTML 4.0 entity</refpurpose> +	  <refname>unicode_emoji_lookup</refname> +          <refname>unicode_emoji</refname> +          <refname>unicode_emoji_presentation</refname> +	  <refname>unicode_emoji_modifier</refname> +	  <refname>unicode_emoji_modifier_base</refname> +	  <refname>unicode_emoji_component</refname> +	  <refname>unicode_emoji_extended_pictographic</refname> + +	  <refpurpose>look up unicode character's Unicode Emoji Classification</refpurpose>  	</refnamediv>  	<refsynopsisdiv>  	  <funcsynopsis>  	    <funcsynopsisinfo>#include <courier-unicode.h></funcsynopsisinfo>  	    <funcprototype> -              <funcdef>char32_t <function>unicode_html40ent_lookup</function></funcdef> -              <paramdef>const char *<parameter>entity</parameter></paramdef> +              <funcdef>unicode_emoji_t <function>unicode_emoji_lookup</function></funcdef> +              <paramdef>char32_t <parameter>c</parameter></paramdef>  	    </funcprototype>  	  </funcsynopsis> -	</refsynopsisdiv> -	<refsect1> -	 
 <title>DESCRIPTION</title> -	  <para> -	    <function>unicode_html40ent_lookup</function>() returns the -	    unicode character represented by an HTML 4.0 entity. The -	    <parameter>entity</parameter> is a string, such as -	    <quote>quot</quote>, in which case -	    <function>unicode_html40ent_lookup</function>() returns 34. -	  </para> - -	  <para> -	    Additionally, -	    <function>unicode_html40ent_lookup</function>() parses -	    a numerical entity given as -	    <quote>#<replaceable>decimal</replaceable></quote> or -	    <quote>#x<replaceable>hex</replaceable></quote>. -	  </para> -	  <para> -	    <function>unicode_html40ent_lookup</function>() returns 0 if the -	    <parameter>entity</parameter> is not a known entity that represents -	    a single unicode character. -	  </para> -	</refsect1> -	<refsect1> -	  <title>SEE ALSO</title> -	  <para> -	    <link linkend="courier-unicode"> -	      <citerefentry> -		<refentrytitle>courier-unicode</refentrytitle> -		<manvolnum>7</manvolnum></citerefentry></link>, -	    <link linkend="unicode_uc"> -	      <citerefentry><refentrytitle>unicode_convert_tocase</refentrytitle> -	      <manvolnum>3</manvolnum></citerefentry></link>. 
-	  </para> -	</refsect1> -      </refentry> - -      <refentry id="unicode_category_lookup"> -	<info><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></info> - -	<refmeta> -	  <refentrytitle>unicode_category_lookup</refentrytitle> -	  <manvolnum>3</manvolnum> -	</refmeta> - -	<refnamediv> -	  <refname>unicode_category_lookup</refname> -	  <refname>unicode_isalnum</refname> -	  <refname>unicode_isalpha</refname> -	  <refname>unicode_isblank</refname> -	  <refname>unicode_isdigit</refname> -	  <refname>unicode_isgraph</refname> -	  <refname>unicode_islower</refname> -	  <refname>unicode_ispunct</refname> -	  <refname>unicode_isspace</refname> -	  <refname>unicode_isupper</refname> - -	  <refpurpose>unicode character categorization</refpurpose> -	</refnamediv> - -	<refsynopsisdiv>  	  <funcsynopsis> -	    <funcsynopsisinfo>#include <courier-unicode.h></funcsynopsisinfo>  	    <funcprototype> -	      <funcdef>uint32_t <function>unicode_category_lookup</function></funcdef> +              <funcdef>int <function>unicode_emoji</function></funcdef>                <paramdef>char32_t <parameter>c</parameter></paramdef>  	    </funcprototype> +	  </funcsynopsis> +	  <funcsynopsis>  	    <funcprototype> -              <funcdef>int <function>unicode_isalnum</function></funcdef> +              <funcdef>int <function>unicode_emoji_presentation</function></funcdef>                <paramdef>char32_t <parameter>c</parameter></paramdef>  	    </funcprototype> +	  </funcsynopsis> +	  <funcsynopsis>  	    <funcprototype> -              <funcdef>int <function>unicode_isalpha</function></funcdef> +              <funcdef>int <function>unicode_emoji_modifier</function></funcdef>                <paramdef>char32_t <parameter>c</parameter></paramdef>  	    </funcprototype> +	  </funcsynopsis> +	  <funcsynopsis>  	    <funcprototype> -              <funcdef>int 
<function>unicode_isblank</function></funcdef> +              <funcdef>int <function>unicode_emoji_modifier_base</function></funcdef>                <paramdef>char32_t <parameter>c</parameter></paramdef>  	    </funcprototype> +	  </funcsynopsis> +	  <funcsynopsis>  	    <funcprototype> -              <funcdef>int <function>unicode_isdigit</function></funcdef> +              <funcdef>int <function>unicode_emoji_component</function></funcdef>                <paramdef>char32_t <parameter>c</parameter></paramdef>  	    </funcprototype> +	  </funcsynopsis> +	  <funcsynopsis>  	    <funcprototype> -              <funcdef>int <function>unicode_isgraph</function></funcdef> +              <funcdef>int <function>unicode_emoji_extended_pictographic</function></funcdef>                <paramdef>char32_t <parameter>c</parameter></paramdef>  	    </funcprototype> +	  </funcsynopsis> +	</refsynopsisdiv> +	<refsect1 id="unicode_emoji_descr"> +	  <title>DESCRIPTION</title> +	  <para> +	    <function>unicode_emoji_lookup</function>() returns the +	    unicode emoji properties of the specified character, as a bitmask +	    of <literal>UNICODE_EMOJI</literal> flags, as defined in the +	    header file. +	    <function>unicode_emoji</function>(), +	    <function>unicode_emoji_presentation</function>(), +	    <function>unicode_emoji_modifier</function>(), +	    <function>unicode_emoji_modifier_base</function>(), +	    <function>unicode_emoji_component</function>(), and +	    <function>unicode_emoji_extended_pictographic</function>() +	    check whether the given character carries a specific emoji +	    property. They return 0 if not, and non-0 if the specified +	    character has the corresponding property. 
+	  </para> +        </refsect1> +	<refsect1 id="unicode_emoji_seealso"> +	  <title>SEE ALSO</title> +	  <para> +	    <ulink url="https://www.unicode.org/reports/tr51/tr51-&tr51ver;.html">TR-51</ulink>, +	    <link linkend="courier-unicode"> +	      <citerefentry> +		<refentrytitle>courier-unicode</refentrytitle> +		<manvolnum>7</manvolnum></citerefentry></link>. +	  </para> +	</refsect1> +      </refentry> -	    <funcprototype> -              <funcdef>int <function>unicode_islower</function></funcdef> -              <paramdef>char32_t <parameter>c</parameter></paramdef> -	    </funcprototype> +      <refentry id="unicode_html40ent_lookup"> +	<refentryinfo><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></refentryinfo> -	    <funcprototype> -              <funcdef>int <function>unicode_ispunct</function></funcdef> -              <paramdef>char32_t <parameter>c</parameter></paramdef> -	    </funcprototype> +	<refmeta> +	  <refentrytitle>unicode_html40ent_lookup</refentrytitle> +	  <manvolnum>3</manvolnum> +	</refmeta> -	    <funcprototype> -              <funcdef>int <function>unicode_isspace</function></funcdef> -              <paramdef>char32_t <parameter>c</parameter></paramdef> -	    </funcprototype> +	<refnamediv> +	  <refname>unicode_html40ent_lookup</refname> +	  <refpurpose>look up unicode character for an HTML 4.0 entity</refpurpose> +	</refnamediv> +	<refsynopsisdiv> +	  <funcsynopsis> +	    <funcsynopsisinfo>#include <courier-unicode.h></funcsynopsisinfo>  	    <funcprototype> -              <funcdef>int <function>unicode_isupper</function></funcdef> -              <paramdef>char32_t <parameter>c</parameter></paramdef> +              <funcdef>char32_t <function>unicode_html40ent_lookup</function></funcdef> +              <paramdef>const char *<parameter>entity</parameter></paramdef>  	    </funcprototype>  	  </funcsynopsis>  	</refsynopsisdiv> -	<refsect1> 
+	<refsect1 id="unicode_html40_descr">  	  <title>DESCRIPTION</title> -  	  <para> -	    <function>unicode_category_lookup</function>() looks up the -	    <ulink url="http://unicode.org/notes/tn36/">unicode character's -	    categorization</ulink>. -	    <function>unicode_category_lookup</function>() returns a 32 bit -	    value. -	    The value's -	    <symbol>UNICODE_CATEGORY_1</symbol> bits specify the first level -	    of the unicode character's category, with -	    <symbol>UNICODE_CATEGORY_2</symbol>, -	    <symbol>UNICODE_CATEGORY_3</symbol>, and -	    <symbol>UNICODE_CATEGORY_4</symbol> bits specifying the 2nd, -	    3rd, and 4th level, if given. A value of 0 for each corresponding -	    bit set indicates that no category is specified for this level, -	    for this character; otherwise the possible values are defined -	    in <filename><courier-unicode.h></filename>. +	    <function>unicode_html40ent_lookup</function>() returns the +	    unicode character represented by an HTML 4.0 entity. The +	    <parameter>entity</parameter> is a string, such as +	    <quote>quot</quote>, in which case +	    <function>unicode_html40ent_lookup</function>() returns 34.  	  </para>  	  <para> -	    The remaining functions implement comparable equivalents of -	    their non-unicode versions in the standard C library, as follows: +	    Additionally, +	    <function>unicode_html40ent_lookup</function>() parses +	    a numerical entity given as +	    <quote>#<replaceable>decimal</replaceable></quote> or +	    <quote>#x<replaceable>hex</replaceable></quote>.  	  </para> -	  <variablelist> -	    <varlistentry> -              <term><function>unicode_isalnum</function>()</term> -	      <listitem> -		<para> -		  Returns non-0 for all -		  <function>unicode_isalpha</function>() or -		  <function>unicode_isdigit</function>(). 
-		</para> -	      </listitem> -	    </varlistentry> - -	    <varlistentry> -              <term><function>unicode_isalpha</function>()</term> -	      <listitem> -		<para> -		  Returns non-0 for all -		  <symbol>UNICODE_CATEGORY_1_LETTER</symbol>. -		</para> -	      </listitem> -	    </varlistentry> - -	    <varlistentry> -              <term><function>unicode_isblank</function>()</term> -	      <listitem> -		<para> -		  Return non-0 for -		  <symbol>TAB</symbol>, and all -		  <symbol>UNICODE_CATEGORY_2_SPACE</symbol>. -		</para> -	      </listitem> -	    </varlistentry> - -	    <varlistentry> -              <term><function>unicode_isdigit</function>()</term> -	      <listitem> -		<para> -		  Returns non-0 for all -		  <symbol>UNICODE_CATEGORY_1_NUMBER</symbol> -		  | <symbol>UNICODE_CATEGORY_2_DIGIT</symbol>, -		  only (no third categories). -		</para> -	      </listitem> -	    </varlistentry> - -	    <varlistentry> -              <term><function>unicode_isgraph</function>()</term> -	      <listitem> -		<para> -		  Returns non-0 for all codepoints above -		  <symbol>SPACE</symbol> which are not -		  <function>unicode_isspace</function>(). -		</para> -	      </listitem> -	    </varlistentry> - -	    <varlistentry> -              <term><function>unicode_islower</function>()</term> -	      <listitem> -		<para> -		  Returns non-0 for all -		  <function>unicode_isalpha</function>() for which the -		  character is -		  equal to -		  <link linkend="unicode_uc"> -		    <citerefentry><refentrytitle>unicode_lc</refentrytitle> -		  <manvolnum>3</manvolnum></citerefentry></link> -		  of itself. -		</para> -	      </listitem> -	    </varlistentry> - -	    <varlistentry> -              <term><function>unicode_ispunct</function>()</term> -	      <listitem> -		<para> -		  Returns non-0 for all -		  <symbol>UNICODE_CATEGORY_1_PUNCTUATION</symbol>. 
-		</para> -	      </listitem> -	    </varlistentry> - -	    <varlistentry> -              <term><function>unicode_isspace</function>()</term> -	      <listitem> -		<para> -		  Returns non-0 for unicode_isblank() or -		  for unicode characters -		  with linebreaking properties of -		  <symbol>BK</symbol>, -		  <symbol>CR</symbol>, -		  <symbol>LF</symbol>, -		  <symbol>NL</symbol>, -		  and -		  <symbol>SP</symbol>. -		</para> -	      </listitem> -	    </varlistentry> - -	    <varlistentry> -              <term><function>unicode_isupper</function>()</term> -	      <listitem> -		<para> -		  Returns non-0 for all -		  <function>unicode_isalpha</function>() for which the -		  character is -		  equal to -		  <link linkend="unicode_uc"> -		    <citerefentry><refentrytitle>unicode_uc</refentrytitle> -		  <manvolnum>3</manvolnum></citerefentry></link> -		  of itself. -		</para> -	      </listitem> -	    </varlistentry> -	  </variablelist> +	  <para> +	    <function>unicode_html40ent_lookup</function>() returns 0 if the +	    <parameter>entity</parameter> is not a known entity that represents +	    a single unicode character. +	  </para>  	</refsect1> -	<refsect1> +	<refsect1 id="unicode_html40_seealso">  	  <title>SEE ALSO</title>  	  <para>  	    <link linkend="courier-unicode"> @@ -1014,7 +1819,7 @@ See COPYING for distribution information.        </refentry>        <refentry id="unicode_grapheme_break"> -	<info><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></info> +	<refentryinfo><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></refentryinfo>  	<refmeta>  	  <refentrytitle>unicode_grapheme_break</refentrytitle>  	  <manvolnum>3</manvolnum> @@ -1022,12 +1827,32 @@ See COPYING for distribution information.  	
<refnamediv>  	  <refname>unicode_grapheme_break</refname> +	  <refname>unicode_grapheme_break_init</refname> +	  <refname>unicode_grapheme_break_next</refname> +	  <refname>unicode_grapheme_break_deinit</refname>  	  <refpurpose>unicode grapheme cluster boundary rules</refpurpose>  	</refnamediv>  	<refsynopsisdiv>  	  <funcsynopsis>  	    <funcsynopsisinfo>#include <courier-unicode.h></funcsynopsisinfo> + +	    <funcprototype> +              <funcdef>unicode_grapheme_break_info_t <function>unicode_grapheme_break_init</function></funcdef> +	      <void /> +	    </funcprototype> + +	    <funcprototype> +              <funcdef>int <function>unicode_grapheme_break_next</function></funcdef> +              <paramdef>unicode_grapheme_break_info_t <parameter>handle</parameter></paramdef> +              <paramdef>char32_t <parameter>c</parameter></paramdef> +	    </funcprototype> + +	    <funcprototype> +              <funcdef>void <function>unicode_grapheme_break_deinit</function></funcdef> +              <paramdef>unicode_grapheme_break_info_t <parameter>handle</parameter></paramdef> +	    </funcprototype> +  	    <funcprototype>                <funcdef>int <function>unicode_grapheme_break</function></funcdef>                <paramdef>char32_t <parameter>a</parameter></paramdef> @@ -1035,21 +1860,52 @@ See COPYING for distribution information.  	    </funcprototype>  	  </funcsynopsis>  	</refsynopsisdiv> -	<refsect1> +	<refsect1 id="unicode_grapheme_descr">  	  <title>DESCRIPTION</title> + +	  <para> +	    These functions implement the unicode grapheme cluster breaking +	    algorithm. Invoke +	    <function>unicode_grapheme_break_init</function>() to initialize +	    the grapheme cluster breaking algorithm. +	    <function>unicode_grapheme_break_init</function>() returns an +	    opaque handle. Each subsequent call to +	    <function>unicode_grapheme_break_next</function>() passes this +	    handle, and the next character. 
+	    <function>unicode_grapheme_break_next</function>() returns a non-0 +	    value if there's a grapheme break before the character, in a +	    sequence of Unicode characters. +	    <function>unicode_grapheme_break_deinit</function>() releases +	    all resources used by the grapheme breaking handle, and the +	    <classname>unicode_grapheme_break_info_t</classname> handle +	    is no longer valid after this call. +	  </para> +	  <para> +	    The first call to <function>unicode_grapheme_break_next</function>() +	    always returns non-0, as per the GB1 rule. +	  </para>  	  <para> -	    <function>unicode_grapheme_break</function>() returns non-zero -	    if there is a grapheme break between the two unicode characters +	    <function>unicode_grapheme_break</function>() is a simplified +	    interface that returns non-zero +	    if there is a grapheme break between two unicode characters  	    <parameter>a</parameter> and  	    <parameter>b</parameter>. +	    This is equivalent to calling +	    <function>unicode_grapheme_break_init</function>(), +	    followed by two calls to +	    <function>unicode_grapheme_break_next</function>(), and finally +	    <function>unicode_grapheme_break_deinit</function>(), then +	    returning +	    the result of the second +	    call to <function>unicode_grapheme_break_next</function>().  	  </para>  	</refsect1> -	<refsect1> +	<refsect1 id="unicode_grapheme_seealso">  	  <title>SEE ALSO</title>  	  <para> -	    <ulink url="http://www.unicode.org/reports/tr29/tr29-&tr29ver;.html">TR-29</ulink>, +	    <ulink url="https://www.unicode.org/reports/tr29/tr29-&tr29ver;.html">TR-29</ulink>,  	    <link linkend="courier-unicode">  	      <citerefentry>  		<refentrytitle>courier-unicode</refentrytitle> @@ -1067,60 +1923,15 @@ See COPYING for distribution information.  	
</refsect1>        </refentry> -      <refentry id="unicode_script"> -	<info><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></info> -	<refmeta> -	  <refentrytitle>unicode_script</refentrytitle> -	  <manvolnum>3</manvolnum> -	</refmeta> - -	<refnamediv> -	  <refname>unicode_script</refname> -	  <refpurpose>unicode script property</refpurpose> -	</refnamediv> - -	<refsynopsisdiv> -	  <funcsynopsis> -	    <funcsynopsisinfo>#include <courier-unicode.h></funcsynopsisinfo> -	    <funcprototype> -              <funcdef>unicode_script_t <function>unicode_script</function></funcdef> -              <paramdef>char32_t <parameter>ch</parameter></paramdef> -	    </funcprototype> -	  </funcsynopsis> -	</refsynopsisdiv> -	<refsect1> -	  <title>DESCRIPTION</title> -	  <para> -	    <function>unicode_script</function>() looks up the -	    <quote>script</quote> property of the specified unicode character, -	    and returns it. The <classname>unicode_script_t</classname> -	    enumeration encodes possible unicode script values. -	    <literal>unicode_script_unknown</literal> gets returned for a -	    unicode character  with an unknown script property. -	  </para> -	</refsect1> - -	<refsect1> -	  <title>SEE ALSO</title> - -	  <para> -	    <ulink url="http://www.unicode.org/reports/tr24/tr24-&tr24ver;.html">TR-24</ulink>, -	    <link linkend="courier-unicode"> -	      <citerefentry> -		<refentrytitle>courier-unicode</refentrytitle> -		<manvolnum>7</manvolnum></citerefentry></link>. 
-	  </para> -	</refsect1> -      </refentry> -        <refentry id="unicode_line_break"> -	<info><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></info> +	<refentryinfo><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></refentryinfo>  	<refmeta>  	  <refentrytitle>unicode_line_break</refentrytitle>  	  <manvolnum>3</manvolnum>  	</refmeta>  	<refnamediv> +	  <refname>unicode_line_break</refname>  	  <refname>unicode_lb_init</refname>  	  <refname>unicode_lb_set_opts</refname>  	  <refname>unicode_lb_next</refname> @@ -1201,7 +2012,7 @@ See COPYING for distribution information.  	    </funcprototype>  	  </funcsynopsis>  	</refsynopsisdiv> -	<refsect1> +	<refsect1 id="unicode_lb_descr">  	  <title>DESCRIPTION</title>  	  <para>  	    These functions implement the unicode line breaking algorithm. @@ -1331,7 +2142,7 @@ See COPYING for distribution information.  	    line breaking handle is no longer valid.  	  </para> -	  <refsect2> +	  <refsect2 id="unicode_lb_altcallback">  	    <title>Alternative callback function</title>  	    <para> @@ -1346,7 +2157,7 @@ See COPYING for distribution information.  	    </para>  	  </refsect2> -	  <refsect2> +	  <refsect2 id="unicode_lb_altcallback_opt">  	    <title>Options</title>  	    <para> @@ -1423,7 +2234,7 @@ See COPYING for distribution information.  	  </refsect2>  	</refsect1> -	<refsect1> +	<refsect1 id="unicode_lb_seealso">  	  <title>SEE ALSO</title>  	  <para> @@ -1434,13 +2245,59 @@ See COPYING for distribution information.  	    
<link linkend="unicode__linebreak">  	      <citerefentry><refentrytitle>unicode::linebreak</refentrytitle>  	      <manvolnum>3</manvolnum></citerefentry></link>, -	    <ulink url="http://www.unicode.org/reports/tr14/tr14-&tr14ver;.html">TR-14</ulink> +	    <ulink url="https://www.unicode.org/reports/tr14/tr14-&tr14ver;.html">TR-14</ulink> +	  </para> +	</refsect1> +      </refentry> + +      <refentry id="unicode_script"> +	<refentryinfo><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></refentryinfo> +	<refmeta> +	  <refentrytitle>unicode_script</refentrytitle> +	  <manvolnum>3</manvolnum> +	</refmeta> + +	<refnamediv> +	  <refname>unicode_script</refname> +	  <refpurpose>unicode script property</refpurpose> +	</refnamediv> + +	<refsynopsisdiv> +	  <funcsynopsis> +	    <funcsynopsisinfo>#include <courier-unicode.h></funcsynopsisinfo> +	    <funcprototype> +              <funcdef>unicode_script_t <function>unicode_script</function></funcdef> +              <paramdef>char32_t <parameter>ch</parameter></paramdef> +	    </funcprototype> +	  </funcsynopsis> +	</refsynopsisdiv> +	<refsect1 id="unicode_script_descr"> +	  <title>DESCRIPTION</title> +	  <para> +	    <function>unicode_script</function>() looks up the +	    <quote>script</quote> property of the specified unicode character, +	    and returns it. The <classname>unicode_script_t</classname> +	    enumeration encodes possible unicode script values. +	    <literal>unicode_script_unknown</literal> gets returned for a +	    unicode character  with an unknown script property. 
+	  </para> +	</refsect1> + +	<refsect1 id="unicode_script_seealso"> +	  <title>SEE ALSO</title> + +	  <para> +	    <ulink url="https://www.unicode.org/reports/tr24/tr24-&tr24ver;.html">TR-24</ulink>, +	    <link linkend="courier-unicode"> +	      <citerefentry> +		<refentrytitle>courier-unicode</refentrytitle> +		<manvolnum>7</manvolnum></citerefentry></link>.  	  </para>  	</refsect1>        </refentry>        <refentry id="unicode_word_break"> -	<info><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></info> +	<refentryinfo><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></refentryinfo>  	<refmeta>  	  <refentrytitle>unicode_word_break</refentrytitle>  	  <manvolnum>3</manvolnum> @@ -1504,7 +2361,7 @@ See COPYING for distribution information.  	  </funcsynopsis>  	</refsynopsisdiv> -	<refsect1> +	<refsect1 id="unicode_wb_descr">  	  <title>DESCRIPTION</title>  	  <para>  	    These functions implement the unicode word breaking algorithm. @@ -1601,7 +2458,7 @@ See COPYING for distribution information.  	    line breaking handle is no longer valid.  	  </para> -	  <refsect2> +	  <refsect2 id="unicode_wb_scan">  	    <title>Word scan</title>  	    <para> @@ -1630,10 +2487,10 @@ See COPYING for distribution information.  	  </refsect2>  	</refsect1> -	<refsect1> +	<refsect1 id="unicode_wb_seealso">  	  <title>SEE ALSO</title>  	  <para> -	    <ulink url="http://www.unicode.org/reports/tr29/tr29-&tr29ver;.html">TR-29</ulink>, +	    <ulink url="https://www.unicode.org/reports/tr29/tr29-&tr29ver;.html">TR-29</ulink>,  	    <link linkend="courier-unicode">  	      <citerefentry>  		<refentrytitle>courier-unicode</refentrytitle> @@ -1655,7 +2512,7 @@ See COPYING for distribution information.        
</refentry>        <refentry id="unicode_uc"> -	<info><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></info> +	<refentryinfo><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></refentryinfo>  	<refmeta>  	  <refentrytitle>unicode_uc</refentrytitle>  	  <manvolnum>3</manvolnum> @@ -1699,7 +2556,7 @@ See COPYING for distribution information.  	    </funcprototype>  	  </funcsynopsis>  	</refsynopsisdiv> -	<refsect1> +	<refsect1 id="unicode_uc_descr">  	  <title>DESCRIPTION</title>  	  <para>  	    <function>unicode_uc</function>(), @@ -1729,7 +2586,7 @@ See COPYING for distribution information.  	  </para>  	</refsect1> -	<refsect1> +	<refsect1 id="unicode_uc_seealso">  	  <title>SEE ALSO</title>  	  <para>  	    <link linkend="courier-unicode"> @@ -1767,8 +2624,189 @@ See COPYING for distribution information.      
<section id="manpagescpp">        <title>C++ manual pages</title> +      <refentry id="unicode__bidi"> +	<refentryinfo><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></refentryinfo> + +	<refmeta> +	  <refentrytitle>unicode::bidi::calc</refentrytitle> +	  <manvolnum>3</manvolnum> +	</refmeta> + +	<refnamediv> +	  <refname>unicode::bidi_calc</refname> +	  <refname>unicode::bidi_reorder</refname> +	  <refname>unicode::bidi_cleanup</refname> +	  <refname>unicode::bidi_extra_cleanup</refname> +	  <refname>unicode::bidi_logical_order</refname> +	  <refname>unicode::bidi_embed</refname> +	  <refname>unicode::bidi_embed_paragraph_level</refname> +	  <refpurpose>unicode bi-directional algorithm</refpurpose> +	</refnamediv> + +	<refsynopsisdiv> +	  <funcsynopsis> +	    <funcsynopsisinfo>#include <courier-unicode.h></funcsynopsisinfo> +	    <funcprototype> +              <funcdef>std::tuple<std::vector<unicode_bidi_level_t>, unicode_bidi_level_t> <function>unicode::bidi_calc</function></funcdef> +	      <paramdef>const std::u32string &<parameter>string</parameter></paramdef> +	    </funcprototype> + +	    <funcprototype> +              <funcdef>std::tuple<std::vector<unicode_bidi_level_t>, unicode_bidi_level_t> <function>unicode::bidi_calc</function></funcdef> +	      <paramdef>const std::u32string &<parameter>string</parameter></paramdef> +	      <paramdef>unicode_bidi_level_t <parameter>embedding_level</parameter></paramdef> +	    </funcprototype> + +	    <funcprototype> +              <funcdef>int <function>unicode::bidi_reorder</function></funcdef> +	      <paramdef>std::u32string &<parameter>string</parameter></paramdef> +	      <paramdef>std::vector<unicode_bidi_level_t> &<parameter>embedding_level</parameter></paramdef> +	      <paramdef>const std::function<void (size_t, size_t) noexcept> &<parameter>reorder_callback</parameter></paramdef> +	    </funcprototype> + +	    
<funcprototype> +              <funcdef>void <function>unicode::bidi_reorder</function></funcdef> +	      <paramdef>std::vector<unicode_bidi_level_t> &<parameter>embedding_level</parameter></paramdef> +	      <paramdef>const std::function<void (size_t, size_t) noexcept> &<parameter>reorder_callback</parameter></paramdef> +	    </funcprototype> + +	    <funcprototype> +              <funcdef>void <function>unicode::bidi_cleanup</function></funcdef> +	      <paramdef>std::u32string &<parameter>string</parameter></paramdef> +	      <paramdef>const std::function<void (size_t) noexcept> &<parameter>removed_callback</parameter></paramdef> +            </funcprototype> + +	    <funcprototype> +              <funcdef>int <function>unicode::bidi_cleanup</function></funcdef> +	      <paramdef>std::u32string &<parameter>string</parameter></paramdef> +	      <paramdef>std::vector <unicode_bidi_level_t> &<parameter>levels</parameter></paramdef> +	      <paramdef>const std::function<void (size_t) noexcept> &<parameter>removed_callback</parameter></paramdef> +            </funcprototype> + +	    <funcprototype> +              <funcdef>void <function>unicode::bidi_extra_cleanup</function></funcdef> +	      <paramdef>std::u32string &<parameter>string</parameter></paramdef> +	      <paramdef>const std::function<void (size_t) noexcept> &<parameter>removed_callback</parameter></paramdef> +            </funcprototype> + +	    <funcprototype> +              <funcdef>int <function>unicode::bidi_extra_cleanup</function></funcdef> +	      <paramdef>std::u32string &<parameter>string</parameter></paramdef> +	      <paramdef>std::vector <unicode_bidi_level_t> &<parameter>levels</parameter></paramdef> +	      <paramdef>const std::function<void (size_t) noexcept> &<parameter>removed_callback</parameter></paramdef> +            </funcprototype> + +	    <funcprototype> +              <funcdef>int <function>unicode::bidi_logical_order</function></funcdef> +	      <paramdef>std::u32string 
&<parameter>string</parameter></paramdef> +	      <paramdef>std::vector <unicode_bidi_level_t> &<parameter>levels</parameter></paramdef> +	      <paramdef>unicode_bidi_level_t <parameter>paragraph_embedding</parameter></paramdef> +	      <paramdef>const std::function<void (size_t, size_t) noexcept> &<parameter>removed_callback</parameter></paramdef> +            </funcprototype> + +	    <funcprototype> +              <funcdef>void <function>unicode::bidi_logical_order</function></funcdef> +	      <paramdef>std::vector <unicode_bidi_level_t> &<parameter>levels</parameter></paramdef> +	      <paramdef>unicode_bidi_level_t <parameter>paragraph_embedding</parameter></paramdef> +	      <paramdef>const std::function<void (size_t, size_t) noexcept> &<parameter>removed_callback</parameter></paramdef> +            </funcprototype> + +	    <funcprototype> +              <funcdef>int <function>unicode::bidi_embed</function></funcdef> +	      <paramdef>const std::u32string &<parameter>string</parameter></paramdef> +	      <paramdef>const std::vector <unicode_bidi_level_t> &<parameter>levels</parameter></paramdef> +	      <paramdef>unicode_bidi_level_t <parameter>paragraph_embedding</parameter></paramdef> +	      <paramdef>const std::function<void (size_t, const char32_t *, size_t) noexcept> &<parameter>callback</parameter></paramdef> +            </funcprototype> + +	    <funcprototype> +              <funcdef>std::u32string <function>unicode::bidi_embed</function></funcdef> +	      <paramdef>const std::u32string &<parameter>string</parameter></paramdef> +	      <paramdef>const std::vector <unicode_bidi_level_t> &<parameter>levels</parameter></paramdef> +	      <paramdef>unicode_bidi_level_t <parameter>paragraph_embedding</parameter></paramdef> +            </funcprototype> + +	    <funcprototype> +	      <funcdef>char32_t <function>unicode::bidi_embed_paragraph_level</function></funcdef> +              <paramdef>const std::u32string &<parameter>string</parameter></paramdef> +  
            <paramdef>unicode_bidi_level_t <parameter>paragraph_embedding</parameter></paramdef> +	    </funcprototype> +          </funcsynopsis> +	</refsynopsisdiv> + +	<refsect1 id="unicode_cpp_bidi_descr"> +	  <title>DESCRIPTION</title> + +	  <para> +	    These functions implement the C++ interface for the +	    <ulink url="https://www.unicode.org/reports/tr9/tr9-&tr9ver;.html"> Unicode Bi-Directional algorithm</ulink>. +	    See the description of the underlying +	    <link linkend="unicode_bidi"> +	      <citerefentry><refentrytitle>unicode_bidi</refentrytitle> +	      <manvolnum>3</manvolnum></citerefentry></link> C library +	      API for more information. C++ specific notes: +	  </para> + +	  <itemizedlist> +	    <listitem> +	      <para> +                <function>unicode::bidi_calc</function> returns the +		directional embedding value buffer and the paragraph +		embedding level. +              </para> +            </listitem> +	    <listitem> +	      <para> +		Several C functions provide a <quote>dry-run</quote> mode +		by passing a <literal>NULL</literal> pointer. The C++ API +		provides separate overloads, with and without the nullable +		parameter. +              </para> +            </listitem> +	    <listitem> +	      <para> +		Several C functions accept a nullable function pointer, with +		the <literal>NULL</literal> function pointer specifying no +		callback. The C++ functions have a +		<classname>std::function</classname> parameter with a +		default do-nothing closure. +              </para> +            </listitem> + +	    <listitem> +	      <para> +		Several C functions accept two parameters, a Unicode character +		pointer and the embedding level buffer, and a single parameter +		that specifies the size of both. 
+		The equivalent C++ function takes two discrete parameters, +		a <classname>std::u32string</classname> and a +		<classname>std::vector</classname> and returns an +		<classname>int</classname>: a negative value if their sizes +		differ, or 0 if their sizes match and the requested function +		completes. The <function>unicode::bidi_embed</function> overload +		that returns a <classname>std::u32string</classname> returns +		an empty string in case of a mismatch. +              </para> +            </listitem> +          </itemizedlist> +	</refsect1> +	<refsect1 id="unicode_cpp_bidi_seealso"> +	  <title>SEE ALSO</title> +	  <para> +	    <link linkend="courier-unicode"> +	      <citerefentry> +		<refentrytitle>courier-unicode</refentrytitle> +		<manvolnum>7</manvolnum></citerefentry></link>, +	    <link linkend="unicode_bidi"> +	      <citerefentry><refentrytitle>unicode_bidi</refentrytitle> +	      <manvolnum>3</manvolnum></citerefentry></link>. +          </para> +        </refsect1> +      </refentry> + +        <refentry id="unicode__iconvert__convert"> -	<info><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></info> +	<refentryinfo><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></refentryinfo>  	<refmeta>  	  <refentrytitle>unicode::iconvert::convert</refentrytitle> @@ -1831,7 +2869,7 @@ extern const char unicode::iso_8859_1[];</funcsynopsisinfo>  	  </funcsynopsis>  	</refsynopsisdiv> -	<refsect1> +	<refsect1 id="unicode_cpp_convert_descr">  	  <title>DESCRIPTION</title>  	  <para> @@ -1889,7 +2927,7 @@ extern const char unicode::iso_8859_1[];</funcsynopsisinfo>  	  </para>  	</refsect1> -	<refsect1> +	<refsect1 id="unicode_cpp_convert_seealso">  	  <title>SEE ALSO</title>  	  <para>  	    <link linkend="courier-unicode"> @@ -1902,7 +2940,7 @@ extern const char 
unicode::iso_8859_1[];</funcsynopsisinfo>  	    <link linkend="unicode_convert">  	      <citerefentry><refentrytitle>unicode_convert</refentrytitle>  	      <manvolnum>3</manvolnum></citerefentry></link>, -	      <ulink url="http://manpages.courier-mta.org/htmlman3/iconv.3.html"> +	      <ulink url="https://manpages.courier-mta.org/htmlman3/iconv.3.html">  	      <citerefentry><refentrytitle>iconv</refentrytitle>  	      <manvolnum>3</manvolnum></citerefentry></ulink>. @@ -1911,7 +2949,7 @@ extern const char unicode::iso_8859_1[];</funcsynopsisinfo>        </refentry>        <refentry id="unicode__iconvert__convert_tocase"> -	<info><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></info> +	<refentryinfo><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></refentryinfo>  	<refmeta>  	  <refentrytitle>unicode::iconvert::convert_tocase</refentrytitle> @@ -1947,7 +2985,7 @@ extern const char unicode::iso_8859_1[];</funcsynopsisinfo>  	  </funcsynopsis>  	</refsynopsisdiv> -	<refsect1> +	<refsect1 id="unicode_cpp_convert_tocase_descr">  	  <title>DESCRIPTION</title>  	  <para> @@ -1979,7 +3017,7 @@ extern const char unicode::iso_8859_1[];</funcsynopsisinfo>  	  </para>  	</refsect1> -	<refsect1> +	<refsect1 id="unicode_cpp_convert_tocase_seealso">  	  <title>SEE ALSO</title>  	  <para>  	    <link linkend="courier-unicode"> @@ -1992,7 +3030,7 @@ extern const char unicode::iso_8859_1[];</funcsynopsisinfo>  	    <link linkend="unicode_convert">  	      <citerefentry><refentrytitle>unicode_convert</refentrytitle>  	      <manvolnum>3</manvolnum></citerefentry></link>, -	      <ulink url="http://manpages.courier-mta.org/htmlman3/iconv.3.html"> +	      <ulink url="https://manpages.courier-mta.org/htmlman3/iconv.3.html">  	      <citerefentry><refentrytitle>iconv</refentrytitle>  	  
    <manvolnum>3</manvolnum></citerefentry></ulink>. @@ -2001,7 +3039,7 @@ extern const char unicode::iso_8859_1[];</funcsynopsisinfo>        </refentry>        <refentry id="unicode__iconvert__fromu"> -	<info><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></info> +	<refentryinfo><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></refentryinfo>  	<refmeta>  	  <refentrytitle>unicode::iconvert::fromu</refentrytitle> @@ -2044,7 +3082,7 @@ extern const char unicode::iso_8859_1[];</funcsynopsisinfo>  	  </funcsynopsis>  	</refsynopsisdiv> -	<refsect1> +	<refsect1 id="unicode_cpp_convert_fromu_descr">  	  <title>DESCRIPTION</title>  	  <para> @@ -2076,7 +3114,7 @@ extern const char unicode::iso_8859_1[];</funcsynopsisinfo>  	  </para>  	</refsect1> -	<refsect1> +	<refsect1 id="unicode_cpp_convert_fromu_seealso">  	  <title>SEE ALSO</title>  	  <para>  	    <link linkend="courier-unicode"> @@ -2089,7 +3127,7 @@ extern const char unicode::iso_8859_1[];</funcsynopsisinfo>  	    <link linkend="unicode_convert">  	      <citerefentry><refentrytitle>unicode_convert</refentrytitle>  	      <manvolnum>3</manvolnum></citerefentry></link>, -	      <ulink url="http://manpages.courier-mta.org/htmlman3/iconv.3.html"> +	      <ulink url="https://manpages.courier-mta.org/htmlman3/iconv.3.html">  	      <citerefentry><refentrytitle>iconv</refentrytitle>  	      <manvolnum>3</manvolnum></citerefentry></ulink>. 
@@ -2098,7 +3136,7 @@ extern const char unicode::iso_8859_1[];</funcsynopsisinfo>        </refentry>        <refentry id="unicode__iconvert__tou"> -	<info><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></info> +	<refentryinfo><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></refentryinfo>  	<refmeta>  	  <refentrytitle>unicode::iconvert::tou</refentrytitle> @@ -2140,7 +3178,7 @@ extern const char unicode::iso_8859_1[];</funcsynopsisinfo>  	  </funcsynopsis>  	</refsynopsisdiv> -	<refsect1> +	<refsect1 id="unicode_cpp_convert_tou_descr">  	  <title>DESCRIPTION</title>  	  <para> @@ -2175,7 +3213,7 @@ extern const char unicode::iso_8859_1[];</funcsynopsisinfo>  	  </para>  	</refsect1> -	<refsect1> +	<refsect1 id="unicode_cpp_convert_tou_seealso">  	  <title>SEE ALSO</title>  	  <para>  	    <link linkend="courier-unicode"> @@ -2188,7 +3226,7 @@ extern const char unicode::iso_8859_1[];</funcsynopsisinfo>  	    <link linkend="unicode_convert">  	      <citerefentry><refentrytitle>unicode_convert</refentrytitle>  	      <manvolnum>3</manvolnum></citerefentry></link>, -	      <ulink url="http://manpages.courier-mta.org/htmlman3/iconv.3.html"> +	      <ulink url="https://manpages.courier-mta.org/htmlman3/iconv.3.html">  	      <citerefentry><refentrytitle>iconv</refentrytitle>  	      <manvolnum>3</manvolnum></citerefentry></ulink>. 
@@ -2197,7 +3235,7 @@ extern const char unicode::iso_8859_1[];</funcsynopsisinfo>        </refentry>        <refentry id="unicode__linebreak"> -	<info><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></info> +	<refentryinfo><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></refentryinfo>  	<refmeta>  	  <refentrytitle>unicode::linebreak</refentrytitle> @@ -2288,7 +3326,7 @@ std::vector<std::pair<int, char32_t>> linebreaks;  std::copy(beg_iter, end_iter, std::back_insert_iterator<std::vector<int>>(linebreaks));</programlisting>  	</refsynopsisdiv> -	<refsect1> +	<refsect1 id="unicode_cpp_lb_descr">  	  <title>DESCRIPTION</title>  	  <para> @@ -2383,7 +3421,7 @@ std::copy(beg_iter, end_iter, std::back_insert_iterator<std::vector<int>  	  </para>  	</refsect1> -	<refsect1> +	<refsect1 id="unicode_cpp_lb_seealso">  	  <title>SEE ALSO</title>  	  <para>  	    <link linkend="courier-unicode"> @@ -2398,7 +3436,7 @@ std::copy(beg_iter, end_iter, std::back_insert_iterator<std::vector<int>        </refentry>        <refentry id="unicode__tolower"> -	<info><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></info> +	<refentryinfo><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></refentryinfo>  	<refmeta>  	  <refentrytitle>unicode::tolower</refentrytitle> @@ -2454,7 +3492,7 @@ std::copy(beg_iter, end_iter, std::back_insert_iterator<std::vector<int>  	  </funcsynopsis>  	</refsynopsisdiv> -	<refsect1> +	<refsect1 id="unicode_cpp_tolower_descr">  	  <title>DESCRIPTION</title>  	  <para> @@ -2482,7 +3520,7 @@ std::copy(beg_iter, end_iter, std::back_insert_iterator<std::vector<int>  	  
</para>  	</refsect1> -	<refsect1> +	<refsect1 id="unicode_cpp_tolower_seealso">  	  <title>SEE ALSO</title>  	  <para>  	    <link linkend="courier-unicode"> @@ -2493,19 +3531,8 @@ std::copy(beg_iter, end_iter, std::back_insert_iterator<std::vector<int>  	</refsect1>        </refentry> - - - - - - - - - - -        <refentry id="unicode__wordbreak"> -	<info><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></info> +	<refentryinfo><author><firstname>Sam</firstname><surname>Varshavchik</surname><contrib>Author</contrib></author><productname>Courier Unicode Library</productname></refentryinfo>  	<refmeta>  	  <refentrytitle>unicode::wordbreak</refentrytitle> @@ -2557,7 +3584,7 @@ size_t nchars=scan.finish();  </programlisting>  	</refsynopsisdiv> -	<refsect1> +	<refsect1 id="unicode_cpp_wb_descr">  	  <title>DESCRIPTION</title>  	  <para> @@ -2621,7 +3648,7 @@ size_t nchars=scan.finish();  	  </para>  	</refsect1> -	<refsect1> +	<refsect1 id="unicode_cpp_wb_seealso">  	  <title>SEE ALSO</title>  	  <para>  	    <link linkend="courier-unicode"> | 
