summaryrefslogtreecommitdiffstats
path: root/unicode
diff options
context:
space:
mode:
authorSam Varshavchik2021-03-12 07:15:38 -0500
committerSam Varshavchik2021-03-12 20:27:32 -0500
commitcf15bdb799c6b8b395087480fe3e89fb8b53cc12 (patch)
treebb10f5f2f04a3abdf82ccece78eee35544dd82fe /unicode
parent18fc31347b80597f4100f96c86799fe130786781 (diff)
downloadcourier-libs-cf15bdb799c6b8b395087480fe3e89fb8b53cc12.tar.bz2
courier-unicode: further changes to the canonical compose/decompose.
Diffstat (limited to 'unicode')
-rw-r--r--unicode/ChangeLog7
-rw-r--r--unicode/Makefile.am7
-rw-r--r--unicode/README35
-rw-r--r--unicode/biditest2.C97
-rw-r--r--unicode/book.xml582
-rw-r--r--unicode/configure.ac2
-rw-r--r--unicode/courier-unicode.h.in76
-rw-r--r--unicode/mknormalization.pl3
-rw-r--r--unicode/normalization.h195
-rw-r--r--unicode/normalizetest.C20
-rw-r--r--unicode/unicode_bidi.c72
-rw-r--r--unicode/unicode_normalization.c205
-rw-r--r--unicode/unicodecpp.C166
13 files changed, 1044 insertions, 423 deletions
diff --git a/unicode/ChangeLog b/unicode/ChangeLog
index e1563f1..03fe2e2 100644
--- a/unicode/ChangeLog
+++ b/unicode/ChangeLog
@@ -1,3 +1,10 @@
+2.2.2
+
+2021-03-12 Sam Varshavchik <mrsam@courier-mta.com>
+
+ * unicode_bidi.c: Implement unicode_bidi_combinings(). Implement
+ canonical and compatibility decomposition and canonical composition.
+
2021-03-06 Sam Varshavchik <mrsam@courier-mta.com>
* unicode_bidi.c (unicode_bidi_needs_embed): Make its return code
diff --git a/unicode/Makefile.am b/unicode/Makefile.am
index 03accd4..68058d4 100644
--- a/unicode/Makefile.am
+++ b/unicode/Makefile.am
@@ -97,6 +97,7 @@ man_MANS= \
$(srcdir)/man/unicode[\:][\:]bidi_calc.3 \
$(srcdir)/man/unicode[\:][\:]bidi_calc_types.3 \
$(srcdir)/man/unicode[\:][\:]bidi_cleanup.3 \
+ $(srcdir)/man/unicode[\:][\:]bidi_combinings.3 \
$(srcdir)/man/unicode[\:][\:]bidi_embed.3 \
$(srcdir)/man/unicode[\:][\:]bidi_embed_paragraph_level.3 \
$(srcdir)/man/unicode[\:][\:]bidi_get_direction.3 \
@@ -133,6 +134,7 @@ man_MANS= \
$(srcdir)/man/unicode_bidi_calc_types.3 \
$(srcdir)/man/unicode_bidi_cleaned_size.3 \
$(srcdir)/man/unicode_bidi_cleanup.3 \
+ $(srcdir)/man/unicode_bidi_combinings.3 \
$(srcdir)/man/unicode_bidi_direction.3 \
$(srcdir)/man/unicode_bidi_embed.3 \
$(srcdir)/man/unicode_bidi_embed_paragraph_level.3 \
@@ -144,6 +146,7 @@ man_MANS= \
$(srcdir)/man/unicode_bidi_type.3 \
$(srcdir)/man/unicode_canonical.3 \
$(srcdir)/man/unicode_category_lookup.3 \
+ $(srcdir)/man/unicode_ccc.3 \
$(srcdir)/man/unicode_compose.3 \
$(srcdir)/man/unicode_composition_apply.3 \
$(srcdir)/man/unicode_composition_deinit.3 \
@@ -164,9 +167,9 @@ man_MANS= \
$(srcdir)/man/unicode_convert_toutf8.3 \
$(srcdir)/man/unicode_convert_uc.3 \
$(srcdir)/man/unicode_decompose.3 \
- $(srcdir)/man/unicode_decompose_info_deinit.3 \
- $(srcdir)/man/unicode_decompose_info_init.3 \
$(srcdir)/man/unicode_decompose_reallocate_size.3 \
+ $(srcdir)/man/unicode_decomposition_deinit.3 \
+ $(srcdir)/man/unicode_decomposition_init.3 \
$(srcdir)/man/unicode_default_chset.3 \
$(srcdir)/man/unicode_emoji.3 \
$(srcdir)/man/unicode_emoji_component.3 \
diff --git a/unicode/README b/unicode/README
index 74b7cf4..4e67d6c 100644
--- a/unicode/README
+++ b/unicode/README
@@ -15,29 +15,28 @@ Courier Unicode Library
COPYING
- This library implements several algorithms related to the Unicode Standard
- (with both C and C++ bindings), notably:
+ This library implements several algorithms related to the Unicode
+ Standard, featuring:
- * Look up uppercase, lowercase, and titlecase equivalents of a unicode
- character.
+ * Both C and C++11 bindings, with a complete manual page documentation
+ set.
- * Implementation of grapheme and word breaking rules.
+ * The library has all Unicode mappings compiled in as fast, compact
+ lookup tables. The library does not need to load the Unicode database
+ files at startup, every time.
- * Implementation of line breaking rules.
+ * The library implements lookups of uppercase, lowercase, and titlecase
+ equivalents of a unicode character; grapheme and word breaking rules;
+ line breaking rules; and the bi-directional algorithm.
- * Implementation of the bi-directional algorithm.
+ * The library implements canonical and compatibility decomposition and
+ composition of Unicode text; and the Unicode script property.
- * Canonical forms and normalizations of Unicode text.
-
- * Look up the Unicode script property.
-
- * Look up the category property.
-
- * Several ancillary functions, like looking up the unicode character
- that corresponds to some HTML 4.0 entity (such as “&amp;”, for
- example), and determining the normal width or a double-width status of
- a unicode character. Also, an adaptation of the iconv(3) API for this
- unicode library.
+ * The library also implements ancillary functions, like looking up the
+ unicode character that corresponds to some HTML 4.0 entity (such as
+ “&amp;”, for example), and determining the normal width or a
+ double-width status of a unicode character. Also, an adaptation of the
+ iconv(3) API for this unicode library.
Current status
diff --git a/unicode/biditest2.C b/unicode/biditest2.C
index 7129805..d3178b8 100644
--- a/unicode/biditest2.C
+++ b/unicode/biditest2.C
@@ -727,6 +727,102 @@ void direction_test2()
}
}
+void composition_test()
+{
+ static const struct {
+ std::u32string str;
+ std::vector<unicode_bidi_level_t> levels;
+ std::vector<std::tuple<unicode_bidi_level_t,
+ size_t, size_t, size_t,
+ size_t>> results;
+ } tests[] = {
+ // Test 1
+ {
+ U"a\u0303\u0303b\u0303\u0303c",
+ {0, 0, 0, 0, 0, 0, 0},
+ {
+ {0, 0, 7, 1, 2},
+ {0, 0, 7, 4, 2},
+ }
+ },
+ // Test 2
+ {
+ U"\u0303ab\u0303",
+ {0, 0, 0, 0},
+ {
+ {0, 0, 4, 0, 1},
+ {0, 0, 4, 3, 1},
+ }
+ },
+ // Test 3
+ {
+ U"a\u0303\u0303b",
+ {0, 0, 1, 1},
+ {
+ {0, 0, 2, 1, 1},
+ {1, 2, 2, 2, 1},
+ }
+ },
+ // Test 4
+ {
+ U"\u0303a\u0303a",
+ {0, 0, 0, 0},
+ {
+ {0, 0, 4, 0, 1},
+ {0, 0, 4, 2, 1},
+ }
+ },
+ };
+
+ int testnum=0;
+
+ for (const auto &t:tests)
+ {
+ ++testnum;
+
+ std::vector<std::tuple<unicode_bidi_level_t,
+ size_t, size_t, size_t, size_t>> actual;
+
+ auto copy=t.str;
+
+ unicode::bidi_combinings(copy, t.levels,
+ [&]
+ (unicode_bidi_level_t level,
+ size_t level_start,
+ size_t n_chars,
+ size_t comb_start,
+ size_t n_comb_chars)
+ {
+ actual.emplace_back
+ (level,
+ level_start,
+ n_chars,
+ comb_start,
+ n_comb_chars);
+
+ auto b=copy.begin()+comb_start;
+ auto e=b+n_comb_chars;
+
+ if (comb_start + n_comb_chars
+ < level_start + n_chars)
+ ++e;
+
+ while (b < e)
+ {
+ --e;
+ std::swap(*b, *e);
+ ++b;
+ }
+ });
+
+ if (actual != t.results)
+ {
+ std::cerr << "composition test " << testnum
+ << " failed\n";
+ exit(1);
+ }
+ }
+}
int main(int argc, char **argv)
{
DEBUGDUMP=fopen("/dev/null", "w");
@@ -736,6 +832,7 @@ int main(int argc, char **argv)
exit(1);
}
exception_test();
+ composition_test();
partial_reorder_cleanup();
null_character_test();
latin_test();
diff --git a/unicode/book.xml b/unicode/book.xml
index fdacab6..7744501 100644
--- a/unicode/book.xml
+++ b/unicode/book.xml
@@ -24,57 +24,47 @@ See COPYING for distribution information.
<para>
This library implements several algorithms related to the
<ulink url="https://www.unicode.org/standard/standard.html">Unicode
- Standard</ulink> (with both C and C++ bindings), notably:
+ Standard</ulink>, featuring:
</para>
<itemizedlist>
<listitem>
<para>
- Look up uppercase, lowercase, and titlecase equivalents of a unicode
- character.
- </para>
- </listitem>
- <listitem>
- <para>
- Implementation of
- <link linkend="unicode_grapheme_break">grapheme
- and word breaking</link> rules.
+ Both C and C++11 bindings, with a
+ <link linkend="courier-unicode">complete manual page
+ documentation set</link>.
</para>
</listitem>
+
<listitem>
<para>
- Implementation of
- <link linkend="unicode_line_break">line breaking</link> rules.
+ The library has all Unicode mappings compiled in as fast, compact
+ lookup tables. The library does not need to load the Unicode database
+ files at startup, every time.
</para>
</listitem>
<listitem>
<para>
- Implementation of the
+ The library implements lookups of uppercase, lowercase, and
+ titlecase equivalents of a unicode character;
+ <link linkend="unicode_grapheme_break">grapheme
+ and word breaking</link> rules;
+ <link linkend="unicode_line_break">line breaking</link> rules;
+ and the
<link linkend="unicode_bidi">bi-directional
algorithm</link>.
</para>
</listitem>
<listitem>
<para>
- <link linkend="unicode_canonical">Canonical forms and
- normalizations</link> of Unicode text.
+ The library implements <link linkend="unicode_canonical">canonical
+ and compatibility decomposition and composition</link> of Unicode text;
+ and the <link linkend="unicode_script">Unicode script property</link>.
</para>
</listitem>
<listitem>
<para>
- Look up the <link linkend="unicode_script">Unicode
- script property</link>.
- </para>
- </listitem>
- <listitem>
- <para>
- Look up the <link linkend="unicode_category_lookup">category</link>
- property.
- </para>
- </listitem>
- <listitem>
- <para>
- Several ancillary functions, like looking up
+ The library also implements ancillary functions, like looking up
the unicode character that corresponds to some HTML 4.0
entity (such as <quote>&amp;amp;</quote>, for example), and
determining the normal width or a double-width status of a unicode
@@ -102,7 +92,7 @@ See COPYING for distribution information.
<para>
Download the current version of the library from
<ulink url="/download.html#unicode">https://www.courier-mta.org/download.html#unicode</ulink>.
- Use the downloaded tarball to create an appropriate installation
+ Use the downloaded tarball to prepare an appropriate installation
package for your operating system distribution.
The typical sequence of commands is:
</para>
@@ -125,13 +115,13 @@ make install DESTDIR=/tmp/courier-unicode-instimage # For example.</programlisti
<note>
<para>
- <command>make install</command> will not take any explicit action
+ <command>make install</command> does not take any explicit action
to uninstall any older version of the library, or remove any files
from an older version that do not exist any more in the new version.
- The library's installation image should be used to prepare an
+ Use the created installation image to prepare an
installable package in a native package format for your operating
system distribution. Use your native system distribution's package
- manager to properly install and uninstall the library's package.
+ manager to properly install and update this library.
</para>
</note>
@@ -141,9 +131,9 @@ make install DESTDIR=/tmp/courier-unicode-instimage # For example.</programlisti
The C++ compiler must have C++11 support. Minimum usable version of
gcc appears to be gcc 4.4 with the <literal>-std=c++0x</literal> flag.
Current versions of gcc use C++11, or higher, by default and do not
- require extra flags. Like with all C++ code, the same compiler, and flags,
- must be used to build code that uses this library that was used to
- build the library itself.
+ require extra flags. For C++ code, as usual, the compiler and compilation
+ flags for compiling any code that uses this library must be ABI-compatible
+ with those used to build the library itself.
</para>
<para>
@@ -177,7 +167,19 @@ make install DESTDIR=/tmp/courier-unicode-instimage # For example.</programlisti
<para>
The <varname>AX_COURIER_UNICODE_VERSION</varname> macro
- checks the minimum library version.
+ checks the minimum library version, which defaults to the build
+ version. An optional parameter explicitly specifies which version
+ of the Courier Unicode library is the minimum version required, i.e.:
+ </para>
+
+ <blockquote>
+ <informalexample>
+ <programlisting>
+ AX_COURIER_UNICODE_VERSION(2.2.0)</programlisting>
+ </informalexample>
+ </blockquote>
+
+ <para>
<varname>AX_COURIER_UNICODE_CXXFLAGS</varname> sets
<varname>COURIER_UNICODE_CXXFLAGS</varname> to the appropriate option
for older gcc compilers that require an option to enable C++11
@@ -342,6 +344,7 @@ make install DESTDIR=/tmp/courier-unicode-instimage # For example.</programlisti
<refname>unicode_bidi_cleanup</refname>
<refname>unicode_bidi_cleaned_size</refname>
<refname>unicode_bidi_logical_order</refname>
+ <refname>unicode_bidi_combinings</refname>
<refname>unicode_bidi_needs_embed</refname>
<refname>unicode_bidi_embed</refname>
<refname>unicode_bidi_embed_paragraph_level</refname>
@@ -420,6 +423,14 @@ make install DESTDIR=/tmp/courier-unicode-instimage # For example.</programlisti
</funcprototype>
<funcprototype>
+ <funcdef>void <function>unicode_bidi_combinings</function></funcdef>
+ <paramdef>const char32_t *<parameter>string</parameter></paramdef>
+ <paramdef>const unicode_bidi_level_t *<parameter>levels</parameter></paramdef>
+ <paramdef>size_t <parameter>n</parameter></paramdef>
+ <paramdef>void (*<parameter>combinings</parameter>)(unicode_bidi_level_t level, size_t level_start, size_t n_chars, size_t comb_start, size_t n_comb_chars, void *arg)</paramdef>
+ <paramdef>void *<parameter>arg</parameter></paramdef>
+ </funcprototype>
+ <funcprototype>
<funcdef>int <function>unicode_bidi_needs_embed</function></funcdef>
<paramdef>const char32_t *<parameter>string</parameter></paramdef>
<paramdef>const unicode_bidi_level_t *<parameter>levels</parameter></paramdef>
@@ -1119,8 +1130,7 @@ make install DESTDIR=/tmp/courier-unicode-instimage # For example.</programlisti
</listitem>
<listitem>
<para>
- The directional embedding buffer, in logical
- rendering order.
+ The directional embedding buffer, in logical order.
</para>
</listitem>
<listitem>
@@ -1151,6 +1161,206 @@ make install DESTDIR=/tmp/courier-unicode-instimage # For example.</programlisti
in.
</para>
</refsect2>
+ <refsect2 id="unicode_bidi_combinings">
+ <title>Combining character ranges</title>
+
+ <para>
+ <function>unicode_bidi_combinings</function>() reports
+ consecutive sequences of one or more combining marks
+ in bidirectional text (which can be either in rendering or
+ logical order) that have the same embedding level. It takes
+ the following parameters:
+ </para>
+
+ <itemizedlist>
+ <listitem>
+ <para>The Unicode string.</para>
+ </listitem>
+ <listitem>
+ <para>
+ The directional embedding buffer, in logical
+ or rendering order. A <literal>NULL</literal> value for
+ this pointer is equivalent to a directional embedding
+ buffer with a level of 0 for every character in the Unicode
+ string.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Number of characters in the Unicode string.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ The pointer to the callback function.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ An opaque pointer argument that gets forwarded to the
+ callback function.
+ </para>
+ </listitem>
+ </itemizedlist>
+
+ <para>
+ The callback function gets invoked for every consecutive
+ sequence of one or more characters that have a canonical
+ combining class other than 0, and with the same
+ embedding level. The parameters to the callback function are:
+ </para>
+
+ <itemizedlist>
+ <listitem>
+ <para>The embedding level of the combining characters.</para>
+ </listitem>
+ <listitem>
+ <para>
+ The starting index of a consecutive sequence of all
+ characters with the same embedding level.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ The number of characters with the same embedding level.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ The starting index of a consecutive sequence of all
+ characters with the same embedding level
+ and a canonical combining
+ class other than 0. This will always be equal to or greater
+ than the value of the second parameter.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ The number of consecutive characters
+ with the same embedding level
+ and a canonical combining class other than 0.
+ The last character included in this sequence will always
+ be less than or equal to the last character in the sequence
+ defined by the second and the third parameters.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ The opaque pointer argument that was passed to
+ <function>unicode_bidi_combinings</function>.
+ </para>
+ </listitem>
+ </itemizedlist>
+
+ <para>
+ A consecutive sequence of Unicode characters with non-0 combining
+ classes but different embedding levels gets reported individually,
+ for each consecutive sequence with the same embedding level.
+ </para>
+
+ <para>
+ This function helps with reordering the combining characters in
+ right-to-left-rendered text.
+ Right-to-left text reversed by
+ <function>unicode_bidi_reorder</function>() results in combining
+ characters preceding their starter character. They get reversed
+ no differently than any other character.
+ The same thing also occurs after
+ <function>unicode_bidi_logical_order</function>() reverses
+ everything back.
+ Use <function>unicode_bidi_combinings</function> to identify
+ consecutive sequences of combining characters followed by their
+ original starter.
+ </para>
+
+ <para>
+ The callback may reorder the characters identified
+ by its fourth and fifth parameters
+ in the manner described below.
+ <function>unicode_bidi_combinings</function>'s parameter is
+ a pointer to a constant Unicode string; but the callback can modify the
+ string (via an out-of-band mutable pointer) subject to the
+ following conditions:
+ </para>
+
+ <itemizedlist>
+ <listitem>
+ <para>
+ The characters identified by the fourth and the fifth
+ parameters may be modified.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ If the last character in this sequence is not the last
+ character included in the range specified by the second
+ and the third parameters, then one more character after
+ the last character may also be modified.
+ </para>
+
+ <para>
+ This is, presumably, the original starter that preceded
+ the combining characters before the entire sequence was
+ reversed.
+ </para>
+ </listitem>
+ </itemizedlist>
+
+ <para>
+ Here's an example of a callback that reverses
+ combining characters and their immediately-following starter
+ character:
+ </para>
+ <blockquote>
+ <informalexample>
+ <programlisting><![CDATA[
+void reorder_right_to_left_combining(unicode_bidi_level_t level,
+ size_t level_start,
+ size_t n_chars,
+ size_t comb_start,
+ size_t n_comb_chars,
+ void *arg)
+{
+ /* Let's say that this is the Unicode string */
+ char32_t *buf=(char32_t *)arg;
+
+ if ((level & 1) == 0)
+ return; /* Left-to-right text not reversed */
+
+ char32_t *b=buf+comb_start;
+ char32_t *e=b+n_comb_chars;
+
+ /*
+ ** Include the starter characters in the reversed range.
+ ** The semantics of the combining characters with different
+ ** embedding levels -- so they get reported here separately -- is
+ ** not specified. This will reverse just the combining marks, and
+ ** they're on their own.
+ */
+
+ if (comb_start + n_comb_chars < level_start + n_chars)
+ ++e;
+
+ while (b < e)
+ {
+ char32_t t;
+
+ --e;
+ t=*b;
+ *b=*e;
+ *e=t;
+ ++b;
+ }
+}]]></programlisting>
+ </informalexample>
+ </blockquote>
+ </refsect2>
<refsect2 id="unicode_bidi_misc">
<title>Miscellaneous utility functions</title>
@@ -1251,8 +1461,9 @@ make install DESTDIR=/tmp/courier-unicode-instimage # For example.</programlisti
<refnamediv>
<refname>unicode_canonical</refname>
- <refname>unicode_decompose_info_init</refname>
- <refname>unicode_decompose_info_deinit</refname>
+ <refname>unicode_ccc</refname>
+ <refname>unicode_decomposition_init</refname>
+ <refname>unicode_decomposition_deinit</refname>
<refname>unicode_decompose</refname>
<refname>unicode_decompose_reallocate_size</refname>
<refname>unicode_compose</refname>
@@ -1272,8 +1483,13 @@ make install DESTDIR=/tmp/courier-unicode-instimage # For example.</programlisti
</funcprototype>
<funcprototype>
- <funcdef>void <function>unicode_decompose_info_init</function></funcdef>
- <paramdef>struct unicode_decompose_info *<parameter>info</parameter></paramdef>
+ <funcdef>uint8_t <function>unicode_ccc</function></funcdef>
+ <paramdef>char32_t <parameter>c</parameter></paramdef>
+ </funcprototype>
+
+ <funcprototype>
+ <funcdef>void <function>unicode_decomposition_init</function></funcdef>
+ <paramdef>unicode_decomposition_t *<parameter>info</parameter></paramdef>
<paramdef>char32_t *<parameter>string</parameter></paramdef>
<paramdef>size_t *<parameter>string_size</parameter></paramdef>
<paramdef>void *<parameter>arg</parameter></paramdef>
@@ -1281,17 +1497,17 @@ make install DESTDIR=/tmp/courier-unicode-instimage # For example.</programlisti
<funcprototype>
<funcdef>int <function>unicode_decompose</function></funcdef>
- <paramdef>struct unicode_decompose_info *<parameter>info</parameter></paramdef>
+ <paramdef>unicode_decomposition_t *<parameter>info</parameter></paramdef>
</funcprototype>
<funcprototype>
- <funcdef>void <function>unicode_decompose_info_deinit</function></funcdef>
- <paramdef>struct unicode_decompose_info *<parameter>info</parameter></paramdef>
+ <funcdef>void <function>unicode_decomposition_deinit</function></funcdef>
+ <paramdef>unicode_decomposition_t *<parameter>info</parameter></paramdef>
</funcprototype>
<funcprototype>
<funcdef>size_t <function>unicode_decompose_reallocate_size</function></funcdef>
- <paramdef>struct unicode_decompose_info *<parameter>info</parameter></paramdef>
+ <paramdef>unicode_decomposition_t *<parameter>info</parameter></paramdef>
<paramdef>const size_t *<parameter>sizes</parameter></paramdef>
<paramdef>size_t <parameter>n</parameter></paramdef>
</funcprototype>
@@ -1309,19 +1525,19 @@ make install DESTDIR=/tmp/courier-unicode-instimage # For example.</programlisti
<paramdef>const char32_t *<parameter>string</parameter></paramdef>
<paramdef>size_t <parameter>string_size</parameter></paramdef>
<paramdef>int <parameter>flags</parameter></paramdef>
- <paramdef>struct unicode_compositions **<parameter>ret</parameter></paramdef>
+ <paramdef>unicode_composition_t *<parameter>compositions</parameter></paramdef>
</funcprototype>
<funcprototype>
<funcdef>void <function>unicode_composition_deinit</function></funcdef>
- <paramdef>struct unicode_compositions *<parameter>ptr</parameter></paramdef>
+ <paramdef>unicode_composition_t *<parameter>compositions</parameter></paramdef>
</funcprototype>
<funcprototype>
<funcdef>size_t <function>unicode_composition_apply</function></funcdef>
<paramdef>char32_t *<parameter>string</parameter></paramdef>
<paramdef>size_t <parameter>string_size</parameter></paramdef>
- <paramdef>struct unicode_compositions *<parameter>compositions</parameter></paramdef>
+ <paramdef>unicode_composition_t *<parameter>compositions</parameter></paramdef>
</funcprototype>
</funcsynopsis>
</refsynopsisdiv>
@@ -1367,7 +1583,9 @@ make install DESTDIR=/tmp/courier-unicode-instimage # For example.</programlisti
<term><structfield>format</structfield></term>
<listitem>
<para>
- The character's canonical formatting flag, if any.
+ A value of <literal>UNICODE_CANONICAL_FMT_NONE</literal>
+ indicates a canonical mapping, other values indicate
+ a compatibility equivalent mapping.
</para>
</listitem>
</varlistentry>
@@ -1381,9 +1599,14 @@ make install DESTDIR=/tmp/courier-unicode-instimage # For example.</programlisti
</para>
<para>
- <function>unicode_decompose_info_init</function>(),
+ <function>unicode_ccc</function>() returns the character's
+ canonical combining class value.
+ </para>
+
+ <para>
+ <function>unicode_decomposition_init</function>(),
<function>unicode_decompose</function>()
- and <function>unicode_decompose_info_deinit</function>()
+ and <function>unicode_decomposition_deinit</function>()
implement a complete interface for decomposing a
Unicode string:
</para>
@@ -1391,18 +1614,18 @@ make install DESTDIR=/tmp/courier-unicode-instimage # For example.</programlisti
<blockquote>
<informalexample>
<programlisting><![CDATA[
-struct unicode_decompose_info info;
+unicode_decomposition_t info;
-unicode_decompose_info_init(&info, before, (size_t)-1, NULL);
+unicode_decomposition_init(&info, before, (size_t)-1, NULL);
info.decompose_flags=UNICODE_DECOMPOSE_FLAG_QC;
unicode_decompose(&info);
-unicode_decompose_info_deinit(&info);]]></programlisting>
+unicode_decomposition_deinit(&info);]]></programlisting>
</informalexample>
</blockquote>
<para>
- <function>unicode_decompose_info_init</function>() initializes
- a new <structname>unicode_decompose_info</structname> structure,
+ <function>unicode_decomposition_init</function>() initializes
+ a new <classname>unicode_decomposition_t</classname> structure,
that gets passed in as its first parameter.
The second parameter is a pointer to a Unicode string,
with the number of characters in the string in the third parameter.
@@ -1412,8 +1635,8 @@ unicode_decompose_info_deinit(&info);]]></programlisting>
<varname>string_size</varname> (which does not include the
trailing <literal>\0</literal>).
The last parameter is a <literal>void *</literal>, an opaque
- pointer that gets stored in the
- <structname>unicode_decompose_info</structname> object:
+ pointer that gets stored in the initialized
+ <classname>unicode_decomposition_t</classname> object:
</para>
<blockquote>
<informaltable border='0' colsep='0'>
@@ -1425,7 +1648,7 @@ unicode_decompose_info_deinit(&info);]]></programlisting>
<colspec colname='c5' />
<tbody>
<row>
- <entry namest='c1' nameend='c5'>struct&nbsp;<structname>unicode_decompose_info</structname>&nbsp;{</entry>
+ <entry namest='c1' nameend='c5'>typedef struct&nbsp;<structname>unicode_decomposition</structname>&nbsp;{</entry>
</row>
<row>
<entry namest='c2'>char32_t</entry>
@@ -1444,7 +1667,7 @@ unicode_decompose_info_deinit(&info);]]></programlisting>
<entry namest='c3' nameend='c5'>(*<varname>reallocate)(</varname></entry>
</row>
<row>
- <entry namest='c4'>struct&#160;unicode_decompose_info</entry>
+ <entry namest='c4'>struct&#160;unicode_decomposition</entry>
<entry>*<varname>info</varname>,</entry>
</row>
<row>
@@ -1467,7 +1690,7 @@ unicode_decompose_info_deinit(&info);]]></programlisting>
<entry namest='c3' nameend='c5'>*<varname>arg</varname>;</entry>
</row>
<row>
- <entry namest='c1' nameend='c5'>};</entry>
+ <entry namest='c1' nameend='c5'>} unicode_decomposition_t;</entry>
</row>
</tbody>
</tgroup>
@@ -1476,35 +1699,23 @@ unicode_decompose_info_deinit(&info);]]></programlisting>
<para>
<function>unicode_decompose</function>() proceeds and decomposes
- the <varname>string</varname> and replacing it with its
- decomposed <varname>string</varname> version. Finally
- <function>unicode_decompose_info_deinit</function>() releases
- all resources and destroys the
- <structname>unicode_decompose_info</structname>; it is no longer
- valid.
+ the <varname>string</varname> and replaces it with its
+ decomposed <varname>string</varname> version.
</para>
- <note>
- <para>
- At this time
- <function>unicode_decompose_info_deinit</function>() does
- nothing. All code should explicitly call it in order to
- remain forward-compatible (at the source level).
- </para>
- </note>
-
<para>
- <structname>unicode_decompose_info</structname>'s
+ <classname>unicode_decomposition_t</classname>'s
<varname>string</varname>,
<varname>string_size</varname> and
<varname>arg</varname> are copies of
- <function>unicode_decompose_info_init</function>'s parameters;
- and it initializes all other fields to their default values.
+ <function>unicode_decomposition_init</function>'s parameters.
+ <function>unicode_decomposition_init</function>
+ initializes all other fields to their default values.
</para>
<para>
The <varname>decompose_flags</varname> bitmask gets initialized to
- 0, and can be set to:
+ 0, and is a bit mask:
</para>
<variablelist>
@@ -1532,18 +1743,22 @@ unicode_decompose_info_deinit(&info);]]></programlisting>
</variablelist>
<para>
+ <varname>reallocate</varname> is a pointer to a function that
+ gets called to reallocate a larger <varname>string</varname>.
<function>unicode_decompose</function>() determines which characters
in the <varname>string</varname> need decomposing and calls
- the <varname>reallocate</varname> function zero or more times.
+ the <varname>reallocate</varname> function pointer
+ zero or more times.
Each call to <varname>reallocate</varname> passes information
about where new characters will get inserted into the
<varname>string</varname>.
</para>
<para>
- <varname>reallocate</varname> must enlarge the buffer where
- <varname>string</varname> points to be big enough to hold the
- larger, decomposed string; and update
+ <varname>reallocate</varname> only needs to grow the size of the
+ buffer where
+ <varname>string</varname> points so that it's big enough to hold
+ a larger, decomposed string; then update
<varname>string</varname> accordingly.
<varname>reallocate</varname> should not update
<varname>string_size</varname> or make any changes to the existing
@@ -1561,7 +1776,7 @@ unicode_decompose_info_deinit(&info);]]></programlisting>
<listitem>
<para>
A pointer to the
- <structname>unicode_decompose_info</structname> and, notably,
+ <classname>unicode_decomposition_t</classname> and, notably,
its <varname>arg</varname>.
</para>
</listitem>
@@ -1596,7 +1811,7 @@ unicode_decompose_info_deinit(&info);]]></programlisting>
</para>
<para>
- <function>unicode_decompose_info_init</function>() initializes
+ <function>unicode_decomposition_init</function>() initializes
the <varname>reallocate</varname> pointer to a default
implementation that uses
<citerefentry>
@@ -1613,7 +1828,7 @@ unicode_decompose_info_deinit(&info);]]></programlisting>
<blockquote>
<informalexample>
<programlisting><![CDATA[
-size_t unicode_decompose_reallocate_size(struct unicode_decompose_info *info,
+size_t unicode_decompose_reallocate_size(unicode_decomposition_t *info,
const size_t *sizes,
size_t n)
{
@@ -1630,12 +1845,16 @@ size_t unicode_decompose_reallocate_size(struct unicode_decompose_info *info,
</blockquote>
<para>
- The <varname>reallocate</varname> returns 0 on success and
+ The <varname>reallocate</varname> function
+ returns 0 on success and
a non-0 error code to report a failure; and
<varname>unicode_decompose</varname>() does the same.
- A successful decomposition results in
+ The only error condition from
+ <varname>unicode_decompose</varname>() is a non-0 error code
+ from the <varname>reallocate</varname> function. Otherwise:
+ a successful decomposition results in
<varname>unicode_decompose</varname>() returning 0 and
- <function>unicode_decompose_info_init</function>()'s
+ <function>unicode_decomposition_init</function>()'s
<varname>string</varname> pointing to the decomposed string
and <varname>string_size</varname> giving the number of
characters in the decomposed string.
@@ -1660,7 +1879,7 @@ size_t unicode_decompose_reallocate_size(struct unicode_decompose_info *info,
<literal>\0</literal> and no decomposition occurs, and
no calls to <varname>reallocate</varname> takes place:
the <varname>string</varname> in the
- <structname>unicode_decompose_info</structname> is unchanged
+ <classname>unicode_decomposition_t</classname> is unchanged
and it's still
<literal>\0</literal>-terminated.
</para>
@@ -1691,13 +1910,13 @@ size_t unicode_decompose_reallocate_size(struct unicode_decompose_info *info,
Multiple calls to the <varname>reallocate</varname> callback
are possible. Each call to <varname>reallocate</varname>
reflects the prior calls' decompositions. Example:
- the original string has ten characters and the first call
+ the original string has five characters and the first call
to <varname>reallocate</varname> had two offsets, at position
- 3 and 7, with a value of 1 for their both
+ 1 and 3, with a value of 1 for their both
<varname>sizes</varname>.
This effects transforming an original Unicode string
- "AAAAAAAAAA" into
- "AAAXAAAAXAAA" (with <quote>A</quote> representing unspecified
+ "AAAAA" into
+ "AXAAXAA" (with <quote>A</quote> representing unspecified
characters in the original string, and <quote>X</quote> showing
the two characters added in the first call to
<function>reallocate</function>).
@@ -1705,8 +1924,8 @@ size_t unicode_decompose_reallocate_size(struct unicode_decompose_info *info,
<para>
A second call to <varname>reallocate</varname> with an offset
- at position 8, and a size of 1, results in the updated
- string of "AAAXAAAAYXAAA" (with <quote>Y</quote>) marking an
+ at position 4, and a size of 1, results in the updated
+ string of "AXAAYXAA" (with <quote>Y</quote> marking an
unspecified character inserted by the second call).
</para>
</note>
@@ -1733,6 +1952,45 @@ size_t unicode_decompose_reallocate_size(struct unicode_decompose_info *info,
</note>
<para>
+ <function>unicode_decomposition_deinit</function>() releases
+ all resources and destroys the
+ <classname>unicode_decomposition_t</classname>; it is no longer
+ valid.
+ </para>
+
+ <note>
+ <para>
+ <function>unicode_decomposition_deinit</function>() does not
+ <citerefentry>
+ <refentrytitle>free</refentrytitle>
+ <manvolnum>3</manvolnum>
+ </citerefentry>
+ the <varname>string</varname>. The original string gets passed
+ in to <function>unicode_decomposition_init</function>() and
+ the decomposed string is left in the <varname>string</varname>.
+ </para>
+ </note>
+ <para>
+ The default implementation of the
+ <varname>reallocate</varname> function assumes the
+ <varname>string</varname> is a
+ <citerefentry>
+ <refentrytitle>malloc</refentrytitle>
+ <manvolnum>3</manvolnum>
+ </citerefentry>-ed string, and
+ <function>realloc</function>s it.
+ </para>
+
+ <note>
+ <para>
+ At this time
+ <function>unicode_decomposition_deinit</function>() does
+ nothing. All code should explicitly call it in order to
+ remain forward-compatible (at the source level).
+ </para>
+ </note>
+
+ <para>
<function>unicode_compose</function>() performs a canonical
composition of a decomposed string. Its parameters are:
</para>
@@ -1810,19 +2068,19 @@ size_t unicode_decompose_reallocate_size(struct unicode_decompose_info *info,
<function>unicode_composition_apply</function>()
and <function>unicode_composition_deinit</function>()
implement a detailed interface for canonical composition
- of a decompose Unicode string:
+ of a decomposed Unicode string:
</para>
<blockquote>
<informalexample>
<programlisting><![CDATA[
-struct unicode_compositions *compositions;
+unicode_composition_t compositions;
if (unicode_composition_init(str, strsize, flags, &compositions) == 0)
{
- size_t new_size=unicode_composition_apply(str, strsize, compositions);
+ size_t new_size=unicode_composition_apply(str, strsize, &compositions);
- unicode_composition_deinit(compositions);
+ unicode_composition_deinit(&compositions);
}]]></programlisting>
</informalexample>
</blockquote>
@@ -1842,17 +2100,19 @@ if (unicode_composition_init(str, strsize, flags, &compositions) == 0)
parameters are: any optional flags
(see <function>unicode_compose()</function> for a list of
available flags), and the address of a
- <structname>unicode_compositions</structname> pointer.
+ <classname>unicode_composition_t</classname> object.
A non-0 return from
<function>unicode_composition_init</function>() indicates an
error.
<function>unicode_composition_init</function>() indicates success
- by returning 0 and initializing a pointer to the head of a linked
- list of <structname>unicode_compositions</structname>'s objects
- that enumerate the canonical compositions.
+ by returning 0 and initializing the
+ <classname>unicode_composition_t</classname> object
+ which contains a pointer to an array of pointers to
+ <structname>unicode_compose_info</structname> objects, and
+ the number of pointers.
<function>unicode_composition_init</function>() does not change
- the string; the only thing it does is calculate the
- <structname>unicode_compositions</structname> list.
+ the string; the only thing it does is initialize the
+ <structname>unicode_composition_t</structname> object.
</para>
<para>
@@ -1869,10 +2129,14 @@ if (unicode_composition_init(str, strsize, flags, &compositions) == 0)
It is necessary to call
<function>unicode_composition_deinit</function>() to free all
memory that was allocated for the
- <structname>unicode_compositions</structname> list:
+ <classname>unicode_composition_t</classname> object:
</para>
<blockquote>
+
+
+
+
<informaltable border='0' colsep='0'>
<tgroup cols="3">
<colspec colname='c1' />
@@ -1880,14 +2144,10 @@ if (unicode_composition_init(str, strsize, flags, &compositions) == 0)
<colspec colname='c3' />
<tbody>
<row>
- <entry namest='c1' nameend='c3'>struct&nbsp;<structname>unicode_compositions</structname> {</entry>
+ <entry namest='c1' nameend='c3'>struct&nbsp;<structname>unicode_compose_info</structname> {</entry>
</row>
<row>
- <entry namest='c2'>struct unicode_compositions</entry>
- <entry>*<varname>next</varname>;</entry>
- </row>
- <row>
<entry namest='c2'>size_t</entry>
<entry><varname>index</varname>;</entry>
</row>
@@ -1906,6 +2166,24 @@ if (unicode_composition_init(str, strsize, flags, &compositions) == 0)
<row>
<entry namest='c1' nameend='c3'>};</entry>
</row>
+ <row>
+ <entry namest='c1' nameend='c3'>&nbsp;</entry>
+ </row>
+ <row>
+ <entry namest='c1' nameend='c3'>typedef&nbsp;struct&nbsp;{</entry>
+ </row>
+
+ <row>
+ <entry namest='c2'>struct&nbsp;<structname>unicode_compose_info</structname></entry>
+ <entry>**<varname>compositions</varname>;</entry>
+ </row>
+ <row>
+ <entry namest='c2'>size_t</entry>
+ <entry><varname>n_compositions</varname>;</entry>
+ </row>
+ <row>
+ <entry namest='c1' nameend='c3'>} unicode_composition_t;</entry>
+ </row>
</tbody>
</tgroup>
</informaltable>
@@ -1919,7 +2197,7 @@ if (unicode_composition_init(str, strsize, flags, &compositions) == 0)
The composed characters are the
<varname>composition</varname>;
and <varname>n_composition</varname> gives the
- number of composed characters
+ number of composed characters.
</para>
<para>
@@ -1944,15 +2222,21 @@ if (unicode_composition_init(str, strsize, flags, &compositions) == 0)
<para>
<function>unicode_composition_init</function>()
- sets the <structname>unicode_compositions</structname> pointer
- to <literal>NULL</literal> when the decomposed string has
- nothing to combine.
- This <literal>NULL</literal> pointer gets interpreted accordingly
+ sets <classname>unicode_composition_t</classname>'s
+ <varname>compositions</varname> pointer to an array of
+ pointers to <structname>unicode_compose_info</structname>s
+ that are sorted according to their <varname>index</varname>.
+ <varname>n_compositions</varname> gives the number of pointers
+ in the array, and is 0 if there are no compositions (the
+ array is empty).
+ The empty array gets interpreted accordingly
when it gets passed to
<function>unicode_composition_apply</function>() and
<function>unicode_composition_deinit</function>(): nothing
- happens (and <function>unicode_composition_apply</function>()
- simply returns the size of the unchanged <varname>string</varname>.
+ happens. <function>unicode_composition_apply</function>()
+ simply returns the size of the unchanged <varname>string</varname>,
+ and <function>unicode_composition_deinit</function>() does a
+ pro-forma cleanup.
</para>
</refsect1>
<refsect1 id="unicode_canonical_seealso">
@@ -3635,6 +3919,7 @@ if (unicode_composition_init(str, strsize, flags, &compositions) == 0)
<refname>unicode::bidi_reorder</refname>
<refname>unicode::bidi_cleanup</refname>
<refname>unicode::bidi_logical_order</refname>
+ <refname>unicode::bidi_combinings</refname>
<refname>unicode::bidi_needs_embed</refname>
<refname>unicode::bidi_embed</refname>
<refname>unicode::bidi_embed_paragraph_level</refname>
@@ -3734,6 +4019,19 @@ if (unicode_composition_init(str, strsize, flags, &compositions) == 0)
</funcprototype>
<funcprototype>
+ <funcdef>void <function>unicode::bidi_combinings</function></funcdef>
+ <paramdef>const std::u32string &amp;<parameter>string</parameter></paramdef>
+ <paramdef>const std::vector &lt;unicode_bidi_level_t&gt; &amp;<parameter>levels</parameter></paramdef>
+ <paramdef>const std::function &lt;void (unicode_bidi_level_t level, size_t level_start, size_t n_chars, size_t comb_start, size_t n_comb_chars)&gt; &amp;<parameter>callback</parameter></paramdef>
+ </funcprototype>
+
+ <funcprototype>
+ <funcdef>void <function>unicode::bidi_combinings</function></funcdef>
+ <paramdef>const std::u32string &amp;<parameter>string</parameter></paramdef>
+ <paramdef>const std::function &lt;void (unicode_bidi_level_t level, size_t level_start, size_t n_chars, size_t comb_start, size_t n_comb_chars)&gt; &amp;<parameter>callback</parameter></paramdef>
+ </funcprototype>
+
+ <funcprototype>
<funcdef>void <function>unicode::bidi_logical_order</function></funcdef>
<paramdef>std::vector &lt;unicode_bidi_level_t&gt; &amp;<parameter>levels</parameter></paramdef>
<paramdef>unicode_bidi_level_t <parameter>paragraph_embedding</parameter></paramdef>
@@ -4079,18 +4377,14 @@ constexpr int compose_flag_oneshot=UNICODE_COMPOSE_FLAG_ONESHOT;</funcsynopsisin
<funcprototype>
<funcdef>void <funcname>compose_default_callback</funcname></funcdef>
- <paramdef>size_t <parameter>index</parameter></paramdef>
- <paramdef>size_t <parameter>n_composed</parameter></paramdef>
- <paramdef>const char32_t *<parameter>compositions</parameter></paramdef>
- <paramdef>size_t <parameter>n_compositions</parameter></paramdef>
-
+ <paramdef>unicode_composition_t &amp;<parameter>compositions</parameter></paramdef>
</funcprototype>
<funcprototype>
<funcdef>void <funcname>compose</funcname></funcdef>
<paramdef>std::u32string &amp;<parameter>string</parameter></paramdef>
<paramdef>int <parameter>flags</parameter>=0</paramdef>
- <paramdef>const std::function&lt;void (size_t, size_t, const char32_t *, size_t)&gt; &amp;<parameter>cb</parameter>=compose_default_reallocate</paramdef>
+ <paramdef>const std::function&lt;void (unicode_composition_t &amp;)&gt; &amp;<parameter>cb</parameter>=compose_default_callback</paramdef>
</funcprototype>
</funcsynopsis>
</refsynopsisdiv>
@@ -4114,24 +4408,32 @@ constexpr int compose_flag_oneshot=UNICODE_COMPOSE_FLAG_ONESHOT;</funcsynopsisin
The C++ decomposition <parameter>reallocate</parameter> callback
receives a single vector of <replaceable>offset</replaceable>
and
- <replaceable>size</replaceable> tuple instead of two separate
- arrays or vectors; with the C++ version of the default
- <varname>reallocate</varname> callback receiving the same
- parameter.
+ <replaceable>size</replaceable> tuples instead of two separate
+ arrays or vectors.
+ <function>unicode::decompose_default_reallocate</function>() is
+ the C++ version of the default
+ <varname>reallocate</varname> callback. It receives the
+ same tuple vector parameter, too.
+ The C++ interface uses <classname>std::u32string</classname>s
+ to represent Unicode text strings, and
+ <function>unicode::decompose_default_reallocate</function>()
+ <function>resize</function>s it.
+ </para>
+
+ <para>
+ Like the C callback, the C++ one gets called 0 or more times.
</para>
</listitem>
<listitem>
<para>
- <function>unicode::compose</function>() invokes the callback
- function once for each composition point in the underlying
- <structname>unicode_compositions</structname> linked list.
- The callback directly receives the
- <varname>index</varname>,
- <varname>n_composed</varname>,
- <varname>composition</varname> and
- <varname>n_composition</varname> values as
- discrete parameters.
+ <function>unicode::compose</function>() takes care of
+ initializing, applying, and de-initializing
+ the <classname>unicode_composition_t</classname> object,
+ for composition.
+ The callback receives a reference to the
+ <classname>unicode_composition_t</classname> object, which
+ the callback should not modify in any way.
</para>
</listitem>
</itemizedlist>
diff --git a/unicode/configure.ac b/unicode/configure.ac
index 1cc3b76..b857741 100644
--- a/unicode/configure.ac
+++ b/unicode/configure.ac
@@ -1,6 +1,6 @@
dnl Process this file with autoconf to produce a configure script.
-AC_INIT([courier-unicode], [2.2.1.20210220], [courier-users@lists.sourceforge.net])
+AC_INIT([courier-unicode], [2.2.2], [courier-users@lists.sourceforge.net])
>confdefs.h # Kill PACKAGE_ macros
diff --git a/unicode/courier-unicode.h.in b/unicode/courier-unicode.h.in
index b3caa47..b9e2ee9 100644
--- a/unicode/courier-unicode.h.in
+++ b/unicode/courier-unicode.h.in
@@ -742,6 +742,18 @@ extern char32_t unicode_bidi_embed_paragraph_level(const char32_t *str,
size_t n,
unicode_bidi_level_t);
+extern void unicode_bidi_combinings(const char32_t *str,
+ const unicode_bidi_level_t *levels,
+ size_t n,
+ void (*combinings)
+ (unicode_bidi_level_t level,
+ size_t level_start,
+ size_t n_chars,
+ size_t comb_start,
+ size_t n_comb_chars,
+ void *arg),
+ void *arg);
+
/*
** unicode_canonical() returns the canonical mapping of the given Unicode
** character. The returned structure specifies:
@@ -1318,57 +1330,62 @@ extern const char unicode_u_ucs2_native[];
#define unicode_x_smap_modutf8 "x-smap-modutf8"
+extern uint8_t unicode_ccc(char32_t ch);
+
#define UNICODE_DECOMPOSE_FLAG_QC 1
#define UNICODE_DECOMPOSE_FLAG_COMPAT 2
-struct unicode_decompose_info {
+typedef struct unicode_decomposition {
char32_t *string;
size_t string_size;
int decompose_flags;
- int (*reallocate)(struct unicode_decompose_info *info,
+ int (*reallocate)(struct unicode_decomposition *info,
const size_t *offsets,
const size_t *sizes,
size_t n);
void *arg;
-};
+} unicode_decomposition_t;
-extern void unicode_decompose_info_init(struct unicode_decompose_info *,
- char32_t *string,
- size_t string_size,
- void *arg);
+extern void unicode_decomposition_init(unicode_decomposition_t *,
+ char32_t *string,
+ size_t string_size,
+ void *arg);
-extern void unicode_decompose_info_deinit(struct unicode_decompose_info *);
+extern void unicode_decomposition_deinit(unicode_decomposition_t *);
-extern int unicode_decompose(struct unicode_decompose_info *);
+extern int unicode_decompose(unicode_decomposition_t *);
-extern size_t unicode_decompose_reallocate_size(struct unicode_decompose_info *,
+extern size_t unicode_decompose_reallocate_size(unicode_decomposition_t *,
const size_t *sizes,
size_t n);
-struct unicode_compositions {
- struct unicode_compositions *next;
+struct unicode_compose_info {
size_t index;
size_t n_composed;
char32_t *composition;
size_t n_composition;
};
+typedef struct {
+ struct unicode_compose_info **compositions;
+ size_t n_compositions;
+} unicode_composition_t;
+
#define UNICODE_COMPOSE_FLAG_REMOVEUNUSED 1
#define UNICODE_COMPOSE_FLAG_ONESHOT 128
int unicode_composition_init(const char32_t *string,
size_t string_size,
int flags,
- struct unicode_compositions **ret
- );
+ unicode_composition_t *info);
-void unicode_composition_deinit(struct unicode_compositions *);
+void unicode_composition_deinit(unicode_composition_t *info);
size_t unicode_composition_apply(char32_t *string,
size_t string_size,
- struct unicode_compositions *compositions);
+ unicode_composition_t *info);
int unicode_compose(char32_t *string,
size_t string_size,
@@ -2429,6 +2446,28 @@ std::u32string bidi_embed(const std::u32string &string,
const std::vector<unicode_bidi_level_t> &levels,
unicode_bidi_level_t paragraph_embedding);
+
+//! Identify contiguous sequences of combining characters
+
+//! Bounded by each embedding level.
+
+void bidi_combinings(const std::u32string &string,
+ const std::vector<unicode_bidi_level_t> &levels,
+ const std::function<void (unicode_bidi_level_t level,
+ size_t level_start,
+ size_t n_chars,
+ size_t comb_start,
+ size_t n_comb_chars)> &callback);
+
+//! Identify contiguous sequences of combining characters
+
+void bidi_combinings(const std::u32string &string,
+ const std::function<void (unicode_bidi_level_t level,
+ size_t level_start,
+ size_t n_chars,
+ size_t comb_start,
+ size_t n_comb_chars)> &callback);
+
//! Check if a directional marker needs to be inserted
//! In order for the unicode string to have the specified default
@@ -2466,12 +2505,11 @@ constexpr int compose_flag_removeunused=UNICODE_COMPOSE_FLAG_REMOVEUNUSED;
constexpr int compose_flag_oneshot=UNICODE_COMPOSE_FLAG_ONESHOT;
-void compose_default_callback(size_t, size_t, const char32_t *, size_t);
+void compose_default_callback(unicode_composition_t &);
void compose(std::u32string &string,
int flags=0,
- const std::function<void (size_t, size_t,
- const char32_t *, size_t)> &cb=
+ const std::function<void (unicode_composition_t &)> &cb=
compose_default_callback);
#if 0
diff --git a/unicode/mknormalization.pl b/unicode/mknormalization.pl
index 0955a8a..a267e26 100644
--- a/unicode/mknormalization.pl
+++ b/unicode/mknormalization.pl
@@ -339,6 +339,8 @@ struct decomposition_info {
#define canonical_mult1 $mult1
#define canonical_mult2 $mult2
+#ifndef exclusion_table
+
static uint16_t canonical_compositions_lookup[]={
);
$comma="\t";
@@ -353,6 +355,7 @@ static uint16_t canonical_compositions_lookup[]={
$counter += scalar @{ $buckets{$index} // []};
}
print "\n};\n";
+ print "#endif\n";
exit(0);
}
++$hash;
diff --git a/unicode/normalization.h b/unicode/normalization.h
index ecd2353..96bbfa2 100644
--- a/unicode/normalization.h
+++ b/unicode/normalization.h
@@ -8936,123 +8936,123 @@ static const char32_t ccc_starting_pagetab[]={
*/
static const char32_t canonical_compositions[][3]={
- {0x0047,0x0304,0x1e20}, {0x0045,0x0306,0x0114}, {0x03a9,0x0300,0x1ffa}, {0x0044,0x0307,0x1e0a}, {0x1f51,0x0301,0x1f55}, {0x0041,0x030a,0x00c5}, {0x0391,0x0345,0x1fbc}, {0x0056,0x0323,0x1e7e},
- {0x03a5,0x0304,0x1fe9}, {0x1e62,0x0307,0x1e68}, {0x0055,0x0324,0x1e72}, {0x30d8,0x309a,0x30da}, {0x2248,0x0338,0x2249}, {0x00d4,0x0303,0x1ed6}, {0x0053,0x0326,0x0218}, {0x0052,0x0327,0x0156},
- {0x0435,0x0300,0x0450}, {0x0079,0x0301,0x00fd}, {0x0d47,0x0d3e,0x0d4b}, {0x307b,0x309a,0x307d}, {0x0391,0x0301,0x0386}, {0x004c,0x032d,0x1e3c}, {0x1fc6,0x0345,0x1fc7}, {0x1f23,0x0345,0x1f93},
- {0x0073,0x0307,0x1e61}, {0x0049,0x0330,0x1e2c}, {0x042d,0x0308,0x04ec}, {0x0cc6,0x0cc2,0x0cca}, {0x0395,0x0314,0x1f19}, {0x006f,0x030b,0x0151}, {0x006e,0x030c,0x0148}, {0x1f69,0x0345,0x1fa9},
- {0x0103,0x0303,0x1eb5}, {0x2277,0x0338,0x2279}, {0x0061,0x0302,0x00e2}, {0x014c,0x0300,0x1e50}, {0x21d4,0x0338,0x21ce}, {0x1e37,0x0304,0x1e39}, {0x0cca,0x0cd5,0x0ccb}, {0x01eb,0x0304,0x01ed},
- {0x01a0,0x0309,0x1ede}, {0x0069,0x0311,0x020b}, {0x30ab,0x3099,0x30ac}, {0x00a8,0x0301,0x0385}, {0x0418,0x0306,0x0419}, {0x1f0c,0x0345,0x1f8c}, {0x0416,0x0308,0x04dc}, {0x1f69,0x0301,0x1f6d},
- {0x03a9,0x0345,0x1ffc}, {0x30f1,0x3099,0x30f9}, {0x006e,0x0323,0x1e47}, {0x114b9,0x114bd,0x114be}, {0x004b,0x0301,0x1e30}, {0x004a,0x0302,0x0134}, {0x03c5,0x0313,0x1f50}, {0x03b9,0x0308,0x03ca},
- {0x0049,0x0303,0x0128}, {0x0069,0x0328,0x012f}, {0x0052,0x0311,0x0212}, {0x0045,0x0307,0x0116}, {0x2203,0x0338,0x2204}, {0x03a9,0x0301,0x038f}, {0x0064,0x032d,0x1e13}, {0x0057,0x0323,0x1e88},
- {0x04e8,0x0308,0x04ea}, {0x110a5,0x110ba,0x110ab}, {0x1e63,0x0307,0x1e69}, {0x0054,0x0326,0x021a}, {0x0053,0x0327,0x015e}, {0x007a,0x0301,0x017a}, {0x0079,0x0302,0x0177}, {0x0430,0x0306,0x04d1},
- {0x0075,0x0306,0x016d}, {0x1f24,0x0345,0x1f94}, {0x0074,0x0307,0x1e6b}, {0x0397,0x0313,0x1f28}, {0x006f,0x030c,0x01d2}, {0x1f6a,0x0345,0x1faa}, {0x3066,0x3099,0x3067}, {0x0063,0x0301,0x0107},
- {0x0933,0x093c,0x0934}, {0x0d46,0x0d57,0x0d4c}, {0x014d,0x0300,0x1e51}, {0x0061,0x0303,0x00e3}, {0x014c,0x0301,0x1e52}, {0x01a1,0x0309,0x1edf}, {0x1f0d,0x0345,0x1f8d}, {0x0417,0x0308,0x04de},
- {0x1ff6,0x0345,0x1ff7}, {0x006f,0x0323,0x1ecd}, {0x30f2,0x3099,0x30fa}, {0x00ef,0x0301,0x1e2f}, {0x2261,0x0338,0x2262}, {0x304f,0x3099,0x3050}, {0x004c,0x0301,0x0139}, {0x03c5,0x0314,0x1f51},
- {0x006b,0x0327,0x0137}, {0x0055,0x030f,0x0214}, {0x0049,0x0304,0x012a}, {0x0047,0x0306,0x011e}, {0x0046,0x0307,0x1e1e}, {0x0045,0x0308,0x00cb}, {0x0065,0x032d,0x1e19}, {0x00d8,0x0301,0x01fe},
- {0x04e9,0x0308,0x04eb}, {0x30db,0x3099,0x30dc}, {0x0041,0x030c,0x01cd}, {0x03a5,0x0306,0x1fe8}, {0x00d5,0x0304,0x022c}, {0x0054,0x0327,0x0162}, {0x0443,0x030b,0x04f3}, {0x007a,0x0302,0x1e91},
- {0x0079,0x0303,0x1ef9}, {0x1f28,0x0342,0x1f2e}, {0x004e,0x032d,0x1e4a}, {0x00c2,0x0300,0x1ea6}, {0x30c4,0x3099,0x30c5}, {0x0041,0x0323,0x1ea0}, {0x1f25,0x0345,0x1f95}, {0x0074,0x0308,0x1e97},
- {0x03c5,0x0342,0x1fe6}, {0x0397,0x0314,0x1f29}, {0x2190,0x0338,0x219a}, {0x0065,0x0300,0x00e8}, {0x1f6b,0x0345,0x1fab}, {0x0063,0x0302,0x0109}, {0x03d2,0x0308,0x03d4}, {0x014d,0x0301,0x1e53},
- {0x0061,0x0304,0x0101}, {0x30ad,0x3099,0x30ae}, {0x1f0e,0x0345,0x1f8e}, {0x0418,0x0308,0x04e4}, {0x00fc,0x030c,0x01da}, {0x004e,0x0300,0x01f8}, {0x004d,0x0301,0x1e3e}, {0x006c,0x0327,0x013c},
+ {0x0047,0x0304,0x1e20}, {0x0045,0x0306,0x0114}, {0x0044,0x0307,0x1e0a}, {0x03a9,0x0300,0x1ffa}, {0x1f51,0x0301,0x1f55}, {0x0041,0x030a,0x00c5}, {0x0056,0x0323,0x1e7e}, {0x0391,0x0345,0x1fbc},
+ {0x03a5,0x0304,0x1fe9}, {0x30d8,0x309a,0x30da}, {0x0055,0x0324,0x1e72}, {0x1e62,0x0307,0x1e68}, {0x2248,0x0338,0x2249}, {0x00d4,0x0303,0x1ed6}, {0x0053,0x0326,0x0218}, {0x0052,0x0327,0x0156},
+ {0x0435,0x0300,0x0450}, {0x0d47,0x0d3e,0x0d4b}, {0x0079,0x0301,0x00fd}, {0x307b,0x309a,0x307d}, {0x0391,0x0301,0x0386}, {0x004c,0x032d,0x1e3c}, {0x1fc6,0x0345,0x1fc7}, {0x1f23,0x0345,0x1f93},
+ {0x0073,0x0307,0x1e61}, {0x0049,0x0330,0x1e2c}, {0x042d,0x0308,0x04ec}, {0x0cc6,0x0cc2,0x0cca}, {0x0395,0x0314,0x1f19}, {0x006f,0x030b,0x0151}, {0x1f69,0x0345,0x1fa9}, {0x006e,0x030c,0x0148},
+ {0x2277,0x0338,0x2279}, {0x0103,0x0303,0x1eb5}, {0x0061,0x0302,0x00e2}, {0x014c,0x0300,0x1e50}, {0x21d4,0x0338,0x21ce}, {0x1e37,0x0304,0x1e39}, {0x0cca,0x0cd5,0x0ccb}, {0x01eb,0x0304,0x01ed},
+ {0x0069,0x0311,0x020b}, {0x01a0,0x0309,0x1ede}, {0x0418,0x0306,0x0419}, {0x00a8,0x0301,0x0385}, {0x30ab,0x3099,0x30ac}, {0x1f0c,0x0345,0x1f8c}, {0x0416,0x0308,0x04dc}, {0x1f69,0x0301,0x1f6d},
+ {0x30f1,0x3099,0x30f9}, {0x114b9,0x114bd,0x114be}, {0x03a9,0x0345,0x1ffc}, {0x006e,0x0323,0x1e47}, {0x004b,0x0301,0x1e30}, {0x03c5,0x0313,0x1f50}, {0x004a,0x0302,0x0134}, {0x0049,0x0303,0x0128},
+ {0x03b9,0x0308,0x03ca}, {0x0069,0x0328,0x012f}, {0x0052,0x0311,0x0212}, {0x0045,0x0307,0x0116}, {0x2203,0x0338,0x2204}, {0x03a9,0x0301,0x038f}, {0x0064,0x032d,0x1e13}, {0x04e8,0x0308,0x04ea},
+ {0x0057,0x0323,0x1e88}, {0x110a5,0x110ba,0x110ab}, {0x1e63,0x0307,0x1e69}, {0x0054,0x0326,0x021a}, {0x0053,0x0327,0x015e}, {0x007a,0x0301,0x017a}, {0x0079,0x0302,0x0177}, {0x0075,0x0306,0x016d},
+ {0x0430,0x0306,0x04d1}, {0x0074,0x0307,0x1e6b}, {0x1f24,0x0345,0x1f94}, {0x0397,0x0313,0x1f28}, {0x006f,0x030c,0x01d2}, {0x1f6a,0x0345,0x1faa}, {0x0063,0x0301,0x0107}, {0x3066,0x3099,0x3067},
+ {0x0d46,0x0d57,0x0d4c}, {0x0933,0x093c,0x0934}, {0x014d,0x0300,0x1e51}, {0x0061,0x0303,0x00e3}, {0x014c,0x0301,0x1e52}, {0x01a1,0x0309,0x1edf}, {0x1f0d,0x0345,0x1f8d}, {0x0417,0x0308,0x04de},
+ {0x1ff6,0x0345,0x1ff7}, {0x006f,0x0323,0x1ecd}, {0x00ef,0x0301,0x1e2f}, {0x30f2,0x3099,0x30fa}, {0x304f,0x3099,0x3050}, {0x004c,0x0301,0x0139}, {0x2261,0x0338,0x2262}, {0x0055,0x030f,0x0214},
+ {0x006b,0x0327,0x0137}, {0x03c5,0x0314,0x1f51}, {0x0049,0x0304,0x012a}, {0x0047,0x0306,0x011e}, {0x0046,0x0307,0x1e1e}, {0x0045,0x0308,0x00cb}, {0x0065,0x032d,0x1e19}, {0x04e9,0x0308,0x04eb},
+ {0x00d8,0x0301,0x01fe}, {0x30db,0x3099,0x30dc}, {0x0041,0x030c,0x01cd}, {0x03a5,0x0306,0x1fe8}, {0x00d5,0x0304,0x022c}, {0x0054,0x0327,0x0162}, {0x0443,0x030b,0x04f3}, {0x007a,0x0302,0x1e91},
+ {0x0079,0x0303,0x1ef9}, {0x1f28,0x0342,0x1f2e}, {0x004e,0x032d,0x1e4a}, {0x00c2,0x0300,0x1ea6}, {0x30c4,0x3099,0x30c5}, {0x0041,0x0323,0x1ea0}, {0x1f25,0x0345,0x1f95}, {0x03c5,0x0342,0x1fe6},
+ {0x0074,0x0308,0x1e97}, {0x2190,0x0338,0x219a}, {0x0397,0x0314,0x1f29}, {0x1f6b,0x0345,0x1fab}, {0x0065,0x0300,0x00e8}, {0x0063,0x0302,0x0109}, {0x03d2,0x0308,0x03d4}, {0x0061,0x0304,0x0101},
+ {0x014d,0x0301,0x1e53}, {0x30ad,0x3099,0x30ae}, {0x1f0e,0x0345,0x1f8e}, {0x0418,0x0308,0x04e4}, {0x00fc,0x030c,0x01da}, {0x004e,0x0300,0x01f8}, {0x004d,0x0301,0x1e3e}, {0x006c,0x0327,0x013c},
{0x22a8,0x0338,0x22ad}, {0x0047,0x0307,0x0120}, {0x0045,0x0309,0x1eba}, {0x0059,0x0323,0x1ef4}, {0x30db,0x309a,0x30dd}, {0x0062,0x0331,0x1e07}, {0x0447,0x0308,0x04f5}, {0x00d6,0x0304,0x022a},
- {0x0438,0x0300,0x045d}, {0x09c7,0x09be,0x09cb}, {0x2291,0x0338,0x22e2}, {0x0395,0x0300,0x1fc8}, {0x1f29,0x0342,0x1f2f}, {0x0079,0x0304,0x0233}, {0x0042,0x0323,0x1e04}, {0x0391,0x0304,0x1fb9},
- {0x00c2,0x0301,0x1ea4}, {0x1f26,0x0345,0x1f96}, {0x004b,0x0331,0x1e34}, {0x0075,0x0308,0x00fc}, {0x0430,0x0308,0x04d3}, {0x01b7,0x030c,0x01ee}, {0x0399,0x0313,0x1f38}, {0x1f6c,0x0345,0x1fac},
+ {0x0438,0x0300,0x045d}, {0x09c7,0x09be,0x09cb}, {0x2291,0x0338,0x22e2}, {0x0395,0x0300,0x1fc8}, {0x1f29,0x0342,0x1f2f}, {0x0079,0x0304,0x0233}, {0x0042,0x0323,0x1e04}, {0x00c2,0x0301,0x1ea4},
+ {0x0391,0x0304,0x1fb9}, {0x1f26,0x0345,0x1f96}, {0x0075,0x0308,0x00fc}, {0x01b7,0x030c,0x01ee}, {0x004b,0x0331,0x1e34}, {0x0430,0x0308,0x04d3}, {0x0399,0x0313,0x1f38}, {0x1f6c,0x0345,0x1fac},
{0x227a,0x0338,0x2280}, {0x0065,0x0301,0x00e9}, {0x3068,0x3099,0x3069}, {0x1f0f,0x0345,0x1f8f}, {0x03ac,0x0345,0x1fb4}, {0x005a,0x030c,0x017d}, {0x004f,0x0300,0x00d2}, {0x3051,0x3099,0x3052},
- {0x004e,0x0301,0x0143}, {0x007a,0x0331,0x1e95}, {0x1f10,0x0300,0x1f12}, {0x0055,0x0311,0x0216}, {0x0049,0x0306,0x012c}, {0x22a9,0x0338,0x22ae}, {0x0048,0x0307,0x1e22}, {0x01a0,0x0323,0x1ee2},
+ {0x004e,0x0301,0x0143}, {0x007a,0x0331,0x1e95}, {0x1f10,0x0300,0x1f12}, {0x0055,0x0311,0x0216}, {0x0049,0x0306,0x012c}, {0x22a9,0x0338,0x22ae}, {0x01a0,0x0323,0x1ee2}, {0x0048,0x0307,0x1e22},
{0x005a,0x0323,0x1e92}, {0x0043,0x030c,0x010c}, {0x03b1,0x0313,0x1f00}, {0x03a5,0x0308,0x03ab}, {0x0055,0x0328,0x0172}, {0x2292,0x0338,0x22e3}, {0x0395,0x0301,0x0388}, {0x30c6,0x3099,0x30c7},
- {0x0077,0x0307,0x1e87}, {0x1f27,0x0345,0x1f97}, {0x0041,0x0325,0x1e00}, {0x004c,0x0331,0x1e3a}, {0x0075,0x0309,0x1ee7}, {0x0399,0x0314,0x1f39}, {0x2192,0x0338,0x219b}, {0x0160,0x0307,0x1e66},
- {0x0072,0x030c,0x0159}, {0x1f6d,0x0345,0x1fad}, {0x227b,0x0338,0x2281}, {0x0065,0x0302,0x00ea}, {0x006f,0x030f,0x020d}, {0x1f28,0x0300,0x1f2a}, {0x30af,0x3099,0x30b0}, {0x0061,0x0306,0x0103},
- {0x03c5,0x0300,0x1f7a}, {0x06d2,0x0654,0x06d3}, {0x1f59,0x0342,0x1f5f}, {0x0072,0x0323,0x1e5b}, {0x004f,0x0301,0x00d3}, {0x2264,0x0338,0x2270}, {0x03c9,0x0313,0x1f60}, {0x006e,0x0327,0x0146},
- {0x1f11,0x0300,0x1f13}, {0x1f10,0x0301,0x1f14}, {0x00ea,0x0309,0x1ec3}, {0x01a1,0x0323,0x1ee3}, {0x0049,0x0307,0x0130}, {0x0048,0x0308,0x1e26}, {0x00dc,0x0300,0x01db}, {0x0065,0x0330,0x1e1b},
- {0x0044,0x030c,0x010e}, {0x224d,0x0338,0x226d}, {0x0064,0x0331,0x1e0f}, {0x03b1,0x0314,0x1f01}, {0x0041,0x030f,0x0200}, {0x0397,0x0300,0x1fca}, {0x1f40,0x0300,0x1f42}, {0x0044,0x0323,0x1e0c},
- {0x0078,0x0307,0x1e8b}, {0x1f28,0x0345,0x1f98}, {0x0391,0x0306,0x1fb8}, {0x0077,0x0308,0x1e85}, {0x00c2,0x0303,0x1eaa}, {0x0075,0x030a,0x016f}, {0x0161,0x0307,0x1e67}, {0x0073,0x030c,0x0161},
- {0x1f6e,0x0345,0x1fae}, {0x09c7,0x09d7,0x09cc}, {0x227c,0x0338,0x22e0}, {0x0067,0x0301,0x01f5}, {0x1f29,0x0300,0x1f2b}, {0x0065,0x0303,0x1ebd}, {0x1f28,0x0301,0x1f2c}, {0x0102,0x0309,0x1eb2},
- {0x0061,0x0307,0x0227}, {0x03b1,0x0342,0x1fb6}, {0x03c5,0x0301,0x03cd}, {0x00f4,0x0300,0x1ed3}, {0x0073,0x0323,0x1e63}, {0x03ae,0x0345,0x1fc4}, {0x2265,0x0338,0x2271}, {0x0050,0x0301,0x1e54},
+ {0x1f27,0x0345,0x1f97}, {0x0077,0x0307,0x1e87}, {0x0041,0x0325,0x1e00}, {0x004c,0x0331,0x1e3a}, {0x0075,0x0309,0x1ee7}, {0x2192,0x0338,0x219b}, {0x0399,0x0314,0x1f39}, {0x0160,0x0307,0x1e66},
+ {0x1f6d,0x0345,0x1fad}, {0x0072,0x030c,0x0159}, {0x227b,0x0338,0x2281}, {0x0065,0x0302,0x00ea}, {0x006f,0x030f,0x020d}, {0x1f28,0x0300,0x1f2a}, {0x30af,0x3099,0x30b0}, {0x0061,0x0306,0x0103},
+ {0x03c5,0x0300,0x1f7a}, {0x06d2,0x0654,0x06d3}, {0x1f59,0x0342,0x1f5f}, {0x0072,0x0323,0x1e5b}, {0x2264,0x0338,0x2270}, {0x004f,0x0301,0x00d3}, {0x03c9,0x0313,0x1f60}, {0x1f11,0x0300,0x1f13},
+ {0x006e,0x0327,0x0146}, {0x1f10,0x0301,0x1f14}, {0x0049,0x0307,0x0130}, {0x00ea,0x0309,0x1ec3}, {0x01a1,0x0323,0x1ee3}, {0x0048,0x0308,0x1e26}, {0x00dc,0x0300,0x01db}, {0x0065,0x0330,0x1e1b},
+ {0x0044,0x030c,0x010e}, {0x0064,0x0331,0x1e0f}, {0x224d,0x0338,0x226d}, {0x0041,0x030f,0x0200}, {0x03b1,0x0314,0x1f01}, {0x0397,0x0300,0x1fca}, {0x1f40,0x0300,0x1f42}, {0x0044,0x0323,0x1e0c},
+ {0x1f28,0x0345,0x1f98}, {0x0078,0x0307,0x1e8b}, {0x00c2,0x0303,0x1eaa}, {0x0077,0x0308,0x1e85}, {0x0391,0x0306,0x1fb8}, {0x0075,0x030a,0x016f}, {0x0161,0x0307,0x1e67}, {0x0073,0x030c,0x0161},
+ {0x1f6e,0x0345,0x1fae}, {0x09c7,0x09d7,0x09cc}, {0x227c,0x0338,0x22e0}, {0x0067,0x0301,0x01f5}, {0x0065,0x0303,0x1ebd}, {0x1f29,0x0300,0x1f2b}, {0x1f28,0x0301,0x1f2c}, {0x0061,0x0307,0x0227},
+ {0x0102,0x0309,0x1eb2}, {0x03b1,0x0342,0x1fb6}, {0x03c5,0x0301,0x03cd}, {0x00f4,0x0300,0x1ed3}, {0x0073,0x0323,0x1e63}, {0x03ae,0x0345,0x1fc4}, {0x0050,0x0301,0x1e54}, {0x2265,0x0338,0x2271},
{0x3053,0x3099,0x3054}, {0x004f,0x0302,0x00d4}, {0x03c9,0x0314,0x1f61}, {0x004e,0x0303,0x00d1}, {0x1f11,0x0301,0x1f15}, {0x22ab,0x0338,0x22af}, {0x2208,0x0338,0x2209}, {0x0049,0x0308,0x00cf},
- {0x0068,0x032e,0x1e2b}, {0x00dc,0x0301,0x01d7}, {0x0397,0x0345,0x1fcc}, {0x0045,0x030c,0x011a}, {0x00d5,0x0308,0x1e4e}, {0x043a,0x0301,0x045c}, {0x00d4,0x0309,0x1ed4}, {0x1f41,0x0300,0x1f43},
- {0x1b05,0x1b35,0x1b06}, {0x0397,0x0301,0x0389}, {0x0168,0x0301,0x1e78}, {0x1f40,0x0301,0x1f44}, {0x0045,0x0323,0x1eb8}, {0x30c8,0x3099,0x30c9}, {0x0435,0x0306,0x04d7}, {0x00c5,0x0301,0x01fa},
- {0x1f29,0x0345,0x1f99}, {0x0079,0x0307,0x1e8f}, {0x01af,0x0300,0x1eea}, {0x004e,0x0331,0x1e48}, {0x03c9,0x0342,0x1ff6}, {0x0078,0x0308,0x1e8d}, {0x0648,0x0654,0x0624}, {0x2194,0x0338,0x21ae},
+ {0x0068,0x032e,0x1e2b}, {0x00dc,0x0301,0x01d7}, {0x0397,0x0345,0x1fcc}, {0x0045,0x030c,0x011a}, {0x00d5,0x0308,0x1e4e}, {0x043a,0x0301,0x045c}, {0x00d4,0x0309,0x1ed4}, {0x1b05,0x1b35,0x1b06},
+ {0x0397,0x0301,0x0389}, {0x1f41,0x0300,0x1f43}, {0x0168,0x0301,0x1e78}, {0x1f40,0x0301,0x1f44}, {0x00c5,0x0301,0x01fa}, {0x30c8,0x3099,0x30c9}, {0x0435,0x0306,0x04d7}, {0x0045,0x0323,0x1eb8},
+ {0x1f29,0x0345,0x1f99}, {0x0079,0x0307,0x1e8f}, {0x0078,0x0308,0x1e8d}, {0x004e,0x0331,0x1e48}, {0x01af,0x0300,0x1eea}, {0x03c9,0x0342,0x1ff6}, {0x0648,0x0654,0x0624}, {0x2194,0x0338,0x21ae},
{0x0075,0x030b,0x0171}, {0x0069,0x0300,0x00ec}, {0x0074,0x030c,0x0165}, {0x1f6f,0x0345,0x1faf}, {0x227d,0x0338,0x22e1}, {0x0067,0x0302,0x011d}, {0x0065,0x0304,0x0113}, {0x1f29,0x0301,0x1f2d},
- {0x006f,0x0311,0x020f}, {0x30b1,0x3099,0x30b2}, {0x1ffe,0x0342,0x1fdf}, {0x0062,0x0307,0x1e03}, {0x0103,0x0309,0x1eb3}, {0x0061,0x0308,0x00e4}, {0x0074,0x0323,0x1e6d}, {0x00f4,0x0301,0x1ed1},
+ {0x006f,0x0311,0x020f}, {0x30b1,0x3099,0x30b2}, {0x1ffe,0x0342,0x1fdf}, {0x0062,0x0307,0x1e03}, {0x0103,0x0309,0x1eb3}, {0x0061,0x0308,0x00e4}, {0x00f4,0x0301,0x1ed1}, {0x0074,0x0323,0x1e6d},
{0x004f,0x0303,0x00d5}, {0x006f,0x0328,0x01eb}, {0x1f59,0x0300,0x1f5b}, {0x0049,0x0309,0x1ec8}, {0x11347,0x1133e,0x1134b}, {0x044b,0x0308,0x04f9}, {0x0041,0x0311,0x0202}, {0x0399,0x0300,0x1fda},
- {0x0438,0x0304,0x04e3}, {0x1f41,0x0301,0x1f45}, {0x0169,0x0301,0x1e79}, {0x0436,0x0306,0x04c2}, {0x00c6,0x0301,0x01fc}, {0x1f2a,0x0345,0x1f9a}, {0x007a,0x0307,0x017c}, {0x01b0,0x0300,0x1eeb},
- {0x0079,0x0308,0x00ff}, {0x03ca,0x0342,0x1fd7}, {0x01af,0x0301,0x1ee8}, {0x0077,0x030a,0x1e98}, {0x0041,0x0328,0x0104}, {0x0075,0x030c,0x01d4}, {0x1f70,0x0345,0x1fb2}, {0x0069,0x0301,0x00ed},
- {0x0068,0x0302,0x0125}, {0x0072,0x030f,0x0211}, {0x1fb6,0x0345,0x1fb7}, {0x06d5,0x0654,0x06c0}, {0x0063,0x0307,0x010b}, {0x0061,0x0309,0x1ea3}, {0x0075,0x0323,0x1ee5}, {0x00f5,0x0301,0x1e4d},
+ {0x0169,0x0301,0x1e79}, {0x0438,0x0304,0x04e3}, {0x1f41,0x0301,0x1f45}, {0x00c6,0x0301,0x01fc}, {0x0436,0x0306,0x04c2}, {0x007a,0x0307,0x017c}, {0x1f2a,0x0345,0x1f9a}, {0x0079,0x0308,0x00ff},
+ {0x01b0,0x0300,0x1eeb}, {0x03ca,0x0342,0x1fd7}, {0x01af,0x0301,0x1ee8}, {0x0077,0x030a,0x1e98}, {0x0041,0x0328,0x0104}, {0x1f70,0x0345,0x1fb2}, {0x0075,0x030c,0x01d4}, {0x0069,0x0301,0x00ed},
+ {0x0068,0x0302,0x0125}, {0x0072,0x030f,0x0211}, {0x1fb6,0x0345,0x1fb7}, {0x06d5,0x0654,0x06c0}, {0x0063,0x0307,0x010b}, {0x0061,0x0309,0x1ea3}, {0x00f5,0x0301,0x1e4d}, {0x0075,0x0323,0x1ee5},
{0x1ecc,0x0302,0x1ed8}, {0x3055,0x3099,0x3056}, {0x0052,0x0301,0x0154}, {0x004f,0x0304,0x014c}, {0x03b1,0x0300,0x1f70}, {0x0406,0x0308,0x0407}, {0x1f59,0x0301,0x1f5d}, {0x0047,0x030c,0x01e6},
- {0x03b5,0x0313,0x1f10}, {0x004f,0x031b,0x01a0}, {0x0399,0x0301,0x038a}, {0x1b07,0x1b35,0x1b08}, {0x0054,0x032d,0x1e70}, {0x04d8,0x0308,0x04da}, {0x00c7,0x0301,0x1e08}, {0x0dd9,0x0ddf,0x0dde},
- {0x1f2b,0x0345,0x1f9b}, {0x0435,0x0308,0x0451}, {0x03cb,0x0342,0x1fe7}, {0x01b0,0x0301,0x1ee9}, {0x00c4,0x0304,0x01de}, {0x0079,0x0309,0x1ef7}, {0x064a,0x0654,0x0626}, {0x0043,0x0327,0x00c7},
+ {0x03b5,0x0313,0x1f10}, {0x004f,0x031b,0x01a0}, {0x1b07,0x1b35,0x1b08}, {0x0399,0x0301,0x038a}, {0x0054,0x032d,0x1e70}, {0x00c7,0x0301,0x1e08}, {0x04d8,0x0308,0x04da}, {0x0dd9,0x0ddf,0x0dde},
+ {0x1f2b,0x0345,0x1f9b}, {0x0435,0x0308,0x0451}, {0x03cb,0x0342,0x1fe7}, {0x064a,0x0654,0x0626}, {0x0079,0x0309,0x1ef7}, {0x00c4,0x0304,0x01de}, {0x01b0,0x0301,0x1ee9}, {0x0043,0x0327,0x00c7},
{0x0069,0x0302,0x00ee}, {0x0067,0x0304,0x1e21}, {0x30b3,0x3099,0x30b4}, {0x0065,0x0306,0x0115}, {0x0064,0x0307,0x1e0b}, {0x03c9,0x0300,0x1f7c}, {0x041e,0x0308,0x04e6}, {0x0474,0x030f,0x0476},
- {0x0061,0x030a,0x00e5}, {0x0076,0x0323,0x1e7f}, {0x03c5,0x0304,0x1fe1}, {0x03b1,0x0345,0x1fb3}, {0x1ecd,0x0302,0x1ed9}, {0x0075,0x0324,0x1e73}, {0x00f4,0x0303,0x1ed7}, {0x0053,0x0301,0x015a},
+ {0x0061,0x030a,0x00e5}, {0x03b1,0x0345,0x1fb3}, {0x03c5,0x0304,0x1fe1}, {0x0076,0x0323,0x1e7f}, {0x1ecd,0x0302,0x1ed9}, {0x0075,0x0324,0x1e73}, {0x00f4,0x0303,0x1ed7}, {0x0053,0x0301,0x015a},
{0x0073,0x0326,0x0219}, {0x0072,0x0327,0x0157}, {0x1f00,0x0342,0x1f06}, {0x1ffe,0x0300,0x1fdd}, {0x004d,0x0307,0x1e40}, {0x03b1,0x0301,0x03ac}, {0x220b,0x0338,0x220c}, {0x006c,0x032d,0x1e3d},
- {0x0069,0x0330,0x1e2d}, {0x0048,0x030c,0x021e}, {0x0068,0x0331,0x1e96}, {0x044d,0x0308,0x04ed}, {0x00dc,0x0304,0x01d5}, {0x0045,0x030f,0x0204}, {0x03b5,0x0314,0x1f11}, {0x0055,0x032d,0x1e76},
- {0x04d9,0x0308,0x04db}, {0x0048,0x0323,0x1e24}, {0x0438,0x0306,0x0439}, {0x1f2c,0x0345,0x1f9c}, {0x11347,0x11357,0x1134c}, {0x0436,0x0308,0x04dd}, {0x039f,0x0313,0x1f48}, {0x0079,0x030a,0x1e99},
- {0x0044,0x0327,0x1e10}, {0x03c9,0x0345,0x1ff3}, {0x01af,0x0303,0x1eee}, {0x006b,0x0301,0x1e31}, {0x006a,0x0302,0x0135}, {0x11935,0x11930,0x11938}, {0x0069,0x0303,0x0129}, {0x0423,0x0304,0x04ee},
- {0x0072,0x0311,0x0213}, {0x03ca,0x0300,0x1fd2}, {0x0065,0x0307,0x0117}, {0x2223,0x0338,0x2224}, {0x03c9,0x0301,0x03ce}, {0x0475,0x030f,0x0477}, {0x0077,0x0323,0x1e89}, {0x0055,0x0300,0x00d9},
- {0x3057,0x3099,0x3058}, {0x0074,0x0326,0x021b}, {0x0053,0x0302,0x015c}, {0x0073,0x0327,0x015f}, {0x1f01,0x0342,0x1f07}, {0x004f,0x0306,0x014e}, {0x309d,0x3099,0x309e}, {0x004e,0x0307,0x1e44},
- {0x1ffe,0x0301,0x1fde}, {0x0049,0x030c,0x01cf}, {0x0dd9,0x0dca,0x0dda}, {0x03b7,0x0313,0x1f20}, {0x1b09,0x1b35,0x1b0a}, {0x1f30,0x0342,0x1f36}, {0x00ca,0x0300,0x1ec0}, {0x0049,0x0323,0x1eca},
+ {0x0069,0x0330,0x1e2d}, {0x0048,0x030c,0x021e}, {0x044d,0x0308,0x04ed}, {0x0068,0x0331,0x1e96}, {0x00dc,0x0304,0x01d5}, {0x03b5,0x0314,0x1f11}, {0x0045,0x030f,0x0204}, {0x0055,0x032d,0x1e76},
+ {0x0438,0x0306,0x0439}, {0x0048,0x0323,0x1e24}, {0x04d9,0x0308,0x04db}, {0x1f2c,0x0345,0x1f9c}, {0x0436,0x0308,0x04dd}, {0x11347,0x11357,0x1134c}, {0x039f,0x0313,0x1f48}, {0x0079,0x030a,0x1e99},
+ {0x0044,0x0327,0x1e10}, {0x03c9,0x0345,0x1ff3}, {0x01af,0x0303,0x1eee}, {0x006b,0x0301,0x1e31}, {0x006a,0x0302,0x0135}, {0x0069,0x0303,0x0129}, {0x11935,0x11930,0x11938}, {0x0423,0x0304,0x04ee},
+ {0x0072,0x0311,0x0213}, {0x0065,0x0307,0x0117}, {0x03ca,0x0300,0x1fd2}, {0x2223,0x0338,0x2224}, {0x03c9,0x0301,0x03ce}, {0x0475,0x030f,0x0477}, {0x0077,0x0323,0x1e89}, {0x0055,0x0300,0x00d9},
+ {0x3057,0x3099,0x3058}, {0x0053,0x0302,0x015c}, {0x0074,0x0326,0x021b}, {0x0073,0x0327,0x015f}, {0x1f01,0x0342,0x1f07}, {0x004f,0x0306,0x014e}, {0x309d,0x3099,0x309e}, {0x1ffe,0x0301,0x1fde},
+ {0x004e,0x0307,0x1e44}, {0x0049,0x030c,0x01cf}, {0x03b7,0x0313,0x1f20}, {0x0dd9,0x0dca,0x0dda}, {0x1b09,0x1b35,0x1b0a}, {0x1f30,0x0342,0x1f36}, {0x00ca,0x0300,0x1ec0}, {0x0049,0x0323,0x1eca},
{0x1f2d,0x0345,0x1f9d}, {0x1ea0,0x0302,0x1eac}, {0x0052,0x0331,0x1e5e}, {0x0437,0x0308,0x04df}, {0x00c6,0x0304,0x01e2}, {0x0045,0x0327,0x0228}, {0x039f,0x0314,0x1f49}, {0x01b0,0x0303,0x1eef},
{0x006c,0x0301,0x013a}, {0x306f,0x3099,0x3070}, {0x0075,0x030f,0x0215}, {0x0069,0x0304,0x012b}, {0x30b5,0x3099,0x30b6}, {0x0067,0x0306,0x011f}, {0x0066,0x0307,0x1e1f}, {0x03cb,0x0300,0x1fe2},
{0x03ca,0x0301,0x0390}, {0x0065,0x0308,0x00eb}, {0x0b92,0x0bd7,0x0b94}, {0x114b9,0x114b0,0x114bc}, {0x00f8,0x0301,0x01ff}, {0x0061,0x030c,0x01ce}, {0x0055,0x0301,0x00da}, {0x03c5,0x0306,0x1fe0},
- {0x00f5,0x0304,0x022d}, {0x0074,0x0327,0x0163}, {0x06c1,0x0654,0x06c2}, {0x004f,0x0307,0x022e}, {0x006e,0x032d,0x1e4b}, {0x00e2,0x0300,0x1ea7}, {0x0061,0x0323,0x1ea1}, {0x1eb8,0x0302,0x1ec6},
- {0x03b7,0x0314,0x1f21}, {0x1f00,0x0300,0x1f02}, {0x0045,0x0311,0x0206}, {0x1f31,0x0342,0x1f37}, {0x0399,0x0304,0x1fd9}, {0x00ca,0x0301,0x1ebe}, {0x1f2e,0x0345,0x1f9e}, {0x1ea1,0x0302,0x1ead},
- {0x223c,0x0338,0x2241}, {0x0438,0x0308,0x04e5}, {0x0045,0x0328,0x0118}, {0x006e,0x0300,0x01f9}, {0x1f74,0x0345,0x1fc2}, {0x006d,0x0301,0x1e3f}, {0x2282,0x0338,0x2284}, {0x00c2,0x0309,0x1ea8},
- {0x306f,0x309a,0x3071}, {0x0423,0x0306,0x040e}, {0x0067,0x0307,0x0121}, {0x03cb,0x0301,0x03b0}, {0x2225,0x0338,0x2226}, {0x03b7,0x0342,0x1fc6}, {0x0065,0x0309,0x1ebb}, {0x1f60,0x0342,0x1f66},
- {0x0079,0x0323,0x1ef5}, {0x0057,0x0300,0x1e80}, {0x3059,0x3099,0x305a}, {0x0055,0x0302,0x00db}, {0x00f6,0x0304,0x022b}, {0x1f18,0x0300,0x1f1a}, {0x0b47,0x0b3e,0x0b4b}, {0x1f00,0x0345,0x1f80},
- {0x0050,0x0307,0x1e56}, {0x03b5,0x0300,0x1f72}, {0x004f,0x0308,0x00d6}, {0x0062,0x0323,0x1e05}, {0x03b1,0x0304,0x1fb1}, {0x00e2,0x0301,0x1ea5}, {0x004b,0x030c,0x01e8}, {0x1eb9,0x0302,0x1ec7},
- {0x006b,0x0331,0x1e35}, {0x03b9,0x0313,0x1f30}, {0x1f01,0x0300,0x1f03}, {0x017f,0x0307,0x1e9b}, {0x1f00,0x0301,0x1f04}, {0x1b0b,0x1b35,0x1b0c}, {0x1e5a,0x0304,0x1e5c}, {0x004b,0x0323,0x1e32},
- {0x11099,0x110ba,0x1109a}, {0x1f2f,0x0345,0x1f9f}, {0x0055,0x0330,0x1e74}, {0x0054,0x0331,0x1e6e}, {0x0112,0x0300,0x1e14}, {0x03a1,0x0314,0x1fec}, {0x0047,0x0327,0x0122}, {0x007a,0x030c,0x017e},
- {0x006f,0x0300,0x00f2}, {0x2283,0x0338,0x2285}, {0x006e,0x0301,0x0144}, {0x1f30,0x0300,0x1f32}, {0x0075,0x0311,0x0217}, {0x0069,0x0306,0x012d}, {0x30b7,0x3099,0x30b8}, {0x0068,0x0307,0x1e23},
- {0x1b3a,0x1b35,0x1b3b}, {0x1f61,0x0342,0x1f67}, {0x30fd,0x3099,0x30fe}, {0x0292,0x030c,0x01ef}, {0x007a,0x0323,0x1e93}, {0x0063,0x030c,0x010d}, {0x0057,0x0301,0x1e82}, {0x03c5,0x0308,0x03cb},
- {0x1f19,0x0300,0x1f1b}, {0x0055,0x0303,0x0168}, {0x0075,0x0328,0x0173}, {0x1f18,0x0301,0x1f1c}, {0x0cbf,0x0cd5,0x0cc0}, {0x22b2,0x0338,0x22ea}, {0x1f01,0x0345,0x1f81}, {0x03b5,0x0301,0x03ad},
- {0x004f,0x0309,0x1ece}, {0x0226,0x0304,0x01e0}, {0x0041,0x0300,0x00c0}, {0x004c,0x030c,0x013d}, {0x0061,0x0325,0x1e01}, {0x006c,0x0331,0x1e3b}, {0x0ddc,0x0dca,0x0ddd}, {0x03b9,0x0314,0x1f31},
- {0x0049,0x030f,0x0208}, {0x1f01,0x0301,0x1f05}, {0x0b47,0x0b56,0x0b48}, {0x039f,0x0300,0x1ff8}, {0x1f48,0x0300,0x1f4a}, {0x1e5b,0x0304,0x1e5d}, {0x004c,0x0323,0x1e36}, {0x30cf,0x3099,0x30d0},
- {0x0399,0x0306,0x1fd8}, {0x00ca,0x0303,0x1ec4}, {0x0113,0x0300,0x1e15}, {0x0048,0x0327,0x1e28}, {0x0112,0x0301,0x1e16}, {0x006f,0x0301,0x00f3}, {0x3072,0x3099,0x3073}, {0x1fbf,0x0342,0x1fcf},
- {0x1f31,0x0300,0x1f33}, {0x1f30,0x0301,0x1f34}, {0x03b9,0x0342,0x1fd6}, {0x0068,0x0308,0x1e27}, {0x0423,0x0308,0x04f0}, {0x00fc,0x0300,0x01dc}, {0x0064,0x030c,0x010f}, {0x0059,0x0300,0x1ef2},
- {0x0413,0x0301,0x0403}, {0x0bc6,0x0bbe,0x0bca}, {0x305b,0x3099,0x305c}, {0x1025,0x102e,0x1026}, {0x0928,0x093c,0x0929}, {0x0057,0x0302,0x0174}, {0x0056,0x0303,0x1e7c}, {0x0061,0x030f,0x0201},
- {0x1f19,0x0301,0x1f1d}, {0x0055,0x0304,0x016a}, {0x22b3,0x0338,0x22eb}, {0x0052,0x0307,0x1e58}, {0x03b7,0x0300,0x1f74}, {0x1f02,0x0345,0x1f82}, {0x1f60,0x0300,0x1f62}, {0x0227,0x0304,0x01e1},
- {0x0064,0x0323,0x1e0d}, {0x0041,0x0301,0x00c1}, {0x03b1,0x0306,0x1fb0}, {0x00e2,0x0303,0x1eab}, {0x0055,0x031b,0x01af}, {0x0b47,0x0b57,0x0b4c}, {0x039f,0x0301,0x038c}, {0x1f49,0x0300,0x1f4b},
- {0x1b0d,0x1b35,0x1b0e}, {0x1f48,0x0301,0x1f4c}, {0x004d,0x0323,0x1e42}, {0x1109b,0x110ba,0x1109c}, {0x30cf,0x309a,0x30d1}, {0x0113,0x0301,0x1e17}, {0x1ea0,0x0306,0x1eb6}, {0x03ce,0x0345,0x1ff4},
+ {0x00f5,0x0304,0x022d}, {0x0074,0x0327,0x0163}, {0x004f,0x0307,0x022e}, {0x06c1,0x0654,0x06c2}, {0x006e,0x032d,0x1e4b}, {0x00e2,0x0300,0x1ea7}, {0x0061,0x0323,0x1ea1}, {0x1eb8,0x0302,0x1ec6},
+ {0x03b7,0x0314,0x1f21}, {0x1f00,0x0300,0x1f02}, {0x0045,0x0311,0x0206}, {0x1f31,0x0342,0x1f37}, {0x0399,0x0304,0x1fd9}, {0x00ca,0x0301,0x1ebe}, {0x1ea1,0x0302,0x1ead}, {0x1f2e,0x0345,0x1f9e},
+ {0x0438,0x0308,0x04e5}, {0x223c,0x0338,0x2241}, {0x0045,0x0328,0x0118}, {0x1f74,0x0345,0x1fc2}, {0x006e,0x0300,0x01f9}, {0x006d,0x0301,0x1e3f}, {0x2282,0x0338,0x2284}, {0x306f,0x309a,0x3071},
+ {0x00c2,0x0309,0x1ea8}, {0x0423,0x0306,0x040e}, {0x0067,0x0307,0x0121}, {0x03b7,0x0342,0x1fc6}, {0x03cb,0x0301,0x03b0}, {0x2225,0x0338,0x2226}, {0x1f60,0x0342,0x1f66}, {0x0065,0x0309,0x1ebb},
+ {0x0079,0x0323,0x1ef5}, {0x0057,0x0300,0x1e80}, {0x3059,0x3099,0x305a}, {0x0055,0x0302,0x00db}, {0x00f6,0x0304,0x022b}, {0x1f18,0x0300,0x1f1a}, {0x0b47,0x0b3e,0x0b4b}, {0x0050,0x0307,0x1e56},
+ {0x03b5,0x0300,0x1f72}, {0x1f00,0x0345,0x1f80}, {0x004f,0x0308,0x00d6}, {0x03b1,0x0304,0x1fb1}, {0x00e2,0x0301,0x1ea5}, {0x0062,0x0323,0x1e05}, {0x004b,0x030c,0x01e8}, {0x1eb9,0x0302,0x1ec7},
+ {0x006b,0x0331,0x1e35}, {0x03b9,0x0313,0x1f30}, {0x017f,0x0307,0x1e9b}, {0x1f01,0x0300,0x1f03}, {0x1f00,0x0301,0x1f04}, {0x1b0b,0x1b35,0x1b0c}, {0x1e5a,0x0304,0x1e5c}, {0x004b,0x0323,0x1e32},
+ {0x11099,0x110ba,0x1109a}, {0x0055,0x0330,0x1e74}, {0x1f2f,0x0345,0x1f9f}, {0x0054,0x0331,0x1e6e}, {0x0047,0x0327,0x0122}, {0x03a1,0x0314,0x1fec}, {0x0112,0x0300,0x1e14}, {0x006f,0x0300,0x00f2},
+ {0x007a,0x030c,0x017e}, {0x2283,0x0338,0x2285}, {0x006e,0x0301,0x0144}, {0x1f30,0x0300,0x1f32}, {0x0075,0x0311,0x0217}, {0x0069,0x0306,0x012d}, {0x30b7,0x3099,0x30b8}, {0x0068,0x0307,0x1e23},
+ {0x1b3a,0x1b35,0x1b3b}, {0x1f61,0x0342,0x1f67}, {0x30fd,0x3099,0x30fe}, {0x0292,0x030c,0x01ef}, {0x007a,0x0323,0x1e93}, {0x0063,0x030c,0x010d}, {0x0057,0x0301,0x1e82}, {0x0055,0x0303,0x0168},
+ {0x1f19,0x0300,0x1f1b}, {0x03c5,0x0308,0x03cb}, {0x0075,0x0328,0x0173}, {0x1f18,0x0301,0x1f1c}, {0x0cbf,0x0cd5,0x0cc0}, {0x22b2,0x0338,0x22ea}, {0x1f01,0x0345,0x1f81}, {0x03b5,0x0301,0x03ad},
+ {0x004f,0x0309,0x1ece}, {0x0226,0x0304,0x01e0}, {0x0041,0x0300,0x00c0}, {0x004c,0x030c,0x013d}, {0x0061,0x0325,0x1e01}, {0x006c,0x0331,0x1e3b}, {0x0ddc,0x0dca,0x0ddd}, {0x0049,0x030f,0x0208},
+ {0x03b9,0x0314,0x1f31}, {0x1f01,0x0301,0x1f05}, {0x0b47,0x0b56,0x0b48}, {0x039f,0x0300,0x1ff8}, {0x1f48,0x0300,0x1f4a}, {0x1e5b,0x0304,0x1e5d}, {0x004c,0x0323,0x1e36}, {0x30cf,0x3099,0x30d0},
+ {0x00ca,0x0303,0x1ec4}, {0x0399,0x0306,0x1fd8}, {0x0113,0x0300,0x1e15}, {0x0048,0x0327,0x1e28}, {0x0112,0x0301,0x1e16}, {0x3072,0x3099,0x3073}, {0x006f,0x0301,0x00f3}, {0x1fbf,0x0342,0x1fcf},
+ {0x1f31,0x0300,0x1f33}, {0x1f30,0x0301,0x1f34}, {0x0068,0x0308,0x1e27}, {0x0423,0x0308,0x04f0}, {0x03b9,0x0342,0x1fd6}, {0x00fc,0x0300,0x01dc}, {0x0059,0x0300,0x1ef2}, {0x0064,0x030c,0x010f},
+ {0x0bc6,0x0bbe,0x0bca}, {0x305b,0x3099,0x305c}, {0x0413,0x0301,0x0403}, {0x0057,0x0302,0x0174}, {0x1025,0x102e,0x1026}, {0x0928,0x093c,0x0929}, {0x0056,0x0303,0x1e7c}, {0x0061,0x030f,0x0201},
+ {0x1f19,0x0301,0x1f1d}, {0x0055,0x0304,0x016a}, {0x22b3,0x0338,0x22eb}, {0x03b7,0x0300,0x1f74}, {0x1f02,0x0345,0x1f82}, {0x0052,0x0307,0x1e58}, {0x1f60,0x0300,0x1f62}, {0x0227,0x0304,0x01e1},
+ {0x0064,0x0323,0x1e0d}, {0x03b1,0x0306,0x1fb0}, {0x00e2,0x0303,0x1eab}, {0x0041,0x0301,0x00c1}, {0x0055,0x031b,0x01af}, {0x0b47,0x0b57,0x0b4c}, {0x1f49,0x0300,0x1f4b}, {0x1b0d,0x1b35,0x1b0e},
+ {0x039f,0x0301,0x038c}, {0x1f48,0x0301,0x1f4c}, {0x1109b,0x110ba,0x1109c}, {0x004d,0x0323,0x1e42}, {0x30cf,0x309a,0x30d1}, {0x03ce,0x0345,0x1ff4}, {0x0113,0x0301,0x1e17}, {0x1ea0,0x0306,0x1eb6},
{0x115b8,0x115af,0x115ba}, {0x0070,0x0301,0x1e55}, {0x3072,0x309a,0x3074}, {0x006f,0x0302,0x00f4}, {0x006e,0x0303,0x00f1}, {0x1f31,0x0301,0x1f35}, {0x30b9,0x3099,0x30ba}, {0x0069,0x0308,0x00ef},
- {0x01a0,0x0300,0x1edc}, {0x1b3c,0x1b35,0x1b3d}, {0x03b7,0x0345,0x1fc3}, {0x00fc,0x0301,0x01d8}, {0x0415,0x0300,0x0400}, {0x1f60,0x0345,0x1fa0}, {0x0065,0x030c,0x011b}, {0x0bc7,0x0bbe,0x0bcb},
- {0x0059,0x0301,0x00dd}, {0x00f5,0x0308,0x1e4f}, {0x22b4,0x0338,0x22ec}, {0x1f03,0x0345,0x1f83}, {0x00f4,0x0309,0x1ed5}, {0x0053,0x0307,0x1e60}, {0x1f61,0x0300,0x1f63}, {0x03b7,0x0301,0x03ae},
+ {0x01a0,0x0300,0x1edc}, {0x1b3c,0x1b35,0x1b3d}, {0x03b7,0x0345,0x1fc3}, {0x00fc,0x0301,0x01d8}, {0x0415,0x0300,0x0400}, {0x0065,0x030c,0x011b}, {0x1f60,0x0345,0x1fa0}, {0x0bc7,0x0bbe,0x0bcb},
+ {0x0059,0x0301,0x00dd}, {0x00f5,0x0308,0x1e4f}, {0x22b4,0x0338,0x22ec}, {0x00f4,0x0309,0x1ed5}, {0x1f03,0x0345,0x1f83}, {0x0053,0x0307,0x1e60}, {0x1f61,0x0300,0x1f63}, {0x03b7,0x0301,0x03ae},
{0x1f60,0x0301,0x1f64}, {0x0065,0x0323,0x1eb9}, {0x00e5,0x0301,0x01fb}, {0x004f,0x030b,0x0150}, {0x004e,0x030c,0x0147}, {0x006e,0x0331,0x1e49}, {0x0041,0x0302,0x00c2}, {0x0049,0x0311,0x020a},
- {0x0dd9,0x0dcf,0x0ddc}, {0x1f49,0x0301,0x1f4d}, {0x004e,0x0323,0x1e46}, {0x0399,0x0308,0x03aa}, {0x016a,0x0308,0x1e7a}, {0x1ea1,0x0306,0x1eb7}, {0x0049,0x0328,0x012e}, {0x115b9,0x115af,0x115bb},
- {0x2286,0x0338,0x2288}, {0x006f,0x0303,0x00f5}, {0x0044,0x032d,0x1e12}, {0x01af,0x0309,0x1eec}, {0x01a1,0x0300,0x1edd}, {0x0069,0x0309,0x1ec9}, {0x01a0,0x0301,0x1eda}, {0x1f61,0x0345,0x1fa1},
- {0x1fbf,0x0300,0x1fcd}, {0x305d,0x3099,0x305e}, {0x005a,0x0301,0x0179}, {0x0059,0x0302,0x0176}, {0x0061,0x0311,0x0203}, {0x0410,0x0306,0x04d0}, {0x0055,0x0306,0x016c}, {0x22b5,0x0338,0x22ed},
- {0x0054,0x0307,0x1e6a}, {0x1f04,0x0345,0x1f84}, {0x03b9,0x0300,0x1f76}, {0x1f61,0x0301,0x1f65}, {0x00e6,0x0301,0x01fd}, {0x004f,0x030c,0x01d1}, {0x0043,0x0301,0x0106}, {0x3046,0x3099,0x3094},
- {0x0bc6,0x0bd7,0x0bcc}, {0x0041,0x0303,0x00c3}, {0x0061,0x0328,0x0105}, {0x004f,0x0323,0x1ecc}, {0x00cf,0x0301,0x1e2e}, {0x30d2,0x3099,0x30d3}, {0x004b,0x0327,0x0136}, {0x03a5,0x0314,0x1f59},
- {0x016b,0x0308,0x1e7b}, {0x3075,0x3099,0x3076}, {0x0072,0x0301,0x0155}, {0x2287,0x0338,0x2289}, {0x0045,0x032d,0x1e18}, {0x006f,0x0304,0x014d}, {0x01b0,0x0309,0x1eed}, {0x30bb,0x3099,0x30bc},
- {0x1b3e,0x1b35,0x1b40}, {0x01a1,0x0301,0x1edb}, {0x0423,0x030b,0x04f2}, {0x0067,0x030c,0x01e7}, {0x1f62,0x0345,0x1fa2}, {0x1fbf,0x0301,0x1fce}, {0x005a,0x0302,0x1e90}, {0x0059,0x0303,0x1ef8},
- {0x006f,0x031b,0x01a1}, {0x1f08,0x0342,0x1f0e}, {0x1f05,0x0345,0x1f85}, {0x03b9,0x0301,0x03af}, {0x0074,0x032d,0x1e71}, {0x00e7,0x0301,0x1e09}, {0x0045,0x0300,0x00c8}, {0x0228,0x0306,0x1e1c},
- {0x00e4,0x0304,0x01df}, {0x0043,0x0302,0x0108}, {0x0063,0x0327,0x00e7}, {0x0041,0x0304,0x0100}, {0x00dc,0x030c,0x01d9}, {0x30d2,0x309a,0x30d4}, {0x043e,0x0308,0x04e7}, {0x004c,0x0327,0x013b},
- {0x0073,0x0301,0x015b}, {0x3075,0x309a,0x3077}, {0x1f20,0x0342,0x1f26}, {0x006d,0x0307,0x1e41}, {0x0427,0x0308,0x04f4}, {0x1b3f,0x1b35,0x1b41}, {0x0042,0x0331,0x1e06}, {0x01a0,0x0303,0x1ee0},
- {0x1f63,0x0345,0x1fa3}, {0x0068,0x030c,0x021f}, {0x0418,0x0300,0x040d}, {0x305f,0x3099,0x3060}, {0x00fc,0x0304,0x01d6}, {0x0065,0x030f,0x0205}, {0x0059,0x0304,0x0232}, {0x1f09,0x0342,0x1f0f},
- {0x1f06,0x0345,0x1f86}, {0x0055,0x0308,0x00dc}, {0x0410,0x0308,0x04d2}, {0x0c46,0x0c56,0x0c48}, {0x0075,0x032d,0x1e77}, {0x0068,0x0323,0x1e25}, {0x0229,0x0306,0x1e1d}, {0x0456,0x0308,0x0457},
- {0x0045,0x0301,0x00c9}, {0x03bf,0x0313,0x1f40}, {0x0064,0x0327,0x1e11}, {0x1b11,0x1b35,0x1b12}, {0x0443,0x0304,0x04ef}, {0x1f38,0x0342,0x1f3e}, {0x005a,0x0331,0x1e94}, {0x2243,0x0338,0x2244},
+ {0x0dd9,0x0dcf,0x0ddc}, {0x1f49,0x0301,0x1f4d}, {0x004e,0x0323,0x1e46}, {0x0399,0x0308,0x03aa}, {0x1ea1,0x0306,0x1eb7}, {0x016a,0x0308,0x1e7a}, {0x0049,0x0328,0x012e}, {0x115b9,0x115af,0x115bb},
+ {0x2286,0x0338,0x2288}, {0x006f,0x0303,0x00f5}, {0x0044,0x032d,0x1e12}, {0x01af,0x0309,0x1eec}, {0x01a1,0x0300,0x1edd}, {0x01a0,0x0301,0x1eda}, {0x0069,0x0309,0x1ec9}, {0x1f61,0x0345,0x1fa1},
+ {0x1fbf,0x0300,0x1fcd}, {0x005a,0x0301,0x0179}, {0x305d,0x3099,0x305e}, {0x0059,0x0302,0x0176}, {0x0061,0x0311,0x0203}, {0x0055,0x0306,0x016c}, {0x22b5,0x0338,0x22ed}, {0x0410,0x0306,0x04d0},
+ {0x03b9,0x0300,0x1f76}, {0x1f04,0x0345,0x1f84}, {0x0054,0x0307,0x1e6a}, {0x1f61,0x0301,0x1f65}, {0x00e6,0x0301,0x01fd}, {0x004f,0x030c,0x01d1}, {0x3046,0x3099,0x3094}, {0x0043,0x0301,0x0106},
+ {0x0bc6,0x0bd7,0x0bcc}, {0x0041,0x0303,0x00c3}, {0x0061,0x0328,0x0105}, {0x30d2,0x3099,0x30d3}, {0x00cf,0x0301,0x1e2e}, {0x004f,0x0323,0x1ecc}, {0x004b,0x0327,0x0136}, {0x03a5,0x0314,0x1f59},
+ {0x016b,0x0308,0x1e7b}, {0x2287,0x0338,0x2289}, {0x0072,0x0301,0x0155}, {0x3075,0x3099,0x3076}, {0x0045,0x032d,0x1e18}, {0x006f,0x0304,0x014d}, {0x01b0,0x0309,0x1eed}, {0x30bb,0x3099,0x30bc},
+ {0x1b3e,0x1b35,0x1b40}, {0x01a1,0x0301,0x1edb}, {0x0423,0x030b,0x04f2}, {0x1f62,0x0345,0x1fa2}, {0x0067,0x030c,0x01e7}, {0x005a,0x0302,0x1e90}, {0x1fbf,0x0301,0x1fce}, {0x006f,0x031b,0x01a1},
+ {0x0059,0x0303,0x1ef8}, {0x1f08,0x0342,0x1f0e}, {0x1f05,0x0345,0x1f85}, {0x03b9,0x0301,0x03af}, {0x0074,0x032d,0x1e71}, {0x00e7,0x0301,0x1e09}, {0x0228,0x0306,0x1e1c}, {0x0045,0x0300,0x00c8},
+ {0x0043,0x0302,0x0108}, {0x00e4,0x0304,0x01df}, {0x0063,0x0327,0x00e7}, {0x0041,0x0304,0x0100}, {0x00dc,0x030c,0x01d9}, {0x30d2,0x309a,0x30d4}, {0x043e,0x0308,0x04e7}, {0x004c,0x0327,0x013b},
+ {0x0073,0x0301,0x015b}, {0x3075,0x309a,0x3077}, {0x1f20,0x0342,0x1f26}, {0x006d,0x0307,0x1e41}, {0x0042,0x0331,0x1e06}, {0x1b3f,0x1b35,0x1b41}, {0x0427,0x0308,0x04f4}, {0x01a0,0x0303,0x1ee0},
+ {0x0068,0x030c,0x021f}, {0x0418,0x0300,0x040d}, {0x1f63,0x0345,0x1fa3}, {0x305f,0x3099,0x3060}, {0x00fc,0x0304,0x01d6}, {0x0065,0x030f,0x0205}, {0x1f09,0x0342,0x1f0f}, {0x0059,0x0304,0x0232},
+ {0x1f06,0x0345,0x1f86}, {0x0410,0x0308,0x04d2}, {0x0055,0x0308,0x00dc}, {0x0075,0x032d,0x1e77}, {0x0c46,0x0c56,0x0c48}, {0x0068,0x0323,0x1e25}, {0x0229,0x0306,0x1e1d}, {0x0045,0x0301,0x00c9},
+ {0x0456,0x0308,0x0457}, {0x03bf,0x0313,0x1f40}, {0x0064,0x0327,0x1e11}, {0x1b11,0x1b35,0x1b12}, {0x1f38,0x0342,0x1f3e}, {0x0443,0x0304,0x04ef}, {0x2243,0x0338,0x2244}, {0x005a,0x0331,0x1e94},
{0x0075,0x0300,0x00f9}, {0x0073,0x0302,0x015d}, {0x1f21,0x0342,0x1f27}, {0x006f,0x0306,0x014f}, {0x30bd,0x3099,0x30be}, {0x006e,0x0307,0x1e45}, {0x03d2,0x0301,0x03d3}, {0x0391,0x0313,0x1f08},
- {0x01a1,0x0303,0x1ee1}, {0x0069,0x030c,0x01d0}, {0x1f64,0x0345,0x1fa4}, {0x2272,0x0338,0x2274}, {0x003c,0x0338,0x226e}, {0x30a6,0x3099,0x30f4}, {0x1f07,0x0345,0x1f87}, {0x0057,0x0307,0x1e86},
+ {0x01a1,0x0303,0x1ee1}, {0x1f64,0x0345,0x1fa4}, {0x0069,0x030c,0x01d0}, {0x2272,0x0338,0x2274}, {0x003c,0x0338,0x226e}, {0x30a6,0x3099,0x30f4}, {0x1f07,0x0345,0x1f87}, {0x0057,0x0307,0x1e86},
{0x01af,0x0323,0x1ef0}, {0x0627,0x0653,0x0622}, {0x0055,0x0309,0x1ee6}, {0x1f50,0x0342,0x1f56}, {0x00ea,0x0300,0x1ec1}, {0x0069,0x0323,0x1ecb}, {0x0052,0x030c,0x0158}, {0x0072,0x0331,0x1e5f},
- {0x0045,0x0302,0x00ca}, {0x00e6,0x0304,0x01e3}, {0x03bf,0x0314,0x1f41}, {0x1f08,0x0300,0x1f0a}, {0x004f,0x030f,0x020c}, {0x0065,0x0327,0x0229}, {0x0041,0x0306,0x0102}, {0x03a5,0x0300,0x1fea},
+ {0x0045,0x0302,0x00ca}, {0x00e6,0x0304,0x01e3}, {0x0065,0x0327,0x0229}, {0x03bf,0x0314,0x1f41}, {0x1f08,0x0300,0x1f0a}, {0x004f,0x030f,0x020c}, {0x0041,0x0306,0x0102}, {0x03a5,0x0300,0x1fea},
{0x1f39,0x0342,0x1f3f}, {0x30d5,0x3099,0x30d6}, {0x0052,0x0323,0x1e5a}, {0x03a9,0x0313,0x1f68}, {0x004e,0x0327,0x0145}, {0x1f7c,0x0345,0x1ff2}, {0x3078,0x3099,0x3079}, {0x0075,0x0301,0x00fa},
{0x00ca,0x0309,0x1ec2}, {0x006f,0x0307,0x022f}, {0x0045,0x0330,0x1e1a}, {0x0044,0x0331,0x1e0e}, {0x1f68,0x0342,0x1f6e}, {0x0102,0x0300,0x1eb0}, {0x0391,0x0314,0x1f09}, {0x006a,0x030c,0x01f0},
- {0x11131,0x11127,0x1112e}, {0x1f65,0x0345,0x1fa5}, {0x2273,0x0338,0x2275}, {0x3061,0x3099,0x3062}, {0x003d,0x0338,0x2260}, {0x1f20,0x0300,0x1f22}, {0x21d0,0x0338,0x21cd}, {0x0cc6,0x0cd5,0x0cc7},
- {0x0065,0x0311,0x0207}, {0x1f08,0x0345,0x1f88}, {0x0058,0x0307,0x1e8a}, {0x01b0,0x0323,0x1ef1}, {0x0057,0x0308,0x1e84}, {0x0627,0x0654,0x0623}, {0x1f51,0x0342,0x1f57}, {0x0055,0x030a,0x016e},
- {0x03b9,0x0304,0x1fd1}, {0x00ea,0x0301,0x1ebf}, {0x0053,0x030c,0x0160}, {0x0047,0x0301,0x01f4}, {0x03c1,0x0313,0x1fe4}, {0x0045,0x0303,0x1ebc}, {0x1f09,0x0300,0x1f0b}, {0x1f08,0x0301,0x1f0c},
- {0x0065,0x0328,0x0119}, {0x22a2,0x0338,0x22ac}, {0x0041,0x0307,0x0226}, {0x00e2,0x0309,0x1ea9}, {0x03a5,0x0301,0x038e}, {0x00d4,0x0300,0x1ed2}, {0x0443,0x0306,0x045e}, {0x0053,0x0323,0x1e62},
+ {0x1f65,0x0345,0x1fa5}, {0x11131,0x11127,0x1112e}, {0x3061,0x3099,0x3062}, {0x2273,0x0338,0x2275}, {0x1f20,0x0300,0x1f22}, {0x003d,0x0338,0x2260}, {0x21d0,0x0338,0x21cd}, {0x0cc6,0x0cd5,0x0cc7},
+ {0x0065,0x0311,0x0207}, {0x01b0,0x0323,0x1ef1}, {0x0058,0x0307,0x1e8a}, {0x1f08,0x0345,0x1f88}, {0x0057,0x0308,0x1e84}, {0x0627,0x0654,0x0623}, {0x1f51,0x0342,0x1f57}, {0x0055,0x030a,0x016e},
+ {0x00ea,0x0301,0x1ebf}, {0x03b9,0x0304,0x1fd1}, {0x0053,0x030c,0x0160}, {0x0047,0x0301,0x01f4}, {0x03c1,0x0313,0x1fe4}, {0x0045,0x0303,0x1ebc}, {0x1f09,0x0300,0x1f0b}, {0x0065,0x0328,0x0119},
+ {0x1f08,0x0301,0x1f0c}, {0x22a2,0x0338,0x22ac}, {0x00e2,0x0309,0x1ea9}, {0x0041,0x0307,0x0226}, {0x03a5,0x0301,0x038e}, {0x00d4,0x0300,0x1ed2}, {0x0443,0x0306,0x045e}, {0x0053,0x0323,0x1e62},
{0x30d5,0x309a,0x30d7}, {0x2245,0x0338,0x2247}, {0x03a9,0x0314,0x1f69}, {0x0077,0x0300,0x1e81}, {0x0075,0x0302,0x00fb}, {0x3078,0x309a,0x307a}, {0x1f38,0x0300,0x1f3a}, {0x0048,0x032e,0x1e2a},
- {0x00a8,0x0342,0x1fc1}, {0x30bf,0x3099,0x30c0}, {0x0070,0x0307,0x1e57}, {0x1f20,0x0345,0x1f90}, {0x006f,0x0308,0x00f6}, {0x1b42,0x1b35,0x1b43}, {0x1f69,0x0342,0x1f6f}, {0x0103,0x0300,0x1eb1},
- {0x0102,0x0301,0x1eae}, {0x006b,0x030c,0x01e9}, {0x1f66,0x0345,0x1fa6}, {0x11132,0x11127,0x1112f}, {0x041a,0x0301,0x040c}, {0x003e,0x0338,0x226f}, {0x1f21,0x0300,0x1f23}, {0x1f20,0x0301,0x1f24},
- {0x0cc6,0x0cd6,0x0cc8}, {0x0415,0x0306,0x04d6}, {0x1f09,0x0345,0x1f89}, {0x0059,0x0307,0x1e8e}, {0x114b9,0x114ba,0x114bb}, {0x0058,0x0308,0x1e8c}, {0x0627,0x0655,0x0625}, {0x022e,0x0304,0x0230},
- {0x006b,0x0323,0x1e33}, {0x0055,0x030b,0x0170}, {0x0075,0x0330,0x1e75}, {0x0054,0x030c,0x0164}, {0x0049,0x0300,0x00cc}, {0x0074,0x0331,0x1e6f}, {0x304b,0x3099,0x304c}, {0x0047,0x0302,0x011c},
+ {0x30bf,0x3099,0x30c0}, {0x00a8,0x0342,0x1fc1}, {0x0070,0x0307,0x1e57}, {0x1f20,0x0345,0x1f90}, {0x1b42,0x1b35,0x1b43}, {0x006f,0x0308,0x00f6}, {0x1f69,0x0342,0x1f6f}, {0x0103,0x0300,0x1eb1},
+ {0x0102,0x0301,0x1eae}, {0x1f66,0x0345,0x1fa6}, {0x006b,0x030c,0x01e9}, {0x11132,0x11127,0x1112f}, {0x041a,0x0301,0x040c}, {0x1f21,0x0300,0x1f23}, {0x003e,0x0338,0x226f}, {0x1f20,0x0301,0x1f24},
+ {0x0cc6,0x0cd6,0x0cc8}, {0x0415,0x0306,0x04d6}, {0x1f09,0x0345,0x1f89}, {0x0059,0x0307,0x1e8e}, {0x114b9,0x114ba,0x114bb}, {0x0058,0x0308,0x1e8c}, {0x022e,0x0304,0x0230}, {0x0627,0x0655,0x0625},
+ {0x006b,0x0323,0x1e33}, {0x0055,0x030b,0x0170}, {0x0054,0x030c,0x0164}, {0x0075,0x0330,0x1e75}, {0x0049,0x0300,0x00cc}, {0x0074,0x0331,0x1e6f}, {0x304b,0x3099,0x304c}, {0x0047,0x0302,0x011c},
{0x0067,0x0327,0x0123}, {0x03c1,0x0314,0x1fe5}, {0x0045,0x0304,0x0112}, {0x1f09,0x0301,0x1f0d}, {0x004f,0x0311,0x020e}, {0x0042,0x0307,0x1e02}, {0x0041,0x0308,0x00c4}, {0x1f50,0x0300,0x1f52},
- {0x0054,0x0323,0x1e6c}, {0x00d4,0x0301,0x1ed0}, {0x004f,0x0328,0x01ea}, {0x0077,0x0301,0x1e83}, {0x0075,0x0303,0x0169}, {0x1f39,0x0300,0x1f3b}, {0x1f38,0x0301,0x1f3c}, {0x1f21,0x0345,0x1f91},
+ {0x00d4,0x0301,0x1ed0}, {0x0054,0x0323,0x1e6c}, {0x004f,0x0328,0x01ea}, {0x0077,0x0301,0x1e83}, {0x0075,0x0303,0x0169}, {0x1f39,0x0300,0x1f3b}, {0x1f38,0x0301,0x1f3c}, {0x1f21,0x0345,0x1f91},
{0x042b,0x0308,0x04f8}, {0x006f,0x0309,0x1ecf}, {0x015a,0x0307,0x1e64}, {0x0103,0x0301,0x1eaf}, {0x0061,0x0300,0x00e0}, {0x1f67,0x0345,0x1fa7}, {0x006c,0x030c,0x013e}, {0x0930,0x093c,0x0931},
- {0x0069,0x030f,0x0209}, {0x21d2,0x0338,0x21cf}, {0x1f21,0x0301,0x1f25}, {0x0418,0x0304,0x04e2}, {0x0416,0x0306,0x04c1}, {0x005a,0x0307,0x017b}, {0x03bf,0x0300,0x1f78}, {0x1f0a,0x0345,0x1f8a},
- {0x0059,0x0308,0x0178}, {0x1f68,0x0300,0x1f6a}, {0x022f,0x0304,0x0231}, {0x30ef,0x3099,0x30f7}, {0x006c,0x0323,0x1e37}, {0x0055,0x030c,0x01d3}, {0x0049,0x0301,0x00cd}, {0x00ea,0x0303,0x1ec5},
- {0x03b9,0x0306,0x1fd0}, {0x0048,0x0302,0x0124}, {0x0068,0x0327,0x1e29}, {0x0052,0x030f,0x0210}, {0x0043,0x0307,0x010a}, {0x1f51,0x0300,0x1f53}, {0x0041,0x0309,0x1ea2}, {0x1f50,0x0301,0x1f54},
- {0x30d8,0x3099,0x30d9}, {0x00d5,0x0301,0x1e4c}, {0x0055,0x0323,0x1ee4}, {0x0443,0x0308,0x04f1}, {0x0079,0x0300,0x1ef3}, {0x307b,0x3099,0x307c}, {0x0433,0x0301,0x0453}, {0x0d46,0x0d3e,0x0d4a},
- {0x0077,0x0302,0x0175}, {0x0391,0x0300,0x1fba}, {0x0076,0x0303,0x1e7d}, {0x0075,0x0304,0x016b}, {0x1f39,0x0301,0x1f3d}, {0x30c1,0x3099,0x30c2}, {0x1f22,0x0345,0x1f92}, {0x0072,0x0307,0x1e59},
- {0x0395,0x0313,0x1f18}, {0x015b,0x0307,0x1e65}, {0x1f68,0x0345,0x1fa8}, {0x0061,0x0301,0x00e1}, {0x0102,0x0303,0x1eb4}, {0x3064,0x3099,0x3065}, {0x2276,0x0338,0x2278}, {0x0075,0x031b,0x01b0},
- {0x1e36,0x0304,0x1e38}, {0x00a8,0x0300,0x1fed}, {0x01ea,0x0304,0x01ec}, {0x1f0b,0x0345,0x1f8b}, {0x0415,0x0308,0x0401}, {0x1f69,0x0300,0x1f6b}, {0x03bf,0x0301,0x03cc}, {0x0059,0x0309,0x1ef6},
+ {0x21d2,0x0338,0x21cf}, {0x0069,0x030f,0x0209}, {0x0418,0x0304,0x04e2}, {0x1f21,0x0301,0x1f25}, {0x0416,0x0306,0x04c1}, {0x1f0a,0x0345,0x1f8a}, {0x03bf,0x0300,0x1f78}, {0x005a,0x0307,0x017b},
+ {0x1f68,0x0300,0x1f6a}, {0x0059,0x0308,0x0178}, {0x022f,0x0304,0x0231}, {0x30ef,0x3099,0x30f7}, {0x006c,0x0323,0x1e37}, {0x0055,0x030c,0x01d3}, {0x03b9,0x0306,0x1fd0}, {0x0049,0x0301,0x00cd},
+ {0x00ea,0x0303,0x1ec5}, {0x0048,0x0302,0x0124}, {0x0052,0x030f,0x0210}, {0x0068,0x0327,0x1e29}, {0x0043,0x0307,0x010a}, {0x1f51,0x0300,0x1f53}, {0x1f50,0x0301,0x1f54}, {0x0041,0x0309,0x1ea2},
+ {0x00d5,0x0301,0x1e4c}, {0x30d8,0x3099,0x30d9}, {0x0055,0x0323,0x1ee4}, {0x0443,0x0308,0x04f1}, {0x0079,0x0300,0x1ef3}, {0x0d46,0x0d3e,0x0d4a}, {0x307b,0x3099,0x307c}, {0x0433,0x0301,0x0453},
+ {0x0391,0x0300,0x1fba}, {0x0077,0x0302,0x0175}, {0x0076,0x0303,0x1e7d}, {0x1f39,0x0301,0x1f3d}, {0x0075,0x0304,0x016b}, {0x30c1,0x3099,0x30c2}, {0x1f22,0x0345,0x1f92}, {0x0072,0x0307,0x1e59},
+ {0x0395,0x0313,0x1f18}, {0x015b,0x0307,0x1e65}, {0x1f68,0x0345,0x1fa8}, {0x2276,0x0338,0x2278}, {0x3064,0x3099,0x3065}, {0x0102,0x0303,0x1eb4}, {0x0061,0x0301,0x00e1}, {0x0075,0x031b,0x01b0},
+ {0x1e36,0x0304,0x1e38}, {0x00a8,0x0300,0x1fed}, {0x01ea,0x0304,0x01ec}, {0x1f0b,0x0345,0x1f8b}, {0x1f69,0x0300,0x1f6b}, {0x03bf,0x0301,0x03cc}, {0x0415,0x0308,0x0401}, {0x0059,0x0309,0x1ef6},
{0x1f68,0x0301,0x1f6c}, {0x006d,0x0323,0x1e43}, {0x30f0,0x3099,0x30f8}, {0x304d,0x3099,0x304e}, {0x0049,0x0302,0x00ce}
};
@@ -9066,6 +9066,8 @@ static const char32_t ccc_starting_pagetab[]={
#define canonical_mult1 41
#define canonical_mult2 42
+#ifndef exclusion_table
+
static uint16_t canonical_compositions_lookup[]={
0,
1,
@@ -10023,3 +10025,4 @@ static uint16_t canonical_compositions_lookup[]={
940,
941
};
+#endif
diff --git a/unicode/normalizetest.C b/unicode/normalizetest.C
index da827d7..c4ecc3f 100644
--- a/unicode/normalizetest.C
+++ b/unicode/normalizetest.C
@@ -91,12 +91,12 @@ void testdecompose()
for (i=0; (before[i]=t.before[i]) != 0; ++i)
;
- struct unicode_decompose_info info;
+ unicode_decomposition_t info;
- unicode_decompose_info_init(&info, before, (size_t)-1, NULL);
+ unicode_decomposition_init(&info, before, (size_t)-1, NULL);
info.decompose_flags=t.flags;
unicode_decompose(&info);
- unicode_decompose_info_deinit(&info);
+ unicode_decomposition_deinit(&info);
if (info.string[info.string_size] != 0)
{
@@ -195,20 +195,6 @@ void testtablookup()
}
-static int unicode_exclude(char32_t ch)
-{
- return unicode_tab_lookup(ch,
- exclusion_starting_indextab,
- exclusion_starting_pagetab,
- sizeof(exclusion_starting_indextab)/
- sizeof(exclusion_starting_indextab[0]),
- exclusion_rangetab,
- sizeof(exclusion_rangetab)/
- sizeof(exclusion_rangetab[0]),
- NULL,
- 0);
-}
-
void testcompose1()
{
for (size_t i=0; i<sizeof(canonical_compositions)/
diff --git a/unicode/unicode_bidi.c b/unicode/unicode_bidi.c
index b4e3f2c..ac4f5bc 100644
--- a/unicode/unicode_bidi.c
+++ b/unicode/unicode_bidi.c
@@ -2818,3 +2818,75 @@ struct unicode_bidi_direction unicode_bidi_get_direction(const char32_t *str,
(0, n,
get_enum_bidi_type_for_embedding_paragraph_level, &info);
}
+// Walks str one bidi level run at a time and reports each run of combining marks (non-zero ccc) to the callback.
+void unicode_bidi_combinings(const char32_t *str, // characters to scan
+ const unicode_bidi_level_t *levels, // level of each character; NULL means a single run reported as level 0
+ size_t n, // number of characters in str (and of entries in levels)
+ void (*combinings)(unicode_bidi_level_t level, // level of the enclosing run
+ size_t level_start, // index where that run starts
+ size_t n_chars, // length of that run
+ size_t comb_start, // index of the first combining mark
+ size_t n_comb_chars, // how many combining marks follow each other
+ void *arg), // the arg below, passed through
+ void *arg) // opaque pointer forwarded to every callback call
+{
+ size_t level_start=0;
+
+ while (level_start < n)
+ {
+ size_t level_end;
+ size_t comb_start;
+ size_t comb_end;
+
+ // Find the end of this level: the first later index with a different level (n when there is none, or when levels is NULL)
+
+ for (level_end=level_start; ++level_end<n; )
+ {
+ if (levels && (levels[level_end] !=
+ levels[level_start]))
+ break;
+ }
+
+ // Now sweep from level_start to level_end.
+
+ for (comb_start=level_start; comb_start < level_end; )
+ {
+ // Search for a non-0 ccc, stepping over characters whose ccc is 0
+
+ if (unicode_ccc(str[comb_start]) == 0)
+ {
+ ++comb_start;
+ continue;
+ }
+
+ // Now, search for the next ccc of 0, stopping at
+ // level_end
+
+ for (comb_end=comb_start; ++comb_end < level_end; )
+ {
+ if (unicode_ccc(str[comb_end]) == 0)
+ break;
+ }
+
+ // Report this run of combining marks: [comb_start, comb_end)
+ (*combinings)((levels ? levels[level_start]
+ : 0), level_start,
+ level_end-level_start,
+ comb_start,
+ comb_end-comb_start, arg);
+
+ // If we're here before the level_end we must
+ // have reached the next starter. So, on the next
+ // iteration we want to start with the following
+ // character. So, if the callback reversed the
+ // combinings and the following starter the
+ // next character will now be a composition, so
+ // we can skip it.
+
+ if (comb_end < level_end)
+ ++comb_end; // step over the character with ccc 0 that ended the run
+ comb_start=comb_end;
+ }
+ level_start=level_end;
+ }
+}
diff --git a/unicode/unicode_normalization.c b/unicode/unicode_normalization.c
index ea9e256..93e691f 100644
--- a/unicode/unicode_normalization.c
+++ b/unicode/unicode_normalization.c
@@ -51,7 +51,7 @@ static int unicode_nfkc_qc(char32_t ch)
** Lookup a character's canonical combining class.
*/
-static uint8_t unicode_ccc(char32_t ch)
+uint8_t unicode_ccc(char32_t ch)
{
return unicode_tab_lookup(ch,
ccc_starting_indextab,
@@ -113,8 +113,8 @@ unicode_canonical_t unicode_canonical(char32_t ch)
** decomposed.
*/
-static void search_for_decompose(struct unicode_decompose_info *info,
- void (*f)(struct unicode_decompose_info *,
+static void search_for_decompose(unicode_decomposition_t *info,
+ void (*f)(unicode_decomposition_t *,
size_t,
const struct decomposition_info *,
void *),
@@ -182,7 +182,7 @@ struct decompose_meta {
/* Pass 1: count the number of characters to decompose. */
-static void decompose_meta_count(struct unicode_decompose_info *info,
+static void decompose_meta_count(unicode_decomposition_t *info,
size_t i,
const struct decomposition_info *cinfo,
void *arg)
@@ -194,7 +194,7 @@ static void decompose_meta_count(struct unicode_decompose_info *info,
/* Pass 2: compile a list of characters to decompose. */
-static void decompose_meta_save(struct unicode_decompose_info *info,
+static void decompose_meta_save(unicode_decomposition_t *info,
size_t i,
const struct decomposition_info *cinfo,
void *arg)
@@ -208,7 +208,7 @@ static void decompose_meta_save(struct unicode_decompose_info *info,
++ptr->nchars;
}
-size_t unicode_decompose_reallocate_size(struct unicode_decompose_info *info,
+size_t unicode_decompose_reallocate_size(unicode_decomposition_t *info,
const size_t *sizes,
size_t n)
{
@@ -221,7 +221,7 @@ size_t unicode_decompose_reallocate_size(struct unicode_decompose_info *info,
return new_size;
}
-static int unicode_decompose_reallocate(struct unicode_decompose_info *info,
+static int unicode_decompose_reallocate(unicode_decomposition_t *info,
const size_t *offsets,
const size_t *sizes,
size_t n)
@@ -240,10 +240,10 @@ static int unicode_decompose_reallocate(struct unicode_decompose_info *info,
return 0;
}
-void unicode_decompose_info_init(struct unicode_decompose_info *info,
- char32_t *string,
- size_t string_size,
- void *arg)
+void unicode_decomposition_init(unicode_decomposition_t *info,
+ char32_t *string,
+ size_t string_size,
+ void *arg)
{
memset(info, 0, sizeof(*info));
@@ -259,11 +259,11 @@ void unicode_decompose_info_init(struct unicode_decompose_info *info,
info->arg=arg;
}
-void unicode_decompose_info_deinit(struct unicode_decompose_info *info)
+void unicode_decomposition_deinit(unicode_decomposition_t *info) /* empty body: currently does nothing with info */
{
}
-int unicode_decompose(struct unicode_decompose_info *info)
+int unicode_decompose(unicode_decomposition_t *info)
{
int replaced;
int rc=0;
@@ -450,6 +450,14 @@ static char32_t lookup_composition(char32_t a, char32_t b)
return 0;
}
+/* Temporary linked list, until all compositions get built. */
+/* Each node wraps one composition record; nodes are appended at the tail as compositions are found. */
+struct unicode_compose_info_list {
+ struct unicode_compose_info_list *next; /* following node, NULL at the tail */
+ struct unicode_compose_info *info; /* this node's composition record */
+};
+
+
/*
** Collect consecutive sequence of composable characters. We cache each
** character's composition level.
@@ -525,15 +533,15 @@ static int unicode_composition_init2(const char32_t *string,
size_t string_size,
int flags,
struct chars_and_levels *clptr,
- struct unicode_compositions ***tail_ptr);
+ struct unicode_compose_info_list ***tail_ptr);
int unicode_composition_init(const char32_t *string,
size_t string_size,
int flags,
- struct unicode_compositions **ret)
+ unicode_composition_t *info)
{
/*
+ ** Initialize a singly-linked unicode_compose_info_list.
+ ** Initialize a singly-linked unicode_compose_info_list_list.
**
** Initialize the tail pointer. We'll be adding onto the tail pointer
** as we find each composition.
@@ -541,11 +549,14 @@ int unicode_composition_init(const char32_t *string,
** Initialize the chars_and_levels buffer.
*/
- struct unicode_compositions *list=NULL;
- struct unicode_compositions **tail=&list;
+ struct unicode_compose_info_list *list=NULL;
+ struct unicode_compose_info_list **tail=&list;
struct chars_and_levels cl;
int c;
+ info->n_compositions=0;
+ info->compositions=0;
+
if (chars_and_levels_init(&cl))
return -1;
@@ -561,13 +572,51 @@ int unicode_composition_init(const char32_t *string,
&cl, &tail);
chars_and_levels_deinit(&cl);
+ if (c == 0)
+ {
+ struct unicode_compose_info_list *ptr;
+
+ info->n_compositions=0;
+
+ for (ptr=list; ptr; ptr=ptr->next)
+ ++info->n_compositions;
+
+ if ((info->compositions=(struct unicode_compose_info **)
+ malloc(sizeof(struct unicode_composition_info *)
+ * (info->n_compositions+1))) == NULL)
+ {
+ c= -1;
+ info->n_compositions=0;
+ }
+ }
+
+ if (c == 0)
+ {
+ struct unicode_compose_info_list *ptr;
+ size_t i=0;
+
+ while (list)
+ {
+ ptr=list->next;
+ info->compositions[i++]=list->info;
+ free(list);
+ list=ptr;
+ }
+ info->compositions[i]=NULL;
+ }
+
if (c)
{
- unicode_composition_deinit(list);
- list=NULL;
+ while (list)
+ {
+ struct unicode_compose_info_list *next=list->next;
+
+ free(list->info);
+ free(list);
+ list=next;
+ }
}
- *ret=list;
return c;
}
@@ -575,22 +624,22 @@ static int compose_chars_and_levels(const char32_t *starterptr,
size_t starter_index,
int flags,
struct chars_and_levels *clptr,
- struct unicode_compositions
+ struct unicode_compose_info_list
**last_compositionptr,
- struct unicode_compositions ***tail_ptr);
+ struct unicode_compose_info_list ***tail_ptr);
static int create_new_composition(size_t starter_index,
size_t n_combining_marks,
- struct unicode_compositions **ptr);
+ struct unicode_compose_info_list **ptr);
static int unicode_composition_init2(const char32_t *string,
size_t string_size,
int flags,
struct chars_and_levels *clptr,
- struct unicode_compositions ***tail_ptr)
+ struct unicode_compose_info_list ***tail_ptr)
{
size_t i;
- struct unicode_compositions *last_composition=NULL;
+ struct unicode_compose_info_list *last_composition=NULL;
/*
** Here we consecutively scan the string and look up each character's
@@ -629,13 +678,13 @@ static int unicode_composition_init2(const char32_t *string,
if (starterptr &&
/* Did we just compose this starter? */
last_composition &&
- last_composition->index == starter_index &&
+ last_composition->info->index == starter_index &&
/*
** Did we compose everything, didn't leave
** any combined marks behind?
*/
- last_composition->n_composition == 1)
+ last_composition->info->n_composition == 1)
{
/*
** So, check if we can combine with that
@@ -643,7 +692,7 @@ static int unicode_composition_init2(const char32_t *string,
** original starter, the new one is here.
*/
new_char=lookup_composition
- (last_composition->composition[0],
+ (last_composition->info->composition[0],
string[i]);
if (new_char != 0)
@@ -651,7 +700,7 @@ static int unicode_composition_init2(const char32_t *string,
/*
** Just update the composed char.
*/
- last_composition->composition[0]=
+ last_composition->info->composition[0]=
new_char;
/*
@@ -659,7 +708,7 @@ static int unicode_composition_init2(const char32_t *string,
** This nukes this starter, as if
** it was a part of the composition!
*/
- ++last_composition->n_composed;
+ ++last_composition->info->n_composed;
continue;
}
}
@@ -679,7 +728,7 @@ static int unicode_composition_init2(const char32_t *string,
** from two starters here.
*/
- struct unicode_compositions *new_composition;
+ struct unicode_compose_info_list *new_composition;
if (create_new_composition(starter_index,
1, &new_composition))
@@ -689,9 +738,9 @@ static int unicode_composition_init2(const char32_t *string,
**tail_ptr=new_composition;
*tail_ptr= &new_composition->next;
- new_composition->n_composed=2;
- new_composition->n_composition=1;
- new_composition->composition[0]=new_char;
+ new_composition->info->n_composed=2;
+ new_composition->info->n_composition=1;
+ new_composition->info->composition[0]=new_char;
continue;
}
/*
@@ -739,26 +788,30 @@ static int compare_levels(const void *a, const void *b)
static int create_new_composition(size_t starter_index,
size_t n_combining_marks,
- struct unicode_compositions **ptr)
+ struct unicode_compose_info_list **ptr) /* receives the new list node on success */
{
- struct unicode_compositions *c=
- (struct unicode_compositions *)
- malloc(sizeof(struct unicode_compositions));
+ struct unicode_compose_info_list *c=
+ (struct unicode_compose_info_list *)
+ malloc(sizeof(struct unicode_compose_info_list));
if (!c)
return -1;
- c->index=starter_index;
- c->next=NULL;
+ c->info=malloc(sizeof(struct unicode_compose_info)+
+ sizeof(char32_t) * n_combining_marks); /* the struct and its composition[] buffer share one allocation */
- /* Worst case: nothing is composed */
-
- if ((c->composition=malloc(sizeof(char32_t) *
- n_combining_marks)) == NULL)
+ if (!c->info)
{
free(c);
return -1;
}
+ /* composition[] starts right after the struct, inside the same allocation */
+ c->info->index=starter_index;
+ c->info->composition=(char32_t *)(c->info+1);
+ c->next=NULL;
+
+ /* The composition[] buffer above is sized for the worst case: nothing is composed */
+ /* Success: hand the node back (-1 was already returned above if either malloc failed) */
*ptr=c;
return 0;
}
@@ -767,11 +820,11 @@ static int compose_chars_and_levels(const char32_t *starterptr,
size_t starter_index,
int flags,
struct chars_and_levels *clptr,
- struct unicode_compositions
+ struct unicode_compose_info_list
**last_compositionptr,
- struct unicode_compositions ***tail_ptr)
+ struct unicode_compose_info_list ***tail_ptr)
{
- struct unicode_compositions *new_composition;
+ struct unicode_compose_info_list *new_composition;
char32_t starter=0;
size_t i;
int composed;
@@ -826,9 +879,9 @@ static int compose_chars_and_levels(const char32_t *starterptr,
{
size_t j;
- new_composition->n_composed=clptr->size+1;
+ new_composition->info->n_composed=clptr->size+1;
- new_composition->composition[0]=starter;
+ new_composition->info->composition[0]=starter;
i=1;
if (!(flags & UNICODE_COMPOSE_FLAG_REMOVEUNUSED))
@@ -841,12 +894,12 @@ static int compose_chars_and_levels(const char32_t *starterptr,
*/
if (clptr->ptr[j].level)
{
- new_composition->composition[i++]=
+ new_composition->info->composition[i++]=
clptr->ptr[j].ch;
}
}
}
- new_composition->n_composition=i;
+ new_composition->info->n_composition=i;
} else if (!starterptr && (flags & UNICODE_COMPOSE_FLAG_REMOVEUNUSED))
{
/*
@@ -855,8 +908,8 @@ static int compose_chars_and_levels(const char32_t *starterptr,
** new_composition.
*/
- new_composition->n_composed=clptr->size;
- new_composition->n_composition=0;
+ new_composition->info->n_composed=clptr->size;
+ new_composition->info->n_composition=0;
composed=1;
}
@@ -868,7 +921,7 @@ static int compose_chars_and_levels(const char32_t *starterptr,
}
else
{
- free(new_composition->composition);
+ free(new_composition->info);
free(new_composition);
new_composition=NULL;
}
@@ -877,37 +930,39 @@ static int compose_chars_and_levels(const char32_t *starterptr,
return 0;
}
-void unicode_composition_deinit(struct unicode_compositions *ptr)
+void unicode_composition_deinit(unicode_composition_t *info) /* frees every composition in info and the array that holds them */
{
- while (ptr)
- {
- struct unicode_compositions *next=ptr->next;
+ size_t i;
- if (ptr->composition)
- free(ptr->composition);
- free(ptr);
- ptr=next;
- }
+ for (i=0; i<info->n_compositions; ++i)
+ free(info->compositions[i]); /* one free() per entry */
+ /* Then the pointer array itself, if one was allocated */
+ if (info->compositions)
+ free(info->compositions);
+ info->compositions=0; /* leave info empty: running deinit on it again frees nothing */
+ info->n_compositions=0;
}
size_t unicode_composition_apply(char32_t *string,
size_t string_size,
- struct unicode_compositions *compositions)
+ unicode_composition_t *info)
{
size_t j=0;
size_t i;
+ size_t c_index=0;
for (i=0; i<string_size; )
{
- if (compositions && compositions->index == i)
+ if (c_index < info->n_compositions &&
+ info->compositions[c_index]->index == i)
{
size_t k;
+ struct unicode_compose_info *compose=
+ info->compositions[c_index++];
- for (k=0; k<compositions->n_composition; ++k)
- string[j++]=compositions->composition[k];
- i += compositions->n_composed;
-
- compositions=compositions->next;
+ for (k=0; k<compose->n_composition; ++k)
+ string[j++]=compose->composition[k];
+ i += compose->n_composed;
}
else
{
@@ -925,14 +980,14 @@ int unicode_compose(char32_t *string,
int flags,
size_t *new_size)
{
- struct unicode_compositions *composes;
+ unicode_composition_t info;
- if (unicode_composition_init(string, string_size, flags, &composes))
+ if (unicode_composition_init(string, string_size, flags, &info))
return -1;
- *new_size=unicode_composition_apply(string, string_size, composes);
+ *new_size=unicode_composition_apply(string, string_size, &info);
- unicode_composition_deinit(composes);
+ unicode_composition_deinit(&info);
return 0;
}
diff --git a/unicode/unicodecpp.C b/unicode/unicodecpp.C
index e030c33..21ed2c3 100644
--- a/unicode/unicodecpp.C
+++ b/unicode/unicodecpp.C
@@ -11,6 +11,46 @@
#include <exception>
#include <new>
+
+namespace { // NOTE(review): the "#if 0" brace pairs look like an editor brace-matching/indentation trick — confirm
+#if 0
+}
+#endif
+// Wraps a std::function so it can be invoked from a C callback: anything it throws is captured instead of propagating.
+template<typename callable>
+struct cb_wrapper {
+ // Held by reference: the wrapped std::function must outlive this wrapper.
+ const std::function<callable> &cb;
+ std::exception_ptr caught; // the exception thrown by cb, if any; stays empty otherwise
+
+ cb_wrapper(const std::function<callable> &cb) : cb{cb}
+ {
+ }
+ // Forwards the call to cb. Once an exception has been captured, every later call returns without calling cb.
+ template<typename ...Args> void operator()(Args && ...args)
+ {
+ if (caught)
+ return;
+ try {
+ cb(std::forward<Args>(args)...);
+ } catch (...)
+ {
+ caught=std::current_exception();
+ }
+ }
+ // Rethrows the captured exception, if there is one; otherwise does nothing.
+ void rethrow()
+ {
+ if (caught)
+ std::rethrow_exception(caught);
+ }
+};
+#if 0
+{
+#endif
+}
+
+
extern "C" {
static int iconv_trampoline(const char *str, size_t cnt, void *arg)
@@ -638,44 +678,6 @@ unicode::bidi_calc(const bidi_calc_types &st,
return ret;
}
-namespace {
-#if 0
-}
-#endif
-template<typename callable>
-struct cb_wrapper {
-
- const std::function<callable> &cb;
- std::exception_ptr caught;
-
- cb_wrapper(const std::function<callable> &cb) : cb{cb}
- {
- }
-
- template<typename ...Args> void operator()(Args && ...args)
- {
- if (caught)
- return;
- try {
- cb(std::forward<Args>(args)...);
- } catch (...)
- {
- caught=std::current_exception();
- }
- }
-
- void rethrow()
- {
- if (caught)
- std::rethrow_exception(caught);
- }
-};
-#if 0
-{
-#endif
-}
-
-
extern "C" {
static void reorder_callback(size_t i, size_t cnt,
void *arg)
@@ -991,6 +993,66 @@ std::u32string unicode::bidi_override(const std::u32string &s,
return ret;
}
+typedef void bidi_combinings_callback_t(unicode_bidi_level_t, // signature of the C++ callback given to unicode::bidi_combinings()
+ size_t level_start,
+ size_t n_chars,
+ size_t comb_start,
+ size_t n_comb_chars);
+// C-linkage shim passed to unicode_bidi_combinings(): forwards each report to the cb_wrapper that arg points to.
+extern "C" {
+ static void bidi_combinings_trampoline(unicode_bidi_level_t level,
+ size_t level_start,
+ size_t n_chars,
+ size_t comb_start,
+ size_t n_comb_chars,
+ void *arg) // must point to a cb_wrapper<bidi_combinings_callback_t>
+ {
+ (*reinterpret_cast<cb_wrapper<bidi_combinings_callback_t> *>
+ (arg))(level, level_start, n_chars, comb_start, n_comb_chars);
+ }
+};
+// Reports runs of combining marks in string via callback; does nothing if string is empty or levels has a different size.
+void unicode::bidi_combinings(const std::u32string &string,
+ const std::vector<unicode_bidi_level_t> &levels,
+ const std::function<void (unicode_bidi_level_t,
+ size_t level_start,
+ size_t n_chars,
+ size_t comb_start,
+ size_t n_comb_chars)>
+ &callback)
+{
+ if (string.size() != levels.size() || string.empty())
+ return;
+ // The wrapper captures anything callback throws, so no exception unwinds through the C function.
+ cb_wrapper<bidi_combinings_callback_t> cb{callback};
+
+ unicode_bidi_combinings(&string[0], &levels[0],
+ string.size(),
+ bidi_combinings_trampoline,
+ &cb);
+ cb.rethrow(); // back in C++: propagate what callback threw, if anything
+}
+// Overload without levels: passes a null levels pointer down, so the whole string is reported as one run at level 0.
+void unicode::bidi_combinings(const std::u32string &string,
+ const std::function<void (unicode_bidi_level_t,
+ size_t level_start,
+ size_t n_chars,
+ size_t comb_start,
+ size_t n_comb_chars)>
+ &callback)
+{
+ if (string.empty())
+ return;
+ // The wrapper captures anything callback throws, so no exception unwinds through the C function.
+ cb_wrapper<bidi_combinings_callback_t> cb{callback};
+
+ unicode_bidi_combinings(&string[0], nullptr,
+ string.size(),
+ bidi_combinings_trampoline,
+ &cb);
+ cb.rethrow(); // back in C++: propagate what callback threw, if anything
+}
+
void unicode::decompose_default_reallocate(std::u32string &s,
const std::vector<std::tuple<size_t,
size_t>> &v)
@@ -1012,7 +1074,7 @@ namespace {
size_t>>)> &resizes;
std::exception_ptr caught;
- void do_reallocate(struct unicode_decompose_info *info,
+ void do_reallocate(unicode_decomposition_t *info,
const size_t *offsets,
const size_t *sizes,
size_t n)
@@ -1035,7 +1097,7 @@ namespace {
extern "C" {
- static int decompose_reallocate(struct unicode_decompose_info *info,
+ static int decompose_reallocate(unicode_decomposition_t *info,
const size_t *offsets,
const size_t *sizes,
size_t n)
@@ -1066,13 +1128,13 @@ void unicode::decompose(std::u32string &s,
decompose_info info={s, resizes};
- unicode_decompose_info uinfo;
+ unicode_decomposition_t uinfo;
- unicode_decompose_info_init(&uinfo, &s[0], s.size(), &info);
+ unicode_decomposition_init(&uinfo, &s[0], s.size(), &info);
uinfo.decompose_flags=decompose_flags;
uinfo.reallocate=decompose_reallocate;
int rc=unicode_decompose(&uinfo);
- unicode_decompose_info_deinit(&uinfo);
+ unicode_decomposition_deinit(&uinfo);
if (info.caught)
std::rethrow_exception(info.caught);
@@ -1082,25 +1144,24 @@ void unicode::decompose(std::u32string &s,
throw std::bad_alloc();
}
-void unicode::compose_default_callback(size_t, size_t, const char32_t *, size_t)
+void unicode::compose_default_callback(unicode_composition_t &) // empty body: leaves the compositions as they are
{
}
namespace {
struct comps_raii {
- struct unicode_compositions *comps;
+ unicode_composition_t comps; // not initialized here; unicode_composition_init() zeroes n_compositions and compositions first
~comps_raii()
{
- unicode_composition_deinit(comps);
+ unicode_composition_deinit(&comps); // releases whatever unicode_composition_init() stored in comps
}
};
};
void unicode::compose(std::u32string &s,
int flags,
- const std::function<void (size_t, size_t,
- const char32_t *, size_t)> &cb)
+ const std::function<void (unicode_composition_t &)> &cb)
{
if (s.empty())
return;
@@ -1109,15 +1170,10 @@ void unicode::compose(std::u32string &s,
if (unicode_composition_init(&s[0], s.size(), flags, &comps.comps))
{
- comps.comps=nullptr;
throw std::bad_alloc(); /* The only reason */
}
- for (auto ptr=comps.comps; ptr; ptr=ptr->next)
- {
- cb(ptr->index, ptr->n_composed,
- ptr->composition, ptr->n_composition);
- }
+ cb(comps.comps);
- s.resize(unicode_composition_apply(&s[0], s.size(), comps.comps));
+ s.resize(unicode_composition_apply(&s[0], s.size(), &comps.comps));
}