summaryrefslogtreecommitdiffstats
path: root/unicode
diff options
context:
space:
mode:
authorSam Varshavchik2020-11-30 23:36:40 -0500
committerSam Varshavchik2020-11-30 23:44:59 -0500
commit521f14cdadc891575b4599bd8ceb92a1dd41615a (patch)
treea7a3178ee43de2babbb1de513d7f6810c02d08a6 /unicode
parent844f6a9ef755c1c5826c9583b364af08b54a4dcc (diff)
downloadcourier-libs-521f14cdadc891575b4599bd8ceb92a1dd41615a.tar.bz2
Break up bidi_calc into bidi_calc_types and bidi_calc_levels.
Diffstat (limited to 'unicode')
-rw-r--r--unicode/Makefile.am5
-rw-r--r--unicode/book.xml188
-rw-r--r--unicode/courier-unicode.h.in46
-rw-r--r--unicode/unicode_bidi.c85
-rw-r--r--unicode/unicodecpp.C31
5 files changed, 292 insertions, 63 deletions
diff --git a/unicode/Makefile.am b/unicode/Makefile.am
index dbc71aa..25b0719 100644
--- a/unicode/Makefile.am
+++ b/unicode/Makefile.am
@@ -91,6 +91,7 @@ man_MANS= \
$(srcdir)/man/courier-unicode.7 \
$(srcdir)/man/unicode\:\:bidi.3 \
$(srcdir)/man/unicode\:\:bidi_calc.3 \
+ $(srcdir)/man/unicode\:\:bidi_calc_types.3 \
$(srcdir)/man/unicode\:\:bidi_cleanup.3 \
$(srcdir)/man/unicode\:\:bidi_embed.3 \
$(srcdir)/man/unicode\:\:bidi_embed_paragraph_level.3 \
@@ -115,6 +116,8 @@ man_MANS= \
$(srcdir)/man/unicode_bidi.3 \
$(srcdir)/man/unicode_bidi_bracket_type.3 \
$(srcdir)/man/unicode_bidi_calc.3 \
+ $(srcdir)/man/unicode_bidi_calc_levels.3 \
+ $(srcdir)/man/unicode_bidi_calc_types.3 \
$(srcdir)/man/unicode_bidi_cleanup.3 \
$(srcdir)/man/unicode_bidi_embed.3 \
$(srcdir)/man/unicode_bidi_embed_paragraph_level.3 \
@@ -432,7 +435,7 @@ docs.stamp:
rm -rf man.tmp
mkdir man.tmp
d=`cd $(srcdir); pwd`; cd man.tmp; xsltproc --nonet --xinclude \
- http://docbook.sourceforge.net/release/xsl/current/manpages/docbook.xsl\
+ http://cdn.docbook.org/release/xsl-nons/current//manpages/docbook.xsl\
$$d/book.xml
mkdir -p man
rm -f man/*.[123456789]
diff --git a/unicode/book.xml b/unicode/book.xml
index b0342ea..ad96d82 100644
--- a/unicode/book.xml
+++ b/unicode/book.xml
@@ -301,6 +301,8 @@ See COPYING for distribution information.
<refnamediv>
<refname>unicode_bidi</refname>
+ <refname>unicode_bidi_calc_levels</refname>
+ <refname>unicode_bidi_calc_types</refname>
<refname>unicode_bidi_calc</refname>
<refname>unicode_bidi_reorder</refname>
<refname>unicode_bidi_cleanup</refname>
@@ -318,6 +320,23 @@ See COPYING for distribution information.
<refsynopsisdiv>
<funcsynopsis>
<funcsynopsisinfo>#include &lt;courier-unicode.h&gt;&#10;&#10;unicode_bidi_level_t lr=UNICODE_BIDI_LR;</funcsynopsisinfo>
+
+ <funcprototype>
+ <funcdef>void <function>unicode_bidi_calc_types</function></funcdef>
+ <paramdef>const char32_t *<parameter>p</parameter></paramdef>
+ <paramdef>size_t <parameter>n</parameter></paramdef>
+ <paramdef>unicode_bidi_type_t *<parameter>types</parameter></paramdef>
+ </funcprototype>
+
+ <funcprototype>
+ <funcdef>unicode_bidi_level_t <function>unicode_bidi_calc_levels</function></funcdef>
+ <paramdef>const char32_t *<parameter>p</parameter></paramdef>
+ <paramdef>const unicode_bidi_type_t *<parameter>types</parameter></paramdef>
+ <paramdef>size_t <parameter>n</parameter></paramdef>
+ <paramdef>unicode_bidi_level_t *<parameter>levels</parameter></paramdef>
+ <paramdef>const unicode_bidi_level_t *<parameter>initial_embedding_level</parameter></paramdef>
+ </funcprototype>
+
<funcprototype>
<funcdef>void <function>unicode_bidi_calc</function></funcdef>
<paramdef>const char32_t *<parameter>p</parameter></paramdef>
@@ -417,19 +436,49 @@ See COPYING for distribution information.
<listitem>
<para>
Allocate an array of
+ <structname>unicode_bidi_type_t</structname> that's the
+ same size as the Unicode string.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ Allocate an array of
<structname>unicode_bidi_level_t</structname> that's the
same size as the Unicode string.
</para>
</listitem>
+
<listitem>
<para>
- Use <function>unicode_bidi_calc</function>() to compute
+ Use <function>unicode_bidi_calc_types</function>() to compute
+ the Unicode string's characters' bi-directional types,
+ and populate the
+ <structname>unicode_bidi_type_t</structname> buffer.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Use <function>unicode_bidi_calc_levels</function>() to compute
the Unicode string's characters' bi-directional embedding
level (executes the Bi-Directional algorithm up to and
including step L1). This populates the
<structname>unicode_bidi_level_t</structname> buffer.
</para>
</listitem>
+
+ <listitem>
+ <para>
+ Alternatively: allocate only the
+ <structname>unicode_bidi_level_t</structname> array
+ and use <function>unicode_bidi_calc</function>(), which
+ <function>malloc</function>()s the
+ <structname>unicode_bidi_type_t</structname> buffer,
+ calls <function>unicode_bidi_calc_types</function>() and <function>unicode_bidi_calc_levels</function>(),
+ and then <function>free</function>()s the buffer.
+ </para>
+ </listitem>
+
<listitem>
<para>
Use <function>unicode_bidi_reorder</function>() to reverse
@@ -451,7 +500,7 @@ See COPYING for distribution information.
<para>
The parameters to
- <function>unicode_bidi_calc</function>() are:
+ <function>unicode_bidi_calc_types</function>() are:
</para>
<itemizedlist>
@@ -468,6 +517,42 @@ See COPYING for distribution information.
<listitem>
<para>
A pointer to an array of
+ <structname>unicode_bidi_type_t</structname> values.
+ The caller is
+ responsible for allocating and deallocating this array,
+ which has the same size as the Unicode string.
+ </para>
+ </listitem>
+ </itemizedlist>
+
+ <para>
+ The parameters to
+ <function>unicode_bidi_calc_levels</function>() are:
+ </para>
+
+ <itemizedlist>
+ <listitem>
+ <para>
+ A pointer to the Unicode string.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ A pointer to the buffer that was passed to
+ <function>unicode_bidi_calc_types</function>().
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Number of characters in the Unicode string and the
+ <structname>unicode_bidi_type_t</structname> buffer.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ A pointer to an array of
<structname>unicode_bidi_level_t</structname> values.
The caller is
responsible for allocating and deallocating this array,
@@ -488,7 +573,18 @@ See COPYING for distribution information.
</itemizedlist>
<para>
- <function>unicode_bidi_calc</function>() fills in the
+ The parameters to <function>unicode_bidi_calc</function>() are
+ the same except for the
+ <structname>unicode_bidi_type_t</structname> pointer.
+ <function>unicode_bidi_calc</function>() allocates this
+ buffer by itself and calls
+ <function>unicode_bidi_calc_types</function>() followed by <function>unicode_bidi_calc_levels</function>(), and
+ destroys the buffer before returning.
+ </para>
+
+ <para>
+ <function>unicode_bidi_calc</function>()
+ and <function>unicode_bidi_calc_levels</function>() fill in the
<structname>unicode_bidi_level_t</structname> array with the
values corresponding to the embedding level of the
corresponding character,
@@ -500,7 +596,9 @@ See COPYING for distribution information.
</para>
<para>
- <function>unicode_bidi_calc</function>() returns the resolved
+ <function>unicode_bidi_calc</function>()
+ and <function>unicode_bidi_calc_levels</function>()
+ return the resolved
paragraph direction level, which
always matches the passed in level, if specified, else it
reports the
@@ -510,7 +608,8 @@ See COPYING for distribution information.
<para>
<function>unicode_bidi_reorder</function>() takes the actual
unicode string together with the embedding values from
- <function>unicode_bidi_calc</function>, then reverses the
+ <function>unicode_bidi_calc</function> or
+ <function>unicode_bidi_calc_levels</function>(), then reverses the
bi-directional string, as specified by step L2 of the bi-directional
algorithm.
The parameters to
@@ -698,7 +797,8 @@ See COPYING for distribution information.
basic,
but the resulting bi-directional string produces the same
canonical rendering order after applying
- <function>unicode_bidi_calc()</function>,
+ <function>unicode_bidi_calc()</function> or
+ <function>unicode_bidi_calc_levels</function>(),
<function>unicode_reorder()</function> and
<function>unicode_bidi_cleanup()</function>
(with the canonical option),
@@ -847,7 +947,9 @@ See COPYING for distribution information.
default paragraph embedding level and returns 0 if it matches.
Otherwise it returns a directional marker that should be
<emphasis>prepended</emphasis> to the Unicode string to allow
- <function>unicode_bidi_calc</function>'s optional paragraph
+ <function>unicode_bidi_calc</function>'s
+ (or <function>unicode_bidi_calc_levels</function>())
+ optional paragraph
embedding level pointer's value to be <literal>NULL</literal>,
but derive the same default embedding level.
The parameters to
@@ -2661,6 +2763,7 @@ See COPYING for distribution information.
<refnamediv>
<refname>unicode::bidi</refname>
<refname>unicode::bidi_calc</refname>
+ <refname>unicode::bidi_calc_types</refname>
<refname>unicode::bidi_reorder</refname>
<refname>unicode::bidi_cleanup</refname>
<refname>unicode::bidi_logical_order</refname>
@@ -2670,16 +2773,31 @@ See COPYING for distribution information.
</refnamediv>
<refsynopsisdiv>
+ <synopsis>#include &lt;courier-unicode.h&gt;</synopsis>
+ <classsynopsis class="class" language="C++">
+ <ooclass>
+ <classname>struct unicode::bidi_calc_types</classname>
+ </ooclass>
+ <constructorsynopsis>
+ <methodname>bidi_calc_types</methodname>
+ <methodparam><modifier>const std::u32string &amp;</modifier><parameter>string</parameter>
+ </methodparam>
+ </constructorsynopsis>
+ <fieldsynopsis>
+ <modifier>std::vector&lt;unicode_bidi_type_t&gt;</modifier>
+ <varname>types</varname>
+ </fieldsynopsis>
+ </classsynopsis>
+
<funcsynopsis>
- <funcsynopsisinfo>#include &lt;courier-unicode.h&gt;</funcsynopsisinfo>
<funcprototype>
<funcdef>std::tuple&lt;std::vector&lt;unicode_bidi_level_t&gt;, unicode_bidi_level_t&gt; <function>unicode::bidi_calc</function></funcdef>
- <paramdef>const std::u32string &amp;<parameter>string</parameter></paramdef>
+ <paramdef>const unicode::bidi_calc_types &amp;<parameter>ustring</parameter></paramdef>
</funcprototype>
<funcprototype>
<funcdef>std::tuple&lt;std::vector&lt;unicode_bidi_level_t&gt;, unicode_bidi_level_t&gt; <function>unicode::bidi_calc</function></funcdef>
- <paramdef>const std::u32string &amp;<parameter>string</parameter></paramdef>
+ <paramdef>const unicode::bidi_calc_types &amp;<parameter>ustring</parameter></paramdef>
<paramdef>unicode_bidi_level_t <parameter>embedding_level</parameter></paramdef>
</funcprototype>
@@ -2766,9 +2884,55 @@ See COPYING for distribution information.
<listitem>
<para>
<function>unicode::bidi_calc</function> returns the
- directional embedding value buffer and the paragraph
- embedding level.
+ directional embedding value buffer and the calculated paragraph
+ embedding level. Its <parameter>ustring</parameter>
+ is implicitly converted from a
+ <classname>std::u32string</classname>:
</para>
+ <blockquote>
+ <informalexample>
+ <programlisting><![CDATA[
+std::u32string text;
+
+auto [levels, level]=unicode::bidi_calc(text);
+
+]]></programlisting>
+ </informalexample>
+ </blockquote>
+
+ <para>
+ Alternatively a <classname>unicode::bidi_calc_types</classname>
+ object gets constructed from the same
+ <classname>std::u32string</classname> and then passed
+ directly to <function>unicode::bidi_calc</function>:
+ </para>
+ <blockquote>
+ <informalexample>
+ <programlisting><![CDATA[
+std::u32string text;
+
+unicode::bidi_calc_types types{text};
+
+// types.types is a std::vector of enum_bidi_type_t values
+
+auto [levels, level]=unicode::bidi_calc(types);
+
+]]></programlisting>
+ </informalexample>
+ </blockquote>
+ <para>
+ This provides the means to access the intermediate
+ <classname>enum_bidi_type_t</classname> values that
+ get calculated from the Unicode text string.
+ </para>
+
+ <note>
+ <para>
+ In all cases the <classname>std::u32string</classname>
+ cannot be a temporary object, and it must remain in scope
+ until <function>unicode::bidi_calc</function>() returns.
+ </para>
+ </note>
</listitem>
<listitem>
<para>
diff --git a/unicode/courier-unicode.h.in b/unicode/courier-unicode.h.in
index 3de76d3..f8ab117 100644
--- a/unicode/courier-unicode.h.in
+++ b/unicode/courier-unicode.h.in
@@ -626,6 +626,16 @@ typedef enum {
extern enum_bidi_type_t unicode_bidi_type(char32_t c);
+extern void unicode_bidi_calc_types(const char32_t *p, size_t n,
+ enum_bidi_type_t *buf);
+
+extern unicode_bidi_level_t unicode_bidi_calc_levels(const char32_t *p,
+ const enum_bidi_type_t
+ *types,
+ size_t n,
+ unicode_bidi_level_t *bufp,
+ const unicode_bidi_level_t
+ *initial_embedding_level);
/* Bitmask options to unicode_bidi_cleanup */
/*
@@ -2153,13 +2163,45 @@ std::u32string tolower(const std::u32string &u);
std::u32string toupper(const std::u32string &u);
+//! Calculate bidirectional character types
+
+//! Passed as a parameter to bidi_calc(), supplying the string and the
+//! calculated bidirectional types.
+
+struct bidi_calc_types {
+ const std::u32string &s;
+
+ //! Calculated bidirectional types.
+
+ std::vector<enum_bidi_type_t> types;
+
+ //! A reference to an existing std::u32string
+
+ //! bidi_calc_types can be constructed only from a reference to
+ //! an existing std::u32string.
+ bidi_calc_types(const std::u32string &);
+
+ //! Deleted constructor
+
+ //! bidi_calc_types cannot be constructed from a temporary
+ //! std::u32string.
+ bidi_calc_types(std::u32string &&)=delete;
+
+ //! Destructor
+ ~bidi_calc_types();
+};
+
//! Calculate bidirectional embedding levels
//! Returns the bidirectional embedding levels, and the paragraph
//! embedding level.
+//!
+//! The first parameter can be implicitly converted from an existing
+//! std::u32string object. Alternatively a bidi_calc_types helper
+//! can be constructed explicitly, and then passed in directly.
std::tuple<std::vector<unicode_bidi_level_t>,
- unicode_bidi_level_t> bidi_calc(const std::u32string &s);
+ unicode_bidi_level_t> bidi_calc(const bidi_calc_types &s);
//! Calculate bidirectional embedding levels
@@ -2170,7 +2212,7 @@ std::tuple<std::vector<unicode_bidi_level_t>,
//! embedding level.
std::tuple<std::vector<unicode_bidi_level_t>,
- unicode_bidi_level_t> bidi_calc(const std::u32string &s,
+ unicode_bidi_level_t> bidi_calc(const bidi_calc_types &s,
unicode_bidi_level_t level);
//! Reorder bidirectional text
diff --git a/unicode/unicode_bidi.c b/unicode/unicode_bidi.c
index cfae12f..cbb11dc 100644
--- a/unicode/unicode_bidi.c
+++ b/unicode/unicode_bidi.c
@@ -467,7 +467,7 @@ typedef struct {
unicode_bidi_level_t paragraph_embedding_level;
const char32_t *chars;
enum_bidi_type_t *classes;
- enum_bidi_type_t *orig_classes;
+ const enum_bidi_type_t *orig_classes;
unicode_bidi_level_t *levels;
size_t size;
int overflow_isolate_count;
@@ -624,7 +624,7 @@ compute_paragraph_embedding_level_from_types(const enum_bidi_type_t *p,
static directional_status_stack_t
directional_status_stack_init(const char32_t *chars,
- enum_bidi_type_t *classes, size_t n,
+ const enum_bidi_type_t *classes, size_t n,
unicode_bidi_level_t *levels,
const unicode_bidi_level_t
*initial_embedding_level)
@@ -638,21 +638,21 @@ directional_status_stack_init(const char32_t *chars,
? *initial_embedding_level & 1
: compute_paragraph_embedding_level_from_types(classes, 0, n);
stack->chars=chars;
- stack->classes=classes;
+ stack->orig_classes=classes;
if (n)
{
- classes=(enum_bidi_type_t *)
+ stack->classes=(enum_bidi_type_t *)
malloc(sizeof(enum_bidi_type_t)*n);
- if (!classes)
+ if (!stack->classes)
abort();
- memcpy(classes, stack->classes, sizeof(enum_bidi_type_t)*n);
+ memcpy(stack->classes, stack->orig_classes,
+ sizeof(enum_bidi_type_t)*n);
}
else
{
- classes=0;
+ stack->classes=0;
}
- stack->orig_classes=classes;
stack->levels=levels;
stack->size=n;
@@ -682,19 +682,12 @@ static void directional_status_stack_deinit(directional_status_stack_t stack)
{
while (stack->head)
directional_status_stack_pop(stack);
- if (stack->orig_classes)
- free(stack->orig_classes);
+ if (stack->classes)
+ free(stack->classes);
isolating_run_sequences_deinit(&stack->isolating_run_sequences);
free(stack);
}
-static unicode_bidi_level_t
-unicode_bidi_b(const char32_t *p,
- size_t n,
- enum_bidi_type_t *buf,
- unicode_bidi_level_t *bufp,
- const unicode_bidi_level_t *initial_embedding_level);
-
enum_bidi_type_t unicode_bidi_type(char32_t c)
{
return (enum_bidi_type_t)
@@ -707,35 +700,40 @@ enum_bidi_type_t unicode_bidi_type(char32_t c)
UNICODE_BIDI_TYPE_L);
}
-unicode_bidi_level_t
-unicode_bidi_calc(const char32_t *p, size_t n, unicode_bidi_level_t *bufp,
- const unicode_bidi_level_t *initial_embedding_level)
+
+void unicode_bidi_calc_types(const char32_t *p, size_t n,
+ enum_bidi_type_t *buf)
{
/*
** Look up the bidi class for each char32_t.
- **
- ** When we encounter a paragraph break we call unicode_bidi_b() to
- ** process it.
*/
-
- enum_bidi_type_t *buf=
- (enum_bidi_type_t *)malloc(n * sizeof(enum_bidi_type_t));
-
- if (!buf)
- abort();
for (size_t i=0; i<n; ++i)
{
buf[i]=unicode_bidi_type(p[i]);
#ifdef UNICODE_BIDI_TEST
UNICODE_BIDI_TEST(i);
#endif
- bufp[i]=UNICODE_BIDI_SKIP;
}
+}
- unicode_bidi_level_t level=unicode_bidi_b(p, n,
- buf,
- bufp,
- initial_embedding_level);
+unicode_bidi_level_t
+unicode_bidi_calc(const char32_t *p, size_t n, unicode_bidi_level_t *bufp,
+ const unicode_bidi_level_t *initial_embedding_level)
+{
+ enum_bidi_type_t *buf=
+ (enum_bidi_type_t *)malloc(n * sizeof(enum_bidi_type_t));
+
+ if (!buf)
+ abort();
+
+ unicode_bidi_calc_types(p, n, buf);
+
+ unicode_bidi_level_t level=
+ unicode_bidi_calc_levels(p,
+ buf,
+ n,
+ bufp,
+ initial_embedding_level);
free(buf);
@@ -744,16 +742,21 @@ unicode_bidi_calc(const char32_t *p, size_t n, unicode_bidi_level_t *bufp,
static void unicode_bidi_cl(directional_status_stack_t stack);
-static unicode_bidi_level_t
-unicode_bidi_b(const char32_t *p,
- size_t n,
- enum_bidi_type_t *buf,
- unicode_bidi_level_t *bufp,
- const unicode_bidi_level_t *initial_embedding_level)
+unicode_bidi_level_t
+unicode_bidi_calc_levels(const char32_t *p,
+ const enum_bidi_type_t *classes,
+ size_t n,
+ unicode_bidi_level_t *bufp,
+ const unicode_bidi_level_t *initial_embedding_level)
{
directional_status_stack_t stack;
- stack=directional_status_stack_init(p, buf, n, bufp,
+ for (size_t i=0; i<n; ++i)
+ {
+ bufp[i]=UNICODE_BIDI_SKIP;
+ }
+
+ stack=directional_status_stack_init(p, classes, n, bufp,
initial_embedding_level);
unicode_bidi_level_t paragraph_embedding_level=
diff --git a/unicode/unicodecpp.C b/unicode/unicodecpp.C
index a0d5ac4..4b864b3 100644
--- a/unicode/unicodecpp.C
+++ b/unicode/unicodecpp.C
@@ -562,16 +562,30 @@ std::u32string unicode::toupper(const std::u32string &u)
return copy;
}
+
+unicode::bidi_calc_types::bidi_calc_types(const std::u32string &s)
+ : s{s}
+{
+ types.resize(s.size());
+ if (!s.empty())
+ unicode_bidi_calc_types(s.c_str(), s.size(), &types[0]);
+}
+
+unicode::bidi_calc_types::~bidi_calc_types()=default;
+
std::tuple<std::vector<unicode_bidi_level_t>, unicode_bidi_level_t>
-unicode::bidi_calc(const std::u32string &s)
+unicode::bidi_calc(const bidi_calc_types &s)
{
return unicode::bidi_calc(s, UNICODE_BIDI_SKIP);
}
std::tuple<std::vector<unicode_bidi_level_t>, unicode_bidi_level_t>
-unicode::bidi_calc(const std::u32string &s,
+unicode::bidi_calc(const bidi_calc_types &st,
unicode_bidi_level_t paragraph_embedding_level)
{
+ if (st.s.size() != st.types.size())
+ return { {}, UNICODE_BIDI_LR };
+
const unicode_bidi_level_t *initial_embedding_level=0;
if (paragraph_embedding_level == UNICODE_BIDI_LR ||
@@ -583,14 +597,17 @@ unicode::bidi_calc(const std::u32string &s,
std::tuple<std::vector<unicode_bidi_level_t>, unicode_bidi_level_t>
ret;
- std::get<0>(ret).resize(s.size());
+ std::get<0>(ret).resize(st.s.size());
std::get<1>(ret)=UNICODE_BIDI_LR;
- if (s.size())
+ if (st.s.size())
{
- std::get<1>(ret)=unicode_bidi_calc(s.c_str(), s.size(),
- &std::get<0>(ret)[0],
- initial_embedding_level);
+ std::get<1>(ret)=
+ unicode_bidi_calc_levels(st.s.c_str(),
+ &st.types[0],
+ st.s.size(),
+ &std::get<0>(ret)[0],
+ initial_embedding_level);
}
return ret;
}