summaryrefslogtreecommitdiffstats
path: root/unicode
diff options
context:
space:
mode:
authorSam Varshavchik2021-01-06 22:05:50 -0500
committerSam Varshavchik2021-01-06 22:05:53 -0500
commit23fffe52808157e36795af52266cc27ac03cbcb9 (patch)
treef0c62446b3f5d1dc3fbe623206719dee42ac36c5 /unicode
parentf6d00ac939f7c9c63eca8dbb2a237b3aa24bc85b (diff)
downloadcourier-libs-23fffe52808157e36795af52266cc27ac03cbcb9.tar.bz2
courier-unicode: implement bidi get_direction.
Diffstat (limited to 'unicode')
-rw-r--r--unicode/Makefile.am2
-rw-r--r--unicode/biditest2.C61
-rw-r--r--unicode/book.xml58
-rw-r--r--unicode/courier-unicode.h.in27
-rw-r--r--unicode/unicode_bidi.c32
-rw-r--r--unicode/unicodecpp.C15
6 files changed, 183 insertions, 12 deletions
diff --git a/unicode/Makefile.am b/unicode/Makefile.am
index 135617a..7ba36f1 100644
--- a/unicode/Makefile.am
+++ b/unicode/Makefile.am
@@ -95,6 +95,7 @@ man_MANS= \
$(srcdir)/man/unicode\:\:bidi_cleanup.3 \
$(srcdir)/man/unicode\:\:bidi_embed.3 \
$(srcdir)/man/unicode\:\:bidi_embed_paragraph_level.3 \
+ $(srcdir)/man/unicode\:\:bidi_get_direction.3 \
$(srcdir)/man/unicode\:\:bidi_logical_order.3 \
$(srcdir)/man/unicode\:\:bidi_reorder.3 \
$(srcdir)/man/unicode\:\:iconvert\:\:convert.3 \
@@ -119,6 +120,7 @@ man_MANS= \
$(srcdir)/man/unicode_bidi_calc_levels.3 \
$(srcdir)/man/unicode_bidi_calc_types.3 \
$(srcdir)/man/unicode_bidi_cleanup.3 \
+ $(srcdir)/man/unicode_bidi_direction.3 \
$(srcdir)/man/unicode_bidi_embed.3 \
$(srcdir)/man/unicode_bidi_embed_paragraph_level.3 \
$(srcdir)/man/unicode_bidi_logical_order.3 \
diff --git a/unicode/biditest2.C b/unicode/biditest2.C
index 0e0472e..8e9d7da 100644
--- a/unicode/biditest2.C
+++ b/unicode/biditest2.C
@@ -533,6 +533,65 @@ void partial_reorder_cleanup()
0, 3);
}
+void null_character_test() // Smoke test: runs bidi_calc, bidi_reorder and bidi_cleanup on a string holding only U+0000; nothing is asserted
+{
+ std::u32string s{{0}}; // one-character string whose only character is NUL
+
+ auto res=unicode::bidi_calc(s);
+
+ unicode::bidi_reorder(s, std::get<0>(res)); // element 0 of the bidi_calc result is passed as the levels argument
+
+ unicode::bidi_cleanup(s, std::get<0>(res),
+ []
+ (size_t)
+ { // callback deliberately does nothing
+ },
+ UNICODE_BIDI_CLEANUP_EXTRA,
+ 0, 3); // starting position 0, count 3: more characters than the string holds
+}
+
+void direction_test() // Table-driven check of unicode::bidi_get_direction(); exits with status 1 on the first mismatch
+{
+ static const struct {
+ const char32_t *str; // input text
+ unicode_bidi_level_t direction; // expected direction
+ int is_explicit; // expected is_explicit flag
+ } tests[]={
+ { // Latin text: expected left-to-right, explicit
+ U"Hello",
+ UNICODE_BIDI_LR,
+ 1,
+ },
+ { // a single space: expected left-to-right, not explicit
+ U" ",
+ UNICODE_BIDI_LR,
+ 0,
+ },
+ { // empty string: expected left-to-right, not explicit
+ U"",
+ UNICODE_BIDI_LR,
+ 0,
+ },
+ { // Hebrew text: expected right-to-left, explicit
+ U"שלום",
+ UNICODE_BIDI_RL,
+ 1,
+ },
+ };
+
+ for (const auto &t:tests)
+ {
+ auto ret=unicode::bidi_get_direction(t.str); // t.str converts to std::u32string; start and count use their defaults
+
+ if (ret.direction != t.direction ||
+ ret.is_explicit != t.is_explicit)
+ {
+ std::cerr << "direction_test failed\n";
+ exit(1);
+ }
+ }
+}
+
int main(int argc, char **argv)
{
DEBUGDUMP=fopen("/dev/null", "w");
@@ -543,7 +602,9 @@ int main(int argc, char **argv)
}
exception_test();
partial_reorder_cleanup();
+ null_character_test();
latin_test();
character_test();
+ direction_test();
return 0;
}
diff --git a/unicode/book.xml b/unicode/book.xml
index 0275058..c3ebc33 100644
--- a/unicode/book.xml
+++ b/unicode/book.xml
@@ -310,6 +310,7 @@ See COPYING for distribution information.
<refname>unicode_bidi_embed</refname>
<refname>unicode_bidi_embed_paragraph_level</refname>
+ <refname>unicode_bidi_direction</refname>
<refname>unicode_bidi_type</refname>
<refname>unicode_bidi_setbnl</refname>
<refname>unicode_bidi_mirror</refname>
@@ -404,6 +405,12 @@ See COPYING for distribution information.
</funcprototype>
<funcprototype>
+ <funcdef>struct unicode_bidi_direction <function>unicode_bidi_get_direction</function></funcdef>
+ <paramdef>const char32_t *<parameter>c</parameter></paramdef>
+ <paramdef>size_t <parameter>n</parameter></paramdef>
+ </funcprototype>
+
+ <funcprototype>
<funcdef>enum_bidi_type_t <function>unicode_bidi_type</function></funcdef>
<paramdef>char32_t <parameter>c</parameter></paramdef>
</funcprototype>
@@ -996,6 +1003,40 @@ See COPYING for distribution information.
<title>Miscellaneous utility functions</title>
<para>
+ <function>unicode_bidi_get_direction</function>
+ takes a pointer to a unicode string and the number of
+ characters in the unicode string, and determines the
+ string's default paragraph embedding level.
+ <function>unicode_bidi_get_direction</function> returns
+ a <literal>struct</literal> with the following fields:
+ </para>
+ <variablelist>
+ <varlistentry>
+ <term><varname>direction</varname></term>
+ <listitem>
+ <para>
+ This value is either <literal>UNICODE_BIDI_LR</literal>
+ or <literal>UNICODE_BIDI_RL</literal> (left to right or
+ right to left).
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>is_explicit</varname></term>
+ <listitem>
+ <para>
+ This value is a flag. A non-0 value indicates that
+ the embedding level was derived from an explicit
+ character type (<literal>L</literal>, <literal>R</literal>
+ or <literal>AL</literal>) from the string. A 0 value
+ indicates the default paragraph direction: no explicit
+ character was found in the string.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ <para>
<function>unicode_bidi_type</function>
looks up each character's bi-directional character type.
</para>
@@ -2798,6 +2839,7 @@ See COPYING for distribution information.
<refname>unicode::bidi_logical_order</refname>
<refname>unicode::bidi_embed</refname>
<refname>unicode::bidi_embed_paragraph_level</refname>
+ <refname>unicode::bidi_get_direction</refname>
<refpurpose>unicode bi-directional algorithm</refpurpose>
</refnamediv>
@@ -2920,6 +2962,13 @@ See COPYING for distribution information.
<paramdef>const std::u32string &amp;<parameter>string</parameter></paramdef>
<paramdef>unicode_bidi_level_t <parameter>paragraph_embedding</parameter></paramdef>
</funcprototype>
+
+ <funcprototype>
+ <funcdef>unicode_bidi_direction <function>bidi_get_direction</function></funcdef>
+ <paramdef>const std::u32string &amp;<parameter>string</parameter></paramdef>
+ <paramdef>size_t <parameter>starting_pos</parameter>=0</paramdef>
+ <paramdef>size_t <parameter>n</parameter>=(size_t)-1</paramdef>
+ </funcprototype>
</funcsynopsis>
</refsynopsisdiv>
@@ -3056,13 +3105,14 @@ auto [levels, level]=unicode::bidi_calc(types);
<listitem>
<para>
<function>unicode::bidi_reorder</function>,
- <function>unicode::bidi_cleanup</function>, and
- <function>unicode::bidi_logical_order</function>
+ <function>unicode::bidi_cleanup</function>,
+ <function>unicode::bidi_logical_order</function> and
+ <function>unicode::bidi_get_direction</function>
take two optional
parameters (defaulted values or overloaded) specifying
an optional starting position and number of characters that
- define a subset of the original string that gets reordered
- or cleaned up.
+ define a subset of the original string that gets reordered,
+ cleaned up, or has its direction determined.
</para>
<para>
diff --git a/unicode/courier-unicode.h.in b/unicode/courier-unicode.h.in
index 1c7245c..57603da 100644
--- a/unicode/courier-unicode.h.in
+++ b/unicode/courier-unicode.h.in
@@ -2,7 +2,7 @@
#define courier_unicode_h
/*
-** Copyright 2000-2020 Double Precision, Inc.
+** Copyright 2000-2021 Double Precision, Inc.
** See COPYING for distribution information.
**
*/
@@ -585,6 +585,25 @@ typedef unsigned char unicode_bidi_level_t;
#define UNICODE_BIDI_RL ((unicode_bidi_level_t)1)
#define UNICODE_BIDI_SKIP ((unicode_bidi_level_t)254)
+/*
+** What unicode_bidi_get_direction() returns.
+*/
+
+struct unicode_bidi_direction {
+
+ /* Direction of the given text: UNICODE_BIDI_LR or UNICODE_BIDI_RL */
+ unicode_bidi_level_t direction;
+
+ /*
+ ** Non-zero when an L, R or AL character determined the direction;
+ ** zero when none was found and direction is the UNICODE_BIDI_LR default.
+ */
+ int is_explicit;
+};
+
+struct unicode_bidi_direction unicode_bidi_get_direction(const char32_t *p,
+ size_t n);
+
extern unicode_bidi_level_t unicode_bidi_calc(const char32_t *p, size_t n,
unicode_bidi_level_t *bufp,
const unicode_bidi_level_t *
@@ -2336,6 +2355,12 @@ std::u32string bidi_embed(const std::u32string &string,
extern char32_t bidi_embed_paragraph_level(const std::u32string &string,
unicode_bidi_level_t level);
+//! Compute default direction of text
+
+extern unicode_bidi_direction bidi_get_direction(const std::u32string &string,
+ size_t starting_pos=0,
+ size_t n=(size_t)-1);
+
#if 0
{
#endif
diff --git a/unicode/unicode_bidi.c b/unicode/unicode_bidi.c
index 73440ed..b23b833 100644
--- a/unicode/unicode_bidi.c
+++ b/unicode/unicode_bidi.c
@@ -560,13 +560,17 @@ static void directional_status_stack_push
stack->head=p;
}
-static unicode_bidi_level_t
+static struct unicode_bidi_direction
compute_paragraph_embedding_level(size_t i, size_t j,
enum_bidi_type_t (*get)(size_t i,
void *arg),
void *arg)
-
{
+ struct unicode_bidi_direction ret;
+
+ memset(&ret, 0, sizeof(ret));
+ ret.direction=UNICODE_BIDI_LR;
+
unicode_bidi_level_t in_isolation=0;
for (; i<j; ++i)
@@ -586,13 +590,18 @@ compute_paragraph_embedding_level(size_t i, size_t j,
if (t == UNICODE_BIDI_TYPE_AL ||
t == UNICODE_BIDI_TYPE_R)
{
- return UNICODE_BIDI_RL;
+ ret.direction=UNICODE_BIDI_RL;
+ ret.is_explicit=1;
+ break;
}
if (t == UNICODE_BIDI_TYPE_L)
+ {
+ ret.is_explicit=1;
break;
+ }
}
}
- return UNICODE_BIDI_LR;
+ return ret;
}
struct compute_paragraph_embedding_level_type_info {
@@ -619,7 +628,7 @@ compute_paragraph_embedding_level_from_types(const enum_bidi_type_t *p,
return compute_paragraph_embedding_level
(i, j,
get_enum_bidi_type_for_paragraph_embedding_level,
- &info);
+ &info).direction;
}
static directional_status_stack_t
@@ -2713,8 +2722,19 @@ char32_t unicode_bidi_embed_paragraph_level(const char32_t *str,
if ((compute_paragraph_embedding_level
(0, n,
get_enum_bidi_type_for_embedding_paragraph_level,
- &info) ^ paragraph_level) == 0)
+ &info).direction ^ paragraph_level) == 0)
return 0;
return (paragraph_level & 1) ? UNICODE_RLM:UNICODE_LRM;
}
+
+struct unicode_bidi_direction unicode_bidi_get_direction(const char32_t *str, /* text to examine */
+ size_t n) /* number of characters in str */
+{
+ struct compute_paragraph_embedding_level_char_info info;
+
+ info.str=str; /* handed to the type-lookup callback through the arg pointer */
+ return compute_paragraph_embedding_level /* examines positions 0..n-1; returns direction and is_explicit */
+ (0, n,
+ get_enum_bidi_type_for_embedding_paragraph_level, &info);
+}
diff --git a/unicode/unicodecpp.C b/unicode/unicodecpp.C
index 10156a4..babb6bb 100644
--- a/unicode/unicodecpp.C
+++ b/unicode/unicodecpp.C
@@ -1,5 +1,5 @@
/*
-** Copyright 2011-2020 Double Precision, Inc.
+** Copyright 2011-2021 Double Precision, Inc.
** See COPYING for distribution information.
**
*/
@@ -919,3 +919,16 @@ char32_t unicode::bidi_embed_paragraph_level(const std::u32string &string,
string.size(),
level);
}
+
+unicode_bidi_direction unicode::bidi_get_direction(const std::u32string &string, // text to examine
+ size_t starting_pos, // index of the first character to examine
+ size_t n) // maximum number of characters to examine
+{
+ if (starting_pos >= string.size()) // a start at or past the end is clamped to the end
+ starting_pos=string.size();
+
+ if (string.size()-starting_pos < n) // the count is clamped to the characters remaining
+ n=string.size()-starting_pos;
+
+ return unicode_bidi_get_direction(string.c_str()+starting_pos, n); // delegate to the C API on the clamped range
+}