diff options
Diffstat (limited to 'unicode')
| -rw-r--r-- | unicode/Makefile.am | 2 | ||||
| -rw-r--r-- | unicode/biditest2.C | 61 | ||||
| -rw-r--r-- | unicode/book.xml | 58 | ||||
| -rw-r--r-- | unicode/courier-unicode.h.in | 27 | ||||
| -rw-r--r-- | unicode/unicode_bidi.c | 32 | ||||
| -rw-r--r-- | unicode/unicodecpp.C | 15 |
6 files changed, 183 insertions, 12 deletions
diff --git a/unicode/Makefile.am b/unicode/Makefile.am index 135617a..7ba36f1 100644 --- a/unicode/Makefile.am +++ b/unicode/Makefile.am @@ -95,6 +95,7 @@ man_MANS= \ $(srcdir)/man/unicode\:\:bidi_cleanup.3 \ $(srcdir)/man/unicode\:\:bidi_embed.3 \ $(srcdir)/man/unicode\:\:bidi_embed_paragraph_level.3 \ + $(srcdir)/man/unicode\:\:bidi_get_direction.3 \ $(srcdir)/man/unicode\:\:bidi_logical_order.3 \ $(srcdir)/man/unicode\:\:bidi_reorder.3 \ $(srcdir)/man/unicode\:\:iconvert\:\:convert.3 \ @@ -119,6 +120,7 @@ man_MANS= \ $(srcdir)/man/unicode_bidi_calc_levels.3 \ $(srcdir)/man/unicode_bidi_calc_types.3 \ $(srcdir)/man/unicode_bidi_cleanup.3 \ + $(srcdir)/man/unicode_bidi_direction.3 \ $(srcdir)/man/unicode_bidi_embed.3 \ $(srcdir)/man/unicode_bidi_embed_paragraph_level.3 \ $(srcdir)/man/unicode_bidi_logical_order.3 \ diff --git a/unicode/biditest2.C b/unicode/biditest2.C index 0e0472e..8e9d7da 100644 --- a/unicode/biditest2.C +++ b/unicode/biditest2.C @@ -533,6 +533,65 @@ void partial_reorder_cleanup() 0, 3); } +void null_character_test() +{ + std::u32string s{{0}}; + + auto res=unicode::bidi_calc(s); + + unicode::bidi_reorder(s, std::get<0>(res)); + + unicode::bidi_cleanup(s, std::get<0>(res), + [] + (size_t) + { + }, + UNICODE_BIDI_CLEANUP_EXTRA, + 0, 3); +} + +void direction_test() +{ + static const struct { + const char32_t *str; + unicode_bidi_level_t direction; + int is_explicit; + } tests[]={ + { + U"Hello", + UNICODE_BIDI_LR, + 1, + }, + { + U" ", + UNICODE_BIDI_LR, + 0, + }, + { + U"", + UNICODE_BIDI_LR, + 0, + }, + { + U"שלום", + UNICODE_BIDI_RL, + 1, + }, + }; + + for (const auto &t:tests) + { + auto ret=unicode::bidi_get_direction(t.str); + + if (ret.direction != t.direction || + ret.is_explicit != t.is_explicit) + { + std::cerr << "direction_test failed\n"; + exit(1); + } + } +} + int main(int argc, char **argv) { DEBUGDUMP=fopen("/dev/null", "w"); @@ -543,7 +602,9 @@ int main(int argc, char **argv) } exception_test(); partial_reorder_cleanup(); + 
null_character_test(); latin_test(); character_test(); + direction_test(); return 0; } diff --git a/unicode/book.xml b/unicode/book.xml index 0275058..c3ebc33 100644 --- a/unicode/book.xml +++ b/unicode/book.xml @@ -310,6 +310,7 @@ See COPYING for distribution information. <refname>unicode_bidi_embed</refname> <refname>unicode_bidi_embed_paragraph_level</refname> + <refname>unicode_bidi_direction</refname> <refname>unicode_bidi_type</refname> <refname>unicode_bidi_setbnl</refname> <refname>unicode_bidi_mirror</refname> @@ -404,6 +405,12 @@ See COPYING for distribution information. </funcprototype> <funcprototype> + <funcdef>struct unicode_bidi_direction <function>unicode_bidi_get_direction</function></funcdef> + <paramdef>char32_t *<parameter>c</parameter></paramdef> + <paramdef>size_t <parameter>n</parameter></paramdef> + </funcprototype> + + <funcprototype> <funcdef>enum_bidi_type_t <function>unicode_bidi_type</function></funcdef> <paramdef>char32_t <parameter>c</parameter></paramdef> </funcprototype> @@ -996,6 +1003,40 @@ See COPYING for distribution information. <title>Miscellaneous utility functions</title> <para> + <function>unicode_bidi_get_direction</function> + takes a pointer to a unicode string, the number of + characters in the unicode string, and determines + the default paragraph embedding level. + <function>unicode_bidi_get_direction</function> returns + a <literal>struct</literal> with the following fields: + </para> + <variablelist> + <varlistentry> + <term><varname>direction</varname></term> + <listitem> + <para> + This value is either <literal>UNICODE_BIDI_LR</literal> + or <literal>UNICODE_BIDI_RL</literal> (left to right or + right to left). + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><varname>is_explicit</varname></term> + <listitem> + <para> + This value is a flag. 
A non-0 value indicates that + the embedding level was derived from an explicit + character type (<literal>L</literal>, <literal>R</literal> + or <literal>AL</literal>) from the string. A 0 value + indicates the default paragraph direction; no explicit + character was found in the string. + </para> + </listitem> + </varlistentry> + </variablelist> + <para> <function>unicode_bidi_type</function> looks up each character's bi-directional character type. </para> @@ -2798,6 +2839,7 @@ See COPYING for distribution information. <refname>unicode::bidi_logical_order</refname> <refname>unicode::bidi_embed</refname> <refname>unicode::bidi_embed_paragraph_level</refname> + <refname>unicode::bidi_get_direction</refname> <refpurpose>unicode bi-directional algorithm</refpurpose> </refnamediv> @@ -2920,6 +2962,13 @@ See COPYING for distribution information. <paramdef>const std::u32string &<parameter>string</parameter></paramdef> <paramdef>unicode_bidi_level_t <parameter>paragraph_embedding</parameter></paramdef> </funcprototype> + + <funcprototype> + <funcdef>unicode_bidi_direction <function>bidi_get_direction</function></funcdef> + <paramdef>const std::u32string &<parameter>string</parameter></paramdef> + <paramdef>size_t <parameter>starting_pos</parameter>=0</paramdef> + <paramdef>size_t <parameter>n</parameter>=(size_t)-1</paramdef> + </funcprototype> </funcsynopsis> </refsynopsisdiv> @@ -3056,13 +3105,14 @@ auto [levels, level]=unicode::bidi_calc(types); <listitem> <para> <function>unicode::bidi_reorder</function>, - <function>unicode::bidi_cleanup</function>, and - <function>unicode::bidi_logical_order</function> + <function>unicode::bidi_cleanup</function>, + <function>unicode::bidi_logical_order</function> and + <function>unicode::bidi_get_direction</function> take two optional parameters (defaulted values or overloaded) specifying an optional starting position and number of characters that - define a subset of the original string that gets reordered - or cleaned up. 
+ define a subset of the original string that gets reordered, + cleaned up, or has its direction determined. </para> <para> diff --git a/unicode/courier-unicode.h.in b/unicode/courier-unicode.h.in index 1c7245c..57603da 100644 --- a/unicode/courier-unicode.h.in +++ b/unicode/courier-unicode.h.in @@ -2,7 +2,7 @@ #define courier_unicode_h /* -** Copyright 2000-2020 Double Precision, Inc. +** Copyright 2000-2021 Double Precision, Inc. ** See COPYING for distribution information. ** */ @@ -585,6 +585,25 @@ typedef unsigned char unicode_bidi_level_t; #define UNICODE_BIDI_RL ((unicode_bidi_level_t)1) #define UNICODE_BIDI_SKIP ((unicode_bidi_level_t)254) +/* +** What unicode_bidi_direction returns. +*/ + +struct unicode_bidi_direction { + + /* Direction of the given text */ + unicode_bidi_level_t direction; + + /* + ** The direction is explicit, if not direction is UNICODE_BIDI_LR by + ** default. + */ + int is_explicit; +}; + +struct unicode_bidi_direction unicode_bidi_get_direction(const char32_t *p, + size_t n); + extern unicode_bidi_level_t unicode_bidi_calc(const char32_t *p, size_t n, unicode_bidi_level_t *bufp, const unicode_bidi_level_t * @@ -2336,6 +2355,12 @@ std::u32string bidi_embed(const std::u32string &string, extern char32_t bidi_embed_paragraph_level(const std::u32string &string, unicode_bidi_level_t level); +//! 
Compute default direction of text + +extern unicode_bidi_direction bidi_get_direction(const std::u32string &string, + size_t starting_pos=0, + size_t n=(size_t)-1); + #if 0 { #endif diff --git a/unicode/unicode_bidi.c b/unicode/unicode_bidi.c index 73440ed..b23b833 100644 --- a/unicode/unicode_bidi.c +++ b/unicode/unicode_bidi.c @@ -560,13 +560,17 @@ static void directional_status_stack_push stack->head=p; } -static unicode_bidi_level_t +static struct unicode_bidi_direction compute_paragraph_embedding_level(size_t i, size_t j, enum_bidi_type_t (*get)(size_t i, void *arg), void *arg) - { + struct unicode_bidi_direction ret; + + memset(&ret, 0, sizeof(ret)); + ret.direction=UNICODE_BIDI_LR; + unicode_bidi_level_t in_isolation=0; for (; i<j; ++i) @@ -586,13 +590,18 @@ compute_paragraph_embedding_level(size_t i, size_t j, if (t == UNICODE_BIDI_TYPE_AL || t == UNICODE_BIDI_TYPE_R) { - return UNICODE_BIDI_RL; + ret.direction=UNICODE_BIDI_RL; + ret.is_explicit=1; + break; } if (t == UNICODE_BIDI_TYPE_L) + { + ret.is_explicit=1; break; + } } } - return UNICODE_BIDI_LR; + return ret; } struct compute_paragraph_embedding_level_type_info { @@ -619,7 +628,7 @@ compute_paragraph_embedding_level_from_types(const enum_bidi_type_t *p, return compute_paragraph_embedding_level (i, j, get_enum_bidi_type_for_paragraph_embedding_level, - &info); + &info).direction; } static directional_status_stack_t @@ -2713,8 +2722,19 @@ char32_t unicode_bidi_embed_paragraph_level(const char32_t *str, if ((compute_paragraph_embedding_level (0, n, get_enum_bidi_type_for_embedding_paragraph_level, - &info) ^ paragraph_level) == 0) + &info).direction ^ paragraph_level) == 0) return 0; return (paragraph_level & 1) ? 
UNICODE_RLM:UNICODE_LRM; } + +struct unicode_bidi_direction unicode_bidi_get_direction(const char32_t *str, + size_t n) +{ + struct compute_paragraph_embedding_level_char_info info; + + info.str=str; + return compute_paragraph_embedding_level + (0, n, + get_enum_bidi_type_for_embedding_paragraph_level, &info); +} diff --git a/unicode/unicodecpp.C b/unicode/unicodecpp.C index 10156a4..babb6bb 100644 --- a/unicode/unicodecpp.C +++ b/unicode/unicodecpp.C @@ -1,5 +1,5 @@ /* -** Copyright 2011-2020 Double Precision, Inc. +** Copyright 2011-2021 Double Precision, Inc. ** See COPYING for distribution information. ** */ @@ -919,3 +919,16 @@ char32_t unicode::bidi_embed_paragraph_level(const std::u32string &string, string.size(), level); } + +unicode_bidi_direction unicode::bidi_get_direction(const std::u32string &string, + size_t starting_pos, + size_t n) +{ + if (starting_pos >= string.size()) + starting_pos=string.size(); + + if (string.size()-starting_pos < n) + n=string.size()-starting_pos; + + return unicode_bidi_get_direction(string.c_str()+starting_pos, n); +} |
