summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSam Varshavchik2020-07-04 20:01:49 -0400
committerSam Varshavchik2020-07-12 15:56:45 -0400
commitacd2a932df810d7db2037e3962feb61ca21e2781 (patch)
tree8f8f3fc757ff93ba4e2ecd9730451ccf6d945055
parentd57946b6a21b33f26f189951e1c30ca901581b90 (diff)
downloadcourier-libs-acd2a932df810d7db2037e3962feb61ca21e2781.tar.bz2
Implement the Unicode bidirectional algorithm.
-rw-r--r--unicode/.gitignore1
-rw-r--r--unicode/Makefile.am16
-rw-r--r--unicode/bidi_classnames.h23
-rw-r--r--unicode/biditest.C286
-rw-r--r--unicode/courier-unicode.h.in39
-rw-r--r--unicode/mkbidiclassnames.pl21
-rw-r--r--unicode/unicode_bidi.c1598
7 files changed, 1974 insertions, 10 deletions
diff --git a/unicode/.gitignore b/unicode/.gitignore
index 3ebc3ca..49cf4df 100644
--- a/unicode/.gitignore
+++ b/unicode/.gitignore
@@ -3,6 +3,7 @@
/*.sig
/BidiBrackets.txt
/BidiMirroring.txt
+/BidiTest.txt
/Categories.txt
/DerivedBidiClass.txt
/EastAsianWidth.txt
diff --git a/unicode/Makefile.am b/unicode/Makefile.am
index 30af049..397987c 100644
--- a/unicode/Makefile.am
+++ b/unicode/Makefile.am
@@ -10,6 +10,7 @@ noinst_SCRIPTS=update.sh \
mkcommon.pm \
mkbidi.pl \
mkbidiclass.pl \
+ mkbidiclassnames.pl \
mkeastasianwidth.pl \
mkemojidata.pl \
mkgraphemebreak.pl \
@@ -35,6 +36,11 @@ update-www:
@$(MAKE) update-www-htmlent
@$(MAKE) update-www-categories
@$(MAKE) update-www-bidi
+ @$(MAKE) update-www-unicode-copyright
+
+update-www-unicode-copyright:
+ links -dump https://www.unicode.org/license.html >UNICODE-LICENSE.txt.tmp
+ mv UNICODE-LICENSE.txt.tmp UNICODE-LICENSE.txt
update-www-unicode:
@SHELL@ $(srcdir)/update.sh UnicodeData.txt http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
@@ -72,6 +78,7 @@ update-www-bidi:
@SHELL@ $(srcdir)/update.sh BidiBrackets.txt https://www.unicode.org/Public/UCD/latest/ucd/BidiBrackets.txt
@SHELL@ $(srcdir)/update.sh BidiMirroring.txt https://www.unicode.org/Public/UCD/latest/ucd/BidiMirroring.txt
@SHELL@ $(srcdir)/update.sh DerivedBidiClass.txt https://www.unicode.org/Public/UCD/latest/ucd/extracted/DerivedBidiClass.txt
+ @SHELL@ $(srcdir)/update.sh BidiTest.txt https://www.unicode.org/Public/UCD/latest/ucd/BidiTest.txt
lib_LTLIBRARIES=libcourier-unicode.la
include_HEADERS=courier-unicode.h \
@@ -117,6 +124,7 @@ BUILT_SOURCES=unicode_ultcasetab.c \
bidi_brackets.h \
bidi_brackets_v.h \
bidi_class.h \
+ bidi_classnames.h \
bidi_mirroring.h \
categoriestab.h \
eastasianwidth.h \
@@ -176,6 +184,11 @@ bidi_class.h: DerivedBidiClass.txt mkbidiclass.pl mkcommon.pm
bidi_mirroring.h: BidiMirroring.txt mkbidi.pl
@PERL@ -I$(srcdir) $(srcdir)/mkbidi.pl BidiMirroring.txt >bidi_mirroring.h.tmp
mv bidi_mirroring.h.tmp bidi_mirroring.h
+
+bidi_classnames.h: unicode_bidi.c mkbidiclassnames.pl
+ @PERL@ $(srcdir)/mkbidiclassnames.pl <$(srcdir)/unicode_bidi.c >bidi_classnames.h.tmp
+ mv bidi_classnames.h.tmp bidi_classnames.h
+
endif
unicodetest_SOURCES=unicodetest.c
@@ -208,7 +221,7 @@ scripttest_DEPENDENCIES=libcourier-unicode.la
scripttest_LDADD=libcourier-unicode.la
scripttest_LDFLAGS=-static
-biditest_SOURCES=biditest.C
+biditest_SOURCES=biditest.C bidi_classnames.h
biditest_DEPENDENCIES=libcourier-unicode.la
biditest_LDADD=libcourier-unicode.la
biditest_LDFLAGS=-static
@@ -283,6 +296,7 @@ check-am: unicodetest
test "`./biditest 30`" = "30 30 n"
test "`./biditest 8261`" = "8262 8262 o"
test "`./biditest 8262`" = "8261 8261 c"
+ ./biditest
if HAVE_DOCS
diff --git a/unicode/bidi_classnames.h b/unicode/bidi_classnames.h
new file mode 100644
index 0000000..61cf80b
--- /dev/null
+++ b/unicode/bidi_classnames.h
@@ -0,0 +1,23 @@
+{"AL", UNICODE_BIDI_CLASS_AL},
+{"AN", UNICODE_BIDI_CLASS_AN},
+{"B", UNICODE_BIDI_CLASS_B},
+{"BN", UNICODE_BIDI_CLASS_BN},
+{"CS", UNICODE_BIDI_CLASS_CS},
+{"EN", UNICODE_BIDI_CLASS_EN},
+{"ES", UNICODE_BIDI_CLASS_ES},
+{"ET", UNICODE_BIDI_CLASS_ET},
+{"FSI", UNICODE_BIDI_CLASS_FSI},
+{"L", UNICODE_BIDI_CLASS_L},
+{"LRE", UNICODE_BIDI_CLASS_LRE},
+{"LRI", UNICODE_BIDI_CLASS_LRI},
+{"LRO", UNICODE_BIDI_CLASS_LRO},
+{"NSM", UNICODE_BIDI_CLASS_NSM},
+{"ON", UNICODE_BIDI_CLASS_ON},
+{"PDF", UNICODE_BIDI_CLASS_PDF},
+{"PDI", UNICODE_BIDI_CLASS_PDI},
+{"R", UNICODE_BIDI_CLASS_R},
+{"RLE", UNICODE_BIDI_CLASS_RLE},
+{"RLI", UNICODE_BIDI_CLASS_RLI},
+{"RLO", UNICODE_BIDI_CLASS_RLO},
+{"S", UNICODE_BIDI_CLASS_S},
+{"WS", UNICODE_BIDI_CLASS_WS},
diff --git a/unicode/biditest.C b/unicode/biditest.C
index db4c541..c58da0d 100644
--- a/unicode/biditest.C
+++ b/unicode/biditest.C
@@ -1,6 +1,15 @@
#include "unicode_config.h"
#include "courier-unicode.h"
#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <string>
+#include <algorithm>
+#include <iomanip>
+
+std::vector<std::string> testcase;
+
+FILE *DEBUGDUMP;
int main(int argc, char **argv)
{
@@ -12,6 +21,283 @@ int main(int argc, char **argv)
std::cout << unicode_bidi_mirror(c) << " "
<< unicode_bidi_bracket_type(c, &bt) << " ";
std::cout << (char)bt << std::endl;
+ exit (0);
+ }
+
+ DEBUGDUMP=fopen("/dev/null", "w");
+ if (!DEBUGDUMP)
+ {
+ perror("/dev/null");
+ exit(1);
}
+ std::ifstream fp("BidiTest.txt");
+
+ if (!fp.is_open())
+ exit(1);
+
+ size_t linenum=0;
+ size_t nextlogline=0;
+ std::string logmsg;
+
+ std::string buf;
+
+ std::vector<unicode_bidi_level_t> expected_levels;
+
+ while (1)
+ {
+ buf.clear();
+
+ if (std::getline(fp, buf).eof() && buf.empty())
+ break;
+
+ if (++linenum >= nextlogline)
+ {
+ std::cout << logmsg;
+
+ std::ostringstream o;
+
+ o << std::setw(6) << linenum << " lines processed... ";
+
+ logmsg=o.str();
+
+ std::cout << logmsg << std::flush;
+
+ std::fill(logmsg.begin(), logmsg.end(), '\b');
+
+ nextlogline += 20000;
+ }
+
+ buf.erase(std::find(buf.begin(), buf.end(), '#'), buf.end());
+
+ if (buf.substr(0, 8) == "@Levels:")
+ {
+ expected_levels.clear();
+
+ std::istringstream i(buf);
+
+ std::string word;
+
+ i >> word;
+
+ while (i >> word)
+ {
+ int n;
+
+ std::stringstream words(word);
+
+ if (words >> n)
+ {
+
+ expected_levels.push_back(n);
+ }
+ else
+ {
+ expected_levels
+ .push_back(UNICODE_BIDI_SKIP);
+ }
+ }
+ continue;
+ }
+
+ if (buf.substr(0, 1) == "@")
+ continue;
+
+ size_t semicolon=buf.find(';');
+
+ if (semicolon == buf.npos)
+ continue;
+
+ testcase.clear();
+
+ {
+ std::istringstream i(buf.substr(0, semicolon));
+
+ std::string word;
+
+ while (i >> word)
+ testcase.push_back(word);
+ }
+
+ int n;
+
+ {
+ std::istringstream i(buf.substr(semicolon+1));
+
+ if (!(i >> n))
+ {
+ std::cerr << "Cannot parse paragraph bitset: "
+ << buf.substr(semicolon+1)
+ << " on line " << linenum
+ << std::endl;
+ abort();
+ }
+ }
+
+ //if (linenum != 98950)
+ // continue;
+
+ std::vector<unicode_bidi_level_t> actual_levels;
+
+ std::vector<char32_t> dummy_input;
+
+ dummy_input.resize(testcase.size());
+ actual_levels.resize(testcase.size());
+
+ static const unicode_bidi_level_t level_0=0;
+ static const unicode_bidi_level_t level_1=1;
+
+ static const unicode_bidi_level_t *levels[3]=
+ {0, &level_0, &level_1};
+
+ for (auto level:levels)
+ {
+ if (n & 1)
+ {
+ unicode_bidi_calc(&dummy_input[0],
+ testcase.size(),
+ &actual_levels[0], level);
+
+ int matched=0;
+
+ if (actual_levels.size() ==
+ expected_levels.size())
+ {
+ size_t i=0;
+
+ matched=1;
+
+ for (i=0; i<actual_levels.size(); ++i)
+ {
+ if (expected_levels[i] ==
+ UNICODE_BIDI_SKIP)
+ continue;
+ if (expected_levels[i] !=
+ actual_levels[i])
+ {
+ matched=0;
+ break;
+ }
+ }
+ }
+
+ if (!matched)
+ {
+ fclose(DEBUGDUMP);
+ DEBUGDUMP=stderr;
+ std::cout << std::endl
+ << std::flush;
+ unicode_bidi_calc(&dummy_input[0],
+ testcase.size(),
+ &actual_levels[0],
+ level);
+
+ std::cerr << "Regression, line "
+ << linenum;
+
+ if (!level)
+ {
+ std::cerr << ", auto";
+ }
+ else
+ {
+ std::cerr <<
+ (*level ? ", RTL"
+ : ", LTR");
+ }
+ std::cerr << ": expected";
+
+ for (int l:expected_levels)
+ {
+ std::cerr << " " << l;
+ }
+ std::cerr << std::endl
+ << "Received:";
+
+ for (int l:actual_levels)
+ {
+ std::cerr << " " << l;
+ }
+ std::cerr << std::endl;
+ exit(1);
+ }
+ }
+
+ n >>= 1;
+ }
+ }
+
+ std::cout << logmsg;
+
+ std::fill(logmsg.begin(), logmsg.end(), ' ');
+ std::cout << logmsg << std::endl;
return 0;
}
+
+#define UNICODE_BIDI_TEST(i) (buf[i]=fudge_unicode_bidi(i))
+
+#define BIDI_DEBUG
+
+extern "C" {
+#if 0
+}
+#endif
+
+#include "unicode_bidi.c"
+
+static const struct {
+ char classname[8];
+ enum_bidi_class_t classenum;
+} bidiclassnames[]={
+
+#include "bidi_classnames.h"
+
+};
+
+const char *bidi_classname(enum_bidi_class_t classenum)
+{
+ for (const auto &cn:bidiclassnames)
+ {
+ if (cn.classenum == classenum)
+ return cn.classname;
+ }
+
+ return "???";
+}
+
+static const char *lookup_classname(const std::string &s)
+{
+ abort();
+}
+
+enum_bidi_class_t fudge_unicode_bidi(size_t i)
+{
+ if (i >= testcase.size())
+ {
+ std::cerr << "Test case:";
+
+ for (auto &n:testcase)
+ std::cerr << " " << n;
+ std::cerr << ": no value #" << i << std::endl;
+ abort();
+ }
+
+ for (const auto &cn:bidiclassnames)
+ {
+ if (testcase[i] == cn.classname)
+ {
+ return cn.classenum;
+ }
+ }
+
+ std::cerr << "Test case:";
+
+ for (auto &n:testcase)
+ std::cerr << " " << n;
+ std::cerr << ": unknown value: " << testcase[i];
+ abort();
+}
+
+#if 0
+{
+#endif
+}
diff --git a/unicode/courier-unicode.h.in b/unicode/courier-unicode.h.in
index 68ece2c..67f3bda 100644
--- a/unicode/courier-unicode.h.in
+++ b/unicode/courier-unicode.h.in
@@ -554,7 +554,32 @@ size_t unicode_wbscan_end(unicode_wbscan_info_t i);
** values.
**
** unicode_bidi_bracket_type() returns the same character and
-** UNICODE_BIDI_n if the given character does not have these properties,
+** UNICODE_BIDI_n if the given character does not have these properties.
+**
+** unicode_bidi_calc() implements the Unicode Bidirectional Algorithm up to
+** step L1.
+**
+** Parameters:
+**
+** - A pointer to char32_t, the Unicode string.
+**
+** - Number of characters in the char32_t string
+**
+** - A pointer to an array of unicode_bidi_level_t values. The caller is
+** responsible for allocating and deallocating this array, which has the
+** same size as the Unicode string (the second parameter).
+**
+** - An optional pointer to a unicode_bidi_level_t value, or a null pointer.
+** A pointer to UNICODE_BIDI_LR or UNICODE_BIDI_RL sets the default paragraph
+** direction level. A null pointer calculates the default paragraph direction
+** level based on the string, as specified by the "P" rules in the algorithm.
+**
+** unicode_bidi_calc() fills in the unicode_bidi_level_t array with the
+** values corresponding to the embedding level of the corresponding character,
+** as specified in the Unicode Bidirection Algorithm (even for left-to-right,
+** and odd for right-to-left). A value of UNICODE_BIDI_SKIP designates
+** directional markers (from step X9). These characters should be removed
+** before using unicode_bidi_reorder().
*/
typedef char unicode_bidi_bracket_type_t;
@@ -568,6 +593,18 @@ extern char32_t unicode_bidi_mirror(char32_t c);
extern char32_t unicode_bidi_bracket_type(char32_t c,
unicode_bidi_bracket_type_t *ret);
+
+typedef unsigned char unicode_bidi_level_t;
+
+#define UNICODE_BIDI_LR ((unicode_bidi_level_t)0)
+#define UNICODE_BIDI_RL ((unicode_bidi_level_t)1)
+#define UNICODE_BIDI_SKIP ((unicode_bidi_level_t)254)
+
+extern void unicode_bidi_calc(const char32_t *p, size_t n,
+ unicode_bidi_level_t *bufp,
+ const unicode_bidi_level_t *
+ initial_embedding_level);
+
/*
** A buffer that holds unicode characters, and dynamically grows as needed.
*/
diff --git a/unicode/mkbidiclassnames.pl b/unicode/mkbidiclassnames.pl
new file mode 100644
index 0000000..d3324a9
--- /dev/null
+++ b/unicode/mkbidiclassnames.pl
@@ -0,0 +1,21 @@
+#! /usr/bin/perl
+
+use strict;
+use warnings;
+
+while (<>)
+{
+ last if m@^/\* BIDI_CLASS_LIST@;
+}
+
+while (<>)
+{
+ last if m@^}@;
+ next if /\{/;
+ next if /^\s*$/;
+ next if m@/\*@;
+
+ die unless /UNICODE_BIDI_CLASS_(.*),/;
+
+ print "{\"$1\", UNICODE_BIDI_CLASS_$1},\n";
+}
diff --git a/unicode/unicode_bidi.c b/unicode/unicode_bidi.c
index e486621..38dcb44 100644
--- a/unicode/unicode_bidi.c
+++ b/unicode/unicode_bidi.c
@@ -86,6 +86,7 @@ char32_t unicode_bidi_bracket_type(char32_t c,
return c;
}
+/* BIDI_CLASS_LIST */
typedef enum {
UNICODE_BIDI_CLASS_AL,
UNICODE_BIDI_CLASS_AN,
@@ -114,13 +115,1594 @@ typedef enum {
#include "bidi_class.h"
-enum_bidi_class_t unicode_bidi_class(char32_t c)
+#ifdef BIDI_DEBUG
+enum_bidi_class_t fudge_unicode_bidi(size_t);
+const char *bidi_classname(enum_bidi_class_t);
+#endif
+
+#define max_depth 125
+
+typedef enum {
+ do_neutral,
+ do_right_to_left,
+ do_left_to_right
+} directional_override_status_t;
+
+#define is_isolate_initiator(c) \
+ ((c) == UNICODE_BIDI_CLASS_LRI || \
+ (c) == UNICODE_BIDI_CLASS_RLI || \
+ (c) == UNICODE_BIDI_CLASS_FSI)
+
+#define is_embedding_initiator(c) \
+ ((c) == UNICODE_BIDI_CLASS_LRE || \
+ (c) == UNICODE_BIDI_CLASS_RLE || \
+ (c) == UNICODE_BIDI_CLASS_LRO || \
+ (c) == UNICODE_BIDI_CLASS_RLO)
+
+/* BD13 implementation */
+
+/* A level run, specified as indexes */
+
+struct level_run {
+ size_t start;
+ size_t end; /* one past */
+};
+
+/* An isolating run sequence */
+
+struct isolating_run_sequence_s {
+ struct isolating_run_sequence_s *prev, *next; /* Linked list */
+
+ struct level_run *level_runs; /* All level runs in the sequence */
+ size_t n_level_runs; /* How many of them */
+ size_t cap_level_runs; /* Capacity of the level runs */
+ unicode_bidi_level_t embedding_level; /* This seq's embedding level */
+ enum_bidi_class_t sos, eos;
+};
+
+/* An iterator for an isolating run sequence */
+
+typedef struct {
+
+ /* My sequence */
+ struct isolating_run_sequence_s *seq;
+
+ /* Which level run I'm on */
+
+ size_t level_run_i;
+
+ /* Current index */
+
+ size_t i;
+} irs_iterator;
+
+static irs_iterator irs_begin(struct isolating_run_sequence_s *seq)
+{
+ irs_iterator iter;
+
+ iter.seq=seq;
+ iter.level_run_i=0;
+
+ /* Edge case, empty isolating run sequence */
+
+ while (iter.level_run_i < seq->n_level_runs)
+ {
+ iter.i=seq->level_runs[iter.level_run_i].start;
+
+ if (iter.i < seq->level_runs[iter.level_run_i].end)
+ break;
+
+ ++iter.level_run_i;
+ }
+ return iter;
+}
+
+static irs_iterator irs_end(struct isolating_run_sequence_s *seq)
+{
+ irs_iterator iter;
+
+ iter.seq=seq;
+ iter.level_run_i=seq->n_level_runs;
+ return iter;
+}
+
+static int irs_compare(const irs_iterator *a,
+ const irs_iterator *b)
+{
+ if (a->level_run_i < b->level_run_i)
+ return -1;
+ if (a->level_run_i > b->level_run_i)
+ return 1;
+
+ if (a->level_run_i == a->seq->n_level_runs)
+ return 0;
+
+ if (a->i < b->i)
+ return -1;
+
+ if (a->i > b->i)
+ return 1;
+ return 0;
+}
+
+static void irs_incr(irs_iterator *iter)
+{
+ if (iter->seq->n_level_runs == iter->level_run_i)
+ {
+ fprintf(stderr, "%s%s\n",
+ "Internal error: attempting to increment ",
+ "one past end of isolating run sequence iterator");
+ abort();
+ }
+
+ if (++iter->i >= iter->seq->level_runs[iter->level_run_i].end)
+ {
+ if (++iter->level_run_i < iter->seq->n_level_runs)
+ iter->i=iter->seq->level_runs[iter->level_run_i].start;
+ }
+}
+
+static void irs_decr(irs_iterator *iter)
+{
+ while (1)
+ {
+ if (iter->seq->n_level_runs > iter->level_run_i &&
+ iter->i > iter->seq->level_runs[iter->level_run_i].start)
+ {
+ --iter->i;
+ break;
+ }
+
+ if (iter->level_run_i == 0)
+ {
+ fprintf(stderr, "%s%s\n",
+ "Internal error: attempting to decrement the ",
+ "beginning isolating run sequence iterator");
+ abort();
+ }
+
+ iter->i=iter->seq->level_runs[--iter->level_run_i].end;
+ }
+}
+
+/* Used to build isolating run sequences link */
+
+struct isolating_run_sequence_link {
+ size_t push;
+ size_t pop;
+ struct isolating_run_sequence_s *seq;
+};
+
+/* Our isolating run sequences */
+
+struct isolating_run_sequences_s {
+ struct isolating_run_sequence_s *head, *tail; /* Linked list */
+
+ /* Links LRI and RLI to its matching PDI */
+
+ struct isolating_run_sequence_link *pdi_linkage; /* Recycle level_run */
+ size_t n_pdi_linkage;
+ size_t cap_pdi_linkage;
+};
+
+static int compare_irs_by_push(struct isolating_run_sequence_link **a,
+ struct isolating_run_sequence_link **b)
+{
+ if ((*a)->push < (*b)->push)
+ return -1;
+ if ((*a)->push > (*b)->push)
+ return 1;
+ return 0;
+}
+
+static struct isolating_run_sequence_link **sort_irs_links_by_push
+(struct isolating_run_sequences_s *seq)
+{
+ struct isolating_run_sequence_link **p;
+ size_t i;
+
+ if (seq->n_pdi_linkage == 0)
+ return 0;
+
+ p=(struct isolating_run_sequence_link **)
+ calloc(sizeof(struct isolating_run_sequence_link *),
+ seq->n_pdi_linkage);
+
+ for (i=0; i<seq->n_pdi_linkage; i++)
+ {
+ p[i]=seq->pdi_linkage+i;
+ }
+ qsort(p, seq->n_pdi_linkage, sizeof(*p),
+ (int (*)(const void *, const void *))compare_irs_by_push);
+ return p;
+}
+
+static struct isolating_run_sequence_s *
+isolating_run_sequences_init(struct isolating_run_sequences_s *p,
+ unicode_bidi_level_t embedding_level,
+ size_t i)
+{
+ struct isolating_run_sequence_s *seq=
+ (struct isolating_run_sequence_s *)
+ calloc(1, sizeof(struct isolating_run_sequence_s));
+
+ if (!seq) abort();
+
+ if ((seq->level_runs=(struct level_run *)
+ malloc(sizeof(struct level_run))) == 0) abort();
+
+ seq->level_runs->start=i;
+ seq->level_runs->end=i;
+
+ seq->n_level_runs=seq->cap_level_runs=1;
+ seq->embedding_level=embedding_level;
+
+ if (!p->head)
+ {
+ p->head=p->tail=seq;
+ }
+ else
+ {
+ p->tail->next=seq;
+ seq->prev=p->tail;
+ p->tail=seq;
+ }
+
+ return seq;
+}
+
+static void isolating_run_sequences_record(struct isolating_run_sequence_s *p,
+ size_t i)
+{
+ struct level_run *current_level_run=
+ &p->level_runs[p->n_level_runs-1];
+
+ if (current_level_run->start == current_level_run->end)
+ {
+ current_level_run->start=i;
+ current_level_run->end=i+1;
+ return;
+ }
+
+ if (current_level_run->end == i)
+ {
+ ++current_level_run->end;
+ return;
+ }
+
+ /*
+ ** Starting a new level run in the current isolating
+ ** run sequence.
+ */
+
+ if (p->n_level_runs == p->cap_level_runs)
+ {
+ p->cap_level_runs *= 2;
+
+ p->level_runs=(struct level_run *)
+ realloc(p->level_runs,
+ sizeof(struct level_run) *
+ p->cap_level_runs);
+ if (!p->level_runs)
+ abort();
+ }
+
+ current_level_run = p->level_runs + (p->n_level_runs++);
+
+ current_level_run->start=i;
+ current_level_run->end=i+1;
+}
+
+static void isolating_run_sequence_link(struct isolating_run_sequences_s *p,
+ size_t push,
+ size_t pop)
{
- return unicode_tab_lookup(c,
- unicode_indextab,
- sizeof(unicode_indextab)
- /sizeof(unicode_indextab[0]),
- unicode_rangetab,
- unicode_classtab,
- UNICODE_BIDI_CLASS_L);
+ if (p->n_pdi_linkage == p->cap_pdi_linkage)
+ {
+ p->cap_pdi_linkage=
+ p->cap_pdi_linkage == 0
+ ? 1 : p->cap_pdi_linkage*2;
+
+ p->pdi_linkage=(struct isolating_run_sequence_link *)
+ (p->pdi_linkage ?
+ realloc(p->pdi_linkage,
+ p->cap_pdi_linkage * sizeof(*p->pdi_linkage))
+ :
+ malloc(p->cap_pdi_linkage * sizeof(*p->pdi_linkage)));
+
+ if (!p->pdi_linkage)
+ abort();
+ }
+
+ p->pdi_linkage[p->n_pdi_linkage].push=push;
+ p->pdi_linkage[p->n_pdi_linkage].pop=pop;
+ p->pdi_linkage[p->n_pdi_linkage].seq=0;
+ ++p->n_pdi_linkage;
+}
+
+static void isolating_run_sequences_deinit(struct isolating_run_sequences_s *p)
+{
+ struct isolating_run_sequence_s *seq;
+
+ for (seq=p->head; seq; )
+ {
+ struct isolating_run_sequence_s *p=seq;
+
+ seq=seq->next;
+
+ free(p->level_runs);
+ free(p);
+ }
+
+ if (p->pdi_linkage)
+ free(p->pdi_linkage);
+}
+
+/* An entry on the directional status stack, used by the X rules */
+
+struct directional_status_stack_entry {
+ struct directional_status_stack_entry *next;
+ unicode_bidi_level_t embedding_level;
+ directional_override_status_t directional_override_status;
+ int directional_isolate_status;
+ size_t isolate_start;
+};
+
+typedef struct {
+ struct directional_status_stack_entry *head;
+
+ unicode_bidi_level_t paragraph_embedding_level;
+ const char32_t *chars;
+ enum_bidi_class_t *classes;
+ enum_bidi_class_t *orig_classes;
+ unicode_bidi_level_t *levels;
+ size_t size;
+ int overflow_isolate_count;
+ int overflow_embedding_count;
+ int valid_isolate_count;
+
+ struct isolating_run_sequences_s isolating_run_sequences;
+} *directional_status_stack_t;
+
+#ifdef BIDI_DEBUG
+void dump_classes(const char *prefix, directional_status_stack_t stack)
+{
+ fprintf(DEBUGDUMP, "%s: ", prefix);
+ for (size_t i=0; i<stack->size; ++i)
+ {
+ fprintf(DEBUGDUMP, " %s(%d)",
+ bidi_classname(stack->classes[i]),
+ (int)stack->levels[i]);
+ }
+ fprintf(DEBUGDUMP, "\n");
+}
+
+void dump_orig_classes(const char *prefix, directional_status_stack_t stack)
+{
+ fprintf(DEBUGDUMP, "%s: ", prefix);
+
+ for (size_t i=0; i<stack->size; ++i)
+ {
+ fprintf(DEBUGDUMP, " %s(%s%s%d)",
+ bidi_classname(stack->classes[i]),
+ (stack->classes[i] != stack->orig_classes[i] ?
+ bidi_classname(stack->orig_classes[i]):""),
+ (stack->classes[i] != stack->orig_classes[i] ? "/":""),
+ (int)stack->levels[i]);
+ }
+ fprintf(DEBUGDUMP, "\n");
+}
+#endif
+
+
+static void directional_status_stack_push
+(directional_status_stack_t stack,
+ unicode_bidi_level_t embedding_level,
+ directional_override_status_t directional_override_status,
+ int directional_isolate_status)
+{
+ struct directional_status_stack_entry *p=
+ (struct directional_status_stack_entry *)
+ malloc(sizeof(struct directional_status_stack_entry));
+
+#ifdef BIDI_DEBUG
+ fprintf(DEBUGDUMP, "BIDI: Push level %d, override: %s, isolate: %s\n",
+ (int)embedding_level,
+ (directional_override_status == do_right_to_left ? "RTOL":
+ directional_override_status == do_left_to_right ? "LTOR":
+ directional_override_status == do_neutral ? "NEUTRAL"
+ : "ERROR"),
+ (directional_isolate_status ? "YES":"NO"));
+
+#endif
+
+ p->embedding_level=embedding_level;
+ p->directional_override_status=directional_override_status;
+ p->directional_isolate_status=directional_isolate_status;
+ p->next=stack->head;
+ stack->head=p;
+}
+
+static unicode_bidi_level_t
+compute_paragraph_embedding_level(const enum_bidi_class_t *p,
+ size_t i, size_t j)
+{
+ unicode_bidi_level_t in_isolation=0;
+
+ for (; i<j; ++i)
+ {
+ if (is_isolate_initiator(p[i]))
+ ++in_isolation;
+ else if (p[i] == UNICODE_BIDI_CLASS_PDI)
+ {
+ if (in_isolation)
+ --in_isolation;
+ }
+
+ if (in_isolation == 0)
+ {
+ if (p[i] == UNICODE_BIDI_CLASS_AL ||
+ p[i] == UNICODE_BIDI_CLASS_R)
+ {
+ return 1;
+ }
+ if (p[i] == UNICODE_BIDI_CLASS_L)
+ break;
+ }
+ }
+ return 0;
+}
+
+static directional_status_stack_t
+directional_status_stack_init(const char32_t *chars,
+ enum_bidi_class_t *classes, size_t n,
+ unicode_bidi_level_t *levels,
+ const unicode_bidi_level_t
+ *initial_embedding_level)
+{
+ directional_status_stack_t stack;
+
+ stack=(directional_status_stack_t)calloc(1, sizeof(*stack));
+
+ stack->paragraph_embedding_level=
+ initial_embedding_level
+ ? *initial_embedding_level & 1
+ : compute_paragraph_embedding_level(classes, 0, n);
+ stack->chars=chars;
+ stack->classes=classes;
+
+ if (n)
+ {
+ classes=(enum_bidi_class_t *)
+ malloc(sizeof(enum_bidi_class_t)*n);
+ if (!classes)
+ abort();
+ memcpy(classes, stack->classes, sizeof(enum_bidi_class_t)*n);
+ }
+ else
+ {
+ classes=0;
+ }
+ stack->orig_classes=classes;
+ stack->levels=levels;
+ stack->size=n;
+
+ directional_status_stack_push(stack,
+ stack->paragraph_embedding_level,
+ do_neutral, 0);
+
+ return stack;
+}
+
+static void directional_status_stack_pop(directional_status_stack_t stack)
+{
+ struct directional_status_stack_entry *head=stack->head;
+
+ if (!head)
+ return;
+
+ stack->head=head->next;
+ free(head);
+
+#ifdef BIDI_DEBUG
+ fprintf(DEBUGDUMP, "BIDI: Pop\n");
+#endif
+}
+
+static void directional_status_stack_deinit(directional_status_stack_t stack)
+{
+ while (stack->head)
+ directional_status_stack_pop(stack);
+ if (stack->orig_classes)
+ free(stack->orig_classes);
+ isolating_run_sequences_deinit(&stack->isolating_run_sequences);
+ free(stack);
+}
+
+static void unicode_bidi_b(const char32_t *p,
+ size_t n,
+ enum_bidi_class_t *buf,
+ unicode_bidi_level_t *bufp,
+ const unicode_bidi_level_t *initial_embedding_level);
+
+void unicode_bidi_calc(const char32_t *p, size_t n, unicode_bidi_level_t *bufp,
+ const unicode_bidi_level_t *initial_embedding_level)
+{
+ /*
+ ** Look up the bidi class for each char32_t.
+ **
+ ** When we encounter a paragraph break we call unicode_bidi_b() to
+ ** process it.
+ */
+
+ enum_bidi_class_t *buf=
+ (enum_bidi_class_t *)malloc(n * sizeof(enum_bidi_class_t));
+
+ for (size_t i=0; i<n; ++i)
+ {
+ buf[i]=(enum_bidi_class_t)
+ unicode_tab_lookup(p[i],
+ unicode_indextab,
+ sizeof(unicode_indextab)
+ /sizeof(unicode_indextab[0]),
+ unicode_rangetab,
+ unicode_classtab,
+ UNICODE_BIDI_CLASS_L);
+#ifdef UNICODE_BIDI_TEST
+ UNICODE_BIDI_TEST(i);
+#endif
+ bufp[i]=UNICODE_BIDI_SKIP;
+ }
+
+ unicode_bidi_b(p, n,
+ buf,
+ bufp,
+ initial_embedding_level);
+
+ free(buf);
+}
+
+static void unicode_bidi_cl(directional_status_stack_t stack);
+
+static void unicode_bidi_b(const char32_t *p,
+ size_t n,
+ enum_bidi_class_t *buf,
+ unicode_bidi_level_t *bufp,
+ const unicode_bidi_level_t *initial_embedding_level)
+{
+ directional_status_stack_t stack;
+
+ stack=directional_status_stack_init(p, buf, n, bufp,
+ initial_embedding_level);
+
+#ifdef BIDI_DEBUG
+ fprintf(DEBUGDUMP, "BIDI: START: Paragraph embedding level: %d\n",
+ (int)stack->paragraph_embedding_level);
+#endif
+
+ unicode_bidi_cl(stack);
+
+ directional_status_stack_deinit(stack);
+}
+
+#define RESET_CLASS(p,stack) do { \
+ switch ((stack)->head->directional_override_status) { \
+ case do_neutral: break; \
+ case do_left_to_right: (p)=UNICODE_BIDI_CLASS_L; break; \
+ case do_right_to_left: (p)=UNICODE_BIDI_CLASS_R; break; \
+ } \
+ } while(0)
+
+static void unicode_bidi_w(directional_status_stack_t stack,
+ struct isolating_run_sequence_s *seq);
+static void unicode_bidi_n(directional_status_stack_t stack,
+ struct isolating_run_sequence_s *seq);
+
+#ifdef BIDI_DEBUG
+void dump_sequence_info(directional_status_stack_t stack,
+ struct isolating_run_sequence_s *seq)
+{
+ fprintf(DEBUGDUMP, "Sequence: sos: %c, eos: %c:",
+ (seq->sos == UNICODE_BIDI_CLASS_L ? 'L':'R'),
+ (seq->eos == UNICODE_BIDI_CLASS_L ? 'L':'R'));
+
+ for (size_t i=0; i<seq->n_level_runs; ++i)
+ {
+ fprintf(DEBUGDUMP, "%s[%lu-%lu]",
+ i == 0 ? " ":", ",
+ (unsigned long)seq->level_runs[i].start,
+ (unsigned long)seq->level_runs[i].end-1);
+ }
+ fprintf(DEBUGDUMP, "\n");
+}
+
+void dump_sequence(const char *what, directional_status_stack_t stack,
+ struct isolating_run_sequence_s *seq)
+{
+ irs_iterator beg=irs_begin(seq), end=irs_end(seq);
+
+ fprintf(DEBUGDUMP, "%s: ", what);
+ while (irs_compare(&beg, &end))
+ {
+ fprintf(DEBUGDUMP, " %s(%d)",
+ bidi_classname(stack->classes[beg.i]),
+ (int)stack->levels[beg.i]);
+ irs_incr(&beg);
+ }
+ fprintf(DEBUGDUMP, "\n");
+}
+#endif
+
+static void unicode_bidi_cl(directional_status_stack_t stack)
+{
+#ifdef BIDI_DEBUG
+ dump_classes("Before X1", stack);
+#endif
+
+ for (size_t i=0; i<stack->size; i++)
+ {
+ int embedding_level;
+
+#define NEXT_ODD_EMBEDDING_LEVEL \
+ (embedding_level=stack->head->embedding_level, \
+ ++embedding_level, \
+ embedding_level |= 1)
+
+#define NEXT_EVEN_EMBEDDING_LEVEL \
+ (embedding_level=stack->head->embedding_level, \
+ embedding_level |= 1, \
+ ++embedding_level)
+
+ switch (stack->classes[i]) {
+ case UNICODE_BIDI_CLASS_RLE:
+ /* X2 */
+ NEXT_ODD_EMBEDDING_LEVEL;
+
+ if (embedding_level <= max_depth &&
+ stack->overflow_isolate_count == 0 &&
+ stack->overflow_embedding_count == 0)
+ {
+ directional_status_stack_push
+ (stack, embedding_level,
+ do_neutral, 0);
+ }
+ else
+ {
+ if (stack->overflow_isolate_count == 0)
+ {
+ ++stack->overflow_embedding_count;
+ }
+ }
+ break;
+ case UNICODE_BIDI_CLASS_LRE:
+ /* X3 */
+
+ NEXT_EVEN_EMBEDDING_LEVEL;
+
+ if (embedding_level <= max_depth &&
+ stack->overflow_isolate_count == 0 &&
+ stack->overflow_embedding_count == 0)
+ {
+ directional_status_stack_push
+ (stack, embedding_level,
+ do_neutral, 0);
+ }
+ else
+ {
+ if (stack->overflow_isolate_count == 0)
+ {
+ ++stack->overflow_embedding_count;
+ }
+ }
+ break;
+
+ case UNICODE_BIDI_CLASS_RLO:
+ /* X4 */
+ NEXT_ODD_EMBEDDING_LEVEL;
+
+ if (embedding_level <= max_depth &&
+ stack->overflow_isolate_count == 0 &&
+ stack->overflow_embedding_count == 0)
+ {
+ directional_status_stack_push
+ (stack, embedding_level,
+ do_right_to_left, 0);
+ }
+ else
+ {
+ if (stack->overflow_isolate_count == 0)
+ {
+ ++stack->overflow_embedding_count;
+ }
+ }
+ break;
+
+ case UNICODE_BIDI_CLASS_LRO:
+ /* X5 */
+ NEXT_EVEN_EMBEDDING_LEVEL;
+
+ if (embedding_level <= max_depth &&
+ stack->overflow_isolate_count == 0 &&
+ stack->overflow_embedding_count == 0)
+ {
+ directional_status_stack_push
+ (stack, embedding_level,
+ do_left_to_right, 0);
+ }
+ else
+ {
+ if (stack->overflow_isolate_count == 0)
+ {
+ ++stack->overflow_embedding_count;
+ }
+ }
+ break;
+ default:
+ break;
+ }
+
+ enum_bidi_class_t cur_class=stack->classes[i];
+
+ if (cur_class == UNICODE_BIDI_CLASS_FSI) {
+ /* X5c */
+
+ size_t j=i;
+
+ unicode_bidi_level_t in_isolation=1;
+
+ while (++j < stack->size)
+ {
+ if (is_isolate_initiator(stack->classes[j]))
+ ++in_isolation;
+ else if (stack->classes[j] == UNICODE_BIDI_CLASS_PDI)
+ {
+ if (--in_isolation == 0)
+ break;
+ }
+ }
+
+ cur_class=compute_paragraph_embedding_level
+ (stack->classes, i+1, j) == 1
+ ? UNICODE_BIDI_CLASS_RLI
+ : UNICODE_BIDI_CLASS_LRI;
+ }
+
+ switch (cur_class) {
+ case UNICODE_BIDI_CLASS_RLI:
+ /* X5a */
+ stack->levels[i]=stack->head->embedding_level;
+ RESET_CLASS(stack->classes[i],stack);
+
+ NEXT_ODD_EMBEDDING_LEVEL;
+
+ if (embedding_level <= max_depth &&
+ stack->overflow_isolate_count == 0 &&
+ stack->overflow_embedding_count == 0)
+ {
+ ++stack->valid_isolate_count;
+ directional_status_stack_push
+ (stack, embedding_level, do_neutral, 1);
+ stack->head->isolate_start=i;
+ }
+ else
+ {
+ ++stack->overflow_isolate_count;
+ }
+ break;
+
+ case UNICODE_BIDI_CLASS_LRI:
+ /* X5b */
+ stack->levels[i]=stack->head->embedding_level;
+ RESET_CLASS(stack->classes[i],stack);
+
+ NEXT_EVEN_EMBEDDING_LEVEL;
+
+ if (embedding_level <= max_depth &&
+ stack->overflow_isolate_count == 0 &&
+ stack->overflow_embedding_count == 0)
+ {
+ ++stack->valid_isolate_count;
+ directional_status_stack_push
+ (stack, embedding_level, do_neutral, 1);
+ stack->head->isolate_start=i;
+ }
+ else
+ {
+ ++stack->overflow_isolate_count;
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ switch (stack->orig_classes[i]) {
+ case UNICODE_BIDI_CLASS_BN:
+ case UNICODE_BIDI_CLASS_B:
+ case UNICODE_BIDI_CLASS_RLE:
+ case UNICODE_BIDI_CLASS_LRE:
+ case UNICODE_BIDI_CLASS_RLO:
+ case UNICODE_BIDI_CLASS_LRO:
+ case UNICODE_BIDI_CLASS_PDF:
+ case UNICODE_BIDI_CLASS_RLI:
+ case UNICODE_BIDI_CLASS_LRI:
+ case UNICODE_BIDI_CLASS_FSI:
+ case UNICODE_BIDI_CLASS_PDI:
+ break;
+ default:
+ /* X6 */
+ stack->levels[i]=stack->head->embedding_level;
+ RESET_CLASS(stack->classes[i],stack);
+ break;
+ }
+
+ if (stack->classes[i] == UNICODE_BIDI_CLASS_PDI)
+ {
+ /* X6a */
+ if (stack->overflow_isolate_count > 0)
+ {
+ --stack->overflow_isolate_count;
+ }
+ else if (stack->valid_isolate_count == 0)
+ ;
+ else
+ {
+ stack->overflow_embedding_count=0;
+
+ while (!stack->head->directional_isolate_status)
+ {
+ directional_status_stack_pop(stack);
+
+ if (!stack->head)
+ {
+ fprintf(stderr,
+ "Internal error: stack "
+ "empty after pop\n");
+ abort();
+ }
+ }
+
+ isolating_run_sequence_link
+ (&stack->isolating_run_sequences,
+ stack->head->isolate_start,
+ i);
+
+ directional_status_stack_pop(stack);
+ --stack->valid_isolate_count;
+
+ if (!stack->head)
+ {
+ fprintf(stderr,
+ "Internal error: stack "
+ "empty after pop\n");
+ abort();
+ }
+ }
+ stack->levels[i]=stack->head->embedding_level;
+ RESET_CLASS(stack->classes[i],stack);
+ }
+
+ if (stack->classes[i] == UNICODE_BIDI_CLASS_PDF)
+ {
+ /* X7 */
+
+ if (stack->overflow_isolate_count > 0)
+ ;
+ else if (stack->overflow_embedding_count > 0)
+ {
+ --stack->overflow_embedding_count;
+ }
+ else
+ {
+ stack->overflow_embedding_count=0;
+
+ if (!stack->head->directional_isolate_status
+ && stack->head->next)
+ {
+ directional_status_stack_pop(stack);
+ }
+ }
+ }
+
+ if (stack->classes[i] == UNICODE_BIDI_CLASS_B)
+ {
+ /* X8 */
+
+ stack->levels[i]=stack->paragraph_embedding_level;
+ }
+ }
+
+ /* X9 */
+
+#define IS_X9(class) \
+ ((class) == UNICODE_BIDI_CLASS_RLE || \
+ (class) == UNICODE_BIDI_CLASS_LRE || \
+ (class) == UNICODE_BIDI_CLASS_RLO || \
+ (class) == UNICODE_BIDI_CLASS_LRO || \
+ (class) == UNICODE_BIDI_CLASS_PDF || \
+ (class) == UNICODE_BIDI_CLASS_BN)
+
+ size_t next_pdi=0;
+ struct isolating_run_sequence_s *current_irs=0;
+ unicode_bidi_level_t current_level=0;
+
+ struct isolating_run_sequence_link **push_links=
+ sort_irs_links_by_push(&stack->isolating_run_sequences);
+ size_t next_rlilri=0;
+
+ for (size_t i=0; i<stack->size; ++i)
+ {
+ if (IS_X9(stack->classes[i]))
+ {
+ if (stack->levels[i] != UNICODE_BIDI_SKIP)
+ {
+ fprintf(stderr, "Internal error: X9 class did "
+ "not get skipped");
+ abort();
+ }
+ continue;
+ }
+
+ if (stack->levels[i] == UNICODE_BIDI_SKIP)
+ {
+ fprintf(stderr, "Internal error: non-X9 class did "
+ "get skipped for some reason");
+ abort();
+ }
+
+ if (!current_irs || current_level != stack->levels[i])
+ {
+ current_level = stack->levels[i];
+
+ if (next_pdi <
+ stack->isolating_run_sequences.n_pdi_linkage &&
+ stack->isolating_run_sequences.pdi_linkage
+ [next_pdi].pop == i)
+ {
+ if ((current_irs=
+ stack->isolating_run_sequences.pdi_linkage
+ [next_pdi].seq) == 0)
+ {
+ fprintf(stderr, "Internal error: "
+ "missing previous isolated "
+ "run sequence link\n");
+ abort();
+ }
+ ++next_pdi;
+ }
+ else
+ {
+ current_irs=isolating_run_sequences_init
+ (&stack->isolating_run_sequences,
+ stack->levels[i],
+ i);
+ }
+ }
+
+ isolating_run_sequences_record(current_irs, i);
+
+ if (next_rlilri < stack->isolating_run_sequences.n_pdi_linkage &&
+ push_links[next_rlilri]->push == i)
+ {
+ push_links[next_rlilri++]->seq=current_irs;
+ }
+ }
+
+ if (push_links)
+ free(push_links);
+
+ /* X10 */
+#ifdef BIDI_DEBUG
+ dump_classes("Before X10", stack);
+#endif
+
+ for (struct isolating_run_sequence_s *p=
+ stack->isolating_run_sequences.head; p; p=p->next)
+ {
+ p->sos=p->eos=UNICODE_BIDI_CLASS_L;
+
+ irs_iterator beg_iter=irs_begin(p), end_iter=irs_end(p);
+
+ if (irs_compare(&beg_iter, &end_iter) == 0)
+ continue; /* Edge case */
+
+ unicode_bidi_level_t before=
+ stack->paragraph_embedding_level;
+ unicode_bidi_level_t after=
+ stack->paragraph_embedding_level;
+
+
+ size_t first_i=beg_iter.i;
+
+ irs_decr(&end_iter);
+
+ size_t end_i=end_iter.i+1;
+
+ unicode_bidi_level_t start=stack->levels[first_i];
+ unicode_bidi_level_t end=stack->levels[end_i-1];
+
+ while (first_i > 0 && stack->levels[first_i-1] ==
+ UNICODE_BIDI_SKIP)
+ --first_i;
+
+ if (first_i > 0)
+ before=stack->levels[first_i-1];
+
+ if (!is_isolate_initiator(stack->classes[end_iter.i]))
+ {
+ while (end_i < stack->size &&
+ stack->levels[end_i] == UNICODE_BIDI_SKIP)
+ ++end_i;
+
+ if (end_i < stack->size)
+ after=stack->levels[end_i];
+ }
+#ifdef BIDI_DEBUG
+ fprintf(DEBUGDUMP,
+ "Sequence: before=%d, start=%d, end=%d, after=%d\n",
+ (int)before, (int)start, (int)end, (int)after);
+#endif
+ if (start > before)
+ before=start;
+
+ if (end > after)
+ after=end;
+
+ if (before & 1)
+ p->sos=UNICODE_BIDI_CLASS_R;
+ if (after & 1)
+ p->eos=UNICODE_BIDI_CLASS_R;
+
+
+#ifdef BIDI_DEBUG
+ dump_sequence_info(stack, p);
+ dump_sequence("Contents", stack, p);
+#endif
+ }
+
+
+ for (struct isolating_run_sequence_s *p=
+ stack->isolating_run_sequences.head; p; p=p->next)
+ {
+#ifdef BIDI_DEBUG
+ dump_sequence_info(stack, p);
+ dump_sequence("Contents before W", stack, p);
+#endif
+
+ unicode_bidi_w(stack, p);
+
+#ifdef BIDI_DEBUG
+ dump_sequence("Contents after W", stack, p);
+#endif
+ unicode_bidi_n(stack, p);
+ }
+#ifdef BIDI_DEBUG
+ dump_orig_classes("Before L1", stack);
+#endif
+
+ /*
+ ** L1
+ */
+
+ size_t i=stack->size;
+
+ int seen_sb=1;
+
+ while (i > 0)
+ {
+ --i;
+
+ if (IS_X9(stack->orig_classes[i]))
+ continue;
+
+ switch (stack->orig_classes[i]) {
+ case UNICODE_BIDI_CLASS_WS:
+ case UNICODE_BIDI_CLASS_FSI:
+ case UNICODE_BIDI_CLASS_LRI:
+ case UNICODE_BIDI_CLASS_RLI:
+ case UNICODE_BIDI_CLASS_PDI:
+ if (seen_sb)
+ stack->levels[i]=
+ stack->paragraph_embedding_level;
+ break;
+ case UNICODE_BIDI_CLASS_S:
+ case UNICODE_BIDI_CLASS_B:
+ stack->levels[i]=stack->paragraph_embedding_level;
+ seen_sb=1;
+ break;
+ default:
+ seen_sb=0;
+ continue;
+ }
+ }
+}
+
+static void unicode_bidi_w(directional_status_stack_t stack,
+ struct isolating_run_sequence_s *seq)
+{
+ irs_iterator iter=irs_begin(seq), end=irs_end(seq);
+ enum_bidi_class_t previous_type=seq->sos;
+
+ enum_bidi_class_t strong_type=UNICODE_BIDI_CLASS_R;
+
+ while (irs_compare(&iter, &end))
+ {
+ if (stack->classes[iter.i] == UNICODE_BIDI_CLASS_NSM)
+ {
+ /* W1 */
+ stack->classes[iter.i] =
+ is_isolate_initiator(previous_type) ||
+ previous_type == UNICODE_BIDI_CLASS_PDI
+ ? UNICODE_BIDI_CLASS_ON
+ : previous_type;
+
+ }
+
+ /* W2 */
+
+ if (stack->classes[iter.i] == UNICODE_BIDI_CLASS_EN &&
+ strong_type == UNICODE_BIDI_CLASS_AL)
+ {
+ stack->classes[iter.i] = UNICODE_BIDI_CLASS_AN;
+ }
+
+ /* W2 */
+ previous_type=stack->classes[iter.i];
+
+ switch (previous_type) {
+ case UNICODE_BIDI_CLASS_R:
+ case UNICODE_BIDI_CLASS_L:
+ case UNICODE_BIDI_CLASS_AL:
+ strong_type=previous_type;
+ break;
+ default:
+ break;
+ }
+
+ irs_incr(&iter);
+ }
+
+ iter=irs_begin(seq);
+
+ previous_type=UNICODE_BIDI_CLASS_L;
+
+ int not_eol=irs_compare(&iter, &end);
+
+ while (not_eol)
+ {
+ /* W3 */
+ if (stack->classes[iter.i] == UNICODE_BIDI_CLASS_AL)
+ stack->classes[iter.i] = UNICODE_BIDI_CLASS_R;
+
+ /* W4 */
+
+ enum_bidi_class_t this_type=stack->classes[iter.i];
+ irs_incr(&iter);
+
+ not_eol=irs_compare(&iter, &end);
+
+ if (not_eol &&
+ (
+ (this_type == UNICODE_BIDI_CLASS_ES &&
+ previous_type == UNICODE_BIDI_CLASS_EN)
+ ||
+ (this_type == UNICODE_BIDI_CLASS_CS &&
+ (previous_type == UNICODE_BIDI_CLASS_EN ||
+ previous_type == UNICODE_BIDI_CLASS_AN)
+ )
+ ) &&
+ stack->classes[iter.i] == previous_type)
+ {
+ irs_iterator prev=iter;
+
+ irs_decr(&prev);
+
+ stack->classes[prev.i]=previous_type;
+ }
+
+ if (not_eol)
+ previous_type=this_type;
+ }
+
+ iter=irs_begin(seq);
+
+ /* W5 */
+
+ previous_type=UNICODE_BIDI_CLASS_L; /* Doesn't match any part of W5 */
+
+ while (irs_compare(&iter, &end))
+ {
+ if (stack->classes[iter.i] != UNICODE_BIDI_CLASS_ET)
+ {
+ previous_type=stack->classes[iter.i];
+ irs_incr(&iter);
+ continue;
+ }
+
+ /* ET after EN */
+ if (previous_type == UNICODE_BIDI_CLASS_EN)
+ {
+ stack->classes[iter.i] = UNICODE_BIDI_CLASS_EN;
+ irs_incr(&iter);
+ continue;
+ }
+
+ /* See if EN follows these ETs */
+
+ irs_iterator start=iter;
+
+ while (irs_incr(&iter), irs_compare(&iter, &end))
+ {
+ previous_type=stack->classes[iter.i];
+
+ if (previous_type == UNICODE_BIDI_CLASS_ET)
+ continue;
+
+ if (previous_type == UNICODE_BIDI_CLASS_EN)
+ {
+ while (irs_compare(&start, &iter))
+ {
+ stack->classes[start.i]=
+ UNICODE_BIDI_CLASS_EN;
+ irs_incr(&start);
+ }
+ }
+ break;
+ }
+ }
+
+ /* W6 */
+
+ for (iter=irs_begin(seq);
+ irs_compare(&iter, &end); irs_incr(&iter))
+ {
+ switch (stack->classes[iter.i]) {
+ case UNICODE_BIDI_CLASS_ET:
+ case UNICODE_BIDI_CLASS_ES:
+ case UNICODE_BIDI_CLASS_CS:
+ /* W6 */
+ stack->classes[iter.i]=UNICODE_BIDI_CLASS_ON;
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* W7 */
+ iter=irs_begin(seq);
+
+ previous_type=seq->sos;
+
+ while (irs_compare(&iter, &end))
+ {
+ switch (stack->classes[iter.i]) {
+ case UNICODE_BIDI_CLASS_L:
+ case UNICODE_BIDI_CLASS_R:
+ previous_type=stack->classes[iter.i];
+ break;
+ case UNICODE_BIDI_CLASS_EN:
+ if (previous_type == UNICODE_BIDI_CLASS_L)
+ stack->classes[iter.i]=previous_type;
+ break;
+ default:
+ break;
+ }
+ irs_incr(&iter);
+ }
+}
+
+struct bidi_n_stack {
+ struct bidi_n_stack *next;
+
+ irs_iterator start;
+ irs_iterator end;
+ short has_e;
+ short has_o;
+ short matched;
+};
+
+static void unicode_bidi_n(directional_status_stack_t stack,
+ struct isolating_run_sequence_s *seq)
+{
+#define NSTACKSIZE 63
+
+ struct bidi_n_stack *bracket_stack=0;
+ struct bidi_n_stack **bracket_stack_tail= &bracket_stack;
+
+ struct bidi_n_stack *stack_iters[NSTACKSIZE];
+ char32_t stack_chars[NSTACKSIZE];
+ size_t stackp=0;
+
+ /*
+ ** N0
+ **
+ ** Combined pass that implements BD16, and constructs, on the fly
+ ** whether each matched pair has an e or an o inside it.
+ */
+ irs_iterator beg=irs_begin(seq), iter=beg, end=irs_end(seq);
+
+ for (; irs_compare(&iter, &end); irs_incr(&iter))
+ {
+ unicode_bidi_bracket_type_t bracket_type;
+ char32_t open_bracket=
+ unicode_bidi_bracket_type(stack->chars[iter.i],
+ &bracket_type);
+
+ if (bracket_type == UNICODE_BIDI_o)
+ {
+ if (stackp >= NSTACKSIZE)
+ break; /* BD16 failure */
+
+ if (!((*bracket_stack_tail)=(struct bidi_n_stack *)
+ calloc(1, sizeof(struct bidi_n_stack))))
+ abort();
+
+ stack_iters[stackp]=*bracket_stack_tail;
+
+ (*bracket_stack_tail)->start=iter;
+
+ stack_chars[stackp]=stack->chars[iter.i];
+
+ bracket_stack_tail= &(*bracket_stack_tail)->next;
+ ++stackp;
+ continue;
+ }
+
+ if (bracket_type == UNICODE_BIDI_c) /* Should be "n" */
+ {
+ for (size_t i=stackp; i > 0; )
+ {
+ --i;
+ if (stack_chars[i] != open_bracket)
+ continue;
+
+ stack_iters[i]->end = iter;
+ stack_iters[i]->matched=1;
+ stackp=i;
+ break;
+ }
+ continue;
+ }
+
+ /*
+ ** So we check if this is an e or an o, and if so we
+ ** have a convenient list of all unclosed brackets, so
+ ** we record these facts there.
+ */
+
+ enum_bidi_class_t eoclass=stack->classes[iter.i];
+
+#define ADJUST_EOCLASS(eoclass) do { \
+ \
+ if ((eoclass) == UNICODE_BIDI_CLASS_EN || \
+ (eoclass) == UNICODE_BIDI_CLASS_AN) \
+ (eoclass)=UNICODE_BIDI_CLASS_R; \
+ } while (0)
+
+ ADJUST_EOCLASS(eoclass);
+
+#define E_CLASS (seq->embedding_level & 1 ? \
+ UNICODE_BIDI_CLASS_R:UNICODE_BIDI_CLASS_L)
+
+#define O_CLASS (seq->embedding_level & 1 ? \
+ UNICODE_BIDI_CLASS_L:UNICODE_BIDI_CLASS_R)
+
+ if (eoclass == E_CLASS)
+ {
+ for (size_t i=0; i<stackp; ++i)
+ stack_iters[i]->has_e=1;
+ }
+ else if (eoclass == O_CLASS)
+ {
+ for (size_t i=0; i<stackp; ++i)
+ stack_iters[i]->has_o=1;
+ }
+ }
+
+ while (bracket_stack)
+ {
+ struct bidi_n_stack *p=bracket_stack;
+
+ bracket_stack=bracket_stack->next;
+
+ if (p->matched)
+ {
+ int set=0;
+
+ if (p->has_e)
+ {
+ stack->classes[p->start.i]=
+ stack->classes[p->end.i]=
+ seq->embedding_level & 1
+ ? UNICODE_BIDI_CLASS_R
+ : UNICODE_BIDI_CLASS_L;
+ set=1;
+ } else if (p->has_o)
+ {
+ enum_bidi_class_t strong_type=seq->sos;
+
+ iter=p->start;
+
+ while (irs_compare(&beg, &iter))
+ {
+ irs_decr(&iter);
+
+ enum_bidi_class_t eoclass=
+ stack->classes[iter.i];
+
+ ADJUST_EOCLASS(eoclass);
+
+ switch (eoclass) {
+ case UNICODE_BIDI_CLASS_L:
+ case UNICODE_BIDI_CLASS_R:
+ break;
+ default:
+ continue;
+ }
+
+ strong_type=eoclass;
+ break;
+ }
+
+ if (strong_type == O_CLASS)
+ {
+ stack->classes[p->start.i]=
+ stack->classes[p->end.i]=
+ strong_type;
+ set=1;
+ }
+ }
+
+ if (set)
+ {
+ enum_bidi_class_t strong_type=
+ stack->classes[p->end.i];
+
+ while (irs_incr(&p->end),
+ irs_compare(&p->end, &end))
+ {
+ if (stack->orig_classes[p->end.i] !=
+ UNICODE_BIDI_CLASS_NSM)
+ break;
+
+ stack->classes[p->end.i]=strong_type;
+ }
+ }
+ }
+ free(p);
+ }
+
+ /* N1 */
+
+#define IS_NI(class) \
+ ((class) == UNICODE_BIDI_CLASS_B || \
+ (class) == UNICODE_BIDI_CLASS_S || \
+ (class) == UNICODE_BIDI_CLASS_WS || \
+ (class) == UNICODE_BIDI_CLASS_ON || \
+ (class) == UNICODE_BIDI_CLASS_FSI || \
+ (class) == UNICODE_BIDI_CLASS_LRI || \
+ (class) == UNICODE_BIDI_CLASS_RLI || \
+ (class) == UNICODE_BIDI_CLASS_PDI)
+
+ enum_bidi_class_t prev_type=seq->sos;
+
+ for (iter=beg; irs_compare(&iter, &end); )
+ {
+ /*
+ ** N1
+ */
+
+ enum_bidi_class_t this_type=stack->classes[iter.i];
+
+ ADJUST_EOCLASS(this_type);
+
+ if (!IS_NI(this_type))
+ {
+ switch (this_type) {
+ case UNICODE_BIDI_CLASS_L:
+ case UNICODE_BIDI_CLASS_R:
+ prev_type=this_type;
+ break;
+ default:
+ prev_type=UNICODE_BIDI_CLASS_ON; // Marker.
+ break;
+ }
+ irs_incr(&iter);
+ continue;
+ }
+
+ enum_bidi_class_t next_type=seq->eos;
+
+ irs_iterator start=iter;
+
+ while (irs_compare(&iter, &end))
+ {
+ if (IS_NI(stack->classes[iter.i]))
+ {
+ irs_incr(&iter);
+ continue;
+ }
+
+ enum_bidi_class_t other_type=stack->classes[iter.i];
+
+ ADJUST_EOCLASS(other_type);
+
+ switch (other_type) {
+ case UNICODE_BIDI_CLASS_L:
+ case UNICODE_BIDI_CLASS_R:
+ next_type=other_type;
+ break;
+ default:
+ next_type=UNICODE_BIDI_CLASS_BN; /* Marker */
+ break;
+ }
+ break;
+ }
+
+ while (irs_compare(&start, &iter))
+ {
+ // next_type can only match prev_type if both are L
+ // or R. If the character before the NIs was not L
+ // or R, prev_type is ON. If the character after the
+ // NIs was not L or R, next_type is BN.
+
+ if (next_type == prev_type)
+ {
+ stack->classes[start.i]=next_type; /* N1 */
+ }
+
+ irs_incr(&start);
+ }
+ }
+
+ for (iter=beg; irs_compare(&iter, &end); )
+ {
+ if (IS_NI(stack->classes[iter.i]))
+ {
+ stack->classes[iter.i]=
+ stack->levels[iter.i] & 1 ?
+ UNICODE_BIDI_CLASS_R :
+ UNICODE_BIDI_CLASS_L; /* N2 */
+ }
+ irs_incr(&iter);
+ }
+
+#ifdef BIDI_DEBUG
+ dump_sequence("Contents after N", stack, seq);
+#endif
+
+ /* I1 */
+ /* I2 */
+
+ for (iter=beg; irs_compare(&iter, &end); irs_incr(&iter))
+ {
+ if ((stack->levels[iter.i] & 1) == 0)
+ {
+ switch (stack->classes[iter.i]) {
+ case UNICODE_BIDI_CLASS_R:
+ ++stack->levels[iter.i];
+ break;
+ case UNICODE_BIDI_CLASS_AN:
+ case UNICODE_BIDI_CLASS_EN:
+ stack->levels[iter.i] += 2;
+ break;
+ default: break;
+ }
+ }
+ else
+ {
+ switch (stack->classes[iter.i]) {
+ case UNICODE_BIDI_CLASS_L:
+ case UNICODE_BIDI_CLASS_AN:
+ case UNICODE_BIDI_CLASS_EN:
+ ++stack->levels[iter.i];
+ break;
+ default: break;
+ }
+ }
+ }
+
+#ifdef BIDI_DEBUG
+ dump_sequence("Contents after I", stack, seq);
+#endif
}