diff options
| author | Sam Varshavchik | 2020-07-04 20:01:49 -0400 |
|---|---|---|
| committer | Sam Varshavchik | 2020-07-12 15:56:45 -0400 |
| commit | acd2a932df810d7db2037e3962feb61ca21e2781 (patch) | |
| tree | 8f8f3fc757ff93ba4e2ecd9730451ccf6d945055 | |
| parent | d57946b6a21b33f26f189951e1c30ca901581b90 (diff) | |
| download | courier-libs-acd2a932df810d7db2037e3962feb61ca21e2781.tar.bz2 | |
Implement the Unicode bidirectional algorithm.
| -rw-r--r-- | unicode/.gitignore | 1 | ||||
| -rw-r--r-- | unicode/Makefile.am | 16 | ||||
| -rw-r--r-- | unicode/bidi_classnames.h | 23 | ||||
| -rw-r--r-- | unicode/biditest.C | 286 | ||||
| -rw-r--r-- | unicode/courier-unicode.h.in | 39 | ||||
| -rw-r--r-- | unicode/mkbidiclassnames.pl | 21 | ||||
| -rw-r--r-- | unicode/unicode_bidi.c | 1598 |
7 files changed, 1974 insertions, 10 deletions
diff --git a/unicode/.gitignore b/unicode/.gitignore index 3ebc3ca..49cf4df 100644 --- a/unicode/.gitignore +++ b/unicode/.gitignore @@ -3,6 +3,7 @@ /*.sig /BidiBrackets.txt /BidiMirroring.txt +/BidiTest.txt /Categories.txt /DerivedBidiClass.txt /EastAsianWidth.txt diff --git a/unicode/Makefile.am b/unicode/Makefile.am index 30af049..397987c 100644 --- a/unicode/Makefile.am +++ b/unicode/Makefile.am @@ -10,6 +10,7 @@ noinst_SCRIPTS=update.sh \ mkcommon.pm \ mkbidi.pl \ mkbidiclass.pl \ + mkbidiclassnames.pl \ mkeastasianwidth.pl \ mkemojidata.pl \ mkgraphemebreak.pl \ @@ -35,6 +36,11 @@ update-www: @$(MAKE) update-www-htmlent @$(MAKE) update-www-categories @$(MAKE) update-www-bidi + @$(MAKE) update-www-unicode-copyright + +update-www-unicode-copyright: + links -dump https://www.unicode.org/license.html >UNICODE-LICENSE.txt.tmp + mv UNICODE-LICENSE.txt.tmp UNICODE-LICENSE.txt update-www-unicode: @SHELL@ $(srcdir)/update.sh UnicodeData.txt http://www.unicode.org/Public/UNIDATA/UnicodeData.txt @@ -72,6 +78,7 @@ update-www-bidi: @SHELL@ $(srcdir)/update.sh BidiBrackets.txt https://www.unicode.org/Public/UCD/latest/ucd/BidiBrackets.txt @SHELL@ $(srcdir)/update.sh BidiMirroring.txt https://www.unicode.org/Public/UCD/latest/ucd/BidiMirroring.txt @SHELL@ $(srcdir)/update.sh DerivedBidiClass.txt https://www.unicode.org/Public/UCD/latest/ucd/extracted/DerivedBidiClass.txt + @SHELL@ $(srcdir)/update.sh BidiTest.txt https://www.unicode.org/Public/UCD/latest/ucd/BidiTest.txt lib_LTLIBRARIES=libcourier-unicode.la include_HEADERS=courier-unicode.h \ @@ -117,6 +124,7 @@ BUILT_SOURCES=unicode_ultcasetab.c \ bidi_brackets.h \ bidi_brackets_v.h \ bidi_class.h \ + bidi_classnames.h \ bidi_mirroring.h \ categoriestab.h \ eastasianwidth.h \ @@ -176,6 +184,11 @@ bidi_class.h: DerivedBidiClass.txt mkbidiclass.pl mkcommon.pm bidi_mirroring.h: BidiMirroring.txt mkbidi.pl @PERL@ -I$(srcdir) $(srcdir)/mkbidi.pl BidiMirroring.txt >bidi_mirroring.h.tmp mv bidi_mirroring.h.tmp bidi_mirroring.h + +bidi_classnames.h: unicode_bidi.c mkbidiclassnames.pl + @PERL@ $(srcdir)/mkbidiclassnames.pl <$(srcdir)/unicode_bidi.c >bidi_classnames.h.tmp + mv bidi_classnames.h.tmp bidi_classnames.h + endif unicodetest_SOURCES=unicodetest.c @@ -208,7 +221,7 @@ scripttest_DEPENDENCIES=libcourier-unicode.la scripttest_LDADD=libcourier-unicode.la scripttest_LDFLAGS=-static -biditest_SOURCES=biditest.C +biditest_SOURCES=biditest.C bidi_classnames.h biditest_DEPENDENCIES=libcourier-unicode.la biditest_LDADD=libcourier-unicode.la biditest_LDFLAGS=-static @@ -283,6 +296,7 @@ check-am: unicodetest test "`./biditest 30`" = "30 30 n" test "`./biditest 8261`" = "8262 8262 o" test "`./biditest 8262`" = "8261 8261 c" + ./biditest if HAVE_DOCS diff --git a/unicode/bidi_classnames.h b/unicode/bidi_classnames.h new file mode 100644 index 0000000..61cf80b --- /dev/null +++ b/unicode/bidi_classnames.h @@ -0,0 +1,23 @@ +{"AL", UNICODE_BIDI_CLASS_AL}, +{"AN", UNICODE_BIDI_CLASS_AN}, +{"B", UNICODE_BIDI_CLASS_B}, +{"BN", UNICODE_BIDI_CLASS_BN}, +{"CS", UNICODE_BIDI_CLASS_CS}, +{"EN", UNICODE_BIDI_CLASS_EN}, +{"ES", UNICODE_BIDI_CLASS_ES}, +{"ET", UNICODE_BIDI_CLASS_ET}, +{"FSI", UNICODE_BIDI_CLASS_FSI}, +{"L", UNICODE_BIDI_CLASS_L}, +{"LRE", UNICODE_BIDI_CLASS_LRE}, +{"LRI", UNICODE_BIDI_CLASS_LRI}, +{"LRO", UNICODE_BIDI_CLASS_LRO}, +{"NSM", UNICODE_BIDI_CLASS_NSM}, +{"ON", UNICODE_BIDI_CLASS_ON}, +{"PDF", UNICODE_BIDI_CLASS_PDF}, +{"PDI", UNICODE_BIDI_CLASS_PDI}, +{"R", UNICODE_BIDI_CLASS_R}, +{"RLE", UNICODE_BIDI_CLASS_RLE}, +{"RLI", UNICODE_BIDI_CLASS_RLI}, +{"RLO", UNICODE_BIDI_CLASS_RLO}, +{"S", UNICODE_BIDI_CLASS_S}, +{"WS", UNICODE_BIDI_CLASS_WS}, diff --git a/unicode/biditest.C b/unicode/biditest.C index db4c541..c58da0d 100644 --- a/unicode/biditest.C +++ b/unicode/biditest.C @@ -1,6 +1,15 @@ #include "unicode_config.h" #include "courier-unicode.h" #include <iostream> +#include <fstream> +#include <sstream> +#include <string> +#include <algorithm> +#include <iomanip> + +std::vector<std::string> testcase; + +FILE *DEBUGDUMP; int main(int argc, char **argv) { @@ -12,6 +21,283 @@ int main(int argc, char **argv) std::cout << unicode_bidi_mirror(c) << " " << unicode_bidi_bracket_type(c, &bt) << " "; std::cout << (char)bt << std::endl; + exit (0); + } + + DEBUGDUMP=fopen("/dev/null", "w"); + if (!DEBUGDUMP) + { + perror("/dev/null"); + exit(1); } + std::ifstream fp("BidiTest.txt"); + + if (!fp.is_open()) + exit(1); + + size_t linenum=0; + size_t nextlogline=0; + std::string logmsg; + + std::string buf; + + std::vector<unicode_bidi_level_t> expected_levels; + + while (1) + { + buf.clear(); + + if (std::getline(fp, buf).eof() && buf.empty()) + break; + + if (++linenum >= nextlogline) + { + std::cout << logmsg; + + std::ostringstream o; + + o << std::setw(6) << linenum << " lines processed... "; + + logmsg=o.str(); + + std::cout << logmsg << std::flush; + + std::fill(logmsg.begin(), logmsg.end(), '\b'); + + nextlogline += 20000; + } + + buf.erase(std::find(buf.begin(), buf.end(), '#'), buf.end()); + + if (buf.substr(0, 8) == "@Levels:") + { + expected_levels.clear(); + + std::istringstream i(buf); + + std::string word; + + i >> word; + + while (i >> word) + { + int n; + + std::stringstream words(word); + + if (words >> n) + { + + expected_levels.push_back(n); + } + else + { + expected_levels + .push_back(UNICODE_BIDI_SKIP); + } + } + continue; + } + + if (buf.substr(0, 1) == "@") + continue; + + size_t semicolon=buf.find(';'); + + if (semicolon == buf.npos) + continue; + + testcase.clear(); + + { + std::istringstream i(buf.substr(0, semicolon)); + + std::string word; + + while (i >> word) + testcase.push_back(word); + } + + int n; + + { + std::istringstream i(buf.substr(semicolon+1)); + + if (!(i >> n)) + { + std::cerr << "Cannot parse paragraph bitset: " + << buf.substr(semicolon+1) + << " on line " << linenum + << std::endl; + abort(); + } + } + + //if (linenum != 98950) + // continue; + + std::vector<unicode_bidi_level_t> actual_levels; + + std::vector<char32_t> dummy_input; + + dummy_input.resize(testcase.size()); + actual_levels.resize(testcase.size()); + + static const unicode_bidi_level_t level_0=0; + static const unicode_bidi_level_t level_1=1; + + static const unicode_bidi_level_t *levels[3]= + {0, &level_0, &level_1}; + + for (auto level:levels) + { + if (n & 1) + { + unicode_bidi_calc(&dummy_input[0], + testcase.size(), + &actual_levels[0], level); + + int matched=0; + + if (actual_levels.size() == + expected_levels.size()) + { + size_t i=0; + + matched=1; + + for (i=0; i<actual_levels.size(); ++i) + { + if (expected_levels[i] == + UNICODE_BIDI_SKIP) + continue; + if (expected_levels[i] != + actual_levels[i]) + { + matched=0; + break; + } + } + } + + if (!matched) + { + fclose(DEBUGDUMP); + DEBUGDUMP=stderr; + std::cout << std::endl + << std::flush; + unicode_bidi_calc(&dummy_input[0], + testcase.size(), + &actual_levels[0], + level); + + std::cerr << "Regression, line " + << linenum; + + if (!level) + { + std::cerr << ", auto"; + } + else + { + std::cerr << + (*level ? ", RTL" + : ", LTR"); + } + std::cerr << ": expected"; + + for (int l:expected_levels) + { + std::cerr << " " << l; + } + std::cerr << std::endl + << "Received:"; + + for (int l:actual_levels) + { + std::cerr << " " << l; + } + std::cerr << std::endl; + exit(1); + } + } + + n >>= 1; + } + } + + std::cout << logmsg; + + std::fill(logmsg.begin(), logmsg.end(), ' '); + std::cout << logmsg << std::endl; return 0; } + +#define UNICODE_BIDI_TEST(i) (buf[i]=fudge_unicode_bidi(i)) + +#define BIDI_DEBUG + +extern "C" { +#if 0 +} +#endif + +#include "unicode_bidi.c" + +static const struct { + char classname[8]; + enum_bidi_class_t classenum; +} bidiclassnames[]={ + +#include "bidi_classnames.h" + +}; + +const char *bidi_classname(enum_bidi_class_t classenum) +{ + for (const auto &cn:bidiclassnames) + { + if (cn.classenum == classenum) + return cn.classname; + } + + return "???"; +} + +static const char *lookup_classname(const std::string &s) +{ + abort(); +} + +enum_bidi_class_t fudge_unicode_bidi(size_t i) +{ + if (i >= testcase.size()) + { + std::cerr << "Test case:"; + + for (auto &n:testcase) + std::cerr << " " << n; + std::cerr << ": no value #" << i << std::endl; + abort(); + } + + for (const auto &cn:bidiclassnames) + { + if (testcase[i] == cn.classname) + { + return cn.classenum; + } + } + + std::cerr << "Test case:"; + + for (auto &n:testcase) + std::cerr << " " << n; + std::cerr << ": unknown value: " << testcase[i]; + abort(); +} + +#if 0 +{ +#endif +} diff --git a/unicode/courier-unicode.h.in b/unicode/courier-unicode.h.in index 68ece2c..67f3bda 100644 --- a/unicode/courier-unicode.h.in +++ b/unicode/courier-unicode.h.in @@ -554,7 +554,32 @@ size_t unicode_wbscan_end(unicode_wbscan_info_t i); ** values. ** ** unicode_bidi_bracket_type() returns the same character and -** UNICODE_BIDI_n if the given character does not have these properties, +** UNICODE_BIDI_n if the given character does not have these properties. +** +** unicode_bidi_calc() implements the Unicode Bidirectional Algorithm up to +** step L1. +** +** Parameters: +** +** - A pointer to char32_t, the Unicode string. +** +** - Number of characters in the char32_t string +** +** - A pointer to an array of unicode_bidi_level_t values. The caller is +** responsible for allocating and deallocating this array, which has the +** same size as the Unicode string (the second parameter). +** +** - An optional pointer to a unicode_bidi_level_t value, or a null pointer. +** A pointer to UNICODE_BIDI_LR or UNICODE_BIDI_RL sets the default paragraph +** direction level. A null pointer calculates the default paragraph direction +** level based on the string, as specified by the "P" rules in the algorithm. +** +** unicode_bidi_calc() fills in the unicode_bidi_level_t array with the +** values corresponding to the embedding level of the corresponding character, +** as specified in the Unicode Bidirection Algorithm (even for left-to-right, +** and odd for right-to-left). A value of UNICODE_BIDI_SKIP designates +** directional markers (from step X9). These characters should be removed +** before using unicode_bidi_reorder(). */ typedef char unicode_bidi_bracket_type_t; @@ -568,6 +593,18 @@ extern char32_t unicode_bidi_mirror(char32_t c); extern char32_t unicode_bidi_bracket_type(char32_t c, unicode_bidi_bracket_type_t *ret); + +typedef unsigned char unicode_bidi_level_t; + +#define UNICODE_BIDI_LR ((unicode_bidi_level_t)0) +#define UNICODE_BIDI_RL ((unicode_bidi_level_t)1) +#define UNICODE_BIDI_SKIP ((unicode_bidi_level_t)254) + +extern void unicode_bidi_calc(const char32_t *p, size_t n, + unicode_bidi_level_t *bufp, + const unicode_bidi_level_t * + initial_embedding_level); + /* ** A buffer that holds unicode characters, and dynamically grows as needed. */ diff --git a/unicode/mkbidiclassnames.pl b/unicode/mkbidiclassnames.pl new file mode 100644 index 0000000..d3324a9 --- /dev/null +++ b/unicode/mkbidiclassnames.pl @@ -0,0 +1,21 @@ +#! /usr/bin/perl + +use strict; +use warnings; + +while (<>) +{ + last if m@^/\* BIDI_CLASS_LIST@; +} + +while (<>) +{ + last if m@^}@; + next if /\{/; + next if /^\s*$/; + next if m@/\*@; + + die unless /UNICODE_BIDI_CLASS_(.*),/; + + print "{\"$1\", UNICODE_BIDI_CLASS_$1},\n"; +} diff --git a/unicode/unicode_bidi.c b/unicode/unicode_bidi.c index e486621..38dcb44 100644 --- a/unicode/unicode_bidi.c +++ b/unicode/unicode_bidi.c @@ -86,6 +86,7 @@ char32_t unicode_bidi_bracket_type(char32_t c, return c; } +/* BIDI_CLASS_LIST */ typedef enum { UNICODE_BIDI_CLASS_AL, UNICODE_BIDI_CLASS_AN, @@ -114,13 +115,1594 @@ typedef enum { #include "bidi_class.h" -enum_bidi_class_t unicode_bidi_class(char32_t c) +#ifdef BIDI_DEBUG +enum_bidi_class_t fudge_unicode_bidi(size_t); +const char *bidi_classname(enum_bidi_class_t); +#endif + +#define max_depth 125 + +typedef enum { + do_neutral, + do_right_to_left, + do_left_to_right +} directional_override_status_t; + +#define is_isolate_initiator(c) \ + ((c) == UNICODE_BIDI_CLASS_LRI || \ + (c) == UNICODE_BIDI_CLASS_RLI || \ + (c) == UNICODE_BIDI_CLASS_FSI) + +#define is_embedding_initiator(c) \ + ((c) == UNICODE_BIDI_CLASS_LRE || \ + (c) == UNICODE_BIDI_CLASS_RLE || \ + (c) == UNICODE_BIDI_CLASS_LRO || \ + (c) == UNICODE_BIDI_CLASS_RLO) + +/* BD13 implementation */ + +/* A level run, specified as indexes */ + +struct level_run { + size_t start; + size_t end; /* one past */ +}; + +/* An isolating run sequence */ + +struct isolating_run_sequence_s { + struct isolating_run_sequence_s *prev, *next; /* Linked list */ + + struct level_run *level_runs; /* All level runs in the sequence */ + size_t n_level_runs; /* How many of them */ + size_t cap_level_runs; /* Capacity of the level runs */ + unicode_bidi_level_t embedding_level; /* This seq's embedding level */ + enum_bidi_class_t sos, eos; +}; + +/* An iterator for an isolating run sequence */ + +typedef struct { + + /* My sequence */ + struct isolating_run_sequence_s *seq; + + /* Which level run I'm on */ + + size_t level_run_i; + + /* Current index */ + + size_t i; +} irs_iterator; + +static irs_iterator irs_begin(struct isolating_run_sequence_s *seq) +{ + irs_iterator iter; + + iter.seq=seq; + iter.level_run_i=0; + + /* Edge case, empty isolating run sequence */ + + while (iter.level_run_i < seq->n_level_runs) + { + iter.i=seq->level_runs[iter.level_run_i].start; + + if (iter.i < seq->level_runs[iter.level_run_i].end) + break; + + ++iter.level_run_i; + } + return iter; +} + +static irs_iterator irs_end(struct isolating_run_sequence_s *seq) +{ + irs_iterator iter; + + iter.seq=seq; + iter.level_run_i=seq->n_level_runs; + return iter; +} + +static int irs_compare(const irs_iterator *a, + const irs_iterator *b) +{ + if (a->level_run_i < b->level_run_i) + return -1; + if (a->level_run_i > b->level_run_i) + return 1; + + if (a->level_run_i == a->seq->n_level_runs) + return 0; + + if (a->i < b->i) + return -1; + + if (a->i > b->i) + return 1; + return 0; +} + +static void irs_incr(irs_iterator *iter) +{ + if (iter->seq->n_level_runs == iter->level_run_i) + { + fprintf(stderr, "%s%s\n", + "Internal error: attempting to increment ", + "one past end of isolating run sequence iterator"); + abort(); + } + + if (++iter->i >= iter->seq->level_runs[iter->level_run_i].end) + { + if (++iter->level_run_i < iter->seq->n_level_runs) + iter->i=iter->seq->level_runs[iter->level_run_i].start; + } +} + +static void irs_decr(irs_iterator *iter) +{ + while (1) + { + if (iter->seq->n_level_runs > iter->level_run_i && + iter->i > iter->seq->level_runs[iter->level_run_i].start) + { + --iter->i; + break; + } + + if (iter->level_run_i == 0) + { + fprintf(stderr, "%s%s\n", + "Internal error: attempting to decrement the ", + "beginning isolating run sequence iterator"); + abort(); + } + + iter->i=iter->seq->level_runs[--iter->level_run_i].end; + } +} + +/* Used to build isolating run sequences link */ + +struct isolating_run_sequence_link { + size_t push; + size_t pop; + struct isolating_run_sequence_s *seq; +}; + +/* Our isolating run sequences */ + +struct isolating_run_sequences_s { + struct isolating_run_sequence_s *head, *tail; /* Linked list */ + + /* Links LRI and RLI to its matching PDI */ + + struct isolating_run_sequence_link *pdi_linkage; /* Recycle level_run */ + size_t n_pdi_linkage; + size_t cap_pdi_linkage; +}; + +static int compare_irs_by_push(struct isolating_run_sequence_link **a, + struct isolating_run_sequence_link **b) +{ + if ((*a)->push < (*b)->push) + return -1; + if ((*a)->push > (*b)->push) + return 1; + return 0; +} + +static struct isolating_run_sequence_link **sort_irs_links_by_push +(struct isolating_run_sequences_s *seq) +{ + struct isolating_run_sequence_link **p; + size_t i; + + if (seq->n_pdi_linkage == 0) + return 0; + + p=(struct isolating_run_sequence_link **) + calloc(sizeof(struct isolating_run_sequence_link *), + seq->n_pdi_linkage); + + for (i=0; i<seq->n_pdi_linkage; i++) + { + p[i]=seq->pdi_linkage+i; + } + qsort(p, seq->n_pdi_linkage, sizeof(*p), + (int (*)(const void *, const void *))compare_irs_by_push); + return p; +} + +static struct isolating_run_sequence_s * +isolating_run_sequences_init(struct isolating_run_sequences_s *p, + unicode_bidi_level_t embedding_level, + size_t i) +{ + struct isolating_run_sequence_s *seq= + (struct isolating_run_sequence_s *) + calloc(1, sizeof(struct isolating_run_sequence_s)); + + if (!seq) abort(); + + if ((seq->level_runs=(struct level_run *) + malloc(sizeof(struct level_run))) == 0) abort(); + + seq->level_runs->start=i; + seq->level_runs->end=i; + + seq->n_level_runs=seq->cap_level_runs=1; + seq->embedding_level=embedding_level; + + if (!p->head) + { + p->head=p->tail=seq; + } + else + { + p->tail->next=seq; + seq->prev=p->tail; + p->tail=seq; + } + + return seq; +} + +static void isolating_run_sequences_record(struct isolating_run_sequence_s *p, + size_t i) +{ + struct level_run *current_level_run= + &p->level_runs[p->n_level_runs-1]; + + if (current_level_run->start == current_level_run->end) + { + current_level_run->start=i; + current_level_run->end=i+1; + return; + } + + if (current_level_run->end == i) + { + ++current_level_run->end; + return; + } + + /* + ** Starting a new level run in the current isolating + ** run sequence. + */ + + if (p->n_level_runs == p->cap_level_runs) + { + p->cap_level_runs *= 2; + + p->level_runs=(struct level_run *) + realloc(p->level_runs, + sizeof(struct level_run) * + p->cap_level_runs); + if (!p->level_runs) + abort(); + } + + current_level_run = p->level_runs + (p->n_level_runs++); + + current_level_run->start=i; + current_level_run->end=i+1; +} + +static void isolating_run_sequence_link(struct isolating_run_sequences_s *p, + size_t push, + size_t pop) { - return unicode_tab_lookup(c, - unicode_indextab, - sizeof(unicode_indextab) - /sizeof(unicode_indextab[0]), - unicode_rangetab, - unicode_classtab, - UNICODE_BIDI_CLASS_L); + if (p->n_pdi_linkage == p->cap_pdi_linkage) + { + p->cap_pdi_linkage= + p->cap_pdi_linkage == 0 + ? 1 : p->cap_pdi_linkage*2; + + p->pdi_linkage=(struct isolating_run_sequence_link *) + (p->pdi_linkage ? + realloc(p->pdi_linkage, + p->cap_pdi_linkage * sizeof(*p->pdi_linkage)) + : + malloc(p->cap_pdi_linkage * sizeof(*p->pdi_linkage))); + + if (!p->pdi_linkage) + abort(); + } + + p->pdi_linkage[p->n_pdi_linkage].push=push; + p->pdi_linkage[p->n_pdi_linkage].pop=pop; + p->pdi_linkage[p->n_pdi_linkage].seq=0; + ++p->n_pdi_linkage; +} + +static void isolating_run_sequences_deinit(struct isolating_run_sequences_s *p) +{ + struct isolating_run_sequence_s *seq; + + for (seq=p->head; seq; ) + { + struct isolating_run_sequence_s *p=seq; + + seq=seq->next; + + free(p->level_runs); + free(p); + } + + if (p->pdi_linkage) + free(p->pdi_linkage); +} + +/* An entry on the directional status stack, used by the X rules */ + +struct directional_status_stack_entry { + struct directional_status_stack_entry *next; + unicode_bidi_level_t embedding_level; + directional_override_status_t directional_override_status; + int directional_isolate_status; + size_t isolate_start; +}; + +typedef struct { + struct directional_status_stack_entry *head; + + unicode_bidi_level_t paragraph_embedding_level; + const char32_t *chars; + enum_bidi_class_t *classes; + enum_bidi_class_t *orig_classes; + unicode_bidi_level_t *levels; + size_t size; + int overflow_isolate_count; + int overflow_embedding_count; + int valid_isolate_count; + + struct isolating_run_sequences_s isolating_run_sequences; +} *directional_status_stack_t; + +#ifdef BIDI_DEBUG +void dump_classes(const char *prefix, directional_status_stack_t stack) +{ + fprintf(DEBUGDUMP, "%s: ", prefix); + for (size_t i=0; i<stack->size; ++i) + { + fprintf(DEBUGDUMP, " %s(%d)", + bidi_classname(stack->classes[i]), + (int)stack->levels[i]); + } + fprintf(DEBUGDUMP, "\n"); +} + +void dump_orig_classes(const char *prefix, directional_status_stack_t stack) +{ + fprintf(DEBUGDUMP, "%s: ", prefix); + + for (size_t i=0; i<stack->size; ++i) + { + fprintf(DEBUGDUMP, " %s(%s%s%d)", + bidi_classname(stack->classes[i]), + (stack->classes[i] != stack->orig_classes[i] ? + bidi_classname(stack->orig_classes[i]):""), + (stack->classes[i] != stack->orig_classes[i] ? "/":""), + (int)stack->levels[i]); + } + fprintf(DEBUGDUMP, "\n"); +} +#endif + + +static void directional_status_stack_push +(directional_status_stack_t stack, + unicode_bidi_level_t embedding_level, + directional_override_status_t directional_override_status, + int directional_isolate_status) +{ + struct directional_status_stack_entry *p= + (struct directional_status_stack_entry *) + malloc(sizeof(struct directional_status_stack_entry)); + +#ifdef BIDI_DEBUG + fprintf(DEBUGDUMP, "BIDI: Push level %d, override: %s, isolate: %s\n", + (int)embedding_level, + (directional_override_status == do_right_to_left ? "RTOL": + directional_override_status == do_left_to_right ? "LTOR": + directional_override_status == do_neutral ? "NEUTRAL" + : "ERROR"), + (directional_isolate_status ? "YES":"NO")); + +#endif + + p->embedding_level=embedding_level; + p->directional_override_status=directional_override_status; + p->directional_isolate_status=directional_isolate_status; + p->next=stack->head; + stack->head=p; +} + +static unicode_bidi_level_t +compute_paragraph_embedding_level(const enum_bidi_class_t *p, + size_t i, size_t j) +{ + unicode_bidi_level_t in_isolation=0; + + for (; i<j; ++i) + { + if (is_isolate_initiator(p[i])) + ++in_isolation; + else if (p[i] == UNICODE_BIDI_CLASS_PDI) + { + if (in_isolation) + --in_isolation; + } + + if (in_isolation == 0) + { + if (p[i] == UNICODE_BIDI_CLASS_AL || + p[i] == UNICODE_BIDI_CLASS_R) + { + return 1; + } + if (p[i] == UNICODE_BIDI_CLASS_L) + break; + } + } + return 0; +} + +static directional_status_stack_t +directional_status_stack_init(const char32_t *chars, + enum_bidi_class_t *classes, size_t n, + unicode_bidi_level_t *levels, + const unicode_bidi_level_t + *initial_embedding_level) +{ + directional_status_stack_t stack; + + stack=(directional_status_stack_t)calloc(1, sizeof(*stack)); + + stack->paragraph_embedding_level= + initial_embedding_level + ? *initial_embedding_level & 1 + : compute_paragraph_embedding_level(classes, 0, n); + stack->chars=chars; + stack->classes=classes; + + if (n) + { + classes=(enum_bidi_class_t *) + malloc(sizeof(enum_bidi_class_t)*n); + if (!classes) + abort(); + memcpy(classes, stack->classes, sizeof(enum_bidi_class_t)*n); + } + else + { + classes=0; + } + stack->orig_classes=classes; + stack->levels=levels; + stack->size=n; + + directional_status_stack_push(stack, + stack->paragraph_embedding_level, + do_neutral, 0); + + return stack; +} + +static void directional_status_stack_pop(directional_status_stack_t stack) +{ + struct directional_status_stack_entry *head=stack->head; + + if (!head) + return; + + stack->head=head->next; + free(head); + +#ifdef BIDI_DEBUG + fprintf(DEBUGDUMP, "BIDI: Pop\n"); +#endif +} + +static void directional_status_stack_deinit(directional_status_stack_t stack) +{ + while (stack->head) + directional_status_stack_pop(stack); + if (stack->orig_classes) + free(stack->orig_classes); + isolating_run_sequences_deinit(&stack->isolating_run_sequences); + free(stack); +} + +static void unicode_bidi_b(const char32_t *p, + size_t n, + enum_bidi_class_t *buf, + unicode_bidi_level_t *bufp, + const unicode_bidi_level_t *initial_embedding_level); + +void unicode_bidi_calc(const char32_t *p, size_t n, unicode_bidi_level_t *bufp, + const unicode_bidi_level_t *initial_embedding_level) +{ + /* + ** Look up the bidi class for each char32_t. + ** + ** When we encounter a paragraph break we call unicode_bidi_b() to + ** process it. + */ + + enum_bidi_class_t *buf= + (enum_bidi_class_t *)malloc(n * sizeof(enum_bidi_class_t)); + + for (size_t i=0; i<n; ++i) + { + buf[i]=(enum_bidi_class_t) + unicode_tab_lookup(p[i], + unicode_indextab, + sizeof(unicode_indextab) + /sizeof(unicode_indextab[0]), + unicode_rangetab, + unicode_classtab, + UNICODE_BIDI_CLASS_L); +#ifdef UNICODE_BIDI_TEST + UNICODE_BIDI_TEST(i); +#endif + bufp[i]=UNICODE_BIDI_SKIP; + } + + unicode_bidi_b(p, n, + buf, + bufp, + initial_embedding_level); + + free(buf); +} + +static void unicode_bidi_cl(directional_status_stack_t stack); + +static void unicode_bidi_b(const char32_t *p, + size_t n, + enum_bidi_class_t *buf, + unicode_bidi_level_t *bufp, + const unicode_bidi_level_t *initial_embedding_level) +{ + directional_status_stack_t stack; + + stack=directional_status_stack_init(p, buf, n, bufp, + initial_embedding_level); + +#ifdef BIDI_DEBUG + fprintf(DEBUGDUMP, "BIDI: START: Paragraph embedding level: %d\n", + (int)stack->paragraph_embedding_level); +#endif + + unicode_bidi_cl(stack); + + directional_status_stack_deinit(stack); +} + +#define RESET_CLASS(p,stack) do { \ + switch ((stack)->head->directional_override_status) { \ + case do_neutral: break; \ + case do_left_to_right: (p)=UNICODE_BIDI_CLASS_L; break; \ + case do_right_to_left: (p)=UNICODE_BIDI_CLASS_R; break; \ + } \ + } while(0) + +static void unicode_bidi_w(directional_status_stack_t stack, + struct isolating_run_sequence_s *seq); +static void unicode_bidi_n(directional_status_stack_t stack, + struct isolating_run_sequence_s *seq); + +#ifdef BIDI_DEBUG +void dump_sequence_info(directional_status_stack_t stack, + struct isolating_run_sequence_s *seq) +{ + fprintf(DEBUGDUMP, "Sequence: sos: %c, eos: %c:", + (seq->sos == UNICODE_BIDI_CLASS_L ? 'L':'R'), + (seq->eos == UNICODE_BIDI_CLASS_L ? 'L':'R')); + + for (size_t i=0; i<seq->n_level_runs; ++i) + { + fprintf(DEBUGDUMP, "%s[%lu-%lu]", + i == 0 ? " ":", ", + (unsigned long)seq->level_runs[i].start, + (unsigned long)seq->level_runs[i].end-1); + } + fprintf(DEBUGDUMP, "\n"); +} + +void dump_sequence(const char *what, directional_status_stack_t stack, + struct isolating_run_sequence_s *seq) +{ + irs_iterator beg=irs_begin(seq), end=irs_end(seq); + + fprintf(DEBUGDUMP, "%s: ", what); + while (irs_compare(&beg, &end)) + { + fprintf(DEBUGDUMP, " %s(%d)", + bidi_classname(stack->classes[beg.i]), + (int)stack->levels[beg.i]); + irs_incr(&beg); + } + fprintf(DEBUGDUMP, "\n"); +} +#endif + +static void unicode_bidi_cl(directional_status_stack_t stack) +{ +#ifdef BIDI_DEBUG + dump_classes("Before X1", stack); +#endif + + for (size_t i=0; i<stack->size; i++) + { + int embedding_level; + +#define NEXT_ODD_EMBEDDING_LEVEL \ + (embedding_level=stack->head->embedding_level, \ + ++embedding_level, \ + embedding_level |= 1) + +#define NEXT_EVEN_EMBEDDING_LEVEL \ + (embedding_level=stack->head->embedding_level, \ + embedding_level |= 1, \ + ++embedding_level) + + switch (stack->classes[i]) { + case UNICODE_BIDI_CLASS_RLE: + /* X2 */ + NEXT_ODD_EMBEDDING_LEVEL; + + if (embedding_level <= max_depth && + stack->overflow_isolate_count == 0 && + stack->overflow_embedding_count == 0) + { + directional_status_stack_push + (stack, embedding_level, + do_neutral, 0); + } + else + { + if (stack->overflow_isolate_count == 0) + { + ++stack->overflow_embedding_count; + } + } + break; + case UNICODE_BIDI_CLASS_LRE: + /* X3 */ + + NEXT_EVEN_EMBEDDING_LEVEL; + + if (embedding_level <= max_depth && + stack->overflow_isolate_count == 0 && + stack->overflow_embedding_count == 0) + { + directional_status_stack_push + (stack, embedding_level, + do_neutral, 0); + } + else + { + if (stack->overflow_isolate_count == 0) + { + ++stack->overflow_embedding_count; + } + } + break; + + case UNICODE_BIDI_CLASS_RLO: + /* X4 */ + NEXT_ODD_EMBEDDING_LEVEL; + + if (embedding_level <= max_depth && + stack->overflow_isolate_count == 0 && + stack->overflow_embedding_count == 0) + { + directional_status_stack_push + (stack, embedding_level, + do_right_to_left, 0); + } + else + { + if (stack->overflow_isolate_count == 0) + { + ++stack->overflow_embedding_count; + } + } + break; + + case UNICODE_BIDI_CLASS_LRO: + /* X5 */ + NEXT_EVEN_EMBEDDING_LEVEL; + + if (embedding_level <= max_depth && + stack->overflow_isolate_count == 0 && + stack->overflow_embedding_count == 0) + { + directional_status_stack_push + (stack, embedding_level, + do_left_to_right, 0); + } + else + { + if (stack->overflow_isolate_count == 0) + { + ++stack->overflow_embedding_count; + } + } + break; + default: + break; + } + + enum_bidi_class_t cur_class=stack->classes[i]; + + if (cur_class == UNICODE_BIDI_CLASS_FSI) { + /* X5c */ + + size_t j=i; + + unicode_bidi_level_t in_isolation=1; + + while (++j < stack->size) + { + if (is_isolate_initiator(stack->classes[j])) + ++in_isolation; + else if (stack->classes[j] == UNICODE_BIDI_CLASS_PDI) + { + if (--in_isolation == 0) + break; + } + } + + cur_class=compute_paragraph_embedding_level + (stack->classes, i+1, j) == 1 + ? UNICODE_BIDI_CLASS_RLI + : UNICODE_BIDI_CLASS_LRI; + } + + switch (cur_class) { + case UNICODE_BIDI_CLASS_RLI: + /* X5a */ + stack->levels[i]=stack->head->embedding_level; + RESET_CLASS(stack->classes[i],stack); + + NEXT_ODD_EMBEDDING_LEVEL; + + if (embedding_level <= max_depth && + stack->overflow_isolate_count == 0 && + stack->overflow_embedding_count == 0) + { + ++stack->valid_isolate_count; + directional_status_stack_push + (stack, embedding_level, do_neutral, 1); + stack->head->isolate_start=i; + } + else + { + ++stack->overflow_isolate_count; + } + break; + + case UNICODE_BIDI_CLASS_LRI: + /* X5b */ + stack->levels[i]=stack->head->embedding_level; + RESET_CLASS(stack->classes[i],stack); + + NEXT_EVEN_EMBEDDING_LEVEL; + + if (embedding_level <= max_depth && + stack->overflow_isolate_count == 0 && + stack->overflow_embedding_count == 0) + { + ++stack->valid_isolate_count; + directional_status_stack_push + (stack, embedding_level, do_neutral, 1); + stack->head->isolate_start=i; + } + else + { + ++stack->overflow_isolate_count; + } + break; + + default: + break; + } + + switch (stack->orig_classes[i]) { + case UNICODE_BIDI_CLASS_BN: + case UNICODE_BIDI_CLASS_B: + case UNICODE_BIDI_CLASS_RLE: + case UNICODE_BIDI_CLASS_LRE: + case UNICODE_BIDI_CLASS_RLO: + case UNICODE_BIDI_CLASS_LRO: + case UNICODE_BIDI_CLASS_PDF: + case UNICODE_BIDI_CLASS_RLI: + case UNICODE_BIDI_CLASS_LRI: + case UNICODE_BIDI_CLASS_FSI: + case UNICODE_BIDI_CLASS_PDI: + break; + default: + /* X6 */ + stack->levels[i]=stack->head->embedding_level; + RESET_CLASS(stack->classes[i],stack); + break; + } + + if (stack->classes[i] == UNICODE_BIDI_CLASS_PDI) + { + /* X6a */ + if (stack->overflow_isolate_count > 0) + { + --stack->overflow_isolate_count; + } + else if (stack->valid_isolate_count == 0) + ; + else + { + stack->overflow_embedding_count=0; + + while (!stack->head->directional_isolate_status) + { + directional_status_stack_pop(stack); + + if (!stack->head) + { + fprintf(stderr, + "Internal error: stack " + "empty after pop\n"); + abort(); + } + } + + isolating_run_sequence_link + (&stack->isolating_run_sequences, + stack->head->isolate_start, + i); + + directional_status_stack_pop(stack); + --stack->valid_isolate_count; + + if (!stack->head) + { + fprintf(stderr, + "Internal error: stack " + "empty after pop\n"); + abort(); + } + } + stack->levels[i]=stack->head->embedding_level; + RESET_CLASS(stack->classes[i],stack); + } + + if (stack->classes[i] == UNICODE_BIDI_CLASS_PDF) + { + /* X7 */ + + if (stack->overflow_isolate_count > 0) + ; + else if (stack->overflow_embedding_count > 0) + { + --stack->overflow_embedding_count; + } + else + { + stack->overflow_embedding_count=0; + + if (!stack->head->directional_isolate_status + && stack->head->next) + { + directional_status_stack_pop(stack); + } + } + } + + if (stack->classes[i] == UNICODE_BIDI_CLASS_B) + { + /* X8 */ + + stack->levels[i]=stack->paragraph_embedding_level; + } + } + + /* X9 */ + +#define IS_X9(class) \ + ((class) == UNICODE_BIDI_CLASS_RLE || \ + (class) == UNICODE_BIDI_CLASS_LRE || \ + (class) == UNICODE_BIDI_CLASS_RLO || \ + (class) == UNICODE_BIDI_CLASS_LRO || \ + (class) == UNICODE_BIDI_CLASS_PDF || \ + (class) == UNICODE_BIDI_CLASS_BN) + + size_t next_pdi=0; + struct isolating_run_sequence_s *current_irs=0; + unicode_bidi_level_t current_level=0; + + struct isolating_run_sequence_link **push_links= + sort_irs_links_by_push(&stack->isolating_run_sequences); + size_t next_rlilri=0; + + for (size_t i=0; i<stack->size; ++i) + { + if (IS_X9(stack->classes[i])) + { + if (stack->levels[i] != UNICODE_BIDI_SKIP) + { + fprintf(stderr, "Internal error: X9 class did " + "not get skipped"); + abort(); + } + continue; + } + + if (stack->levels[i] == UNICODE_BIDI_SKIP) + { + fprintf(stderr, "Internal error: non-X9 class did " + "get skipped for some reason"); + abort(); + } + + if (!current_irs || current_level != stack->levels[i]) + { + current_level = stack->levels[i]; + + if (next_pdi < + stack->isolating_run_sequences.n_pdi_linkage && + stack->isolating_run_sequences.pdi_linkage + [next_pdi].pop == i) + { + if ((current_irs= + stack->isolating_run_sequences.pdi_linkage + [next_pdi].seq) == 0) + { + fprintf(stderr, "Internal error: " + "missing previous isolated " + "run sequence link\n"); + abort(); + } + ++next_pdi; + } + else + { + current_irs=isolating_run_sequences_init + (&stack->isolating_run_sequences, + stack->levels[i], + i); + } + } + + isolating_run_sequences_record(current_irs, i); + + if (next_rlilri < stack->isolating_run_sequences.n_pdi_linkage && + push_links[next_rlilri]->push == i) + { + push_links[next_rlilri++]->seq=current_irs; + } + } + + if (push_links) + free(push_links); + + /* X10 */ +#ifdef BIDI_DEBUG + dump_classes("Before X10", stack); +#endif + + for (struct isolating_run_sequence_s *p= + stack->isolating_run_sequences.head; p; p=p->next) + { + p->sos=p->eos=UNICODE_BIDI_CLASS_L; + + irs_iterator beg_iter=irs_begin(p), end_iter=irs_end(p); + + if (irs_compare(&beg_iter, &end_iter) == 0) + continue; /* Edge case */ + + unicode_bidi_level_t before= + stack->paragraph_embedding_level; + unicode_bidi_level_t after= + stack->paragraph_embedding_level; + + + size_t first_i=beg_iter.i; + + irs_decr(&end_iter); + + size_t end_i=end_iter.i+1; + + unicode_bidi_level_t start=stack->levels[first_i]; + unicode_bidi_level_t end=stack->levels[end_i-1]; + + while (first_i > 0 && stack->levels[first_i-1] == + UNICODE_BIDI_SKIP) + --first_i; + + if (first_i > 0) + before=stack->levels[first_i-1]; + + if (!is_isolate_initiator(stack->classes[end_iter.i])) + { + while (end_i < stack->size && + stack->levels[end_i] == UNICODE_BIDI_SKIP) + ++end_i; + + if (end_i < stack->size) + after=stack->levels[end_i]; + } +#ifdef BIDI_DEBUG + fprintf(DEBUGDUMP, + "Sequence: before=%d, start=%d, end=%d, after=%d\n", + (int)before, (int)start, (int)end, (int)after); +#endif + if (start > before) + before=start; + + if (end > after) + after=end; + + if (before & 1) + p->sos=UNICODE_BIDI_CLASS_R; + if (after & 1) + p->eos=UNICODE_BIDI_CLASS_R; + + +#ifdef BIDI_DEBUG + dump_sequence_info(stack, p); + dump_sequence("Contents", stack, p); +#endif + } + + + for (struct isolating_run_sequence_s *p= + stack->isolating_run_sequences.head; p; p=p->next) + { +#ifdef BIDI_DEBUG + dump_sequence_info(stack, p); + dump_sequence("Contents before W", stack, p); +#endif + + unicode_bidi_w(stack, p); + +#ifdef BIDI_DEBUG + dump_sequence("Contents after W", stack, p); +#endif + unicode_bidi_n(stack, p); + } +#ifdef BIDI_DEBUG + dump_orig_classes("Before L1", stack); +#endif + + /* + ** L1 + */ + + size_t i=stack->size; + + int seen_sb=1; + + while (i > 0) + { + --i; + + if (IS_X9(stack->orig_classes[i])) + continue; + + switch (stack->orig_classes[i]) { + case UNICODE_BIDI_CLASS_WS: + case UNICODE_BIDI_CLASS_FSI: + case UNICODE_BIDI_CLASS_LRI: + case UNICODE_BIDI_CLASS_RLI: + case UNICODE_BIDI_CLASS_PDI: + if (seen_sb) + stack->levels[i]= + stack->paragraph_embedding_level; + break; + case UNICODE_BIDI_CLASS_S: + case UNICODE_BIDI_CLASS_B: + stack->levels[i]=stack->paragraph_embedding_level; + seen_sb=1; + break; + default: + seen_sb=0; + continue; + } + } +} + +static void unicode_bidi_w(directional_status_stack_t stack, + struct isolating_run_sequence_s *seq) +{ + irs_iterator iter=irs_begin(seq), end=irs_end(seq); + enum_bidi_class_t previous_type=seq->sos; + + enum_bidi_class_t strong_type=UNICODE_BIDI_CLASS_R; + + while (irs_compare(&iter, &end)) + { + if (stack->classes[iter.i] == UNICODE_BIDI_CLASS_NSM) + { + /* W1 */ + stack->classes[iter.i] = + is_isolate_initiator(previous_type) || + previous_type == UNICODE_BIDI_CLASS_PDI + ? UNICODE_BIDI_CLASS_ON + : previous_type; + + } + + /* W2 */ + + if (stack->classes[iter.i] == UNICODE_BIDI_CLASS_EN && + strong_type == UNICODE_BIDI_CLASS_AL) + { + stack->classes[iter.i] = UNICODE_BIDI_CLASS_AN; + } + + /* W2 */ + previous_type=stack->classes[iter.i]; + + switch (previous_type) { + case UNICODE_BIDI_CLASS_R: + case UNICODE_BIDI_CLASS_L: + case UNICODE_BIDI_CLASS_AL: + strong_type=previous_type; + break; + default: + break; + } + + irs_incr(&iter); + } + + iter=irs_begin(seq); + + previous_type=UNICODE_BIDI_CLASS_L; + + int not_eol=irs_compare(&iter, &end); + + while (not_eol) + { + /* W3 */ + if (stack->classes[iter.i] == UNICODE_BIDI_CLASS_AL) + stack->classes[iter.i] = UNICODE_BIDI_CLASS_R; + + /* W4 */ + + enum_bidi_class_t this_type=stack->classes[iter.i]; + irs_incr(&iter); + + not_eol=irs_compare(&iter, &end); + + if (not_eol && + ( + (this_type == UNICODE_BIDI_CLASS_ES && + previous_type == UNICODE_BIDI_CLASS_EN) + || + (this_type == UNICODE_BIDI_CLASS_CS && + (previous_type == UNICODE_BIDI_CLASS_EN || + previous_type == UNICODE_BIDI_CLASS_AN) + ) + ) && + stack->classes[iter.i] == previous_type) + { + irs_iterator prev=iter; + + irs_decr(&prev); + + stack->classes[prev.i]=previous_type; + } + + if (not_eol) + previous_type=this_type; + } + + iter=irs_begin(seq); + + /* W5 */ + + previous_type=UNICODE_BIDI_CLASS_L; /* Doesn't match any part of W5 */ + + while (irs_compare(&iter, &end)) + { + if (stack->classes[iter.i] != UNICODE_BIDI_CLASS_ET) + { + previous_type=stack->classes[iter.i]; + irs_incr(&iter); + continue; + } + + /* ET after EN */ + if (previous_type == UNICODE_BIDI_CLASS_EN) + { + stack->classes[iter.i] = UNICODE_BIDI_CLASS_EN; + irs_incr(&iter); + continue; + } + + /* See if EN follows these ETs */ + + irs_iterator start=iter; + + while (irs_incr(&iter), irs_compare(&iter, &end)) + { + previous_type=stack->classes[iter.i]; + + if (previous_type == UNICODE_BIDI_CLASS_ET) + continue; + + if (previous_type == UNICODE_BIDI_CLASS_EN) + { + while (irs_compare(&start, &iter)) + { + stack->classes[start.i]= + UNICODE_BIDI_CLASS_EN; + irs_incr(&start); + } + } + break; + } + } + + /* W6 */ + + for (iter=irs_begin(seq); + irs_compare(&iter, &end); irs_incr(&iter)) + { + switch (stack->classes[iter.i]) { + case UNICODE_BIDI_CLASS_ET: + case UNICODE_BIDI_CLASS_ES: + case UNICODE_BIDI_CLASS_CS: + /* W6 */ + stack->classes[iter.i]=UNICODE_BIDI_CLASS_ON; + break; + default: + break; + } + } + + /* W7 */ + iter=irs_begin(seq); + + previous_type=seq->sos; + + while (irs_compare(&iter, &end)) + { + switch (stack->classes[iter.i]) { + case UNICODE_BIDI_CLASS_L: + case UNICODE_BIDI_CLASS_R: + previous_type=stack->classes[iter.i]; + break; + case UNICODE_BIDI_CLASS_EN: + if (previous_type == UNICODE_BIDI_CLASS_L) + stack->classes[iter.i]=previous_type; + break; + default: + break; + } + irs_incr(&iter); + } +} + +struct bidi_n_stack { + struct bidi_n_stack *next; + + irs_iterator start; + irs_iterator end; + short has_e; + short has_o; + short matched; +}; + +static void unicode_bidi_n(directional_status_stack_t stack, + struct isolating_run_sequence_s *seq) +{ +#define NSTACKSIZE 63 + + struct bidi_n_stack *bracket_stack=0; + struct bidi_n_stack **bracket_stack_tail= &bracket_stack; + + struct bidi_n_stack *stack_iters[NSTACKSIZE]; + char32_t stack_chars[NSTACKSIZE]; + size_t stackp=0; + + /* + ** N0 + ** + ** Combined pass that implements BD16, and constructs, on the fly + ** whether each matched pair has an e or an o inside it. + */ + irs_iterator beg=irs_begin(seq), iter=beg, end=irs_end(seq); + + for (; irs_compare(&iter, &end); irs_incr(&iter)) + { + unicode_bidi_bracket_type_t bracket_type; + char32_t open_bracket= + unicode_bidi_bracket_type(stack->chars[iter.i], + &bracket_type); + + if (bracket_type == UNICODE_BIDI_o) + { + if (stackp >= NSTACKSIZE) + break; /* BD16 failure */ + + if (!((*bracket_stack_tail)=(struct bidi_n_stack *) + calloc(1, sizeof(struct bidi_n_stack)))) + abort(); + + stack_iters[stackp]=*bracket_stack_tail; + + (*bracket_stack_tail)->start=iter; + + stack_chars[stackp]=stack->chars[iter.i]; + + bracket_stack_tail= &(*bracket_stack_tail)->next; + ++stackp; + continue; + } + + if (bracket_type == UNICODE_BIDI_c) /* Should be "n" */ + { + for (size_t i=stackp; i > 0; ) + { + --i; + if (stack_chars[i] != open_bracket) + continue; + + stack_iters[i]->end = iter; + stack_iters[i]->matched=1; + stackp=i; + break; + } + continue; + } + + /* + ** So we check if this is an e or an o, and if so we + ** have a convenient list of all unclosed brackets, so + ** we record these facts there. + */ + + enum_bidi_class_t eoclass=stack->classes[iter.i]; + +#define ADJUST_EOCLASS(eoclass) do { \ + \ + if ((eoclass) == UNICODE_BIDI_CLASS_EN || \ + (eoclass) == UNICODE_BIDI_CLASS_AN) \ + (eoclass)=UNICODE_BIDI_CLASS_R; \ + } while (0) + + ADJUST_EOCLASS(eoclass); + +#define E_CLASS (seq->embedding_level & 1 ? \ + UNICODE_BIDI_CLASS_R:UNICODE_BIDI_CLASS_L) + +#define O_CLASS (seq->embedding_level & 1 ? \ + UNICODE_BIDI_CLASS_L:UNICODE_BIDI_CLASS_R) + + if (eoclass == E_CLASS) + { + for (size_t i=0; i<stackp; ++i) + stack_iters[i]->has_e=1; + } + else if (eoclass == O_CLASS) + { + for (size_t i=0; i<stackp; ++i) + stack_iters[i]->has_o=1; + } + } + + while (bracket_stack) + { + struct bidi_n_stack *p=bracket_stack; + + bracket_stack=bracket_stack->next; + + if (p->matched) + { + int set=0; + + if (p->has_e) + { + stack->classes[p->start.i]= + stack->classes[p->end.i]= + seq->embedding_level & 1 + ? UNICODE_BIDI_CLASS_R + : UNICODE_BIDI_CLASS_L; + set=1; + } else if (p->has_o) + { + enum_bidi_class_t strong_type=seq->sos; + + iter=p->start; + + while (irs_compare(&beg, &iter)) + { + irs_decr(&iter); + + enum_bidi_class_t eoclass= + stack->classes[iter.i]; + + ADJUST_EOCLASS(eoclass); + + switch (eoclass) { + case UNICODE_BIDI_CLASS_L: + case UNICODE_BIDI_CLASS_R: + break; + default: + continue; + } + + strong_type=eoclass; + break; + } + + if (strong_type == O_CLASS) + { + stack->classes[p->start.i]= + stack->classes[p->end.i]= + strong_type; + set=1; + } + } + + if (set) + { + enum_bidi_class_t strong_type= + stack->classes[p->end.i]; + + while (irs_incr(&p->end), + irs_compare(&p->end, &end)) + { + if (stack->orig_classes[p->end.i] != + UNICODE_BIDI_CLASS_NSM) + break; + + stack->classes[p->end.i]=strong_type; + } + } + } + free(p); + } + + /* N1 */ + +#define IS_NI(class) \ + ((class) == UNICODE_BIDI_CLASS_B || \ + (class) == UNICODE_BIDI_CLASS_S || \ + (class) == UNICODE_BIDI_CLASS_WS || \ + (class) == UNICODE_BIDI_CLASS_ON || \ + (class) == UNICODE_BIDI_CLASS_FSI || \ + (class) == UNICODE_BIDI_CLASS_LRI || \ + (class) == UNICODE_BIDI_CLASS_RLI || \ + (class) == UNICODE_BIDI_CLASS_PDI) + + enum_bidi_class_t prev_type=seq->sos; + + for (iter=beg; irs_compare(&iter, &end); ) + { + /* + ** N1 + */ + + enum_bidi_class_t this_type=stack->classes[iter.i]; + + ADJUST_EOCLASS(this_type); + + if (!IS_NI(this_type)) + { + switch (this_type) { + case UNICODE_BIDI_CLASS_L: + case UNICODE_BIDI_CLASS_R: + prev_type=this_type; + break; + default: + prev_type=UNICODE_BIDI_CLASS_ON; // Marker. + break; + } + irs_incr(&iter); + continue; + } + + enum_bidi_class_t next_type=seq->eos; + + irs_iterator start=iter; + + while (irs_compare(&iter, &end)) + { + if (IS_NI(stack->classes[iter.i])) + { + irs_incr(&iter); + continue; + } + + enum_bidi_class_t other_type=stack->classes[iter.i]; + + ADJUST_EOCLASS(other_type); + + switch (other_type) { + case UNICODE_BIDI_CLASS_L: + case UNICODE_BIDI_CLASS_R: + next_type=other_type; + break; + default: + next_type=UNICODE_BIDI_CLASS_BN; /* Marker */ + break; + } + break; + } + + while (irs_compare(&start, &iter)) + { + // next_type can only match prev_type if both are L + // or R. If the character before the NIs was not L + // or R, prev_type is ON. If the character after the + // NIs was not L or R, next_type is BN. + + if (next_type == prev_type) + { + stack->classes[start.i]=next_type; /* N1 */ + } + + irs_incr(&start); + } + } + + for (iter=beg; irs_compare(&iter, &end); ) + { + if (IS_NI(stack->classes[iter.i])) + { + stack->classes[iter.i]= + stack->levels[iter.i] & 1 ? + UNICODE_BIDI_CLASS_R : + UNICODE_BIDI_CLASS_L; /* N2 */ + } + irs_incr(&iter); + } + +#ifdef BIDI_DEBUG + dump_sequence("Contents after N", stack, seq); +#endif + + /* I1 */ + /* I2 */ + + for (iter=beg; irs_compare(&iter, &end); irs_incr(&iter)) + { + if ((stack->levels[iter.i] & 1) == 0) + { + switch (stack->classes[iter.i]) { + case UNICODE_BIDI_CLASS_R: + ++stack->levels[iter.i]; + break; + case UNICODE_BIDI_CLASS_AN: + case UNICODE_BIDI_CLASS_EN: + stack->levels[iter.i] += 2; + break; + default: break; + } + } + else + { + switch (stack->classes[iter.i]) { + case UNICODE_BIDI_CLASS_L: + case UNICODE_BIDI_CLASS_AN: + case UNICODE_BIDI_CLASS_EN: + ++stack->levels[iter.i]; + break; + default: break; + } + } + } + +#ifdef BIDI_DEBUG + dump_sequence("Contents after I", stack, seq); +#endif } |
