summaryrefslogtreecommitdiffstats
path: root/unicode/unicode_bidi.c
diff options
context:
space:
mode:
Diffstat (limited to 'unicode/unicode_bidi.c')
-rw-r--r--unicode/unicode_bidi.c1598
1 files changed, 1590 insertions, 8 deletions
diff --git a/unicode/unicode_bidi.c b/unicode/unicode_bidi.c
index e486621..38dcb44 100644
--- a/unicode/unicode_bidi.c
+++ b/unicode/unicode_bidi.c
@@ -86,6 +86,7 @@ char32_t unicode_bidi_bracket_type(char32_t c,
return c;
}
+/* BIDI_CLASS_LIST */
typedef enum {
UNICODE_BIDI_CLASS_AL,
UNICODE_BIDI_CLASS_AN,
@@ -114,13 +115,1594 @@ typedef enum {
#include "bidi_class.h"
-enum_bidi_class_t unicode_bidi_class(char32_t c)
+#ifdef BIDI_DEBUG
+enum_bidi_class_t fudge_unicode_bidi(size_t);
+const char *bidi_classname(enum_bidi_class_t);
+#endif
+
+#define max_depth 125
+
+typedef enum {
+ do_neutral,
+ do_right_to_left,
+ do_left_to_right
+} directional_override_status_t;
+
+#define is_isolate_initiator(c) \
+ ((c) == UNICODE_BIDI_CLASS_LRI || \
+ (c) == UNICODE_BIDI_CLASS_RLI || \
+ (c) == UNICODE_BIDI_CLASS_FSI)
+
+#define is_embedding_initiator(c) \
+ ((c) == UNICODE_BIDI_CLASS_LRE || \
+ (c) == UNICODE_BIDI_CLASS_RLE || \
+ (c) == UNICODE_BIDI_CLASS_LRO || \
+ (c) == UNICODE_BIDI_CLASS_RLO)
+
+/* BD13 implementation */
+
+/* A level run, specified as indexes */
+
+struct level_run {
+ size_t start;
+ size_t end; /* one past */
+};
+
+/* An isolating run sequence */
+
+struct isolating_run_sequence_s {
+ struct isolating_run_sequence_s *prev, *next; /* Linked list */
+
+ struct level_run *level_runs; /* All level runs in the sequence */
+ size_t n_level_runs; /* How many of them */
+ size_t cap_level_runs; /* Capacity of the level runs */
+ unicode_bidi_level_t embedding_level; /* This seq's embedding level */
+ enum_bidi_class_t sos, eos;
+};
+
+/* An iterator for an isolating run sequence */
+
+typedef struct {
+
+ /* My sequence */
+ struct isolating_run_sequence_s *seq;
+
+ /* Which level run I'm on */
+
+ size_t level_run_i;
+
+ /* Current index */
+
+ size_t i;
+} irs_iterator;
+
+static irs_iterator irs_begin(struct isolating_run_sequence_s *seq)
+{
+ irs_iterator iter;
+
+ iter.seq=seq;
+ iter.level_run_i=0;
+
+ /* Edge case, empty isolating run sequence */
+
+ while (iter.level_run_i < seq->n_level_runs)
+ {
+ iter.i=seq->level_runs[iter.level_run_i].start;
+
+ if (iter.i < seq->level_runs[iter.level_run_i].end)
+ break;
+
+ ++iter.level_run_i;
+ }
+ return iter;
+}
+
+static irs_iterator irs_end(struct isolating_run_sequence_s *seq)
+{
+ irs_iterator iter;
+
+ iter.seq=seq;
+ iter.level_run_i=seq->n_level_runs;
+ return iter;
+}
+
+static int irs_compare(const irs_iterator *a,
+ const irs_iterator *b)
+{
+ if (a->level_run_i < b->level_run_i)
+ return -1;
+ if (a->level_run_i > b->level_run_i)
+ return 1;
+
+ if (a->level_run_i == a->seq->n_level_runs)
+ return 0;
+
+ if (a->i < b->i)
+ return -1;
+
+ if (a->i > b->i)
+ return 1;
+ return 0;
+}
+
+static void irs_incr(irs_iterator *iter)
+{
+ if (iter->seq->n_level_runs == iter->level_run_i)
+ {
+ fprintf(stderr, "%s%s\n",
+ "Internal error: attempting to increment ",
+ "one past end of isolating run sequence iterator");
+ abort();
+ }
+
+ if (++iter->i >= iter->seq->level_runs[iter->level_run_i].end)
+ {
+ if (++iter->level_run_i < iter->seq->n_level_runs)
+ iter->i=iter->seq->level_runs[iter->level_run_i].start;
+ }
+}
+
+static void irs_decr(irs_iterator *iter)
+{
+ while (1)
+ {
+ if (iter->seq->n_level_runs > iter->level_run_i &&
+ iter->i > iter->seq->level_runs[iter->level_run_i].start)
+ {
+ --iter->i;
+ break;
+ }
+
+ if (iter->level_run_i == 0)
+ {
+ fprintf(stderr, "%s%s\n",
+ "Internal error: attempting to decrement the ",
+ "beginning isolating run sequence iterator");
+ abort();
+ }
+
+ iter->i=iter->seq->level_runs[--iter->level_run_i].end;
+ }
+}
+
+/* Used to build isolating run sequences link */
+
+struct isolating_run_sequence_link {
+ size_t push;
+ size_t pop;
+ struct isolating_run_sequence_s *seq;
+};
+
+/* Our isolating run sequences */
+
+struct isolating_run_sequences_s {
+ struct isolating_run_sequence_s *head, *tail; /* Linked list */
+
+ /* Links LRI and RLI to its matching PDI */
+
+ struct isolating_run_sequence_link *pdi_linkage; /* Recycle level_run */
+ size_t n_pdi_linkage;
+ size_t cap_pdi_linkage;
+};
+
+static int compare_irs_by_push(struct isolating_run_sequence_link **a,
+ struct isolating_run_sequence_link **b)
+{
+ if ((*a)->push < (*b)->push)
+ return -1;
+ if ((*a)->push > (*b)->push)
+ return 1;
+ return 0;
+}
+
+static struct isolating_run_sequence_link **sort_irs_links_by_push
+(struct isolating_run_sequences_s *seq)
+{
+ struct isolating_run_sequence_link **p;
+ size_t i;
+
+ if (seq->n_pdi_linkage == 0)
+ return 0;
+
+ p=(struct isolating_run_sequence_link **)
+ calloc(sizeof(struct isolating_run_sequence_link *),
+ seq->n_pdi_linkage);
+
+ for (i=0; i<seq->n_pdi_linkage; i++)
+ {
+ p[i]=seq->pdi_linkage+i;
+ }
+ qsort(p, seq->n_pdi_linkage, sizeof(*p),
+ (int (*)(const void *, const void *))compare_irs_by_push);
+ return p;
+}
+
+static struct isolating_run_sequence_s *
+isolating_run_sequences_init(struct isolating_run_sequences_s *p,
+ unicode_bidi_level_t embedding_level,
+ size_t i)
+{
+ struct isolating_run_sequence_s *seq=
+ (struct isolating_run_sequence_s *)
+ calloc(1, sizeof(struct isolating_run_sequence_s));
+
+ if (!seq) abort();
+
+ if ((seq->level_runs=(struct level_run *)
+ malloc(sizeof(struct level_run))) == 0) abort();
+
+ seq->level_runs->start=i;
+ seq->level_runs->end=i;
+
+ seq->n_level_runs=seq->cap_level_runs=1;
+ seq->embedding_level=embedding_level;
+
+ if (!p->head)
+ {
+ p->head=p->tail=seq;
+ }
+ else
+ {
+ p->tail->next=seq;
+ seq->prev=p->tail;
+ p->tail=seq;
+ }
+
+ return seq;
+}
+
+static void isolating_run_sequences_record(struct isolating_run_sequence_s *p,
+ size_t i)
+{
+ struct level_run *current_level_run=
+ &p->level_runs[p->n_level_runs-1];
+
+ if (current_level_run->start == current_level_run->end)
+ {
+ current_level_run->start=i;
+ current_level_run->end=i+1;
+ return;
+ }
+
+ if (current_level_run->end == i)
+ {
+ ++current_level_run->end;
+ return;
+ }
+
+ /*
+ ** Starting a new level run in the current isolating
+ ** run sequence.
+ */
+
+ if (p->n_level_runs == p->cap_level_runs)
+ {
+ p->cap_level_runs *= 2;
+
+ p->level_runs=(struct level_run *)
+ realloc(p->level_runs,
+ sizeof(struct level_run) *
+ p->cap_level_runs);
+ if (!p->level_runs)
+ abort();
+ }
+
+ current_level_run = p->level_runs + (p->n_level_runs++);
+
+ current_level_run->start=i;
+ current_level_run->end=i+1;
+}
+
+static void isolating_run_sequence_link(struct isolating_run_sequences_s *p,
+ size_t push,
+ size_t pop)
{
- return unicode_tab_lookup(c,
- unicode_indextab,
- sizeof(unicode_indextab)
- /sizeof(unicode_indextab[0]),
- unicode_rangetab,
- unicode_classtab,
- UNICODE_BIDI_CLASS_L);
+ if (p->n_pdi_linkage == p->cap_pdi_linkage)
+ {
+ p->cap_pdi_linkage=
+ p->cap_pdi_linkage == 0
+ ? 1 : p->cap_pdi_linkage*2;
+
+ p->pdi_linkage=(struct isolating_run_sequence_link *)
+ (p->pdi_linkage ?
+ realloc(p->pdi_linkage,
+ p->cap_pdi_linkage * sizeof(*p->pdi_linkage))
+ :
+ malloc(p->cap_pdi_linkage * sizeof(*p->pdi_linkage)));
+
+ if (!p->pdi_linkage)
+ abort();
+ }
+
+ p->pdi_linkage[p->n_pdi_linkage].push=push;
+ p->pdi_linkage[p->n_pdi_linkage].pop=pop;
+ p->pdi_linkage[p->n_pdi_linkage].seq=0;
+ ++p->n_pdi_linkage;
+}
+
+static void isolating_run_sequences_deinit(struct isolating_run_sequences_s *p)
+{
+ struct isolating_run_sequence_s *seq;
+
+ for (seq=p->head; seq; )
+ {
+ struct isolating_run_sequence_s *p=seq;
+
+ seq=seq->next;
+
+ free(p->level_runs);
+ free(p);
+ }
+
+ if (p->pdi_linkage)
+ free(p->pdi_linkage);
+}
+
+/* An entry on the directional status stack, used by the X rules */
+
+struct directional_status_stack_entry {
+ struct directional_status_stack_entry *next;
+ unicode_bidi_level_t embedding_level;
+ directional_override_status_t directional_override_status;
+ int directional_isolate_status;
+ size_t isolate_start;
+};
+
+typedef struct {
+ struct directional_status_stack_entry *head;
+
+ unicode_bidi_level_t paragraph_embedding_level;
+ const char32_t *chars;
+ enum_bidi_class_t *classes;
+ enum_bidi_class_t *orig_classes;
+ unicode_bidi_level_t *levels;
+ size_t size;
+ int overflow_isolate_count;
+ int overflow_embedding_count;
+ int valid_isolate_count;
+
+ struct isolating_run_sequences_s isolating_run_sequences;
+} *directional_status_stack_t;
+
+#ifdef BIDI_DEBUG
+void dump_classes(const char *prefix, directional_status_stack_t stack)
+{
+ fprintf(DEBUGDUMP, "%s: ", prefix);
+ for (size_t i=0; i<stack->size; ++i)
+ {
+ fprintf(DEBUGDUMP, " %s(%d)",
+ bidi_classname(stack->classes[i]),
+ (int)stack->levels[i]);
+ }
+ fprintf(DEBUGDUMP, "\n");
+}
+
+void dump_orig_classes(const char *prefix, directional_status_stack_t stack)
+{
+ fprintf(DEBUGDUMP, "%s: ", prefix);
+
+ for (size_t i=0; i<stack->size; ++i)
+ {
+ fprintf(DEBUGDUMP, " %s(%s%s%d)",
+ bidi_classname(stack->classes[i]),
+ (stack->classes[i] != stack->orig_classes[i] ?
+ bidi_classname(stack->orig_classes[i]):""),
+ (stack->classes[i] != stack->orig_classes[i] ? "/":""),
+ (int)stack->levels[i]);
+ }
+ fprintf(DEBUGDUMP, "\n");
+}
+#endif
+
+
+static void directional_status_stack_push
+(directional_status_stack_t stack,
+ unicode_bidi_level_t embedding_level,
+ directional_override_status_t directional_override_status,
+ int directional_isolate_status)
+{
+ struct directional_status_stack_entry *p=
+ (struct directional_status_stack_entry *)
+ malloc(sizeof(struct directional_status_stack_entry));
+
+#ifdef BIDI_DEBUG
+ fprintf(DEBUGDUMP, "BIDI: Push level %d, override: %s, isolate: %s\n",
+ (int)embedding_level,
+ (directional_override_status == do_right_to_left ? "RTOL":
+ directional_override_status == do_left_to_right ? "LTOR":
+ directional_override_status == do_neutral ? "NEUTRAL"
+ : "ERROR"),
+ (directional_isolate_status ? "YES":"NO"));
+
+#endif
+
+ p->embedding_level=embedding_level;
+ p->directional_override_status=directional_override_status;
+ p->directional_isolate_status=directional_isolate_status;
+ p->next=stack->head;
+ stack->head=p;
+}
+
+static unicode_bidi_level_t
+compute_paragraph_embedding_level(const enum_bidi_class_t *p,
+ size_t i, size_t j)
+{
+ unicode_bidi_level_t in_isolation=0;
+
+ for (; i<j; ++i)
+ {
+ if (is_isolate_initiator(p[i]))
+ ++in_isolation;
+ else if (p[i] == UNICODE_BIDI_CLASS_PDI)
+ {
+ if (in_isolation)
+ --in_isolation;
+ }
+
+ if (in_isolation == 0)
+ {
+ if (p[i] == UNICODE_BIDI_CLASS_AL ||
+ p[i] == UNICODE_BIDI_CLASS_R)
+ {
+ return 1;
+ }
+ if (p[i] == UNICODE_BIDI_CLASS_L)
+ break;
+ }
+ }
+ return 0;
+}
+
+static directional_status_stack_t
+directional_status_stack_init(const char32_t *chars,
+ enum_bidi_class_t *classes, size_t n,
+ unicode_bidi_level_t *levels,
+ const unicode_bidi_level_t
+ *initial_embedding_level)
+{
+ directional_status_stack_t stack;
+
+ stack=(directional_status_stack_t)calloc(1, sizeof(*stack));
+
+ stack->paragraph_embedding_level=
+ initial_embedding_level
+ ? *initial_embedding_level & 1
+ : compute_paragraph_embedding_level(classes, 0, n);
+ stack->chars=chars;
+ stack->classes=classes;
+
+ if (n)
+ {
+ classes=(enum_bidi_class_t *)
+ malloc(sizeof(enum_bidi_class_t)*n);
+ if (!classes)
+ abort();
+ memcpy(classes, stack->classes, sizeof(enum_bidi_class_t)*n);
+ }
+ else
+ {
+ classes=0;
+ }
+ stack->orig_classes=classes;
+ stack->levels=levels;
+ stack->size=n;
+
+ directional_status_stack_push(stack,
+ stack->paragraph_embedding_level,
+ do_neutral, 0);
+
+ return stack;
+}
+
+static void directional_status_stack_pop(directional_status_stack_t stack)
+{
+ struct directional_status_stack_entry *head=stack->head;
+
+ if (!head)
+ return;
+
+ stack->head=head->next;
+ free(head);
+
+#ifdef BIDI_DEBUG
+ fprintf(DEBUGDUMP, "BIDI: Pop\n");
+#endif
+}
+
+static void directional_status_stack_deinit(directional_status_stack_t stack)
+{
+ while (stack->head)
+ directional_status_stack_pop(stack);
+ if (stack->orig_classes)
+ free(stack->orig_classes);
+ isolating_run_sequences_deinit(&stack->isolating_run_sequences);
+ free(stack);
+}
+
+static void unicode_bidi_b(const char32_t *p,
+ size_t n,
+ enum_bidi_class_t *buf,
+ unicode_bidi_level_t *bufp,
+ const unicode_bidi_level_t *initial_embedding_level);
+
+void unicode_bidi_calc(const char32_t *p, size_t n, unicode_bidi_level_t *bufp,
+ const unicode_bidi_level_t *initial_embedding_level)
+{
+ /*
+ ** Look up the bidi class for each char32_t.
+ **
+ ** When we encounter a paragraph break we call unicode_bidi_b() to
+ ** process it.
+ */
+
+ enum_bidi_class_t *buf=
+ (enum_bidi_class_t *)malloc(n * sizeof(enum_bidi_class_t));
+
+ for (size_t i=0; i<n; ++i)
+ {
+ buf[i]=(enum_bidi_class_t)
+ unicode_tab_lookup(p[i],
+ unicode_indextab,
+ sizeof(unicode_indextab)
+ /sizeof(unicode_indextab[0]),
+ unicode_rangetab,
+ unicode_classtab,
+ UNICODE_BIDI_CLASS_L);
+#ifdef UNICODE_BIDI_TEST
+ UNICODE_BIDI_TEST(i);
+#endif
+ bufp[i]=UNICODE_BIDI_SKIP;
+ }
+
+ unicode_bidi_b(p, n,
+ buf,
+ bufp,
+ initial_embedding_level);
+
+ free(buf);
+}
+
+static void unicode_bidi_cl(directional_status_stack_t stack);
+
+static void unicode_bidi_b(const char32_t *p,
+ size_t n,
+ enum_bidi_class_t *buf,
+ unicode_bidi_level_t *bufp,
+ const unicode_bidi_level_t *initial_embedding_level)
+{
+ directional_status_stack_t stack;
+
+ stack=directional_status_stack_init(p, buf, n, bufp,
+ initial_embedding_level);
+
+#ifdef BIDI_DEBUG
+ fprintf(DEBUGDUMP, "BIDI: START: Paragraph embedding level: %d\n",
+ (int)stack->paragraph_embedding_level);
+#endif
+
+ unicode_bidi_cl(stack);
+
+ directional_status_stack_deinit(stack);
+}
+
+#define RESET_CLASS(p,stack) do { \
+ switch ((stack)->head->directional_override_status) { \
+ case do_neutral: break; \
+ case do_left_to_right: (p)=UNICODE_BIDI_CLASS_L; break; \
+ case do_right_to_left: (p)=UNICODE_BIDI_CLASS_R; break; \
+ } \
+ } while(0)
+
+static void unicode_bidi_w(directional_status_stack_t stack,
+ struct isolating_run_sequence_s *seq);
+static void unicode_bidi_n(directional_status_stack_t stack,
+ struct isolating_run_sequence_s *seq);
+
+#ifdef BIDI_DEBUG
+void dump_sequence_info(directional_status_stack_t stack,
+ struct isolating_run_sequence_s *seq)
+{
+ fprintf(DEBUGDUMP, "Sequence: sos: %c, eos: %c:",
+ (seq->sos == UNICODE_BIDI_CLASS_L ? 'L':'R'),
+ (seq->eos == UNICODE_BIDI_CLASS_L ? 'L':'R'));
+
+ for (size_t i=0; i<seq->n_level_runs; ++i)
+ {
+ fprintf(DEBUGDUMP, "%s[%lu-%lu]",
+ i == 0 ? " ":", ",
+ (unsigned long)seq->level_runs[i].start,
+ (unsigned long)seq->level_runs[i].end-1);
+ }
+ fprintf(DEBUGDUMP, "\n");
+}
+
+void dump_sequence(const char *what, directional_status_stack_t stack,
+ struct isolating_run_sequence_s *seq)
+{
+ irs_iterator beg=irs_begin(seq), end=irs_end(seq);
+
+ fprintf(DEBUGDUMP, "%s: ", what);
+ while (irs_compare(&beg, &end))
+ {
+ fprintf(DEBUGDUMP, " %s(%d)",
+ bidi_classname(stack->classes[beg.i]),
+ (int)stack->levels[beg.i]);
+ irs_incr(&beg);
+ }
+ fprintf(DEBUGDUMP, "\n");
+}
+#endif
+
+static void unicode_bidi_cl(directional_status_stack_t stack)
+{
+#ifdef BIDI_DEBUG
+ dump_classes("Before X1", stack);
+#endif
+
+ for (size_t i=0; i<stack->size; i++)
+ {
+ int embedding_level;
+
+#define NEXT_ODD_EMBEDDING_LEVEL \
+ (embedding_level=stack->head->embedding_level, \
+ ++embedding_level, \
+ embedding_level |= 1)
+
+#define NEXT_EVEN_EMBEDDING_LEVEL \
+ (embedding_level=stack->head->embedding_level, \
+ embedding_level |= 1, \
+ ++embedding_level)
+
+ switch (stack->classes[i]) {
+ case UNICODE_BIDI_CLASS_RLE:
+ /* X2 */
+ NEXT_ODD_EMBEDDING_LEVEL;
+
+ if (embedding_level <= max_depth &&
+ stack->overflow_isolate_count == 0 &&
+ stack->overflow_embedding_count == 0)
+ {
+ directional_status_stack_push
+ (stack, embedding_level,
+ do_neutral, 0);
+ }
+ else
+ {
+ if (stack->overflow_isolate_count == 0)
+ {
+ ++stack->overflow_embedding_count;
+ }
+ }
+ break;
+ case UNICODE_BIDI_CLASS_LRE:
+ /* X3 */
+
+ NEXT_EVEN_EMBEDDING_LEVEL;
+
+ if (embedding_level <= max_depth &&
+ stack->overflow_isolate_count == 0 &&
+ stack->overflow_embedding_count == 0)
+ {
+ directional_status_stack_push
+ (stack, embedding_level,
+ do_neutral, 0);
+ }
+ else
+ {
+ if (stack->overflow_isolate_count == 0)
+ {
+ ++stack->overflow_embedding_count;
+ }
+ }
+ break;
+
+ case UNICODE_BIDI_CLASS_RLO:
+ /* X4 */
+ NEXT_ODD_EMBEDDING_LEVEL;
+
+ if (embedding_level <= max_depth &&
+ stack->overflow_isolate_count == 0 &&
+ stack->overflow_embedding_count == 0)
+ {
+ directional_status_stack_push
+ (stack, embedding_level,
+ do_right_to_left, 0);
+ }
+ else
+ {
+ if (stack->overflow_isolate_count == 0)
+ {
+ ++stack->overflow_embedding_count;
+ }
+ }
+ break;
+
+ case UNICODE_BIDI_CLASS_LRO:
+ /* X5 */
+ NEXT_EVEN_EMBEDDING_LEVEL;
+
+ if (embedding_level <= max_depth &&
+ stack->overflow_isolate_count == 0 &&
+ stack->overflow_embedding_count == 0)
+ {
+ directional_status_stack_push
+ (stack, embedding_level,
+ do_left_to_right, 0);
+ }
+ else
+ {
+ if (stack->overflow_isolate_count == 0)
+ {
+ ++stack->overflow_embedding_count;
+ }
+ }
+ break;
+ default:
+ break;
+ }
+
+ enum_bidi_class_t cur_class=stack->classes[i];
+
+ if (cur_class == UNICODE_BIDI_CLASS_FSI) {
+ /* X5c */
+
+ size_t j=i;
+
+ unicode_bidi_level_t in_isolation=1;
+
+ while (++j < stack->size)
+ {
+ if (is_isolate_initiator(stack->classes[j]))
+ ++in_isolation;
+ else if (stack->classes[j] == UNICODE_BIDI_CLASS_PDI)
+ {
+ if (--in_isolation == 0)
+ break;
+ }
+ }
+
+ cur_class=compute_paragraph_embedding_level
+ (stack->classes, i+1, j) == 1
+ ? UNICODE_BIDI_CLASS_RLI
+ : UNICODE_BIDI_CLASS_LRI;
+ }
+
+ switch (cur_class) {
+ case UNICODE_BIDI_CLASS_RLI:
+ /* X5a */
+ stack->levels[i]=stack->head->embedding_level;
+ RESET_CLASS(stack->classes[i],stack);
+
+ NEXT_ODD_EMBEDDING_LEVEL;
+
+ if (embedding_level <= max_depth &&
+ stack->overflow_isolate_count == 0 &&
+ stack->overflow_embedding_count == 0)
+ {
+ ++stack->valid_isolate_count;
+ directional_status_stack_push
+ (stack, embedding_level, do_neutral, 1);
+ stack->head->isolate_start=i;
+ }
+ else
+ {
+ ++stack->overflow_isolate_count;
+ }
+ break;
+
+ case UNICODE_BIDI_CLASS_LRI:
+ /* X5b */
+ stack->levels[i]=stack->head->embedding_level;
+ RESET_CLASS(stack->classes[i],stack);
+
+ NEXT_EVEN_EMBEDDING_LEVEL;
+
+ if (embedding_level <= max_depth &&
+ stack->overflow_isolate_count == 0 &&
+ stack->overflow_embedding_count == 0)
+ {
+ ++stack->valid_isolate_count;
+ directional_status_stack_push
+ (stack, embedding_level, do_neutral, 1);
+ stack->head->isolate_start=i;
+ }
+ else
+ {
+ ++stack->overflow_isolate_count;
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ switch (stack->orig_classes[i]) {
+ case UNICODE_BIDI_CLASS_BN:
+ case UNICODE_BIDI_CLASS_B:
+ case UNICODE_BIDI_CLASS_RLE:
+ case UNICODE_BIDI_CLASS_LRE:
+ case UNICODE_BIDI_CLASS_RLO:
+ case UNICODE_BIDI_CLASS_LRO:
+ case UNICODE_BIDI_CLASS_PDF:
+ case UNICODE_BIDI_CLASS_RLI:
+ case UNICODE_BIDI_CLASS_LRI:
+ case UNICODE_BIDI_CLASS_FSI:
+ case UNICODE_BIDI_CLASS_PDI:
+ break;
+ default:
+ /* X6 */
+ stack->levels[i]=stack->head->embedding_level;
+ RESET_CLASS(stack->classes[i],stack);
+ break;
+ }
+
+ if (stack->classes[i] == UNICODE_BIDI_CLASS_PDI)
+ {
+ /* X6a */
+ if (stack->overflow_isolate_count > 0)
+ {
+ --stack->overflow_isolate_count;
+ }
+ else if (stack->valid_isolate_count == 0)
+ ;
+ else
+ {
+ stack->overflow_embedding_count=0;
+
+ while (!stack->head->directional_isolate_status)
+ {
+ directional_status_stack_pop(stack);
+
+ if (!stack->head)
+ {
+ fprintf(stderr,
+ "Internal error: stack "
+ "empty after pop\n");
+ abort();
+ }
+ }
+
+ isolating_run_sequence_link
+ (&stack->isolating_run_sequences,
+ stack->head->isolate_start,
+ i);
+
+ directional_status_stack_pop(stack);
+ --stack->valid_isolate_count;
+
+ if (!stack->head)
+ {
+ fprintf(stderr,
+ "Internal error: stack "
+ "empty after pop\n");
+ abort();
+ }
+ }
+ stack->levels[i]=stack->head->embedding_level;
+ RESET_CLASS(stack->classes[i],stack);
+ }
+
+ if (stack->classes[i] == UNICODE_BIDI_CLASS_PDF)
+ {
+ /* X7 */
+
+ if (stack->overflow_isolate_count > 0)
+ ;
+ else if (stack->overflow_embedding_count > 0)
+ {
+ --stack->overflow_embedding_count;
+ }
+ else
+ {
+ stack->overflow_embedding_count=0;
+
+ if (!stack->head->directional_isolate_status
+ && stack->head->next)
+ {
+ directional_status_stack_pop(stack);
+ }
+ }
+ }
+
+ if (stack->classes[i] == UNICODE_BIDI_CLASS_B)
+ {
+ /* X8 */
+
+ stack->levels[i]=stack->paragraph_embedding_level;
+ }
+ }
+
+ /* X9 */
+
+#define IS_X9(class) \
+ ((class) == UNICODE_BIDI_CLASS_RLE || \
+ (class) == UNICODE_BIDI_CLASS_LRE || \
+ (class) == UNICODE_BIDI_CLASS_RLO || \
+ (class) == UNICODE_BIDI_CLASS_LRO || \
+ (class) == UNICODE_BIDI_CLASS_PDF || \
+ (class) == UNICODE_BIDI_CLASS_BN)
+
+ size_t next_pdi=0;
+ struct isolating_run_sequence_s *current_irs=0;
+ unicode_bidi_level_t current_level=0;
+
+ struct isolating_run_sequence_link **push_links=
+ sort_irs_links_by_push(&stack->isolating_run_sequences);
+ size_t next_rlilri=0;
+
+ for (size_t i=0; i<stack->size; ++i)
+ {
+ if (IS_X9(stack->classes[i]))
+ {
+ if (stack->levels[i] != UNICODE_BIDI_SKIP)
+ {
+ fprintf(stderr, "Internal error: X9 class did "
+ "not get skipped");
+ abort();
+ }
+ continue;
+ }
+
+ if (stack->levels[i] == UNICODE_BIDI_SKIP)
+ {
+ fprintf(stderr, "Internal error: non-X9 class did "
+ "get skipped for some reason");
+ abort();
+ }
+
+ if (!current_irs || current_level != stack->levels[i])
+ {
+ current_level = stack->levels[i];
+
+ if (next_pdi <
+ stack->isolating_run_sequences.n_pdi_linkage &&
+ stack->isolating_run_sequences.pdi_linkage
+ [next_pdi].pop == i)
+ {
+ if ((current_irs=
+ stack->isolating_run_sequences.pdi_linkage
+ [next_pdi].seq) == 0)
+ {
+ fprintf(stderr, "Internal error: "
+ "missing previous isolated "
+ "run sequence link\n");
+ abort();
+ }
+ ++next_pdi;
+ }
+ else
+ {
+ current_irs=isolating_run_sequences_init
+ (&stack->isolating_run_sequences,
+ stack->levels[i],
+ i);
+ }
+ }
+
+ isolating_run_sequences_record(current_irs, i);
+
+ if (next_rlilri < stack->isolating_run_sequences.n_pdi_linkage &&
+ push_links[next_rlilri]->push == i)
+ {
+ push_links[next_rlilri++]->seq=current_irs;
+ }
+ }
+
+ if (push_links)
+ free(push_links);
+
+ /* X10 */
+#ifdef BIDI_DEBUG
+ dump_classes("Before X10", stack);
+#endif
+
+ for (struct isolating_run_sequence_s *p=
+ stack->isolating_run_sequences.head; p; p=p->next)
+ {
+ p->sos=p->eos=UNICODE_BIDI_CLASS_L;
+
+ irs_iterator beg_iter=irs_begin(p), end_iter=irs_end(p);
+
+ if (irs_compare(&beg_iter, &end_iter) == 0)
+ continue; /* Edge case */
+
+ unicode_bidi_level_t before=
+ stack->paragraph_embedding_level;
+ unicode_bidi_level_t after=
+ stack->paragraph_embedding_level;
+
+
+ size_t first_i=beg_iter.i;
+
+ irs_decr(&end_iter);
+
+ size_t end_i=end_iter.i+1;
+
+ unicode_bidi_level_t start=stack->levels[first_i];
+ unicode_bidi_level_t end=stack->levels[end_i-1];
+
+ while (first_i > 0 && stack->levels[first_i-1] ==
+ UNICODE_BIDI_SKIP)
+ --first_i;
+
+ if (first_i > 0)
+ before=stack->levels[first_i-1];
+
+ if (!is_isolate_initiator(stack->classes[end_iter.i]))
+ {
+ while (end_i < stack->size &&
+ stack->levels[end_i] == UNICODE_BIDI_SKIP)
+ ++end_i;
+
+ if (end_i < stack->size)
+ after=stack->levels[end_i];
+ }
+#ifdef BIDI_DEBUG
+ fprintf(DEBUGDUMP,
+ "Sequence: before=%d, start=%d, end=%d, after=%d\n",
+ (int)before, (int)start, (int)end, (int)after);
+#endif
+ if (start > before)
+ before=start;
+
+ if (end > after)
+ after=end;
+
+ if (before & 1)
+ p->sos=UNICODE_BIDI_CLASS_R;
+ if (after & 1)
+ p->eos=UNICODE_BIDI_CLASS_R;
+
+
+#ifdef BIDI_DEBUG
+ dump_sequence_info(stack, p);
+ dump_sequence("Contents", stack, p);
+#endif
+ }
+
+
+ for (struct isolating_run_sequence_s *p=
+ stack->isolating_run_sequences.head; p; p=p->next)
+ {
+#ifdef BIDI_DEBUG
+ dump_sequence_info(stack, p);
+ dump_sequence("Contents before W", stack, p);
+#endif
+
+ unicode_bidi_w(stack, p);
+
+#ifdef BIDI_DEBUG
+ dump_sequence("Contents after W", stack, p);
+#endif
+ unicode_bidi_n(stack, p);
+ }
+#ifdef BIDI_DEBUG
+ dump_orig_classes("Before L1", stack);
+#endif
+
+ /*
+ ** L1
+ */
+
+ size_t i=stack->size;
+
+ int seen_sb=1;
+
+ while (i > 0)
+ {
+ --i;
+
+ if (IS_X9(stack->orig_classes[i]))
+ continue;
+
+ switch (stack->orig_classes[i]) {
+ case UNICODE_BIDI_CLASS_WS:
+ case UNICODE_BIDI_CLASS_FSI:
+ case UNICODE_BIDI_CLASS_LRI:
+ case UNICODE_BIDI_CLASS_RLI:
+ case UNICODE_BIDI_CLASS_PDI:
+ if (seen_sb)
+ stack->levels[i]=
+ stack->paragraph_embedding_level;
+ break;
+ case UNICODE_BIDI_CLASS_S:
+ case UNICODE_BIDI_CLASS_B:
+ stack->levels[i]=stack->paragraph_embedding_level;
+ seen_sb=1;
+ break;
+ default:
+ seen_sb=0;
+ continue;
+ }
+ }
+}
+
+static void unicode_bidi_w(directional_status_stack_t stack,
+ struct isolating_run_sequence_s *seq)
+{
+ irs_iterator iter=irs_begin(seq), end=irs_end(seq);
+ enum_bidi_class_t previous_type=seq->sos;
+
+ enum_bidi_class_t strong_type=UNICODE_BIDI_CLASS_R;
+
+ while (irs_compare(&iter, &end))
+ {
+ if (stack->classes[iter.i] == UNICODE_BIDI_CLASS_NSM)
+ {
+ /* W1 */
+ stack->classes[iter.i] =
+ is_isolate_initiator(previous_type) ||
+ previous_type == UNICODE_BIDI_CLASS_PDI
+ ? UNICODE_BIDI_CLASS_ON
+ : previous_type;
+
+ }
+
+ /* W2 */
+
+ if (stack->classes[iter.i] == UNICODE_BIDI_CLASS_EN &&
+ strong_type == UNICODE_BIDI_CLASS_AL)
+ {
+ stack->classes[iter.i] = UNICODE_BIDI_CLASS_AN;
+ }
+
+ /* W2 */
+ previous_type=stack->classes[iter.i];
+
+ switch (previous_type) {
+ case UNICODE_BIDI_CLASS_R:
+ case UNICODE_BIDI_CLASS_L:
+ case UNICODE_BIDI_CLASS_AL:
+ strong_type=previous_type;
+ break;
+ default:
+ break;
+ }
+
+ irs_incr(&iter);
+ }
+
+ iter=irs_begin(seq);
+
+ previous_type=UNICODE_BIDI_CLASS_L;
+
+ int not_eol=irs_compare(&iter, &end);
+
+ while (not_eol)
+ {
+ /* W3 */
+ if (stack->classes[iter.i] == UNICODE_BIDI_CLASS_AL)
+ stack->classes[iter.i] = UNICODE_BIDI_CLASS_R;
+
+ /* W4 */
+
+ enum_bidi_class_t this_type=stack->classes[iter.i];
+ irs_incr(&iter);
+
+ not_eol=irs_compare(&iter, &end);
+
+ if (not_eol &&
+ (
+ (this_type == UNICODE_BIDI_CLASS_ES &&
+ previous_type == UNICODE_BIDI_CLASS_EN)
+ ||
+ (this_type == UNICODE_BIDI_CLASS_CS &&
+ (previous_type == UNICODE_BIDI_CLASS_EN ||
+ previous_type == UNICODE_BIDI_CLASS_AN)
+ )
+ ) &&
+ stack->classes[iter.i] == previous_type)
+ {
+ irs_iterator prev=iter;
+
+ irs_decr(&prev);
+
+ stack->classes[prev.i]=previous_type;
+ }
+
+ if (not_eol)
+ previous_type=this_type;
+ }
+
+ iter=irs_begin(seq);
+
+ /* W5 */
+
+ previous_type=UNICODE_BIDI_CLASS_L; /* Doesn't match any part of W5 */
+
+ while (irs_compare(&iter, &end))
+ {
+ if (stack->classes[iter.i] != UNICODE_BIDI_CLASS_ET)
+ {
+ previous_type=stack->classes[iter.i];
+ irs_incr(&iter);
+ continue;
+ }
+
+ /* ET after EN */
+ if (previous_type == UNICODE_BIDI_CLASS_EN)
+ {
+ stack->classes[iter.i] = UNICODE_BIDI_CLASS_EN;
+ irs_incr(&iter);
+ continue;
+ }
+
+ /* See if EN follows these ETs */
+
+ irs_iterator start=iter;
+
+ while (irs_incr(&iter), irs_compare(&iter, &end))
+ {
+ previous_type=stack->classes[iter.i];
+
+ if (previous_type == UNICODE_BIDI_CLASS_ET)
+ continue;
+
+ if (previous_type == UNICODE_BIDI_CLASS_EN)
+ {
+ while (irs_compare(&start, &iter))
+ {
+ stack->classes[start.i]=
+ UNICODE_BIDI_CLASS_EN;
+ irs_incr(&start);
+ }
+ }
+ break;
+ }
+ }
+
+ /* W6 */
+
+ for (iter=irs_begin(seq);
+ irs_compare(&iter, &end); irs_incr(&iter))
+ {
+ switch (stack->classes[iter.i]) {
+ case UNICODE_BIDI_CLASS_ET:
+ case UNICODE_BIDI_CLASS_ES:
+ case UNICODE_BIDI_CLASS_CS:
+ /* W6 */
+ stack->classes[iter.i]=UNICODE_BIDI_CLASS_ON;
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* W7 */
+ iter=irs_begin(seq);
+
+ previous_type=seq->sos;
+
+ while (irs_compare(&iter, &end))
+ {
+ switch (stack->classes[iter.i]) {
+ case UNICODE_BIDI_CLASS_L:
+ case UNICODE_BIDI_CLASS_R:
+ previous_type=stack->classes[iter.i];
+ break;
+ case UNICODE_BIDI_CLASS_EN:
+ if (previous_type == UNICODE_BIDI_CLASS_L)
+ stack->classes[iter.i]=previous_type;
+ break;
+ default:
+ break;
+ }
+ irs_incr(&iter);
+ }
+}
+
+struct bidi_n_stack {
+ struct bidi_n_stack *next;
+
+ irs_iterator start;
+ irs_iterator end;
+ short has_e;
+ short has_o;
+ short matched;
+};
+
+static void unicode_bidi_n(directional_status_stack_t stack,
+ struct isolating_run_sequence_s *seq)
+{
+#define NSTACKSIZE 63
+
+ struct bidi_n_stack *bracket_stack=0;
+ struct bidi_n_stack **bracket_stack_tail= &bracket_stack;
+
+ struct bidi_n_stack *stack_iters[NSTACKSIZE];
+ char32_t stack_chars[NSTACKSIZE];
+ size_t stackp=0;
+
+ /*
+ ** N0
+ **
+ ** Combined pass that implements BD16, and constructs, on the fly
+ ** whether each matched pair has an e or an o inside it.
+ */
+ irs_iterator beg=irs_begin(seq), iter=beg, end=irs_end(seq);
+
+ for (; irs_compare(&iter, &end); irs_incr(&iter))
+ {
+ unicode_bidi_bracket_type_t bracket_type;
+ char32_t open_bracket=
+ unicode_bidi_bracket_type(stack->chars[iter.i],
+ &bracket_type);
+
+ if (bracket_type == UNICODE_BIDI_o)
+ {
+ if (stackp >= NSTACKSIZE)
+ break; /* BD16 failure */
+
+ if (!((*bracket_stack_tail)=(struct bidi_n_stack *)
+ calloc(1, sizeof(struct bidi_n_stack))))
+ abort();
+
+ stack_iters[stackp]=*bracket_stack_tail;
+
+ (*bracket_stack_tail)->start=iter;
+
+ stack_chars[stackp]=stack->chars[iter.i];
+
+ bracket_stack_tail= &(*bracket_stack_tail)->next;
+ ++stackp;
+ continue;
+ }
+
+ if (bracket_type == UNICODE_BIDI_c) /* Should be "n" */
+ {
+ for (size_t i=stackp; i > 0; )
+ {
+ --i;
+ if (stack_chars[i] != open_bracket)
+ continue;
+
+ stack_iters[i]->end = iter;
+ stack_iters[i]->matched=1;
+ stackp=i;
+ break;
+ }
+ continue;
+ }
+
+ /*
+ ** So we check if this is an e or an o, and if so we
+ ** have a convenient list of all unclosed brackets, so
+ ** we record these facts there.
+ */
+
+ enum_bidi_class_t eoclass=stack->classes[iter.i];
+
+#define ADJUST_EOCLASS(eoclass) do { \
+ \
+ if ((eoclass) == UNICODE_BIDI_CLASS_EN || \
+ (eoclass) == UNICODE_BIDI_CLASS_AN) \
+ (eoclass)=UNICODE_BIDI_CLASS_R; \
+ } while (0)
+
+ ADJUST_EOCLASS(eoclass);
+
+#define E_CLASS (seq->embedding_level & 1 ? \
+ UNICODE_BIDI_CLASS_R:UNICODE_BIDI_CLASS_L)
+
+#define O_CLASS (seq->embedding_level & 1 ? \
+ UNICODE_BIDI_CLASS_L:UNICODE_BIDI_CLASS_R)
+
+ if (eoclass == E_CLASS)
+ {
+ for (size_t i=0; i<stackp; ++i)
+ stack_iters[i]->has_e=1;
+ }
+ else if (eoclass == O_CLASS)
+ {
+ for (size_t i=0; i<stackp; ++i)
+ stack_iters[i]->has_o=1;
+ }
+ }
+
+ while (bracket_stack)
+ {
+ struct bidi_n_stack *p=bracket_stack;
+
+ bracket_stack=bracket_stack->next;
+
+ if (p->matched)
+ {
+ int set=0;
+
+ if (p->has_e)
+ {
+ stack->classes[p->start.i]=
+ stack->classes[p->end.i]=
+ seq->embedding_level & 1
+ ? UNICODE_BIDI_CLASS_R
+ : UNICODE_BIDI_CLASS_L;
+ set=1;
+ } else if (p->has_o)
+ {
+ enum_bidi_class_t strong_type=seq->sos;
+
+ iter=p->start;
+
+ while (irs_compare(&beg, &iter))
+ {
+ irs_decr(&iter);
+
+ enum_bidi_class_t eoclass=
+ stack->classes[iter.i];
+
+ ADJUST_EOCLASS(eoclass);
+
+ switch (eoclass) {
+ case UNICODE_BIDI_CLASS_L:
+ case UNICODE_BIDI_CLASS_R:
+ break;
+ default:
+ continue;
+ }
+
+ strong_type=eoclass;
+ break;
+ }
+
+ if (strong_type == O_CLASS)
+ {
+ stack->classes[p->start.i]=
+ stack->classes[p->end.i]=
+ strong_type;
+ set=1;
+ }
+ }
+
+ if (set)
+ {
+ enum_bidi_class_t strong_type=
+ stack->classes[p->end.i];
+
+ while (irs_incr(&p->end),
+ irs_compare(&p->end, &end))
+ {
+ if (stack->orig_classes[p->end.i] !=
+ UNICODE_BIDI_CLASS_NSM)
+ break;
+
+ stack->classes[p->end.i]=strong_type;
+ }
+ }
+ }
+ free(p);
+ }
+
+ /* N1 */
+
+#define IS_NI(class) \
+ ((class) == UNICODE_BIDI_CLASS_B || \
+ (class) == UNICODE_BIDI_CLASS_S || \
+ (class) == UNICODE_BIDI_CLASS_WS || \
+ (class) == UNICODE_BIDI_CLASS_ON || \
+ (class) == UNICODE_BIDI_CLASS_FSI || \
+ (class) == UNICODE_BIDI_CLASS_LRI || \
+ (class) == UNICODE_BIDI_CLASS_RLI || \
+ (class) == UNICODE_BIDI_CLASS_PDI)
+
+ enum_bidi_class_t prev_type=seq->sos;
+
+ for (iter=beg; irs_compare(&iter, &end); )
+ {
+ /*
+ ** N1
+ */
+
+ enum_bidi_class_t this_type=stack->classes[iter.i];
+
+ ADJUST_EOCLASS(this_type);
+
+ if (!IS_NI(this_type))
+ {
+ switch (this_type) {
+ case UNICODE_BIDI_CLASS_L:
+ case UNICODE_BIDI_CLASS_R:
+ prev_type=this_type;
+ break;
+ default:
+ prev_type=UNICODE_BIDI_CLASS_ON; // Marker.
+ break;
+ }
+ irs_incr(&iter);
+ continue;
+ }
+
+ enum_bidi_class_t next_type=seq->eos;
+
+ irs_iterator start=iter;
+
+ while (irs_compare(&iter, &end))
+ {
+ if (IS_NI(stack->classes[iter.i]))
+ {
+ irs_incr(&iter);
+ continue;
+ }
+
+ enum_bidi_class_t other_type=stack->classes[iter.i];
+
+ ADJUST_EOCLASS(other_type);
+
+ switch (other_type) {
+ case UNICODE_BIDI_CLASS_L:
+ case UNICODE_BIDI_CLASS_R:
+ next_type=other_type;
+ break;
+ default:
+ next_type=UNICODE_BIDI_CLASS_BN; /* Marker */
+ break;
+ }
+ break;
+ }
+
+ while (irs_compare(&start, &iter))
+ {
+ // next_type can only match prev_type if both are L
+ // or R. If the character before the NIs was not L
+ // or R, prev_type is ON. If the character after the
+ // NIs was not L or R, next_type is BN.
+
+ if (next_type == prev_type)
+ {
+ stack->classes[start.i]=next_type; /* N1 */
+ }
+
+ irs_incr(&start);
+ }
+ }
+
+ for (iter=beg; irs_compare(&iter, &end); )
+ {
+ if (IS_NI(stack->classes[iter.i]))
+ {
+ stack->classes[iter.i]=
+ stack->levels[iter.i] & 1 ?
+ UNICODE_BIDI_CLASS_R :
+ UNICODE_BIDI_CLASS_L; /* N2 */
+ }
+ irs_incr(&iter);
+ }
+
+#ifdef BIDI_DEBUG
+ dump_sequence("Contents after N", stack, seq);
+#endif
+
+ /* I1 */
+ /* I2 */
+
+ for (iter=beg; irs_compare(&iter, &end); irs_incr(&iter))
+ {
+ if ((stack->levels[iter.i] & 1) == 0)
+ {
+ switch (stack->classes[iter.i]) {
+ case UNICODE_BIDI_CLASS_R:
+ ++stack->levels[iter.i];
+ break;
+ case UNICODE_BIDI_CLASS_AN:
+ case UNICODE_BIDI_CLASS_EN:
+ stack->levels[iter.i] += 2;
+ break;
+ default: break;
+ }
+ }
+ else
+ {
+ switch (stack->classes[iter.i]) {
+ case UNICODE_BIDI_CLASS_L:
+ case UNICODE_BIDI_CLASS_AN:
+ case UNICODE_BIDI_CLASS_EN:
+ ++stack->levels[iter.i];
+ break;
+ default: break;
+ }
+ }
+ }
+
+#ifdef BIDI_DEBUG
+ dump_sequence("Contents after I", stack, seq);
+#endif
}