diff options
| author | Sam Varshavchik | 2020-12-01 08:23:23 -0500 |
|---|---|---|
| committer | Sam Varshavchik | 2020-12-01 08:23:23 -0500 |
| commit | 6399d7b46a8a5f52361e8c827eac5e3d0e69479f (patch) | |
| tree | 8e43f45938c6e99d9ac31bd350f540ebfa512511 | |
| parent | 2e734d23becf9dfaf7c830235dd0e4ef5fcdb482 (diff) | |
| download | courier-libs-6399d7b46a8a5f52361e8c827eac5e3d0e69479f.tar.bz2 | |
Factor out a separate setbnl.
Also replace all references to "classes" with "types", for consistency.
| -rw-r--r-- | unicode/Makefile.am | 1 | ||||
| -rw-r--r-- | unicode/book.xml | 45 | ||||
| -rw-r--r-- | unicode/courier-unicode.h.in | 10 | ||||
| -rw-r--r-- | unicode/unicode_bidi.c | 219 | ||||
| -rw-r--r-- | unicode/unicodecpp.C | 8 |
5 files changed, 179 insertions, 104 deletions
diff --git a/unicode/Makefile.am b/unicode/Makefile.am index 25b0719..135617a 100644 --- a/unicode/Makefile.am +++ b/unicode/Makefile.am @@ -124,6 +124,7 @@ man_MANS= \ $(srcdir)/man/unicode_bidi_logical_order.3 \ $(srcdir)/man/unicode_bidi_mirror.3 \ $(srcdir)/man/unicode_bidi_reorder.3 \ + $(srcdir)/man/unicode_bidi_setbnl.3 \ $(srcdir)/man/unicode_bidi_type.3 \ $(srcdir)/man/unicode_canonical.3 \ $(srcdir)/man/unicode_category_lookup.3 \ diff --git a/unicode/book.xml b/unicode/book.xml index ad96d82..c0e0485 100644 --- a/unicode/book.xml +++ b/unicode/book.xml @@ -311,6 +311,7 @@ See COPYING for distribution information. <refname>unicode_bidi_embed_paragraph_level</refname> <refname>unicode_bidi_type</refname> + <refname>unicode_bidi_setbnl</refname> <refname>unicode_bidi_mirror</refname> <refname>unicode_bidi_bracket_type</refname> @@ -406,6 +407,14 @@ See COPYING for distribution information. <funcdef>enum_bidi_type_t <function>unicode_bidi_type</function></funcdef> <paramdef>char32_t <parameter>c</parameter></paramdef> </funcprototype> + + <funcprototype> + <funcdef>void <function>unicode_bidi_setbnl</function></funcdef> + <paramdef>char32_t *<parameter>p</parameter></paramdef> + <paramdef>const unicode_bidi_type_t *<parameter>types</parameter></paramdef> + <paramdef>size_t <parameter>n</parameter></paramdef> + </funcprototype> + </funcsynopsis> </refsynopsisdiv> <refsect1 id="unicode_bidi_descr"> @@ -982,6 +991,18 @@ See COPYING for distribution information. <function>unicode_bidi_type</function> looks up each character's bi-directional character type. </para> + + <para> + <function>unicode_bidi_setbnl</function> + takes a pointer to a unicode string, a pointer to an + array of <classname>enum_bidi_type_t</classname> values and + the number of characters in the string and the array. 
+ <function>unicode_bidi_setbnl</function> replaces all + paragraph separators in the unicode string with a newline + character (same as the <literal>UNICODE_BIDI_CLEANUP_BNL</literal> + option to <function>unicode_bidi_cleanup</function>). + </para> + <para> <function>unicode_bidi_mirror</function> returns the glyph that's a mirror image of the parameter @@ -2787,6 +2808,15 @@ See COPYING for distribution information. <modifier>std::vector<unicode_bidi_type_t></modifier> <varname>types</varname> </fieldsynopsis> + + <methodsynopsis> + <void /> + <methodname>setbnl</methodname> + <methodparam> + <modifier>std::u32string &</modifier> + <parameter>string</parameter> + </methodparam> + </methodsynopsis> </classsynopsis> <funcsynopsis> @@ -2913,6 +2943,8 @@ std::u32string text; unicode::bidi_calc_types types{text}; +types.setbnl(text); // Optional + // types.types is a std::vector of enum_bidi_types_t values auto [levels, level]=unicode::bidi_calc(types); @@ -2933,6 +2965,19 @@ auto [levels, level]=unicode::bidi_calc(types); until <function>unicode::bidi_calc</function>() returns. </para> </note> + + <para> + The optional <methodname>setbnl</methodname>() method uses + <link linkend="unicode_bidi"> + <citerefentry> + <refentrytitle>unicode_bidi_setbnl</refentrytitle> + <manvolnum>3</manvolnum></citerefentry></link> + to replace paragraph separators with newline characters, + in the unicode string. It requires the same unicode string + that was passed to the constructor as a parameter (because + the constructor takes a constant reference, but this + method modifies the string). 
+ </para> </listitem> <listitem> <para> diff --git a/unicode/courier-unicode.h.in b/unicode/courier-unicode.h.in index f8ab117..a3a59f4 100644 --- a/unicode/courier-unicode.h.in +++ b/unicode/courier-unicode.h.in @@ -629,6 +629,10 @@ extern enum_bidi_type_t unicode_bidi_type(char32_t c); extern void unicode_bidi_calc_types(const char32_t *p, size_t n, enum_bidi_type_t *buf); +extern void unicode_bidi_setbnl(char32_t *p, + const enum_bidi_type_t *types, + size_t n); + extern unicode_bidi_level_t unicode_bidi_calc_levels(const char32_t *p, const enum_bidi_type_t *types, @@ -2181,13 +2185,19 @@ struct bidi_calc_types { //! an existing std::u32string. bidi_calc_types(const std::u32string &); +#if __cplusplus >= 201103L //! Deleted constructor //! bidi_calc_types cannot be constructed from a temporary //! std::u32string. bidi_calc_types(std::u32string &&)=delete; +#endif + //! Replace all paragraph breaks by newlines. + + void setbnl(std::u32string &); //! Destructor + ~bidi_calc_types(); }; diff --git a/unicode/unicode_bidi.c b/unicode/unicode_bidi.c index cbb11dc..92fe8a7 100644 --- a/unicode/unicode_bidi.c +++ b/unicode/unicode_bidi.c @@ -466,8 +466,8 @@ typedef struct { unicode_bidi_level_t paragraph_embedding_level; const char32_t *chars; - enum_bidi_type_t *classes; - const enum_bidi_type_t *orig_classes; + enum_bidi_type_t *types; + const enum_bidi_type_t *orig_types; unicode_bidi_level_t *levels; size_t size; int overflow_isolate_count; @@ -500,29 +500,29 @@ const char *bidi_classname(enum_bidi_type_t classenum) } -void dump_classes(const char *prefix, directional_status_stack_t stack) +void dump_types(const char *prefix, directional_status_stack_t stack) { fprintf(DEBUGDUMP, "%s: ", prefix); for (size_t i=0; i<stack->size; ++i) { fprintf(DEBUGDUMP, " %s(%d)", - bidi_classname(stack->classes[i]), + bidi_classname(stack->types[i]), (int)stack->levels[i]); } fprintf(DEBUGDUMP, "\n"); } -void dump_orig_classes(const char *prefix, directional_status_stack_t stack) 
+void dump_orig_types(const char *prefix, directional_status_stack_t stack) { fprintf(DEBUGDUMP, "%s: ", prefix); for (size_t i=0; i<stack->size; ++i) { fprintf(DEBUGDUMP, " %s(%s%s%d)", - bidi_classname(stack->classes[i]), - (stack->classes[i] != stack->orig_classes[i] ? - bidi_classname(stack->orig_classes[i]):""), - (stack->classes[i] != stack->orig_classes[i] ? "/":""), + bidi_classname(stack->types[i]), + (stack->types[i] != stack->orig_types[i] ? + bidi_classname(stack->orig_types[i]):""), + (stack->types[i] != stack->orig_types[i] ? "/":""), (int)stack->levels[i]); } fprintf(DEBUGDUMP, "\n"); @@ -624,7 +624,7 @@ compute_paragraph_embedding_level_from_types(const enum_bidi_type_t *p, static directional_status_stack_t directional_status_stack_init(const char32_t *chars, - const enum_bidi_type_t *classes, size_t n, + const enum_bidi_type_t *types, size_t n, unicode_bidi_level_t *levels, const unicode_bidi_level_t *initial_embedding_level) @@ -636,22 +636,22 @@ directional_status_stack_init(const char32_t *chars, stack->paragraph_embedding_level= initial_embedding_level ? 
*initial_embedding_level & 1 - : compute_paragraph_embedding_level_from_types(classes, 0, n); + : compute_paragraph_embedding_level_from_types(types, 0, n); stack->chars=chars; - stack->orig_classes=classes; + stack->orig_types=types; if (n) { - stack->classes=(enum_bidi_type_t *) + stack->types=(enum_bidi_type_t *) malloc(sizeof(enum_bidi_type_t)*n); - if (!stack->classes) + if (!stack->types) abort(); - memcpy(stack->classes, stack->orig_classes, + memcpy(stack->types, stack->orig_types, sizeof(enum_bidi_type_t)*n); } else { - stack->classes=0; + stack->types=0; } stack->levels=levels; stack->size=n; @@ -682,8 +682,8 @@ static void directional_status_stack_deinit(directional_status_stack_t stack) { while (stack->head) directional_status_stack_pop(stack); - if (stack->classes) - free(stack->classes); + if (stack->types) + free(stack->types); isolating_run_sequences_deinit(&stack->isolating_run_sequences); free(stack); } @@ -716,6 +716,17 @@ void unicode_bidi_calc_types(const char32_t *p, size_t n, } } +void unicode_bidi_setbnl(char32_t *p, + const enum_bidi_type_t *types, + size_t n) +{ + for (size_t i=0; i<n; i++) + if (types[i] == UNICODE_BIDI_TYPE_B) + { + p[i]='\n'; + } +} + unicode_bidi_level_t unicode_bidi_calc(const char32_t *p, size_t n, unicode_bidi_level_t *bufp, const unicode_bidi_level_t *initial_embedding_level) @@ -744,7 +755,7 @@ static void unicode_bidi_cl(directional_status_stack_t stack); unicode_bidi_level_t unicode_bidi_calc_levels(const char32_t *p, - const enum_bidi_type_t *classes, + const enum_bidi_type_t *types, size_t n, unicode_bidi_level_t *bufp, const unicode_bidi_level_t *initial_embedding_level) @@ -756,7 +767,7 @@ unicode_bidi_calc_levels(const char32_t *p, bufp[i]=UNICODE_BIDI_SKIP; } - stack=directional_status_stack_init(p, classes, n, bufp, + stack=directional_status_stack_init(p, types, n, bufp, initial_embedding_level); unicode_bidi_level_t paragraph_embedding_level= @@ -782,7 +793,7 @@ unicode_bidi_calc_levels(const char32_t 
*p, } \ } while(0) -static void unicode_bidi_w(enum_bidi_type_t *classes, +static void unicode_bidi_w(enum_bidi_type_t *types, struct isolating_run_sequence_s *seq); static void unicode_bidi_n(directional_status_stack_t stack, struct isolating_run_sequence_s *seq); @@ -814,7 +825,7 @@ void dump_sequence(const char *what, directional_status_stack_t stack, while (irs_compare(&beg, &end)) { fprintf(DEBUGDUMP, " %s(%d)", - bidi_classname(stack->classes[beg.i]), + bidi_classname(stack->types[beg.i]), (int)stack->levels[beg.i]); irs_incr(&beg); } @@ -825,7 +836,7 @@ void dump_sequence(const char *what, directional_status_stack_t stack, static void unicode_bidi_cl(directional_status_stack_t stack) { #ifdef BIDI_DEBUG - dump_classes("Before X1", stack); + dump_types("Before X1", stack); #endif for (size_t i=0; i<stack->size; i++) @@ -842,7 +853,7 @@ static void unicode_bidi_cl(directional_status_stack_t stack) embedding_level |= 1, \ ++embedding_level) - switch (stack->classes[i]) { + switch (stack->types[i]) { case UNICODE_BIDI_TYPE_RLE: /* X2 */ NEXT_ODD_EMBEDDING_LEVEL; @@ -930,7 +941,7 @@ static void unicode_bidi_cl(directional_status_stack_t stack) break; } - enum_bidi_type_t cur_class=stack->classes[i]; + enum_bidi_type_t cur_class=stack->types[i]; if (cur_class == UNICODE_BIDI_TYPE_FSI) { /* X5c */ @@ -941,9 +952,9 @@ static void unicode_bidi_cl(directional_status_stack_t stack) while (++j < stack->size) { - if (is_isolate_initiator(stack->classes[j])) + if (is_isolate_initiator(stack->types[j])) ++in_isolation; - else if (stack->classes[j] == UNICODE_BIDI_TYPE_PDI) + else if (stack->types[j] == UNICODE_BIDI_TYPE_PDI) { if (--in_isolation == 0) break; @@ -951,7 +962,7 @@ static void unicode_bidi_cl(directional_status_stack_t stack) } cur_class=compute_paragraph_embedding_level_from_types - (stack->classes, i+1, j) == 1 + (stack->types, i+1, j) == 1 ? 
UNICODE_BIDI_TYPE_RLI : UNICODE_BIDI_TYPE_LRI; } @@ -960,7 +971,7 @@ static void unicode_bidi_cl(directional_status_stack_t stack) case UNICODE_BIDI_TYPE_RLI: /* X5a */ stack->levels[i]=stack->head->embedding_level; - RESET_CLASS(stack->classes[i],stack); + RESET_CLASS(stack->types[i],stack); NEXT_ODD_EMBEDDING_LEVEL; @@ -982,7 +993,7 @@ static void unicode_bidi_cl(directional_status_stack_t stack) case UNICODE_BIDI_TYPE_LRI: /* X5b */ stack->levels[i]=stack->head->embedding_level; - RESET_CLASS(stack->classes[i],stack); + RESET_CLASS(stack->types[i],stack); NEXT_EVEN_EMBEDDING_LEVEL; @@ -1005,14 +1016,14 @@ static void unicode_bidi_cl(directional_status_stack_t stack) break; } - if (!is_explicit_indicator(stack->orig_classes[i])) + if (!is_explicit_indicator(stack->orig_types[i])) { /* X6 */ stack->levels[i]=stack->head->embedding_level; - RESET_CLASS(stack->classes[i],stack); + RESET_CLASS(stack->types[i],stack); } - if (stack->classes[i] == UNICODE_BIDI_TYPE_PDI) + if (stack->types[i] == UNICODE_BIDI_TYPE_PDI) { /* X6a */ if (stack->overflow_isolate_count > 0) @@ -1055,10 +1066,10 @@ static void unicode_bidi_cl(directional_status_stack_t stack) } } stack->levels[i]=stack->head->embedding_level; - RESET_CLASS(stack->classes[i],stack); + RESET_CLASS(stack->types[i],stack); } - if (stack->classes[i] == UNICODE_BIDI_TYPE_PDF) + if (stack->types[i] == UNICODE_BIDI_TYPE_PDF) { /* X7 */ @@ -1080,7 +1091,7 @@ static void unicode_bidi_cl(directional_status_stack_t stack) } } - if (stack->classes[i] == UNICODE_BIDI_TYPE_B) + if (stack->types[i] == UNICODE_BIDI_TYPE_B) { /* X8 */ @@ -1108,7 +1119,7 @@ static void unicode_bidi_cl(directional_status_stack_t stack) for (size_t i=0; i<stack->size; ++i) { - if (IS_X9(stack->classes[i])) + if (IS_X9(stack->types[i])) { if (stack->levels[i] != UNICODE_BIDI_SKIP) { @@ -1169,7 +1180,7 @@ static void unicode_bidi_cl(directional_status_stack_t stack) /* X10 */ #ifdef BIDI_DEBUG - dump_classes("Before X10", stack); + 
dump_types("Before X10", stack); #endif for (struct isolating_run_sequence_s *p= @@ -1204,7 +1215,7 @@ static void unicode_bidi_cl(directional_status_stack_t stack) if (first_i > 0) before=stack->levels[first_i-1]; - if (!is_isolate_initiator(stack->classes[end_iter.i])) + if (!is_isolate_initiator(stack->types[end_iter.i])) { while (end_i < stack->size && stack->levels[end_i] == UNICODE_BIDI_SKIP) @@ -1247,7 +1258,7 @@ static void unicode_bidi_cl(directional_status_stack_t stack) dump_sequence("Contents before W", stack, p); #endif - unicode_bidi_w(stack->classes, p); + unicode_bidi_w(stack->types, p); #ifdef BIDI_DEBUG dump_sequence("Contents after W", stack, p); @@ -1255,7 +1266,7 @@ static void unicode_bidi_cl(directional_status_stack_t stack) unicode_bidi_n(stack, p); } #ifdef BIDI_DEBUG - dump_orig_classes("Before L1", stack); + dump_orig_types("Before L1", stack); #endif /* @@ -1270,10 +1281,10 @@ static void unicode_bidi_cl(directional_status_stack_t stack) { --i; - if (IS_X9(stack->orig_classes[i])) + if (IS_X9(stack->orig_types[i])) continue; - switch (stack->orig_classes[i]) { + switch (stack->orig_types[i]) { case UNICODE_BIDI_TYPE_WS: case UNICODE_BIDI_TYPE_FSI: case UNICODE_BIDI_TYPE_LRI: @@ -1295,7 +1306,7 @@ static void unicode_bidi_cl(directional_status_stack_t stack) } } -static void unicode_bidi_w(enum_bidi_type_t *classes, +static void unicode_bidi_w(enum_bidi_type_t *types, struct isolating_run_sequence_s *seq) { irs_iterator iter=irs_begin(seq), end=irs_end(seq); @@ -1305,10 +1316,10 @@ static void unicode_bidi_w(enum_bidi_type_t *classes, while (irs_compare(&iter, &end)) { - if (classes[iter.i] == UNICODE_BIDI_TYPE_NSM) + if (types[iter.i] == UNICODE_BIDI_TYPE_NSM) { /* W1 */ - classes[iter.i] = + types[iter.i] = is_isolate_initiator(previous_type) || previous_type == UNICODE_BIDI_TYPE_PDI ? 
UNICODE_BIDI_TYPE_ON @@ -1318,14 +1329,14 @@ static void unicode_bidi_w(enum_bidi_type_t *classes, /* W2 */ - if (classes[iter.i] == UNICODE_BIDI_TYPE_EN && + if (types[iter.i] == UNICODE_BIDI_TYPE_EN && strong_type == UNICODE_BIDI_TYPE_AL) { - classes[iter.i] = UNICODE_BIDI_TYPE_AN; + types[iter.i] = UNICODE_BIDI_TYPE_AN; } /* W2 */ - previous_type=classes[iter.i]; + previous_type=types[iter.i]; switch (previous_type) { case UNICODE_BIDI_TYPE_R: @@ -1349,12 +1360,12 @@ static void unicode_bidi_w(enum_bidi_type_t *classes, while (not_eol) { /* W3 */ - if (classes[iter.i] == UNICODE_BIDI_TYPE_AL) - classes[iter.i] = UNICODE_BIDI_TYPE_R; + if (types[iter.i] == UNICODE_BIDI_TYPE_AL) + types[iter.i] = UNICODE_BIDI_TYPE_R; /* W4 */ - enum_bidi_type_t this_type=classes[iter.i]; + enum_bidi_type_t this_type=types[iter.i]; irs_incr(&iter); not_eol=irs_compare(&iter, &end); @@ -1369,13 +1380,13 @@ static void unicode_bidi_w(enum_bidi_type_t *classes, previous_type == UNICODE_BIDI_TYPE_AN) ) ) && - classes[iter.i] == previous_type) + types[iter.i] == previous_type) { irs_iterator prev=iter; irs_decr(&prev); - classes[prev.i]=previous_type; + types[prev.i]=previous_type; } if (not_eol) @@ -1390,9 +1401,9 @@ static void unicode_bidi_w(enum_bidi_type_t *classes, while (irs_compare(&iter, &end)) { - if (classes[iter.i] != UNICODE_BIDI_TYPE_ET) + if (types[iter.i] != UNICODE_BIDI_TYPE_ET) { - previous_type=classes[iter.i]; + previous_type=types[iter.i]; irs_incr(&iter); continue; } @@ -1400,7 +1411,7 @@ static void unicode_bidi_w(enum_bidi_type_t *classes, /* ET after EN */ if (previous_type == UNICODE_BIDI_TYPE_EN) { - classes[iter.i] = UNICODE_BIDI_TYPE_EN; + types[iter.i] = UNICODE_BIDI_TYPE_EN; irs_incr(&iter); continue; } @@ -1411,7 +1422,7 @@ static void unicode_bidi_w(enum_bidi_type_t *classes, while (irs_incr(&iter), irs_compare(&iter, &end)) { - previous_type=classes[iter.i]; + previous_type=types[iter.i]; if (previous_type == UNICODE_BIDI_TYPE_ET) continue; @@ -1420,7 
+1431,7 @@ static void unicode_bidi_w(enum_bidi_type_t *classes, { while (irs_compare(&start, &iter)) { - classes[start.i]= + types[start.i]= UNICODE_BIDI_TYPE_EN; irs_incr(&start); } @@ -1434,12 +1445,12 @@ static void unicode_bidi_w(enum_bidi_type_t *classes, for (iter=irs_begin(seq); irs_compare(&iter, &end); irs_incr(&iter)) { - switch (classes[iter.i]) { + switch (types[iter.i]) { case UNICODE_BIDI_TYPE_ET: case UNICODE_BIDI_TYPE_ES: case UNICODE_BIDI_TYPE_CS: /* W6 */ - classes[iter.i]=UNICODE_BIDI_TYPE_ON; + types[iter.i]=UNICODE_BIDI_TYPE_ON; break; default: break; @@ -1453,14 +1464,14 @@ static void unicode_bidi_w(enum_bidi_type_t *classes, while (irs_compare(&iter, &end)) { - switch (classes[iter.i]) { + switch (types[iter.i]) { case UNICODE_BIDI_TYPE_L: case UNICODE_BIDI_TYPE_R: - previous_type=classes[iter.i]; + previous_type=types[iter.i]; break; case UNICODE_BIDI_TYPE_EN: if (previous_type == UNICODE_BIDI_TYPE_L) - classes[iter.i]=previous_type; + types[iter.i]=previous_type; break; default: break; @@ -1515,7 +1526,7 @@ static void unicode_bidi_n(directional_status_stack_t stack, char32_t open_or_close_bracket=0; - if (IS_NI(stack->classes[iter.i])) + if (IS_NI(stack->types[iter.i])) { open_or_close_bracket= unicode_bidi_bracket_type(stack->chars[iter.i], @@ -1599,7 +1610,7 @@ static void unicode_bidi_n(directional_status_stack_t stack, ** we record these facts there. */ - enum_bidi_type_t eoclass=stack->classes[iter.i]; + enum_bidi_type_t eoclass=stack->types[iter.i]; #define ADJUST_EOCLASS(eoclass) do { \ \ @@ -1682,8 +1693,8 @@ static void unicode_bidi_n(directional_status_stack_t stack, #endif if (p->has_e) { - stack->classes[p->start.i]= - stack->classes[p->end.i]= + stack->types[p->start.i]= + stack->types[p->end.i]= seq->embedding_level & 1 ? 
UNICODE_BIDI_TYPE_R : UNICODE_BIDI_TYPE_L; @@ -1699,7 +1710,7 @@ static void unicode_bidi_n(directional_status_stack_t stack, irs_decr(&iter); enum_bidi_type_t eoclass= - stack->classes[iter.i]; + stack->types[iter.i]; ADJUST_EOCLASS(eoclass); @@ -1720,8 +1731,8 @@ static void unicode_bidi_n(directional_status_stack_t stack, break; } - stack->classes[p->start.i]= - stack->classes[p->end.i]= + stack->types[p->start.i]= + stack->types[p->end.i]= strong_type; set=1; } @@ -1729,16 +1740,16 @@ static void unicode_bidi_n(directional_status_stack_t stack, if (set) { enum_bidi_type_t strong_type= - stack->classes[p->end.i]; + stack->types[p->end.i]; while (irs_incr(&p->end), irs_compare(&p->end, &end)) { - if (stack->orig_classes[p->end.i] != + if (stack->orig_types[p->end.i] != UNICODE_BIDI_TYPE_NSM) break; - stack->classes[p->end.i]=strong_type; + stack->types[p->end.i]=strong_type; } } } @@ -1755,7 +1766,7 @@ static void unicode_bidi_n(directional_status_stack_t stack, ** N1 */ - enum_bidi_type_t this_type=stack->classes[iter.i]; + enum_bidi_type_t this_type=stack->types[iter.i]; ADJUST_EOCLASS(this_type); @@ -1780,13 +1791,13 @@ static void unicode_bidi_n(directional_status_stack_t stack, while (irs_compare(&iter, &end)) { - if (IS_NI(stack->classes[iter.i])) + if (IS_NI(stack->types[iter.i])) { irs_incr(&iter); continue; } - enum_bidi_type_t other_type=stack->classes[iter.i]; + enum_bidi_type_t other_type=stack->types[iter.i]; ADJUST_EOCLASS(other_type); @@ -1811,7 +1822,7 @@ static void unicode_bidi_n(directional_status_stack_t stack, if (next_type == prev_type) { - stack->classes[start.i]=next_type; /* N1 */ + stack->types[start.i]=next_type; /* N1 */ } irs_incr(&start); @@ -1820,9 +1831,9 @@ static void unicode_bidi_n(directional_status_stack_t stack, for (iter=beg; irs_compare(&iter, &end); ) { - if (IS_NI(stack->classes[iter.i])) + if (IS_NI(stack->types[iter.i])) { - stack->classes[iter.i]= + stack->types[iter.i]= stack->levels[iter.i] & 1 ? 
UNICODE_BIDI_TYPE_R : UNICODE_BIDI_TYPE_L; /* N2 */ @@ -1841,7 +1852,7 @@ static void unicode_bidi_n(directional_status_stack_t stack, { if ((stack->levels[iter.i] & 1) == 0) { - switch (stack->classes[iter.i]) { + switch (stack->types[iter.i]) { case UNICODE_BIDI_TYPE_R: ++stack->levels[iter.i]; break; @@ -1854,7 +1865,7 @@ static void unicode_bidi_n(directional_status_stack_t stack, } else { - switch (stack->classes[iter.i]) { + switch (stack->types[iter.i]) { case UNICODE_BIDI_TYPE_L: case UNICODE_BIDI_TYPE_AN: case UNICODE_BIDI_TYPE_EN: @@ -2196,7 +2207,7 @@ static void need_marker_info_merge(struct need_marker_info *info, } static void emit_bidi_embed_levelrun(const char32_t *string, - enum_bidi_type_t *classes, + enum_bidi_type_t *types, struct bidi_embed_levelrun *run, unicode_bidi_level_t paragraph_level, unicode_bidi_level_t previous_level, @@ -2209,7 +2220,7 @@ static void emit_bidi_embed_levelrun(const char32_t *string, /* L1 */ -static int is_l1_on_or_after(const enum_bidi_type_t *classes, +static int is_l1_on_or_after(const enum_bidi_type_t *types, size_t n, size_t i, int atend) @@ -2219,7 +2230,7 @@ static int is_l1_on_or_after(const enum_bidi_type_t *classes, */ while (i<n) { - enum_bidi_type_t t=classes[i]; + enum_bidi_type_t t=types[i]; if (t == UNICODE_BIDI_TYPE_WS) { @@ -2264,14 +2275,14 @@ void unicode_bidi_embed(const char32_t *string, void *arg) { struct bidi_embed_levelrun *runs=0; - enum_bidi_type_t *classes= + enum_bidi_type_t *types= (enum_bidi_type_t *)calloc(n, sizeof(enum_bidi_type_t)); - if (!classes) + if (!types) abort(); for (size_t i=0; i<n; ++i) - classes[i]=unicode_bidi_type(string[i]); + types[i]=unicode_bidi_type(string[i]); compute_bidi_embed_levelruns(string, levels, n, @@ -2318,10 +2329,10 @@ void unicode_bidi_embed(const char32_t *string, need_marker_info_init(&need_marker); - if (classes[p->end-1] == UNICODE_BIDI_TYPE_WS) + if (types[p->end-1] == UNICODE_BIDI_TYPE_WS) { need_marker.need_marker= - is_l1_on_or_after(classes, 
n, + is_l1_on_or_after(types, n, p->end, 0); #ifdef BIDI_DEBUG @@ -2331,7 +2342,7 @@ void unicode_bidi_embed(const char32_t *string, } - emit_bidi_embed_levelrun(string, classes, + emit_bidi_embed_levelrun(string, types, p, paragraph_level, previous_level, next_level, @@ -2367,12 +2378,12 @@ void unicode_bidi_embed(const char32_t *string, size_t j=p->end; int end_with_ws= - classes[j-1] == UNICODE_BIDI_TYPE_WS; + types[j-1] == UNICODE_BIDI_TYPE_WS; while (j > p->start) { --j; - enum_bidi_type_t t=classes[j]; + enum_bidi_type_t t=types[j]; if (t == UNICODE_BIDI_TYPE_S || t == UNICODE_BIDI_TYPE_B) @@ -2419,7 +2430,7 @@ void unicode_bidi_embed(const char32_t *string, p->start=j; emit_bidi_embed_levelrun - (string, classes, p, paragraph_level, + (string, types, p, paragraph_level, previous_level, j == i @@ -2441,7 +2452,7 @@ void unicode_bidi_embed(const char32_t *string, if (end_with_ws) need_marker.need_marker= is_l1_on_or_after - (classes, n, + (types, n, orig_end, 0); need_marker_info_merge @@ -2458,7 +2469,7 @@ void unicode_bidi_embed(const char32_t *string, } free(p); } - free(classes); + free(types); } #define ADJUST_LR(t,e) do { \ @@ -2487,7 +2498,7 @@ void unicode_bidi_embed(const char32_t *string, } while (0) static void emit_bidi_embed_levelrun(const char32_t *string, - enum_bidi_type_t *classes, + enum_bidi_type_t *types, struct bidi_embed_levelrun *run, unicode_bidi_level_t paragraph_level, unicode_bidi_level_t previous_level, @@ -2522,18 +2533,18 @@ static void emit_bidi_embed_levelrun(const char32_t *string, seq.runs.cap_level_runs=1; lrun.start=run->start; lrun.end=run->end; - unicode_bidi_w(classes, &seq); + unicode_bidi_w(types, &seq); /* ** Peek at the first character's class. ** ** If the previous sequence's embedding level was the same, it ** guarantees the peristence of the embedding direction. We can - ** accept classes that default to our embedding level. + ** accept types that default to our embedding level. 
** - ** Otherwise we recognize only strong classes. + ** Otherwise we recognize only strong types. */ - enum_bidi_type_t t=classes[run->start]; + enum_bidi_type_t t=types[run->start]; if (previous_level == run->level) { @@ -2590,7 +2601,7 @@ static void emit_bidi_embed_levelrun(const char32_t *string, */ while (i < end) { - enum_bidi_type_t t=classes[i]; + enum_bidi_type_t t=types[i]; ADJUST_LR(t, e_type); @@ -2631,7 +2642,7 @@ static void emit_bidi_embed_levelrun(const char32_t *string, (*emit)(&override_start, 1, arg); while (++i < end) { - enum_bidi_type_t t=classes[i]; + enum_bidi_type_t t=types[i]; switch (t) { case UNICODE_BIDI_TYPE_WS: @@ -2659,7 +2670,7 @@ static void emit_bidi_embed_levelrun(const char32_t *string, ** Make sure that if a different embedding level follows we will ** emit a marker, to ensure strong context. */ - t=classes[run->end-1]; + t=types[run->end-1]; if (next_level != run->level) { diff --git a/unicode/unicodecpp.C b/unicode/unicodecpp.C index 4b864b3..48cc3c6 100644 --- a/unicode/unicodecpp.C +++ b/unicode/unicodecpp.C @@ -573,6 +573,14 @@ unicode::bidi_calc_types::bidi_calc_types(const std::u32string &s) unicode::bidi_calc_types::~bidi_calc_types()=default; +void unicode::bidi_calc_types::setbnl(std::u32string &s) +{ + if (s.empty() || s.size() != types.size()) + return; + + unicode_bidi_setbnl(&s[0], &types[0], s.size()); +} + std::tuple<std::vector<unicode_bidi_level_t>, unicode_bidi_level_t> unicode::bidi_calc(const bidi_calc_types &s) { |
