diff options
Diffstat (limited to 'unicode/unicode_linebreak.c')
| -rw-r--r-- | unicode/unicode_linebreak.c | 516 |
1 files changed, 293 insertions, 223 deletions
diff --git a/unicode/unicode_linebreak.c b/unicode/unicode_linebreak.c index a843c6c..9d1e73c 100644 --- a/unicode/unicode_linebreak.c +++ b/unicode/unicode_linebreak.c @@ -19,39 +19,55 @@ #define UNICODE_LB_SOT 0xFF +struct state_t { + uint8_t lb; + uint8_t ew; +}; + +typedef struct state_t state_t; + struct unicode_lb_info { int (*cb_func)(int, void *); void *cb_arg; int opts; - uint8_t savedclass; + state_t savedclass; size_t savedcmcnt; - uint8_t prevclass_min1; - uint8_t prevclass; - uint8_t prevclass_nsp; + state_t prevclass_min1; + state_t prevclass; + state_t prevclass_nsp; - int (*next_handler)(struct unicode_lb_info *, uint8_t); + /* Flag -- recursively invoked after discarding LB25 */ + char nolb25; + + /* Flag -- seen a pair of RIs */ + char nolb30a; + + int (*next_handler)(struct unicode_lb_info *, state_t); int (*end_handler)(struct unicode_lb_info *); }; /* http://www.unicode.org/reports/tr14/#Algorithm */ -static int next_def(unicode_lb_info_t, uint8_t); +static int next_def(unicode_lb_info_t, state_t); static int end_def(unicode_lb_info_t); -static int next_lb25_seenophy(unicode_lb_info_t, uint8_t); +static int next_lb25_seenophy(unicode_lb_info_t, state_t); static int end_lb25_seenophy(unicode_lb_info_t); -static int next_lb25_seennu(unicode_lb_info_t, uint8_t); +static int next_lb25_seennu(unicode_lb_info_t, state_t); -static int next_lb25_seennuclcp(unicode_lb_info_t, uint8_t); +static int next_lb25_seennuclcp(unicode_lb_info_t, state_t); static void unicode_lb_reset(unicode_lb_info_t i) { - i->prevclass_min1=i->prevclass=i->prevclass_nsp=UNICODE_LB_SOT; + i->prevclass.lb=UNICODE_LB_SOT; + i->prevclass.ew=UNICODE_EASTASIA_N; + + i->prevclass_min1=i->prevclass_nsp=i->prevclass; i->next_handler=next_def; i->end_handler=end_def; } @@ -88,7 +104,16 @@ static int end_def(unicode_lb_info_t i) /* LB3 N/A */ return 0; } -#define RESULT(x) (*i->cb_func)((x), i->cb_arg) + +/* #define DEBUG_LB */ + +#ifdef DEBUG_LB +#define RULE(x) ( (void)printf("%s\n", x)) +#else +#define RULE(x) ( (void)0 ) +#endif + +#define RESULT(x, msg) (RULE(msg),*i->cb_func)((x), i->cb_arg) int unicode_lb_next_cnt(unicode_lb_info_t i, const char32_t *chars, @@ -121,36 +146,62 @@ int unicode_lb_lookup(char32_t ch) int unicode_lb_next(unicode_lb_info_t i, char32_t ch) { - return (*i->next_handler)(i, (i->opts & UNICODE_LB_OPT_DASHWJ) && - (ch == 0x2012 || ch == 0x2013) - ? UNICODE_LB_WJ:unicode_lb_lookup(ch)); + state_t c; + + c.lb=unicode_lb_lookup(ch); + c.ew=unicode_eastasia(ch); + + if ((i->opts & UNICODE_LB_OPT_DASHWJ) && + (ch == 0x2012 || ch == 0x2013)) + { + c.lb=UNICODE_LB_WJ; + } + + return (*i->next_handler)(i, c); } -static int next_def_nolb25(unicode_lb_info_t i, - uint8_t uclass, - int nolb25); +static int next_def_common(unicode_lb_info_t i, + state_t uclass); + +/* +** Reset state for next_def_common. +*/ + +static void next_def_reset_common(unicode_lb_info_t i) +{ + i->nolb25=0; + i->nolb30a=0; +} /* ** Default logic for next unicode char. */ static int next_def(unicode_lb_info_t i, - uint8_t uclass) + state_t uclass) +{ + next_def_reset_common(i); + return next_def_common(i, uclass); +} + +static int next_def_seen_lb30a(unicode_lb_info_t i, + state_t uclass) { - return next_def_nolb25(i, uclass, 0); + i->next_handler=next_def; + next_def_reset_common(i); + i->nolb30a=1; + return next_def_common(i, uclass); } -static int next_def_nolb25(unicode_lb_info_t i, - uint8_t uclass, - /* Flag -- recursively invoked after discarding LB25 */ - int nolb25) +static int next_def_common(unicode_lb_info_t i, + state_t uclass) { /* Retrieve the previous unicode character's linebreak class. */ - uint8_t prevclass_min1=i->prevclass_min1; - uint8_t prevclass=i->prevclass; - uint8_t prevclass_nsp=i->prevclass_nsp; + state_t prevclass_min1=i->prevclass_min1; + state_t prevclass=i->prevclass; + state_t prevclass_nsp=i->prevclass_nsp; #define RESTORE (i->prevclass_min1=prevclass_min1, \ i->prevclass=prevclass, \ @@ -159,212 +210,217 @@ static int next_def_nolb25(unicode_lb_info_t i, i->prevclass_min1=i->prevclass; i->prevclass=uclass; - if (uclass != UNICODE_LB_SP) + if (uclass.lb != UNICODE_LB_SP) i->prevclass_nsp=uclass; - if (uclass == UNICODE_LB_NU) + if (uclass.lb == UNICODE_LB_NU) i->next_handler=next_lb25_seennu; /* LB25 */ - if (prevclass == UNICODE_LB_SOT) + if (prevclass.lb == UNICODE_LB_SOT) { - if (uclass == UNICODE_LB_CM) /* LB9 */ - i->prevclass=i->prevclass_nsp=uclass=UNICODE_LB_AL; - - return RESULT(UNICODE_LB_NONE); /* LB2 */ + return RESULT(UNICODE_LB_NONE, "LB2"); } - if (prevclass == UNICODE_LB_CR && uclass == UNICODE_LB_LF) - return RESULT(UNICODE_LB_NONE); /* LB5 */ + if (prevclass.lb == UNICODE_LB_BK) + return RESULT(UNICODE_LB_MANDATORY, "LB4"); - switch (prevclass) { - case UNICODE_LB_BK: + if (prevclass.lb == UNICODE_LB_CR && uclass.lb == UNICODE_LB_LF) + return RESULT(UNICODE_LB_NONE, "LB5"); + + + switch (prevclass.lb) { case UNICODE_LB_CR: case UNICODE_LB_LF: case UNICODE_LB_NL: + return RESULT(UNICODE_LB_MANDATORY, "LB5"); + } - if (uclass == UNICODE_LB_CM) - { - i->prevclass=i->prevclass_nsp=uclass=UNICODE_LB_AL; - /* LB9 */ - } - return RESULT(UNICODE_LB_MANDATORY); /* LB4, LB5 */ + switch (uclass.lb) { + /* LB6: */ + case UNICODE_LB_BK: + case UNICODE_LB_CR: + case UNICODE_LB_LF: + case UNICODE_LB_NL: + /* LB7: */ case UNICODE_LB_SP: case UNICODE_LB_ZW: - if (uclass == UNICODE_LB_CM) - i->prevclass=i->prevclass_nsp=uclass=UNICODE_LB_AL; - /* LB10 */ - break; + + return RESULT(UNICODE_LB_NONE, "LB6, LB7"); default: break; } - switch (uclass) { + if (prevclass_nsp.lb == UNICODE_LB_ZW) + return RESULT(UNICODE_LB_ALLOWED, "LB8"); - /* LB6: */ + + if (prevclass.lb == UNICODE_LB_ZWJ) + return RESULT(UNICODE_LB_NONE, "LB8a"); + + switch (prevclass.lb) { case UNICODE_LB_BK: case UNICODE_LB_CR: case UNICODE_LB_LF: case UNICODE_LB_NL: - - /* LB7: */ case UNICODE_LB_SP: case UNICODE_LB_ZW: - - return RESULT(UNICODE_LB_NONE); - default: break; - } + default: - if (prevclass_nsp == UNICODE_LB_ZW) - return RESULT(UNICODE_LB_ALLOWED); /* LB8 */ + if (uclass.lb == UNICODE_LB_CM || uclass.lb == UNICODE_LB_ZWJ) + { + RESTORE; + return RESULT(UNICODE_LB_NONE, "LB9"); + } + } - if (uclass == UNICODE_LB_CM) + if (uclass.lb == UNICODE_LB_CM || uclass.lb == UNICODE_LB_ZWJ) { - RESTORE; - return RESULT(UNICODE_LB_NONE); /* LB9 */ + uclass.lb=UNICODE_LB_AL; + RULE("LB10"); + } + if (prevclass.lb == UNICODE_LB_CM || prevclass.lb == UNICODE_LB_ZWJ) + { + prevclass.lb=UNICODE_LB_AL; + RULE("LB10"); } - if (prevclass == UNICODE_LB_WJ || uclass == UNICODE_LB_WJ) - return RESULT(UNICODE_LB_NONE); /* LB11 */ + if (prevclass.lb == UNICODE_LB_WJ || uclass.lb == UNICODE_LB_WJ) + return RESULT(UNICODE_LB_NONE, "LB11"); - if (prevclass == UNICODE_LB_GL) - return RESULT(UNICODE_LB_NONE); /* LB12 */ + if (prevclass.lb == UNICODE_LB_GL) + return RESULT(UNICODE_LB_NONE, "LB12"); - if (uclass == UNICODE_LB_GL && - prevclass != UNICODE_LB_SP && - prevclass != UNICODE_LB_BA && - prevclass != UNICODE_LB_HY) - return RESULT(UNICODE_LB_NONE); /* LB12a */ + if (uclass.lb == UNICODE_LB_GL && + prevclass.lb != UNICODE_LB_SP && + prevclass.lb != UNICODE_LB_BA && + prevclass.lb != UNICODE_LB_HY) + return RESULT(UNICODE_LB_NONE, "LB12a"); - switch (uclass) { - case UNICODE_LB_SY: - if (i->opts & UNICODE_LB_OPT_SYBREAK) - { - if (prevclass == UNICODE_LB_SP) - return RESULT(UNICODE_LB_ALLOWED); - } + if (uclass.lb == UNICODE_LB_SY && + i->opts & UNICODE_LB_OPT_SYBREAK) + { + if (prevclass.lb == UNICODE_LB_SP) + return RESULT(UNICODE_LB_ALLOWED, "LB13 (tailored)"); + } - case UNICODE_LB_CL: - case UNICODE_LB_CP: - case UNICODE_LB_EX: - case UNICODE_LB_IS: - return RESULT(UNICODE_LB_NONE); /* LB13 */ - default: - break; + if (prevclass.lb != UNICODE_LB_NU) { + switch (uclass.lb) { + case UNICODE_LB_CL: + case UNICODE_LB_CP: + case UNICODE_LB_IS: + case UNICODE_LB_SY: + return RESULT(UNICODE_LB_NONE, "LB13"); + default: + break; + } } - if ((i->opts & UNICODE_LB_OPT_SYBREAK) && prevclass == UNICODE_LB_SY) - switch (uclass) { + if (uclass.lb == UNICODE_LB_EX) + return RESULT(UNICODE_LB_NONE, "LB13"); + + if ((i->opts & UNICODE_LB_OPT_SYBREAK) && prevclass.lb == UNICODE_LB_SY) + switch (uclass.lb) { case UNICODE_LB_EX: case UNICODE_LB_AL: case UNICODE_LB_ID: - return RESULT(UNICODE_LB_NONE); + return RESULT(UNICODE_LB_NONE, "LB13"); } - if (prevclass_nsp == UNICODE_LB_OP) - return RESULT(UNICODE_LB_NONE); /* LB14 */ - - if (prevclass_nsp == UNICODE_LB_QU && uclass == UNICODE_LB_OP) - return RESULT(UNICODE_LB_NONE); /* LB15 */ + if (prevclass_nsp.lb == UNICODE_LB_OP) + return RESULT(UNICODE_LB_NONE, "LB14"); - if ((prevclass_nsp == UNICODE_LB_CL || prevclass_nsp == UNICODE_LB_CP) - && uclass == UNICODE_LB_NS) - return RESULT(UNICODE_LB_NONE); /* LB16 */ + if (prevclass_nsp.lb == UNICODE_LB_QU && uclass.lb == UNICODE_LB_OP) + return RESULT(UNICODE_LB_NONE, "LB15"); - if (prevclass_nsp == UNICODE_LB_B2 && uclass == UNICODE_LB_B2) - return RESULT(UNICODE_LB_NONE); /* LB17 */ + if ((prevclass_nsp.lb == UNICODE_LB_CL || prevclass_nsp.lb == UNICODE_LB_CP) + && uclass.lb == UNICODE_LB_NS) + return RESULT(UNICODE_LB_NONE, "LB16"); - if (prevclass == UNICODE_LB_SP) - return RESULT(UNICODE_LB_ALLOWED); /* LB18 */ + if (prevclass_nsp.lb == UNICODE_LB_B2 && uclass.lb == UNICODE_LB_B2) + return RESULT(UNICODE_LB_NONE, "LB17"); - if (uclass == UNICODE_LB_QU || prevclass == UNICODE_LB_QU) - return RESULT(UNICODE_LB_NONE); /* LB19 */ + if (prevclass.lb == UNICODE_LB_SP) + return RESULT(UNICODE_LB_ALLOWED, "LB18"); - if (uclass == UNICODE_LB_CB || prevclass == UNICODE_LB_CB) - return RESULT(UNICODE_LB_ALLOWED); /* LB20 */ + if (uclass.lb == UNICODE_LB_QU || prevclass.lb == UNICODE_LB_QU) + return RESULT(UNICODE_LB_NONE, "LB19"); - /* LB21: */ + if (uclass.lb == UNICODE_LB_CB || prevclass.lb == UNICODE_LB_CB) + return RESULT(UNICODE_LB_ALLOWED, "LB20"); - switch (uclass) { + switch (uclass.lb) { case UNICODE_LB_BA: case UNICODE_LB_HY: case UNICODE_LB_NS: - return RESULT(UNICODE_LB_NONE); + return RESULT(UNICODE_LB_NONE, "LB21"); default: break; } - if (prevclass == UNICODE_LB_BB) - return RESULT(UNICODE_LB_NONE); + if (prevclass.lb == UNICODE_LB_BB) + return RESULT(UNICODE_LB_NONE, "LB21"); - /* LB21a: */ - if (prevclass_min1 == UNICODE_LB_HL && - (prevclass == UNICODE_LB_HY || prevclass == UNICODE_LB_BA)) - return RESULT(UNICODE_LB_NONE); + if (prevclass_min1.lb == UNICODE_LB_HL && + (prevclass.lb == UNICODE_LB_HY || prevclass.lb == UNICODE_LB_BA)) + return RESULT(UNICODE_LB_NONE, "LB21a"); - /* LB21b: */ - if (prevclass == UNICODE_LB_SY && uclass == UNICODE_LB_HL) - return RESULT(UNICODE_LB_NONE); + if (prevclass.lb == UNICODE_LB_SY && uclass.lb == UNICODE_LB_HL) + return RESULT(UNICODE_LB_NONE, "LB21b"); - if (uclass == UNICODE_LB_IN) - switch (prevclass) { - case UNICODE_LB_AL: - case UNICODE_LB_EX: - case UNICODE_LB_HL: - case UNICODE_LB_ID: - case UNICODE_LB_IN: - case UNICODE_LB_NU: - return RESULT(UNICODE_LB_NONE); /* LB22 */ - default: - break; - } + if (uclass.lb == UNICODE_LB_IN) + return RESULT(UNICODE_LB_NONE, "LB22"); + + if (prevclass.lb == UNICODE_LB_AL && uclass.lb == UNICODE_LB_NU) + return RESULT(UNICODE_LB_NONE, "LB23"); + if (prevclass.lb == UNICODE_LB_HL && uclass.lb == UNICODE_LB_NU) + return RESULT(UNICODE_LB_NONE, "LB23"); + + if (prevclass.lb == UNICODE_LB_NU && uclass.lb == UNICODE_LB_AL) + return RESULT(UNICODE_LB_NONE, "LB23"); + if (prevclass.lb == UNICODE_LB_NU && uclass.lb == UNICODE_LB_HL) + return RESULT(UNICODE_LB_NONE, "LB23"); - if (prevclass == UNICODE_LB_ID && uclass == UNICODE_LB_PO) - return RESULT(UNICODE_LB_NONE); /* LB23 */ - if (prevclass == UNICODE_LB_AL && uclass == UNICODE_LB_NU) - return RESULT(UNICODE_LB_NONE); /* LB23 */ - if (prevclass == UNICODE_LB_HL && uclass == UNICODE_LB_NU) - return RESULT(UNICODE_LB_NONE); /* LB23 */ - - if (prevclass == UNICODE_LB_NU && uclass == UNICODE_LB_AL) - return RESULT(UNICODE_LB_NONE); /* LB23 */ - if (prevclass == UNICODE_LB_NU && uclass == UNICODE_LB_HL) - return RESULT(UNICODE_LB_NONE); /* LB23 */ - - - if (prevclass == UNICODE_LB_PR && uclass == UNICODE_LB_ID) - return RESULT(UNICODE_LB_NONE); /* LB24 */ - if (prevclass == UNICODE_LB_PR && uclass == UNICODE_LB_AL) - return RESULT(UNICODE_LB_NONE); /* LB24 */ - if (prevclass == UNICODE_LB_PR && uclass == UNICODE_LB_HL) - return RESULT(UNICODE_LB_NONE); /* LB24 */ - if (prevclass == UNICODE_LB_PO && uclass == UNICODE_LB_AL) - return RESULT(UNICODE_LB_NONE); /* LB24 */ - if (prevclass == UNICODE_LB_PO && uclass == UNICODE_LB_HL) - return RESULT(UNICODE_LB_NONE); /* LB24 */ - - if ((i->opts & UNICODE_LB_OPT_PRBREAK) && uclass == UNICODE_LB_PR) - switch (prevclass) { + if (prevclass.lb == UNICODE_LB_PR && + (uclass.lb == UNICODE_LB_ID || uclass.lb == UNICODE_LB_EB || + uclass.lb == UNICODE_LB_EM)) + return RESULT(UNICODE_LB_NONE, "LB23a"); + + if ((prevclass.lb == UNICODE_LB_ID || prevclass.lb == UNICODE_LB_EB || + prevclass.lb == UNICODE_LB_EM) && + uclass.lb == UNICODE_LB_PO) + return RESULT(UNICODE_LB_NONE, "LB23a"); + + if ((prevclass.lb == UNICODE_LB_PR || prevclass.lb == UNICODE_LB_PO) && + (uclass.lb == UNICODE_LB_AL || uclass.lb == UNICODE_LB_HL)) + return RESULT(UNICODE_LB_NONE, "LB24"); + + if ((prevclass.lb == UNICODE_LB_AL || prevclass.lb == UNICODE_LB_HL) && + (uclass.lb == UNICODE_LB_PR || uclass.lb == UNICODE_LB_PO)) + return RESULT(UNICODE_LB_NONE, "LB24"); + + if ((i->opts & UNICODE_LB_OPT_PRBREAK) && uclass.lb == UNICODE_LB_PR) + switch (prevclass.lb) { case UNICODE_LB_PR: case UNICODE_LB_AL: case UNICODE_LB_ID: - return RESULT(UNICODE_LB_NONE); + return RESULT(UNICODE_LB_NONE, "LB24 (tailored)"); } - if (!nolb25 && - (prevclass == UNICODE_LB_PR || prevclass == UNICODE_LB_PO)) + if (!i->nolb25 && + (prevclass.lb == UNICODE_LB_PR || prevclass.lb == UNICODE_LB_PO)) { - if (uclass == UNICODE_LB_NU) - return RESULT(UNICODE_LB_NONE); /* LB25 */ + if (uclass.lb == UNICODE_LB_NU) + return RESULT(UNICODE_LB_NONE, "LB25"); - if (uclass == UNICODE_LB_OP || uclass == UNICODE_LB_HY) + if (uclass.lb == UNICODE_LB_OP || uclass.lb == UNICODE_LB_HY) { RESTORE; - + RULE("LB25 (start)"); i->savedclass=uclass; i->savedcmcnt=0; i->next_handler=next_lb25_seenophy; @@ -373,81 +429,93 @@ static int next_def_nolb25(unicode_lb_info_t i, } } - if ((prevclass == UNICODE_LB_OP || prevclass == UNICODE_LB_HY) && - uclass == UNICODE_LB_NU) - return RESULT(UNICODE_LB_NONE); /* LB25 */ + if ((prevclass.lb == UNICODE_LB_OP || prevclass.lb == UNICODE_LB_HY) && + uclass.lb == UNICODE_LB_NU) + return RESULT(UNICODE_LB_NONE, "LB25"); /*****/ - if (prevclass == UNICODE_LB_JL) - switch (uclass) { + if (prevclass.lb == UNICODE_LB_JL) + switch (uclass.lb) { case UNICODE_LB_JL: case UNICODE_LB_JV: case UNICODE_LB_H2: case UNICODE_LB_H3: - return RESULT(UNICODE_LB_NONE); /* LB26 */ + return RESULT(UNICODE_LB_NONE, "LB26"); default: break; } - if ((prevclass == UNICODE_LB_JV || - prevclass == UNICODE_LB_H2) && - (uclass == UNICODE_LB_JV || - uclass == UNICODE_LB_JT)) - return RESULT(UNICODE_LB_NONE); /* LB26 */ + if ((prevclass.lb == UNICODE_LB_JV || + prevclass.lb == UNICODE_LB_H2) && + (uclass.lb == UNICODE_LB_JV || + uclass.lb == UNICODE_LB_JT)) + return RESULT(UNICODE_LB_NONE, "LB26"); - if ((prevclass == UNICODE_LB_JT || - prevclass == UNICODE_LB_H3) && - uclass == UNICODE_LB_JT) - return RESULT(UNICODE_LB_NONE); /* LB26 */ + if ((prevclass.lb == UNICODE_LB_JT || + prevclass.lb == UNICODE_LB_H3) && + uclass.lb == UNICODE_LB_JT) + return RESULT(UNICODE_LB_NONE, "LB26"); - switch (prevclass) { + switch (prevclass.lb) { case UNICODE_LB_JL: case UNICODE_LB_JV: case UNICODE_LB_JT: case UNICODE_LB_H2: case UNICODE_LB_H3: - if (uclass == UNICODE_LB_IN || uclass == UNICODE_LB_PO) - return RESULT(UNICODE_LB_NONE); /* LB27 */ + if (uclass.lb == UNICODE_LB_IN || uclass.lb == UNICODE_LB_PO) + return RESULT(UNICODE_LB_NONE, "LB27"); default: break; } - switch (uclass) { + switch (uclass.lb) { case UNICODE_LB_JL: case UNICODE_LB_JV: case UNICODE_LB_JT: case UNICODE_LB_H2: case UNICODE_LB_H3: - if (prevclass == UNICODE_LB_PR) - return RESULT(UNICODE_LB_NONE); /* LB27 */ + if (prevclass.lb == UNICODE_LB_PR) + return RESULT(UNICODE_LB_NONE, "LB27"); default: break; } - if ((prevclass == UNICODE_LB_AL || prevclass == UNICODE_LB_HL) - && (uclass == UNICODE_LB_AL || uclass == UNICODE_LB_HL)) - return RESULT(UNICODE_LB_NONE); /* LB28 */ - - if (prevclass == UNICODE_LB_IS && - (uclass == UNICODE_LB_AL || uclass == UNICODE_LB_HL)) - return RESULT(UNICODE_LB_NONE); /* LB29 */ - - if ((prevclass == UNICODE_LB_AL || prevclass == UNICODE_LB_HL - || prevclass == UNICODE_LB_NU) && - uclass == UNICODE_LB_OP) - return RESULT(UNICODE_LB_NONE); /* LB30 */ - - if ((uclass == UNICODE_LB_AL || uclass == UNICODE_LB_HL - || uclass == UNICODE_LB_NU) && - prevclass == UNICODE_LB_CP) - return RESULT(UNICODE_LB_NONE); /* LB30 */ + if ((prevclass.lb == UNICODE_LB_AL || prevclass.lb == UNICODE_LB_HL) + && (uclass.lb == UNICODE_LB_AL || uclass.lb == UNICODE_LB_HL)) + return RESULT(UNICODE_LB_NONE, "LB28"); + + if (prevclass.lb == UNICODE_LB_IS && + (uclass.lb == UNICODE_LB_AL || uclass.lb == UNICODE_LB_HL)) + return RESULT(UNICODE_LB_NONE, "LB29"); + + if ((prevclass.lb == UNICODE_LB_AL || prevclass.lb == UNICODE_LB_HL + || prevclass.lb == UNICODE_LB_NU) && + (uclass.lb == UNICODE_LB_OP && uclass.ew != UNICODE_EASTASIA_F + && uclass.ew != UNICODE_EASTASIA_W + && uclass.ew != UNICODE_EASTASIA_H)) + return RESULT(UNICODE_LB_NONE, "LB30"); + + if ((uclass.lb == UNICODE_LB_AL || uclass.lb == UNICODE_LB_HL + || uclass.lb == UNICODE_LB_NU) && + (prevclass.lb == UNICODE_LB_CP + && prevclass.ew != UNICODE_EASTASIA_F + && prevclass.ew != UNICODE_EASTASIA_W + && prevclass.ew != UNICODE_EASTASIA_H)) + return RESULT(UNICODE_LB_NONE, "LB30"); + + if (uclass.lb == UNICODE_LB_RI && prevclass.lb == UNICODE_LB_RI && + !i->nolb30a) + { + i->next_handler=next_def_seen_lb30a; + return RESULT(UNICODE_LB_NONE, "LB30a"); + } - if (uclass == UNICODE_LB_RI && prevclass == UNICODE_LB_RI) - return RESULT(UNICODE_LB_NONE); /* LB30a */ + if (prevclass.lb == UNICODE_LB_EB && uclass.lb == UNICODE_LB_EM) + return RESULT(UNICODE_LB_NONE, "LB30b"); - return RESULT(UNICODE_LB_ALLOWED); /* LB31 */ + return RESULT(UNICODE_LB_ALLOWED, "LB31"); } /* @@ -459,7 +527,7 @@ static int unwind_lb25_seenophy(unicode_lb_info_t i) { int rc; - /*uint8_t class=i->savedclass;*/ + /*state_t class=i->savedclass;*/ int nolb25_flag=1; i->next_handler=next_def; @@ -467,7 +535,9 @@ static int unwind_lb25_seenophy(unicode_lb_info_t i) do { - rc=next_def_nolb25(i, i->savedclass, nolb25_flag); + next_def_reset_common(i); + i->nolb25=nolb25_flag; + rc=next_def_common(i, i->savedclass); if (rc) return rc; @@ -484,29 +554,29 @@ static int unwind_lb25_seenophy(unicode_lb_info_t i) */ static int next_lb25_seenophy(unicode_lb_info_t i, - uint8_t uclass) + state_t uclass) { int rc; - if (uclass == UNICODE_LB_CM) + if (uclass.lb == UNICODE_LB_CM) { ++i->savedcmcnt; /* Keep track of CMs, and try again */ return 0; } - if (uclass != UNICODE_LB_NU) + if (uclass.lb != UNICODE_LB_NU) { rc=unwind_lb25_seenophy(i); if (rc) return rc; - return next_def_nolb25(i, uclass, 0); + return next_def(i, uclass); } do { - rc=RESULT(UNICODE_LB_NONE); /* (OP|HY) feedback */ + rc=RESULT(UNICODE_LB_NONE, "LB25 (OP|HY)"); /* (OP|HY) feedback */ if (rc) return rc; @@ -515,7 +585,7 @@ static int next_lb25_seenophy(unicode_lb_info_t i, i->next_handler=next_lb25_seennu; i->end_handler=end_def; i->prevclass=i->prevclass_nsp=uclass; - return RESULT(UNICODE_LB_NONE); + return RESULT(UNICODE_LB_NONE, "LB25"); } /* @@ -534,33 +604,33 @@ static int end_lb25_seenophy(unicode_lb_info_t i) /* ** Seen an NU, modified LB25 regexp. */ -static int next_lb25_seennu(unicode_lb_info_t i, uint8_t uclass) +static int next_lb25_seennu(unicode_lb_info_t i, state_t uclass) { - if (uclass == UNICODE_LB_NU || uclass == UNICODE_LB_SY || - uclass == UNICODE_LB_IS) + if (uclass.lb == UNICODE_LB_NU || uclass.lb == UNICODE_LB_SY || + uclass.lb == UNICODE_LB_IS) { i->prevclass=i->prevclass_nsp=uclass; - return RESULT(UNICODE_LB_NONE); + return RESULT(UNICODE_LB_NONE, "LB25"); } - if (uclass == UNICODE_LB_CM) - return RESULT(UNICODE_LB_NONE); /* LB9 */ + if (uclass.lb == UNICODE_LB_CM || uclass.lb == UNICODE_LB_ZWJ) + return RESULT(UNICODE_LB_NONE, "LB9 (LB25)"); - if (uclass == UNICODE_LB_CL || uclass == UNICODE_LB_CP) + if (uclass.lb == UNICODE_LB_CL || uclass.lb == UNICODE_LB_CP) { i->prevclass=i->prevclass_nsp=uclass; i->next_handler=next_lb25_seennuclcp; i->end_handler=end_def; - return RESULT(UNICODE_LB_NONE); + return RESULT(UNICODE_LB_NONE, "LB25"); } i->next_handler=next_def; i->end_handler=end_def; - if (uclass == UNICODE_LB_PR || uclass == UNICODE_LB_PO) + if (uclass.lb == UNICODE_LB_PR || uclass.lb == UNICODE_LB_PO) { i->prevclass=i->prevclass_nsp=uclass; - return RESULT(UNICODE_LB_NONE); + return RESULT(UNICODE_LB_NONE, "LB25"); } return next_def(i, uclass); /* Not a prefix, process normally */ @@ -569,19 +639,19 @@ static int next_lb25_seennu(unicode_lb_info_t i, uint8_t uclass) /* ** Seen CL|CP, in the modified LB25 regexp. */ -static int next_lb25_seennuclcp(unicode_lb_info_t i, uint8_t uclass) +static int next_lb25_seennuclcp(unicode_lb_info_t i, state_t uclass) { - if (uclass == UNICODE_LB_CM) - return RESULT(UNICODE_LB_NONE); /* LB9 */ + if (uclass.lb == UNICODE_LB_CM || uclass.lb == UNICODE_LB_ZWJ) + return RESULT(UNICODE_LB_NONE, "LB9 (LB25)"); i->next_handler=next_def; i->end_handler=end_def; - if (uclass == UNICODE_LB_PR || uclass == UNICODE_LB_PO) + if (uclass.lb == UNICODE_LB_PR || uclass.lb == UNICODE_LB_PO) { i->prevclass=i->prevclass_nsp=uclass; - return RESULT(UNICODE_LB_NONE); + return RESULT(UNICODE_LB_NONE, "LB25"); } return next_def(i, uclass); |
