summary refs log tree commit diff stats
path: root/unicode/unicode_linebreak.c
diff options
context:
space:
mode:
Diffstat (limited to 'unicode/unicode_linebreak.c')
-rw-r--r-- unicode/unicode_linebreak.c 516
1 files changed, 293 insertions, 223 deletions
diff --git a/unicode/unicode_linebreak.c b/unicode/unicode_linebreak.c
index a843c6c..9d1e73c 100644
--- a/unicode/unicode_linebreak.c
+++ b/unicode/unicode_linebreak.c
@@ -19,39 +19,55 @@
#define UNICODE_LB_SOT 0xFF
+struct state_t {
+ uint8_t lb;
+ uint8_t ew;
+};
+
+typedef struct state_t state_t;
+
struct unicode_lb_info {
int (*cb_func)(int, void *);
void *cb_arg;
int opts;
- uint8_t savedclass;
+ state_t savedclass;
size_t savedcmcnt;
- uint8_t prevclass_min1;
- uint8_t prevclass;
- uint8_t prevclass_nsp;
+ state_t prevclass_min1;
+ state_t prevclass;
+ state_t prevclass_nsp;
- int (*next_handler)(struct unicode_lb_info *, uint8_t);
+ /* Flag -- recursively invoked after discarding LB25 */
+ char nolb25;
+
+ /* Flag -- seen a pair of RIs */
+ char nolb30a;
+
+ int (*next_handler)(struct unicode_lb_info *, state_t);
int (*end_handler)(struct unicode_lb_info *);
};
/* http://www.unicode.org/reports/tr14/#Algorithm */
-static int next_def(unicode_lb_info_t, uint8_t);
+static int next_def(unicode_lb_info_t, state_t);
static int end_def(unicode_lb_info_t);
-static int next_lb25_seenophy(unicode_lb_info_t, uint8_t);
+static int next_lb25_seenophy(unicode_lb_info_t, state_t);
static int end_lb25_seenophy(unicode_lb_info_t);
-static int next_lb25_seennu(unicode_lb_info_t, uint8_t);
+static int next_lb25_seennu(unicode_lb_info_t, state_t);
-static int next_lb25_seennuclcp(unicode_lb_info_t, uint8_t);
+static int next_lb25_seennuclcp(unicode_lb_info_t, state_t);
static void unicode_lb_reset(unicode_lb_info_t i)
{
- i->prevclass_min1=i->prevclass=i->prevclass_nsp=UNICODE_LB_SOT;
+ i->prevclass.lb=UNICODE_LB_SOT;
+ i->prevclass.ew=UNICODE_EASTASIA_N;
+
+ i->prevclass_min1=i->prevclass_nsp=i->prevclass;
i->next_handler=next_def;
i->end_handler=end_def;
}
@@ -88,7 +104,16 @@ static int end_def(unicode_lb_info_t i)
/* LB3 N/A */
return 0;
}
-#define RESULT(x) (*i->cb_func)((x), i->cb_arg)
+
+/* #define DEBUG_LB */
+
+#ifdef DEBUG_LB
+#define RULE(x) ( (void)printf("%s\n", x))
+#else
+#define RULE(x) ( (void)0 )
+#endif
+
+#define RESULT(x, msg) (RULE(msg),*i->cb_func)((x), i->cb_arg)
int unicode_lb_next_cnt(unicode_lb_info_t i,
const char32_t *chars,
@@ -121,36 +146,62 @@ int unicode_lb_lookup(char32_t ch)
int unicode_lb_next(unicode_lb_info_t i,
char32_t ch)
{
- return (*i->next_handler)(i, (i->opts & UNICODE_LB_OPT_DASHWJ) &&
- (ch == 0x2012 || ch == 0x2013)
- ? UNICODE_LB_WJ:unicode_lb_lookup(ch));
+ state_t c;
+
+ c.lb=unicode_lb_lookup(ch);
+ c.ew=unicode_eastasia(ch);
+
+ if ((i->opts & UNICODE_LB_OPT_DASHWJ) &&
+ (ch == 0x2012 || ch == 0x2013))
+ {
+ c.lb=UNICODE_LB_WJ;
+ }
+
+ return (*i->next_handler)(i, c);
}
-static int next_def_nolb25(unicode_lb_info_t i,
- uint8_t uclass,
- int nolb25);
+static int next_def_common(unicode_lb_info_t i,
+ state_t uclass);
+
+/*
+** Reset state for next_def_common.
+*/
+
+static void next_def_reset_common(unicode_lb_info_t i)
+{
+ i->nolb25=0;
+ i->nolb30a=0;
+}
/*
** Default logic for next unicode char.
*/
static int next_def(unicode_lb_info_t i,
- uint8_t uclass)
+ state_t uclass)
+{
+ next_def_reset_common(i);
+ return next_def_common(i, uclass);
+}
+
+static int next_def_seen_lb30a(unicode_lb_info_t i,
+ state_t uclass)
{
- return next_def_nolb25(i, uclass, 0);
+ i->next_handler=next_def;
+ next_def_reset_common(i);
+ i->nolb30a=1;
+ return next_def_common(i, uclass);
}
-static int next_def_nolb25(unicode_lb_info_t i,
- uint8_t uclass,
- /* Flag -- recursively invoked after discarding LB25 */
- int nolb25)
+static int next_def_common(unicode_lb_info_t i,
+ state_t uclass)
{
/* Retrieve the previous unicode character's linebreak class. */
- uint8_t prevclass_min1=i->prevclass_min1;
- uint8_t prevclass=i->prevclass;
- uint8_t prevclass_nsp=i->prevclass_nsp;
+ state_t prevclass_min1=i->prevclass_min1;
+ state_t prevclass=i->prevclass;
+ state_t prevclass_nsp=i->prevclass_nsp;
#define RESTORE (i->prevclass_min1=prevclass_min1, \
i->prevclass=prevclass, \
@@ -159,212 +210,217 @@ static int next_def_nolb25(unicode_lb_info_t i,
i->prevclass_min1=i->prevclass;
i->prevclass=uclass;
- if (uclass != UNICODE_LB_SP)
+ if (uclass.lb != UNICODE_LB_SP)
i->prevclass_nsp=uclass;
- if (uclass == UNICODE_LB_NU)
+ if (uclass.lb == UNICODE_LB_NU)
i->next_handler=next_lb25_seennu; /* LB25 */
- if (prevclass == UNICODE_LB_SOT)
+ if (prevclass.lb == UNICODE_LB_SOT)
{
- if (uclass == UNICODE_LB_CM) /* LB9 */
- i->prevclass=i->prevclass_nsp=uclass=UNICODE_LB_AL;
-
- return RESULT(UNICODE_LB_NONE); /* LB2 */
+ return RESULT(UNICODE_LB_NONE, "LB2");
}
- if (prevclass == UNICODE_LB_CR && uclass == UNICODE_LB_LF)
- return RESULT(UNICODE_LB_NONE); /* LB5 */
+ if (prevclass.lb == UNICODE_LB_BK)
+ return RESULT(UNICODE_LB_MANDATORY, "LB4");
- switch (prevclass) {
- case UNICODE_LB_BK:
+ if (prevclass.lb == UNICODE_LB_CR && uclass.lb == UNICODE_LB_LF)
+ return RESULT(UNICODE_LB_NONE, "LB5");
+
+
+ switch (prevclass.lb) {
case UNICODE_LB_CR:
case UNICODE_LB_LF:
case UNICODE_LB_NL:
+ return RESULT(UNICODE_LB_MANDATORY, "LB5");
+ }
- if (uclass == UNICODE_LB_CM)
- {
- i->prevclass=i->prevclass_nsp=uclass=UNICODE_LB_AL;
- /* LB9 */
- }
- return RESULT(UNICODE_LB_MANDATORY); /* LB4, LB5 */
+ switch (uclass.lb) {
+ /* LB6: */
+ case UNICODE_LB_BK:
+ case UNICODE_LB_CR:
+ case UNICODE_LB_LF:
+ case UNICODE_LB_NL:
+ /* LB7: */
case UNICODE_LB_SP:
case UNICODE_LB_ZW:
- if (uclass == UNICODE_LB_CM)
- i->prevclass=i->prevclass_nsp=uclass=UNICODE_LB_AL;
- /* LB10 */
- break;
+
+ return RESULT(UNICODE_LB_NONE, "LB6, LB7");
default:
break;
}
- switch (uclass) {
+ if (prevclass_nsp.lb == UNICODE_LB_ZW)
+ return RESULT(UNICODE_LB_ALLOWED, "LB8");
- /* LB6: */
+
+ if (prevclass.lb == UNICODE_LB_ZWJ)
+ return RESULT(UNICODE_LB_NONE, "LB8a");
+
+ switch (prevclass.lb) {
case UNICODE_LB_BK:
case UNICODE_LB_CR:
case UNICODE_LB_LF:
case UNICODE_LB_NL:
-
- /* LB7: */
case UNICODE_LB_SP:
case UNICODE_LB_ZW:
-
- return RESULT(UNICODE_LB_NONE);
- default:
break;
- }
+ default:
- if (prevclass_nsp == UNICODE_LB_ZW)
- return RESULT(UNICODE_LB_ALLOWED); /* LB8 */
+ if (uclass.lb == UNICODE_LB_CM || uclass.lb == UNICODE_LB_ZWJ)
+ {
+ RESTORE;
+ return RESULT(UNICODE_LB_NONE, "LB9");
+ }
+ }
- if (uclass == UNICODE_LB_CM)
+ if (uclass.lb == UNICODE_LB_CM || uclass.lb == UNICODE_LB_ZWJ)
{
- RESTORE;
- return RESULT(UNICODE_LB_NONE); /* LB9 */
+ uclass.lb=UNICODE_LB_AL;
+ RULE("LB10");
+ }
+ if (prevclass.lb == UNICODE_LB_CM || prevclass.lb == UNICODE_LB_ZWJ)
+ {
+ prevclass.lb=UNICODE_LB_AL;
+ RULE("LB10");
}
- if (prevclass == UNICODE_LB_WJ || uclass == UNICODE_LB_WJ)
- return RESULT(UNICODE_LB_NONE); /* LB11 */
+ if (prevclass.lb == UNICODE_LB_WJ || uclass.lb == UNICODE_LB_WJ)
+ return RESULT(UNICODE_LB_NONE, "LB11");
- if (prevclass == UNICODE_LB_GL)
- return RESULT(UNICODE_LB_NONE); /* LB12 */
+ if (prevclass.lb == UNICODE_LB_GL)
+ return RESULT(UNICODE_LB_NONE, "LB12");
- if (uclass == UNICODE_LB_GL &&
- prevclass != UNICODE_LB_SP &&
- prevclass != UNICODE_LB_BA &&
- prevclass != UNICODE_LB_HY)
- return RESULT(UNICODE_LB_NONE); /* LB12a */
+ if (uclass.lb == UNICODE_LB_GL &&
+ prevclass.lb != UNICODE_LB_SP &&
+ prevclass.lb != UNICODE_LB_BA &&
+ prevclass.lb != UNICODE_LB_HY)
+ return RESULT(UNICODE_LB_NONE, "LB12a");
- switch (uclass) {
- case UNICODE_LB_SY:
- if (i->opts & UNICODE_LB_OPT_SYBREAK)
- {
- if (prevclass == UNICODE_LB_SP)
- return RESULT(UNICODE_LB_ALLOWED);
- }
+ if (uclass.lb == UNICODE_LB_SY &&
+ i->opts & UNICODE_LB_OPT_SYBREAK)
+ {
+ if (prevclass.lb == UNICODE_LB_SP)
+ return RESULT(UNICODE_LB_ALLOWED, "LB13 (tailored)");
+ }
- case UNICODE_LB_CL:
- case UNICODE_LB_CP:
- case UNICODE_LB_EX:
- case UNICODE_LB_IS:
- return RESULT(UNICODE_LB_NONE); /* LB13 */
- default:
- break;
+ if (prevclass.lb != UNICODE_LB_NU) {
+ switch (uclass.lb) {
+ case UNICODE_LB_CL:
+ case UNICODE_LB_CP:
+ case UNICODE_LB_IS:
+ case UNICODE_LB_SY:
+ return RESULT(UNICODE_LB_NONE, "LB13");
+ default:
+ break;
+ }
}
- if ((i->opts & UNICODE_LB_OPT_SYBREAK) && prevclass == UNICODE_LB_SY)
- switch (uclass) {
+ if (uclass.lb == UNICODE_LB_EX)
+ return RESULT(UNICODE_LB_NONE, "LB13");
+
+ if ((i->opts & UNICODE_LB_OPT_SYBREAK) && prevclass.lb == UNICODE_LB_SY)
+ switch (uclass.lb) {
case UNICODE_LB_EX:
case UNICODE_LB_AL:
case UNICODE_LB_ID:
- return RESULT(UNICODE_LB_NONE);
+ return RESULT(UNICODE_LB_NONE, "LB13");
}
- if (prevclass_nsp == UNICODE_LB_OP)
- return RESULT(UNICODE_LB_NONE); /* LB14 */
-
- if (prevclass_nsp == UNICODE_LB_QU && uclass == UNICODE_LB_OP)
- return RESULT(UNICODE_LB_NONE); /* LB15 */
+ if (prevclass_nsp.lb == UNICODE_LB_OP)
+ return RESULT(UNICODE_LB_NONE, "LB14");
- if ((prevclass_nsp == UNICODE_LB_CL || prevclass_nsp == UNICODE_LB_CP)
- && uclass == UNICODE_LB_NS)
- return RESULT(UNICODE_LB_NONE); /* LB16 */
+ if (prevclass_nsp.lb == UNICODE_LB_QU && uclass.lb == UNICODE_LB_OP)
+ return RESULT(UNICODE_LB_NONE, "LB15");
- if (prevclass_nsp == UNICODE_LB_B2 && uclass == UNICODE_LB_B2)
- return RESULT(UNICODE_LB_NONE); /* LB17 */
+ if ((prevclass_nsp.lb == UNICODE_LB_CL || prevclass_nsp.lb == UNICODE_LB_CP)
+ && uclass.lb == UNICODE_LB_NS)
+ return RESULT(UNICODE_LB_NONE, "LB16");
- if (prevclass == UNICODE_LB_SP)
- return RESULT(UNICODE_LB_ALLOWED); /* LB18 */
+ if (prevclass_nsp.lb == UNICODE_LB_B2 && uclass.lb == UNICODE_LB_B2)
+ return RESULT(UNICODE_LB_NONE, "LB17");
- if (uclass == UNICODE_LB_QU || prevclass == UNICODE_LB_QU)
- return RESULT(UNICODE_LB_NONE); /* LB19 */
+ if (prevclass.lb == UNICODE_LB_SP)
+ return RESULT(UNICODE_LB_ALLOWED, "LB18");
- if (uclass == UNICODE_LB_CB || prevclass == UNICODE_LB_CB)
- return RESULT(UNICODE_LB_ALLOWED); /* LB20 */
+ if (uclass.lb == UNICODE_LB_QU || prevclass.lb == UNICODE_LB_QU)
+ return RESULT(UNICODE_LB_NONE, "LB19");
- /* LB21: */
+ if (uclass.lb == UNICODE_LB_CB || prevclass.lb == UNICODE_LB_CB)
+ return RESULT(UNICODE_LB_ALLOWED, "LB20");
- switch (uclass) {
+ switch (uclass.lb) {
case UNICODE_LB_BA:
case UNICODE_LB_HY:
case UNICODE_LB_NS:
- return RESULT(UNICODE_LB_NONE);
+ return RESULT(UNICODE_LB_NONE, "LB21");
default:
break;
}
- if (prevclass == UNICODE_LB_BB)
- return RESULT(UNICODE_LB_NONE);
+ if (prevclass.lb == UNICODE_LB_BB)
+ return RESULT(UNICODE_LB_NONE, "LB21");
- /* LB21a: */
- if (prevclass_min1 == UNICODE_LB_HL &&
- (prevclass == UNICODE_LB_HY || prevclass == UNICODE_LB_BA))
- return RESULT(UNICODE_LB_NONE);
+ if (prevclass_min1.lb == UNICODE_LB_HL &&
+ (prevclass.lb == UNICODE_LB_HY || prevclass.lb == UNICODE_LB_BA))
+ return RESULT(UNICODE_LB_NONE, "LB21a");
- /* LB21b: */
- if (prevclass == UNICODE_LB_SY && uclass == UNICODE_LB_HL)
- return RESULT(UNICODE_LB_NONE);
+ if (prevclass.lb == UNICODE_LB_SY && uclass.lb == UNICODE_LB_HL)
+ return RESULT(UNICODE_LB_NONE, "LB21b");
- if (uclass == UNICODE_LB_IN)
- switch (prevclass) {
- case UNICODE_LB_AL:
- case UNICODE_LB_EX:
- case UNICODE_LB_HL:
- case UNICODE_LB_ID:
- case UNICODE_LB_IN:
- case UNICODE_LB_NU:
- return RESULT(UNICODE_LB_NONE); /* LB22 */
- default:
- break;
- }
+ if (uclass.lb == UNICODE_LB_IN)
+ return RESULT(UNICODE_LB_NONE, "LB22");
+
+ if (prevclass.lb == UNICODE_LB_AL && uclass.lb == UNICODE_LB_NU)
+ return RESULT(UNICODE_LB_NONE, "LB23");
+ if (prevclass.lb == UNICODE_LB_HL && uclass.lb == UNICODE_LB_NU)
+ return RESULT(UNICODE_LB_NONE, "LB23");
+
+ if (prevclass.lb == UNICODE_LB_NU && uclass.lb == UNICODE_LB_AL)
+ return RESULT(UNICODE_LB_NONE, "LB23");
+ if (prevclass.lb == UNICODE_LB_NU && uclass.lb == UNICODE_LB_HL)
+ return RESULT(UNICODE_LB_NONE, "LB23");
- if (prevclass == UNICODE_LB_ID && uclass == UNICODE_LB_PO)
- return RESULT(UNICODE_LB_NONE); /* LB23 */
- if (prevclass == UNICODE_LB_AL && uclass == UNICODE_LB_NU)
- return RESULT(UNICODE_LB_NONE); /* LB23 */
- if (prevclass == UNICODE_LB_HL && uclass == UNICODE_LB_NU)
- return RESULT(UNICODE_LB_NONE); /* LB23 */
-
- if (prevclass == UNICODE_LB_NU && uclass == UNICODE_LB_AL)
- return RESULT(UNICODE_LB_NONE); /* LB23 */
- if (prevclass == UNICODE_LB_NU && uclass == UNICODE_LB_HL)
- return RESULT(UNICODE_LB_NONE); /* LB23 */
-
-
- if (prevclass == UNICODE_LB_PR && uclass == UNICODE_LB_ID)
- return RESULT(UNICODE_LB_NONE); /* LB24 */
- if (prevclass == UNICODE_LB_PR && uclass == UNICODE_LB_AL)
- return RESULT(UNICODE_LB_NONE); /* LB24 */
- if (prevclass == UNICODE_LB_PR && uclass == UNICODE_LB_HL)
- return RESULT(UNICODE_LB_NONE); /* LB24 */
- if (prevclass == UNICODE_LB_PO && uclass == UNICODE_LB_AL)
- return RESULT(UNICODE_LB_NONE); /* LB24 */
- if (prevclass == UNICODE_LB_PO && uclass == UNICODE_LB_HL)
- return RESULT(UNICODE_LB_NONE); /* LB24 */
-
- if ((i->opts & UNICODE_LB_OPT_PRBREAK) && uclass == UNICODE_LB_PR)
- switch (prevclass) {
+ if (prevclass.lb == UNICODE_LB_PR &&
+ (uclass.lb == UNICODE_LB_ID || uclass.lb == UNICODE_LB_EB ||
+ uclass.lb == UNICODE_LB_EM))
+ return RESULT(UNICODE_LB_NONE, "LB23a");
+
+ if ((prevclass.lb == UNICODE_LB_ID || prevclass.lb == UNICODE_LB_EB ||
+ prevclass.lb == UNICODE_LB_EM) &&
+ uclass.lb == UNICODE_LB_PO)
+ return RESULT(UNICODE_LB_NONE, "LB23a");
+
+ if ((prevclass.lb == UNICODE_LB_PR || prevclass.lb == UNICODE_LB_PO) &&
+ (uclass.lb == UNICODE_LB_AL || uclass.lb == UNICODE_LB_HL))
+ return RESULT(UNICODE_LB_NONE, "LB24");
+
+ if ((prevclass.lb == UNICODE_LB_AL || prevclass.lb == UNICODE_LB_HL) &&
+ (uclass.lb == UNICODE_LB_PR || uclass.lb == UNICODE_LB_PO))
+ return RESULT(UNICODE_LB_NONE, "LB24");
+
+ if ((i->opts & UNICODE_LB_OPT_PRBREAK) && uclass.lb == UNICODE_LB_PR)
+ switch (prevclass.lb) {
case UNICODE_LB_PR:
case UNICODE_LB_AL:
case UNICODE_LB_ID:
- return RESULT(UNICODE_LB_NONE);
+ return RESULT(UNICODE_LB_NONE, "LB24 (tailored)");
}
- if (!nolb25 &&
- (prevclass == UNICODE_LB_PR || prevclass == UNICODE_LB_PO))
+ if (!i->nolb25 &&
+ (prevclass.lb == UNICODE_LB_PR || prevclass.lb == UNICODE_LB_PO))
{
- if (uclass == UNICODE_LB_NU)
- return RESULT(UNICODE_LB_NONE); /* LB25 */
+ if (uclass.lb == UNICODE_LB_NU)
+ return RESULT(UNICODE_LB_NONE, "LB25");
- if (uclass == UNICODE_LB_OP || uclass == UNICODE_LB_HY)
+ if (uclass.lb == UNICODE_LB_OP || uclass.lb == UNICODE_LB_HY)
{
RESTORE;
-
+ RULE("LB25 (start)");
i->savedclass=uclass;
i->savedcmcnt=0;
i->next_handler=next_lb25_seenophy;
@@ -373,81 +429,93 @@ static int next_def_nolb25(unicode_lb_info_t i,
}
}
- if ((prevclass == UNICODE_LB_OP || prevclass == UNICODE_LB_HY) &&
- uclass == UNICODE_LB_NU)
- return RESULT(UNICODE_LB_NONE); /* LB25 */
+ if ((prevclass.lb == UNICODE_LB_OP || prevclass.lb == UNICODE_LB_HY) &&
+ uclass.lb == UNICODE_LB_NU)
+ return RESULT(UNICODE_LB_NONE, "LB25");
/*****/
- if (prevclass == UNICODE_LB_JL)
- switch (uclass) {
+ if (prevclass.lb == UNICODE_LB_JL)
+ switch (uclass.lb) {
case UNICODE_LB_JL:
case UNICODE_LB_JV:
case UNICODE_LB_H2:
case UNICODE_LB_H3:
- return RESULT(UNICODE_LB_NONE); /* LB26 */
+ return RESULT(UNICODE_LB_NONE, "LB26");
default:
break;
}
- if ((prevclass == UNICODE_LB_JV ||
- prevclass == UNICODE_LB_H2) &&
- (uclass == UNICODE_LB_JV ||
- uclass == UNICODE_LB_JT))
- return RESULT(UNICODE_LB_NONE); /* LB26 */
+ if ((prevclass.lb == UNICODE_LB_JV ||
+ prevclass.lb == UNICODE_LB_H2) &&
+ (uclass.lb == UNICODE_LB_JV ||
+ uclass.lb == UNICODE_LB_JT))
+ return RESULT(UNICODE_LB_NONE, "LB26");
- if ((prevclass == UNICODE_LB_JT ||
- prevclass == UNICODE_LB_H3) &&
- uclass == UNICODE_LB_JT)
- return RESULT(UNICODE_LB_NONE); /* LB26 */
+ if ((prevclass.lb == UNICODE_LB_JT ||
+ prevclass.lb == UNICODE_LB_H3) &&
+ uclass.lb == UNICODE_LB_JT)
+ return RESULT(UNICODE_LB_NONE, "LB26");
- switch (prevclass) {
+ switch (prevclass.lb) {
case UNICODE_LB_JL:
case UNICODE_LB_JV:
case UNICODE_LB_JT:
case UNICODE_LB_H2:
case UNICODE_LB_H3:
- if (uclass == UNICODE_LB_IN || uclass == UNICODE_LB_PO)
- return RESULT(UNICODE_LB_NONE); /* LB27 */
+ if (uclass.lb == UNICODE_LB_IN || uclass.lb == UNICODE_LB_PO)
+ return RESULT(UNICODE_LB_NONE, "LB27");
default:
break;
}
- switch (uclass) {
+ switch (uclass.lb) {
case UNICODE_LB_JL:
case UNICODE_LB_JV:
case UNICODE_LB_JT:
case UNICODE_LB_H2:
case UNICODE_LB_H3:
- if (prevclass == UNICODE_LB_PR)
- return RESULT(UNICODE_LB_NONE); /* LB27 */
+ if (prevclass.lb == UNICODE_LB_PR)
+ return RESULT(UNICODE_LB_NONE, "LB27");
default:
break;
}
- if ((prevclass == UNICODE_LB_AL || prevclass == UNICODE_LB_HL)
- && (uclass == UNICODE_LB_AL || uclass == UNICODE_LB_HL))
- return RESULT(UNICODE_LB_NONE); /* LB28 */
-
- if (prevclass == UNICODE_LB_IS &&
- (uclass == UNICODE_LB_AL || uclass == UNICODE_LB_HL))
- return RESULT(UNICODE_LB_NONE); /* LB29 */
-
- if ((prevclass == UNICODE_LB_AL || prevclass == UNICODE_LB_HL
- || prevclass == UNICODE_LB_NU) &&
- uclass == UNICODE_LB_OP)
- return RESULT(UNICODE_LB_NONE); /* LB30 */
-
- if ((uclass == UNICODE_LB_AL || uclass == UNICODE_LB_HL
- || uclass == UNICODE_LB_NU) &&
- prevclass == UNICODE_LB_CP)
- return RESULT(UNICODE_LB_NONE); /* LB30 */
+ if ((prevclass.lb == UNICODE_LB_AL || prevclass.lb == UNICODE_LB_HL)
+ && (uclass.lb == UNICODE_LB_AL || uclass.lb == UNICODE_LB_HL))
+ return RESULT(UNICODE_LB_NONE, "LB28");
+
+ if (prevclass.lb == UNICODE_LB_IS &&
+ (uclass.lb == UNICODE_LB_AL || uclass.lb == UNICODE_LB_HL))
+ return RESULT(UNICODE_LB_NONE, "LB29");
+
+ if ((prevclass.lb == UNICODE_LB_AL || prevclass.lb == UNICODE_LB_HL
+ || prevclass.lb == UNICODE_LB_NU) &&
+ (uclass.lb == UNICODE_LB_OP && uclass.ew != UNICODE_EASTASIA_F
+ && uclass.ew != UNICODE_EASTASIA_W
+ && uclass.ew != UNICODE_EASTASIA_H))
+ return RESULT(UNICODE_LB_NONE, "LB30");
+
+ if ((uclass.lb == UNICODE_LB_AL || uclass.lb == UNICODE_LB_HL
+ || uclass.lb == UNICODE_LB_NU) &&
+ (prevclass.lb == UNICODE_LB_CP
+ && prevclass.ew != UNICODE_EASTASIA_F
+ && prevclass.ew != UNICODE_EASTASIA_W
+ && prevclass.ew != UNICODE_EASTASIA_H))
+ return RESULT(UNICODE_LB_NONE, "LB30");
+
+ if (uclass.lb == UNICODE_LB_RI && prevclass.lb == UNICODE_LB_RI &&
+ !i->nolb30a)
+ {
+ i->next_handler=next_def_seen_lb30a;
+ return RESULT(UNICODE_LB_NONE, "LB30a");
+ }
- if (uclass == UNICODE_LB_RI && prevclass == UNICODE_LB_RI)
- return RESULT(UNICODE_LB_NONE); /* LB30a */
+ if (prevclass.lb == UNICODE_LB_EB && uclass.lb == UNICODE_LB_EM)
+ return RESULT(UNICODE_LB_NONE, "LB30b");
- return RESULT(UNICODE_LB_ALLOWED); /* LB31 */
+ return RESULT(UNICODE_LB_ALLOWED, "LB31");
}
/*
@@ -459,7 +527,7 @@ static int unwind_lb25_seenophy(unicode_lb_info_t i)
{
int rc;
- /*uint8_t class=i->savedclass;*/
+ /*state_t class=i->savedclass;*/
int nolb25_flag=1;
i->next_handler=next_def;
@@ -467,7 +535,9 @@ static int unwind_lb25_seenophy(unicode_lb_info_t i)
do
{
- rc=next_def_nolb25(i, i->savedclass, nolb25_flag);
+ next_def_reset_common(i);
+ i->nolb25=nolb25_flag;
+ rc=next_def_common(i, i->savedclass);
if (rc)
return rc;
@@ -484,29 +554,29 @@ static int unwind_lb25_seenophy(unicode_lb_info_t i)
*/
static int next_lb25_seenophy(unicode_lb_info_t i,
- uint8_t uclass)
+ state_t uclass)
{
int rc;
- if (uclass == UNICODE_LB_CM)
+ if (uclass.lb == UNICODE_LB_CM)
{
++i->savedcmcnt; /* Keep track of CMs, and try again */
return 0;
}
- if (uclass != UNICODE_LB_NU)
+ if (uclass.lb != UNICODE_LB_NU)
{
rc=unwind_lb25_seenophy(i);
if (rc)
return rc;
- return next_def_nolb25(i, uclass, 0);
+ return next_def(i, uclass);
}
do
{
- rc=RESULT(UNICODE_LB_NONE); /* (OP|HY) feedback */
+ rc=RESULT(UNICODE_LB_NONE, "LB25 (OP|HY)"); /* (OP|HY) feedback */
if (rc)
return rc;
@@ -515,7 +585,7 @@ static int next_lb25_seenophy(unicode_lb_info_t i,
i->next_handler=next_lb25_seennu;
i->end_handler=end_def;
i->prevclass=i->prevclass_nsp=uclass;
- return RESULT(UNICODE_LB_NONE);
+ return RESULT(UNICODE_LB_NONE, "LB25");
}
/*
@@ -534,33 +604,33 @@ static int end_lb25_seenophy(unicode_lb_info_t i)
/*
** Seen an NU, modified LB25 regexp.
*/
-static int next_lb25_seennu(unicode_lb_info_t i, uint8_t uclass)
+static int next_lb25_seennu(unicode_lb_info_t i, state_t uclass)
{
- if (uclass == UNICODE_LB_NU || uclass == UNICODE_LB_SY ||
- uclass == UNICODE_LB_IS)
+ if (uclass.lb == UNICODE_LB_NU || uclass.lb == UNICODE_LB_SY ||
+ uclass.lb == UNICODE_LB_IS)
{
i->prevclass=i->prevclass_nsp=uclass;
- return RESULT(UNICODE_LB_NONE);
+ return RESULT(UNICODE_LB_NONE, "LB25");
}
- if (uclass == UNICODE_LB_CM)
- return RESULT(UNICODE_LB_NONE); /* LB9 */
+ if (uclass.lb == UNICODE_LB_CM || uclass.lb == UNICODE_LB_ZWJ)
+ return RESULT(UNICODE_LB_NONE, "LB9 (LB25)");
- if (uclass == UNICODE_LB_CL || uclass == UNICODE_LB_CP)
+ if (uclass.lb == UNICODE_LB_CL || uclass.lb == UNICODE_LB_CP)
{
i->prevclass=i->prevclass_nsp=uclass;
i->next_handler=next_lb25_seennuclcp;
i->end_handler=end_def;
- return RESULT(UNICODE_LB_NONE);
+ return RESULT(UNICODE_LB_NONE, "LB25");
}
i->next_handler=next_def;
i->end_handler=end_def;
- if (uclass == UNICODE_LB_PR || uclass == UNICODE_LB_PO)
+ if (uclass.lb == UNICODE_LB_PR || uclass.lb == UNICODE_LB_PO)
{
i->prevclass=i->prevclass_nsp=uclass;
- return RESULT(UNICODE_LB_NONE);
+ return RESULT(UNICODE_LB_NONE, "LB25");
}
return next_def(i, uclass); /* Not a prefix, process normally */
@@ -569,19 +639,19 @@ static int next_lb25_seennu(unicode_lb_info_t i, uint8_t uclass)
/*
** Seen CL|CP, in the modified LB25 regexp.
*/
-static int next_lb25_seennuclcp(unicode_lb_info_t i, uint8_t uclass)
+static int next_lb25_seennuclcp(unicode_lb_info_t i, state_t uclass)
{
- if (uclass == UNICODE_LB_CM)
- return RESULT(UNICODE_LB_NONE); /* LB9 */
+ if (uclass.lb == UNICODE_LB_CM || uclass.lb == UNICODE_LB_ZWJ)
+ return RESULT(UNICODE_LB_NONE, "LB9 (LB25)");
i->next_handler=next_def;
i->end_handler=end_def;
- if (uclass == UNICODE_LB_PR || uclass == UNICODE_LB_PO)
+ if (uclass.lb == UNICODE_LB_PR || uclass.lb == UNICODE_LB_PO)
{
i->prevclass=i->prevclass_nsp=uclass;
- return RESULT(UNICODE_LB_NONE);
+ return RESULT(UNICODE_LB_NONE, "LB25");
}
return next_def(i, uclass);