diff options
Diffstat (limited to 'unicode/unicode_linebreak.c')
| -rw-r--r-- | unicode/unicode_linebreak.c | 516 | 
1 files changed, 293 insertions, 223 deletions
| diff --git a/unicode/unicode_linebreak.c b/unicode/unicode_linebreak.c index a843c6c..9d1e73c 100644 --- a/unicode/unicode_linebreak.c +++ b/unicode/unicode_linebreak.c @@ -19,39 +19,55 @@  #define UNICODE_LB_SOT	0xFF +struct state_t { +	uint8_t lb; +	uint8_t ew; +}; + +typedef struct state_t state_t; +  struct unicode_lb_info {  	int (*cb_func)(int, void *);  	void *cb_arg;  	int opts; -	uint8_t savedclass; +	state_t savedclass;  	size_t savedcmcnt; -	uint8_t prevclass_min1; -	uint8_t prevclass; -	uint8_t prevclass_nsp; +	state_t prevclass_min1; +	state_t prevclass; +	state_t prevclass_nsp; -	int (*next_handler)(struct unicode_lb_info *, uint8_t); +	/* Flag -- recursively invoked after discarding LB25 */ +	char nolb25; + +	/* Flag -- seen a pair of RIs */ +	char nolb30a; + +	int (*next_handler)(struct unicode_lb_info *, state_t);  	int (*end_handler)(struct unicode_lb_info *);  };  /* http://www.unicode.org/reports/tr14/#Algorithm */ -static int next_def(unicode_lb_info_t, uint8_t); +static int next_def(unicode_lb_info_t, state_t);  static int end_def(unicode_lb_info_t); -static int next_lb25_seenophy(unicode_lb_info_t, uint8_t); +static int next_lb25_seenophy(unicode_lb_info_t, state_t);  static int end_lb25_seenophy(unicode_lb_info_t); -static int next_lb25_seennu(unicode_lb_info_t, uint8_t); +static int next_lb25_seennu(unicode_lb_info_t, state_t); -static int next_lb25_seennuclcp(unicode_lb_info_t, uint8_t); +static int next_lb25_seennuclcp(unicode_lb_info_t, state_t);  static void unicode_lb_reset(unicode_lb_info_t i)  { -	i->prevclass_min1=i->prevclass=i->prevclass_nsp=UNICODE_LB_SOT; +	i->prevclass.lb=UNICODE_LB_SOT; +	i->prevclass.ew=UNICODE_EASTASIA_N; + +	i->prevclass_min1=i->prevclass_nsp=i->prevclass;  	i->next_handler=next_def;  	i->end_handler=end_def;  } @@ -88,7 +104,16 @@ static int end_def(unicode_lb_info_t i)  	/* LB3 N/A */  	return 0;  } -#define RESULT(x) (*i->cb_func)((x), i->cb_arg) + +/* #define DEBUG_LB */ + +#ifdef DEBUG_LB +#define RULE(x) ( (void)printf("%s\n", x)) +#else +#define RULE(x) ( (void)0 ) +#endif + +#define RESULT(x, msg) (RULE(msg),*i->cb_func)((x), i->cb_arg)  int unicode_lb_next_cnt(unicode_lb_info_t i,  			const char32_t *chars, @@ -121,36 +146,62 @@ int unicode_lb_lookup(char32_t ch)  int unicode_lb_next(unicode_lb_info_t i,  		    char32_t ch)  { -	return (*i->next_handler)(i, (i->opts & UNICODE_LB_OPT_DASHWJ) && -				  (ch == 0x2012 || ch == 0x2013) -				  ? UNICODE_LB_WJ:unicode_lb_lookup(ch)); +	state_t c; + +	c.lb=unicode_lb_lookup(ch); +	c.ew=unicode_eastasia(ch); + +	if ((i->opts & UNICODE_LB_OPT_DASHWJ) && +	    (ch == 0x2012 || ch == 0x2013)) +	{ +		c.lb=UNICODE_LB_WJ; +	} + +	return (*i->next_handler)(i, c);  } -static int next_def_nolb25(unicode_lb_info_t i, -			   uint8_t uclass, -			   int nolb25); +static int next_def_common(unicode_lb_info_t i, +			   state_t uclass); + +/* +** Reset state for next_def_common. +*/ + +static void next_def_reset_common(unicode_lb_info_t i) +{ +	i->nolb25=0; +	i->nolb30a=0; +}  /*  ** Default logic for next unicode char.  */  static int next_def(unicode_lb_info_t i, -		    uint8_t uclass) +		    state_t uclass) +{ +	next_def_reset_common(i); +	return next_def_common(i, uclass); +} + +static int next_def_seen_lb30a(unicode_lb_info_t i, +			       state_t uclass)  { -	return next_def_nolb25(i, uclass, 0); +	i->next_handler=next_def; +	next_def_reset_common(i); +	i->nolb30a=1; +	return next_def_common(i, uclass);  } -static int next_def_nolb25(unicode_lb_info_t i, -			   uint8_t uclass, -			   /* Flag -- recursively invoked after discarding LB25 */ -			   int nolb25) +static int next_def_common(unicode_lb_info_t i, +			   state_t uclass)  {  	/* Retrieve the previous unicode character's linebreak class. */ -	uint8_t prevclass_min1=i->prevclass_min1; -	uint8_t prevclass=i->prevclass; -	uint8_t prevclass_nsp=i->prevclass_nsp; +	state_t prevclass_min1=i->prevclass_min1; +	state_t prevclass=i->prevclass; +	state_t prevclass_nsp=i->prevclass_nsp;  #define RESTORE (i->prevclass_min1=prevclass_min1,			\  		 i->prevclass=prevclass,				\ @@ -159,212 +210,217 @@ static int next_def_nolb25(unicode_lb_info_t i,  	i->prevclass_min1=i->prevclass;  	i->prevclass=uclass; -	if (uclass != UNICODE_LB_SP) +	if (uclass.lb != UNICODE_LB_SP)  		i->prevclass_nsp=uclass; -	if (uclass == UNICODE_LB_NU) +	if (uclass.lb == UNICODE_LB_NU)  		i->next_handler=next_lb25_seennu; /* LB25 */ -	if (prevclass == UNICODE_LB_SOT) +	if (prevclass.lb == UNICODE_LB_SOT)  	{ -		if (uclass == UNICODE_LB_CM) /* LB9 */ -			i->prevclass=i->prevclass_nsp=uclass=UNICODE_LB_AL; - -		return RESULT(UNICODE_LB_NONE); /* LB2 */ +		return RESULT(UNICODE_LB_NONE, "LB2");  	} -	if (prevclass == UNICODE_LB_CR && uclass == UNICODE_LB_LF) -		return RESULT(UNICODE_LB_NONE); /* LB5 */ +	if (prevclass.lb == UNICODE_LB_BK) +		return RESULT(UNICODE_LB_MANDATORY, "LB4"); -	switch (prevclass) { -	case UNICODE_LB_BK: +	if (prevclass.lb == UNICODE_LB_CR && uclass.lb == UNICODE_LB_LF) +		return RESULT(UNICODE_LB_NONE, "LB5"); + + +	switch (prevclass.lb) {  	case UNICODE_LB_CR:  	case UNICODE_LB_LF:  	case UNICODE_LB_NL: +		return RESULT(UNICODE_LB_MANDATORY, "LB5"); +	} -		if (uclass == UNICODE_LB_CM) -		{ -			i->prevclass=i->prevclass_nsp=uclass=UNICODE_LB_AL; -			/* LB9 */ -		} -		return RESULT(UNICODE_LB_MANDATORY); /* LB4, LB5 */ +	switch (uclass.lb) { +		/* LB6: */ +	case UNICODE_LB_BK: +	case UNICODE_LB_CR: +	case UNICODE_LB_LF: +	case UNICODE_LB_NL: +		/* LB7: */  	case UNICODE_LB_SP:  	case UNICODE_LB_ZW: -		if (uclass == UNICODE_LB_CM) -			i->prevclass=i->prevclass_nsp=uclass=UNICODE_LB_AL; -		/* LB10 */ -		break; + +		return RESULT(UNICODE_LB_NONE, "LB6, LB7");  	default:  		break;  	} -	switch (uclass) { +	if (prevclass_nsp.lb == UNICODE_LB_ZW) +		return RESULT(UNICODE_LB_ALLOWED, "LB8"); -		/* LB6: */ + +	if (prevclass.lb == UNICODE_LB_ZWJ) +		return RESULT(UNICODE_LB_NONE, "LB8a"); + +	switch (prevclass.lb) {  	case UNICODE_LB_BK:  	case UNICODE_LB_CR:  	case UNICODE_LB_LF:  	case UNICODE_LB_NL: - -		/* LB7: */  	case UNICODE_LB_SP:  	case UNICODE_LB_ZW: - -		return RESULT(UNICODE_LB_NONE); -	default:  		break; -	} +	default: -	if (prevclass_nsp == UNICODE_LB_ZW) -		return RESULT(UNICODE_LB_ALLOWED); /* LB8 */ +		if (uclass.lb == UNICODE_LB_CM || uclass.lb == UNICODE_LB_ZWJ) +		{ +			RESTORE; +			return RESULT(UNICODE_LB_NONE, "LB9"); +		} +	} -	if (uclass == UNICODE_LB_CM) +	if (uclass.lb == UNICODE_LB_CM || uclass.lb == UNICODE_LB_ZWJ)  	{ -		RESTORE; -		return RESULT(UNICODE_LB_NONE); /* LB9 */ +		uclass.lb=UNICODE_LB_AL; +		RULE("LB10"); +	} +	if (prevclass.lb == UNICODE_LB_CM || prevclass.lb == UNICODE_LB_ZWJ) +	{ +		prevclass.lb=UNICODE_LB_AL; +		RULE("LB10");  	} -	if (prevclass == UNICODE_LB_WJ || uclass == UNICODE_LB_WJ) -		return RESULT(UNICODE_LB_NONE); /* LB11 */ +	if (prevclass.lb == UNICODE_LB_WJ || uclass.lb == UNICODE_LB_WJ) +		return RESULT(UNICODE_LB_NONE, "LB11"); -	if (prevclass == UNICODE_LB_GL) -		return RESULT(UNICODE_LB_NONE); /* LB12 */ +	if (prevclass.lb == UNICODE_LB_GL) +		return RESULT(UNICODE_LB_NONE, "LB12"); -	if (uclass == UNICODE_LB_GL && -	    prevclass != UNICODE_LB_SP && -	    prevclass != UNICODE_LB_BA && -	    prevclass != UNICODE_LB_HY) -		return RESULT(UNICODE_LB_NONE); /* LB12a */ +	if (uclass.lb == UNICODE_LB_GL && +	    prevclass.lb != UNICODE_LB_SP && +	    prevclass.lb != UNICODE_LB_BA && +	    prevclass.lb != UNICODE_LB_HY) +		return RESULT(UNICODE_LB_NONE, "LB12a"); -	switch (uclass) { -	case UNICODE_LB_SY: -		if (i->opts & UNICODE_LB_OPT_SYBREAK) -		{ -			if (prevclass == UNICODE_LB_SP) -				return RESULT(UNICODE_LB_ALLOWED); -		} +	if (uclass.lb == UNICODE_LB_SY && +	    i->opts & UNICODE_LB_OPT_SYBREAK) +	{ +		if (prevclass.lb == UNICODE_LB_SP) +			return RESULT(UNICODE_LB_ALLOWED, "LB13 (tailored)"); +	} -	case UNICODE_LB_CL: -	case UNICODE_LB_CP: -	case UNICODE_LB_EX: -	case UNICODE_LB_IS: -		return RESULT(UNICODE_LB_NONE); /* LB13 */ -	default: -		break; +	if (prevclass.lb != UNICODE_LB_NU) { +		switch (uclass.lb) { +		case UNICODE_LB_CL: +		case UNICODE_LB_CP: +		case UNICODE_LB_IS: +		case UNICODE_LB_SY: +			return RESULT(UNICODE_LB_NONE, "LB13"); +		default: +			break; +		}  	} -	if ((i->opts & UNICODE_LB_OPT_SYBREAK) && prevclass == UNICODE_LB_SY) -		switch (uclass) { +	if (uclass.lb == UNICODE_LB_EX) +		return RESULT(UNICODE_LB_NONE, "LB13"); + +	if ((i->opts & UNICODE_LB_OPT_SYBREAK) && prevclass.lb == UNICODE_LB_SY) +		switch (uclass.lb) {  		case UNICODE_LB_EX:  		case UNICODE_LB_AL:  		case UNICODE_LB_ID: -			return RESULT(UNICODE_LB_NONE); +			return RESULT(UNICODE_LB_NONE, "LB13");  		} -	if (prevclass_nsp == UNICODE_LB_OP) -		return RESULT(UNICODE_LB_NONE); /* LB14 */ - -	if (prevclass_nsp == UNICODE_LB_QU && uclass == UNICODE_LB_OP) -		return RESULT(UNICODE_LB_NONE); /* LB15 */ +	if (prevclass_nsp.lb == UNICODE_LB_OP) +		return RESULT(UNICODE_LB_NONE, "LB14"); -	if ((prevclass_nsp == UNICODE_LB_CL || prevclass_nsp == UNICODE_LB_CP) -	    && uclass == UNICODE_LB_NS) -		return RESULT(UNICODE_LB_NONE); /* LB16 */ +	if (prevclass_nsp.lb == UNICODE_LB_QU && uclass.lb == UNICODE_LB_OP) +		return RESULT(UNICODE_LB_NONE, "LB15"); -	if (prevclass_nsp == UNICODE_LB_B2 && uclass == UNICODE_LB_B2) -		return RESULT(UNICODE_LB_NONE); /* LB17 */ +	if ((prevclass_nsp.lb == UNICODE_LB_CL || prevclass_nsp.lb == UNICODE_LB_CP) +	    && uclass.lb == UNICODE_LB_NS) +		return RESULT(UNICODE_LB_NONE, "LB16"); -	if (prevclass == UNICODE_LB_SP) -		return RESULT(UNICODE_LB_ALLOWED); /* LB18 */ +	if (prevclass_nsp.lb == UNICODE_LB_B2 && uclass.lb == UNICODE_LB_B2) +		return RESULT(UNICODE_LB_NONE, "LB17"); -	if (uclass == UNICODE_LB_QU || prevclass == UNICODE_LB_QU) -		return RESULT(UNICODE_LB_NONE); /* LB19 */ +	if (prevclass.lb == UNICODE_LB_SP) +		return RESULT(UNICODE_LB_ALLOWED, "LB18"); -	if (uclass == UNICODE_LB_CB || prevclass == UNICODE_LB_CB) -		return RESULT(UNICODE_LB_ALLOWED); /* LB20 */ +	if (uclass.lb == UNICODE_LB_QU || prevclass.lb == UNICODE_LB_QU) +		return RESULT(UNICODE_LB_NONE, "LB19"); -	/* LB21: */ +	if (uclass.lb == UNICODE_LB_CB || prevclass.lb == UNICODE_LB_CB) +		return RESULT(UNICODE_LB_ALLOWED, "LB20"); -	switch (uclass) { +	switch (uclass.lb) {  	case UNICODE_LB_BA:  	case UNICODE_LB_HY:  	case UNICODE_LB_NS: -		return RESULT(UNICODE_LB_NONE); +		return RESULT(UNICODE_LB_NONE, "LB21");  	default:  		break;  	} -	if (prevclass == UNICODE_LB_BB) -		return RESULT(UNICODE_LB_NONE); +	if (prevclass.lb == UNICODE_LB_BB) +		return RESULT(UNICODE_LB_NONE, "LB21"); -	/* LB21a: */ -	if (prevclass_min1 == UNICODE_LB_HL && -	    (prevclass == UNICODE_LB_HY || prevclass == UNICODE_LB_BA)) -		return RESULT(UNICODE_LB_NONE); +	if (prevclass_min1.lb == UNICODE_LB_HL && +	    (prevclass.lb == UNICODE_LB_HY || prevclass.lb == UNICODE_LB_BA)) +		return RESULT(UNICODE_LB_NONE, "LB21a"); -	/* LB21b: */ -	if (prevclass == UNICODE_LB_SY && uclass == UNICODE_LB_HL) -		return RESULT(UNICODE_LB_NONE); +	if (prevclass.lb == UNICODE_LB_SY && uclass.lb == UNICODE_LB_HL) +		return RESULT(UNICODE_LB_NONE, "LB21b"); -	if (uclass == UNICODE_LB_IN) -		switch (prevclass) { -		case UNICODE_LB_AL: -		case UNICODE_LB_EX: -		case UNICODE_LB_HL: -		case UNICODE_LB_ID: -		case UNICODE_LB_IN: -		case UNICODE_LB_NU: -			return RESULT(UNICODE_LB_NONE); /* LB22 */ -		default: -			break; -		} +	if (uclass.lb == UNICODE_LB_IN) +		return RESULT(UNICODE_LB_NONE, "LB22"); + +	if (prevclass.lb == UNICODE_LB_AL && uclass.lb == UNICODE_LB_NU) +		return RESULT(UNICODE_LB_NONE, "LB23"); +	if (prevclass.lb == UNICODE_LB_HL && uclass.lb == UNICODE_LB_NU) +		return RESULT(UNICODE_LB_NONE, "LB23"); + +	if (prevclass.lb == UNICODE_LB_NU && uclass.lb == UNICODE_LB_AL) +		return RESULT(UNICODE_LB_NONE, "LB23"); +	if (prevclass.lb == UNICODE_LB_NU && uclass.lb == UNICODE_LB_HL) +		return RESULT(UNICODE_LB_NONE, "LB23"); -	if (prevclass == UNICODE_LB_ID && uclass == UNICODE_LB_PO) -		return RESULT(UNICODE_LB_NONE); /* LB23 */ -	if (prevclass == UNICODE_LB_AL && uclass == UNICODE_LB_NU) -		return RESULT(UNICODE_LB_NONE); /* LB23 */ -	if (prevclass == UNICODE_LB_HL && uclass == UNICODE_LB_NU) -		return RESULT(UNICODE_LB_NONE); /* LB23 */ - -	if (prevclass == UNICODE_LB_NU && uclass == UNICODE_LB_AL) -		return RESULT(UNICODE_LB_NONE); /* LB23 */ -	if (prevclass == UNICODE_LB_NU && uclass == UNICODE_LB_HL) -		return RESULT(UNICODE_LB_NONE); /* LB23 */ - - -	if (prevclass == UNICODE_LB_PR && uclass == UNICODE_LB_ID) -		return RESULT(UNICODE_LB_NONE); /* LB24 */ -	if (prevclass == UNICODE_LB_PR && uclass == UNICODE_LB_AL) -		return RESULT(UNICODE_LB_NONE); /* LB24 */ -	if (prevclass == UNICODE_LB_PR && uclass == UNICODE_LB_HL) -		return RESULT(UNICODE_LB_NONE); /* LB24 */ -	if (prevclass == UNICODE_LB_PO && uclass == UNICODE_LB_AL) -		return RESULT(UNICODE_LB_NONE); /* LB24 */ -	if (prevclass == UNICODE_LB_PO && uclass == UNICODE_LB_HL) -		return RESULT(UNICODE_LB_NONE); /* LB24 */ - -	if ((i->opts & UNICODE_LB_OPT_PRBREAK) && uclass == UNICODE_LB_PR) -		switch (prevclass) { +	if (prevclass.lb == UNICODE_LB_PR && +	    (uclass.lb == UNICODE_LB_ID || uclass.lb == UNICODE_LB_EB || +	     uclass.lb == UNICODE_LB_EM)) +		return RESULT(UNICODE_LB_NONE, "LB23a"); + +	if ((prevclass.lb == UNICODE_LB_ID || prevclass.lb == UNICODE_LB_EB || +	     prevclass.lb == UNICODE_LB_EM) && +	    uclass.lb == UNICODE_LB_PO) +		return RESULT(UNICODE_LB_NONE, "LB23a"); + +	if ((prevclass.lb == UNICODE_LB_PR || prevclass.lb == UNICODE_LB_PO) && +	    (uclass.lb == UNICODE_LB_AL || uclass.lb == UNICODE_LB_HL)) +		return RESULT(UNICODE_LB_NONE, "LB24"); + +	if ((prevclass.lb == UNICODE_LB_AL || prevclass.lb == UNICODE_LB_HL) && +	    (uclass.lb == UNICODE_LB_PR || uclass.lb == UNICODE_LB_PO)) +		return RESULT(UNICODE_LB_NONE, "LB24"); + +	if ((i->opts & UNICODE_LB_OPT_PRBREAK) && uclass.lb == UNICODE_LB_PR) +		switch (prevclass.lb) {  		case UNICODE_LB_PR:  		case UNICODE_LB_AL:  		case UNICODE_LB_ID: -			return RESULT(UNICODE_LB_NONE); +			return RESULT(UNICODE_LB_NONE, "LB24 (tailored)");  		} -	if (!nolb25 && -	    (prevclass == UNICODE_LB_PR || prevclass == UNICODE_LB_PO)) +	if (!i->nolb25 && +	    (prevclass.lb == UNICODE_LB_PR || prevclass.lb == UNICODE_LB_PO))  	{ -		if (uclass == UNICODE_LB_NU) -			return RESULT(UNICODE_LB_NONE); /* LB25 */ +		if (uclass.lb == UNICODE_LB_NU) +			return RESULT(UNICODE_LB_NONE, "LB25"); -		if (uclass == UNICODE_LB_OP || uclass == UNICODE_LB_HY) +		if (uclass.lb == UNICODE_LB_OP || uclass.lb == UNICODE_LB_HY)  		{  			RESTORE; - +			RULE("LB25 (start)");  			i->savedclass=uclass;  			i->savedcmcnt=0;  			i->next_handler=next_lb25_seenophy; @@ -373,81 +429,93 @@ static int next_def_nolb25(unicode_lb_info_t i,  		}  	} -	if ((prevclass == UNICODE_LB_OP || prevclass == UNICODE_LB_HY) && -	    uclass == UNICODE_LB_NU) -		return RESULT(UNICODE_LB_NONE); /* LB25 */ +	if ((prevclass.lb == UNICODE_LB_OP || prevclass.lb == UNICODE_LB_HY) && +	    uclass.lb == UNICODE_LB_NU) +		return RESULT(UNICODE_LB_NONE, "LB25");  	/*****/ -	if (prevclass == UNICODE_LB_JL) -		switch (uclass) { +	if (prevclass.lb == UNICODE_LB_JL) +		switch (uclass.lb) {  		case UNICODE_LB_JL:  		case UNICODE_LB_JV:  		case UNICODE_LB_H2:  		case UNICODE_LB_H3: -			return RESULT(UNICODE_LB_NONE); /* LB26 */ +			return RESULT(UNICODE_LB_NONE, "LB26");  		default:  			break;  		} -	if ((prevclass == UNICODE_LB_JV || -	     prevclass == UNICODE_LB_H2) && -	    (uclass == UNICODE_LB_JV || -	     uclass == UNICODE_LB_JT)) -		return RESULT(UNICODE_LB_NONE); /* LB26 */ +	if ((prevclass.lb == UNICODE_LB_JV || +	     prevclass.lb == UNICODE_LB_H2) && +	    (uclass.lb == UNICODE_LB_JV || +	     uclass.lb == UNICODE_LB_JT)) +		return RESULT(UNICODE_LB_NONE, "LB26"); -	if ((prevclass == UNICODE_LB_JT || -	     prevclass == UNICODE_LB_H3) && -	    uclass == UNICODE_LB_JT) -		return RESULT(UNICODE_LB_NONE); /* LB26 */ +	if ((prevclass.lb == UNICODE_LB_JT || +	     prevclass.lb == UNICODE_LB_H3) && +	    uclass.lb == UNICODE_LB_JT) +		return RESULT(UNICODE_LB_NONE, "LB26"); -	switch (prevclass) { +	switch (prevclass.lb) {  	case UNICODE_LB_JL:  	case UNICODE_LB_JV:  	case UNICODE_LB_JT:  	case UNICODE_LB_H2:  	case UNICODE_LB_H3: -		if (uclass == UNICODE_LB_IN || uclass == UNICODE_LB_PO) -			return RESULT(UNICODE_LB_NONE); /* LB27 */ +		if (uclass.lb == UNICODE_LB_IN || uclass.lb == UNICODE_LB_PO) +			return RESULT(UNICODE_LB_NONE, "LB27");  	default:  		break;  	} -	switch (uclass) { +	switch (uclass.lb) {  	case UNICODE_LB_JL:  	case UNICODE_LB_JV:  	case UNICODE_LB_JT:  	case UNICODE_LB_H2:  	case UNICODE_LB_H3: -		if (prevclass == UNICODE_LB_PR) -			return RESULT(UNICODE_LB_NONE); /* LB27 */ +		if (prevclass.lb == UNICODE_LB_PR) +			return RESULT(UNICODE_LB_NONE, "LB27");  	default:  		break;  	} -	if ((prevclass == UNICODE_LB_AL || prevclass == UNICODE_LB_HL) -	    && (uclass == UNICODE_LB_AL || uclass == UNICODE_LB_HL)) -		return RESULT(UNICODE_LB_NONE); /* LB28 */ - -	if (prevclass == UNICODE_LB_IS && -	    (uclass == UNICODE_LB_AL || uclass == UNICODE_LB_HL)) -		return RESULT(UNICODE_LB_NONE); /* LB29 */ - -	if ((prevclass == UNICODE_LB_AL || prevclass == UNICODE_LB_HL -	     || prevclass == UNICODE_LB_NU) && -	    uclass == UNICODE_LB_OP) -		return RESULT(UNICODE_LB_NONE); /* LB30 */ - -	if ((uclass == UNICODE_LB_AL || uclass == UNICODE_LB_HL -	     || uclass == UNICODE_LB_NU) && -	    prevclass == UNICODE_LB_CP) -		return RESULT(UNICODE_LB_NONE); /* LB30 */ +	if ((prevclass.lb == UNICODE_LB_AL || prevclass.lb == UNICODE_LB_HL) +	    && (uclass.lb == UNICODE_LB_AL || uclass.lb == UNICODE_LB_HL)) +		return RESULT(UNICODE_LB_NONE, "LB28"); + +	if (prevclass.lb == UNICODE_LB_IS && +	    (uclass.lb == UNICODE_LB_AL || uclass.lb == UNICODE_LB_HL)) +		return RESULT(UNICODE_LB_NONE, "LB29"); + +	if ((prevclass.lb == UNICODE_LB_AL || prevclass.lb == UNICODE_LB_HL +	     || prevclass.lb == UNICODE_LB_NU) && +	    (uclass.lb == UNICODE_LB_OP && uclass.ew != UNICODE_EASTASIA_F +	     && uclass.ew != UNICODE_EASTASIA_W +	     && uclass.ew != UNICODE_EASTASIA_H)) +		return RESULT(UNICODE_LB_NONE, "LB30"); + +	if ((uclass.lb == UNICODE_LB_AL || uclass.lb == UNICODE_LB_HL +	     || uclass.lb == UNICODE_LB_NU) && +	    (prevclass.lb == UNICODE_LB_CP +	     && prevclass.ew != UNICODE_EASTASIA_F +	     && prevclass.ew != UNICODE_EASTASIA_W +	     && prevclass.ew != UNICODE_EASTASIA_H)) +		return RESULT(UNICODE_LB_NONE, "LB30"); + +	if (uclass.lb == UNICODE_LB_RI && prevclass.lb == UNICODE_LB_RI && +	    !i->nolb30a) +	{ +		i->next_handler=next_def_seen_lb30a; +		return RESULT(UNICODE_LB_NONE, "LB30a"); +	} -	if (uclass == UNICODE_LB_RI && prevclass == UNICODE_LB_RI) -		return RESULT(UNICODE_LB_NONE); /* LB30a */ +	if (prevclass.lb == UNICODE_LB_EB && uclass.lb == UNICODE_LB_EM) +		return RESULT(UNICODE_LB_NONE, "LB30b"); -	return RESULT(UNICODE_LB_ALLOWED); /* LB31 */ +	return RESULT(UNICODE_LB_ALLOWED, "LB31");  }  /* @@ -459,7 +527,7 @@ static int unwind_lb25_seenophy(unicode_lb_info_t i)  {  	int rc; -	/*uint8_t class=i->savedclass;*/ +	/*state_t class=i->savedclass;*/  	int nolb25_flag=1;  	i->next_handler=next_def; @@ -467,7 +535,9 @@ static int unwind_lb25_seenophy(unicode_lb_info_t i)  	do  	{ -		rc=next_def_nolb25(i, i->savedclass, nolb25_flag); +		next_def_reset_common(i); +		i->nolb25=nolb25_flag; +		rc=next_def_common(i, i->savedclass);  		if (rc)  			return rc; @@ -484,29 +554,29 @@ static int unwind_lb25_seenophy(unicode_lb_info_t i)  */  static int next_lb25_seenophy(unicode_lb_info_t i, -			      uint8_t uclass) +			      state_t uclass)  {  	int rc; -	if (uclass == UNICODE_LB_CM) +	if (uclass.lb == UNICODE_LB_CM)  	{  		++i->savedcmcnt; /* Keep track of CMs, and try again */  		return 0;  	} -	if (uclass != UNICODE_LB_NU) +	if (uclass.lb != UNICODE_LB_NU)  	{  		rc=unwind_lb25_seenophy(i);  		if (rc)  			return rc; -		return next_def_nolb25(i, uclass, 0); +		return next_def(i, uclass);  	}  	do  	{ -		rc=RESULT(UNICODE_LB_NONE); /* (OP|HY) feedback */ +		rc=RESULT(UNICODE_LB_NONE, "LB25 (OP|HY)"); /* (OP|HY) feedback */  		if (rc)  			return rc; @@ -515,7 +585,7 @@ static int next_lb25_seenophy(unicode_lb_info_t i,  	i->next_handler=next_lb25_seennu;  	i->end_handler=end_def;  	i->prevclass=i->prevclass_nsp=uclass; -	return RESULT(UNICODE_LB_NONE); +	return RESULT(UNICODE_LB_NONE, "LB25");  }  /* @@ -534,33 +604,33 @@ static int end_lb25_seenophy(unicode_lb_info_t i)  /*  ** Seen an NU, modified LB25 regexp.  */ -static int next_lb25_seennu(unicode_lb_info_t i, uint8_t uclass) +static int next_lb25_seennu(unicode_lb_info_t i, state_t uclass)  { -	if (uclass == UNICODE_LB_NU || uclass == UNICODE_LB_SY || -	    uclass == UNICODE_LB_IS) +	if (uclass.lb == UNICODE_LB_NU || uclass.lb == UNICODE_LB_SY || +	    uclass.lb == UNICODE_LB_IS)  	{  		i->prevclass=i->prevclass_nsp=uclass; -		return RESULT(UNICODE_LB_NONE); +		return RESULT(UNICODE_LB_NONE, "LB25");  	} -	if (uclass == UNICODE_LB_CM) -		return RESULT(UNICODE_LB_NONE); /* LB9 */ +	if (uclass.lb == UNICODE_LB_CM || uclass.lb == UNICODE_LB_ZWJ) +		return RESULT(UNICODE_LB_NONE, "LB9 (LB25)"); -	if (uclass == UNICODE_LB_CL || uclass == UNICODE_LB_CP) +	if (uclass.lb == UNICODE_LB_CL || uclass.lb == UNICODE_LB_CP)  	{  		i->prevclass=i->prevclass_nsp=uclass;  		i->next_handler=next_lb25_seennuclcp;  		i->end_handler=end_def; -		return RESULT(UNICODE_LB_NONE); +		return RESULT(UNICODE_LB_NONE, "LB25");  	}  	i->next_handler=next_def;  	i->end_handler=end_def; -	if (uclass == UNICODE_LB_PR || uclass == UNICODE_LB_PO) +	if (uclass.lb == UNICODE_LB_PR || uclass.lb == UNICODE_LB_PO)  	{  		i->prevclass=i->prevclass_nsp=uclass; -		return RESULT(UNICODE_LB_NONE); +		return RESULT(UNICODE_LB_NONE, "LB25");  	}  	return next_def(i, uclass); /* Not a prefix, process normally */ @@ -569,19 +639,19 @@ static int next_lb25_seennu(unicode_lb_info_t i, uint8_t uclass)  /*  ** Seen CL|CP, in the modified LB25 regexp.  */ -static int next_lb25_seennuclcp(unicode_lb_info_t i, uint8_t uclass) +static int next_lb25_seennuclcp(unicode_lb_info_t i, state_t uclass)  { -	if (uclass == UNICODE_LB_CM) -		return RESULT(UNICODE_LB_NONE); /* LB9 */ +	if (uclass.lb == UNICODE_LB_CM || uclass.lb == UNICODE_LB_ZWJ) +		return RESULT(UNICODE_LB_NONE, "LB9 (LB25)");  	i->next_handler=next_def;  	i->end_handler=end_def; -	if (uclass == UNICODE_LB_PR || uclass == UNICODE_LB_PO) +	if (uclass.lb == UNICODE_LB_PR || uclass.lb == UNICODE_LB_PO)  	{  		i->prevclass=i->prevclass_nsp=uclass; -		return RESULT(UNICODE_LB_NONE); +		return RESULT(UNICODE_LB_NONE, "LB25");  	}  	return next_def(i, uclass); | 
