diff options
Diffstat (limited to 'unicode/unicode_linebreak.c')
| -rw-r--r-- | unicode/unicode_linebreak.c | 45 | 
1 file changed, 35 insertions, 10 deletions
| diff --git a/unicode/unicode_linebreak.c b/unicode/unicode_linebreak.c index 1105dec..9b30ae4 100644 --- a/unicode/unicode_linebreak.c +++ b/unicode/unicode_linebreak.c @@ -1,5 +1,5 @@  /* -** Copyright 2011 Double Precision, Inc. +** Copyright 2011-2013 Double Precision, Inc.  ** See COPYING for distribution information.  **  */ @@ -28,6 +28,7 @@ struct unicode_lb_info {  	uint8_t savedclass;  	size_t savedcmcnt; +	uint8_t prevclass_min1;  	uint8_t prevclass;  	uint8_t prevclass_nsp; @@ -50,7 +51,7 @@ static int next_lb25_seennuclcp(unicode_lb_info_t, uint8_t);  static void unicode_lb_reset(unicode_lb_info_t i)  { -	i->prevclass=i->prevclass_nsp=UNICODE_LB_SOT; +	i->prevclass_min1=i->prevclass=i->prevclass_nsp=UNICODE_LB_SOT;  	i->next_handler=next_def;  	i->end_handler=end_def;  } @@ -147,10 +148,15 @@ static int next_def_nolb25(unicode_lb_info_t i,  	/* Retrieve the previous unicode character's linebreak class. */ +	uint8_t prevclass_min1=i->prevclass_min1;  	uint8_t prevclass=i->prevclass;  	uint8_t prevclass_nsp=i->prevclass_nsp; +#define RESTORE (i->prevclass_min1=prevclass_min1,			\ +		 i->prevclass=prevclass,				\ +		 i->prevclass_nsp=prevclass_nsp)			\  	/* Save this unicode char's linebreak class, for the next goaround */ +	i->prevclass_min1=i->prevclass;  	i->prevclass=uclass;  	if (uclass != UNICODE_LB_SP) @@ -216,8 +222,7 @@ static int next_def_nolb25(unicode_lb_info_t i,  	if (uclass == UNICODE_LB_CM)  	{ -		i->prevclass=prevclass; -		i->prevclass_nsp=prevclass_nsp; +		RESTORE;  		return RESULT(UNICODE_LB_NONE); /* LB9 */  	} @@ -295,9 +300,15 @@ static int next_def_nolb25(unicode_lb_info_t i,  	if (prevclass == UNICODE_LB_BB)  		return RESULT(UNICODE_LB_NONE); +	/* LB21a: */ +	if (prevclass_min1 == UNICODE_LB_HL && +	    (prevclass == UNICODE_LB_HY || prevclass == UNICODE_LB_BA)) +		return RESULT(UNICODE_LB_NONE); +  	if (uclass == UNICODE_LB_IN)  		switch (prevclass) {  		case UNICODE_LB_AL: +		case UNICODE_LB_HL:  		case UNICODE_LB_ID:  		case 
UNICODE_LB_IN:  		case UNICODE_LB_NU: @@ -311,17 +322,25 @@ static int next_def_nolb25(unicode_lb_info_t i,  		return RESULT(UNICODE_LB_NONE); /* LB23 */  	if (prevclass == UNICODE_LB_AL && uclass == UNICODE_LB_NU)  		return RESULT(UNICODE_LB_NONE); /* LB23 */ +	if (prevclass == UNICODE_LB_HL && uclass == UNICODE_LB_NU) +		return RESULT(UNICODE_LB_NONE); /* LB23 */  	if (prevclass == UNICODE_LB_NU && uclass == UNICODE_LB_AL)  		return RESULT(UNICODE_LB_NONE); /* LB23 */ +	if (prevclass == UNICODE_LB_NU && uclass == UNICODE_LB_HL) +		return RESULT(UNICODE_LB_NONE); /* LB23 */  	if (prevclass == UNICODE_LB_PR && uclass == UNICODE_LB_ID)  		return RESULT(UNICODE_LB_NONE); /* LB24 */  	if (prevclass == UNICODE_LB_PR && uclass == UNICODE_LB_AL)  		return RESULT(UNICODE_LB_NONE); /* LB24 */ +	if (prevclass == UNICODE_LB_PR && uclass == UNICODE_LB_HL) +		return RESULT(UNICODE_LB_NONE); /* LB24 */  	if (prevclass == UNICODE_LB_PO && uclass == UNICODE_LB_AL)  		return RESULT(UNICODE_LB_NONE); /* LB24 */ +	if (prevclass == UNICODE_LB_PO && uclass == UNICODE_LB_HL) +		return RESULT(UNICODE_LB_NONE); /* LB24 */  	if ((i->opts & UNICODE_LB_OPT_PRBREAK) && uclass == UNICODE_LB_PR)  		switch (prevclass) { @@ -339,8 +358,7 @@ static int next_def_nolb25(unicode_lb_info_t i,  		if (uclass == UNICODE_LB_OP || uclass == UNICODE_LB_HY)  		{ -			i->prevclass=prevclass; -			i->prevclass_nsp=prevclass_nsp; +			RESTORE;  			i->savedclass=uclass;  			i->savedcmcnt=0; @@ -403,20 +421,27 @@ static int next_def_nolb25(unicode_lb_info_t i,  		break;  	} -	if (prevclass == UNICODE_LB_AL && uclass == UNICODE_LB_AL) +	if ((prevclass == UNICODE_LB_AL || prevclass == UNICODE_LB_HL) +	    && (uclass == UNICODE_LB_AL || uclass == UNICODE_LB_HL))  		return RESULT(UNICODE_LB_NONE); /* LB28 */ -	if (prevclass == UNICODE_LB_IS && uclass == UNICODE_LB_AL) +	if (prevclass == UNICODE_LB_IS && +	    (uclass == UNICODE_LB_AL || uclass == UNICODE_LB_HL))  		return RESULT(UNICODE_LB_NONE); /* LB29 */ -	if 
((prevclass == UNICODE_LB_AL || prevclass == UNICODE_LB_NU) && +	if ((prevclass == UNICODE_LB_AL || prevclass == UNICODE_LB_HL +	     || prevclass == UNICODE_LB_NU) &&  	    uclass == UNICODE_LB_OP)  		return RESULT(UNICODE_LB_NONE); /* LB30 */ -	if ((uclass == UNICODE_LB_AL || uclass == UNICODE_LB_NU) && +	if ((uclass == UNICODE_LB_AL || uclass == UNICODE_LB_HL +	     || uclass == UNICODE_LB_NU) &&  	    prevclass == UNICODE_LB_CP)  		return RESULT(UNICODE_LB_NONE); /* LB30 */ +	if (uclass == UNICODE_LB_RI && prevclass == UNICODE_LB_RI) +		return RESULT(UNICODE_LB_NONE); /* LB30a */ +  	return RESULT(UNICODE_LB_ALLOWED); /* LB31 */  } | 
