summary | refs | log | tree | commit | diff | stats
path: root/unicode/unicode_linebreak.c
diff options
context:
space:
mode:
author: Sam Varshavchik, 2013-12-29 09:31:59 -0500
committer: Sam Varshavchik, 2013-12-29 09:31:59 -0500
commit: 17317c25aecbf38f43bfcf8e7a63194cd345e696 (patch)
tree: d12e47126ea281cb42cce345e174a8be2d726b6c /unicode/unicode_linebreak.c
parent: 6c5a9de8c051f9c98fa76cc0318f54290edc7ad4 (diff)
download: courier-libs-17317c25aecbf38f43bfcf8e7a63194cd345e696.tar.bz2
2013-12-29 Sam Varshavchik <mrsam@courier-mta.com>
* libunicode: Updated Unicode word, grapheme, and line-breaking rules to the Unicode 6.3.0 standard.
Diffstat (limited to 'unicode/unicode_linebreak.c')
-rw-r--r-- unicode/unicode_linebreak.c 45
1 files changed, 35 insertions, 10 deletions
diff --git a/unicode/unicode_linebreak.c b/unicode/unicode_linebreak.c
index 1105dec..9b30ae4 100644
--- a/unicode/unicode_linebreak.c
+++ b/unicode/unicode_linebreak.c
@@ -1,5 +1,5 @@
/*
-** Copyright 2011 Double Precision, Inc.
+** Copyright 2011-2013 Double Precision, Inc.
** See COPYING for distribution information.
**
*/
@@ -28,6 +28,7 @@ struct unicode_lb_info {
uint8_t savedclass;
size_t savedcmcnt;
+ uint8_t prevclass_min1;
uint8_t prevclass;
uint8_t prevclass_nsp;
@@ -50,7 +51,7 @@ static int next_lb25_seennuclcp(unicode_lb_info_t, uint8_t);
static void unicode_lb_reset(unicode_lb_info_t i)
{
- i->prevclass=i->prevclass_nsp=UNICODE_LB_SOT;
+ i->prevclass_min1=i->prevclass=i->prevclass_nsp=UNICODE_LB_SOT;
i->next_handler=next_def;
i->end_handler=end_def;
}
@@ -147,10 +148,15 @@ static int next_def_nolb25(unicode_lb_info_t i,
/* Retrieve the previous unicode character's linebreak class. */
+ uint8_t prevclass_min1=i->prevclass_min1;
uint8_t prevclass=i->prevclass;
uint8_t prevclass_nsp=i->prevclass_nsp;
+#define RESTORE (i->prevclass_min1=prevclass_min1, \
+ i->prevclass=prevclass, \
+ i->prevclass_nsp=prevclass_nsp) \
/* Save this unicode char's linebreak class, for the next goaround */
+ i->prevclass_min1=i->prevclass;
i->prevclass=uclass;
if (uclass != UNICODE_LB_SP)
@@ -216,8 +222,7 @@ static int next_def_nolb25(unicode_lb_info_t i,
if (uclass == UNICODE_LB_CM)
{
- i->prevclass=prevclass;
- i->prevclass_nsp=prevclass_nsp;
+ RESTORE;
return RESULT(UNICODE_LB_NONE); /* LB9 */
}
@@ -295,9 +300,15 @@ static int next_def_nolb25(unicode_lb_info_t i,
if (prevclass == UNICODE_LB_BB)
return RESULT(UNICODE_LB_NONE);
+ /* LB21a: */
+ if (prevclass_min1 == UNICODE_LB_HL &&
+ (prevclass == UNICODE_LB_HY || prevclass == UNICODE_LB_BA))
+ return RESULT(UNICODE_LB_NONE);
+
if (uclass == UNICODE_LB_IN)
switch (prevclass) {
case UNICODE_LB_AL:
+ case UNICODE_LB_HL:
case UNICODE_LB_ID:
case UNICODE_LB_IN:
case UNICODE_LB_NU:
@@ -311,17 +322,25 @@ static int next_def_nolb25(unicode_lb_info_t i,
return RESULT(UNICODE_LB_NONE); /* LB23 */
if (prevclass == UNICODE_LB_AL && uclass == UNICODE_LB_NU)
return RESULT(UNICODE_LB_NONE); /* LB23 */
+ if (prevclass == UNICODE_LB_HL && uclass == UNICODE_LB_NU)
+ return RESULT(UNICODE_LB_NONE); /* LB23 */
if (prevclass == UNICODE_LB_NU && uclass == UNICODE_LB_AL)
return RESULT(UNICODE_LB_NONE); /* LB23 */
+ if (prevclass == UNICODE_LB_NU && uclass == UNICODE_LB_HL)
+ return RESULT(UNICODE_LB_NONE); /* LB23 */
if (prevclass == UNICODE_LB_PR && uclass == UNICODE_LB_ID)
return RESULT(UNICODE_LB_NONE); /* LB24 */
if (prevclass == UNICODE_LB_PR && uclass == UNICODE_LB_AL)
return RESULT(UNICODE_LB_NONE); /* LB24 */
+ if (prevclass == UNICODE_LB_PR && uclass == UNICODE_LB_HL)
+ return RESULT(UNICODE_LB_NONE); /* LB24 */
if (prevclass == UNICODE_LB_PO && uclass == UNICODE_LB_AL)
return RESULT(UNICODE_LB_NONE); /* LB24 */
+ if (prevclass == UNICODE_LB_PO && uclass == UNICODE_LB_HL)
+ return RESULT(UNICODE_LB_NONE); /* LB24 */
if ((i->opts & UNICODE_LB_OPT_PRBREAK) && uclass == UNICODE_LB_PR)
switch (prevclass) {
@@ -339,8 +358,7 @@ static int next_def_nolb25(unicode_lb_info_t i,
if (uclass == UNICODE_LB_OP || uclass == UNICODE_LB_HY)
{
- i->prevclass=prevclass;
- i->prevclass_nsp=prevclass_nsp;
+ RESTORE;
i->savedclass=uclass;
i->savedcmcnt=0;
@@ -403,20 +421,27 @@ static int next_def_nolb25(unicode_lb_info_t i,
break;
}
- if (prevclass == UNICODE_LB_AL && uclass == UNICODE_LB_AL)
+ if ((prevclass == UNICODE_LB_AL || prevclass == UNICODE_LB_HL)
+ && (uclass == UNICODE_LB_AL || uclass == UNICODE_LB_HL))
return RESULT(UNICODE_LB_NONE); /* LB28 */
- if (prevclass == UNICODE_LB_IS && uclass == UNICODE_LB_AL)
+ if (prevclass == UNICODE_LB_IS &&
+ (uclass == UNICODE_LB_AL || uclass == UNICODE_LB_HL))
return RESULT(UNICODE_LB_NONE); /* LB29 */
- if ((prevclass == UNICODE_LB_AL || prevclass == UNICODE_LB_NU) &&
+ if ((prevclass == UNICODE_LB_AL || prevclass == UNICODE_LB_HL
+ || prevclass == UNICODE_LB_NU) &&
uclass == UNICODE_LB_OP)
return RESULT(UNICODE_LB_NONE); /* LB30 */
- if ((uclass == UNICODE_LB_AL || uclass == UNICODE_LB_NU) &&
+ if ((uclass == UNICODE_LB_AL || uclass == UNICODE_LB_HL
+ || uclass == UNICODE_LB_NU) &&
prevclass == UNICODE_LB_CP)
return RESULT(UNICODE_LB_NONE); /* LB30 */
+ if (uclass == UNICODE_LB_RI && prevclass == UNICODE_LB_RI)
+ return RESULT(UNICODE_LB_NONE); /* LB30a */
+
return RESULT(UNICODE_LB_ALLOWED); /* LB31 */
}