/*
** Copyright 2011 Double Precision, Inc.
** See COPYING for distribution information.
**
*/

#include "unicode_config.h"
#include "unicode.h"

/*
** NOTE(review): the three system #include directives had lost their header
** names. <stdint.h> is required for uint8_t below; the other two are a
** conservative reconstruction -- confirm against the upstream file.
*/
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>

/*
** Grapheme cluster break classes. These names are also expected to be
** referenced by the tables in graphemebreaktab.h, so they must be defined
** before that header is included.
*/
#define UNICODE_GRAPHEMEBREAK_ANY		0x00
#define UNICODE_GRAPHEMEBREAK_CR		0x01
#define UNICODE_GRAPHEMEBREAK_LF		0x02
#define UNICODE_GRAPHEMEBREAK_Control		0x03
#define UNICODE_GRAPHEMEBREAK_Extend		0x04
#define UNICODE_GRAPHEMEBREAK_Prepend		0x05
#define UNICODE_GRAPHEMEBREAK_SpacingMark	0x06
#define UNICODE_GRAPHEMEBREAK_L			0x07
#define UNICODE_GRAPHEMEBREAK_V			0x08
#define UNICODE_GRAPHEMEBREAK_T			0x09
#define UNICODE_GRAPHEMEBREAK_LV		0x0A
#define UNICODE_GRAPHEMEBREAK_LVT		0x0B

#include "graphemebreaktab.h"

/*
** Look up the grapheme break class of a single character in the tables
** from graphemebreaktab.h. UNICODE_GRAPHEMEBREAK_ANY is passed as the last
** argument of unicode_tab_lookup() -- presumably the class returned for
** characters that are not listed in the tables (verify against
** unicode_tab_lookup's definition).
*/
static uint8_t grapheme_break_class(unicode_char ch)
{
	return unicode_tab_lookup(ch,
				  unicode_indextab,
				  sizeof(unicode_indextab)
				  / sizeof(unicode_indextab[0]),
				  unicode_rangetab,
				  unicode_classtab,
				  UNICODE_GRAPHEMEBREAK_ANY);
}

/*
** Decide whether a grapheme cluster break falls between two adjacent
** characters.
**
** a - the character before the candidate break position.
** b - the character after the candidate break position.
**
** Returns 0 if a and b belong to the same grapheme cluster (no break),
** 1 if there is a break between them.
**
** The rules are tested in order and the first one that matches decides the
** result, so the order of the tests below must not be changed.
*/
int unicode_grapheme_break(unicode_char a, unicode_char b)
{
	uint8_t ac = grapheme_break_class(a);
	uint8_t bc = grapheme_break_class(b);

	/* GB1 and GB2 are implied */

	if (ac == UNICODE_GRAPHEMEBREAK_CR && bc == UNICODE_GRAPHEMEBREAK_LF) {
		return 0; /* GB3 */
	}

	switch (ac) {
	case UNICODE_GRAPHEMEBREAK_CR:
	case UNICODE_GRAPHEMEBREAK_LF:
	case UNICODE_GRAPHEMEBREAK_Control:
		return 1; /* GB4 */
	default:
		break;
	}

	switch (bc) {
	case UNICODE_GRAPHEMEBREAK_CR:
	case UNICODE_GRAPHEMEBREAK_LF:
	case UNICODE_GRAPHEMEBREAK_Control:
		return 1; /* GB5 */
	default:
		break;
	}

	if (ac == UNICODE_GRAPHEMEBREAK_L) {
		switch (bc) {
		case UNICODE_GRAPHEMEBREAK_L:
		case UNICODE_GRAPHEMEBREAK_V:
		case UNICODE_GRAPHEMEBREAK_LV:
		case UNICODE_GRAPHEMEBREAK_LVT:
			return 0; /* GB6 */
		default:
			break;
		}
	}

	if ((ac == UNICODE_GRAPHEMEBREAK_LV ||
	     ac == UNICODE_GRAPHEMEBREAK_V) &&
	    (bc == UNICODE_GRAPHEMEBREAK_V ||
	     bc == UNICODE_GRAPHEMEBREAK_T)) {
		return 0; /* GB7 */
	}

	if ((ac == UNICODE_GRAPHEMEBREAK_LVT ||
	     ac == UNICODE_GRAPHEMEBREAK_T) &&
	    bc == UNICODE_GRAPHEMEBREAK_T) {
		return 0; /* GB8 */
	}

	if (bc == UNICODE_GRAPHEMEBREAK_Extend) {
		return 0; /* GB9 */
	}

	if (bc == UNICODE_GRAPHEMEBREAK_SpacingMark) {
		return 0; /* GB9a */
	}

	if (ac == UNICODE_GRAPHEMEBREAK_Prepend) {
		return 0; /* GB9b */
	}

	return 1; /* GB10 */
}