1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
|
/*
** Copyright 2011 Double Precision, Inc.
** See COPYING for distribution information.
**
*/
#include "unicode_config.h"
#include "unicode.h"
#include <unistd.h>
#include <stdint.h>
#include <stdlib.h>
/*
** Grapheme cluster break classes.
**
** unicode_grapheme_break() compares the class code it looks up for each
** character against these constants. UNICODE_GRAPHEMEBREAK_ANY is also the
** value handed to the table lookup as its last argument.
**
** NOTE(review): these are defined before graphemebreaktab.h is included,
** so that header presumably refers to them by name -- confirm before
** renumbering or renaming any of them.
*/
#define UNICODE_GRAPHEMEBREAK_ANY 0x00
#define UNICODE_GRAPHEMEBREAK_CR 0x01
#define UNICODE_GRAPHEMEBREAK_LF 0x02
#define UNICODE_GRAPHEMEBREAK_Control 0x03
#define UNICODE_GRAPHEMEBREAK_Extend 0x04
#define UNICODE_GRAPHEMEBREAK_Prepend 0x05
#define UNICODE_GRAPHEMEBREAK_SpacingMark 0x06
#define UNICODE_GRAPHEMEBREAK_L 0x07
#define UNICODE_GRAPHEMEBREAK_V 0x08
#define UNICODE_GRAPHEMEBREAK_T 0x09
#define UNICODE_GRAPHEMEBREAK_LV 0x0A
#define UNICODE_GRAPHEMEBREAK_LVT 0x0B
#include "graphemebreaktab.h"
/*
** Decide whether a grapheme cluster boundary falls between two adjacent
** characters.
**
** a - the character immediately before the candidate boundary
** b - the character immediately after the candidate boundary
**
** Returns 1 if there is a break between a and b, 0 if there is none.
**
** The GBn tags in the comments name the grapheme cluster boundary rule
** that each test implements.
*/
int unicode_grapheme_break(unicode_char a, unicode_char b)
{
	uint8_t before;
	uint8_t after;

	/* Classify both characters using the same lookup tables. */
	before = unicode_tab_lookup(a, unicode_indextab,
				    sizeof(unicode_indextab)
				    / sizeof(unicode_indextab[0]),
				    unicode_rangetab,
				    unicode_classtab,
				    UNICODE_GRAPHEMEBREAK_ANY);

	after = unicode_tab_lookup(b, unicode_indextab,
				   sizeof(unicode_indextab)
				   / sizeof(unicode_indextab[0]),
				   unicode_rangetab,
				   unicode_classtab,
				   UNICODE_GRAPHEMEBREAK_ANY);

	/* GB1 and GB2 are implied */

	switch (before) {
	case UNICODE_GRAPHEMEBREAK_CR:
		/* GB3 keeps CR LF together; any other pair falls to GB4 */
		return after == UNICODE_GRAPHEMEBREAK_LF ? 0 : 1;

	case UNICODE_GRAPHEMEBREAK_LF:
	case UNICODE_GRAPHEMEBREAK_Control:
		return 1; /* GB4 */

	default:
		break;
	}

	if (after == UNICODE_GRAPHEMEBREAK_CR ||
	    after == UNICODE_GRAPHEMEBREAK_LF ||
	    after == UNICODE_GRAPHEMEBREAK_Control)
		return 1; /* GB5 */

	/*
	** Every remaining rule except the final fallback yields "no break",
	** so the order in which they are tested does not affect the result.
	*/

	if (after == UNICODE_GRAPHEMEBREAK_Extend ||
	    after == UNICODE_GRAPHEMEBREAK_SpacingMark)
		return 0; /* GB9, GB9a */

	switch (before) {
	case UNICODE_GRAPHEMEBREAK_L:
		if (after == UNICODE_GRAPHEMEBREAK_L ||
		    after == UNICODE_GRAPHEMEBREAK_V ||
		    after == UNICODE_GRAPHEMEBREAK_LV ||
		    after == UNICODE_GRAPHEMEBREAK_LVT)
			return 0; /* GB6 */
		break;

	case UNICODE_GRAPHEMEBREAK_LV:
	case UNICODE_GRAPHEMEBREAK_V:
		if (after == UNICODE_GRAPHEMEBREAK_V ||
		    after == UNICODE_GRAPHEMEBREAK_T)
			return 0; /* GB7 */
		break;

	case UNICODE_GRAPHEMEBREAK_LVT:
	case UNICODE_GRAPHEMEBREAK_T:
		if (after == UNICODE_GRAPHEMEBREAK_T)
			return 0; /* GB8 */
		break;

	case UNICODE_GRAPHEMEBREAK_Prepend:
		return 0; /* GB9b */

	default:
		break;
	}

	return 1; /* GB10 */
}
|