summaryrefslogtreecommitdiffstats
path: root/unicode/unicode_graphemebreak.c
blob: 5939dc8cd8bdb74d6e71fbeb28dc188006633e18 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
/*
** Copyright 2011 Double Precision, Inc.
** See COPYING for distribution information.
**
*/

#include	"unicode_config.h"
#include	"unicode.h"
#include	<unistd.h>
#include	<stdint.h>
#include	<stdlib.h>

/*
** Grapheme break class codes.
**
** unicode_grapheme_break() compares the values it gets back from
** unicode_tab_lookup() against these codes, and passes
** UNICODE_GRAPHEMEBREAK_ANY as the final argument of each lookup.
**
** NOTE(review): the names appear to follow the Grapheme_Cluster_Break
** property values of UAX #29 (CR, LF, Control, Extend, Prepend,
** SpacingMark and the Hangul syllable types L, V, T, LV, LVT) -- confirm
** against the Unicode version the tables were generated from.
**
** NOTE(review): these are defined before graphemebreaktab.h is included;
** presumably the generated table refers to them by name, so keep them as
** macros and keep them above that #include -- verify against the header.
*/
#define UNICODE_GRAPHEMEBREAK_ANY		0x00
#define UNICODE_GRAPHEMEBREAK_CR		0x01
#define UNICODE_GRAPHEMEBREAK_LF		0x02
#define UNICODE_GRAPHEMEBREAK_Control		0x03
#define UNICODE_GRAPHEMEBREAK_Extend		0x04
#define UNICODE_GRAPHEMEBREAK_Prepend		0x05
#define UNICODE_GRAPHEMEBREAK_SpacingMark	0x06
#define UNICODE_GRAPHEMEBREAK_L			0x07
#define UNICODE_GRAPHEMEBREAK_V			0x08
#define UNICODE_GRAPHEMEBREAK_T			0x09
#define UNICODE_GRAPHEMEBREAK_LV		0x0A
#define UNICODE_GRAPHEMEBREAK_LVT		0x0B

#include "graphemebreaktab.h"

/*
** Map one character to its grapheme break class code by looking it up in
** the tables pulled in from graphemebreaktab.h.
**
** UNICODE_GRAPHEMEBREAK_ANY is handed to unicode_tab_lookup() as its last
** argument -- presumably the class reported for characters the tables do
** not list; confirm against unicode_tab_lookup().
*/
static uint8_t grapheme_break_class(unicode_char ch)
{
	return unicode_tab_lookup(ch, unicode_indextab,
				  sizeof(unicode_indextab)
				  / sizeof(unicode_indextab[0]),
				  unicode_rangetab,
				  unicode_classtab,
				  UNICODE_GRAPHEMEBREAK_ANY);
}

/*
** Decide whether a grapheme cluster break falls between two adjacent
** characters.
**
** a is the character in front of the candidate break position, b is the
** character that follows it.
**
** Returns 1 if there is a break between a and b, 0 if there is not.
** The GBn labels below are the rule numbers each test implements.
*/
int unicode_grapheme_break(unicode_char a, unicode_char b)
{
	const uint8_t before = grapheme_break_class(a);
	const uint8_t after = grapheme_break_class(b);

	/* GB1 and GB2 are implied */

	/* GB3: never split a CR LF pair */
	if (before == UNICODE_GRAPHEMEBREAK_CR &&
	    after == UNICODE_GRAPHEMEBREAK_LF) {
		return 0;
	}

	/* GB4: otherwise always break after CR, LF or Control */
	if (before == UNICODE_GRAPHEMEBREAK_CR ||
	    before == UNICODE_GRAPHEMEBREAK_LF ||
	    before == UNICODE_GRAPHEMEBREAK_Control) {
		return 1;
	}

	/* GB5: ... and always break before them */
	if (after == UNICODE_GRAPHEMEBREAK_CR ||
	    after == UNICODE_GRAPHEMEBREAK_LF ||
	    after == UNICODE_GRAPHEMEBREAK_Control) {
		return 1;
	}

	/*
	** GB6-GB8: Hangul syllable sequences, grouped by the class of the
	** leading character.  A pair that matches none of them falls through
	** to the remaining rules.
	*/
	switch (before) {
	case UNICODE_GRAPHEMEBREAK_L:
		if (after == UNICODE_GRAPHEMEBREAK_L ||
		    after == UNICODE_GRAPHEMEBREAK_V ||
		    after == UNICODE_GRAPHEMEBREAK_LV ||
		    after == UNICODE_GRAPHEMEBREAK_LVT) {
			return 0; /* GB6 */
		}
		break;
	case UNICODE_GRAPHEMEBREAK_LV:
	case UNICODE_GRAPHEMEBREAK_V:
		if (after == UNICODE_GRAPHEMEBREAK_V ||
		    after == UNICODE_GRAPHEMEBREAK_T) {
			return 0; /* GB7 */
		}
		break;
	case UNICODE_GRAPHEMEBREAK_LVT:
	case UNICODE_GRAPHEMEBREAK_T:
		if (after == UNICODE_GRAPHEMEBREAK_T) {
			return 0; /* GB8 */
		}
		break;
	default:
		break;
	}

	/* No break in front of an Extend or SpacingMark character */
	switch (after) {
	case UNICODE_GRAPHEMEBREAK_Extend:	/* GB9 */
	case UNICODE_GRAPHEMEBREAK_SpacingMark:	/* GB9a */
		return 0;
	default:
		break;
	}

	/* GB9b: no break after Prepend */
	if (before == UNICODE_GRAPHEMEBREAK_Prepend) {
		return 0;
	}

	/* GB10: break everywhere else */
	return 1;
}