summaryrefslogtreecommitdiffstats
path: root/unicode/unicode_categories.c
blob: 3c6e409d7e54aaa4b1b540744078e41c5c6672ef (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
/*
** Copyright 2015 Double Precision, Inc.
** See COPYING for distribution information.
**
*/

#include	"unicode_config.h"
#include	"courier-unicode.h"
#include	"categoriestab.h"
#include	"linebreaktab_internal.h"

/*
** Return the category value recorded for code point ch.
**
** The lookup itself is done by unicode_tab32_lookup(); this function only
** hands it the index, page, range and class tables together with the
** element counts of the index and range tables.
*/
uint32_t unicode_category_lookup(char32_t ch)
{
	return unicode_tab32_lookup(
		ch,
		unicode_starting_indextab,
		unicode_starting_pagetab,
		/* number of entries in the index table */
		sizeof unicode_starting_indextab /
			sizeof *unicode_starting_indextab,
		unicode_rangetab,
		/* number of entries in the range table */
		sizeof unicode_rangetab / sizeof *unicode_rangetab,
		unicode_classtab,
		/* NOTE(review): presumably the value returned for a code
		** point that is absent from the tables -- confirm against
		** unicode_tab32_lookup(). */
		0);
}

/*
** Whitespace test.
**
** Returns 1 when the line-breaking class of ch is one of BK, CR, LF, NL
** or SP. For every other class the answer is whatever unicode_isblank()
** reports for ch.
*/
int unicode_isspace(char32_t ch)
{
	switch (unicode_lb_lookup(ch)) {
	case UNICODE_LB_SP:
	case UNICODE_LB_NL:
	case UNICODE_LB_LF:
	case UNICODE_LB_CR:
	case UNICODE_LB_BK:
		return 1;
	default:
		/* Not a line-break style space: defer to the blank test. */
		return unicode_isblank(ch);
	}
}

/*
** Blank test.
**
** Returns 1 when ch is U+0009 (code point 9), or when the
** UNICODE_CATEGORY_2 portion of its category value equals
** UNICODE_CATEGORY_2_SPACE. Returns 0 otherwise.
*/
int unicode_isblank(char32_t ch)
{
	/* The tab check comes first so that it short-circuits the lookup. */
	return ch == 9 ||
		(unicode_category_lookup(ch) & UNICODE_CATEGORY_2) ==
		UNICODE_CATEGORY_2_SPACE;
}

/*
** Letter test.
**
** Returns 1 when the UNICODE_CATEGORY_1 portion of ch's category value
** equals UNICODE_CATEGORY_1_LETTER, 0 otherwise.
*/
int unicode_isalpha(char32_t ch)
{
	const uint32_t category = unicode_category_lookup(ch);

	return (category & UNICODE_CATEGORY_1) == UNICODE_CATEGORY_1_LETTER
		? 1 : 0;
}

/*
** Digit test.
**
** Returns 1 only when the complete category value of ch is exactly
** UNICODE_CATEGORY_1_NUMBER combined with UNICODE_CATEGORY_2_DIGIT.
** The whole value is compared, not a masked part of it.
*/
int unicode_isdigit(char32_t ch)
{
	const uint32_t category = unicode_category_lookup(ch);

	if (category != (UNICODE_CATEGORY_1_NUMBER | UNICODE_CATEGORY_2_DIGIT))
		return 0;

	return 1;
}

/*
** Alphanumeric test: returns 1 when either unicode_isalpha() or
** unicode_isdigit() accepts ch, 0 otherwise.
*/
int unicode_isalnum(char32_t ch)
{
	/* Letters are checked first; digits are consulted only if that fails. */
	if (unicode_isalpha(ch))
		return 1;

	return unicode_isdigit(ch) != 0;
}

/*
** Graphic character test.
**
** Returns 0 for every code point below ' ' without consulting any table.
** Any other code point is considered graphic unless unicode_isspace()
** accepts it.
*/
int unicode_isgraph(char32_t ch)
{
	if (ch < ' ')
		return 0;

	return !unicode_isspace(ch);
}

/*
** Punctuation test.
**
** Returns 1 when the UNICODE_CATEGORY_1 portion of ch's category value
** equals UNICODE_CATEGORY_1_PUNCTUATION, 0 otherwise.
*/
int unicode_ispunct(char32_t ch)
{
	const uint32_t category = unicode_category_lookup(ch);

	return (category & UNICODE_CATEGORY_1) ==
		UNICODE_CATEGORY_1_PUNCTUATION ? 1 : 0;
}

/*
** Lowercase test.
**
** Returns 1 when ch is a letter according to unicode_isalpha() and
** unicode_lc() maps it to itself; returns 0 otherwise.
**
** Consequently any letter that unicode_lc() leaves unchanged is reported
** as lowercase.
*/
int unicode_islower(char32_t ch)
{
	if (!unicode_isalpha(ch))
		return 0;

	return unicode_lc(ch) == ch;
}

/*
** Uppercase test.
**
** Returns 1 when ch is a letter according to unicode_isalpha() and
** unicode_uc() maps it to itself; returns 0 otherwise.
**
** Consequently any letter that unicode_uc() leaves unchanged is reported
** as uppercase.
*/
int unicode_isupper(char32_t ch)
{
	if (!unicode_isalpha(ch))
		return 0;

	return unicode_uc(ch) == ch;
}