diff options
Diffstat (limited to 'unicode/courier-unicode.h.in')
| -rw-r--r-- | unicode/courier-unicode.h.in | 83 |
1 files changed, 76 insertions, 7 deletions
diff --git a/unicode/courier-unicode.h.in b/unicode/courier-unicode.h.in index 085f085..4bcd935 100644 --- a/unicode/courier-unicode.h.in +++ b/unicode/courier-unicode.h.in @@ -75,6 +75,24 @@ extern char32_t unicode_tc(char32_t); char32_t unicode_html40ent_lookup(const char *n); /* +** East Asian Width lookup. +** +** unicode_eastasia looks up the EastAsianWidth property for the given +** Unicode character. +*/ + +#define UNICODE_EASTASIA_A 'A' /* Ambiguous */ +#define UNICODE_EASTASIA_F 'F' /* Full width */ +#define UNICODE_EASTASIA_H 'H' /* Half width */ +#define UNICODE_EASTASIA_N '/' /* Unassigned */ +#define UNICODE_EASTASIA_Na 'N' /* Narrow */ +#define UNICODE_EASTASIA_W 'W' /* Wide */ + +typedef char unicode_eastasia_t; + +unicode_eastasia_t unicode_eastasia(char32_t); + +/* ** ** Return "width" of unicode character. ** @@ -181,15 +199,66 @@ extern int unicode_islower(char32_t ch); extern int unicode_isupper(char32_t ch); /* +** Implementation of Unicode emoji classification, as per +** http://www.unicode.org/reports/tr51/tr51-18.html +** +** Given a char32_t, returns the character's emoji value, which is a bitmask: +** +*/ + +#define UNICODE_EMOJI_NONE 0 +#define UNICODE_EMOJI 1 +#define UNICODE_EMOJI_PRESENTATION 2 +#define UNICODE_EMOJI_MODIFIER 4 +#define UNICODE_EMOJI_MODIFIER_BASE 8 +#define UNICODE_EMOJI_COMPONENT 16 +#define UNICODE_EMOJI_EXTENDED_PICTOGRAPHIC 32 + +typedef unsigned char unicode_emoji_t; + +extern unicode_emoji_t unicode_emoji_lookup(char32_t); + +/* Look up just one of the properties, returns non-0 if the char has it */ + +extern int unicode_emoji(char32_t); +extern int unicode_emoji_presentation(char32_t); +extern int unicode_emoji_modifier(char32_t); +extern int unicode_emoji_modifier_base(char32_t); +extern int unicode_emoji_component(char32_t); +extern int unicode_emoji_extended_pictographic(char32_t); + +/* ** Implementation of grapheme cluster boundary rules, as per -** http://www.unicode.org/reports/tr29/tr29-27.html +** 
http://www.unicode.org/reports/tr29/tr29-37.html ** including GB9a and GB9b. ** -** Returns non-zero if there's a grapheme break between the two referenced -** characters. +** unicode_grapheme_break_init() allocates an opaque +** unicode_grapheme_break_info_t handle, and +** unicode_grapheme_break_deinit() destroys it. +** +** Passing the handle to unicode_grapheme_break_next() returns non-0 if +** there's a grapheme break before the given character (and after the +** character that got passed in the previous call). +** +** The first call to unicode_grapheme_break_next() returns 1, as per GB1. +** +** unicode_grapheme_break() is a simplified interface that returns non-0 +** if there is a grapheme break between the two characters. This simplified +** interface is equivalent to calling unicode_grapheme_break_init(), +** followed by two calls to unicode_grapheme_break_next(), and finally +** unicode_grapheme_break_deinit(), and returns the result of the second +** call to unicode_grapheme_break_next(). */ -int unicode_grapheme_break(char32_t a, char32_t b); +struct unicode_grapheme_break_info_s; + +typedef struct unicode_grapheme_break_info_s *unicode_grapheme_break_info_t; + +extern unicode_grapheme_break_info_t unicode_grapheme_break_init(); +extern int unicode_grapheme_break_next(unicode_grapheme_break_info_t, char32_t); +extern void unicode_grapheme_break_deinit(unicode_grapheme_break_info_t); + +extern int unicode_grapheme_break(char32_t a, char32_t b); typedef enum { @@ -199,14 +268,14 @@ typedef enum { /* ** Look up the unicode script property, as per -** http://www.unicode.org/reports/tr24/tr24-24.html +** http://www.unicode.org/reports/tr24/tr24-31.html */ unicode_script_t unicode_script(char32_t a); /* ** Implementation of line break rules, as per -** http://www.unicode.org/reports/tr14/tr14-35.html +** http://www.unicode.org/reports/tr14/tr14-45.html ** ** Invoke unicode_lb_init() to initialize the linebreaking algorithm. 
The ** first parameter is a callback function that gets invoked with two @@ -365,7 +434,7 @@ extern void unicode_lbc_set_opts(unicode_lbc_info_t i, int opts); /* ** Implemention of word break rules, as per -** http://www.unicode.org/reports/tr29/tr29-27.html +** http://www.unicode.org/reports/tr29/tr29-37.html ** ** Invoke unicode_wb_init() to initialize the wordbreaking algorithm. The ** first parameter is a callback function that gets invoked with two |
