summaryrefslogtreecommitdiffstats
path: root/unicode/courier-unicode.h.in
diff options
context:
space:
mode:
Diffstat (limited to 'unicode/courier-unicode.h.in')
-rw-r--r--unicode/courier-unicode.h.in83
1 files changed, 76 insertions, 7 deletions
diff --git a/unicode/courier-unicode.h.in b/unicode/courier-unicode.h.in
index 085f085..4bcd935 100644
--- a/unicode/courier-unicode.h.in
+++ b/unicode/courier-unicode.h.in
@@ -75,6 +75,24 @@ extern char32_t unicode_tc(char32_t);
char32_t unicode_html40ent_lookup(const char *n);
/*
+** East Asian Width lookup.
+**
+** unicode_eastasia looks up the EastAsianWidth property for the given
+** Unicode character.
+*/
+
+#define UNICODE_EASTASIA_A 'A' /* Ambiguous */
+#define UNICODE_EASTASIA_F 'F' /* Full width */
+#define UNICODE_EASTASIA_H 'H' /* Half width */
+#define UNICODE_EASTASIA_N '/' /* Unassigned; NOTE(review): UAX #11 names class N "Neutral" - confirm */
+#define UNICODE_EASTASIA_Na 'N' /* Narrow; the value is 'N', not to be confused with _N ('/') above */
+#define UNICODE_EASTASIA_W 'W' /* Wide */
+
+typedef char unicode_eastasia_t; /* presumably one of the UNICODE_EASTASIA_* values above */
+
+unicode_eastasia_t unicode_eastasia(char32_t);
+
+/*
**
** Return "width" of unicode character.
**
@@ -181,15 +199,66 @@ extern int unicode_islower(char32_t ch);
extern int unicode_isupper(char32_t ch);
/*
+** Implementation of Unicode emoji classification, as per
+** http://www.unicode.org/reports/tr51/tr51-18.html
+**
+** Given a char32_t, returns the character's emoji value, which is a bitmask
+** of the UNICODE_EMOJI_* flags defined below.
+*/
+
+#define UNICODE_EMOJI_NONE 0 /* no flag set */
+#define UNICODE_EMOJI 1 /* this and each flag below is a distinct bit */
+#define UNICODE_EMOJI_PRESENTATION 2
+#define UNICODE_EMOJI_MODIFIER 4
+#define UNICODE_EMOJI_MODIFIER_BASE 8
+#define UNICODE_EMOJI_COMPONENT 16
+#define UNICODE_EMOJI_EXTENDED_PICTOGRAPHIC 32
+
+typedef unsigned char unicode_emoji_t;
+
+extern unicode_emoji_t unicode_emoji_lookup(char32_t);
+
+/* Look up just one of the properties; each returns non-0 if the character has it. */
+
+extern int unicode_emoji(char32_t);
+extern int unicode_emoji_presentation(char32_t);
+extern int unicode_emoji_modifier(char32_t);
+extern int unicode_emoji_modifier_base(char32_t);
+extern int unicode_emoji_component(char32_t);
+extern int unicode_emoji_extended_pictographic(char32_t);
+
+/*
** Implementation of grapheme cluster boundary rules, as per
-** http://www.unicode.org/reports/tr29/tr29-27.html
+** http://www.unicode.org/reports/tr29/tr29-37.html
** including GB9a and GB9b.
**
-** Returns non-zero if there's a grapheme break between the two referenced
-** characters.
+** unicode_grapheme_break_init() allocates an opaque
+** unicode_grapheme_break_info_t handle, and
+** unicode_grapheme_break_deinit() destroys it.
+**
+** Passing the handle to unicode_grapheme_break_next() returns non-0 if
+** there's a grapheme break before the given character (and after the
+** character that got passed in the previous call).
+**
+** The first call to unicode_grapheme_break_next() returns 1, as per GB1.
+**
+** unicode_grapheme_break() is a simplified interface that returns non-0
+** if there is a grapheme break between the two characters. This simplified
+** interface is equivalent to calling unicode_grapheme_break_init(),
+** followed by two calls to unicode_grapheme_break_next(), and finally
+** unicode_grapheme_break_deinit(), and returns the result of the second
+** call to unicode_grapheme_break_next().
*/
-int unicode_grapheme_break(char32_t a, char32_t b);
+struct unicode_grapheme_break_info_s;
+
+typedef struct unicode_grapheme_break_info_s *unicode_grapheme_break_info_t;
+
+extern unicode_grapheme_break_info_t unicode_grapheme_break_init(void);
+extern int unicode_grapheme_break_next(unicode_grapheme_break_info_t, char32_t);
+extern void unicode_grapheme_break_deinit(unicode_grapheme_break_info_t);
+
+extern int unicode_grapheme_break(char32_t a, char32_t b);
typedef enum {
@@ -199,14 +268,14 @@ typedef enum {
/*
** Look up the unicode script property, as per
-** http://www.unicode.org/reports/tr24/tr24-24.html
+** http://www.unicode.org/reports/tr24/tr24-31.html
*/
unicode_script_t unicode_script(char32_t a);
/*
** Implementation of line break rules, as per
-** http://www.unicode.org/reports/tr14/tr14-35.html
+** http://www.unicode.org/reports/tr14/tr14-45.html
**
** Invoke unicode_lb_init() to initialize the linebreaking algorithm. The
** first parameter is a callback function that gets invoked with two
@@ -365,7 +434,7 @@ extern void unicode_lbc_set_opts(unicode_lbc_info_t i, int opts);
/*
** Implementation of word break rules, as per
-** http://www.unicode.org/reports/tr29/tr29-27.html
+** http://www.unicode.org/reports/tr29/tr29-37.html
**
** Invoke unicode_wb_init() to initialize the wordbreaking algorithm. The
** first parameter is a callback function that gets invoked with two