1 files changed, 418 insertions, 80 deletions
diff --git a/unicode/courier-unicode.h.in b/unicode/courier-unicode.h.in
index 085f085..cc9dbbb 100644
--- a/unicode/courier-unicode.h.in
+++ b/unicode/courier-unicode.h.in
@@ -2,7 +2,7 @@
 #define	courier_unicode_h
 
 /*
-** Copyright 2000-2018 Double Precision, Inc.
+** Copyright 2000-2020 Double Precision, Inc.
 ** See COPYING for distribution information.
 **
 */
@@ -12,6 +12,7 @@
 #include <string>
 #include <vector>
 #include <list>
+#include <functional>
 
 extern "C" {
 #endif
@@ -40,7 +41,7 @@ typedef uint32_t char32_t;
 #endif
 #endif
 
-#define COURIER_UNICODE_VERSION 210
+#define COURIER_UNICODE_VERSION 220
 
 /*
 ** The system default character set, from the locale.
@@ -75,6 +76,24 @@ extern char32_t unicode_tc(char32_t);
 char32_t unicode_html40ent_lookup(const char *n);
 
 /*
+** East Asian Width lookup.
+**
+** unicode_eastasia looks up the EastAsianWidth property for the given
+** Unicode character.
+*/
+
+#define UNICODE_EASTASIA_A	'A'	/* Ambiguous */
+#define UNICODE_EASTASIA_F	'F'	/* Full width */
+#define UNICODE_EASTASIA_H	'H'	/* Half width */
+#define UNICODE_EASTASIA_N	'/'	/* Unassigned */
+#define UNICODE_EASTASIA_Na	'N'	/* Narrow */
+#define UNICODE_EASTASIA_W	'W'	/* Wide */
+
+typedef char unicode_eastasia_t;
+
+unicode_eastasia_t unicode_eastasia(char32_t);
+
+/*
 **
 ** Return "width" of unicode character.
 **
@@ -181,15 +200,66 @@ extern int unicode_islower(char32_t ch);
 extern int unicode_isupper(char32_t ch);
 
 /*
+** Implementation of Unicode emoji classification, as per
+** http://www.unicode.org/reports/tr51/tr51-18.html
+**
+** unicode_emoji_lookup() returns the given character's emoji properties as a
+** bitmask of the UNICODE_EMOJI_* values defined below:
+*/
+
+#define UNICODE_EMOJI_NONE			0
+#define UNICODE_EMOJI				1
+#define UNICODE_EMOJI_PRESENTATION		2
+#define UNICODE_EMOJI_MODIFIER			4
+#define UNICODE_EMOJI_MODIFIER_BASE		8
+#define UNICODE_EMOJI_COMPONENT			16
+#define UNICODE_EMOJI_EXTENDED_PICTOGRAPHIC	32
+
+typedef unsigned char unicode_emoji_t;
+
+extern unicode_emoji_t unicode_emoji_lookup(char32_t);
+
+/* Look up just one of the properties, returns non-0 if the char has it */
+
+extern int unicode_emoji(char32_t);
+extern int unicode_emoji_presentation(char32_t);
+extern int unicode_emoji_modifier(char32_t);
+extern int unicode_emoji_modifier_base(char32_t);
+extern int unicode_emoji_component(char32_t);
+extern int unicode_emoji_extended_pictographic(char32_t);
+
+/*
 ** Implementation of grapheme cluster boundary rules, as per
-** http://www.unicode.org/reports/tr29/tr29-27.html
+** http://www.unicode.org/reports/tr29/tr29-37.html
 ** including  GB9a and GB9b.
 **
-** Returns non-zero if there's a grapheme break between the two referenced
-** characters.
+** unicode_grapheme_break_init() allocates an opaque
+** unicode_grapheme_break_info_t handle, and
+** unicode_grapheme_break_deinit() destroys it.
+**
+** Passing the handle to unicode_grapheme_break_next() returns non-0 if
+** there's a grapheme break before the given character (and after the
+** character that got passed in the previous call).
+**
+** The first call to unicode_grapheme_break_next() returns 1, as per GB1.
+**
+** unicode_grapheme_break() is a simplified interface that returns non-0
+** if there is a grapheme break between the two characters. This simplified
+** interface is equivalent to calling unicode_grapheme_break_init(),
+** followed by two calls to unicode_grapheme_break_next(), and finally
+** unicode_grapheme_break_deinit(), and returns the result of the second
+** call to unicode_grapheme_break_next().
 */
 
-int unicode_grapheme_break(char32_t a, char32_t b);
+struct unicode_grapheme_break_info_s;
+
+typedef struct unicode_grapheme_break_info_s *unicode_grapheme_break_info_t;
+
+extern unicode_grapheme_break_info_t unicode_grapheme_break_init(void);
+extern int unicode_grapheme_break_next(unicode_grapheme_break_info_t, char32_t);
+extern void unicode_grapheme_break_deinit(unicode_grapheme_break_info_t);
+
+extern int unicode_grapheme_break(char32_t a, char32_t b);
 
 typedef enum {
 
@@ -199,14 +269,14 @@ typedef enum {
 
 /*
 ** Look up the unicode script property, as per
-** http://www.unicode.org/reports/tr24/tr24-24.html
+** http://www.unicode.org/reports/tr24/tr24-31.html
 */
 
 unicode_script_t unicode_script(char32_t a);
 
 /*
 ** Implementation of line break rules, as per
-** http://www.unicode.org/reports/tr14/tr14-35.html
+** http://www.unicode.org/reports/tr14/tr14-45.html
 **
 ** Invoke unicode_lb_init() to initialize the linebreaking algorithm. The
 ** first parameter is a callback function that gets invoked with two
@@ -365,7 +435,7 @@ extern void unicode_lbc_set_opts(unicode_lbc_info_t i, int opts);
 
 /*
 ** Implemention of word break rules, as per
-** http://www.unicode.org/reports/tr29/tr29-27.html
+** http://www.unicode.org/reports/tr29/tr29-37.html
 **
 ** Invoke unicode_wb_init() to initialize the wordbreaking algorithm. The
 ** first parameter is a callback function that gets invoked with two
@@ -466,6 +536,154 @@ int unicode_wbscan_next(unicode_wbscan_info_t i, char32_t ch);
 
 size_t unicode_wbscan_end(unicode_wbscan_info_t i);
 
+/* Unicode directional markers */
+
+#define UNICODE_LRM	0x200E /* Left-to-right mark */
+#define UNICODE_RLM	0x200F /* Right-to-left mark */
+#define UNICODE_ALM	0x061C /* Arabic letter mark (right-to-left) */
+#define UNICODE_LRI	0x2066 /* Left-to-right isolate */
+#define UNICODE_RLI	0x2067 /* Right-to-left isolate */
+#define UNICODE_PDI	0x2069 /* Pop directional isolate */
+#define UNICODE_RLO	0x202e /* Right-to-left override */
+#define UNICODE_LRO	0x202d /* Left-to-right override */
+#define UNICODE_PDF	0x202c /* Pop directional formatting (ends an embedding or override) */
+
+
+typedef char unicode_bidi_bracket_type_t;
+
+#define UNICODE_BIDI_n  'n'
+#define UNICODE_BIDI_o	'o'
+#define UNICODE_BIDI_c	'c'
+
+extern char32_t unicode_bidi_mirror(char32_t c);
+
+extern char32_t unicode_bidi_bracket_type(char32_t c,
+					  unicode_bidi_bracket_type_t *ret);
+
+
+typedef unsigned char unicode_bidi_level_t;
+
+#define UNICODE_BIDI_LR		((unicode_bidi_level_t)0)
+#define UNICODE_BIDI_RL		((unicode_bidi_level_t)1)
+#define UNICODE_BIDI_SKIP	((unicode_bidi_level_t)254)
+
+extern unicode_bidi_level_t unicode_bidi_calc(const char32_t *p, size_t n,
+					      unicode_bidi_level_t *bufp,
+					      const unicode_bidi_level_t *
+					      initial_embedding_level);
+
+extern void unicode_bidi_reorder(char32_t *p,
+				 unicode_bidi_level_t *levels,
+				 size_t n,
+				 void (*reorder_callback)(size_t, size_t,
+							  void *),
+				 void *arg);
+
+/* BIDI_TYPE_LIST */
+typedef enum {
+	      UNICODE_BIDI_TYPE_AL,
+	      UNICODE_BIDI_TYPE_AN,
+	      UNICODE_BIDI_TYPE_B,
+	      UNICODE_BIDI_TYPE_BN,
+	      UNICODE_BIDI_TYPE_CS,
+	      UNICODE_BIDI_TYPE_EN,
+	      UNICODE_BIDI_TYPE_ES,
+	      UNICODE_BIDI_TYPE_ET,
+	      UNICODE_BIDI_TYPE_FSI,
+	      UNICODE_BIDI_TYPE_L,
+	      UNICODE_BIDI_TYPE_LRE,
+	      UNICODE_BIDI_TYPE_LRI,
+	      UNICODE_BIDI_TYPE_LRO,
+	      UNICODE_BIDI_TYPE_NSM,
+	      UNICODE_BIDI_TYPE_ON,
+	      UNICODE_BIDI_TYPE_PDF,
+	      UNICODE_BIDI_TYPE_PDI,
+	      UNICODE_BIDI_TYPE_R,
+	      UNICODE_BIDI_TYPE_RLE,
+	      UNICODE_BIDI_TYPE_RLI,
+	      UNICODE_BIDI_TYPE_RLO,
+	      UNICODE_BIDI_TYPE_S,
+	      UNICODE_BIDI_TYPE_WS,
+} enum_bidi_type_t;
+
+extern enum_bidi_type_t unicode_bidi_type(char32_t c);
+
+extern size_t unicode_bidi_cleanup(char32_t *string,
+				   unicode_bidi_level_t *levels,
+				   size_t n,
+				   void (*removed_callback)(size_t, void *),
+				   void *);
+
+extern size_t unicode_bidi_extra_cleanup(char32_t *string,
+					 unicode_bidi_level_t *levels,
+					 size_t n,
+					 void (*removed_callback)(size_t,
+								  void *),
+					 void *);
+
+extern void unicode_bidi_logical_order(char32_t *string,
+				       unicode_bidi_level_t *levels,
+				       size_t n,
+				       unicode_bidi_level_t paragraph_embedding,
+				       void (*reorder_callback)(size_t, size_t,
+								void *),
+				       void *arg);
+
+extern void unicode_bidi_embed(const char32_t *string,
+			       const unicode_bidi_level_t *levels,
+			       size_t n,
+			       unicode_bidi_level_t paragraph_embedding,
+			       void (*emit)(const char32_t *string,
+					    size_t n,
+					    void *arg),
+			       void *arg);
+
+extern char32_t unicode_bidi_embed_paragraph_level(const char32_t *str,
+						   size_t n,
+						   unicode_bidi_level_t);
+
+/*
+** unicode_canonical() returns the canonical mapping of the given Unicode
+** character. The returned structure specifies:
+**
+** - A pointer to the canonical decomposition of the given character.
+** - Number of characters in the canonical decomposition.
+** - An optional formatting tag.
+**
+** A null pointer, and a 0 character count gets returned for characters
+** without a canonical decomposition.
+**
+*/
+
+typedef enum {
+	      UNICODE_CANONICAL_FMT_NONE=0,
+
+	      UNICODE_CANONICAL_FMT_CIRCLE,
+	      UNICODE_CANONICAL_FMT_COMPAT,
+	      UNICODE_CANONICAL_FMT_FINAL,
+	      UNICODE_CANONICAL_FMT_FONT,
+	      UNICODE_CANONICAL_FMT_FRACTION,
+	      UNICODE_CANONICAL_FMT_INITIAL,
+	      UNICODE_CANONICAL_FMT_ISOLATED,
+	      UNICODE_CANONICAL_FMT_MEDIAL,
+	      UNICODE_CANONICAL_FMT_NARROW,
+	      UNICODE_CANONICAL_FMT_NOBREAK,
+	      UNICODE_CANONICAL_FMT_SMALL,
+	      UNICODE_CANONICAL_FMT_SQUARE,
+	      UNICODE_CANONICAL_FMT_SUB,
+	      UNICODE_CANONICAL_FMT_SUPER,
+	      UNICODE_CANONICAL_FMT_VERTICAL,
+	      UNICODE_CANONICAL_FMT_WIDE,
+} unicode_canonical_fmt_t;
+
+typedef struct {
+	const char32_t *canonical_chars;
+	size_t n_canonical_chars;
+	unicode_canonical_fmt_t format;
+} unicode_canonical_t;
+
+extern unicode_canonical_t unicode_canonical(char32_t);
+
 /*
 ** A buffer that holds unicode characters, and dynamically grows as needed.
 */
@@ -1407,11 +1625,11 @@ class linebreak_callback_base {
 	int opts;
 
 #if __cplusplus >= 201103L
- public:
+public:
 	linebreak_callback_base(const linebreak_callback_base &)=delete;
 	linebreak_callback_base &operator=(const
 					   linebreak_callback_base &)=delete;
- private:
+private:
 #else
 	linebreak_callback_base(const linebreak_callback_base &);
 	/* NOT IMPLEMENTED */
@@ -1420,7 +1638,7 @@ class linebreak_callback_base {
 					   linebreak_callback_base &);
 	/* NOT IMPLEMENTED */
 #endif
- public:
+public:
 	linebreak_callback_base();
 	virtual ~linebreak_callback_base();
 
@@ -1433,8 +1651,8 @@ class linebreak_callback_base {
 	linebreak_callback_base &operator<<(char32_t uc);
 
 	template<typename iter_type>
-		linebreak_callback_base &operator()(iter_type beg_iter,
-						    iter_type end_iter)
+	linebreak_callback_base &operator()(iter_type beg_iter,
+					    iter_type end_iter)
 	{
 		while (beg_iter != end_iter)
 			operator<<(*beg_iter++);
@@ -1442,17 +1660,17 @@ class linebreak_callback_base {
 	}
 
 	template<typename container_type>
-		linebreak_callback_base &operator()(const container_type &vec)
+	linebreak_callback_base &operator()(const container_type &vec)
 	{
 		return operator()(vec.begin(), vec.end());
 	}
- private:
+private:
 	virtual int callback(int);
 };
 
 class linebreak_callback_save_buf : public linebreak_callback_base {
 
- public:
+public:
 	std::list<int> lb_buf;
 
 	linebreak_callback_save_buf();
@@ -1460,7 +1678,7 @@ class linebreak_callback_save_buf : public linebreak_callback_base {
 
 	using linebreak_callback_base::operator<<;
 	using linebreak_callback_base::operator();
- private:
+private:
 	int callback(int value);
 };
 
@@ -1470,7 +1688,7 @@ class linebreak_callback_save_buf : public linebreak_callback_base {
 */
 
 template<typename input_t> class linebreak_iter
-: public std::iterator<std::input_iterator_tag, int, void>
+	: public std::iterator<std::input_iterator_tag, int, void>
 {
 	mutable input_t iter_value, end_iter_value;
 
@@ -1500,16 +1718,16 @@ template<typename input_t> class linebreak_iter
 
 	mutable value_type bufvalue;
 
- public:
- linebreak_iter(const input_t &iter_valueArg,
-		const input_t &iter_endvalueArg)
-	 : iter_value(iter_valueArg),
-		end_iter_value(iter_endvalueArg),
-		buf(new linebreak_callback_save_buf)
-		{
-		}
+public:
+	linebreak_iter(const input_t &iter_valueArg,
+		       const input_t &iter_endvalueArg)
+		: iter_value(iter_valueArg),
+		  end_iter_value(iter_endvalueArg),
+		  buf(new linebreak_callback_save_buf)
+	{
+	}
 
- linebreak_iter() : buf(NULL)
+	linebreak_iter() : buf(NULL)
 	{
 	}
 
@@ -1525,23 +1743,23 @@ template<typename input_t> class linebreak_iter
 			delete buf;
 	}
 
- linebreak_iter(const linebreak_iter<input_t> &v)
-	 : buf(NULL)
+	linebreak_iter(const linebreak_iter<input_t> &v)
+		: buf(NULL)
 	{
 		operator=(v);
 	}
 
 	linebreak_iter<input_t> &operator=(const
 					   linebreak_iter<input_t> &v)
-		{
-			if (buf)
-				delete buf;
-			buf=v.buf;
-			iter_value=v.iter_value;
-			end_iter_value=v.end_iter_value;
-			v.buf=NULL;
-			return *this;
-		}
+	{
+		if (buf)
+			delete buf;
+		buf=v.buf;
+		iter_value=v.iter_value;
+		end_iter_value=v.end_iter_value;
+		v.buf=NULL;
+		return *this;
+	}
 
 	bool operator==(const linebreak_iter<input_t> &v) const
 	{
@@ -1591,14 +1809,14 @@ class linebreakc_callback_base {
 	int opts;
 
 #if __cplusplus >= 201103L
- public:
+public:
 	linebreakc_callback_base(const linebreakc_callback_base &)
-		=delete;
+	=delete;
 
 	linebreakc_callback_base &operator=(const
 					    linebreakc_callback_base
 					    &)=delete;
- private:
+private:
 #else
 	linebreakc_callback_base(const linebreakc_callback_base &);
 	/* NOT IMPLEMENTED */
@@ -1609,7 +1827,7 @@ class linebreakc_callback_base {
 	/* NOT IMPLEMENTED */
 #endif
 
- public:
+public:
 	linebreakc_callback_base();
 	virtual ~linebreakc_callback_base();
 
@@ -1622,8 +1840,8 @@ class linebreakc_callback_base {
 	linebreakc_callback_base &operator<<(char32_t uc);
 
 	template<typename iter_type>
-		linebreakc_callback_base &operator()(iter_type beg_iter,
-						     iter_type end_iter)
+	linebreakc_callback_base &operator()(iter_type beg_iter,
+					     iter_type end_iter)
 	{
 		while (beg_iter != end_iter)
 			operator<<(*beg_iter++);
@@ -1636,13 +1854,13 @@ class linebreakc_callback_base {
 	{
 		return operator()(vec.begin(), vec.end());
 	}
- private:
+private:
 	virtual int callback(int, char32_t);
 };
 
 class linebreakc_callback_save_buf : public linebreakc_callback_base {
 
- public:
+public:
 	std::list<std::pair<int, char32_t> > lb_buf;
 
 	linebreakc_callback_save_buf();
@@ -1650,7 +1868,7 @@ class linebreakc_callback_save_buf : public linebreakc_callback_base {
 
 	using linebreakc_callback_base::operator<<;
 	using linebreakc_callback_base::operator();
- private:
+private:
 	int callback(int, char32_t);
 };
 
@@ -1663,8 +1881,8 @@ class linebreakc_callback_save_buf : public linebreakc_callback_base {
 */
 
 template<typename input_t> class linebreakc_iter
-: public std::iterator<std::input_iterator_tag,
-	std::pair<int, char32_t>, void>
+	: public std::iterator<std::input_iterator_tag,
+			       std::pair<int, char32_t>, void>
 {
 	mutable input_t iter_value, end_iter_value;
 
@@ -1695,16 +1913,16 @@ template<typename input_t> class linebreakc_iter
 
 	mutable value_type bufvalue;
 
- public:
- linebreakc_iter(const input_t &iter_valueArg,
-		 const input_t &iter_endvalueArg)
-	 : iter_value(iter_valueArg),
-		end_iter_value(iter_endvalueArg),
-		buf(new linebreakc_callback_save_buf)
-		{
-		}
+public:
+	linebreakc_iter(const input_t &iter_valueArg,
+			const input_t &iter_endvalueArg)
+		: iter_value(iter_valueArg),
+		  end_iter_value(iter_endvalueArg),
+		  buf(new linebreakc_callback_save_buf)
+	{
+	}
 
- linebreakc_iter() : buf(NULL)
+	linebreakc_iter() : buf(NULL)
 	{
 	}
 
@@ -1714,23 +1932,23 @@ template<typename input_t> class linebreakc_iter
 			delete buf;
 	}
 
- linebreakc_iter(const linebreakc_iter<input_t> &v)
-	 : buf(NULL)
+	linebreakc_iter(const linebreakc_iter<input_t> &v)
+		: buf(NULL)
 	{
 		operator=(v);
 	}
 
 	linebreakc_iter<input_t> &operator=(const
 					    linebreakc_iter<input_t> &v)
-		{
-			if (buf)
-				delete buf;
-			buf=v.buf;
-			iter_value=v.iter_value;
-			end_iter_value=v.end_iter_value;
-			v.buf=NULL;
-			return *this;
-		}
+	{
+		if (buf)
+			delete buf;
+		buf=v.buf;
+		iter_value=v.iter_value;
+		end_iter_value=v.end_iter_value;
+		v.buf=NULL;
+		return *this;
+	}
 
 	bool operator==(const linebreakc_iter<input_t> &v) const
 	{
@@ -1786,13 +2004,13 @@ class wordbreak_callback_base {
 	unicode_wb_info_t handle;
 
 #if __cplusplus >= 201103L
- public:
+public:
 	wordbreak_callback_base(const wordbreak_callback_base &)=delete;
 
 	wordbreak_callback_base &operator=(const
 					   wordbreak_callback_base &)
-		=delete;
- private:
+	=delete;
+private:
 #else
 	wordbreak_callback_base(const wordbreak_callback_base &);
 	/* NOT IMPLEMENTED */
@@ -1801,7 +2019,7 @@ class wordbreak_callback_base {
 					   wordbreak_callback_base &);
 	/* NOT IMPLEMENTED */
 #endif
- public:
+public:
 	wordbreak_callback_base();
 	virtual ~wordbreak_callback_base();
 
@@ -1812,8 +2030,8 @@ class wordbreak_callback_base {
 	wordbreak_callback_base &operator<<(char32_t uc);
 
 	template<typename iter_type>
-		wordbreak_callback_base &operator()(iter_type beg_iter,
-						    iter_type end_iter)
+	wordbreak_callback_base &operator()(iter_type beg_iter,
+					    iter_type end_iter)
 	{
 		while (beg_iter != end_iter)
 			operator<<(*beg_iter++);
@@ -1826,7 +2044,7 @@ class wordbreak_callback_base {
 	{
 		return operator()(vec.begin(), vec.end());
 	}
- private:
+private:
 	virtual int callback(bool);
 };
 
@@ -1839,10 +2057,10 @@ class wordbreakscan {
 	unicode_wbscan_info_t handle;
 
 #if __cplusplus >= 201103L
- public:
+public:
 	wordbreakscan(const wordbreakscan &)=delete;
 	wordbreakscan &operator=(const wordbreakscan &)=delete;
- private:
+private:
 #else
 	wordbreakscan(const wordbreakscan &);
 	/* NOT IMPLEMENTED */
@@ -1850,7 +2068,7 @@ class wordbreakscan {
 	wordbreakscan &operator=(const wordbreakscan &);
 	/* NOT IMPLEMENTED */
 #endif
- public:
+public:
 
 	wordbreakscan();
 	~wordbreakscan();
@@ -1886,6 +2104,126 @@ std::u32string tolower(const std::u32string &u);
 
 std::u32string toupper(const std::u32string &u);
 
+//! Calculate bidirectional embedding levels
+
+//! Returns the bidirectional embedding levels, and the paragraph
+//! embedding level.
+
+std::tuple<std::vector<unicode_bidi_level_t>,
+	   unicode_bidi_level_t> bidi_calc(const std::u32string &s);
+
+//! Calculate bidirectional embedding levels
+
+//! Overload calculates the embedding levels using a predetermined
+//! paragraph embedding level.
+//!
+//! Returns the bidirectional embedding levels, and the same paragraph
+//! embedding level.
+
+std::tuple<std::vector<unicode_bidi_level_t>,
+	   unicode_bidi_level_t> bidi_calc(const std::u32string &s,
+					   unicode_bidi_level_t level);
+
+//! Reorder bidirectional text
+
+//! Reorders the string and levels in place.
+//!
+//! Non-0 return value indicates the string and levels' sizes do not match.
+
+int bidi_reorder(std::u32string &string,
+		 std::vector<unicode_bidi_level_t> &levels,
+		 const std::function<void (size_t, size_t)>
+		 &reorder_callback=[](size_t, size_t){});
+
+//! Dry-run reorder bidirectional text
+void bidi_reorder(std::vector<unicode_bidi_level_t> &levels,
+		  const std::function<void (size_t, size_t)>
+		  &reorder_callback=[](size_t, size_t){});
+
+//! Remove directional markers
+
+//! Removes them from the string, in place. Optional lambda gets notified
+//! of the index (in the original string) of each removed marker.
+
+void bidi_cleanup(std::u32string &string,
+		  const std::function<void (size_t)> &removed_callback=
+		  [](size_t) {});
+
+//! Also remove them from the embedding direction level buffer.
+
+//! Returns non-0 in case of non-matching level buffer size.
+
+int bidi_cleanup(std::u32string &string,
+		 std::vector<unicode_bidi_level_t> &levels,
+		 const std::function<void (size_t)> &removed_callback=
+		  [](size_t) {});
+
+
+//! Remove directional markers and isolation markers.
+
+//! Removes them from the string, in place. Optional lambda gets notified
+//! of the index (in the original string) of each removed marker.
+
+void bidi_extra_cleanup(std::u32string &string,
+			const std::function<void (size_t)>
+			&removed_callback=
+			[](size_t) {});
+
+//! Also remove them from the embedding direction level buffer.
+
+//! Returns non-0 in case of non-matching level buffer size.
+
+int bidi_extra_cleanup(std::u32string &string,
+		       std::vector<unicode_bidi_level_t> &levels,
+		       const std::function<void (size_t)>
+		       &removed_callback=
+		       [](size_t) {});
+
+//! Convert Unicode string from canonical rendering order to logical order.
+int bidi_logical_order(std::u32string &string,
+		       std::vector<unicode_bidi_level_t> &levels,
+		       unicode_bidi_level_t paragraph_embedding,
+		       const std::function<void (size_t, size_t)>
+		       &lambda=[](size_t,size_t){});
+
+//! Convert Unicode string from canonical rendering order to logical order.
+void bidi_logical_order(std::vector<unicode_bidi_level_t> &levels,
+			unicode_bidi_level_t paragraph_embedding,
+			const std::function<void (size_t, size_t)>
+			&lambda);
+
+//! Embed directional and isolation markers
+
+//! Non-0 return value indicates the string and levels' sizes do not match.
+//!
+//! The lambda gets called repeatedly, to specify the contents of the
+//! string with embedded direction markers.
+
+int bidi_embed(const std::u32string &string,
+	       const std::vector<unicode_bidi_level_t> &levels,
+	       unicode_bidi_level_t paragraph_embedding,
+	       const std::function<void (const char32_t *string,
+					 size_t n)> &lambda);
+
+//! Embed directional and isolation markers
+
+//! \overload
+//!
+//! Provides a lambda that collects the new string, and returns it. An
+//! empty string gets returned if the string and levels' sizes do not match.
+
+std::u32string bidi_embed(const std::u32string &string,
+			  const std::vector<unicode_bidi_level_t> &levels,
+			  unicode_bidi_level_t paragraph_embedding);
+
+//! Check if a directional marker needs to be inserted
+
+//! In order for the unicode string to have the specified default
+//! paragraph embedding level.
+
+extern char32_t bidi_embed_paragraph_level(const std::u32string &string,
+					   unicode_bidi_level_t level);
+
 #if 0
 {
 #endif