diff options
| author | Sam Varshavchik | 2021-01-06 22:05:50 -0500 | 
|---|---|---|
| committer | Sam Varshavchik | 2021-01-06 22:05:53 -0500 | 
| commit | 23fffe52808157e36795af52266cc27ac03cbcb9 (patch) | |
| tree | f0c62446b3f5d1dc3fbe623206719dee42ac36c5 | |
| parent | f6d00ac939f7c9c63eca8dbb2a237b3aa24bc85b (diff) | |
| download | courier-libs-23fffe52808157e36795af52266cc27ac03cbcb9.tar.bz2 | |
courier-unicode: implement bidi get_direction.
| -rw-r--r-- | unicode/Makefile.am | 2 | ||||
| -rw-r--r-- | unicode/biditest2.C | 61 | ||||
| -rw-r--r-- | unicode/book.xml | 58 | ||||
| -rw-r--r-- | unicode/courier-unicode.h.in | 27 | ||||
| -rw-r--r-- | unicode/unicode_bidi.c | 32 | ||||
| -rw-r--r-- | unicode/unicodecpp.C | 15 | 
6 files changed, 183 insertions, 12 deletions
| diff --git a/unicode/Makefile.am b/unicode/Makefile.am index 135617a..7ba36f1 100644 --- a/unicode/Makefile.am +++ b/unicode/Makefile.am @@ -95,6 +95,7 @@ man_MANS= \          $(srcdir)/man/unicode\:\:bidi_cleanup.3 \          $(srcdir)/man/unicode\:\:bidi_embed.3 \          $(srcdir)/man/unicode\:\:bidi_embed_paragraph_level.3 \ +        $(srcdir)/man/unicode\:\:bidi_get_direction.3 \          $(srcdir)/man/unicode\:\:bidi_logical_order.3 \          $(srcdir)/man/unicode\:\:bidi_reorder.3 \          $(srcdir)/man/unicode\:\:iconvert\:\:convert.3 \ @@ -119,6 +120,7 @@ man_MANS= \          $(srcdir)/man/unicode_bidi_calc_levels.3 \          $(srcdir)/man/unicode_bidi_calc_types.3 \          $(srcdir)/man/unicode_bidi_cleanup.3 \ +        $(srcdir)/man/unicode_bidi_direction.3 \          $(srcdir)/man/unicode_bidi_embed.3 \          $(srcdir)/man/unicode_bidi_embed_paragraph_level.3 \          $(srcdir)/man/unicode_bidi_logical_order.3 \ diff --git a/unicode/biditest2.C b/unicode/biditest2.C index 0e0472e..8e9d7da 100644 --- a/unicode/biditest2.C +++ b/unicode/biditest2.C @@ -533,6 +533,65 @@ void partial_reorder_cleanup()  			      0, 3);  } +void null_character_test() +{ +	std::u32string s{{0}}; + +	auto res=unicode::bidi_calc(s); + +	unicode::bidi_reorder(s, std::get<0>(res)); + +	unicode::bidi_cleanup(s, std::get<0>(res), +			      [] +			      (size_t) +			      { +			      }, +			      UNICODE_BIDI_CLEANUP_EXTRA, +			      0, 3); +} + +void direction_test() +{ +	static const struct { +		const char32_t *str; +		unicode_bidi_level_t direction; +		int is_explicit; +	} tests[]={ +		{ +			U"Hello", +			UNICODE_BIDI_LR, +			1, +		}, +		{ +			U" ", +			UNICODE_BIDI_LR, +			0, +		}, +		{ +			U"", +			UNICODE_BIDI_LR, +			0, +		}, +		{ +			U"שלום", +			UNICODE_BIDI_RL, +			1, +		}, +	}; + +	for (const auto &t:tests) +	{ +		auto ret=unicode::bidi_get_direction(t.str); + +		if (ret.direction != t.direction || +		    ret.is_explicit != t.is_explicit) +		{ +			std::cerr 
<< "direction_test failed\n"; +			exit(1); +		} +	} +} +  int main(int argc, char **argv)  {  	DEBUGDUMP=fopen("/dev/null", "w"); @@ -543,7 +602,9 @@ int main(int argc, char **argv)  	}  	exception_test();  	partial_reorder_cleanup(); +	null_character_test();  	latin_test();  	character_test(); +	direction_test();  	return 0;  } diff --git a/unicode/book.xml b/unicode/book.xml index 0275058..c3ebc33 100644 --- a/unicode/book.xml +++ b/unicode/book.xml @@ -310,6 +310,7 @@ See COPYING for distribution information.  	  <refname>unicode_bidi_embed</refname>  	  <refname>unicode_bidi_embed_paragraph_level</refname> +	  <refname>unicode_bidi_direction</refname>  	  <refname>unicode_bidi_type</refname>  	  <refname>unicode_bidi_setbnl</refname>  	  <refname>unicode_bidi_mirror</refname> @@ -404,6 +405,12 @@ See COPYING for distribution information.  	    </funcprototype>  	    <funcprototype> +	      <funcdef>struct unicode_bidi_direction <function>unicode_bidi_get_direction</function></funcdef> +              <paramdef>char32_t *<parameter>c</parameter></paramdef> +              <paramdef>size_t <parameter>n</parameter></paramdef> +	    </funcprototype> + +	    <funcprototype>  	      <funcdef>enum_bidi_type_t <function>unicode_bidi_type</function></funcdef>                <paramdef>char32_t <parameter>c</parameter></paramdef>  	    </funcprototype> @@ -996,6 +1003,40 @@ See COPYING for distribution information.  	    <title>Miscellaneous utility functions</title>  	    <para> +	      <function>unicode_bidi_get_direction</function> +	      takes a pointer to a unicode string, the number of +	      characters in the unicode string, and determines +	      its default paragraph embedding level. 
+	      <function>unicode_bidi_get_direction</function> returns +	      a <literal>struct</literal> with the following fields: +	    </para> +	    <variablelist> +	      <varlistentry> +		<term><varname>direction</varname></term> +		<listitem> +		  <para> +		    This value is either <literal>UNICODE_BIDI_LR</literal> +		    or <literal>UNICODE_BIDI_RL</literal> (left to right or +		    right to left). +		  </para> +		</listitem> +	      </varlistentry> + +	      <varlistentry> +		<term><varname>is_explicit</varname></term> +		<listitem> +		  <para> +		    This value is a flag. A non-0 value indicates that +		    the embedding level was derived from an explicit +		    character type (<literal>L</literal>, <literal>R</literal> +		    or <literal>AL</literal>) in the string. A 0 value +		    indicates the default paragraph direction, because no explicit +		    character was found in the string. +		  </para> +		</listitem> +	      </varlistentry> +	    </variablelist> +	    <para>  	      <function>unicode_bidi_type</function>  	      looks up each character's bi-directional character type.  	    </para> @@ -2798,6 +2839,7 @@ See COPYING for distribution information.  	  <refname>unicode::bidi_logical_order</refname>  	  <refname>unicode::bidi_embed</refname>  	  <refname>unicode::bidi_embed_paragraph_level</refname> +	  <refname>unicode::bidi_get_direction</refname>  	  <refpurpose>unicode bi-directional algorithm</refpurpose>  	</refnamediv> @@ -2920,6 +2962,13 @@ See COPYING for distribution information.                
<paramdef>const std::u32string &<parameter>string</parameter></paramdef>                <paramdef>unicode_bidi_level_t <parameter>paragraph_embedding</parameter></paramdef>  	    </funcprototype> + +	    <funcprototype> +	      <funcdef>unicode_bidi_direction <function>bidi_get_direction</function></funcdef> +              <paramdef>const std::u32string &<parameter>string</parameter></paramdef> +	      <paramdef>size_t <parameter>starting_pos</parameter>=0</paramdef> +	      <paramdef>size_t <parameter>n</parameter>=(size_t)-1</paramdef> +	    </funcprototype>            </funcsynopsis>  	</refsynopsisdiv> @@ -3056,13 +3105,14 @@ auto [levels, level]=unicode::bidi_calc(types);  	    <listitem>  	      <para>  		<function>unicode::bidi_reorder</function>, -		<function>unicode::bidi_cleanup</function>, and -		<function>unicode::bidi_logical_order</function> +		<function>unicode::bidi_cleanup</function>, +		<function>unicode::bidi_logical_order</function> and +		<function>unicode::bidi_get_direction</function>  		take two optional  		parameters (defaulted values or overloaded) specifying  		an optional starting position and number of characters that -		define a subset of the original string that gets reordered -		or cleaned up. +		define a subset of the original string that gets reordered, +		cleaned up, or has its direction determined.  	      </para>  	      <para> diff --git a/unicode/courier-unicode.h.in b/unicode/courier-unicode.h.in index 1c7245c..57603da 100644 --- a/unicode/courier-unicode.h.in +++ b/unicode/courier-unicode.h.in @@ -2,7 +2,7 @@  #define	courier_unicode_h  /* -** Copyright 2000-2020 Double Precision, Inc. +** Copyright 2000-2021 Double Precision, Inc.  ** See COPYING for distribution information.  **  */ @@ -585,6 +585,25 @@ typedef unsigned char unicode_bidi_level_t;  #define UNICODE_BIDI_RL		((unicode_bidi_level_t)1)  #define UNICODE_BIDI_SKIP	((unicode_bidi_level_t)254) +/* +** What unicode_bidi_get_direction() returns. 
+*/ + +struct unicode_bidi_direction { + +	/* Direction of the given text */ +	unicode_bidi_level_t direction; + +	/* +	** Non-zero when the direction was set explicitly by the text; when +	** zero, direction is UNICODE_BIDI_LR by default. +	*/ +	int is_explicit; +}; + +struct unicode_bidi_direction unicode_bidi_get_direction(const char32_t *p, +							 size_t n); +  extern unicode_bidi_level_t unicode_bidi_calc(const char32_t *p, size_t n,  					      unicode_bidi_level_t *bufp,  					      const unicode_bidi_level_t * @@ -2336,6 +2355,12 @@ std::u32string bidi_embed(const std::u32string &string,  extern char32_t bidi_embed_paragraph_level(const std::u32string &string,  					   unicode_bidi_level_t level); +//! Compute default direction of text + +extern unicode_bidi_direction bidi_get_direction(const std::u32string &string, +						 size_t starting_pos=0, +						 size_t n=(size_t)-1); +  #if 0  {  #endif diff --git a/unicode/unicode_bidi.c b/unicode/unicode_bidi.c index 73440ed..b23b833 100644 --- a/unicode/unicode_bidi.c +++ b/unicode/unicode_bidi.c @@ -560,13 +560,17 @@ static void directional_status_stack_push  	stack->head=p;  } -static unicode_bidi_level_t +static struct unicode_bidi_direction  compute_paragraph_embedding_level(size_t i, size_t j,  				  enum_bidi_type_t (*get)(size_t i,  							  void *arg),  				  void *arg) -  { +	struct unicode_bidi_direction ret; + +	memset(&ret, 0, sizeof(ret)); +	ret.direction=UNICODE_BIDI_LR; +  	unicode_bidi_level_t in_isolation=0;  	for (; i<j; ++i) @@ -586,13 +590,18 @@ compute_paragraph_embedding_level(size_t i, size_t j,  			if (t == UNICODE_BIDI_TYPE_AL ||  			    t == UNICODE_BIDI_TYPE_R)  			{ -				return UNICODE_BIDI_RL; +				ret.direction=UNICODE_BIDI_RL; +				ret.is_explicit=1; +				break;  			}  			if (t == UNICODE_BIDI_TYPE_L) +			{ +				ret.is_explicit=1;  				break; +			}  		}  	} -	return UNICODE_BIDI_LR; +	return ret;  }  struct compute_paragraph_embedding_level_type_info { @@ -619,7 +628,7 @@ 
compute_paragraph_embedding_level_from_types(const enum_bidi_type_t *p,  	return compute_paragraph_embedding_level  		(i, j,  		 get_enum_bidi_type_for_paragraph_embedding_level, -		 &info); +		 &info).direction;  }  static directional_status_stack_t @@ -2713,8 +2722,19 @@ char32_t unicode_bidi_embed_paragraph_level(const char32_t *str,  	if ((compute_paragraph_embedding_level  	     (0, n,  	      get_enum_bidi_type_for_embedding_paragraph_level, -	      &info) ^ paragraph_level) == 0) +	      &info).direction ^ paragraph_level) == 0)  		return 0;  	return (paragraph_level & 1) ? UNICODE_RLM:UNICODE_LRM;  } + +struct unicode_bidi_direction unicode_bidi_get_direction(const char32_t *str, +							 size_t n) +{ +	struct compute_paragraph_embedding_level_char_info info; + +	info.str=str; +	return compute_paragraph_embedding_level +		(0, n, +		 get_enum_bidi_type_for_embedding_paragraph_level, &info); +} diff --git a/unicode/unicodecpp.C b/unicode/unicodecpp.C index 10156a4..babb6bb 100644 --- a/unicode/unicodecpp.C +++ b/unicode/unicodecpp.C @@ -1,5 +1,5 @@  /* -** Copyright 2011-2020 Double Precision, Inc. +** Copyright 2011-2021 Double Precision, Inc.  ** See COPYING for distribution information.  **  */ @@ -919,3 +919,16 @@ char32_t unicode::bidi_embed_paragraph_level(const std::u32string &string,  						  string.size(),  						  level);  } + +unicode_bidi_direction unicode::bidi_get_direction(const std::u32string &string, +						   size_t starting_pos, +						   size_t n) +{ +	if (starting_pos >= string.size()) +		starting_pos=string.size(); + +	if (string.size()-starting_pos < n) +		n=string.size()-starting_pos; + +	return unicode_bidi_get_direction(string.c_str()+starting_pos, n); +} | 
