diff options
| -rw-r--r-- | unicode/Makefile.am | 5 | ||||
| -rw-r--r-- | unicode/book.xml | 188 | ||||
| -rw-r--r-- | unicode/courier-unicode.h.in | 46 | ||||
| -rw-r--r-- | unicode/unicode_bidi.c | 85 | ||||
| -rw-r--r-- | unicode/unicodecpp.C | 31 | 
5 files changed, 292 insertions, 63 deletions
| diff --git a/unicode/Makefile.am b/unicode/Makefile.am index dbc71aa..25b0719 100644 --- a/unicode/Makefile.am +++ b/unicode/Makefile.am @@ -91,6 +91,7 @@ man_MANS= \          $(srcdir)/man/courier-unicode.7 \          $(srcdir)/man/unicode\:\:bidi.3 \          $(srcdir)/man/unicode\:\:bidi_calc.3 \ +        $(srcdir)/man/unicode\:\:bidi_calc_types.3 \          $(srcdir)/man/unicode\:\:bidi_cleanup.3 \          $(srcdir)/man/unicode\:\:bidi_embed.3 \          $(srcdir)/man/unicode\:\:bidi_embed_paragraph_level.3 \ @@ -115,6 +116,8 @@ man_MANS= \          $(srcdir)/man/unicode_bidi.3 \          $(srcdir)/man/unicode_bidi_bracket_type.3 \          $(srcdir)/man/unicode_bidi_calc.3 \ +        $(srcdir)/man/unicode_bidi_calc_levels.3 \ +        $(srcdir)/man/unicode_bidi_calc_types.3 \          $(srcdir)/man/unicode_bidi_cleanup.3 \          $(srcdir)/man/unicode_bidi_embed.3 \          $(srcdir)/man/unicode_bidi_embed_paragraph_level.3 \ @@ -432,7 +435,7 @@ docs.stamp:  	rm -rf man.tmp  	mkdir man.tmp  	d=`cd $(srcdir); pwd`; cd man.tmp; xsltproc --nonet --xinclude \ -	http://docbook.sourceforge.net/release/xsl/current/manpages/docbook.xsl\ +        http://cdn.docbook.org/release/xsl-nons/current//manpages/docbook.xsl\  		$$d/book.xml  	mkdir -p man  	rm -f man/*.[123456789] diff --git a/unicode/book.xml b/unicode/book.xml index b0342ea..ad96d82 100644 --- a/unicode/book.xml +++ b/unicode/book.xml @@ -301,6 +301,8 @@ See COPYING for distribution information.  	<refnamediv>  	  <refname>unicode_bidi</refname> +	  <refname>unicode_bidi_calc_levels</refname> +	  <refname>unicode_bidi_calc_types</refname>  	  <refname>unicode_bidi_calc</refname>  	  <refname>unicode_bidi_reorder</refname>  	  <refname>unicode_bidi_cleanup</refname> @@ -318,6 +320,23 @@ See COPYING for distribution information.  	<refsynopsisdiv>  	  <funcsynopsis>  	    <funcsynopsisinfo>#include <courier-unicode.h>

unicode_bidi_level_t lr=UNICODE_BIDI_LR;</funcsynopsisinfo> + +	    <funcprototype> +	      <funcdef>void <function>unicode_bidi_calc_types</function></funcdef> +              <paramdef>const char32_t *<parameter>p</parameter></paramdef> +              <paramdef>size_t <parameter>n</parameter></paramdef> +              <paramdef>unicode_bidi_type_t *<parameter>types</parameter></paramdef> +	    </funcprototype> + +	    <funcprototype> +	      <funcdef>unicode_bidi_level_t <function>unicode_bidi_calc_levels</function></funcdef> +              <paramdef>const char32_t *<parameter>p</parameter></paramdef> +              <paramdef>const unicode_bidi_type_t *<parameter>types</parameter></paramdef> +              <paramdef>size_t <parameter>n</parameter></paramdef> +              <paramdef>unicode_bidi_level_t *<parameter>levels</parameter></paramdef> +              <paramdef>const unicode_bidi_level_t *<parameter>initial_embedding_level</parameter></paramdef> +	    </funcprototype> +  	    <funcprototype>  	      <funcdef>void <function>unicode_bidi_calc</function></funcdef>                <paramdef>const char32_t *<parameter>p</parameter></paramdef> @@ -417,19 +436,49 @@ See COPYING for distribution information.  	      <listitem>  		<para>  		  Allocate an array of +		  <structname>unicode_bidi_type_t</structname> that's the +		  same size as the Unicode string. +		</para> +	      </listitem> +	      <listitem> +		<para> +		  Allocate an array of  		  <structname>unicode_bidi_level_t</structname> that's the  		  same size as the Unicode string.  		</para>  	      </listitem> +  	      <listitem>  		<para> -		  Use <function>unicode_bidi_calc</function>() to compute +		  Use <function>unicode_bidi_calc_types</function>() to compute +		  the Unicode string's characters' bi-directional types, +		  and populate the +		  <structname>unicode_bidi_type_t</structname> buffer. 
+		</para> +	      </listitem> + +	      <listitem> +		<para> +		  Use <function>unicode_bidi_calc_levels</function>() to compute  		  the Unicode string's characters' bi-directional embedding  		  level (executes the Bi-Directional algorithm up to and  		  including step L1). This populates the  		  <structname>unicode_bidi_level_t</structname> buffer.  		</para>  	      </listitem> + +	      <listitem> +		<para> +		  Alternatively: allocate only the +		  <structname>unicode_bidi_level_t</structname> array +		  and use <function>unicode_bidi_calc</function>(), which +		  <function>malloc</function>()s the +		  <structname>unicode_bidi_type_t</structname> buffer, +		  calls <function>unicode_bidi_calc_levels</function>(), +		  and then <function>free</function>()s the buffer. +		</para> +	      </listitem> +  	      <listitem>  		<para>  		  Use <function>unicode_bidi_reorder</function>() to reverse @@ -451,7 +500,7 @@ See COPYING for distribution information.  	    <para>  	      The parameters to -	      <function>unicode_bidi_calc</function>() are: +	      <function>unicode_bidi_calc_types</function>() are:  	    </para>  	    <itemizedlist> @@ -468,6 +517,42 @@ See COPYING for distribution information.  	      <listitem>  		<para>  		  A pointer to an array of +		  <structname>unicode_bidi_type_t</structname> values. +		  The caller is +		  responsible for allocating and deallocating this array, +		  which has the same size as the Unicode string. +		</para> +	      </listitem> +	    </itemizedlist> + +	    <para> +	      The parameters to +	      <function>unicode_bidi_calc_levels</function>() are: +	    </para> + +	    <itemizedlist> +	      <listitem> +		<para> +		  A pointer to the Unicode string. +		</para> +	      </listitem> + +	      <listitem> +		<para> +		  A pointer to the buffer that was passed to +		  <function>unicode_bidi_calc_types</function>(). 
+		</para> +	      </listitem> + +	      <listitem> +		<para> +		  Number of characters in the Unicode string and the +		  <structname>unicode_bidi_type_t</structname> buffer. +		</para> +	      </listitem> +	      <listitem> +		<para> +		  A pointer to an array of  		  <structname>unicode_bidi_level_t</structname> values.  		  The caller is  		  responsible for allocating and deallocating this array, @@ -488,7 +573,18 @@ See COPYING for distribution information.  	    </itemizedlist>  	    <para> -	      <function>unicode_bidi_calc</function>() fills in the +	      The parameters to <function>unicode_bidi_calc</function>() are +	      the same except for the +	      <structname>unicode_bidi_type_t</structname> pointer. +	      <function>unicode_bidi_calc</function>() allocates this +	      buffer by itself and calls +	      <function>unicode_bidi_calc_types</function>, and +	      destroys the buffer before returning. +	    </para> + +	    <para> +	      <function>unicode_bidi_calc</function>() +	      and <function>unicode_bidi_calc_levels</function>() fill in the  	      <structname>unicode_bidi_level_t</structname> array with the  	      values corresponding to the embedding level of the  	      corresponding character, @@ -500,7 +596,9 @@ See COPYING for distribution information.  	    </para>  	    <para> -	      <function>unicode_bidi_calc</function>() returns the resolved +	      <function>unicode_bidi_calc</function>() +	      and <function>unicode_bidi_calc_levels</function>() +	      return the resolved  	      paragraph direction level, which  	      always matches the passed in level, if specified, else it  	      reports the @@ -510,7 +608,8 @@ See COPYING for distribution information.  	    
<para>  	      <function>unicode_bidi_reorder</function>() takes the actual  	      unicode string together with the embedding values from -	      <function>unicode_bidi_calc</function>, then reverses the +	      <function>unicode_bidi_calc</function> or +	      <function>unicode_bidi_calc_levels</function>(), then reverses the  	      bi-directional string, as specified by step L2 of the bi-directional  	      algorithm.  	      The parameters to @@ -698,7 +797,8 @@ See COPYING for distribution information.  	      basic,  	      but the resulting bi-directional string produces the same  	      canonical rendering order after applying -	      <function>unicode_bidi_calc()</function>, +	      <function>unicode_bidi_calc()</function> or +	      <function>unicode_bidi_calc_levels</function>(),  	      <function>unicode_reorder()</function> and  	      <function>unicode_bidi_cleanup()</function>  	      (with the canonical option), @@ -847,7 +947,9 @@ See COPYING for distribution information.  	      default paragraph embedding level and returns 0 if it matches.  	      Otherwise it returns a directional marker that should be  	      <emphasis>prepended</emphasis> to the Unicode string to allow -	      <function>unicode_bidi_calc</function>'s optional paragraph +	      <function>unicode_bidi_calc</function>'s +	      (or <function>unicode_bidi_calc_levels</function>()) +	      optional paragraph  	      embedding level pointer's value to be <literal>NULL</literal>,  	      but derive the same default embedding level.  	      The parameters to @@ -2661,6 +2763,7 @@ See COPYING for distribution information.  	<refnamediv>  	  <refname>unicode::bidi</refname>  	  <refname>unicode::bidi_calc</refname> +	  <refname>unicode::bidi_calc_types</refname>  	  <refname>unicode::bidi_reorder</refname>  	  <refname>unicode::bidi_cleanup</refname>  	  <refname>unicode::bidi_logical_order</refname> @@ -2670,16 +2773,31 @@ See COPYING for distribution information.  	
</refnamediv>  	<refsynopsisdiv> +	  <synopsis>#include <courier-unicode.h></synopsis> +	  <classsynopsis class="class" language="C++"> +	    <ooclass> +	      <classname>struct unicode::bidi_calc_types</classname> +	    </ooclass> +	    <constructorsynopsis> +	      <methodname>bidi_calc_types</methodname> +	      <methodparam><modifier>const std::u32string &</modifier><parameter>string</parameter> +	      </methodparam> +	    </constructorsynopsis> +	    <fieldsynopsis> +	      <modifier>std::vector<unicode_bidi_type_t></modifier> +	      <varname>types</varname> +	    </fieldsynopsis> +	  </classsynopsis> +  	  <funcsynopsis> -	    <funcsynopsisinfo>#include <courier-unicode.h></funcsynopsisinfo>  	    <funcprototype>                <funcdef>std::tuple<std::vector<unicode_bidi_level_t>, unicode_bidi_level_t> <function>unicode::bidi_calc</function></funcdef> -	      <paramdef>const std::u32string &<parameter>string</parameter></paramdef> +	      <paramdef>const unicode::bidi_calc_types &<parameter>ustring</parameter></paramdef>  	    </funcprototype>  	    <funcprototype>                <funcdef>std::tuple<std::vector<unicode_bidi_level_t>, unicode_bidi_level_t> <function>unicode::bidi_calc</function></funcdef> -	      <paramdef>const std::u32string &<parameter>string</parameter></paramdef> +	      <paramdef>const unicode::bidi_calc_types &<parameter>ustring</parameter></paramdef>  	      <paramdef>unicode_bidi_level_t <parameter>embedding_level</parameter></paramdef>  	    </funcprototype> @@ -2766,9 +2884,55 @@ See COPYING for distribution information.  	    <listitem>  	      <para>                  <function>unicode::bidi_calc</function> returns the -		directional embedding value buffer and the paragraph -		embedding level. +		directional embedding value buffer and the calculated paragraph +		embedding level. 
Its <parameter>ustring</parameter> +		is implicitly converted from a +		<classname>std::u32string</classname>:                </para> +	      <blockquote> +		<informalexample> +		  <programlisting><![CDATA[ +std::u32string text; + +auto [levels, level]=unicode::bidi_calc(text); + +]]></programlisting> +		</informalexample> +	      </blockquote> + +	      <para> +		Alternatively a <classname>unicode::bidi_calc_types</classname> +		object gets constructed from the same +		<classname>std::u32string</classname> and then passed +		directly to <function>unicode::bidi_calc</function>: +	      </para> +	      <blockquote> +		<informalexample> +		  <programlisting><![CDATA[ +std::u32string text; + +unicode::bidi_calc_types types{text}; + +// types.types is a std::vector of enum_bidi_type_t values + +auto [levels, level]=unicode::bidi_calc(types); + +]]></programlisting> +		</informalexample> +	      </blockquote> +	      <para> +		This provides the means to access the intermediate +		<classname>enum_bidi_type_t</classname> values that +		get calculated from the Unicode text string. +	      </para> + +	      <note> +		<para> +		  In all cases the <classname>std::u32string</classname> +		  cannot be a temporary object, and it must remain in scope +		  until <function>unicode::bidi_calc</function>() returns. 
+		</para> +	      </note>              </listitem>  	    <listitem>  	      <para> diff --git a/unicode/courier-unicode.h.in b/unicode/courier-unicode.h.in index 3de76d3..f8ab117 100644 --- a/unicode/courier-unicode.h.in +++ b/unicode/courier-unicode.h.in @@ -626,6 +626,16 @@ typedef enum {  extern enum_bidi_type_t unicode_bidi_type(char32_t c); +extern void unicode_bidi_calc_types(const char32_t *p, size_t n, +				    enum_bidi_type_t *buf); + +extern unicode_bidi_level_t unicode_bidi_calc_levels(const char32_t *p, +						     const enum_bidi_type_t +						     *types, +						     size_t n, +						     unicode_bidi_level_t *bufp, +						     const unicode_bidi_level_t +						     *initial_embedding_level);  /* Bitmask options to unicode_bidi_cleanup */  /* @@ -2153,13 +2163,45 @@ std::u32string tolower(const std::u32string &u);  std::u32string toupper(const std::u32string &u); +//! Calculate bidirectional character types + +//! Passed as a parameter to bidi_calc(), supplying the string and the +//! calculated bidirectional types. + +struct bidi_calc_types { +	const std::u32string &s; + +	//! Calculated bidirectional types. + +	std::vector<enum_bidi_type_t> types; + +	//! A reference to an existing std::u32string + +	//! bidi_calc_types can be constructed only from a reference to +	//! an existing std::u32string. +	bidi_calc_types(const std::u32string &); + +	//! Deleted constructor + +	//! bidi_calc_types cannot be constructed from a temporary +	//! std::u32string. +	bidi_calc_types(std::u32string &&)=delete; + +	//! Destructor +	~bidi_calc_types(); +}; +  //! Calculate bidirectional embedding levels  //! Returns the bidirectional embedding levels, and the paragraph  //! embedding level. +//! +//! The first parameter can be implicitly converted from an existing +//! std::u32string object. Alternatively a bidi_calc_types helper +//! can be constructed explicitly, and then passed in directly.  
std::tuple<std::vector<unicode_bidi_level_t>, -	   unicode_bidi_level_t> bidi_calc(const std::u32string &s); +	   unicode_bidi_level_t> bidi_calc(const bidi_calc_types &s);  //! Calculate bidirectional embedding levels @@ -2170,7 +2212,7 @@ std::tuple<std::vector<unicode_bidi_level_t>,  //! embedding level.  std::tuple<std::vector<unicode_bidi_level_t>, -	   unicode_bidi_level_t> bidi_calc(const std::u32string &s, +	   unicode_bidi_level_t> bidi_calc(const bidi_calc_types &s,  					   unicode_bidi_level_t level);  //! Reorder bidirectional text diff --git a/unicode/unicode_bidi.c b/unicode/unicode_bidi.c index cfae12f..cbb11dc 100644 --- a/unicode/unicode_bidi.c +++ b/unicode/unicode_bidi.c @@ -467,7 +467,7 @@ typedef struct {  	unicode_bidi_level_t paragraph_embedding_level;  	const char32_t    *chars;  	enum_bidi_type_t *classes; -	enum_bidi_type_t *orig_classes; +	const enum_bidi_type_t *orig_classes;  	unicode_bidi_level_t *levels;  	size_t size;  	int overflow_isolate_count; @@ -624,7 +624,7 @@ compute_paragraph_embedding_level_from_types(const enum_bidi_type_t *p,  static directional_status_stack_t  directional_status_stack_init(const char32_t *chars, -			      enum_bidi_type_t *classes, size_t n, +			      const enum_bidi_type_t *classes, size_t n,  			      unicode_bidi_level_t *levels,  			      const unicode_bidi_level_t  			      *initial_embedding_level) @@ -638,21 +638,21 @@ directional_status_stack_init(const char32_t *chars,  		? 
*initial_embedding_level & 1  		: compute_paragraph_embedding_level_from_types(classes, 0, n);  	stack->chars=chars; -	stack->classes=classes; +	stack->orig_classes=classes;  	if (n)  	{ -		classes=(enum_bidi_type_t *) +		stack->classes=(enum_bidi_type_t *)  			malloc(sizeof(enum_bidi_type_t)*n); -		if (!classes) +		if (!stack->classes)  			abort(); -		memcpy(classes, stack->classes, sizeof(enum_bidi_type_t)*n); +		memcpy(stack->classes, stack->orig_classes, +		       sizeof(enum_bidi_type_t)*n);  	}  	else  	{ -		classes=0; +		stack->classes=0;  	} -	stack->orig_classes=classes;  	stack->levels=levels;  	stack->size=n; @@ -682,19 +682,12 @@ static void directional_status_stack_deinit(directional_status_stack_t stack)  {  	while (stack->head)  		directional_status_stack_pop(stack); -	if (stack->orig_classes) -		free(stack->orig_classes); +	if (stack->classes) +		free(stack->classes);  	isolating_run_sequences_deinit(&stack->isolating_run_sequences);  	free(stack);  } -static unicode_bidi_level_t -unicode_bidi_b(const char32_t *p, -	       size_t n, -	       enum_bidi_type_t *buf, -	       unicode_bidi_level_t *bufp, -	       const unicode_bidi_level_t *initial_embedding_level); -  enum_bidi_type_t unicode_bidi_type(char32_t c)  {  	return (enum_bidi_type_t) @@ -707,35 +700,40 @@ enum_bidi_type_t unicode_bidi_type(char32_t c)  				   UNICODE_BIDI_TYPE_L);  } -unicode_bidi_level_t -unicode_bidi_calc(const char32_t *p, size_t n, unicode_bidi_level_t *bufp, -		  const unicode_bidi_level_t *initial_embedding_level) + +void unicode_bidi_calc_types(const char32_t *p, size_t n, +			     enum_bidi_type_t *buf)  {  	/*  	** Look up the bidi class for each char32_t. -	** -	** When we encounter a paragraph break we call unicode_bidi_b() to -	** process it.  	
*/ - -	enum_bidi_type_t *buf= -		(enum_bidi_type_t *)malloc(n * sizeof(enum_bidi_type_t)); - -	if (!buf) -		abort();  	for (size_t i=0; i<n; ++i)  	{  		buf[i]=unicode_bidi_type(p[i]);  #ifdef UNICODE_BIDI_TEST  		UNICODE_BIDI_TEST(i);  #endif -		bufp[i]=UNICODE_BIDI_SKIP;  	} +} -	unicode_bidi_level_t level=unicode_bidi_b(p, n, -						  buf, -						  bufp, -						  initial_embedding_level); +unicode_bidi_level_t +unicode_bidi_calc(const char32_t *p, size_t n, unicode_bidi_level_t *bufp, +		  const unicode_bidi_level_t *initial_embedding_level) +{ +	enum_bidi_type_t *buf= +		(enum_bidi_type_t *)malloc(n * sizeof(enum_bidi_type_t)); + +	if (!buf) +		abort(); + +	unicode_bidi_calc_types(p, n, buf); + +	unicode_bidi_level_t level= +		unicode_bidi_calc_levels(p, +					 buf, +					 n, +					 bufp, +					 initial_embedding_level);  	free(buf); @@ -744,16 +742,21 @@ unicode_bidi_calc(const char32_t *p, size_t n, unicode_bidi_level_t *bufp,  static void unicode_bidi_cl(directional_status_stack_t stack); -static unicode_bidi_level_t -unicode_bidi_b(const char32_t *p, -	       size_t n, -	       enum_bidi_type_t *buf, -	       unicode_bidi_level_t *bufp, -	       const unicode_bidi_level_t *initial_embedding_level) +unicode_bidi_level_t +unicode_bidi_calc_levels(const char32_t *p, +			 const enum_bidi_type_t *classes, +			 size_t n, +			 unicode_bidi_level_t *bufp, +			 const unicode_bidi_level_t *initial_embedding_level)  {  	directional_status_stack_t stack; -	stack=directional_status_stack_init(p, buf, n, bufp, +	for (size_t i=0; i<n; ++i) +	{ +		bufp[i]=UNICODE_BIDI_SKIP; +	} + +	stack=directional_status_stack_init(p, classes, n, bufp,  					    initial_embedding_level);  	unicode_bidi_level_t paragraph_embedding_level= diff --git a/unicode/unicodecpp.C b/unicode/unicodecpp.C index a0d5ac4..4b864b3 100644 --- a/unicode/unicodecpp.C +++ b/unicode/unicodecpp.C @@ -562,16 +562,30 @@ std::u32string unicode::toupper(const std::u32string &u)  	return copy;  } + 
+unicode::bidi_calc_types::bidi_calc_types(const std::u32string &s) +	: s{s} +{ +	types.resize(s.size()); +	if (!s.empty()) +		unicode_bidi_calc_types(s.c_str(), s.size(), &types[0]); +} + +unicode::bidi_calc_types::~bidi_calc_types()=default; +  std::tuple<std::vector<unicode_bidi_level_t>, unicode_bidi_level_t> -unicode::bidi_calc(const std::u32string &s) +unicode::bidi_calc(const bidi_calc_types &s)  {  	return unicode::bidi_calc(s, UNICODE_BIDI_SKIP);  }  std::tuple<std::vector<unicode_bidi_level_t>, unicode_bidi_level_t> -unicode::bidi_calc(const std::u32string &s, +unicode::bidi_calc(const bidi_calc_types &st,  		   unicode_bidi_level_t paragraph_embedding_level)  { +	if (st.s.size() != st.types.size()) +		return { {}, UNICODE_BIDI_LR }; +  	const unicode_bidi_level_t *initial_embedding_level=0;  	if (paragraph_embedding_level == UNICODE_BIDI_LR || @@ -583,14 +597,17 @@ unicode::bidi_calc(const std::u32string &s,  	std::tuple<std::vector<unicode_bidi_level_t>, unicode_bidi_level_t>  		ret; -	std::get<0>(ret).resize(s.size()); +	std::get<0>(ret).resize(st.s.size());  	std::get<1>(ret)=UNICODE_BIDI_LR; -	if (s.size()) +	if (st.s.size())  	{ -		std::get<1>(ret)=unicode_bidi_calc(s.c_str(), s.size(), -						   &std::get<0>(ret)[0], -						   initial_embedding_level); +		std::get<1>(ret)= +			unicode_bidi_calc_levels(st.s.c_str(), +						 &st.types[0], +						 st.s.size(), +						 &std::get<0>(ret)[0], +						 initial_embedding_level);  	}  	return ret;  } | 
