courier-unicode: canonical decomposition and composition implementation.

Move unicode_canonical() into a larger library of functions that handles full Unicode decomposition and composition.
author: Sam Varshavchik 2021-03-07 17:46:01 -0500
committer: Sam Varshavchik 2021-03-10 22:52:34 -0500
commit: 18fc31347b80597f4100f96c86799fe130786781 (patch)
tree: 08b641332ec55232a34d3656d6435559a847fcce /unicode/unicode_normalization.c
parent: 92bcce9b28d5d123af67ff0201cd97508af21326 (diff)
download: courier-libs-18fc31347b80597f4100f96c86799fe130786781.tar.bz2
1 files changed, 938 insertions, 0 deletions
diff --git a/unicode/unicode_normalization.c b/unicode/unicode_normalization.c
new file mode 100644
index 0000000..ea9e256
--- /dev/null
+++ b/unicode/unicode_normalization.c
@@ -0,0 +1,938 @@
+/*
+** Copyright 2021 Double Precision, Inc.
+** See COPYING for distribution information.
+**
+*/
+
+#include	"unicode_config.h"
+#include	"courier-unicode.h"
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include	"normalization_defs.h"
+#include	"normalization.h"
+
+/*
+** Lookup NFC quick check.
+*/
+static int unicode_nfc_qc(char32_t ch)
+{
+	return unicode_tab_lookup(ch,
+				  nfc_qc_starting_indextab,
+				  nfc_qc_starting_pagetab,
+				  sizeof(nfc_qc_starting_indextab)/
+				  sizeof(nfc_qc_starting_indextab[0]),
+				  nfc_qc_rangetab,
+				  sizeof(nfc_qc_rangetab)/
+				  sizeof(nfc_qc_rangetab[0]),
+				  nfc_qc_classtab,
+				  UNICODE_NFC_QC_Y);
+}
+
+/*
+** Lookup nfkc quick check.
+*/
+static int unicode_nfkc_qc(char32_t ch)
+{
+	return unicode_tab_lookup(ch,
+				  nfkc_qc_starting_indextab,
+				  nfkc_qc_starting_pagetab,
+				  sizeof(nfkc_qc_starting_indextab)/
+				  sizeof(nfkc_qc_starting_indextab[0]),
+				  nfkc_qc_rangetab,
+				  sizeof(nfkc_qc_rangetab)/
+				  sizeof(nfkc_qc_rangetab[0]),
+				  nfkc_qc_classtab,
+				  UNICODE_NFKC_QC_Y);
+}
+
+/*
+** Lookup a character's canonical combining class.
+*/
+
+static uint8_t unicode_ccc(char32_t ch)
+{
+	return unicode_tab_lookup(ch,
+				  ccc_starting_indextab,
+				  ccc_starting_pagetab,
+				  sizeof(ccc_starting_indextab)/
+				  sizeof(ccc_starting_indextab[0]),
+				  ccc_rangetab,
+				  sizeof(ccc_rangetab)/
+				  sizeof(ccc_rangetab[0]),
+				  ccc_classtab,
+				  0);
+}
+
+/*
+** Look up a character's decomposition
+*/
+
+static const struct decomposition_info *decomp_lookup_search(char32_t ch)
+{
+	size_t hash;
+	size_t i;
+
+	if (ch == 0)
+		return NULL;
+
+	hash=(size_t)(ch % (sizeof(decomp_lookup)
+			    /sizeof(decomp_lookup[0])));
+
+
+	for (i=0; i<sizeof(decomp_lookup[hash])/sizeof(decomp_lookup[hash][0]);
+	     ++i)
+	{
+		if (decomp_lookup[hash][i].ch == ch)
+			return &decomp_lookup[hash][i];
+	}
+	return NULL;
+}
+
+unicode_canonical_t unicode_canonical(char32_t ch)
+{
+	const struct decomposition_info *info=decomp_lookup_search(ch);
+	unicode_canonical_t ret;
+
+	memset(&ret, 0, sizeof(ret));
+
+	if (info)
+	{
+		ret.format=info->decomp_type;
+		ret.n_canonical_chars=info->decomp_size;
+		ret.canonical_chars=decompositions+info->decomp_index;
+	}
+	return ret;
+}
+
+/*
+** Scan through a string, finding each character to decompose.
+**
+** This invokes a provided callback, for each character that's getting
+** decomposed.
+*/
+
+static void search_for_decompose(struct unicode_decompose_info *info,
+				 void (*f)(struct unicode_decompose_info *,
+					   size_t,
+					   const struct decomposition_info *,
+					   void *),
+				 void *arg)
+{
+	size_t i;
+
+	for (i=0; i<info->string_size; ++i)
+	{
+		const struct decomposition_info *comp_info;
+
+		/*
+		** Skip over characters that have a quick check flag set.
+		*/
+		if (info->decompose_flags & UNICODE_DECOMPOSE_FLAG_QC)
+		{
+			if (info->decompose_flags &
+			    UNICODE_DECOMPOSE_FLAG_COMPAT
+			    ? unicode_nfkc_qc(info->string[i])
+			    == UNICODE_NFKC_QC_Y
+			    : unicode_nfc_qc(info->string[i])
+			    == UNICODE_NFC_QC_Y)
+				continue;
+		}
+
+		comp_info=decomp_lookup_search(info->string[i]);
+		if (comp_info &&
+		    /* Should be the case: */
+		    comp_info->decomp_size > 0 &&
+
+		    /*
+		    ** Pick only canonical decompositions, unless we're
+		    ** doing a compatibility decomposition.
+		    */
+		    (comp_info->decomp_type == UNICODE_CANONICAL_FMT_NONE ||
+		     info->decompose_flags &
+		     UNICODE_DECOMPOSE_FLAG_COMPAT))
+			(*f)(info, i, comp_info, arg);
+	}
+}
+
+/*
+** Metadata we store about the decomposition.
+*/
+
+struct decompose_meta {
+
+	/* Number of characters, in total, that we are decomposing. */
+	size_t nchars;
+
+	/* And their indices in the original string. */
+	size_t *indexes;
+
+	/*
+	** For each character how many we're adding, in addition to the
+	** character being decomposed. This is 0 for a 1:1 decomposition.
+	**/
+	size_t *sizes;
+
+	/*
+	** And the information about each individual decomposition.
+	*/
+	const struct decomposition_info **infos;
+};
+
+/* Pass 1: count the number of characters to decompose. */
+
+static void decompose_meta_count(struct unicode_decompose_info *info,
+				 size_t i,
+				 const struct decomposition_info *cinfo,
+				 void *arg)
+{
+	struct decompose_meta *ptr=(struct decompose_meta *)arg;
+
+	++ptr->nchars;
+}
+
+/* Pass 2: compile a list of characters to decompose. */
+
+static void decompose_meta_save(struct unicode_decompose_info *info,
+				size_t i,
+				const struct decomposition_info *cinfo,
+				void *arg)
+{
+	struct decompose_meta *ptr=(struct decompose_meta *)arg;
+
+	ptr->indexes[ptr->nchars]=i;
+	ptr->sizes[ptr->nchars]=cinfo->decomp_size-1;
+	ptr->infos[ptr->nchars]=cinfo;
+
+	++ptr->nchars;
+}
+
+size_t unicode_decompose_reallocate_size(struct unicode_decompose_info *info,
+					 const size_t *sizes,
+					 size_t n)
+{
+	size_t i;
+	size_t new_size=info->string_size;
+
+	for (i=0; i<n; ++i)
+		new_size += sizes[i];
+
+	return new_size;
+}
+
+static int unicode_decompose_reallocate(struct unicode_decompose_info *info,
+					 const size_t *offsets,
+					 const size_t *sizes,
+					 size_t n)
+{
+	size_t new_size=unicode_decompose_reallocate_size(info, sizes, n);
+	char32_t *str=(char32_t *)realloc(info->string,
+					  (new_size+1) * sizeof(char32_t));
+
+	if (str == NULL)
+		return -1;
+
+	info->string=str;
+	str[new_size]=0;
+	/* If the original was null-terminated, this one will be too,
+	** we realloc-ed an extra char32_t */
+	return 0;
+}
+
+void unicode_decompose_info_init(struct unicode_decompose_info *info,
+				 char32_t *string,
+				 size_t string_size,
+				 void *arg)
+{
+	memset(info, 0, sizeof(*info));
+
+	info->string=string;
+	if (string && string_size == (size_t)-1)
+	{
+		for (string_size=0; string[string_size]; ++string_size)
+			;
+	}
+
+	info->reallocate=unicode_decompose_reallocate;
+	info->string_size=string_size;
+	info->arg=arg;
+}
+
+void unicode_decompose_info_deinit(struct unicode_decompose_info *info)
+{
+}
+
+int unicode_decompose(struct unicode_decompose_info *info)
+{
+	int replaced;
+	int rc=0;
+
+	do
+	{
+		struct decompose_meta meta;
+		char32_t *string_end, *string_put;
+		size_t next_insert_pos;
+		size_t i;
+		size_t old_pos;
+
+		replaced=0;
+
+		/*
+		** Make two passes over the string, on the first pass count
+		** the decompositions.
+		*/
+
+		meta.nchars=0;
+		search_for_decompose(info, decompose_meta_count, &meta);
+
+		if (!meta.nchars)
+			continue; /* We're done */
+
+		/* We'll want to make another pass */
+		replaced=1;
+
+		/*
+		** We have two size_t pointers to allocate dynamically,
+		** allocate both of them together.
+		*/
+		meta.indexes=(size_t *)malloc(meta.nchars * 2 * sizeof(size_t));
+		if (!meta.indexes)
+		{
+			rc= -1;
+			break;
+		}
+
+		meta.infos=(const struct decomposition_info **)
+			malloc(sizeof(const struct decomposition_info *)
+			       *meta.nchars);
+
+		if (!meta.infos)
+		{
+			free(meta.indexes);
+			rc= -1;
+			break;
+		}
+
+		/*
+		** And here's the second size_t * that we dynamically allocated
+		*/
+		meta.sizes=meta.indexes+meta.nchars;
+
+		/* And now make a second pass to load the decompositions */
+		meta.nchars=0;
+
+		search_for_decompose(info, decompose_meta_save, &meta);
+
+		rc=(*info->reallocate)(info,
+				       meta.indexes, meta.sizes,
+				       meta.nchars);
+
+		if (rc)
+		{
+			/* Error, ensure the code below does nothing */
+			meta.nchars=0;
+			replaced=0;
+		}
+
+		/*
+		** Insert decompositions. We make a pass from the end of
+		** the string to the beginning of it. We already know what
+		** all the decompositions are, on this pass.
+		**
+		** We'll be copying the characters from the existing string
+		** from the end of the string to its beginning, starting with
+		** the new ending position.
+		**
+		** Each time we reach a position where the decomposition
+		** occurs, we inject it. In this manner we'll be processing
+		** each decomposition starting with the last one in the
+		** string, going back to the first one.
+		**
+		** So, the next_insert_pos that we will do is the
+		** last one:
+		*/
+
+		next_insert_pos=meta.nchars;
+
+		/*
+		** We start shifting over from the end of the string up.
+		** Here's the current end of the string.
+		*/
+		string_end=info->string+info->string_size;
+		old_pos=info->string_size;
+
+		/*
+		** And compute where we're putting all the characters
+		** by adding up all the sizes.
+		*/
+
+		string_put=string_end;
+		for (i=0; i<meta.nchars; ++i)
+		{
+			/* While we're here we'll adjust string_size too... */
+			info->string_size += meta.sizes[i];
+			string_put += meta.sizes[i];
+		}
+
+		/* We only need to keep going until we finished shifting. */
+
+		while (next_insert_pos)
+		{
+			const struct decomposition_info *decompose=NULL;
+
+			if (--old_pos == meta.indexes[next_insert_pos-1])
+			{
+				/*
+				** Reach the next (previous, really) insert
+				** position. We only adjust the pointer here,
+				** we'll be copying the decomposition below.
+				** Here we're just quietly skipping ahead by
+				** the number of new characters in the
+				** decomposition.
+				*/
+				--next_insert_pos;
+				string_put -= meta.sizes[next_insert_pos];
+				decompose=meta.infos[next_insert_pos];
+			}
+			*--string_put=*--string_end;
+
+			if (decompose)
+			{
+				/* The decomposition goes here */
+				memcpy(string_put,
+				       decompositions + decompose->decomp_index,
+				       decompose->decomp_size
+				       * sizeof(char32_t));
+			}
+		}
+
+		free(meta.indexes);
+		free(meta.infos);
+
+		/*
+		** Make another pass, perhaps.
+		*/
+	} while (replaced);
+
+	return rc;
+}
+
+/*
+** Canonical compositions are all two characters > one character.
+*/
+
+static char32_t lookup_composition(char32_t a, char32_t b)
+{
+	size_t i, j;
+
+	i=((size_t)a * canonical_mult1
+	   + (size_t)b * canonical_mult2)
+		% (sizeof(canonical_compositions_lookup) /
+		   sizeof(canonical_compositions_lookup[0]));
+
+	j= i+1 < (sizeof(canonical_compositions_lookup) /
+		  sizeof(canonical_compositions_lookup[0]))
+		? canonical_compositions_lookup[i+1]
+		: sizeof(canonical_compositions)/
+		sizeof(canonical_compositions[0]);
+	i=canonical_compositions_lookup[i];
+
+	while (i<j)
+	{
+		if (canonical_compositions[i][0] == a &&
+		    canonical_compositions[i][1] == b)
+		{
+			return canonical_compositions[i][2];
+		}
+		++i;
+	}
+	return 0;
+}
+
+/*
+** Collect consecutive sequence of composable characters. We cache each
+** character's composition level.
+*/
+
+struct char_and_level {
+	char32_t ch;		/* The character */
+	size_t index;		/* Its position in the original string */
+	uint8_t level;		/* Its combining level */
+};
+
+/*
+** A growing buffer of consecutive composition characters
+*/
+
+struct chars_and_levels {
+	struct char_and_level *ptr;
+	size_t size;
+	size_t reserved;
+};
+
+/*
+** Initialize this buffer.
+*/
+static int chars_and_levels_init(struct chars_and_levels *p)
+{
+	p->ptr=malloc( sizeof(struct char_and_level)*(p->reserved=1) );
+	if (!p->ptr)
+		return -1;
+	p->size=0;
+
+	return 0;
+}
+
+/*
+** Add char+level to the buffer, growing it if needed.
+*/
+
+static int add_char_and_level(struct chars_and_levels *p, char32_t ch,
+			      size_t index,
+			      uint8_t level)
+{
+	if (p->reserved <= p->size)
+	{
+		size_t n=p->reserved * 2;
+
+		struct char_and_level *ptr=
+			realloc(p->ptr, sizeof(struct char_and_level)*n);
+
+		if (!ptr)
+			return -1;
+		p->reserved=n;
+		p->ptr=ptr;
+	}
+	p->ptr[p->size].ch=ch;
+	p->ptr[p->size].index=index;
+	p->ptr[p->size].level=level;
+	++p->size;
+	return 0;
+}
+
+/*
+** Deallocate the buffer.
+*/
+
+static void chars_and_levels_deinit(struct chars_and_levels *p)
+{
+	if (p->ptr)
+		free(p->ptr);
+}
+
+static int unicode_composition_init2(const char32_t *string,
+				     size_t string_size,
+				     int flags,
+				     struct chars_and_levels *clptr,
+				     struct unicode_compositions ***tail_ptr);
+
+int unicode_composition_init(const char32_t *string,
+			     size_t string_size,
+			     int flags,
+			     struct unicode_compositions **ret)
+{
+	/*
+	** Initialize a singly-linked unicode_compositions_list.
+	**
+	** Initialize the tail pointer. We'll be adding onto the tail pointer
+	** as we find each composition.
+	**
+	** Initialize the chars_and_levels buffer.
+	*/
+
+	struct unicode_compositions *list=NULL;
+	struct unicode_compositions **tail=&list;
+	struct chars_and_levels cl;
+	int c;
+
+	if (chars_and_levels_init(&cl))
+		return -1;
+
+	/*
+	** Call unicode_composition_init2 to do all the work.
+	**
+	** When it returns we can deinit the chars_and_levels buffer.
+	**
+	** If it fails we can also deinitialize the linked list, and
+	** return a NULL pointer.
+	*/
+	c=unicode_composition_init2(string, string_size, flags,
+				    &cl, &tail);
+	chars_and_levels_deinit(&cl);
+
+	if (c)
+	{
+		unicode_composition_deinit(list);
+		list=NULL;
+	}
+
+	*ret=list;
+	return c;
+}
+
+static int compose_chars_and_levels(const char32_t *starterptr,
+				    size_t starter_index,
+				    int flags,
+				    struct chars_and_levels *clptr,
+				    struct unicode_compositions
+				    **last_compositionptr,
+				    struct unicode_compositions ***tail_ptr);
+
+static int create_new_composition(size_t starter_index,
+				  size_t n_combining_marks,
+				  struct unicode_compositions **ptr);
+
+static int unicode_composition_init2(const char32_t *string,
+				     size_t string_size,
+				     int flags,
+				     struct chars_and_levels *clptr,
+				     struct unicode_compositions ***tail_ptr)
+{
+	size_t i;
+	struct unicode_compositions *last_composition=NULL;
+
+	/*
+	** Here we consecutively scan the string and look up each character's
+	** composition level.
+	**
+	** Each time we get a composition level of 0 we update starterptr to
+	** point to the starter character, and save its index here.
+	*/
+
+	const char32_t *starterptr=NULL;
+	size_t starter_index=0;
+	for (i=0; i<string_size; ++i)
+	{
+		uint8_t ccc=unicode_ccc(string[i]);
+		char32_t new_char;
+
+		if (ccc == 0)
+		{
+			/*
+			** Starter. If there were any preceding composing
+			** characters, then compose them.
+			*/
+			if (compose_chars_and_levels(starterptr,
+						     starter_index,
+						     flags,
+						     clptr,
+						     &last_composition,
+						     tail_ptr))
+				return -1;
+
+			/*
+			** It's possible for a starter to combine with its
+			** preceding starter.
+			*/
+
+			if (starterptr &&
+			    /* Did we just compose this starter? */
+			    last_composition &&
+			    last_composition->index == starter_index &&
+
+			    /*
+			    ** Did we compose everything, didn't leave
+			    ** any combined marks behind?
+			    */
+			    last_composition->n_composition == 1)
+			{
+				/*
+				** So, check if we can combine with that
+				** last starter. *starterptr is the
+				** original starter, the new one is here.
+				*/
+				new_char=lookup_composition
+					(last_composition->composition[0],
+					 string[i]);
+
+				if (new_char != 0)
+				{
+					/*
+					** Just update the composed char.
+					*/
+					last_composition->composition[0]=
+						new_char;
+
+					/*
+					** And incrementing n_composed.
+					** This nukes this starter, as if
+					** it was a part of the composition!
+					*/
+					++last_composition->n_composed;
+					continue;
+				}
+			}
+			else if (starterptr && starter_index+1 == i &&
+				/*
+				** Ok, the last starter was not composed,
+				** and it was the previous character. The
+				** comparison against the starter_index is
+				** just, really, a sanity check.
+				*/
+				 (new_char=
+				  lookup_composition(*starterptr,
+						     string[i])) != 0)
+			{
+				/*
+				** We'll need to manually create a composition
+				** from two starters here.
+				*/
+
+				struct unicode_compositions *new_composition;
+
+				if (create_new_composition(starter_index,
+							   1, &new_composition))
+					return -1;
+
+				last_composition=new_composition;
+				**tail_ptr=new_composition;
+				*tail_ptr= &new_composition->next;
+
+				new_composition->n_composed=2;
+				new_composition->n_composition=1;
+				new_composition->composition[0]=new_char;
+				continue;
+			}
+			/*
+			** And this is a new starter.
+			*/
+			starterptr=&string[i];
+			starter_index=i;
+		}
+		else
+		{
+			/* Add composing characters */
+
+			if (add_char_and_level(clptr, string[i], i, ccc))
+				return -1;
+		}
+	}
+
+	/* We could've finish the string with some composition */
+
+	return compose_chars_and_levels(starterptr,
+					starter_index,
+					flags,
+					clptr,
+					&last_composition,
+					tail_ptr);
+}
+
+/*
+** sort combining characters by their canonical combining class
+*/
+
+static int compare_levels(const void *a, const void *b)
+{
+	const struct char_and_level *ca=(const struct char_and_level *)a;
+	const struct char_and_level *cb=(const struct char_and_level *)b;
+
+	return ca->level < cb->level ? -1 :
+		ca->level > cb->level ? 1
+
+		/* Same combining level, compare their indexes */
+		: ca->index < cb->index ? -1
+		: ca->index > cb->index ? 1
+		: 0;
+}
+
+static int create_new_composition(size_t starter_index,
+				  size_t n_combining_marks,
+				  struct unicode_compositions **ptr)
+{
+	struct unicode_compositions *c=
+		(struct unicode_compositions *)
+		malloc(sizeof(struct unicode_compositions));
+
+	if (!c)
+		return -1;
+
+	c->index=starter_index;
+	c->next=NULL;
+
+	/* Worst case: nothing is composed */
+
+	if ((c->composition=malloc(sizeof(char32_t) *
+				   n_combining_marks)) == NULL)
+	{
+		free(c);
+		return -1;
+	}
+	*ptr=c;
+	return 0;
+}
+
+static int compose_chars_and_levels(const char32_t *starterptr,
+				    size_t starter_index,
+				    int flags,
+				    struct chars_and_levels *clptr,
+				    struct unicode_compositions
+				    **last_compositionptr,
+				    struct unicode_compositions ***tail_ptr)
+{
+	struct unicode_compositions *new_composition;
+	char32_t starter=0;
+	size_t i;
+	int composed;
+
+	if (clptr->size == 0)
+		/* Nothing to do, no composable chars since last starter */
+		return 0;
+
+	qsort(clptr->ptr, clptr->size, sizeof(struct char_and_level),
+	      compare_levels);
+
+	if (create_new_composition(starter_index,
+				   clptr->size,
+				   &new_composition))
+		return -1;
+
+	composed=0;
+
+	if (starterptr)
+	{
+		starter=*starterptr;
+
+		for (i=0; i<clptr->size; ++i)
+		{
+			char32_t new_char=lookup_composition(starter,
+							     clptr->ptr[i].ch);
+
+			if (new_char)
+			{
+				starter=new_char;
+
+				/*
+				** These must have a non-0 level, so mark
+				** these ones by setting their level to 0.
+				*/
+				clptr->ptr[i].level=0;
+				composed=1;
+
+				if (flags & UNICODE_COMPOSE_FLAG_ONESHOT)
+					break;
+			}
+		}
+	}
+
+	/*
+	** new_composition->composition is the same size as the
+	** number of composable characters. If we composed at
+	** least once, we know that we can fit the starter character
+	** and whatever was not composed into that buffer.
+	*/
+	if (composed)
+	{
+		size_t j;
+
+		new_composition->n_composed=clptr->size+1;
+
+		new_composition->composition[0]=starter;
+
+		i=1;
+		if (!(flags & UNICODE_COMPOSE_FLAG_REMOVEUNUSED))
+		{
+			for (j=0; j<clptr->size; ++j)
+			{
+				/*
+				** The ones that were used in the composition
+				** have their level reset to 0.
+				*/
+				if (clptr->ptr[j].level)
+				{
+					new_composition->composition[i++]=
+						clptr->ptr[j].ch;
+				}
+			}
+		}
+		new_composition->n_composition=i;
+	} else if (!starterptr && (flags & UNICODE_COMPOSE_FLAG_REMOVEUNUSED))
+	{
+		/*
+		** Edge case, this flag is set, composition characters at the
+		** beginning of the string. We will create an empty
+		** new_composition.
+		*/
+
+		new_composition->n_composed=clptr->size;
+		new_composition->n_composition=0;
+		composed=1;
+	}
+
+	if (composed)
+	{
+		*last_compositionptr=new_composition;
+		**tail_ptr=new_composition;
+		*tail_ptr=&new_composition->next;
+	}
+	else
+	{
+		free(new_composition->composition);
+		free(new_composition);
+		new_composition=NULL;
+	}
+
+	clptr->size=0; /* Start a new list of composable characters */
+	return 0;
+}
+
+void unicode_composition_deinit(struct unicode_compositions *ptr)
+{
+	while (ptr)
+	{
+		struct unicode_compositions *next=ptr->next;
+
+		if (ptr->composition)
+			free(ptr->composition);
+		free(ptr);
+		ptr=next;
+	}
+}
+
+size_t unicode_composition_apply(char32_t *string,
+				 size_t string_size,
+				 struct unicode_compositions *compositions)
+{
+	size_t j=0;
+	size_t i;
+
+	for (i=0; i<string_size; )
+	{
+		if (compositions && compositions->index == i)
+		{
+			size_t k;
+
+			for (k=0; k<compositions->n_composition; ++k)
+				string[j++]=compositions->composition[k];
+			i += compositions->n_composed;
+
+			compositions=compositions->next;
+		}
+		else
+		{
+			string[j++]=string[i++];
+		}
+	}
+
+	if (j < string_size)
+		string[j]=0;
+	return j;
+}
+
+int unicode_compose(char32_t *string,
+		    size_t string_size,
+		    int flags,
+		    size_t *new_size)
+{
+	struct unicode_compositions *composes;
+
+	if (unicode_composition_init(string, string_size, flags, &composes))
+		return -1;
+
+	*new_size=unicode_composition_apply(string, string_size, composes);
+
+	unicode_composition_deinit(composes);
+
+	return 0;
+}
author	Sam Varshavchik	2021-03-07 17:46:01 -0500
committer	Sam Varshavchik	2021-03-10 22:52:34 -0500
commit	18fc31347b80597f4100f96c86799fe130786781 (patch)
tree	08b641332ec55232a34d3656d6435559a847fcce /unicode/unicode_normalization.c
parent	92bcce9b28d5d123af67ff0201cd97508af21326 (diff)
download	courier-libs-18fc31347b80597f4100f96c86799fe130786781.tar.bz2