diff options
| author | Sam Varshavchik | 2021-03-12 07:15:38 -0500 |
|---|---|---|
| committer | Sam Varshavchik | 2021-03-12 20:27:32 -0500 |
| commit | cf15bdb799c6b8b395087480fe3e89fb8b53cc12 (patch) | |
| tree | bb10f5f2f04a3abdf82ccece78eee35544dd82fe /unicode/unicode_normalization.c | |
| parent | 18fc31347b80597f4100f96c86799fe130786781 (diff) | |
| download | courier-libs-cf15bdb799c6b8b395087480fe3e89fb8b53cc12.tar.bz2 | |
courier-unicode: further changes to the canonical compose/decompose.
Diffstat (limited to 'unicode/unicode_normalization.c')
| -rw-r--r-- | unicode/unicode_normalization.c | 205 |
1 files changed, 130 insertions, 75 deletions
diff --git a/unicode/unicode_normalization.c b/unicode/unicode_normalization.c index ea9e256..93e691f 100644 --- a/unicode/unicode_normalization.c +++ b/unicode/unicode_normalization.c @@ -51,7 +51,7 @@ static int unicode_nfkc_qc(char32_t ch) ** Lookup a character's canonical combining class. */ -static uint8_t unicode_ccc(char32_t ch) +uint8_t unicode_ccc(char32_t ch) { return unicode_tab_lookup(ch, ccc_starting_indextab, @@ -113,8 +113,8 @@ unicode_canonical_t unicode_canonical(char32_t ch) ** decomposed. */ -static void search_for_decompose(struct unicode_decompose_info *info, - void (*f)(struct unicode_decompose_info *, +static void search_for_decompose(unicode_decomposition_t *info, + void (*f)(unicode_decomposition_t *, size_t, const struct decomposition_info *, void *), @@ -182,7 +182,7 @@ struct decompose_meta { /* Pass 1: count the number of characters to decompose. */ -static void decompose_meta_count(struct unicode_decompose_info *info, +static void decompose_meta_count(unicode_decomposition_t *info, size_t i, const struct decomposition_info *cinfo, void *arg) @@ -194,7 +194,7 @@ static void decompose_meta_count(struct unicode_decompose_info *info, /* Pass 2: compile a list of characters to decompose. */ -static void decompose_meta_save(struct unicode_decompose_info *info, +static void decompose_meta_save(unicode_decomposition_t *info, size_t i, const struct decomposition_info *cinfo, void *arg) @@ -208,7 +208,7 @@ static void decompose_meta_save(struct unicode_decompose_info *info, ++ptr->nchars; } -size_t unicode_decompose_reallocate_size(struct unicode_decompose_info *info, +size_t unicode_decompose_reallocate_size(unicode_decomposition_t *info, const size_t *sizes, size_t n) { @@ -221,7 +221,7 @@ size_t unicode_decompose_reallocate_size(struct unicode_decompose_info *info, return new_size; } -static int unicode_decompose_reallocate(struct unicode_decompose_info *info, +static int unicode_decompose_reallocate(unicode_decomposition_t *info, const size_t *offsets, const size_t *sizes, size_t n) @@ -240,10 +240,10 @@ static int unicode_decompose_reallocate(struct unicode_decompose_info *info, return 0; } -void unicode_decompose_info_init(struct unicode_decompose_info *info, - char32_t *string, - size_t string_size, - void *arg) +void unicode_decomposition_init(unicode_decomposition_t *info, + char32_t *string, + size_t string_size, + void *arg) { memset(info, 0, sizeof(*info)); @@ -259,11 +259,11 @@ void unicode_decompose_info_init(struct unicode_decompose_info *info, info->arg=arg; } -void unicode_decompose_info_deinit(struct unicode_decompose_info *info) +void unicode_decomposition_deinit(unicode_decomposition_t *info) { } -int unicode_decompose(struct unicode_decompose_info *info) +int unicode_decompose(unicode_decomposition_t *info) { int replaced; int rc=0; @@ -450,6 +450,14 @@ static char32_t lookup_composition(char32_t a, char32_t b) return 0; } +/* Temporary linked list, until all compositions get built. */ + +struct unicode_compose_info_list { + struct unicode_compose_info_list *next; + struct unicode_compose_info *info; +}; + + /* ** Collect consecutive sequence of composable characters. We cache each ** character's composition level. @@ -525,15 +533,15 @@ static int unicode_composition_init2(const char32_t *string, size_t string_size, int flags, struct chars_and_levels *clptr, - struct unicode_compositions ***tail_ptr); + struct unicode_compose_info_list ***tail_ptr); int unicode_composition_init(const char32_t *string, size_t string_size, int flags, - struct unicode_compositions **ret) + unicode_composition_t *info) { /* - ** Initialize a singly-linked unicode_compositions_list. + ** Initialize a singly-linked unicode_compose_info_list_list. ** ** Initialize the tail pointer. We'll be adding onto the tail pointer ** as we find each composition. @@ -541,11 +549,14 @@ int unicode_composition_init(const char32_t *string, ** Initialize the chars_and_levels buffer. */ - struct unicode_compositions *list=NULL; - struct unicode_compositions **tail=&list; + struct unicode_compose_info_list *list=NULL; + struct unicode_compose_info_list **tail=&list; struct chars_and_levels cl; int c; + info->n_compositions=0; + info->compositions=0; + if (chars_and_levels_init(&cl)) return -1; @@ -561,13 +572,51 @@ int unicode_composition_init(const char32_t *string, &cl, &tail); chars_and_levels_deinit(&cl); + if (c == 0) + { + struct unicode_compose_info_list *ptr; + + info->n_compositions=0; + + for (ptr=list; ptr; ptr=ptr->next) + ++info->n_compositions; + + if ((info->compositions=(struct unicode_compose_info **) + malloc(sizeof(struct unicode_composition_info *) + * (info->n_compositions+1))) == NULL) + { + c= -1; + info->n_compositions=0; + } + } + + if (c == 0) + { + struct unicode_compose_info_list *ptr; + size_t i=0; + + while (list) + { + ptr=list->next; + info->compositions[i++]=list->info; + free(list); + list=ptr; + } + info->compositions[i]=NULL; + } + if (c) { - unicode_composition_deinit(list); - list=NULL; + while (list) + { + struct unicode_compose_info_list *next=list->next; + + free(list->info); + free(list); + list=next; + } } - *ret=list; return c; } @@ -575,22 +624,22 @@ static int compose_chars_and_levels(const char32_t *starterptr, size_t starter_index, int flags, struct chars_and_levels *clptr, - struct unicode_compositions + struct unicode_compose_info_list **last_compositionptr, - struct unicode_compositions ***tail_ptr); + struct unicode_compose_info_list ***tail_ptr); static int create_new_composition(size_t starter_index, size_t n_combining_marks, - struct unicode_compositions **ptr); + struct unicode_compose_info_list **ptr); static int unicode_composition_init2(const char32_t *string, size_t string_size, int flags, struct chars_and_levels *clptr, - struct unicode_compositions ***tail_ptr) + struct unicode_compose_info_list ***tail_ptr) { size_t i; - struct unicode_compositions *last_composition=NULL; + struct unicode_compose_info_list *last_composition=NULL; /* ** Here we consecutively scan the string and look up each character's @@ -629,13 +678,13 @@ static int unicode_composition_init2(const char32_t *string, if (starterptr && /* Did we just compose this starter? */ last_composition && - last_composition->index == starter_index && + last_composition->info->index == starter_index && /* ** Did we compose everything, didn't leave ** any combined marks behind? */ - last_composition->n_composition == 1) + last_composition->info->n_composition == 1) { /* ** So, check if we can combine with that @@ -643,7 +692,7 @@ static int unicode_composition_init2(const char32_t *string, ** original starter, the new one is here. */ new_char=lookup_composition - (last_composition->composition[0], + (last_composition->info->composition[0], string[i]); if (new_char != 0) @@ -651,7 +700,7 @@ static int unicode_composition_init2(const char32_t *string, /* ** Just update the composed char. */ - last_composition->composition[0]= + last_composition->info->composition[0]= new_char; /* @@ -659,7 +708,7 @@ static int unicode_composition_init2(const char32_t *string, ** This nukes this starter, as if ** it was a part of the composition! */ - ++last_composition->n_composed; + ++last_composition->info->n_composed; continue; } } @@ -679,7 +728,7 @@ static int unicode_composition_init2(const char32_t *string, ** from two starters here. */ - struct unicode_compositions *new_composition; + struct unicode_compose_info_list *new_composition; if (create_new_composition(starter_index, 1, &new_composition)) @@ -689,9 +738,9 @@ static int unicode_composition_init2(const char32_t *string, **tail_ptr=new_composition; *tail_ptr= &new_composition->next; - new_composition->n_composed=2; - new_composition->n_composition=1; - new_composition->composition[0]=new_char; + new_composition->info->n_composed=2; + new_composition->info->n_composition=1; + new_composition->info->composition[0]=new_char; continue; } /* @@ -739,26 +788,30 @@ static int compare_levels(const void *a, const void *b) static int create_new_composition(size_t starter_index, size_t n_combining_marks, - struct unicode_compositions **ptr) + struct unicode_compose_info_list **ptr) { - struct unicode_compositions *c= - (struct unicode_compositions *) - malloc(sizeof(struct unicode_compositions)); + struct unicode_compose_info_list *c= + (struct unicode_compose_info_list *) + malloc(sizeof(struct unicode_compose_info_list)); if (!c) return -1; - c->index=starter_index; - c->next=NULL; + c->info=malloc(sizeof(struct unicode_compose_info)+ + sizeof(char32_t) * n_combining_marks); - /* Worst case: nothing is composed */ - - if ((c->composition=malloc(sizeof(char32_t) * - n_combining_marks)) == NULL) + if (!c->info) { free(c); return -1; } + + c->info->index=starter_index; + c->info->composition=(char32_t *)(c->info+1); + c->next=NULL; + + /* Worst case: nothing is composed */ + *ptr=c; return 0; } @@ -767,11 +820,11 @@ static int compose_chars_and_levels(const char32_t *starterptr, size_t starter_index, int flags, struct chars_and_levels *clptr, - struct unicode_compositions + struct unicode_compose_info_list **last_compositionptr, - struct unicode_compositions ***tail_ptr) + struct unicode_compose_info_list ***tail_ptr) { - struct unicode_compositions *new_composition; + struct unicode_compose_info_list *new_composition; char32_t starter=0; size_t i; int composed; @@ -826,9 +879,9 @@ static int compose_chars_and_levels(const char32_t *starterptr, { size_t j; - new_composition->n_composed=clptr->size+1; + new_composition->info->n_composed=clptr->size+1; - new_composition->composition[0]=starter; + new_composition->info->composition[0]=starter; i=1; if (!(flags & UNICODE_COMPOSE_FLAG_REMOVEUNUSED)) @@ -841,12 +894,12 @@ static int compose_chars_and_levels(const char32_t *starterptr, */ if (clptr->ptr[j].level) { - new_composition->composition[i++]= + new_composition->info->composition[i++]= clptr->ptr[j].ch; } } } - new_composition->n_composition=i; + new_composition->info->n_composition=i; } else if (!starterptr && (flags & UNICODE_COMPOSE_FLAG_REMOVEUNUSED)) { /* @@ -855,8 +908,8 @@ static int compose_chars_and_levels(const char32_t *starterptr, ** new_composition. */ - new_composition->n_composed=clptr->size; - new_composition->n_composition=0; + new_composition->info->n_composed=clptr->size; + new_composition->info->n_composition=0; composed=1; } @@ -868,7 +921,7 @@ static int compose_chars_and_levels(const char32_t *starterptr, } else { - free(new_composition->composition); + free(new_composition->info); free(new_composition); new_composition=NULL; } @@ -877,37 +930,39 @@ static int compose_chars_and_levels(const char32_t *starterptr, return 0; } -void unicode_composition_deinit(struct unicode_compositions *ptr) +void unicode_composition_deinit(unicode_composition_t *info) { - while (ptr) - { - struct unicode_compositions *next=ptr->next; + size_t i; - if (ptr->composition) - free(ptr->composition); - free(ptr); - ptr=next; - } + for (i=0; i<info->n_compositions; ++i) + free(info->compositions[i]); + + if (info->compositions) + free(info->compositions); + info->compositions=0; + info->n_compositions=0; } size_t unicode_composition_apply(char32_t *string, size_t string_size, - struct unicode_compositions *compositions) + unicode_composition_t *info) { size_t j=0; size_t i; + size_t c_index=0; for (i=0; i<string_size; ) { - if (compositions && compositions->index == i) + if (c_index < info->n_compositions && + info->compositions[c_index]->index == i) { size_t k; + struct unicode_compose_info *compose= + info->compositions[c_index++]; - for (k=0; k<compositions->n_composition; ++k) - string[j++]=compositions->composition[k]; - i += compositions->n_composed; - - compositions=compositions->next; + for (k=0; k<compose->n_composition; ++k) + string[j++]=compose->composition[k]; + i += compose->n_composed; } else { @@ -925,14 +980,14 @@ int unicode_compose(char32_t *string, int flags, size_t *new_size) { - struct unicode_compositions *composes; + unicode_composition_t info; - if (unicode_composition_init(string, string_size, flags, &composes)) + if (unicode_composition_init(string, string_size, flags, &info)) return -1; - *new_size=unicode_composition_apply(string, string_size, composes); + *new_size=unicode_composition_apply(string, string_size, &info); - unicode_composition_deinit(composes); + unicode_composition_deinit(&info); return 0; } |
