diff options
| author | Sam Varshavchik | 2020-07-03 16:50:10 -0400 | 
|---|---|---|
| committer | Sam Varshavchik | 2020-07-12 15:56:45 -0400 | 
| commit | 195a98e0dfef34d997a5bca76fac3a81e3ad67bc (patch) | |
| tree | 79427212117d640814a42f0f94a62412593aff86 /unicode/mkemojidata.pl | |
| parent | 0c23605497ed7b423169e07c53321877ddf6614b (diff) | |
| download | courier-libs-195a98e0dfef34d997a5bca76fac3a81e3ad67bc.tar.bz2 | |
Unicode 13 update.
Diffstat (limited to 'unicode/mkemojidata.pl')
| -rw-r--r-- | unicode/mkemojidata.pl | 51 | 
1 files changed, 51 insertions, 0 deletions
#! /usr/bin/perl
#
# Compile emoji-data.txt into C array declarations.
#
# The arrays' structure is [firstchar, lastchar], listing the emojis with the
# given property.
#
# Input:  emoji-data.txt, read from the current directory.
# Output: C source on standard output. A new
#         "static const char32_t unicode_emoji_<property>_lookup[][2]" array
#         is started each time the property name on a data line differs from
#         the one on the previous data line.
#
# Dies if the input cannot be opened, or if, within one array, a range does
# not end after the previous range's end.

use strict;
use warnings;

my $input = "emoji-data.txt";

# Three-argument open on a lexical handle; report what failed and why.
open(my $fh, "<", $input) or die "$input: $!\n";

my $curclass;   # property whose array is currently open; undef until the first
my $lastl;      # last code point of the most recently emitted range

while (defined(my $line = <$fh>))
{
    chomp $line;

    # Data lines look like "XXXX ; Property" or "XXXX..YYYY ; Property".
    # Everything else (comments, blank lines) does not match and is skipped.
    next unless $line =~ /^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s*;\s*([^\s#]+)/;

    my $t = $3;
    my $f = hex($1);

    # A single code point is a range of one. Test whether the optional
    # capture matched, rather than the truthiness of its text.
    my $l = defined $2 ? hex($2) : $f;

    if ((! defined $curclass) || $t ne $curclass)
    {
        # Property changed: terminate the previous array, if there was one,
        # and open the next.
        if (defined $curclass)
        {
            print "};\n\n";
        }

        $curclass = $t;

        print "static const char32_t unicode_emoji_" . lc($curclass)
            . "_lookup[][2]={\n";
    }
    else
    {
        # Same array as the previous line: its ranges must keep ascending.
        die "Not sorted\n" unless $l > $lastl;
    }

    # The code points are written as decimal integers.
    print "\t{$f, $l},\n";

    $lastl = $l;
}

close($fh);

# Only terminate an array if one was actually opened; input without any
# data lines must not produce a stray "};".
print "};\n\n" if defined $curclass;
