summaryrefslogtreecommitdiffstats
path: root/unicode/mkemojidata.pl
blob: 45296c899861f07ac313b052b041084afc255ddd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#! /usr/bin/perl
#
# Compile emoji-data.txt into C array declarations.
#
# One array is generated per emoji property. Each array element is a
# [firstchar, lastchar] pair: a range of code points that have that property.

use strict;
use warnings;

# Read emoji-data.txt from the current directory and write, to standard
# output, one C array declaration per property found in the file.
#
# Data lines look like
#
#     FIRST[..LAST] ; Property_Name # comment
#
# where FIRST and LAST are uppercase hexadecimal code points. Lines that do
# not match this shape (comments, blank lines) are skipped. Consecutive lines
# with the same property are collected into a single array named
# unicode_emoji_<lowercased property>_lookup; a change of property closes the
# current array and opens a new one.
#
# Dies if the data file cannot be opened, or if the ranges of a property are
# inverted, overlapping, or not in ascending order.

my $datafile = "emoji-data.txt";

open(my $fh, "<", $datafile) || die "$datafile: $!\n";

my $curclass;  # Property whose array is currently open; undef before the first.
my $lastl;     # Last code point of the previous range in the current array.

while (defined(my $line = <$fh>))
{
    chomp $line;

    next unless
        $line =~ /^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s*\;\s*([^\s#]+)\s*/;

    # A line without "..LAST" describes a single code point, i.e. a range
    # whose first and last code points are the same.
    my $f = hex($1);
    my $l = defined $2 ? hex($2) : $f;
    my $t = $3;

    die "Inverted range at $datafile line $.\n" if $l < $f;

    if ((! defined $curclass) || $t ne $curclass)
    {
        # The property changed: terminate the previous array, if there was
        # one, and start the array for the new property.
        if (defined $curclass)
        {
            print "};\n\n";
        }

        $curclass = $t;

        print "static const char32_t unicode_emoji_" . lc($curclass)
            . "_lookup[][2]={\n";
    }
    else
    {
        # Within one array every range must start after the previous range
        # ended; this rejects both unsorted and overlapping ranges.
        die "Not sorted\n" unless $f > $lastl;
    }

    # The code points are emitted as decimal integers.
    print "\t{$f, $l},\n";

    $lastl = $l;
}

close($fh) || die "$datafile: $!\n";

# Close the last array, but only if one was actually opened: input without
# any data lines must not produce a stray closing brace.
print "};\n\n" if defined $curclass;