summaryrefslogtreecommitdiffstats
path: root/unicode/mkcategories.pl
blob: c4edaa6d0c84ca0bb9e9df56444b3515f960b94e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#! /usr/bin/perl
#
# Compile Categories.txt into C array declarations.
#
# The array's structure is [firstchar, lastchar, mask], giving the
# category of the character range firstchar-lastchar.
#
# The ranges are sorted in numerical order.
#
# An array gets generated for each block of 4096 unicode characters.
#
# Finally, two arrays get declared: a pointer to an array for each 4096
# unicode character block, and the number of elements in the array.
#
# The pointer is NULL for each block of 4096 unicode characters that is not
# defined in Categories.txt

use strict;
use warnings;
use mkcommon;

# Tree of category names seen in the input, one nesting level per category
# column: $categories{LEVEL1_NAME}{LEVEL2_NAME}...  Leaves are empty hashes.
my %categories;

# Table builder provided by the project-local mkcommon module.
my $obj=mkcommon->new(classtype => "uint32_t");

my $input_file = "Categories.txt";

open(my $in, '<', $input_file)
    or die "Cannot open $input_file: $!";

while (my $line = <$in>)
{
    chomp $line;

    # Tab-separated record: column 0 is the code point in hex, columns 2-5
    # are up to four nested category names.
    my @w=split(/\t/, $line);

    my @combined_category;

    # Cursor into the %categories tree; descends one level per column.
    my $node = \%categories;

    foreach my $i (2..5)
    {
	# A short row leaves trailing columns undefined; treat them as empty
	# instead of triggering an "uninitialized" warning.
	my $c = uc($w[$i] // '');

	# The first empty column ends this character's category path.
	last unless $c;

	# Turn the free-form name into a valid C identifier fragment.
	$c =~ s/[;\-\s]+/_/g;

	my $n = $i-1;

	$c = "UNICODE_CATEGORY_${n}_$c";

	$node = ($node->{$c} //= {});

	push @combined_category, $c;
    }

    # Convert the hex code point with hex() rather than a string eval, so
    # that input file content is never executed as Perl code.
    my $f = hex($w[0]);

    # Single-character range whose value is the OR of its category macros.
    $obj->range($f, $f, join("|",@combined_category));
}

close($in) or die "Cannot close $input_file: $!";

# The header with the category #defines is written under a temporary name
# and renamed into place once it has been written and closed successfully.
open(H, '>', "courier-unicode-categories-tab.h.tmp")
    or die "Cannot create courier-unicode-categories-tab.h.tmp: $!";

# Last value handed out at each of the four category levels (0..3).
my @counters = (0, 0, 0, 0);

# Category macro name => value already assigned to it.
my %seen;

# Recursively write one "#define NAME 0x........" line to the H filehandle
# for every category name in the tree, in sorted order, parents before
# their children.
#
#   $categories - hash ref: category name => hash ref of its sub-categories
#   $level      - nesting depth of this hash (0 for the top level)
#
# Each name gets the next counter value for its level, stored in the
# $level'th byte (from the most significant end) of a 32-bit hex constant.
# A name that already received a value earlier is written again, but
# wrapped in a C comment.  Dies if a level needs more than 255 values.
sub print_categories {
    my ($categories, $level) = @_;

    for my $name (sort keys %$categories)
    {
        my $value = $seen{$name};
        my $already_defined = $value ? 1 : 0;

        if (!$already_defined)
        {
            # First sighting: allocate the next value for this level.
            $value = ++$counters[$level];
            die if $value > 255;
            $seen{$name} = $value;
        }

        my $line = ($already_defined ? "/* " : "")
            . "#define "
            . ("    " x $level)
            . $name;

        # Pad (never truncate) so the values line up in column 66.
        $line = sprintf("%-66s", $line);

        # One byte per level: zero bytes above, the value, zero bytes below.
        my $hex = ("00" x $level)
            . sprintf("%02x", $value)
            . ("00" x (3 - $level));

        my $tail = $already_defined ? " */" : "";

        print H "$line 0x$hex$tail\n";

        print_categories($categories->{$name}, $level + 1);
    }
}

# Write the category #defines to the temporary header, starting at the top
# level of the category tree.
print_categories(\%categories, 0);

# Emit the range tables accumulated in the mkcommon object.
# NOTE(review): the output destination is decided inside mkcommon, which is
# not visible in this file.
$obj->output;

# Buffered write errors surface at close; only move the header into place
# after it has been closed successfully.
close(H)
    or die "Cannot close courier-unicode-categories-tab.h.tmp: $!";
rename("courier-unicode-categories-tab.h.tmp",
    "courier-unicode-categories-tab.h")
    or die "Cannot rename courier-unicode-categories-tab.h.tmp"
	. " to courier-unicode-categories-tab.h: $!";