summaryrefslogtreecommitdiffstats
path: root/unicode/mkultcase.pl
blob: 918637ab43ca9a74231d0f0a9b0f42e100d635da (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# Copyright 2000-2004 Double Precision, Inc.
# See COPYING for distribution information.
#
#
# Generate the Unicode upper/lower/titlecase translation tables (as C source
# on standard output) from UnicodeData.txt in the current directory.

# Emit the fixed preamble of the generated C file: the copyright banner
# followed by the #include of courier-unicode.h.  The heredoc is quoted
# with single quotes so nothing inside it is interpolated.
print <<'PREAMBLE';
/*
** Copyright 2000-2004 Double Precision, Inc.
** See COPYING for distribution information.
**
*/

#include "courier-unicode.h"

PREAMBLE

# First command-line argument.  It is consumed here, but nothing in this
# script reads $set afterwards.
my $set=shift;

# Case-mapping tables filled in from UnicodeData.txt.  Keys are code points
# in decimal, values are the mapped code points as numbers:
#
#   %UC   - from field 12 of each record
#   %LC   - from field 13 of each record
#   %TC   - from field 14 of each record
#   %FLAG - 1 for every code point that has at least one of the above
#
# These are package variables because the table-emitting code that follows
# reads them.
our (%UC, %LC, %TC, %FLAG);

my $datafile="UnicodeData.txt";

open (my $ucd, "<", $datafile) or die "$datafile: $!\n";

while (my $line=<$ucd>)
{
	chomp $line;

	my @fields=split /;/, $line;

	# Every record starts with the code point in hexadecimal.  Anything
	# else (e.g. an empty line) carries no mapping and is skipped.
	next unless defined $fields[0] && $fields[0] =~ /^[0-9A-Fa-f]+$/;

	# hex() converts the digits directly; the file's contents are never
	# evaluated as Perl code.
	my $code=hex($fields[0]);

	foreach my $mapping ([12, \%UC], [13, \%LC], [14, \%TC])
	{
		my ($column, $table)=@$mapping;
		my $target=$fields[$column];

		# split() drops trailing empty fields, so a missing mapping
		# shows up either as undef or as an empty string.
		next unless defined $target && $target ne "";

		$table->{$code}=hex($target);
		$FLAG{$code}=1;
	}
}

close($ucd);

# Number of hash buckets.  A code point belongs to bucket (code % $tabsize).
my $tabsize=2048;

# Distribute every code point that has a case mapping into its bucket.
# A bucket is stored as one string: decimal code points, each followed by
# a newline.  sort() is called without a comparator, so the keys are
# visited in string order; that order determines the order of the rows
# emitted for each bucket later on.
foreach my $cp (sort keys %FLAG)
{
    $bucket[ $cp % $tabsize ] .= "$cp\n";
}

# Find how many code points the fullest bucket holds.
my $maxcnt=0;

foreach my $slot (0 .. $tabsize-1)
{
    my @members=split (/\n/, $bucket[$slot]);
    my $cnt=scalar @members;

    $maxcnt=$cnt if $cnt > $maxcnt;
}

# The bucket count becomes a C constant; the largest bucket size, plus two,
# is only recorded in a comment of the generated file.
print "const unsigned unicode_case_hash=$tabsize;\n";
print "/* unicode_case_maxbucket=", $maxcnt+2, "*/\n";

# Emit unicode_case_tab[]: one {code, upper, lower, title} row per mapped
# code point, bucket after bucket, four rows per output line.  While doing
# so, record in @offset the row index at which each bucket starts; the
# offset table printed afterwards is built from it.
print "const char32_t unicode_case_tab[][4]={\n";

my $idx=0;

foreach my $slot (0 .. $tabsize-1)
{
    $offset[$slot]=$idx;

    foreach my $cp (split(/\n/, $bucket[$slot]))
    {
	# "+0" turns a missing mapping into a numeric zero.
	my $upper=$UC{$cp}+0;
	my $lower=$LC{$cp}+0;
	my $title=$TC{$cp}+0;

	# Nothing to emit for a code point without any non-zero mapping.
	next unless $upper || $lower || $title;

	# Fill in the blanks: a missing upper/lower mapping is the code
	# point itself, a missing title-case mapping is the (possibly just
	# defaulted) upper-case value.
	$upper=$cp unless $upper;
	$lower=$cp unless $lower;
	$title=$upper unless $title;

	printf("{0x%04x,0x%04x,0x%04x,0x%04x},", $cp, $upper, $lower, $title);
	print "\n" if ($idx % 4) == 3;
	++$idx;
    }
}

# All-zero terminating row, then the opening of the offset table.
print "{0,0,0,0}};\n\n";
print "const unsigned unicode_case_offset[$tabsize]={\n";

# Emit the body of unicode_case_offset[]: the starting row of every bucket,
# right-aligned in four columns, comma separated, sixteen values per line,
# followed by the closing brace of the array.
my $last_slot=$tabsize-1;

foreach my $slot (0 .. $last_slot)
{
    my $cell=sprintf("%4d", $offset[$slot]);

    # Every value except the final one is followed by a comma.
    $cell .= "," if $slot < $last_slot;

    # Break the line after each group of sixteen values.
    $cell .= "\n" if ($slot % 16) == 15;

    print $cell;
}

print "};\n";