1 files changed, 192 insertions, 0 deletions
diff --git a/unicode/mkcommon.pm b/unicode/mkcommon.pm
new file mode 100644
index 0000000..2969f32
--- /dev/null
+++ b/unicode/mkcommon.pm
@@ -0,0 +1,192 @@
+package mkcommon;
+
+use 5.012002;
+use strict;
+use warnings;
+
+require Exporter;
+
+our @ISA = qw(Exporter);
+
+# Items to export into callers namespace by default. Note: do not export
+# names by default without a very good reason. Use EXPORT_OK instead.
+# Do not simply export all your public functions/methods/constants.
+
+# This allows declaration	use mkcommon ':all';
+# If you do not need this, moving things directly into @EXPORT or @EXPORT_OK
+# will save memory.
+our %EXPORT_TAGS = ( 'all' => [ qw(
+	
+) ] );
+
+our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
+
+our @EXPORT = qw(
+	
+);
+
+our $VERSION = '0.01';
+
+my $BLOCK_SIZE=256;
+
+# Preloaded methods go here.
+
+sub new {
+
+    my $this=shift;
+
+    my $class = ref($this) || $this;
+    my $self = {};
+    bless $self, $class;
+
+    $$self{'char_array'}=[];
+    $$self{'char_class'}=[];
+    $$self{'char_start'}=[0];
+
+    $$self{'last_block'}=-1;
+    $$self{'last'}="";
+    $$self{'last_f'}=0;
+    $$self{'last_l'}=0;
+
+    return $self;
+}
+
+sub _doemit_block {
+    my $this=shift;
+
+    my $f=shift;
+    my $l=shift;
+
+    push @{$$this{'char_array'}}, [$f, $l];
+    push @{$$this{'char_class'}}, $$this{'last'};
+}
+
+sub _doemit_endblock {
+
+    my $this=shift;
+
+    push @{$$this{'char_start'}}, $#{$$this{'char_array'}}+1;
+}
+
+# _doemit invokes _doemit_block() for each unicode char range with a given
+# linebreaking class. However, once a unicode char range starts in a different
+# $BLOCK_SIZE character class, call _doemit_endblock() before calling _doemit_block().
+#
+# If a single unicode char range crosses a $BLOCK_SIZE character class boundary,
+# split it at the boundary; call _doemit_endblock() to finish the current $BLOCK_SIZE
+# char boundary, call _doemit_endblock(), then call _doemit_block() for the
+# rest of the char range.
+
+
+sub _doemit {
+
+    my $this=shift;
+
+    $this->_doemit_endblock()
+	if int($$this{'last_f'} / $BLOCK_SIZE)
+	!= $$this{'last_block'} && $$this{'last_block'} != -1;
+
+    if (int($$this{'last_f'} / $BLOCK_SIZE) != int($$this{'last_l'} / $BLOCK_SIZE))
+    {
+	while (int($$this{'last_f'} / $BLOCK_SIZE) != int($$this{'last_l'} / $BLOCK_SIZE))
+	{
+	    my $n=int($$this{'last_f'} / $BLOCK_SIZE) * $BLOCK_SIZE + ($BLOCK_SIZE-1);
+
+	    $this->_doemit_block($$this{'last_f'}, $n);
+	    $this->_doemit_endblock();
+	    $$this{'last_f'}=$n+1;
+	}
+    }
+    $this->_doemit_block($$this{'last_f'}, $$this{'last_l'});
+
+    $$this{'last_block'}=int($$this{'last_l'} / $BLOCK_SIZE);
+}
+
+#
+# Coalesce adjacent unicode char blocks that have the same linebreaking
+# property. Invoke _doemit() for the accumulate unicode char range once
+# a range with a different linebreaking class is seen.
+
+sub range {
+
+    my $this=shift;
+
+    my $f=shift;
+    my $l=shift;
+    my $t=shift;
+
+    if ($$this{'last_l'} + 1 == $f && $$this{'last'} eq $t)
+    {
+	$$this{'last_l'}=$l;
+	return;
+    }
+
+    $this->_doemit() if $$this{'last'};  # New linebreaking class
+
+    $$this{'last_f'}=$f;
+    $$this{'last_l'}=$l;
+    $$this{'last'}=$t;
+}
+
+sub output {
+    my $this=shift;
+
+    $this->_doemit();  # Emit last linebreaking unicode char range class
+
+    $this->_doemit_endblock(); # End of the most recent $BLOCK_SIZE char range class
+
+    print "static const uint8_t unicode_rangetab[][2]={\n";
+
+    my $comma="\t";
+
+    my $modulo=sprintf("0x%X", $BLOCK_SIZE-1);
+
+    foreach ( @{$$this{'char_array'}} )
+    {
+	print "${comma}{0x" . sprintf("%04x", $$_[0]) . " & $modulo, 0x"
+	    . sprintf("%04x", $$_[1]) . " & $modulo}";
+	$comma=",\n\t";
+    }
+
+    print "};\n\n";
+
+    print "static const uint8_t unicode_classtab[]={\n";
+
+    $comma="\t";
+    foreach ( @{$$this{'char_class'}} )
+    {
+	print "${comma}$_";
+	$comma=",\n\t";
+    }
+
+    print "};\n\n";
+
+    print "static const size_t unicode_indextab[]={\n";
+
+    $comma="\t";
+
+    my $prev_block=-1;
+    foreach (@{$$this{'char_start'}})
+    {
+	my $sp=$_;
+	my $cnt=1;
+
+	if ($sp <= $#{$$this{'char_array'}})
+	{
+	    my $block=int($$this{'char_array'}->[$sp]->[0] / $BLOCK_SIZE);
+
+	    $cnt = $block - $prev_block;
+	    $prev_block=$block;
+	}
+
+	foreach (1..$cnt)
+	{
+	    print "$comma$sp";
+	    $comma=",\n\t";
+	}
+    }
+
+    print "};\n\n";
+}
+
+1;