diff options
Diffstat (limited to 'scripts/unicode.pl')
| -rw-r--r-- | scripts/unicode.pl | 206 |
1 files changed, 206 insertions, 0 deletions
diff --git a/scripts/unicode.pl b/scripts/unicode.pl new file mode 100644 index 0000000..01ff7bf --- /dev/null +++ b/scripts/unicode.pl @@ -0,0 +1,206 @@ +use 5.014; # strict, Unicode 6, unicode regexp modifiers +BEGIN { + require charnames; + if ($^V gt v5.16.0) { + charnames->import(':loose'); + } else { + print "unicode.pl: Loose unicode matching not supported on this version of Perl."; + print "Upgrade to 5.16 or newer for case-insensitive names."; + charnames->import(':full'); + } +} +use Encode qw(decode_utf8 encode_utf8); +use POSIX (); +use Unicode::UCD qw(charblock charblocks charinfo); + +use Irssi qw(command_bind command_bind_first); +our $VERSION = "1"; +our %IRSSI = ( + authors => 'David Leadbeater', + contact => 'dgl@dgl.cx', + name => 'unicode', + description => 'Get infomation about unicode characters', + license => 'WTFPL <http://dgl.cx/licence>', + url => 'http://dgl.cx/irssi', +); + +my $CHARCODE_RE = qr/(?:\d+|(?:U\+|0x)[0-9a-f]+)/ai; + +my $pipe_in_progress; + +my $USAGE = <<'EOF'; +/UNICODE <character | code | name | block name> + +Print details about Unicode characters or blocks. + +Details about a single character: + /unicode 😸 + /unicode U+1F626 + +Print details about a block (more concise): + /unicode Emoticons + +Print details about a range: + /unicode U+1F600..U+1F700 + +Find a character: + /unicode /\bcat\b/ +EOF + +if (Irssi::settings_get_str('term_charset') !~ /utf-8/i) { + print "\x{3}4unicode.pl\x{3}: term_charset is not set to UTF-8. ", + "Please set your terminal and Irssi to use UTF-8 so this script works correctly."; + print "Current settings:"; + print " Irssi term_charset = ", Irssi::settings_get_str('term_charset'); + print " $_ = $ENV{$_}" for grep /^(?:LANG|LC_|TERM$)/, keys %ENV; +} + +# TODO: Can we fix Irssi to not need encoding here? +sub p { Irssi::active_win()->print(encode_utf8("@_"), MSGLEVEL_CLIENTCRAP) } + +command_bind_first help => sub { + my($arg) = @_; + return unless $arg =~ /^unicode\s*$/i; + print $USAGE; + print "[Perl internal unicode version " . Unicode::UCD::UnicodeVersion() . "]"; + Irssi::signal_stop(); +}; + +command_bind unicode => sub { + my($arg) = @_; + + if(!$arg) { + print "Usage: /UNICODE <character | code | name | block name>"; + print "See /help unicode for more."; + return; + } + + # Decode is always required right now, but really irssi core should handle + # this so written in a future proof way. + $arg = decode_utf8 $arg unless Encode::is_utf8($arg, 1); + + if (length $arg == 1) { + # Single character + print_info(ord $arg, 1); + } elsif ($arg =~ /^$CHARCODE_RE\s*$/) { + # Character code (decimal or hex) + print_info($arg, 1); + } elsif ($arg =~ /^($CHARCODE_RE)\s*\.\.\s*($CHARCODE_RE)\s*$/) { + # Character range + my($start, $end) = (charinfo($1), charinfo($2)); + print_info($_) for hex $start->{code} .. hex $end->{code}; + } elsif ($arg =~ m{/(.*)/\s*$}) { + my $re = qr/$1/i; + if ($pipe_in_progress) { + p "Another unicode search is in progress"; + return; + } + fork_wrapper(sub { # Child + my($fh) = @_; + my @found; + my $data = ""; + # This is not a public API at all, but taking 2 minutes when using the + # public API is a bit of a joke, so we take advantage of perl's cache if + # we can. + $data = do "unicore/Name.pl"; + if (!$data) { + for my $block(map { $_->[0] } values %{charblocks()}) { + for($block->[0] .. $block->[1]) { + my $name = charnames::viacode($_); + next unless $name; + $data .= sprintf "%X %s\n", $_, $name; + } + } + } + while ($data =~ /(?:^([A-F0-9]+).*$re)/gm) { + push @found, $1; + } + if(@found > 100) { + syswrite $fh, "- More than 100 matches found, aborting"; + } else { + syswrite $fh, "@found"; + } + }, + sub { # Parent + my($line) = @_; + if ($line =~ /^- (.*)/) { + p $1; + } elsif (!$line) { + p "No matches found"; + } else { + print_info($_) for sort { hex $a <=> hex $b } split / /, $line; + } + }); + } else { + # Character (or named sequence) or block name + my $string = charnames::string_vianame($arg); + if ($string) { + # Character(s) found + for my $char(split //, $string) { + print_info(ord $char); + } + } elsif(charblock $arg) { + my $block = charblock($arg); + print_info($_) for $block->[0]->[0] .. $block->[0]->[1]; + } else { + p "Not found. Try for example /unicode /\\bcat\\b/ for partial matching."; + } + } +}; + +sub print_info { + my($character, $extra) = @_; + my $info = charinfo $character; + + if (!$info) { + p "Character not found" if $extra; + } else { + p chr(hex $info->{code}) . " (U+$info->{code}): $info->{name}"; + return unless $extra; + + my %extra; + for(qw(block category script)) { + $extra{$_} = $info->{$_} + } + # Optional things + for(qw(decimal digit numeric upper lower title)) { + $extra{$_} = $info->{$_} if $info->{$_}; + } + p " " x (7 + length $info->{code}), join(", ", map { "$_=$extra{$_}" } sort keys %extra); + } +} + +# Based on scriptassist. +sub fork_wrapper { + my($child, $parent) = @_; + + pipe(my $rfh, my $wfh); + + my $pid = fork; + $pipe_in_progress = 1; + + return unless defined $pid; + + if($pid) { + close $wfh; + Irssi::pidwait_add($pid); + my $pipetag; + my @args = ($rfh, \$pipetag, $parent); + $pipetag = Irssi::input_add(fileno($rfh), INPUT_READ, \&pipe_input, \@args); + } else { + eval { + $child->($wfh); + }; + syswrite $wfh, "- $@" if $@; + POSIX::_exit(1); + } +} + +sub pipe_input { + my ($rfh, $pipetag, $parent) = @{$_[0]}; + my $line = <$rfh>; + close($rfh); + Irssi::input_remove($$pipetag); + $pipe_in_progress = 0; + $parent->($line); +} |
