use 5.014; # strict, Unicode 6, unicode regexp modifiers BEGIN { require charnames; if ($^V gt v5.16.0) { charnames->import(':loose'); } else { print "unicode.pl: Loose unicode matching not supported on this version of Perl."; print "Upgrade to 5.16 or newer for case-insensitive names."; charnames->import(':full'); } } use Encode qw(decode_utf8 encode_utf8); use POSIX (); use Unicode::UCD qw(charblock charblocks charinfo); use Irssi qw(command_bind command_bind_first); our $VERSION = "2"; our %IRSSI = ( authors => 'David Leadbeater', contact => 'dgl@dgl.cx', name => 'unicode', description => 'Get infomation about unicode characters', license => 'WTFPL ', url => 'http://dgl.cx/irssi', ); my $CHARCODE_RE = qr/(?:\d+|(?:U\+|0x)[0-9a-f]+)/ai; my $pipe_in_progress; my $USAGE = <<'EOF'; /UNICODE Print details about Unicode characters or blocks. Details about a single character: /unicode 😸 /unicode U+1F626 Print details about a block (more concise): /unicode Emoticons Print details about a range: /unicode U+1F600..U+1F700 Find a character: /unicode /\bcat\b/ EOF if (Irssi::settings_get_str('term_charset') !~ /utf-8/i) { print "\x{3}4unicode.pl\x{3}: term_charset is not set to UTF-8. ", "Please set your terminal and Irssi to use UTF-8 so this script works correctly."; print "Current settings:"; print " Irssi term_charset = ", Irssi::settings_get_str('term_charset'); print " $_ = $ENV{$_}" for grep /^(?:LANG|LC_|TERM$)/, keys %ENV; } # TODO: Can we fix Irssi to not need encoding here? sub p { Irssi::active_win()->print(encode_utf8("@_"), MSGLEVEL_CLIENTCRAP) } command_bind_first help => sub { my($arg) = @_; return unless $arg =~ /^unicode\s*$/i; print $USAGE; print "[Perl internal unicode version " . Unicode::UCD::UnicodeVersion() . "]"; Irssi::signal_stop(); }; command_bind unicode => sub { my($arg) = @_; if(!$arg) { print "Usage: /UNICODE "; print "See /help unicode for more."; return; } # Decode is always required right now, but really irssi core should handle # this so written in a future proof way. $arg = decode_utf8 $arg unless Encode::is_utf8($arg, 1); if (length $arg == 1) { # Single character print_info(ord $arg, 1); } elsif ($arg =~ /^$CHARCODE_RE\s*$/) { # Character code (decimal or hex) print_info($arg, 1); } elsif ($arg =~ /^($CHARCODE_RE)\s*\.\.\s*($CHARCODE_RE)\s*$/) { # Character range my($start, $end) = (charinfo($1), charinfo($2)); print_info($_) for hex $start->{code} .. hex $end->{code}; } elsif ($arg =~ m{/(.*)/\s*$}) { my $re = qr/$1/i; if ($pipe_in_progress) { p "Another unicode search is in progress"; return; } fork_wrapper(sub { # Child my($fh) = @_; my @found; my $data = ""; # This is not a public API at all, but taking 2 minutes when using the # public API is a bit of a joke, so we take advantage of perl's cache if # we can. $data = do "unicore/Name.pl"; if (!$data) { for my $block(map { $_->[0] } values %{charblocks()}) { for($block->[0] .. $block->[1]) { my $name = charnames::viacode($_); next unless $name; $data .= sprintf "%X %s\n", $_, $name; } } } while ($data =~ /(?:^([A-F0-9]+).*$re)/gm) { push @found, $1; } if(@found > 100) { syswrite $fh, "- More than 100 matches found, aborting"; } else { syswrite $fh, "@found"; } }, sub { # Parent my($line) = @_; if ($line =~ /^- (.*)/) { p $1; } elsif (!$line) { p "No matches found"; } else { print_info($_) for sort { hex $a <=> hex $b } split / /, $line; } }); } else { # Character (or named sequence) or block name my $string = charnames::string_vianame($arg); if ($string) { # Character(s) found for my $char(split //, $string) { print_info(ord $char); } } elsif(charblock $arg) { my $block = charblock($arg); print_info($_) for $block->[0]->[0] .. $block->[0]->[1]; } else { p "Not found. Try for example /unicode /\\bcat\\b/ for partial matching."; } } }; sub print_info { my($character, $extra) = @_; my $info = charinfo $character; if (!$info) { p "Character not found" if $extra; } else { p chr(hex $info->{code}) . " (U+$info->{code}): $info->{name}"; return unless $extra; my %extra; for(qw(block category script)) { $extra{$_} = $info->{$_} } # Optional things for(qw(decimal digit numeric upper lower title)) { $extra{$_} = $info->{$_} if $info->{$_}; } $extra{"utf-8 (hex)"} = join "", map sprintf("\\x%02x", ord), split //, encode_utf8 chr(hex $info->{code}); p " " x (7 + length $info->{code}), join(", ", map { "$_=$extra{$_}" } sort keys %extra); } } # Based on scriptassist. sub fork_wrapper { my($child, $parent) = @_; pipe(my $rfh, my $wfh); my $pid = fork; $pipe_in_progress = 1; return unless defined $pid; if($pid) { close $wfh; Irssi::pidwait_add($pid); my $pipetag; my @args = ($rfh, \$pipetag, $parent); $pipetag = Irssi::input_add(fileno($rfh), INPUT_READ, \&pipe_input, \@args); } else { eval { $child->($wfh); }; syswrite $wfh, "- $@" if $@; POSIX::_exit(1); } } sub pipe_input { my ($rfh, $pipetag, $parent) = @{$_[0]}; my $line = <$rfh>; close($rfh); Irssi::input_remove($$pipetag); $pipe_in_progress = 0; $parent->($line); } command_bind charblocks => sub { my @blocks = sort keys %{charblocks()}; print for @blocks; }