diff options
| author | David Leadbeater | 2014-07-06 15:16:01 +0100 | 
|---|---|---|
| committer | David Leadbeater | 2014-07-10 21:10:36 +0100 | 
| commit | bf0d7086c0286f37971217303e00b6df274a721b (patch) | |
| tree | 1fb81496d8adeda233b84b5efe45a7b53974cb6d /scripts/unicode.pl | |
| parent | 3f9855162e635466abd8c5e3f0fb9dfbc1464e2a (diff) | |
| download | scripts.irssi.org-bf0d7086c0286f37971217303e00b6df274a721b.tar.bz2 | |
Add unicode.pl
Diffstat (limited to 'scripts/unicode.pl')
| -rw-r--r-- | scripts/unicode.pl | 206 | 
1 files changed, 206 insertions, 0 deletions
| diff --git a/scripts/unicode.pl b/scripts/unicode.pl new file mode 100644 index 0000000..01ff7bf --- /dev/null +++ b/scripts/unicode.pl @@ -0,0 +1,206 @@ +use 5.014;  # strict, Unicode 6, unicode regexp modifiers +BEGIN { +  require charnames; +  if ($^V gt v5.16.0) { +    charnames->import(':loose'); +  } else { +    print "unicode.pl: Loose unicode matching not supported on this version of Perl."; +    print "Upgrade to 5.16 or newer for case-insensitive names."; +    charnames->import(':full'); +  } +} +use Encode qw(decode_utf8 encode_utf8); +use POSIX (); +use Unicode::UCD qw(charblock charblocks charinfo); + +use Irssi qw(command_bind command_bind_first); +our $VERSION = "1"; +our %IRSSI = ( +    authors     => 'David Leadbeater', +    contact     => 'dgl@dgl.cx', +    name        => 'unicode', +    description => 'Get infomation about unicode characters', +    license     => 'WTFPL <http://dgl.cx/licence>', +    url         => 'http://dgl.cx/irssi', +); + +my $CHARCODE_RE = qr/(?:\d+|(?:U\+|0x)[0-9a-f]+)/ai; + +my $pipe_in_progress; + +my $USAGE = <<'EOF'; +/UNICODE <character | code | name | block name> + +Print details about Unicode characters or blocks. + +Details about a single character: +  /unicode 😸 +  /unicode U+1F626 + +Print details about a block (more concise): +  /unicode Emoticons + +Print details about a range: +  /unicode U+1F600..U+1F700 + +Find a character: +  /unicode /\bcat\b/ +EOF + +if (Irssi::settings_get_str('term_charset') !~ /utf-8/i) { +  print "\x{3}4unicode.pl\x{3}: term_charset is not set to UTF-8. ", +    "Please set your terminal and Irssi to use UTF-8 so this script works correctly."; +  print "Current settings:"; +  print "  Irssi term_charset = ", Irssi::settings_get_str('term_charset'); +  print "  $_ = $ENV{$_}" for grep /^(?:LANG|LC_|TERM$)/, keys %ENV; +} + +# TODO: Can we fix Irssi to not need encoding here? +sub p { Irssi::active_win()->print(encode_utf8("@_"), MSGLEVEL_CLIENTCRAP) } + +command_bind_first help => sub { +  my($arg) = @_; +  return unless $arg =~ /^unicode\s*$/i; +  print $USAGE; +  print "[Perl internal unicode version " . Unicode::UCD::UnicodeVersion() . "]"; +  Irssi::signal_stop(); +}; + +command_bind unicode => sub { +  my($arg) = @_; + +  if(!$arg) { +    print "Usage: /UNICODE <character | code | name | block name>"; +    print "See /help unicode for more."; +    return; +  } + +  # Decode is always required right now, but really irssi core should handle +  # this so written in a future proof way. +  $arg = decode_utf8 $arg unless Encode::is_utf8($arg, 1); + +  if (length $arg == 1) { +    # Single character +    print_info(ord $arg, 1); +  } elsif ($arg =~ /^$CHARCODE_RE\s*$/) { +    # Character code (decimal or hex) +    print_info($arg, 1); +  } elsif ($arg =~ /^($CHARCODE_RE)\s*\.\.\s*($CHARCODE_RE)\s*$/) { +    # Character range +    my($start, $end) = (charinfo($1), charinfo($2)); +    print_info($_) for hex $start->{code} .. hex $end->{code}; +  } elsif ($arg =~ m{/(.*)/\s*$}) { +    my $re = qr/$1/i; +    if ($pipe_in_progress) { +      p "Another unicode search is in progress"; +      return; +    } +    fork_wrapper(sub { # Child +      my($fh) = @_; +      my @found; +      my $data = ""; +      # This is not a public API at all, but taking 2 minutes when using the +      # public API is a bit of a joke, so we take advantage of perl's cache if +      # we can. +      $data = do "unicore/Name.pl"; +      if (!$data) { +        for my $block(map { $_->[0] } values %{charblocks()}) { +          for($block->[0] .. $block->[1]) { +            my $name = charnames::viacode($_); +            next unless $name; +            $data .= sprintf "%X %s\n", $_, $name; +          } +        } +      } +      while ($data =~ /(?:^([A-F0-9]+).*$re)/gm) { +        push @found, $1; +      } +      if(@found > 100) { +        syswrite $fh, "- More than 100 matches found, aborting"; +      } else { +        syswrite $fh, "@found"; +      } +    }, +    sub { # Parent +      my($line) = @_; +      if ($line =~ /^- (.*)/) { +        p $1; +      } elsif (!$line) { +        p "No matches found"; +      } else { +        print_info($_) for sort { hex $a <=> hex $b } split / /, $line; +      } +    }); +  } else { +    # Character (or named sequence) or block name +    my $string = charnames::string_vianame($arg); +    if ($string) { +      # Character(s) found +      for my $char(split //, $string) { +        print_info(ord $char); +      } +    } elsif(charblock $arg) { +      my $block = charblock($arg); +      print_info($_) for $block->[0]->[0] .. $block->[0]->[1]; +    } else { +      p "Not found. Try for example /unicode /\\bcat\\b/ for partial matching."; +    } +  } +}; + +sub print_info { +  my($character, $extra) = @_; +  my $info = charinfo $character; + +  if (!$info) { +    p "Character not found" if $extra; +  } else { +    p chr(hex $info->{code}) . " (U+$info->{code}): $info->{name}"; +    return unless $extra; + +    my %extra; +    for(qw(block category script)) { +      $extra{$_} = $info->{$_} +    } +    # Optional things +    for(qw(decimal digit numeric upper lower title)) { +      $extra{$_} = $info->{$_} if $info->{$_}; +    } +    p " " x (7 + length $info->{code}), join(", ", map { "$_=$extra{$_}" } sort keys %extra); +  } +} + +# Based on scriptassist. +sub fork_wrapper { +  my($child, $parent) = @_; + +  pipe(my $rfh, my $wfh); + +  my $pid = fork; +  $pipe_in_progress = 1; + +  return unless defined $pid; + +  if($pid) { +    close $wfh; +    Irssi::pidwait_add($pid); +    my $pipetag; +    my @args = ($rfh, \$pipetag, $parent); +    $pipetag = Irssi::input_add(fileno($rfh), INPUT_READ, \&pipe_input, \@args); +  } else { +    eval { +      $child->($wfh); +    }; +    syswrite $wfh, "- $@" if $@; +    POSIX::_exit(1); +  } +} + +sub pipe_input { +  my ($rfh, $pipetag, $parent) = @{$_[0]}; +  my $line = <$rfh>; +  close($rfh); +  Irssi::input_remove($$pipetag); +  $pipe_in_progress = 0; +  $parent->($line); +} | 
