1 files changed, 206 insertions, 0 deletions
diff --git a/scripts/unicode.pl b/scripts/unicode.pl
new file mode 100644
index 0000000..01ff7bf
--- /dev/null
+++ b/scripts/unicode.pl
@@ -0,0 +1,206 @@
+use 5.014;  # strict, Unicode 6, unicode regexp modifiers
+BEGIN {
+  require charnames;
+  if ($^V gt v5.16.0) {
+    charnames->import(':loose');
+  } else {
+    print "unicode.pl: Loose unicode matching not supported on this version of Perl.";
+    print "Upgrade to 5.16 or newer for case-insensitive names.";
+    charnames->import(':full');
+  }
+}
+use Encode qw(decode_utf8 encode_utf8);
+use POSIX ();
+use Unicode::UCD qw(charblock charblocks charinfo);
+
+use Irssi qw(command_bind command_bind_first);
+our $VERSION = "1";
+our %IRSSI = (
+    authors     => 'David Leadbeater',
+    contact     => 'dgl@dgl.cx',
+    name        => 'unicode',
+    description => 'Get infomation about unicode characters',
+    license     => 'WTFPL <http://dgl.cx/licence>',
+    url         => 'http://dgl.cx/irssi',
+);
+
+my $CHARCODE_RE = qr/(?:\d+|(?:U\+|0x)[0-9a-f]+)/ai;
+
+my $pipe_in_progress;
+
+my $USAGE = <<'EOF';
+/UNICODE <character | code | name | block name>
+
+Print details about Unicode characters or blocks.
+
+Details about a single character:
+  /unicode 😸
+  /unicode U+1F626
+
+Print details about a block (more concise):
+  /unicode Emoticons
+
+Print details about a range:
+  /unicode U+1F600..U+1F700
+
+Find a character:
+  /unicode /\bcat\b/
+EOF
+
+if (Irssi::settings_get_str('term_charset') !~ /utf-8/i) {
+  print "\x{3}4unicode.pl\x{3}: term_charset is not set to UTF-8. ",
+    "Please set your terminal and Irssi to use UTF-8 so this script works correctly.";
+  print "Current settings:";
+  print "  Irssi term_charset = ", Irssi::settings_get_str('term_charset');
+  print "  $_ = $ENV{$_}" for grep /^(?:LANG|LC_|TERM$)/, keys %ENV;
+}
+
+# TODO: Can we fix Irssi to not need encoding here?
+sub p { Irssi::active_win()->print(encode_utf8("@_"), MSGLEVEL_CLIENTCRAP) }
+
+command_bind_first help => sub {
+  my($arg) = @_;
+  return unless $arg =~ /^unicode\s*$/i;
+  print $USAGE;
+  print "[Perl internal unicode version " . Unicode::UCD::UnicodeVersion() . "]";
+  Irssi::signal_stop();
+};
+
+command_bind unicode => sub {
+  my($arg) = @_;
+
+  if(!$arg) {
+    print "Usage: /UNICODE <character | code | name | block name>";
+    print "See /help unicode for more.";
+    return;
+  }
+
+  # Decode is always required right now, but really irssi core should handle
+  # this so written in a future proof way.
+  $arg = decode_utf8 $arg unless Encode::is_utf8($arg, 1);
+
+  if (length $arg == 1) {
+    # Single character
+    print_info(ord $arg, 1);
+  } elsif ($arg =~ /^$CHARCODE_RE\s*$/) {
+    # Character code (decimal or hex)
+    print_info($arg, 1);
+  } elsif ($arg =~ /^($CHARCODE_RE)\s*\.\.\s*($CHARCODE_RE)\s*$/) {
+    # Character range
+    my($start, $end) = (charinfo($1), charinfo($2));
+    print_info($_) for hex $start->{code} .. hex $end->{code};
+  } elsif ($arg =~ m{/(.*)/\s*$}) {
+    my $re = qr/$1/i;
+    if ($pipe_in_progress) {
+      p "Another unicode search is in progress";
+      return;
+    }
+    fork_wrapper(sub { # Child
+      my($fh) = @_;
+      my @found;
+      my $data = "";
+      # This is not a public API at all, but taking 2 minutes when using the
+      # public API is a bit of a joke, so we take advantage of perl's cache if
+      # we can.
+      $data = do "unicore/Name.pl";
+      if (!$data) {
+        for my $block(map { $_->[0] } values %{charblocks()}) {
+          for($block->[0] .. $block->[1]) {
+            my $name = charnames::viacode($_);
+            next unless $name;
+            $data .= sprintf "%X %s\n", $_, $name;
+          }
+        }
+      }
+      while ($data =~ /(?:^([A-F0-9]+).*$re)/gm) {
+        push @found, $1;
+      }
+      if(@found > 100) {
+        syswrite $fh, "- More than 100 matches found, aborting";
+      } else {
+        syswrite $fh, "@found";
+      }
+    },
+    sub { # Parent
+      my($line) = @_;
+      if ($line =~ /^- (.*)/) {
+        p $1;
+      } elsif (!$line) {
+        p "No matches found";
+      } else {
+        print_info($_) for sort { hex $a <=> hex $b } split / /, $line;
+      }
+    });
+  } else {
+    # Character (or named sequence) or block name
+    my $string = charnames::string_vianame($arg);
+    if ($string) {
+      # Character(s) found
+      for my $char(split //, $string) {
+        print_info(ord $char);
+      }
+    } elsif(charblock $arg) {
+      my $block = charblock($arg);
+      print_info($_) for $block->[0]->[0] .. $block->[0]->[1];
+    } else {
+      p "Not found. Try for example /unicode /\\bcat\\b/ for partial matching.";
+    }
+  }
+};
+
+sub print_info {
+  my($character, $extra) = @_;
+  my $info = charinfo $character;
+
+  if (!$info) {
+    p "Character not found" if $extra;
+  } else {
+    p chr(hex $info->{code}) . " (U+$info->{code}): $info->{name}";
+    return unless $extra;
+
+    my %extra;
+    for(qw(block category script)) {
+      $extra{$_} = $info->{$_}
+    }
+    # Optional things
+    for(qw(decimal digit numeric upper lower title)) {
+      $extra{$_} = $info->{$_} if $info->{$_};
+    }
+    p " " x (7 + length $info->{code}), join(", ", map { "$_=$extra{$_}" } sort keys %extra);
+  }
+}
+
+# Based on scriptassist.
+sub fork_wrapper {
+  my($child, $parent) = @_;
+
+  pipe(my $rfh, my $wfh);
+
+  my $pid = fork;
+  $pipe_in_progress = 1;
+
+  return unless defined $pid;
+
+  if($pid) {
+    close $wfh;
+    Irssi::pidwait_add($pid);
+    my $pipetag;
+    my @args = ($rfh, \$pipetag, $parent);
+    $pipetag = Irssi::input_add(fileno($rfh), INPUT_READ, \&pipe_input, \@args);
+  } else {
+    eval {
+      $child->($wfh);
+    };
+    syswrite $wfh, "- $@" if $@;
+    POSIX::_exit(1);
+  }
+}
+
+sub pipe_input {
+  my ($rfh, $pipetag, $parent) = @{$_[0]};
+  my $line = <$rfh>;
+  close($rfh);
+  Irssi::input_remove($$pipetag);
+  $pipe_in_progress = 0;
+  $parent->($line);
+}