summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Leadbeater2014-07-06 15:16:01 +0100
committerDavid Leadbeater2014-07-10 21:10:36 +0100
commitbf0d7086c0286f37971217303e00b6df274a721b (patch)
tree1fb81496d8adeda233b84b5efe45a7b53974cb6d
parent3f9855162e635466abd8c5e3f0fb9dfbc1464e2a (diff)
downloadscripts.irssi.org-bf0d7086c0286f37971217303e00b6df274a721b.tar.bz2
Add unicode.pl
-rw-r--r--_data/scripts.yaml9
-rw-r--r--scripts/unicode.pl206
2 files changed, 215 insertions, 0 deletions
diff --git a/_data/scripts.yaml b/_data/scripts.yaml
index feb6ebd..037ce86 100644
--- a/_data/scripts.yaml
+++ b/_data/scripts.yaml
@@ -4496,3 +4496,12 @@
name: "smartfilter"
url: "none"
version: "0.1"
+
+- authors: "David Leadbeater"
+ contact: "dgl@dgl.cx"
+ description: "Get infomation about unicode characters"
+ filename: "unicode.pl"
+ modified: "2014-07-06"
+ license: "WTFPL"
+ name: "unicode"
+ version: "1"
diff --git a/scripts/unicode.pl b/scripts/unicode.pl
new file mode 100644
index 0000000..01ff7bf
--- /dev/null
+++ b/scripts/unicode.pl
@@ -0,0 +1,206 @@
+use 5.014; # strict, Unicode 6, unicode regexp modifiers
+BEGIN {
+ require charnames;
+ if ($^V gt v5.16.0) {
+ charnames->import(':loose');
+ } else {
+ print "unicode.pl: Loose unicode matching not supported on this version of Perl.";
+ print "Upgrade to 5.16 or newer for case-insensitive names.";
+ charnames->import(':full');
+ }
+}
+use Encode qw(decode_utf8 encode_utf8);
+use POSIX ();
+use Unicode::UCD qw(charblock charblocks charinfo);
+
+use Irssi qw(command_bind command_bind_first);
+our $VERSION = "1";
+our %IRSSI = (
+ authors => 'David Leadbeater',
+ contact => 'dgl@dgl.cx',
+ name => 'unicode',
+ description => 'Get infomation about unicode characters',
+ license => 'WTFPL <http://dgl.cx/licence>',
+ url => 'http://dgl.cx/irssi',
+);
+
+my $CHARCODE_RE = qr/(?:\d+|(?:U\+|0x)[0-9a-f]+)/ai;
+
+my $pipe_in_progress;
+
+my $USAGE = <<'EOF';
+/UNICODE <character | code | name | block name>
+
+Print details about Unicode characters or blocks.
+
+Details about a single character:
+ /unicode 😸
+ /unicode U+1F626
+
+Print details about a block (more concise):
+ /unicode Emoticons
+
+Print details about a range:
+ /unicode U+1F600..U+1F700
+
+Find a character:
+ /unicode /\bcat\b/
+EOF
+
+if (Irssi::settings_get_str('term_charset') !~ /utf-8/i) {
+ print "\x{3}4unicode.pl\x{3}: term_charset is not set to UTF-8. ",
+ "Please set your terminal and Irssi to use UTF-8 so this script works correctly.";
+ print "Current settings:";
+ print " Irssi term_charset = ", Irssi::settings_get_str('term_charset');
+ print " $_ = $ENV{$_}" for grep /^(?:LANG|LC_|TERM$)/, keys %ENV;
+}
+
+# TODO: Can we fix Irssi to not need encoding here?
+sub p { Irssi::active_win()->print(encode_utf8("@_"), MSGLEVEL_CLIENTCRAP) }
+
+command_bind_first help => sub {
+ my($arg) = @_;
+ return unless $arg =~ /^unicode\s*$/i;
+ print $USAGE;
+ print "[Perl internal unicode version " . Unicode::UCD::UnicodeVersion() . "]";
+ Irssi::signal_stop();
+};
+
+command_bind unicode => sub {
+ my($arg) = @_;
+
+ if(!$arg) {
+ print "Usage: /UNICODE <character | code | name | block name>";
+ print "See /help unicode for more.";
+ return;
+ }
+
+ # Decode is always required right now, but really irssi core should handle
+ # this so written in a future proof way.
+ $arg = decode_utf8 $arg unless Encode::is_utf8($arg, 1);
+
+ if (length $arg == 1) {
+ # Single character
+ print_info(ord $arg, 1);
+ } elsif ($arg =~ /^$CHARCODE_RE\s*$/) {
+ # Character code (decimal or hex)
+ print_info($arg, 1);
+ } elsif ($arg =~ /^($CHARCODE_RE)\s*\.\.\s*($CHARCODE_RE)\s*$/) {
+ # Character range
+ my($start, $end) = (charinfo($1), charinfo($2));
+ print_info($_) for hex $start->{code} .. hex $end->{code};
+ } elsif ($arg =~ m{/(.*)/\s*$}) {
+ my $re = qr/$1/i;
+ if ($pipe_in_progress) {
+ p "Another unicode search is in progress";
+ return;
+ }
+ fork_wrapper(sub { # Child
+ my($fh) = @_;
+ my @found;
+ my $data = "";
+ # This is not a public API at all, but taking 2 minutes when using the
+ # public API is a bit of a joke, so we take advantage of perl's cache if
+ # we can.
+ $data = do "unicore/Name.pl";
+ if (!$data) {
+ for my $block(map { $_->[0] } values %{charblocks()}) {
+ for($block->[0] .. $block->[1]) {
+ my $name = charnames::viacode($_);
+ next unless $name;
+ $data .= sprintf "%X %s\n", $_, $name;
+ }
+ }
+ }
+ while ($data =~ /(?:^([A-F0-9]+).*$re)/gm) {
+ push @found, $1;
+ }
+ if(@found > 100) {
+ syswrite $fh, "- More than 100 matches found, aborting";
+ } else {
+ syswrite $fh, "@found";
+ }
+ },
+ sub { # Parent
+ my($line) = @_;
+ if ($line =~ /^- (.*)/) {
+ p $1;
+ } elsif (!$line) {
+ p "No matches found";
+ } else {
+ print_info($_) for sort { hex $a <=> hex $b } split / /, $line;
+ }
+ });
+ } else {
+ # Character (or named sequence) or block name
+ my $string = charnames::string_vianame($arg);
+ if ($string) {
+ # Character(s) found
+ for my $char(split //, $string) {
+ print_info(ord $char);
+ }
+ } elsif(charblock $arg) {
+ my $block = charblock($arg);
+ print_info($_) for $block->[0]->[0] .. $block->[0]->[1];
+ } else {
+ p "Not found. Try for example /unicode /\\bcat\\b/ for partial matching.";
+ }
+ }
+};
+
+sub print_info {
+ my($character, $extra) = @_;
+ my $info = charinfo $character;
+
+ if (!$info) {
+ p "Character not found" if $extra;
+ } else {
+ p chr(hex $info->{code}) . " (U+$info->{code}): $info->{name}";
+ return unless $extra;
+
+ my %extra;
+ for(qw(block category script)) {
+ $extra{$_} = $info->{$_}
+ }
+ # Optional things
+ for(qw(decimal digit numeric upper lower title)) {
+ $extra{$_} = $info->{$_} if $info->{$_};
+ }
+ p " " x (7 + length $info->{code}), join(", ", map { "$_=$extra{$_}" } sort keys %extra);
+ }
+}
+
+# Based on scriptassist.
+sub fork_wrapper {
+ my($child, $parent) = @_;
+
+ pipe(my $rfh, my $wfh);
+
+ my $pid = fork;
+ $pipe_in_progress = 1;
+
+ return unless defined $pid;
+
+ if($pid) {
+ close $wfh;
+ Irssi::pidwait_add($pid);
+ my $pipetag;
+ my @args = ($rfh, \$pipetag, $parent);
+ $pipetag = Irssi::input_add(fileno($rfh), INPUT_READ, \&pipe_input, \@args);
+ } else {
+ eval {
+ $child->($wfh);
+ };
+ syswrite $wfh, "- $@" if $@;
+ POSIX::_exit(1);
+ }
+}
+
+sub pipe_input {
+ my ($rfh, $pipetag, $parent) = @{$_[0]};
+ my $line = <$rfh>;
+ close($rfh);
+ Irssi::input_remove($$pipetag);
+ $pipe_in_progress = 0;
+ $parent->($line);
+}