#!/usr/bin/env perl -w # vim: set sw=2 ts=2 sta et: # GTrans: Automatic translation in Irssi using the Google Language API # by Sven Ulland . License: GPLv2 # # DOCUMENTATION # ------------- # Introduction: # This script brings the power of the Google Language API to Irssi. # In short, it provides a quick way to translate incoming and # outgoing IRC messages with minimal effort. While the result is # far from professional quality, it is vastly superior to most other # automatic translation engines. # # Prerequisites: # Better results are achieved if you write properly. # # Only UTF-8 text is supported. Make sure your terminal handles it. # # The WebService::Google::Language Perl module is required for the # script to work. It is unlikely that your system provides binary # packages for this module, so you probably have to install it # manually or through the CPAN shell: # # $ perl -MCPAN -e "install WebService::Google::Language" # # Quick testing: # To quickly test the script to see what it can do, you can run the # following command after starting Irssi and loading the script. It # will translate the text and display the result in the current # window. No text will be sent to IRC. # # /gtrans --test fi:this is a small test # # Another example to translate text and send it to the target # (channel or query) in the currently active window: # # /gtrans fi:hello! this is a small test # # Normal operation: # When loaded with default settings, the script does nothing. The # reason for this is to maintain privacy: It is not a good idea to # submit potentially sensitive information directly to Google. # # Automatic translation requires that the channel or nick that sends # or receives the message, is in a whitelist. The following scenario # will enable automatic translation for the channel #mychan and nick # 'james': # # /set gtrans_my_lang en # /set gtrans_input_auto ON # /set gtrans_output_auto 2 # /set gtrans_output_auto_lang fi # /set gtrans_whitelist #mychan james # # Incoming or outgoing messages on the #mychan channel and queries # from/to james will now be automatically translated: Incoming # messages will be translated from any language to English; outgoing # messages will be translated from any language to Finnish. # # Settings: # The available settings are described below. The default value is # shown in parentheses. # # gtrans_input_auto (ON) # ON: Translate incoming messages that match gtrans_whitelist. # Translate to the language specified by gtrans_my_lang. # OFF: Don't translate incoming messages. # # gtrans_show_orig (ON) # ON: Show the original, untranslated message, and display the # translation on the next line. Applies to both incoming and # outgoing messages. # OFF: Translate messages transparently, hide original text. # # gtrans_output_auto (1) # 0: Don't translate outgoing messages. # 1: Translate outgoing messages only when the text is prefixed # by ":". Example: fi:this is a small test. This will # override the whitelist. # 2: Translate outgoing messages automatically to the language # specified by gtrans_output_auto_lang. Target has to match # the whitelist. # # gtrans_output_auto_lang ("fi") # xx: Set automatic output language to "xx". This applies to # automatically translated outgoing messages when # gtrans_output_auto is set to 2. # # gtrans_my_lang ("en") # xx: Space-separated list of languages that should not be # translated. Incoming messages will be translated to the # first language in this list. Note: The language will be # detected by sending the message to the Google API. # # gtrans_debug (0) # 0: No debugging. # 1: Light debugging. Useful to see what's going on. # 2: Normal debugging. Slightly more verbose. # 3: Medium debugging. Useful for troubleshooting. # 4: Verbose debugging. Significant output. # 5: Very verbose debugging. Lots of output. # # gtrans_whitelist ("") # xx: Space-separated list of channels and nicks that can be # translated. This applies to both incoming and outgoing # messages. Specify "*" to whitelist everything. # # Links / more info: # List of supported languages in the Google Language API: # # # WebService::Google::Language Perl module at CPAN: # # # TODO list: # * What determines the value of isreliable? The API doesn't say. # * Translate incoming/outgoing notices. # * Translate incoming/outgoing topics. # + Keep un-/translated topic in topic bar with a toggle. # * Make debugging levels and messages more consistent. # * Make whitelist work with servers/connections too. # * Interact better with logging. # * Better code reuse. Lots of duplication now. # * Verify compatibility with other scripts/themes/configurations. # use strict; use vars qw($VERSION %IRSSI); use Irssi; $VERSION = "0.0.1"; %IRSSI = ( authors => "Sven Ulland", contact => "svensven\@gmail.com", name => "GTrans", description => "Translation via the Google Language API", license => "GPLv2", url => "http://scripts.irssi.org/", changed => $VERSION, modules => "WebService::Google::Language", commands => "gtrans" ); use Data::Dumper qw(Dumper); use WebService::Google::Language; my $service = WebService::Google::Language->new( "referer" => "http://scripts.irssi.org/", "agent" => "$IRSSI{name} $VERSION for Irssi", "timeout" => 5, "src" => "", "dest" => "", ); # Urgh. $glob_cmdpass is set to 1 when using gtrans_cmd() and later # checked in event_output_msg(). The reason is that event_output_msg() # is called twice: first by cmd_gtrans(), then by the event "send # text". my $glob_cmdpass = 0; sub dbg { my ($level, $msg) = @_; return unless ($level <= Irssi::settings_get_int("gtrans_debug")); my %dbgcol = ( 1 => "%G", 2 => "%Y", 3 => "%C", 4 => "%M", 5 => "%R", ); print CLIENTCRAP "%W$IRSSI{name} " . "%Bdebug%W($dbgcol{$level}$level%W)>%n $msg"; } sub err { my $msg = shift; print CLIENTCRAP "%W$IRSSI{name} %Rerror%W>%n $msg"; } sub inf { my $msg = shift; print CLIENTCRAP "%W$IRSSI{name} %Ginfo%W>%n $msg"; } sub usage { print CLIENTCRAP "%W$IRSSI{name} %Yusage%W>%n " . "/$IRSSI{commands} [-t|--test] :"; print CLIENTCRAP "%W$IRSSI{name} %Yusage%W>%n " . "Example: %W/$IRSSI{commands} fr:this message " . "will be translated to french and sent to the " . "currently active window.%n"; print CLIENTCRAP "%W$IRSSI{name} %Yusage%W>%n " . "Example: %W/$IRSSI{commands} -t fi:this " . "message will be translated to finnish, but " . "*won't* be sent out. use this to test " . "translations.%n"; print CLIENTCRAP "%W$IRSSI{name} %Yusage%W>%n " . "There are several settings to modify " . "translation behaviour. Type %W/set gtrans%n to " . "see the available settings. See the script " . "source for documentation."; } sub dehtml { # FIXME: The only HTML entity seen so far is ' $_[0] =~ s/'/'/g; } sub wgl_process { my %args = @_; dbg(5, "wgl_process(): input %args: " . Dumper(\%args)); my $result = $args{func}(%args); dbg(4, "wgl_process() wgl_func() output: " . Dumper(\$result)); my $ok = 1; if ($result->error) { err(sprintf "wgl_process() wgl_func() code %s: %s", $result->code, $result->message); $ok = 0; } return $result; } sub event_input_msg { my $subname = "event_input_msg"; my ($server, $msg, $nick, $address, $target) = @_; return unless Irssi::settings_get_bool("gtrans_input_auto"); my $sig = Irssi::signal_get_emitted(); my $witem; dbg(5, "$subname() args: " . Dumper(\@_)); my $do_translation = 0; if ($sig eq "message private") { # Private message. $witem = Irssi::window_item_find($nick); # Check whether the source $nick is in the whitelist. dbg(3, "$subname() Looking for nick \"$nick\" in whitelist"); foreach (split(/ /, Irssi::settings_get_str("gtrans_whitelist"))) { $do_translation = 1 if ($nick eq $_ or $_ eq "*"); } } else { # $sig eq "message public" # Public message. $witem = Irssi::window_item_find($target); # Check whether $target is in the whitelist. dbg(3, "$subname() Looking for channel \"$target\" " . "in whitelist"); foreach (split(/ /, Irssi::settings_get_str("gtrans_whitelist"))) { $do_translation = 1 if ($target eq $_ or $_ eq "*"); } } unless ($do_translation) { dbg(1, sprintf "Channel (\"$target\") or nick (\"$nick\") is " . "not whitelisted"); return; } dbg(2, sprintf "$subname() Channel (\"$target\") or nick " . "(\"$nick\") is whitelisted"); # Prepare arguments for language detection. utf8::decode($msg); my %args = ( "func" => sub { $service->detect(@_) }, "text" => $msg, ); # Run language detection. my $result = wgl_process(%args); dbg(4, "$subname() wgl_process() detect returned: " . Dumper(\$result)); if ($result->error) { dbg(1, "$subname(): Language detection failed"); err(sprintf "Language detection failed with code %s: %s", $result->code, $result->message); return; } # Don't translate my languages. foreach (split(/ /, Irssi::settings_get_str("gtrans_my_lang"))) { $do_translation = 0 if($result->language eq $_); } unless ($do_translation) { dbg(2, "$subname() Incoming language " . "\"$result->language\" matches my lang(s). " . "Not translating."); return; } dbg(1, sprintf "Detected language \"%s\", confidence %.3f", $result->language, $result->confidence); my $confidence = $result->confidence; my $reliable = $result->is_reliable; # Prepare arguments for translation. my %args = ( "func" => sub { $service->translate(@_) }, "text" => $msg, "dest" => (split(/ /, Irssi::settings_get_str("gtrans_my_lang")))[0] ); # Run translation. my $result = wgl_process(%args); dbg(4, "$subname() wgl_process() translate returned: " . Dumper(\$result)); if ($result->error) { dbg(1, "Translation failed"); err(sprintf "Translation failed with code %s: %s", $result->code, $result->message); return; } if (Irssi::settings_get_bool("gtrans_show_orig")) { my $trmsg = sprintf "[%%B%s%%n:%s%.2f%%n] %s", $result->language, $reliable ? "%g" : "%r", $confidence, $result->translation; utf8::decode($trmsg); dehtml($trmsg); Irssi::signal_continue($server, $msg, $nick, $address, $target); $witem->print($trmsg, MSGLEVEL_CLIENTCRAP); } else { $msg = sprintf "[%s:%.2f] %s", $result->language, $confidence, $result->translation; utf8::decode($msg); dehtml($msg); Irssi::signal_continue($server, $msg, $nick, $address, $target); } dbg(1, "Incoming translation successful"); } sub event_output_msg { my $subname = "event_output_msg"; my ($msg, $server, $witem, $force_lang) = @_; dbg(5, "$subname() args: " . Dumper(\@_)); # Safeguard to stop double translations when using /gtrans. if ($glob_cmdpass) { $glob_cmdpass = 0; Irssi::signal_continue($msg, $server, $witem); return; } return unless ( (Irssi::settings_get_int("gtrans_output_auto") > 0 and Irssi::settings_get_int("gtrans_output_auto") <= 2) or $force_lang); # Determine destination language before doing translation. my $dest_lang; if($force_lang) { $dest_lang = $force_lang; } elsif (Irssi::settings_get_int("gtrans_output_auto") eq 1) { # Semiauto translation. Here we preprocess the msg to determine # destination language. The WGL API cannot fetch the list of valid # languages, so we simply try to see if the language is valid. if ( $msg =~ /^([a-z]{2}(-[a-z]{2})?):(.*)/i) { dbg(2, "$subname() dest_lang \"$1\", msg \"$3\""); $dest_lang = $1; $msg = $3; } } elsif (Irssi::settings_get_int("gtrans_output_auto") eq 2) { # Fully automated translation. # To avoid accidents, verify that $witem->{name} is whitelisted. dbg(3, "$subname() Looking for target \"" . $witem->{name} . "\" in whitelist"); my $do_translation = 0; foreach (split(/ /, Irssi::settings_get_str("gtrans_whitelist"))) { $do_translation = 1 if ($witem->{name} eq $_); $do_translation = 1 if ($_ eq "*"); } unless ($do_translation) { dbg(1, sprintf "Target \"" . $witem->{name} . "\" is " . "not whitelisted"); return; } dbg(2, sprintf "$subname() Target \"" . $witem->{name} . "\" is whitelisted"); $dest_lang = Irssi::settings_get_str("gtrans_output_auto_lang"); } unless ($dest_lang and $msg) { dbg(1, "Empty destination language or message"); return; } # Prepare arguments for translation. utf8::decode($msg); my %args = ( "func" => sub { $service->translate(@_) }, "text" => $msg, "dest" => $dest_lang ); # Run translation. my $result = wgl_process(%args); dbg(4, "$subname() wgl_process() output: " . Dumper(\$result)); if ($result->error) { dbg(1, "$subname() Translation failed"); err(sprintf "Translation failed with code %s: %s", $result->code, $result->message); return; } my $trmsg; if ($result->language ne $dest_lang) { $trmsg = $result->translation; utf8::decode($trmsg); dehtml($trmsg); } if($force_lang) { # Emit new signal, since we came from cmd_gtrans(). $glob_cmdpass = 1; # Don't translate in event_output_msg() dbg(3, "$subname():" . __LINE__ . " Emitting \"send text\" signal"); Irssi::signal_emit("send text", $trmsg, $server, $witem); return; } Irssi::signal_continue($trmsg, $server, $witem); if (Irssi::settings_get_bool("gtrans_show_orig")) { my $origmsg = sprintf "[orig:%%B%s%%n] %s", $result->language, $msg; $witem->print($origmsg, MSGLEVEL_CLIENTCRAP); } dbg(1, "Outbound auto-translation successful"); } # FIXME: While topic translation is implemented, it needs more work to # be useful. Until it is, the code is not active. #sub event_topic { # # signal "message own_public" parameters: # # my ($server, $channel, $topic, $nick, $target) = @_; # # return unless Irssi::settings_get_bool("gtrans_topic_auto"); # # dbg(5, "event_topic() args: " . Dumper(\@_)); # # my ($server, $channel, $msg, $nick, $target) = @_; # # my $do_translation = 0; # # # Check whether $channel is in the whitelist. # dbg(3, "event_topic() Looking for channel \"$channel\" in " . # "whitelist"); # foreach (split(/ /, # Irssi::settings_get_str("gtrans_whitelist"))) { # $do_translation = 1 if ($channel eq $_); # $do_translation = 1 if ($_ eq "*"); # } # # unless ($do_translation) { # dbg(1, sprintf "Channel $channel is not whitelisted. " . # "Not translating topic"); # return; # } # # dbg(2, sprintf "event_topic() Channel $channel is whitelisted"); # # # Prepare arguments for language detection. # utf8::decode($msg); # my %args = ( # "func" => sub { $service->detect(@_) }, # "text" => $msg, # ); # # # Run language detection. # my $result = wgl_process(%args); # # dbg(4, "event_topic() wgl_process() detect returned: " . # Dumper(\$result)); # # if ($result->error) { # dbg(1, "event_topic(): Language detection failed"); # err(sprintf "Language detection failed with code %s: %s", # $result->code, $result->message); # return; # } # # # Don't translate my languages. # foreach (split(/ /, Irssi::settings_get_str("gtrans_my_lang"))) { # $do_translation = 0 if($result->language eq $_); # } # # unless ($do_translation) { # dbg(2, "event_topic() Incoming language " . # "\"$result->language\" matches my lang(s). " . # "Not translating."); # return; # } # # dbg(1, sprintf "Detected language \"%s\", confidence %.3f", # $result->language, $result->confidence); # # my $confidence = $result->confidence; # # # Prepare arguments for translation. # my %args = ( # "func" => sub { $service->translate(@_) }, # "text" => $msg, # "dest" => (split(/ /, # Irssi::settings_get_str("gtrans_my_lang")))[0] # ); # # # Run translation. # my $result = wgl_process(%args); # # dbg(4, "event_topic() wgl_process() translate returned: " . # Dumper(\$result)); # # if ($result->error) { # dbg(1, "Topic translation failed"); # err(sprintf "Topic translation failed with code %s: %s", # $result->code, $result->message); # return; # } # # # FIXME: Don't alter messages! # $msg = sprintf "[%s:%.2f] %s", # $result->language, $confidence, $result->translation; # # utf8::decode($msg); # dehtml($msg); # # # FIXME: More info about result? # dbg(1, "Incoming topic translation successful"); # # Irssi::signal_continue($server, $channel, $msg, $nick, $target); #} sub cmd_gtrans { my $subname = "cmd_gtrans"; my ($msg, $server, $witem) = @_; dbg(5, "$subname() input: " . Dumper(\@_)); if ($msg =~ /^(|help|-h|--help|-t|--test)$/) { usage(); return; } my $testing_mode = 0; if ($msg =~ /^(-t|--test) /) { $testing_mode = 1; $msg =~ s/^(-t|--test) //; } return unless ($testing_mode or ($witem and ($witem->{type} eq "CHANNEL" or $witem->{type} eq "QUERY"))); # Determine destination language before doing translation. my $dest_lang; # FIXME: What about languages on the form "xx-yy"? if ( $msg =~ /^([a-z]{2}):(.*)/i) { dbg(2, "$subname() dest_lang \"$1\", msg \"$2\""); $dest_lang = $1; $msg = $2; } else { dbg(2, "$subname() syntax error"); } unless ($dest_lang and $msg) { err("Empty destination language or message"); usage(); return; } if ($testing_mode) { # Prepare arguments for translation. utf8::decode($msg); my %args = ( "func" => sub { $service->translate(@_) }, "text" => $msg, "dest" => $dest_lang ); # Run translation. my $result = wgl_process(%args); dbg(4, "$subname() wgl_process() output: " . Dumper(\$result)); if ($result->error) { dbg(1, "$subname(): Translation failed"); err(sprintf "Translation failed with code %s: %s", $result->code, $result->message); return; } $msg = $result->translation; utf8::decode($msg); dehtml($msg); dbg(1, "Outbound translation successful"); $witem = Irssi::active_win(); $witem->print(sprintf ("%%GGTrans test (%%B%s%%n->%%B%s%%G):%%n %s", $result->language, $dest_lang, $msg), MSGLEVEL_CLIENTCRAP); } else { event_output_msg($msg, $server, $witem, $dest_lang); } } print CLIENTCRAP "%W$IRSSI{name} loaded. " . "Hints: %n/$IRSSI{commands} help"; # Register gtrans settings. Irssi::settings_add_bool("gtrans", "gtrans_input_auto", 1); #Irssi::settings_add_bool("gtrans", "gtrans_topic_auto", 0); Irssi::settings_add_bool("gtrans", "gtrans_show_orig", 1); Irssi::settings_add_int ("gtrans", "gtrans_output_auto", 1); Irssi::settings_add_str ("gtrans", "gtrans_output_auto_lang", "fi"); Irssi::settings_add_str ("gtrans", "gtrans_my_lang", "en"); Irssi::settings_add_int ("gtrans", "gtrans_debug", 0); Irssi::settings_add_str ("gtrans", "gtrans_whitelist", ""); # Register /gtrans command. Irssi::command_bind("gtrans", "cmd_gtrans"); # Register events for incoming messages/actions. Irssi::signal_add_last("message public", "event_input_msg"); Irssi::signal_add_last("message private", "event_input_msg"); # Register events for outgoing messages/actions. Irssi::signal_add("send text", "event_output_msg"); #TODO: Register events that need special handling. #Irssi::signal_add("message irc action", "event_input_msg"); #Irssi::signal_add("message irc notice", "event_input_msg"); #Irssi::signal_add("message irc own_action", "event_output_msg"); #Irssi::signal_add("message irc own_notice", "event_output_msg"); #Irssi::signal_add("event topic", "event_topic");