diff options
Diffstat (limited to 'scripts/aspell.pl')
| -rw-r--r-- | scripts/aspell.pl | 725 | 
1 files changed, 725 insertions, 0 deletions
| diff --git a/scripts/aspell.pl b/scripts/aspell.pl new file mode 100644 index 0000000..b6a254e --- /dev/null +++ b/scripts/aspell.pl @@ -0,0 +1,725 @@ +=pod + +=head1 NAME + +aspell.pl + +=head1 DESCRIPTION + +A spellchecker based on GNU ASpell which allows you to interactively +select the correct spellings for misspelled words in your input field. + +=head1 INSTALLATION + +Copy into your F<~/.irssi/scripts/> directory and load with +C</SCRIPT LOAD F<filename>>. + +=head1 SETUP + +Settings: + +    aspell_debug              0 +    aspell_ignore_chan_nicks  1 +    aspell_suggest_colour     '%g' +    aspell_language           'en_GB' +    aspell_irssi_dict         '~/.irssi/irssi.dict' + +B<Note:> Americans may wish to change the language to en_US. This can be done +with the command C</SET aspell_language en_US> once the script is loaded. + +=head1 USAGE + +Bind a key to /spellcheck, and then invoke it when you have +an input-line that you wish to check. + +If it is entirely correct, nothing will appear to happen. This is a good thing. +Otherwise, a small split window will appear at the top of the Irssi session +showing you the misspelled word, and a selection of 10 possible candidates. + +You may select one of them by pressing the appropriate number from C<0-9>, or +skip the word entirely by hitting the C<Space> bar. + +If there are more than 10 possible candidates for a word, you can cycle through +the 10-word "pages" with the C<n> (next) and C<p> (prev) keys. + +Pressing C<i> adds the current word to your personal dictionary and moves on +to the next word. + +Pressing Escape, or any other key, will exit the spellcheck altogether, although +it can be later restarted. 
+ +=head1 AUTHORS + +Copyright E<copy> 2011 Isaac Good C<E<lt>irssi@isaacgood.comE<gt>> + +Copyright E<copy> 2011 Tom Feist C<E<lt>shabble+irssi@metavore.orgE<gt>> + +=head1 LICENCE + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +=head1 BUGS + +See README file. + +=head1 TODO + +See README file. + +=cut + + +use warnings; +use strict; +use Data::Dumper; +use Irssi; +use Irssi::Irc; +use Irssi::TextUI; + +use File::Spec; + +# Magic. Somehow remedies: +# "Can't locate object method "nicks" via package "Irssi::Irc::Query" Bug +# Actually, that's a bunch of lies, but I'm pretty sure there is something +# it fixes. Otherwise, a bit of cargo-culting can't hurt. 
# Declare the Irssi::Nick package up front.  The original authors added this
# as a workaround for a method-resolution error on nick objects; whether it is
# still required is unverified, so it is kept unchanged.
{ package Irssi::Nick }

# Load Text::Aspell at run time.  A compile-time "use" inside eval {} cannot be
# trapped (it executes before the eval block is ever entered), so a missing
# module would abort the whole script instead of producing the hint below.
my $have_aspell       = eval { require Text::Aspell; 1 };
my $aspell_load_error = $have_aspell ? '' : ($@ || 'unknown error');

if (not $have_aspell) {
    if ($aspell_load_error =~ m/Can't locate/) {
        print '%_Bugger, please install Text::Aspell%_';
    }
    else {
        print 'Failed to load Text::Aspell: ' . $aspell_load_error;
    }
}


our $VERSION = '1.6.1';
our %IRSSI = (
              authors     => 'Isaac Good (yitz_), Tom Feist (shabble)',
              contact     => 'irssi@isaacgood.com, shabble+irssi@metavore.org',
              name        => 'aspell',
              description => 'ASpell spellchecking system for Irssi',
              license     => 'MIT',
              updated     => "2011-10-27",
             );

# ---------------------------
#           Globals
# ---------------------------

# CONFIG SETTINGS
# ===============

# Settings cached vars
my $DEBUG;

# The colour that the suggestions are rendered in in the split windowpane.
my $suggestion_colour;

# Whether to skip spellchecking of words that match nicks in the current
# channel.
my $ignore_chan_nicks;

# path to local aspell irssi dictionary file.
my $irssi_dict_filepath;

# Language to use. It follows the same format of the LANG environment variable
# on most systems. It consists of the two letter ISO 639 language code and an
# optional two letter ISO 3166 country code after a dash or underscore.
my $aspell_language;


# OTHER GLOBALS
# =============

# current line, broken into hashref 'objects' storing word and positional data.
# Each entry has the keys: word, pos, len, prefix_punct, suffix_punct.
my @word_pos_array;
# index of word we're currently processing.
my $index;
my $active_word_obj;

# list of all possible suggestions for current misspelled word
my @suggestions;
# page number - we only show 10 results per page so we can select with 0-9
my $suggestion_page;

# the spellchecker object (undef until reinit_aspell() succeeds).
my $aspell;

# some window references to manage the window splitting and restoration
my $split_win_ref;
my $original_win_ref;

# keypress handling flag: true while we are intercepting keys for corrections.
my $corrections_active;

# ---------------------------
#      key constants
# ---------------------------

sub K_ESC () { 27  }
sub K_RET () { 10  }
sub K_SPC () { 32  }
sub K_0   () { 48  }
sub K_9   () { 57  }
sub K_N   () { 110 }
sub K_P   () { 112 }
sub K_I   () { 105 }

# used for printing stuff to the split window we don't want logged.
# The level constants are called fully qualified so this does not depend on
# them having been imported as barewords.
sub PRN_LEVEL () { Irssi::MSGLEVEL_CLIENTCRAP() | Irssi::MSGLEVEL_NEVER() }
sub AS_CFG    () { "aspellchecker" }

# ---------------------------
#        Teh Codez
# ---------------------------

# check_line($line)
#
# Resets all spellcheck state, splits $line into whitespace-separated words
# (recording each word's offset in the line and its leading/trailing
# punctuation), and starts processing at the first word.
sub check_line {
    my ($line) = @_;

    # reset everything
    $suggestion_page    = 0;
    $corrections_active = 0;
    $index              = 0;
    @word_pos_array     = ();
    @suggestions        = ();
    close_temp_split();

    return unless defined $line;

    _debug('check_line processing "%s"', $line);

    # Offset of the current token within $line.
    my $pos = 0;

    # Skip (but account for) leading whitespace.  Without this the \G-anchored
    # loop below would fail on its first attempt and the line would never be
    # checked at all.  /c keeps pos() intact when there is nothing to skip.
    if ($line =~ m/\G(\s+)/gc) {
        $pos = length $1;
    }

    while ($line =~ m/\G(\S+)(\s*)/g) {
        my ($word, $ws) = ($1, $2); # word, whitespace

        my $word_start = $pos;
        $pos += length($word) + length($ws);

        my $prefix_punct = $word =~ m/^([^a-zA-Z0-9]+)/ ? $1 : '';

        # A token made up solely of punctuation has nothing to check.  It is
        # skipped here because prefix and suffix would otherwise both cover
        # the whole token and yield a negative word length.
        next if length($prefix_punct) == length($word);

        my $suffix_punct = $word =~ m/([^a-zA-Z0-9]+)$/ ? $1 : '';

        my $pp_len = length($prefix_punct);
        my $sp_len = length($suffix_punct);

        my $actual_len  = length($word) - ($pp_len + $sp_len);
        my $actual_word = substr($word, $pp_len, $actual_len);

        if ($DEBUG and ($pp_len or $sp_len)) {
            _debug("prefix punc: %s, suffix punc: %s, actual word: %s",
                   $prefix_punct, $suffix_punct, $actual_word);
        }

        push @word_pos_array, {
                               word         => $actual_word,
                               pos          => $word_start + $pp_len,
                               len          => $actual_len,
                               prefix_punct => $prefix_punct,
                               suffix_punct => $suffix_punct,
                              };
    }

    return unless @word_pos_array > 0;

    process_word($word_pos_array[0]);
}

# _current_channel()
#
# Best-effort lookup of the window item (channel/query) the spellcheck was
# started from.  Returns undef when nothing suitable is found.
sub _current_channel {
    if (defined $original_win_ref and ref $original_win_ref) {
        my $item = $original_win_ref->{active};
        return $item if defined $item;
    }

    # Check that a window exists *before* looking inside it.
    my $win = Irssi::active_win();
    if (defined $win) {
        return $win->{active} if defined $win->{active};

        my @items = $win->items();
        return $items[0] if @items;
    }

    # Last resort.  This yields a plain string rather than an object, which
    # word_matches_chan_nick() rejects as "no channel".
    return Irssi::parse_special('$C');
}

# process_word($word_obj)
#
# Decides what to do with one word record: skip it (numeric, matches a nick,
# spelled correctly, or no suggestions available) or present its suggestions
# in the split window and wait for a keypress.
#
# Note: skipping recurses through spellcheck_next_word(), so the recursion
# depth grows with the number of consecutively skipped words.
sub process_word {
    my ($word_obj) = @_;

    my $word = $word_obj->{word};

    if (not defined $aspell) {
        # No speller available (module missing or initialisation failed).
        spellcheck_finish();
        return;
    }

    my $channel = _current_channel();

    if ($word =~ m/^\d+$/) {

        _debug("Skipping %s that is entirely numeric", $word);
        spellcheck_next_word(); # aspell thinks numbers are wrong.

    } elsif (word_matches_chan_nick($channel, $word_obj)) {
        # skip to next word if it's actually a nick
        # (and the option is set) - checked for in the matches() func.
        _debug("Skipping %s that matches nick in channel", $word);
        spellcheck_next_word();

    } elsif (not $aspell->check($word)) {

        _debug("Word '%s' is incorrect", $word);

        # Never carry over suggestions from a previous word.
        @suggestions = ();

        my $sugg_ref = get_suggestions($word);
        if (defined $sugg_ref && ref($sugg_ref) eq 'ARRAY') {
            @suggestions = @$sugg_ref;
        }

        if (scalar(@suggestions) == 0) {
            spellcheck_next_word();
            return;
        }

        # Move the cursor to every misspelled word, not just the first one.
        $corrections_active = 1;
        highlight_incorrect_word($word_obj);

        if (temp_split_active()) {
            print_suggestions();
        } else {
            _debug("Creating temp split to show candidates");
            create_temp_split();
        }

    } else {

        spellcheck_next_word();
    }
}

# get_suggestions($word)
#
# Returns an array reference of aspell's candidate spellings for $word.
sub get_suggestions {
    my ($word) = @_;
    my @candidates = $aspell->suggest($word);
    _debug("Candidates for '%s' are %s", $word, join(", ", @candidates));
    return \@candidates;
}

# word_matches_chan_nick($channel, $word_obj)
#
# Returns 1 if the word (alone, or combined with its leading and/or trailing
# punctuation) equals a nick in $channel, and 0 otherwise.  Always returns 0
# when the aspell_ignore_chan_nicks setting is off or $channel is not an
# object with a 'type' field.
sub word_matches_chan_nick {
    my ($channel, $word_obj) = @_;

    return 0 unless $ignore_chan_nicks;
    return 0 unless defined $channel and ref $channel;
    return 0 unless exists $channel->{type};

    my @names;
    if ($channel->{type} eq 'QUERY') {

        # TODO: Maybe we need to parse ->{address} instead, but
        # it appears empty on test dumps.
        for my $field (qw(name visible_name)) {
            push @names, $channel->{$field} if exists $channel->{$field};
        }

    } elsif ($channel->{type} eq 'CHANNEL') {
        @names = map { $_->{nick} } $channel->nicks();
    }

    my %is_nick = map { $_ => 1 } grep { defined } @names;

    # Dumper is comparatively expensive; only run it when debugging.
    _debug("Nicks: %s", Dumper(\%is_nick)) if $DEBUG;

    my $plain  = $word_obj->{word};
    my $prefix = defined $word_obj->{prefix_punct} ? $word_obj->{prefix_punct} : '';
    my $suffix = defined $word_obj->{suffix_punct} ? $word_obj->{suffix_punct} : '';

    # try various combinations of the word with its surrounding punctuation.
    my @candidates = (
                      $plain,
                      $prefix . $plain,
                      $plain . $suffix,    # was never tested before the fix
                      $prefix . $plain . $suffix,
                     );

    for my $candidate (@candidates) {
        return 1 if exists $is_nick{$candidate};
    }

    return 0;
}

# /spellcheck command handler: remembers the originating window and checks
# the current contents of the input line.
sub cmd_spellcheck_line {
    my ($args, $server, $witem) = @_;

    if (not defined $aspell) {
        _print('ASpell is not available; install Text::Aspell and reload '
               . 'the script.');
        return;
    }

    if (defined $witem) {
        $original_win_ref = $witem->window;
    } else {
        $original_win_ref = Irssi::active_win();
    }

    my $inputline = _input();
    check_line($inputline);
}

# Ends the interactive session: stops key interception, closes the split
# window and puts the cursor at the end of the input line.
sub spellcheck_finish {
    $corrections_active = 0;
    close_temp_split();

    # stick the cursor at the end of the input line.
    my $input = _input();
    my $end = defined $input ? length($input) : 0;
    Irssi::gui_input_set_pos($end);
}

# 'gui key pressed' handler.  Does nothing unless a correction session is
# active; while one is, every key is consumed (signal_stop) and interpreted:
#   0-9    choose that suggestion on the current page
#   SPC    skip the word
#   i      add the word to the personal dictionary
#   n / p  next / previous page of suggestions
#   other  (including ESC) abort the spellcheck
sub sig_gui_key_pressed {
    my ($key) = @_;
    return unless $corrections_active;

    if ($key == K_ESC) {
        spellcheck_finish();

    } elsif ($key >= K_0 && $key <= K_9) {
        my $digit = $key - K_0;
        _debug("Selecting word: %d of page: %d", $digit, $suggestion_page);
        spellcheck_select_word($digit + ($suggestion_page * 10));

    } elsif ($key == K_SPC) {
        _debug("skipping word");
        spellcheck_next_word();

    } elsif ($key == K_I) {

        my $current_word = $word_pos_array[$index];
        if (defined $current_word and defined $aspell) {
            $aspell->add_to_personal($current_word->{word});
            $aspell->save_all_word_lists();

            _print('Saved %s to personal dictionary', $current_word->{word});
        }

        spellcheck_next_word();

    } elsif ($key == K_N) { # next 10 results

        if ((scalar @suggestions) > (10 * ($suggestion_page + 1))) {
            $suggestion_page++;
        } else {
            $suggestion_page = 0; # wrap around to the first page
        }
        print_suggestions();

    } elsif ($key == K_P) { # prev 10 results
        if ($suggestion_page > 0) {
            $suggestion_page--;
        }
        print_suggestions();

    } else {
        spellcheck_finish();
    }

    Irssi::signal_stop();
}

# Advances to the next word, or finishes when the line is exhausted.
sub spellcheck_next_word {
    $index++;
    $suggestion_page = 0;

    if ($index >= @word_pos_array) {
        _debug("End of words");
        spellcheck_finish();
        return;
    }

    _debug("moving onto the next word: %d", $index);
    process_word($word_pos_array[$index]);
}

# spellcheck_select_word($num)
#
# Applies suggestion number $num (an index into @suggestions) to the current
# word.  Returns 1 on success, 0 if $num is past the end of the list.
sub spellcheck_select_word {
    my ($num) = @_;

    if ($num > $#suggestions) {
        _debug("%d past end of suggestions list.", $num);
        return 0;
    }

    my $word = $suggestions[$num];
    _debug("Selected word %d: %s as correction", $num, $word);
    correct_input_line_word($word_pos_array[$index], $word);
    return 1;
}

# _debug($fmt, @args)
#
# sprintf-style debug output to window 1; a no-op unless aspell_debug is on.
# Callers must pass variable data through @args, never interpolate it into
# $fmt, since a '%' in the data would be read as a format directive.
sub _debug {
    my ($fmt, @args) = @_;
    return unless $DEBUG;

    my $str = sprintf('%%RDEBUG:%%n ' . $fmt, @args);

    my $win = Irssi::window_find_refnum(1);
    if (defined $win) {
        $win->print($str);
    } else {
        Irssi::print($str);
    }
}

# _print($fmt, @args): sprintf-style message to the active window, in green.
sub _print {
    my ($fmt, @args) = @_;
    my $str = sprintf($fmt, @args);
    Irssi::active_win()->print('%g' . $str . '%n');
}

# True while we hold a reference to the temporary suggestions window.
sub temp_split_active {
    return defined $split_win_ref;
}

# Opens the split window.  The 'window created' handler is only hooked for
# the duration of the command so that it captures exactly our new window.
sub create_temp_split {
    Irssi::signal_add_first('window created', 'sig_win_created');
    Irssi::command('window new split');
    Irssi::signal_remove('window created', 'sig_win_created');
}

# Called by Irssi when the script is unloaded.
sub UNLOAD {
    _print("%%RASpell spellchecker Version %s unloading...%%n", $VERSION);
    close_temp_split();
}

# Switches back to the window the spellcheck was started from (if it is not
# already active) and closes the temporary split window, if any.
sub close_temp_split {

    # Distinct sentinels so that "both unknown" never compares as equal.
    my $original_refnum = -1;
    my $active_refnum   = -2;

    my $active_win = Irssi::active_win();

    if (defined $active_win && ref($active_win) =~ m/^Irssi::/) {
        if (exists $active_win->{refnum}) {
            $active_refnum = $active_win->{refnum};
        }
    }

    if (defined $original_win_ref && ref($original_win_ref) =~ m/^Irssi::/) {
        if (exists $original_win_ref->{refnum}) {
            $original_refnum = $original_win_ref->{refnum};
        }
    }

    if ($original_refnum != $active_refnum && $original_refnum > 0) {
        Irssi::command("window goto $original_refnum");
    }

    if (defined($split_win_ref) && ref($split_win_ref) =~ m/^Irssi::/) {
        if (exists $split_win_ref->{refnum}) {
            my $split_refnum = $split_win_ref->{refnum};
            if ($DEBUG) {
                _debug("split_refnum is %d", $split_refnum);
                _debug("splitwin has: %s",
                       join(", ", map { $_->{name} } $split_win_ref->items()));
            }
            Irssi::command("window close $split_refnum");
        } else {
            _debug("refnum isn't in the split_win_ref");
        }
    } else {
        _debug("winref is undef or broken");
    }

    # Always drop the reference so temp_split_active() cannot get stuck
    # reporting a window we can no longer address.
    undef $split_win_ref;
}

# 'window created' handler, active only during create_temp_split().
sub sig_win_created {
    my ($win) = @_;
    $split_win_ref = $win;
    # printing directly from this handler causes irssi to segfault,
    # so defer the window setup by a few milliseconds.
    Irssi::timeout_add_once(10, \&configure_split_win, {});
}

# Deferred setup of the split window: size, name and first page of
# suggestions.
sub configure_split_win {
    # The session may have ended between window creation and this timer.
    return unless defined $split_win_ref;

    $split_win_ref->command('window size 3');
    $split_win_ref->command('window name ASpell Suggestions');

    print_suggestions();
}

# correct_input_line_word($word_obj, $correction)
#
# Replaces the word described by $word_obj with $correction in the input
# line, updates the stored word records to reflect the new text, and moves
# on to the next word.  Only the bare word is replaced; surrounding
# punctuation (e.g. the ':' in "eg:") stays where it is.
sub correct_input_line_word {
    my ($word_obj, $correction) = @_;
    my $input = _input();

    my $word = $word_obj->{word};
    my $pos  = $word_obj->{pos};
    my $len  = $word_obj->{len};

    _debug("Index of incorrect word is %d", $index);
    _debug("Correcting word %s (%d) with %s", $word, $pos, $correction);

    # Guard against the input line no longer containing the recorded span;
    # a 4-arg/lvalue substr outside the string would be fatal.
    if (not defined $input or $pos + $len > length($input)) {
        _debug("Input line changed underneath us; aborting");
        spellcheck_finish();
        return;
    }

    my $new_length = length $correction;
    my $diff       = $new_length - $len;
    _debug("diff between %s and %s is %d", $word, $correction, $diff);

    # record the fix in the array.  The corrected word starts where the old
    # one did, so its own position is unchanged; its punctuation fields are
    # carried over.
    $word_pos_array[$index] = {
                               %$word_obj,
                               word => $correction,
                               len  => $new_length,
                              };

    # do the actual fixing of the input string
    substr($input, $pos, $len) = $correction;

    # now we have to go through and fix up the positions of all the words
    # *after* this one, since the correction might be a different length.
    for my $later_obj (@word_pos_array[$index + 1 .. $#word_pos_array]) {
        $later_obj->{pos} += $diff;
    }

    _debug("Setting input to new value: '%s'", $input);

    # put the corrected string back into the input field.
    Irssi::gui_input_set($input);

    _debug("-------------------------------------------------");
    spellcheck_next_word();
}

# move the cursor to the beginning of the word in question.
sub highlight_incorrect_word {
    my ($word_obj) = @_;
    Irssi::gui_input_set_pos($word_obj->{pos});
}

# Redraws the split window: a header line with paging/help text, then the
# misspelled word followed by the (up to 10) suggestions of the current page,
# numbered 0-9.
sub print_suggestions {
    # Nothing to draw into if the split window is already gone.
    return unless defined $split_win_ref;

    my $count = scalar @suggestions;

    # Number of 10-entry pages, rounding up; an empty list still shows "1/1".
    my $pages = $count > 0 ? int(($count - 1) / 10) + 1 : 1;

    my $bot = $suggestion_page * 10;
    my $top = $bot + 9;
    $top = $#suggestions if $top > $#suggestions;

    my @visible = $bot <= $top ? @suggestions[$bot .. $top] : ();

    my $i = 0;
    @visible = map {
        (my $candidate = $_) =~ s/%/%%/g;  # keep '%' literal in Irssi output
        '(%_' . $suggestion_colour . ($i++) . '%n) ' # bold/coloured selection num
          . $suggestion_colour . $candidate . '%n'   # coloured selection option
    } @visible;

    # disable timestamps to ensure a clean window.
    my $orig_ts_level = Irssi::parse_special('$timestamp_level');
    $split_win_ref->command("^set timestamp_level $orig_ts_level -CLIENTCRAP");

    # clear the window
    $split_win_ref->command("/^scrollback clear");
    my $msg = sprintf('%s [Pg %d/%d] Select a number or <SPC> to skip this '
                      . 'word. Press <i> to save this word to your personal '
                      . 'dictionary. Any other key cancels%s',
                      '%_', $suggestion_page + 1, $pages, '%_');

    my $current = $word_pos_array[$index];
    my $word = (defined $current and defined $current->{word})
      ? $current->{word}
      : '';
    $word =~ s/%/%%/g;   # a '%' in the word must not act as a format code

    $split_win_ref->print($msg, PRN_LEVEL);                   # header
    $split_win_ref->print('%_%R"' . $word . '"%n '            # erroneous word
                          .  join(" ", @visible), PRN_LEVEL); # suggestions

    # restore timestamp settings.
    $split_win_ref->command("^set timestamp_level $orig_ts_level");
}

# 'setup changed' handler: refreshes the cached settings and re-creates the
# speller (once) when the language or the dictionary path changed.
sub sig_setup_changed {
    $DEBUG
      = Irssi::settings_get_bool('aspell_debug');
    $suggestion_colour
      = Irssi::settings_get_str('aspell_suggest_colour');
    $ignore_chan_nicks
      = Irssi::settings_get_bool('aspell_ignore_chan_nicks');

    my $old_lang     = $aspell_language;
    my $old_filepath = $irssi_dict_filepath;

    $aspell_language
      = Irssi::settings_get_str('aspell_language');
    $irssi_dict_filepath
      = Irssi::settings_get_str('aspell_irssi_dict');

    _debug("Filepath: %s", $irssi_dict_filepath);
    _debug("Language: %s", $aspell_language);

    my $path_changed = (not defined $old_filepath)
      || ($irssi_dict_filepath ne $old_filepath);
    my $lang_changed = (not defined $old_lang)
      || ($old_lang ne $aspell_language);

    reinit_aspell() if $path_changed or $lang_changed;
}

# Returns the current contents of the input line.
sub _input {
    return Irssi::parse_special('$L');
}

# (Re)creates the Text::Aspell speller from the cached language and personal
# dictionary settings.  Leaves the previous speller in place if the module is
# unavailable or construction fails.
sub reinit_aspell {
    return unless $have_aspell;

    my $speller = Text::Aspell->new;
    if (not defined $speller) {
        _print('Could not create a Text::Aspell object');
        return;
    }

    # Expand a leading "~" ourselves; the documented example path uses one.
    my $dict_path = $irssi_dict_filepath;
    if (defined $dict_path and defined $ENV{HOME}) {
        $dict_path =~ s{^~(?=/|$)}{$ENV{HOME}};
    }

    $speller->set_option('lang',     $aspell_language);
    $speller->set_option('personal', $dict_path);
    $speller->create_speller();

    $aspell = $speller;
}

# Registers settings, signal handlers and the /spellcheck command.
sub init {
    my $default_dict_path
      = File::Spec->catfile(Irssi::get_irssi_dir(),               "irssi.dict");
    Irssi::settings_add_bool(AS_CFG, 'aspell_debug',              0);
    Irssi::settings_add_bool(AS_CFG, 'aspell_ignore_chan_nicks',  1);
    Irssi::settings_add_str(AS_CFG,  'aspell_suggest_colour',     '%g');
    Irssi::settings_add_str(AS_CFG,  'aspell_language',           'en_GB');
    Irssi::settings_add_str(AS_CFG,  'aspell_irssi_dict',   $default_dict_path);

    sig_setup_changed();

    Irssi::signal_add('setup changed' => \&sig_setup_changed);

    _print("%%RASpell spellchecker Version %s loaded%%n", $VERSION);

    $corrections_active = 0;
    $index              = 0;

    Irssi::signal_add_first('gui key pressed' => \&sig_gui_key_pressed);
    Irssi::command_bind('spellcheck'          => \&cmd_spellcheck_line);
}

init();
