diff options
Diffstat (limited to 'scripts/gsi.pl')
| -rw-r--r-- | scripts/gsi.pl | 154 |
1 files changed, 154 insertions, 0 deletions
diff --git a/scripts/gsi.pl b/scripts/gsi.pl new file mode 100644 index 0000000..d874bd7 --- /dev/null +++ b/scripts/gsi.pl @@ -0,0 +1,154 @@ +#!/usr/bin/perl -w +# <<< MISSION STATEMENT >>> +# +# gsi.pl +# Looks up an 8 digit number in the Norwegian yellowpages... +# ( http://www.gulesider.no/ ) +# +# Prints information after removing identical entries. +# Written by <mistr@sensewave.com> for irssi 0.8.9 +# +# TODO: +# - enhance the regexes (less stripping, better matching) +# - shrink code (more generalized subs) +# - add functionality for name and address lookups +# +# <<< BEGING CODE >>> +use strict; +use LWP::UserAgent; +use URI::Heuristic; +use vars qw($VERSION %IRSSI); + +$VERSION = "220904-04:30:00"; + +my %IRSSI = ( + authors => "mistr", + contact => "mistr\@sensewave.com", + name => "gsi", + modules => 'LWP::UserAgent, URI::Heuristic', + description => "/gsi <phone nr> checks number via http://gulesider.no. Norwegian 8-digit numbers only. Nice if you have caller-ID and are as paranoid as me.", + license => "Public Domain", + url => "http://irssi.org/scripts", + changed => "$VERSION" +); +# No need to change +my $owner = "mistr.atat.sensewave.dotdot.com"; +my $banner = "[http://gulesider.no]"; +# Don't touch +Irssi::settings_add_bool('gsi', 'gsi_debug', 0); +Irssi::print("Set gsi_debug ON for debugging output"); +Irssi::command_bind('gsi', 'cmd_gsi'); +Irssi::print("Added command /gsi"); + +# Subs +sub cmd_gsi { + my $debug = Irssi::settings_get_bool('gsi_debug'); + undef $debug unless ( $debug == 1 ) ; + my ($lookup,$server,$witem) = @_; + $lookup =~ s/\s+//g; + if ( $lookup =~ m/^([0-9]{8}?)$/ ) { + $lookup = $1; + } else { + print CLIENTCRAP "%R>>%n Syntax error. Use /gsi <8digitnumber>"; + return; + } + print CLIENTCRAP "%R>>%n Looking up $lookup"; + my $address = "http://www.gulesider.no/gsi/numberSearch.do?tel="; + $address .= $lookup; + chomp(my $raw_url = $address); + my $url = URI::Heuristic::uf_urlstr($raw_url); + my $ua = LWP::UserAgent->new(); + $ua->agent("$owner"); + my $req = HTTP::Request->new(GET => $url); + $req->referer("$owner"); + my $response = $ua->request($req); + if ($response->is_error()) { + print CLIENTCRAP "%R>>%n Something went wrong fetching by HTTP"; + return; + } else { + my $rawdata = $response->content(); # get the data + $_ = $rawdata; + if ( m/0 treff\./s ) { + print CLIENTCRAP "%R>>%n $banner No hits."; + undef $lookup; + return; + } elsif ( /S\ø\;ket\ ga\ treff\ i(.*)Gule Sider(.*)og(.*)Telefonkatalogen(.*)/ms ) { + print CLIENTCRAP "%R>>%n $banner Multiple listings. Manual search needed."; + print CLIENTCRAP "%R>>%n \($address\)"; + undef $lookup; + return; + } + my $result = codezap( $rawdata ); + ( $debug ) && Irssi::print("debug - $result"); + $_ = $result; + if ( /\([0-9]+ treff\)(.*)function\ submitDrill\(select\)/ ) { # multiple hits + my $rest = $1; + ( $debug ) && Irssi::print("debug - MULTIPLE HITS"); + $rest =~ s/[vV]is.treffene.i.kart//g; + $rest =~ s/[Tt]reff.i.+\(\d+.treff\)//g; + my ($result, %sorted); + while ($_ = $rest) { + m/^[ ]*(.+?)\ (\d{2,}[\d ]+\d{2,3})[ ]+/; + my $info = $1; + my $number = $2; + $rest = $'; + ( $debug ) && Irssi::print("debug - $info - $number"); + $result = $info . " " . $number; + $sorted{$result}++; + } + foreach $result (sort keys %sorted) { + print CLIENTCRAP "%R>>%n $banner $result"; + } + undef $lookup; + return; + } elsif ( m/.*totalt 1 treff\. (.+) ([\d ]+) (.*[a-z-_.+=]+\@[a-z-_.+=]+\..+? )?Send.*/ ) { + ( $debug ) && Irssi::print("debug - 1 HIT STANDARD"); + my $info = $1; + my $number = $2; + my $other = $3; + if ( $other =~ m/\w{3,}/ ) { $number .= " " . $other; } + $info =~ s/[Ss]e ogs.+? [A-Z ]+[A-Z]{2,} //; + $result = splitwords( $info ); + $result .= " $number" + } elsif ( /.*treffene i kart (.*) ([\d ]+) (.*[a-z-_.+=]+\@[a-z-_.+=]+\..+? )?\'\)\;/) { + ( $debug ) && Irssi::print("debug - 1 HIT OTHER"); + my $info = $1; + my $number = $2; + my $other = $3; + if ( $other =~ m/\w{3,}/ ) { $number .= " " . $other; } + $result = splitwords( $info ); + $result .= " $number" + } else { + ( $debug ) && Irssi::print("debug - FAILED REGEX"); + $result = "Unrecognized reply from server"; + } + print CLIENTCRAP "%R>>%n $banner $result"; + undef $lookup; + return; + } +} + +sub codezap { + my $zap = join('', @_); + $zap =~ s/\ \;?//g; + $zap =~ s/\&\;?/\&/g; + $zap =~ s/\<.+?\>/ /msg; + $zap =~ s/\s+/ /mg; + $zap =~ s/ +/ /mg; + $zap =~ s/^ +$//mg; + return "$zap"; +} + +sub splitwords { + my $workload = join('', @_); + my @result; + foreach ( split(' ', $workload) ) { + if (m/([A-Z][^A-Z ]+)([A-Z][^A-Z ]+)/) { + push(@result, $1 . " " . $2); + } else { + push(@result, $_); + } + } + return join(' ', @result); +} + |
