summaryrefslogtreecommitdiffstats
path: root/scripts/gsi.pl
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/gsi.pl')
-rw-r--r--scripts/gsi.pl154
1 files changed, 154 insertions, 0 deletions
diff --git a/scripts/gsi.pl b/scripts/gsi.pl
new file mode 100644
index 0000000..d874bd7
--- /dev/null
+++ b/scripts/gsi.pl
@@ -0,0 +1,154 @@
+#!/usr/bin/perl -w
+# <<< MISSION STATEMENT >>>
+#
+# gsi.pl
+# Looks up an 8 digit number in the Norwegian yellowpages...
+# ( http://www.gulesider.no/ )
+#
+# Prints information after removing identical entries.
+# Written by <mistr@sensewave.com> for irssi 0.8.9
+#
+# TODO:
+# - enhance the regexes (less stripping, better matching)
+# - shrink code (more generalized subs)
+# - add functionality for name and address lookups
+#
+# <<< BEGING CODE >>>
+use strict;
+use LWP::UserAgent;
+use URI::Heuristic;
+use vars qw($VERSION %IRSSI);
+
+$VERSION = "220904-04:30:00";
+
+my %IRSSI = (
+ authors => "mistr",
+ contact => "mistr\@sensewave.com",
+ name => "gsi",
+ modules => 'LWP::UserAgent, URI::Heuristic',
+ description => "/gsi <phone nr> checks number via http://gulesider.no. Norwegian 8-digit numbers only. Nice if you have caller-ID and are as paranoid as me.",
+ license => "Public Domain",
+ url => "http://irssi.org/scripts",
+ changed => "$VERSION"
+);
+# No need to change
+my $owner = "mistr.atat.sensewave.dotdot.com";
+my $banner = "[http://gulesider.no]";
+# Don't touch
+Irssi::settings_add_bool('gsi', 'gsi_debug', 0);
+Irssi::print("Set gsi_debug ON for debugging output");
+Irssi::command_bind('gsi', 'cmd_gsi');
+Irssi::print("Added command /gsi");
+
+# Subs
+sub cmd_gsi {
+ my $debug = Irssi::settings_get_bool('gsi_debug');
+ undef $debug unless ( $debug == 1 ) ;
+ my ($lookup,$server,$witem) = @_;
+ $lookup =~ s/\s+//g;
+ if ( $lookup =~ m/^([0-9]{8}?)$/ ) {
+ $lookup = $1;
+ } else {
+ print CLIENTCRAP "%R>>%n Syntax error. Use /gsi <8digitnumber>";
+ return;
+ }
+ print CLIENTCRAP "%R>>%n Looking up $lookup";
+ my $address = "http://www.gulesider.no/gsi/numberSearch.do?tel=";
+ $address .= $lookup;
+ chomp(my $raw_url = $address);
+ my $url = URI::Heuristic::uf_urlstr($raw_url);
+ my $ua = LWP::UserAgent->new();
+ $ua->agent("$owner");
+ my $req = HTTP::Request->new(GET => $url);
+ $req->referer("$owner");
+ my $response = $ua->request($req);
+ if ($response->is_error()) {
+ print CLIENTCRAP "%R>>%n Something went wrong fetching by HTTP";
+ return;
+ } else {
+ my $rawdata = $response->content(); # get the data
+ $_ = $rawdata;
+ if ( m/0 treff\./s ) {
+ print CLIENTCRAP "%R>>%n $banner No hits.";
+ undef $lookup;
+ return;
+ } elsif ( /S\&oslash\;ket\ ga\ treff\ i(.*)Gule Sider(.*)og(.*)Telefonkatalogen(.*)/ms ) {
+ print CLIENTCRAP "%R>>%n $banner Multiple listings. Manual search needed.";
+ print CLIENTCRAP "%R>>%n \($address\)";
+ undef $lookup;
+ return;
+ }
+ my $result = codezap( $rawdata );
+ ( $debug ) && Irssi::print("debug - $result");
+ $_ = $result;
+ if ( /\([0-9]+ treff\)(.*)function\ submitDrill\(select\)/ ) { # multiple hits
+ my $rest = $1;
+ ( $debug ) && Irssi::print("debug - MULTIPLE HITS");
+ $rest =~ s/[vV]is.treffene.i.kart//g;
+ $rest =~ s/[Tt]reff.i.+\(\d+.treff\)//g;
+ my ($result, %sorted);
+ while ($_ = $rest) {
+ m/^[ ]*(.+?)\ (\d{2,}[\d ]+\d{2,3})[ ]+/;
+ my $info = $1;
+ my $number = $2;
+ $rest = $';
+ ( $debug ) && Irssi::print("debug - $info - $number");
+ $result = $info . " " . $number;
+ $sorted{$result}++;
+ }
+ foreach $result (sort keys %sorted) {
+ print CLIENTCRAP "%R>>%n $banner $result";
+ }
+ undef $lookup;
+ return;
+ } elsif ( m/.*totalt 1 treff\. (.+) ([\d ]+) (.*[a-z-_.+=]+\@[a-z-_.+=]+\..+? )?Send.*/ ) {
+ ( $debug ) && Irssi::print("debug - 1 HIT STANDARD");
+ my $info = $1;
+ my $number = $2;
+ my $other = $3;
+ if ( $other =~ m/\w{3,}/ ) { $number .= " " . $other; }
+ $info =~ s/[Ss]e ogs.+? [A-Z ]+[A-Z]{2,} //;
+ $result = splitwords( $info );
+ $result .= " $number"
+ } elsif ( /.*treffene i kart (.*) ([\d ]+) (.*[a-z-_.+=]+\@[a-z-_.+=]+\..+? )?\'\)\;/) {
+ ( $debug ) && Irssi::print("debug - 1 HIT OTHER");
+ my $info = $1;
+ my $number = $2;
+ my $other = $3;
+ if ( $other =~ m/\w{3,}/ ) { $number .= " " . $other; }
+ $result = splitwords( $info );
+ $result .= " $number"
+ } else {
+ ( $debug ) && Irssi::print("debug - FAILED REGEX");
+ $result = "Unrecognized reply from server";
+ }
+ print CLIENTCRAP "%R>>%n $banner $result";
+ undef $lookup;
+ return;
+ }
+}
+
+sub codezap {
+ my $zap = join('', @_);
+ $zap =~ s/\&nbsp\;?//g;
+ $zap =~ s/\&amp\;?/\&/g;
+ $zap =~ s/\<.+?\>/ /msg;
+ $zap =~ s/\s+/ /mg;
+ $zap =~ s/ +/ /mg;
+ $zap =~ s/^ +$//mg;
+ return "$zap";
+}
+
+sub splitwords {
+ my $workload = join('', @_);
+ my @result;
+ foreach ( split(' ', $workload) ) {
+ if (m/([A-Z][^A-Z ]+)([A-Z][^A-Z ]+)/) {
+ push(@result, $1 . " " . $2);
+ } else {
+ push(@result, $_);
+ }
+ }
+ return join(' ', @result);
+}
+