#!/usr/bin/perl
#
# Newsgroup keyword search.
#
# Connects to an NNTP server, walks a window of articles in each configured
# newsgroup and prints the articles whose bodies contain the configured
# keywords.  The script runs in one of two modes, chosen at start-up:
#
#   * interactive - STDIN and STDOUT are both terminals: settings are
#     confirmed on the terminal and the results are written as plain text
#     to the file named by the OUTPUT setting;
#   * CGI         - anything else: an HTML form (Save / Search / Help) is
#     shown and the results are written as HTML.
#
# Settings live in newspl.rc in the current directory as "NAME = value"
# lines; lines starting with '#' are comments.
#
# NOTE(review): the reviewed copy of this file had been passed through an
# HTML tag stripper, so only remnants of the original markup were visible
# (orphaned "</a>" / "</h2>" closers, blank prints guarded by $html, a
# $bgcolor that was computed but never used).  The tags emitted below
# (<br>, <a name/href>, <h2>, the per-article <table>) are a reconstruction
# from those remnants - confirm them against the original before release.

use strict;
use warnings;

use Net::NNTP;     # this comes from libnet.pm
use Text::CSV_XS;  # unfortunately have to install both these modules.
#use CGI qw(:standard escapeHTML);
use CGI qw(:standard );
use CGI::Carp qw(fatalsToBrowser);
use File::Copy;

# Shared state.  These stay package variables (as in the original script)
# because every sub below reads or writes them.
our (%CONF, %URL, %EMAIL);
our (@KEYWORDS, @NEWS_GROUPS, @NEWS_SERVERS, @EXCLUDE, @file);
our ($KEYWORDS, $NEWS_GROUPS, $NEWS_SERVERS, $EXCLUDE);
our ($page, $html, $verbose, $msg2, $query, $nntp, $csv);

my $CONFIG_FILE = 'newspl.rc';   # live settings file
my $CONFIG_TMP  = 'new.rc';      # scratch copy written by SaveConfig

%CONF         = ();
%URL          = ();
%EMAIL        = ();
@KEYWORDS     = ();
@NEWS_GROUPS  = ();
@NEWS_SERVERS = ();
@EXCLUDE      = ();
$page         = "";
$|            = 1;
$html         = 0;
$verbose      = 0;
$msg2         = "";

print STDERR "Got here";

# A terminal on both ends means a person is running us by hand.
$html = (-t STDIN && -t STDOUT) ? 0 : 1;

process_config_file();
print STDERR "Got here2";

if (!$html) {
    # if interactive, want to print to output file.
    print "Interactive\n";
    check_config();
    print "Starting search...\n";

    my $output = defined $CONF{OUTPUT} ? $CONF{OUTPUT} : '';
    die "No OUTPUT file is configured.\n" if $output eq '';
    open STDOUT, '>', $output
        or die "Can't write to $output, $!.\n";
    search();
}
else {
    # want to print HTML
    $query = CGI->new();
    print $query->header(-TYPE => 'text/html');
    print $query->start_html('Deb\'s Newsgroup Search');

    my @submitted = $query->param;
    my $action    = $query->param('action');
    $action = '' unless defined $action;

    PrintForm($verbose) unless @submitted;
    print STDERR "Got here4";

    if ($action eq 'Save') {
        if (CheckConfig()) {
            SaveConfig();
            process_config_file();
        }
        PrintForm($verbose);
    }

    if ($action eq 'Search') {
        if (CheckConfig()) {
            print "<a name=\"top\"><\/a>\n";
            print "<br>";
            print "<a href=\"#URLS\">Go to URL list.<\/a><br>";
            print "<br><a href=\"#EMAILS\">Go to EMAILS list.<\/a><br>";
            process_query();
            search();
        }
        else {
            PrintForm($verbose);
        }
    }

    if ($action eq 'Help') {
        $verbose = 1;
        PrintForm($verbose);
    }
}    # output html format

print STDERR "Got to the end.\n";
exit;

# html_escape($text)
# Returns $text with the four HTML-significant characters replaced by
# entities, so it is safe both as element content and inside a
# double-quoted attribute.  undef is returned as the empty string.
sub html_escape {
    my ($text) = @_;
    return '' unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# _text($text)
# Returns $text ready for the current output mode: escaped in HTML mode,
# untouched in plain-text mode.
sub _text {
    my ($text) = @_;
    $text = '' unless defined $text;
    return $html ? html_escape($text) : $text;
}

# _as_count($value)
# Returns $value as a non-negative integer, or 0 when it is missing or not
# made up of digits (settings arrive as free text from a file or a form).
sub _as_count {
    my ($value) = @_;
    return 0 unless defined $value && $value =~ /^\s*(\d+)\s*$/;
    return $1 + 0;
}

# _read_answer()
# Reads one line from the terminal and returns it without the newline.
# End of input counts as an empty answer.
sub _read_answer {
    my $answer = <STDIN>;
    return '' unless defined $answer;
    chomp $answer;
    return $answer;
}

# compile_pattern($word)
# Returns a case-insensitive compiled regex for a keyword or exclude word.
# Words are still treated as regular expressions, as before; a word that is
# not a valid regex (e.g. "c++") is matched literally instead of aborting
# the whole search.
sub compile_pattern {
    my ($word) = @_;
    my $compiled = eval { qr/$word/i };
    return defined $compiled ? $compiled : qr/\Q$word\E/i;
}

# linkify_html($line)
# Escapes one line of article text for HTML and wraps the web and mail
# addresses it contains in links.
# Returns ($markup, \@urls, \@emails); each URL is returned with a scheme
# ("www.x.org" becomes "http://www.x.org").
sub linkify_html {
    my ($text) = @_;
    $text = '' unless defined $text;

    my (@urls, @emails);
    my $markup = '';
    my $done   = 0;    # offset of the first character not yet copied

    while ($text =~ m{
            ( \b (?:https?://|www\.) [^\s<>"']* [^\s<>"'.,:;!?)] )  # web address
          | ( [\w.+-]+ \@ [\w-]+ (?: \. [\w-]+ )+ )                 # mail address
        }gxi)
    {
        my ($web, $mail) = ($1, $2);
        $markup .= html_escape(substr($text, $done, $-[0] - $done));
        $done = $+[0];

        if (defined $web) {
            my $target = $web =~ m{^https?://}i ? $web : "http://$web";
            push @urls, $target;
            $markup .= '<a href="' . html_escape($target) . '">'
                     . html_escape($web) . '</a>';
        }
        else {
            push @emails, $mail;
            $markup .= '<a href="mailto:' . html_escape($mail) . '">'
                     . html_escape($mail) . '</a>';
        }
    }
    $markup .= html_escape(substr($text, $done));

    return ($markup, \@urls, \@emails);
}

# search()
# Connects to $CONF{NEWS_SERVER} and, for every group in @NEWS_GROUPS,
# scans up to $CONF{MAX_TO_SEARCH} articles starting from the beginning or
# the end of the group ($CONF{STARTFROM}).  An article is printed when
#   - none of the @EXCLUDE words occurs in its body,
#   - the number of body lines matching a keyword, summed over all
#     keywords, reaches $CONF{TOLERANCE_MIN}, and
#   - if $CONF{AND_KEYWORDS} is true, every keyword matched at least once.
# In HTML mode the addresses found in printed articles are collected in
# %URL and %EMAIL and listed at the end.  Exits the program if the server
# or a group cannot be reached.
sub search {
    my $server = defined $CONF{NEWS_SERVER} ? $CONF{NEWS_SERVER} : '';
    my $br     = $html ? "<br>\n" : '';

    print 'Connecting to ', _text($server), "...\n", $br;
    $nntp = Net::NNTP->new($server);
    if (!$nntp) {
        print "Can't connect to news server, ", _text($@), ".\n", $br;
        exit;
    }

    # Trim once, up front, so the index links and the anchors agree.
    my @groups = grep { $_ ne '' }
                 map  { my $g = $_; $g =~ s/^\s+//; $g =~ s/\s+$//; $g }
                 @NEWS_GROUPS;

    if ($html) {
        foreach my $newsgroup (@groups) {
            my $label = html_escape($newsgroup);
            print "Go to articles in <a href=\"#$label\">$label<\/a><br><br>";
            print "Newsgroup: $label.\n";
        }
    }

    # Empty words are dropped: an empty pattern would match every line
    # (or silently reuse the previous pattern).
    my @keywords = grep { $_ ne '' }
                   map  { my $w = $_; $w =~ s/\s+$//; $w }
                   @KEYWORDS;
    my %keyword_re = map { $_ => compile_pattern($_) } @keywords;
    my @exclude_re = map { compile_pattern($_) }
                     grep { defined($_) && $_ ne '' } @EXCLUDE;

    my $max        = _as_count($CONF{MAX_TO_SEARCH});
    my $tolerance  = _as_count($CONF{TOLERANCE_MIN});
    my $start_from = defined $CONF{STARTFROM} ? $CONF{STARTFROM} : '';

    my $last = "top";    # anchor the next [Prev] link points at

    foreach my $newsgroup (@groups) {
        my $label = _text($newsgroup);
        my ($count, $first, $final, $name) = $nntp->group($newsgroup);

        print "<a name=\"$label\"><\/a><br>" if $html;

        if (!defined $name || $name eq '') {
            print "Couldn't connect to $label, $!, $@.\n";
            exit;
        }

        print _text($name),
            " has $count articles, starting at $first and ending at $final.\n";
        print "<br>" if $html;

        # Work out the window of exactly $max articles and keep it inside
        # the range the server reported.
        my ($start, $stop);
        if ($start_from =~ /beginning/) {
            $start = $first;
            $stop  = $first + $max - 1;
        }
        elsif ($start_from =~ /end/) {
            $start = $final - $max + 1;
            $stop  = $final;
        }
        else {
            # Unknown setting: behave like "beginning" rather than looping
            # from an undefined article number.
            $start = $first;
            $stop  = $first + $max - 1;
        }
        $start = $first if $start < $first;
        $stop  = $final if $stop > $final;

        if ($nntp->postok()) {
            print 'You are allowed to post to ', _text($name),
                " on this server.\n";
        }
        else {
            print 'You are NOT allowed to post to ', _text($name),
                " on this server.\n";
        }
        print "<br><br>" if $html;

        # loop through each article
        my $bgcolor = "eeeeee";

        ARTICLE:
        foreach my $i ($start .. $stop) {
            # Numbers inside the range may be expired or cancelled.
            my $body = $nntp->body($i);
            next ARTICLE unless $body;

            # if there are EXCLUDE words specified, and there's a match
            # in the article, skip the article.
            foreach my $unwanted (@exclude_re) {
                next ARTICLE if grep { $_ =~ $unwanted } @$body;
            }

            my %matches;
            my $num_matches = 0;
            $msg2 = "";
            foreach my $line (@$body) {
                (my $flat = $line) =~ s/\n//g;
                $msg2 .= $flat;
                foreach my $word (@keywords) {
                    next unless $line =~ $keyword_re{$word};
                    $matches{$word}++;
                    $num_matches++;
                }
            }

            # only print if there's a match
            next ARTICLE
                unless @$body && @keywords && $num_matches >= $tolerance;

            my @missing = grep { !$matches{$_} } @keywords;
            next ARTICLE if $CONF{AND_KEYWORDS} && @missing;

            # num of matches for each word
            my $summary = join ' ',
                map { _text($_) . " - $matches{$_}" } sort keys %matches;
            my $msgout = $html ? "<a name=\"$i\"><\/a>" : '';
            $msgout .= "\nArticle #$i had $num_matches matches: ($summary)\n";

            # Alternate table bg colors.
            $bgcolor = ($bgcolor ne "ffffff") ? "ffffff" : "eeeeee";

            print "\n<table width=\"100%\" bgcolor=\"#$bgcolor\"><tr><td>\n"
                if $html;
            print "(from $label)\n";
            print "<br>" if $html;

            # Flattened body for the mail link; the characters that would
            # break the link are replaced by readable placeholders.
            $msg2 =~ s/"/[q]/g;
            $msg2 =~ s/&/[amp]/g;
            $msg2 =~ s/#/[pound]/g;

            if ($html) {
                my $next = $i + 1;
                print "<a href=\"#$last\">[Prev]<\/a> "
                    . "<a href=\"#top\">[Top]<\/a>"
                    . "<a href=\"#$next\">[Next]<\/a> \n";
                print '<a href="mailto:?body=' . html_escape($msg2)
                    . "\">[Email to friend]<\/a> \n";
            }

            print $msgout;
            $last = $i;
            print "<br>\n" if $html;

            my $head = $nntp->head($i) || [];
            foreach my $date_line (grep { /^Date: / } @$head) {
                print _text($date_line);
                print "<br>\n" if $html;
            }

            foreach my $line (@$body) {
                if ($html) {
                    my ($markup, $urls, $emails) = linkify_html($line);
                    $URL{$_}   = $_ for @$urls;
                    $EMAIL{$_} = $_ for @$emails;
                    print $markup;
                    print "<br>\n";
                }
                else {
                    print $line;
                }
            }    # foreach

            print "<\/td><\/tr><\/table>\n<br><br>" if $html;
        }    # for $i
    }    # foreach newsgroup

    $nntp->quit;
    print "done.\n", $br;

    if ($html) {
        print "<br><a href=\"#top\">Goto Top<\/a><br>\n";
        print "<br>\n";
        print "Unique URLS:<br>\n";
        print "<a name=\"URLS\"><\/a>";
        foreach my $url (sort keys %URL) {
            my $shown = html_escape($url);
            print "<br><a href=\"$shown\">$shown<\/a><br>\n";
        }

        print "<br><a href=\"#top\">Goto Top<\/a><br>\n";
        print "<br>\n";
        print "Unique EMAILS:<br>\n";
        print "<a name=\"EMAILS\"><\/a>";
        foreach my $email (sort keys %EMAIL) {
            my $shown = html_escape($email);
            print "<br><a href=\"mailto:$shown\">$shown<\/a><br>";
        }

        print end_html();
    }
    print STDERR "SEARCH is Done.\n";
}    # search

# process_config_file()
# Loads newspl.rc (if it exists) into @file and the settings variables:
# KEYWORDS, NEWS_GROUPS and EXCLUDE keep their raw comma-separated text in
# $KEYWORDS / $NEWS_GROUPS / $EXCLUDE (EXCLUDE also in %CONF), OUTPUT goes
# to %CONF with inner spaces kept, and every other setting goes to %CONF
# with all spaces removed.  Finally the three lists are split into
# @KEYWORDS, @NEWS_GROUPS and @EXCLUDE.  In interactive mode the %CONF
# entries are echoed as they are read.
sub process_config_file {
    if (-e $CONFIG_FILE) {
        if (open my $cnf, '<', $CONFIG_FILE) {
            @file = <$cnf>;
            close $cnf;
        }
        else {
            print STDERR "Can't read $CONFIG_FILE, $!.\n";
            @file = ();
        }

        foreach my $line (@file) {
            # skip comments and blank lines
            next if $line =~ /^#/ || $line =~ /^$/;

            # Limit of 2 so that a value may itself contain '='.
            my ($var, $value) = split /=/, $line, 2;
            $var   = '' unless defined $var;
            $value = '' unless defined $value;
            for ($var, $value) {
                s/^\s+//;
                s/\s+$//;
            }
            next if $var eq '';

            if ($var !~ /^(?:KEYWORDS|NEWS_GROUPS|EXCLUDE|OUTPUT)$/) {
                $var   =~ s/ //g;
                $value =~ s/ //g;
                $CONF{$var} = $value;
                print "CONF{$var} = $value.\n" if !$html;
            }
            else {
                $value =~ s/,\s+/,/g;
                if ($var eq 'KEYWORDS') {
                    $KEYWORDS = $value;
                }
                elsif ($var eq 'NEWS_GROUPS') {
                    $NEWS_GROUPS = $value;
                }
                elsif ($var eq 'EXCLUDE') {
                    $EXCLUDE = $value;
                    $CONF{EXCLUDE} = $value;
                    print "CONF{$var} = $value.\n" if !$html;
                }
                else {    # OUTPUT
                    $CONF{$var} = $value;
                    print "CONF{$var} = $value.\n" if !$html;
                }
            }
        }    # foreach
    }    # if config file exists

    $csv = Text::CSV_XS->new();
    @KEYWORDS    = process_fields($KEYWORDS,    "KEYWORDS");
    @NEWS_GROUPS = process_fields($NEWS_GROUPS, "NEWS_GROUPS");
    @EXCLUDE     = process_fields($EXCLUDE,     "EXCLUDE");
}    # process_config_file

# check_config()
# Interactive mode only: shows every %CONF setting and lets the user type a
# replacement (an empty answer keeps the current value), then does the same
# for the keyword, newsgroup and exclude lists.
sub check_config {
    foreach my $var (sort keys %CONF) {
        my $current = defined $CONF{$var} ? $CONF{$var} : '';
        print "$var = $current? ";
        my $new = _read_answer();
        $CONF{$var} = $new if $new ne "";
    }    # foreach

    @KEYWORDS    = get_new_values("KEYWORDS",    $KEYWORDS,    @KEYWORDS);
    @NEWS_GROUPS = get_new_values("NEWS_GROUPS", $NEWS_GROUPS, @NEWS_GROUPS);
    print "The newsgroups are: @NEWS_GROUPS\n";
    @EXCLUDE     = get_new_values("EXCLUDE",     $EXCLUDE,     @EXCLUDE);
}    # check_config

# process_fields($list, $what)
# Splits the comma-separated text $list into its fields and returns them
# with surrounding whitespace removed and empty fields dropped.  A missing
# or blank $list yields the empty list.  $what names the setting and is
# only used in the error message.  Prints a message and exits the program
# if $list is not valid CSV.
sub process_fields {
    my ($list, $what) = @_;
    return () unless defined $list && $list =~ /\S/;
    $what = 'list' unless defined $what;

    $csv ||= Text::CSV_XS->new();
    if (!$csv->parse($list)) {
        my $err = $csv->error_input;
        print 'parse() failed on argument: ', _text($err),
            " ($what: ", _text($list), ").\n";
        print "<br>\n" if $html;
        exit;
    }

    return grep { $_ ne '' }
           map  {
               my $field = defined $_ ? $_ : '';
               $field =~ s/^\s+//;
               $field =~ s/\s+$//;
               $field;
           } $csv->fields();
}    # process_fields

# PrintForm($verbose)
# Appends the settings form to $page and prints $page.  One table row is
# produced per line of the loaded config file (@file) that starts with a
# known setting name, so the form follows the file's order.  With $verbose
# true the file's comment and blank lines are shown as help text.
sub PrintForm {
    my ($verbose) = @_;
    print STDERR "Got here5";
    print STDERR "Verbose: $verbose.\n";

    # [ setting name, row label, widget arguments ]
    my @fields = (
        [ 'NEWS_SERVER', 'News Server',
            { -size => 20, -default => $CONF{NEWS_SERVER}, -maxlength => 60 } ],
        [ 'NEWS_GROUPS', 'News Groups',
            { -size => 20, -default => $NEWS_GROUPS } ],
        [ 'MAX_TO_SEARCH', 'Max to Search',
            { -size => 20, -default => $CONF{MAX_TO_SEARCH} } ],
        [ 'STARTFROM', 'Start from',
            { -values => [ 'beginning', 'end' ], -default => $CONF{STARTFROM} } ],
        [ 'OUTPUT', 'Output File',
            { -size => 20, -default => $CONF{OUTPUT}, -maxlength => 80 } ],
        [ 'TOLERANCE_MIN', 'Tolerance Min',
            { -size => 3, -default => $CONF{TOLERANCE_MIN} } ],
        [ 'KEYWORDS', 'Keywords',
            { -size => 40, -maxlength => 80, -default => $KEYWORDS } ],
        [ 'AND_KEYWORDS', 'And Keywords?',
            { -size => 2, -default => $CONF{AND_KEYWORDS} } ],
        [ 'EXCLUDE', 'Exclude',
            { -size => 20, -default => $CONF{EXCLUDE} } ],
    );

    $page .= "<br>"
        . h3('Deb\'s Newsgroup Search')
        . start_form({ -action => 'news.cgi',
                       -name   => 'searchform',
                       -method => 'POST' });
    $page .= CGI::start_table() . Tr(th({ -colspan => 2 }));

    foreach my $line (@file) {
        $page .= "\n";

        if (($line =~ /^#/ || $line =~ /^$/) && $verbose) {
            # Work on a copy so the cached config lines stay intact.
            (my $help = $line) =~ s/^\s*#//;
            $page .= html_escape($help) . "<br>";
            next;
        }

        # First setting whose name starts the line, as in the config
        # reader's own matching.
        my ($field) = grep { $line =~ /^\Q$_->[0]\E/ } @fields;
        next unless $field;

        my ($name, $label, $args) = @$field;
        my $widget = $name eq 'STARTFROM'
            ? CGI::radio_group({ -name => $name, %$args })
            : CGI::textfield({ -name => $name, %$args });
        $page .= Tr(td($label) . td({ -colspan => 2 }, $widget));
    }

    $page .= CGI::end_table();
    $page .= submit(-name => 'action', -value => 'Save')
           . submit(-name => 'action', -value => 'Search')
           . submit(-name => 'action', -value => 'Help');
    #$page .= end_html();

    print STDERR "Got here6\npage: $page\n\n";
    print "$page\n";
}    #PrintForm

# process_query()
# Copies the submitted form fields into %CONF and the list variables, then
# re-splits the keyword, newsgroup, server and exclude lists.
sub process_query {
    foreach my $name (qw(NEWS_SERVER MAX_TO_SEARCH STARTFROM
                         OUTPUT TOLERANCE_MIN AND_KEYWORDS))
    {
        $CONF{$name} = scalar param($name);
    }

    $NEWS_GROUPS  = scalar param('NEWS_GROUPS');
    $NEWS_SERVERS = $CONF{NEWS_SERVER};
    $KEYWORDS     = scalar param('KEYWORDS');
    $EXCLUDE      = scalar param('EXCLUDE');
    $CONF{EXCLUDE} = $EXCLUDE;    # same place the config reader keeps it

    @KEYWORDS     = process_fields($KEYWORDS,     "KEYWORDS");
    @NEWS_GROUPS  = process_fields($NEWS_GROUPS,  "NEWS_GROUPS");
    @NEWS_SERVERS = process_fields($NEWS_SERVERS, "NEWS_SERVERS");
    @EXCLUDE      = process_fields($EXCLUDE,      "EXCLUDE");
}    # process_query

# SaveConfig()
# Rewrites newspl.rc with the submitted form values.  The file is
# regenerated line by line into new.rc - comments, blank lines and
# settings this script does not edit are copied through unchanged - and
# new.rc is then copied over newspl.rc.  Dies if new.rc cannot be written;
# prints a message and exits if the final copy fails.
sub SaveConfig {
    process_query();

    if (-e $CONFIG_FILE) {
        open my $cnf, '<', $CONFIG_FILE
            or die "Can't open $CONFIG_FILE, $!.\n";
        @file = <$cnf>;
        close $cnf;
    }

    # Listed in the order the original tested them.
    my @names = qw(NEWS_SERVER NEWS_GROUPS MAX_TO_SEARCH STARTFROM OUTPUT
                   TOLERANCE_MIN KEYWORDS AND_KEYWORDS EXCLUDE);
    my %value_for = (
        NEWS_SERVER   => $CONF{NEWS_SERVER},
        NEWS_GROUPS   => $NEWS_GROUPS,
        MAX_TO_SEARCH => $CONF{MAX_TO_SEARCH},
        STARTFROM     => $CONF{STARTFROM},
        OUTPUT        => $CONF{OUTPUT},
        TOLERANCE_MIN => $CONF{TOLERANCE_MIN},
        KEYWORDS      => $KEYWORDS,
        AND_KEYWORDS  => $CONF{AND_KEYWORDS},
        EXCLUDE       => $EXCLUDE,
    );

    open my $new, '>', $CONFIG_TMP
        or die "Can't open $CONFIG_TMP, $!.\n";

    foreach my $line (@file) {
        if ($line =~ /^#/ || $line =~ /^$/) {
            print {$new} $line;
            next;
        }

        my ($name) = grep { $line =~ /^\Q$_\E/ } @names;
        if (!defined $name) {
            print {$new} $line;    # not ours to edit - keep it
            next;
        }

        my $value = defined $value_for{$name} ? $value_for{$name} : '';
        # A line break inside a form value would otherwise start a new,
        # attacker-chosen setting line.
        $value =~ s/[\r\n]+/ /g;
        print {$new} "$name = $value\n";
    }    # foreach

    close $new or die "Can't write $CONFIG_TMP, $!.\n";

    if (!copy($CONFIG_TMP, $CONFIG_FILE)) {
        print "Error with copy: $!.\n";
        exit;
    }
    print "Config file saved.\n";
}    # SaveConfig

# CheckConfig()
# Checks that every required form field was filled in.  Prints a message
# for the first missing field and returns 0; returns 1 when all are present.
sub CheckConfig {
    my @required = (
        [ NEWS_SERVER   => 'Please enter your News Server.' ],
        [ NEWS_GROUPS   => 'Please enter at least one News Group (separate by commas).' ],
        [ MAX_TO_SEARCH => 'Please enter Max to Search.' ],
        [ STARTFROM     => 'Please enter where to Start from.' ],
        [ TOLERANCE_MIN => 'Tolerance Min must be entered.' ],
        [ KEYWORDS      => 'At least one Keyword must be entered. To enter more, separate each keyword with a comma.' ],
        [ AND_KEYWORDS  => 'And Keywords must be entered.' ],
    );

    foreach my $rule (@required) {
        my ($name, $message) = @$rule;
        my $value = $query->param($name);
        next if defined $value && $value ne "";

        print "News server was blank.<br>" if $name eq 'NEWS_SERVER';
        print "<br>\n<h2>$message<\/h2>\n";
        return (0);
    }
    return (1);
}    # CheckConfig

# get_new_values($text, $var, @array)
# Interactive mode only: shows the current comma-separated value $var of
# the setting named $text and reads a replacement from the terminal.
# Returns the fields of the replacement, or @array unchanged when the
# answer is empty.
sub get_new_values {
    my ($text, $var, @array) = @_;
    $var = '' unless defined $var;

    print "In get_new_values: text: $text, var: $var, array: @array.\n";
    print "$text = $var? ";
    my $new = _read_answer();
    if ($new ne "") {
        # Parse what the user just typed, not the value being replaced.
        @array = process_fields($new, $text);
    }
    return @array;
}    # get_new_values