#!/usr/local/bin/perl
# 
# An attempt to rewrite the Hypertext Archie Gateway (CGI) so its not as 
# hard on the system (leaving temp files, 4 processes per request, etc.)
#
# Version 1.1	03-19-96
#
# Public Domain
# AA.pl by Brandon Long (blong@uiuc.edu) based on 
# AA 1.2 by Guy Brooker (guy@jw.estec.esa.nl), 
# CGI routines from Carlos Pero (cpero@ncsa.uiuc.edu)
#
#
# This script relies on the C-archie client from Brendan Kehoe
# Once installed, please set the correct path below.
# You can obtain the latest version from a comp.sources archive, or at
# file://ftp.uu.net/networking/info-service/archie/clients
# 
# QUERIES
# If no query is specified, returns a default form.
#
# Originally written for Perl 4 on AIX 3.2.5, but should work with minor
# modification on Perl 5.
#
#
# Version 1.0
# 12-10-95	Brandon Long (blong@uiuc.edu)
#
# Version 1.1
# 03-19-96	Brandon Long (blong@uiuc.edu)
#		Cleaned up, and now returns a default form if no query is
#               specified.

# ****************** Configurable Variables *******************

$ARCHIE         = "/local/bin/archie";
$DEFAULT_SERVER = "archie.rutgers.edu";
$DEFAULT_NICE   = 500;
$DEFAULT_HITS   = 20;
$DEFAULT_ARCHIE_OPTS = "-s"; # Case Insensitive Substring Match

$ARCHIE_URL = "/cgi-bin/AA.pl";
$counter_file   = "/u/Web/archie/archie.count";
$query_log_file = "/u/Web/archie/archie.log";
$query_log_url = "/archie/archie.log";

# Set this to 1 if perl flock() works on your system (doesn't
# on Solaris) or to 0 if it doesn't.
$LOCK_ON = 1;

#*************************************************************

# flock() values
$LOCK_SH = 1;
$LOCK_EX = 2;
$LOCK_NB = 4;
$LOCK_UN = 8;

&cgi_receive;
&cgi_decode;

if (!defined($FORM{query})) {
  &display_form();
  exit(0);
}


if (!(-x $ARCHIE)) {
  &return_error("\$Archie Program: $ARCHIE Not Found");
}

if ($FORM{type} eq "Case Insensitive Substring Match") {
  $OPT_search = "-s";
} elsif ($FORM{type} eq "Exact Match") {
  $OPT_search = "-e";
} elsif ($FORM{type} eq "Case Sensitive Substring Match") {
  $OPT_search = "-c";
} elsif ($FORM{type} eq "Regular Expression Match") {
  $OPT_search = "-r";
} else {
  $OPT_search = $DEFAULT_ARCHIE_OPTS;
}

if ($FORM{server} eq "United Kingdom") {
  $ArchieServer="archie.doc.ic.ac.uk";
} elsif ($FORM{server} eq "ANS archie server") {
  $ArchieServer="archie.ans.net";
} elsif ($FORM{server} eq "Australia") {
  $ArchieServer="archie.au";
} elsif ($FORM{server} eq "Austria") {
  $ArchieServer="archie.univie.ac.at";
} elsif ($FORM{server} eq "Canada") {
  $ArchieServer="archie.mcgill.ca";
} elsif ($FORM{server} eq "Finland") {
  $ArchieServer="archie.funet.fi";
} elsif ($FORM{server} eq "Germany") {
  $ArchieServer="archie.th-darmstadt.de";
} elsif ($FORM{server} eq "Internic") {
  $ArchieServer="archie.internic.net";
} elsif ($FORM{server} eq "Israel") {
  $ArchieServer="archie.ac.il";
} elsif ($FORM{server} eq "Italy") {
  $ArchieServer="archie.unipi.it";
} elsif ($FORM{server} eq "Japan") {
  $ArchieServer="archie.wide.ad.jp";
} elsif ($FORM{server} eq "Korea") {
  $ArchieServer="archie.kr";
} elsif ($FORM{server} eq "New Zealand") {
  $ArchieServer="archie.nz";
} elsif ($FORM{server} eq "Rutgers University") {
  $ArchieServer="archie.rutgers.edu";
} elsif ($FORM{server} eq "Spain") {
  $ArchieServer="archie.rediris.es";
} elsif ($FORM{server} eq "SURAnet") {
  $ArchieServer="archie.sura.net";
} elsif ($FORM{server} eq "Sweden") {
  $ArchieServer="archie.luth.se";
} elsif ($FORM{server} eq "Taiwan") {
  $ArchieServer="archie.ncu.edu.tw";
} elsif ($FORM{server} eq "University of Nebraska") {
  $ArchieServer="archie.unl.edu";
} else {
  $ArchieServer="$DEFAULT_SERVER";
}

if ($FORM{order} eq "date") {
  $OPT_order = "-t";
} elsif ($FORM{order} eq "host") {
  $OPT_order = "";
} else {
  $OPT_order = "";
}

if ($FORM{nice} eq "Not Nice At All") {
  $OPT_nice = "-N0";
} elsif ($FORM{nice} eq "Nice") {
  $OPT_nice = "-N500";
} elsif ($FORM{nice} eq "Nicer") {
  $OPT_nicer = "-N1000";
} elsif ($FORM{nice} eq "Very Nice") {
  $OPT_nicer = "-N5000";
} elsif ($FORM{nice} eq "Extremely Nice") {
  $OPT_nicer = "-N10000";
} elsif ($FORM{nice} eq "Nicest") {
  $OPT_nicer = "-N32765";
} else {
  $OPT_nicer = "-N$DEFAULT_NICE";
}

if ($FORM{hits} =~ /1[0-9][0-9]/) {
  $OPT_hits = "-m$FORM{hits}";
} elsif ($FORM{hits} =~ /[0-9][0-9]/) {
  $OPT_hits = "-m$FORM{hits}";
} else {
  $OPT_hits = "-m$DEFAULT_HITS";
}


open(INPUT,"$ARCHIE $OPT_hits $OPT_nice $OPT_search $OPT_order -h $ArchieServer $FORM{query} |") || &return_error("$|");

&return_header;
print "<BODY>\n";
print "<H1>Archie Gateway: Results for Query: $FORM{query}</H1>\n";

$first = 1;
$num_hosts = 0;
$num_dirs = 0;
$num_files = 0;

while(<INPUT>) {
#  print $_;
  chop;
  if (/Host/) {
    $host = $_;
    $host =~ s/(Host\s+)(\S*)\s*/$2/;
    if (!$first) {
      print "</DL></DL>";
      print "<P>\n";
    } else { $first = 0; }
#    print "<IMG SRC=\"/icons/ftp.gif\"> ";
    print "<B>Host <A HREF=\"ftp://$host/\">";
    print "$host</A></B><BR>\n";
    print "<DL><DL>";
    $num_hosts++;
  } elsif (/Location:/) {
    $loc = $_;
    $loc =~ s/(\s*Location:\s+)(\S*)\s*/$2/;
    print "<DT>";
#    print "<IMG SRC=\"/icons/rhook.xbm\">";
    print "<IMG SRC=\"/icons/menu.gif\"> ";
    print "<A HREF=\"ftp://$host$loc/\">";
    print "$loc</A><BR>\n";
    $num_dirs++;
  } elsif (/FILE/) {
    $file = $_;
    $other = $_;
    $file =~ s/(\s*FILE [-drwx]{10}\s+[0-9]*\s+[A-Za-z]+[0-9]*[0-9]*\s+[0-9]*\s+[0-9]*\s+)(\S*)\s*/$2/;
    $other =~ s/(\s*FILE [-drwx]{10}\s+[0-9]*\s+[A-Za-z]+[0-9]*[0-9]*\s+[0-9]*\s+[0-9]*\s+)(\S*)\s*/$1/;
#    print "<DD>"; 
#    print "<IMG SRC=\"/icons/rhook.xbm\">";
#    print "<IMG SRC=\"/icons/line.xbm\">";
    print "<IMG SRC=\"/icons/binary.gif\"> ";
    print "<TT>$other<BR>";
    print "<DD><A HREF=\"ftp://$host$loc/$file\">";
    print "$file</A></TT><BR>\n";
    $num_files++;
  }
}

if ($first) {
   print "<B>No Results!</B>\n";
}

($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time);
 
$date = $year;
$mon++;
if ($mon < 10) { $date .= "0".$mon; }
  else { $date .= $mon; }
if ($mday < 10) { $date .= "0".$mday; }
  else { $date .= $mday; }

print "</DL></DL><P>\n";
print "<HR><B>Summary for Query: $FORM{query}</B><P>\n";
print "Archie at $ArchieServer found ";
print "$num_files files in $num_dirs directories on $num_hosts hosts.\n";
print "<HR>\n";
print "<A HREF=\"$ARCHIE_URL\"><IMG ALT=\"[Back]\" SRC=\"/images/back.gif\">\n";
print "Back to Archie Query Form</A>";
print "<HR>\n";
print "<a href=\"http://www.uiuc.edu/ph/www/blong\">Brandon C. Long</a> /\n";
print "blong@uiuc.edu / \n";
print "<a href=\"http://hoohoo.ncsa.uiuc.edu/~blong/pgpkey.html\">PGP</a> / $date / \n";
&count_hit;
&log_query;
print "</BODY>\n";

#&return_error("Nothing happened $ArchieServer");
#******************** Functions *****************************

sub return_header {
    &cgi_header(200,OK);
    print "<HEAD>\n";
    print "<TITLE>Archie Gateway: Results for Query: $FORM{query}</TITLE>\n";
    print "</HEAD>\n";
}

sub return_error {
    &cgi_header(200,OK);
    print "<HEAD>\n";
    print "<TITLE>Archie Gateway: Error</TITLE>\n";
    print "</HEAD>\n";
    print "<BODY>\n";
    print "<H1>Archie Gateway</H1>\n";
    print "An error occurred while processing your request.<BR>\n";
    print "Error: @_\n";
    print "</BODY>\n";
    exit(0);
}
    
#******************** CGI routines from cgi-dump ***********************
sub cgi_header {
    local($code, $text) = @_;
    print "HTTP/1.0 $code $text\n";
    print "Content-type: text/html\n";
    print "\n";
}        

sub cgi_receive {
    #### Assign unique ID to user based on IP and browser
    $USER_ID = crypt($ENV{'REMOTE_ADDR'}, $ENV{'HTTP_USER_AGENT'}); 

    if ($ENV{'REQUEST_METHOD'} eq "POST") {
        read(STDIN, $buffer, $ENV{'CONTENT_LENGTH'});
    }
    else {
        $buffer = $ENV{'QUERY_STRING'};
    }
}

sub cgi_decode {
    @pairs = split(/&/, $buffer);

    foreach $pair (@pairs) {
        ($name, $value) = split(/=/, $pair);

        $name  =~ tr/+/ /;
        $value =~ tr/+/ /;
        $name  =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
        $value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;

        # Strip out any bad, bad characters
        $value =~ s/;/ /g;
        $value =~ s/^!/ /g;      #### Allow in sentences
        $value =~ s/[\cM\n]/ /g; #### Mainly for textareas


        ## Skip blank text entry fields
        next unless ($value);

        ## Check for "dynamic" field names
        ## Mainly for on-the-fly input names, especially checkboxes
        if ($name =~ /^dynamic/) {
            $name = $value;
            $value = "on";
        }
        $FORM{$name} = $value;
    }
}

sub count_hit {
  open(COUNTER,"+<$counter_file");
  ($LOCK_ON) && flock(COUNTER,$LOCK_EX);
  @count = <COUNTER>;
  print "$count[0]";
  $count[0]++;
  seek(COUNTER,0,0);
  print COUNTER "$count[0]";
  ($LOCK_ON) && flock(COUNTER,$LOCK_UN);
  close(COUNTER);
}

sub log_query {
  open(LOG,">>$query_log_file");
  ($LOCK_ON) && flock(LOG,$LOCK_EX);
  print LOG "$date - $FORM{query}\n";
  ($LOCK_ON) && flock(LOG,$LOCK_UN);
  close(LOG);
}

sub display_form {

print <<"EOFORM";

<HEAD><TITLE>Archie Request Form</TITLE></HEAD>
<BODY>
<H1>Archie Request Form</H1>

This is a form based Archie gateway for the WWW.<br>
Please remember that Archie searches can take a long time... 
<p>
You might just want to check out the <a href="/ftp/">Monster FTP Sites List</a>
instead.

<P>
<HR>

<FORM ACTION="$ARCHIE_URL">

What would you like to search for? <INPUT NAME="query"> <br>
See past search <a href="$query_log_url">keywords</a>

<P>
There are several types of search: 
<SELECT NAME="type">
<OPTION>Case Insensitive Substring Match
<OPTION>Exact Match
<OPTION>Case Sensitive Substring Match
<OPTION>Regular Expression Match
</SELECT>

<P>

The results can be sorted
<INPUT TYPE="radio" NAME="order" VALUE="host" CHECKED >By Host or
<INPUT TYPE="radio" NAME="order" VALUE="date">By Date 

</UL>

<P>

The impact on other users can be:
<SELECT NAME="nice">
<OPTION>Not Nice At All
<OPTION>Nice
<OPTION>Nicer
<OPTION>Very Nice
<OPTION>Extremely Nice
<OPTION>Nicest
</SELECT>
<P>

Several Archie Servers can be used:
<SELECT NAME="server">
<OPTION Selected>University of Nebraska
<OPTION>United Kingdom
<OPTION>ANS archie server
<OPTION>Australia
<OPTION>Austria
<OPTION>Canada
<OPTION>Finland
<OPTION>Germany
<OPTION>Internic
<OPTION>Israel
<OPTION>Italy
<OPTION>Japan
<OPTION>Korea
<OPTION>New Zealand
<OPTION>Rutgers University
<OPTION>Spain
<OPTION>SURAnet
<OPTION>Sweden
<OPTION>Taiwan
</SELECT>
<P>

You can restrict the number of results returned (default 10):
<INPUT NAME="hits" SIZE=5 VALUE=10> 

<P>
Press this button to submit the query: 
<inPUT TYPE="submit" VALUE="Submit">. <P>

To reset the form, press this button: <INPUT TYPE="reset" VALUE="Reset">.

</FORM>

<HR>
<H2>What is archie ?</H2>
<QUOTE>
 "Archie" is a database of anonymous ftp sites and their contents.
   The software for it was written by the "Archie Group" (Peter
   Deutsch, Alan Emtage, Bill Heelan, and Mike Parker) at McGill
   University in Montreal, Canada, and they maintain the database as
   well.
<P>
   "Archie" keeps track of the entire contents of a very large number
   of anonymous ftp sites, and allows you to search for files on those
   sites using various different kinds of filename searches.
</QUOTE>
<HR>
<I>
<a href="http://www.uiuc.edu/ph/www/blong">Brandon C. Long</A> /
blong@uiuc.edu / 
<a href="http://hoohoo.ncsa.uiuc.edu/~blong/pgpkey.html">PGP</A> 
</I>
</BODY>
EOFORM
}
