#!/usr/local/bin/perl
#
# usage: 
#  get_links.pl [-D] url
#  	-D	Download all http links to current directory

use strict;
use warnings;

# Path to the external fetch program.  It is run as "webgrab -s URL" and its
# standard output is read as the document body.
# NOTE(review): the meaning of -s is not visible from this file -- confirm
# against webgrab's own documentation.
our $WEBGRAB = "/home/blong/bin/webgrab";

# Matches an <a ...> or <area ...> tag and captures its href value, which may
# be double-quoted ($1), single-quoted ($2) or unquoted ($3).
my $HREF_TAG = qr{
  \A < \s* (?: a | area ) \s
  (?: [^>]*? \s )?
  href \s* = \s*
  (?: " ([^"]*) " | ' ([^']*) ' | ([^\s"'>]+) )
}xi;

# extract_href($tag)
#   $tag - one complete "<...>" tag.
# Returns the href value with surrounding whitespace removed, or nothing
# (undef in scalar context) when the tag is not an anchor/area tag or carries
# no href attribute.
sub extract_href {
  my ($tag) = @_;
  return unless $tag =~ $HREF_TAG;
  for my $value ($1, $2, $3) {
    next unless defined $value;
    $value =~ s/\A\s+//;
    $value =~ s/\s+\z//;
    return $value;
  }
  return;
}

# _remove_dot_segments($path)
#   $path - a path that starts with "/".
# Returns the path with "." segments dropped and each ".." cancelling the
# segment before it.  A ".." with nothing left to cancel is discarded, so the
# result can never climb above the root.
sub _remove_dot_segments {
  my ($path) = @_;
  my @segments = split m{/}, $path, -1;    # -1 keeps a trailing empty field
  shift @segments;                         # empty field before the leading "/"
  my @kept;
  my $ends_in_dir = 0;
  for my $segment (@segments) {
    $ends_in_dir = ($segment eq '.' || $segment eq '..') ? 1 : 0;
    if ($segment eq '..') {
      pop @kept;
    }
    elsif ($segment ne '.') {
      push @kept, $segment;
    }
  }
  # "/a/b/.." names the directory "/a/", so keep the trailing slash.
  push @kept, '' if $ends_in_dir;
  return '/' . join('/', @kept);
}

# resolve_link($base, $href)
#   $base - URL of the document the link was found in.
#   $href - the link exactly as written in the document.
# Returns the link as an absolute URL.  Handles, in this order: links that
# already have a scheme (returned unchanged), the empty link, "#fragment",
# "//host/path", "?query", "/rooted/path" and plain relative paths including
# "." and ".." segments.
sub resolve_link {
  my ($base, $href) = @_;

  # The scheme test must come first: an absolute URL may itself contain "#".
  return $href if $href =~ m{\A[A-Za-z][A-Za-z0-9+.\-]*:};

  # $doc is the base without its fragment; $origin is "scheme://host" (the
  # scheme is optional so a bare "host/path" base still splits sensibly).
  my ($doc) = $base =~ m{\A([^#]*)};
  my ($origin, $base_path) =
    $doc =~ m{\A((?:[A-Za-z][A-Za-z0-9+.\-]*://)?[^/?#]*)([^?#]*)};

  return $doc if $href eq '';
  return $doc . $href if $href =~ m{\A#};

  if ($href =~ m{\A//}) {
    my ($scheme) = $base =~ m{\A([A-Za-z][A-Za-z0-9+.\-]*:)};
    $scheme = 'http:' unless defined $scheme;
    return $scheme . $href;
  }

  return $origin . $base_path . $href if $href =~ m{\A\?};

  # Only the path part is normalised; query and fragment are kept verbatim.
  my ($href_path, $href_rest) = $href =~ m{\A([^?#]*)(.*)\z}s;

  if ($href_path !~ m{\A/}) {
    # Relative path: resolve against the directory of the base document.
    (my $dir = $base_path) =~ s{[^/]*\z}{};
    $dir = '/' if $dir eq '';
    $href_path = $dir . $href_path;
  }
  return $origin . _remove_dot_segments($href_path) . $href_rest;
}

# _local_name($link)
# Returns the file name used to store $link in the current directory: the last
# path segment, ignoring query and fragment, or "index.html" when the URL ends
# in "/" or has no path at all.
sub _local_name {
  my ($link) = @_;
  (my $path = $link) =~ s{[?#].*\z}{}s;
  $path =~ s{\A[A-Za-z][A-Za-z0-9+.\-]*://[^/]*}{};
  my ($name) = $path =~ m{([^/]*)\z};
  if (!defined $name || $name eq '' || $name eq '.' || $name eq '..') {
    $name = 'index.html';
  }
  return $name;
}

# download_link($link)
# Fetches $link with $WEBGRAB and stores the output in the current directory
# under _local_name($link).  The program is started without a shell because
# $link comes from the remote page and must not be interpreted as shell
# syntax.  Failures are reported on STDERR and do not stop the run.
# Returns 1 on success, 0 on failure.
sub download_link {
  my ($link) = @_;
  my $file = _local_name($link);

  my $in;
  if (!open($in, '-|', $WEBGRAB, '-s', $link)) {
    warn "Cannot run $WEBGRAB for $link: $!\n";
    return 0;
  }
  my $out;
  if (!open($out, '>', $file)) {
    warn "Cannot write $file: $!\n";
    close($in);
    return 0;
  }
  binmode($in);
  binmode($out);

  my $ok = 1;
  my $buffer;
  while (read($in, $buffer, 65536)) {
    if (!print {$out} $buffer) {
      warn "Error writing $file: $!\n";
      $ok = 0;
      last;
    }
  }
  if (!close($out)) {
    warn "Error closing $file: $!\n";
    $ok = 0;
  }
  if (!close($in)) {
    warn "$WEBGRAB failed for $link (status $?)\n";
    $ok = 0;
  }
  return $ok;
}

# main(@args)
#   @args - command line: an optional "-D" and the URL to scan.  If several
#           URLs are given the last one wins.
# Prints every link found in the document, one absolute URL per line, and with
# -D also downloads each "http:" link into the current directory.
# Tags are matched one input line at a time, so a tag that is split across
# lines is not seen.
sub main {
  my @args = @_;

  my $download = 0;
  my $url;
  for my $arg (@args) {
    if ($arg eq "-D") {
      $download = 1;
    }
    else {
      $url = $arg;
    }
  }

  if (!defined($url)) {
    # Diagnostics go to STDERR so they never mix with the link list.
    print STDERR "URL required\n";
    print STDERR "usage: get_links.pl [-D] url\n";
    exit 1;
  }

  # List-form pipe open: no shell sees $url.
  open(my $remote, '-|', $WEBGRAB, '-s', $url)
    or die "Problem reaching remote sight: $!";

  while (my $line = <$remote>) {
    chomp $line;
    for my $tag ($line =~ /<[^>]*>/g) {
      my $href = extract_href($tag);
      next unless defined $href;

      my $link = resolve_link($url, $href);
      print "$link\n";
      download_link($link) if $download && $link =~ m{\Ahttp:};
    }
  }

  close($remote)
    or warn "$WEBGRAB failed for $url (status $?)\n";
  return;
}

# Run only when executed as a script, so the helpers above can be loaded and
# called on their own.
main(@ARGV) unless caller;


