#!/usr/bin/perl -Tw
# 
# Jon Hart <warchild@spoofed.org>
#
# Everone needs a pet termite.  Especially on the web.
#
# Given a bunch of input (i.e., index.html, `lynx -dump
# http://foo.somewhere.biz`, etc), weasel our way through the web tree
# looking for directories, files, and scripts that might exist.  Very
# useful when doing information gathering on a particular host.  Tries to
# brute force script extensions, methods, directories and even possible 
# renaming.
#
# A couple of very real situations where this would be useful include:
#
#  * Admin of foo.com edits http://foo.com/index.asp with vim, leaving
#    .index.asp.swp available for viewing
#
#  * Admin of foo.com makes a backup copy of index.asp
#
#  * Admin of foo.com dumps all old content in /old
#
#
#
# Copyright (c) 2003, Jon Hart 
# All rights reserved.
# 
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#   * Redistributions of source code must retain the above copyright notice, 
#     this list of conditions and the following disclaimer.
#   * Redistributions in binary form must reproduce the above copyright notice, 
#     this list of conditions and the following disclaimer in the documentation 
#     and/or other materials provided with the distribution.
#   * Neither the name of the organization nor the names of its contributors may
#     be used to endorse or promote products derived from this software without 
#     specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#

use strict;
use diagnostics;
use LWP::UserAgent;
use Getopt::Long;

GetOptions( "verbose"   => \(my $opt_verbose),
            "interactive"   => \(my $interactive),
            "extensions=s"     => \(my $opt_extensions),
            "moves=s"   => \(my $opt_moves),
            "backups=s"   => \(my $opt_backups),
            "methods=s" => \(my $opt_methods),
            "sleep=s"   => \(my $opt_sleep),
            "crawl"     => \(my $opt_crawl),
            "directories=s"     => \(my $opt_directories)) or die "Unknown option: $!\n";

$| = 1;
my %extensions = map { $_, 1 } qw(.asp .aspx .cgi .css .html .htm .HTML .HTM .inc .js .jar .jsp .php .pl .pm .xml);
my %moves = map { $_, 1 } qw(.|.swp |~);
my %backups = map { $_, 1 } qw(.orig .bak .old .tar .tar.gz .tar.bz2 .zip .Z);
my %methods = map { $_, 1 } qw(GET POST);
my %directories = map { $_, 1 } qw(/ /cgi-bin/ /bin/ /old/ /backup/ /new/);
my @files;
my @links;
my %parts;
my $sleep = 0;
my $it = 0;
my $check_count = 0;
my $total_checks = 0;
my $skipped = 0;
my @ticks = qw( / - \ | );
my ($request, $response, $rescode);

if (defined($opt_extensions)) {
   %extensions = ();
   foreach (split(/\s+/, $opt_extensions)) {
      $extensions{$_} = 1;
   }

}

if (defined($opt_moves)) {
   %moves = ();
   foreach (split(/\s+/, $opt_moves)) {
      $moves{$_} = 1;
   }
}

if (defined($opt_backups)) {
   %backups = ();
   foreach (split(/\s+/, $opt_backups)) {
      $backups{$_} = 1;
   }
}

if (defined($opt_methods)) {
   %methods = ();
   foreach (split(/\s+/, uc($opt_methods))) {
      $methods{$_} = 1;
   }
}

if (defined($opt_directories)) {
   %directories = ();
   foreach (split(/\s+/, $opt_directories)) {
      $directories{$_} = 1;
   }
}

if (defined($opt_sleep)) {
   $sleep = $opt_sleep;
} 

unless (@ARGV) { &usage; exit(1); }

my $target = $ARGV[0];


shift(@ARGV);

if (defined($opt_crawl)) {
   use HTML::LinkExtor;
   #use URI::URL;
   &crawl($target);
   &dircrawl($target);
} elsif (@ARGV) {
   @files = @ARGV;
   &fixdirs;
   foreach (@files) {
      &termite($_);
   }
} else {
   &fixdirs;
   if ($interactive) {
      print("Enter files to check (whitespace or newline separated)\n");
   }
   while (<STDIN>) {
      chomp();
      foreach (split(/\s_/, $_)) {
         if ($interactive) {
            @files = ();
            $check_count = 0;
         }
         push(@files, $_);
         &termite($_);

         if ($interactive) {
            print("\n");
         }
      }
   }
}


        


sub check {
   # given a HTTP method and url, see if it "exists"
   my $method = shift;
   my $url = shift;

   my $ua = LWP::UserAgent->new;
   $ua->agent("Mozilla/4.0 (compatible; MSIE 6.0; Windows 98)");

   if (defined($opt_verbose)) { print("Checking $method $url\n"); }
   my ($request, $response, $rescode, $link);
   $request = HTTP::Request->new($method => "$url");
   $response = $ua->request($request);
   $rescode = $response->status_line;
   $check_count++;

   if ($response->is_success || !($rescode =~ /^404/)) {
      if (!defined($opt_verbose)) { print("\e[0E"); }
      print("$method $url -> $rescode\n");
      return 0;
   } else {
      return -1;
   }
}

sub crawl {
   my $site = shift;
   my $ua = LWP::UserAgent->new;
   $ua->agent("Mozilla/4.0 (compatible; MSIE 6.0; Windows 98)");

   my $parser = HTML::LinkExtor->new(\&dropimgs);

   my $response = $ua->request(HTTP::Request->new(GET => $site), sub {$parser->parse($_[0])});
   my $exists = 0;
   my ($newtarget, $newlink) = "";

   if ($response->is_success) {
      foreach my $file (@links) {
         $exists = 0;
         # skip root dirs
         if ($file =~ /\/$/) { next; }
         # skip offsite links for now
         if ($file =~ /^http:\/\//) {
            if (!($file =~ /^$target/)) {
               print("Skipping $file\n");
               next;
            }
         }
         # skip mailto
         if ($file =~ /^mailto:/) { next; }
         
         # tear off all parameters that may be being passed
         # to the URL in question
         if ($file =~ /\?/) { $file =~ s/\?.*//; }
         # ditto with location markers ('#')
         if ($file =~ /\#/) { $file =~ s/\#.*//; }

         if ($file =~ /^$/) { next; }
      
         # first, just handle the files that we get ...
         foreach my $mv (sort (keys %moves, keys %backups)) {
            METHOD: foreach my $method (sort keys %methods) {
               my $link = $file . $mv;

               if ($mv =~ /\|/) {
                  my @edit = split(/\|/, $mv);
                  if ($file =~ /\//) {
                     # if $file starts with /, do the move and put the / back
                     (my $newfile = $file) =~ s/^\///;

                     # if the file contains a / (like /foo/bar, we are only going to deal with 
                     # /bar, so pull out /foo and save it away
                     if ($file =~ /^(.*)\/([^\/]*)/) {
                        $link = $1 . "/" . $edit[0] . $2 . $edit[1];
                     } else {
                        $link = "/" . $edit[0] . $newfile . $edit[1];
                     }
                  } else {
                     $link = $edit[0] . $file . $edit[1];
                  }
               }

               if ((!($link =~ /^\//)) && (!($target =~ /\/$/))) {
                  ($newtarget = $target) =~ s/$/\//;
                  $newlink = $link;
               } elsif ($link =~ /^\//) {
                  # the link is absolute, so rip off all the remainder from the target
                  ($newtarget = $target) =~ s/(\/\/[^\/]*)\/.*/$1/;
                  $newlink = $link;
               } else {
                  $newtarget = $target;
                  $newlink = $link;
               }

               #$newlink =~ s/.*\/\/[^\/]*(\/.*)/$1/;
               $exists = &check($method, $newtarget . $newlink);
               
               # cram all the parts of this url for later usage;
               my @tmp = split(/\//, $newlink);
               while ($#tmp >= 1) {
                  $parts{$newtarget . join('/', @tmp)} = 1;
                  pop(@tmp);
               }

               if (!defined($opt_verbose)) { &tick }
               sleep($sleep);
               
               # if this file doesn't exist, don't bother anymore...
               if ($exists == -1) { last METHOD; }
            }
         }
      }
   }
}

sub dircrawl {
   # dumbly assume that, for a given url http://foo.com/bar/baf/blah, 
   # /bar/baf/blah, /bar/baf and /bar are all directories on the filesystem
   # perform checks accordingly
   foreach my $part (sort keys %parts) {
      foreach my $mv (sort (sort keys %backups)) {
         METHOD: foreach my $method (sort keys %methods) {
            if (&check($method, $part . $mv) == -1) {
               if (!defined($opt_verbose)) { &tick }
               sleep($sleep);
               last METHOD;
            } else {
               if (!defined($opt_verbose)) { &tick }
               sleep($sleep);
            }
         }
      }
   }
}

sub dropimgs {
   my ($tag, %attr) = @_;
   return if $tag eq 'img';
   push(@links, values %attr);
}

sub fixdirs {
   if (defined($opt_verbose)) { print("Checking directories...\n"); }
   foreach my $dir (sort keys %directories) {
      foreach my $method (sort keys %methods) {
         if (&check($method, $target . $dir) == -1) {
            # this directory doesn't exist, so never check it again.
            
            if (defined($opt_verbose)) { print("Removing $target$dir\n"); }
               
            delete $directories{$dir};
            sleep($sleep);
            last;
         }
         sleep($sleep);
      }
   }
   if (defined($opt_verbose)) { print("Done\n\n"); }
}

sub termite {
   # walk through all possible combinations of directories,
   # script names, file extensions, and any attempt at renaming or
   # disabling.
   my $file = shift;
   foreach my $dir (sort keys %directories) {
      foreach my $ext (sort keys %extensions) {
         foreach my $mv ("", (sort keys %moves, sort keys %backups)) {
            METHOD: foreach my $method (sort keys %methods) {
               my $link = $file . $ext . $mv;
               if ($mv =~ /\|/) {
                  my @edit = split(/\|/, $mv);
                  $link = $edit[0] . $file . $ext . $edit[1];
               }
               my $res = &check($method, $target . $dir . $link); 

               if (!defined($opt_verbose)) { &tick; }
               sleep($sleep);
               
               # if this file doesn't exist, don't bother anymore...
               if ($res == -1) { last METHOD; }
            }
         }
      }
   }
}

sub tick {
   # stupid little progress ticker so you know its doin' something
   $it = 0 if ++$it > $#ticks;
   print("\e[0E", $ticks[$it], "\e[0E");
}

sub usage {

print <<EOF;

Termite v0.5 by Jon Hart <jhart\@spoofed.org>

Used to brute force file names on webservers

Usage: $0 <http[s]://host[:port]> [options]

EOF
print("Option\tDescription\tDefault\n");
print("-"x80, "\n");
print("--directories <s>\tDirectory names\t\"", join(" ", sort keys %directories), "\"\n");
print("--extensions <s>\tFile extensions\t\"", join(" ", sort keys %extensions), "\"\n");
print("--moves <s>\tFile renames\t\"", join(" ", sort keys %moves), "\"\n");
print("--backups <s>\tFile/dir backups\t\"", join(" ", sort keys %backups), "\"\n");
print("--methods <s>\tRequest methods\t\"", join(" ", sort keys %methods), "\"\n");
print("--sleep <i>\tSleep Interval\t0\n");
print("--verbose\tBe verbose\n");
print("--interactive\tRead files and do checks interactively\n");

print <<EOF;
Examples:

$0 http://yoursite index
$0 https://yoursite:8443 --dir secure backup search user


EOF
}
