#!/usr/bin/ruby # # $Id: htgrep 4 2008-03-02 04:47:09Z warchild $ # # htgrep, grep for the web. # # Jon Hart # # Copyright (c) 2008, Jon Hart # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of the nor the # names of its contributors may be used to endorse or promote products # derived from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY Jon Hart ``AS IS'' AND ANY # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. require 'rubygems' require 'hawler' require 'hawleroptions' def grep (uri, referer, response) line = 0 if (response.nil?) puts "#{uri} from #{referer} was nil" else response.body.each do |l| line += 1 if (l =~ /#{@match}/) puts "#{uri}:#{line} #{l}" end end end end options = HawlerOptions.parse(ARGV) if (ARGV.empty?) puts options.help else @match = ARGV.shift ARGV.each do |site| crawler = Hawler.new(site, method(:grep)) options.each_pair do |o,v| crawler.send("#{o}=",v) end crawler.start end end