#!/usr/bin/python
import urllib
from sgmllib import SGMLParser
import sys
import re
import os
import shutil
class list_urls(SGMLParser):
    """SGML parser that accumulates the href value of every <a> tag fed to it."""

    def reset(self):
        # Reset inherited parser state and start with an empty URL list.
        SGMLParser.reset(self)
        self.urls = []

    def start_a(self, av):
        # av is a list of (attribute-name, value) pairs for the <a> tag;
        # keep the value of each href attribute encountered.
        for attr, value in av:
            if attr == 'href':
                self.urls.append(value)
def url_scanner(url):
conn = urllib.urlopen(url)
scan = list_urls()
scan.feed(conn.read())
conn.close()
scan.close()
n = re.sub("http://", "", url)
name = n.split(".")
log_title = name[0] + name[1] + "_urls.log"
print log_title
log_file = open("logs/" + log_title, "w")
for url in scan.urls:
print url
log_file.write(url + "\n")
log_file.close()
print "\n\nscanning finished and logged."
def html(url):
go = urllib.urlopen(url)
s = go.read()
go.close()
n = re.sub("http://", "", url)
name = n.split(".")
log_title = name[0] + name[1] + "_html_source_code.log"
log_file = open("logs/" + log_title, "w")
log_file.write(s)
log_file.close()
print s
print "\n\npage saved."
def autoscan(f):
fil = open(f, "r")
for lines in fil.readlines():
print "\n\n"+lines+"-----------------------------------------\n"
conn = urllib.urlopen(lines)
scan = list_urls()
scan.feed(conn.read())
conn.close()
scan.close()
fil.close()
n = re.sub("http://", "", lines)
name = n.split(".")
log_title = name[0] + name[1] + "_urls.log"
log_file = open("logs/" + log_title, "w")
for lines in scan.urls:
print "\n"+lines
log_file.write(lines + "\n")
print "\n\nscanning finished and logged."
def help():
try:
version = open("version", "r")
print "\n"+version.read(os.path.getsize("version"))
except:
pass
print "USAGE:\t\tpython " + sys.argv[0] + " [url\logfile] [option]"
print "\nOPTIONS:"
print "\t\t-url\turl scanner"
print "\t\t-sc\thtml source code"
print "\t\t-auto\tauto scan"
print "\t\t-cl\tto clear the log files"
sys.exit()
def remove_logs():
try:
shutil.rmtree('logs')
print "logs removed"
except:
print "cannot remove log files. maybe they've been already removed."
def main():
try:
URL = sys.argv[1]
OPTION = sys.argv[2]
except:
help()
log_dir = "logs"
if not os.path.exists(log_dir):
os.mkdir(log_dir, 0777)
if(sys.argv[2] == "-u"):
print "scanning " + sys.argv[1] + " ...\n\n"
url_scanner(sys.argv[1])
elif(sys.argv[2] == "-sc"):
print "retrieving " + sys.argv[1] + " HTML source code ...\n\n"
html(sys.argv[1])
elif(sys.argv[2] == "-auto"):
print "scanning " + sys.argv[1] + " ...\n\n"
autoscan(sys.argv[1])
else:
help()
if __name__ == "__main__":
    # Require at least one argument; the assignment is only an arg-count
    # check -- help() exits the program when nothing was supplied.
    try:
        URL = sys.argv[1]
    except IndexError:
        help()
    # "-cl" is handled here, before main(), because it takes no second
    # argument (main() would demand sys.argv[2]).
    if sys.argv[1] == "-cl":
        remove_logs()
        sys.exit()
    main()