#!/usr/bin/python """A Web browser""" import sys, os, string, re, base64 import urllib, urllib2, urlparse, httplib, htmlentitydefs from HTMLParser import HTMLParser from ftplib import FTP special = ['html', 'title'] # Block level http://www.w3.org/TR/REC-CSS2/sample block = ['address', 'blockquote', 'body', 'fieldset', 'form', 'frame', 'frameset', 'iframe', 'noframes', 'object', 'p', 'applet', 'center', 'dir', 'hr', 'menu', 'pre', 'ul', 'dl', 'ol', 'tr'] head = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'] flow = ['li', 'div', 'dd', 'dt'] block.extend(head) def format(s, pre): if not pre: while string.count(s, ' ') > 0: s = string.replace(s, ' ', ' ') if s in ('', ' '): pass elif s == ' \n': print '' # hack for now elif '\n' in s: for x in string.split(s, '\n'): align(x) else: align(s) return '' def align(s, w=78): # if len(s) == 0: pass if len(s) < w: print s else: if ' ' in s[:w]: bits = string.split(s[:w], ' ') rest = bits.pop()+s[w:] s = string.join(bits, ' ') else: rest, s = s[w:], s[:w] print s align(rest) class MyHTMLParser(HTMLParser): def __init__(self): HTMLParser.__init__(self) self.start, self.end, self.data = None, None, '' self.preformat, self.hidden = 0, 0 def pre(self): if self.preformat == 1: return 1 elif self.end == 'pre': return 1 else: return 0 def handle_starttag(self, tag, attrs): self.start = tag if (self.data is not '') and (tag in block): self.data = format(self.data, self.pre()) # Now for some more stuff if tag == 'pre': self.data += '
\n'
self.preformat = 1
elif tag == 'li': self.data += ' * '
elif tag == 'dd': self.data += ' defintion: '
elif tag == 'head': self.hidden = 1
elif tag == 'img':
for attr in attrs:
if attr[0] == 'alt': self.data += attr[1]
elif tag in ('blockquote', 'q'):
self.data += 'quote'
for attr in attrs:
if attr[0] == 'cite': self.data += ' (%s)' % attr[1]
self.data += ': '
elif tag == 'a':
self.data += '<'
for attr in attrs:
if attr[0] == 'href': self.data += '<%s> ' % attr[1]
elif tag in head: self.data += '@ '
def handle_endtag(self, tag):
self.end = tag
if tag == 'pre':
self.data += '\n'
self.preformat = 0
if tag == 'a': self.data += '>'
elif tag == 'head': self.hidden = 0
elif tag in block: self.data = format(self.data+'\n', self.pre())
elif tag in flow: self.data = format(self.data, self.pre())
def handle_data(self, data):
if (self.start in block) or (self.start == 'title'):
data = string.lstrip(data)
if self.start == 'pre' and self.end != 'pre':
self.data += data
elif self.start not in ('script', 'applet'):
data = string.replace(data, '\n', ' ')
data = string.replace(data, '\r', ' ')
data = string.replace(data, '\t', ' ')
if (not self.hidden) or (self.start == 'title'):
self.data += data
def handle_entityref(self, name):
if name in htmlentitydefs.entitydefs.keys():
self.data += htmlentitydefs.entitydefs[name]
else: self.data += '?'
def httpget(uri):
    """Fetch `uri` with a plain HTTP GET.

    Returns a tuple (code, msg, headers, data): the three values reported
    by httplib.HTTP.getreply() followed by the response body.
    """
    parts = urlparse.urlparse(uri)
    host = parts[1]
    # An empty path is not a valid request target; ask for the root.
    target = parts[2] or '/'
    # Keep the parameters and query string, which are part of the resource
    # being requested.
    if parts[3]:
        target += ';' + parts[3]
    if parts[4]:
        target += '?' + parts[4]
    conn = httplib.HTTP(host)
    try:
        conn.putrequest('GET', target)
        conn.putheader('Host', host)
        conn.putheader('User-Agent', 'browser.py/x.beta')
        conn.endheaders()
        code, msg, headers = conn.getreply()
        data = conn.getfile().read()
    finally:
        # Release the socket whether or not the exchange succeeded.
        conn.close()
    return code, msg, headers, data
def gethttp(uri):
    """Fetch `uri` over HTTP and display it, asking before a redirect.

    Progress messages and the redirect question go to stderr.  When the
    user declines a redirect, or the response is not a usable redirect,
    the response itself is passed to fs().
    """
    sys.stderr.write('Getting %s...\n' % uri)
    code, msg, info, data = httpget(uri)
    location = None
    if code in (301, 302, 303, 307) and info is not None:
        # .get() yields None instead of raising when the header is absent.
        location = info.get('location')
    if location:
        # A Location header may be relative; resolve it against the
        # address that was just requested.
        target = urlparse.urljoin(uri, location)
        sys.stderr.write('Code was %s (%s), continue? [Y/N]: ' %
                         (str(code), target))
        if raw_input().lower() == 'y':
            gethttp(target)
            return
    fs(code, msg, info, data)
def fs(code, msg, info, data):
    """Display an HTTP response according to its content type.

    code, msg -- status code and reason of the response.
    info      -- response headers, looked up by the lower-case name
                 'content-type'; None is treated as "no headers".
    data      -- response body.

    Command line switches read from sys.argv:
      -text / --text  treat the body as text/plain
      -head / --head  print the status line and headers, not the body

    Bodies that are neither text/html nor text/plain are not printed.
    """
    ctype = None
    if info is not None:
        ctype = info.get('content-type')
    if ctype is not None:
        sys.stderr.write('Got it: %s\n' % ctype)
    else:
        sys.stderr.write('Got it\n')
        ctype = ''
    if ('-text' in sys.argv) or ('--text' in sys.argv):
        ctype = 'text/plain'
    # Media types are case-insensitive, so compare a normalised copy.
    media = ctype.strip().lower()
    if ('-head' in sys.argv) or ('--head' in sys.argv):
        print('%s %s\n\n%s' % (str(code), str(msg), str(info)))
    elif media[:9] == 'text/html':
        MyHTMLParser().feed(data)
    elif media[:10] == 'text/plain':
        print(data)
def getftp(uri):
    """Retrieve the file named by an ftp:// URI and display it.

    Credentials may be given as user:password@host; without them the
    login is anonymous/anonymous.  Files whose name ends in '.html' are
    rendered through MyHTMLParser, everything else is copied to stdout.
    """
    parsed = urlparse.urlparse(uri)
    host, path = parsed[1], parsed[2]
    userpass = 'anonymous:anonymous'
    if host.count('@') == 1:
        userpass, host = host.split('@', 1)
    if ':' in userpass:
        user, pswd = userpass.split(':', 1)
    else:
        # No password in the URI: log in with an empty one instead of
        # leaving the name unbound.
        user, pswd = userpass, ''
    # Credentials inside a URI are percent-encoded; decode them before
    # login.  (The previous base64 step only rebound a loop variable and
    # never reached the server, so it is not reproduced.)
    user, pswd = urllib.unquote(user), urllib.unquote(pswd)
    ftp = FTP(host, user, pswd)
    try:
        segments = path.split('/')
        fn = segments.pop()
        ftp.cwd('/'.join(segments))
        # Test the file name's suffix, not what follows its fifth character.
        if fn[-5:] == '.html':
            ftp.retrbinary('RETR %s' % fn, MyHTMLParser().feed)
        else:
            ftp.retrbinary('RETR %s' % fn, sys.stdout.write)
    except Exception:
        # Drop the connection without the QUIT handshake, then let the
        # original error propagate.
        ftp.close()
        raise
    ftp.quit()
def geturi(uri):
    """Hand `uri` to the fetcher that matches its scheme prefix.

    'http://' goes to gethttp() and 'ftp://' to getftp(); anything else
    is passed to getfile().
    """
    # NOTE(review): getfile() is not defined in the visible part of this
    # file -- confirm it exists elsewhere.
    for prefix, fetch in (('http://', gethttp), ('ftp://', getftp)):
        if uri[:len(prefix)] == prefix:
            fetch(uri)
            return
    getfile(uri)
def prompt():
    """Ask for a URI on stderr, read it from stdin and fetch it."""
    sys.stderr.write('URI: ')
    entered = raw_input()
    geturi(entered)
def run():
    """Command line entry point.

    With -p / -pipe (or the double-dash forms) the URI is read from
    standard input.  Otherwise the first argument that is not an option
    is fetched, and when there is none the user is prompted.
    """
    argv = []
    for arg in sys.argv:
        # Accept GNU-style '--option' by folding it into '-option'.
        if arg[0:2] == '--':
            arg = arg[1:]
        argv.append(arg)
    if ('-pipe' in argv) or ('-p' in argv):
        # Piped input normally ends with a newline that is not part of
        # the URI.
        geturi(sys.stdin.read().strip())
        return
    # Skip switches such as -text or -head so that they are never
    # mistaken for the URI, wherever they appear on the command line.
    targets = [arg for arg in argv[1:] if arg[:1] != '-']
    if targets:
        geturi(targets[0])
    else:
        prompt()
# Start the browser only when this file is executed as a script.
if __name__ == "__main__":
    run()