#!/usr/bin/python
"""A Web browser"""

import sys, os, string, re, base64
import urllib, urllib2, urlparse, httplib, htmlentitydefs
from HTMLParser import HTMLParser
from ftplib import FTP

# Tag-name tables consulted by the HTML renderer below.
special = ['html', 'title']
# Heading tags; they are also treated as block level (see below).
head = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']
flow = ['li', 'div', 'dd', 'dt']
# Block level http://www.w3.org/TR/REC-CSS2/sample
block = [
   'address', 'blockquote', 'body', 'fieldset', 'form', 'frame',
   'frameset', 'iframe', 'noframes', 'object', 'p', 'applet', 'center',
   'dir', 'hr', 'menu', 'pre', 'ul', 'dl', 'ol', 'tr',
] + head

def format(s, pre):
   """Print the pending text s and return '' so the caller can reset it.

   s   -- text to print
   pre -- true when s is preformatted; otherwise runs of spaces are
          collapsed to a single space before printing

   Empty and single-space strings print nothing. Everything else is split
   on newlines and each piece is handed to align().
   """
   if not pre:
      # Repeated replacement squeezes any run of spaces down to one.
      while '  ' in s:
         s = s.replace('  ', ' ')
   if s == ' \n':
      print('') # hack for now
   elif s not in ('', ' '):
      # A string without a newline splits into just itself.
      for line in s.split('\n'):
         align(line)
   return ''

def align(s, w=78):
   """Print s wrapped so that no output line is w characters or longer.

   s -- the text to print (a single line, no embedded newlines expected)
   w -- wrap width; must be at least 1

   A string shorter than w is printed as is (an empty string prints a blank
   line). Longer text is broken at the last space within each w-character
   window, or hard-split after w characters when the window has no space.

   The wrapping is done in a loop rather than by recursion, so the chosen
   width applies to every line and very long input cannot exhaust the
   recursion limit. Raises ValueError if w is smaller than 1.
   """
   if w < 1:
      raise ValueError('wrap width must be at least 1, got %r' % (w,))
   pos, end = 0, len(s)
   while end - pos >= w:
      window = s[pos:pos + w]
      cut = window.rfind(' ')
      if cut >= 0:
         # Break at the last space; the space itself is dropped.
         print(window[:cut])
         pos += cut + 1
      else:
         print(window)
         pos += w
   # Print what is left, but do not emit an extra blank line when the
   # wrapping above consumed the text exactly.
   if pos == 0 or pos < end:
      print(s[pos:])

class MyHTMLParser(HTMLParser):
   """HTML parser that prints a plain-text rendering of the document.

   Text is collected in self.data and handed to format() (which prints it
   and returns '') whenever a block-level tag starts or a block- or
   flow-level tag ends.
   """

   def __init__(self):
      HTMLParser.__init__(self)
      # Last start tag seen, last end tag seen, text not yet printed.
      self.start, self.end, self.data = None, None, ''
      # preformat is 1 between <pre> and </pre>; hidden is 1 inside <head>.
      self.preformat, self.hidden = 0, 0

   def pre(self):
      """Return 1 if pending text must be printed with its spacing intact.

      The last end tag is checked as well because handle_endtag() clears
      preformat before it flushes the contents of a <pre> element.
      """
      if self.preformat == 1 or self.end == 'pre':
         return 1
      return 0

   def handle_starttag(self, tag, attrs):
      """Flush pending text before a block tag and add the tag's marker.

      attrs is the list of (name, value) pairs supplied by HTMLParser; the
      value is None for an attribute written without a value, so such
      attributes are skipped instead of being concatenated.
      """
      self.start = tag
      # Compare by value; an identity test against '' is not reliable.
      if self.data != '' and tag in block:
         self.data = format(self.data, self.pre())
      # Now for some more stuff
      if tag == 'pre':
         self.data += '<pre>\n'
         self.preformat = 1
      elif tag == 'li': self.data += ' * '
      elif tag == 'dd': self.data += ' definition: '
      elif tag == 'head': self.hidden = 1
      elif tag == 'img':
         for name, value in attrs:
            if name == 'alt' and value is not None: self.data += value
      elif tag in ('blockquote', 'q'):
         self.data += 'quote'
         for name, value in attrs:
            if name == 'cite' and value is not None:
               self.data += ' (%s)' % value
         self.data += ': '
      elif tag == 'a':
         self.data += '<'
         for name, value in attrs:
            if name == 'href' and value is not None:
               self.data += '<%s> ' % value
      elif tag in head: self.data += '@ '

   def handle_endtag(self, tag):
      """Close the tag's marker and flush text at block/flow boundaries."""
      self.end = tag
      if tag == 'pre':
         self.data += '\n</pre>'
         self.preformat = 0
      # 'pre' is also a block tag, so it falls through to the flush below.
      if tag == 'a': self.data += '>'
      elif tag == 'head': self.hidden = 0
      elif tag in block: self.data = format(self.data + '\n', self.pre())
      elif tag in flow: self.data = format(self.data, self.pre())

   def handle_data(self, data):
      """Append character data to the pending text.

      Data following a <pre> start tag is kept verbatim. Other data has
      newlines, carriage returns and tabs turned into spaces, is skipped
      after a <script> or <applet> start tag, and is dropped while inside
      <head> unless it follows a <title> start tag.
      """
      if self.start in block or self.start == 'title':
         data = data.lstrip()
      if self.start == 'pre' and self.end != 'pre':
         self.data += data
      elif self.start not in ('script', 'applet'):
         for ch in '\n\r\t':
            data = data.replace(ch, ' ')
         if not self.hidden or self.start == 'title':
            self.data += data

   def handle_entityref(self, name):
      """Append the replacement for &name;, or '?' if the name is unknown."""
      self.data += htmlentitydefs.entitydefs.get(name, '?')

   def close(self):
      """Finish parsing and print any text that is still pending."""
      HTMLParser.close(self)
      if self.data != '':
         self.data = format(self.data, self.pre())

def httpget(uri):
   """Fetch uri with a single HTTP GET request.

   uri -- an absolute http:// URI

   Returns a tuple (code, msg, headers, data): the status code, reason
   text and header object from httplib's getreply(), and the response
   body as read from the connection.
   """
   parts = urlparse.urlparse(uri)
   host = parts[1]
   # The request target needs a non-empty path, plus any params and the
   # query string, otherwise "http://host" and "...?q=x" are mis-requested.
   target = parts[2] or '/'
   if parts[3]: target += ';' + parts[3]
   if parts[4]: target += '?' + parts[4]
   h = httplib.HTTP(host)
   try:
      h.putrequest('GET', target)
      h.putheader('Host', host)
      h.putheader('User-Agent', 'browser.py/x.beta')
      h.endheaders()
      code, msg, headers = h.getreply()
      data = h.getfile().read()
   finally:
      # Release the socket whether or not the exchange succeeded.
      h.close()
   return code, msg, headers, data

def gethttp(uri):
   """Fetch an http:// URI and display it, asking before following redirects.

   Progress and the redirect question are written to stderr; the answer is
   read with raw_input(). On a redirect response that carries a Location
   header the user is asked whether to continue; answering "y" fetches the
   redirect target, anything else displays the redirect response itself.
   """
   sys.stderr.write('Getting %s...\n' % uri)
   code, msg, info, data = httpget(uri)
   location = None
   if code in (301, 302, 303, 307) and info is not None:
      # .get() avoids a KeyError when the server sent no Location header.
      location = info.get('location')
   if location:
      # Location may be relative; resolve it against the URI just fetched.
      target = urlparse.urljoin(uri, location)
      sys.stderr.write('Code was %s (%s), continue? [Y/N]: ' % \
               (str(code), target))
      if raw_input().strip().lower() == 'y':
         gethttp(target)
         return
   fs(code, msg, info, data)

def fs(code, msg, info, data):
   """Display a fetched response according to its content type.

   code, msg -- status code and reason text of the response
   info      -- header mapping (may be None when no headers were read)
   data      -- response body

   Command-line switches in sys.argv change the output: -text/--text
   forces plain-text display, -head/--head prints the status line and
   headers instead of the body. text/html is rendered through
   MyHTMLParser, text/plain is printed as is, and any other type produces
   no output.
   """
   content_type = ''
   header = None
   if info is not None:
      header = info.get('content-type')
   if header:
      sys.stderr.write('Got it: %s\n' % header)
      # Media types are case-insensitive, so compare in lower case.
      content_type = header.strip().lower()
   else: sys.stderr.write('Got it\n')
   if ('-text' in sys.argv) or ('--text' in sys.argv):
      content_type = 'text/plain'
   if ('-head' in sys.argv) or ('--head' in sys.argv):
      print('%s %s\n\n%s' % (str(code), str(msg), str(info)))
   elif content_type.startswith('text/html'):
      parser = MyHTMLParser()
      parser.feed(data)
      # close() makes the parser process whatever it still has buffered.
      parser.close()
   elif content_type.startswith('text/plain'): print(data)

def getftp(uri):
   """Retrieve a file over FTP and display it.

   uri -- ftp://[user[:password]@]host/path/to/file

   Without user info the login is anonymous/anonymous. User name and
   password are percent-decoded before use; a user name given without a
   password logs in with an empty password. A file whose name ends in
   ".html" is rendered through MyHTMLParser, anything else is written to
   stdout unchanged.
   """
   parsed = urlparse.urlparse(uri)
   host, path = parsed[1], parsed[2]
   user, pswd = 'anonymous', 'anonymous'
   if '@' in host:
      # Split on the last '@' so only the final part is taken as the host.
      userpass, host = host.rsplit('@', 1)
      if ':' in userpass: user, pswd = userpass.split(':', 1)
      else: user, pswd = userpass, ''
      user, pswd = urllib.unquote(user), urllib.unquote(pswd)
   parts = path.split('/')
   fn = parts.pop()
   ftp = FTP(host, user, pswd)
   try:
      ftp.cwd('/'.join(parts))
      if fn.endswith('.html'):
         parser = MyHTMLParser()
         ftp.retrbinary('RETR %s' % fn, parser.feed)
         # close() makes the parser process whatever it still has buffered.
         parser.close()
      else: ftp.retrbinary('RETR %s' % fn, sys.stdout.write)
      ftp.quit()
   finally:
      # Harmless after a successful quit(); frees the socket on errors.
      ftp.close()

def geturi(uri):
   """Fetch and display uri, choosing the handler from its scheme.

   Surrounding whitespace (such as the newline left by reading stdin) is
   removed first, and the scheme is matched case-insensitively. http://
   goes to gethttp(), ftp:// to getftp(), everything else to getfile().
   """
   uri = uri.strip()
   lowered = uri.lower()
   if lowered.startswith('http://'): gethttp(uri)
   elif lowered.startswith('ftp://'): getftp(uri)
   # NOTE(review): getfile is not defined in the visible part of this file;
   # confirm it exists, otherwise this branch raises NameError.
   else: getfile(uri)

def prompt():
   """Ask for a URI on stderr, read the reply and fetch that URI."""
   sys.stderr.write('URI: ')
   entered = raw_input()
   geturi(entered)

def run():
   """Command-line entry point.

   Options may be written with one or two leading dashes. With -pipe/-p
   the URI is read from stdin. Otherwise the first argument that is not an
   option is fetched, so switches such as -head or -text may come before
   or after the URI. Without any URI argument the user is prompted.
   """
   args = []
   for arg in sys.argv[1:]:
      # Treat "--option" the same as "-option".
      if arg[0:2] == '--': arg = arg[1:]
      args.append(arg)
   if ('-pipe' in args) or ('-p' in args):
      geturi(sys.stdin.read())
      return
   for arg in args:
      if not arg.startswith('-'):
         geturi(arg)
         return
   prompt()

# Start the browser only when executed as a script, not when imported.
if __name__ == '__main__':
   run()