#!/usr/bin/env python
import re, os, datetime
# Per-day tag tallies: {'new Date(y, m, d)': {'"tag"': count}}.
dates = {}
# Tag tallies over every scanned file: {'"tag"': count}.  Rebound to a list of
# (count, tag) pairs, most frequent first, once the scan has finished.
total = {}
# Captures the element name right after each '<'; closing tags begin with
# '</' and therefore never match.
r_tag = re.compile(r'<([A-Za-z0-9]+)')
# NOTE(review): this literal arrived split over two lines with its markup
# stripped, which is a syntax error.  The <xmp>...</xmp> delimiters are
# reconstructed from the variable name -- confirm against the original.
r_xmp = re.compile(r'(?is)<xmp>(.*?)</xmp>')


def js_date_literal(stamp):
    """Return a JavaScript ``new Date(y, m, d)`` literal for *stamp*.

    *stamp* is text that starts with ``YYYY-MM-DD``; anything after the
    tenth character is ignored.  The month is reduced by one because the
    JavaScript ``Date`` constructor counts months from zero.
    """
    year, month, day = [int(part) for part in stamp[:10].split('-')]
    return 'new Date' + str((year, month - 1, day))


def extract_tags(html):
    """Return the lower-cased names of the opening tags in *html*, in order.

    Every span matched by ``r_xmp`` is replaced with ``'...'`` first, so
    markup quoted inside such a span is not counted.
    """
    return [tag.lower() for tag in r_tag.findall(r_xmp.sub('...', html))]


def scan_tree(top, by_date, overall):
    """Tally the tags of qualifying ``.html`` files found below *top*.

    A file qualifies when its name ends in ``.html`` and its modification
    time, rendered in local time, falls in 1990 or 1991.  Both dictionaries
    are updated in place: *by_date* maps a JavaScript date literal to a
    ``{quoted tag: count}`` dictionary, *overall* maps each quoted tag to
    its count over all qualifying files.
    """
    for root, dirs, files in os.walk(top):
        for name in files:
            filename = os.path.join(root, name)
            if not filename.endswith('.html'):
                continue
            mtime = os.path.getmtime(filename)
            stamp = str(datetime.datetime.fromtimestamp(mtime))
            if not stamp.startswith(('1990', '1991')):
                continue
            # Close the handle deterministically instead of leaking it.
            with open(filename) as handle:
                html = handle.read()
            # The per-day entry is created even when the file has no tags.
            per_day = by_date.setdefault(js_date_literal(stamp), {})
            for tag in extract_tags(html):
                key = '"' + tag + '"'
                per_day[key] = per_day.get(key, 0) + 1
                overall[key] = overall.get(key, 0) + 1


scan_tree('.', dates, total)

# A parenthesised single-argument print behaves the same on Python 2 and 3.
for date, tags in dates.items():
    for tag, count in tags.items():
        print(' {date: %s, tag: %s, count: %s},' % (date, tag, count))

# Sort on (count, tag) descending so the most frequent tags come first.
total = sorted(((count, tag) for tag, count in total.items()), reverse=True)
print('[' + ', '.join(pair[1] for pair in total) + ']')