#!/usr/bin/python
"""Some program
Requirements:
* Log single lines to a persistent store, hashing to preserve integrity
Enter various information: todo items,
Search the information
Be able to mark items as public or private, and have two HTML versions
Be able to enter data and match against a RegExp
File metadata is stored in meta.n3; item metadata is stored in the item
itself as a series of at least one byte long flags
'P' = public (world readable)
'D' = deleted/delenda/deprecated
@@
* Temporary metadata sets (use case? possibly display. multiple commands?)
* Globbing? File name pattern matching
Done:
* Remove old items? Or mark as "done"?
* Display properties of D items
* Add metadata to old items
* Append to old items?
* Amount to show
* List files
* Show all metadata
* Write D or not on export?
* Write to public-html
* Move/delete/copy files?
Possibles:
* References to other items?
Extensibility limitations:
* Can't extend metadata properties in the same namespace
* Only extensible to 26 item metadata attributes
(c) Copyright Sean B. Palmer, 2003. GPL 2: Share and Enjoy!
"""
from __future__ import nested_scopes
import sys, os, re, time, random
# # # # #
# [Start of configurable stuff]
#
# You should configure this:
# basedir is the directory the program changes into at start-up; every
# relative file name used below (item files, the metadata file, the
# export and archive directories) is resolved against it.
basedir = 'c:/web/infomesh/2003'
# You may configure these:
# metafn is the name of the file that readmeta()/writemeta() use.
metafn = 'meta.n3'
# DEBUG, when true, makes parse() echo each command to stderr and makes
# deserialize() assert that a parsed item serializes back to its line.
DEBUG = 1
#
# [End of configurable stuff]
# # # # #
# Runs at import time: raises OSError if basedir does not exist.
os.chdir(basedir)
# # # # # # # # # # # # # # # # #
#
# Metadata Grunging Code
#
b_ns = "http://infomesh.net/2003/b/terms#"


def triple(subj, pred, obj):
    """Return one N3 statement: subject subj, property pred taken in the
    b_ns namespace, and obj as a double-quoted literal."""
    prop = b_ns + pred
    return '<%s> <%s> "%s" .' % (subj, prop, obj)
def writemeta(mode, s):
    """Write s to the metadata file (metafn).

    mode 'w' replaces the whole file with s exactly as given; mode 'a'
    appends s followed by a newline.  Any other mode raises ValueError
    and leaves the file untouched.
    """
    if mode == 'w': data = s
    elif mode == 'a': data = s + '\n'
    else:
        # a real exception object: string exceptions are deprecated and
        # not accepted by later Pythons
        raise ValueError("Unknown mode: %s" % mode)
    f = open(metafn, mode)
    # close explicitly so the data is on disk before anyone reads it back
    try: f.write(data)
    finally: f.close()
def readmeta():
    """Return the whole text of the metadata file (metafn).

    If the file cannot be opened, an empty one is created and '' is
    returned.
    """
    try: f = open(metafn)
    except IOError:
        writemeta('w', '')
        return ''
    # Only a failed open() resets the file; an error while reading an
    # existing file propagates instead of wiping the metadata.
    try: return f.read()
    finally: f.close()
def metaburp(subj, pred, obj):
    """Set property pred of subj to obj in the metadata file.

    Each subj/pred pair holds a single value: an identical statement is
    left alone, an existing value is replaced, otherwise the statement
    is appended.

    e.g. @sort notes alpha|ralpha|date|rdate|len|rlen
    this works because { ?x log:uri
    [ log:startswith "http://infomesh.net/2003/b/terms#" ] } =>
    { ?x a owl:UniqueProperty; rdfs:range rdfs:Literal } .
    """
    new = triple(subj, pred, obj)
    meta = readmeta()
    # if it's already in there, no change
    if meta.find(new) > -1: return
    # Pattern for "this subj/pred with any value".  '\n' only marks where
    # the value goes; the fixed text around it is escaped so characters
    # such as '.' and '#' match themselves and nothing else.
    head, tail = triple(subj, pred, '\n').split('\n')
    existing = re.compile(re.escape(head) + '.*?' + re.escape(tail))
    # if the prop's already set, change it
    if existing.search(meta):
        # a function replacement keeps any backslash in obj literal
        writemeta('w', existing.sub(lambda m: new, meta))
    # otherwise, add it
    else: writemeta('a', new)
def metaslurp(subj, pred):
    """Return the value of property pred of subj from the metadata file,
    or None when it is not set.

    If the property occurs more than once a corruption warning is
    written to stderr and the first value is returned.
    """
    # '\n' only marks the value's position; the fixed text around it is
    # escaped so it is matched literally rather than as a regexp.
    head, tail = triple(subj, pred, '\n').split('\n')
    pattern = re.compile(re.escape(head) + '(.*?)' + re.escape(tail))
    objs = pattern.findall(readmeta())
    if not objs: return None
    if len(objs) > 1:
        sys.stderr.write('Error: %s is corrupted\n' % metafn)
    return objs[0]
def getMetadata(subj, pred):
m = metaslurp(subj, pred)
if m: print '@%s %s is set to: "%s"' % (subj, pred, m),
else: print '@%s %s is not set' % (subj, pred),
#
#
# # # # # # # # # # # # # # # # #
def datetime():
    """Return the present moment, in UTC, as YYYY-MM-DDTHH:MM:SS."""
    now = time.gmtime()
    return time.strftime('%Y-%m-%dT%H:%M:%S', now)
regexps = []


def keep(match):
    """Push match onto the end of the module-level regexps list and hand
    it back, so a match can be tested in a condition and fetched
    afterwards with regexps.pop()."""
    regexps[len(regexps):] = [match]
    return match
def archive(*args):
    """Copy every item file (see getfiles) into the archive/ directory.

    A file with no archived copy is copied.  One that has a copy is
    re-copied only if its content differs; if it has become shorter than
    the archived copy it is reported as possibly corrupted and skipped.
    One status line per file is printed.  Any arguments are ignored.
    """
    def read(fn):
        f = open(fn)
        try: return f.read()
        finally: f.close()
    def write(fn, s):
        f = open(fn, 'w')
        try: f.write(s)
        finally: f.close()
    if not os.path.exists('archive/'): os.mkdir('archive')
    for name in getfiles():
        target = 'archive/%s' % name
        new = read(name)
        if not os.path.exists(target):
            write(target, new)
            print('newly archived: %s' % name)
            continue
        old = read(target)
        if len(new) < len(old):
            print('may be corrupted, not archived: %s' % name)
        # Both texts are in memory already, so compare them directly:
        # that is exact and needs no (deprecated) sha module.
        elif old != new:
            write(target, new)
            print('archived: %s' % name)
        else: print('already archived: %s' % name)
# # # # # # # # # # # # # # # # #
#
# Item stuff
#
# item.serialize() is the inverse of deserialize(line)
#
t_re = re.compile('^(....).(..).(..).(..).(..).(..)$')


def timeToLong(t):
    """Turn a timestamp such as '2003-04-14T12:25:00' into the integer
    20030414122500, so timestamps can be compared numerically.

    Returns None if t does not have the 4-2-2-2-2-2 shape (any single
    character is accepted as a separator).  Raises ValueError if the
    fields are not digits.
    """
    m = t_re.match(t)
    if m is None: return None
    # int() switches to arbitrary precision by itself when needed, and
    # unlike long() it exists in every Python version
    return int(''.join(m.groups()))
class Item:
    """One logged line: a uid, the text content, a string of one-letter
    metadata flags and a timestamp (YYYY-MM-DDTHH:MM:SS)."""

    def __init__(self, uid, content, metadata=None, timestamp=None):
        """metadata defaults to no flags, timestamp to the current time
        as given by datetime()."""
        if metadata is None: metadata = ''
        if timestamp is None: timestamp = datetime()
        self.uid = uid
        self.content = content
        self.metadata = metadata
        self.timestamp = timestamp

    def __lt__(self, item):
        """Order items by timestamp (this is what list.sort() uses)."""
        return timeToLong(self.timestamp) < timeToLong(item.timestamp)

    def __gt__(self, item):
        return timeToLong(self.timestamp) > timeToLong(item.timestamp)

    def __str__(self):
        """The storage line: 'timestamp uid FLAGS content'."""
        return ' '.join((self.timestamp, self.uid, self.metadata, self.content))

    def size(self):
        """Length of the content."""
        return len(self.content)

    def serialize(self, format=None, custom=None):
        """Render the item in the given format (case-insensitive).

        no format -- the storage line, same as str(item)
        'text'    -- '<prefix><content> (<timestamp>, <uid>)', where the
                     prefix is the '#all' 'prefix' metadata value; or, if
                     custom is given, custom with its %(prefix),
                     %(content), %(meta), %(timestamp) and %(uid) fields
                     filled in
        'html'    -- one HTML element, see the note in the code
        'n3'      -- one N3 statement with the four fields
        Any other format returns None.  In text and html the 'T' of the
        timestamp is shown as a space.
        """
        if not format: return self.__str__()
        uid, metadata, content = self.uid, self.metadata, self.content
        timestamp = self.timestamp.replace('T', ' ')
        format = format.lower()
        if format == 'text':
            p = metaslurp('#all', 'prefix') or ''
            if not p.endswith(' '): p += ' '
            if custom:
                # turn every %(name) of the template into %(name)s
                return re.sub(r'%\([a-z]+\)', r'\g<0>s', custom) % {
                    'prefix': p, 'content': content, 'meta': metadata,
                    'timestamp': timestamp, 'uid': uid }
            return "%s%s (%s, %s)" % (p, content, timestamp, uid)
        elif format == 'html':
            # one CSS class per flag letter, e.g. ' class="P D"'
            metadata = (metadata and ' class="%s"' % ' '.join(metadata)) or ''
            # NOTE(review): the markup of this template was lost (what was
            # left was an unterminated string with two placeholders for
            # these five values).  Only the visible text '# %s (%s)' and
            # the argument order are original; the element and attribute
            # names are a reconstruction -- confirm against a good copy.
            return '<p id="%s"%s><a href="#%s">#</a> %s (%s)</p>' % \
                (uid, metadata, uid, content, timestamp)
        elif format == 'n3':
            return ('[] <%smetadata> "%s"; '
                    '\n<%suid> "%s"; <%stimestamp> "%s"; <%scontent> "%s" .'
                    ) % (b_ns, metadata, b_ns, uid, b_ns, self.timestamp,
                         b_ns, content)
p = r'(?m)^([\d-]{10}T[\d:]{8}) ([A-Za-z0-9]+) ([A-Z]*) (.*?)$'
item_p = re.compile(p)


def deserialize(line):
    """Parse one storage line ('timestamp uid FLAGS content') into an
    Item; the inverse of Item.serialize() without a format.

    Raises ValueError, quoting the line, if it does not parse.  When
    DEBUG is on, also asserts that the item serializes back to line.
    """
    m = item_p.match(line)
    if m is None:
        # a real exception instead of raising the line as a string, and
        # no bare except hiding unrelated errors
        raise ValueError("Unparseable item line: %r" % line)
    timestamp, uid, metadata, content = m.groups()
    item = Item(uid, content, metadata, timestamp)
    if DEBUG: assert item.serialize() == line, "Item is borked!"
    return item
def readFile(name):
    """Read the item file name and return its lines as a list of Items.

    Carriage returns are dropped; if there were any, the cleaned text is
    written back to the file.
    """
    f = open(name)
    try: s = f.read()
    finally: f.close()
    if '\r' in s: # @@ ugh
        s = s.replace('\r', '')
        # Rewrite only when something changed, so that merely reading a
        # file never truncates and rewrites it.
        f = open(name, 'w') # @@ double ugh
        try: f.write(s)
        finally: f.close()
    return [deserialize(line) for line in s.splitlines()]
#
#
# # # # # # # # # # # # # # # # #
def norm(name, items):
    """Return the text rendering of items, one per line.

    The 'textformat' metadata value of name, or failing that of '#all',
    is passed to each item as its custom template; with neither set the
    items use their default text form.
    """
    own = metaslurp(name, 'textformat')
    shared = metaslurp('#all', 'textformat')
    template = own or shared
    lines = []
    for item in items:
        if template: lines.append(item.serialize('text', template))
        else: lines.append(item.serialize('text'))
    return '\n'.join(lines)
def htmlize(name, items):
    """Return an HTML page with name as title and heading and the HTML
    rendering of each item, one per line, as its body.

    NOTE(review): the page template had lost its markup (two
    placeholders were left for three values, so formatting raised
    TypeError).  The markup below is a reconstruction, and the page is
    now built once around all items rather than once per item -- confirm
    against a good copy.
    """
    h = ('<html>\n<head>\n<title>%s</title>\n</head>\n'
         '<body>\n<h1>%s</h1>\n%s\n</body>\n</html>\n')
    body = '\n'.join([item.serialize('html') for item in items])
    return h % (name, name, body)
def n3ize(items):
    """Return the N3 rendering of items, one statement after another,
    separated by newlines."""
    statements = []
    for item in items:
        statements.append(item.serialize('n3'))
    return '\n'.join(statements)
def getItemsByDate(items, pattern):
    """Return the items whose timestamp matches pattern.

    pattern is written like a displayed timestamp, with '*' standing for
    anything, e.g. 2003-04-14 12:25:*
    """
    # stored timestamps have 'T' where the pattern has a space
    stamp = pattern.replace(' ', 'T')
    stamp = stamp.replace('*', '.*?')
    matcher = re.compile('(?m)^%s$' % stamp)
    return [item for item in items if matcher.match(item.timestamp)]
def getByDate(name, pattern):
items = readFile(name)
result = getItemsByDate(items, pattern)
print norm(name, result),
def getItemByID(items, uid):
    """Return (position, item) for the first item whose uid equals uid,
    or None if there is no such item."""
    position = 0
    for item in items:
        if item.uid == uid:
            return position, item
        position += 1
    return None
def getByID(name, uid):
items = readFile(name)
i_item = getItemByID(items, uid)
if i_item:
i, item = i_item
print norm(name, [item]),
def filterByMeta(items, meta):
    """Return the items that satisfy every flag test in meta.

    meta is a string of flag letters (case-insensitive): a plain letter
    keeps the items carrying that flag, a letter preceded by '-' keeps
    those that do not, e.g. 'P-D'.  With no letters in meta the items
    are returned as they are.
    """
    for spec in re.findall(r'(-?[A-Z])', meta.upper()):
        letter = spec[-1]
        if spec.startswith('-'):
            items = [item for item in items if letter not in item.metadata]
        else:
            items = [item for item in items if letter in item.metadata]
    return items
def setItemMeta(name, uid, meta, mode=None):
    """Change the metadata flags of the item uid in file name.

    meta is a string of flag letters (case-insensitive).  With mode None
    or 'a' the flags the item lacks are added; with mode 'w' the item's
    flags are replaced by meta; any other mode changes nothing.  The
    file is then rewritten.  If no item has that uid, an error is
    written to stderr and the file is left as it is.
    """
    assert meta.isalpha()
    meta = meta.upper()
    items = readFile(name)
    found = getItemByID(items, uid)
    if found is None:
        # report it instead of failing on unpacking None
        sys.stderr.write('No item with uID "%s" in "%s"\n' % (uid, name))
        return
    i, item = found
    if (not mode) or (mode == 'a'):
        for flag in meta:
            # add the missing flag only; adding the whole of meta here
            # duplicated flags the item already had
            if flag not in item.metadata: item.metadata += flag
    elif mode == 'w': item.metadata = meta
    # item is the very object held in items, so the list is up to date
    result = '\n'.join([each.serialize() for each in items])
    f = open(name, 'w')
    try: f.write(result + '\n')
    finally: f.close()
def appendToItem(name, uid, content):
    """Add content, after a space, to the end of the item uid in file
    name and rewrite the file.  The timestamp is kept.

    If no item has that uid, an error is written to stderr and the file
    is left as it is.
    """
    # @@ should this change the date?
    items = readFile(name)
    found = getItemByID(items, uid)
    if found is None:
        # report it instead of failing on unpacking None
        sys.stderr.write('No item with uID "%s" in "%s"\n' % (uid, name))
        return
    i, item = found
    # item is the very object held in items, so the list is up to date
    item.content += ' ' + content
    result = '\n'.join([each.serialize() for each in items])
    f = open(name, 'w')
    try: f.write(result + '\n')
    finally: f.close()
def getfiles():
    """Return the names in the current directory that are regular files
    and consist of letters only -- i.e. the item files."""
    return [fn for fn in os.listdir('.')
            if os.path.isfile(fn) and fn.isalpha()]
def listFiles(pattern):
    """Print the name of every item file, one per line, each preceded by
    the '#all' 'prefix' metadata value (a single space if unset).

    pattern is accepted but not used.
    """
    prefix = metaslurp('#all', 'prefix') or ''
    if not prefix.endswith(' '): prefix += ' '
    lines = []
    for name in getfiles():
        lines.append(prefix + name)
    print('\n'.join(lines))
def cp(name, new): # @@ metadata
    """Copy the content of file name to file new, replacing new, and
    report it on stdout."""
    # Read the source completely before opening the destination: opening
    # new for writing first emptied the file when name and new were the
    # same, and left an empty new behind when name did not exist.
    src = open(name)
    try: data = src.read()
    finally: src.close()
    dst = open(new, 'w')
    try: dst.write(data)
    finally: dst.close()
    print("Copied %s to %s" % (name, new))
def mv(name, new): # @@ metadata
    """Rename file name to new and report it on stdout."""
    os.rename(name, new)
    message = "Moved %s to %s" % (name, new)
    print(message)
def rm(name): # @@ metadata
    """Ask for confirmation on the terminal and, only if the answer is
    exactly 'y', delete file name and report it on stdout."""
    answer = raw_input('Sure? [y/n]: ')
    if answer != 'y': return
    os.remove(name)
    print("Removed %s" % name)
def getID(id_seq):
    """Return a random id that does not occur in id_seq.

    Up to 100 three-character ids are tried first: a lower-case
    consonant followed by two characters that are consonants or digits
    (17 * 24 * 24 = 9,792 combinations).  If every one of them is taken,
    a warning is written to stderr and five-character ids (a letter
    followed by four letters or digits) are drawn until a free one
    turns up.
    """
    # c, q, x redundant, l appears as 1
    short_first = 'bdfghjkmnprstvwyz'
    short_rest = short_first + '2357890'
    attempts = 0
    while attempts < 100:
        attempts += 1
        tid = random.choice(short_first)
        tid += random.choice(short_rest) + random.choice(short_rest)
        if tid not in id_seq: return tid
    sys.stderr.write("if you're using this program *that* "
        "intensively, you might want to consider tweaking def getID...\n")
    long_first = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
    long_rest = long_first + '1234567890'
    while 1: # 768,369,472 combinations
        tid = random.choice(long_first)
        for n in range(4): tid += random.choice(long_rest)
        if tid not in id_seq: return tid
def burp(name, content, metadata=None):
    """Append a new item with the given content and metadata flags to
    file name, and report its uid on stdout.

    name must be letters only.  Line breaks are removed from content.
    When the file does not exist yet, its 'created' and 'order' metadata
    are set and its uid list is started afresh; otherwise the new uid is
    added to the list recorded in the 'uids' metadata.
    """
    assert name.isalpha(), "file name must match [A-Za-z]+"
    # @@ case sensitivity depends upon the file system
    if metadata is None: metadata = ''
    t = datetime()
    content = content.replace('\n', '').replace('\r', '')
    if not os.path.exists(name):
        metaburp(name, 'created', t)
        metaburp(name, 'order', 'date')
        id_seq = []
    else:
        # split() without an argument gives [] for an unset or empty
        # value, so no empty id and no leading space get into the list
        id_seq = (metaslurp(name, 'uids') or '').split()
    uid = getID(id_seq)
    metaburp(name, 'uids', ' '.join(id_seq + [uid]))
    item = Item(uid, content, metadata, t)
    # write the content out to the file
    f = open(name, 'a')
    try: f.write(item.serialize() + '\n')
    finally: f.close()
    print('Item created in "%s" with uID "%s"' % (name, uid))
show_int = re.compile('^-?\d+$')
show_sdate = re.compile('^(-?\d+-)?[ymdh]$')
show_adate = re.compile('^\d+[YMWDH]$')
def slurp(name, amt=None):
items = readFile(name)
# arange the items as given in the meta
order = metaslurp(name, 'order') or 'date'
if order == "date": items.sort()
elif order == "alpha": items.sort(lambda x, y: (x.content > y.content) - 1)
elif order == "len": items.sort(lambda x, y: (x.size() > y.size()) - 1)
if (metaslurp(name, 'way') or 'f').startswith('r'): items.reverse()
# filter out D items if required
if not metaslurp(name, 'showdeleted'):
items = filterByMeta(items, '-D')
# show the required amount
amt = amt or metaslurp(name, 'show')
if amt:
if show_int.match(amt):
amt = int(amt)
if amt > 0: items = items[:amt]
elif amt < 0: items = items[amt:]
elif show_sdate.match(amt):
if len(amt) > 1:
min, amt = int(amt[:-2]), amt[-1:]
else: min = 0
amt = {'y': 4, 'm': 7, 'd': 10, 'h': 13}[amt]
t = datetime()
tent = filter(lambda item: item.timestamp[:amt] == t[:amt], items)
if len(tent) < abs(min):
if min > 0: items = items[:min]
elif min < 0: items = items[min:]
else: items = tent
elif show_adate.match(amt):
try: from strptime import strptime
except ImportError: strptime = None
def timeInSecs(t): # rather experimental! @@ warning: bugs ahead
if strptime: return time.mktime(strptime(t, '%Y-%m-%dT%H:%M:%S'))
else: return timeToLong(t)
amt, unit = int(amt[:-1]), amt[-1:]
scalar = {'Y':31557600, 'M':2592000, 'W':436800, 'D':86400, 'H':3600}
items = filter(lambda item: (timeInSecs(item.timestamp) >
(time.time() - (scalar[unit] * amt))), items)
print norm(name, items),
def export(type):
    """Write every item file out in the given format.

    type is 'text', 'html' or 'n3'; the output goes to the directory
    '<type>-files', created if missing, as <file>.txt, <file>.html or
    <file>.n3.  For 'html', a file with at least one public (P) item
    also gets a page of just those items in 'public-html-files'.
    """
    def write(path, s):
        f = open(path, 'w')
        try: f.write(s)
        finally: f.close()
    if not os.path.exists('%s-files/' % type):
        os.mkdir('%s-files' % type)
    for fn in getfiles():
        items = readFile(fn)
        if type == 'text':
            write('text-files/%s.txt' % fn, norm(fn, items))
        elif type == 'html':
            public = filterByMeta(items, 'P')
            if public:
                pd = 'public-html-files'
                # make the directory before writing, so the public page
                # is written on the first run as well
                if not os.path.exists(pd): os.mkdir(pd)
                write('%s/%s.html' % (pd, fn), htmlize(fn, public))
            write('html-files/%s.html' % fn, htmlize(fn, items))
        elif type == 'n3': write('n3-files/%s.n3' % fn, n3ize(items))
# # # # # # # # # # # # # # # # #
#
# Search code
#
def grep(file, pattern):
    """Print the text rendering of every item that the regular
    expression pattern matches somewhere.

    file names the item file to search; an empty value or '*' means all
    of them.  When more than one file is searched, each line is
    preceded by its file name.
    """
    matcher = re.compile(pattern)
    if file and file != '*': files = [file]
    else: files = getfiles()
    several = len(files) > 1
    for fn in files:
        lines = [item.serialize('text') for item in readFile(fn)]
        for line in lines:
            if not matcher.search(line): continue
            if several: print('%s: %s' % (fn, line))
            else: print(line)
scanWords = re.compile(r'[A-Za-z0-9]+').findall


def find(words):
    """Print the text rendering of every item that contains all of the
    given space-separated words as whole words, ignoring case.

    If the first word starts with '.', the rest of it names the only
    file to search; otherwise all item files are searched and each line
    is preceded by its file name when there is more than one.
    """
    parts = words.split(' ')
    if parts[0].startswith('.'):
        # take the file name before lowercasing: file names can be
        # case-sensitive
        files, parts = [parts[0][1:]], parts[1:]
    else: files = getfiles()
    # Repeated spaces give empty strings, which are no word of any line
    # and made every search fail; drop them, as qfind does.
    wanted = [w.lower() for w in parts if w]
    several = len(files) > 1
    for fn in files:
        for line in [item.serialize('text') for item in readFile(fn)]:
            line_words = [w.lower() for w in scanWords(line)]
            for word in wanted:
                if word not in line_words: break
            else:
                if several: print('%s: %s' % (fn, line))
                else: print(line)
def compileItems():
    """Return, for every item of every item file, a pair (words, text):
    text is the item's text rendering and words a dictionary whose keys
    are the words (see scanWords) occurring in it."""
    raw_items = []
    for name in getfiles():
        raw_items += readFile(name)
    texts = [raw_item.serialize('text') for raw_item in raw_items]
    compiled = []
    for text in texts:
        words = {}
        for w in scanWords(text): words[w] = 0
        compiled.append((words, text))
    return compiled
def qfind(words, items):
    """Return the (words, text) pairs from items that match every one of
    the space-separated words.

    A word matches a pair if it is a key of the pair's dictionary or if
    the text has it at the start of a space-delimited word.  Empty
    words are ignored, so an empty query matches everything.
    """
    wanted = [w for w in words.split(' ') if w.strip()]
    hits = []
    for item in items:
        padded = ' ' + item[1]
        for word in wanted:
            if word in item[0]: continue
            if padded.find(' ' + word) == -1: break
        else: hits.append(item)
    return hits
def qformat(items):
    """Lay out search results as a block of 21 lines.

    More than 20 results give a 'Too many results...' notice.
    Otherwise each result's text goes on its own line -- texts longer
    than 80 characters are cut to their first 48 and last 27 characters
    around '[...]' -- and the block is filled up with empty lines.
    """
    if len(items) > 20:
        return '\nToo many results...' + ('\n' * 19)
    shown = []
    for item in items:
        text = item[1]
        if len(text) > 80: text = text[:48] + '[...]' + text[-27:]
        shown.append(text)
    padding = '\n' * (20 - len(shown))
    return '\n' + '\n'.join(shown) + padding
def qsearch(pattern):
    """Search all items interactively, showing the results anew after
    every keystroke.

    Typed characters extend the query, DEL ('\\x7f') removes its last
    character, and a newline or the end of input finishes.  The
    terminal is switched to unbuffered, unechoed input with stty for
    the duration and switched back however the session ends.  pattern
    is accepted but not used.
    """
    s, compiled = '', compileItems()
    items = qfind(s, compiled)
    searches = {s: items}
    os.popen('stty -icanon -echo').close()
    try:
        try:
            sys.stdout.write(' (%s) %s\n' % (s, qformat(items)))
            while 1:
                char = sys.stdin.read(1)
                # '' means end of input: stop rather than loop forever
                if char in ('\n', ''): break
                elif char == '\x7f':
                    s = s[:-1]
                    if s in searches: items = searches[s]
                    else: items = qfind(s, compiled)
                else:
                    s += char
                    # the results for the longer query are among the
                    # current ones, so only those need searching
                    items = qfind(s, items)
                # remember the results so that deleting back is free
                searches[s] = items
                sys.stdout.write('(%s) %s\n' % (s, qformat(items)))
        except Exception:
            print(sys.exc_info()[1])
    finally:
        # also reached on Ctrl-C, so the terminal is never left raw
        os.popen('stty icanon echo').close()
#
#
# # # # # # # # # # # # # # # # #
def aparse(arg):
    """Split arg at every single space and return the list of pieces."""
    separator = ' '
    return arg.split(separator)
def caparse(arg):
    """Return (first word, list of the remaining words) of arg, words
    being separated by single spaces.  Raises ValueError if arg has no
    space."""
    head, tail = arg.split(' ', 1)
    return head, tail.split(' ')
def parse2(arg):
    """Return (first word, everything after the first space) of arg.
    Raises ValueError if arg has no space."""
    head, tail = arg.split(' ', 1)
    return head, tail
def parse3(arg):
    """Return (first word, second word, everything after the second
    space) of arg.  Raises ValueError if arg has fewer than two
    spaces."""
    first, second, rest = arg.split(' ', 2)
    return first, second, rest
metadata_command = re.compile('^@([A-Za-z]+) (#?[A-Za-z]+) (.*?)$')
global_command = re.compile('^%([A-Za-z]+) ?(.*?)$')
general_command = re.compile('^([A-Za-z]+)(?:-([A-Z]+))? (.*?)$')


def parse(cmd):
    """Parse a command line, and call the relevant action.

    General:  <file>[-<FLAGS>] <content>        add an item to a file
    Global:   %<command> [<arguments>]          see %commands
    Metadata: @<property> <file|#all> <value>   set a metadata property
    Several:  ;;<command>;;<command>...

    Before that, every line of custom.conf (if the file exists) is
    applied to the command as a (pattern, replacement) substitution.
    Problems are reported on stderr.
    """
    if DEBUG: sys.stderr.write("Got command: %s\n" % cmd)
    # go through custom commands
    cf = 'custom.conf'
    custom = []
    try:
        f = open(cf)
        try: custom = f.read().splitlines()
        finally: f.close()
    except IOError: pass
    for line in range(len(custom)):
        # NOTE(security): each line of custom.conf is evaluated as Python
        # code, so whoever can write that file can run anything.
        try: pattern, repl = eval('(%s)' % custom[line])
        except Exception:
            sys.stderr.write("%s is borked at line %s\n" % (cf, line))
        else: cmd = re.sub(pattern, repl, cmd)
    m = general_command.match(cmd)
    if m:
        name, metadata, content = m.groups()
        burp(name, content, metadata)
        return
    m = global_command.match(cmd)
    if m:
        command, arg = m.groups()
        commands = { 'cat': lambda arg: slurp(*aparse(arg)),
            'find': find,
            'grep': lambda arg: grep(*parse2(arg)),
            'export': export,
            'getbydate': lambda arg: getByDate(*parse2(arg)),
            'getbyid': lambda arg: getByID(*parse2(arg)),
            'ls': listFiles,
            'setmeta': lambda arg: setItemMeta(*aparse(arg)),
            'append': lambda arg: appendToItem(*parse3(arg)),
            'getmetadata': lambda arg: getMetadata(*parse2(arg)),
            'archive': archive,
            'qsearch': qsearch,
            'cp': lambda arg: cp(*parse2(arg)),
            'mv': lambda arg: mv(*parse2(arg)),
            'rm': rm }
        commands_keys = list(commands.keys()) + ['commands']
        commands_keys.sort()
        commands['commands'] = (lambda arg:
            sys.stdout.write('\n'.join(commands_keys)))
        action = commands.get(command)
        # an unknown command is a user error, not a KeyError traceback
        if action is None:
            sys.stderr.write("Unknown command: %%%s (try %%commands)\n"
                             % command)
        else: action(arg)
        return
    m = metadata_command.match(cmd)
    if m:
        pred, subj, obj = m.groups()
        metaburp(subj, pred, obj)
        return
    if cmd.startswith(';;'):
        # e.g. b ";;@show notes 3;;%cat notes;;@show notes 10-m"
        for each in cmd[2:].split(';;'): parse(each)
    else: sys.stderr.write("Couldn't interpret command\n")
def main(argv):
    """Join the arguments after the program name with single spaces and
    run the result as one command."""
    args = argv[1:]
    parse(' '.join(args))


if __name__=="__main__":
    main(sys.argv)