#!/usr/bin/python
"""Some program

Requirements:
   * Log single lines to a persistent store, hashing to preserve integrity
     Enter various information: todo items,
     Search the information
     Be able to mark items as public or private, and have two HTML versions
     Be able to enter data and match against a RegExp
     File metadata is stored in meta.n3; item metadata is stored in the
     item itself as a series of at least one byte long flags
        'P' = public (world readable)
        'D' = deleted/delenda/deprecated
@@
   * Temporary metadata sets (use case? possibly display. multiple commands?)
   * Globbing? File name pattern matching
Done:
   * Remove old items? Or mark as "done"?
   * Display properties of D items
   * Add metadata to old items
   * Append to old items?
   * Amount to show
   * List files
   * Show all metadata
   * Write D or not on export?
   * Write to public-html
   * Move/delete/copy files?
Possibles:
   * References to other items?
Extensibility limitations:
   * Can't extend metadata properties in the same namespace
   * Only extensible to 26 item metadata attributes

(c) Copyright Sean B. Palmer, 2003. GPL 2: Share and Enjoy!
"""

# NOTE: the original `from __future__ import nested_scopes` is gone; nested
# scopes have been standard since Python 2.2, so the directive was a no-op.
# The code below is written to run on Python 2.6+ and Python 3.
import calendar
import os
import random
import re
import sys
import time

# # # # # #
# [Start of configurable stuff]
#
# You should configure this:
basedir = 'c:/web/infomesh/2003'
# You may configure these:
metafn = 'meta.n3'
DEBUG = 1
#
# [End of configurable stuff]
# # # # #

# All file names used below are relative to basedir.  A missing basedir is
# recorded instead of raised so the module stays importable; main() refuses
# to run in that case so notes are never written to an unexpected directory.
_chdir_error = None
try:
    os.chdir(basedir)
except OSError as e:
    _chdir_error = e
    sys.stderr.write('Warning: cannot use basedir %r: %s\n' % (basedir, e))

try:
    _input = raw_input  # Python 2
except NameError:
    _input = input  # Python 3


def _say(text, stream=None):
    """Write text and a newline to stream (sys.stdout when omitted)."""
    if stream is None:
        stream = sys.stdout
    stream.write('%s\n' % (text,))


def _readText(path):
    """Return the whole content of the file at path."""
    with open(path) as f:
        return f.read()


def _writeText(path, text, mode='w'):
    """Write text to path using the given open() mode, closing the file."""
    with open(path, mode) as f:
        f.write(text)


# # # # # # # # # # # # # # # # # #
# Metadata Grunging Code
#

b_ns = "http://infomesh.net/2003/b/terms#"


def triple(subj, pred, obj):
    """Return one metadata statement: <subj> <b_ns + pred> "obj" ."""
    return '<%s> <%s> "%s" .' % (subj, b_ns + pred, obj)


def _triplePattern(subj, pred, obj_pattern):
    """Return a regexp source matching triple(subj, pred, ...).

    subj and pred are escaped so that characters such as '.' in the
    namespace are matched literally; obj_pattern is inserted verbatim.
    """
    head = '<%s> <%s> "' % (subj, b_ns + pred)
    return re.escape(head) + obj_pattern + re.escape('" .')


def writemeta(mode, s):
    """Write s to the metadata file.

    mode 'w' replaces the file content with s; mode 'a' appends s plus a
    newline.  Any other mode raises ValueError.
    """
    if mode == 'w':
        _writeText(metafn, s, 'w')
    elif mode == 'a':
        _writeText(metafn, s + '\n', 'a')
    else:
        raise ValueError("Unknown mode: %s" % mode)


def readmeta():
    """Return the metadata file content, creating an empty file if needed."""
    try:
        return _readText(metafn)
    except IOError:
        writemeta('w', '')
        return ''


def metaburp(subj, pred, obj):
    """Set the single value of property pred on subj to obj.

    e.g. @sort notes alpha|ralpha|date|rdate|len|rlen

    this works because
    { ?x log:uri [ log:startswith "http://infomesh.net/2003/b/terms#" ] }
       => { ?x a owl:UniqueProperty; rdfs:range rdfs:Literal } .
    """
    wanted = triple(subj, pred, obj)
    meta = readmeta()
    # if it's already in there, no change
    if wanted in meta:
        return
    existing = re.compile(_triplePattern(subj, pred, '.*?'))
    if existing.search(meta):
        # if the prop's already set, change it; a callable replacement
        # keeps backslashes in obj from being read as group references
        writemeta('w', existing.sub(lambda match: wanted, meta))
    else:
        # otherwise, add it
        writemeta('a', wanted)


def metaslurp(subj, pred):
    """Return the value stored for (subj, pred), or None when unset.

    A warning goes to stderr when more than one value is found; the first
    one is returned.
    """
    objs = re.compile(_triplePattern(subj, pred, '(.*?)')).findall(readmeta())
    if not objs:
        return None
    if len(objs) > 1:
        _say('Error: %s is corrupted' % metafn, sys.stderr)
    return objs[0]


def getMetadata(subj, pred):
    """Print whether (and to what) pred is set on subj."""
    m = metaslurp(subj, pred)
    if m:
        _say('@%s %s is set to: "%s"' % (subj, pred, m))
    else:
        _say('@%s %s is not set' % (subj, pred))

# # # # # # # # # # # # # # # # # #


def datetime():
    """Return the current UTC time as standard date format."""
    return time.strftime('%Y-%m-%dT%H:%M:%S', time.gmtime())


regexps = []


def keep(match):
    """Push match (possibly None) onto regexps and return it.

    Lets parse() test a match inside an if/elif chain and pop() it after.
    """
    regexps.append(match)
    return match


def archive(*args):
    """Copy every item file into archive/, skipping suspicious shrinkage.

    A file shorter than its archived copy is reported and left alone;
    otherwise the copy is refreshed when the SHA-1 digests differ.
    """
    import hashlib

    def readBytes(path):
        with open(path, 'rb') as f:
            return f.read()

    def writeBytes(path, data):
        with open(path, 'wb') as f:
            f.write(data)

    if not os.path.exists('archive/'):
        os.mkdir('archive')
    for name in getfiles():
        target = 'archive/%s' % name
        new = readBytes(name)
        if not os.path.exists(target):
            writeBytes(target, new)
            _say('newly archived: %s' % name)
            continue
        old = readBytes(target)
        if len(new) < len(old):
            _say('may be corrupted, not archived: %s' % name)
        elif hashlib.sha1(old).hexdigest() != hashlib.sha1(new).hexdigest():
            writeBytes(target, new)
            _say('archived: %s' % name)
        else:
            _say('already archived: %s' % name)

# # # # # # # # # # # # # # # # # #
# Item stuff
#
# item.serialize() is the inverse of deserialize(line)
#

t_re = re.compile('^(....).(..).(..).(..).(..).(..)$')


def timeToLong(t):
    """Return a YYYY-MM-DDThh:mm:ss stamp as the integer YYYYMMDDhhmmss.

    Returns None when t does not have that 19-character shape.
    """
    m = t_re.match(t)
    if m:
        return int(''.join(m.groups()))
    return None


class Item:
    """One logged line: a uid, content, flag letters and a UTC timestamp."""

    def __init__(self, uid, content, metadata=None, timestamp=None):
        if metadata is None:
            metadata = ''
        if timestamp is None:
            timestamp = datetime()
        self.uid = uid
        self.content = content
        self.metadata = metadata  # string of flag letters, e.g. 'PD'
        self.timestamp = timestamp

    # Items order chronologically; list.sort() relies on __lt__.
    def __lt__(self, item):
        return timeToLong(self.timestamp) < timeToLong(item.timestamp)

    def __gt__(self, item):
        return timeToLong(self.timestamp) > timeToLong(item.timestamp)

    def __str__(self):
        return ' '.join((self.timestamp, self.uid,
                         self.metadata, self.content))

    def size(self):
        """Return the length of the content."""
        return len(self.content)

    def serialize(self, format=None, custom=None):
        """Return the item rendered in the requested format.

        format -- None/empty for the storage line (see deserialize), or
                  'text', 'html' or 'n3' (case-insensitive); any other
                  value yields None.
        custom -- for 'text' only: a template using %(prefix), %(content),
                  %(meta), %(timestamp) and %(uid) placeholders.
        """
        uid, metadata, content = self.uid, self.metadata, self.content
        timestamp = self.timestamp.replace('T', ' ')
        if not format:
            return self.__str__()
        kind = format.lower()
        if kind == 'text':
            prefix = metaslurp('#all', 'prefix') or ''
            if not prefix.endswith(' '):
                prefix += ' '
            if custom:
                # turn each %(name) into %(name)s before interpolating
                template = re.sub(r'%\([a-z]+\)', r'\g<0>s', custom)
                return template % {'prefix': prefix, 'content': content,
                                   'meta': metadata, 'timestamp': timestamp,
                                   'uid': uid}
            return "%s%s (%s, %s)" % (prefix, content, timestamp, uid)
        if kind == 'html':
            # each flag letter becomes one CSS class
            metadata = (metadata and
                        ' class="%s"' % ' '.join(metadata)) or ''
            # NOTE(review): the markup of this template was lost from the
            # source; it is reconstructed from the five arguments (uid,
            # class attribute, uid, content, timestamp) -- confirm.
            return ('<li id="%s"%s><a href="#%s">#</a> %s (%s)</li>'
                    % (uid, metadata, uid, content, timestamp))
        if kind == 'n3':
            return ('[] <%smetadata> "%s"; \n<%suid> "%s"; '
                    '<%stimestamp> "%s"; <%scontent> "%s" .'
                    % (b_ns, metadata, b_ns, uid,
                       b_ns, self.timestamp, b_ns, content))
        return None


p = r'(?m)^([\d-]{10}T[\d:]{8}) ([A-Za-z0-9]+) ([A-Z]*) (.*?)$'
item_p = re.compile(p)


def deserialize(line):
    """Parse one storage line ("timestamp uid FLAGS content") into an Item.

    Raises ValueError when the line does not have that shape.
    """
    match = item_p.match(line)
    if match is None:
        raise ValueError('Malformed item line: %r' % (line,))
    timestamp, uid, metadata, content = match.groups()
    item = Item(uid, content, metadata, timestamp)
    if DEBUG:
        assert item.serialize() == line, "Item is borked!"
    return item


def readFile(name):
    """Opens a file (a pickled set of items), and returns an item list.

    Carriage returns are stripped; the file is rewritten only when that
    actually changed something.  Empty lines are skipped.
    """
    raw = _readText(name)
    s = raw.replace('\r', '')
    if s != raw:
        _writeText(name, s)
    return [deserialize(line) for line in s.splitlines() if line]


def _writeItems(name, items):
    """Replace the content of file name with the storage lines of items."""
    lines = [item.serialize() for item in items]
    _writeText(name, '\n'.join(lines) + '\n')

# # # # # # # # # # # # # # # # # #


def norm(name, items):
    """Return items as text lines, using the 'textformat' template that is
    set on the file or, failing that, on '#all'."""
    template = metaslurp(name, 'textformat')
    if not template:
        template = metaslurp('#all', 'textformat')
    if template:
        result = [item.serialize('text', template) for item in items]
    else:
        result = [item.serialize('text') for item in items]
    return '\n'.join(result)


# NOTE(review): the markup of this page template was lost from the source;
# it is reconstructed from its three arguments (name, name, items) -- confirm.
_HTML_PAGE = ('<html>\n<head>\n<title>%s</title>\n</head>\n'
              '<body>\n<h1>%s</h1>\n<ul>\n%s\n</ul>\n</body>\n</html>\n')


def htmlize(name, items):
    """Return a single HTML page titled name that lists items."""
    listing = '\n'.join([item.serialize('html') for item in items])
    return _HTML_PAGE % (name, name, listing)


def n3ize(items):
    """Return items as N3 statements, one item per statement."""
    return '\n'.join([item.serialize('n3') for item in items])


def getItemsByDate(items, pattern):
    """Return the items whose timestamp matches pattern.

    e.g. 2003-04-14 12:25:* -- the space stands for the 'T' separator and
    each '*' matches any run of characters.
    """
    pattern = 'T'.join(pattern.split(' '))
    pattern = pattern.replace('*', '.*?')
    stamp = re.compile('(?m)^%s$' % pattern)
    return [item for item in items if stamp.match(item.timestamp)]


def getByDate(name, pattern):
    """Print the items of file name whose timestamp matches pattern."""
    _say(norm(name, getItemsByDate(readFile(name), pattern)))


def getItemByID(items, uid):
    """Return (index, item) for the first item with that uid, else None."""
    for i, item in enumerate(items):
        if item.uid == uid:
            return i, item
    return None


def getByID(name, uid):
    """Print the item of file name with the given uid, if there is one."""
    found = getItemByID(readFile(name), uid)
    if found:
        _say(norm(name, [found[1]]))


def filterByMeta(items, meta):
    """Return the items carrying every flag in meta and none of the flags
    written with a leading '-' (e.g. 'P-D'); case-insensitive."""
    for flag in re.findall(r'(-?[A-Z])', meta.upper()):
        if flag.startswith('-'):
            banned = flag[-1]
            items = [item for item in items if banned not in item.metadata]
        else:
            items = [item for item in items if flag in item.metadata]
    return items


def setItemMeta(name, uid, meta, mode=None):
    """Add flags to (mode None or 'a') or replace the flags of (mode 'w')
    the item uid in file name, then rewrite the file.

    meta must consist of ASCII letters only (ValueError otherwise) and is
    upper-cased.  An unknown uid is reported on stderr.
    """
    if not re.match('^[A-Za-z]+$', meta):
        raise ValueError("item metadata must match [A-Za-z]+")
    meta = meta.upper()
    items = readFile(name)
    found = getItemByID(items, uid)
    if found is None:
        _say('No item with uID "%s" in "%s"' % (uid, name), sys.stderr)
        return
    item = found[1]
    if (not mode) or (mode == 'a'):
        for flag in meta:
            # append only the missing flag, never the whole meta string
            if flag not in item.metadata:
                item.metadata += flag
    elif mode == 'w':
        item.metadata = meta
    _writeItems(name, items)


def appendToItem(name, uid, content):
    """Append content (after a space) to the item uid in file name.

    Line breaks are removed from content, as burp() does, because an item
    must stay on one line.  An unknown uid is reported on stderr.
    """
    # @@ should this change the date?
    items = readFile(name)
    found = getItemByID(items, uid)
    if found is None:
        _say('No item with uID "%s" in "%s"' % (uid, name), sys.stderr)
        return
    content = content.replace('\n', '').replace('\r', '')
    found[1].content += ' ' + content
    _writeItems(name, items)


def getfiles():
    """Return the item files: plain files in the current directory whose
    names are purely alphabetic."""
    return [fn for fn in os.listdir('.')
            if os.path.isfile(fn) and fn.isalpha()]


def listFiles(pattern):
    """Print the item file names, each behind the '#all' prefix.

    pattern is accepted but not used.
    """
    prefix = metaslurp('#all', 'prefix') or ''
    if not prefix.endswith(' '):
        prefix += ' '
    _say('\n'.join([prefix + name for name in getfiles()]))


def cp(name, new):
    """Copy the file name to new."""
    # @@ metadata
    _writeText(new, _readText(name))
    _say("Copied %s to %s" % (name, new))


def mv(name, new):
    """Rename the file name to new."""
    # @@ metadata
    os.rename(name, new)
    _say("Moved %s to %s" % (name, new))


def rm(name):
    """Delete the file name after an interactive confirmation."""
    # @@ metadata
    if _input('Sure? [y/n]: ') == 'y':
        os.remove(name)
        _say("Removed %s" % name)


def getID(id_seq):
    """Return a random uid that does not occur in id_seq.

    Tries 100 short, easy-to-type candidates first, then falls back to a
    much larger five-character space.
    """
    taken = set(id_seq)
    # 17 * 24 * 24 = 9,792 combinations
    alpha = 'bdfghjkmnprstvwyz'  # c, q, x redundant, l appears as 1
    alphanum = alpha + '2357890'
    for attempt in range(100):
        tid = random.choice(alpha)
        tid += ''.join([random.choice(alphanum) for n in range(2)])
        if tid not in taken:
            return tid
    _say("if you're using this program *that* "
         "intensively, you might want to consider tweaking def getID...",
         sys.stderr)
    # 768,369,472 combinations
    alpha = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
    alphanum = alpha + '1234567890'
    while 1:
        tid = random.choice(alpha)
        tid += ''.join([random.choice(alphanum) for n in range(4)])
        if tid not in taken:
            return tid


def burp(name, content, metadata=None):
    """Append a new item holding content to file name and print its uid.

    A new file also gets 'created' and 'order' metadata.  The uids handed
    out per file are tracked in its 'uids' metadata.  Raises ValueError
    when name is not purely alphabetic.
    """
    if not name.isalpha():
        raise ValueError("file name must match [A-Za-z]+")
    # @@ case sensitivity depends upon the file system
    if metadata is None:
        metadata = ''
    t = datetime()
    content = content.replace('\n', '').replace('\r', '')
    if not os.path.exists(name):
        metaburp(name, 'created', t)
        metaburp(name, 'order', 'date')
        uid = getID([])
        metaburp(name, 'uids', uid)
    else:
        # split() rather than split(' '): an unset value must give []
        id_seq = (metaslurp(name, 'uids') or '').split()  # @@
        uid = getID(id_seq)
        metaburp(name, 'uids', ' '.join(id_seq + [uid]))
    item = Item(uid, content, metadata, t)
    # write the content out to the file
    _writeText(name, item.serialize() + '\n', 'a')
    _say('Item created in "%s" with uID "%s"' % (name, uid))


show_int = re.compile(r'^-?\d+$')
show_sdate = re.compile(r'^(-?\d+-)?[ymdh]$')
show_adate = re.compile(r'^\d+[YMWDH]$')

# number of leading timestamp characters shared within one period
_PERIOD_PREFIX = {'y': 4, 'm': 7, 'd': 10, 'h': 13}
# seconds per unit: 365.25 days, 30 days, 7 days, 1 day, 1 hour
_UNIT_SECONDS = {'Y': 31557600, 'M': 2592000, 'W': 604800,
                 'D': 86400, 'H': 3600}


def _timestampToSecs(t):
    """Return the epoch seconds of a UTC YYYY-MM-DDThh:mm:ss timestamp."""
    return calendar.timegm(time.strptime(t, '%Y-%m-%dT%H:%M:%S'))


def _limitItems(items, amt):
    """Return the part of items selected by the 'show' expression amt.

    N / -N       -- the first / last N items
    [N-]y|m|d|h  -- items of the current year/month/day/hour; when there
                    are fewer than |N| of them, the first (N > 0) or last
                    (N < 0) |N| items instead
    N<Y|M|W|D|H> -- items newer than N years/months/weeks/days/hours
    Anything else leaves items untouched.
    """
    if show_int.match(amt):
        count = int(amt)
        if count > 0:
            return items[:count]
        if count < 0:
            return items[count:]
        return items
    if show_sdate.match(amt):
        least = 0
        if len(amt) > 1:
            least = int(amt[:-2])  # strip the "-<unit>" tail
        width = _PERIOD_PREFIX[amt[-1]]
        now = datetime()
        current = [item for item in items
                   if item.timestamp[:width] == now[:width]]
        if len(current) >= abs(least):
            return current
        if least > 0:
            return items[:least]
        return items[least:]
    if show_adate.match(amt):
        cutoff = time.time() - int(amt[:-1]) * _UNIT_SECONDS[amt[-1]]
        return [item for item in items
                if _timestampToSecs(item.timestamp) > cutoff]
    return items


def slurp(name, amt=None):
    """Print the items of file name, ordered and limited per its metadata.

    'order' (date|alpha|len) and 'way' (a value starting with 'r'
    reverses) set the ordering; items flagged D are hidden unless
    'showdeleted' is set; amt, or else the 'show' metadata, limits the
    output (see _limitItems).
    """
    items = readFile(name)
    # arrange the items as given in the meta
    order = metaslurp(name, 'order') or 'date'
    if order == "date":
        items.sort()
    elif order == "alpha":
        items.sort(key=lambda item: item.content)
    elif order == "len":
        items.sort(key=lambda item: item.size())
    if (metaslurp(name, 'way') or 'f').startswith('r'):
        items.reverse()
    # filter out D items if required
    if not metaslurp(name, 'showdeleted'):
        items = filterByMeta(items, '-D')
    # show the required amount
    amt = amt or metaslurp(name, 'show')
    if amt:
        items = _limitItems(items, amt)
    _say(norm(name, items))


def export(type):
    """Write every item file into '<type>-files/' as text, html or n3.

    For html, items flagged P are additionally written to
    'public-html-files/'.
    """
    outdir = '%s-files' % type
    if not os.path.exists(outdir):
        os.mkdir(outdir)
    for fn in getfiles():
        items = readFile(fn)
        if type == 'text':
            _writeText('text-files/%s.txt' % fn, norm(fn, items))
        elif type == 'html':
            public = filterByMeta(items, 'P')
            if public:
                pd = 'public-html-files'
                # create the directory first so the first export is kept
                if not os.path.exists(pd):
                    os.mkdir(pd)
                _writeText('%s/%s.html' % (pd, fn), htmlize(fn, public))
            _writeText('html-files/%s.html' % fn, htmlize(fn, items))
        elif type == 'n3':
            _writeText('n3-files/%s.n3' % fn, n3ize(items))

# # # # # # # # # # # # # # # # # #
# Search code
#


def grep(file, pattern):
    """Print the text lines matching the regexp pattern.

    file is one file name; empty or '*' searches every item file, in
    which case each hit is prefixed with its file name.
    """
    matcher = re.compile(pattern)
    if (not file) or (file == '*'):
        files = getfiles()
    else:
        files = [file]
    for fn in files:
        for item in readFile(fn):
            line = item.serialize('text')
            if matcher.search(line):
                if len(files) > 1:
                    _say('%s: %s' % (fn, line))
                else:
                    _say(line)


scanWords = re.compile(r'[A-Za-z0-9]+').findall


def find(words):
    """Print the text lines containing every word (case-insensitive).

    A first word of the form '.name' restricts the search to file name.
    """
    words = [w.lower() for w in words.split(' ')]
    if words[0].startswith('.'):
        files, words = [words[0][1:]], words[1:]
    else:
        files = getfiles()
    for fn in files:
        for item in readFile(fn):
            line = item.serialize('text')
            line_words = set([w.lower() for w in scanWords(line)])
            if all(word in line_words for word in words):
                if len(files) > 1:
                    _say('%s: %s' % (fn, line))
                else:
                    _say(line)


def compileItems():
    """Return (word index, text line) pairs for every item in every file."""
    lines = []
    for name in getfiles():
        lines.extend([item.serialize('text') for item in readFile(name)])
    return [(dict.fromkeys(scanWords(line), 0), line) for line in lines]


def qfind(words, items):
    """Return the compiled items (see compileItems) in which every word
    is either a whole word or the start of a space-delimited word."""
    wanted = [w for w in words.split(' ') if w.strip()]
    results = []
    for item in items:
        index, line = item[0], item[1]
        padded = ' ' + line
        for word in wanted:
            if word not in index and padded.find(' ' + word) == -1:
                break
        else:
            results.append(item)
    return results


def qformat(items):
    """Return a 20-row block of result lines; long lines are elided."""
    if len(items) > 20:
        return '\nToo many results...' + ('\n' * 19)
    result = []
    for item in items:
        line = item[1]
        if len(line) > 80:
            line = line[:48] + '[...]' + line[-27:]
        result.append(line)
    return '\n' + '\n'.join(result) + ('\n' * (20 - len(result)))


def qsearch(pattern):
    """Interactive incremental search; Enter (or end of input) quits.

    pattern is accepted but not used.  The terminal is switched to
    unbuffered, no-echo mode with stty and always restored afterwards.
    """
    s, compiled = '', compileItems()
    items = qfind(s, compiled)
    searches = {s: items}  # results per query, reused when backspacing
    os.popen('stty -icanon -echo').close()
    try:
        _say(' (%s) %s' % (s, qformat(items)))
        while 1:
            sys.stdout.flush()
            char = sys.stdin.read(1)
            if char == '\n' or char == '':
                break
            elif char == '\x7f':
                s = s[:-1]
                if s in searches:
                    items = searches[s]
                else:
                    items = qfind(s, compiled)
            else:
                s += char
                items = qfind(s, items)
            searches[s] = items
            _say('(%s) %s' % (s, qformat(items)))
    except Exception as e:
        _say(e)
    finally:
        os.popen('stty icanon echo').close()

# # # # # # # # # # # # # # # # # #


def aparse(arg):
    """Split arg on single spaces."""
    return arg.split(' ')


def caparse(arg):
    """Return (first word, list of the remaining words)."""
    i = arg.index(' ')
    return arg[:i], arg[i+1:].split(' ')


def parse2(arg):
    """Return (first word, rest); ValueError when arg has no space."""
    i = arg.index(' ')
    return arg[:i], arg[i+1:]


def parse3(arg):
    """Return (first word, second word, rest); ValueError when arg has
    fewer than two spaces."""
    first, rest = parse2(arg)
    second, rest = parse2(rest)
    return first, second, rest


metadata_command = re.compile('^@([A-Za-z]+) (#?[A-Za-z]+) (.*?)$')
global_command = re.compile('^%([A-Za-z]+) ?(.*?)$')
general_command = re.compile('^([A-Za-z]+)(?:-([A-Z]+))? (.*?)$')


def parse(cmd):
    """Parse a command line, and call the relevant action.

    General: NAME[-FLAGS] CONTENT
    Global commands: %COMMAND [ARGUMENTS]
    Metadata: @PREDICATE SUBJECT VALUE
    Several commands: ;;COMMAND;;COMMAND...
    """
    if DEBUG:
        _say("Got command: %s" % (cmd,), sys.stderr)

    # go through custom commands
    cf = 'custom.conf'
    try:
        custom = _readText(cf).splitlines()
    except IOError:
        custom = []
    for line, rule in enumerate(custom):
        try:
            # NOTE(review): eval() executes whatever custom.conf contains;
            # keep that file trusted.  Not replaced, because rules may
            # legitimately be more than plain string pairs.
            pattern, repl = eval('(%s)' % rule)
        except Exception:
            _say("%s is borked at line %s" % (cf, line), sys.stderr)
        else:
            cmd = re.sub(pattern, repl, cmd)

    if keep(general_command.match(cmd)):
        name, metadata, content = regexps.pop().groups()
        burp(name, content, metadata)
    elif keep(global_command.match(cmd)):
        command, arg = regexps.pop().groups()
        commands = {
            'cat': lambda arg: slurp(*aparse(arg)),
            'find': find,
            'grep': lambda arg: grep(*parse2(arg)),
            'export': export,
            'getbydate': lambda arg: getByDate(*parse2(arg)),
            'getbyid': lambda arg: getByID(*parse2(arg)),
            'ls': listFiles,
            'setmeta': lambda arg: setItemMeta(*aparse(arg)),
            'append': lambda arg: appendToItem(*parse3(arg)),
            'getmetadata': lambda arg: getMetadata(*parse2(arg)),
            'archive': archive,
            'qsearch': qsearch,
            'cp': lambda arg: cp(*parse2(arg)),
            'mv': lambda arg: mv(*parse2(arg)),
            'rm': rm
        }
        commands_keys = sorted(list(commands) + ['commands'])
        commands['commands'] = (
            lambda arg: sys.stdout.write('\n'.join(commands_keys)))
        if command in commands:
            commands[command](arg)
        else:
            _say("Unknown command: %s" % command, sys.stderr)
    elif keep(metadata_command.match(cmd)):
        pred, subj, obj = regexps.pop().groups()
        metaburp(subj, pred, obj)
    elif cmd.startswith(';;'):
        # e.g. b ";;@show notes 3;;%cat notes;;@show notes 10-m"
        for sub in cmd[2:].split(';;'):
            parse(sub)
    else:
        _say("Couldn't interpret command", sys.stderr)


def main(argv):
    """Run the command given by argv[1:], joined with spaces."""
    if _chdir_error is not None:
        # the warning was printed at import; never log outside the store
        sys.exit(1)
    parse(' '.join(argv[1:]))


if __name__ == "__main__":
    main(sys.argv)