#!/usr/bin/python
"""A note taking program dealie

Requirements:
   * The ability to...
      * add anything from rough random notes to structured data
      * query all data: fast, and simple to complex (query/find/search)
      * "bail out" and use b.py instead
      * publish public and private parts of the data
      * mark information as used or out of date, and to filter that
      * configure as many aspects of the program as possible
      * add metadata to any item, regardless of date
      * append to literals in place (or augment them generally)
      * sort and put limits on data display
      * find out metadata for a node--state?

Design issues:
   * Store in one big database, or separate it out and load as required?
   * List of standard well created terms on the Semantic Web required?
   * No "special" metadata: all in RDF triples
      * Some can be stored, for example, as space separated literals
   * Lots of separate modules, or two big ones? (use eep3, perhaps)

Todo:
   * Metadata queries
   * Big list of all the keywords in meta?
   * Export to various formats, mainly HTML and plain text
   * Search functions
   * Append to literals in place, e.g. annotated resources' n:content
   * Show raw data for a note
   * Remove bits of metadata (@@ for show, all?)
   * Raw queries
   * Big directory of cool property URIs to use? searching through space?
   * urllib.URLopener.version = UAString

Done:
   * Filter out deleted items (those with the keyword d)
   * custom.conf
   * Update keywords for annotated resources
   * Reverse mapping of URIs to QNames
"""

import sys
import os
import re
import time
import calendar
from random import choice as rand

import eep3  # local RDF library

# # # # # # # # #
# [Start of configuration variables]
#
# WARNING: changing these may cause the program to bork:

DEBUG = 1                                   # truthy: echo parsed commands to stderr
data_fn = "data.n3"                         # file holding the notes themselves
meta_fn = "meta.n3"                         # file holding classes, prefixes, settings
n_ns = "http://infomesh.net/2003/n/terms#"  # namespace of this program's own terms

#
# [End of configurable stuff]
# # # # # # # # #

# Stuff that this program knows about...
rdf_ns = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
rdfs_ns = "http://www.w3.org/2000/01/rdf-schema#"
owl_ns = "http://www.w3.org/2002/07/owl#"

n = eep3.Namespace(n_ns)
rdf = eep3.Namespace(rdf_ns)
rdfs = eep3.Namespace(rdfs_ns)
owl = eep3.Namespace(owl_ns)


# Utilities

# One date/time field: a number without its leading zeros, or a lone zero.
_date_field_re = re.compile(r'[1-9]\d*|0(?=\D|\Z)')


def datetime():
    """Return the current UTC time formatted as YYYY-MM-DDTHH:MM:SSZ."""
    return time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())


def dateToInt(s):
    """Convert a timestamp string (as produced by datetime()) to epoch seconds.

    The numeric fields found in s are taken in order, three zero fields are
    appended as padding, and the tuple is passed to calendar.timegm(), so
    the fields are interpreted as UTC.
    """
    fields = [int(field) for field in _date_field_re.findall(s)]
    return calendar.timegm(tuple(fields + [0, 0, 0]))


def write(*args):
    """Write the arguments to stdout, space separated, followed by a newline."""
    # (arg,) keeps a tuple argument from being unpacked by the % operator.
    sys.stdout.write(' '.join(['%s' % (arg,) for arg in args]) + '\n')


def _readFile(fn):
    """Return the full contents of fn, closing the handle afterwards."""
    fh = open(fn)
    try:
        return fh.read()
    finally:
        fh.close()


def _writeFile(fn, text, mode):
    """Write text to fn using the given open() mode, then close the handle."""
    fh = open(fn, mode)
    try:
        fh.write(text)
    finally:
        fh.close()


# Triple reading, writing, and initialization

def initializeMeta():
    """Fetch the stock meta file and save it as meta_fn.

    Raises IOError when the source cannot be read.
    """
    import urllib
    try:
        src = urllib.urlopen('file:c:/web/infomesh/2003/eep3/meta.n3')
        try:
            s = src.read()
        finally:
            src.close()
    except (IOError, OSError):
        # String exceptions are rejected by Python 2.6+, so raise a real one.
        raise IOError(
            "Please download the file manually, or connect and retry!")
    # @@ do hash checking here
    _writeFile(meta_fn, s, 'w')


# fn -> (mtime when parsed, parsed triples)
files = {}


def readTriples(fn):
    """Return the parsed triples stored in fn, reusing a cached parse.

    A missing data file is created empty and a missing meta file is fetched
    with initializeMeta().  The cached parse is reused for as long as the
    file's modification time is unchanged; note that callers receive the
    cached object itself, not a copy.
    """
    if fn == data_fn:
        if not os.path.exists(data_fn):
            _writeFile(data_fn, '', 'w')
    elif fn == meta_fn:
        if not os.path.exists(meta_fn):
            initializeMeta()
    flastmod = os.path.getmtime(fn)
    cached = files.get(fn)
    if cached is not None and cached[0] == flastmod:
        return cached[1]
    triples = eep3.parseNTriples(_readFile(fn))
    files[fn] = (flastmod, triples)
    return triples


def writeTriples(fn, triples, mode=None):
    """Serialize triples into fn; mode is an open() mode, default append."""
    if not mode:
        mode = 'a'
    _writeFile(fn, eep3.serialize(triples) + '\n', mode)
    # The cache is keyed on mtime only; a write landing within the same
    # timestamp tick as the cached read would otherwise go unnoticed.
    if fn in files:
        del files[fn]


def getNSPrefixes():
    """Return a {prefix: namespace} dict built from the n:bind triples in meta."""
    result = {}
    q = eep3.Formula((eep3.triple("?x", n.bind, "?y"),))
    for r in eep3.query(readTriples(meta_fn), q, 'results'):
        for triple in r:
            result[triple[2].value] = triple[0].value
    return result


# Retrieval

def metaslurp(subj, pred, sic=None):
    """Return the single object of (subj, pred) in the meta file, or None.

    With a true sic the object itself is returned, otherwise its .value.
    None is returned when meta has no such subject/predicate pair; an
    AssertionError is raised when there is more than one object.
    """
    try:
        objects = readTriples(meta_fn)._triples[pred][subj]
    except KeyError:
        return None
    assert len(objects) == 1
    if sic:
        return objects[0]
    return objects[0].value


def _instanceNames(data, uri):
    """Return the ?thing bindings for every (?thing rdf:type uri) in data."""
    q = eep3.formula(("?thing", rdf.type, uri))
    return [binding["?thing"] for binding in eep3.query(data, q, 'bindings')]


def _describe(data, name):
    """Return a Formula holding every triple in data whose subject is name."""
    return eep3.Formula(data.query(eep3.triple(name, "?x", "?y"), strict=1))


def _firstValue(formula, subj, pred):
    """Return the .value of the first (subj, pred, ?) match, or None."""
    matches = formula.query(eep3.triple(subj, pred, "?y"))
    if len(matches) == 0:
        return None
    return matches[0][2].value


def _keywordsMatch(have, wanted):
    """Check an item's keyword string against a space separated filter.

    Plain words in wanted must be present in have; words prefixed with '-'
    must be absent.  have may be None for an item without keywords.
    """
    if have is None:
        words = []
    else:
        words = have.split(' ')
    for w in wanted.split(' '):
        if w.startswith('-'):
            if w[1:] in words:
                return 0
        elif w not in words:
            return 0
    return 1


def simpleGetByClass(data, u):
    """Return one Formula per instance of class u found in data."""
    return [_describe(data, name) for name in _instanceNames(data, u)]


def getByClass(data, uri, label=None, timestamp=None, keywords=None):
    """Return the instances of class uri in data that pass the filters.

    label     -- when given, the other filters are ignored: the result is a
                 one-element list holding the first instance with that
                 n:label, or None if there is none.
    timestamp -- "DATE TIME" pattern in which '*' matches any run of
                 characters; compared against the item's n:timestamp.
    keywords  -- space separated words; see _keywordsMatch().

    Items lacking the property being filtered on are treated as not
    matching (for keywords: as having no keywords) instead of raising
    IndexError.
    """
    stamp_re = None
    if timestamp and not label:
        # Compiled once up front; it does not depend on the item.
        ts = ('%sT%sZ' % tuple(timestamp.split(' '))).replace('*', '.*?')
        stamp_re = re.compile('(?m)^%s$' % ts)

    results = []
    for name in _instanceNames(data, uri):
        res = _describe(data, name)
        if label:
            if label == _firstValue(res, name, n.label):
                return [res]
            continue
        if stamp_re is not None:
            stamp = _firstValue(res, name, n.timestamp)
            if stamp is None or not stamp_re.match(stamp):
                continue
        if keywords:
            have = _firstValue(res, name, n.keywords)
            if not _keywordsMatch(have, keywords):
                continue
        results.append(res)

    if label:
        return None
    return results


# Display and sorting

def _itemValue(item, pred):
    """Return the .value of item's first object for pred.

    The item's node is taken to be the subject of its first triple.
    """
    return item._triples[pred][item[0][0]][0].value


def sort(items):
    """Sort items in place according to their class's settings; return items.

    The class of the first item decides everything: n:order in meta selects
    "date" (default), "alpha" or "len", and an n:way starting with 'r'
    reverses the result.  "alpha" and "len" use n:content when the first
    item has it, else n:label.  An empty list is returned unchanged.
    """
    if not items:
        return items
    first = items[0]
    item_class = first._triples[rdf.type][first[0][0]][0]
    if first._triples.has_key(n.content):
        sortby = n.content
    else:
        sortby = n.label
    order = metaslurp(item_class, n.order) or "date"
    way = metaslurp(item_class, n.way) or "f"

    # key= sorts ascending, which is the order the old comparator
    # ((a > b) - 1, i.e. only ever 0 or -1) ended up producing.
    if order == "date":
        items.sort(key=lambda item: dateToInt(_itemValue(item, n.timestamp)))
    elif order == "alpha":
        items.sort(key=lambda item: _itemValue(item, sortby))
    elif order == "len":
        items.sort(key=lambda item: len(_itemValue(item, sortby)))

    if way.startswith('r'):
        items.reverse()
    return items


# n:show values: "[MIN-][COUNT]UNIT", e.g. "5-2d" = last two days, 5 at least
show_sdate = re.compile(r'(?i)^(?:(-?\d+)-)?(\d+)?([ymwdh])$')

# Seconds per unit: 365.25-day year, 30-day month, week, day, hour.
_unit_seconds = {'y': 31557600, 'm': 2592000, 'w': 604800,
                 'd': 86400, 'h': 3600}


def amount(items):
    """Trim an already sorted list of items as the class's n:show dictates.

    n:show is either an integer (positive: keep that many from the front,
    negative: from the back) or a period matching show_sdate, in which
    case only items whose n:timestamp falls inside the period are kept.
    If the period yields fewer than abs(MIN) items, MIN is applied as a
    plain count instead.  Without n:show, or for an empty list, items is
    returned untouched.
    """
    if not items:
        return items
    first = items[0]
    item_class = first._triples[rdf.type][first[0][0]][0]
    amt = metaslurp(item_class, n.show)
    if not amt:
        return items

    m = show_sdate.match(amt)
    if m is None:
        count = int(amt)
        if count > 0:
            return items[:count]
        if count < 0:
            return items[count:]
        return items

    minimum, scalar, unit = m.groups()
    minimum = int(minimum or '0')
    scalar = int(scalar or '1')
    cutoff = dateToInt(datetime()) - scalar * _unit_seconds[unit.lower()]
    recent = [item for item in items
              if dateToInt(_itemValue(item, n.timestamp)) > cutoff]
    if len(recent) < abs(minimum):
        # Too few recent items: fall back to a plain count.
        if minimum > 0:
            return items[:minimum]
        return items[minimum:]
    return recent


# %(local) or %(prefix:local) placeholders in an n:format string
i_re = re.compile(r'(%\((?:([a-z_]+):)?([A-Za-z_]+)\))')


def display(item):
    """Return the display string for an AnnotatedResource.

    The class's n:format from meta (or a built-in default) is used as a
    template.  Each placeholder is replaced by the item's value for that
    property, or failing that by the program-wide value recorded in meta
    against n:NDotPy; placeholders with neither are left as they are.
    """
    name = item[0][0]
    item_type = item._triples[rdf.type][name][0]

    # get the format flag, if there is one (there should be)
    default = "%(label) %(keywords) %(timestamp) %(rdf:type)"
    text = metaslurp(item_type, n.format) or default

    prefixes = getNSPrefixes()
    # @@ sync this with the constructor?
    for bit, p, local in i_re.findall(text):
        # Resolved per placeholder, so equal local names under different
        # prefixes no longer overwrite each other.
        # NOTE(review): built without <...>, unlike qnameToURI() -- confirm
        # that eep3.Article treats both spellings alike.
        pred = eep3.Article(prefixes[p] + local)
        if item._triples.has_key(pred):
            repl = item._triples[pred][name][0].value
            if pred == n.timestamp:
                repl = repl.replace('T', ' ').replace('Z', '')
            text = text.replace(bit, repl)
        elif readTriples(meta_fn)._triples.has_key(pred):
            repl = metaslurp(n.NDotPy, pred)
            if repl is not None:  # str.replace() would reject None
                text = text.replace(bit, repl)
    return text


def displayItems(items):
    """Sort, trim and print each item on its own line."""
    for item in amount(sort(items)):
        write(display(item))


# Creation

def longRandomName(n=None):
    """Return a random name: one letter followed by n alphanumerics.

    n defaults to 5, giving 52 * 62**5 = 47,638,907,264 combinations.
    """
    if n is None:
        n = 5
    chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890'
    return rand(chars[:52]) + ''.join([rand(chars) for i in range(n)])


def makeLabel(data, uri, suggest=None):
    """Return a label not yet used by any instance of class uri in data.

    suggest is used if it is free; otherwise random three-character labels
    are tried, then one long random name.  Raises RuntimeError when all of
    those are already taken.
    """
    chars = 'bdefghjkmnoprstuvwyz357890'  # c, q, x redundant, l ~= 1

    def shortLabel():
        # a letter, then two letters-or-digits
        return rand(chars[:20]) + rand(chars) + rand(chars)

    q = eep3.formula(("?x", n.label, "?label"), ("?x", rdf.type, uri))
    taken = set()
    for b in eep3.query(data, q, 'bindings'):
        bound = b["?label"]
        # Compare on the literal's text: candidates are plain strings.
        taken.add(getattr(bound, 'value', bound))

    tid = suggest or shortLabel()
    for i in range(100):  # 20 * 26 * 26 = 13520 combinations
        if tid not in taken:
            return tid
        # @@ barf if suggestion in id_seq?
        tid = shortLabel()
    rn = longRandomName()
    if rn not in taken:
        return rn
    raise RuntimeError("Couldn't generate a label")


def createInstance(data, uri, words, mappings, args):
    """Build the Formula for a new instance of class uri.

    words    -- keywords the constructor always attaches
    mappings -- {argument name: property URI} from parseConstructor()
    args     -- {argument name: text} captured from the command line

    A 'label' argument mapped to n:label is only a suggestion for
    makeLabel(); a 'keywords' argument mapped to n:keywords is split on
    '.' and merged into words.  Remaining arguments become triples, and
    label, keywords, timestamp and type are always added.  The words and
    args passed in are left unmodified.
    """
    words = list(words)
    args = dict(args)
    f = eep3.Formula()
    subj = eep3.Article('_:' + longRandomName(10))
    label = '"%s"' % makeLabel(data, uri, args.get('label'))

    # @@ delete the suggested label, if any?
    if 'label' in args and '<%s>' % mappings['label'] == repr(n.label):
        del args['label']
    if 'keywords' in args:
        if '<%s>' % mappings['keywords'] == repr(n.keywords):
            words.extend(args['keywords'].split('.'))
            del args['keywords']

    for name in args.keys():
        pred = eep3.Article('<%s>' % mappings[name])
        if metaslurp(pred, rdfs.range, 1) == rdfs.Literal:
            f.append(eep3.triple(subj, pred, '"%s"' % args[name]))
        else:
            # NOTE(review): this branch hands eep3.triple the bare mapping
            # string and the raw argument text, not pred -- confirm intent.
            f.append(eep3.triple(subj, mappings[name], args[name]))

    for pred, obj in ((n.label, label),
                      (n.keywords, '"%s"' % ' '.join(words)),
                      (n.timestamp, '"%s"' % datetime()),
                      (rdf.type, uri)):
        f.append(eep3.triple(subj, pred, obj))
    return f


def createSimpleInstance(data, uri):
    """Build a Formula with just type, fresh label and timestamp for uri."""
    f = eep3.Formula()
    subj = eep3.Article('_:' + longRandomName(10))
    f.append(eep3.triple(subj, rdf.type, uri))
    f.append(eep3.triple(subj, n.label, '"%s"' % makeLabel(data, uri)))
    f.append(eep3.triple(subj, n.timestamp, '"%s"' % datetime()))
    return f


def getConstructors(uri):
    """Return the n:constructor strings for class uri, longest first."""
    meta = readTriples(meta_fn)
    constructors = [constructor[2].value for constructor in
                    meta.query(eep3.triple(uri, n.constructor, '?x'))]
    constructors.sort(key=len, reverse=True)
    return constructors


# %local or %prefix:local argument slots in a constructor string
p_re = re.compile('%(?:([a-z_]+):)?([A-Za-z_]+)')


def parseConstructor(s):
    """Split a constructor string into (keywords, mappings, regexp).

    The text before the first space is a '.' separated keyword list.  In
    the remainder every argument slot is mapped to its full property URI
    and replaced by a named group, giving a regexp to match a command line
    against.  Raises ValueError when s contains no space.
    """
    i = s.index(' ')
    keywords = [k for k in s[:i].split('.') if k != '']
    s = s[i+1:]
    prefixes = getNSPrefixes()
    mappings = dict([(local, prefixes[p] + local)
                     for p, local in p_re.findall(s)])
    return keywords, mappings, re.sub(p_re, r'(?P<\g<2>>.+)', s)


def parseClass(data, uri, cmd):
    """Create an instance of class uri from the command text cmd.

    The class's constructors are tried longest first and the first whose
    pattern matches cmd wins.  Raises ValueError when none matches.
    """
    cmd = cmd or ''  # a command given without arguments arrives as None
    for s in getConstructors(uri):
        keywords, mappings, regexp = parseConstructor(s)
        m = re.compile(regexp).match(cmd)
        if m:
            return createInstance(data, uri, keywords, mappings, m.groupdict())
    raise ValueError("Command line has no constructor")


qname_regexp = re.compile(r"^(?:([a-z_]+):)?([A-Za-z_]+)$")


def qnameToURI(qname):
    """Return the eep3.Article for a QName or a <URI>.

    Raises ValueError when qname is neither, and KeyError when its prefix
    has no binding in meta.
    """
    m = qname_regexp.match(qname)  # @@ m.groups -- redundant?
    if m:
        prefix, name = m.groups()
        return eep3.Article('<%s>' % (getNSPrefixes()[prefix or ''] + name))
    if qname.startswith('<') and qname.endswith('>'):
        return eep3.Article('<%s>' % qname[1:-1])
    raise ValueError("%r is not a valid QName or URI" % qname)


def uriToQName(uri):
    """Return the QName for uri, or '<uri>' if no bound namespace fits.

    uri may be a string or an eep3.Article.
    """
    if isinstance(uri, eep3.Article):
        uri = uri.value

    # reverse the NSprefixes
    rmappings = {}
    mappings = getNSPrefixes()
    for prefix in mappings.keys():
        rmappings[mappings[prefix]] = prefix

    # Longest namespace first: alphabetical order let a namespace that is
    # the start of a longer one win and yield the wrong local name.
    bindings = list(rmappings.keys())
    bindings.sort(key=len, reverse=True)
    for binding in bindings:
        if uri.startswith(binding):
            prefix = rmappings[binding]
            local = uri[len(binding):]
            if prefix:
                return prefix + ':' + local
            return local
    return '<%s>' % uri


def argToSubjObjt(pred, arg):
    """Split command text into (subject, object) Articles for pred.

    Whether each end is a quoted literal or a QName/URI follows pred's
    rdfs:domain and rdfs:range in meta.  With a non-literal domain the
    first word is the subject and the rest the object; with a literal
    domain the first word is the subject and the last word the object.
    """
    # get the range and domain for the uri from the meta
    domain = metaslurp(pred, rdfs.domain, 1)
    rng = metaslurp(pred, rdfs.range, 1)

    tokens = arg.split(' ')
    head, middle, tail = tokens[0], ' '.join(tokens[1:-1]), tokens[-1]

    if domain != rdfs.Literal:
        subj = qnameToURI(head)
        if rng != rdfs.Literal:
            rest = middle + tail
        else:
            # An empty middle is skipped, so a two-word argument no longer
            # yields an object with a leading space.
            rest = ' '.join([part for part in (middle, tail) if part])
    else:
        subj = eep3.Article('"%s"' % head)
        rest = tail

    if rng != rdfs.Literal:
        return subj, qnameToURI(rest)
    return subj, eep3.Article('"%s"' % rest)


def parseProperty(uri, cmd):
    """Print the triple that cmd states about property uri.

    The triple is only written to stdout, not stored.
    """
    # NOTE(review): addInfo passes the Article from qnameToURI() here, so
    # uri gets wrapped twice below -- confirm against eep3.Article.
    pred = eep3.Article('<%s>' % uri)
    subj, objt = argToSubjObjt(pred, cmd)
    write(subj, eep3.Article(uri), objt)


# Command line parsing
# @@ remove _ from prefixes?
command_regexp_s = r'^((?:[a-z_]+:)?[A-Za-z_]+):?(?: (.*))?$'
command_regexp = re.compile(command_regexp_s)

# "@name rest": name selects the handler in parse(), rest is its argument
_dispatch_re = re.compile(r'^@([a-z]*) ?(.*)$')


def _log(*args):
    """Write the arguments to stderr, space separated, plus a newline."""
    sys.stderr.write(' '.join(['%s' % (arg,) for arg in args]) + '\n')


def _splitCommand(arg):
    """Split "qname[:] rest" into (qname, rest); rest is None if absent.

    Raises ValueError when arg does not start with a QName.
    """
    m = command_regexp.match(arg)
    if m is None:
        raise ValueError("Malformed command: %r" % arg)
    return m.groups()


def addInfo(arg):
    """Handle "QNAME text": add a new item, or state a property.

    If QNAME names a registered rdfs:Class, an instance is built from text
    and appended to the data file; otherwise QNAME is handed to
    parseProperty() together with the text.
    """
    data = readTriples(data_fn)
    meta = readTriples(meta_fn)

    qname, cmd = _splitCommand(arg)
    uri = qnameToURI(qname)
    if DEBUG:
        _log("Got command:", qname, repr(cmd), uri)
    cmd = cmd or ''  # no text after the QName

    if eep3.triple(uri, rdf.type, rdfs.Class) in meta:
        thing = parseClass(data, uri, cmd)
        label = thing.query(eep3.triple("?x", n.label, "?y"))[0][2].value
        writeTriples(data_fn, thing)
        _log("Added %s(%s)" % (qname, label))
    else:
        parseProperty(uri, cmd)


def displayClass(arg):
    """Handle "QNAME [kw.kw...]": print the items of a class.

    Items carrying the keyword d are hidden unless the class's n:showdel
    in meta starts with something other than "0"; the optional '.'
    separated keywords narrow the listing further.  Raises ValueError
    when QNAME is not a registered rdfs:Class.
    """
    data = readTriples(data_fn)  # @@ when should we load these?
    meta = readTriples(meta_fn)  # @@ make n:label global, not just class-wide?

    qname, keywords = _splitCommand(arg)
    uri = qnameToURI(qname)
    if DEBUG:
        _log("Got command:", repr(uri))

    if eep3.triple(uri, rdf.type, rdfs.Class) not in meta:
        raise ValueError("%s is not a registered rdfs:Class" % uri)

    if (metaslurp(uri, n.showdel) or "0").startswith("0"):
        w = "-d"
    else:
        w = "d"
    if keywords:
        w += " " + ' '.join(keywords.split('.'))
    displayItems(getByClass(data, uri, keywords=w))


def addMetadata(arg):
    """Handle "PRED subject object": record a triple in the meta file.

    For an owl:FunctionalProperty the triple replaces the subject's
    existing one.  The whole meta file is then rewritten.
    """
    i = arg.index(' ')
    pred, arg = qnameToURI(arg[:i]), arg[i+1:]
    subj, objt = argToSubjObjt(pred, arg)

    # if it's an owl:FunctionalProperty, replace the triple
    meta = readTriples(meta_fn)
    if eep3.triple(pred, rdf.type, owl.FunctionalProperty) in meta:
        # NOTE(review): relies on remove() accepting a "?x" pattern -- confirm.
        meta.remove(eep3.triple(subj, pred, "?x"))
    meta.append(eep3.triple(subj, pred, objt))
    writeTriples(meta_fn, meta, 'w')
    _log('wrote:', subj, pred, objt)


def test(arg):
    """Print three sample uriToQName() conversions, then exit with status 0.

    arg is ignored.
    """
    # The scratch code that used to follow sys.exit(0) could never run
    # (and unpacked three values from a two-group regexp); it is gone.
    write(uriToQName('http://www.w3.org/2000/01/rdf-schema#Literal'))
    write(uriToQName('http://www.w3.org/2002/blargh#Something'))
    write(uriToQName('http://infomesh.net/2003/n/terms#Item'))
    sys.exit(0)


def addKeywords(cmd):
    """Handle "QNAME LABEL kw.kw...": add keywords to an existing item.

    Keywords the item already has are not repeated.  The data file is
    rewritten in full.  Raises ValueError when cmd is not made of three
    space separated parts or no item of that class carries LABEL.
    """
    parts = cmd.split(' ', 2)
    if len(parts) != 3:
        raise ValueError("Expected 'QNAME LABEL KEYWORDS', got %r" % cmd)
    qname, label, words = parts[0], parts[1], parts[2].split('.')

    data = readTriples(data_fn)
    uri = qnameToURI(qname)
    found = getByClass(data, uri, label=label)
    if not found:
        raise ValueError("No %s is labelled %r" % (qname, label))
    item = found[0]
    subj = item[0][0]

    current = item._triples[n.keywords][subj][0].value
    data.remove(eep3.triple(subj, n.keywords, '"%s"' % current))
    keywords = current.split(' ')
    for word in words:
        if word not in keywords:
            keywords.append(word)
    data.append(eep3.triple(subj, n.keywords, '"%s"' % ' '.join(keywords)))
    writeTriples(data_fn, data, 'w')


def addRawTriples(cmd):
    """Handle raw N-Triples text: parse it and append it to the data file."""
    if DEBUG:
        _log("Blargh, you're adding raw triples?")
    data = readTriples(data_fn)
    new = eep3.parseNTriples(cmd)
    data.extend(new)
    # Append only the new triples: writing `data` in append mode copied
    # the entire existing store onto the end of the file every time.
    writeTriples(data_fn, new)
    _log("Wrote %s triples to data storage" % len(new))


def showKeywords(cmd):
    """Handle "QNAME": print every keyword used by items of that class.

    Keywords are printed sorted, one per line, without repeats.
    """
    uri = qnameToURI(cmd)
    data = readTriples(data_fn)
    query = eep3.formula(("?x", n.keywords, "?key"), ("?x", rdf.type, uri))

    seen = {}
    for b in eep3.query(data, query, 'bindings'):
        for word in b["?key"].value.split(' '):
            seen[word] = 0
    keys = list(seen.keys())
    keys.sort()
    write('\n'.join(keys))


def parse(cmd):
    """Slurp in a command line and perform the relevant action.

    cmd is first rewritten by each "pattern, replacement" line found in
    custom.conf, if that file exists.  The result must be "@name args",
    which runs the handler registered for name, or ";;cmd;;cmd...", which
    parses each part in turn.  Raises ValueError for anything else.
    """
    # go through custom commands
    cf = 'custom.conf'
    try:
        fh = open(cf)
        try:
            custom = fh.read().splitlines()
        finally:
            fh.close()
    except IOError:
        custom = []

    for lineno, line in enumerate(custom):
        if not line.strip():
            continue  # blank lines are not rules
        try:
            # SECURITY: eval() executes whatever custom.conf contains.
            # Acceptable only while that file is written by the user.
            pattern, repl = eval('(%s)' % line)
        except Exception:
            _log("%s is borked at line %s" % (cf, lineno + 1))
        else:
            cmd = re.sub(pattern, repl, cmd)

    commands = {
        '': addInfo,
        'display': displayClass,
        'meta': addMetadata,
        'test': test,
        'raw': addRawTriples,
        'keywords': showKeywords,
        'addkey': addKeywords
    }

    m = _dispatch_re.match(cmd)
    if m:
        handler = commands.get(m.group(1))
        if handler is None:
            raise ValueError("Unknown command: %r" % cmd)
        handler(m.group(2))
    elif cmd.startswith(';;'):
        for part in cmd[2:].split(';;'):
            parse(part)
    else:
        raise ValueError("Unknown command: %r" % cmd)  # @@ proper raises


def main(argv):
    """Join the command line arguments into one command and run it."""
    parse(' '.join(argv[1:]))


if __name__ == "__main__":
    main(sys.argv)