#!/usr/bin/python
"""NTuples module for the Semantic Web API."""

__author__ = 'Sean B. Palmer'
__license__ = 'Copyright (C) 2002 Sean B. Palmer. GNU GPL 2'

import re

import api


def group(*n):
    """Return regex source for one capturing group alternating over *n*."""
    return '(%s)' % '|'.join(n)


# Regex sources for the individual token kinds.
URI = r'<[^ >]*>'                        # '<...>' containing no space or '>'
Name = r'[A-Za-z][A-Za-z0-9_]*'          # letter, then letters/digits/'_'
bNode = r'_:' + Name                     # '_:' followed by a Name
Univar = r'\?' + Name                    # '?' followed by a Name
Literal = r'"[^"\\]*(?:\\.[^"\\]*)*"'    # double-quoted, backslash escapes
WS = r'[ \t\n\r]'                        # a single whitespace character

# One alternation over every token kind; the statement terminator '.' is a
# token of its own.  Alternatives are tried in the order listed.
Tokens = group(URI, Literal, bNode, Name, Univar, r'\.', WS)
Token = re.compile(Tokens, re.S)

# A line consisting only of optional leading blanks and a '#' comment.
Comment = re.compile(r'([ \t]*\#[^\n]*)', re.S)

# Whitespace tokens dropped by toke() when filtering is enabled.
_WHITESPACE = frozenset(' \t\n\r')

# Line-oriented triple syntax used by parsent(): three terms separated by a
# single blank, then optional blanks, a literal '.', and optional blanks.
_NT_TERM = r'(<.*?>|_:\S+|\?\S+|"[^"\\]*(?:\\.[^"\\]*)*")'
_NT_LINE = re.compile(
    r'^[ \t]*%s[ \t]%s[ \t]%s[ \t]*\.[ \t]*$' % (_NT_TERM, _NT_TERM, _NT_TERM)
)


def _split_lines(s):
    """Split *s* into lines, treating '\\r\\n' and '\\r' like '\\n'."""
    return s.replace('\r\n', '\n').replace('\r', '\n').split('\n')


def notComment(s):
    """Return '' if the line *s* is a '#' comment line, otherwise *s*."""
    if Comment.match(s):
        return ''
    return s


def toke(s, FILTER=1):
    """Tokenize the document *s* and return a raw token list.

    Whole-line '#' comments are blanked before tokenizing.  When *FILTER*
    is true (the default) whitespace tokens are dropped from the result;
    otherwise they are kept.

    Raises ValueError if the document is empty.
    """
    if not s:
        raise ValueError('Document has no content')
    text = '\n'.join([notComment(line) for line in _split_lines(s)]).strip()
    tokens = Token.findall(text)
    if FILTER:
        # A real list (not a lazy filter object) so the result is indexable
        # and re-iterable on Python 3 as well.
        return [tok for tok in tokens if tok not in _WHITESPACE]
    return tokens


class _TokenParser(object):
    """Shared token-accumulation logic for the statement parsers.

    Tokens are collected in ``self.data`` until a '.' token arrives, at
    which point ``makeTuple()`` turns them into one statement in
    ``self.store`` and clears the buffer.
    """

    def __init__(self, store, t=None):
        self.data, self.store = [], store
        if t is not None:
            self.feedTokens(t)

    def token(self, t):
        """Deposit a token; a '.' token completes the current statement."""
        if t != '.':
            self.data.append(t)
        else:
            self.makeTuple()

    def feedTokens(self, tokens):
        """Deposit every token from the iterable *tokens*, in order."""
        for t in tokens:
            self.token(t)

    def makeTuple(self):
        """Convert the buffered tokens into a statement (subclass hook)."""
        raise NotImplementedError

    def give(self):
        """Return the store holding the statements parsed so far."""
        return self.store


class NTuplesParser(_TokenParser):
    """Build an api.TupleStore of arbitrary-length tuples from tokens."""

    def __init__(self, t=None):
        _TokenParser.__init__(self, api.TupleStore(), t)

    def makeTuple(self):
        """Append the buffered tokens to the store as a single api.Tuple."""
        self.store.append(api.Tuple([api.Article(x) for x in self.data]))
        self.data = []


class NQuadsParser(_TokenParser):
    """Build an api.Store of triples, each with an optional fourth term."""

    def __init__(self, t=None):
        _TokenParser.__init__(self, api.Store(), t)

    def makeTuple(self):
        """Append the buffered tokens to the store as a triple plus context.

        A statement with exactly three terms gets None as its fourth term.
        Raises IndexError if fewer than three terms were buffered.
        """
        if len(self.data) == 3:
            self.data.append(None)
        # NOTE(review): the fourth term goes through repr(), so a missing
        # one reaches the store as the string 'None', while serialize()
        # tests `formula is None` -- confirm how api.Store handles this.
        self.store.append(
            api.Triple([api.Article(x) for x in self.data[:3]]),
            repr(self.data[3])
        )
        self.data = []


def parsent(s, strict=1):
    """Parse *s* line by line as triples and return an api.TripleStore.

    Each line must hold three terms and a terminating '.'.  When *strict*
    is true, any other line that is not blank and not a '#' comment raises
    ValueError; when false, such lines are silently skipped.
    """
    store = api.TripleStore()
    for line in _split_lines(s):
        match = _NT_LINE.match(line)
        if match:
            subj, pred, obj = match.groups()
            store.append(api.Triple(
                [api.Article(subj), api.Article(pred), api.Article(obj)]
            ))
        elif strict:
            stripped = line.strip()
            # Blank lines and comment lines are the only non-statement
            # lines tolerated in strict mode.
            if stripped and not stripped.startswith('#'):
                raise ValueError('Line is invalid: %r' % (line,))
    return store


def parseNTuples(s):
    """Tokenize *s* and return the store built by NTuplesParser."""
    return NTuplesParser(toke(s)).give()


def parse(s):
    """Tokenize *s* and return the store built by NQuadsParser."""
    return NQuadsParser(toke(s)).give()


def _statement(terms, extra=()):
    """Render one statement line: repr of each term, *extra*, then ' .'."""
    return ' '.join([repr(y) for y in terms] + list(extra)) + ' .'


def serialize(s):
    """Serialize the store *s* to text, one '.'-terminated line per statement.

    api.TupleStore and api.TripleStore are written as plain statements.
    For an api.Store, statements are grouped by formula in sorted order,
    each group preceded by a newline; statements under a non-None formula
    carry that formula as an extra trailing term.

    Raises TypeError for any other kind of store.
    """
    if isinstance(s, (api.TupleStore, api.TripleStore)):
        return '\n'.join([_statement(x) for x in s])
    elif isinstance(s, api.Store):
        formulae = s.formulae()
        formulae.sort()
        parts = []
        for formula in formulae:
            extra = () if formula is None else (formula,)
            parts.append(
                '\n' + '\n'.join([_statement(t, extra) for t in s[formula]])
            )
        return ''.join(parts)
    else:
        raise TypeError("Unknown store type: " + repr(s.__class__))


if __name__ == "__main__":
    print(__doc__)