"""SWIPTQL: an NTriples parser extended with ``?x`` query variables."""

import sys
import string
import re

import util
import ntriples
import rdfstore

# Positions of subject, predicate, object and context within a quad
# (appendq builds quads in exactly this order).
S, P, O, C = 0, 1, 2, 3
# Positions of value and type within a single (value, type) term tuple.
V, T = 0, 1

# Type tags attached to every term appended to a quad.
URI = 'tag:infomesh.net,2001-08-07:URI'
LIT = 'tag:infomesh.net,2001-08-07:Literal'
ANON = 'tag:infomesh.net,2001-08-07:Anon'
VAR = 'tag:infomesh.net,2001-08-07:Var'
RC = 'tag:infomesh.net,2001-08-07:RootContext'
CONT = 'tag:infomesh.net,2001-08-07:Context'

# For extensibility
# NTriples = ntriples.NTriples

# Shape a bNode label / variable name must start with. Compiled once here
# instead of once per term inside the parsing loop.
# NOTE(review): this is applied with .match(), so it only anchors at the
# start of the label ("a-b" is accepted). Kept as in the original; confirm
# whether a full match was intended.
_LABEL = re.compile(r'[A-Za-z][A-Za-z0-9]*', re.S)


class SWIPTQL(ntriples.NTriples):
    """A simple query language: NTriples with ?x vars."""

    def __init__(self):
        ntriples.NTriples.__init__(self)
        # A single term: <uri>, _:bnode, ?variable or "literal".
        self.q = r'(<[^>]+>|_:[^\s]+|\?[^\s]+|\"(?:\\\"|[^"])*\")'
        # A full statement: three terms followed by the end-of-line pattern
        # (self.eol is presumably provided by ntriples.NTriples).
        self.qt = (r'[ \t]*' + self.q + r'[ \t]+' + self.q
                   + r'[ \t]+' + self.q + self.eol)
        self.regq = re.compile(self.qt, re.S)
        # (name, context) pairs for every ?variable seen so far.
        self.uvs = []

    def appendq(self, line):
        """Parse and validate one line and pass the resulting quad to self.add.

        Each of the three terms on the line is appended to self.CurrentQuad
        as a (value, type) tuple, followed by (self.CurrentContext, RC).
        The quad is then handed to self.add and self.CurrentQuad is reset.
        Lines matching self.regc (whitespace or a comment) are ignored.

        Raises:
            ValueError: if the line is neither a statement nor ignorable,
                or if a bNode label or variable name is malformed.
                (The original raised bare strings, which modern Python
                rejects with a TypeError that hides the real message.)
        """
        if self.regq.match(line):
            terms = self.regq.findall(line)[0]
            for term in terms:
                if term[0] == '<' and term[-1] == '>':
                    # Term is a URI-view
                    self.CurrentQuad.append((term[1:-1], URI))
                elif term[:2] == '_:':
                    # Term is an unlabelled node
                    bnode = term[2:]
                    # This filters out incorrect bNodes! CWM produced these...
                    if not _LABEL.match(bnode):
                        raise ValueError('bnode: "' + str(bnode)
                                         + '" is not a valid bnode')
                    # Check the bnode isn't already used in a different
                    # context
                    bnode = self.correctbnode(bnode)
                    self.bnodes.append((bnode, self.CurrentContext))
                    self.CurrentQuad.append((bnode, ANON))
                elif term[0] == '"' and term[-1] == '"':
                    self.CurrentQuad.append((util.unescape(term[1:-1]), LIT))
                elif term[0] == '?':
                    # Term is a query variable
                    name = term[1:]
                    if not _LABEL.match(name):
                        raise ValueError('variable: "' + str(name)
                                         + '" is not a valid variable name')
                    # The corrected name is recorded in self.uvs, but the
                    # quad keeps the name exactly as written.
                    # NOTE(review): the bNode branch stores the corrected
                    # name in the quad instead; confirm this asymmetry is
                    # intentional.
                    corrected = self.correctbnode(name)
                    self.uvs.append((corrected, self.CurrentContext))
                    self.CurrentQuad.append((name, VAR))
                else:
                    raise ValueError('Term ' + str(term)
                                     + ' is not a valid NTriples term.')
            self.CurrentQuad.append((self.CurrentContext, RC))
            self.add(self.CurrentQuad)
            self.CurrentQuad = []  # Reset the current quad!
        elif self.regc.match(line):
            pass  # Line is just whitespace, or is a comment
        else:
            # Validity error!
            raise ValueError('line: "' + line + '" isn\'t fine')


def run():
    """Parse the input named by the first command-line argument and print it.

    Exits with a usage message when no argument is supplied.
    """
    if len(sys.argv) < 2:
        sys.exit('usage: ' + sys.argv[0] + ' INPUT')
    x = SWIPTQL()
    # NOTE(review): the same input is parsed three times, as in the
    # original; confirm whether this is a deliberate test or a leftover.
    x.parsen(sys.argv[1])
    x.parsen(sys.argv[1])
    x.parsen(sys.argv[1])
    x.printquads()


# Main program

if __name__ == "__main__":
    run()

# Phew