import sys
import string
import random
import pickle

import util

# Positions of the terms inside a quad, and of the parts inside a term.
S, P, O, C = 0, 1, 2, 3
V, T = 0, 1

# Term type tags.
URI = 'tag:infomesh.net,2001-08-07:URI'
LIT = 'tag:infomesh.net,2001-08-07:Literal'
ANON = 'tag:infomesh.net,2001-08-07:Anon'
RC = 'tag:infomesh.net,2001-08-07:RootContext'
CONT = 'tag:infomesh.net,2001-08-07:Context'  # For extensibility

RDF_NS = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'


class RDFStore:
    """An internal store for RDF triples (actually, quads).

    It's basically a Python list. RDFStore() creates a new empty store,
    or you can feed it some data (has to be a Python list) a la
    RDFStore(data=mydata).

    An RDFStore then is simply a list of quads. Each quad is made up of
    a subject [S], a predicate [P], and an object [O], as is traditional
    in triples, plus a context flag [C]:-

       [[S, P, O, C], [S, P, O, C], [S, P, O, C], ...]

    Each term (S/P/O/C) is a two-item sequence, conventionally a tuple
    (i.e. immutable), which consists of a value [V] and a type [T]:-

       [[(V,T),(V,T),(V,T),(V,T)],[(V,T),(V,T),(V,T),(V,T)],...]

    The value is the atomic value of the term, and the type is one of
    Resource (URI), Anonymous (ANON), Literal (LIT), Root Context (RC),
    or Context (CONT).

    You could add more functions to this class by subclassing it, for
    example, when handling rules files.
    """

    def __init__(self, data=None):
        """Create a store, optionally wrapping an existing list of quads.

        data -- list of quads used directly as the backing list (it is
                not copied); leave it out for a new, empty store.
        """
        # A literal [] default would be a single list shared by every
        # store created without arguments.
        if data is None:
            data = []
        self.quads = data
        self.CurrentQuad = []
        self.CurrentContext = None
        self.rand = None
        self.bnodes = []

    def add(self, s):
        """Append the statement s to the store.

        A three-term statement is first extended, in place, with a
        root-context term obtained from util.generatecontext().
        """
        if len(s) == 3:
            s.append([util.generatecontext(), RC])
        self.quads.append(s)

    def addq(self, s):
        """Append the statement s unless an equal quad is already stored.

        As with add(), a three-term statement first receives a generated
        root-context term, and the comparison is made on the result.
        """
        if len(s) == 3:
            s.append([util.generatecontext(), RC])
        if s not in self.quads:
            self.quads.append(s)

    def merge(self, *s):  # @@
        """Merge any number of stores into the current store.

        For example:-

           b = docb
           x, y, z = docx, docy, docz
           b.merge(x, y, z)

        would add every quad of x, y and z to b.quads.
        """
        for m in s:
            # Stores keep their quads in .quads; objects exposing the
            # older .data attribute are still accepted.
            source = m.quads if hasattr(m, 'quads') else m.data
            # Iterate over a snapshot so that merging a store into
            # itself terminates.
            for x in list(source):
                self.add(x)

    def empty(self):
        """Empty the store by rebinding it to a new list."""
        self.quads = []

    def rawstats(self):
        """Return a one-line description of how many quads are stored."""
        return 'Number of triples in the document: ' + str(len(self.quads))

    def prettyformat(self):
        """Return str(self.quads) with line breaks inserted for output."""
        s = str(self.quads)
        # The replacements are applied in this order on purpose: later
        # ones also see text produced by earlier ones.
        for old, new in (("[(", "[\n ("),
                         (")]", ")\n]"),
                         ("',", "',\n"),
                         ("),", "),\n")):
            s = s.replace(old, new)
        return s

    def ntriples(self, quads=None):
        """Return the given quads (default: the whole store) as NTriples.

        quads -- optional list of quads to serialise; a missing or empty
                 value means "use self.quads".

        Every term of a quad is followed by a space, including terms
        whose type is not URI, ANON or LIT (which contribute no text).
        """
        if not quads:
            quads = self.quads
        out = []
        for quad in quads:
            for term in quad:
                if term[T] == URI:
                    out.append('<' + term[V] + '>')
                elif term[T] == ANON:
                    out.append('_:' + term[V])
                elif term[T] == LIT:
                    out.append('"' + term[V] + '"')
                out.append(' ')
            out.append('.\n')
        return ''.join(out)

    def surf(self):
        """Return the store serialised as SURF.

        NOTE(review): the markup that belongs inside the string literals
        below appears to be missing from this copy of the file (only
        whitespace is left). The literals are kept as found; restore the
        element text from the upstream source before relying on this
        output.
        """
        surf = '\n'
        for quad in self.quads:
            surf += ' \n'
            for term in range(len(quad)):
                kind = quad[term][T]
                if term == S and kind == URI:  # subj is a URI
                    surf += ' \n'
                elif term == S and kind == ANON:  # subj is ANON
                    surf += ' \n'
                elif term == S and kind == LIT:  # subj is a LIT
                    surf += ' ' + quad[term][V] + '\n'
                elif term == P and kind == URI:  # pred is a URI
                    surf += '\n'
                elif term == P and kind == ANON:  # pred is ANON
                    surf += '\n'
                elif term == P and kind == LIT:  # pred is a LIT
                    surf += quad[term][V] + '\n'
                elif term == O and kind == URI:  # obj is a URI
                    surf += ' \n'
                elif term == O and kind == ANON:  # obj is ANON
                    surf += ' \n'
                elif term == O and kind == LIT:  # obj is a LIT
                    surf += ' ' + quad[term][V] + '\n'
            surf += '\n'
        return surf

    def xrdf(self):
        """Return the store serialised as XML RDF.

        URI terms are split into a namespace and a local name with
        util.uri2qname().

        NOTE(review): as in surf(), some literals here appear to have
        lost their markup in this copy of the file and are kept as
        found.
        """
        xrdf = '\n'
        for quad in self.quads:
            for term in range(len(quad)):
                kind = quad[term][T]
                t = str(quad[term][V])
                if kind == URI:
                    qname = util.uri2qname(quad[term][V])
                    txmlns = qname[0]
                    tname = qname[1]
                # @@ separating out "x#y"
                if term == S and kind == URI:
                    xrdf += '\n'
                elif term == S and kind == ANON:
                    xrdf += '\n'
                elif term == S and kind == LIT:  # @@ encoding
                    xrdf += '\n'
                elif term == P and kind == URI:
                    xrdf += '<'+tname+' xmlns="'+txmlns+'" \n'
                elif term == P and kind == ANON:
                    sys.stderr.write('Kaboom!')  # i.e. not implementable
                    xrdf += '<'+t+' xmlns="anon:_" \n'  # A new URI Scheme...
                elif term == P and kind == LIT:
                    # NOTE(review): a literal predicate reuses whatever
                    # name/namespace was last computed for a URI term,
                    # and fails if no URI term has been seen yet. The
                    # intended output is unclear, so this is left as
                    # found.
                    xrdf += '<'+tname+' xmlns="'+txmlns+'" \n'
                elif term == O and kind == URI:
                    xrdf += ' rdf:resource="'+t+'"/>\n'
                elif term == O and kind == ANON:
                    xrdf += ' rdf:resource="anon:_'+t+'"/>\n'
                elif term == O and kind == LIT:  # This is a hack
                    xrdf += ' rdf:resource="data:,'+t+'"/>\n'
            xrdf += '\n'
        return xrdf

    def test(self):
        """Return the pretty, NTriples and SURF renderings joined by newlines."""
        return (str(self.prettyformat()) + '\n' +
                str(self.ntriples()) + '\n' +
                str(self.surf()))

    def reify(self, replace=1):
        """Reify every stored statement.

        Each quad is turned into three statements about a new anonymous
        node, using the rdf:subject, rdf:predicate and rdf:object
        properties to point at the original terms.

        replace -- when true (the default) the store's contents are
                   replaced by the reified statements; when false the
                   store is left untouched.

        Returns the list of reified statements.
        """
        sub = (RDF_NS + 'subject', URI)
        pred = (RDF_NS + 'predicate', URI)
        obj = (RDF_NS + 'object', URI)

        reified = []
        used = set()
        for x in self.quads:
            # Draw again on a clash so that two statements never share
            # the same anonymous node.
            label = str(random.randint(1, 10000000))
            while label in used:
                label = str(random.randint(1, 10000000))
            used.add(label)

            xlabel = [label, ANON]
            reified.append([xlabel, sub, x[S]])
            reified.append([xlabel, pred, x[P]])
            reified.append([xlabel, obj, x[O]])

        if replace:
            self.quads = reified
        self.reified = None  # scratch attribute; always cleared on exit
        return reified

    def transfertoquads(self, quads=None):
        """Rebind the store's quad list.

        quads -- list to install as self.quads. When it is missing or
                 empty, self.quads is replaced by a shallow copy of
                 itself instead.
        """
        if quads:
            self.quads = quads
        else:
            self.quads = self.quads[:]

    def pickle(self, file):
        """Write the quads to the file named by `file` using pickle."""
        # Binary mode is required by pickle on Python 3; the with block
        # makes sure the data is flushed and the file closed.
        with open(file, 'wb') as f:
            pickle.dump(self.quads, f)

    def unpickle(self, file):
        """Replace the quads with those pickled in the file named by `file`.

        NOTE: unpickling can run arbitrary code; only load files that
        come from a trusted source.
        """
        with open(file, 'rb') as f:
            self.quads = pickle.load(f)

    def printquads(self):
        """Print the quads, one term per line, followed by self.bnodes."""
        quads = str(self.quads).replace(', (', ', \n (')
        quads = quads.replace('], [', '], \n\n[')
        print(quads + '\n' + str(self.bnodes))