import sys
import string
import random
import pickle
import util
# Positions of the subject, predicate, object and context terms in a quad.
S, P, O, C = range(4)
# Positions of the value and the type inside a single (value, type) term.
V, T = range(2)

# Identifiers stored in the type slot of a term.
URI = 'tag:infomesh.net,2001-08-07:URI'
LIT = 'tag:infomesh.net,2001-08-07:Literal'
ANON = 'tag:infomesh.net,2001-08-07:Anon'
RC = 'tag:infomesh.net,2001-08-07:RootContext'
CONT = 'tag:infomesh.net,2001-08-07:Context'  # For extensibility

# Namespace of the core RDF vocabulary.
RDF_NS = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
class RDFStore:
    """In-memory store for RDF statements, held as a plain Python list.

    RDFStore() creates a new empty store, or you can feed it some
    existing data (has to be a Python list) a la RDFStore(data=mydata).
    The list that is passed in is used as-is, not copied.

    The store is simply a list of quads. Each quad is made up of a
    subject [S], a predicate [P] and an object [O], as is traditional
    in triples, plus a context term [C]:-

        [[S, P, O, C], [S, P, O, C], [S, P, O, C], ...]

    Each term is a (value, type) pair, normally a tuple, indexed with
    the module constants V and T:-

        [[(V, T), (V, T), (V, T), (V, T)], ...]

    The value is the atomic value of the term, and the type is one of
    the module constants URI (resource), ANON (anonymous node), LIT
    (literal), RC (root context) or CONT (context).

    You could add more functions to this class by subclassing it, for
    example, when handling rules files.
    """

    def __init__(self, data=None):
        """Create a store, optionally wrapping an existing list of quads.

        data -- list of quads to use as the store's contents. When it
                is omitted (or None) the store starts with a fresh
                empty list.
        """
        # A literal [] default would be created once and then shared by
        # every store built without arguments, so use None as the marker.
        if data is None:
            data = []
        self.quads = data
        self.CurrentQuad = []
        self.CurrentContext = None
        self.rand = None
        self.bnodes = []

    def _ensurecontext(self, s):
        """Complete a three-term statement in place with a root context."""
        if len(s) == 3:
            s.append([util.generatecontext(), RC])

    def add(self, s):
        """Append the statement s to the store; duplicates are allowed.

        A statement with only three terms is first given a context term
        (the caller's list is modified in place). Returns None.
        """
        self._ensurecontext(s)
        self.quads.append(s)

    def addq(self, s):
        """Append the statement s unless an equal quad is already stored.

        A statement with only three terms is first given a context term
        (the caller's list is modified in place). Returns None.
        """
        self._ensurecontext(s)
        if s not in self.quads:
            self.quads.append(s)

    def merge(self, *s):  # @@
        """Merge any number of stores into the current one. For example:-

            b.merge(x, y, z)

        adds every quad of x, y and z to b.quads. The quad lists
        themselves are shared with the source stores, not copied.

        Each argument must expose its statements as a `quads` attribute
        (as RDFStore does); objects that only have the older `data`
        attribute are accepted too.
        """
        for m in s:
            quads = getattr(m, 'quads', None)
            if quads is None:
                quads = m.data
            # Iterate over a snapshot so that merging a store into
            # itself terminates instead of chasing its own growing list.
            for x in list(quads):
                self.add(x)

    def empty(self):
        """Empty the store by pointing it at a new empty list."""
        self.quads = []

    def rawstats(self):
        """Return a one-line summary of how many statements are stored."""
        return 'Number of triples in the document: ' + str(len(self.quads))

    def prettyformat(self):
        """Return the store's list repr with line breaks added, for output."""
        s = str(self.quads)
        # str methods instead of the string module's functions, which
        # no longer exist on Python 3.
        s = s.replace("[(", "[\n (")
        s = s.replace(")]", ")\n]")
        s = s.replace("',", "',\n")
        s = s.replace("),", "),\n")
        return s

    def ntriples(self, quads=None):
        """Return statements serialised as NTriples, one per line.

        quads -- the list of quads to serialise; when omitted (None)
                 the whole store is serialised. An explicitly passed
                 empty list yields an empty string.

        URI terms are written as <value>, anonymous terms as _:value
        and literals as "value". A term of any other type (for example
        the context term) contributes only its separating space.
        Literal values are written as stored; no escaping is applied.
        """
        if quads is None:
            quads = self.quads
        out = []
        for quad in quads:
            for term in quad:
                if term[T] == URI:
                    out.append('<' + term[V] + '>')
                elif term[T] == ANON:
                    out.append('_:' + term[V])
                elif term[T] == LIT:
                    out.append('"' + term[V] + '"')
                out.append(' ')
            out.append('.\n')
        return ''.join(out)

    def surf(self):
        """Return the store serialised as SURF.

        Only the subject, predicate and object terms are examined; any
        further term in a quad produces no output.
        """
        # NOTE(review): most literals below are whitespace-only, which
        # suggests the markup they once held was lost from this file.
        # They are reproduced exactly as found -- confirm against the
        # upstream source before relying on this output.
        out = ['\n']
        for quad in self.quads:
            out.append(' \n')
            for position, term in enumerate(quad):
                kind = term[T]
                if position == S and kind == URI:  # subj is a URI
                    out.append(' \n')
                elif position == S and kind == ANON:  # subj is ANON
                    out.append(' \n')
                elif position == S and kind == LIT:  # subj is a LIT
                    out.append(' ' + term[V] + '\n')
                elif position == P and kind == URI:  # pred is a URI
                    out.append(' \n')
                elif position == P and kind == ANON:  # pred is ANON
                    out.append(' \n')
                elif position == P and kind == LIT:  # pred is a LIT
                    out.append(' ' + term[V] + '\n')
                elif position == O and kind == URI:  # obj is a URI
                    out.append(' \n')
                elif position == O and kind == ANON:  # obj is ANON
                    out.append(' \n')
                elif position == O and kind == LIT:  # obj is a LIT
                    out.append(' ' + term[V] + '\n')
            out.append(' \n')
        return ''.join(out)

    def xrdf(self):
        """Return the store serialised as XML RDF.

        Predicates that are URIs are split into a namespace and a local
        name with util.uri2qname(). Values are inserted as stored; no
        XML escaping is applied (@@ encoding).
        """
        # NOTE(review): the bare '\n' literals below look like markup
        # was lost from this file; they are reproduced as found.
        out = ['\n']
        for quad in self.quads:
            for position, term in enumerate(quad):
                kind = term[T]
                t = str(term[V])
                if kind == URI:
                    # @@ separating out "x#y"
                    qname = util.uri2qname(term[V])
                    txmlns, tname = qname[0], qname[1]
                if position == S and kind == URI:
                    out.append('\n')
                elif position == S and kind == ANON:
                    out.append('\n')
                elif position == S and kind == LIT:  # @@ encoding
                    out.append('\n')
                elif position == P and kind == URI:
                    out.append('<' + tname + ' xmlns="' + txmlns + '" \n')
                elif position == P and kind == ANON:
                    sys.stderr.write('Kaboom!')  # i.e. not implementable
                    # A new URI Scheme...
                    out.append('<' + t + ' xmlns="anon:_" \n')
                elif position == P and kind == LIT:
                    # NOTE(review): tname/txmlns are only assigned for
                    # URI terms, so here they still hold the values of
                    # the most recent URI term seen (or are unbound if
                    # there has been none). Behaviour kept as found --
                    # decide how a literal predicate should be handled.
                    out.append('<' + tname + ' xmlns="' + txmlns + '" \n')
                elif position == O and kind == URI:
                    out.append(' rdf:resource="' + t + '"/>\n')
                elif position == O and kind == ANON:
                    out.append(' rdf:resource="anon:_' + t + '"/>\n')
                elif position == O and kind == LIT:  # This is a hack
                    out.append(' rdf:resource="data:,' + t + '"/>\n')
            out.append('\n')
        return ''.join(out)

    def test(self):
        """Return the pretty, NTriples and SURF renderings, one after another."""
        return '\n'.join([self.prettyformat(), self.ntriples(), self.surf()])

    def reify(self, replace=1):
        """Reify every statement in the store.

        For each stored quad a new anonymous node with a random numeric
        label is created, and three statements are generated that link
        that node to the quad's subject, predicate and object through
        rdf:subject, rdf:predicate and rdf:object. Any term after the
        object (the context) is carried over to the new statements.

        replace -- when true (the default) the store's contents are
                   replaced by the reified statements; when false the
                   store is left untouched.

        Returns the list of reified statements.
        """
        sub = (RDF_NS + 'subject', URI)
        pred = (RDF_NS + 'predicate', URI)
        obj = (RDF_NS + 'object', URI)
        reified = []
        for quad in self.quads:
            # The local must not be called `random`: that would hide
            # the module for the whole function and make the call fail.
            label = (str(random.randint(1, 10000000)), ANON)
            context = list(quad[3:])
            reified.append([label, sub, quad[S]] + context)
            reified.append([label, pred, quad[P]] + context)
            reified.append([label, obj, quad[O]] + context)
        if replace:
            self.quads = reified
        return reified

    def transfertoquads(self, quads=None):
        """Point the store at a new list of quads.

        quads -- the list to use as the store's contents. When it is
                 omitted or empty, the store instead switches to a
                 shallow copy of its current list.
        """
        if quads:
            self.quads = quads
        else:
            self.quads = self.quads[:]

    def pickle(self, file):
        """Write the store's quads to the path `file` using pickle."""
        # Binary mode is what pickle expects, and the with-block makes
        # sure the data is flushed and the handle closed.
        with open(file, 'wb') as f:
            pickle.dump(self.quads, f)

    def unpickle(self, file):
        """Replace the store's quads with those pickled at the path `file`.

        Unpickling can execute arbitrary code, so only load files that
        come from a trusted source.
        """
        with open(file, 'rb') as f:
            self.quads = pickle.load(f)

    def printquads(self):
        """Print the quads with line breaks added, then the bnodes list."""
        quads = str(self.quads).replace(', (', ', \n (')
        quads = quads.replace('], [', '], \n\n[')
        # Single parenthesised argument: prints the same text on
        # Python 2 and Python 3.
        print(quads + '\n' + str(self.bnodes))