import sys
import string
import re
import util
import rdfstore

# Index constants. They are not referenced in this part of the file;
# presumably S/P/O/C are the subject, predicate, object and context
# positions within a quad -- TODO confirm against rdfstore.
S = 0
P = 1
O = 2
C = 3

# Presumably the value and type-tag positions within a (value, tag) term
# tuple such as the ones NTriples.appendq builds -- TODO confirm.
V = 0
T = 1

# Type tags. NTriples.appendq stores URI, LIT and ANON as the second item
# of each term tuple, and pairs RC with the current context as the last
# item of every quad.
URI = 'tag:infomesh.net,2001-08-07:URI'
LIT = 'tag:infomesh.net,2001-08-07:Literal'
ANON = 'tag:infomesh.net,2001-08-07:Anon'
RC = 'tag:infomesh.net,2001-08-07:RootContext'
CONT = 'tag:infomesh.net,2001-08-07:Context' # For extensibility

class NTriples(rdfstore.RDFStore): 
   """An NTriples document.
      cf. http://www.w3.org/TR/2001/WD-rdf-testcases-20010912/#ntriples

      The document is read as a whole, its newlines are normalised, and it
      is then handled one line at a time. Each statement line becomes a quad
      of three (value, type-tag) terms followed by a (context, RC) pair,
      which is passed to self.add(). Invalid input raises ValueError."""

   # A valid bNode name. Anchored at the end so that names which merely
   # *start* with a valid prefix (e.g. "a-b") are rejected, and compiled
   # once here rather than once per term.
   _bnode_name = re.compile(r'[A-Za-z][A-Za-z0-9]*$')

   def __init__(self):
      self.document = None
      rdfstore.RDFStore.__init__(self, data=[])
      # self.quads = rdfstore.RDFStore.quads
      # appendq() and correctbnode() read these. The base class may already
      # provide them (not visible from here), so only fill in what is missing.
      if not hasattr(self, 'CurrentQuad'): self.CurrentQuad = []
      if not hasattr(self, 'bnodes'): self.bnodes = []
      # One term: <uri>, _:bnode, or "literal" (with \" escapes allowed)
      self.t = r'(<[^>]+>|_:[^\s]+|\"(?:\\\"|[^"])*\")'
      # Statement terminator: a literal full stop with optional blanks.
      # The dot must be escaped, otherwise any character would terminate.
      self.eol = r'[ \t]*\.[ \t]*'
      self.rt = r'[ \t]*'+self.t+r'[ \t]+'+self.t+r'[ \t]+'+self.t+self.eol
      self.regt = re.compile(self.rt, re.S)
      # A whole line that is either a comment (possibly an empty one, "#")
      # or nothing but blanks. Anchored so junk after blanks is not ignored.
      self.comment = r'([ \t]*\#[^\n]*$)|([ \t]+$)'
      self.regc = re.compile(self.comment, re.S)

   def parsen(self, fn):
      """Parse an NTriples document passed as a file name."""
      f = open(fn, 'r')
      try: self.parse(f)
      finally: f.close() # Close the file even when parsing fails

   def parse(self, f):
      """Parse an NTriples document passed as an open file-like object."""
      self.document = f.read() # Read the file into self.document
      self.normnl() # Normalize the new lines in self.document
      self.parsent() # Parse self.document into self.quads

   def normnl(self):
      """Normalizes the newlines within a document: CRLF and bare CR both
         become LF. Raises ValueError if there is no document content."""
      if not self.document: raise ValueError('Document has no content')
      # CRLF first, so that it does not turn into two newlines
      self.document = self.document.replace('\r\n', '\n').replace('\r', '\n')

   def parsent(self):
      """Parse the document into self.quads"""
      # One context and one bNode suffix are used for the whole document
      self.CurrentContext = util.generatecontext()
      self.rand = util.generateint()
      for line in self.document.split('\n'):
         if len(line) == 0: continue # line has no content (a double '\n')
         self.appendq(line)

   def _bnodename(self, term):
      """Returns the name part of a '_:name' term, raising ValueError if the
         name is not a valid bNode name."""
      bnode = term[2:]
      # This filters out incorrect bNodes! CWM produced these...
      if self._bnode_name.match(bnode) is None:
         raise ValueError('bnode: "'+str(bnode)+'" is not a valid bnode')
      return bnode

   def appendq(self, line):
      """Parses and validates a line, and then adds it to self.quads.
         Comment and whitespace-only lines are accepted and ignored; any
         other line that is not a statement raises ValueError."""
      m = self.regt.match(line)
      if m is None:
         if self.regc.match(line): return # Just whitespace, or a comment
         raise ValueError('line: "'+line+'" isn\'t fine') # Validity error!
      # Build the quad and the bNode records locally, and commit them only
      # once the whole line has been accepted, so that a bad term cannot
      # leave half a quad behind to be merged into the next statement.
      quad, newbnodes = [], []
      for term in m.groups():
         if term[0] == '<' and term[-1] == '>': # Term is a URI-view
            quad.append((term[1:-1], URI))
         elif term[:2] == '_:': # Term is an unlabelled node
            # Check the bnode isn't already used in a different context
            bnode = self.correctbnode(self._bnodename(term))
            newbnodes.append((bnode, self.CurrentContext))
            quad.append((bnode, ANON))
         elif term[0] == '"' and term[-1] == '"': 
            quad.append((util.unescape(term[1:-1]), LIT))
         else: 
            raise ValueError('Term '+str(term)+' is not a valid NTriples term.')
      quad.append((self.CurrentContext, RC))
      self.bnodes.extend(newbnodes)
      self.CurrentQuad = quad
      self.add(quad)
      self.CurrentQuad = [] # Reset the current quad!

   def correctbnode(self, bnode): 
      """This checks to see if a bnode is already in the store under a different 
         root context, and if so appends self.rand to the name, repeating 
         until the name no longer clashes. Returns the resulting name."""
      suffix = str(self.rand) # Must be consistent! (str() in case it is an int)
      while 1:
         for ac in self.bnodes: 
            if ac[0] == bnode and ac[1] != self.CurrentContext: break
         else: return bnode # No clash with any other context
         bnode = bnode+suffix

   def validaten(self, fn):
      """Validate an NTriples document passed as a file name."""
      f = open(fn, 'r')
      try: self.validate(f)
      finally: f.close() # Close the file even when validation fails

   def validate(self, f):
      """Validate an NTriples document passed as an open file-like object."""
      self.document = f.read() # Read the file into self.document
      self.normnl(); self.validatent()

   def validatent(self): 
      """Validate the document: every non-empty line must be a statement, 
         a comment, or whitespace, and every bNode name must be valid. 
         Raises ValueError on the first offending line; otherwise prints 
         the (newline-normalised) document."""
      for line in self.document.split('\n'):
         if len(line) == 0: continue
         m = self.regt.match(line)
         if m is not None:
            # URIs and literals are already constrained by the regexp, so
            # only the bNode names need a further check.
            for term in m.groups():
               if term[:2] == '_:': self._bnodename(term)
         elif not self.regc.match(line):
            raise ValueError('line: "'+line+'" isn\'t fine')
      print(self.document)

def run(): 
   """Command-line driver: parse the NTriples file named by the first
      argument and print the result. With '-xrdf' as the second argument
      the output of xrdf() is printed; otherwise printquads() is called.
      Exits with a usage message when no file name is given."""
   args = sys.argv[1:] # Work on a copy; leave sys.argv itself untouched
   if not args:
      sys.exit('usage: '+sys.argv[0]+' file [-xrdf]')
   x = NTriples()
   x.parsen(args[0])
   if len(args) > 1 and args[1] == '-xrdf':
      print(x.xrdf())
   else: x.printquads()

# Main program

# Only run the command-line driver when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    run()

# Phew