#!/usr/bin/python
"""Normalizes the NTriples output from CWM."""

# Usage:
#   <script> FILE    normalize FILE and print the result
#   <script> -p      normalize standard input and print the result
# With no arguments the module docstring above is printed, so its text is
# part of the program's output and is kept unchanged.

__author__ = 'Sean B. Palmer'
__license__ = 'Copyright (C) 2001 Sean B. Palmer. GNU GPL 2'

import re
import sys

# A triple-quoted ("long") string literal: an opening """, then any mix of
# backslash escapes, ordinary characters and quotes that do not begin a
# closing """, then the closing """.
_LONG_LITERAL = re.compile(
    r'"""[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""', re.S)

# One or more consecutive spaces/tabs, collapsed to a single space.
_BLANK_RUN = re.compile(r'[ \t]+')


def _shorten_literal(match):
    """Turn one matched long literal into a single-line short literal."""
    # Real newlines become the two-character escape "\n"; dropping two
    # characters from each end turns the """ delimiters into plain ".
    # NOTE(review): bare '"' characters inside the literal are not escaped
    # here (same as the original code) -- confirm whether CWM can emit them.
    return match.group(0).replace('\n', '\\n')[2:-2]


def normalize(f):
    """Return the text read from *f* in a normalized, line-oriented form.

    *f* is any object with a ``read()`` method returning the whole input
    as a string.  The following steps are applied, in order:

    1. carriage returns are removed;
    2. triple-quoted literals are rewritten as single-line double-quoted
       literals, with embedded newlines written as ``\\n``;
    3. every run of spaces and tabs is collapsed to one space (this is
       applied to the whole text, including the inside of literals);
    4. each line is stripped of surrounding whitespace;
    5. empty lines and lines starting with ``#`` are dropped.

    The surviving lines are joined with ``\\n``, without a trailing
    newline.  Empty or comment-only input yields the empty string.
    """
    text = f.read().replace('\r', '')
    text = _LONG_LITERAL.sub(_shorten_literal, text)
    text = _BLANK_RUN.sub(' ', text)
    stripped = (line.strip() for line in text.split('\n'))
    # Testing for emptiness first keeps blank lines from ever being
    # indexed, so empty input cannot raise.
    return '\n'.join(
        line for line in stripped if line and not line.startswith('#'))


def _main(argv):
    """Run the command-line interface described at the top of the file."""
    if '-p' in argv or '--p' in argv:
        print(normalize(sys.stdin))
    elif len(argv) > 1:
        # The context manager guarantees the input file is closed.
        with open(argv[1], 'r') as source:
            print(normalize(source))
    else:
        print(__doc__)


if __name__ == "__main__":
    _main(sys.argv)