1 from STTools import *
2 from ConvertXML import *
3 import sys, os
4 thisPath = os.path.dirname(os.path.abspath(__file__))
5 sys.path.append(os.path.abspath(os.path.join(thisPath,"../../JariSandbox/ComplexPPI/Source")))
6 import Tools.GeniaSentenceSplitter
7
def roundTrip(input, output, sentenceSplitter, workdir=None):
    """
    Convert an ST-format corpus to interaction XML, split it into
    sentences, convert it back to ST format and write the result.

    Progress messages are printed to stderr before each stage.

    input -- passed unchanged to loadSet() to load the ST-format documents
    output -- passed unchanged to writeSet() as the destination
    sentenceSplitter -- object providing makeSentences(xml, ...); the
        script entry point of this file passes the
        Tools.GeniaSentenceSplitter module
    workdir -- optional directory for intermediate files. When given it is
        created if it does not exist, and "documents.xml" and
        "sentences.xml" inside it are handed to the conversion and
        splitting steps as output paths. When None, no intermediate
        paths are passed.

    Returns None.
    """
    # Identity test against the None singleton instead of "!= None".
    keepIntermediates = workdir is not None
    if keepIntermediates and not os.path.exists(workdir):
        os.makedirs(workdir)

    print >> sys.stderr, "Loading STFormat"
    documents = loadSet(input)

    print >> sys.stderr, "Converting to Interaction XML"
    # NOTE(review): the "ER" argument is only passed in the workdir branch;
    # without a workdir toInteractionXML() falls back to its own defaults.
    # Confirm that this difference is intended.
    if keepIntermediates:
        xml = toInteractionXML(documents, "ER", os.path.join(workdir, "documents.xml"))
    else:
        xml = toInteractionXML(documents)

    print >> sys.stderr, "Splitting Sentences"
    # The return value of makeSentences() is ignored, so the splitter
    # presumably updates xml in place -- TODO confirm.
    # NOTE(review): postProcess=True is likewise only requested when a
    # workdir is given; verify against makeSentences()'s default.
    if keepIntermediates:
        sentenceSplitter.makeSentences(xml, os.path.join(workdir, "sentences.xml"), postProcess=True)
    else:
        sentenceSplitter.makeSentences(xml)

    print >> sys.stderr, "Converting back to STFormat"
    documents = toSTFormat(xml)

    print >> sys.stderr, "Writing STFormat"
    writeSet(documents, output)
27
if __name__=="__main__":
    # Command-line entry point: parse the options and run one
    # ST format -> interaction XML -> ST format round trip.
    import sys

    from optparse import OptionParser

    # Psyco is optional: use it when it can be imported, otherwise just
    # report that it is missing and continue.
    try:
        import psyco
        psyco.full()
        print >> sys.stderr, "Found Psyco, using"
    except ImportError:
        print >> sys.stderr, "Psyco not installed"

    optparser = OptionParser(description="Convert ST format to interaction XML and back")
    # The help texts describe what roundTrip() actually does with each
    # value: input and output are ST format, workdir is a directory.
    optparser.add_option("-i", "--input", default=None, dest="input", help="Input corpus in ST format", metavar="FILE")
    optparser.add_option("-o", "--output", default=None, dest="output", help="Output location for the round-tripped corpus in ST format.")
    optparser.add_option("-w", "--workdir", default=None, dest="workdir", help="Directory for the intermediate interaction XML files (created if missing).", metavar="DIR")
    (options, args) = optparser.parse_args()

    # The GeniaSentenceSplitter module itself is passed as the splitter;
    # roundTrip() only calls its makeSentences() function.
    roundTrip(input=options.input, output=options.output, sentenceSplitter=Tools.GeniaSentenceSplitter, workdir=options.workdir)
47