Package TEES :: Package Utils :: Package InteractionXML :: Module FixAltOffsets
[hide private]

Source Code for Module TEES.Utils.InteractionXML.FixAltOffsets

 1  __version__ = "$Revision: 1.1 $" 
 2   
 3  import sys,os 
 4  try: 
 5      import xml.etree.cElementTree as ET 
 6  except ImportError: 
 7      import cElementTree as ET 
 8  import Utils.ElementTreeUtils as ETUtils 
 9  import Utils.Range as Range 
10   
11  sys.path.append(os.path.dirname(os.path.abspath(__file__))+"/..") 
12  from Utils.ProgressCounter import ProgressCounter 
13   
def fixAltOffsets(input, output=None):
    """
    Rewrite every entity "altOffset" attribute relative to its sentence.

    For each "sentence" element in the corpus, the begin value of the
    sentence's "charOffset" is subtracted from both ends of the "altOffset"
    of each child "entity" element. Entities without an "altOffset"
    attribute are left untouched. The tree is modified in place.

    @param input: the corpus, in any form accepted by ETUtils.ETFromObj
    @param output: if not None, the modified corpus is written here with
                   ETUtils.write
    @return: the (modified) corpus tree returned by ETUtils.ETFromObj
    """
    print >> sys.stderr, "Loading corpus", input
    corpusTree = ETUtils.ETFromObj(input)
    print >> sys.stderr, "Corpus file loaded"
    corpusRoot = corpusTree.getroot()

    # Materialise the sentence list first so the progress counter knows the total.
    sentences = list(corpusRoot.getiterator("sentence"))
    counter = ProgressCounter(len(sentences), "FixAltOffsets")
    fixCount = 0
    # fix spans
    for sentence in sentences:
        counter.update(1, "Fixing AltOffsets for sentence ("+sentence.get("id")+"): ")
        sentOffset = Range.charOffsetToSingleTuple(sentence.get("charOffset"))
        for entity in sentence.findall("entity"):
            altOffsetString = entity.get("altOffset")
            if altOffsetString is None:
                continue
            altOffsets = Range.charOffsetToTuples(altOffsetString)
            # Only single-span alternative offsets are expected here.
            assert len(altOffsets) == 1
            # Shift both ends by the sentence's begin offset.
            altOffsets = [(altOffset[0] - sentOffset[0], altOffset[1] - sentOffset[0])
                          for altOffset in altOffsets]
            entity.set("altOffset", Range.tuplesToCharOffset(altOffsets))
            fixCount += 1

    print >> sys.stderr, "Fixed", fixCount, "altOffsets"

    if output is not None:
        print >> sys.stderr, "Writing output to", output
        ETUtils.write(corpusRoot, output)
    return corpusTree

# Command-line entry point: read an interaction XML corpus, fix its
# altOffsets and optionally write the result to a file.
if __name__=="__main__":
    from optparse import OptionParser
    # Import Psyco if available
    try:
        import psyco
        psyco.full()
        print >> sys.stderr, "Found Psyco, using"
    except ImportError:
        print >> sys.stderr, "Psyco not installed"

    optparser = OptionParser(usage="%prog [options]\n")
    optparser.add_option("-i", "--input", default=None, dest="input", help="Corpus in interaction xml format", metavar="FILE")
    optparser.add_option("-o", "--output", default=None, dest="output", help="Output file in interaction xml format.")
    (options, args) = optparser.parse_args()

    # The original called makeSentences(), which is not defined in this
    # module; the function this script exposes is fixAltOffsets().
    fixAltOffsets(input=options.input, output=options.output)