Package TEES :: Package Utils :: Package InteractionXML :: Module ResolveIdentityChains
[hide private]

Source Code for Module TEES.Utils.InteractionXML.ResolveIdentityChains

 1  import sys, os 
 2  try: 
 3      import xml.etree.cElementTree as ET 
 4  except ImportError: 
 5      import cElementTree as ET 
 6  import Utils.ElementTreeUtils as ETUtils 
 7  extraPath = os.path.dirname(os.path.abspath(__file__))+"/../.." 
 8  sys.path.append(extraPath) 
 9  from Utils.ProgressCounter import ProgressCounter 
10  import Core.SentenceGraph as SentenceGraph 
11   
def resolveIdentityChain(identityChainDict, entityId):
    """
    Follow identity links from entityId to the end of its chain.

    @param identityChainDict: maps an entity id to the id of the entity
        that replaces it
    @param entityId: the id from which to start following links
    @return: the id at the end of the chain. If entityId has no entry in
        identityChainDict it is returned as is. If the links loop back on
        themselves (e.g. A->B and B->A, or A->A) there is no well-defined
        end, so entityId is returned unchanged instead of looping forever.
    """
    visited = set([entityId])
    currentId = entityId
    while currentId in identityChainDict:
        currentId = identityChainDict[currentId]
        if currentId in visited:
            # Cyclic chain: leave the reference untouched
            return entityId
        visited.add(currentId)
    return currentId

# Command line entry point: loads an interaction XML corpus, redirects the
# e1/e2 references of all non-identity interactions through the identity
# interactions of the same sentence, and writes the corpus to the output file.
if __name__=="__main__":
    print >> sys.stderr, "##### Resolve identity chains #####"
    from optparse import OptionParser
    optparser = OptionParser(usage="%prog [options]\n")
    optparser.add_option("-i", "--input", default=None, dest="input", help="File from which is read the XML-structure from which elements are copied", metavar="FILE")
    optparser.add_option("-o", "--output", default=None, dest="output", help="The file to which the new XML structure is saved. If None, will be the same as target.", metavar="FILE")
    optparser.add_option("-t", "--tokenization", default=None, dest="tokenization", help="Tokenization element name")
    optparser.add_option("-p", "--parse", default=None, dest="parse", help="Parse element name")
    (options, args) = optparser.parse_args()

    print >> sys.stderr, "Loading input file", options.input
    corpusElements = SentenceGraph.loadCorpus(options.input, options.parse, options.tokenization)

    counter = ProgressCounter(len(corpusElements.sentences), "Resolving chains")
    # Interaction attributes that hold entity ids
    tags = ["e1","e2"]
    for sentence in corpusElements.sentences:
        counter.update(1, "Resolving chains for ("+sentence.sentence.attrib["id"]+"): ")
        # Pass 1: for each identity interaction, map the entity whose head
        # token has the lower score to the other one (a tie keeps e1).
        identityChainDict = {}
        tokenHeadScores = sentence.sentenceGraph.getTokenHeadScores()
        for interaction in sentence.interactions:
            if interaction.attrib["type"] != "identity":
                continue
            e1Id = interaction.attrib["e1"]
            e2Id = interaction.attrib["e2"]
            t1 = sentence.sentenceGraph.entityHeadTokenByEntity[sentence.entitiesById[e1Id]]
            t2 = sentence.sentenceGraph.entityHeadTokenByEntity[sentence.entitiesById[e2Id]]
            if tokenHeadScores[t2] > tokenHeadScores[t1]:
                identityChainDict[e1Id] = e2Id
            else:
                identityChainDict[e2Id] = e1Id
        # Pass 2: point every non-identity interaction at the end of the
        # identity chain of each of its arguments.
        for interaction in sentence.interactions:
            if interaction.attrib["type"] == "identity":
                continue
            for tag in tags:
                resolvedId = resolveIdentityChain(identityChainDict, interaction.attrib[tag])
                if resolvedId != interaction.attrib[tag]:
                    interaction.attrib[tag] = resolvedId

    print >> sys.stderr, "Writing output", options.output
    ETUtils.write(corpusElements.rootElement, options.output)
52