# Module set-up: standard library imports, project path extension and
# project imports.
import sys
import os

# Prefer the C-accelerated ElementTree. Fall back first to the stand-alone
# cElementTree package and finally to the pure-Python implementation, so the
# import also succeeds on interpreters that ship neither cElementTree variant.
try:
    import xml.etree.cElementTree as ET
except ImportError:
    try:
        import cElementTree as ET
    except ImportError:
        import xml.etree.ElementTree as ET

# The project root (two directories above this file) must be on sys.path
# BEFORE any project package is imported. Appending keeps the priority of
# entries that are already on the path.
extraPath = os.path.dirname(os.path.abspath(__file__))+"/../.."
sys.path.append(extraPath)

import Utils.ElementTreeUtils as ETUtils
from Utils.ProgressCounter import ProgressCounter
import Core.SentenceGraph as SentenceGraph
11
def resolveIdentityChain(entityId, identityChainDict):
    """
    Follow identity links from entityId until an id with no further link.

    entityId -- the entity id to resolve
    identityChainDict -- dictionary {replaced entity id: replacing entity id}

    Returns the id at the end of the chain. If entityId has no link it is
    returned as such. If the links reachable from entityId form a cycle
    there is no end of the chain, and entityId is returned unchanged
    instead of looping forever.
    """
    visited = set([entityId])
    currentId = entityId
    while currentId in identityChainDict:
        currentId = identityChainDict[currentId]
        if currentId in visited:
            # Cyclic identity links: no well-defined replacement exists
            return entityId
        visited.add(currentId)
    return currentId

def buildIdentityChainDict(sentence):
    """
    Collect the identity links of one sentence.

    For each interaction of type "identity" the entity whose head token has
    the lower head score is mapped to the other entity. When the scores are
    equal, e2 is mapped to e1. A later identity interaction with the same
    replaced entity overwrites an earlier one. An identity interaction that
    links an entity to itself is ignored.

    sentence -- a sentence object of the corpus loaded with
                SentenceGraph.loadCorpus; its "interactions", "entitiesById"
                and "sentenceGraph" attributes are read

    Returns a dictionary {replaced entity id: replacing entity id}.
    """
    identityChainDict = {}
    # Requested for every sentence, also for those without identity
    # interactions, exactly as before.
    tokenHeadScores = sentence.sentenceGraph.getTokenHeadScores()
    headTokens = sentence.sentenceGraph.entityHeadTokenByEntity
    for interaction in sentence.interactions:
        if interaction.attrib["type"] != "identity":
            continue
        e1Id = interaction.attrib["e1"]
        e2Id = interaction.attrib["e2"]
        t1 = headTokens[sentence.entitiesById[e1Id]]
        t2 = headTokens[sentence.entitiesById[e2Id]]
        if tokenHeadScores[t2] > tokenHeadScores[t1]:
            replacedId, replacingId = e1Id, e2Id
        else:
            replacedId, replacingId = e2Id, e1Id
        # A self-link carries no information and would only create a cycle
        if replacedId != replacingId:
            identityChainDict[replacedId] = replacingId
    return identityChainDict

if __name__=="__main__":
    # Status messages go to stderr; sys.stderr.write produces the same
    # output as the print statement on both Python 2 and Python 3.
    sys.stderr.write("##### Resolve identity chains #####\n")
    from optparse import OptionParser
    optparser = OptionParser(usage="%prog [options]\n")
    optparser.add_option("-i", "--input", default=None, dest="input", help="File from which is read the XML-structure from which elements are copied", metavar="FILE")
    # NOTE(review): the help text promises a fallback when no output is
    # given, but options.output is passed to ETUtils.write as such - confirm
    # what ETUtils.write does with None.
    optparser.add_option("-o", "--output", default=None, dest="output", help="The file to which the new XML structure is saved. If None, will be the same as target.", metavar="FILE")
    optparser.add_option("-t", "--tokenization", default=None, dest="tokenization", help="Tokenization element name")
    optparser.add_option("-p", "--parse", default=None, dest="parse", help="Parse element name")
    (options, args) = optparser.parse_args()

    sys.stderr.write("Loading input file " + str(options.input) + "\n")
    corpusElements = SentenceGraph.loadCorpus(options.input, options.parse, options.tokenization)

    counter = ProgressCounter(len(corpusElements.sentences), "Resolving chains")
    # Interaction attributes that refer to entities
    tags = ["e1","e2"]
    for sentence in corpusElements.sentences:
        counter.update(1, "Resolving chains for ("+sentence.sentence.attrib["id"]+"): ")
        identityChainDict = buildIdentityChainDict(sentence)
        # Redirect the arguments of all non-identity interactions to the
        # entity at the end of their identity chain. The identity
        # interactions themselves are left untouched.
        for interaction in sentence.interactions:
            if interaction.attrib["type"] == "identity":
                continue
            for tag in tags:
                resolvedId = resolveIdentityChain(interaction.attrib[tag], identityChainDict)
                if resolvedId != interaction.attrib[tag]:
                    interaction.attrib[tag] = resolvedId

    sys.stderr.write("Writing output " + str(options.output) + "\n")
    ETUtils.write(corpusElements.rootElement, options.output)
52