"""Merge the types of all named entities in an XML file into a single type.

Command-line script: reads the XML file given with -i/--input, sets the
"type" attribute of every "entity" element whose "isName" attribute equals
"True" to "Gene/protein/RNA", reports on stderr how many entities of each
original type were merged, and writes the result to the file given with
-o/--output using Utils.ElementTreeUtils.write.
"""
import sys, os
try:
    import xml.etree.cElementTree as ET
except ImportError:
    import cElementTree as ET
import Utils.ElementTreeUtils as ETUtils


def _mergeNamedEntityTypes(root, mergedType="Gene/protein/RNA"):
    """Retype all named entities below root and count them by original type.

    An element counts as a named entity when its tag is "entity" and its
    "isName" attribute is exactly the string "True". The elements are
    modified in place.

    Args:
        root: ElementTree element whose descendants are scanned.
        mergedType: value written to the "type" attribute of each named
            entity.

    Returns:
        A dict mapping each original "type" value to the number of named
        entities that had it.

    Raises:
        KeyError: if a named entity has no "type" attribute.
    """
    # Element.getiterator() is deprecated in favour of Element.iter(); keep
    # the old method as a fallback for ElementTree versions without iter().
    iterate = getattr(root, "iter", None) or root.getiterator
    mergedByType = {}
    for entity in iterate("entity"):
        if entity.get("isName") != "True":
            continue
        originalType = entity.attrib["type"]
        mergedByType[originalType] = mergedByType.get(originalType, 0) + 1
        entity.attrib["type"] = mergedType
    return mergedByType


if __name__=="__main__":
    print >> sys.stderr, "##### Merge named entity types #####"
    from optparse import OptionParser
    optparser = OptionParser(usage="%prog [options]\n")
    optparser.add_option("-i", "--input", default=None, dest="input", help="", metavar="FILE")
    optparser.add_option("-o", "--output", default=None, dest="output", help="", metavar="FILE")
    (options, args) = optparser.parse_args()

    # Fail early with a usage message instead of letting ET.parse(None)
    # raise an obscure error further down.
    if options.input is None:
        optparser.error("no input file specified (use -i/--input)")
    # NOTE(review): options.output is passed to ETUtils.write unchecked; how
    # that helper treats None is not visible from this file -- confirm.

    print >> sys.stderr, "Loading input file", options.input
    sourceTree = ET.parse(options.input)
    sourceRoot = sourceTree.getroot()

    print >> sys.stderr, "Merging named entity types"
    mergedByType = _mergeNamedEntityTypes(sourceRoot)

    print >> sys.stderr, "Merged:"
    for k in sorted(mergedByType.keys()):
        print >> sys.stderr, " " + k + ": " + str(mergedByType[k])

    print >> sys.stderr, "Writing output", options.output
    ETUtils.write(sourceRoot, options.output)
36