Package TEES :: Package Utils :: Package InteractionXML :: Module MergeNamedEntityTypes
[hide private]

Source Code for Module TEES.Utils.InteractionXML.MergeNamedEntityTypes

 1  import sys, os 
 2  try: 
 3      import xml.etree.cElementTree as ET 
 4  except ImportError: 
 5      import cElementTree as ET 
 6  import Utils.ElementTreeUtils as ETUtils 
 7   
 8  if __name__=="__main__": 
 9      print >> sys.stderr, "##### Merge named entity types #####" 
10      from optparse import OptionParser 
11      optparser = OptionParser(usage="%prog [options]\n") 
12      optparser.add_option("-i", "--input", default=None, dest="input", help="", metavar="FILE") 
13      optparser.add_option("-o", "--output", default=None, dest="output", help="", metavar="FILE") 
14      (options, args) = optparser.parse_args() 
15   
16      print >> sys.stderr, "Loading input file", options.input 
17      sourceTree = ET.parse(options.input) 
18      sourceRoot = sourceTree.getroot() 
19       
20      print >> sys.stderr, "Merging named entity types" 
21      entities = sourceRoot.getiterator("entity") 
22      mergedByType = {} 
23      for entity in entities: 
24          if entity.attrib.has_key("isName") and entity.attrib["isName"] == "True": 
25              if not mergedByType.has_key(entity.attrib["type"]): 
26                  mergedByType[entity.attrib["type"]] = 0 
27              mergedByType[entity.attrib["type"]] += 1 
28              entity.attrib["type"] = "Gene/protein/RNA" 
29       
30      print >> sys.stderr, "Merged:" 
31      for k in sorted(mergedByType.keys()): 
32          print >> sys.stderr, "  " + k + ": " + str(mergedByType[k]) 
33               
34      print >> sys.stderr, "Writing output", options.output 
35      ETUtils.write(sourceRoot, options.output) 
36