Package TEES :: Package Utils :: Package InteractionXML :: Module DeleteAttributes
[hide private]

Source Code for Module TEES.Utils.InteractionXML.DeleteAttributes

 1  import sys, os, copy 
 2  extraPath = os.path.dirname(os.path.abspath(__file__))+"/../.." 
 3  sys.path.append(extraPath) 
 4  from Utils.ProgressCounter import ProgressCounter 
 5  try: 
 6      import xml.etree.cElementTree as ET 
 7  except ImportError: 
 8      import cElementTree as ET 
 9  import Utils.ElementTreeUtils as ETUtils 
10       
def removeAttributes(parent, elementName, attributes, countsByType):
    """
    Recursively delete the named attributes from matching descendants.

    Every element below `parent` whose tag equals `elementName` loses each
    attribute listed in `attributes` that it currently carries. `parent`
    itself is never modified; only the elements beneath it are visited.

    @param parent: element whose subtree is processed
    @param elementName: tag of the elements whose attributes are removed
    @param attributes: iterable of attribute names to delete
    @param countsByType: dict updated in place; the entry stored under
        "<elementName>:<attribute>" is incremented once per deletion
    """
    # Iterate the element directly: Element.getchildren() no longer exists
    # in Python 3.9+, while plain iteration works on both Python 2 and 3.
    for element in parent:
        if element.tag == elementName:
            for attribute in attributes:
                if element.get(attribute) is not None:
                    del element.attrib[attribute]
                    counterKey = elementName + ":" + attribute
                    # Tolerate callers that did not pre-seed the counter.
                    countsByType[counterKey] = countsByType.get(counterKey, 0) + 1
        # Descend into every child, matching or not, so nested matches
        # at any depth are reached.
        removeAttributes(element, elementName, attributes, countsByType)
19
def processCorpus(inputFilename, outputFilename, rules):
    """
    Load a corpus XML file, strip attributes as directed by `rules`,
    report the deletion counts on stderr and optionally write the result.

    @param inputFilename: path of the corpus XML file; a name whose last
        dot-separated part is "gz" is read through gzip
    @param outputFilename: destination handed to ETUtils.write, or None
        to skip writing
    @param rules: dict mapping an element tag to an iterable of attribute
        names that are deleted from elements with that tag
    @return: the parsed ElementTree, modified in place
    """
    sys.stderr.write("Loading corpus file %s\n" % (inputFilename,))
    if inputFilename.rsplit(".", 1)[-1] == "gz":
        import gzip
        gzFile = gzip.open(inputFilename)
        try:
            corpusTree = ET.parse(gzFile)
        finally:
            # Release the handle even when parsing fails.
            gzFile.close()
    else:
        corpusTree = ET.parse(inputFilename)
    corpusRoot = corpusTree.getroot()

    countsByType = {}
    for key in sorted(rules.keys()):
        # Seed every counter so attributes that were never found are
        # still reported with a count of zero.
        for attribute in rules[key]:
            countsByType[key + ":" + attribute] = 0
        removeAttributes(corpusRoot, key, rules[key], countsByType)

    sys.stderr.write("Removed\n")
    for k in sorted(countsByType.keys()):
        sys.stderr.write(" " + k + ": " + str(countsByType[k]) + "\n")

    if outputFilename is not None:
        sys.stderr.write("Writing output to %s\n" % (outputFilename,))
        ETUtils.write(corpusRoot, outputFilename)
    return corpusTree
if __name__ == "__main__":
    # Command-line entry point: parse options, build the rules dictionary
    # and run processCorpus on the given input/output files.
    import sys
    # NOTE(review): the banner and the usage text ("Path generator.") look
    # copied from another script; left unchanged in case tooling matches them.
    sys.stderr.write("##### Split elements with merged types #####\n")

    from optparse import OptionParser
    # Import Psyco if available
    try:
        import psyco
        psyco.full()
        sys.stderr.write("Found Psyco, using\n")
    except ImportError:
        sys.stderr.write("Psyco not installed\n")

    optparser = OptionParser(usage="%prog [options]\nPath generator.")
    optparser.add_option("-i", "--input", default=None, dest="input", help="Corpus in interaction xml format", metavar="FILE")
    optparser.add_option("-o", "--output", default=None, dest="output", help="Output file in interaction xml format.")
    optparser.add_option("-r", "--rules", default=None, dest="rules", help="dictionary of python dictionaries with attribute:value pairs.")
    (options, args) = optparser.parse_args()

    if options.input is None:
        sys.stderr.write("Error, input file not defined.\n")
        optparser.print_help()
        sys.exit(1)
    if options.output is None:
        sys.stderr.write("Error, output file not defined.\n")
        optparser.print_help()
        sys.exit(1)
    # Without this check a missing --rules reaches eval(None) and the script
    # dies with a TypeError instead of a usage message.
    if options.rules is None:
        sys.stderr.write("Error, rules not defined.\n")
        optparser.print_help()
        sys.exit(1)

    # Rules e.g. "{\"pair\":{},\"interaction\":{},\"entity\":{\"isName\":\"False\"}}"
    # SECURITY: eval() executes arbitrary Python taken from the command line.
    # Only run this script with trusted arguments; ast.literal_eval would be
    # a safer choice if the rules are always plain literals.
    rules = eval(options.rules)
    sys.stderr.write("Rules: " + str(rules) + "\n")
    processCorpus(options.input, options.output, rules)