1 import sys, os, copy
2 extraPath = os.path.dirname(os.path.abspath(__file__))+"/../.."
3 sys.path.append(extraPath)
4 from Utils.ProgressCounter import ProgressCounter
5 try:
6 import xml.etree.cElementTree as ET
7 except ImportError:
8 import cElementTree as ET
9 import Utils.ElementTreeUtils as ETUtils
10
def removeAttributes(parent, elementName, attributes, countsByType):
    """
    Recursively delete the given attributes from matching descendant elements.

    Every descendant of parent (parent itself is not examined) whose tag
    equals elementName has each attribute listed in attributes removed, if
    it is present. The tree is modified in place.

    parent -- element whose subtree is processed
    elementName -- tag of the elements to strip attributes from
    attributes -- iterable of attribute names to delete
    countsByType -- dict updated in place; the entry
        "elementName:attribute" is incremented once per deleted attribute.
        Missing entries are treated as zero.
    """
    # Iterate the element directly: Element.getchildren() is deprecated and
    # no longer exists in recent ElementTree versions.
    for element in parent:
        if element.tag == elementName:
            for attribute in attributes:
                # An attribute with an empty string value is still present
                # and is removed as well.
                if element.get(attribute) is not None:
                    del element.attrib[attribute]
                    countKey = elementName + ":" + attribute
                    countsByType[countKey] = countsByType.get(countKey, 0) + 1
        # Descend regardless of whether this element matched, so nested
        # matches at any depth are handled.
        removeAttributes(element, elementName, attributes, countsByType)
19
def processCorpus(inputFilename, outputFilename, rules):
    """
    Load a corpus XML file, delete attributes according to rules and
    optionally write the result.

    inputFilename -- path of the corpus XML file; a name ending in ".gz"
        is read through gzip
    outputFilename -- path to write the modified corpus to, or None to
        skip writing
    rules -- dict mapping an element tag to the list of attribute names
        to delete from elements with that tag

    Returns the parsed (and modified) ElementTree. Progress and the number
    of deleted attributes per "tag:attribute" are reported on stderr.
    """
    print >> sys.stderr, "Loading corpus file", inputFilename
    if inputFilename.endswith(".gz"):
        import gzip
        gzipFile = gzip.open(inputFilename)
        # try/finally so the handle is released even if parsing fails
        try:
            corpusTree = ET.parse(gzipFile)
        finally:
            gzipFile.close()
    else:
        corpusTree = ET.parse(inputFilename)
    corpusRoot = corpusTree.getroot()

    countsByType = {}
    for key in sorted(rules):
        # Seed the counters so attributes that never occur are still
        # reported with a count of zero.
        for attribute in rules[key]:
            countsByType[key + ":" + attribute] = 0
        removeAttributes(corpusRoot, key, rules[key], countsByType)

    print >> sys.stderr, "Removed"
    for k in sorted(countsByType):
        print >> sys.stderr, "  " + k + ":", countsByType[k]

    if outputFilename is not None:
        print >> sys.stderr, "Writing output to", outputFilename
        ETUtils.write(corpusRoot, outputFilename)
    return corpusTree
43
if __name__=="__main__":
    # Command line entry point: parse the options, validate them and run
    # processCorpus on the given corpus file.
    import sys
    print >> sys.stderr, "##### Delete attributes #####"

    from optparse import OptionParser

    # Psyco is an optional JIT accelerator; the script works without it.
    try:
        import psyco
        psyco.full()
        print >> sys.stderr, "Found Psyco, using"
    except ImportError:
        print >> sys.stderr, "Psyco not installed"

    optparser = OptionParser(usage="%prog [options]\nDelete attributes from interaction xml elements.")
    optparser.add_option("-i", "--input", default=None, dest="input", help="Corpus in interaction xml format", metavar="FILE")
    optparser.add_option("-o", "--output", default=None, dest="output", help="Output file in interaction xml format.")
    optparser.add_option("-r", "--rules", default=None, dest="rules", help="python dictionary mapping an element name to a list of attribute names to delete.")
    (options, args) = optparser.parse_args()

    if options.input is None:
        print >> sys.stderr, "Error, input file not defined."
        optparser.print_help()
        sys.exit(1)
    if options.output is None:
        print >> sys.stderr, "Error, output file not defined."
        optparser.print_help()
        sys.exit(1)
    # Without this check a missing --rules option fails inside eval() with
    # an unhelpful TypeError.
    if options.rules is None:
        print >> sys.stderr, "Error, rules not defined."
        optparser.print_help()
        sys.exit(1)

    # NOTE(review): eval() executes arbitrary Python given on the command
    # line. If rules are always plain literals, ast.literal_eval would be a
    # safer choice -- confirm no caller relies on full expressions.
    rules = eval(options.rules)
    print >> sys.stderr, "Rules:", rules
    processCorpus(options.input, options.output, rules)
76