1 __version__ = "$Revision: 1.2 $"
2
3 try:
4 import xml.etree.cElementTree as ET
5 except ImportError:
6 import cElementTree as ET
7 import Utils.ElementTreeUtils as ETUtils
8 import sys
9 import CorpusElements
10 from optparse import OptionParser
11
def _findByAttribute(elements, attribute, value):
    """Return the last element whose `attribute` equals `value`, or None.

    Every element is inspected and the last match wins, so when several
    elements share the same value the later one is used. Elements that
    lack the attribute are skipped.
    """
    match = None
    for element in elements:
        if element.get(attribute) == value:
            match = element
    return match


def mergeSentenceParses(sentence, parse1Name, parse2Name, newName):
    """Add a parse to `sentence` that holds the dependencies of two parses.

    The sentence element must have a "sentenceanalyses" child containing
    "parses" and "tokenizations" children. The two source "parse" elements
    are selected by their "parser" attribute and must refer to the same
    "tokenizer", for which a "tokenization" element must exist.

    Parameters:
        sentence: ElementTree element of one sentence (modified in place).
        parse1Name: "parser" attribute value of the first source parse.
        parse2Name: "parser" attribute value of the second source parse.
        newName: "parser" attribute value given to the new parse element.

    Returns:
        The new "parse" element, already appended to the "parses" element.
        Its "dependency" children are those of the first parse followed by
        those of the second one.

    Raises:
        ValueError: if a name is None, if both source names are equal, or
            if the sentence lacks any of the required elements.
    """
    if None in (parse1Name, parse2Name, newName):
        raise ValueError("parse names must not be None")
    if parse1Name == parse2Name:
        raise ValueError("the two source parses must be different, got %r twice" % (parse1Name,))

    # Only used in error messages; None is shown if the attribute is absent.
    sentenceId = sentence.get("id")

    # Identity tests against None are used on purpose: the truth value of
    # an ElementTree element depends on whether it has children.
    analyses = sentence.find("sentenceanalyses")
    if analyses is None:
        raise ValueError("sentence %s has no sentenceanalyses element" % sentenceId)
    parses = analyses.find("parses")
    if parses is None:
        raise ValueError("sentence %s has no parses element" % sentenceId)

    parseElements = parses.findall("parse")
    parse1 = _findByAttribute(parseElements, "parser", parse1Name)
    parse2 = _findByAttribute(parseElements, "parser", parse2Name)
    if parse1 is None or parse2 is None:
        raise ValueError("sentence %s does not contain both parses %r and %r"
                         % (sentenceId, parse1Name, parse2Name))

    # Dependencies can only be combined if both parses use one tokenization.
    tokenizer = parse1.get("tokenizer")
    if tokenizer is None or tokenizer != parse2.get("tokenizer"):
        raise ValueError("sentence %s: parses %r and %r do not share a tokenizer"
                         % (sentenceId, parse1Name, parse2Name))
    tokenizations = analyses.find("tokenizations")
    if tokenizations is None:
        raise ValueError("sentence %s has no tokenizations element" % sentenceId)
    if _findByAttribute(tokenizations.findall("tokenization"), "tokenizer", tokenizer) is None:
        raise ValueError("sentence %s has no tokenization %r" % (sentenceId, tokenizer))

    newParse = ET.Element("parse")
    newParse.set("parser", newName)
    newParse.set("tokenizer", tokenizer)
    # The dependency elements are appended as they are (not copied), so the
    # same element objects are children of both a source parse and newParse.
    for sourceParse in (parse1, parse2):
        for dependency in sourceParse.findall("dependency"):
            newParse.append(dependency)
    parses.append(newParse)
    return newParse


if __name__=="__main__":
    print >> sys.stderr, "##### Merge Parse #####"

    # Psyco is optional; the script runs without it.
    try:
        import psyco
        psyco.full()
        print >> sys.stderr, "Found Psyco, using"
    except ImportError:
        print >> sys.stderr, "Psyco not installed"

    optparser = OptionParser(usage="%prog [options]\nMerge the dependencies of two parses of each sentence into a new parse.")
    optparser.add_option("-i", "--input", default=None, dest="input", help="Corpus in analysis format", metavar="FILE")
    optparser.add_option("-o", "--output", default=None, dest="output", help="Output file for the corpus with the merged parse", metavar="FILE")
    optparser.add_option("-p", "--parse1", default=None, dest="parse1", help="Name of the first parse element to merge")
    optparser.add_option("-q", "--parse2", default=None, dest="parse2", help="Name of the second parse element to merge")
    optparser.add_option("-n", "--name", default=None, dest="name", help="New parse element name")
    (options, args) = optparser.parse_args()

    # Check all options before the corpus is loaded, so that a missing one
    # is reported with a usage message instead of failing part-way through.
    for optionName in ("input", "output", "parse1", "parse2", "name"):
        if getattr(options, optionName) is None:
            optparser.error("option --%s is required" % optionName)
    if options.parse1 == options.parse2:
        optparser.error("--parse1 and --parse2 must name different parses")

    print >> sys.stderr, "Loading input file", options.input
    inputRoot = ET.parse(options.input).getroot()
    for sentence in inputRoot.getiterator("sentence"):
        mergeSentenceParses(sentence, options.parse1, options.parse2, options.name)
    print >> sys.stderr, "Writing output to", options.output
    ETUtils.write(inputRoot, options.output)
75