Package TEES :: Package Utils :: Package InteractionXML :: Module MergeParse
[hide private]

Source Code for Module TEES.Utils.InteractionXML.MergeParse

 1  __version__ = "$Revision: 1.2 $" 
 2   
 3  try: 
 4      import xml.etree.cElementTree as ET 
 5  except ImportError: 
 6      import cElementTree as ET 
 7  import Utils.ElementTreeUtils as ETUtils 
 8  import sys 
 9  import CorpusElements 
10  from optparse import OptionParser 
11   
def _requireChild(parent, tag, sentence):
    """Return the first direct ``tag`` child of ``parent``.

    Raises ValueError (naming the "id" attribute of ``sentence``) when the
    child element is missing.
    """
    child = parent.find(tag)
    if child is None:
        raise ValueError("sentence %s has no <%s> element" % (sentence.get("id"), tag))
    return child


def _lastChildWithAttribute(parent, tag, attribute, value):
    """Return the last direct ``tag`` child whose ``attribute`` equals ``value``.

    Returns None when no child matches. A child that lacks ``attribute``
    raises KeyError.
    """
    match = None
    for child in parent.findall(tag):
        if child.attrib[attribute] == value:
            match = child
    return match


def mergeParses(inputRoot, parse1Name, parse2Name, newParseName):
    """Add a merged parse element to every sentence under ``inputRoot``.

    For each "sentence" element, the two "parse" elements whose "parser"
    attribute equals ``parse1Name`` and ``parse2Name`` are located under
    sentenceanalyses/parses. A new "parse" element with parser
    ``newParseName`` is appended to the same "parses" element. It receives
    the "dependency" children of the first parse followed by those of the
    second. The dependency elements are appended as-is (not copied), so the
    same element objects are children of both the source parse and the new
    parse.

    Both source parses must refer to the same "tokenization" element through
    their "tokenizer" attribute; merging parses with different tokenizations
    is not supported.

    Returns ``inputRoot``, which is modified in place.
    Raises ValueError when a sentence lacks a required element, when one of
    the parses is not found, when both names select the same parse, or when
    the two parses do not share one tokenization.
    """
    # Element.getiterator() was removed in Python 3.9, while Element.iter()
    # is not available in ElementTree versions before 1.3 (Python 2.7).
    iterElements = getattr(inputRoot, "iter", None)
    if iterElements is None:
        iterElements = inputRoot.getiterator

    for sentence in iterElements("sentence"):
        analyses = _requireChild(sentence, "sentenceanalyses", sentence)
        parses = _requireChild(analyses, "parses", sentence)

        parse1 = _lastChildWithAttribute(parses, "parse", "parser", parse1Name)
        parse2 = _lastChildWithAttribute(parses, "parse", "parser", parse2Name)
        if parse1 is None or parse2 is None or parse1 is parse2:
            raise ValueError("sentence %s must contain two distinct parses named %r and %r"
                             % (sentence.get("id"), parse1Name, parse2Name))

        tokenizations = _requireChild(analyses, "tokenizations", sentence)
        tokenization1 = _lastChildWithAttribute(
            tokenizations, "tokenization", "tokenizer", parse1.attrib["tokenizer"])
        tokenization2 = _lastChildWithAttribute(
            tokenizations, "tokenization", "tokenizer", parse2.attrib["tokenizer"])
        # Dependencies refer to tokens, so they can only be combined when
        # both parses are built on the very same tokenization element.
        if tokenization1 is None or tokenization1 is not tokenization2:
            raise ValueError("parses %r and %r of sentence %s do not share a tokenization"
                             % (parse1Name, parse2Name, sentence.get("id")))

        newParse = ET.Element("parse")
        newParse.attrib["parser"] = newParseName
        newParse.attrib["tokenizer"] = tokenization1.attrib["tokenizer"]
        for sourceParse in (parse1, parse2):
            for dependency in sourceParse.findall("dependency"):
                newParse.append(dependency)
        parses.append(newParse)
    return inputRoot


if __name__=="__main__":
    print >> sys.stderr, "##### Merge Parse #####"
    # Import Psyco if available
    try:
        import psyco
        psyco.full()
        print >> sys.stderr, "Found Psyco, using"
    except ImportError:
        print >> sys.stderr, "Psyco not installed"

    optparser = OptionParser(usage="%prog [options]\nMerge the dependencies of two parses into a new parse element.")
    optparser.add_option("-i", "--input", default=None, dest="input", help="Corpus in analysis format", metavar="FILE")
    optparser.add_option("-o", "--output", default=None, dest="output", help="Corpus in analysis format", metavar="FILE")
    optparser.add_option("-p", "--parse1", default=None, dest="parse1", help="Parse element name")
    optparser.add_option("-q", "--parse2", default=None, dest="parse2", help="Parse element name")
    optparser.add_option("-n", "--name", default=None, dest="name", help="New parse element name")
    (options, args) = optparser.parse_args()
    # Report missing options as a usage error up front: asserts are stripped
    # under "python -O", and a missing parse name would otherwise only
    # surface later as an unrelated failure.
    for requiredOption in ("input", "output", "parse1", "parse2", "name"):
        if getattr(options, requiredOption) is None:
            optparser.error("option --%s is required" % requiredOption)

    print >> sys.stderr, "Loading input file", options.input
    inputRoot = ET.parse(options.input).getroot()
    mergeParses(inputRoot, options.parse1, options.parse2, options.name)
    print >> sys.stderr, "Writing output to", options.output
    ETUtils.write(inputRoot, options.output)
75