1 import sys, os
2 thisPath = os.path.dirname(os.path.abspath(__file__))
3 sys.path.append(os.path.abspath(os.path.join(thisPath,"../..")))
4 import Utils.ElementTreeUtils as ETUtils
5
def _iterElements(element, tag=None):
    """Iterate over *element* and all of its descendants, optionally by tag.

    Uses ``Element.iter`` when the ElementTree implementation provides it and
    falls back to the older ``getiterator`` otherwise, so the module works
    with both old and current ElementTree versions.
    """
    walk = getattr(element, "iter", None)
    if walk is None:
        walk = element.getiterator
    return walk(tag)


def removeUnconnectedEntities(input, output=None):
    """Remove entities that no interaction refers to from an interaction XML tree.

    NOTE(review): the ``def`` line was absent from the reviewed text; the name
    and parameters are reconstructed from the call in the ``__main__`` block
    and from how the body uses ``input`` and ``output`` -- confirm against
    the upstream file.

    For every ``document`` element directly under the root, the values of the
    ``e1`` and ``e2`` attributes of all nested ``interaction`` elements are
    collected. Each nested ``entity`` element whose ``id`` is not among those
    values is detached from the element that contains it. Entities with
    ``isName="True"`` are skipped entirely: they are neither removed nor
    included in the "preserved" count.

    A one-line summary of the counts is written to stderr.

    Args:
        input: Object handed to ``ETUtils.ETFromObj``; presumably a file name
            or an already parsed tree -- confirm against that helper.
        output: If not None, the modified root is written with
            ``ETUtils.write(root, output)``.

    Returns:
        The object returned by ``ETUtils.ETFromObj`` (modified in place).
    """
    input = ETUtils.ETFromObj(input)
    root = input.getroot()
    removed = 0
    preserved = 0
    for document in root.findall("document"):
        # Ids referenced by any interaction anywhere inside this document.
        connected = set()
        for interaction in _iterElements(document, "interaction"):
            connected.add(interaction.get("e1"))
            connected.add(interaction.get("e2"))

        # Record every entity together with its real parent before touching
        # the tree, so nothing is removed while the tree is being iterated.
        # Using the real parent (instead of deriving a sentence id from the
        # entity id) avoids KeyError/ValueError when an id merely contains
        # ".s" or an entity is nested deeper than expected.
        owned = []
        for parent in _iterElements(document):
            for child in parent:
                if child.tag == "entity":
                    owned.append((parent, child))

        for parent, entity in owned:
            if entity.get("isName") == "True":
                continue
            if entity.get("id") in connected:
                preserved += 1
            else:
                parent.remove(entity)
                removed += 1

    # sys.stderr.write produces the same text on Python 2 and Python 3.
    sys.stderr.write("Removed %d entities, preserved %d entities\n" % (removed, preserved))

    if output is not None:
        sys.stderr.write("Writing output to %s\n" % (output,))
        ETUtils.write(root, output)
    return input
41
if __name__ == "__main__":
    # Command-line entry point: read an interaction XML corpus, drop the
    # entities that no interaction refers to, and write the result.
    import sys

    from optparse import OptionParser

    # Psyco is optional; whether it was found is only reported on stderr.
    # sys.stderr.write is used instead of the Python 2 print statement so the
    # messages are emitted identically on Python 2 and Python 3.
    try:
        import psyco
        psyco.full()
        sys.stderr.write("Found Psyco, using\n")
    except ImportError:
        sys.stderr.write("Psyco not installed\n")

    optparser = OptionParser(usage="%prog [options]\n")
    optparser.add_option("-i", "--input", default=None, dest="input", help="Corpus in interaction xml format", metavar="FILE")
    optparser.add_option("-o", "--output", default=None, dest="output", help="Output file in interaction xml format.")
    (options, args) = optparser.parse_args()

    # Both options are mandatory; print the usage text and exit with status 1
    # when either one is missing.
    if options.input is None:
        sys.stderr.write("Error, first input file not defined.\n")
        optparser.print_help()
        sys.exit(1)
    if options.output is None:
        sys.stderr.write("Error, output file not defined.\n")
        optparser.print_help()
        sys.exit(1)

    removeUnconnectedEntities(options.input, options.output)
69