1 import sys
2 from SentenceExampleWriter import SentenceExampleWriter
3 import Utils.InteractionXML.IDUtils as IDUtils
4 try:
5 import xml.etree.cElementTree as ET
6 except ImportError:
7 import cElementTree as ET
8 import Utils.Libraries.combine as combine
9
14
def writeXMLSentence(self, examples, predictionsByExample, sentenceObject, classSet, classIds, goldSentence=None, exampleStyle=None):
    """
    Replace the interactions and non-name entities of one sentence
    element with new elements built from the classified examples.

    Parameters:
    examples -- examples belonging to this sentence; example[0] is read
        as the example id and example[3] as a dict with the keys "e"
        (entity id), "i" (comma-separated interaction ids) and,
        optionally, "etype"
    predictionsByExample -- maps an example id to its prediction; item 0
        is the predicted class and examples predicted as class 1 are
        skipped (presumably 1 is the negative class -- TODO confirm)
    sentenceObject -- provides .sentence (the XML element, modified in
        place), .entities and .entitiesById
    classSet, classIds -- only forwarded to getPredictionStrength
    goldSentence, exampleStyle -- not used by this method

    Returns None. Besides editing the sentence element, the method
    (re)sets self.sentenceId, self.entityCount, self.interactionCount,
    self.newEntities, self.newInteractions and self.entitiesByHeadByType,
    and increments entries of self.counts.
    """
    sentenceElement = sentenceObject.sentence
    self.sentenceId = sentenceElement.get("id")
    self.assertSameSentence(examples, self.sentenceId)

    # Detach the analyses element for the duration of the rewrite; it is
    # appended back as the last child before this method returns.
    sentenceAnalysesElement = sentenceElement.find("sentenceanalyses")
    if sentenceAnalysesElement is None:
        sentenceAnalysesElement = sentenceElement.find("analyses")
    if sentenceAnalysesElement is not None:
        sentenceElement.remove(sentenceAnalysesElement)

    # Strip the existing pairs/interactions and non-name entities. Both
    # helpers are defined outside this block; their return values are
    # used below as the collections of removed elements.
    interactions = self.removeChildren(sentenceElement, ["pair", "interaction"])
    entities = self.removeNonNameEntities(sentenceElement)

    # Interactions of type "neg" never take part in the output.
    interactions = [x for x in interactions if x.get("type") != "neg"]

    # Early out: with too many interactions nothing is regenerated, so
    # the sentence is left without interactions and non-name entities.
    cutoff = 100
    if len(interactions) > cutoff:
        if sentenceAnalysesElement is not None:
            sentenceElement.append(sentenceAnalysesElement)
        print >> sys.stderr, "Warning, sentence", sentenceObject.sentence.get("id"), "has more than", cutoff, "interactions, removing all."
        return

    # Index the remaining interactions by their e1 entity and by id.
    # Every removed entity gets an entry, even if it has no interactions.
    interactionsByEntity = {}
    interactionsById = {}
    for entity in entities:
        interactionsByEntity[entity.get("id")] = []
    for interaction in interactions:
        interactionsByEntity.setdefault(interaction.get("e1"), []).append(interaction)
        interactionsById[interaction.get("id")] = interaction

    # Id counters continue from what is still attached to the sentence.
    self.entityCount = IDUtils.getNextFreeId(sentenceElement.findall("entity"))
    self.interactionCount = IDUtils.getNextFreeId(sentenceElement.findall("interaction"))
    self.newEntities = []
    self.newInteractions = []

    # headOffset -> type -> list of entity elements available as event
    # arguments. Name entities are available immediately; a non-name
    # entity (re)sets its slot to an empty list, which addEntity fills.
    self.entitiesByHeadByType = {}
    for entity in sentenceObject.entities:
        byType = self.entitiesByHeadByType.setdefault(entity.get("headOffset"), {})
        eType = entity.get("type")
        if entity.get("isName") != "True":
            byType[eType] = []
        else:
            byType.setdefault(eType, []).append(entity)

    # Group the examples by the entity they refer to.
    entityKeys = sentenceObject.entitiesById.keys()
    exampleByEntityId = {}
    for example in examples:
        eId = example[3]["e"]
        assert eId in entityKeys
        exampleByEntityId.setdefault(eId, []).append(example)

    argumentsByExample = {}
    positiveExamples = []
    exampleIdCount = 0

    # Entities without any example: every remaining interaction becomes
    # its own single-argument "simple" pseudo-example.
    for entity in entities:
        entityId = entity.get("id")
        if entityId in exampleByEntityId:
            continue
        simpleEventInteractions = interactionsByEntity[entityId]
        numCauses = 0
        numThemes = 0
        # Iterate over a copy, because the list is pruned in place.
        for interaction in simpleEventInteractions[:]:
            if self.isIntersentence(interaction):
                # Python 2 print in its explicit-stream form; the output
                # still goes to standard output.
                print >> sys.stdout, "Warning, intersentence interaction for", entity.get("id"), entity.get("type")
                simpleEventInteractions.remove(interaction)
                continue
            if interaction.get("type") == "neg":
                simpleEventInteractions.remove(interaction)
                continue
            iType = interaction.get("type")
            if iType == "Cause":
                numCauses += 1
            elif iType == "Theme":
                numThemes += 1
        eType = entity.get("type")
        assert numThemes == 0 or (numThemes != 0 and numCauses == 0) or (numThemes > 1 and eType != "Binding"), (numThemes,numCauses,eType,entity.get("id"), [x[0] for x in examples], entityKeys)

        for interaction in simpleEventInteractions:
            self.counts["simple-" + eType + "-" + interaction.get("type")] += 1
            exampleId = "simple." + str(exampleIdCount)
            exampleIdCount += 1
            positiveExamples.append([exampleId, None, None, None])
            argumentsByExample[exampleId] = [interaction]

    # Classified examples: keep those not predicted as class 1 and
    # resolve their interaction ids into argument elements.
    for example in examples:
        if predictionsByExample[example[0]][0] == 1:
            continue
        positiveExamples.append(example)
        arguments = []
        for iId in example[3]["i"].split(","):
            if iId == "":
                # Only "Process" examples are allowed to have no arguments.
                assert "etype" in example[3], example[3]
                assert example[3]["etype"] == "Process", example[3]
                break
            arg = interactionsById[iId]
            if self.isIntersentence(arg):
                continue
            assert arg.get("type") != "neg"
            arguments.append(arg)
        argumentsByExample[example[0]] = arguments

    # Insert the examples in rounds. An example is inserted once entity
    # elements exist for all of its arguments; when a whole round adds
    # nothing, the next pending example is forced in.
    examplesLeft = len(positiveExamples)
    exampleAdded = {}
    for example in positiveExamples:
        exampleAdded[example[0]] = False
    forceAdd = False
    while examplesLeft > 0:
        if len(self.newEntities) > cutoff:
            print >> sys.stderr, "Warning, sentence", sentenceObject.sentence.get("id"), "has generated more than", cutoff, "events, skipping the rest."
            break
        examplesAddedThisRound = 0
        for example in positiveExamples:
            if len(self.newEntities) > cutoff:
                break
            if exampleAdded[example[0]]:
                continue
            arguments = argumentsByExample[example[0]]
            if forceAdd or self.argumentEntitiesExist(arguments, sentenceObject):
                predictionStrength = None
                if "simple" in example[0]:
                    umType = "simple"
                else:
                    umType = "complex"
                    # Only classified examples have a prediction to rate.
                    predictionStrength = self.getPredictionStrength(example, predictionsByExample, classSet, classIds)

                if umType != "simple" and "etype" in example[3] and example[3]["etype"] == "Process" and len(arguments) == 0:
                    # Argument-less Process: re-create the entity alone.
                    origProcess = sentenceObject.entitiesById[example[3]["e"]]
                    newProcess = self.addEntity(origProcess)
                    newProcess.set("umType", umType)
                    if predictionStrength is not None:
                        newProcess.set("umStrength", str(predictionStrength))
                else:
                    self.addEvent(arguments, sentenceObject, umType, forceAdd, predictionStrength, exampleNotes=example[3])
                exampleAdded[example[0]] = True
                examplesLeft -= 1
                examplesAddedThisRound += 1
                # A forced addition applies to one example only.
                forceAdd = False
        if examplesLeft > 0 and examplesAddedThisRound == 0:
            # Nothing could be inserted this round: force the next one.
            forceAdd = True

    # Attach the generated elements, entities first.
    for element in self.newEntities + self.newInteractions:
        sentenceElement.append(element)

    # Re-attach the analyses element as the last child.
    if sentenceAnalysesElement is not None:
        sentenceElement.append(sentenceAnalysesElement)
252
254 """
255 Checks whether entity elements have already been created
256 for the argument entities, i.e. whether the argument events
257 have been inserted.
258 """
259 for arg in arguments:
260 e2Id = arg.get("e2")
261 origE2 = sentenceObject.entitiesById[e2Id]
262 e2HeadOffset = origE2.get("headOffset")
263 e2Type = origE2.get("type")
264 if len(self.entitiesByHeadByType[e2HeadOffset][e2Type]) == 0:
265 return False
266 return True
267
def addEvent(self, arguments, sentenceObject, umType="unknown", forceAdd=False, predictionStrength=None, exampleNotes=None):
    """
    Create event root entities and their argument interactions for one
    group of argument interactions.

    Parameters:
    arguments -- non-empty sequence of interaction elements; all must
        have the same "e1" attribute, which identifies the event entity
    sentenceObject -- provides .entitiesById and .sentence
    umType -- value stored in the "umType" attribute of each new root
    forceAdd -- must be true if some argument has no candidate entity
        yet; in that case a stand-in candidate is supplied here
    predictionStrength -- if not None, stored (as a string) in the
        "umStrength" attribute of each new root
    exampleNotes -- only used in assertion messages

    One root entity is created for each combination returned by
    combine.combine over the per-argument candidate lists (presumably
    one candidate per argument in each combination -- confirm against
    Utils.Libraries.combine). Returns None; new elements are registered
    through self.addEntity and self.addInteraction.
    """
    assert len(arguments) > 0, (sentenceObject.sentence.get("id"), exampleNotes)

    e1Id = None
    origE1 = None
    argEntities = []  # one list of candidate entities per argument
    for arg in arguments:
        argE1Id = arg.get("e1")
        if e1Id is not None:
            # Every argument must belong to the same event entity.
            assert e1Id == argE1Id
        else:
            e1Id = argE1Id
            origE1 = sentenceObject.entitiesById[argE1Id]

        origE2 = sentenceObject.entitiesById[arg.get("e2")]
        candidates = self.entitiesByHeadByType[origE2.get("headOffset")][origE2.get("type")]
        if len(candidates) == 0:
            # No entity exists yet for this argument, which is only
            # acceptable when the addition is being forced.
            assert forceAdd
            if origE2.get("isName") != "True":
                candidates = [self.addEntity(origE2)]
            else:
                # Wrap the element in a list: a bare element would be
                # iterated as its child elements by combine.combine.
                candidates = [origE2]
        argEntities.append(candidates)

    entityCombinations = combine.combine(*argEntities)
    for combination in entityCombinations:
        assert origE1 is not None, (sentenceObject.sentence.get("id"), exampleNotes, [(x.get("id"), x.get("e1"), x.get("e2")) for x in arguments])
        root = self.addEntity(origE1)
        root.set("umType", umType)
        if predictionStrength is not None:
            root.set("umStrength", str(predictionStrength))
        for i, arg in enumerate(arguments):
            self.addInteraction(root, combination[i], arg)
305
307 entityElement = ET.Element("entity")
308 assert entity.get("isName") != "True", entity.attrib
309 entityElement.set("isName", "False")
310 entityElement.set("charOffset", entity.get("charOffset"))
311 entityElement.set("headOffset", entity.get("headOffset"))
312 entityElement.set("text", entity.get("text"))
313 entityElement.set("id", self.sentenceId + ".e" + str(self.entityCount))
314 entityElement.set("type", entity.get("type"))
315 if entity.get("predictions") != None:
316 entityElement.set("predictions", entity.get("predictions"))
317
318 eType = entityElement.get("type")
319 headOffset = entityElement.get("headOffset")
320 if not self.entitiesByHeadByType[headOffset].has_key(eType):
321 self.entitiesByHeadByType[headOffset][eType] = []
322 self.entitiesByHeadByType[headOffset][eType].append(entityElement)
323 self.newEntities.append(entityElement)
324 self.entityCount += 1
325
326 return entityElement
327
329 interactionElement = ET.Element("interaction")
330 interactionElement.attrib["directed"] = "Unknown"
331 interactionElement.attrib["e1"] = e1.get("id")
332 interactionElement.attrib["e2"] = e2.get("id")
333 interactionElement.attrib["id"] = self.sentenceId + ".i" + str(self.interactionCount)
334 interactionElement.set("type", arg.get("type"))
335 if arg.get("predictions") != None:
336 interactionElement.set("predictions", arg.get("predictions"))
337 self.newInteractions.append(interactionElement)
338 self.interactionCount += 1
339
340 return interactionElement
341
343 e1MajorId, e1MinorId = interaction.get("e1").rsplit(".e", 1)
344 e2MajorId, e2MinorId = interaction.get("e2").rsplit(".e", 1)
345 return e1MajorId != e2MajorId
346
348 prediction = predictionsByExample[example[0]]
349 if len(prediction) == 1:
350 return 0
351 predClass = prediction[0]
352
353 predictionStrength = self.getPredictionStrengthString(prediction, classSet, classIds)
354 return predictionStrength
355