Package TEES :: Package ExampleWriters :: Module UnmergingExampleWriter
[hide private]

Source Code for Module TEES.ExampleWriters.UnmergingExampleWriter

  1  import sys 
  2  from SentenceExampleWriter import SentenceExampleWriter 
  3  import Utils.InteractionXML.IDUtils as IDUtils 
  4  try: 
  5      import xml.etree.cElementTree as ET 
  6  except ImportError: 
  7      import cElementTree as ET 
  8  import Utils.Libraries.combine as combine 
  9   
10 -class UnmergingExampleWriter(SentenceExampleWriter):
11 - def __init__(self):
12 self.xType = "um" 13 SentenceExampleWriter.__init__(self)
14
15 - def writeXMLSentence(self, examples, predictionsByExample, sentenceObject, classSet, classIds, goldSentence=None, exampleStyle=None):
16 sentenceElement = sentenceObject.sentence 17 self.sentenceId = sentenceElement.get("id") 18 self.assertSameSentence(examples, self.sentenceId) 19 # detach analyses-element 20 sentenceAnalysesElement = None 21 sentenceAnalysesElement = sentenceElement.find("sentenceanalyses") 22 if sentenceAnalysesElement == None: 23 sentenceAnalysesElement = sentenceElement.find("analyses") 24 if sentenceAnalysesElement != None: 25 sentenceElement.remove(sentenceAnalysesElement) 26 27 # remove pairs and interactions 28 interactions = self.removeChildren(sentenceElement, ["pair", "interaction"]) 29 # remove entities 30 entities = self.removeNonNameEntities(sentenceElement) 31 32 # filter interactions 33 interactionsToKeep = [] 34 for interaction in interactions: 35 if interaction.get("type") != "neg": 36 interactionsToKeep.append(interaction) 37 interactions = interactionsToKeep 38 39 # early out 40 cutoff = 100 41 #if len(interactions) == 0 or len(interactions) > cutoff: 42 if len(interactions) > cutoff: 43 # re-attach the analyses-element 44 if sentenceAnalysesElement != None: 45 sentenceElement.append(sentenceAnalysesElement) 46 #if len(interactions) > cutoff: 47 print >> sys.stderr, "Warning, sentence", sentenceObject.sentence.get("id"), "has more than", cutoff, "interactions, removing all." 48 return 49 50 interactionsByEntity = {} 51 interactionsById = {} 52 for entity in entities: 53 interactionsByEntity[entity.get("id")] = [] 54 for interaction in interactions: 55 e1Id = interaction.get("e1") 56 if not interactionsByEntity.has_key(e1Id): 57 interactionsByEntity[e1Id] = [] 58 interactionsByEntity[e1Id].append(interaction) 59 interactionsById[interaction.get("id")] = interaction 60 61 # NOTE! Following won't work for pairs 62 self.entityCount = IDUtils.getNextFreeId(sentenceElement.findall("entity")) 63 self.interactionCount = IDUtils.getNextFreeId(sentenceElement.findall("interaction")) 64 self.newEntities = [] 65 self.newInteractions = [] 66 67 # Mapping for connecting the events 68 self.entitiesByHeadByType = {} 69 #self.tokenByOffset = {} 70 #for token in sentenceObject.tokens: 71 # self.tokenByOffset[token.get("charOffset")] = token 72 # self.entityByHeadByType[token.get("charOffset")] = {} 73 for entity in sentenceObject.entities: 74 # by offset 75 offset = entity.get("headOffset") 76 if not self.entitiesByHeadByType.has_key(offset): 77 self.entitiesByHeadByType[offset] = {} 78 # by type 79 eType = entity.get("type") 80 if entity.get("isName") != "True": 81 self.entitiesByHeadByType[offset][eType] = [] 82 else: # add names to structure 83 if not self.entitiesByHeadByType[offset].has_key(eType): 84 self.entitiesByHeadByType[offset][eType] = [] 85 self.entitiesByHeadByType[offset][eType].append(entity) 86 87 entityKeys = sentenceObject.entitiesById.keys() 88 exampleByEntityId = {} 89 for example in examples: 90 #if predictionsByExample[example[0]][0] == 1: # negative 91 # continue 92 eId = example[3]["e"] 93 assert eId in entityKeys 94 if not exampleByEntityId.has_key(eId): 95 exampleByEntityId[eId] = [] 96 exampleByEntityId[eId].append(example) 97 98 # This doesn't work, it was an attempt to include 99 # only the positive example with the highest prediction strength 100 # for key in sorted(exampleByEntityId.keys()): 101 # eType = sentenceObject.entitiesById[key].get("type") 102 # eExamples = exampleByEntityId[key] 103 # if eType == "Binding" and len(eExamples) > 1: 104 # maxArgs = -1 105 # maxStr = -999999999 106 # for example in eExamples: 107 # if predictionsByExample[example[0]][0] == 1: 108 # continue 109 # numArgs = example[3]["i"].count(",") + 1 110 # if numArgs > maxArgs: 111 # maxArgs = numArgs 112 # predClass = predictionsByExample[example[0]][0] 113 # predictionStrength = predictionsByExample[example[0]][predClass] 114 # if predictionStrength > maxStr: 115 # maxStr = predictionStrength 116 # #print maxArgs, len(eExamples) 117 # for example in eExamples: 118 # if predictionsByExample[example[0]][0] == 1: 119 # continue 120 # predClass = predictionsByExample[example[0]][0] 121 # predictionStrength = predictionsByExample[example[0]][predClass] 122 # if predictionStrength != maxStr: 123 # examples.remove(example) 124 # #if example[3]["i"].count(",") + 1 < maxArgs: 125 # # examples.remove(example) 126 127 #self.newEntitiesById = {} 128 #self.outEdgesByEntity = {} 129 130 # Gather arguments for the simple, one-argument events 131 argumentsByExample = {} 132 positiveExamples = [] 133 exampleIdCount = 0 134 for entity in entities: 135 # If no example, case is unambiguous 136 if entity.get("id") not in exampleByEntityId: 137 simpleEventInteractions = interactionsByEntity[entity.get("id")] 138 numCauses = 0 139 numThemes = 0 140 for interaction in simpleEventInteractions[:]: 141 if self.isIntersentence(interaction): 142 print "Warning, intersentence interaction for", entity.get("id"), entity.get("type") 143 simpleEventInteractions.remove(interaction) 144 continue 145 if interaction.get("type") == "neg": 146 simpleEventInteractions.remove(interaction) 147 continue 148 iType = interaction.get("type") 149 if iType == "Cause": 150 numCauses += 1 151 elif iType == "Theme": 152 numThemes += 1 153 eType = entity.get("type") 154 assert numThemes == 0 or (numThemes != 0 and numCauses == 0) or (numThemes > 1 and eType != "Binding"), (numThemes,numCauses,eType,entity.get("id"), [x[0] for x in examples], entityKeys) 155 #assert numThemes == 0 or (numThemes != 0 and numCauses == 0) or (numThemes > 1 and eType == "Binding"), (numThemes,numCauses,eType,entity.get("id")) 156 for interaction in simpleEventInteractions: 157 self.counts["simple-" + eType + "-" + interaction.get("type")] += 1 158 exampleId = "simple." + str(exampleIdCount) 159 exampleIdCount += 1 160 positiveExamples.append([exampleId,None,None,None]) 161 argumentsByExample[exampleId] = [interaction] 162 #self.addEvent([interaction], sentenceObject, "simple") 163 164 # Gather arguments for predicted, unmerged events 165 for example in examples: 166 #print predictionsByExample[example[0]] 167 if predictionsByExample[example[0]][0] == 1: # negative 168 continue 169 positiveExamples.append(example) 170 arguments = [] 171 for iId in example[3]["i"].split(","): 172 if iId == "": # processes can have 0 arguments 173 assert "etype" in example[3], example[3] 174 assert example[3]["etype"] == "Process", example[3] 175 break 176 arg = interactionsById[iId] 177 if self.isIntersentence(arg): 178 continue 179 assert arg.get("type") != "neg" 180 arguments.append(arg) 181 argumentsByExample[example[0]] = arguments 182 183 # Loop until all positive examples are added. This process 184 # assumes that the events (mostly) form a directed acyclic 185 # graph, which can written by "growing" the structure from 186 # the "leaf" events, and consecutively adding levels of 187 # nesting events. 188 examplesLeft = len(positiveExamples) 189 exampleAdded = {} 190 for example in positiveExamples: 191 exampleAdded[example[0]] = False 192 forceAdd = False 193 forcedCount = 0 194 while examplesLeft > 0: 195 if len(self.newEntities) > 100: 196 print >> sys.stderr, "Warning, sentence", sentenceObject.sentence.get("id"), "has generated more than", cutoff, "events, skipping the rest." 197 break 198 examplesAddedThisRound = 0 199 # For each round, loop through the potentially remaining examples 200 for example in positiveExamples: 201 if len(self.newEntities) > 100: 202 break 203 if exampleAdded[example[0]]: # This event has already been inserted 204 continue 205 arguments = argumentsByExample[example[0]] 206 # An event can be added if all of its argument events have already 207 # been added. Addition is forced if lack of argument events blocks 208 # the process. 209 if forceAdd or self.argumentEntitiesExist(arguments, sentenceObject): 210 umType = "complex" # mark the root entity in the output xml 211 predictionStrength = None 212 if example[0].find("simple") != -1: 213 umType = "simple" 214 else: 215 # Prediction strength is only available for classified argument groups 216 predictionStrength = self.getPredictionStrength(example, predictionsByExample, classSet, classIds) 217 #print example 218 if umType != "simple" and "etype" in example[3] and example[3]["etype"] == "Process" and len(arguments) == 0: 219 origProcess = sentenceObject.entitiesById[example[3]["e"]] 220 # Put back the original entity 221 newProcess = self.addEntity(origProcess) 222 newProcess.set("umType", umType) 223 if predictionStrength != None: 224 newProcess.set("umStrength", str(predictionStrength)) 225 else: # example has arguments 226 self.addEvent(arguments, sentenceObject, umType, forceAdd, predictionStrength, exampleNotes=example[3]) 227 exampleAdded[example[0]] = True 228 examplesLeft -= 1 229 examplesAddedThisRound += 1 230 forceAdd = False 231 if examplesLeft > 0 and examplesAddedThisRound == 0: 232 # If there are examples left, but nothing was added, this 233 # means that some nested events are missing. Theoretically 234 # this could also be because two events are referring to 235 # each other, preventing each other's insertion. In any 236 # case this is solved by simply forcing the addition of 237 # the first non-inserted event, by creating 0-argument 238 # entities for its argument events. 239 forcedCount += 1 240 #print "Warning, forcing event addition" 241 forceAdd = True 242 243 # Attach the new elements 244 for element in self.newEntities + self.newInteractions: 245 sentenceElement.append(element) 246 247 # re-attach the analyses-element 248 if sentenceAnalysesElement != None: 249 sentenceElement.append(sentenceAnalysesElement)
250 251 #print "Warning, forced addition of", forcedCount, "events" 252
253 - def argumentEntitiesExist(self, arguments, sentenceObject):
254 """ 255 Checks whether entity elements have already been created 256 for the argument entities, i.e. whether the argument events 257 have been inserted. 258 """ 259 for arg in arguments: 260 e2Id = arg.get("e2") 261 origE2 = sentenceObject.entitiesById[e2Id] 262 e2HeadOffset = origE2.get("headOffset") 263 e2Type = origE2.get("type") 264 if len(self.entitiesByHeadByType[e2HeadOffset][e2Type]) == 0: 265 return False 266 return True
267
268 - def addEvent(self, arguments, sentenceObject, umType="unknown", forceAdd=False, predictionStrength=None, exampleNotes=None):
269 assert len(arguments) > 0, (sentenceObject.sentence.get("id"), exampleNotes) 270 # Collect e2 entities linked by this event 271 e1Id = None 272 origE1 = None 273 argEntities = [[]] * (len(arguments)) 274 for i in range(len(arguments)): 275 arg = arguments[i] 276 argE1Id = arg.get("e1") 277 # Take the entity trigger node from the e1 attribute of the argument 278 if e1Id != None: # trigger has already been found 279 assert e1Id == argE1Id 280 else: # find the trigger 281 e1Id = argE1Id 282 origE1 = sentenceObject.entitiesById[argE1Id] 283 284 e2Id = arg.get("e2") 285 origE2 = sentenceObject.entitiesById[e2Id] 286 e2HeadOffset = origE2.get("headOffset") 287 e2Type = origE2.get("type") 288 argEntities[i] = self.entitiesByHeadByType[e2HeadOffset][e2Type] 289 if len(argEntities[i]) == 0: 290 assert forceAdd 291 if origE2.get("isName") != "True": 292 argEntities[i] = [self.addEntity(origE2)] 293 else: 294 argEntities[i] = origE2 295 296 entityCombinations = combine.combine(*argEntities) 297 for combination in entityCombinations: 298 assert origE1 != None, (sentenceObject.sentence.get("id"), exampleNotes, [(x.get("id"), x.get("e1"), x.get("e2")) for x in arguments]) 299 root = self.addEntity(origE1) 300 root.set("umType", umType) 301 if predictionStrength != None: 302 root.set("umStrength", str(predictionStrength)) 303 for i in range(len(arguments)): 304 self.addInteraction(root, combination[i], arguments[i])
305
306 - def addEntity(self, entity):
307 entityElement = ET.Element("entity") 308 assert entity.get("isName") != "True", entity.attrib 309 entityElement.set("isName", "False") 310 entityElement.set("charOffset", entity.get("charOffset")) 311 entityElement.set("headOffset", entity.get("headOffset")) 312 entityElement.set("text", entity.get("text")) 313 entityElement.set("id", self.sentenceId + ".e" + str(self.entityCount)) 314 entityElement.set("type", entity.get("type")) 315 if entity.get("predictions") != None: 316 entityElement.set("predictions", entity.get("predictions")) 317 # Add to dictionary 318 eType = entityElement.get("type") 319 headOffset = entityElement.get("headOffset") 320 if not self.entitiesByHeadByType[headOffset].has_key(eType): 321 self.entitiesByHeadByType[headOffset][eType] = [] 322 self.entitiesByHeadByType[headOffset][eType].append(entityElement) 323 self.newEntities.append(entityElement) 324 self.entityCount += 1 325 326 return entityElement
327
328 - def addInteraction(self, e1, e2, arg):
329 interactionElement = ET.Element("interaction") 330 interactionElement.attrib["directed"] = "Unknown" 331 interactionElement.attrib["e1"] = e1.get("id") 332 interactionElement.attrib["e2"] = e2.get("id") 333 interactionElement.attrib["id"] = self.sentenceId + ".i" + str(self.interactionCount) 334 interactionElement.set("type", arg.get("type")) 335 if arg.get("predictions") != None: 336 interactionElement.set("predictions", arg.get("predictions")) 337 self.newInteractions.append(interactionElement) 338 self.interactionCount += 1 339 340 return interactionElement
341
342 - def isIntersentence(self, interaction):
343 e1MajorId, e1MinorId = interaction.get("e1").rsplit(".e", 1) 344 e2MajorId, e2MinorId = interaction.get("e2").rsplit(".e", 1) 345 return e1MajorId != e2MajorId
346
347 - def getPredictionStrength(self, example, predictionsByExample, classSet, classIds):
348 prediction = predictionsByExample[example[0]] 349 if len(prediction) == 1: 350 return 0 351 predClass = prediction[0] 352 #predictionStrength = [predClass] 353 predictionStrength = self.getPredictionStrengthString(prediction, classSet, classIds) 354 return predictionStrength
355