TEES.Evaluators.EvaluateInteractionXML

1 """ 2 For comparing a predicted interaction XML against a gold standard 3 """ 4 import sys, os 5 sys.path.append(os.path.dirname(os.path.abspath(__file__))+"/..") 6 #print os.path.dirname(os.path.abspath(__file__))+"/.." 7 from Utils.ProgressCounter import ProgressCounter 8 from optparse import OptionParser 9 import Core.ExampleUtils as ExampleUtils 10 from Core.IdSet import IdSet 11 import Utils.TableUtils as TableUtils 12 import Utils.InteractionXML.CorpusElements as CorpusElements 13 import copy 14 from collections import defaultdict 15 16 # for entities to match, they have to have the same head offsets and same type

def compareEntitiesSimple(e1,e2,tokens=None):
    """
    Return True when two entities have the same head offset and the same type.

    Only the "headOffset" and "type" attributes are compared. The tokens
    argument is not used; it is accepted so that this function can be passed
    wherever an entity comparison function is called with three arguments.
    """
    sameHead = e1.get("headOffset") == e2.get("headOffset")
    sameType = e1.get("type") == e2.get("type")
    return sameHead and sameType

22

def compareEntitiesStrict(e1,e2,tokens=None):
    """
    Return True when two entities have the same character offset and type.

    Side effect (the original "HORRIBLE HACK"): for each entity whose
    "charOffset" and "headOffset" strings are equal once their last character
    is dropped, the "charOffset" attribute is overwritten with the
    "headOffset" value before the comparison is made. The tokens argument is
    not used.
    """
    for entity in (e1, e2):
        headOffset = entity.get("headOffset")
        if entity.get("charOffset")[:-1] == headOffset[:-1]:
            entity.set("charOffset", headOffset)

    sameSpan = e1.get("charOffset") == e2.get("charOffset")
    sameType = e1.get("type") == e2.get("type")
    return sameSpan and sameType

# not used

def compareEntitiesByGENIARelaxedOffsetMethod(e1, e2, tokens):
    """
    Return True when the span of e1 lies inside a relaxed window around e2.

    The window is built from the tokens that overlap the "charOffset" of e2:
    for each such token the window is extended to the start of the previous
    token and the end of the next token (or the token itself at either end of
    the token list). If no token overlaps e2, the window stays empty and the
    result is False.

    e1     -- entity whose "charOffset" is tested against the window
    e2     -- entity whose "charOffset" defines the window
    tokens -- ordered token elements, each with a "charOffset" attribute
    """
    # NOTE(review): Range is not imported anywhere in the visible module, so
    # the original raised NameError here. Utils.Range is assumed to be the
    # intended module (it must provide charOffsetToSingleTuple and overlap)
    # -- confirm against the project layout.
    import Utils.Range as Range

    e1Offset = Range.charOffsetToSingleTuple(e1.get("charOffset"))
    e2Offset = Range.charOffsetToSingleTuple(e2.get("charOffset"))
    # Sentinel bounds: any real token offset tightens them
    goldOffset = [99999999999,-999999999999999]
    lastIndex = len(tokens) - 1
    for i, token in enumerate(tokens):
        tokenOffset = Range.charOffsetToSingleTuple(token.get("charOffset"))
        if not Range.overlap(tokenOffset, e2Offset):
            continue
        # Relax the left edge by one token
        if i > 0:
            prevOffset = Range.charOffsetToSingleTuple(tokens[i-1].get("charOffset"))
        else:
            prevOffset = tokenOffset
        if goldOffset[0] > prevOffset[0]:
            goldOffset[0] = prevOffset[0]
        # Relax the right edge by one token
        if i < lastIndex:
            nextOffset = Range.charOffsetToSingleTuple(tokens[i+1].get("charOffset"))
        else:
            nextOffset = tokenOffset
        if goldOffset[1] < nextOffset[1]:
            goldOffset[1] = nextOffset[1]

    # The start of e1 must be compared with the window start (index 0). The
    # original compared it with the window end, which could only be true for
    # a zero-width span sitting exactly at the end of the window.
    return e1Offset[0] >= goldOffset[0] and e1Offset[1] <= goldOffset[1]

# Produces a mapping that connects matching entities from prediction (from)
# to gold standard (to).

def mapEntities(entitiesFrom, entitiesTo, tokens=None, compareFunction=compareEntitiesSimple):
    """
    Map each predicted entity to the list of gold entities it matches.

    Every entity in entitiesFrom becomes a key of the returned dictionary;
    its value is the list (possibly empty) of those entities in entitiesTo
    for which compareFunction(entityFrom, entityTo, tokens) is true, in the
    order they appear in entitiesTo.
    """
    entityMap = {}
    for predicted in entitiesFrom:
        entityMap[predicted] = [gold for gold in entitiesTo if compareFunction(predicted, gold, tokens)]
    return entityMap

75 76 ## Splits merged types generated from overlapping entities/edges into their components 77 #def getElementTypes(element): 78 # typeName = element.get("type") 79 # if typeName.find("---") != -1: 80 # return typeName.split("---") 81 # else: 82 # return [typeName] 83

def getEventPredictions(entityMap, allGoldEntities, interactionMap, classSet, negativeClassId):
    """
    Build evaluator examples and predictions for events.

    entityMap       -- predicted entity -> list of matching gold entities
    allGoldEntities -- all gold entities of the document
    interactionMap  -- predicted entity id -> truth value telling whether the
                       arguments of that event were judged correct
    classSet        -- object whose getId(type) gives the class id of a type
    negativeClassId -- class id used for "no event"

    Entities with isName == "True" are skipped on both sides. Returns the
    pair (examples, predictions), two lists of equal length where
    examples[i] is [id, goldClassId, None, None] and predictions[i] is
    [predictedClassId].
    """
    exampleId = "Unknown.x0"
    examples = []
    predictions = []
    # analyze events
    for predictedEntity, goldEntities in entityMap.items():
        if predictedEntity.get("isName") == "True":
            continue
        predictedType = predictedEntity.get("type")
        matched = False
        for goldEntity in goldEntities:
            matched = True
            if predictedType == goldEntity.get("type") and not interactionMap[predictedEntity.get("id")]:
                # types agree but the arguments are wrong: false positive for this type
                goldClassId = negativeClassId
            else:
                # either the types differ (gold class is kept, so the pair is a
                # misclassification) or types and arguments are both correct
                goldClassId = classSet.getId(goldEntity.get("type"))
            examples.append( [exampleId, goldClassId, None, None] )
            predictions.append( [classSet.getId(predictedEntity.get("type"))] )
        if not matched: # false positive: the predicted span has no gold counterpart
            examples.append( [exampleId, negativeClassId, None, None] )
            predictions.append( [classSet.getId(predictedEntity.get("type"))] )
    # Gold entities that no predicted entity was mapped to are false negatives
    mappedGoldEntities = set()
    for goldEntities in entityMap.values():
        mappedGoldEntities.update(goldEntities)
    for goldEntity in allGoldEntities:
        if goldEntity.get("isName") == "True" or goldEntity in mappedGoldEntities:
            continue
        examples.append( [exampleId, classSet.getId(goldEntity.get("type")), None, None] )
        predictions.append( [negativeClassId] )
    assert len(examples) == len(predictions)
    return examples, predictions

# Uses the mapped entities to give predictions for a single sentence

def getEntityPredictions(entityMap, targetEntities, classSet, negativeClassId):
    """
    Build evaluator examples and predictions for entities.

    entityMap       -- predicted entity -> list of matching gold entities
    targetEntities  -- all gold entities
    classSet        -- object whose getId(type) gives the class id of a type
    negativeClassId -- class id used for "no entity"

    Entities with isName == "True" are skipped on both sides. Each
    (predicted, gold) pair in the map gives one example; a predicted entity
    without gold matches is a false positive and a gold entity that nothing
    maps to is a false negative. Returns the pair (examples, predictions),
    two lists of equal length.
    """
    exampleId = "Unknown.x0"
    examples = []
    predictions = []
    for predictedEntity, goldEntities in entityMap.items():
        if predictedEntity.get("isName") == "True":
            continue
        matched = False
        for goldEntity in goldEntities:
            matched = True
            examples.append( [exampleId, classSet.getId(goldEntity.get("type")), None, None] )
            predictions.append( [classSet.getId(predictedEntity.get("type"))] )
        if not matched: # false positive prediction
            examples.append( [exampleId, negativeClassId, None, None] )
            predictions.append( [classSet.getId(predictedEntity.get("type"))] )
    # Gold entities that no predicted entity was mapped to are false negatives
    mappedGoldEntities = set()
    for goldEntities in entityMap.values():
        mappedGoldEntities.update(goldEntities)
    for goldEntity in targetEntities:
        if goldEntity.get("isName") == "True" or goldEntity in mappedGoldEntities:
            continue
        examples.append( [exampleId, classSet.getId(goldEntity.get("type")), None, None] )
        predictions.append( [negativeClassId] )
    assert len(examples) == len(predictions)
    return examples, predictions

# Uses mapped entities and predicted and gold interactions to provide
# predictions for the interactions

def getInteractionPredictions(interactionsFrom, interactionsTo, entityMap, classSet, negativeClassId):
    """
    Build evaluator examples and predictions for interactions.

    interactionsFrom -- predicted interaction elements
    interactionsTo   -- gold interaction elements
    entityMap        -- predicted entity -> list of matching gold entities
    classSet         -- object whose getId(type) gives the class id of a type
    negativeClassId  -- class id used for "no interaction"

    A predicted interaction matches a gold interaction when the gold "e1" and
    "e2" ids are among the gold entities mapped from the predicted "e1" and
    "e2" entities respectively.

    Returns a 4-tuple (examples, predictions, falseEntity, events):
    examples, predictions -- lists of equal length; examples[i] is
                             [id, goldClassId, None, None] and predictions[i]
                             is [predictedClassId]
    falseEntity -- falseEntity[type][0] counts predicted interactions with an
                   endpoint that has no mapped gold entity, falseEntity[type][1]
                   counts unmatched gold interactions with an endpoint that no
                   predicted entity maps to
    events      -- predicted entity id -> bool, False once the entity is the
                   "e1" of a false positive interaction or is mapped to the
                   "e1" of a gold interaction that was not predicted
    """
    examples = []
    predictions = []
    id = "Unknown.x0"
    # Index the predicted entities by id; ids are required to be unique
    fromEntityIdToElement = {}
    for key in entityMap.keys():
        entityId = key.get("id")
        assert not fromEntityIdToElement.has_key(entityId), entityId
        fromEntityIdToElement[entityId] = key

    # Keep track of false positives caused by false positive entities
    falseEntity = defaultdict(lambda: defaultdict(int))

    toInteractionsWithPredictions = set()
    events = {}
    for predictedEntity in entityMap.keys():
        events[predictedEntity.get("id")] = True # mark all events as positive (if no arguments, gold or predicted, remains positive)
    for interactionFrom in interactionsFrom:
        goldE1Ids = []
        goldE2Ids = []
        if interactionFrom.get("e1") not in fromEntityIdToElement or interactionFrom.get("e2") not in fromEntityIdToElement:
            # Both id lists stay empty, so the interaction ends up as a false positive below
            print >> sys.stderr, "Warning, interaction", interactionFrom.get("id"), [interactionFrom.get("e1"), interactionFrom.get("e2")], "links to a non-existing entity"
        else:
            # Select gold entities for entity-ids referred to in the predicted interaction
            for goldEntity in entityMap[fromEntityIdToElement[interactionFrom.get("e1")]]:
                goldE1Ids.append(goldEntity.get("id"))
            for goldEntity in entityMap[fromEntityIdToElement[interactionFrom.get("e2")]]:
                goldE2Ids.append(goldEntity.get("id"))

        # An endpoint without any gold counterpart can never match a gold interaction
        if len(goldE1Ids) == 0 or len(goldE2Ids) == 0:
            falseEntity[interactionFrom.get("type")][0] += 1

        found = False
        # Go through all gold interactions
        for interactionTo in interactionsTo:
            if interactionTo.get("e1") in goldE1Ids and interactionTo.get("e2") in goldE2Ids: # this gold interaction matches the predicted one
                toInteractionsWithPredictions.add(interactionTo)
                examples.append( [id, classSet.getId(interactionTo.get("type")),None,None] )
                predictions.append( [classSet.getId(interactionFrom.get("type"))] )
                found = True
        if not found: # false positive prediction
            examples.append( [id,negativeClassId,None,None] )
            predictions.append( [classSet.getId(interactionFrom.get("type"))] )
            events[interactionFrom.get("e1")] = False # false positive argument -> incorrect event
    # Get ids of gold entities that had a correct prediction
    reverseEntityMap = {}
    for predictedEntity, goldEntities in entityMap.iteritems():
        for goldEntity in goldEntities:
            #assert goldEntity.get("id") not in reverseEntityMap, (predictedEntity.get("id"), [x.get("id") for x in goldEntities])
            # One gold entity can map to more than one predicted entities,
            # due to predicted entities created by splitting a prediction
            if goldEntity.get("id") not in reverseEntityMap:
                reverseEntityMap[goldEntity.get("id")] = []
            reverseEntityMap[goldEntity.get("id")].append(predictedEntity.get("id"))
    mappedGoldEntities = reverseEntityMap.keys()
    # Process gold interactions that did not have a prediction
    for interactionTo in interactionsTo:
        if interactionTo not in toInteractionsWithPredictions: # false negative gold
            examples.append( [id, classSet.getId(interactionTo.get("type")), None, None] )
            predictions.append( [negativeClassId] )
            #predictions.append( ((id, classSet.getId(interactionTo.get("type"))), negativeClassId, None, None) )
            if interactionTo.get("e1") not in mappedGoldEntities or interactionTo.get("e2") not in mappedGoldEntities:
                falseEntity[interactionTo.get("type")][1] += 1
            if interactionTo.get("e1") in reverseEntityMap: # mark an event false due to a missing gold interaction
                for predictedEntityId in reverseEntityMap[interactionTo.get("e1")]:
                    events[predictedEntityId] = False # missing argument -> incorrect event
    assert len(examples) == len(predictions)
    return examples, predictions, falseEntity, events

# Compares the sentences of a predicted (from) document to those of a gold (to) document

def processDocument(fromDocumentSentences, toDocumentSentences, target, classSets, negativeClassId, entityMatchFunction):
    """
    Compare the predicted sentences of one document with the gold sentences.

    fromDocumentSentences -- predicted sentence objects of the document
    toDocumentSentences   -- gold sentence objects in the same order, or None
                             when there is no gold standard
    target                -- "entities", "interactions", "events" or "both";
                             "both" evaluates all three
    classSets             -- dictionary with the keys "entity" and "interaction"
    negativeClassId       -- class id of the negative class
    entityMatchFunction   -- function(predicted, gold, tokens) used to match
                             entities

    Returns ((entityExamples, entityPredictions),
             (interactionExamples, interactionPredictions),
             (eventExamples, eventPredictions), falseEntity).
    Pairs that the chosen target does not cover are returned as empty lists.
    """
    if toDocumentSentences is not None:
        assert len(fromDocumentSentences) == len(toDocumentSentences)
    else:
        toDocumentSentences = [None] * len(fromDocumentSentences)
    entityMap = {}
    allToEntities = []
    for fromSentence, toSentence in zip(fromDocumentSentences, toDocumentSentences):
        if toSentence is not None:
            assert fromSentence.sentence.get("id") == toSentence.sentence.get("id")
        # predicted entities of type "neg" are not predictions at all
        entitiesFrom = [e for e in fromSentence.entities if e.get("type") != "neg"]
        entitiesTo = []
        if toSentence is not None:
            entitiesTo = toSentence.entities
            allToEntities.extend(entitiesTo)
        tokens = fromSentence.tokens
        # map predicted entities to gold entities
        sentenceEntityMap = mapEntities(entitiesFrom, entitiesTo, tokens, compareFunction=entityMatchFunction)
        for entity in sentenceEntityMap.keys():
            assert entity not in entityMap
            entityMap[entity] = sentenceEntityMap[entity]

    # select interactions
    fromInteractions = []
    for fromSentence in fromDocumentSentences:
        for interaction in fromSentence.interactions + fromSentence.pairs:
            if interaction.get("type") != "neg":
                fromInteractions.append(interaction)
    toInteractions = []
    for toSentence in toDocumentSentences:
        if toSentence is not None:
            toInteractions.extend(toSentence.interactions)
            toInteractions.extend(toSentence.pairs)

    # Every result starts out empty. The original bound these names only
    # inside the target branches, so any target other than "both" failed with
    # an unbound local at the return statement.
    entityExamples, entityPredictions = [], []
    interactionExamples, interactionPredictions = [], []
    eventExamples, eventPredictions = [], []
    falseEntity = defaultdict(lambda: defaultdict(int))

    # get predictions for predicted edges/entities vs. gold edges/entities
    if target == "entities" or target == "both":
        entityExamples, entityPredictions = getEntityPredictions(entityMap, allToEntities, classSets["entity"], negativeClassId)
    if target in ("interactions", "events", "both"):
        # Events need the per-entity argument verdicts, so the interactions
        # are analysed for target "events" too; their examples are only
        # reported when interactions were actually requested.
        newExamples, newPredictions, sentFalseEntity, interactionMap = getInteractionPredictions(fromInteractions, toInteractions, entityMap, classSets["interaction"], negativeClassId)
        if target != "events":
            interactionExamples, interactionPredictions = newExamples, newPredictions
            for k,v in sentFalseEntity.items():
                falseEntity[k][0] += v[0]
                falseEntity[k][1] += v[1]
    if target == "events" or target == "both":
        eventExamples, eventPredictions = getEventPredictions(entityMap, allToEntities, interactionMap, classSets["entity"], negativeClassId)

    return (entityExamples, entityPredictions), (interactionExamples, interactionPredictions), (eventExamples, eventPredictions), falseEntity

# Compares a prediction (from) to a gold (to) corpus

def processCorpora(EvaluatorClass, fromCorpus, toCorpus, target, classSets, negativeClassId, entityMatchFunction):
    """
    Compare a predicted corpus with a gold corpus and print the results.

    Each non-empty document of fromCorpus is passed to processDocument
    together with the gold document at the same index (None when toCorpus is
    None), and the returned examples and predictions are collected. For each
    of entities, interactions and events that has at least one prediction, an
    EvaluatorClass instance is created and its concise string is printed.

    Returns the evaluator that was created last, or None if none was created.
    """
    entityExamples, entityPredictions = [], []
    interactionExamples, interactionPredictions = [], []
    eventExamples, eventPredictions = [], []
    # Aggregated over the documents; this function neither prints nor returns it
    falseEntity = defaultdict(lambda: defaultdict(int))
    counter = ProgressCounter(len(fromCorpus.sentences), "Corpus Processing")
    # Loop through the documents and collect all predictions
    goldDocuments = None
    if toCorpus is not None:
        goldDocuments = toCorpus.documentSentences
    for docIndex, predictedDocument in enumerate(fromCorpus.documentSentences):
        if len(predictedDocument) == 0:
            continue
        # the progress label is the first sentence id without its last dotted part
        documentLabel = predictedDocument[0].sentence.get("id").rsplit(".", 1)[0]
        counter.update(len(predictedDocument), documentLabel)
        goldDocument = None
        if goldDocuments is not None:
            goldDocument = goldDocuments[docIndex]
        entityResult, interactionResult, eventResult, documentFalseEntity = processDocument(predictedDocument, goldDocument, target, classSets, negativeClassId, entityMatchFunction)
        entityExamples.extend(entityResult[0])
        entityPredictions.extend(entityResult[1])
        interactionExamples.extend(interactionResult[0])
        interactionPredictions.extend(interactionResult[1])
        eventExamples.extend(eventResult[0])
        eventPredictions.extend(eventResult[1])
        for k,v in documentFalseEntity.items():
            falseEntity[k][0] += v[0]
            falseEntity[k][1] += v[1]

    # Process the predictions with an evaluator and print the results
    evaluator = None
    reports = (("Entities", entityExamples, entityPredictions, "entity"),
               ("Interactions", interactionExamples, interactionPredictions, "interaction"),
               ("Events", eventExamples, eventPredictions, "entity"))
    for title, examples, predictions, classSetName in reports:
        if len(predictions) > 0:
            evaluator = EvaluatorClass(examples, predictions, classSet=classSets[classSetName])
            print(evaluator.toStringConcise(title=title))
    return evaluator

334 335 ## Splits entities/edges with merged types into separate elements 336 #def splitMerged(sentence): 337 # for sourceList in [sentence.entities, sentence.interactions, sentence.pairs]: 338 # for element in sourceList[:]: 339 # types = getElementTypes(element) 340 # if len(types) > 1: 341 # for type in types: 342 # newElement = copy.copy(element) 343 # newElement.set("type", type) 344 # sourceList.append(newElement) 345 # sourceList.remove(element) 346

def run(EvaluatorClass, inputCorpusFile, goldCorpusFile, parse, tokenization=None, target="both", entityMatchFunction=compareEntitiesSimple, removeIntersentenceInteractions=False):
    """
    Load a predicted and a gold corpus and evaluate the first against the second.

    EvaluatorClass  -- evaluator class; its "type" attribute must be "binary"
                       or "multiclass", otherwise the program exits
    inputCorpusFile -- predictions in interaction XML
    goldCorpusFile  -- gold standard in interaction XML, or None
    parse, tokenization, removeIntersentenceInteractions
                    -- passed on to CorpusElements.loadCorpus
    target, entityMatchFunction
                    -- passed on to processCorpora

    Returns the value returned by processCorpora.
    """
    print >> sys.stderr, "##### EvaluateInteractionXML #####"
    print >> sys.stderr, "Comparing input", inputCorpusFile, "to gold", goldCorpusFile
    # Class sets are used to convert the types to ids that the evaluator can use
    evaluatorType = EvaluatorClass.type
    if evaluatorType == "binary":
        negativeClassId = -1
        classSets = {"entity": IdSet(idDict={"True":1,"False":-1}, locked=True),
                     "interaction": IdSet(idDict={"True":1,"False":-1}, locked=True)}
    elif evaluatorType == "multiclass":
        negativeClassId = 1
        classSets = {"entity": IdSet(idDict={"neg":1}, locked=False),
                     "interaction": IdSet(idDict={"neg":1}, locked=False)}
    else:
        sys.exit("Unknown evaluator type")

    # Load the corpora, gold first; the gold corpus is optional
    if goldCorpusFile is not None:
        goldCorpusElements = CorpusElements.loadCorpus(goldCorpusFile, parse, tokenization, removeIntersentenceInteractions)
    else:
        goldCorpusElements = None
    predictedCorpusElements = CorpusElements.loadCorpus(inputCorpusFile, parse, tokenization, removeIntersentenceInteractions)

    # Compare the corpora and print results on screen
    return processCorpora(EvaluatorClass, predictedCorpusElements, goldCorpusElements, target, classSets, negativeClassId, entityMatchFunction)

# Command line entry point: parse the options, import the chosen evaluator
# class and evaluate the input corpus against the gold corpus.
if __name__=="__main__":
    import sys, os
    # Import Psyco if available
    try:
        import psyco
        psyco.full()
        print >> sys.stderr, "Found Psyco, using"
    except ImportError:
        print >> sys.stderr, "Psyco not installed"
    optparser = OptionParser(usage="%prog [options]\nCalculate f-score and other statistics.")
    optparser.add_option("-i", "--input", default=None, dest="input", help="Predictions in interaction XML", metavar="FILE")
    optparser.add_option("-g", "--gold", default=None, dest="gold", help="Gold standard in interaction XML", metavar="FILE")
    #optparser.add_option("-o", "--output", default=None, dest="output", help="Output file for the statistics")
    # The help text used to advertise "edges", a value the evaluation code
    # does not recognise; the values listed here are the ones it checks for.
    optparser.add_option("-r", "--target", default="both", dest="target", help="interactions/entities/events/both (default: both)")
    optparser.add_option("-e", "--evaluator", default="AveragingMultiClassEvaluator", dest="evaluator", help="Prediction evaluator class")
    # options.tokenization is passed to run() below, so the option has to
    # exist; with it commented out the script stopped with an AttributeError.
    # The default None is the same as the default of run().
    optparser.add_option("-t", "--tokenization", default=None, dest="tokenization", help="tokenization")
    optparser.add_option("-p", "--parse", default="McCC", dest="parse", help="parse")
    optparser.add_option("-m", "--matching", default="SIMPLE", dest="matching", help="matching function")
    optparser.add_option("--no_intersentence", default=False, action="store_true", dest="no_intersentence", help="Exclude intersentence interactions from evaluation")
    (options, args) = optparser.parse_args()

    # Report a bad value as a usage error; an assert would be skipped under -O
    matchFunctions = {"SIMPLE": compareEntitiesSimple, "STRICT": compareEntitiesStrict}
    if options.matching not in matchFunctions:
        optparser.error("matching function must be SIMPLE or STRICT")
    entityMatchFunction = matchFunctions[options.matching]

    # Load the selected evaluator class. The module Evaluators.<name> must
    # define a class called <name>. A plain import is used instead of exec so
    # that the command line value cannot run arbitrary statements.
    print >> sys.stderr, "Importing modules"
    evaluatorModule = __import__("Evaluators." + options.evaluator, globals(), locals(), [options.evaluator])
    Evaluator = getattr(evaluatorModule, options.evaluator)

    run(Evaluator, options.input, options.gold, options.parse, options.tokenization, options.target, entityMatchFunction=entityMatchFunction, removeIntersentenceInteractions=options.no_intersentence)

Source Code for Module TEES.Evaluators.EvaluateInteractionXML