1 """
2 For comparing a predicted interaction XML against a gold standard
3 """
4 import sys, os
5 sys.path.append(os.path.dirname(os.path.abspath(__file__))+"/..")
6
7 from Utils.ProgressCounter import ProgressCounter
8 from optparse import OptionParser
9 import Core.ExampleUtils as ExampleUtils
10 from Core.IdSet import IdSet
11 import Utils.TableUtils as TableUtils
12 import Utils.InteractionXML.CorpusElements as CorpusElements
13 import copy
14 from collections import defaultdict
15
16
def compareEntitiesSimple(e1, e2, tokens=None):
    """
    Return True when two entity elements have the same "headOffset" and
    the same "type" attribute, False otherwise.

    e1, e2 -- objects exposing get(attributeName)
    tokens -- unused; accepted because mapEntities calls every match
              function as compareFunction(entityFrom, entityTo, tokens)

    NOTE(review): the "def" line was missing from the reviewed text; the
    name and signature are reconstructed from the call sites -- confirm.
    """
    return (e1.get("headOffset") == e2.get("headOffset")
            and e1.get("type") == e2.get("type"))
22
24
def compareEntitiesStrict(e1, e2, tokens=None):
    """
    Return True when two entity elements have the same character offset
    and the same "type" attribute, False otherwise.

    Before comparing, an entity whose "charOffset" and "headOffset" differ
    only in their last character is treated as if its charOffset were its
    headOffset. This normalisation is computed locally: the elements are
    no longer modified as a side effect of being compared. The returned
    value is the same as with the in-place version, including on repeated
    calls.

    An entity lacking either offset attribute is compared on its raw
    "charOffset" value instead of raising TypeError.

    e1, e2 -- objects exposing get(attributeName)
    tokens -- unused; accepted because mapEntities calls every match
              function as compareFunction(entityFrom, entityTo, tokens)

    NOTE(review): the "def" line was missing from the reviewed text; the
    name and signature are reconstructed from the call sites -- confirm.
    """
    def effectiveOffset(entity):
        charOffset = entity.get("charOffset")
        headOffset = entity.get("headOffset")
        if charOffset is None or headOffset is None:
            return charOffset
        if charOffset[:-1] == headOffset[:-1]:
            return headOffset
        return charOffset

    return (effectiveOffset(e1) == effectiveOffset(e2)
            and e1.get("type") == e2.get("type"))
36
37
64
65
66
def mapEntities(entitiesFrom, entitiesTo, tokens=None, compareFunction=None):
    """
    Map every entity in entitiesFrom to the list of entities in entitiesTo
    that compareFunction accepts as a match.

    entitiesFrom    -- iterable of hashable entity objects (dictionary keys)
    entitiesTo      -- sequence of candidate entities; it is scanned once
                       per entity in entitiesFrom
    tokens          -- passed through unchanged as the third argument of
                       compareFunction
    compareFunction -- callable(entityFrom, entityTo, tokens) returning a
                       truth value; when None, compareEntitiesSimple is
                       used (looked up at call time)

    Returns a dictionary {entityFrom: [matching entityTo, ...]}. Entities
    with no match map to an empty list.

    NOTE(review): the "def" line was missing from the reviewed text; the
    signature is reconstructed from the call in processDocument -- confirm
    the original defaults.
    """
    if compareFunction is None:
        compareFunction = compareEntitiesSimple
    return {
        entityFrom: [entityTo for entityTo in entitiesTo
                     if compareFunction(entityFrom, entityTo, tokens)]
        for entityFrom in entitiesFrom
    }
75
76
77
78
79
80
81
82
83
def getEventPredictions(entityMap, allGoldEntities, interactionMap, classSet, negativeClassId):
    """
    Build parallel example/prediction lists for event-level evaluation.

    entityMap       -- {predicted entity: [matching gold entities]}
    allGoldEntities -- every gold entity of the document
    interactionMap  -- {predicted entity id: truth value}; a true value
                       means the arguments of that entity were judged
                       correct (processDocument passes the fourth return
                       value of getInteractionPredictions here)
    classSet        -- object exposing getId(typeName)
    negativeClassId -- class id used for "no entity"

    Entities whose "isName" attribute is "True" are ignored on both sides.
    For each remaining predicted entity:
      * no gold match             -> gold class is negative
      * gold match, types differ  -> gold class is the gold type
      * gold match, same type     -> gold class is the gold type when
                                     interactionMap is true for the entity,
                                     negative otherwise
    Every gold entity that no predicted entity was mapped to adds an
    example with a negative prediction.

    Returns (examples, predictions); examples are [id, classId, None, None]
    and predictions are [classId].

    NOTE(review): the "def" line and all indentation were missing from the
    reviewed text; the signature follows the call in processDocument --
    confirm.
    """
    examples = []
    predictions = []
    exampleId = "Unknown.x0"

    for predictedEntity, goldEntities in entityMap.items():
        if predictedEntity.get("isName") == "True":
            continue
        predictedType = predictedEntity.get("type")
        if not goldEntities:
            # No gold counterpart at all: plain false positive.
            examples.append([exampleId, negativeClassId, None, None])
            predictions.append([classSet.getId(predictedType)])
            continue
        for goldEntity in goldEntities:
            goldType = goldEntity.get("type")
            # The arguments only matter once the entity types agree.
            if predictedType != goldType or interactionMap[predictedEntity.get("id")]:
                goldClass = classSet.getId(goldType)
            else:
                goldClass = negativeClassId
            examples.append([exampleId, goldClass, None, None])
            predictions.append([classSet.getId(predictedType)])

    # Gold entities that nothing was mapped to are false negatives.
    mappedGoldEntities = set()
    for goldEntities in entityMap.values():
        mappedGoldEntities.update(goldEntities)
    for goldEntity in allGoldEntities:
        if goldEntity.get("isName") == "True":
            continue
        if goldEntity not in mappedGoldEntities:
            examples.append([exampleId, classSet.getId(goldEntity.get("type")), None, None])
            predictions.append([negativeClassId])

    assert len(examples) == len(predictions)
    return examples, predictions
126
127
def getEntityPredictions(entityMap, targetEntities, classSet, negativeClassId):
    """
    Build parallel example/prediction lists for entity-level evaluation.

    entityMap       -- {predicted entity: [matching gold entities]}
    targetEntities  -- every gold entity of the document
    classSet        -- object exposing getId(typeName)
    negativeClassId -- class id used for "no entity"

    Entities whose "isName" attribute is "True" are ignored on both sides.
    Each (predicted, gold) pair yields one example carrying the gold type
    and one prediction carrying the predicted type. A predicted entity
    with no gold match is paired with the negative class. A gold entity
    that no predicted entity was mapped to gets a negative prediction.

    Returns (examples, predictions); examples are [id, classId, None, None]
    and predictions are [classId].

    NOTE(review): the "def" line was missing from the reviewed text; the
    signature follows the call in processDocument -- confirm.
    """
    examples = []
    predictions = []
    exampleId = "Unknown.x0"

    for entityFrom, entitiesTo in entityMap.items():
        if entityFrom.get("isName") == "True":
            continue
        if not entitiesTo:
            # No gold counterpart: false positive.
            examples.append([exampleId, negativeClassId, None, None])
            predictions.append([classSet.getId(entityFrom.get("type"))])
            continue
        for entityTo in entitiesTo:
            examples.append([exampleId, classSet.getId(entityTo.get("type")), None, None])
            predictions.append([classSet.getId(entityFrom.get("type"))])

    # Gold entities that nothing was mapped to are false negatives.
    mappedTargetEntities = set()
    for entitiesTo in entityMap.values():
        mappedTargetEntities.update(entitiesTo)
    for entity in targetEntities:
        if entity.get("isName") == "True":
            continue
        if entity not in mappedTargetEntities:
            examples.append([exampleId, classSet.getId(entity.get("type")), None, None])
            predictions.append([negativeClassId])

    assert len(examples) == len(predictions)
    return examples, predictions
161
162
163
def getInteractionPredictions(interactionsFrom, interactionsTo, entityMap, classSet, negativeClassId):
    """
    Build parallel example/prediction lists for interaction-level
    evaluation.

    interactionsFrom -- predicted interaction elements
    interactionsTo   -- gold interaction elements
    entityMap        -- {predicted entity: [matching gold entities]}
    classSet         -- object exposing getId(typeName)
    negativeClassId  -- class id used for "no interaction"

    A predicted interaction matches a gold interaction when the gold "e1"
    is among the gold entities mapped from the predicted "e1", and likewise
    for "e2". Each match yields one example/prediction pair. A predicted
    interaction with no match is paired with the negative class, and a
    gold interaction with no match gets a negative prediction.

    Returns (examples, predictions, falseEntity, events):
      falseEntity -- {interaction type: {0: n, 1: m}}. n counts predicted
                     interactions with an end point that has no gold
                     entity. m counts unmatched gold interactions with an
                     end point that no predicted entity maps to.
      events      -- {predicted entity id: bool}. Starts True and becomes
                     False when an interaction whose "e1" is that entity
                     is a false positive, or when a gold interaction whose
                     "e1" maps back to that entity has no prediction.

    NOTE(review): the "def" line and all indentation were missing from the
    reviewed text. The signature follows the call in processDocument, and
    the nesting of the false-negative bookkeeping is reconstructed --
    confirm.
    """
    examples = []
    predictions = []
    exampleId = "Unknown.x0"

    fromEntityIdToElement = {}
    for entity in entityMap:
        entityId = entity.get("id")
        assert entityId not in fromEntityIdToElement, entityId
        fromEntityIdToElement[entityId] = entity

    falseEntity = defaultdict(lambda: defaultdict(int))
    toInteractionsWithPredictions = set()
    # Every predicted entity counts as a correct event until an argument
    # error is found below.
    events = {}
    for entityId in fromEntityIdToElement:
        events[entityId] = True

    for interactionFrom in interactionsFrom:
        e1Id = interactionFrom.get("e1")
        e2Id = interactionFrom.get("e2")
        goldE1Ids = set()
        goldE2Ids = set()
        if e1Id not in fromEntityIdToElement or e2Id not in fromEntityIdToElement:
            sys.stderr.write("Warning, interaction %s %s links to a non-existing entity\n"
                             % (interactionFrom.get("id"), [e1Id, e2Id]))
        else:
            # Gold entities that the predicted end points were mapped to.
            for goldEntity in entityMap[fromEntityIdToElement[e1Id]]:
                goldE1Ids.add(goldEntity.get("id"))
            for goldEntity in entityMap[fromEntityIdToElement[e2Id]]:
                goldE2Ids.add(goldEntity.get("id"))

        if not goldE1Ids or not goldE2Ids:
            falseEntity[interactionFrom.get("type")][0] += 1

        found = False
        for interactionTo in interactionsTo:
            if interactionTo.get("e1") in goldE1Ids and interactionTo.get("e2") in goldE2Ids:
                toInteractionsWithPredictions.add(interactionTo)
                examples.append([exampleId, classSet.getId(interactionTo.get("type")), None, None])
                predictions.append([classSet.getId(interactionFrom.get("type"))])
                found = True
        if not found:
            # False positive: the event rooted at e1 has a wrong argument.
            examples.append([exampleId, negativeClassId, None, None])
            predictions.append([classSet.getId(interactionFrom.get("type"))])
            events[e1Id] = False

    # gold entity id -> ids of the predicted entities mapped to it
    reverseEntityMap = {}
    for predictedEntity, goldEntities in entityMap.items():
        for goldEntity in goldEntities:
            reverseEntityMap.setdefault(goldEntity.get("id"), []).append(predictedEntity.get("id"))

    for interactionTo in interactionsTo:
        if interactionTo in toInteractionsWithPredictions:
            continue
        # False negative: a gold interaction nobody predicted.
        examples.append([exampleId, classSet.getId(interactionTo.get("type")), None, None])
        predictions.append([negativeClassId])
        goldE1Id = interactionTo.get("e1")
        if goldE1Id not in reverseEntityMap or interactionTo.get("e2") not in reverseEntityMap:
            falseEntity[interactionTo.get("type")][1] += 1
        # A missing argument makes the corresponding predicted events wrong.
        for predictedEntityId in reverseEntityMap.get(goldE1Id, ()):
            events[predictedEntityId] = False

    assert len(examples) == len(predictions)
    return examples, predictions, falseEntity, events
232
233
def processDocument(fromDocumentSentences, toDocumentSentences, target, classSets, negativeClassId, entityMatchFunction):
    """
    Compare the predicted sentences of one document with the gold ones.

    fromDocumentSentences -- predicted sentence objects; each one is read
                             through its .sentence, .entities, .tokens,
                             .interactions and .pairs attributes
    toDocumentSentences   -- gold sentence objects in the same order, or
                             None when no gold standard is available
    target                -- "entities", "interactions", "events" or "both"
    classSets             -- dictionary with "entity" and "interaction"
                             class sets
    negativeClassId       -- class id of the negative class
    entityMatchFunction   -- passed to mapEntities as compareFunction

    Returns ((entityExamples, entityPredictions),
             (interactionExamples, interactionPredictions),
             (eventExamples, eventPredictions), falseEntity).

    Every list starts out empty, so a target that skips one of the three
    evaluations returns empty lists for it instead of raising NameError.
    For target "events" the interaction comparison is still run, because
    getEventPredictions needs its event map, but the interaction examples
    are not reported.
    """
    if toDocumentSentences is not None:
        assert len(fromDocumentSentences) == len(toDocumentSentences)
    else:
        toDocumentSentences = [None] * len(fromDocumentSentences)

    # Map predicted entities to gold entities, one sentence at a time.
    entityMap = {}
    allToEntities = []
    for fromSentence, toSentence in zip(fromDocumentSentences, toDocumentSentences):
        entitiesTo = []
        if toSentence is not None:
            assert fromSentence.sentence.get("id") == toSentence.sentence.get("id")
            entitiesTo = toSentence.entities
            allToEntities.extend(entitiesTo)
        entitiesFrom = [e for e in fromSentence.entities if e.get("type") != "neg"]
        sentenceEntityMap = mapEntities(entitiesFrom, entitiesTo, fromSentence.tokens, compareFunction=entityMatchFunction)
        for entity, matches in sentenceEntityMap.items():
            assert entity not in entityMap
            entityMap[entity] = matches

    # Interactions are collected for the whole document.
    fromInteractions = []
    for fromSentence in fromDocumentSentences:
        for interaction in fromSentence.interactions + fromSentence.pairs:
            if interaction.get("type") != "neg":
                fromInteractions.append(interaction)
    toInteractions = []
    for toSentence in toDocumentSentences:
        if toSentence is not None:
            toInteractions.extend(toSentence.interactions)
            toInteractions.extend(toSentence.pairs)

    entityExamples, entityPredictions = [], []
    interactionExamples, interactionPredictions = [], []
    eventExamples, eventPredictions = [], []
    falseEntity = defaultdict(lambda: defaultdict(int))
    interactionMap = None

    if target in ("entities", "both"):
        entityExamples, entityPredictions = getEntityPredictions(entityMap, allToEntities, classSets["entity"], negativeClassId)
    if target in ("interactions", "both"):
        interactionExamples, interactionPredictions, sentFalseEntity, interactionMap = getInteractionPredictions(fromInteractions, toInteractions, entityMap, classSets["interaction"], negativeClassId)
        for k, v in sentFalseEntity.items():
            falseEntity[k][0] += v[0]
            falseEntity[k][1] += v[1]
    if target in ("events", "both"):
        if interactionMap is None:
            # Event scoring depends on the per-entity argument verdicts.
            interactionMap = getInteractionPredictions(fromInteractions, toInteractions, entityMap, classSets["interaction"], negativeClassId)[3]
        eventExamples, eventPredictions = getEventPredictions(entityMap, allToEntities, interactionMap, classSets["entity"], negativeClassId)

    return (entityExamples, entityPredictions), (interactionExamples, interactionPredictions), (eventExamples, eventPredictions), falseEntity
287
288
def processCorpora(EvaluatorClass, fromCorpus, toCorpus, target, classSets, negativeClassId, entityMatchFunction):
    """
    Evaluate a predicted corpus against a gold corpus, document by
    document, and print one concise report per non-empty result set.

    EvaluatorClass      -- class constructed as
                           EvaluatorClass(examples, predictions, classSet=...)
                           and asked for toStringConcise(title=...)
    fromCorpus          -- predicted corpus; .sentences and
                           .documentSentences are read
    toCorpus            -- gold corpus (.documentSentences is read), or None
    target, classSets, negativeClassId, entityMatchFunction
                        -- forwarded unchanged to processDocument

    Reports are written to standard output in the order Entities,
    Interactions, Events. Event predictions are evaluated with the
    "entity" class set.

    Returns the evaluator created last, or None when every prediction list
    is empty.
    """
    entityExamples = []
    entityPredictions = []
    interactionExamples = []
    interactionPredictions = []
    eventExamples = []
    eventPredictions = []
    # Accumulated per interaction type; not reported by this function.
    falseEntity = defaultdict(lambda: defaultdict(int))
    counter = ProgressCounter(len(fromCorpus.sentences), "Corpus Processing")

    toCorpusSentences = None
    if toCorpus is not None:
        toCorpusSentences = toCorpus.documentSentences
    for index, fromSentences in enumerate(fromCorpus.documentSentences):
        if len(fromSentences) > 0:
            # The progress label is the sentence id minus its last
            # dot-separated component.
            counter.update(len(fromSentences), fromSentences[0].sentence.get("id").rsplit(".", 1)[0])
        toSentences = None
        if toCorpusSentences is not None:
            toSentences = toCorpusSentences[index]
        newEntityExPred, newInteractionExPred, newEventExPred, sentFalseEntity = processDocument(fromSentences, toSentences, target, classSets, negativeClassId, entityMatchFunction)
        entityExamples.extend(newEntityExPred[0])
        entityPredictions.extend(newEntityExPred[1])
        interactionExamples.extend(newInteractionExPred[0])
        interactionPredictions.extend(newInteractionExPred[1])
        eventExamples.extend(newEventExPred[0])
        eventPredictions.extend(newEventExPred[1])
        for k, v in sentFalseEntity.items():
            falseEntity[k][0] += v[0]
            falseEntity[k][1] += v[1]

    evaluator = None
    reports = (
        ("Entities", entityExamples, entityPredictions, "entity"),
        ("Interactions", interactionExamples, interactionPredictions, "interaction"),
        ("Events", eventExamples, eventPredictions, "entity"),
    )
    for title, examples, predictions, classSetName in reports:
        if len(predictions) > 0:
            evaluator = EvaluatorClass(examples, predictions, classSet=classSets[classSetName])
            sys.stdout.write("%s\n" % (evaluator.toStringConcise(title=title),))
    return evaluator
334
335
336
337
338
339
340
341
342
343
344
345
346
def run(EvaluatorClass, inputCorpusFile, goldCorpusFile, parse, tokenization=None, target="both", entityMatchFunction=compareEntitiesSimple, removeIntersentenceInteractions=False):
    """
    Load a predicted and (optionally) a gold interaction XML corpus and
    evaluate the former against the latter.

    EvaluatorClass   -- evaluator class; its "type" attribute must be
                        "binary" or "multiclass"
    inputCorpusFile  -- predicted corpus, passed to CorpusElements.loadCorpus
    goldCorpusFile   -- gold corpus, or None to evaluate without one
    parse, tokenization, removeIntersentenceInteractions
                     -- passed to CorpusElements.loadCorpus for both corpora
    target           -- forwarded to processCorpora
    entityMatchFunction -- entity comparison function forwarded to
                        processCorpora

    Exits through sys.exit when the evaluator type is not recognised.
    Returns whatever processCorpora returns.
    """
    sys.stderr.write("##### EvaluateInteractionXML #####\n")
    sys.stderr.write("Comparing input %s to gold %s\n" % (inputCorpusFile, goldCorpusFile))

    # Entities and interactions each get their own class set instance.
    classSets = {}
    if EvaluatorClass.type == "binary":
        for name in ("entity", "interaction"):
            classSets[name] = IdSet(idDict={"True":1,"False":-1}, locked=True)
        negativeClassId = -1
    elif EvaluatorClass.type == "multiclass":
        for name in ("entity", "interaction"):
            classSets[name] = IdSet(idDict={"neg":1}, locked=False)
        negativeClassId = 1
    else:
        sys.exit("Unknown evaluator type")

    # The gold corpus is loaded first, then the predictions.
    goldCorpusElements = None
    if goldCorpusFile is not None:
        goldCorpusElements = CorpusElements.loadCorpus(goldCorpusFile, parse, tokenization, removeIntersentenceInteractions)
    predictedCorpusElements = CorpusElements.loadCorpus(inputCorpusFile, parse, tokenization, removeIntersentenceInteractions)

    return processCorpora(EvaluatorClass, predictedCorpusElements, goldCorpusElements, target, classSets, negativeClassId, entityMatchFunction)
371
if __name__=="__main__":
    import importlib

    # Psyco is optional: if the import fails, evaluation continues
    # without it.
    try:
        import psyco
        psyco.full()
        sys.stderr.write("Found Psyco, using\n")
    except ImportError:
        sys.stderr.write("Psyco not installed\n")

    optparser = OptionParser(usage="%prog [options]\nCalculate f-score and other statistics.")
    optparser.add_option("-i", "--input", default=None, dest="input", help="Predictions in interaction XML", metavar="FILE")
    optparser.add_option("-g", "--gold", default=None, dest="gold", help="Gold standard in interaction XML", metavar="FILE")
    # The help text lists the values that processDocument actually tests for.
    optparser.add_option("-r", "--target", default="both", dest="target", help="entities/interactions/events/both (default: both)")
    optparser.add_option("-e", "--evaluator", default="AveragingMultiClassEvaluator", dest="evaluator", help="Prediction evaluator class")
    optparser.add_option("-p", "--parse", default="McCC", dest="parse", help="parse")
    # options.tokenization is passed to run() below, so the option must
    # exist; without it the script fails with AttributeError.
    optparser.add_option("-t", "--tokenization", default=None, dest="tokenization", help="tokenization (default: none)")
    optparser.add_option("-m", "--matching", default="SIMPLE", dest="matching", help="matching function")
    optparser.add_option("--no_intersentence", default=False, action="store_true", dest="no_intersentence", help="Exclude intersentence interactions from evaluation")
    (options, args) = optparser.parse_args()

    # Validate explicitly: an assert would vanish under "python -O".
    matchFunctions = {"SIMPLE": compareEntitiesSimple, "STRICT": compareEntitiesStrict}
    if options.matching not in matchFunctions:
        optparser.error("matching function must be SIMPLE or STRICT")
    entityMatchFunction = matchFunctions[options.matching]

    # Load Evaluators.<name>.<name> without exec-ing a string built from
    # command-line input.
    sys.stderr.write("Importing modules\n")
    Evaluator = getattr(importlib.import_module("Evaluators." + options.evaluator), options.evaluator)

    run(Evaluator, options.input, options.gold, options.parse, options.tokenization, options.target, entityMatchFunction=entityMatchFunction, removeIntersentenceInteractions=options.no_intersentence)
404