1  import sys, os, shutil 
  2  import subprocess 
  3  import tempfile 
  4  thisPath = os.path.dirname(os.path.abspath(__file__)) 
  5  sys.path.append(os.path.join(thisPath,"..")) 
  6  import Utils.Settings as Settings 
  7  import Utils.Download as Download 
  8   
  9   
 10   
 11   
 12  evaluateGE09 = None 
 13   
 14   
 15   
 16 -def install(destDir=None, downloadDir=None, redownload=False): 
  17      print >> sys.stderr, "Installing BioNLP'11 evaluators" 
 18      settings = {} 
 19      if downloadDir == None: 
 20          downloadDir = Settings.DATAPATH 
 21      if destDir == None: 
 22          destDir = Settings.DATAPATH 
 23      for corpus in ["GE", "BB", "BI", "CO"]: 
 24          print >> sys.stderr, "Installing BioNLP'11", corpus, "evaluator" 
 25          settings[corpus + "_EVALUATOR"] = Download.getTopDir(destDir + "/tools/evaluators/", Download.downloadAndExtract(Settings.URL[corpus + "_EVALUATOR"], destDir + "/tools/evaluators/", downloadDir + "/tools/download/")) 
 26          print >> sys.stderr, "Installing BioNLP'11", corpus, "evaluator gold data" 
 27          Download.downloadAndExtract(Settings.URL[corpus + "_DEVEL"], destDir + "/tools/evaluators/gold/" + corpus + "-devel", downloadDir + "/corpora/BioNLP11-original/corpus/", os.path.basename(Settings.URL[corpus + "_DEVEL"])[:-len(".tar.gz")]) 
 28      return settings 
  29   
 30   
 31   
 32   
 33   
 34   
 35   
 36   
 37   
 38   
 39   
 40   
 41   
 42   
 43   
 44   
 46      lines = lines[3:] 
 47      results = {} 
 48      for line in lines: 
 49          if line[0] == "-": 
 50              continue 
 51          splits = line.strip().split() 
 52           
 53          name = splits[0] 
 54          name = name.replace("=","") 
 55          name = name.replace("[","") 
 56          name = name.replace("]","") 
 57          results[name] = {} 
 58           
 59          results[name]["gold"] = int(splits[1]) 
 60          results[name]["gold_match"] = int(splits[3][:-1]) 
 61          results[name]["answer"] = int(splits[4]) 
 62          results[name]["answer_match"] = int(splits[6][:-1]) 
 63          results[name]["recall"] = float(splits[7]) 
 64          results[name]["precision"] = float(splits[8]) 
 65          results[name]["fscore"] = float(splits[9]) 
 66      return results 
  67           
 69      for line in lines: 
 70          print >> sys.stderr, line[:-1] 
  71   
 73      import Core.Split as Split 
 74      files = os.listdir(path) 
 75      docNumbers = set() 
 76      for file in files: 
 77          numPart = file.split(".",1)[0] 
 78          if numPart.isdigit(): 
 79              docNumbers.add(int(numPart)) 
 80      docNumbers = list(docNumbers) 
 81      folds = Split.getFolds(len(docNumbers), folds, seed) 
 82      foldByDocNumber = {} 
 83      for i in range(len(docNumbers)): 
 84          foldByDocNumber[docNumbers[i]] = folds[i] 
 85      return foldByDocNumber 
  86   
 88      files = os.listdir(path) 
 89      for file in files: 
 90          numPart = file.split(".",1)[0] 
 91          if numPart.isdigit(): 
 92              numPart = int(numPart) 
 93              assert folds.has_key(numPart) 
 94              if folds[numPart] == foldToRemove: 
 95                  os.remove(os.path.join(path, file)) 
  96   
 98      results = [] 
 99      for i in range(folds): 
100          results.append( evaluate(sourceDir, task, folds, i) ) 
101      print >> sys.stderr, "##### Variance estimation results #####" 
102      for r in results: 
103          print >> sys.stderr, r["approximate"]["ALL-TOTAL"] 
 104   
106      goldDocIds = set() 
107      for filename in os.listdir(goldDir): 
108          if filename[-4:] == ".txt": 
109              goldDocIds.add(filename.split(".", 1)[0]) 
110      for filename in os.listdir(sourceDir): 
111          if filename.find(".a2") != -1: 
112              if filename.split(".", 1)[0] in goldDocIds: 
113                  return True 
114      return False 
 115   
116   
117   
118   
119   
120   
121   
122   
123   
124   
125   
126   
128      if task in ["GE", "GE09"]: 
129          path = ["approximate", "ALL-TOTAL", "fscore"] 
130      elif task in ["EPI", "ID", "REN"]: 
131          path = ["TOTAL", "fscore"] 
132      elif task in ["BB", "BI"]: 
133          path = ["fscore"] 
134      elif task == "CO": 
135          path = ["MENTION LINKING", "fscore"] 
136      else: 
137          assert False 
138       
139      current = results 
140      for step in path: 
141          if step in current: 
142              current = current[step] 
143          else: 
144              return -1 
145      return current 
 146   
def evaluate(source, task, goldDir=None, debug=False):
    """
    Evaluate predictions with the evaluator of the given BioNLP'11 task.

    @param source: the predictions, passed on to the task's evaluator
    @param task: task name, optionally followed by ".<subtask>"
                 (e.g. "GE.2"); the subtask defaults to "1" and is only
                 passed to the GE/GE09 evaluator
    @param goldDir: gold data location passed on to the evaluator
    @param debug: passed on to the GE/GE09 evaluator only
    @return: None when there is no evaluator for the task or the
             evaluator returned None, otherwise a tuple
             (getFScore(results, task), results)
    """
    print >> sys.stderr, "BioNLP'11 task", task, "devel evaluation"

    # Separate the optional subtask number from the task name
    if "." in task:
        task, subTask = task.split(".")
    else:
        subTask = "1"

    results = None
    if task in ("GE", "GE09"):
        results = evaluateGE(source, task, subTask, goldDir=goldDir, debug=debug)
    elif task in ("EPI", "ID"):
        results = evaluateEPIorID(task, source, goldDir)
    elif task == "REN":
        results = evaluateREN(source, goldDir)
    elif task in ("BB", "BI"):
        results = evaluateBX(task, source, goldDir)
    elif task == "CO":
        results = evaluateCO(source, goldDir)
    else:
        print >> sys.stderr, "No BioNLP'11 evaluator for task", task

    if results is None:
        return None
    return (getFScore(results, task), results)
 171   
173       
174      if not hasattr(Settings, "BIONLP_EVALUATOR_DIR"): 
175          print >> sys.stderr, corpus, "BIONLP_EVALUATOR_DIR setting not defined" 
176          evaluatorDir = None 
177      else: 
178          evaluatorDir = os.path.join(Settings.BIONLP_EVALUATOR_DIR, Settings.EVALUATOR[corpus]) 
179       
180      tempdir = None 
181      if sourceDir.endswith(".tar.gz"): 
182          tempdir = tempfile.mkdtemp() 
183          Download.extractPackage(sourceDir, os.path.join(tempdir, "source")) 
184          sourceDir = os.path.join(tempdir, "source") 
185      elif corpus == "GE09":  
186          tempdir = tempfile.mkdtemp() 
187          shutil.copytree(sourceDir, os.path.join(tempdir, "source")) 
188          sourceDir = os.path.join(tempdir, "source") 
189       
190      if goldDir == None: 
191          if not hasattr(Settings, "BIONLP_EVALUATOR_GOLD_DIR"): 
192              print >> sys.stderr, corpus, "BIONLP_EVALUATOR_GOLD_DIR setting not defined" 
193              return evaluatorDir, None 
194          goldDir = os.path.join(Settings.BIONLP_EVALUATOR_GOLD_DIR, Settings.EVALUATOR[corpus + "-gold"]) 
195      if not os.path.exists(goldDir): 
196          print >> sys.stderr, corpus, "Evaluator gold data directory", goldDir, "does not exist" 
197          goldDir = None 
198      if goldDir != None and goldDir.endswith(".tar.gz"): 
199          if tempdir == None: 
200              tempdir = tempfile.mkdtemp() 
201          goldDir = Download.getTopDir(os.path.join(tempdir, "gold"), Download.extractPackage(goldDir, os.path.join(tempdir, "gold"))) 
202          print >> sys.stderr, "Uncompressed evaluation gold to", goldDir 
203      if goldDir != None and not hasGoldDocuments(sourceDir, goldDir): 
204          print >> sys.stderr, "Evaluation input has no gold documents" 
205          goldDir = None 
206       
207      sourceDir = os.path.abspath(sourceDir) 
208      if evaluatorDir != None: 
209          evaluatorDir = os.path.abspath(evaluatorDir) 
210      if goldDir != None: 
211          goldDir = os.path.abspath(goldDir) 
212      if tempdir != None: 
213          tempdir = os.path.abspath(tempdir) 
214      return evaluatorDir, sourceDir, goldDir, tempdir 
 215   
def _runEvaluatorCommand(commands):
    """
    Run a shell command line and collect everything it prints.

    communicate() reads both pipes together, so the child process cannot
    block on a full stdout pipe while this process is still waiting for
    the end of stderr.

    @return: a (stderrLines, stdoutLines) tuple, each a list of lines
             with their line endings kept
    """
    p = subprocess.Popen(commands, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdoutData, stderrData = p.communicate()
    return stderrData.splitlines(True), stdoutData.splitlines(True)


def evaluateGE(sourceDir, mainTask="GE", task=1, goldDir=None, folds=-1, foldToRemove=-1, evaluations=["strict", "approximate", "decomposition"], verbose=True, silent=False, debug=False):
    """
    Run the GE (BioNLP'11) or GE09 perl evaluator on a set of predictions.

    @param sourceDir: the predictions, resolved by checkEvaluator
    @param mainTask: "GE" or "GE09"
    @param task: subtask 1, 2 or 3 (an int or a string)
    @param goldDir: gold data location, resolved by checkEvaluator
    @param folds: when not -1, the number of folds the documents are
                  divided into with getFolds
    @param foldToRemove: the fold whose documents are removed from the
                         evaluated copy of the predictions
    @param evaluations: the evaluation modes to run, any of "strict",
                        "approximate" and "decomposition"
    @param verbose: print the output of the preparation scripts
    @param silent: print nothing
    @param debug: pass "-v -d" to a2-evaluate.pl and keep the temporary
                  directory
    @return: a dictionary with the parseResults output for each
             evaluation mode that was run, or None when checkEvaluator
             did not provide both an evaluator and a gold directory
    """
    task = str(task)
    assert mainTask in ["GE", "GE09"], mainTask
    assert task in ["1","2","3"], task
    if not silent:
        print >> sys.stderr, mainTask, "task", task, "evaluation of", sourceDir, "against", goldDir
    evaluatorDir, sourceDir, goldDir, tempDir = checkEvaluator(mainTask, sourceDir, goldDir)
    if mainTask == "GE":
        taskSuffix = ".a2"
    else:
        # Rename the ".a2" prediction files to use the task specific suffix.
        taskSuffix = ".a2.t1"
        for filename in os.listdir(sourceDir):
            if filename.endswith(".a2"):
                # task has been converted to a string above, so it must be
                # compared with strings (comparing with the ints 1 and 2
                # never matched and always selected ".a2.t123")
                if task == "1":
                    taskSuffix = ".a2.t1"
                elif task == "2":
                    taskSuffix = ".a2.t12"
                else:
                    taskSuffix = ".a2.t123"
                shutil.move(os.path.join(sourceDir, filename), os.path.join(sourceDir, filename.rsplit(".", 1)[0] + taskSuffix))
    if goldDir is None or evaluatorDir is None:
        # Nothing can be evaluated, but checkEvaluator may already have
        # created a temporary directory
        if tempDir is not None:
            if not debug:
                shutil.rmtree(tempDir)
            else:
                print >> sys.stderr, "Temporary directory left at", tempDir
        return None

    origDir = os.getcwd()
    if tempDir is None:
        tempDir = tempfile.mkdtemp()
    try:
        # The perl scripts are run by name from the evaluator directory
        os.chdir(evaluatorDir)
        if folds != -1:
            folds = getFolds(sourceDir, folds)
            sourceSubsetDir = tempDir + "/source-subset"
            if os.path.exists(sourceSubsetDir):
                shutil.rmtree(sourceSubsetDir)
            shutil.copytree(sourceDir, sourceSubsetDir)
            removeDocuments(sourceSubsetDir, folds, foldToRemove)
        else:
            sourceSubsetDir = sourceDir

        results = {}

        # GE09 gold data goes through prepare-gold.pl before evaluation
        if mainTask == "GE09":
            preparedGoldDir = os.path.join(tempDir, "prepared-gold")
            commands = "perl prepare-gold.pl " + goldDir + " " + preparedGoldDir
            stderrLines, stdoutLines = _runEvaluatorCommand(commands)
            if verbose and not silent:
                printLines(stderrLines)
                printLines(stdoutLines)
            goldDir = preparedGoldDir

        # Prepare the predictions for evaluation
        outDir = tempDir + "/output"
        if mainTask == "GE":
            commands = "perl a2-normalize.pl -g " + goldDir
            commands += " -o " + outDir
            commands += " " + sourceSubsetDir + "/*" + taskSuffix
        else:
            commands = "perl prepare-eval.pl -g " + goldDir
            commands += " " + sourceSubsetDir + " " + outDir
        stderrLines, stdoutLines = _runEvaluatorCommand(commands)
        if verbose and not silent:
            printLines(stderrLines)
            printLines(stdoutLines)

        # The part of the a2-evaluate.pl command line shared by all modes
        evaluateCommand = "perl a2-evaluate.pl"
        if mainTask == "GE":
            evaluateCommand += " -t " + str(task)
        if debug:
            evaluateCommand += " -v -d"
        evaluateCommand += " -g " + goldDir
        predictedFiles = outDir + "/*" + taskSuffix

        if "strict" in evaluations:
            stderrLines, stdoutLines = _runEvaluatorCommand(evaluateCommand + " " + predictedFiles)
            if not silent:
                printLines(stderrLines)
                print >> sys.stderr, "##### strict evaluation mode #####"
                printLines(stdoutLines)
            results["strict"] = parseResults(stdoutLines)

        if "approximate" in evaluations:
            if not silent:
                print >> sys.stderr, "##### approximate span and recursive mode #####"
            stderrLines, stdoutLines = _runEvaluatorCommand(evaluateCommand + " -sp " + predictedFiles)
            if not silent:
                printLines(stderrLines)
                printLines(stdoutLines)
            results["approximate"] = parseResults(stdoutLines)

        if "decomposition" in evaluations:
            if not silent:
                print >> sys.stderr, "##### event decomposition in the approximate span mode #####"
            # NOTE(review): this command line is identical to the one used
            # for the approximate mode - confirm whether a different
            # a2-evaluate.pl option was intended here
            stderrLines, stdoutLines = _runEvaluatorCommand(evaluateCommand + " -sp " + predictedFiles)
            if not silent:
                printLines(stderrLines)
                printLines(stdoutLines)
            results["decomposition"] = parseResults(stdoutLines)

        return results
    finally:
        # Return to the original working directory and clean up also when
        # one of the steps above fails
        os.chdir(origDir)
        if not debug:
            shutil.rmtree(tempDir)
        else:
            print >> sys.stderr, "Temporary directory left at", tempDir
 342   
344   
345   
346      for line in lines: 
347          print >> sys.stderr, line[:-1] 
 348       
349   
350   
351   
352   
353   
354   
355   
356   
357   
358   
359   
360   
361   
362   
def evaluateBX(corpusName, sourceDir, goldDir=None, silent=False):
    """
    Run the BB or BI task's java evaluator and parse the scores it prints.

    For BI only the indented "key = value" lines of the "Global scores"
    section are read, for BB every "key = value" line. The f-score is
    stored under the key "fscore" and a "NaN" value is stored as 0.0.

    @param corpusName: "BI" or "BB"
    @param sourceDir: the predictions, resolved by checkEvaluator
    @param goldDir: gold data location, resolved by checkEvaluator
    @param silent: do not print the output of the evaluator
    @return: a dictionary of score name -> float, or None when
             checkEvaluator did not provide both an evaluator and a gold
             directory
    """
    assert corpusName in ["BI", "BB"], corpusName
    evaluatorDir, sourceDir, goldDir, tempDir = checkEvaluator(corpusName, sourceDir, goldDir)
    try:
        if goldDir is None or evaluatorDir is None:
            return None

        if corpusName == "BI":
            jarName = "BioNLP-ST_2011_bacteria_interactions_evaluation_software.jar"
            fscoreKey = "f-score"
        else:
            jarName = "BioNLP-ST_2011_Bacteria_Biotopes_evaluation_software.jar"
            fscoreKey = "F-score"
        # An argument list needs no shell, so paths containing spaces or
        # shell metacharacters are passed to java unchanged
        commands = ["java", "-jar", os.path.join(evaluatorDir, jarName), goldDir, sourceDir]
        p = subprocess.Popen(commands, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # communicate() reads both pipes together, so the evaluator cannot
        # block on a full stdout pipe while stderr is still being read
        stdoutData, stderrData = p.communicate()
        stderrLines = stderrData.splitlines(True)
        stdoutLines = stdoutData.splitlines(True)
        if not silent:
            printLinesBX(stderrLines)
            printLinesBX(stdoutLines)

        results = {}
        category = None
        for line in stdoutLines:
            if corpusName == "BI":
                # Any line with a colon starts a new section
                if ":" in line:
                    category = line.split(":")[0].strip()
                if category != "Global scores" or not line.startswith("    "):
                    continue
            if "=" not in line:
                continue # e.g. an empty line, there is no score to read
            key, value = line.strip().split("=", 1)
            key = key.strip()
            value = value.strip()
            assert key not in results
            if key == fscoreKey:
                key = "fscore"
            if value == "NaN":
                results[key] = 0.0
            else:
                results[key] = float(value)
        return results
    finally:
        # Remove the temporary directory also on an early return or error
        if tempDir is not None:
            shutil.rmtree(tempDir)
 415        
417      assert corpus in ["EPI", "ID"], corpus 
418      evaluatorDir, sourceDir, goldDir, tempDir = checkEvaluator(corpus, sourceDir, goldDir) 
419      if goldDir == None: 
420          return None 
421      commands = "cd " + evaluatorDir 
422      commands += " ; " + "python evaluation.py -s -p -r " + goldDir + " " + sourceDir + "/*.a2" 
423      p = subprocess.Popen(commands, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 
424      stderrLines = p.stderr.readlines() 
425      stdoutLines = p.stdout.readlines() 
426      if not silent: 
427          for line in stderrLines: 
428              print >> sys.stderr, line, 
429          for line in stdoutLines: 
430              print >> sys.stderr, line, 
431          print >> sys.stderr 
432      for line in stderrLines + stdoutLines: 
433          if "No such file or directory" in line: 
434              return None 
435      if tempDir != None:  
436          shutil.rmtree(tempDir) 
437      return parseResults(stdoutLines) 
 438   
def evaluateREN(sourceDir, goldDir=None, silent=False):
    """
    Run the REN task's eval_rename.jar and parse the values it prints.

    Each "name: value" line of the evaluator's standard output becomes one
    entry of the result. A "NaN" value is stored as 0.0, a value with a
    decimal point as a float and any other value as an int.

    @param sourceDir: the predictions, resolved by checkEvaluator
    @param goldDir: gold data location, resolved by checkEvaluator
    @param silent: do not print the output of the evaluator
    @return: a dictionary of name -> value, or None when checkEvaluator
             did not provide both an evaluator and a gold directory
    """
    evaluatorDir, sourceDir, goldDir, tempDir = checkEvaluator("REN", sourceDir, goldDir)
    try:
        if goldDir is None or evaluatorDir is None:
            return None
        # The jar is run from the evaluator directory. An argument list
        # needs no shell, so paths containing spaces are passed unchanged.
        commands = ["java", "-jar", "eval_rename.jar", goldDir, sourceDir]
        p = subprocess.Popen(commands, cwd=evaluatorDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # communicate() reads both pipes together, so the evaluator cannot
        # block on a full stdout pipe while stderr is still being read
        stdoutData, stderrData = p.communicate()
        stderrLines = stderrData.splitlines(True)
        stdoutLines = stdoutData.splitlines(True)
        if not silent:
            for line in stderrLines:
                print >> sys.stderr, line,
            for line in stdoutLines:
                print >> sys.stderr, line,
            print >> sys.stderr
        results = {}
        for line in stdoutLines:
            if ":" not in line:
                continue # e.g. an empty line, there is no value to read
            # The value is a number, so the last colon is the separator
            category, value = line.strip().rsplit(":", 1)
            value = value.strip()
            if value == "NaN":
                value = 0.0
            elif "." in value:
                value = float(value)
            else:
                value = int(value)
            results[category.strip()] = value
        return results
    finally:
        # Remove the temporary directory also on an early return or error
        if tempDir is not None:
            shutil.rmtree(tempDir)
 468   
def evaluateCO(sourceDir, goldDir=None, silent=False):
    """
    Run the CO task's CRScorer.jar and parse the result file it writes.

    The scorer writes "eval.results" into a temporary result directory.
    From that file the "name: count" lines and the "P = .. R = .. F = .."
    line of the mention detection and mention linking sections are read.

    @param sourceDir: the predictions, resolved by checkEvaluator
    @param goldDir: gold data location, resolved by checkEvaluator
    @param silent: do not print the scorer output or the result file
    @return: a dictionary with the keys "MENTION DETECTION" and
             "MENTION LINKING", each a dictionary with the section's
             counts and its "precision", "recall" and "fscore", or None
             when checkEvaluator did not provide both an evaluator and a
             gold directory
    """
    evaluatorDir, sourceDir, goldDir, tempDir = checkEvaluator("CO", sourceDir, goldDir)
    try:
        if goldDir is None or evaluatorDir is None:
            return None

        if tempDir is None:
            tempDir = tempfile.mkdtemp()
        resultDir = os.path.join(tempDir, "result")
        os.makedirs(resultDir)
        # The jar is run from the evaluator directory. An argument list
        # needs no shell, so paths containing spaces are passed unchanged.
        commands = ["java", "-jar", "CRScorer.jar", goldDir, sourceDir, resultDir]
        p = subprocess.Popen(commands, cwd=evaluatorDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # communicate() reads both pipes together, so the scorer cannot
        # block on a full stdout pipe while stderr is still being read
        stdoutData, stderrData = p.communicate()
        stderrLines = stderrData.splitlines(True)
        stdoutLines = stdoutData.splitlines(True)
        if not silent:
            for i in range(len(stdoutLines)):
                # A line ending with "..." is printed only when the line
                # following it does not end with "..."
                endsWithDots = stdoutLines[i].strip().endswith("...")
                nextDiffers = i < len(stdoutLines) - 1 and not stdoutLines[i+1].strip().endswith("...")
                if (not endsWithDots) or nextDiffers:
                    print >> sys.stderr, stdoutLines[i],
            for line in stderrLines:
                print >> sys.stderr, line,
            print >> sys.stderr

        f = open(os.path.join(resultDir, "eval.results"), "rt")
        try:
            resultLines = f.readlines()
        finally:
            f.close()
        results = {"MENTION DETECTION":{}, "MENTION LINKING":{}}
        currentBlock = None
        for line in resultLines:
            line = line.replace("\t", " ")
            if not silent: # the result file is output too
                print >> sys.stderr, line.rstrip()
            if line[0] == "*":
                continue
            if "EVALUATION OF MENTION DETECTION" in line:
                currentBlock = results["MENTION DETECTION"]
            elif "EVALUATION OF MENTION LINKING" in line:
                currentBlock = results["MENTION LINKING"]
            elif ":" in line:
                name, value = line.split(":")
                name = name.strip()
                value = int(value)
                currentBlock[name] = value
            elif line[0] == "P":
                splits = line.split()
                assert splits[0] == "P" and splits[1] == "=" and splits[3] == "R" and splits[4] == "=" and splits[6] == "F" and splits[7] == "=", line
                currentBlock["precision"] = float(splits[2])
                currentBlock["recall"] = float(splits[5])
                currentBlock["fscore"] = float(splits[8])
        return results
    finally:
        # Remove the temporary directory also on an early return or error
        if tempDir is not None:
            shutil.rmtree(tempDir)
 521   
if __name__=="__main__":
    # Command line interface: either evaluate a set of predictions or,
    # with --install, download the evaluators and print their settings.

    # Use Psyco when it is available
    try:
        import psyco
        psyco.full()
        print >> sys.stderr, "Found Psyco, using"
    except ImportError:
        print >> sys.stderr, "Psyco not installed"

    from optparse import OptionParser
    optparser = OptionParser(description="Evaluate BioNLP Shared Task predictions")
    optparser.add_option("-i", "--input", default=None, dest="input", help="input directory with predicted shared task files", metavar="FILE")
    optparser.add_option("-g", "--gold", default=None, dest="gold", help="optional gold directory (default is the task development set)", metavar="FILE")
    optparser.add_option("-t", "--task", default="GE.2", dest="task", help="")
    optparser.add_option("-v", "--variance", default=0, type="int", dest="variance", help="variance folds")
    optparser.add_option("-d", "--debug", default=False, action="store_true", dest="debug", help="debug")
    optparser.add_option("--install", default=None, dest="install", help="Install directory (or DEFAULT)")
    (options, args) = optparser.parse_args()

    if options.install is None:
        # An assert would be skipped with "python -O", so a missing input
        # is reported through the option parser (usage message and exit)
        if options.input is None:
            optparser.error("input not defined (use -i or --input)")
        evalResult = evaluate(options.input, options.task, options.gold, debug=options.debug)
        if options.debug:
            print >> sys.stderr, "evaluate output:", evalResult
    else:
        # --install takes DEFAULT, a destination directory, or
        # "destination,download" for two separate directories
        downloadDir = None
        destDir = None
        if options.install != "DEFAULT":
            if "," in options.install:
                # Split at the first comma only, so that unpacking cannot fail
                destDir, downloadDir = options.install.split(",", 1)
            else:
                destDir = options.install
        settings = install(destDir, downloadDir)
        # Print the settings as KEY="value" lines to standard output
        for key in sorted(settings.keys()):
            sys.stdout.write(key + "=\"" + str(settings[key]) + "\"\n")
558       
559   
560   
561   
562   
563   
564   
565   
566   
567   
568   
569   
570