1  from collections import defaultdict 
  2  from STTools import * 
  3   
  5      counts = defaultdict(int) 
  6      counts["modifier"] = 0 
  7      counts["modifier(spec)"] = 0 
  8      counts["modifier(neg)"] = 0 
  9      for doc in documents: 
 10          if doc.proteins != None: 
 11              for protein in doc.proteins:  
 12                  counts["protein("+protein.type+")"] += 1 
 13                  counts["protein"] += 1 
 14          if doc.triggers != None: 
 15              for trigger in doc.triggers:  
 16                  counts["trigger("+trigger.type+")"] += 1 
 17                  counts["trigger"] += 1 
 18          if doc.events != None: 
 19              for event in doc.events:  
 20                  counts["event("+event.type+")"] += 1 
 21                  counts["event"] += 1 
 22                  if event.speculation != None: 
 23                      counts["modifier"] += 1 
 24                      counts["modifier(spec)"] += 1 
 25                  if event.negation != None: 
 26                      counts["modifier"] += 1 
 27                      counts["modifier(neg)"] += 1 
 28          if doc.relations != None: 
 29              for relation in doc.relations:  
 30                  counts["relation("+relation.type+")"] += 1 
 31                  counts["relation"] += 1 
 32          for ann in doc.events + doc.relations: 
 33              for arg in ann.arguments: 
 34                  counts["arg("+arg[0]+")"] += 1 
 35                  counts["arg"] += 1 
 36      return counts 
  37   
 39      print >> sys.stderr, "Comparing BioNLP Shared Task format document sets" 
 40      print "Loading set A:", a 
 41      docsA = loadSet(a, a2Tag=a2Tag) 
 42      print "Loading set B:", b 
 43      docsB = loadSet(b, a2Tag=a2Tag) 
 44      countsA = getCounts(docsA) 
 45      countsB = getCounts(docsB) 
 46      allKeys = list(set(countsA.keys() + countsB.keys())) 
 47      allKeys.sort() 
 48      maxKeyLength = max([len(x) for x in allKeys]) 
 49       
 50      print "Sets" 
 51      print "A:", a, "(documents: " + str(len(docsA)) + ")" 
 52      print "B:", b, "(documents: " + str(len(docsB)) + ")" 
 53       
 54      titleLine = "Category" 
 55      while len(titleLine) <= maxKeyLength: 
 56          titleLine += " " 
 57      titleLine += "A" 
 58      titleLine += " " * 9 
 59      titleLine += "B" 
 60      titleLine += " " * 9 
 61      titleLine += "Diff" 
 62      titleLine += " " * 6 
 63      titleLine += "Status" 
 64      print titleLine 
 65       
 66      for key in allKeys: 
 67          line = key 
 68          while len(line) <= maxKeyLength: 
 69              line += " " 
 70          valA = (countsA[key] / float(len(docsA))) 
 71          line += "%.2f" % valA 
 72          while len(line) <= maxKeyLength + 10: 
 73              line += " " 
 74          valB = (countsB[key] / float(len(docsB))) 
 75          line += "%.2f" % valB 
 76           
 77          while len(line) <= maxKeyLength + 20: 
 78              line += " " 
 79          if valA == 0 or valB == 0: 
 80              diff = None 
 81              line += "N/A" 
 82          else: 
 83              diff = valA / valB 
 84              line += "%.2f" % diff 
 85           
 86          while len(line) <= maxKeyLength + 30: 
 87              line += " " 
 88          if diff != None: 
 89              dist = abs(1.0 - diff) 
 90              maxCount = 30 
 91              step = 0.01 
 92              count = 0 
 93              while dist > 0: 
 94                  dist -= step 
 95                  count += 1 
 96                  line += "!" 
 97                  if count >= maxCount: 
 98                      line += "+" 
 99                      break 
100          else: 
101              line += "-" 
102           
103           
104           
105           
106           
107          print line 
 108   
if __name__=="__main__":
    # Command-line entry point: parse the options and run the comparison.
    import sys

    from optparse import OptionParser

    # Psyco is optional: enable it when it can be imported, otherwise
    # report that it is missing and carry on.
    try:
        import psyco
        psyco.full()
        sys.stderr.write("Found Psyco, using\n")
    except ImportError:
        sys.stderr.write("Psyco not installed\n")

    # Each entry is (option flags, keyword settings) for add_option.
    optionSpecs = [
        (("-a", "--inputA"), {"default": None, "dest": "inputA", "help": "", "metavar": "FILE"}),
        (("-b", "--inputB"), {"default": None, "dest": "inputB", "help": ""}),
        (("-t", "--a2Tag"), {"default": "a2", "dest": "a2Tag", "help": ""}),
    ]
    parser = OptionParser(description="Compare event distribution")
    for flags, settings in optionSpecs:
        parser.add_option(*flags, **settings)

    # Positional arguments are accepted by the parser but not used.
    parsed = parser.parse_args()
    chosen = parsed[0]

    compare(chosen.inputA, chosen.inputB, a2Tag=chosen.a2Tag)
129