1  """ 
  2  For two-class classification 
  3  """ 
  4  import Evaluator 
  5  import itertools 
  6  import sys, os 
  7  import types 
  8  sys.path.append(os.path.dirname(os.path.abspath(__file__))+"/..") 
  9  from Core.IdSet import IdSet 
 10  import Core.ExampleUtils as ExampleUtils 
 13 -    def __init__(self, examples=None, predictions=None, classSet=None): 
  14          if type(classSet) == types.StringType:  
 15              classSet = IdSet(filename=classSet) 
 16          if type(predictions) == types.StringType:  
 17              predictions = ExampleUtils.loadPredictions(predictions) 
 18          if type(examples) == types.StringType:  
 19              examples = ExampleUtils.readExamples(examples, False) 
 20           
 21           
 22          self.truePositives = 0 
 23          self.falsePositives = 0 
 24          self.trueNegatives = 0 
 25          self.falseNegatives = 0 
 26          self.precision = None 
 27          self.recall = None 
 28          self.fScore = None 
 29          self.AUC = None 
 30          self.type = "binary" 
 31          if predictions != None: 
 32              self._calculate(examples, predictions) 
  33       
 34      @classmethod 
 35 -    def evaluate(cls, examples, predictions, classSet=None, outputFile=None): 
  36          """ 
 37          Enables using this class without having to manually instantiate it 
 38          """ 
 39          evaluator = cls(examples, predictions, classSet) 
 40          print >> sys.stderr, evaluator.toStringConcise() 
 41          if outputFile != None: 
 42              evaluator.saveCSV(outputFile) 
 43          return evaluator 
  44       
        # Three-way, cmp()-style ordering on F-score: 1 if this evaluator
        # outperforms the other, 0 on a tie, -1 otherwise.
        # (The method's 'def' header is not visible in this chunk.)
        if self.fScore > evaluation.fScore:
            return 1
        elif self.fScore == evaluation.fScore:
            return 0
        else:
            return -1
  52       
        # Build a synthetic evaluator whose metrics are the weighted average
        # of the input evaluators' metrics, each weighted by its number of
        # predictions. Raw confusion counts cannot be averaged meaningfully
        # and are replaced by the placeholder "-".
        averageEvaluator = BinaryEvaluator(None)
        averageEvaluator.precision = 0
        averageEvaluator.recall = 0
        averageEvaluator.fScore = 0
        averageEvaluator.AUC = 0
        averageEvaluator.truePositives = "-"
        averageEvaluator.falsePositives = "-"
        averageEvaluator.trueNegatives = "-"
        averageEvaluator.falseNegatives = "-"
        sumWeight = 0.0
        for evaluator in evaluators:
            assert(isinstance(evaluator,BinaryEvaluator))
            # NOTE(review): relies on evaluator.predictions being stored by
            # __init__ — confirm the attribute exists on every input.
            weight = float(len(evaluator.predictions))
            sumWeight += weight
            averageEvaluator.precision += weight * evaluator.precision
            averageEvaluator.recall += weight * evaluator.recall
            averageEvaluator.fScore += weight * evaluator.fScore
            # AUC may be None for evaluators that never computed it; such
            # evaluators simply contribute nothing to the AUC average.
            if evaluator.AUC != None:
                averageEvaluator.AUC += weight * evaluator.AUC
        if averageEvaluator.AUC > 0:
            averageEvaluator.AUC /= sumWeight
        else:
            averageEvaluator.AUC = None
        if sumWeight > 0:
            averageEvaluator.precision /= sumWeight
            averageEvaluator.recall /= sumWeight
            averageEvaluator.fScore /= sumWeight
        return averageEvaluator
    average = staticmethod(average)
 83       
 84 -    def pool(evaluators): 
  85          predictions = [] 
 86          for evaluator in evaluators: 
 87              assert(isinstance(evaluator,BinaryEvaluator)) 
 88              predictions.extend(evaluator.predictions) 
 89          return BinaryEvaluator(predictions) 
  90      pool = staticmethod(pool)       
 91       
        # Area under the ROC curve computed from *binarized* decisions:
        # every prediction is first reduced to 1 (positive) or 0 (negative),
        # so this is the Mann-Whitney pair statistic over 0/1 scores (ties
        # count 0.5), not an AUC over continuous confidence values.
        numPositiveExamples = 0
        numNegativeExamples = 0
        predictionsForPositives = []
        predictionsForNegatives = []
        # example[1] holds the true class, prediction[0] the predicted
        # class; any value > 0 is treated as the positive class.
        for example, prediction in itertools.izip(examples, predictions):
            trueClass = example[1]
            predictedClass = prediction[0]
            if trueClass > 0:
                numPositiveExamples += 1
                if predictedClass > 0:
                    predictionsForPositives.append(1)
                else:
                    predictionsForPositives.append(0)
            else:
                numNegativeExamples += 1
                if predictedClass > 0:
                    predictionsForNegatives.append(1)
                else:
                    predictionsForNegatives.append(0)
        # Compare every positive against every negative: O(P*N) pair loop
        # (the values are only 0/1, so in effect only the counts matter).
        auc = 0
        for i in predictionsForPositives:
           for j in predictionsForNegatives:
               if i > j:
                   auc += 1.
               elif i == j:
                   auc += 0.5
        # Normalize by the number of pairs; 0 when either class is absent.
        if numPositiveExamples * numNegativeExamples > 0:
            auc /= float(numPositiveExamples * numNegativeExamples)
        else:
            auc = 0
        return auc
 124       
126           
127           
        # Populate the confusion matrix (tp/fp/tn/fn), record a per-example
        # classification tag, and derive precision/recall/F-score/AUC from
        # the paired streams of examples and predictions.
        # Each record is (raw predicted class, outcome tag, self.type).
        self.classifications = []

        # example[1] holds the true class, prediction[0] the predicted
        # class; any value > 0 is treated as the positive class.
        for example, prediction in itertools.izip(examples, predictions):
            trueClass = example[1]
            predictedClass = prediction[0]
            if trueClass > 0:
                if predictedClass > 0:
                    self.truePositives += 1
                    self.classifications.append((prediction[0],"tp",self.type))
                else:
                    self.falseNegatives += 1
                    self.classifications.append((prediction[0],"fn",self.type))
            else:
                if predictedClass > 0:
                    self.falsePositives += 1
                    self.classifications.append((prediction[0],"fp",self.type))
                else:
                    self.trueNegatives += 1
                    self.classifications.append((prediction[0],"tn",self.type))

        # Precision = tp / (tp + fp), guarded against division by zero.
        totalPositives = float(self.truePositives + self.falsePositives)
        if totalPositives > 0.0:
            self.precision = float(self.truePositives) / totalPositives
        else:
            self.precision = 0.0
        # Recall = tp / (tp + fn), guarded against division by zero.
        realPositives = float(self.truePositives + self.falseNegatives)
        if realPositives > 0.0:
            self.recall = float(self.truePositives) / realPositives
        else:
            self.recall = 0.0
        # F1 = harmonic mean of precision and recall; 0.0 when both are 0.
        if self.precision + self.recall > 0.0:
            self.fScore = (2*self.precision*self.recall) / (self.precision + self.recall)
        else:
            self.fScore = 0.0

        # Second pass over the same streams for AUC.
        # NOTE(review): if 'examples' or 'predictions' are one-shot
        # iterators, izip above has already consumed them and this pass
        # sees nothing — confirm callers pass lists.
        self.AUC = self.__calculateAUC(examples, predictions)
 164       
        # One-line human-readable summary: class counts, confusion-matrix
        # counts, precision/recall/F-score (truncated to 6 characters via
        # string slicing) and AUC (or "N/A" when not computed).
        if title != None:
            string = indent + title + ": "
        else:
            string = indent
        # p/n = total real positives / total real negatives.
        string += "p/n:" + str(self.truePositives+self.falseNegatives) + "/" + str(self.trueNegatives+self.falsePositives)
        string += " tp/fp|tn/fn:" + str(self.truePositives) + "/" + str(self.falsePositives) + "|" + str(self.trueNegatives) + "/" + str(self.falseNegatives)
        string += " p/r/f:" + str(self.precision)[0:6] + "/" + str(self.recall)[0:6] + "/" + str(self.fScore)[0:6]
        if self.AUC != None:
            string += " a:" + str(self.AUC)[0:6]
        else:
            string += " a:N/A"
        return string
 178       
179   
180   
181   
182   
183   
184   
185   
186   
187   
188   
189   
190   
191   
192   
193   
194   
195   
196   
197   
198       
        # Collect all counts and metrics into a plain dictionary, e.g. for
        # CSV export. AUC is replaced by the string "N/A" when not computed.
        # NOTE(review): the local name 'dict' shadows the builtin.
        dict = {}
        dict["positives"] = self.truePositives+self.falseNegatives
        dict["negatives"] = self.trueNegatives+self.falsePositives
        dict["true positives"] = self.truePositives
        dict["false positives"] = self.falsePositives
        dict["true negatives"] = self.trueNegatives
        dict["false negatives"] = self.falseNegatives
        dict["precision"] = self.precision
        dict["recall"] = self.recall
        dict["f-score"] = self.fScore
        if self.AUC != None:
            dict["AUC"] = self.AUC
        else:
            dict["AUC"] = "N/A"
        return dict
  215