1  """ 
  2  For two-class classification 
  3  """ 
  4  import Evaluator 
  5  import itertools 
  6  import sys, os 
  7  import types 
  8  sys.path.append(os.path.dirname(os.path.abspath(__file__))+"/..") 
  9  from Core.IdSet import IdSet 
 10  import Core.ExampleUtils as ExampleUtils 
 13 -    def __init__(self, examples=None, predictions=None, classSet=None): 
  14          if type(classSet) == types.StringType:  
 15              classSet = IdSet(filename=classSet) 
 16          if type(predictions) == types.StringType:  
 17              predictions = ExampleUtils.loadPredictions(predictions) 
 18          if type(examples) == types.StringType:  
 19              examples = ExampleUtils.readExamples(examples, False) 
 20           
 21           
 22          self.truePositives = 0 
 23          self.falsePositives = 0 
 24          self.trueNegatives = 0 
 25          self.falseNegatives = 0 
 26          self.precision = None 
 27          self.recall = None 
 28          self.fScore = None 
 29          self.AUC = None 
 30          self.type = "binary" 
 31          if predictions != None: 
 32              self._calculate(examples, predictions) 
  33       
 34      @classmethod 
 35 -    def evaluate(cls, examples, predictions, classSet=None, outputFile=None): 
  36          """ 
 37          Enables using this class without having to manually instantiate it 
 38          """ 
 39          evaluator = cls(examples, predictions, classSet) 
 40          print >> sys.stderr, evaluator.toStringConcise() 
 41          if outputFile != None: 
 42              evaluator.saveCSV(outputFile) 
 43          return evaluator 
  44       
        # Three-way, cmp()-style ordering on F-score: 1 if this evaluator
        # outperforms the other, 0 on a tie, -1 otherwise.
        # (The method's 'def' header is not visible in this chunk.)
        if self.fScore > evaluation.fScore:
            return 1
        elif self.fScore == evaluation.fScore:
            return 0
        else:
            return -1
  52       
        # Build a synthetic evaluator whose metrics are the weighted average
        # of the input evaluators' metrics, each weighted by its number of
        # predictions. Raw confusion counts cannot be averaged meaningfully
        # and are replaced by the placeholder "-".
        averageEvaluator = BinaryEvaluator(None)
        averageEvaluator.precision = 0
        averageEvaluator.recall = 0
        averageEvaluator.fScore = 0
        averageEvaluator.AUC = 0
        averageEvaluator.truePositives = "-"
        averageEvaluator.falsePositives = "-"
        averageEvaluator.trueNegatives = "-"
        averageEvaluator.falseNegatives = "-"
        sumWeight = 0.0
        for evaluator in evaluators:
            assert(isinstance(evaluator,BinaryEvaluator))
            # NOTE(review): relies on evaluator.predictions being stored by
            # __init__ — confirm the attribute exists on every input.
            weight = float(len(evaluator.predictions))
            sumWeight += weight
            averageEvaluator.precision += weight * evaluator.precision
            averageEvaluator.recall += weight * evaluator.recall
            averageEvaluator.fScore += weight * evaluator.fScore
            # AUC may be None for evaluators that never computed it; such
            # evaluators simply contribute nothing to the AUC average.
            if evaluator.AUC != None:
                averageEvaluator.AUC += weight * evaluator.AUC
        if averageEvaluator.AUC > 0:
            averageEvaluator.AUC /= sumWeight
        else:
            averageEvaluator.AUC = None
        if sumWeight > 0:
            averageEvaluator.precision /= sumWeight
            averageEvaluator.recall /= sumWeight
            averageEvaluator.fScore /= sumWeight
        return averageEvaluator
    average = staticmethod(average)
 83       
 84 -    def pool(evaluators): 
  85          predictions = [] 
 86          for evaluator in evaluators: 
 87              assert(isinstance(evaluator,BinaryEvaluator)) 
 88              predictions.extend(evaluator.predictions) 
 89          return BinaryEvaluator(predictions) 
  90      pool = staticmethod(pool)       
 91       
        # Area under the ROC curve computed from *binarized* decisions:
        # every prediction is first reduced to 1 (positive) or 0 (negative),
        # so this is the Mann-Whitney pair statistic over 0/1 scores (ties
        # count 0.5), not an AUC over continuous confidence values.
        numPositiveExamples = 0
        numNegativeExamples = 0
        predictionsForPositives = []
        predictionsForNegatives = []
        # example[1] holds the true class, prediction[0] the predicted
        # class; any value > 0 is treated as the positive class.
        for example, prediction in itertools.izip(examples, predictions):
            trueClass = example[1]
            predictedClass = prediction[0]
            if trueClass > 0:
                numPositiveExamples += 1
                if predictedClass > 0:
                    predictionsForPositives.append(1)
                else:
                    predictionsForPositives.append(0)
            else:
                numNegativeExamples += 1
                if predictedClass > 0:
                    predictionsForNegatives.append(1)
                else:
                    predictionsForNegatives.append(0)
        # Compare every positive against every negative: O(P*N) pair loop
        # (the values are only 0/1, so in effect only the counts matter).
        auc = 0
        for i in predictionsForPositives:
           for j in predictionsForNegatives:
               if i > j:
                   auc += 1.
               elif i == j:
                   auc += 0.5
        # Normalize by the number of pairs; 0 when either class is absent.
        if numPositiveExamples * numNegativeExamples > 0:
            auc /= float(numPositiveExamples * numNegativeExamples)
        else:
            auc = 0
        return auc
 124       
126           
127           
        # Populate the confusion matrix (tp/fp/tn/fn), record a per-example
        # classification tag, and derive precision/recall/F-score/AUC from
        # the paired streams of examples and predictions.
        # Each record is (raw predicted class, outcome tag, self.type).
        self.classifications = []

        # example[1] holds the true class, prediction[0] the predicted
        # class; any value > 0 is treated as the positive class.
        for example, prediction in itertools.izip(examples, predictions):
            trueClass = example[1]
            predictedClass = prediction[0]
            if trueClass > 0:
                if predictedClass > 0:
                    self.truePositives += 1
                    self.classifications.append((prediction[0],"tp",self.type))
                else:
                    self.falseNegatives += 1
                    self.classifications.append((prediction[0],"fn",self.type))
            else:
                if predictedClass > 0:
                    self.falsePositives += 1
                    self.classifications.append((prediction[0],"fp",self.type))
                else:
                    self.trueNegatives += 1
                    self.classifications.append((prediction[0],"tn",self.type))

        # Precision = tp / (tp + fp), guarded against division by zero.
        totalPositives = float(self.truePositives + self.falsePositives)
        if totalPositives > 0.0:
            self.precision = float(self.truePositives) / totalPositives
        else:
            self.precision = 0.0
        # Recall = tp / (tp + fn), guarded against division by zero.
        realPositives = float(self.truePositives + self.falseNegatives)
        if realPositives > 0.0:
            self.recall = float(self.truePositives) / realPositives
        else:
            self.recall = 0.0
        # F1 = harmonic mean of precision and recall; 0.0 when both are 0.
        if self.precision + self.recall > 0.0:
            self.fScore = (2*self.precision*self.recall) / (self.precision + self.recall)
        else:
            self.fScore = 0.0

        # Second pass over the same streams for AUC.
        # NOTE(review): if 'examples' or 'predictions' are one-shot
        # iterators, izip above has already consumed them and this pass
        # sees nothing — confirm callers pass lists.
        self.AUC = self.__calculateAUC(examples, predictions)
 164       
        # One-line human-readable summary: class counts, confusion-matrix
        # counts, precision/recall/F-score (truncated to 6 characters via
        # string slicing) and AUC (or "N/A" when not computed).
        if title != None:
            string = indent + title + ": "
        else:
            string = indent
        # p/n = total real positives / total real negatives.
        string += "p/n:" + str(self.truePositives+self.falseNegatives) + "/" + str(self.trueNegatives+self.falsePositives)
        string += " tp/fp|tn/fn:" + str(self.truePositives) + "/" + str(self.falsePositives) + "|" + str(self.trueNegatives) + "/" + str(self.falseNegatives)
        string += " p/r/f:" + str(self.precision)[0:6] + "/" + str(self.recall)[0:6] + "/" + str(self.fScore)[0:6]
        if self.AUC != None:
            string += " a:" + str(self.AUC)[0:6]
        else:
            string += " a:N/A"
        return string
 178       
179   
180   
181   
182   
183   
184   
185   
186   
187   
188   
189   
190   
191   
192   
193   
194   
195   
196   
197   
198       
        # Collect all counts and metrics into a plain dictionary, e.g. for
        # CSV export. AUC is replaced by the string "N/A" when not computed.
        # NOTE(review): the local name 'dict' shadows the builtin.
        dict = {}
        dict["positives"] = self.truePositives+self.falseNegatives
        dict["negatives"] = self.trueNegatives+self.falsePositives
        dict["true positives"] = self.truePositives
        dict["false positives"] = self.falsePositives
        dict["true negatives"] = self.trueNegatives
        dict["false negatives"] = self.falseNegatives
        dict["precision"] = self.precision
        dict["recall"] = self.recall
        dict["f-score"] = self.fScore
        if self.AUC != None:
            dict["AUC"] = self.AUC
        else:
            dict["AUC"] = "N/A"
        return dict
  215