Package TEES :: Package Utils :: Module CompareExamples
[hide private]

Source Code for Module TEES.Utils.CompareExamples

  1  import itertools 
  2  import sys, os 
  3  sys.path.append(os.path.dirname(os.path.abspath(__file__))+"/..") 
  4  import Core.ExampleUtils as ExampleUtils 
  5  from Core.IdSet import IdSet 
  6  from Utils.ProgressCounter import ProgressCounter  
  7   
def removeFeatures(example):
    """Return a copy of an example with its feature mapping emptied.

    Used to print or report an example compactly, without dumping its
    (potentially very large) feature dictionary.

    Args:
        example: an indexable sequence with at least four elements. Element
            2 is the feature mapping that is dropped here; elements 0, 1
            and 3 are carried over unchanged (by reference, not copied).

    Returns:
        A new four-element list ``[example[0], example[1], {}, example[3]]``.
        The input example is not modified.

    Raises:
        IndexError: if ``example`` has fewer than four elements.
    """
    return [example[0], example[1], {}, example[3]]
10
def getFeatureNames(example, featureIds):
    """Return the names of an example's features, ordered by feature id.

    Args:
        example: an indexable sequence whose element 2 is a mapping keyed
            by feature id.
        featureIds: an object with a ``getName(featureId)`` method that
            returns the feature's name, or None when the id is unknown.

    Returns:
        A list of feature names, one per key of ``example[2]``, in
        ascending order of the feature ids (not of the names).

    Raises:
        AssertionError: if ``featureIds`` has no name for one of the
            example's feature ids; the offending id is the assertion
            message.
    """
    names = []
    for feature in sorted(example[2]):
        name = featureIds.getName(feature)
        # Every feature id used by an example must be known to the id set.
        assert name is not None, feature
        names.append(name)
    return names
18
19 -def compareExamples(examples1, examples2, features1, features2=None):
20 ExampleUtils.readExamples(examples1) 21 exampleIter1 = ExampleUtils.readExamples(examples1) 22 exampleIter2 = ExampleUtils.readExamples(examples2) 23 features1 = IdSet(filename=features1) 24 if features2 != None: 25 features2 = IdSet(filename=features2) 26 else: 27 features2 = features1 28 # Compare feature sets 29 if set(features1.Ids.keys()) != set(features2.Ids.keys()): 30 print "Feature sets differ" 31 # Compare examples 32 counter = ProgressCounter(step=1) 33 for e1, e2 in itertools.izip(exampleIter1, exampleIter2): 34 counter.update() 35 assert e1[0] == e2[0], (removeFeatures(e1), removeFeatures(e2)) 36 if e1[1] != e2[1]: 37 print "Class differs" 38 print " E1", removeFeatures(e1) 39 print " E2", removeFeatures(e2) 40 f1 = getFeatureNames(e1, features1) 41 f2 = getFeatureNames(e2, features2) 42 f1Set = set(f1) 43 f2Set = set(f2) 44 f1Only = f1Set.difference(f2Set) 45 f2Only = f2Set.difference(f1Set) 46 if len(f1Only) > 0 or len(f2Only) > 0: 47 print "Features differ" 48 print " E1", removeFeatures(e1) 49 print " E2", removeFeatures(e2) 50 if len(f1Only) > 0: 51 print " E1-only features:", f1Only 52 if len(f2Only) > 0: 53 print " E2-only features:", f2Only 54 else: 55 assert len(f1) == len(f2) 56 fCount = 0 57 differ = False 58 for feature1, feature2 in zip(f1, f2): 59 #f1Id = features1.getId(feature1, createIfNotExist=False) 60 #if f1Id == 454 or feature1 == "e1_strength_Positive_regulation": 61 # print "!!!!!!!!!!!", 454, feature1, e1[2][f1Id] 62 if feature1 != feature2: 63 if not differ: 64 print "Feature order differs for example", e1[0] 65 differ = True 66 print "[" + feature1 + "/" + feature2 + "](" + str(fCount) + ") ", 67 else: 68 f1Id = features1.getId(feature1, createIfNotExist=False) 69 f2Id = features2.getId(feature2, createIfNotExist=False) 70 f1Value = e1[2][f1Id] 71 f2Value = e2[2][f2Id] 72 if f1Value != f2Value: 73 if not differ: 74 print "Feature values differ", e1[0] 75 differ = True 76 print "[" + feature1 + "/" + str(f1Id) + "]" + "[" + 
str(f1Value) + "/" + str(f2Value) + "]" + "(" + str(fCount) + ") ", 77 fCount += 1 78 if differ: 79 print 80 counter.endUpdate()
if __name__=="__main__":
    # Command-line entry point: compare two example files given with -a/-b,
    # using the feature id file(s) given with -f (and optionally -g).

    # Import Psyco if available
    try:
        import psyco
    except ImportError:
        print >> sys.stderr, "Psyco not installed"
    else:
        psyco.full()
        print >> sys.stderr, "Found Psyco, using"
    from optparse import OptionParser
    optparser = OptionParser(description="Analyze SVM example files")
    optparser.add_option("-a", "--fileA", default=None, dest="fileA", help="First example file")
    optparser.add_option("-b", "--fileB", default=None, dest="fileB", help="Second example file")
    optparser.add_option("-f", "--featureIds", default=None, dest="featureIds", help="Feature id file for the first example file")
    optparser.add_option("-g", "--featureIdsB", default=None, dest="featureIdsB", help="Feature id file for the second example file (default: same as --featureIds)")
    (options, args) = optparser.parse_args()

    # Both example files are needed for a comparison; fail with a usage
    # message rather than an error from deep inside the example reader.
    if options.fileA is None or options.fileB is None:
        optparser.error("both --fileA and --fileB must be given")

    compareExamples(options.fileA, options.fileB, options.featureIds, options.featureIdsB)