1  import itertools 
  2  import sys, os 
  3  sys.path.append(os.path.dirname(os.path.abspath(__file__))+"/..") 
  4  import Core.ExampleUtils as ExampleUtils 
  5  from Core.IdSet import IdSet 
  6  from Utils.ProgressCounter import ProgressCounter  
  7   
  9      return [example[0], example[1], {}, example[3]] 
  10   
 12      names = [] 
 13      for feature in sorted(example[2].keys()): 
 14          name = featureIds.getName(feature) 
 15          assert name != None 
 16          names.append(name) 
 17      return names 
  18   
 20      ExampleUtils.readExamples(examples1) 
 21      exampleIter1 = ExampleUtils.readExamples(examples1) 
 22      exampleIter2 = ExampleUtils.readExamples(examples2) 
 23      features1 = IdSet(filename=features1) 
 24      if features2 != None: 
 25          features2 = IdSet(filename=features2) 
 26      else: 
 27          features2 = features1 
 28       
 29      if set(features1.Ids.keys()) != set(features2.Ids.keys()): 
 30          print "Feature sets differ" 
 31       
 32      counter = ProgressCounter(step=1) 
 33      for e1, e2 in itertools.izip(exampleIter1, exampleIter2): 
 34          counter.update() 
 35          assert e1[0] == e2[0], (removeFeatures(e1), removeFeatures(e2)) 
 36          if e1[1] != e2[1]: 
 37              print "Class differs" 
 38              print "  E1", removeFeatures(e1) 
 39              print "  E2", removeFeatures(e2) 
 40          f1 = getFeatureNames(e1, features1) 
 41          f2 = getFeatureNames(e2, features2) 
 42          f1Set = set(f1) 
 43          f2Set = set(f2) 
 44          f1Only = f1Set.difference(f2Set) 
 45          f2Only = f2Set.difference(f1Set) 
 46          if len(f1Only) > 0 or len(f2Only) > 0: 
 47              print "Features differ" 
 48              print "  E1", removeFeatures(e1) 
 49              print "  E2", removeFeatures(e2) 
 50              if len(f1Only) > 0: 
 51                  print "  E1-only features:", f1Only 
 52              if len(f2Only) > 0: 
 53                  print "  E2-only features:", f2Only 
 54          else: 
 55              assert len(f1) == len(f2) 
 56              fCount = 0 
 57              differ = False 
 58              for feature1, feature2 in zip(f1, f2): 
 59                   
 60                   
 61                   
 62                  if feature1 != feature2: 
 63                      if not differ: 
 64                          print "Feature order differs for example", e1[0] 
 65                          differ = True 
 66                      print "[" + feature1 + "/" + feature2 + "](" + str(fCount) + ") ", 
 67                  else: 
 68                      f1Id = features1.getId(feature1, createIfNotExist=False) 
 69                      f2Id = features2.getId(feature2, createIfNotExist=False) 
 70                      f1Value = e1[2][f1Id] 
 71                      f2Value = e2[2][f2Id] 
 72                      if f1Value != f2Value: 
 73                          if not differ: 
 74                              print "Feature values differ", e1[0] 
 75                              differ = True 
 76                          print "[" + feature1 + "/" + str(f1Id) + "]" + "[" + str(f1Value) + "/" + str(f2Value) + "]" + "(" + str(fCount) + ") ", 
 77                  fCount += 1               
 78              if differ: 
 79                  print 
 80      counter.endUpdate() 
  81   
 82  if __name__=="__main__": 
 83       
 84      try: 
 85          import psyco 
 86          psyco.full() 
 87          print >> sys.stderr, "Found Psyco, using" 
 88      except ImportError: 
 89          print >> sys.stderr, "Psyco not installed" 
 90      from optparse import OptionParser 
 91      optparser = OptionParser(description="Analyze SVM example files") 
 92      optparser.add_option("-a", "--fileA", default=None, dest="fileA", help="") 
 93      optparser.add_option("-b", "--fileB", default=None, dest="fileB", help="") 
 94      optparser.add_option("-f", "--featureIds", default=None, dest="featureIds", help="") 
 95      optparser.add_option("-g", "--featureIdsB", default=None, dest="featureIdsB", help="") 
 96      (options, args) = optparser.parse_args() 
 97       
 98      compareExamples(options.fileA, options.fileB, options.featureIds, options.featureIdsB) 
 99