1 import itertools
2 import sys, os
3 sys.path.append(os.path.dirname(os.path.abspath(__file__))+"/..")
4 import Core.ExampleUtils as ExampleUtils
5 from Core.IdSet import IdSet
6 from Utils.ProgressCounter import ProgressCounter
7
def removeFeatures(example):
    """
    Return a copy of an example with its feature mapping replaced by an empty dict.

    Used to print an example compactly in diagnostics: items 0, 1 and 3 of
    the example are kept as they are (item 3 is shared, not copied), while
    item 2 (the feature mapping) is dropped. The input example is not modified.

    @param example: an indexable example with at least four items
    @return: a new four-item list [example[0], example[1], {}, example[3]]
    """
    return [example[0], example[1], {}, example[3]]
10
def getFeatureNames(example, featureIds):
    """
    Return the names of an example's features, ordered by ascending feature id.

    @param example: an indexable example whose item 2 maps feature ids to values
    @param featureIds: an object whose getName(id) method returns the name
        for a feature id, or None if the id is unknown
    @return: a list of feature names, one per key of example[2]
    @raise AssertionError: if any feature id of the example has no name
    """
    names = []
    for feature in sorted(example[2]):
        name = featureIds.getName(feature)
        # An unnamed id means the example and the id set do not belong together
        assert name is not None, "No name for feature id " + repr(feature)
        names.append(name)
    return names
18
def _reportFeatureDifferences(e1, e2, f1, f2, features1, features2):
    """
    Print order and value differences for two examples with equal feature name sets.

    f1 and f2 are the feature name lists of e1 and e2 (same length). Names are
    compared position by position; where the names agree, the values stored
    under the corresponding ids in e1[2] and e2[2] are compared.
    """
    differ = False
    for fCount, (feature1, feature2) in enumerate(zip(f1, f2)):
        if feature1 != feature2:
            if not differ:
                print("Feature order differs for example " + str(e1[0]))
                differ = True
            sys.stdout.write("[" + feature1 + "/" + feature2 + "](" + str(fCount) + ") ")
        else:
            # Same name on both sides: look the ids up again, since the two
            # id sets may number the same feature differently
            f1Id = features1.getId(feature1, createIfNotExist=False)
            f2Id = features2.getId(feature2, createIfNotExist=False)
            f1Value = e1[2][f1Id]
            f2Value = e2[2][f2Id]
            if f1Value != f2Value:
                if not differ:
                    print("Feature values differ " + str(e1[0]))
                    differ = True
                sys.stdout.write("[" + feature1 + "/" + str(f1Id) + "]" + "[" + str(f1Value) + "/" + str(f2Value) + "]" + "(" + str(fCount) + ") ")
    if differ:
        # Terminate the line of per-feature items written above
        sys.stdout.write("\n")

def compareExamples(examples1, examples2, features1, features2=None):
    """
    Compare two example files pairwise and print every difference found.

    Examples are read in parallel from both files. For each pair, item 0 must
    be equal (asserted), item 1 is reported as "Class differs" when unequal,
    and the features in item 2 are compared by name, order and value. A
    difference between the two feature id sets and a difference in the number
    of examples are also reported.

    @param examples1: first example file, passed to ExampleUtils.readExamples
    @param examples2: second example file, passed to ExampleUtils.readExamples
    @param features1: file name of the feature id set for the first file
    @param features2: file name of the feature id set for the second file;
        if None, the first id set is used for both files
    @raise AssertionError: if item 0 of a pair of examples differs, or a
        feature id has no name in its id set
    """
    exampleIter1 = ExampleUtils.readExamples(examples1)
    exampleIter2 = ExampleUtils.readExamples(examples2)
    features1 = IdSet(filename=features1)
    if features2 is not None:
        features2 = IdSet(filename=features2)
    else:
        features2 = features1

    # Compare feature sets
    if set(features1.Ids.keys()) != set(features2.Ids.keys()):
        print("Feature sets differ")

    # Pair the examples with a padding zip so that a length mismatch is seen
    # instead of silently ignored (izip_longest on Python 2, zip_longest on 3)
    zipLongest = getattr(itertools, "izip_longest", None)
    if zipLongest is None:
        zipLongest = itertools.zip_longest
    missing = object()

    # Compare examples
    counter = ProgressCounter(step=1)
    for e1, e2 in zipLongest(exampleIter1, exampleIter2, fillvalue=missing):
        if e1 is missing or e2 is missing:
            # One file ran out of examples before the other
            print("Example count differs")
            break
        counter.update()
        assert e1[0] == e2[0], (removeFeatures(e1), removeFeatures(e2))
        if e1[1] != e2[1]:
            print("Class differs")
            print(" E1 " + str(removeFeatures(e1)))
            print(" E2 " + str(removeFeatures(e2)))
        f1 = getFeatureNames(e1, features1)
        f2 = getFeatureNames(e2, features2)
        f1Set = set(f1)
        f2Set = set(f2)
        f1Only = f1Set.difference(f2Set)
        f2Only = f2Set.difference(f1Set)
        if f1Only or f2Only:
            print("Features differ")
            print(" E1 " + str(removeFeatures(e1)))
            print(" E2 " + str(removeFeatures(e2)))
            if f1Only:
                print(" E1-only features: " + str(f1Only))
            if f2Only:
                print(" E2-only features: " + str(f2Only))
        else:
            assert len(f1) == len(f2)
            _reportFeatureDifferences(e1, e2, f1, f2, features1, features2)
    counter.endUpdate()
81
if __name__=="__main__":
    # Command-line entry point: compare two example files given with -a and -b.
    # Psyco is an optional accelerator; the script works without it.
    try:
        import psyco
        psyco.full()
        sys.stderr.write("Found Psyco, using\n")
    except ImportError:
        sys.stderr.write("Psyco not installed\n")
    from optparse import OptionParser
    optparser = OptionParser(description="Analyze SVM example files")
    optparser.add_option("-a", "--fileA", default=None, dest="fileA", help="First example file")
    optparser.add_option("-b", "--fileB", default=None, dest="fileB", help="Second example file")
    optparser.add_option("-f", "--featureIds", default=None, dest="featureIds", help="Feature id file for the first example file")
    optparser.add_option("-g", "--featureIdsB", default=None, dest="featureIdsB", help="Feature id file for the second example file (default: same as --featureIds)")
    (options, args) = optparser.parse_args()

    # Fail with a usage message instead of an obscure error from reading None
    if options.fileA is None or options.fileB is None:
        optparser.error("both --fileA and --fileB must be given")

    compareExamples(options.fileA, options.fileB, options.featureIds, options.featureIdsB)
99