Package TEES :: Package Utils :: Module CompareFeatures
[hide private]

Source Code for Module TEES.Utils.CompareFeatures

 1  import sys, os 
 2  sys.path.append(os.path.dirname(os.path.abspath(__file__))+"/..") 
 3  from Core.IdSet import IdSet 
 4  import Core.ExampleUtils as ExampleUtils 
 5   
def getFeatureNames(examples, featureIds):
    """
    Collect the distinct feature names used by a sequence of examples.

    @param examples: iterable of examples; the item at index 2 of each
        example is a mapping whose keys are feature ids
    @param featureIds: object whose getName(featureId) method returns the
        name for a feature id
    @return: set of the values returned by featureIds.getName for every
        feature id that occurs in at least one example
    """
    names = set()
    for example in examples:
        # Only the feature ids (the mapping's keys) are needed, so iterate
        # the mapping directly instead of calling the Python-2-only
        # iteritems() and discarding the values.
        for feature in example[2]:
            names.add(featureIds.getName(feature))
    return names
if __name__ == "__main__":
    # Command line tool: load two example files together with their feature
    # id files, then report which feature names occur in both files and
    # which occur in only one of them.

    # Import Psyco if available
    try:
        import psyco
        psyco.full()
        sys.stderr.write("Found Psyco, using\n")
    except ImportError:
        sys.stderr.write("Psyco not installed\n")

    from optparse import OptionParser
    optparser = OptionParser(description="Analyze SVM example files")
    optparser.add_option("-s", "--source", default=None, dest="source", help="Source example file", metavar="FILE")
    optparser.add_option("-t", "--target", default=None, dest="target", help="Target example file", metavar="FILE")
    optparser.add_option("-f", "--sourceFeatureIds", default=None, dest="sourceFeatureIds", help="Feature id file for the source examples", metavar="FILE")
    optparser.add_option("-g", "--targetFeatureIds", default=None, dest="targetFeatureIds", help="Feature id file for the target examples", metavar="FILE")
    (options, args) = optparser.parse_args()

    # Fail early with a usage message instead of passing None on to the
    # loaders below.
    # NOTE(review): assumes neither IdSet nor ExampleUtils.readExamples can do
    # anything useful without a file name -- confirm against those modules.
    for dest, flag in (("source", "-s/--source"),
                       ("target", "-t/--target"),
                       ("sourceFeatureIds", "-f/--sourceFeatureIds"),
                       ("targetFeatureIds", "-g/--targetFeatureIds")):
        if getattr(options, dest) is None:
            optparser.error("missing required option " + flag)

    # The print calls below take a single pre-formatted argument so that they
    # produce the same output under both Python 2 and Python 3.
    print("Loading ids")
    sFeatIds = IdSet(filename=options.sourceFeatureIds)
    tFeatIds = IdSet(filename=options.targetFeatureIds)
    print("Loading examples")
    sExamples = ExampleUtils.readExamples(options.source)
    tExamples = ExampleUtils.readExamples(options.target)
    print("Making name sets")
    s = getFeatureNames(sExamples, sFeatIds)
    t = getFeatureNames(tExamples, tFeatIds)

    # Compute each partition once; it is used for both the summary counts
    # and the listings.
    shared = s & t
    onlyS = s - t
    onlyT = t - s

    print("Source features: %d" % len(s))
    print("Target features: %d" % len(t))
    print("Intersection: %d" % len(shared))
    print("Only source: %d" % len(onlyS))
    print("Only target: %d" % len(onlyT))

    # Full listings, one name per line, each section introduced by a '#' header
    print("#Only Source:")
    for value in sorted(onlyS):
        print(value)
    print("#Only Target:")
    for value in sorted(onlyT):
        print(value)
    print("#Intersection:")
    for value in sorted(shared):
        print(value)