Package TEES :: Package Core :: Module Split
[hide private]

Source Code for Module TEES.Core.Split

 1  """ 
 2  Functions for dividing data into random sets. 
 3  """ 
 4   
 5  __version__ = "$Revision: 1.3 $" 
 6   
 7  import random 
 8   
def getSample(popSize, sampleFraction, seed=0):
    """
    Generates a list of 1/0 values for defining a random sample for a list of length popSize.
    List elements with value 0 belong to the sample.

    The sample contains int(sampleFraction * popSize) items, i.e. the requested
    fraction is truncated to a whole number of items.

    @param popSize: The length of the list from which the sample is drawn.
    @type popSize: int
    @param sampleFraction: The fraction [0,1] of the population to be included in the sample
    @type sampleFraction: float
    @param seed: a seed value for the Python random number generator
    @type seed: int
    @return: a list of length popSize holding 0 for sampled positions and 1 for all others
    @rtype: list of int
    """
    # Re-seeding the module-level generator makes the sample reproducible
    # for a given seed (note that this resets the shared random state).
    random.seed(seed)
    sampleSize = int(sampleFraction * float(popSize))

    # Start with everything outside the sample, then flip the drawn positions.
    # This avoids testing every index against the sample list (quadratic).
    # range() exists on both Python 2 and Python 3; xrange is Python 2 only.
    vector = [1] * popSize
    for index in random.sample(range(popSize), sampleSize):
        vector[index] = 0
    return vector
30
def getFolds(popSize, folds, seed=0):
    """
    Divides the population into n folds of roughly equal size.

    Each fold first receives int(popSize / folds) randomly drawn items. Items
    that are still unassigned after that (fewer than one per fold) are handed
    out one per fold, starting from fold 0, in order of their position.

    @param popSize: The length of the list from which the sample is drawn.
    @type popSize: int
    @param folds: the number of folds to divide the population into
    @type folds: int >= 1
    @param seed: a seed value for the Python random number generator
    @type seed: int
    @return: a list of length popSize where element i is the fold number of item i
    @rtype: list of int
    """
    sampleSize = int(float(popSize) / float(folds))
    # Re-seeding the module-level generator makes the division reproducible
    # for a given seed (note that this resets the shared random state).
    random.seed(seed)

    vector = [-1] * popSize  # -1 is for items not in any fold

    # An explicit list is needed because the population shrinks after each
    # fold; on Python 3 a bare range() object cannot have items removed.
    population = list(range(popSize))
    for fold in range(folds):
        chosen = set(random.sample(population, sampleSize))
        for item in chosen:
            vector[item] = fold
        # Drop the chosen items in a single pass. The survivors keep their
        # relative order, so the next draw sees the same population it would
        # after removing the items one by one.
        population = [item for item in population if item not in chosen]

    # add -1 cases roughly evenly to all folds; whatever is left in
    # population is exactly the set of unassigned items, in position order
    for currentFold, item in enumerate(population):
        # Sanity check: there must be fewer leftover items than folds.
        assert currentFold < folds - 1
        vector[item] = currentFold
    return vector
# test program for demonstrating sampling and folds
if __name__ == "__main__":
    # Each print takes a single parenthesised argument, which is valid both
    # as a Python 2 print statement and as a Python 3 print() call.
    print("Testing 20, 0.0:")
    print(getSample(20, 0.0))
    print("Testing 20, 0.5:")
    print(getSample(20, 0.5))
    print("Folds 20 / 2:")
    print(getFolds(20, 2))
    print("Folds 20 / 3:")
    print(getFolds(20, 3))
    print("Folds 20 / 4:")
    print(getFolds(20, 4))
    print("Folds 20 / 20:")
    print(getFolds(20, 20))