1 """
2 Functions for dividing data into random sets.
3 """
4
5 __version__ = "$Revision: 1.3 $"
6
7 import random
8
def getSample(popSize, sampleFraction, seed=0):
    """
    Generates a list of 1/0 values for defining a random sample for a list of length popSize.
    List elements with value 0 belong to the sample.

    The sample holds int(sampleFraction * popSize) elements, i.e. the product
    is truncated towards zero. The module-level random number generator is
    re-seeded with seed on every call.

    @param popSize: The length of the list from which the sample is drawn.
    @type popSize: int
    @param sampleFraction: The fraction [0,1] of the population to be included in the sample
    @type sampleFraction: float
    @param seed: a seed value for the Python random number generator
    @type seed: int
    @return: a list of length popSize; 0 marks a sampled index, 1 every other index
    @rtype: list of int
    @raise ValueError: propagated from random.sample when the requested sample
        size is negative or larger than popSize
    """
    random.seed(seed)
    sampleSize = int(sampleFraction * float(popSize))
    # range() (not xrange()) keeps this runnable on both Python 2 and Python 3.
    # Wrapping the drawn indices in a set makes each membership test below a
    # constant-time lookup instead of a scan over the whole sample.
    sample = set(random.sample(range(popSize), sampleSize))
    return [0 if i in sample else 1 for i in range(popSize)]
30
# NOTE(review): the "def" line of this function is absent from the visible
# source. The name and parameters are reconstructed from the docstring and
# from the calls in the self-test at the bottom of the file; the seed=0
# default is assumed to mirror getSample -- confirm against the original.
def getFolds(popSize, folds, seed=0):
    """
    Divides the population into folds of roughly equal size.

    Every fold first receives popSize // folds randomly drawn indices. The
    popSize % folds indices left over are then handed out one each to folds
    0, 1, 2, ... in ascending index order, so fold sizes differ by at most one.
    The module-level random number generator is re-seeded with seed on every
    call.

    @param popSize: The length of the list from which the sample is drawn.
    @type popSize: int
    @param folds: the number of folds to divide the population into
    @type folds: int >= 1
    @param seed: a seed value for the Python random number generator
    @type seed: int
    @return: a list of length popSize whose element i is the fold number
        (0 .. folds-1) that index i was assigned to
    @rtype: list of int
    @raise ZeroDivisionError: if folds is 0
    """
    sampleSize = popSize // folds
    random.seed(seed)

    # -1 marks an index that has not been assigned to a fold yet.
    vector = [-1] * popSize

    # An explicit list is needed: on Python 3 range() is not a list.
    population = list(range(popSize))
    for i in range(folds):
        sample = random.sample(population, sampleSize)
        for j in sample:
            vector[j] = i
        # Drop the drawn indices in one order-preserving pass. The remaining
        # population is the same list that repeated list.remove() calls would
        # leave behind, so later draws are unaffected.
        drawn = set(sample)
        population = [j for j in population if j not in drawn]

    # Whatever is left (fewer than `folds` indices, in ascending order) is
    # spread over the first folds, one index per fold.
    for currentFold, j in enumerate(population):
        assert(currentFold < folds-1)
        vector[j] = currentFold
    return vector
63
64
if __name__=="__main__":
    # Manual smoke test: prints the vectors produced for a few small inputs.
    # The single-argument print(...) form is valid on both Python 2 and
    # Python 3 and emits the same text on either.
    print("Testing 20, 0.0:")
    print(getSample(20,0.0))
    print("Testing 20, 0.5:")
    print(getSample(20,0.5))
    print("Folds 20 / 2:")
    print(getFolds(20,2))
    print("Folds 20 / 3:")
    print(getFolds(20,3))
    print("Folds 20 / 4:")
    print(getFolds(20,4))
    print("Folds 20 / 20:")
    print(getFolds(20,20))
78