1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 """
25 pstat.py module
26
27 #################################################
28 ####### Written by: Gary Strangman ###########
29 ####### Last modified: Jun 29, 2001 ###########
30 #################################################
31
32 This module provides some useful list and array manipulation routines
33 modeled after those found in the |Stat package by Gary Perlman, plus a
34 number of other useful list/file manipulation functions. The list-based
35 functions include:
36
37 abut (source,*args)
38 simpleabut (source, addon)
39 colex (listoflists,cnums)
40 collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
41 dm (listoflists,criterion)
42 flat (l)
43 linexand (listoflists,columnlist,valuelist)
44 linexor (listoflists,columnlist,valuelist)
45 linedelimited (inlist,delimiter)
46 lineincols (inlist,colsize)
47 lineincustcols (inlist,colsizes)
48 list2string (inlist)
49 makelol(inlist)
50 makestr(x)
51 printcc (lst,extra=2)
52 printincols (listoflists,colsize)
53 pl (listoflists)
54 printl(listoflists)
55 replace (lst,oldval,newval)
56 recode (inlist,listmap,cols=None)
57 remap (listoflists,criterion)
58 roundlist (inlist,num_digits_to_round_floats_to)
59 sortby(listoflists,sortcols)
60 unique (inlist)
61 duplicates(inlist)
62 writedelimited (listoflists, delimiter, file, writetype='w')
63
64 Some of these functions have alternate versions which are defined only if
65 Numeric (NumPy) can be imported. These functions are generally named as
66 above, with an 'a' prefix.
67
68 aabut (source, *args)
69 acolex (a,indices,axis=1)
70 acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
71 adm (a,criterion)
72 alinexand (a,columnlist,valuelist)
73 alinexor (a,columnlist,valuelist)
74 areplace (a,oldval,newval)
75 arecode (a,listmap,col='all')
76 arowcompare (row1, row2)
77 arowsame (row1, row2)
78 asortrows(a,axis=0)
79 aunique(inarray)
80 aduplicates(inarray)
81
82 Currently, the code is all but completely un-optimized. In many cases, the
83 array versions of functions amount simply to aliases to built-in array
84 functions/methods. Their inclusion here is for function name consistency.
85 """
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107 import stats
108 import string, copy
109 from types import *
110
111 __version__ = 0.4
112
113
114
115
116
117
118
def _repeat_to_length (seq, length):
    """
    Private helper for abut: returns seq cycled (and truncated) so that it
    holds exactly `length` items.  Raises ZeroDivisionError if seq is empty.
    """
    # Ceiling division written with // so it stays an integer on every
    # Python version (plain / yields a float under true division).
    repeats = -(-length // len(seq))
    extra = copy.deepcopy(seq)
    return (seq + extra * (repeats - 1))[0:length]


def abut (source,*args):
    """
    Like the |Stat abut command.  It concatenates two lists side-by-side
    and returns the result.  '2D' lists are also accommodated for either
    argument (source or addon).  CAUTION:  If one list is shorter, it will be
    repeated until it is as long as the longest list.  If this behavior is not
    desired, use pstat.simpleabut().

    Usage:   abut(source, args)   where args=any # of lists
    Returns: a list of lists as long as the LONGEST list passed, source on the
             'left', lists in <args> attached consecutively on the 'right'.
             With no <args>, source is returned (wrapped in a list if it was
             not already a list or tuple).
    Raises:  ZeroDivisionError if an empty list has to be repeated.
    """
    if not isinstance(source, (list, tuple)):
        source = [source]
    for addon in args:
        if not isinstance(addon, (list, tuple)):
            addon = [addon]
        # Stretch whichever side is shorter by cycling through its items.
        if len(addon) < len(source):
            addon = _repeat_to_length(addon, len(source))
        elif len(source) < len(addon):
            source = _repeat_to_length(source, len(addon))
        source = simpleabut(source, addon)
    return source
164
165
def simpleabut (source, addon):
    """
    Concatenates two lists as columns and returns the result.  '2D' lists
    are also accommodated for either argument (source or addon).  This DOES NOT
    repeat either list to make the 2 lists of equal length.  Beware of list
    pairs with different lengths ... the resulting list will be the length of
    the FIRST list passed; rows of source beyond the end of addon are returned
    as (deep) copies without anything attached.

    Whether an argument is '2D' is decided from its first item only.

    Usage:   simpleabut(source,addon)  where source, addon=list (or list-of-lists)
    Returns: a list of lists as long as source, with source on the 'left' and
             addon on the 'right'
    """
    if not isinstance(source, (list, tuple)):
        source = [source]
    if not isinstance(addon, (list, tuple)):
        addon = [addon]
    # list(...) so that a tuple source still yields an assignable result.
    combined = list(copy.deepcopy(source))
    minlen = min(len(source), len(addon))
    if minlen == 0:
        return combined
    source_is_2d = isinstance(source[0], (list, tuple))
    addon_is_2d = isinstance(addon[0], (list, tuple))
    for i in range(minlen):
        if source_is_2d:
            left = list(source[i])
        else:
            left = [source[i]]
        if addon_is_2d:
            right = list(addon[i])
        else:
            right = [addon[i]]
        combined[i] = left + right
    return combined
200
201
def colex (listoflists,cnums):
    """
    Extracts from listoflists the columns specified by 'cnums'.  cnums can be
    an integer, a sequence of integers, or a string holding the subscript to
    apply to each row x ... e.g., '[3:]' extracts x[3:] (columns 3 onward)
    from every row.

    Usage:   colex (listoflists,cnums)
    Returns: for an integer (or a one-item sequence) a flat list holding that
             column; for a longer sequence a list-of-lists with the columns in
             the order they appear in cnums; for a string, the list of
             x<cnums> evaluated for every row.
    """
    if isinstance(cnums, (list, tuple)):
        column = [row[cnums[0]] for row in listoflists]
        for col in cnums[1:]:
            column = abut(column, [row[col] for row in listoflists])
    elif isinstance(cnums, str):
        # SECURITY NOTE: cnums is evaluated as Python source.  Never pass a
        # string that comes from an untrusted origin.
        selector = eval('lambda x: x' + cnums)
        column = [selector(row) for row in listoflists]
    else:
        column = [row[cnums] for row in listoflists]
    return column
228
229
def collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None):
    """
    Averages data in collapsecols, keeping all unique items in keepcols
    (using unique, which keeps unique LISTS of column numbers), retaining the
    unique sets of values in keepcols, the mean for each.  Setting fcn1
    and/or fcn2 to point to a function rather than None (e.g., stats.sterr, len)
    will append those results (e.g., the sterr, N) after each calculated mean;
    if such a function raises, the string 'N/A' is appended instead.
    cfcn is the collapse function to apply (defaults to mean, defined here in
    the pstat module to avoid circular imports with stats.py, but harmonicmean
    or others could be passed).

    Usage:   collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
    Returns: a list of lists with all unique permutations of entries appearing in
             columns ("conditions") specified by keepcols, abutted with the result
             of cfcn (if cfcn=None, defaults to the mean) of each column specified
             by collapsecols.  If keepcols is [], one entry per collapsed column
             is returned instead: the bare cfcn result, or a list
             [result, fcn1-result, fcn2-result] when fcn1/fcn2 are given.
    """
    def collmean (inlist):
        s = 0
        for item in inlist:
            s = s + item
        return s/float(len(inlist))

    def tryapply (fcn, column):
        # Best-effort: a failing optional statistic must not abort the collapse.
        try:
            return fcn(column)
        except Exception:
            return 'N/A'

    if not isinstance(keepcols, (list, tuple)):
        keepcols = [keepcols]
    if not isinstance(collapsecols, (list, tuple)):
        collapsecols = [collapsecols]
    if cfcn is None:
        cfcn = collmean
    extrafcns = [fcn for fcn in (fcn1, fcn2) if fcn is not None]

    if len(keepcols) == 0:
        means = []
        for col in collapsecols:
            avgcol = colex(listoflists, col)
            result = cfcn(avgcol)
            # Append the actual fcn1/fcn2 results (the fcn2 result used to be
            # thrown away and len(avgcol) reported in its place).
            extras = [tryapply(fcn, avgcol) for fcn in extrafcns]
            if extras:
                means.append([result] + extras)
            else:
                means.append(result)
        return means

    values = colex(listoflists, keepcols)
    uniques = unique(values)
    uniques.sort()
    newlist = []
    for item in uniques:
        if isinstance(item, (list, tuple)):
            item = list(item)
        else:
            item = [item]
        # Select the rows for this condition BEFORE statistics are appended.
        tmprows = linexand(listoflists, keepcols, item)
        for col in collapsecols:
            avgcol = colex(tmprows, col)
            item.append(cfcn(avgcol))
            for fcn in extrafcns:
                item.append(tryapply(fcn, avgcol))
        newlist.append(item)
    return newlist
306
307
def dm (listoflists,criterion):
    """
    Returns rows from the passed list of lists that meet the criteria in
    the passed criterion expression (a string as a function of x; e.g., 'x[3]>=9'
    will return all rows where the 4th column>=9 and "x[2]=='N'" will return rows
    with column 2 equal to the string 'N').

    Usage:   dm (listoflists, criterion)
    Returns: a list of the rows from listoflists that meet the specified
             criterion.
    """
    # SECURITY NOTE: criterion is evaluated as Python source.  Never pass a
    # string that comes from an untrusted origin.
    keep = eval('lambda x: ' + criterion)
    return [row for row in listoflists if keep(row)]
321
322
def flat (l):
    """
    Returns the flattened version of a '2D' list.  List-correlate to the
    a.flat() method of NumPy arrays.  Only one level of nesting is removed.

    Usage:   flat(l)
    Returns: a new list holding the items of every row of l, in order
    """
    return [item for row in l for item in row]
335
336
def linexand (listoflists,columnlist,valuelist):
    """
    Returns the rows of a list of lists where col (from columnlist) = val
    (from valuelist) for EVERY pair of values (columnlist[i],valuelist[i]).
    len(columnlist) must equal len(valuelist).

    Values are compared directly with ==, so strings containing quotes and
    floats are matched exactly.

    Usage:   linexand (listoflists,columnlist,valuelist)
    Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ALL i
    Raises:  IndexError if valuelist is shorter than columnlist
    """
    if not isinstance(columnlist, (list, tuple)):
        columnlist = [columnlist]
    if not isinstance(valuelist, (list, tuple)):
        valuelist = [valuelist]
    # Indexing (rather than zip) keeps the IndexError for a short valuelist.
    pairs = [(columnlist[i], valuelist[i]) for i in range(len(columnlist))]
    lines = []
    for row in listoflists:
        for col, val in pairs:
            if not row[col] == val:
                break
        else:
            lines.append(row)
    return lines
361
362
def linexor (listoflists,columnlist,valuelist):
    """
    Returns the rows of a list of lists where col (from columnlist) = val
    (from valuelist) for ANY pair of values (columnlist[i],valuelist[i]).
    One value is required for each column in columnlist.  If only one value
    exists for columnlist but multiple values appear in valuelist, the
    valuelist values are all assumed to pertain to the same column.

    Values are compared directly with ==, so strings containing quotes and
    floats are matched exactly.

    Usage:   linexor (listoflists,columnlist,valuelist)
    Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ANY i
    Raises:  IndexError if valuelist is shorter than columnlist
    """
    if not isinstance(columnlist, (list, tuple)):
        columnlist = [columnlist]
    if not isinstance(valuelist, (list, tuple)):
        valuelist = [valuelist]
    if len(columnlist) == 1 and len(valuelist) > 1:
        columnlist = columnlist*len(valuelist)
    # Indexing (rather than zip) keeps the IndexError for a short valuelist.
    pairs = [(columnlist[i], valuelist[i]) for i in range(len(columnlist))]
    lines = []
    for row in listoflists:
        for col, val in pairs:
            if row[col] == val:
                lines.append(row)
                break
    return lines
391
392
def linedelimited (inlist,delimiter):
    """
    Returns a string composed of elements in inlist, with each element
    separated by 'delimiter.'  Used by function writedelimited.  Use '\\t'
    for tab-delimiting.  Non-string elements are converted with str().
    The delimiter may be any length (including empty); nothing is appended
    after the last element.

    Usage:   linedelimited (inlist,delimiter)
    Returns: the delimited string ('' for an empty inlist)
    """
    return delimiter.join([str(item) for item in inlist])
408
409
def lineincols (inlist,colsize):
    """
    Returns a string composed of elements in inlist, with each element
    right-aligned in columns of (fixed) colsize.  Elements longer than
    colsize are truncated to exactly colsize characters so that the columns
    stay aligned.

    Usage:   lineincols (inlist,colsize)   where colsize is an integer
    Returns: formatted string created from inlist
    """
    pieces = []
    for item in inlist:
        text = str(item)
        if len(text) <= colsize:
            pieces.append(text.rjust(colsize))
        else:
            # Keep colsize characters (not colsize+1) to honour the fixed width.
            pieces.append(text[0:colsize])
    return ''.join(pieces)
429
430
def lineincustcols (inlist,colsizes):
    """
    Returns a string composed of elements in inlist, with each element
    right-aligned in a column of width specified by a sequence colsizes.  The
    length of colsizes must be greater than or equal to the number of columns
    in inlist.  Elements longer than their column are truncated to exactly
    the column width so that the columns stay aligned.

    Usage:   lineincustcols (inlist,colsizes)
    Returns: formatted string created from inlist
    Raises:  IndexError if colsizes is shorter than inlist
    """
    pieces = []
    for i in range(len(inlist)):
        text = str(inlist[i])
        width = colsizes[i]
        if len(text) <= width:
            pieces.append(text.rjust(width))
        else:
            # Keep width characters (not width+1) to honour the column size.
            pieces.append(text[0:width])
    return ''.join(pieces)
455
456
def list2string (inlist,delimit=' '):
    """
    Converts a 1D list to a single long string for file output, joining
    the str() form of every item with 'delimit'.

    Usage:   list2string (inlist,delimit=' ')
    Returns: the string created from inlist
    """
    # str.join replaces the string-module join function, which newer Python
    # versions no longer provide.
    return delimit.join([str(item) for item in inlist])
467
468
def makelol (inlist):
    """
    Converts a 1D list to a 2D list (i.e., a list-of-lists).  Useful when you
    want to use put() to write a 1D list one item per line in the file.

    Usage:   makelol(inlist)
    Returns: if l = [1,2,'hi'] then returns [[1],[2],['hi']] etc.
    """
    return [[item] for item in inlist]
481
482
def makestr (x):
    """
    Returns x unchanged if it is already a string, otherwise str(x).

    Usage:   makestr(x)
    Returns: a string version of x
    """
    if not isinstance(x, str):
        x = str(x)
    return x
487
488
def printcc (lst,extra=2):
    """
    Prints a list of lists in columns, customized by the max size of items
    within the columns (max size of items in col, plus 'extra' number of spaces).
    Use 'dashes' or '\\n' in the list-of-lists to print dashes or blank lines,
    respectively.  Such marker rows (also accepted as ['dashes'], ['\\n'],
    '' and ['']) are ignored when the column widths are computed.

    Usage:   printcc (lst,extra=2)
    Returns: None
    """
    if not isinstance(lst[0], (list, tuple)):
        lst = [lst]
    blank_markers = (['\n'], '\n', '', [''])
    dash_markers = (['dashes'], 'dashes')
    # Only real data rows take part in the width calculation.
    data_rows = [row for row in lst
                 if row not in blank_markers and row not in dash_markers]
    maxsize = []
    if data_rows:
        for col in range(len(data_rows[0])):
            items = [makestr(item) for item in colex(data_rows, col)]
            maxsize.append(max([len(item) for item in items]) + extra)
    for row in lst:
        if row in blank_markers:
            print('')
        elif row in dash_markers:
            # Dashes span the widest item, leaving the 'extra' padding blank.
            dashes = ['-' * (width - extra) for width in maxsize]
            print(lineincustcols(dashes, maxsize))
        else:
            print(lineincustcols(row, maxsize))
    return None
525
526
def printincols (listoflists,colsize):
    """
    Prints a list of lists in columns of (fixed) colsize width, where
    colsize is an integer.  Each row is formatted with lineincols.

    Usage:   printincols (listoflists,colsize)
    Returns: None
    """
    for row in listoflists:
        print(lineincols(row, colsize))
    return None
538
539
def pl (listoflists):
    """
    Prints a list of lists, 1 list (row) at a time.  A row whose last item
    is '\\n' (e.g., a text line that already ends in a newline) is written
    without an additional newline.

    Usage:   pl(listoflists)
    Returns: None
    """
    import sys
    for row in listoflists:
        text = str(row)
        if len(row) > 0 and row[-1] == '\n':
            sys.stdout.write(text)          # row supplies its own line ending
        else:
            sys.stdout.write(text + '\n')
    return None
553
554
def printl (listoflists):
    """
    Alias for pl: prints a list of lists, 1 list (row) at a time.

    Usage:   printl(listoflists)
    Returns: None
    """
    pl(listoflists)
    return
559
560
def replace (inlst,oldval,newval):
    """
    Replaces all occurrences of 'oldval' with 'newval', recursively (nested
    lists and tuples are descended into).  The input is not modified; nested
    tuples stay tuples and nested lists stay lists.

    Usage:   replace (inlst,oldval,newval)
    Returns: a new list (or tuple, if inlst is a tuple) with the replacements
    """
    result = []
    for item in inlst:
        if isinstance(item, (list, tuple)):
            result.append(replace(item, oldval, newval))
        elif item == oldval:
            result.append(newval)
        else:
            result.append(item)
    if isinstance(inlst, tuple):
        return tuple(result)
    return result
574
575
def recode (inlist,listmap,cols=None):
    """
    Changes the values in a list to a new set of values (useful when
    you need to recode data from (e.g.) strings to numbers).  cols defaults
    to None (meaning all columns are recoded).  Values that do not appear
    as listmap[i][0] are left untouched.  The input list is not modified.

    Usage:   recode (inlist,listmap,cols=None)  cols=recode cols, listmap=2D list
    Returns: a deep copy of inlist with every value equal to some
             listmap[i][0] replaced by the matching listmap[i][1]
    """
    lst = copy.deepcopy(inlist)
    # The lookup column never changes, so build it once instead of per cell.
    oldvals = [pair[0] for pair in listmap]
    if cols is not None and not isinstance(cols, (list, tuple)):
        cols = [cols]
    for row in lst:
        if cols is None:
            # Every column of THIS row (not one column per row of the table).
            targets = range(len(row))
        else:
            targets = cols
        for col in targets:
            try:
                idx = oldvals.index(row[col])
            except ValueError:
                continue                    # value has no mapping; keep as is
            row[col] = listmap[idx][1]
    return lst
605
606
def remap (listoflists,criterion):
    """
    Remaps values in a given column of a 2D list (listoflists).  This requires
    a criterion as a function of 'x' so that the result of the following is
    returned ... map(lambda x: 'criterion',listoflists).

    Usage:   remap(listoflists,criterion)    criterion=string
    Returns: remapped version of listoflists (always a list)
    """
    # SECURITY NOTE: criterion is evaluated as Python source.  Never pass a
    # string that comes from an untrusted origin.
    transform = eval('lambda x: ' + criterion)
    return [transform(row) for row in listoflists]
619
620
def roundlist (inlist,digits):
    """
    Goes through each element in a 1D or 2D inlist, and applies the following
    function to all elements of FloatType ... round(element,digits).
    A 1D inlist (first item not a list/tuple) is treated as a single row.
    The input is not modified.

    Usage:   roundlist(inlist,digits)
    Returns: a new 2D list (list of rows) with rounded floats; a 1D input
             comes back as a one-row 2D list, an empty input as []
    """
    if len(inlist) == 0:
        return []
    if not isinstance(inlist[0], (list, tuple)):
        inlist = [inlist]
    rounded = []
    for row in inlist:
        newrow = []
        for value in row:
            if isinstance(value, float):
                value = round(value, digits)
            newrow.append(value)
        # Fresh row lists, so the caller's rows are never altered in place.
        rounded.append(newrow)
    return rounded
637
638
def sortby(listoflists,sortcols):
    """
    Sorts a list of lists on the column(s) specified in the sequence
    sortcols.  The sort-key columns are temporarily abutted on the left of
    every row, the combined rows are sorted, and the key columns are then
    stripped off again.

    Usage:   sortby(listoflists,sortcols)
    Returns: sorted list, unchanged column ordering
    """
    keyed = abut(colex(listoflists, sortcols), listoflists)
    keyed.sort()
    # A bare column number has no len(); it contributes exactly one key column.
    try:
        nkeys = len(sortcols)
    except TypeError:
        nkeys = 1
    return colex(keyed, '[' + str(nkeys) + ':]')
656
657
def unique (inlist):
    """
    Returns all unique items in the passed list.  If a list-of-lists
    is passed, unique LISTS are found (i.e., items in the first dimension are
    compared).  Items are compared with == (so unhashable items are fine) and
    the order of first appearance is preserved.

    Usage:   unique (inlist)
    Returns: the unique elements (or rows) in inlist
    """
    uniques = []
    for item in inlist:
        if item not in uniques:
            uniques.append(item)
    return uniques
672
def duplicates (inlist):
    """
    Returns duplicate items in the FIRST dimension of the passed list.
    An item is reported once for every LATER occurrence it still has, so an
    item appearing three times shows up twice in the result.

    Usage:   duplicates (inlist)
    Returns: list of the items that occur again further along inlist
    """
    dups = []
    for i in range(len(inlist)):
        if inlist[i] in inlist[i+1:]:
            dups.append(inlist[i])
    return dups
684
685
def nonrepeats (inlist):
    """
    Returns items that are NOT duplicated in the first dim of the passed list.

    Usage:   nonrepeats (inlist)
    Returns: list of the items that occur exactly once in inlist, in order
    """
    return [item for item in inlist if inlist.count(item) == 1]
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716 try:
717 import Numeric
718 N = Numeric
719
def aabut (source, *args):
    """
    Like the |Stat abut command.  It concatenates two arrays column-wise
    and returns the result.  CAUTION:  If one array is shorter, it will be
    repeated until it is as long as the other.  1D arrays are treated as
    single columns.

    Usage:   aabut (source, args)    where args=any # of arrays
    Returns: an array as long as the LONGEST array passed, source appearing on
             the 'left', arrays in <args> attached on the 'right'.
    """
    if len(source.shape) == 1:
        source = N.resize(source, [source.shape[0], 1])
    for addon in args:
        if len(addon.shape) == 1:
            # Turn the 1D addon into a column of its OWN length; resizing it to
            # the source length here would truncate a longer addon before the
            # length comparison below could extend source instead.
            addon = N.resize(addon, [addon.shape[0], 1])
        if len(addon) < len(source):
            addon = N.resize(addon, [source.shape[0], addon.shape[1]])
        elif len(source) < len(addon):
            source = N.resize(source, [addon.shape[0], source.shape[1]])
        source = N.concatenate((source, addon), 1)
    return source
747
748
def acolex (a,indices,axis=1):
    """
    Extracts specified indices (a list) from passed array, along passed
    axis (column extraction is default).  BEWARE: A 1D array is presumed to be
    a column-array (and the whole array will be returned as a column,
    whatever indices says).

    Usage:   acolex (a,indices,axis=1)
    Returns: the columns of a specified by indices
    """
    if type(indices) not in (list, tuple, N.ArrayType):
        indices = [indices]
    if len(N.shape(a)) == 1:
        # 1D input: hand back everything as a single column.
        return N.resize(a, [a.shape[0], 1])
    return N.take(a, indices, axis)
765
766
def acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None):
    """
    Averages data in collapsecols, keeping all unique items in keepcols
    (using unique, which keeps unique LISTS of column numbers), retaining
    the unique sets of values in keepcols, the mean for each.  Setting fcn1
    and/or fcn2 to a function appends that function's result after each
    calculated value ('N/A' if the function raises).  cfcn is the collapse
    function applied to each group (defaults to the mean of all elements).

    Usage:   acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
    Returns: unique 'conditions' specified by the contents of columns specified
             by keepcols, abutted with the mean(s) of column(s) specified by
             collapsecols
    """
    def acollmean (inarray):
        # Mean over every element (this previously returned only the sum).
        elements = N.ravel(inarray)
        return N.sum(elements)/float(len(elements))

    if cfcn is None:
        cfcn = acollmean
    if keepcols == []:
        # No grouping columns: column-wise means of the collapse columns.
        avgcol = acolex(a,collapsecols)
        means = N.sum(avgcol)/float(len(avgcol))
        for fcn in (fcn1, fcn2):
            if fcn is not None:
                try:
                    test = fcn(avgcol)
                except Exception:
                    test = N.array(['N/A']*len(means))
                means = aabut(means,test)
        return means
    else:
        if type(keepcols) not in (list, tuple, N.ArrayType):
            keepcols = [keepcols]
        values = colex(a,keepcols)
        uniques = unique(values)
        uniques.sort()
        newlist = []
        for item in uniques:
            if type(item) not in (list, tuple, N.ArrayType):
                item = [item]
            # Select the group's rows BEFORE statistics are appended to item.
            tmprows = alinexand(a,keepcols,item)
            for col in collapsecols:
                avgcol = acolex(tmprows,col)
                # Honour the caller's collapse function (it used to be ignored
                # here in favour of the built-in default).
                item.append(cfcn(avgcol))
                for fcn in (fcn1, fcn2):
                    if fcn is not None:
                        try:
                            test = fcn(avgcol)
                        except Exception:
                            test = 'N/A'
                        item.append(test)
            newlist.append(item)
        try:
            new_a = N.array(newlist)
        except TypeError:
            # Mixed content (e.g. 'N/A' strings): fall back to an object array.
            new_a = N.array(newlist,'O')
        return new_a
832
833
def adm (a,criterion):
    """
    Returns rows from the passed array that meet the criteria in the passed
    criterion expression (a string as a function of x).  The criterion is
    evaluated as Python source, so it must come from a trusted origin.

    Usage:   adm (a,criterion)   where criterion is like 'x[2]==37'
    Returns: an array of the selected rows (an object array if a plain
             array cannot be built from them)
    """
    expression = 'filter(lambda x: ' + criterion + ',a)'
    selected = eval(expression)
    try:
        return N.array(selected)
    except:
        return N.array(selected,'O')
848
849
# NOTE(review): the `def` header of this helper is missing from this copy of
# the file, so its name and parameter list cannot be confirmed here
# (presumably something like `isstring(x)` -- TODO confirm against the
# original module).  The body below reads a single value `x` and returns 1
# when type(x) is exactly StringType, otherwise 0.
851 if type(x)==StringType:
852 return 1
853 else:
854 return 0
855
856
def alinexand (a,columnlist,valuelist):
    """
    Returns the rows of an array where col (from columnlist) = val
    (from valuelist).  One value is required for each column in columnlist.
    The test is assembled as a criterion string and handed to adm.

    Usage:   alinexand (a,columnlist,valuelist)
    Returns: the rows of a where columnlist[i]=valuelist[i] for ALL i
    """
    if type(columnlist) not in (list, tuple, N.ArrayType):
        columnlist = [columnlist]
    if type(valuelist) not in (list, tuple, N.ArrayType):
        valuelist = [valuelist]
    tests = []
    for i in range(len(columnlist)):
        value = valuelist[i]
        if isinstance(value, (str, float)):
            # repr() yields a literal that survives the round trip through
            # eval: embedded quotes are escaped and floats keep full precision.
            critval = repr(value)
        else:
            critval = str(value)
        tests.append('x[' + str(columnlist[i]) + ']==' + critval)
    return adm(a, ' and '.join(tests))
878
879
def alinexor (a,columnlist,valuelist):
    """
    Returns the rows of an array where col (from columnlist) = val (from
    valuelist).  One value is required for each column in columnlist.
    The exception is if either columnlist or valuelist has only 1 value,
    in which case that item will be expanded to match the length of the
    other list.  The test is assembled as a criterion string and handed
    to adm.

    Usage:   alinexor (a,columnlist,valuelist)
    Returns: the rows of a where columnlist[i]=valuelist[i] for ANY i
    """
    if type(columnlist) not in (list, tuple, N.ArrayType):
        columnlist = [columnlist]
    if type(valuelist) not in (list, tuple, N.ArrayType):
        valuelist = [valuelist]
    if len(columnlist) == 1 and len(valuelist) > 1:
        columnlist = columnlist*len(valuelist)
    elif len(valuelist) == 1 and len(columnlist) > 1:
        valuelist = valuelist*len(columnlist)
    tests = []
    for i in range(len(columnlist)):
        value = valuelist[i]
        if isinstance(value, (str, float)):
            # repr() yields a literal that survives the round trip through
            # eval: embedded quotes are escaped and floats keep full precision.
            critval = repr(value)
        else:
            critval = str(value)
        tests.append('x[' + str(columnlist[i]) + ']==' + critval)
    return adm(a, ' or '.join(tests))
908
909
def areplace (a,oldval,newval):
    """
    Replaces all occurrences of oldval with newval in array a.

    Usage:   areplace(a,oldval,newval)
    Returns: a new array; a itself is not modified
    """
    # Zero out the matching positions, then add newval back in at exactly
    # those positions.
    kept = N.not_equal(a,oldval)*a
    return kept + N.equal(a,oldval)*newval
918
919
def arecode (a,listmap,col='all'):
    """
    Remaps the values in an array to a new set of values (useful when
    you need to recode data from (e.g.) strings to numbers as most stats
    packages require.  Can work on SINGLE columns, or 'all' columns at once.
    Every pair in listmap is applied, and each element is recoded at most
    once (based on its ORIGINAL value).

    Usage:   arecode (a,listmap,col='all')
    Returns: a version of array a where listmap[i][0] = (instead) listmap[i][1]
    """
    ashape = a.shape
    if col == 'all':
        work = a.flat
    else:
        work = acolex(a,col)
        work = work.flat

    # Object arrays are needed as soon as strings are involved anywhere.
    useobjects = work.typecode()=='O' or a.typecode()=='O'
    for pair in listmap:
        if type(pair[1]) == str:
            useobjects = 1

    if useobjects:
        work = N.array(work,'O')
        a = N.array(a,'O')
        for i in range(len(work)):
            for pair in listmap:
                if work[i]==pair[0]:
                    work[i] = pair[1]
                    break                   # recode each element only once
    else:
        original = work
        for pair in listmap:
            # Always test against the original values so that one mapping's
            # output is not picked up again by a later mapping.
            work = N.where(N.equal(original,pair[0]),pair[1],work)

    if col == 'all':
        return N.reshape(work,ashape)
    return N.concatenate([a[:,0:col],work[:,N.NewAxis],a[:,col+1:]],1)
949
950
def arowcompare (row1, row2):
    """
    Compares two rows from an array, regardless of whether it is an
    array of numbers or of python objects (which requires the cmp function).

    Usage:   arowcompare(row1,row2)
    Returns: an array of equal length containing 1s where the two rows had
             identical elements and 0 otherwise
    """
    # typecode is a method: it has to be CALLED on row2 as well, otherwise the
    # comparison with 'O' can never be true.
    if row1.typecode()=='O' or row2.typecode()=='O':
        cmpvect = N.logical_not(abs(N.array(map(cmp,row1,row2))))
    else:
        cmpvect = N.equal(row1,row2)
    return cmpvect
965
966
def arowsame (row1, row2):
    """
    Compares two rows from an array, regardless of whether it is an
    array of numbers or of python objects (which requires the cmp function).

    Usage:   arowsame(row1,row2)
    Returns: 1 if the two rows are identical, 0 otherwise.
    """
    cmpval = N.alltrue(arowcompare(row1,row2))
    return cmpval
977
978
def asortrows (a,axis=0):
    """
    Sorts an array "by rows".  This differs from the Numeric.sort() function,
    which sorts elements WITHIN the given axis.  Instead, this function keeps
    the elements along the given axis intact, but shifts them 'up or down'
    relative to one another.

    Usage:   asortrows(a,axis=0)
    Returns: sorted version of a
    """
    if axis != 0:
        a = N.swapaxes(a, axis, 0)      # bring the requested axis to the front
    rows = a.tolist()
    rows.sort()
    y = N.array(rows)
    if axis != 0:
        y = N.swapaxes(y, axis, 0)      # restore the original axis order
    return y
997
998
def aunique (inarray):
    """
    Returns unique items in the FIRST dimension of the passed array.  Only
    works on arrays NOT including string items.

    Usage:   aunique (inarray)
    Returns: array of the unique items (rows), in order of first appearance
    """
    def addrow (uniques, item):
        # Append item as a new row; fall back to wrapping it in an array when
        # it cannot be indexed with NewAxis.
        try:
            return N.concatenate([uniques,item[N.NewAxis,:]])
        except TypeError:
            return N.concatenate([uniques,N.array([item])])

    uniques = N.array([inarray[0]])
    if len(uniques.shape) == 1:
        # 1D input: items are scalars, so they are simply wrapped and appended.
        for item in inarray[1:]:
            if N.add.reduce(N.equal(uniques,item).flat) == 0:
                uniques = N.concatenate([uniques,N.array([item])])
    elif inarray.typecode() != 'O':
        for item in inarray[1:]:
            if not N.sum(N.alltrue(N.equal(uniques,item),1)):
                uniques = addrow(uniques, item)
    else:
        # Object arrays: compare element by element with cmp.
        for item in inarray[1:]:
            newflag = 1
            for unq in uniques:
                test = N.sum(abs(N.array(map(cmp,item,unq))))
                if test == 0:
                    newflag = 0
                    break
            if newflag == 1:
                uniques = addrow(uniques, item)
    return uniques
1038
1039
def aduplicates (inarray):
    """
    Returns duplicate items in the FIRST dimension of the passed array.  Only
    works on arrays NOT including string items.  Each duplicated item (row)
    is reported once.

    Usage:   aduplicates (inarray)
    Returns: array of the items that occur more than once (empty array if
             there are none)
    """
    inarray = N.array(inarray)
    aslist = inarray.tolist()
    dups = []
    for i in range(len(aslist)):
        if aslist[i] in aslist[i+1:]:
            dups.append(aslist[i])
    if len(inarray.shape) == 1:
        # aunique indexes element 0, so it must not be given an empty list.
        if dups:
            dups = aunique(dups)
    else:
        dups = unique(dups)
    return N.array(dups)
1064
1065 except ImportError:
1066 pass
1067