Package TEES :: Package Utils :: Package Libraries :: Module pstat
[hide private]

Source Code for Module TEES.Utils.Libraries.pstat

   1  # Copyright (c) 1999-2000 Gary Strangman; All Rights Reserved.
 
   2  #
 
   3  # This software is distributable under the terms of the GNU
 
   4  # General Public License (GPL) v2, the text of which can be found at
 
   5  # http://www.gnu.org/copyleft/gpl.html. Installing, importing or otherwise
 
   6  # using this module constitutes acceptance of the terms of this License.
 
   7  #
 
   8  # Disclaimer
 
   9  # 
 
  10  # This software is provided "as-is".  There are no expressed or implied
 
  11  # warranties of any kind, including, but not limited to, the warranties
 
  12  # of merchantability and fittness for a given application.  In no event
 
  13  # shall Gary Strangman be liable for any direct, indirect, incidental,
 
  14  # special, exemplary or consequential damages (including, but not limited
 
  15  # to, loss of use, data or profits, or business interruption) however
 
  16  # caused and on any theory of liability, whether in contract, strict
 
  17  # liability or tort (including negligence or otherwise) arising in any way
 
  18  # out of the use of this software, even if advised of the possibility of
 
  19  # such damage.
 
  20  #
 
  21  # Comments and/or additions are welcome (send e-mail to:
 
  22  # strang@nmr.mgh.harvard.edu).
 
  23  # 
 
  24  """
 
  25  pstat.py module
 
  26  
 
  27  #################################################
 
  28  #######  Written by:  Gary Strangman  ###########
 
  29  #######  Last modified:  Jun 29, 2001 ###########
 
  30  #################################################
 
  31  
 
  32  This module provides some useful list and array manipulation routines
 
  33  modeled after those found in the |Stat package by Gary Perlman, plus a
 
  34  number of other useful list/file manipulation functions.  The list-based
 
  35  functions include:
 
  36  
 
  37        abut (source,*args)
 
  38        simpleabut (source, addon)
 
  39        colex (listoflists,cnums)
 
  40        collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
 
  41        dm (listoflists,criterion)
 
  42        flat (l)
 
  43        linexand (listoflists,columnlist,valuelist)
 
  44        linexor (listoflists,columnlist,valuelist)
 
  45        linedelimited (inlist,delimiter)
 
  46        lineincols (inlist,colsize) 
 
  47        lineincustcols (inlist,colsizes)
 
  48        list2string (inlist)
 
  49        makelol(inlist)
 
  50        makestr(x)
 
  51        printcc (lst,extra=2)
 
  52        printincols (listoflists,colsize)
 
  53        pl (listoflists)
 
  54        printl(listoflists)
 
  55        replace (lst,oldval,newval)
 
  56        recode (inlist,listmap,cols=None)
 
  57        remap (listoflists,criterion)
 
  58        roundlist (inlist,num_digits_to_round_floats_to)
 
  59        sortby(listoflists,sortcols)
 
  60        unique (inlist)
 
  61        duplicates(inlist)
 
  62        writedelimited (listoflists, delimiter, file, writetype='w')
 
  63  
 
  64  Some of these functions have alternate versions which are defined only if
 
  65  Numeric (NumPy) can be imported.  These functions are generally named as
 
  66  above, with an 'a' prefix.
 
  67  
 
  68        aabut (source, *args)
 
  69        acolex (a,indices,axis=1)
 
  70        acollapse (a,keepcols,collapsecols,sterr=0,ns=0)
 
  71        adm (a,criterion)
 
  72        alinexand (a,columnlist,valuelist)
 
  73        alinexor (a,columnlist,valuelist)
 
  74        areplace (a,oldval,newval)
 
  75        arecode (a,listmap,col='all')
 
  76        arowcompare (row1, row2)
 
  77        arowsame (row1, row2)
 
  78        asortrows(a,axis=0)
 
  79        aunique(inarray)
 
  80        aduplicates(inarray)
 
  81  
 
  82  Currently, the code is all but completely un-optimized.  In many cases, the
 
  83  array versions of functions amount simply to aliases to built-in array
 
  84  functions/methods.  Their inclusion here is for function name consistency.
 
  85  """ 
  86  
 
  87  ## CHANGE LOG:
 
  88  ## ==========
 
  89  ## 01-11-15 ... changed list2string() to accept a delimiter
 
  90  ## 01-06-29 ... converted exec()'s to eval()'s to make compatible with Py2.1
 
  91  ## 01-05-31 ... added duplicates() and aduplicates() functions
 
  92  ## 00-12-28 ... license made GPL, docstring and import requirements
 
  93  ## 99-11-01 ... changed version to 0.3
 
  94  ## 99-08-30 ... removed get, getstrings, put, aget, aput (into io.py)
 
  95  ## 03/27/99 ... added areplace function, made replace fcn recursive
 
  96  ## 12/31/98 ... added writefc function for output to fixed column sizes
 
  97  ## 12/07/98 ... fixed import problem (failed on collapse() fcn)
 
  98  ##              added __version__ variable (now 0.2)
 
  99  ## 12/05/98 ... updated doc-strings
 
 100  ##              added features to collapse() function
 
 101  ##              added flat() function for lists
 
 102  ##              fixed a broken asortrows() 
 
 103  ## 11/16/98 ... fixed minor bug in aput for 1D arrays
 
 104  ##
 
 105  ## 11/08/98 ... fixed aput to output large arrays correctly
 
 106  
 
 107  import stats  # required 3rd party module 
 108  import string, copy 
 109  from types import * 
 110  
 
 111  __version__ = 0.4 
 112  
 
 113  ###===========================  LIST FUNCTIONS  ==========================
 
 114  ###
 
 115  ### Here are the list functions, DEFINED FOR ALL SYSTEMS.
 
 116  ### Array functions (for NumPy-enabled computers) appear below.
 
 117  ###
 
 118  
 
def abut (source,*args):
    """
    Like the |Stat abut command.  It concatenates lists side-by-side and
    returns the result.  '2D' lists are also accommodated for either argument
    (source or addon).  CAUTION:  If one list is shorter, it will be repeated
    (and cut off mid-repeat when the lengths are not integer multiples) until
    it is as long as the longest list.  If this behavior is not desired, use
    pstat.simpleabut().

    Usage:   abut(source, args)   where args=any # of lists
    Returns: a list of lists as long as the LONGEST list passed, source on the
             'left', lists in <args> attached consecutively on the 'right'
    Raises:  ZeroDivisionError if the shorter of two lists is empty
    """
    def _stretch(seq, length):
        # Repeat seq end-to-end until it covers `length` items, then trim.
        # Floor division keeps the count an integer under any division rules.
        repeats = -(-length // len(seq))      # ceiling of length/len(seq)
        stretched = seq
        for _ in range(repeats - 1):
            # each repetition is an independent deep copy of the original
            stretched = stretched + copy.deepcopy(seq)
        return stretched[0:length]

    if not isinstance(source, (list, tuple)):
        source = [source]
    for addon in args:
        if not isinstance(addon, (list, tuple)):
            addon = [addon]
        if len(addon) < len(source):          # source is longer: grow addon
            addon = _stretch(addon, len(source))
        elif len(source) < len(addon):        # addon is longer: grow source
            source = _stretch(source, len(addon))
        source = simpleabut(source, addon)
    return source
164 165
def simpleabut (source, addon):
    """
    Concatenates two lists as columns and returns the result.  '2D' lists
    are also accommodated for either argument (source or addon).  This DOES NOT
    repeat either list to make the 2 lists of equal length.  Beware of list
    pairs with different lengths ... the resulting list will be the length of
    the FIRST list passed; rows of source that have no partner in addon are
    returned (deep-copied) unchanged.

    Whether an argument is treated as 1D or 2D is decided by its FIRST element
    only.  Tuples are accepted anywhere a list is; combined rows are always
    returned as lists.

    Usage:   simpleabut(source,addon)  where source, addon=list (or list-of-lists)
    Returns: a list as long as source, with source on the 'left' and
             addon on the 'right'
    """
    if not isinstance(source, (list, tuple)):
        source = [source]
    if not isinstance(addon, (list, tuple)):
        addon = [addon]
    # list() so that a tuple source can still be filled in row by row
    rows = list(copy.deepcopy(source))
    if len(source) == 0 or len(addon) == 0:
        return rows                           # nothing to pair up
    source_is_2d = isinstance(source[0], (list, tuple))
    addon_is_2d = isinstance(addon[0], (list, tuple))
    for i in range(min(len(source), len(addon))):
        if source_is_2d:
            left = list(source[i])
        else:
            left = [source[i]]
        if addon_is_2d:
            right = list(addon[i])
        else:
            right = [addon[i]]
        rows[i] = left + right
    return rows
200 201
def colex (listoflists,cnums):
    """
    Extracts from listoflists the columns specified by 'cnums'.  cnums can be
    an integer, a sequence of integers, or a string holding a subscript that
    is applied to every row ... e.g., '[3:]' will colex columns 3 onward from
    the listoflists (the string is appended directly to the row variable, so
    it must start with the bracket).

    CAUTION:  a string cnums is passed to eval(); never build it from
    untrusted input.

    Usage:   colex (listoflists,cnums)
    Returns: for an integer (or a 1-item sequence) a flat list holding that
             column; for a longer sequence a list-of-lists with the columns
             in the order they appear in cnums; for a string, the list of
             per-row subscript results
    """
    if isinstance(cnums, (list, tuple)):      # multiple columns to get
        if len(cnums) == 1:
            # a single requested column comes back flat, not as 1-item rows
            only = cnums[0]
            return [row[only] for row in listoflists]
        return [[row[c] for c in cnums] for row in listoflists]
    if isinstance(cnums, str):                # a '[3:]' type expression
        getter = eval('lambda x: x' + cnums)
        return [getter(row) for row in listoflists]
    return [row[cnums] for row in listoflists]  # just 1 column to get
228 229
def collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None):
    """
    Averages data in collapsecols, keeping all unique items in keepcols
    (using unique, which keeps unique LISTS of column values), retaining the
    unique sets of values in keepcols, the mean for each.  Setting fcn1
    and/or fcn2 to point to a function rather than None (e.g., stats.sterr, len)
    will append those results (e.g., the sterr, N) after each calculated mean;
    if such a function raises, 'N/A' is recorded instead.  cfcn is the collapse
    function to apply (defaults to mean, defined here in the pstat module to
    avoid circular imports with stats.py, but harmonicmean or others could be
    passed).

    Usage:    collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
    Returns: a list of lists with all unique permutations of entries appearing in
         columns ("conditions") specified by keepcols, abutted with the result of
         cfcn (if cfcn=None, defaults to the mean) of each column specified by
         collapsecols.  With keepcols=[] the result is one entry per collapsed
         column instead (the cfcn value, or a list [value, fcn1, fcn2] when
         extra functions are given).
    """
    def collmean (inlist):
        s = 0
        for item in inlist:
            s = s + item
        return s/float(len(inlist))

    def _attempt (fcn, column):
        # the extra statistics are best-effort: a failure is reported as 'N/A'
        try:
            return fcn(column)
        except Exception:
            return 'N/A'

    if not isinstance(keepcols, (list, tuple)):
        keepcols = [keepcols]
    if not isinstance(collapsecols, (list, tuple)):
        collapsecols = [collapsecols]
    if cfcn is None:
        cfcn = collmean
    if len(keepcols) == 0:
        # no conditions: collapse each requested column over ALL rows
        means = [0]*len(collapsecols)
        for i in range(len(collapsecols)):
            avgcol = colex(listoflists,collapsecols[i])
            means[i] = cfcn(avgcol)
            if fcn1 is not None:
                means[i] = [means[i], _attempt(fcn1, avgcol)]
            if fcn2 is not None:
                extra = _attempt(fcn2, avgcol)
                try:
                    means[i] = means[i] + [extra]    # already a list (fcn1 given)
                except TypeError:
                    means[i] = [means[i], extra]     # still a bare value
        return means
    else:
        values = colex(listoflists,keepcols)
        uniques = unique(values)
        uniques.sort()
        newlist = []
        for item in uniques:
            if isinstance(item, (list, tuple)):
                item = list(item)       # private, appendable copy of the condition
            else:
                item = [item]
            tmprows = linexand(listoflists,keepcols,item)
            for col in collapsecols:
                avgcol = colex(tmprows,col)
                item.append(cfcn(avgcol))
                if fcn1 is not None:
                    item.append(_attempt(fcn1, avgcol))
                if fcn2 is not None:
                    item.append(_attempt(fcn2, avgcol))
            newlist.append(item)
        return newlist
def dm (listoflists,criterion):
    """
    Selects the rows of the passed list of lists for which the criterion
    expression holds.  The criterion is a string written as a function of x,
    where x stands for one row; e.g., 'x[3]>=9' keeps all rows whose 4th
    column is >=9 and "x[2]=='N'" keeps rows whose column 2 equals the
    string 'N'.

    CAUTION:  criterion is handed to eval(); never pass untrusted text.

    Usage:   dm (listoflists, criterion)
    Returns: rows from listoflists that meet the specified criterion.
    """
    keeprow = eval('lambda x: ' + criterion)
    return filter(keeprow, listoflists)
321 322
def flat(l):
    """
    Flattens a '2D' list by one level:  the items of every row are strung
    together, in order, into a single new list.  List-correlate to the flat
    view of NumPy arrays.

    Usage:   flat(l)
    Returns: a new 1D list holding every item of every row of l
    """
    return [item for row in l for item in row]
335 336
def linexand (listoflists,columnlist,valuelist):
    """
    Returns the rows of a list of lists where col (from columnlist) = val
    (from valuelist) for EVERY pair of values (columnlist[i],valuelist[i]).
    len(columnlist) must equal len(valuelist); a valuelist that is too short
    raises IndexError.  Values are compared directly with ==, so strings
    containing quotes and floats with many digits match exactly.  With an
    empty columnlist every row is returned.

    Usage:   linexand (listoflists,columnlist,valuelist)
    Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ALL i
    """
    if not isinstance(columnlist, (list, tuple)):
        columnlist = [columnlist]
    if not isinstance(valuelist, (list, tuple)):
        valuelist = [valuelist]
    # pair up front so a too-short valuelist fails even when there are no rows
    pairs = [(columnlist[i], valuelist[i]) for i in range(len(columnlist))]
    lines = []
    for row in listoflists:
        for col, val in pairs:
            if not (row[col] == val):
                break                   # one mismatch rules the row out
        else:
            lines.append(row)
    return lines
361 362
def linexor (listoflists,columnlist,valuelist):
    """
    Returns the rows of a list of lists where col (from columnlist) = val
    (from valuelist) for ANY pair of values (columnlist[i],valuelist[i]).
    One value is required for each column in columnlist.  If only one column
    is given but multiple values appear in valuelist, the values are all
    assumed to pertain to that same column; likewise a single value is
    tested against every column of a longer columnlist.  Values are compared
    directly with ==, so strings containing quotes match exactly.

    Usage:   linexor (listoflists,columnlist,valuelist)
    Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ANY i
    """
    if not isinstance(columnlist, (list, tuple)):
        columnlist = [columnlist]
    if not isinstance(valuelist, (list, tuple)):
        valuelist = [valuelist]
    if len(columnlist) == 1 and len(valuelist) > 1:
        columnlist = columnlist*len(valuelist)
    elif len(valuelist) == 1 and len(columnlist) > 1:
        valuelist = valuelist*len(columnlist)
    # pair up front so a too-short valuelist fails even when there are no rows
    pairs = [(columnlist[i], valuelist[i]) for i in range(len(columnlist))]
    lines = []
    for row in listoflists:
        for col, val in pairs:
            if row[col] == val:
                lines.append(row)       # one match is enough
                break
    return lines
391 392
def linedelimited (inlist,delimiter):
    """
    Returns a string composed of elements in inlist, with each element
    separated by 'delimiter' (which may be any string, including one longer
    than a single character).  No delimiter follows the last element.
    Non-string elements are converted with str().  Used by function
    writedelimited.  Use '\\t' for tab-delimiting.

    Usage:   linedelimited (inlist,delimiter)
    Returns: the delimited string ('' for an empty inlist)
    """
    pieces = []
    for item in inlist:
        if type(item) is not str:
            item = str(item)
        pieces.append(item)
    return delimiter.join(pieces)
408 409
def lineincols (inlist,colsize):
    """
    Returns a string composed of elements in inlist, with each element
    right-aligned in columns of (fixed) colsize.  Non-string elements are
    converted with str().  Elements longer than colsize are cut down to their
    first colsize characters so that every column keeps the same width.

    Usage:   lineincols (inlist,colsize)   where colsize is an integer
    Returns: the formatted string, len(inlist)*colsize characters long
    """
    cells = []
    for item in inlist:
        if type(item) is not str:
            item = str(item)
        # pad short items on the left, trim long ones on the right
        cells.append(item.rjust(colsize)[0:colsize])
    return ''.join(cells)
429 430
def lineincustcols (inlist,colsizes):
    """
    Returns a string composed of elements in inlist, with each element
    right-aligned in a column of width specified by a sequence colsizes.  The
    length of colsizes must be greater than or equal to the number of columns
    in inlist (IndexError otherwise).  Non-string elements are converted with
    str().  Elements longer than their column are cut down to the column
    width so that the columns stay aligned.

    Usage:   lineincustcols (inlist,colsizes)
    Returns: formatted string created from inlist
    """
    cells = []
    for i in range(len(inlist)):
        item = inlist[i]
        if type(item) is not str:
            item = str(item)
        width = colsizes[i]
        # pad short items on the left, trim long ones on the right
        cells.append(item.rjust(width)[0:width])
    return ''.join(cells)
455 456
def list2string (inlist,delimit=' '):
    """
    Converts a 1D list to a single long string for file output, joining the
    items with the delimit string.  Non-string items are converted with str()
    first.

    Usage:   list2string (inlist,delimit=' ')
    Returns: the string created from inlist
    """
    stringlist = []
    for item in inlist:
        if type(item) is not str:
            item = str(item)
        stringlist.append(item)
    return delimit.join(stringlist)
467 468
def makelol(inlist):
    """
    Turns a 1D list into a 2D list (a list-of-lists) by wrapping every item
    in its own 1-item list.  Handy for writing a 1D list out one item per
    line with routines that expect rows.

    Usage:   makelol(inlist)
    Returns: if l = [1,2,'hi'] then returns [[1],[2],['hi']] etc.
    """
    return [[entry] for entry in inlist]
481 482
def makestr (x):
    """
    Returns x itself when it is already a plain string, otherwise str(x).

    Usage:   makestr (x)
    """
    if type(x) is str:
        return x
    return str(x)
487 488
def printcc (lst,extra=2):
    """
    Prints a list of lists in columns, customized by the max size of items
    within the columns (max size of items in col, plus 'extra' number of spaces).
    Use 'dashes' or '\\n' in the list-of-lists to print dashes or blank lines,
    respectively (either as a bare string or as a 1-item list).  Such
    separator rows are ignored when the column widths are computed.

    Usage:   printcc (lst,extra=2)
    Returns: None
    """
    blank_rows = (['\n'], '\n', '', [''])
    dash_rows = (['dashes'], 'dashes')
    if len(lst) == 0:
        return None                     # nothing to print
    if not isinstance(lst[0], (list, tuple)):
        lst = [lst]                     # a single row was passed
    # only real data rows take part in the width computation
    datarows = []
    for row in lst:
        if row in blank_rows or row in dash_rows:
            continue
        datarows.append(row)
    maxsize = []
    if len(datarows) > 0:
        for col in range(len(datarows[0])):
            items = [makestr(item) for item in colex(datarows,col)]
            maxsize.append(max([len(item) for item in items]) + extra)
    for row in lst:
        if row in blank_rows:
            print('')
        elif row in dash_rows:
            # underline the widest item of each column, leaving the padding blank
            dashes = ['-'*(width-extra) for width in maxsize]
            print(lineincustcols(dashes,maxsize))
        else:
            print(lineincustcols(row,maxsize))
    return None
525 526
def printincols (listoflists,colsize):
    """
    Writes each row of a list of lists to standard output on its own line,
    formatted by lineincols into columns of (fixed) width colsize, where
    colsize is an integer.

    Usage:   printincols (listoflists,colsize)
    Returns: None
    """
    for record in listoflists:
        formatted = lineincols(record,colsize)
        print(formatted)
    return None
538 539
def pl (listoflists):
    """
    Prints a list of lists, 1 list (row) at a time.  A row whose last item is
    '\\n' (e.g., a text line that already ends in a newline) is written
    without an additional newline; every other row, including an empty one,
    is printed on a line of its own.

    Usage:   pl(listoflists)
    Returns: None
    """
    import sys
    for row in listoflists:
        if len(row) > 0 and row[-1] == '\n':
            if isinstance(row, str):
                sys.stdout.write(row)   # the line carries its own newline
            else:
                # a list ending in '\n':  no newline is added, the following
                # output continues on the same line after a space
                sys.stdout.write(str(row) + ' ')
        else:
            print(row)
    return None
553 554
def printl(listoflists):
    """
    Alias for pl:  prints listoflists one row at a time.

    Usage:   printl(listoflists)
    Returns: None
    """
    pl(listoflists)
    return None
559 560
def replace (inlst,oldval,newval):
    """
    Replaces all occurrences of 'oldval' with 'newval', recursively (nested
    lists and tuples are searched too).  The input is left untouched:  a new
    list is returned, and nested lists/tuples come back as new lists.

    Usage:   replace (inlst,oldval,newval)
    Returns: a new list with the replacements made
    """
    lst = list(inlst)       # real copy, also when a tuple is passed in
    for i in range(len(lst)):
        if isinstance(lst[i], (list, tuple)):
            lst[i] = replace(lst[i],oldval,newval)
        elif lst[i] == oldval:
            lst[i] = newval
    return lst
574 575
def recode (inlist,listmap,cols=None):
    """
    Changes the values in a list of lists to a new set of values (useful when
    you need to recode data from (e.g.) strings to numbers).  listmap is a 2D
    list of [oldvalue, newvalue] pairs; values that do not appear as an
    oldvalue are left alone, and the first matching pair wins.  cols defaults
    to None (meaning all columns of every row are recoded); otherwise it is a
    column number or a sequence of column numbers.  The input is deep-copied,
    not modified.

    Usage:   recode (inlist,listmap,cols=None)  cols=recode cols, listmap=2D list
    Returns: inlist with the appropriate values replaced with new ones
    """
    lst = copy.deepcopy(inlist)
    oldvals = [pair[0] for pair in listmap]     # lookup column, built once
    if cols is not None and not isinstance(cols, (list, tuple)):
        cols = [cols]
    for row in lst:
        if cols is None:
            targets = range(len(row))           # every column of THIS row
        else:
            targets = cols
        for col in targets:
            try:
                idx = oldvals.index(row[col])
            except ValueError:
                continue                        # value is not in the map
            row[col] = listmap[idx][1]
    return lst
605 606
def remap (listoflists,criterion):
    """
    Applies an expression to every row of a 2D list (listoflists).  The
    criterion is a string written as a function of 'x' (one row), and the
    result of map(lambda x: criterion, listoflists) is returned.

    CAUTION:  criterion is handed to eval(); never pass untrusted text.

    Usage:   remap(listoflists,criterion)    criterion=string
    Returns: remapped version of listoflists
    """
    transform = eval('lambda x: ' + criterion)
    return map(transform, listoflists)
619 620
def roundlist (inlist,digits):
    """
    Goes through each element in a 1D or 2D inlist, and applies the following
    function to all elements of FloatType ... round(element,digits).  A list
    whose first element is an int or a float is taken to be 1D and is treated
    as a single row, so the result is ALWAYS a list of rows.  List and tuple
    rows are copied before rounding, so the caller's data is not modified.

    Usage:   roundlist(inlist,digits)
    Returns: list of rows with rounded floats ([] for an empty inlist)
    """
    if len(inlist) == 0:
        return []
    if type(inlist[0]) in (int, float):
        inlist = [inlist]               # 1D input: handle it as one row
    rounded = []
    for row in inlist:
        if isinstance(row, (list, tuple)):
            row = list(row)             # private copy (makes tuples assignable)
        for j in range(len(row)):
            if type(row[j]) is float:
                row[j] = round(row[j],digits)
        rounded.append(row)
    return rounded
637 638
def sortby(listoflists,sortcols):
    """
    Sorts a list of lists on the column(s) specified in sortcols (a column
    number or a sequence of column numbers, most significant first).  Rows
    that tie on all sort columns are ordered by comparing the complete rows.
    The input is not modified; rows are returned as new lists.

    Usage:   sortby(listoflists,sortcols)
    Returns: sorted list, unchanged column ordering
    """
    if isinstance(sortcols, (list, tuple)):
        cols = sortcols
    else:
        cols = [sortcols]
    numcols = len(cols)
    # decorate: put the sort keys in front of each row, sort, then strip them
    decorated = []
    for row in listoflists:
        keys = [row[col] for col in cols]
        decorated.append(keys + list(row))
    decorated.sort()
    return [entry[numcols:] for entry in decorated]
656 657
def unique (inlist):
    """
    Collects the distinct items of the passed list, in order of first
    appearance.  When a list-of-lists is passed, whole rows are compared
    (i.e., only the first dimension is examined), so unique LISTS are found.

    Usage:   unique (inlist)
    Returns: the unique elements (or rows) in inlist
    """
    seen = []
    for candidate in inlist:
        if candidate in seen:
            continue
        seen.append(candidate)
    return seen
672
def duplicates(inlist):
    """
    Returns the items that occur more than once in the FIRST dimension of the
    passed list.  Each such item is reported a single time, however often it
    is repeated, in order of first appearance.

    Usage:   duplicates (inlist)
    Returns: list of the repeated items
    """
    dups = []
    for i in range(len(inlist)):
        item = inlist[i]
        # a later occurrence exists and the item was not reported already
        if item in inlist[i+1:] and item not in dups:
            dups.append(item)
    return dups
684 685
def nonrepeats(inlist):
    """
    Picks out the items that occur exactly once in the first dimension of
    the passed list, keeping their original order.

    Usage:   nonrepeats (inlist)
    Returns: list of the items that are NOT duplicated
    """
    return [entry for entry in inlist if inlist.count(entry) == 1]
697 698 699 #=================== PSTAT ARRAY FUNCTIONS ===================== 700 #=================== PSTAT ARRAY FUNCTIONS ===================== 701 #=================== PSTAT ARRAY FUNCTIONS ===================== 702 #=================== PSTAT ARRAY FUNCTIONS ===================== 703 #=================== PSTAT ARRAY FUNCTIONS ===================== 704 #=================== PSTAT ARRAY FUNCTIONS ===================== 705 #=================== PSTAT ARRAY FUNCTIONS ===================== 706 #=================== PSTAT ARRAY FUNCTIONS ===================== 707 #=================== PSTAT ARRAY FUNCTIONS ===================== 708 #=================== PSTAT ARRAY FUNCTIONS ===================== 709 #=================== PSTAT ARRAY FUNCTIONS ===================== 710 #=================== PSTAT ARRAY FUNCTIONS ===================== 711 #=================== PSTAT ARRAY FUNCTIONS ===================== 712 #=================== PSTAT ARRAY FUNCTIONS ===================== 713 #=================== PSTAT ARRAY FUNCTIONS ===================== 714 #=================== PSTAT ARRAY FUNCTIONS ===================== 715 716 try: # DEFINE THESE *ONLY* IF NUMERIC IS AVAILABLE 717 import Numeric 718 N = Numeric 719
def aabut (source, *args):
    """
    Like the |Stat abut command.  It concatenates two arrays column-wise
    and returns the result.  CAUTION:  If one array is shorter, it will be
    repeated until it is as long as the other.  1D arrays are treated as
    single columns.

    Usage:   aabut (source, args)    where args=any # of arrays
    Returns: an array as long as the LONGEST array passed, source appearing on
             the 'left', arrays in <args> attached on the 'right'.
    """
    if len(source.shape)==1:
        source = N.resize(source,[source.shape[0],1])
    for addon in args:
        if len(addon.shape)==1:
            # make a column out of addon using its OWN length, so that a longer
            # addon is not cut down to source's length before the comparison
            addon = N.resize(addon,[addon.shape[0],1])
        if len(addon) < len(source):
            addon = N.resize(addon,[source.shape[0],addon.shape[1]])
        elif len(source) < len(addon):
            source = N.resize(source,[addon.shape[0],source.shape[1]])
        source = N.concatenate((source,addon),1)
    return source
747 748
def acolex (a,indices,axis=1):
    """
    Pulls the given indices (a single index or a list/tuple/array of them)
    out of the passed array along the passed axis (column extraction is the
    default).  BEWARE:  A 1D array is presumed to be a column-array, and the
    whole array is returned as a single column whatever indices says.

    Usage:   acolex (a,indices,axis=1)
    Returns: the columns of a specified by indices
    """
    if len(N.shape(a)) == 1:
        return N.resize(a,[a.shape[0],1])
    if type(indices) not in [ListType,TupleType,N.ArrayType]:
        indices = [indices]
    return N.take(a,indices,axis)
765 766
def acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None):
    """
    Averages data in collapsecols, keeping all unique items in keepcols
    (using unique, which keeps unique LISTS of column values), retaining
    the unique sets of values in keepcols, the mean for each.  Setting fcn1
    and/or fcn2 to a function rather than None appends that function's result
    for each collapsed column ('N/A' if the function raises).  cfcn is the
    collapse function applied per condition (defaults to the mean of all
    elements).

    Usage:   acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
    Returns: unique 'conditions' specified by the contents of columns specified
             by keepcols, abutted with the mean(s) of column(s) specified by
             collapsecols
    """
    def acollmean (inarray):
        flat = N.ravel(inarray)
        return N.sum(flat)/float(len(flat))

    if cfcn is None:
        cfcn = acollmean
    if keepcols == []:
        # NOTE(review): this branch always uses the plain column means and
        # does not consult cfcn -- confirm whether that is intended
        avgcol = acolex(a,collapsecols)
        means = N.sum(avgcol)/float(len(avgcol))
        if fcn1 is not None:
            try:
                test = fcn1(avgcol)
            except Exception:
                test = N.array(['N/A']*len(means))
            means = aabut(means,test)
        if fcn2 is not None:
            try:
                test = fcn2(avgcol)
            except Exception:
                test = N.array(['N/A']*len(means))
            means = aabut(means,test)
        return means
    else:
        if not isinstance(keepcols, (list, tuple, N.ArrayType)):
            keepcols = [keepcols]
        values = colex(a,keepcols)   # so that "item" can be appended (below)
        uniques = unique(values)     # get a LIST, so .sort keeps rows intact
        uniques.sort()
        newlist = []
        for item in uniques:
            if not isinstance(item, (list, tuple, N.ArrayType)):
                item = [item]
            tmprows = alinexand(a,keepcols,item)
            for col in collapsecols:
                avgcol = acolex(tmprows,col)
                item.append(cfcn(avgcol))
                if fcn1 is not None:
                    try:
                        test = fcn1(avgcol)
                    except Exception:
                        test = 'N/A'
                    item.append(test)
                if fcn2 is not None:
                    try:
                        test = fcn2(avgcol)
                    except Exception:
                        test = 'N/A'
                    item.append(test)
            newlist.append(item)
        try:
            new_a = N.array(newlist)
        except TypeError:
            # mixed contents (e.g. 'N/A' strings): fall back to an Object array
            new_a = N.array(newlist,'O')
        return new_a
def adm (a,criterion):
    """
    Returns rows from the passed array that meet the criteria in the passed
    criterion expression (a string as a function of x, where x stands for one
    row).  The selected rows are returned as an array; if they cannot be
    converted to a regular array, an Object ('O') array is built instead.

    CAUTION:  criterion is handed to eval(); never pass untrusted text.

    Usage:   adm (a,criterion)   where criterion is like 'x[2]==37'
    Returns: array of the rows of a that meet the criterion
    """
    keeprow = eval('lambda x: ' + criterion)
    lines = [row for row in a if keeprow(row)]
    try:
        lines = N.array(lines)
    except Exception:
        lines = N.array(lines,'O')
    return lines
848 849
def isstring(x):
    """
    Reports whether x is a plain string.

    Usage:   isstring(x)
    Returns: 1 if type(x) is the string type, 0 otherwise
    """
    return int(type(x) == str)
855 856
def alinexand (a,columnlist,valuelist):
    """
    Returns the rows of an array where col (from columnlist) = val
    (from valuelist).  One value is required for each column in columnlist.
    The comparison is assembled into a criterion string and evaluated by
    adm(); string values are embedded via repr() so that quotes and
    backslashes inside them are escaped correctly.

    Usage:   alinexand (a,columnlist,valuelist)
    Returns: the rows of a where columnlist[i]=valuelist[i] for ALL i
    """
    if not isinstance(columnlist, (list, tuple, N.ArrayType)):
        columnlist = [columnlist]
    if not isinstance(valuelist, (list, tuple, N.ArrayType)):
        valuelist = [valuelist]
    clauses = []
    for i in range(len(columnlist)):
        if isinstance(valuelist[i], str):
            critval = repr(valuelist[i])
        else:
            critval = str(valuelist[i])
        clauses.append('x['+str(columnlist[i])+']=='+critval)
    return adm(a,' and '.join(clauses))
878 879
def alinexor (a,columnlist,valuelist):
    """
    Returns the rows of an array where col (from columnlist) = val (from
    valuelist).  One value is required for each column in columnlist.
    The exception is if either columnlist or valuelist has only 1 value,
    in which case that item will be expanded to match the length of the
    other list.  The comparison is assembled into a criterion string and
    evaluated by adm(); string values are embedded via repr() so that quotes
    and backslashes inside them are escaped correctly.

    Usage:   alinexor (a,columnlist,valuelist)
    Returns: the rows of a where columnlist[i]=valuelist[i] for ANY i
    """
    if not isinstance(columnlist, (list, tuple, N.ArrayType)):
        columnlist = [columnlist]
    if not isinstance(valuelist, (list, tuple, N.ArrayType)):
        valuelist = [valuelist]
    if len(columnlist) == 1 and len(valuelist) > 1:
        columnlist = columnlist*len(valuelist)
    elif len(valuelist) == 1 and len(columnlist) > 1:
        valuelist = valuelist*len(columnlist)
    clauses = []
    for i in range(len(columnlist)):
        if isinstance(valuelist[i], str):
            critval = repr(valuelist[i])
        else:
            critval = str(valuelist[i])
        clauses.append('x['+str(columnlist[i])+']=='+critval)
    return adm(a,' or '.join(clauses))
908 909
def areplace (a,oldval,newval):
    """
    Builds a new array in which every element of a equal to oldval is
    exchanged for newval.  Done arithmetically:  the elements that differ
    from oldval are kept by multiplying with a not-equal mask, and newval is
    added in wherever the equal mask is set.

    Usage:   areplace(a,oldval,newval)
    Returns: the array with the replacements made
    """
    untouched = N.not_equal(a,oldval)*a
    substituted = N.equal(a,oldval)*newval
    return untouched+substituted
918 919
def arecode (a,listmap,col='all'):
    """
    Remaps the values in an array to a new set of values (useful when
    you need to recode data from (e.g.) strings to numbers as most stats
    packages require.  Can work on SINGLE columns, or 'all' columns at once.
    Every [old, new] pair in listmap is applied, in order.  When a
    replacement is a string, or the data is already an Object array, the work
    is done on Object ('O') copies.

    Usage:   arecode (a,listmap,col='all')
    Returns: a version of array a where listmap[i][0] = (instead) listmap[i][1]
    """
    ashape = a.shape
    if col == 'all':
        work = a.flat
    else:
        work = acolex(a,col)
        work = work.flat
    for pair in listmap:
        if isinstance(pair[1], str) or work.typecode()=='O' or a.typecode()=='O':
            # Object copies can hold any kind of replacement value
            work = N.array(work,'O')
            a = N.array(a,'O')
            for i in range(len(work)):
                if work[i]==pair[0]:
                    work[i] = pair[1]
        else:   # must be a non-Object type array and replacement
            work = N.where(N.equal(work,pair[0]),pair[1],work)
    # assemble the result only after ALL pairs have been applied
    if col == 'all':
        return N.reshape(work,ashape)
    return N.concatenate([a[:,0:col],work[:,N.NewAxis],a[:,col+1:]],1)
949 950
def arowcompare(row1, row2):
    """
    Compares two rows from an array, regardless of whether it is an
    array of numbers or of python objects (which requires the cmp function).

    Usage:   arowcompare(row1,row2)
    Returns: an array of equal length containing 1s where the two rows had
             identical elements and 0 otherwise
    """
    # typecode is a method and must be CALLED on both rows.  Comparing the
    # bound method itself to 'O' is always false, which sent an Object-typed
    # row2 down the numeric N.equal path.
    if row1.typecode()=='O' or row2.typecode()=='O':
        cmpvect = N.logical_not(abs(N.array(map(cmp,row1,row2)))) # cmp fcn gives -1,0,1
    else:
        cmpvect = N.equal(row1,row2)
    return cmpvect
965 966
def arowsame(row1, row2):
    """
    Tests whether two rows from an array match element for element.  The
    per-element comparison is delegated to arowcompare, so numeric rows and
    rows of python objects are both handled.

    Usage:   arowsame(row1,row2)
    Returns: 1 if the two rows are identical, 0 otherwise.
    """
    return N.alltrue(arowcompare(row1,row2))
977 978
def asortrows(a,axis=0):
    """
    Reorders whole slices of an array along the given axis, leaving the
    contents of each slice intact.  (Numeric.sort(), by contrast, sorts the
    elements WITHIN the given axis.)  Slices are compared as nested python
    lists, i.e. element by element from the front.

    Usage:   asortrows(a,axis=0)
    Returns: sorted version of a
    """
    if axis == 0:
        rows = a.tolist()
        rows.sort()
        return N.array(rows)
    # Bring the requested axis to the front, sort there, then put it back.
    rows = N.swapaxes(a, axis, 0).tolist()
    rows.sort()
    return N.swapaxes(N.array(rows), axis, 0)
997 998
def aunique(inarray):
    """
    Returns unique items in the FIRST dimension of the passed array, in
    order of first appearance.  Only works on arrays NOT including string
    items.  An empty input gives back an empty array.

    Usage:   aunique (inarray)
    Returns: an array holding each distinct item (1D input) or each distinct
             row (2+D input) exactly once
    """
    if len(inarray) == 0:            # no first item to seed the result with
        return N.array(inarray)
    uniques = N.array([inarray[0]])
    if len(uniques.shape) == 1:      # IF IT'S A 1D ARRAY
        for item in inarray[1:]:
            if N.add.reduce(N.equal(uniques,item)) == 0:
                # Append directly.  The old code first tried the malformed
                # expression N.array[N.NewAxis,:] (subscripting a function),
                # which always raised TypeError and reached this same
                # statement through the except clause.
                uniques = N.concatenate([uniques,N.array([item])])
    else:                            # IT MUST BE A 2+D ARRAY
        if inarray.typecode() != 'O':  # not an Object array
            for item in inarray[1:]:
                if not N.sum(N.alltrue(N.equal(uniques,item),1)):
                    try:
                        uniques = N.concatenate( [uniques,item[N.NewAxis,:]] )
                    except TypeError:    # the item to add isn't a list
                        uniques = N.concatenate([uniques,N.array([item])])
        else:   # must be an Object array, alltrue/equal functions don't work
            for item in inarray[1:]:
                newflag = 1
                for unq in uniques:  # NOTE: cmp --> 0=same, -1=<, 1=>
                    test = N.sum(abs(N.array(map(cmp,item,unq))))
                    if test == 0:   # if item identical to any 1 row in uniques
                        newflag = 0 # then not a novel item to add
                        break
                if newflag == 1:
                    try:
                        uniques = N.concatenate( [uniques,item[N.NewAxis,:]] )
                    except TypeError:    # the item to add isn't a list
                        uniques = N.concatenate([uniques,N.array([item])])
    return uniques
1038 1039
def aduplicates(inarray):
    """
    Returns duplicate items in the FIRST dimension of the passed array, each
    reported once.  Only works on arrays NOT including string items.  If
    nothing is duplicated the result is an empty array.

    Usage:   aduplicates (inarray)
    Returns: an array of the items (1D input) or rows (2+D input) that occur
             more than once in inarray
    """
    inarray = N.array(inarray)
    aslist = inarray.tolist()
    dups = []
    for i in range(len(aslist)):
        if aslist[i] in aslist[i+1:]:     # seen again later on => duplicate
            dups.append(aslist[i])
    if len(inarray.shape) == 1:  # IF IT'S A 1D ARRAY
        # An item occurring k times was collected k-1 times above; keep the
        # first sighting only.  Done here on the plain list so that an input
        # with NO duplicates yields an empty array (handing the empty list to
        # aunique raised IndexError on its inarray[0] lookup).
        distinct = []
        for value in dups:
            if value not in distinct:
                distinct.append(value)
        return N.array(distinct)
    # IT MUST BE A 2+D ARRAY: rows are python lists, reduced by the
    # list-based unique()
    return N.array(unique(dups))
1064 1065 except ImportError: # IF NUMERIC ISN'T AVAILABLE, SKIP ALL arrayfuncs 1066 pass 1067