1  
 
   2  
 
   3  
 
   4  
 
   5  
 
   6  
 
   7  
 
   8  
 
   9  
 
  10  
 
  11  
 
  12  
 
  13  
 
  14  
 
  15  
 
  16  
 
  17  
 
  18  
 
  19  
 
  20  
 
  21  
 
  22  
 
  23  
 
  24  """
 
  25  pstat.py module
 
  26  
 
  27  #################################################
 
  28  #######  Written by:  Gary Strangman  ###########
 
  29  #######  Last modified:  Jun 29, 2001 ###########
 
  30  #################################################
 
  31  
 
  32  This module provides some useful list and array manipulation routines
 
  33  modeled after those found in the |Stat package by Gary Perlman, plus a
 
  34  number of other useful list/file manipulation functions.  The list-based
 
  35  functions include:
 
  36  
 
  37        abut (source,*args)
 
  38        simpleabut (source, addon)
 
  39        colex (listoflists,cnums)
 
  40        collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
 
  41        dm (listoflists,criterion)
 
  42        flat (l)
 
  43        linexand (listoflists,columnlist,valuelist)
 
  44        linexor (listoflists,columnlist,valuelist)
 
  45        linedelimited (inlist,delimiter)
 
  46        lineincols (inlist,colsize) 
 
  47        lineincustcols (inlist,colsizes)
 
  48        list2string (inlist)
 
  49        makelol(inlist)
 
  50        makestr(x)
 
  51        printcc (lst,extra=2)
 
  52        printincols (listoflists,colsize)
 
  53        pl (listoflists)
 
  54        printl(listoflists)
 
  55        replace (lst,oldval,newval)
 
  56        recode (inlist,listmap,cols=None)
 
  57        remap (listoflists,criterion)
 
  58        roundlist (inlist,num_digits_to_round_floats_to)
 
  59        sortby(listoflists,sortcols)
 
  60        unique (inlist)
 
  61        duplicates(inlist)
 
  62        writedelimited (listoflists, delimiter, file, writetype='w')
 
  63  
 
  64  Some of these functions have alternate versions which are defined only if
 
  65  Numeric (NumPy) can be imported.  These functions are generally named as
 
  66  above, with an 'a' prefix.
 
  67  
 
  68        aabut (source, *args)
 
  69        acolex (a,indices,axis=1)
 
  70        acollapse (a,keepcols,collapsecols,sterr=0,ns=0)
 
  71        adm (a,criterion)
 
  72        alinexand (a,columnlist,valuelist)
 
  73        alinexor (a,columnlist,valuelist)
 
  74        areplace (a,oldval,newval)
 
  75        arecode (a,listmap,col='all')
 
  76        arowcompare (row1, row2)
 
  77        arowsame (row1, row2)
 
  78        asortrows(a,axis=0)
 
  79        aunique(inarray)
 
  80        aduplicates(inarray)
 
  81  
 
  82  Currently, the code is all but completely un-optimized.  In many cases, the
 
  83  array versions of functions amount simply to aliases to built-in array
 
  84  functions/methods.  Their inclusion here is for function name consistency.
 
  85  """ 
  86  
 
  87  
 
  88  
 
  89  
 
  90  
 
  91  
 
  92  
 
  93  
 
  94  
 
  95  
 
  96  
 
  97  
 
  98  
 
  99  
 
 100  
 
 101  
 
 102  
 
 103  
 
 104  
 
 105  
 
 106  
 
 107  import stats   
 108  import string, copy 
 109  from types import * 
 110  
 
 111  __version__ = 0.4 
 112  
 
 113  
 
 114  
 
 115  
 
 116  
 
 117  
 
 118  
 
 119 -def abut (source,*args): 
  120      """
 
 121  Like the |Stat abut command.  It concatenates two lists side-by-side
 
 122  and returns the result.  '2D' lists are also accomodated for either argument
 
 123  (source or addon).  CAUTION:  If one list is shorter, it will be repeated
 
 124  until it is as long as the longest list.  If this behavior is not desired,
 
 125  use pstat.simpleabut().
 
 126  
 
 127  Usage:   abut(source, args)   where args=any # of lists
 
 128  Returns: a list of lists as long as the LONGEST list past, source on the
 
 129           'left', lists in <args> attached consecutively on the 'right'
 
 130  """ 
 131  
 
 132      if type(source) not in [ListType,TupleType]: 
 133          source = [source] 
 134      for addon in args: 
 135          if type(addon) not in [ListType,TupleType]: 
 136              addon = [addon] 
 137          if len(addon) < len(source):                 
 138              if len(source) % len(addon) == 0:         
 139                  repeats = len(source)/len(addon)     
 140                  origadd = copy.deepcopy(addon) 
 141                  for i in range(repeats-1): 
 142                      addon = addon + origadd 
 143              else: 
 144                  repeats = len(source)/len(addon)+1   
 145                  origadd = copy.deepcopy(addon)       
 146                  for i in range(repeats-1): 
 147                      addon = addon + origadd 
 148                      addon = addon[0:len(source)] 
 149          elif len(source) < len(addon):                 
 150              if len(addon) % len(source) == 0:         
 151                  repeats = len(addon)/len(source)     
 152                  origsour = copy.deepcopy(source) 
 153                  for i in range(repeats-1): 
 154                      source = source + origsour 
 155              else: 
 156                  repeats = len(addon)/len(source)+1   
 157                  origsour = copy.deepcopy(source)     
 158                  for i in range(repeats-1): 
 159                      source = source + origsour 
 160                  source = source[0:len(addon)] 
 161  
 
 162          source = simpleabut(source,addon) 
 163      return source 
  164  
 
 165  
 
 167      """
 
 168  Concatenates two lists as columns and returns the result.  '2D' lists
 
  169  are also accommodated for either argument (source or addon).  This DOES NOT
 
 170  repeat either list to make the 2 lists of equal length.  Beware of list pairs
 
 171  with different lengths ... the resulting list will be the length of the
 
 172  FIRST list passed.
 
 173  
 
 174  Usage:   simpleabut(source,addon)  where source, addon=list (or list-of-lists)
 
 175  Returns: a list of lists as long as source, with source on the 'left' and
 
 176                   addon on the 'right'
 
 177  """ 
 178      if type(source) not in [ListType,TupleType]: 
 179          source = [source] 
 180      if type(addon) not in [ListType,TupleType]: 
 181          addon = [addon] 
 182      minlen = min(len(source),len(addon)) 
 183      list = copy.deepcopy(source)                 
 184      if type(source[0]) not in [ListType,TupleType]: 
 185          if type(addon[0]) not in [ListType,TupleType]: 
 186              for i in range(minlen): 
 187                  list[i] = [source[i]] + [addon[i]]         
 188          else: 
 189              for i in range(minlen): 
 190                  list[i] = [source[i]] + addon[i]         
 191      else: 
 192          if type(addon[0]) not in [ListType,TupleType]: 
 193              for i in range(minlen): 
 194                  list[i] = source[i] + [addon[i]]         
 195          else: 
 196              for i in range(minlen): 
 197                  list[i] = source[i] + addon[i]         
 198      source = list 
 199      return source 
  200  
 
 201  
 
 202 -def colex (listoflists,cnums): 
  203      """
 
 204  Extracts from listoflists the columns specified in the list 'cnums'
 
 205  (cnums can be an integer, a sequence of integers, or a string-expression that
 
 206  corresponds to a slice operation on the variable x ... e.g., 'x[3:]' will colex
 
 207  columns 3 onward from the listoflists).
 
 208  
 
 209  Usage:   colex (listoflists,cnums)
 
 210  Returns: a list-of-lists corresponding to the columns from listoflists
 
 211           specified by cnums, in the order the column numbers appear in cnums
 
 212  """ 
 213      global index 
 214      column = 0 
 215      if type(cnums) in [ListType,TupleType]:    
 216          index = cnums[0] 
 217          column = map(lambda x: x[index], listoflists) 
 218          for col in cnums[1:]: 
 219              index = col 
 220              column = abut(column,map(lambda x: x[index], listoflists)) 
 221      elif type(cnums) == StringType:               
 222          evalstring = 'map(lambda x: x'+cnums+', listoflists)' 
 223          column = eval(evalstring) 
 224      else:                                      
 225          index = cnums 
 226          column = map(lambda x: x[index], listoflists) 
 227      return column 
  228  
 
 229  
 
 230 -def collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None): 
  231       """
 
 232  Averages data in collapsecol, keeping all unique items in keepcols
 
 233  (using unique, which keeps unique LISTS of column numbers), retaining the
 
 234  unique sets of values in keepcols, the mean for each.  Setting fcn1
 
 235  and/or fcn2 to point to a function rather than None (e.g., stats.sterr, len)
 
 236  will append those results (e.g., the sterr, N) after each calculated mean.
 
 237  cfcn is the collapse function to apply (defaults to mean, defined here in the
 
 238  pstat module to avoid circular imports with stats.py, but harmonicmean or
 
 239  others could be passed).
 
 240  
 
 241  Usage:    collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
 
 242  Returns: a list of lists with all unique permutations of entries appearing in
 
 243       columns ("conditions") specified by keepcols, abutted with the result of
 
 244       cfcn (if cfcn=None, defaults to the mean) of each column specified by
 
 245       collapsecols.
 
 246  """ 
 247       def collmean (inlist): 
 248           s = 0 
 249           for item in inlist: 
 250               s = s + item 
 251           return s/float(len(inlist)) 
  252  
 
 253       if type(keepcols) not in [ListType,TupleType]: 
 254           keepcols = [keepcols] 
 255       if type(collapsecols) not in [ListType,TupleType]: 
 256           collapsecols = [collapsecols] 
 257       if cfcn == None: 
 258           cfcn = collmean 
 259       if keepcols == []: 
 260           means = [0]*len(collapsecols) 
 261           for i in range(len(collapsecols)): 
 262               avgcol = colex(listoflists,collapsecols[i]) 
 263               means[i] = cfcn(avgcol) 
 264               if fcn1: 
 265                   try: 
 266                       test = fcn1(avgcol) 
 267                   except: 
 268                       test = 'N/A' 
 269                       means[i] = [means[i], test] 
 270               if fcn2: 
 271                   try: 
 272                       test = fcn2(avgcol) 
 273                   except: 
 274                       test = 'N/A' 
 275                   try: 
 276                       means[i] = means[i] + [len(avgcol)] 
 277                   except TypeError: 
 278                       means[i] = [means[i],len(avgcol)] 
 279           return means 
 280       else: 
 281           values = colex(listoflists,keepcols) 
 282           uniques = unique(values) 
 283           uniques.sort() 
 284           newlist = [] 
 285           if type(keepcols) not in [ListType,TupleType]:  keepcols = [keepcols] 
 286           for item in uniques: 
 287               if type(item) not in [ListType,TupleType]:  item =[item] 
 288               tmprows = linexand(listoflists,keepcols,item) 
 289               for col in collapsecols: 
 290                   avgcol = colex(tmprows,col) 
 291                   item.append(cfcn(avgcol)) 
 292                   if fcn1 <> None: 
 293                       try: 
 294                           test = fcn1(avgcol) 
 295                       except: 
 296                           test = 'N/A' 
 297                       item.append(test) 
 298                   if fcn2 <> None: 
 299                       try: 
 300                           test = fcn2(avgcol) 
 301                       except: 
 302                           test = 'N/A' 
 303                       item.append(test) 
 304                   newlist.append(item) 
 305           return newlist 
 306  
 
 307  
 
 308 -def dm (listoflists,criterion): 
  309      """
 
 310  Returns rows from the passed list of lists that meet the criteria in
 
 311  the passed criterion expression (a string as a function of x; e.g., 'x[3]>=9'
 
 312  will return all rows where the 4th column>=9 and "x[2]=='N'" will return rows
 
 313  with column 2 equal to the string 'N').
 
 314  
 
 315  Usage:   dm (listoflists, criterion)
 
 316  Returns: rows from listoflists that meet the specified criterion.
 
 317  """ 
 318      function = 'filter(lambda x: '+criterion+',listoflists)' 
 319      lines = eval(function) 
 320      return lines 
  321  
 
 322  
 
 324      """
 
 325  Returns the flattened version of a '2D' list.  List-correlate to the a.flat()
 
 326  method of NumPy arrays.
 
 327  
 
 328  Usage:    flat(l)
 
 329  """ 
 330      newl = [] 
 331      for i in range(len(l)): 
 332          for j in range(len(l[i])): 
 333              newl.append(l[i][j]) 
 334      return newl 
  335  
 
 336  
 
 337 -def linexand (listoflists,columnlist,valuelist): 
  338      """
 
 339  Returns the rows of a list of lists where col (from columnlist) = val
 
 340  (from valuelist) for EVERY pair of values (columnlist[i],valuelists[i]).
 
 341  len(columnlist) must equal len(valuelist).
 
 342  
 
 343  Usage:   linexand (listoflists,columnlist,valuelist)
 
 344  Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ALL i
 
 345  """ 
 346      if type(columnlist) not in [ListType,TupleType]: 
 347          columnlist = [columnlist] 
 348      if type(valuelist) not in [ListType,TupleType]: 
 349          valuelist = [valuelist] 
 350      criterion = '' 
 351      for i in range(len(columnlist)): 
 352          if type(valuelist[i])==StringType: 
 353              critval = '\'' + valuelist[i] + '\'' 
 354          else: 
 355              critval = str(valuelist[i]) 
 356          criterion = criterion + ' x['+str(columnlist[i])+']=='+critval+' and' 
 357      criterion = criterion[0:-3]          
 358      function = 'filter(lambda x: '+criterion+',listoflists)' 
 359      lines = eval(function) 
 360      return lines 
  361  
 
 362  
 
 363 -def linexor (listoflists,columnlist,valuelist): 
  364      """
 
 365  Returns the rows of a list of lists where col (from columnlist) = val
 
 366  (from valuelist) for ANY pair of values (colunmlist[i],valuelist[i[).
 
 367  One value is required for each column in columnlist.  If only one value
 
 368  exists for columnlist but multiple values appear in valuelist, the
 
 369  valuelist values are all assumed to pertain to the same column.
 
 370  
 
 371  Usage:   linexor (listoflists,columnlist,valuelist)
 
 372  Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ANY i
 
 373  """ 
 374      if type(columnlist) not in [ListType,TupleType]: 
 375          columnlist = [columnlist] 
 376      if type(valuelist) not in [ListType,TupleType]: 
 377          valuelist = [valuelist] 
 378      criterion = '' 
 379      if len(columnlist) == 1 and len(valuelist) > 1: 
 380          columnlist = columnlist*len(valuelist) 
 381      for i in range(len(columnlist)):           
 382          if type(valuelist[i])==StringType: 
 383              critval = '\'' + valuelist[i] + '\'' 
 384          else: 
 385              critval = str(valuelist[i]) 
 386          criterion = criterion + ' x['+str(columnlist[i])+']=='+critval+' or' 
 387      criterion = criterion[0:-2]          
 388      function = 'filter(lambda x: '+criterion+',listoflists)' 
 389      lines = eval(function) 
 390      return lines 
  391  
 
 392  
 
 394      """
 
 395  Returns a string composed of elements in inlist, with each element
 
 396  separated by 'delimiter.'  Used by function writedelimited.  Use '\t'
 
 397  for tab-delimiting.
 
 398  
 
 399  Usage:   linedelimited (inlist,delimiter)
 
 400  """ 
 401      outstr = '' 
 402      for item in inlist: 
 403          if type(item) <> StringType: 
 404              item = str(item) 
 405          outstr = outstr + item + delimiter 
 406      outstr = outstr[0:-1] 
 407      return outstr 
  408  
 
 409  
 
 411      """
 
 412  Returns a string composed of elements in inlist, with each element
 
 413  right-aligned in columns of (fixed) colsize.
 
 414  
 
 415  Usage:   lineincols (inlist,colsize)   where colsize is an integer
 
 416  """ 
 417      outstr = '' 
 418      for item in inlist: 
 419          if type(item) <> StringType: 
 420              item = str(item) 
 421          size = len(item) 
 422          if size <= colsize: 
 423              for i in range(colsize-size): 
 424                  outstr = outstr + ' ' 
 425              outstr = outstr + item 
 426          else: 
 427              outstr = outstr + item[0:colsize+1] 
 428      return outstr 
  429  
 
 430  
 
 432      """
 
 433  Returns a string composed of elements in inlist, with each element
 
 434  right-aligned in a column of width specified by a sequence colsizes.  The
 
 435  length of colsizes must be greater than or equal to the number of columns
 
 436  in inlist.
 
 437  
 
 438  Usage:   lineincustcols (inlist,colsizes)
 
 439  Returns: formatted string created from inlist
 
 440  """ 
 441      outstr = '' 
 442      for i in range(len(inlist)): 
 443          if type(inlist[i]) <> StringType: 
 444              item = str(inlist[i]) 
 445          else: 
 446              item = inlist[i] 
 447          size = len(item) 
 448          if size <= colsizes[i]: 
 449              for j in range(colsizes[i]-size): 
 450                  outstr = outstr + ' ' 
 451              outstr = outstr + item 
 452          else: 
 453              outstr = outstr + item[0:colsizes[i]+1] 
 454      return outstr 
  455  
 
 456  
 
 458      """
 
 459  Converts a 1D list to a single long string for file output, using
 
 460  the string.join function.
 
 461  
 
 462  Usage:   list2string (inlist,delimit=' ')
 
 463  Returns: the string created from inlist
 
 464  """ 
 465      stringlist = map(makestr,inlist) 
 466      return string.join(stringlist,delimit) 
  467  
 
 468  
 
 470      """
 
 471  Converts a 1D list to a 2D list (i.e., a list-of-lists).  Useful when you
 
 472  want to use put() to write a 1D list one item per line in the file.
 
 473  
 
 474  Usage:   makelol(inlist)
 
 475  Returns: if l = [1,2,'hi'] then returns [[1],[2],['hi']] etc.
 
 476  """ 
 477      x = [] 
 478      for item in inlist: 
 479          x.append([item]) 
 480      return x 
  481  
 
 482  
 
 484      if type(x) <> StringType: 
 485          x = str(x) 
 486      return x 
  487  
 
 488  
 
 490      """
 
 491  Prints a list of lists in columns, customized by the max size of items
 
 492  within the columns (max size of items in col, plus 'extra' number of spaces).
 
 493  Use 'dashes' or '\\n' in the list-of-lists to print dashes or blank lines,
 
 494  respectively.
 
 495  
 
 496  Usage:   printcc (lst,extra=2)
 
 497  Returns: None
 
 498  """ 
 499      if type(lst[0]) not in [ListType,TupleType]: 
 500          lst = [lst] 
 501      rowstokill = [] 
 502      list2print = copy.deepcopy(lst) 
 503      for i in range(len(lst)): 
 504          if lst[i] == ['\n'] or lst[i]=='\n' or lst[i]=='dashes' or lst[i]=='' or lst[i]==['']: 
 505              rowstokill = rowstokill + [i] 
 506      rowstokill.reverse()    
 507      for row in rowstokill: 
 508          del list2print[row] 
 509      maxsize = [0]*len(list2print[0]) 
 510      for col in range(len(list2print[0])): 
 511          items = colex(list2print,col) 
 512          items = map(makestr,items) 
 513          maxsize[col] = max(map(len,items)) + extra 
 514      for row in lst: 
 515          if row == ['\n'] or row == '\n' or row == '' or row == ['']: 
 516              print 
 517          elif row == ['dashes'] or row == 'dashes': 
 518              dashes = [0]*len(maxsize) 
 519              for j in range(len(maxsize)): 
 520                  dashes[j] = '-'*(maxsize[j]-2) 
 521              print lineincustcols(dashes,maxsize) 
 522          else: 
 523              print lineincustcols(row,maxsize) 
 524      return None 
  525  
 
 526  
 
 528      """
 
 529  Prints a list of lists in columns of (fixed) colsize width, where
 
 530  colsize is an integer.
 
 531  
 
 532  Usage:   printincols (listoflists,colsize)
 
 533  Returns: None
 
 534  """ 
 535      for row in listoflists: 
 536          print lineincols(row,colsize) 
 537      return None 
  538  
 
 539  
 
 540 -def pl (listoflists): 
  541      """
 
 542  Prints a list of lists, 1 list (row) at a time.
 
 543  
 
 544  Usage:   pl(listoflists)
 
 545  Returns: None
 
 546  """ 
 547      for row in listoflists: 
 548          if row[-1] == '\n': 
 549              print row, 
 550          else: 
 551              print row 
 552      return None 
  553  
 
 554  
 
 556      """Alias for pl.""" 
 557      pl(listoflists) 
 558      return 
  559  
 
 560  
 
 562      """
 
 563  Replaces all occurrences of 'oldval' with 'newval', recursively.
 
 564  
 
 565  Usage:   replace (inlst,oldval,newval)
 
 566  """ 
 567      lst = inlst*1 
 568      for i in range(len(lst)): 
 569          if type(lst[i]) not in [ListType,TupleType]: 
 570              if lst[i]==oldval: lst[i]=newval 
 571          else: 
 572              lst[i] = replace(lst[i],oldval,newval) 
 573      return lst 
  574  
 
 575  
 
 576 -def recode (inlist,listmap,cols=None): 
  577      """
 
 578  Changes the values in a list to a new set of values (useful when
 
 579  you need to recode data from (e.g.) strings to numbers.  cols defaults
 
 580  to None (meaning all columns are recoded).
 
 581  
 
 582  Usage:   recode (inlist,listmap,cols=None)  cols=recode cols, listmap=2D list
 
 583  Returns: inlist with the appropriate values replaced with new ones
 
 584  """ 
 585      lst = copy.deepcopy(inlist) 
 586      if cols != None: 
 587          if type(cols) not in [ListType,TupleType]: 
 588              cols = [cols] 
 589          for col in cols: 
 590              for row in range(len(lst)): 
 591                  try: 
 592                      idx = colex(listmap,0).index(lst[row][col]) 
 593                      lst[row][col] = listmap[idx][1] 
 594                  except ValueError: 
 595                      pass 
 596      else: 
 597          for row in range(len(lst)): 
 598              for col in range(len(lst)): 
 599                  try: 
 600                      idx = colex(listmap,0).index(lst[row][col]) 
 601                      lst[row][col] = listmap[idx][1] 
 602                  except ValueError: 
 603                      pass 
 604      return lst 
  605  
 
 606  
 
 607 -def remap (listoflists,criterion): 
  608      """
 
 609  Remaps values in a given column of a 2D list (listoflists).  This requires
 
 610  a criterion as a function of 'x' so that the result of the following is
 
 611  returned ... map(lambda x: 'criterion',listoflists).  
 
 612  
 
 613  Usage:   remap(listoflists,criterion)    criterion=string
 
 614  Returns: remapped version of listoflists
 
 615  """ 
 616      function = 'map(lambda x: '+criterion+',listoflists)' 
 617      lines = eval(function) 
 618      return lines 
  619  
 
 620  
 
 622      """
 
 623  Goes through each element in a 1D or 2D inlist, and applies the following
 
 624  function to all elements of FloatType ... round(element,digits).
 
 625  
 
 626  Usage:   roundlist(inlist,digits)
 
 627  Returns: list with rounded floats
 
 628  """ 
 629      if type(inlist[0]) in [IntType, FloatType]: 
 630          inlist = [inlist] 
 631      l = inlist*1 
 632      for i in range(len(l)): 
 633          for j in range(len(l[i])): 
 634              if type(l[i][j])==FloatType: 
 635                  l[i][j] = round(l[i][j],digits) 
 636      return l 
  637  
 
 638  
 
 639 -def sortby(listoflists,sortcols): 
  640      """
 
 641  Sorts a list of lists on the column(s) specified in the sequence
 
 642  sortcols.
 
 643  
 
 644  Usage:   sortby(listoflists,sortcols)
 
 645  Returns: sorted list, unchanged column ordering
 
 646  """ 
 647      newlist = abut(colex(listoflists,sortcols),listoflists) 
 648      newlist.sort() 
 649      try: 
 650          numcols = len(sortcols) 
 651      except TypeError: 
 652          numcols = 1 
 653      crit = '[' + str(numcols) + ':]' 
 654      newlist = colex(newlist,crit) 
 655      return newlist 
  656  
 
 657  
 
 659      """
 
 660  Returns all unique items in the passed list.  If the a list-of-lists
 
 661  is passed, unique LISTS are found (i.e., items in the first dimension are
 
 662  compared).
 
 663  
 
 664  Usage:   unique (inlist)
 
 665  Returns: the unique elements (or rows) in inlist
 
 666  """ 
 667      uniques = [] 
 668      for item in inlist: 
 669          if item not in uniques: 
 670              uniques.append(item) 
 671      return uniques 
  672  
 
 674      """
 
 675  Returns duplicate items in the FIRST dimension of the passed list.
 
 676  
 
 677  Usage:   duplicates (inlist)
 
 678  """ 
 679      dups = [] 
 680      for i in range(len(inlist)): 
 681          if inlist[i] in inlist[i+1:]: 
 682              dups.append(inlist[i]) 
 683      return dups 
  684  
 
 685  
 
 687      """
 
 688  Returns items that are NOT duplicated in the first dim of the passed list.
 
 689  
 
 690  Usage:   nonrepeats (inlist)
 
 691  """ 
 692      nonrepeats = [] 
 693      for i in range(len(inlist)): 
 694          if inlist.count(inlist[i]) == 1: 
 695              nonrepeats.append(inlist[i]) 
 696      return nonrepeats 
  697  
 
 698  
 
 699  
 
 700  
 
 701  
 
 702  
 
 703  
 
 704  
 
 705  
 
 706  
 
 707  
 
 708  
 
 709  
 
 710  
 
 711  
 
 712  
 
 713  
 
 714  
 
 715  
 
 716  try:                          
 717   import Numeric 
 718   N = Numeric 
 719  
 
 def aabut (source, *args):
    """
    Like the |Stat abut command.  Concatenates arrays column-wise and returns
    the result.  A 1D array is treated as a single column.  CAUTION:  if one
    array has fewer rows, it is repeated until it is as long as the other.

    Usage:   aabut (source, args)    where args=any # of arrays
    Returns: an array as long as the LONGEST array passed, source appearing
             on the 'left', arrays in <args> attached on the 'right'.
    """
    def _as_columns (arr):
        # Give a 1D array an explicit second dimension, keeping ITS OWN
        # length so a longer addon is never truncated to the source's length.
        if len(arr.shape) == 1:
            return N.resize(arr, [arr.shape[0], 1])
        return arr

    source = _as_columns(source)
    for addon in args:
        addon = _as_columns(addon)
        # N.resize repeats the data as needed to fill the larger shape.
        if len(addon) < len(source):
            addon = N.resize(addon, [source.shape[0], addon.shape[1]])
        elif len(source) < len(addon):
            source = N.resize(source, [addon.shape[0], source.shape[1]])
        source = N.concatenate((source, addon), 1)
    return source
  747  
 
 748  
 
 def acolex (a,indices,axis=1):
    """
    Pulls the requested indices out of array a along the given axis
    (columns by default).  BEWARE: a 1D array is presumed to be a
    column-array; in that case indices is ignored and the whole array is
    returned as a single column.

    Usage:   acolex (a,indices,axis=1)
    Returns: the columns of a specified by indices
    """
    is_single_index = type(indices) not in (ListType,TupleType,N.ArrayType)
    if is_single_index:
        indices = [indices]
    if len(N.shape(a)) != 1:
        return N.take(a,indices,axis)
    # 1D input: hand the data back reshaped as an (n, 1) column.
    return N.resize(a,[a.shape[0],1])
  765  
 
 766  
 
 def acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None):
    """
    Collapses (by default: averages) the data in collapsecols, keeping all
    unique combinations of values found in keepcols.  Setting fcn1 and/or
    fcn2 to a function rather than None appends that function's result after
    each collapsed value ('N/A' if the function raises).  cfcn is the
    collapse function applied per group (defaults to the mean).

    Usage:   acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
    Returns: unique 'conditions' specified by the contents of columns specified
             by keepcols, abutted with the collapsed value(s) of column(s)
             specified by collapsecols
    """
    def acollmean (inarray):
        # Mean over every element, regardless of the array's shape.
        flat = N.ravel(inarray)
        return N.sum(flat)/float(len(flat))

    if cfcn is None:
        cfcn = acollmean
    if keepcols == []:
        # No grouping: column-wise mean over all rows.
        # NOTE(review): this branch has never honored cfcn; left as-is.
        avgcol = acolex(a,collapsecols)
        means = N.sum(avgcol)/float(len(avgcol))
        if fcn1 is not None:
            try:
                test = fcn1(avgcol)
            except Exception:
                test = N.array(['N/A']*len(means))
            means = aabut(means,test)
        if fcn2 is not None:
            try:
                test = fcn2(avgcol)
            except Exception:
                test = N.array(['N/A']*len(means))
            means = aabut(means,test)
        return means

    if type(keepcols) not in [ListType,TupleType,N.ArrayType]:
        keepcols = [keepcols]
    if type(collapsecols) not in [ListType,TupleType,N.ArrayType]:
        collapsecols = [collapsecols]
    values = colex(a,keepcols)
    uniques = unique(values)
    uniques.sort()
    newlist = []
    for item in uniques:
        if type(item) not in [ListType,TupleType,N.ArrayType]:
            item = [item]
        tmprows = alinexand(a,keepcols,item)
        for col in collapsecols:
            avgcol = acolex(tmprows,col)
            item.append(cfcn(avgcol))
            if fcn1 is not None:
                try:
                    test = fcn1(avgcol)
                except Exception:
                    test = 'N/A'
                item.append(test)
            if fcn2 is not None:
                try:
                    test = fcn2(avgcol)
                except Exception:
                    test = 'N/A'
                item.append(test)
        # One output row per group, however many columns were collapsed.
        newlist.append(item)
    try:
        new_a = N.array(newlist)
    except TypeError:
        # Mixed content (e.g. 'N/A' strings): fall back to an object array.
        new_a = N.array(newlist,'O')
    return new_a
 832  
 
 833  
 
 def adm (a,criterion):
    """
    Returns rows from the passed array that meet the criteria in the passed
    criterion expression (a string as a function of x, the current row).

    Usage:   adm (a,criterion)   where criterion is like 'x[2]==37'
    Returns: an array of the matching rows (an object array if the rows
             cannot be combined into a regular array)
    """
    # SECURITY: criterion is evaluated as Python source.  Never build it from
    # untrusted input.
    keep = eval('lambda x: ' + criterion)
    lines = [row for row in a if keep(row)]
    try:
        return N.array(lines)
    except Exception:
        return N.array(lines,'O')
  848  
 
 849  
 
 851      if type(x)==StringType: 
 852          return 1 
 853      else: 
 854          return 0 
  855  
 
 856  
 
 def alinexand (a,columnlist,valuelist):
     """
     Returns the rows of an array where col (from columnlist) = val
     (from valuelist).  One value is required for each column in columnlist.
     A bare (non-list, non-tuple, non-array) columnlist or valuelist is
     treated as a one-item list.

     Usage:   alinexand (a,columnlist,valuelist)
     Returns: the rows of a where columnlist[i]=valuelist[i] for ALL i
     """
     if type(columnlist) not in [ListType,TupleType,N.ArrayType]:
         columnlist = [columnlist]
     if type(valuelist) not in [ListType,TupleType,N.ArrayType]:
         valuelist = [valuelist]
     terms = []
     for i in range(len(columnlist)):
         val = valuelist[i]
         if type(val)==StringType or type(val)==type(0.0):
             # repr() gives a literal that evaluates back to the same value:
             # it escapes quotes/backslashes in strings and keeps every
             # significant digit of a float (str() may drop some, which
             # would make the == test miss).
             critval = repr(val)
         else:
             critval = str(val)
         terms.append('x['+str(columnlist[i])+']=='+critval)
     # the joined expression is evaluated row by row inside adm()
     criterion = ' and '.join(terms)
     return adm(a,criterion)
  878  
 
 879  
 
 def alinexor (a,columnlist,valuelist):
     """
     Returns the rows of an array where col (from columnlist) = val (from
     valuelist).  One value is required for each column in columnlist.
     The exception is if either columnlist or valuelist has only 1 value,
     in which case that item will be expanded to match the length of the
     other list.

     Usage:   alinexor (a,columnlist,valuelist)
     Returns: the rows of a where columnlist[i]=valuelist[i] for ANY i
     """
     if type(columnlist) not in [ListType,TupleType,N.ArrayType]:
         columnlist = [columnlist]
     if type(valuelist) not in [ListType,TupleType,N.ArrayType]:
         valuelist = [valuelist]
     # list() first: '*' on an array multiplies element-wise rather than
     # repeating the single entry.
     if len(columnlist) == 1 and len(valuelist) > 1:
         columnlist = list(columnlist)*len(valuelist)
     elif len(valuelist) == 1 and len(columnlist) > 1:
         valuelist = list(valuelist)*len(columnlist)
     terms = []
     for i in range(len(columnlist)):
         val = valuelist[i]
         if type(val)==StringType or type(val)==type(0.0):
             # repr() gives a literal that evaluates back to the same value:
             # it escapes quotes/backslashes in strings and keeps every
             # significant digit of a float (str() may drop some, which
             # would make the == test miss).
             critval = repr(val)
         else:
             critval = str(val)
         terms.append('x['+str(columnlist[i])+']=='+critval)
     # the joined expression is evaluated row by row inside adm()
     criterion = ' or '.join(terms)
     return adm(a,criterion)
  908  
 
 909  
 
 def areplace(a,oldval,newval):
     """
     Replaces all occurrences of oldval with newval in array a.

     Usage:   areplace(a,oldval,newval)
     Returns: a new array; a itself is not modified
     """
     # Select element-wise instead of multiplying by 0/1 masks: the mask
     # arithmetic turned untouched inf entries into NaN (0*inf).
     return N.where(N.equal(a,oldval),newval,a)
  918  
 
 919  
 
 def arecode (a,listmap,col='all'):
     """
     Remaps the values in an array to a new set of values (useful when
     you need to recode data from (e.g.) strings to numbers as most stats
     packages require).  Can work on SINGLE columns, or 'all' columns at once.
     Every pair in listmap is applied, each one matched against the ORIGINAL
     values (so recodings do not chain); if several pairs share the same old
     value, the last one wins.

     Usage:   arecode (a,listmap,col='all')
     Returns: a version of array a where listmap[i][0] = (instead) listmap[i][1]
     """
     ashape = a.shape
     if col == 'all':
         work = a.flat
     else:
         work = acolex(a,col)
         work = work.flat
     # Object storage is needed when the data already are objects or when
     # any replacement value is a string.
     useobjects = work.typecode()=='O' or a.typecode()=='O'
     for pair in listmap:
         if type(pair[1]) == StringType:
             useobjects = 1
     if useobjects:
         work = N.array(work,'O')
         a = N.array(a,'O')
         for i in range(len(work)):
             oldval = work[i]        # compare against the pre-recode value
             for pair in listmap:
                 if oldval==pair[0]:
                     work[i] = pair[1]
     else:
         source = work               # unmodified values used for matching
         for pair in listmap:
             work = N.where(N.equal(source,pair[0]),pair[1],work)
     if col == 'all':
         return N.reshape(work,ashape)
     else:
         # splice the recoded column back between its neighbours
         return N.concatenate([a[:,0:col],work[:,N.NewAxis],a[:,col+1:]],1)
  949  
 
 950  
 
 def arowcompare(row1,row2):
     """
     Compares two rows from an array, regardless of whether it is an
     array of numbers or of python objects (which requires the cmp function).

     Usage:   arowcompare(row1,row2)
     Returns: an array of equal length containing 1s where the two rows had
              identical elements and 0 otherwise
     """
     # typecode must be CALLED on both rows; comparing the bound method
     # itself to 'O' is always false and skipped the object path for row2.
     if row1.typecode()=='O' or row2.typecode()=='O':
         # cmp gives 0 for equal items, so negate the absolute result
         cmpvect = N.logical_not(abs(N.array(map(cmp,row1,row2))))
     else:
         cmpvect = N.equal(row1,row2)
     return cmpvect
  965  
 
 966  
 
 def arowsame(row1,row2):
     """
     Tests two rows for element-by-element equality via arowcompare, so it
     handles both numeric rows and rows of python objects.

     Usage:   arowsame(row1,row2)
     Returns: 1 if the two rows are identical, 0 otherwise.
     """
     return N.alltrue(arowcompare(row1,row2))
  977  
 
 978  
 
 def asortrows(a,axis=0):
     """
     Sorts an array "by rows".  Unlike Numeric.sort(), which orders the
     elements WITHIN the given axis, this keeps each slice along the given
     axis intact and reorders the slices relative to one another.

     Usage:   asortrows(a,axis=0)
     Returns: sorted version of a
     """
     swapped = (axis != 0)
     if swapped:
         # bring the requested axis to the front so its slices act as rows
         a = N.swapaxes(a, axis, 0)
     rows = a.tolist()
     rows.sort()
     result = N.array(rows)
     if swapped:
         # restore the caller's axis order
         result = N.swapaxes(result, axis, 0)
     return result
  997  
 
 998  
 
1000      """
 
1001  Returns unique items in the FIRST dimension of the passed array. Only
 
1002  works on arrays NOT including string items.
 
1003  
 
1004  Usage:   aunique (inarray)
 
1005  """ 
1006      uniques = N.array([inarray[0]]) 
1007      if len(uniques.shape) == 1:             
1008          for item in inarray[1:]: 
1009              if N.add.reduce(N.equal(uniques,item).flat) == 0: 
1010                  try: 
1011                      uniques = N.concatenate([uniques,N.array[N.NewAxis,:]]) 
1012                  except TypeError: 
1013                      uniques = N.concatenate([uniques,N.array([item])]) 
1014      else:                                   
1015          if inarray.typecode() != 'O':   
1016              for item in inarray[1:]: 
1017                  if not N.sum(N.alltrue(N.equal(uniques,item),1)): 
1018                      try: 
1019                          uniques = N.concatenate( [uniques,item[N.NewAxis,:]] ) 
1020                      except TypeError:     
1021                          uniques = N.concatenate([uniques,N.array([item])]) 
1022                  else: 
1023                      pass   
1024          else:    
1025              for item in inarray[1:]: 
1026                  newflag = 1 
1027                  for unq in uniques:   
1028                      test = N.sum(abs(N.array(map(cmp,item,unq)))) 
1029                      if test == 0:    
1030                          newflag = 0  
1031                          break 
1032                  if newflag == 1: 
1033                      try: 
1034                          uniques = N.concatenate( [uniques,item[N.NewAxis,:]] ) 
1035                      except TypeError:     
1036                          uniques = N.concatenate([uniques,N.array([item])]) 
1037      return uniques 
 1038  
 
1039  
 
1041      """
 
1042  Returns duplicate items in the FIRST dimension of the passed array. Only
 
1043  works on arrays NOT including string items.
 
1044  
 
1045  Usage:   aunique (inarray)
 
1046  """ 
1047      inarray = N.array(inarray) 
1048      if len(inarray.shape) == 1:             
1049          dups = [] 
1050          inarray = inarray.tolist() 
1051          for i in range(len(inarray)): 
1052              if inarray[i] in inarray[i+1:]: 
1053                  dups.append(inarray[i]) 
1054          dups = aunique(dups) 
1055      else:                                   
1056          dups = [] 
1057          aslist = inarray.tolist() 
1058          for i in range(len(aslist)): 
1059              if aslist[i] in aslist[i+1:]: 
1060                  dups.append(aslist[i]) 
1061          dups = unique(dups) 
1062          dups = N.array(dups) 
1063      return dups 
 1064  
 
1065  except ImportError:     
1066   pass 
1067