Package TEES :: Package Utils :: Package Libraries :: Module pstat
Source Code for Module TEES.Utils.Libraries.pstat

   1  # Copyright (c) 1999-2000 Gary Strangman; All Rights Reserved.
 
   2  #
 
   3  # This software is distributable under the terms of the GNU
 
   4  # General Public License (GPL) v2, the text of which can be found at
 
   5  # http://www.gnu.org/copyleft/gpl.html. Installing, importing or otherwise
 
   6  # using this module constitutes acceptance of the terms of this License.
 
   7  #
 
   8  # Disclaimer
 
   9  # 
 
  10  # This software is provided "as-is".  There are no expressed or implied
 
  11  # warranties of any kind, including, but not limited to, the warranties
 
  12  # of merchantability and fittness for a given application.  In no event
 
  13  # shall Gary Strangman be liable for any direct, indirect, incidental,
 
  14  # special, exemplary or consequential damages (including, but not limited
 
  15  # to, loss of use, data or profits, or business interruption) however
 
  16  # caused and on any theory of liability, whether in contract, strict
 
  17  # liability or tort (including negligence or otherwise) arising in any way
 
  18  # out of the use of this software, even if advised of the possibility of
 
  19  # such damage.
 
  20  #
 
  21  # Comments and/or additions are welcome (send e-mail to:
 
  22  # strang@nmr.mgh.harvard.edu).
 
  23  # 
 
  24  """
 
  25  pstat.py module
 
  26  
 
  27  #################################################
 
  28  #######  Written by:  Gary Strangman  ###########
 
  29  #######  Last modified:  Jun 29, 2001 ###########
 
  30  #################################################
 
  31  
 
  32  This module provides some useful list and array manipulation routines
 
  33  modeled after those found in the |Stat package by Gary Perlman, plus a
 
  34  number of other useful list/file manipulation functions.  The list-based
 
  35  functions include:
 
  36  
 
  37        abut (source,*args)
 
  38        simpleabut (source, addon)
 
  39        colex (listoflists,cnums)
 
  40        collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
 
  41        dm (listoflists,criterion)
 
  42        flat (l)
 
  43        linexand (listoflists,columnlist,valuelist)
 
  44        linexor (listoflists,columnlist,valuelist)
 
  45        linedelimited (inlist,delimiter)
 
  46        lineincols (inlist,colsize) 
 
  47        lineincustcols (inlist,colsizes)
 
   48        list2string (inlist,delimit=' ')
 
  49        makelol(inlist)
 
  50        makestr(x)
 
  51        printcc (lst,extra=2)
 
  52        printincols (listoflists,colsize)
 
  53        pl (listoflists)
 
  54        printl(listoflists)
 
  55        replace (lst,oldval,newval)
 
   56        recode (inlist,listmap,cols=None)
 
  57        remap (listoflists,criterion)
 
  58        roundlist (inlist,num_digits_to_round_floats_to)
 
  59        sortby(listoflists,sortcols)
 
  60        unique (inlist)
 
  61        duplicates(inlist)
 
  62        writedelimited (listoflists, delimiter, file, writetype='w')
 
  63  
 
  64  Some of these functions have alternate versions which are defined only if
 
  65  Numeric (NumPy) can be imported.  These functions are generally named as
 
  66  above, with an 'a' prefix.
 
  67  
 
  68        aabut (source, *args)
 
  69        acolex (a,indices,axis=1)
 
   70        acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
 
  71        adm (a,criterion)
 
  72        alinexand (a,columnlist,valuelist)
 
  73        alinexor (a,columnlist,valuelist)
 
  74        areplace (a,oldval,newval)
 
  75        arecode (a,listmap,col='all')
 
  76        arowcompare (row1, row2)
 
  77        arowsame (row1, row2)
 
  78        asortrows(a,axis=0)
 
  79        aunique(inarray)
 
  80        aduplicates(inarray)
 
  81  
 
  82  Currently, the code is all but completely un-optimized.  In many cases, the
 
  83  array versions of functions amount simply to aliases to built-in array
 
  84  functions/methods.  Their inclusion here is for function name consistency.
 
  85  """ 
  86  
 
  87  ## CHANGE LOG:
 
  88  ## ==========
 
  89  ## 01-11-15 ... changed list2string() to accept a delimiter
 
  90  ## 01-06-29 ... converted exec()'s to eval()'s to make compatible with Py2.1
 
  91  ## 01-05-31 ... added duplicates() and aduplicates() functions
 
  92  ## 00-12-28 ... license made GPL, docstring and import requirements
 
  93  ## 99-11-01 ... changed version to 0.3
 
  94  ## 99-08-30 ... removed get, getstrings, put, aget, aput (into io.py)
 
  95  ## 03/27/99 ... added areplace function, made replace fcn recursive
 
  96  ## 12/31/98 ... added writefc function for ouput to fixed column sizes
 
  97  ## 12/07/98 ... fixed import problem (failed on collapse() fcn)
 
  98  ##              added __version__ variable (now 0.2)
 
  99  ## 12/05/98 ... updated doc-strings
 
 100  ##              added features to collapse() function
 
 101  ##              added flat() function for lists
 
 102  ##              fixed a broken asortrows() 
 
 103  ## 11/16/98 ... fixed minor bug in aput for 1D arrays
 
 104  ##
 
 105  ## 11/08/98 ... fixed aput to output large arrays correctly
 
 106  
 
 107  import stats  # required 3rd party module 
 108  import string, copy 
 109  from types import * 
 110  
 
 111  __version__ = 0.4 
 112  
 
 113  ###===========================  LIST FUNCTIONS  ==========================
 
 114  ###
 
 115  ### Here are the list functions, DEFINED FOR ALL SYSTEMS.
 
 116  ### Array functions (for NumPy-enabled computers) appear below.
 
 117  ###
 
 118  
 
 119 -def abut (source,*args): 
 120      """
 
 121  Like the |Stat abut command.  It concatenates two lists side-by-side
 
 122  and returns the result.  '2D' lists are also accomodated for either argument
 
 123  (source or addon).  CAUTION:  If one list is shorter, it will be repeated
 
 124  until it is as long as the longest list.  If this behavior is not desired,
 
 125  use pstat.simpleabut().
 
 126  
 
 127  Usage:   abut(source, args)   where args=any # of lists
 
 128  Returns: a list of lists as long as the LONGEST list past, source on the
 
 129           'left', lists in <args> attached consecutively on the 'right'
 
 130  """ 
 131  
 
 132      if type(source) not in [ListType,TupleType]: 
 133          source = [source] 
 134      for addon in args: 
 135          if type(addon) not in [ListType,TupleType]: 
 136              addon = [addon] 
 137          if len(addon) < len(source):                # is source list longer? 
 138              if len(source) % len(addon) == 0:        # are they integer multiples? 
 139                  repeats = len(source)/len(addon)    # repeat addon n times 
 140                  origadd = copy.deepcopy(addon) 
 141                  for i in range(repeats-1): 
 142                      addon = addon + origadd 
 143              else: 
 144                  repeats = len(source)/len(addon)+1  # repeat addon x times, 
 145                  origadd = copy.deepcopy(addon)      #    x is NOT an integer 
 146                  for i in range(repeats-1): 
 147                      addon = addon + origadd 
 148                      addon = addon[0:len(source)] 
 149          elif len(source) < len(addon):                # is addon list longer? 
 150              if len(addon) % len(source) == 0:        # are they integer multiples? 
 151                  repeats = len(addon)/len(source)    # repeat source n times 
 152                  origsour = copy.deepcopy(source) 
 153                  for i in range(repeats-1): 
 154                      source = source + origsour 
 155              else: 
 156                  repeats = len(addon)/len(source)+1  # repeat source x times, 
 157                  origsour = copy.deepcopy(source)    #   x is NOT an integer 
 158                  for i in range(repeats-1): 
 159                      source = source + origsour 
 160                  source = source[0:len(addon)] 
 161  
 
 162          source = simpleabut(source,addon) 
 163      return source 
 164  
 
 165  
 
 166 -def simpleabut (source, addon): 
 167      """
 
 168  Concatenates two lists as columns and returns the result.  '2D' lists
 
 169  are also accomodated for either argument (source or addon).  This DOES NOT
 
 170  repeat either list to make the 2 lists of equal length.  Beware of list pairs
 
 171  with different lengths ... the resulting list will be the length of the
 
 172  FIRST list passed.
 
 173  
 
 174  Usage:   simpleabut(source,addon)  where source, addon=list (or list-of-lists)
 
 175  Returns: a list of lists as long as source, with source on the 'left' and
 
 176                   addon on the 'right'
 
 177  """ 
 178      if type(source) not in [ListType,TupleType]: 
 179          source = [source] 
 180      if type(addon) not in [ListType,TupleType]: 
 181          addon = [addon] 
 182      minlen = min(len(source),len(addon)) 
 183      list = copy.deepcopy(source)                # start abut process 
 184      if type(source[0]) not in [ListType,TupleType]: 
 185          if type(addon[0]) not in [ListType,TupleType]: 
 186              for i in range(minlen): 
 187                  list[i] = [source[i]] + [addon[i]]        # source/addon = column 
 188          else: 
 189              for i in range(minlen): 
 190                  list[i] = [source[i]] + addon[i]        # addon=list-of-lists 
 191      else: 
 192          if type(addon[0]) not in [ListType,TupleType]: 
 193              for i in range(minlen): 
 194                  list[i] = source[i] + [addon[i]]        # source=list-of-lists 
 195          else: 
 196              for i in range(minlen): 
 197                  list[i] = source[i] + addon[i]        # source/addon = list-of-lists 
 198      source = list 
 199      return source 
 200  
 
 201  
 
 202 -def colex (listoflists,cnums): 
 203      """
 
 204  Extracts from listoflists the columns specified in the list 'cnums'
 
 205  (cnums can be an integer, a sequence of integers, or a string-expression that
 
 206  corresponds to a slice operation on the variable x ... e.g., 'x[3:]' will colex
 
 207  columns 3 onward from the listoflists).
 
 208  
 
 209  Usage:   colex (listoflists,cnums)
 
 210  Returns: a list-of-lists corresponding to the columns from listoflists
 
 211           specified by cnums, in the order the column numbers appear in cnums
 
 212  """ 
 213      global index 
 214      column = 0 
 215      if type(cnums) in [ListType,TupleType]:   # if multiple columns to get 
 216          index = cnums[0] 
 217          column = map(lambda x: x[index], listoflists) 
 218          for col in cnums[1:]: 
 219              index = col 
 220              column = abut(column,map(lambda x: x[index], listoflists)) 
 221      elif type(cnums) == StringType:              # if an 'x[3:]' type expr. 
 222          evalstring = 'map(lambda x: x'+cnums+', listoflists)' 
 223          column = eval(evalstring) 
 224      else:                                     # else it's just 1 col to get 
 225          index = cnums 
 226          column = map(lambda x: x[index], listoflists) 
 227      return column 
 228  
 
 229  
 
 230 -def collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None): 
 231       """
 
 232  Averages data in collapsecol, keeping all unique items in keepcols
 
 233  (using unique, which keeps unique LISTS of column numbers), retaining the
 
 234  unique sets of values in keepcols, the mean for each.  Setting fcn1
 
 235  and/or fcn2 to point to a function rather than None (e.g., stats.sterr, len)
 
 236  will append those results (e.g., the sterr, N) after each calculated mean.
 
 237  cfcn is the collapse function to apply (defaults to mean, defined here in the
 
 238  pstat module to avoid circular imports with stats.py, but harmonicmean or
 
 239  others could be passed).
 
 240  
 
 241  Usage:    collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
 
 242  Returns: a list of lists with all unique permutations of entries appearing in
 
 243       columns ("conditions") specified by keepcols, abutted with the result of
 
 244       cfcn (if cfcn=None, defaults to the mean) of each column specified by
 
 245       collapsecols.
 
 246  """ 
 247       def collmean (inlist): 
 248           s = 0 
 249           for item in inlist: 
 250               s = s + item 
 251           return s/float(len(inlist)) 
 252  
 
 253       if type(keepcols) not in [ListType,TupleType]: 
 254           keepcols = [keepcols] 
 255       if type(collapsecols) not in [ListType,TupleType]: 
 256           collapsecols = [collapsecols] 
 257       if cfcn == None: 
 258           cfcn = collmean 
 259       if keepcols == []: 
 260           means = [0]*len(collapsecols) 
 261           for i in range(len(collapsecols)): 
 262               avgcol = colex(listoflists,collapsecols[i]) 
 263               means[i] = cfcn(avgcol) 
 264               if fcn1: 
 265                   try: 
 266                       test = fcn1(avgcol) 
 267                   except: 
 268                       test = 'N/A' 
 269                       means[i] = [means[i], test] 
 270               if fcn2: 
 271                   try: 
 272                       test = fcn2(avgcol) 
 273                   except: 
 274                       test = 'N/A' 
 275                   try: 
 276                       means[i] = means[i] + [len(avgcol)] 
 277                   except TypeError: 
 278                       means[i] = [means[i],len(avgcol)] 
 279           return means 
 280       else: 
 281           values = colex(listoflists,keepcols) 
 282           uniques = unique(values) 
 283           uniques.sort() 
 284           newlist = [] 
 285           if type(keepcols) not in [ListType,TupleType]:  keepcols = [keepcols] 
 286           for item in uniques: 
 287               if type(item) not in [ListType,TupleType]:  item =[item] 
 288               tmprows = linexand(listoflists,keepcols,item) 
 289               for col in collapsecols: 
 290                   avgcol = colex(tmprows,col) 
 291                   item.append(cfcn(avgcol)) 
 292                   if fcn1 <> None: 
 293                       try: 
 294                           test = fcn1(avgcol) 
 295                       except: 
 296                           test = 'N/A' 
 297                       item.append(test) 
 298                   if fcn2 <> None: 
 299                       try: 
 300                           test = fcn2(avgcol) 
 301                       except: 
 302                           test = 'N/A' 
 303                       item.append(test) 
 304                   newlist.append(item) 
 305           return newlist 
 306  
 
 307  
 
 308 -def dm (listoflists,criterion): 
 309      """
 
 310  Returns rows from the passed list of lists that meet the criteria in
 
 311  the passed criterion expression (a string as a function of x; e.g., 'x[3]>=9'
 
 312  will return all rows where the 4th column>=9 and "x[2]=='N'" will return rows
 
 313  with column 2 equal to the string 'N').
 
 314  
 
 315  Usage:   dm (listoflists, criterion)
 
 316  Returns: rows from listoflists that meet the specified criterion.
 
 317  """ 
 318      function = 'filter(lambda x: '+criterion+',listoflists)' 
 319      lines = eval(function) 
 320      return lines 
 321  
 
 322  
 
 323 -def flat(l): 
 324      """
 
 325  Returns the flattened version of a '2D' list.  List-correlate to the a.flat()
 
 326  method of NumPy arrays.
 
 327  
 
 328  Usage:    flat(l)
 
 329  """ 
 330      newl = [] 
 331      for i in range(len(l)): 
 332          for j in range(len(l[i])): 
 333              newl.append(l[i][j]) 
 334      return newl 
 335  
 
 336  
 
 337 -def linexand (listoflists,columnlist,valuelist): 
 338      """
 
 339  Returns the rows of a list of lists where col (from columnlist) = val
 
 340  (from valuelist) for EVERY pair of values (columnlist[i],valuelists[i]).
 
 341  len(columnlist) must equal len(valuelist).
 
 342  
 
 343  Usage:   linexand (listoflists,columnlist,valuelist)
 
 344  Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ALL i
 
 345  """ 
 346      if type(columnlist) not in [ListType,TupleType]: 
 347          columnlist = [columnlist] 
 348      if type(valuelist) not in [ListType,TupleType]: 
 349          valuelist = [valuelist] 
 350      criterion = '' 
 351      for i in range(len(columnlist)): 
 352          if type(valuelist[i])==StringType: 
 353              critval = '\'' + valuelist[i] + '\'' 
 354          else: 
 355              critval = str(valuelist[i]) 
 356          criterion = criterion + ' x['+str(columnlist[i])+']=='+critval+' and' 
 357      criterion = criterion[0:-3]         # remove the "and" after the last crit 
 358      function = 'filter(lambda x: '+criterion+',listoflists)' 
 359      lines = eval(function) 
 360      return lines 
 361  
 
 362  
 
 363 -def linexor (listoflists,columnlist,valuelist): 
 364      """
 
 365  Returns the rows of a list of lists where col (from columnlist) = val
 
 366  (from valuelist) for ANY pair of values (colunmlist[i],valuelist[i[).
 
 367  One value is required for each column in columnlist.  If only one value
 
 368  exists for columnlist but multiple values appear in valuelist, the
 
 369  valuelist values are all assumed to pertain to the same column.
 
 370  
 
 371  Usage:   linexor (listoflists,columnlist,valuelist)
 
 372  Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ANY i
 
 373  """ 
 374      if type(columnlist) not in [ListType,TupleType]: 
 375          columnlist = [columnlist] 
 376      if type(valuelist) not in [ListType,TupleType]: 
 377          valuelist = [valuelist] 
 378      criterion = '' 
 379      if len(columnlist) == 1 and len(valuelist) > 1: 
 380          columnlist = columnlist*len(valuelist) 
 381      for i in range(len(columnlist)):          # build an exec string 
 382          if type(valuelist[i])==StringType: 
 383              critval = '\'' + valuelist[i] + '\'' 
 384          else: 
 385              critval = str(valuelist[i]) 
 386          criterion = criterion + ' x['+str(columnlist[i])+']=='+critval+' or' 
 387      criterion = criterion[0:-2]         # remove the "or" after the last crit 
 388      function = 'filter(lambda x: '+criterion+',listoflists)' 
 389      lines = eval(function) 
 390      return lines 
 391  
 
 392  
 
 393 -def linedelimited (inlist,delimiter): 
 394      """
 
 395  Returns a string composed of elements in inlist, with each element
 
 396  separated by 'delimiter.'  Used by function writedelimited.  Use '\t'
 
 397  for tab-delimiting.
 
 398  
 
 399  Usage:   linedelimited (inlist,delimiter)
 
 400  """ 
 401      outstr = '' 
 402      for item in inlist: 
 403          if type(item) <> StringType: 
 404              item = str(item) 
 405          outstr = outstr + item + delimiter 
 406      outstr = outstr[0:-1] 
 407      return outstr 
 408  
 
 409  
 
 410 -def lineincols (inlist,colsize): 
 411      """
 
 412  Returns a string composed of elements in inlist, with each element
 
 413  right-aligned in columns of (fixed) colsize.
 
 414  
 
 415  Usage:   lineincols (inlist,colsize)   where colsize is an integer
 
 416  """ 
 417      outstr = '' 
 418      for item in inlist: 
 419          if type(item) <> StringType: 
 420              item = str(item) 
 421          size = len(item) 
 422          if size <= colsize: 
 423              for i in range(colsize-size): 
 424                  outstr = outstr + ' ' 
 425              outstr = outstr + item 
 426          else: 
 427              outstr = outstr + item[0:colsize+1] 
 428      return outstr 
 429  
 
 430  
 
 431 -def lineincustcols (inlist,colsizes): 
 432      """
 
 433  Returns a string composed of elements in inlist, with each element
 
 434  right-aligned in a column of width specified by a sequence colsizes.  The
 
 435  length of colsizes must be greater than or equal to the number of columns
 
 436  in inlist.
 
 437  
 
 438  Usage:   lineincustcols (inlist,colsizes)
 
 439  Returns: formatted string created from inlist
 
 440  """ 
 441      outstr = '' 
 442      for i in range(len(inlist)): 
 443          if type(inlist[i]) <> StringType: 
 444              item = str(inlist[i]) 
 445          else: 
 446              item = inlist[i] 
 447          size = len(item) 
 448          if size <= colsizes[i]: 
 449              for j in range(colsizes[i]-size): 
 450                  outstr = outstr + ' ' 
 451              outstr = outstr + item 
 452          else: 
 453              outstr = outstr + item[0:colsizes[i]+1] 
 454      return outstr 
 455  
 
 456  
 
 457 -def list2string (inlist,delimit=' '): 
 458      """
 
 459  Converts a 1D list to a single long string for file output, using
 
 460  the string.join function.
 
 461  
 
 462  Usage:   list2string (inlist,delimit=' ')
 
 463  Returns: the string created from inlist
 
 464  """ 
 465      stringlist = map(makestr,inlist) 
 466      return string.join(stringlist,delimit) 
 467  
 
 468  
 
 469 -def makelol(inlist): 
 470      """
 
 471  Converts a 1D list to a 2D list (i.e., a list-of-lists).  Useful when you
 
 472  want to use put() to write a 1D list one item per line in the file.
 
 473  
 
 474  Usage:   makelol(inlist)
 
 475  Returns: if l = [1,2,'hi'] then returns [[1],[2],['hi']] etc.
 
 476  """ 
 477      x = [] 
 478      for item in inlist: 
 479          x.append([item]) 
 480      return x 
 481  
 
 482  
 
 483 -def makestr (x): 
 484      if type(x) <> StringType: 
 485          x = str(x) 
 486      return x 
 487  
 
 488  
 
 489 -def printcc (lst,extra=2): 
 490      """
 
 491  Prints a list of lists in columns, customized by the max size of items
 
 492  within the columns (max size of items in col, plus 'extra' number of spaces).
 
 493  Use 'dashes' or '\\n' in the list-of-lists to print dashes or blank lines,
 
 494  respectively.
 
 495  
 
 496  Usage:   printcc (lst,extra=2)
 
 497  Returns: None
 
 498  """ 
 499      if type(lst[0]) not in [ListType,TupleType]: 
 500          lst = [lst] 
 501      rowstokill = [] 
 502      list2print = copy.deepcopy(lst) 
 503      for i in range(len(lst)): 
 504          if lst[i] == ['\n'] or lst[i]=='\n' or lst[i]=='dashes' or lst[i]=='' or lst[i]==['']: 
 505              rowstokill = rowstokill + [i] 
 506      rowstokill.reverse()   # delete blank rows from the end 
 507      for row in rowstokill: 
 508          del list2print[row] 
 509      maxsize = [0]*len(list2print[0]) 
 510      for col in range(len(list2print[0])): 
 511          items = colex(list2print,col) 
 512          items = map(makestr,items) 
 513          maxsize[col] = max(map(len,items)) + extra 
 514      for row in lst: 
 515          if row == ['\n'] or row == '\n' or row == '' or row == ['']: 
 516              print 
 517          elif row == ['dashes'] or row == 'dashes': 
 518              dashes = [0]*len(maxsize) 
 519              for j in range(len(maxsize)): 
 520                  dashes[j] = '-'*(maxsize[j]-2) 
 521              print lineincustcols(dashes,maxsize) 
 522          else: 
 523              print lineincustcols(row,maxsize) 
 524      return None 
 525  
 
 526  
 
 527 -def printincols (listoflists,colsize): 
 528      """
 
 529  Prints a list of lists in columns of (fixed) colsize width, where
 
 530  colsize is an integer.
 
 531  
 
 532  Usage:   printincols (listoflists,colsize)
 
 533  Returns: None
 
 534  """ 
 535      for row in listoflists: 
 536          print lineincols(row,colsize) 
 537      return None 
 538  
 
 539  
 
 540 -def pl (listoflists): 
 541      """
 
 542  Prints a list of lists, 1 list (row) at a time.
 
 543  
 
 544  Usage:   pl(listoflists)
 
 545  Returns: None
 
 546  """ 
 547      for row in listoflists: 
 548          if row[-1] == '\n': 
 549              print row, 
 550          else: 
 551              print row 
 552      return None 
 553  
 
 554  
 
 555 -def printl(listoflists): 
 556      """Alias for pl.""" 
 557      pl(listoflists) 
 558      return 
 559  
 
 560  
 
 561 -def replace (inlst,oldval,newval): 
 562      """
 
 563  Replaces all occurrences of 'oldval' with 'newval', recursively.
 
 564  
 
 565  Usage:   replace (inlst,oldval,newval)
 
 566  """ 
 567      lst = inlst*1 
 568      for i in range(len(lst)): 
 569          if type(lst[i]) not in [ListType,TupleType]: 
 570              if lst[i]==oldval: lst[i]=newval 
 571          else: 
 572              lst[i] = replace(lst[i],oldval,newval) 
 573      return lst 
 574  
 
 575  
 
 576 -def recode (inlist,listmap,cols=None): 
 577      """
 
 578  Changes the values in a list to a new set of values (useful when
 
 579  you need to recode data from (e.g.) strings to numbers.  cols defaults
 
 580  to None (meaning all columns are recoded).
 
 581  
 
 582  Usage:   recode (inlist,listmap,cols=None)  cols=recode cols, listmap=2D list
 
 583  Returns: inlist with the appropriate values replaced with new ones
 
 584  """ 
 585      lst = copy.deepcopy(inlist) 
 586      if cols != None: 
 587          if type(cols) not in [ListType,TupleType]: 
 588              cols = [cols] 
 589          for col in cols: 
 590              for row in range(len(lst)): 
 591                  try: 
 592                      idx = colex(listmap,0).index(lst[row][col]) 
 593                      lst[row][col] = listmap[idx][1] 
 594                  except ValueError: 
 595                      pass 
 596      else: 
 597          for row in range(len(lst)): 
 598              for col in range(len(lst)): 
 599                  try: 
 600                      idx = colex(listmap,0).index(lst[row][col]) 
 601                      lst[row][col] = listmap[idx][1] 
 602                  except ValueError: 
 603                      pass 
 604      return lst 
 605  
 
 606  
 
 607 -def remap (listoflists,criterion): 
 608      """
 
 609  Remaps values in a given column of a 2D list (listoflists).  This requires
 
 610  a criterion as a function of 'x' so that the result of the following is
 
 611  returned ... map(lambda x: 'criterion',listoflists).  
 
 612  
 
 613  Usage:   remap(listoflists,criterion)    criterion=string
 
 614  Returns: remapped version of listoflists
 
 615  """ 
 616      function = 'map(lambda x: '+criterion+',listoflists)' 
 617      lines = eval(function) 
 618      return lines 
 619  
 
 620  
 
 621 -def roundlist (inlist,digits): 
 622      """
 
 623  Goes through each element in a 1D or 2D inlist, and applies the following
 
 624  function to all elements of FloatType ... round(element,digits).
 
 625  
 
 626  Usage:   roundlist(inlist,digits)
 
 627  Returns: list with rounded floats
 
 628  """ 
 629      if type(inlist[0]) in [IntType, FloatType]: 
 630          inlist = [inlist] 
 631      l = inlist*1 
 632      for i in range(len(l)): 
 633          for j in range(len(l[i])): 
 634              if type(l[i][j])==FloatType: 
 635                  l[i][j] = round(l[i][j],digits) 
 636      return l 
 637  
 
 638  
 
 639 -def sortby(listoflists,sortcols): 
 640      """
 
 641  Sorts a list of lists on the column(s) specified in the sequence
 
 642  sortcols.
 
 643  
 
 644  Usage:   sortby(listoflists,sortcols)
 
 645  Returns: sorted list, unchanged column ordering
 
 646  """ 
 647      newlist = abut(colex(listoflists,sortcols),listoflists) 
 648      newlist.sort() 
 649      try: 
 650          numcols = len(sortcols) 
 651      except TypeError: 
 652          numcols = 1 
 653      crit = '[' + str(numcols) + ':]' 
 654      newlist = colex(newlist,crit) 
 655      return newlist 
 656  
 
 657  
 
 658 -def unique (inlist): 
 659      """
 
 660  Returns all unique items in the passed list.  If the a list-of-lists
 
 661  is passed, unique LISTS are found (i.e., items in the first dimension are
 
 662  compared).
 
 663  
 
 664  Usage:   unique (inlist)
 
 665  Returns: the unique elements (or rows) in inlist
 
 666  """ 
 667      uniques = [] 
 668      for item in inlist: 
 669          if item not in uniques: 
 670              uniques.append(item) 
 671      return uniques 
 672  
 
 673 -def duplicates(inlist): 
 674      """
 
 675  Returns duplicate items in the FIRST dimension of the passed list.
 
 676  
 
 677  Usage:   duplicates (inlist)
 
 678  """ 
 679      dups = [] 
 680      for i in range(len(inlist)): 
 681          if inlist[i] in inlist[i+1:]: 
 682              dups.append(inlist[i]) 
 683      return dups 
 684  
 
 685  
 
 686 -def nonrepeats(inlist): 
 687      """
 
 688  Returns items that are NOT duplicated in the first dim of the passed list.
 
 689  
 
 690  Usage:   nonrepeats (inlist)
 
 691  """ 
 692      nonrepeats = [] 
 693      for i in range(len(inlist)): 
 694          if inlist.count(inlist[i]) == 1: 
 695              nonrepeats.append(inlist[i]) 
 696      return nonrepeats 
 697  
 
 698  
 
 699  #===================   PSTAT ARRAY FUNCTIONS  =====================
 
 700  #===================   PSTAT ARRAY FUNCTIONS  =====================
 
 701  #===================   PSTAT ARRAY FUNCTIONS  =====================
 
 702  #===================   PSTAT ARRAY FUNCTIONS  =====================
 
 703  #===================   PSTAT ARRAY FUNCTIONS  =====================
 
 704  #===================   PSTAT ARRAY FUNCTIONS  =====================
 
 705  #===================   PSTAT ARRAY FUNCTIONS  =====================
 
 706  #===================   PSTAT ARRAY FUNCTIONS  =====================
 
 707  #===================   PSTAT ARRAY FUNCTIONS  =====================
 
 708  #===================   PSTAT ARRAY FUNCTIONS  =====================
 
 709  #===================   PSTAT ARRAY FUNCTIONS  =====================
 
 710  #===================   PSTAT ARRAY FUNCTIONS  =====================
 
 711  #===================   PSTAT ARRAY FUNCTIONS  =====================
 
 712  #===================   PSTAT ARRAY FUNCTIONS  =====================
 
 713  #===================   PSTAT ARRAY FUNCTIONS  =====================
 
 714  #===================   PSTAT ARRAY FUNCTIONS  =====================
 
 715  
 
 716  try:                         # DEFINE THESE *ONLY* IF NUMERIC IS AVAILABLE 
 717   import Numeric 
 718   N = Numeric 
 719  
 
 720 - def aabut (source, *args): 
 721      """
 
 722  Like the |Stat abut command.  It concatenates two arrays column-wise
 
 723  and returns the result.  CAUTION:  If one array is shorter, it will be
 
 724  repeated until it is as long as the other.
 
 725  
 
 726  Usage:   aabut (source, args)    where args=any # of arrays
 
 727  Returns: an array as long as the LONGEST array past, source appearing on the
 
 728           'left', arrays in <args> attached on the 'right'.
 
 729  """ 
 730      if len(source.shape)==1: 
 731          width = 1 
 732          source = N.resize(source,[source.shape[0],width]) 
 733      else: 
 734          width = source.shape[1] 
 735      for addon in args: 
 736          if len(addon.shape)==1: 
 737              width = 1 
 738              addon = N.resize(addon,[source.shape[0],width]) 
 739          else: 
 740              width = source.shape[1] 
 741          if len(addon) < len(source): 
 742              addon = N.resize(addon,[source.shape[0],addon.shape[1]]) 
 743          elif len(source) < len(addon): 
 744              source = N.resize(source,[addon.shape[0],source.shape[1]]) 
 745          source = N.concatenate((source,addon),1) 
 746      return source 
 747  
 
 748  
 
 def acolex (a,indices,axis=1):
    """
    Pulls the requested indices (a list, or a single index) out of the passed
    array along the passed axis (columns by default).  BEWARE: a 1D array is
    presumed to be a column-array, so it is handed back whole, as a single
    column, and indices is not consulted.

    Usage:   acolex (a,indices,axis=1)
    Returns: the columns of a specified by indices
    """
    # A 1D array has no columns to choose between: return it as (n,1).
    if len(N.shape(a)) == 1:
        return N.resize(a,[a.shape[0],1])
    # Wrap a lone index so that N.take always receives a sequence.
    if type(indices) in [ListType,TupleType,N.ArrayType]:
        wanted = indices
    else:
        wanted = [indices]
    return N.take(a,wanted,axis)
 765  
 
 766  
 
 def acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None):
    """
    Averages data in collapsecols, keeping all unique items in keepcols
    (using unique, which keeps unique LISTS of column numbers), retaining
    the unique sets of values in keepcols and the mean for each.  If extra
    statistics are desired next to each mean, pass functions as fcn1 and/or
    fcn2; each is called with the column being collapsed, and 'N/A' is
    stored when the call fails.  When keepcols is not [], cfcn (if given)
    replaces the mean as the collapsing function.

    Usage:   acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
    Returns: unique 'conditions' specified by the contents of columns specified
             by keepcols, abutted with the mean(s) of column(s) specified by
             collapsecols
    """
    def acollmean (inarray):
        # arithmetic mean over every element of the passed array
        flat = N.ravel(inarray)
        return N.sum(flat)/float(len(flat))

    if cfcn is None:
        cfcn = acollmean
    if type(collapsecols) not in [ListType,TupleType,N.ArrayType]:
        collapsecols = [collapsecols]
    # optional extra statistics, in the order they are reported
    extras = [fcn for fcn in (fcn1,fcn2) if fcn is not None]

    if keepcols == []:
        # no grouping requested: one mean per collapsed column
        avgcol = acolex(a,collapsecols)
        means = N.sum(avgcol)/float(len(avgcol))
        for fcn in extras:
            try:
                test = fcn(avgcol)
            except Exception:
                test = N.array(['N/A']*len(means))
            means = aabut(means,test)
        return means

    if type(keepcols) not in [ListType,TupleType,N.ArrayType]:
        keepcols = [keepcols]
    values = colex(a,keepcols)
    uniques = unique(values)  # get a LIST, so .sort keeps rows intact
    uniques.sort()
    newlist = []
    for item in uniques:
        if type(item) not in [ListType,TupleType,N.ArrayType]:
            item = [item]
        tmprows = alinexand(a,keepcols,item)
        row = list(item)      # build the output row without mutating item
        for col in collapsecols:
            avgcol = acolex(tmprows,col)
            row.append(cfcn(avgcol))
            for fcn in extras:
                try:
                    test = fcn(avgcol)
                except Exception:
                    test = 'N/A'
                row.append(test)
        # one output row per unique condition, appended only after ALL of
        # the collapsed columns have been processed
        newlist.append(row)
    try:
        new_a = N.array(newlist)
    except TypeError:         # mixed contents (e.g., 'N/A') need an Object array
        new_a = N.array(newlist,'O')
    return new_a
 832  
 
 833  
 
 def adm (a,criterion):
    """
    Returns rows from the passed array (or list of lists) that meet the
    criteria in the passed criterion expression (a string as a function of x).

    SECURITY NOTE: criterion is handed to eval, so it must never be built
    from untrusted input.

    Usage:   adm (a,criterion)   where criterion is like 'x[2]==37'
    Returns: an array of the matching rows (an Object array if the rows
             cannot be stored in a regular array)
    """
    # NOTE(review): eval runs arbitrary code -- only pass trusted criteria.
    keep = eval('lambda x: '+criterion)
    lines = [row for row in a if keep(row)]
    try:
        lines = N.array(lines)
    except Exception:    # narrower than a bare except; retry as an Object array
        lines = N.array(lines,'O')
    return lines
 848  
 
 849  
 
 def isstring(x):
    """
    Tests whether x is a string, i.e. whether its type is exactly StringType.

    Usage:   isstring(x)
    Returns: 1 if x is a string, 0 otherwise
    """
    if type(x) != StringType:
        return 0
    return 1
 855  
 
 856  
 
 def alinexand (a,columnlist,valuelist):
    """
    Returns the rows of an array where col (from columnlist) = val
    (from valuelist).  One value is required for each column in columnlist.
    If columnlist is empty there is nothing to test, so every row is returned.

    The test is assembled as an expression string and evaluated by adm, so
    the values should come from trusted data.

    Usage:   alinexand (a,columnlist,valuelist)
    Returns: the rows of a where columnlist[i]=valuelist[i] for ALL i
    """
    if type(columnlist) not in [ListType,TupleType,N.ArrayType]:
        columnlist = [columnlist]
    if type(valuelist) not in [ListType,TupleType,N.ArrayType]:
        valuelist = [valuelist]
    tests = []
    for i in range(len(columnlist)):
        value = valuelist[i]
        if type(value) in [StringType,type(1.0)]:
            # repr quotes and escapes strings correctly and keeps full float
            # precision; str() can round a float so it no longer equals itself
            critval = repr(value)
        else:
            critval = str(value)
        tests.append('x['+str(columnlist[i])+']=='+critval)
    if tests == []:
        criterion = '1'          # no conditions: all rows qualify
    else:
        criterion = ' and '.join(tests)
    return adm(a,criterion)
 878  
 
 879  
 
 def alinexor (a,columnlist,valuelist):
    """
    Returns the rows of an array where col (from columnlist) = val (from
    valuelist).  One value is required for each column in columnlist.
    The exception is if either columnlist or valuelist has only 1 value,
    in which case that item will be expanded to match the length of the
    other list.  If columnlist is empty there is nothing that could match,
    so no rows are returned.

    The test is assembled as an expression string and evaluated by adm, so
    the values should come from trusted data.

    Usage:   alinexor (a,columnlist,valuelist)
    Returns: the rows of a where columnlist[i]=valuelist[i] for ANY i
    """
    if type(columnlist) not in [ListType,TupleType,N.ArrayType]:
        columnlist = [columnlist]
    if type(valuelist) not in [ListType,TupleType,N.ArrayType]:
        valuelist = [valuelist]
    # stretch a single column (or value) to pair with every value (or column)
    if len(columnlist) == 1 and len(valuelist) > 1:
        columnlist = columnlist*len(valuelist)
    elif len(valuelist) == 1 and len(columnlist) > 1:
        valuelist = valuelist*len(columnlist)
    tests = []
    for i in range(len(columnlist)):
        value = valuelist[i]
        if type(value) in [StringType,type(1.0)]:
            # repr quotes and escapes strings correctly and keeps full float
            # precision; str() can round a float so it no longer equals itself
            critval = repr(value)
        else:
            critval = str(value)
        tests.append('x['+str(columnlist[i])+']=='+critval)
    if tests == []:
        criterion = '0'          # no conditions: no row can qualify
    else:
        criterion = ' or '.join(tests)
    return adm(a,criterion)
 908  
 
 909  
 
 def areplace (a,oldval,newval):
    """
    Replaces all occurrences of oldval with newval in array a.

    Usage:   areplace(a,oldval,newval)
    Returns: a new array with the replacements made
    """
    # Select elementwise rather than blending with 0/1 masks
    # (not_equal*a + equal*newval): multiplying an inf or nan operand by a
    # 0 mask entry gives nan, which corrupted the result.
    return N.where(N.equal(a,oldval),newval,a)
 918  
 
 919  
 
 def arecode (a,listmap,col='all'):
    """
    Remaps the values in an array to a new set of values (useful when
    you need to recode data from (e.g.) strings to numbers as most stats
    packages require).  Can work on SINGLE columns, or 'all' columns at once.
    Every pair in listmap is applied; each value is recoded at most once
    (a replacement is never recoded again by another pair) and, if several
    pairs share the same old value, the first one wins.

    Usage:   arecode (a,listmap,col='all')
    Returns: a version of array a where listmap[i][0] = (instead) listmap[i][1]
    """
    ashape = a.shape
    if col == 'all':
        work = a.flat
    else:
        work = acolex(a,col)
        work = work.flat

    # Object storage is needed as soon as any replacement is a string, or
    # when the data already live in an Object array.
    useobjects = work.typecode()=='O' or a.typecode()=='O'
    for pair in listmap:
        if type(pair[1]) == StringType:
            useobjects = 1

    if useobjects:
        work = N.array(work,'O')
        a = N.array(a,'O')
        for i in range(len(work)):
            for pair in listmap:
                if work[i]==pair[0]:
                    work[i] = pair[1]
                    break      # recode each element once; first match wins
    else:   # must be a non-Object type array and replacements
        work = N.array(work)   # work on a copy, never on a itself
        original = work
        pairs = list(listmap)
        pairs.reverse()        # apply last-to-first so the FIRST match wins
        for pair in pairs:
            # always test against the original values so that the output of
            # one pair is not picked up by another
            work = N.where(N.equal(original,pair[0]),pair[1],work)

    if col == 'all':
        return N.reshape(work,ashape)
    else:
        return N.concatenate([a[:,0:col],work[:,N.NewAxis],a[:,col+1:]],1)
 949  
 
 950  
 
 def arowcompare(row1, row2):
    """
    Compares two rows from an array, regardless of whether it is an
    array of numbers or of python objects (which requires the cmp function).

    Usage:   arowcompare(row1,row2)
    Returns: an array of equal length containing 1s where the two rows had
             identical elements and 0 otherwise
    """
    # typecode must be CALLED on both rows; comparing the bound method itself
    # to 'O' is always false and sent Object rows down the numeric path.
    if row1.typecode()=='O' or row2.typecode()=='O':
        cmpvect = N.logical_not(abs(N.array(map(cmp,row1,row2)))) # cmp fcn gives -1,0,1
    else:
        cmpvect = N.equal(row1,row2)
    return cmpvect
 965  
 
 966  
 
 def arowsame(row1, row2):
    """
    Checks whether two rows from an array match element for element.  The
    elementwise comparison is delegated to arowcompare, so rows of numbers
    and rows of python objects are both handled.

    Usage:   arowsame(row1,row2)
    Returns: 1 if the two rows are identical, 0 otherwise.
    """
    # identical only if every elementwise comparison came out true
    return N.alltrue(arowcompare(row1,row2))
 977  
 
 978  
 
 def asortrows(a,axis=0):
    """
    Sorts an array "by rows".  Unlike Numeric.sort(), which reorders the
    elements WITHIN the given axis, this keeps each slice along the given
    axis intact and only moves whole slices 'up or down' relative to one
    another (slices are compared as Python lists).

    Usage:   asortrows(a,axis=0)
    Returns: sorted version of a
    """
    swapped = (axis != 0)
    if swapped:
        # bring the requested axis to the front so its slices act as rows
        a = N.swapaxes(a, axis, 0)
    rows = a.tolist()
    rows.sort()
    result = N.array(rows)
    if swapped:
        # put the axes back where the caller had them
        result = N.swapaxes(result, axis, 0)
    return result
 997  
 
 998  
 
 def aunique(inarray):
    """
    Returns unique items in the FIRST dimension of the passed array, in
    order of first appearance. Only works on arrays NOT including string
    items.

    Usage:   aunique (inarray)
    Returns: an array of the unique items (empty if inarray is empty)
    """
    def addrow (rows,item):
        # append one more item along the first dimension of rows
        try:
            return N.concatenate( [rows,item[N.NewAxis,:]] )
        except TypeError:    # the item to add isn't a list
            return N.concatenate([rows,N.array([item])])

    if len(inarray) == 0:                  # nothing to compare; inarray[0] would fail
        return N.array(inarray)
    uniques = N.array([inarray[0]])
    if len(uniques.shape) == 1:            # IF IT'S A 1D ARRAY
        for item in inarray[1:]:
            if N.add.reduce(N.equal(uniques,item).flat) == 0:
                # items are single values here, so wrap each in a 1-item array
                uniques = N.concatenate([uniques,N.array([item])])
    else:                                  # IT MUST BE A 2+D ARRAY
        if inarray.typecode() != 'O':  # not an Object array
            for item in inarray[1:]:
                # add the item only if no row of uniques equals it everywhere
                if not N.sum(N.alltrue(N.equal(uniques,item),1)):
                    uniques = addrow(uniques,item)
        else:   # must be an Object array, alltrue/equal functions don't work
            for item in inarray[1:]:
                newflag = 1
                for unq in uniques:  # NOTE: cmp --> 0=same, -1=<, 1=>
                    test = N.sum(abs(N.array(map(cmp,item,unq))))
                    if test == 0:   # if item identical to any 1 row in uniques
                        newflag = 0 # then not a novel item to add
                        break
                if newflag == 1:
                    uniques = addrow(uniques,item)
    return uniques
1038  
 
1039  
 
1040 - def aduplicates(inarray): 
1041      """
 
1042  Returns duplicate items in the FIRST dimension of the passed array. Only
 
1043  works on arrays NOT including string items.
 
1044  
 
1045  Usage:   aunique (inarray)
 
1046  """ 
1047      inarray = N.array(inarray) 
1048      if len(inarray.shape) == 1:            # IF IT'S A 1D ARRAY 
1049          dups = [] 
1050          inarray = inarray.tolist() 
1051          for i in range(len(inarray)): 
1052              if inarray[i] in inarray[i+1:]: 
1053                  dups.append(inarray[i]) 
1054          dups = aunique(dups) 
1055      else:                                  # IT MUST BE A 2+D ARRAY 
1056          dups = [] 
1057          aslist = inarray.tolist() 
1058          for i in range(len(aslist)): 
1059              if aslist[i] in aslist[i+1:]: 
1060                  dups.append(aslist[i]) 
1061          dups = unique(dups) 
1062          dups = N.array(dups) 
1063      return dups 
1064  
 
1065  except ImportError:    # IF NUMERIC ISN'T AVAILABLE, SKIP ALL arrayfuncs 
1066   pass 
1067