Package TEES :: Package Core :: Module Model
[hide private]

Source Code for Module TEES.Core.Model

  1  """ 
  2  For storing the results of TEES training. 
  3  """ 
  4  import sys, os, shutil 
  5  import filecmp 
  6  import zipfile 
  7  import tempfile 
  8   
  9  NOTHING = object() 
 10   
11 -class Model():
12 """ 13 The Model object is an interface to a model file on the disk. The model file 14 itself is simply a directory or zip-archive storing all the files that result 15 from TEES being trained. In addition, it can store named string values for 16 saved settings etc. 17 18 When a member of a model is accessed, it is copied to a temporary cache directory. 19 When a model is saved, files that have changed in the cache are copied to the model 20 directory/archive. Note that for both files and strings that are added to the model, 21 are saved to it only when Model.save is called. 22 """
23 - def __init__(self, path, mode="r", verbose=True, compression=zipfile.ZIP_DEFLATED):
24 """ 25 Make a new model or open an existing one 26 27 @param path: The model file or directory. If making a new model, a path ending in ".zip" results in a compressed archive. 28 @param mode: r, w or a for read, write or append 29 @param verbose: Model reports what is happening 30 @param compression: The compression method if a the model is a zip-archive. 31 """ 32 self.members = {} # path_inside_model:path_to_cache_file (path_to_cache_file == None for members not yet requested) 33 self.valueFileName = "TEES_MODEL_VALUES.tsv" 34 self.compression = compression 35 self.workdir = None 36 self.mode = None 37 self.path = None 38 self.open(path, mode) 39 self.verbose = verbose
40
41 - def __del__(self):
42 self.close()
43
44 - def close(self):
45 if self.workdir != None: 46 shutil.rmtree(self.workdir) 47 self.workdir = None 48 self.path = None 49 self.members = None
50
51 - def add(self, name):
52 self.members[name] = None
53
54 - def insert(self, filepath, name):
55 """ 56 Adds a new file to the model. 57 58 @param filepath: Path to the file being added 59 @param name: Path of the file inside the model 60 """ 61 shutil.copy2(filepath, os.path.join(self.workdir, name)) 62 self.members[name] = os.path.join(self.workdir, name)
63
64 - def importFrom(self, model, members, strings=None):
65 """ 66 Copy several members from another model 67 """ 68 for member in members: 69 self.insert(model.get(member), member) 70 if strings != None: 71 for string in strings: 72 self.addStr(string, model.getStr(string))
73
74 - def addStrings(self, dict):
75 """ 76 Add multiple name/value pairs 77 """ 78 for key in sorted(dict.keys()): 79 self.addStr(key, dict[key])
80
81 - def addStr(self, name, value):
82 """ 83 Add a named string to the model 84 85 @param : the name of the string 86 @param : the string 87 """ 88 for c in ["\n", "\t", "\r"]: 89 assert c not in name, (c, name, value) 90 assert c not in value, (c, name, value) 91 values = self._getValues() 92 if name != None: 93 values[name] = value 94 elif name in values: # remove the parameter 95 del values[name] 96 self._setValues(values)
97
98 - def getStr(self, name, defaultIfNotExist=NOTHING, asType=None):
99 """ 100 Get a named string from the model 101 102 @param name : the name of the string 103 @param defaultIfNotExist: if set to a value != NOTHING, will be returned if a name does not exist. Otherwise an exception is raised. 104 @param asType : if set, cast the return value to this type 105 """ 106 values = self._getValues() 107 if name in values: 108 if asType == None: 109 return values[name] 110 else: 111 return asType(values[name]) 112 elif defaultIfNotExist != NOTHING: 113 return defaultIfNotExist 114 else: 115 raise IOError("String named '" + name + "' not defined in model " + self.path)
116
117 - def save(self):
118 """ 119 Save a model. 120 121 When saving a model, files that have changed in the cache are written to the model. 122 """ 123 if self.mode == "r": 124 raise IOError("Model not open for writing") 125 if self.isPackage: 126 package = zipfile.ZipFile(self.path, "r", self.compression) 127 packageNames = package.namelist() 128 # Check which files have changed in the cache 129 changed = [] 130 for name in sorted(self.members.keys()): 131 cached = self.members[name] 132 if cached != None and os.path.exists(cached): # cache file exists 133 if self.isPackage: 134 cachedInfo = os.stat(cached) 135 packageFileInfo = None 136 if name in packageNames: 137 packageFileInfo = package.getinfo(name) 138 if packageFileInfo == None or cachedInfo.st_size != packageFileInfo.file_size: 139 changed.append(name) 140 else: 141 modelFilename = os.path.join(self.path, name) 142 if not os.path.exists(modelFilename) or not filecmp.cmp(modelFilename, cached): 143 changed.append(name) 144 # Copy changed files from the cache to the model 145 if len(changed) > 0: 146 if self.verbose: print >> sys.stderr, "Saving model \"" + self.path + "\" (cache:" + self.workdir + ", changed:" + ",".join(changed) + ")" 147 if self.isPackage: 148 tempdir = tempfile.mkdtemp() # place to unpack existing model 149 package.extractall(tempdir) # unpack model 150 package.close() # close model 151 for name in changed: # add changed files from cache 152 shutil.copy2(self.members[name], os.path.join(tempdir, name)) # from cache to unpacked model 153 package = zipfile.ZipFile(self.path, "w", self.compression) # recreate the model 154 for name in os.listdir(tempdir): # add all files to model 155 package.write(os.path.join(tempdir, name), name) # add file from tempdir 156 shutil.rmtree(tempdir) # remove temporary directory 157 else: 158 for name in changed: 159 shutil.copy2(self.members[name], os.path.join(self.path, name)) 160 if self.isPackage: 161 package.close()
162
163 - def saveAs(self, outPath):
164 """ 165 Save a model with a different name. 166 """ 167 print >> sys.stderr, "Saving model \"" + self.path, "as", outPath 168 if os.path.exists(outPath): 169 print >> sys.stderr, outPath, "exists, removing" 170 if os.path.isdir(outPath): 171 shutil.rmtree(outPath) 172 else: 173 os.remove(outPath) 174 if self.isPackage: 175 # copy current model to new location 176 shutil.copy2(self.path, outPath) 177 # add cached (potentially updated) files 178 package = zipfile.ZipFile(outPath, "a") 179 for f in os.listdir(self.workdir): 180 package.write(f) 181 package.close() 182 else: 183 # copy files from model 184 shutil.copytree(self.path, outPath) 185 # copy cached (potentially updated) files 186 for f in os.listdir(self.workdir): 187 shutil.copy2(os.path.join(self.workdir, f), outPath)
188
189 - def hasMember(self, name):
190 return name in self.members
191
192 - def get(self, name, addIfNotExist=False):
193 """ 194 Return a file member from the model. The member is extracted to a cached directory 195 and returned as a path name. If this file is modified, when the model is saved, 196 it will be copied back to the model. 197 198 @param name : the path to the file inside the model 199 @param addIfNotExist : Return a file name which can be created for adding the file 200 """ 201 if name not in self.members: 202 if addIfNotExist: 203 self.add(name) 204 else: 205 raise IOError("Model has no member \"" + name + "\"") 206 # Cache member if not yet cached 207 if self.members[name] == None: # file has not been cached yet 208 cacheFilename = os.path.join(self.workdir, name) 209 if self.isPackage: 210 package = zipfile.ZipFile(self.path, "r") 211 try: 212 if self.verbose: print >> sys.stderr, "Caching model \"" + self.path + "\" member \"" + name + "\" to \"" + cacheFilename + "\"" 213 package.extract(name, self.workdir) 214 except: # member does not exist yet 215 pass 216 package.close() 217 elif os.path.exists(os.path.join(self.path, name)): # member already exists inside the model directory 218 if self.verbose: print >> sys.stderr, "Caching model \"" + self.path + "\" member \"" + name + "\" to \"" + cacheFilename + "\"" 219 shutil.copy2(os.path.join(self.path, name), cacheFilename) 220 self.members[name] = cacheFilename 221 return self.members[name]
222
223 - def open(self, path, mode="r"):
224 assert mode in ["r", "w", "a"] 225 self.mode = mode 226 self.path = path 227 if self.path.endswith('.zip'): 228 self._openPackage(path, mode) 229 else: 230 self._openDir(path, mode) 231 self.workdir = tempfile.mkdtemp()
232
233 - def _openDir(self, path, mode):
234 if mode == "w" and os.path.exists(path): 235 shutil.rmtree(path) 236 if not os.path.exists(path): 237 os.mkdir(path) 238 open(os.path.join(path, self.valueFileName), "wt").close() 239 # get members 240 members = os.listdir(path) 241 for member in members: 242 self.members[member] = None 243 self.isPackage = False
244
245 - def _openPackage(self, path, mode):
246 if mode == "w" and os.path.exists(path): 247 os.remove(path) 248 if not os.path.exists(path): # create empty archive 249 package = zipfile.ZipFile(path, "w", self.compression) 250 temp = tempfile.mkstemp() 251 package.write(temp[1], self.valueFileName) 252 package.close() 253 os.remove(temp[1]) 254 # get members 255 package = zipfile.ZipFile(path, "r") 256 for name in package.namelist(): 257 self.members[name] = None 258 package.close() 259 self.isPackage = True
260 261 # Value file
262 - def _getValues(self):
263 values = {} 264 settingsFileName = self.get(self.valueFileName, True) 265 if os.path.exists(settingsFileName): 266 f = open(settingsFileName, "rt") 267 for line in f: 268 key, value = line.split("\t", 1) 269 key = key.strip() 270 value = value.strip() 271 values[key] = value 272 f.close() 273 return values
274
275 - def _setValues(self, values):
276 f = open(self.get(self.valueFileName, True), "wt") 277 for key in sorted(values.keys()): 278 f.write(key + "\t" + values[key] + "\n") 279 f.close()
280