1 """
2 For storing the results of TEES training.
3 """
4 import sys, os, shutil
5 import filecmp
6 import zipfile
7 import tempfile
8
9 NOTHING = object()
10
12 """
13 The Model object is an interface to a model file on the disk. The model file
14 itself is simply a directory or zip-archive storing all the files that result
15 from TEES being trained. In addition, it can store named string values for
16 saved settings etc.
17
18 When a member of a model is accessed, it is copied to a temporary cache directory.
19 When a model is saved, files that have changed in the cache are copied to the model
directory/archive. Note that both files and strings that are added to the model
are saved to it only when Model.save is called.
22 """
def __init__(self, path, mode="r", verbose=True, compression=zipfile.ZIP_DEFLATED):
    """
    Make a new model or open an existing one.

    @param path: The model file or directory. If making a new model, a path ending in ".zip" results in a compressed archive.
    @param mode: r, w or a for read, write or append
    @param verbose: Model reports what is happening
    @param compression: The compression method if the model is a zip-archive.
    """
    # Member name -> path of its cached copy (None until it has been cached)
    self.members = {}
    # Name of the member file that stores the named string values
    self.valueFileName = "TEES_MODEL_VALUES.tsv"
    self.compression = compression
    # Assigned before open() so that every attribute exists by the time any
    # other method (e.g. one that reports progress) runs during opening.
    self.verbose = verbose
    self.workdir = None
    self.mode = None
    self.path = None
    self.open(path, mode)
40
43
45 if self.workdir != None:
46 shutil.rmtree(self.workdir)
47 self.workdir = None
48 self.path = None
49 self.members = None
50
def add(self, name):
    """
    Register a member name in the model without a cached file.

    @param name: Path of the file inside the model
    """
    # None marks a member that has not been copied to the cache yet
    self.members.update({name: None})
53
def insert(self, filepath, name):
    """
    Copy an external file into the model's cache and register it as a member.

    @param filepath: Path to the file being added
    @param name: Path of the file inside the model
    """
    cachedCopy = os.path.join(self.workdir, name)
    shutil.copy2(filepath, cachedCopy)
    self.members[name] = cachedCopy
63
def importFrom(self, model, members, strings=None):
    """
    Copy file members, and optionally named strings, from another model.

    @param model: the model to copy from
    @param members: names of the file members to copy
    @param strings: names of the strings to copy, or None to copy no strings
    """
    for memberName in members:
        cachedPath = model.get(memberName)
        self.insert(cachedPath, memberName)
    if strings is None:
        return
    for stringName in strings:
        self.addStr(stringName, model.getStr(stringName))
73
75 """
76 Add multiple name/value pairs
77 """
78 for key in sorted(dict.keys()):
79 self.addStr(key, dict[key])
80
def addStr(self, name, value):
    """
    Add a named string to the model, or remove it when the value is None.

    @param name: the name of the string
    @param value: the string, or None to delete the string called name
    """
    # Names and values are written one pair per line, separated by a tab,
    # so these characters would corrupt the stored values.
    for c in ["\n", "\t", "\r"]:
        assert c not in name, (c, name, value)
        if value is not None:
            assert c not in value, (c, name, value)
    values = self._getValues()
    # The decision is made on the value: a real value is stored, None deletes.
    if value is not None:
        values[name] = value
    elif name in values:
        del values[name]
    self._setValues(values)
97
def getStr(self, name, defaultIfNotExist=NOTHING, asType=None):
    """
    Get a named string from the model

    @param name : the name of the string
    @param defaultIfNotExist: if set to a value other than NOTHING, will be returned if a name does not exist. Otherwise an exception is raised.
    @param asType : if set, cast the return value to this type
    @return: the stored string (converted with asType if given), or defaultIfNotExist
    @raise IOError: if the name does not exist and no default was given
    """
    values = self._getValues()
    if name in values:
        if asType is None:
            return values[name]
        return asType(values[name])
    # NOTHING is a sentinel object, so it is compared by identity; this keeps
    # defaults with overloaded comparison operators working.
    if defaultIfNotExist is not NOTHING:
        return defaultIfNotExist
    raise IOError("String named '" + name + "' not defined in model " + self.path)
116
def save(self):
    """
    Save a model.

    When saving a model, files that have changed in the cache are written to the model.
    For a directory model the changed files are copied into the directory. For a
    zip-archive model the archive is unpacked to a temporary directory, the changed
    files are copied over it and the archive is rebuilt.

    @raise IOError: if the model is open in read mode ("r")
    """
    import binascii  # local import: only needed for the archive checksum comparison

    if self.mode == "r":
        raise IOError("Model not open for writing")

    # Only members that have a cached copy on disk can have changed
    cachedMembers = []
    for name in sorted(self.members.keys()):
        cached = self.members[name]
        if cached is not None and os.path.exists(cached):
            cachedMembers.append((name, cached))

    # Check which files have changed in the cache
    changed = []
    if self.isPackage:
        package = zipfile.ZipFile(self.path, "r", self.compression)
        try:
            packageNames = set(package.namelist())
            for name, cached in cachedMembers:
                if name not in packageNames:
                    changed.append(name)
                    continue
                packageFileInfo = package.getinfo(name)
                if os.stat(cached).st_size != packageFileInfo.file_size:
                    changed.append(name)
                    continue
                # Same size: compare CRC-32 checksums so that an edit which
                # keeps the file length is still detected.
                crc = 0
                with open(cached, "rb") as f:
                    for chunk in iter(lambda: f.read(65536), b""):
                        crc = binascii.crc32(chunk, crc)
                if (crc & 0xffffffff) != packageFileInfo.CRC:
                    changed.append(name)
        finally:
            package.close()
    else:
        for name, cached in cachedMembers:
            modelFilename = os.path.join(self.path, name)
            if not os.path.exists(modelFilename) or not filecmp.cmp(modelFilename, cached):
                changed.append(name)

    if len(changed) == 0:
        return
    if self.verbose:
        sys.stderr.write("Saving model \"" + self.path + "\" (cache:" + self.workdir + ", changed:" + ",".join(changed) + ")\n")

    if not self.isPackage:
        for name in changed:
            shutil.copy2(self.members[name], os.path.join(self.path, name))
        return

    # Rebuild the archive: unpack it, overlay the changed files, repack it
    tempdir = tempfile.mkdtemp()
    try:
        package = zipfile.ZipFile(self.path, "r", self.compression)
        try:
            package.extractall(tempdir)
        finally:
            package.close()
        for name in changed:
            shutil.copy2(self.members[name], os.path.join(tempdir, name))
        package = zipfile.ZipFile(self.path, "w", self.compression)
        try:
            # Walk the whole tree so members stored in subdirectories are kept
            for dirpath, dirnames, filenames in os.walk(tempdir):
                for filename in filenames:
                    fullPath = os.path.join(dirpath, filename)
                    package.write(fullPath, os.path.relpath(fullPath, tempdir))
        finally:
            package.close()
    finally:
        shutil.rmtree(tempdir)
162
164 """
165 Save a model with a different name.
166 """
167 print >> sys.stderr, "Saving model \"" + self.path, "as", outPath
168 if os.path.exists(outPath):
169 print >> sys.stderr, outPath, "exists, removing"
170 if os.path.isdir(outPath):
171 shutil.rmtree(outPath)
172 else:
173 os.remove(outPath)
174 if self.isPackage:
175
176 shutil.copy2(self.path, outPath)
177
178 package = zipfile.ZipFile(outPath, "a")
179 for f in os.listdir(self.workdir):
180 package.write(f)
181 package.close()
182 else:
183
184 shutil.copytree(self.path, outPath)
185
186 for f in os.listdir(self.workdir):
187 shutil.copy2(os.path.join(self.workdir, f), outPath)
188
190 return name in self.members
191
def get(self, name, addIfNotExist=False):
    """
    Return a file member from the model. The member is extracted to a cached directory
    and returned as a path name. If this file is modified, when the model is saved,
    it will be copied back to the model.

    @param name : the path to the file inside the model
    @param addIfNotExist : Return a file name which can be created for adding the file
    @return: path of the member's cached copy; for a member that is not yet stored
             in the model the file at this path does not exist yet
    @raise IOError: if the model has no such member and addIfNotExist is False
    """
    if name not in self.members:
        if not addIfNotExist:
            raise IOError("Model has no member \"" + name + "\"")
        self.add(name)

    if self.members[name] is None:  # the member has not been cached yet
        cacheFilename = os.path.join(self.workdir, name)
        if self.isPackage:
            package = zipfile.ZipFile(self.path, "r")
            try:
                if self.verbose:
                    sys.stderr.write("Caching model \"" + self.path + "\" member \"" + name + "\" to \"" + cacheFilename + "\"\n")
                package.extract(name, self.workdir)
            except KeyError:
                # The member is registered but not stored in the archive yet
                # (e.g. it was just added); there is nothing to extract.
                pass
            finally:
                package.close()
        elif os.path.exists(os.path.join(self.path, name)):
            if self.verbose:
                sys.stderr.write("Caching model \"" + self.path + "\" member \"" + name + "\" to \"" + cacheFilename + "\"\n")
            shutil.copy2(os.path.join(self.path, name), cacheFilename)
        self.members[name] = cacheFilename
    return self.members[name]
222
223 - def open(self, path, mode="r"):
232
234 if mode == "w" and os.path.exists(path):
235 shutil.rmtree(path)
236 if not os.path.exists(path):
237 os.mkdir(path)
238 open(os.path.join(path, self.valueFileName), "wt").close()
239
240 members = os.listdir(path)
241 for member in members:
242 self.members[member] = None
243 self.isPackage = False
244
246 if mode == "w" and os.path.exists(path):
247 os.remove(path)
248 if not os.path.exists(path):
249 package = zipfile.ZipFile(path, "w", self.compression)
250 temp = tempfile.mkstemp()
251 package.write(temp[1], self.valueFileName)
252 package.close()
253 os.remove(temp[1])
254
255 package = zipfile.ZipFile(path, "r")
256 for name in package.namelist():
257 self.members[name] = None
258 package.close()
259 self.isPackage = True
260
261
def _getValues(self):
    """
    Read the named strings stored in the model's values file.

    Each line of the file holds one name and its value, separated by the
    first tab on the line. Surrounding whitespace is stripped from both.

    @return: a dictionary mapping each string's name to its value; empty if
             the values file does not exist
    """
    values = {}
    settingsFileName = self.get(self.valueFileName, True)
    if not os.path.exists(settingsFileName):
        return values
    with open(settingsFileName, "rt") as f:
        for line in f:
            # A blank line holds no name/value pair; skipping it avoids a
            # ValueError when unpacking the split below.
            if line.strip() == "":
                continue
            key, value = line.split("\t", 1)
            values[key.strip()] = value.strip()
    return values
274
def _setValues(self, values):
    """
    Write the named strings to the cached copy of the model's values file.

    The file is rewritten completely, one "name<TAB>value" line per string,
    sorted by name.

    @param values: a dictionary mapping each string's name to its value
    """
    # The with-statement closes the file even if a write fails
    with open(self.get(self.valueFileName, True), "wt") as f:
        for key in sorted(values.keys()):
            f.write(key + "\t" + values[key] + "\n")
280