1 """
2 The base class for the object oriented interface.
3 """
4 import sys, os
5 import shutil
6 import itertools
7 import tempfile
8 sys.path.append(os.path.dirname(os.path.abspath(__file__))+"/..")
9 from Core.Model import Model
10 from StepSelector import StepSelector
11 import Utils.Parameters as Parameters
12 import Evaluators.BioNLP11GeniaTools
13 import types
14 import time, datetime
15
17 """
18 Detector is the central class of the TEES object oriented interface. Subclasses derived from
19 it encapsulate the event and relation detection process used by TEES for the various tasks
20 it has been developed for. When extending TEES, a new Detector can be derived from this class.
21
22 The Detector is designed for a pipeline where interaction XML is converted to machine learning
23 examples, these examples are used to train a classifier and this classifier in turn is used
24 to classify unknown text.
25 """
27 self.exampleBuilder = None
28 self.exampleWriter = None
29 self.Classifier = None
30 self.evaluator = None
31 self.bioNLPSTParams = None
32 self.stEvaluator = Evaluators.BioNLP11GeniaTools
33 self.modelPath = None
34 self.combinedModelPath = None
35 self.tag = "UNKNOWN-"
36 self.model = None
37 self.combinedModel = None
38 self.workDir = ""
39 self.workDirIsTempDir = False
40
41 self.exampleStyle = None
42 self.classifierParameters = None
43 self.parse = "split-mccc-preparsed"
44 self.tokenization = None
45
46 self.state = None
47 self.select = None
48 self.STATE_TRAIN = "TRAIN"
49 self.STATE_CLASSIFY = "CLASSIFY"
50
51
52 self.connection = None
53 self.modelsToClose = []
54 self.variablesToRemove = set()
55 self.debug=False
56
59
61 for model in self.modelsToClose:
62 model.close()
63
65 if self.select == None or self.select.check(step):
66 if verbose: print >> sys.stderr, "=== ENTER STEP", self.__class__.__name__ + ":" + self.state + ":" + step, "==="
67 return True
68 else:
69 return False
70
72 if self.select == None:
73 return "NOT_EXIST"
74 else:
75 return self.select.getStepStatus(step)
76
78 if workDir == None:
79 return
80 elif workDir.strip() == "":
81 assert not self.workDirIsTempDir
82 self.workDir = ""
83 elif not workDir.endswith("/"):
84 assert not self.workDirIsTempDir
85 self.workDir = workDir + "/"
86
88 self.workDir = tempfile.mkdtemp()
89 self.workDirIsTempDir = True
90
92 if self.workDirIsTempDir:
93 print >> sys.stderr, "Removing temporary work directory", self.workDir
94 shutil.rmtree(self.workDir)
95 self.workDirIsTempDir = False
96 self.setWorkDir("")
97
98
99
100
102 self.connection = connection
103 self.connection.debug = self.debug
104 return connection
105
106
107
108
109
110
111
112
113
114
115
116
117
120
def saveStr(self, name, value, model=None, modelMustExist=True):
    """
    Store one named string in a model and save the model.

    name -- key handed to the model's addStr()
    value -- value handed to the model's addStr()
    model -- either a string (opened through self.openModel with mode "a")
             or an already opened model object; may be None
    modelMustExist -- when true, a non-string model that is None fails an
                      assertion; when false, a None model makes the call a no-op

    Returns None.
    """
    # types.StringTypes only exists on Python 2; fall back to str where it is absent.
    # isinstance (rather than an exact type match) also accepts string subclasses.
    stringTypes = getattr(types, "StringTypes", str)
    if isinstance(model, stringTypes):
        modelObj = self.openModel(model, "a")
    else:
        if modelMustExist:
            # Kept as an assert so that callers still see an AssertionError.
            assert model is not None
        modelObj = model
    if modelObj is None:
        # Only reachable with modelMustExist=False (or with asserts disabled).
        return
    modelObj.addStr(name, value)
    modelObj.save()
131
def saveStrings(self, dict, model=None, modelMustExist=True):
    """
    Store several named strings in a model and save the model.

    dict -- mapping handed unchanged to the model's addStrings(); the parameter
            name shadows the builtin but is kept because callers may pass it
            by keyword
    model -- either a string (opened through self.openModel with mode "a")
             or an already opened model object; may be None
    modelMustExist -- when true, a non-string model that is None fails an
                      assertion; when false, a None model makes the call a no-op

    Returns None.
    """
    # types.StringTypes only exists on Python 2; fall back to str where it is absent.
    # isinstance (rather than an exact type match) also accepts string subclasses.
    stringTypes = getattr(types, "StringTypes", str)
    if isinstance(model, stringTypes):
        modelObj = self.openModel(model, "a")
    else:
        if modelMustExist:
            # Kept as an assert so that callers still see an AssertionError.
            assert model is not None
        modelObj = model
    if modelObj is None:
        # Only reachable with modelMustExist=False (or with asserts disabled).
        return
    modelObj.addStrings(dict)
    modelObj.save()
142
def getStr(self, name, model):
    """
    Read one named string from a model.

    name -- key handed to the model's getStr()
    model -- either a string (opened through self.openModel with mode "r")
             or an already opened model object

    Returns whatever the model's getStr() returns for name.

    A None model is not supported: it fails with AttributeError on the
    getStr() lookup. The former "close the model when model is None" branch
    sat after that lookup and could therefore never run; it has been removed.
    """
    # types.StringTypes only exists on Python 2; fall back to str where it is absent.
    # isinstance (rather than an exact type match) also accepts string subclasses.
    stringTypes = getattr(types, "StringTypes", str)
    if isinstance(model, stringTypes):
        # NOTE(review): a model opened here is not closed by this method;
        # presumably self.openModel keeps track of it for later cleanup - confirm.
        modelObj = self.openModel(model, "r")
    else:
        modelObj = model
    return modelObj.getStr(name)
151
def addClassifierModel(self, model, classifierModelPath, classifierParameters, threshold=None):
    """
    Copy a trained classifier model file into a model and record its settings.

    model -- opened model object; must provide get() and addStr()
    classifierModelPath -- path of the classifier model file to copy
    classifierParameters -- parameters, normalised with Parameters.get() and
                            stored as a string via Parameters.toString()
    threshold -- optional value; stored as str(threshold) when not None

    Returns the destination path obtained from the model.

    All entries are stored under names prefixed with self.tag. The model is
    not saved here; no save() call is made by this method.
    """
    # NOTE(review): the second argument (True) presumably asks the model to
    # provide a new member path - confirm against Model.get.
    classifierModel = model.get(self.tag+"classifier-model", True)
    shutil.copy2(classifierModelPath, classifierModel)
    parameterString = Parameters.toString(Parameters.get(classifierParameters))
    model.addStr(self.tag+"classifier-parameter", parameterString)
    if threshold is not None:
        model.addStr(self.tag+"threshold", str(threshold))
    return classifierModel
159
161 if type(model) in types.StringTypes:
162 model = Model(model, mode)
163 self.modelsToClose.append(model)
164 return model
165
167 if parameters == None:
168 if model != None:
169 model = self.openModel(model, "r")
170 parameters = model.getStr("BioNLPSTParams", defaultIfNotExist=None)
171 else:
172 parameters = {}
173 return Parameters.get(parameters, ["convert", "evaluate", "scores"])
174
# Generate machine learning examples for each output with self.exampleBuilder.
#
# Parameters (as used by the code below):
#   model          - model object; getStr(), get() and save() are called on it
#   datas          - input data sets; an item may itself be a list/tuple of data sets
#   outputs        - outputs paired with the items of datas; each is logged and
#                    passed to self.exampleBuilder.run
#   golds          - optional gold data, paired with datas in the same way
#   exampleStyle   - when None, read from the model under self.tag+"example-style"
#   saveIdsToModel - forwarded to self.exampleBuilder.run; when true, model.save()
#                    is also called
#   parse          - when None, read through self.getStr under self.tag+"parse"
#
# datas, outputs and golds are walked in lockstep; shorter sequences are padded
# with [] (izip_longest fillvalue). Items that are not a list or tuple are wrapped
# in a one-element list, and data sets are then paired with gold sets, padding
# with None. Data sets that are None are not passed to the example builder.
#
# NOTE(review): golds=[] is a shared mutable default; nothing visible here mutates it.
# NOTE(review): indentation was lost in this listing, so whether "append = True"
# belongs to the "if dataSet != None" branch or to the loop body, and whether the
# final "if saveIdsToModel" runs once or per output, cannot be confirmed from here.
175 - def buildExamples(self, model, datas, outputs, golds=[], exampleStyle=None, saveIdsToModel=False, parse=None):
176 if exampleStyle == None:
177 exampleStyle = model.getStr(self.tag+"example-style")
178 if parse == None:
179 parse = self.getStr(self.tag+"parse", model)
180 for data, output, gold in itertools.izip_longest(datas, outputs, golds, fillvalue=[]):
181 print >> sys.stderr, "Example generation for", output
182 if not isinstance(data, (list, tuple)): data = [data]
183 if not isinstance(gold, (list, tuple)): gold = [gold]
# append starts as False for every output and is switched to True afterwards;
# it is passed to self.exampleBuilder.run together with the data set.
184 append = False
185 for dataSet, goldSet in itertools.izip_longest(data, gold, fillvalue=None):
186 if dataSet != None:
187 self.exampleBuilder.run(dataSet, output, parse, None, exampleStyle, model.get(self.tag+"ids.classes", True), model.get(self.tag+"ids.features", True), goldSet, append, saveIdsToModel)
188 append = True
189 if saveIdsToModel:
190 model.save()
191
def enterState(self, state, steps=None, fromStep=None, toStep=None, omitSteps=None):
    """
    Enter a processing state, or re-enter the state that is already active.

    state -- name of the state to enter
    steps -- list of step names for the state, or None
    fromStep, toStep -- step limits handed to the step selector
    omitSteps -- handed to StepSelector when a new selector is created

    First entry (self.state is None): records the state and the entry time,
    prints an ENTER banner to stderr and, when steps is given, creates a new
    StepSelector in self.select.

    Re-entry (self.state already set): asserts that state and steps match the
    active state and selector, then only updates the selector's limits.
    """
    if self.state is None:
        assert self.select is None
        self.state = state
        # With the assertion above the first operand is always true, so the
        # second operand (which indexes steps) is not evaluated on this path.
        if self.select is None or (self.select.currentStep is None and fromStep == steps[0]):
            print >> sys.stderr, "*", self.__class__.__name__ + ":" + state + "(ENTER)", "*"
        self.enterStateTime = time.time()
        if steps is not None:
            self.select = StepSelector(steps, fromStep, toStep, omitSteps=omitSteps)
    else:
        assert self.state == state, (state, self.state)
        assert self.select.steps == steps, (steps, self.select.steps)
        self.select.setLimits(fromStep, toStep)
205
207 if self.state == None:
208 for name in sorted(vars.keys()):
209 setattr(self, name, vars[name])
210 self.variablesToRemove.add(name)
211
213 if model == None:
214 return model
215 elif type(model) in types.StringTypes:
216 model = self.openModel(model, "w")
217 else:
218 assert model.mode in ["a", "w"]
219 for param in saveParams:
220 model.addStr(param[1], Parameters.toString(getattr(self, param[0])))
221 model.save()
222 return model
223
225 if self.select == None or self.select.currentStep == self.select.steps[-1]:
226 if self.select != None:
227 self.select.printStepTime()
228 print >> sys.stderr, "*", self.__class__.__name__ + ":" + self.state + "(EXIT)", str(datetime.timedelta(seconds=time.time()-self.enterStateTime)), "*"
229 self.state = None
230 self.select = None
231 for name in self.variablesToRemove:
232 if hasattr(self, name):
233 delattr(self, name)
234 self._closeModels()
235
236 - def train(self, trainData=None, optData=None,
237 model=None, combinedModel=None,
238 exampleStyle=None, classifierParameters=None,
239 parse=None, tokenization=None,
240 fromStep=None, toStep=None):
242
243 - def classify(self, data, model, output):
245