Package TEES :: Package Detectors :: Module Detector
[hide private]

Source Code for Module TEES.Detectors.Detector

  1  """ 
  2  The base class for the object oriented interface. 
  3  """ 
  4  import sys, os 
  5  import shutil 
  6  import itertools 
  7  import tempfile 
  8  sys.path.append(os.path.dirname(os.path.abspath(__file__))+"/..") 
  9  from Core.Model import Model 
 10  from StepSelector import StepSelector 
 11  import Utils.Parameters as Parameters 
 12  import Evaluators.BioNLP11GeniaTools 
 13  import types 
 14  import time, datetime 
 15   
class Detector():
    """
    Detector is the central class of the TEES object oriented interface. Subclasses derived from
    it encapsulate the event and relation detection process used by TEES for the various tasks
    it has been developed for. When extending TEES, a new Detector can be derived from this class.

    The Detector is designed for a pipeline where interaction XML is converted to machine learning
    examples, these examples are used to train a classifier and this classifier in turn is used
    to classify unknown text.
    """

    # Types treated as model *paths* rather than already opened model objects.
    # Python 2 provides (str, unicode) as types.StringTypes; fall back to str
    # on interpreters where that attribute does not exist.
    _stringTypes = getattr(types, "StringTypes", (str,))

    def __init__(self):
        """Initialise the detector with no builder, classifier, evaluator or model attached."""
        self.exampleBuilder = None
        self.exampleWriter = None
        self.Classifier = None
        self.evaluator = None
        self.bioNLPSTParams = None
        self.stEvaluator = Evaluators.BioNLP11GeniaTools
        self.modelPath = None
        self.combinedModelPath = None
        self.tag = "UNKNOWN-"  # prefix for the names of the entries this detector stores in a model
        self.model = None
        self.combinedModel = None
        self.workDir = ""  # "" means the current system path
        self.workDirIsTempDir = False

        self.exampleStyle = None
        self.classifierParameters = None
        self.parse = "split-mccc-preparsed"
        self.tokenization = None

        self.state = None  # None, TRAIN, CLASSIFY
        self.select = None
        self.STATE_TRAIN = "TRAIN"
        self.STATE_CLASSIFY = "CLASSIFY"

        self.connection = None
        self.modelsToClose = []  # models opened by openModel, closed by _closeModels
        self.variablesToRemove = set()  # attribute names added by initVariables
        self.debug = False

    def __del__(self):
        """Close every model that is still registered for closing."""
        self._closeModels()

    def _closeModels(self):
        """
        Close all models opened through openModel and forget them.

        Each model is removed from the list before it is closed, so a model is
        never closed twice and, if one close() raises, the remaining models
        stay registered for a later attempt.
        """
        # getattr: __del__ may run on an instance whose __init__ never completed
        pending = getattr(self, "modelsToClose", None)
        while pending:
            pending.pop(0).close()

    def checkStep(self, step, verbose=True):
        """
        Return True if 'step' should be executed, False otherwise.

        Without a step selector every step is executed; otherwise the decision
        is delegated to the selector's check(). When the step is entered and
        'verbose' is set, a banner is written to stderr.
        """
        if self.select is None or self.select.check(step):
            if verbose:
                sys.stderr.write("=== ENTER STEP " + self.__class__.__name__ + ":" + self.state + ":" + step + " ===\n")
            return True
        else:
            return False

    def getStepStatus(self, step):
        """Return the selector's status for 'step', or "NOT_EXIST" when no selector is active."""
        if self.select is None:
            return "NOT_EXIST"
        else:
            return self.select.getStepStatus(step)

    def setWorkDir(self, workDir=""):
        """
        Define the working directory.

        @param workDir: None keeps the currently defined work directory, an
        empty or whitespace-only string selects the current system path, and
        any other value is stored with a trailing "/" so that it can be
        combined with other paths using '+'.
        """
        if workDir is None:  # bypass assignment and keep currently defined workdir
            return
        # an active temporary directory must first be released with deleteTempWorkDir
        assert not self.workDirIsTempDir
        if workDir.strip() == "":  # current system path
            self.workDir = ""
        elif workDir.endswith("/"):  # already in the form used for '+' concatenation
            self.workDir = workDir
        else:
            self.workDir = workDir + "/"

    def setTempWorkDir(self):
        """Create a new temporary directory and use it as the work directory."""
        tempDir = tempfile.mkdtemp()
        # same convention as setWorkDir: the stored path ends with "/"
        if not tempDir.endswith("/"):
            tempDir += "/"
        self.workDir = tempDir
        self.workDirIsTempDir = True

    def deleteTempWorkDir(self):
        """Remove the temporary work directory, if one is in use, and fall back to the current path."""
        if self.workDirIsTempDir:
            sys.stderr.write("Removing temporary work directory " + self.workDir + "\n")
            shutil.rmtree(self.workDir)
            self.workDirIsTempDir = False
            self.setWorkDir("")

    def setConnection(self, connection):
        """
        Store 'connection' on the detector and return it.

        The detector's debug flag is copied onto the connection; passing None
        simply clears the stored connection.
        """
        self.connection = connection
        if connection is not None:
            connection.debug = self.debug
        return connection

    def getModel(self):
        """Return self.model, opened through openModel if it is given as a path."""
        return self.openModel(self.model)

    def _openForSaving(self, model, modelMustExist):
        """
        Resolve 'model' for saveStr/saveStrings.

        A path is opened in mode "a"; anything else is returned unchanged,
        after asserting that it is not None when 'modelMustExist' is set.
        """
        if isinstance(model, self._stringTypes):
            return self.openModel(model, "a")
        if modelMustExist:
            assert model is not None
        return model

    def saveStr(self, name, value, model=None, modelMustExist=True):
        """
        Add the string 'value' to 'model' under 'name' and save the model.

        @param model: a model object or a path to one
        @param modelMustExist: if False, a None model is silently skipped
        """
        modelObj = self._openForSaving(model, modelMustExist)
        if modelObj is not None:
            modelObj.addStr(name, value)
            modelObj.save()

    def saveStrings(self, dict, model=None, modelMustExist=True):
        """
        Add all the strings in 'dict' to 'model' and save the model.

        @param model: a model object or a path to one
        @param modelMustExist: if False, a None model is silently skipped
        """
        modelObj = self._openForSaving(model, modelMustExist)
        if modelObj is not None:
            modelObj.addStrings(dict)
            modelObj.save()

    def getStr(self, name, model):
        """
        Return the string stored under 'name' in 'model'.

        @param model: a model object or a path to one; a path is opened in
        mode "r" and closed later together with the other opened models
        """
        modelObj = self.openModel(model, "r")
        return modelObj.getStr(name)

    def addClassifierModel(self, model, classifierModelPath, classifierParameters, threshold=None):
        """
        Copy a classifier model file into 'model' and record its parameters.

        The file at 'classifierModelPath' is copied to the location the model
        gives for this detector's "classifier-model" entry. The classifier
        parameters, and the threshold if one is given, are added as strings.
        The model is not saved here.

        @return: the location the classifier model file was copied to
        """
        classifierModel = model.get(self.tag + "classifier-model", True)
        shutil.copy2(classifierModelPath, classifierModel)
        model.addStr(self.tag + "classifier-parameter", Parameters.toString(Parameters.get(classifierParameters)))
        if threshold is not None:
            model.addStr(self.tag + "threshold", str(threshold))
        return classifierModel

    def openModel(self, model, mode="r"):
        """
        Return 'model' as a model object.

        A path is opened as a Model in the given mode and registered to be
        closed by _closeModels; any other value is returned unchanged.
        """
        if isinstance(model, self._stringTypes):
            model = Model(model, mode)
            self.modelsToClose.append(model)
        return model

    def getBioNLPSharedTaskParams(self, parameters=None, model=None):
        """
        Return the BioNLP Shared Task parameters ("convert", "evaluate", "scores").

        When 'parameters' is None they are read from the "BioNLPSTParams"
        string of 'model', or default to an empty set if no model is given.
        """
        if parameters is None:
            if model is not None:
                model = self.openModel(model, "r")
                parameters = model.getStr("BioNLPSTParams", defaultIfNotExist=None)
            else:
                parameters = {}
        return Parameters.get(parameters, ["convert", "evaluate", "scores"])

    # The list defaults below are never mutated, so sharing them between calls is harmless.
    def buildExamples(self, model, datas, outputs, golds=[], exampleStyle=None, saveIdsToModel=False, parse=None):
        """
        Run the example builder for each (data, output, gold) triple.

        'datas', 'outputs' and 'golds' are processed in parallel. A single
        data or gold item may itself be a list or tuple, in which case all of
        its members are written to the same output. The example style and the
        parse are read from 'model' when they are not given.
        """
        if exampleStyle is None:
            exampleStyle = model.getStr(self.tag + "example-style")
        if parse is None:
            parse = self.getStr(self.tag + "parse", model)
        # izip_longest on Python 2, zip_longest where the former does not exist
        zipLongest = getattr(itertools, "izip_longest", None) or itertools.zip_longest
        for data, output, gold in zipLongest(datas, outputs, golds, fillvalue=[]):
            sys.stderr.write("Example generation for %s\n" % (output,))
            if not isinstance(data, (list, tuple)):
                data = [data]
            if not isinstance(gold, (list, tuple)):
                gold = [gold]
            append = False
            for dataSet, goldSet in zipLongest(data, gold, fillvalue=None):
                if dataSet is not None:
                    self.exampleBuilder.run(dataSet, output, parse, None, exampleStyle,
                                            model.get(self.tag + "ids.classes", True),
                                            model.get(self.tag + "ids.features", True),
                                            goldSet, append, saveIdsToModel)
                    # only the first data set written to an output starts it from scratch
                    append = True
        if saveIdsToModel:
            model.save()

    def enterState(self, state, steps=None, fromStep=None, toStep=None, omitSteps=None):
        """
        Enter 'state', or update the step limits if the detector is already in it.

        On first entry the state is recorded, the entry time is stored and, if
        'steps' are given, a StepSelector is created for them. On re-entry the
        state and steps must match the current ones and only the
        fromStep/toStep limits of the existing selector are updated.
        """
        if self.state is None:
            assert self.select is None
            self.state = state
            if self.select is None or (self.select.currentStep is None and fromStep == steps[0]):
                sys.stderr.write("* " + self.__class__.__name__ + ":" + state + "(ENTER)" + " *\n")
                self.enterStateTime = time.time()
            if steps is not None:
                self.select = StepSelector(steps, fromStep, toStep, omitSteps=omitSteps)
        else:
            assert self.state == state, (state, self.state)
            assert self.select.steps == steps, (steps, self.select.steps)
            self.select.setLimits(fromStep, toStep)

    def initVariables(self, **variables):
        """
        Set the given keyword arguments as attributes of the detector.

        Nothing is done if a state is already active. The attribute names are
        remembered so that exitState can remove them again.
        """
        if self.state is None:
            for name in sorted(variables.keys()):
                setattr(self, name, variables[name])
                self.variablesToRemove.add(name)

    # The list default below is never mutated, so sharing it between calls is harmless.
    def initModel(self, model, saveParams=[]):
        """
        Prepare 'model' for writing and store detector attributes in it.

        @param model: None (returned as is), a path (opened in mode "w") or a
        model object already open in mode "a" or "w"
        @param saveParams: (attributeName, modelStringName) pairs; each named
        attribute of the detector is saved to the model under the given name
        @return: the model object, saved, or None
        """
        if model is None:
            return model
        elif isinstance(model, self._stringTypes):
            model = self.openModel(model, "w")
        else:
            assert model.mode in ["a", "w"]
        for param in saveParams:
            model.addStr(param[1], Parameters.toString(getattr(self, param[0])))
        model.save()
        return model

    def exitState(self):
        """
        Leave the current state once its last step has been reached.

        When there is no step selector, or the selector is at its final step,
        the state and selector are reset, the attributes added by
        initVariables are removed and all opened models are closed. Otherwise
        nothing happens.
        """
        if self.select is None or self.select.currentStep == self.select.steps[-1]:
            if self.select is not None:
                self.select.printStepTime()  # print last step time
            elapsed = datetime.timedelta(seconds=time.time() - self.enterStateTime)
            sys.stderr.write("* " + self.__class__.__name__ + ":" + self.state + "(EXIT) " + str(elapsed) + " *\n")
            self.state = None
            self.select = None
            for name in self.variablesToRemove:
                if hasattr(self, name):
                    delattr(self, name)
            self._closeModels()

    def train(self, trainData=None, optData=None,
              model=None, combinedModel=None,
              exampleStyle=None, classifierParameters=None,
              parse=None, tokenization=None,
              fromStep=None, toStep=None):
        """Train the detector. The base class implementation does nothing."""
        pass

    def classify(self, data, model, output):
        """Classify 'data' using 'model'. The base class implementation does nothing."""
        pass
245