Package TEES :: Package Detectors :: Module EventDetector
[hide private]

Source Code for Module TEES.Detectors.EventDetector

  1  import sys, os 
  2  import shutil 
  3  import types 
  4  import copy 
  5  from Detector import Detector 
  6  from EntityDetector import EntityDetector 
  7  from EdgeDetector import EdgeDetector 
  8  from UnmergingDetector import UnmergingDetector 
  9  from ModifierDetector import ModifierDetector 
 10  #from Core.RecallAdjust import RecallAdjust 
 11  import Utils.Parameters as Parameters 
 12  import Utils.InteractionXML as InteractionXML 
 13  import Evaluators.EvaluateInteractionXML as EvaluateInteractionXML 
 14  import Utils.STFormat.ConvertXML 
 15  import Utils.STFormat.Compare 
 16  import Evaluators.BioNLP11GeniaTools 
 17   
class EventDetector(Detector):
    """
    A multi-stage detector used for the BioNLP Shared Task type events.

    The detector chains four component detectors: an EntityDetector for
    triggers, an EdgeDetector, an UnmergingDetector and a ModifierDetector.
    Training (train) and classification (classify) are organised as named
    steps that are gated with self.checkStep().

    NOTE(review): self.stEvaluator, self.bioNLPSTParams, self.workDir and the
    helpers initVariables / enterState / checkStep / exitState / initModel /
    openModel / saveStr / saveStrings / getStr / setTempWorkDir /
    deleteTempWorkDir / getBioNLPSharedTaskParams are used below but are not
    defined in this class; presumably they come from Detector -- verify.
    """

    def __init__(self):
        """Create the four component detectors and set the "event-" tag."""
        Detector.__init__(self)
        self.triggerDetector = EntityDetector()
        self.edgeDetector = EdgeDetector()
        self.unmergingDetector = UnmergingDetector()
        # When True, trainUnmergingDetector() adds self-classified training
        # data to the unmerging examples.
        self.doUnmergingSelfTraining = True
        self.modifierDetector = ModifierDetector()
        # State name entered by the component detectors while train() runs
        self.STATE_COMPONENT_TRAIN = "COMPONENT_TRAIN"
        self.tag = "event-"

    def setConnection(self, connection):
        """Pass connection to every component detector and return it."""
        self.triggerDetector.setConnection(connection)
        self.edgeDetector.setConnection(connection)
        self.unmergingDetector.setConnection(connection)
        self.modifierDetector.setConnection(connection)
        return connection

    def setWorkDir(self, workDir):
        """Set the working directory of this detector and of all components."""
        Detector.setWorkDir(self, workDir) # for EventDetector
        # setup components
        for detector in [self.triggerDetector, self.edgeDetector,
                         self.unmergingDetector, self.modifierDetector]:
            if detector is not None:
                detector.setWorkDir(workDir)

    def train(self, trainData=None, optData=None,
              model=None, combinedModel=None,
              triggerExampleStyle=None, edgeExampleStyle=None, unmergingExampleStyle=None, modifierExampleStyle=None,
              triggerClassifierParameters=None, edgeClassifierParameters=None,
              unmergingClassifierParameters=None, modifierClassifierParameters=None,
              recallAdjustParameters=None, unmerging=False, trainModifiers=False,
              fullGrid=False, task=None,
              parse=None, tokenization=None,
              fromStep=None, toStep=None,
              workDir=None):
        """
        Train the component detectors and run the parameter grid search.

        The steps are, in order: EXAMPLES, BEGIN-MODEL, END-MODEL,
        BEGIN-COMBINED-MODEL, SELF-TRAIN-EXAMPLES-FOR-UNMERGING,
        UNMERGING-EXAMPLES, BEGIN-UNMERGING-MODEL, END-UNMERGING-MODEL, GRID,
        BEGIN-COMBINED-MODEL-FULLGRID and END-COMBINED-MODEL. fromStep and
        toStep are handed to enterState() to select a sub-range.

        trainData / optData are used as strings here (str.replace is called
        on them to strip "-nodup"). model and combinedModel are (re-)opened
        in append mode. All example-style and classifier-parameter arguments,
        recallAdjustParameters, unmerging, trainModifiers, fullGrid, task,
        parse and tokenization are forwarded to initVariables().
        NOTE(review): later code reads self.optData, self.fullGrid,
        self.unmerging etc., so initVariables presumably stores its keyword
        arguments as attributes of the same name -- verify in Detector.

        If workDir is given it is used for all intermediate files and reset
        to "" at the end.
        """
        # Initialize the training process ##############################
        self.initVariables(trainData=trainData, optData=optData, model=model, combinedModel=combinedModel,
                           triggerExampleStyle=triggerExampleStyle, edgeExampleStyle=edgeExampleStyle,
                           unmergingExampleStyle=unmergingExampleStyle, modifierExampleStyle=modifierExampleStyle,
                           triggerClassifierParameters=triggerClassifierParameters,
                           edgeClassifierParameters=edgeClassifierParameters,
                           unmergingClassifierParameters=unmergingClassifierParameters,
                           modifierClassifierParameters=modifierClassifierParameters,
                           recallAdjustParameters=recallAdjustParameters, unmerging=unmerging, trainModifiers=trainModifiers,
                           fullGrid=fullGrid, task=task, parse=parse, tokenization=tokenization)
        self.setWorkDir(workDir)
        # Begin the training process ####################################
        self.enterState(self.STATE_TRAIN, ["EXAMPLES", "BEGIN-MODEL", "END-MODEL", "BEGIN-COMBINED-MODEL",
                                           "SELF-TRAIN-EXAMPLES-FOR-UNMERGING", "UNMERGING-EXAMPLES", "BEGIN-UNMERGING-MODEL", "END-UNMERGING-MODEL",
                                           "GRID", "BEGIN-COMBINED-MODEL-FULLGRID", "END-COMBINED-MODEL"], fromStep, toStep)
        self.triggerDetector.enterState(self.STATE_COMPONENT_TRAIN)
        self.edgeDetector.enterState(self.STATE_COMPONENT_TRAIN)
        self.unmergingDetector.enterState(self.STATE_COMPONENT_TRAIN)
        self.modifierDetector.enterState(self.STATE_COMPONENT_TRAIN)
        if self.checkStep("EXAMPLES"):
            self.model = self.initModel(self.model,
                                        [("triggerExampleStyle", self.triggerDetector.tag+"example-style"),
                                         ("triggerClassifierParameters", self.triggerDetector.tag+"classifier-parameters-train"),
                                         ("edgeExampleStyle", self.edgeDetector.tag+"example-style"),
                                         ("edgeClassifierParameters", self.edgeDetector.tag+"classifier-parameters-train"),
                                         ("unmergingExampleStyle", self.unmergingDetector.tag+"example-style"),
                                         ("unmergingClassifierParameters", self.unmergingDetector.tag+"classifier-parameters-train"),
                                         ("modifierExampleStyle", self.modifierDetector.tag+"example-style"),
                                         ("modifierClassifierParameters", self.modifierDetector.tag+"classifier-parameters-train")])
            self.combinedModel = self.initModel(self.combinedModel)
            # Store the parse and task names under every component's tag
            tags = [self.triggerDetector.tag, self.edgeDetector.tag, self.unmergingDetector.tag]
            if trainModifiers:
                tags.append(self.modifierDetector.tag)
            stringDict = {}
            for tag in tags:
                stringDict[tag+"parse"] = parse
                stringDict[tag+"task"] = task
            self.saveStrings(stringDict, self.model)
            self.saveStrings(stringDict, self.combinedModel, False)
            # Trigger and edge examples are built from the data files without
            # the "-nodup" suffix; modifier examples use the names as given.
            self.triggerDetector.buildExamples(self.model,
                                               [optData.replace("-nodup", ""), trainData.replace("-nodup", "")],
                                               [self.workDir+self.triggerDetector.tag+"opt-examples.gz",
                                                self.workDir+self.triggerDetector.tag+"train-examples.gz"],
                                               saveIdsToModel=True)
            self.edgeDetector.buildExamples(self.model,
                                            [optData.replace("-nodup", ""), trainData.replace("-nodup", "")],
                                            [self.workDir+self.edgeDetector.tag+"opt-examples.gz",
                                             self.workDir+self.edgeDetector.tag+"train-examples.gz"],
                                            saveIdsToModel=True)
            if trainModifiers:
                self.modifierDetector.buildExamples(self.model,
                                                    [optData, trainData],
                                                    [self.workDir+self.modifierDetector.tag+"opt-examples.gz",
                                                     self.workDir+self.modifierDetector.tag+"train-examples.gz"],
                                                    saveIdsToModel=True)
        # (Re-)open models in case we start after the "EXAMPLES" step
        self.model = self.openModel(model, "a")
        self.combinedModel = self.openModel(combinedModel, "a")
        if self.checkStep("BEGIN-MODEL"):
            self.triggerDetector.beginModel(None, self.model,
                                            [self.workDir+self.triggerDetector.tag+"train-examples.gz"],
                                            self.workDir+self.triggerDetector.tag+"opt-examples.gz")
            self.edgeDetector.beginModel(None, self.model,
                                         [self.workDir+self.edgeDetector.tag+"train-examples.gz"],
                                         self.workDir+self.edgeDetector.tag+"opt-examples.gz")
            if trainModifiers:
                self.modifierDetector.beginModel(None, self.model,
                                                 [self.workDir+self.modifierDetector.tag+"train-examples.gz"],
                                                 self.workDir+self.modifierDetector.tag+"opt-examples.gz")
        if self.checkStep("END-MODEL"):
            self.triggerDetector.endModel(None, self.model, self.workDir+self.triggerDetector.tag+"opt-examples.gz")
            self.edgeDetector.endModel(None, self.model, self.workDir+self.edgeDetector.tag+"opt-examples.gz")
            if trainModifiers:
                self.modifierDetector.endModel(None, self.model, self.workDir+self.modifierDetector.tag+"opt-examples.gz")
        if self.checkStep("BEGIN-COMBINED-MODEL"):
            # The combined model is trained on train + opt examples
            if not self.fullGrid:
                print >> sys.stderr, "Training combined model before grid search"
                self.triggerDetector.beginModel(None, self.combinedModel,
                                                [self.workDir+self.triggerDetector.tag+"train-examples.gz",
                                                 self.workDir+self.triggerDetector.tag+"opt-examples.gz"],
                                                self.workDir+self.triggerDetector.tag+"opt-examples.gz", self.model)
                self.edgeDetector.beginModel(None, self.combinedModel,
                                             [self.workDir+self.edgeDetector.tag+"train-examples.gz",
                                              self.workDir+self.edgeDetector.tag+"opt-examples.gz"],
                                             self.workDir+self.edgeDetector.tag+"opt-examples.gz", self.model)
            else:
                print >> sys.stderr, "Combined model will be trained after grid search"
            if trainModifiers:
                print >> sys.stderr, "Training combined model for modifier detection"
                self.modifierDetector.beginModel(None, self.combinedModel,
                                                 [self.workDir+self.modifierDetector.tag+"train-examples.gz",
                                                  self.workDir+self.modifierDetector.tag+"opt-examples.gz"],
                                                 self.workDir+self.modifierDetector.tag+"opt-examples.gz", self.model)
        self.trainUnmergingDetector()
        if self.checkStep("GRID"):
            self.doGrid()
        if self.checkStep("BEGIN-COMBINED-MODEL-FULLGRID"):
            if self.fullGrid:
                print >> sys.stderr, "Training combined model after grid search"
                self.triggerDetector.beginModel(None, self.combinedModel,
                                                [self.workDir+self.triggerDetector.tag+"train-examples.gz",
                                                 self.workDir+self.triggerDetector.tag+"opt-examples.gz"],
                                                self.workDir+self.triggerDetector.tag+"opt-examples.gz", self.model)
                self.edgeDetector.beginModel(None, self.combinedModel,
                                             [self.workDir+self.edgeDetector.tag+"train-examples.gz",
                                              self.workDir+self.edgeDetector.tag+"opt-examples.gz"],
                                             self.workDir+self.edgeDetector.tag+"opt-examples.gz", self.model)
                if trainModifiers:
                    print >> sys.stderr, "Training combined model for modifier detection"
                    self.modifierDetector.beginModel(None, self.combinedModel,
                                                     [self.workDir+self.modifierDetector.tag+"train-examples.gz",
                                                      self.workDir+self.modifierDetector.tag+"opt-examples.gz"],
                                                     self.workDir+self.modifierDetector.tag+"opt-examples.gz", self.model)
            else:
                print >> sys.stderr, "Combined model has been trained before grid search"
        if self.checkStep("END-COMBINED-MODEL"):
            self.triggerDetector.endModel(None, self.combinedModel, self.workDir+self.triggerDetector.tag+"opt-examples.gz")
            self.edgeDetector.endModel(None, self.combinedModel, self.workDir+self.edgeDetector.tag+"opt-examples.gz")
            if trainModifiers:
                self.modifierDetector.endModel(None, self.combinedModel, self.workDir+self.modifierDetector.tag+"opt-examples.gz")
        # End the training process ####################################
        if workDir is not None:
            self.setWorkDir("")
        self.exitState()
        self.triggerDetector.exitState()
        self.edgeDetector.exitState()
        self.unmergingDetector.exitState()
        self.modifierDetector.exitState()

    def doGrid(self):
        """
        Search trigger / recall-booster / edge parameter combinations.

        For every combination the optimization data is classified with the
        trigger detector (only when the trigger or booster value changed) and
        the edge detector, and the result is scored with evaluateGrid(). The
        best booster value is saved as "recallAdjustParameter" to both
        models; with self.fullGrid the best trigger and edge classifier
        models are also added to self.model. Intermediate grid files are
        removed afterwards.

        Raises an Exception if no combination produced a result.
        """
        print >> sys.stderr, "--------- Parameter grid search ---------"
        # Build trigger examples
        self.triggerDetector.buildExamples(self.model, [self.optData], [self.workDir+"grid-trigger-examples.gz"])

        if self.fullGrid:
            # Parameters to optimize
            ALL_PARAMS = {
                "trigger":[int(i) for i in Parameters.get(self.triggerClassifierParameters, valueListKey="c")["c"]],
                "booster":[float(i) for i in self.recallAdjustParameters.split(",")],
                "edge":[int(i) for i in Parameters.get(self.edgeClassifierParameters, valueListKey="c")["c"]]}
        else:
            # Only the booster is varied; the c-values come from the model
            ALL_PARAMS = {
                "trigger":Parameters.get(self.model.getStr(self.triggerDetector.tag+"classifier-parameter"), valueListKey="c")["c"],
                "booster":[float(i) for i in self.recallAdjustParameters.split(",")],
                "edge":Parameters.get(self.model.getStr(self.edgeDetector.tag+"classifier-parameter"), valueListKey="c")["c"]}

        paramCombinations = Parameters.getCombinations(ALL_PARAMS, ["trigger", "booster", "edge"])
        prevParams = None
        EDGE_MODEL_STEM = os.path.join(self.edgeDetector.workDir, os.path.normpath(self.model.path)+"-edge-models/model-c_")
        TRIGGER_MODEL_STEM = os.path.join(self.triggerDetector.workDir, os.path.normpath(self.model.path)+"-trigger-models/model-c_")
        bestResults = None
        for i, params in enumerate(paramCombinations):
            print >> sys.stderr, "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
            print >> sys.stderr, "Processing params", str(i+1) + "/" + str(len(paramCombinations)), params
            print >> sys.stderr, "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
            # Triggers and Boost
            if prevParams is None or prevParams["trigger"] != params["trigger"] or prevParams["booster"] != params["booster"]:
                print >> sys.stderr, "Classifying trigger examples for parameters", "trigger:" + str(params["trigger"]), "booster:" + str(params["booster"])
                xml = self.triggerDetector.classifyToXML(self.optData, self.model,
                                                         self.workDir+"grid-trigger-examples", self.workDir+"grid-",
                                                         classifierModel=TRIGGER_MODEL_STEM+str(params["trigger"]),
                                                         recallAdjust=params["booster"])
            prevParams = params
            # Classify with pre-defined model
            edgeClassifierModel = EDGE_MODEL_STEM+str(params["edge"])
            xml = self.edgeDetector.classifyToXML(xml, self.model,
                                                  self.workDir+"grid-edge-examples", self.workDir+"grid-",
                                                  classifierModel=edgeClassifierModel, goldData=self.optData)
            bestResults = self.evaluateGrid(xml, params, bestResults)
        # Remove remaining intermediate grid files
        for tag1 in ["edge", "trigger", "unmerging"]:
            for tag2 in ["examples", "pred.xml.gz"]:
                if os.path.exists(self.workDir+"grid-"+tag1+"-"+tag2):
                    os.remove(self.workDir+"grid-"+tag1+"-"+tag2)
        print >> sys.stderr, "Parameter grid search complete"
        print >> sys.stderr, "Tested", len(paramCombinations), "combinations"
        # evaluateGrid() leaves bestResults untouched when a combination has
        # no predictions, so it is still None if nothing could be evaluated.
        if bestResults is None:
            raise Exception("Parameter grid search produced no results for any of the "
                            + str(len(paramCombinations)) + " parameter combinations")
        print >> sys.stderr, "Best parameters:", bestResults[0]
        print >> sys.stderr, "Best result:", bestResults[2] # f-score
        # Save grid model
        self.saveStr("recallAdjustParameter", str(bestResults[0]["booster"]), self.model)
        self.saveStr("recallAdjustParameter", str(bestResults[0]["booster"]), self.combinedModel, False)
        if self.fullGrid: # define best models
            self.triggerDetector.addClassifierModel(self.model, TRIGGER_MODEL_STEM+str(bestResults[0]["trigger"]), bestResults[0]["trigger"])
            self.edgeDetector.addClassifierModel(self.model, EDGE_MODEL_STEM+str(bestResults[0]["edge"]), bestResults[0]["edge"])
        # Remove work files. Every suffix starts with "-" so that the joined
        # name matches the files written above (e.g. grid-trigger-examples.gz).
        for stepTag in [self.workDir+"grid-trigger", self.workDir+"grid-edge", self.workDir+"grid-unmerging"]:
            for fileStem in ["-classifications", "-classifications.log", "-examples.gz", "-pred.xml.gz"]:
                if os.path.exists(stepTag+fileStem):
                    os.remove(stepTag+fileStem)

    def evaluateGrid(self, xml, params, bestResults):
        """
        Score one grid search result and return the updated best result.

        xml is the predicted data of one parameter combination, or None when
        there were no predictions. The predictions are always evaluated with
        EvaluateInteractionXML; when self.bioNLPSTParams["evaluate"] is set
        they are also converted to the shared task format (after unmerging,
        if self.unmerging is set) and scored with self.stEvaluator. The
        shared task score is preferred when the evaluator returns one.

        Returns bestResults unchanged, or a new tuple
        (params, evaluation result, score) when this combination is better.
        """
        if xml is not None:
            # TODO: Where should the EvaluateInteractionXML evaluator come from?
            EIXMLResult = EvaluateInteractionXML.run(self.edgeDetector.evaluator, xml, self.optData, self.parse)
            # Convert to ST-format
            if self.bioNLPSTParams["evaluate"]:
                Utils.STFormat.ConvertXML.toSTFormat(xml, self.workDir+"grid-flat-geniaformat", "a2")
                stFormatDir = self.workDir+"grid-flat-geniaformat"

            if self.unmerging:
                xml = self.unmergingDetector.classifyToXML(xml, self.model, None, self.workDir+"grid-", goldData=self.optData)
                if self.bioNLPSTParams["evaluate"]:
                    Utils.STFormat.ConvertXML.toSTFormat(xml, self.workDir+"grid-unmerging-geniaformat", "a2")
                    # The unmerged output replaces the flat one for scoring
                    stFormatDir = self.workDir+"grid-unmerging-geniaformat"
            # Evaluation
            stEvaluation = None
            if self.bioNLPSTParams["evaluate"]:
                stEvaluation = self.stEvaluator.evaluate(stFormatDir, self.task)
            if stEvaluation is not None:
                if bestResults is None or stEvaluation[0] > bestResults[1][0]:
                    bestResults = (params, stEvaluation, stEvaluation[0])
            else:
                if bestResults is None or EIXMLResult.getData().fscore > bestResults[1].getData().fscore:
                    bestResults = (params, EIXMLResult, EIXMLResult.getData().fscore)
            # Remove the temporary shared task format directories
            if self.bioNLPSTParams["evaluate"]:
                shutil.rmtree(self.workDir+"grid-flat-geniaformat")
                if os.path.exists(self.workDir+"grid-unmerging-geniaformat"):
                    shutil.rmtree(self.workDir+"grid-unmerging-geniaformat")
        else:
            print >> sys.stderr, "No predicted edges"
        return bestResults

    def trainUnmergingDetector(self):
        """
        Run the four unmerging steps of train().

        checkStep() is called for every step even when self.unmerging is
        false; the step body then does not run. With
        self.doUnmergingSelfTraining the training data is first classified
        with the trigger and edge models and the predictions are used as
        additional input for the unmerging training examples. After the
        unmerging model is finished, its example style, id sets and
        classifier model are copied into self.combinedModel (if there is one).
        """
        xml = None
        if not self.unmerging:
            print >> sys.stderr, "No unmerging"
        if self.checkStep("SELF-TRAIN-EXAMPLES-FOR-UNMERGING", self.unmerging) and self.unmerging:
            # Self-classified train data for unmerging
            if self.doUnmergingSelfTraining:
                # This allows limiting to a subcorpus
                triggerStyle = copy.copy(Parameters.get(self.triggerExampleStyle))
                edgeStyle = copy.copy(Parameters.get(self.edgeExampleStyle))
                unmergingStyle = Parameters.get(self.unmergingExampleStyle)
                if "sentenceLimit" in unmergingStyle and unmergingStyle["sentenceLimit"]:
                    triggerStyle["sentenceLimit"] = unmergingStyle["sentenceLimit"]
                    edgeStyle["sentenceLimit"] = unmergingStyle["sentenceLimit"]
                # Build the examples
                xml = self.triggerDetector.classifyToXML(self.trainData, self.model, None, self.workDir+"unmerging-extra-", exampleStyle=triggerStyle)
                xml = self.edgeDetector.classifyToXML(xml, self.model, None, self.workDir+"unmerging-extra-", exampleStyle=edgeStyle)
                assert xml is not None
                EvaluateInteractionXML.run(self.edgeDetector.evaluator, xml, self.trainData, self.parse)
            else:
                print >> sys.stderr, "No self-training for unmerging"
        if self.checkStep("UNMERGING-EXAMPLES", self.unmerging) and self.unmerging:
            # Unmerging example generation
            GOLD_TEST_FILE = self.optData.replace("-nodup", "")
            GOLD_TRAIN_FILE = self.trainData.replace("-nodup", "")
            if self.doUnmergingSelfTraining:
                if xml is None:
                    # The previous step was skipped: use its file on disk
                    xml = self.workDir+"unmerging-extra-edge-pred.xml.gz"
                self.unmergingDetector.buildExamples(self.model,
                                                     [self.optData.replace("-nodup", ""), [self.trainData.replace("-nodup", ""), xml]],
                                                     [self.workDir+"unmerging-opt-examples.gz", self.workDir+"unmerging-train-examples.gz"],
                                                     [GOLD_TEST_FILE, [GOLD_TRAIN_FILE, GOLD_TRAIN_FILE]],
                                                     exampleStyle=self.unmergingExampleStyle, saveIdsToModel=True)
                xml = None
            else:
                self.unmergingDetector.buildExamples(self.model,
                                                     [self.optData.replace("-nodup", ""), self.trainData.replace("-nodup", "")],
                                                     [self.workDir+"unmerging-opt-examples.gz", self.workDir+"unmerging-train-examples.gz"],
                                                     [GOLD_TEST_FILE, GOLD_TRAIN_FILE],
                                                     exampleStyle=self.unmergingExampleStyle, saveIdsToModel=True)
                xml = None
        if self.checkStep("BEGIN-UNMERGING-MODEL", self.unmerging) and self.unmerging:
            self.unmergingDetector.beginModel(None, self.model, self.workDir+"unmerging-train-examples.gz", self.workDir+"unmerging-opt-examples.gz")
        if self.checkStep("END-UNMERGING-MODEL", self.unmerging) and self.unmerging:
            self.unmergingDetector.endModel(None, self.model, self.workDir+"unmerging-opt-examples.gz")
            print >> sys.stderr, "Adding unmerging classifier model to test-set event model"
            if self.combinedModel is not None:
                self.combinedModel.addStr("unmerging-example-style", self.model.getStr("unmerging-example-style"))
                self.combinedModel.insert(self.model.get("unmerging-ids.classes"), "unmerging-ids.classes")
                self.combinedModel.insert(self.model.get("unmerging-ids.features"), "unmerging-ids.features")
                self.unmergingDetector.addClassifierModel(self.combinedModel, self.model.get("unmerging-classifier-model"),
                                                          self.model.getStr("unmerging-classifier-parameter"))
                self.combinedModel.save()

    def classify(self, data, model, output, parse=None, task=None, goldData=None, fromStep=None, toStep=None, omitSteps=None, workDir=None):
        """
        Detect events in data with a trained model.

        The steps are TRIGGERS, EDGES, UNMERGING, MODIFIERS and ST-CONVERT;
        fromStep, toStep and omitSteps are handed to enterState(). UNMERGING
        and MODIFIERS only run when the model has the matching classifier
        model member. Intermediate files are written with the prefix
        <workDir>/<basename of output>-; a temporary work directory is used
        when workDir is None.

        Results: output+"-pred.xml.gz" is a copy of the latest prediction
        work file, and output+"-events.tar.gz" is written when the shared
        task parameters request conversion. If goldData is given it is passed
        to the component detectors and used for the edge evaluation instead
        of the input data.
        """
        xml = None
        model = self.openModel(model, "r")
        self.initVariables(classifyData=data, model=model, xml=None, task=task, parse=parse)
        self.enterState(self.STATE_CLASSIFY, ["TRIGGERS", "EDGES", "UNMERGING", "MODIFIERS", "ST-CONVERT"], fromStep, toStep, omitSteps)
        self.setWorkDir(workDir)
        if workDir is None:
            self.setTempWorkDir()
        workOutputTag = os.path.join(self.workDir, os.path.basename(output) + "-")
        self.model = self.openModel(self.model, "r")
        stParams = self.getBioNLPSharedTaskParams(self.bioNLPSTParams, model)
        if self.checkStep("TRIGGERS"):
            xml = self.triggerDetector.classifyToXML(self.classifyData, self.model, None, workOutputTag,
                                                     goldData=goldData, parse=self.parse,
                                                     recallAdjust=float(self.getStr("recallAdjustParameter", self.model)))
        if self.checkStep("EDGES"):
            xml = self.getWorkFile(xml, workOutputTag + "trigger-pred.xml.gz")
            xml = self.edgeDetector.classifyToXML(xml, self.model, None, workOutputTag, goldData=goldData, parse=self.parse)
            assert xml is not None
            # Fall back to the parse name stored in the model
            if self.parse is None:
                edgeParse = self.getStr(self.edgeDetector.tag+"parse", self.model)
            else:
                edgeParse = self.parse
            if goldData is not None:
                EvaluateInteractionXML.run(self.edgeDetector.evaluator, xml, goldData, edgeParse)
            else:
                EvaluateInteractionXML.run(self.edgeDetector.evaluator, xml, self.classifyData, edgeParse)
        if self.checkStep("UNMERGING"):
            if self.model.hasMember("unmerging-classifier-model"):
                # To avoid running out of memory, always use file on disk
                xml = self.getWorkFile(None, workOutputTag + "edge-pred.xml.gz")
                xml = self.unmergingDetector.classifyToXML(xml, self.model, None, workOutputTag, goldData=goldData, parse=self.parse)
            else:
                print >> sys.stderr, "No model for unmerging"
        if self.checkStep("MODIFIERS"):
            if self.model.hasMember("modifier-classifier-model"):
                xml = self.getWorkFile(xml, [workOutputTag + "unmerging-pred.xml.gz", workOutputTag + "edge-pred.xml.gz"])
                xml = self.modifierDetector.classifyToXML(xml, self.model, None, workOutputTag, goldData=goldData, parse=self.parse)
            else:
                print >> sys.stderr, "No model for modifier detection"
        if self.checkStep("ST-CONVERT"):
            if stParams["convert"]:
                xml = self.getWorkFile(xml, [workOutputTag + "modifier-pred.xml.gz", workOutputTag + "unmerging-pred.xml.gz", workOutputTag + "edge-pred.xml.gz"])
                Utils.STFormat.ConvertXML.toSTFormat(xml, output+"-events.tar.gz", outputTag="a2", writeScores=(stParams["scores"] == True))
                if stParams["evaluate"]:
                    task = self.task
                    if task is None:
                        task = self.getStr(self.edgeDetector.tag+"task", self.model)
                    self.stEvaluator.evaluate(output + "-events.tar.gz", task)
            else:
                print >> sys.stderr, "No BioNLP shared task format conversion"
        # The work files are tried from the last processing stage backwards
        finalXMLFile = self.getWorkFile(None, [workOutputTag + "modifier-pred.xml.gz", workOutputTag + "unmerging-pred.xml.gz", workOutputTag + "edge-pred.xml.gz"])
        if finalXMLFile is not None:
            shutil.copy2(finalXMLFile, output+"-pred.xml.gz")
        self.deleteTempWorkDir()
        self.exitState()

    def getWorkFile(self, fileObject, serializedPath=None):
        """
        Returns fileObject if it is not None, otherwise tries all paths in serializedPath
        and returns the first one that exists. Use this to get an intermediate file in a
        stepwise process.

        serializedPath is either a single path string or a sequence of path
        strings. An AssertionError is raised when no candidate path exists.
        """
        if fileObject is not None:
            return fileObject
        elif not isinstance(serializedPath, types.StringTypes): # multiple files to try
            for sPath in serializedPath:
                if os.path.exists(sPath):
                    return sPath
            assert False, "None of the work files exist: " + str(serializedPath)
        else:
            assert os.path.exists(serializedPath), "Work file does not exist: " + serializedPath
            return serializedPath
375