Package TEES :: Module train
[hide private]

Source Code for Module TEES.train

  1  """ 
  2  Train a new model for event or relation detection. 
  3  """ 
  4  import sys, os 
  5  from Utils.InteractionXML.DeleteElements import getEmptyCorpus 
  6  import Utils.InteractionXML.Catenate as Catenate 
  7  import Utils.Stream as Stream 
  8  import Utils.Settings as Settings 
  9  import Utils.Parameters as Parameters 
 10  from Utils.Connection.Connection import getConnection 
 11  import Utils.STFormat.Compare 
 12  import shutil 
 13  import atexit 
 14  import types 
 15  from Core.Model import Model 
 16  from Detectors.StepSelector import StepSelector 
 17  from Detectors.Preprocessor import Preprocessor 
 18   
def train(output, task=None, detector=None, inputFiles=None, models=None, parse=None,
          processUnmerging=None, processModifiers=None, isSingleStage=False,
          bioNLPSTParams=None, preprocessorParams=None, exampleStyles=None,
          classifierParams=None, doFullGrid=False, deleteOutput=False, copyFrom=None,
          log="log.txt", step=None, omitSteps=None, debug=False, connection=None):
    """
    Train a new model for event or relation detection.

    The function switches into the output directory (see workdir), so the relative
    paths used below ("training", "classification-devel", ...) are created under it.

    @param output: A directory where output files will appear.
    @param task: If defined, overridable default settings are used for many of the training parameters. Must be one of the supported TEES tasks.
    @param detector: a Detector object, or a string defining one to be imported
    @param inputFiles: A dictionary of file names, with keys "train", "devel" and "test"
    @param models: A dictionary of file names defining the place for the new models, with keys "devel" and "test"
    @param parse: The parse element name in the training interaction XML
    @param processUnmerging: Use the unmerging step of EventDetector. True, False or None for task default.
    @param processModifiers: Use the modifier detection step of EventDetector. True, False or None for task default.
    @param isSingleStage: False for EventDetector, True for a single stage detector.
    @param bioNLPSTParams: Parameters controlling BioNLP ST format output.
    @param preprocessorParams: Parameters controlling the preprocessor. Not used for training, but saved to the model for use when classifying.
    @param exampleStyles: A parameter set for controlling example builders.
    @param classifierParams: A parameter set for controlling classifiers.
    @param doFullGrid: Whether all parameters, as opposed to just recall adjustment, are tested in the EventDetector grid search.
    @param deleteOutput: Remove an existing output directory
    @param copyFrom: Copy an existing output directory for use as a template
    @param log: An optional alternative name for the log file. None is for no logging.
    @param step: A step=substep pair, where the steps are "TRAIN", "DEVEL", "EMPTY" and "TEST"
    @param omitSteps: step=substep parameters, where multiple substeps can be defined.
    @param debug: In debug mode, more output is shown, and some temporary intermediate files are saved
    @param connection: A parameter set defining a local or remote connection for training the classifier
    """
    # Insert default arguments where needed
    inputFiles = Parameters.get(inputFiles, {"train":None, "devel":None, "test":None})
    models = Parameters.get(models, {"devel":None, "test":None})
    exampleStyles = Parameters.get(exampleStyles, {"examples":None, "trigger":None, "edge":None, "unmerging":None, "modifiers":None})
    classifierParams = Parameters.get(classifierParams, {"examples":None, "trigger":None, "recall":None, "edge":None, "unmerging":None, "modifiers":None})
    processUnmerging = getDefinedBool(processUnmerging)
    processModifiers = getDefinedBool(processModifiers)
    # Initialize working directory
    workdir(output, deleteOutput, copyFrom, log)
    # Get task specific parameters
    (detector, processUnmerging, processModifiers, isSingleStage, bioNLPSTParams,
     preprocessorParams, exampleStyles, classifierParams, removeNamesFromEmpty) = getTaskSettings(
        task, detector, processUnmerging, processModifiers, isSingleStage,
        bioNLPSTParams, preprocessorParams, inputFiles, exampleStyles, classifierParams)
    if task != None:
        task = task.replace("-MINI", "").replace("-FULL", "")
    # Define processing steps
    # NOTE(review): omitDetectorSteps is returned by getSteps but is not used below.
    selector, detectorSteps, omitDetectorSteps = getSteps(step, omitSteps, ["TRAIN", "DEVEL", "EMPTY", "TEST"])

    # Initialize the detector
    detector, detectorName = getDetector(detector)
    detector = detector() # initialize object
    detector.debug = debug
    detector.bioNLPSTParams = detector.getBioNLPSharedTaskParams(bioNLPSTParams)
    connection = getConnection(connection)
    detector.setConnection(connection)
    connection.debug = debug
    if deleteOutput:
        connection.clearWorkDir()

    # Train
    if selector.check("TRAIN"):
        print >> sys.stderr, "----------------------------------------------------"
        print >> sys.stderr, "------------------ Train Detector ------------------"
        print >> sys.stderr, "----------------------------------------------------"
        if isSingleStage:
            detector.train(inputFiles["train"], inputFiles["devel"], models["devel"], models["test"],
                           exampleStyles["examples"], classifierParams["examples"], parse, None, task,
                           fromStep=detectorSteps["TRAIN"], workDir="training")
        else:
            detector.train(inputFiles["train"], inputFiles["devel"], models["devel"], models["test"],
                           exampleStyles["trigger"], exampleStyles["edge"], exampleStyles["unmerging"], exampleStyles["modifiers"],
                           classifierParams["trigger"], classifierParams["edge"], classifierParams["unmerging"], classifierParams["modifiers"],
                           classifierParams["recall"], processUnmerging, processModifiers,
                           doFullGrid, task, parse, None,
                           fromStep=detectorSteps["TRAIN"], workDir="training")
        # Save the detector type
        for modelPath in [models["devel"], models["test"]]:
            # Model paths default to None, and os.path.exists cannot be called with None
            if modelPath != None and os.path.exists(modelPath):
                model = Model(modelPath, "a")
                model.addStr("detector", detectorName)
                if preprocessorParams != None:
                    preprocessor = Preprocessor()
                    model.addStr("preprocessorParams", Parameters.toString(preprocessor.getParameters(preprocessorParams)))
                model.save()
                model.close()
    if selector.check("DEVEL"):
        print >> sys.stderr, "----------------------------------------------------"
        print >> sys.stderr, "------------ Check devel classification ------------"
        print >> sys.stderr, "----------------------------------------------------"
        detector.classify(inputFiles["devel"], models["devel"], "classification-devel/devel", goldData=inputFiles["devel"], fromStep=detectorSteps["DEVEL"], workDir="classification-devel")
    if selector.check("EMPTY"):
        # By passing an emptied devel set through the prediction system, we can check that we get the same predictions
        # as in the DEVEL step, ensuring the model does not use leaked information.
        print >> sys.stderr, "----------------------------------------------------"
        print >> sys.stderr, "------------ Empty devel classification ------------"
        print >> sys.stderr, "----------------------------------------------------"
        detector.classify(getEmptyCorpus(inputFiles["devel"], removeNames=removeNamesFromEmpty), models["devel"], "classification-empty/devel-empty", fromStep=detectorSteps["EMPTY"], workDir="classification-empty")
    if selector.check("TEST"):
        print >> sys.stderr, "----------------------------------------------------"
        print >> sys.stderr, "------------- Test set classification --------------"
        print >> sys.stderr, "----------------------------------------------------"
        if inputFiles["test"] == None or not os.path.exists(inputFiles["test"]):
            print >> sys.stderr, "Skipping, test file", inputFiles["test"], "does not exist"
        else:
            detector.bioNLPSTParams["scores"] = False # the evaluation server doesn't like additional files
            detector.classify(inputFiles["test"], models["test"], "classification-test/test", fromStep=detectorSteps["TEST"], workDir="classification-test")
            if detector.bioNLPSTParams["convert"]:
                # Compare the test set output against the devel set output
                Utils.STFormat.Compare.compare("classification-test/test-events.tar.gz", "classification-devel/devel-events.tar.gz", "a2")
127
def getSteps(step, omitSteps, mainSteps):
    """
    Build the main step selector and the substep settings for each main step.

    @param step: The step (optionally with a substep) from which processing starts, parsed with Parameters.get against 'mainSteps'.
    @param omitSteps: The steps and substeps to skip, parsed the same way.
    @param mainSteps: The names of the main steps.
    @return: A (selector, fromSubStep, omitSubSteps) tuple. 'fromSubStep' maps a main step to the substep it starts from (or None), 'omitSubSteps' maps a main step to the substeps to skip (or None).
    """
    # Resolve the single place from which processing starts
    step = Parameters.get(step, mainSteps)
    fromMainStep = None
    fromSubStep = {} # The substep to start from, for the main step to start from
    for name in step.keys():
        value = step[name]
        fromSubStep[name] = value
        if value == None:
            continue
        assert fromMainStep == None # processing can start from one place only
        fromMainStep = name
        if value == True:
            # The whole main step is selected, so there is no substep to start from
            fromSubStep[name] = None
        else:
            assert type(value) in types.StringTypes # no list allowed, processing can start from one place only
    # Resolve the steps to leave out
    omitSteps = Parameters.get(omitSteps, mainSteps)
    omitMainSteps = []
    omitSubSteps = {} # Skip these substeps
    for name in omitSteps.keys():
        if omitSteps[name] == True:
            # True means the entire main step is skipped
            omitMainSteps.append(name)
            omitSubSteps[name] = None
        else:
            omitSubSteps[name] = omitSteps[name]
    # Report the starting point and initialize the main step selector
    if fromMainStep != None:
        startSubStep = fromSubStep[fromMainStep]
        if startSubStep == None:
            print >> sys.stderr, "Starting process from step", fromMainStep
        else:
            print >> sys.stderr, "Starting process from step", fromMainStep + ", substep", startSubStep
    selector = StepSelector(mainSteps, fromStep=fromMainStep, omitSteps=omitMainSteps)
    return selector, fromSubStep, omitSubSteps
159
def getDetector(detector, model=None):
    """
    Resolve a detector definition into a detector class and its name.

    @param detector: A detector class, a string defining one to be imported, or None to read the string from 'model'. The string is either a dotted module path whose last component is also the name of the class (e.g. "Detectors.EventDetector") or an import statement of the form "from MODULE import CLASS".
    @param model: The model from which the detector string is read when 'detector' is None.
    @return: A (detector, detectorName) tuple. For a string definition the name is the string itself, otherwise it is the __name__ of the class.
    @raise ValueError: If an import statement definition cannot be parsed.
    @raise ImportError: If the module or the class cannot be imported.
    """
    # Get the detector
    if detector == None:
        assert model != None
        model = Model(model, "r")
        try:
            detector = model.getStr("detector")
        finally:
            model.close() # close the model also when reading fails
    # types.StringTypes exists only on Python 2
    stringTypes = getattr(types, "StringTypes", (str,))
    if isinstance(detector, stringTypes):
        # sys.stderr.write gives the same output as the print statement and is also valid on Python 3
        sys.stderr.write("Importing detector " + detector + "\n")
        detectorName = detector
        tokens = detector.split()
        if tokens[:1] == ["from"]:
            # "from MODULE import CLASS": the class is the token after "import". Taking the
            # last dot-separated part of the whole statement does not give a valid name.
            if len(tokens) < 4 or tokens[2] != "import":
                raise ValueError("Cannot parse detector definition '" + detector + "'")
            moduleName, className = tokens[1], tokens[3]
        else:
            # "PACKAGE.NAME": the class has the same name as its module
            moduleName = detector
            className = detector.split(".")[-1]
        # The definition comes from the command line or a model file, so it is imported
        # by name instead of being passed to exec/eval as code.
        module = __import__(moduleName, globals(), locals(), [str(className)])
        try:
            detector = getattr(module, className)
        except AttributeError:
            raise ImportError("cannot import name " + className)
    else: # assume it is a class
        detectorName = detector.__name__
        sys.stderr.write("Using detector " + detectorName + "\n")
    return detector, detectorName
181 182
def workdir(path, deleteIfExists=True, copyFrom=None, log="log.txt"):
    """
    Prepare the work directory, switch into it and open the log.

    The directory that was current at the time of the call is restored at
    interpreter exit through an atexit handler.

    @param path: The work directory. It is created if it does not exist.
    @param deleteIfExists: Remove an existing work directory before use.
    @param copyFrom: A directory copied to 'path' as a template. When defined, an existing work directory is always removed first.
    @param log: The log file name (if a relative path, it goes under the work directory). None is for no logging.
    @return: The 'path' argument.
    """
    # When using a template, always remove existing work directory
    if copyFrom != None:
        deleteIfExists = True
    # Remove existing work directory, if requested to do so
    # (sys.stderr.write gives the same output as the print statement and is also valid on Python 3)
    if os.path.exists(path) and deleteIfExists:
        sys.stderr.write("Output directory exists, removing " + path + "\n")
        shutil.rmtree(path)
    # Create work directory if needed
    if not os.path.exists(path):
        if copyFrom == None:
            sys.stderr.write("Making output directory " + path + "\n")
            os.makedirs(path)
        else:
            # Use the 'copyFrom' argument: the command line 'options' object exists only
            # when the module is run as a script, not when this function is imported.
            sys.stderr.write("Copying template from " + copyFrom + " to " + path + "\n")
            shutil.copytree(copyFrom, path)
    else:
        sys.stderr.write("Using existing output directory " + path + "\n")
    # Remember current directory and switch to workdir
    atexit.register(os.chdir, os.getcwd())
    os.chdir(path)
    # Open log (if a relative path, it goes under workdir)
    if log != None:
        Stream.openLog(log)
    else:
        sys.stderr.write("No logging\n")
    return path
210
def getTaskSettings(task, detector, processUnmerging, processModifiers, isSingleStage,
                    bioNLPSTParams, preprocessorParams,
                    inputFiles, exampleStyles, classifierParameters):
    """
    Fill in the task specific defaults for all undefined training settings.

    Only settings whose value is None are replaced. The 'inputFiles', 'exampleStyles'
    and 'classifierParameters' dictionaries are modified in place. If 'task' is None,
    no defaults are inserted.

    @param task: One of the supported TEES tasks (optionally with a "-MINI" tag and/or a ".N" subtask), or None.
    @param detector: The detector definition, or None for the task default.
    @param processUnmerging: True, False or None for the task default.
    @param processModifiers: True, False or None for the task default.
    @param isSingleStage: True for a single stage detector. Set to True when an undefined detector defaults to the EdgeDetector.
    @param bioNLPSTParams: BioNLP Shared Task format parameters, or None for the task default.
    @param preprocessorParams: Preprocessor parameters, or None for the task default.
    @param inputFiles: A dictionary with keys "train", "devel" and "test".
    @param exampleStyles: A dictionary with keys "examples", "trigger", "edge", "unmerging" and "modifiers".
    @param classifierParameters: A dictionary with keys "examples", "trigger", "recall", "edge", "unmerging" and "modifiers".
    @return: A tuple (detector, processUnmerging, processModifiers, isSingleStage, bioNLPSTParams, preprocessorParams, exampleStyles, classifierParameters, removeNamesFromEmpty). 'removeNamesFromEmpty' is True if the example style of the first stage contains "names".
    """
    def report(*parts):
        # Same output as "print >> sys.stderr, part1, part2, ...", but also valid on Python 3
        sys.stderr.write(" ".join([str(part) for part in parts]) + "\n")

    if task != None:
        report("Determining training settings for task", task)
        assert task.replace("-MINI", "") in ["GE09", "GE09.1", "GE09.2", "GE", "GE.1", "GE.2", "EPI", "ID", "BB", "BI", "BI-FULL", "CO", "REL", "REN", "DDI", "DDI-FULL"], task

        fullTaskId = task
        subTask = 2
        if "." in task:
            task, subTask = task.split(".")
            subTask = int(subTask)
        # Optional overrides for input files. The corpus directory is read only when a
        # default file name is actually needed.
        if inputFiles["devel"] == None:
            inputFiles["devel"] = os.path.join(Settings.CORPUS_DIR, task.replace("-FULL", "") + "-devel.xml")
        if inputFiles["train"] == None:
            if task == "ID": # add GE-task data to the ID training set
                dataPath = Settings.CORPUS_DIR
                inputFiles["train"] = Catenate.catenate([os.path.join(dataPath, "ID-train.xml"),
                                                         os.path.join(dataPath, "GE-devel.xml"),
                                                         os.path.join(dataPath, "GE-train.xml")],
                                                        "training/ID-train-and-GE-devel-and-train.xml.gz", fast=True)
            else:
                inputFiles["train"] = os.path.join(Settings.CORPUS_DIR, task.replace("-FULL", "") + "-train.xml")
        if inputFiles["test"] == None:
            inputFiles["test"] = os.path.join(Settings.CORPUS_DIR, task.replace("-FULL", "") + "-test.xml")

        # The "-MINI" tag affects only the corpus file names above
        task = task.replace("-MINI", "")
        # Example generation parameters
        if detector == None:
            detector = "Detectors.EventDetector"
            if task == "CO":
                detector = "Detectors.CODetector"
            elif task in ["REN", "BI", "DDI"]:
                detector = "Detectors.EdgeDetector"
                isSingleStage = True
            report("Detector undefined, using default '" + detector + "' for task", fullTaskId)
        if bioNLPSTParams == None and task not in ["DDI", "DDI-FULL"]:
            bioNLPSTParams = "convert:evaluate:scores"
            if task == "BI-FULL":
                bioNLPSTParams = "convert:scores" # the shared task evaluator is not designed for predicted entities
            report("BioNLP Shared Task parameters undefined, using default '" + bioNLPSTParams + "' for task", fullTaskId)
        if preprocessorParams == None:
            preprocessorParams = ["intermediateFiles"]
            if task in ["BI", "BI-FULL", "BB", "DDI", "DDI-FULL"]:
                preprocessorParams += ["omitSteps=NER,DIVIDE-SETS"]
            else:
                preprocessorParams += ["omitSteps=DIVIDE-SETS"]
            # NOTE(review): assumed to apply to all tasks, not only to the else-branch above - confirm
            preprocessorParams += ["PARSE.requireEntities"] # parse only sentences where BANNER found an entity
            preprocessorParams = ":".join(preprocessorParams)
            report("Preprocessor parameters undefined, using default '" + preprocessorParams + "' for task", fullTaskId)
        if processUnmerging == None and not isSingleStage:
            processUnmerging = True
            if task in ["CO", "REL", "BB", "BI-FULL", "DDI-FULL"]:
                processUnmerging = False
            report("Unmerging undefined, using default", processUnmerging, "for task", fullTaskId)
        if processModifiers == None:
            processModifiers = False
            if task in ["GE", "EPI", "ID"]:
                processModifiers = True
            report("Modifier prediction undefined, using default", processModifiers, " for task", fullTaskId)
        if exampleStyles["examples"] == None and isSingleStage:
            if task == "REN":
                exampleStyles["examples"] = "trigger_features:typed:no_linear:entities:noMasking:maxFeatures:bacteria_renaming:maskTypeAsProtein=Gene"
            elif task == "BI":
                exampleStyles["examples"] = "trigger_features:typed:directed:no_linear:entities:noMasking:maxFeatures:bi_limits"
            elif task == "DDI":
                exampleStyles["examples"] = "trigger_features:typed:no_linear:entities:noMasking:maxFeatures:ddi_features:ddi_mtmx:filter_shortest_path=conj_and"
            # The style stays None for tasks without a single-stage default, so it is
            # converted with str() instead of being concatenated directly.
            report("Single-stage examples style undefined, using default '" + str(exampleStyles["examples"]) + "' for task", fullTaskId)
        if exampleStyles["edge"] == None and not isSingleStage:
            report("Edge example style undefined, using default for task", fullTaskId)
            if task in ["GE09", "GE"]:
                exampleStyles["edge"] = "trigger_features:typed:directed:no_linear:entities:genia_limits:noMasking:maxFeatures"
                if subTask == 1:
                    exampleStyles["edge"] += ":genia_task1"
            elif task in ["BB"]:
                exampleStyles["edge"] = "trigger_features:typed:directed:no_linear:entities:bb_limits:noMasking:maxFeatures"
            elif task == "EPI":
                exampleStyles["edge"] = "trigger_features:typed:directed:no_linear:entities:epi_limits:noMasking:maxFeatures"
            elif task == "ID":
                exampleStyles["edge"] = "trigger_features:typed:directed:no_linear:entities:id_limits:noMasking:maxFeatures"
            elif task == "REL":
                exampleStyles["edge"] = "trigger_features:typed:directed:no_linear:entities:noMasking:maxFeatures:rel_limits:rel_features"
            elif task == "CO":
                exampleStyles["edge"] = "trigger_features:typed:directed:no_linear:entities:noMasking:maxFeatures:co_limits"
            elif task == "BI-FULL":
                exampleStyles["edge"] = "trigger_features:typed:directed:no_linear:entities:noMasking:maxFeatures:bi_limits"
            elif task == "DDI-FULL":
                exampleStyles["edge"] = "trigger_features:typed:no_linear:entities:noMasking:maxFeatures:ddi_features:filter_shortest_path=conj_and"
            else:
                exampleStyles["edge"] = "trigger_features:typed:directed:no_linear:entities:noMasking:maxFeatures"
        if exampleStyles["trigger"] == None and not isSingleStage:
            report("Trigger example style undefined, using default for task", fullTaskId)
            if task in ["GE09", "GE"] and subTask == 1:
                exampleStyles["trigger"] = "genia_task1"
            elif task == "EPI":
                exampleStyles["trigger"] = "epi_merge_negated"
            elif task == "BB":
                exampleStyles["trigger"] = "bb_features:build_for_nameless:wordnet"
            elif task == "REL":
                exampleStyles["trigger"] = "rel_features"
            elif task == "CO":
                # NOTE(review): this branch used to assign "PhraseTriggerExampleBuilder" to
                # options.triggerExampleBuilder. 'options' exists only when the module is
                # run as a script and nothing in this module reads that attribute, so the
                # assignment was dropped and the CO trigger style stays undefined.
                pass
            elif task in ["BI-FULL", "DDI-FULL"]:
                exampleStyles["trigger"] = "build_for_nameless:names"
        if exampleStyles["unmerging"] == None and not isSingleStage:
            exampleStyles["unmerging"] = "trigger_features:typed:directed:no_linear:entities:genia_limits:noMasking:maxFeatures"
        # Classifier parameters
        if classifierParameters["examples"] == None and isSingleStage:
            report("Classifier parameters for single-stage examples undefined, using default for task", fullTaskId)
            if task == "REN":
                classifierParameters["examples"] = "10,100,1000,2000,3000,4000,4500,5000,5500,6000,7500,10000,20000,25000,28000,50000,60000"
            elif task == "BI":
                classifierParameters["examples"] = "10,100,1000,2500,5000,7500,10000,20000,25000,28000,50000,60000,65000,80000,100000,150000"
            elif task == "DDI":
                classifierParameters["examples"] = "c=10,100,1000,2500,4000,5000,6000,7500,10000,20000,25000,50000:TEES.threshold"
        if classifierParameters["trigger"] == None and not isSingleStage:
            report("Classifier parameters for trigger examples undefined, using default for task", fullTaskId)
            classifierParameters["trigger"] = "1000,5000,10000,20000,50000,80000,100000,150000,180000,200000,250000,300000,350000,500000,1000000"
        if classifierParameters["recall"] == None and not isSingleStage:
            report("Recall adjust parameter undefined, using default for task", fullTaskId)
            classifierParameters["recall"] = "0.5,0.6,0.65,0.7,0.85,1.0,1.1,1.2"
            if task == "CO":
                classifierParameters["recall"] = "0.8,0.9,0.95,1.0"
        if classifierParameters["edge"] == None and not isSingleStage:
            report("Classifier parameters for edge examples undefined, using default for task", fullTaskId)
            classifierParameters["edge"] = "5000,7500,10000,20000,25000,27500,28000,29000,30000,35000,40000,50000,60000,65000"
            if task in ["REL", "CO"]:
                classifierParameters["edge"] = "10,100,1000,5000,7500,10000,20000,25000,28000,50000,60000,65000,100000,500000,1000000"
        if classifierParameters["unmerging"] == None and not isSingleStage:
            report("Classifier parameters for unmerging examples undefined, using default for task", fullTaskId)
            classifierParameters["unmerging"] = "1,10,100,500,1000,1500,2500,5000,10000,20000,50000,80000,100000"
        if classifierParameters["modifiers"] == None and not isSingleStage:
            report("Classifier parameters for modifier examples undefined, using default for task", fullTaskId)
            classifierParameters["modifiers"] = "5000,10000,20000,50000,100000"

    # The first stage example style decides whether names are removed from the emptied corpus
    if isSingleStage and exampleStyles["examples"] != None and "names" in exampleStyles["examples"]:
        removeNamesFromEmpty = True
    elif (not isSingleStage) and exampleStyles["trigger"] != None and "names" in exampleStyles["trigger"]:
        removeNamesFromEmpty = True
    else:
        removeNamesFromEmpty = False
    return detector, processUnmerging, processModifiers, isSingleStage, bioNLPSTParams, preprocessorParams, exampleStyles, classifierParameters, removeNamesFromEmpty
360
def getDefinedBool(string):
    """
    Convert an optional boolean definition into True, False or None.

    @param string: True, False, None, "True" or "False".
    @return: The argument itself if it is already True or False, None for None, and otherwise the boolean named by the string.
    """
    if string in (True, False): # already defined
        return string
    assert string in (None, "True", "False") # undefined or needs to be converted to bool
    if string is None:
        return None
    return string == "True"
371
def getDefinedBoolOption(option, opt, value, parser):
    """
    An optparse callback for storing an optional boolean.

    @param option: The option being processed, its 'dest' names the attribute to set.
    @param opt: The option string seen on the command line (not used).
    @param value: The argument of the option. None stores True, anything else is converted with getDefinedBool.
    @param parser: The parser whose 'values' object receives the result.
    """
    if value != None:
        resolved = getDefinedBool(value)
    else:
        # The option was given without an argument
        resolved = True
    setattr(parser.values, option.dest, resolved)
if __name__=="__main__":
    # Command line interface: parse the options and pass them on to train()

    # Import Psyco if available
    try:
        import psyco
        psyco.full()
        print >> sys.stderr, "Found Psyco, using"
    except ImportError:
        print >> sys.stderr, "Psyco not installed"

    from optparse import OptionParser, OptionGroup
    optparser = OptionParser(description="Train a new event/relation extraction model")
    # main options
    group = OptionGroup(optparser, "Main Options", "")
    group.add_option("-t", "--task", default=None, dest="task", help="task number")
    group.add_option("-p", "--parse", default="McCC", dest="parse", help="Parse XML element name")
    group.add_option("-c", "--connection", default=None, dest="connection", help="")
    optparser.add_option_group(group)
    # input
    group = OptionGroup(optparser, "Input Files", "If these are undefined, a task (-t) specific corpus file will be used")
    group.add_option("--trainFile", default=None, dest="trainFile", help="")
    group.add_option("--develFile", default=None, dest="develFile", help="")
    group.add_option("--testFile", default=None, dest="testFile", help="")
    optparser.add_option_group(group)
    # output
    group = OptionGroup(optparser, "Output Files", "Files created from training the detector")
    group.add_option("-o", "--output", default=None, dest="output", help="Output directory for intermediate files")
    group.add_option("--develModel", default="model-devel", dest="develModel", help="Model trained on 'trainFile', with parameters optimized on 'develFile'")
    group.add_option("--testModel", default="model-test", dest="testModel", help="Model trained on 'trainFile'+'develFile', with parameters from 'develModel'")
    optparser.add_option_group(group)
    # Example builders
    group = OptionGroup(optparser, "Detector to train", "")
    group.add_option("--detector", default=None, dest="detector", help="the detector class to use")
    group.add_option("--singleStage", default=False, action="store_true", dest="singleStage", help="'detector' is a single stage detector")
    # NOTE(review): noBioNLPSTFormat is parsed but not passed on to train() below
    group.add_option("--noBioNLPSTFormat", default=False, action="store_true", dest="noBioNLPSTFormat", help="Do not output BioNLP Shared Task format version (a1, a2, txt)")
    group.add_option("--bioNLPSTParams", default=None, dest="bioNLPSTParams", help="")
    group.add_option("--preprocessorParams", default=None, dest="preprocessorParams", help="")
    optparser.add_option_group(group)
    # Example builder parameters
    event = OptionGroup(optparser, "Event Detector Options (used when not using '--singleStage')", "")
    single = OptionGroup(optparser, "Single Stage Detector Options (used when using '--singleStage')", "")
    single.add_option("--exampleStyle", default=None, dest="exampleStyle", help="Single-stage detector example style")
    event.add_option("-u", "--unmerging", default=None, action="callback", callback=getDefinedBoolOption, dest="unmerging", help="SVM unmerging")
    event.add_option("-m", "--modifiers", default=None, action="callback", callback=getDefinedBoolOption, dest="modifiers", help="Train model for modifier detection")
    event.add_option("--triggerStyle", default=None, dest="triggerStyle", help="Event detector trigger example style")
    event.add_option("--edgeStyle", default=None, dest="edgeStyle", help="Event detector edge example style")
    event.add_option("--unmergingStyle", default=None, dest="unmergingStyle", help="Event detector unmerging example style")
    event.add_option("--modifierStyle", default=None, dest="modifierStyle", help="Event detector modifier example style")
    # Classifier parameters
    single.add_option("-e", "--exampleParams", default=None, dest="exampleParams", help="Single-stage detector parameters")
    event.add_option("-r", "--triggerParams", default=None, dest="triggerParams", help="Trigger detector c-parameter values")
    event.add_option("-a", "--recallAdjustParams", default=None, dest="recallAdjustParams", help="Recall adjuster parameter values")
    event.add_option("-d", "--edgeParams", default=None, dest="edgeParams", help="Edge detector c-parameter values")
    event.add_option("-n", "--unmergingParams", default=None, dest="unmergingParams", help="Unmerging c-parameter values")
    event.add_option("-f", "--modifierParams", default=None, dest="modifierParams", help="Modifier c-parameter values")
    event.add_option("--fullGrid", default=False, action="store_true", dest="fullGrid", help="Full grid search for parameters")
    optparser.add_option_group(single)
    optparser.add_option_group(event)
    # Debugging and process control
    debug = OptionGroup(optparser, "Debug and Process Control Options", "")
    debug.add_option("--step", default=None, dest="step", help="Step to start processing from, with optional substep (STEP=SUBSTEP). Step values are TRAIN, DEVEL, EMPTY and TEST.")
    debug.add_option("--omitSteps", default=None, dest="omitSteps", help="")
    debug.add_option("--copyFrom", default=None, dest="copyFrom", help="Copy this directory as template")
    debug.add_option("--log", default="log.txt", dest="log", help="Log file name")
    debug.add_option("--noLog", default=False, action="store_true", dest="noLog", help="Do not keep a log file")
    debug.add_option("--clearAll", default=False, action="store_true", dest="clearAll", help="Delete all files")
    debug.add_option("--debug", default=False, action="store_true", dest="debug", help="More verbose output")
    optparser.add_option_group(debug)
    (options, args) = optparser.parse_args()

    # The output directory is mandatory. A usage error is reported instead of using an
    # assert, which shows a bare traceback and is skipped when Python runs with -O.
    if options.output == None:
        optparser.error("the output directory must be defined with -o/--output")
    if options.noLog:
        options.log = None
    train(options.output, options.task, options.detector,
          inputFiles={"devel":options.develFile, "train":options.trainFile, "test":options.testFile},
          models={"devel":options.develModel, "test":options.testModel}, parse=options.parse,
          processUnmerging=options.unmerging, processModifiers=options.modifiers,
          isSingleStage=options.singleStage,
          bioNLPSTParams=options.bioNLPSTParams, preprocessorParams=options.preprocessorParams,
          exampleStyles={"examples":options.exampleStyle, "trigger":options.triggerStyle, "edge":options.edgeStyle, "unmerging":options.unmergingStyle, "modifiers":options.modifierStyle},
          classifierParams={"examples":options.exampleParams, "trigger":options.triggerParams, "recall":options.recallAdjustParams, "edge":options.edgeParams, "unmerging":options.unmergingParams, "modifiers":options.modifierParams},
          doFullGrid=options.fullGrid, deleteOutput=options.clearAll, copyFrom=options.copyFrom,
          log=options.log, step=options.step, omitSteps=options.omitSteps, debug=options.debug, connection=options.connection)