Package TEES :: Module configure
[hide private]

Source Code for Module TEES.configure

  1  """ 
  2  Configure TEES by installing data files and external components. 
  3  """ 
  4   
  5  import sys, os, shutil 
  6  import textwrap 
  7  from Utils.Menu import * 
  8  import Utils.Settings as Settings 
  9  # Classifier wrapper 
 10  import Classifiers.SVMMultiClassClassifier 
 11  # External tools wrappers 
 12  import Tools.GeniaSentenceSplitter 
 13  import Tools.BANNER 
 14  import Tools.BLLIPParser 
 15  import Tools.StanfordParser 
 16  # Corpora 
 17  import Utils.Convert.convertBioNLP as convertBioNLP 
 18  import Utils.Download 
 19  # TODO: Logging 
 20   
def pathMenuInitializer(menu, prevMenu):
    """
    Initialize the "Install Directory" menu.

    Reads the toggles of options "1"-"4" of prevMenu to decide which
    component menus follow the "c" (continue) option, builds the menu text,
    and resolves the default installation directory (DATAPATH) and the local
    settings file path (TEES_SETTINGS). Both values are stored on the menu
    system; DATAPATH is also written to Settings.DATAPATH and the settings
    path to os.environ["TEES_SETTINGS"].

    menu -- the menu being initialized
    prevMenu -- the previously shown menu, whose options "1"-"4" are read
    """
    # Queue one follow-up menu per component toggled on in the previous menu
    nextMenus = []
    for key, menuName in (("1", "Classifier"), ("2", "Models"), ("3", "Corpora"), ("4", "Tools")):
        if prevMenu.optDict[key].toggle:
            nextMenus.append(menuName)
    menu.optDict["c"].nextMenu = nextMenus

    progArgs = menu.system.progArgs
    menu.text = """
    1. By default, all data and tools will be installed to one directory, the DATAPATH.
    You can later set the installation directory individually for each component, or
    you can change the default path now.

    """
    if menu.defaultInstallDir is None:
        # No directory chosen yet: command line, then settings file, then ~/.tees
        if progArgs["installDir"] is not None:
            menu.defaultInstallDir = progArgs["installDir"]
            if progArgs["clearInstallDir"] and os.path.exists(progArgs["installDir"]):
                shutil.rmtree(progArgs["installDir"])
            menu.text += "\nUsing the DATAPATH path from configure.py command line options.\n\n"
        elif progArgs["localSettings"] is not None:
            # Reload Settings so that DATAPATH comes from the given settings file
            os.environ["TEES_SETTINGS"] = os.path.abspath(progArgs["localSettings"])
            reload(Settings)
            menu.defaultInstallDir = Settings.DATAPATH
        elif "TEES_SETTINGS" in os.environ:
            menu.defaultInstallDir = Settings.DATAPATH
        else:
            menu.defaultInstallDir = os.path.expanduser("~/.tees")
    elif os.path.exists(menu.defaultInstallDir):
        if not os.path.isdir(menu.defaultInstallDir):
            menu.text += "WARNING! The DATAPATH directory is not a directory.\n\n"
    else:
        try:
            os.makedirs(menu.defaultInstallDir)
        except OSError:
            # Only filesystem errors are expected here; anything else
            # (e.g. KeyboardInterrupt) must not be silenced.
            menu.text += "WARNING! Could not create DATAPATH.\n\n"

    menu.text += """
    2. TEES reads its configuration from a file defined by the environment
    variable "TEES_SETTINGS". This environment variable must be set, and
    point to a configuration file for TEES to work. By editing this
    configuration file you can configure TEES in addition (or instead of)
    using this configuration program.
    """
    if menu.configFilePath is None:
        # No settings file chosen yet: command line, then environment, then default location
        if progArgs["localSettings"] is not None:
            menu.configFilePath = progArgs["localSettings"]
            menu.text += "\nUsing the TEES_SETTINGS path from configure.py command line options.\n\n"
        elif "TEES_SETTINGS" in os.environ:
            menu.configFilePath = os.environ["TEES_SETTINGS"]
            menu.text += """
            The "TEES_SETTINGS" environment variable is already set. If the configuration file
            exists, this installation program will use it and by default install only missing components.
            """
        else:
            menu.configFilePath = os.path.expanduser("~/.tees_local_settings.py")
            if os.path.exists(menu.configFilePath):
                menu.text += """
                The "TEES_SETTINGS" environment variable is not set, but a configuration file has been
                found in the default location. This installation program will use the existing
                file, and by default install only missing components.
                """
            else:
                menu.text += """
                The "TEES_SETTINGS" environment variable is not set, so a new local configuration file
                will be created.
                """

    # Publish the resolved paths to the menu system, Settings and the environment
    menu.system.setAttr("defaultInstallDir", menu.defaultInstallDir)
    Settings.DATAPATH = menu.defaultInstallDir
    menu.system.setAttr("configFilePath", menu.configFilePath)
    os.environ["TEES_SETTINGS"] = menu.configFilePath
    setClosingMessage(menu.system, menu.configFilePath)
    # The "c" option's handler receives the settings file path as its argument
    menu.optDict["c"].handlerArgs = [menu.configFilePath]
100
def setClosingMessage(menuSystem, configFilePath):
    """
    Store a reminder about the TEES_SETTINGS environment variable as the
    closing message of the menu system.

    menuSystem -- object whose closingMessage attribute is assigned
    configFilePath -- settings file path shown in the example shell commands
    """
    messageLines = [
        "!!!!!!!!!!!!!!!!!!!!!! Important Note !!!!!!!!!!!!!!!!!!!!!!",
        "Before using TEES, remember to define the TEES_SETTINGS",
        "environment variable. How to do this depends on your shell,",
        "some common commands are:",
        "",
        "bash: 'export TEES_SETTINGS=" + configFilePath + "'",
        "tcsh: 'setenv TEES_SETTINGS " + configFilePath + "'",
    ]
    # Every line, including the last one, is newline-terminated
    menuSystem.closingMessage = "\n".join(messageLines) + "\n"
108
def initLocalSettings(filename):
    """
    Create the local settings file unless it already exists.

    An existing file is left untouched and the function returns immediately.
    Otherwise a settings template is written to filename with DATAPATH set
    to Menu.system.defaultInstallDir, os.environ["TEES_SETTINGS"] is pointed
    at the new file and the Settings module is reloaded.

    filename -- path of the local settings file
    """
    assert Menu.system.defaultInstallDir is not None
    if os.path.exists(filename):
        print >> sys.stderr, "Using existing local settings file", filename
        return
    print >> sys.stderr, "Initializing local settings file", filename
    # dedent strips the source-code indentation from the template without
    # touching the spacing inside each line
    template = textwrap.dedent("""
    # Edit these settings to configure TEES. A variable must have a value
    # other than None for it to be usable. This file is interpreted as
    # a Python module, so Python code can be used.

    # Tools
    SVM_MULTICLASS_DIR = None # svm_multiclass_learn and svm_multiclass_classify directory
    BANNER_DIR = None # BANNER program directory
    GENIA_SENTENCE_SPLITTER_DIR = None # GENIA Sentence Splitter directory
    RUBY_PATH = "ruby" # Command to run Ruby (used only by the GENIA Sentence Splitter)
    BLLIP_PARSER_DIR = None # The BLLIP parser directory
    MCCLOSKY_BIOPARSINGMODEL_DIR = None # The McClosky BioModel directory
    STANFORD_PARSER_DIR = None # The Stanford parser directory

    # Data
    DATAPATH = 'DATAPATH_VALUE' # Main directory for datafiles
    CORPUS_DIR = None # Directory for the corpus XML-files
    TEES_MODEL_DIR = None # Directory for the official TEES models
    """)
    # repr() produces a valid Python string literal even when the path
    # contains quotes or backslashes
    content = template.replace("'DATAPATH_VALUE'", repr(Menu.system.defaultInstallDir))
    # The with-statement closes the file even if the write fails
    with open(filename, "wt") as f:
        f.write(content)
    # Reset local settings
    os.environ["TEES_SETTINGS"] = filename
    reload(Settings)
139
def checkInstallPath(menu, setting, defaultInstallKey="i", defaultSkipKey="s"):
    """
    Choose the menu's default option based on whether a setting is configured.

    If Settings has a non-None value for the named setting, a note showing
    the value is appended to menu.text, the skip option becomes the default
    and False is returned. Otherwise the install option becomes the default
    and True is returned.

    menu -- menu whose text and default option are updated
    setting -- name of the attribute to look up in Settings
    defaultInstallKey -- option key made default when the setting is missing
    defaultSkipKey -- option key made default when the setting is configured
    """
    configuredValue = getattr(Settings, setting, None)
    if configuredValue is None:
        # Nothing configured yet, so installing is the sensible default
        menu.setDefault(defaultInstallKey)
        return True
    menu.text += "The " + setting + " setting is already configured, so the default option is to skip installing.\n\n"
    menu.text += setting + "=" + configuredValue
    menu.setDefault(defaultSkipKey)
    return False
151
def checkCorpusInstall(corpus, dataSets=("-train.xml", "-devel.xml", "-test.xml")):
    """
    Return True if all subset files of a corpus exist in Settings.CORPUS_DIR.

    Each file path is Settings.CORPUS_DIR + "/" + corpus + suffix. A False
    return value means at least one file is missing.

    corpus -- corpus name used as the file name prefix
    dataSets -- file name suffixes of the subsets that must be present
    """
    missingSuffixes = [suffix for suffix in dataSets
                       if not os.path.exists(Settings.CORPUS_DIR + "/" + corpus + suffix)]
    return not missingSuffixes
164
def svmMenuInitializer(menu, prevMenu):
    """
    Initialize the "Classifier" menu.

    Sets the menu text, lets checkInstallPath pick the default option from
    the SVM_MULTICLASS_DIR setting, and fills in the argument list of the
    install option "i".

    menu -- the menu being initialized
    prevMenu -- the previously shown menu (not used here)
    """
    menu.text = """
    TEES uses the SVM Multiclass classifer by Thorsten Joachims for all
    classification tasks. You can optionally choose to compile it from
    source if the precompiled Linux-binary does not work on your system.
    """
    # checkInstallPath makes "s" the default when SVM_MULTICLASS_DIR is
    # already configured and "i" otherwise, so no further branching is needed
    checkInstallPath(menu, "SVM_MULTICLASS_DIR")
    # Option "1" is the "compile from source" toggle; the meaning of the other
    # positions is defined by the install handler attached to option "i"
    menu.optDict["i"].handlerArgs = [None, os.path.join(menu.system.defaultInstallDir, "tools/download"), True, menu.optDict["1"].toggle, True]
179
def toolsMenuInitializer(menu, prevMenu):
    """
    Initialize the "Tools" menu.

    A tool is selected for installation when its option is already toggled
    on, or, when menu differs from prevMenu, when checkInstallPath reports
    that its directory setting is not configured. Selected tools get their
    option toggled on, and their install functions and argument lists are
    attached to the install option "i".

    menu -- the menu being initialized
    prevMenu -- the previously shown menu
    """
    redownload = menu.optDict["1"].toggle
    # (option key, Settings attribute, install function, install arguments)
    toolTable = (
        ("2", "GENIA_SENTENCE_SPLITTER_DIR", Tools.GeniaSentenceSplitter.install, [None, None, redownload, True]),
        ("3", "BANNER_DIR", Tools.BANNER.install, [None, None, redownload, False, None, True]),
        ("4", "BLLIP_PARSER_DIR", Tools.BLLIPParser.install, [None, None, redownload, True]),
        ("5", "STANFORD_PARSER_DIR", Tools.StanfordParser.install, [None, None, redownload, True]),
    )
    installers = []
    installerArgs = []
    for optionKey, settingName, installFunction, installArguments in toolTable:
        toolOption = menu.optDict[optionKey]
        # checkInstallPath is only consulted when the option is not already on
        if toolOption.toggle or (menu != prevMenu and checkInstallPath(menu, settingName)):
            toolOption.toggle = True
            installers.append(installFunction)
            installerArgs.append(installArguments)
    installOption = menu.optDict["i"]
    installOption.handler = installers
    installOption.handlerArgs = installerArgs
209
def modelsMenuInitializer(menu, prevMenu):
    """
    Initialize the "Models" menu.

    When menu differs from prevMenu, "skip" is made the default option and
    then replaced by "install" if Settings.MODEL_DIR is missing, None, or
    points to a path that does not exist. The install option "i" always gets
    two handlers: a download-and-extract step and a Settings.setLocal call
    that records MODEL_DIR.

    menu -- the menu being initialized
    prevMenu -- the previously shown menu
    """
    menu.text = """
    TEES models are used for predicting events or relations using
    classify.py. Models are provided for all tasks in the BioNLP'11,
    BioNLP'09 and DDI'11 shared tasks.

    For a list of models and instructions for using them see
    https://github.com/jbjorne/TEES/wiki/Classifying.
    """
    cameFromOtherMenu = menu != prevMenu
    # "skip" is the initial default; it is overridden below if no model directory is found
    if cameFromOtherMenu:
        menu.setDefault("s")
    redownload = menu.optDict["1"].toggle
    installRoot = menu.system.defaultInstallDir
    destPath = os.path.join(installRoot, "models")
    downloadPath = os.path.join(installRoot, "models/download")
    # NOTE(review): the settings template written by initLocalSettings declares
    # TEES_MODEL_DIR, while this function reads MODEL_DIR -- confirm which name
    # Settings actually uses.
    if cameFromOtherMenu:
        modelDir = getattr(Settings, "MODEL_DIR", None)
        if modelDir is None or not os.path.exists(modelDir):
            menu.setDefault("i")
    installOption = menu.optDict["i"]
    installOption.handler = [Utils.Download.downloadAndExtract, Settings.setLocal]
    installOption.handlerArgs = [
        [Settings.URL["MODELS"], destPath, downloadPath, None, True, redownload],
        ["MODEL_DIR", destPath]]
230
def corpusMenuInitializer(menu, prevMenu):
    """
    Initialize the "Corpora" menu.

    "skip" starts as the default option. If Settings has no CORPUS_DIR it is
    set to <defaultInstallDir>/corpora. An item (BioNLP'11 corpora, BioNLP'09
    corpus, DDI'11 corpus, BioNLP'11 evaluators) is selected when its option
    is already toggled on, or, when menu differs from prevMenu, when it is
    not found installed. Each selected item makes "install" the default,
    toggles its option on and adds a handler to the install option "i".

    menu -- the menu being initialized
    prevMenu -- the previously shown menu
    """
    menu.text = """
    The corpora are used for training new models and testing existing
    models. The corpora installable here are from the two BioNLP Shared
    Tasks (BioNLP'09 and BioNLP'11) on Event Extraction (organized by
    University of Tokyo), and the First Challenge Task: Drug-Drug Interaction
    Extraction (DDI'11, organized by Universidad Carlos III de Madrid).

    The corpora are downloaded as interaction XML files, generated from the
    original Shared Task files. If you need to convert the corpora from
    the original files, you can use the convertBioNLP.py and convertDDI.py programs
    located at Utils/Convert.

    It is also recommended to download the official BioNLP Shared Task evaluator
    programs, which will be used by TEES when training or testing on those corpora.
    """
    # "skip" is the initial default; any selected item below switches it to "install"
    menu.setDefault("s")
    installRoot = menu.system.defaultInstallDir
    # Make sure CORPUS_DIR is defined before the corpus files are looked up
    if getattr(Settings, "CORPUS_DIR", None) is None:
        Settings.setLocal("CORPUS_DIR", os.path.join(installRoot, "corpora"))
        print >> sys.stderr

    corpusInstallPath = os.path.join(installRoot, "corpora")
    corpusDownloadPath = os.path.join(installRoot, "corpora/download")
    redownload = menu.optDict["1"].toggle
    evaluatorInstallPath = os.path.join(installRoot, "tools/evaluators")
    evaluatorDownloadPath = os.path.join(installRoot, "tools/download")

    bioNLP11Corpora = ["GE", "EPI", "ID", "BB", "BI", "CO", "REL", "REN"]
    # (option key, "is it installed?" check, handler, handler arguments).
    # The checks are callables so that they run only when the option is not
    # already toggled on, and all() stops at the first missing corpus.
    installItems = [
        ("2", lambda: all(checkCorpusInstall(name) for name in bioNLP11Corpora),
         convertBioNLP.installPreconverted,
         ["BIONLP_11_CORPORA", corpusInstallPath, corpusDownloadPath, redownload, True]),
        ("3", lambda: checkCorpusInstall("GE09"),
         convertBioNLP.installPreconverted,
         ["BIONLP_09_CORPUS", corpusInstallPath, corpusDownloadPath, redownload, True]),
        ("4", lambda: checkCorpusInstall("DDI", ("-train.xml", "-devel.xml")),
         convertBioNLP.installPreconverted,
         ["DDI_11_CORPUS", corpusInstallPath, corpusDownloadPath, redownload, True]),
        ("5", lambda: getattr(Settings, "BIONLP_EVALUATOR_DIR", None) is not None,
         convertBioNLP.installEvaluators,
         [evaluatorInstallPath, evaluatorDownloadPath, redownload, True]),
    ]

    selectedHandlers = []
    selectedHandlerArgs = []
    for optionKey, isInstalled, installFunction, installArguments in installItems:
        itemOption = menu.optDict[optionKey]
        if itemOption.toggle or (menu != prevMenu and not isInstalled()):
            menu.setDefault("i")
            itemOption.toggle = True
            selectedHandlers.append(installFunction)
            selectedHandlerArgs.append(installArguments)

    # Attach the collected handlers to the install option
    installOption = menu.optDict["i"]
    installOption.handler = selectedHandlers
    installOption.handlerArgs = selectedHandlerArgs
288
def buildMenus():
    """
    Construct all menus of the configuration program.

    Creates the "Classifier", "Install Directory", "Configure TEES",
    "Models", "Corpora" and "Tools" menus in that order and returns the name
    of the menu to start from, "Configure TEES".
    """
    def redownloadToggle():
        # The same "redownload" toggle is offered by every download menu
        return Option("1", "Redownload already downloaded files", toggle=False)

    classifierOptions = [
        Option("1", "Compile from source", toggle=False),
        Option("i", "Install", handler=Classifiers.SVMMultiClassClassifier.install),
        Option("s", "Skip")]
    Menu("Classifier", None, classifierOptions, svmMenuInitializer)

    installDirOptions = [
        Option("1", "Change DATAPATH", dataInput="defaultInstallDir"),
        Option("2", "Change TEES_SETTINGS", dataInput="configFilePath"),
        Option("c", "Continue", "Classifier", isDefault=True, handler=initLocalSettings)]
    Menu("Install Directory", None, installDirOptions, pathMenuInitializer)

    startMenuName = "Configure TEES"
    welcomeText = """
    Welcome to using the Turku Event Extraction System (TEES)! In order to work, TEES
    depends on a number of other programs, which have to be set up before use.

    The classifier (1) is required for all uses of the system. The models (2) are
    required for predicting events and together with the preprocessing tools (4)
    can be used on any unprocessed text. The corpora (3) are used for testing the
    performance of a model or for training a new model.

    If you are unsure which components you need, just install everything (the default choice).
    You can also rerun configure.py at any time later to install missing components.

    To make a choice, type the option's key and press enter, or just press enter for the
    default option. The '*' sign indicates the default option and brackets a selectable one.
    """
    # pathMenuInitializer reads the toggles of options "1"-"4" of this menu
    componentOptions = [
        Option("1", "Install classifier (SVM Multiclass)", toggle=True),
        Option("2", "Install models (TEES models for BioNLP'11, BioNLP'09 and DDI'11)", toggle=True),
        Option("3", "Install corpora (BioNLP'11, BioNLP'09 and DDI'11)", toggle=True),
        Option("4", "Install preprocessing tools (BANNER, BLLIP parser etc)", toggle=True),
        Option("c", "Continue and install selected items", "Install Directory", isDefault=True),
        Option("q", "Quit", handler=sys.exit),
        ]
    Menu(startMenuName, welcomeText, componentOptions)

    modelOptions = [
        redownloadToggle(),
        Option.SPACE,
        Option("i", "Install", isDefault=True),
        Option("s", "Skip")]
    Menu("Models", "Install TEES models\n", modelOptions, modelsMenuInitializer)

    corpusOptions = [
        redownloadToggle(),
        Option.SPACE,
        Option("2", "Install BioNLP'11 corpora", toggle=False),
        Option("3", "Install BioNLP'09 (GENIA) corpus", toggle=False),
        Option("4", "Install DDI'11 (Drug-Drug Interactions) corpus", toggle=False),
        Option.SPACE,
        Option("5", "Install BioNLP'11 evaluators", toggle=False),
        Option.SPACE,
        Option("i", "Install", isDefault=True),
        Option("s", "Skip")]
    Menu("Corpora", "Install corpora\n", corpusOptions, corpusMenuInitializer)

    toolsText = """
    The tools are required for processing unannotated text and can
    be used as part of TEES, or independently through their wrappers. For
    information and usage conditions, see https://github.com/jbjorne/TEES/wiki/Licenses.
    Some of the tools need to be compiled from source, this will take a while.

    The external tools used by TEES are:

    The GENIA Sentence Splitter of Tokyo University (Tsuruoka Y. et. al.)

    The BANNER named entity recognizer by Robert Leaman et. al.

    The BLLIP parser of Brown University (Charniak E., Johnson M. et. al.)

    The Stanford Parser of the Stanford Natural Language Processing Group
    """
    toolOptions = [
        redownloadToggle(),
        Option.SPACE,
        Option("2", "Install GENIA Sentence Splitter", toggle=False),
        Option("3", "Install BANNER named entity recognizer", toggle=False),
        Option("4", "Install BLLIP parser", toggle=False),
        Option("5", "Install Stanford Parser", toggle=False),
        Option.SPACE,
        Option("i", "Install", isDefault=True),
        Option("s", "Skip")]
    Menu("Tools", toolsText, toolOptions, toolsMenuInitializer)

    return startMenuName
377
def configure(installDir=None, localSettings=None, auto=False, width=80, clear=False, onError="ASK"):
    """
    Run the TEES configuration menus.

    Stores the given options on Menu.system, builds the menus, runs the menu
    system from the start menu and finalizes it.

    installDir -- stored as progArgs["installDir"]
    localSettings -- stored as progArgs["localSettings"]
    auto -- accepted but not used by this function
    width -- stored as Menu.system.width
    clear -- stored as progArgs["clearInstallDir"]
    onError -- stored as Menu.system.onException
    """
    menuSystem = Menu.system
    menuSystem.width = width
    menuSystem.progArgs = {
        "installDir": installDir,
        "localSettings": localSettings,
        "clearInstallDir": clear,
    }
    menuSystem.onException = onError
    startMenu = buildMenus()
    menuSystem.run(startMenu)
    menuSystem.finalize()
if __name__=="__main__":
    from optparse import OptionParser
    # Import Psyco if available (optional, so a missing module is not an error)
    try:
        import psyco
        psyco.full()
        print >> sys.stderr, "Found Psyco, using"
    except ImportError:
        pass

    optparser = OptionParser(usage="%prog [options]\nConfigure TEES")
    optparser.add_option("-i", "--installDir", default=None, dest="installDir", help="", metavar="FILE")
    optparser.add_option("-l", "--localSettings", default=None, dest="localSettings", help="", metavar="FILE")
    optparser.add_option("-w", "--width", default=80, type="int", dest="width", help="")
    optparser.add_option("--auto", default=False, action="store_true", dest="auto", help="")
    optparser.add_option("--clearInstallDir", default=False, action="store_true", dest="clearInstallDir", help="")
    # type="choice" makes optparse reject invalid values with a usage error;
    # unlike an assert, this check is not stripped when running with -O
    optparser.add_option("--onError", default="ASK", type="choice", choices=["ASK", "IGNORE", "EXIT"], dest="onError", help="ASK, IGNORE or EXIT")
    (options, args) = optparser.parse_args()

    configure(options.installDir, options.localSettings, options.auto, options.width, options.clearInstallDir, options.onError)