1 """
2 Configure TEES by installing data files and external components.
3 """
4
5 import sys, os, shutil
6 import textwrap
7 from Utils.Menu import *
8 import Utils.Settings as Settings
9
10 import Classifiers.SVMMultiClassClassifier
11
12 import Tools.GeniaSentenceSplitter
13 import Tools.BANNER
14 import Tools.BLLIPParser
15 import Tools.StanfordParser
16
17 import Utils.Convert.convertBioNLP as convertBioNLP
18 import Utils.Download
19
20
# NOTE(review): the `def` header of this body is not visible in this view and
# the original indentation has been lost. The body reads the free names `menu`
# and `prevMenu`; it is presumably the `pathMenuInitializer` that is passed to
# the "Install Directory" menu further down -- confirm against the full file.
#
# What the visible statements do:
#   * collect the names of the menus to visit next, based on the toggles of
#     options "1"-"4" of `prevMenu`,
#   * pick a default for menu.defaultInstallDir (the DATAPATH),
#   * pick a default for menu.configFilePath (the TEES_SETTINGS file),
#   * build the explanatory menu.text and publish the chosen values.

# One follow-up menu per component that is toggled on in the previous menu.
22 nextMenus = []
23 if prevMenu.optDict["1"].toggle:
24 nextMenus.append("Classifier")
25 if prevMenu.optDict["2"].toggle:
26 nextMenus.append("Models")
27 if prevMenu.optDict["3"].toggle:
28 nextMenus.append("Corpora")
29 if prevMenu.optDict["4"].toggle:
30 nextMenus.append("Tools")
# Option "c" continues to the list built above.
31 menu.optDict["c"].nextMenu = nextMenus
32
33 menu.text = """
34 1. By default, all data and tools will be installed to one directory, the DATAPATH.
35 You can later set the installation directory individually for each component, or
36 you can change the default path now.
37
38 """
# DATAPATH default. Sources consulted, in the order they are written: the
# "installDir" program argument, the DATAPATH of the settings file named by the
# "localSettings" program argument, the DATAPATH of the already imported
# Settings module when TEES_SETTINGS is in the environment, and finally ~/.tees.
# NOTE(review): with the indentation lost it cannot be told from this view
# which `if` each `elif`/`else` below belongs to -- confirm the nesting.
39 if menu.defaultInstallDir == None:
40 if menu.system.progArgs["installDir"] != None:
41 menu.defaultInstallDir = menu.system.progArgs["installDir"]
# WARNING: with "clearInstallDir" set, an existing install directory is
# deleted recursively.
42 if menu.system.progArgs["clearInstallDir"] and os.path.exists(menu.system.progArgs["installDir"]):
43 shutil.rmtree(menu.system.progArgs["installDir"])
44 menu.text += "\nUsing the DATAPATH path from configure.py command line options.\n\n"
45 elif menu.system.progArgs["localSettings"] != None:
# Point TEES_SETTINGS at the given file and re-import Settings (Python 2
# builtin reload) before reading DATAPATH from it.
46 os.environ["TEES_SETTINGS"] = os.path.abspath(menu.system.progArgs["localSettings"])
47 reload(Settings)
48 menu.defaultInstallDir = Settings.DATAPATH
49 elif "TEES_SETTINGS" in os.environ:
50 menu.defaultInstallDir = Settings.DATAPATH
51 else:
52 menu.defaultInstallDir = os.path.expanduser("~/.tees")
# Warn when the path exists but is not a directory; otherwise try to create it.
53 elif os.path.exists(menu.defaultInstallDir):
54 if not os.path.isdir(menu.defaultInstallDir):
55 menu.text += "WARNING! The DATAPATH directory is not a directory.\n\n"
56 else:
57 try:
58 os.makedirs(menu.defaultInstallDir)
# NOTE(review): a bare `except:` hides every error, not only filesystem
# ones; consider narrowing it (e.g. to OSError).
59 except:
60 menu.text += "WARNING! Could not create DATAPATH.\n\n"
61
62
63 menu.text += """
64 2. TEES reads its configuration from a file defined by the environment
65 variable "TEES_SETTINGS". This environment variable must be set, and
66 point to a configuration file for TEES to work. By editing this
67 configuration file you can configure TEES in addition (or instead of)
68 using this configuration program.
69 """
# Settings file default: the "localSettings" program argument, then the
# TEES_SETTINGS environment variable, then ~/.tees_local_settings.py.
70 if menu.configFilePath == None:
71 if menu.system.progArgs["localSettings"] != None:
72 menu.configFilePath = menu.system.progArgs["localSettings"]
73 menu.text += "\nUsing the TEES_SETTINGS path from configure.py command line options.\n\n"
74 elif "TEES_SETTINGS" in os.environ:
75 menu.configFilePath = os.environ["TEES_SETTINGS"]
76 menu.text += """
77 The "TEES_SETTINGS" environment variable is already set. If the configuration file
78 exists, this installation program will use it and by default install only missing components.
79 """
80 else:
81 menu.configFilePath = os.path.expanduser("~/.tees_local_settings.py")
82 if os.path.exists(menu.configFilePath):
83 menu.text += """
84 The "TEES_SETTINGS" environment variable is not set, but a configuration file has been
85 found in the default location. This installation program will use the existing
86 file, and by default install only missing components.
87 """
88 else:
89 menu.text += """
90 The "TEES_SETTINGS" environment variable is not set, so a new local configuration file
91 will be created.
92 """
93
# Publish the chosen values: store them on menu.system, mirror them into
# Settings.DATAPATH and this process's TEES_SETTINGS environment variable,
# refresh the closing message and hand the settings path to option "c"'s handler.
94 menu.system.setAttr("defaultInstallDir", menu.defaultInstallDir)
95 Settings.DATAPATH = menu.defaultInstallDir
96 menu.system.setAttr("configFilePath", menu.configFilePath)
97 os.environ["TEES_SETTINGS"] = menu.configFilePath
98 setClosingMessage(menu.system, menu.configFilePath)
99 menu.optDict["c"].handlerArgs = [menu.configFilePath]
100
# NOTE(review): the `def` header of this body is not visible in this view. The
# body reads `menuSystem` and `configFilePath`, which matches the call
# setClosingMessage(menu.system, menu.configFilePath) made earlier in the file,
# so this is presumably setClosingMessage(menuSystem, configFilePath).
#
# Builds menuSystem.closingMessage: a reminder to define the TEES_SETTINGS
# environment variable, with example commands for bash and tcsh.
# NOTE(review): configFilePath is inserted into the example commands verbatim;
# a path containing spaces would need quoting by the user.
102 menuSystem.closingMessage = "!!!!!!!!!!!!!!!!!!!!!! Important Note !!!!!!!!!!!!!!!!!!!!!!\n"
103 menuSystem.closingMessage += "Before using TEES, remember to define the TEES_SETTINGS\n"
104 menuSystem.closingMessage += "environment variable. How to do this depends on your shell,\n"
105 menuSystem.closingMessage += "some common commands are:\n\n"
106 menuSystem.closingMessage += "bash: 'export TEES_SETTINGS=" + configFilePath + "'\n"
107 menuSystem.closingMessage += "tcsh: 'setenv TEES_SETTINGS " + configFilePath + "'\n"
108
# NOTE(review): the `def` header of this body is not visible in this view. The
# body reads the free name `filename`; it is presumably the `initLocalSettings`
# handler of the "Install Directory" menu, whose handlerArgs are set to
# [menu.configFilePath] -- confirm against the full file.
#
# Creates the local settings file `filename` from a template unless it already
# exists, then points TEES_SETTINGS at it and re-imports Settings.

# The template below needs a DATAPATH value to substitute.
110 assert Menu.system.defaultInstallDir != None
# An existing settings file is never overwritten.
111 if os.path.exists(filename):
112 print >> sys.stderr, "Using existing local settings file", filename
113 return
114 print >> sys.stderr, "Initializing local settings file", filename
# NOTE(review): the file is closed with an explicit f.close(); if f.write
# raises, the handle is left open.
115 f = open(filename, "wt")
# NOTE(review): as shown here, .replace(" ", "") removes every space character
# from the template, comments included. The original argument may have been an
# indentation string that collapsed when the whitespace of this file was lost
# -- confirm. DATAPATH_VALUE is substituted between single quotes without any
# escaping, so an install path containing a quote or a backslash would end up
# altered or invalid in the generated settings module.
116 f.write("""
117 # Edit these settings to configure TEES. A variable must have a value
118 # other than None for it to be usable. This file is interpreted as
119 # a Python module, so Python code can be used.
120
121 # Tools
122 SVM_MULTICLASS_DIR = None # svm_multiclass_learn and svm_multiclass_classify directory
123 BANNER_DIR = None # BANNER program directory
124 GENIA_SENTENCE_SPLITTER_DIR = None # GENIA Sentence Splitter directory
125 RUBY_PATH = "ruby" # Command to run Ruby (used only by the GENIA Sentence Splitter)
126 BLLIP_PARSER_DIR = None # The BLLIP parser directory
127 MCCLOSKY_BIOPARSINGMODEL_DIR = None # The McClosky BioModel directory
128 STANFORD_PARSER_DIR = None # The Stanford parser directory
129
130 # Data
131 DATAPATH = 'DATAPATH_VALUE' # Main directory for datafiles
132 CORPUS_DIR = None # Directory for the corpus XML-files
133 TEES_MODEL_DIR = None # Directory for the official TEES models
134 """.replace(" ", "").replace("DATAPATH_VALUE", Menu.system.defaultInstallDir))
135 f.close()
136
# Make the freshly written file the active configuration (reload is the
# Python 2 builtin that re-imports the Settings module).
137 os.environ["TEES_SETTINGS"] = filename
138 reload(Settings)
139
141
142
# NOTE(review): the `def` header of this body is not visible in this view. The
# body reads `menu`, `setting`, `defaultSkipKey` and `defaultInstallKey`; it is
# presumably `checkInstallPath`, which is called elsewhere in this file as
# checkInstallPath(menu, "SVM_MULTICLASS_DIR"), so the two key parameters
# presumably have defaults -- confirm against the full file.
#
# Returns False (and makes the skip option the default) when Settings already
# has a non-None value for `setting`; returns True (and makes the install
# option the default) otherwise.
143 if hasattr(Settings, setting) and getattr(Settings, setting) != None:
144 menu.text += "The " + setting + " setting is already configured, so the default option is to skip installing.\n\n"
# NOTE(review): the concatenation below raises TypeError if the setting's value
# is not a string.
145 menu.text += setting + "=" + getattr(Settings, setting)
146 menu.setDefault(defaultSkipKey)
147 return False
148 else:
149 menu.setDefault(defaultInstallKey)
150 return True
151
153
# NOTE(review): the `def` header of this body is not visible in this view. The
# body reads `corpus` and `dataSets`; it is presumably `checkCorpusInstall`,
# which is called elsewhere in this file both as checkCorpusInstall(corpus) and
# with an explicit tuple of file suffixes, so `dataSets` presumably has a
# default -- confirm against the full file.
#
# Returns True when, for every suffix in `dataSets`, the file
# Settings.CORPUS_DIR + "/" + corpus + suffix exists; False otherwise.
# Settings.CORPUS_DIR must be a string here: concatenating None raises TypeError.
154 allFound = True
155 for dataSet in dataSets:
156 filePath = Settings.CORPUS_DIR + "/" + corpus + dataSet
157 if not os.path.exists(filePath):
158
159 allFound = False
160 if allFound:
161 return True
162 else:
163 return False
164
# NOTE(review): the `def` header of this body is not visible in this view. The
# body reads the free name `menu`; it is presumably the `svmMenuInitializer`
# passed to the "Classifier" menu further down -- confirm against the full file.
#
# Sets the "Classifier" menu text, chooses install/skip as the default option
# and prepares the positional arguments of the install handler.
166 menu.text = """
167 TEES uses the SVM Multiclass classifer by Thorsten Joachims for all
168 classification tasks. You can optionally choose to compile it from
169 source if the precompiled Linux-binary does not work on your system.
170 """
171 checkInstallPath(menu, "SVM_MULTICLASS_DIR")
# NOTE(review): this repeats the test already performed inside
# checkInstallPath, whose return value is discarded above; svmInstallDir is
# assigned but not used in the visible lines.
172 if hasattr(Settings, "SVM_MULTICLASS_DIR") and getattr(Settings, "SVM_MULTICLASS_DIR") != None:
173 menu.setDefault("s")
174 svmInstallDir = Settings.SVM_MULTICLASS_DIR
175 else:
176 menu.setDefault("i")
177 svmInstallDir = None
# Arguments for option "i", whose handler in the "Classifier" menu definition is
# Classifiers.SVMMultiClassClassifier.install; the meaning of each position is
# defined by that function. The fourth value is the toggle of option "1"
# ("Compile from source").
178 menu.optDict["i"].handlerArgs = [None, os.path.join(menu.system.defaultInstallDir, "tools/download"), True, menu.optDict["1"].toggle, True]
179
209
# NOTE(review): the `def` header of this body is not visible in this view. The
# body reads `menu` and `prevMenu`; it is presumably the `modelsMenuInitializer`
# passed to the "Models" menu further down -- confirm against the full file.
#
# Sets the "Models" menu text, chooses the default option and wires option "i"
# to download/extract the models and then record their location.
211 menu.text = """
212 TEES models are used for predicting events or relations using
213 classify.py. Models are provided for all tasks in the BioNLP'11,
214 BioNLP'09 and DDI'11 shared tasks.
215
216 For a list of models and instructions for using them see
217 https://github.com/jbjorne/TEES/wiki/Classifying.
218 """
219
# The default is only (re)chosen when this menu is entered from another menu.
220 if menu != prevMenu:
221 menu.setDefault("s")
# Option "1" is "Redownload already downloaded files".
222 redownload = menu.optDict["1"].toggle
223 destPath = os.path.join(menu.system.defaultInstallDir, "models")
224 downloadPath = os.path.join(menu.system.defaultInstallDir, "models/download")
225
# Install becomes the default when MODEL_DIR is missing, None or points to a
# path that does not exist.
# NOTE(review): the settings template written by this file defines
# TEES_MODEL_DIR, while MODEL_DIR is checked and set here -- confirm which name
# is intended.
226 if menu != prevMenu and (not hasattr(Settings, "MODEL_DIR") or Settings.MODEL_DIR == None or not os.path.exists(Settings.MODEL_DIR)):
227 menu.setDefault("i")
# Two handlers with one argument list each, in matching order.
228 menu.optDict["i"].handler = [Utils.Download.downloadAndExtract, Settings.setLocal]
229 menu.optDict["i"].handlerArgs = [[Settings.URL["MODELS"], destPath, downloadPath, None, True, redownload], ["MODEL_DIR", destPath]]
230
# NOTE(review): the `def` header of this body is not visible in this view. The
# body reads `menu` and `prevMenu`; it is presumably the `corpusMenuInitializer`
# passed to the "Corpora" menu further down -- confirm against the full file.
#
# Sets the "Corpora" menu text and builds, for option "i", one handler plus one
# argument list per corpus group / evaluator package that is toggled on or
# found to be missing.
232 menu.text = """
233 The corpora are used for training new models and testing existing
234 models. The corpora installable here are from the two BioNLP Shared
235 Tasks (BioNLP'09 and BioNLP'11) on Event Extraction (organized by
236 University of Tokyo), and the First Challenge Task: Drug-Drug Interaction
237 Extraction (DDI'11, organized by Universidad Carlos III de Madrid).
238
239 The corpora are downloaded as interaction XML files, generated from the
240 original Shared Task files. If you need to convert the corpora from
241 the original files, you can use the convertBioNLP.py and convertDDI.py programs
242 located at Utils/Convert.
243
244 It is also recommended to download the official BioNLP Shared Task evaluator
245 programs, which will be used by TEES when training or testing on those corpora.
246 """
247
# Skip is the default until something below turns out to need installing.
248 menu.setDefault("s")
249
# checkCorpusInstall builds paths from Settings.CORPUS_DIR, so give it a value
# before the checks below.
250 if not hasattr(Settings, "CORPUS_DIR") or getattr(Settings, "CORPUS_DIR") == None:
251 Settings.setLocal("CORPUS_DIR", os.path.join(menu.system.defaultInstallDir, "corpora"))
252 print >> sys.stderr
253
# Parallel lists: handlerArgs[i] holds the arguments for handlers[i].
254 handlers = []
255 handlerArgs = []
256 corpusInstallPath = os.path.join(menu.system.defaultInstallDir, "corpora")
257 corpusDownloadPath = os.path.join(menu.system.defaultInstallDir, "corpora/download")
258
# Option "1" is "Redownload already downloaded files".
259 redownload = menu.optDict["1"].toggle
# BioNLP'11: a single install entry is added as soon as option "2" is on or one
# of the listed corpora is found missing; the `break` prevents duplicates.
# In each group below, the "missing" test only applies when the menu is entered
# from another menu (menu != prevMenu), and it also switches the toggle on.
260 for corpus in ["GE", "EPI", "ID", "BB", "BI", "CO", "REL", "REN"]:
261 if menu.optDict["2"].toggle or (menu != prevMenu and not checkCorpusInstall(corpus)):
262 menu.setDefault("i")
263 menu.optDict["2"].toggle = True
264 handlers.append(convertBioNLP.installPreconverted)
265 handlerArgs.append(["BIONLP_11_CORPORA", corpusInstallPath, corpusDownloadPath, redownload, True])
266 break
# BioNLP'09 (option "3").
267 if menu.optDict["3"].toggle or (menu != prevMenu and not checkCorpusInstall("GE09")):
268 menu.setDefault("i")
269 menu.optDict["3"].toggle = True
270 handlers.append(convertBioNLP.installPreconverted)
271 handlerArgs.append(["BIONLP_09_CORPUS", corpusInstallPath, corpusDownloadPath, redownload, True])
# DDI'11 (option "4"); only the train and devel files are checked.
272 if menu.optDict["4"].toggle or (menu != prevMenu and not checkCorpusInstall("DDI", ("-train.xml", "-devel.xml"))):
273 menu.setDefault("i")
274 menu.optDict["4"].toggle = True
275 handlers.append(convertBioNLP.installPreconverted)
276 handlerArgs.append(["DDI_11_CORPUS", corpusInstallPath, corpusDownloadPath, redownload, True])
277
# Evaluators (option "5"): needed when BIONLP_EVALUATOR_DIR is missing or None.
278 evaluatorInstallPath = os.path.join(menu.system.defaultInstallDir, "tools/evaluators")
279 evaluatorDownloadPath = os.path.join(menu.system.defaultInstallDir, "tools/download")
280 if menu.optDict["5"].toggle or (menu != prevMenu and (not hasattr(Settings, "BIONLP_EVALUATOR_DIR") or getattr(Settings, "BIONLP_EVALUATOR_DIR") == None)):
281 menu.setDefault("i")
282 menu.optDict["5"].toggle = True
283 handlers.append(convertBioNLP.installEvaluators)
284 handlerArgs.append([evaluatorInstallPath, evaluatorDownloadPath, redownload, True])
285
286 menu.optDict["i"].handler = handlers
287 menu.optDict["i"].handlerArgs = handlerArgs
288
# NOTE(review): the `def` header of this body is not visible in this view; the
# original file presumably names it `buildMenus` -- confirm against the full file.
#
# Constructs the six menus of the configuration program and returns the name of
# the first one, "Configure TEES". The Menu objects are not assigned to
# anything here, so Menu presumably registers each instance itself (Utils.Menu
# is not visible) -- TODO confirm.

# Classifier menu: option "1" is a toggle, option "i" runs the SVM installer.
290 Menu("Classifier", None, [
291 Option("1", "Compile from source", toggle=False),
292 Option("i", "Install", handler=Classifiers.SVMMultiClassClassifier.install),
293 Option("s", "Skip")],
294 svmMenuInitializer)
295
# Install-directory menu. The nextMenu of option "c" is reassigned when the
# menu is initialized (see the `menu.optDict["c"].nextMenu = nextMenus`
# statement earlier in this file).
296 Menu("Install Directory", None, [
297 Option("1", "Change DATAPATH", dataInput="defaultInstallDir"),
298 Option("2", "Change TEES_SETTINGS", dataInput="configFilePath"),
299 Option("c", "Continue", "Classifier", isDefault=True, handler=initLocalSettings)],
300 pathMenuInitializer)
301
# Entry menu: toggles "1"-"4" select the components to install.
302 Menu("Configure TEES",
303 """
304 Welcome to using the Turku Event Extraction System (TEES)! In order to work, TEES
305 depends on a number of other programs, which have to be set up before use.
306
307 The classifier (1) is required for all uses of the system. The models (2) are
308 required for predicting events and together with the preprocessing tools (4)
309 can be used on any unprocessed text. The corpora (3) are used for testing the
310 performance of a model or for training a new model.
311
312 If you are unsure which components you need, just install everything (the default choice).
313 You can also rerun configure.py at any time later to install missing components.
314
315 To make a choice, type the option's key and press enter, or just press enter for the
316 default option. The '*' sign indicates the default option and brackets a selectable one.
317 """,
318 [
319 Option("1", "Install classifier (SVM Multiclass)", toggle=True),
320 Option("2", "Install models (TEES models for BioNLP'11, BioNLP'09 and DDI'11)", toggle=True),
321 Option("3", "Install corpora (BioNLP'11, BioNLP'09 and DDI'11)", toggle=True),
322 Option("4", "Install preprocessing tools (BANNER, BLLIP parser etc)", toggle=True),
323 Option("c", "Continue and install selected items", "Install Directory", isDefault=True),
324 Option("q", "Quit", handler=sys.exit),
325 ])
326
# Models menu; the handler of option "i" is filled in by modelsMenuInitializer.
327 Menu("Models", "Install TEES models\n", [
328 Option("1", "Redownload already downloaded files", toggle=False),
329 Option.SPACE,
330 Option("i", "Install", isDefault=True),
331 Option("s", "Skip")],
332 modelsMenuInitializer)
333
# Corpora menu; the handlers of option "i" are filled in by corpusMenuInitializer.
334 Menu("Corpora", "Install corpora\n", [
335 Option("1", "Redownload already downloaded files", toggle=False),
336 Option.SPACE,
337 Option("2", "Install BioNLP'11 corpora", toggle=False),
338 Option("3", "Install BioNLP'09 (GENIA) corpus", toggle=False),
339 Option("4", "Install DDI'11 (Drug-Drug Interactions) corpus", toggle=False),
340 Option.SPACE,
341 Option("5", "Install BioNLP'11 evaluators", toggle=False),
342 Option.SPACE,
343 Option("i", "Install", isDefault=True),
344 Option("s", "Skip")],
345 corpusMenuInitializer)
346
# Tools menu.
# NOTE(review): toolsMenuInitializer is referenced below but is not defined in
# the lines visible in this view.
347 Menu("Tools",
348 """
349 The tools are required for processing unannotated text and can
350 be used as part of TEES, or independently through their wrappers. For
351 information and usage conditions, see https://github.com/jbjorne/TEES/wiki/Licenses.
352 Some of the tools need to be compiled from source, this will take a while.
353
354 The external tools used by TEES are:
355
356 The GENIA Sentence Splitter of Tokyo University (Tsuruoka Y. et. al.)
357
358 The BANNER named entity recognizer by Robert Leaman et. al.
359
360 The BLLIP parser of Brown University (Charniak E., Johnson M. et. al.)
361
362 The Stanford Parser of the Stanford Natural Language Processing Group
363 """,
364 [
365 Option("1", "Redownload already downloaded files", toggle=False),
366 Option.SPACE,
367 Option("2", "Install GENIA Sentence Splitter", toggle=False),
368 Option("3", "Install BANNER named entity recognizer", toggle=False),
369 Option("4", "Install BLLIP parser", toggle=False),
370 Option("5", "Install Stanford Parser", toggle=False),
371 Option.SPACE,
372 Option("i", "Install", isDefault=True),
373 Option("s", "Skip")],
374 toolsMenuInitializer)
375
# Name of the menu to start from.
376 return "Configure TEES"
377
387
# Script entry point: parse the command line options and hand them to
# configure().
# NOTE(review): `configure` is not defined in the lines visible in this view.
388 if __name__=="__main__":
# NOTE(review): sys is already imported at the top of the file.
389 import sys
390
391 from optparse import OptionParser
392
# Optional dependency: psyco.full() is called when the psyco module can be
# imported; otherwise execution continues without it.
393 try:
394 import psyco
395 psyco.full()
396 print >> sys.stderr, "Found Psyco, using"
397 except ImportError:
398 pass
399
400 optparser = OptionParser(usage="%prog [options]\nConfigure TEES")
401 optparser.add_option("-i", "--installDir", default=None, dest="installDir", help="", metavar="FILE")
402 optparser.add_option("-l", "--localSettings", default=None, dest="localSettings", help="", metavar="FILE")
403 optparser.add_option("-w", "--width", default=80, type="int", dest="width", help="")
404 optparser.add_option("--auto", default=False, action="store_true", dest="auto", help="")
405 optparser.add_option("--clearInstallDir", default=False, action="store_true", dest="clearInstallDir", help="")
406 optparser.add_option("--onError", default="ASK", dest="onError", help="ASK, IGNORE or EXIT")
407 (options, args) = optparser.parse_args()
# NOTE(review): assert statements are removed when Python runs with -O, so this
# validation then disappears; optparse's type="choice" with choices=[...] would
# reject invalid values unconditionally.
408 assert options.onError in ["ASK", "IGNORE", "EXIT"]
409
410 configure(options.installDir, options.localSettings, options.auto, options.width, options.clearInstallDir, options.onError)
411
412
413
414
415
416
417