1  """ 
  2  Configure TEES by installing data files and external components. 
  3  """ 
  4   
  5  import sys, os, shutil 
  6  import textwrap 
  7  from Utils.Menu import * 
  8  import Utils.Settings as Settings 
  9   
 10  import Classifiers.SVMMultiClassClassifier 
 11   
 12  import Tools.GeniaSentenceSplitter 
 13  import Tools.BANNER 
 14  import Tools.BLLIPParser 
 15  import Tools.StanfordParser 
 16   
 17  import Utils.Convert.convertBioNLP as convertBioNLP 
 18  import Utils.Download 
 19   
 20   
    # Build the list of sub-menus to visit, one for each item that is toggled
    # on in the previous menu (its options "1" to "4").
    nextMenus = []
    if prevMenu.optDict["1"].toggle:
        nextMenus.append("Classifier")
    if prevMenu.optDict["2"].toggle:
        nextMenus.append("Models")
    if prevMenu.optDict["3"].toggle:
        nextMenus.append("Corpora")
    if prevMenu.optDict["4"].toggle:
        nextMenus.append("Tools")
    # The "c" option of this menu gets that list as its nextMenu.
    menu.optDict["c"].nextMenu = nextMenus
    
    menu.text = """ 
    1. By default, all data and tools will be installed to one directory, the DATAPATH.  
    You can later set the installation directory individually for each component, or  
    you can change the default path now. 
     
    """ 
    # Part 1: the default installation directory (DATAPATH).
    if menu.defaultInstallDir == None:
        # Nothing chosen yet. Order of precedence: the installDir program
        # argument, the DATAPATH of the settings file given as a program
        # argument, the DATAPATH of the settings file named by TEES_SETTINGS,
        # and finally ~/.tees.
        if menu.system.progArgs["installDir"] != None:
            menu.defaultInstallDir = menu.system.progArgs["installDir"]
            # With clearInstallDir set, an existing install directory is
            # removed recursively before anything is installed.
            if menu.system.progArgs["clearInstallDir"] and os.path.exists(menu.system.progArgs["installDir"]):
                shutil.rmtree(menu.system.progArgs["installDir"])
            menu.text += "\nUsing the DATAPATH path from configure.py command line options.\n\n"
        elif menu.system.progArgs["localSettings"] != None:
            # Point TEES_SETTINGS at the given file and reload Settings so that
            # Settings.DATAPATH is read after the environment change.
            os.environ["TEES_SETTINGS"] = os.path.abspath(menu.system.progArgs["localSettings"])
            reload(Settings)
            menu.defaultInstallDir = Settings.DATAPATH
        elif "TEES_SETTINGS" in os.environ:
            menu.defaultInstallDir = Settings.DATAPATH
        else:
            menu.defaultInstallDir = os.path.expanduser("~/.tees")
    elif os.path.exists(menu.defaultInstallDir):
        # A path was already set when this code was entered and it exists:
        # only warn if it is not a directory.
        if not os.path.isdir(menu.defaultInstallDir):
            menu.text += "WARNING! The DATAPATH directory is not a directory.\n\n"
    else:
        # A path was already set but does not exist yet: try to create it.
        # The bare except turns any failure into a warning in the menu text.
        try:
            os.makedirs(menu.defaultInstallDir)
        except:
            menu.text += "WARNING! Could not create DATAPATH.\n\n"
    
    
    menu.text += """ 
    2. TEES reads its configuration from a file defined by the environment 
    variable "TEES_SETTINGS". This environment variable must be set, and 
    point to a configuration file for TEES to work. By editing this  
    configuration file you can configure TEES in addition (or instead of) 
    using this configuration program. 
    """ 
    # Part 2: the local settings file path. Order of precedence: the
    # localSettings program argument, the TEES_SETTINGS environment variable,
    # and finally ~/.tees_local_settings.py.
    if menu.configFilePath == None:
        if menu.system.progArgs["localSettings"] != None:
            menu.configFilePath = menu.system.progArgs["localSettings"]
            menu.text += "\nUsing the TEES_SETTINGS path from configure.py command line options.\n\n"
        elif "TEES_SETTINGS" in os.environ:
            menu.configFilePath = os.environ["TEES_SETTINGS"]
            menu.text += """ 
            The "TEES_SETTINGS" environment variable is already set. If the configuration file 
            exists, this installation program will use it and by default install only missing components. 
            """ 
        else:
            menu.configFilePath = os.path.expanduser("~/.tees_local_settings.py")
            # The message depends on whether the default file already exists.
            if os.path.exists(menu.configFilePath):
                menu.text += """ 
                The "TEES_SETTINGS" environment variable is not set, but a configuration file has been 
                found in the default location. This installation program will use the existing 
                file, and by default install only missing components. 
                """      
            else:
                menu.text += """ 
                The "TEES_SETTINGS" environment variable is not set, so a new local configuration file 
                will be created. 
                """ 
    
    # Publish the chosen paths: as attributes of the menu system, as
    # Settings.DATAPATH, and as TEES_SETTINGS in this process' environment.
    menu.system.setAttr("defaultInstallDir", menu.defaultInstallDir)
    Settings.DATAPATH = menu.defaultInstallDir
    menu.system.setAttr("configFilePath", menu.configFilePath)
    os.environ["TEES_SETTINGS"] = menu.configFilePath
    setClosingMessage(menu.system, menu.configFilePath)
    # The "c" option's handler is called with the settings file path.
    menu.optDict["c"].handlerArgs = [menu.configFilePath]
 100       
102      menuSystem.closingMessage = "!!!!!!!!!!!!!!!!!!!!!! Important Note !!!!!!!!!!!!!!!!!!!!!!\n" 
103      menuSystem.closingMessage += "Before using TEES, remember to define the TEES_SETTINGS\n" 
104      menuSystem.closingMessage += "environment variable. How to do this depends on your shell,\n" 
105      menuSystem.closingMessage += "some common commands are:\n\n" 
106      menuSystem.closingMessage += "bash: 'export TEES_SETTINGS=" + configFilePath + "'\n" 
107      menuSystem.closingMessage += "tcsh: 'setenv TEES_SETTINGS " + configFilePath + "'\n"  
 108       
110      assert Menu.system.defaultInstallDir != None 
111      if os.path.exists(filename): 
112          print >> sys.stderr, "Using existing local settings file", filename 
113          return 
114      print >> sys.stderr, "Initializing local settings file", filename 
115      f = open(filename, "wt") 
116      f.write(""" 
117      # Edit these settings to configure TEES. A variable must have a value  
118      # other than None for it to be usable. This file is interpreted as 
119      # a Python module, so Python code can be used. 
120       
121      # Tools 
122      SVM_MULTICLASS_DIR = None # svm_multiclass_learn and svm_multiclass_classify directory 
123      BANNER_DIR = None # BANNER program directory 
124      GENIA_SENTENCE_SPLITTER_DIR = None # GENIA Sentence Splitter directory 
125      RUBY_PATH = "ruby" # Command to run Ruby (used only by the GENIA Sentence Splitter) 
126      BLLIP_PARSER_DIR = None # The BLLIP parser directory 
127      MCCLOSKY_BIOPARSINGMODEL_DIR = None # The McClosky BioModel directory 
128      STANFORD_PARSER_DIR = None # The Stanford parser directory 
129       
130      # Data 
131      DATAPATH = 'DATAPATH_VALUE' # Main directory for datafiles 
132      CORPUS_DIR = None # Directory for the corpus XML-files 
133      TEES_MODEL_DIR = None # Directory for the official TEES models 
134      """.replace("    ", "").replace("DATAPATH_VALUE", Menu.system.defaultInstallDir)) 
135      f.close() 
136       
137      os.environ["TEES_SETTINGS"] = filename 
138      reload(Settings) 
 139   
141       
142       
143      if hasattr(Settings, setting) and getattr(Settings, setting) != None: 
144          menu.text += "The " + setting + " setting is already configured, so the default option is to skip installing.\n\n" 
145          menu.text += setting + "=" + getattr(Settings, setting) 
146          menu.setDefault(defaultSkipKey) 
147          return False 
148      else: 
149          menu.setDefault(defaultInstallKey) 
150          return True 
 151   
153       
154      allFound = True  
155      for dataSet in dataSets: 
156          filePath = Settings.CORPUS_DIR + "/" + corpus + dataSet 
157          if not os.path.exists(filePath): 
158               
159              allFound = False 
160      if allFound:  
161          return True 
162      else:  
163          return False 
 164   
    # Set up the classifier menu: its text, its default option and the
    # arguments for the install handler.
    menu.text = """ 
    TEES uses the SVM Multiclass classifer by Thorsten Joachims for all  
    classification tasks. You can optionally choose to compile it from  
    source if the precompiled Linux-binary does not work on your system. 
    """ 
    checkInstallPath(menu, "SVM_MULTICLASS_DIR")
    # The same "is SVM_MULTICLASS_DIR configured" test is made again here and
    # the default option is set explicitly: "s" when configured, "i" when not.
    if hasattr(Settings, "SVM_MULTICLASS_DIR") and getattr(Settings, "SVM_MULTICLASS_DIR") != None:
        menu.setDefault("s")
        svmInstallDir = Settings.SVM_MULTICLASS_DIR
    else:
        menu.setDefault("i")
        svmInstallDir = None
    # NOTE(review): svmInstallDir is not read in the statements above or below;
    # confirm whether it is still needed.
    # Arguments for the "i" option's handler. The second is the download
    # directory under the default install dir and the fourth is the current
    # state of this menu's option "1".
    menu.optDict["i"].handlerArgs = [None, os.path.join(menu.system.defaultInstallDir, "tools/download"), True, menu.optDict["1"].toggle, True]
 179   
209   
    # Set up the models menu: its text, its default option and the handlers
    # that download the models and record where they were installed.
    menu.text = """ 
    TEES models are used for predicting events or relations using 
    classify.py. Models are provided for all tasks in the BioNLP'11,  
    BioNLP'09 and DDI'11 shared tasks. 
     
    For a list of models and instructions for using them see 
    https://github.com/jbjorne/TEES/wiki/Classifying. 
    """ 
    
    # NOTE(review): menu != prevMenu presumably means the menu was entered from
    # another menu rather than redisplayed - confirm against Utils.Menu.
    if menu != prevMenu:
        menu.setDefault("s")
    redownload = menu.optDict["1"].toggle
    destPath = os.path.join(menu.system.defaultInstallDir, "models")
    downloadPath = os.path.join(menu.system.defaultInstallDir, "models/download")
    
    # Default to installing when MODEL_DIR is missing, None, or names a path
    # that does not exist.
    # NOTE(review): the setting used here is MODEL_DIR, while the generated
    # settings template lists TEES_MODEL_DIR - confirm which name is intended.
    if menu != prevMenu and (not hasattr(Settings, "MODEL_DIR") or Settings.MODEL_DIR == None or not os.path.exists(Settings.MODEL_DIR)):
        menu.setDefault("i")
    # Two handlers for "i", each with its own argument list: first download and
    # extract the models into destPath, then store destPath as MODEL_DIR.
    menu.optDict["i"].handler = [Utils.Download.downloadAndExtract, Settings.setLocal]
    menu.optDict["i"].handlerArgs = [[Settings.URL["MODELS"], destPath, downloadPath, None, True, redownload], ["MODEL_DIR", destPath]]
 230   
    # Set up the corpora menu: its text, which corpus options are toggled on,
    # the default option, and one install handler per selected item.
    menu.text = """ 
    The corpora are used for training new models and testing existing 
    models. The corpora installable here are from the two BioNLP Shared 
    Tasks (BioNLP'09 and BioNLP'11) on Event Extraction (organized by  
    University of Tokyo), and the First Challenge Task: Drug-Drug Interaction  
    Extraction (DDI'11, organized by Universidad Carlos III de Madrid). 
     
    The corpora are downloaded as interaction XML files, generated from the 
    original Shared Task files. If you need to convert the corpora from  
    the original files, you can use the convertBioNLP.py and convertDDI.py programs 
    located at Utils/Convert.  
     
    It is also recommended to download the official BioNLP Shared Task evaluator  
    programs, which will be used by TEES when training or testing on those corpora. 
    """ 
    
    # Start from "skip"; any item selected below switches the default to "i".
    menu.setDefault("s")
    
    # Make sure CORPUS_DIR has a value before the corpus checks below run.
    if not hasattr(Settings, "CORPUS_DIR") or getattr(Settings, "CORPUS_DIR") == None:
        Settings.setLocal("CORPUS_DIR", os.path.join(menu.system.defaultInstallDir, "corpora"))
        print >> sys.stderr
    
    # Handlers and their argument lists are built in parallel, index by index.
    handlers = []
    handlerArgs = []
    corpusInstallPath = os.path.join(menu.system.defaultInstallDir, "corpora")
    corpusDownloadPath = os.path.join(menu.system.defaultInstallDir, "corpora/download")
    
    redownload = menu.optDict["1"].toggle
    # BioNLP'11: a single handler installs the whole set, so the loop stops at
    # the first corpus that triggers it (option "2" already on, or the corpus
    # check failing when menu != prevMenu).
    for corpus in ["GE", "EPI", "ID", "BB", "BI", "CO", "REL", "REN"]:
        if menu.optDict["2"].toggle or (menu != prevMenu and not checkCorpusInstall(corpus)):
            menu.setDefault("i")
            menu.optDict["2"].toggle = True
            handlers.append(convertBioNLP.installPreconverted)
            handlerArgs.append(["BIONLP_11_CORPORA", corpusInstallPath, corpusDownloadPath, redownload, True])
            break
    # BioNLP'09 corpus (option "3").
    if menu.optDict["3"].toggle or (menu != prevMenu and not checkCorpusInstall("GE09")):
        menu.setDefault("i")
        menu.optDict["3"].toggle = True
        handlers.append(convertBioNLP.installPreconverted)
        handlerArgs.append(["BIONLP_09_CORPUS", corpusInstallPath, corpusDownloadPath, redownload, True])
    # DDI'11 corpus (option "4"); only the -train.xml and -devel.xml files are
    # checked for.
    if menu.optDict["4"].toggle or (menu != prevMenu and not checkCorpusInstall("DDI", ("-train.xml", "-devel.xml"))):
        menu.setDefault("i")
        menu.optDict["4"].toggle = True
        handlers.append(convertBioNLP.installPreconverted)
        handlerArgs.append(["DDI_11_CORPUS", corpusInstallPath, corpusDownloadPath, redownload, True])
    
    # Evaluators (option "5"): selected when BIONLP_EVALUATOR_DIR is missing or
    # None and menu != prevMenu, or when the option is already on.
    evaluatorInstallPath = os.path.join(menu.system.defaultInstallDir, "tools/evaluators")
    evaluatorDownloadPath = os.path.join(menu.system.defaultInstallDir, "tools/download")
    if menu.optDict["5"].toggle or (menu != prevMenu and (not hasattr(Settings, "BIONLP_EVALUATOR_DIR") or getattr(Settings, "BIONLP_EVALUATOR_DIR") == None)):
        menu.setDefault("i")
        menu.optDict["5"].toggle = True
        handlers.append(convertBioNLP.installEvaluators)
        handlerArgs.append([evaluatorInstallPath, evaluatorDownloadPath, redownload, True])
    
    # Attach the collected handlers and arguments to the "i" option.
    menu.optDict["i"].handler = handlers
    menu.optDict["i"].handlerArgs = handlerArgs
 288       
    # Define all menus of the configuration program and return the name of the
    # first one. The Menu objects are not kept in variables here.
    # NOTE(review): presumably Menu registers each instance by name on
    # creation - confirm against Utils.Menu.

    # Classifier menu; option "1" is read by svmMenuInitializer.
    Menu("Classifier", None, [
        Option("1", "Compile from source", toggle=False),
        Option("i", "Install", handler=Classifiers.SVMMultiClassClassifier.install),
        Option("s", "Skip")],
        svmMenuInitializer)
        
    # Install directory menu; "c" runs initLocalSettings and names
    # "Classifier" as its next menu.
    Menu("Install Directory", None, [
        Option("1", "Change DATAPATH", dataInput="defaultInstallDir"),
        Option("2", "Change TEES_SETTINGS", dataInput="configFilePath"),
        Option("c", "Continue", "Classifier", isDefault=True, handler=initLocalSettings)],
        pathMenuInitializer)

    # Main menu; options "1" to "4" are all toggled on by default and "c"
    # names "Install Directory" as its next menu.
    Menu("Configure TEES",  
        """ 
        Welcome to using the Turku Event Extraction System (TEES)! In order to work, TEES 
        depends on a number of other programs, which have to be set up before use. 
         
        The classifier (1) is required for all uses of the system. The models (2) are  
        required for predicting events and together with the preprocessing tools (4) 
        can be used on any unprocessed text. The corpora (3) are used for testing the  
        performance of a model or for training a new model. 
         
        If you are unsure which components you need, just install everything (the default choice).  
        You can also rerun configure.py at any time later to install missing components. 
         
        To make a choice, type the option's key and press enter, or just press enter for the 
        default option. The '*' sign indicates the default option and brackets a selectable one. 
        """, 
        [
        Option("1", "Install classifier (SVM Multiclass)", toggle=True),
        Option("2", "Install models (TEES models for BioNLP'11, BioNLP'09 and DDI'11)", toggle=True),
        Option("3", "Install corpora (BioNLP'11, BioNLP'09 and DDI'11)", toggle=True),
        Option("4", "Install preprocessing tools (BANNER, BLLIP parser etc)", toggle=True),
        Option("c", "Continue and install selected items", "Install Directory", isDefault=True),
        Option("q", "Quit", handler=sys.exit),
        ])

    # Models menu; the handlers for "i" are attached by modelsMenuInitializer.
    Menu("Models", "Install TEES models\n", [
        Option("1", "Redownload already downloaded files", toggle=False),
        Option.SPACE,
        Option("i", "Install", isDefault=True),
        Option("s", "Skip")],
        modelsMenuInitializer)
    
    # Corpora menu; options "2" to "5" start off and are switched on by
    # corpusMenuInitializer where needed.
    Menu("Corpora", "Install corpora\n", [
        Option("1", "Redownload already downloaded files", toggle=False),
        Option.SPACE,
        Option("2", "Install BioNLP'11 corpora", toggle=False),
        Option("3", "Install BioNLP'09 (GENIA) corpus", toggle=False),
        Option("4", "Install DDI'11 (Drug-Drug Interactions) corpus", toggle=False),
        Option.SPACE,
        Option("5", "Install BioNLP'11 evaluators", toggle=False),
        Option.SPACE,
        Option("i", "Install", isDefault=True),
        Option("s", "Skip")],
        corpusMenuInitializer)
    
    # Preprocessing tools menu, initialized by toolsMenuInitializer.
    Menu("Tools",  
         """ 
         The tools are required for processing unannotated text and can 
         be used as part of TEES, or independently through their wrappers. For 
         information and usage conditions, see https://github.com/jbjorne/TEES/wiki/Licenses. 
         Some of the tools need to be compiled from source, this will take a while. 
          
         The external tools used by TEES are: 
          
         The GENIA Sentence Splitter of Tokyo University (Tsuruoka Y. et. al.) 
          
         The BANNER named entity recognizer by Robert Leaman et. al. 
          
         The BLLIP parser of Brown University (Charniak E., Johnson M. et. al.) 
          
         The Stanford Parser of the Stanford Natural Language Processing Group 
         """,  
         [
        Option("1", "Redownload already downloaded files", toggle=False),
        Option.SPACE,
        Option("2", "Install GENIA Sentence Splitter", toggle=False),
        Option("3", "Install BANNER named entity recognizer", toggle=False),
        Option("4", "Install BLLIP parser", toggle=False),
        Option("5", "Install Stanford Parser", toggle=False),
        Option.SPACE,
        Option("i", "Install", isDefault=True),
        Option("s", "Skip")],
        toolsMenuInitializer)

    # Name of the menu to start from.
    return "Configure TEES"
 377   
387   
if __name__=="__main__":
    # Command line entry point: parse the options and pass them on to
    # configure().
    import sys
    
    from optparse import OptionParser
    
    # Psyco is optional: it is enabled when importable and silently skipped
    # when it is not installed.
    try:
        import psyco
        psyco.full()
        print >> sys.stderr, "Found Psyco, using"
    except ImportError:
        pass

    optparser = OptionParser(usage="%prog [options]\nConfigure TEES")
    optparser.add_option("-i", "--installDir", default=None, dest="installDir", help="", metavar="FILE")
    optparser.add_option("-l", "--localSettings", default=None, dest="localSettings", help="", metavar="FILE")
    optparser.add_option("-w", "--width", default=80, type="int", dest="width", help="")
    optparser.add_option("--auto", default=False, action="store_true", dest="auto", help="")
    optparser.add_option("--clearInstallDir", default=False, action="store_true", dest="clearInstallDir", help="")
    optparser.add_option("--onError", default="ASK", dest="onError", help="ASK, IGNORE or EXIT")
    (options, args) = optparser.parse_args()
    # Validate explicitly instead of with assert: an assert is removed when
    # Python runs with -O, and optparser.error prints the usage text and exits
    # with status 2 rather than showing a traceback.
    if options.onError not in ["ASK", "IGNORE", "EXIT"]:
        optparser.error("--onError must be one of ASK, IGNORE or EXIT")
    
    configure(options.installDir, options.localSettings, options.auto, options.width, options.clearInstallDir, options.onError)
411   
412   
413   
414   
415   
416   
417