Package TEES :: Package ExampleWriters :: Module BioTextExampleWriter
[hide private]

Source Code for Module TEES.ExampleWriters.BioTextExampleWriter

 1  """ 
 2  Wrapper for all interaction XML example writers 
 3  """ 
 4  import sys, os, types 
 5  thisPath = os.path.dirname(os.path.abspath(__file__)) 
 6  sys.path.append(os.path.abspath(os.path.join(thisPath,".."))) 
 7  import Core.ExampleUtils as ExampleUtils 
 8   
 9  from EntityExampleWriter import EntityExampleWriter 
10  from EdgeExampleWriter import EdgeExampleWriter 
11  from ModifierExampleWriter import ModifierExampleWriter 
12  from PhraseTriggerExampleWriter import PhraseTriggerExampleWriter 
13  #IF LOCAL 
14  from UnmergingExampleWriter import UnmergingExampleWriter 
#from UnmergedEdgeExampleWriter import UnmergedEdgeExampleWriter
#from AsymmetricEventExampleWriter import AsymmetricEventExampleWriter
#ENDIF

class BioTextExampleWriter:
    """
    A generic example writer that automatically calls the correct Example Writer
    based on the type of the examples.
    """
    @classmethod
    def write(cls, examples, predictions, corpus, outputFile, classSet=None, parse=None, tokenization=None, goldCorpus=None, insertWeights=False):
        """
        Write examples to interaction XML using the writer that matches their type.

        The example type is taken from the "xtype" key of the element at
        index 3 of the first example; all examples are assumed to share it.

        examples -- an iterable of examples, or the name of an example file,
                    which is then loaded with ExampleUtils.readExamples
        predictions, corpus, outputFile, classSet, parse, tokenization,
        goldCorpus -- passed through unchanged to the selected writer's
                    writeXML method
        insertWeights -- when true, the writer's insertWeights attribute is
                    set to True; only applied to "token" examples

        Returns whatever the selected writer's writeXML returns.

        Raises AssertionError if there are no examples, if the first example
        has no "xtype" entry, or if the type is not a known one.
        """
        if isinstance(examples, str):
            print >> sys.stderr, "Reading examples from", examples
            examples = ExampleUtils.readExamples(examples, False)

        # This looks a bit strange, but should work with the re-iterable
        # generators that readExamples returns: only the first example is
        # inspected, and the iterable is walked again by the writer.
        xType = None
        foundExample = False
        for example in examples:
            foundExample = True
            # Raised explicitly (not with "assert") so the check survives
            # "python -O" while callers still see an AssertionError.
            if "xtype" not in example[3]:
                raise AssertionError("First example has no \"xtype\" entry")
            xType = example[3]["xtype"]
            break
        if not foundExample:
            raise AssertionError("No examples to write, cannot determine the example type")

        if xType == "token":
            w = EntityExampleWriter()
            if insertWeights:
                w.insertWeights = True
        elif xType == "edge":
            w = EdgeExampleWriter()
        elif xType == "task3":
            w = ModifierExampleWriter()
        elif xType == "entRel":
            # EntityRelationExampleWriter is not imported at module level, so
            # this branch used to die with a NameError. Import it on demand.
            # NOTE(review): assumes the module is named after the class like
            # the other writers -- confirm that it exists.
            from EntityRelationExampleWriter import EntityRelationExampleWriter
            w = EntityRelationExampleWriter()
        elif xType == "phrase":
            w = PhraseTriggerExampleWriter()
        #IF LOCAL
        elif xType == "um":
            w = UnmergingExampleWriter()
        #elif xType == "ue":
        #    w = UnmergedEdgeExampleWriter()
        #elif xType == "asym":
        #    w = AsymmetricEventExampleWriter()
        #ENDIF
        else:
            # Explicit raise: with "assert False" and -O the function would
            # fall through and fail on the unbound name "w" instead.
            raise AssertionError(("Unknown entity type", xType))
        return w.writeXML(examples, predictions, corpus, outputFile, classSet, parse, tokenization, goldCorpus=goldCorpus)
if __name__ == "__main__":
    # Command-line entry point: write predicted examples to interaction XML.

    # Import Psyco if available (optional speed-up, absence is not an error)
    try:
        import psyco
        psyco.full()
        print >> sys.stderr, "Found Psyco, using"
    except ImportError:
        print >> sys.stderr, "Psyco not installed"

    from optparse import OptionParser
    optparser = OptionParser(usage="%prog [options]\nWrite predicted examples to interaction XML")
    optparser.add_option("-e", "--examples", default=None, dest="examples", help="Machine learning example file", metavar="FILE")
    optparser.add_option("-p", "--predictions", default=None, dest="predictions", help="Classifier predictions for the example file", metavar="FILE")
    optparser.add_option("-i", "--classIds", default=None, dest="classIds", help="Multiclass class Ids")
    optparser.add_option("-c", "--corpus", default=None, dest="corpus", help="Interaction XML file for adding examples to", metavar="FILE")
    optparser.add_option("-g", "--goldCorpus", default=None, dest="goldCorpus", help="Interaction XML file with gold elements", metavar="FILE")
    optparser.add_option("-a", "--parse", default="split-McClosky", dest="parse", help="Parse XML element name")
    optparser.add_option("-t", "--tokenization", default="split-McClosky", dest="tokenization", help="Tokenization XML element name")
    optparser.add_option("-o", "--output", default=None, dest="output", help="Output file")
    optparser.add_option("-w", "--insertWeights", default=False, action="store_true", dest="insertWeights", help="Write weights for gold elements")
    (options, args) = optparser.parse_args()

    # Required options are reported through optparse (usage text + exit
    # status 2) rather than with assert, which "python -O" strips and which
    # only produces a bare traceback.
    for requiredName in ("examples", "predictions", "classIds", "corpus", "output"):
        if getattr(options, requiredName) is None:
            optparser.error("missing required option --" + requiredName)

    # classIds is passed in the classSet position of write()
    BioTextExampleWriter.write(
        options.examples,
        options.predictions,
        options.corpus,
        options.output,
        options.classIds,
        options.parse,
        options.tokenization,
        options.goldCorpus,
        insertWeights=options.insertWeights)