Package TEES :: Package ExampleBuilders :: Package FeatureBuilders :: Module EVEXFeatureBuilder
[hide private]

Source Code for Module TEES.ExampleBuilders.FeatureBuilders.EVEXFeatureBuilder

 1  """ 
 2  EVEX Feature Builder 
 3  """ 
 4  __version__ = "$Revision: 1.5 $" 
 5   
 6  from FeatureBuilder import FeatureBuilder 
 7   
class EVEXFeatureBuilder(FeatureBuilder):
    """
    Skeleton feature builder for EVEX features.

    The constructor delegates to FeatureBuilder; the two hook methods below are
    empty templates that currently generate no features.
    """

    def __init__(self, featureSet):
        """
        Constructor; invoked when the ExampleBuilder object is created.

        @type featureSet: Core.IdSet
        @param featureSet: The feature ids
        """
        FeatureBuilder.__init__(self, featureSet)

    def initSentence(self, sentenceGraph):
        """
        Per-sentence setup hook. It is called once for every sentence, ahead of
        any feature-building calls for that sentence, and is the place to set up
        per-sentence data structures.

        @type sentenceGraph: Core.SentenceGraph
        @param sentenceGraph: a SentenceGraph object giving access to the aligned
                              semantic and syntactic information of the sentence.
                              The underlying XML is also reachable through it.
        """
        # Placeholder: per-sentence initialization goes here.
        pass

    def buildEdgeFeatures(self, entity1, entity2, token1, token2, path, sentenceGraph):
        """
        Main entry point for feature generation. It is called once for each
        directed entity pair in the sentence.

        Features should be defined through the inherited member function
        "setFeature(self, name, value=1)", so that they get correctly tagged
        when needed.

        @type entity1: cElementTree.Element
        @param entity1: First entity of the candidate edge, an Interaction XML "entity"-element
        @type entity2: cElementTree.Element
        @param entity2: Second entity of the candidate edge, an Interaction XML "entity"-element
        @type token1: cElementTree.Element
        @param token1: The head token of entity1, an Interaction XML "token"-element
        @type token2: cElementTree.Element
        @param token2: The head token of entity2, an Interaction XML "token"-element
        @type path: list of cElementTree.Elements (when "no_path" style is set, this is always [token1, token2])
        @param path: the shortest connecting path of tokens (Interaction XML "token"-elements)
        @type sentenceGraph: Core.SentenceGraph
        @param sentenceGraph: a SentenceGraph object giving access to the aligned
                              semantic and syntactic information of the sentence.
                              The underlying XML is also reachable through it.
        """
        # Placeholder: feature generation goes here.
        pass
if __name__ == "__main__":
    # Test program for the EVEX feature builder. It takes an Interaction XML
    # corpus file as a parameter and builds edge-examples using
    # MultiEdgeExampleBuilder. When the "evex" style parameter is set,
    # MultiEdgeExampleBuilder will call EVEXFeatureBuilder for feature
    # generation.
    import sys
    # Relative path: resolved against the current working directory.
    sys.path.append("../..")
    # IdSet and ExampleUtils are not referenced below; the imports are kept
    # as in the original script.
    from Core.IdSet import IdSet
    import Core.ExampleUtils as ExampleUtils
    from ExampleBuilders.MultiEdgeExampleBuilder import MultiEdgeExampleBuilder

    # Import Psyco if available
    try:
        import psyco
        psyco.full()
        # sys.stderr.write emits the same text as the former
        # "print >> sys.stderr" statements and also works on Python 3.
        sys.stderr.write("Found Psyco, using\n")
    except ImportError:
        sys.stderr.write("Psyco not installed\n")

    from optparse import OptionParser
    optparser = OptionParser(usage="%prog [options]\nTest EVEX Feature Builder.")
    defaultInput = "/usr/share/biotext/BioNLP2011/data/main-tasks/GE/GE-devel-nodup.xml"
    optparser.add_option("-i", "--input", default=defaultInput, dest="input", help="Corpus in analysis format", metavar="FILE")
    optparser.add_option("-o", "--output", default="evex-examples.txt", dest="output", help="Output feature file")
    optparser.add_option("-d", "--edgeIds", default="evex-ids", dest="edgeIds", help="Example class and feature id file stem (files = STEM.class_names and STEM.feature_names)")
    optparser.add_option("-t", "--tokenization", default="split-mccc-preparsed", dest="tokenization", help="tokenization")
    optparser.add_option("-p", "--parse", default="split-mccc-preparsed", dest="parse", help="parse")
    optparser.add_option("-s", "--styles", default="typed,directed,no_path,no_task,no_dependency,no_linear,entities,genia_limits,noMasking,maxFeatures,evex", dest="edgeStyles", help="")
    (options, args) = optparser.parse_args()

    # Validate explicitly instead of with assert statements, which are
    # stripped when Python runs with -O. optparser.error prints the usage
    # text plus the message to stderr and exits.
    for requiredOption in ("input", "output", "edgeIds"):
        if getattr(options, requiredOption) is None:
            optparser.error("option '%s' must be defined" % requiredOption)

    exampleBuilder = MultiEdgeExampleBuilder()
    exampleBuilder.run(options.input, options.output, options.parse, options.tokenization, "style:"+options.edgeStyles, options.edgeIds)