1 """
2 EVEX Feature Builder
3 """
4 __version__ = "$Revision: 1.5 $"
5
6 from FeatureBuilder import FeatureBuilder
7
10 """
11 This is called when the ExampleBuilder object is created.
12
13 @type featureSet: Core.IdSet
14 @param featureSet: The feature ids
15 """
16 FeatureBuilder.__init__(self, featureSet)
17
19 """
20 This function is called once for each sentence, before any calls to "buildFeatures". It
21 should be used to initialize per-sentence data structures.
22
23 @type sentenceGraph: Core.SentenceGraph
24 @param sentenceGraph: a SentenceGraph object providing access to the aligned semantic and syntactic
25 information of the sentence. The underlying XML can also be accessed through
26 this class.
27 """
28
29 pass
30
32 """
33 This is the main function for feature generation. It is called once for each
34 directed entity pair in the sentence.
35
36 For defining features, please use the member function "setFeature(self, name, value=1)",
37 derived from the parent class. This ensures features get correctly tagged, if needed.
38
39 @type entity1: cElementTree.Element
40 @param entity1: First entity of the candidate edge, an Interaction XML "entity"-element
41 @type entity2: cElementTree.Element
42 @param entity2: Second entity of the candidate edge, an Interaction XML "entity"-element
43 @type token1: cElementTree.Element
44 @param token1: The head token of entity1, an Interaction XML "token"-element
45 @type token2: cElementTree.Element
46 @param token2: The head token of entity2, an Interaction XML "token"-element
47 @type path: list of cElementTree.Elements (when "no_path" style is set, this is always [token1, token2])
48 @param path: the shortest connecting path of tokens (Interaction XML "token"-elements)
49 @type sentenceGraph: Core.SentenceGraph
50 @param sentenceGraph: a SentenceGraph object providing access to the aligned semantic and syntactic
51 information of the sentence. The underlying XML can also be accessed through
52 this class.
53 """
54
55 pass
56
if __name__ == "__main__":
    # Test program for the EVEX feature builder. It takes as a parameter an
    # Interaction XML corpus file and builds edge examples using
    # MultiEdgeExampleBuilder. When the "evex" style parameter is set,
    # MultiEdgeExampleBuilder will call EVEXFeatureBuilder for feature
    # generation.
    import sys
    # Core and ExampleBuilders are imported below through this relative path
    # entry, which is resolved against the current working directory.
    sys.path.append("../..")
    from Core.IdSet import IdSet
    import Core.ExampleUtils as ExampleUtils
    from ExampleBuilders.MultiEdgeExampleBuilder import MultiEdgeExampleBuilder

    # Psyco is optional: use it when it can be imported, otherwise carry on
    # without it. Status messages go to stderr via write(), which behaves the
    # same as the Python 2 "print >> sys.stderr" statement but is also valid
    # on Python 3.
    try:
        import psyco
        psyco.full()
        sys.stderr.write("Found Psyco, using\n")
    except ImportError:
        sys.stderr.write("Psyco not installed\n")

    from optparse import OptionParser
    optparser = OptionParser(usage="%prog [options]\nTest EVEX Feature Builder.")
    defaultInput = "/usr/share/biotext/BioNLP2011/data/main-tasks/GE/GE-devel-nodup.xml"
    optparser.add_option("-i", "--input", default=defaultInput, dest="input",
                         help="Corpus in analysis format", metavar="FILE")
    optparser.add_option("-o", "--output", default="evex-examples.txt", dest="output",
                         help="Output feature file")
    optparser.add_option("-d", "--edgeIds", default="evex-ids", dest="edgeIds",
                         help="Example class and feature id file stem (files = STEM.class_names and STEM.feature_names)")
    optparser.add_option("-t", "--tokenization", default="split-mccc-preparsed", dest="tokenization",
                         help="tokenization")
    optparser.add_option("-p", "--parse", default="split-mccc-preparsed", dest="parse",
                         help="parse")
    optparser.add_option("-s", "--styles",
                         default="typed,directed,no_path,no_task,no_dependency,no_linear,entities,genia_limits,noMasking,maxFeatures,evex",
                         dest="edgeStyles",
                         help="Comma-separated example builder style parameters")
    (options, args) = optparser.parse_args()

    # Explicit checks instead of assert statements: asserts are removed when
    # Python runs with -O, and optparser.error() prints the usage text and
    # exits with a proper command-line error status.
    for requiredOption in ("input", "output", "edgeIds"):
        if getattr(options, requiredOption) is None:
            optparser.error("missing required option: " + requiredOption)

    exampleBuilder = MultiEdgeExampleBuilder()
    exampleBuilder.run(options.input, options.output, options.parse,
                       options.tokenization, "style:" + options.edgeStyles,
                       options.edgeIds)
94