Package TEES :: Package ExampleBuilders :: Package FeatureBuilders :: Module EdgeFeatureBuilder
[hide private]

Source Code for Module TEES.ExampleBuilders.FeatureBuilders.EdgeFeatureBuilder

 1  """ 
 2  Dependency edge features 
 3  """ 
 4  __version__ = "$Revision: 1.3 $" 
 5   
 6  from FeatureBuilder import FeatureBuilder 
 7  #import Stemming.PorterStemmer as PorterStemmer 
 8   
class EdgeFeatureBuilder(FeatureBuilder):
    """
    Builds features that describe dependency edges and their end tokens.

    Every method writes into self.features, using the ids returned by
    self.featureSet.getId(name) as keys. All features are binary (value 1)
    except "lin_distance", which holds an integer.

    NOTE(review): self.features is not assigned anywhere in this class; it
    is presumably provided by FeatureBuilder or set by the caller before
    the build methods are used -- confirm.
    """

    def __init__(self, featureSet):
        """
        featureSet -- forwarded unchanged to FeatureBuilder.__init__; the
                      methods of this class call its getId(name) method.
        """
        FeatureBuilder.__init__(self, featureSet)

    def _setBinaryFeature(self, name):
        """Set the feature called name to 1 in self.features."""
        self.features[self.featureSet.getId(name)] = 1

    def _getTokenText(self, token, sentenceGraph, maskNames):
        """
        Return the text used in text features for token.

        With maskNames the text comes from sentenceGraph.getTokenText(token),
        otherwise it is the raw token.attrib["text"].
        NOTE(review): getTokenText presumably masks entity names -- confirm
        against the sentence graph implementation.
        """
        if maskNames:
            return sentenceGraph.getTokenText(token)
        return token.attrib["text"]

    def buildEdgeFeatures(self, depEdge, sentenceGraph, tag="dep_", text=True, POS=True, annType=True, maskNames=True):
        """
        Add features for a single dependency edge.

        depEdge       -- indexable edge: items 0 and 1 are the two tokens,
                         item 2 is the dependency; each has an attrib mapping
        sentenceGraph -- provides getTokenText(token) and tokenIsEntityHead
                         (token -> entity with attrib["type"], or None)
        tag           -- prefix prepended to every feature name
        text          -- add token text features (t1txt_, t2txt_)
        POS           -- add part-of-speech features (POS_, t1POS_, t2POS_)
        annType       -- add entity type features (annType_, t1AnnType_,
                         t2AnnType_) for tokens that head an entity
        maskNames     -- select the token text source, see _getTokenText
        """
        self._setBinaryFeature(tag + depEdge[2].attrib["type"])
        t1, t2 = depEdge[0], depEdge[1]
        if text:
            self._setBinaryFeature(tag + "t1txt_" + self._getTokenText(t1, sentenceGraph, maskNames))
            self._setBinaryFeature(tag + "t2txt_" + self._getTokenText(t2, sentenceGraph, maskNames))
            # NOTE: stemmed variants of the text features (t1stem_, t2stem_)
            # were sketched here with PorterStemmer but never enabled.
        if POS:
            # Position-independent features first, then position-specific
            # ones; the order of getId calls is kept as in the original.
            self._setBinaryFeature(tag + "POS_" + t1.attrib["POS"])
            self._setBinaryFeature(tag + "POS_" + t2.attrib["POS"])
            self._setBinaryFeature(tag + "t1POS_" + t1.attrib["POS"])
            self._setBinaryFeature(tag + "t2POS_" + t2.attrib["POS"])
        if annType:
            for position, token in (("t1", t1), ("t2", t2)):
                entity = sentenceGraph.tokenIsEntityHead[token]
                if entity is not None:
                    entityType = entity.attrib["type"]
                    self._setBinaryFeature(tag + "annType_" + entityType)
                    self._setBinaryFeature(tag + position + "AnnType_" + entityType)

    def buildTerminusFeatures(self, token, sentenceGraph, prefix="term", text=True, POS=True, annType=True, maskNames=True):
        """
        Add features for every dependency edge attached to token.

        Edges returned by sentenceGraph.dependencyGraph.in_edges(token) give
        "HangingIn_" features describing the token at edge[0]; edges from
        out_edges(token) give "HangingOut_" features describing the token at
        edge[1]. For each edge the dependency type is always added; the POS,
        entity type and text of the neighbouring token are added according
        to the POS, annType and text flags.

        prefix    -- prepended to every feature name
        maskNames -- select the token text source, see _getTokenText
        """
        graph = sentenceGraph.dependencyGraph
        directions = (("HangingIn_", graph.in_edges, 0),
                      ("HangingOut_", graph.out_edges, 1))
        for label, getEdges, neighbourIndex in directions:
            base = prefix + label
            for edge in getEdges(token):
                self._setBinaryFeature(base + edge[2].attrib["type"])
                neighbour = edge[neighbourIndex]
                if POS:
                    self._setBinaryFeature(base + neighbour.attrib["POS"])
                if annType:
                    entity = sentenceGraph.tokenIsEntityHead[neighbour]
                    if entity is not None:
                        self._setBinaryFeature(base + "AnnType_" + entity.attrib["type"])
                if text:
                    self._setBinaryFeature(base + self._getTokenText(neighbour, sentenceGraph, maskNames))

    def buildAttachedEdgeFeatures(self, depEdge, sentenceGraph, tag="", text=True, POS=True, annType=True, maskNames=True):
        """
        Add terminus features for both end tokens of depEdge.

        Calls buildTerminusFeatures for depEdge[0] with prefix tag+"t1" and
        for depEdge[1] with prefix tag+"t2", passing the remaining flags
        through unchanged.
        """
        self.buildTerminusFeatures(depEdge[0], sentenceGraph, prefix=tag + "t1", text=text, POS=POS, annType=annType, maskNames=maskNames)
        self.buildTerminusFeatures(depEdge[1], sentenceGraph, prefix=tag + "t2", text=text, POS=POS, annType=annType, maskNames=maskNames)
        return

    def buildLinearOrderFeatures(self, depEdge):
        """
        Add features for the linear order of the two tokens of depEdge.

        The position of a token is the integer after the last "_" of its
        attrib["id"]. "lin_distance" is set to the absolute difference of
        the positions; "lin_forward" is set to 1 when the first token has
        the smaller position, otherwise (including equal positions)
        "lin_reverse" is set to 1.
        """
        t1Position = int(depEdge[0].attrib["id"].split("_")[-1])
        t2Position = int(depEdge[1].attrib["id"].split("_")[-1])
        # The distance is stored once, before the direction feature, so that
        # "lin_distance" still receives its id first.
        self.features[self.featureSet.getId("lin_distance")] = abs(t2Position - t1Position)

        if t1Position < t2Position:
            self._setBinaryFeature("lin_forward")
        else:
            self._setBinaryFeature("lin_reverse")
        # NOTE: text and POS features ordered by linear position (l1txt_,
        # l1POS_, l2txt_, l2POS_) were sketched here but never enabled.