1 """
2 Dependency edge features
3 """
4 __version__ = "$Revision: 1.3 $"
5
6 from FeatureBuilder import FeatureBuilder
7
8
12
def buildEdgeFeatures(self, depEdge, sentenceGraph, tag="dep_", text=True, POS=True, annType=True, maskNames=True):
    """
    Set binary features describing one dependency edge.

    Each feature name is built by string concatenation starting with
    `tag`, turned into an id with self.featureSet.getId() and stored in
    self.features with the value 1.

    depEdge -- indexable edge; depEdge[0] and depEdge[1] are the two token
               elements and depEdge[2] is the dependency element. Each
               exposes an `attrib` mapping ("text" and "POS" are read from
               the tokens, "type" from the dependency).
    sentenceGraph -- provides getTokenText(token) and the
               tokenIsEntityHead mapping (token -> entity element or None).
    tag -- prefix prepended to every feature name.
    text -- when true, add one text feature per token ("t1txt_"/"t2txt_").
    POS -- when true, add part-of-speech features, both position
               independent ("POS_") and per token ("t1POS_"/"t2POS_").
    annType -- when true, add entity type features for each token that
               tokenIsEntityHead maps to an entity.
    maskNames -- when true, token text comes from
               sentenceGraph.getTokenText(); otherwise the raw "text"
               attribute of the token is used.

    Returns None; the result is the mutation of self.features.
    """
    getId = self.featureSet.getId
    features = self.features
    # The two edge ends share all logic and differ only in their label.
    endpoints = (("t1", depEdge[0]), ("t2", depEdge[1]))

    # Ids are requested in a fixed order (type, text, POS, annotation) so
    # that a feature set assigning ids on first request stays stable.
    features[getId(tag + depEdge[2].attrib["type"])] = 1

    if text:
        for label, token in endpoints:
            if maskNames:
                tokenText = sentenceGraph.getTokenText(token)
            else:
                tokenText = token.attrib["text"]
            features[getId(tag + label + "txt_" + tokenText)] = 1

    if POS:
        # Position independent features first, then the per-token ones.
        for label, token in endpoints:
            features[getId(tag + "POS_" + token.attrib["POS"])] = 1
        for label, token in endpoints:
            features[getId(tag + label + "POS_" + token.attrib["POS"])] = 1

    if annType:
        for label, token in endpoints:
            entity = sentenceGraph.tokenIsEntityHead[token]
            if entity is not None:
                entityType = entity.attrib["type"]
                features[getId(tag + "annType_" + entityType)] = 1
                features[getId(tag + label + "AnnType_" + entityType)] = 1
37
def buildTerminusFeatures(self, token, sentenceGraph, prefix="term", text=True, POS=True, annType=True, maskNames=True):
    """
    Set binary features for the dependency edges attached to `token`.

    Edges returned by sentenceGraph.dependencyGraph.in_edges(token) are
    labelled "HangingIn_" and described through the token at edge[0];
    edges returned by out_edges(token) are labelled "HangingOut_" and
    described through the token at edge[1]. Each feature name is turned
    into an id with self.featureSet.getId() and stored in self.features
    with the value 1.

    token -- token element whose attached edges are described.
    sentenceGraph -- provides dependencyGraph (with in_edges/out_edges),
               getTokenText(token) and the tokenIsEntityHead mapping
               (token -> entity element or None).
    prefix -- string prepended to every feature name.
    text -- when true, add the text of the token at the other end.
    POS -- when true, add the "POS" attribute of the token at the other end.
    annType -- when true, add the entity type of the token at the other
               end if tokenIsEntityHead maps it to an entity.
    maskNames -- when true, token text comes from
               sentenceGraph.getTokenText(); otherwise the raw "text"
               attribute of the token is used.

    Returns None; the result is the mutation of self.features.
    """
    getId = self.featureSet.getId
    features = self.features
    dependencyGraph = sentenceGraph.dependencyGraph
    # (edge lister, feature label, index of the other token within the edge)
    directions = (
        (dependencyGraph.in_edges, "HangingIn_", 0),
        (dependencyGraph.out_edges, "HangingOut_", 1),
    )

    for listEdges, label, otherIndex in directions:
        # Dependency type, POS and text features all share this stem;
        # only the annotation type gets its own "AnnType_" infix.
        stem = prefix + label
        for edge in listEdges(token):
            other = edge[otherIndex]
            features[getId(stem + edge[2].attrib["type"])] = 1
            if POS:
                features[getId(stem + other.attrib["POS"])] = 1
            if annType:
                entity = sentenceGraph.tokenIsEntityHead[other]
                if entity is not None:
                    features[getId(stem + "AnnType_" + entity.attrib["type"])] = 1
            if text:
                if maskNames:
                    otherText = sentenceGraph.getTokenText(other)
                else:
                    otherText = other.attrib["text"]
                features[getId(stem + otherText)] = 1
57
59 self.buildTerminusFeatures(depEdge[0], sentenceGraph, prefix = tag+"t1", text=text, POS=POS, annType=annType, maskNames=maskNames)
60 self.buildTerminusFeatures(depEdge[1], sentenceGraph, prefix = tag+"t2", text=text, POS=POS, annType=annType, maskNames=maskNames)
61 return
62
64 t1Position = int(depEdge[0].attrib["id"].split("_")[-1])
65 t2Position = int(depEdge[1].attrib["id"].split("_")[-1])
66 self.features[self.featureSet.getId("lin_distance")] = t2Position - t1Position
67
68 if t1Position < t2Position:
69 self.features[self.featureSet.getId("lin_forward")] = 1
70 self.features[self.featureSet.getId("lin_distance")] = abs(t2Position - t1Position)
71
72
73
74
75 else:
76 self.features[self.featureSet.getId("lin_reverse")] = 1
77 self.features[self.featureSet.getId("lin_distance")] = abs(t2Position - t1Position)
78
79
80
81
82