Package TEES :: Package ExampleBuilders :: Package FeatureBuilders :: Module NodalidaFeatureBuilder
[hide private]

Source Code for Module TEES.ExampleBuilders.FeatureBuilders.NodalidaFeatureBuilder

 1  from FeatureBuilder import FeatureBuilder 
 2   
class NodalidaFeatureBuilder(FeatureBuilder):
    """Build n-gram features over alternating token / dependency-edge paths.

    A "path" here is a list of the form
    ``[token, (edge, direction), token, ..., token]`` as produced by
    ``buildShortestPaths``.  Features are registered through the inherited
    ``setFeature`` method; per-token features come from the inherited
    ``getTokenFeatures`` method.
    """

    def __init__(self, featureSet):
        """Initialise the base ``FeatureBuilder`` with ``featureSet``."""
        FeatureBuilder.__init__(self, featureSet)

    def buildShortestPaths(self, graph, tokenPath, position=0, newPath=None):
        """Expand a token path into every token/edge path along it.

        For each pair of consecutive tokens the graph is queried in both
        directions; every edge found yields a separate continuation, so the
        result enumerates all combinations of edges along ``tokenPath``.

        Args:
            graph: object providing ``has_edge(a, b)`` and ``get_edge(a, b)``;
                ``get_edge`` must return an iterable of edges.
            tokenPath: non-empty sequence of tokens (graph nodes).
            position: index of the token being appended; callers leave this
                at 0, it is advanced by the recursion.
            newPath: path built so far; callers leave this as ``None``.

        Returns:
            A list of paths.  Each path alternates tokens with
            ``(edge, "forward")`` or ``(edge, "reverse")`` tuples, where
            "forward" means the edge runs from the earlier token to the later
            one.  The list is empty if some consecutive pair of tokens has no
            edge in either direction.
        """
        if newPath is None:
            assert position == 0
            newPath = [tokenPath[0]]
        else:
            # Build a new list so sibling branches never share state.
            newPath = newPath + [tokenPath[position]]

        if position == len(tokenPath) - 1:
            return [newPath]

        current = tokenPath[position]
        following = tokenPath[position + 1]

        forwardEdges = []
        if graph.has_edge(current, following):
            forwardEdges.extend(graph.get_edge(current, following))
        reverseEdges = []
        if graph.has_edge(following, current):
            reverseEdges.extend(graph.get_edge(following, current))

        # Reverse edges are expanded before forward edges; the order of the
        # returned paths depends on this.
        newPaths = []
        for reverseEdge in reverseEdges:
            newPaths.extend(self.buildShortestPaths(
                graph, tokenPath, position + 1,
                newPath + [(reverseEdge, "reverse")]))
        for forwardEdge in forwardEdges:
            newPaths.extend(self.buildShortestPaths(
                graph, tokenPath, position + 1,
                newPath + [(forwardEdge, "forward")]))
        return newPaths

    def buildTokenGramFeatures(self, tokenPath, sentenceGraph):
        """Set one feature per non-empty text / annotation-type / POS gram.

        The features of each token in ``tokenPath`` are concatenated, in
        order, into three kinds of gram strings depending on whether the
        feature name contains ``"txt_"``, ``"POS_"`` or ``"annType_"``
        (checked in that priority).  Every resulting non-empty string is
        registered with value 1.

        Args:
            tokenPath: sequence of tokens forming the n-gram.
            sentenceGraph: passed through to ``getTokenFeatures``.
        """
        txtGrams = [""]
        annTypeGrams = [""]
        posGrams = [""]
        for token in tokenPath:
            for feature in self.getTokenFeatures(token, sentenceGraph):
                if "txt_" in feature:
                    txtGrams = [gram + feature for gram in txtGrams]
                elif "POS_" in feature:
                    posGrams = [gram + feature for gram in posGrams]
                elif "annType_" in feature:
                    annTypeGrams = [gram + feature for gram in annTypeGrams]
        for gram in txtGrams + annTypeGrams + posGrams:
            if gram != "":
                self.setFeature(gram, 1)

    def buildEdgeGramFeatures(self, edgePath):
        """Set a single feature naming the edge types and directions.

        Args:
            edgePath: sequence of ``(edge, direction)`` tuples where
                ``edge.attrib["type"]`` and ``direction`` are strings.

        The feature name is the concatenation of ``type + "-" + direction``
        for every element, with no separator between elements.
        """
        name = "".join(
            edge[0].attrib["type"] + "-" + edge[1] for edge in edgePath)
        self.setFeature(name, 1)

    def buildNGrams(self, paths, sentenceGraph, n=3):
        """Build token and dependency n-gram features for each path.

        Args:
            paths: iterable of alternating token/edge paths (odd length,
                tokens at even indices, ``(edge, direction)`` tuples at odd
                indices).
            sentenceGraph: passed through to ``buildTokenGramFeatures``.
            n: maximum number of tokens (or edges) in one gram.

        Raises:
            AssertionError: if a path does not have odd length.
        """
        for path in paths:
            assert len(path) % 2 == 1
            for i in range(len(path)):
                if i % 2 == 0:
                    # Token n-grams: walk back over even indices, growing
                    # the gram towards the start of the path.
                    # NOTE(review): the feature call is assumed to sit inside
                    # the loop so that grams of every length up to n are
                    # emitted -- confirm against the upstream module.
                    tokenGram = []
                    for j in range(i, max(-1, i - n * 2), -2):
                        tokenGram = [path[j]] + tokenGram
                        self.buildTokenGramFeatures(tokenGram, sentenceGraph)
                else:
                    # Dependency n-grams: same walk over the odd indices.
                    edgeGram = []
                    for j in range(i, max(0, i - n * 2), -2):
                        edgeGram = [path[j]] + edgeGram
                        # Pass the edge gram itself, not the token gram left
                        # over from the previous step.
                        self.buildEdgeGramFeatures(edgeGram)
80