1 from FeatureBuilder import FeatureBuilder
2
6
def buildShortestPaths(self, graph, tokenPath, position=0, newPath=None):
    """Enumerate every token/edge path that follows ``tokenPath``.

    For each pair of consecutive tokens, every edge ``graph`` holds between
    them (in either direction) opens a separate branch. A returned path
    alternates tokens and ``(edge, direction)`` tuples, e.g.
    ``[tok0, (edge, "forward"), tok1, (edge, "reverse"), tok2]``.
    ``"forward"`` marks an edge from the earlier token to the later one,
    ``"reverse"`` an edge in the opposite direction.

    NOTE(review): the ``def`` line was missing from the extract this was
    reviewed from. The name and positional order are taken from the
    recursive calls below; the defaults are inferred from the
    ``newPath is None`` / ``position == 0`` entry check -- confirm against
    the original header and its callers.

    Args:
        graph: object exposing ``has_edge(u, v)`` and ``get_edge(u, v)``;
            ``get_edge`` must return an iterable of edges.
        tokenPath: non-empty sequence of tokens to walk along.
        position: index in ``tokenPath`` of the token appended by this call.
        newPath: path built so far, or ``None`` on the initial call.

    Returns:
        A list of paths. It is empty when some pair of consecutive tokens
        has no connecting edge in either direction.
    """
    if newPath is None:
        # Only the initial call may omit the partial path.
        assert position == 0
        newPath = [tokenPath[0]]
    else:
        # Build a new list so sibling branches never share state.
        newPath = newPath + [tokenPath[position]]

    if position == len(tokenPath) - 1:
        return [newPath]

    current = tokenPath[position]
    following = tokenPath[position + 1]

    forwardEdges = []
    if graph.has_edge(current, following):
        forwardEdges.extend(graph.get_edge(current, following))
    reverseEdges = []
    if graph.has_edge(following, current):
        reverseEdges.extend(graph.get_edge(following, current))

    # Reverse-edge branches are emitted before forward-edge branches.
    newPaths = []
    for reverseEdge in reverseEdges:
        newPaths.extend(
            self.buildShortestPaths(
                graph, tokenPath, position + 1, newPath + [(reverseEdge, "reverse")]
            )
        )
    for forwardEdge in forwardEdges:
        newPaths.extend(
            self.buildShortestPaths(
                graph, tokenPath, position + 1, newPath + [(forwardEdge, "forward")]
            )
        )
    return newPaths
28
def buildTokenGramFeatures(self, tokenPath, sentenceGraph):
    """Set n-gram features built from the tokens of ``tokenPath``.

    The features of each token (from ``self.getTokenFeatures``) are split
    into three families by substring -- ``"txt_"``, ``"POS_"`` and
    ``"annType_"``, checked in that order -- and concatenated per family
    across the tokens in path order. Each non-empty concatenation is then
    registered through ``self.setFeature(name, 1)``.

    NOTE(review): the ``def`` line was missing from the extract this was
    reviewed from. The header is reconstructed from the call
    ``self.buildTokenGramFeatures(tokenGram, sentenceGraph)`` elsewhere in
    the file and from the names this body reads -- confirm against the
    original.

    Args:
        tokenPath: iterable of tokens.
        sentenceGraph: passed through unchanged to ``getTokenFeatures``.
    """
    txtGrams = [""]
    annTypeGrams = [""]
    posGrams = [""]
    for token in tokenPath:
        for feature in self.getTokenFeatures(token, sentenceGraph):
            # Branch order matters: a feature matching several markers is
            # counted only for the first family that matches.
            if "txt_" in feature:
                txtGrams = [gram + feature for gram in txtGrams]
            elif "POS_" in feature:
                posGrams = [gram + feature for gram in posGrams]
            elif "annType_" in feature:
                annTypeGrams = [gram + feature for gram in annTypeGrams]

    for gram in txtGrams + annTypeGrams + posGrams:
        # A family with no matching feature stays "" and is skipped.
        if gram != "":
            self.setFeature(gram, 1)
54
def buildEdgeGramFeatures(self, edgePath):
    """Set one feature naming the edge types and directions in ``edgePath``.

    Each item contributes ``<edge type>-<direction>``; the pieces are
    concatenated with no separator and the result is registered through
    ``self.setFeature(name, 1)``.

    An empty path sets no feature; it would otherwise register a feature
    whose name is the empty string. The debug ``print`` of the path that
    used to run on every call has been removed.

    NOTE(review): the ``def`` line was missing from the extract this was
    reviewed from. The header is reconstructed from the one-argument call
    ``self.buildEdgeGramFeatures(...)`` elsewhere in the file and from the
    name this body reads -- confirm against the original.

    Args:
        edgePath: sequence of ``(edge, direction)`` pairs, where
            ``edge.attrib["type"]`` and ``direction`` are strings.
    """
    if not edgePath:
        return
    name = "".join(edge[0].attrib["type"] + "-" + edge[1] for edge in edgePath)
    self.setFeature(name, 1)
61
# NOTE(review): the `def` line that owns this body is missing from the extract
# under review and its name cannot be recovered from the visible code, so only
# the body is given here. It reads `self`, `paths`, `sentenceGraph` and `n`
# from the enclosing scope; re-indent it under the real header.
#
# For every path, emit a token-gram feature set at each even index and an
# edge-gram feature at each odd index, each gram covering at most `n` items
# of its own kind and ending at the current index.
for path in paths:
    # Even indices are handled as tokens and odd ones as edges, so a
    # well-formed path has odd length.
    assert len(path) % 2 == 1
    for i in range(len(path)):
        if i % 2 == 0:
            # Step back two at a time to skip the interleaved edges; the
            # -1 bound lets index 0 be included.
            window = range(i, max(-1, i - n * 2), -2)
            tokenGram = [path[j] for j in reversed(window)]
            self.buildTokenGramFeatures(tokenGram, sentenceGraph)
        else:
            # Edges live at odd indices, so a lower bound of 0 is enough.
            window = range(i, max(0, i - n * 2), -2)
            edgeGram = [path[j] for j in reversed(window)]
            # Fix: this used to pass the stale `tokenGram`, so the edge
            # gram built above was never used.
            self.buildEdgeGramFeatures(edgeGram)
80