1 from FeatureBuilder import FeatureBuilder
2 import numpy
3
4 import numpy.linalg
5 import networkx as NX
6 import copy
7 import sys
8 sys.path.append("../..")
9 import Core.ExampleUtils as ExampleUtils
10
def getHexColor(red, green, blue):
    """ convert an (R, G, B) tuple to #RRGGBB """
    # NOTE(review): the original 'def' line was lost in extraction; the
    # signature is reconstructed from the call getHexColor(red, green, blue)
    # in getColorFromBRGSpectrum below -- confirm against VCS history.
    # Components are truncated to ints; values are expected in 0..255.
    hexcolor = '#%02x%02x%02x' % (int(red), int(green), int(blue))
    return hexcolor
16
def getColorFromBRGSpectrum(value, minVal=0.0, maxVal=1.0):
    """Map value onto a blue -> red -> green color spectrum.

    Low values are blue, mid-range values red, high values green; the
    three ramps overlap so intermediate values blend.

    NOTE(review): the original 'def' line was lost in extraction. Call
    sites pass a single positional argument, so minVal/maxVal must have
    defaults; 0.0/1.0 are inferred from the 0.0..1.0 legend loop in
    adjacencyMatrixToHtml -- confirm against VCS history.
    """
    span = maxVal - minVal
    spanHalf = span / 2.0
    # (An unused 'pos = value / span' intermediate was removed.)
    blue = max((spanHalf - value) / spanHalf, 0.0) * 255
    red = max((spanHalf - abs(value - spanHalf)) / spanHalf, 0.0) * 255
    green = max((value - spanHalf) / spanHalf, 0.0) * 255
    return getHexColor(red, green, blue)
25
def adjacencyMatrixToHtml(matrix, labels, filename):
    """Write an HTML debug page visualizing an adjacency matrix.

    NOTE(review): the original 'def' line was lost in extraction; the
    signature is reconstructed from the call sites
    adjacencyMatrixToHtml(adjacencyMatrix, labels, "...html") and from
    the names used in this body -- confirm against VCS history.

    matrix   -- numpy matrix (uses .shape and [i, j] indexing)
    labels   -- optional list of per-node label collections, or None
    filename -- output path handed to HtmlBuilder.write
    """
    from HtmlBuilder import HtmlBuilder  # project-local, imported lazily
    h = HtmlBuilder()
    h.newPage("test","")

    # Matrix section: a header row of column indices, then one table row
    # per matrix row; nonzero cells are colored on the B->R->G spectrum.
    h.header("Adjacency Matrix", 3)
    h.table(1)
    rows, columns = matrix.shape
    h.tableRow()
    h.tableData(None, True)
    for i in range(columns):
        h.tableData(None, False)
        h.span( str(i), "font-size:smaller;font-weight:bold" )
        h.closeElement()
    h.closeElement()

    for i in range(rows):
        h.tableRow()
        h.tableData(None, False)
        h.span( str(i), "font-size:smaller;font-weight:bold" )
        h.closeElement()
        for j in range(columns):
            h.tableData(None, False)
            if matrix[i,j] != 0.0:
                style = "font-size:smaller;background-color:" + getColorFromBRGSpectrum(matrix[i,j])
                # Truncate the printed value to four characters, e.g. "0.12".
                h.span( str(matrix[i,j])[0:4], style )
            else:
                style = "font-size:smaller"
                h.span( "0", style )
            h.closeElement()
        h.closeElement()

    h.closeElement()

    # Legend: one colored swatch per 0.1 step from 0.1 to 1.0.
    h.header("Legend", 4)
    h.table(1)
    h.tableRow()
    h.tableData(None, False)
    h.span( "0.0", "font-size:smaller" )
    h.closeElement()
    i = 0.1
    while i <= 1.0:  # float accumulation; the final swatch is ~0.9999...
        h.tableData(None, False)
        h.span( str(i), "font-size:smaller;background-color:" + getColorFromBRGSpectrum(i) )
        h.closeElement()
        i += 0.1
    h.closeElement()
    h.closeElement()

    # Node label lists, one line per node index.
    if labels is not None:
        h.header("Labels", 3)
        for i in range(len(labels)):
            string = str(i) + ": "
            first = True
            for label in labels[i]:
                if not first:
                    string += ", "
                string += label
                first = False
            h.span(string)
            h.lineBreak()

    h.write(filename)
89
93
def buildGraphKernelFeatures(self, sentenceGraph, path):
    """Build graph-kernel features for the token path between two entities.

    NOTE(review): the original 'def' line was lost in extraction; the
    name and signature are reconstructed from the body (which reads
    self, sentenceGraph and path) -- confirm against this method's
    callers before relying on the name.
    """
    # Collect the dependency edges along the path, in both directions.
    edgeList = []
    depGraph = sentenceGraph.dependencyGraph
    pt = path
    for i in range(1, len(path)):
        edgeList.extend(depGraph.getEdges(pt[i], pt[i-1]))
        edgeList.extend(depGraph.getEdges(pt[i-1], pt[i]))
    edges = edgeList

    adjacencyMatrix, labels = self._buildAdjacencyMatrix(sentenceGraph, path, edges)
    # Node count mirrors _buildAdjacencyMatrix: two nodes per token plus
    # one node per dependency.
    node_count = 2*len(sentenceGraph.tokens) + len(sentenceGraph.dependencies)

    # Debug dump for one hard-coded sentence of the LLL corpus.
    if sentenceGraph.sentenceElement.attrib["id"] == "LLL.d0.s0":
        adjacencyMatrixToHtml(adjacencyMatrix, labels, "LLL.d0.s0_adjacency_matrix.html")

    allPathsMatrix = self._prepareMatrix(adjacencyMatrix, node_count)
    self._matrixToFeatures(allPathsMatrix, labels)
    if sentenceGraph.sentenceElement.attrib["id"] == "LLL.d0.s0":
        # NOTE(review): indentation was lost in extraction; the example
        # dump below is grouped under this debug branch on the assumption
        # it belongs to the same LLL.d0.s0 diagnostic -- confirm.
        adjacencyMatrixToHtml(allPathsMatrix, labels, "LLL.d0.s0_all_paths_matrix.html")
        commentLines = []
        commentLines.extend(self.featureSet.toStrings())
        example = ["example_"+self.entity1.attrib["id"]+"_"+self.entity2.attrib["id"],"unknown",self.features]
        ExampleUtils.writeExamples([example],"LLL.d0.s0_example.txt",commentLines)
116
117
119
120 """Linearizes the representation of the graph"""
121 linear = {}
122 for i in range(W.shape[0]):
123 for j in range(W.shape[1]):
124 if W[i,j] > 0.00001:
125 for label1 in labels[i]:
126 if (not "punct" in labels[i]) and (not "punct" in labels[j]):
127 for label2 in labels[j]:
128
129 label = label1+"_$_"+label2
130 self.features[self.featureSet.getId(label)] = W[i,j]
131
132 - def _prepareMatrix(self, adjacencyMatrix, node_count, dtyp=numpy.float64):
133 W = adjacencyMatrix * -1.0
134
135
136
137
138 W += numpy.mat(numpy.identity(node_count, dtype = dtyp))
139 return numpy.linalg.inv(W) - numpy.mat(numpy.identity(node_count, dtype=dtyp))
140
142 """ Returns the position id of the token """
143 return int(tokenElement.attrib["id"].split("_")[1])
144
145 - def _getTokenText(self, path, sentenceGraph, token):
146 tokenText = sentenceGraph.getTokenText(token)
147 if tokenText == "NAMED_ENT":
148 if token == path[0]:
149 tokenText = "NAMED_ENT_1"
150 elif token == path[-1]:
151 tokenText = "NAMED_ENT_2"
152 return tokenText
153
def _buildAdjacencyMatrix(self, sentenceGraph, path, edges, floattype=numpy.float64, directed=True, linearOrderWeight=0.9):
    """ Returns a Numpy-matrix
    """
    # Node layout (T = token count, D = dependency count):
    #   [0, T)     token nodes wired via dependencies (index = tokenId-1)
    #   [T, 2T)    token nodes chained in linear sentence order
    #   [2T, 2T+D) one node per dependency
    node_count = 2*len(sentenceGraph.tokens) + len(sentenceGraph.dependencies)
    adjMatrix = numpy.asmatrix(numpy.zeros((node_count, node_count), dtype=floattype))
    labels = [set([]) for x in range(node_count)]
    dep_indices = range(2*len(sentenceGraph.tokens), node_count)

    # Per-dependency weights: baseline 0.3, raised to 0.9 on the
    # shortest-path edges, then decayed by distance from that path.
    weightByDependency = {}
    self._setAllDependencyWeights(sentenceGraph, weightByDependency, 0.3)
    self._setDependencyWeightsByPath(edges, weightByDependency, 0.9)
    self._reduceWeightByDistance(sentenceGraph, weightByDependency)

    allEdges = edges

    # Pair each dependency with its graph edge.
    # NOTE(review): depGraphEdges is mutated while being iterated, which
    # skips the element following each removal; it also mutates
    # sentenceGraph.dependencyGraph.edges in place if that attribute is a
    # shared list -- verify this is intentional.
    depEdgePairs = []
    depGraphEdges = sentenceGraph.dependencyGraph.edges
    for dependency in sentenceGraph.dependencies:
        for edge in depGraphEdges:
            if edge[2] == dependency:
                depEdgePairs.append( (dependency, edge) )
                depGraphEdges.remove(edge)

    # Wire each dependency node between its two tokens.
    for depPair, index in zip(depEdgePairs, dep_indices):
        dep = depPair[1]
        adjMatrix[self._getTokenId(dep[0])-1, index] = weightByDependency[dep[2]]
        adjMatrix[index, self._getTokenId(dep[1])-1] = weightByDependency[dep[2]]
        if not directed:
            adjMatrix[self._getTokenId(dep[1])-1, index] = weightByDependency[dep[2]]
            adjMatrix[index, self._getTokenId(dep[0])-1] = weightByDependency[dep[2]]
        # Dependencies on the shortest path get an "sp_"-prefixed label.
        if dep in allEdges:
            labels[index].add("sp_" + dep[2].attrib["type"])
        else:
            labels[index].add(dep[2].attrib["type"])

    # Chain the second token copies in sentence order.
    for i in range(len(sentenceGraph.tokens), 2*len(sentenceGraph.tokens)-1):
        adjMatrix[i,i+1] = linearOrderWeight
        if not directed:
            adjMatrix[i+1,i] = linearOrderWeight

    # Label token nodes with their features; tokens on the path get
    # "sp_"-prefixed copies, and tokens near the path end points get
    # additional position-tag-prefixed copies.
    preTagByToken = self._addPositionTags(sentenceGraph, [path[0]], [path[-1]])
    for node in sentenceGraph.tokens:
        index = self._getTokenId(node) - 1
        features = self.getTokenFeatures(node, sentenceGraph)
        # Disambiguate which masked entity is which interaction end point.
        if "txt_NAMED_ENT" in features:
            if self.entity1 in sentenceGraph.tokenIsEntityHead[node]:
                features.remove("txt_NAMED_ENT")
                features.append("txt_NAMED_ENT_1")
            elif self.entity2 in sentenceGraph.tokenIsEntityHead[node]:
                features.remove("txt_NAMED_ENT")
                features.append("txt_NAMED_ENT_2")
        if "noAnnType" in features:
            features.remove("noAnnType")
        if node in path:
            for feature in features:
                labels[index].add("sp_"+feature)
        else:
            for feature in features:
                labels[index].add(feature)
        # 'in' replaces the Python-2-only dict.has_key call.
        if node in preTagByToken:
            preTag = preTagByToken[node]
            for feature in features:
                labels[index].add(preTag+feature)

    return adjMatrix, labels
254
256 """ All weights are set to the given value
257 """
258 for node in sentenceGraph.dependencies:
259 weights[node] = weight
260
262 allEdges = []
263 if edgeDict != None:
264 keys1 = edgeDict.keys()
265 keys1.sort()
266 for k1 in keys1:
267 keys2 = edgeDict[k1].keys()
268 keys2.sort()
269 for k2 in keys2:
270 allEdges.extend(edgeDict[k1][k2])
271 return allEdges
272
274 """ The weights of all dependencies in specified paths are set to the
275 given value
276 """
277 allEdges = edges
278
279 for edge in allEdges:
280 assert(weights.has_key(edge[2]))
281 weights[edge[2]] = weight
282
def _reduceWeightByDistance(self, sentenceGraph, weights, zeroDistanceThreshold=0.9, reduceFactor=0.5):
    """ Reduces the weight of dependencies based on their distance
    from the nearest dependency whose weight is >= the threshold.
    """
    # NOTE(review): the original 'def' line was lost in extraction. The
    # parameter names come from the body; the defaults are inferred
    # (0.9 matches the path weight set in _buildAdjacencyMatrix, and the
    # call site passes only two arguments) -- confirm against VCS history.
    undirected = sentenceGraph.dependencyGraph.toUndirected()
    edges = undirected.edges
    tempGraph = NX.Graph(directed=False)
    for edge in edges:
        tempGraph.add_edge(edge[0], edge[1])
    # Token-to-token shortest path lengths; cutoff=999 is effectively
    # "no cutoff" for sentence-sized graphs.
    tokenDistanceDict = NX.all_pairs_shortest_path_length(tempGraph, cutoff=999)
    dependencyDistances = {}

    # Edges already at/above the threshold are distance 0.
    zeroDistanceEdges = []
    for edge in edges:
        if weights[edge[2]] >= zeroDistanceThreshold:
            zeroDistanceEdges.append(edge)
            dependencyDistances[edge[2]] = 0

    # Nothing to measure distances against: leave all weights untouched.
    if len(zeroDistanceEdges) == 0:
        return

    # For every other edge, take the shortest token distance from either
    # of its end points to either end point of any zero-distance edge.
    # ('in' replaces the Python-2-only dict.has_key calls throughout.)
    for edge in edges:
        if edge in zeroDistanceEdges:
            continue
        shortestDistance = 99  # sentinel: "unreachable"
        for zeroDistanceEdge in zeroDistanceEdges:
            if edge[0] in tokenDistanceDict:
                if zeroDistanceEdge[0] in tokenDistanceDict[edge[0]]:
                    if tokenDistanceDict[ edge[0] ][ zeroDistanceEdge[0] ] < shortestDistance:
                        shortestDistance = tokenDistanceDict[ edge[0] ][ zeroDistanceEdge[0] ]
                if zeroDistanceEdge[1] in tokenDistanceDict[edge[0]]:
                    if tokenDistanceDict[ edge[0] ][ zeroDistanceEdge[1] ] < shortestDistance:
                        shortestDistance = tokenDistanceDict[ edge[0] ][ zeroDistanceEdge[1] ]
            if edge[1] in tokenDistanceDict:
                if zeroDistanceEdge[0] in tokenDistanceDict[edge[1]]:
                    if tokenDistanceDict[ edge[1] ][ zeroDistanceEdge[0] ] < shortestDistance:
                        shortestDistance = tokenDistanceDict[ edge[1] ][ zeroDistanceEdge[0] ]
                if zeroDistanceEdge[1] in tokenDistanceDict[edge[1]]:
                    if tokenDistanceDict[ edge[1] ][ zeroDistanceEdge[1] ] < shortestDistance:
                        shortestDistance = tokenDistanceDict[ edge[1] ][ zeroDistanceEdge[1] ]
        # Stepping from a token onto the edge itself costs one more.
        dependencyDistances[edge[2]] = shortestDistance + 1

    # Decay each weight by reduceFactor per step of distance beyond 1.
    for dependency in sentenceGraph.dependencies:
        if dependency not in dependencyDistances:
            dependencyDistances[dependency] = 99
        weights[dependency] *= pow(reduceFactor, max(dependencyDistances[dependency] - 1, 0))
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
381