1  from FeatureBuilder import FeatureBuilder 
  2  import numpy 
  3   
  4  import numpy.linalg 
  5  import networkx as NX 
  6  import copy 
  7  import sys 
  8  sys.path.append("../..") 
  9  import Core.ExampleUtils as ExampleUtils 
 10   
 12      """ convert an (R, G, B) tuple to #RRGGBB """ 
 13      hexcolor = '#%02x%02x%02x' % (int(red),int(green),int(blue)) 
 14       
 15      return hexcolor 
  16   
 18      span = maxVal - minVal 
 19      pos = value / span 
 20      spanHalf = span / 2.0  
 21      blue = max((spanHalf - value)/spanHalf, 0.0) * 255 
 22      red = max((spanHalf - abs(value-spanHalf))/spanHalf, 0.0) * 255 
 23      green = max((value-spanHalf)/spanHalf, 0.0) * 255 
 24      return getHexColor(red, green, blue) 
  25   
 27      from HtmlBuilder import HtmlBuilder 
 28      h = HtmlBuilder() 
 29      h.newPage("test","") 
 30       
 31      h.header("Adjacency Matrix", 3) 
 32      h.table(1) 
 33      rows, columns = matrix.shape 
 34      h.tableRow()  
 35      h.tableData(None, True)  
 36      for i in range(columns): 
 37          h.tableData(None, False) 
 38          h.span( str(i), "font-size:smaller;font-weight:bold" ) 
 39          h.closeElement()  
 40      h.closeElement()  
 41       
 42      for i in range(rows): 
 43          h.tableRow() 
 44          h.tableData(None, False) 
 45          h.span( str(i), "font-size:smaller;font-weight:bold" ) 
 46          h.closeElement()  
 47          for j in range(columns):             
 48              h.tableData(None, False) 
 49              if matrix[i,j] != 0.0: 
 50                  style = "font-size:smaller;background-color:" + getColorFromBRGSpectrum(matrix[i,j])  
 51                  h.span( str(matrix[i,j])[0:4], style ) 
 52              else: 
 53                  style = "font-size:smaller" 
 54                  h.span( "0", style ) 
 55              h.closeElement()  
 56          h.closeElement()  
 57       
 58      h.closeElement()  
 59       
 60      h.header("Legend", 4) 
 61      h.table(1) 
 62      h.tableRow() 
 63      h.tableData(None, False) 
 64      h.span( "0.0", "font-size:smaller" ) 
 65      h.closeElement()  
 66      i = 0.1 
 67      while i <= 1.0: 
 68          h.tableData(None, False) 
 69          h.span( str(i), "font-size:smaller;background-color:" + getColorFromBRGSpectrum(i) ) 
 70          h.closeElement()  
 71          i += 0.1 
 72      h.closeElement()  
 73      h.closeElement()  
 74       
 75      if labels != None: 
 76          h.header("Labels", 3) 
 77          for i in range(len(labels)): 
 78              string = str(i) + ": " 
 79              first = True 
 80              for label in labels[i]: 
 81                  if not first: 
 82                      string += ", " 
 83                  string += label 
 84                  first = False 
 85              h.span(string) 
 86              h.lineBreak() 
 87       
 88      h.write(filename) 
  89   
 93       
 95          edgeList = [] 
 96          depGraph = sentenceGraph.dependencyGraph 
 97          pt = path 
 98          for i in range(1, len(path)): 
 99              edgeList.extend(depGraph.getEdges(pt[i], pt[i-1])) 
100              edgeList.extend(depGraph.getEdges(pt[i-1], pt[i])) 
101          edges = edgeList 
102          adjacencyMatrix, labels = self._buildAdjacencyMatrix(sentenceGraph, path, edges) 
103          node_count = 2*len(sentenceGraph.tokens) + len(sentenceGraph.dependencies) 
104           
105          if sentenceGraph.sentenceElement.attrib["id"] == "LLL.d0.s0": 
106              adjacencyMatrixToHtml(adjacencyMatrix, labels, "LLL.d0.s0_adjacency_matrix.html") 
107           
108          allPathsMatrix = self._prepareMatrix(adjacencyMatrix, node_count) 
109          self._matrixToFeatures(allPathsMatrix, labels) 
110          if sentenceGraph.sentenceElement.attrib["id"] == "LLL.d0.s0": 
111              adjacencyMatrixToHtml(allPathsMatrix, labels, "LLL.d0.s0_all_paths_matrix.html") 
112              commentLines = [] 
113              commentLines.extend(self.featureSet.toStrings()) 
114              example = ["example_"+self.entity1.attrib["id"]+"_"+self.entity2.attrib["id"],"unknown",self.features] 
115              ExampleUtils.writeExamples([example],"LLL.d0.s0_example.txt",commentLines) 
 116               
117   
119           
120          """Linearizes the representation of the graph""" 
121          linear = {} 
122          for i in range(W.shape[0]): 
123              for j in range(W.shape[1]): 
124                  if W[i,j] > 0.00001:  
125                      for label1 in labels[i]: 
126                          if (not "punct" in labels[i]) and (not "punct" in labels[j]): 
127                              for label2 in labels[j]: 
128                                   
129                                  label = label1+"_$_"+label2 
130                                  self.features[self.featureSet.getId(label)] = W[i,j] 
 131   
132 -    def _prepareMatrix(self, adjacencyMatrix, node_count, dtyp=numpy.float64): 
 133          W = adjacencyMatrix * -1.0 
134   
135   
136   
137   
138          W += numpy.mat(numpy.identity(node_count, dtype = dtyp))     
139          return numpy.linalg.inv(W) - numpy.mat(numpy.identity(node_count, dtype=dtyp))     
 140       
142          """ Returns the position id of the token """ 
143          return int(tokenElement.attrib["id"].split("_")[1]) 
 144       
145 -    def _getTokenText(self, path, sentenceGraph, token): 
 146          tokenText = sentenceGraph.getTokenText(token) 
147          if tokenText == "NAMED_ENT": 
148              if token == path[0]: 
149                  tokenText = "NAMED_ENT_1" 
150              elif token == path[-1]: 
151                  tokenText = "NAMED_ENT_2" 
152          return tokenText 
 153   
154 -    def _buildAdjacencyMatrix(self, sentenceGraph, path, edges, floattype=numpy.float64, directed=True, linearOrderWeight=0.9): 
 155          """ Returns a Numpy-matrix 
156          """ 
157           
158          node_count = 2*len(sentenceGraph.tokens) + len(sentenceGraph.dependencies) 
159           
160          adjMatrix = numpy.mat(numpy.zeros((node_count,node_count), dtype = floattype)) 
161           
162          labels = [set([]) for x in range(node_count)] 
163           
164          dep_indices = range(2*len(sentenceGraph.tokens), node_count) 
165           
166           
167          weightByDependency = {} 
168          self._setAllDependencyWeights(sentenceGraph, weightByDependency, 0.3) 
169          self._setDependencyWeightsByPath(edges, weightByDependency, 0.9) 
170          self._reduceWeightByDistance(sentenceGraph, weightByDependency) 
171           
172           
173          allEdges = edges  
174           
175           
176          depEdgePairs = [] 
177          depGraphEdges = sentenceGraph.dependencyGraph.edges  
178          for dependency in sentenceGraph.dependencies: 
179              for edge in depGraphEdges: 
180                  if edge[2] == dependency: 
181                      depEdgePairs.append( (dependency, edge) ) 
182                      depGraphEdges.remove(edge) 
183               
184          for depPair, index in zip(depEdgePairs, dep_indices): 
185              dep = depPair[1] 
186               
187              adjMatrix[self._getTokenId(dep[0])-1, index] = weightByDependency[dep[2]] 
188              adjMatrix[index, self._getTokenId(dep[1])-1] = weightByDependency[dep[2]] 
189               
190              if not directed: 
191                  adjMatrix[self._getTokenId(dep[1])-1, index] = weightByDependency[dep[2]] 
192                  adjMatrix[index, self._getTokenId(dep[0])-1] = weightByDependency[dep[2]] 
193              
194   
195   
196   
197   
198   
199              if dep in allEdges: 
200                  labels[index].add("sp_" + dep[2].attrib["type"]) 
201              else: 
202                  labels[index].add(dep[2].attrib["type"]) 
203               
204           
205          for i in range(len(sentenceGraph.tokens),2*len(sentenceGraph.tokens)-1): 
206              adjMatrix[i,i+1] = linearOrderWeight 
207              if not directed: 
208                  adjMatrix[i+1,i] = linearOrderWeight 
209       
210           
211           
212          preTagByToken = self._addPositionTags(sentenceGraph, [path[0]], [path[-1]]) 
213          for node in sentenceGraph.tokens: 
214              index = self._getTokenId(node) - 1 
215               
216              features = self.getTokenFeatures(node, sentenceGraph) 
217              if "txt_NAMED_ENT" in features: 
218                  if self.entity1 in sentenceGraph.tokenIsEntityHead[node]: 
219                      features.remove("txt_NAMED_ENT") 
220                      features.append("txt_NAMED_ENT_1") 
221                  elif self.entity2 in sentenceGraph.tokenIsEntityHead[node]: 
222                      features.remove("txt_NAMED_ENT") 
223                      features.append("txt_NAMED_ENT_2") 
224              if "noAnnType" in features: 
225                  features.remove("noAnnType") 
226               
227               
228              if node in path:  
229                  for feature in features: 
230                      labels[index].add("sp_"+feature) 
231                   
232                   
233              else: 
234                  for feature in features: 
235                      labels[index].add(feature) 
236                   
237                   
238   
239   
240   
241   
242              if preTagByToken.has_key(node): 
243                  preTag = preTagByToken[node] 
244                  for feature in features: 
245                      labels[index].add(preTag+feature) 
246                   
247                   
248   
249   
250   
251   
252           
253          return adjMatrix, labels 
 254   
256          """ All weights are set to the given value 
257          """ 
258          for node in sentenceGraph.dependencies: 
259              weights[node] = weight 
 260       
262          allEdges = [] 
263          if edgeDict != None: 
264              keys1 = edgeDict.keys() 
265              keys1.sort() 
266              for k1 in keys1: 
267                  keys2 = edgeDict[k1].keys() 
268                  keys2.sort() 
269                  for k2 in keys2: 
270                      allEdges.extend(edgeDict[k1][k2]) 
271          return allEdges 
 272       
274          """ The weights of all dependencies in specified paths are set to the 
275          given value 
276          """ 
277          allEdges = edges  
278           
279          for edge in allEdges: 
280              assert(weights.has_key(edge[2])) 
281              weights[edge[2]] = weight 
 282                       
284          """ Reduces the weight of dependencies based on their distance 
285          from the nearest dependency whose weight is >= the threshold. 
286          """ 
287          undirected = sentenceGraph.dependencyGraph.toUndirected()  
288          edges = undirected.edges 
289          tempGraph = NX.Graph(directed=False) 
290          for edge in edges: 
291              tempGraph.add_edge(edge[0], edge[1]) 
292          tokenDistanceDict = NX.all_pairs_shortest_path_length(tempGraph, cutoff=999) 
293          dependencyDistances = {} 
294   
295          zeroDistanceEdges = [] 
296          for edge in edges: 
297              if weights[edge[2]] >= zeroDistanceThreshold: 
298                  zeroDistanceEdges.append(edge) 
299                  dependencyDistances[edge[2]] = 0 
300           
301           
302          if len(zeroDistanceEdges) == 0: 
303              return 
304           
305           
306          for edge in edges: 
307              if edge in zeroDistanceEdges: 
308                  continue 
309              shortestDistance = 99 
310              for zeroDistanceEdge in zeroDistanceEdges: 
311                  if tokenDistanceDict.has_key(edge[0]): 
312                      if tokenDistanceDict[edge[0]].has_key(zeroDistanceEdge[0]): 
313                          if tokenDistanceDict[ edge[0] ][ zeroDistanceEdge[0] ] < shortestDistance: 
314                              shortestDistance = tokenDistanceDict[ edge[0] ][ zeroDistanceEdge[0] ] 
315                      if tokenDistanceDict[edge[0]].has_key(zeroDistanceEdge[1]): 
316                          if tokenDistanceDict[ edge[0] ][ zeroDistanceEdge[1] ] < shortestDistance: 
317                              shortestDistance = tokenDistanceDict[ edge[0] ][ zeroDistanceEdge[1] ] 
318                  if tokenDistanceDict.has_key(edge[1]): 
319                      if tokenDistanceDict[edge[1]].has_key(zeroDistanceEdge[0]): 
320                          if tokenDistanceDict[ edge[1] ][ zeroDistanceEdge[0] ] < shortestDistance: 
321                              shortestDistance = tokenDistanceDict[ edge[1] ][ zeroDistanceEdge[0] ] 
322                      if tokenDistanceDict[edge[1]].has_key(zeroDistanceEdge[1]): 
323                          if tokenDistanceDict[ edge[1] ][ zeroDistanceEdge[1] ] < shortestDistance: 
324                              shortestDistance = tokenDistanceDict[ edge[1] ][ zeroDistanceEdge[1] ] 
325               
326              dependencyDistances[edge[2]] = shortestDistance + 1 
327   
328           
329          for dependency in sentenceGraph.dependencies: 
330              if not dependencyDistances.has_key(dependency): 
331                  dependencyDistances[dependency] = 99 
332              weights[dependency] *= pow(reduceFactor, max(dependencyDistances[dependency] - 1, 0)) 
 333   
334   
335   
336   
337   
338   
339   
340   
341   
342   
343   
344   
345   
346   
347   
348   
349   
350   
351   
352   
353   
354   
355       
 381