Package TEES :: Package ExampleBuilders :: Package FeatureBuilders :: Module MultiEdgeFeatureBuilder
[hide private]

Source Code for Module TEES.ExampleBuilders.FeatureBuilders.MultiEdgeFeatureBuilder

  1  """ 
  2  Shortest path features 
  3  """ 
  4  __version__ = "$Revision: 1.30 $" 
  5   
  6  from FeatureBuilder import FeatureBuilder 
  7  import Utils.Libraries.PorterStemmer as PorterStemmer 
  8  #from EdgeFeatureBuilder import EdgeFeatureBuilder 
  9  import Utils.Libraries.combine as combine 
 10   
11 -class MultiEdgeFeatureBuilder(FeatureBuilder):
12 """ 13 This feature builder generates features describing a pair of word tokens connected by one or more 14 dependencies. Most of the features it produces are built on the shortest undirected path of 15 dependencies between the two tokens. 16 """
def __init__(self, featureSet, style=None):
    """
    Set up the generic FeatureBuilder state and the attributes specific to
    this builder.

    @type featureSet: IdSet
    @param featureSet: feature ids
    @param style: passed through unchanged to FeatureBuilder.__init__
    """
    FeatureBuilder.__init__(self, featureSet, style=style)
    # [min, max] of the prediction confidence values; None until a range
    # has been defined.
    self.predictedRange = None
    self.noAnnType = False
    # Optional helper builder; setFeatureVector forwards to it when it is set.
    self.ontologyFeatureBuilder = None
27
def getEdgeType(self, edge, simplify=False):
    """
    Return the dependency type of an edge.

    By default the raw "type" attribute is returned unchanged. The grouping
    of related dependency types into coarse classes used to exist only as
    unreachable code after the return statement (the original comment notes
    that the simplification reduces performance by 0.2 pp); it is kept here
    as an explicit opt-in so the default behaviour is unchanged.

    @param edge: object with a get() method holding a "type" attribute
    @type simplify: boolean
    @param simplify: if True, map the type to "subj", "obj", "prep" or "nn"
                     where one of those groups applies
    @return: the (optionally simplified) dependency type, or None if the
             edge has no "type" attribute
    """
    eType = edge.get("type")
    if not simplify or eType is None:
        return eType
    if eType == "subj" or eType.startswith(("nsubj", "csubj")):
        return "subj"
    if eType in ("obj", "dobj", "iobj", "pobj"):
        return "obj"
    if eType in ("agent", "prepc") or eType.startswith("prep_"):
        return "prep"
    if eType == "appos":  # or nn
        return "nn"
    return eType
43
def definePredictedValueRange(self, sentences, elementName):
    """
    Scan the "predictions" attribute of the selected elements and store the
    smallest and largest value found in self.predictedRange as
    [minimum, maximum]. Both entries stay None when no prediction is found.

    @param sentences: iterable of objects providing findall()
    @type elementName: string
    @param elementName: passed to findall() to select the elements to scan
    """
    valueRange = [None, None]
    self.predictedRange = valueRange  # same list object, updated in place below
    for sentence in sentences:
        for element in sentence.findall(elementName):
            predictionString = element.get("predictions")
            if predictionString is None or predictionString == "":
                continue
            # Comma-separated items; the value is the part after the first ":"
            for item in predictionString.split(","):
                value = float(item.split(":")[1])
                if valueRange[0] is None or value < valueRange[0]:
                    valueRange[0] = value
                if valueRange[1] is None or value > valueRange[1]:
                    valueRange[1] = value
59 60 # def buildStructureFeatures(self, sentenceGraph, paths): 61 # t1 = sentenceGraph.entityHeadTokenByEntity[self.entity1] 62 # t2 = sentenceGraph.entityHeadTokenByEntity[self.entity2] 63 # if paths.has_key(t1) and paths[t1].has_key(t2): 64 # path = paths[t1][t2] 65 # prevToken = None 66 # structure = "" 67 # for pathToken in path: 68 # if prevToken != None: 69 # if sentenceGraph.dependencyGraph.has_edge(prevToken,pathToken): 70 # structure += ">" + sentenceGraph.dependencyGraph.get_edge(prevToken,pathToken)[0].get("type") + ">" 71 # elif sentenceGraph.dependencyGraph.has_edge(pathToken,prevToken): 72 # structure += "<" + sentenceGraph.dependencyGraph.get_edge(pathToken,prevToken)[0].get("type") + "<" 73 # else: 74 # assert(False) 75 # structure += pathToken.get("POS")[0:1] 76 # prevToken = pathToken 77 # self.setFeature(structure, 1) 78
def setFeatureVector(self, features=None, entity1=None, entity2=None, resetCache=True):
    """
    Select the feature vector that subsequently built features are written to,
    together with the two entities of the current example.

    @type features: dictionary
    @param features: a reference to the feature vector
    @type entity1: cElementTree.Element
    @param entity1: an entity used by trigger or edge feature builders
    @type entity2: cElementTree.Element
    @param entity2: an entity used by trigger or edge feature builders
    @type resetCache: boolean
    @param resetCache: if True, the token feature, edge and dependency path
                       caches are replaced with empty dictionaries. Do this
                       when moving to another example.
    """
    self.entity1, self.entity2 = entity1, entity2
    self.features = features
    ontologyBuilder = self.ontologyFeatureBuilder
    if ontologyBuilder is not None:
        # Keep the helper builder writing to the same vector
        ontologyBuilder.setFeatureVector(features)
    if not resetCache:
        return
    self.tokenFeatures = {}
    self.edgeCache = {}
    self.depPathCache = {}
103
def buildPredictedValueFeatures(self, element, tag):
    """
    Edge examples are usually predicted on top of predicted entities. The
    entities' confidence scores can be used as features for edge detection.
    For these features to be used, the model must also have been trained on
    data that contains prediction confidence scores.

    Each "label:value" item of the element's "predictions" attribute yields a
    feature "<tag>_strength_<label>" whose value is scaled into [0, 1] using
    self.predictedRange. If the range is undefined, or has zero width (all
    observed values identical, which previously caused a ZeroDivisionError),
    the feature value is 1.0. An element without predictions gets a single
    "<tag>_strength_<type>" feature with value 1.0.

    @param element: object with a get() method ("predictions" and "type" are read)
    @type tag: string
    @param tag: prefix of the generated feature names
    """
    predictions = element.get("predictions")
    if predictions is None or predictions == "":
        self.setFeature(tag + "_strength_" + str(element.get("type")), 1.0)
        return

    lower, upper = self.predictedRange[0], self.predictedRange[1]
    span = None
    if lower is not None and upper is not None:
        span = upper - lower

    for prediction in predictions.split(","):
        splits = prediction.split(":")
        if span is None or span == 0:
            # No usable range to normalise against
            value = 1.0
        else:
            value = (float(splits[1]) - lower) / span
            assert(value >= 0 and value <= 1)
        self.setFeature(tag + "_strength_" + splits[0], value)
127
def buildEntityFeatures(self, sentenceGraph):
    """
    Build features for the two entities of the current example. The features
    are labelled "e1" or "e2", so the order of the entities is meaningful.

    @param sentenceGraph: its tokens, entitiesByToken and
                          entityHeadTokenByEntity attributes are read
    """
    entitiesByToken = sentenceGraph.entitiesByToken
    for token in sentenceGraph.tokens:
        if token not in entitiesByToken:
            continue
        tokenEntities = entitiesByToken[token]
        # Token features of every token that carries one of the two entities
        for entity, prefix in ((self.entity1, "e1_"), (self.entity2, "e2_")):
            if entity in tokenEntities:
                for feature in self.getTokenFeatures(token, sentenceGraph):
                    self.setFeature(prefix + feature, 1)

    if self.entity1 is None or self.entity2 is None:
        return

    # Describe both entities as either a named entity or an interaction word.
    # An entity without an "isName" attribute is treated like a named one.
    roles = (
        (self.entity1, "e1", "e1_Entity_", "e1_InteractionWord_"),
        (self.entity2, "e2", "e2_Entity", "e2_InteractionWord"),
    )
    entityCombination = ""
    for entity, tag, namedLabel, wordLabel in roles:
        isName = entity.get("isName")
        if isName is None or isName == "True":
            entityCombination += namedLabel
        else:
            entityCombination += wordLabel
            if self.predictedRange is not None:
                self.buildPredictedValueFeatures(entity, tag)
    self.setFeature(entityCombination, 1)
    self.setFeature("eTypes_" + self.getEntityType(self.entity1) + "_" + self.getEntityType(self.entity2), 1)

    headTokens = sentenceGraph.entityHeadTokenByEntity
    if headTokens[self.entity1] == headTokens[self.entity2]:
        self.setFeature("selfLoop", 1)
171 172 # def getPathIds(self, path): 173 # ids = path[0].get("id") + path[1].get("id") 174 175 # def pathsToIds(self, paths): 176 # for path in paths: 177 # for i in range(len(path)): 178 # path[i] = path[i].get("id") 179 # return paths 180 181 # def getEdges(self, graph, path): 182 # """ 183 # Builds a dictionary where edges are indexed by the indices of their 184 # start and end tokens in the path. F.e. to get the edges from path[1] 185 # to path[2] call return_value[1][2]. 186 # 187 # @type graph: Directed NetworkX graph 188 # @type path: list 189 # @param path: list of token elements 190 # """ 191 ## self.edgeCache = {} 192 ## ids = self.getPathIds(path) 193 ## if self.edgeCache.has_key(ids): 194 ## return self.edgeCache[ids] 195 # 196 # pathEdges = {} 197 # for i in range(0, len(path)): 198 # pathEdges[i] = {} 199 # for i in range(1, len(path)): 200 # pathEdges[i][i-1] = [] 201 # pathEdges[i-1][i] = [] 202 # #edges = graph.edges(data=True) 203 # edges = graph.edges 204 # #undirected = graph.toUndirected() 205 # for i in range(1, len(path)): 206 # pathEdges[i-1][i] = graph.getEdges(path[i-1], path[i]) 207 # pathEdges[i][i-1] = graph.getEdges(path[i], path[i-1]) 208 # #found = False 209 # #for edge in edges: 210 # ##edgeTuple = (edge[0], edge[1], edge[2]["element"]) 211 # #if edge[0] == path[i-1] and edge[1] == path[i]: 212 # # #pathEdges[i-1][i].append(edgeTuple) 213 # # pathEdges[i-1][i].append(edge) 214 # # found = True 215 # #elif edge[1] == path[i-1] and edge[0] == path[i]: 216 # # #pathEdges[i][i-1].append(edgeTuple) 217 # # pathEdges[i][i-1].append(edge) 218 # # found = True 219 ## assert(found==True), ("Path", 220 ## [x.get("id") for x in path], 221 ## "Nodes", 222 ## [x.get("id") for x in graph.nodes], 223 ## "Edges", 224 ## [(x[0].get("id"), x[1].get("id"), x[2].get("id")) for x in graph.edges], 225 ## "Undirected Nodes", 226 ## [x.get("id") for x in undirected.nodes], 227 ## "Undirected Edges", 228 ## [(x[0].get("id"), x[1].get("id"), x[2].get("id")) 
for x in undirected.edges], 229 ## "Paths", 230 ## self.pathsToIds(graph.getPaths(path[0], path[-1])), 231 ## "Undirected Paths", 232 ## self.pathsToIds(undirected.getPaths(path[0], path[-1])) 233 ## ) 234 ## self.edgeCache[ids] = pathEdges 235 # return pathEdges 236 237 # def getEdgeSet(self, graph, path): 238 # pathEdges = set() 239 # edges = graph.edges(data=True) 240 # for i in range(1, len(path)): 241 # for edge in edges: 242 # edgeTuple = (edge[0], edge[1], edge[2]["element"]) 243 # if edge[0] == path[i-1] and edge[1] == path[i]: 244 # pathEdges.add(edgeTuple) 245 # elif edge[1] == path[i-1] and edge[0] == path[i]: 246 # pathEdges.add(edgeTuple) 247 # return pathEdges 248 249 # def getEdgeCombinations(self, graph, path): 250 # if len(path) == 1: 251 # return set() 252 # 253 # pathEdges = self.getEdges(graph, path) 254 # 255 # #ids = self.getPathIds(path) 256 # #self.depPathCache[ids] = set() 257 # 258 # #if self.depPathCache.has_key(ids): 259 # # return self.depPathCache[ids] 260 # 261 # #self.depPathCache[ids] = set() 262 # depPaths = set() 263 # pathEdgeStrings = [] 264 # for i in range(1, len(path)): 265 # pathEdgeStrings.append([]) 266 # for e in pathEdges[i][i-1]: 267 # pathEdgeStrings[-1].append(e[2].get("type")+">") 268 # for e in pathEdges[i-1][i]: 269 # pathEdgeStrings[-1].append("<"+e[2].get("type")) 270 # combinations = combine.combine(*pathEdgeStrings) 271 # for combination in combinations: 272 # #self.depPathCache[ids].add( ".".join(combination) ) 273 # depPaths.add( ".".join(combination) ) 274 # #return self.depPathCache[ids] 275 # return depPaths 276 277 # def getWalks(self, pathTokens, pathEdges, position=1, walk=None): 278 # """ 279 # A path is defined by a list of tokens. But since there can be more than one edge 280 # between the same two tokens, there are multiple ways of getting from the first 281 # token to the last token. This function returns all of these "walks", i.e. 
the combinations 282 # of edges that can be travelled to get from the first to the last token of the path. 283 # """ 284 # allWalks = [] 285 # if walk == None: 286 # walk = [] 287 # 288 # edges = pathEdges[position-1][position] + pathEdges[position][position-1] 289 # for edge in edges: 290 # if position < len(pathTokens)-1: 291 # allWalks.extend(self.getWalks(pathTokens, pathEdges, position+1, walk + [edge])) 292 # else: 293 # allWalks.append(walk + [edge]) 294 # return allWalks 295
def buildPathLengthFeatures(self, pathTokens):
    """
    Simple features about the length of the path: a binary feature named
    after the token count, and a numeric "len" feature holding the count.

    @type pathTokens: list
    @param pathTokens: the tokens of the path
    """
    tokenCount = len(pathTokens)
    self.setFeature("len_tokens_" + str(tokenCount), 1)
    self.setFeature("len", tokenCount)
302
def buildSentenceFeatures(self, sentenceGraph):
    """
    Count how often each annotated type occurs among the tokens of the
    sentence and set one "count_<type>" feature per type, valued with that
    count. Features are set in sorted type order so the result does not
    depend on dictionary ordering.

    @param sentenceGraph: its tokens attribute is iterated
    """
    typeCounts = {}
    for token in sentenceGraph.tokens:
        for annType in self.getTokenAnnotatedType(token, sentenceGraph):
            # dict.get instead of has_key(): has_key() does not exist in Python 3
            typeCounts[annType] = typeCounts.get(annType, 0) + 1
    for annType in sorted(typeCounts):
        self.setFeature("count_" + annType, typeCounts[annType])
315
def buildTerminusTokenFeatures(self, pathTokens, sentenceGraph):
    """
    Token features for the two ends of the path: features of the first token
    are prefixed with "tokTerm1_", those of the last token with "tokTerm2_".

    @type pathTokens: list
    @param pathTokens: the tokens of the path (must not be empty)
    """
    termini = (("tokTerm1_", pathTokens[0]), ("tokTerm2_", pathTokens[-1]))
    for prefix, token in termini:
        for feature in self.getTokenFeatures(token, sentenceGraph):
            self.setFeature(prefix + feature, 1)
324 325 #self.features[self.featureSet.getId("tokTerm1POS_"+pathTokens[0].attrib["POS"])] = 1 326 #self.features[self.featureSet.getId("tokTerm1txt_"+sentenceGraph.getTokenText(pathTokens[0]))] = 1 327 #self.features[self.featureSet.getId("tokTerm2POS_"+pathTokens[-1].attrib["POS"])] = 1 328 #self.features[self.featureSet.getId("tokTerm2txt_"+sentenceGraph.getTokenText(pathTokens[-1]))] = 1 329
def buildWalkPaths(self, pathTokens, walks, sentenceGraph):
    """
    Set a single "tokenPath..." feature that spells out the annotated types of
    every path token except the last one. Each type is preceded by "_" and
    each token's types are followed by "__".

    @type pathTokens: list
    @param pathTokens: the tokens of the path
    @param walks: not used by this method
    """
    segments = []
    for token in pathTokens[0:-1]:
        annTypes = self.getTokenAnnotatedType(token, sentenceGraph)
        segments.append("".join(["_" + annType for annType in annTypes]) + "__")
    self.setFeature("tokenPath" + "".join(segments), 1)
340 341 # for walk in walks: 342 # edgeString = "" 343 # for edge in walk: 344 # edgeString += "_" + edge[2].attrib["type"] 345 # self.features[self.featureSet.getId("walkPath_"+t1+edgeString+"_"+t2)] = 1 346
def buildPathGrams(self, length, pathTokens, sentenceGraph):
    """
    Go through every walk along the path and build features for each window
    of "length" consecutive edges.

    For each walk a direction string is grown edge by edge: "F" when the
    edge's first token is the current path token, "R" when its second token
    is. Once a full window is available, token, edge and type-pair features
    are set for it. Finally one "edge_directions_" feature is set per walk.

    @type length: integer
    @param length: number of edges in a window
    @type pathTokens: list
    @param pathTokens: the tokens of the path
    @param sentenceGraph: its dependencyGraph provides the walks
    """
    firstTypes = self.getTokenAnnotatedType(pathTokens[0], sentenceGraph)
    lastTypes = self.getTokenAnnotatedType(pathTokens[-1], sentenceGraph)

    walks = sentenceGraph.dependencyGraph.getWalks(pathTokens)
    self.buildWalkPaths(pathTokens, walks, sentenceGraph)

    dirGrams = ["" for _ in walks]
    window = length - 1
    for i in range(len(pathTokens) - 1):  # len(pathTokens) == len(walk)
        for j in range(len(walks)):
            step = walks[j][i]
            if step[0] == pathTokens[i]:
                dirGrams[j] += "F"
            else:
                assert step[1] == pathTokens[i]
                dirGrams[j] += "R"
            if i < window:
                continue  # not enough edges yet for a full window

            start = i - window
            styleGram = dirGrams[j][start:i + 1]
            # Label tokens by their role in the xgram
            for token in pathTokens[start + 1:i + 1]:
                for feature in self.getTokenFeatures(token, sentenceGraph, annotatedType=(self.maximum == True)):
                    self.setFeature("tok_" + styleGram + feature, 1)
            # Label edges by their role in the xgram
            edgeGram = "depGram_" + styleGram
            for position, edge in enumerate(walks[j][start:i + 1]):
                edgeType = self.getEdgeType(edge[2])
                self.setFeature("dep_" + styleGram + str(position) + "_" + edgeType, 1)
                edgeGram += "_" + edgeType
            self.setFeature(edgeGram, 1)
            for type1 in firstTypes:
                for type2 in lastTypes:
                    self.setFeature(type1 + "_" + edgeGram + "_" + type2, 1)
    for dirGram in dirGrams:
        self.setFeature("edge_directions_" + dirGram, 1)
394
def addType(self, token, sentenceGraph, prefix="annType_"):
    """
    Set one binary feature per annotated type of the token.

    @param token: the token whose annotated types are looked up
    @type prefix: string
    @param prefix: prepended to each type to form the feature name
    """
    for annotatedType in self.getTokenAnnotatedType(token, sentenceGraph):
        self.setFeature(prefix + annotatedType, 1)
399
def buildPathEdgeFeatures(self, pathTokens, sentenceGraph):
    """
    Build features for every dependency that connects two consecutive tokens
    of the path, in either direction: the dependency type, the text, POS and
    annotated types of both end tokens, and "gov_"/"triple_" combinations of
    the two ends.

    @type pathTokens: list
    @param pathTokens: the tokens of the path
    @param sentenceGraph: its dependencyGraph, getTokenText() and
                          tokenIsEntityHead are used
    """
    depGraph = sentenceGraph.dependencyGraph
    edgeList = []
    for i in range(1, len(pathTokens)):
        edgeList.extend(depGraph.getEdges(pathTokens[i], pathTokens[i-1]))
        edgeList.extend(depGraph.getEdges(pathTokens[i-1], pathTokens[i]))

    for edge in edgeList:
        # edge[0] / edge[1] are the "g" / "d" ends in the original naming
        gToken, dToken = edge[0], edge[1]
        depType = self.getEdgeType(edge[2])
        self.setFeature("dep_" + depType, 1)
        # Token 1
        gText = sentenceGraph.getTokenText(gToken)
        gPOS = gToken.get("POS")
        self.setFeature("txt_" + gText, 1)
        self.setFeature("POS_" + gPOS, 1)
        self.addType(gToken, sentenceGraph, prefix="annType_")
        # Token 2
        dText = sentenceGraph.getTokenText(dToken)
        dPOS = dToken.get("POS")
        self.setFeature("txt_" + dText, 1)
        self.setFeature("POS_" + dPOS, 1)
        self.addType(dToken, sentenceGraph, prefix="annType_")

        # g-d features
        # Default to a placeholder type. Previously these lists were only
        # assigned inside the conditions below, so a token whose
        # tokenIsEntityHead entry is None raised an UnboundLocalError (first
        # edge) or silently reused the previous edge's types.
        gATs = ["noAnnType"]
        dATs = ["noAnnType"]
        if sentenceGraph.tokenIsEntityHead[gToken] is not None:
            gATs = self.getTokenAnnotatedType(gToken, sentenceGraph)
        if sentenceGraph.tokenIsEntityHead[dToken] is not None:
            dATs = self.getTokenAnnotatedType(dToken, sentenceGraph)
        self.setFeature("gov_" + gText + "_" + dText, 1)
        self.setFeature("gov_" + gPOS + "_" + dPOS, 1)
        lastDAT = "noAnnType"
        for gAT in gATs:
            for dAT in dATs:
                self.setFeature("gov_" + gAT + "_" + dAT, 1)
                lastDAT = dAT

        # NOTE(review): the triple feature only ever uses the last type
        # visited by the loop above (or "noAnnType" if there was none). This
        # mirrors the original behaviour, which relied on the leaked loop
        # variable; confirm whether all of dATs was intended before changing
        # it, since that would alter the generated feature set.
        for gAT in gATs:
            self.setFeature("triple_" + gAT + "_" + depType + "_" + lastDAT, 1)
443 #self.features[self.featureSet.getId("triple_"+gPOS+"_"+depType+"_"+dPOS)] = 1 444 #self.features[self.featureSet.getId("triple_"+gText+"_"+depType+"_"+dText)] = 1 445 446 # # Features for edge-type/token combinations that define the governor/dependent roles 447 # self.features[self.featureSet.getId("depgov_"+depType+"_"+dText)] = 1 448 # self.features[self.featureSet.getId("depgov_"+depType+"_"+dPOS)] = 1 449 # self.features[self.featureSet.getId("depgov_"+depType+"_"+dAT)] = 1 450 # self.features[self.featureSet.getId("depdep_"+gText+"_"+depType)] = 1 451 # self.features[self.featureSet.getId("depdep_"+gPOS+"_"+depType)] = 1 452 # self.features[self.featureSet.getId("depdep_"+gAT+"_"+depType)] = 1 453
def buildSingleElementFeatures(self, pathTokens, sentenceGraph):
    """
    Build features for individual elements of the path: the dependencies
    between consecutive tokens (marked by their direction relative to the
    path), the POS and text of the internal tokens, and the dependencies
    between internal tokens.

    @type pathTokens: list
    @param pathTokens: the tokens of the path
    @param sentenceGraph: its dependencyGraph and getTokenText() are used
    """
    getEdges = sentenceGraph.dependencyGraph.getEdges

    # Edges directed relative to the path
    for previous, current in zip(pathTokens[:-1], pathTokens[1:]):
        for edge in getEdges(current, previous):
            self.setFeature("dep_" + self.getEdgeType(edge[2]) + "Forward_", 1)
        for edge in getEdges(previous, current):
            self.setFeature("dep_Reverse_" + self.getEdgeType(edge[2]), 1)

    # Internal tokens (everything except the two ends of the path)
    for token in pathTokens[1:-1]:
        self.setFeature("internalPOS_" + token.get("POS"), 1)
        self.setFeature("internalTxt_" + sentenceGraph.getTokenText(token), 1)

    # Internal dependencies (both end points are internal tokens)
    for previous, current in zip(pathTokens[1:-2], pathTokens[2:-1]):
        for edge in getEdges(current, previous):
            self.setFeature("internalDep_" + self.getEdgeType(edge[2]), 1)
        for edge in getEdges(previous, current):
            self.setFeature("internalDep_" + self.getEdgeType(edge[2]), 1)
482 483 # def buildEdgeCombinations(self, pathTokens, sentenceGraph): 484 # 485 ## if edges[0][1]: 486 ## features[self.featureSet.getId("internalPOS_"+edges[0][0][0].attrib["POS"])]=1 487 ## features[self.featureSet.getId("internalTxt_"+sentenceGraph.getTokenText(edges[0][0][0]))]=1 488 ## else: 489 ## features[self.featureSet.getId("internalPOS_"+edges[0][0][1].attrib["POS"])]=1 490 ## features[self.featureSet.getId("internalTxt_"+sentenceGraph.getTokenText(edges[0][0][1]))]=1 491 ## if edges[-1][1]: 492 ## features[self.featureSet.getId("internalPOS_"+edges[-1][0][1].attrib["POS"])]=1 493 ## features[self.featureSet.getId("internalTxt_"+sentenceGraph.getTokenText(edges[-1][0][1]))]=1 494 ## else: 495 ## features[self.featureSet.getId("internalPOS_"+edges[-1][0][0].attrib["POS"])]=1 496 ## features[self.featureSet.getId("internalTxt_"+sentenceGraph.getTokenText(edges[-1][0][0]))]=1 497 ## for i in range(1,len(edges)-1): 498 ## features[self.featureSet.getId("internalPOS_"+edges[i][0][0].attrib["POS"])]=1 499 ## features[self.featureSet.getId("internalTxt_"+sentenceGraph.getTokenText(edges[i][0][0]))]=1 500 ## features[self.featureSet.getId("internalPOS_"+edges[i][0][1].attrib["POS"])]=1 501 ## features[self.featureSet.getId("internalTxt_"+sentenceGraph.getTokenText(edges[i][0][1]))]=1 502 ## features[self.featureSet.getId("internalDep_"+edges[i][0][2].attrib["type"])]=1 503 # 504 # return 505 # # Edge bigrams 506 # for i in range(1,len(pathTokens)-1): 507 # edgesForward1 = pathEdges[i][i-1] 508 # edgesReverse1 = pathEdges[i-1][i] 509 # edgesForward2 = pathEdges[i][i+1] 510 # edgesReverse2 = pathEdges[i+1][i] 511 # for e1 in edgesForward1: 512 # for e2 in edgesForward2: 513 # self.setFeature("dep_"+e1[2].get("type")+">"+e2[2].get("type")+">", 1) 514 # for e1 in edgesReverse1: 515 # for e2 in edgesReverse2: 516 # self.setFeature("dep_"+e1[2].get("type")+"<"+e2[2].get("type")+"<", 1) 517 # for e1 in edgesForward1: 518 # for e2 in edgesReverse2: 519 # 
self.setFeature("dep_"+e1[2].get("type")+">"+e2[2].get("type")+"<", 1) 520 # for e1 in edgesReverse1: 521 # for e2 in edgesForward2: 522 # self.setFeature("dep_"+e1[2].get("type")+"<"+e2[2].get("type")+">", 1) 523 # 524 ## for i in range(1,len(edges)): 525 ## type1 = edges[i-1][0][2].attrib["type"] 526 ## type2 = edges[i][0][2].attrib["type"] 527 ## if edges[i-1][1] and edges[i][1]: 528 ## features[self.featureSet.getId("dep_"+type1+">"+type2+">")] = 1 529 ## elif edges[i-1][1] and edges[i][0]: 530 ## features[self.featureSet.getId("dep_"+type1+">"+type2+"<")] = 1 531 ## elif edges[i-1][0] and edges[i][0]: 532 ## features[self.featureSet.getId("dep_"+type1+"<"+type2+"<")] = 1 533 ## elif edges[i-1][0] and edges[i][1]: 534 ## features[self.featureSet.getId("dep_"+type1+"<"+type2+">")] = 1 535
def buildTerminusFeatures(self, token, ignoreEdges, prefix, sentenceGraph):
    """
    Build features for the dependencies attached to a token, skipping those
    listed in ignoreEdges. For each remaining incoming edge, the edge type
    and the token features of the edge's first token are set under
    "<prefix>HangingIn_"; outgoing edges are handled likewise with the
    edge's second token under "<prefix>HangingOut_".

    @param token: the token whose attached edges are examined
    @param ignoreEdges: container of edges to skip
    @type prefix: string
    @param prefix: prepended to every generated feature name
    @param sentenceGraph: its dependencyGraph provides the edges
    """
    depGraph = sentenceGraph.dependencyGraph
    # (edge getter, feature label, index of the token at the far end of the edge)
    directions = (
        (depGraph.getInEdges, "HangingIn_", 0),
        (depGraph.getOutEdges, "HangingOut_", 1),
    )
    for getAttachedEdges, label, farEnd in directions:
        for edge in getAttachedEdges(token):
            if edge in ignoreEdges:
                continue
            self.setFeature(prefix + label + self.getEdgeType(edge[2]), 1)
            for feature in self.getTokenFeatures(edge[farEnd], sentenceGraph):
                self.setFeature(prefix + label + feature, 1)
554