Package TEES :: Package ExampleBuilders :: Module UnmergingExampleBuilder
[hide private]

Source Code for Module TEES.ExampleBuilders.UnmergingExampleBuilder

  1  """ 
  2  Edge Examples 
  3  """ 
  4  __version__ = "$Revision: 1.13 $" 
  5   
  6  import sys, os 
  7  thisPath = os.path.dirname(os.path.abspath(__file__)) 
  8  sys.path.append(os.path.abspath(os.path.join(thisPath,".."))) 
  9  from ExampleBuilders.ExampleBuilder import ExampleBuilder 
 10  from Core.IdSet import IdSet 
 11  import Core.ExampleUtils as ExampleUtils 
 12  from FeatureBuilders.MultiEdgeFeatureBuilder import MultiEdgeFeatureBuilder 
 13  from FeatureBuilders.TriggerFeatureBuilder import TriggerFeatureBuilder 
 14  #from FeatureBuilders.TokenFeatureBuilder import TokenFeatureBuilder 
 15  from Core.SimpleGraph import Graph 
 16  from Utils.ProgressCounter import ProgressCounter 
 17  import Utils.Libraries.combine as combine 
 18  import Utils.ElementTreeUtils as ETUtils 
 19  import gzip 
 20  import types 
 21   
def combinations(iterable, r):
    """
    Yield every r-length combination of the items of iterable as a tuple.

    Combinations are produced in lexicographic order of the item positions,
    e.g.
        combinations('ABCD', 2) --> AB AC AD BC BD CD
        combinations(range(4), 3) --> 012 013 023 123

    Nothing is yielded when r is larger than the number of items.
    """
    pool = tuple(iterable)
    n = len(pool)
    if r > n:
        return
    # The index vector is updated in place below, so it must be a real list.
    # range() only returns a mutable list on Python 2; on Python 3 it is an
    # immutable range object and item assignment would raise TypeError.
    indices = list(range(r))
    yield tuple(pool[i] for i in indices)
    while True:
        # Find the rightmost index that has not yet reached its final value.
        for i in reversed(range(r)):
            if indices[i] != i + n - r:
                break
        else:
            # Every index is at its maximum: all combinations were produced.
            return
        indices[i] += 1
        # Reset all indices to the right of i to the smallest legal values.
        for j in range(i + 1, r):
            indices[j] = indices[j - 1] + 1
        yield tuple(pool[i] for i in indices)
41
def compareInteractionPrecedence(e1, e2):
    """
    cmp-style comparison of two interaction descriptors.

    e1/e2 = (interaction, pathdist, lindist, tok2pos)

    The descriptors are ordered by dependency path distance, then by linear
    distance, then by the position of the head token of entity 2. Returns
    1 if e1 sorts after e2, -1 if it sorts before, and 0 if all three keys
    are equal. The interaction itself (position 0) is never compared.
    """
    # Positions 1..3 are the sort keys, in decreasing order of significance.
    for position in (1, 2, 3):
        left = e1[position]
        right = e2[position]
        if left > right:
            return 1
        if left < right:
            return -1
    return 0
class UnmergingExampleBuilder(ExampleBuilder):
    """
    This example builder makes unmerging examples, i.e. examples describing
    potential events.

    For every (possibly merged) trigger entity of a sentence, all candidate
    combinations of its outgoing argument edges are enumerated and one
    classification example is written per combination. When a gold graph is
    given, a combination that matches a gold event gets a positive class
    ("All_regulation", "Binding" or "Other"), otherwise the class is "neg".
    """

    def __init__(self, style=None, length=None, types=[], featureSet=None, classSet=None):
        """
        Set up the id sets, style parameters and feature builders.

        style      -- style definition, parsed with self.getParameters
        length     -- must be None (path length limits are not supported)
        types      -- stored as self.types; not otherwise used in this class.
                      NOTE: the default list object is shared between
                      instances, so it should never be modified in place.
        featureSet -- IdSet of feature names, a new one is created if None
        classSet   -- IdSet of class names, a new one starting from id 1 is
                      created if None. "neg" must map to class id 1.
        """
        if featureSet is None:
            featureSet = IdSet()
        if classSet is None:
            classSet = IdSet(1)
        # The lookup is done outside the assert statement so that it is still
        # executed when asserts are stripped (python -O).
        # NOTE(review): presumably getId also registers "neg" when it is not
        # yet in the set -- confirm against Core.IdSet.
        negClassId = classSet.getId("neg")
        assert negClassId == 1

        ExampleBuilder.__init__(self, classSet=classSet, featureSet=featureSet)

        self.styles = self._setDefaultParameters(["trigger_features", "typed", "directed", "no_linear", "entities", "genia_limits",
            "noAnnType", "noMasking", "maxFeatures", "no_merge", "disable_entity_features",
            "disable_single_element_features", "disable_ngram_features", "disable_path_edge_features"])
        self.styles = self.getParameters(style)
        self.multiEdgeFeatureBuilder = MultiEdgeFeatureBuilder(self.featureSet)
        self.multiEdgeFeatureBuilder.noAnnType = self.styles["noAnnType"]
        self.multiEdgeFeatureBuilder.maskNamedEntities = not self.styles["noMasking"]
        self.multiEdgeFeatureBuilder.maximum = self.styles["maxFeatures"]
        self.pathLengths = length
        assert self.pathLengths is None
        self.types = types

        self.triggerFeatureBuilder = TriggerFeatureBuilder(self.featureSet)
        self.triggerFeatureBuilder.useNonNameEntities = True

    def getInteractionEdgeLengths(self, sentenceGraph, paths):
        """
        Return dependency and linear length of all interaction edges
        (measured between the two tokens).

        The result maps each interaction to the tuple
        (interaction, pathLength, linLength, t2Pos), where t2Pos is the index
        of the head token of entity 2 in sentenceGraph.tokens.
        """
        interactionLengths = {}
        for interaction in sentenceGraph.interactions:
            e1 = sentenceGraph.entitiesById[interaction.get("e1")]
            e2 = sentenceGraph.entitiesById[interaction.get("e2")]
            t1 = sentenceGraph.entityHeadTokenByEntity[e1]
            t2 = sentenceGraph.entityHeadTokenByEntity[e2]
            # Dependency path length: shortest of the paths between the heads
            path = []
            if t1 != t2:
                path = paths.getPaths(t1, t2)
            if t1 != t2 and len(path) > 0:
                pathLength = min(len(x) for x in path)
            else: # no dependency path (or both entities share a head token)
                pathLength = 999999 # more than any real path
            # Linear distance: scan until both head tokens have been seen.
            # A token that is not found keeps the position -1.
            t1Pos = -1
            t2Pos = -1
            for i in range(len(sentenceGraph.tokens)):
                if sentenceGraph.tokens[i] == t1:
                    t1Pos = i
                    if t2Pos != -1:
                        break
                if sentenceGraph.tokens[i] == t2:
                    t2Pos = i
                    if t1Pos != -1:
                        break
            linLength = abs(t1Pos - t2Pos)
            interactionLengths[interaction] = (interaction, pathLength, linLength, t2Pos)
        return interactionLengths

    def _countByType(self, elements):
        """
        Return a dictionary mapping each "type" attribute value found in
        elements to the number of elements having it.
        """
        counts = {}
        for element in elements:
            elementType = element.get("type")
            if elementType not in counts:
                counts[elementType] = 0
            counts[elementType] += 1
        return counts

    def eventIsGold(self, entity, arguments, sentenceGraph, goldGraph, goldEntitiesByOffset):
        """
        Return True if the candidate event exists in the gold data.

        entity               -- the trigger entity of the candidate event
        arguments            -- the argument interactions of the candidate
        goldEntitiesByOffset -- gold entities grouped by their "headOffset"

        The candidate matches a gold entity when the head offset and type are
        the same, both have the same number of outgoing edges of each type,
        and every candidate edge has a gold edge of the same type whose target
        entity has the same head offset and type. Each candidate edge only
        needs some matching gold edge; a one-to-one pairing is not enforced.
        """
        offset = entity.get("headOffset")
        if offset not in goldEntitiesByOffset:
            return False
        eType = entity.get("type")
        # The candidate's edge counts do not depend on the gold entity
        argTypeCounts = self._countByType(arguments)

        # Check all gold entities for a match
        for goldEntity in goldEntitiesByOffset[offset]:
            # The entity type must match
            if goldEntity.get("type") != eType:
                continue
            goldEntityId = goldEntity.get("id")

            # Collect the gold interactions
            goldInteractions = []
            for goldInteraction in goldGraph.interactions:
                if goldInteraction.get("e1") == goldEntityId:
                    goldInteractions.append(goldInteraction)

            # Argument count rules
            if len(goldInteractions) != len(arguments): # total number of edges differs
                continue
            # argument edge counts per type must match
            if argTypeCounts != self._countByType(goldInteractions):
                continue

            # Exact argument matching
            allFound = True
            for argument in arguments: # check all edges
                e2Entity = sentenceGraph.entitiesById[argument.get("e2")]
                e2Offset = e2Entity.get("headOffset")
                e2Type = e2Entity.get("type")
                argType = argument.get("type")

                found = False
                for goldInteraction in goldInteractions:
                    if goldInteraction.get("type") == argType:
                        goldE2Entity = goldGraph.entitiesById[goldInteraction.get("e2")]
                        if goldE2Entity.get("headOffset") == e2Offset and goldE2Entity.get("type") == e2Type:
                            found = True
                            break
                if not found: # this edge did not have a corresponding gold edge
                    allFound = False
                    break

            # Event is in gold
            if allFound:
                return True

        # No gold entity matched (this also covers an empty candidate list)
        return False

    def getArgumentCombinations(self, eType, interactions, entityId=None):
        """
        Return the candidate argument combinations for an event of type eType.

        interactions -- the outgoing interactions of the trigger entity
        entityId     -- not used by the current implementation

        Binding: every non-empty subset of the Theme edges (subsets of at most
        ten themes), each as a tuple. Process: the empty combination plus each
        Participant edge alone. All other types: the results of
        combine.combine for Theme edges together with the secondary argument
        types the event type allows, plus each Theme edge alone.
        """
        if eType == "Binding":
            # Making examples for only all-together/all-separate cases
            # doesn't work, since even gold data has several cases of
            # overlapping bindings with different numbers of arguments

            # Skip causes
            themes = [x for x in interactions if x.get("type") == "Theme"]
            combs = []
            # Looking at a2-normalize.pl reveals that there can be max 6 themes
            # Based on training+devel data, four is maximum
            for themeCount in range(1, min(len(themes), 10) + 1):
                combs.extend(combinations(themes, themeCount))
            return combs
        elif eType == "Process": # For ID-task
            argCombinations = [[]] # process can have 0 interactions
            for interaction in interactions:
                if interaction.get("type") == "Participant":
                    argCombinations.append([interaction])
            return argCombinations
        else: # one of the regulation-types, or one of the simple types
            themes = []
            causes = []
            siteArgs = []
            contextGenes = []
            sideChains = []
            # "AtLoc" and "ToLoc" edges are currently ignored like all other
            # unlisted types, so this list always stays empty. It is still
            # passed to combine.combine below to keep the result unchanged.
            locTargets = []
            for interaction in interactions:
                iType = interaction.get("type")
                if iType == "Theme":
                    themes.append(interaction)
                elif iType == "Cause":
                    causes.append(interaction)
                elif iType == "SiteArg":
                    siteArgs.append(interaction)
                elif iType == "Contextgene":
                    contextGenes.append(interaction)
                elif iType == "Sidechain":
                    sideChains.append(interaction)
            # Limit arguments to event types that can have them
            if eType.find("egulation") == -1 and eType != "Catalysis":
                causes = []
            if eType != "Glycosylation":
                sideChains = []
            if eType not in ["Acetylation", "Methylation"]:
                contextGenes = []
            if eType == "Catalysis":
                siteArgs = []
            # Themes can always appear alone
            themeAloneCombinations = [[theme] for theme in themes]
            return combine.combine(themes, causes) \
                + combine.combine(themes, siteArgs) \
                + combine.combine(themes, sideChains) \
                + combine.combine(themes, contextGenes) \
                + combine.combine(themes, siteArgs, sideChains) \
                + combine.combine(themes, siteArgs, contextGenes) \
                + combine.combine(themes, locTargets) \
                + themeAloneCombinations

    # The predicted value range is not used in the features the UnmergingExampleBuilder gets
    # from the MultiEdgeFeatureBuilder, so definePredictedValueRange and
    # getPredictedValueRange are not provided by this class.

    def sortInteractionsById(self, interactions):
        """
        Return the interactions as a new list ordered by the integer that
        follows the last ".i" in their "id" attribute.
        """
        # The order of the interactions affects the order of the unmerging examples, and this
        # affects performance. It's not clear whether this is what really happens, or whether
        # the order of the interactions has some effect on the consistency of the unmerging
        # features (it shouldn't). However, in case it does, this function is left here for now,
        # although it shouldn't be needed at all. In any case the impact is minimal, for GE
        # 53.22 vs 53.28 on the development set.
        #
        # A key function is used so that only the numbers are compared. The
        # sort is stable, so interactions with equal numbers keep their input
        # order instead of being compared as objects.
        return sorted(interactions, key=lambda interaction: int(interaction.get("id").split(".i")[-1]))

    def buildExamplesFromGraph(self, sentenceGraph, outfile, goldGraph=None):
        """
        Build examples for a single sentence and append them to outfile.
        Returns the number of examples written.
        See Core/ExampleUtils for example format.

        If goldGraph is given, it must have the same tokenization as
        sentenceGraph, and it is used to decide the class of each example.
        Without it every example gets the class "neg".
        """
        self.multiEdgeFeatureBuilder.setFeatureVector(resetCache=True)
        self.triggerFeatureBuilder.initSentence(sentenceGraph)

        undirected = sentenceGraph.dependencyGraph.toUndirected()
        paths = undirected

        # Get argument order
        self.interactionLenghts = self.getInteractionEdgeLengths(sentenceGraph, paths)

        goldEntitiesByOffset = {}
        if goldGraph is not None:
            # Check that the tokenizations match
            for i in range(len(sentenceGraph.tokens)):
                token = sentenceGraph.tokens[i]
                goldToken = goldGraph.tokens[i]
                assert token.get("id") == goldToken.get("id") and token.get("charOffset") == goldToken.get("charOffset")
            # Map gold entities to their head offsets
            for entity in goldGraph.entities:
                offset = entity.get("headOffset")
                assert offset is not None
                if offset not in goldEntitiesByOffset:
                    goldEntitiesByOffset[offset] = []
                goldEntitiesByOffset[offset].append(entity)

        # Generate examples based on interactions between entities
        if self.styles["no_merge"]:
            mergeInput = False
            entities = sentenceGraph.entities
        else:
            mergeInput = True
            sentenceGraph.mergeInteractionGraph(True)
            entities = sentenceGraph.mergedEntities
            self.exampleStats.addValue("Duplicate entities skipped", len(sentenceGraph.entities) - len(entities))

        exampleIndex = 0
        for entity in entities:
            eType = entity.get("type")
            assert eType is not None, entity.attrib
            eType = str(eType)

            interactions = [x[2] for x in sentenceGraph.getOutInteractions(entity, mergeInput)]
            interactions = self.sortInteractionsById(interactions)
            argCombinations = self.getArgumentCombinations(eType, interactions, entity.get("id"))
            assert argCombinations is not None, (entity.get("id"), entity.get("type"))
            for argCombination in argCombinations:
                if eType != "Process":
                    assert len(argCombination) > 0, eType + ": " + str(argCombinations)
                # Originally binary classification
                if goldGraph is not None:
                    isGoldEvent = self.eventIsGold(entity, argCombination, sentenceGraph, goldGraph, goldEntitiesByOffset)
                else:
                    isGoldEvent = False
                # Named (multi-)class
                if isGoldEvent:
                    category = eType
                    if category.find("egulation") != -1:
                        category = "All_regulation"
                    elif category != "Binding":
                        category = "Other"
                else:
                    category = "neg"

                # Comma-separated ids of the argument edges of this candidate
                argString = ",".join([arg.get("id") for arg in argCombination])
                extra = {"xtype":"um", "e":entity.get("id"), "i":argString, "etype":eType, "class":category}
                assert isinstance(extra["etype"], str), extra
                self.exampleStats.addExample(category)
                example = self.buildExample(sentenceGraph, paths, entity, argCombination, interactions)
                example[0] = sentenceGraph.getSentenceId() + ".x" + str(exampleIndex)
                example[1] = self.classSet.getId(category)
                example[3] = extra
                ExampleUtils.appendExamples([example], outfile)
                exampleIndex += 1

        return exampleIndex

    def buildExample(self, sentenceGraph, paths, eventEntity, argCombination, allInteractions):
        """
        Build the feature vector for one candidate event.

        eventEntity     -- the trigger entity of the candidate event
        argCombination  -- the argument edges included in the candidate
        allInteractions -- all outgoing edges of the trigger entity

        Returns the list [None, None, features, None]. The caller fills in
        the remaining positions.
        """
        # TODO: add also features for arguments present, but not in this combination

        features = {}
        # self.setFeature (inherited, not shown here) is used throughout this
        # class after assigning self.features.
        # NOTE(review): presumably it writes into this dictionary -- confirm.
        self.features = features

        self.buildInterArgumentBagOfWords(argCombination, sentenceGraph)

        eventEntityType = eventEntity.get("type")
        if eventEntityType == "Binding":
            # Rank all edges of the trigger by dependency distance, linear
            # distance and position of the target head token. This is the same
            # order as defined by compareInteractionPrecedence, expressed as a
            # sort key so that it works on both Python 2 and Python 3.
            interactionIndex = {}
            groupInteractionLengths = []
            for interaction in allInteractions:
                groupInteractionLengths.append(self.interactionLenghts[interaction])
            groupInteractionLengths.sort(key=lambda x: (x[1], x[2], x[3]))
            for i in range(len(groupInteractionLengths)):
                interactionIndex[groupInteractionLengths[i][0]] = i

        eventToken = sentenceGraph.entityHeadTokenByEntity[eventEntity]
        self.triggerFeatureBuilder.setFeatureVector(self.features)
        self.triggerFeatureBuilder.tag = "trg_"
        self.triggerFeatureBuilder.buildFeatures(eventToken)
        self.triggerFeatureBuilder.tag = None

        argThemeCount = 0
        argCauseCount = 0
        argCounts = {}
        # Current example's edge combination
        for arg in argCombination:
            if arg.get("type") == "Theme":
                argThemeCount += 1
                tag = "argTheme"
                self.buildArgumentFeatures(sentenceGraph, paths, features, eventToken, arg, tag)
                if eventEntityType == "Binding":
                    # A second feature set, tagged with the rank of the edge
                    tag += str(interactionIndex[arg])
                    self.buildArgumentFeatures(sentenceGraph, paths, features, eventToken, arg, tag)
            elif arg.get("type") == "Cause": # Cause
                argCauseCount += 1
                self.buildArgumentFeatures(sentenceGraph, paths, features, eventToken, arg, "argCause")
            else:
                argType = arg.get("type")
                if argType not in argCounts:
                    argCounts[argType] = 0
                self.buildArgumentFeatures(sentenceGraph, paths, features, eventToken, arg, "arg" + argType)
                argCounts[argType] += 1

        # Edge group context
        # NOTE: every edge that is not a Theme is counted as a Cause here,
        # whatever its actual type is.
        contextThemeCount = 0
        contextCauseCount = 0
        for interaction in allInteractions:
            if interaction in argCombination: # Already part of current example's combination
                continue
            if interaction.get("type") == "Theme":
                contextThemeCount += 1
                tag = "conTheme"
                self.buildArgumentFeatures(sentenceGraph, paths, features, eventToken, interaction, tag)
                if eventEntityType == "Binding":
                    tag += str(interactionIndex[interaction])
                    self.buildArgumentFeatures(sentenceGraph, paths, features, eventToken, interaction, tag)
            else: # Cause
                contextCauseCount += 1
                self.buildArgumentFeatures(sentenceGraph, paths, features, eventToken, interaction, "conCause")

        self.setFeature("argCount", len(argCombination))
        self.setFeature("argCount_" + str(len(argCombination)), 1)
        self.setFeature("interactionCount", len(allInteractions))
        self.setFeature("interactionCount_" + str(len(allInteractions)), 1)

        self.setFeature("argThemeCount", argThemeCount)
        self.setFeature("argThemeCount_" + str(argThemeCount), 1)
        self.setFeature("argCauseCount", argCauseCount)
        self.setFeature("argCauseCount_" + str(argCauseCount), 1)
        for key in sorted(argCounts.keys()):
            self.setFeature("arg" + key + "Count", argCounts[key])
            self.setFeature("arg" + key + "Count_" + str(argCounts[key]), 1)

        self.setFeature("interactionThemeCount", contextThemeCount)
        self.setFeature("interactionThemeCount_" + str(contextThemeCount), 1)
        self.setFeature("interactionCauseCount", contextCauseCount)
        self.setFeature("interactionCauseCount_" + str(contextCauseCount), 1)

        # Detach the trigger feature builder from this example's vector
        self.triggerFeatureBuilder.tag = ""
        self.triggerFeatureBuilder.setFeatureVector(None)

        # Positions 0, 1 and 3 are filled in by buildExamplesFromGraph
        return [None, None, features, None]

    def buildArgumentFeatures(self, sentenceGraph, paths, features, eventToken, arg, tag):
        """
        Add the features of one argument edge, with names prefixed by tag.

        Builds the edge features between the event head token and the head
        token of the argument's target entity (arg's "e2"), the trigger
        features of the target head token, and features for the kind and
        type of the target entity.
        """
        argEntity = sentenceGraph.entitiesById[arg.get("e2")]
        argToken = sentenceGraph.entityHeadTokenByEntity[argEntity]
        self.buildEdgeFeatures(sentenceGraph, paths, features, eventToken, argToken, tag)
        self.triggerFeatureBuilder.tag = tag + "trg_"
        self.triggerFeatureBuilder.buildFeatures(argToken)
        if argEntity.get("isName") == "True":
            self.setFeature(tag + "Protein", 1)
        else:
            # The target is not a named entity, i.e. the argument is an event
            self.setFeature(tag + "Event", 1)
            self.setFeature("nestingEvent", 1)
        self.setFeature(tag + "_" + argEntity.get("type"), 1)

    def buildEdgeFeatures(self, sentenceGraph, paths, features, eventToken, argToken, tag):
        """
        Add the MultiEdgeFeatureBuilder features for the path between
        eventToken and argToken, with feature names prefixed by tag + "_".

        The first path returned by paths.getPaths is used. If the tokens are
        the same or no path exists, the two-token list [eventToken, argToken]
        is used instead. Feature groups can be switched off with the
        "disable_*" style parameters.
        """
        self.multiEdgeFeatureBuilder.tag = tag + "_"
        self.multiEdgeFeatureBuilder.setFeatureVector(features, None, None, False)

        self.setFeature(tag + "_present", 1)

        path = paths.getPaths(eventToken, argToken)
        if eventToken != argToken and len(path) > 0:
            path = path[0]
        else:
            path = [eventToken, argToken]

        if not self.styles["disable_entity_features"]:
            self.multiEdgeFeatureBuilder.buildEntityFeatures(sentenceGraph)
        self.multiEdgeFeatureBuilder.buildPathLengthFeatures(path)
        if not self.styles["disable_single_element_features"]:
            self.multiEdgeFeatureBuilder.buildSingleElementFeatures(path, sentenceGraph)
        if not self.styles["disable_ngram_features"]:
            self.multiEdgeFeatureBuilder.buildPathGrams(2, path, sentenceGraph) # remove for fast
            self.multiEdgeFeatureBuilder.buildPathGrams(3, path, sentenceGraph) # remove for fast
            self.multiEdgeFeatureBuilder.buildPathGrams(4, path, sentenceGraph) # remove for fast
        if not self.styles["disable_path_edge_features"]:
            self.multiEdgeFeatureBuilder.buildPathEdgeFeatures(path, sentenceGraph)
        # Detach the edge feature builder from this example's vector
        self.multiEdgeFeatureBuilder.setFeatureVector(None, None, None, False)
        self.multiEdgeFeatureBuilder.tag = ""

    def buildInterArgumentBagOfWords(self, arguments, sentenceGraph):
        """
        Add bag-of-words features for the tokens located between the head
        tokens of the arguments. Nothing is added for fewer than two
        arguments.

        Only tokens strictly between the leftmost and rightmost argument head
        token are used, and tokens that are entity heads or names are skipped.
        """
        if len(arguments) < 2:
            return

        indexByToken = {}
        for i in range(len(sentenceGraph.tokens)):
            indexByToken[sentenceGraph.tokens[i]] = i

        argTokenIndices = set()
        for arg in arguments:
            argEntity = sentenceGraph.entitiesById[arg.get("e2")]
            argToken = sentenceGraph.entityHeadTokenByEntity[argEntity]
            argTokenIndices.add(indexByToken[argToken])
        minIndex = min(argTokenIndices)
        maxIndex = max(argTokenIndices)
        self.setFeature("argBoWRange", (maxIndex - minIndex))
        self.setFeature("argBoWRange_" + str(maxIndex - minIndex), 1)
        bow = set()
        for i in range(minIndex + 1, maxIndex):
            token = sentenceGraph.tokens[i]
            if len(sentenceGraph.tokenIsEntityHead[token]) == 0 and not sentenceGraph.tokenIsName[token]:
                bow.add(token.get("text"))
        # Sorted, so that the features are always set in the same order
        bow = sorted(bow)
        for word in bow:
            self.setFeature("argBoW_" + word, 1)
            if word in ["/", "-"]:
                self.setFeature("argBoW_slashOrHyphen", 1)
        # A single separating word gets its own, more specific features
        if len(bow) == 1:
            self.setFeature("argBoWonly_" + bow[0], 1)
            if bow[0] in ["/", "-"]:
                self.setFeature("argBoWonly_slashOrHyphen", 1)
594