1 """
2 Edge Examples
3 """
4
5 import sys, os
6 thisPath = os.path.dirname(os.path.abspath(__file__))
7 sys.path.append(os.path.abspath(os.path.join(thisPath,"..")))
8 from ExampleBuilders.ExampleBuilder import ExampleBuilder
9 from Core.IdSet import IdSet
10 import Core.ExampleUtils as ExampleUtils
11 from FeatureBuilders.MultiEdgeFeatureBuilder import MultiEdgeFeatureBuilder
12 from FeatureBuilders.TokenFeatureBuilder import TokenFeatureBuilder
13 from FeatureBuilders.BioInferOntologyFeatureBuilder import BioInferOntologyFeatureBuilder
14 from FeatureBuilders.NodalidaFeatureBuilder import NodalidaFeatureBuilder
15 from FeatureBuilders.BacteriaRenamingFeatureBuilder import BacteriaRenamingFeatureBuilder
16 from FeatureBuilders.RELFeatureBuilder import RELFeatureBuilder
17 from FeatureBuilders.DrugFeatureBuilder import DrugFeatureBuilder
18 from FeatureBuilders.EVEXFeatureBuilder import EVEXFeatureBuilder
19 from FeatureBuilders.GiulianoFeatureBuilder import GiulianoFeatureBuilder
20
21 from Core.SimpleGraph import Graph
22 from FeatureBuilders.TriggerFeatureBuilder import TriggerFeatureBuilder
23 import Utils.Range as Range
24 from multiprocessing import Process
25
26
27 import Evaluators.EvaluateInteractionXML as EvaluateInteractionXML
28
30 """
31 This example builder makes edge examples, i.e. examples describing
32 the event arguments.
33 """
def __init__(self, style=None, length=None, types=None, featureSet=None, classSet=None):
    """
    Set up the id sets, parse the style parameters and create the feature
    builders needed by the enabled styles.

    style -- style definition passed to self.getParameters(). When it is
             None, the "typed", "directed" and "headsOnly" styles are
             switched on.
    length -- stored as self.pathLengths; must be None (asserted).
    types -- stored as self.types; None (the default) gives a new empty list.
    featureSet -- feature id set; a new IdSet() is created when None.
    classSet -- class id set; a new IdSet(1) is created when None. The id of
                "neg" must be 1, or -1 in a set with two ids (asserted).
    """
    if featureSet is None:
        featureSet = IdSet()
    if classSet is None:
        classSet = IdSet(1)
    assert( classSet.getId("neg") == 1 or (len(classSet.Ids)== 2 and classSet.getId("neg") == -1) )

    ExampleBuilder.__init__(self, classSet=classSet, featureSet=featureSet)

    self._setDefaultParameters([
        "typed", "directed", "headsOnly", "graph_kernel", "noAnnType", "noMasking", "maxFeatures",
        "genia_limits", "epi_limits", "id_limits", "rel_limits", "bb_limits", "bi_limits", "co_limits",
        "genia_task1", "ontology", "nodalida", "bacteria_renaming", "trigger_features", "rel_features",
        "ddi_features", "ddi_mtmx", "evex", "giuliano", "random", "themeOnly", "causeOnly", "no_path", "entities",
        "skip_extra_triggers", "headsOnly", "graph_kernel", "trigger_features", "no_task", "no_dependency",
        "disable_entity_features", "disable_terminus_features", "disable_single_element_features",
        "disable_ngram_features", "disable_path_edge_features", "no_linear", "subset", "binary", "pos_only",
        "entity_type", "filter_shortest_path", "maskTypeAsProtein"])
    self.styles = self.getParameters(style)
    if style is None:
        # No style given: enable the default flags on the parsed styles.
        # Assigning through the None argument itself raised a TypeError.
        self.styles["typed"] = self.styles["directed"] = self.styles["headsOnly"] = True

    # The multi-edge builder is always created; the flags below tune it.
    self.multiEdgeFeatureBuilder = MultiEdgeFeatureBuilder(self.featureSet, self.styles)
    if self.styles["graph_kernel"]:
        # Imported only on demand
        from FeatureBuilders.GraphKernelFeatureBuilder import GraphKernelFeatureBuilder
        self.graphKernelFeatureBuilder = GraphKernelFeatureBuilder(self.featureSet)
    if self.styles["noAnnType"]:
        self.multiEdgeFeatureBuilder.noAnnType = True
    if self.styles["noMasking"]:
        self.multiEdgeFeatureBuilder.maskNamedEntities = False
    if self.styles["maxFeatures"]:
        self.multiEdgeFeatureBuilder.maximum = True
    if self.styles["genia_task1"]:
        self.multiEdgeFeatureBuilder.filterAnnTypes.add("Entity")
    self.tokenFeatureBuilder = TokenFeatureBuilder(self.featureSet)
    if self.styles["ontology"]:
        self.multiEdgeFeatureBuilder.ontologyFeatureBuilder = BioInferOntologyFeatureBuilder(self.featureSet)
    if self.styles["nodalida"]:
        self.nodalidaFeatureBuilder = NodalidaFeatureBuilder(self.featureSet)
    if self.styles["bacteria_renaming"]:
        self.bacteriaRenamingFeatureBuilder = BacteriaRenamingFeatureBuilder(self.featureSet)
    if self.styles["trigger_features"]:
        self.triggerFeatureBuilder = TriggerFeatureBuilder(self.featureSet, self.styles)
        self.triggerFeatureBuilder.useNonNameEntities = True
        if self.styles["genia_task1"]:
            self.triggerFeatureBuilder.filterAnnTypes.add("Entity")

    if self.styles["rel_features"]:
        self.relFeatureBuilder = RELFeatureBuilder(featureSet)
    if self.styles["ddi_features"]:
        self.drugFeatureBuilder = DrugFeatureBuilder(featureSet)
    if self.styles["evex"]:
        self.evexFeatureBuilder = EVEXFeatureBuilder(featureSet)
    if self.styles["giuliano"]:
        self.giulianoFeatureBuilder = GiulianoFeatureBuilder(featureSet)

    self.pathLengths = length
    assert(self.pathLengths == None)
    # A new list per instance; a literal [] default would be shared by all
    # instances created without the argument.
    self.types = [] if types is None else types
    if self.styles["random"]:
        from FeatureBuilders.RandomFeatureBuilder import RandomFeatureBuilder
        self.randomFeatureBuilder = RandomFeatureBuilder(self.featureSet)
113
116
118 return self.multiEdgeFeatureBuilder.predictedRange
119
121 if len(typesToInclude) == 0:
122 return edges
123 edgesToKeep = []
124 for edge in edges:
125 if edge.get("type") in typesToInclude:
126 edgesToKeep.append(edge)
127 return edgesToKeep
128
130 """
131 Example class. Multiple overlapping edges create a merged type.
132 """
133 types = set()
134
135
136
137
138
139
140
141
142
143
144 intEdges = sentenceGraph.interactionGraph.getEdges(t1, t2)
145 if not directed:
146 intEdges = intEdges + sentenceGraph.interactionGraph.getEdges(t2, t1)
147 for intEdge in intEdges:
148 types.add(intEdge[2].get("type"))
149 types = list(types)
150 types.sort()
151 categoryName = ""
152 for name in types:
153 if categoryName != "":
154 categoryName += "---"
155 categoryName += name
156 if categoryName != "":
157 return categoryName
158 else:
159 return "neg"
160
def getCategoryName(self, sentenceGraph, e1, e2, directed=True, duplicateEntities=None):
    """
    Name the example class for the entity pair (e1, e2).

    The types of all interactions that sentenceGraph.getInteractions()
    returns for the pair (plus those of the reversed pair when directed is
    False) are sorted and merged into one name, joined with "---". With the
    "causeOnly" / "themeOnly" styles only "Cause" / "Theme" types are kept.
    Returns "neg" when no type is left. duplicateEntities is not used.
    """
    found = sentenceGraph.getInteractions(e1, e2, True)
    if not directed:
        found = found + sentenceGraph.getInteractions(e2, e1, True)

    # Unique interaction types in sorted order
    labels = sorted(set([edge[2].get("type") for edge in found]))

    kept = []
    for label in labels:
        causeOk = not self.styles["causeOnly"] or label == "Cause"
        if causeOk and (not self.styles["themeOnly"] or label == "Theme"):
            kept.append(label)

    # An empty type name adds nothing to the merged name, not even a separator
    merged = "---".join([label for label in kept if label != ""])
    if merged == "":
        return "neg"
    return merged
200
202 if e1.get("type") == "Protein" and e2.get("type") == "Entity":
203 return True
204 else:
205 return False
206
208
209
210 if e1.get("type") == "Bacterium" and e2.get("type") in ["Host", "HostPart", "Geographical", "Environment", "Food", "Medical", "Soil", "Water"]:
211 return True
212 elif e1.get("type") == "Host" and e2.get("type") == "HostPart":
213 return True
214 else:
215 return False
216
218 if eType in ["GeneProduct", "Protein", "ProteinFamily", "PolymeraseComplex"]:
219 return "ProteinEntity"
220 elif eType in ["Gene", "GeneFamily", "GeneComplex", "Regulon", "Site", "Promoter"]:
221 return "GeneEntity"
222 else:
223 return None
224
226 e1Type = e1.get("type")
227 e1SuperType = self.getBISuperType(e1Type)
228 e2Type = e2.get("type")
229 e2SuperType = self.getBISuperType(e2Type)
230
231 tag = "(" + e1Type + "/" + e2Type + ")"
232 if e1Type == "Regulon":
233 if e2SuperType in ["GeneEntity", "ProteinEntity"]:
234 return True
235 if e1SuperType == "ProteinEntity":
236 if e2Type in ["Site", "Promoter", "Gene", "GeneComplex"]:
237 return True
238 if e1Type in ["Action", "Transcription", "Expression"]:
239 return True
240 if e1Type == "Site":
241 if e2SuperType == "GeneEntity":
242 return True
243 if e1Type == "Promoter":
244 if e2SuperType in ["GeneEntity", "ProteinEntity"]:
245 return True
246 if e1SuperType in ["GeneEntity", "ProteinEntity"]:
247 if e2SuperType in ["GeneEntity", "ProteinEntity"]:
248 return True
249 stats.filter("bi_limits")
250 return False
251
253 if e1.get("type") != "Catalysis":
254 if e1.get("type") in ["Protein", "Entity"]:
255 return False
256 elif e2.get("type") in ["Protein", "Entity"]:
257 return True
258 else:
259 return False
260 else:
261 if e2.get("type") != "Entity":
262 return True
263 else:
264 return False
265 assert False, (e1.get("type"), e2.get("type"))
266
268 e1Type = e1.get("type")
269 e2Type = e2.get("type")
270 e1IsCore = e1Type in ["Protein", "Regulon-operon", "Two-component-system", "Chemical", "Organism"]
271 e2IsCore = e2Type in ["Protein", "Regulon-operon", "Two-component-system", "Chemical", "Organism"]
272 if e1IsCore:
273 return False
274 elif e1Type in ["Gene_expression", "Transcription"]:
275 if e2Type in ["Protein", "Regulon-operon"]:
276 return True
277 else:
278 return False
279 elif e1Type in ["Protein_catabolism", "Phosphorylation"]:
280 if e2Type == "Protein":
281 return True
282 else:
283 return False
284 elif e1Type == "Localization":
285 if e2IsCore or e2Type == "Entity":
286 return True
287 else:
288 return False
289 elif e1Type in ["Binding", "Process"]:
290 if e2IsCore:
291 return True
292 else:
293 return False
294 elif "egulation" in e1Type:
295 if e2Type != "Entity":
296 return True
297 else:
298 return False
299 elif e1Type == "Entity":
300 if e2IsCore:
301 return True
302 else:
303 return False
304 assert False, (e1Type, e2Type)
305
307 if e1.get("type") == "Exp" and e2.get("type") == "Exp":
308 anaphoraTok = sentenceGraph.entityHeadTokenByEntity[e1]
309 antecedentTok = sentenceGraph.entityHeadTokenByEntity[e2]
310 antecedentTokenFound = False
311 for token in sentenceGraph.tokens:
312 if token == antecedentTok:
313 antecedentTokenFound = True
314 if token == anaphoraTok:
315 if antecedentTokenFound:
316 return True
317 else:
318 return False
319 assert False
320 elif e1.get("type") == "Exp" and e2.get("type") == "Protein":
321 return True
322 else:
323 return False
324
326 e1Type = e1.get("type")
327 e2Type = e2.get("type")
328 if e1Type == "Protein":
329 return False
330 elif e1Type in ["Entity", "Gene_expression", "Transcription", "Protein_catabolism", "Phosphorylation", "Binding"]:
331 if e2Type == "Protein":
332 return True
333 else:
334 return False
335 elif e1Type == "Localization":
336 if e2Type in ["Protein", "Entity"]:
337 return True
338 else:
339 return False
340 elif "egulation" in e1Type:
341 if e2Type != "Entity":
342 return True
343 else:
344 return False
345 assert False, (e1Type, e2Type)
346
348 if len(entityToGold[e1]) > 0 and len(entityToGold[e2]) > 0:
349 return self.getCategoryName(goldGraph, entityToGold[e1][0], entityToGold[e2][0], directed=directed)
350 else:
351 return "neg"
352
354 import types
355 assert edgeTypes != None
356 if type(edgeTypes) not in [types.ListType, types.TupleType]:
357 edgeTypes = [edgeTypes]
358 if edge[2].get("type") in edgeTypes:
359 return True
360 else:
361 return False
362
364 """
365 Build examples for a single sentence. Returns a list of examples.
366 See Core/ExampleUtils for example format.
367 """
368
369 exampleIndex = 0
370
371 if self.styles["trigger_features"]:
372 self.triggerFeatureBuilder.initSentence(sentenceGraph)
373 if self.styles["evex"]:
374 self.evexFeatureBuilder.initSentence(sentenceGraph)
375
376
377
378
379
380
381 sentenceGraph.mergeInteractionGraph(True)
382 entities = sentenceGraph.mergedEntities
383 entityToDuplicates = sentenceGraph.mergedEntityToDuplicates
384 self.exampleStats.addValue("Duplicate entities skipped", len(sentenceGraph.entities) - len(entities))
385
386
387 if goldGraph != None:
388 entityToGold = EvaluateInteractionXML.mapEntities(entities, goldGraph.entities)
389
390 paths = None
391 if not self.styles["no_path"]:
392
393
394
395
396 undirected = sentenceGraph.dependencyGraph.toUndirected()
397
398 paths = undirected
399 if self.styles["filter_shortest_path"] != None:
400 paths.resetAnalyses()
401 paths.FloydWarshall(self.filterEdge, {"edgeTypes":self.styles["filter_shortest_path"]})
402
403
404
405
406
407
408
409
410
411 if self.styles["entities"]:
412 loopRange = len(entities)
413 else:
414 loopRange = len(sentenceGraph.tokens)
415 for i in range(loopRange-1):
416 for j in range(i+1,loopRange):
417 eI = None
418 eJ = None
419 if self.styles["entities"]:
420 eI = entities[i]
421 eJ = entities[j]
422 tI = sentenceGraph.entityHeadTokenByEntity[eI]
423 tJ = sentenceGraph.entityHeadTokenByEntity[eJ]
424
425
426 if eI.get("type") == "neg" or eJ.get("type") == "neg":
427 continue
428 if self.styles["skip_extra_triggers"]:
429 if eI.get("source") != None or eJ.get("source") != None:
430 continue
431 else:
432 tI = sentenceGraph.tokens[i]
433 tJ = sentenceGraph.tokens[j]
434
435 if self.styles["headsOnly"]:
436 if (len(sentenceGraph.tokenIsEntityHead[tI]) == 0) or (len(sentenceGraph.tokenIsEntityHead[tJ]) == 0):
437 continue
438
439 if self.styles["directed"]:
440
441 if self.styles["entities"]:
442 categoryName = self.getCategoryName(sentenceGraph, eI, eJ, True)
443 if goldGraph != None:
444 categoryName = self.getGoldCategoryName(goldGraph, entityToGold, eI, eJ, True)
445 else:
446 categoryName = self.getCategoryNameFromTokens(sentenceGraph, tI, tJ, True)
447
448 self.exampleStats.beginExample(categoryName)
449 makeExample = True
450 if self.styles["genia_limits"] and not self.isPotentialGeniaInteraction(eI, eJ):
451 makeExample = False
452 self.exampleStats.filter("genia_limits")
453 if self.styles["genia_task1"] and (eI.get("type") == "Entity" or eJ.get("type") == "Entity"):
454 makeExample = False
455 self.exampleStats.filter("genia_task1")
456 if self.styles["rel_limits"] and not self.isPotentialRELInteraction(eI, eJ):
457 makeExample = False
458 self.exampleStats.filter("rel_limits")
459 if self.styles["co_limits"] and not self.isPotentialCOInteraction(eI, eJ, sentenceGraph):
460 makeExample = False
461 self.exampleStats.filter("co_limits")
462 if self.styles["bb_limits"] and not self.isPotentialBBInteraction(eI, eJ, sentenceGraph):
463 makeExample = False
464 self.exampleStats.filter("bb_limits")
465 if categoryName != "neg":
466 self.exampleStats.filter("bb_limits(" + categoryName + ":" + eI.get("type") + "/" + eJ.get("type") + ")")
467 if self.styles["bi_limits"] and not self.isPotentialBIInteraction(eI, eJ, sentenceGraph, self.exampleStats):
468 makeExample = False
469
470 if self.styles["epi_limits"] and not self.isPotentialEPIInteraction(eI, eJ, sentenceGraph):
471 makeExample = False
472 self.exampleStats.filter("epi_limits")
473 if self.styles["id_limits"] and not self.isPotentialIDInteraction(eI, eJ, sentenceGraph):
474 makeExample = False
475 self.exampleStats.filter("id_limits")
476
477
478
479
480
481
482 if self.styles["pos_only"] and categoryName == "neg":
483 makeExample = False
484 self.exampleStats.filter("pos_only")
485 if makeExample:
486
487 ExampleUtils.appendExamples([self.buildExample(tI, tJ, paths, sentenceGraph, categoryName, exampleIndex, eI, eJ)], outfile)
488 exampleIndex += 1
489 self.exampleStats.endExample()
490
491
492 if self.styles["entities"]:
493 categoryName = self.getCategoryName(sentenceGraph, eJ, eI, True)
494 if goldGraph != None:
495 categoryName = self.getGoldCategoryName(goldGraph, entityToGold, eJ, eI, True)
496 else:
497 categoryName = self.getCategoryNameFromTokens(sentenceGraph, tJ, tI, True)
498
499 self.exampleStats.beginExample(categoryName)
500 makeExample = True
501 if self.styles["genia_limits"] and not self.isPotentialGeniaInteraction(eJ, eI):
502 makeExample = False
503 self.exampleStats.filter("genia_limits")
504 if self.styles["genia_task1"] and (eI.get("type") == "Entity" or eJ.get("type") == "Entity"):
505 makeExample = False
506 self.exampleStats.filter("genia_task1")
507 if self.styles["rel_limits"] and not self.isPotentialRELInteraction(eJ, eI):
508 makeExample = False
509 self.exampleStats.filter("rel_limits")
510 if self.styles["co_limits"] and not self.isPotentialCOInteraction(eJ, eI, sentenceGraph):
511 makeExample = False
512 self.exampleStats.filter("co_limits")
513 if self.styles["bb_limits"] and not self.isPotentialBBInteraction(eJ, eI, sentenceGraph):
514 makeExample = False
515 self.exampleStats.filter("bb_limits")
516 if categoryName != "neg":
517 self.exampleStats.filter("bb_limits(" + categoryName + ":" + eJ.get("type") + "/" + eI.get("type") + ")")
518 if self.styles["bi_limits"] and not self.isPotentialBIInteraction(eJ, eI, sentenceGraph, self.exampleStats):
519 makeExample = False
520
521 if self.styles["epi_limits"] and not self.isPotentialEPIInteraction(eJ, eI, sentenceGraph):
522 makeExample = False
523 self.exampleStats.filter("epi_limits")
524 if self.styles["id_limits"] and not self.isPotentialIDInteraction(eJ, eI, sentenceGraph):
525 makeExample = False
526 self.exampleStats.filter("id_limits")
527
528
529
530
531
532
533 if self.styles["pos_only"] and categoryName == "neg":
534 makeExample = False
535 self.exampleStats.filter("pos_only")
536 if makeExample:
537
538 ExampleUtils.appendExamples([self.buildExample(tJ, tI, paths, sentenceGraph, categoryName, exampleIndex, eJ, eI)], outfile)
539 exampleIndex += 1
540 self.exampleStats.endExample()
541 else:
542 if self.styles["entities"]:
543 categoryName = self.getCategoryName(sentenceGraph, eI, eJ, directed=False)
544 else:
545 categoryName = self.getCategoryNameFromTokens(sentenceGraph, tI, tJ, directed=False)
546 self.exampleStats.beginExample(categoryName)
547 forwardExample = self.buildExample(tI, tJ, paths, sentenceGraph, categoryName, exampleIndex, eI, eJ)
548 if not self.styles["graph_kernel"]:
549 reverseExample = self.buildExample(tJ, tI, paths, sentenceGraph, categoryName, exampleIndex, eJ, eI)
550 forwardExample[2].update(reverseExample[2])
551
552 ExampleUtils.appendExamples([forwardExample], outfile)
553 exampleIndex += 1
554 self.exampleStats.endExample()
555
556
557 return exampleIndex
558
def buildExample(self, token1, token2, paths, sentenceGraph, categoryName, exampleIndex, entity1=None, entity2=None):
    """
    Build a single directed example for the potential edge between token1 and token2

    token1, token2 -- end point tokens; read with .get("id") and
                      .get("charOffset")
    paths -- object whose getPaths(token1, token2) returns the candidate
             paths; not used with the "no_path" style
    sentenceGraph -- sentence graph the tokens belong to
    categoryName -- class name of the example
    exampleIndex -- running index that becomes part of the example id
    entity1, entity2 -- entities behind the tokens, or None

    Returns the tuple (id, category, features, extra). id is
    "<sentence id>.x<exampleIndex>"; category is the class id from
    self.classSet, or 1 / -1 with the "binary" style; features maps feature
    ids to values; extra holds descriptive attributes of the example.
    """
    features = {}

    # Path between the tokens: the first one found, else just the end points
    path = [token1, token2]
    if not self.styles["no_path"]:
        foundPaths = paths.getPaths(token1, token2)
        if len(foundPaths) > 0:
            path = foundPaths[0]

    assert self.pathLengths is None
    # NOTE(review): the builders below are called in the original order;
    # presumably self.featureSet hands out ids in request order - confirm
    # before reordering anything.
    if self.pathLengths is None or len(path) - 1 in self.pathLengths:
        if self.styles["trigger_features"]:
            self.triggerFeatureBuilder.setFeatureVector(features)
            self.triggerFeatureBuilder.tag = "trg1_"
            self.triggerFeatureBuilder.buildFeatures(token1)
            self.triggerFeatureBuilder.tag = "trg2_"
            self.triggerFeatureBuilder.buildFeatures(token2)
            self.triggerFeatureBuilder.setFeatureVector(None)

        if self.styles["rel_features"] and not self.styles["no_task"]:
            self.relFeatureBuilder.setFeatureVector(features)
            self.relFeatureBuilder.tag = "rel1_"
            self.relFeatureBuilder.buildAllFeatures(sentenceGraph.tokens, sentenceGraph.tokens.index(token1))
            self.relFeatureBuilder.tag = "rel2_"
            self.relFeatureBuilder.buildAllFeatures(sentenceGraph.tokens, sentenceGraph.tokens.index(token2))
            self.relFeatureBuilder.setFeatureVector(None)
        if self.styles["bacteria_renaming"] and not self.styles["no_task"]:
            self.bacteriaRenamingFeatureBuilder.setFeatureVector(features)
            self.bacteriaRenamingFeatureBuilder.buildPairFeatures(entity1, entity2)
            self.bacteriaRenamingFeatureBuilder.setFeatureVector(None)
        if self.styles["co_limits"] and not self.styles["no_task"]:
            # Mark which entity's character span contains the other one
            e1Offset = Range.charOffsetToSingleTuple(entity1.get("charOffset"))
            e2Offset = Range.charOffsetToSingleTuple(entity2.get("charOffset"))
            if Range.contains(e1Offset, e2Offset):
                features[self.featureSet.getId("e1_contains_e2")] = 1
                if entity2.get("isName") == "True":
                    features[self.featureSet.getId("e1_contains_e2name")] = 1
            if Range.contains(e2Offset, e1Offset):
                features[self.featureSet.getId("e2_contains_e1")] = 1
                if entity1.get("isName") == "True":
                    features[self.featureSet.getId("e2_contains_e1name")] = 1
        if self.styles["ddi_features"]:
            self.drugFeatureBuilder.setFeatureVector(features)
            self.drugFeatureBuilder.tag = "ddi_"
            self.drugFeatureBuilder.buildPairFeatures(entity1, entity2)
            if self.styles["ddi_mtmx"]:
                self.drugFeatureBuilder.buildMTMXFeatures(entity1, entity2)
            self.drugFeatureBuilder.setFeatureVector(None)
        if self.styles["graph_kernel"]:
            self.graphKernelFeatureBuilder.setFeatureVector(features, entity1, entity2)
            self.graphKernelFeatureBuilder.buildGraphKernelFeatures(sentenceGraph, path)
            self.graphKernelFeatureBuilder.setFeatureVector(None)
        if self.styles["entity_type"]:
            e1Type = self.multiEdgeFeatureBuilder.getEntityType(entity1)
            e2Type = self.multiEdgeFeatureBuilder.getEntityType(entity2)
            features[self.featureSet.getId("e1_"+e1Type)] = 1
            features[self.featureSet.getId("e2_"+e2Type)] = 1
            features[self.featureSet.getId("distance_"+str(len(path)))] = 1
        if not self.styles["no_dependency"]:
            self.multiEdgeFeatureBuilder.setFeatureVector(features, entity1, entity2)
            if not self.styles["disable_entity_features"]:
                self.multiEdgeFeatureBuilder.buildEntityFeatures(sentenceGraph)
            self.multiEdgeFeatureBuilder.buildPathLengthFeatures(path)
            if not self.styles["disable_terminus_features"]:
                self.multiEdgeFeatureBuilder.buildTerminusTokenFeatures(path, sentenceGraph)
            if not self.styles["disable_single_element_features"]:
                self.multiEdgeFeatureBuilder.buildSingleElementFeatures(path, sentenceGraph)
            if not self.styles["disable_ngram_features"]:
                self.multiEdgeFeatureBuilder.buildPathGrams(2, path, sentenceGraph)
                self.multiEdgeFeatureBuilder.buildPathGrams(3, path, sentenceGraph)
                self.multiEdgeFeatureBuilder.buildPathGrams(4, path, sentenceGraph)
            if not self.styles["disable_path_edge_features"]:
                self.multiEdgeFeatureBuilder.buildPathEdgeFeatures(path, sentenceGraph)
            self.multiEdgeFeatureBuilder.buildSentenceFeatures(sentenceGraph)
            self.multiEdgeFeatureBuilder.setFeatureVector(None)
        if self.styles["nodalida"]:
            self.nodalidaFeatureBuilder.setFeatureVector(features, entity1, entity2)
            shortestPaths = self.nodalidaFeatureBuilder.buildShortestPaths(sentenceGraph.dependencyGraph, path)
            # NOTE(review): looks like leftover debug output; kept, but
            # written as a call so it parses on Python 2 and 3 alike.
            print(shortestPaths)
            if len(shortestPaths) > 0:
                self.nodalidaFeatureBuilder.buildNGrams(shortestPaths, sentenceGraph)
            self.nodalidaFeatureBuilder.setFeatureVector(None)
        if not self.styles["no_linear"]:
            self.tokenFeatureBuilder.setFeatureVector(features)
            # Positions of the two tokens in the sentence (the last match wins)
            for index, token in enumerate(sentenceGraph.tokens):
                if token == token1:
                    token1Index = index
                if token == token2:
                    token2Index = index
            # "linTok1" always gets the smaller index
            if token1Index > token2Index:
                token1Index, token2Index = token2Index, token1Index
            self.tokenFeatureBuilder.buildLinearOrderFeatures(token1Index, sentenceGraph, 2, 2, preTag="linTok1")
            self.tokenFeatureBuilder.buildLinearOrderFeatures(token2Index, sentenceGraph, 2, 2, preTag="linTok2")
            self.tokenFeatureBuilder.setFeatureVector(None)
        if self.styles["random"]:
            self.randomFeatureBuilder.setFeatureVector(features)
            self.randomFeatureBuilder.buildRandomFeatures(100, 0.01)
            self.randomFeatureBuilder.setFeatureVector(None)
        if self.styles["genia_limits"] and not self.styles["no_task"]:
            e1Type = entity1.get("type")
            assert(entity1.get("isName") == "False")
            targetIsName = entity2.get("isName") == "True"
            if targetIsName:
                features[self.featureSet.getId("GENIA_target_protein")] = 1
            else:
                features[self.featureSet.getId("GENIA_nested_event")] = 1
            # Matches on "egulation", i.e. without the first letter of
            # "regulation"
            if e1Type.find("egulation") != -1:
                if targetIsName:
                    features[self.featureSet.getId("GENIA_regulation_of_protein")] = 1
                else:
                    features[self.featureSet.getId("GENIA_regulation_of_event")] = 1
        if self.styles["bi_limits"]:
            # Features from the entity types and their super types
            e1Type = entity1.get("type")
            e2Type = entity2.get("type")
            e1SuperType = str(self.getBISuperType(e1Type))
            e2SuperType = str(self.getBISuperType(e2Type))
            features[self.featureSet.getId("BI_e1_"+e1Type)] = 1
            features[self.featureSet.getId("BI_e2_"+e2Type)] = 1
            features[self.featureSet.getId("BI_e1sup_"+e1SuperType)] = 1
            features[self.featureSet.getId("BI_e2sup_"+e2SuperType)] = 1
            features[self.featureSet.getId("BI_e1e2_"+e1Type+"_"+e2Type)] = 1
            features[self.featureSet.getId("BI_e1e2sup_"+e1SuperType+"_"+e2SuperType)] = 1
        if self.styles["evex"]:
            self.evexFeatureBuilder.setFeatureVector(features, entity1, entity2)
            self.evexFeatureBuilder.buildEdgeFeatures(entity1, entity2, token1, token2, path, sentenceGraph)
            self.evexFeatureBuilder.setFeatureVector(None)
        if self.styles["giuliano"]:
            self.giulianoFeatureBuilder.setFeatureVector(features, entity1, entity2)
            self.giulianoFeatureBuilder.buildEdgeFeatures(entity1, entity2, token1, token2, path, sentenceGraph)
            self.giulianoFeatureBuilder.setFeatureVector(None)
    else:
        # Path length outside the accepted lengths (only reachable when
        # asserts are disabled)
        features[self.featureSet.getId("always_negative")] = 1
        if self.styles["subset"]:
            features[self.featureSet.getId("out_of_scope")] = 1

    # Extra attributes: t1 is the path end point with the smaller start offset
    firstStart = int(path[0].get("charOffset").split("-")[0])
    lastStart = int(path[-1].get("charOffset").split("-")[0])
    if firstStart < lastStart:
        extra = {"xtype":"edge","type":"i","t1":path[0].get("id"),"t2":path[-1].get("id")}
        extra["deprev"] = False
    else:
        extra = {"xtype":"edge","type":"i","t1":path[-1].get("id"),"t2":path[0].get("id")}
        extra["deprev"] = True
    if entity1 is not None:
        extra["e1"] = entity1.get("id")
        if sentenceGraph.mergedEntityToDuplicates is not None:
            extra["e1DuplicateIds"] = ",".join([x.get("id") for x in sentenceGraph.mergedEntityToDuplicates[entity1]])
    if entity2 is not None:
        extra["e2"] = entity2.get("id")
        if sentenceGraph.mergedEntityToDuplicates is not None:
            extra["e2DuplicateIds"] = ",".join([x.get("id") for x in sentenceGraph.mergedEntityToDuplicates[entity2]])
    extra["categoryName"] = categoryName
    if self.styles["bacteria_renaming"]:
        e1Text = entity1.get("text")
        if e1Text is not None and e1Text != "":
            extra["e1t"] = e1Text.replace(" ", "---").replace(":","-COL-")
        e2Text = entity2.get("text")
        if e2Text is not None and e2Text != "":
            extra["e2t"] = e2Text.replace(" ", "---").replace(":","-COL-")
    sentenceOrigId = sentenceGraph.sentenceElement.get("origId")
    if sentenceOrigId is not None:
        extra["SOID"] = sentenceOrigId

    # Class of the example
    if self.styles["binary"]:
        category = 1 if categoryName != "neg" else -1
    else:
        category = self.classSet.getId(categoryName)

    return (sentenceGraph.getSentenceId()+".x"+str(exampleIndex),category,features,extra)
803