Package TEES :: Package ExampleBuilders :: Package FeatureBuilders :: Module BioInferOntologyFeatureBuilder
[hide private]

Source Code for Module TEES.ExampleBuilders.FeatureBuilders.BioInferOntologyFeatureBuilder

 1  """ 
 2  BioInfer ontology based features 
 3  """ 
 4  __version__ = "$Revision: 1.2 $" 
 5   
 6  import sys, os 
 7  try: 
 8      import xml.etree.cElementTree as ET 
 9  except ImportError: 
10      import cElementTree as ET 
11  import Utils.ElementTreeUtils as ETUtils 
12  from FeatureBuilder import FeatureBuilder 
13   
# Default location of the BioInfer corpus XML file. The path is relative, so
# it is resolved against the current working directory of the process.
g_bioInferFileName = "../../../BioInfer/data/bioinfer.xml"
15   
def parseNodes(node, ontology):
    """
    Recursively record the parent of every descendant of node.

    @param node: element whose children are walked; every child (at any
        depth) and every element that has children needs a "name" attribute
    @param ontology: dict filled in place, mapping child name -> parent name
    @raise AssertionError: if a child name is already present in ontology
    """
    for child in node:
        childName = child.attrib["name"]
        # A repeated name would silently overwrite the parent stored earlier,
        # so it is treated as a broken input file.
        assert childName not in ontology, childName
        ontology[childName] = node.attrib["name"]
        parseNodes(child, ontology)
def loadOntologies(bioInferFileName):
    """
    Read every "ontology" element directly under the root of an XML file.

    @param bioInferFileName: path of the XML file handed to ET.parse
    @return: dict mapping each ontology element's "type" attribute to a
        dict of term name -> parent name, as filled in by parseNodes
    """
    root = ET.parse(bioInferFileName).getroot()
    ontologies = {}
    for ontologyElement in root.findall("ontology"):
        termToParent = {}
        ontologies[ontologyElement.attrib["type"]] = termToParent
        # The top-level nodes themselves are not stored as keys; only their
        # descendants are mapped to a parent.
        for topNode in ontologyElement:
            parseNodes(topNode, termToParent)
    return ontologies
def printNode(node, indent=""):
    """
    Print the "name" attribute of node and of all its descendants.

    @param node: element to print; iterated to reach its children
    @param indent: prefix written before the name; each nesting level
        appends one more space
    """
    # A single parenthesised argument prints identically whether print is a
    # statement (Python 2) or a function (Python 3).
    print(indent + node.attrib["name"])
    for child in node:
        printNode(child, indent + " ")
def getParents(term, ontology):
    """
    List the ancestors of term, nearest parent first.

    @param term: name to start from
    @param ontology: dict mapping a term name to its parent name
    @return: list of ancestor names; empty when term has no entry
    """
    parents = []
    # Root terms have no entry in the mapping, so the walk must stop on a
    # missing key rather than index the dict directly.
    parent = ontology.get(term)
    while parent is not None:
        parents.append(parent)
        parent = ontology.get(parent)
    return parents
class BioInferOntologyFeatureBuilder(FeatureBuilder):
    """
    Feature builder that turns the annotated type of an entity into one
    feature per ancestor of that type in each loaded ontology.
    """

    def __init__(self, featureSet):
        """
        Initialise the base class and load the ontologies.

        @param featureSet: passed unchanged to FeatureBuilder.__init__
        """
        FeatureBuilder.__init__(self, featureSet)
        # Ontology type -> {term name: parent name}, read from the file named
        # by the module-level g_bioInferFileName.
        self.ontologies = loadOntologies(g_bioInferFileName)

    def getTokenAnnotatedType(self, token, sentenceGraph):
        """
        Return the "type" attribute of the entity headed by token.

        @param token: key looked up in sentenceGraph.tokenIsEntityHead
        @param sentenceGraph: object exposing the tokenIsEntityHead mapping
        @return: the type string, or None when the mapping holds None
        """
        entity = sentenceGraph.tokenIsEntityHead[token]
        if entity is None:
            return None
        return entity.attrib["type"]

    def buildOntologyFeaturesForPath(self, sentenceGraph, pathTokens, pathEdges=None):
        """
        Build "ont_"-tagged ontology features for every typed token on a path.

        @param sentenceGraph: passed to getTokenAnnotatedType
        @param pathTokens: iterable of tokens to inspect
        @param pathEdges: accepted but not used by this method
        """
        for token in pathTokens:
            tokenType = self.getTokenAnnotatedType(token, sentenceGraph)
            if tokenType is not None:
                self.buildOntologyFeatures(tokenType, "ont_")

    def buildOntologyFeatures(self, term, tag=""):
        """
        Set one feature, with value 1, for each ancestor of term.

        @param term: ontology term name
        @param tag: prefix prepended to every feature name
        """
        for feature in self.getParents(term):
            self.features[tag + feature] = 1

    def getParents(self, term):
        """
        Collect the ancestors of term from every ontology that contains it.

        @param term: ontology term name
        @return: list of "<ontology type>_<ancestor name>" strings, nearest
            ancestor first within each ontology
        """
        returnValues = []
        for ontologyType, ontology in self.ontologies.items():
            if term not in ontology:
                continue
            # Climb until reaching a name with no recorded parent (a root).
            parent = term
            while parent in ontology:
                parent = ontology[parent]
                returnValues.append(ontologyType + "_" + parent)
        return returnValues
if __name__ == "__main__":
    # Script mode: dump each ontology's term -> parent mapping, preceded by
    # the ontology type.
    ontologies = loadOntologies(g_bioInferFileName)
    for k, v in ontologies.items():
        print(k + ":")
        print(v)