Package TEES :: Package ExampleWriters :: Module EntityExampleWriter
[hide private]

Source Code for Module TEES.ExampleWriters.EntityExampleWriter

  1  import sys 
  2  from SentenceExampleWriter import SentenceExampleWriter 
  3  import Utils.InteractionXML.IDUtils as IDUtils 
  4  import Utils.InteractionXML.ExtendTriggers 
  5  try: 
  6      import xml.etree.cElementTree as ET 
  7  except ImportError: 
  8      import cElementTree as ET 
  9   
class EntityExampleWriter(SentenceExampleWriter):
    """
    Writes classified entity examples back into a sentence XML element.

    Every example refers to a single head token. For each type predicted
    for that token an "entity" element is built, and (depending on
    self.insertWeights) either appended to the sentence or used only to
    copy prediction strengths onto the gold entities.
    """

    def __init__(self):
        # Attributes are assigned before the base initialiser runs, as in
        # the original code, in case SentenceExampleWriter.__init__ reads them.
        # NOTE(review): "token" presumably names the kind of element the
        # examples refer to -- confirm against SentenceExampleWriter.
        self.xType = "token"
        # False: predicted entities replace the sentence's non-name entities.
        # True: the removed entities and interactions are re-attached and
        # predictions are copied onto gold entities at the same head offset.
        self.insertWeights = False
        SentenceExampleWriter.__init__(self)

    def writeXMLSentence(self, examples, predictionsByExample, sentenceObject, classSet, classIds, goldSentence=None, exampleStyle=None):
        """
        Replace the entities of one sentence element with predicted ones.

        The sentence element is modified in place; nothing is returned.

        NOTE(review): the statement nesting of this method was reconstructed
        from a whitespace-flattened listing -- verify against the repository.

        @param examples: examples of a single sentence. example[0] is the key
            into predictionsByExample; example[3] is a mapping holding the
            head token id under "t" and optionally "unmergeneg", "trigex"
            and "goldIds".
        @param predictionsByExample: mapping from example[0] to a prediction.
        @param sentenceObject: object with a "sentence" element and a
            "tokens" sequence of elements.
        @param classSet: passed through to getPredictionStrengthString and
            getElementTypes.
        @param classIds: passed through to getPredictionStrengthString and
            getElementTypes.
        @param goldSentence: optional object whose "entities" provide the
            gold types written to the "goldType" attribute.
        @param exampleStyle: optional mapping; a true "names" value causes
            all remaining entities (including names) to be removed.
        """
        self.assertSameSentence(examples)

        extensionRequested = False
        sentenceElement = sentenceObject.sentence
        sentenceId = sentenceElement.get("id")

        # Detach the analyses element so it can be re-attached as the last
        # child. Elements are compared with "is None": an Element without
        # children is falsy, so truth-testing would be wrong here.
        sentenceAnalysesElement = sentenceElement.find("sentenceanalyses")
        if sentenceAnalysesElement is None:
            sentenceAnalysesElement = sentenceElement.find("analyses")
        if sentenceAnalysesElement is not None:
            sentenceElement.remove(sentenceAnalysesElement)

        # Remove pairs and interactions.
        interactions = self.removeChildren(sentenceElement, ["pair", "interaction"])
        # The next free id must be read before any entity is removed.
        newEntityIdCount = IDUtils.getNextFreeId(sentenceElement.findall("entity"))
        nonNameEntities = self.removeNonNameEntities(sentenceElement)
        # Remove all remaining entities, including names, if requested.
        if exampleStyle is not None and "names" in exampleStyle and exampleStyle["names"]:
            self.removeChildren(sentenceElement, ["entity"])

        # Index the gold entities by head offset and derive one merged gold
        # type per offset; tokens without a gold entity get the type "neg".
        goldEntityByHeadOffset = {}
        goldEntityTypeByHeadOffset = {}
        if goldSentence is not None:
            for entity in goldSentence.entities:
                goldEntityByHeadOffset.setdefault(entity.get("headOffset"), []).append(entity)
            for offset in goldEntityByHeadOffset:
                goldEntityTypeByHeadOffset[offset] = self.getMergedEntityType(goldEntityByHeadOffset[offset])
            for token in sentenceObject.tokens:
                goldEntityTypeByHeadOffset.setdefault(token.get("charOffset"), "neg")

        # Token lookup table built once; setdefault keeps the first token
        # for an id, matching the original first-match linear search.
        tokenById = {}
        for token in sentenceObject.tokens:
            tokenById.setdefault(token.get("id"), token)

        # Add the new entities.
        for example in examples:
            extra = example[3]
            # Entity examples always refer to a single head token.
            headToken = tokenById.get(extra["t"])
            assert headToken is not None, extra
            headOffset = headToken.get("charOffset")
            headText = headToken.get("text")

            # Determine if additional processing is requested.
            unmergeEPINeg = None
            if "unmergeneg" in extra and extra["unmergeneg"] == "epi":
                unmergeEPINeg = headText
            if "trigex" in extra and extra["trigex"] == "bb":
                extensionRequested = True

            prediction = predictionsByExample[example[0]]
            predictionString = self.getPredictionStrengthString(prediction, classSet, classIds)
            hasGoldEntity = headOffset in goldEntityByHeadOffset

            # getElementTypes splits merged classes into single types.
            for eType in self.getElementTypes(prediction, classSet, classIds, unmergeEPINegText=unmergeEPINeg):
                entityElement = ET.Element("entity")
                entityElement.set("isName", "False")
                entityElement.set("charOffset", headOffset)
                entityElement.set("headOffset", headOffset)
                entityElement.set("text", headText)
                entityElement.set("id", sentenceId + ".e" + str(newEntityIdCount))
                entityElement.set("type", eType)
                entityElement.set("predictions", predictionString)

                if self.insertWeights and hasGoldEntity:
                    # Weights-only mode: copy the prediction strengths onto
                    # the gold entities sharing this head offset.
                    for entity in goldEntityByHeadOffset[headOffset]:
                        entity.set("predictions", predictionString)
                if headOffset in goldEntityTypeByHeadOffset:
                    entityElement.set("goldType", goldEntityTypeByHeadOffset[headOffset])
                if "goldIds" in extra:
                    # The entities for which this example was built.
                    entityElement.set("goldIds", extra["goldIds"])

                # Normally every built entity is added. In weights-only mode
                # only non-negative predictions without a gold entity at the
                # same head offset are added.
                if not self.insertWeights or (eType != "neg" and not hasGoldEntity):
                    newEntityIdCount += 1
                    sentenceElement.append(entityElement)

        # If only adding weights, re-attach interactions and gold entities.
        if self.insertWeights:
            for entity in nonNameEntities:
                sentenceElement.append(entity)
            for interaction in interactions:
                sentenceElement.append(interaction)

        # Re-attach the analyses element.
        if sentenceAnalysesElement is not None:
            sentenceElement.append(sentenceAnalysesElement)

        # Extend bacteria triggers.
        if extensionRequested:
            Utils.InteractionXML.ExtendTriggers.extend(sentenceElement, entityTypes=["Bacterium"])

    def getMergedEntityType(self, entities):
        """
        If a single token belongs to multiple entities of different types,
        a new, composite type is defined. This type is the alphabetically
        ordered types of these entities joined with '---'.

        The type "Protein" never takes part in the composite type. If no
        type remains, "neg" is returned.

        @param entities: iterable of objects whose get("type") returns the
            entity type.
        @return: the merged type string, or "neg".
        """
        typeNames = sorted(set(entity.get("type") for entity in entities))
        # Empty type names are skipped as well: they contribute neither text
        # nor a separator, exactly like the original concatenation loop.
        merged = "---".join(name for name in typeNames if name not in ("Protein", ""))
        if merged == "":
            return "neg"
        return merged
140