1 import sys
2 from SentenceExampleWriter import SentenceExampleWriter
3 import Utils.InteractionXML.IDUtils as IDUtils
4 import Utils.InteractionXML.ExtendTriggers
5 try:
6 import xml.etree.cElementTree as ET
7 except ImportError:
8 import cElementTree as ET
9
15
def writeXMLSentence(self, examples, predictionsByExample, sentenceObject, classSet, classIds, goldSentence=None, exampleStyle=None):
    """
    Write the predicted entities of one sentence into its XML element.

    The sentence element is modified in place: "pair" and "interaction"
    children and non-name entities are detached, one new "entity" element
    is built per type returned by self.getElementTypes for each example,
    and the analyses element (if any) is moved back to the end.

    Parameters:
    examples -- examples of a single sentence (checked with
        self.assertSameSentence). example[0] is the key into
        predictionsByExample; example[3] is a mapping holding the head
        token id under "t" and optionally "unmergeneg", "trigex" and
        "goldIds".
    predictionsByExample -- mapping from example[0] to its prediction.
    sentenceObject -- object exposing .sentence (the XML element) and
        .tokens (elements with "id", "charOffset" and "text" attributes).
    classSet, classIds -- passed through unchanged to
        self.getPredictionStrengthString and self.getElementTypes.
    goldSentence -- optional object exposing .entities; used to attach
        "goldType" to new entities and, when self.insertWeights is set,
        to copy prediction strings onto the gold entities.
    exampleStyle -- optional mapping; a true "names" entry removes every
        remaining "entity" child before new ones are added.

    Returns None.
    """
    self.assertSameSentence(examples)

    extensionRequested = False

    sentenceElement = sentenceObject.sentence
    sentenceId = sentenceElement.get("id")

    # Detach the analyses element; it is re-appended last so that it stays
    # after every entity added below.
    sentenceAnalysesElement = sentenceElement.find("sentenceanalyses")
    if sentenceAnalysesElement is None:
        sentenceAnalysesElement = sentenceElement.find("analyses")
    if sentenceAnalysesElement is not None:
        sentenceElement.remove(sentenceAnalysesElement)

    # Detach pairs and interactions
    interactions = self.removeChildren(sentenceElement, ["pair", "interaction"])

    # The next free id is read before any entity is removed
    newEntityIdCount = IDUtils.getNextFreeId(sentenceElement.findall("entity"))
    nonNameEntities = self.removeNonNameEntities(sentenceElement)

    # Optionally remove the remaining entities as well
    if exampleStyle is not None and "names" in exampleStyle and exampleStyle["names"]:
        self.removeChildren(sentenceElement, ["entity"])

    # Index the gold entities and their merged type by head offset
    goldEntityTypeByHeadOffset = {}
    goldEntityByHeadOffset = {}
    if goldSentence is not None:
        for entity in goldSentence.entities:
            goldEntityByHeadOffset.setdefault(entity.get("headOffset"), []).append(entity)
        for goldOffset in goldEntityByHeadOffset:
            goldEntityTypeByHeadOffset[goldOffset] = self.getMergedEntityType(goldEntityByHeadOffset[goldOffset])
        # Tokens that are not the head of any gold entity count as negatives
        for token in sentenceObject.tokens:
            goldEntityTypeByHeadOffset.setdefault(token.get("charOffset"), "neg")

    # Index tokens by id once; setdefault keeps the first token for an id,
    # which is the one a front-to-back scan would find.
    tokenById = {}
    for token in sentenceObject.tokens:
        tokenById.setdefault(token.get("id"), token)

    for example in examples:
        features = example[3]

        # Each example refers to a single head token
        headToken = tokenById.get(features["t"])
        assert headToken is not None, features
        headOffset = headToken.get("charOffset")
        headText = headToken.get("text")

        # Additional processing requested through the example's features
        unmergeEPINeg = None
        if "unmergeneg" in features and features["unmergeneg"] == "epi":
            unmergeEPINeg = headText
        if "trigex" in features and features["trigex"] == "bb":
            extensionRequested = True

        prediction = predictionsByExample[example[0]]
        predictionString = self.getPredictionStrengthString(prediction, classSet, classIds)
        for eType in self.getElementTypes(prediction, classSet, classIds, unmergeEPINegText=unmergeEPINeg):
            entityElement = ET.Element("entity")
            entityElement.set("isName", "False")
            entityElement.set("charOffset", headOffset)
            entityElement.set("headOffset", headOffset)
            entityElement.set("text", headText)
            entityElement.set("id", sentenceId + ".e" + str(newEntityIdCount))
            entityElement.set("type", eType)
            entityElement.set("predictions", predictionString)

            if self.insertWeights:
                # Copy the prediction string onto the gold entities that
                # share this head offset
                for entity in goldEntityByHeadOffset.get(headOffset, ()):
                    entity.set("predictions", predictionString)
            if headOffset in goldEntityTypeByHeadOffset:
                entityElement.set("goldType", goldEntityTypeByHeadOffset[headOffset])
            if "goldIds" in features:
                entityElement.set("goldIds", features["goldIds"])

            # Without insertWeights every new entity is kept. With it, only
            # non-negative entities whose head offset has no gold entity are.
            if not self.insertWeights or (eType != "neg" and headOffset not in goldEntityByHeadOffset):
                newEntityIdCount += 1
                sentenceElement.append(entityElement)

    # When only inserting weights, put back the detached non-name entities
    # and interactions.
    # NOTE(review): nesting was reconstructed from a flattened listing --
    # confirm that interactions are restored only when insertWeights is set.
    if self.insertWeights:
        for entity in nonNameEntities:
            sentenceElement.append(entity)
        for interaction in interactions:
            sentenceElement.append(interaction)

    # Re-attach the analyses element
    if sentenceAnalysesElement is not None:
        sentenceElement.append(sentenceAnalysesElement)

    # Trigger extension, requested by an example with "trigex" == "bb"
    if extensionRequested:
        Utils.InteractionXML.ExtendTriggers.extend(sentenceElement, entityTypes=["Bacterium"])
116
def getMergedEntityType(self, entities):
    """
    Build the type label for a token that belongs to one or more entities.

    The distinct "type" attributes of the entities are sorted, "Protein"
    is left out, and the remaining names are joined with '---' into a
    single composite type. When no name remains, "neg" is returned.
    """
    uniqueTypes = sorted(set(entity.get("type") for entity in entities))
    # An empty type name adds nothing to the label (and no separator), so
    # it is dropped together with "Protein".
    mergedParts = [name for name in uniqueTypes if name not in ("", "Protein")]
    if not mergedParts:
        return "neg"
    return "---".join(mergedParts)
140