1 import sys, os
2 try:
3 import xml.etree.cElementTree as ET
4 except ImportError:
5 import cElementTree as ET
6 sys.path.append(os.path.dirname(os.path.abspath(__file__))+"/../../")
7 import Utils.ElementTreeUtils as ETUtils
8 from collections import defaultdict
9
def getEPIBaseType(eType):
    """Return the non-negated form of a negatable EPI trigger type.

    Types rejected by isNegatableEPITrigger() are returned unchanged.
    Otherwise a leading "De"/"de" is removed from the type name while any
    "PREFIX_" part is kept ("DNA_demethylation" -> "DNA_methylation").
    Names without a prefix get their first letter upper-cased
    ("Dephosphorylation" -> "Phosphorylation").
    """
    if not isNegatableEPITrigger(eType):
        return eType

    # Split on the last underscore only, so the unpacking cannot fail no
    # matter how many underscores the type name contains.
    preTag, _sep, name = eType.rpartition("_")

    # Remove the negation marker only where it is a prefix. A global
    # replace() would also delete a "de" inside the name, e.g. it would
    # turn a hypothetical "Deadenylation" into "anylation".
    if name[:2] in ("De", "de"):
        name = name[2:]

    if preTag != "":
        return preTag + "_" + name
    # Un-prefixed type names start with a capital letter.
    return name[:1].upper() + name[1:]
24
def negateType(eType):
    """Return the negated ("De"-prefixed) name for a base EPI type.

    The name is lower-cased and prefixed with "De". When the type has a
    "PREFIX_" part, that prefix is kept verbatim and a lower-case "de" is
    inserted after the underscore instead, e.g.
    "Phosphorylation" -> "Dephosphorylation" and
    "DNA_methylation" -> "DNA_demethylation".
    """
    # rpartition never raises, unlike a two-way unpack of split("_"),
    # which fails as soon as the name holds more than one underscore.
    preTag, _sep, name = eType.rpartition("_")
    name = name.lower()
    if preTag != "":
        return preTag + "_" + "de" + name
    return "De" + name
34
# Trigger types that come in a base / "De"-negated pair. Built once at import
# time so the membership test does not rebuild and scan a list on every call.
_NEGATABLE_EPI_TRIGGERS = frozenset([
    "Phosphorylation",
    "Dephosphorylation",
    "Hydroxylation",
    "Dehydroxylation",
    "Ubiquitination",
    "Deubiquitination",
    "DNA_methylation",
    "DNA_demethylation",
    "Glycosylation",
    "Deglycosylation",
    "Acetylation",
    "Deacetylation",
    "Methylation",
    "Demethylation",
])


def isNegatableEPITrigger(eType):
    """Return True if eType is one of the paired base/negated trigger types.

    The comparison is exact and case-sensitive; None and unknown type
    names yield False.
    """
    return eType in _NEGATABLE_EPI_TRIGGERS
50
51
def determineNewType(eType, eText):
    """Re-derive a (possibly merged) entity type from the entity text.

    eType may hold several class names joined by "---". Each one is passed
    through getNewType() together with eText; the distinct results are
    sorted and joined with "---" again.
    """
    assert eText != None
    # A set removes duplicates when several class names map to one type.
    resolved = set(getNewType(part, eText) for part in eType.split("---"))
    return "---".join(sorted(resolved))
59
def getNewType(eType, eText):
    """Choose the base or negated form of one trigger type from its text.

    Types that are not negatable are returned unchanged. Otherwise the
    negated type is returned when the lower-cased text contains "remov",
    "loss" or "erasure", or starts with "de"; in every other case the base
    type is returned.
    """
    if not isNegatableEPITrigger(eType):
        return eType

    baseType = getEPIBaseType(eType)
    loweredText = eText.lower()

    # Substrings that signal the modification is being removed.
    removalCues = ("remov", "loss", "erasure")
    isNegated = loweredText.startswith("de")
    for cue in removalCues:
        if cue in loweredText:
            isNegated = True
            break

    if isNegated:
        return negateType(baseType)
    return baseType
78
def negateEvents(input, output=None, verbose=False):
    """Re-label negatable EPI trigger entities according to their text.

    input is either a single "sentence" element, which is processed in
    place, or anything ETUtils.ETFromObj() accepts, which is loaded as a
    corpus and whose "sentence" elements are all processed. For every
    "entity" child whose type is negatable, the type attribute is replaced
    with the result of determineNewType().

    When a corpus was loaded and output is not None, the corpus is written
    with ETUtils.write(). With verbose set, one line per processed entity
    and the final counters are printed to stdout.

    Returns the loaded corpus tree, or None when input was a single
    sentence element.
    """
    # Decide once how the input is to be treated; the tag of the input
    # element is never modified below, so the answer cannot change.
    isSentence = ET.iselement(input) and input.tag == "sentence"

    corpusTree = None
    corpusRoot = None
    if isSentence:
        sentences = [input]
    else:
        sys.stderr.write("Loading corpus file %s\n" % (input,))
        corpusTree = ETUtils.ETFromObj(input)
        corpusRoot = corpusTree.getroot()
        sentences = corpusRoot.getiterator("sentence")

    counts = defaultdict(int)
    for sentence in sentences:
        for entity in sentence.findall("entity"):
            counts["all-entities"] += 1
            eType = entity.get("type")
            if not isNegatableEPITrigger(eType):
                counts["out-of-scope"] += 1
                continue
            eBaseType = getEPIBaseType(eType)
            eText = entity.get("text").lower()
            eNewType = determineNewType(eType, eText)

            counts["entities"] += 1
            # Collect the pieces of the per-entity report line; they are
            # emitted as a single space-separated line further down.
            report = []
            if verbose:
                report.extend(["Entity",
                               str(entity.get("id")),
                               str([entity.get("text")]),
                               str([eType, eBaseType, eNewType])])
            if eNewType != eBaseType:
                counts["negated"] += 1
                report.append("NEGATED")
            # The existing annotation serves as the reference for the
            # correct / incorrect statistics.
            if eNewType == eType:
                counts["correct"] += 1
                report.append("CORRECT")
            else:
                counts["incorrect"] += 1
                if eNewType == eBaseType:
                    counts["incorrect-pos"] += 1
                else:
                    counts["incorrect-neg"] += 1
                report.append("INCORRECT")
            if verbose:
                sys.stdout.write(" ".join(report) + "\n")
            entity.set("type", eNewType)
    if verbose:
        sys.stdout.write(str(counts) + "\n")

    if isSentence:
        # Single sentences are modified in place; there is no tree to
        # write or return.
        return None
    if output is not None:
        sys.stderr.write("Writing output to %s\n" % (output,))
        ETUtils.write(corpusRoot, output)
    return corpusTree
127
if __name__=="__main__":
    # Command-line entry point: parse the options and run negateEvents().
    sys.stderr.write("##### Extend Triggers #####\n")

    # Psyco is optional; continue without it when it is not installed.
    try:
        import psyco
        psyco.full()
        sys.stderr.write("Found Psyco, using\n")
    except ImportError:
        sys.stderr.write("Psyco not installed\n")

    from optparse import OptionParser
    optparser = OptionParser(usage="%prog [options]\nRe-label negatable EPI trigger entities in a corpus based on their text.")
    optparser.add_option("-i", "--input", default=None, dest="input", help="Corpus in analysis format", metavar="FILE")
    optparser.add_option("-o", "--output", default=None, dest="output", help="File to write the modified corpus to (optional)", metavar="FILE")
    optparser.add_option("-d", "--debug", default=False, action="store_true", dest="debug", help="Print per-entity decisions and the final counts")
    (options, args) = optparser.parse_args()
    # Report a missing input as a usage error; an assert would be skipped
    # entirely when Python runs with -O.
    if options.input is None:
        optparser.error("an input corpus is required (-i/--input)")

    negateEvents(options.input, options.output, verbose=options.debug)
148