Package TEES :: Package Utils :: Package InteractionXML :: Module ResolveEPITriggerTypes
[hide private]

Source Code for Module TEES.Utils.InteractionXML.ResolveEPITriggerTypes

  1  import sys, os 
  2  try: 
  3      import xml.etree.cElementTree as ET 
  4  except ImportError: 
  5      import cElementTree as ET 
  6  sys.path.append(os.path.dirname(os.path.abspath(__file__))+"/../../") 
  7  import Utils.ElementTreeUtils as ETUtils 
  8  from collections import defaultdict 
  9   
def getEPIBaseType(eType):
    """
    Return the non-negated form of an EPI trigger type.

    Types that isNegatableEPITrigger() does not recognise are returned
    unchanged. For recognised types the "De"/"de" marker is removed, e.g.
    "Dephosphorylation" -> "Phosphorylation" and
    "DNA_demethylation" -> "DNA_methylation".
    """
    if not isNegatableEPITrigger(eType):
        return eType

    # A type may carry a prefix tag separated by an underscore ("DNA_...").
    prefix, separator, stem = eType.partition("_")
    if not separator:
        prefix, stem = "", eType

    stem = stem.replace("De", "").replace("de", "")

    if prefix:
        # Prefixed types keep their stem in lower case.
        return prefix + "_" + stem

    # Unprefixed types start with a capital letter.
    if stem[0].islower():
        stem = stem[0].upper() + stem[1:]
    return stem
24
def negateType(eType):
    """
    Build the negated ("De...") name for a base EPI type.

    The type name is lower-cased and prefixed with "De"; if the type has an
    underscore-separated prefix tag, the tag is kept as-is and the marker
    "de" is inserted after the underscore instead, e.g.
    "Phosphorylation" -> "Dephosphorylation" and
    "DNA_methylation" -> "DNA_demethylation".
    """
    if "_" in eType:
        tag, stem = eType.split("_")
    else:
        tag, stem = "", eType
    stem = stem.lower()

    if tag == "":
        return "De" + stem
    return tag + "_" + "de" + stem
34
def isNegatableEPITrigger(eType):
    """
    Tell whether eType is one of the EPI trigger types that exists in both
    a positive and a negated ("De...") form.

    The comparison is an exact, case-sensitive match against the known
    type names. "Catalysis" is deliberately not part of the set.
    """
    # (positive form, negated form)
    typePairs = (
        ("Phosphorylation", "Dephosphorylation"),
        ("Hydroxylation", "Dehydroxylation"),
        ("Ubiquitination", "Deubiquitination"),
        ("DNA_methylation", "DNA_demethylation"),
        ("Glycosylation", "Deglycosylation"),
        ("Acetylation", "Deacetylation"),
        ("Methylation", "Demethylation"),
    )
    for pair in typePairs:
        if eType in pair:
            return True
    return False
def determineNewType(eType, eText):
    """
    Resolve a (possibly merged) entity type against the entity text.

    eType may consist of several class names joined with "---". Each part
    is resolved separately with getNewType(); the distinct results are
    sorted and joined with "---" again.

    eText must not be None.
    """
    assert eText != None
    resolved = set(getNewType(part, eText) for part in eType.split("---"))
    ordered = list(resolved)
    ordered.sort()
    return "---".join(ordered)
59
def getNewType(eType, eText):
    """
    Choose between the positive and negated form of a single EPI type
    based on the trigger text.

    Types that are not negatable are returned unchanged. Otherwise the
    negated form is returned when the lower-cased text contains "remov",
    "loss" or "erasure", or starts with "de"; in all other cases the base
    (positive) form is returned.
    """
    if not isNegatableEPITrigger(eType):
        return eType

    baseType = getEPIBaseType(eType)
    lowered = eText.lower()

    # Substrings that indicate the modification is being taken away.
    # (A rule for a "hypo" prefix is intentionally not applied.)
    removalCues = ("remov", "loss", "erasure")
    if any(cue in lowered for cue in removalCues) or lowered.startswith("de"):
        return negateType(baseType)
    return baseType
79 -def negateEvents(input, output=None, verbose=False):
80 if not (ET.iselement(input) and input.tag == "sentence"): 81 print >> sys.stderr, "Loading corpus file", input 82 corpusTree = ETUtils.ETFromObj(input) 83 corpusRoot = corpusTree.getroot() 84 85 if not (ET.iselement(input) and input.tag == "sentence"): 86 sentences = corpusRoot.getiterator("sentence") 87 else: 88 sentences = [input] 89 counts = defaultdict(int) 90 for sentence in sentences: 91 for entity in sentence.findall("entity"): 92 counts["all-entities"] += 1 93 eType = entity.get("type") 94 if not isNegatableEPITrigger(eType): 95 counts["out-of-scope"] += 1 96 continue 97 eBaseType = getEPIBaseType(eType) 98 eText = entity.get("text").lower() 99 eNewType = determineNewType(eType, eText) 100 101 # Insert changed charOffset 102 counts["entities"] += 1 103 if verbose: 104 print "Entity", entity.get("id"), [entity.get("text")], [eType, eBaseType, eNewType], 105 if eNewType != eBaseType: 106 counts["negated"] += 1 107 if verbose: print "NEGATED", 108 if eNewType == eType: 109 counts["correct"] += 1 110 if verbose: print "CORRECT" 111 else: 112 counts["incorrect"] += 1 113 if eNewType == eBaseType: 114 counts["incorrect-pos"] += 1 115 else: 116 counts["incorrect-neg"] += 1 117 if verbose: print "INCORRECT" 118 entity.set("type", eNewType) 119 if verbose: 120 print counts 121 122 if not (ET.iselement(input) and input.tag == "sentence"): 123 if output != None: 124 print >> sys.stderr, "Writing output to", output 125 ETUtils.write(corpusRoot, output) 126 return corpusTree
if __name__=="__main__":
    # Command-line entry point: load a corpus, resolve the EPI trigger types
    # and optionally write the result.
    # NOTE(review): the banner text looks copied from another tool; it is
    # kept unchanged in case log consumers rely on it -- confirm.
    print >> sys.stderr, "##### Extend Triggers #####"
    # Import Psyco if available
    try:
        import psyco
        psyco.full()
        print >> sys.stderr, "Found Psyco, using"
    except ImportError:
        print >> sys.stderr, "Psyco not installed"

    from optparse import OptionParser
    optparser = OptionParser(usage="%prog [options]\nResolve positive/negated EPI trigger types (e.g. Phosphorylation vs. Dephosphorylation) from the entity text.")
    optparser.add_option("-i", "--input", default=None, dest="input", help="Corpus in analysis format", metavar="FILE")
    optparser.add_option("-o", "--output", default=None, dest="output", help="Output file for the modified corpus (optional)", metavar="FILE")
    optparser.add_option("-d", "--debug", default=False, action="store_true", dest="debug", help="Print per-entity diagnostics and final counts")
    (options, args) = optparser.parse_args()
    # The input is mandatory. Report a usage error instead of relying on an
    # assert, which is skipped when Python runs with -O.
    if options.input is None:
        optparser.error("no input corpus given (use -i/--input)")
    # The output is optional: without it the corpus is processed but not saved.

    negateEvents(options.input, options.output, verbose=options.debug)