Package TEES :: Package Utils :: Package STFormat :: Module Analyze
[hide private]

Source Code for Module TEES.Utils.STFormat.Analyze

 1  from STTools import * 
 2   
def getNestingChains(arg, chain=None):
    """Collect the nesting chains found beneath one argument.

    A chain is a string of "<r>:<type>" links joined by "-", where <r> is the
    first character of the argument's role (``arg[0]``) and <type> is the
    ``type`` attribute of the node the argument points to.

    arg   -- a (role, node) pair. The node must provide ``trigger``; when the
             trigger is set it must also provide ``type`` and ``arguments``
             (an iterable of further (role, node) pairs).
    chain -- the chain built up by the enclosing calls, or None at the root.

    Returns a list holding one chain for every path that ends in a node
    without a trigger. A node that has a trigger but no arguments contributes
    nothing, so the result may be empty. If the root node itself has no
    trigger the result is ``[chain]``, i.e. ``[None]`` by default.
    """
    node = arg[1]
    # Identity test: a trigger object with a permissive __eq__ must not be
    # mistaken for "no trigger".
    if node.trigger is None:  # reached protein level
        return [chain]

    link = arg[0][0] + ":" + node.type
    if chain is None:
        chain = link
    else:
        chain += "-" + link

    chains = []
    # A separate loop name keeps the `arg` parameter from being shadowed.
    for childArg in node.arguments:
        chains.extend(getNestingChains(childArg, chain))
    return chains
16
def analyzeNesting(documents):
    """Print how often each event nesting chain occurs in ``documents``.

    documents -- an iterable of objects with an ``events`` attribute. Each
                 event must be hashable and provide ``arguments``, an
                 iterable of pairs whose second item has a ``trigger``
                 attribute.

    For every document, the events that are not used as an argument of
    another event are treated as top level. Their chains are obtained from
    getNestingChains(("Root", event)) and tallied over all documents.

    Writes "Counts:" followed by one "  <chain> <count>" line per distinct
    chain, in sorted order, to standard output. Returns None.
    """
    chainCounts = {}
    for doc in documents:
        # Find all events that act as arguments, i.e. are not top level
        argumentEvents = set()
        for event in doc.events:
            for arg in event.arguments:
                if arg[1].trigger is not None:  # event
                    argumentEvents.add(arg[1])
        # Determine nesting structures for top level events
        for event in doc.events:
            if event not in argumentEvents:
                for chain in getNestingChains(("Root", event)):
                    chainCounts[chain] = chainCounts.get(chain, 0) + 1

    # Single-argument print(...) calls give identical output whether `print`
    # is the Python 2 statement or the Python 3 function.
    print("Counts:")
    # A chain may be None (a top-level event without a trigger). Sorting on
    # this key puts None first, as Python 2 ordering does, without comparing
    # None to a string, which Python 3 refuses.
    for chain in sorted(chainCounts, key=lambda c: (c is not None, c)):
        print("  %s %s" % (chain, chainCounts[chain]))
if __name__ == "__main__":
    from optparse import OptionParser

    # NOTE(review): `sys` is not imported in the visible part of this file;
    # presumably it arrives through `from STTools import *` -- confirm.

    # Import Psyco if available, and report on stderr whether it was found.
    try:
        import psyco
        psyco.full()
        psycoNotice = "Found Psyco, using"
    except ImportError:
        psycoNotice = "Psyco not installed"
    sys.stderr.write(psycoNotice + "\n")

    # Earlier manual experiments, kept for reference:
    #proteins, triggers, events = load(1335418, "/home/jari/biotext/tools/TurkuEventExtractionSystem-1.0/data/evaluation-data/evaluation-tools-devel-gold")
    #write(1335418, "/home/jari/data/temp", proteins, triggers, events )
    optparser = OptionParser(description="ST format statistics")
    optparser.add_option(
        "-i",
        "--input",
        default=None,
        dest="input",
        help="",
        metavar="FILE",
    )
    options, args = optparser.parse_args()

    # Example inputs:
    #p = "/home/jari/data/BioNLP09SharedTask/bionlp09_shared_task_development_data_rev1"
    #p = "/home/jari/data/BioNLP11SharedTask/BioNLP-ST_2011_Entity_Relations_development_data"

    # Load the set named by -i/--input, then print the nesting analysis
    # followed by the general statistics.
    print("Loading documents")
    documents = loadSet(options.input)
    print("Analyzing")
    analyzeNesting(documents)
    print("Statistics")
    getStatistics(documents)