TEES :: Utils :: FindHeads
Source code listing for module TEES.Utils.FindHeads.

 1  import sys, os 
 2  thisPath = os.path.dirname(os.path.abspath(__file__)) 
 3  sys.path.append(os.path.abspath(os.path.join(thisPath,"../.."))) 
 4  import Utils.ElementTreeUtils as ETUtils 
 5  import sys, os 
 6  sys.path.append(os.path.dirname(os.path.abspath(__file__))+"/..") 
 7  import Core.SentenceGraph as SentenceGraph 
 8   
def findHeads(input, parse, tokenization=None, output=None, removeExisting=True, iterate=False):
    """
    Determine head token offsets for the entities of an interaction XML corpus.

    SentenceGraph calculates a head offset for any entity that is missing one,
    so this function (optionally) clears the existing headOffset attributes and
    then builds the sentence graphs, which fill the offsets back in.

    @param input: corpus in interaction XML format (file name, open file or ElementTree)
    @param parse: name of the parse element used for determining heads
    @param tokenization: name of the tokenization element (None = default for the parse)
    @param output: optional output file for the processed corpus
    @param removeExisting: if True, delete existing headOffset attributes first
    @param iterate: if True, process the corpus sentence-block by sentence-block
                    (lower memory use); in this mode output is written by the iterator
    @return: the processed corpus as an ElementTree, or None when iterate=True
    """
    if iterate:
        from Utils.ProgressCounter import ProgressCounter
        import InteractionXML.SentenceElements as SentenceElements
        print >> sys.stderr, "Determining head offsets using parse", parse, "and tokenization", tokenization
        print >> sys.stderr, "Removing existing head offsets"
        removeCount = 0
        counter = ProgressCounter(None, "Find heads")
        counter.showMilliseconds = True
        for sentences in SentenceElements.getCorpusIterator(input, output, parse, tokenization):
            for sentence in sentences:
                if removeExisting:
                    for e in sentence.sentence.findall("entity"):
                        if e.get("headOffset") != None:
                            removeCount += 1
                            del e.attrib["headOffset"]
                # Constructing the graph recalculates any missing head offsets
                graph = SentenceGraph.SentenceGraph(sentence.sentence, sentence.tokens, sentence.dependencies)
                graph.mapInteractions(sentence.entities, sentence.interactions)
            counter.update(len(sentences), "Finding heads ("+sentences[-1].sentence.get("id")+"): ")
        print >> sys.stderr, "Removed head offsets from", removeCount, "entities"
        # The iterator writes its own output; there is no single in-memory tree
        # to return in this mode. (The original code fell through to
        # "return xml" with xml unbound, raising NameError when iterate=True.)
        xml = None
    else:
        # Bug fix: the original parsed the input twice (ETFromObj was called
        # both here and again inside the removeExisting branch).
        xml = ETUtils.ETFromObj(input)
        if removeExisting:
            print >> sys.stderr, "Removing existing head offsets"
            removeCount = 0
            for d in xml.getroot().findall("document"):
                for s in d.findall("sentence"):
                    for e in s.findall("entity"):
                        if e.get("headOffset") != None:
                            removeCount += 1
                            del e.attrib["headOffset"]
            print >> sys.stderr, "Removed head offsets from", removeCount, "entities"

        # SentenceGraph automatically calculates head offsets and adds them to
        # entities if they are missing
        print >> sys.stderr, "Determining head offsets using parse", parse, "and tokenization", tokenization
        corpusElements = SentenceGraph.loadCorpus(xml, parse, tokenization)

        # Make sure every parse gets head scores
        for sentence in corpusElements.sentences:
            if sentence.sentenceGraph == None:
                continue
            if sentence.sentenceGraph.tokenHeadScores == None:
                sentence.sentenceGraph.getTokenHeadScores()

        if output != None:
            print >> sys.stderr, "Writing output to", output
            ETUtils.write(corpusElements.rootElement, output)
    return xml
if __name__=="__main__":
    import sys
    print >> sys.stderr, "##### Calculating entity head token offsets #####"

    from optparse import OptionParser
    # Psyco speeds up pure-Python loops when it is available; fall back silently
    try:
        import psyco
        psyco.full()
        print >> sys.stderr, "Found Psyco, using"
    except ImportError:
        print >> sys.stderr, "Psyco not installed"

    # Command-line interface for recalculating head offsets in a corpus file
    cmdParser = OptionParser(usage="%prog [options]\nRecalculate head token offsets.")
    addOption = cmdParser.add_option
    addOption("-i", "--input", default=None, dest="input", help="Corpus in interaction xml format", metavar="FILE")
    addOption("-o", "--output", default=None, dest="output", help="Output file in interaction xml format.")
    addOption("-p", "--parse", default=None, dest="parse", help="Parse element name for calculating head offsets")
    addOption("-t", "--tokenization", default=None, dest="tokenization", help="Tokenization element name for calculating head offsets")
    addOption("-r", "--iterate", default=False, action="store_true", dest="iterate", help="")
    opts, extraArgs = cmdParser.parse_args()

    findHeads(input=opts.input, output=opts.output, parse=opts.parse, tokenization=opts.tokenization, iterate=opts.iterate)