Package TEES :: Package ExampleBuilders :: Package FeatureBuilders :: Module WordNetFeatureBuilder
[hide private]

Source Code for Module TEES.ExampleBuilders.FeatureBuilders.WordNetFeatureBuilder

 1  import sys, os 
 2  sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../..")) 
 3  #print os.path.join(os.path.dirname(os.path.abspath(__file__)), "../..") 
 4  from Core.IdSet import IdSet 
 5  import Utils.Libraries.PorterStemmer as PorterStemmer 
 6  import Core.ExampleUtils as ExampleUtils 
 7  from FeatureBuilder import FeatureBuilder 
 8   
class WordNetFeatureBuilder(FeatureBuilder):
    """Build WordNet-derived string features for single tokens.

    WordNet is accessed through NLTK's ``wordnet`` corpus reader. For a
    token and its Penn Treebank POS tag the builder looks up the first
    matching synset and emits ``HYPER_<name>`` features for that synset and
    its direct hypernyms, plus one ``LEX_<lexname>`` feature.
    """

    def __init__(self, featureSet=None):
        """Initialise the base builder and load the NLTK WordNet reader.

        featureSet is passed unchanged to ``FeatureBuilder.__init__``.
        """
        FeatureBuilder.__init__(self, featureSet)
        # Imported inside the constructor, so importing this module does not
        # by itself require NLTK.
        from nltk.corpus import wordnet
        self.wordnet = wordnet
        # write() instead of the Python-2-only "print >>" statement; the
        # emitted text is the same.
        sys.stderr.write("Using WordNet via NLTK\n")

    def _synsetAttr(self, synset, attrName):
        """Return the value of a synset field such as "name" or "lexname".

        NLTK 2.x exposes these fields as plain attributes, NLTK 3.x as
        zero-argument methods. Calling the value when it is callable lets
        the same code build string features with either version.
        """
        value = getattr(synset, attrName)
        if callable(value):
            value = value()
        return value

    def pennPOSToWordNet(self, pos):
        """Map a Penn Treebank POS tag to a WordNet POS letter.

        Returns "a", "n", "v" or "r" for tags starting with "JJ", "NN",
        "VB" or "RB" respectively, and None for any other tag.
        """
        if pos.startswith("JJ"):
            return "a"  # wn.ADJ
        if pos.startswith("NN"):
            return "n"  # wn.NOUN
        if pos.startswith("VB"):
            return "v"  # wn.VERB
        if pos.startswith("RB"):
            return "r"  # wn.ADV
        return None

    def getSynset(self, text, wordNetPos):
        """Look up the first synset of ``text`` for the given WordNet POS.

        Returns a one-element list holding the first synset returned by
        ``self.wordnet.synsets``, or None when ``wordNetPos`` is None or no
        synset is found.
        """
        if wordNetPos is None:
            return None
        synsets = self.wordnet.synsets(text, pos=wordNetPos)
        if synsets:
            return [synsets[0]]
        return None

    def getHypernyms(self, synset):
        """Return "HYPER_"-prefixed names of ``synset`` and its hypernyms.

        The synset's own name comes first (the base level), followed by the
        names of the synsets returned by ``synset.hypernyms()``.
        """
        rv = ["HYPER_" + self._synsetAttr(synset, "name")]  # base level
        rv.extend("HYPER_" + self._synsetAttr(hypernym, "name")
                  for hypernym in synset.hypernyms())
        return rv

    def getTokenFeatures(self, tokenText, pennPos):
        """Return the WordNet feature strings for one token.

        tokenText -- the token's text; None yields an empty list
        pennPos   -- the token's Penn Treebank POS tag

        Returns the hypernym features of the token's first synset followed
        by a "LEX_" feature built from that synset's lexname. The list is
        empty when the POS tag has no WordNet equivalent or no synset is
        found.
        """
        if tokenText is None:
            return []
        wordNetPos = self.pennPOSToWordNet(pennPos)
        synsets = self.getSynset(tokenText, wordNetPos)
        if synsets is None:
            return []
        firstSynset = synsets[0]
        rv = self.getHypernyms(firstSynset)
        rv.append("LEX_" + self._synsetAttr(firstSynset, "lexname"))
        return rv
if __name__ == "__main__":
    # Manual smoke test: print the features produced for the noun "cat".
    builder = WordNetFeatureBuilder()
    print(builder.getTokenFeatures("cat", "NN"))