1 import sys, os
2 sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../.."))
3
4 from Core.IdSet import IdSet
5 import Utils.Libraries.PorterStemmer as PorterStemmer
6 import Core.ExampleUtils as ExampleUtils
7 from FeatureBuilder import FeatureBuilder
8
11 FeatureBuilder.__init__(self, featureSet)
12 from nltk.corpus import wordnet
13 self.wordnet = wordnet
14 print >> sys.stderr, "Using WordNet via NLTK"
15
def pennPOSToWordNet(self, pos):
    """
    Map a Penn Treebank POS tag to the single-letter WordNet POS code.

    Tags are matched by prefix, so inflected variants such as "NNS" or
    "VBD" map to the same code as their base tag.

    pos -- Penn Treebank tag string (e.g. "JJ", "NNS", "VBD", "RB"), or None
    Returns "a" for JJ*, "n" for NN*, "v" for VB*, "r" for RB*, and None
    when the tag is missing, empty, or belongs to any other class.
    """
    # A missing tag is treated like an unmapped one instead of raising
    # AttributeError on None.startswith().
    if not pos:
        return None
    prefixToWordNet = (("JJ", "a"), ("NN", "n"), ("VB", "v"), ("RB", "r"))
    for prefix, wordNetPos in prefixToWordNet:
        if pos.startswith(prefix):
            return wordNetPos
    return None
29
31 if wordNetPos == None:
32 return None
33 synsets = self.wordnet.synsets(text, pos=wordNetPos)
34 if len(synsets) > 0:
35 return [synsets[0]]
36 else:
37 return None
38
40 rv = []
41 rv.append("HYPER_"+synset.name)
42 for hypernym in synset.hypernyms():
43 rv.append("HYPER_"+hypernym.name)
44 return rv
45
47
48 rv = []
49 if tokenText == None:
50 return rv
51
52 wordNetPos = self.pennPOSToWordNet(pennPos)
53
54 synsets = self.getSynset(tokenText, wordNetPos)
55
56 if synsets != None:
57 rv.extend(self.getHypernyms(synsets[0]))
58 rv.append("LEX_" + synsets[0].lexname)
59
60 return rv
61
if __name__ == "__main__":
    # Manual smoke test: build the feature builder with its default
    # arguments and print the features generated for the noun "cat".
    builder = WordNetFeatureBuilder()
    features = builder.getTokenFeatures("cat", "NN")
    print(features)
65