1 import sys, os
2 from FeatureBuilder import FeatureBuilder
3 sys.path.append(os.path.dirname(os.path.abspath(__file__))+"/../..")
4 import Utils.Settings as Settings
5 import Utils.Download
6
7
8
9
10
11
12
def readBacsu(filename):
    """
    Read a bacsu-style gene list and map each primary name to its synonyms.

    Only lines whose first three characters are "BSU" are used. On such a
    line the first four whitespace-separated fields are ignored; every
    remaining field is treated as a name, with ";" characters removed and
    the text lower-cased. The first name becomes the dictionary key and the
    remaining names become its synonyms. If the same primary name occurs
    again, a warning is written to stderr and the new synonyms are appended
    to the existing list.

    @param filename: path of the text file to read
    @return: dict mapping lower-cased primary name -> list of lower-cased
             synonyms (the list may be empty)
    """
    synDict = {}
    # "with" guarantees the file is closed even if parsing raises part-way.
    with open(filename) as f:
        for line in f:
            if not line.startswith("BSU"):
                continue
            names = [name.replace(";", "").lower() for name in line.split()[4:]]
            if not names:
                # A "BSU" line without any name fields has no primary name;
                # indexing names[0] would raise IndexError, so skip it.
                continue
            primary, synonyms = names[0], names[1:]
            if primary in synDict:
                # Same text the old "print >> sys.stderr" produced, written in
                # a form that is valid on both Python 2 and Python 3.
                sys.stderr.write("Warning, " + primary + " already a primary name\n")
                synDict[primary].extend(synonyms)
            else:
                synDict[primary] = synonyms
    return synDict
33
34
35
def readSubtiwiki(filename):
    """
    Read a comma-separated synonym file and map each primary name to its synonyms.

    Each non-blank line is stripped of surrounding whitespace, split on ","
    and lower-cased. The first field becomes the dictionary key and the
    remaining fields become its synonyms. If the same primary name occurs
    again, a warning is written to stderr and the new synonyms are appended
    to the existing list.

    @param filename: path of the comma-separated file to read
    @return: dict mapping lower-cased primary name -> list of lower-cased
             synonyms (the list may be empty)
    """
    synDict = {}
    # "with" guarantees the file is closed even if parsing raises part-way.
    with open(filename) as f:
        for line in f:
            line = line.strip()
            if not line:
                # A blank line would otherwise register "" as a primary name
                # (and trigger a warning for every further blank line).
                continue
            names = [name.lower() for name in line.split(",")]
            primary, synonyms = names[0], names[1:]
            if primary in synDict:
                # Same text the old "print >> sys.stderr" produced, written in
                # a form that is valid on both Python 2 and Python 3.
                sys.stderr.write("Warning, " + primary + " already a primary name\n")
                synDict[primary].extend(synonyms)
            else:
                synDict[primary] = synonyms
    return synDict
52
def installRENData(destPath=None, downloadPath=None, redownload=False, updateLocalSettings=False):
    """
    Download and extract the TEES resource files used for REN and record
    where they were installed.

    @param destPath: directory to extract into; when None,
                     <Settings.DATAPATH>/resources is used
    @param downloadPath: directory for the downloaded archive; when None,
                         <Settings.DATAPATH>/resources/download is used
    @param redownload: forwarded to Utils.Download.downloadAndExtract
    @param updateLocalSettings: forwarded as the third argument of
                                Settings.setLocal
    """
    # Same text the old "print >> sys.stderr" statements produced, written in
    # a form that is valid on both Python 2 and Python 3.
    sys.stderr.write("--------------- Downloading TEES data files for REN ---------------\n")
    sys.stderr.write("These files are derived from UniProt bacsu and SubtiWiki\n")
    # Identity comparison is the correct test for the None default.
    if destPath is None:
        destPath = os.path.join(Settings.DATAPATH, "resources")
    if downloadPath is None:
        downloadPath = os.path.join(Settings.DATAPATH, "resources/download")
    Utils.Download.downloadAndExtract(Settings.URL["TEES_RESOURCES"], destPath, downloadPath, redownload=redownload)
    Settings.setLocal("TEES_RESOURCES", destPath, updateLocalSettings)
62
63
def __init__(self, featureSet):
    """
    Initialise the builder and load the synonym tables it uses.

    Calls FeatureBuilder.__init__, installs the TEES resource files if
    Settings has no TEES_RESOURCES attribute, and then fills four
    dictionaries keyed by primary name:

      self.bacsu  synonyms read from "bacsu-modified.txt"
      self.subti  synonyms read from "Subtiwiki-Synonyms.csv"
      self.any    sorted synonyms found in either table
      self.all    sorted synonyms found in both tables

    NOTE(review): the enclosing `class` line is not part of this excerpt;
    this is written as the initializer of a FeatureBuilder subclass.

    @param featureSet: passed unchanged to FeatureBuilder.__init__
    """
    FeatureBuilder.__init__(self, featureSet)

    if not hasattr(Settings, "TEES_RESOURCES"):
        # Same text the old "print >> sys.stderr" produced, valid on Python 2 and 3.
        sys.stderr.write("TEES example builder data files not installed, installing now\n")
        installRENData(updateLocalSettings=True)
    self.bacsu = readBacsu(os.path.join(Settings.TEES_RESOURCES, "bacsu-modified.txt"))
    self.subti = readSubtiwiki(os.path.join(Settings.TEES_RESOURCES, "Subtiwiki-Synonyms.csv"))

    self.any = {}
    self.all = {}
    # One pass over the union of primary names builds both derived tables;
    # set union / intersection replaces the per-element membership loops.
    for key in sorted(set(self.bacsu) | set(self.subti)):
        bacsuSynonyms = set(self.bacsu.get(key, ()))
        subtiSynonyms = set(self.subti.get(key, ()))
        self.any[key] = sorted(bacsuSynonyms | subtiSynonyms)
        self.all[key] = sorted(bacsuSynonyms & subtiSynonyms)
105
# NOTE(review): the `def` line of this method is not part of this excerpt.
# Run the synonym check for the (e1, e2) pair once per synonym table:
# bacsu only, Subtiwiki only, the "any" table and the "all" table. The last
# argument is the tag passed along with each table.
self.buildPairFeaturesDict(e1, e2, self.bacsu, "bacsu")
self.buildPairFeaturesDict(e1, e2, self.subti, "subti")
self.buildPairFeaturesDict(e1, e2, self.any, "any")
self.buildPairFeaturesDict(e1, e2, self.all, "all")
111
113
def buildPairFeaturesDict(self, e1, e2, synDict, synTag):
    """
    Set a feature when one entity's text is listed as a synonym of the other's.

    The check is made in both directions. If the stripped, lower-cased text
    of e2 is among the synonyms stored under e1's text, the feature
    "frw_<synTag>_synonym" is set; if e1's text is among the synonyms stored
    under e2's text, "rev_<synTag>_synonym" is set.

    @param e1: first entity; its "text" value is read with .get("text")
    @param e2: second entity; its "text" value is read with .get("text")
    @param synDict: dict mapping a primary name to a collection of synonyms
    @param synTag: label inserted into the feature name
    """
    for tag, (primary, candidate) in (("frw_", (e1, e2)), ("rev_", (e2, e1))):
        primaryText = primary.get("text").strip().lower()
        candidateText = candidate.get("text").strip().lower()
        # A single .get replaces has_key + second lookup; has_key is
        # Python-2-only, while .get works on both Python 2 and Python 3.
        if candidateText in synDict.get(primaryText, ()):
            self.setFeature(tag + synTag + "_synonym")
120
# NOTE(review): the `def` line of this method is not part of this excerpt.
# Set two features describing the surface forms of the e1/e2 pair: one built
# from the first three characters of each text, one from the last character
# of each text. Both texts are stripped and lower-cased first.
e1Text = e1.get("text").strip().lower()
e2Text = e2.get("text").strip().lower()
# An empty text has no characters to slice, so the literal "NONE" stands in.
if e1Text != "":
    e1FirstThreeLetters = e1Text[0:3]
    e1LastLetter = e1Text[-1]
else:
    e1FirstThreeLetters = "NONE"
    e1LastLetter = "NONE"
if e2Text != "":
    e2FirstThreeLetters = e2Text[0:3]
    e2LastLetter = e2Text[-1]
else:
    e2FirstThreeLetters = "NONE"
    e2LastLetter = "NONE"
self.setFeature("REN_subpair_f3_" + e1FirstThreeLetters + "_" + e2FirstThreeLetters)
self.setFeature("REN_subpair_l1_" + e1LastLetter + "_" + e2LastLetter)
138