1 import sys, os
2 sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../.."))
3 from Core.IdSet import IdSet
4 import Core.ExampleUtils as ExampleUtils
5 from FeatureBuilder import FeatureBuilder
6 import Utils.Settings as Settings
7 import Utils.Download
8 import Utils.ElementTreeUtils as ETUtils
9 from collections import defaultdict
10
def installDrugBank(destPath=None, downloadPath=None, redownload=False, updateLocalSettings=False):
    """
    Fetch the DrugBank XML and record its location under the
    "DRUG_BANK_XML" settings key.

    destPath -- directory passed to the downloader as the extraction
                target; when None, "resources" under Settings.DATAPATH
    downloadPath -- directory passed to the downloader for the download;
                    when None, "resources/download" under Settings.DATAPATH
    redownload -- forwarded unchanged to Utils.Download.downloadAndExtract
    updateLocalSettings -- forwarded unchanged to Settings.setLocal
    """
    print >> sys.stderr, "---------------", "Downloading Drug Bank XML", "---------------"
    print >> sys.stderr, "See http://www.drugbank.ca/downloads for conditions of use"
    # Settings.DATAPATH is consulted only for the locations the caller left out
    if destPath is None:
        destPath = os.path.join(Settings.DATAPATH, "resources")
    if downloadPath is None:
        downloadPath = os.path.join(Settings.DATAPATH, "resources/download")
    extracted = Utils.Download.downloadAndExtract(
        Settings.URL["DRUG_BANK_XML"],
        destPath,
        downloadPath,
        redownload=redownload)
    # Exactly one extracted file is expected; that file is the one registered
    assert len(extracted) == 1
    xmlPath = os.path.join(destPath, extracted[0])
    Settings.setLocal("DRUG_BANK_XML", xmlPath, updateLocalSettings)
21
23 data = None
24
36
49
51 rv = [str(e1.get(attr)).lower().replace(" ", ""), str(e2.get(attr)).lower().replace(" ", "")]
52 if rv[0] == "": rv[0] = "none"
53 if rv[1] == "": rv[1] = "none"
54 rv.sort()
55 return rv
56
58 names = self.getMTMXAttrs(e1, e2, "mtmxName")
59 self.setFeature("mtmxNames-" + "-".join(names))
60 if names[0] == names[1]:
61 if names[0] in ["", "none"]:
62 self.setFeature("mtmxNames-both_unknown")
63 else:
64 self.setFeature("mtmxNames-both_identical")
65 self.setFeature("mtmxShortNames-" + "-".join(self.getMTMXAttrs(e1, e2, "mtmxNameShort")))
66 mtmxCuis = self.getMTMXAttrs(e1, e2, "mtmxCui")
67 for mtmxCui in mtmxCuis:
68 self.setFeature("mtmxCui_" + mtmxCui)
69 self.setFeature("mtmxCuis-" + "-".join(mtmxCuis))
70
71 rv = self.getMTMXAttrs(e1, e2, "mtmxProb")
72 if rv[0] in ["", "none"]: rv[0] = "0"
73 if rv[1] in ["", "none"]: rv[1] = "0"
74 rv[0] = int(rv[0])
75 rv[1] = int(rv[1])
76 assert rv[0] <= 1000 and rv[1] <= 1000, (rv[0], rv[1])
77 rv.sort()
78 self.setFeature("mtmxProbMin", float(rv[0]) / 1000.0)
79 self.setFeature("mtmxProbMax", float(rv[1]) / 1000.0)
80
81 sem = self.getMTMXAttrs(e1, e2, "mtmxSemTypes")
82
83 for i in sem[0].split(","):
84 for j in sem[1].split(","):
85 semPair = [i, j]
86 semPair.sort()
87
88 self.setFeature("semPair-" + "-".join(semPair))
89 self.setFeature("semType-" + i)
90 self.setFeature("semType-" + j)
91
105
108
def getNestedItems(parent, term, data, preTag, termPlural=None, verbose=False):
    """
    Collect the text of all <term> elements found inside a container
    child of parent and append them, in document order, to data[term].

    parent -- element whose container child is searched
    term -- tag name (without prefix) of the nested items; also the key
            under which the texts are stored in data
    data -- mapping from term to a list; data[term] must support
            append() (e.g. a defaultdict(list))
    preTag -- prefix prepended to every tag name (e.g. a "{namespace}")
    termPlural -- tag name of the container; when None the container
                  is assumed to be named term + "s"
    verbose -- print each collected item

    A missing container is treated as "no items": nothing is appended
    and data[term] is not touched. The text of an empty element is
    stored as None, as returned by ElementTree.
    """
    containerName = termPlural if termPlural is not None else term + "s"
    container = parent.find(preTag + containerName)
    # Compare against None explicitly: an element without children is falsy
    if container is None:
        return
    for item in container.findall(preTag + term):
        data[term].append(item.text)
        if verbose:
            # %-formatting tolerates item.text being None (empty element),
            # where string concatenation would raise a TypeError
            print(" %s: %s" % (term, item.text))
117
119 counts = defaultdict(int)
120 if verbose: print "Resolving Interactions"
121 for id in data:
122 if verbose: print id, data[id]["name"]
123 for interaction in data[id]["interaction"]:
124 partnerDBId = str(interaction[0])
125 partnerDBId = "DB" + (5 - len(partnerDBId)) * "0" + partnerDBId
126 interaction[0] = partnerDBId
127 if partnerDBId in data:
128 interaction[1] = data[partnerDBId]["name"]
129 counts["found-partner-ids"] += 1
130 else:
131 counts["missing-partner-ids"] += 1
132 if verbose: print " ", interaction
133 if verbose: print "Interaction resolution counts:", counts
134
136 intPairs = defaultdict(lambda : defaultdict(lambda: False))
137 for id in data:
138 for interaction in data[id]["interaction"]:
139 if interaction[1] != None:
140 intPairs[id][interaction[0]] = True
141 intPairs[interaction[0]][id] = True
142 return intPairs
143
145 counts = defaultdict(int)
146 nameToId = defaultdict(list)
147 for id in sorted(data.keys()):
148 for name in [data[id]["name"]] + data[id]["synonym"] + data[id]["brand"]:
149
150 if normalize:
151 name = normalizeDrugName(name)
152 if id not in nameToId[name]:
153 nameToId[name].append(id)
154
155 for name in nameToId:
156 counts[len(nameToId[name])] += 1
157 if len(nameToId[name]) > 2:
158 if verbose: print "Multiple ids:", len(nameToId[name]), name, nameToId[name]
159 if verbose: print "Name to id:", counts
160 return nameToId
161
162 -def loadDrugBank(filename, preTag="{http://drugbank.ca}", verbose=False):
163 data = defaultdict(lambda : defaultdict(list))
164 print "Loading DrugBank XML"
165 xml = ETUtils.ETFromObj(filename)
166 print "Processing DrugBank XML"
167 root = xml.getroot()
168 assert root.tag == preTag+"drugs", root.tag
169 for drug in root.findall(preTag+"drug"):
170 id = drug.find(preTag+"drugbank-id").text
171 name = drug.find(preTag+"name").text
172 if verbose: print id, name
173 assert id not in data
174 data[id]["name"] = name
175
176
177 getNestedItems(drug, "synonym", data[id], preTag)
178 getNestedItems(drug, "brand", data[id], preTag)
179 getNestedItems(drug, "group", data[id], preTag)
180 getNestedItems(drug, "category", data[id], preTag, "categories")
181 interactions = drug.find(preTag+"drug-interactions").findall(preTag+"drug-interaction")
182 for interaction in interactions:
183 data[id]["interaction"].append( [interaction.find(preTag+"drug").text, None, interaction.find(preTag+"description").text,] )
184 return data
185
191
192 if __name__=="__main__":
193
194
195
196
197
198 f = DrugFeatureBuilder()
199
200 print "1:", f.getInteraction("Refludan", "Treprostinil")
201 print "2:", f.getInteraction("Refludan", "TreprostinilBlahBlah")
202 print "3:", f.getInteraction("Refludan", "[4-({5-(AMINOCARBONYL)-4-[(3-METHYLPHENYL)AMINO]PYRIMIDIN-2-YL}AMINO)PHENYL]ACETIC ACID")
203