1  import sys, os 
  2  from FeatureBuilder import FeatureBuilder 
  3  sys.path.append(os.path.dirname(os.path.abspath(__file__))+"/../..") 
  4  import Utils.Settings as Settings 
  5  import Utils.Download 
  6   
  7   
  8   
  9   
 10   
 11   
 12   
 14      f = open(filename) 
 15      synDict = {} 
 16      lowerCased = set() 
 17      for line in f: 
 18          if line[0:3] != "BSU": 
 19              continue 
 20          synSplits = line.split()[4:] 
 21          synList = [] 
 22          for name in synSplits: 
 23              name = name.replace(";", "") 
 24              name = name.lower() 
 25              synList.append(name) 
 26          if not synList[0] in synDict: 
 27              synDict[synList[0]] = synList[1:] 
 28          else: 
 29              print >> sys.stderr, "Warning,", synList[0], "already a primary name" 
 30              synDict[synList[0]].extend(synList[1:]) 
 31      f.close() 
 32      return synDict 
  33   
 34   
 35   
 37      f = open(filename) 
 38      synDict = {} 
 39      lowerCased = set() 
 40      for line in f: 
 41          line = line.strip() 
 42          synList = line.split(",") 
 43          for i in range(len(synList)): 
 44              synList[i] = synList[i].lower() 
 45          if not synList[0] in synDict: 
 46              synDict[synList[0]] = synList[1:] 
 47          else: 
 48              print >> sys.stderr, "Warning,", synList[0], "already a primary name" 
 49              synDict[synList[0]].extend(synList[1:]) 
 50      f.close() 
 51      return synDict 
  52   
 53 -def installRENData(destPath=None, downloadPath=None, redownload=False, updateLocalSettings=False): 
  54      print >> sys.stderr, "---------------", "Downloading TEES data files for REN", "---------------" 
 55      print >> sys.stderr, "These files are derived from UniProt bacsu and SubtiWiki" 
 56      if destPath == None: 
 57          destPath = os.path.join(Settings.DATAPATH, "resources") 
 58      if downloadPath == None: 
 59          downloadPath = os.path.join(Settings.DATAPATH, "resources/download") 
 60      Utils.Download.downloadAndExtract(Settings.URL["TEES_RESOURCES"], destPath, downloadPath, redownload=redownload) 
 61      Settings.setLocal("TEES_RESOURCES", destPath, updateLocalSettings) 
  62   
 63   
 66          FeatureBuilder.__init__(self, featureSet) 
 67           
 68           
 69           
 70          if not hasattr(Settings, "TEES_RESOURCES"): 
 71              print >> sys.stderr, "TEES example builder data files not installed, installing now" 
 72              installRENData(updateLocalSettings=True) 
 73          self.bacsu = readBacsu(os.path.join(Settings.TEES_RESOURCES, "bacsu-modified.txt")) 
 74          self.subti = readSubtiwiki(os.path.join(Settings.TEES_RESOURCES, "Subtiwiki-Synonyms.csv")) 
 75           
 76          self.any = {} 
 77          for key in sorted(list(set(self.bacsu.keys() + self.subti.keys()))): 
 78              self.any[key] = set() 
 79              if self.bacsu.has_key(key): 
 80                  for value in self.bacsu[key]:  
 81                      self.any[key].add(value) 
 82              if self.subti.has_key(key): 
 83                  for value in self.subti[key]:  
 84                      self.any[key].add(value) 
 85              self.any[key] = list(self.any[key]) 
 86              self.any[key].sort() 
 87           
 88          self.all = {} 
 89          for key in sorted(list(set(self.bacsu.keys() + self.subti.keys()))): 
 90              self.all[key] = set()   
 91              allSynonyms = set() 
 92              bacsuSet = set() 
 93              if self.bacsu.has_key(key): 
 94                  bacsuSet = self.bacsu[key] 
 95                  for x in bacsuSet: allSynonyms.add(x) 
 96              subtiSet = set() 
 97              if self.subti.has_key(key): 
 98                  subtiSet = self.subti[key] 
 99                  for x in subtiSet: allSynonyms.add(x) 
100              for synonym in allSynonyms: 
101                  if synonym in bacsuSet and synonym in subtiSet: 
102                      self.all[key].add(synonym) 
103              self.all[key] = list(self.all[key]) 
104              self.all[key].sort() 
 105       
107          self.buildPairFeaturesDict(e1, e2, self.bacsu, "bacsu") 
108          self.buildPairFeaturesDict(e1, e2, self.subti, "subti") 
109          self.buildPairFeaturesDict(e1, e2, self.any, "any") 
110          self.buildPairFeaturesDict(e1, e2, self.all, "all") 
 111           
113           
114          for tag, pair in ( ("frw_", (e1, e2)), ("rev_", (e2, e1)) ): 
115              e1Text = pair[0].get("text").strip().lower() 
116              e2Text = pair[1].get("text").strip().lower() 
117              if synDict.has_key(e1Text): 
118                  if e2Text in synDict[e1Text]: 
119                      self.setFeature(tag + synTag + "_synonym") 
 120       
122          e1Text = e1.get("text").strip().lower() 
123          e2Text = e2.get("text").strip().lower() 
124          if e1Text != "": 
125              e1FirstThreeLetters = e1Text[0:3] 
126              e1LastLetter = e1Text[-1] 
127          else: 
128              e1FirstThreeLetters = "NONE" 
129              e1LastLetter = "NONE" 
130          if e2Text != "": 
131              e2FirstThreeLetters = e2Text[0:3] 
132              e2LastLetter = e2Text[-1] 
133          else: 
134              e2FirstThreeLetters = "NONE" 
135              e2LastLetter = "NONE" 
136          self.setFeature("REN_subpair_f3_" + e1FirstThreeLetters + "_" + e2FirstThreeLetters) 
137          self.setFeature("REN_subpair_l1_" + e1LastLetter + "_" + e2LastLetter) 
  138