1  import sys, os 
  2  thisPath = os.path.dirname(os.path.abspath(__file__)) 
  3  sys.path.append(os.path.abspath(os.path.join(thisPath,"../.."))) 
  4  import Utils.ElementTreeUtils as ETUtils 
  5  from collections import defaultdict 
  6   
  8      xml = ETUtils.ETFromObj(input) 
  9      outFile = open(output, "wt") 
 10      for sentence in xml.getiterator("sentence"): 
 11           
 12          intMap = defaultdict(lambda:defaultdict(lambda:None)) 
 13          for interaction in sentence.findall("interaction"): 
 14               
 15               
 16               
 17              if interaction.get("type") != "neg": 
 18                  intMap[interaction.get("e1")][interaction.get("e2")] = interaction 
 19                  intMap[interaction.get("e2")][interaction.get("e1")] = interaction 
 20           
 21          entities = sentence.findall("entity") 
 22          for i in range(0, len(entities)-1): 
 23              for j in range(i+1, len(entities)): 
 24                  eIId = entities[i].get("id") 
 25                  eJId = entities[j].get("id") 
 26                  outFile.write(eIId + "\t" + eJId + "\t") 
 27                  if intMap[eIId][eJId] != None: 
 28                      outFile.write("1\n") 
 29                  else: 
 30                      outFile.write("0\n") 
  31   
 33      assert os.path.exists(input), input 
 34      f = open(input, "rt") 
 35      inputLines = f.readlines() 
 36      f.close() 
 37       
 38      assert os.path.exists(rls), rls 
 39      f = open(rls, "rt") 
 40      rlsLines = f.readlines() 
 41      f.close() 
 42       
 43      outFile = open(output, "wt") 
 44      assert len(inputLines) == len(rlsLines), (len(inputLines), len(rlsLines)) 
 45      for inputLine, rlsLine in zip(inputLines, rlsLines): 
 46          outFile.write(inputLine.rsplit("\t", 1)[0] + "\t" + rlsLine) 
 47      outFile.close() 
  48   
 49 -def addMTMX(input, mtmxDir, output=None): 
  50      from collections import defaultdict 
 51       
 52      print "Reading interaction XML" 
 53      counts = defaultdict(int) 
 54      xml = ETUtils.ETFromObj(input).getroot() 
 55      docById = {} 
 56      for document in xml.getiterator("document"): 
 57          docId = document.get("origId") 
 58          assert docId not in docById 
 59          docById[docId] = document 
 60          counts["document"] += 1 
 61      for entity in xml.getiterator("entity"): 
 62          counts["entity"] += 1 
 63       
 64       
 65      print "Processing MTMX" 
 66      for filename in sorted(os.listdir(mtmxDir)): 
 67          if filename.endswith(".xml"): 
 68              print >> sys.stderr, filename, 
 69              fileId = filename.split("_")[0] 
 70              if fileId not in docById: 
 71                  print >> sys.stderr, "skipped" 
 72                  continue 
 73              else: 
 74                  print >> sys.stderr, "processing" 
 75              doc = docById[fileId] 
 76              entityByOrigId = {} 
 77              for entity in doc.getiterator("entity"): 
 78                  assert entity.get("origId") not in entityByOrigId, entity.get("origId") 
 79                  entityByOrigId[entity.get("origId")] = entity 
 80              mtmx = ETUtils.ETFromObj(os.path.join(mtmxDir, filename)).getroot() 
 81              for phrase in mtmx.getiterator("PHRASE"): 
 82                  if phrase.get("ID") in entityByOrigId: 
 83                      entity = entityByOrigId[phrase.get("ID")] 
 84                      mapCount = 0 
 85                      for map in phrase.getiterator("MAP"): 
 86                          if (map.get("NAME").lower() == entity.get("text").lower()) or (map.get("NAME_SHORT").lower() == entity.get("text").lower()): 
 87                              if entity.get("mtmxProb") != None: 
 88                                  if int(entity.get("mtmxProb")) > int(map.get("PROB")): 
 89                                      break 
 90                                  else: 
 91                                      counts["mapped-multi"] += 1 
 92                                      counts["mapped-multi-"+str(mapCount)] += 1 
 93                                       
 94                              else: 
 95                                  counts["mapped-at-least-once"] += 1 
 96                              entity.set("mtmxProb", str(map.get("PROB"))) 
 97                              entity.set("mtmxCui", str(map.get("CUI"))) 
 98                              entity.set("mtmxName", str(map.get("NAME"))) 
 99                              entity.set("mtmxNameShort", str(map.get("NAME_SHORT"))) 
100                              entity.set("mtmxSemTypes", str(map.get("SEMTYPES"))) 
101                              counts["mappings"] += 1 
102                              mapCount += 1 
103      print >> sys.stderr, counts 
104      if output != None: 
105          ETUtils.write(xml, output) 
 106                   
if __name__ == "__main__":
    # Psyco is optional: enable it when it can be imported, otherwise run
    # without it.
    try:
        import psyco
        psyco.full()
        print >> sys.stderr, "Found Psyco, using"
    except ImportError:
        print >> sys.stderr, "Psyco not installed"

    from optparse import OptionParser
    actions = ("SUBMISSION", "TRANSFER_RLS", "ADD_MTMX")
    optparser = OptionParser(description="Tools for the DDI'11 Shared Task")
    optparser.add_option("-i", "--input", default=None, dest="input", help="input file (interaction XML)")
    optparser.add_option("-o", "--output", default=None, dest="output", help="output file (txt file)")
    optparser.add_option("-d", "--add", default=None, dest="add", help="data to be added, e.g. rls classifications")
    optparser.add_option("-a", "--action", default=None, dest="action", help="one of " + ", ".join(actions))
    (options, args) = optparser.parse_args()

    # Reject a missing or unknown action explicitly. An assert would be
    # stripped under "python -O", letting the script exit silently without
    # doing anything; optparser.error prints the usage and exits with 2.
    if options.action not in actions:
        optparser.error("--action must be one of " + ", ".join(actions) + ", got " + repr(options.action))

    if options.action == "SUBMISSION":
        makeDDISubmissionFile(options.input, options.output)
    elif options.action == "TRANSFER_RLS":
        transferClassifications(options.input, options.add, options.output)
    else:
        # Only ADD_MTMX remains after the validation above.
        addMTMX(options.input, options.add, options.output)
133