1 import sys, os
2 thisPath = os.path.dirname(os.path.abspath(__file__))
3 sys.path.append(os.path.abspath(os.path.join(thisPath,"../..")))
4 import Utils.ElementTreeUtils as ETUtils
5 from collections import defaultdict
6
def makeDDISubmissionFile(input, output):
    """Write one line per entity pair of every sentence in an interaction XML.

    For each "sentence" element, every unordered pair of its "entity"
    children (in document order) produces one output line of the form
    ``<id of first>\\t<id of second>\\t<label>``, where the label is "1" when
    the sentence has an "interaction" child connecting the two entities
    (via its "e1"/"e2" attributes, in either direction) whose "type" is not
    "neg", and "0" otherwise.

    input  -- interaction XML, in whatever form ETUtils.ETFromObj accepts
    output -- path of the text file to write
    """
    xml = ETUtils.ETFromObj(input)
    # The context manager closes the file even if the XML is malformed
    # (e.g. an entity without an "id" makes the string concatenation fail).
    with open(output, "wt") as outFile:
        for sentence in xml.getiterator("sentence"):
            # Unordered id pairs joined by a non-"neg" interaction. A set of
            # frozensets makes the lookup symmetric without storing both
            # directions and without growing on every membership test.
            positivePairs = set()
            for interaction in sentence.findall("interaction"):
                if interaction.get("type") != "neg":
                    positivePairs.add(frozenset((interaction.get("e1"), interaction.get("e2"))))

            entityIds = [entity.get("id") for entity in sentence.findall("entity")]
            for i, firstId in enumerate(entityIds):
                for secondId in entityIds[i + 1:]:
                    if frozenset((firstId, secondId)) in positivePairs:
                        label = "1"
                    else:
                        label = "0"
                    outFile.write(firstId + "\t" + secondId + "\t" + label + "\n")
31
def transferClassifications(input, rls, output):
    """Replace the last tab-separated column of `input` with the lines of `rls`.

    Line k of the output is line k of `input` with everything after its last
    tab removed, followed by a tab and line k of `rls` (including that line's
    own line ending).

    input  -- path of the tab-separated text file whose last column is replaced
    rls    -- path of the text file holding one replacement value per line
    output -- path of the text file to write

    Raises AssertionError if either input file is missing or if the two
    files do not have the same number of lines.
    """
    assert os.path.exists(input), input
    with open(input, "rt") as f:
        inputLines = f.readlines()

    assert os.path.exists(rls), rls
    with open(rls, "rt") as f:
        rlsLines = f.readlines()

    # Check before opening the output: opening with "wt" truncates the file,
    # so a mismatch must be detected first to leave existing output intact.
    assert len(inputLines) == len(rlsLines), (len(inputLines), len(rlsLines))
    with open(output, "wt") as outFile:
        for inputLine, rlsLine in zip(inputLines, rlsLines):
            outFile.write(inputLine.rsplit("\t", 1)[0] + "\t" + rlsLine)
48
def addMTMX(input, mtmxDir, output=None):
    """Copy matching MTMX mappings onto the entities of an interaction XML.

    Every ".xml" file in `mtmxDir` whose name prefix (the part before the
    first "_") equals the "origId" of a document in the interaction XML is
    parsed. For each of its "PHRASE" elements whose "ID" equals the "origId"
    of an entity in that document, the "MAP" descendants are scanned; a MAP
    whose "NAME" or "NAME_SHORT" equals the entity's "text" (ignoring case)
    is copied to the entity as the attributes mtmxProb, mtmxCui, mtmxName,
    mtmxNameShort and mtmxSemTypes. Counters are written to stderr.

    input   -- interaction XML, in whatever form ETUtils.ETFromObj accepts
    mtmxDir -- directory containing the MTMX XML files
    output  -- if not None, the modified XML is passed to ETUtils.write

    Raises AssertionError on duplicate document or entity "origId" values.
    """
    from collections import defaultdict

    print("Reading interaction XML")
    counts = defaultdict(int)
    xml = ETUtils.ETFromObj(input).getroot()
    docById = {}
    for document in xml.getiterator("document"):
        docId = document.get("origId")
        assert docId not in docById, docId
        docById[docId] = document
        counts["document"] += 1
    for entity in xml.getiterator("entity"):
        counts["entity"] += 1

    # (entity attribute, MAP attribute) pairs copied for an accepted mapping.
    copiedAttributes = (("mtmxProb", "PROB"),
                        ("mtmxCui", "CUI"),
                        ("mtmxName", "NAME"),
                        ("mtmxNameShort", "NAME_SHORT"),
                        ("mtmxSemTypes", "SEMTYPES"))

    print("Processing MTMX")
    for filename in sorted(os.listdir(mtmxDir)):
        if not filename.endswith(".xml"):
            continue
        fileId = filename.split("_")[0]
        if fileId not in docById:
            sys.stderr.write(filename + " skipped\n")
            continue
        sys.stderr.write(filename + " processing\n")

        entityByOrigId = {}
        for entity in docById[fileId].getiterator("entity"):
            assert entity.get("origId") not in entityByOrigId, entity.get("origId")
            entityByOrigId[entity.get("origId")] = entity

        mtmx = ETUtils.ETFromObj(os.path.join(mtmxDir, filename)).getroot()
        for phrase in mtmx.getiterator("PHRASE"):
            if phrase.get("ID") not in entityByOrigId:
                continue
            entity = entityByOrigId[phrase.get("ID")]
            mapCount = 0  # mappings applied to the entity from this phrase so far
            for mapping in phrase.getiterator("MAP"):
                entityText = entity.get("text").lower()
                # NAME_SHORT is only consulted when NAME does not match.
                if mapping.get("NAME").lower() != entityText and mapping.get("NAME_SHORT").lower() != entityText:
                    continue
                previousProb = entity.get("mtmxProb")
                if previousProb is None:
                    counts["mapped-at-least-once"] += 1
                elif int(previousProb) > int(mapping.get("PROB")):
                    # The stored mapping has a strictly higher PROB: keep it
                    # and stop scanning this phrase.
                    break
                else:
                    counts["mapped-multi"] += 1
                    counts["mapped-multi-" + str(mapCount)] += 1
                # NOTE(review): the attribute update is assumed to apply to
                # both the first mapping and a replacing one, i.e. to sit
                # after the if/else above -- confirm against the intended
                # nesting.
                for entityAttribute, mapAttribute in copiedAttributes:
                    entity.set(entityAttribute, str(mapping.get(mapAttribute)))
                counts["mappings"] += 1
                mapCount += 1

    sys.stderr.write(str(counts) + "\n")
    if output is not None:
        ETUtils.write(xml, output)
106
if __name__ == "__main__":
    # Psyco is optional: enable it when it can be imported, otherwise carry on.
    try:
        import psyco
        psyco.full()
        sys.stderr.write("Found Psyco, using\n")
    except ImportError:
        sys.stderr.write("Psyco not installed\n")

    from optparse import OptionParser
    actions = ("SUBMISSION", "TRANSFER_RLS", "ADD_MTMX")
    optparser = OptionParser(description="Tools for the DDI'11 Shared Task")
    optparser.add_option("-i", "--input", default=None, dest="input", help="input file (interaction XML)")
    optparser.add_option("-o", "--output", default=None, dest="output", help="output file (txt file)")
    optparser.add_option("-d", "--add", default=None, dest="add", help="data to be added, e.g. rls classifications")
    optparser.add_option("-a", "--action", default=None, dest="action", help="one of: " + ", ".join(actions))
    (options, args) = optparser.parse_args()
    # Report a bad or missing action as a usage error instead of an assertion
    # failure, so the check also holds when Python runs with -O.
    if options.action not in actions:
        optparser.error("--action must be one of: " + ", ".join(actions))

    if options.action == "SUBMISSION":
        makeDDISubmissionFile(options.input, options.output)
    elif options.action == "TRANSFER_RLS":
        transferClassifications(options.input, options.add, options.output)
    elif options.action == "ADD_MTMX":
        addMTMX(options.input, options.add, options.output)
133