10INT *freqRel, *freqEnt;
11INT *lefHead, *rigHead;
12INT *lefTail, *rigTail;
14REAL *left_mean, *right_mean;
22INT *testLef, *testRig;
23INT *validLef, *validRig;
25extern "C" void importProb(REAL temp) {
29 fin = fopen((inPath +
"kl_prob.txt").c_str(),
"r");
30 printf(
"Current temperature:%f\n", temp);
31 prob = (REAL *)calloc(relationTotal * (relationTotal - 1),
sizeof(REAL));
33 for (INT i = 0; i < relationTotal * (relationTotal - 1); ++i) {
34 tmp = fscanf(fin,
"%f", &prob[i]);
37 for (INT i = 0; i < relationTotal; ++i) {
38 for (INT j = 0; j < relationTotal - 1; ++j) {
39 REAL tmp = exp(-prob[i * (relationTotal - 1) + j] / temp);
41 prob[i * (relationTotal - 1) + j] = tmp;
43 for (INT j = 0; j < relationTotal - 1; ++j) {
44 prob[i * (relationTotal - 1) + j] /= sum;
51extern "C" void importTrainFiles() {
53 printf(
"The toolkit is importing datasets.\n");
58 fin = fopen((inPath +
"relation2id.txt").c_str(),
"r");
60 fin = fopen(rel_file.c_str(),
"r");
61 tmp = fscanf(fin,
"%ld", &relationTotal);
62 printf(
"The total of relations is %ld.\n", relationTotal);
66 fin = fopen((inPath +
"entity2id.txt").c_str(),
"r");
68 fin = fopen(ent_file.c_str(),
"r");
69 tmp = fscanf(fin,
"%ld", &entityTotal);
70 printf(
"The total of entities is %ld.\n", entityTotal);
74 fin = fopen((inPath +
"train2id.txt").c_str(),
"r");
76 fin = fopen(train_file.c_str(),
"r");
77 tmp = fscanf(fin,
"%ld", &trainTotal);
78 trainList = (
Triple *)calloc(trainTotal,
sizeof(
Triple));
79 trainHead = (
Triple *)calloc(trainTotal,
sizeof(
Triple));
80 trainTail = (
Triple *)calloc(trainTotal,
sizeof(
Triple));
82 freqRel = (INT *)calloc(relationTotal,
sizeof(INT));
83 freqEnt = (INT *)calloc(entityTotal,
sizeof(INT));
84 for (INT i = 0; i < trainTotal; i++) {
85 tmp = fscanf(fin,
"%ld", &trainList[i].h);
86 tmp = fscanf(fin,
"%ld", &trainList[i].t);
87 tmp = fscanf(fin,
"%ld", &trainList[i].r);
90 std::sort(trainList, trainList + trainTotal, Triple::cmp_head);
93 trainHead[0] = trainTail[0] = trainRel[0] = trainList[0];
94 freqEnt[trainList[0].t] += 1;
95 freqEnt[trainList[0].h] += 1;
96 freqRel[trainList[0].r] += 1;
97 for (INT i = 1; i < tmp; i++)
98 if (trainList[i].h != trainList[i - 1].h ||
99 trainList[i].r != trainList[i - 1].r ||
100 trainList[i].t != trainList[i - 1].t) {
101 trainHead[trainTotal] = trainTail[trainTotal] = trainRel[trainTotal] =
102 trainList[trainTotal] = trainList[i];
104 freqEnt[trainList[i].t]++;
105 freqEnt[trainList[i].h]++;
106 freqRel[trainList[i].r]++;
109 std::sort(trainHead, trainHead + trainTotal, Triple::cmp_head);
110 std::sort(trainTail, trainTail + trainTotal, Triple::cmp_tail);
111 std::sort(trainRel, trainRel + trainTotal, Triple::cmp_rel);
112 printf(
"The total of train triples is %ld.\n", trainTotal);
114 lefHead = (INT *)calloc(entityTotal,
sizeof(INT));
115 rigHead = (INT *)calloc(entityTotal,
sizeof(INT));
116 lefTail = (INT *)calloc(entityTotal,
sizeof(INT));
117 rigTail = (INT *)calloc(entityTotal,
sizeof(INT));
118 lefRel = (INT *)calloc(entityTotal,
sizeof(INT));
119 rigRel = (INT *)calloc(entityTotal,
sizeof(INT));
120 memset(rigHead, -1,
sizeof(INT) * entityTotal);
121 memset(rigTail, -1,
sizeof(INT) * entityTotal);
122 memset(rigRel, -1,
sizeof(INT) * entityTotal);
123 for (INT i = 1; i < trainTotal; i++) {
124 if (trainTail[i].t != trainTail[i - 1].t) {
125 rigTail[trainTail[i - 1].t] = i - 1;
126 lefTail[trainTail[i].t] = i;
128 if (trainHead[i].h != trainHead[i - 1].h) {
129 rigHead[trainHead[i - 1].h] = i - 1;
130 lefHead[trainHead[i].h] = i;
132 if (trainRel[i].h != trainRel[i - 1].h) {
133 rigRel[trainRel[i - 1].h] = i - 1;
134 lefRel[trainRel[i].h] = i;
137 lefHead[trainHead[0].h] = 0;
138 rigHead[trainHead[trainTotal - 1].h] = trainTotal - 1;
139 lefTail[trainTail[0].t] = 0;
140 rigTail[trainTail[trainTotal - 1].t] = trainTotal - 1;
141 lefRel[trainRel[0].h] = 0;
142 rigRel[trainRel[trainTotal - 1].h] = trainTotal - 1;
144 left_mean = (REAL *)calloc(relationTotal,
sizeof(REAL));
145 right_mean = (REAL *)calloc(relationTotal,
sizeof(REAL));
146 for (INT i = 0; i < entityTotal; i++) {
147 for (INT j = lefHead[i] + 1; j <= rigHead[i]; j++)
148 if (trainHead[j].r != trainHead[j - 1].r)
149 left_mean[trainHead[j].r] += 1.0;
150 if (lefHead[i] <= rigHead[i])
151 left_mean[trainHead[lefHead[i]].r] += 1.0;
152 for (INT j = lefTail[i] + 1; j <= rigTail[i]; j++)
153 if (trainTail[j].r != trainTail[j - 1].r)
154 right_mean[trainTail[j].r] += 1.0;
155 if (lefTail[i] <= rigTail[i])
156 right_mean[trainTail[lefTail[i]].r] += 1.0;
158 for (INT i = 0; i < relationTotal; i++) {
159 left_mean[i] = freqRel[i] / left_mean[i];
160 right_mean[i] = freqRel[i] / right_mean[i];
168extern "C" void importTestFiles() {
173 fin = fopen((inPath +
"relation2id.txt").c_str(),
"r");
175 fin = fopen(rel_file.c_str(),
"r");
176 tmp = fscanf(fin,
"%ld", &relationTotal);
180 fin = fopen((inPath +
"entity2id.txt").c_str(),
"r");
182 fin = fopen(ent_file.c_str(),
"r");
183 tmp = fscanf(fin,
"%ld", &entityTotal);
186 FILE *f_kb1, *f_kb2, *f_kb3;
187 if (train_file ==
"")
188 f_kb2 = fopen((inPath +
"train2id.txt").c_str(),
"r");
190 f_kb2 = fopen(train_file.c_str(),
"r");
192 f_kb1 = fopen((inPath +
"test2id.txt").c_str(),
"r");
194 f_kb1 = fopen(test_file.c_str(),
"r");
195 if (valid_file ==
"")
196 f_kb3 = fopen((inPath +
"valid2id.txt").c_str(),
"r");
198 f_kb3 = fopen(valid_file.c_str(),
"r");
199 tmp = fscanf(f_kb1,
"%ld", &testTotal);
200 tmp = fscanf(f_kb2,
"%ld", &trainTotal);
201 tmp = fscanf(f_kb3,
"%ld", &validTotal);
202 tripleTotal = testTotal + trainTotal + validTotal;
204 validList = (
Triple *)calloc(validTotal,
sizeof(
Triple));
205 tripleList = (
Triple *)calloc(tripleTotal,
sizeof(
Triple));
206 for (INT i = 0; i < testTotal; i++) {
207 tmp = fscanf(f_kb1,
"%ld", &testList[i].h);
208 tmp = fscanf(f_kb1,
"%ld", &testList[i].t);
209 tmp = fscanf(f_kb1,
"%ld", &testList[i].r);
210 tripleList[i] = testList[i];
212 for (INT i = 0; i < trainTotal; i++) {
213 tmp = fscanf(f_kb2,
"%ld", &tripleList[i + testTotal].h);
214 tmp = fscanf(f_kb2,
"%ld", &tripleList[i + testTotal].t);
215 tmp = fscanf(f_kb2,
"%ld", &tripleList[i + testTotal].r);
217 for (INT i = 0; i < validTotal; i++) {
218 tmp = fscanf(f_kb3,
"%ld", &tripleList[i + testTotal + trainTotal].h);
219 tmp = fscanf(f_kb3,
"%ld", &tripleList[i + testTotal + trainTotal].t);
220 tmp = fscanf(f_kb3,
"%ld", &tripleList[i + testTotal + trainTotal].r);
221 validList[i] = tripleList[i + testTotal + trainTotal];
227 std::sort(tripleList, tripleList + tripleTotal, Triple::cmp_head);
228 std::sort(testList, testList + testTotal, Triple::cmp_rel2);
229 std::sort(validList, validList + validTotal, Triple::cmp_rel2);
230 printf(
"The total of test triples is %ld.\n", testTotal);
231 printf(
"The total of valid triples is %ld.\n", validTotal);
233 testLef = (INT *)calloc(relationTotal,
sizeof(INT));
234 testRig = (INT *)calloc(relationTotal,
sizeof(INT));
235 memset(testLef, -1,
sizeof(INT) * relationTotal);
236 memset(testRig, -1,
sizeof(INT) * relationTotal);
237 for (INT i = 1; i < testTotal; i++) {
238 if (testList[i].r != testList[i - 1].r) {
239 testRig[testList[i - 1].r] = i - 1;
240 testLef[testList[i].r] = i;
243 testLef[testList[0].r] = 0;
244 testRig[testList[testTotal - 1].r] = testTotal - 1;
246 validLef = (INT *)calloc(relationTotal,
sizeof(INT));
247 validRig = (INT *)calloc(relationTotal,
sizeof(INT));
248 memset(validLef, -1,
sizeof(INT) * relationTotal);
249 memset(validRig, -1,
sizeof(INT) * relationTotal);
250 for (INT i = 1; i < validTotal; i++) {
251 if (validList[i].r != validList[i - 1].r) {
252 validRig[validList[i - 1].r] = i - 1;
253 validLef[validList[i].r] = i;
256 validLef[validList[0].r] = 0;
257 validRig[validList[validTotal - 1].r] = validTotal - 1;
267extern "C" void importTypeFiles() {
269 head_lef = (INT *)calloc(relationTotal,
sizeof(INT));
270 head_rig = (INT *)calloc(relationTotal,
sizeof(INT));
271 tail_lef = (INT *)calloc(relationTotal,
sizeof(INT));
272 tail_rig = (INT *)calloc(relationTotal,
sizeof(INT));
275 FILE *f_type = fopen((inPath +
"type_constrain.txt").c_str(),
"r");
277 tmp = fscanf(f_type,
"%ld", &tmp);
278 for (INT i = 0; i < relationTotal; i++) {
280 tmp = fscanf(f_type,
"%ld %ld", &rel, &tot);
281 for (INT j = 0; j < tot; j++) {
282 tmp = fscanf(f_type,
"%ld", &tmp);
285 tmp = fscanf(f_type,
"%ld%ld", &rel, &tot);
286 for (INT j = 0; j < tot; j++) {
287 tmp = fscanf(f_type,
"%ld", &tmp);
292 head_type = (INT *)calloc(total_lef,
sizeof(INT));
293 tail_type = (INT *)calloc(total_rig,
sizeof(INT));
296 f_type = fopen((inPath +
"type_constrain.txt").c_str(),
"r");
297 tmp = fscanf(f_type,
"%ld", &tmp);
298 for (INT i = 0; i < relationTotal; i++) {
300 tmp = fscanf(f_type,
"%ld%ld", &rel, &tot);
301 head_lef[rel] = total_lef;
302 for (INT j = 0; j < tot; j++) {
303 tmp = fscanf(f_type,
"%ld", &head_type[total_lef]);
306 head_rig[rel] = total_lef;
307 std::sort(head_type + head_lef[rel], head_type + head_rig[rel]);
308 tmp = fscanf(f_type,
"%ld%ld", &rel, &tot);
309 tail_lef[rel] = total_rig;
310 for (INT j = 0; j < tot; j++) {
311 tmp = fscanf(f_type,
"%ld", &tail_type[total_rig]);
314 tail_rig[rel] = total_rig;
315 std::sort(tail_type + tail_lef[rel], tail_type + tail_rig[rel]);