Skip to content

Commit a6e2094

Browse files
committed
Updated example generated code
1 parent e942203 commit a6e2094

9 files changed

+196
-91
lines changed

_includes/demo.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
<div class="mdl-grid" style="padding-top: 0px">
22
<div class="mdl-cell mdl-cell--12-col">
3-
<p style="line-height: 100%">This is an prototype implementation of the sparse tensor algebra compiler theory and contains known bugs, which are documented <a href="https://github.com/tensor-compiler/taco/issues">here</a>. If you find additional issues, please consider submitting a bug report.</p>
3+
<p style="line-height: 100%">This is a prototype implementation of the sparse tensor algebra compiler theory and contains known bugs, which are documented <a href="https://github.com/tensor-compiler/taco/issues">here</a>. If you find additional issues, please consider submitting a bug report.</p>
44
<h6 style="margin-bottom: 0px; margin-top: 18px">Input a tensor algebra expression in index notation to generate code that computes it:</h6>
55
<div class="input-bar">
66
<div class="mdl-textfield expr-input">

examples/mttkrp_full.c

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ typedef struct {
2626
taco_mode_t* mode_types; // mode storage types
2727
uint8_t*** indices; // tensor index data (per mode)
2828
uint8_t* vals; // tensor values
29+
uint8_t* fill_value; // tensor fill value
2930
int32_t vals_size; // values array size
3031
} taco_tensor_t;
3132
#endif
@@ -36,6 +37,26 @@ int omp_get_max_threads() { return 1; }
3637
/*
 * qsort-style comparator for int elements.
 *
 * a, b: pointers to the two int values to compare.
 * Returns a negative value if *a < *b, zero if they are equal, and a
 * positive value if *a > *b.
 */
int cmp(const void *a, const void *b) {
  const int lhs = *((const int*)a);
  const int rhs = *((const int*)b);
  // Compare instead of subtracting: lhs - rhs overflows (undefined
  // behavior, wrong sign) when the operands are far apart, e.g.
  // INT_MIN and 1.
  return (lhs > rhs) - (lhs < rhs);
}
40+
/*
 * Galloping search over array[arrayStart, arrayEnd).
 *
 * array:      values to search; assumed sorted in ascending order within
 *             the searched range (the search relies on it) -- TODO confirm
 *             against the callers.
 * arrayStart: first index of the range (inclusive).
 * arrayEnd:   end of the range (exclusive).
 * target:     value to locate.
 *
 * Returns arrayStart if the range is empty or its first element is already
 * >= target. Otherwise returns one past the last index found whose element
 * is < target, i.e. the first index with an element >= target, or arrayEnd
 * when every element is smaller.
 */
int taco_gallop(int *array, int arrayStart, int arrayEnd, int target) {
  // Test the range before touching the array so that an empty range never
  // dereferences array[arrayStart].
  if (arrayStart >= arrayEnd || array[arrayStart] >= target) {
    return arrayStart;
  }
  int step = 1;
  int curr = arrayStart;
  // Gallop phase: advance with doubling strides while still below target.
  while (curr + step < arrayEnd && array[curr + step] < target) {
    curr += step;
    step = step * 2;
  }

  // Descent phase: the last stride overshot, so retry with halved strides
  // until curr is the last position whose element is < target.
  step = step / 2;
  while (step > 0) {
    if (curr + step < arrayEnd && array[curr + step] < target) {
      curr += step;
    }
    step = step / 2;
  }
  return curr+1;
}
3960
int taco_binarySearchAfter(int *array, int arrayStart, int arrayEnd, int target) {
4061
if (array[arrayStart] >= target) {
4162
return arrayStart;

examples/spadd_assembly.c

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -54,21 +54,21 @@ int assemble(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *C) {
5454
#pragma omp parallel for schedule(runtime)
5555
for (int32_t i = 0; i < C1_dimension; i++) {
5656

57-
int32_t jB1 = B2_pos[i];
58-
int32_t pB2_end0 = B2_pos[(i + 1)];
59-
int32_t jC1 = C2_pos[i];
60-
int32_t pC2_end0 = C2_pos[(i + 1)];
57+
int32_t jB = B2_pos[i];
58+
int32_t pB2_end = B2_pos[(i + 1)];
59+
int32_t jC = C2_pos[i];
60+
int32_t pC2_end = C2_pos[(i + 1)];
6161

62-
while (jB1 < pB2_end0 && jC1 < pC2_end0) {
63-
int32_t jB2 = B2_crd[jB1];
64-
int32_t jC2 = C2_crd[jC1];
65-
int32_t j = TACO_MIN(jB2,jC2);
66-
if (jB2 == j && jC2 == j) {
62+
while (jB < pB2_end && jC < pC2_end) {
63+
int32_t jB0 = B2_crd[jB];
64+
int32_t jC0 = C2_crd[jC];
65+
int32_t j = TACO_MIN(jB0,jC0);
66+
if (jB0 == j && jC0 == j) {
6767
int32_t pA2 = A2_pos[i];
6868
A2_pos[i] = A2_pos[i] + 1;
6969
A2_crd[pA2] = j;
7070
}
71-
else if (jB2 == j) {
71+
else if (jB0 == j) {
7272
int32_t pA20 = A2_pos[i];
7373
A2_pos[i] = A2_pos[i] + 1;
7474
A2_crd[pA20] = j;
@@ -78,22 +78,22 @@ int assemble(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *C) {
7878
A2_pos[i] = A2_pos[i] + 1;
7979
A2_crd[pA21] = j;
8080
}
81-
jB1 += (int32_t)(jB2 == j);
82-
jC1 += (int32_t)(jC2 == j);
81+
jB += (int32_t)(jB0 == j);
82+
jC += (int32_t)(jC0 == j);
8383
}
84-
while (jB1 < pB2_end0) {
85-
int32_t j = B2_crd[jB1];
84+
while (jB < pB2_end) {
85+
int32_t j = B2_crd[jB];
8686
int32_t pA22 = A2_pos[i];
8787
A2_pos[i] = A2_pos[i] + 1;
8888
A2_crd[pA22] = j;
89-
jB1++;
89+
jB++;
9090
}
91-
while (jC1 < pC2_end0) {
92-
int32_t j = C2_crd[jC1];
91+
while (jC < pC2_end) {
92+
int32_t j = C2_crd[jC];
9393
int32_t pA23 = A2_pos[i];
9494
A2_pos[i] = A2_pos[i] + 1;
9595
A2_crd[pA23] = j;
96-
jC1++;
96+
jC++;
9797
}
9898
}
9999

examples/spadd_full.c

Lines changed: 62 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ typedef struct {
2626
taco_mode_t* mode_types; // mode storage types
2727
uint8_t*** indices; // tensor index data (per mode)
2828
uint8_t* vals; // tensor values
29+
uint8_t* fill_value; // tensor fill value
2930
int32_t vals_size; // values array size
3031
} taco_tensor_t;
3132
#endif
@@ -36,6 +37,26 @@ int omp_get_max_threads() { return 1; }
3637
/*
 * qsort-style comparator for int elements.
 *
 * a, b: pointers to the two int values to compare.
 * Returns a negative value if *a < *b, zero if they are equal, and a
 * positive value if *a > *b.
 */
int cmp(const void *a, const void *b) {
  const int lhs = *((const int*)a);
  const int rhs = *((const int*)b);
  // Compare instead of subtracting: lhs - rhs overflows (undefined
  // behavior, wrong sign) when the operands are far apart, e.g.
  // INT_MIN and 1.
  return (lhs > rhs) - (lhs < rhs);
}
40+
/*
 * Galloping search over array[arrayStart, arrayEnd).
 *
 * array:      values to search; assumed sorted in ascending order within
 *             the searched range (the search relies on it) -- TODO confirm
 *             against the callers.
 * arrayStart: first index of the range (inclusive).
 * arrayEnd:   end of the range (exclusive).
 * target:     value to locate.
 *
 * Returns arrayStart if the range is empty or its first element is already
 * >= target. Otherwise returns one past the last index found whose element
 * is < target, i.e. the first index with an element >= target, or arrayEnd
 * when every element is smaller.
 */
int taco_gallop(int *array, int arrayStart, int arrayEnd, int target) {
  // Test the range before touching the array so that an empty range never
  // dereferences array[arrayStart].
  if (arrayStart >= arrayEnd || array[arrayStart] >= target) {
    return arrayStart;
  }
  int step = 1;
  int curr = arrayStart;
  // Gallop phase: advance with doubling strides while still below target.
  while (curr + step < arrayEnd && array[curr + step] < target) {
    curr += step;
    step = step * 2;
  }

  // Descent phase: the last stride overshot, so retry with halved strides
  // until curr is the last position whose element is < target.
  step = step / 2;
  while (step > 0) {
    if (curr + step < arrayEnd && array[curr + step] < target) {
      curr += step;
    }
    step = step / 2;
  }
  return curr+1;
}
3960
int taco_binarySearchAfter(int *array, int arrayStart, int arrayEnd, int target) {
4061
if (array[arrayStart] >= target) {
4162
return arrayStart;
@@ -231,21 +252,21 @@ int assemble(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *C) {
231252
#pragma omp parallel for schedule(runtime)
232253
for (int32_t i = 0; i < C1_dimension; i++) {
233254

234-
int32_t jB1 = B2_pos[i];
235-
int32_t pB2_end0 = B2_pos[(i + 1)];
236-
int32_t jC1 = C2_pos[i];
237-
int32_t pC2_end0 = C2_pos[(i + 1)];
255+
int32_t jB = B2_pos[i];
256+
int32_t pB2_end = B2_pos[(i + 1)];
257+
int32_t jC = C2_pos[i];
258+
int32_t pC2_end = C2_pos[(i + 1)];
238259

239-
while (jB1 < pB2_end0 && jC1 < pC2_end0) {
240-
int32_t jB2 = B2_crd[jB1];
241-
int32_t jC2 = C2_crd[jC1];
242-
int32_t j = TACO_MIN(jB2,jC2);
243-
if (jB2 == j && jC2 == j) {
260+
while (jB < pB2_end && jC < pC2_end) {
261+
int32_t jB0 = B2_crd[jB];
262+
int32_t jC0 = C2_crd[jC];
263+
int32_t j = TACO_MIN(jB0,jC0);
264+
if (jB0 == j && jC0 == j) {
244265
int32_t pA2 = A2_pos[i];
245266
A2_pos[i] = A2_pos[i] + 1;
246267
A2_crd[pA2] = j;
247268
}
248-
else if (jB2 == j) {
269+
else if (jB0 == j) {
249270
int32_t pA20 = A2_pos[i];
250271
A2_pos[i] = A2_pos[i] + 1;
251272
A2_crd[pA20] = j;
@@ -255,22 +276,22 @@ int assemble(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *C) {
255276
A2_pos[i] = A2_pos[i] + 1;
256277
A2_crd[pA21] = j;
257278
}
258-
jB1 += (int32_t)(jB2 == j);
259-
jC1 += (int32_t)(jC2 == j);
279+
jB += (int32_t)(jB0 == j);
280+
jC += (int32_t)(jC0 == j);
260281
}
261-
while (jB1 < pB2_end0) {
262-
int32_t j = B2_crd[jB1];
282+
while (jB < pB2_end) {
283+
int32_t j = B2_crd[jB];
263284
int32_t pA22 = A2_pos[i];
264285
A2_pos[i] = A2_pos[i] + 1;
265286
A2_crd[pA22] = j;
266-
jB1++;
287+
jB++;
267288
}
268-
while (jC1 < pC2_end0) {
269-
int32_t j = C2_crd[jC1];
289+
while (jC < pC2_end) {
290+
int32_t j = C2_crd[jC];
270291
int32_t pA23 = A2_pos[i];
271292
A2_pos[i] = A2_pos[i] + 1;
272293
A2_crd[pA23] = j;
273-
jC1++;
294+
jC++;
274295
}
275296
}
276297

@@ -342,51 +363,51 @@ int evaluate(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *C) {
342363
#pragma omp parallel for schedule(runtime)
343364
for (int32_t i = 0; i < C1_dimension; i++) {
344365

345-
int32_t jB1 = B2_pos[i];
346-
int32_t pB2_end0 = B2_pos[(i + 1)];
347-
int32_t jC1 = C2_pos[i];
348-
int32_t pC2_end0 = C2_pos[(i + 1)];
366+
int32_t jB = B2_pos[i];
367+
int32_t pB2_end = B2_pos[(i + 1)];
368+
int32_t jC = C2_pos[i];
369+
int32_t pC2_end = C2_pos[(i + 1)];
349370

350-
while (jB1 < pB2_end0 && jC1 < pC2_end0) {
351-
int32_t jB2 = B2_crd[jB1];
352-
int32_t jC2 = C2_crd[jC1];
353-
int32_t j = TACO_MIN(jB2,jC2);
354-
if (jB2 == j && jC2 == j) {
371+
while (jB < pB2_end && jC < pC2_end) {
372+
int32_t jB0 = B2_crd[jB];
373+
int32_t jC0 = C2_crd[jC];
374+
int32_t j = TACO_MIN(jB0,jC0);
375+
if (jB0 == j && jC0 == j) {
355376
int32_t pA2 = A2_pos[i];
356377
A2_pos[i] = A2_pos[i] + 1;
357378
A2_crd[pA2] = j;
358-
A_vals[pA2] = B_vals[jB1] + C_vals[jC1];
379+
A_vals[pA2] = B_vals[jB] + C_vals[jC];
359380
}
360-
else if (jB2 == j) {
381+
else if (jB0 == j) {
361382
int32_t pA20 = A2_pos[i];
362383
A2_pos[i] = A2_pos[i] + 1;
363384
A2_crd[pA20] = j;
364-
A_vals[pA20] = B_vals[jB1];
385+
A_vals[pA20] = B_vals[jB];
365386
}
366387
else {
367388
int32_t pA21 = A2_pos[i];
368389
A2_pos[i] = A2_pos[i] + 1;
369390
A2_crd[pA21] = j;
370-
A_vals[pA21] = C_vals[jC1];
391+
A_vals[pA21] = C_vals[jC];
371392
}
372-
jB1 += (int32_t)(jB2 == j);
373-
jC1 += (int32_t)(jC2 == j);
393+
jB += (int32_t)(jB0 == j);
394+
jC += (int32_t)(jC0 == j);
374395
}
375-
while (jB1 < pB2_end0) {
376-
int32_t j = B2_crd[jB1];
396+
while (jB < pB2_end) {
397+
int32_t j = B2_crd[jB];
377398
int32_t pA22 = A2_pos[i];
378399
A2_pos[i] = A2_pos[i] + 1;
379400
A2_crd[pA22] = j;
380-
A_vals[pA22] = B_vals[jB1];
381-
jB1++;
401+
A_vals[pA22] = B_vals[jB];
402+
jB++;
382403
}
383-
while (jC1 < pC2_end0) {
384-
int32_t j = C2_crd[jC1];
404+
while (jC < pC2_end) {
405+
int32_t j = C2_crd[jC];
385406
int32_t pA23 = A2_pos[i];
386407
A2_pos[i] = A2_pos[i] + 1;
387408
A2_crd[pA23] = j;
388-
A_vals[pA23] = C_vals[jC1];
389-
jC1++;
409+
A_vals[pA23] = C_vals[jC];
410+
jC++;
390411
}
391412
}
392413

examples/spgemm_assembly.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -66,10 +66,10 @@ int assemble(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *C) {
6666
int32_t workspace_index_list_size = 0;
6767
int32_t* restrict workspace_index_list = workspace_index_list_all + C2_dimension * omp_get_thread_num();
6868
bool* restrict workspace_already_set = workspace_already_set_all + C2_dimension * omp_get_thread_num();
69-
for (int32_t kB0 = B2_pos[i]; kB0 < B2_pos[(i + 1)]; kB0++) {
70-
int32_t k = B2_crd[kB0];
71-
for (int32_t jC0 = C2_pos[k]; jC0 < C2_pos[(k + 1)]; jC0++) {
72-
int32_t j = C2_crd[jC0];
69+
for (int32_t kB = B2_pos[i]; kB < B2_pos[(i + 1)]; kB++) {
70+
int32_t k = B2_crd[kB];
71+
for (int32_t jC = C2_pos[k]; jC < C2_pos[(k + 1)]; jC++) {
72+
int32_t j = C2_crd[jC];
7373
if (!workspace_already_set[j]) {
7474
workspace_index_list[workspace_index_list_size] = j;
7575
workspace_already_set[j] = 1;

examples/spgemm_full.c

Lines changed: 31 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ typedef struct {
2626
taco_mode_t* mode_types; // mode storage types
2727
uint8_t*** indices; // tensor index data (per mode)
2828
uint8_t* vals; // tensor values
29+
uint8_t* fill_value; // tensor fill value
2930
int32_t vals_size; // values array size
3031
} taco_tensor_t;
3132
#endif
@@ -36,6 +37,26 @@ int omp_get_max_threads() { return 1; }
3637
/*
 * qsort-style comparator for int elements.
 *
 * a, b: pointers to the two int values to compare.
 * Returns a negative value if *a < *b, zero if they are equal, and a
 * positive value if *a > *b.
 */
int cmp(const void *a, const void *b) {
  const int lhs = *((const int*)a);
  const int rhs = *((const int*)b);
  // Compare instead of subtracting: lhs - rhs overflows (undefined
  // behavior, wrong sign) when the operands are far apart, e.g.
  // INT_MIN and 1.
  return (lhs > rhs) - (lhs < rhs);
}
40+
/*
 * Galloping search over array[arrayStart, arrayEnd).
 *
 * array:      values to search; assumed sorted in ascending order within
 *             the searched range (the search relies on it) -- TODO confirm
 *             against the callers.
 * arrayStart: first index of the range (inclusive).
 * arrayEnd:   end of the range (exclusive).
 * target:     value to locate.
 *
 * Returns arrayStart if the range is empty or its first element is already
 * >= target. Otherwise returns one past the last index found whose element
 * is < target, i.e. the first index with an element >= target, or arrayEnd
 * when every element is smaller.
 */
int taco_gallop(int *array, int arrayStart, int arrayEnd, int target) {
  // Test the range before touching the array so that an empty range never
  // dereferences array[arrayStart].
  if (arrayStart >= arrayEnd || array[arrayStart] >= target) {
    return arrayStart;
  }
  int step = 1;
  int curr = arrayStart;
  // Gallop phase: advance with doubling strides while still below target.
  while (curr + step < arrayEnd && array[curr + step] < target) {
    curr += step;
    step = step * 2;
  }

  // Descent phase: the last stride overshot, so retry with halved strides
  // until curr is the last position whose element is < target.
  step = step / 2;
  while (step > 0) {
    if (curr + step < arrayEnd && array[curr + step] < target) {
      curr += step;
    }
    step = step / 2;
  }
  return curr+1;
}
3960
int taco_binarySearchAfter(int *array, int arrayStart, int arrayEnd, int target) {
4061
if (array[arrayStart] >= target) {
4162
return arrayStart;
@@ -245,10 +266,10 @@ int assemble(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *C) {
245266
int32_t workspace_index_list_size = 0;
246267
int32_t* restrict workspace_index_list = workspace_index_list_all + C2_dimension * omp_get_thread_num();
247268
bool* restrict workspace_already_set = workspace_already_set_all + C2_dimension * omp_get_thread_num();
248-
for (int32_t kB0 = B2_pos[i]; kB0 < B2_pos[(i + 1)]; kB0++) {
249-
int32_t k = B2_crd[kB0];
250-
for (int32_t jC0 = C2_pos[k]; jC0 < C2_pos[(k + 1)]; jC0++) {
251-
int32_t j = C2_crd[jC0];
269+
for (int32_t kB = B2_pos[i]; kB < B2_pos[(i + 1)]; kB++) {
270+
int32_t k = B2_crd[kB];
271+
for (int32_t jC = C2_pos[k]; jC < C2_pos[(k + 1)]; jC++) {
272+
int32_t j = C2_crd[jC];
252273
if (!workspace_already_set[j]) {
253274
workspace_index_list[workspace_index_list_size] = j;
254275
workspace_already_set[j] = 1;
@@ -353,18 +374,18 @@ int evaluate(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *C) {
353374
double* restrict workspace = workspace_all + C2_dimension * omp_get_thread_num();
354375
int32_t* restrict workspace_index_list = workspace_index_list_all + C2_dimension * omp_get_thread_num();
355376
bool* restrict workspace_already_set = workspace_already_set_all + C2_dimension * omp_get_thread_num();
356-
for (int32_t kB0 = B2_pos[i]; kB0 < B2_pos[(i + 1)]; kB0++) {
357-
int32_t k = B2_crd[kB0];
358-
for (int32_t jC0 = C2_pos[k]; jC0 < C2_pos[(k + 1)]; jC0++) {
359-
int32_t j = C2_crd[jC0];
377+
for (int32_t kB = B2_pos[i]; kB < B2_pos[(i + 1)]; kB++) {
378+
int32_t k = B2_crd[kB];
379+
for (int32_t jC = C2_pos[k]; jC < C2_pos[(k + 1)]; jC++) {
380+
int32_t j = C2_crd[jC];
360381
if (!workspace_already_set[j]) {
361-
workspace[j] = B_vals[kB0] * C_vals[jC0];
382+
workspace[j] = B_vals[kB] * C_vals[jC];
362383
workspace_index_list[workspace_index_list_size] = j;
363384
workspace_already_set[j] = 1;
364385
workspace_index_list_size++;
365386
}
366387
else {
367-
workspace[j] = workspace[j] + B_vals[kB0] * C_vals[jC0];
388+
workspace[j] = workspace[j] + B_vals[kB] * C_vals[jC];
368389
}
369390
}
370391
}

0 commit comments

Comments
 (0)