Updated example generated code

stephenchouca · stephenchouca · commit a6e20949b310 · 2022-06-12T21:47:23.000-04:00
diff --git a/_includes/demo.html b/_includes/demo.html
@@ -1,6 +1,6 @@
 <div class="mdl-grid" style="padding-top: 0px">
   <div class="mdl-cell mdl-cell--12-col">
-    <p style="line-height: 100%">This is an prototype implementation of the sparse tensor algebra compiler theory and contains known bugs, which are documented <a href="https://github.com/tensor-compiler/taco/issues">here</a>. If you find additional issues, please consider submitting a bug report.</p>
+    <p style="line-height: 100%">This is a prototype implementation of the sparse tensor algebra compiler theory and contains known bugs, which are documented <a href="https://github.com/tensor-compiler/taco/issues">here</a>. If you find additional issues, please consider submitting a bug report.</p>
     <h6 style="margin-bottom: 0px; margin-top: 18px">Input a tensor algebra expression in index notation to generate code that computes it:</h6>
     <div class="input-bar">
       <div class="mdl-textfield expr-input">
diff --git a/examples/mttkrp_full.c b/examples/mttkrp_full.c
@@ -26,6 +26,7 @@ typedef struct {
   taco_mode_t* mode_types;    // mode storage types
   uint8_t***   indices;       // tensor index data (per mode)
   uint8_t*     vals;          // tensor values
+  uint8_t*     fill_value;    // tensor fill value
   int32_t      vals_size;     // values array size
 } taco_tensor_t;
 #endif
@@ -36,6 +37,26 @@ int omp_get_max_threads() { return 1; }
 int cmp(const void *a, const void *b) {
   return *((const int*)a) - *((const int*)b);
 }
+int taco_gallop(int *array, int arrayStart, int arrayEnd, int target) { // galloping search over [arrayStart, arrayEnd): returns the first index whose element is >= target (assumes ascending order)
+  if (arrayStart >= arrayEnd || array[arrayStart] >= target) { // range check goes first so an empty range never reads array[arrayStart]
+    return arrayStart;
+  }
+  int step = 1;
+  int curr = arrayStart; // from here on array[curr] < target always holds
+  while (curr + step < arrayEnd && array[curr + step] < target) { // gallop: double the stride while the probed element is still below target
+    curr += step;
+    step = step * 2;
+  }
+  // Binary descent: the last probe at curr + step failed, so retry with successively halved strides.
+  step = step / 2;
+  while (step > 0) {
+    if (curr + step < arrayEnd && array[curr + step] < target) {
+      curr += step;
+    }
+    step = step / 2;
+  }
+  return curr+1; // curr is the last index below target, so the answer is the next one (arrayEnd when no element qualifies)
+}
 int taco_binarySearchAfter(int *array, int arrayStart, int arrayEnd, int target) {
   if (array[arrayStart] >= target) {
     return arrayStart;
diff --git a/examples/spadd_assembly.c b/examples/spadd_assembly.c
@@ -54,21 +54,21 @@ int assemble(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *C) {
   #pragma omp parallel for schedule(runtime)
   for (int32_t i = 0; i < C1_dimension; i++) {
 
-    int32_t jB1 = B2_pos[i];
-    int32_t pB2_end0 = B2_pos[(i + 1)];
-    int32_t jC1 = C2_pos[i];
-    int32_t pC2_end0 = C2_pos[(i + 1)];
+    int32_t jB = B2_pos[i];
+    int32_t pB2_end = B2_pos[(i + 1)];
+    int32_t jC = C2_pos[i];
+    int32_t pC2_end = C2_pos[(i + 1)];
 
-    while (jB1 < pB2_end0 && jC1 < pC2_end0) {
-      int32_t jB2 = B2_crd[jB1];
-      int32_t jC2 = C2_crd[jC1];
-      int32_t j = TACO_MIN(jB2,jC2);
-      if (jB2 == j && jC2 == j) {
+    while (jB < pB2_end && jC < pC2_end) {
+      int32_t jB0 = B2_crd[jB];
+      int32_t jC0 = C2_crd[jC];
+      int32_t j = TACO_MIN(jB0,jC0);
+      if (jB0 == j && jC0 == j) {
         int32_t pA2 = A2_pos[i];
         A2_pos[i] = A2_pos[i] + 1;
         A2_crd[pA2] = j;
       }
-      else if (jB2 == j) {
+      else if (jB0 == j) {
         int32_t pA20 = A2_pos[i];
         A2_pos[i] = A2_pos[i] + 1;
         A2_crd[pA20] = j;
@@ -78,22 +78,22 @@ int assemble(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *C) {
         A2_pos[i] = A2_pos[i] + 1;
         A2_crd[pA21] = j;
       }
-      jB1 += (int32_t)(jB2 == j);
-      jC1 += (int32_t)(jC2 == j);
+      jB += (int32_t)(jB0 == j);
+      jC += (int32_t)(jC0 == j);
     }
-    while (jB1 < pB2_end0) {
-      int32_t j = B2_crd[jB1];
+    while (jB < pB2_end) {
+      int32_t j = B2_crd[jB];
       int32_t pA22 = A2_pos[i];
       A2_pos[i] = A2_pos[i] + 1;
       A2_crd[pA22] = j;
-      jB1++;
+      jB++;
     }
-    while (jC1 < pC2_end0) {
-      int32_t j = C2_crd[jC1];
+    while (jC < pC2_end) {
+      int32_t j = C2_crd[jC];
       int32_t pA23 = A2_pos[i];
       A2_pos[i] = A2_pos[i] + 1;
       A2_crd[pA23] = j;
-      jC1++;
+      jC++;
     }
   }
 
diff --git a/examples/spadd_full.c b/examples/spadd_full.c
@@ -26,6 +26,7 @@ typedef struct {
   taco_mode_t* mode_types;    // mode storage types
   uint8_t***   indices;       // tensor index data (per mode)
   uint8_t*     vals;          // tensor values
+  uint8_t*     fill_value;    // tensor fill value
   int32_t      vals_size;     // values array size
 } taco_tensor_t;
 #endif
@@ -36,6 +37,26 @@ int omp_get_max_threads() { return 1; }
 int cmp(const void *a, const void *b) {
   return *((const int*)a) - *((const int*)b);
 }
+int taco_gallop(int *array, int arrayStart, int arrayEnd, int target) { // galloping search over [arrayStart, arrayEnd): returns the first index whose element is >= target (assumes ascending order)
+  if (arrayStart >= arrayEnd || array[arrayStart] >= target) { // range check goes first so an empty range never reads array[arrayStart]
+    return arrayStart;
+  }
+  int step = 1;
+  int curr = arrayStart; // from here on array[curr] < target always holds
+  while (curr + step < arrayEnd && array[curr + step] < target) { // gallop: double the stride while the probed element is still below target
+    curr += step;
+    step = step * 2;
+  }
+  // Binary descent: the last probe at curr + step failed, so retry with successively halved strides.
+  step = step / 2;
+  while (step > 0) {
+    if (curr + step < arrayEnd && array[curr + step] < target) {
+      curr += step;
+    }
+    step = step / 2;
+  }
+  return curr+1; // curr is the last index below target, so the answer is the next one (arrayEnd when no element qualifies)
+}
 int taco_binarySearchAfter(int *array, int arrayStart, int arrayEnd, int target) {
   if (array[arrayStart] >= target) {
     return arrayStart;
@@ -231,21 +252,21 @@ int assemble(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *C) {
   #pragma omp parallel for schedule(runtime)
   for (int32_t i = 0; i < C1_dimension; i++) {
 
-    int32_t jB1 = B2_pos[i];
-    int32_t pB2_end0 = B2_pos[(i + 1)];
-    int32_t jC1 = C2_pos[i];
-    int32_t pC2_end0 = C2_pos[(i + 1)];
+    int32_t jB = B2_pos[i];
+    int32_t pB2_end = B2_pos[(i + 1)];
+    int32_t jC = C2_pos[i];
+    int32_t pC2_end = C2_pos[(i + 1)];
 
-    while (jB1 < pB2_end0 && jC1 < pC2_end0) {
-      int32_t jB2 = B2_crd[jB1];
-      int32_t jC2 = C2_crd[jC1];
-      int32_t j = TACO_MIN(jB2,jC2);
-      if (jB2 == j && jC2 == j) {
+    while (jB < pB2_end && jC < pC2_end) {
+      int32_t jB0 = B2_crd[jB];
+      int32_t jC0 = C2_crd[jC];
+      int32_t j = TACO_MIN(jB0,jC0);
+      if (jB0 == j && jC0 == j) {
         int32_t pA2 = A2_pos[i];
         A2_pos[i] = A2_pos[i] + 1;
         A2_crd[pA2] = j;
       }
-      else if (jB2 == j) {
+      else if (jB0 == j) {
         int32_t pA20 = A2_pos[i];
         A2_pos[i] = A2_pos[i] + 1;
         A2_crd[pA20] = j;
@@ -255,22 +276,22 @@ int assemble(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *C) {
         A2_pos[i] = A2_pos[i] + 1;
         A2_crd[pA21] = j;
       }
-      jB1 += (int32_t)(jB2 == j);
-      jC1 += (int32_t)(jC2 == j);
+      jB += (int32_t)(jB0 == j);
+      jC += (int32_t)(jC0 == j);
     }
-    while (jB1 < pB2_end0) {
-      int32_t j = B2_crd[jB1];
+    while (jB < pB2_end) {
+      int32_t j = B2_crd[jB];
       int32_t pA22 = A2_pos[i];
       A2_pos[i] = A2_pos[i] + 1;
       A2_crd[pA22] = j;
-      jB1++;
+      jB++;
     }
-    while (jC1 < pC2_end0) {
-      int32_t j = C2_crd[jC1];
+    while (jC < pC2_end) {
+      int32_t j = C2_crd[jC];
       int32_t pA23 = A2_pos[i];
       A2_pos[i] = A2_pos[i] + 1;
       A2_crd[pA23] = j;
-      jC1++;
+      jC++;
     }
   }
 
@@ -342,51 +363,51 @@ int evaluate(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *C) {
   #pragma omp parallel for schedule(runtime)
   for (int32_t i = 0; i < C1_dimension; i++) {
 
-    int32_t jB1 = B2_pos[i];
-    int32_t pB2_end0 = B2_pos[(i + 1)];
-    int32_t jC1 = C2_pos[i];
-    int32_t pC2_end0 = C2_pos[(i + 1)];
+    int32_t jB = B2_pos[i];
+    int32_t pB2_end = B2_pos[(i + 1)];
+    int32_t jC = C2_pos[i];
+    int32_t pC2_end = C2_pos[(i + 1)];
 
-    while (jB1 < pB2_end0 && jC1 < pC2_end0) {
-      int32_t jB2 = B2_crd[jB1];
-      int32_t jC2 = C2_crd[jC1];
-      int32_t j = TACO_MIN(jB2,jC2);
-      if (jB2 == j && jC2 == j) {
+    while (jB < pB2_end && jC < pC2_end) {
+      int32_t jB0 = B2_crd[jB];
+      int32_t jC0 = C2_crd[jC];
+      int32_t j = TACO_MIN(jB0,jC0);
+      if (jB0 == j && jC0 == j) {
         int32_t pA2 = A2_pos[i];
         A2_pos[i] = A2_pos[i] + 1;
         A2_crd[pA2] = j;
-        A_vals[pA2] = B_vals[jB1] + C_vals[jC1];
+        A_vals[pA2] = B_vals[jB] + C_vals[jC];
       }
-      else if (jB2 == j) {
+      else if (jB0 == j) {
         int32_t pA20 = A2_pos[i];
         A2_pos[i] = A2_pos[i] + 1;
         A2_crd[pA20] = j;
-        A_vals[pA20] = B_vals[jB1];
+        A_vals[pA20] = B_vals[jB];
       }
       else {
         int32_t pA21 = A2_pos[i];
         A2_pos[i] = A2_pos[i] + 1;
         A2_crd[pA21] = j;
-        A_vals[pA21] = C_vals[jC1];
+        A_vals[pA21] = C_vals[jC];
       }
-      jB1 += (int32_t)(jB2 == j);
-      jC1 += (int32_t)(jC2 == j);
+      jB += (int32_t)(jB0 == j);
+      jC += (int32_t)(jC0 == j);
     }
-    while (jB1 < pB2_end0) {
-      int32_t j = B2_crd[jB1];
+    while (jB < pB2_end) {
+      int32_t j = B2_crd[jB];
       int32_t pA22 = A2_pos[i];
       A2_pos[i] = A2_pos[i] + 1;
       A2_crd[pA22] = j;
-      A_vals[pA22] = B_vals[jB1];
-      jB1++;
+      A_vals[pA22] = B_vals[jB];
+      jB++;
     }
-    while (jC1 < pC2_end0) {
-      int32_t j = C2_crd[jC1];
+    while (jC < pC2_end) {
+      int32_t j = C2_crd[jC];
       int32_t pA23 = A2_pos[i];
       A2_pos[i] = A2_pos[i] + 1;
       A2_crd[pA23] = j;
-      A_vals[pA23] = C_vals[jC1];
-      jC1++;
+      A_vals[pA23] = C_vals[jC];
+      jC++;
     }
   }
 
diff --git a/examples/spgemm_assembly.c b/examples/spgemm_assembly.c
@@ -66,10 +66,10 @@ int assemble(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *C) {
     int32_t workspace_index_list_size = 0;
     int32_t* restrict workspace_index_list = workspace_index_list_all + C2_dimension * omp_get_thread_num();
     bool* restrict workspace_already_set = workspace_already_set_all + C2_dimension * omp_get_thread_num();
-    for (int32_t kB0 = B2_pos[i]; kB0 < B2_pos[(i + 1)]; kB0++) {
-      int32_t k = B2_crd[kB0];
-      for (int32_t jC0 = C2_pos[k]; jC0 < C2_pos[(k + 1)]; jC0++) {
-        int32_t j = C2_crd[jC0];
+    for (int32_t kB = B2_pos[i]; kB < B2_pos[(i + 1)]; kB++) {
+      int32_t k = B2_crd[kB];
+      for (int32_t jC = C2_pos[k]; jC < C2_pos[(k + 1)]; jC++) {
+        int32_t j = C2_crd[jC];
         if (!workspace_already_set[j]) {
           workspace_index_list[workspace_index_list_size] = j;
           workspace_already_set[j] = 1;
diff --git a/examples/spgemm_full.c b/examples/spgemm_full.c
@@ -26,6 +26,7 @@ typedef struct {
   taco_mode_t* mode_types;    // mode storage types
   uint8_t***   indices;       // tensor index data (per mode)
   uint8_t*     vals;          // tensor values
+  uint8_t*     fill_value;    // tensor fill value
   int32_t      vals_size;     // values array size
 } taco_tensor_t;
 #endif
@@ -36,6 +37,26 @@ int omp_get_max_threads() { return 1; }
 int cmp(const void *a, const void *b) {
   return *((const int*)a) - *((const int*)b);
 }
+int taco_gallop(int *array, int arrayStart, int arrayEnd, int target) { // galloping search over [arrayStart, arrayEnd): returns the first index whose element is >= target (assumes ascending order)
+  if (arrayStart >= arrayEnd || array[arrayStart] >= target) { // range check goes first so an empty range never reads array[arrayStart]
+    return arrayStart;
+  }
+  int step = 1;
+  int curr = arrayStart; // from here on array[curr] < target always holds
+  while (curr + step < arrayEnd && array[curr + step] < target) { // gallop: double the stride while the probed element is still below target
+    curr += step;
+    step = step * 2;
+  }
+  // Binary descent: the last probe at curr + step failed, so retry with successively halved strides.
+  step = step / 2;
+  while (step > 0) {
+    if (curr + step < arrayEnd && array[curr + step] < target) {
+      curr += step;
+    }
+    step = step / 2;
+  }
+  return curr+1; // curr is the last index below target, so the answer is the next one (arrayEnd when no element qualifies)
+}
 int taco_binarySearchAfter(int *array, int arrayStart, int arrayEnd, int target) {
   if (array[arrayStart] >= target) {
     return arrayStart;
@@ -245,10 +266,10 @@ int assemble(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *C) {
     int32_t workspace_index_list_size = 0;
     int32_t* restrict workspace_index_list = workspace_index_list_all + C2_dimension * omp_get_thread_num();
     bool* restrict workspace_already_set = workspace_already_set_all + C2_dimension * omp_get_thread_num();
-    for (int32_t kB0 = B2_pos[i]; kB0 < B2_pos[(i + 1)]; kB0++) {
-      int32_t k = B2_crd[kB0];
-      for (int32_t jC0 = C2_pos[k]; jC0 < C2_pos[(k + 1)]; jC0++) {
-        int32_t j = C2_crd[jC0];
+    for (int32_t kB = B2_pos[i]; kB < B2_pos[(i + 1)]; kB++) {
+      int32_t k = B2_crd[kB];
+      for (int32_t jC = C2_pos[k]; jC < C2_pos[(k + 1)]; jC++) {
+        int32_t j = C2_crd[jC];
         if (!workspace_already_set[j]) {
           workspace_index_list[workspace_index_list_size] = j;
           workspace_already_set[j] = 1;
@@ -353,18 +374,18 @@ int evaluate(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *C) {
     double* restrict workspace = workspace_all + C2_dimension * omp_get_thread_num();
     int32_t* restrict workspace_index_list = workspace_index_list_all + C2_dimension * omp_get_thread_num();
     bool* restrict workspace_already_set = workspace_already_set_all + C2_dimension * omp_get_thread_num();
-    for (int32_t kB0 = B2_pos[i]; kB0 < B2_pos[(i + 1)]; kB0++) {
-      int32_t k = B2_crd[kB0];
-      for (int32_t jC0 = C2_pos[k]; jC0 < C2_pos[(k + 1)]; jC0++) {
-        int32_t j = C2_crd[jC0];
+    for (int32_t kB = B2_pos[i]; kB < B2_pos[(i + 1)]; kB++) {
+      int32_t k = B2_crd[kB];
+      for (int32_t jC = C2_pos[k]; jC < C2_pos[(k + 1)]; jC++) {
+        int32_t j = C2_crd[jC];
         if (!workspace_already_set[j]) {
-          workspace[j] = B_vals[kB0] * C_vals[jC0];
+          workspace[j] = B_vals[kB] * C_vals[jC];
           workspace_index_list[workspace_index_list_size] = j;
           workspace_already_set[j] = 1;
           workspace_index_list_size++;
         }
         else {
-          workspace[j] = workspace[j] + B_vals[kB0] * C_vals[jC0];
+          workspace[j] = workspace[j] + B_vals[kB] * C_vals[jC];
         }
       }
     }
diff --git a/examples/spmv_full.c b/examples/spmv_full.c
diff --git a/examples/ttv_assembly.c b/examples/ttv_assembly.c
diff --git a/examples/ttv_full.c b/examples/ttv_full.c