google
diff --git a/‎lib/Dialect/TensorExt/Transforms/BUILD
Lines changed: 3 additions & 1 deletion b/‎lib/Dialect/TensorExt/Transforms/BUILD
Lines changed: 3 additions & 1 deletion
diff --git a/‎lib/Dialect/TensorExt/Transforms/ImplementRotateAndReduce.cpp
Lines changed: 38 additions & 100 deletions b/‎lib/Dialect/TensorExt/Transforms/ImplementRotateAndReduce.cpp
Lines changed: 38 additions & 100 deletions
diff --git a/‎lib/Utils/ArithmeticDag.h renamed to ‎lib/Kernel/ArithmeticDag.h
Lines changed: 13 additions & 2 deletions b/‎lib/Utils/ArithmeticDag.h renamed to ‎lib/Kernel/ArithmeticDag.h
Lines changed: 13 additions & 2 deletions
diff --git a/‎lib/Utils/ArithmeticDagTest.cpp renamed to ‎lib/Kernel/ArithmeticDagTest.cpp
Lines changed: 3 additions & 1 deletion b/‎lib/Utils/ArithmeticDagTest.cpp renamed to ‎lib/Kernel/ArithmeticDagTest.cpp
Lines changed: 3 additions & 1 deletion
diff --git a/‎lib/Kernel/BUILD
Lines changed: 51 additions & 5 deletions b/‎lib/Kernel/BUILD
Lines changed: 51 additions & 5 deletions
@@ -132,8 +132,10 @@ cc_library(
     deps = [
         ":pass_inc_gen",
         "@heir//lib/Dialect/TensorExt/IR:Dialect",
+        "@heir//lib/Kernel:ArithmeticDag",
+        "@heir//lib/Kernel:IRMaterializingVisitor",
+        "@heir//lib/Kernel:KernelImplementation",
         "@llvm-project//llvm:Support",
-        "@llvm-project//mlir:Analysis",
         "@llvm-project//mlir:ArithDialect",
         "@llvm-project//mlir:IR",
         "@llvm-project//mlir:Pass",
 
@@ -2,55 +2,55 @@
 
 #include <cmath>
 #include <cstdint>
+#include <memory>
+#include <optional>
 
 #include "lib/Dialect/TensorExt/IR/TensorExtOps.h"
-#include "llvm/include/llvm/Support/Debug.h"             // from @llvm-project
-#include "mlir/include/mlir/Dialect/Arith/IR/Arith.h"    // from @llvm-project
-#include "mlir/include/mlir/Dialect/Tensor/IR/Tensor.h"  // from @llvm-project
-#include "mlir/include/mlir/IR/BuiltinAttributes.h"      // from @llvm-project
-#include "mlir/include/mlir/IR/BuiltinTypes.h"           // from @llvm-project
-#include "mlir/include/mlir/IR/Diagnostics.h"            // from @llvm-project
-#include "mlir/include/mlir/IR/OpDefinition.h"           // from @llvm-project
-#include "mlir/include/mlir/IR/OperationSupport.h"       // from @llvm-project
-#include "mlir/include/mlir/IR/PatternMatch.h"           // from @llvm-project
-#include "mlir/include/mlir/IR/Value.h"                  // from @llvm-project
-#include "mlir/include/mlir/Support/LLVM.h"              // from @llvm-project
-#include "mlir/include/mlir/Support/LogicalResult.h"     // from @llvm-project
+#include "lib/Kernel/ArithmeticDag.h"
+#include "lib/Kernel/IRMaterializingVisitor.h"
+#include "lib/Kernel/KernelImplementation.h"
+#include "llvm/include/llvm/Support/Debug.h"          // from @llvm-project
+#include "mlir/include/mlir/IR/BuiltinAttributes.h"   // from @llvm-project
+#include "mlir/include/mlir/IR/BuiltinTypes.h"        // from @llvm-project
+#include "mlir/include/mlir/IR/Diagnostics.h"         // from @llvm-project
+#include "mlir/include/mlir/IR/OpDefinition.h"        // from @llvm-project
+#include "mlir/include/mlir/IR/OperationSupport.h"    // from @llvm-project
+#include "mlir/include/mlir/IR/PatternMatch.h"        // from @llvm-project
+#include "mlir/include/mlir/IR/Value.h"               // from @llvm-project
+#include "mlir/include/mlir/Support/LLVM.h"           // from @llvm-project
+#include "mlir/include/mlir/Support/LogicalResult.h"  // from @llvm-project
 
 #define DEBUG_TYPE "implement-rotate-and-reduce"
 
 namespace mlir {
 namespace heir {
 namespace tensor_ext {
 
+using ::mlir::heir::kernel::ArithmeticDagNode;
+using ::mlir::heir::kernel::implementRotateAndReduce;
+using ::mlir::heir::kernel::IRMaterializingVisitor;
+using ::mlir::heir::kernel::SSAValue;
+
 #define GEN_PASS_DEF_IMPLEMENTROTATEANDREDUCE
 #include "lib/Dialect/TensorExt/Transforms/Passes.h.inc"
 
-// TODO(#2136): Add a better way to test the correctness of this kernel.
 // Lowers a tensor_ext.rotate_and_reduce op by building an arithmetic DAG with
 // kernel::implementRotateAndReduce and materializing that DAG as IR placed
 // directly after `op`; the materialized value then replaces `op`. Returns
 // success() on the visible path. The lines elided between the two hunks
 // presumably hold the failure path of the divisibility check -- not visible
 // here, TODO confirm.
 LogicalResult convertRotateAndReduceOp(RotateAndReduceOp op) {
   LLVM_DEBUG(llvm::dbgs() << "Converting tensor_ext.rotate_and_reduce op: "
                           << op << "\n");
-  if (!op.getPlaintexts()) {
-    // TODO(#2122): Implement the case where we accumulate the ciphertext slot
-    // values.
-    return op->emitOpError() << "rotate and reduce not implemented yet for "
-                                "ciphertext value accumulation";
-  }
-
-  IRRewriter rewriter(op.getContext());
   TypedValue<RankedTensorType> input = op.getTensor();
-  TypedValue<RankedTensorType> plaintexts = op.getPlaintexts();
   unsigned steps = op.getSteps().getZExtValue();
   unsigned period = op.getPeriod().getZExtValue();
+  std::shared_ptr<ArithmeticDagNode<SSAValue>> implementedKernel;
+  SSAValue vectorLeaf(input);
 
-  StringRef mulOpName = isa<IntegerType>(input.getType().getElementType())
-                            ? "arith.muli"
-                            : "arith.mulf";
-  StringRef addOpName = isa<IntegerType>(input.getType().getElementType())
-                            ? "arith.addi"
-                            : "arith.addf";
   // NOTE(review): this branch has no else or early return, so control falls
   // through and `implementedKernel` is reassigned unconditionally further
   // down, using an optional built from the (null) plaintexts value. Unless
   // the elided lines change this, the plaintext-free kernel built here is
   // discarded -- confirm whether an else / early exit was intended.
+  if (!op.getPlaintexts()) {
+    implementedKernel = implementRotateAndReduce(
+        vectorLeaf, std::optional<SSAValue>(), period, steps);
+  }
+
+  TypedValue<RankedTensorType> plaintexts = op.getPlaintexts();
 
-  // Use a value of sqrt(n) as the baby step / giant step size.
+  // Validate divisibility of step size
   // NOTE(review): if `steps` is 0 then babySteps is 0 and the division below
   // divides by zero; presumably the op verifier guarantees steps > 0 -- TODO
   // confirm.
   auto babySteps = static_cast<int64_t>(std::floor(std::sqrt(steps)));
   unsigned giantSteps = steps / babySteps;
   if (giantSteps * babySteps != steps) {
@@ -64,78 +64,16 @@ LogicalResult convertRotateAndReduceOp(RotateAndReduceOp op) {
              << steps << " with babySteps= " << babySteps
              << " and giantSteps= " << giantSteps << "\n");
 
-  // Compute sqrt(n) ciphertext rotations of the input as baby-steps.
-  rewriter.setInsertionPointAfter(op);
-  SmallVector<Value> babyStepVals;
-  babyStepVals.push_back(input);
-  for (int64_t i = 1; i < babySteps; ++i) {
-    babyStepVals.push_back(rewriter
-                               .create<tensor_ext::RotateOp>(
-                                   op->getLoc(), input,
-                                   rewriter.create<arith::ConstantIndexOp>(
-                                       op->getLoc(), period * i))
-                               .getResult());
-  }
-
-  unsigned plaintextSize = plaintexts.getType().getRank();
-  SmallVector<OpFoldResult> offsets(plaintextSize, rewriter.getIndexAttr(0));
-  SmallVector<OpFoldResult> sliceSizes;
-  sliceSizes.reserve(plaintextSize);
-  sliceSizes.push_back(rewriter.getIndexAttr(1));
-  for (int64_t i = 1; i < plaintextSize; ++i) {
-    sliceSizes.push_back(
-        rewriter.getIndexAttr(plaintexts.getType().getDimSize(i)));
-  }
-  SmallVector<OpFoldResult> unitStrides(plaintextSize,
-                                        rewriter.getIndexAttr(1));
-
-  // Compute the inner baby step sums.
-  Value result;
-  for (unsigned k = 0; k < giantSteps; ++k) {
-    Value innerSum;
-    auto rotationIndex = rewriter.create<arith::ConstantIndexOp>(
-        op->getLoc(), -babySteps * k * period);
-    for (unsigned j = 0; j < babySteps; ++j) {
-      offsets[0] = rewriter.getIndexAttr(j + k * babySteps * period);
-      Value rotatedPlaintext = rewriter.create<tensor_ext::RotateOp>(
-          op->getLoc(),
-          rewriter.create<tensor::ExtractSliceOp>(op->getLoc(), input.getType(),
-                                                  plaintexts, offsets,
-                                                  sliceSizes, unitStrides),
-          rotationIndex);
-      Value multiplied =
-          rewriter
-              .create(OperationState(op->getLoc(), mulOpName,
-                                     {rotatedPlaintext, babyStepVals[j]},
-                                     {rotatedPlaintext.getType()}))
-              ->getResults()[0];
-      if (!innerSum) {
-        innerSum = multiplied;
-      } else {
-        innerSum = rewriter
-                       .create(OperationState(op->getLoc(), addOpName,
-                                              {innerSum, multiplied},
-                                              {innerSum.getType()}))
-                       ->getResults()[0];
-      }
-    }
-
-    auto rotatedSum = rewriter.create<tensor_ext::RotateOp>(
-        op->getLoc(), innerSum,
-        rewriter.create<arith::ConstantIndexOp>(op->getLoc(),
-                                                period * k * babySteps));
-    if (!result) {
-      result = rotatedSum;
-    } else {
-      result =
-          rewriter
-              .create(OperationState(op->getLoc(), addOpName,
-                                     {result, rotatedSum}, {result.getType()}))
-              ->getResults()[0];
-    }
-  }
+  auto plaintextsLeaf = std::optional<SSAValue>(plaintexts);
+  implementedKernel =
+      implementRotateAndReduce(vectorLeaf, plaintextsLeaf, period, steps);
 
-  rewriter.replaceOp(op, result);
   // Emit the IR for the DAG immediately after `op`, then swap it in for `op`.
+  IRRewriter rewriter(op.getContext());
+  rewriter.setInsertionPointAfter(op);
+  ImplicitLocOpBuilder b(op.getLoc(), rewriter);
+  IRMaterializingVisitor visitor(b, input.getType());
+  Value finalOutput = implementedKernel->visit(visitor);
+  rewriter.replaceOp(op, finalOutput);
   return success();
 }
 
 
@@ -3,13 +3,15 @@
 
 #include <cassert>
 #include <cstddef>
+#include <cstdint>
 #include <memory>
 #include <unordered_map>
 #include <utility>
 #include <variant>
 
 namespace mlir {
 namespace heir {
+namespace kernel {
 
 // This file contains a generic DAG structure that can be used for representing
 // arithmetic DAGs with leaf nodes of various types.
@@ -53,7 +55,7 @@ struct PowerNode {
 // DAG node recording a left rotation of `operand` by `shift`.
 // This change makes `shift` signed (int64_t instead of size_t); presumably so
 // that negative rotation amounts can be represented -- TODO confirm against
 // callers.
 template <typename T>
 struct LeftRotateNode {
   std::shared_ptr<ArithmeticDagNode<T>> operand;
-  size_t shift;
+  int64_t shift;
 };
 
 template <typename T>
@@ -141,7 +143,7 @@ struct ArithmeticDagNode {
   }
 
   static std::shared_ptr<ArithmeticDagNode<T>> leftRotate(
-      std::shared_ptr<ArithmeticDagNode<T>> tensor, size_t shift) {
+      std::shared_ptr<ArithmeticDagNode<T>> tensor, int64_t shift) {
     assert(tensor && "invalid tensor for leftRotate");
     auto node =
         std::shared_ptr<ArithmeticDagNode<T>>(new ArithmeticDagNode<T>());
@@ -213,40 +215,49 @@ class CachingVisitor {
 
   // Default ConstantNode handler: virtual so a derived visitor can override
   // it; the base version asserts. The value-initialized return gives the
   // function a defined result when asserts are compiled out (NDEBUG).
   virtual ResultType operator()(const ConstantNode& node) {
     assert(false && "Visit logic for ConstantNode is not implemented.");
+    return ResultType();
   }
 
   // Default LeafNode handler: asserts unless overridden by a derived visitor.
   // Returns a value-initialized ResultType so control never runs off the end
   // of the function when assert is disabled.
   virtual ResultType operator()(const LeafNode<T>& node) {
     assert(false && "Visit logic for LeafNode is not implemented.");
+    return ResultType();
   }
 
   // Default AddNode handler: asserts unless overridden by a derived visitor.
   // Returns a value-initialized ResultType so control never runs off the end
   // of the function when assert is disabled.
   virtual ResultType operator()(const AddNode<T>& node) {
     assert(false && "Visit logic for AddNode is not implemented.");
+    return ResultType();
   }
 
   // Default SubtractNode handler: asserts unless overridden by a derived
   // visitor. Returns a value-initialized ResultType so control never runs off
   // the end of the function when assert is disabled.
   virtual ResultType operator()(const SubtractNode<T>& node) {
     assert(false && "Visit logic for SubtractNode is not implemented.");
+    return ResultType();
   }
 
   // Default MultiplyNode handler: asserts unless overridden by a derived
   // visitor. Returns a value-initialized ResultType so control never runs off
   // the end of the function when assert is disabled.
   virtual ResultType operator()(const MultiplyNode<T>& node) {
     assert(false && "Visit logic for MultiplyNode is not implemented.");
+    return ResultType();
   }
 
   // Default PowerNode handler: asserts unless overridden by a derived
   // visitor. Returns a value-initialized ResultType so control never runs off
   // the end of the function when assert is disabled.
   virtual ResultType operator()(const PowerNode<T>& node) {
     assert(false && "Visit logic for PowerNode is not implemented.");
+    return ResultType();
   }
 
   // Default LeftRotateNode handler: asserts unless overridden by a derived
   // visitor. Returns a value-initialized ResultType so control never runs off
   // the end of the function when assert is disabled.
   virtual ResultType operator()(const LeftRotateNode<T>& node) {
     assert(false && "Visit logic for LeftRotateNode is not implemented.");
+    return ResultType();
   }
 
   // Default ExtractNode handler: asserts unless overridden by a derived
   // visitor. Returns a value-initialized ResultType so control never runs off
   // the end of the function when assert is disabled.
   virtual ResultType operator()(const ExtractNode<T>& node) {
     assert(false && "Visit logic for ExtractNode is not implemented.");
+    return ResultType();
   }
 
  private:
   std::unordered_map<const ArithmeticDagNode<T>*, ResultType> cache;
 };
 
+}  // namespace kernel
 }  // namespace heir
 }  // namespace mlir
 
 
@@ -6,10 +6,11 @@
 #include <string>
 
 #include "gtest/gtest.h"  // from @googletest
-#include "lib/Utils/ArithmeticDag.h"
+#include "lib/Kernel/ArithmeticDag.h"
 
 namespace mlir {
 namespace heir {
+namespace kernel {
 namespace {
 
 using StringLeavedDag = ArithmeticDagNode<std::string>;
@@ -154,5 +155,6 @@ TEST(ArithmeticDagTest, TestEvaluationVisitorSubstract) {
 }
 
 }  // namespace
+}  // namespace kernel
 }  // namespace heir
 }  // namespace mlir
@@ -6,6 +6,38 @@ package(
     default_visibility = ["//visibility:public"],
 )
 
+cc_library(
     # Header-only target for the arithmetic DAG.
     # NOTE(review): ArithmeticDag.h is listed under both srcs and hdrs; the
     # srcs entry looks redundant for a header-only library -- confirm.
+    name = "ArithmeticDag",
+    srcs = ["ArithmeticDag.h"],
+    hdrs = ["ArithmeticDag.h"],
+)
+
+cc_test(
     # Unit test for the ArithmeticDag target; gtest_main supplies main().
+    name = "ArithmeticDagTest",
+    srcs = ["ArithmeticDagTest.cpp"],
+    deps = [
+        ":ArithmeticDag",
+        "@googletest//:gtest_main",
+    ],
+)
+
+cc_library(
     # Library built from IRMaterializingVisitor.{h,cpp}. Depends on the DAG and
     # kernel implementation targets plus the TensorExt, Arith and Tensor
     # dialect libraries.
+    name = "IRMaterializingVisitor",
+    srcs = ["IRMaterializingVisitor.cpp"],
+    hdrs = ["IRMaterializingVisitor.h"],
+    deps = [
+        ":ArithmeticDag",
+        ":KernelImplementation",
+        "@heir//lib/Dialect/TensorExt/IR:Dialect",
+        "@heir//lib/Utils:MathUtils",
+        "@llvm-project//llvm:Support",
+        "@llvm-project//mlir:ArithDialect",
+        "@llvm-project//mlir:IR",
+        "@llvm-project//mlir:Support",
+        "@llvm-project//mlir:TensorDialect",
+    ],
+)
+
 cc_library(
     name = "Kernel",
     srcs = ["Kernel.cpp"],
@@ -22,23 +54,37 @@ cc_library(
 
 # Kernel implementations. This change drops the srcs entry, leaving only
 # KernelImplementation.h, and switches the DAG dependency to the local
 # :ArithmeticDag target.
 cc_library(
     name = "KernelImplementation",
-    srcs = ["KernelImplementation.cpp"],
     hdrs = ["KernelImplementation.h"],
     deps = [
+        ":ArithmeticDag",
         ":Kernel",
-        "@heir//lib/Utils:ArithmeticDag",
         "@llvm-project//mlir:IR",
+        "@llvm-project//mlir:Support",
+        "@llvm-project//mlir:TensorDialect",
+    ],
+)
+
+cc_library(
     # Helper library built from TestingUtils.{h,cpp}; presumably shared by the
     # kernel tests in this package -- TODO confirm its users.
+    name = "TestingUtils",
+    srcs = ["TestingUtils.cpp"],
+    hdrs = ["TestingUtils.h"],
+    deps = [
+        ":ArithmeticDag",
+        ":KernelImplementation",
     ],
 )
 
 # Test binary for the kernel implementations. After this change it also
 # compiles RotateAndReduceImplTest.cpp and links :TestingUtils.
 cc_test(
     name = "KernelImplementationTest",
-    srcs = ["KernelImplementationTest.cpp"],
+    srcs = [
+        "KernelImplementationTest.cpp",
+        "RotateAndReduceImplTest.cpp",
+    ],
     deps = [
+        ":ArithmeticDag",
         ":Kernel",
         ":KernelImplementation",
+        ":TestingUtils",
         "@googletest//:gtest_main",
-        "@heir//lib/Utils:ArithmeticDag",
-        "@llvm-project//mlir:IR",
     ],
 )