Added gradient ect computation to fect.py

ErnstRoell · ErnstRoell · commit 2de51590a168 · 2025-08-08T10:43:05.000+02:00
diff --git a/README.md b/README.md
@@ -66,6 +66,32 @@ plt.imshow(ect.detach().squeeze().numpy().T)
 plt.show()
 ```
 
+
+## Fast Euler Characteristic Transform
+
+
+```
+from dect.directions import generate_2d_directions
+from dect.fect import compute_fast_ect
+
+v = generate_2d_directions(num_thetas=2048).cuda()
+x_true = 0.5 * torch.rand(size=(10000, 2)).cuda()
+x = torch.nn.Parameter(0.2 * (torch.rand(size=(10000, 2), device="cuda") - 0.5))
+
+optimizer = torch.optim.Adam([x], lr=0.01)
+
+for epoch in range(200):
+    optimizer.zero_grad()
+    ect_true = fastect(x_true, v)
+    ect_pred = fastect(x, v)
+    loss = torch.nn.functional.mse_loss(ect_pred, ect_true)
+    loss.backward()
+    optimizer.step()
+```
+
+
+
+
 ## License
 
 Our code is released under a BSD-3-Clause license. This license essentially
diff --git a/dect/fect.py b/dect/fect.py
@@ -7,7 +7,7 @@
 Both compute at decent speed, inference times were approx 1.5 seconds. The
 current code is not optimized and relies heavily on torch.scatter perform the 2d
 bincount. It is therefore expected that a custom triton/cuda kernel will
-significantly reduce the compute time.  
+significantly reduce the compute time.
 
 Type casting to the right types in torch is non-ideal, leading to a non-memory
 optimized algorithm with much higher memory needs than needed. Case in point,
@@ -17,7 +17,7 @@
 unnecessary OOM errors.
 
 Important, no guards for overflows (it happens silently) and no
-differentiability. 
+differentiability.
 """
 
 import torch
@@ -29,7 +29,7 @@
 
 def bincount(idx, resolution):
     """Calculates the histogram in resolution bins."""
-    x = torch.zeros(size=(resolution, resolution), dtype=torch.int16)
+    x = torch.zeros(size=(resolution, resolution), dtype=torch.float32, device="cuda")
     return x.scatter_(0, idx.to(torch.int64), 1, reduce="add")
 
 
@@ -46,3 +46,30 @@ def fast_ect_edges(x, ei, v):
     nh = ((torch.matmul(x, v) + 1) * (resolution // 2)).to(torch.int32)
     eh = nh[ei].max(axis=0)[0]
     return bincount(nh, resolution), bincount(eh, resolution)
+
+
+class FastECT(torch.autograd.Function):
+    @staticmethod
+    def forward(x, v):
+        ect, idx = fast_ect_fn(x, v)
+        return ect.cumsum(dim=0), ect, idx
+
+    @staticmethod
+    def setup_context(ctx, inputs, outputs):
+        (ect, ect_grad, idx) = outputs
+        (_, v) = inputs
+        ctx.save_for_backward(ect, ect_grad, idx, v)
+
+    @staticmethod
+    def backward(ctx, grad_output, _, __):
+        (ect, ect_grad, idx, v) = ctx.saved_tensors
+        grad = ect_grad * grad_output / v.shape[1]
+        # Do not know if this will be correct.
+        ect_final_grad = torch.gather(grad, dim=0, index=idx.to(torch.int64))
+        out = ect_final_grad @ v.T
+        return -1 * out, None
+
+
+def compute_fast_ect(x, v):
+    ect, _, _ = FastECT.apply(x, v)
+    return ect