Commit 8cd5b0a

enable distribution infrastructure with major changes to the contractor and quantum operators
1 parent e678a92 commit 8cd5b0a

File tree

6 files changed: +347 -29 lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
@@ -10,6 +10,10 @@

 - Add `merge_count` in `results` module

+### Fixed
+
+- Better contractor infrastructure with a "breakpoint" contractor to directly obtain the tensor networks
+
 ## 1.1.0

 ### Added
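The "breakpoint" contractor mentioned in the entry above is exposed in this commit as `tc.cons.get_tn_info` (see the `tensorcircuit/cons.py` diff below): it runs the cache-friendly contractor preprocessing but stops before any contraction, handing back the einsum-style network description together with the reordered nodes. A minimal usage sketch, not part of the commit; the circuit, qubit count, and observable here are arbitrary illustrative choices, and the unpacking of `tn_info` follows from the `_identity` "algorithm" returning its arguments unchanged:

import tensorcircuit as tc

c = tc.Circuit(4)
c.h(range(4))
# tensor network for <Z0>, built but not yet contracted
nodes = c.expectation_before([tc.gates.z(), [0]], reuse=False)

# "breakpoint": stop before contracting and hand the network to an external path finder
tn_info, sorted_nodes = tc.cons.get_tn_info(nodes)
inputs, output, size_dict = tn_info  # per-tensor symbols, open indices, bond dimensions
arrays = [node.tensor for node in sorted_nodes]  # raw tensors in the matching order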

examples/slicing_auto_pmap_mpo.py

Lines changed: 120 additions & 0 deletions
@@ -0,0 +1,120 @@
"""
This script illustrates how to parallelize both the contraction path
finding and sliced contraction computation
"""

from functools import partial
import os

num_device = 4
os.environ["XLA_FLAGS"] = f"--xla_force_host_platform_device_count={num_device}"
import cotengra as ctg
import tensornetwork as tn

import numpy as np
import scipy
import jax
import optax
import tensorcircuit as tc

backend = "jax"
K = tc.set_backend(backend)
tc.set_dtype("complex128")


def get_circuit(n, d, params):
    c = tc.Circuit(n)
    c.h(range(n))
    for i in range(d):
        for j in range(0, n - 1):
            c.rzz(j, j + 1, theta=params[j, i, 0])
        for j in range(n):
            c.rx(j, theta=params[j, i, 1])
        for j in range(n):
            c.ry(j, theta=params[j, i, 2])
    return c


def core(params, i, tree, n, d, tc_mpo):
    c = get_circuit(n, d, params)
    mps = c.get_quvector()
    e = mps.adjoint() @ tc_mpo @ mps
    _, nodes = tc.cons.get_tn_info(e.nodes)
    input_arrays = [node.tensor for node in nodes]
    sliced_arrays = tree.slice_arrays(input_arrays, i)
    return K.real(tree.contract_core(sliced_arrays, backend=backend))[0, 0]


core_vag = K.value_and_grad(core)


if __name__ == "__main__":
    nqubit = 12
    d = 6

    Jx = jax.numpy.array([1.0] * (nqubit - 1))  # XX coupling strength
    Bz = jax.numpy.array([-1.0] * nqubit)  # Transverse field strength

    # Create TensorNetwork MPO
    tn_mpo = tn.matrixproductstates.mpo.FiniteTFI(Jx, Bz, dtype=np.complex64)
    tc_mpo = tc.quantum.tn2qop(tn_mpo)

    # baseline results
    lattice = tc.templates.graphs.Line1D(nqubit, pbc=False)
    h = tc.quantum.heisenberg_hamiltonian(lattice, hzz=0, hyy=0, hxx=1.0, hz=-1.0)
    es0 = scipy.sparse.linalg.eigsh(K.numpy(h), k=1, which="SA")[0]
    print("exact ground state energy: ", es0)

    params = K.implicit_randn(stddev=0.1, shape=[1, nqubit, d, 3], dtype=tc.rdtypestr)
    params = K.tile(params, [num_device, 1, 1, 1])

    optimizer = optax.adam(5e-2)
    base_opt_state = optimizer.init(params[0])
    replicated_opt_state = jax.tree.map(
        lambda x: (
            jax.numpy.broadcast_to(x, (num_device,) + x.shape)
            if isinstance(x, jax.numpy.ndarray)
            else x
        ),
        base_opt_state,
    )

    @partial(
        jax.pmap,
        axis_name="pmap",
        in_axes=(0, 0, None, None, None, None, 0),
        static_broadcasted_argnums=(2, 3, 4, 5),
    )
    def para_vag(params, i, tree, n, d, tc_mpo, opt_state):
        loss, grads = core_vag(params, i, tree, n, d, tc_mpo)
        grads = jax.lax.psum(grads, axis_name="pmap")
        loss = jax.lax.psum(loss, axis_name="pmap")
        updates, opt_state = optimizer.update(grads, opt_state, params)
        params = optax.apply_updates(params, updates)
        return params, opt_state, loss

    c = get_circuit(nqubit, d, params[0])
    mps = c.get_quvector()
    e = mps.adjoint() @ tc_mpo @ mps
    tn_info, nodes = tc.cons.get_tn_info(e.nodes)

    opt = ctg.ReusableHyperOptimizer(
        parallel=True,
        slicing_opts={
            "target_slices": num_device,
            # "target_size": 2**20,  # Add memory target
        },
        max_repeats=256,
        progbar=True,
        minimize="combo",
    )

    tree = opt.search(*tn_info)

    inds = K.arange(num_device)
    for j in range(100):
        print(f"training loop: {j}-step")
        params, replicated_opt_state, loss = para_vag(
            params, inds, tree, nqubit, d, tc_mpo, replicated_opt_state
        )
        print(loss[0])
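For intuition about the slicing itself: each pmap replica above contracts exactly one slice of the same network (its slice index comes from `inds`), and the `psum` over the "pmap" axis adds the slice values back into the full contraction. A serial sketch of that identity, not part of the example; it reuses `tree` and the `input_arrays` list built inside `core`, and assumes `tree.nslices` is the cotengra contraction-tree attribute giving the total number of slices:

# illustrative serial check: summing all slices on one device should reproduce
# the unsliced contraction value obtained by the pmap + psum combination above
total = sum(
    tree.contract_core(tree.slice_arrays(input_arrays, s), backend="jax")
    for s in range(tree.nslices)
)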

examples/slicing_auto_pmap_vqa.py

Lines changed: 97 additions & 0 deletions
@@ -0,0 +1,97 @@
"""
This script illustrates how to parallelize both the contraction path
finding and sliced contraction computation
"""

from functools import partial
import os

num_device = 8
os.environ["XLA_FLAGS"] = f"--xla_force_host_platform_device_count={num_device}"
import cotengra as ctg
import jax
import optax
import tensorcircuit as tc

backend = "jax"
K = tc.set_backend(backend)


def get_circuit(n, d, params):
    c = tc.Circuit(n)
    for i in range(d):
        for j in range(0, n - 1):
            c.rzz(j, j + 1, theta=params[j, i, 0])
        for j in range(0, n):
            c.rx(j, theta=params[j, i, 1])
    return c


def core(params, i, tree, n, d):
    c = get_circuit(n, d, params)
    nodes = c.expectation_before([tc.gates.z(), [0]], reuse=False)
    _, nodes = tc.cons.get_tn_info(nodes)
    input_arrays = [node.tensor for node in nodes]
    sliced_arrays = tree.slice_arrays(input_arrays, i)
    return K.real(tree.contract_core(sliced_arrays, backend=backend))


core_vag = K.value_and_grad(core)


if __name__ == "__main__":
    nqubit = 14
    d = 7

    params = K.ones([1, nqubit, d, 2], dtype=tc.rdtypestr)
    params = K.tile(params, [num_device, 1, 1, 1])

    optimizer = optax.adam(5e-2)
    base_opt_state = optimizer.init(params[0])
    replicated_opt_state = jax.tree.map(
        lambda x: (
            jax.numpy.broadcast_to(x, (num_device,) + x.shape)
            if isinstance(x, jax.numpy.ndarray)
            else x
        ),
        base_opt_state,
    )

    @partial(
        jax.pmap,
        axis_name="pmap",
        in_axes=(0, 0, None, None, None, 0),
        static_broadcasted_argnums=(2, 3, 4),
    )
    def para_vag(params, i, tree, n, d, opt_state):
        loss, grads = core_vag(params, i, tree, n, d)
        grads = jax.lax.psum(grads, axis_name="pmap")
        loss = jax.lax.psum(loss, axis_name="pmap")
        updates, opt_state = optimizer.update(grads, opt_state, params)
        params = optax.apply_updates(params, updates)
        return params, opt_state, loss

    c = get_circuit(nqubit, d, params[0])
    nodes = c.expectation_before([tc.gates.z(), [0]], reuse=False)
    tn_info, _ = tc.cons.get_tn_info(nodes)

    opt = ctg.ReusableHyperOptimizer(
        parallel=True,
        slicing_opts={
            "target_slices": num_device,
            # "target_size": 2**20,  # Add memory target
        },
        max_repeats=256,
        progbar=True,
        minimize="combo",
    )

    tree = opt.search(*tn_info)

    inds = K.arange(num_device)
    for j in range(20):
        print(f"training loop: {j}-step")
        params, replicated_opt_state, loss = para_vag(
            params, inds, tree, nqubit, d, replicated_opt_state
        )
        print(loss[0])
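A note on the device setup shared by both examples: the `XLA_FLAGS` override at the top forces JAX to expose several virtual CPU devices so the scripts run on a single host. On a machine with real accelerators one would presumably drop that override and size the slicing to the hardware instead, e.g.:

import jax

num_device = jax.device_count()  # e.g. the number of visible GPUs
# then pass {"target_slices": num_device} to ctg.ReusableHyperOptimizer as above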

tensorcircuit/cons.py

Lines changed: 48 additions & 9 deletions
@@ -8,7 +8,7 @@
 import sys
 import time
 from contextlib import contextmanager
-from functools import partial, reduce, wraps
+from functools import partial, reduce, wraps, lru_cache
 from operator import mul
 from typing import Any, Callable, Dict, Iterator, List, Optional, Sequence, Tuple

@@ -439,6 +439,28 @@ def tn_greedy_contractor(
 # base = tn.contractors.opt_einsum_paths.path_contractors.base
 # utils = tn.contractors.opt_einsum_paths.utils

+_einsum_symbols_base = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
+
+
+@lru_cache(2**14)
+def get_symbol(i: int) -> str:
+    """Get the symbol corresponding to int ``i`` - runs through the usual 52
+    letters before resorting to unicode characters, starting at ``chr(192)``
+    and skipping surrogates. From the cotengra codebase.
+    """
+    if i < 52:
+        # use a-z, A-Z first
+        return _einsum_symbols_base[i]
+
+    # then proceed from 'À'
+    i += 140
+
+    if i >= 55296:
+        # skip the surrogate range chr(55296) - chr(57343)
+        i += 2048
+
+    return chr(i)
+

 def _get_path(
     nodes: List[tn.Node], algorithm: Any
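A quick illustrative check of the symbol mapping above, not part of the diff; it assumes the new helper is importable from `tensorcircuit.cons` once this commit is applied:

from tensorcircuit.cons import get_symbol

assert get_symbol(0) == "a"  # the 52 ASCII letters come first
assert get_symbol(51) == "Z"
assert get_symbol(52) == chr(192)  # "À", since 52 + 140 = 192
assert get_symbol(55156) == chr(57344)  # indices hitting the surrogate range are shifted past it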
@@ -451,6 +473,16 @@ def _get_path(
     return algorithm(input_sets, output_set, size_dict), nodes


+def _identity(*args: Any, **kws: Any) -> Any:
+    return args
+
+
+def _sort_tuple_list(input_list: List[Any], output_list: List[Any]) -> List[Any]:
+    sorted_elements = [(tuple(sorted(t)), i) for i, t in enumerate(input_list)]
+    sorted_elements.sort()
+    return [output_list[i] for _, i in sorted_elements]
+
+
 def _get_path_cache_friendly(
     nodes: List[tn.Node], algorithm: Any
 ) -> Tuple[List[Tuple[int, int]], List[tn.Node]]:
@@ -460,18 +492,21 @@ def _get_path_cache_friendly(
     for n in nodes:
         for e in n:
             if id(e) not in mapping_dict:
-                mapping_dict[id(e)] = i
+                mapping_dict[id(e)] = get_symbol(i)
                 i += 1
     # TODO(@refraction-ray): may be not that cache friendly, since the edge id correspondence is not that fixed?
-    input_sets = [set([mapping_dict[id(e)] for e in node.edges]) for node in nodes]
-    placeholder = [[1e20 for _ in range(100)]]
-    order = np.argsort(np.array(list(map(sorted, input_sets)) + placeholder, dtype=object))[:-1]  # type: ignore
-    nodes_new = [nodes[i] for i in order]
+    input_sets = [list([mapping_dict[id(e)] for e in node.edges]) for node in nodes]
+    # placeholder = [[1e20 for _ in range(100)]]
+    # order = np.argsort(np.array(list(map(sorted, input_sets)), dtype=object))  # type: ignore
+    # nodes_new = [nodes[i] for i in order]
+    nodes_new = _sort_tuple_list(input_sets, nodes)
     if isinstance(algorithm, list):
         return algorithm, nodes_new

-    input_sets = [set([mapping_dict[id(e)] for e in node.edges]) for node in nodes_new]
-    output_set = set([mapping_dict[id(e)] for e in tn.get_subgraph_dangling(nodes_new)])
+    input_sets = [list([mapping_dict[id(e)] for e in node.edges]) for node in nodes_new]
+    output_set = list(
+        [mapping_dict[id(e)] for e in tn.get_subgraph_dangling(nodes_new)]
+    )
     size_dict = {
         mapping_dict[id(edge)]: edge.dimension for edge in tn.get_all_edges(nodes_new)
     }
@@ -483,6 +518,9 @@ def _get_path_cache_friendly(
 # directly get input_sets, output_set and size_dict by using identity function as algorithm


+get_tn_info = partial(_get_path_cache_friendly, algorithm=_identity)
+
+
 # some contractor setup usages
 """
 import cotengra as ctg
@@ -513,7 +551,8 @@ def _get_path_cache_friendly(

 def opt_reconf(inputs, output, size, **kws):
     tree = opt.search(inputs, output, size)
-    tree_r = tree.subtree_reconfigure_forest(progbar=True, num_trees=10, num_restarts=20, subtree_weight_what=("size", ))
+    tree_r = tree.subtree_reconfigure_forest(progbar=True, num_trees=10,
+        num_restarts=20, subtree_weight_what=("size", ))
     return tree_r.get_path()

 tc.set_contractor("custom", optimizer=opt_reconf)
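For intuition about the new deterministic node ordering, a tiny illustrative check, not part of the diff: `_sort_tuple_list` sorts the nodes by the sorted tuple of their edge symbols, replacing the earlier `np.argsort`-with-placeholder trick with a stable, hashable ordering.

from tensorcircuit.cons import _sort_tuple_list

# node "n1" carries edge symbols ["a"], node "n0" carries ["b", "a"];
# ("a",) sorts before ("a", "b"), so "n1" comes first in the new order
assert _sort_tuple_list([["b", "a"], ["a"]], ["n0", "n1"]) == ["n1", "n0"]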
