From f43ce4b202e37e9f00193c2e5b1b0ed9e01974e8 Mon Sep 17 00:00:00 2001
From: Alex McKeehan Boulton <mckeehan@sh03-ln07.stanford.edu>
Date: Tue, 13 Aug 2024 19:21:49 -0700
Subject: [PATCH] Add support for GPU parallelization of diag function via
 PyTorch sparse library

---
 SQcircuit/circuit.py | 41 +++++++++++++++++++++++++++++++----------
 1 file changed, 31 insertions(+), 10 deletions(-)

diff --git a/SQcircuit/circuit.py b/SQcircuit/circuit.py
index d0996d0..2268e41 100755
--- a/SQcircuit/circuit.py
+++ b/SQcircuit/circuit.py
@@ -1917,16 +1917,37 @@ def _diag_np(
         """
         hamil = self.hamiltonian()
 
-        # get the data out of qutip variable and use sparse SciPy 
-        # eigensolver which is faster.
-        try:
-            efreqs, evecs = scipy.sparse.linalg.eigs(
-                hamil.data_as('csr_matrix'), k=n_eig, which='SR'
-            )
-        except ArpackNoConvergence:
-            efreqs, evecs = scipy.sparse.linalg.eigs(
-                hamil.data_as('csr_matrix'), k=n_eig, ncv=10*n_eig, which='SR'
-            )
+
+
+        import time
+        t0 = time.time()
+
+        if torch.cuda.is_available():
+            csr = hamil.data_as('csr_matrix')
+            coo = csr.tocoo()
+
+            indices = torch.tensor([coo.row, coo.col], dtype=torch.int64)
+            values = torch.tensor(coo.data, dtype=torch.float32)
+            sparse_tensor = torch.sparse_coo_tensor(indices, values, coo.shape).to('cuda:0')
+
+            efreqs, evecs = torch.lobpcg(sparse_tensor, k=n_eig)
+            efreqs = efreqs.to('cpu')
+            evecs = evecs.to('cpu')
+        else:
+            # get the data out of qutip variable and use sparse SciPy
+            # eigensolver which is faster.
+            try:
+                efreqs, evecs = scipy.sparse.linalg.eigs(
+                    hamil.data_as('csr_matrix'), k=n_eig, which='SR'
+                )
+            except ArpackNoConvergence:
+                efreqs, evecs = scipy.sparse.linalg.eigs(
+                    hamil.data_as('csr_matrix'), k=n_eig, ncv=10*n_eig, which='SR'
+                )
+
+        t1 = time.time()
+        print(f"total: {t1 - t0}")
+
         # the output of eigen solver is not sorted
         efreqs_sorted = np.sort(efreqs.real)