From 2774ec6ecdc3b40f6b403128cb526fa7a98625db Mon Sep 17 00:00:00 2001
From: Matt Ralston
Date: Tue, 27 Aug 2024 22:09:48 -0400
Subject: [PATCH] Adding some experimental code directly to master.

I'm considering the compositional analysis a "hotfix" because it's essential
to the suite's functionality. The cublas-gemm code was not a success: the
module still doesn't compile because of several outstanding Cythonization
issues. Okay, so while I'm figuring out how to properly Strassen, I can work
with the regression.pyx file and continue to commit to main.

Squash commit:

Author: Matt Ralston
Date: Tue Aug 27 13:58:53 2024 -0400
Date: Mon Aug 26 22:40:36 2024 -0400
Date: Fri Aug 16 20:26:20 2024 -0400

Strassen assignment. Struggling with some portions. New to Cython and its
documentation. The syntax seems agreeable. Some allocate-then-mutate patterns
are bothersome, but nothing is too confusing.

Adds CUDA code for delegating the regression multiplications to cublas-gemm.
Scaffolding from chatgpt, so not entirely valid Cython. Still producing
errors. Thanks in advance.

Closes #154.

Patches parse.py to remove the deprecated parameter 'b' from legacy code.

Pushing to main directly.
---
 .gitignore                 |  16 +++-
 TODO.org                   |  90 +++++++++++++++++-
 docs/Makefile              |  20 ++++
 docs/make.bat              |  35 +++++++
 kmerdb/__init__.py         |  18 ++++
 kmerdb/appmap.py           | 188 +++++++++++++++++++++++++++++++++++--
 kmerdb/config.py           |   2 +-
 kmerdb/cublas_gemm.pyx     | 103 ++++++++++++++++++++
 kmerdb/parse.py            |   6 --
 kmerdb/regression.pyx      | 150 +++++++++++++++++++++++++++++
 kmerdb/strassen_cython.pyx | 111 ++++++++++++++++++++++
 pyproject.toml             |   5 +-
 requirements-dev.txt       |   7 +-
 requirements.txt           |   2 +
 setup.py                   |   7 +-
 15 files changed, 737 insertions(+), 23 deletions(-)
 create mode 100644 docs/Makefile
 create mode 100644 docs/make.bat
 create mode 100644 kmerdb/cublas_gemm.pyx
 create mode 100644 kmerdb/regression.pyx
 create mode 100644 kmerdb/strassen_cython.pyx

diff --git a/.gitignore b/.gitignore
index d1e3080..d47f396 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,7 +6,21 @@ __pycache__
 kdb/__pycache__
 **.kdb
+docs/generated/
 examples/example_report/*.png
 examples/example_report
 test/data
-pypi_token.foo
+kmerdb.egg-info/
+kmerdb.log
+test.*
+build
+dist
+.reinstall.sh
+examples/*/*.png
+examples/*/*.jpg
+
+
+
+
+
+
diff --git a/TODO.org b/TODO.org
index 027cb73..c8963f5 100644
--- a/TODO.org
+++ b/TODO.org
@@ -6,6 +6,78 @@
 # .kdb files should be debrujin graph databases
 # The final prototype would be .bgzf format from biopython
 
+* 8/14/24 compositional ideas
+
+** Notes:
+   it's a regression problem. The coefficients should sum to unity, and the sum of the k-mer count vectors is the total k-mer count.
+
+
+   the count vector is the 'coefficient' times the aggregate vector (.kdb file)
+
+   therefore, the 'coefficients' or contributable proportions, along with regression statistics,
+
+   can be obtained by performing least-squares on a count-matrix system Ax = b, where b is the aggregate/collated/composite count-vector profile of multiple species, from which the decomposition progresses.
+
+   A is the count matrix obtained by collating the k-mer profiles of the suspected constituent species of the 'metagenome'.
+
+   b is the 'observed' or artificially generated metagenome k-mer profile.
+
+   I want to do this in Cython, Numba, and/or CUDA.
+
+   Then do the D2 statistics.
+**
+
+**
+
+
+
+* TODO 8/13/24 priority: compositional analysis
+
+** Lots of priorities, currently. Comp. analysis, custom SVD implementation for the matrix command? 
A = UsigmaVt + +** Markov chain submodules? Log-likelihood ratio + +** TODO [/] T O D O list + +*** IN-PROGRESS [ x ] Quarto journals elsevier preprint for biorxiv +:LOGBOOK: +- State "IN-PROGRESS" from "NEXT" [2024-08-16 Fri 20:33] +:END: + +*** IN-PROGRESS [ x ] compositional analysis (p0) +:LOGBOOK: +- State "IN-PROGRESS" from "NEXT" [2024-08-16 Fri 20:33] +:END: + +*** DONE [ x ]Are all species downloaded +CLOSED: [2024-08-16 Fri 20:33] +:LOGBOOK: +- State "DONE" from "WAITING" [2024-08-16 Fri 20:33] +- State "WAITING" from "DONE" [2024-08-14 Wed 16:36] +- State "DONE" from "WAITING" [2024-08-14 Wed 16:36] +- State "WAITING" from "IN-PROGRESS" [2024-08-14 Wed 16:36] +- State "IN-PROGRESS" from "NEXT" [2024-08-14 Wed 16:36] +:END: + +** Description from Xiong et al Mouse IBD. + +Once GF status was confirmed as described, GF NOD mice were colonized by co-housing with gnotobiotic mice +colonized with defined cultured bacterial species (Altered Schaedler's flora; ASF - [35]) which were prepared +in the laboratory from cloned bacteria using sterile technique. The ASF consists of Lactobacillus acidophilus +(ASF 360), Lactobacillus murinus (ASF 361), Bacteroides distasonis, (ASF 519), Mucispirillum schaedleri (ASF 457), +Eubacterium plexicaudatum (ASF 492), a Fusiform-shaped bacterium (ASF 356) and two Clostridium species (ASF 500, ASF 502). +These ASF-colonized gnotobiotic mice were then bred in isolators to ensure no additional species were introduced. +The presence of the ASF species was confirmed by species-specific bacterial qPCR [58]. + +** + +** + +* 8/9/24 profile decomposition/recomposition(simulation) problem +** Merge profiles with ratios (0.25% B. bifidum) + +** + * 8/8/24 Taking Notes on Xuejiang Xiong Mouse model IBD study ** SRA Accession id @@ -145,10 +217,16 @@ Kolmogorov complexity comes in two flavors: prefix-free (K(x)) and simple comple ** TODO core species choices -*** chicken farm estuary system changes (algination, asphyxia, microbiological changes +*** chicken farm runoff - estuary system changes (algination, asphyxia, microbiological changes) *** anti-human leaky gut syndrome changes. **** i.e. looking at the human leaky gut syndrome, but in reverse. What are bioprotective species and niches that provide resilience to leaky-gut syndrome **** TODO chemophore SMILES and gastrotoxic footprints +**** mouse model (SRA051354) currently being studied from Xuejiang Xiong +**** looking to assess the Altered Shaedler flora/formula changes in irritable bowel syndrome. +**** Currently, only have the accession and brief notes, still reading as of 8/12/24 +**** + + *** pathology of lupus or auto-immune skin condition microbiome/metagenomic changes. *** vaginal microbiome changes *** @@ -156,7 +234,6 @@ Kolmogorov complexity comes in two flavors: prefix-free (K(x)) and simple comple ** * IN PROGRESS 7/10/24 - [IMPORTANT] Needs a choice [cython d2 x graph algorithm features ]: ** [Key choice needed]: 1 [ 2 reviews + cython D2 metrics ] path 2 [ 2 reviews + graph algorithm ] - ** cython d2 metrics including the delta distance : |pab(A)-pab(B)| (Karlin et al, tetra,tri,di- nucleotide frequencies) ** (describe Karlin delta, algorithm to calculate) *** Karlin delta first requires the least ambiguous k-mer (4-mer) frequency, i.e. 
the frequency of self @@ -165,13 +242,18 @@ Kolmogorov complexity comes in two flavors: prefix-free (K(x)) and simple comple *** this specifies the numerator for the tetranucleotide frequency (lowercause tau) *** the denominator is only the most specific tetra and 1-neighboring trinucleotide frequencies, and the mononucleotide frequencies. [ f(acc) f(accg) f(ccg) f(a) f(c) f(t) f(g) ] ** -** new graph file format specification ( walk,path is a subclass of unlabeled graph, where node labels can be visited, path order, and progressive or retro in the walk. +** new graph file format specification (walk, path is a subclass of unlabeled graph, where node labels can be visited, path order, and progressive or retro in the walk. ** contig generator method, and contig boundary definition specification ** ** ** ** -* 6/28/24 - [ ...whoops, forgot the date by 3 x24hr blocxz. ] okay, so the 0.8.4 release should have the graph labeling done. +* TODO 6/28/24 - [ ...whoops, forgot the date by 3 x24hr blocxz. ] okay, so the 0.8.4 release should have the graph labeling done. +:LOGBOOK: +- State "DELEGATED" from "CANCELED" [2024-08-12 Mon 17:02] +- State "CANCELED" from "DELEGATED" [2024-08-12 Mon 17:02] +- State "DELEGATED" from [2024-08-12 Mon 17:02] +:END: ** graph node labeling and classification, and walk strategy diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..d0c3cbf --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..747ffb7 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. 
+ echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/kmerdb/__init__.py b/kmerdb/__init__.py index 7c0e6ef..2414ca1 100644 --- a/kmerdb/__init__.py +++ b/kmerdb/__init__.py @@ -207,6 +207,13 @@ def expanded_help(arguments): sys.stderr.write("\n\nUse --help for expanded usage\n") + +def composition(arguments): + + + from kmerdb.regression import linear_regression + + def distances(arguments): """ An end-user function to provide CLI access to certain distances @@ -2419,6 +2426,17 @@ def cli(): dist_parser.set_defaults(func=distances) + comp_parser = subparsers.add_parser("composition", help=appmap.command_14_description) + + comp_parser.add_argument("-v", "--verbose", help="Prints warnings to the console by default", default=0, action="count") + comp_parser.add_argument("--debug", action="store_true", default=False, help="Debug mode. Do not format errors and condense log") + comp_parser.add_argument("-l", "--log-file", type=str, default="kmerdb.log", help="Destination path to log file") + comp_parser.add_argument("-nl", "--num-log-lines", type=int, choices=config.default_logline_choices, default=50, help=argparse.SUPPRESS) + comp_parser.add_argument("kdb", metavar="composite.$K.kdb", type=str, help="Composite/collated metagenomic k-mer profile (as .kdb) to decompose into constituents") + comp_parser.add_argument("table", metavar="input.tsv", default=sys.STDIN, type=str, help="Input count matrix to use for compositional analysis") + + + index_parser = subparsers.add_parser("index", help=appmap.command_9_description) index_parser.add_argument("--force", action="store_true", help="Force index creation (if previous index exists") diff --git a/kmerdb/appmap.py b/kmerdb/appmap.py index 0b54970..189c40d 100644 --- a/kmerdb/appmap.py +++ b/kmerdb/appmap.py @@ -249,7 +249,8 @@ command_10_name, #"shuf", command_11_name, #"usage", command_12_name, #"help" - command_13_name #"version" + command_13_name, #"version" + command_14_name #"composition" ] @@ -1962,7 +1963,7 @@ """ command_11_parameters = "inputs may be one or more fasta files, and the .kdb files needed for the model's output probability." -command_11_inputs = "Input is a v{0} .kdb count vector file" +command_11_inputs = "Input is a v{0} .kdb count vector file".format(config.VERSION) command_11_usage = "kmerdb prob --db [--db kmer_count_vector_2.kdb] [query_sequences_2.fasta.gz]" @@ -2102,6 +2103,176 @@ +command_14_description = "Compositional analysis" +command_14_description_long = """ + Linear regression wth two inputs. + A 2D array 'A' with two dimensions, and non-necessarily square. + + and a vector of data determining the linear constants in the regression. + + + least squares problems occur when b is not in the column space of A. + + They are solvable if and only if the matrix A: + * is non-singular + * has independent rows/columns + * a non-zero determinant (i.e. it is *not* row identical to the identity matrix) + * and is full-rank. + + + ############ + # definition + ############ + At(b-Ax) = 0 OR + AtAx = Atb + + but, less developed... + Ax = b + + These are the normal equations. + + +""" +command_14_parameters = "inputs are the table of counts to use in the decomposition, and the composite/collated metagenomic k-mer profile (as .kdb) to decompose." 
+command_14_inputs = "Input is a v{0} .kdb count vector file".format(config.VERSION) +command_14_usage = "kmerdb composition -vv --debug input1.12.kdb 12_mer_profiles.tsv" + + + +COMMAND_14_BANNER = """ + + + + + + + + + + + + +[--------------------------------------------------------------------------------------] + + + + + + + + + [ n a m e ] : - {0} + + description : {1} + +{2} + + + + +-------------------------- + + kmerdb composition composite1.12.kdb kmer_counts.12.tsv + + [-] inputs : + + {3} + + [-] parameters : + + {4} + + + + [-] [ usage ] : {5} + + + + + + + + + + + + + + + + + + +[--------------------------------------------------------------------------------------] +""".format(command_14_name, command_14_description, command_14_description_long, command_14_inputs, command_14_parameters, command_14_usage) + + + +COMMAND_14_PARAMS = OrderedDict({ + "name": "arguments", + "type": "array", + "items": [ + { + "name": "K-mer database file.", + "type": "file", + "value": "The k-mer count proifle of a composite/metagenomic population to decompose into percentages" + } + ] +}) + + +COMMAND_14_INPUTS = OrderedDict({ + "name": "inputs", + "type": "array", + "items": [ + { + "name": "Input count matrix (.tsv)", + "type": "file", + "value": "File to decompose the k-mer profile into its parts." + } + + ] +}) + +COMMAND_14_FEATURES = OrderedDict({ + "name": "features", + "type": "array", + "items": [ + OrderedDict({ + "name": "[FIXME! 7/28/24 Hi fross, glhf!]", + "shortname": "", + "description" : "" + }), + OrderedDict({ + "name": "", + "shortname": "", + "description": "(Deprecated)" + }) + ] +}) + +COMMAND_14_STEPS = OrderedDict({ + "name": "steps", + "type": "array", + "items": [ + OrderedDict({ + "name": "", + "shortname": "", + "description": "(uhhhh...)", + }), + OrderedDict({ + "name": "", + "shortname": "Shuffle k-mer counts", + "description": "(Deprecated)" + }) + + ] + +}) + + + + ################################################### # F i n a l c o m m a n d a g g r e g a t e @@ -2119,7 +2290,8 @@ "view": COMMAND_5_PARAMS["items"], "header": COMMAND_6_PARAMS["items"], "index": COMMAND_9_PARAMS["items"], - "shuf": COMMAND_10_PARAMS["items"] + "shuf": COMMAND_10_PARAMS["items"], + "composition": COMMAND_14_PARAMS["items"], } ALL_INPUTS = { @@ -2132,7 +2304,8 @@ "view": COMMAND_5_INPUTS["items"], "header": COMMAND_6_INPUTS["items"], "index": COMMAND_9_INPUTS["items"], - "shuf": COMMAND_10_INPUTS["items"] + "shuf": COMMAND_10_INPUTS["items"], + "composition": COMMAND_14_INPUTS["items"], } ALL_FEATURES = { @@ -2145,8 +2318,8 @@ "view": COMMAND_5_FEATURES["items"], "header": COMMAND_6_FEATURES["items"], "index": COMMAND_9_FEATURES["items"], - "shuf": COMMAND_10_FEATURES["items"] - + "shuf": COMMAND_10_FEATURES["items"], + "composition": COMMAND_14_FEATURES["items"], } @@ -2162,7 +2335,8 @@ "view": COMMAND_5_STEPS["items"], "header": COMMAND_6_STEPS["items"], "index": COMMAND_9_STEPS["items"], - "shuf": COMMAND_10_STEPS["items"] + "shuf": COMMAND_10_STEPS["items"], + "composition": COMMAND_14_STEPS["items"], } diff --git a/kmerdb/config.py b/kmerdb/config.py index f9fe041..f623181 100644 --- a/kmerdb/config.py +++ b/kmerdb/config.py @@ -17,7 +17,7 @@ -VERSION="0.8.6" +VERSION="0.8.7" REQUIRES_PYTHON="3.7.4" header_delimiter = "\n" + ("="*24) + "\n" diff --git a/kmerdb/cublas_gemm.pyx b/kmerdb/cublas_gemm.pyx new file mode 100644 index 0000000..16895e1 --- /dev/null +++ b/kmerdb/cublas_gemm.pyx @@ -0,0 +1,103 @@ + +''' + Copyright 2020 Matthew Ralston + + Licensed under the Apache 
License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +''' +# cublas_gemm.pyx +import numpy as np +cimport numpy as cnp +from libc.stdlib cimport malloc, free +from libc.stdlib cimport memcpy +from cuda import cudaMalloc, cudaMemcpy, cudaFree +from cpython cimport PyObject + +# Declare C interface to cuBLAS +cdef extern from "cublas_v2.h": + ctypedef void* cublasHandle_t + + int cublasCreate(cublasHandle_t* handle) nogil + int cublasDestroy(cublasHandle_t handle) nogil + + int cublasDgemm(cublasHandle_t handle, char transa, char transb, + int m, int n, int k, + double alpha, double *A, int lda, + double *B, int ldb, + double beta, double *C, int ldc) + +# Function to perform matrix multiplication using cuBLAS +def matrix_mult_cuda_int64(cnp.ndarray[cnp.int64_t, ndim=2] A, + cnp.ndarray[cnp.int64_t, ndim=2] B, + cnp.ndarray[cnp.int64_t, ndim=2] C): + cdef int m, n, k + cdef int lda, ldb, ldc + cdef double alpha = 1.0 + cdef double beta = 0.0 + + # Set matrix dimensions + m = A.shape[0] + k = A.shape[1] + n = B.shape[1] + + lda = m + ldb = k + ldc = m + + # Create the cuBLAS context + cdef cublasHandle_t handle = NULL + + if cublasCreate(&handle) != 0: + raise RuntimeError("Failed to create cuBLAS handle") + + #cublasCreate(&handle) # Properly create the cuBLAS handle + + # Allocate device memory for inputs and result + cdef double *d_A + cdef double *d_B + cdef double *d_C + + # Allocate device memory + cudaMalloc(&d_A, m * k * sizeof(double)) + cudaMalloc(&d_B, k * n * sizeof(double)) + cudaMalloc(&d_C, m * n * sizeof(double)) + + # Create temporary host arrays for conversion from int64 to double + cdef double *h_A = malloc(m * k * sizeof(double)) + cdef double *h_B = malloc(k * n * sizeof(double)) + + # Copy data from numpy arrays (int64) to temporary host arrays (double) + for i in range(m * k): + h_A[i] = A[i // A.shape[1], i % A.shape[1]] # Convert to double + + for i in range(k * n): + h_B[i] = B[i // B.shape[1], i % B.shape[1]] # Convert to double + + # Copy from host arrays to device + cudaMemcpy(d_A, h_A, m * k * sizeof(double), cudaMemcpyHostToDevice) + cudaMemcpy(d_B, h_B, k * n * sizeof(double), cudaMemcpyHostToDevice) + + # Perform the matrix multiplication using cuBLAS + cublasDgemm(handle, 'N', 'N', m, n, k, alpha, d_A, lda, + d_B, ldb, beta, d_C, ldc) + + # Copy result back from device to host (output C) + #cudaMemcpy(C.data, d_C, m * n * sizeof(double), cudaMemcpyDeviceToHost) + + # Clean up device memory and host memory + free(h_A) + free(h_B) + #cudaFree(d_A) + #cudaFree(d_B) + #cudaFree(d_C) + cublasDestroy(handle) # Properly destroy the cuBLAS handle diff --git a/kmerdb/parse.py b/kmerdb/parse.py index 56ddaeb..9c65b37 100644 --- a/kmerdb/parse.py +++ b/kmerdb/parse.py @@ -123,12 +123,6 @@ def parsefile(filepath:str, k:int, logger=None): #rows_per_batch:int=100000, b:i Kmer = kmer.Kmers(k, verbose=fasta) # A wrapper class to shred k-mers with recs = [r for r in seqprsr] # A block of exactly 'b' reads-per-block to process in parallel - if not fasta: - if _loggable: - logger.log_it("Read exactly {0} records from the 
seqparser object".format(len(recs)), "DEBUG")
-            assert len(recs) <= b, "The seqparser should return exactly {0} records at a time".format(b)
-        elif _loggable:
-            logger.log_it("Skipping the block size assertion for fasta files", "DEBUG")
 
         if _loggable:
             logger.log_it("Read {0} sequences from the {1} seqparser object".format(len(recs), "fasta" if fasta else "fastq"), "INFO")
diff --git a/kmerdb/regression.pyx b/kmerdb/regression.pyx
new file mode 100644
index 0000000..0304e85
--- /dev/null
+++ b/kmerdb/regression.pyx
@@ -0,0 +1,150 @@
+'''
+   Copyright 2020 Matthew Ralston
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+'''
+
+import sys
+
+
+
+import logging
+logger = logging.getLogger(__file__)
+
+
+
+import numpy as np
+cimport numpy as cnp
+cimport cython
+
+
+#from kmerdb cimport cublas_gemm
+
+from cpython.ref cimport PyObject
+
+
+def linear_regression(a, b):
+    return regression(a, b, "numpy")
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def regression(double[:, :] A, double[:] b, str method):
+    """
+    Linear regression with two inputs:
+    a 2D array 'A', not necessarily square,
+    and a vector of observations 'b' determining the coefficients of the regression.
+
+    Least-squares problems occur when b is not in the column space of A.
+
+    They have a unique solution if and only if the matrix A:
+    * has independent columns (full column rank),
+    * equivalently, AtA is non-singular, with a non-zero determinant, and invertible.
+
+    ############
+    # definition
+    ############
+    At(b - Ax) = 0, i.e.
+    AtAx = Atb
+
+    These are the normal equations for the (generally unsolvable) system Ax = b.
+    """
+    cdef int n, m
+    n = A.shape[0] # number of rows/observations
+    m = A.shape[1] # number of constituents (unknown coefficients)
+
+
+    if n < m:
+        raise ValueError("kmerdb.regression expects at least as many rows (observations) in A as unknown coefficients.")
+
+
+    cdef double[:, :] At = np.empty((m, n), dtype=np.double)
+    cdef double[:, :] At_test = np.empty((m, n), dtype=np.double)
+
+    # AtA: square, symmetric, and positive definite if A has full column rank.
+    cdef double[:, :] AtA = np.empty((m, m), dtype=np.double)
+
+
+    cdef double[:] Atb = np.empty(m, dtype=np.double)
+    cdef double[:] x = np.empty(m, dtype=np.double)
+
+    # Calculate A^T
+    for i in range(n):
+        for j in range(m):
+            At[j, i] = A[i, j]
+    At_test = A.T
+    assert np.array_equal(At_test, At), "kmerdb.regression expects the transpose of A to be equivalent to NumPy's transpose function."
+
+    # Calculate A^T * A
+
+    ## [MULTIPLY]
+    if method == "numpy": # Delegated to BLAS if configured.
+        AtA = np.dot(At, A)
+    elif method == "strassen":
+        raise ValueError("NOTE: not yet implemented. Strassen multiplication (strassen_cython.pyx) is not wired in yet.")
+
+        #AtA = strassen_multiplication(At, A)
+    elif method == "gemm":
+
+        #cdef cnp.ndarray[int64_t, ndim=2] a = A
+        #cdef cnp.ndarray[int64_t, ndim=2] b = B
+        #cdef cnp.ndarray[int64_t, ndim=2] c = C
+
+        #cublas_gemm.matrix_mult_cuda_int64(a, b, c) # Uses cublas_gemm, specifically Dgemm
+
+        #AtA = c
+        #AtA = np.ndarray(c, dtype="uint64")
+
+        raise ValueError("NOTE: not implemented. cublas_gemm.pyx does not Cythonize yet.")
+    else:
+        raise ValueError("kmerdb.regression: unknown method '{0}'".format(method))
+
+    # Ensure the solution can be found by checking for non-singularity.
+
+    detAtA = np.linalg.det(AtA)
+
+    if detAtA == 0:
+        raise ValueError("kmerdb.regression encountered a non-invertible matrix 'AtA', which is used in the solution of the least-squares problem.")
+    else:
+        AtA_inv = np.linalg.inv(AtA)
+
+
+    # Calculate A^T * b
+    Atb = np.dot(At, b)
+
+
+    # Solve the normal equations: x = (AtA)^-1 Atb. This x minimizes ||Ax - b||^2,
+    # i.e. Ax is the projection of b onto the column space of A.
+    x = AtA_inv.dot(Atb)
+
+    return x
+
+
diff --git a/kmerdb/strassen_cython.pyx b/kmerdb/strassen_cython.pyx
new file mode 100644
index 0000000..ca88f13
--- /dev/null
+++ b/kmerdb/strassen_cython.pyx
@@ -0,0 +1,111 @@
+'''
+   Copyright 2020 Matthew Ralston
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+'''
+
+
+
+import numpy as np
+cimport numpy as cnp
+from libc.stdlib cimport malloc, free
+from libc.string cimport memcpy
+
+# Ensure appropriate imports for types
+from libc.stdint cimport int64_t
+
+# A simple utility function to add two matrices
+cdef cnp.ndarray[double, ndim=2] add(cnp.ndarray[double, ndim=2] A,
+                                     cnp.ndarray[double, ndim=2] B):
+    cdef int i, j
+    cdef int rows = A.shape[0]
+    cdef int cols = A.shape[1]
+    cdef cnp.ndarray[double, ndim=2] C = np.empty((rows, cols), dtype=np.double)
+
+    for i in range(rows):
+        for j in range(cols):
+            C[i, j] = A[i, j] + B[i, j]
+
+    return C
+
+# A simple utility function to subtract two matrices
+cdef cnp.ndarray[double, ndim=2] subtract(cnp.ndarray[double, ndim=2] A,
+                                          cnp.ndarray[double, ndim=2] B):
+    cdef int i, j
+    cdef int rows = A.shape[0]
+    cdef int cols = A.shape[1]
+    cdef cnp.ndarray[double, ndim=2] C = np.empty((rows, cols), dtype=np.double)
+
+    for i in range(rows):
+        for j in range(cols):
+            C[i, j] = A[i, j] - B[i, j]
+
+    return C
+
+# Strassen's algorithm for matrix multiplication (square inputs; recursion
+# stops at small or odd dimensions, so power-of-two sizes get the full benefit)
+cdef cnp.ndarray[double, ndim=2] strassen(cnp.ndarray[double, ndim=2] A,
+                                          cnp.ndarray[double, ndim=2] B):
+    cdef int n
+    n = A.shape[0]
+
+    if n <= 2 or n % 2 == 1: # Base case: fall back to np.dot for small or odd sizes
+        return np.dot(A, B)
+
+    # Splitting the matrices into quadrants
+    cdef int half = n // 2
+    cdef cnp.ndarray[double, ndim=2] A11 = A[:half, :half]
+    cdef cnp.ndarray[double, ndim=2] A12 = A[:half, half:]
+    cdef cnp.ndarray[double, ndim=2] A21 = A[half:, :half]
+    cdef cnp.ndarray[double, ndim=2] A22 = A[half:, half:]
+
+    cdef cnp.ndarray[double, ndim=2] B11 = B[:half, :half]
+    cdef cnp.ndarray[double, ndim=2] B12 = B[:half, half:]
+    cdef cnp.ndarray[double, ndim=2] B21 = B[half:, :half]
+    cdef cnp.ndarray[double, ndim=2] B22 = B[half:, half:]
+
+    # Compute the 7 products
+    P1 = strassen(A11, subtract(B12, B22))
+    P2 = strassen(add(A11, A12), B22)
+    P3 = strassen(add(A21, A22), B11)
+    P4 = strassen(A22, subtract(B21, B11))
+    P5 = strassen(add(A11, A22), add(B11, B22))
+    P6 = strassen(subtract(A12, A22), add(B21, B22))
+    P7 = strassen(subtract(A11, A21), add(B11, B12))
+
+    # Combine the products into a single result matrix
+    C = np.empty((n, n), dtype=np.double)
+    C[:half, :half] = add(subtract(add(P5, P4), P2), P6)
+    C[:half, half:] = add(P1, P2)
+    C[half:, :half] = add(P3, P4)
+    C[half:, half:] = subtract(subtract(add(P5, P1), P3), P7) # C22 = P5 + P1 - P3 - P7
+
+    return C
+
+# Batch processing function for Strassen's algorithm
+cpdef batch_strassen(cnp.ndarray[double, ndim=3] A_batch,
+                     cnp.ndarray[double, ndim=3] B_batch):
+    cdef int batch_size, i
+    batch_size = A_batch.shape[0]
+
+    # Result batch initialization
+    result_shape = (batch_size, A_batch.shape[1], A_batch.shape[2])
+    cdef cnp.ndarray[double, ndim=3] C_batch = np.empty(result_shape, dtype=np.double)
+
+    for i in range(batch_size):
+        C_batch[i] = strassen(A_batch[i], B_batch[i])
+
+    return C_batch
+
+
+
diff --git a/pyproject.toml b/pyproject.toml
index 0ae9f0a..cf80d8c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -27,7 +27,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "kmerdb"
-version = "0.8.6"
+version = "0.8.7"
 description = "Yet another correction to the 'yet another correction to just a k-mer counter...'"
 readme = "README.md"
 authors = [{name="Matt Ralston ", email="mralston.development@gmail.com"}]
@@ -81,6 +81,7 @@ dependencies = [
     "matplotlib>=3.5.3",
     "pandas>=2.2.2",
     "setuptools>=69.2.0",
+    "nvidia-cublas>=11.5.1.101",
 ]
 
 #requires-python = ">=3.7.4"
@@ -95,6 +96,8 @@ dev = [
     'build>=0.9.0',
     'coverage>=4.5.4',
     'expects>=0.9.0',
+    'nvidia-pyindex>=1.0.9',
+
     # 'networkx'
     # 'rypy2>=3.4.2'
     # Vanity
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 893f109..7f3968d 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -7,7 +7,6 @@ build>=0.9.0
 coverage>=4.5.4
 expects>=0.9.0
 setuptools>=61.3.1
-
 #########################################
 # Documentation
 #########################################
@@ -18,6 +17,12 @@ sphinx>=8.0.2
 # These are kind of developmental things, there really cant be testing around a 5% tested codebase, this is open source at its worse,
 # and considering otherwise is toxic. 
pytest>=5.3.5 +######################################### +# Nvidia/CUDA/GPU +######################################### +nvidia-pyindex>=1.0.9 + + ######################################### # # Miss me with it diff --git a/requirements.txt b/requirements.txt index 589c22f..21eaf59 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,3 +14,5 @@ scipy>=1.7.3 scikit-learn==1.0.2 matplotlib>=3.5.3 pandas>=2.2.2 +nvidia-cublas>=11.5.1.101 + diff --git a/setup.py b/setup.py index 4c2525e..90a3b88 100644 --- a/setup.py +++ b/setup.py @@ -114,7 +114,7 @@ def can_import(module_name): AUTHOR = 'Matt Ralston' #REQUIRES_PYTHON = ">=3.7.4" REQUIRES_PYTHON = '>=3.12.2' -VERSION = "0.8.6" +VERSION = "0.8.7" KEYWORDS = ["bioinformatics", "fastq", "fasta", "k-mer", "kmer"] CLASSIFIERS = [ "Development Status :: 1 - Planning", @@ -165,6 +165,9 @@ def can_import(module_name): import numpy as np extensions = [ Extension("kmerdb.distance", ["kmerdb/distance.pyx"], include_dirs=[np.get_include()], define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")],), + Extension("kmerdb.regression", ["kmerdb/regression.pyx"], include_dirs=[np.get_include()], define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")],), + Extension("kmerdb.strassen_cython", ["kmerdb/strassen_cython.pyx"], include_dirs=[np.get_include()]), + #Extension("kmerdb.cublas_gemm", ["kmerdb/cublas_gemm.pyx"], include_dirs=[np.get_include()]), ] # Where the magic happens: setup( @@ -183,6 +186,7 @@ def can_import(module_name): packages=find_packages(exclude=["tests", "*.tests", "*.tests.*", "tests.*"]), package_dir={'kmerdb': 'kmerdb'}, package_data={'kmerdb': ['CITATION.txt']}, + library_dirs=[".", "/usr/local/cuda/lib64", "/usr/lib/x86_64-linux-gnu", "/usr/include/linux"], # If your package is a single module, use this instead of 'packages': #py_modules=['kmerdb'], #scripts=['bin/kmerdb', 'bin/kmerdb_report.R'], @@ -197,7 +201,6 @@ def can_import(module_name): # tests_require=['mamba', 'expect'], #cmdclass={'build_ext': build_ext}, ext_modules=cythonize(extensions), - library_dirs=["."], zip_safe=False, ) else:
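
A minimal NumPy sketch of the least-squares composition idea described in the notes above (solve Ax = b for the constituent coefficients via least squares; the coefficients should sum to unity). Illustrative only, not part of the patch: the decompose() helper and the toy counts are hypothetical, and this bypasses the Cython/cuBLAS paths entirely by using numpy.linalg.lstsq instead of an explicit normal-equations solve.

    import numpy as np

    def decompose(A, b):
        """Estimate constituent coefficients x for a composite profile b ~ A @ x.

        A : (num_kmers, num_species) count matrix, one column per suspected species
        b : (num_kmers,) composite/metagenomic k-mer count vector
        """
        # Least-squares solution of A x = b; rank tells us whether it is unique.
        x, residuals, rank, _ = np.linalg.lstsq(A, b, rcond=None)
        if rank < A.shape[1]:
            raise ValueError("A is rank-deficient; the decomposition is not unique.")
        proportions = x / x.sum()  # normalize so the coefficients sum to unity
        return x, proportions

    # Synthetic example: b is a 0.75/0.25 mixture of two species profiles.
    A = np.array([[120.0,  10.0],
                  [ 30.0,  80.0],
                  [ 50.0,  40.0]])
    b = 0.75 * A[:, 0] + 0.25 * A[:, 1]
    coef, prop = decompose(A, b)  # prop is approximately [0.75, 0.25]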