diff --git a/.python-version b/.python-version
deleted file mode 100644
index 3857eea..0000000
--- a/.python-version
+++ /dev/null
@@ -1 +0,0 @@
-kdb
diff --git a/TODO.org b/TODO.org
index 2d7070f..036edd1 100644
--- a/TODO.org
+++ b/TODO.org
@@ -5,6 +5,8 @@
 
 # .kdb files should be debrujin graph databases
 # The final prototype would be .bgzf format from biopython
+
+* 12/16/24 | Minimizers (lexicographic with window size)
 * 10/5/24 | released from hospital (2 days) and working on refactoring kmer module
 * 9/21/24 | somewhat changing from gpt4o-mini to claude sonnet
 ** using sonnet to create llc documents
diff --git a/kmerdb/__init__.py b/kmerdb/__init__.py
index 109ce02..193f7d7 100644
--- a/kmerdb/__init__.py
+++ b/kmerdb/__init__.py
@@ -204,9 +204,71 @@ def expanded_help(arguments):
         kmerdb_appmap.print_matrix_header()
     elif arguments.method == "distance":
         kmerdb_appmap.print_distance_header()
+    elif arguments.method == "minimizers":
+        kmerdb_appmap.print_minimizers_header()
 
+        
     sys.stderr.write("\n\nUse --help for expanded usage\n")
 
+def get_minimizers(arguments):
+
+    from Bio import SeqIO
+    import numpy as np
+    from kmerdb import fileutil, config, util, minimizer
+    import json
+    metadata = None
+    N = None
+    kmer_ids = None
+    fasta_seqs = []
+    mins = None
+    
+    assert type(arguments.kdb) is str, "kdb_in must be a str"
+    kdb_sfx = os.path.splitext(arguments.kdb)[-1]
+    fa_sfx = os.path.splitext(arguments.fasta)[-1]
+    
+    if kdb_sfx != ".kdb" and kdb_sfx != ".kdbg": # A filepath with invalid suffix
+        raise IOError("Input .kdb filepath '{0}' does not end in '.kdb'".format(arguments.kdb))
+    elif not os.path.exists(arguments.kdb):
+        raise IOError("Input .kdb filepath '{0}' does not exist on the filesystem".format(arguments.kdb))
+    elif fa_sfx != ".fa" and fa_sfx != ".fna" and fa_sfx != ".fasta":
+        raise IOError("Input .fasta filepath '{0}' does not exist on the filesystem".format(arguments.fasta))
+    
+    
+    if kdb_sfx == ".kdb":
+
+        with open(arguments.fasta, mode="r") as ifile:
+            for record in SeqIO.parse(ifile, "fasta"):
+                fasta_seqs.append(str(record.seq))
+
+                      
+        with fileutil.open(arguments.kdb, mode='r', slurp=True) as kdb_in:
+            metadata = kdb_in.metadata
+
+            
+            kmer_ids_dtype = metadata["kmer_ids_dtype"]
+            N = 4**metadata["k"]
+            if metadata["version"] != config.VERSION:
+                logger.log_it("KDB version is out of date, may be incompatible with current KDBReader class", "WARNING")
+            kmer_ids = kdb_in.kmer_ids
+
+            for seq in fasta_seqs:
+
+                minimizers = minimizer.select_lexicographical_minimizers(seq, metadata['k'], arguments.window_size, kmer_ids)
+
+                mins_as_list = list(map(lambda s: int(s), minimizers))
+                if mins is None:
+                    mins=mins_as_list
+                else:
+                    for kmer_is_selected, i in enumerate(mins_as_list):
+                        if mins_as_list[i] == 1:
+                            pass
+                        elif kmer_is_selected == 1:
+                            mins_as_list[i] = 1
+                # print minimizers in columnar format in new index file format (example.8.kdbi.1)
+                print(mins)
+                sys.exit(1)
+
+    
 def distances(arguments):
     """
     An end-user function to provide CLI access to certain distances
@@ -1084,7 +1146,7 @@ def hierarchical(arguments):
     
 #     from ecopy.diversity import rarefy
 #     import ecopy as ep
-#     from kdb import config
+#     from kmerdb import config
 
 
 #     if isinstance(arguments.input, argparse.FileType) or isinstance(arguments.input, TextIOWrapper):
@@ -1952,12 +2014,13 @@ def _profile(arguments):
     # Complete collating of counts across files
     # This technically uses 1 more arrray than necessary 'final_counts' but its okay
     logger.log_it("Summing counts from individual fasta/fastq files into a composite profile...", "INFO")
-
+    
 
     for d in data:
         counts = counts + d[0]
+        file_metadata.append(d[1])
 
-
+        
     if np.sum(counts) == 0:
         raise ValueError("Each element of the array of k-mer counts was 0. Sum of counts == 0. Likely an internal error. Exiting.")
 
@@ -2061,6 +2124,8 @@ def _profile(arguments):
     
     logger.log_it("Formatting master metadata dictionary...", "INFO")
     
+
+
     
     metadata=OrderedDict({
         "version": VERSION,
@@ -2353,7 +2418,20 @@ def cli():
     # assembly_parser.add_argument("kdbg", type=str, help=".kdbg file")
     # assembly_parser.set_defaults(func=assembly)
 
+    minimizer_parser = subparsers.add_parser("minimizers", help="Calculate an binary array of minimizers to associate with a .kdb count vector")
+    minimizer_parser.add_argument("kdb", type=str, help="A k-mer database file (.kdb)")
+    minimizer_parser.add_argument("fasta", type=str, help="A fasta file (.fa)")
+
+    minimizer_parser.add_argument("-w", "--window-size",  type=int, help="Choice of window size (in base-pairs) to select minimizers with", required=True)
+
+    minimizer_parser.add_argument("-v", "--verbose", help="Prints warnings to the console by default", default=0, action="count")
+    minimizer_parser.add_argument("--debug", action="store_true", default=False, help="Debug mode. Do not format errors and condense log")
+    minimizer_parser.add_argument("-nl", "--num-log-lines", type=int, choices=config.default_logline_choices, default=50, help=argparse.SUPPRESS)
+    minimizer_parser.add_argument("-l", "--log-file", type=str, default="kmerdb.log", help=argparse.SUPPRESS)
+    minimizer_parser.set_defaults(func=get_minimizers)
 
+
+    
     usage_parser = subparsers.add_parser("usage", help="provide expanded usage information on parameters and functions provided")
     usage_parser.add_argument("method", type=str, choices=("usage", "help", "profile", "graph", "index", "shuf", "matrix", "distance"), help="Print expanded usage statement")
     usage_parser.add_argument("-v", "--verbose", help="Prints warnings to the console by default", default=0, action="count")
@@ -2527,6 +2605,8 @@ def cli():
     version_parser.add_argument("-nl", "--num-log-lines", type=int, choices=config.default_logline_choices, default=50, help=argparse.SUPPRESS)
     version_parser.set_defaults(func=version)
 
+
+
     
     # markov_probability_parser = subparsers.add_parser("probability", help=u"""
 # Calculate the log-odds ratio of the Markov probability of a given sequence from the product (pi) of the transition probabilities(aij) times the frequency of the first k-mer (P(X1)), given the entire k-mer profile of a species.
diff --git a/kmerdb/appmap.py b/kmerdb/appmap.py
index a9b6610..9954eef 100644
--- a/kmerdb/appmap.py
+++ b/kmerdb/appmap.py
@@ -234,23 +234,23 @@
 
 
 
-command_1_name, command_2_name, command_3_name, command_4_name, command_5_name, command_6_name, command_7_name, command_8_name, command_9_name, command_10_name, command_11_name, command_12_name, command_13_name, command_14_name = config.subcommands
+command_1_name, command_2_name, command_11_name, command_3_name, command_4_name, command_5_name, command_6_name, command_7_name, command_8_name, command_9_name, command_10_name, command_12_name, command_13_name, command_14_name = config.subcommands
 
 COMMANDS = [
     command_1_name, #"profile",
-    command_2_name, #"make_graph",
-    command_3_name, #"get_matrix",
+    command_2_name, #"graph",
+    command_11_name, #"get_minimizers",
+    command_3_name, #"matrix",
     command_4_name, #"distance",
-    command_5_name, #"view",
-    command_6_name, #"header",
-    command_7_name, #"kmeans",
-    command_8_name, #"hierarchical",
-    command_9_name, #"index",
+    command_5_name, #"kmeans",
+    command_6_name, #"hierarchical",
+    command_7_name, #"view",
+    command_8_name, #"header",
+    command_9_name, #"index", #
     command_10_name, #"shuf",
-    command_11_name, #"usage",
-    command_12_name, #"help"
-    command_13_name, #"version"
-    command_14_name  #"composition"
+    command_12_name, #"usage",
+    command_13_name, #"help",
+    command_14_name, #"version"]
 ]
 
 
@@ -1942,11 +1942,154 @@
 
 
 
+command_11_description = "Calculate minimizers"
+command_11_description_long = """
+    Calculate minimizers, outputs a binary array to associate with a index array.
+    
 
 
+"""
+command_11_parameters = "Parameter of interest is window size"
+command_11_inputs = "Input is a v{0} .kdb count vector file".format(config.VERSION)
+command_11_usage = "kmerdb minimizers -vv --debug input1.12.kdb > input1.12.minimizers.kdbi"
 
-command_11_description = "K-mer Markov sequence probability feature  (Deprecated)"
-command_11_description_long = """
+
+        
+COMMAND_11_BANNER = """
+
+
+
+
+
+
+
+
+
+
+
+
+[--------------------------------------------------------------------------------------]
+
+
+
+
+
+
+
+
+                        [  n a m e    ]         :  -   {0}
+
+                   description : {1}
+
+{2}
+
+
+
+
+--------------------------
+
+                    kmerdb minimizers input1.12.kdb input1.12.minimizers.kdbi
+
+                    [-]    inputs : 
+
+                           {3}
+
+                    [-]    parameters : 
+
+                           {4}
+
+
+
+                    [-]    [ usage ]  :  {5}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+[--------------------------------------------------------------------------------------]
+""".format(command_11_name, command_11_description, command_11_description_long, command_11_inputs, command_11_parameters, command_11_usage)
+
+        
+
+COMMAND_11_PARAMS = OrderedDict({
+    "name": "arguments",
+    "type": "array",
+    "items": [
+        {
+            "name": "K-mer database file.",
+            "type": "file",
+            "value": "The k-mer count proifle to calculate minimizers."
+        }
+    ]
+})
+
+        
+COMMAND_11_INPUTS = OrderedDict({
+    "name": "inputs",
+    "type": "array",
+    "items": [
+        {
+            "name": "Input count matrix (.tsv)",
+            "type": "file",
+            "value": "File to decompose the k-mer profile into its parts."
+        }
+        
+    ]
+})
+
+COMMAND_11_FEATURES = OrderedDict({
+    "name": "features",
+    "type": "array",
+    "items": [
+        OrderedDict({
+            "name": "linear regression for ",
+            "shortname": "",
+            "description" : ""
+        }),
+        OrderedDict({
+            "name": "",
+            "shortname": "",
+            "description": "(Deprecated)"
+        })
+    ]
+})
+    
+COMMAND_11_STEPS = OrderedDict({
+    "name": "steps",
+    "type": "array",
+    "items": [
+        OrderedDict({
+            "name": "",
+            "shortname": "",
+            "description": "(uhhhh...)",
+        }),
+        OrderedDict({
+            "name": "",
+            "shortname": "Shuffle k-mer counts",
+            "description": "(Deprecated)"
+        })
+
+    ]
+
+})
+
+
+
+command_12_description = "K-mer Markov sequence probability feature  (Deprecated)"
+command_12_description_long = """
  Uses conditional probabilities and multiplication rule along with Markov model of sequence to use likelihood/odds ratios to test likelihood of the sequence(s) given the inputs. Unadjusted for multiple-hypothesis testing.
 
 Conditional probability :
@@ -1962,13 +2105,13 @@
 
 
 """
-command_11_parameters = "inputs may be one or more fasta files, and the .kdb files needed for the model's output probability."
-command_11_inputs = "Input is a v{0} .kdb count vector file".format(config.VERSION)
-command_11_usage = "kmerdb prob --db <kmer_count_vector_1.kdb> [--db kmer_count_vector_2.kdb] <query_sequences_1.fasta.gz> [query_sequences_2.fasta.gz]"
+command_12_parameters = "inputs may be one or more fasta files, and the .kdb files needed for the model's output probability."
+command_12_inputs = "Input is a v{0} .kdb count vector file".format(config.VERSION)
+command_12_usage = "kmerdb prob --db <kmer_count_vector_1.kdb> [--db kmer_count_vector_2.kdb] <query_sequences_1.fasta.gz> [query_sequences_2.fasta.gz]"
 
 
         
-COMMAND_11_BANNER = """
+COMMAND_12_BANNER = """
 
 
 
@@ -2033,11 +2176,11 @@
 
 
 [--------------------------------------------------------------------------------------]
-""".format(command_11_name, command_11_description, command_11_description_long, command_11_inputs, command_11_parameters, command_11_usage)
+""".format(command_12_name, command_12_description, command_12_description_long, command_12_inputs, command_12_parameters, command_12_usage)
 
         
 
-COMMAND_11_PARAMS = OrderedDict({
+COMMAND_12_PARAMS = OrderedDict({
     "name": "arguments",
     "type": "array",
     "items": [
@@ -2050,7 +2193,7 @@
 })
 
         
-COMMAND_11_INPUTS = OrderedDict({
+COMMAND_12_INPUTS = OrderedDict({
     "name": "inputs",
     "type": "array",
     "items": [
@@ -2063,7 +2206,7 @@
     ]
 })
 
-COMMAND_11_FEATURES = OrderedDict({
+COMMAND_12_FEATURES = OrderedDict({
     "name": "features",
     "type": "array",
     "items": [
@@ -2080,7 +2223,7 @@
     ]
 })
     
-COMMAND_11_STEPS = OrderedDict({
+COMMAND_12_STEPS = OrderedDict({
     "name": "steps",
     "type": "array",
     "items": [
@@ -2290,6 +2433,7 @@
     "header": COMMAND_6_PARAMS["items"],
     "index": COMMAND_9_PARAMS["items"],
     "shuf": COMMAND_10_PARAMS["items"],
+    "minimizers": COMMAND_11_PARAMS["items"],
     "composition": COMMAND_14_PARAMS["items"],
 }
 
@@ -2304,6 +2448,8 @@
     "header": COMMAND_6_INPUTS["items"],
     "index": COMMAND_9_INPUTS["items"],
     "shuf": COMMAND_10_INPUTS["items"],
+    
+    "minimizers": COMMAND_11_INPUTS["items"],
     "composition": COMMAND_14_INPUTS["items"],
 }
 
@@ -2318,6 +2464,7 @@
     "header": COMMAND_6_FEATURES["items"],
     "index": COMMAND_9_FEATURES["items"],
     "shuf": COMMAND_10_FEATURES["items"],
+    "minimizers": COMMAND_11_FEATURES["items"],
     "composition": COMMAND_14_FEATURES["items"],
 
 }
@@ -2335,6 +2482,7 @@
     "header": COMMAND_6_STEPS["items"],
     "index": COMMAND_9_STEPS["items"],
     "shuf": COMMAND_10_STEPS["items"],
+    "minimizers": COMMAND_11_FEATURES["items"],
     "composition": COMMAND_14_STEPS["items"],
 }
 
diff --git a/kmerdb/config.py b/kmerdb/config.py
index c586ada..0987c4f 100644
--- a/kmerdb/config.py
+++ b/kmerdb/config.py
@@ -17,7 +17,7 @@
 
 
 
-VERSION="0.8.10"
+VERSION="0.8.11"
 REQUIRES_PYTHON="3.7.4"
 header_delimiter = "\n" + ("="*24) + "\n"
 
@@ -29,8 +29,8 @@
 # 4/9/24 there are some duplicates sure, but the requirements evolve faster than the dev do, are more essential for function, and I dont change the -dev file much. 
 
 # 12 subcommands, and associated function names in __init__.py
-subcommands = ["profile", "graph", "matrix", "distance", "kmeans", "hierarchical", "view", "header", "index", "shuf", "usage", "help", "version", "composition"]
-subcommand_functions = ["profile", "make_graph", "get_matrix", "distances", "kmeans", "hierarchical", "view", "header", "index_file", "shuf", "expanded_help", "expanded_help", "version", "composition"]
+subcommands = ["profile", "graph", "get_minimizers", "matrix", "distance", "kmeans", "hierarchical", "view", "header", "index", "shuf", "usage", "help", "version"]
+subcommand_functions = ["profile", "make_graph", "get_minimizers", "get_matrix", "distances", "kmeans", "hierarchical", "view", "header", "index_file", "shuf", "expanded_help", "expanded_help", "version"]
 
 default_logline_choices = (20, 50, 100, 200)
 KDB_COLUMN_NUMBER = 4
diff --git a/kmerdb/minimizer.py b/kmerdb/minimizer.py
new file mode 100644
index 0000000..b6ce9a3
--- /dev/null
+++ b/kmerdb/minimizer.py
@@ -0,0 +1,59 @@
+'''
+   Copyright 2020 Matthew Ralston
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+'''
+import sys
+import os
+
+#import yaml
+#from collections import OrderedDict
+
+import numpy as np
+
+from kmerdb import kmer, config, util
+
+def select_lexicographical_minimizers(seq, k, window_size, kmer_ids):
+    """
+    Select k-mers based on lexicographical minimizers and return a binary array indicating selected k-mers.
+    
+    :param seq: Input DNA sequence
+    :type seq: str
+    :param k: Choice of k
+    :type k: int
+    :param window_size: Size of the sliding window
+    :type window_size: int
+    :param kmer_ids: Array of kmer IDs (1 to 4^k)
+    :type kmer_ids: list
+
+    :returns: Binary array where 1 indicates selected k-mer, 0 otherwise
+    :rtype: np.array
+    """
+
+    N = len(kmer_ids)
+    
+    if len(seq) < k:
+        raise ValueError("Sequence length was less than k")
+
+    minimizers = np.zeros(N, dtype="int16")
+
+    for i in range(N - k + 1):
+        if i % window_size == 0:
+            subseq = seq[i:i+k]
+            kmer_id = kmer.kmer_to_id(subseq)
+            minimizers[kmer_id] = 1
+
+
+    return minimizers
+
diff --git a/kmerdb/parse.py b/kmerdb/parse.py
index 7fe642b..e5f4d7b 100644
--- a/kmerdb/parse.py
+++ b/kmerdb/parse.py
@@ -168,8 +168,15 @@ def parsefile(filepath:str, k:int, logger=None, quiet=True): #rows_per_batch:int
                     
         seqprsr.max_read_length = max(read_lengths)
         seqprsr.min_read_length = min(read_lengths)
-        seqprsr.avg_read_length = np.mean(np.array(read_lengths, dtype="uint32"))
-    
+
+        
+        seqprsr.avg_read_length = int(np.mean(np.array(read_lengths, dtype="uint32")))
+
+
+
+
+
+        
         sus = set()
 
         
@@ -252,7 +259,7 @@ def parsefile(filepath:str, k:int, logger=None, quiet=True): #rows_per_batch:int
     header = seqprsr.header_dict()
     header["min_read_length"] = min_read_length
     header["max_read_length"] = max_read_length
-    header["avg_read_length"] = np.mean(read_lengths)
+    header["avg_read_length"] = int(np.mean(read_lengths))
 
     seqprsr.nullomer_array = np.array(all_theoretical_kmer_ids, dtype="uint64")[is_nullomer]
 
diff --git a/pyproject.toml b/pyproject.toml
index e0202d0..b63e482 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -27,7 +27,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "kmerdb"
-version = "0.8.10"
+version = "0.8.11"
 description = "Yet another correction to the 'yet another correction to just a k-mer counter...'"
 readme = "README.md"
 authors = [{name="Matt Ralston <mralston.development@gmail.com>", email="mralston.development@gmail.com"}]
diff --git a/setup.py b/setup.py
index dc1fdc0..c5bcb43 100644
--- a/setup.py
+++ b/setup.py
@@ -110,7 +110,7 @@ def can_import(module_name):
 long_description = 'See README.md for details'
 #REQUIRES_PYTHON = ">=3.7.4"
 REQUIRES_PYTHON = '>=3.12.2'
-VERSION = "0.8.10"
+VERSION = "0.8.11"
 URL = 'https://github.com/MatthewRalston/kmerdb'
 CURRENT_RELEASE = "https://github.com/MatthewRalston/kmerdb/archive/v{0}.tar.gz".format(VERSION)
 EMAIL = 'mralston.development@gmail.com'