Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[pre-commit.ci] pre-commit autoupdate #3290

Merged
merged 3 commits into from
Aug 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ repos:
- id: check-toml
- id: debug-statements
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.5.6
rev: v0.6.1
hooks:
- id: ruff-format
- id: ruff
Expand Down
81 changes: 42 additions & 39 deletions doc/kmers-and-minhash.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,10 @@
"def jaccard_similarity(a, b):\n",
" a = set(a)\n",
" b = set(b)\n",
" \n",
"\n",
" intersection = len(a.intersection(b))\n",
" union = len(a.union(b))\n",
" \n",
"\n",
" return intersection / union"
]
},
Expand All @@ -65,9 +65,9 @@
"def jaccard_containment(a, b):\n",
" a = set(a)\n",
" b = set(b)\n",
" \n",
"\n",
" intersection = len(a.intersection(b))\n",
" \n",
"\n",
" return intersection / len(a)"
]
},
Expand All @@ -84,9 +84,9 @@
"metadata": {},
"outputs": [],
"source": [
"a = ['ATGG', 'AACC']\n",
"b = ['ATGG', 'CACA']\n",
"c = ['ATGC', 'CACA']"
"a = [\"ATGG\", \"AACC\"]\n",
"b = [\"ATGG\", \"CACA\"]\n",
"c = [\"ATGC\", \"CACA\"]"
]
},
{
Expand Down Expand Up @@ -270,11 +270,11 @@
"def build_kmers(sequence, ksize):\n",
" kmers = []\n",
" n_kmers = len(sequence) - ksize + 1\n",
" \n",
"\n",
" for i in range(n_kmers):\n",
" kmer = sequence[i:i + ksize]\n",
" kmer = sequence[i : i + ksize]\n",
" kmers.append(kmer)\n",
" \n",
"\n",
" return kmers"
]
},
Expand Down Expand Up @@ -307,7 +307,7 @@
}
],
"source": [
"build_kmers('ATGGACCAGATATAGGGAGAGCCAGGTAGGACA', 21)"
"build_kmers(\"ATGGACCAGATATAGGGAGAGCCAGGTAGGACA\", 21)"
]
},
{
Expand All @@ -325,8 +325,8 @@
"metadata": {},
"outputs": [],
"source": [
"seq1 = 'ATGGACCAGATATAGGGAGAGCCAGGTAGGACA'\n",
"seq2 = 'ATGGACCAGATATTGGGAGAGCCGGGTAGGACA'\n",
"seq1 = \"ATGGACCAGATATAGGGAGAGCCAGGTAGGACA\"\n",
"seq2 = \"ATGGACCAGATATTGGGAGAGCCGGGTAGGACA\"\n",
"# differences: ^ ^"
]
},
Expand Down Expand Up @@ -375,13 +375,14 @@
"metadata": {},
"outputs": [],
"source": [
"import screed # a library for reading in FASTA/FASTQ\n",
"import screed # a library for reading in FASTA/FASTQ\n",
"\n",
"\n",
"def read_kmers_from_file(filename, ksize):\n",
" all_kmers = []\n",
" for record in screed.open(filename):\n",
" sequence = record.sequence\n",
" \n",
"\n",
" kmers = build_kmers(sequence, ksize)\n",
" all_kmers += kmers\n",
"\n",
Expand All @@ -394,7 +395,7 @@
"metadata": {},
"outputs": [],
"source": [
"akker_kmers = read_kmers_from_file('genomes/akkermansia.fa', 31)"
"akker_kmers = read_kmers_from_file(\"genomes/akkermansia.fa\", 31)"
]
},
{
Expand Down Expand Up @@ -444,8 +445,8 @@
"metadata": {},
"outputs": [],
"source": [
"shew1_kmers = read_kmers_from_file('genomes/shew_os185.fa', 31)\n",
"shew2_kmers = read_kmers_from_file('genomes/shew_os223.fa', 31)"
"shew1_kmers = read_kmers_from_file(\"genomes/shew_os185.fa\", 31)\n",
"shew2_kmers = read_kmers_from_file(\"genomes/shew_os223.fa\", 31)"
]
},
{
Expand All @@ -471,9 +472,9 @@
}
],
"source": [
"print('akker vs shew1', jaccard_similarity(akker_kmers, shew1_kmers))\n",
"print('akker vs shew2', jaccard_similarity(akker_kmers, shew2_kmers))\n",
"print('shew1 vs shew2', jaccard_similarity(shew1_kmers, shew2_kmers))"
"print(\"akker vs shew1\", jaccard_similarity(akker_kmers, shew1_kmers))\n",
"print(\"akker vs shew2\", jaccard_similarity(akker_kmers, shew2_kmers))\n",
"print(\"shew1 vs shew2\", jaccard_similarity(shew1_kmers, shew2_kmers))"
]
},
{
Expand All @@ -492,9 +493,9 @@
}
],
"source": [
"print('akker vs shew1', jaccard_containment(akker_kmers, shew1_kmers))\n",
"print('akker vs shew2', jaccard_containment(akker_kmers, shew2_kmers))\n",
"print('shew1 vs shew2', jaccard_containment(shew1_kmers, shew2_kmers))"
"print(\"akker vs shew1\", jaccard_containment(akker_kmers, shew1_kmers))\n",
"print(\"akker vs shew2\", jaccard_containment(akker_kmers, shew2_kmers))\n",
"print(\"shew1 vs shew2\", jaccard_containment(shew1_kmers, shew2_kmers))"
]
},
{
Expand Down Expand Up @@ -568,20 +569,22 @@
"source": [
"import mmh3\n",
"\n",
"\n",
"def hash_kmer(kmer):\n",
" # calculate the reverse complement\n",
" rc_kmer = screed.rc(kmer)\n",
" \n",
"\n",
" # determine whether original k-mer or reverse complement is lesser\n",
" if kmer < rc_kmer:\n",
" canonical_kmer = kmer\n",
" else:\n",
" canonical_kmer = rc_kmer\n",
" \n",
"\n",
" # calculate murmurhash using a hash seed of 42\n",
" hash = mmh3.hash64(canonical_kmer, 42)[0]\n",
" if hash < 0: hash += 2**64\n",
" \n",
" if hash < 0:\n",
" hash += 2**64\n",
"\n",
" # done\n",
" return hash"
]
Expand Down Expand Up @@ -610,7 +613,7 @@
}
],
"source": [
"hash_kmer('ATGGC')"
"hash_kmer(\"ATGGC\")"
]
},
{
Expand All @@ -637,7 +640,7 @@
}
],
"source": [
"hash_kmer('ATGGC')"
"hash_kmer(\"ATGGC\")"
]
},
{
Expand All @@ -664,7 +667,7 @@
}
],
"source": [
"hash_kmer('GCCAT')"
"hash_kmer(\"GCCAT\")"
]
},
{
Expand All @@ -691,7 +694,7 @@
}
],
"source": [
"hash_kmer('GCCAA')"
"hash_kmer(\"GCCAA\")"
]
},
{
Expand Down Expand Up @@ -836,7 +839,7 @@
" if hash_kmer(kmer) < keep_below:\n",
" keep.append(kmer)\n",
" # otherwise, discard\n",
" \n",
"\n",
" return keep"
]
},
Expand Down Expand Up @@ -901,8 +904,8 @@
}
],
"source": [
"print('akker vs akker, total', jaccard_similarity(akker_kmers, akker_kmers))\n",
"print('akker vs akker, sub', jaccard_similarity(akker_sub, akker_sub))"
"print(\"akker vs akker, total\", jaccard_similarity(akker_kmers, akker_kmers))\n",
"print(\"akker vs akker, sub\", jaccard_similarity(akker_sub, akker_sub))"
]
},
{
Expand All @@ -920,8 +923,8 @@
}
],
"source": [
"print('akker vs shew1, total', jaccard_similarity(akker_kmers, shew1_kmers))\n",
"print('akker vs shew1, sub', jaccard_similarity(akker_sub, shew1_sub))"
"print(\"akker vs shew1, total\", jaccard_similarity(akker_kmers, shew1_kmers))\n",
"print(\"akker vs shew1, sub\", jaccard_similarity(akker_sub, shew1_sub))"
]
},
{
Expand All @@ -939,8 +942,8 @@
}
],
"source": [
"print('shew1 vs shew2, total', jaccard_similarity(shew1_kmers, shew2_kmers))\n",
"print('shew1 vs shew2, sub', jaccard_similarity(shew1_sub, shew2_sub))"
"print(\"shew1 vs shew2, total\", jaccard_similarity(shew1_kmers, shew2_kmers))\n",
"print(\"shew1 vs shew2, sub\", jaccard_similarity(shew1_sub, shew2_sub))"
]
},
{
Expand Down
46 changes: 29 additions & 17 deletions doc/plotting-compare.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@
"metadata": {},
"outputs": [],
"source": [
"matrix, labels = fig.load_matrix_and_labels('compare-demo')"
"matrix, labels = fig.load_matrix_and_labels(\"compare-demo\")"
]
},
{
Expand Down Expand Up @@ -139,8 +139,8 @@
}
],
"source": [
"print('matrix:\\n', matrix)\n",
"print('labels:', labels)"
"print(\"matrix:\\n\", matrix)\n",
"print(\"labels:\", labels)"
]
},
{
Expand Down Expand Up @@ -192,8 +192,8 @@
}
],
"source": [
"print('reordered matrix:\\n', reordered_matrix)\n",
"print('reordered labels:', reordered_labels)"
"print(\"reordered matrix:\\n\", reordered_matrix)\n",
"print(\"reordered labels:\", reordered_labels)"
]
},
{
Expand All @@ -218,8 +218,10 @@
"source": [
"import scipy.cluster.hierarchy as sch\n",
"\n",
"def plot_composite_matrix(D, labeltext, show_labels=True,\n",
" vmax=1.0, vmin=0.0, force=False):\n",
"\n",
"def plot_composite_matrix(\n",
" D, labeltext, show_labels=True, vmax=1.0, vmin=0.0, force=False\n",
"):\n",
" \"\"\"Build a composite plot showing dendrogram + distance matrix/heatmap.\n",
"\n",
" Returns a matplotlib figure.\n",
Expand All @@ -228,25 +230,34 @@
" shown on the plot.\n",
" \"\"\"\n",
" if D.max() > 1.0 or D.min() < 0.0:\n",
" error('This matrix doesn\\'t look like a distance matrix - min value {}, max value {}', D.min(), D.max())\n",
" error(\n",
" \"This matrix doesn't look like a distance matrix - min value {}, max value {}\",\n",
" D.min(),\n",
" D.max(),\n",
" )\n",
" if not force:\n",
" raise ValueError(\"not a distance matrix\")\n",
" else:\n",
" notify('force is set; scaling to [0, 1]')\n",
" notify(\"force is set; scaling to [0, 1]\")\n",
" D -= D.min()\n",
" D /= D.max()\n",
"\n",
" if show_labels:\n",
" show_indices = True\n",
" pass\n",
"\n",
" fig = pylab.figure(figsize=(11, 8))\n",
" ax1 = fig.add_axes([0.09, 0.1, 0.2, 0.6])\n",
"\n",
" # plot dendrogram\n",
" Y = sch.linkage(D, method='single') # centroid\n",
" Y = sch.linkage(D, method=\"single\") # centroid\n",
"\n",
" Z1 = sch.dendrogram(Y, orientation='left', labels=labeltext,\n",
" no_labels=not show_labels, get_leaves=True)\n",
" Z1 = sch.dendrogram(\n",
" Y,\n",
" orientation=\"left\",\n",
" labels=labeltext,\n",
" no_labels=not show_labels,\n",
" get_leaves=True,\n",
" )\n",
" ax1.set_xticks([])\n",
"\n",
" xstart = 0.45\n",
Expand All @@ -256,8 +267,8 @@
" scale_xstart = xstart + width + 0.01\n",
"\n",
" # re-order labels along rows, top to bottom\n",
" idx1 = Z1['leaves']\n",
" reordered_labels = [ labeltext[i] for i in idx1 ]\n",
" idx1 = Z1[\"leaves\"]\n",
" reordered_labels = [labeltext[i] for i in idx1]\n",
"\n",
" # reorder D by the clustering in the dendrogram\n",
" D = D[idx1, :]\n",
Expand All @@ -266,8 +277,9 @@
" # show matrix\n",
" axmatrix = fig.add_axes([xstart, 0.1, width, 0.6])\n",
"\n",
" im = axmatrix.matshow(D, aspect='auto', origin='lower',\n",
" cmap=pylab.cm.YlGnBu, vmin=vmin, vmax=vmax)\n",
" im = axmatrix.matshow(\n",
" D, aspect=\"auto\", origin=\"lower\", cmap=pylab.cm.YlGnBu, vmin=vmin, vmax=vmax\n",
" )\n",
" axmatrix.set_xticks([])\n",
" axmatrix.set_yticks([])\n",
"\n",
Expand Down
6 changes: 4 additions & 2 deletions doc/sourmash-collections.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,8 @@
],
"source": [
"from IPython.display import Image\n",
"Image(filename='compare_all.mat.matrix.png') "
"\n",
"Image(filename=\"compare_all.mat.matrix.png\")"
]
},
{
Expand Down Expand Up @@ -857,7 +858,8 @@
],
"source": [
"import pandas\n",
"df = pandas.read_csv('podar-lineage.csv')\n",
"\n",
"df = pandas.read_csv(\"podar-lineage.csv\")\n",
"df"
]
},
Expand Down
Loading
Loading