Skip to content

Commit

Permalink
Merge pull request #18 from ASLeonard/master
Browse files Browse the repository at this point in the history
Updated pipeline to allow writing locus info and kmer names in *kmer output. Credit @ASLeonard
  • Loading branch information
joyeuxnoel8 authored Mar 4, 2023
2 parents 604b945 + 0154cf3 commit d286cd6
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 20 deletions.
4 changes: 2 additions & 2 deletions pipeline/GoodPanGenomeGraph.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ ulimit -c 20000
cd {params.od}
module load gcc
{params.sd}/bin/vntr2kmers_thread -g -m <(cut -f $(({params.hi}+1)),$(({params.hi}+2)) {input.mapping}) -k {params.ksize} -fs {params.FS} -ntr {params.FS} -o {wildcards.genome}.rawPB -fa 2 {input.TRfa}
{params.sd}/bin/vntr2kmers_thread -g -m <(cut -f $(({params.hi}+1)),$(({params.hi}+2)) {input.mapping}) -k {params.ksize} -fs {params.FS} -ntr {params.FS} -on {wildcards.genome}.rawPB -fa 2 {input.TRfa}
if [ {params.prune} == "1" ]; then
samtools fasta -@2 -n {input.ILbam} |
Expand Down Expand Up @@ -367,7 +367,7 @@ cd {params.od}
ulimit -c 20000
awk '$1 ~ />/ || $2 == 0' {input.rawILkmers} |
{params.sd}/bin/vntr2kmers_thread -g -p /dev/stdin -m <(cut -f $(({params.hi}+1)),$(({params.hi}+2)) {input.mapping}) -k {params.ksize} -fs {params.FS} -ntr {params.FS} -o {wildcards.genome}.PB -fa 2 {input.TRfa}
{params.sd}/bin/vntr2kmers_thread -g -p /dev/stdin -m <(cut -f $(({params.hi}+1)),$(({params.hi}+2)) {input.mapping}) -k {params.ksize} -fs {params.FS} -ntr {params.FS} -on {wildcards.genome}.PB -fa 2 {input.TRfa}
"""

def getRPGGin():
Expand Down
37 changes: 19 additions & 18 deletions src/VNTR2kmers_thread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,29 +43,28 @@ int main(int argc, const char * argv[]) {

if (argc < 2){
cerr << endl
<< "usage: vntr2kmers_thread [-name] [-th] [-g] [-p] [-m] -k -fs -ntr -o -fa \n"
<< " -name Keep kmer names in output. Default: False.\n"
<< "usage: vntr2kmers_thread [-th] [-g] [-p] [-m] -k -fs -ntr [-o|-on] -fa \n"
<< " -th <INT> Filter out kmers w/ count below this threshold. Default: 0, i.e. no filtering\n"
<< " -g output *graph.kmers for threading-based kmer query.\n"
<< " -p <FILE> Prune tr/graph kmers with the given kmers file.\n"
<< " -m <FILE> Use orthology map to merge haps.\n"
<< " -m <FILE> Use orthology map to merge haps.\n"
<< " -k <INT> Kmer size\n"
<< " -fs <INT> Length of flanking sequence in *.tr.fasta.\n"
<< " -ntr <INT> Length of desired NTR in *kmers.\n"
<< " -o <STR> Output file prefix.\n"
<< " -o <STR> Output file prefix" << endl
<< " -on <STR> Same as the -o option, but write locus and kmer name as well" << endl
<< " -fa <n> <list> Use specified *.fasta in the [list] instead of hapDB.\n"
<< " Count the first [n] files and build kmers for the rest\n\n";
return 0;
}
vector<string> args(argv, argv+argc);
bool keepName = false, genGraph = false, prune = false, usemap = false;
bool genGraph = false, prune = false, usemap = false, writeKmerName = false;
size_t argi = 1, threshold = 0, nhap = 0, NTRsize, fs, nfile2count, nloci;
string pruneFname, outPref, mapf;
vector<string> infnames;

while (argi < argc) {
if (args[argi] == "-name") { keepName = true; }
else if (args[argi] == "-th") { threshold = stoi(args[++argi]); }
if (args[argi] == "-th") { threshold = stoi(args[++argi]); }
else if (args[argi] == "-g") { genGraph = true; }
else if (args[argi] == "-p") {
prune = true;
Expand All @@ -84,7 +83,8 @@ int main(int argc, const char * argv[]) {
NTRsize = stoi(args[++argi]);
assert(fs >= NTRsize);
}
else if (args[argi] == "-o") {
else if (args[argi] == "-o" or args[argi] == "-on") {
writeKmerName = args[argi] == "-on";
outPref = args[++argi];
ofstream outf(outPref+".tr.kmers");
assert(outf);
Expand Down Expand Up @@ -193,16 +193,17 @@ int main(int argc, const char * argv[]) {
// write kmers files for all kmer databases
// -----
cerr << "writing outputs" << endl;
if (keepName) {
writeKmersWithName(outPref + ".tr", TRkmersDB, threshold);
writeKmersWithName(outPref + ".ntr", NTRkmersDB, threshold);
} else {
writeKmers(outPref + ".tr", TRkmersDB, threshold);
writeKmers(outPref + ".ntr", NTRkmersDB, threshold);
}

if (genGraph) {
writeKmersWithName(outPref + ".graph", graphDB);
if (writeKmerName) {
writeKmersWithName(outPref + ".tr", TRkmersDB, threshold);
writeKmersWithName(outPref + ".ntr", NTRkmersDB, threshold);
if (genGraph)
writeKmersWithName(outPref + ".graph", graphDB);
}
else {
writeKmers(outPref + ".tr", TRkmersDB, threshold);
writeKmers(outPref + ".ntr", NTRkmersDB, threshold);
if (genGraph)
writeKmers(outPref + ".graph", graphDB);
}

return 0;
Expand Down

0 comments on commit d286cd6

Please sign in to comment.