Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MashMap v3.0.6 #58

Merged
merged 15 commits into from
Jul 10, 2023
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ endif ()
if (${CMAKE_BUILD_TYPE} MATCHES Debug)
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O -g")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O -g")
add_compile_options(-fsanitize=address)
add_link_options(-fsanitize=address)
else()
# Use all standard-compliant optimizations - always add these:
set (CMAKE_C_FLAGS "${OpenMP_C_FLAGS} ${PIC_FLAG} ${EXTRA_FLAGS}")
Expand Down
1 change: 1 addition & 0 deletions src/common/progress.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#pragma once

#include <cmath>
#include <iostream>
#include <string>
#include <atomic>
Expand Down
1 change: 1 addition & 0 deletions src/common/utils.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#pragma once

#include <cstdint>
#include <cmath>
#include <string>

Expand Down
37 changes: 30 additions & 7 deletions src/map/include/base_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <tuple>
#include <vector>
#include <chrono>
#include "common/progress.hpp"

namespace skch
{
Expand Down Expand Up @@ -165,6 +166,7 @@ namespace skch

//--for split read mapping

long double kmerComplexity; // Estimated sequence complexity
int n_merged; // how many mappings we've merged into this one
offset_t splitMappingId; // To identify split mappings that are chained
uint8_t discard; // set to 1 for deletion
Expand Down Expand Up @@ -204,10 +206,11 @@ namespace skch
//Container to save copy of kseq object
struct InputSeqContainer
{
seqno_t seqCounter; //sequence counter
offset_t len; //sequence length
std::string seq; //sequence string
std::string seqName; //sequence id
seqno_t seqCounter; //sequence counter
offset_t len; //sequence length
std::string seq; //sequence string
std::string seqName; //sequence id


/*
* @brief constructor
Expand All @@ -216,12 +219,30 @@ namespace skch
* @param[in] len length of sequence
*/
InputSeqContainer(const std::string& s, const std::string& id, seqno_t seqcount)
: seq(s)
, seqName(id)
: seqCounter(seqcount)
, len(s.length())
, seqCounter(seqcount) { }
, seq(s)
, seqName(id) { }
};

struct InputSeqProgContainer : InputSeqContainer
{
using InputSeqContainer::InputSeqContainer;
progress_meter::ProgressMeter& progress; //progress meter (shared)


/*
* @brief constructor
* @param[in] kseq_seq complete read or reference sequence
* @param[in] kseq_id sequence id name
* @param[in] len length of sequence
*/
InputSeqProgContainer(const std::string& s, const std::string& id, seqno_t seqcount, progress_meter::ProgressMeter& pm)
: InputSeqContainer(s, id, seqcount)
, progress(pm) { }
};


//Output type of map function
struct MapModuleOutput
{
Expand All @@ -248,6 +269,8 @@ namespace skch
std::string seqName; //sequence name
MinmerVec minmerTableQuery; //Vector of minmers in the query
MinmerVec seedHits; //Vector of minmers in the reference
int refGroup; //Prefix group of sequence
float kmerComplexity; //Estimated sequence complexity
};
}

Expand Down
37 changes: 35 additions & 2 deletions src/map/include/commonFunc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -186,9 +186,25 @@ namespace skch {
ankerl::unordered_dense::map<hash_t, MinmerInfo> sketched_vals;
std::vector<hash_t> sketched_heap;
sketched_heap.reserve(sketchSize+1);

// Get distance until last "N"
int ambig_kmer_count = 0;
for (int i = kmerSize - 1; i >= 0; i--)
{
if (seq[i] == 'N')
{
ambig_kmer_count = i+1;
break;
}
}

for(offset_t i = 0; i < len - kmerSize + 1; i++)
{

if (seq[i+kmerSize-1] == 'N')
{
ambig_kmer_count = kmerSize;
}
//Hash kmers
hash_t hashFwd = CommonFunc::getHash(seq + i, kmerSize);
hash_t hashBwd;
Expand All @@ -199,7 +215,7 @@ namespace skch {
hashBwd = std::numeric_limits<hash_t>::max(); //Pick a dummy high value so that it is ignored later

//Consider non-symmetric kmers only
if(hashBwd != hashFwd)
if(hashBwd != hashFwd && ambig_kmer_count == 0)
{
//Take minimum value of kmer and its reverse complement
hash_t currentKmer = std::min(hashFwd, hashBwd);
Expand Down Expand Up @@ -237,6 +253,10 @@ namespace skch {
}
}
}
if (ambig_kmer_count > 0)
{
ambig_kmer_count--;
}
}

minmerIndex.resize(sketched_heap.size());
Expand Down Expand Up @@ -293,6 +313,10 @@ namespace skch {

//if(alphabetSize == 4) //not protein
//CommonFunc::reverseComplement(seq, seqRev.get(), len);

// Get distance until last "N"
int ambig_kmer_count = 0;


for(offset_t i = 0; i < len - kmerSize + 1; i++)
{
Expand Down Expand Up @@ -369,8 +393,12 @@ namespace skch {
Q.pop_front();
}

if (seq[i+kmerSize-1] == 'N')
{
ambig_kmer_count = kmerSize;
}
//Consider non-symmetric kmers only
if(hashBwd != hashFwd)
if(hashBwd != hashFwd && ambig_kmer_count == 0)
{
// Add current hash to window
Q.push_back(std::make_tuple(currentKmer, currentStrand, i));
Expand Down Expand Up @@ -399,6 +427,11 @@ namespace skch {
std::push_heap(heapWindow.begin(), heapWindow.end(), KIHeap_cmp);
}
}
if (ambig_kmer_count > 0)
{
ambig_kmer_count--;
}




Expand Down
Loading