Skip to content

Commit

Permalink
[MISC] Make scoring configuration usable (fixes seqan/product_backlog…
Browse files Browse the repository at this point in the history
  • Loading branch information
smehringer committed Aug 12, 2020
1 parent 3c7a219 commit aff7e36
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 10 deletions.
56 changes: 47 additions & 9 deletions include/seqan3/alignment/multiple/detail.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,11 @@
#include <seqan3/alignment/configuration/align_config_band.hpp>
#include <seqan3/alignment/configuration/align_config_gap.hpp>
#include <seqan3/alignment/configuration/align_config_scoring.hpp>
#include <seqan3/alignment/scoring/nucleotide_scoring_scheme.hpp>
#include <seqan3/alphabet/aminoacid/all.hpp>
#include <seqan3/alphabet/nucleotide/all.hpp>
#include <seqan3/core/algorithm/configuration.hpp>
#include <seqan3/core/type_traits/template_inspection.hpp>

#include <seqan/basic.h>
#include <seqan/reduced_aminoacid.h>
Expand Down Expand Up @@ -92,7 +94,37 @@ void validate_configuration(seqan3::configuration<configs_t...> const &)
}

template <typename alphabet_type, typename seqan3_configuration_t>
auto seqan2_msa_configuration(seqan3_configuration_t const & config)
requires seqan3_configuration_t::template exists<seqan3::align_cfg::scoring>()
auto initialise_scoring_scheme(seqan3_configuration_t const & config)
{
auto scoring_scheme = get<seqan3::align_cfg::scoring>(config).value;
using scoring_scheme_alphabet_type = typename decltype(scoring_scheme)::alphabet_type; // seqan3 alphabet
using score_matrix_alphabet_type = decltype(convert_alph_3_to_2(scoring_scheme_alphabet_type{})); // seqan2 alphabet
using score_matrix_type = seqan::Score<int, seqan::ScoreMatrix<score_matrix_alphabet_type>>;

seqan::MsaOptions<alphabet_type, score_matrix_type> msaOpt{};
score_matrix_type scMat;

for (size_t i = 0; i < seqan3::alphabet_size<scoring_scheme_alphabet_type>; ++i)
{
for (size_t j = 0; j < seqan3::alphabet_size<scoring_scheme_alphabet_type>; ++j)
{
auto seqan3_i = seqan3::assign_rank_to(i, scoring_scheme_alphabet_type{});
auto seqan3_j = seqan3::assign_rank_to(j, scoring_scheme_alphabet_type{});
score_matrix_alphabet_type seqan2_i{seqan3::to_char(seqan3_i)};
score_matrix_alphabet_type seqan2_j{seqan3::to_char(seqan3_j)};

setScore(scMat, seqan2_i, seqan2_j, scoring_scheme.score(seqan3_i, seqan3_j));
}
}

msaOpt.sc = scMat;
return msaOpt;
}

template <typename alphabet_type, typename seqan3_configuration_t>
requires !seqan3_configuration_t::template exists<seqan3::align_cfg::scoring>()
auto initialise_scoring_scheme(seqan3_configuration_t const &)
{
using score_type = std::conditional_t<std::same_as<alphabet_type, seqan::AminoAcid> ||
std::same_as<alphabet_type, seqan::ReducedAminoAcid<seqan::Murphy10>> ||
Expand All @@ -102,6 +134,20 @@ auto seqan2_msa_configuration(seqan3_configuration_t const & config)

seqan::MsaOptions<alphabet_type, score_type> msaOpt{};

if constexpr (std::is_same_v<score_type, seqan::Score<int>>)
{
msaOpt.sc.data_match = 5; // tcoffee app default
msaOpt.sc.data_mismatch = -4; // tcoffee app default
}

return msaOpt;
}

template <typename alphabet_type, typename seqan3_configuration_t>
auto seqan2_msa_configuration(seqan3_configuration_t const & config)
{
auto msaOpt = initialise_scoring_scheme<alphabet_type>(config);

seqan::appendValue(msaOpt.method, 0); // global alignment
seqan::appendValue(msaOpt.method, 1); // local alignment
msaOpt.build = 0; // neighbour joining to build the guide tree
Expand All @@ -117,20 +163,12 @@ auto seqan2_msa_configuration(seqan3_configuration_t const & config)
msaOpt.pairwiseAlignmentMethod = 1; // unbanded
}

score_type scMat;
msaOpt.sc = scMat;

// seqan2 tcoffee app default: gap -1, gap open -13
auto const & gaps = config.get_or(align_cfg::gap{gap_scheme{gap_score{-1}, gap_open_score{-13}}}).value;
// convert to seqan2 gap score convention.
// See: https://docs.seqan.de/seqan/3-master-user/classseqan3_1_1gap__scheme.html
msaOpt.sc.data_gap_open = gaps.get_gap_open_score() + gaps.get_gap_score();
msaOpt.sc.data_gap_extend = gaps.get_gap_score();
if constexpr (std::is_same_v<score_type, seqan::Score<int>>)
{
msaOpt.sc.data_match = 5; // tcoffee app default
msaOpt.sc.data_mismatch = -4; // tcoffee app default
}

return msaOpt;
}
Expand Down
32 changes: 31 additions & 1 deletion test/unit/alignment/multiple/align_multiple_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
#include <vector>

#include <seqan3/alignment/configuration/align_config_gap.hpp>
#include <seqan3/alignment/configuration/align_config_scoring.hpp>
#include <seqan3/alignment/scoring/aminoacid_scoring_scheme.hpp>
#include <seqan3/alignment/multiple/align_multiple.hpp>
#include <seqan3/alignment/multiple/detail.hpp>
#include <seqan3/alphabet/aminoacid/aa27.hpp>
Expand All @@ -27,6 +29,31 @@ using seqan3::operator""_dna5;
using seqan3::operator""_rna5;
using seqan3::operator""_rna4;

TEST(detail_initialise_scoring_scheme, no_scoring_configuration)
{
// no scoring information
seqan3::configuration config = seqan3::align_cfg::gap{seqan3::gap_scheme{seqan3::gap_score{-2},
seqan3::gap_open_score{-8}}};
auto msaOpt = seqan3::detail::initialise_scoring_scheme<seqan::Dna>(config);

EXPECT_TRUE((std::same_as<decltype(msaOpt.sc), seqan::Score<int>>));
}

TEST(detail_initialise_scoring_scheme, blosum62)
{
// no scoring information
seqan3::aminoacid_scoring_scheme scheme{seqan3::aminoacid_similarity_matrix::BLOSUM62};
seqan3::configuration config = seqan3::align_cfg::scoring{scheme};
auto msaOpt = seqan3::detail::initialise_scoring_scheme<seqan::AminoAcid>(config);

// compare matrix size and abort test if not equal so the value comparison does not segfault
ASSERT_EQ(static_cast<size_t>(decltype(msaOpt.sc)::TAB_SIZE), static_cast<size_t>(seqan::Blosum62::TAB_SIZE));

seqan::Blosum62 expected_matrix{};
for (size_t i = 0; i < static_cast<size_t>(seqan::Blosum62::TAB_SIZE); ++i)
EXPECT_EQ(msaOpt.sc.data_tab[i], expected_matrix.data_tab[i]);
}

TEST(align_multiple_test, the_first_dna4_test)
{
std::vector<seqan3::dna4_vector> input{"AAAACCCGGG"_dna4, "AACCCGGG"_dna4, "AAAACGGG"_dna4};
Expand Down Expand Up @@ -87,7 +114,10 @@ TEST(align_multiple_test, the_first_aminoacid_test)
copy(std::string_view{"--DPNKPKRAPSAFFVFMGEFREEFKQKNPKNKSVAAVGKAAGERWKSLSESEKAPYVAKANKLKGEYNKAIAAYNKGESA"}
| seqan3::views::char_to<seqan3::gapped<seqan3::aa27>>, std::cpp20::back_inserter(output[3]));

auto result = seqan3::align_multiple(input);
seqan3::aminoacid_scoring_scheme scheme{seqan3::aminoacid_similarity_matrix::BLOSUM62};
seqan3::configuration config = seqan3::align_cfg::scoring{scheme};

auto result = seqan3::align_multiple(input, config);

EXPECT_RANGE_EQ(output, result);
}
Expand Down

0 comments on commit aff7e36

Please sign in to comment.