Skip to content

Commit

Permalink
add -f remove_overlap square region gene pair and -h info
Browse files Browse the repository at this point in the history
  • Loading branch information
xiaodli committed Oct 8, 2024
1 parent 739e9d0 commit 09dcdf3
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 57 deletions.
29 changes: 17 additions & 12 deletions src/controlLayer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1485,17 +1485,18 @@ int geno(int argc, char **argv) {


int pro(int argc, char **argv) {
double MIN_ALIGNMENT_SCORE = 2;
double MIN_ALIGNMENT_SCORE = 3;
double GAP_OPEN_PENALTY = -0.02;
double GAP_EXTENSION_PENALTY = -0.005;
int MAX_DIST_BETWEEN_MATCHES = 25;
int refMaximumTimes = 1;
int queryMaximumTimes = 1;
int OVER_LAP_WINDOW = 0;
int OVER_LAP_WINDOW = 1;
int delete_tandem_length = 0;
int get_all_collinear_gene_pair = 0;
int count_style = 1;
int strict_strand = 1;
int strict_remove_overlap = 0;

std::stringstream usage;

Expand All @@ -1511,14 +1512,15 @@ int pro(int argc, char **argv) {
" -E DOUBLE chain gap extend penalty (default: " << GAP_EXTENSION_PENALTY << ")" << std::endl <<
" -I DOUBLE minimum chain score (default: " << MIN_ALIGNMENT_SCORE << ")" << std::endl <<
" -D INT maximum gap size for chain (default: " << MAX_DIST_BETWEEN_MATCHES << ")" << std::endl <<
" -m INT Specify whether to delete tandem gene pairs in the input file (default: " << delete_tandem_length << ")" << std::endl <<
" Options: 0 means retain tandem gene pairs; 1 or any other integer means delete gene pairs with length less than this value)." << std::endl <<
" When you are doing self vs self synteny alignment, we recommend you to delete tandem gene pairs."<< std::endl <<
" -W INT Collapse BLAST matches. Specify the maximum distance allowed, and only retain best homology pair" << "(default: " << OVER_LAP_WINDOW << ")" << std::endl <<
" -a INT Enable this flag to get all collinear results and disable the R and Q parameters (default: " << get_all_collinear_gene_pair << ")" << std::endl <<
" Options: 0:enable R Q parameter; 1 or other integer: all collinear result" << std::endl <<
" -c INT For R Q count, 0: gene number count; 1 or other integer: block count (default: " << count_style << ")" << std::endl <<
" -s INT Specify whether the direction of the gene pair is the same as the block direction (default: "<< strict_strand << ")"
" -m INT This parameter is useful only for self vs self synteny alignment. Options: 0 means retain tandem gene pairs;"<< std::endl <<
" 1 or any other integer means remove gene pairs with a tandem length shorter than the specified integer value.(default: " << delete_tandem_length << ")" << std::endl <<
" When you are doing ks peaks analysis about WGD/Divergent event, you need set this parameter"<< std::endl <<
" -W INT Collapse BLAST matches. Specify the maximum distance allowed, and only retain best homology pair to synteny analysis under this distance condition(default: " << OVER_LAP_WINDOW << ")" << std::endl <<
" -a INT Enable this flag to get all collinear results and disable R and Q parameters (default: " << get_all_collinear_gene_pair << ")" << std::endl <<
" Options: 0:enable R Q parameter; 1 or other integer: get all collinear result and disable R Q parameter" << std::endl <<
" -c INT R Q parameter's count style for a block, 0: count only the syntenic genes within a block; 1 or other integer: count all genes within a block(default: " << count_style << ")" << std::endl <<
" -s INT Specify whether the direction of the gene pairs within a block must be strictly the same or reverse as the block's direction. (1:yes;0:no. default: "<< strict_strand << ")" << std::endl <<
" -f INT Specify whether to strictly remove square region gene pairs for a block to avoid overlap. (1:yes;0:no. default: "<< strict_remove_overlap << ")"
<< std::endl;

InputParser inputParser(argc, argv);
Expand Down Expand Up @@ -1567,6 +1569,9 @@ int pro(int argc, char **argv) {
if (inputParser.cmdOptionExists("-c")) {
count_style = std::stoi(inputParser.getCmdOption("-c"));
}
if (inputParser.cmdOptionExists("-f")) {
strict_remove_overlap = std::stoi(inputParser.getCmdOption("-f"));
}

std::vector<double> block_score;
std::vector<std::vector<AlignmentMatch>> alignmentMatchsMap;
Expand Down Expand Up @@ -1757,12 +1762,12 @@ int pro(int argc, char **argv) {
longestPathQuotaGene(alignmentMatchsMapT, alignmentMatchsMap, refIndexMap, queryIndexMap,
GAP_EXTENSION_PENALTY, GAP_OPEN_PENALTY, MIN_ALIGNMENT_SCORE,
MAX_DIST_BETWEEN_MATCHES,refMaximumTimes, queryMaximumTimes,
block_score, count_style, get_all_collinear_gene_pair, refTimes, queryTimes);
block_score, count_style, get_all_collinear_gene_pair, refTimes, queryTimes, strict_remove_overlap);
} else {
longestPathQuotaGeneNonStrand(alignmentMatchsMapT, alignmentMatchsMap, refIndexMap, queryIndexMap,
GAP_EXTENSION_PENALTY, GAP_OPEN_PENALTY, MIN_ALIGNMENT_SCORE,
MAX_DIST_BETWEEN_MATCHES,refMaximumTimes, queryMaximumTimes,
block_score, count_style, get_all_collinear_gene_pair, refTimes, queryTimes);
block_score, count_style, get_all_collinear_gene_pair, refTimes, queryTimes, strict_remove_overlap);
}
}
// output anchors file.
Expand Down
92 changes: 49 additions & 43 deletions src/impl/geneSyntenic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1172,7 +1172,7 @@ void longestPathQuotaGene(std::vector<AlignmentMatch> pairedSimilarFragments, st
double &MIN_ALIGNMENT_SCORE, const int &MAX_DIST_BETWEEN_MATCHES,
int &refMaximumTimes, int &queryMaximumTimes,
std::vector<double> &block_score, const int &count_style, const int &get_all_collinear_gene_pair,
std::map<std::string, int64_t> &refTimes, std::map<std::string, int64_t> &queryTimes) {
std::map<std::string, int64_t> &refTimes, std::map<std::string, int64_t> &queryTimes, const int &strict_remove_overlap) {

std::vector<Path> high;
std::vector<int64_t> ans;
Expand Down Expand Up @@ -1535,19 +1535,21 @@ void longestPathQuotaGene(std::vector<AlignmentMatch> pairedSimilarFragments, st
}
}
}
for (int ii = 0; ii < n; ii++) { // this is to avoid nested chain
if (pairedSimilarFragments[ii].getQueryChr() == pairedSimilarFragments[ans[0]].getQueryChr() && (
(pairedSimilarFragments[ii].getQueryStartPos() <= chainQueryStart && chainQueryStart <= pairedSimilarFragments[ii].getQueryEndPos()) ||
(pairedSimilarFragments[ii].getQueryStartPos() <= chainQueryEnd && chainQueryEnd <= pairedSimilarFragments[ii].getQueryEndPos()) ||
(chainQueryStart <= pairedSimilarFragments[ii].getQueryStartPos() && pairedSimilarFragments[ii].getQueryStartPos() <= chainQueryEnd) ||
(chainQueryStart <= pairedSimilarFragments[ii].getQueryEndPos() && pairedSimilarFragments[ii].getQueryEndPos() <= chainQueryEnd))
&& pairedSimilarFragments[ii].getRefChr() == pairedSimilarFragments[ans[0]].getRefChr() && (
(pairedSimilarFragments[ii].getRefStartPos() <= chainRefStart && chainRefStart <= pairedSimilarFragments[ii].getRefEndPos()) ||
(pairedSimilarFragments[ii].getRefStartPos() <= chainRefEnd && chainRefEnd <= pairedSimilarFragments[ii].getRefEndPos()) ||
(chainRefStart <= pairedSimilarFragments[ii].getRefStartPos() && pairedSimilarFragments[ii].getRefStartPos() <= chainRefEnd) ||
(chainRefStart <= pairedSimilarFragments[ii].getRefEndPos() && pairedSimilarFragments[ii].getRefEndPos() <= chainRefEnd))
) {
prev[ii] = -2;
if (strict_remove_overlap != 0){
for (int ii = 0; ii < n; ii++) { // this is to avoid nested chain
if (pairedSimilarFragments[ii].getQueryChr() == pairedSimilarFragments[ans[0]].getQueryChr() && (
(pairedSimilarFragments[ii].getQueryStartPos() <= chainQueryStart && chainQueryStart <= pairedSimilarFragments[ii].getQueryEndPos()) ||
(pairedSimilarFragments[ii].getQueryStartPos() <= chainQueryEnd && chainQueryEnd <= pairedSimilarFragments[ii].getQueryEndPos()) ||
(chainQueryStart <= pairedSimilarFragments[ii].getQueryStartPos() && pairedSimilarFragments[ii].getQueryStartPos() <= chainQueryEnd) ||
(chainQueryStart <= pairedSimilarFragments[ii].getQueryEndPos() && pairedSimilarFragments[ii].getQueryEndPos() <= chainQueryEnd))
&& pairedSimilarFragments[ii].getRefChr() == pairedSimilarFragments[ans[0]].getRefChr() && (
(pairedSimilarFragments[ii].getRefStartPos() <= chainRefStart && chainRefStart <= pairedSimilarFragments[ii].getRefEndPos()) ||
(pairedSimilarFragments[ii].getRefStartPos() <= chainRefEnd && chainRefEnd <= pairedSimilarFragments[ii].getRefEndPos()) ||
(chainRefStart <= pairedSimilarFragments[ii].getRefStartPos() && pairedSimilarFragments[ii].getRefStartPos() <= chainRefEnd) ||
(chainRefStart <= pairedSimilarFragments[ii].getRefEndPos() && pairedSimilarFragments[ii].getRefEndPos() <= chainRefEnd))
) {
prev[ii] = -2;
}
}
}
}
Expand Down Expand Up @@ -1585,7 +1587,7 @@ void longestPathQuotaGeneNonStrand(std::vector<AlignmentMatch> pairedSimilarFrag
double &MIN_ALIGNMENT_SCORE, const int &MAX_DIST_BETWEEN_MATCHES,
int &refMaximumTimes, int &queryMaximumTimes,
std::vector<double> &block_score, const int &count_style, const int &get_all_collinear_gene_pair,
std::map<std::string, int64_t> &refTimes, std::map<std::string, int64_t> &queryTimes) {
std::map<std::string, int64_t> &refTimes, std::map<std::string, int64_t> &queryTimes, const int &strict_remove_overlap) {

std::vector<PathNonStrand> high;
std::vector<int64_t> ans;
Expand Down Expand Up @@ -1997,20 +1999,22 @@ void longestPathQuotaGeneNonStrand(std::vector<AlignmentMatch> pairedSimilarFrag
}
}
}
for (int ii = 0; ii < n; ii++) { // this is to avoid nested chain
if (pairedSimilarFragments[ii].getQueryChr() == pairedSimilarFragments[ans[0]].getQueryChr() && (
(pairedSimilarFragments[ii].getQueryStartPos() <= chainQueryStart && chainQueryStart <= pairedSimilarFragments[ii].getQueryEndPos()) ||
(pairedSimilarFragments[ii].getQueryStartPos() <= chainQueryEnd && chainQueryEnd <= pairedSimilarFragments[ii].getQueryEndPos()) ||
(chainQueryStart <= pairedSimilarFragments[ii].getQueryStartPos() && pairedSimilarFragments[ii].getQueryStartPos() <= chainQueryEnd) ||
(chainQueryStart <= pairedSimilarFragments[ii].getQueryEndPos() && pairedSimilarFragments[ii].getQueryEndPos() <= chainQueryEnd))
&& pairedSimilarFragments[ii].getRefChr() == pairedSimilarFragments[ans[0]].getRefChr() && (
(pairedSimilarFragments[ii].getRefStartPos() <= chainRefStart && chainRefStart <= pairedSimilarFragments[ii].getRefEndPos()) ||
(pairedSimilarFragments[ii].getRefStartPos() <= chainRefEnd && chainRefEnd <= pairedSimilarFragments[ii].getRefEndPos()) ||
(chainRefStart <= pairedSimilarFragments[ii].getRefStartPos() && pairedSimilarFragments[ii].getRefStartPos() <= chainRefEnd) ||
(chainRefStart <= pairedSimilarFragments[ii].getRefEndPos() && pairedSimilarFragments[ii].getRefEndPos() <= chainRefEnd))
) {
prev_positive[ii] = -2;
prev_negative[ii] = -2;
if (strict_remove_overlap !=0 ){
for (int ii = 0; ii < n; ii++) { // this is to avoid nested chain
if (pairedSimilarFragments[ii].getQueryChr() == pairedSimilarFragments[ans[0]].getQueryChr() && (
(pairedSimilarFragments[ii].getQueryStartPos() <= chainQueryStart && chainQueryStart <= pairedSimilarFragments[ii].getQueryEndPos()) ||
(pairedSimilarFragments[ii].getQueryStartPos() <= chainQueryEnd && chainQueryEnd <= pairedSimilarFragments[ii].getQueryEndPos()) ||
(chainQueryStart <= pairedSimilarFragments[ii].getQueryStartPos() && pairedSimilarFragments[ii].getQueryStartPos() <= chainQueryEnd) ||
(chainQueryStart <= pairedSimilarFragments[ii].getQueryEndPos() && pairedSimilarFragments[ii].getQueryEndPos() <= chainQueryEnd))
&& pairedSimilarFragments[ii].getRefChr() == pairedSimilarFragments[ans[0]].getRefChr() && (
(pairedSimilarFragments[ii].getRefStartPos() <= chainRefStart && chainRefStart <= pairedSimilarFragments[ii].getRefEndPos()) ||
(pairedSimilarFragments[ii].getRefStartPos() <= chainRefEnd && chainRefEnd <= pairedSimilarFragments[ii].getRefEndPos()) ||
(chainRefStart <= pairedSimilarFragments[ii].getRefStartPos() && pairedSimilarFragments[ii].getRefStartPos() <= chainRefEnd) ||
(chainRefStart <= pairedSimilarFragments[ii].getRefEndPos() && pairedSimilarFragments[ii].getRefEndPos() <= chainRefEnd))
) {
prev_positive[ii] = -2;
prev_negative[ii] = -2;
}
}
}
}
Expand Down Expand Up @@ -2127,20 +2131,22 @@ void longestPathQuotaGeneNonStrand(std::vector<AlignmentMatch> pairedSimilarFrag
}
}
}
for (int ii = 0; ii < n; ii++) { // this is to avoid nested chain
if (pairedSimilarFragments[ii].getQueryChr() == pairedSimilarFragments[ans[0]].getQueryChr() && (
(pairedSimilarFragments[ii].getQueryStartPos() <= chainQueryStart && chainQueryStart <= pairedSimilarFragments[ii].getQueryEndPos()) ||
(pairedSimilarFragments[ii].getQueryStartPos() <= chainQueryEnd && chainQueryEnd <= pairedSimilarFragments[ii].getQueryEndPos()) ||
(chainQueryStart <= pairedSimilarFragments[ii].getQueryStartPos() && pairedSimilarFragments[ii].getQueryStartPos() <= chainQueryEnd) ||
(chainQueryStart <= pairedSimilarFragments[ii].getQueryEndPos() && pairedSimilarFragments[ii].getQueryEndPos() <= chainQueryEnd))
&& pairedSimilarFragments[ii].getRefChr() == pairedSimilarFragments[ans[0]].getRefChr() && (
(pairedSimilarFragments[ii].getRefStartPos() <= chainRefStart && chainRefStart <= pairedSimilarFragments[ii].getRefEndPos()) ||
(pairedSimilarFragments[ii].getRefStartPos() <= chainRefEnd && chainRefEnd <= pairedSimilarFragments[ii].getRefEndPos()) ||
(chainRefStart <= pairedSimilarFragments[ii].getRefStartPos() && pairedSimilarFragments[ii].getRefStartPos() <= chainRefEnd) ||
(chainRefStart <= pairedSimilarFragments[ii].getRefEndPos() && pairedSimilarFragments[ii].getRefEndPos() <= chainRefEnd))
) {
prev_negative[ii] = -2;
prev_positive[ii] = -2;
if (strict_remove_overlap != 0){
for (int ii = 0; ii < n; ii++) { // this is to avoid nested chain
if (pairedSimilarFragments[ii].getQueryChr() == pairedSimilarFragments[ans[0]].getQueryChr() && (
(pairedSimilarFragments[ii].getQueryStartPos() <= chainQueryStart && chainQueryStart <= pairedSimilarFragments[ii].getQueryEndPos()) ||
(pairedSimilarFragments[ii].getQueryStartPos() <= chainQueryEnd && chainQueryEnd <= pairedSimilarFragments[ii].getQueryEndPos()) ||
(chainQueryStart <= pairedSimilarFragments[ii].getQueryStartPos() && pairedSimilarFragments[ii].getQueryStartPos() <= chainQueryEnd) ||
(chainQueryStart <= pairedSimilarFragments[ii].getQueryEndPos() && pairedSimilarFragments[ii].getQueryEndPos() <= chainQueryEnd))
&& pairedSimilarFragments[ii].getRefChr() == pairedSimilarFragments[ans[0]].getRefChr() && (
(pairedSimilarFragments[ii].getRefStartPos() <= chainRefStart && chainRefStart <= pairedSimilarFragments[ii].getRefEndPos()) ||
(pairedSimilarFragments[ii].getRefStartPos() <= chainRefEnd && chainRefEnd <= pairedSimilarFragments[ii].getRefEndPos()) ||
(chainRefStart <= pairedSimilarFragments[ii].getRefStartPos() && pairedSimilarFragments[ii].getRefStartPos() <= chainRefEnd) ||
(chainRefStart <= pairedSimilarFragments[ii].getRefEndPos() && pairedSimilarFragments[ii].getRefEndPos() <= chainRefEnd))
) {
prev_negative[ii] = -2;
prev_positive[ii] = -2;
}
}
}
}
Expand Down
Loading

0 comments on commit 09dcdf3

Please sign in to comment.