Skip to content

Commit

Permalink
Removed all commented code
Browse files (browse the repository at this point in the history)
  • Loading branch information
jhellewell14 committed Jan 17, 2025
1 parent c7e0a71 commit c2c0890
Show file tree
Hide file tree
Showing 7 changed files with 52 additions and 159 deletions.
64 changes: 28 additions & 36 deletions src/skalo/compaction.rs
Original file line number Diff line number Diff line change
@@ -1,20 +1,14 @@
use dashmap::DashMap;
use hashbrown::{HashMap, HashSet};
//use std::time::Instant;

use rayon::prelude::*;
use dashmap::DashMap;

//use crate::utils::DATA_INFO;


pub fn compact_graph(all_kmers: &mut HashMap<u128, Vec<u128>>, start_kmers: &HashSet<u128>, end_kmers: &HashSet<u128>) -> DashMap<u128, Vec<u128>> {

//let data_info = DATA_INFO.get().unwrap();

//let start = Instant::now();

pub fn compact_graph(
all_kmers: &mut HashMap<u128, Vec<u128>>,
start_kmers: &HashSet<u128>,
end_kmers: &HashSet<u128>,
) -> DashMap<u128, Vec<u128>> {
let compacted: DashMap<u128, Vec<u128>> = DashMap::new();

// from start k-mers
start_kmers.par_iter().for_each(|kmer| {
if let Some(starting_kmers) = all_kmers.get(kmer) {
Expand All @@ -33,7 +27,9 @@ pub fn compact_graph(all_kmers: &mut HashMap<u128, Vec<u128>>, start_kmers: &Has
vec_visited.push(current_kmer);
visited.insert(current_kmer);

if end_kmers.contains(&current_kmer) || start_kmers.contains(&current_kmer) {
if end_kmers.contains(&current_kmer)
|| start_kmers.contains(&current_kmer)
{
walking_along_path = false;
}
} else {
Expand Down Expand Up @@ -68,7 +64,9 @@ pub fn compact_graph(all_kmers: &mut HashMap<u128, Vec<u128>>, start_kmers: &Has
vec_visited.push(current_kmer);
visited.insert(current_kmer);

if end_kmers.contains(&current_kmer) || start_kmers.contains(&current_kmer) {
if end_kmers.contains(&current_kmer)
|| start_kmers.contains(&current_kmer)
{
walking_along_path = false;
}
} else {
Expand All @@ -78,43 +76,37 @@ pub fn compact_graph(all_kmers: &mut HashMap<u128, Vec<u128>>, start_kmers: &Has
walking_along_path = false;
}
}
// could be "1" but for some reason I get more variant groups with k_graph
//if vec_visited.len() > data_info.k_graph {
if vec_visited.len() > 1 {
compacted.insert(*starting_kmer, vec_visited);
}
}
}
});

//let duration = start.elapsed();
//println!("time taken: {:?}", duration);

//let start = Instant::now();

// modify graph and compacted vector
//let mut nb_removed = 0;
for mut item in compacted.iter_mut() {
let (starting_kmer, vec_visited) = item.pair_mut();
// remove edges corresponding to compacted vector
all_kmers.get_mut(starting_kmer).unwrap().retain(|&neighbor| neighbor != vec_visited[0]);
for window in vec_visited[..vec_visited.len() -1].windows(2) {
all_kmers.get_mut(&window[0]).unwrap().retain(|&neighbor| neighbor != window[1]);
//nb_removed += 1
}

all_kmers
.get_mut(starting_kmer)
.unwrap()
.retain(|&neighbor| neighbor != vec_visited[0]);
for window in vec_visited[..vec_visited.len() - 1].windows(2) {
all_kmers
.get_mut(&window[0])
.unwrap()
.retain(|&neighbor| neighbor != window[1]);
}

// add new edge in place of compacted segment
all_kmers.entry(*starting_kmer)
.or_default()
.push(vec_visited[vec_visited.len() -1]);
all_kmers
.entry(*starting_kmer)
.or_default()
.push(vec_visited[vec_visited.len() - 1]);

// remove last element of compact vector
vec_visited.pop();
}

//let duration = start.elapsed();
//println!("{} edges removed", nb_removed);
//println!("time taken: {:?}", duration);

compacted
}
8 changes: 1 addition & 7 deletions src/skalo/extremities.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use hashbrown::{HashMap, HashSet};
//use std::time::Instant;
use bit_set::BitSet;
use hashbrown::{HashMap, HashSet};

use crate::skalo::utils::{rev_compl_u128, DATA_INFO};

Expand All @@ -15,8 +14,6 @@ pub fn identify_good_kmers(
let mut start_kmers: HashSet<u128> = HashSet::new();
let mut end_kmers: HashSet<u128> = HashSet::new();

//let start = Instant::now();

// iterate over all_kmers
for (kmer, next_kmers) in all_kmers.iter() {
if next_kmers.len() > 1 {
Expand Down Expand Up @@ -57,9 +54,6 @@ pub fn identify_good_kmers(

println!(" . {} entry nodes", start_kmers.len());

//let duration = start.elapsed();
//println!("time taken: {:?}", duration);

(start_kmers, end_kmers)
}

Expand Down
12 changes: 1 addition & 11 deletions src/skalo/input.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use hashbrown::HashMap;
//use std::time::Instant;
use bit_set::BitSet;
use hashbrown::HashMap;

use dashmap::DashMap;
use rayon::prelude::*;
Expand All @@ -19,8 +18,6 @@ pub fn read_input_file() -> (usize, Vec<String>, KmerGraph, KmerSamples) {

println!(" # read file {}", arguments.input_file);

//let start = Instant::now();

// read the skf file and load split-kmers (ska_array), kmer length and sample names
let ska_array = load_array::<u128>(&[arguments.input_file.to_string()], arguments.nb_threads)
.expect("\nerror: could not read the skf file\n\n");
Expand All @@ -31,13 +28,8 @@ pub fn read_input_file() -> (usize, Vec<String>, KmerGraph, KmerSamples) {
println!(" . {}-mers", len_kmer);
println!(" . {} samples", sample_names.len());

//let duration = start.elapsed();
//println!("time taken: {:?}", duration);

println!(" # build colored de Bruijn graph");

//let start = Instant::now();

// build De Bruijn graph
let degenerate_code: HashMap<u8, Vec<char>> = [
(b'A', vec!['A']),
Expand Down Expand Up @@ -129,8 +121,6 @@ pub fn read_input_file() -> (usize, Vec<String>, KmerGraph, KmerSamples) {
let all_kmers: KmerGraph = all_kmers.into_iter().collect();
let kmer_samples: KmerSamples = kmer_samples.into_iter().collect();

//let duration = start.elapsed();
//println!("time taken: {:?}", duration);
println!(" . {} nodes", all_kmers.len());

(len_kmer, sample_names, all_kmers, kmer_samples)
Expand Down
5 changes: 0 additions & 5 deletions src/skalo/positioning.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,11 +132,6 @@ pub fn scan_variants(
let mut vec_position_forward: Vec<u32> = Vec::new();
let mut vec_position_reverse: Vec<u32> = Vec::new();

// process only the 1st and last variants
//let first = &vec_variants[0];
//let last = &vec_variants[vec_variants.len() - 1];

//for variant in [first, last] {
for variant in vec_variants {
let seq = variant.sequence.decode();
let rc_seq = rev_compl(&seq);
Expand Down
16 changes: 1 addition & 15 deletions src/skalo/process_variants.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use hashbrown::{HashMap, HashSet};
//use std::time::Instant;
use bit_set::BitSet;
use hashbrown::{HashMap, HashSet};

use crate::skalo::output::create_fasta_and_vcf;
use crate::skalo::positioning::{extract_genomic_kmers, scan_variants};
Expand Down Expand Up @@ -36,8 +35,6 @@ pub fn analyse_variant_groups(

println!(" # process indels");

//let start = Instant::now();

// collect entry kmers of indels
let (final_indels, entries_indels) = process_indels(indel_groups, data_info.k_graph);
println!(" -> {} indels", final_indels.len());
Expand All @@ -56,9 +53,6 @@ pub fn analyse_variant_groups(
}
}

//let duration = start.elapsed();
//println!("time taken: {:?}", duration);

println!(" # process SNPs");

// create a vector of keys sorted by the ratio of size of Vec<VariantInfo> to the length of the first sequence
Expand All @@ -76,8 +70,6 @@ pub fn analyse_variant_groups(
.collect();
sorted_keys.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap()); // Sort by ratio, descending

//let start = Instant::now();

// start processing SNPs
let mut entries_done: HashSet<u128> = HashSet::new();

Expand All @@ -102,7 +94,6 @@ pub fn analyse_variant_groups(

// get SNP column and kmers
let mut kmers_to_save: HashSet<u128> = HashSet::new();
//let mut found_snp_pos: HashMap<usize, Vec<char>> = HashMap::new();
let mut found_snp_pos: HashMap<usize, Vec<char>> =
HashMap::with_capacity(real_snp_pos.len());

Expand Down Expand Up @@ -172,8 +163,6 @@ pub fn analyse_variant_groups(

// adjust position with SNP pos in variant group and orientation
for (pos, column) in found_snp_pos {
//println!("{}", seq_length);

let final_position = if is_forward {
position + (pos - data_info.k_graph) as u32
} else {
Expand Down Expand Up @@ -207,9 +196,6 @@ pub fn analyse_variant_groups(
}
}

//let duration = start.elapsed();
//println!("time taken: {:?}", duration);

if do_postioning {
println!(
" -> {} SNPs (+ {} w/o position)",
Expand Down
8 changes: 1 addition & 7 deletions src/skalo/read_graph.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use hashbrown::{HashMap, HashSet};
//use std::time::Instant;
use bit_set::BitSet;
use hashbrown::{HashMap, HashSet};

use indicatif::{ProgressBar, ProgressStyle};
use rayon::prelude::*;
Expand Down Expand Up @@ -31,8 +30,6 @@ pub fn build_variant_groups(

println!(" # explore graph");

//let start = Instant::now();

let built_groups = Arc::new(Mutex::new(HashMap::<(u128, u128), Vec<VariantInfo>>::new()));

let pool = ThreadPoolBuilder::new()
Expand Down Expand Up @@ -240,9 +237,6 @@ pub fn build_variant_groups(
});
});

//let duration = start.elapsed();
//println!("time taken: {:?}", duration);

let built_groups_end = built_groups.lock().unwrap();

println!(" . {} variant groups", built_groups_end.len());
Expand Down
Loading

0 comments on commit c2c0890

Please sign in to comment.