Skip to content

Commit

Permalink
Number of kmers can now be auto, which calculates the minimum kmers o…
Browse files Browse the repository at this point in the history
…r a value passed by user
  • Loading branch information
jhellewell14 committed Jan 21, 2025
1 parent 41fdd25 commit e2c734c
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 50 deletions.
23 changes: 22 additions & 1 deletion src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,22 @@ pub fn check_threads(threads: usize) {
}
}

#[doc(hidden)]
fn valid_kmer_min(s: &str) -> Result<u16, String> {

Check warning on line 88 in src/cli.rs

View workflow job for this annotation

GitHub Actions / clippy

function `valid_kmer_min` is never used

warning: function `valid_kmer_min` is never used --> src/cli.rs:88:4 | 88 | fn valid_kmer_min(s: &str) -> Result<u16, String> { | ^^^^^^^^^^^^^^ | = note: `#[warn(dead_code)]` on by default
if s.eq("auto") {
Ok(0)
} else {
let k: u16 = s
.parse()
.map_err(|_| format!("`{s}` isn't a valid minimum kmer count"))?;
if k.ge(&5) {
Ok(k)
} else {
Err("minimum kmer count must be >= 5".to_string())
}
}
}

/// Possible output file types
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum)]
pub enum FileType {
Expand Down Expand Up @@ -118,6 +134,11 @@ impl fmt::Display for FilterType {
}
}

pub enum KmerMin {

Check warning on line 137 in src/cli.rs

View workflow job for this annotation

GitHub Actions / clippy

missing documentation for an enum

warning: missing documentation for an enum --> src/cli.rs:137:1 | 137 | pub enum KmerMin { | ^^^^^^^^^^^^^^^^ | note: the lint level is defined here --> src/lib.rs:406:9 | 406 | #![warn(missing_docs)] | ^^^^^^^^^^^^
Auto,

Check warning on line 138 in src/cli.rs

View workflow job for this annotation

GitHub Actions / clippy

missing documentation for a variant

warning: missing documentation for a variant --> src/cli.rs:138:5 | 138 | Auto, | ^^^^
Value(u16),

Check warning on line 139 in src/cli.rs

View workflow job for this annotation

GitHub Actions / clippy

missing documentation for a variant

warning: missing documentation for a variant --> src/cli.rs:139:5 | 139 | Value(u16), | ^^^^^
}

/// Options that apply to all subcommands
#[derive(Parser)]
#[command(author, version, about, long_about = None)]
Expand Down Expand Up @@ -168,7 +189,7 @@ pub enum Commands {

/// Minimum k-mer count (with reads)
#[arg(long)]
min_count: Option<u16>,
min_count: Option<String>,

/// Minimum k-mer quality (with reads)
#[arg(long, default_value_t = DEFAULT_MINQUAL)]
Expand Down
113 changes: 64 additions & 49 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -505,60 +505,75 @@ pub fn main() {
let input_files = get_input_list(file_list, seq_files);
let rc = !*single_strand;

// Are there >=2 fastq files?
let enough_fastq = input_files
.iter()
.filter(|file| file.2.is_some())
.count()
.ge(&2);

// Is min_count provided by the user?
let not_provided = min_count.is_none();
let min_kmer_count: u16 = match min_count {
// If auto attempt to calculate using mixture model
Some(s) if s.eq(&String::from("auto")) => {
// Are there >=2 fastq files?
let enough_fastq = input_files
.iter()
.filter(|file| file.2.is_some())
.count()
.ge(&2);

// Minimum kmer count logic
let min_kmer_count: u16 = if enough_fastq & not_provided {
// Calculate cutoff
// Get first two fastq files
let mut files_iter = input_files
.iter()
.filter(|file| file.2.is_some())
.take(2)
.map(|x| x.1.clone());
let fastq_fwd: String = files_iter.next().unwrap();
let fastq_rev: String = files_iter.next().unwrap();
if enough_fastq {
// Calculate cutoff
// Get first two fastq files
let mut files_iter = input_files
.iter()
.filter(|file| file.2.is_some())
.take(2)
.map(|x| x.1.clone());
let fastq_fwd: String = files_iter.next().unwrap();
let fastq_rev: String = files_iter.next().unwrap();

let cutoff;
if *k <= 31 {
log::info!("k={}: using 64-bit representation", *k);
let mut cov =
CoverageHistogram::<u64>::new(&fastq_fwd, &fastq_rev, *k, rc, args.verbose);
cutoff = cov.fit_histogram().expect("Couldn't fit coverage model");
cov.plot_hist();
} else {
log::info!("k={}: using 128-bit representation", *k);
let mut cov = CoverageHistogram::<u128>::new(
&fastq_fwd,
&fastq_rev,
*k,
rc,
args.verbose,
);
cutoff = cov.fit_histogram().expect("Couldn't fit coverage model");
cov.plot_hist();
let cutoff;
if *k <= 31 {
log::info!("k={}: using 64-bit representation", *k);
let mut cov = CoverageHistogram::<u64>::new(
&fastq_fwd,
&fastq_rev,
*k,
rc,
args.verbose,
);
cutoff = cov.fit_histogram().expect("Couldn't fit coverage model");
cov.plot_hist();
} else {
log::info!("k={}: using 128-bit representation", *k);
let mut cov = CoverageHistogram::<u128>::new(
&fastq_fwd,
&fastq_rev,
*k,
rc,
args.verbose,
);
cutoff = cov.fit_histogram().expect("Couldn't fit coverage model");
cov.plot_hist();
}
log::info!("Using inferred minimum kmer value of {}", cutoff);
cutoff as u16
} else {
// Not enough fastq files, use default and warn user
log::info!("Not enough fastq files, using default kmer count of 5");
DEFAULT_MINCOUNT
}
}
cutoff as u16
} else if !not_provided {
// check provided value and use
let val = min_count.unwrap();
if val.ge(&1) {
val
} else {
panic!("Minimum k-mer count cannot be less than 1");
// User has provided something other than auto, attempt to parse to u16
Some(s) => {
let k: u16 = s.parse().expect("Invalid minimum kmer count");
if k.ge(&5) {
log::info!("Using provided minimum kmer value of {}", k);
k
} else {
panic!("Minimum kmer count must be >= 5");
}
}
// Value not provided, use default
None => {
log::info!("No minimum kmer value provided, using default kmer count of 5");
DEFAULT_MINCOUNT
}
} else {
// Use default
log::info!("Not enough fastq files, using default kmer count of 5");
DEFAULT_MINCOUNT
};

let quality = QualOpts {
Expand Down

0 comments on commit e2c734c

Please sign in to comment.