-Basic Features# +Basic Features# -finaletoolkit.frag.coverage(input_file: str | TabixFile | AlignmentFile | Path, interval_file: str, output_file: str, scale_factor: float = 1.0, min_length: int | None = None, max_length: int | None = None, normalize: bool = False, intersect_policy: str = 'midpoint', quality_threshold: int = 30, workers: int = 1, verbose: bool | int = False) → list[tuple[str, int, int, str, float]]# +finaletoolkit.frag.coverage(input_file: str | TabixFile | AlignmentFile | Path, interval_file: str, output_file: str, scale_factor: float = 1.0, min_length: int | None = None, max_length: int | None = None, normalize: bool = False, intersect_policy: str = 'midpoint', quality_threshold: int = 30, workers: int = 1, verbose: bool | int = False) → list[tuple[str, int, int, str, float]]# Return estimated fragment coverage over intervals specified in intervals. Fragments are read from input_file which may be a SAM, BAM, CRAM, or Frag.gz file. Uses an algorithm where the @@ -385,7 +411,7 @@ Basic FeaturesReturns: @@ -399,7 +425,7 @@ Basic Features -finaletoolkit.frag.frag_length(input_file: str | AlignmentFile | TabixFile, contig: str | None = None, start: int | None = None, stop: int | None = None, intersect_policy: str = 'midpoint', output_file: str | None = None, quality_threshold: int = 30, verbose: bool = False) → ndarray# +finaletoolkit.frag.frag_length(input_file: str | AlignmentFile | TabixFile, contig: str | None = None, start: int | None = None, stop: int | None = None, intersect_policy: str = 'midpoint', output_file: str | None = None, quality_threshold: int = 30, verbose: bool = False) → ndarray# Return np.ndarray containing lengths of fragments in input_file that are above the quality threshold and are proper-paired reads. 
@@ -415,9 +441,9 @@ Basic FeaturesReturns: @@ -432,25 +458,25 @@ Basic Features -finaletoolkit.frag.frag_length_bins(input_file: str | AlignmentFile, contig: str | None = None, start: int | None = None, stop: int | None = None, min_length: int = 0, max_length: int | None = None, bin_size: int = 1, output_file: str | None = None, intersect_policy: str = 'midpoint', quality_threshold: int = 30, histogram_path: str | None = None, verbose: bool | int = False) → tuple[ndarray, ndarray]# +finaletoolkit.frag.frag_length_bins(input_file: str | AlignmentFile, contig: str | None = None, start: int | None = None, stop: int | None = None, min_length: int | None = 0, max_length: int | None = None, bin_size: int = 1, output_file: str | None = None, intersect_policy: str = 'midpoint', quality_threshold: int = 30, histogram_path: str | None = None, verbose: bool | int = False) → tuple[numpy.ndarray, numpy.ndarray]# Takes input_file, computes frag lengths of fragments and returns two arrays containing bins and counts by size. Optionally prints data to output as a tab delimited table or histogram. Parameters: -input_file (str or AlignmentFile) -contig (str, optional) -start (int, optional) -stop (int, optional) -bin_size (int, optional) -output_file (str, optional) +input_file (str or AlignmentFile) – +contig (str, optional) – +start (int, optional) – +stop (int, optional) – +bin_size (int, optional) – +output_file (str, optional) – intersect_policy (str, optional) – Specifies what policy is used to include fragments in the given interval. Default is “midpoint”. Policies include: - midpoint: the average of end coordinates of a fragment lies in the interval. - any: any part of the fragment is in the interval. 
-workers (int, optional) +workers (int, optional) – Returns: @@ -500,8 +526,7 @@ Basic Features - + @@ -540,8 +565,8 @@ Basic Features - + + diff --git a/docs/_build/html/documentation/api_reference/cleavageprofile.html b/docs/_build/html/documentation/api_reference/cleavageprofile.html index 77ef2982..f8c4f127 100644 --- a/docs/_build/html/documentation/api_reference/cleavageprofile.html +++ b/docs/_build/html/documentation/api_reference/cleavageprofile.html @@ -1,12 +1,13 @@ + - + - + Cleavage Profile — FinaleToolkit documentation @@ -16,31 +17,28 @@ document.documentElement.dataset.mode = localStorage.getItem("mode") || ""; document.documentElement.dataset.theme = localStorage.getItem("theme") || ""; - - - - - - + + + + + + + + + - + - - - - + + + - - - + + + @@ -49,7 +47,6 @@ - @@ -65,8 +62,19 @@ Back to top - - + + + + + + + + + @@ -74,6 +82,7 @@ Ctrl+K - - + + @@ -116,7 +125,7 @@ - + FinaleToolkit @@ -171,21 +180,29 @@ - - - Search - Ctrl+K - + - - - - - + @@ -194,11 +211,15 @@ - - - Search - Ctrl+K - + @@ -217,8 +238,7 @@ - - + @@ -272,11 +292,15 @@ - - - - - + @@ -322,6 +346,8 @@ + + @@ -336,7 +362,7 @@ API - Cleavage Profile + Cleavage Profile @@ -354,10 +380,10 @@ -Cleavage Profile# +Cleavage Profile# -finaletoolkit.frag.cleavage_profile(input_file: str, chrom_size: int, contig: str, start: int, stop: int, left: int = 0, right: int = 0, fraction_low: int = 1, fraction_high: int = 10000000, quality_threshold: int = 30, verbose: bool | int = 0) → ndarray# +finaletoolkit.frag.cleavage_profile(input_file: str | PathLike | AlignmentFile | TabixFile, chrom_size: int, contig: str, start: int, stop: int, left: int = 0, right: int = 0, min_length: int | None = None, max_length: int | None = None, quality_threshold: int = 30, verbose: bool | int = 0, fraction_low: int | None = None, fraction_high: int | None = None) → ndarray# Cleavage profile calculated over a single interval. 
Parameters: @@ -370,10 +396,12 @@ Cleavage ProfileReturns: @@ -422,8 +450,7 @@ Cleavage Profile - + @@ -460,8 +487,8 @@ Cleavage Profile - + + diff --git a/docs/_build/html/documentation/api_reference/delfi.html b/docs/_build/html/documentation/api_reference/delfi.html index b693ec8f..e37d1237 100644 --- a/docs/_build/html/documentation/api_reference/delfi.html +++ b/docs/_build/html/documentation/api_reference/delfi.html @@ -1,12 +1,13 @@ + - + - + DELFI — FinaleToolkit documentation @@ -16,31 +17,28 @@ document.documentElement.dataset.mode = localStorage.getItem("mode") || ""; document.documentElement.dataset.theme = localStorage.getItem("theme") || ""; - - - - - - + + + + + + + + + - + - - - - + + + - - - + + + @@ -49,7 +47,6 @@ - @@ -65,8 +62,19 @@ Back to top - - + + + + + + + + + @@ -74,6 +82,7 @@ Ctrl+K - - + + @@ -116,7 +125,7 @@ - + FinaleToolkit @@ -171,21 +180,29 @@ - - - Search - Ctrl+K - + - - - - - + @@ -194,11 +211,15 @@ - - - Search - Ctrl+K - + @@ -217,8 +238,7 @@ - - + @@ -272,11 +292,15 @@ - - - - - + @@ -322,6 +346,8 @@ + + @@ -336,7 +362,7 @@ API - DELFI + DELFI @@ -354,10 +380,10 @@ -DELFI# +DELFI# -finaletoolkit.frag.delfi(input_file: str, autosomes: str, bins_file: str, reference_file: str, blacklist_file: str | None = None, gap_file: str | GenomeGaps | None = None, output_file: str | None = None, gc_correct: bool = True, remove_nocov: bool = True, merge_bins: bool = True, window_size: int = 5000000, quality_threshold: int = 30, workers: int = 1, verbose: int | bool = False) → DataFrame# +finaletoolkit.frag.delfi(input_file: str, chrom_sizes: str, bins_file: str, reference_file: str, blacklist_file: str | None = None, gap_file: str | GenomeGaps | None = None, output_file: str | None = None, gc_correct: bool = True, remove_nocov: bool = True, merge_bins: bool = True, window_size: int = 5000000, quality_threshold: int = 30, workers: int = 1, verbose: int | bool = False) → DataFrame# A function that replicates the methodology of Christiano et 
al (2019). @@ -365,11 +391,11 @@ DELFI#< input_file (str) – Path string pointing to a bam file containing PE fragment reads. -autosomes (str) – Path string to a chrom.sizes file containing only autosomal +chrom_sizes (str) – Path string to a chrom.sizes file containing only autosomal chromosomes bins_file (str) – Path string to a BED file containing 100kb bins for reference genome of choice. -reference_file (str) – Path string to .2bit file for reference genoe. +reference_file (str) – Path string to .2bit file for reference genome. gap_file (str or GenomeGaps) – Specifies locations of telomeres and centromeres for reference genome. There are three options: - Path string to a BED4+ file where each interval is a @@ -395,18 +421,25 @@ DELFI#< stdout. Default is False. +Returns: +Results of delfi analysis, with column names corresponding to +those generated by the original author’s scripts. + +Return type: +pandas DataFrame + -finaletoolkit.frag.delfi_gc_correct(windows: DataFrame, alpha: float = 0.75, it: int = 8, verbose: bool = False)# +finaletoolkit.frag.delfi_gc_correct(windows: DataFrame, alpha: float = 0.75, it: int = 8, verbose: bool = False)# Helper function that takes window data and performs GC adjustment. 
-finaletoolkit.frag.delfi_merge_bins(hundred_kb_bins: DataFrame, gc_corrected: bool = True, verbose: bool = False) → DataFrame# +finaletoolkit.frag.delfi_merge_bins(hundred_kb_bins: DataFrame, gc_corrected: bool = True, verbose: bool = False) → DataFrame# @@ -446,8 +479,7 @@ DELFI#< - - + @@ -486,8 +518,8 @@ DELFI#< - - + + @@ -194,11 +211,15 @@ - - - Search - Ctrl+K - + @@ -217,8 +238,7 @@ - - + @@ -272,11 +292,15 @@ - - - - - + @@ -322,6 +346,8 @@ + + @@ -336,7 +362,7 @@ API - End-Motifs + End-Motifs @@ -354,10 +380,10 @@ -End-Motifs# +End-Motifs# -class finaletoolkit.frag.EndMotifFreqs(kmer_frequencies: Iterable[tuple[str, float]], k: int, quality_threshold: int = 20)# +class finaletoolkit.frag.EndMotifFreqs(kmer_frequencies: Iterable[tuple[str, float]], k: int, quality_threshold: int = 20)# Class that stores frequencies of end-motif k-mer frequencies and contains methods to manipulate this data. @@ -372,7 +398,7 @@ End-Motifs -classmethod from_file(file_path: str | Path, quality_threshold: int, sep: str = '\t', header: int = 0) → EndMotifFreqs# +classmethod from_file(file_path: str | Path, quality_threshold: int, sep: str = '\t', header: int = 0) → EndMotifFreqs# Reads kmer frequency from a two-column tab-delimited file. Parameters: @@ -393,7 +419,7 @@ End-Motifs -motif_diversity_score() → float# +motif_diversity_score() → float# Calculates a motif diversity score (MDS) using normalized Shannon entropy as described by Jiang et al (2020). This function is generalized for any k instead of just 4-mers. 
@@ -401,7 +427,7 @@ End-Motifs -to_tsv(output_file: str | Path, sep: str = '\t')# +to_tsv(output_file: str | Path, sep: str = '\t')# Prints k-mer frequencies to a tsv @@ -409,7 +435,7 @@ End-Motifs -class finaletoolkit.frag.EndMotifsIntervals(intervals: Iterable[tuple[tuple, dict]], k: int, quality_threshold: int = 20)# +class finaletoolkit.frag.EndMotifsIntervals(intervals: Iterable[tuple[tuple, dict]], k: int, quality_threshold: int = 20)# Class that stores frequencies of end-motif k-mers over user-specified intervals and contains methods to manipulate this data. @@ -426,7 +452,7 @@ End-Motifs -freq(kmer: str) → list[tuple[str, int, int, float]]# +freq(kmer: str) → list[tuple[str, int, int, float]]# Returns a list of intervals and associated frquency for given kmer. Results are in the form (chrom, 0-based start, 1-based stop, frequency). @@ -434,7 +460,7 @@ End-Motifs -classmethod from_file(file_path: str, quality_threshold: int, sep: str = ',') → EndMotifFreqs# +classmethod from_file(file_path: str, quality_threshold: int, sep: str = ',', header: int = 0) → EndMotifsIntervals# Reads kmer frequency from a tab-delimited file. Expected columns are contig, start, stop, name, count, (kmers). Because exporting to file includes an option to turn counts to a fraction, @@ -451,20 +477,20 @@ End-Motifskmer_freqs Return type: -EndMotifFreqs +EndMotifsIntervals -mds_bed(output_file: str | Path, sep: str = '\t')# +mds_bed(output_file: str | Path, sep: str = '\t')# Writes MDS for each interval to a bed/bedgraph file. -motif_diversity_score() → list[tuple[tuple, float]]# +motif_diversity_score() → list[tuple[tuple, float]]# Calculates a motif diversity score (MDS) for each interval using normalized Shannon entropy as described by Jiang et al (2020). This function is generalized for any k instead of just 4-mers. 
@@ -472,7 +498,7 @@ End-Motifs -to_bed(kmer: str, output_file: str | Path, calc_freq: bool = True, sep: str = '\t')# +to_bed(kmer: str, output_file: str | Path, calc_freq: bool = True, sep: str = '\t')# Take frequency of specified kmer and writes to BED. Parameters: @@ -488,7 +514,7 @@ End-Motifs -to_bedgraph(kmer: str, output_file: str | Path, calc_freq: bool = True, sep: str = '\t')# +to_bedgraph(kmer: str, output_file: str | Path, calc_freq: bool = True, sep: str = '\t')# Take frequency of specified kmer and writes to bedgraph. Parameters: @@ -504,7 +530,7 @@ End-Motifs -to_tsv(output_file: str | Path, calc_freq: bool = True, sep: str = '\t')# +to_tsv(output_file: str | Path, calc_freq: bool = True, sep: str = '\t')# Writes all intervals and associated frquencies to file. Columns are contig, start, stop, name, count, (kmers). @@ -523,7 +549,7 @@ End-Motifs -finaletoolkit.frag.region_end_motifs(input_file: str, contig: str, start: int, stop: int, refseq_file: str | Path, k: int = 4, fraction_low: int = 10, fraction_high: int = 600, both_strands: bool = True, output_file: None | str = None, quality_threshold: int = 20, verbose: bool | int = False) → dict# +finaletoolkit.frag.region_end_motifs(input_file: str, contig: str, start: int, stop: int, refseq_file: str | Path, k: int = 4, fraction_low: int = 10, fraction_high: int = 600, both_strands: bool = True, output_file: str | None = None, quality_threshold: int = 20, verbose: bool | int = False) → dict# Function that reads fragments in the specified region from a BAM, SAM, or tabix indexed file and returns the 5’ k-mer (default is 4-mer) end motif counts as a dictionary. 
This function @@ -541,9 +567,9 @@ End-MotifsReturns: @@ -557,7 +583,7 @@ End-Motifs -finaletoolkit.frag.end_motifs(input_file: str, refseq_file: str | Path, k: int = 4, fraction_low: int = 10, fraction_high: int = 600, both_strands: bool = False, output_file: None | str = None, quality_threshold: int = 30, workers: int = 1, verbose: bool | int = False) → EndMotifFreqs# +finaletoolkit.frag.end_motifs(input_file: str, refseq_file: str | Path, k: int = 4, min_length: int = 10, max_length: int = 600, both_strands: bool = True, output_file: str | None = None, quality_threshold: int = 30, workers: int = 1, verbose: bool | int = False, fraction_low: int | None = None, fraction_high: int | None = None) → EndMotifFreqs# Function that reads fragments from a BAM, SAM, or tabix indexed file and returns the 5’ k-mer (default is 4-mer) end motif frequencies as a dictionary. Optionally writes data to a tsv. This @@ -569,10 +595,15 @@ End-MotifsReturns: @@ -586,7 +617,7 @@ End-Motifs -finaletoolkit.frag.interval_end_motifs(input_file: str, refseq_file: str | Path, intervals: str | Iterable[tuple[str, int, int, str]], k: int = 4, fraction_low: int = 10, fraction_high: int = 600, both_strands: bool = True, output_file: None | str = None, quality_threshold: int = 30, workers: int = 1, verbose: bool | int = False) → EndMotifsIntervals# +finaletoolkit.frag.interval_end_motifs(input_file: str, refseq_file: str | Path, intervals: str | Iterable[tuple[str, int, int, str]], k: int = 4, min_length: int = 10, max_length: int = 600, both_strands: bool = True, output_file: str | None = None, quality_threshold: int = 30, workers: int = 1, verbose: bool | int = False, fraction_low: int | None = None, fraction_high: int | None = None) → EndMotifsIntervals# Function that reads fragments from a BAM, SAM, or tabix indexed file and user-specified intervals and returns the 5’ k-mer (default is 4-mer) end motif. Optionally writes data to a tsv. 
@@ -601,7 +632,7 @@ End-MotifsReturns: @@ -650,8 +681,7 @@ End-Motifs - + @@ -706,8 +736,8 @@ End-Motifs - + + diff --git a/docs/_build/html/documentation/api_reference/fragfile.html b/docs/_build/html/documentation/api_reference/fragfile.html index fb2b7355..14a6975d 100644 --- a/docs/_build/html/documentation/api_reference/fragfile.html +++ b/docs/_build/html/documentation/api_reference/fragfile.html @@ -1,12 +1,13 @@ + - + - + Frag File Utilities — FinaleToolkit documentation @@ -16,31 +17,28 @@ document.documentElement.dataset.mode = localStorage.getItem("mode") || ""; document.documentElement.dataset.theme = localStorage.getItem("theme") || ""; - - - - - - + + + + + + + + + - + - - - - + + + - - - + + + @@ -49,7 +47,6 @@ - @@ -65,8 +62,19 @@ Back to top - - + + + + + + + + + @@ -74,6 +82,7 @@ Ctrl+K - - + + @@ -116,7 +125,7 @@ - + FinaleToolkit @@ -171,21 +180,29 @@ - - - Search - Ctrl+K - + - - - - - + @@ -194,11 +211,15 @@ - - - Search - Ctrl+K - + @@ -217,8 +238,7 @@ - - + @@ -272,11 +292,15 @@ - - - - - + @@ -322,6 +346,8 @@ + + @@ -336,7 +362,7 @@ API - Frag File Utilities + Frag File Utilities @@ -354,26 +380,27 @@ -Frag File Utilities# +Frag File Utilities# -finaletoolkit.utils.filter_bam(input_file: str, region_file: str | None = None, output_file: str | None = None, max_length: int | None = None, min_length: int | None = None, quality_threshold: int = 30, workers: int = 1, verbose: bool = False)# +finaletoolkit.utils.filter_bam(input_file: str, region_file: str | None = None, output_file: str | None = None, min_length: int | None = None, max_length: int | None = None, quality_threshold: int = 30, workers: int = 1, verbose: bool = False, fraction_low: int | None = None, fraction_high: int | None = None)# Accepts the path to a BAM file and creates a bam file where all reads are read1 in a proper pair, exceed the specified quality -threshold, do not intersect a region in the given blacklist -file, and intersects with a region in the region bed. 
+threshold, and intersects with a region in the region bed. Parameters: input_bam (str) – Path string or AlignmentFile pointing to the BAM file to be filtered. -region_file (str, option) -output_file (str, optional) -min_length (int, optional) -max_length (int, optional) -quality_threshold (int, optional) -workers (int, optional) -verbose (bool, optional) +region_file (str, option) – +output_file (str, optional) – +min_length (int, optional) – +max_length (int, optional) – +quality_threshold (int, optional) – +workers (int, optional) – +verbose (bool, optional) – +fraction_low (int, optional) – Deprecated alias for min_length +fraction_high (int, optional) – Deprecated alias for max_length Returns: @@ -387,7 +414,7 @@ Frag File Utilities -finaletoolkit.utils.agg_bw(input_file: str | PathLike, interval_file: str | PathLike, output_file: str | PathLike, median_window_size: int = 120, mean: bool = False, strand_location: int = 5, verbose: bool = False)# +finaletoolkit.utils.agg_bw(input_file: str | PathLike, interval_file: str | PathLike, output_file: str | PathLike, median_window_size: int = 120, mean: bool = False, verbose: bool = False)# Takes a BigWig and an interval BED and aggregates signal along the intervals with a median filter. For aggregating WPS signals, note that the median filter trims the @@ -406,13 +433,11 @@ Frag File Utilities Parameters: -input_file (str) -interval_file (str) -output_file (str) +input_file (str) – +interval_file (str) – BED file containing intervals. 6th column should have strand. +output_file (str) – median_window_size (int, optional) – default is 0 mean (bool) – use mean instead -strand_location (int) – which column (starting at 0) of the interval file contains the -strand. Default is 5. 
verbose (int or bool, optional) – default is False @@ -427,7 +452,7 @@ Frag File Utilities -finaletoolkit.utils.chrom_sizes_to_list(chrom_sizes_file: str | Path) → list[tuple[str][int]]# +finaletoolkit.utils.chrom_sizes_to_list(chrom_sizes_file: str | Path) → list[tuple[str, int]]# Reads chromosome names and sizes from a CHROMSIZE file into a list. Parameters: @@ -445,7 +470,7 @@ Frag File Utilities -finaletoolkit.utils.chrom_sizes_to_dict(chrom_sizes_file: str | Path) → list[tuple[str][int]]# +finaletoolkit.utils.chrom_sizes_to_dict(chrom_sizes_file: str | Path) → list[tuple[str][int]]# Reads chromosome names and sizes from a CHROMSIZE file into a dict. Parameters: @@ -463,19 +488,19 @@ Frag File Utilities -finaletoolkit.utils.frag_generator(input_file: str | pysam.AlignmentFile | pysam.TabixFile | Path, contig: str | None, quality_threshold: int = 30, start: int | None = None, stop: int | None = None, fraction_low: int = 120, fraction_high: int = 180, intersect_policy: str = 'midpoint', verbose: bool = False) → Generator[tuple]# +finaletoolkit.utils.frag_generator(input_file: FragFile, contig: str | None, quality_threshold: int = 30, start: int | None = None, stop: int | None = None, fraction_low: int = None, fraction_high: int = None, intersect_policy: str = 'midpoint', verbose: bool = False) → Generator[tuple]# Reads from BAM, SAM, or BED file and returns tuples containing contig (chromosome), start, stop (end), mapq, and strand for each fragment. Optionally may filter for mapq, size, and intersection with a region. Parameters: -input_file (str, pathlike, or AlignmentFile) – Fragment coordinates stored as a SAM, BAM, CRAM, tabix-indexed +input_file (str, pathlike, TabixFile, or AlignmentFile) – Fragment coordinates stored as a SAM, BAM, CRAM, tabix-indexed bed.gz, or tabix-indexed FinaleDB fragment file. 
-contig (str) -quality_threshold (int, optional) -start (int, optional) -stop (int, optional) +contig (str) – +quality_threshold (int, optional) – +start (int, optional) – +stop (int, optional) – fraction_low (int, optional) – Specifies lowest fragment length included in array. Default is 120, equivalent to long fraction. fraction_high (int, optional) – Specifies highest fragment length included in array. Default is @@ -485,7 +510,7 @@ Frag File UtilitiesReturns: @@ -501,17 +526,17 @@ Frag File Utilities -finaletoolkit.utils.frag_array(input_file: str | AlignmentFile | TabixFile | Path, contig: str, quality_threshold: int = 30, start: int | None = None, stop: int | None = None, fraction_low: int = 120, fraction_high: int = 180, intersect_policy: str = 'midpoint', verbose: bool = False) → ndarray[Any, dtype[_ScalarType_co]]# +finaletoolkit.utils.frag_array(input_file: str | PathLike | AlignmentFile | TabixFile, contig: str, quality_threshold: int = 30, start: int | None = None, stop: int | None = None, fraction_low: int = 120, fraction_high: int = 180, intersect_policy: str = 'midpoint', verbose: bool = False) → ndarray[Any, dtype[ScalarType]]# Reads from BAM, SAM, or BED file and returns a three column matrix with fragment start and stop positions and strand. Parameters: -input_file (str or AlignmentFile) -contig (str) -quality_threshold (int, optional) -start (int, optional) -stop (int, optional) +input_file (str or AlignmentFile) – +contig (str) – +quality_threshold (int, optional) – +start (int, optional) – +stop (int, optional) – fraction_low (int, optional) – Specifies lowest fragment length included in array. Default is 120, equivalent to long fraction. fraction_high (int, optional) – Specifies highest fragment length included in array. 
Default is @@ -522,7 +547,7 @@ Frag File UtilitiesReturns: @@ -540,7 +565,7 @@ Frag File Utilities -finaletoolkit.utils.low_quality_read_pairs(read, min_mapq=30)# +finaletoolkit.utils.low_quality_read_pairs(read, min_mapq=30)# Return True if the sequenced read described in read is not a properly paired read with a Phred score exceeding min_mapq. Based on epifluidlab/cofragr @@ -565,7 +590,7 @@ Frag File Utilities -finaletoolkit.utils.overlaps(contigs_1: ndarray[Any, dtype[_ScalarType_co]], starts_1: ndarray[Any, dtype[_ScalarType_co]], stops_1: ndarray[Any, dtype[_ScalarType_co]], contigs_2: ndarray[Any, dtype[_ScalarType_co]], starts_2: ndarray[Any, dtype[_ScalarType_co]], stops_2: ndarray[Any, dtype[_ScalarType_co]]) → ndarray[Any, dtype[_ScalarType_co]]# +finaletoolkit.utils.overlaps(contigs_1: ndarray[Any, dtype[ScalarType]], starts_1: ndarray[Any, dtype[ScalarType]], stops_1: ndarray[Any, dtype[ScalarType]], contigs_2: ndarray[Any, dtype[ScalarType]], starts_2: ndarray[Any, dtype[ScalarType]], stops_2: ndarray[Any, dtype[ScalarType]]) → ndarray[Any, dtype[ScalarType]]# Function that performs vectorized computation of overlaps. Returns an array of same shape as contig_1 that is true if the intervals for set 1 each have any overlap with an interval in set 2. 
@@ -608,8 +633,7 @@ Frag File Utilities - + @@ -653,8 +677,8 @@ Frag File Utilities - + + diff --git a/docs/_build/html/documentation/api_reference/genomeutils.html b/docs/_build/html/documentation/api_reference/genomeutils.html index 7e139c45..1f363505 100644 --- a/docs/_build/html/documentation/api_reference/genomeutils.html +++ b/docs/_build/html/documentation/api_reference/genomeutils.html @@ -1,12 +1,13 @@ + - + - + Genome Utilities — FinaleToolkit documentation @@ -16,31 +17,28 @@ document.documentElement.dataset.mode = localStorage.getItem("mode") || ""; document.documentElement.dataset.theme = localStorage.getItem("theme") || ""; - - - - - - + + + + + + + + + - + - - - - + + + - - - + + + @@ -48,7 +46,6 @@ - @@ -64,8 +61,19 @@ Back to top - - + + + + + + + + + @@ -73,6 +81,7 @@ Ctrl+K - - + + @@ -115,7 +124,7 @@ - + FinaleToolkit @@ -170,21 +179,29 @@ - - - Search - Ctrl+K - + - - - - - + @@ -193,11 +210,15 @@ - - - Search - Ctrl+K - + @@ -216,8 +237,7 @@ - - + @@ -271,11 +291,15 @@ - - - - - + @@ -321,6 +345,8 @@ + + @@ -335,7 +361,7 @@ API - Genome Utilities + Genome Utilities @@ -353,16 +379,16 @@ -Genome Utilities# +Genome Utilities# -class finaletoolkit.genome.GenomeGaps(gaps_bed: PathLike | str | None = None)# +class finaletoolkit.genome.GenomeGaps(gaps_bed: PathLike | str | None = None)# Reads telomere, centromere, and short_arm intervals from a bed file or generates these intervals from UCSC gap and centromere tracks for hg19 and hg38. -classmethod b37()# +classmethod b37()# Creates a GenomeGaps for the Broad Institute GRCh37 reference genome i.e b37. This reference genome is also based on GRCh37, but differs from the UCSC hg19 reference in a few ways, @@ -383,7 +409,7 @@ Genome Utilities -get_arm(contig: str, start: int, stop: int) → str# +get_arm(contig: str, start: int, stop: int) → str# Returns the chromosome arm the interval is in. If in the short arm of an acrocentric chromosome or intersects a centromere, returns an empty string. 
@@ -410,7 +436,7 @@ Genome Utilities -get_contig_gaps(contig: str) → ContigGaps# +get_contig_gaps(contig: str) → ContigGaps# Creates a ContigGaps for the specified chromosome Parameters: @@ -427,7 +453,7 @@ Genome Utilities -classmethod hg38()# +classmethod hg38()# Creates a GenomeGaps for the hg38 reference genome. This sequences uses chromosome names that start with ‘chr’ and is synonymous with the GRCh38 reference genome. @@ -437,7 +463,7 @@ Genome Utilities -in_tcmere(contig: str, start: int, stop: int) → bool# +in_tcmere(contig: str, start: int, stop: int) → bool# Checks if specified interval is in a centromere or telomere Parameters: @@ -458,7 +484,7 @@ Genome Utilities -overlaps_gap(contig: str, start: int, stop: int) → bool# +overlaps_gap(contig: str, start: int, stop: int) → bool# Checks if specified interval overlaps a gap interval Parameters: @@ -479,7 +505,7 @@ Genome Utilities -to_bed(output_file: str | PathLike)# +to_bed(output_file: str | PathLike)# Prints gap intervals in GenomeGaps to a BED4 file where the name is the type of gap interval. @@ -492,7 +518,7 @@ Genome Utilities -classmethod ucsc_hg19()# +classmethod ucsc_hg19()# Creates a GenomeGaps for the UCSC hg19 reference genome. This sequences uses chromosome names that start with ‘chr’ and is based on a version of the GRCh37 reference genome. @@ -510,10 +536,10 @@ Genome Utilities -class finaletoolkit.genome.ContigGaps(contig: str, centromere: tuple[int, int], telomeres: Iterable[tuple[int, int]], has_short_arm: bool = False)# +class finaletoolkit.genome.ContigGaps(contig: str, centromere: tuple[int, int], telomeres: Iterable[tuple[int, int]], has_short_arm: bool = False)# -get_arm(start: int, stop: int)# +get_arm(start: int, stop: int)# Returns name of chromosome arm the interval is in. Returns “NOARM” if in a centromere, telomere, or short arm of an acrocentric chromosome. 
@@ -538,7 +564,7 @@ Genome Utilities -in_gap(start: int, stop: int) → bool# +in_gap(start: int, stop: int) → bool# Checks if specified interval is in a gap. Parameters: @@ -558,7 +584,7 @@ Genome Utilities -in_tcmere(start: int, stop: int) → bool# +in_tcmere(start: int, stop: int) → bool# Checks if specified interval is in a centromere or telomere. Parameters: @@ -580,7 +606,7 @@ Genome Utilities -finaletoolkit.genome.ucsc_hg19_gap_bed(output_file: str | PathLike)# +finaletoolkit.genome.ucsc_hg19_gap_bed(output_file: str | PathLike)# Creates BED4 of centromeres, telomeres, and short arms for the UCSC hg19 reference sequence. @@ -592,7 +618,7 @@ Genome Utilities -finaletoolkit.genome.b37_gap_bed(output_file: str | PathLike)# +finaletoolkit.genome.b37_gap_bed(output_file: str | PathLike)# Creates BED4 of centromeres, telomeres, and short arms for the Broad Institute GRCh37 (b37) reference sequence. Also useful for files aligned to human_g1k_v37 (1000 Genomes Project). @@ -605,7 +631,7 @@ Genome Utilities -finaletoolkit.genome.ucsc_hg38_gap_bed(output_file: str | PathLike)# +finaletoolkit.genome.ucsc_hg38_gap_bed(output_file: str | PathLike)# Creates BED4 of centromeres, telomeres, and short arms for the UCSC hg38 reference sequence. 
@@ -643,8 +669,7 @@ Genome Utilities - + @@ -700,8 +725,8 @@ Genome Utilities - + + diff --git a/docs/_build/html/documentation/api_reference/index.html b/docs/_build/html/documentation/api_reference/index.html index 7cf5852d..cc84fce6 100644 --- a/docs/_build/html/documentation/api_reference/index.html +++ b/docs/_build/html/documentation/api_reference/index.html @@ -1,12 +1,13 @@ + - + - + API — FinaleToolkit documentation @@ -16,31 +17,28 @@ document.documentElement.dataset.mode = localStorage.getItem("mode") || ""; document.documentElement.dataset.theme = localStorage.getItem("theme") || ""; - - - - - - + + + + + + + + + - + - - - - + + + - - - + + + @@ -49,7 +47,6 @@ - @@ -65,8 +62,19 @@ Back to top - - + + + + + + + + + @@ -74,6 +82,7 @@ Ctrl+K - - + + @@ -116,7 +125,7 @@ - + FinaleToolkit @@ -171,21 +180,29 @@ - - - Search - Ctrl+K - + - - - - - + @@ -194,11 +211,15 @@ - - - Search - Ctrl+K - + @@ -217,8 +238,7 @@ - - + @@ -272,11 +292,15 @@ - - - - - + @@ -322,6 +346,8 @@ + + @@ -333,7 +359,7 @@ FinaleToolkit Documentation - API + API @@ -351,7 +377,7 @@ -API# +API# Basic Features @@ -442,8 +468,7 @@ API# - - + @@ -468,8 +493,8 @@ API# - - + + @@ -194,11 +211,15 @@ - - - Search - Ctrl+K - + @@ -217,8 +238,7 @@ - - + @@ -272,11 +292,15 @@ - - - - - + @@ -322,6 +346,8 @@ + + @@ -336,7 +362,7 @@ API - Window Protection Score (WPS) + Window... 
@@ -354,10 +380,10 @@ -Window Protection Score (WPS)# +Window Protection Score (WPS)# -finaletoolkit.frag.wps(input_file: str | AlignmentFile, contig: str, start: int | str, stop: int | str, output_file: str | None = None, window_size: int = 120, fraction_low: int = 120, fraction_high: int = 180, quality_threshold: int = 30, verbose: bool | int = 0) → ndarray# +finaletoolkit.frag.wps(input_file: str | AlignmentFile, contig: str, start: int | str, stop: int | str, output_file: str | None = None, window_size: int = 120, fraction_low: int = 120, fraction_high: int = 180, quality_threshold: int = 30, verbose: bool | int = 0) → ndarray# Return (raw) Windowed Protection Scores as specified in Snyder et al (2016) over a region [start,stop). @@ -365,19 +391,19 @@ Window Protection Score (WPS) input_file (str or pysam.AlignmentFile) – BAM, SAM or tabix file containing paired-end fragment reads or its path. AlignmentFile must be opened in read mode. -contig (str) -start (int) -stop (int) -output_file (string, optional) +contig (str) – +start (int) – +stop (int) – +output_file (string, optional) – window_size (int, optional) – Size of window to calculate WPS. Default is k = 120, equivalent to L-WPS. fraction_low (int, optional) – Specifies lowest fragment length included in calculation. Default is 120, equivalent to long fraction. fraction_high (int, optional) – Specifies highest fragment length included in calculation. Default is 180, equivalent to long fraction. 
-quality_threshold (int, optional) -workers (int, optional) -verbose (bool, optional) +quality_threshold (int, optional) – +workers (int, optional) – +verbose (bool, optional) – Returns: @@ -391,7 +417,7 @@ Window Protection Score (WPS) -finaletoolkit.frag.multi_wps(input_file: AlignmentFile | str, site_bed: str, output_file: None | str = None, window_size: int = 120, interval_size: int = 5000, fraction_low: int = 120, fraction_high: int = 180, quality_threshold: int = 30, workers: int = 1, verbose: bool | int = 0) → ndarray# +finaletoolkit.frag.multi_wps(input_file: AlignmentFile | str, site_bed: str, chrom_sizes: str | None = None, output_file: str | None = None, window_size: int = 120, interval_size: int = 5000, min_length: int = 120, max_length: int = 180, quality_threshold: int = 30, workers: int = 1, verbose: bool | int = 0, fraction_low: int | None = None, fraction_high: int | None = None)# Function that aggregates WPS over sites in BED file according to the method described by Snyder et al (2016). @@ -402,34 +428,36 @@ Window Protection Score (WPS)Returns: -scores – np array of shape (n, 2) where column 1 is the coordinate and -column 2 is the score and n is the number of coordinates in -region [start,stop) +output_file – location results are stored. 
Return type: -numpy.ndarray +str -finaletoolkit.frag.adjust_wps(input_file: str, interval_file: str, output_file: str, genome_file: str, interval_size: int = 5000, median_window_size: int = 1000, savgol_window_size: int = 21, savgol_poly_deg: int = 2, mean: bool = False, subtract_edges: bool = False, edge_size: int = 500, workers: int = 1, verbose: Union(bool, int) = False)# +finaletoolkit.frag.adjust_wps(input_file: str, interval_file: str, output_file: str, chrom_sizes: str, interval_size: int = 5000, median_window_size: int = 1000, savgol_window_size: int = 21, savgol_poly_deg: int = 2, mean: bool = False, subtract_edges: bool = False, edge_size: int = 500, workers: int = 1, verbose: Union(bool, int) = False)# Adjusts raw WPS data in a BigWig by applying a median filter and Savitsky-Golay filter (Savitsky and Golay, 1964). @@ -439,9 +467,10 @@ Window Protection Score (WPS) - + @@ -533,8 +562,8 @@ Window Protection Score (WPS) - + + diff --git a/docs/_build/html/documentation/cli_reference/index.html b/docs/_build/html/documentation/cli_reference/index.html index 803fb18d..35ab51d3 100644 --- a/docs/_build/html/documentation/cli_reference/index.html +++ b/docs/_build/html/documentation/cli_reference/index.html @@ -1,12 +1,13 @@ + - + - + CLI — FinaleToolkit documentation @@ -16,31 +17,28 @@ document.documentElement.dataset.mode = localStorage.getItem("mode") || ""; document.documentElement.dataset.theme = localStorage.getItem("theme") || ""; - - - - - - + + + - - - + + + + + + + - - + + + - - - + + + @@ -49,7 +47,6 @@ - @@ -65,8 +62,19 @@ Back to top - - + + + + + + + + + @@ -74,6 +82,7 @@ Ctrl+K - - + + @@ -116,7 +125,7 @@ - + FinaleToolkit @@ -171,21 +180,29 @@ - - - Search - Ctrl+K - + - - - - - + @@ -194,11 +211,15 @@ - - - Search - Ctrl+K - + @@ -217,8 +238,7 @@ - - + @@ -272,11 +292,15 @@ - - - - - + @@ -313,6 +337,8 @@ + + @@ -324,7 +350,7 @@ FinaleToolkit Documentation - CLI + CLI @@ -342,7 +368,7 @@ -CLI# +CLI# FinaleToolkit is a package and standalone 
program to extract fragmentation features of cell-free DNA from paired-end sequencing data. usage: finaletoolkit [-h] [-v] @@ -351,7 +377,7 @@ CLI# -Named Arguments# +Named Arguments# -v, --version show program’s version number and exit @@ -359,15 +385,17 @@ Named Arguments -Sub-commands# +Sub-commands# -coverage# +coverage# Calculates fragmentation coverage over intervals defined in a BED file based on alignment data from a BAM/SAM/CRAM/Fragment file. -finaletoolkit coverage [-h] [-o OUTPUT_FILE] [-n] [-s SCALE_FACTOR] [-p {midpoint,any}] [-q QUALITY_THRESHOLD] [-w WORKERS] [-v] input_file interval_file +finaletoolkit coverage [-h] [-o OUTPUT_FILE] [-n] [-s SCALE_FACTOR] [-min MIN_LENGTH] [-max MAX_LENGTH] [-p {midpoint,any}] + [-q QUALITY_THRESHOLD] [-w WORKERS] [-v] + input_file interval_file -Positional Arguments# +Positional Arguments# input_file Path to a BAM/SAM/CRAM/Fragment file containing fragment data. @@ -378,7 +406,7 @@ Positional Arguments -Named Arguments# +Named Arguments# -o, --output_file A BED file containing coverage values over the intervals specified in interval file. @@ -392,6 +420,13 @@ Named Arguments -frag-length-bins# +frag-length-bins# Retrieves fragment lengths grouped in bins given a BAM/SAM/CRAM/Fragment file. -finaletoolkit frag-length-bins [-h] [-c CONTIG] [-S START] [-E STOP] [-min MIN_LENGTH] [-max MAX_LENGTH] [-p {midpoint,any}] [--bin-size BIN_SIZE] - [-o OUTPUT_FILE] [--histogram-path HISTOGRAM_PATH] [-q QUALITY_THRESHOLD] [-v] +finaletoolkit frag-length-bins [-h] [-c CONTIG] [-S START] [-E STOP] [-min MIN_LENGTH] [-max MAX_LENGTH] [-p {midpoint,any}] + [--bin-size BIN_SIZE] [-o OUTPUT_FILE] [--histogram-path HISTOGRAM_PATH] [-q QUALITY_THRESHOLD] [-v] input_file -Positional Arguments# +Positional Arguments# input_file Path to a BAM/SAM/CRAM/Fragment file containing fragment data. @@ -429,7 +464,7 @@ Positional Arguments -Named Arguments# +Named Arguments# -c, --contig Specify the contig or chromosome to select fragments from. 
(Required if using –start or –stop.) @@ -475,15 +510,15 @@ Named Arguments -frag-length-intervals# +frag-length-intervals# Retrieves fragment length summary statistics over intervals defined in a BED file based on alignment data from a BAM/SAM/CRAM/Fragment file. -finaletoolkit frag-length-intervals [-h] [-min MIN_LENGTH] [-max MAX_LENGTH] [-p {midpoint,any}] [-o OUTPUT_FILE] [-q QUALITY_THRESHOLD] [-w WORKERS] - [-v] +finaletoolkit frag-length-intervals [-h] [-min MIN_LENGTH] [-max MAX_LENGTH] [-p {midpoint,any}] [-o OUTPUT_FILE] [-q QUALITY_THRESHOLD] + [-w WORKERS] [-v] input_file interval_file -Positional Arguments# +Positional Arguments# input_file Path to a BAM/SAM/CRAM/Fragment file containing fragment data. @@ -494,7 +529,7 @@ Positional Arguments -Named Arguments# +Named Arguments# -min, --min-length Minimum length for a fragment to be included in fragment length. @@ -528,14 +563,15 @@ Named Arguments
-Cleavage Profile# +Cleavage Profile# -finaletoolkit.frag.cleavage_profile(input_file: str, chrom_size: int, contig: str, start: int, stop: int, left: int = 0, right: int = 0, fraction_low: int = 1, fraction_high: int = 10000000, quality_threshold: int = 30, verbose: bool | int = 0) → ndarray# +finaletoolkit.frag.cleavage_profile(input_file: str | PathLike | AlignmentFile | TabixFile, chrom_size: int, contig: str, start: int, stop: int, left: int = 0, right: int = 0, min_length: int | None = None, max_length: int | None = None, quality_threshold: int = 30, verbose: bool | int = 0, fraction_low: int | None = None, fraction_high: int | None = None) → ndarray# Cleavage profile calculated over a single interval. Parameters: @@ -370,10 +396,12 @@ Cleavage ProfileReturns: @@ -422,8 +450,7 @@ Cleavage Profile - + @@ -460,8 +487,8 @@ Cleavage Profile - + + diff --git a/docs/_build/html/documentation/api_reference/delfi.html b/docs/_build/html/documentation/api_reference/delfi.html index b693ec8f..e37d1237 100644 --- a/docs/_build/html/documentation/api_reference/delfi.html +++ b/docs/_build/html/documentation/api_reference/delfi.html @@ -1,12 +1,13 @@ + - + - + DELFI — FinaleToolkit documentation @@ -16,31 +17,28 @@ document.documentElement.dataset.mode = localStorage.getItem("mode") || ""; document.documentElement.dataset.theme = localStorage.getItem("theme") || ""; - - - - - - + + + + + + + + + - + - - - - + + + - - - + + + @@ -49,7 +47,6 @@ - @@ -65,8 +62,19 @@ Back to top - - + + + + + + + + + @@ -74,6 +82,7 @@ Ctrl+K - - + + @@ -116,7 +125,7 @@ - + FinaleToolkit @@ -171,21 +180,29 @@ - - - Search - Ctrl+K - + - - - - - + @@ -194,11 +211,15 @@ - - - Search - Ctrl+K - + @@ -217,8 +238,7 @@ - - + @@ -272,11 +292,15 @@ - - - - - + @@ -322,6 +346,8 @@ + + @@ -336,7 +362,7 @@ API - DELFI + DELFI @@ -354,10 +380,10 @@ -DELFI# +DELFI# -finaletoolkit.frag.delfi(input_file: str, autosomes: str, bins_file: str, reference_file: str, blacklist_file: str | None = None, gap_file: 
str | GenomeGaps | None = None, output_file: str | None = None, gc_correct: bool = True, remove_nocov: bool = True, merge_bins: bool = True, window_size: int = 5000000, quality_threshold: int = 30, workers: int = 1, verbose: int | bool = False) → DataFrame# +finaletoolkit.frag.delfi(input_file: str, chrom_sizes: str, bins_file: str, reference_file: str, blacklist_file: str | None = None, gap_file: str | GenomeGaps | None = None, output_file: str | None = None, gc_correct: bool = True, remove_nocov: bool = True, merge_bins: bool = True, window_size: int = 5000000, quality_threshold: int = 30, workers: int = 1, verbose: int | bool = False) → DataFrame# A function that replicates the methodology of Christiano et al (2019). @@ -365,11 +391,11 @@ DELFI#< input_file (str) – Path string pointing to a bam file containing PE fragment reads. -autosomes (str) – Path string to a chrom.sizes file containing only autosomal +chrom_sizes (str) – Path string to a chrom.sizes file containing only autosomal chromosomes bins_file (str) – Path string to a BED file containing 100kb bins for reference genome of choice. -reference_file (str) – Path string to .2bit file for reference genoe. +reference_file (str) – Path string to .2bit file for reference genome. gap_file (str or GenomeGaps) – Specifies locations of telomeres and centromeres for reference genome. There are three options: - Path string to a BED4+ file where each interval is a @@ -395,18 +421,25 @@ DELFI#< stdout. Default is False. +Returns: +Results of delfi analysis, with column names corresponding to +those generated by the original author’s scripts. + +Return type: +pandas DataFrame + -finaletoolkit.frag.delfi_gc_correct(windows: DataFrame, alpha: float = 0.75, it: int = 8, verbose: bool = False)# +finaletoolkit.frag.delfi_gc_correct(windows: DataFrame, alpha: float = 0.75, it: int = 8, verbose: bool = False)# Helper function that takes window data and performs GC adjustment. 
-finaletoolkit.frag.delfi_merge_bins(hundred_kb_bins: DataFrame, gc_corrected: bool = True, verbose: bool = False) → DataFrame# +finaletoolkit.frag.delfi_merge_bins(hundred_kb_bins: DataFrame, gc_corrected: bool = True, verbose: bool = False) → DataFrame# @@ -446,8 +479,7 @@ DELFI#< - - + @@ -486,8 +518,8 @@ DELFI#< - - + + @@ -194,11 +211,15 @@ - - - Search - Ctrl+K - + @@ -217,8 +238,7 @@ - - + @@ -272,11 +292,15 @@ - - - - - + @@ -322,6 +346,8 @@ + + @@ -336,7 +362,7 @@ API - End-Motifs + End-Motifs @@ -354,10 +380,10 @@ -End-Motifs# +End-Motifs# -class finaletoolkit.frag.EndMotifFreqs(kmer_frequencies: Iterable[tuple[str, float]], k: int, quality_threshold: int = 20)# +class finaletoolkit.frag.EndMotifFreqs(kmer_frequencies: Iterable[tuple[str, float]], k: int, quality_threshold: int = 20)# Class that stores frequencies of end-motif k-mer frequencies and contains methods to manipulate this data. @@ -372,7 +398,7 @@ End-Motifs -classmethod from_file(file_path: str | Path, quality_threshold: int, sep: str = '\t', header: int = 0) → EndMotifFreqs# +classmethod from_file(file_path: str | Path, quality_threshold: int, sep: str = '\t', header: int = 0) → EndMotifFreqs# Reads kmer frequency from a two-column tab-delimited file. Parameters: @@ -393,7 +419,7 @@ End-Motifs -motif_diversity_score() → float# +motif_diversity_score() → float# Calculates a motif diversity score (MDS) using normalized Shannon entropy as described by Jiang et al (2020). This function is generalized for any k instead of just 4-mers. 
@@ -401,7 +427,7 @@ End-Motifs -to_tsv(output_file: str | Path, sep: str = '\t')# +to_tsv(output_file: str | Path, sep: str = '\t')# Prints k-mer frequencies to a tsv @@ -409,7 +435,7 @@ End-Motifs -class finaletoolkit.frag.EndMotifsIntervals(intervals: Iterable[tuple[tuple, dict]], k: int, quality_threshold: int = 20)# +class finaletoolkit.frag.EndMotifsIntervals(intervals: Iterable[tuple[tuple, dict]], k: int, quality_threshold: int = 20)# Class that stores frequencies of end-motif k-mers over user-specified intervals and contains methods to manipulate this data. @@ -426,7 +452,7 @@ End-Motifs -freq(kmer: str) → list[tuple[str, int, int, float]]# +freq(kmer: str) → list[tuple[str, int, int, float]]# Returns a list of intervals and associated frquency for given kmer. Results are in the form (chrom, 0-based start, 1-based stop, frequency). @@ -434,7 +460,7 @@ End-Motifs -classmethod from_file(file_path: str, quality_threshold: int, sep: str = ',') → EndMotifFreqs# +classmethod from_file(file_path: str, quality_threshold: int, sep: str = ',', header: int = 0) → EndMotifsIntervals# Reads kmer frequency from a tab-delimited file. Expected columns are contig, start, stop, name, count, (kmers). Because exporting to file includes an option to turn counts to a fraction, @@ -451,20 +477,20 @@ End-Motifskmer_freqs Return type: -EndMotifFreqs +EndMotifsIntervals -mds_bed(output_file: str | Path, sep: str = '\t')# +mds_bed(output_file: str | Path, sep: str = '\t')# Writes MDS for each interval to a bed/bedgraph file. -motif_diversity_score() → list[tuple[tuple, float]]# +motif_diversity_score() → list[tuple[tuple, float]]# Calculates a motif diversity score (MDS) for each interval using normalized Shannon entropy as described by Jiang et al (2020). This function is generalized for any k instead of just 4-mers. 
@@ -472,7 +498,7 @@ End-Motifs -to_bed(kmer: str, output_file: str | Path, calc_freq: bool = True, sep: str = '\t')# +to_bed(kmer: str, output_file: str | Path, calc_freq: bool = True, sep: str = '\t')# Take frequency of specified kmer and writes to BED. Parameters: @@ -488,7 +514,7 @@ End-Motifs -to_bedgraph(kmer: str, output_file: str | Path, calc_freq: bool = True, sep: str = '\t')# +to_bedgraph(kmer: str, output_file: str | Path, calc_freq: bool = True, sep: str = '\t')# Take frequency of specified kmer and writes to bedgraph. Parameters: @@ -504,7 +530,7 @@ End-Motifs -to_tsv(output_file: str | Path, calc_freq: bool = True, sep: str = '\t')# +to_tsv(output_file: str | Path, calc_freq: bool = True, sep: str = '\t')# Writes all intervals and associated frquencies to file. Columns are contig, start, stop, name, count, (kmers). @@ -523,7 +549,7 @@ End-Motifs -finaletoolkit.frag.region_end_motifs(input_file: str, contig: str, start: int, stop: int, refseq_file: str | Path, k: int = 4, fraction_low: int = 10, fraction_high: int = 600, both_strands: bool = True, output_file: None | str = None, quality_threshold: int = 20, verbose: bool | int = False) → dict# +finaletoolkit.frag.region_end_motifs(input_file: str, contig: str, start: int, stop: int, refseq_file: str | Path, k: int = 4, fraction_low: int = 10, fraction_high: int = 600, both_strands: bool = True, output_file: str | None = None, quality_threshold: int = 20, verbose: bool | int = False) → dict# Function that reads fragments in the specified region from a BAM, SAM, or tabix indexed file and returns the 5’ k-mer (default is 4-mer) end motif counts as a dictionary. 
This function @@ -541,9 +567,9 @@ End-MotifsReturns: @@ -557,7 +583,7 @@ End-Motifs -finaletoolkit.frag.end_motifs(input_file: str, refseq_file: str | Path, k: int = 4, fraction_low: int = 10, fraction_high: int = 600, both_strands: bool = False, output_file: None | str = None, quality_threshold: int = 30, workers: int = 1, verbose: bool | int = False) → EndMotifFreqs# +finaletoolkit.frag.end_motifs(input_file: str, refseq_file: str | Path, k: int = 4, min_length: int = 10, max_length: int = 600, both_strands: bool = True, output_file: str | None = None, quality_threshold: int = 30, workers: int = 1, verbose: bool | int = False, fraction_low: int | None = None, fraction_high: int | None = None) → EndMotifFreqs# Function that reads fragments from a BAM, SAM, or tabix indexed file and returns the 5’ k-mer (default is 4-mer) end motif frequencies as a dictionary. Optionally writes data to a tsv. This @@ -569,10 +595,15 @@ End-MotifsReturns: @@ -586,7 +617,7 @@ End-Motifs -finaletoolkit.frag.interval_end_motifs(input_file: str, refseq_file: str | Path, intervals: str | Iterable[tuple[str, int, int, str]], k: int = 4, fraction_low: int = 10, fraction_high: int = 600, both_strands: bool = True, output_file: None | str = None, quality_threshold: int = 30, workers: int = 1, verbose: bool | int = False) → EndMotifsIntervals# +finaletoolkit.frag.interval_end_motifs(input_file: str, refseq_file: str | Path, intervals: str | Iterable[tuple[str, int, int, str]], k: int = 4, min_length: int = 10, max_length: int = 600, both_strands: bool = True, output_file: str | None = None, quality_threshold: int = 30, workers: int = 1, verbose: bool | int = False, fraction_low: int | None = None, fraction_high: int | None = None) → EndMotifsIntervals# Function that reads fragments from a BAM, SAM, or tabix indexed file and user-specified intervals and returns the 5’ k-mer (default is 4-mer) end motif. Optionally writes data to a tsv. 
@@ -601,7 +632,7 @@ End-MotifsReturns: @@ -650,8 +681,7 @@ End-Motifs - + @@ -706,8 +736,8 @@ End-Motifs - + + diff --git a/docs/_build/html/documentation/api_reference/fragfile.html b/docs/_build/html/documentation/api_reference/fragfile.html index fb2b7355..14a6975d 100644 --- a/docs/_build/html/documentation/api_reference/fragfile.html +++ b/docs/_build/html/documentation/api_reference/fragfile.html @@ -1,12 +1,13 @@ + - + - + Frag File Utilities — FinaleToolkit documentation @@ -16,31 +17,28 @@ document.documentElement.dataset.mode = localStorage.getItem("mode") || ""; document.documentElement.dataset.theme = localStorage.getItem("theme") || ""; - - - - - - + + + + + + + + + - + - - - - + + + - - - + + + @@ -49,7 +47,6 @@ - @@ -65,8 +62,19 @@ Back to top - - + + + + + + + + + @@ -74,6 +82,7 @@ Ctrl+K - - + + @@ -116,7 +125,7 @@ - + FinaleToolkit @@ -171,21 +180,29 @@ - - - Search - Ctrl+K - + - - - - - + @@ -194,11 +211,15 @@ - - - Search - Ctrl+K - + @@ -217,8 +238,7 @@ - - + @@ -272,11 +292,15 @@ - - - - - + @@ -322,6 +346,8 @@ + + @@ -336,7 +362,7 @@ API - Frag File Utilities + Frag File Utilities @@ -354,26 +380,27 @@ -Frag File Utilities# +Frag File Utilities# -finaletoolkit.utils.filter_bam(input_file: str, region_file: str | None = None, output_file: str | None = None, max_length: int | None = None, min_length: int | None = None, quality_threshold: int = 30, workers: int = 1, verbose: bool = False)# +finaletoolkit.utils.filter_bam(input_file: str, region_file: str | None = None, output_file: str | None = None, min_length: int | None = None, max_length: int | None = None, quality_threshold: int = 30, workers: int = 1, verbose: bool = False, fraction_low: int | None = None, fraction_high: int | None = None)# Accepts the path to a BAM file and creates a bam file where all reads are read1 in a proper pair, exceed the specified quality -threshold, do not intersect a region in the given blacklist -file, and intersects with a region in the region bed. 
+threshold, and intersects with a region in the region bed. Parameters: input_bam (str) – Path string or AlignmentFile pointing to the BAM file to be filtered. -region_file (str, option) -output_file (str, optional) -min_length (int, optional) -max_length (int, optional) -quality_threshold (int, optional) -workers (int, optional) -verbose (bool, optional) +region_file (str, option) – +output_file (str, optional) – +min_length (int, optional) – +max_length (int, optional) – +quality_threshold (int, optional) – +workers (int, optional) – +verbose (bool, optional) – +fraction_low (int, optional) – Deprecated alias for min_length +fraction_high (int, optional) – Deprecated alias for max_length Returns: @@ -387,7 +414,7 @@ Frag File Utilities -finaletoolkit.utils.agg_bw(input_file: str | PathLike, interval_file: str | PathLike, output_file: str | PathLike, median_window_size: int = 120, mean: bool = False, strand_location: int = 5, verbose: bool = False)# +finaletoolkit.utils.agg_bw(input_file: str | PathLike, interval_file: str | PathLike, output_file: str | PathLike, median_window_size: int = 120, mean: bool = False, verbose: bool = False)# Takes a BigWig and an interval BED and aggregates signal along the intervals with a median filter. For aggregating WPS signals, note that the median filter trims the @@ -406,13 +433,11 @@ Frag File Utilities Parameters: -input_file (str) -interval_file (str) -output_file (str) +input_file (str) – +interval_file (str) – BED file containing intervals. 6th column should have strand. +output_file (str) – median_window_size (int, optional) – default is 0 mean (bool) – use mean instead -strand_location (int) – which column (starting at 0) of the interval file contains the -strand. Default is 5. 
verbose (int or bool, optional) – default is False @@ -427,7 +452,7 @@ Frag File Utilities -finaletoolkit.utils.chrom_sizes_to_list(chrom_sizes_file: str | Path) → list[tuple[str][int]]# +finaletoolkit.utils.chrom_sizes_to_list(chrom_sizes_file: str | Path) → list[tuple[str, int]]# Reads chromosome names and sizes from a CHROMSIZE file into a list. Parameters: @@ -445,7 +470,7 @@ Frag File Utilities -finaletoolkit.utils.chrom_sizes_to_dict(chrom_sizes_file: str | Path) → list[tuple[str][int]]# +finaletoolkit.utils.chrom_sizes_to_dict(chrom_sizes_file: str | Path) → list[tuple[str][int]]# Reads chromosome names and sizes from a CHROMSIZE file into a dict. Parameters: @@ -463,19 +488,19 @@ Frag File Utilities -finaletoolkit.utils.frag_generator(input_file: str | pysam.AlignmentFile | pysam.TabixFile | Path, contig: str | None, quality_threshold: int = 30, start: int | None = None, stop: int | None = None, fraction_low: int = 120, fraction_high: int = 180, intersect_policy: str = 'midpoint', verbose: bool = False) → Generator[tuple]# +finaletoolkit.utils.frag_generator(input_file: FragFile, contig: str | None, quality_threshold: int = 30, start: int | None = None, stop: int | None = None, fraction_low: int = None, fraction_high: int = None, intersect_policy: str = 'midpoint', verbose: bool = False) → Generator[tuple]# Reads from BAM, SAM, or BED file and returns tuples containing contig (chromosome), start, stop (end), mapq, and strand for each fragment. Optionally may filter for mapq, size, and intersection with a region. Parameters: -input_file (str, pathlike, or AlignmentFile) – Fragment coordinates stored as a SAM, BAM, CRAM, tabix-indexed +input_file (str, pathlike, TabixFile, or AlignmentFile) – Fragment coordinates stored as a SAM, BAM, CRAM, tabix-indexed bed.gz, or tabix-indexed FinaleDB fragment file. 
-contig (str) -quality_threshold (int, optional) -start (int, optional) -stop (int, optional) +contig (str) – +quality_threshold (int, optional) – +start (int, optional) – +stop (int, optional) – fraction_low (int, optional) – Specifies lowest fragment length included in array. Default is 120, equivalent to long fraction. fraction_high (int, optional) – Specifies highest fragment length included in array. Default is @@ -485,7 +510,7 @@ Frag File UtilitiesReturns: @@ -501,17 +526,17 @@ Frag File Utilities -finaletoolkit.utils.frag_array(input_file: str | AlignmentFile | TabixFile | Path, contig: str, quality_threshold: int = 30, start: int | None = None, stop: int | None = None, fraction_low: int = 120, fraction_high: int = 180, intersect_policy: str = 'midpoint', verbose: bool = False) → ndarray[Any, dtype[_ScalarType_co]]# +finaletoolkit.utils.frag_array(input_file: str | PathLike | AlignmentFile | TabixFile, contig: str, quality_threshold: int = 30, start: int | None = None, stop: int | None = None, fraction_low: int = 120, fraction_high: int = 180, intersect_policy: str = 'midpoint', verbose: bool = False) → ndarray[Any, dtype[ScalarType]]# Reads from BAM, SAM, or BED file and returns a three column matrix with fragment start and stop positions and strand. Parameters: -input_file (str or AlignmentFile) -contig (str) -quality_threshold (int, optional) -start (int, optional) -stop (int, optional) +input_file (str or AlignmentFile) – +contig (str) – +quality_threshold (int, optional) – +start (int, optional) – +stop (int, optional) – fraction_low (int, optional) – Specifies lowest fragment length included in array. Default is 120, equivalent to long fraction. fraction_high (int, optional) – Specifies highest fragment length included in array. 
Default is @@ -522,7 +547,7 @@ Frag File UtilitiesReturns: @@ -540,7 +565,7 @@ Frag File Utilities -finaletoolkit.utils.low_quality_read_pairs(read, min_mapq=30)# +finaletoolkit.utils.low_quality_read_pairs(read, min_mapq=30)# Return True if the sequenced read described in read is not a properly paired read with a Phred score exceeding min_mapq. Based on epifluidlab/cofragr @@ -565,7 +590,7 @@ Frag File Utilities -finaletoolkit.utils.overlaps(contigs_1: ndarray[Any, dtype[_ScalarType_co]], starts_1: ndarray[Any, dtype[_ScalarType_co]], stops_1: ndarray[Any, dtype[_ScalarType_co]], contigs_2: ndarray[Any, dtype[_ScalarType_co]], starts_2: ndarray[Any, dtype[_ScalarType_co]], stops_2: ndarray[Any, dtype[_ScalarType_co]]) → ndarray[Any, dtype[_ScalarType_co]]# +finaletoolkit.utils.overlaps(contigs_1: ndarray[Any, dtype[ScalarType]], starts_1: ndarray[Any, dtype[ScalarType]], stops_1: ndarray[Any, dtype[ScalarType]], contigs_2: ndarray[Any, dtype[ScalarType]], starts_2: ndarray[Any, dtype[ScalarType]], stops_2: ndarray[Any, dtype[ScalarType]]) → ndarray[Any, dtype[ScalarType]]# Function that performs vectorized computation of overlaps. Returns an array of same shape as contig_1 that is true if the intervals for set 1 each have any overlap with an interval in set 2. 
@@ -608,8 +633,7 @@ Frag File Utilities - + @@ -653,8 +677,8 @@ Frag File Utilities - + + diff --git a/docs/_build/html/documentation/api_reference/genomeutils.html b/docs/_build/html/documentation/api_reference/genomeutils.html index 7e139c45..1f363505 100644 --- a/docs/_build/html/documentation/api_reference/genomeutils.html +++ b/docs/_build/html/documentation/api_reference/genomeutils.html @@ -1,12 +1,13 @@ + - + - + Genome Utilities — FinaleToolkit documentation @@ -16,31 +17,28 @@ document.documentElement.dataset.mode = localStorage.getItem("mode") || ""; document.documentElement.dataset.theme = localStorage.getItem("theme") || ""; - - - - - - + + + + + + + + + - + - - - - + + + - - - + + + @@ -48,7 +46,6 @@ - @@ -64,8 +61,19 @@ Back to top - - + + + + + + + + + @@ -73,6 +81,7 @@ Ctrl+K - - + + @@ -115,7 +124,7 @@ - + FinaleToolkit @@ -170,21 +179,29 @@ - - - Search - Ctrl+K - + - - - - - + @@ -193,11 +210,15 @@ - - - Search - Ctrl+K - + @@ -216,8 +237,7 @@ - - + @@ -271,11 +291,15 @@ - - - - - + @@ -321,6 +345,8 @@ + + @@ -335,7 +361,7 @@ API - Genome Utilities + Genome Utilities @@ -353,16 +379,16 @@ -Genome Utilities# +Genome Utilities# -class finaletoolkit.genome.GenomeGaps(gaps_bed: PathLike | str | None = None)# +class finaletoolkit.genome.GenomeGaps(gaps_bed: PathLike | str | None = None)# Reads telomere, centromere, and short_arm intervals from a bed file or generates these intervals from UCSC gap and centromere tracks for hg19 and hg38. -classmethod b37()# +classmethod b37()# Creates a GenomeGaps for the Broad Institute GRCh37 reference genome i.e b37. This reference genome is also based on GRCh37, but differs from the UCSC hg19 reference in a few ways, @@ -383,7 +409,7 @@ Genome Utilities -get_arm(contig: str, start: int, stop: int) → str# +get_arm(contig: str, start: int, stop: int) → str# Returns the chromosome arm the interval is in. If in the short arm of an acrocentric chromosome or intersects a centromere, returns an empty string. 
@@ -410,7 +436,7 @@ Genome Utilities -get_contig_gaps(contig: str) → ContigGaps# +get_contig_gaps(contig: str) → ContigGaps# Creates a ContigGaps for the specified chromosome Parameters: @@ -427,7 +453,7 @@ Genome Utilities -classmethod hg38()# +classmethod hg38()# Creates a GenomeGaps for the hg38 reference genome. This sequences uses chromosome names that start with ‘chr’ and is synonymous with the GRCh38 reference genome. @@ -437,7 +463,7 @@ Genome Utilities -in_tcmere(contig: str, start: int, stop: int) → bool# +in_tcmere(contig: str, start: int, stop: int) → bool# Checks if specified interval is in a centromere or telomere Parameters: @@ -458,7 +484,7 @@ Genome Utilities -overlaps_gap(contig: str, start: int, stop: int) → bool# +overlaps_gap(contig: str, start: int, stop: int) → bool# Checks if specified interval overlaps a gap interval Parameters: @@ -479,7 +505,7 @@ Genome Utilities -to_bed(output_file: str | PathLike)# +to_bed(output_file: str | PathLike)# Prints gap intervals in GenomeGaps to a BED4 file where the name is the type of gap interval. @@ -492,7 +518,7 @@ Genome Utilities -classmethod ucsc_hg19()# +classmethod ucsc_hg19()# Creates a GenomeGaps for the UCSC hg19 reference genome. This sequences uses chromosome names that start with ‘chr’ and is based on a version of the GRCh37 reference genome. @@ -510,10 +536,10 @@ Genome Utilities -class finaletoolkit.genome.ContigGaps(contig: str, centromere: tuple[int, int], telomeres: Iterable[tuple[int, int]], has_short_arm: bool = False)# +class finaletoolkit.genome.ContigGaps(contig: str, centromere: tuple[int, int], telomeres: Iterable[tuple[int, int]], has_short_arm: bool = False)# -get_arm(start: int, stop: int)# +get_arm(start: int, stop: int)# Returns name of chromosome arm the interval is in. Returns “NOARM” if in a centromere, telomere, or short arm of an acrocentric chromosome. 
@@ -538,7 +564,7 @@ Genome Utilities -in_gap(start: int, stop: int) → bool# +in_gap(start: int, stop: int) → bool# Checks if specified interval is in a gap. Parameters: @@ -558,7 +584,7 @@ Genome Utilities -in_tcmere(start: int, stop: int) → bool# +in_tcmere(start: int, stop: int) → bool# Checks if specified interval is in a centromere or telomere. Parameters: @@ -580,7 +606,7 @@ Genome Utilities -finaletoolkit.genome.ucsc_hg19_gap_bed(output_file: str | PathLike)# +finaletoolkit.genome.ucsc_hg19_gap_bed(output_file: str | PathLike)# Creates BED4 of centromeres, telomeres, and short arms for the UCSC hg19 reference sequence. @@ -592,7 +618,7 @@ Genome Utilities -finaletoolkit.genome.b37_gap_bed(output_file: str | PathLike)# +finaletoolkit.genome.b37_gap_bed(output_file: str | PathLike)# Creates BED4 of centromeres, telomeres, and short arms for the Broad Institute GRCh37 (b37) reference sequence. Also useful for files aligned to human_g1k_v37 (1000 Genomes Project). @@ -605,7 +631,7 @@ Genome Utilities -finaletoolkit.genome.ucsc_hg38_gap_bed(output_file: str | PathLike)# +finaletoolkit.genome.ucsc_hg38_gap_bed(output_file: str | PathLike)# Creates BED4 of centromeres, telomeres, and short arms for the UCSC hg38 reference sequence. 
@@ -643,8 +669,7 @@ Genome Utilities - + @@ -700,8 +725,8 @@ Genome Utilities - + + diff --git a/docs/_build/html/documentation/api_reference/index.html b/docs/_build/html/documentation/api_reference/index.html index 7cf5852d..cc84fce6 100644 --- a/docs/_build/html/documentation/api_reference/index.html +++ b/docs/_build/html/documentation/api_reference/index.html @@ -1,12 +1,13 @@ + - + - + API — FinaleToolkit documentation @@ -16,31 +17,28 @@ document.documentElement.dataset.mode = localStorage.getItem("mode") || ""; document.documentElement.dataset.theme = localStorage.getItem("theme") || ""; - - - - - - + + + + + + + + + - + - - - - + + + - - - + + + @@ -49,7 +47,6 @@ - @@ -65,8 +62,19 @@ Back to top - - + + + + + + + + + @@ -74,6 +82,7 @@ Ctrl+K - - + + @@ -116,7 +125,7 @@ - + FinaleToolkit @@ -171,21 +180,29 @@ - - - Search - Ctrl+K - + - - - - - + @@ -194,11 +211,15 @@ - - - Search - Ctrl+K - + @@ -217,8 +238,7 @@ - - + @@ -272,11 +292,15 @@ - - - - - + @@ -322,6 +346,8 @@ + + @@ -333,7 +359,7 @@ FinaleToolkit Documentation - API + API @@ -351,7 +377,7 @@ -API# +API# Basic Features @@ -442,8 +468,7 @@ API# - - + @@ -468,8 +493,8 @@ API# - - + + @@ -194,11 +211,15 @@ - - - Search - Ctrl+K - + @@ -217,8 +238,7 @@ - - + @@ -272,11 +292,15 @@ - - - - - + @@ -322,6 +346,8 @@ + + @@ -336,7 +362,7 @@ API - Window Protection Score (WPS) + Window... 
@@ -354,10 +380,10 @@ -Window Protection Score (WPS)# +Window Protection Score (WPS)# -finaletoolkit.frag.wps(input_file: str | AlignmentFile, contig: str, start: int | str, stop: int | str, output_file: str | None = None, window_size: int = 120, fraction_low: int = 120, fraction_high: int = 180, quality_threshold: int = 30, verbose: bool | int = 0) → ndarray# +finaletoolkit.frag.wps(input_file: str | AlignmentFile, contig: str, start: int | str, stop: int | str, output_file: str | None = None, window_size: int = 120, fraction_low: int = 120, fraction_high: int = 180, quality_threshold: int = 30, verbose: bool | int = 0) → ndarray# Return (raw) Windowed Protection Scores as specified in Snyder et al (2016) over a region [start,stop). @@ -365,19 +391,19 @@ Window Protection Score (WPS) input_file (str or pysam.AlignmentFile) – BAM, SAM or tabix file containing paired-end fragment reads or its path. AlignmentFile must be opened in read mode. -contig (str) -start (int) -stop (int) -output_file (string, optional) +contig (str) – +start (int) – +stop (int) – +output_file (string, optional) – window_size (int, optional) – Size of window to calculate WPS. Default is k = 120, equivalent to L-WPS. fraction_low (int, optional) – Specifies lowest fragment length included in calculation. Default is 120, equivalent to long fraction. fraction_high (int, optional) – Specifies highest fragment length included in calculation. Default is 180, equivalent to long fraction. 
-quality_threshold (int, optional) -workers (int, optional) -verbose (bool, optional) +quality_threshold (int, optional) – +workers (int, optional) – +verbose (bool, optional) – Returns: @@ -391,7 +417,7 @@ Window Protection Score (WPS) -finaletoolkit.frag.multi_wps(input_file: AlignmentFile | str, site_bed: str, output_file: None | str = None, window_size: int = 120, interval_size: int = 5000, fraction_low: int = 120, fraction_high: int = 180, quality_threshold: int = 30, workers: int = 1, verbose: bool | int = 0) → ndarray# +finaletoolkit.frag.multi_wps(input_file: AlignmentFile | str, site_bed: str, chrom_sizes: str | None = None, output_file: str | None = None, window_size: int = 120, interval_size: int = 5000, min_length: int = 120, max_length: int = 180, quality_threshold: int = 30, workers: int = 1, verbose: bool | int = 0, fraction_low: int | None = None, fraction_high: int | None = None)# Function that aggregates WPS over sites in BED file according to the method described by Snyder et al (2016). @@ -402,34 +428,36 @@ Window Protection Score (WPS)Returns: -scores – np array of shape (n, 2) where column 1 is the coordinate and -column 2 is the score and n is the number of coordinates in -region [start,stop) +output_file – location results are stored. 
Return type: -numpy.ndarray +str -finaletoolkit.frag.adjust_wps(input_file: str, interval_file: str, output_file: str, genome_file: str, interval_size: int = 5000, median_window_size: int = 1000, savgol_window_size: int = 21, savgol_poly_deg: int = 2, mean: bool = False, subtract_edges: bool = False, edge_size: int = 500, workers: int = 1, verbose: Union(bool, int) = False)# +finaletoolkit.frag.adjust_wps(input_file: str, interval_file: str, output_file: str, chrom_sizes: str, interval_size: int = 5000, median_window_size: int = 1000, savgol_window_size: int = 21, savgol_poly_deg: int = 2, mean: bool = False, subtract_edges: bool = False, edge_size: int = 500, workers: int = 1, verbose: Union(bool, int) = False)# Adjusts raw WPS data in a BigWig by applying a median filter and Savitsky-Golay filter (Savitsky and Golay, 1964). @@ -439,9 +467,10 @@ Window Protection Score (WPS) - + @@ -533,8 +562,8 @@ Window Protection Score (WPS) - + + diff --git a/docs/_build/html/documentation/cli_reference/index.html b/docs/_build/html/documentation/cli_reference/index.html index 803fb18d..35ab51d3 100644 --- a/docs/_build/html/documentation/cli_reference/index.html +++ b/docs/_build/html/documentation/cli_reference/index.html @@ -1,12 +1,13 @@ + - + - + CLI — FinaleToolkit documentation @@ -16,31 +17,28 @@ document.documentElement.dataset.mode = localStorage.getItem("mode") || ""; document.documentElement.dataset.theme = localStorage.getItem("theme") || ""; - - - - - - + + + - - - + + + + + + + - - + + + - - - + + + @@ -49,7 +47,6 @@ - @@ -65,8 +62,19 @@ Back to top - - + + + + + + + + + @@ -74,6 +82,7 @@ Ctrl+K - - + + @@ -116,7 +125,7 @@ - + FinaleToolkit @@ -171,21 +180,29 @@ - - - Search - Ctrl+K - + - - - - - + @@ -194,11 +211,15 @@ - - - Search - Ctrl+K - + @@ -217,8 +238,7 @@ - - + @@ -272,11 +292,15 @@ - - - - - + @@ -313,6 +337,8 @@ + + @@ -324,7 +350,7 @@ FinaleToolkit Documentation - CLI + CLI @@ -342,7 +368,7 @@ -CLI# +CLI# FinaleToolkit is a package and standalone 
program to extract fragmentation features of cell-free DNA from paired-end sequencing data. usage: finaletoolkit [-h] [-v] @@ -351,7 +377,7 @@ CLI# -Named Arguments# +Named Arguments# -v, --version show program’s version number and exit @@ -359,15 +385,17 @@ Named Arguments -Sub-commands# +Sub-commands# -coverage# +coverage# Calculates fragmentation coverage over intervals defined in a BED file based on alignment data from a BAM/SAM/CRAM/Fragment file. -finaletoolkit coverage [-h] [-o OUTPUT_FILE] [-n] [-s SCALE_FACTOR] [-p {midpoint,any}] [-q QUALITY_THRESHOLD] [-w WORKERS] [-v] input_file interval_file +finaletoolkit coverage [-h] [-o OUTPUT_FILE] [-n] [-s SCALE_FACTOR] [-min MIN_LENGTH] [-max MAX_LENGTH] [-p {midpoint,any}] + [-q QUALITY_THRESHOLD] [-w WORKERS] [-v] + input_file interval_file -Positional Arguments# +Positional Arguments# input_file Path to a BAM/SAM/CRAM/Fragment file containing fragment data. @@ -378,7 +406,7 @@ Positional Arguments -Named Arguments# +Named Arguments# -o, --output_file A BED file containing coverage values over the intervals specified in interval file. @@ -392,6 +420,13 @@ Named Arguments -frag-length-bins# +frag-length-bins# Retrieves fragment lengths grouped in bins given a BAM/SAM/CRAM/Fragment file. -finaletoolkit frag-length-bins [-h] [-c CONTIG] [-S START] [-E STOP] [-min MIN_LENGTH] [-max MAX_LENGTH] [-p {midpoint,any}] [--bin-size BIN_SIZE] - [-o OUTPUT_FILE] [--histogram-path HISTOGRAM_PATH] [-q QUALITY_THRESHOLD] [-v] +finaletoolkit frag-length-bins [-h] [-c CONTIG] [-S START] [-E STOP] [-min MIN_LENGTH] [-max MAX_LENGTH] [-p {midpoint,any}] + [--bin-size BIN_SIZE] [-o OUTPUT_FILE] [--histogram-path HISTOGRAM_PATH] [-q QUALITY_THRESHOLD] [-v] input_file -Positional Arguments# +Positional Arguments# input_file Path to a BAM/SAM/CRAM/Fragment file containing fragment data. @@ -429,7 +464,7 @@ Positional Arguments -Named Arguments# +Named Arguments# -c, --contig Specify the contig or chromosome to select fragments from. 
(Required if using --start or --stop.) @@ -475,15 +510,15 @@ Named Arguments -frag-length-intervals# +frag-length-intervals# Retrieves fragment length summary statistics over intervals defined in a BED file based on alignment data from a BAM/SAM/CRAM/Fragment file. -finaletoolkit frag-length-intervals [-h] [-min MIN_LENGTH] [-max MAX_LENGTH] [-p {midpoint,any}] [-o OUTPUT_FILE] [-q QUALITY_THRESHOLD] [-w WORKERS] - [-v] +finaletoolkit frag-length-intervals [-h] [-min MIN_LENGTH] [-max MAX_LENGTH] [-p {midpoint,any}] [-o OUTPUT_FILE] [-q QUALITY_THRESHOLD] + [-w WORKERS] [-v] input_file interval_file -Positional Arguments# +Positional Arguments# input_file Path to a BAM/SAM/CRAM/Fragment file containing fragment data. @@ -494,7 +529,7 @@ Positional Arguments -Named Arguments# +Named Arguments# -min, --min-length Minimum length for a fragment to be included in fragment length. @@ -528,14 +563,15 @@ Named Arguments
-DELFI# +DELFI# -finaletoolkit.frag.delfi(input_file: str, autosomes: str, bins_file: str, reference_file: str, blacklist_file: str | None = None, gap_file: str | GenomeGaps | None = None, output_file: str | None = None, gc_correct: bool = True, remove_nocov: bool = True, merge_bins: bool = True, window_size: int = 5000000, quality_threshold: int = 30, workers: int = 1, verbose: int | bool = False) → DataFrame# +finaletoolkit.frag.delfi(input_file: str, chrom_sizes: str, bins_file: str, reference_file: str, blacklist_file: str | None = None, gap_file: str | GenomeGaps | None = None, output_file: str | None = None, gc_correct: bool = True, remove_nocov: bool = True, merge_bins: bool = True, window_size: int = 5000000, quality_threshold: int = 30, workers: int = 1, verbose: int | bool = False) → DataFrame# A function that replicates the methodology of Christiano et al (2019). @@ -365,11 +391,11 @@ DELFI#< input_file (str) – Path string pointing to a bam file containing PE fragment reads. -autosomes (str) – Path string to a chrom.sizes file containing only autosomal +chrom_sizes (str) – Path string to a chrom.sizes file containing only autosomal chromosomes bins_file (str) – Path string to a BED file containing 100kb bins for reference genome of choice. -reference_file (str) – Path string to .2bit file for reference genoe. +reference_file (str) – Path string to .2bit file for reference genome. gap_file (str or GenomeGaps) – Specifies locations of telomeres and centromeres for reference genome. There are three options: - Path string to a BED4+ file where each interval is a @@ -395,18 +421,25 @@ DELFI#< stdout. Default is False. +Returns: +Results of delfi analysis, with column names corresponding to +those generated by the original author’s scripts. 
+ +Return type: +pandas DataFrame + -finaletoolkit.frag.delfi_gc_correct(windows: DataFrame, alpha: float = 0.75, it: int = 8, verbose: bool = False)# +finaletoolkit.frag.delfi_gc_correct(windows: DataFrame, alpha: float = 0.75, it: int = 8, verbose: bool = False)# Helper function that takes window data and performs GC adjustment. -finaletoolkit.frag.delfi_merge_bins(hundred_kb_bins: DataFrame, gc_corrected: bool = True, verbose: bool = False) → DataFrame# +finaletoolkit.frag.delfi_merge_bins(hundred_kb_bins: DataFrame, gc_corrected: bool = True, verbose: bool = False) → DataFrame# @@ -446,8 +479,7 @@ DELFI#< - - + @@ -486,8 +518,8 @@ DELFI#< - - + + @@ -194,11 +211,15 @@ - - - Search - Ctrl+K - + @@ -217,8 +238,7 @@ - - + @@ -272,11 +292,15 @@ - - - - - + @@ -322,6 +346,8 @@ + + @@ -336,7 +362,7 @@ API - End-Motifs + End-Motifs @@ -354,10 +380,10 @@ -End-Motifs# +End-Motifs# -class finaletoolkit.frag.EndMotifFreqs(kmer_frequencies: Iterable[tuple[str, float]], k: int, quality_threshold: int = 20)# +class finaletoolkit.frag.EndMotifFreqs(kmer_frequencies: Iterable[tuple[str, float]], k: int, quality_threshold: int = 20)# Class that stores frequencies of end-motif k-mer frequencies and contains methods to manipulate this data. @@ -372,7 +398,7 @@ End-Motifs -classmethod from_file(file_path: str | Path, quality_threshold: int, sep: str = '\t', header: int = 0) → EndMotifFreqs# +classmethod from_file(file_path: str | Path, quality_threshold: int, sep: str = '\t', header: int = 0) → EndMotifFreqs# Reads kmer frequency from a two-column tab-delimited file. Parameters: @@ -393,7 +419,7 @@ End-Motifs -motif_diversity_score() → float# +motif_diversity_score() → float# Calculates a motif diversity score (MDS) using normalized Shannon entropy as described by Jiang et al (2020). This function is generalized for any k instead of just 4-mers. 
@@ -401,7 +427,7 @@ End-Motifs -to_tsv(output_file: str | Path, sep: str = '\t')# +to_tsv(output_file: str | Path, sep: str = '\t')# Prints k-mer frequencies to a tsv @@ -409,7 +435,7 @@ End-Motifs -class finaletoolkit.frag.EndMotifsIntervals(intervals: Iterable[tuple[tuple, dict]], k: int, quality_threshold: int = 20)# +class finaletoolkit.frag.EndMotifsIntervals(intervals: Iterable[tuple[tuple, dict]], k: int, quality_threshold: int = 20)# Class that stores frequencies of end-motif k-mers over user-specified intervals and contains methods to manipulate this data. @@ -426,7 +452,7 @@ End-Motifs -freq(kmer: str) → list[tuple[str, int, int, float]]# +freq(kmer: str) → list[tuple[str, int, int, float]]# Returns a list of intervals and associated frequency for given kmer. Results are in the form (chrom, 0-based start, 1-based stop, frequency). @@ -434,7 +460,7 @@ End-Motifs -classmethod from_file(file_path: str, quality_threshold: int, sep: str = ',') → EndMotifFreqs# +classmethod from_file(file_path: str, quality_threshold: int, sep: str = ',', header: int = 0) → EndMotifsIntervals# Reads kmer frequency from a tab-delimited file. Expected columns are contig, start, stop, name, count, (kmers). Because exporting to file includes an option to turn counts to a fraction, @@ -451,20 +477,20 @@ End-Motifskmer_freqs Return type: -EndMotifFreqs +EndMotifsIntervals -mds_bed(output_file: str | Path, sep: str = '\t')# +mds_bed(output_file: str | Path, sep: str = '\t')# Writes MDS for each interval to a bed/bedgraph file. -motif_diversity_score() → list[tuple[tuple, float]]# +motif_diversity_score() → list[tuple[tuple, float]]# Calculates a motif diversity score (MDS) for each interval using normalized Shannon entropy as described by Jiang et al (2020). This function is generalized for any k instead of just 4-mers.
@@ -472,7 +498,7 @@ End-Motifs -to_bed(kmer: str, output_file: str | Path, calc_freq: bool = True, sep: str = '\t')# +to_bed(kmer: str, output_file: str | Path, calc_freq: bool = True, sep: str = '\t')# Takes frequency of specified kmer and writes to BED. Parameters: @@ -488,7 +514,7 @@ End-Motifs -to_bedgraph(kmer: str, output_file: str | Path, calc_freq: bool = True, sep: str = '\t')# +to_bedgraph(kmer: str, output_file: str | Path, calc_freq: bool = True, sep: str = '\t')# Takes frequency of specified kmer and writes to bedgraph. Parameters: @@ -504,7 +530,7 @@ End-Motifs -to_tsv(output_file: str | Path, calc_freq: bool = True, sep: str = '\t')# +to_tsv(output_file: str | Path, calc_freq: bool = True, sep: str = '\t')# Writes all intervals and associated frequencies to file. Columns are contig, start, stop, name, count, (kmers). @@ -523,7 +549,7 @@ End-Motifs -finaletoolkit.frag.region_end_motifs(input_file: str, contig: str, start: int, stop: int, refseq_file: str | Path, k: int = 4, fraction_low: int = 10, fraction_high: int = 600, both_strands: bool = True, output_file: None | str = None, quality_threshold: int = 20, verbose: bool | int = False) → dict# +finaletoolkit.frag.region_end_motifs(input_file: str, contig: str, start: int, stop: int, refseq_file: str | Path, k: int = 4, fraction_low: int = 10, fraction_high: int = 600, both_strands: bool = True, output_file: str | None = None, quality_threshold: int = 20, verbose: bool | int = False) → dict# Function that reads fragments in the specified region from a BAM, SAM, or tabix indexed file and returns the 5’ k-mer (default is 4-mer) end motif counts as a dictionary.
This function @@ -541,9 +567,9 @@ End-MotifsReturns: @@ -557,7 +583,7 @@ End-Motifs -finaletoolkit.frag.end_motifs(input_file: str, refseq_file: str | Path, k: int = 4, fraction_low: int = 10, fraction_high: int = 600, both_strands: bool = False, output_file: None | str = None, quality_threshold: int = 30, workers: int = 1, verbose: bool | int = False) → EndMotifFreqs# +finaletoolkit.frag.end_motifs(input_file: str, refseq_file: str | Path, k: int = 4, min_length: int = 10, max_length: int = 600, both_strands: bool = True, output_file: str | None = None, quality_threshold: int = 30, workers: int = 1, verbose: bool | int = False, fraction_low: int | None = None, fraction_high: int | None = None) → EndMotifFreqs# Function that reads fragments from a BAM, SAM, or tabix indexed file and returns the 5’ k-mer (default is 4-mer) end motif frequencies as a dictionary. Optionally writes data to a tsv. This @@ -569,10 +595,15 @@ End-MotifsReturns: @@ -586,7 +617,7 @@ End-Motifs -finaletoolkit.frag.interval_end_motifs(input_file: str, refseq_file: str | Path, intervals: str | Iterable[tuple[str, int, int, str]], k: int = 4, fraction_low: int = 10, fraction_high: int = 600, both_strands: bool = True, output_file: None | str = None, quality_threshold: int = 30, workers: int = 1, verbose: bool | int = False) → EndMotifsIntervals# +finaletoolkit.frag.interval_end_motifs(input_file: str, refseq_file: str | Path, intervals: str | Iterable[tuple[str, int, int, str]], k: int = 4, min_length: int = 10, max_length: int = 600, both_strands: bool = True, output_file: str | None = None, quality_threshold: int = 30, workers: int = 1, verbose: bool | int = False, fraction_low: int | None = None, fraction_high: int | None = None) → EndMotifsIntervals# Function that reads fragments from a BAM, SAM, or tabix indexed file and user-specified intervals and returns the 5’ k-mer (default is 4-mer) end motif. Optionally writes data to a tsv. 
@@ -601,7 +632,7 @@ End-MotifsReturns: @@ -650,8 +681,7 @@ End-Motifs - + @@ -706,8 +736,8 @@ End-Motifs - + + diff --git a/docs/_build/html/documentation/api_reference/fragfile.html b/docs/_build/html/documentation/api_reference/fragfile.html index fb2b7355..14a6975d 100644 --- a/docs/_build/html/documentation/api_reference/fragfile.html +++ b/docs/_build/html/documentation/api_reference/fragfile.html @@ -1,12 +1,13 @@ + - + - + Frag File Utilities — FinaleToolkit documentation @@ -16,31 +17,28 @@ document.documentElement.dataset.mode = localStorage.getItem("mode") || ""; document.documentElement.dataset.theme = localStorage.getItem("theme") || ""; - - - - - - + + + + + + + + + - + - - - - + + + - - - + + + @@ -49,7 +47,6 @@ - @@ -65,8 +62,19 @@ Back to top - - + + + + + + + + + @@ -74,6 +82,7 @@ Ctrl+K - - + + @@ -116,7 +125,7 @@ - + FinaleToolkit @@ -171,21 +180,29 @@ - - - Search - Ctrl+K - + - - - - - + @@ -194,11 +211,15 @@ - - - Search - Ctrl+K - + @@ -217,8 +238,7 @@ - - + @@ -272,11 +292,15 @@ - - - - - + @@ -322,6 +346,8 @@ + + @@ -336,7 +362,7 @@ API - Frag File Utilities + Frag File Utilities @@ -354,26 +380,27 @@ -Frag File Utilities# +Frag File Utilities# -finaletoolkit.utils.filter_bam(input_file: str, region_file: str | None = None, output_file: str | None = None, max_length: int | None = None, min_length: int | None = None, quality_threshold: int = 30, workers: int = 1, verbose: bool = False)# +finaletoolkit.utils.filter_bam(input_file: str, region_file: str | None = None, output_file: str | None = None, min_length: int | None = None, max_length: int | None = None, quality_threshold: int = 30, workers: int = 1, verbose: bool = False, fraction_low: int | None = None, fraction_high: int | None = None)# Accepts the path to a BAM file and creates a bam file where all reads are read1 in a proper pair, exceed the specified quality -threshold, do not intersect a region in the given blacklist -file, and intersects with a region in the region bed. 
+threshold, and intersects with a region in the region bed. Parameters: input_bam (str) – Path string or AlignmentFile pointing to the BAM file to be filtered. -region_file (str, option) -output_file (str, optional) -min_length (int, optional) -max_length (int, optional) -quality_threshold (int, optional) -workers (int, optional) -verbose (bool, optional) +region_file (str, option) – +output_file (str, optional) – +min_length (int, optional) – +max_length (int, optional) – +quality_threshold (int, optional) – +workers (int, optional) – +verbose (bool, optional) – +fraction_low (int, optional) – Deprecated alias for min_length +fraction_high (int, optional) – Deprecated alias for max_length Returns: @@ -387,7 +414,7 @@ Frag File Utilities -finaletoolkit.utils.agg_bw(input_file: str | PathLike, interval_file: str | PathLike, output_file: str | PathLike, median_window_size: int = 120, mean: bool = False, strand_location: int = 5, verbose: bool = False)# +finaletoolkit.utils.agg_bw(input_file: str | PathLike, interval_file: str | PathLike, output_file: str | PathLike, median_window_size: int = 120, mean: bool = False, verbose: bool = False)# Takes a BigWig and an interval BED and aggregates signal along the intervals with a median filter. For aggregating WPS signals, note that the median filter trims the @@ -406,13 +433,11 @@ Frag File Utilities Parameters: -input_file (str) -interval_file (str) -output_file (str) +input_file (str) – +interval_file (str) – BED file containing intervals. 6th column should have strand. +output_file (str) – median_window_size (int, optional) – default is 0 mean (bool) – use mean instead -strand_location (int) – which column (starting at 0) of the interval file contains the -strand. Default is 5. 
verbose (int or bool, optional) – default is False @@ -427,7 +452,7 @@ Frag File Utilities -finaletoolkit.utils.chrom_sizes_to_list(chrom_sizes_file: str | Path) → list[tuple[str][int]]# +finaletoolkit.utils.chrom_sizes_to_list(chrom_sizes_file: str | Path) → list[tuple[str, int]]# Reads chromosome names and sizes from a CHROMSIZE file into a list. Parameters: @@ -445,7 +470,7 @@ Frag File Utilities -finaletoolkit.utils.chrom_sizes_to_dict(chrom_sizes_file: str | Path) → list[tuple[str][int]]# +finaletoolkit.utils.chrom_sizes_to_dict(chrom_sizes_file: str | Path) → list[tuple[str][int]]# Reads chromosome names and sizes from a CHROMSIZE file into a dict. Parameters: @@ -463,19 +488,19 @@ Frag File Utilities -finaletoolkit.utils.frag_generator(input_file: str | pysam.AlignmentFile | pysam.TabixFile | Path, contig: str | None, quality_threshold: int = 30, start: int | None = None, stop: int | None = None, fraction_low: int = 120, fraction_high: int = 180, intersect_policy: str = 'midpoint', verbose: bool = False) → Generator[tuple]# +finaletoolkit.utils.frag_generator(input_file: FragFile, contig: str | None, quality_threshold: int = 30, start: int | None = None, stop: int | None = None, fraction_low: int = None, fraction_high: int = None, intersect_policy: str = 'midpoint', verbose: bool = False) → Generator[tuple]# Reads from BAM, SAM, or BED file and returns tuples containing contig (chromosome), start, stop (end), mapq, and strand for each fragment. Optionally may filter for mapq, size, and intersection with a region. Parameters: -input_file (str, pathlike, or AlignmentFile) – Fragment coordinates stored as a SAM, BAM, CRAM, tabix-indexed +input_file (str, pathlike, TabixFile, or AlignmentFile) – Fragment coordinates stored as a SAM, BAM, CRAM, tabix-indexed bed.gz, or tabix-indexed FinaleDB fragment file. 
-contig (str) -quality_threshold (int, optional) -start (int, optional) -stop (int, optional) +contig (str) – +quality_threshold (int, optional) – +start (int, optional) – +stop (int, optional) – fraction_low (int, optional) – Specifies lowest fragment length included in array. Default is 120, equivalent to long fraction. fraction_high (int, optional) – Specifies highest fragment length included in array. Default is @@ -485,7 +510,7 @@ Frag File UtilitiesReturns: @@ -501,17 +526,17 @@ Frag File Utilities -finaletoolkit.utils.frag_array(input_file: str | AlignmentFile | TabixFile | Path, contig: str, quality_threshold: int = 30, start: int | None = None, stop: int | None = None, fraction_low: int = 120, fraction_high: int = 180, intersect_policy: str = 'midpoint', verbose: bool = False) → ndarray[Any, dtype[_ScalarType_co]]# +finaletoolkit.utils.frag_array(input_file: str | PathLike | AlignmentFile | TabixFile, contig: str, quality_threshold: int = 30, start: int | None = None, stop: int | None = None, fraction_low: int = 120, fraction_high: int = 180, intersect_policy: str = 'midpoint', verbose: bool = False) → ndarray[Any, dtype[ScalarType]]# Reads from BAM, SAM, or BED file and returns a three column matrix with fragment start and stop positions and strand. Parameters: -input_file (str or AlignmentFile) -contig (str) -quality_threshold (int, optional) -start (int, optional) -stop (int, optional) +input_file (str or AlignmentFile) – +contig (str) – +quality_threshold (int, optional) – +start (int, optional) – +stop (int, optional) – fraction_low (int, optional) – Specifies lowest fragment length included in array. Default is 120, equivalent to long fraction. fraction_high (int, optional) – Specifies highest fragment length included in array. 
Default is @@ -522,7 +547,7 @@ Frag File UtilitiesReturns: @@ -540,7 +565,7 @@ Frag File Utilities -finaletoolkit.utils.low_quality_read_pairs(read, min_mapq=30)# +finaletoolkit.utils.low_quality_read_pairs(read, min_mapq=30)# Return True if the sequenced read described in read is not a properly paired read with a Phred score exceeding min_mapq. Based on epifluidlab/cofragr @@ -565,7 +590,7 @@ Frag File Utilities -finaletoolkit.utils.overlaps(contigs_1: ndarray[Any, dtype[_ScalarType_co]], starts_1: ndarray[Any, dtype[_ScalarType_co]], stops_1: ndarray[Any, dtype[_ScalarType_co]], contigs_2: ndarray[Any, dtype[_ScalarType_co]], starts_2: ndarray[Any, dtype[_ScalarType_co]], stops_2: ndarray[Any, dtype[_ScalarType_co]]) → ndarray[Any, dtype[_ScalarType_co]]# +finaletoolkit.utils.overlaps(contigs_1: ndarray[Any, dtype[ScalarType]], starts_1: ndarray[Any, dtype[ScalarType]], stops_1: ndarray[Any, dtype[ScalarType]], contigs_2: ndarray[Any, dtype[ScalarType]], starts_2: ndarray[Any, dtype[ScalarType]], stops_2: ndarray[Any, dtype[ScalarType]]) → ndarray[Any, dtype[ScalarType]]# Function that performs vectorized computation of overlaps. Returns an array of same shape as contig_1 that is true if the intervals for set 1 each have any overlap with an interval in set 2. 
@@ -608,8 +633,7 @@ Frag File Utilities - + @@ -653,8 +677,8 @@ Frag File Utilities - + + diff --git a/docs/_build/html/documentation/api_reference/genomeutils.html b/docs/_build/html/documentation/api_reference/genomeutils.html index 7e139c45..1f363505 100644 --- a/docs/_build/html/documentation/api_reference/genomeutils.html +++ b/docs/_build/html/documentation/api_reference/genomeutils.html @@ -1,12 +1,13 @@ + - + - + Genome Utilities — FinaleToolkit documentation @@ -16,31 +17,28 @@ document.documentElement.dataset.mode = localStorage.getItem("mode") || ""; document.documentElement.dataset.theme = localStorage.getItem("theme") || ""; - - - - - - + + + + + + + + + - + - - - - + + + - - - + + + @@ -48,7 +46,6 @@ - @@ -64,8 +61,19 @@ Back to top - - + + + + + + + + + @@ -73,6 +81,7 @@ Ctrl+K - - + + @@ -115,7 +124,7 @@ - + FinaleToolkit @@ -170,21 +179,29 @@ - - - Search - Ctrl+K - + - - - - - + @@ -193,11 +210,15 @@ - - - Search - Ctrl+K - + @@ -216,8 +237,7 @@ - - + @@ -271,11 +291,15 @@ - - - - - + @@ -321,6 +345,8 @@ + + @@ -335,7 +361,7 @@ API - Genome Utilities + Genome Utilities @@ -353,16 +379,16 @@ -Genome Utilities# +Genome Utilities# -class finaletoolkit.genome.GenomeGaps(gaps_bed: PathLike | str | None = None)# +class finaletoolkit.genome.GenomeGaps(gaps_bed: PathLike | str | None = None)# Reads telomere, centromere, and short_arm intervals from a bed file or generates these intervals from UCSC gap and centromere tracks for hg19 and hg38. -classmethod b37()# +classmethod b37()# Creates a GenomeGaps for the Broad Institute GRCh37 reference genome i.e b37. This reference genome is also based on GRCh37, but differs from the UCSC hg19 reference in a few ways, @@ -383,7 +409,7 @@ Genome Utilities -get_arm(contig: str, start: int, stop: int) → str# +get_arm(contig: str, start: int, stop: int) → str# Returns the chromosome arm the interval is in. If in the short arm of an acrocentric chromosome or intersects a centromere, returns an empty string. 
@@ -410,7 +436,7 @@ Genome Utilities -get_contig_gaps(contig: str) → ContigGaps# +get_contig_gaps(contig: str) → ContigGaps# Creates a ContigGaps for the specified chromosome Parameters: @@ -427,7 +453,7 @@ Genome Utilities -classmethod hg38()# +classmethod hg38()# Creates a GenomeGaps for the hg38 reference genome. This sequences uses chromosome names that start with ‘chr’ and is synonymous with the GRCh38 reference genome. @@ -437,7 +463,7 @@ Genome Utilities -in_tcmere(contig: str, start: int, stop: int) → bool# +in_tcmere(contig: str, start: int, stop: int) → bool# Checks if specified interval is in a centromere or telomere Parameters: @@ -458,7 +484,7 @@ Genome Utilities -overlaps_gap(contig: str, start: int, stop: int) → bool# +overlaps_gap(contig: str, start: int, stop: int) → bool# Checks if specified interval overlaps a gap interval Parameters: @@ -479,7 +505,7 @@ Genome Utilities -to_bed(output_file: str | PathLike)# +to_bed(output_file: str | PathLike)# Prints gap intervals in GenomeGaps to a BED4 file where the name is the type of gap interval. @@ -492,7 +518,7 @@ Genome Utilities -classmethod ucsc_hg19()# +classmethod ucsc_hg19()# Creates a GenomeGaps for the UCSC hg19 reference genome. This sequences uses chromosome names that start with ‘chr’ and is based on a version of the GRCh37 reference genome. @@ -510,10 +536,10 @@ Genome Utilities -class finaletoolkit.genome.ContigGaps(contig: str, centromere: tuple[int, int], telomeres: Iterable[tuple[int, int]], has_short_arm: bool = False)# +class finaletoolkit.genome.ContigGaps(contig: str, centromere: tuple[int, int], telomeres: Iterable[tuple[int, int]], has_short_arm: bool = False)# -get_arm(start: int, stop: int)# +get_arm(start: int, stop: int)# Returns name of chromosome arm the interval is in. Returns “NOARM” if in a centromere, telomere, or short arm of an acrocentric chromosome. 
@@ -538,7 +564,7 @@ Genome Utilities -in_gap(start: int, stop: int) → bool# +in_gap(start: int, stop: int) → bool# Checks if specified interval is in a gap. Parameters: @@ -558,7 +584,7 @@ Genome Utilities -in_tcmere(start: int, stop: int) → bool# +in_tcmere(start: int, stop: int) → bool# Checks if specified interval is in a centromere or telomere. Parameters: @@ -580,7 +606,7 @@ Genome Utilities -finaletoolkit.genome.ucsc_hg19_gap_bed(output_file: str | PathLike)# +finaletoolkit.genome.ucsc_hg19_gap_bed(output_file: str | PathLike)# Creates BED4 of centromeres, telomeres, and short arms for the UCSC hg19 reference sequence. @@ -592,7 +618,7 @@ Genome Utilities -finaletoolkit.genome.b37_gap_bed(output_file: str | PathLike)# +finaletoolkit.genome.b37_gap_bed(output_file: str | PathLike)# Creates BED4 of centromeres, telomeres, and short arms for the Broad Institute GRCh37 (b37) reference sequence. Also useful for files aligned to human_g1k_v37 (1000 Genomes Project). @@ -605,7 +631,7 @@ Genome Utilities -finaletoolkit.genome.ucsc_hg38_gap_bed(output_file: str | PathLike)# +finaletoolkit.genome.ucsc_hg38_gap_bed(output_file: str | PathLike)# Creates BED4 of centromeres, telomeres, and short arms for the UCSC hg38 reference sequence. 
@@ -643,8 +669,7 @@ Genome Utilities - + @@ -700,8 +725,8 @@ Genome Utilities - + + diff --git a/docs/_build/html/documentation/api_reference/index.html b/docs/_build/html/documentation/api_reference/index.html index 7cf5852d..cc84fce6 100644 --- a/docs/_build/html/documentation/api_reference/index.html +++ b/docs/_build/html/documentation/api_reference/index.html @@ -1,12 +1,13 @@ + - + - + API — FinaleToolkit documentation @@ -16,31 +17,28 @@ document.documentElement.dataset.mode = localStorage.getItem("mode") || ""; document.documentElement.dataset.theme = localStorage.getItem("theme") || ""; - - - - - - + + + + + + + + + - + - - - - + + + - - - + + + @@ -49,7 +47,6 @@ - @@ -65,8 +62,19 @@ Back to top - - + + + + + + + + + @@ -74,6 +82,7 @@ Ctrl+K - - + + @@ -116,7 +125,7 @@ - + FinaleToolkit @@ -171,21 +180,29 @@ - - - Search - Ctrl+K - + - - - - - + @@ -194,11 +211,15 @@ - - - Search - Ctrl+K - + @@ -217,8 +238,7 @@ - - + @@ -272,11 +292,15 @@ - - - - - + @@ -322,6 +346,8 @@ + + @@ -333,7 +359,7 @@ FinaleToolkit Documentation - API + API @@ -351,7 +377,7 @@ -API# +API# Basic Features @@ -442,8 +468,7 @@ API# - - + @@ -468,8 +493,8 @@ API# - - + + @@ -194,11 +211,15 @@ - - - Search - Ctrl+K - + @@ -217,8 +238,7 @@ - - + @@ -272,11 +292,15 @@ - - - - - + @@ -322,6 +346,8 @@ + + @@ -336,7 +362,7 @@ API - Window Protection Score (WPS) + Window... 
@@ -354,10 +380,10 @@ -Window Protection Score (WPS)# +Window Protection Score (WPS)# -finaletoolkit.frag.wps(input_file: str | AlignmentFile, contig: str, start: int | str, stop: int | str, output_file: str | None = None, window_size: int = 120, fraction_low: int = 120, fraction_high: int = 180, quality_threshold: int = 30, verbose: bool | int = 0) → ndarray# +finaletoolkit.frag.wps(input_file: str | AlignmentFile, contig: str, start: int | str, stop: int | str, output_file: str | None = None, window_size: int = 120, fraction_low: int = 120, fraction_high: int = 180, quality_threshold: int = 30, verbose: bool | int = 0) → ndarray# Return (raw) Windowed Protection Scores as specified in Snyder et al (2016) over a region [start,stop). @@ -365,19 +391,19 @@ Window Protection Score (WPS) input_file (str or pysam.AlignmentFile) – BAM, SAM or tabix file containing paired-end fragment reads or its path. AlignmentFile must be opened in read mode. -contig (str) -start (int) -stop (int) -output_file (string, optional) +contig (str) – +start (int) – +stop (int) – +output_file (string, optional) – window_size (int, optional) – Size of window to calculate WPS. Default is k = 120, equivalent to L-WPS. fraction_low (int, optional) – Specifies lowest fragment length included in calculation. Default is 120, equivalent to long fraction. fraction_high (int, optional) – Specifies highest fragment length included in calculation. Default is 180, equivalent to long fraction. 
-quality_threshold (int, optional) -workers (int, optional) -verbose (bool, optional) +quality_threshold (int, optional) – +workers (int, optional) – +verbose (bool, optional) – Returns: @@ -391,7 +417,7 @@ Window Protection Score (WPS) -finaletoolkit.frag.multi_wps(input_file: AlignmentFile | str, site_bed: str, output_file: None | str = None, window_size: int = 120, interval_size: int = 5000, fraction_low: int = 120, fraction_high: int = 180, quality_threshold: int = 30, workers: int = 1, verbose: bool | int = 0) → ndarray# +finaletoolkit.frag.multi_wps(input_file: AlignmentFile | str, site_bed: str, chrom_sizes: str | None = None, output_file: str | None = None, window_size: int = 120, interval_size: int = 5000, min_length: int = 120, max_length: int = 180, quality_threshold: int = 30, workers: int = 1, verbose: bool | int = 0, fraction_low: int | None = None, fraction_high: int | None = None)# Function that aggregates WPS over sites in BED file according to the method described by Snyder et al (2016). @@ -402,34 +428,36 @@ Window Protection Score (WPS)Returns: -scores – np array of shape (n, 2) where column 1 is the coordinate and -column 2 is the score and n is the number of coordinates in -region [start,stop) +output_file – location results are stored. 
Return type: -numpy.ndarray +str -finaletoolkit.frag.adjust_wps(input_file: str, interval_file: str, output_file: str, genome_file: str, interval_size: int = 5000, median_window_size: int = 1000, savgol_window_size: int = 21, savgol_poly_deg: int = 2, mean: bool = False, subtract_edges: bool = False, edge_size: int = 500, workers: int = 1, verbose: Union(bool, int) = False)# +finaletoolkit.frag.adjust_wps(input_file: str, interval_file: str, output_file: str, chrom_sizes: str, interval_size: int = 5000, median_window_size: int = 1000, savgol_window_size: int = 21, savgol_poly_deg: int = 2, mean: bool = False, subtract_edges: bool = False, edge_size: int = 500, workers: int = 1, verbose: Union(bool, int) = False)# Adjusts raw WPS data in a BigWig by applying a median filter and Savitsky-Golay filter (Savitsky and Golay, 1964). @@ -439,9 +467,10 @@ Window Protection Score (WPS) - + @@ -533,8 +562,8 @@ Window Protection Score (WPS) - + + diff --git a/docs/_build/html/documentation/cli_reference/index.html b/docs/_build/html/documentation/cli_reference/index.html index 803fb18d..35ab51d3 100644 --- a/docs/_build/html/documentation/cli_reference/index.html +++ b/docs/_build/html/documentation/cli_reference/index.html @@ -1,12 +1,13 @@ + - + - + CLI — FinaleToolkit documentation @@ -16,31 +17,28 @@ document.documentElement.dataset.mode = localStorage.getItem("mode") || ""; document.documentElement.dataset.theme = localStorage.getItem("theme") || ""; - - - - - - + + + - - - + + + + + + + - - + + + - - - + + + @@ -49,7 +47,6 @@ - @@ -65,8 +62,19 @@ Back to top - - + + + + + + + + + @@ -74,6 +82,7 @@ Ctrl+K - - + + @@ -116,7 +125,7 @@ - + FinaleToolkit @@ -171,21 +180,29 @@ - - - Search - Ctrl+K - + - - - - - + @@ -194,11 +211,15 @@ - - - Search - Ctrl+K - + @@ -217,8 +238,7 @@ - - + @@ -272,11 +292,15 @@ - - - - - + @@ -313,6 +337,8 @@ + + @@ -324,7 +350,7 @@ FinaleToolkit Documentation - CLI + CLI @@ -342,7 +368,7 @@ -CLI# +CLI# FinaleToolkit is a package and standalone 
program to extract fragmentation features of cell-free DNA from paired-end sequencing data. usage: finaletoolkit [-h] [-v] @@ -351,7 +377,7 @@ CLI# -Named Arguments# +Named Arguments# -v, --version show program’s version number and exit @@ -359,15 +385,17 @@ Named Arguments -Sub-commands# +Sub-commands# -coverage# +coverage# Calculates fragmentation coverage over intervals defined in a BED file based on alignment data from a BAM/SAM/CRAM/Fragment file. -finaletoolkit coverage [-h] [-o OUTPUT_FILE] [-n] [-s SCALE_FACTOR] [-p {midpoint,any}] [-q QUALITY_THRESHOLD] [-w WORKERS] [-v] input_file interval_file +finaletoolkit coverage [-h] [-o OUTPUT_FILE] [-n] [-s SCALE_FACTOR] [-min MIN_LENGTH] [-max MAX_LENGTH] [-p {midpoint,any}] + [-q QUALITY_THRESHOLD] [-w WORKERS] [-v] + input_file interval_file -Positional Arguments# +Positional Arguments# input_file Path to a BAM/SAM/CRAM/Fragment file containing fragment data. @@ -378,7 +406,7 @@ Positional Arguments -Named Arguments# +Named Arguments# -o, --output_file A BED file containing coverage values over the intervals specified in interval file. @@ -392,6 +420,13 @@ Named Arguments -frag-length-bins# +frag-length-bins# Retrieves fragment lengths grouped in bins given a BAM/SAM/CRAM/Fragment file. -finaletoolkit frag-length-bins [-h] [-c CONTIG] [-S START] [-E STOP] [-min MIN_LENGTH] [-max MAX_LENGTH] [-p {midpoint,any}] [--bin-size BIN_SIZE] - [-o OUTPUT_FILE] [--histogram-path HISTOGRAM_PATH] [-q QUALITY_THRESHOLD] [-v] +finaletoolkit frag-length-bins [-h] [-c CONTIG] [-S START] [-E STOP] [-min MIN_LENGTH] [-max MAX_LENGTH] [-p {midpoint,any}] + [--bin-size BIN_SIZE] [-o OUTPUT_FILE] [--histogram-path HISTOGRAM_PATH] [-q QUALITY_THRESHOLD] [-v] input_file -Positional Arguments# +Positional Arguments# input_file Path to a BAM/SAM/CRAM/Fragment file containing fragment data. @@ -429,7 +464,7 @@ Positional Arguments -Named Arguments# +Named Arguments# -c, --contig Specify the contig or chromosome to select fragments from. 
(Required if using –start or –stop.) @@ -475,15 +510,15 @@ Named Arguments -frag-length-intervals# +frag-length-intervals# Retrieves fragment length summary statistics over intervals defined in a BED file based on alignment data from a BAM/SAM/CRAM/Fragment file. -finaletoolkit frag-length-intervals [-h] [-min MIN_LENGTH] [-max MAX_LENGTH] [-p {midpoint,any}] [-o OUTPUT_FILE] [-q QUALITY_THRESHOLD] [-w WORKERS] - [-v] +finaletoolkit frag-length-intervals [-h] [-min MIN_LENGTH] [-max MAX_LENGTH] [-p {midpoint,any}] [-o OUTPUT_FILE] [-q QUALITY_THRESHOLD] + [-w WORKERS] [-v] input_file interval_file -Positional Arguments# +Positional Arguments# input_file Path to a BAM/SAM/CRAM/Fragment file containing fragment data. @@ -494,7 +529,7 @@ Positional Arguments -Named Arguments# +Named Arguments# -min, --min-length Minimum length for a fragment to be included in fragment length. @@ -528,14 +563,15 @@ Named Arguments
-End-Motifs# +End-Motifs# -class finaletoolkit.frag.EndMotifFreqs(kmer_frequencies: Iterable[tuple[str, float]], k: int, quality_threshold: int = 20)# +class finaletoolkit.frag.EndMotifFreqs(kmer_frequencies: Iterable[tuple[str, float]], k: int, quality_threshold: int = 20)# Class that stores frequencies of end-motif k-mer frequencies and contains methods to manipulate this data. @@ -372,7 +398,7 @@ End-Motifs -classmethod from_file(file_path: str | Path, quality_threshold: int, sep: str = '\t', header: int = 0) → EndMotifFreqs# +classmethod from_file(file_path: str | Path, quality_threshold: int, sep: str = '\t', header: int = 0) → EndMotifFreqs# Reads kmer frequency from a two-column tab-delimited file. Parameters: @@ -393,7 +419,7 @@ End-Motifs -motif_diversity_score() → float# +motif_diversity_score() → float# Calculates a motif diversity score (MDS) using normalized Shannon entropy as described by Jiang et al (2020). This function is generalized for any k instead of just 4-mers. @@ -401,7 +427,7 @@ End-Motifs -to_tsv(output_file: str | Path, sep: str = '\t')# +to_tsv(output_file: str | Path, sep: str = '\t')# Prints k-mer frequencies to a tsv @@ -409,7 +435,7 @@ End-Motifs -class finaletoolkit.frag.EndMotifsIntervals(intervals: Iterable[tuple[tuple, dict]], k: int, quality_threshold: int = 20)# +class finaletoolkit.frag.EndMotifsIntervals(intervals: Iterable[tuple[tuple, dict]], k: int, quality_threshold: int = 20)# Class that stores frequencies of end-motif k-mers over user-specified intervals and contains methods to manipulate this data. @@ -426,7 +452,7 @@ End-Motifs -freq(kmer: str) → list[tuple[str, int, int, float]]# +freq(kmer: str) → list[tuple[str, int, int, float]]# Returns a list of intervals and associated frquency for given kmer. Results are in the form (chrom, 0-based start, 1-based stop, frequency). 
@@ -434,7 +460,7 @@ End-Motifs -classmethod from_file(file_path: str, quality_threshold: int, sep: str = ',') → EndMotifFreqs# +classmethod from_file(file_path: str, quality_threshold: int, sep: str = ',', header: int = 0) → EndMotifsIntervals# Reads kmer frequency from a tab-delimited file. Expected columns are contig, start, stop, name, count, (kmers). Because exporting to file includes an option to turn counts to a fraction, @@ -451,20 +477,20 @@ End-Motifskmer_freqs Return type: -EndMotifFreqs +EndMotifsIntervals -mds_bed(output_file: str | Path, sep: str = '\t')# +mds_bed(output_file: str | Path, sep: str = '\t')# Writes MDS for each interval to a bed/bedgraph file. -motif_diversity_score() → list[tuple[tuple, float]]# +motif_diversity_score() → list[tuple[tuple, float]]# Calculates a motif diversity score (MDS) for each interval using normalized Shannon entropy as described by Jiang et al (2020). This function is generalized for any k instead of just 4-mers. @@ -472,7 +498,7 @@ End-Motifs -to_bed(kmer: str, output_file: str | Path, calc_freq: bool = True, sep: str = '\t')# +to_bed(kmer: str, output_file: str | Path, calc_freq: bool = True, sep: str = '\t')# Take frequency of specified kmer and writes to BED. Parameters: @@ -488,7 +514,7 @@ End-Motifs -to_bedgraph(kmer: str, output_file: str | Path, calc_freq: bool = True, sep: str = '\t')# +to_bedgraph(kmer: str, output_file: str | Path, calc_freq: bool = True, sep: str = '\t')# Take frequency of specified kmer and writes to bedgraph. Parameters: @@ -504,7 +530,7 @@ End-Motifs -to_tsv(output_file: str | Path, calc_freq: bool = True, sep: str = '\t')# +to_tsv(output_file: str | Path, calc_freq: bool = True, sep: str = '\t')# Writes all intervals and associated frquencies to file. Columns are contig, start, stop, name, count, (kmers). 
@@ -523,7 +549,7 @@ End-Motifs -finaletoolkit.frag.region_end_motifs(input_file: str, contig: str, start: int, stop: int, refseq_file: str | Path, k: int = 4, fraction_low: int = 10, fraction_high: int = 600, both_strands: bool = True, output_file: None | str = None, quality_threshold: int = 20, verbose: bool | int = False) → dict# +finaletoolkit.frag.region_end_motifs(input_file: str, contig: str, start: int, stop: int, refseq_file: str | Path, k: int = 4, fraction_low: int = 10, fraction_high: int = 600, both_strands: bool = True, output_file: str | None = None, quality_threshold: int = 20, verbose: bool | int = False) → dict# Function that reads fragments in the specified region from a BAM, SAM, or tabix indexed file and returns the 5’ k-mer (default is 4-mer) end motif counts as a dictionary. This function @@ -541,9 +567,9 @@ End-MotifsReturns: @@ -557,7 +583,7 @@ End-Motifs -finaletoolkit.frag.end_motifs(input_file: str, refseq_file: str | Path, k: int = 4, fraction_low: int = 10, fraction_high: int = 600, both_strands: bool = False, output_file: None | str = None, quality_threshold: int = 30, workers: int = 1, verbose: bool | int = False) → EndMotifFreqs# +finaletoolkit.frag.end_motifs(input_file: str, refseq_file: str | Path, k: int = 4, min_length: int = 10, max_length: int = 600, both_strands: bool = True, output_file: str | None = None, quality_threshold: int = 30, workers: int = 1, verbose: bool | int = False, fraction_low: int | None = None, fraction_high: int | None = None) → EndMotifFreqs# Function that reads fragments from a BAM, SAM, or tabix indexed file and returns the 5’ k-mer (default is 4-mer) end motif frequencies as a dictionary. Optionally writes data to a tsv. 
This @@ -569,10 +595,15 @@ End-MotifsReturns: @@ -586,7 +617,7 @@ End-Motifs -finaletoolkit.frag.interval_end_motifs(input_file: str, refseq_file: str | Path, intervals: str | Iterable[tuple[str, int, int, str]], k: int = 4, fraction_low: int = 10, fraction_high: int = 600, both_strands: bool = True, output_file: None | str = None, quality_threshold: int = 30, workers: int = 1, verbose: bool | int = False) → EndMotifsIntervals# +finaletoolkit.frag.interval_end_motifs(input_file: str, refseq_file: str | Path, intervals: str | Iterable[tuple[str, int, int, str]], k: int = 4, min_length: int = 10, max_length: int = 600, both_strands: bool = True, output_file: str | None = None, quality_threshold: int = 30, workers: int = 1, verbose: bool | int = False, fraction_low: int | None = None, fraction_high: int | None = None) → EndMotifsIntervals# Function that reads fragments from a BAM, SAM, or tabix indexed file and user-specified intervals and returns the 5’ k-mer (default is 4-mer) end motif. Optionally writes data to a tsv. 
@@ -601,7 +632,7 @@ End-MotifsReturns: @@ -650,8 +681,7 @@ End-Motifs - + @@ -706,8 +736,8 @@ End-Motifs - + + diff --git a/docs/_build/html/documentation/api_reference/fragfile.html b/docs/_build/html/documentation/api_reference/fragfile.html index fb2b7355..14a6975d 100644 --- a/docs/_build/html/documentation/api_reference/fragfile.html +++ b/docs/_build/html/documentation/api_reference/fragfile.html @@ -1,12 +1,13 @@ + - + - + Frag File Utilities — FinaleToolkit documentation @@ -16,31 +17,28 @@ document.documentElement.dataset.mode = localStorage.getItem("mode") || ""; document.documentElement.dataset.theme = localStorage.getItem("theme") || ""; - - - - - - + + + + + + + + + - + - - - - + + + - - - + + + @@ -49,7 +47,6 @@ - @@ -65,8 +62,19 @@ Back to top - - + + + + + + + + + @@ -74,6 +82,7 @@ Ctrl+K - - + + @@ -116,7 +125,7 @@ - + FinaleToolkit @@ -171,21 +180,29 @@ - - - Search - Ctrl+K - + - - - - - + @@ -194,11 +211,15 @@ - - - Search - Ctrl+K - + @@ -217,8 +238,7 @@ - - + @@ -272,11 +292,15 @@ - - - - - + @@ -322,6 +346,8 @@ + + @@ -336,7 +362,7 @@ API - Frag File Utilities + Frag File Utilities @@ -354,26 +380,27 @@ -Frag File Utilities# +Frag File Utilities# -finaletoolkit.utils.filter_bam(input_file: str, region_file: str | None = None, output_file: str | None = None, max_length: int | None = None, min_length: int | None = None, quality_threshold: int = 30, workers: int = 1, verbose: bool = False)# +finaletoolkit.utils.filter_bam(input_file: str, region_file: str | None = None, output_file: str | None = None, min_length: int | None = None, max_length: int | None = None, quality_threshold: int = 30, workers: int = 1, verbose: bool = False, fraction_low: int | None = None, fraction_high: int | None = None)# Accepts the path to a BAM file and creates a bam file where all reads are read1 in a proper pair, exceed the specified quality -threshold, do not intersect a region in the given blacklist -file, and intersects with a region in the region bed. 
+threshold, and intersects with a region in the region bed. Parameters: input_bam (str) – Path string or AlignmentFile pointing to the BAM file to be filtered. -region_file (str, option) -output_file (str, optional) -min_length (int, optional) -max_length (int, optional) -quality_threshold (int, optional) -workers (int, optional) -verbose (bool, optional) +region_file (str, option) – +output_file (str, optional) – +min_length (int, optional) – +max_length (int, optional) – +quality_threshold (int, optional) – +workers (int, optional) – +verbose (bool, optional) – +fraction_low (int, optional) – Deprecated alias for min_length +fraction_high (int, optional) – Deprecated alias for max_length Returns: @@ -387,7 +414,7 @@ Frag File Utilities -finaletoolkit.utils.agg_bw(input_file: str | PathLike, interval_file: str | PathLike, output_file: str | PathLike, median_window_size: int = 120, mean: bool = False, strand_location: int = 5, verbose: bool = False)# +finaletoolkit.utils.agg_bw(input_file: str | PathLike, interval_file: str | PathLike, output_file: str | PathLike, median_window_size: int = 120, mean: bool = False, verbose: bool = False)# Takes a BigWig and an interval BED and aggregates signal along the intervals with a median filter. For aggregating WPS signals, note that the median filter trims the @@ -406,13 +433,11 @@ Frag File Utilities Parameters: -input_file (str) -interval_file (str) -output_file (str) +input_file (str) – +interval_file (str) – BED file containing intervals. 6th column should have strand. +output_file (str) – median_window_size (int, optional) – default is 0 mean (bool) – use mean instead -strand_location (int) – which column (starting at 0) of the interval file contains the -strand. Default is 5. 
verbose (int or bool, optional) – default is False @@ -427,7 +452,7 @@ Frag File Utilities -finaletoolkit.utils.chrom_sizes_to_list(chrom_sizes_file: str | Path) → list[tuple[str][int]]# +finaletoolkit.utils.chrom_sizes_to_list(chrom_sizes_file: str | Path) → list[tuple[str, int]]# Reads chromosome names and sizes from a CHROMSIZE file into a list. Parameters: @@ -445,7 +470,7 @@ Frag File Utilities -finaletoolkit.utils.chrom_sizes_to_dict(chrom_sizes_file: str | Path) → list[tuple[str][int]]# +finaletoolkit.utils.chrom_sizes_to_dict(chrom_sizes_file: str | Path) → list[tuple[str][int]]# Reads chromosome names and sizes from a CHROMSIZE file into a dict. Parameters: @@ -463,19 +488,19 @@ Frag File Utilities -finaletoolkit.utils.frag_generator(input_file: str | pysam.AlignmentFile | pysam.TabixFile | Path, contig: str | None, quality_threshold: int = 30, start: int | None = None, stop: int | None = None, fraction_low: int = 120, fraction_high: int = 180, intersect_policy: str = 'midpoint', verbose: bool = False) → Generator[tuple]# +finaletoolkit.utils.frag_generator(input_file: FragFile, contig: str | None, quality_threshold: int = 30, start: int | None = None, stop: int | None = None, fraction_low: int = None, fraction_high: int = None, intersect_policy: str = 'midpoint', verbose: bool = False) → Generator[tuple]# Reads from BAM, SAM, or BED file and returns tuples containing contig (chromosome), start, stop (end), mapq, and strand for each fragment. Optionally may filter for mapq, size, and intersection with a region. Parameters: -input_file (str, pathlike, or AlignmentFile) – Fragment coordinates stored as a SAM, BAM, CRAM, tabix-indexed +input_file (str, pathlike, TabixFile, or AlignmentFile) – Fragment coordinates stored as a SAM, BAM, CRAM, tabix-indexed bed.gz, or tabix-indexed FinaleDB fragment file. 
-contig (str) -quality_threshold (int, optional) -start (int, optional) -stop (int, optional) +contig (str) – +quality_threshold (int, optional) – +start (int, optional) – +stop (int, optional) – fraction_low (int, optional) – Specifies lowest fragment length included in array. Default is 120, equivalent to long fraction. fraction_high (int, optional) – Specifies highest fragment length included in array. Default is @@ -485,7 +510,7 @@ Frag File UtilitiesReturns: @@ -501,17 +526,17 @@ Frag File Utilities -finaletoolkit.utils.frag_array(input_file: str | AlignmentFile | TabixFile | Path, contig: str, quality_threshold: int = 30, start: int | None = None, stop: int | None = None, fraction_low: int = 120, fraction_high: int = 180, intersect_policy: str = 'midpoint', verbose: bool = False) → ndarray[Any, dtype[_ScalarType_co]]# +finaletoolkit.utils.frag_array(input_file: str | PathLike | AlignmentFile | TabixFile, contig: str, quality_threshold: int = 30, start: int | None = None, stop: int | None = None, fraction_low: int = 120, fraction_high: int = 180, intersect_policy: str = 'midpoint', verbose: bool = False) → ndarray[Any, dtype[ScalarType]]# Reads from BAM, SAM, or BED file and returns a three column matrix with fragment start and stop positions and strand. Parameters: -input_file (str or AlignmentFile) -contig (str) -quality_threshold (int, optional) -start (int, optional) -stop (int, optional) +input_file (str or AlignmentFile) – +contig (str) – +quality_threshold (int, optional) – +start (int, optional) – +stop (int, optional) – fraction_low (int, optional) – Specifies lowest fragment length included in array. Default is 120, equivalent to long fraction. fraction_high (int, optional) – Specifies highest fragment length included in array. 
Default is @@ -522,7 +547,7 @@ Frag File UtilitiesReturns: @@ -540,7 +565,7 @@ Frag File Utilities -finaletoolkit.utils.low_quality_read_pairs(read, min_mapq=30)# +finaletoolkit.utils.low_quality_read_pairs(read, min_mapq=30)# Return True if the sequenced read described in read is not a properly paired read with a Phred score exceeding min_mapq. Based on epifluidlab/cofragr @@ -565,7 +590,7 @@ Frag File Utilities -finaletoolkit.utils.overlaps(contigs_1: ndarray[Any, dtype[_ScalarType_co]], starts_1: ndarray[Any, dtype[_ScalarType_co]], stops_1: ndarray[Any, dtype[_ScalarType_co]], contigs_2: ndarray[Any, dtype[_ScalarType_co]], starts_2: ndarray[Any, dtype[_ScalarType_co]], stops_2: ndarray[Any, dtype[_ScalarType_co]]) → ndarray[Any, dtype[_ScalarType_co]]# +finaletoolkit.utils.overlaps(contigs_1: ndarray[Any, dtype[ScalarType]], starts_1: ndarray[Any, dtype[ScalarType]], stops_1: ndarray[Any, dtype[ScalarType]], contigs_2: ndarray[Any, dtype[ScalarType]], starts_2: ndarray[Any, dtype[ScalarType]], stops_2: ndarray[Any, dtype[ScalarType]]) → ndarray[Any, dtype[ScalarType]]# Function that performs vectorized computation of overlaps. Returns an array of same shape as contig_1 that is true if the intervals for set 1 each have any overlap with an interval in set 2. 
@@ -608,8 +633,7 @@ Frag File Utilities - + @@ -653,8 +677,8 @@ Frag File Utilities - + + diff --git a/docs/_build/html/documentation/api_reference/genomeutils.html b/docs/_build/html/documentation/api_reference/genomeutils.html index 7e139c45..1f363505 100644 --- a/docs/_build/html/documentation/api_reference/genomeutils.html +++ b/docs/_build/html/documentation/api_reference/genomeutils.html @@ -1,12 +1,13 @@ + - + - + Genome Utilities — FinaleToolkit documentation @@ -16,31 +17,28 @@ document.documentElement.dataset.mode = localStorage.getItem("mode") || ""; document.documentElement.dataset.theme = localStorage.getItem("theme") || ""; - - - - - - + + + + + + + + + - + - - - - + + + - - - + + + @@ -48,7 +46,6 @@ - @@ -64,8 +61,19 @@ Back to top - - + + + + + + + + + @@ -73,6 +81,7 @@ Ctrl+K - - + + @@ -115,7 +124,7 @@ - + FinaleToolkit @@ -170,21 +179,29 @@ - - - Search - Ctrl+K - + - - - - - + @@ -193,11 +210,15 @@ - - - Search - Ctrl+K - + @@ -216,8 +237,7 @@ - - + @@ -271,11 +291,15 @@ - - - - - + @@ -321,6 +345,8 @@ + + @@ -335,7 +361,7 @@ API - Genome Utilities + Genome Utilities @@ -353,16 +379,16 @@ -Genome Utilities# +Genome Utilities# -class finaletoolkit.genome.GenomeGaps(gaps_bed: PathLike | str | None = None)# +class finaletoolkit.genome.GenomeGaps(gaps_bed: PathLike | str | None = None)# Reads telomere, centromere, and short_arm intervals from a bed file or generates these intervals from UCSC gap and centromere tracks for hg19 and hg38. -classmethod b37()# +classmethod b37()# Creates a GenomeGaps for the Broad Institute GRCh37 reference genome i.e b37. This reference genome is also based on GRCh37, but differs from the UCSC hg19 reference in a few ways, @@ -383,7 +409,7 @@ Genome Utilities -get_arm(contig: str, start: int, stop: int) → str# +get_arm(contig: str, start: int, stop: int) → str# Returns the chromosome arm the interval is in. If in the short arm of an acrocentric chromosome or intersects a centromere, returns an empty string. 
@@ -410,7 +436,7 @@ Genome Utilities -get_contig_gaps(contig: str) → ContigGaps# +get_contig_gaps(contig: str) → ContigGaps# Creates a ContigGaps for the specified chromosome Parameters: @@ -427,7 +453,7 @@ Genome Utilities -classmethod hg38()# +classmethod hg38()# Creates a GenomeGaps for the hg38 reference genome. This sequences uses chromosome names that start with ‘chr’ and is synonymous with the GRCh38 reference genome. @@ -437,7 +463,7 @@ Genome Utilities -in_tcmere(contig: str, start: int, stop: int) → bool# +in_tcmere(contig: str, start: int, stop: int) → bool# Checks if specified interval is in a centromere or telomere Parameters: @@ -458,7 +484,7 @@ Genome Utilities -overlaps_gap(contig: str, start: int, stop: int) → bool# +overlaps_gap(contig: str, start: int, stop: int) → bool# Checks if specified interval overlaps a gap interval Parameters: @@ -479,7 +505,7 @@ Genome Utilities -to_bed(output_file: str | PathLike)# +to_bed(output_file: str | PathLike)# Prints gap intervals in GenomeGaps to a BED4 file where the name is the type of gap interval. @@ -492,7 +518,7 @@ Genome Utilities -classmethod ucsc_hg19()# +classmethod ucsc_hg19()# Creates a GenomeGaps for the UCSC hg19 reference genome. This sequences uses chromosome names that start with ‘chr’ and is based on a version of the GRCh37 reference genome. @@ -510,10 +536,10 @@ Genome Utilities -class finaletoolkit.genome.ContigGaps(contig: str, centromere: tuple[int, int], telomeres: Iterable[tuple[int, int]], has_short_arm: bool = False)# +class finaletoolkit.genome.ContigGaps(contig: str, centromere: tuple[int, int], telomeres: Iterable[tuple[int, int]], has_short_arm: bool = False)# -get_arm(start: int, stop: int)# +get_arm(start: int, stop: int)# Returns name of chromosome arm the interval is in. Returns “NOARM” if in a centromere, telomere, or short arm of an acrocentric chromosome. 
@@ -538,7 +564,7 @@ Genome Utilities -in_gap(start: int, stop: int) → bool# +in_gap(start: int, stop: int) → bool# Checks if specified interval is in a gap. Parameters: @@ -558,7 +584,7 @@ Genome Utilities -in_tcmere(start: int, stop: int) → bool# +in_tcmere(start: int, stop: int) → bool# Checks if specified interval is in a centromere or telomere. Parameters: @@ -580,7 +606,7 @@ Genome Utilities -finaletoolkit.genome.ucsc_hg19_gap_bed(output_file: str | PathLike)# +finaletoolkit.genome.ucsc_hg19_gap_bed(output_file: str | PathLike)# Creates BED4 of centromeres, telomeres, and short arms for the UCSC hg19 reference sequence. @@ -592,7 +618,7 @@ Genome Utilities -finaletoolkit.genome.b37_gap_bed(output_file: str | PathLike)# +finaletoolkit.genome.b37_gap_bed(output_file: str | PathLike)# Creates BED4 of centromeres, telomeres, and short arms for the Broad Institute GRCh37 (b37) reference sequence. Also useful for files aligned to human_g1k_v37 (1000 Genomes Project). @@ -605,7 +631,7 @@ Genome Utilities -finaletoolkit.genome.ucsc_hg38_gap_bed(output_file: str | PathLike)# +finaletoolkit.genome.ucsc_hg38_gap_bed(output_file: str | PathLike)# Creates BED4 of centromeres, telomeres, and short arms for the UCSC hg38 reference sequence. 
@@ -643,8 +669,7 @@ Genome Utilities - + @@ -700,8 +725,8 @@ Genome Utilities - + + diff --git a/docs/_build/html/documentation/api_reference/index.html b/docs/_build/html/documentation/api_reference/index.html index 7cf5852d..cc84fce6 100644 --- a/docs/_build/html/documentation/api_reference/index.html +++ b/docs/_build/html/documentation/api_reference/index.html @@ -1,12 +1,13 @@ + - + - + API — FinaleToolkit documentation @@ -16,31 +17,28 @@ document.documentElement.dataset.mode = localStorage.getItem("mode") || ""; document.documentElement.dataset.theme = localStorage.getItem("theme") || ""; - - - - - - + + + + + + + + + - + - - - - + + + - - - + + + @@ -49,7 +47,6 @@ - @@ -65,8 +62,19 @@ Back to top - - + + + + + + + + + @@ -74,6 +82,7 @@ Ctrl+K - - + + @@ -116,7 +125,7 @@ - + FinaleToolkit @@ -171,21 +180,29 @@ - - - Search - Ctrl+K - + - - - - - + @@ -194,11 +211,15 @@ - - - Search - Ctrl+K - + @@ -217,8 +238,7 @@ - - + @@ -272,11 +292,15 @@ - - - - - + @@ -322,6 +346,8 @@ + + @@ -333,7 +359,7 @@ FinaleToolkit Documentation - API + API @@ -351,7 +377,7 @@ -API# +API# Basic Features @@ -442,8 +468,7 @@ API# - - + @@ -468,8 +493,8 @@ API# - - + + @@ -194,11 +211,15 @@ - - - Search - Ctrl+K - + @@ -217,8 +238,7 @@ - - + @@ -272,11 +292,15 @@ - - - - - + @@ -322,6 +346,8 @@ + + @@ -336,7 +362,7 @@ API - Window Protection Score (WPS) + Window... 
@@ -354,10 +380,10 @@ -Window Protection Score (WPS)# +Window Protection Score (WPS)# -finaletoolkit.frag.wps(input_file: str | AlignmentFile, contig: str, start: int | str, stop: int | str, output_file: str | None = None, window_size: int = 120, fraction_low: int = 120, fraction_high: int = 180, quality_threshold: int = 30, verbose: bool | int = 0) → ndarray# +finaletoolkit.frag.wps(input_file: str | AlignmentFile, contig: str, start: int | str, stop: int | str, output_file: str | None = None, window_size: int = 120, fraction_low: int = 120, fraction_high: int = 180, quality_threshold: int = 30, verbose: bool | int = 0) → ndarray# Return (raw) Windowed Protection Scores as specified in Snyder et al (2016) over a region [start,stop). @@ -365,19 +391,19 @@ Window Protection Score (WPS) input_file (str or pysam.AlignmentFile) – BAM, SAM or tabix file containing paired-end fragment reads or its path. AlignmentFile must be opened in read mode. -contig (str) -start (int) -stop (int) -output_file (string, optional) +contig (str) – +start (int) – +stop (int) – +output_file (string, optional) – window_size (int, optional) – Size of window to calculate WPS. Default is k = 120, equivalent to L-WPS. fraction_low (int, optional) – Specifies lowest fragment length included in calculation. Default is 120, equivalent to long fraction. fraction_high (int, optional) – Specifies highest fragment length included in calculation. Default is 180, equivalent to long fraction. 
-quality_threshold (int, optional) -workers (int, optional) -verbose (bool, optional) +quality_threshold (int, optional) – +workers (int, optional) – +verbose (bool, optional) – Returns: @@ -391,7 +417,7 @@ Window Protection Score (WPS) -finaletoolkit.frag.multi_wps(input_file: AlignmentFile | str, site_bed: str, output_file: None | str = None, window_size: int = 120, interval_size: int = 5000, fraction_low: int = 120, fraction_high: int = 180, quality_threshold: int = 30, workers: int = 1, verbose: bool | int = 0) → ndarray# +finaletoolkit.frag.multi_wps(input_file: AlignmentFile | str, site_bed: str, chrom_sizes: str | None = None, output_file: str | None = None, window_size: int = 120, interval_size: int = 5000, min_length: int = 120, max_length: int = 180, quality_threshold: int = 30, workers: int = 1, verbose: bool | int = 0, fraction_low: int | None = None, fraction_high: int | None = None)# Function that aggregates WPS over sites in BED file according to the method described by Snyder et al (2016). @@ -402,34 +428,36 @@ Window Protection Score (WPS)Returns: -scores – np array of shape (n, 2) where column 1 is the coordinate and -column 2 is the score and n is the number of coordinates in -region [start,stop) +output_file – location results are stored. 
Return type: -numpy.ndarray +str -finaletoolkit.frag.adjust_wps(input_file: str, interval_file: str, output_file: str, genome_file: str, interval_size: int = 5000, median_window_size: int = 1000, savgol_window_size: int = 21, savgol_poly_deg: int = 2, mean: bool = False, subtract_edges: bool = False, edge_size: int = 500, workers: int = 1, verbose: Union(bool, int) = False)# +finaletoolkit.frag.adjust_wps(input_file: str, interval_file: str, output_file: str, chrom_sizes: str, interval_size: int = 5000, median_window_size: int = 1000, savgol_window_size: int = 21, savgol_poly_deg: int = 2, mean: bool = False, subtract_edges: bool = False, edge_size: int = 500, workers: int = 1, verbose: Union(bool, int) = False)# Adjusts raw WPS data in a BigWig by applying a median filter and Savitsky-Golay filter (Savitsky and Golay, 1964). @@ -439,9 +467,10 @@ Window Protection Score (WPS) - + @@ -533,8 +562,8 @@ Window Protection Score (WPS) - + + diff --git a/docs/_build/html/documentation/cli_reference/index.html b/docs/_build/html/documentation/cli_reference/index.html index 803fb18d..35ab51d3 100644 --- a/docs/_build/html/documentation/cli_reference/index.html +++ b/docs/_build/html/documentation/cli_reference/index.html @@ -1,12 +1,13 @@ + - + - + CLI — FinaleToolkit documentation @@ -16,31 +17,28 @@ document.documentElement.dataset.mode = localStorage.getItem("mode") || ""; document.documentElement.dataset.theme = localStorage.getItem("theme") || ""; - - - - - - + + + - - - + + + + + + + - - + + + - - - + + + @@ -49,7 +47,6 @@ - @@ -65,8 +62,19 @@ Back to top - - + + + + + + + + + @@ -74,6 +82,7 @@ Ctrl+K - - + + @@ -116,7 +125,7 @@ - + FinaleToolkit @@ -171,21 +180,29 @@ - - - Search - Ctrl+K - + - - - - - + @@ -194,11 +211,15 @@ - - - Search - Ctrl+K - + @@ -217,8 +238,7 @@ - - + @@ -272,11 +292,15 @@ - - - - - + @@ -313,6 +337,8 @@ + + @@ -324,7 +350,7 @@ FinaleToolkit Documentation - CLI + CLI @@ -342,7 +368,7 @@ -CLI# +CLI# FinaleToolkit is a package and standalone 
program to extract fragmentation features of cell-free DNA from paired-end sequencing data. usage: finaletoolkit [-h] [-v] @@ -351,7 +377,7 @@ CLI# -Named Arguments# +Named Arguments# -v, --version show program’s version number and exit @@ -359,15 +385,17 @@ Named Arguments -Sub-commands# +Sub-commands# -coverage# +coverage# Calculates fragmentation coverage over intervals defined in a BED file based on alignment data from a BAM/SAM/CRAM/Fragment file. -finaletoolkit coverage [-h] [-o OUTPUT_FILE] [-n] [-s SCALE_FACTOR] [-p {midpoint,any}] [-q QUALITY_THRESHOLD] [-w WORKERS] [-v] input_file interval_file +finaletoolkit coverage [-h] [-o OUTPUT_FILE] [-n] [-s SCALE_FACTOR] [-min MIN_LENGTH] [-max MAX_LENGTH] [-p {midpoint,any}] + [-q QUALITY_THRESHOLD] [-w WORKERS] [-v] + input_file interval_file -Positional Arguments# +Positional Arguments# input_file Path to a BAM/SAM/CRAM/Fragment file containing fragment data. @@ -378,7 +406,7 @@ Positional Arguments -Named Arguments# +Named Arguments# -o, --output_file A BED file containing coverage values over the intervals specified in interval file. @@ -392,6 +420,13 @@ Named Arguments -frag-length-bins# +frag-length-bins# Retrieves fragment lengths grouped in bins given a BAM/SAM/CRAM/Fragment file. -finaletoolkit frag-length-bins [-h] [-c CONTIG] [-S START] [-E STOP] [-min MIN_LENGTH] [-max MAX_LENGTH] [-p {midpoint,any}] [--bin-size BIN_SIZE] - [-o OUTPUT_FILE] [--histogram-path HISTOGRAM_PATH] [-q QUALITY_THRESHOLD] [-v] +finaletoolkit frag-length-bins [-h] [-c CONTIG] [-S START] [-E STOP] [-min MIN_LENGTH] [-max MAX_LENGTH] [-p {midpoint,any}] + [--bin-size BIN_SIZE] [-o OUTPUT_FILE] [--histogram-path HISTOGRAM_PATH] [-q QUALITY_THRESHOLD] [-v] input_file -Positional Arguments# +Positional Arguments# input_file Path to a BAM/SAM/CRAM/Fragment file containing fragment data. @@ -429,7 +464,7 @@ Positional Arguments -Named Arguments# +Named Arguments# -c, --contig Specify the contig or chromosome to select fragments from. 
(Required if using –start or –stop.) @@ -475,15 +510,15 @@ Named Arguments -frag-length-intervals# +frag-length-intervals# Retrieves fragment length summary statistics over intervals defined in a BED file based on alignment data from a BAM/SAM/CRAM/Fragment file. -finaletoolkit frag-length-intervals [-h] [-min MIN_LENGTH] [-max MAX_LENGTH] [-p {midpoint,any}] [-o OUTPUT_FILE] [-q QUALITY_THRESHOLD] [-w WORKERS] - [-v] +finaletoolkit frag-length-intervals [-h] [-min MIN_LENGTH] [-max MAX_LENGTH] [-p {midpoint,any}] [-o OUTPUT_FILE] [-q QUALITY_THRESHOLD] + [-w WORKERS] [-v] input_file interval_file -Positional Arguments# +Positional Arguments# input_file Path to a BAM/SAM/CRAM/Fragment file containing fragment data. @@ -494,7 +529,7 @@ Positional Arguments -Named Arguments# +Named Arguments# -min, --min-length Minimum length for a fragment to be included in fragment length. @@ -528,14 +563,15 @@ Named Arguments
-Frag File Utilities# +Frag File Utilities# -finaletoolkit.utils.filter_bam(input_file: str, region_file: str | None = None, output_file: str | None = None, max_length: int | None = None, min_length: int | None = None, quality_threshold: int = 30, workers: int = 1, verbose: bool = False)# +finaletoolkit.utils.filter_bam(input_file: str, region_file: str | None = None, output_file: str | None = None, min_length: int | None = None, max_length: int | None = None, quality_threshold: int = 30, workers: int = 1, verbose: bool = False, fraction_low: int | None = None, fraction_high: int | None = None)# Accepts the path to a BAM file and creates a bam file where all reads are read1 in a proper pair, exceed the specified quality -threshold, do not intersect a region in the given blacklist -file, and intersects with a region in the region bed. +threshold, and intersects with a region in the region bed. Parameters: input_bam (str) – Path string or AlignmentFile pointing to the BAM file to be filtered. 
-region_file (str, option) -output_file (str, optional) -min_length (int, optional) -max_length (int, optional) -quality_threshold (int, optional) -workers (int, optional) -verbose (bool, optional) +region_file (str, option) – +output_file (str, optional) – +min_length (int, optional) – +max_length (int, optional) – +quality_threshold (int, optional) – +workers (int, optional) – +verbose (bool, optional) – +fraction_low (int, optional) – Deprecated alias for min_length +fraction_high (int, optional) – Deprecated alias for max_length Returns: @@ -387,7 +414,7 @@ Frag File Utilities -finaletoolkit.utils.agg_bw(input_file: str | PathLike, interval_file: str | PathLike, output_file: str | PathLike, median_window_size: int = 120, mean: bool = False, strand_location: int = 5, verbose: bool = False)# +finaletoolkit.utils.agg_bw(input_file: str | PathLike, interval_file: str | PathLike, output_file: str | PathLike, median_window_size: int = 120, mean: bool = False, verbose: bool = False)# Takes a BigWig and an interval BED and aggregates signal along the intervals with a median filter. For aggregating WPS signals, note that the median filter trims the @@ -406,13 +433,11 @@ Frag File Utilities Parameters: -input_file (str) -interval_file (str) -output_file (str) +input_file (str) – +interval_file (str) – BED file containing intervals. 6th column should have strand. +output_file (str) – median_window_size (int, optional) – default is 0 mean (bool) – use mean instead -strand_location (int) – which column (starting at 0) of the interval file contains the -strand. Default is 5. verbose (int or bool, optional) – default is False @@ -427,7 +452,7 @@ Frag File Utilities -finaletoolkit.utils.chrom_sizes_to_list(chrom_sizes_file: str | Path) → list[tuple[str][int]]# +finaletoolkit.utils.chrom_sizes_to_list(chrom_sizes_file: str | Path) → list[tuple[str, int]]# Reads chromosome names and sizes from a CHROMSIZE file into a list. 
Parameters: @@ -445,7 +470,7 @@ Frag File Utilities -finaletoolkit.utils.chrom_sizes_to_dict(chrom_sizes_file: str | Path) → list[tuple[str][int]]# +finaletoolkit.utils.chrom_sizes_to_dict(chrom_sizes_file: str | Path) → list[tuple[str][int]]# Reads chromosome names and sizes from a CHROMSIZE file into a dict. Parameters: @@ -463,19 +488,19 @@ Frag File Utilities -finaletoolkit.utils.frag_generator(input_file: str | pysam.AlignmentFile | pysam.TabixFile | Path, contig: str | None, quality_threshold: int = 30, start: int | None = None, stop: int | None = None, fraction_low: int = 120, fraction_high: int = 180, intersect_policy: str = 'midpoint', verbose: bool = False) → Generator[tuple]# +finaletoolkit.utils.frag_generator(input_file: FragFile, contig: str | None, quality_threshold: int = 30, start: int | None = None, stop: int | None = None, fraction_low: int = None, fraction_high: int = None, intersect_policy: str = 'midpoint', verbose: bool = False) → Generator[tuple]# Reads from BAM, SAM, or BED file and returns tuples containing contig (chromosome), start, stop (end), mapq, and strand for each fragment. Optionally may filter for mapq, size, and intersection with a region. Parameters: -input_file (str, pathlike, or AlignmentFile) – Fragment coordinates stored as a SAM, BAM, CRAM, tabix-indexed +input_file (str, pathlike, TabixFile, or AlignmentFile) – Fragment coordinates stored as a SAM, BAM, CRAM, tabix-indexed bed.gz, or tabix-indexed FinaleDB fragment file. -contig (str) -quality_threshold (int, optional) -start (int, optional) -stop (int, optional) +contig (str) – +quality_threshold (int, optional) – +start (int, optional) – +stop (int, optional) – fraction_low (int, optional) – Specifies lowest fragment length included in array. Default is 120, equivalent to long fraction. fraction_high (int, optional) – Specifies highest fragment length included in array. 
Default is @@ -485,7 +510,7 @@ Frag File UtilitiesReturns: @@ -501,17 +526,17 @@ Frag File Utilities -finaletoolkit.utils.frag_array(input_file: str | AlignmentFile | TabixFile | Path, contig: str, quality_threshold: int = 30, start: int | None = None, stop: int | None = None, fraction_low: int = 120, fraction_high: int = 180, intersect_policy: str = 'midpoint', verbose: bool = False) → ndarray[Any, dtype[_ScalarType_co]]# +finaletoolkit.utils.frag_array(input_file: str | PathLike | AlignmentFile | TabixFile, contig: str, quality_threshold: int = 30, start: int | None = None, stop: int | None = None, fraction_low: int = 120, fraction_high: int = 180, intersect_policy: str = 'midpoint', verbose: bool = False) → ndarray[Any, dtype[ScalarType]]# Reads from BAM, SAM, or BED file and returns a three column matrix with fragment start and stop positions and strand. Parameters: -input_file (str or AlignmentFile) -contig (str) -quality_threshold (int, optional) -start (int, optional) -stop (int, optional) +input_file (str or AlignmentFile) – +contig (str) – +quality_threshold (int, optional) – +start (int, optional) – +stop (int, optional) – fraction_low (int, optional) – Specifies lowest fragment length included in array. Default is 120, equivalent to long fraction. fraction_high (int, optional) – Specifies highest fragment length included in array. Default is @@ -522,7 +547,7 @@ Frag File UtilitiesReturns: @@ -540,7 +565,7 @@ Frag File Utilities -finaletoolkit.utils.low_quality_read_pairs(read, min_mapq=30)# +finaletoolkit.utils.low_quality_read_pairs(read, min_mapq=30)# Return True if the sequenced read described in read is not a properly paired read with a Phred score exceeding min_mapq. 
Based on epifluidlab/cofragr @@ -565,7 +590,7 @@ Frag File Utilities -finaletoolkit.utils.overlaps(contigs_1: ndarray[Any, dtype[_ScalarType_co]], starts_1: ndarray[Any, dtype[_ScalarType_co]], stops_1: ndarray[Any, dtype[_ScalarType_co]], contigs_2: ndarray[Any, dtype[_ScalarType_co]], starts_2: ndarray[Any, dtype[_ScalarType_co]], stops_2: ndarray[Any, dtype[_ScalarType_co]]) → ndarray[Any, dtype[_ScalarType_co]]# +finaletoolkit.utils.overlaps(contigs_1: ndarray[Any, dtype[ScalarType]], starts_1: ndarray[Any, dtype[ScalarType]], stops_1: ndarray[Any, dtype[ScalarType]], contigs_2: ndarray[Any, dtype[ScalarType]], starts_2: ndarray[Any, dtype[ScalarType]], stops_2: ndarray[Any, dtype[ScalarType]]) → ndarray[Any, dtype[ScalarType]]# Function that performs vectorized computation of overlaps. Returns an array of same shape as contig_1 that is true if the intervals for set 1 each have any overlap with an interval in set 2. @@ -608,8 +633,7 @@ Frag File Utilities - + @@ -653,8 +677,8 @@ Frag File Utilities - + + diff --git a/docs/_build/html/documentation/api_reference/genomeutils.html b/docs/_build/html/documentation/api_reference/genomeutils.html index 7e139c45..1f363505 100644 --- a/docs/_build/html/documentation/api_reference/genomeutils.html +++ b/docs/_build/html/documentation/api_reference/genomeutils.html @@ -1,12 +1,13 @@ + - + - + Genome Utilities — FinaleToolkit documentation @@ -16,31 +17,28 @@ document.documentElement.dataset.mode = localStorage.getItem("mode") || ""; document.documentElement.dataset.theme = localStorage.getItem("theme") || ""; - - - - - - + + + + + + + + + - + - - - - + + + - - - + + + @@ -48,7 +46,6 @@ - @@ -64,8 +61,19 @@ Back to top - - + + + + + + + + + @@ -73,6 +81,7 @@ Ctrl+K - - + + @@ -115,7 +124,7 @@ - + FinaleToolkit @@ -170,21 +179,29 @@ - - - Search - Ctrl+K - + - - - - - + @@ -193,11 +210,15 @@ - - - Search - Ctrl+K - + @@ -216,8 +237,7 @@ - - + @@ -271,11 +291,15 @@ - - - - - + @@ -321,6 +345,8 @@ + + @@ -335,7 +361,7 @@ API 
- Genome Utilities + Genome Utilities @@ -353,16 +379,16 @@ -Genome Utilities# +Genome Utilities# -class finaletoolkit.genome.GenomeGaps(gaps_bed: PathLike | str | None = None)# +class finaletoolkit.genome.GenomeGaps(gaps_bed: PathLike | str | None = None)# Reads telomere, centromere, and short_arm intervals from a bed file or generates these intervals from UCSC gap and centromere tracks for hg19 and hg38. -classmethod b37()# +classmethod b37()# Creates a GenomeGaps for the Broad Institute GRCh37 reference genome i.e b37. This reference genome is also based on GRCh37, but differs from the UCSC hg19 reference in a few ways, @@ -383,7 +409,7 @@ Genome Utilities -get_arm(contig: str, start: int, stop: int) → str# +get_arm(contig: str, start: int, stop: int) → str# Returns the chromosome arm the interval is in. If in the short arm of an acrocentric chromosome or intersects a centromere, returns an empty string. @@ -410,7 +436,7 @@ Genome Utilities -get_contig_gaps(contig: str) → ContigGaps# +get_contig_gaps(contig: str) → ContigGaps# Creates a ContigGaps for the specified chromosome Parameters: @@ -427,7 +453,7 @@ Genome Utilities -classmethod hg38()# +classmethod hg38()# Creates a GenomeGaps for the hg38 reference genome. This sequences uses chromosome names that start with ‘chr’ and is synonymous with the GRCh38 reference genome. @@ -437,7 +463,7 @@ Genome Utilities -in_tcmere(contig: str, start: int, stop: int) → bool# +in_tcmere(contig: str, start: int, stop: int) → bool# Checks if specified interval is in a centromere or telomere Parameters: @@ -458,7 +484,7 @@ Genome Utilities -overlaps_gap(contig: str, start: int, stop: int) → bool# +overlaps_gap(contig: str, start: int, stop: int) → bool# Checks if specified interval overlaps a gap interval Parameters: @@ -479,7 +505,7 @@ Genome Utilities -to_bed(output_file: str | PathLike)# +to_bed(output_file: str | PathLike)# Prints gap intervals in GenomeGaps to a BED4 file where the name is the type of gap interval. 
@@ -492,7 +518,7 @@ Genome Utilities -classmethod ucsc_hg19()# +classmethod ucsc_hg19()# Creates a GenomeGaps for the UCSC hg19 reference genome. This sequences uses chromosome names that start with ‘chr’ and is based on a version of the GRCh37 reference genome. @@ -510,10 +536,10 @@ Genome Utilities -class finaletoolkit.genome.ContigGaps(contig: str, centromere: tuple[int, int], telomeres: Iterable[tuple[int, int]], has_short_arm: bool = False)# +class finaletoolkit.genome.ContigGaps(contig: str, centromere: tuple[int, int], telomeres: Iterable[tuple[int, int]], has_short_arm: bool = False)# -get_arm(start: int, stop: int)# +get_arm(start: int, stop: int)# Returns name of chromosome arm the interval is in. Returns “NOARM” if in a centromere, telomere, or short arm of an acrocentric chromosome. @@ -538,7 +564,7 @@ Genome Utilities -in_gap(start: int, stop: int) → bool# +in_gap(start: int, stop: int) → bool# Checks if specified interval is in a gap. Parameters: @@ -558,7 +584,7 @@ Genome Utilities -in_tcmere(start: int, stop: int) → bool# +in_tcmere(start: int, stop: int) → bool# Checks if specified interval is in a centromere or telomere. Parameters: @@ -580,7 +606,7 @@ Genome Utilities -finaletoolkit.genome.ucsc_hg19_gap_bed(output_file: str | PathLike)# +finaletoolkit.genome.ucsc_hg19_gap_bed(output_file: str | PathLike)# Creates BED4 of centromeres, telomeres, and short arms for the UCSC hg19 reference sequence. @@ -592,7 +618,7 @@ Genome Utilities -finaletoolkit.genome.b37_gap_bed(output_file: str | PathLike)# +finaletoolkit.genome.b37_gap_bed(output_file: str | PathLike)# Creates BED4 of centromeres, telomeres, and short arms for the Broad Institute GRCh37 (b37) reference sequence. Also useful for files aligned to human_g1k_v37 (1000 Genomes Project). 
@@ -605,7 +631,7 @@ Genome Utilities -finaletoolkit.genome.ucsc_hg38_gap_bed(output_file: str | PathLike)# +finaletoolkit.genome.ucsc_hg38_gap_bed(output_file: str | PathLike)# Creates BED4 of centromeres, telomeres, and short arms for the UCSC hg38 reference sequence. @@ -643,8 +669,7 @@ Genome Utilities - + @@ -700,8 +725,8 @@ Genome Utilities - + + diff --git a/docs/_build/html/documentation/api_reference/index.html b/docs/_build/html/documentation/api_reference/index.html index 7cf5852d..cc84fce6 100644 --- a/docs/_build/html/documentation/api_reference/index.html +++ b/docs/_build/html/documentation/api_reference/index.html @@ -1,12 +1,13 @@ + - + - + API — FinaleToolkit documentation @@ -16,31 +17,28 @@ document.documentElement.dataset.mode = localStorage.getItem("mode") || ""; document.documentElement.dataset.theme = localStorage.getItem("theme") || ""; - - - - - - + + + + + + + + + - + - - - - + + + - - - + + + @@ -49,7 +47,6 @@ - @@ -65,8 +62,19 @@ Back to top - - + + + + + + + + + @@ -74,6 +82,7 @@ Ctrl+K - - + + @@ -116,7 +125,7 @@ - + FinaleToolkit @@ -171,21 +180,29 @@ - - - Search - Ctrl+K - + - - - - - + @@ -194,11 +211,15 @@ - - - Search - Ctrl+K - + @@ -217,8 +238,7 @@ - - + @@ -272,11 +292,15 @@ - - - - - + @@ -322,6 +346,8 @@ + + @@ -333,7 +359,7 @@ FinaleToolkit Documentation - API + API @@ -351,7 +377,7 @@ -API# +API# Basic Features @@ -442,8 +468,7 @@ API# - - + @@ -468,8 +493,8 @@ API# - - + + @@ -194,11 +211,15 @@ - - - Search - Ctrl+K - + @@ -217,8 +238,7 @@ - - + @@ -272,11 +292,15 @@ - - - - - + @@ -322,6 +346,8 @@ + + @@ -336,7 +362,7 @@ API - Window Protection Score (WPS) + Window... 
@@ -354,10 +380,10 @@ -Window Protection Score (WPS)# +Window Protection Score (WPS)# -finaletoolkit.frag.wps(input_file: str | AlignmentFile, contig: str, start: int | str, stop: int | str, output_file: str | None = None, window_size: int = 120, fraction_low: int = 120, fraction_high: int = 180, quality_threshold: int = 30, verbose: bool | int = 0) → ndarray# +finaletoolkit.frag.wps(input_file: str | AlignmentFile, contig: str, start: int | str, stop: int | str, output_file: str | None = None, window_size: int = 120, fraction_low: int = 120, fraction_high: int = 180, quality_threshold: int = 30, verbose: bool | int = 0) → ndarray# Return (raw) Windowed Protection Scores as specified in Snyder et al (2016) over a region [start,stop). @@ -365,19 +391,19 @@ Window Protection Score (WPS) input_file (str or pysam.AlignmentFile) – BAM, SAM or tabix file containing paired-end fragment reads or its path. AlignmentFile must be opened in read mode. -contig (str) -start (int) -stop (int) -output_file (string, optional) +contig (str) – +start (int) – +stop (int) – +output_file (string, optional) – window_size (int, optional) – Size of window to calculate WPS. Default is k = 120, equivalent to L-WPS. fraction_low (int, optional) – Specifies lowest fragment length included in calculation. Default is 120, equivalent to long fraction. fraction_high (int, optional) – Specifies highest fragment length included in calculation. Default is 180, equivalent to long fraction. 
-quality_threshold (int, optional) -workers (int, optional) -verbose (bool, optional) +quality_threshold (int, optional) – +workers (int, optional) – +verbose (bool, optional) – Returns: @@ -391,7 +417,7 @@ Window Protection Score (WPS) -finaletoolkit.frag.multi_wps(input_file: AlignmentFile | str, site_bed: str, output_file: None | str = None, window_size: int = 120, interval_size: int = 5000, fraction_low: int = 120, fraction_high: int = 180, quality_threshold: int = 30, workers: int = 1, verbose: bool | int = 0) → ndarray# +finaletoolkit.frag.multi_wps(input_file: AlignmentFile | str, site_bed: str, chrom_sizes: str | None = None, output_file: str | None = None, window_size: int = 120, interval_size: int = 5000, min_length: int = 120, max_length: int = 180, quality_threshold: int = 30, workers: int = 1, verbose: bool | int = 0, fraction_low: int | None = None, fraction_high: int | None = None)# Function that aggregates WPS over sites in BED file according to the method described by Snyder et al (2016). @@ -402,34 +428,36 @@ Window Protection Score (WPS)Returns: -scores – np array of shape (n, 2) where column 1 is the coordinate and -column 2 is the score and n is the number of coordinates in -region [start,stop) +output_file – location results are stored. 
Return type: -numpy.ndarray +str -finaletoolkit.frag.adjust_wps(input_file: str, interval_file: str, output_file: str, genome_file: str, interval_size: int = 5000, median_window_size: int = 1000, savgol_window_size: int = 21, savgol_poly_deg: int = 2, mean: bool = False, subtract_edges: bool = False, edge_size: int = 500, workers: int = 1, verbose: Union(bool, int) = False)# +finaletoolkit.frag.adjust_wps(input_file: str, interval_file: str, output_file: str, chrom_sizes: str, interval_size: int = 5000, median_window_size: int = 1000, savgol_window_size: int = 21, savgol_poly_deg: int = 2, mean: bool = False, subtract_edges: bool = False, edge_size: int = 500, workers: int = 1, verbose: Union(bool, int) = False)# Adjusts raw WPS data in a BigWig by applying a median filter and Savitsky-Golay filter (Savitsky and Golay, 1964). @@ -439,9 +467,10 @@ Window Protection Score (WPS) - + @@ -533,8 +562,8 @@ Window Protection Score (WPS) - + + diff --git a/docs/_build/html/documentation/cli_reference/index.html b/docs/_build/html/documentation/cli_reference/index.html index 803fb18d..35ab51d3 100644 --- a/docs/_build/html/documentation/cli_reference/index.html +++ b/docs/_build/html/documentation/cli_reference/index.html @@ -1,12 +1,13 @@ + - + - + CLI — FinaleToolkit documentation @@ -16,31 +17,28 @@ document.documentElement.dataset.mode = localStorage.getItem("mode") || ""; document.documentElement.dataset.theme = localStorage.getItem("theme") || ""; - - - - - - + + + - - - + + + + + + + - - + + + - - - + + + @@ -49,7 +47,6 @@ - @@ -65,8 +62,19 @@ Back to top - - + + + + + + + + + @@ -74,6 +82,7 @@ Ctrl+K - - + + @@ -116,7 +125,7 @@ - + FinaleToolkit @@ -171,21 +180,29 @@ - - - Search - Ctrl+K - + - - - - - + @@ -194,11 +211,15 @@ - - - Search - Ctrl+K - + @@ -217,8 +238,7 @@ - - + @@ -272,11 +292,15 @@ - - - - - + @@ -313,6 +337,8 @@ + + @@ -324,7 +350,7 @@ FinaleToolkit Documentation - CLI + CLI @@ -342,7 +368,7 @@ -CLI# +CLI# FinaleToolkit is a package and standalone 
program to extract fragmentation features of cell-free DNA from paired-end sequencing data. usage: finaletoolkit [-h] [-v] @@ -351,7 +377,7 @@ CLI# -Named Arguments# +Named Arguments# -v, --version show program’s version number and exit @@ -359,15 +385,17 @@ Named Arguments -Sub-commands# +Sub-commands# -coverage# +coverage# Calculates fragmentation coverage over intervals defined in a BED file based on alignment data from a BAM/SAM/CRAM/Fragment file. -finaletoolkit coverage [-h] [-o OUTPUT_FILE] [-n] [-s SCALE_FACTOR] [-p {midpoint,any}] [-q QUALITY_THRESHOLD] [-w WORKERS] [-v] input_file interval_file +finaletoolkit coverage [-h] [-o OUTPUT_FILE] [-n] [-s SCALE_FACTOR] [-min MIN_LENGTH] [-max MAX_LENGTH] [-p {midpoint,any}] + [-q QUALITY_THRESHOLD] [-w WORKERS] [-v] + input_file interval_file -Positional Arguments# +Positional Arguments# input_file Path to a BAM/SAM/CRAM/Fragment file containing fragment data. @@ -378,7 +406,7 @@ Positional Arguments -Named Arguments# +Named Arguments# -o, --output_file A BED file containing coverage values over the intervals specified in interval file. @@ -392,6 +420,13 @@ Named Arguments -frag-length-bins# +frag-length-bins# Retrieves fragment lengths grouped in bins given a BAM/SAM/CRAM/Fragment file. -finaletoolkit frag-length-bins [-h] [-c CONTIG] [-S START] [-E STOP] [-min MIN_LENGTH] [-max MAX_LENGTH] [-p {midpoint,any}] [--bin-size BIN_SIZE] - [-o OUTPUT_FILE] [--histogram-path HISTOGRAM_PATH] [-q QUALITY_THRESHOLD] [-v] +finaletoolkit frag-length-bins [-h] [-c CONTIG] [-S START] [-E STOP] [-min MIN_LENGTH] [-max MAX_LENGTH] [-p {midpoint,any}] + [--bin-size BIN_SIZE] [-o OUTPUT_FILE] [--histogram-path HISTOGRAM_PATH] [-q QUALITY_THRESHOLD] [-v] input_file -Positional Arguments# +Positional Arguments# input_file Path to a BAM/SAM/CRAM/Fragment file containing fragment data. @@ -429,7 +464,7 @@ Positional Arguments -Named Arguments# +Named Arguments# -c, --contig Specify the contig or chromosome to select fragments from. 
(Required if using –start or –stop.) @@ -475,15 +510,15 @@ Named Arguments -frag-length-intervals# +frag-length-intervals# Retrieves fragment length summary statistics over intervals defined in a BED file based on alignment data from a BAM/SAM/CRAM/Fragment file. -finaletoolkit frag-length-intervals [-h] [-min MIN_LENGTH] [-max MAX_LENGTH] [-p {midpoint,any}] [-o OUTPUT_FILE] [-q QUALITY_THRESHOLD] [-w WORKERS] - [-v] +finaletoolkit frag-length-intervals [-h] [-min MIN_LENGTH] [-max MAX_LENGTH] [-p {midpoint,any}] [-o OUTPUT_FILE] [-q QUALITY_THRESHOLD] + [-w WORKERS] [-v] input_file interval_file -Positional Arguments# +Positional Arguments# input_file Path to a BAM/SAM/CRAM/Fragment file containing fragment data. @@ -494,7 +529,7 @@ Positional Arguments -Named Arguments# +Named Arguments# -min, --min-length Minimum length for a fragment to be included in fragment length. @@ -528,14 +563,15 @@ Named Arguments
-Genome Utilities# +Genome Utilities# -class finaletoolkit.genome.GenomeGaps(gaps_bed: PathLike | str | None = None)# +class finaletoolkit.genome.GenomeGaps(gaps_bed: PathLike | str | None = None)# Reads telomere, centromere, and short_arm intervals from a bed file or generates these intervals from UCSC gap and centromere tracks for hg19 and hg38. -classmethod b37()# +classmethod b37()# Creates a GenomeGaps for the Broad Institute GRCh37 reference genome i.e b37. This reference genome is also based on GRCh37, but differs from the UCSC hg19 reference in a few ways, @@ -383,7 +409,7 @@ Genome Utilities -get_arm(contig: str, start: int, stop: int) → str# +get_arm(contig: str, start: int, stop: int) → str# Returns the chromosome arm the interval is in. If in the short arm of an acrocentric chromosome or intersects a centromere, returns an empty string. @@ -410,7 +436,7 @@ Genome Utilities -get_contig_gaps(contig: str) → ContigGaps# +get_contig_gaps(contig: str) → ContigGaps# Creates a ContigGaps for the specified chromosome Parameters: @@ -427,7 +453,7 @@ Genome Utilities -classmethod hg38()# +classmethod hg38()# Creates a GenomeGaps for the hg38 reference genome. This sequences uses chromosome names that start with ‘chr’ and is synonymous with the GRCh38 reference genome. @@ -437,7 +463,7 @@ Genome Utilities -in_tcmere(contig: str, start: int, stop: int) → bool# +in_tcmere(contig: str, start: int, stop: int) → bool# Checks if specified interval is in a centromere or telomere Parameters: @@ -458,7 +484,7 @@ Genome Utilities -overlaps_gap(contig: str, start: int, stop: int) → bool# +overlaps_gap(contig: str, start: int, stop: int) → bool# Checks if specified interval overlaps a gap interval Parameters: @@ -479,7 +505,7 @@ Genome Utilities -to_bed(output_file: str | PathLike)# +to_bed(output_file: str | PathLike)# Prints gap intervals in GenomeGaps to a BED4 file where the name is the type of gap interval. 
@@ -492,7 +518,7 @@ Genome Utilities -classmethod ucsc_hg19()# +classmethod ucsc_hg19()# Creates a GenomeGaps for the UCSC hg19 reference genome. This sequences uses chromosome names that start with ‘chr’ and is based on a version of the GRCh37 reference genome. @@ -510,10 +536,10 @@ Genome Utilities -class finaletoolkit.genome.ContigGaps(contig: str, centromere: tuple[int, int], telomeres: Iterable[tuple[int, int]], has_short_arm: bool = False)# +class finaletoolkit.genome.ContigGaps(contig: str, centromere: tuple[int, int], telomeres: Iterable[tuple[int, int]], has_short_arm: bool = False)# -get_arm(start: int, stop: int)# +get_arm(start: int, stop: int)# Returns name of chromosome arm the interval is in. Returns “NOARM” if in a centromere, telomere, or short arm of an acrocentric chromosome. @@ -538,7 +564,7 @@ Genome Utilities -in_gap(start: int, stop: int) → bool# +in_gap(start: int, stop: int) → bool# Checks if specified interval is in a gap. Parameters: @@ -558,7 +584,7 @@ Genome Utilities -in_tcmere(start: int, stop: int) → bool# +in_tcmere(start: int, stop: int) → bool# Checks if specified interval is in a centromere or telomere. Parameters: @@ -580,7 +606,7 @@ Genome Utilities -finaletoolkit.genome.ucsc_hg19_gap_bed(output_file: str | PathLike)# +finaletoolkit.genome.ucsc_hg19_gap_bed(output_file: str | PathLike)# Creates BED4 of centromeres, telomeres, and short arms for the UCSC hg19 reference sequence. @@ -592,7 +618,7 @@ Genome Utilities -finaletoolkit.genome.b37_gap_bed(output_file: str | PathLike)# +finaletoolkit.genome.b37_gap_bed(output_file: str | PathLike)# Creates BED4 of centromeres, telomeres, and short arms for the Broad Institute GRCh37 (b37) reference sequence. Also useful for files aligned to human_g1k_v37 (1000 Genomes Project). 
@@ -605,7 +631,7 @@ Genome Utilities -finaletoolkit.genome.ucsc_hg38_gap_bed(output_file: str | PathLike)# +finaletoolkit.genome.ucsc_hg38_gap_bed(output_file: str | PathLike)# Creates BED4 of centromeres, telomeres, and short arms for the UCSC hg38 reference sequence. @@ -643,8 +669,7 @@ Genome Utilities - + @@ -700,8 +725,8 @@ Genome Utilities - + + diff --git a/docs/_build/html/documentation/api_reference/index.html b/docs/_build/html/documentation/api_reference/index.html index 7cf5852d..cc84fce6 100644 --- a/docs/_build/html/documentation/api_reference/index.html +++ b/docs/_build/html/documentation/api_reference/index.html @@ -1,12 +1,13 @@ + - + - + API — FinaleToolkit documentation @@ -16,31 +17,28 @@ document.documentElement.dataset.mode = localStorage.getItem("mode") || ""; document.documentElement.dataset.theme = localStorage.getItem("theme") || ""; - - - - - - + + + + + + + + + - + - - - - + + + - - - + + + @@ -49,7 +47,6 @@ - @@ -65,8 +62,19 @@ Back to top - - + + + + + + + + + @@ -74,6 +82,7 @@ Ctrl+K - - + + @@ -116,7 +125,7 @@ - + FinaleToolkit @@ -171,21 +180,29 @@ - - - Search - Ctrl+K - + - - - - - + @@ -194,11 +211,15 @@ - - - Search - Ctrl+K - + @@ -217,8 +238,7 @@ - - + @@ -272,11 +292,15 @@ - - - - - + @@ -322,6 +346,8 @@ + + @@ -333,7 +359,7 @@ FinaleToolkit Documentation - API + API @@ -351,7 +377,7 @@ -API# +API# Basic Features @@ -442,8 +468,7 @@ API# - - + @@ -468,8 +493,8 @@ API# - - + + @@ -194,11 +211,15 @@ - - - Search - Ctrl+K - + @@ -217,8 +238,7 @@ - - + @@ -272,11 +292,15 @@ - - - - - + @@ -322,6 +346,8 @@ + + @@ -336,7 +362,7 @@ API - Window Protection Score (WPS) + Window... 
@@ -354,10 +380,10 @@ -Window Protection Score (WPS)# +Window Protection Score (WPS)# -finaletoolkit.frag.wps(input_file: str | AlignmentFile, contig: str, start: int | str, stop: int | str, output_file: str | None = None, window_size: int = 120, fraction_low: int = 120, fraction_high: int = 180, quality_threshold: int = 30, verbose: bool | int = 0) → ndarray# +finaletoolkit.frag.wps(input_file: str | AlignmentFile, contig: str, start: int | str, stop: int | str, output_file: str | None = None, window_size: int = 120, fraction_low: int = 120, fraction_high: int = 180, quality_threshold: int = 30, verbose: bool | int = 0) → ndarray# Return (raw) Windowed Protection Scores as specified in Snyder et al (2016) over a region [start,stop). @@ -365,19 +391,19 @@ Window Protection Score (WPS) input_file (str or pysam.AlignmentFile) – BAM, SAM or tabix file containing paired-end fragment reads or its path. AlignmentFile must be opened in read mode. -contig (str) -start (int) -stop (int) -output_file (string, optional) +contig (str) – +start (int) – +stop (int) – +output_file (string, optional) – window_size (int, optional) – Size of window to calculate WPS. Default is k = 120, equivalent to L-WPS. fraction_low (int, optional) – Specifies lowest fragment length included in calculation. Default is 120, equivalent to long fraction. fraction_high (int, optional) – Specifies highest fragment length included in calculation. Default is 180, equivalent to long fraction. 
-quality_threshold (int, optional) -workers (int, optional) -verbose (bool, optional) +quality_threshold (int, optional) – +workers (int, optional) – +verbose (bool, optional) – Returns: @@ -391,7 +417,7 @@ Window Protection Score (WPS) -finaletoolkit.frag.multi_wps(input_file: AlignmentFile | str, site_bed: str, output_file: None | str = None, window_size: int = 120, interval_size: int = 5000, fraction_low: int = 120, fraction_high: int = 180, quality_threshold: int = 30, workers: int = 1, verbose: bool | int = 0) → ndarray# +finaletoolkit.frag.multi_wps(input_file: AlignmentFile | str, site_bed: str, chrom_sizes: str | None = None, output_file: str | None = None, window_size: int = 120, interval_size: int = 5000, min_length: int = 120, max_length: int = 180, quality_threshold: int = 30, workers: int = 1, verbose: bool | int = 0, fraction_low: int | None = None, fraction_high: int | None = None)# Function that aggregates WPS over sites in BED file according to the method described by Snyder et al (2016). @@ -402,34 +428,36 @@ Window Protection Score (WPS)Returns: -scores – np array of shape (n, 2) where column 1 is the coordinate and -column 2 is the score and n is the number of coordinates in -region [start,stop) +output_file – location results are stored. 
Return type: -numpy.ndarray +str -finaletoolkit.frag.adjust_wps(input_file: str, interval_file: str, output_file: str, genome_file: str, interval_size: int = 5000, median_window_size: int = 1000, savgol_window_size: int = 21, savgol_poly_deg: int = 2, mean: bool = False, subtract_edges: bool = False, edge_size: int = 500, workers: int = 1, verbose: Union(bool, int) = False)# +finaletoolkit.frag.adjust_wps(input_file: str, interval_file: str, output_file: str, chrom_sizes: str, interval_size: int = 5000, median_window_size: int = 1000, savgol_window_size: int = 21, savgol_poly_deg: int = 2, mean: bool = False, subtract_edges: bool = False, edge_size: int = 500, workers: int = 1, verbose: Union(bool, int) = False)# Adjusts raw WPS data in a BigWig by applying a median filter and Savitsky-Golay filter (Savitsky and Golay, 1964). @@ -439,9 +467,10 @@ Window Protection Score (WPS) - + @@ -533,8 +562,8 @@ Window Protection Score (WPS) - + + diff --git a/docs/_build/html/documentation/cli_reference/index.html b/docs/_build/html/documentation/cli_reference/index.html index 803fb18d..35ab51d3 100644 --- a/docs/_build/html/documentation/cli_reference/index.html +++ b/docs/_build/html/documentation/cli_reference/index.html @@ -1,12 +1,13 @@ + - + - + CLI — FinaleToolkit documentation @@ -16,31 +17,28 @@ document.documentElement.dataset.mode = localStorage.getItem("mode") || ""; document.documentElement.dataset.theme = localStorage.getItem("theme") || ""; - - - - - - + + + - - - + + + + + + + - - + + + - - - + + + @@ -49,7 +47,6 @@ - @@ -65,8 +62,19 @@ Back to top - - + + + + + + + + + @@ -74,6 +82,7 @@ Ctrl+K - - + + @@ -116,7 +125,7 @@ - + FinaleToolkit @@ -171,21 +180,29 @@ - - - Search - Ctrl+K - + - - - - - + @@ -194,11 +211,15 @@ - - - Search - Ctrl+K - + @@ -217,8 +238,7 @@ - - + @@ -272,11 +292,15 @@ - - - - - + @@ -313,6 +337,8 @@ + + @@ -324,7 +350,7 @@ FinaleToolkit Documentation - CLI + CLI @@ -342,7 +368,7 @@ -CLI# +CLI# FinaleToolkit is a package and standalone 
program to extract fragmentation features of cell-free DNA from paired-end sequencing data. usage: finaletoolkit [-h] [-v] @@ -351,7 +377,7 @@ CLI# -Named Arguments# +Named Arguments# -v, --version show program’s version number and exit @@ -359,15 +385,17 @@ Named Arguments -Sub-commands# +Sub-commands# -coverage# +coverage# Calculates fragmentation coverage over intervals defined in a BED file based on alignment data from a BAM/SAM/CRAM/Fragment file. -finaletoolkit coverage [-h] [-o OUTPUT_FILE] [-n] [-s SCALE_FACTOR] [-p {midpoint,any}] [-q QUALITY_THRESHOLD] [-w WORKERS] [-v] input_file interval_file +finaletoolkit coverage [-h] [-o OUTPUT_FILE] [-n] [-s SCALE_FACTOR] [-min MIN_LENGTH] [-max MAX_LENGTH] [-p {midpoint,any}] + [-q QUALITY_THRESHOLD] [-w WORKERS] [-v] + input_file interval_file -Positional Arguments# +Positional Arguments# input_file Path to a BAM/SAM/CRAM/Fragment file containing fragment data. @@ -378,7 +406,7 @@ Positional Arguments -Named Arguments# +Named Arguments# -o, --output_file A BED file containing coverage values over the intervals specified in interval file. @@ -392,6 +420,13 @@ Named Arguments -frag-length-bins# +frag-length-bins# Retrieves fragment lengths grouped in bins given a BAM/SAM/CRAM/Fragment file. -finaletoolkit frag-length-bins [-h] [-c CONTIG] [-S START] [-E STOP] [-min MIN_LENGTH] [-max MAX_LENGTH] [-p {midpoint,any}] [--bin-size BIN_SIZE] - [-o OUTPUT_FILE] [--histogram-path HISTOGRAM_PATH] [-q QUALITY_THRESHOLD] [-v] +finaletoolkit frag-length-bins [-h] [-c CONTIG] [-S START] [-E STOP] [-min MIN_LENGTH] [-max MAX_LENGTH] [-p {midpoint,any}] + [--bin-size BIN_SIZE] [-o OUTPUT_FILE] [--histogram-path HISTOGRAM_PATH] [-q QUALITY_THRESHOLD] [-v] input_file -Positional Arguments# +Positional Arguments# input_file Path to a BAM/SAM/CRAM/Fragment file containing fragment data. @@ -429,7 +464,7 @@ Positional Arguments -Named Arguments# +Named Arguments# -c, --contig Specify the contig or chromosome to select fragments from. 
(Required if using –start or –stop.) @@ -475,15 +510,15 @@ Named Arguments -frag-length-intervals# +frag-length-intervals# Retrieves fragment length summary statistics over intervals defined in a BED file based on alignment data from a BAM/SAM/CRAM/Fragment file. -finaletoolkit frag-length-intervals [-h] [-min MIN_LENGTH] [-max MAX_LENGTH] [-p {midpoint,any}] [-o OUTPUT_FILE] [-q QUALITY_THRESHOLD] [-w WORKERS] - [-v] +finaletoolkit frag-length-intervals [-h] [-min MIN_LENGTH] [-max MAX_LENGTH] [-p {midpoint,any}] [-o OUTPUT_FILE] [-q QUALITY_THRESHOLD] + [-w WORKERS] [-v] input_file interval_file -Positional Arguments# +Positional Arguments# input_file Path to a BAM/SAM/CRAM/Fragment file containing fragment data. @@ -494,7 +529,7 @@ Positional Arguments -Named Arguments# +Named Arguments# -min, --min-length Minimum length for a fragment to be included in fragment length. @@ -528,14 +563,15 @@ Named Arguments
-API# +API# Basic Features @@ -442,8 +468,7 @@ API# - - + @@ -468,8 +493,8 @@ API# - - + + @@ -194,11 +211,15 @@ - - - Search - Ctrl+K - + @@ -217,8 +238,7 @@ - - + @@ -272,11 +292,15 @@ - - - - - + @@ -322,6 +346,8 @@ + + @@ -336,7 +362,7 @@ API - Window Protection Score (WPS) + Window... @@ -354,10 +380,10 @@ -Window Protection Score (WPS)# +Window Protection Score (WPS)# -finaletoolkit.frag.wps(input_file: str | AlignmentFile, contig: str, start: int | str, stop: int | str, output_file: str | None = None, window_size: int = 120, fraction_low: int = 120, fraction_high: int = 180, quality_threshold: int = 30, verbose: bool | int = 0) → ndarray# +finaletoolkit.frag.wps(input_file: str | AlignmentFile, contig: str, start: int | str, stop: int | str, output_file: str | None = None, window_size: int = 120, fraction_low: int = 120, fraction_high: int = 180, quality_threshold: int = 30, verbose: bool | int = 0) → ndarray# Return (raw) Windowed Protection Scores as specified in Snyder et al (2016) over a region [start,stop). @@ -365,19 +391,19 @@ Window Protection Score (WPS) input_file (str or pysam.AlignmentFile) – BAM, SAM or tabix file containing paired-end fragment reads or its path. AlignmentFile must be opened in read mode. -contig (str) -start (int) -stop (int) -output_file (string, optional) +contig (str) – +start (int) – +stop (int) – +output_file (string, optional) – window_size (int, optional) – Size of window to calculate WPS. Default is k = 120, equivalent to L-WPS. fraction_low (int, optional) – Specifies lowest fragment length included in calculation. Default is 120, equivalent to long fraction. fraction_high (int, optional) – Specifies highest fragment length included in calculation. Default is 180, equivalent to long fraction. 
-quality_threshold (int, optional) -workers (int, optional) -verbose (bool, optional) +quality_threshold (int, optional) – +workers (int, optional) – +verbose (bool, optional) – Returns: @@ -391,7 +417,7 @@ Window Protection Score (WPS) -finaletoolkit.frag.multi_wps(input_file: AlignmentFile | str, site_bed: str, output_file: None | str = None, window_size: int = 120, interval_size: int = 5000, fraction_low: int = 120, fraction_high: int = 180, quality_threshold: int = 30, workers: int = 1, verbose: bool | int = 0) → ndarray# +finaletoolkit.frag.multi_wps(input_file: AlignmentFile | str, site_bed: str, chrom_sizes: str | None = None, output_file: str | None = None, window_size: int = 120, interval_size: int = 5000, min_length: int = 120, max_length: int = 180, quality_threshold: int = 30, workers: int = 1, verbose: bool | int = 0, fraction_low: int | None = None, fraction_high: int | None = None)# Function that aggregates WPS over sites in BED file according to the method described by Snyder et al (2016). @@ -402,34 +428,36 @@ Window Protection Score (WPS)Returns: -scores – np array of shape (n, 2) where column 1 is the coordinate and -column 2 is the score and n is the number of coordinates in -region [start,stop) +output_file – location results are stored. 
Return type: -numpy.ndarray +str -finaletoolkit.frag.adjust_wps(input_file: str, interval_file: str, output_file: str, genome_file: str, interval_size: int = 5000, median_window_size: int = 1000, savgol_window_size: int = 21, savgol_poly_deg: int = 2, mean: bool = False, subtract_edges: bool = False, edge_size: int = 500, workers: int = 1, verbose: Union(bool, int) = False)# +finaletoolkit.frag.adjust_wps(input_file: str, interval_file: str, output_file: str, chrom_sizes: str, interval_size: int = 5000, median_window_size: int = 1000, savgol_window_size: int = 21, savgol_poly_deg: int = 2, mean: bool = False, subtract_edges: bool = False, edge_size: int = 500, workers: int = 1, verbose: Union(bool, int) = False)# Adjusts raw WPS data in a BigWig by applying a median filter and Savitsky-Golay filter (Savitsky and Golay, 1964). @@ -439,9 +467,10 @@ Window Protection Score (WPS) - + @@ -533,8 +562,8 @@ Window Protection Score (WPS) - + + diff --git a/docs/_build/html/documentation/cli_reference/index.html b/docs/_build/html/documentation/cli_reference/index.html index 803fb18d..35ab51d3 100644 --- a/docs/_build/html/documentation/cli_reference/index.html +++ b/docs/_build/html/documentation/cli_reference/index.html @@ -1,12 +1,13 @@ + - + - + CLI — FinaleToolkit documentation @@ -16,31 +17,28 @@ document.documentElement.dataset.mode = localStorage.getItem("mode") || ""; document.documentElement.dataset.theme = localStorage.getItem("theme") || ""; - - - - - - + + + - - - + + + + + + + - - + + + - - - + + + @@ -49,7 +47,6 @@ - @@ -65,8 +62,19 @@ Back to top - - + + + + + + + + + @@ -74,6 +82,7 @@ Ctrl+K - - + + @@ -116,7 +125,7 @@ - + FinaleToolkit @@ -171,21 +180,29 @@ - - - Search - Ctrl+K - + - - - - - + @@ -194,11 +211,15 @@ - - - Search - Ctrl+K - + @@ -217,8 +238,7 @@ - - + @@ -272,11 +292,15 @@ - - - - - + @@ -313,6 +337,8 @@ + + @@ -324,7 +350,7 @@ FinaleToolkit Documentation - CLI + CLI @@ -342,7 +368,7 @@ -CLI# +CLI# FinaleToolkit is a package and standalone 
program to extract fragmentation features of cell-free DNA from paired-end sequencing data. usage: finaletoolkit [-h] [-v] @@ -351,7 +377,7 @@ CLI# -Named Arguments# +Named Arguments# -v, --version show program’s version number and exit @@ -359,15 +385,17 @@ Named Arguments -Sub-commands# +Sub-commands# -coverage# +coverage# Calculates fragmentation coverage over intervals defined in a BED file based on alignment data from a BAM/SAM/CRAM/Fragment file. -finaletoolkit coverage [-h] [-o OUTPUT_FILE] [-n] [-s SCALE_FACTOR] [-p {midpoint,any}] [-q QUALITY_THRESHOLD] [-w WORKERS] [-v] input_file interval_file +finaletoolkit coverage [-h] [-o OUTPUT_FILE] [-n] [-s SCALE_FACTOR] [-min MIN_LENGTH] [-max MAX_LENGTH] [-p {midpoint,any}] + [-q QUALITY_THRESHOLD] [-w WORKERS] [-v] + input_file interval_file -Positional Arguments# +Positional Arguments# input_file Path to a BAM/SAM/CRAM/Fragment file containing fragment data. @@ -378,7 +406,7 @@ Positional Arguments -Named Arguments# +Named Arguments# -o, --output_file A BED file containing coverage values over the intervals specified in interval file. @@ -392,6 +420,13 @@ Named Arguments -frag-length-bins# +frag-length-bins# Retrieves fragment lengths grouped in bins given a BAM/SAM/CRAM/Fragment file. -finaletoolkit frag-length-bins [-h] [-c CONTIG] [-S START] [-E STOP] [-min MIN_LENGTH] [-max MAX_LENGTH] [-p {midpoint,any}] [--bin-size BIN_SIZE] - [-o OUTPUT_FILE] [--histogram-path HISTOGRAM_PATH] [-q QUALITY_THRESHOLD] [-v] +finaletoolkit frag-length-bins [-h] [-c CONTIG] [-S START] [-E STOP] [-min MIN_LENGTH] [-max MAX_LENGTH] [-p {midpoint,any}] + [--bin-size BIN_SIZE] [-o OUTPUT_FILE] [--histogram-path HISTOGRAM_PATH] [-q QUALITY_THRESHOLD] [-v] input_file -Positional Arguments# +Positional Arguments# input_file Path to a BAM/SAM/CRAM/Fragment file containing fragment data. @@ -429,7 +464,7 @@ Positional Arguments -Named Arguments# +Named Arguments# -c, --contig Specify the contig or chromosome to select fragments from. 
(Required if using –start or –stop.) @@ -475,15 +510,15 @@ Named Arguments -frag-length-intervals# +frag-length-intervals# Retrieves fragment length summary statistics over intervals defined in a BED file based on alignment data from a BAM/SAM/CRAM/Fragment file. -finaletoolkit frag-length-intervals [-h] [-min MIN_LENGTH] [-max MAX_LENGTH] [-p {midpoint,any}] [-o OUTPUT_FILE] [-q QUALITY_THRESHOLD] [-w WORKERS] - [-v] +finaletoolkit frag-length-intervals [-h] [-min MIN_LENGTH] [-max MAX_LENGTH] [-p {midpoint,any}] [-o OUTPUT_FILE] [-q QUALITY_THRESHOLD] + [-w WORKERS] [-v] input_file interval_file -Positional Arguments# +Positional Arguments# input_file Path to a BAM/SAM/CRAM/Fragment file containing fragment data. @@ -494,7 +529,7 @@ Positional Arguments -Named Arguments# +Named Arguments# -min, --min-length Minimum length for a fragment to be included in fragment length. @@ -528,14 +563,15 @@ Named Arguments
-Window Protection Score (WPS)# +Window Protection Score (WPS)# -finaletoolkit.frag.wps(input_file: str | AlignmentFile, contig: str, start: int | str, stop: int | str, output_file: str | None = None, window_size: int = 120, fraction_low: int = 120, fraction_high: int = 180, quality_threshold: int = 30, verbose: bool | int = 0) → ndarray# +finaletoolkit.frag.wps(input_file: str | AlignmentFile, contig: str, start: int | str, stop: int | str, output_file: str | None = None, window_size: int = 120, fraction_low: int = 120, fraction_high: int = 180, quality_threshold: int = 30, verbose: bool | int = 0) → ndarray# Return (raw) Windowed Protection Scores as specified in Snyder et al (2016) over a region [start,stop). @@ -365,19 +391,19 @@ Window Protection Score (WPS) input_file (str or pysam.AlignmentFile) – BAM, SAM or tabix file containing paired-end fragment reads or its path. AlignmentFile must be opened in read mode. -contig (str) -start (int) -stop (int) -output_file (string, optional) +contig (str) – +start (int) – +stop (int) – +output_file (string, optional) – window_size (int, optional) – Size of window to calculate WPS. Default is k = 120, equivalent to L-WPS. fraction_low (int, optional) – Specifies lowest fragment length included in calculation. Default is 120, equivalent to long fraction. fraction_high (int, optional) – Specifies highest fragment length included in calculation. Default is 180, equivalent to long fraction. 
-quality_threshold (int, optional) -workers (int, optional) -verbose (bool, optional) +quality_threshold (int, optional) – +workers (int, optional) – +verbose (bool, optional) – Returns: @@ -391,7 +417,7 @@ Window Protection Score (WPS) -finaletoolkit.frag.multi_wps(input_file: AlignmentFile | str, site_bed: str, output_file: None | str = None, window_size: int = 120, interval_size: int = 5000, fraction_low: int = 120, fraction_high: int = 180, quality_threshold: int = 30, workers: int = 1, verbose: bool | int = 0) → ndarray# +finaletoolkit.frag.multi_wps(input_file: AlignmentFile | str, site_bed: str, chrom_sizes: str | None = None, output_file: str | None = None, window_size: int = 120, interval_size: int = 5000, min_length: int = 120, max_length: int = 180, quality_threshold: int = 30, workers: int = 1, verbose: bool | int = 0, fraction_low: int | None = None, fraction_high: int | None = None)# Function that aggregates WPS over sites in BED file according to the method described by Snyder et al (2016). @@ -402,34 +428,36 @@ Window Protection Score (WPS)Returns: -scores – np array of shape (n, 2) where column 1 is the coordinate and -column 2 is the score and n is the number of coordinates in -region [start,stop) +output_file – location results are stored. 
Return type: -numpy.ndarray +str -finaletoolkit.frag.adjust_wps(input_file: str, interval_file: str, output_file: str, genome_file: str, interval_size: int = 5000, median_window_size: int = 1000, savgol_window_size: int = 21, savgol_poly_deg: int = 2, mean: bool = False, subtract_edges: bool = False, edge_size: int = 500, workers: int = 1, verbose: Union(bool, int) = False)# +finaletoolkit.frag.adjust_wps(input_file: str, interval_file: str, output_file: str, chrom_sizes: str, interval_size: int = 5000, median_window_size: int = 1000, savgol_window_size: int = 21, savgol_poly_deg: int = 2, mean: bool = False, subtract_edges: bool = False, edge_size: int = 500, workers: int = 1, verbose: Union(bool, int) = False)# Adjusts raw WPS data in a BigWig by applying a median filter and Savitsky-Golay filter (Savitsky and Golay, 1964). @@ -439,9 +467,10 @@ Window Protection Score (WPS) - + @@ -533,8 +562,8 @@ Window Protection Score (WPS) - + + diff --git a/docs/_build/html/documentation/cli_reference/index.html b/docs/_build/html/documentation/cli_reference/index.html index 803fb18d..35ab51d3 100644 --- a/docs/_build/html/documentation/cli_reference/index.html +++ b/docs/_build/html/documentation/cli_reference/index.html @@ -1,12 +1,13 @@ + - + - + CLI — FinaleToolkit documentation @@ -16,31 +17,28 @@ document.documentElement.dataset.mode = localStorage.getItem("mode") || ""; document.documentElement.dataset.theme = localStorage.getItem("theme") || ""; - - - - - - + + + - - - + + + + + + + - - + + + - - - + + + @@ -49,7 +47,6 @@ - @@ -65,8 +62,19 @@ Back to top - - + + + + + + + + + @@ -74,6 +82,7 @@ Ctrl+K - - + + @@ -116,7 +125,7 @@ - + FinaleToolkit @@ -171,21 +180,29 @@ - - - Search - Ctrl+K - + - - - - - + @@ -194,11 +211,15 @@ - - - Search - Ctrl+K - + @@ -217,8 +238,7 @@ - - + @@ -272,11 +292,15 @@ - - - - - + @@ -313,6 +337,8 @@ + + @@ -324,7 +350,7 @@ FinaleToolkit Documentation - CLI + CLI @@ -342,7 +368,7 @@ -CLI# +CLI# FinaleToolkit is a package and standalone 
program to extract fragmentation features of cell-free DNA from paired-end sequencing data. usage: finaletoolkit [-h] [-v] @@ -351,7 +377,7 @@ CLI# -Named Arguments# +Named Arguments# -v, --version show program’s version number and exit @@ -359,15 +385,17 @@ Named Arguments -Sub-commands# +Sub-commands# -coverage# +coverage# Calculates fragmentation coverage over intervals defined in a BED file based on alignment data from a BAM/SAM/CRAM/Fragment file. -finaletoolkit coverage [-h] [-o OUTPUT_FILE] [-n] [-s SCALE_FACTOR] [-p {midpoint,any}] [-q QUALITY_THRESHOLD] [-w WORKERS] [-v] input_file interval_file +finaletoolkit coverage [-h] [-o OUTPUT_FILE] [-n] [-s SCALE_FACTOR] [-min MIN_LENGTH] [-max MAX_LENGTH] [-p {midpoint,any}] + [-q QUALITY_THRESHOLD] [-w WORKERS] [-v] + input_file interval_file -Positional Arguments# +Positional Arguments# input_file Path to a BAM/SAM/CRAM/Fragment file containing fragment data. @@ -378,7 +406,7 @@ Positional Arguments -Named Arguments# +Named Arguments# -o, --output_file A BED file containing coverage values over the intervals specified in interval file. @@ -392,6 +420,13 @@ Named Arguments -frag-length-bins# +frag-length-bins# Retrieves fragment lengths grouped in bins given a BAM/SAM/CRAM/Fragment file. -finaletoolkit frag-length-bins [-h] [-c CONTIG] [-S START] [-E STOP] [-min MIN_LENGTH] [-max MAX_LENGTH] [-p {midpoint,any}] [--bin-size BIN_SIZE] - [-o OUTPUT_FILE] [--histogram-path HISTOGRAM_PATH] [-q QUALITY_THRESHOLD] [-v] +finaletoolkit frag-length-bins [-h] [-c CONTIG] [-S START] [-E STOP] [-min MIN_LENGTH] [-max MAX_LENGTH] [-p {midpoint,any}] + [--bin-size BIN_SIZE] [-o OUTPUT_FILE] [--histogram-path HISTOGRAM_PATH] [-q QUALITY_THRESHOLD] [-v] input_file -Positional Arguments# +Positional Arguments# input_file Path to a BAM/SAM/CRAM/Fragment file containing fragment data. @@ -429,7 +464,7 @@ Positional Arguments -Named Arguments# +Named Arguments# -c, --contig Specify the contig or chromosome to select fragments from. 
(Required if using –start or –stop.) @@ -475,15 +510,15 @@ Named Arguments -frag-length-intervals# +frag-length-intervals# Retrieves fragment length summary statistics over intervals defined in a BED file based on alignment data from a BAM/SAM/CRAM/Fragment file. -finaletoolkit frag-length-intervals [-h] [-min MIN_LENGTH] [-max MAX_LENGTH] [-p {midpoint,any}] [-o OUTPUT_FILE] [-q QUALITY_THRESHOLD] [-w WORKERS] - [-v] +finaletoolkit frag-length-intervals [-h] [-min MIN_LENGTH] [-max MAX_LENGTH] [-p {midpoint,any}] [-o OUTPUT_FILE] [-q QUALITY_THRESHOLD] + [-w WORKERS] [-v] input_file interval_file -Positional Arguments# +Positional Arguments# input_file Path to a BAM/SAM/CRAM/Fragment file containing fragment data. @@ -494,7 +529,7 @@ Positional Arguments -Named Arguments# +Named Arguments# -min, --min-length Minimum length for a fragment to be included in fragment length. @@ -528,14 +563,15 @@ Named Arguments
-CLI# +CLI# FinaleToolkit is a package and standalone program to extract fragmentation features of cell-free DNA from paired-end sequencing data. usage: finaletoolkit [-h] [-v] @@ -351,7 +377,7 @@ CLI# -Named Arguments# +Named Arguments# -v, --version show program’s version number and exit @@ -359,15 +385,17 @@ Named Arguments -Sub-commands# +Sub-commands# -coverage# +coverage# Calculates fragmentation coverage over intervals defined in a BED file based on alignment data from a BAM/SAM/CRAM/Fragment file. -finaletoolkit coverage [-h] [-o OUTPUT_FILE] [-n] [-s SCALE_FACTOR] [-p {midpoint,any}] [-q QUALITY_THRESHOLD] [-w WORKERS] [-v] input_file interval_file +finaletoolkit coverage [-h] [-o OUTPUT_FILE] [-n] [-s SCALE_FACTOR] [-min MIN_LENGTH] [-max MAX_LENGTH] [-p {midpoint,any}] + [-q QUALITY_THRESHOLD] [-w WORKERS] [-v] + input_file interval_file -Positional Arguments# +Positional Arguments# input_file Path to a BAM/SAM/CRAM/Fragment file containing fragment data. @@ -378,7 +406,7 @@ Positional Arguments -Named Arguments# +Named Arguments# -o, --output_file A BED file containing coverage values over the intervals specified in interval file. @@ -392,6 +420,13 @@ Named Arguments -frag-length-bins# +frag-length-bins# Retrieves fragment lengths grouped in bins given a BAM/SAM/CRAM/Fragment file. -finaletoolkit frag-length-bins [-h] [-c CONTIG] [-S START] [-E STOP] [-min MIN_LENGTH] [-max MAX_LENGTH] [-p {midpoint,any}] [--bin-size BIN_SIZE] - [-o OUTPUT_FILE] [--histogram-path HISTOGRAM_PATH] [-q QUALITY_THRESHOLD] [-v] +finaletoolkit frag-length-bins [-h] [-c CONTIG] [-S START] [-E STOP] [-min MIN_LENGTH] [-max MAX_LENGTH] [-p {midpoint,any}] + [--bin-size BIN_SIZE] [-o OUTPUT_FILE] [--histogram-path HISTOGRAM_PATH] [-q QUALITY_THRESHOLD] [-v] input_file -Positional Arguments# +Positional Arguments# input_file Path to a BAM/SAM/CRAM/Fragment file containing fragment data. 
@@ -429,7 +464,7 @@ Positional Arguments -Named Arguments# +Named Arguments# -c, --contig Specify the contig or chromosome to select fragments from. (Required if using –start or –stop.) @@ -475,15 +510,15 @@ Named Arguments -frag-length-intervals# +frag-length-intervals# Retrieves fragment length summary statistics over intervals defined in a BED file based on alignment data from a BAM/SAM/CRAM/Fragment file. -finaletoolkit frag-length-intervals [-h] [-min MIN_LENGTH] [-max MAX_LENGTH] [-p {midpoint,any}] [-o OUTPUT_FILE] [-q QUALITY_THRESHOLD] [-w WORKERS] - [-v] +finaletoolkit frag-length-intervals [-h] [-min MIN_LENGTH] [-max MAX_LENGTH] [-p {midpoint,any}] [-o OUTPUT_FILE] [-q QUALITY_THRESHOLD] + [-w WORKERS] [-v] input_file interval_file -Positional Arguments# +Positional Arguments# input_file Path to a BAM/SAM/CRAM/Fragment file containing fragment data. @@ -494,7 +529,7 @@ Positional Arguments -Named Arguments# +Named Arguments# -min, --min-length Minimum length for a fragment to be included in fragment length. @@ -528,14 +563,15 @@ Named Arguments