Skip to content

Commit

Permalink
Potential fix for #378.
Browse files Browse the repository at this point in the history
  • Loading branch information
lima1 committed Dec 11, 2024
1 parent 2394ee7 commit 7f40f7c
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 3 deletions.
7 changes: 7 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
Changes in version 2.14.0
-------------------------

BUGFIXES
o Fixed large runtime of readAllelicCounts. Thanks luyh-xp (#378).


Changes in version 2.12.0
-------------------------

Expand Down
9 changes: 7 additions & 2 deletions R/readAllelicCountsFile.R
Original file line number Diff line number Diff line change
Expand Up @@ -68,15 +68,20 @@ readAllelicCountsFile <- function(file, format, zero=NULL) {
if (!is.null(zero)) flog.warn("zero ignored for GATK4 files.")
con <- file(file, open = "r")
header <- .parseGATKHeader(con)
inputCounts <- read.delim(con, header = FALSE, stringsAsFactors = FALSE)
inputCounts <- try(read.delim(con, header = FALSE, stringsAsFactors = FALSE))
if (is(inputCounts, "try-error")) {
.stopUserError("Error reading AllelicCountsFile ", file)
}
colnames(inputCounts) <- strsplit(header$last_line, "\t")[[1]]
close(con)
gr <- GRanges(seqnames = inputCounts$CONTIG, IRanges(start = inputCounts$POSITION, end = inputCounts$POSITION))
vcf <- VCF(gr,
colData = DataFrame(Samples = 1, row.names = header$sid),
exptData = list(header = VCFHeader(samples = header$sid)))
ref(vcf) <- DNAStringSet(inputCounts$REF_NUCLEOTIDE)
alt(vcf) <- DNAStringSetList(lapply(inputCounts$ALT_NUCLEOTIDE, DNAStringSet))
#alt(vcf) <- DNAStringSetList(split(inputCounts$ALT_NUCLEOTIDE, seq(length(vcf))))
alt(vcf) <- DNAStringSetList(as.list(inputCounts$ALT_NUCLEOTIDE))

info(header(vcf)) <- DataFrame(
Number = "0",
Type = "Flag",
Expand Down
28 changes: 28 additions & 0 deletions inst/extdata/example_allelic_counts_empty.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
@HD VN:1.6
@SQ SN:chr1 LN:249250621
@SQ SN:chr2 LN:243199373
@SQ SN:chr3 LN:198022430
@SQ SN:chr4 LN:191154276
@SQ SN:chr5 LN:180915260
@SQ SN:chr6 LN:171115067
@SQ SN:chr7 LN:159138663
@SQ SN:chr8 LN:146364022
@SQ SN:chr9 LN:141213431
@SQ SN:chr10 LN:135534747
@SQ SN:chr11 LN:135006516
@SQ SN:chr12 LN:133851895
@SQ SN:chr13 LN:115169878
@SQ SN:chr14 LN:107349540
@SQ SN:chr15 LN:102531392
@SQ SN:chr16 LN:90354753
@SQ SN:chr17 LN:81195210
@SQ SN:chr18 LN:78077248
@SQ SN:chr19 LN:59128983
@SQ SN:chr20 LN:63025520
@SQ SN:chr21 LN:48129895
@SQ SN:chr22 LN:51304566
@SQ SN:chrX LN:155270560
@SQ SN:chrY LN:59373566
@SQ SN:chrM LN:16571
@RG ID:PureCN SM:LIB-02240e4
CONTIG POSITION REF_COUNT ALT_COUNT REF_NUCLEOTIDE ALT_NUCLEOTIDE
3 changes: 2 additions & 1 deletion tests/testthat/test_readAllelicCountsFile.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ context("readAllelicCountsFile")

vcf.file <- system.file("extdata", "example.vcf.gz", package = "PureCN")
ac.file <- system.file("extdata", "example_allelic_counts.tsv", package = "PureCN")
ac.empty.file <- system.file("extdata", "example_allelic_counts_empty.tsv", package = "PureCN")
vcf <- readVcf(vcf.file, "hg19")
data(purecn.example.output)
normal.coverage.file <- system.file('extdata', 'example_normal.txt.gz',
Expand All @@ -12,6 +13,7 @@ tumor.coverage.file <- system.file('extdata', 'example_tumor.txt.gz',
test_that("example parses correctly", {
vcf_ac <- readAllelicCountsFile(ac.file)
expect_equal(as.character(ref(vcf_ac)), as.character(ref(head(vcf,20))))
expect_error(readAllelicCountsFile(ac.empty.file), "Error reading AllelicCountsFile")
})

test_that("parsing -> writing -> parsing works", {
Expand All @@ -29,4 +31,3 @@ test_that("parsing -> writing -> parsing works", {
expect_true(length(ret$results) > 0)
file.remove(output.file)
})

0 comments on commit 7f40f7c

Please sign in to comment.