From 91e8803b3923c9412b941985f87234e6f395cec5 Mon Sep 17 00:00:00 2001 From: Nobuaki Karasawa Date: Tue, 31 Oct 2023 19:22:11 +0900 Subject: [PATCH] fix: fix exon/intron boundary determining process --- src/varity/vcf_to_hgvs/protein.clj | 3 ++- test/varity/vcf_to_hgvs/protein_test.clj | 17 +++++++++-------- test/varity/vcf_to_hgvs_test.clj | 16 +++++++++------- 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/src/varity/vcf_to_hgvs/protein.clj b/src/varity/vcf_to_hgvs/protein.clj index ce2d958..045e598 100644 --- a/src/varity/vcf_to_hgvs/protein.clj +++ b/src/varity/vcf_to_hgvs/protein.clj @@ -24,7 +24,8 @@ :else :same) tpos (+ pos (min nref nalt)) d (Math/abs (- nref nalt))] - (when (and (not= 1 nref) (not= 1 nalt) + (when (and (not (= 1 nref nalt)) + (not= 1 (count exon-ranges)) (some (fn [[s e]] (and (not= s e) (or (and (< pos s) (<= s (+ pos nref -1))) diff --git a/test/varity/vcf_to_hgvs/protein_test.clj b/test/varity/vcf_to_hgvs/protein_test.clj index b03ea96..e3c95ca 100644 --- a/test/varity/vcf_to_hgvs/protein_test.clj +++ b/test/varity/vcf_to_hgvs/protein_test.clj @@ -15,15 +15,16 @@ 2 "XX" "X" [[2 3] [7 10]] 3 "XX" "X" [[2 3] [7 10]] 6 "XX" "X" [[2 4] [7 10]] - 6 "XXX" "X" [[2 4] [7 9]] - 3 "XXX" "X" [[2 3] [6 9]] - 1 "XXXXX" "X" [[4 7]] 9 "XXX" "XXX" [[2 4] [8 11]]) - ;; Can't determine whether the splice site is shifted or not - (is (thrown-with-msg? - Exception - #"unsupported" - (#'prot/alt-exon-ranges [[2 4] [8 11]] 3 "XXX" "XXX")))) + ;; Variants overlapping a boundary of exon/intron + (are [p r a] (thrown-with-msg? + Exception + #"unsupported" + (#'prot/alt-exon-ranges [[2 4] [8 11]] p r a)) + 3 "XXX" "XXX" + 6 "XXX" "X" + 3 "XXX" "X" + 1 "XXXXX" "X")) (deftest exon-sequence-test ;; A C G T A C G T A C G T A C G diff --git a/test/varity/vcf_to_hgvs_test.clj b/test/varity/vcf_to_hgvs_test.clj index 6e0caec..0000a8a 100644 --- a/test/varity/vcf_to_hgvs_test.clj +++ b/test/varity/vcf_to_hgvs_test.clj @@ -195,8 +195,6 @@ "chr7" 152247986 "G" "GT" '("p.Y816*") ; cf. rs150073007 (-, nonsense mutation) "chr17" 31159027 "TGC" "T" '("p.A75*") ; not actual example (+, nonsense in del case) "chr2" 47478341 "TG" "T" '("p.L762*" "p.L696*") ;; rs786204050 (+) frameshift with termination - "chr8" 42838217 "GAGATTAACAGGGGTCTGAAGAGGCGGCATTAGTAATCCAATAGCAGCATCAACCTGGGAAACAGGAGGCGGTAAAGGAGGTGGGGGAAGCTGTTCCTGTGGCTCCAGAAGATCTTCTTTCTAAAACAAAAATACAAAGTATGTTTGAATTTAGTAACTAAAAACAGTTTAAA" "G" - '("p.K90Lfs*5" "p.K25*") ; cf. VCV000965170.1 (-, frameshift with termination) "chr17" 7676202 "T" "TGTCCCTTAGTCTT" '("p.P58*" "p.P19*") ; cf. not actual example (-, frameshift with termination) ;; deletion @@ -232,10 +230,6 @@ "p.S1415Ifs*2") ; https://github.com/chrovis/varity/issues/58 "chr17" 31261816 "CC" "C" '("p.N1542Tfs*11" "p.N1563Tfs*11") ; cf. rs1555619041 (+) - ;; frame shift with initiation codon change (e.g. NM_007298:c.-19_80del from BRCA Share) - "chr17" 43124016 "CCAGATGGGACACTCTAAGATTTTCTGCATAGCATTAATGACATTTTGTACTTCTTCAACGCGAAGAGCAGATAAATCCATTTCTTTCTGTTCCAATGAA" "C" - '("p.M1Sfs*13") - ;; Extension "chr2" 188974490 "A" "C" '("p.M1Lext-23") "chr2" 189011772 "T" "C" '("p.*1467Qext*45") ; cf. ClinVar 101338 @@ -300,7 +294,15 @@ ;; The intron in between remains unchanged. "chr1" 26773714 "AACGGTGAGTAAAGCCTGGTCTCGGTGCTGCTATGGATCAGGCTTCGCCACTGCCCACCCTAATCCTGTGTTTCTTTGCCTCCTATAGACAT" - "AGCAACGGTGAGTAAAGCCTGGTCTCGGTGCTGCTATGGATCAGGCTTCGCCACTGCCCACCCTAATCCTGTGTTTCTTTGCCTCCTATAGACATCAT")))) + "AGCAACGGTGAGTAAAGCCTGGTCTCGGTGCTGCTATGGATCAGGCTTCGCCACTGCCCACCCTAATCCTGTGTTTCTTTGCCTCCTATAGACATCAT" + + ;; Variants include exon/intron boundary. + ;; cf. VCV000965170.1 + "chr8" 42838217 "GAGATTAACAGGGGTCTGAAGAGGCGGCATTAGTAATCCAATAGCAGCATCAACCTGGGAAACAGGAGGCGGTAAAGGAGGTGGGGGAAGCTGTTCCTGTGGCTCCAGAAGATCTTCTTTCTAAAACAAAAATACAAAGTATGTTTGAATTTAGTAACTAAAAACAGTTTAAA" "G" + ;; e.g. NM_007298:c.-19_80del from BRCA Share + "chr17" 43124016 "CCAGATGGGACACTCTAAGATTTTCTGCATAGCATTAATGACATTTTGTACTTCTTCAACGCGAAGAGCAGATAAATCCATTTCTTTCTGTTCCAATGAA" "C" + ;; e.g. NM_001174080:c.3696-10_3710del + "chr19" 13283381 "GCACAGGCGGCGAAGGCTGTTGGAGA" "G")))) (deftest coding-dna-ref-gene?-test (testing "valid reference genes"