diff --git a/src/varity/vcf_to_hgvs/protein.clj b/src/varity/vcf_to_hgvs/protein.clj index 52ae369..ce2d958 100644 --- a/src/varity/vcf_to_hgvs/protein.clj +++ b/src/varity/vcf_to_hgvs/protein.clj @@ -26,8 +26,10 @@ d (Math/abs (- nref nalt))] (when (and (not= 1 nref) (not= 1 nalt) (some (fn [[s e]] - (or (<= pos s (+ pos nref -1)) - (<= pos e (+ pos nref -1)))) exon-ranges)) + (and (not= s e) + (or (and (< pos s) (<= s (+ pos nref -1))) + (and (<= pos e) (< e (+ pos nref -1)))))) + exon-ranges)) (throw (ex-info "Variants overlapping a boundary of exon/intron are unsupported" @@ -43,7 +45,7 @@ dele (dec (+ tpos d))] (cond (< dele s) [(- s d) (- e d)] - (<= dels s) (if (< dele e) [dels (- e d)]) + (<= dels s) (when (< dele e) [dels (- e d)]) (<= dels e) (if (< dele e) [s (- e d)] [s (dec dels)]) @@ -238,9 +240,9 @@ ndel (count del)] (mut/protein-deletion (mut/->long-amino-acid (first del)) (coord/protein-coordinate (+ ppos offset)) - (if (> ndel 1) + (when (> ndel 1) (mut/->long-amino-acid (last del))) - (if (> ndel 1) + (when (> ndel 1) (coord/protein-coordinate (+ ppos offset ndel -1)))))) (defn- protein-duplication @@ -249,8 +251,8 @@ nins (count ins)] (mut/protein-duplication (mut/->long-amino-acid (first ins)) (coord/protein-coordinate (- (+ ppos offset) nins)) - (if (> nins 1) (mut/->long-amino-acid (last ins))) - (if (> nins 1) (coord/protein-coordinate (dec (+ ppos offset))))))) + (when (> nins 1) (mut/->long-amino-acid (last ins))) + (when (> nins 1) (coord/protein-coordinate (dec (+ ppos offset))))))) (defn- protein-insertion [ppos pref palt seq-info] @@ -269,9 +271,9 @@ ndel (count del)] (mut/protein-indel (mut/->long-amino-acid (first del)) (coord/protein-coordinate (+ ppos offset)) - (if (> ndel 1) + (when (> ndel 1) (mut/->long-amino-acid (last del))) - (if (> ndel 1) + (when (> ndel 1) (coord/protein-coordinate (+ ppos offset ndel -1))) (->> (seq ins) (map mut/->long-amino-acid))))) @@ -287,8 +289,8 @@ end (dec (+ start nunit))] (mut/protein-repeated-seqs (mut/->long-amino-acid (first unit)) (coord/protein-coordinate start) - (if (< start end) (mut/->long-amino-acid (last unit))) - (if (< start end) (coord/protein-coordinate end)) + (when (< start end) (mut/->long-amino-acid (last unit))) + (when (< start end) (coord/protein-coordinate end)) alt-repeat))) (defn- protein-frame-shift @@ -340,7 +342,7 @@ (defn- mutation [seq-rdr rg pos ref alt options] (let [seq-info (read-sequence-info seq-rdr rg pos ref alt)] - (if-let [pvariant (->protein-variant rg pos ref alt seq-info options)] + (when-let [pvariant (->protein-variant rg pos ref alt seq-info options)] (let [{ppos :pos, pref :ref, palt :alt} (if-not (#{:no-effect :unknown} (:type pvariant)) (common/apply-3'-rule pvariant (:ref-prot-seq seq-info)) @@ -361,7 +363,7 @@ ([variant seq-rdr rg] (->hgvs variant seq-rdr rg {})) ([{:keys [pos ref alt]} seq-rdr rg options] - (if-let [mutation (mutation seq-rdr rg pos ref alt options)] + (when-let [mutation (mutation seq-rdr rg pos ref alt options)] (hgvs/hgvs nil :protein mutation)))) (defn- prot-seq-pstring diff --git a/test/varity/vcf_to_hgvs/protein_test.clj b/test/varity/vcf_to_hgvs/protein_test.clj index 85de624..b03ea96 100644 --- a/test/varity/vcf_to_hgvs/protein_test.clj +++ b/test/varity/vcf_to_hgvs/protein_test.clj @@ -17,7 +17,8 @@ 6 "XX" "X" [[2 4] [7 10]] 6 "XXX" "X" [[2 4] [7 9]] 3 "XXX" "X" [[2 3] [6 9]] - 1 "XXXXX" "X" [[4 7]]) + 1 "XXXXX" "X" [[4 7]] + 9 "XXX" "XXX" [[2 4] [8 11]]) ;; Can't determine whether the splice site is shifted or not (is (thrown-with-msg? Exception @@ -67,7 +68,7 @@ (deftest prot-seq-pstring-test (are [pref-seq palt-seq start end m e] - (= (#'prot/prot-seq-pstring pref-seq palt-seq start end m) e) + (= (#'prot/prot-seq-pstring pref-seq palt-seq start end m) e) "LAARNVLVKTPQHVKITDFGLAKLLGAEEKEYHAEGGKVPI" "LAARNVLVKTPQHVKITDFGRAKLLGAEEKEYHAEGGKVPI" 838 878 {:ppos 858, :pref "L", :palt "R"} @@ -116,6 +117,6 @@ :exon-frames [2 0 1 2 0 1 0 0 2 0 -1] :exon-count 11}] (are [pos ref alt res] - (= (with-open [seq-rdr (cseq/reader test-ref-seq-file)] (#'prot/mutation seq-rdr tp53 pos ref alt {})) res) + (= (with-open [seq-rdr (cseq/reader test-ref-seq-file)] (#'prot/mutation seq-rdr tp53 pos ref alt {})) res) 7676197 "G" "GGTCTTGTCCCTTA" (:mutation (hgvs/parse "p.P58*")) 7676202 "T" "TGTCCCTTAGTCTT" (:mutation (hgvs/parse "p.P58*"))))) diff --git a/test/varity/vcf_to_hgvs_test.clj b/test/varity/vcf_to_hgvs_test.clj index 70d6488..6e0caec 100644 --- a/test/varity/vcf_to_hgvs_test.clj +++ b/test/varity/vcf_to_hgvs_test.clj @@ -239,6 +239,7 @@ ;; Extension "chr2" 188974490 "A" "C" '("p.M1Lext-23") "chr2" 189011772 "T" "C" '("p.*1467Qext*45") ; cf. ClinVar 101338 + "chr11" 125655318 "TGA" "TAT" '("p.*477Yext*17" "p.*443Yext*17" "p.*477Yext*24") ;; NOTE: There are very few correct examples... ;; no effect