From 1a892fe539d37669ecb6d8281d8a38782a81a13f Mon Sep 17 00:00:00 2001
From: Nobuaki Karasawa
Date: Mon, 15 Jan 2024 10:15:42 +0900
Subject: [PATCH] fix: add ter-site pos not changed condition to extension

---
Notes (text between the three-dash line and the diffstat is not part of the
commit message):

  This copy was rebuilt from a version whose line breaks had been collapsed.
  Blank lines were placed so that every hunk agrees with its @@ line counts.
  Indentation inside the hunks is conventional Clojure indentation and may not
  match the target files byte-for-byte, so whitespace-tolerant application
  (git apply --ignore-whitespace) may be needed. The From: header carries no
  e-mail address in that version.

  ter-site-same-pos? returns true when alt-prot-seq has \* at index
  (dec (count ref-prot-seq)), i.e. at the index of the last character of
  ref-prot-seq.

  protein-extension now returns (mut/protein-no-effect) when ppos is not 1 and
  ter-site-same-pos? holds for ref-prot-seq and c-ter-adjusted-alt-prot-seq.
  In every other case the extension is built by the same expressions as
  before; seq-info is destructured in the argument vector instead of in let.

 src/varity/vcf_to_hgvs/protein.clj       | 47 ++++++++++++++----------
 test/varity/vcf_to_hgvs/protein_test.clj |  7 ++++
 test/varity/vcf_to_hgvs_test.clj         |  1 +
 3 files changed, 35 insertions(+), 20 deletions(-)

diff --git a/src/varity/vcf_to_hgvs/protein.clj b/src/varity/vcf_to_hgvs/protein.clj
index ba35591..c329f58 100644
--- a/src/varity/vcf_to_hgvs/protein.clj
+++ b/src/varity/vcf_to_hgvs/protein.clj
@@ -148,6 +148,11 @@
         alt-positions (set (range pos (+ pos (count ref))))]
     (boolean (seq (s/intersection ter-site-positions alt-positions)))))
 
+(defn- ter-site-same-pos?
+  [ref-prot-seq alt-prot-seq]
+  (let [ter-site-pos (dec (count ref-prot-seq))]
+    (= \* (get alt-prot-seq ter-site-pos))))
+
 (defn- apply-offset
   [pos ref alt exon-ranges ref-include-ter-site pos*]
   (letfn [(apply-offset* [exon-ranges*]
@@ -426,26 +431,28 @@
                                 (coord/unknown-coordinate))))))
 
 (defn- protein-extension
-  [ppos pref palt seq-info]
-  (let [{:keys [alt-prot-seq alt-tx-prot-seq ini-offset]} seq-info
-        [_ ins offset _] (diff-bases pref palt)
-        rest-seq (if (= ppos 1)
-                   (-> alt-tx-prot-seq
-                       (subs 0 ini-offset)
-                       reverse
-                       (#(apply str %)))
-                   (-> alt-tx-prot-seq
-                       (subs (+ ini-offset (count alt-prot-seq)))))
-        ter-site (some-> (string/index-of rest-seq (if (= ppos 1) "M" "*")) inc)]
-    (mut/protein-extension (if (= ppos 1) "Met" "Ter")
-                           (coord/protein-coordinate (if (= ppos 1) 1 (+ ppos offset)))
-                           (mut/->long-amino-acid (if (= ppos 1)
-                                                    (last ins)
-                                                    (or (last ins) (first rest-seq))))
-                           (if (= ppos 1) :upstream :downstream)
-                           (if ter-site
-                             (coord/protein-coordinate ter-site)
-                             (coord/unknown-coordinate)))))
+  [ppos pref palt {:keys [ref-prot-seq alt-prot-seq alt-tx-prot-seq c-ter-adjusted-alt-prot-seq ini-offset]}]
+  (if (and (not= ppos 1)
+           (ter-site-same-pos? ref-prot-seq c-ter-adjusted-alt-prot-seq))
+    (mut/protein-no-effect)
+    (let [[_ ins offset _] (diff-bases pref palt)
+          rest-seq (if (= ppos 1)
+                     (-> alt-tx-prot-seq
+                         (subs 0 ini-offset)
+                         reverse
+                         (#(apply str %)))
+                     (-> alt-tx-prot-seq
+                         (subs (+ ini-offset (count alt-prot-seq)))))
+          ter-site (some-> (string/index-of rest-seq (if (= ppos 1) "M" "*")) inc)]
+      (mut/protein-extension (if (= ppos 1) "Met" "Ter")
+                             (coord/protein-coordinate (if (= ppos 1) 1 (+ ppos offset)))
+                             (mut/->long-amino-acid (if (= ppos 1)
+                                                      (last ins)
+                                                      (or (last ins) (first rest-seq))))
+                             (if (= ppos 1) :upstream :downstream)
+                             (if ter-site
+                               (coord/protein-coordinate ter-site)
+                               (coord/unknown-coordinate))))))
 
 (defn- mutation
   [seq-rdr rg pos ref alt options]
diff --git a/test/varity/vcf_to_hgvs/protein_test.clj b/test/varity/vcf_to_hgvs/protein_test.clj
index f4fc4cb..a944bb2 100644
--- a/test/varity/vcf_to_hgvs/protein_test.clj
+++ b/test/varity/vcf_to_hgvs/protein_test.clj
@@ -109,6 +109,13 @@
       [15 20] 15
       [5 10] 5)))
 
+(deftest ter-site-same-pos?-test
+  (are [p ref alt] (p (#'prot/ter-site-same-pos? ref alt))
+    true? "MTGA*" "MTGA*"
+    true? "MTGA*" "MTGA*CT"
+    false? "MTGA*" "MTGAQCT*"
+    false? "MTGA*" "MTGA"))
+
 (deftest apply-offset-test
   (let [pos 100
         ref "GCTGACC"
diff --git a/test/varity/vcf_to_hgvs_test.clj b/test/varity/vcf_to_hgvs_test.clj
index 205ee5c..e8421ae 100644
--- a/test/varity/vcf_to_hgvs_test.clj
+++ b/test/varity/vcf_to_hgvs_test.clj
@@ -251,6 +251,7 @@
       "chr7" 55181876 "A" "T" '("p.=") ; not actual example (+)
       "chr7" 55181874 "TGAT" "T" '("p.=") ; not actual example (+)
       "chr7" 55181876 "A" "AGGT" '("p.=") ; not actual example (+)
+      "chr3" 149520808 "C" "CTTAA" '("p.=") ; not actual example (-)
 
       ;; unknown
       "chr12" 40393453 "G" "A" '("p.?") ; not actual example (+)