Skip to content

Commit

Permalink
Merge pull request #75 from chrovis/fix/3-prime-rule
Browse files Browse the repository at this point in the history
Fix 3'-rule errors for certain sequence patterns
  • Loading branch information
totakke authored Apr 27, 2023
2 parents 60e0702 + 1854562 commit a155c38
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 16 deletions.
14 changes: 7 additions & 7 deletions src/varity/vcf_to_hgvs/common.clj
Original file line number Diff line number Diff line change
Expand Up @@ -69,15 +69,15 @@
0
(let [rbases (string/reverse bases)
n (count bases)
step (count (first (repeat-units rbases)))
tweak (if (= step 1) #(+ (dec n) %) #(* step %))]
step (count (first (repeat-units rbases)))]
(->> seq*
(take (dec pos))
(take (+ pos (dec n)))
reverse
(partition (count rbases) step)
(take-while #(= (apply str %) rbases))
count
tweak)))
dec
(* step))))
(throw (ex-info "The bases is not found on the position."
{:type ::invalid-bases
:sequence seq*
Expand Down Expand Up @@ -148,10 +148,10 @@
([variant seq*]
(apply-3'-rule variant seq* :forward))
([{:keys [pos ref alt] :as variant} seq* direction]
(let [[ref-only alt-only offset _] (diff-bases ref alt)
(let [[ref-only alt-only offset right-offset] (diff-bases ref alt)
type* (cond
(and (zero? (count ref-only)) (pos? (count alt-only))) :ins
(and (pos? (count ref-only)) (zero? (count alt-only))) :del
(and (zero? (count ref-only)) (pos? (count alt-only)) (zero? right-offset)) :ins
(and (pos? (count ref-only)) (zero? (count alt-only)) (zero? right-offset)) :del
:else :other)]
(if (#{:ins :del} type*)
(let [bases (if (string/blank? ref-only) alt-only ref-only)
Expand Down
16 changes: 9 additions & 7 deletions src/varity/vcf_to_hgvs/protein.clj
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@
ref-down-exon-seq1 (subs ref-down-exon-seq1 0 (- nref-down-exon-seq1 (mod nref-down-exon-seq1 3)))
alt-exon-seq1 (exon-sequence alt-seq cds-start alt-exon-ranges*)
apply-offset #(or (ffirst (alt-exon-ranges [[% %]] pos ref alt))
(some (fn [[_ e]] (when (<= e %) e)) alt-exon-ranges*))]
(some (fn [[_ e]] (when (<= e %) e)) (reverse alt-exon-ranges*)))]
{:ref-exon-seq ref-exon-seq1
:ref-prot-seq (codon/amino-acid-sequence (cond-> ref-exon-seq1
(= strand :reverse) util-seq/revcomp))
Expand Down Expand Up @@ -308,12 +308,14 @@
format-alt-prot-seq
(subs (dec (+ ppos offset)))
(string/index-of "*"))]
(mut/protein-frame-shift (mut/->long-amino-acid ref)
(coord/protein-coordinate (+ ppos offset))
(mut/->long-amino-acid alt)
(if (and ter-site (pos? ter-site))
(coord/protein-coordinate (inc ter-site))
(coord/unknown-coordinate)))))
(if (= alt \*)
(protein-substitution (+ ppos offset) (str ref) (str alt)) ; eventually fs-ter-substitution
(mut/protein-frame-shift (mut/->long-amino-acid ref)
(coord/protein-coordinate (+ ppos offset))
(mut/->long-amino-acid alt)
(if (and ter-site (pos? ter-site))
(coord/protein-coordinate (inc ter-site))
(coord/unknown-coordinate))))))

(defn- protein-extension
[ppos pref palt seq-info]
Expand Down
10 changes: 9 additions & 1 deletion test/varity/vcf_to_hgvs/common_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@
"AGGGGGGT" 7 "G" 5
"AGGGGGGT" 6 "GG" 4
"AGGGGGGT" 5 "GGG" 3
"AGGGGGGT" 4 "GGGG" 2
"AGGGGGGT" 3 "GGG" 1
"AGGGGGGT" 2 "GGGGG" 0
"AAGTGTCC" 5 "GT" 2
"AAGTGTCC" 3 "GT" 0))
(testing "backward-shift throws exception when inputs are illegal"
Expand Down Expand Up @@ -80,7 +83,12 @@
{:pos 10, :ref "T", :alt "TAGTCTT"} :forward {:pos 20, :ref "C", :alt "CTTAGTC"}
{:pos 4, :ref "CAGTCTT", :alt "C"} :forward {:pos 14, :ref "CTTAGTC", :alt "C"}
{:pos 13, :ref "T", :alt "TCTTAGT"} :backward {:pos 4, :ref "C", :alt "CAGTCTT"}
{:pos 13, :ref "TCTTAGT", :alt "T"} :backward {:pos 4, :ref "CAGTCTT", :alt "C"})))
{:pos 13, :ref "TCTTAGT", :alt "T"} :backward {:pos 4, :ref "CAGTCTT", :alt "C"}))
(testing "protein sequence"
(are [v d ret] (= (common/apply-3'-rule v "MDDLWFTFTPGPDE" d) ret)
{:pos 5 :ref "WFT" :alt "W"} :forward {:pos 7 :ref "TFT" :alt "T"}
{:pos 5 :ref "WFT" :alt "F"} :forward {:pos 5 :ref "WFT" :alt "F"}
{:pos 5 :ref "WFT" :alt "T"} :forward {:pos 5 :ref "WFT" :alt "T"})))

(defslowtest normalize-variant-test
(cavia-testing "normalize without error"
Expand Down
26 changes: 25 additions & 1 deletion test/varity/vcf_to_hgvs/protein_test.clj
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
(ns varity.vcf-to-hgvs.protein-test
(:require [clojure.string :as string]
[clojure.test :refer :all]
[varity.vcf-to-hgvs.protein :as prot]))
[cljam.io.sequence :as cseq]
[clj-hgvs.core :as hgvs]
[varity.vcf-to-hgvs.protein :as prot]
[varity.t-common :refer [test-ref-seq-file
defslowtest]]))

(deftest alt-exon-ranges-test
;; 1 [2 3 4] 5 6 7 [8 9 10 11] 12 13 14 15
Expand Down Expand Up @@ -95,3 +99,23 @@
"YDIGGPDQEFGVDVGPVCFL*"
"YDIGGPDQEFGVDVGPVCFLQ"
" ^"])))

(defslowtest protein-seq-3'-rule-test
(let [tp53 {:name2 "TP53"
:name "NM_000546.6"
:chr "chr17"
:tx-start 7668421
:tx-end 7687490
:cds-start 7669609
:cds-end 7676594
:strand :reverse
:cds-start-stat :cmpl
:cds-end-stat :cmpl
:exon-ranges [[7668421 7669690] [7670609 7670715] [7673535 7673608] [7673701 7673837] [7674181 7674290] [7674859 7674971] [7675053 7675236] [7675994 7676272] [7676382 7676403] [7676521 7676622] [7687377 7687490]]
:bin 643
:exon-frames [2 0 1 2 0 1 0 0 2 0 -1]
:exon-count 11}]
(are [pos ref alt res]
(= (with-open [seq-rdr (cseq/reader test-ref-seq-file)] (#'prot/mutation seq-rdr tp53 pos ref alt {})) res)
7676197 "G" "GGTCTTGTCCCTTA" (:mutation (hgvs/parse "p.P58*"))
7676202 "T" "TGTCCCTTAGTCTT" (:mutation (hgvs/parse "p.P58*")))))
16 changes: 16 additions & 0 deletions test/varity/vcf_to_hgvs_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,21 @@
"NM_001176:c.-147_-138delTCTCTGCCGG"
"NM_006849:c.-2636_-2627delTCTCTGCCGG") ; cf. rs1460727826 (deletion in UTR)
"chr6" 33086236 "TA" "T" '("NM_002121:c.776delA") ; cf. rs67523850 (deletion in border of UTR)
"chr17" 7675170 "CAA" "C" '("NM_001126115:c.44_45delTT"
"NM_001126116:c.44_45delTT"
"NM_001126117:c.44_45delTT"
"NM_001276697:c.-38_-37delTT"
"NM_001276698:c.-38_-37delTT"
"NM_001276699:c.-38_-37delTT"
"NM_000546:c.440_441delTT"
"NM_001126112:c.440_441delTT"
"NM_001126113:c.440_441delTT"
"NM_001126114:c.440_441delTT"
"NM_001126118:c.323_324delTT"
"NM_001276695:c.323_324delTT"
"NM_001276696:c.323_324delTT"
"NM_001276760:c.323_324delTT"
"NM_001276761:c.323_324delTT")

;; Duplication
"chr2" 47806844 "T" "TGATT" '("NM_000179:c.4068_4071dupGATT"
Expand Down Expand Up @@ -190,6 +205,7 @@
"chr1" 247815239 "AAGG" "A" '("p.S163del") ; cf. rs35979231 (-)
"chr2" 29223408 "AAGCAGT" "A" '("p.Y1096_C1097del") ; cf. rs776101205 (-)
"chr11" 108259071 "GT" "G" '("p.L822*") ; https://github.com/chrovis/varity/issues/49
"chr17" 7676206 "TGAACCATTGTTCAATATCGTCCGGGGACAGCATCAAATCATCCATTGCTTGGGACGGCAAGGGG" "T" '("p.P34Lfs*68" "p.M1ext-78" "p.M1ext-39") ; https://mutalyzer.nl/normalizer/NC_000017.11:g.7676209_7676272del

;; Duplication
"chr2" 26254257 "G" "GACT" '("p.T2dup") ; cf. rs3839049 (+)
Expand Down

0 comments on commit a155c38

Please sign in to comment.