Skip to content

Commit

Permalink
Merge pull request #77 from chrovis/feature/fix-exon-edge-confirmation
Browse files (browse the repository at this point in the history)
  • Loading branch information
federkasten authored Jun 26, 2023
2 parents: a155c38 + 767f941; commit 71dad6b
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 16 deletions.
28 changes: 15 additions & 13 deletions src/varity/vcf_to_hgvs/protein.clj
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,10 @@
d (Math/abs (- nref nalt))]
(when (and (not= 1 nref) (not= 1 nalt)
(some (fn [[s e]]
(or (<= pos s (+ pos nref -1))
(<= pos e (+ pos nref -1)))) exon-ranges))
(and (not= s e)
(or (and (< pos s) (<= s (+ pos nref -1)))
(and (<= pos e) (< e (+ pos nref -1))))))
exon-ranges))
(throw
(ex-info
"Variants overlapping a boundary of exon/intron are unsupported"
Expand All @@ -43,7 +45,7 @@
dele (dec (+ tpos d))]
(cond
(< dele s) [(- s d) (- e d)]
(<= dels s) (if (< dele e) [dels (- e d)])
(<= dels s) (when (< dele e) [dels (- e d)])
(<= dels e) (if (< dele e)
[s (- e d)]
[s (dec dels)])
Expand Down Expand Up @@ -238,9 +240,9 @@
ndel (count del)]
(mut/protein-deletion (mut/->long-amino-acid (first del))
(coord/protein-coordinate (+ ppos offset))
(if (> ndel 1)
(when (> ndel 1)
(mut/->long-amino-acid (last del)))
(if (> ndel 1)
(when (> ndel 1)
(coord/protein-coordinate (+ ppos offset ndel -1))))))

(defn- protein-duplication
Expand All @@ -249,8 +251,8 @@
nins (count ins)]
(mut/protein-duplication (mut/->long-amino-acid (first ins))
(coord/protein-coordinate (- (+ ppos offset) nins))
(if (> nins 1) (mut/->long-amino-acid (last ins)))
(if (> nins 1) (coord/protein-coordinate (dec (+ ppos offset)))))))
(when (> nins 1) (mut/->long-amino-acid (last ins)))
(when (> nins 1) (coord/protein-coordinate (dec (+ ppos offset)))))))

(defn- protein-insertion
[ppos pref palt seq-info]
Expand All @@ -269,9 +271,9 @@
ndel (count del)]
(mut/protein-indel (mut/->long-amino-acid (first del))
(coord/protein-coordinate (+ ppos offset))
(if (> ndel 1)
(when (> ndel 1)
(mut/->long-amino-acid (last del)))
(if (> ndel 1)
(when (> ndel 1)
(coord/protein-coordinate (+ ppos offset ndel -1)))
(->> (seq ins) (map mut/->long-amino-acid)))))

Expand All @@ -287,8 +289,8 @@
end (dec (+ start nunit))]
(mut/protein-repeated-seqs (mut/->long-amino-acid (first unit))
(coord/protein-coordinate start)
(if (< start end) (mut/->long-amino-acid (last unit)))
(if (< start end) (coord/protein-coordinate end))
(when (< start end) (mut/->long-amino-acid (last unit)))
(when (< start end) (coord/protein-coordinate end))
alt-repeat)))

(defn- protein-frame-shift
Expand Down Expand Up @@ -340,7 +342,7 @@
(defn- mutation
[seq-rdr rg pos ref alt options]
(let [seq-info (read-sequence-info seq-rdr rg pos ref alt)]
(if-let [pvariant (->protein-variant rg pos ref alt seq-info options)]
(when-let [pvariant (->protein-variant rg pos ref alt seq-info options)]
(let [{ppos :pos, pref :ref, palt :alt}
(if-not (#{:no-effect :unknown} (:type pvariant))
(common/apply-3'-rule pvariant (:ref-prot-seq seq-info))
Expand All @@ -361,7 +363,7 @@
([variant seq-rdr rg]
(->hgvs variant seq-rdr rg {}))
([{:keys [pos ref alt]} seq-rdr rg options]
(if-let [mutation (mutation seq-rdr rg pos ref alt options)]
(when-let [mutation (mutation seq-rdr rg pos ref alt options)]
(hgvs/hgvs nil :protein mutation))))

(defn- prot-seq-pstring
Expand Down
7 changes: 4 additions & 3 deletions test/varity/vcf_to_hgvs/protein_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
6 "XX" "X" [[2 4] [7 10]]
6 "XXX" "X" [[2 4] [7 9]]
3 "XXX" "X" [[2 3] [6 9]]
1 "XXXXX" "X" [[4 7]])
1 "XXXXX" "X" [[4 7]]
9 "XXX" "XXX" [[2 4] [8 11]])
;; Can't determine whether the splice site is shifted or not
(is (thrown-with-msg?
Exception
Expand Down Expand Up @@ -67,7 +68,7 @@

(deftest prot-seq-pstring-test
(are [pref-seq palt-seq start end m e]
(= (#'prot/prot-seq-pstring pref-seq palt-seq start end m) e)
(= (#'prot/prot-seq-pstring pref-seq palt-seq start end m) e)
"LAARNVLVKTPQHVKITDFGLAKLLGAEEKEYHAEGGKVPI"
"LAARNVLVKTPQHVKITDFGRAKLLGAEEKEYHAEGGKVPI"
838 878 {:ppos 858, :pref "L", :palt "R"}
Expand Down Expand Up @@ -116,6 +117,6 @@
:exon-frames [2 0 1 2 0 1 0 0 2 0 -1]
:exon-count 11}]
(are [pos ref alt res]
(= (with-open [seq-rdr (cseq/reader test-ref-seq-file)] (#'prot/mutation seq-rdr tp53 pos ref alt {})) res)
(= (with-open [seq-rdr (cseq/reader test-ref-seq-file)] (#'prot/mutation seq-rdr tp53 pos ref alt {})) res)
7676197 "G" "GGTCTTGTCCCTTA" (:mutation (hgvs/parse "p.P58*"))
7676202 "T" "TGTCCCTTAGTCTT" (:mutation (hgvs/parse "p.P58*")))))
1 change: 1 addition & 0 deletions test/varity/vcf_to_hgvs_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,7 @@
;; Extension
"chr2" 188974490 "A" "C" '("p.M1Lext-23")
"chr2" 189011772 "T" "C" '("p.*1467Qext*45") ; cf. ClinVar 101338
"chr11" 125655318 "TGA" "TAT" '("p.*477Yext*17" "p.*443Yext*17" "p.*477Yext*24")
;; NOTE: There are very few correct examples...

;; no effect
Expand Down

0 comments on commit 71dad6b

Please sign in to comment.