Skip to content

Commit

Permalink
test v0.3.5
Browse files (browse the repository at this point in the history)
  • Loading branch information
andyjslee committed Sep 26, 2024
1 parent 2e622d6 commit 6e574ea
Show file tree
Hide file tree
Showing 11 changed files with 863 additions and 532 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "vstolib"
version = "0.3.2"
version = "0.3.5"
edition = "2021"

[package.metadata.maturin]
Expand Down
402 changes: 201 additions & 201 deletions examples/outputs/hg002_sniffles2_ensembl_annotated.tsv

Large diffs are not rendered by default.

402 changes: 201 additions & 201 deletions examples/outputs/hg002_sniffles2_gencode_annotated.tsv

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ build-backend = "maturin"

[project]
name = "vstol"
version = "0.3.4"
version = "0.3.5"
requires-python = ">=3.10"
keywords = [
"somatic variants",
Expand Down
93 changes: 69 additions & 24 deletions python/vstolib/ensembl.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,7 @@ class Ensembl(Annotator):
# Accessor for the pyensembl release handle. On first access (while
# self._ensembl is None) an EnsemblRelease is built from self.release and
# self.species and stored on self._ensembl; the stored object is returned.
@property
def ensembl(self) -> pyensembl.EnsemblRelease:
if self._ensembl is None:
# NOTE(review): the wrapped two-line assignment and the one-line assignment
# below are the same statement. The hunk header (-39,8 +39,7) suggests they
# are the removed and added forms of this diff shown without +/- markers;
# confirm against the commit before treating both as live code.
self._ensembl = pyensembl.EnsemblRelease(release=self.release,
species=self.species)
self._ensembl = pyensembl.EnsemblRelease(release=self.release, species=self.species)
return self._ensembl

@property
Expand Down Expand Up @@ -86,8 +85,7 @@ def annotate_position_using_pyensembl(
"""
variant_call_annotations = []
chromosome = chromosome.replace('chr', '')
genes = self.ensembl.genes_at_locus(contig=chromosome,
position=position)
genes = self.ensembl.genes_at_locus(contig=chromosome, position=position)
if len(genes) == 0:
variant_call_annotation = VariantCallAnnotation(
annotator=Annotators.ENSEMBL,
Expand All @@ -98,26 +96,73 @@ def annotate_position_using_pyensembl(
variant_call_annotations.append(variant_call_annotation)
else:
for gene in genes:
region = GenomicRegionTypes.INTRONIC
exon_ids = self.ensembl.exon_ids_of_gene_id(gene.gene_id)
for exon_id in exon_ids:
exon = self.ensembl.exon_by_id(exon_id=exon_id)
if exon.start <= position <= exon.end:
region = GenomicRegionTypes.EXONIC
break
variant_call_annotation = VariantCallAnnotation(
annotator=Annotators.ENSEMBL,
region=region,
species=self.species,
annotator_version=str(self.release),
gene_id=gene.gene_id,
gene_id_stable=gene.gene_id,
gene_name=gene.gene_name,
gene_strand=gene.strand,
gene_type=gene.biotype,
gene_version=''
)
variant_call_annotations.append(variant_call_annotation)
for transcript_id in self.ensembl.transcript_ids_of_gene_id(gene.gene_id):
transcript = self.ensembl.transcript_by_id(transcript_id)
if transcript.start > position or transcript.end < position:
continue
if transcript.contains_start_codon and transcript.contains_stop_codon: # protein-coding transcript
if transcript.strand == '+':
cds_start = transcript.start_codon_positions[0]
cds_end = transcript.stop_codon_positions[-1]
if position < cds_start or position > cds_end:
region = GenomicRegionTypes.UNTRANSLATED_REGION
else:
region = GenomicRegionTypes.INTRONIC
else:
cds_start = transcript.start_codon_positions[-1]
cds_end = transcript.stop_codon_positions[0]
if position > cds_start or position < cds_end:
region = GenomicRegionTypes.UNTRANSLATED_REGION
else:
region = GenomicRegionTypes.INTRONIC
else:
region = GenomicRegionTypes.INTRONIC
if region == GenomicRegionTypes.UNTRANSLATED_REGION:
variant_call_annotation = VariantCallAnnotation(
annotator=Annotators.ENSEMBL,
region=region,
species=self.species,
annotator_version=str(self.release),
gene_id=gene.gene_id,
gene_id_stable=gene.gene_id,
gene_name=gene.gene_name,
gene_strand=gene.strand,
gene_type=gene.biotype,
gene_version='',
transcript_id=transcript.transcript_id,
transcript_id_stable=transcript.transcript_id,
transcript_name=transcript.transcript_name,
transcript_strand=transcript.strand,
transcript_type=transcript.biotype
)
else:
exon_id = ''
for exon in transcript.exons:
if exon.start <= position <= exon.end:
region = GenomicRegionTypes.EXONIC
exon_id = exon.exon_id
break
variant_call_annotation = VariantCallAnnotation(
annotator=Annotators.ENSEMBL,
region=region,
species=self.species,
annotator_version=str(self.release),
gene_id=gene.gene_id,
gene_id_stable=gene.gene_id,
gene_name=gene.gene_name,
gene_strand=gene.strand,
gene_type=gene.biotype,
gene_version='',
transcript_id=transcript.transcript_id,
transcript_id_stable=transcript.transcript_id,
transcript_name=transcript.transcript_name,
transcript_strand=transcript.strand,
transcript_type=transcript.biotype,
transcript_version='',
exon_id=exon_id,
exon_id_stable=exon_id
)
variant_call_annotations.append(variant_call_annotation)
return variant_call_annotations

def annotate_variant_call_using_pyensembl(
Expand Down
Loading

0 comments on commit 6e574ea

Please sign in to comment.