Skip to content

Commit

Permalink
Merge pull request #1684 from nextstrain/entropy-updates
Browse files Browse the repository at this point in the history
Entropy panel mk2
  • Loading branch information
jameshadfield authored Aug 20, 2023
2 parents 3e3f8bd + f30be82 commit 84485a1
Show file tree
Hide file tree
Showing 34 changed files with 2,879 additions and 1,094 deletions.
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
# Changelog

* Entropy panel now supports more complex genome architectures and has improved styling.
The JSON schema has been extended to allow segmented CDSs, which allows us to represent CDSs such as those which
wrap the origin (common in HepB), and those with ribosomal slippage (nCoV, EBOV). The visual representation of CDSs
now better conveys overlapping CDSs, both in the lower "nav" axis where CDSs are stacked on top of each other and
in the upper "main" axis where we now view the translations for each CDS individually.
A number of small genotype-related bugs have also been fixed and the internal representation of the genome streamlined.
For full details please see [#1684](https://github.com/nextstrain/auspice/pull/1684), and the schema changes are
detailed in [augur PR #1281](https://github.com/nextstrain/augur/pull/1281).

* Remove support for Node.js version 14. ([#1674](https://github.com/nextstrain/auspice/pull/1674))
* Add support for Node.js version 20. ([#1673](https://github.com/nextstrain/auspice/pull/1673))

Expand Down
156 changes: 156 additions & 0 deletions post-process-ncov.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
NEW_ANNOTATION = {
"nuc": {
"start": 1,
"end": 29903,
"strand": "+"
},
"ORF1ab": {
"gene": "ORF1ab",
"strand": "+",
"segments":[
{"start": 266, "end": 13468, "name": "ORF1a"},
{"start": 13468, "end": 21555, "name": "ORF1b"}
],
"display_name": "AKA polyprotein PP1ab. -1 ribisomal frameshift. Cleaved to yield 15 nonstructural proteins (NSP1-10, 12-16)"
},
"PP1a": {
"gene": "ORF1ab",
"start": 266,
"end": 13483,
"display_name": "Polyprotein PP1a. Cleaved to yield 11 nonstructural proteins (NSP1-11)"
},
"NSP3": {
"gene": "ORF1ab",
"color": "#2c7fb8",
"start": 266 + (819-1)*3,
"end": 266 + (2763-1)*3 -1,
"display_name": "Cleaved from short + long polyproteins",
"strand": "+",
},
"RdRp": {
"gene": "ORF1ab",
"color": "#41b6c4",
# Length is 2796nt (932aa)
"segments":[
{ # first segment is before the slip
"start": 266 + (4393-1)*3, # 13442
"end": 13468,
},
{
"start": 13468,
"end": 13468 + 2796 -1
}
],
"display_name": "NSP12; Cleaved from long polyprotein only; I'm not sure if the coordinates are correct, BTW!!!",
"strand": "+",
},
"S": {
"gene": "Spike",
"end": 25384,
"display_name": "structural protein; spike protein; surface glycoprotein",
"start": 21563,
"strand": "+",
},
"E": {
"end": 26472,
"dsiplay_name": "ORF4; structural protein; E protein",
"start": 26245,
"strand": "+",
"type": "CDS"
},
"M": {
"end": 27191,
"start": 26523,
"strand": "+",
"gene": "M",
"display_name": "ORF5; structural protein (membrane glycoprotein)"
},
"N": {
"end": 29533,
"display_name": "nucleocapsid phosphoprotein (ORF9)",
"start": 28274,
"strand": "+",
},
"ORF3a": {
"end": 26220,
"start": 25393,
"strand": "+",
},
"ORF6": {
"end": 27387,
"start": 27202,
"strand": "+",
},
"ORF7a": {
"end": 27759,
"start": 27394,
"strand": "+",
},
"ORF7b": {
"end": 27887,
"start": 27756,
"strand": "+",
},
"ORF8": {
"end": 28259,
"start": 27894,
"strand": "+",
},
"ORF9b": {
"end": 28577,
"start": 28284,
"strand": "+",
},
}

def a_pos_b(m):
    """Split a mutation string into its three parts.

    The first character and the last character are returned as-is; everything
    in between is parsed as an integer position. For example "D614G" becomes
    ("D", 614, "G").
    """
    from_state = m[0]
    position = int(m[1:-1])
    to_state = m[-1]
    return (from_state, position, to_state)

def recurse(node):
    """Rewrite the per-branch mutations of `node` and all its descendants, in place.

    For each node's `branch_attrs.mutations` dict (when present):
      * "ORF1a" mutations are copied unchanged to both "ORF1ab" and "PP1a",
        and the "ORF1a" key is removed.
      * "ORF1b" mutations are appended to "ORF1ab" with their position shifted
        by 4401, and the "ORF1b" key is removed.
      * When "ORF1ab" exists, "NSP3" and "RdRp" lists are (re)created holding
        the ORF1ab mutations at positions 819..2763 and 4393..5324
        respectively, renumbered so each range starts at 1.

    Returns None; the node dicts are modified directly.
    """
    mutations = node.get('branch_attrs', {}).get('mutations', {})

    if 'ORF1a' in mutations:
        # ORF1a -> ORF1ab is no-change
        orf1a_muts = mutations['ORF1a']
        mutations['ORF1ab'] = list(orf1a_muts)
        mutations['PP1a'] = list(orf1a_muts)
        del mutations['ORF1a']

    if 'ORF1b' in mutations:
        # ORF1b is in phase with ORF1a, so only a constant offset is needed
        combined = mutations.setdefault('ORF1ab', [])
        combined.extend(
            f"{a}{pos+4401}{b}"
            for a, pos, b in map(a_pos_b, mutations['ORF1b'])
        )
        del mutations['ORF1b']

    if 'ORF1ab' in mutations:
        parsed = [a_pos_b(m) for m in mutations['ORF1ab']]
        # relative to PP1ab the NSP3 coords are 819..2763 (in aa space)
        mutations['NSP3'] = [
            f"{a}{pos-819+1}{b}" for a, pos, b in parsed if 819 <= pos <= 2763
        ]
        # relative to PP1ab the RdRp coords are 4393..5324 (in aa space, so
        # don't need to worry about -1 slippage)
        mutations['RdRp'] = [
            f"{a}{pos-4393+1}{b}" for a, pos, b in parsed if 4393 <= pos <= 5324
        ]

    if "children" in node:
        for child in node["children"]:
            recurse(child)



import json

# Load the input dataset JSON from the local ./data directory.
with open("./data/nextclade_sars-cov-2.json", 'r') as source:
    dataset = json.load(source)

# Rewrite the mutations on every node of the tree, in place.
recurse(dataset['tree'])

# Swap in the adjusted annotations and flag the dataset as modified in its title.
dataset['meta'].update({
    'genome_annotations': NEW_ANNOTATION,
    'title': 'nCoV with adjusted annotations (use with caution!)',
})

# Write the modified dataset out as indented JSON.
with open("./datasets/entropy2023/entropy-test-data_ncov.json", 'w') as sink:
    json.dump(dataset, sink, indent=2)
76 changes: 6 additions & 70 deletions scripts/get-data.sh
Original file line number Diff line number Diff line change
@@ -1,84 +1,20 @@
#!/bin/bash

data_files=(
"dengue_all.json" "dengue_denv1.json" "dengue_denv2.json" "dengue_denv3.json" "dengue_denv4.json"\
"ebola.json" "ebola_root-sequence.json" \
"ebola_2019-09-14-no-epi-id_meta.json" "ebola_2019-09-14-no-epi-id_tree.json" \
"lassa_s_tree.json" "lassa_s_meta.json" \
"lassa_l_tree.json" "lassa_l_meta.json" \
"measles.json" \
"mers_tree.json" "mers_meta.json" \
"mumps_global.json" "mumps_na.json" \
"WNV_NA_tree.json" "WNV_NA_meta.json" \
"entropy-test-data_hepB.json" \
"entropy-test-data_ncov.json" \
"zika.json" \
"tb_global_meta.json" "tb_global_tree.json" \
"enterovirus_d68_genome_meta.json" "enterovirus_d68_genome_tree.json" \
"enterovirus_d68_vp1_meta.json" "enterovirus_d68_vp1_tree.json" \
############## AVIAN FLU ##############
"flu_avian_h7n9_ha.json" \
"flu_avian_h7n9_mp.json" \
"flu_avian_h7n9_na.json" \
"flu_avian_h7n9_np.json" \
"flu_avian_h7n9_ns.json" \
"flu_avian_h7n9_pa.json" \
"flu_avian_h7n9_pb1.json" \
"flu_avian_h7n9_pb2.json" \
############## SEASONAL FLU ##############
"flu_seasonal_h3n2_ha_2y.json" "flu_seasonal_h3n2_ha_2y_tip-frequencies.json" \
"flu_seasonal_h3n2_ha_3y.json" "flu_seasonal_h3n2_ha_3y_tip-frequencies.json" \
"flu_seasonal_h3n2_ha_6y.json" "flu_seasonal_h3n2_ha_6y_tip-frequencies.json" \
"flu_seasonal_h3n2_ha_12y.json" "flu_seasonal_h3n2_ha_12y_tip-frequencies.json" \
"flu_seasonal_h3n2_na_2y.json" "flu_seasonal_h3n2_na_2y_tip-frequencies.json" \
"flu_seasonal_h3n2_na_3y.json" "flu_seasonal_h3n2_na_3y_tip-frequencies.json" \
"flu_seasonal_h3n2_na_6y.json" "flu_seasonal_h3n2_na_6y_tip-frequencies.json" \
"flu_seasonal_h3n2_na_12y.json" "flu_seasonal_h3n2_na_12y_tip-frequencies.json" \
"flu_seasonal_h1n1pdm_ha_2y.json" "flu_seasonal_h1n1pdm_ha_2y_tip-frequencies.json" \
"flu_seasonal_h1n1pdm_ha_3y.json" "flu_seasonal_h1n1pdm_ha_3y_tip-frequencies.json" \
"flu_seasonal_h1n1pdm_ha_6y.json" "flu_seasonal_h1n1pdm_ha_6y_tip-frequencies.json" \
"flu_seasonal_h1n1pdm_ha_12y.json" "flu_seasonal_h1n1pdm_ha_12y_tip-frequencies.json" \
"flu_seasonal_h1n1pdm_ha_pandemic_meta.json" "flu_seasonal_h1n1pdm_ha_pandemic_tree.json" "flu_seasonal_h1n1pdm_ha_pandemic_tip-frequencies.json" \
"flu_seasonal_h1n1pdm_na_2y.json" "flu_seasonal_h1n1pdm_na_2y_tip-frequencies.json" \
"flu_seasonal_h1n1pdm_na_3y.json" "flu_seasonal_h1n1pdm_na_3y_tip-frequencies.json" \
"flu_seasonal_h1n1pdm_na_6y.json" "flu_seasonal_h1n1pdm_na_6y_tip-frequencies.json" \
"flu_seasonal_h1n1pdm_na_12y.json" "flu_seasonal_h1n1pdm_na_12y_tip-frequencies.json" \
"flu_seasonal_h1n1pdm_na_pandemic_tree.json" "flu_seasonal_h1n1pdm_na_pandemic_meta.json" "flu_seasonal_h1n1pdm_na_pandemic_tip-frequencies.json" \
"flu_seasonal_vic_ha_2y.json" "flu_seasonal_vic_ha_2y_tip-frequencies.json" "flu_seasonal_vic_ha_2y_root-sequence.json" \
"flu_seasonal_vic_ha_3y.json" "flu_seasonal_vic_ha_3y_tip-frequencies.json" "flu_seasonal_vic_ha_3y_root-sequence.json" \
"flu_seasonal_vic_ha_6y.json" "flu_seasonal_vic_ha_6y_tip-frequencies.json" "flu_seasonal_vic_ha_6y_root-sequence.json" \
"flu_seasonal_vic_ha_12y.json" "flu_seasonal_vic_ha_12y_tip-frequencies.json" "flu_seasonal_vic_ha_12y_root-sequence.json" \
"flu_seasonal_vic_na_2y.json" "flu_seasonal_vic_na_2y_tip-frequencies.json" "flu_seasonal_vic_na_2y_root-sequence.json" \
"flu_seasonal_vic_na_3y.json" "flu_seasonal_vic_na_3y_tip-frequencies.json" "flu_seasonal_vic_na_3y_root-sequence.json" \
"flu_seasonal_vic_na_6y.json" "flu_seasonal_vic_na_6y_tip-frequencies.json" "flu_seasonal_vic_na_6y_root-sequence.json" \
"flu_seasonal_vic_na_12y.json" "flu_seasonal_vic_na_12y_tip-frequencies.json" "flu_seasonal_vic_na_12y_root-sequence.json" \
"flu_seasonal_yam_ha_2y.json" "flu_seasonal_yam_ha_2y_tip-frequencies.json" "flu_seasonal_yam_ha_2y_root-sequence.json" \
"flu_seasonal_yam_ha_3y.json" "flu_seasonal_yam_ha_3y_tip-frequencies.json" "flu_seasonal_yam_ha_3y_root-sequence.json" \
"flu_seasonal_yam_ha_6y.json" "flu_seasonal_yam_ha_6y_tip-frequencies.json" "flu_seasonal_yam_ha_6y_root-sequence.json" \
"flu_seasonal_yam_ha_12y.json" "flu_seasonal_yam_ha_12y_tip-frequencies.json" "flu_seasonal_yam_ha_12y_root-sequence.json" \
"flu_seasonal_yam_na_2y.json" "flu_seasonal_yam_na_2y_tip-frequencies.json" "flu_seasonal_yam_na_2y_root-sequence.json" \
"flu_seasonal_yam_na_3y.json" "flu_seasonal_yam_na_3y_tip-frequencies.json" "flu_seasonal_yam_na_3y_root-sequence.json" \
"flu_seasonal_yam_na_6y.json" "flu_seasonal_yam_na_6y_tip-frequencies.json" "flu_seasonal_yam_na_6y_root-sequence.json" \
"flu_seasonal_yam_na_12y.json" "flu_seasonal_yam_na_12y_tip-frequencies.json" "flu_seasonal_yam_na_12y_root-sequence.json" \
############## LATEST CORE SARS-CoV-2 (COVID-19) BUILDS ##############
"ncov_gisaid_global.json" "ncov_gisaid_global_tip-frequencies.json" \
"ncov_gisaid_africa.json" "ncov_gisaid_africa_tip-frequencies.json" \
"ncov_gisaid_asia.json" "ncov_gisaid_asia_tip-frequencies.json" \
"ncov_gisaid_europe.json" "ncov_gisaid_europe_tip-frequencies.json" \
"ncov_gisaid_north-america.json" "ncov_gisaid_north-america_tip-frequencies.json" \
"ncov_gisaid_oceania.json" "ncov_gisaid_oceania_tip-frequencies.json" \
"ncov_gisaid_south-america.json" "ncov_gisaid_south-america_tip-frequencies.json" \
############## TIMESTAMPED SARS-CoV-2 BUILDS USED IN NARRATIVES #############
"ncov_2020-01-23.json" "ncov_2020-01-25.json" "ncov_2020-01-26.json" "ncov_2020-01-30.json" \
"ncov_2020-03-04.json" "ncov_2020-03-05.json" "ncov_2020-03-11.json" "ncov_2020-03-13.json" \
"ncov_2020-03-20.json" "ncov_2020-03-27.json" "ncov_2020-04-03.json" \
"ncov_global_2020-04-09.json" "ncov_north-america_2020-04-17.json" \
"monkeypox_mpxv.json" \
)

rm -rf data/
mkdir -p data/
for i in "${data_files[@]}"
do
curl http://data.nextstrain.org/"${i}" --compressed -o data/"${i}"
curl http://staging.nextstrain.org/"${i}" --compressed -o data/"${i}"
done

echo "Copying the test datasets from test/data to data"
cp -r test/data/*.json data/

echo "The local data directory ./data now contains a selection of up-to-date datasets from http://data.nextstrain.org"
28 changes: 3 additions & 25 deletions src/actions/colors.js
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import { determineColorByGenotypeMutType, calcNodeColor } from "../util/colorHelpers";
import { calcNodeColor } from "../util/colorHelpers";
import { isColorByGenotype } from "../util/getGenotype";
import { calcColorScale } from "../util/colorScale";
import { timerStart, timerEnd } from "../util/perf";
import { changeMutType } from "./entropy";
import { changeEntropyCdsSelection } from "./entropy";
import { updateFrequencyDataDebounced } from "./frequencies";
import * as types from "./types";

Expand All @@ -23,30 +23,9 @@ export const changeColorBy = (providedColorBy = undefined) => {
const nodeColors = calcNodeColor(tree, colorScale);
const nodeColorsToo = treeToo.loaded ? calcNodeColor(treeToo, colorScale) : undefined;

/* step 3: change in mutType? */
const colorByMutType = determineColorByGenotypeMutType(colorBy);
const newMutType = colorByMutType !== controls.mutType ? colorByMutType : false;

timerEnd("changeColorBy calculations"); /* end timer before dispatch */

/* step 4: dispatch */

/*
* Changing the mutType must happen _before_ updating colors because the
* entropy bars need to be recomputed for the new mutType before applying
* the new genotype colorBy. Otherwise, the entropy component tries to
* apply the new genotype colorBy to bars of the wrong mutType, which in
* turn causes all sorts of errors ("entropy out of sync" and selected
* positions not matching the data bars).
*
* The state dependencies are a bit tangled here, but de-tangling them is a
* larger project for another time.
*
* -trs, 14 Nov 2018
*/
if (newMutType) {
dispatch(changeMutType(newMutType));
}
dispatch(changeEntropyCdsSelection(colorBy));

dispatch({
type: types.NEW_COLORS,
Expand All @@ -57,7 +36,6 @@ export const changeColorBy = (providedColorBy = undefined) => {
version: colorScale.version
});

/* step 5 - frequency dispatch */
if (frequencies.loaded) {
updateFrequencyDataDebounced(dispatch, getState);
}
Expand Down
Loading

0 comments on commit 84485a1

Please sign in to comment.