diff --git a/CHANGELOG.md b/CHANGELOG.md index b775b5d0b..513c5cc49 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,14 @@ # Changelog +* Entropy panel now supports more complex genome architectures and has improved styling. +The JSON schema has been extended to allow segmented CDSs, which allows us to represent CDSs such as those which +wrap the origin (common in HepB), and those with ribosomal slippage (nCoV, EBOV). The visual representation of CDSs +now better conveys overlapping CDSs, both in the lower "nav" axis where CDSs are stacked on top of each other and +in the upper "main" axis where we now view the translations for each CDS individually. +A number of small genotype-related bugs have also been fixed and the internal representation of the genome streamlined. +For full details please see [#1684](https://github.com/nextstrain/auspice/pull/1684), and the schema changes are +detailed in [augur PR #1281](https://github.com/nextstrain/augur/pull/1281). + * Remove support for Node.js version 14. ([#1674](https://github.com/nextstrain/auspice/pull/1674)) * Add support for Node.js version 20. ([#1673](https://github.com/nextstrain/auspice/pull/1673)) diff --git a/post-process-ncov.py b/post-process-ncov.py new file mode 100644 index 000000000..8434e8e2c --- /dev/null +++ b/post-process-ncov.py @@ -0,0 +1,156 @@ +NEW_ANNOTATION = { + "nuc": { + "start": 1, + "end": 29903, + "strand": "+" + }, + "ORF1ab": { + "gene": "ORF1ab", + "strand": "+", + "segments":[ + {"start": 266, "end": 13468, "name": "ORF1a"}, + {"start": 13468, "end": 21555, "name": "ORF1b"} + ], + "display_name": "AKA polyprotein PP1ab. -1 ribisomal frameshift. Cleaved to yield 15 nonstructural proteins (NSP1-10, 12-16)" + }, + "PP1a": { + "gene": "ORF1ab", + "start": 266, + "end": 13483, + "display_name": "Polyprotein PP1a. Cleaved to yield 11 nonstructural proteins (NSP1-11)" + }, + "NSP3": { + "gene": "ORF1ab", + "color": "#2c7fb8", + "start": 266 + (819-1)*3, + "end": 266 + (2763-1)*3 -1, + "display_name": "Cleaved from short + long polyproteins", + "strand": "+", + }, + "RdRp": { + "gene": "ORF1ab", + "color": "#41b6c4", + # Length is 2796nt (932aa) + "segments":[ + { # first segment is before the slip + "start": 266 + (4393-1)*3, # 13442 + "end": 13468, + }, + { + "start": 13468, + "end": 13468 + 2796 -1 + } + ], + "display_name": "NSP12; Cleaved from long polyprotein only; I'm not sure if the coordinates are correct, BTW!!!", + "strand": "+", + }, + "S": { + "gene": "Spike", + "end": 25384, + "display_name": "structural protein; spike protein; surface glycoprotein", + "start": 21563, + "strand": "+", + }, + "E": { + "end": 26472, + "dsiplay_name": "ORF4; structural protein; E protein", + "start": 26245, + "strand": "+", + "type": "CDS" + }, + "M": { + "end": 27191, + "start": 26523, + "strand": "+", + "gene": "M", + "display_name": "ORF5; structural protein (membrane glycoprotein)" + }, + "N": { + "end": 29533, + "display_name": "nucleocapsid phosphoprotein (ORF9)", + "start": 28274, + "strand": "+", + }, + "ORF3a": { + "end": 26220, + "start": 25393, + "strand": "+", + }, + "ORF6": { + "end": 27387, + "start": 27202, + "strand": "+", + }, + "ORF7a": { + "end": 27759, + "start": 27394, + "strand": "+", + }, + "ORF7b": { + "end": 27887, + "start": 27756, + "strand": "+", + }, + "ORF8": { + "end": 28259, + "start": 27894, + "strand": "+", + }, + "ORF9b": { + "end": 28577, + "start": 28284, + "strand": "+", + }, +} + +def a_pos_b(m): + return (m[0], int(m[1:-1]), m[-1]) + +def recurse(node): + + mutations = node.get('branch_attrs', {}).get('mutations', {}) + if 'ORF1a' in mutations: + # ORF1a -> ORF1ab is no-change + mutations['ORF1ab'] = [*mutations['ORF1a']] + mutations['PP1a'] = [*mutations['ORF1a']] + del mutations['ORF1a'] + if 'ORF1b' in mutations: + if 'ORF1ab' not in mutations: + mutations['ORF1ab'] = []; + for m in mutations['ORF1b']: + # ORF1b is in phase with ORF1a + a, pos, b = a_pos_b(m) + mutations['ORF1ab'].append(f"{a}{pos+4401}{b}") + del mutations['ORF1b'] + + # Extract mutations which fall in NSP3 + if 'ORF1ab' in mutations: + mutations['NSP3'] = [] + for m in mutations['ORF1ab']: + a, pos, b = a_pos_b(m) + # relative to PP1ab the coords are 819..2763 (in aa space) + if pos>=819 and pos<=2763: + mutations['NSP3'].append(f"{a}{pos-819+1}{b}") + + # Extract mutations which fall in RdRp + if 'ORF1ab' in mutations: + mutations['RdRp'] = [] + for m in mutations['ORF1ab']: + a, pos, b = a_pos_b(m) + # relative to PP1ab the coords are 4393..5324 (in aa space, so don't need to worry about -1 slippage) + if pos>=4393 and pos<=5324: + mutations['RdRp'].append(f"{a}{pos-4393+1}{b}") + + if "children" in node: + [recurse(child) for child in node["children"]] + + + +import json +with open("./data/nextclade_sars-cov-2.json", 'r') as fh: + dataset = json.load(fh) +recurse(dataset['tree']) +dataset['meta']['genome_annotations'] = NEW_ANNOTATION +dataset['meta']['title'] = 'nCoV with adjusted annotations (use with caution!)' +with open("./datasets/entropy2023/entropy-test-data_ncov.json", 'w') as fh: + json.dump(dataset, fh, indent=2) diff --git a/scripts/get-data.sh b/scripts/get-data.sh index ebd53213f..e91c3edf8 100755 --- a/scripts/get-data.sh +++ b/scripts/get-data.sh @@ -1,84 +1,20 @@ #!/bin/bash data_files=( - "dengue_all.json" "dengue_denv1.json" "dengue_denv2.json" "dengue_denv3.json" "dengue_denv4.json"\ - "ebola.json" "ebola_root-sequence.json" \ - "ebola_2019-09-14-no-epi-id_meta.json" "ebola_2019-09-14-no-epi-id_tree.json" \ - "lassa_s_tree.json" "lassa_s_meta.json" \ - "lassa_l_tree.json" "lassa_l_meta.json" \ - "measles.json" \ - "mers_tree.json" "mers_meta.json" \ - "mumps_global.json" "mumps_na.json" \ - "WNV_NA_tree.json" "WNV_NA_meta.json" \ + "entropy-test-data_hepB.json" \ + "entropy-test-data_ncov.json" \ "zika.json" \ - "tb_global_meta.json" "tb_global_tree.json" \ - "enterovirus_d68_genome_meta.json" "enterovirus_d68_genome_tree.json" \ - "enterovirus_d68_vp1_meta.json" "enterovirus_d68_vp1_tree.json" \ - ############## AVIAN FLU ############## - "flu_avian_h7n9_ha.json" \ - "flu_avian_h7n9_mp.json" \ - "flu_avian_h7n9_na.json" \ - "flu_avian_h7n9_np.json" \ - "flu_avian_h7n9_ns.json" \ - "flu_avian_h7n9_pa.json" \ - "flu_avian_h7n9_pb1.json" \ - "flu_avian_h7n9_pb2.json" \ - ############## SEASONAL FLU ############## - "flu_seasonal_h3n2_ha_2y.json" "flu_seasonal_h3n2_ha_2y_tip-frequencies.json" \ - "flu_seasonal_h3n2_ha_3y.json" "flu_seasonal_h3n2_ha_3y_tip-frequencies.json" \ - "flu_seasonal_h3n2_ha_6y.json" "flu_seasonal_h3n2_ha_6y_tip-frequencies.json" \ - "flu_seasonal_h3n2_ha_12y.json" "flu_seasonal_h3n2_ha_12y_tip-frequencies.json" \ - "flu_seasonal_h3n2_na_2y.json" "flu_seasonal_h3n2_na_2y_tip-frequencies.json" \ - "flu_seasonal_h3n2_na_3y.json" "flu_seasonal_h3n2_na_3y_tip-frequencies.json" \ - "flu_seasonal_h3n2_na_6y.json" "flu_seasonal_h3n2_na_6y_tip-frequencies.json" \ - "flu_seasonal_h3n2_na_12y.json" "flu_seasonal_h3n2_na_12y_tip-frequencies.json" \ - "flu_seasonal_h1n1pdm_ha_2y.json" "flu_seasonal_h1n1pdm_ha_2y_tip-frequencies.json" \ - "flu_seasonal_h1n1pdm_ha_3y.json" "flu_seasonal_h1n1pdm_ha_3y_tip-frequencies.json" \ - "flu_seasonal_h1n1pdm_ha_6y.json" "flu_seasonal_h1n1pdm_ha_6y_tip-frequencies.json" \ - "flu_seasonal_h1n1pdm_ha_12y.json" "flu_seasonal_h1n1pdm_ha_12y_tip-frequencies.json" \ - "flu_seasonal_h1n1pdm_ha_pandemic_meta.json" "flu_seasonal_h1n1pdm_ha_pandemic_tree.json" "flu_seasonal_h1n1pdm_ha_pandemic_tip-frequencies.json" \ - "flu_seasonal_h1n1pdm_na_2y.json" "flu_seasonal_h1n1pdm_na_2y_tip-frequencies.json" \ - "flu_seasonal_h1n1pdm_na_3y.json" "flu_seasonal_h1n1pdm_na_3y_tip-frequencies.json" \ - "flu_seasonal_h1n1pdm_na_6y.json" "flu_seasonal_h1n1pdm_na_6y_tip-frequencies.json" \ - "flu_seasonal_h1n1pdm_na_12y.json" "flu_seasonal_h1n1pdm_na_12y_tip-frequencies.json" \ - "flu_seasonal_h1n1pdm_na_pandemic_tree.json" "flu_seasonal_h1n1pdm_na_pandemic_meta.json" "flu_seasonal_h1n1pdm_na_pandemic_tip-frequencies.json" \ - "flu_seasonal_vic_ha_2y.json" "flu_seasonal_vic_ha_2y_tip-frequencies.json" "flu_seasonal_vic_ha_2y_root-sequence.json" \ - "flu_seasonal_vic_ha_3y.json" "flu_seasonal_vic_ha_3y_tip-frequencies.json" "flu_seasonal_vic_ha_3y_root-sequence.json" \ - "flu_seasonal_vic_ha_6y.json" "flu_seasonal_vic_ha_6y_tip-frequencies.json" "flu_seasonal_vic_ha_6y_root-sequence.json" \ - "flu_seasonal_vic_ha_12y.json" "flu_seasonal_vic_ha_12y_tip-frequencies.json" "flu_seasonal_vic_ha_12y_root-sequence.json" \ - "flu_seasonal_vic_na_2y.json" "flu_seasonal_vic_na_2y_tip-frequencies.json" "flu_seasonal_vic_na_2y_root-sequence.json" \ - "flu_seasonal_vic_na_3y.json" "flu_seasonal_vic_na_3y_tip-frequencies.json" "flu_seasonal_vic_na_3y_root-sequence.json" \ - "flu_seasonal_vic_na_6y.json" "flu_seasonal_vic_na_6y_tip-frequencies.json" "flu_seasonal_vic_na_6y_root-sequence.json" \ - "flu_seasonal_vic_na_12y.json" "flu_seasonal_vic_na_12y_tip-frequencies.json" "flu_seasonal_vic_na_12y_root-sequence.json" \ - "flu_seasonal_yam_ha_2y.json" "flu_seasonal_yam_ha_2y_tip-frequencies.json" "flu_seasonal_yam_ha_2y_root-sequence.json" \ - "flu_seasonal_yam_ha_3y.json" "flu_seasonal_yam_ha_3y_tip-frequencies.json" "flu_seasonal_yam_ha_3y_root-sequence.json" \ - "flu_seasonal_yam_ha_6y.json" "flu_seasonal_yam_ha_6y_tip-frequencies.json" "flu_seasonal_yam_ha_6y_root-sequence.json" \ - "flu_seasonal_yam_ha_12y.json" "flu_seasonal_yam_ha_12y_tip-frequencies.json" "flu_seasonal_yam_ha_12y_root-sequence.json" \ - "flu_seasonal_yam_na_2y.json" "flu_seasonal_yam_na_2y_tip-frequencies.json" "flu_seasonal_yam_na_2y_root-sequence.json" \ - "flu_seasonal_yam_na_3y.json" "flu_seasonal_yam_na_3y_tip-frequencies.json" "flu_seasonal_yam_na_3y_root-sequence.json" \ - "flu_seasonal_yam_na_6y.json" "flu_seasonal_yam_na_6y_tip-frequencies.json" "flu_seasonal_yam_na_6y_root-sequence.json" \ - "flu_seasonal_yam_na_12y.json" "flu_seasonal_yam_na_12y_tip-frequencies.json" "flu_seasonal_yam_na_12y_root-sequence.json" \ - ############## LATEST CORE SARS-CoV-2 (COVID-19) BUILDS ############## - "ncov_gisaid_global.json" "ncov_gisaid_global_tip-frequencies.json" \ - "ncov_gisaid_africa.json" "ncov_gisaid_africa_tip-frequencies.json" \ - "ncov_gisaid_asia.json" "ncov_gisaid_asia_tip-frequencies.json" \ - "ncov_gisaid_europe.json" "ncov_gisaid_europe_tip-frequencies.json" \ - "ncov_gisaid_north-america.json" "ncov_gisaid_north-america_tip-frequencies.json" \ - "ncov_gisaid_oceania.json" "ncov_gisaid_oceania_tip-frequencies.json" \ - "ncov_gisaid_south-america.json" "ncov_gisaid_south-america_tip-frequencies.json" \ - ############## TIMESTAMPED SARS-CoV-2 BUILDS USED IN NARRATIVES ############# - "ncov_2020-01-23.json" "ncov_2020-01-25.json" "ncov_2020-01-26.json" "ncov_2020-01-30.json" \ - "ncov_2020-03-04.json" "ncov_2020-03-05.json" "ncov_2020-03-11.json" "ncov_2020-03-13.json" \ - "ncov_2020-03-20.json" "ncov_2020-03-27.json" "ncov_2020-04-03.json" \ - "ncov_global_2020-04-09.json" "ncov_north-america_2020-04-17.json" \ + "monkeypox_mpxv.json" \ ) rm -rf data/ mkdir -p data/ for i in "${data_files[@]}" do - curl http://data.nextstrain.org/"${i}" --compressed -o data/"${i}" + curl http://staging.nextstrain.org/"${i}" --compressed -o data/"${i}" done +echo "Copying the test datasets from test/data to data" +cp -r test/data/*.json data/ echo "The local data directory ./data now contains a selection of up-to-date datasets from http://data.nextstrain.org" diff --git a/src/actions/colors.js b/src/actions/colors.js index d72a75ba9..11bca2c06 100644 --- a/src/actions/colors.js +++ b/src/actions/colors.js @@ -1,8 +1,8 @@ -import { determineColorByGenotypeMutType, calcNodeColor } from "../util/colorHelpers"; +import { calcNodeColor } from "../util/colorHelpers"; import { isColorByGenotype } from "../util/getGenotype"; import { calcColorScale } from "../util/colorScale"; import { timerStart, timerEnd } from "../util/perf"; -import { changeMutType } from "./entropy"; +import { changeEntropyCdsSelection } from "./entropy"; import { updateFrequencyDataDebounced } from "./frequencies"; import * as types from "./types"; @@ -23,30 +23,9 @@ export const changeColorBy = (providedColorBy = undefined) => { const nodeColors = calcNodeColor(tree, colorScale); const nodeColorsToo = treeToo.loaded ? calcNodeColor(treeToo, colorScale) : undefined; - /* step 3: change in mutType? */ - const colorByMutType = determineColorByGenotypeMutType(colorBy); - const newMutType = colorByMutType !== controls.mutType ? colorByMutType : false; - timerEnd("changeColorBy calculations"); /* end timer before dispatch */ - /* step 4: dispatch */ - - /* - * Changing the mutType must happen _before_ updating colors because the - * entropy bars need to be recomputed for the new mutType before applying - * the new genotype colorBy. Otherwise, the entropy component tries to - * apply the new genotype colorBy to bars of the wrong mutType, which in - * turn causes all sorts of errors ("entropy out of sync" and selected - * positions not matching the data bars). - * - * The state dependencies are a bit tangled here, but de-tangling them is a - * larger project for another time. - * - * -trs, 14 Nov 2018 - */ - if (newMutType) { - dispatch(changeMutType(newMutType)); - } + dispatch(changeEntropyCdsSelection(colorBy)); dispatch({ type: types.NEW_COLORS, @@ -57,7 +36,6 @@ export const changeColorBy = (providedColorBy = undefined) => { version: colorScale.version }); - /* step 5 - frequency dispatch */ if (frequencies.loaded) { updateFrequencyDataDebounced(dispatch, getState); } diff --git a/src/actions/entropy.js b/src/actions/entropy.js index 085078a4a..165cff876 100644 --- a/src/actions/entropy.js +++ b/src/actions/entropy.js @@ -1,22 +1,101 @@ import { debounce } from 'lodash'; import { calcEntropyInView } from "../util/entropy"; +import { nucleotide_gene, equalArrays } from "../util/globals"; import * as types from "./types"; +import { isColorByGenotype, decodeColorByGenotype, getCdsFromGenotype} from "../util/getGenotype"; + /* debounce works better than throttle, as it _won't_ update while events are still coming in (e.g. dragging the date slider) */ export const updateEntropyVisibility = debounce((dispatch, getState) => { const { entropy, controls, tree } = getState(); if (!tree.nodes || !tree.visibility || - !entropy.geneMap || + !entropy.genomeMap || controls.animationPlayPauseButton !== "Play" ) {return;} - const [data, maxYVal] = calcEntropyInView(tree.nodes, tree.visibility, controls.mutType, entropy.geneMap, entropy.showCounts); + const [data, maxYVal] = calcEntropyInView(tree.nodes, tree.visibility, entropy.selectedCds, entropy.showCounts); dispatch({type: types.ENTROPY_DATA, data, maxYVal}); }, 500, { leading: true, trailing: true }); -export const changeMutType = (newMutType) => (dispatch, getState) => { - dispatch({type: types.TOGGLE_MUT_TYPE, data: newMutType}); - updateEntropyVisibility(dispatch, getState); +/** + * Returns a thunk which makes zero or one dispatches to update the entropy reducer + * if the selected CDS and/or positions need updating. + * + * The argument may vary: + * - It may be a colorBy string, which may or may not be a genotype coloring + * - It may be a CDS (Object) + * - It may be the constant `nucleotide_gene` + * + * The expected state updates to (selectedCds, selectedPositions) are as follows: + * (nuc = nucleotide_gene, x,y,z are positions, [*] means any (or no) positions selected, + * no-op means that no dispatches are made and thus the state is unchanged): + * + * ----------------------------------------------------------------------------------- + * PREVIOUS STATE | EXAMPLE ARGUMENT | NEW STATE ($ = entropy bar recalc needed) + * ----------------------------------------------------------------------------------- + * nuc, [x] | "gt-nuc_x" | no-op + * nuc, [*] | "gt-nuc_x" | nuc, [x] + * nuc, [*] | "notGenotype" | nuc, [] + * nuc, [x] | "gt-nuc_y" | nuc, [y] + * nuc, [*] | "gt-cdsA_x" | CdsA, [x] $ + * CdsA, [*] | "gt-cdsA_y" | CdsA, [y] + * CdsA, [*] | "gt-cdsB_z" | CdsB, [z] $ + * CdsA, [*] | "notGenotype" | CdsA, [] + * CdsA, [*] | "gt-nuc-z" | nuc, [z] $ + * ----------------------------------------------------------------------------------- + * nuc, [*] | nucleotide_gene | no-op + * nuc, [*] | CdsA | CdsA, [] $ + * CdsA, [*] | CdsA | no-op + * CdsA, [*] | CdsB | CdsB, [] $ + * CdsA, [*] | nucleotide_gene | nuc, [] $ + * ----------------------------------------------------------------------------------- + * + * @returns {ReduxThunk} + */ +export const changeEntropyCdsSelection = (arg) => (dispatch, getState) => { + const action = {type: types.CHANGE_ENTROPY_CDS_SELECTION} + const entropy = getState().entropy; + + if (arg === nucleotide_gene) { + if (entropy.selectedCds === nucleotide_gene) { + return + } + action.selectedCds = arg; + action.selectedPositions = []; + } else if (typeof arg === 'string') { + if (!isColorByGenotype(arg)) { + action.selectedCds = entropy.selectedCds; + action.selectedPositions = []; + } else { + const gt = decodeColorByGenotype(arg) + if (!gt) { + console.error("Error decoding genotype colorBy for entropy recalc.") + return; + } + const cds = getCdsFromGenotype(gt.gene, entropy.genomeMap); + action.selectedCds = cds; + action.selectedPositions = gt.positions; + } + } else if (typeof arg === 'object' && arg.name && arg.segments) { + // arg: CDS - see type def in `entropyCreateStateFromJsons.ts` + action.selectedCds = arg; + action.selectedPositions = []; + } else { + console.error("Incorrect argument passed to changeEntropyCdsSelection:", arg); + return; + } + + /* is nothing's changed, then we can avoid dispatches */ + if (entropy.selectedCds !== action.selectedCds) { + const state = getState(); + const [data, maxYVal] = calcEntropyInView(state.tree.nodes, state.tree.visibility, action.selectedCds, entropy.showCounts); + action.bars = data; + action.maxYVal = maxYVal; + } else if (equalArrays(action.selectedPositions, entropy.selectedPositions)) { + return; + } + + dispatch(action); }; export const showCountsNotEntropy = (showCounts) => (dispatch, getState) => { @@ -24,7 +103,6 @@ export const showCountsNotEntropy = (showCounts) => (dispatch, getState) => { updateEntropyVisibility(dispatch, getState); }; -export const changeZoom = (zoomc) => (dispatch, getState) => { +export const changeZoom = (zoomc) => (dispatch) => { dispatch({type: types.CHANGE_ZOOM, zoomc}); - updateEntropyVisibility(dispatch, getState); }; diff --git a/src/actions/recomputeReduxState.js b/src/actions/recomputeReduxState.js index d3941a526..9c3ce592e 100644 --- a/src/actions/recomputeReduxState.js +++ b/src/actions/recomputeReduxState.js @@ -13,12 +13,12 @@ import { calcEntropyInView } from "../util/entropy"; import { treeJsonToState } from "../util/treeJsonProcessing"; import { castIncorrectTypes } from "../util/castJsonTypes"; import { entropyCreateState } from "../util/entropyCreateStateFromJsons"; -import { determineColorByGenotypeMutType, calcNodeColor } from "../util/colorHelpers"; +import { calcNodeColor } from "../util/colorHelpers"; import { calcColorScale, createVisibleLegendValues } from "../util/colorScale"; import { computeMatrixFromRawData, checkIfNormalizableFromRawData } from "../util/processFrequencies"; import { applyInViewNodesToTree } from "../actions/tree"; import { validateScatterVariables } from "../util/scatterplotHelpers"; -import { isColorByGenotype, decodeColorByGenotype, decodeGenotypeFilters, encodeGenotypeFilters } from "../util/getGenotype"; +import { isColorByGenotype, decodeColorByGenotype, encodeColorByGenotype, decodeGenotypeFilters, encodeGenotypeFilters, getCdsFromGenotype } from "../util/getGenotype"; import { getTraitFromNode, getDivFromNode, collectGenotypeStates } from "../util/treeMiscHelpers"; import { collectAvailableTipLabelOptions } from "../components/controls/choose-tip-label"; import { hasMultipleGridPanels } from "./panelDisplay"; @@ -202,7 +202,7 @@ const restoreQueryableStateToDefaults = (state) => { return state; }; -const modifyStateViaMetadata = (state, metadata) => { +const modifyStateViaMetadata = (state, metadata, genomeMap) => { if (metadata.date_range) { /* this may be useful if, e.g., one were to want to display an outbreak from 2000-2005 (the default is the present day) */ @@ -272,9 +272,10 @@ const modifyStateViaMetadata = (state, metadata) => { state.panelsAvailable = metadata.panels.slice(); state.panelsToDisplay = metadata.panels.slice(); } else { - /* while `metadata.panels` is required by the schema, every single dataset can display a tree, so this is a nice fallback */ - state.panelsAvailable = ["tree"]; - state.panelsToDisplay = ["tree"]; + /* while `metadata.panels` is required by the schema, we provide a fallback + Entropy will be removed below if it's not possible to display it. */ + state.panelsAvailable = ["tree", "entropy"]; + state.panelsToDisplay = ["tree", "entropy"]; } /* if we lack geoResolutions, remove map from panels to display */ @@ -283,10 +284,31 @@ const modifyStateViaMetadata = (state, metadata) => { state.panelsToDisplay = state.panelsToDisplay.filter((item) => item !== "map"); } - /* if we lack genome annotations, remove entropy from panels to display */ - if (!metadata.genomeAnnotations || !metadata.genomeAnnotations.nuc) { + /** + * Entropy panel display is only possible if all three of the following are true: + * (i) the genomeMap exists + * (ii) mutations exist on the tree + * (iii) the JSON specifies "entropy" as a panel _or_ doesn't specify panels at all (see above) + * If (i && ii) then we can select genotype colorings. If this is not specified in the JSON + * then we add it here as I think it's the best UX (otherwise we either have to disable + * the entropy panel or disable the ability to select gt colorings from that panel) + */ + if (!metadata.colorings) metadata.colorings = {}; + if (genomeMap && state.coloringsPresentOnTree.has("gt")) { + if (!Object.keys(metadata.colorings).includes('gt')) { + metadata.colorings.gt = { + title: "Genotype", + type: "categorical" + } + } + } else { state.panelsAvailable = state.panelsAvailable.filter((item) => item !== "entropy"); state.panelsToDisplay = state.panelsToDisplay.filter((item) => item !== "entropy"); + if (Object.keys(metadata.colorings).includes('gt')) { + console.error("Genotype coloring ('gt') was specified as an option in the JSON, however the data does not support this: " + + "check that 'metadata.genome_annotations' is correct and that mutations have been assigned to 'branch_attrs' on the tree.") + delete metadata.colorings.gt; + } } /* After calculating the available panels, we check this against the display default (if that was set) @@ -309,17 +331,6 @@ const modifyStateViaMetadata = (state, metadata) => { state.panelLayout = "full"; state.canTogglePanelLayout = false; } - /* genome annotations in metadata */ - if (metadata.genomeAnnotations) { - for (const gene of Object.keys(metadata.genomeAnnotations)) { - state.geneLength[gene] = metadata.genomeAnnotations[gene].end - metadata.genomeAnnotations[gene].start; - if (gene !== nucleotide_gene) { - state.geneLength[gene] /= 3; - } - } - } else { - console.warn("JSONs did not include `genome_annotations`"); - } return state; }; @@ -351,10 +362,18 @@ const modifyControlsStateViaTree = (state, tree, treeToo, colorings) => { state.absoluteDateMinNumeric = calendarToNumeric(state.absoluteDateMin); state.absoluteDateMaxNumeric = calendarToNumeric(state.absoluteDateMax); - /* For the colorings (defined in the JSON) we need to check whether they - (a) actually exist on the tree and (b) have confidence values. - TODO - this whole file should be reorganised to make things clearer. - perhaps there's a better place to put this... */ + /** + * The following section makes an (expensive) traversal through the tree, checking if + * the provided colorings (via the JSON) are actually present on the tree. A + * previous comment implied that any colorings not present in the tree would be removed, + * but they are not! In fact, `coloringsPresentOnTreeWithConfidence` is never accessed, + * and `coloringsPresentOnTree` was only accessed when metadata CSV/TSV is dropped on. + * I'm now checking if "gt" is in `coloringsPresentOnTree` to decide whether to allow + * a genotype colorBy and the entropy panel, but this approach could be improved as well. + * We should either remove this completely, make it work as intended, or execute this + * only when a file is dropped. (I've gone down too many rabbit holes in this PR to + * do this now, unfortunately.) james, 2023 + */ state.coloringsPresentOnTree = new Set(); state.coloringsPresentOnTreeWithConfidence = new Set(); // subset of above @@ -392,15 +411,6 @@ const modifyControlsStateViaTree = (state, tree, treeToo, colorings) => { examineNodes(tree.nodes); if (treeToo) examineNodes(treeToo.nodes); - - /* ensure specified mutType is indeed available */ - if (!aaMuts && !nucMuts) { - state.mutType = null; - } else if (state.mutType === "aa" && !aaMuts) { - state.mutType = "nuc"; - } else if (state.mutType === "nuc" && !nucMuts) { - state.mutType = "aa"; - } if (aaMuts || nucMuts) { state.coloringsPresentOnTree.add("gt"); } @@ -431,7 +441,7 @@ const modifyControlsStateViaTree = (state, tree, treeToo, colorings) => { return state; }; -const checkAndCorrectErrorsInState = (state, metadata, query, tree, viewingNarrative) => { +const checkAndCorrectErrorsInState = (state, metadata, genomeMap, query, tree, viewingNarrative) => { /* want to check that the (currently set) colorBy (state.colorBy) is valid, * and fall-back to an available colorBy if not */ @@ -462,22 +472,49 @@ const checkAndCorrectErrorsInState = (state, metadata, query, tree, viewingNarra delete query.c; }; if (isColorByGenotype(state.colorBy)) { - /* Check that the genotype is valid with the current data */ - if (!decodeColorByGenotype(state.colorBy, state.geneLength)) { + /* Check that the genotype is valid (and so are all positions) with the current data */ + if (!genomeMap) { fallBackToDefaultColorBy(); + } else { + const decoded = decodeColorByGenotype(state.colorBy, genomeMap) + if (!decoded) { // note that a console.error printed by decodeColorByGenotype in this case + fallBackToDefaultColorBy(); + } else { + const encoded = encodeColorByGenotype(decoded); + if (state.colorBy!==encoded) { + /* color-by is partially valid - i.e. some positions are invalid (note that position ordering is unchanged) */ + console.error(`Genotype color-by ${state.colorBy} contains at lease one invalid position. ` + + `Color-by has been changed to ${encoded}.`) + query.c = encoded; + state.colorBy = encoded + } + } } } else if (Object.keys(metadata.colorings).indexOf(state.colorBy) === -1) { /* if it's a _non_ genotype colorBy AND it's not a valid option, fall back to the default */ fallBackToDefaultColorBy(); } - /* zoom */ - if (state.zoomMax > state["absoluteZoomMax"]) { state.zoomMax = state["absoluteZoomMax"]; } - if (state.zoomMin < state["absoluteZoomMin"]) { state.zoomMin = state["absoluteZoomMin"]; } - if (state.zoomMin > state.zoomMax) { - const tempMin = state.zoomMin; - state.zoomMin = state.zoomMax; - state.zoomMax = tempMin; + /* Validate requested entropy zoom bounds (via URL) */ + const genomeBounds = genomeMap?.[0]?.range; + if (!genomeBounds) { + /* Annotations block missing / invalid, so it makes no sense to have entropy zoom bounds */ + [state.zoomMin, state.zoomMax] = [undefined, undefined]; + delete query.gmin; + delete query.gmax; + } else { + if (state.zoomMin <= genomeBounds[0] || state.zoomMin >= genomeBounds[1]) { + state.zoomMin = undefined; + delete query.gmin; + } + if (state.zoomMax <= genomeBounds[0] || state.zoomMax >= genomeBounds[1]) { + state.zoomMax = undefined; + delete query.gmax; + } + if (state.zoomMin && state.zoomMax && state.zoomMin>state.zoomMax) { + [state.zoomMin, state.zoomMax] = [state.zoomMax, state.zoomMin]; + [query.gmin, query.gmax] = [state.zoomMin, state.zoomMax]; + } } /* colorBy confidence */ @@ -530,14 +567,6 @@ const checkAndCorrectErrorsInState = (state, metadata, query, tree, viewingNarra delete query.ci; // rm ci from the query if it doesn't apply } - /* if colorBy is a genotype then we need to set mutType */ - if (state.colorBy) { - const maybeMutType = determineColorByGenotypeMutType(state.colorBy); - if (maybeMutType) { - state.mutType = maybeMutType; - } - } - /* ensure selected filters (via the URL query) are valid. If not, modify state + URL. */ const filterNames = Object.keys(state.filters).filter((filterName) => state.filters[filterName].length); const stateCounts = countTraitsAcrossTree(tree.nodes, filterNames, false, true); @@ -695,9 +724,6 @@ const createMetadataStateFromJSON = (json) => { if (json.meta.build_url) { metadata.buildUrl = json.meta.build_url; } - if (json.meta.genome_annotations) { - metadata.genomeAnnotations = json.meta.genome_annotations; - } if (json.meta.data_provenance) { metadata.dataProvenance = json.meta.data_provenance; } @@ -765,7 +791,7 @@ export const createStateFromQueryOrJSONs = ({ /* create metadata state */ metadata = createMetadataStateFromJSON(json); /* entropy state */ - entropy = entropyCreateState(metadata.genomeAnnotations); + entropy = entropyCreateState(json.meta.genome_annotations); /* ensure default frequencies state */ frequencies = getDefaultFrequenciesState(); /* ensure default measurements state */ @@ -788,9 +814,7 @@ export const createStateFromQueryOrJSONs = ({ /* new controls state - don't apply query yet (or error check!) */ controls = getDefaultControlsState(); controls = modifyControlsStateViaTree(controls, tree, treeToo, metadata.colorings); - controls = modifyStateViaMetadata(controls, metadata); - controls["absoluteZoomMin"] = 0; - controls["absoluteZoomMax"] = entropy.lengthSequence; + controls = modifyStateViaMetadata(controls, metadata, entropy.genomeMap); } else if (oldState) { /* creating deep copies avoids references to (nested) objects remaining the same which can affect props comparisons. Due to the size of some of the state, we only do this selectively */ @@ -802,7 +826,7 @@ export const createStateFromQueryOrJSONs = ({ frequencies = {...oldState.frequencies}; measurements = {...oldState.measurements}; controls = restoreQueryableStateToDefaults(controls); - controls = modifyStateViaMetadata(controls, metadata); + controls = modifyStateViaMetadata(controls, metadata, entropy.genomeMap); } /* For the creation of state, we want to parse out URL query parameters @@ -826,7 +850,7 @@ export const createStateFromQueryOrJSONs = ({ } const viewingNarrative = (narrativeBlocks || (oldState && oldState.narrative.display)); - controls = checkAndCorrectErrorsInState(controls, metadata, query, tree, viewingNarrative); /* must run last */ + controls = checkAndCorrectErrorsInState(controls, metadata, entropy.genomeMap, query, tree, viewingNarrative); /* must run last */ /* calculate colours if loading from JSONs or if the query demands change */ @@ -882,12 +906,20 @@ export const createStateFromQueryOrJSONs = ({ /* calculate entropy in view */ if (entropy.loaded) { - const [entropyBars, entropyMaxYVal] = calcEntropyInView(tree.nodes, tree.visibility, controls.mutType, entropy.geneMap, entropy.showCounts); + /* The selected CDS + positions are only known if a genotype color-by has been set (display defaults | url) */ + entropy.selectedCds = nucleotide_gene; + entropy.selectedPositions = []; + if (isColorByGenotype(controls.colorBy)) { + const gt = decodeColorByGenotype(controls.colorBy); + const cds = getCdsFromGenotype(gt?.gene, entropy.genomeMap); + if (cds) { + entropy.selectedCds = cds; + entropy.selectedPositions = gt?.positions || [] + } + } + const [entropyBars, entropyMaxYVal] = calcEntropyInView(tree.nodes, tree.visibility, entropy.selectedCds, entropy.showCounts); entropy.bars = entropyBars; entropy.maxYVal = entropyMaxYVal; - entropy.zoomMax = controls["zoomMax"]; - entropy.zoomMin = controls["zoomMin"]; - entropy.zoomCoordinates = [controls["zoomMin"], controls["zoomMax"]]; } /* update frequencies if they exist (not done for new JSONs) */ diff --git a/src/actions/types.js b/src/actions/types.js index a617c47f7..034f4e9ba 100644 --- a/src/actions/types.js +++ b/src/actions/types.js @@ -17,7 +17,7 @@ export const CHANGE_LANGUAGE = "CHANGE_LANGUAGE"; export const CLEAN_START = "CLEAN_START"; export const APPLY_FILTER = "APPLY_FILTER"; export const UPDATE_VISIBILITY_AND_BRANCH_THICKNESS = "UPDATE_VISIBILITY_AND_BRANCH_THICKNESS"; -export const TOGGLE_MUT_TYPE = "TOGGLE_MUT_TYPE"; +export const CHANGE_ENTROPY_CDS_SELECTION = "CHANGE_ENTROPY_CDS_SELECTION"; export const CHANGE_ANALYSIS_VALUE = "CHANGE_ANALYSIS_VALUE"; export const CHANGE_ANIMATION_START = "CHANGE_ANIMATION_START"; export const CHANGE_ANIMATION_TIME = "CHANGE_ANIMATION_TIME"; diff --git a/src/components/controls/color-by.js b/src/components/controls/color-by.js index 9a5599a9b..15db72b1f 100644 --- a/src/components/controls/color-by.js +++ b/src/components/controls/color-by.js @@ -6,7 +6,7 @@ import { sidebarField } from "../../globalStyles"; import { controlsWidth, nucleotide_gene } from "../../util/globals"; import { changeColorBy } from "../../actions/colors"; import { analyticsControlsEvent } from "../../util/googleAnalytics"; -import { isColorByGenotype, decodeColorByGenotype, encodeColorByGenotype, decodePositions } from "../../util/getGenotype"; +import { isColorByGenotype, decodeColorByGenotype, encodeColorByGenotype, decodePositions, getCdsLength } from "../../util/getGenotype"; import CustomSelect from "./customSelect"; /* the reason why we have colorBy as state (here) and in redux @@ -16,9 +16,8 @@ import CustomSelect from "./customSelect"; @connect((state) => { return { colorBy: state.controls.colorBy, - geneLength: state.controls.geneLength, colorings: state.metadata.colorings, - geneMap: state.entropy.geneMap + genomeMap: state.entropy.genomeMap, }; }) class ColorBy extends React.Component { @@ -50,9 +49,9 @@ class ColorBy extends React.Component { } static propTypes = { colorBy: PropTypes.string.isRequired, - geneLength: PropTypes.object.isRequired, colorings: PropTypes.object.isRequired, - dispatch: PropTypes.func.isRequired + dispatch: PropTypes.func.isRequired, + genomeMap: PropTypes.array, } // Applies the given state to the immutable blank state and replaces the @@ -101,10 +100,10 @@ class ColorBy extends React.Component { if (geneSelected && positionSelected) { const genotype = encodeColorByGenotype({ gene: geneSelected, - positions: decodePositions(positionSelected, this.props.geneLength[geneSelected]) + positions: decodePositions(positionSelected, getCdsLength(this.props.genomeMap, geneSelected)) }); - if (genotype) { + if (genotype && genotype!==this.props.colorBy) { this.dispatchColorByGenotype(genotype); } } @@ -136,13 +135,22 @@ class ColorBy extends React.Component { }, 400); getGtGeneOptions() { - const options = []; - if (this.props.geneMap) { - // Make the nucleotide the first option since it can be annoying to find the nucleotide - // option when there are ~200 genes like in monkeypox. - options.push({value: nucleotide_gene, label: "nucleotide"}); - Object.keys(this.props.geneMap).forEach((prot) => options.push({value: prot, label: prot})); - } + if (!this.props.genomeMap?.length) return []; + const options = [ + // Nuc is first option, especially helpful when there are many many genes/CDSs + {value: nucleotide_gene, label: "nucleotide"} + ] + this.props.genomeMap[0].genes.forEach((gene) => { + /** + * A lot of the code in this file refers to "gene(s)", however the actual dropdown + * options represent CDSs, as these are what we have translations / mutations for. + * The `genomeMap` differentiates between the two, and we may one day have a more + * complex dropdown UI which exposes the associated gene for a CDS. + */ + gene.cds.forEach((cds) => { + options.push({value: cds.name, label: cds.name}) + }) + }) return options; } @@ -168,10 +176,8 @@ class ColorBy extends React.Component { gtPositionInput() { const { geneSelected } = this.state; - const geneLength = Math.floor(this.props.geneLength[geneSelected]); - const placeholder = geneSelected - ? `${geneSelected} position (1–${geneLength})…` + ? `${geneSelected} position (1–${getCdsLength(this.props.genomeMap, geneSelected)})…` : `position…`; return ( diff --git a/src/components/download/downloadButtons.js b/src/components/download/downloadButtons.js index c148093cd..8e7adca03 100644 --- a/src/components/download/downloadButtons.js +++ b/src/components/download/downloadButtons.js @@ -1,7 +1,7 @@ import React from "react"; import { withTheme } from 'styled-components'; import * as icons from "../framework/svg-icons"; -import { NODE_VISIBLE } from "../../util/globals"; +import { NODE_VISIBLE, nucleotide_gene } from "../../util/globals"; import { materialButton } from "../../globalStyles"; import * as helpers from "./helperFunctions"; import { getFullAuthorInfoFromNode } from "../../util/treeMiscHelpers"; @@ -16,7 +16,7 @@ const iconWidth = 25; * A React Component displaying buttons which trigger data-downloads. Intended for display within the * larger Download modal component */ -export const DownloadButtons = ({dispatch, t, tree, entropy, metadata, colorBy, mutType, distanceMeasure, panelsToDisplay, panelLayout, filters, visibility, visibleStateCounts, relevantPublications}) => { +export const DownloadButtons = ({dispatch, t, tree, entropy, metadata, colorBy, distanceMeasure, panelsToDisplay, panelLayout, filters, visibility, visibleStateCounts, relevantPublications}) => { const totalTipCount = metadata.mainTreeNumTips; const selectedTipsCount = getNumSelectedTips(tree.nodes, tree.visibility); const partialData = selectedTipsCount !== totalTipCount; @@ -34,6 +34,7 @@ export const DownloadButtons = ({dispatch, t, tree, entropy, metadata, colorBy, } } } + const entropyBar = entropy?.selectedCds===nucleotide_gene ? "nucleotide" : "codon"; return ( <> @@ -85,9 +86,9 @@ export const DownloadButtons = ({dispatch, t, tree, entropy, metadata, colorBy, {entropy.loaded && ( - ); @@ -162,13 +178,12 @@ class Entropy extends React.Component { setUp(props) { const chart = new EntropyChart( this.d3entropy, - props.annotations, - props.geneMap, - props.geneLength[nucleotide_gene], + props.genomeMap, { /* callbacks */ onHover: this.onHover.bind(this), onLeave: this.onLeave.bind(this), - onClick: this.onClick.bind(this) + onClick: this.onClick.bind(this), + onCdsClick: this.onCdsClick.bind(this) } ); chart.render(props); @@ -204,50 +219,17 @@ class Entropy extends React.Component { updateParams.zoomMax = nextProps.zoomMax; updateParams.zoomMin = nextProps.zoomMin; } - if (this.props.bars !== nextProps.bars) { /* will always be true if mutType has changed */ - updateParams.aa = nextProps.mutType === "aa"; - updateParams.newBars = nextProps.bars; - updateParams.maxYVal = nextProps.maxYVal; + if (this.props.bars !== nextProps.bars) { + updateParams.newBars = [nextProps.bars, nextProps.maxYVal]; } - if (this.props.colorBy !== nextProps.colorBy && (isColorByGenotype(this.props.colorBy) || isColorByGenotype(nextProps.colorBy))) { - if (isColorByGenotype(nextProps.colorBy)) { - const colorByGenotype = decodeColorByGenotype(nextProps.colorBy, nextProps.geneLength); - if (colorByGenotype.aa) { /* if it is a gene, zoom to it */ - updateParams.gene = colorByGenotype.gene; - updateParams.start = nextProps.geneMap[updateParams.gene].start; - updateParams.end = nextProps.geneMap[updateParams.gene].end; - } else { /* if a nuc, want to do different things if 1 or multiple */ - const positions = colorByGenotype.positions; - const zoomCoord = this.state.chart.zoomCoordinates; - const maxNt = this.state.chart.maxNt; - /* find out what new coords would be - if different enough, change zoom */ - let startUpdate, endUpdate; - if (positions.length > 1) { - const start = Math.min.apply(null, positions); - const end = Math.max.apply(null, positions); - startUpdate = start - (end-start)*0.05; - endUpdate = end + (end-start)*0.05; - } else { - const pos = positions[0]; - const eitherSide = maxNt*0.05; - const newStartEnd = (pos-eitherSide) <= 0 ? [0, pos+eitherSide] : - (pos+eitherSide) >= maxNt ? [pos-eitherSide, maxNt] : [pos-eitherSide, pos+eitherSide]; - startUpdate = newStartEnd[0]; - endUpdate = newStartEnd[1]; - } - /* if the zoom would be different enough, change it */ - if (!(startUpdate > zoomCoord[0]-maxNt*0.4 && startUpdate < zoomCoord[0]+maxNt*0.4) || - !(endUpdate > zoomCoord[1]-maxNt*0.4 && endUpdate < zoomCoord[1]+maxNt*0.4) || - !(positions.every((x) => x > zoomCoord[0]) && positions.every((x) => x < zoomCoord[1]))) { - updateParams.gene = colorByGenotype.gene; - updateParams.start = startUpdate; - updateParams.end = endUpdate; - } - } - updateParams.selected = decodeColorByGenotype(nextProps.colorBy, nextProps.geneLength); - } else { - updateParams.clearSelected = true; - } + if (this.props.selectedCds !== nextProps.selectedCds) { + updateParams.selectedCds = nextProps.selectedCds; + } + if (this.props.showCounts !== nextProps.showCounts) { + updateParams.showCounts = nextProps.showCounts; + } + if (!equalArrays(this.props.selectedPositions, nextProps.selectedPositions)) { + updateParams.selectedPositions = nextProps.selectedPositions } if (Object.keys(updateParams).length) { this.state.chart.update(updateParams); @@ -264,20 +246,21 @@ class Entropy extends React.Component { } } + title() { + if (this.props.width<500) return "Diversity"; + if (this.props.selectedCds===nucleotide_gene) { + return "Nucleotide diversity of genome" + } + return `Amino acid diversity of CDS ${this.props.selectedCds.name}` + } + render() { - const { t } = this.props; const styles = getStyles(this.props.width); return ( - - + + + {this.state.hovered ? this.state.hovered.tooltip(this.props.t) : null} + { this.d3entropy = c; }} id="d3entropy"/> - {this.aaNtSwitch(styles)} + {this.resetLayout(styles)} {this.entropyCountSwitch(styles)} + + + + +
+ This panel displays the observed diversity across the current genome or a selected CDS + {` (currently you are viewing ${this.props.selectedCds===nucleotide_gene?'the genome':`CDS ${this.props.selectedCds.name}`}). `} +

+ The lower axis shows the genome with +ve strand CDSs above and -ve strand CDSs below and + the grey overlay allows zooming in to a region. + The upper axis shows either the zoomed in region of the genome or a selected CDS; + in the latter case the coordinates represent amino acids. +

+ Clicking on a CDS will select it and show it on the upper axis. + {` Clicking "Reset Layout" will always return you to viewing the entire genome.`} +

+
); } diff --git a/src/components/entropy/infoPanel.js b/src/components/entropy/infoPanel.js index 9496b798a..28c736c3f 100644 --- a/src/components/entropy/infoPanel.js +++ b/src/components/entropy/infoPanel.js @@ -1,16 +1,16 @@ import React from "react"; import { infoPanelStyles } from "../../globalStyles"; -const InfoPanel = ({hovered, width, height, mutType, showCounts, geneMap, t}) => { +const InfoPanel = ({d3event, width, height, children}) => { /* this is a function - we can bail early */ - if (!hovered) { + if (!d3event) { return null; } const styles = { container: { position: "absolute", - width: 200, + maxWidth: 500, padding: "10px", borderRadius: 10, zIndex: 1000, @@ -26,12 +26,9 @@ const InfoPanel = ({hovered, width, height, mutType, showCounts, geneMap, t}) => } }; - /* don't use d3entropy or main chart SVG as these change on zoom. - The Y axis is safe as it is invariant */ - const bounds = document.getElementById("entropyYAxis").getBoundingClientRect(); const pos = { - x: hovered.x - bounds.left + 15, - y: hovered.y - bounds.top + x: d3event.layerX, + y: d3event.layerY }; if (pos.x < width * 0.7) { @@ -45,45 +42,11 @@ const InfoPanel = ({hovered, width, height, mutType, showCounts, geneMap, t}) => styles.container.bottom = height - pos.y; } - const isNegStrand = hovered.d.prot ? geneMap[hovered.d.prot].strand === "-" : null; - - const nucPos = hovered.d.prot ? - mutType === "aa" ? - isNegStrand ? geneMap[hovered.d.prot].end - hovered.d.codon * 3 + 3 : - geneMap[hovered.d.prot].start + hovered.d.codon * 3 - : isNegStrand ? geneMap[hovered.d.prot].end - hovered.d.x : - hovered.d.x - geneMap[hovered.d.prot].start-1 - : null; - - const codonFromNuc = hovered.d.prot && mutType !== "aa" ? Math.floor((nucPos)/3) + 1 : null; return (
-
-
- { - mutType === "aa" ? t("Codon {{codon}} in protein {{protein}}", {codon: hovered.d.codon, protein: hovered.d.prot}) : - hovered.d.prot ? `${t("Nucleotide {{nuc}}", {nuc: hovered.d.x})} (${t("Codon {{codon}} in protein {{protein}}", {codon: codonFromNuc, protein: hovered.d.prot})})` : - t("Nucleotide {{nuc}}", {nuc: hovered.d.x}) - } -
-

-

- {mutType === "aa" ? t("Nuc positions {{a}} to {{b}}", {a: nucPos-2, b: nucPos}) : ``} -
-

-

- {isNegStrand === null ? `` : isNegStrand ? t("Negative strand") : t("Positive strand")} -
-

-

- {showCounts ? `${t("Num mutations")}: ${hovered.d.y}` : `${t("entropy")}: ${hovered.d.y}`} -
-
- {t("Click to color tree & map")} -
-
+ {children}
- ); + ) }; export default InfoPanel; diff --git a/src/components/main/index.js b/src/components/main/index.js index 2b7a8ca77..2eeca0b14 100644 --- a/src/components/main/index.js +++ b/src/components/main/index.js @@ -134,7 +134,7 @@ class Main extends React.Component { const {availableWidth, availableHeight, sidebarWidth, overlayStyles} = calcStyles(this.props.browserDimensions, this.props.displayNarrative, this.props.sidebarOpen, this.state.mobileDisplay); const overlayHandler = () => {this.props.dispatch({type: TOGGLE_SIDEBAR, value: false});}; - const {full, grid, chart} = + const {full, grid, chartEntropy, chartFrequencies} = calcPanelDims(this.props.panelsToDisplay, this.props.displayNarrative, availableWidth, availableHeight); /* We use tree name(s) as a react key so that components remount when datasets change */ const keyName = `${this.props.treeName}${this.props.secondTreeName ? `:${this.props.secondTreeName}` : ''}`; @@ -205,13 +205,13 @@ class Main extends React.Component { } {this.props.panelsToDisplay.includes("entropy") ? ( - + ) : null } {this.props.panelsToDisplay.includes("frequencies") && this.props.frequenciesLoaded ? ( - + ) : null } diff --git a/src/components/main/utils.js b/src/components/main/utils.js index 67763012c..f0af76203 100644 --- a/src/components/main/utils.js +++ b/src/components/main/utils.js @@ -31,9 +31,11 @@ export const calcPanelDims = (panels, narrativeIsDisplayed, availableWidth, avai const full = computeResponsive({horizontal: fullWidthFraction, vertical: fullHeightFraction, availableWidth, availableHeight}); const grid = computeResponsive({horizontal: gridWidthFraction, vertical: gridHeightFraction, availableWidth, availableHeight}); - const chart = computeResponsive({horizontal: chartWidthFraction, vertical: chartHeightFraction, availableWidth, availableHeight, minHeight: 150}); + /* Frequencies are usable with a (SVG) height of ~200px, but the Entropy panel needs more */ + const chartFrequencies = computeResponsive({horizontal: chartWidthFraction, vertical: chartHeightFraction, availableWidth, availableHeight, minHeight: 200}); + const chartEntropy = computeResponsive({horizontal: chartWidthFraction, vertical: chartHeightFraction, availableWidth, availableHeight, minHeight: 300}); - return {full, grid, chart}; + return {full, grid, chartFrequencies, chartEntropy}; }; diff --git a/src/components/tree/index.js b/src/components/tree/index.js index 0ba76de27..dd34a5a58 100644 --- a/src/components/tree/index.js +++ b/src/components/tree/index.js @@ -13,10 +13,10 @@ const Tree = connect((state) => ({ scatterVariables: state.controls.scatterVariables, temporalConfidence: state.controls.temporalConfidence, distanceMeasure: state.controls.distanceMeasure, - mutType: state.controls.mutType, explodeAttr: state.controls.explodeAttr, colorScale: state.controls.colorScale, - metadata: state.metadata, + colorings: state.metadata.colorings, + genomeMap: state.entropy.genomeMap, showTreeToo: state.controls.showTreeToo, showTangle: state.controls.showTangle, panelsToDisplay: state.controls.panelsToDisplay, diff --git a/src/components/tree/tree.js b/src/components/tree/tree.js index e89b96c55..5018b18f1 100644 --- a/src/components/tree/tree.js +++ b/src/components/tree/tree.js @@ -66,7 +66,7 @@ class Tree extends React.Component { if (this.props.showTreeToo) { this.setUpAndRenderTreeToo(this.props, newState); /* modifies newState in place */ } - newState.geneSortFn = sortByGeneOrder(this.props.metadata.genomeAnnotations); + newState.geneSortFn = sortByGeneOrder(this.props.genomeMap); this.setState(newState); /* this will trigger an unnecessary CDU :( */ } } @@ -200,7 +200,7 @@ class Tree extends React.Component { colorBy={this.props.colorBy} colorByConfidence={this.props.colorByConfidence} colorScale={this.props.colorScale} - colorings={this.props.metadata.colorings} + colorings={this.props.colorings} geneSortFn={this.state.geneSortFn} observedMutations={this.props.tree.observedMutations} panelDims={{width: this.props.width, height: this.props.height, spaceBetweenTrees}} @@ -210,7 +210,7 @@ class Tree extends React.Component { clearSelectedNode={this.clearSelectedNode} selectedNode={this.state.selectedNode} observedMutations={this.props.tree.observedMutations} - colorings={this.props.metadata.colorings} + colorings={this.props.colorings} geneSortFn={this.state.geneSortFn} t={t} /> diff --git a/src/css/entropy.css b/src/css/entropy.css index 77b8feeb0..4685b917d 100644 --- a/src/css/entropy.css +++ b/src/css/entropy.css @@ -5,15 +5,13 @@ --medGrey: #888; } +/* Normally elements with .overlay are children of the .brush, however +the overlay added on key-presses is is not a child of .brush */ #d3entropy .overlay { fill: none; pointer-events: all; } -#d3entropy .brush { - stroke: none; -} - #d3entropy .niceText { font-family: "Lato", "Helvetica Neue", "Helvetica", "sans-serif"; font-size: 14px; @@ -26,3 +24,19 @@ cursor: -moz-grab; cursor: -webkit-grab; } + +#d3entropy .cdsText { + pointer-events: none; +} +#d3entropy .cds { + cursor: pointer; +} + +table.entropy-table { + font-weight: 300; +} + +table.entropy-table td { + padding-bottom: 3px; + padding-right: 5px; +} \ No newline at end of file diff --git a/src/middleware/changeURL.js b/src/middleware/changeURL.js index 105af27b9..516d5d793 100644 --- a/src/middleware/changeURL.js +++ b/src/middleware/changeURL.js @@ -2,8 +2,8 @@ import queryString from "query-string"; import * as types from "../actions/types"; import { numericToCalendar } from "../util/dateHelpers"; import { shouldDisplayTemporalConfidence } from "../reducers/controls"; -import { genotypeSymbol, strainSymbol } from "../util/globals"; -import { encodeGenotypeFilters } from "../util/getGenotype"; +import { genotypeSymbol, nucleotide_gene, strainSymbol } from "../util/globals"; +import { encodeGenotypeFilters, decodeColorByGenotype, isColorByGenotype } from "../util/getGenotype"; /** * This middleware acts to keep the app state and the URL query state in sync by @@ -52,11 +52,29 @@ export const changeURLMiddleware = (store) => (next) => (action) => { that will still make sense when the default for the given branch labelling is "show all" */ query.showBranchLabels = action.value ? 'all' : undefined; break; - case types.CHANGE_ZOOM: - /* entropy panel genome zoom coordinates */ - query.gmin = action.zoomc[0] === state.controls.absoluteZoomMin ? undefined : action.zoomc[0]; - query.gmax = action.zoomc[1] >= state.controls.absoluteZoomMax ? undefined : action.zoomc[1]; + case types.CHANGE_ZOOM: { + /* entropy panel genome zoom coordinates. On a page load these queries + will be combined with the selected CDS from the color-by genotype, or be + applied to the nuc view. As such, if the entropy panel's selected CDS is + _not_ the same as the colorBy (if applicable) then we don't set gmin/gmax + */ + const entropyCdsName = state.entropy.selectedCds===nucleotide_gene ? + nucleotide_gene : + state.entropy.selectedCds.name; + const colorByCdsName = isColorByGenotype(state.controls.colorBy) && + decodeColorByGenotype(state.controls.colorBy, state.entropy.genomeMap).gene; + const bounds = state.entropy.genomeMap[0].range; // guaranteed to exist, as the action comes from + if ( + ((!colorByCdsName || colorByCdsName===nucleotide_gene) && entropyCdsName===nucleotide_gene) || + (colorByCdsName===entropyCdsName) + ) { + query.gmin = action.zoomc[0] <= bounds[0] ? undefined : action.zoomc[0]; + query.gmax = action.zoomc[1] >= bounds[1] ? undefined : action.zoomc[1]; + } else { + [query.gmin, query.gmax] = [undefined, undefined]; + } break; + } case types.NEW_COLORS: query.c = action.colorBy === state.controls.defaults.colorBy ? undefined : action.colorBy; break; diff --git a/src/reducers/controls.js b/src/reducers/controls.js index bb0378949..ca186171a 100644 --- a/src/reducers/controls.js +++ b/src/reducers/controls.js @@ -4,7 +4,6 @@ import { defaultGeoResolution, defaultDateRange, defaultDistanceMeasure, defaultLayout, - defaultMutType, controlsHiddenWidth, strainSymbol, twoColumnBreakpoint } from "../util/globals"; @@ -46,8 +45,6 @@ export const getDefaultControlsState = () => { region: null, search: null, strain: null, - geneLength: {}, - mutType: defaultMutType, temporalConfidence: { exists: false, display: false, on: false }, layout: defaults.layout, scatterVariables: {}, @@ -243,10 +240,6 @@ const Controls = (state = getDefaultControlsState(), action) => { filters }); } - case types.TOGGLE_MUT_TYPE: - return Object.assign({}, state, { - mutType: action.data - }); case types.TOGGLE_TEMPORAL_CONF: return Object.assign({}, state, { temporalConfidence: Object.assign({}, state.temporalConfidence, { @@ -322,6 +315,15 @@ const Controls = (state = getDefaultControlsState(), action) => { return {...state, measurementsDisplay: action.data}; case types.APPLY_MEASUREMENTS_FILTER: return {...state, measurementsFilters: action.data}; + /** + * Currently the CHANGE_ZOOM action (entropy panel zoom changed) does not + * update the zoomMin/zoomMax, and as such they only represent the initially + * requested zoom range. The following commented out code will keep the + * state in sync, but corresponding changes will be required to the entropy + * code. + */ + // case types.CHANGE_ZOOM: // this is the entropy panel zoom + // return {...state, zoomMin: action.zoomc[0], zoomMax: action.zoomc[1]}; default: return state; } diff --git a/src/reducers/entropy.js b/src/reducers/entropy.js index 26ed29014..a17b212b2 100644 --- a/src/reducers/entropy.js +++ b/src/reducers/entropy.js @@ -1,13 +1,16 @@ // import _filter from "lodash/filter"; import * as types from "../actions/types"; -const Entropy = (state = {loaded: false, showCounts: false}, action) => { +export const defaultEntropyState = () => { + const startingState = { + loaded: false, + showCounts: false, + } + return {...startingState}; +} + +const Entropy = (state = defaultEntropyState(), action) => { switch (action.type) { - case types.CHANGE_ZOOM: - return Object.assign({}, state, { - zoomMax: action.zoomc[1], - zoomMin: action.zoomc[0] - }); case types.DATA_INVALID: return {loaded: false, showCounts: false}; case types.URL_QUERY_CHANGE_WITH_COMPUTED_STATE: /* fallthrough */ @@ -23,6 +26,8 @@ const Entropy = (state = {loaded: false, showCounts: false}, action) => { return Object.assign({}, state, { showCounts: action.showCounts }); + case types.CHANGE_ENTROPY_CDS_SELECTION: + return {...state, ...action}; default: return state; } diff --git a/src/util/colorHelpers.js b/src/util/colorHelpers.js index a81bc78eb..389e143cf 100644 --- a/src/util/colorHelpers.js +++ b/src/util/colorHelpers.js @@ -24,17 +24,6 @@ export const getAverageColorFromNodes = (nodes, nodeColors) => { return avg.toString(); }; -export const determineColorByGenotypeMutType = (colorBy) => { - if (isColorByGenotype(colorBy)) { - const genotype = decodeColorByGenotype(colorBy); - return genotype.aa - ? "aa" - : "nuc"; - } - return false; -}; - - /** * what colorBy trait names are present in the tree but _not_ in the provided scale? * @param {Array} nodes - list of nodes diff --git a/src/util/colorScale.js b/src/util/colorScale.js index 59309fb01..604516157 100644 --- a/src/util/colorScale.js +++ b/src/util/colorScale.js @@ -36,13 +36,13 @@ export const calcColorScale = (colorBy, controls, tree, treeToo, metadata) => { let colorScale, legendValues, legendBounds, legendLabels, domain; let genotype; - if (isColorByGenotype(colorBy) && controls.geneLength) { - genotype = decodeColorByGenotype(colorBy, controls.geneLength); + if (isColorByGenotype(colorBy)) { + genotype = decodeColorByGenotype(colorBy); setGenotype(tree.nodes, genotype.gene, genotype.positions, metadata.rootSequence); /* modifies nodes recursively */ } const scaleType = genotype ? "categorical" : colorings[colorBy].type; if (genotype) { - ({legendValues, colorScale} = createScaleForGenotype(tree.nodes, controls.mutType)); + ({legendValues, colorScale} = createScaleForGenotype(tree.nodes, genotype.aa)); domain = [...legendValues]; } else if (colorings && colorings[colorBy]) { if (scaleType === "continuous" || scaleType==="temporal") { @@ -152,18 +152,18 @@ export function createNonContinuousScaleFromProvidedScaleMap(colorBy, providedSc }; } -function createScaleForGenotype(t1nodes, mutType) { - const legendValues = orderOfGenotypeAppearance(t1nodes, mutType); - const trueValues = mutType === "nuc" ? - legendValues.filter((x) => x !== "X" && x !== "-" && x !== "N" && x !== "") : - legendValues.filter((x) => x !== "X" && x !== "-" && x !== ""); +function createScaleForGenotype(t1nodes, aaGenotype) { + const legendValues = orderOfGenotypeAppearance(t1nodes, aaGenotype); + const trueValues = aaGenotype ? + legendValues.filter((x) => x !== "X" && x !== "-" && x !== "") : + legendValues.filter((x) => x !== "X" && x !== "-" && x !== "N" && x !== ""); const domain = [undefined, ...legendValues]; const range = [unknownColor, ...genotypeColors.slice(0, trueValues.length)]; // Bases are returned by orderOfGenotypeAppearance in order, unknowns at end if (legendValues.indexOf("-") !== -1) { range.push(rgb(217, 217, 217)); } - if (legendValues.indexOf("N") !== -1 && mutType === "nuc") { + if (legendValues.indexOf("N") !== -1 && !aaGenotype) { range.push(rgb(153, 153, 153)); } if (legendValues.indexOf("X") !== -1) { diff --git a/src/util/entropy.js b/src/util/entropy.js index 45a2831c4..186b1e52d 100644 --- a/src/util/entropy.js +++ b/src/util/entropy.js @@ -1,14 +1,5 @@ import { genotypeColors, NODE_VISIBLE, nucleotide_gene } from "./globals"; -const intersectGenes = function intersectGenes(geneMap, pos) { - for (const gene of Object.keys(geneMap)) { - if (pos >= geneMap[gene].start && pos <= geneMap[gene].end) { - return gene; - } - } - return false; -}; - /** * Get mutations on node. Returns false if mutations not set. * @param {object} n node @@ -20,191 +11,125 @@ const getNodeMutations = (n) => { return false; }; -const calcMutationCounts = (nodes, visibility, geneMap, isAA) => { +const calcMutationCounts = (nodes, visibility, selectedCds) => { + const isAA = selectedCds!==nucleotide_gene; + const sparse = []; + const cdsName = isAA ? selectedCds.name : nucleotide_gene; - const sparse = isAA ? {} : []; - if (isAA) { - Object.keys(geneMap).forEach((n) => {sparse[n] = {};}); - } nodes.forEach((n) => { if (visibility[n.arrayIdx] !== NODE_VISIBLE) {return;} const mutations = getNodeMutations(n); - if (!mutations) return; - if (isAA) { - for (const prot of Object.keys(mutations).filter((p) => p !== "nuc")) { - mutations[prot].forEach((m) => { - const pos = parseInt(m.slice(1, m.length - 1), 10); - const A = m.slice(0, 1); - const B = m.slice(-1); - if (A !== 'X' && B !== 'X') { - sparse[prot][pos] ? sparse[prot][pos]++ : sparse[prot][pos] = 1; - } - }); + mutations?.[cdsName]?.forEach((m) => { + const pos = parseInt(m.slice(1, m.length - 1), 10); + const A = m.slice(0, 1); + const B = m.slice(-1); + if (valid(A, B, isAA)) { + sparse[pos] ? sparse[pos]++ : sparse[pos] = 1; } - } else { - if (!mutations.nuc) return; - mutations.nuc.forEach((m) => { - const pos = parseInt(m.slice(1, m.length - 1), 10); - const A = m.slice(0, 1); - const B = m.slice(-1); - if (A !== "N" && A !== "-" && B !== "N" && B !== "-") { - sparse[pos] ? sparse[pos]++ : sparse[pos] = 1; - } - }); - } + }); }); const counts = []; - let j = 0; - let m = 0; - if (isAA) { - const prots = Object.keys(sparse); - for (let i = 0; i < prots.length; i++) { - for (const k in sparse[prots[i]]) { - const numK = parseInt(k, 10); - if (sparse[prots[i]][numK] > m) {m = sparse[prots[i]][numK];} - counts[j] = { - x: geneMap[prots[i]].start + 3 * numK - 1, // check - y: sparse[prots[i]][numK], - codon: numK, // check - fill: genotypeColors[i % 10], - prot: prots[i] - }; - j++; - } - } - } else { - for (let i = 0; i < sparse.length; i++) { - if (!sparse[i]) {continue;} - if (sparse[i] > m) {m = sparse[i];} - counts[j] = {x: i, y: sparse[i]}; /* TODO reset y scale in D3 or compute entropy */ - j++; - } - for (const nt of counts) { - nt.prot = intersectGenes(geneMap, nt.x); - } + let j = 0; /* length of the counts array */ + let m = 0; /* maximum observed count */ + for (let i = 0; i < sparse.length; i++) { + if (!sparse[i]) {continue;} + if (sparse[i] > m) {m = sparse[i];} + counts[j] = isAA ? + {codon: parseInt(i, 10), y: sparse[i]} : + {x: i, y: sparse[i]}; /* TODO reset y scale in D3 or compute entropy */ + j++; } return [counts, m]; }; -const calcEntropy = (nodes, visibility, geneMap, isAA) => { - const arrayOfProts = isAA ? Object.keys(geneMap) : [nucleotide_gene]; - const initialState = {}; +const calcEntropy = (nodes, visibility, selectedCds) => { + const isAA = selectedCds!==nucleotide_gene; + const name = isAA ? selectedCds.name : nucleotide_gene; + /* anc_state: the ancestral (root) nuc/aa for each position, known once we see the 1st mutation */ const anc_state = {}; - const counts = {}; // same struct as state, but with counts not chars - arrayOfProts.forEach((p) => { - initialState[p] = {}; - counts[p] = {}; - anc_state[p] = {}; - }); - const root = nodes[0]; - let visibleTips = 0; - - /* assignFn is called by forEach to parse and assign the mutations at each node. - It cannot use fat arrow as we need to access "this" - "this" is bound to [prot, state] */ - const assignFn = function assignFn(m) { - const prot = this[0]; - const state = this[1]; - const pos = parseInt(m.slice(1, m.length - 1), 10); - const A = m.slice(0, 1); - const B = m.slice(m.length - 1, m.length); - // console.log("mut @ ", pos, ":", A, " -> ", B) - if (isAA) { - if (A === "X" || B === "X") return; - } else if (A === "N" || A === "-" || B === "N" || B === "-") return; - if (!anc_state[prot][pos]) { - // if we don't know the ancestral state, set it via the first encountered state - anc_state[prot][pos] = A; - } - state[prot][pos] = B; - }; + const counts = {}; /* for each position keep a dict of nuc/aa -> observed counts */ + let visibleTips = 0; /* simple counter */ const recurse = (node, state) => { - // if mutation observed - do something + /* state is a dict of position -> nuc/aa observed on branches leading to the parent of this node. + We update it by scanning the mutations observed on the branch leading to this node, + and also update the ancestral states for any positions if not already known */ const mutations = getNodeMutations(node); - if (mutations) { - if (isAA) { - for (const prot of Object.keys(mutations).filter((p) => p !== "nuc")) { - if (arrayOfProts.includes(prot)) { - mutations[prot].forEach(assignFn, [prot, state]); - } - } - } else if (mutations.nuc) { - mutations.nuc.forEach(assignFn, [nucleotide_gene, state]); + mutations?.[name]?.forEach((mutation) => { + const pos = parseInt(mutation.slice(1, mutation.length - 1), 10); + const A = mutation.slice(0, 1); + const B = mutation.slice(mutation.length - 1, mutation.length); + if (!valid(A, B, isAA)) return; + if (!anc_state[pos]) { + anc_state[pos] = A; } - } - + state[pos] = B; + }); + /** + * If there are children, copy the state & recurse into that node. + * If it's a (visible) tip then use the state (which represents all mutations + * between the root and this node) to increment counts[pos][nuc/aa] + */ if (node.hasChildren) { for (const child of node.children) { - /* if there were no changes to the state (i.e. no mutations ) - at the node, then we don't need to deep clone the state Object - (i.e. can just use references). This will be much quicker, - but increase programmatic complexity. (TODO) */ - const newState = {}; - arrayOfProts.forEach((p) => { - newState[p] = Object.assign({}, state[p]); - }); - recurse(child, newState); + /* visit child nodes, passing each a _copy_ of the state so it can diverge */ + recurse(child, Object.assign({}, state)); } } else if (visibility[node.arrayIdx] === NODE_VISIBLE) { visibleTips++; - for (const prot of arrayOfProts) { - for (const pos of Object.keys(state[prot])) { - if (!counts[prot][pos]) { - counts[prot][pos] = {}; - counts[prot][pos][state[prot][pos]] = 1; - } else if (!counts[prot][pos][state[prot][pos]]) { - counts[prot][pos][state[prot][pos]] = 1; - } else { - counts[prot][pos][state[prot][pos]]++; - } + for (const [pos, nuc_aa] of Object.entries(state)) { + if (!counts[pos]) { + counts[pos] = {}; + counts[pos][nuc_aa] = 1; + } else if (!counts[pos][nuc_aa]) { + counts[pos][nuc_aa] = 1; + } else { + counts[pos][nuc_aa]++; } } } }; - recurse(root, initialState); + /* begin the recursion at the root (nodes[0]) with empty state (i.e. zero mutations observed) */ + recurse(nodes[0], {}); - let m = 0; - let i = 0; + /** + * For each position where we have observed a mutation (anywhere), there may well be tips which + * did not have observed mutations leading to them, so we assign these the ancestral nuc/aa. + * We can then calculate the Shannon entropy for this position & push it onto the entropy array. + */ const entropy = []; - for (const prot of arrayOfProts) { - for (const k of Object.keys(counts[prot])) { - let nObserved = 0; - for (const kk of Object.keys(counts[prot][k])) { - nObserved += counts[prot][k][kk]; - } - const nUnobserved = visibleTips - nObserved; - // console.log("\tn(visible tips):", visibleTips, "n(unobserved):", nUnobserved); - if (nUnobserved > 0) { - // console.log("\tancestral state:", anc_state[prot][k]); - if (counts[prot][k][anc_state[prot][k]]) { - counts[prot][k][anc_state[prot][k]] += nUnobserved; - } else { - counts[prot][k][anc_state[prot][k]] = nUnobserved; - } - } - // console.log("\tcounts (complete):", counts[prot][k], " total:", visibleTips); - let s = 0; - for (const kk of Object.keys(counts[prot][k])) { - const a = counts[prot][k][kk] / visibleTips; - s += (-1 * a * Math.log(a)); + let m = 0; /* maximum observed entropy value */ + let i = 0; /* length of the entropy array */ + for (const position of Object.keys(counts)) { + let nObserved = 0; /* number of tips with directly observed nuc_aa (at this position) */ + for (const observedCount of Object.values(counts[position])) { + nObserved += observedCount; + } + const nUnobserved = visibleTips - nObserved; + if (nUnobserved > 0) { + if (counts[position][anc_state[position]]) { + counts[position][anc_state[position]] += nUnobserved; + } else { + counts[position][anc_state[position]] = nUnobserved; } - if (s > m) {m = s;} - entropy[i] = isAA ? { - x: geneMap[prot].start + 3 * k - 1, // check - y: s.toFixed(3), - codon: parseInt(k, 10), // check - fill: genotypeColors[i % 10], - prot: prot - } : { - x: parseInt(k, 10), - y: s.toFixed(3), - prot: intersectGenes(geneMap, k) - }; - i++; } + + let s = 0; /* shannon entropy */ + for (const count of Object.values(counts[position])) { + const a = count / visibleTips; + s += (-1 * a * Math.log(a)); + } + if (s > m) {m = s;} /* update maximum observed entropy */ + entropy[i] = isAA ? { + y: s.toFixed(3), + codon: parseInt(position, 10), // check + fill: genotypeColors[i % 10], + } : { + x: parseInt(position, 10), + y: s.toFixed(3), + }; + i++; } - // console.log(entropy) return [entropy, m]; }; @@ -212,14 +137,211 @@ const calcEntropy = (nodes, visibility, geneMap, isAA) => { * traverse the tree and compile the entropy data for the visible branches * @param {Array} nodes - list of nodes * @param {Array} visibility - 1-1 correspondence with nodes. -* @param {String} mutType - amino acid | nucleotide mutations - "aa" | "nuc" -* @param {obj} geneMap used to NT fill colours. This should be improved. +* @param {String|Cds} selectedCds - selected CDS or `nucleotide_gene` (string) * @param {bool} showCounts show counts or entropy values? * @return {obj} keys: the entries in attrs. Values: an object mapping values -> counts * TODO: this algorithm can be much improved, and the data structures returned improved also */ -export const calcEntropyInView = (nodes, visibility, mutType, geneMap, showCounts) => { +export const calcEntropyInView = (nodes, visibility, selectedCds, showCounts) => { return showCounts ? - calcMutationCounts(nodes, visibility, geneMap, mutType === "aa") : - calcEntropy(nodes, visibility, geneMap, mutType === "aa"); + calcMutationCounts(nodes, visibility, selectedCds) : + calcEntropy(nodes, visibility, selectedCds); }; + +/** + * Returns an array of all CDSs in the (first chromosome of the) genome + */ +export function getCds(genomeMap) { + let cds = []; + genomeMap[0].genes.forEach((gene) => { + cds = cds.concat(gene.cds); + }) + return cds; +} + +/** returns undefined if not found */ +export function getCdsByName(genomeMap, name) { + return getCds(genomeMap).filter((cds)=>cds.name===name)[0]; +} + +/** + * Given a CDS (and all the inherent complexity possible there), and two points + * on the genome (rangeGenome), return rangeLocal coordinates for the part of + * the CDS that's "in view". The returned coordinates are expanded outwards to + * include the entire codon (in the 2/3rd of cases where they fall inside a + * codon). + * If _either_ of the rangeGenome positions are _beyond_ the CDS then we return + * the entire rangeLocal of the cds + set the flag `valid` to false. + * + * For wrapping genes, the UI forces the rangeGenome (i.e. the zoomCoordinates) + * to be on either side of the origin, and we maintain thus assumption here. + * + * Returns [rangeLocalInView:rangeLocal, valid:bool] + */ +export function getCdsRangeLocalFromRangeGenome(cds, rangeGenome) { + const positive = cds.strand==='+'; + const [zoomStart, zoomEnd] = cds.isWrapping ? + [rangeGenome[1], rangeGenome[0]] : // .toReversed() not available for Auspice?!? + rangeGenome; + const segments = cds.segments; + // segA is the segment closest to genome position 1. segB is closest to the end. + const [segA, segB] = positive ? + [segments[0], segments[segments.length-1]] : + [segments[segments.length-1], segments[0]] + + let [cdsLocalStart, cdsLocalEnd] = [1, cds.length]; + + if (zoomStart < segA.rangeGenome[0] || zoomEnd > segB.rangeGenome[1]) { + return [[cdsLocalStart, cdsLocalEnd], false]; + } + /** + * The general approach is to visit the segments in the order they appear, + * i.e. in the context of the strand it's always 5' -> 3' but for -ve strand + * CDSs this appears 3' -> 5' in the context of the +ve strand. Once we find + * an intersection we can work out the appropriate local coordinate. + * Remember that zoomStart/End are reversed if the CDS is wrapping! + */ + let prevSeg; + for (const seg of segments) { + /* If the zoom start (5') is inside the segment, then we know one of the local bounds */ + if (seg.rangeGenome[0]<=zoomStart && seg.rangeGenome[1]>=zoomStart) { + if (positive) { + const delta = zoomStart - seg.rangeGenome[0]; + cdsLocalStart = seg.rangeLocal[0] + delta; + } else { + const delta = zoomStart - seg.rangeGenome[0]; + cdsLocalEnd = seg.rangeLocal[1] - delta; + } + } + /* If the zoom end (3') is inside the segment, then we know one of the local bounds */ + if (seg.rangeGenome[0]<=zoomEnd && seg.rangeGenome[1]>=zoomEnd) { + if (positive) { + const delta = zoomEnd - seg.rangeGenome[0]; + cdsLocalEnd = seg.rangeLocal[0] + delta; + // if (segments.length>1) { + // console.log(`Zoom end (${zoomEnd}) in segment`, seg.rangeGenome, seg.rangeLocal, cdsLocalEnd) + // } + } else { + const delta = seg.rangeGenome[1] - zoomEnd; + cdsLocalStart = seg.rangeLocal[0] + delta; + } + } + /* Check to see if the zoom fell in the space between segments */ + if (prevSeg) { + if (positive) { + if (prevSeg.rangeGenome[1] < zoomStart && seg.rangeGenome[0] > zoomStart) { + cdsLocalStart = seg.rangeLocal[0]; + } + if (prevSeg.rangeGenome[1] < zoomEnd && seg.rangeGenome[0] > zoomEnd) { + cdsLocalEnd = prevSeg.rangeLocal[1]; + } + } else { + if (prevSeg.rangeGenome[0] > zoomStart && seg.rangeGenome[1] < zoomStart) { + cdsLocalEnd = prevSeg.rangeLocal[1]; + } + if (prevSeg.rangeGenome[0] > zoomEnd && seg.rangeGenome[1] < zoomEnd) { + cdsLocalStart = seg.rangeLocal[0]; + } + } + } + prevSeg = seg; + } + /* Expand the local CDS coordinates so that they are not within a codon. Note + that this does result is some weirdness; for example a segment finishes at + (genome) position 10 but pos 10 is inside a codon (in that segment), and we've + zoomed to pos 10, then we'll expand the local coordinates into the next + segment to complete the codon even though the next segment may be well beyond + the zoom bounds */ + cdsLocalStart -= (cdsLocalStart-1)%3; + const endOverhang = cdsLocalEnd%3; + if (endOverhang) { + cdsLocalEnd += endOverhang===1 ? 2 : 1; + } + + return [[cdsLocalStart, cdsLocalEnd], true]; +} + +export function getNucCoordinatesFromAaPos(cds, aaPos) { + let frame; + const nucCoordinates = []; + const ntCodonStart = (aaPos-1)*3 + 1; /* codon triplet starts here (inclusive, 1-based) */ + const positive = cds.strand==='+'; + for (let i=0; i= segment.rangeGenome[0] && nucCoordinates.length<3) { + nucCoordinates.push(ntPosGenome--); + } + } + /** If we were lucky and the entire codon fell into a single segment, then + * we're done. If not, we proceed to the next segment and grab the one or + * two nucleotides to make up the codon */ + if (nucCoordinates.length!==3) { + /* Codon bridges 2 segments */ + segment = cds.segments[i+1]; + /* rewrite "frame x" to "frames x → y" */ + frame = frame.replace('frame', 'frames') + ` → ${segment.frame}`; + /* sanity check the phase */ + if (segment.phase!==(3-nucCoordinates.length)) { + console.error(`Internal Error -- phase mismatch for CDS ${cds.name} when mapping codon ${aaPos}`); + nucCoordinates.push("Internal Error!") + break; + } + /* grab the necessary nucleotides -- at most there'll be two needed */ + if (positive) { + nucCoordinates.push(segment.rangeGenome[0]); + if (nucCoordinates.length<3) { + nucCoordinates.push(segment.rangeGenome[0]+1); + } + } else { + nucCoordinates.push(segment.rangeGenome[1]); + if (nucCoordinates.length<3) { + nucCoordinates.push(segment.rangeGenome[1]-1); + } + } + } + break; + } + return {frame, nucCoordinates}; +} + +/** A, B are the from/to nuc/aa of an observed mutation */ +function valid(A, B, isAA) { + if (isAA) { + return A !== 'X' && B !== 'X'; + } + return A !== "N" && A !== "-" && B !== "N" && B !== "-"; +} + +/** + * Given a nucleotide in Genome space (e.g. from `rangeGenome`) find all CDSs + * which have a segment covering that nucleotide and return the local coordinate + * of that position (in both nuc + aa coordinates) + * Returns {cds: CDS; nucLocal: number; aaLocal: number}[] + */ +export function nucleotideToAaPosition(genomeMap, nucPos) { + const matches = []; + getCds(genomeMap).forEach((cds) => { + for (const segment of cds.segments) { + if (segment.rangeGenome[0] <= nucPos && segment.rangeGenome[1] >= nucPos) { + const delta = cds.strand==='+' ? + nucPos - segment.rangeGenome[0] : + segment.rangeGenome[1] - nucPos; + const nucLocal = segment.rangeLocal[0]+delta; + const aaLocal = Math.ceil(nucLocal/3); + matches.push({cds, nucLocal, aaLocal}) + /* Don't return here - we want to check future segments as segments can + be overlapping */ + } + } + }) + return matches; +} \ No newline at end of file diff --git a/src/util/entropyCreateStateFromJsons.js b/src/util/entropyCreateStateFromJsons.js deleted file mode 100644 index f46da57c3..000000000 --- a/src/util/entropyCreateStateFromJsons.js +++ /dev/null @@ -1,68 +0,0 @@ -import { genotypeColors, nucleotide_gene } from "./globals"; - -/* a Note on co-ordinates. -Auspice v1 (and the JSONs it consumed) used 1-based mutations and -0-based, BED-like feature annotations. -Auspice v2 JSONs (which the client will always receive) uses GFF-like -1-based, close ended feature annotations. We adjust the starts here so that -the display remains unchanged, however this should be revisited at a later date. -*/ - -const getAnnotations = (jsonData) => { - const annotations = []; - const nuc = []; - let aaCount = 0; - for (const prot of Object.keys(jsonData)) { - if (prot !== nucleotide_gene) { - aaCount++; - annotations.push({ - prot: prot, - start: jsonData[prot].start - 1, // see above - end: jsonData[prot].end, - strand: jsonData[prot].strand, - fill: genotypeColors[aaCount % 10] - }); - } else { - nuc.push({ - start: jsonData[prot].start - 1, // see above - end: jsonData[prot].end - }); - } - } - return [annotations, nuc]; -}; - -const processAnnotations = (annotations) => { - const m = {}; /* m === geneMap */ - annotations.forEach((d) => { - m[d.prot] = d; - }); - const sorted = Object.keys(m).sort((a, b) => - m[a].start < m[b].start ? -1 : m[a].start > m[b].start ? 1 : 0 - ); - for (const gene of Object.keys(m)) { - m[gene].idx = sorted.indexOf(gene); - } - return m; -}; - -export const entropyCreateState = (genomeAnnotations) => { - if (genomeAnnotations && genomeAnnotations.nuc) { - const ant = getAnnotations(genomeAnnotations); - const annotations = ant[0]; - const lengthSequence = ant[1][0].end; - return { - showCounts: false, - loaded: true, - annotations, - lengthSequence, - geneMap: processAnnotations(annotations) - }; - } - return { - showCounts: false, - loaded: false, - annotations: [], - geneMap: {} - }; -}; diff --git a/src/util/entropyCreateStateFromJsons.ts b/src/util/entropyCreateStateFromJsons.ts new file mode 100644 index 000000000..bd042373e --- /dev/null +++ b/src/util/entropyCreateStateFromJsons.ts @@ -0,0 +1,419 @@ +import { genotypeColors } from "./globals"; +import { defaultEntropyState } from "../reducers/entropy"; + +type JsonAnnotations = Record +// enum Strand {'+', '-'} // other GFF-valid options are '.' and '?' +type Strand = string; +type JsonSegmentRange = {start: number, end: number}; // Start is 1-based, End is 1-based closed (GFF) +interface JsonAnnotation { + /* Other properties are commonly set in the JSON structure, but the following are + the only ones read by Auspice */ + end?: number; + start?: number; + segments?: JsonSegmentRange[]; + strand: Strand; + gene?: string; + color?: string; + display_name?: string; + description?: string; +} + +/* Specifies the range of the each segment's corresponding position in the genome, +or defines the range of the genome (chromosome) itself. +Start is always less than or equal to end. +Start is 1-based, End is 1-based closed. I.e. GFF. */ +type RangeGenome = [number, number]; +/* Same as RangeGenome but now relative to the nucleotides which make up the CDS +(i.e. after slippage, splicing etc). The first CDS segment's RangeLocal will always +start at 1, and the end value (of the last segment) corresponds to the number of nt in the CDS: +range_segLast[1] - range_seg1[0] + 1 = 3 * number_of_amino_acids_in_translated_CDS */ +type RangeLocal = [number, number]; +type ChromosomeMetadata = { + strandsObserved: Set, + posStrandStackHeight: number, + negStrandStackHeight: number, +} + +type GenomeAnnotation = Chromosome[]; +interface Chromosome { + name: string; + range: RangeGenome; + genes: Gene[]; + metadata: ChromosomeMetadata +} +interface Gene { + name: string; + cds: CDS[]; +} +interface CDS { + length: number; /* length of the CDS in nucleotides. Will be a multiple of 3 */ + segments: CdsSegment[]; + strand: Strand; + color: string; + name: string; + isWrapping: boolean; + displayName?: string; + description?: string; + stackPosition?: number; +} +interface CdsSegment { + rangeLocal: RangeLocal; + rangeGenome: RangeGenome; + segmentNumber: number; /* 1-based */ + phase: number; /* 0, 1 or 2. Indicates where the next codon begins relative to the 5' end of this segment */ + frame: number; /* 0, 1 or 2. The frame the codons are in, relative to the 5' end of the genome. It thus takes into account the phase */ +} + +/** + * This is in flux -- Richard's working on an updated representation for the JSON + * Here we do our best to massage the JSON annotations block into a hierarchical + * representation of Genome → Chromosome[] → Gene[] → CDS[] → CDS_Segment[]. + * The intention is for this structure to entirely replace the various other pieces of redux + * state such as 'annotations', 'geneMap', 'geneLengths', 'genomeAnnotations'. + * + * Each key:value entry in the JSON annotation block, where key!=='nuc', is interpreted as + * a CDS. There is currently no way to encode multiple CDS segments¹. Each CDS name + * is unique, as JavaScript JSON parsing guarantees the keys to be unique (even if there are + * duplicates in the JSON). + * + * By default, each CDS name (key) is set as the gene name as well, so 1 gene = 1 CDS. + * We extend the JSON to allow `value.gene` which, if set, can group multiple CDSs into + * a single gene. We also allow `value.color`, which sets the _gene_ colour (optional). + * + * ¹ The exception being a single CDS which wraps around the origin, which we are able + * to split into two segments here. + */ +export const genomeMap = (annotations: JsonAnnotations): GenomeAnnotation => { + + const nucAnnotation = Object.entries(annotations) + .filter(([name,]) => name==='nuc') + .map(([, annotation]) => annotation)[0]; + if (!nucAnnotation) throw new Error("Genome annotation missing 'nuc' definition") + if (!nucAnnotation.start || !nucAnnotation.end) throw new Error("Genome annotation for 'nuc' missing start or end") + if (nucAnnotation.strand==='-') throw new Error("Auspice can only display genomes represented as positive strand." + + "Note that -ve strand RNA viruses are typically annotated as 5' → 3'."); + const rangeGenome: RangeGenome = [nucAnnotation.start, nucAnnotation.end]; + + + /* Group by genes -- most JSONs will not include this information, so it'll essentially be + one CDS per gene, but that's just fine! */ + const annotationsPerGene: Record = {}; + Object.entries(annotations) + .filter(([name,]) => name!=='nuc') + .map(([annotationKey, annotation]) => { + const geneName = annotation.gene || annotationKey; + if (!(geneName in annotationsPerGene)) annotationsPerGene[geneName] = {}; + const gene = annotationsPerGene[geneName] as JsonAnnotations; // TODO - why do I need to cast? + gene[annotationKey] = annotation; + }) + + const nextColor = nextColorGenerator(); + + const strandsObserved: Set = new Set(); + + const genes = Object.entries(annotationsPerGene) + .map(([geneName, cdsAnnotations]) => { + const gene: Gene = { + name: geneName, + cds: [] + } + const defaultColor = nextColor.next().value; // default colours are per-gene (not per-CDS) + + gene.cds = Object.entries(cdsAnnotations) + .map(([cdsName, annotation]) => cdsFromAnnotation(cdsName, annotation, rangeGenome, defaultColor)) + .filter((cds) => cds.name!=='__INVALID__'); + gene.cds.forEach((cds) => strandsObserved.add(cds.strand)); + return gene; + }) + + const metadata: ChromosomeMetadata = { + strandsObserved, + posStrandStackHeight: calculateStackPosition(genes, '+'), + negStrandStackHeight: calculateStackPosition(genes, '-'), + } + + const chromosome: Chromosome = { + name: 'source', + range: rangeGenome, + genes, + metadata + } + return [chromosome]; +} + +export const entropyCreateState = (genomeAnnotations: JsonAnnotations) => { + if (genomeAnnotations) { + try { + return { + showCounts: false, + loaded: true, + genomeMap: genomeMap(genomeAnnotations) + }; + } catch (e) { + if (e instanceof Error) console.error(e.message); + console.error("Genotype colorings and the entropy panel will not be available.") + // fallthrough + } + } + return defaultEntropyState(); +}; + + +function validColor(color:(string|undefined)) { + if (!color) return false; + return color; // TODO XXX +} + +function* nextColorGenerator() { + let i=0; + while (true) { + yield genotypeColors[i++] as string; + if (i===genotypeColors.length) i=0; + } +} + +/** + * Returns a CDS object parsed from the provided JsonAnnotation block + */ +function cdsFromAnnotation(cdsName: string, annotation: JsonAnnotation, rangeGenome: RangeGenome, defaultColor: (string|void)): CDS { + const invalidCds: CDS = { + name: '__INVALID__', + length: 0, + segments: [], + strand: '+', + isWrapping: false, + color: '#000', + } + const strand = annotation.strand; + if (!(strand==='+' || strand==='-')) { + /** GFF allows for strands '?' (features whose strandedness is relevant, but unknown) and '.' (features that are not stranded), + * which are represented by augur as '?' and null, respectively. (null comes from `None` in python.) + * In both cases it's not a good idea to make an assumption of strandedness, or to assume it's even a CDS. */ + console.error(`[Genome annotation] ${cdsName} has strand ` + + (Object.prototype.hasOwnProperty.call(annotation, "strand") ? String(annotation.strand) : '(missing)') + + ". This CDS will be ignored."); + return invalidCds; + } + const positive = strand==='+'; + + let length = 0; // rangeLocal length + const segments: CdsSegment[] = []; + if (annotation.start && annotation.end) { + /* The simplest case is where a JSON annotation block defines a + contiguous CDS, however it may be a wrapping CDS (i.e. cds end > genome + end */ + if (annotation.end <= rangeGenome[1]) { + length = annotation.end-annotation.start+1; + segments.push({ + segmentNumber: 1, + rangeLocal: [1, length], + rangeGenome: [annotation.start, annotation.end], + phase: 0, + frame: _frame(annotation.start, annotation.end, 0, rangeGenome[1], positive), + }) + } else { + /* We turn this into the equivalent JsonSegments to minimise code duplication */ + annotation.segments = [ + {start: annotation.start, end: rangeGenome[1]}, + {start: 1, end: annotation.end-rangeGenome[1]} + ] + /* -ve strand segments are 3' -> 5', so segment[0] is at the start of the genome */ + if (!positive) annotation.segments.reverse(); + } + } + + if (annotation.segments && Array.isArray(annotation.segments)) { + if (segments.length) { // indicates we've already created one from start/stop coords + console.error(`[Genome annotation] ${cdsName} defines both start/stop and segments, but they are mutually exclusive.`); + return invalidCds; + } + let previousRangeLocalEnd = 0; + let segmentNumber = 1; + for (const segment of annotation.segments) { + /* The segments, as defined in the JSON, must be ordered according to the order the appear in the CDS. + For -ve strand that's 3' to 5'. The rangeGenome within each segment is always 5' to 3'. */ + const segmentLength = segment.end - segment.start + 1; // in nucleotides + /* phase is the number of nucs we need to add to the so-far-observed length to make it mod 3 */ + const phase = length%3===0 ? 0 : (length%3===1 ? 2 : 1); + + const s: CdsSegment = { + segmentNumber: segmentNumber++, + rangeLocal: [previousRangeLocalEnd+1, previousRangeLocalEnd+segmentLength], + rangeGenome: [segment.start, segment.end], + phase, + frame: _frame(segment.start, segment.end, phase, rangeGenome[1], positive) + }; + segments.push(s); + length += segmentLength; + previousRangeLocalEnd += segmentLength; + + } + } + if (!segments.length) { + console.error(`[Genome annotation] ${cdsName} requires either start+end or segments to be defined`); + return invalidCds; + } + + if (length%3) { + console.error(`[Genome annotation] ${cdsName} has length ${length} which is not a multiple of 3`); + return invalidCds; // skip parsing of this CDS's annotation block + } + + const cds: CDS = { + name: cdsName, + length, + segments, + strand, + isWrapping: _isCdsWrapping(strand, segments), + color: validColor(annotation.color) || defaultColor || '#000', + } + if (typeof annotation.display_name === 'string') { + cds.displayName = annotation.display_name; + } + if (typeof annotation.description === 'string') { + cds.description = annotation.description; + } + return cds +} + +/** + * Calculates the (open reading) frame the provided segment is in. + * For +ve strand this is calculated 5'->3', for -ve strand it's 3'->5'. + * The frame is calculated once the CDS is back in phase. + * start: 1 based, rangeGenome[0] of the segment + * end: 1 based, rangeGenome[1] of the segment + * start < end always + * genomeLength: 1 based + * positiveStrand: boolean + * Returns a number in the set {0, 1, 2} + */ +function _frame(start:number, end:number, phase: number, genomeLength:number, positiveStrand:boolean) { + return positiveStrand ? + (start+phase-1)%3 : + Math.abs((end-phase-genomeLength)%3); +} + +/** + * Given a list of genes (each with CDSs), we want to calculate and assign each + * CDS a "stack position" such that each CDS can be plotted with no overlaps. + * All segments of a given CDS will have the same stack position. (Stack here + * refers to this being a stacking problem.) The stack position starts at 1. + * Returns the maximum position observed. + */ +function calculateStackPosition(genes: Gene[], strand: (Strand|null) = null):number { + /* List of CDSs, sorted by their earliest occurrence in the genome (for any segment) */ + let cdss = genes.reduce((acc: CDS[], gene) => [...acc, ...gene.cds], []); + if (strand) { + cdss = cdss.filter((cds) => cds.strand===strand); + } + cdss.sort((a, b) => + Math.min(...a.segments.map((s) => s.rangeGenome[0])) < Math.min(...b.segments.map((s) => s.rangeGenome[0])) ? + -1 : 1 + ); + let stack: CDS[] = []; // current CDSs in stack + for (const newCds of cdss) { + /* remove any CDS from the stack which has ended (completely) before this one starts */ + const newMinStart = Math.min(...newCds.segments.map((s) => s.rangeGenome[0])); + stack = stack.filter((cds) => + !(Math.max(...cds.segments.map((s) => s.rangeGenome[1])) < newMinStart) + ); + // console.log("\nstacK:", stack.map((cds) => cds.name).join(", ")); + // console.log("\tconsideing", newCds.name) + /* If there are any empty slots in the current stack, take the lowest! */ + const existingY = stack.map((cds) => cds.stackPosition || 0).sort(); + const empty = _emptySlots(existingY); + if (empty) { + // console.log("\t\ttaking empty slot", empty) + newCds.stackPosition = empty; + stack.push(newCds); + continue; + } + /* If any CDS in the stack has a single space (i.e. between 2 segments) into which the entire + new CDS (i.e. all segments of newCds) can fit into, then we can re-use that position */ + const reuseablePosition = _fitCdssTogether(stack, newCds); + if (reuseablePosition) { + // console.log("\t\treusing position", reuseablePosition) + newCds.stackPosition = reuseablePosition; + stack.push(newCds); + continue; + } + /* fallthrough: use a higher position! */ + newCds.stackPosition = (existingY[existingY.length-1] || 0) + 1; + // console.log("\t\tAdding to the top!", newCds.stackPosition) + stack.push(newCds); + } + + return Math.max(...cdss.map((cds) => cds.stackPosition || 0)); +} + +/** + * Given an array of sorted integers, if there are any spaces (starting with 1) + * then return the value which can fill that space. Returns 0 if no spaces. + */ +function _emptySlots(values: number[]):number { + if ((values[0] || 0) > 1) return 1; + for (let i=1; i1) return a+1; + } + return 0; +} + +/** + * If the newCds completely (i.e. all of its segments) fits inside a single + * between-segment space of an existing segment, then return the stackPosition + * of that existing CDS. Otherwise return 0; + */ +function _fitCdssTogether(existing: CDS[], newCds: CDS):number { + const a = Math.min(...newCds.segments.map((s) => s.rangeGenome[0])); + const b = Math.max(...newCds.segments.map((s) => s.rangeGenome[1])); + for (const cds of existing) { + if (cds.segments.length===1) continue; + const segments = [...cds.segments] + segments.sort((a, b) => a.rangeGenome[0]b) { + /* yes - we can fit into the same position as this cds, but check if + another CDS in the stack is occupying this space first! */ + let spaceTaken = false; + existing.forEach((el) => { + if (el.stackPosition!==stackPosition) return; // only consider same row + if (spaceTaken) return; // saves time + el.segments.forEach((s) => { + if (s.rangeGenome[1]>=a && s.rangeGenome[0]<=b) { + spaceTaken = true + } + }) + }) + if (!spaceTaken) { + return stackPosition; + } + } + } + } + return 0; +} + + +/* Does a CDS wrap the origin? */ +function _isCdsWrapping(strand: Strand, segments: CdsSegment[]): boolean { + const positive = strand==='+'; + // segments ordered to guarantee rangeLocal will always be greater (than the previous segment) + let prevSegment; + for (const segment of segments) { + if (prevSegment) { + if (positive && prevSegment.rangeGenome[0] > segment.rangeGenome[0]) { + return true; + } + if (!positive && prevSegment.rangeGenome[0] < segment.rangeGenome[0]) { + return true; + } + } + prevSegment = segment; + } + return false; // fallthrough +} diff --git a/src/util/getGenotype.js b/src/util/getGenotype.js index acdc1b418..8310dbae1 100644 --- a/src/util/getGenotype.js +++ b/src/util/getGenotype.js @@ -25,34 +25,35 @@ export const encodeColorByGenotype = ({ gene, positions }) => { * gt-nuc_142,144 → { gene: "nuc", aa: false, positions: [142,144] } * gt-HA1_144,142 → { gene: "HA1", aa: true, positions: [144,142] } */ -export const decodeColorByGenotype = (colorBy, geneLengths) => { - // If we're passed a map of gene name → length, then validate the decoded +export const decodeColorByGenotype = (colorBy, genomeMap) => { + // If we're passed a genomeMap, then validate the decoded // gene name and positions. Otherwise, just decode without validation. - const validate = typeof geneLengths === "object" && Object.keys(geneLengths).length; + const validate = !!genomeMap; // Split the encoded string into tokens of gene and positions. const match = colorBy.match(/^gt-(.+)_([0-9,]+)$/); if (match) { - const [, gene, encodedPositions] = match; - const geneLength = validate ? geneLengths[gene] : 'Infinity'; + const [, cdsName, encodedPositions] = match; + + const geneLength = validate ? getCdsLength(genomeMap, cdsName) : 'Infinity'; if (validate && !geneLength) { - console.error("decodeColorByGenotype failed: no gene length", colorBy, gene, geneLengths); + console.error(`decodeColorByGenotype failed for color-by ${colorBy} (cds name: ${cdsName}) ` + + "as it wasn't found in the genome map;"); return null; } const positions = decodePositions(encodedPositions, geneLength); - if (!positions.length) { console.error("decodeColorByGenotype failed: no valid positions", colorBy, encodedPositions, geneLength); return null; } return { - gene, + gene: cdsName, positions, - aa: gene !== nucleotide_gene + aa: cdsName!==nucleotide_gene, }; } @@ -109,3 +110,36 @@ export const makeGenotypeLabel = (colorBy) => { if (!genotype) return false; return `Genotype ${genotype.gene}: ${genotype.positions.join(", ")}`; }; + +export const getCdsFromGenotype = (name, genomeMap) => { + if (!Array.isArray(genomeMap) || !genomeMap.length || !name) return null; + if (name==='nuc') return nucleotide_gene; + for (const gene of genomeMap[0].genes) { + for (const cds of gene.cds) { + if (cds.name===name) { + return cds; + } + } + } + return null; +} + +/** + * Returns the length of the genome (in nucleotides, if cdsName='nuc') + * or the length of the CDS (in codons, including the stop codon) + * @param {GenomeAnnotation} genomeMap + * @param {string} cdsName (may include "nuc") + * @returns {number} + */ +export function getCdsLength(genomeMap, cdsName) { + if (cdsName===nucleotide_gene) { + return genomeMap[0].range[1]; + } + for (const gene of genomeMap[0].genes) { + for (const cds of gene.cds) { + // length is checked to be a multiple of 3 when genomeMap is created + if (cds.name===cdsName) return cds.length/3; + } + } + return 0; // should never happen, as the cdsName originates from the genomeMap +} \ No newline at end of file diff --git a/src/util/globals.js b/src/util/globals.js index 80d4251c4..aaf6281c9 100644 --- a/src/util/globals.js +++ b/src/util/globals.js @@ -40,7 +40,6 @@ export const reallyBigNumber = 10000000; export const LBItime_window = 0.5; export const LBItau = 0.0005; export const attemptUntangle = false; -export const defaultMutType = "aa"; export const nucleotide_gene = "nuc"; export const plot_frequencies = false; export const genericDomain = [0, 0.111, 0.222, 0.333, 0.444, 0.555, 0.666, 0.777, 0.888, 1.0]; @@ -248,3 +247,5 @@ const aminoAcids = { }; export const getAminoAcidName = (x) => aminoAcids[x.toUpperCase()] || "Unknown"; + +export const equalArrays = (a,b) => a.length===b.length && a.every((el, idx) => b[idx]===el); diff --git a/src/util/setGenotype.js b/src/util/setGenotype.js index c57904453..b543c9165 100644 --- a/src/util/setGenotype.js +++ b/src/util/setGenotype.js @@ -67,7 +67,7 @@ export const setGenotype = (nodes, prot, positions, rootSequence) => { // console.log(`set ${ancNodes.length} nodes to the ancestral state: ${ancState}`) }; -export const orderOfGenotypeAppearance = (nodes, mutType) => { +export const orderOfGenotypeAppearance = (nodes, aaGenotype) => { const seen = {}; nodes.forEach((n) => { let numDate = getTraitFromNode(n, "num_date"); @@ -80,7 +80,7 @@ export const orderOfGenotypeAppearance = (nodes, mutType) => { ordered.sort((a, b) => seen[a] < seen[b] ? -1 : 1); let orderedBases; // remove 'non-bases' (X - N) - if (mutType === "nuc") { + if (!aaGenotype) { orderedBases = ordered.filter((x) => x !== "X" && x !== "-" && x !== "N"); } else { orderedBases = ordered.filter((x) => x !== "X" && x !== "-"); @@ -89,7 +89,7 @@ export const orderOfGenotypeAppearance = (nodes, mutType) => { if (ordered.indexOf("-") !== -1) { orderedBases.push("-"); } - if (ordered.indexOf("N") !== -1 && mutType === "nuc") { + if (ordered.indexOf("N") !== -1 && !aaGenotype) { orderedBases.push("N"); } if (ordered.indexOf("X") !== -1) { diff --git a/src/util/treeMiscHelpers.js b/src/util/treeMiscHelpers.js index 9cd7cfaa0..80acb6fa8 100644 --- a/src/util/treeMiscHelpers.js +++ b/src/util/treeMiscHelpers.js @@ -273,27 +273,22 @@ export const getTipChanges = (tipNode) => { * Returns a function which will sort a list, where each element in the list * is a gene name. Sorted by start position of the gene, with "nuc" first. */ -export const sortByGeneOrder = (genomeAnnotations) => { - if (!(genomeAnnotations instanceof Object)) { - return (a, b) => { - if (a==="nuc") return -1; - if (b==="nuc") return 1; - return 0; - }; - } - const geneOrder = Object.entries(genomeAnnotations) - .sort((a, b) => { - if (b[0]==="nuc") return 1; // show nucleotide "gene" first - if (a[1].start < b[1].start) return -1; - if (a[1].start > b[1].start) return 1; - return 0; - }) - .map(([name]) => name); - +export const sortByGeneOrder = (genomeMap) => { + if (!genomeMap) return () => 0; + /* Sort CDSs based on the genome position of the start codon */ + const cdsPos = genomeMap[0].genes.map((gene) => + gene.cds.map((cds) => [cds.name, cds.segments[0].rangeGenome[cds.strand==='+' ? 0 : 1]]) + ).flat(); + cdsPos.sort((a, b) => a[1]>b[1] ? 1 : -1) + const order = {}; + cdsPos.forEach(([name,], idx) => {order[name] = idx+1;}); + order.nuc=0; // Nuc is always first + /* Returned function takes two CDS names so it can be used as the sort function + for an array of CDS names */ return (a, b) => { - if (geneOrder.indexOf(a) < geneOrder.indexOf(b)) return -1; - if (geneOrder.indexOf(a) > geneOrder.indexOf(b)) return 1; - return 0; + if (order[a]===undefined) return 1; + if (order[b]===undefined) return -1; + return order[a] - order[b]; }; }; diff --git a/test/annotation-parsing.test.js b/test/annotation-parsing.test.js new file mode 100644 index 000000000..2d30f7c11 --- /dev/null +++ b/test/annotation-parsing.test.js @@ -0,0 +1,286 @@ +import { genomeMap } from "../src/util/entropyCreateStateFromJsons"; +import dataset from './data/test_complex-genome-annotation.json'; +import { getNucCoordinatesFromAaPos, getCdsRangeLocalFromRangeGenome, + nucleotideToAaPosition} from "../src/util/entropy"; + +const genome = genomeMap(dataset.meta.genome_annotations) +const chromosome = genome[0]; + +test("Chromosome coordinates", () => { + expect(chromosome.range[0]).toBe(1); + expect(chromosome.range[1]).toBe(100); +}); + +test("+ve strand CDS with a single segment", () => { + const cds = getCds('pos-single') + const length = 2*3; // expect CDS is 2 AA long, which is 2*3 nucleotides + /* Test certain properties -- we don't care about colour, display name for these tests */ + expect(cds).toEqual(expect.objectContaining({ + length, strand: '+', + isWrapping: false, + segments: [ + {rangeGenome: [23, 28], rangeLocal: [1, length], phase: 0, frame: 1, segmentNumber: 1} + ] + })) +}); + +test("-ve strand CDS with a single segment", () => { + const cds = getCds('neg-single'); + const length = 3*3; + expect(cds).toEqual(expect.objectContaining({ + length, strand: '-', + isWrapping: false, + segments: [ + {rangeGenome: [72, 80], rangeLocal: [1, length], phase: 0, frame: 2, segmentNumber: 1} + ] + })) +}); + +test("+ve strand CDS which wraps the origin", () => { + const cds = getCds('pos-wrapping') + const length = 6*3; + expect(cds).toEqual(expect.objectContaining({ + length, strand: '+', + isWrapping: true, + segments: [ + {rangeGenome: [93, 100], rangeLocal: [1, 8], phase: 0, frame: 2, segmentNumber: 1}, + {rangeGenome: [1, 10], rangeLocal: [9, length], phase: 1, frame: 1, segmentNumber: 2}, + ] + })) +}); + +test("-ve strand CDS which wraps the origin", () => { + const cds = getCds('neg-wrapping') + const length = 6*3; + expect(cds).toEqual(expect.objectContaining({ + length, strand: '-', + isWrapping: true, + segments: [ + // segment order is based on the direction of the CDS, so for -ve strand it's 3' to 5' + // but within each segment the rangeGenome is still 5' to 3' + {rangeGenome: [1, 8], rangeLocal: [1, 8], phase: 0, frame: 2, segmentNumber: 1}, + {rangeGenome: [91, 100], rangeLocal: [9, length], phase: 1, frame: 1, segmentNumber: 2}, + ] + })) +}); + +test("+ve strand CDS with multiple (non-wrapping) segments", () => { + const cds = getCds('pos-multi') + const length = 7*3; + expect(cds).toEqual(expect.objectContaining({ + length, strand: '+', + isWrapping: false, + segments: [ + // -1 frameshift (e.g. ribosome slip) between 1st & 2nd segments + {rangeGenome: [31, 36], rangeLocal: [1, 6], phase: 0, frame: 0, segmentNumber: 1}, // 2 amino acids + {rangeGenome: [36, 43], rangeLocal: [7, 14], phase: 0, frame: 2, segmentNumber: 2}, // 2 2/3 amino acids + {rangeGenome: [63, 69], rangeLocal: [15, length], phase: 1, frame: 0, segmentNumber: 3} // 1/3 + 2 amino acids + ] + })) +}); + + +test("-ve strand CDS with multiple (non-wrapping) segments", () => { + const cds = getCds('neg-multi') + const length = 8*3; + expect(cds).toEqual(expect.objectContaining({ + length, strand: '-', + isWrapping: false, + segments: [ + // segment order is based on the direction of the CDS, so for -ve strand it's 3' to 5' + // but within each segment the rangeGenome is still 5' to 3' + // This example has a -1 frameshift between 1st & 2nd segments (so nuc 53 is in both) + // and then a 27nt jump to the 3rd segment. + {rangeGenome: [53, 60], rangeLocal: [1, 8], phase: 0, frame: 1, segmentNumber: 1}, // 2 2/3 amino acids + {rangeGenome: [46, 53], rangeLocal: [9, 16], phase: 1, frame: 0, segmentNumber: 2}, // 1/3 + 2 + 1/3 amino acids + {rangeGenome: [12, 19], rangeLocal: [17, length], phase: 2, frame: 2, segmentNumber: 3} // 2/3 + 2 amino acids + ] + })) +}); + +const aaToNuc = { + 'pos-single': [ + [1, [23,24,25]], /* The codon for the first AA is at genome coordinates 23,24,25 (1-based) */ + [2, [26,27,28]] + ], + 'pos-multi': [ + [2, [34,35,36]], [3, [36,37,38]], [5, [42,43,63]], [7, [67,68,69]] + ], + 'pos-wrapping': [ + [3, [99,100,1]] + ], + 'neg-single': [ + [1, [80,79,78]], [3, [74,73,72]] + ], + 'neg-multi': [ + [1, [60,59,58]], [3, [54,53, 53]], [6, [46,19,18]] + ], + 'neg-wrapping': [ + [1, [8,7,6]], [3, [2,1,100]], [6, [93,92,91]] + ] +} + +describe('AA positions mapped to nucleotide positions on the genome', () => { + for (const [name, data] of Object.entries(aaToNuc)) { + const cds = getCds(name); + test(prettifyName(name), () => { + data.forEach(([aaPos, nucPosns]) => { + const res = getNucCoordinatesFromAaPos(cds, aaPos); + expect(res.nucCoordinates).toStrictEqual(nucPosns); + }) + }) + } +}) + +const genomeZoomToCdsLocalZoom = { + 'pos-single': [ + // genome zoom bounds cds rangeLocal valid? + [[21, 25], [[1, 6], false]], + [[21, 30], [[1, 6], false]], + [[25, 30], [[1, 6], false]], + [[23, 28], [[1, 6], true]], + [[24, 28], [[1, 6], true]], + [[27, 28], [[4, 6], true]], + [[26, 27], [[4, 6], true]], + [[24, 26], [[1, 6], true]], + ], + 'pos-multi': [ + // either (or both) zoom bound(s) beyond the entire CDS is invalid + [[30, 35], [[1, 21], false]], + [[30, 70], [[1, 21], false]], + [[31, 70], [[1, 21], false]], + // zoom bounds match CDS bounds + [[31, 69], [[1, 21], true]], + // Test zoom bounds where one falls in-between CDS segments + // note that local pos 15 is genome nuc 63 + [[31, 60], [[1, 15], true]], + // note that local pos 13,14 are genome pos 42,43 + [[45, 69], [[13, 21], true]], + // Each zoom bound is within a segment + [[31, 39], [[1, 12], true]], + [[35, 65], [[4, 18], true]], + [[63, 67], [[13, 21], true]], + // Pos 36 is in 2 segments and 2 different AAs. The results here are perhaps + // not the intuitive ones, and we can improve the algorithm if we genuinely + // run into this edge case. + [[36, 67], [[7, 21], true]], + [[31, 36], [[1, 9], true]], + ], + 'pos-wrapping': [ + // either (or both) zoom bound(s) beyond the entire CDS is invalid + [[20, 90], [[1, 18], false]], + [[10, 90], [[1, 18], false]], + [[20, 99], [[1, 18], false]], + // zoom bounds match CDS bounds + [[10, 93], [[1, 18], true]], + // Valid coords - i.e. within the extent of the CDS + [[10, 98], [[4, 18], true]], + [[1, 96], [[4, 9], true]], + // Note that the following will fail, because the UI forces that the zoom bounds + // are either side of the origin + // [[4, 8], [[10, 18], true]], + ], + 'neg-single': [ + // following have one (or both) zoom coords beyond the CDS => invalid + [[71, 77], [[1, 9], false]], + [[71, 100], [[1, 9], false]], + [[10, 77], [[1, 9], false]], + [[1, 100], [[1, 9], false]], + // following are valid as both zoom coords are inside (or on) CDS boundary + [[72, 80], [[1, 9], true]], + [[72, 76], [[4, 9], true]], + [[74, 76], [[4, 9], true]], + [[77, 78], [[1, 6], true]], + ], + 'neg-multi': [ + // either (or both) zoom bound(s) beyond the entire CDS is invalid + [[11, 17], [[1, 24], false]], + [[11, 70], [[1, 24], false]], + [[50, 70], [[1, 24], false]], + // zoom bounds match CDS bounds + [[12, 60], [[1, 24], true]], + // Test zoom bounds where one falls in-between CDS segments + [[12, 40], [[16, 24], true]], + [[40, 60], [[1, 18], true]], + // Each zoom bound is within a segment + [[16, 55], [[4, 21], true]], + [[48, 59], [[1, 15], true]], + [[12, 16], [[19, 24], true]], + // Pos 53 is in 2 segments but the same AA + [[53, 60], [[1, 9], true]], + [[12, 53], [[7, 24], true]], + ], + 'neg-wrapping': [ + // either (or both) zoom bound(s) beyond the entire CDS is invalid + [[20, 90], [[1, 18], false]], + [[5, 90], [[1, 18], false]], + [[20, 95], [[1, 18], false]], + // zoom bounds match CDS bounds + [[8, 91], [[1, 18], true]], + // Valid coords - i.e. within the extent of the CDS + [[5, 95], [[4, 15], true]], + // Note that the following will fail, because the UI forces that the zoom bounds + // are either side of the origin + // [[91, 99], [[10, 18], true]], + ], +} + +describe('Genome zoom bounds mapped to cds local coordinates', () => { + for (const [name, data] of Object.entries(genomeZoomToCdsLocalZoom)) { + const cds = getCds(name); + test(prettifyName(name), () => { + data.forEach(([genomeZoomBounds, expectedResult]) => { + expect(getCdsRangeLocalFromRangeGenome(cds, genomeZoomBounds)) + .toStrictEqual(expectedResult); + }) + }) + } +}) + + +/** + * Note that order of CDSs (if multiple matches) is the order they + * appear in the JSON + */ +const nucleotideToAaPositionData = [ + [23, [{cds: getCds('pos-single'), nucLocal: 1, aaLocal: 1}]], + [75, [{cds: getCds('neg-single'), nucLocal: 6, aaLocal: 2}]], + [9, [{cds: getCds('pos-wrapping'), nucLocal: 17, aaLocal: 6}]], + [92, [{cds: getCds('neg-wrapping'), nucLocal: 17, aaLocal: 6}]], + [4, [{cds: getCds('pos-wrapping'), nucLocal: 12, aaLocal: 4}, + {cds: getCds('neg-wrapping'), nucLocal: 5, aaLocal: 2}]], + [93, [{cds: getCds('pos-wrapping'), nucLocal: 1, aaLocal: 1}, + {cds: getCds('neg-wrapping'), nucLocal: 16, aaLocal: 6}]], + [48, [{cds: getCds('neg-multi'), nucLocal: 14, aaLocal: 5}]], + [63, [{cds: getCds('pos-multi'), nucLocal: 15, aaLocal: 5}]], + // Pos 36 appears in 2 segments of the pos-multi CDS, in different codons + [36, [{cds: getCds('pos-multi'), nucLocal: 6, aaLocal: 2}, + {cds: getCds('pos-multi'), nucLocal: 7, aaLocal: 3}]], + // Pos 53 appears in 2 segments of the neg-multi CDS, both in the same codon + [53, [{cds: getCds('neg-multi'), nucLocal: 8, aaLocal: 3}, + {cds: getCds('neg-multi'), nucLocal: 9, aaLocal: 3}]], +] + + +test("Single nucleotide positions are correctly mapped to amino acid positions", () => { + for (const [nucPos, expectedResult] of nucleotideToAaPositionData) { + expect(nucleotideToAaPosition(genome, nucPos)).toStrictEqual(expectedResult); + } +}); + + +/** Assumes that the gene name wasn't specified in the JSON and thus + * we have one gene with one CDS + */ +function getCds(name) { + const genes = chromosome.genes.filter((g) => g.name===name); + if (genes.length!==1) throw new Error("Multiple (or no!) matching genes"); + if (genes[0].cds.length!==1) throw new Error("Multiple (or no!) matching CDSs"); + const cds = genes[0].cds[0]; + expect(cds.name).toBe(name) + return cds; +} + +function prettifyName(name) { + return `${name.startsWith('pos')?'+ve':'-ve'} strand CDS ${name.replace(/.+-/,'')}`; +} \ No newline at end of file diff --git a/test/data/test_complex-genome-annotation.json b/test/data/test_complex-genome-annotation.json new file mode 100644 index 000000000..8e82bc4ed --- /dev/null +++ b/test/data/test_complex-genome-annotation.json @@ -0,0 +1,50 @@ +{ + "version": "v2", + "meta": { + "title": "Test dataset for complex genome annotations", + "updated": "2023-07-09", + "display_defaults": {"panels": ["entropy"], "distance_measure": "div"}, + "panels": ["tree", "entropy"], + "genome_annotations": { + "nuc": {"start": 1, "end": 100}, + "pos-single": {"start": 23, "end": 28, "strand": "+", + "display_name": "Frame1 (F1); Mutations at aa1=nuc23-25, aa2=nuc26-28; "}, + "pos-multi": {"segments": [{"start": 31, "end": 36}, {"start": 36, "end": 43}, {"start": 63, "end": 69}], "strand": "+", + "display_name": "Seg1/3: 31..36 F0 p0; Seg2/3: 36..43, F2, p0; Seg 3/3: 63..69 F2 p1; Mutations aa2 → nuc34-36, aa3 → nuc36-68, aa5: 42,43,63 aa7 → 67-69"}, + "pos-wrapping": {"start": 93, "end": 110, "strand": "+", + "display_name": "Breaks down to 2 segments: 93-100 (F2), 1-10 (phase 1, F1). Mutations: aa3: 99,100,1"}, + "neg-single": {"start": 72, "end": 80, "strand": "-", + "display_name": "Frame2 (R2); Mutations at aa1=78-80, aa3=nuc72-74;"}, + "neg-multi": {"segments": [{"start": 53, "end": 60}, {"start": 46, "end": 53}, {"start": 12, "end": 19}], "strand": "-", + "display_name": "Segment order important. Seg1/3: 53..60 R1 p0; Seg2/3: 46..53, F0, p1; Seg 3/3: 12..19 R2 p2; Mutations: aa1 → nuc60,59,58, aa3 → nuc54,53,53 (slippage), aa6 → nuc46,19,18"}, + "neg-wrapping": {"start": 91, "end": 108, "strand": "-", + "display_name": "Breaks down to 2 segments: 1..8 R2 p0 and 91..100 R1 p1 (order important). Mutations: aa1 → nuc8,7,6, aa3 → nuc2,1,100, aa6 → nuc93,92,91"} + } + }, + "tree": { + "name": "root", + "node_attrs": {"div": 0}, + "children": [ + { + "name": "A", + "node_attrs": {"div": 1}, + "branch_attrs": {"mutations": { + "nuc": ["A20T"], + "pos-single": ["A1B", "G2H"], + "pos-multi": ["A2B", "G3H", "L7S"], + "pos-wrapping": ["L3S"] + }} + }, + { + "name": "B", + "node_attrs": {"div": 1}, + "branch_attrs": {"mutations": { + "nuc": ["A20G"], + "neg-single": ["A1B", "G3H"], + "neg-multi": ["A1B", "G3H", "L6S"], + "neg-wrapping": ["A1B", "G3H", "L6S"] + }} + } + ] + } +} \ No newline at end of file diff --git a/test/entropy.test.js b/test/entropy.test.js new file mode 100644 index 000000000..946b1ca84 --- /dev/null +++ b/test/entropy.test.js @@ -0,0 +1,181 @@ +import { calcEntropyInView, getCdsByName } from "../src/util/entropy"; +import { treeJsonToState } from "../src/util/treeJsonProcessing"; +import {NODE_VISIBLE, NODE_NOT_VISIBLE, nucleotide_gene} from "../src/util/globals"; + +function tree1() { + const nodes = treeJsonToState({ + name: "ROOT", + children: [ + { + name: "tipX", + branch_attrs: {mutations: { + nuc: ["A5T", "A10T"], + CDS2: ["L4M"] + }} + }, + { + name: "tipY", + branch_attrs: {mutations: { + nuc: ["A5G"], + CDS2: ["L4P"] + }} + }, + { + name: "internalNodeZ", + branch_attrs: {mutations: { + nuc: ["A5C", "A15T"], + CDS1: ["A1B"] + }}, + children: [ + { + name: "tipZ", + branch_attrs: {mutations: { + nuc: ["A5G"], // NOTE this is nonsensical - A→C followed by A→G + CDS2: ["L4S"] // Codon 4 should be stop, but auspice doesn't check + }} + } + ] + } + ] + }).nodes; + + const visibility = nodes.map(() => NODE_VISIBLE) + + const genomeMap = [ + { + name: 'source', + range: [1,18], + genes: [ + { + name: "GENE1", color: "NA", + cds: [ + { + name: "CDS1", length: 9, strand: "+", + segments: [ + {rangeGenome: [6,14], rangeLocal: [1, 9], phase: 0, frame: 2}, + ] + } + ] + }, + { + name: "GENE2", color: "NA", + cds: [ + { + name: "CDS2", length: 12, strand: "-", + segments: [ + {rangeGenome: [5,16], rangeLocal: [1, 12], phase: 0, frame: 1}, + ] + } + ] + } + ] + } + ] + + return {nodes, visibility, genomeMap}; +} + +test("Basic mutation counts", () => { + const {nodes, visibility, genomeMap} = tree1(); + + /* Very simple counting of nucleotide changes across all nodes */ + let [counts, max] = calcEntropyInView(nodes, visibility, nucleotide_gene, true); + expect(max).toBe(4); + expect(counts).toStrictEqual([{x:5,y:4}, {x:10,y:1}, {x:15,y:1}]); + + [counts, max] = calcEntropyInView(nodes, visibility, getCdsByName(genomeMap, 'CDS1'), true); + let result = [{codon:1,y:1}] + expect(max).toBe(getMax(result)); + expect(noFill(counts)).toStrictEqual(result); + + [counts, max] = calcEntropyInView(nodes, visibility, getCdsByName(genomeMap, 'CDS2'), true); + result = [{codon:4,y:3}] + expect(max).toBe(getMax(result)); + expect(noFill(counts)).toStrictEqual(result) +}); + +test("Visibility mask + counts", () => { + const {nodes, visibility} = tree1(); + + nodes.forEach((n, i) => { + if (n.name==='tipZ') visibility[i]=NODE_NOT_VISIBLE; + }); + const [counts, max] = calcEntropyInView(nodes, visibility, nucleotide_gene, true); + expect(max).toBe(3); + expect(counts).toStrictEqual([{x:5,y:3}, {x:10,y:1}, {x:15,y:1}]); +}) + +test("Basic entropy calculations", () => { + const {nodes, visibility, genomeMap} = tree1(); + + let [counts, max] = calcEntropyInView(nodes, visibility, nucleotide_gene, false); + let result = [ + {x:5, y:entropy(1,2)}, // T,G,G + {x:10, y:entropy(1,2)}, // T,A,A + {x:15, y:entropy(1,2)}] // A,A,T + expect(max).toBe(getMax(result)); + expect(noFill(counts)).toStrictEqual(stringifyY(result)); + + [counts, max] = calcEntropyInView(nodes, visibility, getCdsByName(genomeMap, 'CDS1'), false); + result = [{codon:1, y:entropy(1,2)}]; // A,A,B + expect(max).toBe(getMax(result)); + expect(noFill(counts)).toStrictEqual(stringifyY(result)); + + [counts, max] = calcEntropyInView(nodes, visibility, getCdsByName(genomeMap, 'CDS2'), false); + result = [{codon:4, y:entropy(1,1,1)}]; // M,P,S + expect(max).toBe(getMax(result)); + expect(noFill(counts)).toStrictEqual(stringifyY(result)); +}); + + +test("Visibility mask + entropy", () => { + const {nodes, visibility, genomeMap} = tree1(); + + nodes.forEach((n, i) => { + if (n.name==='tipX' || n.name==='tipY') visibility[i]=NODE_NOT_VISIBLE; + }); + let [counts, max] = calcEntropyInView(nodes, visibility, nucleotide_gene, false); + let result = [ + {x:5,y:entropy(1)}, // G + // NOTE - position 10 is not reported as no visible nodes have it + {x:15,y:entropy(1)}]; // T + expect(max).toBe(getMax(result)); + expect(noFill(counts)).toStrictEqual(stringifyY(result)); + + + [counts, max] = calcEntropyInView(nodes, visibility, getCdsByName(genomeMap, 'CDS1'), false); + result = [{codon:1,y:entropy(1)}] // B + expect(max).toBe(getMax(result)); + expect(noFill(counts)).toStrictEqual(stringifyY(result)); + + [counts, max] = calcEntropyInView(nodes, visibility, getCdsByName(genomeMap, 'CDS2'), false); + result = [{codon:4,y:entropy(1)}]; // B + expect(max).toBe(getMax(result)); + expect(noFill(counts)).toStrictEqual(stringifyY(result)); +}); + + +function noFill(data) { + return data.map((d) => { + delete d.fill; // currently calculated based on gene order, but we'll remove this concept shortly + return d; + }) +} + +function entropy(...observations) { + let s = 0; + const total = observations.reduce((acc, cv) => acc+cv, 0); + observations.forEach((obs) => { + s-=obs/total * Math.log(obs/total); + }); + return s +} + +function stringifyY(data) { + data.forEach((d) => d.y = d.y.toFixed(3)); + return data; +} + +function getMax(data) { + return data.reduce((acc, cv) => cv.y>acc?cv.y:acc, 0) +} \ No newline at end of file