+
+ {t("Codon {{codon}} in protein {{protein}}", {codon: d.codon, protein: this.selectedCds.name})}
+
+
+
+ {t("Nuc positions {{positions}}", {positions: nucCoordinates.join(", ")})}
+
+
+
+ {cds.strand==='-' ? 'Negative strand' : `Positive strand${cds.strand!=='+' ? ' (assumed)' : ''}`}
+ {`, ${frame}`}
+
+
+
+ {this.showCounts ? `${t("Num changes observed")}: ${d.y}` : `${t("Normalised Shannon entropy")}: ${d.y}`}
+
+
+ {t("Click to color tree & map by this genotype")}
+
+
+ );
+ }
+ return _render.bind(this)
+}
+
+EntropyChart.prototype._mainTooltipNuc = function _mainTooltipAa(d) {
+ const _render = function _render(t) {
+ const nuc = d.x;
+ const aa = nucleotideToAaPosition(this.genomeMap, nuc);
+ let overlaps; // JSX to convey overlapping CDS info
+ if (aa) {
+ overlaps = (
+
+ {t("Nucleotide {{nuc}}", {nuc})}
+
+
+ {overlaps}
+
+
+ {this.showCounts ? `${t("Num changes observed")}: ${d.y}` : `${t("Normalised Shannon entropy")}: ${d.y}`}
+
+
+ {t("Click to color tree & map by this genotype")}
+
+
+ );
+ }
+ return _render.bind(this)
+}
+
+
+EntropyChart.prototype._cdsTooltip = function _cdsTooltip(d) {
+ const _render = function _render(t) {
+ const segmented = d.cds.segments.length > 1;
+ return (
+
+
-
);
@@ -162,13 +178,12 @@ class Entropy extends React.Component {
setUp(props) {
const chart = new EntropyChart(
this.d3entropy,
- props.annotations,
- props.geneMap,
- props.geneLength[nucleotide_gene],
+ props.genomeMap,
{ /* callbacks */
onHover: this.onHover.bind(this),
onLeave: this.onLeave.bind(this),
- onClick: this.onClick.bind(this)
+ onClick: this.onClick.bind(this),
+ onCdsClick: this.onCdsClick.bind(this)
}
);
chart.render(props);
@@ -204,50 +219,17 @@ class Entropy extends React.Component {
updateParams.zoomMax = nextProps.zoomMax;
updateParams.zoomMin = nextProps.zoomMin;
}
- if (this.props.bars !== nextProps.bars) { /* will always be true if mutType has changed */
- updateParams.aa = nextProps.mutType === "aa";
- updateParams.newBars = nextProps.bars;
- updateParams.maxYVal = nextProps.maxYVal;
+ if (this.props.bars !== nextProps.bars) {
+ updateParams.newBars = [nextProps.bars, nextProps.maxYVal];
}
- if (this.props.colorBy !== nextProps.colorBy && (isColorByGenotype(this.props.colorBy) || isColorByGenotype(nextProps.colorBy))) {
- if (isColorByGenotype(nextProps.colorBy)) {
- const colorByGenotype = decodeColorByGenotype(nextProps.colorBy, nextProps.geneLength);
- if (colorByGenotype.aa) { /* if it is a gene, zoom to it */
- updateParams.gene = colorByGenotype.gene;
- updateParams.start = nextProps.geneMap[updateParams.gene].start;
- updateParams.end = nextProps.geneMap[updateParams.gene].end;
- } else { /* if a nuc, want to do different things if 1 or multiple */
- const positions = colorByGenotype.positions;
- const zoomCoord = this.state.chart.zoomCoordinates;
- const maxNt = this.state.chart.maxNt;
- /* find out what new coords would be - if different enough, change zoom */
- let startUpdate, endUpdate;
- if (positions.length > 1) {
- const start = Math.min.apply(null, positions);
- const end = Math.max.apply(null, positions);
- startUpdate = start - (end-start)*0.05;
- endUpdate = end + (end-start)*0.05;
- } else {
- const pos = positions[0];
- const eitherSide = maxNt*0.05;
- const newStartEnd = (pos-eitherSide) <= 0 ? [0, pos+eitherSide] :
- (pos+eitherSide) >= maxNt ? [pos-eitherSide, maxNt] : [pos-eitherSide, pos+eitherSide];
- startUpdate = newStartEnd[0];
- endUpdate = newStartEnd[1];
- }
- /* if the zoom would be different enough, change it */
- if (!(startUpdate > zoomCoord[0]-maxNt*0.4 && startUpdate < zoomCoord[0]+maxNt*0.4) ||
- !(endUpdate > zoomCoord[1]-maxNt*0.4 && endUpdate < zoomCoord[1]+maxNt*0.4) ||
- !(positions.every((x) => x > zoomCoord[0]) && positions.every((x) => x < zoomCoord[1]))) {
- updateParams.gene = colorByGenotype.gene;
- updateParams.start = startUpdate;
- updateParams.end = endUpdate;
- }
- }
- updateParams.selected = decodeColorByGenotype(nextProps.colorBy, nextProps.geneLength);
- } else {
- updateParams.clearSelected = true;
- }
+ if (this.props.selectedCds !== nextProps.selectedCds) {
+ updateParams.selectedCds = nextProps.selectedCds;
+ }
+ if (this.props.showCounts !== nextProps.showCounts) {
+ updateParams.showCounts = nextProps.showCounts;
+ }
+ if (!equalArrays(this.props.selectedPositions, nextProps.selectedPositions)) {
+ updateParams.selectedPositions = nextProps.selectedPositions
}
if (Object.keys(updateParams).length) {
this.state.chart.update(updateParams);
@@ -264,20 +246,21 @@ class Entropy extends React.Component {
}
}
+ title() {
+ if (this.props.width<500) return "Diversity";
+ if (this.props.selectedCds===nucleotide_gene) {
+ return "Nucleotide diversity of genome"
+ }
+ return `Amino acid diversity of CDS ${this.props.selectedCds.name}`
+ }
+
render() {
- const { t } = this.props;
const styles = getStyles(this.props.width);
return (
-
-
+
+
+ {this.state.hovered ? this.state.hovered.tooltip(this.props.t) : null}
+
- {this.aaNtSwitch(styles)}
+ {this.resetLayout(styles)}
{this.entropyCountSwitch(styles)}
+
+
+
+
+
+ This panel displays the observed diversity across the current genome or a selected CDS
+ {` (currently you are viewing ${this.props.selectedCds===nucleotide_gene?'the genome':`CDS ${this.props.selectedCds.name}`}). `}
+
+ The lower axis shows the genome with +ve strand CDSs above and -ve strand CDSs below and
+ the grey overlay allows zooming in to a region.
+ The upper axis shows either the zoomed in region of the genome or a selected CDS;
+ in the latter case the coordinates represent amino acids.
+
+ Clicking on a CDS will select it and show it on the upper axis.
+ {` Clicking "Reset Layout" will always return you to viewing the entire genome.`}
+
+
);
}
diff --git a/src/components/entropy/infoPanel.js b/src/components/entropy/infoPanel.js
index 9496b798a..28c736c3f 100644
--- a/src/components/entropy/infoPanel.js
+++ b/src/components/entropy/infoPanel.js
@@ -1,16 +1,16 @@
import React from "react";
import { infoPanelStyles } from "../../globalStyles";
-const InfoPanel = ({hovered, width, height, mutType, showCounts, geneMap, t}) => {
+const InfoPanel = ({d3event, width, height, children}) => {
/* this is a function - we can bail early */
- if (!hovered) {
+ if (!d3event) {
return null;
}
const styles = {
container: {
position: "absolute",
- width: 200,
+ maxWidth: 500,
padding: "10px",
borderRadius: 10,
zIndex: 1000,
@@ -26,12 +26,9 @@ const InfoPanel = ({hovered, width, height, mutType, showCounts, geneMap, t}) =>
}
};
- /* don't use d3entropy or main chart SVG as these change on zoom.
- The Y axis is safe as it is invariant */
- const bounds = document.getElementById("entropyYAxis").getBoundingClientRect();
const pos = {
- x: hovered.x - bounds.left + 15,
- y: hovered.y - bounds.top
+ x: d3event.layerX,
+ y: d3event.layerY
};
if (pos.x < width * 0.7) {
@@ -45,45 +42,11 @@ const InfoPanel = ({hovered, width, height, mutType, showCounts, geneMap, t}) =>
styles.container.bottom = height - pos.y;
}
- const isNegStrand = hovered.d.prot ? geneMap[hovered.d.prot].strand === "-" : null;
-
- const nucPos = hovered.d.prot ?
- mutType === "aa" ?
- isNegStrand ? geneMap[hovered.d.prot].end - hovered.d.codon * 3 + 3 :
- geneMap[hovered.d.prot].start + hovered.d.codon * 3
- : isNegStrand ? geneMap[hovered.d.prot].end - hovered.d.x :
- hovered.d.x - geneMap[hovered.d.prot].start-1
- : null;
-
- const codonFromNuc = hovered.d.prot && mutType !== "aa" ? Math.floor((nucPos)/3) + 1 : null;
return (
-
-
- {
- mutType === "aa" ? t("Codon {{codon}} in protein {{protein}}", {codon: hovered.d.codon, protein: hovered.d.prot}) :
- hovered.d.prot ? `${t("Nucleotide {{nuc}}", {nuc: hovered.d.x})} (${t("Codon {{codon}} in protein {{protein}}", {codon: codonFromNuc, protein: hovered.d.prot})})` :
- t("Nucleotide {{nuc}}", {nuc: hovered.d.x})
- }
-
-
-
- {mutType === "aa" ? t("Nuc positions {{a}} to {{b}}", {a: nucPos-2, b: nucPos}) : ``}
-
-
-
- {isNegStrand === null ? `` : isNegStrand ? t("Negative strand") : t("Positive strand")}
-
-
-
- {showCounts ? `${t("Num mutations")}: ${hovered.d.y}` : `${t("entropy")}: ${hovered.d.y}`}
-
-
- {t("Click to color tree & map")}
-
-
+ {children}
- );
+ )
};
export default InfoPanel;
diff --git a/src/components/main/index.js b/src/components/main/index.js
index 2b7a8ca77..2eeca0b14 100644
--- a/src/components/main/index.js
+++ b/src/components/main/index.js
@@ -134,7 +134,7 @@ class Main extends React.Component {
const {availableWidth, availableHeight, sidebarWidth, overlayStyles} =
calcStyles(this.props.browserDimensions, this.props.displayNarrative, this.props.sidebarOpen, this.state.mobileDisplay);
const overlayHandler = () => {this.props.dispatch({type: TOGGLE_SIDEBAR, value: false});};
- const {full, grid, chart} =
+ const {full, grid, chartEntropy, chartFrequencies} =
calcPanelDims(this.props.panelsToDisplay, this.props.displayNarrative, availableWidth, availableHeight);
/* We use tree name(s) as a react key so that components remount when datasets change */
const keyName = `${this.props.treeName}${this.props.secondTreeName ? `:${this.props.secondTreeName}` : ''}`;
@@ -205,13 +205,13 @@ class Main extends React.Component {
}
{this.props.panelsToDisplay.includes("entropy") ?
(
-
+
) :
null
}
{this.props.panelsToDisplay.includes("frequencies") && this.props.frequenciesLoaded ?
(
-
+
) :
null
}
diff --git a/src/components/main/utils.js b/src/components/main/utils.js
index 67763012c..f0af76203 100644
--- a/src/components/main/utils.js
+++ b/src/components/main/utils.js
@@ -31,9 +31,11 @@ export const calcPanelDims = (panels, narrativeIsDisplayed, availableWidth, avai
const full = computeResponsive({horizontal: fullWidthFraction, vertical: fullHeightFraction, availableWidth, availableHeight});
const grid = computeResponsive({horizontal: gridWidthFraction, vertical: gridHeightFraction, availableWidth, availableHeight});
- const chart = computeResponsive({horizontal: chartWidthFraction, vertical: chartHeightFraction, availableWidth, availableHeight, minHeight: 150});
+ /* Frequencies are usable with a (SVG) height of ~200px, but the Entropy panel needs more */
+ const chartFrequencies = computeResponsive({horizontal: chartWidthFraction, vertical: chartHeightFraction, availableWidth, availableHeight, minHeight: 200});
+ const chartEntropy = computeResponsive({horizontal: chartWidthFraction, vertical: chartHeightFraction, availableWidth, availableHeight, minHeight: 300});
- return {full, grid, chart};
+ return {full, grid, chartFrequencies, chartEntropy};
};
diff --git a/src/components/tree/index.js b/src/components/tree/index.js
index 0ba76de27..dd34a5a58 100644
--- a/src/components/tree/index.js
+++ b/src/components/tree/index.js
@@ -13,10 +13,10 @@ const Tree = connect((state) => ({
scatterVariables: state.controls.scatterVariables,
temporalConfidence: state.controls.temporalConfidence,
distanceMeasure: state.controls.distanceMeasure,
- mutType: state.controls.mutType,
explodeAttr: state.controls.explodeAttr,
colorScale: state.controls.colorScale,
- metadata: state.metadata,
+ colorings: state.metadata.colorings,
+ genomeMap: state.entropy.genomeMap,
showTreeToo: state.controls.showTreeToo,
showTangle: state.controls.showTangle,
panelsToDisplay: state.controls.panelsToDisplay,
diff --git a/src/components/tree/tree.js b/src/components/tree/tree.js
index e89b96c55..5018b18f1 100644
--- a/src/components/tree/tree.js
+++ b/src/components/tree/tree.js
@@ -66,7 +66,7 @@ class Tree extends React.Component {
if (this.props.showTreeToo) {
this.setUpAndRenderTreeToo(this.props, newState); /* modifies newState in place */
}
- newState.geneSortFn = sortByGeneOrder(this.props.metadata.genomeAnnotations);
+ newState.geneSortFn = sortByGeneOrder(this.props.genomeMap);
this.setState(newState); /* this will trigger an unnecessary CDU :( */
}
}
@@ -200,7 +200,7 @@ class Tree extends React.Component {
colorBy={this.props.colorBy}
colorByConfidence={this.props.colorByConfidence}
colorScale={this.props.colorScale}
- colorings={this.props.metadata.colorings}
+ colorings={this.props.colorings}
geneSortFn={this.state.geneSortFn}
observedMutations={this.props.tree.observedMutations}
panelDims={{width: this.props.width, height: this.props.height, spaceBetweenTrees}}
@@ -210,7 +210,7 @@ class Tree extends React.Component {
clearSelectedNode={this.clearSelectedNode}
selectedNode={this.state.selectedNode}
observedMutations={this.props.tree.observedMutations}
- colorings={this.props.metadata.colorings}
+ colorings={this.props.colorings}
geneSortFn={this.state.geneSortFn}
t={t}
/>
diff --git a/src/css/entropy.css b/src/css/entropy.css
index 77b8feeb0..4685b917d 100644
--- a/src/css/entropy.css
+++ b/src/css/entropy.css
@@ -5,15 +5,13 @@
--medGrey: #888;
}
+/* Normally elements with .overlay are children of the .brush, however
+the overlay added on key-presses is is not a child of .brush */
#d3entropy .overlay {
fill: none;
pointer-events: all;
}
-#d3entropy .brush {
- stroke: none;
-}
-
#d3entropy .niceText {
font-family: "Lato", "Helvetica Neue", "Helvetica", "sans-serif";
font-size: 14px;
@@ -26,3 +24,19 @@
cursor: -moz-grab;
cursor: -webkit-grab;
}
+
+#d3entropy .cdsText {
+ pointer-events: none;
+}
+#d3entropy .cds {
+ cursor: pointer;
+}
+
+table.entropy-table {
+ font-weight: 300;
+}
+
+table.entropy-table td {
+ padding-bottom: 3px;
+ padding-right: 5px;
+}
\ No newline at end of file
diff --git a/src/middleware/changeURL.js b/src/middleware/changeURL.js
index 105af27b9..516d5d793 100644
--- a/src/middleware/changeURL.js
+++ b/src/middleware/changeURL.js
@@ -2,8 +2,8 @@ import queryString from "query-string";
import * as types from "../actions/types";
import { numericToCalendar } from "../util/dateHelpers";
import { shouldDisplayTemporalConfidence } from "../reducers/controls";
-import { genotypeSymbol, strainSymbol } from "../util/globals";
-import { encodeGenotypeFilters } from "../util/getGenotype";
+import { genotypeSymbol, nucleotide_gene, strainSymbol } from "../util/globals";
+import { encodeGenotypeFilters, decodeColorByGenotype, isColorByGenotype } from "../util/getGenotype";
/**
* This middleware acts to keep the app state and the URL query state in sync by
@@ -52,11 +52,29 @@ export const changeURLMiddleware = (store) => (next) => (action) => {
that will still make sense when the default for the given branch labelling is "show all" */
query.showBranchLabels = action.value ? 'all' : undefined;
break;
- case types.CHANGE_ZOOM:
- /* entropy panel genome zoom coordinates */
- query.gmin = action.zoomc[0] === state.controls.absoluteZoomMin ? undefined : action.zoomc[0];
- query.gmax = action.zoomc[1] >= state.controls.absoluteZoomMax ? undefined : action.zoomc[1];
+ case types.CHANGE_ZOOM: {
+ /* entropy panel genome zoom coordinates. On a page load these queries
+ will be combined with the selected CDS from the color-by genotype, or be
+ applied to the nuc view. As such, if the entropy panel's selected CDS is
+ _not_ the same as the colorBy (if applicable) then we don't set gmin/gmax
+ */
+ const entropyCdsName = state.entropy.selectedCds===nucleotide_gene ?
+ nucleotide_gene :
+ state.entropy.selectedCds.name;
+ const colorByCdsName = isColorByGenotype(state.controls.colorBy) &&
+ decodeColorByGenotype(state.controls.colorBy, state.entropy.genomeMap).gene;
+ const bounds = state.entropy.genomeMap[0].range; // guaranteed to exist, as the action comes from
+ if (
+ ((!colorByCdsName || colorByCdsName===nucleotide_gene) && entropyCdsName===nucleotide_gene) ||
+ (colorByCdsName===entropyCdsName)
+ ) {
+ query.gmin = action.zoomc[0] <= bounds[0] ? undefined : action.zoomc[0];
+ query.gmax = action.zoomc[1] >= bounds[1] ? undefined : action.zoomc[1];
+ } else {
+ [query.gmin, query.gmax] = [undefined, undefined];
+ }
break;
+ }
case types.NEW_COLORS:
query.c = action.colorBy === state.controls.defaults.colorBy ? undefined : action.colorBy;
break;
diff --git a/src/reducers/controls.js b/src/reducers/controls.js
index bb0378949..ca186171a 100644
--- a/src/reducers/controls.js
+++ b/src/reducers/controls.js
@@ -4,7 +4,6 @@ import { defaultGeoResolution,
defaultDateRange,
defaultDistanceMeasure,
defaultLayout,
- defaultMutType,
controlsHiddenWidth,
strainSymbol,
twoColumnBreakpoint } from "../util/globals";
@@ -46,8 +45,6 @@ export const getDefaultControlsState = () => {
region: null,
search: null,
strain: null,
- geneLength: {},
- mutType: defaultMutType,
temporalConfidence: { exists: false, display: false, on: false },
layout: defaults.layout,
scatterVariables: {},
@@ -243,10 +240,6 @@ const Controls = (state = getDefaultControlsState(), action) => {
filters
});
}
- case types.TOGGLE_MUT_TYPE:
- return Object.assign({}, state, {
- mutType: action.data
- });
case types.TOGGLE_TEMPORAL_CONF:
return Object.assign({}, state, {
temporalConfidence: Object.assign({}, state.temporalConfidence, {
@@ -322,6 +315,15 @@ const Controls = (state = getDefaultControlsState(), action) => {
return {...state, measurementsDisplay: action.data};
case types.APPLY_MEASUREMENTS_FILTER:
return {...state, measurementsFilters: action.data};
+ /**
+ * Currently the CHANGE_ZOOM action (entropy panel zoom changed) does not
+ * update the zoomMin/zoomMax, and as such they only represent the initially
+ * requested zoom range. The following commented out code will keep the
+ * state in sync, but corresponding changes will be required to the entropy
+ * code.
+ */
+ // case types.CHANGE_ZOOM: // this is the entropy panel zoom
+ // return {...state, zoomMin: action.zoomc[0], zoomMax: action.zoomc[1]};
default:
return state;
}
diff --git a/src/reducers/entropy.js b/src/reducers/entropy.js
index 26ed29014..a17b212b2 100644
--- a/src/reducers/entropy.js
+++ b/src/reducers/entropy.js
@@ -1,13 +1,16 @@
// import _filter from "lodash/filter";
import * as types from "../actions/types";
-const Entropy = (state = {loaded: false, showCounts: false}, action) => {
+export const defaultEntropyState = () => {
+ const startingState = {
+ loaded: false,
+ showCounts: false,
+ }
+ return {...startingState};
+}
+
+const Entropy = (state = defaultEntropyState(), action) => {
switch (action.type) {
- case types.CHANGE_ZOOM:
- return Object.assign({}, state, {
- zoomMax: action.zoomc[1],
- zoomMin: action.zoomc[0]
- });
case types.DATA_INVALID:
return {loaded: false, showCounts: false};
case types.URL_QUERY_CHANGE_WITH_COMPUTED_STATE: /* fallthrough */
@@ -23,6 +26,8 @@ const Entropy = (state = {loaded: false, showCounts: false}, action) => {
return Object.assign({}, state, {
showCounts: action.showCounts
});
+ case types.CHANGE_ENTROPY_CDS_SELECTION:
+ return {...state, ...action};
default:
return state;
}
diff --git a/src/util/colorHelpers.js b/src/util/colorHelpers.js
index a81bc78eb..389e143cf 100644
--- a/src/util/colorHelpers.js
+++ b/src/util/colorHelpers.js
@@ -24,17 +24,6 @@ export const getAverageColorFromNodes = (nodes, nodeColors) => {
return avg.toString();
};
-export const determineColorByGenotypeMutType = (colorBy) => {
- if (isColorByGenotype(colorBy)) {
- const genotype = decodeColorByGenotype(colorBy);
- return genotype.aa
- ? "aa"
- : "nuc";
- }
- return false;
-};
-
-
/**
* what colorBy trait names are present in the tree but _not_ in the provided scale?
* @param {Array} nodes - list of nodes
diff --git a/src/util/colorScale.js b/src/util/colorScale.js
index 59309fb01..604516157 100644
--- a/src/util/colorScale.js
+++ b/src/util/colorScale.js
@@ -36,13 +36,13 @@ export const calcColorScale = (colorBy, controls, tree, treeToo, metadata) => {
let colorScale, legendValues, legendBounds, legendLabels, domain;
let genotype;
- if (isColorByGenotype(colorBy) && controls.geneLength) {
- genotype = decodeColorByGenotype(colorBy, controls.geneLength);
+ if (isColorByGenotype(colorBy)) {
+ genotype = decodeColorByGenotype(colorBy);
setGenotype(tree.nodes, genotype.gene, genotype.positions, metadata.rootSequence); /* modifies nodes recursively */
}
const scaleType = genotype ? "categorical" : colorings[colorBy].type;
if (genotype) {
- ({legendValues, colorScale} = createScaleForGenotype(tree.nodes, controls.mutType));
+ ({legendValues, colorScale} = createScaleForGenotype(tree.nodes, genotype.aa));
domain = [...legendValues];
} else if (colorings && colorings[colorBy]) {
if (scaleType === "continuous" || scaleType==="temporal") {
@@ -152,18 +152,18 @@ export function createNonContinuousScaleFromProvidedScaleMap(colorBy, providedSc
};
}
-function createScaleForGenotype(t1nodes, mutType) {
- const legendValues = orderOfGenotypeAppearance(t1nodes, mutType);
- const trueValues = mutType === "nuc" ?
- legendValues.filter((x) => x !== "X" && x !== "-" && x !== "N" && x !== "") :
- legendValues.filter((x) => x !== "X" && x !== "-" && x !== "");
+function createScaleForGenotype(t1nodes, aaGenotype) {
+ const legendValues = orderOfGenotypeAppearance(t1nodes, aaGenotype);
+ const trueValues = aaGenotype ?
+ legendValues.filter((x) => x !== "X" && x !== "-" && x !== "") :
+ legendValues.filter((x) => x !== "X" && x !== "-" && x !== "N" && x !== "");
const domain = [undefined, ...legendValues];
const range = [unknownColor, ...genotypeColors.slice(0, trueValues.length)];
// Bases are returned by orderOfGenotypeAppearance in order, unknowns at end
if (legendValues.indexOf("-") !== -1) {
range.push(rgb(217, 217, 217));
}
- if (legendValues.indexOf("N") !== -1 && mutType === "nuc") {
+ if (legendValues.indexOf("N") !== -1 && !aaGenotype) {
range.push(rgb(153, 153, 153));
}
if (legendValues.indexOf("X") !== -1) {
diff --git a/src/util/entropy.js b/src/util/entropy.js
index 45a2831c4..186b1e52d 100644
--- a/src/util/entropy.js
+++ b/src/util/entropy.js
@@ -1,14 +1,5 @@
import { genotypeColors, NODE_VISIBLE, nucleotide_gene } from "./globals";
-const intersectGenes = function intersectGenes(geneMap, pos) {
- for (const gene of Object.keys(geneMap)) {
- if (pos >= geneMap[gene].start && pos <= geneMap[gene].end) {
- return gene;
- }
- }
- return false;
-};
-
/**
* Get mutations on node. Returns false if mutations not set.
* @param {object} n node
@@ -20,191 +11,125 @@ const getNodeMutations = (n) => {
return false;
};
-const calcMutationCounts = (nodes, visibility, geneMap, isAA) => {
+const calcMutationCounts = (nodes, visibility, selectedCds) => {
+ const isAA = selectedCds!==nucleotide_gene;
+ const sparse = [];
+ const cdsName = isAA ? selectedCds.name : nucleotide_gene;
- const sparse = isAA ? {} : [];
- if (isAA) {
- Object.keys(geneMap).forEach((n) => {sparse[n] = {};});
- }
nodes.forEach((n) => {
if (visibility[n.arrayIdx] !== NODE_VISIBLE) {return;}
const mutations = getNodeMutations(n);
- if (!mutations) return;
- if (isAA) {
- for (const prot of Object.keys(mutations).filter((p) => p !== "nuc")) {
- mutations[prot].forEach((m) => {
- const pos = parseInt(m.slice(1, m.length - 1), 10);
- const A = m.slice(0, 1);
- const B = m.slice(-1);
- if (A !== 'X' && B !== 'X') {
- sparse[prot][pos] ? sparse[prot][pos]++ : sparse[prot][pos] = 1;
- }
- });
+ mutations?.[cdsName]?.forEach((m) => {
+ const pos = parseInt(m.slice(1, m.length - 1), 10);
+ const A = m.slice(0, 1);
+ const B = m.slice(-1);
+ if (valid(A, B, isAA)) {
+ sparse[pos] ? sparse[pos]++ : sparse[pos] = 1;
}
- } else {
- if (!mutations.nuc) return;
- mutations.nuc.forEach((m) => {
- const pos = parseInt(m.slice(1, m.length - 1), 10);
- const A = m.slice(0, 1);
- const B = m.slice(-1);
- if (A !== "N" && A !== "-" && B !== "N" && B !== "-") {
- sparse[pos] ? sparse[pos]++ : sparse[pos] = 1;
- }
- });
- }
+ });
});
const counts = [];
- let j = 0;
- let m = 0;
- if (isAA) {
- const prots = Object.keys(sparse);
- for (let i = 0; i < prots.length; i++) {
- for (const k in sparse[prots[i]]) {
- const numK = parseInt(k, 10);
- if (sparse[prots[i]][numK] > m) {m = sparse[prots[i]][numK];}
- counts[j] = {
- x: geneMap[prots[i]].start + 3 * numK - 1, // check
- y: sparse[prots[i]][numK],
- codon: numK, // check
- fill: genotypeColors[i % 10],
- prot: prots[i]
- };
- j++;
- }
- }
- } else {
- for (let i = 0; i < sparse.length; i++) {
- if (!sparse[i]) {continue;}
- if (sparse[i] > m) {m = sparse[i];}
- counts[j] = {x: i, y: sparse[i]}; /* TODO reset y scale in D3 or compute entropy */
- j++;
- }
- for (const nt of counts) {
- nt.prot = intersectGenes(geneMap, nt.x);
- }
+ let j = 0; /* length of the counts array */
+ let m = 0; /* maximum observed count */
+ for (let i = 0; i < sparse.length; i++) {
+ if (!sparse[i]) {continue;}
+ if (sparse[i] > m) {m = sparse[i];}
+ counts[j] = isAA ?
+ {codon: parseInt(i, 10), y: sparse[i]} :
+ {x: i, y: sparse[i]}; /* TODO reset y scale in D3 or compute entropy */
+ j++;
}
return [counts, m];
};
-const calcEntropy = (nodes, visibility, geneMap, isAA) => {
- const arrayOfProts = isAA ? Object.keys(geneMap) : [nucleotide_gene];
- const initialState = {};
+const calcEntropy = (nodes, visibility, selectedCds) => {
+ const isAA = selectedCds!==nucleotide_gene;
+ const name = isAA ? selectedCds.name : nucleotide_gene;
+ /* anc_state: the ancestral (root) nuc/aa for each position, known once we see the 1st mutation */
const anc_state = {};
- const counts = {}; // same struct as state, but with counts not chars
- arrayOfProts.forEach((p) => {
- initialState[p] = {};
- counts[p] = {};
- anc_state[p] = {};
- });
- const root = nodes[0];
- let visibleTips = 0;
-
- /* assignFn is called by forEach to parse and assign the mutations at each node.
- It cannot use fat arrow as we need to access "this"
- "this" is bound to [prot, state] */
- const assignFn = function assignFn(m) {
- const prot = this[0];
- const state = this[1];
- const pos = parseInt(m.slice(1, m.length - 1), 10);
- const A = m.slice(0, 1);
- const B = m.slice(m.length - 1, m.length);
- // console.log("mut @ ", pos, ":", A, " -> ", B)
- if (isAA) {
- if (A === "X" || B === "X") return;
- } else if (A === "N" || A === "-" || B === "N" || B === "-") return;
- if (!anc_state[prot][pos]) {
- // if we don't know the ancestral state, set it via the first encountered state
- anc_state[prot][pos] = A;
- }
- state[prot][pos] = B;
- };
+ const counts = {}; /* for each position keep a dict of nuc/aa -> observed counts */
+ let visibleTips = 0; /* simple counter */
const recurse = (node, state) => {
- // if mutation observed - do something
+ /* state is a dict of position -> nuc/aa observed on branches leading to the parent of this node.
+ We update it by scanning the mutations observed on the branch leading to this node,
+ and also update the ancestral states for any positions if not already known */
const mutations = getNodeMutations(node);
- if (mutations) {
- if (isAA) {
- for (const prot of Object.keys(mutations).filter((p) => p !== "nuc")) {
- if (arrayOfProts.includes(prot)) {
- mutations[prot].forEach(assignFn, [prot, state]);
- }
- }
- } else if (mutations.nuc) {
- mutations.nuc.forEach(assignFn, [nucleotide_gene, state]);
+ mutations?.[name]?.forEach((mutation) => {
+ const pos = parseInt(mutation.slice(1, mutation.length - 1), 10);
+ const A = mutation.slice(0, 1);
+ const B = mutation.slice(mutation.length - 1, mutation.length);
+ if (!valid(A, B, isAA)) return;
+ if (!anc_state[pos]) {
+ anc_state[pos] = A;
}
- }
-
+ state[pos] = B;
+ });
+ /**
+ * If there are children, copy the state & recurse into that node.
+ * If it's a (visible) tip then use the state (which represents all mutations
+ * between the root and this node) to increment counts[pos][nuc/aa]
+ */
if (node.hasChildren) {
for (const child of node.children) {
- /* if there were no changes to the state (i.e. no mutations )
- at the node, then we don't need to deep clone the state Object
- (i.e. can just use references). This will be much quicker,
- but increase programmatic complexity. (TODO) */
- const newState = {};
- arrayOfProts.forEach((p) => {
- newState[p] = Object.assign({}, state[p]);
- });
- recurse(child, newState);
+ /* visit child nodes, passing each a _copy_ of the state so it can diverge */
+ recurse(child, Object.assign({}, state));
}
} else if (visibility[node.arrayIdx] === NODE_VISIBLE) {
visibleTips++;
- for (const prot of arrayOfProts) {
- for (const pos of Object.keys(state[prot])) {
- if (!counts[prot][pos]) {
- counts[prot][pos] = {};
- counts[prot][pos][state[prot][pos]] = 1;
- } else if (!counts[prot][pos][state[prot][pos]]) {
- counts[prot][pos][state[prot][pos]] = 1;
- } else {
- counts[prot][pos][state[prot][pos]]++;
- }
+ for (const [pos, nuc_aa] of Object.entries(state)) {
+ if (!counts[pos]) {
+ counts[pos] = {};
+ counts[pos][nuc_aa] = 1;
+ } else if (!counts[pos][nuc_aa]) {
+ counts[pos][nuc_aa] = 1;
+ } else {
+ counts[pos][nuc_aa]++;
}
}
}
};
- recurse(root, initialState);
+ /* begin the recursion at the root (nodes[0]) with empty state (i.e. zero mutations observed) */
+ recurse(nodes[0], {});
- let m = 0;
- let i = 0;
+ /**
+ * For each position where we have observed a mutation (anywhere), there may well be tips which
+ * did not have observed mutations leading to them, so we assign these the ancestral nuc/aa.
+ * We can then calculate the Shannon entropy for this position & push it onto the entropy array.
+ */
const entropy = [];
- for (const prot of arrayOfProts) {
- for (const k of Object.keys(counts[prot])) {
- let nObserved = 0;
- for (const kk of Object.keys(counts[prot][k])) {
- nObserved += counts[prot][k][kk];
- }
- const nUnobserved = visibleTips - nObserved;
- // console.log("\tn(visible tips):", visibleTips, "n(unobserved):", nUnobserved);
- if (nUnobserved > 0) {
- // console.log("\tancestral state:", anc_state[prot][k]);
- if (counts[prot][k][anc_state[prot][k]]) {
- counts[prot][k][anc_state[prot][k]] += nUnobserved;
- } else {
- counts[prot][k][anc_state[prot][k]] = nUnobserved;
- }
- }
- // console.log("\tcounts (complete):", counts[prot][k], " total:", visibleTips);
- let s = 0;
- for (const kk of Object.keys(counts[prot][k])) {
- const a = counts[prot][k][kk] / visibleTips;
- s += (-1 * a * Math.log(a));
+ let m = 0; /* maximum observed entropy value */
+ let i = 0; /* length of the entropy array */
+ for (const position of Object.keys(counts)) {
+ let nObserved = 0; /* number of tips with directly observed nuc_aa (at this position) */
+ for (const observedCount of Object.values(counts[position])) {
+ nObserved += observedCount;
+ }
+ const nUnobserved = visibleTips - nObserved;
+ if (nUnobserved > 0) {
+ if (counts[position][anc_state[position]]) {
+ counts[position][anc_state[position]] += nUnobserved;
+ } else {
+ counts[position][anc_state[position]] = nUnobserved;
}
- if (s > m) {m = s;}
- entropy[i] = isAA ? {
- x: geneMap[prot].start + 3 * k - 1, // check
- y: s.toFixed(3),
- codon: parseInt(k, 10), // check
- fill: genotypeColors[i % 10],
- prot: prot
- } : {
- x: parseInt(k, 10),
- y: s.toFixed(3),
- prot: intersectGenes(geneMap, k)
- };
- i++;
}
+
+ let s = 0; /* shannon entropy */
+ for (const count of Object.values(counts[position])) {
+ const a = count / visibleTips;
+ s += (-1 * a * Math.log(a));
+ }
+ if (s > m) {m = s;} /* update maximum observed entropy */
+ entropy[i] = isAA ? {
+ y: s.toFixed(3),
+ codon: parseInt(position, 10), // check
+ fill: genotypeColors[i % 10],
+ } : {
+ x: parseInt(position, 10),
+ y: s.toFixed(3),
+ };
+ i++;
}
- // console.log(entropy)
return [entropy, m];
};
@@ -212,14 +137,211 @@ const calcEntropy = (nodes, visibility, geneMap, isAA) => {
* traverse the tree and compile the entropy data for the visible branches
* @param {Array} nodes - list of nodes
* @param {Array} visibility - 1-1 correspondence with nodes.
-* @param {String} mutType - amino acid | nucleotide mutations - "aa" | "nuc"
-* @param {obj} geneMap used to NT fill colours. This should be improved.
+* @param {String|Cds} selectedCds - selected CDS or `nucleotide_gene` (string)
* @param {bool} showCounts show counts or entropy values?
* @return {obj} keys: the entries in attrs. Values: an object mapping values -> counts
* TODO: this algorithm can be much improved, and the data structures returned improved also
*/
-export const calcEntropyInView = (nodes, visibility, mutType, geneMap, showCounts) => {
+export const calcEntropyInView = (nodes, visibility, selectedCds, showCounts) => {
return showCounts ?
- calcMutationCounts(nodes, visibility, geneMap, mutType === "aa") :
- calcEntropy(nodes, visibility, geneMap, mutType === "aa");
+ calcMutationCounts(nodes, visibility, selectedCds) :
+ calcEntropy(nodes, visibility, selectedCds);
};
+
+/**
+ * Returns an array of all CDSs in the (first chromosome of the) genome
+ */
+export function getCds(genomeMap) {
+ let cds = [];
+ genomeMap[0].genes.forEach((gene) => {
+ cds = cds.concat(gene.cds);
+ })
+ return cds;
+}
+
+/** returns undefined if not found */
+export function getCdsByName(genomeMap, name) {
+ return getCds(genomeMap).filter((cds)=>cds.name===name)[0];
+}
+
+/**
+ * Given a CDS (and all the inherent complexity possible there), and two points
+ * on the genome (rangeGenome), return rangeLocal coordinates for the part of
+ * the CDS that's "in view". The returned coordinates are expanded outwards to
+ * include the entire codon (in the 2/3rd of cases where they fall inside a
+ * codon).
+ * If _either_ of the rangeGenome positions are _beyond_ the CDS then we return
+ * the entire rangeLocal of the cds + set the flag `valid` to false.
+ *
+ * For wrapping genes, the UI forces the rangeGenome (i.e. the zoomCoordinates)
+ * to be on either side of the origin, and we maintain thus assumption here.
+ *
+ * Returns [rangeLocalInView:rangeLocal, valid:bool]
+ */
+export function getCdsRangeLocalFromRangeGenome(cds, rangeGenome) {
+ const positive = cds.strand==='+';
+ const [zoomStart, zoomEnd] = cds.isWrapping ?
+ [rangeGenome[1], rangeGenome[0]] : // .toReversed() not available for Auspice?!?
+ rangeGenome;
+ const segments = cds.segments;
+ // segA is the segment closest to genome position 1. segB is closest to the end.
+ const [segA, segB] = positive ?
+ [segments[0], segments[segments.length-1]] :
+ [segments[segments.length-1], segments[0]]
+
+ let [cdsLocalStart, cdsLocalEnd] = [1, cds.length];
+
+ if (zoomStart < segA.rangeGenome[0] || zoomEnd > segB.rangeGenome[1]) {
+ return [[cdsLocalStart, cdsLocalEnd], false];
+ }
+ /**
+ * The general approach is to visit the segments in the order they appear,
+ * i.e. in the context of the strand it's always 5' -> 3' but for -ve strand
+ * CDSs this appears 3' -> 5' in the context of the +ve strand. Once we find
+ * an intersection we can work out the appropriate local coordinate.
+ * Remember that zoomStart/End are reversed if the CDS is wrapping!
+ */
+ let prevSeg;
+ for (const seg of segments) {
+ /* If the zoom start (5') is inside the segment, then we know one of the local bounds */
+ if (seg.rangeGenome[0]<=zoomStart && seg.rangeGenome[1]>=zoomStart) {
+ if (positive) {
+ const delta = zoomStart - seg.rangeGenome[0];
+ cdsLocalStart = seg.rangeLocal[0] + delta;
+ } else {
+ const delta = zoomStart - seg.rangeGenome[0];
+ cdsLocalEnd = seg.rangeLocal[1] - delta;
+ }
+ }
+ /* If the zoom end (3') is inside the segment, then we know one of the local bounds */
+ if (seg.rangeGenome[0]<=zoomEnd && seg.rangeGenome[1]>=zoomEnd) {
+ if (positive) {
+ const delta = zoomEnd - seg.rangeGenome[0];
+ cdsLocalEnd = seg.rangeLocal[0] + delta;
+ // if (segments.length>1) {
+ // console.log(`Zoom end (${zoomEnd}) in segment`, seg.rangeGenome, seg.rangeLocal, cdsLocalEnd)
+ // }
+ } else {
+ const delta = seg.rangeGenome[1] - zoomEnd;
+ cdsLocalStart = seg.rangeLocal[0] + delta;
+ }
+ }
+ /* Check to see if the zoom fell in the space between segments */
+ if (prevSeg) {
+ if (positive) {
+ if (prevSeg.rangeGenome[1] < zoomStart && seg.rangeGenome[0] > zoomStart) {
+ cdsLocalStart = seg.rangeLocal[0];
+ }
+ if (prevSeg.rangeGenome[1] < zoomEnd && seg.rangeGenome[0] > zoomEnd) {
+ cdsLocalEnd = prevSeg.rangeLocal[1];
+ }
+ } else {
+ if (prevSeg.rangeGenome[0] > zoomStart && seg.rangeGenome[1] < zoomStart) {
+ cdsLocalEnd = prevSeg.rangeLocal[1];
+ }
+ if (prevSeg.rangeGenome[0] > zoomEnd && seg.rangeGenome[1] < zoomEnd) {
+ cdsLocalStart = seg.rangeLocal[0];
+ }
+ }
+ }
+ prevSeg = seg;
+ }
+ /* Expand the local CDS coordinates so that they are not within a codon. Note
+ that this does result is some weirdness; for example a segment finishes at
+ (genome) position 10 but pos 10 is inside a codon (in that segment), and we've
+ zoomed to pos 10, then we'll expand the local coordinates into the next
+ segment to complete the codon even though the next segment may be well beyond
+ the zoom bounds */
+ cdsLocalStart -= (cdsLocalStart-1)%3;
+ const endOverhang = cdsLocalEnd%3;
+ if (endOverhang) {
+ cdsLocalEnd += endOverhang===1 ? 2 : 1;
+ }
+
+ return [[cdsLocalStart, cdsLocalEnd], true];
+}
+
+export function getNucCoordinatesFromAaPos(cds, aaPos) {
+ let frame;
+ const nucCoordinates = [];
+ const ntCodonStart = (aaPos-1)*3 + 1; /* codon triplet starts here (inclusive, 1-based) */
+ const positive = cds.strand==='+';
+ for (let i=0; i= segment.rangeGenome[0] && nucCoordinates.length<3) {
+ nucCoordinates.push(ntPosGenome--);
+ }
+ }
+ /** If we were lucky and the entire codon fell into a single segment, then
+ * we're done. If not, we proceed to the next segment and grab the one or
+ * two nucleotides to make up the codon */
+ if (nucCoordinates.length!==3) {
+ /* Codon bridges 2 segments */
+ segment = cds.segments[i+1];
+ /* rewrite "frame x" to "frames x → y" */
+ frame = frame.replace('frame', 'frames') + ` → ${segment.frame}`;
+ /* sanity check the phase */
+ if (segment.phase!==(3-nucCoordinates.length)) {
+ console.error(`Internal Error -- phase mismatch for CDS ${cds.name} when mapping codon ${aaPos}`);
+ nucCoordinates.push("Internal Error!")
+ break;
+ }
+ /* grab the necessary nucleotides -- at most there'll be two needed */
+ if (positive) {
+ nucCoordinates.push(segment.rangeGenome[0]);
+ if (nucCoordinates.length<3) {
+ nucCoordinates.push(segment.rangeGenome[0]+1);
+ }
+ } else {
+ nucCoordinates.push(segment.rangeGenome[1]);
+ if (nucCoordinates.length<3) {
+ nucCoordinates.push(segment.rangeGenome[1]-1);
+ }
+ }
+ }
+ break;
+ }
+ return {frame, nucCoordinates};
+}
+
+/** A, B are the from/to nuc/aa of an observed mutation */
+function valid(A, B, isAA) {
+ if (isAA) {
+ return A !== 'X' && B !== 'X';
+ }
+ return A !== "N" && A !== "-" && B !== "N" && B !== "-";
+}
+
+/**
+ * Given a nucleotide in Genome space (e.g. from `rangeGenome`) find all CDSs
+ * which have a segment covering that nucleotide and return the local coordinate
+ * of that position (in both nuc + aa coordinates)
+ * Returns {cds: CDS; nucLocal: number; aaLocal: number}[]
+ */
+export function nucleotideToAaPosition(genomeMap, nucPos) {
+ const matches = [];
+ getCds(genomeMap).forEach((cds) => {
+ for (const segment of cds.segments) {
+ if (segment.rangeGenome[0] <= nucPos && segment.rangeGenome[1] >= nucPos) {
+ const delta = cds.strand==='+' ?
+ nucPos - segment.rangeGenome[0] :
+ segment.rangeGenome[1] - nucPos;
+ const nucLocal = segment.rangeLocal[0]+delta;
+ const aaLocal = Math.ceil(nucLocal/3);
+ matches.push({cds, nucLocal, aaLocal})
+ /* Don't return here - we want to check future segments as segments can
+ be overlapping */
+ }
+ }
+ })
+ return matches;
+}
\ No newline at end of file
diff --git a/src/util/entropyCreateStateFromJsons.js b/src/util/entropyCreateStateFromJsons.js
deleted file mode 100644
index f46da57c3..000000000
--- a/src/util/entropyCreateStateFromJsons.js
+++ /dev/null
@@ -1,68 +0,0 @@
-import { genotypeColors, nucleotide_gene } from "./globals";
-
-/* a Note on co-ordinates.
-Auspice v1 (and the JSONs it consumed) used 1-based mutations and
-0-based, BED-like feature annotations.
-Auspice v2 JSONs (which the client will always receive) uses GFF-like
-1-based, close ended feature annotations. We adjust the starts here so that
-the display remains unchanged, however this should be revisited at a later date.
-*/
-
-const getAnnotations = (jsonData) => {
- const annotations = [];
- const nuc = [];
- let aaCount = 0;
- for (const prot of Object.keys(jsonData)) {
- if (prot !== nucleotide_gene) {
- aaCount++;
- annotations.push({
- prot: prot,
- start: jsonData[prot].start - 1, // see above
- end: jsonData[prot].end,
- strand: jsonData[prot].strand,
- fill: genotypeColors[aaCount % 10]
- });
- } else {
- nuc.push({
- start: jsonData[prot].start - 1, // see above
- end: jsonData[prot].end
- });
- }
- }
- return [annotations, nuc];
-};
-
-const processAnnotations = (annotations) => {
- const m = {}; /* m === geneMap */
- annotations.forEach((d) => {
- m[d.prot] = d;
- });
- const sorted = Object.keys(m).sort((a, b) =>
- m[a].start < m[b].start ? -1 : m[a].start > m[b].start ? 1 : 0
- );
- for (const gene of Object.keys(m)) {
- m[gene].idx = sorted.indexOf(gene);
- }
- return m;
-};
-
-export const entropyCreateState = (genomeAnnotations) => {
- if (genomeAnnotations && genomeAnnotations.nuc) {
- const ant = getAnnotations(genomeAnnotations);
- const annotations = ant[0];
- const lengthSequence = ant[1][0].end;
- return {
- showCounts: false,
- loaded: true,
- annotations,
- lengthSequence,
- geneMap: processAnnotations(annotations)
- };
- }
- return {
- showCounts: false,
- loaded: false,
- annotations: [],
- geneMap: {}
- };
-};
diff --git a/src/util/entropyCreateStateFromJsons.ts b/src/util/entropyCreateStateFromJsons.ts
new file mode 100644
index 000000000..bd042373e
--- /dev/null
+++ b/src/util/entropyCreateStateFromJsons.ts
@@ -0,0 +1,419 @@
+import { genotypeColors } from "./globals";
+import { defaultEntropyState } from "../reducers/entropy";
+
+type JsonAnnotations = Record
+// enum Strand {'+', '-'} // other GFF-valid options are '.' and '?'
+type Strand = string;
+type JsonSegmentRange = {start: number, end: number}; // Start is 1-based, End is 1-based closed (GFF)
+interface JsonAnnotation {
+ /* Other properties are commonly set in the JSON structure, but the following are
+ the only ones read by Auspice */
+ end?: number;
+ start?: number;
+ segments?: JsonSegmentRange[];
+ strand: Strand;
+ gene?: string;
+ color?: string;
+ display_name?: string;
+ description?: string;
+}
+
+/* Specifies the range of the each segment's corresponding position in the genome,
+or defines the range of the genome (chromosome) itself.
+Start is always less than or equal to end.
+Start is 1-based, End is 1-based closed. I.e. GFF. */
+type RangeGenome = [number, number];
+/* Same as RangeGenome but now relative to the nucleotides which make up the CDS
+(i.e. after slippage, splicing etc). The first CDS segment's RangeLocal will always
+start at 1, and the end value (of the last segment) corresponds to the number of nt in the CDS:
+range_segLast[1] - range_seg1[0] + 1 = 3 * number_of_amino_acids_in_translated_CDS */
+type RangeLocal = [number, number];
+type ChromosomeMetadata = {
+ strandsObserved: Set,
+ posStrandStackHeight: number,
+ negStrandStackHeight: number,
+}
+
+type GenomeAnnotation = Chromosome[];
+interface Chromosome {
+ name: string;
+ range: RangeGenome;
+ genes: Gene[];
+ metadata: ChromosomeMetadata
+}
+interface Gene {
+ name: string;
+ cds: CDS[];
+}
+interface CDS {
+ length: number; /* length of the CDS in nucleotides. Will be a multiple of 3 */
+ segments: CdsSegment[];
+ strand: Strand;
+ color: string;
+ name: string;
+ isWrapping: boolean;
+ displayName?: string;
+ description?: string;
+ stackPosition?: number;
+}
+interface CdsSegment {
+ rangeLocal: RangeLocal;
+ rangeGenome: RangeGenome;
+ segmentNumber: number; /* 1-based */
+ phase: number; /* 0, 1 or 2. Indicates where the next codon begins relative to the 5' end of this segment */
+ frame: number; /* 0, 1 or 2. The frame the codons are in, relative to the 5' end of the genome. It thus takes into account the phase */
+}
+
+/**
+ * This is in flux -- Richard's working on an updated representation for the JSON
+ * Here we do our best to massage the JSON annotations block into a hierarchical
+ * representation of Genome → Chromosome[] → Gene[] → CDS[] → CDS_Segment[].
+ * The intention is for this structure to entirely replace the various other pieces of redux
+ * state such as 'annotations', 'geneMap', 'geneLengths', 'genomeAnnotations'.
+ *
+ * Each key:value entry in the JSON annotation block, where key!=='nuc', is interpreted as
+ * a CDS. There is currently no way to encode multiple CDS segments¹. Each CDS name
+ * is unique, as JavaScript JSON parsing guarantees the keys to be unique (even if there are
+ * duplicates in the JSON).
+ *
+ * By default, each CDS name (key) is set as the gene name as well, so 1 gene = 1 CDS.
+ * We extend the JSON to allow `value.gene` which, if set, can group multiple CDSs into
+ * a single gene. We also allow `value.color`, which sets the _gene_ colour (optional).
+ *
+ * ¹ The exception being a single CDS which wraps around the origin, which we are able
+ * to split into two segments here.
+ */
+export const genomeMap = (annotations: JsonAnnotations): GenomeAnnotation => {
+
+ const nucAnnotation = Object.entries(annotations)
+ .filter(([name,]) => name==='nuc')
+ .map(([, annotation]) => annotation)[0];
+ if (!nucAnnotation) throw new Error("Genome annotation missing 'nuc' definition")
+ if (!nucAnnotation.start || !nucAnnotation.end) throw new Error("Genome annotation for 'nuc' missing start or end")
+ if (nucAnnotation.strand==='-') throw new Error("Auspice can only display genomes represented as positive strand." +
+ "Note that -ve strand RNA viruses are typically annotated as 5' → 3'.");
+ const rangeGenome: RangeGenome = [nucAnnotation.start, nucAnnotation.end];
+
+
+ /* Group by genes -- most JSONs will not include this information, so it'll essentially be
+ one CDS per gene, but that's just fine! */
+ const annotationsPerGene: Record = {};
+ Object.entries(annotations)
+ .filter(([name,]) => name!=='nuc')
+ .map(([annotationKey, annotation]) => {
+ const geneName = annotation.gene || annotationKey;
+ if (!(geneName in annotationsPerGene)) annotationsPerGene[geneName] = {};
+ const gene = annotationsPerGene[geneName] as JsonAnnotations; // TODO - why do I need to cast?
+ gene[annotationKey] = annotation;
+ })
+
+ const nextColor = nextColorGenerator();
+
+ const strandsObserved: Set = new Set();
+
+ const genes = Object.entries(annotationsPerGene)
+ .map(([geneName, cdsAnnotations]) => {
+ const gene: Gene = {
+ name: geneName,
+ cds: []
+ }
+ const defaultColor = nextColor.next().value; // default colours are per-gene (not per-CDS)
+
+ gene.cds = Object.entries(cdsAnnotations)
+ .map(([cdsName, annotation]) => cdsFromAnnotation(cdsName, annotation, rangeGenome, defaultColor))
+ .filter((cds) => cds.name!=='__INVALID__');
+ gene.cds.forEach((cds) => strandsObserved.add(cds.strand));
+ return gene;
+ })
+
+ const metadata: ChromosomeMetadata = {
+ strandsObserved,
+ posStrandStackHeight: calculateStackPosition(genes, '+'),
+ negStrandStackHeight: calculateStackPosition(genes, '-'),
+ }
+
+ const chromosome: Chromosome = {
+ name: 'source',
+ range: rangeGenome,
+ genes,
+ metadata
+ }
+ return [chromosome];
+}
+
+export const entropyCreateState = (genomeAnnotations: JsonAnnotations) => {
+ if (genomeAnnotations) {
+ try {
+ return {
+ showCounts: false,
+ loaded: true,
+ genomeMap: genomeMap(genomeAnnotations)
+ };
+ } catch (e) {
+ if (e instanceof Error) console.error(e.message);
+ console.error("Genotype colorings and the entropy panel will not be available.")
+ // fallthrough
+ }
+ }
+ return defaultEntropyState();
+};
+
+
+function validColor(color:(string|undefined)) {
+ if (!color) return false;
+ return color; // TODO XXX
+}
+
+function* nextColorGenerator() {
+ let i=0;
+ while (true) {
+ yield genotypeColors[i++] as string;
+ if (i===genotypeColors.length) i=0;
+ }
+}
+
+/**
+ * Returns a CDS object parsed from the provided JsonAnnotation block
+ */
+function cdsFromAnnotation(cdsName: string, annotation: JsonAnnotation, rangeGenome: RangeGenome, defaultColor: (string|void)): CDS {
+ const invalidCds: CDS = {
+ name: '__INVALID__',
+ length: 0,
+ segments: [],
+ strand: '+',
+ isWrapping: false,
+ color: '#000',
+ }
+ const strand = annotation.strand;
+ if (!(strand==='+' || strand==='-')) {
+ /** GFF allows for strands '?' (features whose strandedness is relevant, but unknown) and '.' (features that are not stranded),
+ * which are represented by augur as '?' and null, respectively. (null comes from `None` in python.)
+ * In both cases it's not a good idea to make an assumption of strandedness, or to assume it's even a CDS. */
+ console.error(`[Genome annotation] ${cdsName} has strand ` +
+ (Object.prototype.hasOwnProperty.call(annotation, "strand") ? String(annotation.strand) : '(missing)') +
+ ". This CDS will be ignored.");
+ return invalidCds;
+ }
+ const positive = strand==='+';
+
+ let length = 0; // rangeLocal length
+ const segments: CdsSegment[] = [];
+ if (annotation.start && annotation.end) {
+ /* The simplest case is where a JSON annotation block defines a
+ contiguous CDS, however it may be a wrapping CDS (i.e. cds end > genome
+ end */
+ if (annotation.end <= rangeGenome[1]) {
+ length = annotation.end-annotation.start+1;
+ segments.push({
+ segmentNumber: 1,
+ rangeLocal: [1, length],
+ rangeGenome: [annotation.start, annotation.end],
+ phase: 0,
+ frame: _frame(annotation.start, annotation.end, 0, rangeGenome[1], positive),
+ })
+ } else {
+ /* We turn this into the equivalent JsonSegments to minimise code duplication */
+ annotation.segments = [
+ {start: annotation.start, end: rangeGenome[1]},
+ {start: 1, end: annotation.end-rangeGenome[1]}
+ ]
+ /* -ve strand segments are 3' -> 5', so segment[0] is at the start of the genome */
+ if (!positive) annotation.segments.reverse();
+ }
+ }
+
+ if (annotation.segments && Array.isArray(annotation.segments)) {
+ if (segments.length) { // indicates we've already created one from start/stop coords
+ console.error(`[Genome annotation] ${cdsName} defines both start/stop and segments, but they are mutually exclusive.`);
+ return invalidCds;
+ }
+ let previousRangeLocalEnd = 0;
+ let segmentNumber = 1;
+ for (const segment of annotation.segments) {
+ /* The segments, as defined in the JSON, must be ordered according to the order the appear in the CDS.
+ For -ve strand that's 3' to 5'. The rangeGenome within each segment is always 5' to 3'. */
+ const segmentLength = segment.end - segment.start + 1; // in nucleotides
+ /* phase is the number of nucs we need to add to the so-far-observed length to make it mod 3 */
+ const phase = length%3===0 ? 0 : (length%3===1 ? 2 : 1);
+
+ const s: CdsSegment = {
+ segmentNumber: segmentNumber++,
+ rangeLocal: [previousRangeLocalEnd+1, previousRangeLocalEnd+segmentLength],
+ rangeGenome: [segment.start, segment.end],
+ phase,
+ frame: _frame(segment.start, segment.end, phase, rangeGenome[1], positive)
+ };
+ segments.push(s);
+ length += segmentLength;
+ previousRangeLocalEnd += segmentLength;
+
+ }
+ }
+ if (!segments.length) {
+ console.error(`[Genome annotation] ${cdsName} requires either start+end or segments to be defined`);
+ return invalidCds;
+ }
+
+ if (length%3) {
+ console.error(`[Genome annotation] ${cdsName} has length ${length} which is not a multiple of 3`);
+ return invalidCds; // skip parsing of this CDS's annotation block
+ }
+
+ const cds: CDS = {
+ name: cdsName,
+ length,
+ segments,
+ strand,
+ isWrapping: _isCdsWrapping(strand, segments),
+ color: validColor(annotation.color) || defaultColor || '#000',
+ }
+ if (typeof annotation.display_name === 'string') {
+ cds.displayName = annotation.display_name;
+ }
+ if (typeof annotation.description === 'string') {
+ cds.description = annotation.description;
+ }
+ return cds
+}
+
+/**
+ * Calculates the (open reading) frame the provided segment is in.
+ * For +ve strand this is calculated 5'->3', for -ve strand it's 3'->5'.
+ * The frame is calculated once the CDS is back in phase.
+ * start: 1 based, rangeGenome[0] of the segment
+ * end: 1 based, rangeGenome[1] of the segment
+ * start < end always
+ * genomeLength: 1 based
+ * positiveStrand: boolean
+ * Returns a number in the set {0, 1, 2}
+ */
+function _frame(start:number, end:number, phase: number, genomeLength:number, positiveStrand:boolean) {
+ return positiveStrand ?
+ (start+phase-1)%3 :
+ Math.abs((end-phase-genomeLength)%3);
+}
+
+/**
+ * Given a list of genes (each with CDSs), we want to calculate and assign each
+ * CDS a "stack position" such that each CDS can be plotted with no overlaps.
+ * All segments of a given CDS will have the same stack position. (Stack here
+ * refers to this being a stacking problem.) The stack position starts at 1.
+ * Returns the maximum position observed.
+ */
+function calculateStackPosition(genes: Gene[], strand: (Strand|null) = null):number {
+ /* List of CDSs, sorted by their earliest occurrence in the genome (for any segment) */
+ let cdss = genes.reduce((acc: CDS[], gene) => [...acc, ...gene.cds], []);
+ if (strand) {
+ cdss = cdss.filter((cds) => cds.strand===strand);
+ }
+ cdss.sort((a, b) =>
+ Math.min(...a.segments.map((s) => s.rangeGenome[0])) < Math.min(...b.segments.map((s) => s.rangeGenome[0])) ?
+ -1 : 1
+ );
+ let stack: CDS[] = []; // current CDSs in stack
+ for (const newCds of cdss) {
+ /* remove any CDS from the stack which has ended (completely) before this one starts */
+ const newMinStart = Math.min(...newCds.segments.map((s) => s.rangeGenome[0]));
+ stack = stack.filter((cds) =>
+ !(Math.max(...cds.segments.map((s) => s.rangeGenome[1])) < newMinStart)
+ );
+ // console.log("\nstacK:", stack.map((cds) => cds.name).join(", "));
+ // console.log("\tconsideing", newCds.name)
+ /* If there are any empty slots in the current stack, take the lowest! */
+ const existingY = stack.map((cds) => cds.stackPosition || 0).sort();
+ const empty = _emptySlots(existingY);
+ if (empty) {
+ // console.log("\t\ttaking empty slot", empty)
+ newCds.stackPosition = empty;
+ stack.push(newCds);
+ continue;
+ }
+ /* If any CDS in the stack has a single space (i.e. between 2 segments) into which the entire
+ new CDS (i.e. all segments of newCds) can fit into, then we can re-use that position */
+ const reuseablePosition = _fitCdssTogether(stack, newCds);
+ if (reuseablePosition) {
+ // console.log("\t\treusing position", reuseablePosition)
+ newCds.stackPosition = reuseablePosition;
+ stack.push(newCds);
+ continue;
+ }
+ /* fallthrough: use a higher position! */
+ newCds.stackPosition = (existingY[existingY.length-1] || 0) + 1;
+ // console.log("\t\tAdding to the top!", newCds.stackPosition)
+ stack.push(newCds);
+ }
+
+ return Math.max(...cdss.map((cds) => cds.stackPosition || 0));
+}
+
+/**
+ * Given an array of sorted integers, if there are any spaces (starting with 1)
+ * then return the value which can fill that space. Returns 0 if no spaces.
+ */
+function _emptySlots(values: number[]):number {
+ if ((values[0] || 0) > 1) return 1;
+ for (let i=1; i1) return a+1;
+ }
+ return 0;
+}
+
+/**
+ * If the newCds completely (i.e. all of its segments) fits inside a single
+ * between-segment space of an existing segment, then return the stackPosition
+ * of that existing CDS. Otherwise return 0;
+ */
+function _fitCdssTogether(existing: CDS[], newCds: CDS):number {
+ const a = Math.min(...newCds.segments.map((s) => s.rangeGenome[0]));
+ const b = Math.max(...newCds.segments.map((s) => s.rangeGenome[1]));
+ for (const cds of existing) {
+ if (cds.segments.length===1) continue;
+ const segments = [...cds.segments]
+ segments.sort((a, b) => a.rangeGenome[0]b) {
+ /* yes - we can fit into the same position as this cds, but check if
+ another CDS in the stack is occupying this space first! */
+ let spaceTaken = false;
+ existing.forEach((el) => {
+ if (el.stackPosition!==stackPosition) return; // only consider same row
+ if (spaceTaken) return; // saves time
+ el.segments.forEach((s) => {
+ if (s.rangeGenome[1]>=a && s.rangeGenome[0]<=b) {
+ spaceTaken = true
+ }
+ })
+ })
+ if (!spaceTaken) {
+ return stackPosition;
+ }
+ }
+ }
+ }
+ return 0;
+}
+
+
+/* Does a CDS wrap the origin? */
+function _isCdsWrapping(strand: Strand, segments: CdsSegment[]): boolean {
+ const positive = strand==='+';
+ // segments ordered to guarantee rangeLocal will always be greater (than the previous segment)
+ let prevSegment;
+ for (const segment of segments) {
+ if (prevSegment) {
+ if (positive && prevSegment.rangeGenome[0] > segment.rangeGenome[0]) {
+ return true;
+ }
+ if (!positive && prevSegment.rangeGenome[0] < segment.rangeGenome[0]) {
+ return true;
+ }
+ }
+ prevSegment = segment;
+ }
+ return false; // fallthrough
+}
diff --git a/src/util/getGenotype.js b/src/util/getGenotype.js
index acdc1b418..8310dbae1 100644
--- a/src/util/getGenotype.js
+++ b/src/util/getGenotype.js
@@ -25,34 +25,35 @@ export const encodeColorByGenotype = ({ gene, positions }) => {
* gt-nuc_142,144 → { gene: "nuc", aa: false, positions: [142,144] }
* gt-HA1_144,142 → { gene: "HA1", aa: true, positions: [144,142] }
*/
-export const decodeColorByGenotype = (colorBy, geneLengths) => {
- // If we're passed a map of gene name → length, then validate the decoded
+export const decodeColorByGenotype = (colorBy, genomeMap) => {
+ // If we're passed a genomeMap, then validate the decoded
// gene name and positions. Otherwise, just decode without validation.
- const validate = typeof geneLengths === "object" && Object.keys(geneLengths).length;
+ const validate = !!genomeMap;
// Split the encoded string into tokens of gene and positions.
const match = colorBy.match(/^gt-(.+)_([0-9,]+)$/);
if (match) {
- const [, gene, encodedPositions] = match;
- const geneLength = validate ? geneLengths[gene] : 'Infinity';
+ const [, cdsName, encodedPositions] = match;
+
+ const geneLength = validate ? getCdsLength(genomeMap, cdsName) : 'Infinity';
if (validate && !geneLength) {
- console.error("decodeColorByGenotype failed: no gene length", colorBy, gene, geneLengths);
+ console.error(`decodeColorByGenotype failed for color-by ${colorBy} (cds name: ${cdsName}) ` +
+ "as it wasn't found in the genome map;");
return null;
}
const positions = decodePositions(encodedPositions, geneLength);
-
if (!positions.length) {
console.error("decodeColorByGenotype failed: no valid positions", colorBy, encodedPositions, geneLength);
return null;
}
return {
- gene,
+ gene: cdsName,
positions,
- aa: gene !== nucleotide_gene
+ aa: cdsName!==nucleotide_gene,
};
}
@@ -109,3 +110,36 @@ export const makeGenotypeLabel = (colorBy) => {
if (!genotype) return false;
return `Genotype ${genotype.gene}: ${genotype.positions.join(", ")}`;
};
+
+export const getCdsFromGenotype = (name, genomeMap) => {
+ if (!Array.isArray(genomeMap) || !genomeMap.length || !name) return null;
+ if (name==='nuc') return nucleotide_gene;
+ for (const gene of genomeMap[0].genes) {
+ for (const cds of gene.cds) {
+ if (cds.name===name) {
+ return cds;
+ }
+ }
+ }
+ return null;
+}
+
+/**
+ * Returns the length of the genome (in nucleotides, if cdsName='nuc')
+ * or the length of the CDS (in codons, including the stop codon)
+ * @param {GenomeAnnotation} genomeMap
+ * @param {string} cdsName (may include "nuc")
+ * @returns {number}
+ */
+export function getCdsLength(genomeMap, cdsName) {
+ if (cdsName===nucleotide_gene) {
+ return genomeMap[0].range[1];
+ }
+ for (const gene of genomeMap[0].genes) {
+ for (const cds of gene.cds) {
+ // length is checked to be a multiple of 3 when genomeMap is created
+ if (cds.name===cdsName) return cds.length/3;
+ }
+ }
+ return 0; // should never happen, as the cdsName originates from the genomeMap
+}
\ No newline at end of file
diff --git a/src/util/globals.js b/src/util/globals.js
index 80d4251c4..aaf6281c9 100644
--- a/src/util/globals.js
+++ b/src/util/globals.js
@@ -40,7 +40,6 @@ export const reallyBigNumber = 10000000;
export const LBItime_window = 0.5;
export const LBItau = 0.0005;
export const attemptUntangle = false;
-export const defaultMutType = "aa";
export const nucleotide_gene = "nuc";
export const plot_frequencies = false;
export const genericDomain = [0, 0.111, 0.222, 0.333, 0.444, 0.555, 0.666, 0.777, 0.888, 1.0];
@@ -248,3 +247,5 @@ const aminoAcids = {
};
export const getAminoAcidName = (x) => aminoAcids[x.toUpperCase()] || "Unknown";
+
+export const equalArrays = (a,b) => a.length===b.length && a.every((el, idx) => b[idx]===el);
diff --git a/src/util/setGenotype.js b/src/util/setGenotype.js
index c57904453..b543c9165 100644
--- a/src/util/setGenotype.js
+++ b/src/util/setGenotype.js
@@ -67,7 +67,7 @@ export const setGenotype = (nodes, prot, positions, rootSequence) => {
// console.log(`set ${ancNodes.length} nodes to the ancestral state: ${ancState}`)
};
-export const orderOfGenotypeAppearance = (nodes, mutType) => {
+export const orderOfGenotypeAppearance = (nodes, aaGenotype) => {
const seen = {};
nodes.forEach((n) => {
let numDate = getTraitFromNode(n, "num_date");
@@ -80,7 +80,7 @@ export const orderOfGenotypeAppearance = (nodes, mutType) => {
ordered.sort((a, b) => seen[a] < seen[b] ? -1 : 1);
let orderedBases;
// remove 'non-bases' (X - N)
- if (mutType === "nuc") {
+ if (!aaGenotype) {
orderedBases = ordered.filter((x) => x !== "X" && x !== "-" && x !== "N");
} else {
orderedBases = ordered.filter((x) => x !== "X" && x !== "-");
@@ -89,7 +89,7 @@ export const orderOfGenotypeAppearance = (nodes, mutType) => {
if (ordered.indexOf("-") !== -1) {
orderedBases.push("-");
}
- if (ordered.indexOf("N") !== -1 && mutType === "nuc") {
+ if (ordered.indexOf("N") !== -1 && !aaGenotype) {
orderedBases.push("N");
}
if (ordered.indexOf("X") !== -1) {
diff --git a/src/util/treeMiscHelpers.js b/src/util/treeMiscHelpers.js
index 9cd7cfaa0..80acb6fa8 100644
--- a/src/util/treeMiscHelpers.js
+++ b/src/util/treeMiscHelpers.js
@@ -273,27 +273,22 @@ export const getTipChanges = (tipNode) => {
* Returns a function which will sort a list, where each element in the list
* is a gene name. Sorted by start position of the gene, with "nuc" first.
*/
-export const sortByGeneOrder = (genomeAnnotations) => {
- if (!(genomeAnnotations instanceof Object)) {
- return (a, b) => {
- if (a==="nuc") return -1;
- if (b==="nuc") return 1;
- return 0;
- };
- }
- const geneOrder = Object.entries(genomeAnnotations)
- .sort((a, b) => {
- if (b[0]==="nuc") return 1; // show nucleotide "gene" first
- if (a[1].start < b[1].start) return -1;
- if (a[1].start > b[1].start) return 1;
- return 0;
- })
- .map(([name]) => name);
-
+export const sortByGeneOrder = (genomeMap) => {
+ if (!genomeMap) return () => 0;
+ /* Sort CDSs based on the genome position of the start codon */
+ const cdsPos = genomeMap[0].genes.map((gene) =>
+ gene.cds.map((cds) => [cds.name, cds.segments[0].rangeGenome[cds.strand==='+' ? 0 : 1]])
+ ).flat();
+ cdsPos.sort((a, b) => a[1]>b[1] ? 1 : -1)
+ const order = {};
+ cdsPos.forEach(([name,], idx) => {order[name] = idx+1;});
+ order.nuc=0; // Nuc is always first
+ /* Returned function takes two CDS names so it can be used as the sort function
+ for an array of CDS names */
return (a, b) => {
- if (geneOrder.indexOf(a) < geneOrder.indexOf(b)) return -1;
- if (geneOrder.indexOf(a) > geneOrder.indexOf(b)) return 1;
- return 0;
+ if (order[a]===undefined) return 1;
+ if (order[b]===undefined) return -1;
+ return order[a] - order[b];
};
};
diff --git a/test/annotation-parsing.test.js b/test/annotation-parsing.test.js
new file mode 100644
index 000000000..2d30f7c11
--- /dev/null
+++ b/test/annotation-parsing.test.js
@@ -0,0 +1,286 @@
+import { genomeMap } from "../src/util/entropyCreateStateFromJsons";
+import dataset from './data/test_complex-genome-annotation.json';
+import { getNucCoordinatesFromAaPos, getCdsRangeLocalFromRangeGenome,
+ nucleotideToAaPosition} from "../src/util/entropy";
+
+const genome = genomeMap(dataset.meta.genome_annotations)
+const chromosome = genome[0];
+
+test("Chromosome coordinates", () => {
+ expect(chromosome.range[0]).toBe(1);
+ expect(chromosome.range[1]).toBe(100);
+});
+
+test("+ve strand CDS with a single segment", () => {
+ const cds = getCds('pos-single')
+ const length = 2*3; // expect CDS is 2 AA long, which is 2*3 nucleotides
+ /* Test certain properties -- we don't care about colour, display name for these tests */
+ expect(cds).toEqual(expect.objectContaining({
+ length, strand: '+',
+ isWrapping: false,
+ segments: [
+ {rangeGenome: [23, 28], rangeLocal: [1, length], phase: 0, frame: 1, segmentNumber: 1}
+ ]
+ }))
+});
+
+test("-ve strand CDS with a single segment", () => {
+ const cds = getCds('neg-single');
+ const length = 3*3;
+ expect(cds).toEqual(expect.objectContaining({
+ length, strand: '-',
+ isWrapping: false,
+ segments: [
+ {rangeGenome: [72, 80], rangeLocal: [1, length], phase: 0, frame: 2, segmentNumber: 1}
+ ]
+ }))
+});
+
+test("+ve strand CDS which wraps the origin", () => {
+ const cds = getCds('pos-wrapping')
+ const length = 6*3;
+ expect(cds).toEqual(expect.objectContaining({
+ length, strand: '+',
+ isWrapping: true,
+ segments: [
+ {rangeGenome: [93, 100], rangeLocal: [1, 8], phase: 0, frame: 2, segmentNumber: 1},
+ {rangeGenome: [1, 10], rangeLocal: [9, length], phase: 1, frame: 1, segmentNumber: 2},
+ ]
+ }))
+});
+
+test("-ve strand CDS which wraps the origin", () => {
+ const cds = getCds('neg-wrapping')
+ const length = 6*3;
+ expect(cds).toEqual(expect.objectContaining({
+ length, strand: '-',
+ isWrapping: true,
+ segments: [
+ // segment order is based on the direction of the CDS, so for -ve strand it's 3' to 5'
+ // but within each segment the rangeGenome is still 5' to 3'
+ {rangeGenome: [1, 8], rangeLocal: [1, 8], phase: 0, frame: 2, segmentNumber: 1},
+ {rangeGenome: [91, 100], rangeLocal: [9, length], phase: 1, frame: 1, segmentNumber: 2},
+ ]
+ }))
+});
+
+test("+ve strand CDS with multiple (non-wrapping) segments", () => {
+ const cds = getCds('pos-multi')
+ const length = 7*3;
+ expect(cds).toEqual(expect.objectContaining({
+ length, strand: '+',
+ isWrapping: false,
+ segments: [
+ // -1 frameshift (e.g. ribosome slip) between 1st & 2nd segments
+ {rangeGenome: [31, 36], rangeLocal: [1, 6], phase: 0, frame: 0, segmentNumber: 1}, // 2 amino acids
+ {rangeGenome: [36, 43], rangeLocal: [7, 14], phase: 0, frame: 2, segmentNumber: 2}, // 2 2/3 amino acids
+ {rangeGenome: [63, 69], rangeLocal: [15, length], phase: 1, frame: 0, segmentNumber: 3} // 1/3 + 2 amino acids
+ ]
+ }))
+});
+
+
+test("-ve strand CDS with multiple (non-wrapping) segments", () => {
+ const cds = getCds('neg-multi')
+ const length = 8*3;
+ expect(cds).toEqual(expect.objectContaining({
+ length, strand: '-',
+ isWrapping: false,
+ segments: [
+ // segment order is based on the direction of the CDS, so for -ve strand it's 3' to 5'
+ // but within each segment the rangeGenome is still 5' to 3'
+ // This example has a -1 frameshift between 1st & 2nd segments (so nuc 53 is in both)
+ // and then a 27nt jump to the 3rd segment.
+ {rangeGenome: [53, 60], rangeLocal: [1, 8], phase: 0, frame: 1, segmentNumber: 1}, // 2 2/3 amino acids
+ {rangeGenome: [46, 53], rangeLocal: [9, 16], phase: 1, frame: 0, segmentNumber: 2}, // 1/3 + 2 + 1/3 amino acids
+ {rangeGenome: [12, 19], rangeLocal: [17, length], phase: 2, frame: 2, segmentNumber: 3} // 2/3 + 2 amino acids
+ ]
+ }))
+});
+
+const aaToNuc = {
+ 'pos-single': [
+ [1, [23,24,25]], /* The codon for the first AA is at genome coordinates 23,24,25 (1-based) */
+ [2, [26,27,28]]
+ ],
+ 'pos-multi': [
+ [2, [34,35,36]], [3, [36,37,38]], [5, [42,43,63]], [7, [67,68,69]]
+ ],
+ 'pos-wrapping': [
+ [3, [99,100,1]]
+ ],
+ 'neg-single': [
+ [1, [80,79,78]], [3, [74,73,72]]
+ ],
+ 'neg-multi': [
+ [1, [60,59,58]], [3, [54,53, 53]], [6, [46,19,18]]
+ ],
+ 'neg-wrapping': [
+ [1, [8,7,6]], [3, [2,1,100]], [6, [93,92,91]]
+ ]
+}
+
+describe('AA positions mapped to nucleotide positions on the genome', () => {
+ for (const [name, data] of Object.entries(aaToNuc)) {
+ const cds = getCds(name);
+ test(prettifyName(name), () => {
+ data.forEach(([aaPos, nucPosns]) => {
+ const res = getNucCoordinatesFromAaPos(cds, aaPos);
+ expect(res.nucCoordinates).toStrictEqual(nucPosns);
+ })
+ })
+ }
+})
+
+const genomeZoomToCdsLocalZoom = {
+ 'pos-single': [
+ // genome zoom bounds cds rangeLocal valid?
+ [[21, 25], [[1, 6], false]],
+ [[21, 30], [[1, 6], false]],
+ [[25, 30], [[1, 6], false]],
+ [[23, 28], [[1, 6], true]],
+ [[24, 28], [[1, 6], true]],
+ [[27, 28], [[4, 6], true]],
+ [[26, 27], [[4, 6], true]],
+ [[24, 26], [[1, 6], true]],
+ ],
+ 'pos-multi': [
+ // either (or both) zoom bound(s) beyond the entire CDS is invalid
+ [[30, 35], [[1, 21], false]],
+ [[30, 70], [[1, 21], false]],
+ [[31, 70], [[1, 21], false]],
+ // zoom bounds match CDS bounds
+ [[31, 69], [[1, 21], true]],
+ // Test zoom bounds where one falls in-between CDS segments
+ // note that local pos 15 is genome nuc 63
+ [[31, 60], [[1, 15], true]],
+ // note that local pos 13,14 are genome pos 42,43
+ [[45, 69], [[13, 21], true]],
+ // Each zoom bound is within a segment
+ [[31, 39], [[1, 12], true]],
+ [[35, 65], [[4, 18], true]],
+ [[63, 67], [[13, 21], true]],
+ // Pos 36 is in 2 segments and 2 different AAs. The results here are perhaps
+ // not the intuitive ones, and we can improve the algorithm if we genuinely
+ // run into this edge case.
+ [[36, 67], [[7, 21], true]],
+ [[31, 36], [[1, 9], true]],
+ ],
+ 'pos-wrapping': [
+ // either (or both) zoom bound(s) beyond the entire CDS is invalid
+ [[20, 90], [[1, 18], false]],
+ [[10, 90], [[1, 18], false]],
+ [[20, 99], [[1, 18], false]],
+ // zoom bounds match CDS bounds
+ [[10, 93], [[1, 18], true]],
+ // Valid coords - i.e. within the extent of the CDS
+ [[10, 98], [[4, 18], true]],
+ [[1, 96], [[4, 9], true]],
+ // Note that the following will fail, because the UI forces that the zoom bounds
+ // are either side of the origin
+ // [[4, 8], [[10, 18], true]],
+ ],
+ 'neg-single': [
+ // following have one (or both) zoom coords beyond the CDS => invalid
+ [[71, 77], [[1, 9], false]],
+ [[71, 100], [[1, 9], false]],
+ [[10, 77], [[1, 9], false]],
+ [[1, 100], [[1, 9], false]],
+ // following are valid as both zoom coords are inside (or on) CDS boundary
+ [[72, 80], [[1, 9], true]],
+ [[72, 76], [[4, 9], true]],
+ [[74, 76], [[4, 9], true]],
+ [[77, 78], [[1, 6], true]],
+ ],
+ 'neg-multi': [
+ // either (or both) zoom bound(s) beyond the entire CDS is invalid
+ [[11, 17], [[1, 24], false]],
+ [[11, 70], [[1, 24], false]],
+ [[50, 70], [[1, 24], false]],
+ // zoom bounds match CDS bounds
+ [[12, 60], [[1, 24], true]],
+ // Test zoom bounds where one falls in-between CDS segments
+ [[12, 40], [[16, 24], true]],
+ [[40, 60], [[1, 18], true]],
+ // Each zoom bound is within a segment
+ [[16, 55], [[4, 21], true]],
+ [[48, 59], [[1, 15], true]],
+ [[12, 16], [[19, 24], true]],
+ // Pos 53 is in 2 segments but the same AA
+ [[53, 60], [[1, 9], true]],
+ [[12, 53], [[7, 24], true]],
+ ],
+ 'neg-wrapping': [
+ // either (or both) zoom bound(s) beyond the entire CDS is invalid
+ [[20, 90], [[1, 18], false]],
+ [[5, 90], [[1, 18], false]],
+ [[20, 95], [[1, 18], false]],
+ // zoom bounds match CDS bounds
+ [[8, 91], [[1, 18], true]],
+ // Valid coords - i.e. within the extent of the CDS
+ [[5, 95], [[4, 15], true]],
+ // Note that the following will fail, because the UI forces that the zoom bounds
+ // are either side of the origin
+ // [[91, 99], [[10, 18], true]],
+ ],
+}
+
+describe('Genome zoom bounds mapped to cds local coordinates', () => {
+ for (const [name, data] of Object.entries(genomeZoomToCdsLocalZoom)) {
+ const cds = getCds(name);
+ test(prettifyName(name), () => {
+ data.forEach(([genomeZoomBounds, expectedResult]) => {
+ expect(getCdsRangeLocalFromRangeGenome(cds, genomeZoomBounds))
+ .toStrictEqual(expectedResult);
+ })
+ })
+ }
+})
+
+
+/**
+ * Note that order of CDSs (if multiple matches) is the order they
+ * appear in the JSON
+ */
+const nucleotideToAaPositionData = [
+ [23, [{cds: getCds('pos-single'), nucLocal: 1, aaLocal: 1}]],
+ [75, [{cds: getCds('neg-single'), nucLocal: 6, aaLocal: 2}]],
+ [9, [{cds: getCds('pos-wrapping'), nucLocal: 17, aaLocal: 6}]],
+ [92, [{cds: getCds('neg-wrapping'), nucLocal: 17, aaLocal: 6}]],
+ [4, [{cds: getCds('pos-wrapping'), nucLocal: 12, aaLocal: 4},
+ {cds: getCds('neg-wrapping'), nucLocal: 5, aaLocal: 2}]],
+ [93, [{cds: getCds('pos-wrapping'), nucLocal: 1, aaLocal: 1},
+ {cds: getCds('neg-wrapping'), nucLocal: 16, aaLocal: 6}]],
+ [48, [{cds: getCds('neg-multi'), nucLocal: 14, aaLocal: 5}]],
+ [63, [{cds: getCds('pos-multi'), nucLocal: 15, aaLocal: 5}]],
+ // Pos 36 appears in 2 segments of the pos-multi CDS, in different codons
+ [36, [{cds: getCds('pos-multi'), nucLocal: 6, aaLocal: 2},
+ {cds: getCds('pos-multi'), nucLocal: 7, aaLocal: 3}]],
+ // Pos 53 appears in 2 segments of the neg-multi CDS, both in the same codon
+ [53, [{cds: getCds('neg-multi'), nucLocal: 8, aaLocal: 3},
+ {cds: getCds('neg-multi'), nucLocal: 9, aaLocal: 3}]],
+]
+
+
+test("Single nucleotide positions are correctly mapped to amino acid positions", () => {
+ for (const [nucPos, expectedResult] of nucleotideToAaPositionData) {
+ expect(nucleotideToAaPosition(genome, nucPos)).toStrictEqual(expectedResult);
+ }
+});
+
+
+/** Assumes that the gene name wasn't specified in the JSON and thus
+ * we have one gene with one CDS
+ */
+function getCds(name) {
+ const genes = chromosome.genes.filter((g) => g.name===name);
+ if (genes.length!==1) throw new Error("Multiple (or no!) matching genes");
+ if (genes[0].cds.length!==1) throw new Error("Multiple (or no!) matching CDSs");
+ const cds = genes[0].cds[0];
+ expect(cds.name).toBe(name)
+ return cds;
+}
+
+function prettifyName(name) {
+ return `${name.startsWith('pos')?'+ve':'-ve'} strand CDS ${name.replace(/.+-/,'')}`;
+}
\ No newline at end of file
diff --git a/test/data/test_complex-genome-annotation.json b/test/data/test_complex-genome-annotation.json
new file mode 100644
index 000000000..8e82bc4ed
--- /dev/null
+++ b/test/data/test_complex-genome-annotation.json
@@ -0,0 +1,50 @@
+{
+ "version": "v2",
+ "meta": {
+ "title": "Test dataset for complex genome annotations",
+ "updated": "2023-07-09",
+ "display_defaults": {"panels": ["entropy"], "distance_measure": "div"},
+ "panels": ["tree", "entropy"],
+ "genome_annotations": {
+ "nuc": {"start": 1, "end": 100},
+ "pos-single": {"start": 23, "end": 28, "strand": "+",
+ "display_name": "Frame1 (F1); Mutations at aa1=nuc23-25, aa2=nuc26-28; "},
+ "pos-multi": {"segments": [{"start": 31, "end": 36}, {"start": 36, "end": 43}, {"start": 63, "end": 69}], "strand": "+",
+ "display_name": "Seg1/3: 31..36 F0 p0; Seg2/3: 36..43, F2, p0; Seg 3/3: 63..69 F2 p1; Mutations aa2 → nuc34-36, aa3 → nuc36-68, aa5: 42,43,63 aa7 → 67-69"},
+ "pos-wrapping": {"start": 93, "end": 110, "strand": "+",
+ "display_name": "Breaks down to 2 segments: 93-100 (F2), 1-10 (phase 1, F1). Mutations: aa3: 99,100,1"},
+ "neg-single": {"start": 72, "end": 80, "strand": "-",
+ "display_name": "Frame2 (R2); Mutations at aa1=78-80, aa3=nuc72-74;"},
+ "neg-multi": {"segments": [{"start": 53, "end": 60}, {"start": 46, "end": 53}, {"start": 12, "end": 19}], "strand": "-",
+ "display_name": "Segment order important. Seg1/3: 53..60 R1 p0; Seg2/3: 46..53, F0, p1; Seg 3/3: 12..19 R2 p2; Mutations: aa1 → nuc60,59,58, aa3 → nuc54,53,53 (slippage), aa6 → nuc46,19,18"},
+ "neg-wrapping": {"start": 91, "end": 108, "strand": "-",
+ "display_name": "Breaks down to 2 segments: 1..8 R2 p0 and 91..100 R1 p1 (order important). Mutations: aa1 → nuc8,7,6, aa3 → nuc2,1,100, aa6 → nuc93,92,91"}
+ }
+ },
+ "tree": {
+ "name": "root",
+ "node_attrs": {"div": 0},
+ "children": [
+ {
+ "name": "A",
+ "node_attrs": {"div": 1},
+ "branch_attrs": {"mutations": {
+ "nuc": ["A20T"],
+ "pos-single": ["A1B", "G2H"],
+ "pos-multi": ["A2B", "G3H", "L7S"],
+ "pos-wrapping": ["L3S"]
+ }}
+ },
+ {
+ "name": "B",
+ "node_attrs": {"div": 1},
+ "branch_attrs": {"mutations": {
+ "nuc": ["A20G"],
+ "neg-single": ["A1B", "G3H"],
+ "neg-multi": ["A1B", "G3H", "L6S"],
+ "neg-wrapping": ["A1B", "G3H", "L6S"]
+ }}
+ }
+ ]
+ }
+}
\ No newline at end of file
diff --git a/test/entropy.test.js b/test/entropy.test.js
new file mode 100644
index 000000000..946b1ca84
--- /dev/null
+++ b/test/entropy.test.js
@@ -0,0 +1,181 @@
+import { calcEntropyInView, getCdsByName } from "../src/util/entropy";
+import { treeJsonToState } from "../src/util/treeJsonProcessing";
+import {NODE_VISIBLE, NODE_NOT_VISIBLE, nucleotide_gene} from "../src/util/globals";
+
+function tree1() {
+ const nodes = treeJsonToState({
+ name: "ROOT",
+ children: [
+ {
+ name: "tipX",
+ branch_attrs: {mutations: {
+ nuc: ["A5T", "A10T"],
+ CDS2: ["L4M"]
+ }}
+ },
+ {
+ name: "tipY",
+ branch_attrs: {mutations: {
+ nuc: ["A5G"],
+ CDS2: ["L4P"]
+ }}
+ },
+ {
+ name: "internalNodeZ",
+ branch_attrs: {mutations: {
+ nuc: ["A5C", "A15T"],
+ CDS1: ["A1B"]
+ }},
+ children: [
+ {
+ name: "tipZ",
+ branch_attrs: {mutations: {
+ nuc: ["A5G"], // NOTE this is nonsensical - A→C followed by A→G
+ CDS2: ["L4S"] // Codon 4 should be stop, but auspice doesn't check
+ }}
+ }
+ ]
+ }
+ ]
+ }).nodes;
+
+ const visibility = nodes.map(() => NODE_VISIBLE)
+
+ const genomeMap = [
+ {
+ name: 'source',
+ range: [1,18],
+ genes: [
+ {
+ name: "GENE1", color: "NA",
+ cds: [
+ {
+ name: "CDS1", length: 9, strand: "+",
+ segments: [
+ {rangeGenome: [6,14], rangeLocal: [1, 9], phase: 0, frame: 2},
+ ]
+ }
+ ]
+ },
+ {
+ name: "GENE2", color: "NA",
+ cds: [
+ {
+ name: "CDS2", length: 12, strand: "-",
+ segments: [
+ {rangeGenome: [5,16], rangeLocal: [1, 12], phase: 0, frame: 1},
+ ]
+ }
+ ]
+ }
+ ]
+ }
+ ]
+
+ return {nodes, visibility, genomeMap};
+}
+
+test("Basic mutation counts", () => {
+ const {nodes, visibility, genomeMap} = tree1();
+
+ /* Very simple counting of nucleotide changes across all nodes */
+ let [counts, max] = calcEntropyInView(nodes, visibility, nucleotide_gene, true);
+ expect(max).toBe(4);
+ expect(counts).toStrictEqual([{x:5,y:4}, {x:10,y:1}, {x:15,y:1}]);
+
+ [counts, max] = calcEntropyInView(nodes, visibility, getCdsByName(genomeMap, 'CDS1'), true);
+ let result = [{codon:1,y:1}]
+ expect(max).toBe(getMax(result));
+ expect(noFill(counts)).toStrictEqual(result);
+
+ [counts, max] = calcEntropyInView(nodes, visibility, getCdsByName(genomeMap, 'CDS2'), true);
+ result = [{codon:4,y:3}]
+ expect(max).toBe(getMax(result));
+ expect(noFill(counts)).toStrictEqual(result)
+});
+
+test("Visibility mask + counts", () => {
+ const {nodes, visibility} = tree1();
+
+ nodes.forEach((n, i) => {
+ if (n.name==='tipZ') visibility[i]=NODE_NOT_VISIBLE;
+ });
+ const [counts, max] = calcEntropyInView(nodes, visibility, nucleotide_gene, true);
+ expect(max).toBe(3);
+ expect(counts).toStrictEqual([{x:5,y:3}, {x:10,y:1}, {x:15,y:1}]);
+})
+
+test("Basic entropy calculations", () => {
+ const {nodes, visibility, genomeMap} = tree1();
+
+ let [counts, max] = calcEntropyInView(nodes, visibility, nucleotide_gene, false);
+ let result = [
+ {x:5, y:entropy(1,2)}, // T,G,G
+ {x:10, y:entropy(1,2)}, // T,A,A
+ {x:15, y:entropy(1,2)}] // A,A,T
+ expect(max).toBe(getMax(result));
+ expect(noFill(counts)).toStrictEqual(stringifyY(result));
+
+ [counts, max] = calcEntropyInView(nodes, visibility, getCdsByName(genomeMap, 'CDS1'), false);
+ result = [{codon:1, y:entropy(1,2)}]; // A,A,B
+ expect(max).toBe(getMax(result));
+ expect(noFill(counts)).toStrictEqual(stringifyY(result));
+
+ [counts, max] = calcEntropyInView(nodes, visibility, getCdsByName(genomeMap, 'CDS2'), false);
+ result = [{codon:4, y:entropy(1,1,1)}]; // M,P,S
+ expect(max).toBe(getMax(result));
+ expect(noFill(counts)).toStrictEqual(stringifyY(result));
+});
+
+
+test("Visibility mask + entropy", () => {
+ const {nodes, visibility, genomeMap} = tree1();
+
+ nodes.forEach((n, i) => {
+ if (n.name==='tipX' || n.name==='tipY') visibility[i]=NODE_NOT_VISIBLE;
+ });
+ let [counts, max] = calcEntropyInView(nodes, visibility, nucleotide_gene, false);
+ let result = [
+ {x:5,y:entropy(1)}, // G
+ // NOTE - position 10 is not reported as no visible nodes have it
+ {x:15,y:entropy(1)}]; // T
+ expect(max).toBe(getMax(result));
+ expect(noFill(counts)).toStrictEqual(stringifyY(result));
+
+
+ [counts, max] = calcEntropyInView(nodes, visibility, getCdsByName(genomeMap, 'CDS1'), false);
+ result = [{codon:1,y:entropy(1)}] // B
+ expect(max).toBe(getMax(result));
+ expect(noFill(counts)).toStrictEqual(stringifyY(result));
+
+ [counts, max] = calcEntropyInView(nodes, visibility, getCdsByName(genomeMap, 'CDS2'), false);
+ result = [{codon:4,y:entropy(1)}]; // B
+ expect(max).toBe(getMax(result));
+ expect(noFill(counts)).toStrictEqual(stringifyY(result));
+});
+
+
+function noFill(data) {
+ return data.map((d) => {
+ delete d.fill; // currently calculated based on gene order, but we'll remove this concept shortly
+ return d;
+ })
+}
+
+function entropy(...observations) {
+ let s = 0;
+ const total = observations.reduce((acc, cv) => acc+cv, 0);
+ observations.forEach((obs) => {
+ s-=obs/total * Math.log(obs/total);
+ });
+ return s
+}
+
+function stringifyY(data) {
+ data.forEach((d) => d.y = d.y.toFixed(3));
+ return data;
+}
+
+function getMax(data) {
+ return data.reduce((acc, cv) => cv.y>acc?cv.y:acc, 0)
+}
\ No newline at end of file