Skip to content

Commit

Permalink
Merge pull request #1455 from nextstrain/feat/ref-and-ann-from-tree-json
Browse files Browse the repository at this point in the history
  • Loading branch information
ivan-aksamentov authored May 29, 2024
2 parents 0692bd4 + 44fb8a5 commit 9a172d1
Show file tree
Hide file tree
Showing 40 changed files with 876 additions and 284 deletions.
134 changes: 74 additions & 60 deletions packages/nextclade-cli/src/dataset/dataset_download.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,19 @@ use color_eyre::{Section, SectionExt};
use eyre::{eyre, ContextCompat, Report, WrapErr};
use itertools::Itertools;
use log::{warn, LevelFilter};
use nextclade::analyze::virus_properties::{LabelledMutationsConfig, VirusProperties};
use nextclade::analyze::virus_properties::VirusProperties;
use nextclade::gene::gene_map::{filter_gene_map, GeneMap};
use nextclade::io::dataset::{Dataset, DatasetFiles, DatasetMeta, DatasetsIndexJson};
use nextclade::io::dataset::{Dataset, DatasetsIndexJson};
use nextclade::io::fasta::{read_one_fasta, read_one_fasta_str};
use nextclade::io::file::create_file_or_stdout;
use nextclade::io::fs::{ensure_dir, has_extension, read_file_to_string};
use nextclade::run::nextclade_wasm::NextcladeParams;
use nextclade::run::nextclade_wasm::{NextcladeParams, NextcladeParamsOptional};
use nextclade::tree::tree::AuspiceTree;
use nextclade::utils::fs::list_files_recursive;
use nextclade::utils::option::OptionMapRefFallible;
use nextclade::utils::string::{format_list, surround_with_quotes, Indent};
use nextclade::{make_error, make_internal_error, o};
use std::collections::{BTreeMap, BTreeSet};
use std::collections::BTreeSet;
use std::fs::File;
use std::io::{BufReader, Cursor, Read, Seek, Write};
use std::ops::Deref;
Expand All @@ -35,13 +35,16 @@ pub fn nextclade_get_inputs(
if input_dataset.is_file() && has_extension(input_dataset, "zip") {
dataset_zip_load(run_args, input_dataset, cdses)
.wrap_err_with(|| format!("When loading dataset from {input_dataset:#?}"))
} else if input_dataset.is_file() && has_extension(input_dataset, "json") {
dataset_json_load(run_args, input_dataset, cdses)
.wrap_err_with(|| format!("When loading dataset from {input_dataset:#?}"))
} else if input_dataset.is_dir() {
dataset_dir_load(run_args, input_dataset, cdses)
.wrap_err_with(|| format!("When loading dataset from {input_dataset:#?}"))
} else {
make_error!(
"--input-dataset: path is invalid. \
Expected a directory path or a zip archive file path, but got: '{input_dataset:#?}'"
Expected a directory path, a zip file path or json file path, but got: '{input_dataset:#?}'"
)
}
} else {
Expand Down Expand Up @@ -119,14 +122,10 @@ pub fn dataset_zip_load(
.wrap_err("When reading pathogen JSON from dataset")?
.ok_or_else(|| eyre!("Pathogen JSON must always be present in the dataset but not found."))?;

let ref_record = read_from_path_or_zip(
&run_args.inputs.input_ref,
&mut zip,
&Some(&virus_properties.files.reference),
)?
.map_ref_fallible(read_one_fasta_str)
.wrap_err("When reading reference sequence from dataset")?
.ok_or_else(|| eyre!("Reference sequence must always be present in the dataset but not found."))?;
let ref_record = read_from_path_or_zip(&run_args.inputs.input_ref, &mut zip, &virus_properties.files.reference)?
.map_ref_fallible(read_one_fasta_str)
.wrap_err("When reading reference sequence from dataset")?
.ok_or_else(|| eyre!("Reference sequence must always be present in the dataset but not found."))?;

let gene_map = read_from_path_or_zip(
&run_args.inputs.input_annotation,
Expand Down Expand Up @@ -157,8 +156,8 @@ fn verify_dataset_files<'a, T: AsRef<str> + 'a + ?Sized>(
files_present: impl Iterator<Item = &'a T> + 'a,
) {
let declared: BTreeSet<&str> = [
Some(virus_properties.files.reference.as_str()),
Some(virus_properties.files.pathogen_json.as_str()),
virus_properties.files.reference.as_deref(),
virus_properties.files.pathogen_json.as_deref(),
virus_properties.files.genome_annotation.as_deref(),
virus_properties.files.tree_json.as_deref(),
virus_properties.files.examples.as_deref(),
Expand Down Expand Up @@ -238,8 +237,17 @@ pub fn dataset_dir_load(
let virus_properties = VirusProperties::from_path(input_pathogen_json)?;

let input_ref = input_ref
.clone()
.unwrap_or_else(|| dataset_dir.join(&virus_properties.files.reference));
.as_ref()
.cloned()
.or_else(|| {
virus_properties
.files
.reference
.as_ref()
.map(|reference| dataset_dir.join(reference))
})
.expect("Reference sequence is required but it is neither declared in the dataset's pathogen.json `.files` section, nor provided as a separate file");

let ref_record = read_one_fasta(input_ref).wrap_err("When reading reference sequence")?;

let gene_map = input_annotation
Expand Down Expand Up @@ -283,6 +291,51 @@ pub fn dataset_dir_load(
})
}

/// Builds Nextclade run parameters from a standalone Auspice JSON v2 file.
///
/// The Auspice JSON at `dataset_json` is parsed first. Whichever of the pathogen JSON,
/// reference sequence, reference tree and genome annotation were supplied as separate
/// input files in `run_args.inputs` are then read and passed, together with `cdses`,
/// to [`NextcladeParams::from_auspice`] as optional overrides.
///
/// # Errors
///
/// Returns an error if the Auspice JSON or any of the separately provided input files
/// cannot be read or parsed, or if [`NextcladeParams::from_auspice`] itself fails.
pub fn dataset_json_load(
  run_args: &NextcladeRunArgs,
  dataset_json: impl AsRef<Path>,
  cdses: &Option<Vec<String>>,
) -> Result<NextcladeParams, Report> {
  let auspice_path: &Path = dataset_json.as_ref();
  let inputs = &run_args.inputs;

  let auspice_json = AuspiceTree::from_path(auspice_path).wrap_err("When reading Auspice JSON v2")?;

  // Field initializers run in the order they are written, so the inputs are read in this
  // sequence: pathogen JSON, reference sequence, tree, genome annotation. Each one stays
  // `None` when the corresponding input file was not provided.
  let overrides = NextcladeParamsOptional {
    virus_properties: inputs
      .input_pathogen_json
      .map_ref_fallible(VirusProperties::from_path)
      .wrap_err("When parsing pathogen JSON")?,
    ref_record: inputs
      .input_ref
      .map_ref_fallible(read_one_fasta)
      .wrap_err("When parsing reference sequence")?,
    tree: inputs
      .input_tree
      .map_ref_fallible(AuspiceTree::from_path)
      .wrap_err("When parsing reference tree Auspice JSON v2")?,
    gene_map: inputs
      .input_annotation
      .map_ref_fallible(GeneMap::from_path)
      .wrap_err("When parsing genome annotation")?,
  };

  NextcladeParams::from_auspice(&auspice_json, &overrides, cdses)
}

pub fn dataset_individual_files_load(
run_args: &NextcladeRunArgs,
cdses: &Option<Vec<String>>,
Expand All @@ -297,41 +350,7 @@ pub fn dataset_individual_files_load(
.and_then(|input_pathogen_json| read_file_to_string(input_pathogen_json).ok())
.map_ref_fallible(VirusProperties::from_str)
.wrap_err("When reading pathogen JSON")?
.unwrap_or_else(|| {
// The only case where we allow pathogen.json to be missing is when there's no dataset and files are provided
// explicitly through args. Let's create a dummy value to avoid making the field optional,
// and avoid adding `Default` trait.
VirusProperties {
schema_version: "".to_owned(),
attributes: BTreeMap::default(),
shortcuts: vec![],
meta: DatasetMeta::default(),
files: DatasetFiles {
reference: "".to_owned(),
pathogen_json: "".to_owned(),
genome_annotation: None,
tree_json: None,
examples: None,
readme: None,
changelog: None,
rest_files: BTreeMap::default(),
other: serde_json::Value::default(),
},
default_cds: None,
cds_order_preference: vec![],
mut_labels: LabelledMutationsConfig::default(),
qc: None,
general_params: None,
alignment_params: None,
tree_builder_params: None,
phenotype_data: None,
aa_motifs: vec![],
versions: vec![],
version: None,
compatibility: None,
other: serde_json::Value::default(),
}
});
.unwrap_or_default();

let ref_record = read_one_fasta(input_ref).wrap_err("When reading reference sequence")?;

Expand Down Expand Up @@ -401,14 +420,9 @@ pub fn dataset_str_download_and_load(
.wrap_err("When reading pathogen JSON from dataset")?
.ok_or_else(|| eyre!("Required file not found in dataset: 'pathogen.json'. Please report it to dataset authors."))?;

let ref_record = read_from_path_or_url(
&http,
&dataset,
&run_args.inputs.input_ref,
&Some(dataset.files.reference.clone()),
)?
.map_ref_fallible(read_one_fasta_str)?
.wrap_err("When reading reference sequence from dataset")?;
let ref_record = read_from_path_or_url(&http, &dataset, &run_args.inputs.input_ref, &dataset.files.reference)?
.map_ref_fallible(read_one_fasta_str)?
.wrap_err("When reading reference sequence from dataset")?;

let gene_map = read_from_path_or_url(
&http,
Expand Down
3 changes: 1 addition & 2 deletions packages/nextclade-web/src/components/Error/ErrorContent.tsx
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
import React, { useCallback, useMemo, useState } from 'react'
import { Button, Col, Row } from 'reactstrap'
import { useTranslationSafe } from 'src/helpers/useTranslationSafe'
import { NextcladeV2Error } from 'src/io/fetchSingleDatasetFromUrl'
import styled from 'styled-components'
import { CopyToClipboard } from 'react-copy-to-clipboard'
import { FaClipboardCheck, FaClipboardList } from 'react-icons/fa'

import { ErrorGeneric } from 'src/components/Error/error-types/ErrorGeneric'
import { ErrorNetworkConnectionFailure } from 'src/components/Error/error-types/ErrorNetworkConnectionFailure'
import { ErrorNetworkRequestFailure } from 'src/components/Error/error-types/ErrorNetworkRequestFailure'
import { NextcladeV2ErrorContent } from 'src/components/Error/error-types/NextcladeV2ErrorContent'
import { ErrorContentExplanation, getErrorReportText } from 'src/components/Error/ErrorContentExplanation'
import { sanitizeError } from 'src/helpers/sanitizeError'
import { NextcladeV2Error } from 'src/io/fetchSingleDatasetDirectory'
import { HttpRequestError } from 'src/io/axiosFetch'
import { ErrorMessageMonospace } from './ErrorStyles'

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ import React, { useMemo } from 'react'
import { ErrorContainer, ErrorMessage } from 'src/components/Error/ErrorStyles'
import { LinkExternal } from 'src/components/Link/LinkExternal'
import { PROJECT_NAME, RELEASE_OLD_URL } from 'src/constants'
import { NextcladeV2Error } from 'src/io/fetchSingleDatasetDirectory'
import { useTranslationSafe } from 'src/helpers/useTranslationSafe'
import { NextcladeV2Error } from 'src/io/fetchSingleDatasetFromUrl'
import urljoin from 'url-join'

export interface Props {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { Dataset } from '_SchemaRoot'
import { isEmpty } from 'lodash'
import React, { useCallback } from 'react'
import { Button } from 'reactstrap'
import { useRecoilValue } from 'recoil'
Expand Down Expand Up @@ -44,6 +45,10 @@ export function ButtonLoadExample({ ...rest }) {
setExampleSequences(datasetCurrent)
}, [datasetCurrent, setExampleSequences])

if (isEmpty(datasetCurrent?.files?.examples)) {
return null
}

return (
<Button {...rest} color="link" onClick={onClick} disabled={hasInputErrors || !datasetCurrent}>
{t('Load example')}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@ export function DatasetContentSection() {
return (
<ContentSection>
<Nav tabs>
{currentDataset?.files.readme && (
{currentDataset?.files?.readme && (
<TabLabel tabId={0} activeTabId={activeTabId} setActiveTabId={setActiveTabId}>
{'Summary'}
</TabLabel>
)}
{currentDataset?.files.changelog && (
{currentDataset?.files?.changelog && (
<TabLabel tabId={1} activeTabId={activeTabId} setActiveTabId={setActiveTabId}>
{'History'}
</TabLabel>
Expand All @@ -40,10 +40,10 @@ export function DatasetContentSection() {
</Nav>
<TabContent activeTab={activeTabId}>
<TabPane tabId={0}>
{currentDataset?.files.readme && <MarkdownRemote url={currentDataset?.files.readme} />}
{currentDataset?.files?.readme && <MarkdownRemote url={currentDataset?.files.readme} />}
</TabPane>
<TabPane tabId={1}>
{currentDataset?.files.changelog && <MarkdownRemote url={currentDataset?.files.changelog} />}
{currentDataset?.files?.changelog && <MarkdownRemote url={currentDataset?.files.changelog} />}
</TabPane>
<TabPane tabId={2}>{currentDataset && <DatasetContentTabAdvanced />}</TabPane>
</TabContent>
Expand Down
2 changes: 1 addition & 1 deletion packages/nextclade-web/src/components/Main/DatasetInfo.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ export function DatasetInfo({ dataset, showSuggestions, ...restProps }: DatasetI
if (version?.tag === 'unreleased') {
updatedAt = `${updatedAt} (${t('unreleased')})`
}
return updatedAt
return updatedAt ?? t('unknown')
}, [t, version?.tag, version?.updatedAt])

const datasetName = attrStrMaybe(attributes, 'name') ?? path
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ export function ExampleSequencePicker({ ...restProps }: LanguageSwitcherProps) {
const { datasets: allDatasets } = useRecoilValue(datasetsAtom)

const filtered = useMemo(() => {
const datasets = allDatasets.filter((dataset) => !isNil(dataset.files.examples))
const datasets = allDatasets.filter((dataset) => !isNil(dataset?.files?.examples))
if (searchTerm.trim().length === 0) {
return datasets
}
Expand Down
8 changes: 7 additions & 1 deletion packages/nextclade-web/src/helpers/formatDate.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,21 @@
import { isEmpty } from 'lodash'
import { DateTime } from 'luxon'
import { notUndefinedOrNull } from 'src/helpers/notUndefined'

/**
 * Formats an ISO date-time string as `<date> <time> (<zone>)`, with the input
 * interpreted in the UTC zone.
 *
 * @param dateTimeStr - date-time string handed to `DateTime.fromISO`
 * @returns the formatted string, or `undefined` when no ISO date could be derived
 *   from the input (presumably because it could not be parsed as an ISO date-time)
 */
export function formatDateIsoUtcSimple(dateTimeStr: string) {
  const utc = DateTime.fromISO(dateTimeStr, { zone: 'UTC' })

  const date = utc.toISODate()

  // Without a date there is nothing meaningful to display; let the caller choose a fallback.
  if (isEmpty(date)) {
    return undefined
  }

  const time = utc.toISOTime({
    suppressMilliseconds: true,
    suppressSeconds: true,
    includeOffset: false,
  })

  // Drop parts that are missing or empty before joining. The predicate must be
  // negated: `.filter(isEmpty)` would keep only the empty parts and discard the
  // actual date, time and zone.
  return [date, time, `(${utc.zoneName})`]
    .filter(notUndefinedOrNull)
    .filter((part) => !isEmpty(part))
    .join(' ')
}
Loading

0 comments on commit 9a172d1

Please sign in to comment.