diff --git a/packages/nextclade-cli/src/dataset/dataset_download.rs b/packages/nextclade-cli/src/dataset/dataset_download.rs index 380afc318..9fb3916be 100644 --- a/packages/nextclade-cli/src/dataset/dataset_download.rs +++ b/packages/nextclade-cli/src/dataset/dataset_download.rs @@ -5,19 +5,19 @@ use color_eyre::{Section, SectionExt}; use eyre::{eyre, ContextCompat, Report, WrapErr}; use itertools::Itertools; use log::{warn, LevelFilter}; -use nextclade::analyze::virus_properties::{LabelledMutationsConfig, VirusProperties}; +use nextclade::analyze::virus_properties::VirusProperties; use nextclade::gene::gene_map::{filter_gene_map, GeneMap}; -use nextclade::io::dataset::{Dataset, DatasetFiles, DatasetMeta, DatasetsIndexJson}; +use nextclade::io::dataset::{Dataset, DatasetsIndexJson}; use nextclade::io::fasta::{read_one_fasta, read_one_fasta_str}; use nextclade::io::file::create_file_or_stdout; use nextclade::io::fs::{ensure_dir, has_extension, read_file_to_string}; -use nextclade::run::nextclade_wasm::NextcladeParams; +use nextclade::run::nextclade_wasm::{NextcladeParams, NextcladeParamsOptional}; use nextclade::tree::tree::AuspiceTree; use nextclade::utils::fs::list_files_recursive; use nextclade::utils::option::OptionMapRefFallible; use nextclade::utils::string::{format_list, surround_with_quotes, Indent}; use nextclade::{make_error, make_internal_error, o}; -use std::collections::{BTreeMap, BTreeSet}; +use std::collections::BTreeSet; use std::fs::File; use std::io::{BufReader, Cursor, Read, Seek, Write}; use std::ops::Deref; @@ -35,13 +35,16 @@ pub fn nextclade_get_inputs( if input_dataset.is_file() && has_extension(input_dataset, "zip") { dataset_zip_load(run_args, input_dataset, cdses) .wrap_err_with(|| format!("When loading dataset from {input_dataset:#?}")) + } else if input_dataset.is_file() && has_extension(input_dataset, "json") { + dataset_json_load(run_args, input_dataset, cdses) + .wrap_err_with(|| format!("When loading dataset from {input_dataset:#?}")) } else if input_dataset.is_dir() { dataset_dir_load(run_args, input_dataset, cdses) .wrap_err_with(|| format!("When loading dataset from {input_dataset:#?}")) } else { make_error!( "--input-dataset: path is invalid. \ - Expected a directory path or a zip archive file path, but got: '{input_dataset:#?}'" + Expected a directory path, a zip file path or json file path, but got: '{input_dataset:#?}'" ) } } else { @@ -119,14 +122,10 @@ pub fn dataset_zip_load( .wrap_err("When reading pathogen JSON from dataset")? .ok_or_else(|| eyre!("Pathogen JSON must always be present in the dataset but not found."))?; - let ref_record = read_from_path_or_zip( - &run_args.inputs.input_ref, - &mut zip, - &Some(&virus_properties.files.reference), - )? - .map_ref_fallible(read_one_fasta_str) - .wrap_err("When reading reference sequence from dataset")? - .ok_or_else(|| eyre!("Reference sequence must always be present in the dataset but not found."))?; + let ref_record = read_from_path_or_zip(&run_args.inputs.input_ref, &mut zip, &virus_properties.files.reference)? + .map_ref_fallible(read_one_fasta_str) + .wrap_err("When reading reference sequence from dataset")? + .ok_or_else(|| eyre!("Reference sequence must always be present in the dataset but not found."))?; let gene_map = read_from_path_or_zip( &run_args.inputs.input_annotation, @@ -157,8 +156,8 @@ fn verify_dataset_files<'a, T: AsRef + 'a + ?Sized>( files_present: impl Iterator + 'a, ) { let declared: BTreeSet<&str> = [ - Some(virus_properties.files.reference.as_str()), - Some(virus_properties.files.pathogen_json.as_str()), + virus_properties.files.reference.as_deref(), + virus_properties.files.pathogen_json.as_deref(), virus_properties.files.genome_annotation.as_deref(), virus_properties.files.tree_json.as_deref(), virus_properties.files.examples.as_deref(), @@ -238,8 +237,17 @@ pub fn dataset_dir_load( let virus_properties = VirusProperties::from_path(input_pathogen_json)?; let input_ref = input_ref - .clone() - .unwrap_or_else(|| dataset_dir.join(&virus_properties.files.reference)); + .as_ref() + .cloned() + .or_else(|| { + virus_properties + .files + .reference + .as_ref() + .map(|reference| dataset_dir.join(reference)) + }) + .expect("Reference sequence is required but it is neither declared in the dataset's pathogen.json `.files` section, nor provided as a separate file"); + let ref_record = read_one_fasta(input_ref).wrap_err("When reading reference sequence")?; let gene_map = input_annotation @@ -283,6 +291,51 @@ pub fn dataset_dir_load( }) } +pub fn dataset_json_load( + run_args: &NextcladeRunArgs, + dataset_json: impl AsRef, + cdses: &Option>, +) -> Result { + let dataset_json = dataset_json.as_ref(); + + let NextcladeRunInputArgs { + input_ref, + input_tree, + input_pathogen_json, + input_annotation, + .. + } = &run_args.inputs; + + let auspice_json = AuspiceTree::from_path(dataset_json).wrap_err("When reading Auspice JSON v2")?; + + let overrides = { + let virus_properties = input_pathogen_json + .map_ref_fallible(VirusProperties::from_path) + .wrap_err("When parsing pathogen JSON")?; + + let ref_record = input_ref + .map_ref_fallible(read_one_fasta) + .wrap_err("When parsing reference sequence")?; + + let tree = input_tree + .map_ref_fallible(AuspiceTree::from_path) + .wrap_err("When parsing reference tree Auspice JSON v2")?; + + let gene_map = input_annotation + .map_ref_fallible(GeneMap::from_path) + .wrap_err("When parsing genome annotation")?; + + NextcladeParamsOptional { + ref_record, + gene_map, + tree, + virus_properties, + } + }; + + NextcladeParams::from_auspice(&auspice_json, &overrides, cdses) +} + pub fn dataset_individual_files_load( run_args: &NextcladeRunArgs, cdses: &Option>, @@ -297,41 +350,7 @@ pub fn dataset_individual_files_load( .and_then(|input_pathogen_json| read_file_to_string(input_pathogen_json).ok()) .map_ref_fallible(VirusProperties::from_str) .wrap_err("When reading pathogen JSON")? - .unwrap_or_else(|| { - // The only case where we allow pathogen.json to be missing is when there's no dataset and files are provided - // explicitly through args. Let's create a dummy value to avoid making the field optional, - // and avoid adding `Default` trait. - VirusProperties { - schema_version: "".to_owned(), - attributes: BTreeMap::default(), - shortcuts: vec![], - meta: DatasetMeta::default(), - files: DatasetFiles { - reference: "".to_owned(), - pathogen_json: "".to_owned(), - genome_annotation: None, - tree_json: None, - examples: None, - readme: None, - changelog: None, - rest_files: BTreeMap::default(), - other: serde_json::Value::default(), - }, - default_cds: None, - cds_order_preference: vec![], - mut_labels: LabelledMutationsConfig::default(), - qc: None, - general_params: None, - alignment_params: None, - tree_builder_params: None, - phenotype_data: None, - aa_motifs: vec![], - versions: vec![], - version: None, - compatibility: None, - other: serde_json::Value::default(), - } - }); + .unwrap_or_default(); let ref_record = read_one_fasta(input_ref).wrap_err("When reading reference sequence")?; @@ -401,14 +420,9 @@ pub fn dataset_str_download_and_load( .wrap_err("When reading pathogen JSON from dataset")? .ok_or_else(|| eyre!("Required file not found in dataset: 'pathogen.json'. Please report it to dataset authors."))?; - let ref_record = read_from_path_or_url( - &http, - &dataset, - &run_args.inputs.input_ref, - &Some(dataset.files.reference.clone()), - )? - .map_ref_fallible(read_one_fasta_str)? - .wrap_err("When reading reference sequence from dataset")?; + let ref_record = read_from_path_or_url(&http, &dataset, &run_args.inputs.input_ref, &dataset.files.reference)? + .map_ref_fallible(read_one_fasta_str)? + .wrap_err("When reading reference sequence from dataset")?; let gene_map = read_from_path_or_url( &http, diff --git a/packages/nextclade-web/src/components/Error/ErrorContent.tsx b/packages/nextclade-web/src/components/Error/ErrorContent.tsx index 0ab256631..e93a10e33 100644 --- a/packages/nextclade-web/src/components/Error/ErrorContent.tsx +++ b/packages/nextclade-web/src/components/Error/ErrorContent.tsx @@ -1,17 +1,16 @@ import React, { useCallback, useMemo, useState } from 'react' import { Button, Col, Row } from 'reactstrap' import { useTranslationSafe } from 'src/helpers/useTranslationSafe' -import { NextcladeV2Error } from 'src/io/fetchSingleDatasetFromUrl' import styled from 'styled-components' import { CopyToClipboard } from 'react-copy-to-clipboard' import { FaClipboardCheck, FaClipboardList } from 'react-icons/fa' - import { ErrorGeneric } from 'src/components/Error/error-types/ErrorGeneric' import { ErrorNetworkConnectionFailure } from 'src/components/Error/error-types/ErrorNetworkConnectionFailure' import { ErrorNetworkRequestFailure } from 'src/components/Error/error-types/ErrorNetworkRequestFailure' import { NextcladeV2ErrorContent } from 'src/components/Error/error-types/NextcladeV2ErrorContent' import { ErrorContentExplanation, getErrorReportText } from 'src/components/Error/ErrorContentExplanation' import { sanitizeError } from 'src/helpers/sanitizeError' +import { NextcladeV2Error } from 'src/io/fetchSingleDatasetDirectory' import { HttpRequestError } from 'src/io/axiosFetch' import { ErrorMessageMonospace } from './ErrorStyles' diff --git a/packages/nextclade-web/src/components/Error/error-types/NextcladeV2ErrorContent.tsx b/packages/nextclade-web/src/components/Error/error-types/NextcladeV2ErrorContent.tsx index c4c199054..bc177d9ba 100644 --- a/packages/nextclade-web/src/components/Error/error-types/NextcladeV2ErrorContent.tsx +++ b/packages/nextclade-web/src/components/Error/error-types/NextcladeV2ErrorContent.tsx @@ -3,8 +3,8 @@ import React, { useMemo } from 'react' import { ErrorContainer, ErrorMessage } from 'src/components/Error/ErrorStyles' import { LinkExternal } from 'src/components/Link/LinkExternal' import { PROJECT_NAME, RELEASE_OLD_URL } from 'src/constants' +import { NextcladeV2Error } from 'src/io/fetchSingleDatasetDirectory' import { useTranslationSafe } from 'src/helpers/useTranslationSafe' -import { NextcladeV2Error } from 'src/io/fetchSingleDatasetFromUrl' import urljoin from 'url-join' export interface Props { diff --git a/packages/nextclade-web/src/components/Main/ButtonLoadExample.tsx b/packages/nextclade-web/src/components/Main/ButtonLoadExample.tsx index 4d2bbb827..745eab0cc 100644 --- a/packages/nextclade-web/src/components/Main/ButtonLoadExample.tsx +++ b/packages/nextclade-web/src/components/Main/ButtonLoadExample.tsx @@ -1,4 +1,5 @@ import { Dataset } from '_SchemaRoot' +import { isEmpty } from 'lodash' import React, { useCallback } from 'react' import { Button } from 'reactstrap' import { useRecoilValue } from 'recoil' @@ -44,6 +45,10 @@ export function ButtonLoadExample({ ...rest }) { setExampleSequences(datasetCurrent) }, [datasetCurrent, setExampleSequences]) + if (isEmpty(datasetCurrent?.files?.examples)) { + return null + } + return (