From 68bde1d879e0a42cd77dbe3ce2951df63a216730 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Wed, 12 May 2021 13:43:55 +0100 Subject: [PATCH 1/4] Strip out samplesheet validation for now --- CHANGELOG.md | 2 +- lib/NfcoreSchema.groovy | 31 ------------------------------- workflows/rnaseq.nf | 1 - workflows/sra_download.nf | 1 - 4 files changed, 1 insertion(+), 34 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b61201f29..c55341801 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [[3.1](https://github.com/nf-core/rnaseq/releases/tag/3.1)] - 2021-05-12 +## [[3.1](https://github.com/nf-core/rnaseq/releases/tag/3.1)] - 2021-05-13 ### :warning: Major enhancements diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy index e35d892e4..16986434f 100755 --- a/lib/NfcoreSchema.groovy +++ b/lib/NfcoreSchema.groovy @@ -177,37 +177,6 @@ class NfcoreSchema { } } - // - // Function to validate a file by its schema, eg. sample sheets - // - /* groovylint-disable-next-line UnusedPrivateMethodParameter */ - public static void validateFile(workflow, log, params, param_name, obj, schema_filename) { - // Load the schema - InputStream inputStream = new File(getSchemaPath(workflow, schema_filename)).newInputStream() - JSONObject rawSchema = new JSONObject(new JSONTokener(inputStream)) - Schema schema = SchemaLoader.load(rawSchema) - - // Convert the groovy object to a JSONArray - def jsonObj = new JsonBuilder(obj) - JSONArray objJSON = new JSONArray(jsonObj.toString()) - - // Validate - try { - schema.validate(objJSON) - } catch (ValidationException e) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - println "" - println "=${colors.red}==== ERROR: Validation of '$param_name' file failed! =============================" - JSONObject exceptionJSON = e.toJSON() - println e.getMessage() - e.getCausingExceptions().stream().map(ValidationException::getMessage).forEach(System.out::println) - println "===================================================================================${colors.reset}" - println "" - System.exit(1) - } - log.debug "Validation passed: '$param_name' with '$schema_filename'" - } - // // Beautify parameters for --help // diff --git a/workflows/rnaseq.nf b/workflows/rnaseq.nf index ecca9c52c..a4112c772 100755 --- a/workflows/rnaseq.nf +++ b/workflows/rnaseq.nf @@ -27,7 +27,6 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true // Check mandatory parameters if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } -NfcoreSchema.validateFile(workflow, log, params, '--input', file(params.input).splitCsv( header:true, sep:','), 'assets/schema_input.json') // Check rRNA databases for sortmerna ch_ribo_db = file(params.ribo_database_manifest) diff --git a/workflows/sra_download.nf b/workflows/sra_download.nf index 8c417519a..c680ac5e0 100755 --- a/workflows/sra_download.nf +++ b/workflows/sra_download.nf @@ -14,7 +14,6 @@ if (params.public_data_ids) { } else { exit 1, 'Input file with public database ids not specified!' } -NfcoreSchema.validateFile(workflow, log, params, '--public_data_ids', file(params.public_data_ids).splitCsv(header:false, sep:'', strip:true), 'assets/schema_public_data_ids.json') /* ======================================================================================== From 4240cbb46d103454ff6b210cda026fbae302de2b Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Thu, 13 May 2021 11:55:39 +0100 Subject: [PATCH 2/4] Fix regex for public_data_ids parameter --- assets/schema_public_data_ids.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/assets/schema_public_data_ids.json b/assets/schema_public_data_ids.json index 8c9a45bf3..ff1639c6a 100644 --- a/assets/schema_public_data_ids.json +++ b/assets/schema_public_data_ids.json @@ -8,7 +8,8 @@ "type": "array", "items": { "type": "string", - "pattern": "^[SEPG][RAS][RXSMPAJXE][EN]?[AB]?\\d{5,9}$" + "pattern": "^[SEPG][RAS][RXSMPAJXE][EN]?[AB]?\\d{4,9}$", + "errorMessage": "Please provide a valid SRA, GEO or ENA identifier" } } } From 0b02474ad6f30344b23ed3022227bb6690c362e3 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Thu, 13 May 2021 11:56:10 +0100 Subject: [PATCH 3/4] Cherry pick Schema changes from #633 --- assets/schema_input.json | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index 264b26a23..cee1bd82b 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -9,17 +9,20 @@ "properties": { "sample": { "type": "string", - "pattern": "^\\S+$" + "pattern": "^\\S+$", + "errorMessage": "Sample name must be provided and cannot contain spaces" }, "fastq_1": { "type": "string", - "pattern": "^\\S+\\.fa?s?t?q\\.gz$" + "pattern": "^\\S+\\.f(ast)?q\\.gz$", + "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" }, "fastq_2": { + "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", "anyOf": [ { "type": "string", - "pattern": "^\\S+\\.fa?s?t?q\\.gz$" + "pattern": "^\\S+\\.f(ast)?q\\.gz$" }, { "type": "string", @@ -29,6 +32,7 @@ }, "strandedness": { "type": "string", + "errorMessage": "Strandedness must be provided and be one of 'forward', 'reverse' or 'unstranded'", "enum": [ "forward", "reverse", From be072e435d51dadd782353a398a07693a877f714 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Thu, 13 May 2021 12:01:55 +0100 Subject: [PATCH 4/4] Update CHANGELOG --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c55341801..b8c7c46e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Enhancements & fixes * Updated pipeline template to nf-core/tools `1.14` +* Initial implementation of a standardised samplesheet JSON schema to use with user interfaces and for validation * Only FastQ files that require to be concatenated will be passed to `CAT_FASTQ` process * [[#449](https://github.com/nf-core/modules/pull/449)] - `--genomeSAindexNbases` will now be auto-calculated before building STAR indices * [[#460](https://github.com/nf-core/rnaseq/issues/460)] - Auto-detect and bypass featureCounts execution if biotype doesn't exist in GTF @@ -34,7 +35,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * [[#604](https://github.com/nf-core/rnaseq/issues/604)] - Additional fasta with GENCODE annotation results in biotype error * [[#610](https://github.com/nf-core/rnaseq/issues/610)] - save R objects as RDS * [[#619](https://github.com/nf-core/rnaseq/issues/619)] - implicit declaration of the workflow in main -* [[#629](https://github.com/nf-core/modules/pull/629)] - Add and fix EditorConfig linting in entire pipeline +* [[#629](https://github.com/nf-core/rnaseq/pull/629)] - Add and fix EditorConfig linting in entire pipeline * [[nf-core/modules#423](https://github.com/nf-core/modules/pull/423)] - Replace `publish_by_id` module option to `publish_by_meta` * [[nextflow#2060](https://github.com/nextflow-io/nextflow/issues/2060)] - Pipeline execution hang when native task fail to be submitted