From 68bde1d879e0a42cd77dbe3ce2951df63a216730 Mon Sep 17 00:00:00 2001
From: Harshil Patel <drpatelhh@gmail.com>
Date: Wed, 12 May 2021 13:43:55 +0100
Subject: [PATCH 1/4] Strip out samplesheet validation for now

---
 CHANGELOG.md              |  2 +-
 lib/NfcoreSchema.groovy   | 31 -------------------------------
 workflows/rnaseq.nf       |  1 -
 workflows/sra_download.nf |  1 -
 4 files changed, 1 insertion(+), 34 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index b61201f29..c55341801 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,7 +3,7 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## [[3.1](https://github.com/nf-core/rnaseq/releases/tag/3.1)] - 2021-05-12
+## [[3.1](https://github.com/nf-core/rnaseq/releases/tag/3.1)] - 2021-05-13
 
 ### :warning: Major enhancements
 
diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy
index e35d892e4..16986434f 100755
--- a/lib/NfcoreSchema.groovy
+++ b/lib/NfcoreSchema.groovy
@@ -177,37 +177,6 @@ class NfcoreSchema {
         }
     }
 
-    //
-    // Function to validate a file by its schema, eg. sample sheets
-    //
-    /* groovylint-disable-next-line UnusedPrivateMethodParameter */
-    public static void validateFile(workflow, log, params, param_name, obj, schema_filename) {
-        // Load the schema
-        InputStream inputStream = new File(getSchemaPath(workflow, schema_filename)).newInputStream()
-        JSONObject rawSchema = new JSONObject(new JSONTokener(inputStream))
-        Schema schema = SchemaLoader.load(rawSchema)
-
-        // Convert the groovy object to a JSONArray
-        def jsonObj = new JsonBuilder(obj)
-        JSONArray objJSON = new JSONArray(jsonObj.toString())
-
-        // Validate
-        try {
-            schema.validate(objJSON)
-        } catch (ValidationException e) {
-            Map colors = NfcoreTemplate.logColours(params.monochrome_logs)
-            println ""
-            println "=${colors.red}====   ERROR: Validation of '$param_name' file failed!   ============================="
-            JSONObject exceptionJSON = e.toJSON()
-            println e.getMessage()
-            e.getCausingExceptions().stream().map(ValidationException::getMessage).forEach(System.out::println)
-            println "===================================================================================${colors.reset}"
-            println ""
-            System.exit(1)
-        }
-        log.debug "Validation passed: '$param_name' with '$schema_filename'"
-    }
-
     //
     // Beautify parameters for --help
     //
diff --git a/workflows/rnaseq.nf b/workflows/rnaseq.nf
index ecca9c52c..a4112c772 100755
--- a/workflows/rnaseq.nf
+++ b/workflows/rnaseq.nf
@@ -27,7 +27,6 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true
 
 // Check mandatory parameters
 if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' }
-NfcoreSchema.validateFile(workflow, log, params, '--input', file(params.input).splitCsv( header:true, sep:','), 'assets/schema_input.json')
 
 // Check rRNA databases for sortmerna
 ch_ribo_db = file(params.ribo_database_manifest)
diff --git a/workflows/sra_download.nf b/workflows/sra_download.nf
index 8c417519a..c680ac5e0 100755
--- a/workflows/sra_download.nf
+++ b/workflows/sra_download.nf
@@ -14,7 +14,6 @@ if (params.public_data_ids) {
 } else {
     exit 1, 'Input file with public database ids not specified!'
 }
-NfcoreSchema.validateFile(workflow, log, params, '--public_data_ids', file(params.public_data_ids).splitCsv(header:false, sep:'', strip:true), 'assets/schema_public_data_ids.json')
 
 /*
 ========================================================================================

From 4240cbb46d103454ff6b210cda026fbae302de2b Mon Sep 17 00:00:00 2001
From: Harshil Patel <drpatelhh@gmail.com>
Date: Thu, 13 May 2021 11:55:39 +0100
Subject: [PATCH 2/4] Fix regex for public_data_ids parameter

---
 assets/schema_public_data_ids.json | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/assets/schema_public_data_ids.json b/assets/schema_public_data_ids.json
index 8c9a45bf3..ff1639c6a 100644
--- a/assets/schema_public_data_ids.json
+++ b/assets/schema_public_data_ids.json
@@ -8,7 +8,8 @@
         "type": "array",
         "items": {
             "type": "string",
-            "pattern": "^[SEPG][RAS][RXSMPAJXE][EN]?[AB]?\\d{5,9}$"
+            "pattern": "^[SEPG][RAS][RXSMPAJXE][EN]?[AB]?\\d{4,9}$",
+            "errorMessage": "Please provide a valid SRA, GEO or ENA identifier"
         }
     }
 }

From 0b02474ad6f30344b23ed3022227bb6690c362e3 Mon Sep 17 00:00:00 2001
From: Harshil Patel <drpatelhh@gmail.com>
Date: Thu, 13 May 2021 11:56:10 +0100
Subject: [PATCH 3/4] Cherry-pick schema changes from #633

---
 assets/schema_input.json | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/assets/schema_input.json b/assets/schema_input.json
index 264b26a23..cee1bd82b 100644
--- a/assets/schema_input.json
+++ b/assets/schema_input.json
@@ -9,17 +9,20 @@
         "properties": {
             "sample": {
                 "type": "string",
-                "pattern": "^\\S+$"
+                "pattern": "^\\S+$",
+                "errorMessage": "Sample name must be provided and cannot contain spaces"
             },
             "fastq_1": {
                 "type": "string",
-                "pattern": "^\\S+\\.fa?s?t?q\\.gz$"
+                "pattern": "^\\S+\\.f(ast)?q\\.gz$",
+                "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'"
             },
             "fastq_2": {
+                "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'",
                 "anyOf": [
                     {
                         "type": "string",
-                        "pattern": "^\\S+\\.fa?s?t?q\\.gz$"
+                        "pattern": "^\\S+\\.f(ast)?q\\.gz$"
                     },
                     {
                         "type": "string",
@@ -29,6 +32,7 @@
             },
             "strandedness": {
                 "type": "string",
+                "errorMessage": "Strandedness must be provided and be one of 'forward', 'reverse' or 'unstranded'",
                 "enum": [
                     "forward",
                     "reverse",

From be072e435d51dadd782353a398a07693a877f714 Mon Sep 17 00:00:00 2001
From: Harshil Patel <drpatelhh@gmail.com>
Date: Thu, 13 May 2021 12:01:55 +0100
Subject: [PATCH 4/4] Update CHANGELOG

---
 CHANGELOG.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c55341801..b8c7c46e2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Enhancements & fixes
 
 * Updated pipeline template to nf-core/tools `1.14`
+* Initial implementation of a standardised samplesheet JSON schema to use with user interfaces and for validation
 * Only FastQ files that require to be concatenated will be passed to `CAT_FASTQ` process
 * [[#449](https://github.com/nf-core/modules/pull/449)] - `--genomeSAindexNbases` will now be auto-calculated before building STAR indices
 * [[#460](https://github.com/nf-core/rnaseq/issues/460)] - Auto-detect and bypass featureCounts execution if biotype doesn't exist in GTF
@@ -34,7 +35,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 * [[#604](https://github.com/nf-core/rnaseq/issues/604)] - Additional fasta with GENCODE annotation results in biotype error
 * [[#610](https://github.com/nf-core/rnaseq/issues/610)] - save R objects as RDS
 * [[#619](https://github.com/nf-core/rnaseq/issues/619)] - implicit declaration of the workflow in main
-* [[#629](https://github.com/nf-core/modules/pull/629)] - Add and fix EditorConfig linting in entire pipeline
+* [[#629](https://github.com/nf-core/rnaseq/pull/629)] - Add and fix EditorConfig linting in entire pipeline
 * [[nf-core/modules#423](https://github.com/nf-core/modules/pull/423)] - Replace `publish_by_id` module option to `publish_by_meta`
 * [[nextflow#2060](https://github.com/nextflow-io/nextflow/issues/2060)] - Pipeline execution hang when native task fail to be submitted