NationalGenomicsInfrastructure · chuan-wang · Mar 26, 2024 · Mar 15, 2024 · Mar 15, 2024 · Mar 15, 2024
diff --git a/VERSIONLOG.md b/VERSIONLOG.md
@@ -1,5 +1,13 @@
 # TACA Version Log
 
+## 20240321.1
+
+Include project IDs in the file name of the run folder tarball
+
+## 20240315.1
+
+Fix cases where the MiSeq samplesheet is missing the index or index2 column
+
 ## 20240304.1
 
 - Make sure TACA can handle runs that generate NO sequencing data at all

diff --git a/taca/analysis/analysis.py b/taca/analysis/analysis.py
@@ -114,8 +114,8 @@
     parser = run.runParserObj
     # Check if I have NoIndex lanes
     for element in parser.obj["samplesheet_csv"]:
-        if (
-            "NoIndex" in element["index"] or not element["index"]
+        if "NoIndex" in element.get("index", "") or not element.get(
+            "index"
         ):  # NoIndex in the case of HiSeq, empty in the case of HiSeqX
             lane = element["Lane"]  # This is a lane with NoIndex
             # In this case PF Cluster is the number of undetermined reads
@@ -208,7 +208,7 @@
 
     # Create a tar archive of the runfolder
     dir_name = os.path.basename(run_dir)
-    archive = run_dir + ".tar.gz"
+    archive = run_dir + "_" + "_".join(pid_list) + ".tar.gz"
     run_dir_path = os.path.dirname(run_dir)
 
     # Prepare the options for excluding lanes
@@ -411,13 +411,13 @@
                     )
                 else:
                     sbt = f"{run.id} Demultiplexing Completed!"
-                    msg = """The run {run} has been demultiplexed without any error or warning.
+                    msg = f"""The run {run.id} has been demultiplexed without any error or warning.
 
                     The Run will be transferred to the analysis cluster for further analysis.
 
-                    The run is available at : https://genomics-status.scilifelab.se/flowcells/{run}
+                    The run is available at : https://genomics-status.scilifelab.se/flowcells/{run.id}
 
-                    """.format(run=run.id)
+                    """
                 run.send_mail(sbt, msg, rcp=CONFIG["mail"]["recipients"])
 
             # Copy demultiplex stats file, InterOp meta data and run xml files to shared file system for LIMS purpose

diff --git a/taca/cleanup/cleanup.py b/taca/cleanup/cleanup.py
@@ -153,9 +153,7 @@ def cleanup_miarka(
         if all_undet_files:
             undet_size = _def_get_size_unit(sum(map(os.path.getsize, all_undet_files)))
             if misc.query_yes_no(
-                "In total found {} undetermined files which are {} in size, delete now ?".format(
-                    len(all_undet_files), undet_size
-                ),
+                f"In total found {len(all_undet_files)} undetermined files which are {undet_size} in size, delete now ?",
                 default="no",
             ):
                 _remove_files(all_undet_files)
@@ -313,11 +311,7 @@ def cleanup_miarka(
         for proj, info in project_clean_list.items():
             proj_count += 1
             if not misc.query_yes_no(
-                "{}Delete files for this project ({}/{})".format(
-                    get_proj_meta_info(info, days_fastq),
-                    proj_count,
-                    len(project_clean_list),
-                ),
+                f"{get_proj_meta_info(info, days_fastq)}Delete files for this project ({proj_count}/{len(project_clean_list)})",
                 default="no",
             ):
                 logger.info(f"Will not remove files for project {proj}")

diff --git a/taca/illumina/Runs.py b/taca/illumina/Runs.py
@@ -350,10 +350,10 @@
         # Send an email notifying that the transfer was successful
         runname = self.id
         sbt = f"Rsync of data for run {runname} to the analysis cluster has finished"
-        msg = """ Rsync of data for run {run} to the analysis cluster has finished!
+        msg = f""" Rsync of data for run {runname} to the analysis cluster has finished!
 
-        The run is available at : https://genomics-status.scilifelab.se/flowcells/{run}
-        """.format(run=runname)
+        The run is available at : https://genomics-status.scilifelab.se/flowcells/{runname}
+        """
         if mail_recipients:
             send_mail(sbt, msg, mail_recipients)
 
@@ -453,6 +453,10 @@
         # Prepare a list for lanes with NoIndex samples
         noindex_lanes = []
         for entry in self.runParserObj.samplesheet.data:
+            if not entry.get("index"):
+                entry["index"] = ""
+            if not entry.get("index2"):
+                entry["index2"] = ""
             if entry["index"].upper() == "NOINDEX" or (
                 entry["index"] == "" and entry["index2"] == ""
             ):

diff --git a/taca/illumina/Standard_Runs.py b/taca/illumina/Standard_Runs.py
@@ -131,6 +131,10 @@
             sample_name = sample.get("Sample_Name") or sample.get("SampleName")
             umi_length = [0, 0]
             read_length = read_cycles
+            if not sample.get("index"):
+                sample["index"] = ""
+            if not sample.get("index2"):
+                sample["index2"] = ""
             # Read the length of read 1 and read 2 from the field Recipe
             if sample.get("Recipe") and RECIPE_PAT.findall(sample.get("Recipe")):
                 ss_read_length = [

diff --git a/taca/utils/bioinfo_tab.py b/taca/utils/bioinfo_tab.py
@@ -99,14 +99,7 @@ def update_statusdb(run_dir):
                                 for k, v in remote_doc.items():
                                     obj["values"][k] = v
                                 logger.info(
-                                    "Updating {} {} {} {} {} as {}".format(
-                                        run_id,
-                                        project,
-                                        flowcell,
-                                        lane,
-                                        sample,
-                                        sample_status,
-                                    )
+                                    f"Updating {run_id} {project} {flowcell} {lane} {sample} as {sample_status}"
                                 )
                                 # Sorts timestamps
                                 obj["values"] = OrderedDict(
@@ -123,14 +116,7 @@ def update_statusdb(run_dir):
                         # Creates new entry
                         else:
                             logger.info(
-                                "Creating {} {} {} {} {} as {}".format(
-                                    run_id,
-                                    project,
-                                    flowcell,
-                                    lane,
-                                    sample,
-                                    sample_status,
-                                )
+                                f"Creating {run_id} {project} {flowcell} {lane} {sample} as {sample_status}"
                             )
                             # Creates record
                             db.save(obj)