Skip to content

Commit

Permalink
Fix project run stats WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
chuan-wang committed Sep 24, 2024
1 parent 5b4f947 commit fac9912
Showing 1 changed file with 49 additions and 14 deletions.
63 changes: 49 additions & 14 deletions taca/element/Element_Runs.py
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,19 @@ def symlink_demux_dir(src_dir, dest_dir):
print(f"Error linking {src_path} to {dest_path}: {e}")


# Write to csv
def write_to_csv(data, filename):
    """Write a list of dictionaries to ``filename`` as a CSV file.

    The header row is taken from the keys of the first dictionary (in their
    insertion order); every dictionary in ``data`` then becomes one row.

    Args:
        data: Sequence of dictionaries, one per row. May be empty, in which
            case an empty file is written.
        filename: Path of the CSV file to create. An existing file is
            overwritten.

    Raises:
        ValueError: Raised by ``csv.DictWriter`` when a row contains a key
            that is missing from the first dictionary.
    """
    # newline="" hands line-ending control over to the csv module
    with open(filename, mode="w", newline="") as file:
        # Without rows there is no header to derive; leave the file empty
        # instead of failing with an IndexError on data[0]
        if not data:
            return
        writer = csv.DictWriter(file, fieldnames=list(data[0].keys()))
        # Write the header (fieldnames)
        writer.writeheader()
        # Write the data (rows)
        writer.writerows(data)


# Collect demux info into a list of dictionaries
# Structure: [{'sub_demux_count':XXX, 'SampleName':XXX, 'Index1':XXX, 'Index2':XXX, 'Lane':XXX, 'Project':XXX, 'Recipe':XXX}]
def collect_demux_runmanifest(self, demux_results_dirs):
Expand Down Expand Up @@ -550,17 +563,38 @@ def aggregate_undet_fastq(self, demux_runmanifest):
base_name = os.path.basename(fastqfile)
os.symlink(fastqfile, os.path.join(project_dest, base_name))

# Write to csv
def write_to_csv(data, filename):
    """Write a list of dictionaries to ``filename`` as a CSV file.

    The header row is taken from the keys of the first dictionary (in their
    insertion order); every dictionary in ``data`` then becomes one row.

    Args:
        data: Sequence of dictionaries, one per row. May be empty, in which
            case an empty file is written.
        filename: Path of the CSV file to create. An existing file is
            overwritten.

    Raises:
        ValueError: Raised by ``csv.DictWriter`` when a row contains a key
            that is missing from the first dictionary.
    """
    # newline="" hands line-ending control over to the csv module
    with open(filename, mode="w", newline="") as file:
        # Without rows there is no header to derive; leave the file empty
        # instead of failing with an IndexError on data[0]
        if not data:
            return
        writer = csv.DictWriter(file, fieldnames=list(data[0].keys()))
        # Write the header (fieldnames)
        writer.writeheader()
        # Write the data (rows)
        writer.writerows(data)

# Read in each Project_RunStats.json to fetch PercentMismatch, PercentQ30, PercentQ40 and QualityScoreMean
# Note that Element promised to include these stats in IndexAssignment.csv,
# but for now we have to collect them ourselves the hard way
def get_project_runstats(self, sub_demux, demux_runmanifest):
    """Collect per-sample, per-lane stats from the projects' RunStats JSON files.

    For every project that has at least one sample in the given
    sub-demultiplexing, the file
    ``<run_dir>/Demultiplexing_<sub_demux>/Samples/<project>/<project>_RunStats.json``
    is read if it exists. Projects without such a file are skipped.

    Args:
        sub_demux: Sub-demultiplexing identifier. It is compared against the
            ``sub_demux_count`` of each manifest entry and used to build the
            ``Demultiplexing_<sub_demux>`` directory name.
        demux_runmanifest: List of dictionaries that contain at least the
            keys ``Project`` and ``sub_demux_count``.

    Returns:
        A list with one dictionary per sample occurrence, holding the keys
        ``SampleName``, ``Lane``, ``ExpectedSequence``, ``PercentMismatch``,
        ``PercentQ30``, ``PercentQ40`` and ``QualityScoreMean``. Projects are
        processed in sorted order.

    Raises:
        KeyError: If a RunStats file lacks ``SampleStats``, ``SampleName``,
            ``Occurrences`` or one of the per-occurrence keys listed above.
    """
    # Values copied verbatim from every occurrence, in output order
    occurrence_keys = (
        "Lane",
        "ExpectedSequence",
        "PercentMismatch",
        "PercentQ30",
        "PercentQ40",
        "QualityScoreMean",
    )
    projects = sorted(
        {
            sample["Project"]
            for sample in demux_runmanifest
            if sample["sub_demux_count"] == sub_demux
        }
    )
    project_runstats = []
    for project in projects:
        runstats_path = os.path.join(
            self.run_dir,
            f"Demultiplexing_{sub_demux}",
            "Samples",
            project,
            f"{project}_RunStats.json",
        )
        # Projects without a RunStats file contribute no rows
        if not os.path.exists(runstats_path):
            continue
        # JSON is UTF-8 by specification; do not depend on the locale default
        with open(runstats_path, encoding="utf-8") as stats_json:
            runstats = json.load(stats_json)
        for sample in runstats["SampleStats"]:
            sample_name = sample["SampleName"]
            for occurrence in sample["Occurrences"]:
                project_runstats.append(
                    {
                        "SampleName": sample_name,
                        **{key: occurrence[key] for key in occurrence_keys},
                    }
                )
    return project_runstats


# Aggregate stats in IndexAssignment.csv
Expand All @@ -569,6 +603,10 @@ def aggregate_stats_assigned(self, demux_runmanifest):
sub_demux_list = sorted(list(set(sample['sub_demux_count'] for sample in demux_runmanifest)))
lanes = sorted(list(set(sample['Lane'] for sample in demux_runmanifest)))
for sub_demux in sub_demux_list:
# Read in each Project_RunStats.json to fetch PercentMismatch, PercentQ30, PercentQ40 and QualityScoreMean
# Note that Element promised that they would include these stats into IndexAssignment.csv
# But for now we have to do this by ourselves in this hard way
project_runstats = get_project_runstats(sub_demux, demux_runmanifest)
# Read in IndexAssignment.csv
assigned_csv = os.path.join(self.run_dir, f"Demultiplexing_{sub_demux}", "IndexAssignment.csv")
if os.path.exists(assigned_csv):
Expand Down Expand Up @@ -686,9 +724,6 @@ def aggregate_demux_results(self, demux_results_dirs):
aggregate_stats_assigned(demux_runmanifest)
# Aggregate stats in UnassignedSequences.csv
aggregate_stats_unassigned(demux_runmanifest)
# Aggregate stats in Project_RunStats.json
TBD



def upload_demux_results_to_statusdb(self):
Expand Down

0 comments on commit fac9912

Please sign in to comment.