#!/usr/bin/python
# -*- coding: utf-8 -*-

import os
import yaml # REQUIRE pyyaml <- not in python package

from utils import log, check_files

#     _____                         _                      _                  _   _
#    / ____|                       | |                    (_)                | | (_)
#   | |       _ __    ___    __ _  | |_    ___     _ __    _   _ __     ___  | |  _   _ __     ___
#   | |      | '__|  / _ \  / _` | | __|  / _ \   | '_ \  | | | '_ \   / _ \ | | | | | '_ \   / _ \
#   | |____  | |    |  __/ | (_| | | |_  |  __/   | |_) | | | | |_) | |  __/ | | | | | | | | |  __/
#    \_____| |_|     \___|  \__,_|  \__|  \___|   | .__/  |_| | .__/   \___| |_| |_| |_| |_|  \___|
#                                                 | |         | |
#                                                 |_|         |_|

def create_pipeline(args) :
    """Pipeline to prepare reference genome files for variant calling"""

    # Parse arguments & check files
    config_file = args.config[0]
    config_file = check_files([config_file])[0]

    print("# runGATK.py pipeline")
    print("Configuration file:\t{}\n".format(config_file))
    print("===============================================================================\n")

    # Read config file and create a .bash file
    f = open(config_file, "r")
    cfg = yaml.load(f, Loader=yaml.FullLoader)
    f.close()

    # Check output directory
    if not os.path.isdir(cfg["output_directory"]) :
        os.makedirs(cfg["output_directory"])
    else :
        raise Exception("ERROR: Output directory already exists!")

    # Check runGATK, reference and input reads
    runGATK = check_files([cfg["rungatk"]])[0]
    ref = check_files([cfg["reference"]])[0]

    for sample, libraries in cfg["samples"].items() :
        for lib, reads in libraries.items() :
            reads["R1"] = check_files([reads["R1"]])[0]
            reads["R2"] = check_files([reads["R2"]])[0]

    # Write pipeline
    f = open(os.path.join(cfg["output_directory"], "pipeline.sh"), 'w')
    f.write("#!/bin/bash\n\n# Global variables\nrungatk={}\nref={}\n\n".format(runGATK, ref))

    # Step preparing reference
    f.write("\n## Preparing reference\n")
    f.write("echo 'Preparing reference files:'\n")
    f.write("python ${{rungatk}} prepare -ws {} ${{ref}}\n".format(cfg["prepare_options"]["window_size"]))

    # Step Trimming
    if cfg["trim_options"]["skip_trimming"] : # Skip trimmming
        pass
    else : # Do not skip trimmming
        f.write("\n## Trimming reads\n")
        f.write("echo 'Trimming:'\n")

        # Create a reads file
        readslist = os.path.join(os.path.abspath(cfg["output_directory"]), "sample_reads.list")
        fr = open(readslist, "w")
        for sample, libraries in cfg["samples"].items() : # for each library
            for lib, reads in libraries.items() : # for reads in library
                fr.write("{}\t{}\n".format(reads["R1"], reads["R2"]))
        fr.close()

        dc = {
              "threads":cfg["trim_options"]["threads_per_job"],
              "jobs":cfg["trim_options"]["njobs"],
              "options":cfg["trim_options"]["fastp_options"],
              "reads":readslist
              }
        f.write("python ${{rungatk}} trim -t {threads} -p {jobs} -fo=\"{options}\" {reads}\n".format(**dc))

    # Step aligning
    f.write("\n## Aligning trimmed reads\n")
    f.write("echo 'Aligning:'\n")

    # Per sample alignment
    trimreads = {sample:{"R1":[], "R2":[]} for sample in cfg["samples"].keys()}
    # Store libraries with trimled.gz suffix
    for sample, libraries in cfg["samples"].items() :
        for lib, reads in libraries.items() :
            trimreads[sample]["R1"].append(reads["R1"] + ".trimmed.gz")
            trimreads[sample]["R2"].append(reads["R2"] + ".trimmed.gz")

    for sample in cfg["samples"].keys() :
        f.write("# Aligning {}\n".format(sample))

        dc = {
              "sample":sample,
              "threads":cfg["align_options"]["threads"],
              "ram":cfg["align_options"]["ram_per_thread"],
              "mapq":cfg["align_options"]["mapq"],
              "R1":",".join(r for r in trimreads[sample]["R1"]),
              "R2":",".join(r for r in trimreads[sample]["R2"]),
              }

        cmd = "python ${{rungatk}} align -t {threads} -r {ram} -m {mapq} {sample} ${{ref}} {R1} {R2}".format(**dc)
        if cfg["align_options"]["keep_temp"] == "true" :
            cmd += " --keep-temp\n"
        else :
            cmd += "\n"
        f.write(cmd)

    # Step calling samples
    f.write("\n## Calling samples\n")
    f.write("echo 'Calling:'\n")

    for sample in cfg["samples"].keys() :
        f.write("# Calling {}\n".format(sample))
        fi = cfg["call_options"]["fi"] if cfg["call_options"]["fi"] != '' else "\'\'"
        dc = {
              "sample":sample,
              "njobs":cfg["call_options"]["njobs"],
              "ht":cfg["call_options"]["ht"],
              "pim":cfg["call_options"]["pim"],
              "he":cfg["call_options"]["he"],
              "ihe":cfg["call_options"]["ihe"],
              "mrpas":cfg["call_options"]["mrpas"],
              "fi":fi,
              "erc":cfg["call_options"]["erc"],
              "om":cfg["call_options"]["om"],
              "jo":cfg["call_options"]["java_options"],
              "pjo":cfg["call_options"]["picard_java_options"],
              "mmn":cfg["call_options"]["mmn"],
        }
        cmd = "python ${{rungatk}} call {sample} ${{ref}} -pjo=\"{pjo}\" -jo=\"{jo}\" "
        cmd += "-p {njobs} -ht {ht} -he {he} -ihe {ihe} -mrpas {mrpas} -erc {erc} "
        cmd += "-om {om} -fi {fi} -mmn {mmn}"
        if cfg["call_options"]["bam_output"] == "true" :
            cmd += " --bam-out"
        if cfg["call_options"]["log"] == "true" :
            cmd += " --log"
        if cfg["call_options"]["keep_temp"] == "true" :
            cmd += " --keep-temp"
        cmd = cmd.format(**dc) + "\n"
        f.write(cmd)

    # Genotyping
    f.write("\n## Joint-genotyping\n")
    f.write("echo 'Genotyping:'\n")

    fi = cfg["genotype_options"]["fi"] if cfg["genotype_options"]["fi"] != '' else "\'\'"
    dc = {
          "sample":sample,
          "njobs":cfg["genotype_options"]["njobs"],
          "sp":cfg["genotype_options"]["sp"],
          "fi":fi,
          "he":cfg["genotype_options"]["he"],
          "ihe":cfg["genotype_options"]["ihe"],
          "jo":cfg["genotype_options"]["java_options"],
          "pjo":cfg["genotype_options"]["picard_java_options"],
          "dbjo":cfg["genotype_options"]["db_java_options"],
          "mmn":cfg["genotype_options"]["mmn"],
          "bs":cfg["genotype_options"]["bs"],
          "samples":",".join(sample for sample in cfg["samples"].keys())
    }

    cmd = "python ${{rungatk}} genotype ${{ref}} {samples} jointgenotyping "
    cmd += "-jo=\"{jo}\" -pjo=\"{pjo}\" -dbjo=\"{dbjo}\" -p {njobs} "
    cmd += "-he {he} -ihe {ihe} -fi {fi} -mmn {mmn} -bs {bs}"

    if cfg["genotype_options"]["all_sites"] == "true" :
        cmd += " --all-sites"
    if cfg["genotype_options"]["keep_temp"] == "true" :
        cmd += " --keep-temp"
    if cfg["genotype_options"]["log"] == "true" :
        cmd += " --log"

    cmd = cmd.format(**dc)
    cmd += "\n"
    f.write(cmd)

    f.write("\necho \"Done!\"\n")
    f.close()

    print("Done!")
    print("A file pipeline.sh was created in the ./output directory.")
    print("You can make it executable with `chmod +x pipeline.sh`")
    print("A final joint-genotyped .vcf file will be stored in \"output/jointegenotyping\".")