diff --git a/README.md b/README.md index 2b0f09c..7fcf059 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,50 @@ # Phylo_flow pipeline ## Description -This tool, developed within the scope of xxxxx, aims toxxxxxxx. +This tool, developed within the scope of ASB, aims to create a pipeline. ![Pipeline](./extras/pictures/pipeline.png) ## Requisites -* Docker; -* Docker Desktop (Opcional) -* Acess to internet (Only when installing the app) +* Python; +* Mafft; +* modeltest-ng; +* raxml-ng; +* Toytree. + +## Installation +```bash +# Download the project from GitHub +wget https://github.com/Rendrick27/Assignment_01/archive/refs/heads/main.zip + +# Unzip the archive +unzip main.zip +``` + +## Usage +```bash +# Navigate to the Snakemake directory +cd Assignment_01-main +``` +Then, run the following command: +```bash +snakemake --use-conda all --cores 1 +``` + +## Docker +### Build it +```bash +docker build -t {image_name} . + +docker run -it --name {container_name} {image_name} /bin/bash +``` +### Pull docker image +```bash +docker pull rendrick27/phylo_flow:latest +``` + +## Settings +You may adjust settings in the Snakemake file, such as threads and bootstraps in params, but remember that using more threads may cause more issues. ## Credits
diff --git a/python/sequence_model_processor.py b/python/sequence_model_processor.py index 9814753..06afc68 100644 --- a/python/sequence_model_processor.py +++ b/python/sequence_model_processor.py @@ -1,6 +1,7 @@ -import sys -import os import glob +import os +import sys + def get_sequence_length(fasta_file): """ @@ -10,7 +11,8 @@ def get_sequence_length(fasta_file): fasta_file (str): Path to the FASTA file. Returns: - int: The total length of the sequence in the file, excluding header lines and whitespace. + int: The total length of the sequence in the file, + excluding header lines and whitespace. """ seq_length = 0 with open(fasta_file, 'r') as file: @@ -22,13 +24,15 @@ def get_sequence_length(fasta_file): seq_length += len(line.strip()) return seq_length + def process_sequences(directory, output_file): """ - Processes each FASTA file in the directory to calculate sequence lengths and generate output - based on corresponding model data. + Processes each FASTA file in the directory to calculate sequence + lengths and generate output based on corresponding model data. Args: - directory (str): Directory containing FASTA files and corresponding model files. + directory (str): Directory containing FASTA files + and corresponding model files. output_file (str): File to which the formatted output will be written. """ fasta_files = sorted(glob.glob(os.path.join(directory, '*.fasta'))) @@ -56,6 +60,7 @@ def process_sequences(directory, output_file): for output in outputs: file.write(output + "\n") + if __name__ == "__main__": if len(sys.argv) < 3: print("Usage: python script.py