From 455f6d96a165fc4efa56c7f12c99dcadbcb3f999 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Wed, 30 Nov 2022 15:18:11 -0800 Subject: [PATCH 1/2] Change default method precision to fp32, and add explicit _fp64 equivalent options --- README.md | 1 + src/api.cpp | 32 +++++++++++++++++++++----------- src/su.cpp | 10 ++++++---- 3 files changed, 28 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 5ec46b9..c980f5c 100644 --- a/README.md +++ b/README.md @@ -135,6 +135,7 @@ The methods can be used directly through the command line after install: -i The input BIOM table. -t The input phylogeny in newick. -m The method, [unweighted | weighted_normalized | weighted_unnormalized | generalized | + unweighted_fp64 | weighted_normalized_fp64 | weighted_unnormalized_fp64 | generalized_fp64 | unweighted_fp32 | weighted_normalized_fp32 | weighted_unnormalized_fp32 | generalized_fp32]. -o The output distance matrix. -a [OPTIONAL] Generalized UniFrac alpha, default is 1. diff --git a/src/api.cpp b/src/api.cpp index fc3ab50..71309cb 100644 --- a/src/api.cpp +++ b/src/api.cpp @@ -43,15 +43,23 @@ return err; \ } -#define SET_METHOD(requested_method, err) Method method; \ - if(std::strcmp(requested_method, "unweighted") == 0) \ - method = unweighted; \ - else if(std::strcmp(requested_method, "weighted_normalized") == 0) \ - method = weighted_normalized; \ - else if(std::strcmp(requested_method, "weighted_unnormalized") == 0) \ - method = weighted_unnormalized; \ - else if(std::strcmp(requested_method, "generalized") == 0) \ - method = generalized; \ +#define SET_METHOD(requested_method, err) Method method; \ + if(std::strcmp(requested_method, "unweighted") == 0) \ + method = unweighted_fp32; \ + else if(std::strcmp(requested_method, "weighted_normalized") == 0) \ + method = weighted_normalized_fp32; \ + else if(std::strcmp(requested_method, "weighted_unnormalized") == 0) \ + method = weighted_unnormalized_fp32; \ + else if(std::strcmp(requested_method, "generalized") == 0) \ + method = generalized_fp32; \ + else if(std::strcmp(requested_method, "unweighted_fp64") == 0) \ + method = unweighted; \ + else if(std::strcmp(requested_method, "weighted_normalized_fp64") == 0) \ + method = weighted_normalized; \ + else if(std::strcmp(requested_method, "weighted_unnormalized_fp64") == 0) \ + method = weighted_unnormalized; \ + else if(std::strcmp(requested_method, "generalized_fp64") == 0) \ + method = generalized; \ else if(std::strcmp(requested_method, "unweighted_fp32") == 0) \ method = unweighted_fp32; \ else if(std::strcmp(requested_method, "weighted_normalized_fp32") == 0) \ @@ -166,10 +174,12 @@ void initialize_mat_no_biom(mat_t* &result, char** sample_ids, unsigned int n_sa } inline compute_status is_fp64_method(const std::string &method_string, bool &fp64) { - if ((method_string=="unweighted_fp32") || (method_string=="weighted_normalized_fp32") || (method_string=="weighted_unnormalized_fp32") || (method_string=="generalized_fp32")) { + if ((method_string=="unweighted") || (method_string=="weighted_normalized") || (method_string=="weighted_unnormalized") || (method_string=="generalized")) { fp64 = false; - } else if ((method_string=="unweighted") || (method_string=="weighted_normalized") || (method_string=="weighted_unnormalized") || (method_string=="generalized")) { + } else if ((method_string=="unweighted_fp64") || (method_string=="weighted_normalized_fp64") || (method_string=="weighted_unnormalized_fp64") || (method_string=="generalized_fp64")) { fp64 = true; + } else if ((method_string=="unweighted_fp32") || (method_string=="weighted_normalized_fp32") || (method_string=="weighted_unnormalized_fp32") || (method_string=="generalized_fp32")) { + fp64 = false; } else { return unknown_method; } diff --git a/src/su.cpp b/src/su.cpp index cfe1cfb..c5f698b 100644 --- a/src/su.cpp +++ b/src/su.cpp @@ -20,7 +20,9 @@ void usage() { std::cout << std::endl; std::cout << " -i\t\tThe input BIOM table." << std::endl; std::cout << " -t\t\tThe input phylogeny in newick." << std::endl; - std::cout << " -m\t\tThe method, [unweighted | weighted_normalized | weighted_unnormalized | generalized | unweighted_fp32 | weighted_normalized_fp32 | weighted_unnormalized_fp32 | generalized_fp32]." << std::endl; + std::cout << " -m\t\tThe method, [unweighted | weighted_normalized | weighted_unnormalized | generalized |" << std::endl; + std::cout << " unweighted_fp64 | weighted_normalized_fp64 | weighted_unnormalized_fp64 | generalized_fp64 |" << std::endl; + std::cout << " unweighted_fp32 | weighted_normalized_fp32 | weighted_unnormalized_fp32 | generalized_fp32]." << std::endl; std::cout << " -o\t\tThe output distance matrix." << std::endl; std::cout << " -a\t\t[OPTIONAL] Generalized UniFrac alpha, default is 1." << std::endl; std::cout << " -f\t\t[OPTIONAL] Bypass tips, reduces compute by about 50%." << std::endl; @@ -430,10 +432,10 @@ Format get_format(const std::string &format_string, const std::string &method_st } else if (format_string == "hdf5_fp64") { format_val = format_hdf5_fp64; } else if (format_string == "hdf5") { - if ((method_string=="unweighted_fp32") || (method_string=="weighted_normalized_fp32") || (method_string=="weighted_unnormalized_fp32") || (method_string=="generalized_fp32")) - format_val = format_hdf5_fp32; - else + if ((method_string=="unweighted_fp64") || (method_string=="weighted_normalized_fp64") || (method_string=="weighted_unnormalized_fp64") || (method_string=="generalized_fp64")) format_val = format_hdf5_fp64; + else + format_val = format_hdf5_fp32; } return format_val; From f770850bedc6371a49b731c0e6318209a63b32e0 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Wed, 30 Nov 2022 15:22:50 -0800 Subject: [PATCH 2/2] Update test --- .github/workflows/main.yml | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 5828163..8a19254 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -103,30 +103,35 @@ jobs: ./compare_unifrac_matrix.py test500.weighted_unnormalized_fp32.h5 t1.h5 1.e-5 ./compare_unifrac_pcoa.py test500.weighted_unnormalized_fp32.h5 t1.h5 3 0.1 rm -f t1.h5 + # retry with default precision handling + time ssu -m weighted_unnormalized -i test500.biom -t test500.tre --pcoa 4 -r hdf5 -o t1.h5 + ./compare_unifrac_matrix.py test500.weighted_unnormalized_fp32.h5 t1.h5 1.e-5 + ./compare_unifrac_pcoa.py test500.weighted_unnormalized_fp32.h5 t1.h5 3 0.1 + rm -f t1.h5 time ssu -f -m weighted_unnormalized_fp32 -i test500.biom -t test500.tre --pcoa 4 -r hdf5_fp32 -o t1.h5 # matrrix will be different, but PCOA similar ./compare_unifrac_pcoa.py test500.weighted_unnormalized_fp32.h5 t1.h5 3 0.1 rm -f t1.h5 - time ssu -m weighted_unnormalized -i test500.biom -t test500.tre --pcoa 4 -r hdf5_fp64 -o t1.h5 + time ssu -m weighted_unnormalized_fp64 -i test500.biom -t test500.tre --pcoa 4 -r hdf5_fp64 -o t1.h5 # minimal precision loss between fp32 and fp64 ./compare_unifrac_matrix.py test500.weighted_unnormalized_fp32.h5 t1.h5 1.e-5 ./compare_unifrac_pcoa.py test500.weighted_unnormalized_fp32.h5 t1.h5 3 0.1 rm -f t1.h5 # weighted_normalized - time ssu -f -m weighted_normalized_fp32 -i test500.biom -t test500.tre --pcoa 4 -r hdf5_fp32 -o t1.h5 + time ssu -f -m weighted_normalized -i test500.biom -t test500.tre --pcoa 4 -r hdf5_fp32 -o t1.h5 ./compare_unifrac_matrix.py test500.weighted_normalized_fp32.f.h5 t1.h5 1.e-5 ./compare_unifrac_pcoa.py test500.weighted_normalized_fp32.f.h5 t1.h5 3 0.1 rm -f t1.h5 - time ssu -f -m weighted_normalized -i test500.biom -t test500.tre --pcoa 4 -r hdf5_fp64 -o t1.h5 + time ssu -f -m weighted_normalized_fp64 -i test500.biom -t test500.tre --pcoa 4 -r hdf5_fp64 -o t1.h5 ./compare_unifrac_matrix.py test500.weighted_normalized_fp32.f.h5 t1.h5 1.e-5 ./compare_unifrac_pcoa.py test500.weighted_normalized_fp32.f.h5 t1.h5 3 0.1 rm -f t1.h5 # unweighted - time ssu -f -m unweighted_fp32 -i test500.biom -t test500.tre --pcoa 4 -r hdf5_fp32 -o t1.h5 + time ssu -f -m unweighted -i test500.biom -t test500.tre --pcoa 4 -r hdf5_fp32 -o t1.h5 ./compare_unifrac_matrix.py test500.unweighted_fp32.f.h5 t1.h5 1.e-5 ./compare_unifrac_pcoa.py test500.unweighted_fp32.f.h5 t1.h5 3 0.1 rm -f t1.h5 - time ssu -f -m unweighted -i test500.biom -t test500.tre --pcoa 4 -r hdf5_fp64 -o t1.h5 + time ssu -f -m unweighted_fp64 -i test500.biom -t test500.tre --pcoa 4 -r hdf5_fp64 -o t1.h5 ./compare_unifrac_matrix.py test500.unweighted_fp32.f.h5 t1.h5 1.e-5 ./compare_unifrac_pcoa.py test500.unweighted_fp32.f.h5 t1.h5 3 0.1 rm -f t1.h5