From 59d815a1c49610ff2e5c13f4c96ae3e01dfcc963 Mon Sep 17 00:00:00 2001 From: Emma Dann <32264060+emdann@users.noreply.github.com> Date: Sat, 4 Jan 2025 00:54:29 -0800 Subject: [PATCH] Fix edgeR rpy2 tests (#692) * fix broken rpy2 edger tests * updated edger tests --- .../_differential_gene_expression/_edger.py | 6 +++-- .../test_edger.py | 24 ++++++++++++++++--- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/pertpy/tools/_differential_gene_expression/_edger.py b/pertpy/tools/_differential_gene_expression/_edger.py index bf72ee9f..a52ba5b9 100644 --- a/pertpy/tools/_differential_gene_expression/_edger.py +++ b/pertpy/tools/_differential_gene_expression/_edger.py @@ -60,8 +60,10 @@ def fit(self, **kwargs): # adata, design, mask, layer logger.info("Calculating NormFactors") dge = edger.calcNormFactors(dge) - with localconverter(get_conversion() + pandas2ri.converter): - design_r = ro.conversion.py2rpy(pd.DataFrame(self.design)) + with localconverter(get_conversion() + numpy2ri.converter): + # dt = np.dtype([(name, 'float64') for name in self.design.columns]) + # design_array = np.array(self.design.values, dtype=dt) + design_r = ro.conversion.py2rpy(self.design.values) logger.info("Estimating Dispersions") dge = edger.estimateDisp(dge, design=design_r) diff --git a/tests/tools/_differential_gene_expression/test_edger.py b/tests/tools/_differential_gene_expression/test_edger.py index 27b4584a..be2557db 100644 --- a/tests/tools/_differential_gene_expression/test_edger.py +++ b/tests/tools/_differential_gene_expression/test_edger.py @@ -1,4 +1,5 @@ -from pertpy.tools._differential_gene_expression import EdgeR +from pertpy.tools._differential_gene_expression import EdgeR, PyDESeq2 +import numpy.testing as npt def test_edger_simple(test_adata): @@ -13,6 +14,17 @@ def test_edger_simple(test_adata): res_df = method.test_contrasts(method.contrast("condition", "A", "B")) assert len(res_df) == test_adata.n_vars + # Compare against snapshot + npt.assert_almost_equal( + res_df.p_value.values, + [8.0000e-05, 1.8000e-04, 5.3000e-04, 1.1800e-03, 3.3800e-02, 3.3820e-02, 7.7980e-02, 1.3715e-01, 2.5052e-01, 9.2485e-01], + decimal=4, + ) + npt.assert_almost_equal( + res_df.log_fc.values, + [ 0.61208, -0.39374, 0.57944, 0.7343 , -0.58675, 0.42575, -0.23951, -0.20761, 0.17489, 0.0247], + decimal=4, + ) def test_edger_complex(test_adata): @@ -28,6 +40,12 @@ def test_edger_complex(test_adata): # Check that the index of the result matches the var_names of the AnnData object assert set(test_adata.var_names) == set(res_df["variable"]) + # Compare ranking of genes from a different method (without design matrix handling) + down_gene = res_df.set_index("variable").loc['gene3', 'log_fc'] + up_gene = res_df.set_index("variable").loc['gene1', 'log_fc'] + assert down_gene < up_gene -# TODO: there should be a test checking if, for a concrete example, the output p-values and effect sizes are what -# we expect (-> frozen snapshot, that way we also get a heads-up if something changes upstream) + method = PyDESeq2(adata=test_adata, design="~condition1+group") + method.fit() + deseq_res_df = method.test_contrasts(method.contrast("condition1", "A", "B")) + assert all(res_df.sort_values('log_fc')['variable'].values == deseq_res_df.sort_values('log_fc')['variable'].values)