From 1b73528b0ab20ffba0425a5bf1d5d9a27b77855f Mon Sep 17 00:00:00 2001 From: Isaac Virshup Date: Thu, 14 Nov 2024 14:56:14 -0800 Subject: [PATCH] Add feature_type column to var (#1310) --- .../cellxgene_census_builder/build_soma/experiment_builder.py | 2 +- .../src/cellxgene_census_builder/build_soma/globals.py | 4 +++- tools/cellxgene_census_builder/tests/conftest.py | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/experiment_builder.py b/tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/experiment_builder.py index 1066e9989..75e4f4937 100644 --- a/tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/experiment_builder.py +++ b/tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/experiment_builder.py @@ -297,7 +297,7 @@ def get_obs_and_var( var_df = ( adata.var.copy() .rename_axis("feature_id") - .reset_index()[["feature_id", "feature_name", "feature_length"]] + .reset_index()[["feature_id", "feature_name", "feature_length", "feature_type"]] ) return obs_df, var_df diff --git a/tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/globals.py b/tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/globals.py index 2ad0109e3..5bd339bae 100644 --- a/tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/globals.py +++ b/tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/globals.py @@ -186,19 +186,21 @@ "feature_length", "feature_reference", "feature_biotype", + "feature_type", ) CENSUS_VAR_TABLE_SPEC = TableSpec.create( [ ("soma_joinid", pa.int64()), ("feature_id", pa.large_string()), ("feature_name", pa.large_string()), + ("feature_type", pa.large_string()), ("feature_length", pa.int64()), ("nnz", pa.int64()), ("n_measured_obs", pa.int64()), ], use_arrow_dictionary=USE_ARROW_DICTIONARY, ) -_StringLabelVar = ["feature_id", "feature_name"] +_StringLabelVar = ["feature_id", "feature_name", "feature_type"] _NumericVar = ["nnz", "n_measured_obs", "feature_length"] CENSUS_VAR_PLATFORM_CONFIG = { "tiledb": { diff --git a/tools/cellxgene_census_builder/tests/conftest.py b/tools/cellxgene_census_builder/tests/conftest.py index c03027cf4..98e0c8ae7 100644 --- a/tools/cellxgene_census_builder/tests/conftest.py +++ b/tools/cellxgene_census_builder/tests/conftest.py @@ -99,6 +99,7 @@ def get_anndata( "feature_biotype": "gene", "feature_is_filtered": False, "feature_name": "ERCC-00002 (spike-in control)", + "feature_type": "synthetic", "feature_reference": organism.organism_ontology_term_id, "feature_length": 1000, },