store: remove integer values from DBCol variants (#7441)
So far we’ve been using ‘col##’ names for the RocksDB column families.
This commit proposes to move away from this pattern for new columns
and instead use the variant name as the column family name.  For example,
rather than ‘col50’ for flat state we would simply use ‘FlatState’.

This has a couple of advantages.  Firstly, there’s no longer a need to
manually assign unique numbers to columns.  Secondly, troubleshooting
at the RocksDB level becomes easier since it’s simpler to map from
a column family name to the column name used in the source code.

As far as I can tell, RocksDB doesn’t have a simple way to rename
a column family.  Because of that, this commit only changes the naming
for future columns, leaving the currently existing ones with their
‘col##’ names.
mina86 authored Aug 26, 2022
1 parent d004bb4 commit 5ad65be
Showing 3 changed files with 132 additions and 63 deletions.
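
For context on how the new naming works: the conversion from a column value to its variant name (used in the diffs below as `<&str>::from(col)`) comes from a strum-derived string conversion on the enum. The derive itself is outside the visible hunks, so the following is a hedged sketch of the assumed behaviour, using a stand-in `DemoCol` rather than the real `DBCol`; it requires the `strum` crate with its `derive` feature:

// Hedged stand-in for DBCol (not nearcore code): deriving `IntoStaticStr`
// exposes each variant's name as a `&'static str`, which is the string this
// commit reuses as the RocksDB column-family name for newly added columns.
use strum::IntoStaticStr;

#[derive(IntoStaticStr)]
enum DemoCol {
    FlatState,
}

fn main() {
    // The same `<&str>::from(col)` conversion appears in the diffs below.
    assert_eq!(<&str>::from(DemoCol::FlatState), "FlatState");
}
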
9 changes: 1 addition & 8 deletions chain/client/src/rocksdb_metrics.rs
@@ -33,12 +33,6 @@ pub(crate) struct RocksDBMetrics {
gauges: HashMap<String, GaugeVec>,
}

/// Returns column's name as name in the DBCol enum without '_' prefix for deprecated columns.
fn col_verbose_name(col: near_store::DBCol) -> &'static str {
let name: &str = col.into();
name.strip_prefix("_").unwrap_or(name)
}

impl RocksDBMetrics {
pub fn export_stats_as_metrics(
&mut self,
@@ -99,7 +93,6 @@ impl RocksDBMetrics {
// Label = column's verbose name.
StatsValue::ColumnValue(col, value) => {
let key = &stat_name;
let label = col_verbose_name(col);

// Checking for metric to be present.
let gauge = match self.int_vec_gauges.entry(key.to_string()) {
@@ -112,7 +105,7 @@
Entry::Occupied(entry) => entry.into_mut(),
};
// Writing value for column.
gauge.with_label_values(&[&label]).set(value);
gauge.with_label_values(&[<&str>::from(col)]).set(value);
}
}
}
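
The removed `col_verbose_name` helper existed only to strip the `_` prefix that deprecated variants carry in their Rust names. Since deprecated variants are now annotated with `#[strum(serialize = "OriginalName")]` (see the updated `DBCol` documentation below), the plain strum conversion already returns the prefix-free name, so the metric label can come straight from `<&str>::from(col)`. A minimal sketch of that equivalence, again with a hypothetical `DemoCol` and the assumed strum derive rather than the real `DBCol`:

use strum::IntoStaticStr;

#[derive(IntoStaticStr)]
enum DemoCol {
    // Deprecated column: the serialize attribute keeps the original name.
    #[strum(serialize = "TransactionRefCount")]
    _TransactionRefCount,
}

// Mirrors the removed col_verbose_name: drop a leading underscore, if any.
fn old_style_label(col: DemoCol) -> &'static str {
    let name: &'static str = col.into();
    name.strip_prefix('_').unwrap_or(name)
}

fn main() {
    // The old helper and the plain conversion agree, so the helper is redundant.
    assert_eq!(old_style_label(DemoCol::_TransactionRefCount), "TransactionRefCount");
    assert_eq!(<&str>::from(DemoCol::_TransactionRefCount), "TransactionRefCount");
}
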
122 changes: 69 additions & 53 deletions core/store/src/columns.rs
@@ -1,8 +1,21 @@
use borsh::{BorshDeserialize, BorshSerialize};
use std::fmt;

/// This enum holds the information about the columns that we use within the RocksDB storage.
/// You can think about our storage as 2-dimensional table (with key and column as indexes/coordinates).
/// This enum holds the information about the columns that we use within the
/// RocksDB storage.
///
/// You can think about our storage as a 2-dimensional table (with key and column
/// as indexes/coordinates).
///
/// Note that the names of the variants in this enumeration correspond to the
/// names of the RocksDB column families. As such, it is *not* safe to rename
/// a variant.
///
/// The only exception is adding an underscore at the beginning of the name to
/// indicate that the column has been deprecated. Deprecated columns are not
/// used except by the database migration code, which needs to deal with the
/// deprecation. Make sure to add the `#[strum(serialize = "OriginalName")]`
/// attribute in front of the variant when you deprecate a column.
#[derive(
PartialEq,
Copy,
@@ -20,122 +33,124 @@ pub enum DBCol {
/// Column to indicate which version of database this is.
/// - *Rows*: single row [VERSION_KEY]
/// - *Content type*: The version of the database (u32), serialized as JSON.
DbVersion = 0,
DbVersion,
/// Column that stores Misc cells.
/// - *Rows*: multiple, for example "GENESIS_JSON_HASH", "HEAD_KEY", [LATEST_KNOWN_KEY] etc.
/// - *Content type*: cell specific.
BlockMisc = 1,
BlockMisc,
/// Column that stores Block content.
/// - *Rows*: block hash (CryptoHash)
/// - *Content type*: [near_primitives::block::Block]
Block = 2,
Block,
/// Column that stores Block headers.
/// - *Rows*: block hash (CryptoHash)
/// - *Content type*: [near_primitives::block_header::BlockHeader]
BlockHeader = 3,
BlockHeader,
/// Column that stores mapping from block height to block hash.
/// - *Rows*: height (u64)
/// - *Content type*: block hash (CryptoHash)
BlockHeight = 4,
BlockHeight,
/// Column that stores the Trie state.
/// - *Rows*: trie_node_or_value_hash (CryptoHash)
/// - *Content type*: Serialized RawTrieNodeWithSize or value ()
State = 5,
State,
/// Mapping from BlockChunk to ChunkExtra
/// - *Rows*: BlockChunk (block_hash, shard_uid)
/// - *Content type*: [near_primitives::types::ChunkExtra]
ChunkExtra = 6,
ChunkExtra,
/// Mapping from transaction outcome id (CryptoHash) to list of outcome ids with proofs.
/// Multiple outcomes can arise due to forks.
/// - *Rows*: outcome id (CryptoHash)
/// - *Content type*: Vec of [near_primitives::transactions::ExecutionOutcomeWithIdAndProof]
TransactionResult = 7,
TransactionResult,
/// Mapping from Block + Shard to list of outgoing receipts.
/// - *Rows*: block + shard
/// - *Content type*: Vec of [near_primitives::receipt::Receipt]
OutgoingReceipts = 8,
OutgoingReceipts,
/// Mapping from Block + Shard to list of incoming receipt proofs.
/// Each proof might prove multiple receipts.
/// - *Rows*: (block, shard)
/// - *Content type*: Vec of [near_primitives::sharding::ReceiptProof]
IncomingReceipts = 9,
IncomingReceipts,
/// Info about the peers that we are connected to. Mapping from peer_id to KnownPeerState.
/// - *Rows*: peer_id (PublicKey)
/// - *Content type*: [network_primitives::types::KnownPeerState]
Peers = 10,
Peers,
/// Mapping from EpochId to EpochInfo
/// - *Rows*: EpochId (CryptoHash)
/// - *Content type*: [near_primitives::epoch_manager::EpochInfo]
EpochInfo = 11,
EpochInfo,
/// Mapping from BlockHash to BlockInfo
/// - *Rows*: BlockHash (CryptoHash)
/// - *Content type*: [near_primitives::epoch_manager::BlockInfo]
BlockInfo = 12,
BlockInfo,
/// Mapping from ChunkHash to ShardChunk.
/// - *Rows*: ChunkHash (CryptoHash)
/// - *Content type*: [near_primitives::sharding::ShardChunk]
Chunks = 13,
Chunks,
/// Storage for PartialEncodedChunk.
/// - *Rows*: ChunkHash (CryptoHash)
/// - *Content type*: [near_primitives::sharding::PartialEncodedChunk]
PartialChunks = 14,
PartialChunks,
/// Blocks for which chunks need to be applied after the state is downloaded for a particular epoch
/// - *Rows*: BlockHash (CryptoHash)
/// - *Content type*: Vec of BlockHash (CryptoHash)
BlocksToCatchup = 15,
BlocksToCatchup,
/// Blocks for which the state is being downloaded
/// - *Rows*: First block of the epoch (CryptoHash)
/// - *Content type*: StateSyncInfo
StateDlInfos = 16,
StateDlInfos,
/// Blocks that were ever challenged.
/// - *Rows*: BlockHash (CryptoHash)
/// - *Content type*: 'true' (bool)
ChallengedBlocks = 17,
ChallengedBlocks,
/// Contains all the Shard State Headers.
/// - *Rows*: StateHeaderKey (ShardId || BlockHash)
/// - *Content type*: ShardStateSyncResponseHeader
StateHeaders = 18,
StateHeaders,
/// Contains all the invalid chunks (that we had trouble decoding or verifying).
/// - *Rows*: ShardChunkHeader object
/// - *Content type*: EncodedShardChunk
InvalidChunks = 19,
InvalidChunks,
/// Contains 'BlockExtra' information that is computed after the block was processed.
/// Currently it stores only challenge results.
/// - *Rows*: BlockHash (CryptoHash)
/// - *Content type*: BlockExtra
BlockExtra = 20,
BlockExtra,
/// Stores hashes of all blocks at each height, to detect double signs.
/// - *Rows*: int (height of the block)
/// - *Content type*: Map: EpochId -> Set of BlockHash(CryptoHash)
BlockPerHeight = 21,
BlockPerHeight,
/// Contains State parts that we've received.
/// - *Rows*: StatePartKey (BlockHash || ShardId || PartId (u64))
/// - *Content type*: state part (bytes)
StateParts = 22,
StateParts,
/// Contains mapping from epoch_id to epoch start (first block height of the epoch)
/// - *Rows*: EpochId (CryptoHash) -- TODO: where does the epoch_id come from? it looks like blockHash..
/// - *Content type*: BlockHeight (int)
EpochStart = 23,
EpochStart,
/// Map account_id to announce_account (which peer has announced which account in the current epoch). // TODO: explain account announcement
/// - *Rows*: AccountId (str)
/// - *Content type*: AnnounceAccount
AccountAnnouncements = 24,
AccountAnnouncements,
/// Next block hashes in the sequence of the canonical chain blocks.
/// - *Rows*: BlockHash (CryptoHash)
/// - *Content type*: next block: BlockHash (CryptoHash)
NextBlockHashes = 25,
NextBlockHashes,
/// `LightClientBlock`s corresponding to the last final block of each completed epoch.
/// - *Rows*: EpochId (CryptoHash)
/// - *Content type*: LightClientBlockView
EpochLightClientBlocks = 26,
EpochLightClientBlocks,
/// Mapping from Receipt id to destination Shard Id, i.e., the shard that this receipt is sent to.
/// - *Rows*: ReceiptId (CryptoHash)
/// - *Content type*: Shard Id || ref_count (u64 || u64)
ReceiptIdToShardId = 27,
ReceiptIdToShardId,
// Deprecated.
_NextBlockWithNewChunk = 28,
#[strum(serialize = "NextBlockWithNewChunk")]
_NextBlockWithNewChunk,
// Deprecated.
_LastBlockWithNewChunk = 29,
#[strum(serialize = "LastBlockWithNewChunk")]
_LastBlockWithNewChunk,
/// Network storage:
/// When a given edge is removed (or we didn't get any ping from it for a while), we remove it from our 'in memory'
/// view and persist it into storage.
@@ -157,88 +172,89 @@
/// Map each saved peer on disk with its component id (a.k.a. nonce).
/// - *Rows*: peer_id
/// - *Column type*: (nonce) u64
PeerComponent = 30,
PeerComponent,
/// Map component id (a.k.a. nonce) with all edges in this component.
/// These are all the edges that were purged and persisted to disk at the same time.
/// - *Rows*: nonce
/// - *Column type*: `Vec<near_network::routing::Edge>`
ComponentEdges = 31,
ComponentEdges,
/// Biggest component id (a.k.a nonce) used.
/// - *Rows*: single row (empty row name)
/// - *Column type*: (nonce) u64
LastComponentNonce = 32,
LastComponentNonce,
/// Map of transactions
/// - *Rows*: transaction hash
/// - *Column type*: SignedTransaction
Transactions = 33,
Transactions,
/// Mapping from a given (Height, ShardId) to the Chunk hash.
/// - *Rows*: (Height || ShardId) - (u64 || u64)
/// - *Column type*: ChunkHash (CryptoHash)
ChunkPerHeightShard = 34,
ChunkPerHeightShard,
/// Changes to state (Trie) that we have recorded.
/// - *Rows*: BlockHash || TrieKey (TrieKey is written via custom to_vec)
/// - *Column type*: TrieKey, new value and reason for change (RawStateChangesWithTrieKey)
StateChanges = 35,
StateChanges,
/// Mapping from Block to its refcount. (Refcounts are used in handling chain forks)
/// - *Rows*: BlockHash (CryptoHash)
/// - *Column type*: refcount (u64)
BlockRefCount = 36,
BlockRefCount,
/// Changes to Trie that we recorded during given block/shard processing.
/// - *Rows*: BlockHash || ShardId
/// - *Column type*: old root, new root, list of insertions, list of deletions (TrieChanges)
TrieChanges = 37,
TrieChanges,
/// Mapping from a block hash to a merkle tree of block hashes that are in the chain before it.
/// - *Rows*: BlockHash
/// - *Column type*: PartialMerkleTree - MerklePath to the leaf + number of leaves in the whole tree.
BlockMerkleTree = 38,
BlockMerkleTree,
/// Mapping from height to the set of Chunk Hashes that were included in the block at that height.
/// - *Rows*: height (u64)
/// - *Column type*: Vec<ChunkHash (CryptoHash)>
ChunkHashesByHeight = 39,
ChunkHashesByHeight,
/// Mapping from block ordinal number (number of the block in the chain) to the BlockHash.
/// - *Rows*: ordinal (u64)
/// - *Column type*: BlockHash (CryptoHash)
BlockOrdinal = 40,
BlockOrdinal,
/// GC Count for each column - number of times we did the GarbageCollection on the column.
/// - *Rows*: column id (byte)
/// - *Column type*: u64
GCCount = 41,
GCCount,
/// All Outcome ids by block hash and shard id. For each shard it is ordered by execution order.
/// TODO: seems that it has only 'transaction ids' there (not sure if intentional)
/// - *Rows*: BlockShardId (BlockHash || ShardId) - 40 bytes
/// - *Column type*: Vec <OutcomeId (CryptoHash)>
OutcomeIds = 42,
OutcomeIds,
/// Deprecated
_TransactionRefCount = 43,
#[strum(serialize = "TransactionRefCount")]
_TransactionRefCount,
/// Heights of blocks that have been processed.
/// - *Rows*: height (u64)
/// - *Column type*: empty
ProcessedBlockHeights = 44,
ProcessedBlockHeights,
/// Mapping from receipt hash to Receipt.
/// - *Rows*: receipt (CryptoHash)
/// - *Column type*: Receipt
Receipts = 45,
Receipts,
/// Precompiled machine code of the contract, used by StoreCompiledContractCache.
/// - *Rows*: ContractCacheKey or code hash (not sure)
/// - *Column type*: near-vm-runner CacheRecord
CachedContractCode = 46,
CachedContractCode,
/// Epoch validator information used for rpc purposes.
/// - *Rows*: epoch id (CryptoHash)
/// - *Column type*: EpochSummary
EpochValidatorInfo = 47,
EpochValidatorInfo,
/// Header Hashes indexed by Height.
/// - *Rows*: height (u64)
/// - *Column type*: Vec<HeaderHashes (CryptoHash)>
HeaderHashesByHeight = 48,
HeaderHashesByHeight,
/// State changes made by a chunk, used for splitting states
/// - *Rows*: BlockShardId (BlockHash || ShardId) - 40 bytes
/// - *Column type*: StateChangesForSplitStates
StateChangesForSplitStates = 49,
StateChangesForSplitStates,
/// State changes made by a chunk, used for splitting states
/// - *Rows*: serialized TrieKey (Vec<u8>)
/// - *Column type*: ValueRef
#[cfg(feature = "protocol_feature_flat_state")]
FlatState = 50,
FlatState,
}

impl DBCol {
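
A note on dropping the explicit `= N` values above: Rust assigns implicit discriminants to field-less enum variants starting at 0 in declaration order, and the old hand-assigned numbers were exactly the consecutive values 0 through 50, so the numeric value associated with each variant is unchanged. A tiny stand-alone illustration (not nearcore code):

// Without explicit discriminants, a field-less enum gets 0, 1, 2, ... in
// declaration order, matching the values that used to be written by hand.
enum DemoCol {
    DbVersion, // implicitly 0, formerly `DbVersion = 0`
    BlockMisc, // implicitly 1, formerly `BlockMisc = 1`
    Block,     // implicitly 2, formerly `Block = 2`
}

fn main() {
    assert_eq!(DemoCol::DbVersion as usize, 0);
    assert_eq!(DemoCol::BlockMisc as usize, 1);
    assert_eq!(DemoCol::Block as usize, 2);
}
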
64 changes: 62 additions & 2 deletions core/store/src/db/rocksdb.rs
@@ -552,8 +552,68 @@ fn into_other(error: rocksdb::Error) -> io::Error {
io::Error::new(io::ErrorKind::Other, error.into_string())
}

fn col_name(col: DBCol) -> String {
format!("col{}", col as usize)
/// Returns name of a RocksDB column family corresponding to given column.
///
/// Historically we used `col##` names (with `##` being the index of the column).
/// We have since deprecated this convention. All future column families are
/// named the same as the corresponding variant of the [`DBCol`] enum.
fn col_name(col: DBCol) -> &'static str {
match col {
DBCol::DbVersion => "col0",
DBCol::BlockMisc => "col1",
DBCol::Block => "col2",
DBCol::BlockHeader => "col3",
DBCol::BlockHeight => "col4",
DBCol::State => "col5",
DBCol::ChunkExtra => "col6",
DBCol::TransactionResult => "col7",
DBCol::OutgoingReceipts => "col8",
DBCol::IncomingReceipts => "col9",
DBCol::Peers => "col10",
DBCol::EpochInfo => "col11",
DBCol::BlockInfo => "col12",
DBCol::Chunks => "col13",
DBCol::PartialChunks => "col14",
DBCol::BlocksToCatchup => "col15",
DBCol::StateDlInfos => "col16",
DBCol::ChallengedBlocks => "col17",
DBCol::StateHeaders => "col18",
DBCol::InvalidChunks => "col19",
DBCol::BlockExtra => "col20",
DBCol::BlockPerHeight => "col21",
DBCol::StateParts => "col22",
DBCol::EpochStart => "col23",
DBCol::AccountAnnouncements => "col24",
DBCol::NextBlockHashes => "col25",
DBCol::EpochLightClientBlocks => "col26",
DBCol::ReceiptIdToShardId => "col27",
DBCol::_NextBlockWithNewChunk => "col28",
DBCol::_LastBlockWithNewChunk => "col29",
DBCol::PeerComponent => "col30",
DBCol::ComponentEdges => "col31",
DBCol::LastComponentNonce => "col32",
DBCol::Transactions => "col33",
DBCol::ChunkPerHeightShard => "col34",
DBCol::StateChanges => "col35",
DBCol::BlockRefCount => "col36",
DBCol::TrieChanges => "col37",
DBCol::BlockMerkleTree => "col38",
DBCol::ChunkHashesByHeight => "col39",
DBCol::BlockOrdinal => "col40",
DBCol::GCCount => "col41",
DBCol::OutcomeIds => "col42",
DBCol::_TransactionRefCount => "col43",
DBCol::ProcessedBlockHeights => "col44",
DBCol::Receipts => "col45",
DBCol::CachedContractCode => "col46",
DBCol::EpochValidatorInfo => "col47",
DBCol::HeaderHashesByHeight => "col48",
DBCol::StateChangesForSplitStates => "col49",
// If you’re adding a new column, do *not* create a new case for it.
// All new columns are handled by this default case:
#[allow(unreachable_patterns)]
_ => <&str>::from(col),
}
}
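
As a usage sketch for `col_name` (assumed, not part of the change set: it presumes `DBCol` and `col_name` are in scope and that `DBCol` derives the strum string conversion discussed earlier), pre-existing columns keep their numbered `col##` families while anything added after this commit falls through to the variant name:

#[cfg(test)]
mod col_name_sketch {
    use super::*;

    // Legacy columns keep their hard-coded "col##" column-family names.
    #[test]
    fn legacy_columns_keep_numbered_families() {
        assert_eq!(col_name(DBCol::DbVersion), "col0");
        assert_eq!(col_name(DBCol::StateChangesForSplitStates), "col49");
    }

    // Columns added after this commit use the variant name directly.
    #[test]
    #[cfg(feature = "protocol_feature_flat_state")]
    fn new_columns_use_variant_names() {
        assert_eq!(col_name(DBCol::FlatState), "FlatState");
    }
}
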

#[cfg(test)]
