ChainSafe · hanabi1224 · Jul 17, 2023 · Jul 7, 2023 · Jul 7, 2023 · Jul 13, 2023
@@ -33,6 +33,8 @@
   `forest-cli archive info` command for inspecting archives.
 - [#3159](https://github.com/ChainSafe/forest/issues/3159): Add
   `forest-cli archive export -e=X` command for exporting archives.
+- [#3150](https://github.com/ChainSafe/forest/pull/3150): Add
+  `forest-cli car concat` subcommand for concatenating multiple `.car` files.
 
 ### Changed
 

@@ -66,6 +66,7 @@ where
                         Subcommand::Archive(cmd) => cmd.run(config).await,
                         Subcommand::Attach(cmd) => cmd.run(config),
                         Subcommand::Shutdown(cmd) => cmd.run(config).await,
+                        Subcommand::Car(cmd) => cmd.run().await,
                     }
                 }
                 Err(e) => {

@@ -0,0 +1,125 @@
+// Copyright 2019-2023 ChainSafe Systems
+// SPDX-License-Identifier: Apache-2.0, MIT
+
+use std::path::PathBuf;
+
+use async_recursion::async_recursion;
+use clap::Subcommand;
+use futures::{AsyncRead, StreamExt};
+use fvm_ipld_car::Block;
+use fvm_ipld_car::CarHeader;
+use fvm_ipld_car::CarReader;
+use tokio_util::compat::TokioAsyncReadCompatExt;
+
+use crate::ipld::CidHashSet;
+
+/// Subcommands of `forest-cli car`.
+#[derive(Debug, Subcommand)]
+pub enum CarCommands {
+    /// Concatenate multiple `.car` files into a single one, writing each block only once
+    Concat {
+        /// A list of `.car` file paths
+        car_files: Vec<PathBuf>,
+        /// The output `.car` file path
+        #[arg(short, long)]
+        output: PathBuf,
+    },
+}
+
+impl CarCommands {
+    /// Executes the selected `car` subcommand.
+    ///
+    /// For `Concat`, every input file is opened as a CAR, the roots of all inputs
+    /// are merged in order of first appearance (duplicates dropped), and the
+    /// blocks of all inputs are streamed into `output`, skipping any block whose
+    /// CID has already been emitted.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if no input file is given, if an input file cannot be
+    /// opened or its CAR header cannot be read, or if the output file cannot be
+    /// created or written.
+    ///
+    /// # Panics
+    ///
+    /// Panics if reading a block from an input fails while streaming: the stream
+    /// handed to `write_stream_async` yields plain `(cid, data)` pairs, so a read
+    /// error cannot be passed through it.
+    pub async fn run(self) -> anyhow::Result<()> {
+        match self {
+            Self::Concat { car_files, output } => {
+                // Without inputs the output would be a CAR with no roots and no blocks.
+                anyhow::ensure!(
+                    !car_files.is_empty(),
+                    "at least one input `.car` file is required"
+                );
+
+                let mut readers = Vec::with_capacity(car_files.len());
+                for f in car_files {
+                    let file = tokio::fs::File::open(f).await?;
+                    let reader = CarReader::new(tokio::io::BufReader::new(file).compat()).await?;
+                    readers.push(reader);
+                }
+
+                // Union of the roots of all inputs, in order of first appearance.
+                let mut roots = vec![];
+                {
+                    let mut seen = CidHashSet::default();
+                    for reader in &readers {
+                        for &root in &reader.header.roots {
+                            if seen.insert(root) {
+                                roots.push(root);
+                            }
+                        }
+                    }
+                }
+
+                // Lazily pulls de-duplicated blocks out of the inputs, one at a time.
+                let mut stream = Box::pin(
+                    futures::stream::unfold(
+                        MultiCarDedupReader::new(readers),
+                        move |mut reader| async {
+                            reader
+                                .next_block()
+                                .await
+                                .expect("Failed calling `MultiCarDedupReader::next_block`")
+                                .map(|b| (b, reader))
+                        },
+                    )
+                    .map(|out| (out.cid, out.data)),
+                );
+
+                let car_writer = CarHeader::from(roots);
+                let mut output_file =
+                    tokio::io::BufWriter::new(tokio::fs::File::create(output).await?).compat();
+                car_writer
+                    .write_stream_async(&mut output_file, &mut stream)
+                    .await?;
+            }
+        }
+        Ok(())
+    }
+}
+
+/// Reads the blocks of several CAR readers one reader after another, returning
+/// each CID at most once.
+struct MultiCarDedupReader<R>
+where
+    R: AsyncRead + Send + Unpin,
+{
+    /// Underlying readers, consumed in order.
+    readers: Vec<CarReader<R>>,
+    /// Index into `readers` of the reader currently being drained.
+    index: usize,
+    /// CIDs of every block returned so far.
+    seen: CidHashSet,
+}
+
+impl<R> MultiCarDedupReader<R>
+where
+    R: AsyncRead + Send + Unpin,
+{
+    /// Creates a reader that starts at the first element of `readers` with an
+    /// empty set of seen CIDs.
+    fn new(readers: Vec<CarReader<R>>) -> Self {
+        Self {
+            readers,
+            index: 0,
+            seen: Default::default(),
+        }
+    }
+
+    /// Returns the next block whose CID has not been returned before, or `None`
+    /// once every reader is exhausted.
+    ///
+    /// # Errors
+    ///
+    /// Propagates any error reported by the underlying reader.
+    #[async_recursion]
+    async fn next_block(&mut self) -> Result<Option<Block>, fvm_ipld_car::Error> {
+        if self.index >= self.readers.len() {
+            return Ok(None);
+        }
+
+        // Duplicates are skipped with a loop rather than by recursing: one
+        // recursive call per duplicate block overflowed the stack in unit tests.
+        while let Some(block) = self.readers[self.index].next_block().await? {
+            if self.seen.insert(block.cid) {
+                return Ok(Some(block));
+            }
+        }
+
+        // The current reader is exhausted. Hand over to the next one and return
+        // its result as is: that call has already recorded the block's CID in
+        // `seen`, so checking it again here would wrongly discard the block.
+        // Recursion depth is bounded by the number of readers.
+        self.index += 1;
+        self.next_block().await
+    }
+}
@@ -9,6 +9,7 @@
 mod archive_cmd;
 mod attach_cmd;
 mod auth_cmd;
+mod car_cmd;
 mod chain_cmd;
 mod config_cmd;
 mod db_cmd;
@@ -37,7 +38,7 @@ use tracing::error;
 
 pub(super) use self::{
     archive_cmd::ArchiveCommands, attach_cmd::AttachCommand, auth_cmd::AuthCommands,
-    chain_cmd::ChainCommands, config_cmd::ConfigCommands, db_cmd::DBCommands,
+    car_cmd::CarCommands, chain_cmd::ChainCommands, config_cmd::ConfigCommands, db_cmd::DBCommands,
     fetch_params_cmd::FetchCommands, mpool_cmd::MpoolCommands, net_cmd::NetCommands,
     send_cmd::SendCommand, shutdown_cmd::ShutdownCommand, snapshot_cmd::SnapshotCommands,
     state_cmd::StateCommands, sync_cmd::SyncCommands, wallet_cmd::WalletCommands,
@@ -118,6 +119,10 @@ pub enum Subcommand {
 
     /// Shutdown Forest
     Shutdown(ShutdownCommand),
+
+    /// Utilities for manipulating CAR files
+    #[command(subcommand)]
+    Car(CarCommands),
 }
 
 /// Pretty-print a JSON-RPC error and exit

@@ -0,0 +1,118 @@
+// Copyright 2019-2023 ChainSafe Systems
+// SPDX-License-Identifier: Apache-2.0, MIT
+
+pub mod common;
+
+use std::path::Path;
+
+use anyhow::*;
+use cid::{
+    multihash::{self, MultihashDigest},
+    Cid,
+};
+use futures::StreamExt;
+use fvm_ipld_car::{CarHeader, CarReader};
+use fvm_ipld_encoding::DAG_CBOR;
+use rand::{rngs::SmallRng, Rng, SeedableRng};
+use tempfile::NamedTempFile;
+use tokio_util::compat::TokioAsyncReadCompatExt;
+
+use crate::common::cli;
+
+/// Concatenating two freshly generated CAR files must succeed and produce an
+/// output that can be read back as a CAR.
+#[tokio::test]
+async fn forest_cli_car_concat() -> Result<()> {
+    let a = NamedTempFile::new()?;
+    new_car(1024, a.path()).await?;
+    let b = NamedTempFile::new()?;
+    new_car(2048, b.path()).await?;
+    let output = NamedTempFile::new()?;
+
+    // `arg` accepts any `AsRef<OsStr>`, so the paths are passed as they are;
+    // converting with `to_str().unwrap()` would panic on a non-UTF-8 path.
+    cli()?
+        .arg("car")
+        .arg("concat")
+        .arg(a.path())
+        .arg(b.path())
+        .arg("-o")
+        .arg(output.path())
+        .assert()
+        .success();
+
+    validate_car(output.path()).await?;
+
+    Ok(())
+}
+
+/// Concatenating a CAR file with itself must succeed and produce an output that
+/// can be read back as a CAR.
+#[tokio::test]
+async fn forest_cli_car_concat_same_file() -> Result<()> {
+    let output = NamedTempFile::new()?;
+
+    // `arg` accepts any `AsRef<OsStr>`, so the output path is passed as it is;
+    // converting with `to_str().unwrap()` would panic on a non-UTF-8 path.
+    cli()?
+        .arg("car")
+        .arg("concat")
+        .arg("./test-snapshots/chain4.car")
+        .arg("./test-snapshots/chain4.car")
+        .arg("-o")
+        .arg(output.path())
+        .assert()
+        .success();
+
+    validate_car(output.path()).await?;
+
+    Ok(())
+}
+
+/// Passing the same CAR file three times must succeed and produce an output
+/// that can be read back as a CAR.
+#[tokio::test]
+async fn forest_cli_car_concat_same_file_3_times() -> Result<()> {
+    let output = NamedTempFile::new()?;
+
+    // `arg` accepts any `AsRef<OsStr>`, so the output path is passed as it is;
+    // converting with `to_str().unwrap()` would panic on a non-UTF-8 path.
+    cli()?
+        .arg("car")
+        .arg("concat")
+        .arg("./test-snapshots/chain4.car")
+        .arg("./test-snapshots/chain4.car")
+        .arg("./test-snapshots/chain4.car")
+        .arg("-o")
+        .arg(output.path())
+        .assert()
+        .success();
+
+    validate_car(output.path()).await?;
+
+    Ok(())
+}
+
+/// Writes a CAR file with `size` pseudo-random blocks to `path`.
+///
+/// The generator always starts from the same fixed seed, and the single root in
+/// the header is the CID of the first block that generator produces.
+async fn new_car(size: usize, path: impl AsRef<Path>) -> Result<()> {
+    let seed = SmallRng::seed_from_u64(0xdeadbeef);
+
+    // Peek at the first block through a clone so that the stream below still
+    // starts from the untouched seed state.
+    let mut peek = seed.clone();
+    let (root, _) = new_block(&mut peek);
+
+    let generator = futures::stream::unfold(seed, |mut state| async move {
+        let block = new_block(&mut state);
+        Some((block, state))
+    });
+    let mut blocks = Box::pin(generator.take(size));
+
+    let file = tokio::fs::File::create(path).await?;
+    let mut sink = file.compat();
+    CarHeader::from(vec![root])
+        .write_stream_async(&mut sink, &mut blocks)
+        .await?;
+
+    Ok(())
+}
+
+/// Draws 64 random bytes from `rng` and returns them together with their CID
+/// (CIDv1, `DAG_CBOR` codec, Blake2b-256 multihash of the bytes).
+fn new_block(rng: &mut SmallRng) -> (Cid, Vec<u8>) {
+    let mut payload = [0; 64];
+    rng.fill(&mut payload);
+
+    let digest = multihash::Code::Blake2b256.digest(&payload);
+    (Cid::new_v1(DAG_CBOR, digest), payload.to_vec())
+}
+
+/// Opens `path` as a CAR file, reads every block and prints how many were read.
+///
+/// Fails if the file cannot be opened, the header cannot be parsed, or reading
+/// any block returns an error.
+async fn validate_car(path: impl AsRef<Path>) -> Result<()> {
+    let file = tokio::fs::File::open(path).await?;
+    let mut car = CarReader::new(file.compat()).await?;
+    // NOTE(review): presumably `validate` makes the reader verify each block
+    // against its CID - confirm against the `fvm_ipld_car` documentation.
+    assert!(car.validate);
+
+    let mut blocks = 0;
+    loop {
+        if car.next_block().await?.is_none() {
+            break;
+        }
+        blocks += 1;
+    }
+    println!("Result car block count: {blocks}");
+    Ok(())
+}
@@ -8,6 +8,10 @@ use assert_cmd::Command;
 use tempfile::TempDir;
 
+/// Builds a [`Command`] for the `forest-cli` binary via `Command::cargo_bin`.
 pub fn cli() -> Result<Command> {
+    Ok(Command::cargo_bin("forest-cli")?)
+}
+
+/// Builds a [`Command`] for the `forest` binary via `Command::cargo_bin`.
+pub fn daemon() -> Result<Command> {
     Ok(Command::cargo_bin("forest")?)
 }
 

@@ -5,14 +5,14 @@ pub mod common;
 
 use anyhow::Result;
 
-use crate::common::{cli, create_tmp_config, CommonEnv};
+use crate::common::{create_tmp_config, daemon, CommonEnv};
 
 #[test]
 fn importing_bad_snapshot_should_fail() -> Result<()> {
     let (config_file, data_dir) = create_tmp_config()?;
     let temp_file = data_dir.path().join("bad-snapshot.car");
     std::fs::write(&temp_file, "bad-snapshot")?;
-    cli()?
+    daemon()?
         .common_env()
         .arg("--rpc-address")
         .arg("127.0.0.1:0")

@@ -9,13 +9,13 @@ use forest_filecoin::{
     KeyStore, KeyStoreConfig, ENCRYPTED_KEYSTORE_NAME, FOREST_KEYSTORE_PHRASE_ENV, KEYSTORE_NAME,
 };
 
-use crate::common::{cli, create_tmp_config, CommonArgs};
+use crate::common::{create_tmp_config, daemon, CommonArgs};
 
 // https://github.com/ChainSafe/forest/issues/2499
 #[test]
 fn forest_headless_encrypt_keystore_no_passphrase_should_fail() -> Result<()> {
     let (config_file, _data_dir) = create_tmp_config()?;
-    cli()?
+    daemon()?
         .common_args()
         .arg("--config")
         .arg(config_file)
@@ -28,7 +28,7 @@ fn forest_headless_encrypt_keystore_no_passphrase_should_fail() -> Result<()> {
 #[test]
 fn forest_headless_no_encrypt_no_passphrase_should_succeed() -> Result<()> {
     let (config_file, data_dir) = create_tmp_config()?;
-    cli()?
+    daemon()?
         .common_args()
         .arg("--config")
         .arg(config_file)
@@ -45,7 +45,7 @@ fn forest_headless_no_encrypt_no_passphrase_should_succeed() -> Result<()> {
 #[test]
 fn forest_headless_encrypt_keystore_with_passphrase_should_succeed() -> Result<()> {
     let (config_file, data_dir) = create_tmp_config()?;
-    cli()?
+    daemon()?
         .env(FOREST_KEYSTORE_PHRASE_ENV, "hunter2")
         .common_args()
         .arg("--config")
@@ -61,7 +61,7 @@ fn forest_headless_encrypt_keystore_with_passphrase_should_succeed() -> Result<(
 fn should_create_jwt_admin_token() -> Result<()> {
     let (config_file, data_dir) = create_tmp_config()?;
     let token_path = data_dir.path().join("admin-token");
-    cli()?
+    daemon()?
         .common_args()
         .arg("--config")
         .arg(config_file)