From 7aa17f60989959f48922590bcfe7a7ac55ef392d Mon Sep 17 00:00:00 2001 From: "C. Weaver" Date: Mon, 17 Apr 2023 16:38:18 -0400 Subject: [PATCH 1/3] Avoid computing MD5 checksums during FLAC compression. This is unnecessary as G3Frame protects all serialized data with a CRC, and removing it greatly decreases the time to serialize compressed waveforms. --- core/src/G3Timestream.cxx | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/core/src/G3Timestream.cxx b/core/src/G3Timestream.cxx index caa08650..4e27260a 100644 --- a/core/src/G3Timestream.cxx +++ b/core/src/G3Timestream.cxx @@ -93,6 +93,13 @@ static void flac_decoder_error_cb(const FLAC__StreamDecoder *decoder, log_fatal("FLAC decoding error (%d)", status); } } + +extern "C"{ + // Provide our own declaration of this function. + // This libFLAC interface is private but stable, and this use is officially sanctioned: + // https://github.com/xiph/flac/commit/3baaf23faa05eca1cfc34737d95131ad0b628d4c + FLAC__bool FLAC__stream_encoder_set_do_md5(FLAC__StreamEncoder *encoder, FLAC__bool value); +} #endif template void G3Timestream::save(A &ar, unsigned v) const @@ -152,6 +159,7 @@ template void G3Timestream::save(A &ar, unsigned v) const // XXX: should assert if high-order 8 bits are not clear FLAC__stream_encoder_set_bits_per_sample(encoder, 24); FLAC__stream_encoder_set_compression_level(encoder, use_flac_); + FLAC__stream_encoder_set_do_md5(encoder, false); FLAC__stream_encoder_init_stream(encoder, flac_encoder_write_cb, NULL, NULL, NULL, (void*)(&outbuf)); FLAC__stream_encoder_process (encoder, chanmap, inbuf.size()); @@ -237,6 +245,7 @@ template void G3Timestream::load(A &ar, unsigned v) callback.outbuf->reserve(callback.nbytes); FLAC__StreamDecoder *decoder = FLAC__stream_decoder_new(); + FLAC__stream_decoder_set_md5_checking(decoder, false); FLAC__stream_decoder_init_stream(decoder, flac_decoder_read_cb, NULL, NULL, NULL, NULL, flac_decoder_write_cb, NULL, flac_decoder_error_cb, From 7f8d8a029e41a058fcf7b724b77033008b5dd6e5 Mon Sep 17 00:00:00 2001 From: "C. Weaver" Date: Mon, 17 Apr 2023 16:49:45 -0400 Subject: [PATCH 2/3] Skip checking integer data for NaNs, as it cannot have any. --- core/src/G3Timestream.cxx | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/core/src/G3Timestream.cxx b/core/src/G3Timestream.cxx index 4e27260a..b6853c8e 100644 --- a/core/src/G3Timestream.cxx +++ b/core/src/G3Timestream.cxx @@ -137,11 +137,13 @@ template void G3Timestream::save(A &ar, unsigned v) const // rare case that only some samples are valid, store a // validity mask. std::vector nanbuf(size(), false); - for (size_t i = 0; i < size(); i++) { - if (!std::isfinite((*this)[i])) { - nans++; - nanbuf[i] = true; - inbuf[i] = 0; + if(data_type_==TS_DOUBLE || data_type_==TS_FLOAT){ + for (size_t i = 0; i < size(); i++) { + if (!std::isfinite((*this)[i])) { + nans++; + nanbuf[i] = true; + inbuf[i] = 0; + } } } nanflag = SomeNan; From 53f30bf5bfffcb1df3d3f0e8165205bfa8dd6e42 Mon Sep 17 00:00:00 2001 From: "C. Weaver" Date: Mon, 17 Apr 2023 17:02:21 -0400 Subject: [PATCH 3/3] Specialize type conversion by input type prior to FLAC compression. This avoids pointless double conversion for several of the possible storage types. --- core/src/G3Timestream.cxx | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/core/src/G3Timestream.cxx b/core/src/G3Timestream.cxx index b6853c8e..d4aa6cb4 100644 --- a/core/src/G3Timestream.cxx +++ b/core/src/G3Timestream.cxx @@ -124,9 +124,30 @@ template void G3Timestream::save(A &ar, unsigned v) const // Copy to 24-bit integers inbuf.resize(size()); - for (size_t i = 0; i < size(); i++) - inbuf[i] = ((int32_t((*this)[i]) & 0x00ffffff) << 8) - >> 8; + switch (data_type_) { + case TS_DOUBLE: + for (size_t i = 0; i < size(); i++) + inbuf[i] = ((int32_t(((double *)data_)[i]) & 0x00ffffff) << 8) >> 8; + break; + case TS_FLOAT: + for (size_t i = 0; i < size(); i++) + inbuf[i] = ((int32_t(((float *)data_)[i]) & 0x00ffffff) << 8) >> 8; + break; + case TS_INT32: + { + // Using this rather raw form for the loop can enable automatic + // unrolling and vectorization. + int32_t* in_ptr=(int32_t *)data_; + int32_t* out_ptr=&inbuf[0]; + for(int32_t* end=in_ptr+size(); in_ptr!=end; in_ptr++,out_ptr++) + *out_ptr = ((*in_ptr & 0x00ffffff) << 8) >> 8; + } + break; + case TS_INT64: + for (size_t i = 0; i < size(); i++) + inbuf[i] = ((int32_t(((int64_t *)data_)[i]) & 0x00ffffff) << 8) >> 8; + break; + } chanmap[0] = &inbuf[0]; // Mark bad samples using an out-of-band signal. Since we