From 514167631a6ade9c19436e34ad5ed438f5937cae Mon Sep 17 00:00:00 2001 From: Matthew Hasselfield Date: Fri, 13 Nov 2020 13:11:00 -0500 Subject: [PATCH 1/3] G3VectorInt stores int64_t instead of int32_t This allows G3VectorInt to properly store any valid G3Int, which was not previously the case. The G3Vector serialization for int64_t has been specialized and split (load/save) and is implemented in G3Vector.cxx. This passes all core tests, which includes loading v1 vectors. --- core/include/core/G3Vector.h | 34 +++++++++++- core/src/G3Vector.cxx | 102 +++++++++++++++++++++++++++++++++-- 2 files changed, 131 insertions(+), 5 deletions(-) diff --git a/core/include/core/G3Vector.h b/core/include/core/G3Vector.h index 7a4c0f4c..9ca2683d 100644 --- a/core/include/core/G3Vector.h +++ b/core/include/core/G3Vector.h @@ -34,6 +34,9 @@ class G3Vector : public G3FrameObject, public std::vector { cereal::base_class >(this)); } + template void load(A &ar, unsigned v); + template void save(A &ar, unsigned v) const; + std::string Summary() const { if (this->size() < 5) @@ -70,12 +73,41 @@ G3_SERIALIZABLE(y, 1); G3VECTOR_OF(std::complex, G3VectorComplexDouble); G3VECTOR_OF(double, G3VectorDouble); -G3VECTOR_OF(int32_t, G3VectorInt); G3VECTOR_OF(uint8_t, G3VectorUnsignedChar); G3VECTOR_OF(std::string, G3VectorString); G3VECTOR_OF(G3VectorString, G3VectorVectorString); G3VECTOR_OF(G3FrameObjectPtr, G3VectorFrameObject); G3VECTOR_OF(G3Time, G3VectorTime); + +/* G3VectorInt needs a separate implementation in order to support v1 + * (vector<int32_t>) and v2 (vector<int64_t>). So here we delete the + * default ::serialize implementation (which should be ignored anyway + * due to the specialize request below) and then declare ::load/save, + * to be implemented in G3Vector.cxx. 
*/ + +template <> +template +void G3Vector::serialize(A &ar, const unsigned v) = delete; + +template <> +template +void G3Vector::load(A &ar, const unsigned v); + +template <> +template +void G3Vector::save(A &ar, const unsigned v) const; + + +#define G3VECTOR_SPLIT(x, y, v) \ +typedef G3Vector< x > y; \ +namespace cereal { \ + template struct specialize {}; \ +} \ +G3_POINTERS(y); \ +G3_SERIALIZABLE(y, v); + +G3VECTOR_SPLIT(int64_t, G3VectorInt, 2); + #endif diff --git a/core/src/G3Vector.cxx b/core/src/G3Vector.cxx index a248643c..cc497461 100644 --- a/core/src/G3Vector.cxx +++ b/core/src/G3Vector.cxx @@ -4,7 +4,7 @@ #include #include -G3_SERIALIZABLE_CODE(G3VectorInt); +G3_SPLIT_SERIALIZABLE_CODE(G3VectorInt); G3_SERIALIZABLE_CODE(G3VectorDouble); G3_SERIALIZABLE_CODE(G3VectorComplexDouble); G3_SERIALIZABLE_CODE(G3VectorString); @@ -13,6 +13,100 @@ G3_SERIALIZABLE_CODE(G3VectorFrameObject); G3_SERIALIZABLE_CODE(G3VectorUnsignedChar); G3_SERIALIZABLE_CODE(G3VectorTime); +/* Special load/save for int64_t. */ + +static +int bit_count(std::vector const &d) { + // Returns the smallest number N such that all ints in the + // vector could be safely expressed as intN_t. Assumes two's + // complement integers. Return value will be between 1 and + // 64. 
+ uint64_t mask = 0; + for (auto c: d) { + if (c < 0) + mask |= ~c; + else + mask |= c; + } + for (int i=1; i<64; i++) { + if (mask == 0) + return i; + mask >>= 1; + } + return 64; +} + +template +void load_as(A &ar, std::vector &dest) { + std::vector temp; + ar & cereal::make_nvp("vector", temp); + dest.resize(temp.size()); + std::copy(temp.begin(), temp.end(), dest.begin()); +} + +template +void save_as(A &ar, const std::vector &src) { + std::vector temp(src.begin(), src.end()); + ar & cereal::make_nvp("vector", temp); +} + +template <> +template +void G3Vector::load(A &ar, const unsigned v) +{ + ar & cereal::make_nvp("G3FrameObject", + cereal::base_class(this)); + int store_bits = 32; + if (v >= 2) + ar & cereal::make_nvp("store_bits", store_bits); + + switch(store_bits) { + case 64: + ar & cereal::make_nvp("vector", + cereal::base_class >(this)); + break; + case 32: + load_as(ar, *this); + break; + case 16: + load_as(ar, *this); + break; + case 8: + load_as(ar, *this); + break; + } +} + +template <> +template +void G3Vector::save(A &ar, const unsigned v) const +{ + // v == 2 + ar & cereal::make_nvp("G3FrameObject", + cereal::base_class(this)); + // Count the interesting bits, and convert to nearest power of 2. 
+ int sig_bits = bit_count(*this); + int store_bits = 8; + while (store_bits < sig_bits) + store_bits *= 2; + ar & cereal::make_nvp("store_bits", store_bits); + switch(store_bits) { + case 8: + save_as(ar, *this); + break; + case 16: + save_as(ar, *this); + break; + case 32: + save_as(ar, *this); + break; + default: + ar & cereal::make_nvp("vector", + cereal::base_class >(this)); + } +} + + template <> G3VectorDoublePtr container_from_object(boost::python::object v) @@ -45,7 +139,7 @@ static int G3VectorInt_getbuffer(PyObject *obj, Py_buffer *view, int flags) { return pyvector_getbuffer(obj, view, flags, - "i"); + "q"); } static int @@ -85,9 +179,9 @@ PYBINDINGS("core") { vcclass->tp_flags |= Py_TPFLAGS_HAVE_NEWBUFFER; #endif - boost::python::object vecint = register_g3vector("G3VectorInt", + boost::python::object vecint = register_g3vector("G3VectorInt", "Array of integers. Treat as a serializable version of " - "numpy.array(dtype=int32). Can be efficiently cast to and from " + "numpy.array(dtype=int64). Can be efficiently cast to and from " "numpy arrays."); // Add buffer protocol interface PyTypeObject *viclass = (PyTypeObject *)vecint.ptr(); From edbb25ab010f873ba67b7bedcd7dd61f1ae65071 Mon Sep 17 00:00:00 2001 From: Matthew Hasselfield Date: Mon, 16 Nov 2020 08:41:14 -0500 Subject: [PATCH 2/3] Tests for G3VectorInt Updates portability.py to include v2, at least for LE. New test "vecint.py" to confirm full range support and compression work. 
--- core/CMakeLists.txt | 1 + core/tests/portability.py | 2 +- core/tests/portability/test-le-v2.g3 | Bin 0 -> 342 bytes core/tests/vecint.py | 70 +++++++++++++++++++++++++++ 4 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 core/tests/portability/test-le-v2.g3 create mode 100644 core/tests/vecint.py diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 9c32a03b..57be1cfd 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -56,6 +56,7 @@ add_spt3g_test(multifileio) add_spt3g_test(splitfileio) add_spt3g_test(compressedfileio) add_spt3g_test(portability) +add_spt3g_test(vecint) add_spt3g_test(ts_bufferprotocol) add_spt3g_test(timestream_slicing) add_spt3g_test(timestream_times) diff --git a/core/tests/portability.py b/core/tests/portability.py index bb622237..fd52c5a1 100755 --- a/core/tests/portability.py +++ b/core/tests/portability.py @@ -28,7 +28,7 @@ # For now, we test files from big-endian (PPC64) and little-endian (amd64) # 64-bit systems. Should include some 32-bit ones. 
-for test in ['test-be.g3', 'test-le.g3']: +for test in ['test-be.g3', 'test-le.g3', 'test-le-v2.g3']: print(test) testdata = core.G3Reader(os.path.join(testpath, test))(None)[0] diff --git a/core/tests/portability/test-le-v2.g3 b/core/tests/portability/test-le-v2.g3 new file mode 100644 index 0000000000000000000000000000000000000000..8911984eb3c65d559f1c664f1d43440edf720ef8 GIT binary patch literal 342 zcmZQ%WME)m0b)NOW(Hyq@J%d;g|HYHf$|O9P>#DXP{t*{G$|()Bn;9D!c0(UKA5un z%)E5}yx`1=lGN0^vu8j+-vMSgP#B_4!hsE{AuKi7Gq1!L%0sgfC|QzU1e6C^39<@g zB?l103>4G~2YCPl+%n5j)gVlW%UPiucVnmlFgL>_V1@u~u}9Vj_lq&ae%!u>_>aW_ MLd!U?>nXhl0QOxoBLDyZ literal 0 HcmV?d00001 diff --git a/core/tests/vecint.py b/core/tests/vecint.py new file mode 100644 index 00000000..5d8df594 --- /dev/null +++ b/core/tests/vecint.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python + +from spt3g import core + +import unittest +import os + +test_filename = 'inttest.g3' + +# Type of int needed to store the specified value. +bit_sizes = [ + ( 8, 0), + ( 8, 1), + ( 8, -1), + ( 8, 127), + ( 8, -128), + (16, 128), + (16, -129), + (16, 32767), + (16, -32768), + (32, 32768), + (32, -32769), + (32, 2147483647), + (32, -2147483648), + (64, 2147483648), + (64, 2147483649), + (64, 9223372036854775807), + (64, -9223372036854775808), +] + +class TestVectorInt(unittest.TestCase): + def tearDown(self): + os.remove(test_filename) + + def test_serialize(self): + """Confirm full ranges can be saved and loaded.""" + + w = core.G3Writer(test_filename) + for isize, val in bit_sizes: + f = core.G3Frame() + f['v'] = core.G3VectorInt([val] * 10) + w.Process(f) + del w + + r = core.G3Reader(test_filename) + for isize, val in bit_sizes: + f = r.Process(None)[0] + v_in = list(f['v']) + self.assertTrue(all([_v == val for _v in v_in]), + "Failed to save/load value %i" % val) + del r + + def test_compression(self): + """Confirm that minimum necessary int size is used for serialization.""" + count = 10000 + overhead = 200 + for isize, val in bit_sizes: + 
w = core.G3Writer(test_filename) + f = core.G3Frame() + f['v'] = core.G3VectorInt([val] * count) + w.Process(f) + del w + on_disk = os.path.getsize(test_filename) + self.assertTrue(abs(on_disk - count * isize / 8) <= overhead, + "Storage for val %i took %.2f bytes/item, " + "too far from %.2f bytes/item" % + (val, on_disk / count, isize / 8)) + +if __name__ == '__main__': + unittest.main() From 84974a00a8faaaf04a121aef2610d9714485e713 Mon Sep 17 00:00:00 2001 From: Nathan Whitehorn Date: Mon, 16 Nov 2020 14:17:45 -0500 Subject: [PATCH 3/3] Drop explicit use of Process() --- core/tests/vecint.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/tests/vecint.py b/core/tests/vecint.py index 5d8df594..51df851f 100644 --- a/core/tests/vecint.py +++ b/core/tests/vecint.py @@ -39,12 +39,12 @@ def test_serialize(self): for isize, val in bit_sizes: f = core.G3Frame() f['v'] = core.G3VectorInt([val] * 10) - w.Process(f) + w(f) del w r = core.G3Reader(test_filename) for isize, val in bit_sizes: - f = r.Process(None)[0] + f = r(None)[0] v_in = list(f['v']) self.assertTrue(all([_v == val for _v in v_in]), "Failed to save/load value %i" % val) @@ -58,7 +58,7 @@ def test_compression(self): w = core.G3Writer(test_filename) f = core.G3Frame() f['v'] = core.G3VectorInt([val] * count) - w.Process(f) + w(f) del w on_disk = os.path.getsize(test_filename) self.assertTrue(abs(on_disk - count * isize / 8) <= overhead,