CMB-S4 · nwhitehorn · Nov 16, 2020 · Nov 13, 2020 · Nov 16, 2020 · Nov 16, 2020
diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt
@@ -56,6 +56,7 @@ add_spt3g_test(multifileio)
 add_spt3g_test(splitfileio)
 add_spt3g_test(compressedfileio)
 add_spt3g_test(portability)
+add_spt3g_test(vecint)
 add_spt3g_test(ts_bufferprotocol)
 add_spt3g_test(timestream_slicing)
 add_spt3g_test(timestream_times)

diff --git a/core/include/core/G3Vector.h b/core/include/core/G3Vector.h
@@ -34,6 +34,9 @@ class G3Vector : public G3FrameObject, public std::vector<Value> {
 		    cereal::base_class<std::vector<Value> >(this));
 	}
 
+	/* Split load/save entry points.  They are declared for every
+	 * G3Vector<Value>, but cereal only calls them for value types that
+	 * request the member_load_save specialization (see G3VectorInt). */
+	template <class A> void load(A &ar, unsigned v);
+	template <class A> void save(A &ar, unsigned v) const;
+
 	std::string Summary() const
 	{
 		if (this->size() < 5)
@@ -70,12 +73,41 @@ G3_SERIALIZABLE(y, 1);
 
 G3VECTOR_OF(std::complex<double>, G3VectorComplexDouble);
 G3VECTOR_OF(double, G3VectorDouble);
-G3VECTOR_OF(int32_t, G3VectorInt);
 G3VECTOR_OF(uint8_t, G3VectorUnsignedChar);
 G3VECTOR_OF(std::string, G3VectorString);
 G3VECTOR_OF(G3VectorString, G3VectorVectorString);
 G3VECTOR_OF(G3FrameObjectPtr, G3VectorFrameObject);
 G3VECTOR_OF(G3Time, G3VectorTime);
 
+
+/* G3VectorInt needs a separate implementation in order to support v1
+ * (vector<int32_t>) and v2 (vector<int64_t>).  So here we delete the
+ * default ::serialize implementation (which should be ignored anyway
+ * due to the specialize request below) and then declare ::load/save,
+ * to be implemented in G3Vector.cxx.
+ *
+ * Serialized layout, as implemented by load/save in G3Vector.cxx:
+ *   v1: G3FrameObject base, then the elements as vector<int32_t>.
+ *   v2: G3FrameObject base, an int "store_bits" (8, 16, 32 or 64),
+ *       then the elements as a vector of signed ints of that width. */
+
+template <>
+template <class A>
+void G3Vector<int64_t>::serialize(A &ar, const unsigned v) = delete;
+
+template <>
+template <class A>
+void G3Vector<int64_t>::load(A &ar, const unsigned v);
+
+template <>
+template <class A>
+void G3Vector<int64_t>::save(A &ar, const unsigned v) const;
+
+
+/* G3VECTOR_SPLIT(x, y, v): declare y as G3Vector<x> and tell cereal to
+ * use y's member load()/save() pair rather than serialize().  v is
+ * forwarded to G3_SERIALIZABLE (presumably the class version number --
+ * confirm against that macro's definition, which is not shown here). */
+#define G3VECTOR_SPLIT(x, y, v) \
+typedef G3Vector< x > y; \
+namespace cereal { \
+	template <class A> struct specialize<A, y, cereal::specialization::member_load_save> {}; \
+} \
+G3_POINTERS(y); \
+G3_SERIALIZABLE(y, v);
+
+G3VECTOR_SPLIT(int64_t, G3VectorInt, 2);
+
 #endif
 
diff --git a/core/src/G3Vector.cxx b/core/src/G3Vector.cxx
@@ -4,7 +4,7 @@
 #include <G3Vector.h>
 #include <complex>
 
-G3_SERIALIZABLE_CODE(G3VectorInt);
+G3_SPLIT_SERIALIZABLE_CODE(G3VectorInt);
 G3_SERIALIZABLE_CODE(G3VectorDouble);
 G3_SERIALIZABLE_CODE(G3VectorComplexDouble);
 G3_SERIALIZABLE_CODE(G3VectorString);
@@ -13,6 +13,100 @@ G3_SERIALIZABLE_CODE(G3VectorFrameObject);
 G3_SERIALIZABLE_CODE(G3VectorUnsignedChar);
 G3_SERIALIZABLE_CODE(G3VectorTime);
 
+/* Special load/save for int64_t. */
+
+static
+int bit_count(std::vector<int64_t> const &d) {
+	// Computes the minimum width N (1 <= N <= 64) of a two's
+	// complement signed integer able to hold every element of d.
+	// Negative values are complemented first so that, for either
+	// sign, only the bits below the sign bit can remain set.
+	uint64_t magnitude_bits = 0;
+	for (const int64_t value : d)
+		magnitude_bits |= static_cast<uint64_t>(value < 0 ? ~value : value);
+
+	// One bit is always needed for the sign; add one more for each
+	// occupied magnitude bit, saturating at 64.
+	int width = 1;
+	while (magnitude_bits != 0 && width < 64) {
+		magnitude_bits >>= 1;
+		width++;
+	}
+	return width;
+}
+
+template <class A, typename FROM_TYPE, typename TO_TYPE>
+void load_as(A &ar, std::vector<TO_TYPE> &dest) {
+	// Read the "vector" field as FROM_TYPE elements, then replace the
+	// contents of dest with those elements converted to TO_TYPE.
+	std::vector<FROM_TYPE> stored;
+	ar & cereal::make_nvp("vector", stored);
+	dest.assign(stored.begin(), stored.end());
+}
+
+template <class A, typename FROM_TYPE, typename TO_TYPE>
+void save_as(A &ar, const std::vector<FROM_TYPE> &src) {
+	// Convert every element of src to TO_TYPE and write the converted
+	// copy as the "vector" field.
+	std::vector<TO_TYPE> converted;
+	converted.reserve(src.size());
+	for (const FROM_TYPE &element : src)
+		converted.push_back(static_cast<TO_TYPE>(element));
+	ar & cereal::make_nvp("vector", converted);
+}
+
+template <>
+template <class A>
+void G3Vector<int64_t>::load(A &ar, const unsigned v)
+{
+	// Deserialize a G3VectorInt written with class version v.
+	// Throws cereal::Exception if the stored element width is not
+	// one of 8, 16, 32 or 64.
+	ar & cereal::make_nvp("G3FrameObject",
+			      cereal::base_class<G3FrameObject>(this));
+
+	// Version 1 has no width field and always holds 32-bit
+	// elements; from version 2 on the width is stored explicitly.
+	int store_bits = 32;
+	if (v >= 2)
+		ar & cereal::make_nvp("store_bits", store_bits);
+
+	switch(store_bits) {
+	case 64:
+		ar & cereal::make_nvp("vector",
+				      cereal::base_class<std::vector<int64_t> >(this));
+		break;
+	case 32:
+		load_as<A, int32_t, int64_t>(ar, *this);
+		break;
+	case 16:
+		load_as<A, int16_t, int64_t>(ar, *this);
+		break;
+	case 8:
+		load_as<A, int8_t, int64_t>(ar, *this);
+		break;
+	default:
+		// Without this, an unknown width would silently yield an
+		// empty vector and leave the element data unread.
+		throw cereal::Exception(
+		    "G3VectorInt: unsupported store_bits value " +
+		    std::to_string(store_bits));
+	}
+}
+
+template <>
+template <class A>
+void G3Vector<int64_t>::save(A &ar, const unsigned v) const
+{
+	// Writes the version-2 layout: base object, "store_bits", then
+	// the elements at the chosen width.
+	ar & cereal::make_nvp("G3FrameObject",
+			      cereal::base_class<G3FrameObject>(this));
+
+	// Pick the smallest of 8/16/32/64 that fits every element.
+	const int needed_bits = bit_count(*this);
+	int store_bits = 64;
+	if (needed_bits <= 8)
+		store_bits = 8;
+	else if (needed_bits <= 16)
+		store_bits = 16;
+	else if (needed_bits <= 32)
+		store_bits = 32;
+	ar & cereal::make_nvp("store_bits", store_bits);
+
+	if (store_bits == 8) {
+		save_as<A, int64_t, int8_t>(ar, *this);
+	} else if (store_bits == 16) {
+		save_as<A, int64_t, int16_t>(ar, *this);
+	} else if (store_bits == 32) {
+		save_as<A, int64_t, int32_t>(ar, *this);
+	} else {
+		// Full width: serialize the underlying vector directly.
+		ar & cereal::make_nvp("vector",
+				      cereal::base_class<std::vector<int64_t> >(this));
+	}
+}
+
+
 template <>
 G3VectorDoublePtr
 container_from_object(boost::python::object v)
@@ -45,7 +139,7 @@ static int
 G3VectorInt_getbuffer(PyObject *obj, Py_buffer *view, int flags)
 {
 	return pyvector_getbuffer<G3VectorInt::value_type>(obj, view, flags,
-	    "i");
+	    "q");
 }
 
 static int
@@ -85,9 +179,9 @@ PYBINDINGS("core") {
 	vcclass->tp_flags |= Py_TPFLAGS_HAVE_NEWBUFFER;
 #endif
 
-	boost::python::object vecint = register_g3vector<int32_t>("G3VectorInt",
+	boost::python::object vecint = register_g3vector<int64_t>("G3VectorInt",
 	    "Array of integers. Treat as a serializable version of "
-	    "numpy.array(dtype=int32). Can be efficiently cast to and from "
+	    "numpy.array(dtype=int64). Can be efficiently cast to and from "
 	    "numpy arrays.");
 	// Add buffer protocol interface
 	PyTypeObject *viclass = (PyTypeObject *)vecint.ptr();

diff --git a/core/tests/portability.py b/core/tests/portability.py
@@ -28,7 +28,7 @@
 # For now, we test files from big-endian (PPC64) and little-endian (amd64)
 # 64-bit systems. Should include some 32-bit ones.
 
-for test in ['test-be.g3', 'test-le.g3']:
+for test in ['test-be.g3', 'test-le.g3', 'test-le-v2.g3']:
 	print(test)
 	testdata = core.G3Reader(os.path.join(testpath, test))(None)[0]
 

diff --git a/core/tests/portability/test-le-v2.g3 b/core/tests/portability/test-le-v2.g3
diff --git a/core/tests/vecint.py b/core/tests/vecint.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python
+
+from spt3g import core
+
+import unittest
+import os
+
+test_filename = 'inttest.g3'
+
+# (bits, value) pairs: `bits` is the narrowest signed integer width
+# (8, 16, 32 or 64) able to hold `value`.  The values sit on, and just
+# past, the limits of each width.
+bit_sizes = [
+    ( 8,  0),
+    ( 8,  1),
+    ( 8, -1),
+    ( 8,  127),
+    ( 8, -128),
+    (16,  128),
+    (16, -129),
+    (16,  32767),
+    (16, -32768),
+    (32,  32768),
+    (32, -32769),
+    (32,  2147483647),
+    (32, -2147483648),
+    (64,  2147483648),
+    (64,  2147483649),
+    (64,  9223372036854775807),
+    (64, -9223372036854775808),
+]
+
+class TestVectorInt(unittest.TestCase):
+    """Round-trip and on-disk size checks for G3VectorInt serialization."""
+
+    def tearDown(self):
+        # A test that fails before the file is written must not raise a
+        # second, unrelated error during cleanup.
+        if os.path.exists(test_filename):
+            os.remove(test_filename)
+
+    def test_serialize(self):
+        """Confirm full ranges can be saved and loaded."""
+        n_items = 10
+
+        w = core.G3Writer(test_filename)
+        for isize, val in bit_sizes:
+            f = core.G3Frame()
+            f['v'] = core.G3VectorInt([val] * n_items)
+            w(f)
+        del w
+
+        r = core.G3Reader(test_filename)
+        for isize, val in bit_sizes:
+            f = r(None)[0]
+            # Compare whole lists: all() over an empty vector is
+            # vacuously True and would hide dropped data.
+            self.assertEqual(list(f['v']), [val] * n_items,
+                             "Failed to save/load value %i" % val)
+        del r
+
+    def test_compression(self):
+        """Confirm that minimum necessary int size is used for serialization."""
+        count = 10000
+        overhead = 200
+        for isize, val in bit_sizes:
+            w = core.G3Writer(test_filename)
+            f = core.G3Frame()
+            f['v'] = core.G3VectorInt([val] * count)
+            w(f)
+            del w
+            on_disk = os.path.getsize(test_filename)
+            # count * isize is always a multiple of 8, so // is exact.
+            expected = count * isize // 8
+            # Float divisors keep the message correct under Python 2.
+            self.assertLessEqual(abs(on_disk - expected), overhead,
+                                 "Storage for val %i took %.2f bytes/item, "
+                                 "too far from %.2f bytes/item" %
+                                 (val, on_disk / float(count), isize / 8.0))
+
+if __name__ == '__main__':
+    unittest.main()