From 1b5766edaa0196a9a9f91fc666aa6d152f4fb9d2 Mon Sep 17 00:00:00 2001
From: Taylor Isbell
Date: Thu, 18 Jan 2024 14:13:33 -0500
Subject: [PATCH 001/137] moved dependencies to requirements subdir

---
 python/tests/requirements/dbr133.txt |  7 +++++++
 python/tests/requirements/dev.txt    |  3 +++
 python/tox.ini                       | 19 ++++++++++---------
 3 files changed, 20 insertions(+), 9 deletions(-)
 create mode 100644 python/tests/requirements/dbr133.txt
 create mode 100644 python/tests/requirements/dev.txt

diff --git a/python/tests/requirements/dbr133.txt b/python/tests/requirements/dbr133.txt
new file mode 100644
index 00000000..633a452c
--- /dev/null
+++ b/python/tests/requirements/dbr133.txt
@@ -0,0 +1,7 @@
+delta-spark==2.4.0
+ipython==8.10.0
+numpy==1.21.5
+pandas==1.4.4
+pyarrow==8.0.0
+pyspark==3.4.1
+scipy==1.9.1
\ No newline at end of file
diff --git a/python/tests/requirements/dev.txt b/python/tests/requirements/dev.txt
new file mode 100644
index 00000000..c8b70429
--- /dev/null
+++ b/python/tests/requirements/dev.txt
@@ -0,0 +1,3 @@
+chispa
+jsonref
+python-dateutil
\ No newline at end of file
diff --git a/python/tox.ini b/python/tox.ini
index d6af2f91..eb160f2f 100644
--- a/python/tox.ini
+++ b/python/tox.ini
@@ -11,9 +11,9 @@ envlist =
     build-dist
 ; Mirror Supported LTS DBR versions here: https://docs.databricks.com/release-notes/runtime/
 ; Use correct PySpark version based on Python version present in env name
-    py37-pyspark300,
-    py38-pyspark{312,321},
-    py39-pyspark{330,332}
+    ; py38-pyspark{312,321},
+    ; py39-pyspark{330,332},
+    py10-dbr133
 skip_missing_interpreters = true
@@ -24,13 +24,14 @@ wheel_build_env = .pkg
 setenv =
     COVERAGE_FILE = .coverage.{envname}
 deps =
-    pyspark300: pyspark==3.0.0
-    pyspark312: pyspark==3.1.2
-    pyspark321: pyspark==3.2.1
-    pyspark330: pyspark==3.3.0
-    pyspark332: pyspark==3.3.2
+    ; pyspark312: pyspark==3.1.2
+    ; pyspark321: pyspark==3.2.1
+    ; pyspark330: pyspark==3.3.0
+    ; pyspark332: pyspark==3.3.2
+    dbr133: -rtests/requirements/dbr133.txt
+    -rtests/requirements/dev.txt
     coverage>=7,<8
-    -rrequirements.txt
+    ; -rrequirements.txt
 commands =
     coverage --version
     coverage run -m unittest discover -s tests -p '*_tests.py'

From 42516b02b0b7795a037b630c57cf6f15dd29f81e Mon Sep 17 00:00:00 2001
From: Taylor Isbell
Date: Thu, 18 Jan 2024 17:49:51 -0500
Subject: [PATCH 002/137] added delta helper func to configure spark session

---
 python/tests/base.py | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/python/tests/base.py b/python/tests/base.py
index 7da859c8..e8a4e856 100644
--- a/python/tests/base.py
+++ b/python/tests/base.py
@@ -5,9 +5,9 @@ from typing import Union
 
 import jsonref
-from chispa import assert_df_equality
-
 import pyspark.sql.functions as sfn
+from chispa import assert_df_equality
+from delta.pip_utils import configure_spark_with_delta_pip
 from pyspark.sql import SparkSession
 from pyspark.sql.dataframe import DataFrame
@@ -28,9 +28,14 @@ class SparkTest(unittest.TestCase):
     def setUpClass(cls) -> None:
         # create and configure PySpark Session
         cls.spark = (
-            SparkSession.builder.appName("unit-tests")
-            .config("spark.jars.packages", "io.delta:delta-core_2.12:1.1.0")
-            .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
+            configure_spark_with_delta_pip(
+                SparkSession.builder.appName("unit-tests")
+            )
+            # .config("spark.jars.packages", "io.delta:delta-core_2.12:1.1.0")
+            .config(
+                "spark.sql.extensions",
+                "io.delta.sql.DeltaSparkSessionExtension",
+            )
             .config(
                 "spark.sql.catalog.spark_catalog",
                 "org.apache.spark.sql.delta.catalog.DeltaCatalog",
@@ -70,7 +75,9 @@ def tearDown(self) -> None:
     def get_data_as_sdf(self, name: str, convert_ts_col=True):
         td = self.test_data[name]
         ts_cols = []
-        if convert_ts_col and (td.get("ts_col", None) or td.get("other_ts_cols", [])):
+        if convert_ts_col and (
+            td.get("ts_col", None) or td.get("other_ts_cols", [])
+        ):
             ts_cols = [td["ts_col"]] if "ts_col" in td else []
             ts_cols.extend(td.get("other_ts_cols", []))
         return self.buildTestDF(td["schema"], td["data"], ts_cols)
@@ -124,7 +131,7 @@ def __loadTestData(self, test_case_path: str) -> dict:
         :param test_case_path: string representation of the data path e.g. : "tsdf_tests.BasicTests.test_describe"
         :type test_case_path: str
         """
-        file_name, class_name, func_name = test_case_path.split(".")
+        file_name, class_name, func_name = test_case_path.split(".")[-3:]
 
         # find our test data file
         test_data_file = self.__getTestDataFilePath(file_name)
@@ -137,7 +144,9 @@ def __loadTestData(self, test_case_path: str) -> dict:
             data_metadata_from_json = jsonref.load(f)
             # warn if data not present
             if class_name not in data_metadata_from_json:
-                warnings.warn(f"Could not load test data for {file_name}.{class_name}")
+                warnings.warn(
+                    f"Could not load test data for {file_name}.{class_name}"
+                )
                 return {}
             if func_name not in data_metadata_from_json[class_name]:
                 warnings.warn(

From 5133f9de3b45e40eaf73b22f2c5fc51833c12173 Mon Sep 17 00:00:00 2001
From: Taylor Isbell
Date: Thu, 18 Jan 2024 18:03:51 -0500
Subject: [PATCH 003/137] cleaned up comments

---
 python/tests/base.py | 2 --
 python/tox.ini       | 8 +-------
 2 files changed, 1 insertion(+), 9 deletions(-)

diff --git a/python/tests/base.py b/python/tests/base.py
index e8a4e856..a4b47070 100644
--- a/python/tests/base.py
+++ b/python/tests/base.py
@@ -10,7 +10,6 @@ from delta.pip_utils import configure_spark_with_delta_pip
 from pyspark.sql import SparkSession
 from pyspark.sql.dataframe import DataFrame
-
 from tempo.intervals import IntervalsDF
 from tempo.tsdf import TSDF
@@ -31,7 +30,6 @@ def setUpClass(cls) -> None:
             configure_spark_with_delta_pip(
                 SparkSession.builder.appName("unit-tests")
             )
-            # .config("spark.jars.packages", "io.delta:delta-core_2.12:1.1.0")
             .config(
                 "spark.sql.extensions",
                 "io.delta.sql.DeltaSparkSessionExtension",
             )
diff --git a/python/tox.ini b/python/tox.ini
index eb160f2f..007aa90d 100644
--- a/python/tox.ini
+++ b/python/tox.ini
@@ -11,8 +11,6 @@ envlist =
     build-dist
 ; Mirror Supported LTS DBR versions here: https://docs.databricks.com/release-notes/runtime/
 ; Use correct PySpark version based on Python version present in env name
-    ; py38-pyspark{312,321},
-    ; py39-pyspark{330,332},
     py10-dbr133
 skip_missing_interpreters = true
@@ -24,14 +22,10 @@ wheel_build_env = .pkg
 setenv =
     COVERAGE_FILE = .coverage.{envname}
 deps =
-    ; pyspark312: pyspark==3.1.2
-    ; pyspark321: pyspark==3.2.1
-    ; pyspark330: pyspark==3.3.0
-    ; pyspark332: pyspark==3.3.2
     dbr133: -rtests/requirements/dbr133.txt
     -rtests/requirements/dev.txt
     coverage>=7,<8
-    ; -rrequirements.txt
+
 commands =
     coverage --version
     coverage run -m unittest discover -s tests -p '*_tests.py'

From 002040fd6994e8313a426722ceb781e91ffe2a8b Mon Sep 17 00:00:00 2001
From: Taylor Isbell
Date: Fri, 19 Jan 2024 08:08:13 -0500
Subject: [PATCH 004/137] added DBR 12.2 and 14.2

---
 python/tests/requirements/dbr122.txt | 7 +++++++
 python/tests/requirements/dbr142.txt | 7 +++++++
 python/tox.ini                       | 6 +++++-
 3 files changed, 19 insertions(+), 1 deletion(-)
 create mode 100644 python/tests/requirements/dbr122.txt
 create mode 100644 python/tests/requirements/dbr142.txt
diff --git a/python/tests/requirements/dbr122.txt b/python/tests/requirements/dbr122.txt
new file mode 100644
index 00000000..d5f44af9
--- /dev/null
+++ b/python/tests/requirements/dbr122.txt
@@ -0,0 +1,7 @@
+delta-spark==2.2.0
+ipython==8.5.0
+numpy==1.21.5
+pandas==1.4.2
+pyarrow==7.0.0
+pyspark==3.3.2
+scipy==1.7.3
\ No newline at end of file
diff --git a/python/tests/requirements/dbr142.txt b/python/tests/requirements/dbr142.txt
new file mode 100644
index 00000000..b4ad90dd
--- /dev/null
+++ b/python/tests/requirements/dbr142.txt
@@ -0,0 +1,7 @@
+delta-spark==3.0.0
+ipython==8.14.0
+numpy==1.23.5
+pandas==1.5.3
+pyarrow==8.0.0
+pyspark==3.5.0
+scipy==1.10.0
\ No newline at end of file
diff --git a/python/tox.ini b/python/tox.ini
index 007aa90d..72798c61 100644
--- a/python/tox.ini
+++ b/python/tox.ini
@@ -11,7 +11,9 @@ envlist =
    build-dist
 ; Mirror Supported LTS DBR versions here: https://docs.databricks.com/release-notes/runtime/
 ; Use correct PySpark version based on Python version present in env name
-    py10-dbr133
+    py39-dbr122
+    py310-dbr133
+    py310-dbr142
 skip_missing_interpreters = true
@@ -24,6 +26,8 @@ wheel_build_env = .pkg
 setenv =
     COVERAGE_FILE = .coverage.{envname}
 deps =
+    dbr122: -rtests/requirements/dbr122.txt
     dbr133: -rtests/requirements/dbr133.txt
+    dbr142: -rtests/requirements/dbr142.txt
     -rtests/requirements/dev.txt
     coverage>=7,<8

From e89df24ee37453df662e87650d0d8a07e42016e3 Mon Sep 17 00:00:00 2001
From: Taylor Isbell
Date: Fri, 19 Jan 2024 12:53:13 -0500
Subject: [PATCH 005/137] changed conditional to use isinstance

---
 python/tempo/tsdf.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/python/tempo/tsdf.py b/python/tempo/tsdf.py
index bbec6b78..29e517d2 100644
--- a/python/tempo/tsdf.py
+++ b/python/tempo/tsdf.py
@@ -13,6 +13,7 @@ from pyspark.sql import SparkSession
 from pyspark.sql.column import Column
 from pyspark.sql.dataframe import DataFrame
+from pyspark.sql.types import TimestampType
 from pyspark.sql.window import Window, WindowSpec
 from scipy.fft import fft, fftfreq  # type: ignore
@@ -1102,7 +1103,7 @@ def withRangeStats(
         ]
 
         # build window
-        if str(self.df.schema[self.ts_col].dataType) == "TimestampType":
+        if isinstance(self.df.schema[self.ts_col].dataType, TimestampType):
             self.df = self.__add_double_ts()
             prohibited_cols.extend(["double_ts"])
             w = self.__rangeBetweenWindow(

From fdaf6ec1064dcb152d01596e31e58f4cc8865e57 Mon Sep 17 00:00:00 2001
From: Taylor Isbell
Date: Fri, 19 Jan 2024 13:04:29 -0500
Subject: [PATCH 006/137] added DBR 11.3 to tox.ini

---
 python/tests/requirements/dbr113.txt | 7 +++++++
 python/tox.ini                       | 2 ++
 2 files changed, 9 insertions(+)
 create mode 100644 python/tests/requirements/dbr113.txt

diff --git a/python/tests/requirements/dbr113.txt b/python/tests/requirements/dbr113.txt
new file mode 100644
index 00000000..a2fe6b88
--- /dev/null
+++ b/python/tests/requirements/dbr113.txt
@@ -0,0 +1,7 @@
+delta-spark==2.1.0
+ipython==7.32.0
+numpy==1.20.3
+pandas==1.3.4
+pyarrow==7.0.0
+pyspark==3.3.0
+scipy==1.7.1
\ No newline at end of file
diff --git a/python/tox.ini b/python/tox.ini
index 72798c61..e1fb1397 100644
--- a/python/tox.ini
+++ b/python/tox.ini
@@ -11,6 +11,7 @@ envlist =
    build-dist
 ; Mirror Supported LTS DBR versions here: https://docs.databricks.com/release-notes/runtime/
 ; Use correct PySpark version based on Python version present in env name
+    py39-dbr113
     py39-dbr122
     py310-dbr133
     py310-dbr142
 skip_missing_interpreters = true
@@ -25,6 +26,7 @@ wheel_build_env = .pkg
 setenv =
     COVERAGE_FILE = .coverage.{envname}
 deps =
+    dbr113: -rtests/requirements/dbr113.txt
     dbr122: -rtests/requirements/dbr122.txt
     dbr133: -rtests/requirements/dbr133.txt
     dbr142: -rtests/requirements/dbr142.txt
     -rtests/requirements/dev.txt
     coverage>=7,<8

From 4812cafaa055680e96cd5e6d42694f8171d28fae Mon Sep 17 00:00:00 2001
From: Taylor Isbell
Date: Sat, 20 Jan 2024 18:28:27 -0500
Subject: [PATCH 007/137] added DBR 10.4 to tox.ini

---
 python/tests/requirements/dbr104.txt | 7 +++++++
 python/tox.ini                       | 2 ++
 2 files changed, 9 insertions(+)
 create mode 100644 python/tests/requirements/dbr104.txt

diff --git a/python/tests/requirements/dbr104.txt b/python/tests/requirements/dbr104.txt
new file mode 100644
index 00000000..4e2284cf
--- /dev/null
+++ b/python/tests/requirements/dbr104.txt
@@ -0,0 +1,7 @@
+delta-spark==1.1.0
+ipython==7.22.0
+numpy==1.20.1
+pandas==1.2.4
+pyarrow==4.0.0
+pyspark==3.2.1
+scipy==1.6.2
\ No newline at end of file
diff --git a/python/tox.ini b/python/tox.ini
index e1fb1397..7210cea6 100644
--- a/python/tox.ini
+++ b/python/tox.ini
@@ -11,6 +11,7 @@ envlist =
    build-dist
 ; Mirror Supported LTS DBR versions here: https://docs.databricks.com/release-notes/runtime/
 ; Use correct PySpark version based on Python version present in env name
+    py38-dbr104
     py39-dbr113
     py39-dbr122
     py310-dbr133
     py310-dbr142
 skip_missing_interpreters = true
@@ -26,6 +27,7 @@ wheel_build_env = .pkg
 setenv =
     COVERAGE_FILE = .coverage.{envname}
 deps =
+    dbr104: -rtests/requirements/dbr104.txt
     dbr113: -rtests/requirements/dbr113.txt
     dbr122: -rtests/requirements/dbr122.txt
     dbr133: -rtests/requirements/dbr133.txt
     dbr142: -rtests/requirements/dbr142.txt

From 9b5b6504736d16131acebba2255c993ed11faa05 Mon Sep 17 00:00:00 2001
From: Taylor Isbell
Date: Sat, 20 Jan 2024 20:50:00 -0500
Subject: [PATCH 008/137] set ignore_metadata=True

---
 python/tests/base.py | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/python/tests/base.py b/python/tests/base.py
index a4b47070..cdba2845 100644
--- a/python/tests/base.py
+++ b/python/tests/base.py
@@ -27,9 +27,7 @@ class SparkTest(unittest.TestCase):
     def setUpClass(cls) -> None:
         # create and configure PySpark Session
         cls.spark = (
-            configure_spark_with_delta_pip(
-                SparkSession.builder.appName("unit-tests")
-            )
+            configure_spark_with_delta_pip(SparkSession.builder.appName("unit-tests"))
             .config(
                 "spark.sql.extensions",
                 "io.delta.sql.DeltaSparkSessionExtension",
             )
@@ -73,9 +71,7 @@ def tearDown(self) -> None:
     def get_data_as_sdf(self, name: str, convert_ts_col=True):
         td = self.test_data[name]
         ts_cols = []
-        if convert_ts_col and (
-            td.get("ts_col", None) or td.get("other_ts_cols", [])
-        ):
+        if convert_ts_col and (td.get("ts_col", None) or td.get("other_ts_cols", [])):
             ts_cols = [td["ts_col"]] if "ts_col" in td else []
             ts_cols.extend(td.get("other_ts_cols", []))
         return self.buildTestDF(td["schema"], td["data"], ts_cols)
@@ -142,9 +138,7 @@ def __loadTestData(self, test_case_path: str) -> dict:
             data_metadata_from_json = jsonref.load(f)
             # warn if data not present
             if class_name not in data_metadata_from_json:
-                warnings.warn(
-                    f"Could not load test data for {file_name}.{class_name}"
-                )
+                warnings.warn(f"Could not load test data for {file_name}.{class_name}")
                 return {}
             if func_name not in data_metadata_from_json[class_name]:
                 warnings.warn(
@@ -232,4 +226,5 @@ def assertDataFrameEquality(
             ignore_row_order=ignore_row_order,
             ignore_column_order=ignore_column_order,
             ignore_nullable=ignore_nullable,
+            ignore_metadata=True,
         )

From 2a5c33d408f499d077b435d688295699dd399800 Mon Sep 17 00:00:00 2001
From: Taylor Isbell
Date: Tue, 23 Jan 2024 07:44:01 -0500
Subject: [PATCH 009/137] simplified tox.ini and moved doc requirements to
 docs dir

---
 docs/requirements.txt   |  6 ++++++
 python/requirements.txt | 19 -------------------
 python/tox.ini          | 28 +++++++++++++++++-----------
 3 files changed, 23 insertions(+), 30 deletions(-)
 create mode 100644 docs/requirements.txt
 delete mode 100644 python/requirements.txt

diff --git a/docs/requirements.txt b/docs/requirements.txt
new file mode 100644
index 00000000..7a76c34a
--- /dev/null
+++ b/docs/requirements.txt
@@ -0,0 +1,6 @@
+sphinx-autobuild==2021.3.14
+sphinx-copybutton==0.5.1
+Sphinx==4.5.0
+sphinx-design==0.2.0
+sphinx-panels==0.6.0
+furo==2022.9.29
\ No newline at end of file
diff --git a/python/requirements.txt b/python/requirements.txt
deleted file mode 100644
index 1a6844a9..00000000
--- a/python/requirements.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-ipython==8.10.0
-numpy==1.24.3
-chispa==0.9.2
-pandas==1.5.2
-pyarrow==12.0.0
-python-dateutil==2.8.2
-pytz==2022.7.1
-scipy==1.10.1
-six==1.16.0
-wheel==0.38.4
-semver==2.13.0
-sphinx-autobuild==2021.3.14
-furo==2022.9.29
-sphinx-copybutton==0.5.1
-Sphinx==4.5.0
-sphinx-design==0.2.0
-sphinx-panels==0.6.0
-jsonref==1.1.0
-python-dateutil==2.8.2
diff --git a/python/tox.ini b/python/tox.ini
index 7210cea6..1779e4dc 100644
--- a/python/tox.ini
+++ b/python/tox.ini
@@ -11,11 +11,13 @@ envlist =
    build-dist
 ; Mirror Supported LTS DBR versions here: https://docs.databricks.com/release-notes/runtime/
 ; Use correct PySpark version based on Python version present in env name
-    py38-dbr104
-    py39-dbr113
-    py39-dbr122
-    py310-dbr133
-    py310-dbr142
+    dbr73
+    dbr91
+    dbr104
+    dbr113
+    dbr122
+    dbr133
+    dbr142
 skip_missing_interpreters = true
@@ -25,12 +27,16 @@ package = wheel
 wheel_build_env = .pkg
 setenv =
     COVERAGE_FILE = .coverage.{envname}
+basepython =
+    dbr142: py310
+    dbr133: py310
+    dbr122: py39
+    dbr113: py39
+    dbr104: py38
+    dbr91: py38
+    dbr73: py37
 deps =
-    dbr104: -rtests/requirements/dbr104.txt
-    dbr113: -rtests/requirements/dbr113.txt
-    dbr122: -rtests/requirements/dbr122.txt
-    dbr133: -rtests/requirements/dbr133.txt
-    dbr142: -rtests/requirements/dbr142.txt
+    -rtests/requirements/{envname}.txt
     -rtests/requirements/dev.txt
     coverage>=7,<8
@@ -66,7 +72,7 @@ deps =
     mypy>=1,<2
     pandas-stubs>=2,<3
     types-pytz>=2023,<2024
-    -rrequirements.txt
+    -rtests/requirements/dbr133.txt
 commands =
     mypy {toxinidir}/tempo

From 14d29ec52531c1119cf6456e3cc51ed50a8ccb7c Mon Sep 17 00:00:00 2001
From: Taylor Isbell
Date: Wed, 24 Jan 2024 16:09:16 -0500
Subject: [PATCH 010/137] added some TODOs

---
 python/tempo/io.py       | 7 +++++--
 python/tests/io_tests.py | 1 +
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/python/tempo/io.py b/python/tempo/io.py
index f3466ef5..3a9e2a43 100644
--- a/python/tempo/io.py
+++ b/python/tempo/io.py
@@ -6,11 +6,10 @@ from typing import Optional
 
 import pyspark.sql.functions as sfn
+import tempo.tsdf as t_tsdf
 from pyspark.sql import SparkSession
 from pyspark.sql.utils import ParseException
 
-import tempo.tsdf as t_tsdf
-
 logger = logging.getLogger(__name__)
@@ -31,11 +30,15 @@ def write(
     df = tsdf.df
     ts_col = tsdf.ts_col
     partitionCols = tsdf.partitionCols
+
+    # TODO: this assumption of "event_time" column name is not appropriate
     if optimizationCols:
         optimizationCols = optimizationCols + ["event_time"]
     else:
         optimizationCols = ["event_time"]
 
+    # TODO: improve this logic. We should be checking for optimizationCols, not
+    # DATABRICKS_RUNTIME_VERSION
     useDeltaOpt = os.getenv("DATABRICKS_RUNTIME_VERSION") is not None
 
     view_df = df.withColumn("event_dt", sfn.to_date(sfn.col(ts_col))).withColumn(
diff --git a/python/tests/io_tests.py b/python/tests/io_tests.py
index 44b837e3..de16d300 100644
--- a/python/tests/io_tests.py
+++ b/python/tests/io_tests.py
@@ -59,6 +59,7 @@ def test_write_to_delta_non_dbr_environment_logging(self):
             ],
         )
 
+    # TODO: FIX ME
     @mock.patch.dict(os.environ, {"DATABRICKS_RUNTIME_VERSION": "10.4"})
     def test_write_to_delta_bad_dbr_environment_logging(self):
         """Test useDeltaOpt Exception"""

From 7e5a5de5459c3e3a6017173664af2f6c66e9ee1d Mon Sep 17 00:00:00 2001
From: Taylor Isbell
Date: Wed, 24 Jan 2024 16:22:02 -0500
Subject: [PATCH 011/137] added DBR 9.1

---
 python/tests/requirements/dbr91.txt | 7 +++++++
 python/tox.ini                      | 8 +-------
 2 files changed, 8 insertions(+), 7 deletions(-)
 create mode 100644 python/tests/requirements/dbr91.txt

diff --git a/python/tests/requirements/dbr91.txt b/python/tests/requirements/dbr91.txt
new file mode 100644
index 00000000..faf44bb8
--- /dev/null
+++ b/python/tests/requirements/dbr91.txt
@@ -0,0 +1,7 @@
+delta-spark==1.0.0
+ipython==7.22.0
+numpy==1.19.2
+pandas==1.2.4
+pyarrow==4.0.0
+pyspark==3.1.2
+scipy==1.6.2
\ No newline at end of file
diff --git a/python/tox.ini b/python/tox.ini
index 1779e4dc..f5283081 100644
--- a/python/tox.ini
+++ b/python/tox.ini
@@ -11,13 +11,7 @@ envlist =
    build-dist
 ; Mirror Supported LTS DBR versions here: https://docs.databricks.com/release-notes/runtime/
 ; Use correct PySpark version based on Python version present in env name
-    dbr73
-    dbr91
-    dbr104
-    dbr113
-    dbr122
-    dbr133
-    dbr142
+    dbr{91,104,113,122,133,142}
 skip_missing_interpreters = true

From cf1abe0b89e16461578a080afc4ead365e8bf60d Mon Sep 17 00:00:00 2001
From: Taylor Isbell
Date: Thu, 25 Jan 2024 11:21:47 -0500
Subject: [PATCH 012/137] updated CONTRIBUTING.md

---
 CONTRIBUTING.md | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 04bf3428..c0ca74e2 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -43,12 +43,9 @@
 Run the following command in your terminal to create a virtual environment in the folder:
 ```
 tox --devenv .venv -e {environment-name}
 ```
 The `--devenv` flag tells `tox` to create a development environment, and `.venv` is the folder where the virtual environment will be created.
-Pre-defined environments can be found within the `tox.ini` file for different Python versions and their corresponding PySpark version. They include:
-- py37-pyspark300
-- py38-pyspark312
-- py38-pyspark321
-- py39-pyspark330
-- py39-pyspark332
+
+## Environments we test
+The environments we test against are defined within the `tox.ini` file, and the requirements for those environments are stored in `python/tests/requirements`. The makeup of these environments is inspired by the [Databricks Runtime](https://docs.databricks.com/en/release-notes/runtime/index.html#) (hence the naming convention), but it's important to note that developing on Databricks is **not** a requirement. We're simply mimicking some of the different runtime versions because (a) we recognize that much of the user base uses `tempo` on Databricks and (b) it saves development time spent trying to build out test environments with different versions of Python and PySpark from scratch.
 
 ## Run tests locally for one or more environments
 You can run tests locally for one or more of the defined environments without setting up a development environment first.

From f3297a94b5333790ea084f9484e867a0b18b0010 Mon Sep 17 00:00:00 2001
From: Taylor Isbell
Date: Fri, 26 Jan 2024 19:37:35 -0500
Subject: [PATCH 013/137] fixed test to check for appropriate delta-spark
 version

---
 python/tests/io_tests.py          | 72 ++++++++++++-------------------
 python/tests/requirements/dev.txt |  1 +
 2 files changed, 28 insertions(+), 45 deletions(-)

diff --git a/python/tests/io_tests.py b/python/tests/io_tests.py
index de16d300..101d1f70 100644
--- a/python/tests/io_tests.py
+++ b/python/tests/io_tests.py
@@ -1,10 +1,12 @@
 import logging
-import os
 import unittest
-from unittest import mock
+from importlib.metadata import version
 
+from packaging import version as pkg_version
 from tests.base import SparkTest
 
+DELTA_VERSION = version("delta-spark")
+
 
 class DeltaWriteTest(SparkTest):
     def test_write_to_delta_without_optimization_cols(self):
@@ -37,30 +39,6 @@ def test_write_to_delta_with_optimization_cols(self):
         # should be equal to the expected dataframe
         self.assertEqual(self.spark.table(table_name).count(), 7)
 
-    def test_write_to_delta_non_dbr_environment_logging(self):
-        """Test logging when writing"""
-
-        table_name = "my_table_optimization_col"
-
-        # load test data
-        input_tsdf = self.get_data_as_tsdf("input_data")
-
-        with self.assertLogs(level="WARNING") as warning_captured:
-            # test write to delta
-            input_tsdf.write(self.spark, table_name, ["date"])
-
-        self.assertEqual(len(warning_captured.records), 1)
-        self.assertEqual(
-            warning_captured.output,
-            [
-                "WARNING:tempo.io:"
-                "Delta optimizations attempted on a non-Databricks platform. "
-                "Switch to use Databricks Runtime to get optimization advantages."
-            ],
-        )
-
-    # TODO: FIX ME
-    @mock.patch.dict(os.environ, {"DATABRICKS_RUNTIME_VERSION": "10.4"})
     def test_write_to_delta_bad_dbr_environment_logging(self):
         """Test useDeltaOpt Exception"""
@@ -69,25 +47,29 @@ def test_write_to_delta_bad_dbr_environment_logging(self):
         # load test data
         input_tsdf = self.get_data_as_tsdf("input_data")
 
-        with self.assertLogs(level="ERROR") as error_captured:
-            # test write to delta
-            input_tsdf.write(self.spark, table_name, ["date"])
-
-        self.assertEqual(len(error_captured.records), 1)
-        print(error_captured.output)
-        self.assertEqual(
-            error_captured.output,
-            [
-                "ERROR:tempo.io:"
-                "Delta optimizations attempted, but was not successful.\nError: \nmismatched input "
-                "'optimize' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', "
-                "'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', "
-                "'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', "
-                "'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', "
-                "'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0)\n\n== SQL ==\noptimize "
-                "my_table_optimization_col_fails zorder by (symbol,date,event_time)\n^^^\n"
-            ],
-        )
+        if pkg_version.parse(DELTA_VERSION) < pkg_version.parse("2.0.0"):
+
+            with self.assertLogs(level="ERROR") as error_captured:
+                # should fail to run optimize
+                input_tsdf.write(self.spark, table_name, ["date"])
+
+            self.assertEqual(len(error_captured.records), 1)
+            print(error_captured.output)
+            self.assertEqual(
+                error_captured.output,
+                [
+                    "ERROR:tempo.io:"
+                    "Delta optimizations attempted, but was not successful.\nError: \nmismatched input "
+                    "'optimize' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', "
+                    "'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', "
+                    "'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', "
+                    "'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', "
+                    "'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0)\n\n== SQL ==\noptimize "
+                    "my_table_optimization_col_fails zorder by (symbol,date,event_time)\n^^^\n"
+                ],
+            )
+        else:
+            pass
 
 
 # MAIN
diff --git a/python/tests/requirements/dev.txt b/python/tests/requirements/dev.txt
index c8b70429..c8090248 100644
--- a/python/tests/requirements/dev.txt
+++ b/python/tests/requirements/dev.txt
@@ -1,3 +1,4 @@
 chispa
 jsonref
+packaging
 python-dateutil
\ No newline at end of file

From 16d29abdd12e8f7794ad2bebec3ba6bbb0935e9d Mon Sep 17 00:00:00 2001
From: Taylor Isbell
Date: Fri, 26 Jan 2024 19:38:23 -0500
Subject: [PATCH 014/137] removed useDeltaOpt conditional in io.write

---
 python/tempo/io.py | 30 ++++++++++--------------------
 1 file changed, 10 insertions(+), 20 deletions(-)

diff --git a/python/tempo/io.py b/python/tempo/io.py
index 3a9e2a43..cd43c32a 100644
--- a/python/tempo/io.py
+++ b/python/tempo/io.py
@@ -37,10 +37,6 @@ def write(
     else:
         optimizationCols = ["event_time"]
 
-    # TODO: improve this logic. We should be checking for optimizationCols, not
-    # DATABRICKS_RUNTIME_VERSION
-    useDeltaOpt = os.getenv("DATABRICKS_RUNTIME_VERSION") is not None
-
     view_df = df.withColumn("event_dt", sfn.to_date(sfn.col(ts_col))).withColumn(
@@ -55,21 +51,15 @@ def write(
         tabName
     )
 
-    if useDeltaOpt:
-        try:
-            spark.sql(
-                "optimize {} zorder by {}".format(
-                    tabName, "(" + ",".join(partitionCols + optimizationCols) + ")"
-                )
-            )
-        except ParseException as e:
-            logger.error(
-                "Delta optimizations attempted, but was not successful.\nError: {}".format(
-                    e
-                )
+    try:
+        spark.sql(
+            "optimize {} zorder by {}".format(
+                tabName, "(" + ",".join(partitionCols + optimizationCols) + ")"
             )
-    else:
-        logger.warning(
-            "Delta optimizations attempted on a non-Databricks platform. "
-            "Switch to use Databricks Runtime to get optimization advantages."
+        )
+    except ParseException as e:
+        logger.error(
+            "Delta optimizations attempted, but was not successful.\nError: {}".format(
+                e
+            )
         )
\ No newline at end of file

From fc0b3572f410fc0388623ac77b4bfdb99f087c6a Mon Sep 17 00:00:00 2001
From: Taylor Isbell
Date: Fri, 26 Jan 2024 19:42:44 -0500
Subject: [PATCH 015/137] formatting

---
 python/tempo/io.py         |  2 +-
 python/tempo/utils.py      | 24 ++++++++----------------
 python/tests/io_tests.py   |  2 +-
 python/tests/tsdf_tests.py |  3 +--
 4 files changed, 11 insertions(+), 20 deletions(-)

diff --git a/python/tempo/io.py b/python/tempo/io.py
index cd43c32a..7b4d842d 100644
--- a/python/tempo/io.py
+++ b/python/tempo/io.py
@@ -62,4 +62,4 @@ def write(
             "Delta optimizations attempted, but was not successful.\nError: {}".format(
                 e
             )
-        )
\ No newline at end of file
+        )
diff --git a/python/tempo/utils.py b/python/tempo/utils.py
index d539da1b..5260e3c4 100644
--- a/python/tempo/utils.py
+++ b/python/tempo/utils.py
@@ -139,13 +139,11 @@ def calculate_time_horizon(
 
 
 @overload
-def display_html(df: pandasDataFrame) -> None:
-    ...
+def display_html(df: pandasDataFrame) -> None: ...
 
 
 @overload
-def display_html(df: DataFrame) -> None:
-    ...
+def display_html(df: DataFrame) -> None: ...
 
 
 def display_html(df: Union[pandasDataFrame, DataFrame]) -> None:
@@ -192,16 +190,13 @@ def get_display_df(tsdf: t_tsdf.TSDF, k: int) -> DataFrame:
     # to know more refer: /databricks/python_shell/scripts/db_ipykernel_launcher.py
 
     @overload
-    def display_improvised(obj: t_tsdf.TSDF) -> None:
-        ...
+    def display_improvised(obj: t_tsdf.TSDF) -> None: ...
 
     @overload
-    def display_improvised(obj: pandasDataFrame) -> None:
-        ...
+    def display_improvised(obj: pandasDataFrame) -> None: ...
 
     @overload
-    def display_improvised(obj: DataFrame) -> None:
-        ...
+    def display_improvised(obj: DataFrame) -> None: ...
 
     def display_improvised(obj: Union[t_tsdf.TSDF, pandasDataFrame, DataFrame]) -> None:
         if isinstance(obj, t_tsdf.TSDF):
@@ -214,16 +209,13 @@ def display_improvised(obj: Union[t_tsdf.TSDF, pandasDataFrame, DataFrame]) -> N
 elif ENV_CAN_RENDER_HTML:
 
     @overload
-    def display_html_improvised(obj: Optional[t_tsdf.TSDF]) -> None:
-        ...
+    def display_html_improvised(obj: Optional[t_tsdf.TSDF]) -> None: ...
 
     @overload
-    def display_html_improvised(obj: Optional[pandasDataFrame]) -> None:
-        ...
+    def display_html_improvised(obj: Optional[pandasDataFrame]) -> None: ...
 
     @overload
-    def display_html_improvised(obj: Optional[DataFrame]) -> None:
-        ...
+    def display_html_improvised(obj: Optional[DataFrame]) -> None: ...
     def display_html_improvised(
         obj: Union[t_tsdf.TSDF, pandasDataFrame, DataFrame]
diff --git a/python/tests/io_tests.py b/python/tests/io_tests.py
index 101d1f70..7a138218 100644
--- a/python/tests/io_tests.py
+++ b/python/tests/io_tests.py
@@ -52,7 +52,7 @@ def test_write_to_delta_bad_dbr_environment_logging(self):
             with self.assertLogs(level="ERROR") as error_captured:
                 # should fail to run optimize
                 input_tsdf.write(self.spark, table_name, ["date"])
-
+
             self.assertEqual(len(error_captured.records), 1)
             print(error_captured.output)
             self.assertEqual(
diff --git a/python/tests/tsdf_tests.py b/python/tests/tsdf_tests.py
index c36263e4..33af3155 100644
--- a/python/tests/tsdf_tests.py
+++ b/python/tests/tsdf_tests.py
@@ -876,8 +876,7 @@ def test_withPartitionCols(self):
         self.assertEqual(init_tsdf.partitionCols, [])
         self.assertEqual(actual_tsdf.partitionCols, ["symbol"])
 
-    def test_tsdf_interpolate(self):
-        ...
+    def test_tsdf_interpolate(self): ...
 
 
 class FourierTransformTest(SparkTest):

From e11fbb2b0445a9fb659a1305d579b5985207c7c2 Mon Sep 17 00:00:00 2001
From: Taylor Isbell
Date: Fri, 26 Jan 2024 19:45:24 -0500
Subject: [PATCH 016/137] linting

---
 python/tempo/io.py    |  1 -
 python/tempo/utils.py | 32 +++++++++++++++++++-------------
 2 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/python/tempo/io.py b/python/tempo/io.py
index 7b4d842d..fee46bb6 100644
--- a/python/tempo/io.py
+++ b/python/tempo/io.py
@@ -1,7 +1,6 @@
 from __future__ import annotations
 
 import logging
-import os
 from collections import deque
 from typing import Optional
diff --git a/python/tempo/utils.py b/python/tempo/utils.py
index 5260e3c4..4a10ebfb 100644
--- a/python/tempo/utils.py
+++ b/python/tempo/utils.py
@@ -5,17 +5,15 @@
 import warnings
 from typing import List, Optional, Union, overload
 
+import pyspark.sql.functions as sfn
+import tempo.resample as t_resample
+import tempo.tsdf as t_tsdf
 from IPython import get_ipython
 from IPython.core.display import HTML
 from IPython.display import display as ipydisplay
 from pandas.core.frame import DataFrame as pandasDataFrame
-
-import pyspark.sql.functions as sfn
 from pyspark.sql.dataframe import DataFrame
 
-import tempo.resample as t_resample
-import tempo.tsdf as t_tsdf
-
 logger = logging.getLogger(__name__)
@@ -137,11 +137,13 @@ def calculate_time_horizon(
 
 
 @overload
-def display_html(df: pandasDataFrame) -> None: ...
+def display_html(df: pandasDataFrame) -> None:
+    ...
 
 
 @overload
-def display_html(df: DataFrame) -> None: ...
+def display_html(df: DataFrame) -> None:
+    ...
 
 
 def display_html(df: Union[pandasDataFrame, DataFrame]) -> None:
@@ -190,13 +192,16 @@ def get_display_df(tsdf: t_tsdf.TSDF, k: int) -> DataFrame:
     # to know more refer: /databricks/python_shell/scripts/db_ipykernel_launcher.py
 
     @overload
-    def display_improvised(obj: t_tsdf.TSDF) -> None: ...
+    def display_improvised(obj: t_tsdf.TSDF) -> None:
+        ...
 
     @overload
-    def display_improvised(obj: pandasDataFrame) -> None: ...
+    def display_improvised(obj: pandasDataFrame) -> None:
+        ...
 
     @overload
-    def display_improvised(obj: DataFrame) -> None: ...
+    def display_improvised(obj: DataFrame) -> None:
+        ...
 
     def display_improvised(obj: Union[t_tsdf.TSDF, pandasDataFrame, DataFrame]) -> None:
         if isinstance(obj, t_tsdf.TSDF):
@@ -209,13 +214,16 @@ def display_improvised(obj: Union[t_tsdf.TSDF, pandasDataFrame, DataFrame]) -> N
 elif ENV_CAN_RENDER_HTML:
 
     @overload
-    def display_html_improvised(obj: Optional[t_tsdf.TSDF]) -> None: ...
+    def display_html_improvised(obj: Optional[t_tsdf.TSDF]) -> None:
+        ...
 
     @overload
-    def display_html_improvised(obj: Optional[pandasDataFrame]) -> None: ...
+    def display_html_improvised(obj: Optional[pandasDataFrame]) -> None:
+        ...
 
     @overload
-    def display_html_improvised(obj: Optional[DataFrame]) -> None: ...
+    def display_html_improvised(obj: Optional[DataFrame]) -> None:
+        ...
 
     def display_html_improvised(
         obj: Union[t_tsdf.TSDF, pandasDataFrame, DataFrame]

From c557fc1adae927156bfe66b55d8255b9a68ac79b Mon Sep 17 00:00:00 2001
From: Taylor Isbell
Date: Wed, 21 Feb 2024 20:15:51 -0500
Subject: [PATCH 017/137] removed event_time assumption and made zorder
 contingent on optimizationCols

---
 python/tempo/io.py | 28 ++++++++++------------------
 1 file changed, 12 insertions(+), 16 deletions(-)

diff --git a/python/tempo/io.py b/python/tempo/io.py
index fee46bb6..22fe4d8e 100644
--- a/python/tempo/io.py
+++ b/python/tempo/io.py
@@ -30,12 +30,6 @@ def write(
     ts_col = tsdf.ts_col
     partitionCols = tsdf.partitionCols
 
-    # TODO: this assumption of "event_time" column name is not appropriate
-    if optimizationCols:
-        optimizationCols = optimizationCols + ["event_time"]
-    else:
-        optimizationCols = ["event_time"]
-
     view_df = df.withColumn("event_dt", sfn.to_date(sfn.col(ts_col))).withColumn(
@@ -50,15 +44,17 @@ def write(
         tabName
     )
 
-    try:
-        spark.sql(
-            "optimize {} zorder by {}".format(
-                tabName, "(" + ",".join(partitionCols + optimizationCols) + ")"
+    if optimizationCols:
+        try:
+            spark.sql(
+                "optimize {} zorder by {}".format(
+                    tabName,
+                    "(" + ",".join(partitionCols + optimizationCols + [ts_col]) + ")",
+                )
             )
-        )
-    except ParseException as e:
-        logger.error(
-            "Delta optimizations attempted, but was not successful.\nError: {}".format(
-                e
+        except ParseException as e:
+            logger.error(
+                "Delta optimizations attempted, but was not successful.\nError: {}".format(
+                    e
+                )
             )
-        )

From e316127ac6718d03c748a9bb12444ceb83741405 Mon Sep 17 00:00:00 2001
From: Taylor Isbell
Date: Mon, 4 Mar 2024 15:50:27 -0600
Subject: [PATCH 018/137] moved requirements and removed DBR 7 testing

---
 python/{tests => }/requirements/dbr104.txt | 0
 python/{tests => }/requirements/dbr113.txt | 0
 python/{tests => }/requirements/dbr122.txt | 0
 python/{tests => }/requirements/dbr133.txt | 0
 python/{tests => }/requirements/dbr142.txt | 0
 python/{tests => }/requirements/dbr91.txt  | 0
 python/{tests => }/requirements/dev.txt    | 0
 python/tox.ini                             | 5 ++---
 8 files changed, 2 insertions(+), 3 deletions(-)
 rename python/{tests => }/requirements/dbr104.txt (100%)
 rename python/{tests => }/requirements/dbr113.txt (100%)
 rename python/{tests => }/requirements/dbr122.txt (100%)
 rename python/{tests => }/requirements/dbr133.txt (100%)
 rename python/{tests => }/requirements/dbr142.txt (100%)
 rename python/{tests => }/requirements/dbr91.txt (100%)
 rename python/{tests => }/requirements/dev.txt (100%)

diff --git a/python/tests/requirements/dbr104.txt b/python/requirements/dbr104.txt
similarity index 100%
rename from python/tests/requirements/dbr104.txt
rename to python/requirements/dbr104.txt
diff --git a/python/tests/requirements/dbr113.txt b/python/requirements/dbr113.txt
similarity index 100%
rename from python/tests/requirements/dbr113.txt
rename to python/requirements/dbr113.txt
diff --git a/python/tests/requirements/dbr122.txt b/python/requirements/dbr122.txt
similarity index 100%
rename from python/tests/requirements/dbr122.txt
rename to python/requirements/dbr122.txt
diff --git a/python/tests/requirements/dbr133.txt b/python/requirements/dbr133.txt
similarity index 100%
rename from python/tests/requirements/dbr133.txt
rename to python/requirements/dbr133.txt
diff --git a/python/tests/requirements/dbr142.txt b/python/requirements/dbr142.txt
similarity index 100%
rename from python/tests/requirements/dbr142.txt
rename to python/requirements/dbr142.txt
diff --git a/python/tests/requirements/dbr91.txt b/python/requirements/dbr91.txt
similarity index 100%
rename from python/tests/requirements/dbr91.txt
rename to python/requirements/dbr91.txt
diff --git a/python/tests/requirements/dev.txt b/python/requirements/dev.txt
similarity index 100%
rename from python/tests/requirements/dev.txt
rename to python/requirements/dev.txt
diff --git a/python/tox.ini b/python/tox.ini
index f5283081..6a43ba43 100644
--- a/python/tox.ini
+++ b/python/tox.ini
@@ -28,10 +28,9 @@ basepython =
     dbr113: py39
     dbr104: py38
     dbr91: py38
-    dbr73: py37
 deps =
-    -rtests/requirements/{envname}.txt
-    -rtests/requirements/dev.txt
+    -rrequirements/{envname}.txt
+    -rrequirements/dev.txt
     coverage>=7,<8

From d7fb52cfe70a3e3658a0e16189016ab2c01a863b Mon Sep 17 00:00:00 2001
From: Taylor Isbell
Date: Tue, 16 Apr 2024 15:12:01 -0500
Subject: [PATCH 019/137] fixed tox.ini and added pyenv to test.yml

---
 .github/workflows/test.yml | 89 +++++++++++++++-----------------------
 python/tox.ini             | 11 +----------
 2 files changed, 36 insertions(+), 64 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 27d2d8a3..15319943 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -1,69 +1,50 @@
 name: build
 
 on:
-  push:
-    branches: [ '*' ]
   pull_request:
     branches: [ 'master' ]
 
 jobs:
-  black-linting:
+  tox:
     runs-on: ubuntu-latest
-    name: Black Lint
-    steps:
-      - uses: actions/checkout@v2
-      - uses: psf/black@stable
-        with:
-          options: "--check --verbose"
-          src: "./python"
-          version: "23.3.0"
-  flake8-lint:
-    runs-on: ubuntu-latest
-    name: Flake8 Lint
-    steps:
-      - name: Check out source repository
-        uses: actions/checkout@v2
-      - name: Set up Python environment
-        uses: actions/setup-python@v2
-        with:
-          python-version: "3.9"
-      - name: flake8 Lint
-        uses: py-actions/flake8@v2
-        with:
-          args: "--config python/.flake8"
-          path: "./python"
-  type-checks:
-    runs-on: ubuntu-latest
-    name: Type Checks
-    steps:
-      - uses: actions/checkout@v2
-      - uses: actions/setup-python@v2
-        with:
-          python-version: "3.9"
-      - name: Type check
-        working-directory: ./python
-        run: |
-          pip install tox
-          tox -e type-check
-  test:
-    name: Build and Test Module
-    runs-on: ${{ matrix.os }}
+
     strategy:
       matrix:
-        os: [ubuntu-latest]
-    env:
-      OS: ${{ matrix.os }}
-      PYTHON: '3.9'
+        env:
+          - dbr142
+          - dbr133
+          - dbr122
+          - dbr113
+          - dbr104
+          - dbr91
+
     steps:
-      - uses: actions/checkout@master
-      - name: Setup Python
-        uses: actions/setup-python@master
-        with:
-          python-version: 3.9
-      - name: Set Spark env
+      - uses: actions/checkout@v3
+
+      - name: Install pyenv
+        run: |
+          git clone https://github.com/pyenv/pyenv.git ~/.pyenv
+          echo 'export PYENV_ROOT="$HOME/.pyenv"' >> ~/.bashrc
+          echo 'export PATH="$PYENV_ROOT/bin:$PATH"' >> ~/.bashrc
+          echo 'eval "$(pyenv init -)"' >> ~/.bashrc
+          source ~/.bashrc
+
+      - name: Install Python versions
+        run: |
+          pyenv install 3.8 3.9 3.10
+
+      - name: Verify Python installations
         run: |
-          export SPARK_LOCAL_IP=127.0.0.1
-          export SPARK_SUBMIT_OPTS="--illegal-access=permit -Dio.netty.tryReflectionSetAccessible=true"
+          python3.8 --version
+          python3.9 --version
+          python3.10 --version
+
+      - name: Install tox
+        run: pip install tox
+
+      - name: Run tox
+        working-directory: ./python
+        run: tox -e ${{ matrix.env }}
+
       - name: Generate coverage report
         working-directory: ./python
         run: |
diff --git a/python/tox.ini b/python/tox.ini
index 6a43ba43..053ecd09 100644
--- a/python/tox.ini
+++ b/python/tox.ini
@@ -32,19 +32,10 @@ deps =
     -rrequirements/{envname}.txt
     -rrequirements/dev.txt
     coverage>=7,<8
-
 commands =
     coverage --version
     coverage run -m unittest discover -s tests -p '*_tests.py'
 
-[testenv:format]
-description = run formatters
-skipsdist = true
-skip_install = true
-deps =
-    black
-commands =
-    black {toxinidir}
 
 [testenv:lint]
 description = run linters
@@ -65,7 +56,7 @@ deps =
     mypy>=1,<2
     pandas-stubs>=2,<3
     types-pytz>=2023,<2024
-    -rtests/requirements/dbr133.txt
+    -rrequirements/dbr133.txt
 commands =
     mypy {toxinidir}/tempo

From 506d088a68e1be46c50636c47886c7a0bbfbf351 Mon Sep 17 00:00:00 2001
From: Taylor Isbell
Date: Tue, 16 Apr 2024 15:20:09 -0500
Subject: [PATCH 020/137] manual trigger

---
 .github/workflows/test.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 15319943..bd3d1730 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -3,6 +3,7 @@ name: build
 on:
   pull_request:
     branches: [ 'master' ]
+  workflow_dispatch:
 
 jobs:

From d465f3acef676094707371ddd484e404af1af2ec Mon Sep 17 00:00:00 2001
From: Taylor Isbell
Date: Wed, 17 Apr 2024 20:31:41 -0500
Subject: [PATCH 021/137] readded push trigger

---
 .github/workflows/test.yml | 4 +++-
 python/tox.ini             | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index bd3d1730..c2a1f1af 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -3,7 +3,9 @@ name: build
 on:
   pull_request:
     branches: [ 'master' ]
-  workflow_dispatch:
+  # workflow_dispatch:
+  push:
+    branches: ['*']
 
 jobs:
diff --git a/python/tox.ini b/python/tox.ini
index 053ecd09..bee9ea36 100644
--- a/python/tox.ini
+++ b/python/tox.ini
@@ -56,7 +56,7 @@ deps =
     mypy>=1,<2
     pandas-stubs>=2,<3
     types-pytz>=2023,<2024
-    -rrequirements/dbr133.txt
+    -rrequirements/dbr142.txt
 commands =
     mypy {toxinidir}/tempo

From 941855f6beb24a7665a4b2e7e599bfef0a86fa67 Mon Sep 17 00:00:00 2001
From: Taylor Isbell
Date: Wed, 17 Apr 2024 20:39:13 -0500
Subject: [PATCH 022/137] split into two jobs

---
 .github/workflows/test.yml | 43 +++++++++++++++++++-------------------
 1 file changed, 21 insertions(+), 22 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index c2a1f1af..f30a62f7 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -8,7 +8,28 @@ on:
     branches: ['*']
 
 jobs:
+  install_python:
+    runs-on: ubuntu latest
+    steps:
+      - uses: actions/checkout@v3
+      - name: Install pyenv
+        run: |
+          git clone https://github.com/pyenv/pyenv.git ~/.pyenv
+          echo 'export PYENV_ROOT="$HOME/.pyenv"' >> ~/.bashrc
+          echo 'export PATH="$PYENV_ROOT/bin:$PATH"' >> ~/.bashrc
+          echo 'eval "$(pyenv init -)"' >> ~/.bashrc
+          source ~/.bashrc
+      - name: Install Python versions
+        run: |
+          pyenv install 3.8 3.9 3.10
+      - name: Verify Python installations
+        run: |
+          python3.8 --version
+          python3.9 --version
+          python3.10 --version
+
   tox:
+    needs: install_python
     runs-on: ubuntu-latest
     strategy:
       matrix:
@@ -19,35 +40,13 @@ jobs:
           - dbr113
           - dbr104
           - dbr91
-
     steps:
       - uses: actions/checkout@v3
-
-      - name: Install pyenv
-        run: |
-          git clone https://github.com/pyenv/pyenv.git ~/.pyenv
-          echo 'export PYENV_ROOT="$HOME/.pyenv"' >> ~/.bashrc
-          echo 'export PATH="$PYENV_ROOT/bin:$PATH"' >> ~/.bashrc
-          echo 'eval "$(pyenv init -)"' >> ~/.bashrc
-          source ~/.bashrc
-
-      - name: Install Python versions
-        run: |
-          pyenv install 3.8 3.9 3.10
-
-      - name: Verify Python installations
-        run: |
-          python3.8 --version
-          python3.9 --version
-          python3.10 --version
-
       - name: Install tox
         run: pip install tox
-
       - name: Run tox
         working-directory: ./python
         run: tox -e ${{ matrix.env }}
-
       - name: Generate coverage report
         working-directory: ./python
         run: |

From 51399c3e5944fcb89aa7147ac032d70b90395499 Mon Sep 17 00:00:00 2001
From: Taylor Isbell
Date: Wed, 17 Apr 2024 20:40:29 -0500
Subject: [PATCH 023/137] typo

---
 .github/workflows/test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index f30a62f7..faa25650 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -9,7 +9,7 @@ on:
 jobs:
   install_python:
-    runs-on: ubuntu latest
+    runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3

From c0a908ea7ab1530b61ee99c77035cbdbe495bb4e Mon Sep 17 00:00:00 2001
From: Taylor Isbell
Date: Wed, 17 Apr 2024 20:44:52 -0500
Subject: [PATCH 024/137] modified pyenv install cmd

---
 .github/workflows/test.yml | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index faa25650..5af1562d 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -14,11 +14,10 @@ jobs:
       - uses: actions/checkout@v3
       - name: Install pyenv
         run: |
-          git clone https://github.com/pyenv/pyenv.git ~/.pyenv
+          curl https://pyenv.run | bash
           echo 'export PYENV_ROOT="$HOME/.pyenv"' >> ~/.bashrc
-          echo 'export PATH="$PYENV_ROOT/bin:$PATH"' >> ~/.bashrc
+          echo 'command -v pyenv >/dev/null || export PATH="$PYENV_ROOT/bin:$PATH"' >> ~/.bashrc
          echo 'eval "$(pyenv init -)"' >> ~/.bashrc
-          source ~/.bashrc
       - name: Install Python versions

From 4a465cc5cc84a10a089a396200ac16cb03ae6dd2 Mon Sep 17 00:00:00 2001
From: Taylor Isbell
Date: Wed, 17 Apr 2024 20:46:57 -0500
Subject: [PATCH 025/137] restart shell cmd

---
 .github/workflows/test.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 5af1562d..4e922dcf 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -18,6 +18,7 @@ jobs:
           echo 'export PYENV_ROOT="$HOME/.pyenv"' >> ~/.bashrc
           echo 'command -v pyenv >/dev/null || export PATH="$PYENV_ROOT/bin:$PATH"' >> ~/.bashrc
           echo 'eval "$(pyenv init -)"' >> ~/.bashrc
+          exec "$SHELL"
       - name: Install Python versions

From fedabd3fddf8380aa8c9c21842fcb4ab30c6a572 Mon Sep 17 00:00:00 2001
From: Taylor Isbell
Date: Wed, 17 Apr 2024 20:49:24 -0500
Subject: [PATCH 026/137] added cmds to .profile

---
 .github/workflows/test.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 4e922dcf..6e496a98 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -18,6 +18,9 @@ jobs:
           echo 'export PYENV_ROOT="$HOME/.pyenv"' >> ~/.bashrc
           echo 'command -v pyenv >/dev/null || export PATH="$PYENV_ROOT/bin:$PATH"' >> ~/.bashrc
           echo 'eval "$(pyenv init -)"' >> ~/.bashrc
+          echo 'export PYENV_ROOT="$HOME/.pyenv"' >> ~/.profile
+          echo 'command -v pyenv >/dev/null || export PATH="$PYENV_ROOT/bin:$PATH"' >> ~/.profile
+          echo 'eval "$(pyenv init -)"' >> ~/.profile
           exec "$SHELL"

From e75201529442240f2a80655c6f5adf8c2f6953a9 Mon Sep 17 00:00:00 2001
From: Taylor Isbell
Date: Wed, 17 Apr 2024 20:50:17 -0500
Subject: [PATCH 027/137] .bash_profile

---
 .github/workflows/test.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 6e496a98..bf56a6bf 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -21,6 +21,9 @@ jobs:
           echo 'export PYENV_ROOT="$HOME/.pyenv"' >> ~/.profile
           echo 'command -v pyenv >/dev/null || export PATH="$PYENV_ROOT/bin:$PATH"' >> ~/.profile
           echo 'eval "$(pyenv init -)"' >> ~/.profile
+          echo 'export PYENV_ROOT="$HOME/.pyenv"' >> ~/.bash_profile
+          echo '[[ -d $PYENV_ROOT/bin ]] && export PATH="$PYENV_ROOT/bin:$PATH"' >> ~/.bash_profile
+          echo 'eval "$(pyenv init -)"' >> ~/.bash_profile
           exec "$SHELL"

From e1f606f63dce8cd0a4d85f1981662650b2f6611e Mon Sep 17 00:00:00 2001
From: Taylor Isbell
Date: Wed, 17 Apr 2024 20:52:51 -0500
Subject: [PATCH 028/137] combined steps

---
 .github/workflows/test.yml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index bf56a6bf..d285efdf 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -25,8 +25,6 @@ jobs:
           echo 'eval "$(pyenv init -)"' >> ~/.bash_profile
           exec "$SHELL"
-      - name: Install Python versions
-        run: |
           pyenv install 3.8 3.9 3.10

From 33d8b021ddae93cd0e6e24285e34e217671f113f Mon Sep 17 00:00:00 2001
From: Taylor Isbell
Date: Wed, 17 Apr 2024 20:54:41 -0500
Subject: [PATCH 029/137] added print statement

---
 .github/workflows/test.yml | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index d285efdf..151d41f5 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -26,11 +26,12 @@ jobs:
           exec "$SHELL"
           pyenv install 3.8 3.9 3.10
-      - name: Verify Python installations
-        run: |
-          python3.8 --version
-          python3.9 --version
-          python3.10 --version
+          echo "installation complete"
+      # - name: Verify Python installations
+      #   run: |
+      #     python3.8 --version
+      #     python3.9 --version
+      #     python3.10 --version

From 7865d376424708f58d49f6ab09b7c07026396026 Mon Sep 17 00:00:00 2001
From: Taylor Isbell
Date: Wed, 17 Apr 2024 20:56:28 -0500
Subject: [PATCH 030/137] removed shell restart

---
 .github/workflows/test.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 151d41f5..55abe0a2 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -24,7 +24,6 @@ jobs:
           echo 'export PYENV_ROOT="$HOME/.pyenv"' >> ~/.bash_profile
           echo '[[ -d $PYENV_ROOT/bin ]] && export PATH="$PYENV_ROOT/bin:$PATH"' >> ~/.bash_profile
           echo 'eval "$(pyenv init -)"' >> ~/.bash_profile
-          exec "$SHELL"
           pyenv install 3.8 3.9 3.10
           echo "installation complete"

From 545065ff299b95821331a4582925697ddb47e817 Mon Sep 17 00:00:00 2001
From: Taylor Isbell
Date: Wed, 17 Apr 2024 21:03:35 -0500
Subject: [PATCH 031/137] pulled pyenv action from mlflow

---
 .github/workflows/test.yml | 45 ++++++++++++++++++++------------------
 1 file changed, 24 insertions(+), 21 deletions(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 55abe0a2..bf7fbd50 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -8,29 +8,32 @@ on:
 jobs:
-  install_python:
+  install_python_with_pyenv:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
-      - name: Install pyenv
-        run: |
-          curl https://pyenv.run | bash
-          echo 'export PYENV_ROOT="$HOME/.pyenv"' >> ~/.bashrc
-          echo 'command -v pyenv >/dev/null || export PATH="$PYENV_ROOT/bin:$PATH"' >> ~/.bashrc
-          echo 'eval "$(pyenv init -)"' >> ~/.bashrc
-          echo 'export PYENV_ROOT="$HOME/.pyenv"' >> ~/.profile
-          echo 'command -v pyenv >/dev/null || export PATH="$PYENV_ROOT/bin:$PATH"' >> ~/.profile
-          echo 'eval "$(pyenv init -)"' >> ~/.profile
-          echo 'export PYENV_ROOT="$HOME/.pyenv"' >> ~/.bash_profile
-          echo '[[ -d $PYENV_ROOT/bin ]] && export PATH="$PYENV_ROOT/bin:$PATH"' >> ~/.bash_profile
-          echo 'eval "$(pyenv init -)"' >> ~/.bash_profile
-          pyenv install 3.8 3.9 3.10
-          echo "installation complete"
-      # - name: Verify Python installations
-      #   run: |
-      #     python3.8 --version
-      #     python3.9 --version
-      #     python3.10 --version
+      - name: Install python build tools
+        shell: bash
+        # Ref: https://github.com/pyenv/pyenv/wiki#suggested-build-environment
+        run: |
+          sudo apt-get update -y
+          sudo apt-get install -y make build-essential libssl-dev zlib1g-dev \
+            libbz2-dev libreadline-dev libsqlite3-dev wget curl llvm \
+            libncursesw5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev libffi-dev liblzma-dev ffmpeg
+      - name: Install pyenv
+        shell: bash
+        run: |
+          git clone https://github.com/pyenv/pyenv.git "$HOME/.pyenv"
+      - name: Setup environment variables
+        shell: bash
+        run: |
+          PYENV_ROOT="$HOME/.pyenv"
+          PYENV_BIN="$PYENV_ROOT/bin"
+          echo "$PYENV_BIN" >> $GITHUB_PATH
+          echo "PYENV_ROOT=$PYENV_ROOT" >> $GITHUB_ENV
+      - name: Check pyenv version
+        shell: bash
+        run: |
+          pyenv --version
 
   tox:
     needs: install_python
     runs-on: ubuntu-latest

From 10ff339d98570ebb70a6ca7b01d25351740c80c4 Mon Sep 17 00:00:00 2001
From: Taylor Isbell
Date: Wed, 17 Apr 2024 21:04:23 -0500
Subject: [PATCH 032/137] renamed job

---
 .github/workflows/test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index bf7fbd50..f4bbee91 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -36,7 +36,7 @@ jobs:
           pyenv --version
 
   tox:
-    needs: install_python
+    needs: install_python_with_pyenv
     runs-on: ubuntu-latest

From 15d0c028c6fdd043aadf1c8c39e991aea460b443 Mon Sep 17 00:00:00 2001
From: Taylor Isbell
Date: Wed, 17 Apr 2024 21:06:15 -0500
Subject: [PATCH 033/137] forgot installation cmd

---
 .github/workflows/test.yml | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index f4bbee91..c87c6664 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -34,7 +34,11 @@ jobs:
         shell: bash
         run: |
           pyenv --version
-
+      - name: Install python versions
+        shell: bash
+        run: |
+          pyenv install 3.8 3.9 3.10
+
   tox:

From fe89957900158ecf56f429ca5f78590612f7204b Mon Sep 17 00:00:00 2001
From: Taylor Isbell
Date: Wed, 17 Apr 2024 21:21:17 -0500
Subject: [PATCH 034/137] moved python installation into matrix

---
 .github/workflows/test.yml | 25 ++++++++++---------------
 1 file changed, 10 insertions(+), 15 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index c87c6664..c2ccd71c 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -8,8 +8,17 @@ on:
 jobs:
-  install_python_with_pyenv:
+  tox:
     runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        env:
+          - dbr142
+          - dbr133
+          - dbr122
+          - dbr113
+          - dbr104
+          - dbr91
     steps:
       - name: Install python build tools
         shell: bash
@@ -38,20 +47,6 @@ jobs:
         shell: bash
         run: |
           pyenv install 3.8 3.9 3.10
-
-  tox:
-    needs: install_python_with_pyenv
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        env:
-          - dbr142
-          - dbr133
-          - dbr122
-          - dbr113
-          - dbr104
-          - dbr91
-    steps:
       - uses: actions/checkout@v3
       - name: Install tox
         run: pip install tox

From b513b859cf67ab02cabc7e6ecf282747d223b7ff Mon Sep 17 00:00:00 2001
From: Taylor Isbell
Date: Wed, 17 Apr 2024 22:08:43 -0500
Subject: [PATCH 035/137] tox type checking now works

---
 python/tox.ini | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/tox.ini b/python/tox.ini
index bee9ea36..e4b0eae3 100644
--- a/python/tox.ini
+++ b/python/tox.ini
@@ -55,10 +55,10 @@ skip_install = true
 deps =
     mypy>=1,<2
     pandas-stubs>=2,<3
-    types-pytz>=2023,<2024
-    -rrequirements/dbr142.txt
+    numpy
+    types-openpyxl
 commands =
-    mypy {toxinidir}/tempo
+    mypy --install-types {toxinidir}/tempo

From de56c1485e21596e9532c6dbcd61bf5020df3a1c Mon Sep 17 00:00:00 2001
From: Taylor Isbell
Date: Wed, 17 Apr 2024 22:09:08 -0500
Subject: [PATCH 036/137] ignore imports for mypy

---
 python/tempo/tsdf.py  |  4 ++--
 python/tempo/utils.py | 30 +++++++++++-------------------
 2 files changed, 13 insertions(+), 21 deletions(-)

diff --git a/python/tempo/tsdf.py b/python/tempo/tsdf.py
index 29e517d2..2cfa64fb 100644
--- a/python/tempo/tsdf.py
+++ b/python/tempo/tsdf.py
@@ -8,8 +8,8 @@ import numpy as np
 import pandas as pd
 import pyspark.sql.functions as sfn
-from IPython.core.display import HTML
-from IPython.display import display as ipydisplay
+from IPython.core.display import HTML  # type: ignore
+from IPython.display import display as ipydisplay  # type: ignore
 from pyspark.sql import SparkSession
 from pyspark.sql.column import Column
diff --git a/python/tempo/utils.py b/python/tempo/utils.py
index 4a10ebfb..fbedcca6 100644
--- a/python/tempo/utils.py
+++ b/python/tempo/utils.py
@@ -8,9 +8,9 @@ import pyspark.sql.functions as sfn
 import tempo.resample as t_resample
 import tempo.tsdf as t_tsdf
-from IPython import get_ipython
-from IPython.core.display import HTML
-from IPython.display import display as ipydisplay
+from IPython import get_ipython  # type: ignore
+from IPython.core.display import HTML  # type: ignore
+from IPython.display import display as ipydisplay  # type: ignore
 from pandas.core.frame import DataFrame as pandasDataFrame
 from pyspark.sql.dataframe import DataFrame
@@ -137,13 +137,11 @@ def calculate_time_horizon(
 
 
 @overload
-def display_html(df: pandasDataFrame) -> None:
-    ...
+def display_html(df: pandasDataFrame) -> None: ...
 
 
 @overload
-def display_html(df: DataFrame) -> None:
-    ...
+def display_html(df: DataFrame) -> None: ...
 
 
 def display_html(df: Union[pandasDataFrame, DataFrame]) -> None:
@@ -192,16 +190,13 @@ def get_display_df(tsdf: t_tsdf.TSDF, k: int) -> DataFrame:
     # to know more refer: /databricks/python_shell/scripts/db_ipykernel_launcher.py
 
     @overload
-    def display_improvised(obj: t_tsdf.TSDF) -> None:
-        ...
+    def display_improvised(obj: t_tsdf.TSDF) -> None: ...
@overload - def display_improvised(obj: pandasDataFrame) -> None: - ... + def display_improvised(obj: pandasDataFrame) -> None: ... @overload - def display_improvised(obj: DataFrame) -> None: - ... + def display_improvised(obj: DataFrame) -> None: ... def display_improvised(obj: Union[t_tsdf.TSDF, pandasDataFrame, DataFrame]) -> None: if isinstance(obj, t_tsdf.TSDF): @@ -212,16 +207,13 @@ def display_improvised(obj: Union[t_tsdf.TSDF, pandasDataFrame, DataFrame]) -> N elif ENV_CAN_RENDER_HTML: @overload - def display_html_improvised(obj: Optional[t_tsdf.TSDF]) -> None: - ... + def display_html_improvised(obj: Optional[t_tsdf.TSDF]) -> None: ... @overload - def display_html_improvised(obj: Optional[pandasDataFrame]) -> None: - ... + def display_html_improvised(obj: Optional[pandasDataFrame]) -> None: ... @overload - def display_html_improvised(obj: Optional[DataFrame]) -> None: - ... + def display_html_improvised(obj: Optional[DataFrame]) -> None: ... def display_html_improvised( obj: Union[t_tsdf.TSDF, pandasDataFrame, DataFrame] From 5eb5ac75032b27a82f10d47db6594045d58921d5 Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Wed, 17 Apr 2024 22:09:20 -0500 Subject: [PATCH 037/137] added pyenv local cmd --- .github/workflows/test.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index c2ccd71c..4c0f2ff0 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -52,7 +52,9 @@ jobs: run: pip install tox - name: Run tox working-directory: ./python - run: tox -e ${{ matrix.env }} + run: | + pyenv local 3.8 3.9 3.10 + tox -e ${{ matrix.env }} - name: Generate coverage report working-directory: ./python run: | From fab134332a5d1730801dddb551dd0c5b72f1b9b4 Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Wed, 17 Apr 2024 22:33:33 -0500 Subject: [PATCH 038/137] trying gabrielfalcao/pyenv-action@v18 --- .github/workflows/test.yml | 32 +++++--------------------------- 1 file changed, 5 insertions(+), 27 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4c0f2ff0..7403c06d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -20,33 +20,11 @@ jobs: - dbr104 - dbr91 steps: - - name: Install python build tools - shell: bash - # Ref: https://github.com/pyenv/pyenv/wiki#suggested-build-environment - run: | - sudo apt-get update -y - sudo apt-get install -y make build-essential libssl-dev zlib1g-dev \ - libbz2-dev libreadline-dev libsqlite3-dev wget curl llvm \ - libncursesw5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev libffi-dev liblzma-dev ffmpeg - - name: Install pyenv - shell: bash - run: | - git clone https://github.com/pyenv/pyenv.git "$HOME/.pyenv" - - name: Setup environment variables - shell: bash - run: | - PYENV_ROOT="$HOME/.pyenv" - PYENV_BIN="$PYENV_ROOT/bin" - echo "$PYENV_BIN" >> $GITHUB_PATH - echo "PYENV_ROOT=$PYENV_ROOT" >> $GITHUB_ENV - - name: Check pyenv version - shell: bash - run: | - pyenv --version - - name: Install python versions - shell: bash - run: | - pyenv install 3.8 3.9 3.10 + - name: setup pyenv + uses: "gabrielfalcao/pyenv-action@v18" + with: + default: 3.9 + versions: 3.8, 3.10 - uses: actions/checkout@v3 - name: Install tox run: pip install tox From 2358319eba055c6e0b8c99f8d7381e6712dcbabe Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Wed, 17 Apr 2024 22:39:26 -0500 Subject: [PATCH 039/137] removed default --- .github/workflows/test.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git 
a/.github/workflows/test.yml b/.github/workflows/test.yml index 7403c06d..ee4b5900 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -23,8 +23,7 @@ jobs: - name: setup pyenv uses: "gabrielfalcao/pyenv-action@v18" with: - default: 3.9 - versions: 3.8, 3.10 + versions: 3.8, 3.9, 3.10 - uses: actions/checkout@v3 - name: Install tox run: pip install tox From 35d47bd3dbd9180f3e9be381e04840e150165476 Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Wed, 17 Apr 2024 22:44:16 -0500 Subject: [PATCH 040/137] trying setup-python action --- .github/workflows/test.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ee4b5900..1cc81ed3 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -20,10 +20,9 @@ jobs: - dbr104 - dbr91 steps: - - name: setup pyenv - uses: "gabrielfalcao/pyenv-action@v18" + - uses: actions/setup-python@v4 with: - versions: 3.8, 3.9, 3.10 + python-version: ['3.8', '3.9', '3.10'] - uses: actions/checkout@v3 - name: Install tox run: pip install tox From 9eab483f1bf724483bf8850e2287c04ab934672f Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Wed, 17 Apr 2024 22:47:03 -0500 Subject: [PATCH 041/137] changed sequence to strings --- .github/workflows/test.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 1cc81ed3..5ec1c483 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -22,7 +22,10 @@ jobs: steps: - uses: actions/setup-python@v4 with: - python-version: ['3.8', '3.9', '3.10'] + python-version: | + 3.8 + 3.9 + 3.10 - uses: actions/checkout@v3 - name: Install tox run: pip install tox From 30d44369fd9bb178ea4723cc15faf14175d59c09 Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Wed, 17 Apr 2024 22:48:13 -0500 Subject: [PATCH 042/137] removed pyenv cmd --- .github/workflows/test.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5ec1c483..94a5036e 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -32,7 +32,6 @@ jobs: - name: Run tox working-directory: ./python run: | - pyenv local 3.8 3.9 3.10 tox -e ${{ matrix.env }} - name: Generate coverage report working-directory: ./python From 5e2554ac2899277353965cc9376badc583c31cb5 Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Wed, 17 Apr 2024 23:19:45 -0500 Subject: [PATCH 043/137] only trying two env --- .github/workflows/test.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 94a5036e..d8c14f4a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -15,10 +15,10 @@ jobs: env: - dbr142 - dbr133 - - dbr122 - - dbr113 - - dbr104 - - dbr91 + # - dbr122 + # - dbr113 + # - dbr104 + # - dbr91 steps: - uses: actions/setup-python@v4 with: From f29e5b7fd9da5fbfb6551c6fc0ebb1c288a4541f Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Wed, 17 Apr 2024 23:31:09 -0500 Subject: [PATCH 044/137] tox-gh-actions plugin --- .github/workflows/test.yml | 29 +++++++++++------------------ python/tox.ini | 5 +++++ 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d8c14f4a..758078be 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -12,27 +12,20 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - env: - - dbr142 - - 
dbr133 - # - dbr122 - # - dbr113 - # - dbr104 - # - dbr91 + python-version: ['3.8', '3.9', '3.10'] steps: - - uses: actions/setup-python@v4 - with: - python-version: | - 3.8 - 3.9 - 3.10 - uses: actions/checkout@v3 - - name: Install tox - run: pip install tox - - name: Run tox - working-directory: ./python + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies run: | - tox -e ${{ matrix.env }} + python -m pip install --upgrade pip + python -m pip install tox tox-gh-actions + - name: Test with tox + working-directory: ./python + run: tox - name: Generate coverage report working-directory: ./python run: | diff --git a/python/tox.ini b/python/tox.ini index e4b0eae3..da62e4ac 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -14,6 +14,11 @@ envlist = dbr{91,104,113,122,133,142} skip_missing_interpreters = true +[gh-actions] +python = + 3.8: dbr91, dbr104 + 3.9: dbr113, dbr122 + 3.10: dbr133, dbr142 [testenv] description = run the tests under {envname} From 124f93ac68b2d4a435362168f37c04223836e5dd Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Thu, 18 Apr 2024 08:03:54 -0500 Subject: [PATCH 045/137] fetch tags true --- .github/workflows/test.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 758078be..0cedd629 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -14,7 +14,9 @@ jobs: matrix: python-version: ['3.8', '3.9', '3.10'] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 + with: + fetch-tags: true - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: From 36f633cf6ad3e1200199881b9898ef7df716bf0a Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Thu, 18 Apr 2024 08:16:06 -0500 Subject: [PATCH 046/137] testing git --- .github/workflows/test.yml | 48 +++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 0cedd629..a1cfbf05 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -17,26 +17,30 @@ jobs: - uses: actions/checkout@v4 with: fetch-tags: true - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install tox tox-gh-actions - - name: Test with tox - working-directory: ./python - run: tox - - name: Generate coverage report - working-directory: ./python + - name: test run: | - python -I -m pip install 'coverage<8,>=7' pyspark==3.2.1 -r requirements.txt - coverage run -m unittest discover -s tests -p '*_tests.py' - coverage combine - coverage xml - - name: Publish test coverage - uses: codecov/codecov-action@v3 - with: - fail_ci_if_error: true - files: ./python/coverage.xml + ls + git status + # - name: Set up Python ${{ matrix.python-version }} + # uses: actions/setup-python@v4 + # with: + # python-version: ${{ matrix.python-version }} + # - name: Install dependencies + # run: | + # python -m pip install --upgrade pip + # python -m pip install tox tox-gh-actions + # - name: Test with tox + # working-directory: ./python + # run: tox + # - name: Generate coverage report + # working-directory: ./python + # run: | + # python -I -m pip install 'coverage<8,>=7' pyspark==3.2.1 -r requirements.txt + # coverage run -m 
unittest discover -s tests -p '*_tests.py' + # coverage combine + # coverage xml + # - name: Publish test coverage + # uses: codecov/codecov-action@v3 + # with: + # fail_ci_if_error: true + # files: ./python/coverage.xml From acba106f9dc5c337a810566ed25b4d182d553486 Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Thu, 18 Apr 2024 08:18:09 -0500 Subject: [PATCH 047/137] testing git describe --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index a1cfbf05..08963f61 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -20,7 +20,7 @@ jobs: - name: test run: | ls - git status + git describe --abbrev=0 --tags # - name: Set up Python ${{ matrix.python-version }} # uses: actions/setup-python@v4 # with: From d5e60cbcf379619d78949ed28a66f430f15e37fc Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Thu, 18 Apr 2024 08:29:42 -0500 Subject: [PATCH 048/137] fetch-depth = 0 --- .github/workflows/test.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 08963f61..3e4bb268 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -16,6 +16,7 @@ jobs: steps: - uses: actions/checkout@v4 with: + fetch-depth: 0 fetch-tags: true - name: test run: | From 13efc504f57c62d66e2bd4db80160d79654539bb Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Thu, 18 Apr 2024 08:31:17 -0500 Subject: [PATCH 049/137] uncomment --- .github/workflows/test.yml | 48 +++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 26 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 3e4bb268..1209f0b2 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -18,30 +18,26 @@ jobs: with: fetch-depth: 0 fetch-tags: true - - name: test + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install tox tox-gh-actions + - name: Test with tox + working-directory: ./python + run: tox + - name: Generate coverage report + working-directory: ./python run: | - ls - git describe --abbrev=0 --tags - # - name: Set up Python ${{ matrix.python-version }} - # uses: actions/setup-python@v4 - # with: - # python-version: ${{ matrix.python-version }} - # - name: Install dependencies - # run: | - # python -m pip install --upgrade pip - # python -m pip install tox tox-gh-actions - # - name: Test with tox - # working-directory: ./python - # run: tox - # - name: Generate coverage report - # working-directory: ./python - # run: | - # python -I -m pip install 'coverage<8,>=7' pyspark==3.2.1 -r requirements.txt - # coverage run -m unittest discover -s tests -p '*_tests.py' - # coverage combine - # coverage xml - # - name: Publish test coverage - # uses: codecov/codecov-action@v3 - # with: - # fail_ci_if_error: true - # files: ./python/coverage.xml + python -I -m pip install 'coverage<8,>=7' pyspark==3.2.1 -r requirements.txt + coverage run -m unittest discover -s tests -p '*_tests.py' + coverage combine + coverage xml + - name: Publish test coverage + uses: codecov/codecov-action@v3 + with: + fail_ci_if_error: true + files: ./python/coverage.xml From 8ceb750153b3b77f488bde98d45b63b9002211e3 Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Thu, 18 Apr 2024 08:42:19 -0500 Subject: [PATCH 050/137] removed 
unnecessary tox envs

---
 python/tox.ini | 2 --
 1 file changed, 2 deletions(-)

diff --git a/python/tox.ini b/python/tox.ini
index da62e4ac..01c9bddd 100644
--- a/python/tox.ini
+++ b/python/tox.ini
@@ -5,10 +5,8 @@ requires =
     wheel>=0.38,<1
 isolated_build = true
 envlist =
-    format
     lint
     type-check
-    build-dist
 ; Mirror Supported LTS DBR versions here: https://docs.databricks.com/release-notes/runtime/
 ; Use correct PySpark version based on Python version present in env name
     dbr{91,104,113,122,133,142}

From ed756bb87551001efd99b6021c0ffd728019b908 Mon Sep 17 00:00:00 2001
From: Taylor Isbell
Date: Thu, 18 Apr 2024 08:58:51 -0500
Subject: [PATCH 051/137] removed dupe codecov step

---
 .github/workflows/test.yml | 13 +++----------
 python/tox.ini             |  1 -
 2 files changed, 3 insertions(+), 11 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 1209f0b2..ea40abdc 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -1,4 +1,4 @@
-name: build
+name: Test

 on:
   pull_request:
@@ -8,7 +8,7 @@ on:
     branches: ['*']

 jobs:
-  tox:
+  test:
     runs-on: ubuntu-latest
     strategy:
       matrix:
@@ -26,16 +26,9 @@ jobs:
         run: |
           python -m pip install --upgrade pip
           python -m pip install tox tox-gh-actions
-      - name: Test with tox
+      - name: Execute tox envs
        working-directory: ./python
        run: tox
-      - name: Generate coverage report
-        working-directory: ./python
-        run: |
-          python -I -m pip install 'coverage<8,>=7' pyspark==3.2.1 -r requirements.txt
-          coverage run -m unittest discover -s tests -p '*_tests.py'
-          coverage combine
-          coverage xml
       - name: Publish test coverage
         uses: codecov/codecov-action@v3
         with:
diff --git a/python/tox.ini b/python/tox.ini
index 01c9bddd..f6208d73 100644
--- a/python/tox.ini
+++ b/python/tox.ini
@@ -39,7 +39,6 @@ commands =
     coverage --version
     coverage run -m unittest discover -s tests -p '*_tests.py'
-
 [testenv:lint]
 description = run linters
 skipsdist = true

From 4b30e4e9761c2030885c03230bf3656b3dbf2b7c Mon Sep 17 00:00:00 2001
From: Taylor Isbell
Date: Thu, 18 Apr 2024 09:14:26 -0500
Subject: [PATCH 052/137] fixed coverage cmds in tox.ini

---
 python/tox.ini | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/python/tox.ini b/python/tox.ini
index f6208d73..014a0624 100644
--- a/python/tox.ini
+++ b/python/tox.ini
@@ -10,6 +10,8 @@ envlist =
 ; Mirror Supported LTS DBR versions here: https://docs.databricks.com/release-notes/runtime/
 ; Use correct PySpark version based on Python version present in env name
     dbr{91,104,113,122,133,142}
+    coverage-init
+    coverage-report
 skip_missing_interpreters = true

 [gh-actions]
@@ -70,7 +72,7 @@ commands =
     python -m build --sdist --wheel {posargs: {toxinidir}}

-[testenv:cov-init]
+[testenv:coverage-init]
 setenv =
     COVERAGE_FILE = .coverage
 commands =

From 74352ea517958adab40f1e2a86ded264a0910ec5 Mon Sep 17 00:00:00 2001
From: Taylor Isbell
Date: Thu, 18 Apr 2024 09:24:57 -0500
Subject: [PATCH 053/137] removed coverage-init

---
 python/tox.ini | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/python/tox.ini b/python/tox.ini
index 014a0624..59f09479 100644
--- a/python/tox.ini
+++ b/python/tox.ini
@@ -10,7 +10,6 @@ envlist =
 ; Mirror Supported LTS DBR versions here: https://docs.databricks.com/release-notes/runtime/
 ; Use correct PySpark version based on Python version present in env name
     dbr{91,104,113,122,133,142}
-    coverage-init
     coverage-report
 skip_missing_interpreters = true
@@ -72,12 +71,6 @@ deps =
 commands =
     python -m build --sdist --wheel {posargs: {toxinidir}}
-[testenv:coverage-init] -setenv = - COVERAGE_FILE = .coverage -commands = - coverage erase - [testenv:coverage-report] description = combine coverage data and generate reports deps = coverage>=7,<8 @@ -86,6 +79,7 @@ skip_install = true setenv = COVERAGE_FILE = .coverage commands = + coverage erase coverage --version coverage combine coverage report -m From f1479e735cd4a0f3df0dfcb79a62b17edd780392 Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Thu, 18 Apr 2024 10:00:54 -0500 Subject: [PATCH 054/137] moved erase cmd --- python/tox.ini | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/python/tox.ini b/python/tox.ini index 59f09479..4084530a 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -37,7 +37,7 @@ deps = -rrequirements/dev.txt coverage>=7,<8 commands = - coverage --version + coverage erase coverage run -m unittest discover -s tests -p '*_tests.py' [testenv:lint] @@ -79,8 +79,6 @@ skip_install = true setenv = COVERAGE_FILE = .coverage commands = - coverage erase - coverage --version coverage combine coverage report -m coverage xml From 14c30304ce3d04cc797b5e2e87e2f005fb20b076 Mon Sep 17 00:00:00 2001 From: Lorin Date: Thu, 18 Apr 2024 10:08:35 -0600 Subject: [PATCH 055/137] PR template --- .github/pull_request_template.md | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 .github/pull_request_template.md diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 00000000..504a1aeb --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,29 @@ +## Changes + + +### Linked issues + + +Resolves #.. + +### Functionality + +- [ ] added relevant user documentation +- [ ] added a new Class method +- [ ] modified existing Class method: `...` +- [ ] added a new function +- [ ] modified existing function: `...` +- [ ] added a new test +- [ ] modified existing test: `...` +- [ ] added a new example +- [ ] modified existing example: `...` +- [ ] added a new utility +- [ ] modified existing utility: `...` + +### Tests + + +- [ ] manually tested +- [ ] added unit tests +- [ ] added integration tests +- [ ] verified on staging environment (screenshot attached) \ No newline at end of file From eeaea58c6805ddb68cf258eed324ad155032b486 Mon Sep 17 00:00:00 2001 From: Lorin Date: Thu, 18 Apr 2024 10:37:26 -0600 Subject: [PATCH 056/137] issue templates --- .github/ISSUE_TEMPLATE/bug.yml | 66 ++++++++++++++++++++++++++++++ .github/ISSUE_TEMPLATE/config.yml | 9 ++++ .github/ISSUE_TEMPLATE/feature.yml | 33 +++++++++++++++ 3 files changed, 108 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/bug.yml create mode 100644 .github/ISSUE_TEMPLATE/config.yml create mode 100644 .github/ISSUE_TEMPLATE/feature.yml diff --git a/.github/ISSUE_TEMPLATE/bug.yml b/.github/ISSUE_TEMPLATE/bug.yml new file mode 100644 index 00000000..4978fafd --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug.yml @@ -0,0 +1,66 @@ +# See https://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-issue-forms +# and https://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema +name: Bug Report +description: Something is not working with Tempo +title: "[BUG]: " +labels: ["bug", "needs-triage"] +projects: ["databrickslabs/12"] +body: + - type: checkboxes + attributes: + label: Is there an existing issue for this? + description: Please search to see if an issue already exists for the bug you encountered. 
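+      # each 'options' entry renders as a checkbox; with required: true the form
+      # cannot be submitted until the box is ticked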
+      options:
+        - label: I have searched the existing issues
+          required: true
+  - type: textarea
+    attributes:
+      label: Current Behavior
+      description: |
+        A concise description of what you're experiencing.
+        **Do not paste links to attachments with logs and/or images, as all issues with attachments will get deleted.**
+        Use the `Relevant log output` field to paste redacted log output without personal identifying information (PII).
+        You can Ctrl/Cmd+V the screenshot, which would appear as a rendered image if it doesn't contain any PII.
+    validations:
+      required: false
+  - type: textarea
+    attributes:
+      label: Expected Behavior
+      description: A concise description of what you expected to happen.
+    validations:
+      required: false
+  - type: textarea
+    attributes:
+      label: Steps To Reproduce
+      description: Steps to reproduce the behavior.
+      placeholder: |
+        1. In this environment...
+        1. With this config...
+        1. Run '...'
+        1. See error...
+    validations:
+      required: false
+  - type: dropdown
+    id: cloud
+    attributes:
+      label: Cloud
+      description: What cloud are you using?
+      options:
+        - AWS
+        - Azure
+        - GCP
+    validations:
+      required: true
+  - type: textarea
+    id: version
+    attributes:
+      label: Version
+      description: What version of our software are you running?
+    validations:
+      required: true
+  - type: textarea
+    id: logs
+    attributes:
+      label: Relevant log output
+      description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
+      render: shell
\ No newline at end of file
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
new file mode 100644
index 00000000..41af3259
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1,9 @@
+blank_issues_enabled: false
+contact_links:
+  - name: General Databricks questions
+    url: https://help.databricks.com/
+    about: Issues related to Databricks and not related to Tempo
+
+  - name: Tempo Documentation
+    url: https://databrickslabs.github.io/tempo/
+    about: Documentation about Tempo
\ No newline at end of file
diff --git a/.github/ISSUE_TEMPLATE/feature.yml b/.github/ISSUE_TEMPLATE/feature.yml
new file mode 100644
index 00000000..7dcc0600
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/feature.yml
@@ -0,0 +1,33 @@
+# See https://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-issue-forms
+# and https://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema
+name: Feature Request
+description: Something new needs to happen with Tempo
+title: "[FEATURE]: "
+labels: ["enhancement", "needs-triage"]
+projects: ["databrickslabs/13"]
+body:
+  - type: checkboxes
+    attributes:
+      label: Is there an existing issue for this?
+      description: Please search to see if an issue already exists for the feature request you're willing to submit.
+      options:
+        - label: I have searched the existing issues
+          required: true
+  - type: textarea
+    attributes:
+      label: Problem statement
+      description: A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
+    validations:
+      required: true
+  - type: textarea
+    attributes:
+      label: Proposed Solution
+      description: A clear and concise description of what you want to happen.
+    validations:
+      required: true
+  - type: textarea
+    attributes:
+      label: Additional Context
+      description: Add any other context, references or screenshots about the feature request here.
+ validations: + required: false From 47870bb7a5763108888e34d35174efb83b5d35c1 Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Thu, 18 Apr 2024 11:03:01 -0500 Subject: [PATCH 057/137] dbr 14.3 support --- python/requirements/{dbr142.txt => dbr143.txt} | 2 +- python/tox.ini | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) rename python/requirements/{dbr142.txt => dbr143.txt} (82%) diff --git a/python/requirements/dbr142.txt b/python/requirements/dbr143.txt similarity index 82% rename from python/requirements/dbr142.txt rename to python/requirements/dbr143.txt index b4ad90dd..19c4342e 100644 --- a/python/requirements/dbr142.txt +++ b/python/requirements/dbr143.txt @@ -1,4 +1,4 @@ -delta-spark==3.0.0 +delta-spark==3.1.0 ipython==8.14.0 numpy==1.23.5 pandas==1.5.3 diff --git a/python/tox.ini b/python/tox.ini index 4084530a..cf95f623 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -9,7 +9,7 @@ envlist = type-check ; Mirror Supported LTS DBR versions here: https://docs.databricks.com/release-notes/runtime/ ; Use correct PySpark version based on Python version present in env name - dbr{91,104,113,122,133,142} + dbr{91,104,113,122,133,143} coverage-report skip_missing_interpreters = true @@ -17,7 +17,7 @@ skip_missing_interpreters = true python = 3.8: dbr91, dbr104 3.9: dbr113, dbr122 - 3.10: dbr133, dbr142 + 3.10: dbr133, dbr143 [testenv] description = run the tests under {envname} @@ -26,7 +26,7 @@ wheel_build_env = .pkg setenv = COVERAGE_FILE = .coverage.{envname} basepython = - dbr142: py310 + dbr143: py310 dbr133: py310 dbr122: py39 dbr113: py39 From 3c7f1883976c63cc95e4cf2f9f1e8c56dc955a76 Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Thu, 18 Apr 2024 11:53:48 -0500 Subject: [PATCH 058/137] removed lint and type-check from default envlist --- python/tox.ini | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/tox.ini b/python/tox.ini index cf95f623..edee0a19 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -5,8 +5,6 @@ requires = wheel>=0.38,<1 isolated_build = true envlist = - lint - type-check ; Mirror Supported LTS DBR versions here: https://docs.databricks.com/release-notes/runtime/ ; Use correct PySpark version based on Python version present in env name dbr{91,104,113,122,133,143} From 39e49c45e95ef442a96a729a3b183a8280dbe12e Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Thu, 18 Apr 2024 11:59:35 -0500 Subject: [PATCH 059/137] made reusable ci action --- .github/actions/ci.yml | 56 ++++++++++++++++++++++++++++++++++++++ .github/workflows/test.yml | 30 ++------------------ 2 files changed, 59 insertions(+), 27 deletions(-) create mode 100644 .github/actions/ci.yml diff --git a/.github/actions/ci.yml b/.github/actions/ci.yml new file mode 100644 index 00000000..e5c1d84e --- /dev/null +++ b/.github/actions/ci.yml @@ -0,0 +1,56 @@ +name: CI + +on: + workflow_dispatch: + workflow_call: + +jobs: + lint-and-check: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: true + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: 3.10 + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install tox tox-gh-actions + - name: Execute tox envs + working-directory: ./python + run: tox lint + - name: Execute tox envs + working-directory: ./python + run: tox type-check + + test: + needs: ci + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ['3.8', '3.9', '3.10'] + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: 
true + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install tox tox-gh-actions + - name: Execute tox envs + working-directory: ./python + run: tox + - name: Publish test coverage + uses: codecov/codecov-action@v3 + with: + fail_ci_if_error: true + files: ./python/coverage.xml diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ea40abdc..e72e95ed 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -3,34 +3,10 @@ name: Test on: pull_request: branches: [ 'master' ] - # workflow_dispatch: push: branches: ['*'] + jobs: - test: - runs-on: ubuntu-latest - strategy: - matrix: - python-version: ['3.8', '3.9', '3.10'] - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - fetch-tags: true - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install tox tox-gh-actions - - name: Execute tox envs - working-directory: ./python - run: tox - - name: Publish test coverage - uses: codecov/codecov-action@v3 - with: - fail_ci_if_error: true - files: ./python/coverage.xml + ci: + uses: ./.github/actions/ci.yml From c7a509a8ed880034168a1ab7710780d9391ac01b Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Thu, 18 Apr 2024 12:02:11 -0500 Subject: [PATCH 060/137] forgot version ref --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e72e95ed..e3ae3285 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -9,4 +9,4 @@ on: jobs: ci: - uses: ./.github/actions/ci.yml + uses: ./.github/actions/ci.yml@main From 4939ac582b6cb4bfcec79072eacf707eac3c16c6 Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Thu, 18 Apr 2024 12:03:32 -0500 Subject: [PATCH 061/137] moved to workflows subdir --- .github/{actions => workflows}/ci.yml | 0 .github/workflows/test.yml | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename .github/{actions => workflows}/ci.yml (100%) diff --git a/.github/actions/ci.yml b/.github/workflows/ci.yml similarity index 100% rename from .github/actions/ci.yml rename to .github/workflows/ci.yml diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e3ae3285..c3177ed6 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -9,4 +9,4 @@ on: jobs: ci: - uses: ./.github/actions/ci.yml@main + uses: ./.github/workflows/ci.yml@main From 12ef690917856c24d0417f9bcad7d19dbe661d95 Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Thu, 18 Apr 2024 12:04:05 -0500 Subject: [PATCH 062/137] removed version ref --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index c3177ed6..4a6abe2b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -9,4 +9,4 @@ on: jobs: ci: - uses: ./.github/workflows/ci.yml@main + uses: ./.github/workflows/ci.yml From 2eafd0e163970c7f22b5b44494ebeccd8f1d38ee Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Thu, 18 Apr 2024 12:04:46 -0500 Subject: [PATCH 063/137] fixed job name mismatch --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e5c1d84e..d8deee70 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -28,7 +28,7 @@ jobs: run: tox type-check test: - needs: ci + needs: lint-and-check runs-on: ubuntu-latest strategy: matrix: From 9eb5fb04d10287d4ea6fd19f8bc3fa33bdb8defb Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Thu, 18 Apr 2024 12:05:39 -0500 Subject: [PATCH 064/137] convert int to str --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d8deee70..a63986f5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -15,7 +15,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: 3.10 + python-version: '3.10' - name: Install dependencies run: | python -m pip install --upgrade pip From 5dd768ae89ad806bbcf33be878c1dbc91d660f6f Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Thu, 18 Apr 2024 12:06:24 -0500 Subject: [PATCH 065/137] forgot -e flag --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a63986f5..e4d6d9a8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -22,10 +22,10 @@ jobs: python -m pip install tox tox-gh-actions - name: Execute tox envs working-directory: ./python - run: tox lint + run: tox -e lint - name: Execute tox envs working-directory: ./python - run: tox type-check + run: tox -e type-check test: needs: lint-and-check From 3a5b4a278dde286354116e7bc3e4c65e527f2bbd Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Tue, 23 Apr 2024 15:00:29 -0500 Subject: [PATCH 066/137] split push and release actions --- .github/workflows/ci.yml | 56 ------------------- .github/workflows/onrelease.yml | 4 ++ .github/workflows/test.yml | 98 +++++++++++++++++++++++++++++++-- 3 files changed, 96 insertions(+), 62 deletions(-) delete mode 100644 .github/workflows/ci.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml deleted file mode 100644 index e4d6d9a8..00000000 --- a/.github/workflows/ci.yml +++ /dev/null @@ -1,56 +0,0 @@ -name: CI - -on: - workflow_dispatch: - workflow_call: - -jobs: - lint-and-check: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - fetch-tags: true - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.10' - - name: Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install tox tox-gh-actions - - name: Execute tox envs - working-directory: ./python - run: tox -e lint - - name: Execute tox envs - working-directory: ./python - run: tox -e type-check - - test: - needs: lint-and-check - runs-on: ubuntu-latest - strategy: - matrix: - python-version: ['3.8', '3.9', '3.10'] - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - fetch-tags: true - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install tox tox-gh-actions - - name: Execute tox envs - working-directory: ./python - run: tox - - name: Publish test coverage - uses: codecov/codecov-action@v3 - with: - fail_ci_if_error: true - files: ./python/coverage.xml diff --git a/.github/workflows/onrelease.yml b/.github/workflows/onrelease.yml index 11bc9922..11027acc 100644 --- 
a/.github/workflows/onrelease.yml +++ b/.github/workflows/onrelease.yml @@ -47,3 +47,7 @@ jobs: user: __token__ password: ${{ secrets.LABS_PYPI_TOKEN }} packages_dir: python/dist/ + + docs: + needs: release + uses: ./.github/workflows/docs.yml diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4a6abe2b..54ac2aa6 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,12 +1,98 @@ -name: Test +name: push on: pull_request: - branches: [ 'master' ] + types: [opened, synchronize] push: - branches: ['*'] - + branches: ['master'] + workflow_dispatch: jobs: - ci: - uses: ./.github/workflows/ci.yml + lint-and-check: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: true + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.10' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install tox tox-gh-actions + - name: Execute tox envs + working-directory: ./python + run: tox -e lint + - name: Execute tox envs + working-directory: ./python + run: tox -e type-check + + analyze: + runs-on: ubuntu-latest + permissions: + actions: read + contents: read + security-events: write + strategy: + fail-fast: false + matrix: + language: [ 'python' ] + # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] + # Learn more about CodeQL language support at https://git.io/codeql-language-support + steps: + - name: Checkout repository + uses: actions/checkout@v2 + # Initializes the CodeQL tools for scanning. + - name: Initialize CodeQL + uses: github/codeql-action/init@v1 + with: + languages: ${{ matrix.language }} + # If you wish to specify custom queries, you can do so here or in a config file. + # By default, queries listed here will override any specified in a config file. + # Prefix the list here with "+" to use these queries and those in the config file. + # queries: ./path/to/local/query, your-org/your-repo/queries@main + # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). + # If this step fails, then you should remove it and run the build manually (see below) + - name: Autobuild + uses: github/codeql-action/autobuild@v1 + # ℹ️ Command-line programs to run using the OS shell. 
+ # 📚 https://git.io/JvXDl + # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines + # and modify them (or add more) to build your code if your project + # uses a compiled language + #- run: | + # make bootstrap + # make release + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v1 + + test: + needs: lint-and-check + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ['3.8', '3.9', '3.10'] + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: true + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install tox tox-gh-actions + - name: Execute tox envs + working-directory: ./python + run: tox + - name: Publish test coverage + uses: codecov/codecov-action@v3 + with: + fail_ci_if_error: true + files: ./python/coverage.xml \ No newline at end of file From 96db7762ea09fc696a041308c975d071cf77a580 Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Wed, 24 Apr 2024 15:17:12 -0500 Subject: [PATCH 067/137] testing new push action --- .github/workflows/{test.yml => push.yml} | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) rename .github/workflows/{test.yml => push.yml} (93%) diff --git a/.github/workflows/test.yml b/.github/workflows/push.yml similarity index 93% rename from .github/workflows/test.yml rename to .github/workflows/push.yml index 54ac2aa6..dff677bd 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/push.yml @@ -4,12 +4,15 @@ on: pull_request: types: [opened, synchronize] push: - branches: ['master'] - workflow_dispatch: + branches: ['*'] + workflow_dispatch: + +env: + OS: ubuntu-latest jobs: lint-and-check: - runs-on: ubuntu-latest + runs-on: ${{ env.OS }} steps: - uses: actions/checkout@v4 with: @@ -31,7 +34,7 @@ jobs: run: tox -e type-check analyze: - runs-on: ubuntu-latest + runs-on: "${{ env.OS }}" permissions: actions: read contents: read @@ -71,7 +74,7 @@ jobs: test: needs: lint-and-check - runs-on: ubuntu-latest + runs-on: "${{ env.OS }}" strategy: matrix: python-version: ['3.8', '3.9', '3.10'] @@ -92,7 +95,7 @@ jobs: working-directory: ./python run: tox - name: Publish test coverage - uses: codecov/codecov-action@v3 + uses: codecov/codecov-action@v4 with: fail_ci_if_error: true files: ./python/coverage.xml \ No newline at end of file From b5089e2d783a6d5444c791e1b588968327cde3bd Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Wed, 24 Apr 2024 15:26:23 -0500 Subject: [PATCH 068/137] try again --- .github/workflows/push.yml | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index dff677bd..59553005 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -7,12 +7,9 @@ on: branches: ['*'] workflow_dispatch: -env: - OS: ubuntu-latest - jobs: lint-and-check: - runs-on: ${{ env.OS }} + runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 with: @@ -34,7 +31,7 @@ jobs: run: tox -e type-check analyze: - runs-on: "${{ env.OS }}" + runs-on: ubuntu-latest permissions: actions: read contents: read @@ -74,7 +71,7 @@ jobs: test: needs: lint-and-check - runs-on: "${{ env.OS }}" + runs-on: ubuntu-latest strategy: matrix: python-version: ['3.8', '3.9', '3.10'] From eff62defa9a464ba84633741fc40dfe3b7be9a33 Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Wed, 24 Apr 2024 16:18:53 -0500 
Subject: [PATCH 069/137] added code cov token --- .github/workflows/push.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index 59553005..4f8eb270 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -95,4 +95,5 @@ jobs: uses: codecov/codecov-action@v4 with: fail_ci_if_error: true - files: ./python/coverage.xml \ No newline at end of file + files: ./python/coverage.xml + token: ${{ secrets.CODECOV_TOKEN }} \ No newline at end of file From d24d0f4d381191b494efc1a814350e6e1911b93f Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Wed, 24 Apr 2024 16:19:28 -0500 Subject: [PATCH 070/137] fixed tox build env --- python/tox.ini | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/tox.ini b/python/tox.ini index edee0a19..9e1655ed 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -66,8 +66,9 @@ description = build distribution skip_install = true deps = build + semver commands = - python -m build --sdist --wheel {posargs: {toxinidir}} + python setup.py clean bdist_wheel [testenv:coverage-report] description = combine coverage data and generate reports From 457dea866f49340676ab20c0f812f90d3a2ecec5 Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Wed, 24 Apr 2024 16:27:52 -0500 Subject: [PATCH 071/137] testing release action --- .github/workflows/onrelease.yml | 53 ----------------------- .github/workflows/release.yml | 74 +++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+), 53 deletions(-) delete mode 100644 .github/workflows/onrelease.yml create mode 100644 .github/workflows/release.yml diff --git a/.github/workflows/onrelease.yml b/.github/workflows/onrelease.yml deleted file mode 100644 index 11027acc..00000000 --- a/.github/workflows/onrelease.yml +++ /dev/null @@ -1,53 +0,0 @@ -name: release - -on: - push: - tags: - - 'v*' # only release a versioned tag, such as v.X.Y.Z - -jobs: - release: - runs-on: ${{ matrix.os }} - strategy: - max-parallel: 1 - matrix: - python-version: [ 3.9 ] - os: [ ubuntu-latest ] - - steps: - - uses: actions/checkout@v1 - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v1 - with: - python-version: ${{ matrix.python-version }} - - - uses: actions/cache@v2 - id: cache - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} - restore-keys: | - ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} - ${{ runner.os }}-pip- - - name: Install pip - run: python -m pip install --upgrade pip - - - name: Install dependencies - working-directory: ./python - run: pip install -U -r requirements.txt - - - name: Build dist - working-directory: ./python - run: python setup.py clean bdist_wheel - - - name: Publish a Python distribution to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 - with: - user: __token__ - password: ${{ secrets.LABS_PYPI_TOKEN }} - packages_dir: python/dist/ - - docs: - needs: release - uses: ./.github/workflows/docs.yml diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 00000000..b34b69dc --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,74 @@ +name: release + +on: + push: + # tags: + # - 'v*' # only release a versioned tag, such as v.X.Y.Z + branches: ['*'] + +jobs: + release: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.10' + + # - uses: actions/cache@v2 + # id: 
cache + # with: + # path: ~/.cache/pip + # key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} + # restore-keys: | + # ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} + # ${{ runner.os }}-pip- + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install tox + + - name: Build dist + working-directory: ./python + run: tox -e build-dist + + # - name: Publish a Python distribution to PyPI + # uses: pypa/gh-action-pypi-publish@release/v1 + # if: $${{ github.ref }} == 'refs/heads/master' + # with: + # user: __token__ + # password: ${{ secrets.LABS_PYPI_TOKEN }} + # packages_dir: python/dist/ + + docs: + needs: release + runs-on: ubuntu-latest + steps: + - name: Checkout Code + uses: actions/checkout@v4 + with: + fetch-depth: '0' + - name: Copy Requirements + uses: canastro/copy-file-action@master + with: + source: "python/requirements/dbr143.txt" + target: "docs/requirements.txt" + - name: Build HTML + uses: ammaraskar/sphinx-action@0.4 + with: + pre-build-command: "apt-get update -y && apt-get install -y git && git config --global --add safe.directory /github/workspace" + - name: Upload artifacts + uses: actions/upload-artifact@v1 + with: + name: html-docs + path: docs/_build/html/ + # - name: Deploy 🚀 + # uses: peaceiris/actions-gh-pages@v3 + # if: $${{ github.ref }} == 'refs/heads/master' + # with: + # github_token: ${{ secrets.GITHUB_TOKEN }} + # publish_dir: docs/_build/html From 97e860c4a03106de22d396b07990391f043755b5 Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Wed, 24 Apr 2024 16:30:19 -0500 Subject: [PATCH 072/137] fixed checkout depth --- .github/workflows/release.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index b34b69dc..6b2a69c5 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -12,6 +12,9 @@ jobs: steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: true - name: Set up Python uses: actions/setup-python@v4 From 6a97cb908b1cd77d91d113fa300210e1ef0e48f2 Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Wed, 24 Apr 2024 16:33:54 -0500 Subject: [PATCH 073/137] removed requirements copy step --- .github/workflows/release.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 6b2a69c5..8c1b6f29 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -55,11 +55,11 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: '0' - - name: Copy Requirements - uses: canastro/copy-file-action@master - with: - source: "python/requirements/dbr143.txt" - target: "docs/requirements.txt" + # - name: Copy Requirements + # uses: canastro/copy-file-action@master + # with: + # source: "python/requirements/dbr143.txt" + # target: "docs/requirements.txt" - name: Build HTML uses: ammaraskar/sphinx-action@0.4 with: From 59b2c65f44cb2c236d9109095a96a68b06a4e7c7 Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Wed, 24 Apr 2024 17:31:31 -0500 Subject: [PATCH 074/137] fixed bad docstrings --- python/tempo/tsdf.py | 41 ++++++++++++++++++----------------------- 1 file changed, 18 insertions(+), 23 deletions(-) diff --git a/python/tempo/tsdf.py b/python/tempo/tsdf.py index 4c186ec2..c3b9ad86 100644 --- a/python/tempo/tsdf.py +++ b/python/tempo/tsdf.py @@ -338,15 +338,12 @@ def __getTimePartitions(self, tsPartitionVal: int, fraction: float = 0.1) -> "TS def select(self, *cols: Union[str, List[str]]) 
-> "TSDF": """ pyspark.sql.DataFrame.select() method's equivalent for TSDF objects - Parameters - ---------- - cols : str or list of strs - column names (string). - If one of the column names is '*', that column is expanded to include all columns - in the current :class:`TSDF`. - - Examples - -------- + + :param cols: str or list of strs column names (string). If one of the column names is '*', that + column is expanded to include all columns in the current :class:`TSDF`. + + ## Examples + .. code-block:: python tsdf.select('*').collect() [Row(age=2, name='Alice'), Row(age=5, name='Bob')] tsdf.select('name', 'age').collect() @@ -533,23 +530,22 @@ def show( """ pyspark.sql.DataFrame.show() method's equivalent for TSDF objects - Parameters - ---------- - n : int, optional - Number of rows to show. - truncate : bool or int, optional - If set to ``True``, truncate strings longer than 20 chars by default. - If set to a number greater than one, truncates long strings to length ``truncate`` + :param n: Number of rows to show. (default: 20) + :param truncate: If set to True, truncate strings longer than 20 chars by default. + If set to a number greater than one, truncates long strings to length truncate and align cells right. - vertical : bool, optional - If set to ``True``, print output rows vertically (one line - per column value). + :param vertical: If set to True, print output rows vertically (one line per column value). - Example to show usage - --------------------- + ## Example to show usage: + .. code-block:: python from pyspark.sql.functions import * - phone_accel_df = spark.read.format("csv").option("header", "true").load("dbfs:/home/tempo/Phones_accelerometer").withColumn("event_ts", (col("Arrival_Time").cast("double")/1000).cast("timestamp")).withColumn("x", col("x").cast("double")).withColumn("y", col("y").cast("double")).withColumn("z", col("z").cast("double")).withColumn("event_ts_dbl", col("event_ts").cast("double")) + phone_accel_df = spark.read.format("csv").option("header", "true").load("dbfs:/home/tempo/Phones_accelerometer") \n + .withColumn("event_ts", (col("Arrival_Time").cast("double")/1000).cast("timestamp")) \n + .withColumn("x", col("x").cast("double")) \n + .withColumn("y", col("y").cast("double")) \n + .withColumn("z", col("z").cast("double")) \n + .withColumn("event_ts_dbl", col("event_ts").cast("double")) from tempo import * @@ -557,7 +553,6 @@ def show( # Call show method here phone_accel_tsdf.show() - """ # validate k <= n if k > n: From 357abe6038687aaecd1dcd2b29a1f29b57521183 Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Wed, 24 Apr 2024 17:31:53 -0500 Subject: [PATCH 075/137] got tox build-docs working --- python/setup.py | 2 +- python/tox.ini | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/python/setup.py b/python/setup.py index a2d6d6a4..e3817ebd 100644 --- a/python/setup.py +++ b/python/setup.py @@ -21,7 +21,7 @@ long_description_content_type="text/markdown", url="https://databrickslabs.github.io/tempo/", packages=find_packages(where=".", include=["tempo"]), - install_requires=["ipython", "pandas", "scipy"], + install_requires=["ipython", "pandas", "scipy", "pyspark"], extras_require=dict(tests=["pytest"]), classifiers=[ "Programming Language :: Python :: 3", diff --git a/python/tox.ini b/python/tox.ini index 9e1655ed..27f173a1 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -70,6 +70,15 @@ deps = commands = python setup.py clean bdist_wheel +[testenv:build-docs] +description = build distribution +allowlist_externals = 
make +deps = + -r ../docs/requirements.txt + semver +commands = + make --directory ../docs html + [testenv:coverage-report] description = combine coverage data and generate reports deps = coverage>=7,<8 From 6383c1bd3e2b0c11003cbbc8efc0a90c97d37398 Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Wed, 24 Apr 2024 17:35:46 -0500 Subject: [PATCH 076/137] toxified release --- .github/workflows/release.yml | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 8c1b6f29..b4ebb92a 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -9,7 +9,6 @@ on: jobs: release: runs-on: ubuntu-latest - steps: - uses: actions/checkout@v4 with: @@ -48,27 +47,37 @@ jobs: # packages_dir: python/dist/ docs: - needs: release runs-on: ubuntu-latest steps: - - name: Checkout Code - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v4 with: - fetch-depth: '0' - # - name: Copy Requirements - # uses: canastro/copy-file-action@master + python-version: '3.10' + + # - uses: actions/cache@v2 + # id: cache # with: - # source: "python/requirements/dbr143.txt" - # target: "docs/requirements.txt" - - name: Build HTML - uses: ammaraskar/sphinx-action@0.4 - with: - pre-build-command: "apt-get update -y && apt-get install -y git && git config --global --add safe.directory /github/workspace" + # path: ~/.cache/pip + # key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} + # restore-keys: | + # ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} + # ${{ runner.os }}-pip- + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install tox + + - name: Build dist + working-directory: ./python + run: tox -e build-docs + - name: Upload artifacts uses: actions/upload-artifact@v1 with: name: html-docs path: docs/_build/html/ + # - name: Deploy 🚀 # uses: peaceiris/actions-gh-pages@v3 # if: $${{ github.ref }} == 'refs/heads/master' From 5bcee9749bef2a3d4f9f1c267b59271f826447bd Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Wed, 24 Apr 2024 17:54:35 -0500 Subject: [PATCH 077/137] formatting --- python/tempo/tsdf.py | 2 +- python/tempo/utils.py | 24 ++++++++++++++++-------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/python/tempo/tsdf.py b/python/tempo/tsdf.py index c3b9ad86..6876eb94 100644 --- a/python/tempo/tsdf.py +++ b/python/tempo/tsdf.py @@ -536,7 +536,7 @@ def show( and align cells right. :param vertical: If set to True, print output rows vertically (one line per column value). - ## Example to show usage: + ## Example to show usage: .. code-block:: python from pyspark.sql.functions import * diff --git a/python/tempo/utils.py b/python/tempo/utils.py index fbedcca6..74d8bc25 100644 --- a/python/tempo/utils.py +++ b/python/tempo/utils.py @@ -137,11 +137,13 @@ def calculate_time_horizon( @overload -def display_html(df: pandasDataFrame) -> None: ... +def display_html(df: pandasDataFrame) -> None: + ... @overload -def display_html(df: DataFrame) -> None: ... +def display_html(df: DataFrame) -> None: + ... def display_html(df: Union[pandasDataFrame, DataFrame]) -> None: @@ -188,13 +190,16 @@ def get_display_df(tsdf: t_tsdf.TSDF, k: int) -> DataFrame: # to know more refer: /databricks/python_shell/scripts/db_ipykernel_launcher.py @overload - def display_improvised(obj: t_tsdf.TSDF) -> None: ... + def display_improvised(obj: t_tsdf.TSDF) -> None: + ... 
@overload - def display_improvised(obj: pandasDataFrame) -> None: ... + def display_improvised(obj: pandasDataFrame) -> None: + ... @overload - def display_improvised(obj: DataFrame) -> None: ... + def display_improvised(obj: DataFrame) -> None: + ... def display_improvised(obj: Union[t_tsdf.TSDF, pandasDataFrame, DataFrame]) -> None: if isinstance(obj, t_tsdf.TSDF): @@ -207,13 +212,16 @@ def display_improvised(obj: Union[t_tsdf.TSDF, pandasDataFrame, DataFrame]) -> N elif ENV_CAN_RENDER_HTML: @overload - def display_html_improvised(obj: Optional[t_tsdf.TSDF]) -> None: ... + def display_html_improvised(obj: Optional[t_tsdf.TSDF]) -> None: + ... @overload - def display_html_improvised(obj: Optional[pandasDataFrame]) -> None: ... + def display_html_improvised(obj: Optional[pandasDataFrame]) -> None: + ... @overload - def display_html_improvised(obj: Optional[DataFrame]) -> None: ... + def display_html_improvised(obj: Optional[DataFrame]) -> None: + ... def display_html_improvised( obj: Union[t_tsdf.TSDF, pandasDataFrame, DataFrame] From 68e0c39179dedf773233a1c85c26844687368438 Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Wed, 24 Apr 2024 17:55:31 -0500 Subject: [PATCH 078/137] forgot checkout step --- .github/workflows/release.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index b4ebb92a..2cc5f6ad 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -49,6 +49,11 @@ jobs: docs: runs-on: ubuntu-latest steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: true + - name: Set up Python uses: actions/setup-python@v4 with: @@ -68,7 +73,7 @@ jobs: python -m pip install --upgrade pip python -m pip install tox - - name: Build dist + - name: Build docs working-directory: ./python run: tox -e build-docs From 706fdc9bac01659bf2464974419dad8832b74aed Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Wed, 24 Apr 2024 17:55:43 -0500 Subject: [PATCH 079/137] black check and diff --- .github/workflows/push.yml | 2 +- python/tox.ini | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index 4f8eb270..e3e59fad 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -25,7 +25,7 @@ jobs: python -m pip install tox tox-gh-actions - name: Execute tox envs working-directory: ./python - run: tox -e lint + run: tox -e lint -- --check --diff - name: Execute tox envs working-directory: ./python run: tox -e type-check diff --git a/python/tox.ini b/python/tox.ini index 27f173a1..c95d7611 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -46,7 +46,7 @@ deps = flake8 black commands = - black --check {toxinidir}/tempo + black {posargs} {toxinidir}/tempo flake8 --config {toxinidir}/.flake8 {toxinidir}/tempo [testenv:type-check] From 7703b7e9e5ca4afc0f9dec24671d0081ae9102c0 Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Wed, 24 Apr 2024 19:25:28 -0500 Subject: [PATCH 080/137] stupid black version mismatch --- python/tempo/utils.py | 24 ++++++++---------------- python/tox.ini | 2 +- 2 files changed, 9 insertions(+), 17 deletions(-) diff --git a/python/tempo/utils.py b/python/tempo/utils.py index 74d8bc25..fbedcca6 100644 --- a/python/tempo/utils.py +++ b/python/tempo/utils.py @@ -137,13 +137,11 @@ def calculate_time_horizon( @overload -def display_html(df: pandasDataFrame) -> None: - ... +def display_html(df: pandasDataFrame) -> None: ... 
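+# black 24.x formats stub bodies onto the `def` line ("dummy implementations" style);
+# the black==24.4.1 pin added below in tox.ini keeps local and CI formatting in agreement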
@overload -def display_html(df: DataFrame) -> None: - ... +def display_html(df: DataFrame) -> None: ... def display_html(df: Union[pandasDataFrame, DataFrame]) -> None: @@ -190,16 +188,13 @@ def get_display_df(tsdf: t_tsdf.TSDF, k: int) -> DataFrame: # to know more refer: /databricks/python_shell/scripts/db_ipykernel_launcher.py @overload - def display_improvised(obj: t_tsdf.TSDF) -> None: - ... + def display_improvised(obj: t_tsdf.TSDF) -> None: ... @overload - def display_improvised(obj: pandasDataFrame) -> None: - ... + def display_improvised(obj: pandasDataFrame) -> None: ... @overload - def display_improvised(obj: DataFrame) -> None: - ... + def display_improvised(obj: DataFrame) -> None: ... def display_improvised(obj: Union[t_tsdf.TSDF, pandasDataFrame, DataFrame]) -> None: if isinstance(obj, t_tsdf.TSDF): @@ -212,16 +207,13 @@ def display_improvised(obj: Union[t_tsdf.TSDF, pandasDataFrame, DataFrame]) -> N elif ENV_CAN_RENDER_HTML: @overload - def display_html_improvised(obj: Optional[t_tsdf.TSDF]) -> None: - ... + def display_html_improvised(obj: Optional[t_tsdf.TSDF]) -> None: ... @overload - def display_html_improvised(obj: Optional[pandasDataFrame]) -> None: - ... + def display_html_improvised(obj: Optional[pandasDataFrame]) -> None: ... @overload - def display_html_improvised(obj: Optional[DataFrame]) -> None: - ... + def display_html_improvised(obj: Optional[DataFrame]) -> None: ... def display_html_improvised( obj: Union[t_tsdf.TSDF, pandasDataFrame, DataFrame] diff --git a/python/tox.ini b/python/tox.ini index c95d7611..9b391e7f 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -44,7 +44,7 @@ skipsdist = true skip_install = true deps = flake8 - black + black==24.4.1 commands = black {posargs} {toxinidir}/tempo flake8 --config {toxinidir}/.flake8 {toxinidir}/tempo From 837e806fc5861c7f6449aaf99f9087a29ef6c5eb Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Wed, 24 Apr 2024 20:03:17 -0500 Subject: [PATCH 081/137] updated doc requirements --- docs/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 7a76c34a..3aeb1336 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,6 +1,7 @@ sphinx-autobuild==2021.3.14 sphinx-copybutton==0.5.1 -Sphinx==4.5.0 +sphinx==4.5.0 sphinx-design==0.2.0 sphinx-panels==0.6.0 +sphinxcontrib-applehelp==1.0.4 furo==2022.9.29 \ No newline at end of file From 59e9fc112cae5ebac8d2e8afbc5aa374071fab12 Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Wed, 24 Apr 2024 20:22:38 -0500 Subject: [PATCH 082/137] downgraded to py3.9 on docs job --- .github/workflows/release.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 2cc5f6ad..fc8328b8 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -53,11 +53,11 @@ jobs: with: fetch-depth: 0 fetch-tags: true - + - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.10' + python-version: '3.9' # - uses: actions/cache@v2 # id: cache From bc837c05c4aa540f12e2bc0a5089675b70a3d2ae Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Wed, 24 Apr 2024 20:31:26 -0500 Subject: [PATCH 083/137] relaxing all constraints --- docs/requirements.txt | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 3aeb1336..b96f09f0 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,7 +1,6 @@ 
-sphinx-autobuild==2021.3.14 -sphinx-copybutton==0.5.1 -sphinx==4.5.0 -sphinx-design==0.2.0 -sphinx-panels==0.6.0 -sphinxcontrib-applehelp==1.0.4 -furo==2022.9.29 \ No newline at end of file +sphinx-autobuild +sphinx-copybutton +sphinx +sphinx-design +sphinx-panels +furo \ No newline at end of file From 09db68c2b854e05f741b39e7757bec79155b3bb8 Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Wed, 24 Apr 2024 20:52:38 -0500 Subject: [PATCH 084/137] switched to newer tox gh plugin --- .github/workflows/push.yml | 2 +- python/tox.ini | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index e3e59fad..341af40e 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -87,7 +87,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - python -m pip install tox tox-gh-actions + python -m pip install tox tox-gh - name: Execute tox envs working-directory: ./python run: tox diff --git a/python/tox.ini b/python/tox.ini index 9b391e7f..41763d15 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -11,11 +11,11 @@ envlist = coverage-report skip_missing_interpreters = true -[gh-actions] +[gh] python = - 3.8: dbr91, dbr104 - 3.9: dbr113, dbr122 - 3.10: dbr133, dbr143 + 3.8 = dbr91, dbr104 + 3.9 = dbr113, dbr122 + 3.10 = dbr133, dbr143 [testenv] description = run the tests under {envname} From ee09bd5e5d20cc1e1b2a121bf6eee5eae21a74fe Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Wed, 24 Apr 2024 21:46:40 -0500 Subject: [PATCH 085/137] tox gh extension is no good --- .github/workflows/push.yml | 22 +++++++++++---- .github/workflows/release.yml | 50 +++++++++++------------------------ python/tox.ini | 6 ----- 3 files changed, 33 insertions(+), 45 deletions(-) diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index 341af40e..64e9c327 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -74,23 +74,35 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.8', '3.9', '3.10'] + config: + - py: '3.8' + dbr: dbr91 + - py: '3.8' + dbr: dbr104 + - py: '3.9' + dbr: dbr113 + - py: '3.9' + dbr: dbr122 + - py: '3.10' + dbr: dbr133 + - py: '3.10' + dbr: dbr143 steps: - uses: actions/checkout@v4 with: fetch-depth: 0 fetch-tags: true - - name: Set up Python ${{ matrix.python-version }} + - name: Set up Python ${{ matrix.config.py }} uses: actions/setup-python@v4 with: - python-version: ${{ matrix.python-version }} + python-version: ${{ matrix.config.py }} - name: Install dependencies run: | python -m pip install --upgrade pip - python -m pip install tox tox-gh + python -m pip install tox - name: Execute tox envs working-directory: ./python - run: tox + run: tox -e ${{ matrix.config.dbr }} coverage-report - name: Publish test coverage uses: codecov/codecov-action@v4 with: diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index fc8328b8..eb4b23b0 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -1,9 +1,9 @@ -name: release +name: build-release on: + pull_request: + types: [opened, synchronize] push: - # tags: - # - 'v*' # only release a versioned tag, such as v.X.Y.Z branches: ['*'] jobs: @@ -20,15 +20,6 @@ jobs: with: python-version: '3.10' - # - uses: actions/cache@v2 - # id: cache - # with: - # path: ~/.cache/pip - # key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} - # restore-keys: | - # ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} - # ${{ 
runner.os }}-pip- - - name: Install dependencies run: | python -m pip install --upgrade pip @@ -38,13 +29,13 @@ jobs: working-directory: ./python run: tox -e build-dist - # - name: Publish a Python distribution to PyPI - # uses: pypa/gh-action-pypi-publish@release/v1 - # if: $${{ github.ref }} == 'refs/heads/master' - # with: - # user: __token__ - # password: ${{ secrets.LABS_PYPI_TOKEN }} - # packages_dir: python/dist/ + - name: Publish a Python distribution to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + if: startsWith(github.ref, 'refs/tags/v') + with: + user: __token__ + password: ${{ secrets.LABS_PYPI_TOKEN }} + packages_dir: python/dist/ docs: runs-on: ubuntu-latest @@ -59,15 +50,6 @@ jobs: with: python-version: '3.9' - # - uses: actions/cache@v2 - # id: cache - # with: - # path: ~/.cache/pip - # key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} - # restore-keys: | - # ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} - # ${{ runner.os }}-pip- - - name: Install dependencies run: | python -m pip install --upgrade pip @@ -83,9 +65,9 @@ jobs: name: html-docs path: docs/_build/html/ - # - name: Deploy 🚀 - # uses: peaceiris/actions-gh-pages@v3 - # if: $${{ github.ref }} == 'refs/heads/master' - # with: - # github_token: ${{ secrets.GITHUB_TOKEN }} - # publish_dir: docs/_build/html + - name: Deploy 🚀 + uses: peaceiris/actions-gh-pages@v3 + if: startsWith(github.ref, 'refs/tags/v') + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: docs/_build/html diff --git a/python/tox.ini b/python/tox.ini index 41763d15..971b7035 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -11,12 +11,6 @@ envlist = coverage-report skip_missing_interpreters = true -[gh] -python = - 3.8 = dbr91, dbr104 - 3.9 = dbr113, dbr122 - 3.10 = dbr133, dbr143 - [testenv] description = run the tests under {envname} package = wheel From c47a48e6e165fe8b8f452c2a7b549f510f02106e Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Wed, 24 Apr 2024 21:48:54 -0500 Subject: [PATCH 086/137] comma --- .github/workflows/push.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index 64e9c327..a3e8c707 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -102,7 +102,7 @@ jobs: python -m pip install tox - name: Execute tox envs working-directory: ./python - run: tox -e ${{ matrix.config.dbr }} coverage-report + run: tox -e ${{ matrix.config.dbr }},coverage-report - name: Publish test coverage uses: codecov/codecov-action@v4 with: From 0f0144d1551305c78a11a55107ae116ec38caff9 Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Wed, 24 Apr 2024 22:18:00 -0500 Subject: [PATCH 087/137] fixme reminders --- .github/workflows/{release.yml => build-release.yml} | 2 +- .github/workflows/push.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename .github/workflows/{release.yml => build-release.yml} (98%) diff --git a/.github/workflows/release.yml b/.github/workflows/build-release.yml similarity index 98% rename from .github/workflows/release.yml rename to .github/workflows/build-release.yml index eb4b23b0..af342019 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/build-release.yml @@ -4,7 +4,7 @@ on: pull_request: types: [opened, synchronize] push: - branches: ['*'] + branches: ['*'] # FIXME jobs: release: diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index a3e8c707..882b3ca6 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ 
-4,7 +4,7 @@ on: pull_request: types: [opened, synchronize] push: - branches: ['*'] + branches: ['*'] # FIXME workflow_dispatch: jobs: From dd1407f41a1e6dd257356810b234cf0d797c99e1 Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Wed, 24 Apr 2024 22:18:31 -0500 Subject: [PATCH 088/137] removed extra workflows --- .github/workflows/codeql-analysis.yml | 70 --------------------------- .github/workflows/docs.yml | 42 ---------------- 2 files changed, 112 deletions(-) delete mode 100644 .github/workflows/codeql-analysis.yml delete mode 100644 .github/workflows/docs.yml diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml deleted file mode 100644 index ad042902..00000000 --- a/.github/workflows/codeql-analysis.yml +++ /dev/null @@ -1,70 +0,0 @@ -# For most projects, this workflow file will not need changing; you simply need -# to commit it to your repository. -# -# You may wish to alter this file to override the set of languages analyzed, -# or to provide custom queries or build logic. -# -# ******** NOTE ******** -# We have attempted to detect the languages in your repository. Please check -# the `language` matrix defined below to confirm you have the correct set of -# supported CodeQL languages. -# -name: "CodeQL" - -on: - push: - branches: [ master ] - pull_request: - # The branches below must be a subset of the branches above - branches: [ master ] - schedule: - - cron: '21 18 * * 3' - -jobs: - analyze: - name: Analyze - runs-on: ubuntu-latest - permissions: - actions: read - contents: read - security-events: write - - strategy: - fail-fast: false - matrix: - language: [ 'python' ] - # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] - # Learn more about CodeQL language support at https://git.io/codeql-language-support - - steps: - - name: Checkout repository - uses: actions/checkout@v2 - - # Initializes the CodeQL tools for scanning. - - name: Initialize CodeQL - uses: github/codeql-action/init@v1 - with: - languages: ${{ matrix.language }} - # If you wish to specify custom queries, you can do so here or in a config file. - # By default, queries listed here will override any specified in a config file. - # Prefix the list here with "+" to use these queries and those in the config file. - # queries: ./path/to/local/query, your-org/your-repo/queries@main - - # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). - # If this step fails, then you should remove it and run the build manually (see below) - - name: Autobuild - uses: github/codeql-action/autobuild@v1 - - # ℹ️ Command-line programs to run using the OS shell. 
-      # 📚 https://git.io/JvXDl
-
-      # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
-      #    and modify them (or add more) to build your code if your project
-      #    uses a compiled language
-
-      #- run: |
-      #   make bootstrap
-      #   make release
-
-      - name: Perform CodeQL Analysis
-        uses: github/codeql-action/analyze@v1
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
deleted file mode 100644
index a4158c26..00000000
--- a/.github/workflows/docs.yml
+++ /dev/null
@@ -1,42 +0,0 @@
-
-name: docs
-
-on:
-  push:
-    tags:
-      - 'v*' # only release a versioned tag, such as v.X.Y.Z
-
-jobs:
-  build-docs:
-    runs-on: ${{ matrix.os }}
-    strategy:
-      matrix:
-        os: [ ubuntu-latest ]
-    env:
-      OS: ${{ matrix.os }}
-      PYTHON: '3.9'
-    steps:
-      - name: Checkout Code
-        uses: actions/checkout@v3
-        with:
-          fetch-depth: '0'
-      - name: Copy Requirements
-        uses: canastro/copy-file-action@master
-        with:
-          source: "python/requirements.txt"
-          target: "docs/requirements.txt"
-      - name: Build HTML
-        uses: ammaraskar/sphinx-action@0.4
-        with:
-          pre-build-command: "apt-get update -y && apt-get install -y git && git config --global --add safe.directory /github/workspace"
-      - name: Upload artifacts
-        uses: actions/upload-artifact@v1
-        with:
-          name: html-docs
-          path: docs/_build/html/
-      - name: Deploy 🚀
-        uses: peaceiris/actions-gh-pages@v3
-        if: $${{ github.ref }} == 'refs/heads/master'
-        with:
-          github_token: ${{ secrets.GITHUB_TOKEN }}
-          publish_dir: docs/_build/html

From b4e3cfee6ff1a2db69d6e91c9c2b2bedfb9a12ff Mon Sep 17 00:00:00 2001
From: Lorin
Date: Thu, 25 Apr 2024 11:28:35 -0600
Subject: [PATCH 089/137] remove build requirements for dbr91 env

---
 python/requirements/dbr91.txt | 7 -------
 python/tox.ini                | 1 -
 2 files changed, 8 deletions(-)
 delete mode 100644 python/requirements/dbr91.txt

diff --git a/python/requirements/dbr91.txt b/python/requirements/dbr91.txt
deleted file mode 100644
index faf44bb8..00000000
--- a/python/requirements/dbr91.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-delta-spark==1.0.0
-ipython==7.22.0
-numpy==1.19.2
-pandas==1.2.4
-pyarrow==4.0.0
-pyspark==3.1.2
-scipy==1.6.2
\ No newline at end of file
diff --git a/python/tox.ini b/python/tox.ini
index 971b7035..dfbbdab2 100644
--- a/python/tox.ini
+++ b/python/tox.ini
@@ -23,7 +23,6 @@ basepython =
    dbr122: py39
    dbr113: py39
    dbr104: py38
-    dbr91: py38
 deps =
    -rrequirements/{envname}.txt
    -rrequirements/dev.txt

From 3b4a8253f0b301283935617d21c5b9f96b324138 Mon Sep 17 00:00:00 2001
From: Lorin
Date: Fri, 26 Apr 2024 21:41:18 -0600
Subject: [PATCH 090/137] changes to build locally on arm64 arch

---
 python/pyproject.toml                |  7 +++++-
 python/requirements/dbr104.txt       | 14 +++++------
 python/requirements/dbr104_arm64.txt |  4 ++++
 python/requirements/dev.txt          | 10 ++++----
 python/tox.ini                       | 35 +++++++++++++++++++++++++---
 5 files changed, 55 insertions(+), 15 deletions(-)
 create mode 100644 python/requirements/dbr104_arm64.txt

diff --git a/python/pyproject.toml b/python/pyproject.toml
index d7a6d464..0de78f8f 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -1,2 +1,7 @@
 [build-system]
-requires = ["semver"] # PEP 518 - what is required to build
+# PEP 518 - what is required to build this project
+requires = [
+    "semver>=3,<4",
+    "setuptools>=69,<70",
+    "wheel>=0.37,<1",
+]
diff --git a/python/requirements/dbr104.txt b/python/requirements/dbr104.txt
index 4e2284cf..193084ef 100644
--- a/python/requirements/dbr104.txt
+++ b/python/requirements/dbr104.txt
@@ -1,7 +1,7 @@
-delta-spark==1.1.0
-ipython==7.22.0
-numpy==1.20.1
-pandas==1.2.4 -pyarrow==4.0.0 -pyspark==3.2.1 -scipy==1.6.2 \ No newline at end of file +delta-spark~=1.1.0 +ipython~=7.22.0 +numpy~=1.20.1 +pandas~=1.2.4 +pyarrow~=4.0.0 +pyspark~=3.2.1 +scipy~=1.6.2 \ No newline at end of file diff --git a/python/requirements/dbr104_arm64.txt b/python/requirements/dbr104_arm64.txt new file mode 100644 index 00000000..d85d30a1 --- /dev/null +++ b/python/requirements/dbr104_arm64.txt @@ -0,0 +1,4 @@ +delta-spark~=1.1.0 +ipython~=7.22.0 +numpy~=1.20.1 +pyspark~=3.2.1 diff --git a/python/requirements/dev.txt b/python/requirements/dev.txt index c8090248..2fbed1d1 100644 --- a/python/requirements/dev.txt +++ b/python/requirements/dev.txt @@ -1,4 +1,6 @@ -chispa -jsonref -packaging -python-dateutil \ No newline at end of file +pip>=23,<24 +chispa>=0.10,<1 +coverage>=7,<8 +jsonref>=1,<2 +packaging>=24,<25 +python-dateutil>=2,<3 \ No newline at end of file diff --git a/python/tox.ini b/python/tox.ini index dfbbdab2..1104ce10 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -2,8 +2,7 @@ requires = tox>4,<5 virtualenv>20,<21 - wheel>=0.38,<1 -isolated_build = true +isolated_build = True envlist = ; Mirror Supported LTS DBR versions here: https://docs.databricks.com/release-notes/runtime/ ; Use correct PySpark version based on Python version present in env name @@ -24,10 +23,40 @@ basepython = dbr113: py39 dbr104: py38 deps = + -rrequirements/dev.txt -rrequirements/{envname}.txt +commands = + coverage erase + coverage run -m unittest discover -s tests -p '*_tests.py' + +[testenv:dbr104] +deps = -rrequirements/dev.txt - coverage>=7,<8 commands = + # Get the architecture of the system + sys_arch=$(uname -m) + echo "System Architecture: $sys_arch" + # Check if the architecture is arm64 + if [ "sys_arch" = "arm64" ]; then + pip install -r requirements/{envname}_arm64.txt + ;https://github.com/apache/arrow/blob/release-4.0.0/python/requirements-wheel-build.txt + ;Building pyarrow with no dependencies because NumPy 1.16.5 is not supported on + ;MacOS with arm64 arch. The Numpy version is pinned in the requirements.txt to + ;1.20 to avoid the issue, and is compatible with pyarrow 4.0. + pip install pyarrow~=4.0.0 --no-deps + ;https://pandas.pydata.org/pandas-docs/version/1.2/getting_started/install.html#dependencies + ;Building pandas with no dependencies because NumPy 1.16.5 is not supported on + ;MacOS with arm64 arch. The Numpy version is pinned in the requirements.txt to + ;1.20 to avoid the issue, and is compatible with pandas 1.2. + pip install pandas~=1.2.4 --no-deps + ;https://docs.scipy.org/doc/scipy/dev/toolchain.html#numpy + ;Building scipy with no dependencies because NumPy 1.16.5 is not supported on + ;MacOS with arm64 arch. The Numpy version is pinned in the requirements.txt to + ;1.20 to avoid the issue, and is compatible with scipy.1.6. 
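+        ;nb: tox does not pass `commands` lines through a shell by default, so this if/else block assumes a shell-capable runner (later patches rework it via bash)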
+ pip install scipy~=1.6.2 --no-deps + else + pip install -r requirements/{envname}.txt + fi coverage erase coverage run -m unittest discover -s tests -p '*_tests.py' From 4f18dc08eaea146bc805ca096d98d93fbd3e0546 Mon Sep 17 00:00:00 2001 From: Lorin Date: Sat, 27 Apr 2024 17:58:41 -0600 Subject: [PATCH 091/137] simplify shell for dbr104 setup --- python/requirements/dbr104_arm64.txt | 3 +++ python/tox.ini | 18 +++++------------- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/python/requirements/dbr104_arm64.txt b/python/requirements/dbr104_arm64.txt index d85d30a1..afb266cc 100644 --- a/python/requirements/dbr104_arm64.txt +++ b/python/requirements/dbr104_arm64.txt @@ -2,3 +2,6 @@ delta-spark~=1.1.0 ipython~=7.22.0 numpy~=1.20.1 pyspark~=3.2.1 +pyarrow~=4.0.0 +pandas~=1.2.4 +scipy~=1.6.2 \ No newline at end of file diff --git a/python/tox.ini b/python/tox.ini index 1104ce10..a4a36099 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -38,22 +38,14 @@ commands = echo "System Architecture: $sys_arch" # Check if the architecture is arm64 if [ "sys_arch" = "arm64" ]; then - pip install -r requirements/{envname}_arm64.txt - ;https://github.com/apache/arrow/blob/release-4.0.0/python/requirements-wheel-build.txt - ;Building pyarrow with no dependencies because NumPy 1.16.5 is not supported on + ;Building with no dependencies because NumPy<=1.20 is not supported on ;MacOS with arm64 arch. The Numpy version is pinned in the requirements.txt to - ;1.20 to avoid the issue, and is compatible with pyarrow 4.0. - pip install pyarrow~=4.0.0 --no-deps + ;1.20 to avoid the issue, and is compatible with the other dependencies. + ;https://github.com/apache/arrow/blob/release-4.0.0/python/requirements-wheel-build.txt ;https://pandas.pydata.org/pandas-docs/version/1.2/getting_started/install.html#dependencies - ;Building pandas with no dependencies because NumPy 1.16.5 is not supported on - ;MacOS with arm64 arch. The Numpy version is pinned in the requirements.txt to - ;1.20 to avoid the issue, and is compatible with pandas 1.2. - pip install pandas~=1.2.4 --no-deps ;https://docs.scipy.org/doc/scipy/dev/toolchain.html#numpy - ;Building scipy with no dependencies because NumPy 1.16.5 is not supported on - ;MacOS with arm64 arch. The Numpy version is pinned in the requirements.txt to - ;1.20 to avoid the issue, and is compatible with scipy.1.6. 
- pip install scipy~=1.6.2 --no-deps + ;NB: Installation order matters in the requirements file + pip install --no-deps -r requirements/{envname}_${sys_arch}.txt else pip install -r requirements/{envname}.txt fi From e4e8f6af3005ccb8b328ca97ce4fd109cc25e37f Mon Sep 17 00:00:00 2001 From: Lorin Date: Sat, 27 Apr 2024 18:56:39 -0600 Subject: [PATCH 092/137] simplify shell for dbr104 setup --- python/install_cmd.txt | 1 + python/requirements/dbr104/.gitignore | 1 + python/requirements/{ => dbr104}/dbr104.txt | 2 +- .../dbr104/set_install_command.sh | 10 ++++++++ python/requirements/dbr104_arm64.txt | 7 ------ python/tox.ini | 23 ++++++------------- 6 files changed, 20 insertions(+), 24 deletions(-) create mode 100644 python/install_cmd.txt create mode 100644 python/requirements/dbr104/.gitignore rename python/requirements/{ => dbr104}/dbr104.txt (100%) create mode 100755 python/requirements/dbr104/set_install_command.sh delete mode 100644 python/requirements/dbr104_arm64.txt diff --git a/python/install_cmd.txt b/python/install_cmd.txt new file mode 100644 index 00000000..057e3233 --- /dev/null +++ b/python/install_cmd.txt @@ -0,0 +1 @@ +pip install --no-binary pyarrow,pandas,scipy {opts} {packages} diff --git a/python/requirements/dbr104/.gitignore b/python/requirements/dbr104/.gitignore new file mode 100644 index 00000000..a9ac5094 --- /dev/null +++ b/python/requirements/dbr104/.gitignore @@ -0,0 +1 @@ +install_cmd.txt \ No newline at end of file diff --git a/python/requirements/dbr104.txt b/python/requirements/dbr104/dbr104.txt similarity index 100% rename from python/requirements/dbr104.txt rename to python/requirements/dbr104/dbr104.txt index 193084ef..5081a954 100644 --- a/python/requirements/dbr104.txt +++ b/python/requirements/dbr104/dbr104.txt @@ -1,6 +1,6 @@ +numpy~=1.20.1 delta-spark~=1.1.0 ipython~=7.22.0 -numpy~=1.20.1 pandas~=1.2.4 pyarrow~=4.0.0 pyspark~=3.2.1 diff --git a/python/requirements/dbr104/set_install_command.sh b/python/requirements/dbr104/set_install_command.sh new file mode 100755 index 00000000..99ff398e --- /dev/null +++ b/python/requirements/dbr104/set_install_command.sh @@ -0,0 +1,10 @@ +#!/bin/bash +# Based on architecture, set an appropriate install command +SYS_ARCH=$(uname -m) +export SYS_ARCH +if [ "$SYS_ARCH" = "arm64" ]; then + NO_BINARY_PACKAGES="pyarrow,pandas,scipy" + echo "pip install --no-binary $NO_BINARY_PACKAGES {opts} {packages}" > install_cmd.txt +else + echo "pip install {opts} {packages}" > install_cmd.txt +fi diff --git a/python/requirements/dbr104_arm64.txt b/python/requirements/dbr104_arm64.txt deleted file mode 100644 index afb266cc..00000000 --- a/python/requirements/dbr104_arm64.txt +++ /dev/null @@ -1,7 +0,0 @@ -delta-spark~=1.1.0 -ipython~=7.22.0 -numpy~=1.20.1 -pyspark~=3.2.1 -pyarrow~=4.0.0 -pandas~=1.2.4 -scipy~=1.6.2 \ No newline at end of file diff --git a/python/tox.ini b/python/tox.ini index a4a36099..1fd699f3 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -30,25 +30,16 @@ commands = coverage run -m unittest discover -s tests -p '*_tests.py' [testenv:dbr104] +allowlist_externals = bash +commands_pre = + bash -c "./requirements/{envname}/set_install_command.sh" +install_command = + bash -c "cat requirements/{envname}/install_cmd.txt" deps = -rrequirements/dev.txt + ;NB: dependency order matters for this env + -rrequirements/{envname}/{envname}.txt commands = - # Get the architecture of the system - sys_arch=$(uname -m) - echo "System Architecture: $sys_arch" - # Check if the architecture is arm64 - if [ "sys_arch" = "arm64" 
]; then - ;Building with no dependencies because NumPy<=1.20 is not supported on - ;MacOS with arm64 arch. The Numpy version is pinned in the requirements.txt to - ;1.20 to avoid the issue, and is compatible with the other dependencies. - ;https://github.com/apache/arrow/blob/release-4.0.0/python/requirements-wheel-build.txt - ;https://pandas.pydata.org/pandas-docs/version/1.2/getting_started/install.html#dependencies - ;https://docs.scipy.org/doc/scipy/dev/toolchain.html#numpy - ;NB: Installation order matters in the requirements file - pip install --no-deps -r requirements/{envname}_${sys_arch}.txt - else - pip install -r requirements/{envname}.txt - fi coverage erase coverage run -m unittest discover -s tests -p '*_tests.py' From 59171a5845519eb706d2097cae3e18bfa29ce177 Mon Sep 17 00:00:00 2001 From: Lorin Date: Sat, 27 Apr 2024 22:43:49 -0600 Subject: [PATCH 093/137] no build deps for packages that need numpy --- python/requirements/dbr104/.gitignore | 1 - python/requirements/dbr104/dbr104_arm.txt | 4 +++ .../dbr104/install_non_dev_dependencies.sh | 36 +++++++++++++++++++ .../dbr104/set_install_command.sh | 10 ------ python/tox.ini | 10 ++---- 5 files changed, 43 insertions(+), 18 deletions(-) delete mode 100644 python/requirements/dbr104/.gitignore create mode 100644 python/requirements/dbr104/dbr104_arm.txt create mode 100755 python/requirements/dbr104/install_non_dev_dependencies.sh delete mode 100755 python/requirements/dbr104/set_install_command.sh diff --git a/python/requirements/dbr104/.gitignore b/python/requirements/dbr104/.gitignore deleted file mode 100644 index a9ac5094..00000000 --- a/python/requirements/dbr104/.gitignore +++ /dev/null @@ -1 +0,0 @@ -install_cmd.txt \ No newline at end of file diff --git a/python/requirements/dbr104/dbr104_arm.txt b/python/requirements/dbr104/dbr104_arm.txt new file mode 100644 index 00000000..0196bcf4 --- /dev/null +++ b/python/requirements/dbr104/dbr104_arm.txt @@ -0,0 +1,4 @@ +numpy~=1.20.1 +delta-spark~=1.1.0 +ipython~=7.22.0 +pyspark~=3.2.1 \ No newline at end of file diff --git a/python/requirements/dbr104/install_non_dev_dependencies.sh b/python/requirements/dbr104/install_non_dev_dependencies.sh new file mode 100755 index 00000000..c78dcfe2 --- /dev/null +++ b/python/requirements/dbr104/install_non_dev_dependencies.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash + +set -e # Exit on error +[ -n "$DEBUG" ] && set -x # Enable debugging if DEBUG environment variable is set + +# This runs from the root of the repository +ARM_REQ_FILE="$(pwd)/requirements/dbr104/dbr104_arm.txt" +GENERIC_REQ_FILE="$(pwd)/requirements/dbr104/dbr104.txt" + +# Check necessary commands and files +command -v pip >/dev/null 2>&1 || { echo >&2 "pip is required but it's not installed. Aborting."; exit 1; } +[ -f "$ARM_REQ_FILE" ] || { echo >&2 "Required file $ARM_REQ_FILE not found. Aborting."; exit 1; } +[ -f "$GENERIC_REQ_FILE" ] || { echo >&2 "Required file $GENERIC_REQ_FILE not found. Aborting."; exit 1; } + +# Get the architecture of the system +sys_arch=$(uname -m) +echo "System Architecture: $sys_arch" + +echo "Upgrading pip..." +pip install --upgrade pip + +case "$sys_arch" in + arm*) + echo "ARM Architecture detected. Specific model: $sys_arch" + echo "Installing ARM-specific dependencies..." + pip install -r "$ARM_REQ_FILE" + pip install --no-deps pandas~=1.2.4 + pip install --no-deps pyarrow~=4.0.0 + pip install --no-deps scipy~=1.6.2 + ;; + *) + echo "Non-ARM Architecture: $sys_arch" + echo "Installing generic dependencies..." 
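+        # non-ARM hosts can install the pinned generic requirements directly; no --no-deps workarounds are needed here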
+ pip install -r "$GENERIC_REQ_FILE" + ;; +esac diff --git a/python/requirements/dbr104/set_install_command.sh b/python/requirements/dbr104/set_install_command.sh deleted file mode 100755 index 99ff398e..00000000 --- a/python/requirements/dbr104/set_install_command.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash -# Based on architecture, set an appropriate install command -SYS_ARCH=$(uname -m) -export SYS_ARCH -if [ "$SYS_ARCH" = "arm64" ]; then - NO_BINARY_PACKAGES="pyarrow,pandas,scipy" - echo "pip install --no-binary $NO_BINARY_PACKAGES {opts} {packages}" > install_cmd.txt -else - echo "pip install {opts} {packages}" > install_cmd.txt -fi diff --git a/python/tox.ini b/python/tox.ini index 1fd699f3..d2638a95 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -30,16 +30,12 @@ commands = coverage run -m unittest discover -s tests -p '*_tests.py' [testenv:dbr104] -allowlist_externals = bash -commands_pre = - bash -c "./requirements/{envname}/set_install_command.sh" -install_command = - bash -c "cat requirements/{envname}/install_cmd.txt" +allowlist_externals = chmod, bash, source deps = -rrequirements/dev.txt - ;NB: dependency order matters for this env - -rrequirements/{envname}/{envname}.txt commands = + chmod +x ./requirements/dbr104/install_non_dev_dependencies.sh + source ./requirements/dbr104/install_non_dev_dependencies.sh coverage erase coverage run -m unittest discover -s tests -p '*_tests.py' From a0cc65b47492dc7a4ee77c0781d8919adc0e2953 Mon Sep 17 00:00:00 2001 From: Lorin Date: Sun, 28 Apr 2024 01:01:02 -0600 Subject: [PATCH 094/137] remove dbr104 env support --- python/requirements/dbr104/dbr104.txt | 7 ---- python/requirements/dbr104/dbr104_arm.txt | 4 --- .../dbr104/install_non_dev_dependencies.sh | 36 ------------------- python/setup.py | 1 - python/tox.ini | 13 +------ 5 files changed, 1 insertion(+), 60 deletions(-) delete mode 100644 python/requirements/dbr104/dbr104.txt delete mode 100644 python/requirements/dbr104/dbr104_arm.txt delete mode 100755 python/requirements/dbr104/install_non_dev_dependencies.sh diff --git a/python/requirements/dbr104/dbr104.txt b/python/requirements/dbr104/dbr104.txt deleted file mode 100644 index 5081a954..00000000 --- a/python/requirements/dbr104/dbr104.txt +++ /dev/null @@ -1,7 +0,0 @@ -numpy~=1.20.1 -delta-spark~=1.1.0 -ipython~=7.22.0 -pandas~=1.2.4 -pyarrow~=4.0.0 -pyspark~=3.2.1 -scipy~=1.6.2 \ No newline at end of file diff --git a/python/requirements/dbr104/dbr104_arm.txt b/python/requirements/dbr104/dbr104_arm.txt deleted file mode 100644 index 0196bcf4..00000000 --- a/python/requirements/dbr104/dbr104_arm.txt +++ /dev/null @@ -1,4 +0,0 @@ -numpy~=1.20.1 -delta-spark~=1.1.0 -ipython~=7.22.0 -pyspark~=3.2.1 \ No newline at end of file diff --git a/python/requirements/dbr104/install_non_dev_dependencies.sh b/python/requirements/dbr104/install_non_dev_dependencies.sh deleted file mode 100755 index c78dcfe2..00000000 --- a/python/requirements/dbr104/install_non_dev_dependencies.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/env bash - -set -e # Exit on error -[ -n "$DEBUG" ] && set -x # Enable debugging if DEBUG environment variable is set - -# This runs from the root of the repository -ARM_REQ_FILE="$(pwd)/requirements/dbr104/dbr104_arm.txt" -GENERIC_REQ_FILE="$(pwd)/requirements/dbr104/dbr104.txt" - -# Check necessary commands and files -command -v pip >/dev/null 2>&1 || { echo >&2 "pip is required but it's not installed. Aborting."; exit 1; } -[ -f "$ARM_REQ_FILE" ] || { echo >&2 "Required file $ARM_REQ_FILE not found. 
Aborting."; exit 1; } -[ -f "$GENERIC_REQ_FILE" ] || { echo >&2 "Required file $GENERIC_REQ_FILE not found. Aborting."; exit 1; } - -# Get the architecture of the system -sys_arch=$(uname -m) -echo "System Architecture: $sys_arch" - -echo "Upgrading pip..." -pip install --upgrade pip - -case "$sys_arch" in - arm*) - echo "ARM Architecture detected. Specific model: $sys_arch" - echo "Installing ARM-specific dependencies..." - pip install -r "$ARM_REQ_FILE" - pip install --no-deps pandas~=1.2.4 - pip install --no-deps pyarrow~=4.0.0 - pip install --no-deps scipy~=1.6.2 - ;; - *) - echo "Non-ARM Architecture: $sys_arch" - echo "Installing generic dependencies..." - pip install -r "$GENERIC_REQ_FILE" - ;; -esac diff --git a/python/setup.py b/python/setup.py index e3817ebd..8ac0c757 100644 --- a/python/setup.py +++ b/python/setup.py @@ -21,7 +21,6 @@ long_description_content_type="text/markdown", url="https://databrickslabs.github.io/tempo/", packages=find_packages(where=".", include=["tempo"]), - install_requires=["ipython", "pandas", "scipy", "pyspark"], extras_require=dict(tests=["pytest"]), classifiers=[ "Programming Language :: Python :: 3", diff --git a/python/tox.ini b/python/tox.ini index d2638a95..3d236e34 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -6,7 +6,7 @@ isolated_build = True envlist = ; Mirror Supported LTS DBR versions here: https://docs.databricks.com/release-notes/runtime/ ; Use correct PySpark version based on Python version present in env name - dbr{91,104,113,122,133,143} + dbr{113,122,133,143} coverage-report skip_missing_interpreters = true @@ -21,7 +21,6 @@ basepython = dbr133: py310 dbr122: py39 dbr113: py39 - dbr104: py38 deps = -rrequirements/dev.txt -rrequirements/{envname}.txt @@ -29,16 +28,6 @@ commands = coverage erase coverage run -m unittest discover -s tests -p '*_tests.py' -[testenv:dbr104] -allowlist_externals = chmod, bash, source -deps = - -rrequirements/dev.txt -commands = - chmod +x ./requirements/dbr104/install_non_dev_dependencies.sh - source ./requirements/dbr104/install_non_dev_dependencies.sh - coverage erase - coverage run -m unittest discover -s tests -p '*_tests.py' - [testenv:lint] description = run linters skipsdist = true From 550d4e030d54555383e2451e4ba0842d4403a0c7 Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Sun, 28 Apr 2024 15:41:33 -0500 Subject: [PATCH 095/137] removed support for python 3.8 DBRs --- .github/workflows/build-release.yml | 4 ++-- .github/workflows/push.yml | 6 +----- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build-release.yml b/.github/workflows/build-release.yml index af342019..3035371c 100644 --- a/.github/workflows/build-release.yml +++ b/.github/workflows/build-release.yml @@ -4,7 +4,7 @@ on: pull_request: types: [opened, synchronize] push: - branches: ['*'] # FIXME + branches: ['master'] jobs: release: @@ -35,7 +35,7 @@ jobs: with: user: __token__ password: ${{ secrets.LABS_PYPI_TOKEN }} - packages_dir: python/dist/ + packages-dir: python/dist/ docs: runs-on: ubuntu-latest diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index 882b3ca6..ed0da60f 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -4,7 +4,7 @@ on: pull_request: types: [opened, synchronize] push: - branches: ['*'] # FIXME + branches: ['master'] workflow_dispatch: jobs: @@ -75,10 +75,6 @@ jobs: strategy: matrix: config: - - py: '3.8' - dbr: dbr91 - - py: '3.8' - dbr: dbr104 - py: '3.9' dbr: dbr113 - py: '3.9' From 333975a561dc3324651e180651b05365fd5f55da Mon 
Sep 17 00:00:00 2001 From: Taylor Isbell Date: Sun, 28 Apr 2024 15:45:56 -0500 Subject: [PATCH 096/137] updated contribution docs --- CONTRIBUTING.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c0ca74e2..86fd4a69 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -12,18 +12,18 @@ Be sure to carefully follow the instructions to configure your shell environment Use `pyenv` to install the following Python versions for testing. ```bash -pyenv install 3.7 3.8 3.9 +pyenv install 3.8 3.9 3.10 ``` You will probably want to set one of these versions as your global Python version. This will be the version of Python that is used when you run `python` commands in your terminal. For example, to set Python 3.9 as your global Python version, run the following command: ```bash -pyenv global 3.9 +pyenv global 3.10 ``` Within the `tempo/python` folder, run the below command to create a `.python-version` file that will tell `pyenv` which Python version to use when running commands in this directory: ```bash -pyenv local 3.7 3.8 3.9 +pyenv local 3.8 3.9 3.10 ``` This allows `tox` to create virtual environments using any of the Python versions listed in the `.python-version` file. @@ -64,9 +64,10 @@ This will run tests for all listed environments. ### Run additional checks locally `tox` has special environments for additional checks that must be performed as part of the PR process. These include formatting, linting, type checking, etc. These environments are also defined in the `tox.ini`file and skip installing dependencies listed in the `requirements.txt` file and building the distribution when those are not required . They can be specified using the `-e` flag: -* format * lint * type-check +* build-dist +* build-docs * coverage-report # Code style & Standards From 4d02b80d0d8dc584568bcf12e06c85e435b749bf Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Mon, 29 Apr 2024 11:58:44 -0500 Subject: [PATCH 097/137] set fail-fast to false --- .github/workflows/push.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index ed0da60f..78eb93c1 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -83,6 +83,7 @@ jobs: dbr: dbr133 - py: '3.10' dbr: dbr143 + fail-fast: false steps: - uses: actions/checkout@v4 with: From b5a9a0cd590aeb105d32917e790106547b8cd350 Mon Sep 17 00:00:00 2001 From: Tristan Nixon Date: Tue, 14 May 2024 16:20:04 -0700 Subject: [PATCH 098/137] checkpoint commit of some updates to the code that converts nano-second precision timestamps --- python/tempo/tsdf.py | 45 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/python/tempo/tsdf.py b/python/tempo/tsdf.py index 8ae1d44f..d471ce1b 100644 --- a/python/tempo/tsdf.py +++ b/python/tempo/tsdf.py @@ -65,9 +65,11 @@ def __init__( # Timestamp string matching then do some pattern matching to extract # the time stamp. if isinstance(df.schema[ts_col].dataType, StringType): # pragma: no cover - sample_ts = df.limit(1).collect()[0][0] + sample_ts = df.select(ts_col).limit(1).collect()[0][0] self.__validate_ts_string(sample_ts) - self.df = self.__add_double_ts().withColumnRenamed("double_ts", self.ts_col) + self.df = self.__add_double_ts()\ + .drop(self.ts_col)\ + .withColumnRenamed("double_ts", self.ts_col) """ Make sure DF is ordered by its respective ts_col and partition columns. 
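
For illustration, a minimal sketch (not part of the patch) of how the
parse_nanos_timestamp helper introduced in the next hunk might be called;
the session setup, the event_ts column name, and the sample value are
assumed here:

    from pyspark.sql import SparkSession

    from tempo.tsdf import TSDF

    spark = SparkSession.builder.appName("nanos-example").getOrCreate()

    # a string timestamp with sub-second (nanosecond) precision
    raw = spark.createDataFrame(
        [("2020-08-01 00:00:10.123456789",)],
        "event_ts string",
    )

    # writes the epoch-seconds-plus-nanos double to event_ts_dbl and a
    # second-precision parsed timestamp to event_ts_parsed; omitting
    # double_ts_col would overwrite event_ts in place instead. Note that
    # this checkpoint version also leaves its intermediate "nanos" and
    # "long_ts" helper columns on the result, and parsing of the
    # fractional part depends on the session's timestamp-parser settings.
    converted = TSDF.parse_nanos_timestamp(
        raw,
        "event_ts",
        double_ts_col="event_ts_dbl",
        parsed_ts_col="event_ts_parsed",
    )
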
@@ -77,6 +79,45 @@ def __init__( # Helper functions # + @staticmethod + def parse_nanos_timestamp(df: DataFrame, + str_ts_col: str, + ts_fmt: str = "yyyy-MM-dd HH:mm:ss", + double_ts_col: Optional[str] = None, + parsed_ts_col: Optional[str] = None) -> DataFrame: + """ + Parse a string timestamp column with nanosecond precision into a double timestamp column. + + :param df: DataFrame containing the string timestamp column + :param str_ts_col: Name of the string timestamp column + :param ts_fmt: Format of the string timestamp column (default: "yyyy-MM-dd HH:mm:ss") + :param double_ts_col: Name of the double timestamp column to create, if None + the source string column will be overwritten + :param parsed_ts_col: Name of the parsed timestamp column to create, if None + no parsed timestamp column will be kept + + :return: DataFrame with the double timestamp column + """ + + # add a parsed timestamp column if requested + src_df = df.withColumn(parsed_ts_col, + sfn.to_timestamp(sfn.col(str_ts_col), ts_fmt)) \ + if parsed_ts_col else df + + return ( + src_df.withColumn("nanos", + sfn.when(sfn.col(str_ts_col).contains("."), + sfn.concat(sfn.lit("0."), + sfn.split(sfn.col(str_ts_col), + r"\.")[1]) + ).otherwise(0).cast("double")) + .withColumn("long_ts", + sfn.unix_timestamp(str_ts_col, ts_fmt)) + .withColumn((double_ts_col or str_ts_col), + sfn.col("long_ts") + sfn.col("nanos"))) + + + def __add_double_ts(self) -> DataFrame: """Add a double (epoch) version of the string timestamp out to nanos""" return ( From 7469a5031c2fa3a31418470eb4349dc995507bcc Mon Sep 17 00:00:00 2001 From: Tristan Nixon Date: Wed, 15 May 2024 08:33:19 -0700 Subject: [PATCH 099/137] refactored test code data format to allow for better separation of DF creation from TSDF constructor args --- python/tests/as_of_join_tests.py | 52 +- python/tests/base.py | 219 ++++++-- .../unit_test_data/as_of_join_tests.json | 523 +++++++++++------- 3 files changed, 497 insertions(+), 297 deletions(-) diff --git a/python/tests/as_of_join_tests.py b/python/tests/as_of_join_tests.py index 0b02c866..958374d9 100644 --- a/python/tests/as_of_join_tests.py +++ b/python/tests/as_of_join_tests.py @@ -9,10 +9,10 @@ def test_asof_join(self): """AS-OF Join with out a time-partition test""" # Construct dataframes - tsdf_left = self.get_data_as_tsdf("left") - tsdf_right = self.get_data_as_tsdf("right") - dfExpected = self.get_data_as_sdf("expected") - noRightPrefixdfExpected = self.get_data_as_sdf("expected_no_right_prefix") + tsdf_left = self.get_test_df_builder("left").as_tsdf() + tsdf_right = self.get_test_df_builder("right").as_tsdf() + dfExpected = self.get_test_df_builder("expected").as_sdf() + noRightPrefixdfExpected = self.get_test_df_builder("expected_no_right_prefix").as_sdf() # perform the join joined_df = tsdf_left.asofJoin( @@ -35,12 +35,12 @@ def test_asof_join_skip_nulls_disabled(self): """AS-OF Join with skip nulls disabled""" # fetch test data - tsdf_left = self.get_data_as_tsdf("left") - tsdf_right = self.get_data_as_tsdf("right") - dfExpectedSkipNulls = self.get_data_as_sdf("expected_skip_nulls") - dfExpectedSkipNullsDisabled = self.get_data_as_sdf( + tsdf_left = self.get_test_df_builder("left").as_tsdf() + tsdf_right = self.get_test_df_builder("right").as_tsdf() + dfExpectedSkipNulls = self.get_test_df_builder("expected_skip_nulls").as_sdf() + dfExpectedSkipNullsDisabled = self.get_test_df_builder( "expected_skip_nulls_disabled" - ) + ).as_sdf() # perform the join with skip nulls enabled (default) joined_df = tsdf_left.asofJoin( @@ 
-62,9 +62,9 @@ def test_sequence_number_sort(self): """Skew AS-OF Join with Partition Window Test""" # fetch test data - tsdf_left = self.get_data_as_tsdf("left") - tsdf_right = self.get_data_as_tsdf("right") - dfExpected = self.get_data_as_sdf("expected") + tsdf_left = self.get_test_df_builder("left").as_tsdf() + tsdf_right = self.get_test_df_builder("right").as_tsdf() + dfExpected = self.get_test_df_builder("expected").as_sdf() # perform the join joined_df = tsdf_left.asofJoin(tsdf_right, right_prefix="right").df @@ -76,9 +76,9 @@ def test_partitioned_asof_join(self): """AS-OF Join with a time-partition""" with self.assertLogs(level="WARNING") as warning_captured: # fetch test data - tsdf_left = self.get_data_as_tsdf("left") - tsdf_right = self.get_data_as_tsdf("right") - dfExpected = self.get_data_as_sdf("expected") + tsdf_left = self.get_test_df_builder("left").as_tsdf() + tsdf_right = self.get_test_df_builder("right").as_tsdf() + dfExpected = self.get_test_df_builder("expected").as_sdf() joined_df = tsdf_left.asofJoin( tsdf_right, @@ -103,15 +103,17 @@ def test_asof_join_nanos(self): """As of join with nanosecond timestamps""" # fetch test data - tsdf_left = self.get_data_as_tsdf("left") - tsdf_right = self.get_data_as_tsdf("right") - dfExpected = self.get_data_as_sdf("expected") + tsdf_left = self.get_test_df_builder("left").as_tsdf() + tsdf_right = self.get_test_df_builder("right").as_tsdf() + dfExpected = self.get_test_df_builder("expected").as_sdf() # perform join joined_df = tsdf_left.asofJoin( tsdf_right, left_prefix="left", right_prefix="right" ).df + joined_df.show() + # compare self.assertDataFrameEquality(joined_df, dfExpected) @@ -119,8 +121,8 @@ def test_asof_join_tolerance(self): """As of join with tolerance band""" # fetch test data - tsdf_left = self.get_data_as_tsdf("left") - tsdf_right = self.get_data_as_tsdf("right") + tsdf_left = self.get_test_df_builder("left").as_tsdf() + tsdf_right = self.get_test_df_builder("right").as_tsdf() tolerance_test_values = [None, 0, 5.5, 7, 10] for tolerance in tolerance_test_values: @@ -133,17 +135,17 @@ def test_asof_join_tolerance(self): ).df # compare - expected_tolerance = self.get_data_as_sdf(f"expected_tolerance_{tolerance}") + expected_tolerance = self.get_test_df_builder(f"expected_tolerance_{tolerance}").as_sdf() self.assertDataFrameEquality(joined_df, expected_tolerance) def test_asof_join_sql_join_opt_and_bytes_threshold(self): """AS-OF Join with out a time-partition test""" with patch("tempo.tsdf.TSDF._TSDF__getBytesFromPlan", return_value=1000): # Construct dataframes - tsdf_left = self.get_data_as_tsdf("left") - tsdf_right = self.get_data_as_tsdf("right") - dfExpected = self.get_data_as_sdf("expected") - noRightPrefixdfExpected = self.get_data_as_sdf("expected_no_right_prefix") + tsdf_left = self.get_test_df_builder("left").as_tsdf() + tsdf_right = self.get_test_df_builder("right").as_tsdf() + dfExpected = self.get_test_df_builder("expected").as_sdf() + noRightPrefixdfExpected = self.get_test_df_builder("expected_no_right_prefix").as_sdf() # perform the join joined_df = tsdf_left.asofJoin( diff --git a/python/tests/base.py b/python/tests/base.py index cdba2845..06f90277 100644 --- a/python/tests/base.py +++ b/python/tests/base.py @@ -2,7 +2,7 @@ import re import unittest import warnings -from typing import Union +from typing import Union, Optional import jsonref import pyspark.sql.functions as sfn @@ -14,6 +14,132 @@ from tempo.tsdf import TSDF +class TestDataFrameBuilder: + """ + A class to hold metadata about a Spark 
DataFrame + """ + + def __init__(self, spark: SparkSession, test_data: dict): + """ + :param spark: the SparkSession to use + :param test_data: a dictionary containing the test data & metadata + """ + self.spark = spark + self.__test_data = test_data + + # Spark DataFrame metadata + + @property + def df(self) -> dict: + """ + :return: the DataFrame component of the test data + """ + return self.__test_data["df"] + + @property + def df_schema(self) -> str: + """ + :return: the schema component of the test data + """ + return self.df["schema"] + + def df_data(self) -> list: + """ + :return: the data component of the test data + """ + return self.df["data"] + + # TSDF metadata + + @property + def tsdf_constructor(self) -> Optional[str]: + """ + :return: the name of the TSDF constructor to use + """ + return self.__test_data.get("tsdf_constructor", None) + + @property + def tsdf(self) -> dict: + """ + :return: the timestamp index metadata component of the test data + """ + return self.__test_data["tsdf"] + + @property + def ts_schema(self) -> Optional[dict]: + """ + :return: the timestamp index schema component of the test data + """ + return self.tsdf.get("ts_schema", None) + + @property + def ts_idx_class(self) -> str: + """ + :return: the timestamp index class component of the test data + """ + return self.ts_schema["ts_idx_class"] + + @property + def ts_col(self) -> str: + """ + :return: the timestamp column component of the test data + """ + return self.ts_schema["ts_col"] + + @property + def ts_idx(self) -> dict: + """ + :return: the timestamp index data component of the test data + """ + return self.ts_schema["ts_idx"] + + # Builder functions + + def as_sdf(self) -> DataFrame: + """ + Constructs a Spark Dataframe from the test data + """ + # build dataframe + df = self.spark.createDataFrame(self.df_data(), self.df_schema) + + # convert timestamp columns + if "ts_convert" in self.df: + for ts_col in self.df["ts_convert"]: + # handle nested columns + if "." in ts_col: + col, field = ts_col.split(".") + convert_field_expr = sfn.to_timestamp(sfn.col(col).getField(field)) + df = df.withColumn( + col, sfn.col(col).withField(field, convert_field_expr) + ) + else: + df = df.withColumn(ts_col, sfn.to_timestamp(ts_col)) + # convert date columns + if "date_convert" in self.df: + for date_col in self.df["date_convert"]: + # handle nested columns + if "." 
in date_col: + col, field = date_col.split(".") + convert_field_expr = sfn.to_timestamp(sfn.col(col).getField(field)) + df = df.withColumn( + col, sfn.col(col).withField(field, convert_field_expr) + ) + else: + df = df.withColumn(date_col, sfn.to_date(date_col)) + + return df + + def as_tsdf(self) -> TSDF: + """ + Constructs a TSDF from the test data + """ + sdf = self.as_sdf() + if self.tsdf_constructor is not None: + return getattr(TSDF, self.tsdf_constructor)(sdf, **self.tsdf) + else: + return TSDF(sdf, **self.tsdf) + + class SparkTest(unittest.TestCase): # # Fixtures @@ -68,24 +194,24 @@ def tearDown(self) -> None: # Utility Functions # - def get_data_as_sdf(self, name: str, convert_ts_col=True): - td = self.test_data[name] - ts_cols = [] - if convert_ts_col and (td.get("ts_col", None) or td.get("other_ts_cols", [])): - ts_cols = [td["ts_col"]] if "ts_col" in td else [] - ts_cols.extend(td.get("other_ts_cols", [])) - return self.buildTestDF(td["schema"], td["data"], ts_cols) - - def get_data_as_tsdf(self, name: str, convert_ts_col=True): - df = self.get_data_as_sdf(name, convert_ts_col) - td = self.test_data[name] - tsdf = TSDF( - df, - ts_col=td["ts_col"], - partition_cols=td.get("partition_cols", None), - sequence_col=td.get("sequence_col", None), - ) - return tsdf + # def get_data_as_sdf(self, name: str, convert_ts_col=True): + # td = self.test_data[name] + # ts_cols = [] + # if convert_ts_col and (td.get("ts_col", None) or td.get("other_ts_cols", [])): + # ts_cols = [td["ts_col"]] if "ts_col" in td else [] + # ts_cols.extend(td.get("other_ts_cols", [])) + # return self.buildTestDF(td["schema"], td["data"], ts_cols) + # + # def get_data_as_tsdf(self, name: str, convert_ts_col=True): + # df = self.get_data_as_sdf(name, convert_ts_col) + # td = self.test_data[name] + # tsdf = TSDF( + # df, + # ts_col=td["ts_col"], + # partition_cols=td.get("partition_cols", None), + # sequence_col=td.get("sequence_col", None), + # ) + # return tsdf def get_data_as_idf(self, name: str, convert_ts_col=True): df = self.get_data_as_sdf(name, convert_ts_col) @@ -112,7 +238,8 @@ def __getTestDataFilePath(self, test_file_name: str) -> str: dir_path = "./tests" elif cwd != "tests": raise RuntimeError( - f"Cannot locate test data file {test_file_name}, running from dir {os.getcwd()}" + f"Cannot locate test data file {test_file_name}, running from dir" + f" {os.getcwd()}" ) # return appropriate path @@ -136,40 +263,11 @@ def __loadTestData(self, test_case_path: str) -> dict: # proces the data file with open(test_data_file, "r") as f: data_metadata_from_json = jsonref.load(f) - # warn if data not present - if class_name not in data_metadata_from_json: - warnings.warn(f"Could not load test data for {file_name}.{class_name}") - return {} - if func_name not in data_metadata_from_json[class_name]: - warnings.warn( - f"Could not load test data for {file_name}.{class_name}.{func_name}" - ) - return {} + # return the data return data_metadata_from_json[class_name][func_name] - def buildTestDF(self, schema, data, ts_cols=["event_ts"]): - """ - Constructs a Spark Dataframe from the given components - :param schema: the schema to use for the Dataframe - :param data: values to use for the Dataframe - :param ts_cols: list of column names to be converted to Timestamp values - :return: a Spark Dataframe, constructed from the given schema and values - """ - # build dataframe - df = self.spark.createDataFrame(data, schema) - - # check if ts_col follows standard timestamp format, then check if timestamp has micro/nanoseconds - 
for tsc in ts_cols: - ts_value = str(df.select(ts_cols).limit(1).collect()[0][0]) - ts_pattern = r"^\d{4}-\d{2}-\d{2}| \d{2}:\d{2}:\d{2}\.\d*$" - decimal_pattern = r"[.]\d+" - if re.match(ts_pattern, str(ts_value)) is not None: - if ( - re.search(decimal_pattern, ts_value) is None - or len(re.search(decimal_pattern, ts_value)[0]) <= 4 - ): - df = df.withColumn(tsc, sfn.to_timestamp(sfn.col(tsc))) - return df + def get_test_df_builder(self, name: str) -> TestDataFrameBuilder: + return TestDataFrameBuilder(self.spark, self.test_data[name]) # # Assertion Functions @@ -201,12 +299,10 @@ def assertSchemaContainsField(self, schema, field): # the attributes of the fields must be equal self.assertFieldsEqual(field, schema[field.name]) - @staticmethod def assertDataFrameEquality( - df1: Union[IntervalsDF, TSDF, DataFrame], - df2: Union[IntervalsDF, TSDF, DataFrame], - from_tsdf: bool = False, - from_idf: bool = False, + self, + df1: Union[TSDF, DataFrame], + df2: Union[TSDF, DataFrame], ignore_row_order: bool = False, ignore_column_order: bool = True, ignore_nullable: bool = True, @@ -216,10 +312,17 @@ def assertDataFrameEquality( That is, they have equivalent schemas, and both contain the same values """ - if from_tsdf or from_idf: + # handle TSDFs + if isinstance(df1, TSDF): + # df2 must also be a TSDF + self.assertIsInstance(df2, TSDF) + # should have the same schemas + self.assertEqual(df1.ts_schema, df2.ts_schema) + # get the underlying Spark DataFrames df1 = df1.df df2 = df2.df + # handle DataFrames assert_df_equality( df1, df2, diff --git a/python/tests/unit_test_data/as_of_join_tests.json b/python/tests/unit_test_data/as_of_join_tests.json index 0b7bba7e..6c183b8b 100644 --- a/python/tests/unit_test_data/as_of_join_tests.json +++ b/python/tests/unit_test_data/as_of_join_tests.json @@ -1,15 +1,20 @@ { "__SharedData": { "shared_left": { - "schema": "symbol string, event_ts string, trade_pr float", - "ts_col": "event_ts", - "partition_cols": ["symbol"], - "data": [ - ["S1", "2020-08-01 00:00:10", 349.21], - ["S1", "2020-08-01 00:01:12", 351.32], - ["S1", "2020-09-01 00:02:10", 361.1], - ["S1", "2020-09-01 00:19:12", 362.1] - ] + "tsdf": { + "ts_col": "event_ts", + "partition_cols": ["symbol"] + }, + "df": { + "schema": "symbol string, event_ts string, trade_pr float", + "ts_convert": ["event_ts"], + "data": [ + ["S1", "2020-08-01 00:00:10", 349.21], + ["S1", "2020-08-01 00:01:12", 351.32], + ["S1", "2020-09-01 00:02:10", 361.1], + ["S1", "2020-09-01 00:19:12", 362.1] + ] + } }, "test_asof_expected_data": [ ["S1", "2020-08-01 00:00:10", 349.21, "2020-08-01 00:00:01", 345.11, 351.12], @@ -24,32 +29,45 @@ "$ref": "#/__SharedData/shared_left" }, "right": { - "schema": "symbol string, event_ts string, bid_pr float, ask_pr float", - "ts_col": "event_ts", - "partition_cols": ["symbol"], - "data": [ - ["S1", "2020-08-01 00:00:01", 345.11, 351.12], - ["S1", "2020-08-01 00:01:05", 348.10, 353.13], - ["S1", "2020-09-01 00:02:01", 358.93, 365.12], - ["S1", "2020-09-01 00:15:01", 359.21, 365.31] - ] + "tsdf": { + "ts_col": "event_ts", + "partition_cols": ["symbol"] + }, + "df": { + "schema": "symbol string, event_ts string, bid_pr float, ask_pr float", + "ts_convert": ["event_ts"], + "data": [ + ["S1", "2020-08-01 00:00:01", 345.11, 351.12], + ["S1", "2020-08-01 00:01:05", 348.10, 353.13], + ["S1", "2020-09-01 00:02:01", 358.93, 365.12], + ["S1", "2020-09-01 00:15:01", 359.21, 365.31] + ] + } }, "expected": { - "schema": "symbol string, left_event_ts string, left_trade_pr float, right_event_ts string, 
right_bid_pr float, right_ask_pr float", - "ts_col": "left_event_ts", - "partition_cols": ["symbol"], - "other_ts_cols": ["right_event_ts"], - "data": { - "$ref": "#/__SharedData/test_asof_expected_data" + "tsdf": { + "ts_col": "left_event_ts", + "partition_cols": ["symbol"] + }, + "df": { + "schema": "symbol string, left_event_ts string, left_trade_pr float, right_event_ts string, right_bid_pr float, right_ask_pr float", + "ts_convert": ["left_event_ts", "right_event_ts"], + "data": { + "$ref": "#/__SharedData/test_asof_expected_data" + } } }, "expected_no_right_prefix": { - "schema": "symbol string, left_event_ts string, left_trade_pr float, event_ts string, bid_pr float, ask_pr float", - "ts_col": "left_event_ts", - "partition_cols": ["symbol"], - "other_ts_cols": ["event_ts"], - "data": { - "$ref": "#/__SharedData/test_asof_expected_data" + "tsdf": { + "ts_col": "left_event_ts", + "partition_cols": ["symbol"] + }, + "df": { + "schema": "symbol string, left_event_ts string, left_trade_pr float, event_ts string, bid_pr float, ask_pr float", + "ts_convert": ["left_event_ts", "event_ts"], + "data": { + "$ref": "#/__SharedData/test_asof_expected_data" + } } } }, @@ -58,158 +76,210 @@ "$ref": "#/__SharedData/shared_left" }, "right": { - "schema": "symbol string, event_ts string, bid_pr float, ask_pr float", - "ts_col": "event_ts", - "partition_cols": ["symbol"], - "data": [ - ["S1", "2020-08-01 00:00:01", 345.11, 351.12], - ["S1", "2020-08-01 00:01:05", null, 353.13], - ["S1", "2020-09-01 00:02:01", null, null], - ["S1", "2020-09-01 00:15:01", 359.21, 365.31] - ] + "tsdf": { + "ts_col": "event_ts", + "partition_cols": ["symbol"] + }, + "df": { + "schema": "symbol string, event_ts string, bid_pr float, ask_pr float", + "ts_convert": ["event_ts"], + "data": [ + ["S1", "2020-08-01 00:00:01", 345.11, 351.12], + ["S1", "2020-08-01 00:01:05", null, 353.13], + ["S1", "2020-09-01 00:02:01", null, null], + ["S1", "2020-09-01 00:15:01", 359.21, 365.31] + ] + } }, "expected_skip_nulls": { - "schema": "symbol string, left_event_ts string, left_trade_pr float, right_event_ts string, right_bid_pr float, right_ask_pr float", - "ts_col": "left_event_ts", - "partition_cols": ["symbol"], - "other_ts_cols": ["right_event_ts"], - "data": [ - ["S1", "2020-08-01 00:00:10", 349.21, "2020-08-01 00:00:01", 345.11, 351.12], - ["S1", "2020-08-01 00:01:12", 351.32, "2020-08-01 00:01:05", 345.11, 353.13], - ["S1", "2020-09-01 00:02:10", 361.1, "2020-09-01 00:02:01", 345.11, 353.13], - ["S1", "2020-09-01 00:19:12", 362.1, "2020-09-01 00:15:01", 359.21, 365.31] - ] + "tsdf": { + "ts_col": "left_event_ts", + "partition_cols": ["symbol"] + }, + "df": { + "schema": "symbol string, left_event_ts string, left_trade_pr float, right_event_ts string, right_bid_pr float, right_ask_pr float", + "ts_convert": ["left_event_ts", "right_event_ts"], + "data": [ + ["S1", "2020-08-01 00:00:10", 349.21, "2020-08-01 00:00:01", 345.11, 351.12], + ["S1", "2020-08-01 00:01:12", 351.32, "2020-08-01 00:01:05", 345.11, 353.13], + ["S1", "2020-09-01 00:02:10", 361.1, "2020-09-01 00:02:01", 345.11, 353.13], + ["S1", "2020-09-01 00:19:12", 362.1, "2020-09-01 00:15:01", 359.21, 365.31] + ] + } }, "expected_skip_nulls_disabled": { - "schema": "symbol string, left_event_ts string, left_trade_pr float, right_event_ts string, right_bid_pr float, right_ask_pr float", - "ts_col": "left_event_ts", - "partition_cols": ["symbol"], - "other_ts_cols": ["right_event_ts"], - "data": [ - ["S1", "2020-08-01 00:00:10", 349.21, "2020-08-01 00:00:01", 345.11, 351.12], 
- ["S1", "2020-08-01 00:01:12", 351.32, "2020-08-01 00:01:05", null, 353.13], - ["S1", "2020-09-01 00:02:10", 361.1, "2020-09-01 00:02:01", null, null], - ["S1", "2020-09-01 00:19:12", 362.1, "2020-09-01 00:15:01", 359.21, 365.31] - ] + "tsdf": { + "ts_col": "left_event_ts", + "partition_cols": ["symbol"] + }, + "df": { + "schema": "symbol string, left_event_ts string, left_trade_pr float, right_event_ts string, right_bid_pr float, right_ask_pr float", + "ts_convert": ["left_event_ts", "right_event_ts"], + "data": [ + ["S1", "2020-08-01 00:00:10", 349.21, "2020-08-01 00:00:01", 345.11, 351.12], + ["S1", "2020-08-01 00:01:12", 351.32, "2020-08-01 00:01:05", null, 353.13], + ["S1", "2020-09-01 00:02:10", 361.1, "2020-09-01 00:02:01", null, null], + ["S1", "2020-09-01 00:19:12", 362.1, "2020-09-01 00:15:01", 359.21, 365.31] + ] + } } }, "test_sequence_number_sort": { "left": { - "schema": "symbol string, event_ts string, trade_pr float, trade_id int", - "ts_col": "event_ts", - "partition_cols": ["symbol"], - "data": [ - ["S1", "2020-08-01 00:00:10", 349.21, 1], - ["S1", "2020-08-01 00:00:10", 350.21, 5], - ["S1", "2020-08-01 00:01:12", 351.32, 2], - ["S1", "2020-09-01 00:02:10", 361.1, 3], - ["S1", "2020-09-01 00:19:12", 362.1, 4] - ] + "tsdf": { + "ts_col": "event_ts", + "partition_cols": ["symbol"] + }, + "df": { + "schema": "symbol string, event_ts string, trade_pr float, trade_id int", + "ts_convert": ["event_ts"], + "data": [ + ["S1", "2020-08-01 00:00:10", 349.21, 1], + ["S1", "2020-08-01 00:00:10", 350.21, 5], + ["S1", "2020-08-01 00:01:12", 351.32, 2], + ["S1", "2020-09-01 00:02:10", 361.1, 3], + ["S1", "2020-09-01 00:19:12", 362.1, 4] + ] + } }, "right": { - "schema": "symbol string, event_ts string, bid_pr float, ask_pr float, seq_nb long", - "ts_col": "event_ts", - "partition_cols": ["symbol"], - "sequence_col": "seq_nb", - "data": [ - ["S1", "2020-08-01 00:00:01", 345.11, 351.12, 1], - ["S1", "2020-08-01 00:00:10", 19.11, 20.12, 1], - ["S1", "2020-08-01 00:01:05", 348.10, 1000.13, 3], - ["S1", "2020-08-01 00:01:05", 348.10, 100.13, 2], - ["S1", "2020-09-01 00:02:01", 358.93, 365.12, 4], - ["S1", "2020-09-01 00:15:01", 359.21, 365.31, 5] - ] + "tsdf": { + "ts_col": "event_ts", + "partition_cols": ["symbol"], + "sequence_col": "seq_nb" + }, + "df": { + "schema": "symbol string, event_ts string, bid_pr float, ask_pr float, seq_nb long", + "ts_convert": ["event_ts"], + "data": [ + ["S1", "2020-08-01 00:00:01", 345.11, 351.12, 1], + ["S1", "2020-08-01 00:00:10", 19.11, 20.12, 1], + ["S1", "2020-08-01 00:01:05", 348.10, 1000.13, 3], + ["S1", "2020-08-01 00:01:05", 348.10, 100.13, 2], + ["S1", "2020-09-01 00:02:01", 358.93, 365.12, 4], + ["S1", "2020-09-01 00:15:01", 359.21, 365.31, 5] + ] + } }, "expected": { - "schema": "symbol string, event_ts string, trade_pr float, trade_id int, right_event_ts string, right_bid_pr float, right_ask_pr float, right_seq_nb long", - "ts_col": "event_ts", - "partition_cols": ["symbol"], - "other_ts_cols": ["right_event_ts"], - "data": [ - ["S1", "2020-08-01 00:00:10", 349.21, 1, "2020-08-01 00:00:10", 19.11, 20.12, 1], - ["S1", "2020-08-01 00:00:10", 350.21, 5, "2020-08-01 00:00:10", 19.11, 20.12, 1], - ["S1", "2020-08-01 00:01:12", 351.32, 2, "2020-08-01 00:01:05", 348.10, 1000.13, 3], - ["S1", "2020-09-01 00:02:10", 361.1, 3, "2020-09-01 00:02:01", 358.93, 365.12, 4], - ["S1", "2020-09-01 00:19:12", 362.1, 4, "2020-09-01 00:15:01", 359.21, 365.31, 5] - ] + "tsdf": { + "ts_col": "left_event_ts", + "partition_cols": ["symbol"] + }, + "df": { + "schema": 
"symbol string, event_ts string, trade_pr float, trade_id int, right_event_ts string, right_bid_pr float, right_ask_pr float, right_seq_nb long", + "ts_convert": ["event_ts", "right_event_ts"], + "data": [ + ["S1", "2020-08-01 00:00:10", 349.21, 1, "2020-08-01 00:00:10", 19.11, 20.12, 1], + ["S1", "2020-08-01 00:00:10", 350.21, 5, "2020-08-01 00:00:10", 19.11, 20.12, 1], + ["S1", "2020-08-01 00:01:12", 351.32, 2, "2020-08-01 00:01:05", 348.10, 1000.13, 3], + ["S1", "2020-09-01 00:02:10", 361.1, 3, "2020-09-01 00:02:01", 358.93, 365.12, 4], + ["S1", "2020-09-01 00:19:12", 362.1, 4, "2020-09-01 00:15:01", 359.21, 365.31, 5] + ] + } } }, "test_partitioned_asof_join": { "left": { - "schema": "symbol string, event_ts string, trade_pr float", - "ts_col": "event_ts", - "partition_cols": ["symbol"], - "data": [ - ["S1", "2020-08-01 00:00:02", 349.21], - ["S1", "2020-08-01 00:00:08", 351.32], - ["S1", "2020-08-01 00:00:11", 361.12], - ["S1", "2020-08-01 00:00:18", 364.31], - ["S1", "2020-08-01 00:00:19", 362.94], - ["S1", "2020-08-01 00:00:21", 364.27], - ["S1", "2020-08-01 00:00:23", 367.36] - ] + "tsdf": { + "ts_col": "event_ts", + "partition_cols": ["symbol"] + }, + "df": { + "schema": "symbol string, event_ts string, trade_pr float", + "ts_convert": ["event_ts"], + "data": [ + ["S1", "2020-08-01 00:00:02", 349.21], + ["S1", "2020-08-01 00:00:08", 351.32], + ["S1", "2020-08-01 00:00:11", 361.12], + ["S1", "2020-08-01 00:00:18", 364.31], + ["S1", "2020-08-01 00:00:19", 362.94], + ["S1", "2020-08-01 00:00:21", 364.27], + ["S1", "2020-08-01 00:00:23", 367.36] + ] + } }, "right": { - "schema": "symbol string, event_ts string, bid_pr float, ask_pr float", - "ts_col": "event_ts", - "partition_cols": ["symbol"], - "data": [ - ["S1", "2020-08-01 00:00:01", 345.11, 351.12], - ["S1", "2020-08-01 00:00:09", 348.10, 353.13], - ["S1", "2020-08-01 00:00:12", 358.93, 365.12], - ["S1", "2020-08-01 00:00:19", 359.21, 365.31] - ] + "tsdf": { + "ts_col": "event_ts", + "partition_cols": ["symbol"] + }, + "df": { + "schema": "symbol string, event_ts string, bid_pr float, ask_pr float", + "ts_convert": ["event_ts"], + "data": [ + ["S1", "2020-08-01 00:00:01", 345.11, 351.12], + ["S1", "2020-08-01 00:00:09", 348.10, 353.13], + ["S1", "2020-08-01 00:00:12", 358.93, 365.12], + ["S1", "2020-08-01 00:00:19", 359.21, 365.31] + ] + } }, "expected": { - "schema": "symbol string, left_event_ts string, left_trade_pr float, right_event_ts string, right_bid_pr float, right_ask_pr float", - "ts_col": "left_event_ts", - "partition_cols": ["symbol"], - "other_ts_cols": ["right_event_ts"], - "data": [ - ["S1", "2020-08-01 00:00:02", 349.21, "2020-08-01 00:00:01", 345.11, 351.12], - ["S1", "2020-08-01 00:00:08", 351.32, "2020-08-01 00:00:01", 345.11, 351.12], - ["S1", "2020-08-01 00:00:11", 361.12, "2020-08-01 00:00:09", 348.10, 353.13], - ["S1", "2020-08-01 00:00:18", 364.31, "2020-08-01 00:00:12", 358.93, 365.12], - ["S1", "2020-08-01 00:00:19", 362.94, "2020-08-01 00:00:19", 359.21, 365.31], - ["S1", "2020-08-01 00:00:21", 364.27, "2020-08-01 00:00:19", 359.21, 365.31], - ["S1", "2020-08-01 00:00:23", 367.36, "2020-08-01 00:00:19", 359.21, 365.31] - ] + "tsdf": { + "ts_col": "left_event_ts", + "partition_cols": ["symbol"] + }, + "df": { + "schema": "symbol string, left_event_ts string, left_trade_pr float, right_event_ts string, right_bid_pr float, right_ask_pr float", + "ts_convert": ["left_event_ts", "right_event_ts"], + "data": [ + ["S1", "2020-08-01 00:00:02", 349.21, "2020-08-01 00:00:01", 345.11, 351.12], + ["S1", 
"2020-08-01 00:00:08", 351.32, "2020-08-01 00:00:01", 345.11, 351.12], + ["S1", "2020-08-01 00:00:11", 361.12, "2020-08-01 00:00:09", 348.10, 353.13], + ["S1", "2020-08-01 00:00:18", 364.31, "2020-08-01 00:00:12", 358.93, 365.12], + ["S1", "2020-08-01 00:00:19", 362.94, "2020-08-01 00:00:19", 359.21, 365.31], + ["S1", "2020-08-01 00:00:21", 364.27, "2020-08-01 00:00:19", 359.21, 365.31], + ["S1", "2020-08-01 00:00:23", 367.36, "2020-08-01 00:00:19", 359.21, 365.31] + ] + } } }, "test_asof_join_nanos": { "left": { - "schema": "symbol string, event_ts string, trade_pr float", - "ts_col": "event_ts", - "partition_cols": ["symbol"], - "data": [ - ["S1", "2022-01-01 09:59:59.123456789", 349.21], - ["S1", "2022-01-01 10:00:00.123456788", 351.32], - ["S1", "2022-01-01 10:00:00.123456789", 361.12], - ["S1", "2022-01-01 10:00:01.123456789", 364.31] - ] + "tsdf": { + "ts_col": "event_ts", + "partition_cols": ["symbol"] + }, + "df": { + "schema": "symbol string, event_ts string, trade_pr float", + "data": [ + ["S1", "2020-08-01 00:00:10.123456789", 349.21], + ["S1", "2020-08-01 00:01:12.123456789", 351.32], + ["S1", "2020-09-01 00:02:10.123456789", 361.1], + ["S1", "2020-09-01 00:19:12.123456789", 362.1] + ] + } }, "right": { - "schema": "symbol string, event_ts string, bid_pr float, ask_pr float", - "ts_col": "event_ts", - "partition_cols": ["symbol"], - "data": [ - ["S1", "2022-01-01 10:00:00.1234567", 345.11, 351.12], - ["S1", "2022-01-01 10:00:00.12345671", 348.10, 353.13], - ["S1", "2022-01-01 10:00:00.12345675", 358.93, 365.12], - ["S1", "2022-01-01 10:00:00.12345677", 358.91, 365.33], - ["S1", "2022-01-01 10:00:01.10000001", 359.21, 365.31] - ] + "tsdf": { + "ts_col": "event_ts", + "partition_cols": ["symbol"] + }, + "df": { + "schema": "symbol string, event_ts string, bid_pr float, ask_pr float", + "data": [ + ["S1", "2020-08-01 00:00:01.123456789", 345.11, 351.12], + ["S1", "2020-08-01 00:01:05.123456789", 348.10, 353.13], + ["S1", "2020-09-01 00:02:01.123456789", 358.93, 365.12], + ["S1", "2020-09-01 00:15:01.123456789", 359.21, 365.31] + ] + } }, "expected": { - "schema": "symbol string, left_event_ts string, left_trade_pr float, right_event_ts string, right_ask_pr float, right_bid_pr float", - "ts_col": "left_event_ts", - "partition_cols": ["symbol"], - "data": [ - ["S1", "2022-01-01 09:59:59.123456789", 349.21, null, null, null], - ["S1", "2022-01-01 10:00:00.123456788", 351.32, "2022-01-01 10:00:00.12345677", 365.33, 358.91], - ["S1", "2022-01-01 10:00:00.123456789", 361.12, "2022-01-01 10:00:00.12345677", 365.33, 358.91], - ["S1", "2022-01-01 10:00:01.123456789", 364.31, "2022-01-01 10:00:01.10000001", 365.31, 359.21] - ] + "tsdf": { + "ts_col": "left_event_ts", + "partition_cols": ["symbol"] + }, + "df": { + "schema": "symbol string, left_event_ts double, left_trade_pr float, right_event_ts double, right_bid_pr float, right_ask_pr float", + "data": [ + ["S1", 1.5962400101234567E9, 349.21, 1.5962400011234567E9, 345.11, 351.12], + ["S1", 1.5962400721234567E9, 351.32, 1.5962400651234567E9, 348.10, 353.13], + ["S1", 1.5989185301234567E9, 361.1, 1.5989185211234567E9, 358.93, 365.12], + ["S1", 1.5989195521234567E9, 362.1, 1.5989193011234567E9, 359.21, 365.31] + ] + } } }, "test_asof_join_tolerance": { @@ -217,76 +287,101 @@ "$ref": "#/__SharedData/shared_left" }, "right": { - "schema": "symbol string, event_ts string, bid_pr float, ask_pr float", - "ts_col": "event_ts", - "partition_cols": ["symbol"], - "data": [ - ["S1", "2020-08-01 00:00:01", 345.11, 351.12], - ["S1", "2020-08-01 
00:00:10", 345.22, 351.33], - ["S1", "2020-08-01 00:01:05", 348.10, 353.13], - ["S1", "2020-09-01 00:02:01", 358.93, 365.12], - ["S1", "2020-09-01 00:15:01", 359.21, 365.31] - ] + "tsdf": { + "ts_col": "event_ts", + "partition_cols": ["symbol"] + }, + "df": { + "schema": "symbol string, event_ts string, bid_pr float, ask_pr float", + "ts_convert": ["event_ts"], + "data": [ + ["S1", "2020-08-01 00:00:01", 345.11, 351.12], + ["S1", "2020-08-01 00:00:10", 345.22, 351.33], + ["S1", "2020-08-01 00:01:05", 348.10, 353.13], + ["S1", "2020-09-01 00:02:01", 358.93, 365.12], + ["S1", "2020-09-01 00:15:01", 359.21, 365.31] + ] + } }, "expected_tolerance_None": { - "schema": "symbol string, left_event_ts string, left_trade_pr float, right_event_ts string, right_bid_pr float, right_ask_pr float", - "ts_col": "left_event_ts", - "partition_cols": ["symbol"], - "other_ts_cols": ["right_event_ts"], - "data": [ - ["S1", "2020-08-01 00:00:10", 349.21, "2020-08-01 00:00:10", 345.22, 351.33], - ["S1", "2020-08-01 00:01:12", 351.32, "2020-08-01 00:01:05", 348.10, 353.13], - ["S1", "2020-09-01 00:02:10", 361.1, "2020-09-01 00:02:01", 358.93, 365.12], - ["S1", "2020-09-01 00:19:12", 362.1, "2020-09-01 00:15:01", 359.21, 365.31] - ] + "tsdf": { + "ts_col": "left_event_ts", + "partition_cols": ["symbol"] + }, + "df": { + "schema": "symbol string, left_event_ts string, left_trade_pr float, right_event_ts string, right_bid_pr float, right_ask_pr float", + "ts_convert": ["left_event_ts", "right_event_ts"], + "data": [ + ["S1", "2020-08-01 00:00:10", 349.21, "2020-08-01 00:00:10", 345.22, 351.33], + ["S1", "2020-08-01 00:01:12", 351.32, "2020-08-01 00:01:05", 348.10, 353.13], + ["S1", "2020-09-01 00:02:10", 361.1, "2020-09-01 00:02:01", 358.93, 365.12], + ["S1", "2020-09-01 00:19:12", 362.1, "2020-09-01 00:15:01", 359.21, 365.31] + ] + } }, "expected_tolerance_0": { - "schema": "symbol string, left_event_ts string, left_trade_pr float, right_event_ts string, right_bid_pr float, right_ask_pr float", - "ts_col": "left_event_ts", - "partition_cols": ["symbol"], - "other_ts_cols": ["right_event_ts"], - "data": [ - ["S1", "2020-08-01 00:00:10", 349.21, "2020-08-01 00:00:10", 345.22, 351.33], - ["S1", "2020-08-01 00:01:12", 351.32, null, null, null], - ["S1", "2020-09-01 00:02:10", 361.1, null, null, null], - ["S1", "2020-09-01 00:19:12", 362.1, null, null, null] - ] + "tsdf": { + "ts_col": "left_event_ts", + "partition_cols": ["symbol"] + }, + "df": { + "schema": "symbol string, left_event_ts string, left_trade_pr float, right_event_ts string, right_bid_pr float, right_ask_pr float", + "ts_convert": ["left_event_ts", "right_event_ts"], + "data": [ + ["S1", "2020-08-01 00:00:10", 349.21, "2020-08-01 00:00:10", 345.22, 351.33], + ["S1", "2020-08-01 00:01:12", 351.32, null, null, null], + ["S1", "2020-09-01 00:02:10", 361.1, null, null, null], + ["S1", "2020-09-01 00:19:12", 362.1, null, null, null] + ] + } }, "expected_tolerance_5.5": { - "schema": "symbol string, left_event_ts string, left_trade_pr float, right_event_ts string, right_bid_pr float, right_ask_pr float", - "ts_col": "left_event_ts", - "partition_cols": ["symbol"], - "other_ts_cols": ["right_event_ts"], - "data": [ - ["S1", "2020-08-01 00:00:10", 349.21, "2020-08-01 00:00:10", 345.22, 351.33], - ["S1", "2020-08-01 00:01:12", 351.32, null, null, null], - ["S1", "2020-09-01 00:02:10", 361.1, null, null, null], - ["S1", "2020-09-01 00:19:12", 362.1, null, null, null] - ] + "tsdf": { + "ts_col": "left_event_ts", + "partition_cols": ["symbol"] + }, + "df": { + 
"schema": "symbol string, left_event_ts string, left_trade_pr float, right_event_ts string, right_bid_pr float, right_ask_pr float", + "ts_convert": ["left_event_ts", "right_event_ts"], + "data": [ + ["S1", "2020-08-01 00:00:10", 349.21, "2020-08-01 00:00:10", 345.22, 351.33], + ["S1", "2020-08-01 00:01:12", 351.32, null, null, null], + ["S1", "2020-09-01 00:02:10", 361.1, null, null, null], + ["S1", "2020-09-01 00:19:12", 362.1, null, null, null] + ] + } }, "expected_tolerance_7": { - "schema": "symbol string, left_event_ts string, left_trade_pr float, right_event_ts string, right_bid_pr float, right_ask_pr float", - "ts_col": "left_event_ts", - "partition_cols": ["symbol"], - "other_ts_cols": ["right_event_ts"], - "data": [ - ["S1", "2020-08-01 00:00:10", 349.21, "2020-08-01 00:00:10", 345.22, 351.33], - ["S1", "2020-08-01 00:01:12", 351.32, "2020-08-01 00:01:05", 348.10, 353.13], - ["S1", "2020-09-01 00:02:10", 361.1, null, null, null], - ["S1", "2020-09-01 00:19:12", 362.1, null, null, null] - ] + "tsdf": { + "ts_col": "left_event_ts", + "partition_cols": ["symbol"] + }, + "df": { + "schema": "symbol string, left_event_ts string, left_trade_pr float, right_event_ts string, right_bid_pr float, right_ask_pr float", + "ts_convert": ["left_event_ts", "right_event_ts"], + "data": [ + ["S1", "2020-08-01 00:00:10", 349.21, "2020-08-01 00:00:10", 345.22, 351.33], + ["S1", "2020-08-01 00:01:12", 351.32, "2020-08-01 00:01:05", 348.10, 353.13], + ["S1", "2020-09-01 00:02:10", 361.1, null, null, null], + ["S1", "2020-09-01 00:19:12", 362.1, null, null, null] + ] + } }, "expected_tolerance_10": { - "schema": "symbol string, left_event_ts string, left_trade_pr float, right_event_ts string, right_bid_pr float, right_ask_pr float", - "ts_col": "left_event_ts", - "partition_cols": ["symbol"], - "other_ts_cols": ["right_event_ts"], - "data": [ - ["S1", "2020-08-01 00:00:10", 349.21, "2020-08-01 00:00:10", 345.22, 351.33], - ["S1", "2020-08-01 00:01:12", 351.32, "2020-08-01 00:01:05", 348.10, 353.13], - ["S1", "2020-09-01 00:02:10", 361.1, "2020-09-01 00:02:01", 358.93, 365.12], - ["S1", "2020-09-01 00:19:12", 362.1, null, null, null] - ] + "tsdf": { + "ts_col": "left_event_ts", + "partition_cols": ["symbol"] + }, + "df": { + "schema": "symbol string, left_event_ts string, left_trade_pr float, right_event_ts string, right_bid_pr float, right_ask_pr float", + "ts_convert": ["left_event_ts", "right_event_ts"], + "data": [ + ["S1", "2020-08-01 00:00:10", 349.21, "2020-08-01 00:00:10", 345.22, 351.33], + ["S1", "2020-08-01 00:01:12", 351.32, "2020-08-01 00:01:05", 348.10, 353.13], + ["S1", "2020-09-01 00:02:10", 361.1, "2020-09-01 00:02:01", 358.93, 365.12], + ["S1", "2020-09-01 00:19:12", 362.1, null, null, null] + ] + } } }, "test_asof_join_sql_join_opt_and_bytes_threshold": { From 0e1c3ef668239a5d6fbe50b26690d62a9924f001 Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Wed, 15 May 2024 10:49:08 -0500 Subject: [PATCH 100/137] formatting --- python/tempo/tsdf.py | 54 +++++++++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 23 deletions(-) diff --git a/python/tempo/tsdf.py b/python/tempo/tsdf.py index c305fb77..f5c0a86f 100644 --- a/python/tempo/tsdf.py +++ b/python/tempo/tsdf.py @@ -67,9 +67,11 @@ def __init__( if isinstance(df.schema[ts_col].dataType, StringType): # pragma: no cover sample_ts = df.select(ts_col).limit(1).collect()[0][0] self.__validate_ts_string(sample_ts) - self.df = self.__add_double_ts()\ - .drop(self.ts_col)\ - .withColumnRenamed("double_ts", self.ts_col) + 
self.df = ( + self.__add_double_ts() + .drop(self.ts_col) + .withColumnRenamed("double_ts", self.ts_col) + ) """ Make sure DF is ordered by its respective ts_col and partition columns. @@ -80,11 +82,13 @@ def __init__( # @staticmethod - def parse_nanos_timestamp(df: DataFrame, - str_ts_col: str, - ts_fmt: str = "yyyy-MM-dd HH:mm:ss", - double_ts_col: Optional[str] = None, - parsed_ts_col: Optional[str] = None) -> DataFrame: + def parse_nanos_timestamp( + df: DataFrame, + str_ts_col: str, + ts_fmt: str = "yyyy-MM-dd HH:mm:ss", + double_ts_col: Optional[str] = None, + parsed_ts_col: Optional[str] = None, + ) -> DataFrame: """ Parse a string timestamp column with nanosecond precision into a double timestamp column. @@ -100,23 +104,27 @@ def parse_nanos_timestamp(df: DataFrame, """ # add a parsed timestamp column if requested - src_df = df.withColumn(parsed_ts_col, - sfn.to_timestamp(sfn.col(str_ts_col), ts_fmt)) \ - if parsed_ts_col else df + src_df = ( + df.withColumn(parsed_ts_col, sfn.to_timestamp(sfn.col(str_ts_col), ts_fmt)) + if parsed_ts_col + else df + ) return ( - src_df.withColumn("nanos", - sfn.when(sfn.col(str_ts_col).contains("."), - sfn.concat(sfn.lit("0."), - sfn.split(sfn.col(str_ts_col), - r"\.")[1]) - ).otherwise(0).cast("double")) - .withColumn("long_ts", - sfn.unix_timestamp(str_ts_col, ts_fmt)) - .withColumn((double_ts_col or str_ts_col), - sfn.col("long_ts") + sfn.col("nanos"))) - - + src_df.withColumn( + "nanos", + sfn.when( + sfn.col(str_ts_col).contains("."), + sfn.concat(sfn.lit("0."), sfn.split(sfn.col(str_ts_col), r"\.")[1]), + ) + .otherwise(0) + .cast("double"), + ) + .withColumn("long_ts", sfn.unix_timestamp(str_ts_col, ts_fmt)) + .withColumn( + (double_ts_col or str_ts_col), sfn.col("long_ts") + sfn.col("nanos") + ) + ) def __add_double_ts(self) -> DataFrame: """Add a double (epoch) version of the string timestamp out to nanos""" From 4c903183f48aea3a7e755361b4e58c63caedee37 Mon Sep 17 00:00:00 2001 From: Tristan Nixon Date: Wed, 15 May 2024 09:18:08 -0700 Subject: [PATCH 101/137] We should just check for style compliance with black, not apply them --- python/tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tox.ini b/python/tox.ini index 3d236e34..7913bb3d 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -36,7 +36,7 @@ deps = flake8 black==24.4.1 commands = - black {posargs} {toxinidir}/tempo + black --check --verbose {posargs} {toxinidir}/tempo flake8 --config {toxinidir}/.flake8 {toxinidir}/tempo [testenv:type-check] From 81f0239a13703f5a2fff5a3571cf7414753fa20e Mon Sep 17 00:00:00 2001 From: Tristan Nixon Date: Wed, 15 May 2024 10:48:03 -0700 Subject: [PATCH 102/137] Revert "We should just check for style compliance with black, not apply them" This reverts commit 4c903183f48aea3a7e755361b4e58c63caedee37. 
--- python/tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tox.ini b/python/tox.ini index 7913bb3d..3d236e34 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -36,7 +36,7 @@ deps = flake8 black==24.4.1 commands = - black --check --verbose {posargs} {toxinidir}/tempo + black {posargs} {toxinidir}/tempo flake8 --config {toxinidir}/.flake8 {toxinidir}/tempo [testenv:type-check] From fc55de287257f986b40eb486f48c242921e7be7c Mon Sep 17 00:00:00 2001 From: Tristan Nixon Date: Wed, 15 May 2024 11:33:35 -0700 Subject: [PATCH 103/137] moving dbr version specifiers to use compatibility syntax --- python/requirements/dbr113.txt | 14 +++++++------- python/requirements/dbr122.txt | 14 +++++++------- python/requirements/dbr133.txt | 14 +++++++------- python/requirements/dbr143.txt | 14 +++++++------- 4 files changed, 28 insertions(+), 28 deletions(-) diff --git a/python/requirements/dbr113.txt b/python/requirements/dbr113.txt index a2fe6b88..a12535ff 100644 --- a/python/requirements/dbr113.txt +++ b/python/requirements/dbr113.txt @@ -1,7 +1,7 @@ -delta-spark==2.1.0 -ipython==7.32.0 -numpy==1.20.3 -pandas==1.3.4 -pyarrow==7.0.0 -pyspark==3.3.0 -scipy==1.7.1 \ No newline at end of file +delta-spark~=2.1.0 +ipython~=7.32.0 +numpy~=1.20.3 +pandas~=1.3.4 +pyarrow~=7.0.0 +pyspark~=3.3.0 +scipy~=1.7.1 \ No newline at end of file diff --git a/python/requirements/dbr122.txt b/python/requirements/dbr122.txt index d5f44af9..73bd3071 100644 --- a/python/requirements/dbr122.txt +++ b/python/requirements/dbr122.txt @@ -1,7 +1,7 @@ -delta-spark==2.2.0 -ipython==8.5.0 -numpy==1.21.5 -pandas==1.4.2 -pyarrow==7.0.0 -pyspark==3.3.2 -scipy==1.7.3 \ No newline at end of file +delta-spark~=2.2.0 +ipython~=8.5.0 +numpy~=1.21.5 +pandas~=1.4.2 +pyarrow~=7.0.0 +pyspark~=3.3.2 +scipy~=1.7.3 \ No newline at end of file diff --git a/python/requirements/dbr133.txt b/python/requirements/dbr133.txt index 633a452c..6eb67e61 100644 --- a/python/requirements/dbr133.txt +++ b/python/requirements/dbr133.txt @@ -1,7 +1,7 @@ -delta-spark==2.4.0 -ipython==8.10.0 -numpy==1.21.5 -pandas==1.4.4 -pyarrow==8.0.0 -pyspark==3.4.1 -scipy==1.9.1 \ No newline at end of file +delta-spark~=2.4.0 +ipython~=8.10.0 +numpy~=1.21.5 +pandas~=1.4.4 +pyarrow~=8.0.0 +pyspark~=3.4.1 +scipy~=1.9.1 \ No newline at end of file diff --git a/python/requirements/dbr143.txt b/python/requirements/dbr143.txt index 19c4342e..165cc0c7 100644 --- a/python/requirements/dbr143.txt +++ b/python/requirements/dbr143.txt @@ -1,7 +1,7 @@ -delta-spark==3.1.0 -ipython==8.14.0 -numpy==1.23.5 -pandas==1.5.3 -pyarrow==8.0.0 -pyspark==3.5.0 -scipy==1.10.0 \ No newline at end of file +delta-spark~=3.1.0 +ipython~=8.14.0 +numpy~=1.23.5 +pandas~=1.5.3 +pyarrow~=8.0.0 +pyspark~=3.5.0 +scipy~=1.10.0 \ No newline at end of file From 8e42fb415190819f53e57ed70baf721b20075926 Mon Sep 17 00:00:00 2001 From: Tristan Nixon Date: Tue, 21 May 2024 14:12:21 -0700 Subject: [PATCH 104/137] updating 2 test cases --- python/tests/tsdf_tests.py | 8 +- python/tests/unit_test_data/tsdf_tests.json | 302 ++++++++++---------- 2 files changed, 161 insertions(+), 149 deletions(-) diff --git a/python/tests/tsdf_tests.py b/python/tests/tsdf_tests.py index 33af3155..6baebd0b 100644 --- a/python/tests/tsdf_tests.py +++ b/python/tests/tsdf_tests.py @@ -938,8 +938,8 @@ def test_range_stats(self): """Test of range stats for 20 minute rolling window""" # construct dataframes - tsdf_init = self.get_data_as_tsdf("init") - dfExpected = self.get_data_as_sdf("expected") + tsdf_init = 
self.get_test_df_builder("init").as_tsdf() + dfExpected = self.get_test_df_builder("expected").as_sdf() # convert to TSDF @@ -979,8 +979,8 @@ def test_group_stats(self): """Test of range stats for 20 minute rolling window""" # construct dataframes - tsdf_init = self.get_data_as_tsdf("init") - dfExpected = self.get_data_as_sdf("expected") + tsdf_init = self.get_test_df_builder("init").as_tsdf() + dfExpected = self.get_test_df_builder("expected").as_sdf() # using lookback of 20 minutes featured_df = tsdf_init.withGroupedStats(freq="1 min").df diff --git a/python/tests/unit_test_data/tsdf_tests.json b/python/tests/unit_test_data/tsdf_tests.json index 3cf1482a..7000c602 100644 --- a/python/tests/unit_test_data/tsdf_tests.json +++ b/python/tests/unit_test_data/tsdf_tests.json @@ -1373,162 +1373,174 @@ "RangeStatsTest": { "test_range_stats": { "init": { - "schema": "symbol string, event_ts string, trade_pr float", - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ], - "data": [ - [ - "S1", - "2020-08-01 00:00:10", - 349.21 - ], - [ - "S1", - "2020-08-01 00:01:12", - 351.32 - ], - [ - "S1", - "2020-09-01 00:02:10", - 361.1 - ], - [ - "S1", - "2020-09-01 00:19:12", - 362.1 + "tsdf": { + "ts_col": "event_ts", + "partition_cols": ["symbol"] + }, + "df": { + "schema": "symbol string, event_ts string, trade_pr float", + "ts_convert": ["event_ts"], + "data": [ + [ + "S1", + "2020-08-01 00:00:10", + 349.21 + ], + [ + "S1", + "2020-08-01 00:01:12", + 351.32 + ], + [ + "S1", + "2020-09-01 00:02:10", + 361.1 + ], + [ + "S1", + "2020-09-01 00:19:12", + 362.1 + ] ] - ] + } }, "expected": { - "schema": "symbol string, event_ts string, mean_trade_pr float, count_trade_pr long, min_trade_pr float, max_trade_pr float, sum_trade_pr float, stddev_trade_pr float, zscore_trade_pr float", - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ], - "data": [ - [ - "S1", - "2020-08-01 00:00:10", - 349.21, - 1, - 349.21, - 349.21, - 349.21, - null, - null - ], - [ - "S1", - "2020-08-01 00:01:12", - 350.26, - 2, - 349.21, - 351.32, - 700.53, - 1.49, - 0.71 - ], - [ - "S1", - "2020-09-01 00:02:10", - 361.1, - 1, - 361.1, - 361.1, - 361.1, - null, - null - ], - [ - "S1", - "2020-09-01 00:19:12", - 361.6, - 2, - 361.1, - 362.1, - 723.2, - 0.71, - 0.71 + "tsdf": { + "ts_col": "event_ts", + "partition_cols": ["symbol"] + }, + "df": { + "schema": "symbol string, event_ts string, mean_trade_pr float, count_trade_pr long, min_trade_pr float, max_trade_pr float, sum_trade_pr float, stddev_trade_pr float, zscore_trade_pr float", + "ts_convert": ["event_ts"], + "data": [ + [ + "S1", + "2020-08-01 00:00:10", + 349.21, + 1, + 349.21, + 349.21, + 349.21, + null, + null + ], + [ + "S1", + "2020-08-01 00:01:12", + 350.26, + 2, + 349.21, + 351.32, + 700.53, + 1.49, + 0.71 + ], + [ + "S1", + "2020-09-01 00:02:10", + 361.1, + 1, + 361.1, + 361.1, + 361.1, + null, + null + ], + [ + "S1", + "2020-09-01 00:19:12", + 361.6, + 2, + 361.1, + 362.1, + 723.2, + 0.71, + 0.71 + ] ] - ] + } } }, "test_group_stats": { "init": { - "schema": "symbol string, event_ts string, trade_pr float, index integer", - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ], - "data": [ - [ - "S1", - "2020-08-01 00:00:10", - 349.21, - 1 - ], - [ - "S1", - "2020-08-01 00:00:33", - 351.32, - 1 - ], - [ - "S1", - "2020-09-01 00:02:10", - 361.1, - 1 - ], - [ - "S1", - "2020-09-01 00:02:49", - 362.1, - 1 + "tsdf": { + "ts_col": "event_ts", + "partition_cols": ["symbol"] + }, + "df": { + "schema": "symbol string, event_ts string, trade_pr float, index 
integer", + "ts_convert": ["event_ts"], + "data": [ + [ + "S1", + "2020-08-01 00:00:10", + 349.21, + 1 + ], + [ + "S1", + "2020-08-01 00:00:33", + 351.32, + 1 + ], + [ + "S1", + "2020-09-01 00:02:10", + 361.1, + 1 + ], + [ + "S1", + "2020-09-01 00:02:49", + 362.1, + 1 + ] ] - ] + } }, "expected": { - "schema": "symbol string, event_ts string, mean_trade_pr float, count_trade_pr long, min_trade_pr float, max_trade_pr float, sum_trade_pr float, stddev_trade_pr float, mean_index integer, count_index integer, min_index integer, max_index integer, sum_index integer, stddev_index integer", - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ], - "data": [ - [ - "S1", - "2020-08-01 00:00:00", - 350.26, - 2, - 349.21, - 351.32, - 700.53, - 1.49, - 1, - 2, - 1, - 1, - 2, - 0 - ], - [ - "S1", - "2020-09-01 00:02:00", - 361.6, - 2, - 361.1, - 362.1, - 723.2, - 0.71, - 1, - 2, - 1, - 1, - 2, - 0 - ] - ] + "tsdf": { + "ts_col": "event_ts", + "partition_cols": ["symbol"] + }, + "df": { + "schema": "symbol string, event_ts string, mean_trade_pr float, count_trade_pr long, min_trade_pr float, max_trade_pr float, sum_trade_pr float, stddev_trade_pr float, mean_index integer, count_index integer, min_index integer, max_index integer, sum_index integer, stddev_index integer", + "ts_convert": ["event_ts"], + "data": [ + [ + "S1", + "2020-08-01 00:00:00", + 350.26, + 2, + 349.21, + 351.32, + 700.53, + 1.49, + 1, + 2, + 1, + 1, + 2, + 0 + ], + [ + "S1", + "2020-09-01 00:02:00", + 361.6, + 2, + 361.1, + 362.1, + 723.2, + 0.71, + 1, + 2, + 1, + 1, + 2, + 0 + ] + ] + } } } }, From dbf08c0468fbcf9dd46bad093ba95b8c5299df6f Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Wed, 22 May 2024 07:51:52 -0500 Subject: [PATCH 105/137] renamed test action --- .github/workflows/{push.yml => test.yml} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename .github/workflows/{push.yml => test.yml} (99%) diff --git a/.github/workflows/push.yml b/.github/workflows/test.yml similarity index 99% rename from .github/workflows/push.yml rename to .github/workflows/test.yml index 78eb93c1..8ee69b2e 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/test.yml @@ -1,4 +1,4 @@ -name: push +name: test on: pull_request: From e543b696e3c79edc22684c61b0ea1c7500e29ba5 Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Wed, 22 May 2024 12:18:12 -0500 Subject: [PATCH 106/137] testing json refactor --- ...{resample_tests.py => resample_tests_2.py} | 0 python/tests/unit_test_data/json-fixer.ipynb | 366 +++++++++ .../unit_test_data/resample_tests_2.json | 733 ++++++++++++++++++ 3 files changed, 1099 insertions(+) rename python/tests/{resample_tests.py => resample_tests_2.py} (100%) create mode 100644 python/tests/unit_test_data/json-fixer.ipynb create mode 100644 python/tests/unit_test_data/resample_tests_2.json diff --git a/python/tests/resample_tests.py b/python/tests/resample_tests_2.py similarity index 100% rename from python/tests/resample_tests.py rename to python/tests/resample_tests_2.py diff --git a/python/tests/unit_test_data/json-fixer.ipynb b/python/tests/unit_test_data/json-fixer.ipynb new file mode 100644 index 00000000..64df7a47 --- /dev/null +++ b/python/tests/unit_test_data/json-fixer.ipynb @@ -0,0 +1,366 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "\n", + "with open('./resample_tests.json', 'r') as file:\n", + " before = json.load(file)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": 
{}, + "outputs": [], + "source": [ + "after = {}\n", + "for i in before.keys(): # i is test class\n", + " if i == \"__SharedData\":\n", + " continue\n", + " after[i] = {}\n", + " for j in before[i].keys(): # j is test method\n", + " after[i][j] = {}\n", + " for k in before[i][j].keys(): # input, expected, etc.\n", + "\n", + " after[i][j][k] = {\n", + " \"tsdf\": {\n", + " \"ts_col\": before[i][j][k].get(\"ts_col\", None),\n", + " \"other_ts_cols\": before[i][j][k].get(\"other_ts_cols\", None),\n", + " \"partition_cols\": before[i][j][k].get(\"partition_col\", None),\n", + " \"sequenc_col\": before[i][j][k].get(\"sequence_col\", None),\n", + " \"start_ts\": before[i][j][k].get(\"start_ts\", None),\n", + " \"end_ts\": before[i][j][k].get(\"end_ts\", None),\n", + " \"series\": before[i][j][k].get(\"series\", None),\n", + " \n", + " },\n", + " \"df\": {\n", + " \"schema\": before[i][j][k].get(\"schema\", None),\n", + " \"ts_convert\": before[i][j][k].get(\"ts_convert\", None),\n", + " \"data\": before[i][j][k].get(\"data\", None)\n", + " },\n", + " \"$ref\": before[i][j][k].get(\"$ref\", None)\n", + " }" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "after_2 = {}\n", + "for i in before.keys(): # i is test class\n", + " if i != \"__SharedData\":\n", + " continue\n", + " after_2[i] = {}\n", + " for j in before[i].keys(): # j is test method\n", + " after_2[i][j] = {\n", + " \"tsdf\": {\n", + " \"ts_col\": before[i][j].get(\"ts_col\", None),\n", + " \"other_ts_cols\": before[i][j].get(\"other_ts_cols\", None),\n", + " \"partition_cols\": before[i][j].get(\"partition_col\", None),\n", + " \"sequence_col\": before[i][j].get(\"sequence_col\", None),\n", + " \"start_ts\": before[i][j].get(\"start_ts\", None),\n", + " \"end_ts\": before[i][j].get(\"end_ts\", None),\n", + " \"series\": before[i][j].get(\"series\", None),\n", + " \n", + " },\n", + " \"df\": {\n", + " \"schema\": before[i][j].get(\"schema\", None),\n", + " \"ts_convert\": before[i][j].get(\"ts_convert\", None),\n", + " \"data\": before[i][j].get(\"data\", None)\n", + " },\n", + " \"$ref\": before[i][j].get(\"$ref\", None)\n", + " }" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'ResampleUnitTests': {'test_appendAggKey_freq_is_none': {'input_data': {'tsdf': {'ts_col': None,\n", + " 'other_ts_cols': None,\n", + " 'partition_cols': None,\n", + " 'sequenc_col': None,\n", + " 'start_ts': None,\n", + " 'end_ts': None,\n", + " 'series': None},\n", + " 'df': {'schema': None, 'ts_convert': None, 'data': None},\n", + " '$ref': '#/__SharedData/input_data'}},\n", + " 'test_appendAggKey_freq_microsecond': {'input_data': {'tsdf': {'ts_col': None,\n", + " 'other_ts_cols': None,\n", + " 'partition_cols': None,\n", + " 'sequenc_col': None,\n", + " 'start_ts': None,\n", + " 'end_ts': None,\n", + " 'series': None},\n", + " 'df': {'schema': None, 'ts_convert': None, 'data': None},\n", + " '$ref': '#/__SharedData/input_data'}},\n", + " 'test_appendAggKey_freq_is_invalid': {'input_data': {'tsdf': {'ts_col': None,\n", + " 'other_ts_cols': None,\n", + " 'partition_cols': None,\n", + " 'sequenc_col': None,\n", + " 'start_ts': None,\n", + " 'end_ts': None,\n", + " 'series': None},\n", + " 'df': {'schema': None, 'ts_convert': None, 'data': None},\n", + " '$ref': '#/__SharedData/input_data'}},\n", + " 'test_aggregate_floor': {'input_data': {'tsdf': {'ts_col': None,\n", + " 'other_ts_cols': None,\n", + " 
'partition_cols': None,\n", + " 'sequenc_col': None,\n", + " 'start_ts': None,\n", + " 'end_ts': None,\n", + " 'series': None},\n", + " 'df': {'schema': None, 'ts_convert': None, 'data': None},\n", + " '$ref': '#/__SharedData/input_data'},\n", + " 'expected_data': {'tsdf': {'ts_col': 'event_ts',\n", + " 'other_ts_cols': None,\n", + " 'partition_cols': None,\n", + " 'sequenc_col': None,\n", + " 'start_ts': None,\n", + " 'end_ts': None,\n", + " 'series': None},\n", + " 'df': {'schema': 'symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float',\n", + " 'ts_convert': None,\n", + " 'data': [['S1', '2020-08-01 00:00:00', 'SAME_DT', 349.21, 10.0],\n", + " ['S1', '2020-09-01 00:00:00', 'SAME_DT', 361.1, 5.0]]},\n", + " '$ref': None}},\n", + " 'test_aggregate_average': {'input_data': {'tsdf': {'ts_col': None,\n", + " 'other_ts_cols': None,\n", + " 'partition_cols': None,\n", + " 'sequenc_col': None,\n", + " 'start_ts': None,\n", + " 'end_ts': None,\n", + " 'series': None},\n", + " 'df': {'schema': None, 'ts_convert': None, 'data': None},\n", + " '$ref': '#/__SharedData/input_data'},\n", + " 'expected_data': {'tsdf': {'ts_col': 'event_ts',\n", + " 'other_ts_cols': None,\n", + " 'partition_cols': None,\n", + " 'sequenc_col': None,\n", + " 'start_ts': None,\n", + " 'end_ts': None,\n", + " 'series': None},\n", + " 'df': {'schema': 'symbol string, event_ts string, trade_pr double, trade_pr_2 double',\n", + " 'ts_convert': None,\n", + " 'data': [['S1', '2020-08-01 00:00:00', 348.8760009765625, 8.0],\n", + " ['S1', '2020-09-01 00:00:00', 361.6000061035156, 4.5]]},\n", + " '$ref': None}},\n", + " 'test_aggregate_min': {'input_data': {'tsdf': {'ts_col': None,\n", + " 'other_ts_cols': None,\n", + " 'partition_cols': None,\n", + " 'sequenc_col': None,\n", + " 'start_ts': None,\n", + " 'end_ts': None,\n", + " 'series': None},\n", + " 'df': {'schema': None, 'ts_convert': None, 'data': None},\n", + " '$ref': '#/__SharedData/input_data'},\n", + " 'expected_data': {'tsdf': {'ts_col': 'event_ts',\n", + " 'other_ts_cols': None,\n", + " 'partition_cols': None,\n", + " 'sequenc_col': None,\n", + " 'start_ts': None,\n", + " 'end_ts': None,\n", + " 'series': None},\n", + " 'df': {'schema': 'symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float',\n", + " 'ts_convert': None,\n", + " 'data': [['S1', '2020-08-01 00:00:00', 'SAME_DT', 340.21, 6.0],\n", + " ['S1', '2020-09-01 00:00:00', 'SAME_DT', 361.1, 4.0]]},\n", + " '$ref': None}},\n", + " 'test_aggregate_min_with_prefix': {'input_data': {'tsdf': {'ts_col': None,\n", + " 'other_ts_cols': None,\n", + " 'partition_cols': None,\n", + " 'sequenc_col': None,\n", + " 'start_ts': None,\n", + " 'end_ts': None,\n", + " 'series': None},\n", + " 'df': {'schema': None, 'ts_convert': None, 'data': None},\n", + " '$ref': '#/__SharedData/input_data'},\n", + " 'expected_data': {'tsdf': {'ts_col': 'event_ts',\n", + " 'other_ts_cols': None,\n", + " 'partition_cols': None,\n", + " 'sequenc_col': None,\n", + " 'start_ts': None,\n", + " 'end_ts': None,\n", + " 'series': None},\n", + " 'df': {'schema': 'symbol string, event_ts string, min_date string, min_trade_pr float, min_trade_pr_2 float',\n", + " 'ts_convert': None,\n", + " 'data': {'$ref': '#/ResampleUnitTests/test_aggregate_min/expected_data/data'}},\n", + " '$ref': None}},\n", + " 'test_aggregate_min_with_fill': {'input_data': {'tsdf': {'ts_col': None,\n", + " 'other_ts_cols': None,\n", + " 'partition_cols': None,\n", + " 'sequenc_col': None,\n", + " 'start_ts': None,\n", + " 'end_ts': 
None,\n", + " 'series': None},\n", + " 'df': {'schema': None, 'ts_convert': None, 'data': None},\n", + " '$ref': '#/__SharedData/input_data'},\n", + " 'expected_data': {'tsdf': {'ts_col': 'event_ts',\n", + " 'other_ts_cols': None,\n", + " 'partition_cols': None,\n", + " 'sequenc_col': None,\n", + " 'start_ts': None,\n", + " 'end_ts': None,\n", + " 'series': None},\n", + " 'df': {'schema': 'symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float',\n", + " 'ts_convert': None,\n", + " 'data': [['S1', '2020-08-01 00:00:00', 'SAME_DT', 340.21, 6.0],\n", + " ['S1', '2020-08-02 00:00:00', None, 0.0, 0.0],\n", + " ['S1', '2020-08-03 00:00:00', None, 0.0, 0.0],\n", + " ['S1', '2020-08-04 00:00:00', None, 0.0, 0.0],\n", + " ['S1', '2020-08-05 00:00:00', None, 0.0, 0.0],\n", + " ['S1', '2020-08-06 00:00:00', None, 0.0, 0.0],\n", + " ['S1', '2020-08-07 00:00:00', None, 0.0, 0.0],\n", + " ['S1', '2020-08-08 00:00:00', None, 0.0, 0.0],\n", + " ['S1', '2020-08-09 00:00:00', None, 0.0, 0.0],\n", + " ['S1', '2020-08-10 00:00:00', None, 0.0, 0.0],\n", + " ['S1', '2020-08-11 00:00:00', None, 0.0, 0.0],\n", + " ['S1', '2020-08-12 00:00:00', None, 0.0, 0.0],\n", + " ['S1', '2020-08-13 00:00:00', None, 0.0, 0.0],\n", + " ['S1', '2020-08-14 00:00:00', None, 0.0, 0.0],\n", + " ['S1', '2020-08-15 00:00:00', None, 0.0, 0.0],\n", + " ['S1', '2020-08-16 00:00:00', None, 0.0, 0.0],\n", + " ['S1', '2020-08-17 00:00:00', None, 0.0, 0.0],\n", + " ['S1', '2020-08-18 00:00:00', None, 0.0, 0.0],\n", + " ['S1', '2020-08-19 00:00:00', None, 0.0, 0.0],\n", + " ['S1', '2020-08-20 00:00:00', None, 0.0, 0.0],\n", + " ['S1', '2020-08-21 00:00:00', None, 0.0, 0.0],\n", + " ['S1', '2020-08-22 00:00:00', None, 0.0, 0.0],\n", + " ['S1', '2020-08-23 00:00:00', None, 0.0, 0.0],\n", + " ['S1', '2020-08-24 00:00:00', None, 0.0, 0.0],\n", + " ['S1', '2020-08-25 00:00:00', None, 0.0, 0.0],\n", + " ['S1', '2020-08-26 00:00:00', None, 0.0, 0.0],\n", + " ['S1', '2020-08-27 00:00:00', None, 0.0, 0.0],\n", + " ['S1', '2020-08-28 00:00:00', None, 0.0, 0.0],\n", + " ['S1', '2020-08-29 00:00:00', None, 0.0, 0.0],\n", + " ['S1', '2020-08-30 00:00:00', None, 0.0, 0.0],\n", + " ['S1', '2020-08-31 00:00:00', None, 0.0, 0.0],\n", + " ['S1', '2020-09-01 00:00:00', 'SAME_DT', 361.1, 4.0]]},\n", + " '$ref': None}},\n", + " 'test_aggregate_max': {'input_data': {'tsdf': {'ts_col': None,\n", + " 'other_ts_cols': None,\n", + " 'partition_cols': None,\n", + " 'sequenc_col': None,\n", + " 'start_ts': None,\n", + " 'end_ts': None,\n", + " 'series': None},\n", + " 'df': {'schema': None, 'ts_convert': None, 'data': None},\n", + " '$ref': '#/__SharedData/input_data'},\n", + " 'expected_data': {'tsdf': {'ts_col': 'event_ts',\n", + " 'other_ts_cols': None,\n", + " 'partition_cols': None,\n", + " 'sequenc_col': None,\n", + " 'start_ts': None,\n", + " 'end_ts': None,\n", + " 'series': None},\n", + " 'df': {'schema': 'symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float',\n", + " 'ts_convert': None,\n", + " 'data': [['S1', '2020-08-01 00:00:00', 'SAME_DT', 353.32, 10.0],\n", + " ['S1', '2020-09-01 00:00:00', 'SAME_DT', 362.1, 5.0]]},\n", + " '$ref': None}},\n", + " 'test_aggregate_ceiling': {'input_data': {'tsdf': {'ts_col': None,\n", + " 'other_ts_cols': None,\n", + " 'partition_cols': None,\n", + " 'sequenc_col': None,\n", + " 'start_ts': None,\n", + " 'end_ts': None,\n", + " 'series': None},\n", + " 'df': {'schema': None, 'ts_convert': None, 'data': None},\n", + " '$ref': '#/__SharedData/input_data'},\n", + " 
'expected_data': {'tsdf': {'ts_col': 'event_ts',\n", + " 'other_ts_cols': None,\n", + " 'partition_cols': None,\n", + " 'sequenc_col': None,\n", + " 'start_ts': None,\n", + " 'end_ts': None,\n", + " 'series': None},\n", + " 'df': {'schema': 'symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float',\n", + " 'ts_convert': None,\n", + " 'data': [['S1', '2020-08-01 00:00:00', 'SAME_DT', 350.32, 6.0],\n", + " ['S1', '2020-09-01 00:00:00', 'SAME_DT', 362.1, 4.0]]},\n", + " '$ref': None}},\n", + " 'test_aggregate_invalid_func_arg': {'input_data': {'tsdf': {'ts_col': None,\n", + " 'other_ts_cols': None,\n", + " 'partition_cols': None,\n", + " 'sequenc_col': None,\n", + " 'start_ts': None,\n", + " 'end_ts': None,\n", + " 'series': None},\n", + " 'df': {'schema': None, 'ts_convert': None, 'data': None},\n", + " '$ref': '#/__SharedData/input_data'},\n", + " 'expected_data': {'tsdf': {'ts_col': 'event_ts',\n", + " 'other_ts_cols': None,\n", + " 'partition_cols': None,\n", + " 'sequenc_col': None,\n", + " 'start_ts': None,\n", + " 'end_ts': None,\n", + " 'series': None},\n", + " 'df': {'schema': 'symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float',\n", + " 'ts_convert': None,\n", + " 'data': [['S1', '2020-07-31 20:00:00', 'SAME_DT', 348.88, 8.0],\n", + " ['S1', '2020-08-31 20:00:00', 'SAME_DT', 361.6, 4.5]]},\n", + " '$ref': None}}}}" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "after" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "combined = after | after_2" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"./resample_tests_2.json\", \"w\") as file:\n", + " json.dump(combined, file, indent=4)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv142", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/python/tests/unit_test_data/resample_tests_2.json b/python/tests/unit_test_data/resample_tests_2.json new file mode 100644 index 00000000..bcb89ab9 --- /dev/null +++ b/python/tests/unit_test_data/resample_tests_2.json @@ -0,0 +1,733 @@ +{ + "ResampleUnitTests": { + "test_appendAggKey_freq_is_none": { + "input_data": { + "tsdf": { + "ts_col": null, + "other_ts_cols": null, + "partition_cols": null, + "sequenc_col": null, + "start_ts": null, + "end_ts": null, + "series": null + }, + "df": { + "schema": null, + "ts_convert": null, + "data": null + }, + "$ref": "#/__SharedData/input_data" + } + }, + "test_appendAggKey_freq_microsecond": { + "input_data": { + "tsdf": { + "ts_col": null, + "other_ts_cols": null, + "partition_cols": null, + "sequenc_col": null, + "start_ts": null, + "end_ts": null, + "series": null + }, + "df": { + "schema": null, + "ts_convert": null, + "data": null + }, + "$ref": "#/__SharedData/input_data" + } + }, + "test_appendAggKey_freq_is_invalid": { + "input_data": { + "tsdf": { + "ts_col": null, + "other_ts_cols": null, + "partition_cols": null, + "sequenc_col": null, + "start_ts": null, + "end_ts": null, + "series": null + }, + "df": { + "schema": null, + "ts_convert": null, + "data": null + }, + "$ref": 
"#/__SharedData/input_data" + } + }, + "test_aggregate_floor": { + "input_data": { + "tsdf": { + "ts_col": null, + "other_ts_cols": null, + "partition_cols": null, + "sequenc_col": null, + "start_ts": null, + "end_ts": null, + "series": null + }, + "df": { + "schema": null, + "ts_convert": null, + "data": null + }, + "$ref": "#/__SharedData/input_data" + }, + "expected_data": { + "tsdf": { + "ts_col": "event_ts", + "other_ts_cols": null, + "partition_cols": null, + "sequenc_col": null, + "start_ts": null, + "end_ts": null, + "series": null + }, + "df": { + "schema": "symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float", + "ts_convert": null, + "data": [ + [ + "S1", + "2020-08-01 00:00:00", + "SAME_DT", + 349.21, + 10.0 + ], + [ + "S1", + "2020-09-01 00:00:00", + "SAME_DT", + 361.1, + 5.0 + ] + ] + }, + "$ref": null + } + }, + "test_aggregate_average": { + "input_data": { + "tsdf": { + "ts_col": null, + "other_ts_cols": null, + "partition_cols": null, + "sequenc_col": null, + "start_ts": null, + "end_ts": null, + "series": null + }, + "df": { + "schema": null, + "ts_convert": null, + "data": null + }, + "$ref": "#/__SharedData/input_data" + }, + "expected_data": { + "tsdf": { + "ts_col": "event_ts", + "other_ts_cols": null, + "partition_cols": null, + "sequenc_col": null, + "start_ts": null, + "end_ts": null, + "series": null + }, + "df": { + "schema": "symbol string, event_ts string, trade_pr double, trade_pr_2 double", + "ts_convert": null, + "data": [ + [ + "S1", + "2020-08-01 00:00:00", + 348.8760009765625, + 8.0 + ], + [ + "S1", + "2020-09-01 00:00:00", + 361.6000061035156, + 4.5 + ] + ] + }, + "$ref": null + } + }, + "test_aggregate_min": { + "input_data": { + "tsdf": { + "ts_col": null, + "other_ts_cols": null, + "partition_cols": null, + "sequenc_col": null, + "start_ts": null, + "end_ts": null, + "series": null + }, + "df": { + "schema": null, + "ts_convert": null, + "data": null + }, + "$ref": "#/__SharedData/input_data" + }, + "expected_data": { + "tsdf": { + "ts_col": "event_ts", + "other_ts_cols": null, + "partition_cols": null, + "sequenc_col": null, + "start_ts": null, + "end_ts": null, + "series": null + }, + "df": { + "schema": "symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float", + "ts_convert": null, + "data": [ + [ + "S1", + "2020-08-01 00:00:00", + "SAME_DT", + 340.21, + 6.0 + ], + [ + "S1", + "2020-09-01 00:00:00", + "SAME_DT", + 361.1, + 4.0 + ] + ] + }, + "$ref": null + } + }, + "test_aggregate_min_with_prefix": { + "input_data": { + "tsdf": { + "ts_col": null, + "other_ts_cols": null, + "partition_cols": null, + "sequenc_col": null, + "start_ts": null, + "end_ts": null, + "series": null + }, + "df": { + "schema": null, + "ts_convert": null, + "data": null + }, + "$ref": "#/__SharedData/input_data" + }, + "expected_data": { + "tsdf": { + "ts_col": "event_ts", + "other_ts_cols": null, + "partition_cols": null, + "sequenc_col": null, + "start_ts": null, + "end_ts": null, + "series": null + }, + "df": { + "schema": "symbol string, event_ts string, min_date string, min_trade_pr float, min_trade_pr_2 float", + "ts_convert": null, + "data": { + "$ref": "#/ResampleUnitTests/test_aggregate_min/expected_data/data" + } + }, + "$ref": null + } + }, + "test_aggregate_min_with_fill": { + "input_data": { + "tsdf": { + "ts_col": null, + "other_ts_cols": null, + "partition_cols": null, + "sequenc_col": null, + "start_ts": null, + "end_ts": null, + "series": null + }, + "df": { + "schema": null, + "ts_convert": null, + "data": null 
+ }, + "$ref": "#/__SharedData/input_data" + }, + "expected_data": { + "tsdf": { + "ts_col": "event_ts", + "other_ts_cols": null, + "partition_cols": null, + "sequenc_col": null, + "start_ts": null, + "end_ts": null, + "series": null + }, + "df": { + "schema": "symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float", + "ts_convert": null, + "data": [ + [ + "S1", + "2020-08-01 00:00:00", + "SAME_DT", + 340.21, + 6.0 + ], + [ + "S1", + "2020-08-02 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-03 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-04 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-05 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-06 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-07 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-08 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-09 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-10 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-11 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-12 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-13 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-14 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-15 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-16 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-17 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-18 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-19 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-20 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-21 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-22 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-23 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-24 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-25 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-26 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-27 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-28 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-29 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-30 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-31 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-09-01 00:00:00", + "SAME_DT", + 361.1, + 4.0 + ] + ] + }, + "$ref": null + } + }, + "test_aggregate_max": { + "input_data": { + "tsdf": { + "ts_col": null, + "other_ts_cols": null, + "partition_cols": null, + "sequenc_col": null, + "start_ts": null, + "end_ts": null, + "series": null + }, + "df": { + "schema": null, + "ts_convert": null, + "data": null + }, + "$ref": "#/__SharedData/input_data" + }, + "expected_data": { + "tsdf": { + "ts_col": "event_ts", + "other_ts_cols": null, + "partition_cols": null, + "sequenc_col": null, + "start_ts": null, + "end_ts": null, + "series": null + }, + "df": { + "schema": "symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float", + "ts_convert": null, + "data": [ + [ + "S1", + "2020-08-01 00:00:00", + "SAME_DT", + 353.32, + 10.0 + ], + [ + "S1", + "2020-09-01 00:00:00", + "SAME_DT", + 362.1, + 5.0 + ] + ] + }, + "$ref": null + } + }, + "test_aggregate_ceiling": { + "input_data": { + "tsdf": { + "ts_col": null, + "other_ts_cols": null, + "partition_cols": null, + "sequenc_col": null, + "start_ts": null, + "end_ts": null, + "series": null + }, + "df": { + "schema": null, + "ts_convert": null, + "data": null + }, + "$ref": 
"#/__SharedData/input_data" + }, + "expected_data": { + "tsdf": { + "ts_col": "event_ts", + "other_ts_cols": null, + "partition_cols": null, + "sequenc_col": null, + "start_ts": null, + "end_ts": null, + "series": null + }, + "df": { + "schema": "symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float", + "ts_convert": null, + "data": [ + [ + "S1", + "2020-08-01 00:00:00", + "SAME_DT", + 350.32, + 6.0 + ], + [ + "S1", + "2020-09-01 00:00:00", + "SAME_DT", + 362.1, + 4.0 + ] + ] + }, + "$ref": null + } + }, + "test_aggregate_invalid_func_arg": { + "input_data": { + "tsdf": { + "ts_col": null, + "other_ts_cols": null, + "partition_cols": null, + "sequenc_col": null, + "start_ts": null, + "end_ts": null, + "series": null + }, + "df": { + "schema": null, + "ts_convert": null, + "data": null + }, + "$ref": "#/__SharedData/input_data" + }, + "expected_data": { + "tsdf": { + "ts_col": "event_ts", + "other_ts_cols": null, + "partition_cols": null, + "sequenc_col": null, + "start_ts": null, + "end_ts": null, + "series": null + }, + "df": { + "schema": "symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float", + "ts_convert": null, + "data": [ + [ + "S1", + "2020-07-31 20:00:00", + "SAME_DT", + 348.88, + 8.0 + ], + [ + "S1", + "2020-08-31 20:00:00", + "SAME_DT", + 361.6, + 4.5 + ] + ] + }, + "$ref": null + } + } + }, + "__SharedData": { + "input_data": { + "tsdf": { + "ts_col": "event_ts", + "other_ts_cols": null, + "partition_cols": null, + "sequence_col": null, + "start_ts": null, + "end_ts": null, + "series": null + }, + "df": { + "schema": "symbol string, date string, event_ts string, trade_pr float, trade_pr_2 float", + "ts_convert": null, + "data": [ + [ + "S1", + "SAME_DT", + "2020-08-01 00:00:10", + 349.21, + 10.0 + ], + [ + "S1", + "SAME_DT", + "2020-08-01 00:00:11", + 340.21, + 9.0 + ], + [ + "S1", + "SAME_DT", + "2020-08-01 00:01:12", + 353.32, + 8.0 + ], + [ + "S1", + "SAME_DT", + "2020-08-01 00:01:13", + 351.32, + 7.0 + ], + [ + "S1", + "SAME_DT", + "2020-08-01 00:01:14", + 350.32, + 6.0 + ], + [ + "S1", + "SAME_DT", + "2020-09-01 00:01:12", + 361.1, + 5.0 + ], + [ + "S1", + "SAME_DT", + "2020-09-01 00:19:12", + 362.1, + 4.0 + ] + ] + }, + "$ref": null + } + } +} \ No newline at end of file From 8d5a583dfec3c99ef3b488ce77ccbebc994b1ef1 Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Wed, 22 May 2024 13:47:13 -0500 Subject: [PATCH 107/137] found new error --- python/tests/resample_2_tests.py | 172 +++++++++ ...{resample_tests_2.py => resample_tests.py} | 0 python/tests/unit_test_data/json-fixer.ipynb | 325 +++++++----------- ...ple_tests_2.json => resample_2_tests.json} | 257 ++------------ 4 files changed, 324 insertions(+), 430 deletions(-) create mode 100644 python/tests/resample_2_tests.py rename python/tests/{resample_tests_2.py => resample_tests.py} (100%) rename python/tests/unit_test_data/{resample_tests_2.json => resample_2_tests.json} (67%) diff --git a/python/tests/resample_2_tests.py b/python/tests/resample_2_tests.py new file mode 100644 index 00000000..f3ccc8da --- /dev/null +++ b/python/tests/resample_2_tests.py @@ -0,0 +1,172 @@ +import unittest + +from tempo import TSDF +from tempo.resample import ( + _appendAggKey, + aggregate, + checkAllowableFreq, + validateFuncExists, +) +from tests.base import SparkTest + + +class ResampleUnitTests(SparkTest): + def test_appendAggKey_freq_is_none(self): + input_tsdf = self.get_test_df_builder("input_data").as_tsdf() + + self.assertRaises(TypeError, _appendAggKey, input_tsdf) + + def 
test_appendAggKey_freq_microsecond(self): + input_tsdf = self.get_test_df_builder("input_data").as_tsdf() + + appendAggKey_tuple = _appendAggKey(input_tsdf, "1 MICROSECOND") + appendAggKey_tsdf = appendAggKey_tuple[0] + + self.assertIsInstance(appendAggKey_tsdf, TSDF) + self.assertIn("agg_key", appendAggKey_tsdf.df.columns) + self.assertEqual(appendAggKey_tuple[1], "1") + self.assertEqual(appendAggKey_tuple[2], "microseconds") + + def test_appendAggKey_freq_is_invalid(self): + input_tsdf = self.get_test_df_builder("input_data").as_tsdf() + + self.assertRaises( + ValueError, + _appendAggKey, + input_tsdf, + "1 invalid", + ) + + def test_aggregate_floor(self): + input_tsdf = self.get_test_df_builder("input_data").as_tsdf() + expected_data = self.get_test_df_builder("expected_data").as_sdf() + + aggregate_df = aggregate(input_tsdf, "1 DAY", "floor") + + self.assertDataFrameEquality( + aggregate_df, + expected_data, + ) + + def test_aggregate_average(self): + # TODO: fix DATE returns `null` + # DATE is being included in metricCols when metricCols is None + # this occurs for all aggregate functions but causes negative side effects with avg + # is this intentional? + # resample.py -> lines 86 to 87 + # occurring in all `func` arguments but causing null values for "mean" + input_tsdf = self.get_test_df_builder("input_data").as_tsdf() + expected_data = self.get_test_df_builder("expected_data").as_sdf() + + # explicitly declaring metricCols to remove DATE so that test can pass for now + aggregate_df = aggregate( + input_tsdf, "1 DAY", "mean", ["trade_pr", "trade_pr_2"] + ) + + self.assertDataFrameEquality( + aggregate_df, + expected_data, + ) + + def test_aggregate_min(self): + input_tsdf = self.get_test_df_builder("input_data").as_tsdf() + expected_data = self.get_test_df_builder("expected_data").as_sdf() + + aggregate_df = aggregate(input_tsdf, "1 DAY", "min") + + self.assertDataFrameEquality( + aggregate_df, + expected_data, + ) + + def test_aggregate_min_with_prefix(self): + input_tsdf = self.get_test_df_builder("input_data").as_tsdf() + expected_data = self.get_test_df_builder("expected_data").as_sdf() + + aggregate_df = aggregate(input_tsdf, "1 DAY", "min", prefix="min") + + self.assertDataFrameEquality( + aggregate_df, + expected_data, + ) + + def test_aggregate_min_with_fill(self): + input_tsdf = self.get_test_df_builder("input_data").as_tsdf() + expected_data = self.get_test_df_builder("expected_data").as_sdf() + + aggregate_df = aggregate(input_tsdf, "1 DAY", "min", fill=True) + + self.assertDataFrameEquality( + aggregate_df, + expected_data, + ) + + def test_aggregate_max(self): + input_tsdf = self.get_test_df_builder("input_data").as_tsdf() + expected_data = self.get_test_df_builder("expected_data").as_sdf() + + aggregate_df = aggregate(input_tsdf, "1 DAY", "max") + + self.assertDataFrameEquality( + aggregate_df, + expected_data, + ) + + def test_aggregate_ceiling(self): + input_tsdf = self.get_test_df_builder("input_data").as_tsdf() + expected_data = self.get_test_df_builder("expected_data").as_sdf() + + aggregate_df = aggregate(input_tsdf, "1 DAY", "ceil") + + self.assertDataFrameEquality( + aggregate_df, + expected_data, + ) + + def test_aggregate_invalid_func_arg(self): + # TODO : we should not be hitting an UnboundLocalError + input_tsdf = self.get_test_df_builder("input_data").as_tsdf() + + self.assertRaises(UnboundLocalError, aggregate, input_tsdf, "1 DAY", "average") + + def test_check_allowable_freq_none(self): + self.assertRaises(TypeError, checkAllowableFreq, None) + + def 
test_check_allowable_freq_microsecond(self): + self.assertEqual(checkAllowableFreq("1 MICROSECOND"), ("1", "microsec")) + + def test_check_allowable_freq_millisecond(self): + self.assertEqual(checkAllowableFreq("1 MILLISECOND"), ("1", "ms")) + + def test_check_allowable_freq_second(self): + self.assertEqual(checkAllowableFreq("1 SECOND"), ("1", "sec")) + + def test_check_allowable_freq_minute(self): + self.assertEqual(checkAllowableFreq("1 MINUTE"), ("1", "min")) + + def test_check_allowable_freq_hour(self): + self.assertEqual(checkAllowableFreq("1 HOUR"), ("1", "hour")) + + def test_check_allowable_freq_day(self): + self.assertEqual(checkAllowableFreq("1 DAY"), ("1", "day")) + + def test_check_allowable_freq_no_interval(self): + # TODO: should first element return str for consistency? + self.assertEqual(checkAllowableFreq("day"), (1, "day")) + + def test_check_allowable_freq_exception_not_in_allowable_freqs(self): + self.assertRaises(ValueError, checkAllowableFreq, "wrong") + + def test_check_allowable_freq_exception(self): + self.assertRaises(ValueError, checkAllowableFreq, "wrong wrong") + + def test_validate_func_exists_type_error(self): + self.assertRaises(TypeError, validateFuncExists, None) + + def test_validate_func_exists_value_error(self): + self.assertRaises(ValueError, validateFuncExists, "non-existent") + + +# MAIN +if __name__ == "__main__": + unittest.main() diff --git a/python/tests/resample_tests_2.py b/python/tests/resample_tests.py similarity index 100% rename from python/tests/resample_tests_2.py rename to python/tests/resample_tests.py diff --git a/python/tests/unit_test_data/json-fixer.ipynb b/python/tests/unit_test_data/json-fixer.ipynb index 64df7a47..11c22779 100644 --- a/python/tests/unit_test_data/json-fixer.ipynb +++ b/python/tests/unit_test_data/json-fixer.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 24, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -14,7 +14,18 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "def update_dict(dictionary, key, value):\n", + " if value is not None:\n", + " dictionary[key] = value" + ] + }, + { + "cell_type": "code", + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -26,30 +37,28 @@ " for j in before[i].keys(): # j is test method\n", " after[i][j] = {}\n", " for k in before[i][j].keys(): # input, expected, etc.\n", - "\n", + " tsdf = {}\n", + " update_dict(tsdf, \"ts_col\", before[i][j][k].get(\"ts_col\", None))\n", + " update_dict(tsdf, \"other_ts_cols\", before[i][j][k].get(\"other_ts_cols\", None))\n", + " update_dict(tsdf, \"partition_cols\", before[i][j][k].get(\"partition_col\", None))\n", + " update_dict(tsdf, \"sequence_col\", before[i][j][k].get(\"sequence_col\", None))\n", + " update_dict(tsdf, \"start_ts\", before[i][j][k].get(\"start_ts\", None))\n", + " update_dict(tsdf, \"end_ts\", before[i][j][k].get(\"end_ts\", None))\n", + " update_dict(tsdf, \"series\", before[i][j][k].get(\"series\", None))\n", + " sdf = {}\n", + " update_dict(sdf, \"schema\", before[i][j][k].get(\"schema\", None))\n", + " update_dict(sdf, \"ts_convert\", before[i][j][k].get(\"ts_convert\", None))\n", + " update_dict(sdf, \"data\", before[i][j][k].get(\"data\", None))\n", " after[i][j][k] = {\n", - " \"tsdf\": {\n", - " \"ts_col\": before[i][j][k].get(\"ts_col\", None),\n", - " \"other_ts_cols\": before[i][j][k].get(\"other_ts_cols\", None),\n", - " \"partition_cols\": 
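before[i][j][k].get(\"partition_col\", None))\n",
+    "                # update_dict (defined above) only writes non-None values,\n",
+    "                # so the regenerated fixture JSON stays sparse: unused tsdf\n",
+    "                # settings become absent keys rather than explicit nulls\n",
+    "                update_dict(tsdf, \"partition_cols\", 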
before[i][j][k].get(\"partition_col\", None),\n", - " \"sequenc_col\": before[i][j][k].get(\"sequence_col\", None),\n", - " \"start_ts\": before[i][j][k].get(\"start_ts\", None),\n", - " \"end_ts\": before[i][j][k].get(\"end_ts\", None),\n", - " \"series\": before[i][j][k].get(\"series\", None),\n", - " \n", - " },\n", - " \"df\": {\n", - " \"schema\": before[i][j][k].get(\"schema\", None),\n", - " \"ts_convert\": before[i][j][k].get(\"ts_convert\", None),\n", - " \"data\": before[i][j][k].get(\"data\", None)\n", - " },\n", + " \"tsdf\": tsdf,\n", + " \"df\": sdf,\n", " \"$ref\": before[i][j][k].get(\"$ref\", None)\n", " }" ] }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -59,162 +68,107 @@ " continue\n", " after_2[i] = {}\n", " for j in before[i].keys(): # j is test method\n", - " after_2[i][j] = {\n", - " \"tsdf\": {\n", - " \"ts_col\": before[i][j].get(\"ts_col\", None),\n", - " \"other_ts_cols\": before[i][j].get(\"other_ts_cols\", None),\n", - " \"partition_cols\": before[i][j].get(\"partition_col\", None),\n", - " \"sequence_col\": before[i][j].get(\"sequence_col\", None),\n", - " \"start_ts\": before[i][j].get(\"start_ts\", None),\n", - " \"end_ts\": before[i][j].get(\"end_ts\", None),\n", - " \"series\": before[i][j].get(\"series\", None),\n", - " \n", - " },\n", - " \"df\": {\n", - " \"schema\": before[i][j].get(\"schema\", None),\n", - " \"ts_convert\": before[i][j].get(\"ts_convert\", None),\n", - " \"data\": before[i][j].get(\"data\", None)\n", - " },\n", - " \"$ref\": before[i][j].get(\"$ref\", None)\n", - " }" + " tsdf = {}\n", + " update_dict(tsdf, \"ts_col\", before[i][j].get(\"ts_col\", None))\n", + " update_dict(tsdf, \"other_ts_cols\", before[i][j].get(\"other_ts_cols\", None))\n", + " update_dict(tsdf, \"partition_cols\", before[i][j].get(\"partition_col\", None))\n", + " update_dict(tsdf, \"sequence_col\", before[i][j].get(\"sequence_col\", None))\n", + " update_dict(tsdf, \"start_ts\", before[i][j].get(\"start_ts\", None))\n", + " update_dict(tsdf, \"end_ts\", before[i][j].get(\"end_ts\", None))\n", + " update_dict(tsdf, \"series\", before[i][j].get(\"series\", None))\n", + " sdf = {}\n", + " update_dict(sdf, \"schema\", before[i][j].get(\"schema\", None))\n", + " update_dict(sdf, \"ts_convert\", before[i][j].get(\"ts_convert\", None))\n", + " update_dict(sdf, \"data\", before[i][j].get(\"data\", None))\n", + " after_2[i][j] = {\n", + " \"tsdf\": tsdf,\n", + " \"df\": sdf,\n", + " \"$ref\": before[i][j].get(\"$ref\", None)\n", + " }" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'__SharedData': {'input_data': {'tsdf': {'ts_col': 'event_ts'},\n", + " 'df': {'schema': 'symbol string, date string, event_ts string, trade_pr float, trade_pr_2 float',\n", + " 'data': [['S1', 'SAME_DT', '2020-08-01 00:00:10', 349.21, 10.0],\n", + " ['S1', 'SAME_DT', '2020-08-01 00:00:11', 340.21, 9.0],\n", + " ['S1', 'SAME_DT', '2020-08-01 00:01:12', 353.32, 8.0],\n", + " ['S1', 'SAME_DT', '2020-08-01 00:01:13', 351.32, 7.0],\n", + " ['S1', 'SAME_DT', '2020-08-01 00:01:14', 350.32, 6.0],\n", + " ['S1', 'SAME_DT', '2020-09-01 00:01:12', 361.1, 5.0],\n", + " ['S1', 'SAME_DT', '2020-09-01 00:19:12', 362.1, 4.0]]},\n", + " '$ref': None}}}" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "after_2" ] }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 11, 
"metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'ResampleUnitTests': {'test_appendAggKey_freq_is_none': {'input_data': {'tsdf': {'ts_col': None,\n", - " 'other_ts_cols': None,\n", - " 'partition_cols': None,\n", - " 'sequenc_col': None,\n", - " 'start_ts': None,\n", - " 'end_ts': None,\n", - " 'series': None},\n", - " 'df': {'schema': None, 'ts_convert': None, 'data': None},\n", + "{'ResampleUnitTests': {'test_appendAggKey_freq_is_none': {'input_data': {'tsdf': {},\n", + " 'df': {},\n", " '$ref': '#/__SharedData/input_data'}},\n", - " 'test_appendAggKey_freq_microsecond': {'input_data': {'tsdf': {'ts_col': None,\n", - " 'other_ts_cols': None,\n", - " 'partition_cols': None,\n", - " 'sequenc_col': None,\n", - " 'start_ts': None,\n", - " 'end_ts': None,\n", - " 'series': None},\n", - " 'df': {'schema': None, 'ts_convert': None, 'data': None},\n", + " 'test_appendAggKey_freq_microsecond': {'input_data': {'tsdf': {},\n", + " 'df': {},\n", " '$ref': '#/__SharedData/input_data'}},\n", - " 'test_appendAggKey_freq_is_invalid': {'input_data': {'tsdf': {'ts_col': None,\n", - " 'other_ts_cols': None,\n", - " 'partition_cols': None,\n", - " 'sequenc_col': None,\n", - " 'start_ts': None,\n", - " 'end_ts': None,\n", - " 'series': None},\n", - " 'df': {'schema': None, 'ts_convert': None, 'data': None},\n", + " 'test_appendAggKey_freq_is_invalid': {'input_data': {'tsdf': {},\n", + " 'df': {},\n", " '$ref': '#/__SharedData/input_data'}},\n", - " 'test_aggregate_floor': {'input_data': {'tsdf': {'ts_col': None,\n", - " 'other_ts_cols': None,\n", - " 'partition_cols': None,\n", - " 'sequenc_col': None,\n", - " 'start_ts': None,\n", - " 'end_ts': None,\n", - " 'series': None},\n", - " 'df': {'schema': None, 'ts_convert': None, 'data': None},\n", + " 'test_aggregate_floor': {'input_data': {'tsdf': {},\n", + " 'df': {},\n", " '$ref': '#/__SharedData/input_data'},\n", - " 'expected_data': {'tsdf': {'ts_col': 'event_ts',\n", - " 'other_ts_cols': None,\n", - " 'partition_cols': None,\n", - " 'sequenc_col': None,\n", - " 'start_ts': None,\n", - " 'end_ts': None,\n", - " 'series': None},\n", + " 'expected_data': {'tsdf': {'ts_col': 'event_ts'},\n", " 'df': {'schema': 'symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float',\n", - " 'ts_convert': None,\n", " 'data': [['S1', '2020-08-01 00:00:00', 'SAME_DT', 349.21, 10.0],\n", " ['S1', '2020-09-01 00:00:00', 'SAME_DT', 361.1, 5.0]]},\n", " '$ref': None}},\n", - " 'test_aggregate_average': {'input_data': {'tsdf': {'ts_col': None,\n", - " 'other_ts_cols': None,\n", - " 'partition_cols': None,\n", - " 'sequenc_col': None,\n", - " 'start_ts': None,\n", - " 'end_ts': None,\n", - " 'series': None},\n", - " 'df': {'schema': None, 'ts_convert': None, 'data': None},\n", + " 'test_aggregate_average': {'input_data': {'tsdf': {},\n", + " 'df': {},\n", " '$ref': '#/__SharedData/input_data'},\n", - " 'expected_data': {'tsdf': {'ts_col': 'event_ts',\n", - " 'other_ts_cols': None,\n", - " 'partition_cols': None,\n", - " 'sequenc_col': None,\n", - " 'start_ts': None,\n", - " 'end_ts': None,\n", - " 'series': None},\n", + " 'expected_data': {'tsdf': {'ts_col': 'event_ts'},\n", " 'df': {'schema': 'symbol string, event_ts string, trade_pr double, trade_pr_2 double',\n", - " 'ts_convert': None,\n", " 'data': [['S1', '2020-08-01 00:00:00', 348.8760009765625, 8.0],\n", " ['S1', '2020-09-01 00:00:00', 361.6000061035156, 4.5]]},\n", " '$ref': None}},\n", - " 'test_aggregate_min': {'input_data': {'tsdf': {'ts_col': None,\n", - " 'other_ts_cols': None,\n", - " 
'partition_cols': None,\n", - " 'sequenc_col': None,\n", - " 'start_ts': None,\n", - " 'end_ts': None,\n", - " 'series': None},\n", - " 'df': {'schema': None, 'ts_convert': None, 'data': None},\n", + " 'test_aggregate_min': {'input_data': {'tsdf': {},\n", + " 'df': {},\n", " '$ref': '#/__SharedData/input_data'},\n", - " 'expected_data': {'tsdf': {'ts_col': 'event_ts',\n", - " 'other_ts_cols': None,\n", - " 'partition_cols': None,\n", - " 'sequenc_col': None,\n", - " 'start_ts': None,\n", - " 'end_ts': None,\n", - " 'series': None},\n", + " 'expected_data': {'tsdf': {'ts_col': 'event_ts'},\n", " 'df': {'schema': 'symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float',\n", - " 'ts_convert': None,\n", " 'data': [['S1', '2020-08-01 00:00:00', 'SAME_DT', 340.21, 6.0],\n", " ['S1', '2020-09-01 00:00:00', 'SAME_DT', 361.1, 4.0]]},\n", " '$ref': None}},\n", - " 'test_aggregate_min_with_prefix': {'input_data': {'tsdf': {'ts_col': None,\n", - " 'other_ts_cols': None,\n", - " 'partition_cols': None,\n", - " 'sequenc_col': None,\n", - " 'start_ts': None,\n", - " 'end_ts': None,\n", - " 'series': None},\n", - " 'df': {'schema': None, 'ts_convert': None, 'data': None},\n", + " 'test_aggregate_min_with_prefix': {'input_data': {'tsdf': {},\n", + " 'df': {},\n", " '$ref': '#/__SharedData/input_data'},\n", - " 'expected_data': {'tsdf': {'ts_col': 'event_ts',\n", - " 'other_ts_cols': None,\n", - " 'partition_cols': None,\n", - " 'sequenc_col': None,\n", - " 'start_ts': None,\n", - " 'end_ts': None,\n", - " 'series': None},\n", + " 'expected_data': {'tsdf': {'ts_col': 'event_ts'},\n", " 'df': {'schema': 'symbol string, event_ts string, min_date string, min_trade_pr float, min_trade_pr_2 float',\n", - " 'ts_convert': None,\n", " 'data': {'$ref': '#/ResampleUnitTests/test_aggregate_min/expected_data/data'}},\n", " '$ref': None}},\n", - " 'test_aggregate_min_with_fill': {'input_data': {'tsdf': {'ts_col': None,\n", - " 'other_ts_cols': None,\n", - " 'partition_cols': None,\n", - " 'sequenc_col': None,\n", - " 'start_ts': None,\n", - " 'end_ts': None,\n", - " 'series': None},\n", - " 'df': {'schema': None, 'ts_convert': None, 'data': None},\n", + " 'test_aggregate_min_with_fill': {'input_data': {'tsdf': {},\n", + " 'df': {},\n", " '$ref': '#/__SharedData/input_data'},\n", - " 'expected_data': {'tsdf': {'ts_col': 'event_ts',\n", - " 'other_ts_cols': None,\n", - " 'partition_cols': None,\n", - " 'sequenc_col': None,\n", - " 'start_ts': None,\n", - " 'end_ts': None,\n", - " 'series': None},\n", + " 'expected_data': {'tsdf': {'ts_col': 'event_ts'},\n", " 'df': {'schema': 'symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float',\n", - " 'ts_convert': None,\n", " 'data': [['S1', '2020-08-01 00:00:00', 'SAME_DT', 340.21, 6.0],\n", " ['S1', '2020-08-02 00:00:00', None, 0.0, 0.0],\n", " ['S1', '2020-08-03 00:00:00', None, 0.0, 0.0],\n", @@ -248,96 +202,59 @@ " ['S1', '2020-08-31 00:00:00', None, 0.0, 0.0],\n", " ['S1', '2020-09-01 00:00:00', 'SAME_DT', 361.1, 4.0]]},\n", " '$ref': None}},\n", - " 'test_aggregate_max': {'input_data': {'tsdf': {'ts_col': None,\n", - " 'other_ts_cols': None,\n", - " 'partition_cols': None,\n", - " 'sequenc_col': None,\n", - " 'start_ts': None,\n", - " 'end_ts': None,\n", - " 'series': None},\n", - " 'df': {'schema': None, 'ts_convert': None, 'data': None},\n", + " 'test_aggregate_max': {'input_data': {'tsdf': {},\n", + " 'df': {},\n", " '$ref': '#/__SharedData/input_data'},\n", - " 'expected_data': {'tsdf': {'ts_col': 'event_ts',\n", - " 
'other_ts_cols': None,\n", - " 'partition_cols': None,\n", - " 'sequenc_col': None,\n", - " 'start_ts': None,\n", - " 'end_ts': None,\n", - " 'series': None},\n", + " 'expected_data': {'tsdf': {'ts_col': 'event_ts'},\n", " 'df': {'schema': 'symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float',\n", - " 'ts_convert': None,\n", " 'data': [['S1', '2020-08-01 00:00:00', 'SAME_DT', 353.32, 10.0],\n", " ['S1', '2020-09-01 00:00:00', 'SAME_DT', 362.1, 5.0]]},\n", " '$ref': None}},\n", - " 'test_aggregate_ceiling': {'input_data': {'tsdf': {'ts_col': None,\n", - " 'other_ts_cols': None,\n", - " 'partition_cols': None,\n", - " 'sequenc_col': None,\n", - " 'start_ts': None,\n", - " 'end_ts': None,\n", - " 'series': None},\n", - " 'df': {'schema': None, 'ts_convert': None, 'data': None},\n", + " 'test_aggregate_ceiling': {'input_data': {'tsdf': {},\n", + " 'df': {},\n", " '$ref': '#/__SharedData/input_data'},\n", - " 'expected_data': {'tsdf': {'ts_col': 'event_ts',\n", - " 'other_ts_cols': None,\n", - " 'partition_cols': None,\n", - " 'sequenc_col': None,\n", - " 'start_ts': None,\n", - " 'end_ts': None,\n", - " 'series': None},\n", + " 'expected_data': {'tsdf': {'ts_col': 'event_ts'},\n", " 'df': {'schema': 'symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float',\n", - " 'ts_convert': None,\n", " 'data': [['S1', '2020-08-01 00:00:00', 'SAME_DT', 350.32, 6.0],\n", " ['S1', '2020-09-01 00:00:00', 'SAME_DT', 362.1, 4.0]]},\n", " '$ref': None}},\n", - " 'test_aggregate_invalid_func_arg': {'input_data': {'tsdf': {'ts_col': None,\n", - " 'other_ts_cols': None,\n", - " 'partition_cols': None,\n", - " 'sequenc_col': None,\n", - " 'start_ts': None,\n", - " 'end_ts': None,\n", - " 'series': None},\n", - " 'df': {'schema': None, 'ts_convert': None, 'data': None},\n", + " 'test_aggregate_invalid_func_arg': {'input_data': {'tsdf': {},\n", + " 'df': {},\n", " '$ref': '#/__SharedData/input_data'},\n", - " 'expected_data': {'tsdf': {'ts_col': 'event_ts',\n", - " 'other_ts_cols': None,\n", - " 'partition_cols': None,\n", - " 'sequenc_col': None,\n", - " 'start_ts': None,\n", - " 'end_ts': None,\n", - " 'series': None},\n", + " 'expected_data': {'tsdf': {'ts_col': 'event_ts'},\n", " 'df': {'schema': 'symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float',\n", - " 'ts_convert': None,\n", " 'data': [['S1', '2020-07-31 20:00:00', 'SAME_DT', 348.88, 8.0],\n", " ['S1', '2020-08-31 20:00:00', 'SAME_DT', 361.6, 4.5]]},\n", - " '$ref': None}}}}" + " '$ref': None}}},\n", + " '__SharedData': {'input_data': {'tsdf': {'ts_col': 'event_ts'},\n", + " 'df': {'schema': 'symbol string, date string, event_ts string, trade_pr float, trade_pr_2 float',\n", + " 'data': [['S1', 'SAME_DT', '2020-08-01 00:00:10', 349.21, 10.0],\n", + " ['S1', 'SAME_DT', '2020-08-01 00:00:11', 340.21, 9.0],\n", + " ['S1', 'SAME_DT', '2020-08-01 00:01:12', 353.32, 8.0],\n", + " ['S1', 'SAME_DT', '2020-08-01 00:01:13', 351.32, 7.0],\n", + " ['S1', 'SAME_DT', '2020-08-01 00:01:14', 350.32, 6.0],\n", + " ['S1', 'SAME_DT', '2020-09-01 00:01:12', 361.1, 5.0],\n", + " ['S1', 'SAME_DT', '2020-09-01 00:19:12', 362.1, 4.0]]},\n", + " '$ref': None}}}" ] }, - "execution_count": 27, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "after" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [], - "source": [ - "combined = after | after_2" + "combined = after | after_2\n", + "combined" ] }, { "cell_type": "code", - 
"execution_count": 31, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ - "with open(\"./resample_tests_2.json\", \"w\") as file:\n", + "with open(\"./resample_2_tests.json\", \"w\") as file:\n", " json.dump(combined, file, indent=4)" ] } diff --git a/python/tests/unit_test_data/resample_tests_2.json b/python/tests/unit_test_data/resample_2_tests.json similarity index 67% rename from python/tests/unit_test_data/resample_tests_2.json rename to python/tests/unit_test_data/resample_2_tests.json index bcb89ab9..4391c32f 100644 --- a/python/tests/unit_test_data/resample_tests_2.json +++ b/python/tests/unit_test_data/resample_2_tests.json @@ -2,92 +2,37 @@ "ResampleUnitTests": { "test_appendAggKey_freq_is_none": { "input_data": { - "tsdf": { - "ts_col": null, - "other_ts_cols": null, - "partition_cols": null, - "sequenc_col": null, - "start_ts": null, - "end_ts": null, - "series": null - }, - "df": { - "schema": null, - "ts_convert": null, - "data": null - }, + "tsdf": {}, + "df": {}, "$ref": "#/__SharedData/input_data" } }, "test_appendAggKey_freq_microsecond": { "input_data": { - "tsdf": { - "ts_col": null, - "other_ts_cols": null, - "partition_cols": null, - "sequenc_col": null, - "start_ts": null, - "end_ts": null, - "series": null - }, - "df": { - "schema": null, - "ts_convert": null, - "data": null - }, + "tsdf": {}, + "df": {}, "$ref": "#/__SharedData/input_data" } }, "test_appendAggKey_freq_is_invalid": { "input_data": { - "tsdf": { - "ts_col": null, - "other_ts_cols": null, - "partition_cols": null, - "sequenc_col": null, - "start_ts": null, - "end_ts": null, - "series": null - }, - "df": { - "schema": null, - "ts_convert": null, - "data": null - }, + "tsdf": {}, + "df": {}, "$ref": "#/__SharedData/input_data" } }, "test_aggregate_floor": { "input_data": { - "tsdf": { - "ts_col": null, - "other_ts_cols": null, - "partition_cols": null, - "sequenc_col": null, - "start_ts": null, - "end_ts": null, - "series": null - }, - "df": { - "schema": null, - "ts_convert": null, - "data": null - }, + "tsdf": {}, + "df": {}, "$ref": "#/__SharedData/input_data" }, "expected_data": { "tsdf": { - "ts_col": "event_ts", - "other_ts_cols": null, - "partition_cols": null, - "sequenc_col": null, - "start_ts": null, - "end_ts": null, - "series": null + "ts_col": "event_ts" }, "df": { "schema": "symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float", - "ts_convert": null, "data": [ [ "S1", @@ -110,35 +55,16 @@ }, "test_aggregate_average": { "input_data": { - "tsdf": { - "ts_col": null, - "other_ts_cols": null, - "partition_cols": null, - "sequenc_col": null, - "start_ts": null, - "end_ts": null, - "series": null - }, - "df": { - "schema": null, - "ts_convert": null, - "data": null - }, + "tsdf": {}, + "df": {}, "$ref": "#/__SharedData/input_data" }, "expected_data": { "tsdf": { - "ts_col": "event_ts", - "other_ts_cols": null, - "partition_cols": null, - "sequenc_col": null, - "start_ts": null, - "end_ts": null, - "series": null + "ts_col": "event_ts" }, "df": { "schema": "symbol string, event_ts string, trade_pr double, trade_pr_2 double", - "ts_convert": null, "data": [ [ "S1", @@ -159,35 +85,16 @@ }, "test_aggregate_min": { "input_data": { - "tsdf": { - "ts_col": null, - "other_ts_cols": null, - "partition_cols": null, - "sequenc_col": null, - "start_ts": null, - "end_ts": null, - "series": null - }, - "df": { - "schema": null, - "ts_convert": null, - "data": null - }, + "tsdf": {}, + "df": {}, "$ref": "#/__SharedData/input_data" }, "expected_data": { "tsdf": { - 
"ts_col": "event_ts", - "other_ts_cols": null, - "partition_cols": null, - "sequenc_col": null, - "start_ts": null, - "end_ts": null, - "series": null + "ts_col": "event_ts" }, "df": { "schema": "symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float", - "ts_convert": null, "data": [ [ "S1", @@ -210,35 +117,16 @@ }, "test_aggregate_min_with_prefix": { "input_data": { - "tsdf": { - "ts_col": null, - "other_ts_cols": null, - "partition_cols": null, - "sequenc_col": null, - "start_ts": null, - "end_ts": null, - "series": null - }, - "df": { - "schema": null, - "ts_convert": null, - "data": null - }, + "tsdf": {}, + "df": {}, "$ref": "#/__SharedData/input_data" }, "expected_data": { "tsdf": { - "ts_col": "event_ts", - "other_ts_cols": null, - "partition_cols": null, - "sequenc_col": null, - "start_ts": null, - "end_ts": null, - "series": null + "ts_col": "event_ts" }, "df": { "schema": "symbol string, event_ts string, min_date string, min_trade_pr float, min_trade_pr_2 float", - "ts_convert": null, "data": { "$ref": "#/ResampleUnitTests/test_aggregate_min/expected_data/data" } @@ -248,35 +136,16 @@ }, "test_aggregate_min_with_fill": { "input_data": { - "tsdf": { - "ts_col": null, - "other_ts_cols": null, - "partition_cols": null, - "sequenc_col": null, - "start_ts": null, - "end_ts": null, - "series": null - }, - "df": { - "schema": null, - "ts_convert": null, - "data": null - }, + "tsdf": {}, + "df": {}, "$ref": "#/__SharedData/input_data" }, "expected_data": { "tsdf": { - "ts_col": "event_ts", - "other_ts_cols": null, - "partition_cols": null, - "sequenc_col": null, - "start_ts": null, - "end_ts": null, - "series": null + "ts_col": "event_ts" }, "df": { "schema": "symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float", - "ts_convert": null, "data": [ [ "S1", @@ -509,35 +378,16 @@ }, "test_aggregate_max": { "input_data": { - "tsdf": { - "ts_col": null, - "other_ts_cols": null, - "partition_cols": null, - "sequenc_col": null, - "start_ts": null, - "end_ts": null, - "series": null - }, - "df": { - "schema": null, - "ts_convert": null, - "data": null - }, + "tsdf": {}, + "df": {}, "$ref": "#/__SharedData/input_data" }, "expected_data": { "tsdf": { - "ts_col": "event_ts", - "other_ts_cols": null, - "partition_cols": null, - "sequenc_col": null, - "start_ts": null, - "end_ts": null, - "series": null + "ts_col": "event_ts" }, "df": { "schema": "symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float", - "ts_convert": null, "data": [ [ "S1", @@ -560,35 +410,16 @@ }, "test_aggregate_ceiling": { "input_data": { - "tsdf": { - "ts_col": null, - "other_ts_cols": null, - "partition_cols": null, - "sequenc_col": null, - "start_ts": null, - "end_ts": null, - "series": null - }, - "df": { - "schema": null, - "ts_convert": null, - "data": null - }, + "tsdf": {}, + "df": {}, "$ref": "#/__SharedData/input_data" }, "expected_data": { "tsdf": { - "ts_col": "event_ts", - "other_ts_cols": null, - "partition_cols": null, - "sequenc_col": null, - "start_ts": null, - "end_ts": null, - "series": null + "ts_col": "event_ts" }, "df": { "schema": "symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float", - "ts_convert": null, "data": [ [ "S1", @@ -611,35 +442,16 @@ }, "test_aggregate_invalid_func_arg": { "input_data": { - "tsdf": { - "ts_col": null, - "other_ts_cols": null, - "partition_cols": null, - "sequenc_col": null, - "start_ts": null, - "end_ts": null, - "series": null - }, - "df": { - "schema": null, - "ts_convert": null, - 
"data": null - }, + "tsdf": {}, + "df": {}, "$ref": "#/__SharedData/input_data" }, "expected_data": { "tsdf": { - "ts_col": "event_ts", - "other_ts_cols": null, - "partition_cols": null, - "sequenc_col": null, - "start_ts": null, - "end_ts": null, - "series": null + "ts_col": "event_ts" }, "df": { "schema": "symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float", - "ts_convert": null, "data": [ [ "S1", @@ -664,17 +476,10 @@ "__SharedData": { "input_data": { "tsdf": { - "ts_col": "event_ts", - "other_ts_cols": null, - "partition_cols": null, - "sequence_col": null, - "start_ts": null, - "end_ts": null, - "series": null + "ts_col": "event_ts" }, "df": { "schema": "symbol string, date string, event_ts string, trade_pr float, trade_pr_2 float", - "ts_convert": null, "data": [ [ "S1", From 35e2b508dc77d55a9f35c4490066eb620a360efa Mon Sep 17 00:00:00 2001 From: Taylor Isbell Date: Wed, 22 May 2024 14:11:10 -0500 Subject: [PATCH 108/137] still broken --- python/tests/unit_test_data/json-fixer.ipynb | 59 +++++++++++-------- .../unit_test_data/resample_2_tests.json | 48 +++++++++++---- 2 files changed, 70 insertions(+), 37 deletions(-) diff --git a/python/tests/unit_test_data/json-fixer.ipynb b/python/tests/unit_test_data/json-fixer.ipynb index 11c22779..d114d323 100644 --- a/python/tests/unit_test_data/json-fixer.ipynb +++ b/python/tests/unit_test_data/json-fixer.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 4, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ @@ -14,7 +14,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ @@ -25,7 +25,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 22, "metadata": {}, "outputs": [], "source": [ @@ -40,7 +40,7 @@ " tsdf = {}\n", " update_dict(tsdf, \"ts_col\", before[i][j][k].get(\"ts_col\", None))\n", " update_dict(tsdf, \"other_ts_cols\", before[i][j][k].get(\"other_ts_cols\", None))\n", - " update_dict(tsdf, \"partition_cols\", before[i][j][k].get(\"partition_col\", None))\n", + " update_dict(tsdf, \"partition_cols\", before[i][j][k].get(\"partition_cols\", None))\n", " update_dict(tsdf, \"sequence_col\", before[i][j][k].get(\"sequence_col\", None))\n", " update_dict(tsdf, \"start_ts\", before[i][j][k].get(\"start_ts\", None))\n", " update_dict(tsdf, \"end_ts\", before[i][j][k].get(\"end_ts\", None))\n", @@ -58,7 +58,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 23, "metadata": {}, "outputs": [], "source": [ @@ -71,7 +71,7 @@ " tsdf = {}\n", " update_dict(tsdf, \"ts_col\", before[i][j].get(\"ts_col\", None))\n", " update_dict(tsdf, \"other_ts_cols\", before[i][j].get(\"other_ts_cols\", None))\n", - " update_dict(tsdf, \"partition_cols\", before[i][j].get(\"partition_col\", None))\n", + " update_dict(tsdf, \"partition_cols\", before[i][j].get(\"partition_cols\", None))\n", " update_dict(tsdf, \"sequence_col\", before[i][j].get(\"sequence_col\", None))\n", " update_dict(tsdf, \"start_ts\", before[i][j].get(\"start_ts\", None))\n", " update_dict(tsdf, \"end_ts\", before[i][j].get(\"end_ts\", None))\n", @@ -83,19 +83,19 @@ " after_2[i][j] = {\n", " \"tsdf\": tsdf,\n", " \"df\": sdf,\n", - " \"$ref\": before[i][j].get(\"$ref\", None)\n", " }" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'__SharedData': {'input_data': {'tsdf': {'ts_col': 'event_ts'},\n", + "{'__SharedData': 
{'input_data': {'tsdf': {'ts_col': 'event_ts',\n", + " 'partition_cols': ['symbol']},\n", " 'df': {'schema': 'symbol string, date string, event_ts string, trade_pr float, trade_pr_2 float',\n", " 'data': [['S1', 'SAME_DT', '2020-08-01 00:00:10', 349.21, 10.0],\n", " ['S1', 'SAME_DT', '2020-08-01 00:00:11', 340.21, 9.0],\n", @@ -103,11 +103,10 @@ " ['S1', 'SAME_DT', '2020-08-01 00:01:13', 351.32, 7.0],\n", " ['S1', 'SAME_DT', '2020-08-01 00:01:14', 350.32, 6.0],\n", " ['S1', 'SAME_DT', '2020-09-01 00:01:12', 361.1, 5.0],\n", - " ['S1', 'SAME_DT', '2020-09-01 00:19:12', 362.1, 4.0]]},\n", - " '$ref': None}}}" + " ['S1', 'SAME_DT', '2020-09-01 00:19:12', 362.1, 4.0]]}}}}" ] }, - "execution_count": 9, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -118,7 +117,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 25, "metadata": {}, "outputs": [ { @@ -136,7 +135,8 @@ " 'test_aggregate_floor': {'input_data': {'tsdf': {},\n", " 'df': {},\n", " '$ref': '#/__SharedData/input_data'},\n", - " 'expected_data': {'tsdf': {'ts_col': 'event_ts'},\n", + " 'expected_data': {'tsdf': {'ts_col': 'event_ts',\n", + " 'partition_cols': ['symbol']},\n", " 'df': {'schema': 'symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float',\n", " 'data': [['S1', '2020-08-01 00:00:00', 'SAME_DT', 349.21, 10.0],\n", " ['S1', '2020-09-01 00:00:00', 'SAME_DT', 361.1, 5.0]]},\n", @@ -144,7 +144,8 @@ " 'test_aggregate_average': {'input_data': {'tsdf': {},\n", " 'df': {},\n", " '$ref': '#/__SharedData/input_data'},\n", - " 'expected_data': {'tsdf': {'ts_col': 'event_ts'},\n", + " 'expected_data': {'tsdf': {'ts_col': 'event_ts',\n", + " 'partition_cols': ['symbol']},\n", " 'df': {'schema': 'symbol string, event_ts string, trade_pr double, trade_pr_2 double',\n", " 'data': [['S1', '2020-08-01 00:00:00', 348.8760009765625, 8.0],\n", " ['S1', '2020-09-01 00:00:00', 361.6000061035156, 4.5]]},\n", @@ -152,7 +153,8 @@ " 'test_aggregate_min': {'input_data': {'tsdf': {},\n", " 'df': {},\n", " '$ref': '#/__SharedData/input_data'},\n", - " 'expected_data': {'tsdf': {'ts_col': 'event_ts'},\n", + " 'expected_data': {'tsdf': {'ts_col': 'event_ts',\n", + " 'partition_cols': ['symbol']},\n", " 'df': {'schema': 'symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float',\n", " 'data': [['S1', '2020-08-01 00:00:00', 'SAME_DT', 340.21, 6.0],\n", " ['S1', '2020-09-01 00:00:00', 'SAME_DT', 361.1, 4.0]]},\n", @@ -160,14 +162,16 @@ " 'test_aggregate_min_with_prefix': {'input_data': {'tsdf': {},\n", " 'df': {},\n", " '$ref': '#/__SharedData/input_data'},\n", - " 'expected_data': {'tsdf': {'ts_col': 'event_ts'},\n", + " 'expected_data': {'tsdf': {'ts_col': 'event_ts',\n", + " 'partition_cols': ['symbol']},\n", " 'df': {'schema': 'symbol string, event_ts string, min_date string, min_trade_pr float, min_trade_pr_2 float',\n", " 'data': {'$ref': '#/ResampleUnitTests/test_aggregate_min/expected_data/data'}},\n", " '$ref': None}},\n", " 'test_aggregate_min_with_fill': {'input_data': {'tsdf': {},\n", " 'df': {},\n", " '$ref': '#/__SharedData/input_data'},\n", - " 'expected_data': {'tsdf': {'ts_col': 'event_ts'},\n", + " 'expected_data': {'tsdf': {'ts_col': 'event_ts',\n", + " 'partition_cols': ['symbol']},\n", " 'df': {'schema': 'symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float',\n", " 'data': [['S1', '2020-08-01 00:00:00', 'SAME_DT', 340.21, 6.0],\n", " ['S1', '2020-08-02 00:00:00', None, 0.0, 0.0],\n", @@ -205,7 +209,8 @@ " 
'test_aggregate_max': {'input_data': {'tsdf': {},\n", " 'df': {},\n", " '$ref': '#/__SharedData/input_data'},\n", - " 'expected_data': {'tsdf': {'ts_col': 'event_ts'},\n", + " 'expected_data': {'tsdf': {'ts_col': 'event_ts',\n", + " 'partition_cols': ['symbol']},\n", " 'df': {'schema': 'symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float',\n", " 'data': [['S1', '2020-08-01 00:00:00', 'SAME_DT', 353.32, 10.0],\n", " ['S1', '2020-09-01 00:00:00', 'SAME_DT', 362.1, 5.0]]},\n", @@ -213,7 +218,8 @@ " 'test_aggregate_ceiling': {'input_data': {'tsdf': {},\n", " 'df': {},\n", " '$ref': '#/__SharedData/input_data'},\n", - " 'expected_data': {'tsdf': {'ts_col': 'event_ts'},\n", + " 'expected_data': {'tsdf': {'ts_col': 'event_ts',\n", + " 'partition_cols': ['symbol']},\n", " 'df': {'schema': 'symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float',\n", " 'data': [['S1', '2020-08-01 00:00:00', 'SAME_DT', 350.32, 6.0],\n", " ['S1', '2020-09-01 00:00:00', 'SAME_DT', 362.1, 4.0]]},\n", @@ -221,12 +227,14 @@ " 'test_aggregate_invalid_func_arg': {'input_data': {'tsdf': {},\n", " 'df': {},\n", " '$ref': '#/__SharedData/input_data'},\n", - " 'expected_data': {'tsdf': {'ts_col': 'event_ts'},\n", + " 'expected_data': {'tsdf': {'ts_col': 'event_ts',\n", + " 'partition_cols': ['symbol']},\n", " 'df': {'schema': 'symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float',\n", " 'data': [['S1', '2020-07-31 20:00:00', 'SAME_DT', 348.88, 8.0],\n", " ['S1', '2020-08-31 20:00:00', 'SAME_DT', 361.6, 4.5]]},\n", " '$ref': None}}},\n", - " '__SharedData': {'input_data': {'tsdf': {'ts_col': 'event_ts'},\n", + " '__SharedData': {'input_data': {'tsdf': {'ts_col': 'event_ts',\n", + " 'partition_cols': ['symbol']},\n", " 'df': {'schema': 'symbol string, date string, event_ts string, trade_pr float, trade_pr_2 float',\n", " 'data': [['S1', 'SAME_DT', '2020-08-01 00:00:10', 349.21, 10.0],\n", " ['S1', 'SAME_DT', '2020-08-01 00:00:11', 340.21, 9.0],\n", @@ -234,11 +242,10 @@ " ['S1', 'SAME_DT', '2020-08-01 00:01:13', 351.32, 7.0],\n", " ['S1', 'SAME_DT', '2020-08-01 00:01:14', 350.32, 6.0],\n", " ['S1', 'SAME_DT', '2020-09-01 00:01:12', 361.1, 5.0],\n", - " ['S1', 'SAME_DT', '2020-09-01 00:19:12', 362.1, 4.0]]},\n", - " '$ref': None}}}" + " ['S1', 'SAME_DT', '2020-09-01 00:19:12', 362.1, 4.0]]}}}}" ] }, - "execution_count": 11, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -250,7 +257,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 26, "metadata": {}, "outputs": [], "source": [ diff --git a/python/tests/unit_test_data/resample_2_tests.json b/python/tests/unit_test_data/resample_2_tests.json index 4391c32f..e8c6a40e 100644 --- a/python/tests/unit_test_data/resample_2_tests.json +++ b/python/tests/unit_test_data/resample_2_tests.json @@ -29,7 +29,10 @@ }, "expected_data": { "tsdf": { - "ts_col": "event_ts" + "ts_col": "event_ts", + "partition_cols": [ + "symbol" + ] }, "df": { "schema": "symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float", @@ -61,7 +64,10 @@ }, "expected_data": { "tsdf": { - "ts_col": "event_ts" + "ts_col": "event_ts", + "partition_cols": [ + "symbol" + ] }, "df": { "schema": "symbol string, event_ts string, trade_pr double, trade_pr_2 double", @@ -91,7 +97,10 @@ }, "expected_data": { "tsdf": { - "ts_col": "event_ts" + "ts_col": "event_ts", + "partition_cols": [ + "symbol" + ] }, "df": { "schema": "symbol string, event_ts string, date string, trade_pr float, 
trade_pr_2 float", @@ -123,7 +132,10 @@ }, "expected_data": { "tsdf": { - "ts_col": "event_ts" + "ts_col": "event_ts", + "partition_cols": [ + "symbol" + ] }, "df": { "schema": "symbol string, event_ts string, min_date string, min_trade_pr float, min_trade_pr_2 float", @@ -142,7 +154,10 @@ }, "expected_data": { "tsdf": { - "ts_col": "event_ts" + "ts_col": "event_ts", + "partition_cols": [ + "symbol" + ] }, "df": { "schema": "symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float", @@ -384,7 +399,10 @@ }, "expected_data": { "tsdf": { - "ts_col": "event_ts" + "ts_col": "event_ts", + "partition_cols": [ + "symbol" + ] }, "df": { "schema": "symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float", @@ -416,7 +434,10 @@ }, "expected_data": { "tsdf": { - "ts_col": "event_ts" + "ts_col": "event_ts", + "partition_cols": [ + "symbol" + ] }, "df": { "schema": "symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float", @@ -448,7 +469,10 @@ }, "expected_data": { "tsdf": { - "ts_col": "event_ts" + "ts_col": "event_ts", + "partition_cols": [ + "symbol" + ] }, "df": { "schema": "symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float", @@ -476,7 +500,10 @@ "__SharedData": { "input_data": { "tsdf": { - "ts_col": "event_ts" + "ts_col": "event_ts", + "partition_cols": [ + "symbol" + ] }, "df": { "schema": "symbol string, date string, event_ts string, trade_pr float, trade_pr_2 float", @@ -531,8 +558,7 @@ 4.0 ] ] - }, - "$ref": null + } } } } \ No newline at end of file From afb11759e273beb8c9ece7ba182ef057e0ffe634 Mon Sep 17 00:00:00 2001 From: Lorin Date: Mon, 8 Jul 2024 12:11:21 -0600 Subject: [PATCH 109/137] variable name refactoring --- python/tests/as_of_join_tests.py | 38 ++++++++++++++++---------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/python/tests/as_of_join_tests.py b/python/tests/as_of_join_tests.py index 958374d9..c815147b 100644 --- a/python/tests/as_of_join_tests.py +++ b/python/tests/as_of_join_tests.py @@ -6,13 +6,13 @@ class AsOfJoinTest(SparkTest): def test_asof_join(self): - """AS-OF Join with out a time-partition test""" + """AS-OF Join without a time-partition test""" # Construct dataframes tsdf_left = self.get_test_df_builder("left").as_tsdf() tsdf_right = self.get_test_df_builder("right").as_tsdf() - dfExpected = self.get_test_df_builder("expected").as_sdf() - noRightPrefixdfExpected = self.get_test_df_builder("expected_no_right_prefix").as_sdf() + df_expected = self.get_test_df_builder("expected").as_sdf() + no_right_prefixdf_expected = self.get_test_df_builder("expected_no_right_prefix").as_sdf() # perform the join joined_df = tsdf_left.asofJoin( @@ -23,13 +23,13 @@ def test_asof_join(self): ).df # joined dataframe should equal the expected dataframe - self.assertDataFrameEquality(joined_df, dfExpected) - self.assertDataFrameEquality(non_prefix_joined_df, noRightPrefixdfExpected) + self.assertDataFrameEquality(joined_df, df_expected) + self.assertDataFrameEquality(non_prefix_joined_df, no_right_prefixdf_expected) spark_sql_joined_df = tsdf_left.asofJoin( tsdf_right, left_prefix="left", right_prefix="right" ).df - self.assertDataFrameEquality(spark_sql_joined_df, dfExpected) + self.assertDataFrameEquality(spark_sql_joined_df, df_expected) def test_asof_join_skip_nulls_disabled(self): """AS-OF Join with skip nulls disabled""" @@ -37,8 +37,8 @@ def test_asof_join_skip_nulls_disabled(self): # fetch test data tsdf_left = self.get_test_df_builder("left").as_tsdf() tsdf_right 
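= self.get_test_df_builder("right").as_tsdf()
        # Two expected frames are loaded because the same inputs are joined
        # twice below: once with default null handling, and once with
        # skipNulls=False, which (per the fixture names) keeps right-side
        # nulls in the joined rows rather than carrying the prior non-null
        # value forward.
        tsdf_right 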
= self.get_test_df_builder("right").as_tsdf() - dfExpectedSkipNulls = self.get_test_df_builder("expected_skip_nulls").as_sdf() - dfExpectedSkipNullsDisabled = self.get_test_df_builder( + df_expected_skip_nulls = self.get_test_df_builder("expected_skip_nulls").as_sdf() + df_expected_skip_nulls_disabled = self.get_test_df_builder( "expected_skip_nulls_disabled" ).as_sdf() @@ -48,7 +48,7 @@ def test_asof_join_skip_nulls_disabled(self): ).df # joined dataframe should equal the expected dataframe with nulls skipped - self.assertDataFrameEquality(joined_df, dfExpectedSkipNulls) + self.assertDataFrameEquality(joined_df, df_expected_skip_nulls) # perform the join with skip nulls disabled joined_df = tsdf_left.asofJoin( @@ -56,7 +56,7 @@ def test_asof_join_skip_nulls_disabled(self): ).df # joined dataframe should equal the expected dataframe without nulls skipped - self.assertDataFrameEquality(joined_df, dfExpectedSkipNullsDisabled) + self.assertDataFrameEquality(joined_df, df_expected_skip_nulls_disabled) def test_sequence_number_sort(self): """Skew AS-OF Join with Partition Window Test""" @@ -64,13 +64,13 @@ def test_sequence_number_sort(self): # fetch test data tsdf_left = self.get_test_df_builder("left").as_tsdf() tsdf_right = self.get_test_df_builder("right").as_tsdf() - dfExpected = self.get_test_df_builder("expected").as_sdf() + df_expected = self.get_test_df_builder("expected").as_sdf() # perform the join joined_df = tsdf_left.asofJoin(tsdf_right, right_prefix="right").df # joined dataframe should equal the expected dataframe - self.assertDataFrameEquality(joined_df, dfExpected) + self.assertDataFrameEquality(joined_df, df_expected) def test_partitioned_asof_join(self): """AS-OF Join with a time-partition""" @@ -78,7 +78,7 @@ def test_partitioned_asof_join(self): # fetch test data tsdf_left = self.get_test_df_builder("left").as_tsdf() tsdf_right = self.get_test_df_builder("right").as_tsdf() - dfExpected = self.get_test_df_builder("expected").as_sdf() + df_expected = self.get_test_df_builder("expected").as_sdf() joined_df = tsdf_left.asofJoin( tsdf_right, @@ -88,7 +88,7 @@ def test_partitioned_asof_join(self): fraction=0.1, ).df - self.assertDataFrameEquality(joined_df, dfExpected) + self.assertDataFrameEquality(joined_df, df_expected) self.assertEqual( warning_captured.output, [ @@ -144,8 +144,8 @@ def test_asof_join_sql_join_opt_and_bytes_threshold(self): # Construct dataframes tsdf_left = self.get_test_df_builder("left").as_tsdf() tsdf_right = self.get_test_df_builder("right").as_tsdf() - dfExpected = self.get_test_df_builder("expected").as_sdf() - noRightPrefixdfExpected = self.get_test_df_builder("expected_no_right_prefix").as_sdf() + df_expected = self.get_test_df_builder("expected").as_sdf() + no_right_prefixdf_expected = self.get_test_df_builder("expected_no_right_prefix").as_sdf() # perform the join joined_df = tsdf_left.asofJoin( @@ -156,13 +156,13 @@ def test_asof_join_sql_join_opt_and_bytes_threshold(self): ).df # joined dataframe should equal the expected dataframe - self.assertDataFrameEquality(joined_df, dfExpected) - self.assertDataFrameEquality(non_prefix_joined_df, noRightPrefixdfExpected) + self.assertDataFrameEquality(joined_df, df_expected) + self.assertDataFrameEquality(non_prefix_joined_df, no_right_prefixdf_expected) spark_sql_joined_df = tsdf_left.asofJoin( tsdf_right, left_prefix="left", right_prefix="right" ).df - self.assertDataFrameEquality(spark_sql_joined_df, dfExpected) + self.assertDataFrameEquality(spark_sql_joined_df, df_expected) # MAIN From 
c24db4c2bb8ad3056330f1645ea2641879c8b8a7 Mon Sep 17 00:00:00 2001 From: Lorin Date: Mon, 8 Jul 2024 12:12:11 -0600 Subject: [PATCH 110/137] remove re import as it was unused --- python/tests/base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/tests/base.py b/python/tests/base.py index 06f90277..6ae285b7 100644 --- a/python/tests/base.py +++ b/python/tests/base.py @@ -1,5 +1,4 @@ import os -import re import unittest import warnings from typing import Union, Optional From 4ea9e2263517daad06bc6582a1c26edf1a45fb9e Mon Sep 17 00:00:00 2001 From: Lorin Date: Mon, 8 Jul 2024 12:12:41 -0600 Subject: [PATCH 111/137] remove get_data_as_tsdf --- python/tests/base.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/python/tests/base.py b/python/tests/base.py index 6ae285b7..4dfd50eb 100644 --- a/python/tests/base.py +++ b/python/tests/base.py @@ -201,16 +201,6 @@ def tearDown(self) -> None: # ts_cols.extend(td.get("other_ts_cols", [])) # return self.buildTestDF(td["schema"], td["data"], ts_cols) # - # def get_data_as_tsdf(self, name: str, convert_ts_col=True): - # df = self.get_data_as_sdf(name, convert_ts_col) - # td = self.test_data[name] - # tsdf = TSDF( - # df, - # ts_col=td["ts_col"], - # partition_cols=td.get("partition_cols", None), - # sequence_col=td.get("sequence_col", None), - # ) - # return tsdf def get_data_as_idf(self, name: str, convert_ts_col=True): df = self.get_data_as_sdf(name, convert_ts_col) From 9b0094f3bf42f65a9de497cc517befe5b4d6752f Mon Sep 17 00:00:00 2001 From: Lorin Date: Mon, 8 Jul 2024 12:13:00 -0600 Subject: [PATCH 112/137] remove get_data_as_sdf --- python/tests/base.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/python/tests/base.py b/python/tests/base.py index 4dfd50eb..03209961 100644 --- a/python/tests/base.py +++ b/python/tests/base.py @@ -193,15 +193,6 @@ def tearDown(self) -> None: # Utility Functions # - # def get_data_as_sdf(self, name: str, convert_ts_col=True): - # td = self.test_data[name] - # ts_cols = [] - # if convert_ts_col and (td.get("ts_col", None) or td.get("other_ts_cols", [])): - # ts_cols = [td["ts_col"]] if "ts_col" in td else [] - # ts_cols.extend(td.get("other_ts_cols", [])) - # return self.buildTestDF(td["schema"], td["data"], ts_cols) - # - def get_data_as_idf(self, name: str, convert_ts_col=True): df = self.get_data_as_sdf(name, convert_ts_col) td = self.test_data[name] From 645114f52e36f333a5ea8bc404055380503ce13d Mon Sep 17 00:00:00 2001 From: Lorin Date: Mon, 8 Jul 2024 12:16:28 -0600 Subject: [PATCH 113/137] fix typo in docstring --- python/tests/as_of_join_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tests/as_of_join_tests.py b/python/tests/as_of_join_tests.py index c815147b..7a8a5165 100644 --- a/python/tests/as_of_join_tests.py +++ b/python/tests/as_of_join_tests.py @@ -139,7 +139,7 @@ def test_asof_join_tolerance(self): self.assertDataFrameEquality(joined_df, expected_tolerance) def test_asof_join_sql_join_opt_and_bytes_threshold(self): - """AS-OF Join with out a time-partition test""" + """AS-OF Join without a time-partition test""" with patch("tempo.tsdf.TSDF._TSDF__getBytesFromPlan", return_value=1000): # Construct dataframes tsdf_left = self.get_test_df_builder("left").as_tsdf() From d6e48f6399a1fc46eb10b9f7dcc24f118f9f6183 Mon Sep 17 00:00:00 2001 From: Lorin Date: Mon, 8 Jul 2024 14:09:20 -0600 Subject: [PATCH 114/137] refactor schema comparison for test helper --- python/tests/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/python/tests/base.py b/python/tests/base.py index 03209961..7525baff 100644 --- a/python/tests/base.py +++ b/python/tests/base.py @@ -297,7 +297,7 @@ def assertDataFrameEquality( # df2 must also be a TSDF self.assertIsInstance(df2, TSDF) # should have the same schemas - self.assertEqual(df1.ts_schema, df2.ts_schema) + self.assertEqual(df1.df.schema, df2.df.schema) # get the underlying Spark DataFrames df1 = df1.df df2 = df2.df From 241402736365275ea51f2082bda5608d79ffce0d Mon Sep 17 00:00:00 2001 From: Lorin Date: Mon, 8 Jul 2024 14:09:46 -0600 Subject: [PATCH 115/137] general refactor for TSDFBaseTests --- python/tests/tsdf_tests.py | 329 +++--- python/tests/unit_test_data/tsdf_tests.json | 1178 ++++++++++--------- 2 files changed, 814 insertions(+), 693 deletions(-) diff --git a/python/tests/tsdf_tests.py b/python/tests/tsdf_tests.py index 6baebd0b..00567bf4 100644 --- a/python/tests/tsdf_tests.py +++ b/python/tests/tsdf_tests.py @@ -18,7 +18,8 @@ class TSDFBaseTests(SparkTest): def test_TSDF_init(self): - tsdf_init = self.get_data_as_tsdf("init") + + tsdf_init = self.get_test_df_builder("init").as_tsdf() self.assertIsInstance(tsdf_init.df, DataFrame) self.assertEqual(tsdf_init.ts_col, "event_ts") @@ -29,7 +30,7 @@ def test_describe(self): """AS-OF Join without a time-partition test""" # Construct dataframes - tsdf_init = self.get_data_as_tsdf("init") + tsdf_init = self.get_test_df_builder("init").as_tsdf() # generate description dataframe res = tsdf_init.describe() @@ -57,7 +58,7 @@ def test_describe(self): ) def test__getSparkPlan(self): - init_tsdf = self.get_data_as_tsdf("init") + init_tsdf = self.get_test_df_builder("init").as_tsdf() plan = init_tsdf._TSDF__getSparkPlan(init_tsdf.df, self.spark) @@ -67,7 +68,7 @@ def test__getSparkPlan(self): self.assertIn("sizeInBytes", plan) def test__getBytesFromPlan(self): - init_tsdf = self.get_data_as_tsdf("init") + init_tsdf = self.get_test_df_builder("init").as_tsdf() _bytes = init_tsdf._TSDF__getBytesFromPlan(init_tsdf.df, self.spark) @@ -77,7 +78,7 @@ def test__getBytesFromPlan(self): def test__getBytesFromPlan_search_result_is_None(self, mock__getSparkPlan): mock__getSparkPlan.return_value = "will not match search value" - init_tsdf = self.get_data_as_tsdf("init") + init_tsdf = self.get_test_df_builder("init").as_tsdf() self.assertRaises( ValueError, @@ -90,7 +91,7 @@ def test__getBytesFromPlan_search_result_is_None(self, mock__getSparkPlan): def test__getBytesFromPlan_size_in_MiB(self, mock__getSparkPlan): mock__getSparkPlan.return_value = "' Statistics(sizeInBytes=1.0 MiB) '" - init_tsdf = self.get_data_as_tsdf("init") + init_tsdf = self.get_test_df_builder("init").as_tsdf() _bytes = init_tsdf._TSDF__getBytesFromPlan(init_tsdf.df, self.spark) expected = 1 * 1024 * 1024 @@ -101,7 +102,7 @@ def test__getBytesFromPlan_size_in_MiB(self, mock__getSparkPlan): def test__getBytesFromPlan_size_in_KiB(self, mock__getSparkPlan): mock__getSparkPlan.return_value = "' Statistics(sizeInBytes=1.0 KiB) '" - init_tsdf = self.get_data_as_tsdf("init") + init_tsdf = self.get_test_df_builder("init").as_tsdf() _bytes = init_tsdf._TSDF__getBytesFromPlan(init_tsdf.df, self.spark) @@ -111,7 +112,7 @@ def test__getBytesFromPlan_size_in_KiB(self, mock__getSparkPlan): def test__getBytesFromPlan_size_in_GiB(self, mock__getSparkPlan): mock__getSparkPlan.return_value = "' Statistics(sizeInBytes=1.0 GiB) '" - init_tsdf = self.get_data_as_tsdf("init") + init_tsdf = self.get_test_df_builder("init").as_tsdf() _bytes = init_tsdf._TSDF__getBytesFromPlan(init_tsdf.df, 
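self.spark)

        # A GiB plan should scale by 1024**3, consistent with the KiB (1024)
        # and MiB (1024**2) cases above; a plan string that the size pattern
        # cannot match at all is the ValueError case mocked earlier.
        _bytes = init_tsdf._TSDF__getBytesFromPlan(init_tsdf.df, 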
self.spark) @@ -130,7 +131,7 @@ def __tsdf_with_double_tscol(tsdf: TSDF) -> TSDF: return TSDF(with_double_tscol_df, tsdf.ts_col, tsdf.partitionCols) def test__add_double_ts(self): - init_tsdf = self.get_data_as_tsdf("init") + init_tsdf = self.get_test_df_builder("init").as_tsdf() df = init_tsdf._TSDF__add_double_ts() schema_string = df.schema.simpleString() @@ -165,12 +166,12 @@ def test__validate_ts_string_invalid(self): ) def test__validated_column_not_string(self): - init_df = self.get_data_as_tsdf("init").df + init_df = self.get_test_df_builder("init").as_sdf() self.assertRaises(TypeError, TSDF._TSDF__validated_column, init_df, 0) def test__validated_column_not_found(self): - init_df = self.get_data_as_tsdf("init").df + init_df = self.get_test_df_builder("init").as_sdf() self.assertRaises( ValueError, @@ -180,7 +181,7 @@ def test__validated_column_not_found(self): ) def test__validated_column(self): - init_df = self.get_data_as_tsdf("init").df + init_df = self.get_test_df_builder("init").as_sdf() self.assertEqual( TSDF._TSDF__validated_column(init_df, "symbol"), @@ -188,7 +189,7 @@ def test__validated_column(self): ) def test__validated_columns_string(self): - init_tsdf = self.get_data_as_tsdf("init") + init_tsdf = self.get_test_df_builder("init").as_tsdf() self.assertEqual( init_tsdf._TSDF__validated_columns(init_tsdf.df, "symbol"), @@ -196,7 +197,7 @@ def test__validated_columns_string(self): ) def test__validated_columns_none(self): - init_tsdf = self.get_data_as_tsdf("init") + init_tsdf = self.get_test_df_builder("init").as_tsdf() self.assertEqual( init_tsdf._TSDF__validated_columns(init_tsdf.df, None), @@ -204,7 +205,7 @@ def test__validated_columns_none(self): ) def test__validated_columns_tuple(self): - init_tsdf = self.get_data_as_tsdf("init") + init_tsdf = self.get_test_df_builder("init").as_tsdf() self.assertRaises( TypeError, @@ -214,7 +215,7 @@ def test__validated_columns_tuple(self): ) def test__validated_columns_list_multiple_elems(self): - init_tsdf = self.get_data_as_tsdf("init") + init_tsdf = self.get_test_df_builder("init").as_tsdf() self.assertEqual( init_tsdf._TSDF__validated_columns( @@ -225,19 +226,19 @@ def test__validated_columns_list_multiple_elems(self): ) def test__checkPartitionCols(self): - init_tsdf = self.get_data_as_tsdf("init") - right_tsdf = self.get_data_as_tsdf("right_tsdf") + init_tsdf = self.get_test_df_builder("init").as_tsdf() + right_tsdf = self.get_test_df_builder("right_tsdf").as_tsdf() self.assertRaises(ValueError, init_tsdf._TSDF__checkPartitionCols, right_tsdf) def test__validateTsColMatch(self): - init_tsdf = self.get_data_as_tsdf("init") - right_tsdf = self.get_data_as_tsdf("right_tsdf") + init_tsdf = self.get_test_df_builder("init").as_tsdf() + right_tsdf = self.get_test_df_builder("right_tsdf").as_tsdf() self.assertRaises(ValueError, init_tsdf._TSDF__validateTsColMatch, right_tsdf) def test__addPrefixToColumns_non_empty_string(self): - init_tsdf = self.get_data_as_tsdf("init") + init_tsdf = self.get_test_df_builder("init").as_tsdf() df = init_tsdf._TSDF__addPrefixToColumns(["event_ts"], "prefix").df @@ -246,7 +247,7 @@ def test__addPrefixToColumns_non_empty_string(self): self.assertIn("prefix_event_ts", schema_string) def test__addPrefixToColumns_empty_string(self): - init_tsdf = self.get_data_as_tsdf("init") + init_tsdf = self.get_test_df_builder("init").as_tsdf() df = init_tsdf._TSDF__addPrefixToColumns(["event_ts"], "").df @@ -256,7 +257,7 @@ def test__addPrefixToColumns_empty_string(self): self.assertIn(",event_ts", schema_string) 
def test__addColumnsFromOtherDF(self): - init_tsdf = self.get_data_as_tsdf("init") + init_tsdf = self.get_test_df_builder("init").as_tsdf() df = init_tsdf._TSDF__addColumnsFromOtherDF(["another_col"]).df @@ -265,8 +266,8 @@ def test__addColumnsFromOtherDF(self): self.assertIn("another_col", schema_string) def test__combineTSDF(self): - init1_tsdf = self.get_data_as_tsdf("init") - init2_tsdf = self.get_data_as_tsdf("init") + init1_tsdf = self.get_test_df_builder("init").as_tsdf() + init2_tsdf = self.get_test_df_builder("init").as_tsdf() union_tsdf = init1_tsdf._TSDF__combineTSDF(init2_tsdf, "combined_ts_col") df = union_tsdf.df @@ -281,51 +282,43 @@ def test__getLastRightRow(self): pass def test__getTimePartitions(self): - init_tsdf = self.get_data_as_tsdf("init") - expected_tsdf = self.get_data_as_tsdf("expected") + init_tsdf = self.get_test_df_builder("init").as_tsdf() + expected_tsdf = self.get_test_df_builder("expected").as_tsdf() actual_tsdf = init_tsdf._TSDF__getTimePartitions(10) - self.assertDataFrameEquality( - actual_tsdf, - expected_tsdf, - from_tsdf=True, - ) + self.assertDataFrameEquality(actual_tsdf, expected_tsdf) def test__getTimePartitions_with_fraction(self): - init_tsdf = self.get_data_as_tsdf("init") - expected_tsdf = self.get_data_as_tsdf("expected") + init_tsdf = self.get_test_df_builder("init").as_tsdf() + expected_tsdf = self.get_test_df_builder("expected").as_tsdf() actual_tsdf = init_tsdf._TSDF__getTimePartitions(10, 0.25) - self.assertDataFrameEquality( - actual_tsdf, - expected_tsdf, - from_tsdf=True, - ) + self.assertDataFrameEquality(actual_tsdf, expected_tsdf) def test_select_empty(self): # TODO: Can we narrow down to types of Exception? - init_tsdf = self.get_data_as_tsdf("init") + init_tsdf = self.get_test_df_builder("init").as_tsdf() self.assertRaises(Exception, init_tsdf.select) def test_select_only_required_cols(self): - init_tsdf = self.get_data_as_tsdf("init") + init_tsdf = self.get_test_df_builder("init").as_tsdf() tsdf = init_tsdf.select("event_ts", "symbol") self.assertEqual(tsdf.df.columns, ["event_ts", "symbol"]) def test_select_all_cols(self): - init_tsdf = self.get_data_as_tsdf("init") + init_tsdf = self.get_test_df_builder("init").as_tsdf() tsdf = init_tsdf.select("event_ts", "symbol", "trade_pr") self.assertEqual(tsdf.df.columns, ["event_ts", "symbol", "trade_pr"]) def test_show(self): - init_tsdf = self.get_data_as_tsdf("init") + init_tsdf = self.get_test_df_builder("init").as_tsdf() captured_output = StringIO() sys.stdout = captured_output @@ -350,7 +343,7 @@ def test_show(self): ) def test_show_n_5(self): - init_tsdf = self.get_data_as_tsdf("init") + init_tsdf = self.get_test_df_builder("init").as_tsdf() captured_output = StringIO() sys.stdout = captured_output @@ -373,14 +366,14 @@ def test_show_n_5(self): ) def test_show_k_gt_n(self): - init_tsdf = self.get_data_as_tsdf("init") + init_tsdf = self.get_test_df_builder("init").as_tsdf() captured_output = StringIO() sys.stdout = captured_output self.assertRaises(ValueError, init_tsdf.show, 5, 10) def test_show_truncate_false(self): - init_tsdf = self.get_data_as_tsdf("init") + init_tsdf = self.get_test_df_builder("init").as_tsdf() captured_output = StringIO() sys.stdout = captured_output @@ -405,7 +398,7 @@ def test_show_truncate_false(self): ) def test_show_vertical_true(self): - init_tsdf = self.get_data_as_tsdf("init") + init_tsdf = self.get_test_df_builder("init").as_tsdf() captured_output = StringIO() sys.stdout = captured_output @@ -450,7 +443,7 @@ def test_show_vertical_true(self): ) 
def test_show_vertical_true_n_5(self): - init_tsdf = self.get_data_as_tsdf("init") + init_tsdf = self.get_test_df_builder("init").as_tsdf() captured_output = StringIO() sys.stdout = captured_output @@ -484,7 +477,7 @@ def test_show_vertical_true_n_5(self): ) def test_show_truncate_false_vertical_true(self): - init_tsdf = self.get_data_as_tsdf("init") + init_tsdf = self.get_test_df_builder("init").as_tsdf() captured_output = StringIO() sys.stdout = captured_output @@ -532,20 +525,20 @@ def test_at_string_timestamp(self): """ Test of time-slicing at(..) function using a string timestamp """ - init_tsdf = self.get_data_as_tsdf("init") - expected_tsdf = self.get_data_as_tsdf("expected") + init_tsdf = self.get_test_df_builder("init").as_tsdf() + expected_tsdf = self.get_test_df_builder("expected").as_tsdf() target_ts = "2020-09-01 00:02:10" at_tsdf = init_tsdf.at(target_ts) - self.assertDataFrameEquality(at_tsdf, expected_tsdf, from_tsdf=True) + self.assertDataFrameEquality(at_tsdf, expected_tsdf) def test_at_numeric_timestamp(self): """ Test of time-slicint at(..) function using a numeric timestamp """ - init_tsdf = self.get_data_as_tsdf("init") - expected_tsdf = self.get_data_as_tsdf("expected") + init_tsdf = self.get_test_df_builder("init").as_tsdf() + expected_tsdf = self.get_test_df_builder("expected").as_tsdf() # test with numeric ts_col init_dbl_tsdf = self.__tsdf_with_double_tscol(init_tsdf) @@ -555,23 +548,23 @@ def test_at_numeric_timestamp(self): target_dbl = self.__timestamp_to_double(target_ts) at_dbl_tsdf = init_dbl_tsdf.at(target_dbl) - self.assertDataFrameEquality(at_dbl_tsdf, expected_dbl_tsdf, from_tsdf=True) + self.assertDataFrameEquality(at_dbl_tsdf, expected_dbl_tsdf) def test_before_string_timestamp(self): """ Test of time-slicing before(..) function """ - init_tsdf = self.get_data_as_tsdf("init") - expected_tsdf = self.get_data_as_tsdf("expected") + init_tsdf = self.get_test_df_builder("init").as_tsdf() + expected_tsdf = self.get_test_df_builder("expected").as_tsdf() target_ts = "2020-09-01 00:02:10" before_tsdf = init_tsdf.before(target_ts) - self.assertDataFrameEquality(before_tsdf, expected_tsdf, from_tsdf=True) + self.assertDataFrameEquality(before_tsdf, expected_tsdf) def test_before_numeric_timestamp(self): - init_tsdf = self.get_data_as_tsdf("init") - expected_tsdf = self.get_data_as_tsdf("expected") + init_tsdf = self.get_test_df_builder("init").as_tsdf() + expected_tsdf = self.get_test_df_builder("expected").as_tsdf() # test with numeric ts_col init_dbl_tsdf = self.__tsdf_with_double_tscol(init_tsdf) @@ -581,26 +574,26 @@ def test_before_numeric_timestamp(self): target_dbl = self.__timestamp_to_double(target_ts) before_dbl_tsdf = init_dbl_tsdf.before(target_dbl) - self.assertDataFrameEquality(before_dbl_tsdf, expected_dbl_tsdf, from_tsdf=True) + self.assertDataFrameEquality(before_dbl_tsdf, expected_dbl_tsdf) def test_atOrBefore_string_timestamp(self): """ Test of time-slicing atOrBefore(..) function """ - init_tsdf = self.get_data_as_tsdf("init") - expected_tsdf = self.get_data_as_tsdf("expected") + init_tsdf = self.get_test_df_builder("init").as_tsdf() + expected_tsdf = self.get_test_df_builder("expected").as_tsdf() target_ts = "2020-09-01 00:02:10" before_tsdf = init_tsdf.atOrBefore(target_ts) - self.assertDataFrameEquality(before_tsdf, expected_tsdf, from_tsdf=True) + self.assertDataFrameEquality(before_tsdf, expected_tsdf) def test_atOrBefore_numeric_timestamp(self): """ Test of time-slicing atOrBefore(..) 
function """ - init_tsdf = self.get_data_as_tsdf("init") - expected_tsdf = self.get_data_as_tsdf("expected") + init_tsdf = self.get_test_df_builder("init").as_tsdf() + expected_tsdf = self.get_test_df_builder("expected").as_tsdf() target_ts = "2020-09-01 00:02:10" @@ -611,26 +604,26 @@ def test_atOrBefore_numeric_timestamp(self): target_dbl = self.__timestamp_to_double(target_ts) before_dbl_tsdf = init_dbl_tsdf.atOrBefore(target_dbl) - self.assertDataFrameEquality(before_dbl_tsdf, expected_dbl_tsdf, from_tsdf=True) + self.assertDataFrameEquality(before_dbl_tsdf, expected_dbl_tsdf) def test_after_string_timestamp(self): """ Test of time-slicing after(..) function """ - init_tsdf = self.get_data_as_tsdf("init") - expected_tsdf = self.get_data_as_tsdf("expected") + init_tsdf = self.get_test_df_builder("init").as_tsdf() + expected_tsdf = self.get_test_df_builder("expected").as_tsdf() target_ts = "2020-09-01 00:02:10" after_tsdf = init_tsdf.after(target_ts) - self.assertDataFrameEquality(after_tsdf, expected_tsdf, from_tsdf=True) + self.assertDataFrameEquality(after_tsdf, expected_tsdf) def test_after_numeric_timestamp(self): """ Test of time-slicing after(..) function """ - init_tsdf = self.get_data_as_tsdf("init") - expected_tsdf = self.get_data_as_tsdf("expected") + init_tsdf = self.get_test_df_builder("init").as_tsdf() + expected_tsdf = self.get_test_df_builder("expected").as_tsdf() target_ts = "2020-09-01 00:02:10" @@ -641,26 +634,26 @@ def test_after_numeric_timestamp(self): target_dbl = self.__timestamp_to_double(target_ts) after_dbl_tsdf = init_dbl_tsdf.after(target_dbl) - self.assertDataFrameEquality(after_dbl_tsdf, expected_dbl_tsdf, from_tsdf=True) + self.assertDataFrameEquality(after_dbl_tsdf, expected_dbl_tsdf) def test_atOrAfter_string_timestamp(self): """ Test of time-slicing atOrAfter(..) function """ - init_tsdf = self.get_data_as_tsdf("init") - expected_tsdf = self.get_data_as_tsdf("expected") + init_tsdf = self.get_test_df_builder("init").as_tsdf() + expected_tsdf = self.get_test_df_builder("expected").as_tsdf() target_ts = "2020-09-01 00:02:10" after_tsdf = init_tsdf.atOrAfter(target_ts) - self.assertDataFrameEquality(after_tsdf, expected_tsdf, from_tsdf=True) + self.assertDataFrameEquality(after_tsdf, expected_tsdf) def test_atOrAfter_numeric_timestamp(self): """ Test of time-slicing atOrAfter(..) function """ - init_tsdf = self.get_data_as_tsdf("init") - expected_tsdf = self.get_data_as_tsdf("expected") + init_tsdf = self.get_test_df_builder("init").as_tsdf() + expected_tsdf = self.get_test_df_builder("expected").as_tsdf() target_ts = "2020-09-01 00:02:10" @@ -671,27 +664,27 @@ def test_atOrAfter_numeric_timestamp(self): target_dbl = self.__timestamp_to_double(target_ts) after_dbl_tsdf = init_dbl_tsdf.atOrAfter(target_dbl) - self.assertDataFrameEquality(after_dbl_tsdf, expected_dbl_tsdf, from_tsdf=True) + self.assertDataFrameEquality(after_dbl_tsdf, expected_dbl_tsdf) def test_between_string_timestamp(self): """ Test of time-slicing between(..) 
function """ - init_tsdf = self.get_data_as_tsdf("init") - expected_tsdf = self.get_data_as_tsdf("expected") + init_tsdf = self.get_test_df_builder("init").as_tsdf() + expected_tsdf = self.get_test_df_builder("expected").as_tsdf() ts1 = "2020-08-01 00:01:10" ts2 = "2020-09-01 00:18:00" between_tsdf = init_tsdf.between(ts1, ts2) - self.assertDataFrameEquality(between_tsdf, expected_tsdf, from_tsdf=True) + self.assertDataFrameEquality(between_tsdf, expected_tsdf) def test_between_numeric_timestamp(self): """ Test of time-slicing between(..) function """ - init_tsdf = self.get_data_as_tsdf("init") - expected_tsdf = self.get_data_as_tsdf("expected") + init_tsdf = self.get_test_df_builder("init").as_tsdf() + expected_tsdf = self.get_test_df_builder("expected").as_tsdf() ts1 = "2020-08-01 00:01:10" ts2 = "2020-09-01 00:18:00" @@ -705,28 +698,28 @@ def test_between_numeric_timestamp(self): between_dbl_tsdf = init_dbl_tsdf.between(ts1_dbl, ts2_dbl) self.assertDataFrameEquality( - between_dbl_tsdf, expected_dbl_tsdf, from_tsdf=True + between_dbl_tsdf, expected_dbl_tsdf ) def test_between_exclusive_string_timestamp(self): """ Test of time-slicing between(..) function """ - init_tsdf = self.get_data_as_tsdf("init") - expected_tsdf = self.get_data_as_tsdf("expected") + init_tsdf = self.get_test_df_builder("init").as_tsdf() + expected_tsdf = self.get_test_df_builder("expected").as_tsdf() ts1 = "2020-08-01 00:01:10" ts2 = "2020-09-01 00:18:00" between_tsdf = init_tsdf.between(ts1, ts2, inclusive=False) - self.assertDataFrameEquality(between_tsdf, expected_tsdf, from_tsdf=True) + self.assertDataFrameEquality(between_tsdf, expected_tsdf) def test_between_exclusive_numeric_timestamp(self): """ Test of time-slicing between(..) function """ - init_tsdf = self.get_data_as_tsdf("init") - expected_tsdf = self.get_data_as_tsdf("expected") + init_tsdf = self.get_test_df_builder("init").as_tsdf() + expected_tsdf = self.get_test_df_builder("expected").as_tsdf() ts1 = "2020-08-01 00:01:10" ts2 = "2020-09-01 00:18:00" @@ -740,26 +733,26 @@ def test_between_exclusive_numeric_timestamp(self): between_dbl_tsdf = init_dbl_tsdf.between(ts1_dbl, ts2_dbl, inclusive=False) self.assertDataFrameEquality( - between_dbl_tsdf, expected_dbl_tsdf, from_tsdf=True + between_dbl_tsdf, expected_dbl_tsdf ) def test_earliest_string_timestamp(self): """ Test of time-slicing earliest(..) function """ - init_tsdf = self.get_data_as_tsdf("init") - expected_tsdf = self.get_data_as_tsdf("expected") + init_tsdf = self.get_test_df_builder("init").as_tsdf() + expected_tsdf = self.get_test_df_builder("expected").as_tsdf() earliest_tsdf = init_tsdf.earliest(n=3) - self.assertDataFrameEquality(earliest_tsdf, expected_tsdf, from_tsdf=True) + self.assertDataFrameEquality(earliest_tsdf, expected_tsdf) def test_earliest_numeric_timestamp(self): """ Test of time-slicing earliest(..) function """ - init_tsdf = self.get_data_as_tsdf("init") - expected_tsdf = self.get_data_as_tsdf("expected") + init_tsdf = self.get_test_df_builder("init").as_tsdf() + expected_tsdf = self.get_test_df_builder("expected").as_tsdf() # test with numeric ts_col init_dbl_tsdf = self.__tsdf_with_double_tscol(init_tsdf) @@ -768,28 +761,28 @@ def test_earliest_numeric_timestamp(self): earliest_dbl_tsdf = init_dbl_tsdf.earliest(n=3) self.assertDataFrameEquality( - earliest_dbl_tsdf, expected_dbl_tsdf, from_tsdf=True + earliest_dbl_tsdf, expected_dbl_tsdf ) def test_latest_string_timestamp(self): """ Test of time-slicing latest(..) 
function """ - init_tsdf = self.get_data_as_tsdf("init") - expected_tsdf = self.get_data_as_tsdf("expected") + init_tsdf = self.get_test_df_builder("init").as_tsdf() + expected_tsdf = self.get_test_df_builder("expected").as_tsdf() latest_tsdf = init_tsdf.latest(n=3) self.assertDataFrameEquality( - latest_tsdf, expected_tsdf, ignore_row_order=True, from_tsdf=True + latest_tsdf, expected_tsdf, ignore_row_order=True ) def test_latest_numeric_timestamp(self): """ Test of time-slicing latest(..) function """ - init_tsdf = self.get_data_as_tsdf("init") - expected_tsdf = self.get_data_as_tsdf("expected") + init_tsdf = self.get_test_df_builder("init").as_tsdf() + expected_tsdf = self.get_test_df_builder("expected").as_tsdf() # test with numeric ts_col init_dbl_tsdf = self.__tsdf_with_double_tscol(init_tsdf) @@ -798,27 +791,27 @@ def test_latest_numeric_timestamp(self): latest_dbl_tsdf = init_dbl_tsdf.latest(n=3) self.assertDataFrameEquality( - latest_dbl_tsdf, expected_dbl_tsdf, ignore_row_order=True, from_tsdf=True + latest_dbl_tsdf, expected_dbl_tsdf, ignore_row_order=True ) def test_priorTo_string_timestamp(self): """ Test of time-slicing priorTo(..) function """ - init_tsdf = self.get_data_as_tsdf("init") - expected_tsdf = self.get_data_as_tsdf("expected") + init_tsdf = self.get_test_df_builder("init").as_tsdf() + expected_tsdf = self.get_test_df_builder("expected").as_tsdf() target_ts = "2020-09-01 00:02:00" prior_tsdf = init_tsdf.priorTo(target_ts) - self.assertDataFrameEquality(prior_tsdf, expected_tsdf, from_tsdf=True) + self.assertDataFrameEquality(prior_tsdf, expected_tsdf, ignore_column_order=True,) def test_priorTo_numeric_timestamp(self): """ Test of time-slicing priorTo(..) function """ - init_tsdf = self.get_data_as_tsdf("init") - expected_tsdf = self.get_data_as_tsdf("expected") + init_tsdf = self.get_test_df_builder("init").as_tsdf() + expected_tsdf = self.get_test_df_builder("expected").as_tsdf() target_ts = "2020-09-01 00:02:00" @@ -829,26 +822,26 @@ def test_priorTo_numeric_timestamp(self): target_dbl = self.__timestamp_to_double(target_ts) prior_dbl_tsdf = init_dbl_tsdf.priorTo(target_dbl) - self.assertDataFrameEquality(prior_dbl_tsdf, expected_dbl_tsdf, from_tsdf=True) + self.assertDataFrameEquality(prior_dbl_tsdf, expected_dbl_tsdf, ignore_column_order=True,) def test_subsequentTo_string_timestamp(self): """ Test of time-slicing subsequentTo(..) function """ - init_tsdf = self.get_data_as_tsdf("init") - expected_tsdf = self.get_data_as_tsdf("expected") + init_tsdf = self.get_test_df_builder("init").as_tsdf() + expected_tsdf = self.get_test_df_builder("expected").as_tsdf() target_ts = "2020-09-01 00:02:00" subsequent_tsdf = init_tsdf.subsequentTo(target_ts) - self.assertDataFrameEquality(subsequent_tsdf, expected_tsdf, from_tsdf=True) + self.assertDataFrameEquality(subsequent_tsdf, expected_tsdf) def test_subsequentTo_numeric_timestamp(self): """ Test of time-slicing subsequentTo(..) 
function """ - init_tsdf = self.get_data_as_tsdf("init") - expected_tsdf = self.get_data_as_tsdf("expected") + init_tsdf = self.get_test_df_builder("init").as_tsdf() + expected_tsdf = self.get_test_df_builder("expected").as_tsdf() target_ts = "2020-09-01 00:02:00" @@ -860,16 +853,16 @@ def test_subsequentTo_numeric_timestamp(self): subsequent_dbl_tsdf = init_dbl_tsdf.subsequentTo(target_dbl) self.assertDataFrameEquality( - subsequent_dbl_tsdf, expected_dbl_tsdf, from_tsdf=True + subsequent_dbl_tsdf, expected_dbl_tsdf ) def test__rowsBetweenWindow(self): - init_tsdf = self.get_data_as_tsdf("init") + init_tsdf = self.get_test_df_builder("init").as_tsdf() self.assertIsInstance(init_tsdf._TSDF__rowsBetweenWindow(1, 1), WindowSpec) def test_withPartitionCols(self): - init_tsdf = self.get_data_as_tsdf("init") + init_tsdf = self.get_test_df_builder("init").as_tsdf() actual_tsdf = init_tsdf.withPartitionCols(["symbol"]) @@ -884,8 +877,8 @@ def test_fourier_transform(self): """Test of fourier transform functionality in TSDF objects""" # construct dataframes - tsdf_init = self.get_data_as_tsdf("init") - dfExpected = self.get_data_as_sdf("expected") + tsdf_init = self.get_test_df_builder("init").as_tsdf() + dfExpected = self.get_test_df_builder("expected").as_sdf() # convert to TSDF result_tsdf = tsdf_init.fourier_transform(1, "val") @@ -897,8 +890,8 @@ def test_fourier_transform_valid_sequence_col_empty_partition_cols(self): """Test of fourier transform functionality in TSDF objects""" # construct dataframes - tsdf_init = self.get_data_as_tsdf("init") - dfExpected = self.get_data_as_sdf("expected") + tsdf_init = self.get_test_df_builder("init").as_tsdf() + dfExpected = self.get_test_df_builder("expected").as_sdf() # convert to TSDF result_tsdf = tsdf_init.fourier_transform(1, "val") @@ -910,8 +903,8 @@ def test_fourier_transform_valid_sequence_col_valid_partition_cols(self): """Test of fourier transform functionality in TSDF objects""" # construct dataframes - tsdf_init = self.get_data_as_tsdf("init") - dfExpected = self.get_data_as_sdf("expected") + tsdf_init = self.get_test_df_builder("init").as_tsdf() + dfExpected = self.get_test_df_builder("expected").as_sdf() # convert to TSDF result_tsdf = tsdf_init.fourier_transform(1, "val") @@ -923,8 +916,8 @@ def test_fourier_transform_no_sequence_col_empty_partition_cols(self): """Test of fourier transform functionality in TSDF objects""" # construct dataframes - tsdf_init = self.get_data_as_tsdf("init") - dfExpected = self.get_data_as_sdf("expected") + tsdf_init = self.get_test_df_builder("init").as_tsdf() + dfExpected = self.get_test_df_builder("expected").as_sdf() # convert to TSDF result_tsdf = tsdf_init.fourier_transform(1, "val") @@ -1018,10 +1011,10 @@ def test_resample(self): """Test of range stats for 20 minute rolling window""" # construct dataframes - tsdf_input = self.get_data_as_tsdf("input") - dfExpected = self.get_data_as_sdf("expected") - expected_30s_df = self.get_data_as_sdf("expected30m") - barsExpected = self.get_data_as_sdf("expectedbars") + tsdf_input = self.get_test_df_builder("input").as_tsdf() + dfExpected = self.get_test_df_builder("expected").as_sdf() + expected_30s_df = self.get_test_df_builder("expected30m").as_sdf() + barsExpected = self.get_test_df_builder("expectedbars").as_sdf() # 1 minute aggregation featured_df = tsdf_input.resample(freq="min", func="floor", prefix="floor").df @@ -1045,8 +1038,8 @@ def test_resample_millis(self): """Test of resampling for millisecond windows""" # construct dataframes - tsdf_init = 
self.get_data_as_tsdf("init") - dfExpected = self.get_data_as_sdf("expectedms") + tsdf_init = self.get_test_df_builder("init").as_tsdf() + dfExpected = self.get_test_df_builder("expectedms").as_sdf() # 30 minute aggregation resample_ms = tsdf_init.resample(freq="ms", func="mean").df.withColumn( @@ -1059,9 +1052,9 @@ def test_upsample(self): """Test of range stats for 20 minute rolling window""" # construct dataframes - tsdf_input = self.get_data_as_tsdf("input") - expected_30s_df = self.get_data_as_sdf("expected30m") - barsExpected = self.get_data_as_sdf("expectedbars") + tsdf_input = self.get_test_df_builder("input").as_tsdf() + expected_30s_df = self.get_test_df_builder("expected30m").as_sdf() + barsExpected = self.get_test_df_builder("expectedbars").as_sdf() resample_30m = tsdf_input.resample( freq="5 minutes", func="mean", fill=True @@ -1092,8 +1085,8 @@ class ExtractStateIntervalsTest(SparkTest): def test_eq_0(self): # construct dataframes - input_tsdf: TSDF = self.get_data_as_tsdf("input") - expected_df: DataFrame = self.get_data_as_sdf("expected") + input_tsdf: TSDF = self.get_test_df_builder("input").as_tsdf() + expected_df: DataFrame = self.get_test_df_builder("expected").as_sdf() # call extractStateIntervals method intervals_eq_1_df: DataFrame = input_tsdf.extractStateIntervals( @@ -1109,8 +1102,8 @@ def test_eq_0(self): def test_eq_1(self): # construct dataframes - input_tsdf: TSDF = self.get_data_as_tsdf("input") - expected_df: DataFrame = self.get_data_as_sdf("expected") + input_tsdf: TSDF = self.get_test_df_builder("input").as_tsdf() + expected_df: DataFrame = self.get_test_df_builder("expected").as_sdf() # call extractStateIntervals method intervals_eq_1_df: DataFrame = input_tsdf.extractStateIntervals( @@ -1126,8 +1119,8 @@ def test_eq_1(self): def test_ne_0(self): # construct dataframes - input_tsdf: TSDF = self.get_data_as_tsdf("input") - expected_df: DataFrame = self.get_data_as_sdf("expected") + input_tsdf: TSDF = self.get_test_df_builder("input").as_tsdf() + expected_df: DataFrame = self.get_test_df_builder("expected").as_sdf() # call extractStateIntervals method intervals_ne_0_df: DataFrame = input_tsdf.extractStateIntervals( @@ -1143,8 +1136,8 @@ def test_ne_0(self): def test_ne_1(self): # construct dataframes - input_tsdf: TSDF = self.get_data_as_tsdf("input") - expected_df: DataFrame = self.get_data_as_sdf("expected") + input_tsdf: TSDF = self.get_test_df_builder("input").as_tsdf() + expected_df: DataFrame = self.get_test_df_builder("expected").as_sdf() # call extractStateIntervals method intervals_ne_0_df: DataFrame = input_tsdf.extractStateIntervals( @@ -1160,8 +1153,8 @@ def test_ne_1(self): def test_gt_0(self): # construct dataframes - input_tsdf: TSDF = self.get_data_as_tsdf("input") - expected_df: DataFrame = self.get_data_as_sdf("expected") + input_tsdf: TSDF = self.get_test_df_builder("input").as_tsdf() + expected_df: DataFrame = self.get_test_df_builder("expected").as_sdf() # call extractStateIntervals method intervals_gt_df: DataFrame = input_tsdf.extractStateIntervals( @@ -1172,8 +1165,8 @@ def test_gt_0(self): def test_gt_1(self): # construct dataframes - input_tsdf: TSDF = self.get_data_as_tsdf("input") - expected_df: DataFrame = self.get_data_as_sdf("expected") + input_tsdf: TSDF = self.get_test_df_builder("input").as_tsdf() + expected_df: DataFrame = self.get_test_df_builder("expected").as_sdf() # call extractStateIntervals method intervals_gt_df: DataFrame = input_tsdf.extractStateIntervals( @@ -1184,8 +1177,8 @@ def test_gt_1(self): def 
test_lt_0(self): # construct dataframes - input_tsdf: TSDF = self.get_data_as_tsdf("input") - expected_df: DataFrame = self.get_data_as_sdf("expected") + input_tsdf: TSDF = self.get_test_df_builder("input").as_tsdf() + expected_df: DataFrame = self.get_test_df_builder("expected").as_sdf() # call extractStateIntervals method intervals_lt_df: DataFrame = input_tsdf.extractStateIntervals( @@ -1197,8 +1190,8 @@ def test_lt_0(self): def test_lt_1(self): # construct dataframes - input_tsdf: TSDF = self.get_data_as_tsdf("input") - expected_df: DataFrame = self.get_data_as_sdf("expected") + input_tsdf: TSDF = self.get_test_df_builder("input").as_tsdf() + expected_df: DataFrame = self.get_test_df_builder("expected").as_sdf() # call extractStateIntervals method intervals_lt_df: DataFrame = input_tsdf.extractStateIntervals( @@ -1210,8 +1203,8 @@ def test_lt_1(self): def test_gte_0(self): # construct dataframes - input_tsdf: TSDF = self.get_data_as_tsdf("input") - expected_df: DataFrame = self.get_data_as_sdf("expected") + input_tsdf: TSDF = self.get_test_df_builder("input").as_tsdf() + expected_df: DataFrame = self.get_test_df_builder("expected").as_sdf() # call extractStateIntervals method intervals_gt_df: DataFrame = input_tsdf.extractStateIntervals( @@ -1222,8 +1215,8 @@ def test_gte_0(self): def test_gte_1(self): # construct dataframes - input_tsdf: TSDF = self.get_data_as_tsdf("input") - expected_df: DataFrame = self.get_data_as_sdf("expected") + input_tsdf: TSDF = self.get_test_df_builder("input").as_tsdf() + expected_df: DataFrame = self.get_test_df_builder("expected").as_sdf() # call extractStateIntervals method intervals_gt_df: DataFrame = input_tsdf.extractStateIntervals( @@ -1234,8 +1227,8 @@ def test_gte_1(self): def test_lte_0(self): # construct dataframes - input_tsdf: TSDF = self.get_data_as_tsdf("input") - expected_df: DataFrame = self.get_data_as_sdf("expected") + input_tsdf: TSDF = self.get_test_df_builder("input").as_tsdf() + expected_df: DataFrame = self.get_test_df_builder("expected").as_sdf() # call extractStateIntervals method intervals_lte_df: DataFrame = input_tsdf.extractStateIntervals( @@ -1247,8 +1240,8 @@ def test_lte_0(self): def test_lte_1(self): # construct dataframes - input_tsdf: TSDF = self.get_data_as_tsdf("input") - expected_df: DataFrame = self.get_data_as_sdf("expected") + input_tsdf: TSDF = self.get_test_df_builder("input").as_tsdf() + expected_df: DataFrame = self.get_test_df_builder("expected").as_sdf() # call extractStateIntervals method intervals_lte_df: DataFrame = input_tsdf.extractStateIntervals( @@ -1260,8 +1253,8 @@ def test_lte_1(self): def test_threshold_fn(self): # construct dataframes - input_tsdf: TSDF = self.get_data_as_tsdf("input") - expected_df: DataFrame = self.get_data_as_sdf("expected") + input_tsdf: TSDF = self.get_test_df_builder("input").as_tsdf() + expected_df: DataFrame = self.get_test_df_builder("expected").as_sdf() # threshold state function def threshold_fn(a: Column, b: Column) -> Column: @@ -1277,8 +1270,8 @@ def threshold_fn(a: Column, b: Column) -> Column: def test_null_safe_eq_0(self): # construct dataframes - input_tsdf: TSDF = self.get_data_as_tsdf("input") - expected_df: DataFrame = self.get_data_as_sdf("expected") + input_tsdf: TSDF = self.get_test_df_builder("input").as_tsdf() + expected_df: DataFrame = self.get_test_df_builder("expected").as_sdf() intervals_eq_df: DataFrame = input_tsdf.extractStateIntervals( "metric_1", "metric_2", "metric_3", state_definition="<=>" @@ -1291,8 +1284,8 @@ def test_null_safe_eq_0(self): 
def test_null_safe_eq_1(self): # construct dataframes - input_tsdf: TSDF = self.get_data_as_tsdf("input") - expected_df: DataFrame = self.get_data_as_sdf("expected") + input_tsdf: TSDF = self.get_test_df_builder("input").as_tsdf() + expected_df: DataFrame = self.get_test_df_builder("expected").as_sdf() intervals_eq_df: DataFrame = input_tsdf.extractStateIntervals( "metric_1", "metric_2", "metric_3", state_definition="<=>" @@ -1305,8 +1298,8 @@ def test_null_safe_eq_1(self): def test_adjacent_intervals(self): # construct dataframes - input_tsdf: TSDF = self.get_data_as_tsdf("input") - expected_df: DataFrame = self.get_data_as_sdf("expected") + input_tsdf: TSDF = self.get_test_df_builder("input").as_tsdf() + expected_df: DataFrame = self.get_test_df_builder("expected").as_sdf() intervals_eq_df: DataFrame = input_tsdf.extractStateIntervals( "metric_1", "metric_2", "metric_3" @@ -1317,7 +1310,7 @@ def test_adjacent_intervals(self): def test_invalid_state_definition_str(self): # construct dataframes - input_tsdf: TSDF = self.get_data_as_tsdf("input") + input_tsdf: TSDF = self.get_test_df_builder("input").as_tsdf() try: input_tsdf.extractStateIntervals( @@ -1328,7 +1321,7 @@ def test_invalid_state_definition_str(self): def test_invalid_state_definition_type(self): # construct dataframes - input_tsdf: TSDF = self.get_data_as_tsdf("input") + input_tsdf: TSDF = self.get_test_df_builder("input").as_tsdf() try: input_tsdf.extractStateIntervals( diff --git a/python/tests/unit_test_data/tsdf_tests.json b/python/tests/unit_test_data/tsdf_tests.json index 7000c602..eb6b2193 100644 --- a/python/tests/unit_test_data/tsdf_tests.json +++ b/python/tests/unit_test_data/tsdf_tests.json @@ -1,53 +1,58 @@ { "__SharedData": { "temp_slice_init_data": { - "schema": "symbol string, event_ts string, trade_pr float", - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ], - "data": [ - [ - "S1", - "2020-08-01 00:00:10", - 349.21 - ], - [ - "S1", - "2020-08-01 00:01:12", - 351.32 - ], - [ - "S1", - "2020-09-01 00:02:10", - 361.1 - ], - [ - "S1", - "2020-09-01 00:19:12", - 362.1 - ], - [ - "S2", - "2020-08-01 00:01:10", - 743.01 - ], - [ - "S2", - "2020-08-01 00:01:24", - 751.92 - ], - [ - "S2", - "2020-09-01 00:02:10", - 761.10 - ], - [ - "S2", - "2020-09-01 00:20:42", - 762.33 + "tsdf": { + "ts_col": "event_ts", + "partition_cols": [ + "symbol" + ] + }, + "df": { + "schema": "symbol string, event_ts string, trade_pr float", + "ts_convert": ["event_ts"], + "data": [ + [ + "S1", + "2020-08-01 00:00:10", + 349.21 + ], + [ + "S1", + "2020-08-01 00:01:12", + 351.32 + ], + [ + "S1", + "2020-09-01 00:02:10", + 361.1 + ], + [ + "S1", + "2020-09-01 00:19:12", + 362.1 + ], + [ + "S2", + "2020-08-01 00:01:10", + 743.01 + ], + [ + "S2", + "2020-08-01 00:01:24", + 751.92 + ], + [ + "S2", + "2020-09-01 00:02:10", + 761.10 + ], + [ + "S2", + "2020-09-01 00:20:42", + 762.33 + ] ] - ] + } } }, "TSDFBaseTests": { @@ -101,18 +106,25 @@ "$ref": "#/__SharedData/temp_slice_init_data" }, "right_tsdf": { - "schema": "symbol string, event_ts string, trade_pr float", - "ts_col": "event_ts", - "partition_cols": [ - "event_ts" - ], - "data": [ - [ - "S1", - "2020-08-01 00:00:10", - 349.21 + "tsdf": { + "ts_col": "event_ts", + "partition_cols": [ + "event_ts" ] - ] + }, + "df": { + "schema": "symbol string, event_ts string, trade_pr float", + "ts_convert": [ + "event_ts" + ], + "data": [ + [ + "S1", + "2020-08-01 00:00:10", + 349.21 + ] + ] + } } }, "test__validateTsColMatch": { @@ -120,10 +132,15 @@ "$ref": 
"#/__SharedData/temp_slice_init_data" }, "right_tsdf": { + "tsdf": { + "ts_col": "event_ts", + "partition_cols": [ + "symbol" + ] + }, "schema": "symbol string, event_ts int, trade_pr float", - "ts_col": "event_ts", - "partition_cols": [ - "symbol" + "ts_convert": [ + "event_ts" ], "data": [ [ @@ -133,6 +150,7 @@ ] ] } + } }, "test__addPrefixToColumns_non_empty_string": { "init": { @@ -164,69 +182,76 @@ "$ref": "#/__SharedData/temp_slice_init_data" }, "expected": { - "schema": "symbol string, event_ts string, trade_pr float, ts_partition int, is_original int", - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ], - "data": [ - [ - "S1", - "2020-08-01 00:00:10", - 349.21, - 1596240010, - 1 - ], - [ - "S1", - "2020-08-01 00:01:12", - 351.32, - 1596240070, - 1 - ], - [ - "S1", - "2020-09-01 00:02:10", - 361.1, - 1598918530, - 1 - ], - [ - "S1", - "2020-09-01 00:19:12", - 362.1, - 1598919550, - 1 - ], - [ - "S2", - "2020-08-01 00:01:10", - 743.01, - 1596240070, - 1 - ], - [ - "S2", - "2020-08-01 00:01:24", - 751.92, - 1596240080, - 1 - ], - [ - "S2", - "2020-09-01 00:02:10", - 761.1, - 1598918530, - 1 + "tsdf": { + "ts_col": "event_ts", + "partition_cols": [ + "symbol" + ] + }, + "df": { + "schema": "symbol string, event_ts string, trade_pr float, ts_partition int, is_original int", + "ts_convert": [ + "event_ts" ], - [ - "S2", - "2020-09-01 00:20:42", - 762.33, - 1598919640, - 1 + "data": [ + [ + "S1", + "2020-08-01 00:00:10", + 349.21, + 1596240010, + 1 + ], + [ + "S1", + "2020-08-01 00:01:12", + 351.32, + 1596240070, + 1 + ], + [ + "S1", + "2020-09-01 00:02:10", + 361.1, + 1598918530, + 1 + ], + [ + "S1", + "2020-09-01 00:19:12", + 362.1, + 1598919550, + 1 + ], + [ + "S2", + "2020-08-01 00:01:10", + 743.01, + 1596240070, + 1 + ], + [ + "S2", + "2020-08-01 00:01:24", + 751.92, + 1596240080, + 1 + ], + [ + "S2", + "2020-09-01 00:02:10", + 761.1, + 1598918530, + 1 + ], + [ + "S2", + "2020-09-01 00:20:42", + 762.33, + 1598919640, + 1 + ] ] - ] + } } }, "test__getTimePartitions_with_fraction": { @@ -234,107 +259,114 @@ "$ref": "#/__SharedData/temp_slice_init_data" }, "expected": { - "schema": "symbol string, event_ts string, trade_pr float, ts_partition int, is_original int", - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ], - "data": [ - [ - "S1", - "2020-08-01 00:00:10", - 349.21, - 1596240010, - 1 - ], - [ - "S1", - "2020-08-01 00:01:12", - 351.32, - 1596240070, - 1 - ], - [ - "S1", - "2020-09-01 00:02:10", - 361.1, - 1598918530, - 1 - ], - [ - "S1", - "2020-09-01 00:19:12", - 362.1, - 1598919550, - 1 - ], - [ - "S2", - "2020-08-01 00:01:10", - 743.01, - 1596240070, - 1 - ], - [ - "S2", - "2020-08-01 00:01:24", - 751.92, - 1596240080, - 1 - ], - [ - "S2", - "2020-09-01 00:02:10", - 761.1, - 1598918530, - 1 - ], - [ - "S2", - "2020-09-01 00:20:42", - 762.33, - 1598919640, - 1 + "tsdf": { + "ts_col": "event_ts", + "partition_cols": [ + "symbol" ] - ] - } - }, - "test_select_empty": { - "init": { - "$ref": "#/__SharedData/temp_slice_init_data" - } - }, - "test_select_only_required_cols": { - "init": { - "$ref": "#/__SharedData/temp_slice_init_data" - } - }, - "test_select_all_cols": { - "init": { - "$ref": "#/__SharedData/temp_slice_init_data" - } - }, - "test_show": { - "init": { - "$ref": "#/__SharedData/temp_slice_init_data" - } - }, - "test_show_n_5": { - "init": { - "$ref": "#/__SharedData/temp_slice_init_data" - } - }, - "test_show_k_gt_n": { - "init": { - "$ref": "#/__SharedData/temp_slice_init_data" - } - }, - "test_show_truncate_false": { - "init": { - "$ref": 
"#/__SharedData/temp_slice_init_data" - } - }, - "test_show_vertical_true": { + }, + "df": { + "schema": "symbol string, event_ts string, trade_pr float, ts_partition int, is_original int", + "ts_convert": [ + "event_ts" + ], + "data": [ + [ + "S1", + "2020-08-01 00:00:10", + 349.21, + 1596240010, + 1 + ], + [ + "S1", + "2020-08-01 00:01:12", + 351.32, + 1596240070, + 1 + ], + [ + "S1", + "2020-09-01 00:02:10", + 361.1, + 1598918530, + 1 + ], + [ + "S1", + "2020-09-01 00:19:12", + 362.1, + 1598919550, + 1 + ], + [ + "S2", + "2020-08-01 00:01:10", + 743.01, + 1596240070, + 1 + ], + [ + "S2", + "2020-08-01 00:01:24", + 751.92, + 1596240080, + 1 + ], + [ + "S2", + "2020-09-01 00:02:10", + 761.1, + 1598918530, + 1 + ], + [ + "S2", + "2020-09-01 00:20:42", + 762.33, + 1598919640, + 1 + ] + ] + } + } + }, + "test_select_empty": { + "init": { + "$ref": "#/__SharedData/temp_slice_init_data" + } + }, + "test_select_only_required_cols": { + "init": { + "$ref": "#/__SharedData/temp_slice_init_data" + } + }, + "test_select_all_cols": { + "init": { + "$ref": "#/__SharedData/temp_slice_init_data" + } + }, + "test_show": { + "init": { + "$ref": "#/__SharedData/temp_slice_init_data" + } + }, + "test_show_n_5": { + "init": { + "$ref": "#/__SharedData/temp_slice_init_data" + } + }, + "test_show_k_gt_n": { + "init": { + "$ref": "#/__SharedData/temp_slice_init_data" + } + }, + "test_show_truncate_false": { + "init": { + "$ref": "#/__SharedData/temp_slice_init_data" + } + }, + "test_show_vertical_true": { "init": { "$ref": "#/__SharedData/temp_slice_init_data" } @@ -351,10 +383,16 @@ }, "test_describe": { "init": { + "tsdf": { + "ts_col": "event_ts", + "partition_cols": [ + "symbol" + ] + }, + "df": { "schema": "symbol string, event_ts string, trade_pr float", - "ts_col": "event_ts", - "partition_cols": [ - "symbol" + "ts_convert": [ + "event_ts" ], "data": [ [ @@ -378,6 +416,7 @@ 362.1 ] ] + } } }, "test__getSparkPlan": { @@ -387,33 +426,40 @@ }, "test__getBytesFromPlan": { "init": { - "schema": "symbol string, event_ts string, trade_pr float", - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ], - "data": [ - [ - "S1", - "2020-08-01 00:00:10", - 349.21 - ], - [ - "S1", - "2020-08-01 00:01:12", - 351.32 - ], - [ - "S1", - "2020-09-01 00:02:10", - 361.1 + "tsdf": { + "ts_col": "event_ts", + "partition_cols": [ + "symbol" + ] + }, + "df": { + "schema": "symbol string, event_ts string, trade_pr float", + "ts_convert": [ + "event_ts" ], - [ - "S1", - "2020-09-01 00:19:12", - 362.1 + "data": [ + [ + "S1", + "2020-08-01 00:00:10", + 349.21 + ], + [ + "S1", + "2020-08-01 00:01:12", + 351.32 + ], + [ + "S1", + "2020-09-01 00:02:10", + 361.1 + ], + [ + "S1", + "2020-09-01 00:19:12", + 362.1 + ] ] - ] + } } }, "test__getBytesFromPlan_search_result_is_None": { @@ -441,23 +487,30 @@ "$ref": "#/__SharedData/temp_slice_init_data" }, "expected": { - "schema": "symbol string, event_ts string, trade_pr float", - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ], - "data": [ - [ - "S1", - "2020-09-01 00:02:10", - 361.1 + "tsdf": { + "ts_col": "event_ts", + "partition_cols": [ + "symbol" + ] + }, + "df": { + "schema": "symbol string, event_ts string, trade_pr float", + "ts_convert": [ + "event_ts" ], - [ - "S2", - "2020-09-01 00:02:10", - 761.10 + "data": [ + [ + "S1", + "2020-09-01 00:02:10", + 361.1 + ], + [ + "S2", + "2020-09-01 00:02:10", + 761.10 + ] ] - ] + } } }, "test_at_numeric_timestamp": { @@ -473,33 +526,38 @@ "$ref": "#/__SharedData/temp_slice_init_data" }, "expected": { - "schema": "symbol 
string, event_ts string, trade_pr float", - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ], - "data": [ - [ - "S1", - "2020-08-01 00:00:10", - 349.21 - ], - [ - "S1", - "2020-08-01 00:01:12", - 351.32 - ], - [ - "S2", - "2020-08-01 00:01:10", - 743.01 - ], - [ - "S2", - "2020-08-01 00:01:24", - 751.92 + "tsdf": { + "ts_col": "event_ts", + "partition_cols": [ + "symbol" ] - ] + }, + "df": { + "schema": "symbol string, event_ts string, trade_pr float", + "ts_convert": ["event_ts"], + "data": [ + [ + "S1", + "2020-08-01 00:00:10", + 349.21 + ], + [ + "S1", + "2020-08-01 00:01:12", + 351.32 + ], + [ + "S2", + "2020-08-01 00:01:10", + 743.01 + ], + [ + "S2", + "2020-08-01 00:01:24", + 751.92 + ] + ] + } } }, "test_before_numeric_timestamp": { @@ -515,43 +573,50 @@ "$ref": "#/__SharedData/temp_slice_init_data" }, "expected": { - "schema": "symbol string, event_ts string, trade_pr float", - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ], - "data": [ - [ - "S1", - "2020-08-01 00:00:10", - 349.21 - ], - [ - "S1", - "2020-08-01 00:01:12", - 351.32 - ], - [ - "S1", - "2020-09-01 00:02:10", - 361.1 - ], - [ - "S2", - "2020-08-01 00:01:10", - 743.01 + "tsdf": { + "ts_col": "event_ts", + "partition_cols": [ + "symbol" + ] + }, + "df": { + "schema": "symbol string, event_ts string, trade_pr float", + "ts_convert": [ + "event_ts" ], - [ - "S2", - "2020-08-01 00:01:24", - 751.92 - ], - [ - "S2", - "2020-09-01 00:02:10", - 761.10 + "data": [ + [ + "S1", + "2020-08-01 00:00:10", + 349.21 + ], + [ + "S1", + "2020-08-01 00:01:12", + 351.32 + ], + [ + "S1", + "2020-09-01 00:02:10", + 361.1 + ], + [ + "S2", + "2020-08-01 00:01:10", + 743.01 + ], + [ + "S2", + "2020-08-01 00:01:24", + 751.92 + ], + [ + "S2", + "2020-09-01 00:02:10", + 761.10 + ] ] - ] + } } }, "test_atOrBefore_numeric_timestamp": { @@ -567,23 +632,28 @@ "$ref": "#/__SharedData/temp_slice_init_data" }, "expected": { - "schema": "symbol string, event_ts string, trade_pr float", - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ], - "data": [ - [ - "S1", - "2020-09-01 00:19:12", - 362.1 - ], - [ - "S2", - "2020-09-01 00:20:42", - 762.33 + "tsdf": { + "ts_col": "event_ts", + "partition_cols": [ + "symbol" ] - ] + }, + "df": { + "schema": "symbol string, event_ts string, trade_pr float", + "ts_convert": ["event_ts"], + "data": [ + [ + "S1", + "2020-09-01 00:19:12", + 362.1 + ], + [ + "S2", + "2020-09-01 00:20:42", + 762.33 + ] + ] + } } }, "test_after_numeric_timestamp": { @@ -599,33 +669,40 @@ "$ref": "#/__SharedData/temp_slice_init_data" }, "expected": { - "schema": "symbol string, event_ts string, trade_pr float", - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ], - "data": [ - [ - "S1", - "2020-09-01 00:02:10", - 361.1 - ], - [ - "S1", - "2020-09-01 00:19:12", - 362.1 - ], - [ - "S2", - "2020-09-01 00:02:10", - 761.10 + "tsdf": { + "ts_col": "event_ts", + "partition_cols": [ + "symbol" + ] + }, + "df": { + "schema": "symbol string, event_ts string, trade_pr float", + "ts_convert": [ + "event_ts" ], - [ - "S2", - "2020-09-01 00:20:42", - 762.33 + "data": [ + [ + "S1", + "2020-09-01 00:02:10", + 361.1 + ], + [ + "S1", + "2020-09-01 00:19:12", + 362.1 + ], + [ + "S2", + "2020-09-01 00:02:10", + 761.10 + ], + [ + "S2", + "2020-09-01 00:20:42", + 762.33 + ] ] - ] + } } }, "test_atOrAfter_numeric_timestamp": { @@ -641,38 +718,45 @@ "$ref": "#/__SharedData/temp_slice_init_data" }, "expected": { - "schema": "symbol string, event_ts string, trade_pr float", - "ts_col": "event_ts", - "partition_cols": [ - "symbol" 
- ], - "data": [ - [ - "S1", - "2020-08-01 00:01:12", - 351.32 - ], - [ - "S1", - "2020-09-01 00:02:10", - 361.1 - ], - [ - "S2", - "2020-08-01 00:01:10", - 743.01 - ], - [ - "S2", - "2020-08-01 00:01:24", - 751.92 + "tsdf": { + "ts_col": "event_ts", + "partition_cols": [ + "symbol" + ] + }, + "df": { + "schema": "symbol string, event_ts string, trade_pr float", + "ts_convert": [ + "event_ts" ], - [ - "S2", - "2020-09-01 00:02:10", - 761.10 + "data": [ + [ + "S1", + "2020-08-01 00:01:12", + 351.32 + ], + [ + "S1", + "2020-09-01 00:02:10", + 361.1 + ], + [ + "S2", + "2020-08-01 00:01:10", + 743.01 + ], + [ + "S2", + "2020-08-01 00:01:24", + 751.92 + ], + [ + "S2", + "2020-09-01 00:02:10", + 761.10 + ] ] - ] + } } }, "test_between_numeric_timestamp": { @@ -688,33 +772,41 @@ "$ref": "#/__SharedData/temp_slice_init_data" }, "expected": { - "schema": "symbol string, event_ts string, trade_pr float", - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ], - "data": [ - [ - "S1", - "2020-08-01 00:01:12", - 351.32 - ], - [ - "S1", - "2020-09-01 00:02:10", - 361.1 - ], - [ - "S2", - "2020-08-01 00:01:24", - 751.92 + "tsdf": { + "ts_col": "event_ts", + "partition_cols": [ + "symbol" + ] + }, + "df": { + "schema": "symbol string, event_ts string, trade_pr float", + "ts_convert": ["event_ts"], + "partition_cols": [ + "symbol" ], - [ - "S2", - "2020-09-01 00:02:10", - 761.10 + "data": [ + [ + "S1", + "2020-08-01 00:01:12", + 351.32 + ], + [ + "S1", + "2020-09-01 00:02:10", + 361.1 + ], + [ + "S2", + "2020-08-01 00:01:24", + 751.92 + ], + [ + "S2", + "2020-09-01 00:02:10", + 761.10 + ] ] - ] + } } }, "test_between_exclusive_numeric_timestamp": { @@ -730,43 +822,48 @@ "$ref": "#/__SharedData/temp_slice_init_data" }, "expected": { - "schema": "symbol string, event_ts string, trade_pr float", - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ], - "data": [ - [ - "S1", - "2020-08-01 00:00:10", - 349.21 - ], - [ - "S1", - "2020-08-01 00:01:12", - 351.32 - ], - [ - "S1", - "2020-09-01 00:02:10", - 361.1 - ], - [ - "S2", - "2020-08-01 00:01:10", - 743.01 - ], - [ - "S2", - "2020-08-01 00:01:24", - 751.92 - ], - [ - "S2", - "2020-09-01 00:02:10", - 761.10 + "tsdf": { + "ts_col": "event_ts", + "partition_cols": [ + "symbol" ] - ] + }, + "df": { + "schema": "symbol string, event_ts string, trade_pr float", + "ts_convert": ["event_ts"], + "data": [ + [ + "S1", + "2020-08-01 00:00:10", + 349.21 + ], + [ + "S1", + "2020-08-01 00:01:12", + 351.32 + ], + [ + "S1", + "2020-09-01 00:02:10", + 361.1 + ], + [ + "S2", + "2020-08-01 00:01:10", + 743.01 + ], + [ + "S2", + "2020-08-01 00:01:24", + 751.92 + ], + [ + "S2", + "2020-09-01 00:02:10", + 761.10 + ] + ] + } } }, "test_earliest_numeric_timestamp": { @@ -782,43 +879,53 @@ "$ref": "#/__SharedData/temp_slice_init_data" }, "expected": { - "schema": "symbol string, event_ts string, trade_pr float", - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ], - "data": [ - [ - "S1", - "2020-08-01 00:01:12", - 351.32 - ], - [ - "S1", - "2020-09-01 00:02:10", - 361.1 - ], - [ - "S1", - "2020-09-01 00:19:12", - 362.1 - ], - [ - "S2", - "2020-08-01 00:01:24", - 751.92 + "tsdf": { + "ts_col": "event_ts", + "partition_cols": [ + "symbol" + ] + }, + "df": { + "schema": "symbol string, event_ts string, trade_pr float", + "ts_convert": [ + "event_ts" ], - [ - "S2", - "2020-09-01 00:02:10", - 761.10 + "partition_cols": [ + "symbol" ], - [ - "S2", - "2020-09-01 00:20:42", - 762.33 + "data": [ + [ + "S1", + "2020-08-01 00:01:12", + 351.32 + ], + [ + "S1", + "2020-09-01 
00:02:10", + 361.1 + ], + [ + "S1", + "2020-09-01 00:19:12", + 362.1 + ], + [ + "S2", + "2020-08-01 00:01:24", + 751.92 + ], + [ + "S2", + "2020-09-01 00:02:10", + 761.10 + ], + [ + "S2", + "2020-09-01 00:20:42", + 762.33 + ] ] - ] + } } }, "test_latest_numeric_timestamp": { @@ -834,23 +941,30 @@ "$ref": "#/__SharedData/temp_slice_init_data" }, "expected": { - "schema": "symbol string, event_ts string, trade_pr float", - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ], - "data": [ - [ - "S1", - "2020-08-01 00:01:12", - 351.32 + "tsdf": { + "ts_col": "event_ts", + "partition_cols": [ + "symbol" + ] + }, + "df": { + "schema": "symbol string, event_ts string, trade_pr float", + "ts_col": [ + "event_ts" ], - [ - "S2", - "2020-08-01 00:01:24", - 751.92 + "data": [ + [ + "S1", + "2020-08-01 00:01:12", + 351.32 + ], + [ + "S2", + "2020-08-01 00:01:24", + 751.92 + ] ] - ] + } } }, "test_priorTo_numeric_timestamp": { @@ -866,23 +980,30 @@ "$ref": "#/__SharedData/temp_slice_init_data" }, "expected": { - "schema": "symbol string, event_ts string, trade_pr float", - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ], - "data": [ - [ - "S1", - "2020-09-01 00:02:10", - 361.1 + "tsdf": { + "ts_col": "event_ts", + "partition_cols": [ + "symbol" + ] + }, + "df": { + "schema": "symbol string, event_ts string, trade_pr float", + "ts_convert": [ + "event_ts" ], - [ - "S2", - "2020-09-01 00:02:10", - 761.10 + "data": [ + [ + "S1", + "2020-09-01 00:02:10", + 361.1 + ], + [ + "S2", + "2020-09-01 00:02:10", + 761.10 + ] ] - ] + } } }, "test_subsequentTo_numeric_timestamp": { @@ -900,10 +1021,17 @@ }, "test_withPartitionCols": { "init": { - "schema": "symbol string, event_ts string, trade_pr float", - "ts_col": "event_ts", - "data": { - "$ref": "#/__SharedData/temp_slice_init_data/data" + "tsdf": { + "ts_col": "event_ts" + }, + "df": { + "schema": "symbol string, event_ts string, trade_pr float", + "ts_convert": [ + "event_ts" + ], + "data": { + "$ref": "#/__SharedData/temp_slice_init_data/df/data" + } } } } From f6f3520945e8728d53bd142de32248d05ad1cee7 Mon Sep 17 00:00:00 2001 From: Lorin Date: Mon, 8 Jul 2024 14:31:48 -0600 Subject: [PATCH 116/137] remove schema check since that is performed by chispa.asssert_df_equality --- python/tests/base.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/tests/base.py b/python/tests/base.py index 7525baff..8fb6c0cb 100644 --- a/python/tests/base.py +++ b/python/tests/base.py @@ -296,8 +296,6 @@ def assertDataFrameEquality( if isinstance(df1, TSDF): # df2 must also be a TSDF self.assertIsInstance(df2, TSDF) - # should have the same schemas - self.assertEqual(df1.df.schema, df2.df.schema) # get the underlying Spark DataFrames df1 = df1.df df2 = df2.df From a46dd39dcdb30cbd1e1378adfdb58086ba85cc2d Mon Sep 17 00:00:00 2001 From: Lorin Date: Mon, 8 Jul 2024 14:35:36 -0600 Subject: [PATCH 117/137] existing tests refactored and passing --- python/tests/unit_test_data/tsdf_tests.json | 47 +++++++++++++++------ 1 file changed, 35 insertions(+), 12 deletions(-) diff --git a/python/tests/unit_test_data/tsdf_tests.json b/python/tests/unit_test_data/tsdf_tests.json index eb6b2193..6d352fd8 100644 --- a/python/tests/unit_test_data/tsdf_tests.json +++ b/python/tests/unit_test_data/tsdf_tests.json @@ -66,6 +66,31 @@ "$ref": "#/__SharedData/temp_slice_init_data" } }, + "test__validate_ts_string_valid": { + "init": { + "$ref": "#/__SharedData/temp_slice_init_data" + } + }, + "test__validate_ts_string_alt_format_valid": { + "init": { + "$ref": 
"#/__SharedData/temp_slice_init_data" + } + }, + "test__validate_ts_string_with_microseconds_valid": { + "init": { + "$ref": "#/__SharedData/temp_slice_init_data" + } + }, + "test__validate_ts_string_alt_format_with_microseconds_valid": { + "init": { + "$ref": "#/__SharedData/temp_slice_init_data" + } + }, + "test__validate_ts_string_invalid": { + "init": { + "$ref": "#/__SharedData/temp_slice_init_data" + } + }, "test__validated_column_not_string": { "init": { "$ref": "#/__SharedData/temp_slice_init_data" @@ -138,19 +163,17 @@ "symbol" ] }, - "schema": "symbol string, event_ts int, trade_pr float", - "ts_convert": [ - "event_ts" - ], - "data": [ - [ - "S1", - 1596240010, - 349.21 + "df": { + "schema": "symbol string, event_ts int, trade_pr float", + "data": [ + [ + "S1", + 1596240010, + 349.21 + ] ] - ] + } } - } }, "test__addPrefixToColumns_non_empty_string": { "init": { @@ -949,7 +972,7 @@ }, "df": { "schema": "symbol string, event_ts string, trade_pr float", - "ts_col": [ + "ts_convert": [ "event_ts" ], "data": [ From 1bb8383cdf3a87be8b2e864abf11f860c2e6ca84 Mon Sep 17 00:00:00 2001 From: Lorin Date: Mon, 8 Jul 2024 15:30:23 -0600 Subject: [PATCH 118/137] interpol test case work --- python/tests/tsdf_tests.py | 9 +++++- python/tests/unit_test_data/tsdf_tests.json | 35 +++++++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/python/tests/tsdf_tests.py b/python/tests/tsdf_tests.py index 00567bf4..1f0a5756 100644 --- a/python/tests/tsdf_tests.py +++ b/python/tests/tsdf_tests.py @@ -869,7 +869,14 @@ def test_withPartitionCols(self): self.assertEqual(init_tsdf.partitionCols, []) self.assertEqual(actual_tsdf.partitionCols, ["symbol"]) - def test_tsdf_interpolate(self): ... + # def test_tsdf_interpolate(self): + # # TODO: wicked slow + # init_tsdf = self.get_test_df_builder("init").as_tsdf() + # expected_tsdf = self.get_test_df_builder("expected").as_tsdf() + # + # actual_tsdf = init_tsdf.interpolate("zero", "minute", "floor") + # + # self.assertDataFrameEquality(actual_tsdf, expected_tsdf) class FourierTransformTest(SparkTest): diff --git a/python/tests/unit_test_data/tsdf_tests.json b/python/tests/unit_test_data/tsdf_tests.json index 6d352fd8..0c4b7a28 100644 --- a/python/tests/unit_test_data/tsdf_tests.json +++ b/python/tests/unit_test_data/tsdf_tests.json @@ -1057,6 +1057,41 @@ } } } + }, + "test_tsdf_interpolate": { + "init": { + "tsdf": { + "ts_col": "event_ts" + }, + "df": { + "schema": "symbol string, event_ts string, trade_pr float", + "ts_convert": [ + "event_ts" + ], + "data": { + "$ref": "#/__SharedData/temp_slice_init_data/df/data" + } + } + }, + "expected": { + "tsdf": { + "ts_col": "event_ts" + }, + "df": { + "schema": "event_ts string, trade_pr float", + "ts_convert": [ + "event_ts" + ], + "data": [ + ["2020-09-01 00:20:38", 0.0], + ["2020-09-01 00:20:39", 0.0], + ["2020-09-01 00:20:40", 0.0], + ["2020-09-01 00:20:41", 0.0], + ["2020-09-01 00:20:42", 762.33] + ] + } + + } } }, "FourierTransformTest": { From 582dac5f3dd1cfa2b8e7eb56eb8887851b7e95fe Mon Sep 17 00:00:00 2001 From: Lorin Date: Mon, 8 Jul 2024 19:23:02 -0600 Subject: [PATCH 119/137] WIP for test_tsdf_interpolate --- python/tests/tsdf_tests.py | 34 ++++++++++----------- python/tests/unit_test_data/tsdf_tests.json | 1 + 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/python/tests/tsdf_tests.py b/python/tests/tsdf_tests.py index 1f0a5756..647aa022 100644 --- a/python/tests/tsdf_tests.py +++ b/python/tests/tsdf_tests.py @@ -869,14 +869,14 @@ def test_withPartitionCols(self): 
self.assertEqual(init_tsdf.partitionCols, []) self.assertEqual(actual_tsdf.partitionCols, ["symbol"]) - # def test_tsdf_interpolate(self): - # # TODO: wicked slow - # init_tsdf = self.get_test_df_builder("init").as_tsdf() - # expected_tsdf = self.get_test_df_builder("expected").as_tsdf() - # - # actual_tsdf = init_tsdf.interpolate("zero", "minute", "floor") - # - # self.assertDataFrameEquality(actual_tsdf, expected_tsdf) + def test_tsdf_interpolate(self): + # TODO: wicked slow + init_tsdf = self.get_test_df_builder("init").as_tsdf() + expected_tsdf = self.get_test_df_builder("expected").as_tsdf() + + actual_tsdf = init_tsdf.interpolate("zero", "second", "floor") + + self.assertDataFrameEquality(actual_tsdf, expected_tsdf) class FourierTransformTest(SparkTest): @@ -885,57 +885,57 @@ def test_fourier_transform(self): # construct dataframes tsdf_init = self.get_test_df_builder("init").as_tsdf() - dfExpected = self.get_test_df_builder("expected").as_sdf() + df_expected = self.get_test_df_builder("expected").as_sdf() # convert to TSDF result_tsdf = tsdf_init.fourier_transform(1, "val") # should be equal to the expected dataframe - self.assertDataFrameEquality(result_tsdf.df, dfExpected) + self.assertDataFrameEquality(result_tsdf.df, df_expected) def test_fourier_transform_valid_sequence_col_empty_partition_cols(self): """Test of fourier transform functionality in TSDF objects""" # construct dataframes tsdf_init = self.get_test_df_builder("init").as_tsdf() - dfExpected = self.get_test_df_builder("expected").as_sdf() + df_expected = self.get_test_df_builder("expected").as_sdf() # convert to TSDF result_tsdf = tsdf_init.fourier_transform(1, "val") # should be equal to the expected dataframe - self.assertDataFrameEquality(result_tsdf.df, dfExpected) + self.assertDataFrameEquality(result_tsdf.df, df_expected) def test_fourier_transform_valid_sequence_col_valid_partition_cols(self): """Test of fourier transform functionality in TSDF objects""" # construct dataframes tsdf_init = self.get_test_df_builder("init").as_tsdf() - dfExpected = self.get_test_df_builder("expected").as_sdf() + df_expected = self.get_test_df_builder("expected").as_sdf() # convert to TSDF result_tsdf = tsdf_init.fourier_transform(1, "val") # should be equal to the expected dataframe - self.assertDataFrameEquality(result_tsdf.df, dfExpected) + self.assertDataFrameEquality(result_tsdf.df, df_expected) def test_fourier_transform_no_sequence_col_empty_partition_cols(self): """Test of fourier transform functionality in TSDF objects""" # construct dataframes tsdf_init = self.get_test_df_builder("init").as_tsdf() - dfExpected = self.get_test_df_builder("expected").as_sdf() + df_expected = self.get_test_df_builder("expected").as_sdf() # convert to TSDF result_tsdf = tsdf_init.fourier_transform(1, "val") # should be equal to the expected dataframe - self.assertDataFrameEquality(result_tsdf.df, dfExpected) + self.assertDataFrameEquality(result_tsdf.df, df_expected) class RangeStatsTest(SparkTest): def test_range_stats(self): - """Test of range stats for 20 minute rolling window""" + """Test of range stats for 20-minute rolling window""" # construct dataframes tsdf_init = self.get_test_df_builder("init").as_tsdf() diff --git a/python/tests/unit_test_data/tsdf_tests.json b/python/tests/unit_test_data/tsdf_tests.json index 0c4b7a28..3c7580c6 100644 --- a/python/tests/unit_test_data/tsdf_tests.json +++ b/python/tests/unit_test_data/tsdf_tests.json @@ -1097,6 +1097,7 @@ "FourierTransformTest": { "test_fourier_transform": { "init": { + "schema": 
"group string, time long, val double", "ts_col": "time", "partition_cols": [ From 3d33a681f8d1824037636d3aca9432becfef3d26 Mon Sep 17 00:00:00 2001 From: Lorin Date: Tue, 9 Jul 2024 12:00:49 -0600 Subject: [PATCH 120/137] add idf getter to test dataframe builder --- python/tests/base.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/python/tests/base.py b/python/tests/base.py index 8fb6c0cb..b6760b14 100644 --- a/python/tests/base.py +++ b/python/tests/base.py @@ -57,6 +57,13 @@ def tsdf_constructor(self) -> Optional[str]: """ return self.__test_data.get("tsdf_constructor", None) + @property + def idf_construct(self) -> Optional[str]: + """ + :return: the name of the IntervalsDF constructor to use + """ + return self.__test_data.get("idf_constructor", None) + @property def tsdf(self) -> dict: """ @@ -64,6 +71,13 @@ def tsdf(self) -> dict: """ return self.__test_data["tsdf"] + @property + def idf(self) -> dict: + """ + :return: the start and end timestamp index metadata component of the test data + """ + return self.__test_data["idf"] + @property def ts_schema(self) -> Optional[dict]: """ @@ -138,6 +152,16 @@ def as_tsdf(self) -> TSDF: else: return TSDF(sdf, **self.tsdf) + def as_idf(self) -> IntervalsDF: + """ + Constructs a IntervalsDF from the test data + """ + sdf = self.as_sdf() + if self.idf_construct is not None: + return getattr(IntervalsDF, self.idf_construct)(sdf, **self.tsdf) + else: + return IntervalsDF(self.as_sdf(), **self.tsdf) + class SparkTest(unittest.TestCase): # From 1a5add8ce4f9e55d8d7644ccf4f3f6751ba9c0ec Mon Sep 17 00:00:00 2001 From: Lorin Date: Tue, 9 Jul 2024 12:01:03 -0600 Subject: [PATCH 121/137] tests for tsdf refactored --- python/tests/tsdf_tests.py | 46 +- python/tests/unit_test_data/tsdf_tests.json | 5036 ++++++++++--------- 2 files changed, 2634 insertions(+), 2448 deletions(-) diff --git a/python/tests/tsdf_tests.py b/python/tests/tsdf_tests.py index 647aa022..1c14f05b 100644 --- a/python/tests/tsdf_tests.py +++ b/python/tests/tsdf_tests.py @@ -869,14 +869,14 @@ def test_withPartitionCols(self): self.assertEqual(init_tsdf.partitionCols, []) self.assertEqual(actual_tsdf.partitionCols, ["symbol"]) - def test_tsdf_interpolate(self): - # TODO: wicked slow - init_tsdf = self.get_test_df_builder("init").as_tsdf() - expected_tsdf = self.get_test_df_builder("expected").as_tsdf() - - actual_tsdf = init_tsdf.interpolate("zero", "second", "floor") - - self.assertDataFrameEquality(actual_tsdf, expected_tsdf) + # def test_tsdf_interpolate(self): + # # TODO: remove this test + # init_tsdf = self.get_test_df_builder("init").as_tsdf() + # expected_tsdf = self.get_test_df_builder("expected").as_tsdf() + # + # actual_tsdf = init_tsdf.interpolate("zero", "second", "floor") + # actual_tsdf.df.show() + # self.assertDataFrameEquality(actual_tsdf, expected_tsdf) class FourierTransformTest(SparkTest): @@ -939,7 +939,7 @@ def test_range_stats(self): # construct dataframes tsdf_init = self.get_test_df_builder("init").as_tsdf() - dfExpected = self.get_test_df_builder("expected").as_sdf() + df_expected = self.get_test_df_builder("expected").as_sdf() # convert to TSDF @@ -960,7 +960,7 @@ def test_range_stats(self): ) # cast to decimal with precision in cents for simplicity - dfExpected = dfExpected.select( + df_expected = df_expected.select( sfn.col("symbol"), sfn.col("event_ts"), sfn.col("mean_trade_pr").cast("decimal(5, 2)"), @@ -973,14 +973,14 @@ def test_range_stats(self): ) # should be equal to the expected dataframe - 
self.assertDataFrameEquality(featured_df, dfExpected) + self.assertDataFrameEquality(featured_df, df_expected) def test_group_stats(self): """Test of range stats for 20 minute rolling window""" # construct dataframes tsdf_init = self.get_test_df_builder("init").as_tsdf() - dfExpected = self.get_test_df_builder("expected").as_sdf() + df_expected = self.get_test_df_builder("expected").as_sdf() # using lookback of 20 minutes featured_df = tsdf_init.withGroupedStats(freq="1 min").df @@ -998,7 +998,7 @@ def test_group_stats(self): ) # cast to decimal with precision in cents for simplicity - dfExpected = dfExpected.select( + df_expected = df_expected.select( sfn.col("symbol"), sfn.col("event_ts"), sfn.col("mean_trade_pr").cast("decimal(5, 2)"), @@ -1010,7 +1010,7 @@ def test_group_stats(self): ) # should be equal to the expected dataframe - self.assertDataFrameEquality(featured_df, dfExpected) + self.assertDataFrameEquality(featured_df, df_expected) class ResampleTest(SparkTest): @@ -1019,9 +1019,9 @@ def test_resample(self): # construct dataframes tsdf_input = self.get_test_df_builder("input").as_tsdf() - dfExpected = self.get_test_df_builder("expected").as_sdf() + df_expected = self.get_test_df_builder("expected").as_sdf() expected_30s_df = self.get_test_df_builder("expected30m").as_sdf() - barsExpected = self.get_test_df_builder("expectedbars").as_sdf() + bars_expected = self.get_test_df_builder("expectedbars").as_sdf() # 1 minute aggregation featured_df = tsdf_input.resample(freq="min", func="floor", prefix="floor").df @@ -1035,33 +1035,33 @@ def test_resample(self): ).df # should be equal to the expected dataframe - self.assertDataFrameEquality(featured_df, dfExpected) + self.assertDataFrameEquality(featured_df, df_expected) self.assertDataFrameEquality(resample_30m, expected_30s_df) # test bars summary - self.assertDataFrameEquality(bars, barsExpected) + self.assertDataFrameEquality(bars, bars_expected) def test_resample_millis(self): """Test of resampling for millisecond windows""" # construct dataframes tsdf_init = self.get_test_df_builder("init").as_tsdf() - dfExpected = self.get_test_df_builder("expectedms").as_sdf() + df_expected = self.get_test_df_builder("expectedms").as_sdf() # 30 minute aggregation resample_ms = tsdf_init.resample(freq="ms", func="mean").df.withColumn( "trade_pr", sfn.round(sfn.col("trade_pr"), 2) ) - self.assertDataFrameEquality(resample_ms, dfExpected) + self.assertDataFrameEquality(resample_ms, df_expected) def test_upsample(self): - """Test of range stats for 20 minute rolling window""" + """Test of range stats for 20-minute rolling window""" # construct dataframes tsdf_input = self.get_test_df_builder("input").as_tsdf() expected_30s_df = self.get_test_df_builder("expected30m").as_sdf() - barsExpected = self.get_test_df_builder("expectedbars").as_sdf() + bars_expected = self.get_test_df_builder("expectedbars").as_sdf() resample_30m = tsdf_input.resample( freq="5 minutes", func="mean", fill=True @@ -1084,7 +1084,7 @@ def test_upsample(self): self.assertDataFrameEquality(upsampled, expected_30s_df) # test bars summary - self.assertDataFrameEquality(bars, barsExpected) + self.assertDataFrameEquality(bars, bars_expected) class ExtractStateIntervalsTest(SparkTest): diff --git a/python/tests/unit_test_data/tsdf_tests.json b/python/tests/unit_test_data/tsdf_tests.json index 3c7580c6..99386d93 100644 --- a/python/tests/unit_test_data/tsdf_tests.json +++ b/python/tests/unit_test_data/tsdf_tests.json @@ -1097,524 +1097,564 @@ "FourierTransformTest": { 
"test_fourier_transform": { "init": { - - "schema": "group string, time long, val double", - "ts_col": "time", - "partition_cols": [ - "group" - ], - "data": [ - [ - "Emissions", - 1949, - 2206.690829 - ], - [ - "Emissions", - 1950, - 2382.046176 - ], - [ - "Emissions", - 1951, - 2526.687327 - ], - [ - "Emissions", - 1952, - 2473.373964 - ], - [ - "WindGen", - 1980, - 0.0 - ], - [ - "WindGen", - 1981, - 0.0 - ], - [ - "WindGen", - 1982, - 0.0 + "tsdf": { + "ts_col": "time", + "partition_cols": ["group"] + }, + "df": { + "schema": "group string, time long, val double", + "ts_convert": [ + "time" ], - [ - "WindGen", - 1983, - 0.029667962 + "data": [ + [ + "Emissions", + 1949, + 2206.690829 + ], + [ + "Emissions", + 1950, + 2382.046176 + ], + [ + "Emissions", + 1951, + 2526.687327 + ], + [ + "Emissions", + 1952, + 2473.373964 + ], + [ + "WindGen", + 1980, + 0.0 + ], + [ + "WindGen", + 1981, + 0.0 + ], + [ + "WindGen", + 1982, + 0.0 + ], + [ + "WindGen", + 1983, + 0.029667962 + ] ] - ] + } }, "expected": { - "schema": "group string, time long, val double, freq double, ft_real double, ft_imag double", - "ts_col": "time", - "partition_cols": [ - "group" - ], - "data": [ - [ - "Emissions", - 1949, - 2206.690829, - 0.0, - 9588.798296, - -0.0 - ], - [ - "Emissions", - 1950, - 2382.046176, - 0.25, - -319.996498, - 91.32778800000006 - ], - [ - "Emissions", - 1951, - 2526.687327, - -0.5, - -122.0419839999995, - -0.0 - ], - [ - "Emissions", - 1952, - 2473.373964, - -0.25, - -319.996498, - -91.32778800000006 - ], - [ - "WindGen", - 1980, - 0.0, - 0.0, - 0.029667962, - -0.0 - ], - [ - "WindGen", - 1981, - 0.0, - 0.25, - 0.0, - 0.029667962 - ], - [ - "WindGen", - 1982, - 0.0, - -0.5, - -0.029667962, - -0.0 - ], - [ - "WindGen", - 1983, - 0.029667962, - -0.25, - 0.0, - -0.029667962 + "tsdf": { + "ts_col": "time", + "partition_cols": ["group"] + }, + "df": { + "schema": "group string, time long, val double, freq double, ft_real double, ft_imag double", + "ts_convert": ["time"], + "data": [ + [ + "Emissions", + 1949, + 2206.690829, + 0.0, + 9588.798296, + -0.0 + ], + [ + "Emissions", + 1950, + 2382.046176, + 0.25, + -319.996498, + 91.32778800000006 + ], + [ + "Emissions", + 1951, + 2526.687327, + -0.5, + -122.0419839999995, + -0.0 + ], + [ + "Emissions", + 1952, + 2473.373964, + -0.25, + -319.996498, + -91.32778800000006 + ], + [ + "WindGen", + 1980, + 0.0, + 0.0, + 0.029667962, + -0.0 + ], + [ + "WindGen", + 1981, + 0.0, + 0.25, + 0.0, + 0.029667962 + ], + [ + "WindGen", + 1982, + 0.0, + -0.5, + -0.029667962, + -0.0 + ], + [ + "WindGen", + 1983, + 0.029667962, + -0.25, + 0.0, + -0.029667962 + ] ] - ] + } } }, "test_fourier_transform_no_sequence_col_empty_partition_cols": { "init": { - "schema": { - "$ref": "#/FourierTransformTest/test_fourier_transform/init/schema" + "tsdf": { + "ts_col": "time", + "partition_cols": [] }, - "ts_col": "time", - "partition_cols": [], - "data": { - "$ref": "#/FourierTransformTest/test_fourier_transform/init/data" + "df": { + "schema": { + "$ref": "#/FourierTransformTest/test_fourier_transform/init/df/schema" + }, + "ts_convert": ["time"], + "data": { + "$ref": "#/FourierTransformTest/test_fourier_transform/init/df/data" + } } }, "expected": { - "schema": "time long, val double, freq double, ft_real double, ft_imag double", - "ts_col": "time", - "data": [ - [ - 1949, - 2206.690829, - 0.0, - 9588.827963962001, - -0.0 - ], - [ - 1950, - 2382.046176, - 0.125, - 2142.1333092115465, - -5959.966855086621 - ], - [ - 1951, - 2526.687327, - 0.25, - -319.996498, - 91.35745596200013 - ], 
- [ - 1952, - 2473.373964, - 0.375, - 2271.2483487884538, - -906.5922010866211 - ], - [ - 1980, - 0.0, - -0.5, - -122.07165196199912, - -0.0 - ], - [ - 1981, - 0.0, - -0.375, - 2271.2483487884538, - 906.5922010866211 - ], - [ - 1982, - 0.0, - -0.25, - -319.996498, - -91.35745596200013 + "tsdf": { + "ts_col": "time", + "partition_cols": [] + }, + "df": { + "schema": "time long, val double, freq double, ft_real double, ft_imag double", + "ts_convert": [ + "time" ], - [ - 1983, - 0.029667962, - -0.125, - 2142.1333092115465, - 5959.966855086621 + "data": [ + [ + 1949, + 2206.690829, + 0.0, + 9588.827963962001, + -0.0 + ], + [ + 1950, + 2382.046176, + 0.125, + 2142.1333092115465, + -5959.966855086621 + ], + [ + 1951, + 2526.687327, + 0.25, + -319.996498, + 91.35745596200013 + ], + [ + 1952, + 2473.373964, + 0.375, + 2271.2483487884538, + -906.5922010866211 + ], + [ + 1980, + 0.0, + -0.5, + -122.07165196199912, + -0.0 + ], + [ + 1981, + 0.0, + -0.375, + 2271.2483487884538, + 906.5922010866211 + ], + [ + 1982, + 0.0, + -0.25, + -319.996498, + -91.35745596200013 + ], + [ + 1983, + 0.029667962, + -0.125, + 2142.1333092115465, + 5959.966855086621 + ] ] - ] + } } }, "test_fourier_transform_valid_sequence_col_empty_partition_cols": { - "init": { - "schema": "sequence int, time long, val double", - "ts_col": "time", - "sequence_col": "sequence", - "partition_cols": [], - "data": [ - [ - 1, - 1949, - 2206.690829 - ], - [ - 2, - 1950, - 2382.046176 - ], - [ - 3, - 1951, - 2526.687327 - ], - [ - 4, - 1952, - 2473.373964 - ], - [ - 5, - 1980, - 0.0 - ], - [ - 6, - 1981, - 0.0 - ], - [ - 7, - 1982, - 0.0 - ], - [ - 8, - 1983, - 0.029667962 - ] - ] - }, - "expected": { - "schema": "sequence int, time long, val double, freq double, ft_real double, ft_imag double", - "ts_col": "time", - "partition_cols": [], - "data": [ - [ - 1, - 1949, - 2206.690829, - 0.0, - 9588.827963962001, - 0.0 - ], - [ - 2, - 1950, - 2382.046176, - 0.125, - 2142.1333092115465, - -5959.966855086621 - ], - [ - 3, - 1951, - 2526.687327, - 0.25, - -319.996498, - 91.35745596200013 - ], - [ - 4, - 1952, - 2473.373964, - 0.375, - 2271.2483487884538, - -906.5922010866211 - ], - [ - 5, - 1980, - 0.0, - -0.5, - -122.07165196199912, - -0.0 - ], - [ - 6, - 1981, - 0.0, - -0.375, - 2271.2483487884538, - 906.5922010866211 - ], - [ - 7, - 1982, - 0.0, - -0.25, - -319.996498, - -91.35745596200013 - ], - [ - 8, - 1983, - 0.029667962, - -0.125, - 2142.1333092115465, - 5959.966855086621 - ] - ] - } - }, - "test_fourier_transform_valid_sequence_col_valid_partition_cols": { - "init": { - "schema": "group string, sequence int, time long, val double", - "ts_col": "time", - "sequence_col": "sequence", - "partition_cols": [ - "group" - ], - "data": [ - [ - "Emissions", - 1, - 1949, - 2206.690829 - ], - [ - "Emissions", - 2, - 1950, - 2382.046176 - ], - [ - "Emissions", - 3, - 1951, - 2526.687327 - ], - [ - "Emissions", - 4, - 1952, - 2473.373964 - ], - [ - "WindGen", - 1, - 1980, - 0.0 - ], - [ - "WindGen", - 2, - 1981, - 0.0 - ], - [ - "WindGen", - 3, - 1982, - 0.0 - ], - [ - "WindGen", - 4, - 1983, - 0.029667962 - ] - ] - }, - "expected": { - "schema": "group string, sequence int, time long, val double, freq double, ft_real double, ft_imag double", - "ts_col": "time", - "partition_cols": [ - "group" - ], - "data": [ - [ - "Emissions", - 1, - 1949, - 2206.690829, - 0.0, - 9588.798296, - 0.0 - ], - [ - "Emissions", - 2, - 1950, - 2382.046176, - 0.25, - -319.996498, - 91.32778800000006 - ], - [ - "Emissions", - 3, - 1951, - 2526.687327, - -0.5, - 
-122.0419839999995, - 0.0 - ], - [ - "Emissions", - 4, - 1952, - 2473.373964, - -0.25, - -319.996498, - -91.32778800000006 - ], - [ - "WindGen", - 1, - 1980, - 0.0, - 0.0, - 0.029667962, - 0.0 - ], - [ - "WindGen", - 2, - 1981, - 0.0, - 0.25, - 0.0, - 0.029667962 - ], - [ - "WindGen", - 3, - 1982, - 0.0, - -0.5, - -0.029667962, - -0.0 - ], - [ - "WindGen", - 4, - 1983, - 0.029667962, - -0.25, - 0.0, - -0.029667962 - ] - ] - } - } - }, - "RangeStatsTest": { - "test_range_stats": { "init": { "tsdf": { - "ts_col": "event_ts", - "partition_cols": ["symbol"] + "ts_col": "time", + "sequence_col": "sequence", + "partition_cols": [] }, "df": { - "schema": "symbol string, event_ts string, trade_pr float", - "ts_convert": ["event_ts"], + "schema": "sequence int, time long, val double", + "ts_convert": ["time"], "data": [ [ - "S1", - "2020-08-01 00:00:10", - 349.21 + 1, + 1949, + 2206.690829 ], [ - "S1", - "2020-08-01 00:01:12", - 351.32 + 2, + 1950, + 2382.046176 ], [ - "S1", - "2020-09-01 00:02:10", - 361.1 + 3, + 1951, + 2526.687327 ], [ - "S1", - "2020-09-01 00:19:12", - 362.1 + 4, + 1952, + 2473.373964 + ], + [ + 5, + 1980, + 0.0 + ], + [ + 6, + 1981, + 0.0 + ], + [ + 7, + 1982, + 0.0 + ], + [ + 8, + 1983, + 0.029667962 ] ] } }, "expected": { "tsdf": { - "ts_col": "event_ts", - "partition_cols": ["symbol"] + "ts_col": "time", + "partition_cols": [] }, "df": { - "schema": "symbol string, event_ts string, mean_trade_pr float, count_trade_pr long, min_trade_pr float, max_trade_pr float, sum_trade_pr float, stddev_trade_pr float, zscore_trade_pr float", - "ts_convert": ["event_ts"], + "schema": "sequence int, time long, val double, freq double, ft_real double, ft_imag double", + "ts_convert": [ + "time" + ], "data": [ [ - "S1", - "2020-08-01 00:00:10", - 349.21, 1, - 349.21, - 349.21, - 349.21, - null, - null + 1949, + 2206.690829, + 0.0, + 9588.827963962001, + 0.0 ], [ - "S1", - "2020-08-01 00:01:12", - 350.26, + 2, + 1950, + 2382.046176, + 0.125, + 2142.1333092115465, + -5959.966855086621 + ], + [ + 3, + 1951, + 2526.687327, + 0.25, + -319.996498, + 91.35745596200013 + ], + [ + 4, + 1952, + 2473.373964, + 0.375, + 2271.2483487884538, + -906.5922010866211 + ], + [ + 5, + 1980, + 0.0, + -0.5, + -122.07165196199912, + -0.0 + ], + [ + 6, + 1981, + 0.0, + -0.375, + 2271.2483487884538, + 906.5922010866211 + ], + [ + 7, + 1982, + 0.0, + -0.25, + -319.996498, + -91.35745596200013 + ], + [ + 8, + 1983, + 0.029667962, + -0.125, + 2142.1333092115465, + 5959.966855086621 + ] + ] + } + } + }, + "test_fourier_transform_valid_sequence_col_valid_partition_cols": { + "init": { + "tsdf": { + "ts_col": "time", + "sequence_col": "sequence", + "partition_cols": ["group"] + }, + "df": { + "schema": "group string, sequence int, time long, val double", + "ts_convert": ["time"], + "data": [ + [ + "Emissions", + 1, + 1949, + 2206.690829 + ], + [ + "Emissions", + 2, + 1950, + 2382.046176 + ], + [ + "Emissions", + 3, + 1951, + 2526.687327 + ], + [ + "Emissions", + 4, + 1952, + 2473.373964 + ], + [ + "WindGen", + 1, + 1980, + 0.0 + ], + [ + "WindGen", + 2, + 1981, + 0.0 + ], + [ + "WindGen", + 3, + 1982, + 0.0 + ], + [ + "WindGen", + 4, + 1983, + 0.029667962 + ] + ] + } + }, + "expected": { + "tsdf": { + "ts_col": "time", + "partition_cols": ["group"] + }, + "df": { + "schema": "group string, sequence int, time long, val double, freq double, ft_real double, ft_imag double", + "ts_convert": [ + "time" + ], + "data": [ + [ + "Emissions", + 1, + 1949, + 2206.690829, + 0.0, + 9588.798296, + 0.0 + ], + [ + "Emissions", + 2, + 
1950, + 2382.046176, + 0.25, + -319.996498, + 91.32778800000006 + ], + [ + "Emissions", + 3, + 1951, + 2526.687327, + -0.5, + -122.0419839999995, + 0.0 + ], + [ + "Emissions", + 4, + 1952, + 2473.373964, + -0.25, + -319.996498, + -91.32778800000006 + ], + [ + "WindGen", + 1, + 1980, + 0.0, + 0.0, + 0.029667962, + 0.0 + ], + [ + "WindGen", + 2, + 1981, + 0.0, + 0.25, + 0.0, + 0.029667962 + ], + [ + "WindGen", + 3, + 1982, + 0.0, + -0.5, + -0.029667962, + -0.0 + ], + [ + "WindGen", + 4, + 1983, + 0.029667962, + -0.25, + 0.0, + -0.029667962 + ] + ] + } + } + } + }, + "RangeStatsTest": { + "test_range_stats": { + "init": { + "tsdf": { + "ts_col": "event_ts", + "partition_cols": ["symbol"] + }, + "df": { + "schema": "symbol string, event_ts string, trade_pr float", + "ts_convert": ["event_ts"], + "data": [ + [ + "S1", + "2020-08-01 00:00:10", + 349.21 + ], + [ + "S1", + "2020-08-01 00:01:12", + 351.32 + ], + [ + "S1", + "2020-09-01 00:02:10", + 361.1 + ], + [ + "S1", + "2020-09-01 00:19:12", + 362.1 + ] + ] + } + }, + "expected": { + "tsdf": { + "ts_col": "event_ts", + "partition_cols": ["symbol"] + }, + "df": { + "schema": "symbol string, event_ts string, mean_trade_pr float, count_trade_pr long, min_trade_pr float, max_trade_pr float, sum_trade_pr float, stddev_trade_pr float, zscore_trade_pr float", + "ts_convert": ["event_ts"], + "data": [ + [ + "S1", + "2020-08-01 00:00:10", + 349.21, + 1, + 349.21, + 349.21, + 349.21, + null, + null + ], + [ + "S1", + "2020-08-01 00:01:12", + 350.26, 2, 349.21, 351.32, @@ -1734,68 +1774,79 @@ "ResampleTest": { "test_resample": { "input": { - "schema": "symbol string, date string, event_ts string, trade_pr float, trade_pr_2 float", - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ], - "data": [ - [ - "S1", - "SAME_DT", - "2020-08-01 00:00:10", - 349.21, - 10.0 - ], - [ - "S1", - "SAME_DT", - "2020-08-01 00:00:11", - 340.21, - 9.0 - ], - [ - "S1", - "SAME_DT", - "2020-08-01 00:01:12", - 353.32, - 8.0 - ], - [ - "S1", - "SAME_DT", - "2020-08-01 00:01:13", - 351.32, - 7.0 - ], - [ - "S1", - "SAME_DT", - "2020-08-01 00:01:14", - 350.32, - 6.0 - ], - [ - "S1", - "SAME_DT", - "2020-09-01 00:01:12", - 361.1, - 5.0 + "tsdf": { + "ts_col": "event_ts", + "partition_cols": ["symbol"] + }, + "df": { + "schema": "symbol string, date string, event_ts string, trade_pr float, trade_pr_2 float", + "ts_convert": [ + "event_ts" ], - [ - "S1", - "SAME_DT", - "2020-09-01 00:19:12", - 362.1, - 4.0 + "data": [ + [ + "S1", + "SAME_DT", + "2020-08-01 00:00:10", + 349.21, + 10.0 + ], + [ + "S1", + "SAME_DT", + "2020-08-01 00:00:11", + 340.21, + 9.0 + ], + [ + "S1", + "SAME_DT", + "2020-08-01 00:01:12", + 353.32, + 8.0 + ], + [ + "S1", + "SAME_DT", + "2020-08-01 00:01:13", + 351.32, + 7.0 + ], + [ + "S1", + "SAME_DT", + "2020-08-01 00:01:14", + 350.32, + 6.0 + ], + [ + "S1", + "SAME_DT", + "2020-09-01 00:01:12", + 361.1, + 5.0 + ], + [ + "S1", + "SAME_DT", + "2020-09-01 00:19:12", + 362.1, + 4.0 + ] ] - ] + } }, "expected": { + "tsdf": { + "ts_col": "event_ts", + "partition_cols": [ + "symbol" + ] + }, + "df": { "schema": "symbol string, event_ts string, floor_trade_pr float, floor_date string, floor_trade_pr_2 float", - "ts_col": "event_ts", - "partition_cols": [ - "symbol" + "ts_convert": [ + "event_ts" ], "data": [ [ @@ -1827,132 +1878,156 @@ 4.0 ] ] + } }, "expected30m": { - "schema": "symbol string, event_ts string, date double, trade_pr double, trade_pr_2 double", - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ], - "data": [ - [ - "S1", - "2020-08-01 
00:00:00", - null, - 348.88, - 8.0 - ], - [ - "S1", - "2020-09-01 00:00:00", - null, - 361.1, - 5.0 + "tsdf": { + "ts_col": "event_ts", + "partition_cols": [ + "symbol" + ] + }, + "df": { + "schema": "symbol string, event_ts string, date double, trade_pr double, trade_pr_2 double", + "ts_convert": [ + "event_ts" ], - [ - "S1", - "2020-09-01 00:15:00", - null, - 362.1, - 4.0 + "data": [ + [ + "S1", + "2020-08-01 00:00:00", + null, + 348.88, + 8.0 + ], + [ + "S1", + "2020-09-01 00:00:00", + null, + 361.1, + 5.0 + ], + [ + "S1", + "2020-09-01 00:15:00", + null, + 362.1, + 4.0 + ] ] - ] + } }, "expectedbars": { - "schema": "symbol string, event_ts string, close_trade_pr float, close_trade_pr_2 float, high_trade_pr float, high_trade_pr_2 float, low_trade_pr float, low_trade_pr_2 float, open_trade_pr float, open_trade_pr_2 float", - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ], - "data": [ - [ - "S1", - "2020-08-01 00:00:00", - 340.21, - 9.0, - 349.21, - 10.0, - 340.21, - 9.0, - 349.21, - 10.0 - ], - [ - "S1", - "2020-08-01 00:01:00", - 350.32, - 6.0, - 353.32, - 8.0, - 350.32, - 6.0, - 353.32, - 8.0 - ], - [ - "S1", - "2020-09-01 00:01:00", - 361.1, - 5.0, - 361.1, - 5.0, - 361.1, - 5.0, - 361.1, - 5.0 + "tsdf": { + "ts_col": "event_ts", + "partition_cols": [ + "symbol" + ] + }, + "df": { + "schema": "symbol string, event_ts string, close_trade_pr float, close_trade_pr_2 float, high_trade_pr float, high_trade_pr_2 float, low_trade_pr float, low_trade_pr_2 float, open_trade_pr float, open_trade_pr_2 float", + "ts_convert": [ + "event_ts" ], - [ - "S1", - "2020-09-01 00:19:00", - 362.1, - 4.0, - 362.1, - 4.0, - 362.1, - 4.0, - 362.1, - 4.0 + "data": [ + [ + "S1", + "2020-08-01 00:00:00", + 340.21, + 9.0, + 349.21, + 10.0, + 340.21, + 9.0, + 349.21, + 10.0 + ], + [ + "S1", + "2020-08-01 00:01:00", + 350.32, + 6.0, + 353.32, + 8.0, + 350.32, + 6.0, + 353.32, + 8.0 + ], + [ + "S1", + "2020-09-01 00:01:00", + 361.1, + 5.0, + 361.1, + 5.0, + 361.1, + 5.0, + 361.1, + 5.0 + ], + [ + "S1", + "2020-09-01 00:19:00", + 362.1, + 4.0, + 362.1, + 4.0, + 362.1, + 4.0, + 362.1, + 4.0 + ] ] - ] + } } }, "test_resample_millis": { "init": { - "schema": "symbol string, date string, event_ts string, trade_pr float, trade_pr_2 float", - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ], - "data": [ - [ - "S1", - "SAME_DT", - "2020-08-01 00:00:10.12345", - 349.21, - 10.0 - ], - [ - "S1", - "SAME_DT", - "2020-08-01 00:00:10.123", - 340.21, - 9.0 + "tsdf": { + "ts_col": "event_ts", + "partition_cols": ["symbol"] + }, + "df": { + "schema": "symbol string, date string, event_ts string, trade_pr float, trade_pr_2 float", + "ts_convert": [ + "event_ts" ], - [ - "S1", - "SAME_DT", - "2020-08-01 00:00:10.124", - 353.32, - 8.0 + "data": [ + [ + "S1", + "SAME_DT", + "2020-08-01 00:00:10.12345", + 349.21, + 10.0 + ], + [ + "S1", + "SAME_DT", + "2020-08-01 00:00:10.123", + 340.21, + 9.0 + ], + [ + "S1", + "SAME_DT", + "2020-08-01 00:00:10.124", + 353.32, + 8.0 + ] ] - ] + } }, "expectedms": { + "tsdf": { + "ts_col": "event_ts", + "partition_cols": [ + "symbol" + ] + }, + "df": { "schema": "symbol string, event_ts string, date double, trade_pr double, trade_pr_2 double", - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ], + "ts_convert": ["event_ts"], "data": [ [ "S1", @@ -1969,1158 +2044,1227 @@ 8.0 ] ] + } } }, "test_upsample": { "input": { - "schema": "symbol string, date string, event_ts string, trade_pr float, trade_pr_2 float", - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ], - 
"data": [ - [ - "S1", - "SAME_DT", - "2020-08-01 00:00:10", - 349.21, - 10.0 - ], - [ - "S1", - "SAME_DT", - "2020-08-01 00:00:11", - 340.21, - 9.0 - ], - [ - "S1", - "SAME_DT", - "2020-08-01 00:01:12", - 353.32, - 8.0 - ], - [ - "S1", - "SAME_DT", - "2020-08-01 00:01:13", - 351.32, - 7.0 - ], - [ - "S1", - "SAME_DT", - "2020-08-01 00:01:14", - 350.32, - 6.0 - ], - [ - "S1", - "SAME_DT", - "2020-09-01 00:01:12", - 361.1, - 5.0 + "tsdf": { + "ts_col": "event_ts", + "partition_cols": ["symbol"] + }, + "df": { + "schema": "symbol string, date string, event_ts string, trade_pr float, trade_pr_2 float", + "ts_convert": [ + "event_ts" ], - [ - "S1", - "SAME_DT", - "2020-09-01 00:19:12", - 362.1, - 4.0 + "data": [ + [ + "S1", + "SAME_DT", + "2020-08-01 00:00:10", + 349.21, + 10.0 + ], + [ + "S1", + "SAME_DT", + "2020-08-01 00:00:11", + 340.21, + 9.0 + ], + [ + "S1", + "SAME_DT", + "2020-08-01 00:01:12", + 353.32, + 8.0 + ], + [ + "S1", + "SAME_DT", + "2020-08-01 00:01:13", + 351.32, + 7.0 + ], + [ + "S1", + "SAME_DT", + "2020-08-01 00:01:14", + 350.32, + 6.0 + ], + [ + "S1", + "SAME_DT", + "2020-09-01 00:01:12", + 361.1, + 5.0 + ], + [ + "S1", + "SAME_DT", + "2020-09-01 00:19:12", + 362.1, + 4.0 + ] ] - ] + } }, "expected": { - "schema": "symbol string, event_ts string, floor_trade_pr float, floor_date string, floor_trade_pr_2 float", - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ], - "data": [ - [ - "S1", - "2020-08-01 00:00:00", - 349.21, - "SAME_DT", - 10.0 - ], - [ - "S1", - "2020-08-01 00:01:00", - 353.32, - "SAME_DT", - 8.0 + "tsdf": { + "ts_col": "event_ts", + "partition_cols": [ + "symbol" + ] + }, + "df": { + "schema": "symbol string, event_ts string, floor_trade_pr float, floor_date string, floor_trade_pr_2 float", + "ts_convert": [ + "event_ts" ], - [ - "S1", - "2020-09-01 00:01:00", - 361.1, - "SAME_DT", - 5.0 - ], - [ - "S1", - "2020-09-01 00:19:00", - 362.1, - "SAME_DT", - 4.0 + "data": [ + [ + "S1", + "2020-08-01 00:00:00", + 349.21, + "SAME_DT", + 10.0 + ], + [ + "S1", + "2020-08-01 00:01:00", + 353.32, + "SAME_DT", + 8.0 + ], + [ + "S1", + "2020-09-01 00:01:00", + 361.1, + "SAME_DT", + 5.0 + ], + [ + "S1", + "2020-09-01 00:19:00", + 362.1, + "SAME_DT", + 4.0 + ] ] - ] + } }, "expected30m": { - "schema": "symbol string, event_ts string, date double, trade_pr double, trade_pr_2 double", - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ], - "data": [ - [ - "S1", - "2020-08-01 00:00:00", - 0.0, - 348.88, - 8.0 - ], - [ - "S1", - "2020-08-01 00:05:00", - 0.0, - 0.0, - 0.0 - ], - [ - "S1", - "2020-09-01 00:00:00", - 0.0, - 361.1, - 5.0 + "tsdf": { + "ts_col": "event_ts", + "partition_cols": [ + "symbol" + ] + }, + "df": { + "schema": "symbol string, event_ts string, date double, trade_pr double, trade_pr_2 double", + "ts_convert": [ + "event_ts" ], - [ - "S1", - "2020-09-01 00:15:00", - 0.0, - 362.1, - 4.0 + "data": [ + [ + "S1", + "2020-08-01 00:00:00", + 0.0, + 348.88, + 8.0 + ], + [ + "S1", + "2020-08-01 00:05:00", + 0.0, + 0.0, + 0.0 + ], + [ + "S1", + "2020-09-01 00:00:00", + 0.0, + 361.1, + 5.0 + ], + [ + "S1", + "2020-09-01 00:15:00", + 0.0, + 362.1, + 4.0 + ] ] - ] + } }, "expectedbars": { - "schema": "symbol string, event_ts string, close_trade_pr float, close_trade_pr_2 float, high_trade_pr float, high_trade_pr_2 float, low_trade_pr float, low_trade_pr_2 float, open_trade_pr float, open_trade_pr_2 float", - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ], - "data": [ - [ - "S1", - "2020-08-01 00:00:00", - 340.21, - 9.0, - 349.21, - 10.0, - 
340.21, - 9.0, - 349.21, - 10.0 - ], - [ - "S1", - "2020-08-01 00:01:00", - 350.32, - 6.0, - 353.32, - 8.0, - 350.32, - 6.0, - 353.32, - 8.0 - ], - [ - "S1", - "2020-09-01 00:01:00", - 361.1, - 5.0, - 361.1, - 5.0, - 361.1, - 5.0, - 361.1, - 5.0 + "tsdf": { + "ts_col": "event_ts", + "partition_cols": [ + "symbol" + ] + }, + "df": { + "schema": "symbol string, event_ts string, close_trade_pr float, close_trade_pr_2 float, high_trade_pr float, high_trade_pr_2 float, low_trade_pr float, low_trade_pr_2 float, open_trade_pr float, open_trade_pr_2 float", + "ts_convert": [ + "event_ts" ], - [ - "S1", - "2020-09-01 00:19:00", - 362.1, - 4.0, - 362.1, - 4.0, - 362.1, - 4.0, - 362.1, - 4.0 + "data": [ + [ + "S1", + "2020-08-01 00:00:00", + 340.21, + 9.0, + 349.21, + 10.0, + 340.21, + 9.0, + 349.21, + 10.0 + ], + [ + "S1", + "2020-08-01 00:01:00", + 350.32, + 6.0, + 353.32, + 8.0, + 350.32, + 6.0, + 353.32, + 8.0 + ], + [ + "S1", + "2020-09-01 00:01:00", + 361.1, + 5.0, + 361.1, + 5.0, + 361.1, + 5.0, + 361.1, + 5.0 + ], + [ + "S1", + "2020-09-01 00:19:00", + 362.1, + 4.0, + 362.1, + 4.0, + 362.1, + 4.0, + 362.1, + 4.0 + ] ] - ] + } } } }, "ExtractStateIntervalsTest": { "test_eq_0": { "input": { - "schema": "event_ts STRING NOT NULL, identifier_1 STRING NOT NULL, identifier_2 STRING NOT NULL, identifier_3 STRING NOT NULL, metric_1 FLOAT NOT NULL, metric_2 FLOAT NOT NULL, metric_3 FLOAT NOT NULL", - "ts_col": "event_ts", - "partition_cols": [ - "identifier_1", - "identifier_2", - "identifier_3" - ], - "data": [ - [ - "2020-08-01 00:00:09", - "v1", - "foo", - "bar", - 4.1, - 4.1, - 4.1 - ], - [ - "2020-08-01 00:00:10", - "v1", - "foo", - "bar", - 4.1, - 4.1, - 4.1 - ], - [ - "2020-08-01 00:00:11", - "v1", - "foo", - "bar", - 5.0, - 5.0, - 5.0 - ], - [ - "2020-08-01 00:01:12", - "v1", - "foo", - "bar", - 10.7, - 10.7, - 10.7 - ], - [ - "2020-08-01 00:01:13", - "v1", - "foo", - "bar", - 10.7, - 10.7, - 10.7 - ], - [ - "2020-08-01 00:01:14", - "v1", - "foo", - "bar", - 10.7, - 10.7, - 10.7 - ], - [ - "2020-08-01 00:01:15", - "v1", - "foo", - "bar", - 42.3, - 42.3, - 42.3 - ], - [ - "2020-08-01 00:01:16", - "v1", - "foo", - "bar", - 37.6, - 37.6, - 37.6 - ], - [ - "2020-08-01 00:01:17", - "v1", - "foo", - "bar", - 61.5, - 61.5, - 61.5 - ], - [ - "2020-09-01 00:01:12", - "v1", - "foo", - "bar", - 28.9, - 28.9, - 28.9 + "tsdf": { + "ts_col": "event_ts", + "partition_cols": ["identifier_1", "identifier_2", "identifier_3"] + }, + "df": { + "schema": "event_ts STRING NOT NULL, identifier_1 STRING NOT NULL, identifier_2 STRING NOT NULL, identifier_3 STRING NOT NULL, metric_1 FLOAT NOT NULL, metric_2 FLOAT NOT NULL, metric_3 FLOAT NOT NULL", + "ts_convert": [ + "event_ts" ], - [ - "2020-09-01 00:19:12", - "v1", - "foo", - "bar", - 0.1, - 0.1, - 0.1 + "data": [ + [ + "2020-08-01 00:00:09", + "v1", + "foo", + "bar", + 4.1, + 4.1, + 4.1 + ], + [ + "2020-08-01 00:00:10", + "v1", + "foo", + "bar", + 4.1, + 4.1, + 4.1 + ], + [ + "2020-08-01 00:00:11", + "v1", + "foo", + "bar", + 5.0, + 5.0, + 5.0 + ], + [ + "2020-08-01 00:01:12", + "v1", + "foo", + "bar", + 10.7, + 10.7, + 10.7 + ], + [ + "2020-08-01 00:01:13", + "v1", + "foo", + "bar", + 10.7, + 10.7, + 10.7 + ], + [ + "2020-08-01 00:01:14", + "v1", + "foo", + "bar", + 10.7, + 10.7, + 10.7 + ], + [ + "2020-08-01 00:01:15", + "v1", + "foo", + "bar", + 42.3, + 42.3, + 42.3 + ], + [ + "2020-08-01 00:01:16", + "v1", + "foo", + "bar", + 37.6, + 37.6, + 37.6 + ], + [ + "2020-08-01 00:01:17", + "v1", + "foo", + "bar", + 61.5, + 61.5, + 61.5 + ], + [ + "2020-09-01 00:01:12", 
+ "v1", + "foo", + "bar", + 28.9, + 28.9, + 28.9 + ], + [ + "2020-09-01 00:19:12", + "v1", + "foo", + "bar", + 0.1, + 0.1, + 0.1 + ] ] - ] + } }, "expected": { - "schema": "start_ts STRING NOT NULL, end_ts STRING NOT NULL,identifier_1 STRING NOT NULL,identifier_2 STRING NOT NULL,identifier_3 STRING NOT NULL", - "other_ts_cols": [ - "start_ts", - "end_ts" - ], - "data": [ - [ - "2020-08-01 00:00:09", - "2020-08-01 00:00:10", - "v1", - "foo", - "bar" + "df": { + "schema": "start_ts STRING NOT NULL, end_ts STRING NOT NULL,identifier_1 STRING NOT NULL,identifier_2 STRING NOT NULL,identifier_3 STRING NOT NULL", + "ts_convert": [ + "start_ts", + "end_ts" ], - [ - "2020-08-01 00:01:12", - "2020-08-01 00:01:14", - "v1", - "foo", - "bar" + "data": [ + [ + "2020-08-01 00:00:09", + "2020-08-01 00:00:10", + "v1", + "foo", + "bar" + ], + [ + "2020-08-01 00:01:12", + "2020-08-01 00:01:14", + "v1", + "foo", + "bar" + ] ] - ] + } } }, "test_eq_1": { "input": { - "schema": "event_ts STRING NOT NULL, identifier_1 STRING NOT NULL, identifier_2 STRING NOT NULL, identifier_3 STRING NOT NULL, metric_1 FLOAT, metric_2 FLOAT, metric_3 FLOAT", - "ts_col": "event_ts", - "partition_cols": [ - "identifier_1", - "identifier_2", - "identifier_3" - ], - "data": [ - [ - "2020-08-01 00:00:09", - "v1", - "foo", - "bar", - 4.1, - null, - 4.1 - ], - [ - "2020-08-01 00:00:10", - "v1", - "foo", - "bar", - 4.1, - null, - 4.1 - ], - [ - "2020-08-01 00:00:11", - "v1", - "foo", - "bar", - 5.0, - 5.0, - 5.0 - ], - [ - "2020-08-01 00:01:12", - "v1", - "foo", - "bar", - 10.7, - 10.7, - 10.7 - ], - [ - "2020-08-01 00:01:13", - "v1", - "foo", - "bar", - 10.7, - 10.7, - 10.7 - ], - [ - "2020-08-01 00:01:14", - "v1", - "foo", - "bar", - 10.7, - 10.7, - null - ], - [ - "2020-08-01 00:01:15", - "v1", - "foo", - "bar", - 42.3, - 42.3, - 42.3 - ], - [ - "2020-08-01 00:01:16", - "v1", - "foo", - "bar", - 37.6, - 37.6, - 37.6 - ], - [ - "2020-08-01 00:01:17", - "v1", - "foo", - "bar", - 61.5, - 61.5, - 61.5 - ], - [ - "2020-09-01 00:01:12", - "v1", - "foo", - "bar", - 28.9, - 28.9, - 28.9 - ], - [ - "2020-09-01 00:19:12", - "v1", - "foo", - "bar", - 0.1, - 0.1, - 0.1 + "tsdf": { + "ts_col": "event_ts", + "partition_cols": ["identifier_1", "identifier_2", "identifier_3"] + }, + "df": { + "schema": "event_ts STRING NOT NULL, identifier_1 STRING NOT NULL, identifier_2 STRING NOT NULL, identifier_3 STRING NOT NULL, metric_1 FLOAT, metric_2 FLOAT, metric_3 FLOAT", + "ts_convert": ["event_ts"], + "data": [ + [ + "2020-08-01 00:00:09", + "v1", + "foo", + "bar", + 4.1, + null, + 4.1 + ], + [ + "2020-08-01 00:00:10", + "v1", + "foo", + "bar", + 4.1, + null, + 4.1 + ], + [ + "2020-08-01 00:00:11", + "v1", + "foo", + "bar", + 5.0, + 5.0, + 5.0 + ], + [ + "2020-08-01 00:01:12", + "v1", + "foo", + "bar", + 10.7, + 10.7, + 10.7 + ], + [ + "2020-08-01 00:01:13", + "v1", + "foo", + "bar", + 10.7, + 10.7, + 10.7 + ], + [ + "2020-08-01 00:01:14", + "v1", + "foo", + "bar", + 10.7, + 10.7, + null + ], + [ + "2020-08-01 00:01:15", + "v1", + "foo", + "bar", + 42.3, + 42.3, + 42.3 + ], + [ + "2020-08-01 00:01:16", + "v1", + "foo", + "bar", + 37.6, + 37.6, + 37.6 + ], + [ + "2020-08-01 00:01:17", + "v1", + "foo", + "bar", + 61.5, + 61.5, + 61.5 + ], + [ + "2020-09-01 00:01:12", + "v1", + "foo", + "bar", + 28.9, + 28.9, + 28.9 + ], + [ + "2020-09-01 00:19:12", + "v1", + "foo", + "bar", + 0.1, + 0.1, + 0.1 + ] ] - ] + } }, "expected": { - "schema": "start_ts STRING NOT NULL, end_ts STRING NOT NULL,identifier_1 STRING NOT NULL,identifier_2 STRING NOT NULL,identifier_3 
STRING NOT NULL", - "other_ts_cols": [ - "start_ts", - "end_ts" - ], - "data": [ - [ - "2020-08-01 00:01:12", - "2020-08-01 00:01:13", - "v1", - "foo", - "bar" + "df": { + "schema": "start_ts STRING NOT NULL, end_ts STRING NOT NULL,identifier_1 STRING NOT NULL,identifier_2 STRING NOT NULL,identifier_3 STRING NOT NULL", + "ts_convert": [ + "start_ts", + "end_ts" + ], + "data": [ + [ + "2020-08-01 00:01:12", + "2020-08-01 00:01:13", + "v1", + "foo", + "bar" + ] ] - ] + } } }, - "test_ne_0": { - "input": { - "schema": "event_ts STRING NOT NULL, identifier_1 STRING NOT NULL, identifier_2 STRING NOT NULL, identifier_3 STRING NOT NULL, metric_1 FLOAT NOT NULL, metric_2 FLOAT NOT NULL, metric_3 FLOAT NOT NULL", - "ts_col": "event_ts", - "partition_cols": [ - "identifier_1", - "identifier_2", - "identifier_3" - ], - "data": [ - [ - "2020-08-01 00:00:09", - "v1", - "foo", - "bar", - 4.1, - 4.1, - 4.1 - ], - [ - "2020-08-01 00:00:10", - "v1", - "foo", - "bar", - 4.1, - 4.1, - 4.1 - ], - [ - "2020-08-01 00:00:11", - "v1", - "foo", - "bar", - 5.0, - 5.0, - 5.0 - ], - [ - "2020-08-01 00:01:12", - "v1", - "foo", - "bar", - 10.7, - 10.7, - 10.7 - ], - [ - "2020-08-01 00:01:13", - "v1", - "foo", - "bar", - 10.7, - 10.7, - 10.7 - ], - [ - "2020-08-01 00:01:14", - "v1", - "foo", - "bar", - 10.7, - 10.7, - 10.7 - ], - [ - "2020-08-01 00:01:15", - "v1", - "foo", - "bar", - 42.3, - 42.3, - 42.3 - ], - [ - "2020-08-01 00:01:16", - "v1", - "foo", - "bar", - 37.6, - 37.6, - 37.6 - ], - [ - "2020-08-01 00:01:17", - "v1", - "foo", - "bar", - 61.5, - 61.5, - 61.5 - ], - [ - "2020-09-01 00:01:12", - "v1", - "foo", - "bar", - 28.9, - 28.9, - 28.9 + "test_ne_0": { + "input": { + "tsdf": { + "ts_col": "event_ts", + "partition_cols": ["identifier_1", "identifier_2", "identifier_3"] + }, + "df": { + "schema": "event_ts STRING NOT NULL, identifier_1 STRING NOT NULL, identifier_2 STRING NOT NULL, identifier_3 STRING NOT NULL, metric_1 FLOAT NOT NULL, metric_2 FLOAT NOT NULL, metric_3 FLOAT NOT NULL", + "ts_convert": [ + "event_ts" ], - [ - "2020-09-01 00:19:12", - "v1", - "foo", - "bar", - 0.1, - 0.1, - 0.1 + "data": [ + [ + "2020-08-01 00:00:09", + "v1", + "foo", + "bar", + 4.1, + 4.1, + 4.1 + ], + [ + "2020-08-01 00:00:10", + "v1", + "foo", + "bar", + 4.1, + 4.1, + 4.1 + ], + [ + "2020-08-01 00:00:11", + "v1", + "foo", + "bar", + 5.0, + 5.0, + 5.0 + ], + [ + "2020-08-01 00:01:12", + "v1", + "foo", + "bar", + 10.7, + 10.7, + 10.7 + ], + [ + "2020-08-01 00:01:13", + "v1", + "foo", + "bar", + 10.7, + 10.7, + 10.7 + ], + [ + "2020-08-01 00:01:14", + "v1", + "foo", + "bar", + 10.7, + 10.7, + 10.7 + ], + [ + "2020-08-01 00:01:15", + "v1", + "foo", + "bar", + 42.3, + 42.3, + 42.3 + ], + [ + "2020-08-01 00:01:16", + "v1", + "foo", + "bar", + 37.6, + 37.6, + 37.6 + ], + [ + "2020-08-01 00:01:17", + "v1", + "foo", + "bar", + 61.5, + 61.5, + 61.5 + ], + [ + "2020-09-01 00:01:12", + "v1", + "foo", + "bar", + 28.9, + 28.9, + 28.9 + ], + [ + "2020-09-01 00:19:12", + "v1", + "foo", + "bar", + 0.1, + 0.1, + 0.1 + ] ] - ] + } }, "expected": { - "schema": "start_ts STRING NOT NULL, end_ts STRING NOT NULL,identifier_1 STRING NOT NULL,identifier_2 STRING NOT NULL,identifier_3 STRING NOT NULL", - "other_ts_cols": [ - "start_ts", - "end_ts" - ], - "data": [ - [ - "2020-08-01 00:00:10", - "2020-08-01 00:01:12", - "v1", - "foo", - "bar" + "df": { + "schema": "start_ts STRING NOT NULL, end_ts STRING NOT NULL,identifier_1 STRING NOT NULL,identifier_2 STRING NOT NULL,identifier_3 STRING NOT NULL", + "ts_convert": [ + "start_ts", + "end_ts" ], - [ - 
"2020-08-01 00:01:14", - "2020-09-01 00:19:12", - "v1", - "foo", - "bar" + "data": [ + [ + "2020-08-01 00:00:10", + "2020-08-01 00:01:12", + "v1", + "foo", + "bar" + ], + [ + "2020-08-01 00:01:14", + "2020-09-01 00:19:12", + "v1", + "foo", + "bar" + ] ] - ] + } } }, "test_ne_1": { "input": { - "schema": "event_ts STRING NOT NULL, identifier_1 STRING NOT NULL, identifier_2 STRING NOT NULL, identifier_3 STRING NOT NULL, metric_1 FLOAT NOT NULL, metric_2 FLOAT NOT NULL, metric_3 FLOAT NOT NULL", - "ts_col": "event_ts", - "partition_cols": [ - "identifier_1", - "identifier_2", - "identifier_3" - ], - "data": [ - [ - "2020-08-01 00:00:09", - "v1", - "foo", - "bar", - 4.1, - 4.1, - 4.1 - ], - [ - "2020-08-01 00:00:10", - "v1", - "foo", - "bar", - 4.1, - 4.0, - 4.2 + "tsdf": { + "ts_col": "event_ts", + "partition_cols": ["identifier_1", "identifier_2", "identifier_3"] + }, + "df": { + "schema": "event_ts STRING NOT NULL, identifier_1 STRING NOT NULL, identifier_2 STRING NOT NULL, identifier_3 STRING NOT NULL, metric_1 FLOAT NOT NULL, metric_2 FLOAT NOT NULL, metric_3 FLOAT NOT NULL", + "ts_convert": [ + "event_ts" ], - [ - "2020-08-01 00:00:11", - "v1", - "foo", - "bar", - 4.3, - 4.1, - 4.7 + "data": [ + [ + "2020-08-01 00:00:09", + "v1", + "foo", + "bar", + 4.1, + 4.1, + 4.1 + ], + [ + "2020-08-01 00:00:10", + "v1", + "foo", + "bar", + 4.1, + 4.0, + 4.2 + ], + [ + "2020-08-01 00:00:11", + "v1", + "foo", + "bar", + 4.3, + 4.1, + 4.7 + ] ] - ] + } }, "expected": { - "schema": "start_ts STRING NOT NULL, end_ts STRING NOT NULL,identifier_1 STRING NOT NULL,identifier_2 STRING NOT NULL,identifier_3 STRING NOT NULL", - "other_ts_cols": [ - "start_ts", - "end_ts" - ], - "data": [ - [ - "2020-08-01 00:00:10", - "2020-08-01 00:00:11", - "v1", - "foo", - "bar" + "df": { + "schema": "start_ts STRING NOT NULL, end_ts STRING NOT NULL,identifier_1 STRING NOT NULL,identifier_2 STRING NOT NULL,identifier_3 STRING NOT NULL", + "ts_convert": [ + "start_ts", + "end_ts" + ], + "data": [ + [ + "2020-08-01 00:00:10", + "2020-08-01 00:00:11", + "v1", + "foo", + "bar" + ] ] - ] + } } }, "test_gt_0": { "input": { - "schema": "event_ts STRING NOT NULL, identifier_1 STRING NOT NULL, identifier_2 STRING NOT NULL, identifier_3 STRING NOT NULL, metric_1 FLOAT NOT NULL, metric_2 FLOAT NOT NULL, metric_3 FLOAT NOT NULL", - "ts_col": "event_ts", - "partition_cols": [ - "identifier_1", - "identifier_2", - "identifier_3" - ], - "data": [ - [ - "2020-08-01 00:00:09", - "v1", - "foo", - "bar", - 4.1, - 4.1, - 4.1 - ], - [ - "2020-08-01 00:00:10", - "v1", - "foo", - "bar", - 4.1, - 4.1, - 4.1 - ], - [ - "2020-08-01 00:00:11", - "v1", - "foo", - "bar", - 5.0, - 5.0, - 5.0 - ], - [ - "2020-08-01 00:01:12", - "v1", - "foo", - "bar", - 10.7, - 10.7, - 10.7 - ], - [ - "2020-08-01 00:01:13", - "v1", - "foo", - "bar", - 10.7, - 10.7, - 10.7 - ], - [ - "2020-08-01 00:01:14", - "v1", - "foo", - "bar", - 10.7, - 10.7, - 10.7 - ], - [ - "2020-08-01 00:01:15", - "v1", - "foo", - "bar", - 42.3, - 42.3, - 42.3 - ], - [ - "2020-08-01 00:01:16", - "v1", - "foo", - "bar", - 37.6, - 37.6, - 37.6 - ], - [ - "2020-08-01 00:01:17", - "v1", - "foo", - "bar", - 61.5, - 61.5, - 61.5 - ], - [ - "2020-09-01 00:01:12", - "v1", - "foo", - "bar", - 28.9, - 28.9, - 28.9 + "tsdf": { + "ts_col": "event_ts", + "partition_cols": ["identifier_1", "identifier_2", "identifier_3"] + }, + "df": { + "schema": "event_ts STRING NOT NULL, identifier_1 STRING NOT NULL, identifier_2 STRING NOT NULL, identifier_3 STRING NOT NULL, metric_1 FLOAT NOT NULL, metric_2 FLOAT NOT NULL, 
metric_3 FLOAT NOT NULL", + "ts_convert": [ + "event_ts" ], - [ - "2020-09-01 00:19:12", - "v1", - "foo", - "bar", - 0.1, - 0.1, - 0.1 + "data": [ + [ + "2020-08-01 00:00:09", + "v1", + "foo", + "bar", + 4.1, + 4.1, + 4.1 + ], + [ + "2020-08-01 00:00:10", + "v1", + "foo", + "bar", + 4.1, + 4.1, + 4.1 + ], + [ + "2020-08-01 00:00:11", + "v1", + "foo", + "bar", + 5.0, + 5.0, + 5.0 + ], + [ + "2020-08-01 00:01:12", + "v1", + "foo", + "bar", + 10.7, + 10.7, + 10.7 + ], + [ + "2020-08-01 00:01:13", + "v1", + "foo", + "bar", + 10.7, + 10.7, + 10.7 + ], + [ + "2020-08-01 00:01:14", + "v1", + "foo", + "bar", + 10.7, + 10.7, + 10.7 + ], + [ + "2020-08-01 00:01:15", + "v1", + "foo", + "bar", + 42.3, + 42.3, + 42.3 + ], + [ + "2020-08-01 00:01:16", + "v1", + "foo", + "bar", + 37.6, + 37.6, + 37.6 + ], + [ + "2020-08-01 00:01:17", + "v1", + "foo", + "bar", + 61.5, + 61.5, + 61.5 + ], + [ + "2020-09-01 00:01:12", + "v1", + "foo", + "bar", + 28.9, + 28.9, + 28.9 + ], + [ + "2020-09-01 00:19:12", + "v1", + "foo", + "bar", + 0.1, + 0.1, + 0.1 + ] ] - ] + } }, "expected": { - "schema": "start_ts STRING NOT NULL, end_ts STRING NOT NULL,identifier_1 STRING NOT NULL,identifier_2 STRING NOT NULL,identifier_3 STRING NOT NULL", - "other_ts_cols": [ - "start_ts", - "end_ts" - ], - "data": [ - [ - "2020-08-01 00:00:10", - "2020-08-01 00:01:12", - "v1", - "foo", - "bar" - ], - [ - "2020-08-01 00:01:14", - "2020-08-01 00:01:15", - "v1", - "foo", - "bar" + "df": { + "schema": "start_ts STRING NOT NULL, end_ts STRING NOT NULL,identifier_1 STRING NOT NULL,identifier_2 STRING NOT NULL,identifier_3 STRING NOT NULL", + "ts_convert": [ + "start_ts", + "end_ts" ], - [ - "2020-08-01 00:01:16", - "2020-08-01 00:01:17", - "v1", - "foo", - "bar" + "data": [ + [ + "2020-08-01 00:00:10", + "2020-08-01 00:01:12", + "v1", + "foo", + "bar" + ], + [ + "2020-08-01 00:01:14", + "2020-08-01 00:01:15", + "v1", + "foo", + "bar" + ], + [ + "2020-08-01 00:01:16", + "2020-08-01 00:01:17", + "v1", + "foo", + "bar" + ] ] - ] + } } }, "test_gt_1": { "input": { - "schema": "event_ts STRING NOT NULL, identifier_1 STRING NOT NULL, identifier_2 STRING NOT NULL, identifier_3 STRING NOT NULL, metric_1 FLOAT NOT NULL, metric_2 FLOAT NOT NULL, metric_3 FLOAT NOT NULL", - "ts_col": "event_ts", - "partition_cols": [ - "identifier_1", - "identifier_2", - "identifier_3" - ], - "data": [ - [ - "2020-08-01 00:00:09", - "v1", - "foo", - "bar", - 4.3, - 4.1, - 4.7 - ], - [ - "2020-08-01 00:00:10", - "v1", - "foo", - "bar", - 4.4, - 4.0, - 4.6 + "tsdf": { + "ts_col": "event_ts", + "partition_cols": ["identifier_1", "identifier_2", "identifier_3"] + }, + "df": { + "schema": "event_ts STRING NOT NULL, identifier_1 STRING NOT NULL, identifier_2 STRING NOT NULL, identifier_3 STRING NOT NULL, metric_1 FLOAT NOT NULL, metric_2 FLOAT NOT NULL, metric_3 FLOAT NOT NULL", + "ts_convert": [ + "event_ts" ], - [ - "2020-08-01 00:00:11", - "v1", - "foo", - "bar", - 4.5, - 4.1, - 4.7 + "data": [ + [ + "2020-08-01 00:00:09", + "v1", + "foo", + "bar", + 4.3, + 4.1, + 4.7 + ], + [ + "2020-08-01 00:00:10", + "v1", + "foo", + "bar", + 4.4, + 4.0, + 4.6 + ], + [ + "2020-08-01 00:00:11", + "v1", + "foo", + "bar", + 4.5, + 4.1, + 4.7 + ] ] - ] + } }, "expected": { - "schema": "start_ts STRING NOT NULL, end_ts STRING NOT NULL,identifier_1 STRING NOT NULL,identifier_2 STRING NOT NULL,identifier_3 STRING NOT NULL", - "other_ts_cols": [ - "start_ts", - "end_ts" - ], - "data": [ - [ - "2020-08-01 00:00:10", - "2020-08-01 00:00:11", - "v1", - "foo", - "bar" + "df": { + "schema": "start_ts 
STRING NOT NULL, end_ts STRING NOT NULL,identifier_1 STRING NOT NULL,identifier_2 STRING NOT NULL,identifier_3 STRING NOT NULL", + "ts_convert": [ + "start_ts", + "end_ts" + ], + "data": [ + [ + "2020-08-01 00:00:10", + "2020-08-01 00:00:11", + "v1", + "foo", + "bar" + ] ] - ] + } } }, "test_lt_0": { "input": { - "schema": "event_ts STRING NOT NULL, identifier_1 STRING NOT NULL, identifier_2 STRING NOT NULL, identifier_3 STRING NOT NULL, metric_1 FLOAT NOT NULL, metric_2 FLOAT NOT NULL, metric_3 FLOAT NOT NULL", - "ts_col": "event_ts", - "partition_cols": [ - "identifier_1", - "identifier_2", - "identifier_3" - ], - "data": [ - [ - "2020-08-01 00:00:09", - "v1", - "foo", - "bar", - 4.1, - 4.1, - 4.1 - ], - [ - "2020-08-01 00:00:10", - "v1", - "foo", - "bar", - 4.1, - 4.1, - 4.1 - ], - [ - "2020-08-01 00:00:11", - "v1", - "foo", - "bar", - 5.0, - 5.0, - 5.0 - ], - [ - "2020-08-01 00:01:12", - "v1", - "foo", - "bar", - 10.7, - 10.7, - 10.7 - ], - [ - "2020-08-01 00:01:13", - "v1", - "foo", - "bar", - 10.7, - 10.7, - 10.7 - ], - [ - "2020-08-01 00:01:14", - "v1", - "foo", - "bar", - 10.7, - 10.7, - 10.7 - ], - [ - "2020-08-01 00:01:15", - "v1", - "foo", - "bar", - 42.3, - 42.3, - 42.3 - ], - [ - "2020-08-01 00:01:16", - "v1", - "foo", - "bar", - 37.6, - 37.6, - 37.6 - ], - [ - "2020-08-01 00:01:17", - "v1", - "foo", - "bar", - 61.5, - 61.5, - 61.5 - ], - [ - "2020-09-01 00:01:12", - "v1", - "foo", - "bar", - 28.9, - 28.9, - 28.9 + "tsdf": { + "ts_col": "event_ts", + "partition_cols": ["identifier_1", "identifier_2", "identifier_3"] + }, + "df": { + "schema": "event_ts STRING NOT NULL, identifier_1 STRING NOT NULL, identifier_2 STRING NOT NULL, identifier_3 STRING NOT NULL, metric_1 FLOAT NOT NULL, metric_2 FLOAT NOT NULL, metric_3 FLOAT NOT NULL", + "ts_convert": [ + "event_ts" ], - [ - "2020-09-01 00:19:12", - "v1", - "foo", - "bar", - 0.1, - 0.1, - 0.1 + "data": [ + [ + "2020-08-01 00:00:09", + "v1", + "foo", + "bar", + 4.1, + 4.1, + 4.1 + ], + [ + "2020-08-01 00:00:10", + "v1", + "foo", + "bar", + 4.1, + 4.1, + 4.1 + ], + [ + "2020-08-01 00:00:11", + "v1", + "foo", + "bar", + 5.0, + 5.0, + 5.0 + ], + [ + "2020-08-01 00:01:12", + "v1", + "foo", + "bar", + 10.7, + 10.7, + 10.7 + ], + [ + "2020-08-01 00:01:13", + "v1", + "foo", + "bar", + 10.7, + 10.7, + 10.7 + ], + [ + "2020-08-01 00:01:14", + "v1", + "foo", + "bar", + 10.7, + 10.7, + 10.7 + ], + [ + "2020-08-01 00:01:15", + "v1", + "foo", + "bar", + 42.3, + 42.3, + 42.3 + ], + [ + "2020-08-01 00:01:16", + "v1", + "foo", + "bar", + 37.6, + 37.6, + 37.6 + ], + [ + "2020-08-01 00:01:17", + "v1", + "foo", + "bar", + 61.5, + 61.5, + 61.5 + ], + [ + "2020-09-01 00:01:12", + "v1", + "foo", + "bar", + 28.9, + 28.9, + 28.9 + ], + [ + "2020-09-01 00:19:12", + "v1", + "foo", + "bar", + 0.1, + 0.1, + 0.1 + ] ] - ] + } }, "expected": { - "schema": "start_ts STRING NOT NULL, end_ts STRING NOT NULL,identifier_1 STRING NOT NULL,identifier_2 STRING NOT NULL,identifier_3 STRING NOT NULL", - "other_ts_cols": [ - "start_ts", - "end_ts" - ], - "data": [ - [ - "2020-08-01 00:01:15", - "2020-08-01 00:01:16", - "v1", - "foo", - "bar" + "df": { + "schema": "start_ts STRING NOT NULL, end_ts STRING NOT NULL,identifier_1 STRING NOT NULL,identifier_2 STRING NOT NULL,identifier_3 STRING NOT NULL", + "ts_convert": [ + "start_ts", + "end_ts" ], - [ - "2020-08-01 00:01:17", - "2020-09-01 00:19:12", - "v1", - "foo", - "bar" + "data": [ + [ + "2020-08-01 00:01:15", + "2020-08-01 00:01:16", + "v1", + "foo", + "bar" + ], + [ + "2020-08-01 00:01:17", + "2020-09-01 00:19:12", + 
"v1", + "foo", + "bar" + ] ] - ] + } } }, "test_lt_1": { "input": { - "schema": "event_ts STRING NOT NULL, identifier_1 STRING NOT NULL, identifier_2 STRING NOT NULL, identifier_3 STRING NOT NULL, metric_1 FLOAT NOT NULL, metric_2 FLOAT NOT NULL, metric_3 FLOAT NOT NULL", - "ts_col": "event_ts", - "partition_cols": [ - "identifier_1", - "identifier_2", - "identifier_3" - ], - "data": [ - [ - "2020-08-01 00:00:09", - "v1", - "foo", - "bar", - 4.3, - 4.1, - 4.7 - ], - [ - "2020-08-01 00:00:10", - "v1", - "foo", - "bar", - 4.2, - 4.2, - 4.8 + "tsdf": { + "ts_col": "event_ts", + "partition_cols": ["identifier_1", "identifier_2", "identifier_3"] + }, + "df": { + "schema": "event_ts STRING NOT NULL, identifier_1 STRING NOT NULL, identifier_2 STRING NOT NULL, identifier_3 STRING NOT NULL, metric_1 FLOAT NOT NULL, metric_2 FLOAT NOT NULL, metric_3 FLOAT NOT NULL", + "ts_convert": [ + "event_ts" ], - [ - "2020-08-01 00:00:11", - "v1", - "foo", - "bar", - 4.1, - 4.1, - 4.7 + "data": [ + [ + "2020-08-01 00:00:09", + "v1", + "foo", + "bar", + 4.3, + 4.1, + 4.7 + ], + [ + "2020-08-01 00:00:10", + "v1", + "foo", + "bar", + 4.2, + 4.2, + 4.8 + ], + [ + "2020-08-01 00:00:11", + "v1", + "foo", + "bar", + 4.1, + 4.1, + 4.7 + ] ] - ] + } }, "expected": { - "schema": "start_ts STRING NOT NULL, end_ts STRING NOT NULL,identifier_1 STRING NOT NULL,identifier_2 STRING NOT NULL,identifier_3 STRING NOT NULL", - "other_ts_cols": [ - "start_ts", - "end_ts" - ], - "data": [ - [ - "2020-08-01 00:00:10", - "2020-08-01 00:00:11", - "v1", - "foo", - "bar" + "df": { + "schema": "start_ts STRING NOT NULL, end_ts STRING NOT NULL,identifier_1 STRING NOT NULL,identifier_2 STRING NOT NULL,identifier_3 STRING NOT NULL", + "ts_convert": [ + "start_ts", + "end_ts" + ], + "data": [ + [ + "2020-08-01 00:00:10", + "2020-08-01 00:00:11", + "v1", + "foo", + "bar" + ] ] - ] + } } }, - "test_gte_0": { - "input": { - "schema": "event_ts STRING NOT NULL, identifier_1 STRING NOT NULL, identifier_2 STRING NOT NULL, identifier_3 STRING NOT NULL, metric_1 FLOAT NOT NULL, metric_2 FLOAT NOT NULL, metric_3 FLOAT NOT NULL", - "ts_col": "event_ts", - "partition_cols": [ - "identifier_1", - "identifier_2", - "identifier_3" - ], - "data": [ - [ - "2020-08-01 00:00:09", - "v1", - "foo", - "bar", - 4.1, - 4.1, - 4.1 - ], - [ - "2020-08-01 00:00:10", - "v1", - "foo", - "bar", - 4.1, - 4.1, - 4.1 - ], - [ - "2020-08-01 00:00:11", - "v1", - "foo", - "bar", - 5.0, - 5.0, - 5.0 - ], - [ - "2020-08-01 00:01:12", - "v1", - "foo", - "bar", - 10.7, - 10.7, - 10.7 - ], - [ - "2020-08-01 00:01:13", - "v1", - "foo", - "bar", - 10.7, - 10.7, - 10.7 - ], - [ - "2020-08-01 00:01:14", - "v1", - "foo", - "bar", - 10.7, - 10.7, - 10.7 - ], - [ - "2020-08-01 00:01:15", - "v1", - "foo", - "bar", - 42.3, - 42.3, - 42.3 - ], - [ - "2020-08-01 00:01:16", - "v1", - "foo", - "bar", - 37.6, - 37.6, - 37.6 - ], - [ - "2020-08-01 00:01:17", - "v1", - "foo", - "bar", - 61.5, - 61.5, - 61.5 - ], - [ - "2020-09-01 00:01:12", - "v1", - "foo", - "bar", - 28.9, - 28.9, - 28.9 + "test_gte_0": { + "input": { + "tsdf": { + "ts_col": "event_ts", + "partition_cols": ["identifier_1", "identifier_2", "identifier_3"] + }, + "df": { + "schema": "event_ts STRING NOT NULL, identifier_1 STRING NOT NULL, identifier_2 STRING NOT NULL, identifier_3 STRING NOT NULL, metric_1 FLOAT NOT NULL, metric_2 FLOAT NOT NULL, metric_3 FLOAT NOT NULL", + "ts_convert": [ + "event_ts" ], - [ - "2020-09-01 00:19:12", - "v1", - "foo", - "bar", - 0.1, - 0.1, - 0.1 + "data": [ + [ + "2020-08-01 00:00:09", + "v1", + 
"foo", + "bar", + 4.1, + 4.1, + 4.1 + ], + [ + "2020-08-01 00:00:10", + "v1", + "foo", + "bar", + 4.1, + 4.1, + 4.1 + ], + [ + "2020-08-01 00:00:11", + "v1", + "foo", + "bar", + 5.0, + 5.0, + 5.0 + ], + [ + "2020-08-01 00:01:12", + "v1", + "foo", + "bar", + 10.7, + 10.7, + 10.7 + ], + [ + "2020-08-01 00:01:13", + "v1", + "foo", + "bar", + 10.7, + 10.7, + 10.7 + ], + [ + "2020-08-01 00:01:14", + "v1", + "foo", + "bar", + 10.7, + 10.7, + 10.7 + ], + [ + "2020-08-01 00:01:15", + "v1", + "foo", + "bar", + 42.3, + 42.3, + 42.3 + ], + [ + "2020-08-01 00:01:16", + "v1", + "foo", + "bar", + 37.6, + 37.6, + 37.6 + ], + [ + "2020-08-01 00:01:17", + "v1", + "foo", + "bar", + 61.5, + 61.5, + 61.5 + ], + [ + "2020-09-01 00:01:12", + "v1", + "foo", + "bar", + 28.9, + 28.9, + 28.9 + ], + [ + "2020-09-01 00:19:12", + "v1", + "foo", + "bar", + 0.1, + 0.1, + 0.1 + ] ] - ] + } }, "expected": { + "df": { "schema": "start_ts STRING NOT NULL, end_ts STRING NOT NULL,identifier_1 STRING NOT NULL,identifier_2 STRING NOT NULL,identifier_3 STRING NOT NULL", - "other_ts_cols": [ + "ts_convert": [ "start_ts", "end_ts" ], @@ -3140,815 +3284,857 @@ "bar" ] ] + } } }, "test_gte_1": { "input": { - "schema": "event_ts STRING NOT NULL, identifier_1 STRING NOT NULL, identifier_2 STRING NOT NULL, identifier_3 STRING NOT NULL, metric_1 FLOAT NOT NULL, metric_2 FLOAT NOT NULL, metric_3 FLOAT NOT NULL", - "ts_col": "event_ts", - "partition_cols": [ - "identifier_1", - "identifier_2", - "identifier_3" - ], - "data": [ - [ - "2020-08-01 00:00:09", - "v1", - "foo", - "bar", - 4.3, - 4.1, - 4.7 - ], - [ - "2020-08-01 00:00:10", - "v1", - "foo", - "bar", - 4.4, - 4.0, - 4.6 + "tsdf": { + "ts_col": "event_ts", + "partition_cols": ["identifier_1", "identifier_2", "identifier_3"] + }, + "df": { + "schema": "event_ts STRING NOT NULL, identifier_1 STRING NOT NULL, identifier_2 STRING NOT NULL, identifier_3 STRING NOT NULL, metric_1 FLOAT NOT NULL, metric_2 FLOAT NOT NULL, metric_3 FLOAT NOT NULL", + "ts_convert": [ + "event_ts" ], - [ - "2020-08-01 00:00:11", - "v1", - "foo", - "bar", - 4.5, - 4.0, - 4.7 + "data": [ + [ + "2020-08-01 00:00:09", + "v1", + "foo", + "bar", + 4.3, + 4.1, + 4.7 + ], + [ + "2020-08-01 00:00:10", + "v1", + "foo", + "bar", + 4.4, + 4.0, + 4.6 + ], + [ + "2020-08-01 00:00:11", + "v1", + "foo", + "bar", + 4.5, + 4.0, + 4.7 + ] ] - ] + } }, "expected": { - "schema": "start_ts STRING NOT NULL, end_ts STRING NOT NULL,identifier_1 STRING NOT NULL,identifier_2 STRING NOT NULL,identifier_3 STRING NOT NULL", - "other_ts_cols": [ - "start_ts", - "end_ts" - ], - "data": [ - [ - "2020-08-01 00:00:10", - "2020-08-01 00:00:11", - "v1", - "foo", - "bar" + "df": { + "schema": "start_ts STRING NOT NULL, end_ts STRING NOT NULL,identifier_1 STRING NOT NULL,identifier_2 STRING NOT NULL,identifier_3 STRING NOT NULL", + "ts_convert": [ + "start_ts", + "end_ts" + ], + "data": [ + [ + "2020-08-01 00:00:10", + "2020-08-01 00:00:11", + "v1", + "foo", + "bar" + ] ] - ] + } } }, "test_lte_0": { "input": { - "schema": "event_ts STRING NOT NULL, identifier_1 STRING NOT NULL, identifier_2 STRING NOT NULL, identifier_3 STRING NOT NULL, metric_1 FLOAT NOT NULL, metric_2 FLOAT NOT NULL, metric_3 FLOAT NOT NULL", - "ts_col": "event_ts", - "partition_cols": [ - "identifier_1", - "identifier_2", - "identifier_3" - ], - "data": [ - [ - "2020-08-01 00:00:09", - "v1", - "foo", - "bar", - 4.1, - 4.1, - 4.1 - ], - [ - "2020-08-01 00:00:10", - "v1", - "foo", - "bar", - 4.1, - 4.1, - 4.1 - ], - [ - "2020-08-01 00:00:11", - "v1", - "foo", - "bar", - 5.0, - 
5.0, - 5.0 - ], - [ - "2020-08-01 00:01:12", - "v1", - "foo", - "bar", - 10.7, - 10.7, - 10.7 - ], - [ - "2020-08-01 00:01:13", - "v1", - "foo", - "bar", - 10.7, - 10.7, - 10.7 - ], - [ - "2020-08-01 00:01:14", - "v1", - "foo", - "bar", - 10.7, - 10.7, - 10.7 - ], - [ - "2020-08-01 00:01:15", - "v1", - "foo", - "bar", - 42.3, - 42.3, - 42.3 - ], - [ - "2020-08-01 00:01:16", - "v1", - "foo", - "bar", - 37.6, - 37.6, - 37.6 - ], - [ - "2020-08-01 00:01:17", - "v1", - "foo", - "bar", - 61.5, - 61.5, - 61.5 - ], - [ - "2020-09-01 00:01:12", - "v1", - "foo", - "bar", - 28.9, - 28.9, - 28.9 + "tsdf": { + "ts_col": "event_ts", + "partition_cols": ["identifier_1", "identifier_2", "identifier_3"] + }, + "df": { + "schema": "event_ts STRING NOT NULL, identifier_1 STRING NOT NULL, identifier_2 STRING NOT NULL, identifier_3 STRING NOT NULL, metric_1 FLOAT NOT NULL, metric_2 FLOAT NOT NULL, metric_3 FLOAT NOT NULL", + "ts_convert": [ + "event_ts" ], - [ - "2020-09-01 00:19:12", - "v1", - "foo", - "bar", - 0.1, - 0.1, - 0.1 + "data": [ + [ + "2020-08-01 00:00:09", + "v1", + "foo", + "bar", + 4.1, + 4.1, + 4.1 + ], + [ + "2020-08-01 00:00:10", + "v1", + "foo", + "bar", + 4.1, + 4.1, + 4.1 + ], + [ + "2020-08-01 00:00:11", + "v1", + "foo", + "bar", + 5.0, + 5.0, + 5.0 + ], + [ + "2020-08-01 00:01:12", + "v1", + "foo", + "bar", + 10.7, + 10.7, + 10.7 + ], + [ + "2020-08-01 00:01:13", + "v1", + "foo", + "bar", + 10.7, + 10.7, + 10.7 + ], + [ + "2020-08-01 00:01:14", + "v1", + "foo", + "bar", + 10.7, + 10.7, + 10.7 + ], + [ + "2020-08-01 00:01:15", + "v1", + "foo", + "bar", + 42.3, + 42.3, + 42.3 + ], + [ + "2020-08-01 00:01:16", + "v1", + "foo", + "bar", + 37.6, + 37.6, + 37.6 + ], + [ + "2020-08-01 00:01:17", + "v1", + "foo", + "bar", + 61.5, + 61.5, + 61.5 + ], + [ + "2020-09-01 00:01:12", + "v1", + "foo", + "bar", + 28.9, + 28.9, + 28.9 + ], + [ + "2020-09-01 00:19:12", + "v1", + "foo", + "bar", + 0.1, + 0.1, + 0.1 + ] ] - ] + } }, "expected": { - "schema": "start_ts STRING NOT NULL, end_ts STRING NOT NULL,identifier_1 STRING NOT NULL,identifier_2 STRING NOT NULL,identifier_3 STRING NOT NULL", - "other_ts_cols": [ - "start_ts", - "end_ts" - ], - "data": [ - [ - "2020-08-01 00:00:09", - "2020-08-01 00:00:10", - "v1", - "foo", - "bar" - ], - [ - "2020-08-01 00:01:12", - "2020-08-01 00:01:14", - "v1", - "foo", - "bar" - ], - [ - "2020-08-01 00:01:15", - "2020-08-01 00:01:16", - "v1", - "foo", - "bar" + "df": { + "schema": "start_ts STRING NOT NULL, end_ts STRING NOT NULL,identifier_1 STRING NOT NULL,identifier_2 STRING NOT NULL,identifier_3 STRING NOT NULL", + "ts_convert": [ + "start_ts", + "end_ts" ], - [ - "2020-08-01 00:01:17", - "2020-09-01 00:19:12", - "v1", - "foo", - "bar" + "data": [ + [ + "2020-08-01 00:00:09", + "2020-08-01 00:00:10", + "v1", + "foo", + "bar" + ], + [ + "2020-08-01 00:01:12", + "2020-08-01 00:01:14", + "v1", + "foo", + "bar" + ], + [ + "2020-08-01 00:01:15", + "2020-08-01 00:01:16", + "v1", + "foo", + "bar" + ], + [ + "2020-08-01 00:01:17", + "2020-09-01 00:19:12", + "v1", + "foo", + "bar" + ] ] - ] + } } }, "test_lte_1": { "input": { - "schema": "event_ts STRING NOT NULL, identifier_1 STRING NOT NULL, identifier_2 STRING NOT NULL, identifier_3 STRING NOT NULL, metric_1 FLOAT NOT NULL, metric_2 FLOAT NOT NULL, metric_3 FLOAT NOT NULL", - "ts_col": "event_ts", - "partition_cols": [ - "identifier_1", - "identifier_2", - "identifier_3" - ], - "data": [ - [ - "2020-08-01 00:00:09", - "v1", - "foo", - "bar", - 4.3, - 4.1, - 4.7 - ], - [ - "2020-08-01 00:00:10", - "v1", - "foo", - 
"bar", - 4.2, - 4.2, - 4.8 + "tsdf": { + "ts_col": "event_ts", + "partition_cols": ["identifier_1", "identifier_2", "identifier_3"] + }, + "df": { + "schema": "event_ts STRING NOT NULL, identifier_1 STRING NOT NULL, identifier_2 STRING NOT NULL, identifier_3 STRING NOT NULL, metric_1 FLOAT NOT NULL, metric_2 FLOAT NOT NULL, metric_3 FLOAT NOT NULL", + "ts_convert": [ + "event_ts" ], - [ - "2020-08-01 00:00:11", - "v1", - "foo", - "bar", - 4.1, - 4.2, - 4.7 + "data": [ + [ + "2020-08-01 00:00:09", + "v1", + "foo", + "bar", + 4.3, + 4.1, + 4.7 + ], + [ + "2020-08-01 00:00:10", + "v1", + "foo", + "bar", + 4.2, + 4.2, + 4.8 + ], + [ + "2020-08-01 00:00:11", + "v1", + "foo", + "bar", + 4.1, + 4.2, + 4.7 + ] ] - ] + } }, "expected": { - "schema": "start_ts STRING NOT NULL, end_ts STRING NOT NULL,identifier_1 STRING NOT NULL,identifier_2 STRING NOT NULL,identifier_3 STRING NOT NULL", - "other_ts_cols": [ - "start_ts", - "end_ts" - ], - "data": [ - [ - "2020-08-01 00:00:10", - "2020-08-01 00:00:11", - "v1", - "foo", - "bar" + "df": { + "schema": "start_ts STRING NOT NULL, end_ts STRING NOT NULL,identifier_1 STRING NOT NULL,identifier_2 STRING NOT NULL,identifier_3 STRING NOT NULL", + "ts_convert": [ + "start_ts", + "end_ts" + ], + "data": [ + [ + "2020-08-01 00:00:10", + "2020-08-01 00:00:11", + "v1", + "foo", + "bar" + ] ] - ] + } } }, "test_threshold_fn": { "input": { - "schema": "event_ts STRING NOT NULL, identifier_1 STRING NOT NULL, identifier_2 STRING NOT NULL, identifier_3 STRING NOT NULL, metric_1 FLOAT NOT NULL, metric_2 FLOAT NOT NULL, metric_3 FLOAT NOT NULL", - "ts_col": "event_ts", - "partition_cols": [ - "identifier_1", - "identifier_2", - "identifier_3" - ], - "data": [ - [ - "2020-08-01 00:00:09", - "v1", - "foo", - "bar", - 4.1, - 4.1, - 4.1 - ], - [ - "2020-08-01 00:00:10", - "v1", - "foo", - "bar", - 4.1, - 4.1, - 4.1 - ], - [ - "2020-08-01 00:00:11", - "v1", - "foo", - "bar", - 5.0, - 5.0, - 5.0 - ], - [ - "2020-08-01 00:01:12", - "v1", - "foo", - "bar", - 10.7, - 10.7, - 10.7 - ], - [ - "2020-08-01 00:01:13", - "v1", - "foo", - "bar", - 10.7, - 10.7, - 10.7 - ], - [ - "2020-08-01 00:01:14", - "v1", - "foo", - "bar", - 10.7, - 10.7, - 10.7 - ], - [ - "2020-08-01 00:01:15", - "v1", - "foo", - "bar", - 42.3, - 42.3, - 42.3 - ], - [ - "2020-08-01 00:01:16", - "v1", - "foo", - "bar", - 37.6, - 37.6, - 37.6 - ], - [ - "2020-08-01 00:01:17", - "v1", - "foo", - "bar", - 61.5, - 61.5, - 61.5 - ], - [ - "2020-09-01 00:01:12", - "v1", - "foo", - "bar", - 28.9, - 28.9, - 28.9 + "tsdf": { + "ts_col": "event_ts", + "partition_cols": ["identifier_1", "identifier_2", "identifier_3"] + }, + "df": { + "schema": "event_ts STRING NOT NULL, identifier_1 STRING NOT NULL, identifier_2 STRING NOT NULL, identifier_3 STRING NOT NULL, metric_1 FLOAT NOT NULL, metric_2 FLOAT NOT NULL, metric_3 FLOAT NOT NULL", + "ts_convert": [ + "event_ts" ], - [ - "2020-09-01 00:19:12", - "v1", - "foo", - "bar", - 0.1, - 0.1, - 0.1 + "data": [ + [ + "2020-08-01 00:00:09", + "v1", + "foo", + "bar", + 4.1, + 4.1, + 4.1 + ], + [ + "2020-08-01 00:00:10", + "v1", + "foo", + "bar", + 4.1, + 4.1, + 4.1 + ], + [ + "2020-08-01 00:00:11", + "v1", + "foo", + "bar", + 5.0, + 5.0, + 5.0 + ], + [ + "2020-08-01 00:01:12", + "v1", + "foo", + "bar", + 10.7, + 10.7, + 10.7 + ], + [ + "2020-08-01 00:01:13", + "v1", + "foo", + "bar", + 10.7, + 10.7, + 10.7 + ], + [ + "2020-08-01 00:01:14", + "v1", + "foo", + "bar", + 10.7, + 10.7, + 10.7 + ], + [ + "2020-08-01 00:01:15", + "v1", + "foo", + "bar", + 42.3, + 42.3, + 42.3 + ], + [ + 
"2020-08-01 00:01:16", + "v1", + "foo", + "bar", + 37.6, + 37.6, + 37.6 + ], + [ + "2020-08-01 00:01:17", + "v1", + "foo", + "bar", + 61.5, + 61.5, + 61.5 + ], + [ + "2020-09-01 00:01:12", + "v1", + "foo", + "bar", + 28.9, + 28.9, + 28.9 + ], + [ + "2020-09-01 00:19:12", + "v1", + "foo", + "bar", + 0.1, + 0.1, + 0.1 + ] ] - ] + } }, "expected": { - "schema": "start_ts: STRING, end_ts: STRING, identifier_1 STRING NOT NULL, identifier_2 STRING NOT NULL ,identifier_3 STRING NOT NULL", - "other_ts_cols": [ - "start_ts", - "end_ts" - ], - "data": [ - [ - "2020-08-01 00:00:09", - "2020-08-01 00:00:10", - "v1", - "foo", - "bar" + "df": { + "schema": "start_ts: STRING, end_ts: STRING, identifier_1 STRING NOT NULL, identifier_2 STRING NOT NULL ,identifier_3 STRING NOT NULL", + "ts_convert": [ + "start_ts", + "end_ts" ], - [ - "2020-08-01 00:01:12", - "2020-08-01 00:01:14", - "v1", - "foo", - "bar" + "data": [ + [ + "2020-08-01 00:00:09", + "2020-08-01 00:00:10", + "v1", + "foo", + "bar" + ], + [ + "2020-08-01 00:01:12", + "2020-08-01 00:01:14", + "v1", + "foo", + "bar" + ] ] - ] + } } }, "test_null_safe_eq_0": { "input": { - "schema": "event_ts STRING NOT NULL, identifier_1 STRING NOT NULL, identifier_2 STRING NOT NULL, identifier_3 STRING NOT NULL, metric_1 FLOAT, metric_2 FLOAT, metric_3 FLOAT", - "ts_col": "event_ts", - "partition_cols": [ - "identifier_1", - "identifier_2", - "identifier_3" - ], - "data": [ - [ - "2020-08-01 00:00:09", - "v1", - "foo", - "bar", - 4.1, - null, - 4.1 - ], - [ - "2020-08-01 00:00:10", - "v1", - "foo", - "bar", - 4.1, - null, - 4.1 - ], - [ - "2020-08-01 00:00:11", - "v1", - "foo", - "bar", - 5.0, - 5.0, - 5.0 - ], - [ - "2020-08-01 00:01:12", - "v1", - "foo", - "bar", - null, - 10.7, - 10.7 - ], - [ - "2020-08-01 00:01:13", - "v1", - "foo", - "bar", - null, - 10.7, - 10.7 - ], - [ - "2020-08-01 00:01:14", - "v1", - "foo", - "bar", - null, - 10.7, - 10.7 - ], - [ - "2020-08-01 00:01:15", - "v1", - "foo", - "bar", - 42.3, - 42.3, - 42.3 - ], - [ - "2020-08-01 00:01:16", - "v1", - "foo", - "bar", - 37.6, - 37.6, - 37.6 - ], - [ - "2020-08-01 00:01:17", - "v1", - "foo", - "bar", - 61.5, - 61.5, - 61.5 - ], - [ - "2020-09-01 00:01:12", - "v1", - "foo", - "bar", - 28.9, - 28.9, - 28.9 + "tsdf": { + "ts_col": "event_ts", + "partition_cols": ["identifier_1", "identifier_2", "identifier_3"] + }, + "df": { + "schema": "event_ts STRING NOT NULL, identifier_1 STRING NOT NULL, identifier_2 STRING NOT NULL, identifier_3 STRING NOT NULL, metric_1 FLOAT, metric_2 FLOAT, metric_3 FLOAT", + "ts_convert": [ + "event_ts" ], - [ - "2020-09-01 00:19:12", - "v1", - "foo", - "bar", - 0.1, - 0.1, - 0.1 + "data": [ + [ + "2020-08-01 00:00:09", + "v1", + "foo", + "bar", + 4.1, + null, + 4.1 + ], + [ + "2020-08-01 00:00:10", + "v1", + "foo", + "bar", + 4.1, + null, + 4.1 + ], + [ + "2020-08-01 00:00:11", + "v1", + "foo", + "bar", + 5.0, + 5.0, + 5.0 + ], + [ + "2020-08-01 00:01:12", + "v1", + "foo", + "bar", + null, + 10.7, + 10.7 + ], + [ + "2020-08-01 00:01:13", + "v1", + "foo", + "bar", + null, + 10.7, + 10.7 + ], + [ + "2020-08-01 00:01:14", + "v1", + "foo", + "bar", + null, + 10.7, + 10.7 + ], + [ + "2020-08-01 00:01:15", + "v1", + "foo", + "bar", + 42.3, + 42.3, + 42.3 + ], + [ + "2020-08-01 00:01:16", + "v1", + "foo", + "bar", + 37.6, + 37.6, + 37.6 + ], + [ + "2020-08-01 00:01:17", + "v1", + "foo", + "bar", + 61.5, + 61.5, + 61.5 + ], + [ + "2020-09-01 00:01:12", + "v1", + "foo", + "bar", + 28.9, + 28.9, + 28.9 + ], + [ + "2020-09-01 00:19:12", + "v1", + "foo", + "bar", + 0.1, + 0.1, 
+ 0.1 + ] ] - ] + } }, "expected": { - "schema": "start_ts STRING NOT NULL, end_ts STRING NOT NULL,identifier_1 STRING NOT NULL,identifier_2 STRING NOT NULL,identifier_3 STRING NOT NULL", - "other_ts_cols": [ - "start_ts", - "end_ts" - ], - "data": [ - [ - "2020-08-01 00:00:09", - "2020-08-01 00:00:10", - "v1", - "foo", - "bar" + "df": { + "schema": "start_ts STRING NOT NULL, end_ts STRING NOT NULL,identifier_1 STRING NOT NULL,identifier_2 STRING NOT NULL,identifier_3 STRING NOT NULL", + "ts_convert": [ + "start_ts", + "end_ts" ], - [ - "2020-08-01 00:01:12", - "2020-08-01 00:01:14", - "v1", - "foo", - "bar" + "data": [ + [ + "2020-08-01 00:00:09", + "2020-08-01 00:00:10", + "v1", + "foo", + "bar" + ], + [ + "2020-08-01 00:01:12", + "2020-08-01 00:01:14", + "v1", + "foo", + "bar" + ] ] - ] + } } }, "test_null_safe_eq_1": { "input": { - "schema": "event_ts STRING NOT NULL, identifier_1 STRING NOT NULL, identifier_2 STRING NOT NULL, identifier_3 STRING NOT NULL, metric_1 FLOAT, metric_2 FLOAT, metric_3 FLOAT", - "ts_col": "event_ts", - "partition_cols": [ - "identifier_1", - "identifier_2", - "identifier_3" - ], - "data": [ - [ - "2020-08-01 00:00:09", - "v1", - "foo", - "bar", - 4.1, - null, - 4.1 - ], - [ - "2020-08-01 00:00:10", - "v1", - "foo", - "bar", - 4.1, - 4.1, - null - ], - [ - "2020-08-01 00:00:11", - "v1", - "foo", - "bar", - 5.0, - 5.0, - 5.0 - ], - [ - "2020-08-01 00:01:12", - "v1", - "foo", - "bar", - null, - 10.7, - 10.7 - ], - [ - "2020-08-01 00:01:13", - "v1", - "foo", - "bar", - null, - 10.7, - 10.7 - ], - [ - "2020-08-01 00:01:14", - "v1", - "foo", - "bar", - 10.7, - null, - 10.7 - ], - [ - "2020-08-01 00:01:15", - "v1", - "foo", - "bar", - 42.3, - 42.3, - 42.3 - ], - [ - "2020-08-01 00:01:16", - "v1", - "foo", - "bar", - 37.6, - 37.6, - 37.6 - ], - [ - "2020-08-01 00:01:17", - "v1", - "foo", - "bar", - 61.5, - 61.5, - 61.5 - ], - [ - "2020-09-01 00:01:12", - "v1", - "foo", - "bar", - 28.9, - 28.9, - 28.9 + "tsdf": { + "ts_col": "event_ts", + "partition_cols": ["identifier_1", "identifier_2", "identifier_3"] + }, + "df": { + "schema": "event_ts STRING NOT NULL, identifier_1 STRING NOT NULL, identifier_2 STRING NOT NULL, identifier_3 STRING NOT NULL, metric_1 FLOAT, metric_2 FLOAT, metric_3 FLOAT", + "ts_convert": [ + "event_ts" ], - [ - "2020-09-01 00:19:12", - "v1", - "foo", - "bar", - 0.1, - 0.1, - 0.1 + "data": [ + [ + "2020-08-01 00:00:09", + "v1", + "foo", + "bar", + 4.1, + null, + 4.1 + ], + [ + "2020-08-01 00:00:10", + "v1", + "foo", + "bar", + 4.1, + 4.1, + null + ], + [ + "2020-08-01 00:00:11", + "v1", + "foo", + "bar", + 5.0, + 5.0, + 5.0 + ], + [ + "2020-08-01 00:01:12", + "v1", + "foo", + "bar", + null, + 10.7, + 10.7 + ], + [ + "2020-08-01 00:01:13", + "v1", + "foo", + "bar", + null, + 10.7, + 10.7 + ], + [ + "2020-08-01 00:01:14", + "v1", + "foo", + "bar", + 10.7, + null, + 10.7 + ], + [ + "2020-08-01 00:01:15", + "v1", + "foo", + "bar", + 42.3, + 42.3, + 42.3 + ], + [ + "2020-08-01 00:01:16", + "v1", + "foo", + "bar", + 37.6, + 37.6, + 37.6 + ], + [ + "2020-08-01 00:01:17", + "v1", + "foo", + "bar", + 61.5, + 61.5, + 61.5 + ], + [ + "2020-09-01 00:01:12", + "v1", + "foo", + "bar", + 28.9, + 28.9, + 28.9 + ], + [ + "2020-09-01 00:19:12", + "v1", + "foo", + "bar", + 0.1, + 0.1, + 0.1 + ] ] - ] + } }, "expected": { - "schema": "start_ts STRING NOT NULL, end_ts STRING NOT NULL,identifier_1 STRING NOT NULL,identifier_2 STRING NOT NULL,identifier_3 STRING NOT NULL", - "other_ts_cols": [ - "start_ts", - "end_ts" - ], - "data": [ - [ - "2020-08-01 00:01:12", - 
"2020-08-01 00:01:13", - "v1", - "foo", - "bar" + "df": { + "schema": "start_ts STRING NOT NULL, end_ts STRING NOT NULL,identifier_1 STRING NOT NULL,identifier_2 STRING NOT NULL,identifier_3 STRING NOT NULL", + "ts_convert": [ + "start_ts", + "end_ts" + ], + "data": [ + [ + "2020-08-01 00:01:12", + "2020-08-01 00:01:13", + "v1", + "foo", + "bar" + ] ] - ] + } } }, "test_adjacent_intervals": { "input": { - "schema": "event_ts STRING NOT NULL, identifier_1 STRING NOT NULL, identifier_2 STRING NOT NULL, identifier_3 STRING NOT NULL, metric_1 FLOAT, metric_2 FLOAT, metric_3 FLOAT", - "ts_col": "event_ts", - "partition_cols": [ - "identifier_1", - "identifier_2", - "identifier_3" - ], - "data": [ - [ - "2020-08-01 00:00:09", - "v1", - "foo", - "bar", - 4.1, - 4.1, - 4.1 - ], - [ - "2020-08-01 00:00:10", - "v1", - "foo", - "bar", - 4.1, - 4.1, - 4.1 - ], - [ - "2020-08-01 00:00:11", - "v1", - "foo", - "bar", - 5.0, - 5.0, - 5.0 - ], - [ - "2020-08-01 00:00:12", - "v1", - "foo", - "bar", - 5.0, - 5.0, - 5.0 - ], - [ - "2020-08-01 00:01:12", - "v1", - "foo", - "bar", - 10.7, - 10.7, - 10.7 - ], - [ - "2020-08-01 00:01:13", - "v1", - "foo", - "bar", - 10.7, - 10.7, - 10.7 + "tsdf": { + "ts_col": "event_ts", + "partition_cols": ["identifier_1", "identifier_2", "identifier_3"] + }, + "df": { + "schema": "event_ts STRING NOT NULL, identifier_1 STRING NOT NULL, identifier_2 STRING NOT NULL, identifier_3 STRING NOT NULL, metric_1 FLOAT, metric_2 FLOAT, metric_3 FLOAT", + "ts_convert": [ + "event_ts" ], - [ - "2020-08-01 00:01:14", - "v1", - "foo", - "bar", - 10.7, - 10.7, - 10.7 + "data": [ + [ + "2020-08-01 00:00:09", + "v1", + "foo", + "bar", + 4.1, + 4.1, + 4.1 + ], + [ + "2020-08-01 00:00:10", + "v1", + "foo", + "bar", + 4.1, + 4.1, + 4.1 + ], + [ + "2020-08-01 00:00:11", + "v1", + "foo", + "bar", + 5.0, + 5.0, + 5.0 + ], + [ + "2020-08-01 00:00:12", + "v1", + "foo", + "bar", + 5.0, + 5.0, + 5.0 + ], + [ + "2020-08-01 00:01:12", + "v1", + "foo", + "bar", + 10.7, + 10.7, + 10.7 + ], + [ + "2020-08-01 00:01:13", + "v1", + "foo", + "bar", + 10.7, + 10.7, + 10.7 + ], + [ + "2020-08-01 00:01:14", + "v1", + "foo", + "bar", + 10.7, + 10.7, + 10.7 + ] ] - ] + } }, "expected": { - "schema": "start_ts STRING NOT NULL, end_ts STRING NOT NULL,identifier_1 STRING NOT NULL,identifier_2 STRING NOT NULL,identifier_3 STRING NOT NULL", - "other_ts_cols": [ - "start_ts", - "end_ts" - ], - "data": [ - [ - "2020-08-01 00:00:09", - "2020-08-01 00:00:10", - "v1", - "foo", - "bar" - ], - [ - "2020-08-01 00:00:11", - "2020-08-01 00:00:12", - "v1", - "foo", - "bar" + "df": { + "schema": "start_ts STRING NOT NULL, end_ts STRING NOT NULL,identifier_1 STRING NOT NULL,identifier_2 STRING NOT NULL,identifier_3 STRING NOT NULL", + "ts_convert": [ + "start_ts", + "end_ts" ], - [ - "2020-08-01 00:01:12", - "2020-08-01 00:01:14", - "v1", - "foo", - "bar" + "data": [ + [ + "2020-08-01 00:00:09", + "2020-08-01 00:00:10", + "v1", + "foo", + "bar" + ], + [ + "2020-08-01 00:00:11", + "2020-08-01 00:00:12", + "v1", + "foo", + "bar" + ], + [ + "2020-08-01 00:01:12", + "2020-08-01 00:01:14", + "v1", + "foo", + "bar" + ] ] - ] + } } }, "test_invalid_state_definition_str": { "input": { - "schema": "event_ts STRING NOT NULL, identifier_1 STRING NOT NULL, identifier_2 STRING NOT NULL, identifier_3 STRING NOT NULL, metric_1 FLOAT NOT NULL, metric_2 FLOAT NOT NULL, metric_3 FLOAT NOT NULL", - "ts_col": "event_ts", - "partition_cols": [ - "identifier_1", - "identifier_2", - "identifier_3" - ], - "data": [ - [ - "2020-08-01 00:00:09", - "v1", - 
"foo", - "bar", - 4.1, - 4.1, - 4.1 + "tsdf": { + "ts_col": "event_ts", + "partition_cols": ["identifier_1", "identifier_2", "identifier_3"] + }, + "df": { + "schema": "event_ts STRING NOT NULL, identifier_1 STRING NOT NULL, identifier_2 STRING NOT NULL, identifier_3 STRING NOT NULL, metric_1 FLOAT NOT NULL, metric_2 FLOAT NOT NULL, metric_3 FLOAT NOT NULL", + "ts_convert": [ + "event_ts" + ], + "data": [ + [ + "2020-08-01 00:00:09", + "v1", + "foo", + "bar", + 4.1, + 4.1, + 4.1 + ] ] - ] + } } }, "test_invalid_state_definition_type": { "input": { - "schema": "event_ts STRING NOT NULL, identifier_1 STRING NOT NULL, identifier_2 STRING NOT NULL, identifier_3 STRING NOT NULL, metric_1 FLOAT NOT NULL, metric_2 FLOAT NOT NULL, metric_3 FLOAT NOT NULL", - "ts_col": "event_ts", - "partition_cols": [ - "identifier_1", - "identifier_2", - "identifier_3" - ], - "data": [ - [ - "2020-08-01 00:00:09", - "v1", - "foo", - "bar", - 4.1, - 4.1, - 4.1 + "tsdf": { + "ts_col": "event_ts", + "partition_cols": ["identifier_1", "identifier_2", "identifier_3"] + }, + "df": { + "schema": "event_ts STRING NOT NULL, identifier_1 STRING NOT NULL, identifier_2 STRING NOT NULL, identifier_3 STRING NOT NULL, metric_1 FLOAT NOT NULL, metric_2 FLOAT NOT NULL, metric_3 FLOAT NOT NULL", + "ts_convert": [ + "event_ts" + ], + "data": [ + [ + "2020-08-01 00:00:09", + "v1", + "foo", + "bar", + 4.1, + 4.1, + 4.1 + ] ] - ] + } } } } From ab5210ab19a69568040ee91cfb277ce6d546925f Mon Sep 17 00:00:00 2001 From: Lorin Date: Tue, 9 Jul 2024 12:01:33 -0600 Subject: [PATCH 122/137] remove test_tsdf_interpolate --- python/tests/tsdf_tests.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/python/tests/tsdf_tests.py b/python/tests/tsdf_tests.py index 1c14f05b..df25b462 100644 --- a/python/tests/tsdf_tests.py +++ b/python/tests/tsdf_tests.py @@ -869,15 +869,6 @@ def test_withPartitionCols(self): self.assertEqual(init_tsdf.partitionCols, []) self.assertEqual(actual_tsdf.partitionCols, ["symbol"]) - # def test_tsdf_interpolate(self): - # # TODO: remove this test - # init_tsdf = self.get_test_df_builder("init").as_tsdf() - # expected_tsdf = self.get_test_df_builder("expected").as_tsdf() - # - # actual_tsdf = init_tsdf.interpolate("zero", "second", "floor") - # actual_tsdf.df.show() - # self.assertDataFrameEquality(actual_tsdf, expected_tsdf) - class FourierTransformTest(SparkTest): def test_fourier_transform(self): From 09e64239a85bdf15fa583c0a1507b39c4a314d4c Mon Sep 17 00:00:00 2001 From: Lorin Date: Tue, 9 Jul 2024 12:33:48 -0600 Subject: [PATCH 123/137] refactor interpol_tests complete --- python/tests/interpol_tests.py | 72 +- .../tests/unit_test_data/interpol_tests.json | 2945 +++++++++-------- 2 files changed, 1524 insertions(+), 1493 deletions(-) diff --git a/python/tests/interpol_tests.py b/python/tests/interpol_tests.py index 0235a011..49754ee0 100644 --- a/python/tests/interpol_tests.py +++ b/python/tests/interpol_tests.py @@ -24,7 +24,7 @@ def test_validate_fill_method(self): ) def test_validate_col_exist_in_df(self): - input_df: DataFrame = self.get_data_as_sdf("input_data") + input_df: DataFrame = self.get_test_df_builder("init").as_sdf() self.assertRaises( ValueError, @@ -54,7 +54,7 @@ def test_validate_col_exist_in_df(self): ) def test_validate_col_target_cols_data_type(self): - input_df: DataFrame = self.get_data_as_sdf("input_data") + input_df: DataFrame = self.get_test_df_builder("init").as_sdf() self.assertRaises( TypeError, @@ -69,7 +69,7 @@ def test_fill_validation(self): """Test fill parameter is valid.""" # 
load test data - input_tsdf: TSDF = self.get_data_as_tsdf("input_data") + input_tsdf: TSDF = self.get_test_df_builder("init").as_tsdf() # interpolate self.assertRaises( @@ -89,7 +89,7 @@ def test_target_column_validation(self): """Test target columns exist in schema, and are of the right type (numeric).""" # load test data - input_tsdf: TSDF = self.get_data_as_tsdf("input_data") + input_tsdf: TSDF = self.get_test_df_builder("init").as_tsdf() # interpolate self.assertRaises( @@ -109,7 +109,7 @@ def test_partition_column_validation(self): """Test partition columns exist in schema.""" # load test data - input_tsdf: TSDF = self.get_data_as_tsdf("input_data") + input_tsdf: TSDF = self.get_test_df_builder("init").as_tsdf() # interpolate self.assertRaises( @@ -129,7 +129,7 @@ def test_ts_column_validation(self): """Test time series column exist in schema.""" # load test data - input_tsdf: TSDF = self.get_data_as_tsdf("input_data") + input_tsdf: TSDF = self.get_test_df_builder("init").as_tsdf() # interpolate self.assertRaises( @@ -154,8 +154,8 @@ def test_zero_fill_interpolation(self): """ # load test data - simple_input_tsdf: TSDF = self.get_data_as_tsdf("simple_input_data") - expected_df: DataFrame = self.get_data_as_sdf("expected_data") + simple_input_tsdf: TSDF = self.get_test_df_builder("simple_init").as_tsdf() + expected_df: DataFrame = self.get_test_df_builder("expected").as_sdf() # interpolate actual_df: DataFrame = self.interpolate_helper.interpolate( @@ -180,8 +180,8 @@ def test_zero_fill_interpolation_no_perform_checks(self): """ # load test data - simple_input_tsdf: TSDF = self.get_data_as_tsdf("simple_input_data") - expected_df: DataFrame = self.get_data_as_sdf("expected_data") + simple_input_tsdf: TSDF = self.get_test_df_builder("simple_init").as_tsdf() + expected_df: DataFrame = self.get_test_df_builder("expected").as_sdf() # interpolate actual_df: DataFrame = self.interpolate_helper.interpolate( @@ -207,8 +207,8 @@ def test_null_fill_interpolation(self): """ # load test data - simple_input_tsdf: TSDF = self.get_data_as_tsdf("simple_input_data") - expected_df: DataFrame = self.get_data_as_sdf("expected_data") + simple_input_tsdf: TSDF = self.get_test_df_builder("simple_init").as_tsdf() + expected_df: DataFrame = self.get_test_df_builder("expected").as_sdf() # interpolate actual_df: DataFrame = self.interpolate_helper.interpolate( @@ -234,8 +234,8 @@ def test_back_fill_interpolation(self): """ # load test data - simple_input_tsdf: TSDF = self.get_data_as_tsdf("simple_input_data") - expected_df: DataFrame = self.get_data_as_sdf("expected_data") + simple_input_tsdf: TSDF = self.get_test_df_builder("simple_init").as_tsdf() + expected_df: DataFrame = self.get_test_df_builder("expected").as_sdf() # interpolate actual_df: DataFrame = self.interpolate_helper.interpolate( @@ -261,8 +261,8 @@ def test_forward_fill_interpolation(self): """ # load test data - simple_input_tsdf: TSDF = self.get_data_as_tsdf("simple_input_data") - expected_df: DataFrame = self.get_data_as_sdf("expected_data") + simple_input_tsdf: TSDF = self.get_test_df_builder("simple_init").as_tsdf() + expected_df: DataFrame = self.get_test_df_builder("expected").as_sdf() # interpolate actual_df: DataFrame = self.interpolate_helper.interpolate( @@ -288,8 +288,8 @@ def test_linear_fill_interpolation(self): """ # load test data - simple_input_tsdf: TSDF = self.get_data_as_tsdf("simple_input_data") - expected_df: DataFrame = self.get_data_as_sdf("expected_data") + simple_input_tsdf: TSDF = 
self.get_test_df_builder("simple_init").as_tsdf() + expected_df: DataFrame = self.get_test_df_builder("expected").as_sdf() # interpolate actual_df: DataFrame = self.interpolate_helper.interpolate( @@ -313,8 +313,8 @@ def test_different_freq_abbreviations(self): """ # load test data - simple_input_tsdf: TSDF = self.get_data_as_tsdf("simple_input_data") - expected_df: DataFrame = self.get_data_as_sdf("expected_data") + simple_input_tsdf: TSDF = self.get_test_df_builder("simple_init").as_tsdf() + expected_df: DataFrame = self.get_test_df_builder("expected").as_sdf() # interpolate actual_df: DataFrame = self.interpolate_helper.interpolate( @@ -340,8 +340,8 @@ def test_show_interpolated(self): """ # load test data - simple_input_tsdf: TSDF = self.get_data_as_tsdf("simple_input_data") - expected_df: DataFrame = self.get_data_as_sdf("expected_data") + simple_input_tsdf: TSDF = self.get_test_df_builder("simple_init").as_tsdf() + expected_df: DataFrame = self.get_test_df_builder("expected").as_sdf() # interpolate actual_df: DataFrame = self.interpolate_helper.interpolate( @@ -358,7 +358,7 @@ def test_show_interpolated(self): self.assertDataFrameEquality(expected_df, actual_df, ignore_nullable=True) def test_validate_ts_col_data_type_is_not_timestamp(self): - input_df: DataFrame = self.get_data_as_sdf("input_data") + input_df: DataFrame = self.get_test_df_builder("init").as_sdf() self.assertRaises( ValueError, @@ -374,7 +374,7 @@ def test_interpolation_freq_is_none(self): """Test a ValueError is raised when freq is None.""" # load test data - simple_input_tsdf: TSDF = self.get_data_as_tsdf("input_data") + simple_input_tsdf: TSDF = self.get_test_df_builder("init").as_tsdf() # interpolate self.assertRaises( @@ -394,7 +394,7 @@ def test_interpolation_func_is_none(self): """Test a ValueError is raised when func is None.""" # load test data - simple_input_tsdf: TSDF = self.get_data_as_tsdf("input_data") + simple_input_tsdf: TSDF = self.get_test_df_builder("init").as_tsdf() # interpolate self.assertRaises( @@ -414,7 +414,7 @@ def test_interpolation_func_is_callable(self): """Test ValueError is raised when func is callable.""" # load test data - simple_input_tsdf: TSDF = self.get_data_as_tsdf("input_data") + simple_input_tsdf: TSDF = self.get_test_df_builder("init").as_tsdf() # interpolate self.assertRaises( @@ -434,7 +434,7 @@ def test_interpolation_freq_is_not_supported_type(self): """Test ValueError is raised when func is callable.""" # load test data - simple_input_tsdf: TSDF = self.get_data_as_tsdf("input_data") + simple_input_tsdf: TSDF = self.get_test_df_builder("init").as_tsdf() # interpolate self.assertRaises( @@ -459,8 +459,8 @@ def test_interpolation_using_default_tsdf_params(self): """ # load test data - simple_input_tsdf: TSDF = self.get_data_as_tsdf("simple_input_data") - expected_df: DataFrame = self.get_data_as_sdf("expected") + simple_input_tsdf: TSDF = self.get_test_df_builder("simple_init").as_tsdf() + expected_df: DataFrame = self.get_test_df_builder("expected").as_sdf() # interpolate actual_df: DataFrame = simple_input_tsdf.interpolate( @@ -475,8 +475,8 @@ def test_interpolation_using_custom_params(self): modified params.""" # Modify input DataFrame using different ts_col - simple_input_tsdf: TSDF = self.get_data_as_tsdf("simple_input_data") - expected_df: DataFrame = self.get_data_as_sdf("expected") + simple_input_tsdf: TSDF = self.get_test_df_builder("simple_init").as_tsdf() + expected_df: DataFrame = self.get_test_df_builder("expected").as_sdf() input_tsdf = TSDF( 
simple_input_tsdf.df.withColumnRenamed("event_ts", "other_ts_col"), @@ -501,7 +501,7 @@ def test_tsdf_constructor_params_are_updated(self): interpolation.""" # load test data - simple_input_tsdf: TSDF = self.get_data_as_tsdf("simple_input_data") + simple_input_tsdf: TSDF = self.get_test_df_builder("simple_init").as_tsdf() actual_tsdf: TSDF = simple_input_tsdf.interpolate( ts_col="event_ts", @@ -520,8 +520,8 @@ def test_interpolation_on_sampled_data(self): """Verify interpolation can be chained with resample within the TSDF class""" # load test data - simple_input_tsdf: TSDF = self.get_data_as_tsdf("simple_input_data") - expected_df: DataFrame = self.get_data_as_sdf("expected") + simple_input_tsdf: TSDF = self.get_test_df_builder("simple_init").as_tsdf() + expected_df: DataFrame = self.get_test_df_builder("expected").as_sdf() actual_df: DataFrame = ( simple_input_tsdf.resample(freq="30 seconds", func="mean", fill=None) @@ -538,8 +538,8 @@ def test_defaults_with_resampled_df(self): # self.buildTestingDataFrame() # load test data - simple_input_tsdf = self.get_data_as_tsdf("simple_input_data") - expected_df: DataFrame = self.get_data_as_sdf("expected", convert_ts_col=True) + simple_input_tsdf = self.get_test_df_builder("simple_init").as_tsdf() + expected_df: DataFrame = self.get_test_df_builder("expected").as_sdf() actual_df: DataFrame = ( simple_input_tsdf.resample(freq="30 seconds", func="mean", fill=None) diff --git a/python/tests/unit_test_data/interpol_tests.json b/python/tests/unit_test_data/interpol_tests.json index ebea1a81..0f30061d 100644 --- a/python/tests/unit_test_data/interpol_tests.json +++ b/python/tests/unit_test_data/interpol_tests.json @@ -1,144 +1,17 @@ { "__SharedData": { - "input_data": { - "schema": "partition_a string, partition_b string, event_ts string, value_a float, value_b float", - "ts_col": "event_ts", - "partition_cols": [ - "partition_a", - "partition_b" - ], - "data": [ - [ - "A", - "A-1", - "2020-01-01 00:01:10", - 349.21, - null - ], - [ - "A", - "A-1", - "2020-01-01 00:02:03", - null, - 4.0 - ], - [ - "A", - "A-2", - "2020-01-01 00:01:15", - 340.21, - 9.0 - ], - [ - "B", - "B-1", - "2020-01-01 00:01:15", - 362.1, - 4.0 - ], - [ - "A", - "A-2", - "2020-01-01 00:01:17", - 353.32, - 8.0 - ], - [ - "B", - "B-2", - "2020-01-01 00:02:14", - null, - 6.0 - ], - [ - "A", - "A-1", - "2020-01-01 00:03:02", - 351.32, - 7.0 - ], - [ - "B", - "B-2", - "2020-01-01 00:01:12", - 361.1, - 5.0 - ] - ] - }, - "simple_input_data": { - "schema": "partition_a string, partition_b string, event_ts string, value_a float, value_b float", - "ts_col": "event_ts", - "partition_cols": [ - "partition_a", - "partition_b" - ], - "data": [ - [ - "A", - "A-1", - "2020-01-01 00:00:10", - 0.0, - null - ], - [ - "A", - "A-1", - "2020-01-01 00:01:10", - 2.0, - 2.0 - ], - [ - "A", - "A-1", - "2020-01-01 00:01:32", - null, - null - ], - [ - "A", - "A-1", - "2020-01-01 00:02:03", - null, - null - ], - [ - "A", - "A-1", - "2020-01-01 00:03:32", - null, - 7.0 - ], - [ - "A", - "A-1", - "2020-01-01 00:04:12", - 8.0, - 8.0 - ], - [ - "A", - "A-1", - "2020-01-01 00:05:31", - 11.0, - null - ] - ] - } - }, - "InterpolationUnitTest": { - "test_validate_col_exist_in_df": { - "input_data": { - "$ref": "#/__SharedData/input_data" - } - }, - "test_validate_col_target_cols_data_type": { - "input_data": { - "schema": "partition_a string, partition_b string, event_ts string, string_target string, float_target float", + "init": { + "tsdf": { "ts_col": "event_ts", "partition_cols": [ "partition_a", "partition_b" + 
] + }, + "df": { + "schema": "partition_a string, partition_b string, event_ts string, value_a float, value_b float", + "ts_convert": [ + "event_ts" ], "data": [ [ @@ -200,1405 +73,1563 @@ ] } }, - "test_fill_validation": { - "input_data": { - "$ref": "#/__SharedData/input_data" - } - }, - "test_target_column_validation": { - "input_data": { - "$ref": "#/__SharedData/input_data" - } - }, - "test_partition_column_validation": { - "input_data": { - "$ref": "#/__SharedData/input_data" - } - }, - "test_ts_column_validation": { - "input_data": { - "$ref": "#/__SharedData/input_data" - } - }, - "test_zero_fill_interpolation": { - "simple_input_data": { - "$ref": "#/__SharedData/simple_input_data" - }, - "expected_data": { - "schema": "partition_a string, partition_b string, event_ts string, value_a double, value_b double, is_ts_interpolated boolean, is_interpolated_value_a boolean, is_interpolated_value_b boolean", + "simple_init": { + "tsdf": { "ts_col": "event_ts", "partition_cols": [ "partition_a", "partition_b" + ] + }, + "df": { + "schema": "partition_a string, partition_b string, event_ts string, value_a float, value_b float", + "ts_convert": [ + "event_ts" ], "data": [ [ "A", "A-1", - "2020-01-01 00:00:00", - 0.0, - 0.0, - false, - false, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:00:30", - 0.0, + "2020-01-01 00:00:10", 0.0, - true, - true, - true + null ], [ "A", "A-1", - "2020-01-01 00:01:00", - 2.0, + "2020-01-01 00:01:10", 2.0, - false, - false, - false - ], - [ - "A", - "A-1", - "2020-01-01 00:01:30", - 0.0, - 0.0, - false, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:02:00", - 0.0, - 0.0, - false, - true, - true + 2.0 ], [ "A", "A-1", - "2020-01-01 00:02:30", - 0.0, - 0.0, - true, - true, - true + "2020-01-01 00:01:32", + null, + null ], [ "A", "A-1", - "2020-01-01 00:03:00", - 0.0, - 0.0, - true, - true, - true + "2020-01-01 00:02:03", + null, + null ], [ "A", "A-1", - "2020-01-01 00:03:30", - 0.0, - 7.0, - false, - true, - false + "2020-01-01 00:03:32", + null, + 7.0 ], [ "A", "A-1", - "2020-01-01 00:04:00", + "2020-01-01 00:04:12", 8.0, - 8.0, - false, - false, - false - ], - [ - "A", - "A-1", - "2020-01-01 00:04:30", - 0.0, - 0.0, - true, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:05:00", - 0.0, - 0.0, - true, - true, - true + 8.0 ], [ "A", "A-1", - "2020-01-01 00:05:30", + "2020-01-01 00:05:31", 11.0, - 0.0, - false, - false, - true + null ] ] } + } + }, + "InterpolationUnitTest": { + "test_is_resampled_type": { + "init": { + "$ref": "#/__SharedData/init" + } + }, + "test_validate_fill_method": { + "init": { + "$ref": "#/__SharedData/init" + } + }, + "test_validate_col_exist_in_df": { + "init": { + "$ref": "#/__SharedData/init" + } + }, + "test_validate_col_target_cols_data_type": { + "init": { + "df": { + "schema": "partition_a string, partition_b string, event_ts string, string_target string, float_target float", + "ts_convert": [ + "event_ts" + ], + "data": [ + [ + "A", + "A-1", + "2020-01-01 00:01:10", + 349.21, + null + ], + [ + "A", + "A-1", + "2020-01-01 00:02:03", + null, + 4.0 + ], + [ + "A", + "A-2", + "2020-01-01 00:01:15", + 340.21, + 9.0 + ], + [ + "B", + "B-1", + "2020-01-01 00:01:15", + 362.1, + 4.0 + ], + [ + "A", + "A-2", + "2020-01-01 00:01:17", + 353.32, + 8.0 + ], + [ + "B", + "B-2", + "2020-01-01 00:02:14", + null, + 6.0 + ], + [ + "A", + "A-1", + "2020-01-01 00:03:02", + 351.32, + 7.0 + ], + [ + "B", + "B-2", + "2020-01-01 00:01:12", + 361.1, + 5.0 + ] + ] + } + } + }, + "test_fill_validation": { + "init": { + "$ref": 
"#/__SharedData/init" + } + }, + "test_target_column_validation": { + "init": { + "$ref": "#/__SharedData/init" + } + }, + "test_partition_column_validation": { + "init": { + "$ref": "#/__SharedData/init" + } + }, + "test_ts_column_validation": { + "init": { + "$ref": "#/__SharedData/init" + } + }, + "test_zero_fill_interpolation": { + "simple_init": { + "$ref": "#/__SharedData/simple_init" + }, + "expected": { + "tsdf": { + "ts_col": "event_ts", + "partition_cols": [ + "partition_a", + "partition_b" + ] + }, + "df": { + "schema": "partition_a string, partition_b string, event_ts string, value_a double, value_b double, is_ts_interpolated boolean, is_interpolated_value_a boolean, is_interpolated_value_b boolean", + "ts_convert": [ + "event_ts" + ], + "data": [ + [ + "A", + "A-1", + "2020-01-01 00:00:00", + 0.0, + 0.0, + false, + false, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:00:30", + 0.0, + 0.0, + true, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:01:00", + 2.0, + 2.0, + false, + false, + false + ], + [ + "A", + "A-1", + "2020-01-01 00:01:30", + 0.0, + 0.0, + false, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:02:00", + 0.0, + 0.0, + false, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:02:30", + 0.0, + 0.0, + true, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:03:00", + 0.0, + 0.0, + true, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:03:30", + 0.0, + 7.0, + false, + true, + false + ], + [ + "A", + "A-1", + "2020-01-01 00:04:00", + 8.0, + 8.0, + false, + false, + false + ], + [ + "A", + "A-1", + "2020-01-01 00:04:30", + 0.0, + 0.0, + true, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:05:00", + 0.0, + 0.0, + true, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:05:30", + 11.0, + 0.0, + false, + false, + true + ] + ] + } + } }, "test_zero_fill_interpolation_no_perform_checks": { - "simple_input_data": { - "$ref": "#/__SharedData/simple_input_data" + "simple_init": { + "$ref": "#/__SharedData/simple_init" }, - "expected_data": { - "$ref": "#/InterpolationUnitTest/test_zero_fill_interpolation/expected_data" + "expected": { + "$ref": "#/InterpolationUnitTest/test_zero_fill_interpolation/expected" } }, "test_null_fill_interpolation": { - "simple_input_data": { - "$ref": "#/__SharedData/simple_input_data" + "simple_init": { + "$ref": "#/__SharedData/simple_init" }, - "expected_data": { - "schema": "partition_a string, partition_b string, event_ts string, value_a double, value_b double, is_ts_interpolated boolean, is_interpolated_value_a boolean, is_interpolated_value_b boolean", - "ts_col": "event_ts", - "partition_cols": [ - "partition_a", - "partition_b" - ], - "data": [ - [ - "A", - "A-1", - "2020-01-01 00:00:00", - 0.0, - null, - false, - false, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:00:30", - null, - null, - true, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:01:00", - 2.0, - 2.0, - false, - false, - false - ], - [ - "A", - "A-1", - "2020-01-01 00:01:30", - null, - null, - false, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:02:00", - null, - null, - false, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:02:30", - null, - null, - true, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:03:00", - null, - null, - true, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:03:30", - null, - 7.0, - false, - true, - false - ], - [ - "A", - "A-1", - "2020-01-01 00:04:00", - 8.0, - 8.0, - false, - false, - false - ], - [ - "A", - "A-1", - "2020-01-01 00:04:30", - 
null, - null, - true, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:05:00", - null, - null, - true, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:05:30", - 11.0, - null, - false, - false, - true + "expected": { + "df": { + "schema": "partition_a string, partition_b string, event_ts string, value_a double, value_b double, is_ts_interpolated boolean, is_interpolated_value_a boolean, is_interpolated_value_b boolean", + "ts_convert": [ + "event_ts" + ], + "data": [ + [ + "A", + "A-1", + "2020-01-01 00:00:00", + 0.0, + null, + false, + false, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:00:30", + null, + null, + true, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:01:00", + 2.0, + 2.0, + false, + false, + false + ], + [ + "A", + "A-1", + "2020-01-01 00:01:30", + null, + null, + false, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:02:00", + null, + null, + false, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:02:30", + null, + null, + true, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:03:00", + null, + null, + true, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:03:30", + null, + 7.0, + false, + true, + false + ], + [ + "A", + "A-1", + "2020-01-01 00:04:00", + 8.0, + 8.0, + false, + false, + false + ], + [ + "A", + "A-1", + "2020-01-01 00:04:30", + null, + null, + true, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:05:00", + null, + null, + true, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:05:30", + 11.0, + null, + false, + false, + true + ] ] - ] + } } }, "test_back_fill_interpolation": { - "simple_input_data": { - "$ref": "#/__SharedData/simple_input_data" + "simple_init": { + "$ref": "#/__SharedData/simple_init" }, - "expected_data": { - "schema": "partition_a string, partition_b string, event_ts string, value_a double, value_b double, is_ts_interpolated boolean, is_interpolated_value_a boolean, is_interpolated_value_b boolean", - "ts_col": "event_ts", - "partition_cols": [ - "partition_a", - "partition_b" - ], - "data": [ - [ - "A", - "A-1", - "2020-01-01 00:00:00", - 0.0, - 2.0, - false, - false, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:00:30", - 2.0, - 2.0, - true, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:01:00", - 2.0, - 2.0, - false, - false, - false - ], - [ - "A", - "A-1", - "2020-01-01 00:01:30", - 8.0, - 7.0, - false, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:02:00", - 8.0, - 7.0, - false, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:02:30", - 8.0, - 7.0, - true, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:03:00", - 8.0, - 7.0, - true, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:03:30", - 8.0, - 7.0, - false, - true, - false - ], - [ - "A", - "A-1", - "2020-01-01 00:04:00", - 8.0, - 8.0, - false, - false, - false - ], - [ - "A", - "A-1", - "2020-01-01 00:04:30", - 11.0, - null, - true, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:05:00", - 11.0, - null, - true, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:05:30", - 11.0, - null, - false, - false, - true - ] - ] - } - }, - "test_forward_fill_interpolation": { - "simple_input_data": { - "$ref": "#/__SharedData/simple_input_data" - }, - "expected_data": { - "schema": "partition_a string, partition_b string, event_ts string, value_a double, value_b double, is_ts_interpolated boolean, is_interpolated_value_a boolean, is_interpolated_value_b boolean", - "ts_col": "event_ts", - "partition_cols": [ - "partition_a", - "partition_b" - ], - 
"data": [ - [ - "A", - "A-1", - "2020-01-01 00:00:00", - 0.0, - null, - false, - false, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:00:30", - 0.0, - null, - true, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:01:00", - 2.0, - 2.0, - false, - false, - false - ], - [ - "A", - "A-1", - "2020-01-01 00:01:30", - 2.0, - 2.0, - false, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:02:00", - 2.0, - 2.0, - false, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:02:30", - 2.0, - 2.0, - true, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:03:00", - 2.0, - 2.0, - true, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:03:30", - 2.0, - 7.0, - false, - true, - false - ], - [ - "A", - "A-1", - "2020-01-01 00:04:00", - 8.0, - 8.0, - false, - false, - false - ], - [ - "A", - "A-1", - "2020-01-01 00:04:30", - 8.0, - 8.0, - true, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:05:00", - 8.0, - 8.0, - true, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:05:30", - 11.0, - 8.0, - false, - false, - true - ] - ] - } - }, - "test_linear_fill_interpolation": { - "simple_input_data": { - "$ref": "#/__SharedData/simple_input_data" - }, - "expected_data": { - "schema": "partition_a string, partition_b string, event_ts string, value_a double, value_b double, is_ts_interpolated boolean, is_interpolated_value_a boolean, is_interpolated_value_b boolean", - "ts_col": "event_ts", - "partition_cols": [ - "partition_a", - "partition_b" - ], - "data": [ - [ - "A", - "A-1", - "2020-01-01 00:00:00", - 0.0, - null, - false, - false, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:00:30", - 1.0, - null, - true, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:01:00", - 2.0, - 2.0, - false, - false, - false - ], - [ - "A", - "A-1", - "2020-01-01 00:01:30", - 3.0, - 3.0, - false, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:02:00", - 4.0, - 4.0, - false, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:02:30", - 5.0, - 5.0, - true, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:03:00", - 6.0, - 6.0, - true, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:03:30", - 7.0, - 7.0, - false, - true, - false - ], - [ - "A", - "A-1", - "2020-01-01 00:04:00", - 8.0, - 8.0, - false, - false, - false - ], - [ - "A", - "A-1", - "2020-01-01 00:04:30", - 9.0, - null, - true, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:05:00", - 10.0, - null, - true, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:05:30", - 11.0, - null, - false, - false, - true - ] - ] - } - }, - "test_different_freq_abbreviations": { - "simple_input_data": { - "$ref": "#/__SharedData/simple_input_data" - }, - "expected_data": { - "schema": "partition_a string, partition_b string, event_ts string, value_a double, value_b double, is_ts_interpolated boolean, is_interpolated_value_a boolean, is_interpolated_value_b boolean", - "ts_col": "event_ts", - "partition_cols": [ - "partition_a", - "partition_b" - ], - "data": [ - [ - "A", - "A-1", - "2020-01-01 00:00:00", - 0.0, - null, - false, - false, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:00:30", - 1.0, - null, - true, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:01:00", - 2.0, - 2.0, - false, - false, - false - ], - [ - "A", - "A-1", - "2020-01-01 00:01:30", - 3.0, - 3.0, - false, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:02:00", - 4.0, - 4.0, - false, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:02:30", - 5.0, - 5.0, - true, - true, - true - ], - [ - 
"A", - "A-1", - "2020-01-01 00:03:00", - 6.0, - 6.0, - true, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:03:30", - 7.0, - 7.0, - false, - true, - false - ], - [ - "A", - "A-1", - "2020-01-01 00:04:00", - 8.0, - 8.0, - false, - false, - false - ], - [ - "A", - "A-1", - "2020-01-01 00:04:30", - 9.0, - null, - true, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:05:00", - 10.0, - null, - true, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:05:30", - 11.0, - null, - false, - false, - true - ] - ] - } - }, - "test_show_interpolated": { - "simple_input_data": { - "$ref": "#/__SharedData/simple_input_data" - }, - "expected_data": { - "schema": "partition_a string, partition_b string, event_ts string, value_a double, value_b double", - "ts_col": "event_ts", - "partition_cols": [ - "partition_a", - "partition_b" - ], - "data": [ - [ - "A", - "A-1", - "2020-01-01 00:00:00", - 0.0, - null - ], - [ - "A", - "A-1", - "2020-01-01 00:00:30", - 1.0, - null - ], - [ - "A", - "A-1", - "2020-01-01 00:01:00", - 2.0, - 2.0 - ], - [ - "A", - "A-1", - "2020-01-01 00:01:30", - 3.0, - 3.0 - ], - [ - "A", - "A-1", - "2020-01-01 00:02:00", - 4.0, - 4.0 - ], - [ - "A", - "A-1", - "2020-01-01 00:02:30", - 5.0, - 5.0 - ], - [ - "A", - "A-1", - "2020-01-01 00:03:00", - 6.0, - 6.0 - ], - [ - "A", - "A-1", - "2020-01-01 00:03:30", - 7.0, - 7.0 - ], - [ - "A", - "A-1", - "2020-01-01 00:04:00", - 8.0, - 8.0 - ], - [ - "A", - "A-1", - "2020-01-01 00:04:30", - 9.0, - null - ], - [ - "A", - "A-1", - "2020-01-01 00:05:00", - 10.0, - null - ], - [ - "A", - "A-1", - "2020-01-01 00:05:30", - 11.0, - null - ] - ] - } - }, - "test_validate_ts_col_data_type_is_not_timestamp": { - "input_data": { - "$ref": "#/__SharedData/input_data" - } - }, - "test_interpolation_freq_is_none": { - "input_data": { - "$ref": "#/__SharedData/input_data" - } - }, - "test_interpolation_func_is_none": { - "input_data": { - "$ref": "#/__SharedData/input_data" - } - }, - "test_interpolation_func_is_callable": { - "input_data": { - "$ref": "#/__SharedData/input_data" - } - }, - "test_interpolation_freq_is_not_supported_type": { - "input_data": { - "$ref": "#/__SharedData/input_data" - } - } - }, - "InterpolationIntegrationTest": { - "test_interpolation_using_default_tsdf_params": { - "input_data": { - "$ref": "#/__SharedData/input_data" - }, - "simple_input_data": { - "$ref": "#/__SharedData/simple_input_data" - }, - "expected": { - "schema": "partition_a string, partition_b string, event_ts string, value_a double, value_b double", - "ts_col": "event_ts", - "partition_cols": [ - "partition_a", - "partition_b" - ], - "data": [ - [ - "A", - "A-1", - "2020-01-01 00:00:00", - 0.0, - null - ], - [ - "A", - "A-1", - "2020-01-01 00:00:30", - 1.0, - null - ], - [ - "A", - "A-1", - "2020-01-01 00:01:00", - 2.0, - 2.0 - ], - [ - "A", - "A-1", - "2020-01-01 00:01:30", - 3.0, - 3.0 - ], - [ - "A", - "A-1", - "2020-01-01 00:02:00", - 4.0, - 4.0 - ], - [ - "A", - "A-1", - "2020-01-01 00:02:30", - 5.0, - 5.0 - ], - [ - "A", - "A-1", - "2020-01-01 00:03:00", - 6.0, - 6.0 - ], - [ - "A", - "A-1", - "2020-01-01 00:03:30", - 7.0, - 7.0 - ], - [ - "A", - "A-1", - "2020-01-01 00:04:00", - 8.0, - 8.0 - ], - [ - "A", - "A-1", - "2020-01-01 00:04:30", - 9.0, - null - ], - [ - "A", - "A-1", - "2020-01-01 00:05:00", - 10.0, - null - ], - [ - "A", - "A-1", - "2020-01-01 00:05:30", - 11.0, - null + "expected": { + "df": { + "schema": "partition_a string, partition_b string, event_ts string, value_a double, value_b double, is_ts_interpolated boolean, 
is_interpolated_value_a boolean, is_interpolated_value_b boolean", + "ts_convert": [ + "event_ts" + ], + "data": [ + [ + "A", + "A-1", + "2020-01-01 00:00:00", + 0.0, + 2.0, + false, + false, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:00:30", + 2.0, + 2.0, + true, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:01:00", + 2.0, + 2.0, + false, + false, + false + ], + [ + "A", + "A-1", + "2020-01-01 00:01:30", + 8.0, + 7.0, + false, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:02:00", + 8.0, + 7.0, + false, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:02:30", + 8.0, + 7.0, + true, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:03:00", + 8.0, + 7.0, + true, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:03:30", + 8.0, + 7.0, + false, + true, + false + ], + [ + "A", + "A-1", + "2020-01-01 00:04:00", + 8.0, + 8.0, + false, + false, + false + ], + [ + "A", + "A-1", + "2020-01-01 00:04:30", + 11.0, + null, + true, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:05:00", + 11.0, + null, + true, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:05:30", + 11.0, + null, + false, + false, + true + ] ] - ] + } + } + }, + "test_forward_fill_interpolation": { + "simple_init": { + "$ref": "#/__SharedData/simple_init" + }, + "expected": { + "df": { + "schema": "partition_a string, partition_b string, event_ts string, value_a double, value_b double, is_ts_interpolated boolean, is_interpolated_value_a boolean, is_interpolated_value_b boolean", + "ts_convert": [ + "event_ts" + ], + "data": [ + [ + "A", + "A-1", + "2020-01-01 00:00:00", + 0.0, + null, + false, + false, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:00:30", + 0.0, + null, + true, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:01:00", + 2.0, + 2.0, + false, + false, + false + ], + [ + "A", + "A-1", + "2020-01-01 00:01:30", + 2.0, + 2.0, + false, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:02:00", + 2.0, + 2.0, + false, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:02:30", + 2.0, + 2.0, + true, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:03:00", + 2.0, + 2.0, + true, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:03:30", + 2.0, + 7.0, + false, + true, + false + ], + [ + "A", + "A-1", + "2020-01-01 00:04:00", + 8.0, + 8.0, + false, + false, + false + ], + [ + "A", + "A-1", + "2020-01-01 00:04:30", + 8.0, + 8.0, + true, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:05:00", + 8.0, + 8.0, + true, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:05:30", + 11.0, + 8.0, + false, + false, + true + ] + ] + } + } + }, + "test_linear_fill_interpolation": { + "simple_init": { + "$ref": "#/__SharedData/simple_init" + }, + "expected": { + "df": { + "schema": "partition_a string, partition_b string, event_ts string, value_a double, value_b double, is_ts_interpolated boolean, is_interpolated_value_a boolean, is_interpolated_value_b boolean", + "ts_convert": [ + "event_ts" + ], + "data": [ + [ + "A", + "A-1", + "2020-01-01 00:00:00", + 0.0, + null, + false, + false, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:00:30", + 1.0, + null, + true, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:01:00", + 2.0, + 2.0, + false, + false, + false + ], + [ + "A", + "A-1", + "2020-01-01 00:01:30", + 3.0, + 3.0, + false, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:02:00", + 4.0, + 4.0, + false, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:02:30", + 5.0, + 5.0, + true, + true, + true + ], + [ 
+ "A", + "A-1", + "2020-01-01 00:03:00", + 6.0, + 6.0, + true, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:03:30", + 7.0, + 7.0, + false, + true, + false + ], + [ + "A", + "A-1", + "2020-01-01 00:04:00", + 8.0, + 8.0, + false, + false, + false + ], + [ + "A", + "A-1", + "2020-01-01 00:04:30", + 9.0, + null, + true, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:05:00", + 10.0, + null, + true, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:05:30", + 11.0, + null, + false, + false, + true + ] + ] + } + } + }, + "test_different_freq_abbreviations": { + "simple_init": { + "$ref": "#/__SharedData/simple_init" + }, + "expected": { + "df": { + "schema": "partition_a string, partition_b string, event_ts string, value_a double, value_b double, is_ts_interpolated boolean, is_interpolated_value_a boolean, is_interpolated_value_b boolean", + "ts_convert": [ + "event_ts" + ], + "data": [ + [ + "A", + "A-1", + "2020-01-01 00:00:00", + 0.0, + null, + false, + false, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:00:30", + 1.0, + null, + true, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:01:00", + 2.0, + 2.0, + false, + false, + false + ], + [ + "A", + "A-1", + "2020-01-01 00:01:30", + 3.0, + 3.0, + false, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:02:00", + 4.0, + 4.0, + false, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:02:30", + 5.0, + 5.0, + true, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:03:00", + 6.0, + 6.0, + true, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:03:30", + 7.0, + 7.0, + false, + true, + false + ], + [ + "A", + "A-1", + "2020-01-01 00:04:00", + 8.0, + 8.0, + false, + false, + false + ], + [ + "A", + "A-1", + "2020-01-01 00:04:30", + 9.0, + null, + true, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:05:00", + 10.0, + null, + true, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:05:30", + 11.0, + null, + false, + false, + true + ] + ] + } + } + }, + "test_show_interpolated": { + "simple_init": { + "$ref": "#/__SharedData/simple_init" + }, + "expected": { + "df": { + "schema": "partition_a string, partition_b string, event_ts string, value_a double, value_b double", + "ts_convert": [ + "event_ts" + ], + "data": [ + [ + "A", + "A-1", + "2020-01-01 00:00:00", + 0.0, + null + ], + [ + "A", + "A-1", + "2020-01-01 00:00:30", + 1.0, + null + ], + [ + "A", + "A-1", + "2020-01-01 00:01:00", + 2.0, + 2.0 + ], + [ + "A", + "A-1", + "2020-01-01 00:01:30", + 3.0, + 3.0 + ], + [ + "A", + "A-1", + "2020-01-01 00:02:00", + 4.0, + 4.0 + ], + [ + "A", + "A-1", + "2020-01-01 00:02:30", + 5.0, + 5.0 + ], + [ + "A", + "A-1", + "2020-01-01 00:03:00", + 6.0, + 6.0 + ], + [ + "A", + "A-1", + "2020-01-01 00:03:30", + 7.0, + 7.0 + ], + [ + "A", + "A-1", + "2020-01-01 00:04:00", + 8.0, + 8.0 + ], + [ + "A", + "A-1", + "2020-01-01 00:04:30", + 9.0, + null + ], + [ + "A", + "A-1", + "2020-01-01 00:05:00", + 10.0, + null + ], + [ + "A", + "A-1", + "2020-01-01 00:05:30", + 11.0, + null + ] + ] + } + } + }, + "test_validate_ts_col_data_type_is_not_timestamp": { + "init": { + "$ref": "#/__SharedData/init" + } + }, + "test_interpolation_freq_is_none": { + "init": { + "$ref": "#/__SharedData/init" + } + }, + "test_interpolation_func_is_none": { + "init": { + "$ref": "#/__SharedData/init" + } + }, + "test_interpolation_func_is_callable": { + "init": { + "$ref": "#/__SharedData/init" + } + }, + "test_interpolation_freq_is_not_supported_type": { + "init": { + "$ref": "#/__SharedData/init" + } + } + }, + 
"InterpolationIntegrationTest": { + "test_interpolation_using_default_tsdf_params": { + "init": { + "$ref": "#/__SharedData/init" + }, + "simple_init": { + "$ref": "#/__SharedData/simple_init" + }, + "expected": { + "df": { + "schema": "partition_a string, partition_b string, event_ts string, value_a double, value_b double", + "ts_convert": [ + "event_ts" + ], + "data": [ + [ + "A", + "A-1", + "2020-01-01 00:00:00", + 0.0, + null + ], + [ + "A", + "A-1", + "2020-01-01 00:00:30", + 1.0, + null + ], + [ + "A", + "A-1", + "2020-01-01 00:01:00", + 2.0, + 2.0 + ], + [ + "A", + "A-1", + "2020-01-01 00:01:30", + 3.0, + 3.0 + ], + [ + "A", + "A-1", + "2020-01-01 00:02:00", + 4.0, + 4.0 + ], + [ + "A", + "A-1", + "2020-01-01 00:02:30", + 5.0, + 5.0 + ], + [ + "A", + "A-1", + "2020-01-01 00:03:00", + 6.0, + 6.0 + ], + [ + "A", + "A-1", + "2020-01-01 00:03:30", + 7.0, + 7.0 + ], + [ + "A", + "A-1", + "2020-01-01 00:04:00", + 8.0, + 8.0 + ], + [ + "A", + "A-1", + "2020-01-01 00:04:30", + 9.0, + null + ], + [ + "A", + "A-1", + "2020-01-01 00:05:00", + 10.0, + null + ], + [ + "A", + "A-1", + "2020-01-01 00:05:30", + 11.0, + null + ] + ] + } } }, "test_interpolation_using_custom_params": { - "input_data": { - "$ref": "#/__SharedData/input_data" + "init": { + "$ref": "#/__SharedData/init" }, - "simple_input_data": { - "$ref": "#/__SharedData/simple_input_data" + "simple_init": { + "$ref": "#/__SharedData/simple_init" }, "expected": { - "schema": "partition_a string, partition_b string, other_ts_col string, value_a double, is_ts_interpolated boolean, is_interpolated_value_a boolean", - "ts_col": "other_ts_col", - "partition_cols": [ - "partition_a", - "partition_b" - ], - "data": [ - [ - "A", - "A-1", - "2020-01-01 00:00:00", - 0.0, - false, - false - ], - [ - "A", - "A-1", - "2020-01-01 00:00:30", - 1.0, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:01:00", - 2.0, - false, - false - ], - [ - "A", - "A-1", - "2020-01-01 00:01:30", - 3.0, - false, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:02:00", - 4.0, - false, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:02:30", - 5.0, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:03:00", - 6.0, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:03:30", - 7.0, - false, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:04:00", - 8.0, - false, - false - ], - [ - "A", - "A-1", - "2020-01-01 00:04:30", - 9.0, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:05:00", - 10.0, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:05:30", - 11.0, - false, - false + "df": { + "schema": "partition_a string, partition_b string, other_ts_col string, value_a double, is_ts_interpolated boolean, is_interpolated_value_a boolean", + "ts_convert": [ + "other_ts_col" + ], + "data": [ + [ + "A", + "A-1", + "2020-01-01 00:00:00", + 0.0, + false, + false + ], + [ + "A", + "A-1", + "2020-01-01 00:00:30", + 1.0, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:01:00", + 2.0, + false, + false + ], + [ + "A", + "A-1", + "2020-01-01 00:01:30", + 3.0, + false, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:02:00", + 4.0, + false, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:02:30", + 5.0, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:03:00", + 6.0, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:03:30", + 7.0, + false, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:04:00", + 8.0, + false, + false + ], + [ + "A", + "A-1", + "2020-01-01 00:04:30", + 9.0, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:05:00", 
+ 10.0, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:05:30", + 11.0, + false, + false + ] ] - ] + } } }, "test_interpolation_on_sampled_data": { - "input_data": { - "$ref": "#/__SharedData/input_data" + "init": { + "$ref": "#/__SharedData/init" }, - "simple_input_data": { - "$ref": "#/__SharedData/simple_input_data" + "simple_init": { + "$ref": "#/__SharedData/simple_init" }, "expected": { - "schema": "partition_a string, partition_b string, event_ts string, value_a double, is_ts_interpolated boolean, is_interpolated_value_a boolean", - "ts_col": "event_ts", - "partition_cols": [ - "partition_a", - "partition_b" - ], - "data": [ - [ - "A", - "A-1", - "2020-01-01 00:00:00", - 0.0, - false, - false - ], - [ - "A", - "A-1", - "2020-01-01 00:00:30", - 1.0, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:01:00", - 2.0, - false, - false - ], - [ - "A", - "A-1", - "2020-01-01 00:01:30", - 3.0, - false, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:02:00", - 4.0, - false, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:02:30", - 5.0, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:03:00", - 6.0, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:03:30", - 7.0, - false, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:04:00", - 8.0, - false, - false - ], - [ - "A", - "A-1", - "2020-01-01 00:04:30", - 9.0, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:05:00", - 10.0, - true, - true - ], - [ - "A", - "A-1", - "2020-01-01 00:05:30", - 11.0, - false, - false + "df": { + "schema": "partition_a string, partition_b string, event_ts string, value_a double, is_ts_interpolated boolean, is_interpolated_value_a boolean", + "ts_convert": [ + "event_ts" + ], + "data": [ + [ + "A", + "A-1", + "2020-01-01 00:00:00", + 0.0, + false, + false + ], + [ + "A", + "A-1", + "2020-01-01 00:00:30", + 1.0, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:01:00", + 2.0, + false, + false + ], + [ + "A", + "A-1", + "2020-01-01 00:01:30", + 3.0, + false, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:02:00", + 4.0, + false, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:02:30", + 5.0, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:03:00", + 6.0, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:03:30", + 7.0, + false, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:04:00", + 8.0, + false, + false + ], + [ + "A", + "A-1", + "2020-01-01 00:04:30", + 9.0, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:05:00", + 10.0, + true, + true + ], + [ + "A", + "A-1", + "2020-01-01 00:05:30", + 11.0, + false, + false + ] ] - ] + } } }, "test_defaults_with_resampled_df": { - "input_data": { - "$ref": "#/__SharedData/input_data" + "init": { + "$ref": "#/__SharedData/init" }, - "simple_input_data": { - "$ref": "#/__SharedData/simple_input_data" + "simple_init": { + "$ref": "#/__SharedData/simple_init" }, "expected": { - "schema": "partition_a string, partition_b string, event_ts string, value_a double, value_b double", - "ts_col": "event_ts", - "partition_cols": [ - "partition_a", - "partition_b" - ], - "data": [ - [ - "A", - "A-1", - "2020-01-01 00:00:00", - 0.0, - null - ], - [ - "A", - "A-1", - "2020-01-01 00:00:30", - 0.0, - null - ], - [ - "A", - "A-1", - "2020-01-01 00:01:00", - 2.0, - 2.0 - ], - [ - "A", - "A-1", - "2020-01-01 00:01:30", - 2.0, - 2.0 - ], - [ - "A", - "A-1", - "2020-01-01 00:02:00", - 2.0, - 2.0 - ], - [ - "A", - "A-1", - "2020-01-01 00:02:30", - 2.0, - 2.0 - ], - [ - "A", - "A-1", - "2020-01-01 00:03:00", - 2.0, - 2.0 - ], - [ - 
"A", - "A-1", - "2020-01-01 00:03:30", - 2.0, - 7.0 - ], - [ - "A", - "A-1", - "2020-01-01 00:04:00", - 8.0, - 8.0 - ], - [ - "A", - "A-1", - "2020-01-01 00:04:30", - 8.0, - 8.0 - ], - [ - "A", - "A-1", - "2020-01-01 00:05:00", - 8.0, - 8.0 - ], - [ - "A", - "A-1", - "2020-01-01 00:05:30", - 11.0, - 8.0 + "df": { + "schema": "partition_a string, partition_b string, event_ts string, value_a double, value_b double", + "ts_convert": [ + "event_ts" + ], + "data": [ + [ + "A", + "A-1", + "2020-01-01 00:00:00", + 0.0, + null + ], + [ + "A", + "A-1", + "2020-01-01 00:00:30", + 0.0, + null + ], + [ + "A", + "A-1", + "2020-01-01 00:01:00", + 2.0, + 2.0 + ], + [ + "A", + "A-1", + "2020-01-01 00:01:30", + 2.0, + 2.0 + ], + [ + "A", + "A-1", + "2020-01-01 00:02:00", + 2.0, + 2.0 + ], + [ + "A", + "A-1", + "2020-01-01 00:02:30", + 2.0, + 2.0 + ], + [ + "A", + "A-1", + "2020-01-01 00:03:00", + 2.0, + 2.0 + ], + [ + "A", + "A-1", + "2020-01-01 00:03:30", + 2.0, + 7.0 + ], + [ + "A", + "A-1", + "2020-01-01 00:04:00", + 8.0, + 8.0 + ], + [ + "A", + "A-1", + "2020-01-01 00:04:30", + 8.0, + 8.0 + ], + [ + "A", + "A-1", + "2020-01-01 00:05:00", + 8.0, + 8.0 + ], + [ + "A", + "A-1", + "2020-01-01 00:05:30", + 11.0, + 8.0 + ] ] - ] + } } }, "test_tsdf_constructor_params_are_updated": { - "simple_input_data": { - "$ref": "#/__SharedData/simple_input_data" + "simple_init": { + "$ref": "#/__SharedData/simple_init" } } } From 981f1ab4ac80d490acde918ee8ae2bf1d03238e3 Mon Sep 17 00:00:00 2001 From: Lorin Date: Tue, 9 Jul 2024 14:20:37 -0600 Subject: [PATCH 124/137] additional checks for idf dataframe equality --- python/tests/base.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/python/tests/base.py b/python/tests/base.py index b6760b14..8538a1ce 100644 --- a/python/tests/base.py +++ b/python/tests/base.py @@ -158,9 +158,9 @@ def as_idf(self) -> IntervalsDF: """ sdf = self.as_sdf() if self.idf_construct is not None: - return getattr(IntervalsDF, self.idf_construct)(sdf, **self.tsdf) + return getattr(IntervalsDF, self.idf_construct)(sdf, **self.idf) else: - return IntervalsDF(self.as_sdf(), **self.tsdf) + return IntervalsDF(self.as_sdf(), **self.idf) class SparkTest(unittest.TestCase): @@ -305,8 +305,8 @@ def assertSchemaContainsField(self, schema, field): def assertDataFrameEquality( self, - df1: Union[TSDF, DataFrame], - df2: Union[TSDF, DataFrame], + df1: Union[TSDF, DataFrame, IntervalsDF], + df2: Union[TSDF, DataFrame, IntervalsDF], ignore_row_order: bool = False, ignore_column_order: bool = True, ignore_nullable: bool = True, @@ -324,6 +324,14 @@ def assertDataFrameEquality( df1 = df1.df df2 = df2.df + # Handle IDFs + if isinstance(df1, IntervalsDF): + # df2 must also be a IntervalsDF + self.assertIsInstance(df2, IntervalsDF) + # get the underlying Spark DataFrames + df1 = df1.df + df2 = df2.df + # handle DataFrames assert_df_equality( df1, From 55e65ad8f18457d8b837b083fa910e4ee3e01b02 Mon Sep 17 00:00:00 2001 From: Lorin Date: Tue, 9 Jul 2024 14:20:52 -0600 Subject: [PATCH 125/137] refactor intervals_tests --- python/tests/intervals_tests.py | 123 +- .../tests/unit_test_data/intervals_tests.json | 1699 ++++++++--------- 2 files changed, 890 insertions(+), 932 deletions(-) diff --git a/python/tests/intervals_tests.py b/python/tests/intervals_tests.py index ca0bde7a..805055c1 100644 --- a/python/tests/intervals_tests.py +++ b/python/tests/intervals_tests.py @@ -74,7 +74,7 @@ class IntervalsDFTests(SparkTest): ] def test_init_series_str(self): - df_input = 
self.get_data_as_sdf("input") + df_input = self.get_test_df_builder("init").as_sdf() idf = IntervalsDF(df_input, "start_ts", "end_ts", "series_1") @@ -91,7 +91,7 @@ def test_init_series_str(self): self.assertCountEqual(idf.metric_columns, ["metric_1", "metric_2"]) def test_init_series_comma_seperated_str(self): - df_input = self.get_data_as_sdf("input") + df_input = self.get_test_df_builder("init").as_sdf() idf = IntervalsDF(df_input, "start_ts", "end_ts", "series_1, series_2") @@ -108,7 +108,7 @@ def test_init_series_comma_seperated_str(self): self.assertCountEqual(idf.metric_columns, ["metric_1", "metric_2"]) def test_init_series_tuple(self): - df_input = self.get_data_as_sdf("input") + df_input = self.get_test_df_builder("init").as_sdf() idf = IntervalsDF(df_input, "start_ts", "end_ts", ("series_1",)) @@ -125,7 +125,7 @@ def test_init_series_tuple(self): self.assertCountEqual(idf.metric_columns, ["metric_1", "metric_2"]) def test_init_series_list(self): - df_input = self.get_data_as_sdf("input") + df_input = self.get_test_df_builder("init").as_sdf() idf = IntervalsDF(df_input, "start_ts", "end_ts", ["series_1"]) @@ -142,7 +142,7 @@ def test_init_series_list(self): self.assertCountEqual(idf.metric_columns, ["metric_1", "metric_2"]) def test_init_series_none(self): - df_input = self.get_data_as_sdf("input") + df_input = self.get_test_df_builder("init").as_sdf() idf = IntervalsDF(df_input, "start_ts", "end_ts", None) @@ -159,7 +159,7 @@ def test_init_series_none(self): self.assertCountEqual(idf.metric_columns, ["metric_1", "metric_2"]) def test_init_series_int(self): - df_input = self.get_data_as_sdf("input") + df_input = self.get_test_df_builder("init").as_sdf() self.assertRaises( ValueError, @@ -171,14 +171,12 @@ def test_init_series_int(self): ) def test_window_property(self): - df_input = self.get_data_as_sdf("input") - - idf = IntervalsDF(df_input, "start_ts", "end_ts", "series_1") + idf: IntervalsDF = self.get_test_df_builder("init").as_idf() self.assertIsInstance(idf.window, pyspark.sql.window.WindowSpec) def test_fromStackedMetrics_series_str(self): - df_input = self.get_data_as_sdf("input") + df_input = self.get_test_df_builder("init").as_sdf() self.assertRaises( ValueError, @@ -192,7 +190,7 @@ def test_fromStackedMetrics_series_str(self): ) def test_fromStackedMetrics_series_tuple(self): - df_input = self.get_data_as_sdf("input") + df_input = self.get_test_df_builder("init").as_sdf() self.assertRaises( ValueError, @@ -206,8 +204,8 @@ def test_fromStackedMetrics_series_tuple(self): ) def test_fromStackedMetrics_series_list(self): - df_input = self.get_data_as_sdf("input") - idf_expected = self.get_data_as_idf("expected") + df_input = self.get_test_df_builder("init").as_sdf() + idf_expected = self.get_test_df_builder("expected").as_idf() df_input = df_input.withColumn( "start_ts", f.to_timestamp("start_ts") @@ -224,11 +222,11 @@ def test_fromStackedMetrics_series_list(self): "metric_value", ) - self.assertDataFrameEquality(idf, idf_expected, from_idf=True) + self.assertDataFrameEquality(idf, idf_expected) def test_fromStackedMetrics_metric_names(self): - df_input = self.get_data_as_sdf("input") - idf_expected = self.get_data_as_idf("expected") + df_input = self.get_test_df_builder("init").as_sdf() + idf_expected = self.get_test_df_builder("expected").as_idf() df_input = df_input.withColumn( "start_ts", f.to_timestamp("start_ts") @@ -246,21 +244,21 @@ def test_fromStackedMetrics_metric_names(self): ["metric_1", "metric_2"], ) - self.assertDataFrameEquality(idf, idf_expected, 
from_idf=True) + self.assertDataFrameEquality(idf, idf_expected) def test_make_disjoint(self): - idf_input = self.get_data_as_idf("input") - idf_expected = self.get_data_as_idf("expected") + idf_input = self.get_test_df_builder("init").as_idf() + idf_expected = self.get_test_df_builder("expected").as_idf() idf_actual = idf_input.make_disjoint() self.assertDataFrameEquality( - idf_expected, idf_actual, from_idf=True, ignore_row_order=True + idf_expected, idf_actual, ignore_row_order=True ) def test_make_disjoint_contains_interval_already_disjoint(self): - idf_input = self.get_data_as_idf("input") - idf_expected = self.get_data_as_idf("expected") + idf_input = self.get_test_df_builder("init").as_idf() + idf_expected = self.get_test_df_builder("expected").as_idf() print("expected") print(idf_expected.df.toPandas()) @@ -269,72 +267,72 @@ def test_make_disjoint_contains_interval_already_disjoint(self): print(idf_actual) # self.assertDataFrameEquality( - # idf_expected, idf_actual, from_idf=True, ignore_row_order=True + # idf_expected, idf_actual, ignore_row_order=True # ) def test_make_disjoint_contains_intervals_equal(self): - idf_input = self.get_data_as_idf("input") - idf_expected = self.get_data_as_idf("expected") + idf_input = self.get_test_df_builder("init").as_idf() + idf_expected = self.get_test_df_builder("expected").as_idf() idf_actual = idf_input.make_disjoint() self.assertDataFrameEquality( - idf_expected, idf_actual, from_idf=True, ignore_row_order=True + idf_expected, idf_actual, ignore_row_order=True ) def test_make_disjoint_intervals_same_start(self): - idf_input = self.get_data_as_idf("input") - idf_expected = self.get_data_as_idf("expected") + idf_input = self.get_test_df_builder("init").as_idf() + idf_expected = self.get_test_df_builder("expected").as_idf() idf_actual = idf_input.make_disjoint() self.assertDataFrameEquality( - idf_expected, idf_actual, from_idf=True, ignore_row_order=True + idf_expected, idf_actual, ignore_row_order=True ) def test_make_disjoint_intervals_same_end(self): - idf_input = self.get_data_as_idf("input") - idf_expected = self.get_data_as_idf("expected") + idf_input = self.get_test_df_builder("init").as_idf() + idf_expected = self.get_test_df_builder("expected").as_idf() idf_actual = idf_input.make_disjoint() self.assertDataFrameEquality( - idf_expected, idf_actual, from_idf=True, ignore_row_order=True + idf_expected, idf_actual, ignore_row_order=True ) def test_make_disjoint_multiple_series(self): - idf_input = self.get_data_as_idf("input") - idf_expected = self.get_data_as_idf("expected") + idf_input = self.get_test_df_builder("init").as_idf() + idf_expected = self.get_test_df_builder("expected").as_idf() idf_actual = idf_input.make_disjoint() self.assertDataFrameEquality( - idf_expected, idf_actual, from_idf=True, ignore_row_order=True + idf_expected, idf_actual, ignore_row_order=True ) def test_make_disjoint_single_metric(self): - idf_input = self.get_data_as_idf("input") - idf_expected = self.get_data_as_idf("expected") + idf_input = self.get_test_df_builder("init").as_idf() + idf_expected = self.get_test_df_builder("expected").as_idf() idf_actual = idf_input.make_disjoint() self.assertDataFrameEquality( - idf_expected, idf_actual, from_idf=True, ignore_row_order=True + idf_expected, idf_actual, ignore_row_order=True ) def test_make_disjoint_interval_is_subset(self): - idf_input = self.get_data_as_idf("input") - idf_expected = self.get_data_as_idf("expected") + idf_input = self.get_test_df_builder("init").as_idf() + idf_expected = 
self.get_test_df_builder("expected").as_idf() idf_actual = idf_input.make_disjoint() self.assertDataFrameEquality( - idf_expected, idf_actual, from_idf=True, ignore_row_order=True + idf_expected, idf_actual, ignore_row_order=True ) def test_union_other_idf(self): - idf_input_1 = self.get_data_as_idf("input") - idf_input_2 = self.get_data_as_idf("input") + idf_input_1 = self.get_test_df_builder("init").as_idf() + idf_input_2 = self.get_test_df_builder("init").as_idf() count_idf_1 = idf_input_1.df.count() count_idf_2 = idf_input_2.df.count() @@ -346,21 +344,21 @@ def test_union_other_idf(self): self.assertEqual(count_idf_1 + count_idf_2, count_union) def test_union_other_df(self): - idf_input = self.get_data_as_idf("input") - df_input = self.get_data_as_sdf("input") + idf_input = self.get_test_df_builder("init").as_idf() + df_input = self.get_test_df_builder("init").as_sdf() self.assertRaises(TypeError, idf_input.union, df_input) def test_union_other_list_dicts(self): - idf_input = self.get_data_as_idf("input") + idf_input = self.get_test_df_builder("init").as_idf() self.assertRaises( TypeError, idf_input.union, IntervalsDFTests.union_tests_dict_input ) def test_unionByName_other_idf(self): - idf_input_1 = self.get_data_as_idf("input") - idf_input_2 = self.get_data_as_idf("input") + idf_input_1 = self.get_test_df_builder("init").as_idf() + idf_input_2 = self.get_test_df_builder("init").as_idf() count_idf_1 = idf_input_1.df.count() count_idf_2 = idf_input_2.df.count() @@ -372,41 +370,42 @@ def test_unionByName_other_idf(self): self.assertEqual(count_idf_1 + count_idf_2, count_union_by_name) def test_unionByName_other_df(self): - idf_input = self.get_data_as_idf("input") - df_input = self.get_data_as_sdf("input") + idf_input = self.get_test_df_builder("init").as_idf() + df_input = self.get_test_df_builder("init").as_sdf() self.assertRaises(TypeError, idf_input.unionByName, df_input) def test_unionByName_other_list_dicts(self): - idf_input = self.get_data_as_idf("input") + idf_input = self.get_test_df_builder("init").as_idf() self.assertRaises( TypeError, idf_input.unionByName, IntervalsDFTests.union_tests_dict_input ) def test_unionByName_extra_column(self): - idf_extra_col = self.get_data_as_idf("input_extra_col") - idf_input = self.get_data_as_idf("input") + idf_extra_col = self.get_test_df_builder("init_extra_col").as_idf() + idf_input = self.get_test_df_builder("init").as_idf() self.assertRaises(AnalysisException, idf_extra_col.unionByName, idf_input) def test_unionByName_other_extra_column(self): - idf_input = self.get_data_as_idf("input") - idf_extra_col = self.get_data_as_idf("input_extra_col") + idf_input = self.get_test_df_builder("init").as_idf() + idf_extra_col = self.get_test_df_builder("init_extra_col").as_idf() self.assertRaises(AnalysisException, idf_input.unionByName, idf_extra_col) def test_toDF(self): - idf_input = self.get_data_as_idf("input") - expected_df = self.get_data_as_sdf("input") + # NB: init is used for both since the expected df is the same + idf_input = self.get_test_df_builder("init").as_idf() + expected_df = self.get_test_df_builder("init").as_sdf() actual_df = idf_input.toDF() self.assertDataFrameEquality(actual_df, expected_df) def test_toDF_stack(self): - idf_input = self.get_data_as_idf("input") - expected_df = self.get_data_as_sdf("expected") + idf_input = self.get_test_df_builder("init").as_idf() + expected_df = self.get_test_df_builder("expected").as_sdf() expected_df = expected_df.withColumn( "start_ts", f.to_timestamp("start_ts") @@ -419,14 +418,14 @@ 
def test_toDF_stack(self): def test_make_disjoint_issue_268(self): # https://github.com/databrickslabs/tempo/issues/268 - idf_input = self.get_data_as_idf("input") - idf_expected = self.get_data_as_idf("expected") + idf_input = self.get_test_df_builder("init").as_idf() + idf_expected = self.get_test_df_builder("expected").as_idf() idf_actual = idf_input.make_disjoint() idf_actual.df.show(truncate=False) self.assertDataFrameEquality( - idf_expected, idf_actual, from_idf=True, ignore_row_order=True + idf_expected, idf_actual, ignore_row_order=True ) diff --git a/python/tests/unit_test_data/intervals_tests.json b/python/tests/unit_test_data/intervals_tests.json index 722ddbec..22b01a96 100644 --- a/python/tests/unit_test_data/intervals_tests.json +++ b/python/tests/unit_test_data/intervals_tests.json @@ -1,66 +1,22 @@ { "__SharedData": { "init": { - "schema": "start_ts STRING NOT NULL, end_ts STRING NOT NULL, series_1 STRING NOT NULL, metric_1 INT, metric_2 INT", - "other_ts_cols": [ - "start_ts", - "end_ts" - ], - "start_ts": "start_ts", - "end_ts": "end_ts", - "series": [ - "series_1" - ], - "data": [ - [ - "2020-08-01 00:00:09", - "2020-08-01 00:00:14", - "v1", - 5, - null - ], - [ - "2020-08-01 00:00:09", - "2020-08-01 00:00:11", - "v1", - null, - 0 - ], - [ - "2020-08-01 00:00:11", - "2020-08-01 00:00:12", - "v1", - null, - 4 - ] - ] - } - }, - "IntervalsDFTests": { - "test_init_series_str": { - "input": { - "$ref": "#/__SharedData/init" - } - }, - "test_init_series_comma_seperated_str": { - "input": { - "schema": "start_ts STRING NOT NULL, end_ts STRING NOT NULL, series_1 STRING NOT NULL, series_2 STRING NOT NULL, metric_1 INT, metric_2 INT", - "other_ts_cols": [ - "start_ts", - "end_ts" - ], + "idf": { "start_ts": "start_ts", "end_ts": "end_ts", - "series": [ - "series_1", - "series_2" + "series_ids": ["series_1"] + }, + "df": { + "schema": "start_ts STRING NOT NULL, end_ts STRING NOT NULL, series_1 STRING NOT NULL, metric_1 INT, metric_2 INT", + "ts_convert": [ + "start_ts", + "end_ts" ], "data": [ [ "2020-08-01 00:00:09", "2020-08-01 00:00:14", "v1", - "v2", 5, null ], @@ -68,7 +24,6 @@ "2020-08-01 00:00:09", "2020-08-01 00:00:11", "v1", - "v2", null, 0 ], @@ -76,351 +31,380 @@ "2020-08-01 00:00:11", "2020-08-01 00:00:12", "v1", - "v2", null, 4 ] ] } + } + }, + "IntervalsDFTests": { + "test_init_series_str": { + "init": { + "$ref": "#/__SharedData/init" + } + }, + "test_init_series_comma_seperated_str": { + "init": { + "df": { + "schema": "start_ts STRING NOT NULL, end_ts STRING NOT NULL, series_1 STRING NOT NULL, series_2 STRING NOT NULL, metric_1 INT, metric_2 INT", + "ts_convert": [ + "start_ts", + "end_ts" + ], + "data": [ + [ + "2020-08-01 00:00:09", + "2020-08-01 00:00:14", + "v1", + "v2", + 5, + null + ], + [ + "2020-08-01 00:00:09", + "2020-08-01 00:00:11", + "v1", + "v2", + null, + 0 + ], + [ + "2020-08-01 00:00:11", + "2020-08-01 00:00:12", + "v1", + "v2", + null, + 4 + ] + ] + } + } }, "test_init_series_tuple": { - "input": { + "init": { "$ref": "#/__SharedData/init" } }, "test_init_series_list": { - "input": { + "init": { "$ref": "#/__SharedData/init" } }, "test_init_series_none": { - "input": { - "$ref": "#/__SharedData/init" + "init": { + "idf": { + "start_ts": "start_ts", + "end_ts": "end_ts", + "series_ids": [] + }, + "df": { + "$ref": "#/__SharedData/init/df" + } } }, "test_init_series_int": { - "input": { + "init": { "$ref": "#/__SharedData/init" } }, "test_window_property": { - "input": { + "init": { "$ref": "#/__SharedData/init" } }, "test_init_metric_none": { - 
"input": { + "init": { "$ref": "#/__SharedData/init" } }, "test_fromStackedMetrics_series_str": { - "input": { - "schema": "start_ts STRING NOT NULL, end_ts STRING NOT NULL, series_1 STRING NOT NULL, metric_name STRING NOT NULL, metric_value INT NOT NULL", - "data": [ - [ - "2020-08-01 00:00:09", - "2020-08-01 00:00:14", - "v1", - "metric_1", - 5 - ], - [ - "2020-08-01 00:00:09", - "2020-08-01 00:00:11", - "v1", - "metric_2", - 0 - ], - [ - "2020-08-01 00:00:11", - "2020-08-01 00:00:12", - "v1", - "metric_2", - 4 + "init": { + "df": { + "schema": "start_ts STRING NOT NULL, end_ts STRING NOT NULL, series_1 STRING NOT NULL, metric_name STRING NOT NULL, metric_value INT NOT NULL", + "data": [ + [ + "2020-08-01 00:00:09", + "2020-08-01 00:00:14", + "v1", + "metric_1", + 5 + ], + [ + "2020-08-01 00:00:09", + "2020-08-01 00:00:11", + "v1", + "metric_2", + 0 + ], + [ + "2020-08-01 00:00:11", + "2020-08-01 00:00:12", + "v1", + "metric_2", + 4 + ] ] - ] + } } }, "test_fromStackedMetrics_series_tuple": { - "input": { - "$ref": "#/IntervalsDFTests/test_fromStackedMetrics_series_str/input" + "init": { + "$ref": "#/IntervalsDFTests/test_fromStackedMetrics_series_str/init" } }, "test_fromStackedMetrics_series_list": { - "input": { - "$ref": "#/IntervalsDFTests/test_fromStackedMetrics_series_str/input" + "init": { + "$ref": "#/IntervalsDFTests/test_fromStackedMetrics_series_str/init" }, "expected": { - "schema": "start_ts STRING NOT NULL, end_ts STRING NOT NULL, series_1 STRING NOT NULL, metric_1 INT, metric_2 INT", - "other_ts_cols": [ - "start_ts", - "end_ts" - ], - "start_ts": "start_ts", - "end_ts": "end_ts", - "series": [ - "series_1" - ], - "data": [ - [ - "2020-08-01 00:00:09", - "2020-08-01 00:00:14", - "v1", - 5, - null - ], - [ - "2020-08-01 00:00:09", - "2020-08-01 00:00:11", - "v1", - null, - 0 + "idf": { + "start_ts": "start_ts", + "end_ts": "end_ts", + "series_ids": ["series_1"] + }, + "df": { + "schema": "start_ts STRING NOT NULL, end_ts STRING NOT NULL, series_1 STRING NOT NULL, metric_1 INT, metric_2 INT", + "ts_convert": [ + "start_ts", + "end_ts" ], - [ - "2020-08-01 00:00:11", - "2020-08-01 00:00:12", - "v1", - null, - 4 + "data": [ + [ + "2020-08-01 00:00:09", + "2020-08-01 00:00:14", + "v1", + 5, + null + ], + [ + "2020-08-01 00:00:09", + "2020-08-01 00:00:11", + "v1", + null, + 0 + ], + [ + "2020-08-01 00:00:11", + "2020-08-01 00:00:12", + "v1", + null, + 4 + ] ] - ] + } } }, "test_fromStackedMetrics_metric_names": { - "input": { - "$ref": "#/IntervalsDFTests/test_fromStackedMetrics_series_list/input" + "init": { + "$ref": "#/IntervalsDFTests/test_fromStackedMetrics_series_list/init" }, "expected": { "$ref": "#/IntervalsDFTests/test_fromStackedMetrics_series_list/expected" } }, "test_make_disjoint": { - "input": { - "schema": { - "$ref": "#/__SharedData/init/schema" - }, - "other_ts_cols": { - "$ref": "#/__SharedData/init/other_ts_cols" - }, - "start_ts": { - "$ref": "#/__SharedData/init/start_ts" - }, - "end_ts": { - "$ref": "#/__SharedData/init/end_ts" - }, - "series": { - "$ref": "#/__SharedData/init/series" - }, - "data": [ - [ - "2020-08-01 00:00:10", - "2020-08-01 00:00:14", - "v1", - 5, - null - ], - [ - "2020-08-01 00:00:09", - "2020-08-01 00:00:11", - "v1", - null, - 0 + "init": { + "idf": { + "$ref": "#/__SharedData/init/idf" + }, + "df": { + "schema": { + "$ref": "#/__SharedData/init/df/schema" + }, + "ts_convert": { + "$ref": "#/__SharedData/init/df/ts_convert" + }, + "data": [ + [ + "2020-08-01 00:00:10", + "2020-08-01 00:00:14", + "v1", + 5, + null + ], + [ + 
"2020-08-01 00:00:09", + "2020-08-01 00:00:11", + "v1", + null, + 0 + ] ] - ] + } }, "expected": { - "schema": { - "$ref": "#/__SharedData/init/schema" - }, - "other_ts_cols": { - "$ref": "#/__SharedData/init/other_ts_cols" - }, - "start_ts": { - "$ref": "#/__SharedData/init/start_ts" - }, - "end_ts": { - "$ref": "#/__SharedData/init/end_ts" - }, - "series": { - "$ref": "#/__SharedData/init/series" - }, - "data": [ - [ - "2020-08-01 00:00:09", - "2020-08-01 00:00:10", - "v1", - null, - 0 - ], - [ - "2020-08-01 00:00:10", - "2020-08-01 00:00:11", - "v1", - 5, - 0 - ], - [ - "2020-08-01 00:00:11", - "2020-08-01 00:00:14", - "v1", - 5, - null + "idf": { + "$ref": "#/__SharedData/init/idf" + }, + "df": { + "schema": { + "$ref": "#/__SharedData/init/df/schema" + }, + "ts_convert": { + "$ref": "#/__SharedData/init/df/ts_convert" + }, + "data": [ + [ + "2020-08-01 00:00:09", + "2020-08-01 00:00:10", + "v1", + null, + 0 + ], + [ + "2020-08-01 00:00:10", + "2020-08-01 00:00:11", + "v1", + 5, + 0 + ], + [ + "2020-08-01 00:00:11", + "2020-08-01 00:00:14", + "v1", + 5, + null + ] ] - ] + } } }, "test_make_disjoint_contains_interval_already_disjoint": { - "input": { - "schema": { - "$ref": "#/__SharedData/init/schema" - }, - "other_ts_cols": { - "$ref": "#/__SharedData/init/other_ts_cols" - }, - "start_ts": { - "$ref": "#/__SharedData/init/start_ts" - }, - "end_ts": { - "$ref": "#/__SharedData/init/end_ts" - }, - "series": { - "$ref": "#/__SharedData/init/series" - }, - "data": [ - [ - "2020-08-01 00:00:10", - "2020-08-01 00:00:13", - "v1", - 5, - null - ], - [ - "2020-08-01 00:00:09", - "2020-08-01 00:00:12", - "v1", - null, - 0 - ], - [ - "2020-08-01 00:00:13", - "2020-08-01 00:00:14", - "v1", - null, - 4 + "init": { + "idf": { + "$ref": "#/__SharedData/init/idf" + }, + "df": { + "schema": { + "$ref": "#/__SharedData/init/df/schema" + }, + "ts_convert": { + "$ref": "#/__SharedData/init/df/ts_convert" + }, + "data": [ + [ + "2020-08-01 00:00:10", + "2020-08-01 00:00:13", + "v1", + 5, + null + ], + [ + "2020-08-01 00:00:09", + "2020-08-01 00:00:12", + "v1", + null, + 0 + ], + [ + "2020-08-01 00:00:13", + "2020-08-01 00:00:14", + "v1", + null, + 4 + ] ] - ] + } }, "expected": { - "schema": { - "$ref": "#/__SharedData/init/schema" - }, - "other_ts_cols": { - "$ref": "#/__SharedData/init/other_ts_cols" - }, - "start_ts": { - "$ref": "#/__SharedData/init/start_ts" - }, - "end_ts": { - "$ref": "#/__SharedData/init/end_ts" - }, - "series": { - "$ref": "#/__SharedData/init/series" - }, - "data": [ - [ - "2020-08-01 00:00:09", - "2020-08-01 00:00:10", - "v1", - null, - 0 - ], - [ - "2020-08-01 00:00:10", - "2020-08-01 00:00:12", - "v1", - 5, - 0 - ], - [ - "2020-08-01 00:00:12", - "2020-08-01 00:00:13", - "v1", - 5, - null - ], - [ - "2020-08-01 00:00:13", - "2020-08-01 00:00:14", - "v1", - null, - 4 + "idf": { + "$ref": "#/__SharedData/init/idf" + }, + "df": { + "schema": { + "$ref": "#/__SharedData/init/df/schema" + }, + "ts_convert": { + "$ref": "#/__SharedData/init/df/ts_convert" + }, + "data": [ + [ + "2020-08-01 00:00:09", + "2020-08-01 00:00:10", + "v1", + null, + 0 + ], + [ + "2020-08-01 00:00:10", + "2020-08-01 00:00:12", + "v1", + 5, + 0 + ], + [ + "2020-08-01 00:00:12", + "2020-08-01 00:00:13", + "v1", + 5, + null + ], + [ + "2020-08-01 00:00:13", + "2020-08-01 00:00:14", + "v1", + null, + 4 + ] ] - ] + } } }, "test_make_disjoint_contains_intervals_equal": { - "input": { - "schema": { - "$ref": "#/__SharedData/init/schema" - }, - "other_ts_cols": { - "$ref": "#/__SharedData/init/other_ts_cols" - }, 
- "start_ts": { - "$ref": "#/__SharedData/init/start_ts" - }, - "end_ts": { - "$ref": "#/__SharedData/init/end_ts" - }, - "series": { - "$ref": "#/__SharedData/init/series" - }, - "data": [ - [ - "2020-08-01 00:00:10", - "2020-08-01 00:00:13", - "v1", - 5, - null - ], - [ - "2020-08-01 00:00:09", - "2020-08-01 00:00:12", - "v1", - null, - 0 - ], - [ - "2020-08-01 00:00:13", - "2020-08-01 00:00:14", - "v1", - null, - 4 - ], - [ - "2020-08-01 00:00:13", - "2020-08-01 00:00:14", - "v1", - 7, - null + "init": { + "idf": { + "$ref": "#/__SharedData/init/idf" + }, + "df": { + "schema": { + "$ref": "#/__SharedData/init/df/schema" + }, + "ts_convert": { + "$ref": "#/__SharedData/init/df/ts_convert" + }, + "data": [ + [ + "2020-08-01 00:00:10", + "2020-08-01 00:00:13", + "v1", + 5, + null + ], + [ + "2020-08-01 00:00:09", + "2020-08-01 00:00:12", + "v1", + null, + 0 + ], + [ + "2020-08-01 00:00:13", + "2020-08-01 00:00:14", + "v1", + null, + 4 + ], + [ + "2020-08-01 00:00:13", + "2020-08-01 00:00:14", + "v1", + 7, + null + ] ] - ] + } }, "expected": { - "schema": { - "$ref": "#/__SharedData/init/schema" - }, - "other_ts_cols": { - "$ref": "#/__SharedData/init/other_ts_cols" + "idf": { + "$ref": "#/__SharedData/init/idf" }, - "start_ts": { - "$ref": "#/__SharedData/init/start_ts" - }, - "end_ts": { - "$ref": "#/__SharedData/init/end_ts" + "df": { + "schema": { + "$ref": "#/__SharedData/init/df/schema" }, - "series": { - "$ref": "#/__SharedData/init/series" + "ts_convert": { + "$ref": "#/__SharedData/init/df/ts_convert" }, "data": [ [ @@ -452,637 +436,612 @@ 4 ] ] + } } }, "test_make_disjoint_intervals_same_start": { - "input": { - "schema": { - "$ref": "#/__SharedData/init/schema" - }, - "other_ts_cols": { - "$ref": "#/__SharedData/init/other_ts_cols" - }, - "start_ts": { - "$ref": "#/__SharedData/init/start_ts" - }, - "end_ts": { - "$ref": "#/__SharedData/init/end_ts" - }, - "series": { - "$ref": "#/__SharedData/init/series" - }, - "data": [ - [ - "2020-08-01 00:00:09", - "2020-08-01 00:00:14", - "v1", - 5, - null - ], - [ - "2020-08-01 00:00:09", - "2020-08-01 00:00:11", - "v1", - null, - 0 + "init": { + "idf": { + "$ref": "#/__SharedData/init/idf" + }, + "df": { + "schema": { + "$ref": "#/__SharedData/init/df/schema" + }, + "ts_convert": { + "$ref": "#/__SharedData/init/df/ts_convert" + }, + "data": [ + [ + "2020-08-01 00:00:09", + "2020-08-01 00:00:14", + "v1", + 5, + null + ], + [ + "2020-08-01 00:00:09", + "2020-08-01 00:00:11", + "v1", + null, + 0 + ] ] - ] + } }, "expected": { - "schema": { - "$ref": "#/__SharedData/init/schema" - }, - "other_ts_cols": { - "$ref": "#/__SharedData/init/other_ts_cols" - }, - "start_ts": { - "$ref": "#/__SharedData/init/start_ts" - }, - "end_ts": { - "$ref": "#/__SharedData/init/end_ts" - }, - "series": { - "$ref": "#/__SharedData/init/series" - }, - "data": [ - [ - "2020-08-01 00:00:09", - "2020-08-01 00:00:11", - "v1", - 5, - 0 - ], - [ - "2020-08-01 00:00:11", - "2020-08-01 00:00:14", - "v1", - 5, - null + "idf": { + "$ref": "#/__SharedData/init/idf" + }, + "df": { + "schema": { + "$ref": "#/__SharedData/init/df/schema" + }, + "ts_convert": { + "$ref": "#/__SharedData/init/df/ts_convert" + }, + "data": [ + [ + "2020-08-01 00:00:09", + "2020-08-01 00:00:11", + "v1", + 5, + 0 + ], + [ + "2020-08-01 00:00:11", + "2020-08-01 00:00:14", + "v1", + 5, + null + ] ] - ] + } } }, "test_make_disjoint_intervals_same_end": { - "input": { - "schema": { - "$ref": "#/__SharedData/init/schema" - }, - "other_ts_cols": { - "$ref": "#/__SharedData/init/other_ts_cols" - }, - 
"start_ts": { - "$ref": "#/__SharedData/init/start_ts" - }, - "end_ts": { - "$ref": "#/__SharedData/init/end_ts" - }, - "series": { - "$ref": "#/__SharedData/init/series" - }, - "data": [ - [ - "2020-08-01 00:00:09", - "2020-08-01 00:00:14", - "v1", - 5, - null - ], - [ - "2020-08-01 00:00:11", - "2020-08-01 00:00:14", - "v1", - null, - 0 + "init": { + "idf": { + "$ref": "#/__SharedData/init/idf" + }, + "df": { + "schema": { + "$ref": "#/__SharedData/init/df/schema" + }, + "ts_convert": { + "$ref": "#/__SharedData/init/df/ts_convert" + }, + "data": [ + [ + "2020-08-01 00:00:09", + "2020-08-01 00:00:14", + "v1", + 5, + null + ], + [ + "2020-08-01 00:00:11", + "2020-08-01 00:00:14", + "v1", + null, + 0 + ] ] - ] + } }, "expected": { - "schema": { - "$ref": "#/__SharedData/init/schema" - }, - "other_ts_cols": { - "$ref": "#/__SharedData/init/other_ts_cols" - }, - "start_ts": { - "$ref": "#/__SharedData/init/start_ts" - }, - "end_ts": { - "$ref": "#/__SharedData/init/end_ts" - }, - "series": { - "$ref": "#/__SharedData/init/series" - }, - "data": [ - [ - "2020-08-01 00:00:09", - "2020-08-01 00:00:11", - "v1", - 5, - null - ], - [ - "2020-08-01 00:00:11", - "2020-08-01 00:00:14", - "v1", - 5, - 0 + "idf": { + "$ref": "#/__SharedData/init/idf" + }, + "df": { + "schema": { + "$ref": "#/__SharedData/init/df/schema" + }, + "ts_convert": { + "$ref": "#/__SharedData/init/df/ts_convert" + }, + "data": [ + [ + "2020-08-01 00:00:09", + "2020-08-01 00:00:11", + "v1", + 5, + null + ], + [ + "2020-08-01 00:00:11", + "2020-08-01 00:00:14", + "v1", + 5, + 0 + ] ] - ] + } } }, "test_make_disjoint_multiple_series": { - "input": { - "schema": "start_ts STRING NOT NULL, end_ts STRING NOT NULL, series_1 STRING NOT NULL, series_2 STRING NOT NULL, metric_1 INT, metric_2 INT", - "other_ts_cols": { - "$ref": "#/__SharedData/init/other_ts_cols" - }, - "start_ts": { - "$ref": "#/__SharedData/init/start_ts" - }, - "end_ts": { - "$ref": "#/__SharedData/init/end_ts" + "init": { + "idf": { + "start_ts": { + "$ref": "#/__SharedData/init/idf/start_ts" + }, + "end_ts": { + "$ref": "#/__SharedData/init/idf/end_ts" + }, + "series_ids": [ + "series_1", + "series_2" + ] }, - "series": [ - "series_1", - "series_2" - ], - "data": [ - [ - "2020-08-01 00:00:10", - "2020-08-01 00:00:14", - "v1", - "foo", - 5, - null - ], - [ - "2020-08-01 00:00:09", - "2020-08-01 00:00:14", - "v1", - "bar", - 3, - 2 - ], - [ - "2020-08-01 00:00:09", - "2020-08-01 00:00:11", - "v1", - "foo", - null, - 0 - ], - [ - "2020-08-01 00:00:10", - "2020-08-01 00:00:13", - "v2", - "foo", - 5, - null - ], - [ - "2020-08-01 00:00:09", - "2020-08-01 00:00:12", - "v2", - "foo", - null, - 0 - ], - [ - "2020-08-01 00:00:13", - "2020-08-01 00:00:14", - "v2", - "foo", - null, - 4 - ], - [ - "2020-08-01 00:00:13", - "2020-08-01 00:00:14", - "v2", - "foo", - 6, - 3 + "df": { + "schema": "start_ts STRING NOT NULL, end_ts STRING NOT NULL, series_1 STRING NOT NULL, series_2 STRING NOT NULL, metric_1 INT, metric_2 INT", + "ts_convert": { + "$ref": "#/__SharedData/init/df/ts_convert" + }, + "data": [ + [ + "2020-08-01 00:00:10", + "2020-08-01 00:00:14", + "v1", + "foo", + 5, + null + ], + [ + "2020-08-01 00:00:09", + "2020-08-01 00:00:14", + "v1", + "bar", + 3, + 2 + ], + [ + "2020-08-01 00:00:09", + "2020-08-01 00:00:11", + "v1", + "foo", + null, + 0 + ], + [ + "2020-08-01 00:00:10", + "2020-08-01 00:00:13", + "v2", + "foo", + 5, + null + ], + [ + "2020-08-01 00:00:09", + "2020-08-01 00:00:12", + "v2", + "foo", + null, + 0 + ], + [ + "2020-08-01 00:00:13", + "2020-08-01 
00:00:14", + "v2", + "foo", + null, + 4 + ], + [ + "2020-08-01 00:00:13", + "2020-08-01 00:00:14", + "v2", + "foo", + 6, + 3 + ] ] - ] + } }, "expected": { - "schema": { - "$ref": "#/IntervalsDFTests/test_make_disjoint_multiple_series/input/schema" - }, - "other_ts_cols": { - "$ref": "#/__SharedData/init/other_ts_cols" - }, - "start_ts": { - "$ref": "#/__SharedData/init/start_ts" - }, - "end_ts": { - "$ref": "#/__SharedData/init/end_ts" - }, - "series": { - "$ref": "#/IntervalsDFTests/test_make_disjoint_multiple_series/input/series" - }, - "data": [ - [ - "2020-08-01 00:00:09", - "2020-08-01 00:00:14", - "v1", - "bar", - 3, - 2 - ], - [ - "2020-08-01 00:00:09", - "2020-08-01 00:00:10", - "v1", - "foo", - null, - 0 - ], - [ - "2020-08-01 00:00:10", - "2020-08-01 00:00:11", - "v1", - "foo", - 5, - 0 - ], - [ - "2020-08-01 00:00:11", - "2020-08-01 00:00:14", - "v1", - "foo", - 5, - null - ], - [ - "2020-08-01 00:00:09", - "2020-08-01 00:00:10", - "v2", - "foo", - null, - 0 - ], - [ - "2020-08-01 00:00:10", - "2020-08-01 00:00:12", - "v2", - "foo", - 5, - 0 - ], - [ - "2020-08-01 00:00:12", - "2020-08-01 00:00:13", - "v2", - "foo", - 5, - null - ], - [ - "2020-08-01 00:00:13", - "2020-08-01 00:00:14", - "v2", - "foo", - 6, - 4 + "idf": { + "start_ts": { + "$ref": "#/__SharedData/init/idf/start_ts" + }, + "end_ts": { + "$ref": "#/__SharedData/init/idf/end_ts" + }, + "series_ids": { + "$ref": "#/IntervalsDFTests/test_make_disjoint_multiple_series/init/idf/series_ids" + } + }, + "df": { + "schema": { + "$ref": "#/IntervalsDFTests/test_make_disjoint_multiple_series/init/df/schema" + }, + "ts_convert": { + "$ref": "#/__SharedData/init/df/ts_convert" + }, + "data": [ + [ + "2020-08-01 00:00:09", + "2020-08-01 00:00:14", + "v1", + "bar", + 3, + 2 + ], + [ + "2020-08-01 00:00:09", + "2020-08-01 00:00:10", + "v1", + "foo", + null, + 0 + ], + [ + "2020-08-01 00:00:10", + "2020-08-01 00:00:11", + "v1", + "foo", + 5, + 0 + ], + [ + "2020-08-01 00:00:11", + "2020-08-01 00:00:14", + "v1", + "foo", + 5, + null + ], + [ + "2020-08-01 00:00:09", + "2020-08-01 00:00:10", + "v2", + "foo", + null, + 0 + ], + [ + "2020-08-01 00:00:10", + "2020-08-01 00:00:12", + "v2", + "foo", + 5, + 0 + ], + [ + "2020-08-01 00:00:12", + "2020-08-01 00:00:13", + "v2", + "foo", + 5, + null + ], + [ + "2020-08-01 00:00:13", + "2020-08-01 00:00:14", + "v2", + "foo", + 6, + 4 + ] ] - ] + } } }, "test_make_disjoint_single_metric": { - "input": { - "schema": "start_ts STRING NOT NULL, end_ts STRING NOT NULL, series_1 STRING NOT NULL, metric_1 INT", - "other_ts_cols": { - "$ref": "#/__SharedData/init/other_ts_cols" - }, - "start_ts": { - "$ref": "#/__SharedData/init/start_ts" - }, - "end_ts": { - "$ref": "#/__SharedData/init/end_ts" - }, - "series": { - "$ref": "#/__SharedData/init/series" - }, - "data": [ - [ - "2020-08-01 00:00:11", - "2020-08-01 00:00:14", - "v1", - 5 - ], - [ - "2020-08-01 00:00:09", - "2020-08-01 00:00:11", - "v1", - 4 + "init": { + "idf": { + "$ref": "#/__SharedData/init/idf" + }, + "df": { + "schema": "start_ts STRING NOT NULL, end_ts STRING NOT NULL, series_1 STRING NOT NULL, metric_1 INT", + "ts_convert": { + "$ref": "#/__SharedData/init/df/ts_convert" + }, + "data": [ + [ + "2020-08-01 00:00:11", + "2020-08-01 00:00:14", + "v1", + 5 + ], + [ + "2020-08-01 00:00:09", + "2020-08-01 00:00:11", + "v1", + 4 + ] ] - ] + } }, "expected": { - "schema": { - "$ref": "#/IntervalsDFTests/test_make_disjoint_single_metric/input/schema" - }, - "other_ts_cols": { - "$ref": "#/__SharedData/init/other_ts_cols" - }, - "start_ts": 
{ - "$ref": "#/__SharedData/init/start_ts" - }, - "end_ts": { - "$ref": "#/__SharedData/init/end_ts" - }, - "series": { - "$ref": "#/__SharedData/init/series" - }, - "data": { - "$ref": "#/IntervalsDFTests/test_make_disjoint_single_metric/input/data" + "idf": { + "$ref": "#/__SharedData/init/idf" + }, + "df": { + "schema": { + "$ref": "#/IntervalsDFTests/test_make_disjoint_single_metric/init/df/schema" + }, + "ts_convert": { + "$ref": "#/__SharedData/init/df/ts_convert" + }, + "data": { + "$ref": "#/IntervalsDFTests/test_make_disjoint_single_metric/init/df/data" + } } } }, "test_make_disjoint_interval_is_subset": { - "input": { - "schema": { - "$ref": "#/__SharedData/init/schema" - }, - "other_ts_cols": { - "$ref": "#/__SharedData/init/other_ts_cols" - }, - "start_ts": { - "$ref": "#/__SharedData/init/start_ts" - }, - "end_ts": { - "$ref": "#/__SharedData/init/end_ts" - }, - "series": { - "$ref": "#/__SharedData/init/series" - }, - "data": [ - [ - "2020-08-01 00:00:09", - "2020-08-01 00:00:14", - "v1", - 5, - null - ], - [ - "2020-08-01 00:00:10", - "2020-08-01 00:00:11", - "v1", - null, - 0 + "init": { + "idf": { + "$ref": "#/__SharedData/init/idf" + }, + "df": { + "schema": { + "$ref": "#/__SharedData/init/df/schema" + }, + "ts_convert": { + "$ref": "#/__SharedData/init/df/ts_convert" + }, + "data": [ + [ + "2020-08-01 00:00:09", + "2020-08-01 00:00:14", + "v1", + 5, + null + ], + [ + "2020-08-01 00:00:10", + "2020-08-01 00:00:11", + "v1", + null, + 0 + ] ] - ] + } }, "expected": { - "schema": { - "$ref": "#/__SharedData/init/schema" - }, - "other_ts_cols": { - "$ref": "#/__SharedData/init/other_ts_cols" - }, - "start_ts": { - "$ref": "#/__SharedData/init/start_ts" - }, - "end_ts": { - "$ref": "#/__SharedData/init/end_ts" - }, - "series": { - "$ref": "#/__SharedData/init/series" - }, - "data": [ - [ - "2020-08-01 00:00:09", - "2020-08-01 00:00:10", - "v1", - 5, - null - ], - [ - "2020-08-01 00:00:10", - "2020-08-01 00:00:11", - "v1", - 5, - 0 - ], - [ - "2020-08-01 00:00:11", - "2020-08-01 00:00:14", - "v1", - 5, - null + "idf": { + "$ref": "#/__SharedData/init/idf" + }, + "df": { + "schema": { + "$ref": "#/__SharedData/init/df/schema" + }, + "ts_convert": { + "$ref": "#/__SharedData/init/df/ts_convert" + }, + "data": [ + [ + "2020-08-01 00:00:09", + "2020-08-01 00:00:10", + "v1", + 5, + null + ], + [ + "2020-08-01 00:00:10", + "2020-08-01 00:00:11", + "v1", + 5, + 0 + ], + [ + "2020-08-01 00:00:11", + "2020-08-01 00:00:14", + "v1", + 5, + null + ] ] - ] + } } }, "test_union_other_idf": { - "input": { + "init": { "$ref": "#/__SharedData/init" } }, "test_union_other_df": { - "input": { + "init": { "$ref": "#/__SharedData/init" } }, "test_union_other_list_dicts": { - "input": { + "init": { "$ref": "#/__SharedData/init" } }, "test_unionByName_other_idf": { - "input": { + "init": { "$ref": "#/__SharedData/init" } }, "test_unionByName_other_df": { - "input": { + "init": { "$ref": "#/__SharedData/init" } }, "test_unionByName_other_list_dicts": { - "input": { + "init": { "$ref": "#/__SharedData/init" } }, "test_unionByName_extra_column": { - "input": { + "init": { "$ref": "#/__SharedData/init" }, - "input_extra_col": { - "schema": "start_ts STRING NOT NULL, end_ts STRING NOT NULL, series_1 STRING NOT NULL, metric_1 INT, metric_2 INT, metric_3 INT", - "other_ts_cols": [ - "start_ts", - "end_ts" - ], - "start_ts": "start_ts", - "end_ts": "end_ts", - "series": [ - "series_1" - ], - "data": [ - [ - "2020-08-01 00:00:09", - "2020-08-01 00:00:14", - "v1", - 5, - null, - 1 + "init_extra_col": { + 
"idf": { + "$ref": "#/__SharedData/init/idf" + }, + "df": { + "schema": "start_ts STRING NOT NULL, end_ts STRING NOT NULL, series_1 STRING NOT NULL, metric_1 INT, metric_2 INT, metric_3 INT", + "ts_convert": [ + "start_ts", + "end_ts" ], - [ - "2020-08-01 00:00:09", - "2020-08-01 00:00:11", - "v1", - null, - 0, - 2 - ], - [ - "2020-08-01 00:00:09", - "2020-08-01 00:00:12", - "v1", - null, - 4, - 3 + "data": [ + [ + "2020-08-01 00:00:09", + "2020-08-01 00:00:14", + "v1", + 5, + null, + 1 + ], + [ + "2020-08-01 00:00:09", + "2020-08-01 00:00:11", + "v1", + null, + 0, + 2 + ], + [ + "2020-08-01 00:00:09", + "2020-08-01 00:00:12", + "v1", + null, + 4, + 3 + ] ] - ] + } } }, "test_unionByName_other_extra_column": { - "input": { + "init": { "$ref": "#/__SharedData/init" }, - "input_extra_col": { - "$ref": "#/IntervalsDFTests/test_unionByName_extra_column/input_extra_col" + "init_extra_col": { + "$ref": "#/IntervalsDFTests/test_unionByName_extra_column/init_extra_col" } }, "test_toDF": { - "input": { + "init": { "$ref": "#/__SharedData/init" } }, "test_toDF_stack": { - "input": { + "init": { "$ref": "#/IntervalsDFTests/test_fromStackedMetrics_series_list/expected" }, "expected": { - "$ref": "#/IntervalsDFTests/test_fromStackedMetrics_series_list/input" + "$ref": "#/IntervalsDFTests/test_fromStackedMetrics_series_list/init" } }, "test_make_disjoint_issue_268": { - "input": { - "schema": "start_timestamp STRING NOT NULL, end_timestamp STRING NOT NULL, id STRING NOT NULL, s1 INT, s2 INT, s3 INT, s4 INT", - "other_ts_cols": [ - "start_timestamp", - "end_timestamp" - ], - "start_ts": "start_timestamp", - "end_ts": "end_timestamp", - "series": [ - "id" - ], - "data": [ - [ - "2020-08-01 00:00:14", - "2020-08-01 00:00:17", - "id123", - null, - 1, - null, - null + "init": { + "idf": { + "start_ts": "start_timestamp", + "end_ts": "end_timestamp", + "series_ids": ["id"] + }, + "df": { + "schema": "start_timestamp STRING NOT NULL, end_timestamp STRING NOT NULL, id STRING NOT NULL, s1 INT, s2 INT, s3 INT, s4 INT", + "ts_convert": [ + "start_timestamp", + "end_timestamp" ], - [ - "2020-08-01 00:00:14", - "2020-08-01 00:00:16", - "id123", - null, - null, - null, - 1 - ], - [ - "2020-08-01 00:00:09", - "2020-08-01 00:00:11", - "id123", - 1, - null, - null, - null - ], - [ - "2020-08-01 00:00:10", - "2020-08-01 00:00:16", - "id123", - 1, - null, - null, - null - ], - [ - "2020-08-01 00:00:14", - "2020-08-01 00:00:21", - "id123", - null, - null, - 1, - null + "data": [ + [ + "2020-08-01 00:00:14", + "2020-08-01 00:00:17", + "id123", + null, + 1, + null, + null + ], + [ + "2020-08-01 00:00:14", + "2020-08-01 00:00:16", + "id123", + null, + null, + null, + 1 + ], + [ + "2020-08-01 00:00:09", + "2020-08-01 00:00:11", + "id123", + 1, + null, + null, + null + ], + [ + "2020-08-01 00:00:10", + "2020-08-01 00:00:16", + "id123", + 1, + null, + null, + null + ], + [ + "2020-08-01 00:00:14", + "2020-08-01 00:00:21", + "id123", + null, + null, + 1, + null + ] ] - ] + } }, "expected": { - "schema": { - "$ref": "#/IntervalsDFTests/test_make_disjoint_issue_268/input/schema" - }, - "other_ts_cols": { - "$ref": "#/IntervalsDFTests/test_make_disjoint_issue_268/input/other_ts_cols" - }, - "start_ts": { - "$ref": "#/IntervalsDFTests/test_make_disjoint_issue_268/input/start_ts" - }, - "end_ts": { - "$ref": "#/IntervalsDFTests/test_make_disjoint_issue_268/input/end_ts" - }, - "series": { - "$ref": "#/IntervalsDFTests/test_make_disjoint_issue_268/input/series" - }, - "data": [ - [ - "2020-08-01 00:00:09", - "2020-08-01 00:00:14", - 
"id123", - 1, - null, - null, - null - ], - [ - "2020-08-01 00:00:14", - "2020-08-01 00:00:16", - "id123", - 1, - 1, - 1, - 1 - ], - [ - "2020-08-01 00:00:16", - "2020-08-01 00:00:17", - "id123", - null, - 1, - 1, - null - ], - [ - "2020-08-01 00:00:17", - "2020-08-01 00:00:21", - "id123", - null, - null, - 1, - null + "idf": { + "$ref": "#/IntervalsDFTests/test_make_disjoint_issue_268/init/idf" + }, + "df": { + "schema": { + "$ref": "#/IntervalsDFTests/test_make_disjoint_issue_268/init/df/schema" + }, + "ts_convert": { + "$ref": "#/IntervalsDFTests/test_make_disjoint_issue_268/init/df/ts_convert" + }, + "data": [ + [ + "2020-08-01 00:00:09", + "2020-08-01 00:00:14", + "id123", + 1, + null, + null, + null + ], + [ + "2020-08-01 00:00:14", + "2020-08-01 00:00:16", + "id123", + 1, + 1, + 1, + 1 + ], + [ + "2020-08-01 00:00:16", + "2020-08-01 00:00:17", + "id123", + null, + 1, + 1, + null + ], + [ + "2020-08-01 00:00:17", + "2020-08-01 00:00:21", + "id123", + null, + null, + 1, + null + ] ] - ] + } } } } From f1958b259486098ef6c7251645f79abce658b111 Mon Sep 17 00:00:00 2001 From: Lorin Date: Tue, 9 Jul 2024 14:25:18 -0600 Subject: [PATCH 126/137] refactor io_tests --- python/tests/io_tests.py | 6 +- python/tests/unit_test_data/io_tests.json | 130 +++++++++++----------- 2 files changed, 69 insertions(+), 67 deletions(-) diff --git a/python/tests/io_tests.py b/python/tests/io_tests.py index 7a138218..e3edad10 100644 --- a/python/tests/io_tests.py +++ b/python/tests/io_tests.py @@ -15,7 +15,7 @@ def test_write_to_delta_without_optimization_cols(self): table_name = "my_table_no_optimization_col" # load test data - input_tsdf = self.get_data_as_tsdf("input_data") + input_tsdf = self.get_test_df_builder("init").as_tsdf() # test write to delta input_tsdf.write(self.spark, table_name) @@ -30,7 +30,7 @@ def test_write_to_delta_with_optimization_cols(self): table_name = "my_table_optimization_col" # load test data - input_tsdf = self.get_data_as_tsdf("input_data") + input_tsdf = self.get_test_df_builder("init").as_tsdf() # test write to delta input_tsdf.write(self.spark, table_name, ["date"]) @@ -45,7 +45,7 @@ def test_write_to_delta_bad_dbr_environment_logging(self): table_name = "my_table_optimization_col_fails" # load test data - input_tsdf = self.get_data_as_tsdf("input_data") + input_tsdf = self.get_test_df_builder("init").as_tsdf() if pkg_version.parse(DELTA_VERSION) < pkg_version.parse("2.0.0"): diff --git a/python/tests/unit_test_data/io_tests.json b/python/tests/unit_test_data/io_tests.json index f8bc9904..ab14eacf 100644 --- a/python/tests/unit_test_data/io_tests.json +++ b/python/tests/unit_test_data/io_tests.json @@ -1,83 +1,85 @@ { "__SharedData": { - "input_data": { - "schema": "symbol string, date string, event_ts string, trade_pr float, trade_pr_2 float", - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ], - "data": [ - [ - "S1", - "SAME_DT", - "2020-08-01 00:00:10", - 349.21, - 10.0 - ], - [ - "S1", - "SAME_DT", - "2020-08-01 00:00:11", - 340.21, - 9.0 - ], - [ - "S1", - "SAME_DT", - "2020-08-01 00:01:12", - 353.32, - 8.0 - ], - [ - "S1", - "SAME_DT", - "2020-08-01 00:01:13", - 351.32, - 7.0 - ], - [ - "S1", - "SAME_DT", - "2020-08-01 00:01:14", - 350.32, - 6.0 - ], - [ - "S1", - "SAME_DT", - "2020-09-01 00:01:12", - 361.1, - 5.0 - ], - [ - "S1", - "SAME_DT", - "2020-09-01 00:19:12", - 362.1, - 4.0 + "init": { + "tsdf": { + "ts_col": "event_ts", + "partition_cols": ["symbol"] + }, + "df": { + "schema": "symbol string, date string, event_ts string, trade_pr float, trade_pr_2 
float", + "data": [ + [ + "S1", + "SAME_DT", + "2020-08-01 00:00:10", + 349.21, + 10.0 + ], + [ + "S1", + "SAME_DT", + "2020-08-01 00:00:11", + 340.21, + 9.0 + ], + [ + "S1", + "SAME_DT", + "2020-08-01 00:01:12", + 353.32, + 8.0 + ], + [ + "S1", + "SAME_DT", + "2020-08-01 00:01:13", + 351.32, + 7.0 + ], + [ + "S1", + "SAME_DT", + "2020-08-01 00:01:14", + 350.32, + 6.0 + ], + [ + "S1", + "SAME_DT", + "2020-09-01 00:01:12", + 361.1, + 5.0 + ], + [ + "S1", + "SAME_DT", + "2020-09-01 00:19:12", + 362.1, + 4.0 + ] ] - ] + } } }, "DeltaWriteTest": { "test_write_to_delta_without_optimization_cols": { - "input_data": { - "$ref": "#/__SharedData/input_data" + "init": { + "$ref": "#/__SharedData/init" } }, "test_write_to_delta_with_optimization_cols": { - "input_data": { - "$ref": "#/__SharedData/input_data" + "init": { + "$ref": "#/__SharedData/init" } }, "test_write_to_delta_non_dbr_environment_logging": { - "input_data": { - "$ref": "#/__SharedData/input_data" + "init": { + "$ref": "#/__SharedData/init" } }, "test_write_to_delta_bad_dbr_environment_logging": { - "input_data": { - "$ref": "#/__SharedData/input_data" + "init": { + "$ref": "#/__SharedData/init" } } } From 21405b842535f46826eeacc0a66c87624ea4573d Mon Sep 17 00:00:00 2001 From: Lorin Date: Tue, 9 Jul 2024 14:29:05 -0600 Subject: [PATCH 127/137] reindent with 2 spaces for consistency --- .../unit_test_data/resample_2_tests.json | 1100 ++++++++--------- 1 file changed, 550 insertions(+), 550 deletions(-) diff --git a/python/tests/unit_test_data/resample_2_tests.json b/python/tests/unit_test_data/resample_2_tests.json index e8c6a40e..17f290b7 100644 --- a/python/tests/unit_test_data/resample_2_tests.json +++ b/python/tests/unit_test_data/resample_2_tests.json @@ -1,564 +1,564 @@ { - "ResampleUnitTests": { - "test_appendAggKey_freq_is_none": { - "input_data": { - "tsdf": {}, - "df": {}, - "$ref": "#/__SharedData/input_data" - } + "__SharedData": { + "input_data": { + "tsdf": { + "ts_col": "event_ts", + "partition_cols": [ + "symbol" + ] + }, + "df": { + "schema": "symbol string, date string, event_ts string, trade_pr float, trade_pr_2 float", + "data": [ + [ + "S1", + "SAME_DT", + "2020-08-01 00:00:10", + 349.21, + 10.0 + ], + [ + "S1", + "SAME_DT", + "2020-08-01 00:00:11", + 340.21, + 9.0 + ], + [ + "S1", + "SAME_DT", + "2020-08-01 00:01:12", + 353.32, + 8.0 + ], + [ + "S1", + "SAME_DT", + "2020-08-01 00:01:13", + 351.32, + 7.0 + ], + [ + "S1", + "SAME_DT", + "2020-08-01 00:01:14", + 350.32, + 6.0 + ], + [ + "S1", + "SAME_DT", + "2020-09-01 00:01:12", + 361.1, + 5.0 + ], + [ + "S1", + "SAME_DT", + "2020-09-01 00:19:12", + 362.1, + 4.0 + ] + ] + } + } + }, + "ResampleUnitTests": { + "test_appendAggKey_freq_is_none": { + "input_data": { + "tsdf": {}, + "df": {}, + "$ref": "#/__SharedData/input_data" + } + }, + "test_appendAggKey_freq_microsecond": { + "input_data": { + "tsdf": {}, + "df": {}, + "$ref": "#/__SharedData/input_data" + } + }, + "test_appendAggKey_freq_is_invalid": { + "input_data": { + "tsdf": {}, + "df": {}, + "$ref": "#/__SharedData/input_data" + } + }, + "test_aggregate_floor": { + "input_data": { + "tsdf": {}, + "df": {}, + "$ref": "#/__SharedData/input_data" + }, + "expected_data": { + "tsdf": { + "ts_col": "event_ts", + "partition_cols": [ + "symbol" + ] + }, + "df": { + "schema": "symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float", + "data": [ + [ + "S1", + "2020-08-01 00:00:00", + "SAME_DT", + 349.21, + 10.0 + ], + [ + "S1", + "2020-09-01 00:00:00", + "SAME_DT", + 361.1, + 5.0 + ] + ] + }, + 
"$ref": null + } + }, + "test_aggregate_average": { + "input_data": { + "tsdf": {}, + "df": {}, + "$ref": "#/__SharedData/input_data" + }, + "expected_data": { + "tsdf": { + "ts_col": "event_ts", + "partition_cols": [ + "symbol" + ] + }, + "df": { + "schema": "symbol string, event_ts string, trade_pr double, trade_pr_2 double", + "data": [ + [ + "S1", + "2020-08-01 00:00:00", + 348.8760009765625, + 8.0 + ], + [ + "S1", + "2020-09-01 00:00:00", + 361.6000061035156, + 4.5 + ] + ] }, - "test_appendAggKey_freq_microsecond": { - "input_data": { - "tsdf": {}, - "df": {}, - "$ref": "#/__SharedData/input_data" - } + "$ref": null + } + }, + "test_aggregate_min": { + "input_data": { + "tsdf": {}, + "df": {}, + "$ref": "#/__SharedData/input_data" + }, + "expected_data": { + "tsdf": { + "ts_col": "event_ts", + "partition_cols": [ + "symbol" + ] + }, + "df": { + "schema": "symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float", + "data": [ + [ + "S1", + "2020-08-01 00:00:00", + "SAME_DT", + 340.21, + 6.0 + ], + [ + "S1", + "2020-09-01 00:00:00", + "SAME_DT", + 361.1, + 4.0 + ] + ] }, - "test_appendAggKey_freq_is_invalid": { - "input_data": { - "tsdf": {}, - "df": {}, - "$ref": "#/__SharedData/input_data" - } + "$ref": null + } + }, + "test_aggregate_min_with_prefix": { + "input_data": { + "tsdf": {}, + "df": {}, + "$ref": "#/__SharedData/input_data" + }, + "expected_data": { + "tsdf": { + "ts_col": "event_ts", + "partition_cols": [ + "symbol" + ] }, - "test_aggregate_floor": { - "input_data": { - "tsdf": {}, - "df": {}, - "$ref": "#/__SharedData/input_data" - }, - "expected_data": { - "tsdf": { - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ] - }, - "df": { - "schema": "symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float", - "data": [ - [ - "S1", - "2020-08-01 00:00:00", - "SAME_DT", - 349.21, - 10.0 - ], - [ - "S1", - "2020-09-01 00:00:00", - "SAME_DT", - 361.1, - 5.0 - ] - ] - }, - "$ref": null - } + "df": { + "schema": "symbol string, event_ts string, min_date string, min_trade_pr float, min_trade_pr_2 float", + "data": { + "$ref": "#/ResampleUnitTests/test_aggregate_min/expected_data/data" + } }, - "test_aggregate_average": { - "input_data": { - "tsdf": {}, - "df": {}, - "$ref": "#/__SharedData/input_data" - }, - "expected_data": { - "tsdf": { - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ] - }, - "df": { - "schema": "symbol string, event_ts string, trade_pr double, trade_pr_2 double", - "data": [ - [ - "S1", - "2020-08-01 00:00:00", - 348.8760009765625, - 8.0 - ], - [ - "S1", - "2020-09-01 00:00:00", - 361.6000061035156, - 4.5 - ] - ] - }, - "$ref": null - } + "$ref": null + } + }, + "test_aggregate_min_with_fill": { + "input_data": { + "tsdf": {}, + "df": {}, + "$ref": "#/__SharedData/input_data" + }, + "expected_data": { + "tsdf": { + "ts_col": "event_ts", + "partition_cols": [ + "symbol" + ] }, - "test_aggregate_min": { - "input_data": { - "tsdf": {}, - "df": {}, - "$ref": "#/__SharedData/input_data" - }, - "expected_data": { - "tsdf": { - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ] - }, - "df": { - "schema": "symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float", - "data": [ - [ - "S1", - "2020-08-01 00:00:00", - "SAME_DT", - 340.21, - 6.0 - ], - [ - "S1", - "2020-09-01 00:00:00", - "SAME_DT", - 361.1, - 4.0 - ] - ] - }, - "$ref": null - } + "df": { + "schema": "symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float", + "data": [ + [ + "S1", + "2020-08-01 
00:00:00", + "SAME_DT", + 340.21, + 6.0 + ], + [ + "S1", + "2020-08-02 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-03 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-04 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-05 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-06 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-07 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-08 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-09 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-10 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-11 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-12 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-13 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-14 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-15 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-16 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-17 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-18 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-19 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-20 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-21 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-22 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-23 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-24 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-25 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-26 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-27 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-28 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-29 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-30 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-31 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-09-01 00:00:00", + "SAME_DT", + 361.1, + 4.0 + ] + ] }, - "test_aggregate_min_with_prefix": { - "input_data": { - "tsdf": {}, - "df": {}, - "$ref": "#/__SharedData/input_data" - }, - "expected_data": { - "tsdf": { - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ] - }, - "df": { - "schema": "symbol string, event_ts string, min_date string, min_trade_pr float, min_trade_pr_2 float", - "data": { - "$ref": "#/ResampleUnitTests/test_aggregate_min/expected_data/data" - } - }, - "$ref": null - } + "$ref": null + } + }, + "test_aggregate_max": { + "input_data": { + "tsdf": {}, + "df": {}, + "$ref": "#/__SharedData/input_data" + }, + "expected_data": { + "tsdf": { + "ts_col": "event_ts", + "partition_cols": [ + "symbol" + ] }, - "test_aggregate_min_with_fill": { - "input_data": { - "tsdf": {}, - "df": {}, - "$ref": "#/__SharedData/input_data" - }, - "expected_data": { - "tsdf": { - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ] - }, - "df": { - "schema": "symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float", - "data": [ - [ - "S1", - "2020-08-01 00:00:00", - "SAME_DT", - 340.21, - 6.0 - ], - [ - "S1", - "2020-08-02 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-03 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-04 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-05 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-06 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-07 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-08 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", 
- "2020-08-09 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-10 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-11 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-12 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-13 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-14 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-15 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-16 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-17 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-18 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-19 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-20 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-21 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-22 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-23 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-24 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-25 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-26 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-27 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-28 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-29 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-30 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-31 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-09-01 00:00:00", - "SAME_DT", - 361.1, - 4.0 - ] - ] - }, - "$ref": null - } + "df": { + "schema": "symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float", + "data": [ + [ + "S1", + "2020-08-01 00:00:00", + "SAME_DT", + 353.32, + 10.0 + ], + [ + "S1", + "2020-09-01 00:00:00", + "SAME_DT", + 362.1, + 5.0 + ] + ] }, - "test_aggregate_max": { - "input_data": { - "tsdf": {}, - "df": {}, - "$ref": "#/__SharedData/input_data" - }, - "expected_data": { - "tsdf": { - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ] - }, - "df": { - "schema": "symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float", - "data": [ - [ - "S1", - "2020-08-01 00:00:00", - "SAME_DT", - 353.32, - 10.0 - ], - [ - "S1", - "2020-09-01 00:00:00", - "SAME_DT", - 362.1, - 5.0 - ] - ] - }, - "$ref": null - } + "$ref": null + } + }, + "test_aggregate_ceiling": { + "input_data": { + "tsdf": {}, + "df": {}, + "$ref": "#/__SharedData/input_data" + }, + "expected_data": { + "tsdf": { + "ts_col": "event_ts", + "partition_cols": [ + "symbol" + ] }, - "test_aggregate_ceiling": { - "input_data": { - "tsdf": {}, - "df": {}, - "$ref": "#/__SharedData/input_data" - }, - "expected_data": { - "tsdf": { - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ] - }, - "df": { - "schema": "symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float", - "data": [ - [ - "S1", - "2020-08-01 00:00:00", - "SAME_DT", - 350.32, - 6.0 - ], - [ - "S1", - "2020-09-01 00:00:00", - "SAME_DT", - 362.1, - 4.0 - ] - ] - }, - "$ref": null - } + "df": { + "schema": "symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float", + "data": [ + [ + "S1", + "2020-08-01 00:00:00", + "SAME_DT", + 350.32, + 6.0 + ], + [ + "S1", + "2020-09-01 00:00:00", + "SAME_DT", + 362.1, + 4.0 + ] + ] }, - "test_aggregate_invalid_func_arg": { - "input_data": { - "tsdf": {}, - "df": {}, - "$ref": "#/__SharedData/input_data" - }, - "expected_data": { - "tsdf": { - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ] - }, - "df": { - "schema": "symbol string, 
event_ts string, date string, trade_pr float, trade_pr_2 float", - "data": [ - [ - "S1", - "2020-07-31 20:00:00", - "SAME_DT", - 348.88, - 8.0 - ], - [ - "S1", - "2020-08-31 20:00:00", - "SAME_DT", - 361.6, - 4.5 - ] - ] - }, - "$ref": null - } - } + "$ref": null + } }, - "__SharedData": { - "input_data": { - "tsdf": { - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ] - }, - "df": { - "schema": "symbol string, date string, event_ts string, trade_pr float, trade_pr_2 float", - "data": [ - [ - "S1", - "SAME_DT", - "2020-08-01 00:00:10", - 349.21, - 10.0 - ], - [ - "S1", - "SAME_DT", - "2020-08-01 00:00:11", - 340.21, - 9.0 - ], - [ - "S1", - "SAME_DT", - "2020-08-01 00:01:12", - 353.32, - 8.0 - ], - [ - "S1", - "SAME_DT", - "2020-08-01 00:01:13", - 351.32, - 7.0 - ], - [ - "S1", - "SAME_DT", - "2020-08-01 00:01:14", - 350.32, - 6.0 - ], - [ - "S1", - "SAME_DT", - "2020-09-01 00:01:12", - 361.1, - 5.0 - ], - [ - "S1", - "SAME_DT", - "2020-09-01 00:19:12", - 362.1, - 4.0 - ] - ] - } - } + "test_aggregate_invalid_func_arg": { + "input_data": { + "tsdf": {}, + "df": {}, + "$ref": "#/__SharedData/input_data" + }, + "expected_data": { + "tsdf": { + "ts_col": "event_ts", + "partition_cols": [ + "symbol" + ] + }, + "df": { + "schema": "symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float", + "data": [ + [ + "S1", + "2020-07-31 20:00:00", + "SAME_DT", + 348.88, + 8.0 + ], + [ + "S1", + "2020-08-31 20:00:00", + "SAME_DT", + 361.6, + 4.5 + ] + ] + }, + "$ref": null + } } + } } \ No newline at end of file From 79390e7cba5fe4cd4b81458b440b23dfb9cb6c91 Mon Sep 17 00:00:00 2001 From: Lorin Date: Tue, 9 Jul 2024 15:12:37 -0600 Subject: [PATCH 128/137] refactor resample_2_tests --- python/tests/resample_2_tests.py | 62 +++--- .../unit_test_data/resample_2_tests.json | 208 +++++++++--------- 2 files changed, 131 insertions(+), 139 deletions(-) diff --git a/python/tests/resample_2_tests.py b/python/tests/resample_2_tests.py index f3ccc8da..accba3f7 100644 --- a/python/tests/resample_2_tests.py +++ b/python/tests/resample_2_tests.py @@ -12,23 +12,23 @@ class ResampleUnitTests(SparkTest): def test_appendAggKey_freq_is_none(self): - input_tsdf = self.get_test_df_builder("input_data").as_tsdf() + input_tsdf = self.get_test_df_builder("init").as_tsdf() self.assertRaises(TypeError, _appendAggKey, input_tsdf) def test_appendAggKey_freq_microsecond(self): - input_tsdf = self.get_test_df_builder("input_data").as_tsdf() + input_tsdf = self.get_test_df_builder("init").as_tsdf() - appendAggKey_tuple = _appendAggKey(input_tsdf, "1 MICROSECOND") - appendAggKey_tsdf = appendAggKey_tuple[0] + append_agg_key_tuple = _appendAggKey(input_tsdf, "1 MICROSECOND") + append_agg_key_tsdf = append_agg_key_tuple[0] - self.assertIsInstance(appendAggKey_tsdf, TSDF) - self.assertIn("agg_key", appendAggKey_tsdf.df.columns) - self.assertEqual(appendAggKey_tuple[1], "1") - self.assertEqual(appendAggKey_tuple[2], "microseconds") + self.assertIsInstance(append_agg_key_tsdf, TSDF) + self.assertIn("agg_key", append_agg_key_tsdf.df.columns) + self.assertEqual(append_agg_key_tuple[1], "1") + self.assertEqual(append_agg_key_tuple[2], "microseconds") def test_appendAggKey_freq_is_invalid(self): - input_tsdf = self.get_test_df_builder("input_data").as_tsdf() + input_tsdf = self.get_test_df_builder("init").as_tsdf() self.assertRaises( ValueError, @@ -38,14 +38,14 @@ def test_appendAggKey_freq_is_invalid(self): ) def test_aggregate_floor(self): - input_tsdf = self.get_test_df_builder("input_data").as_tsdf() - 
expected_data = self.get_test_df_builder("expected_data").as_sdf() + input_tsdf = self.get_test_df_builder("init").as_tsdf() + expected_df = self.get_test_df_builder("expected").as_sdf() aggregate_df = aggregate(input_tsdf, "1 DAY", "floor") self.assertDataFrameEquality( aggregate_df, - expected_data, + expected_df, ) def test_aggregate_average(self): @@ -55,8 +55,8 @@ def test_aggregate_average(self): # is this intentional? # resample.py -> lines 86 to 87 # occurring in all `func` arguments but causing null values for "mean" - input_tsdf = self.get_test_df_builder("input_data").as_tsdf() - expected_data = self.get_test_df_builder("expected_data").as_sdf() + input_tsdf = self.get_test_df_builder("init").as_tsdf() + expected_df = self.get_test_df_builder("expected").as_sdf() # explicitly declaring metricCols to remove DATE so that test can pass for now aggregate_df = aggregate( @@ -65,67 +65,67 @@ def test_aggregate_average(self): self.assertDataFrameEquality( aggregate_df, - expected_data, + expected_df, ) def test_aggregate_min(self): - input_tsdf = self.get_test_df_builder("input_data").as_tsdf() - expected_data = self.get_test_df_builder("expected_data").as_sdf() + input_tsdf = self.get_test_df_builder("init").as_tsdf() + expected_df = self.get_test_df_builder("expected").as_sdf() aggregate_df = aggregate(input_tsdf, "1 DAY", "min") self.assertDataFrameEquality( aggregate_df, - expected_data, + expected_df, ) def test_aggregate_min_with_prefix(self): - input_tsdf = self.get_test_df_builder("input_data").as_tsdf() - expected_data = self.get_test_df_builder("expected_data").as_sdf() + input_tsdf = self.get_test_df_builder("init").as_tsdf() + expected_df = self.get_test_df_builder("expected").as_sdf() aggregate_df = aggregate(input_tsdf, "1 DAY", "min", prefix="min") self.assertDataFrameEquality( aggregate_df, - expected_data, + expected_df, ) def test_aggregate_min_with_fill(self): - input_tsdf = self.get_test_df_builder("input_data").as_tsdf() - expected_data = self.get_test_df_builder("expected_data").as_sdf() + input_tsdf = self.get_test_df_builder("init").as_tsdf() + expected_df = self.get_test_df_builder("expected").as_sdf() aggregate_df = aggregate(input_tsdf, "1 DAY", "min", fill=True) self.assertDataFrameEquality( aggregate_df, - expected_data, + expected_df, ) def test_aggregate_max(self): - input_tsdf = self.get_test_df_builder("input_data").as_tsdf() - expected_data = self.get_test_df_builder("expected_data").as_sdf() + input_tsdf = self.get_test_df_builder("init").as_tsdf() + expected_df = self.get_test_df_builder("expected").as_sdf() aggregate_df = aggregate(input_tsdf, "1 DAY", "max") self.assertDataFrameEquality( aggregate_df, - expected_data, + expected_df, ) def test_aggregate_ceiling(self): - input_tsdf = self.get_test_df_builder("input_data").as_tsdf() - expected_data = self.get_test_df_builder("expected_data").as_sdf() + input_tsdf = self.get_test_df_builder("init").as_tsdf() + expected_df = self.get_test_df_builder("expected").as_sdf() aggregate_df = aggregate(input_tsdf, "1 DAY", "ceil") self.assertDataFrameEquality( aggregate_df, - expected_data, + expected_df, ) def test_aggregate_invalid_func_arg(self): # TODO : we should not be hitting an UnboundLocalError - input_tsdf = self.get_test_df_builder("input_data").as_tsdf() + input_tsdf = self.get_test_df_builder("init").as_tsdf() self.assertRaises(UnboundLocalError, aggregate, input_tsdf, "1 DAY", "average") diff --git a/python/tests/unit_test_data/resample_2_tests.json 
b/python/tests/unit_test_data/resample_2_tests.json index 17f290b7..cd429e04 100644 --- a/python/tests/unit_test_data/resample_2_tests.json +++ b/python/tests/unit_test_data/resample_2_tests.json @@ -1,6 +1,6 @@ { "__SharedData": { - "input_data": { + "init": { "tsdf": { "ts_col": "event_ts", "partition_cols": [ @@ -9,6 +9,9 @@ }, "df": { "schema": "symbol string, date string, event_ts string, trade_pr float, trade_pr_2 float", + "ts_convert": [ + "event_ts" + ], "data": [ [ "S1", @@ -65,41 +68,33 @@ }, "ResampleUnitTests": { "test_appendAggKey_freq_is_none": { - "input_data": { - "tsdf": {}, - "df": {}, - "$ref": "#/__SharedData/input_data" + "init": { + "$ref": "#/__SharedData/init" } }, "test_appendAggKey_freq_microsecond": { - "input_data": { - "tsdf": {}, - "df": {}, - "$ref": "#/__SharedData/input_data" + "init": { + "$ref": "#/__SharedData/init" } }, "test_appendAggKey_freq_is_invalid": { - "input_data": { - "tsdf": {}, - "df": {}, - "$ref": "#/__SharedData/input_data" + "init": { + "$ref": "#/__SharedData/init" } }, "test_aggregate_floor": { - "input_data": { - "tsdf": {}, - "df": {}, - "$ref": "#/__SharedData/input_data" + "init": { + "$ref": "#/__SharedData/init" }, - "expected_data": { + "expected": { "tsdf": { - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ] + "$ref": "#/__SharedData/init/tsdf" }, "df": { "schema": "symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float", + "ts_convert": [ + "event_ts" + ], "data": [ [ "S1", @@ -116,25 +111,22 @@ 5.0 ] ] - }, - "$ref": null + } } }, "test_aggregate_average": { - "input_data": { - "tsdf": {}, - "df": {}, - "$ref": "#/__SharedData/input_data" + "init": { + "$ref": "#/__SharedData/init" }, - "expected_data": { + "expected": { "tsdf": { - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ] + "$ref": "#/__SharedData/init/tsdf" }, "df": { "schema": "symbol string, event_ts string, trade_pr double, trade_pr_2 double", + "ts_convert": [ + "event_ts" + ], "data": [ [ "S1", @@ -149,25 +141,24 @@ 4.5 ] ] - }, - "$ref": null + } } }, "test_aggregate_min": { - "input_data": { - "tsdf": {}, - "df": {}, - "$ref": "#/__SharedData/input_data" + "init": { + "$ref": "#/__SharedData/init" }, - "expected_data": { + "expected": { "tsdf": { - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ] + "$ref": "#/__SharedData/init/tsdf" }, "df": { - "schema": "symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float", + "schema": { + "$ref": "#/ResampleUnitTests/test_aggregate_floor/expected/df/schema" + }, + "ts_convert": [ + "event_ts" + ], "data": [ [ "S1", @@ -184,47 +175,43 @@ 4.0 ] ] - }, - "$ref": null + } } }, "test_aggregate_min_with_prefix": { - "input_data": { - "tsdf": {}, - "df": {}, - "$ref": "#/__SharedData/input_data" + "init": { + "$ref": "#/__SharedData/init" }, - "expected_data": { + "expected": { "tsdf": { - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ] + "$ref": "#/__SharedData/init/tsdf" }, "df": { "schema": "symbol string, event_ts string, min_date string, min_trade_pr float, min_trade_pr_2 float", + "ts_convert": [ + "event_ts" + ], "data": { - "$ref": "#/ResampleUnitTests/test_aggregate_min/expected_data/data" + "$ref": "#/ResampleUnitTests/test_aggregate_min/expected/df/data" } - }, - "$ref": null + } } }, "test_aggregate_min_with_fill": { - "input_data": { - "tsdf": {}, - "df": {}, - "$ref": "#/__SharedData/input_data" + "init": { + "$ref": "#/__SharedData/init" }, - "expected_data": { + "expected": { "tsdf": { - "ts_col": "event_ts", - 
"partition_cols": [ - "symbol" - ] + "$ref": "#/__SharedData/init/tsdf" }, "df": { - "schema": "symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float", + "schema": { + "$ref": "#/ResampleUnitTests/test_aggregate_min/expected/df/schema" + }, + "ts_convert": [ + "event_ts" + ], "data": [ [ "S1", @@ -451,25 +438,24 @@ 4.0 ] ] - }, - "$ref": null + } } }, "test_aggregate_max": { - "input_data": { - "tsdf": {}, - "df": {}, - "$ref": "#/__SharedData/input_data" + "init": { + "$ref": "#/__SharedData/init" }, - "expected_data": { + "expected": { "tsdf": { - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ] + "$ref": "#/__SharedData/init/tsdf" }, "df": { - "schema": "symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float", + "schema": { + "$ref": "#/ResampleUnitTests/test_aggregate_floor/expected/df/schema" + }, + "ts_convert": [ + "event_ts" + ], "data": [ [ "S1", @@ -486,25 +472,24 @@ 5.0 ] ] - }, - "$ref": null + } } }, "test_aggregate_ceiling": { - "input_data": { - "tsdf": {}, - "df": {}, - "$ref": "#/__SharedData/input_data" + "init": { + "$ref": "#/__SharedData/init" }, - "expected_data": { + "expected": { "tsdf": { - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ] + "$ref": "#/__SharedData/init/tsdf" }, "df": { - "schema": "symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float", + "schema": { + "$ref": "#/ResampleUnitTests/test_aggregate_floor/expected/df/schema" + }, + "ts_convert": [ + "event_ts" + ], "data": [ [ "S1", @@ -521,25 +506,21 @@ 4.0 ] ] - }, - "$ref": null + } } }, "test_aggregate_invalid_func_arg": { - "input_data": { - "tsdf": {}, - "df": {}, - "$ref": "#/__SharedData/input_data" + "init": { + "$ref": "#/__SharedData/init" }, - "expected_data": { + "expected": { "tsdf": { - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ] + "$ref": "#/__SharedData/init/tsdf" }, "df": { - "schema": "symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float", + "schema": { + "$ref": "#/ResampleUnitTests/test_aggregate_floor/expected/df/schema" + }, "data": [ [ "S1", @@ -556,9 +537,20 @@ 4.5 ] ] - }, - "$ref": null + } } - } + }, + "test_check_allowable_freq_none": {}, + "test_check_allowable_freq_microsecond": {}, + "test_check_allowable_freq_millisecond": {}, + "test_check_allowable_freq_second": {}, + "test_check_allowable_freq_minute": {}, + "test_check_allowable_freq_hour": {}, + "test_check_allowable_freq_day": {}, + "test_check_allowable_freq_no_interval": {}, + "test_check_allowable_freq_exception_not_in_allowable_freqs": {}, + "test_check_allowable_freq_exception": {}, + "test_validate_func_exists_type_error": {}, + "test_validate_func_exists_value_error": {} } } \ No newline at end of file From 8558e908a7ecf7b5659c9ce9a0dba88701a78ff2 Mon Sep 17 00:00:00 2001 From: Lorin Date: Wed, 10 Jul 2024 11:55:12 -0600 Subject: [PATCH 129/137] refactor resample_tests --- python/tests/resample_tests.py | 62 +- .../tests/unit_test_data/resample_tests.json | 930 ++++++++++-------- 2 files changed, 525 insertions(+), 467 deletions(-) diff --git a/python/tests/resample_tests.py b/python/tests/resample_tests.py index 0f41dcfe..accba3f7 100644 --- a/python/tests/resample_tests.py +++ b/python/tests/resample_tests.py @@ -12,23 +12,23 @@ class ResampleUnitTests(SparkTest): def test_appendAggKey_freq_is_none(self): - input_tsdf = self.get_data_as_tsdf("input_data") + input_tsdf = self.get_test_df_builder("init").as_tsdf() self.assertRaises(TypeError, _appendAggKey, input_tsdf) def 
test_appendAggKey_freq_microsecond(self): - input_tsdf = self.get_data_as_tsdf("input_data") + input_tsdf = self.get_test_df_builder("init").as_tsdf() - appendAggKey_tuple = _appendAggKey(input_tsdf, "1 MICROSECOND") - appendAggKey_tsdf = appendAggKey_tuple[0] + append_agg_key_tuple = _appendAggKey(input_tsdf, "1 MICROSECOND") + append_agg_key_tsdf = append_agg_key_tuple[0] - self.assertIsInstance(appendAggKey_tsdf, TSDF) - self.assertIn("agg_key", appendAggKey_tsdf.df.columns) - self.assertEqual(appendAggKey_tuple[1], "1") - self.assertEqual(appendAggKey_tuple[2], "microseconds") + self.assertIsInstance(append_agg_key_tsdf, TSDF) + self.assertIn("agg_key", append_agg_key_tsdf.df.columns) + self.assertEqual(append_agg_key_tuple[1], "1") + self.assertEqual(append_agg_key_tuple[2], "microseconds") def test_appendAggKey_freq_is_invalid(self): - input_tsdf = self.get_data_as_tsdf("input_data") + input_tsdf = self.get_test_df_builder("init").as_tsdf() self.assertRaises( ValueError, @@ -38,14 +38,14 @@ def test_appendAggKey_freq_is_invalid(self): ) def test_aggregate_floor(self): - input_tsdf = self.get_data_as_tsdf("input_data") - expected_data = self.get_data_as_sdf("expected_data") + input_tsdf = self.get_test_df_builder("init").as_tsdf() + expected_df = self.get_test_df_builder("expected").as_sdf() aggregate_df = aggregate(input_tsdf, "1 DAY", "floor") self.assertDataFrameEquality( aggregate_df, - expected_data, + expected_df, ) def test_aggregate_average(self): @@ -55,8 +55,8 @@ def test_aggregate_average(self): # is this intentional? # resample.py -> lines 86 to 87 # occurring in all `func` arguments but causing null values for "mean" - input_tsdf = self.get_data_as_tsdf("input_data") - expected_data = self.get_data_as_sdf("expected_data") + input_tsdf = self.get_test_df_builder("init").as_tsdf() + expected_df = self.get_test_df_builder("expected").as_sdf() # explicitly declaring metricCols to remove DATE so that test can pass for now aggregate_df = aggregate( @@ -65,67 +65,67 @@ def test_aggregate_average(self): self.assertDataFrameEquality( aggregate_df, - expected_data, + expected_df, ) def test_aggregate_min(self): - input_tsdf = self.get_data_as_tsdf("input_data") - expected_data = self.get_data_as_sdf("expected_data") + input_tsdf = self.get_test_df_builder("init").as_tsdf() + expected_df = self.get_test_df_builder("expected").as_sdf() aggregate_df = aggregate(input_tsdf, "1 DAY", "min") self.assertDataFrameEquality( aggregate_df, - expected_data, + expected_df, ) def test_aggregate_min_with_prefix(self): - input_tsdf = self.get_data_as_tsdf("input_data") - expected_data = self.get_data_as_sdf("expected_data") + input_tsdf = self.get_test_df_builder("init").as_tsdf() + expected_df = self.get_test_df_builder("expected").as_sdf() aggregate_df = aggregate(input_tsdf, "1 DAY", "min", prefix="min") self.assertDataFrameEquality( aggregate_df, - expected_data, + expected_df, ) def test_aggregate_min_with_fill(self): - input_tsdf = self.get_data_as_tsdf("input_data") - expected_data = self.get_data_as_sdf("expected_data") + input_tsdf = self.get_test_df_builder("init").as_tsdf() + expected_df = self.get_test_df_builder("expected").as_sdf() aggregate_df = aggregate(input_tsdf, "1 DAY", "min", fill=True) self.assertDataFrameEquality( aggregate_df, - expected_data, + expected_df, ) def test_aggregate_max(self): - input_tsdf = self.get_data_as_tsdf("input_data") - expected_data = self.get_data_as_sdf("expected_data") + input_tsdf = self.get_test_df_builder("init").as_tsdf() + expected_df = 
self.get_test_df_builder("expected").as_sdf() aggregate_df = aggregate(input_tsdf, "1 DAY", "max") self.assertDataFrameEquality( aggregate_df, - expected_data, + expected_df, ) def test_aggregate_ceiling(self): - input_tsdf = self.get_data_as_tsdf("input_data") - expected_data = self.get_data_as_sdf("expected_data") + input_tsdf = self.get_test_df_builder("init").as_tsdf() + expected_df = self.get_test_df_builder("expected").as_sdf() aggregate_df = aggregate(input_tsdf, "1 DAY", "ceil") self.assertDataFrameEquality( aggregate_df, - expected_data, + expected_df, ) def test_aggregate_invalid_func_arg(self): # TODO : we should not be hitting an UnboundLocalError - input_tsdf = self.get_data_as_tsdf("input_data") + input_tsdf = self.get_test_df_builder("init").as_tsdf() self.assertRaises(UnboundLocalError, aggregate, input_tsdf, "1 DAY", "average") diff --git a/python/tests/unit_test_data/resample_tests.json b/python/tests/unit_test_data/resample_tests.json index 19b22acb..cd429e04 100644 --- a/python/tests/unit_test_data/resample_tests.json +++ b/python/tests/unit_test_data/resample_tests.json @@ -1,498 +1,556 @@ { "__SharedData": { - "input_data": { - "schema": "symbol string, date string, event_ts string, trade_pr float, trade_pr_2 float", - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ], - "data": [ - [ - "S1", - "SAME_DT", - "2020-08-01 00:00:10", - 349.21, - 10.0 - ], - [ - "S1", - "SAME_DT", - "2020-08-01 00:00:11", - 340.21, - 9.0 - ], - [ - "S1", - "SAME_DT", - "2020-08-01 00:01:12", - 353.32, - 8.0 - ], - [ - "S1", - "SAME_DT", - "2020-08-01 00:01:13", - 351.32, - 7.0 - ], - [ - "S1", - "SAME_DT", - "2020-08-01 00:01:14", - 350.32, - 6.0 - ], - [ - "S1", - "SAME_DT", - "2020-09-01 00:01:12", - 361.1, - 5.0 - ], - [ - "S1", - "SAME_DT", - "2020-09-01 00:19:12", - 362.1, - 4.0 - ] - ] - } - }, - "ResampleUnitTests": { - "test_appendAggKey_freq_is_none": { - "input_data": { - "$ref": "#/__SharedData/input_data" - } - }, - "test_appendAggKey_freq_microsecond": { - "input_data": { - "$ref": "#/__SharedData/input_data" - } - }, - "test_appendAggKey_freq_is_invalid": { - "input_data": { - "$ref": "#/__SharedData/input_data" - } - }, - "test_aggregate_floor": { - "input_data": { - "$ref": "#/__SharedData/input_data" - }, - "expected_data": { - "schema": "symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float", + "init": { + "tsdf": { "ts_col": "event_ts", "partition_cols": [ "symbol" + ] + }, + "df": { + "schema": "symbol string, date string, event_ts string, trade_pr float, trade_pr_2 float", + "ts_convert": [ + "event_ts" ], "data": [ [ "S1", - "2020-08-01 00:00:00", "SAME_DT", + "2020-08-01 00:00:10", 349.21, 10.0 ], [ "S1", - "2020-09-01 00:00:00", "SAME_DT", - 361.1, - 5.0 - ] - ] - } - }, - "test_aggregate_average": { - "input_data": { - "$ref": "#/__SharedData/input_data" - }, - "expected_data": { - "schema": "symbol string, event_ts string, trade_pr double, trade_pr_2 double", - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ], - "data": [ + "2020-08-01 00:00:11", + 340.21, + 9.0 + ], [ "S1", - "2020-08-01 00:00:00", - 348.8760009765625, + "SAME_DT", + "2020-08-01 00:01:12", + 353.32, 8.0 ], [ "S1", - "2020-09-01 00:00:00", - 361.6000061035156, - 4.5 - ] - ] - } - }, - "test_aggregate_min": { - "input_data": { - "$ref": "#/__SharedData/input_data" - }, - "expected_data": { - "schema": "symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float", - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ], - "data": [ + 
"SAME_DT", + "2020-08-01 00:01:13", + 351.32, + 7.0 + ], [ "S1", - "2020-08-01 00:00:00", "SAME_DT", - 340.21, + "2020-08-01 00:01:14", + 350.32, 6.0 ], [ "S1", - "2020-09-01 00:00:00", "SAME_DT", + "2020-09-01 00:01:12", 361.1, + 5.0 + ], + [ + "S1", + "SAME_DT", + "2020-09-01 00:19:12", + 362.1, 4.0 ] ] } + } + }, + "ResampleUnitTests": { + "test_appendAggKey_freq_is_none": { + "init": { + "$ref": "#/__SharedData/init" + } + }, + "test_appendAggKey_freq_microsecond": { + "init": { + "$ref": "#/__SharedData/init" + } + }, + "test_appendAggKey_freq_is_invalid": { + "init": { + "$ref": "#/__SharedData/init" + } + }, + "test_aggregate_floor": { + "init": { + "$ref": "#/__SharedData/init" + }, + "expected": { + "tsdf": { + "$ref": "#/__SharedData/init/tsdf" + }, + "df": { + "schema": "symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float", + "ts_convert": [ + "event_ts" + ], + "data": [ + [ + "S1", + "2020-08-01 00:00:00", + "SAME_DT", + 349.21, + 10.0 + ], + [ + "S1", + "2020-09-01 00:00:00", + "SAME_DT", + 361.1, + 5.0 + ] + ] + } + } + }, + "test_aggregate_average": { + "init": { + "$ref": "#/__SharedData/init" + }, + "expected": { + "tsdf": { + "$ref": "#/__SharedData/init/tsdf" + }, + "df": { + "schema": "symbol string, event_ts string, trade_pr double, trade_pr_2 double", + "ts_convert": [ + "event_ts" + ], + "data": [ + [ + "S1", + "2020-08-01 00:00:00", + 348.8760009765625, + 8.0 + ], + [ + "S1", + "2020-09-01 00:00:00", + 361.6000061035156, + 4.5 + ] + ] + } + } + }, + "test_aggregate_min": { + "init": { + "$ref": "#/__SharedData/init" + }, + "expected": { + "tsdf": { + "$ref": "#/__SharedData/init/tsdf" + }, + "df": { + "schema": { + "$ref": "#/ResampleUnitTests/test_aggregate_floor/expected/df/schema" + }, + "ts_convert": [ + "event_ts" + ], + "data": [ + [ + "S1", + "2020-08-01 00:00:00", + "SAME_DT", + 340.21, + 6.0 + ], + [ + "S1", + "2020-09-01 00:00:00", + "SAME_DT", + 361.1, + 4.0 + ] + ] + } + } }, "test_aggregate_min_with_prefix": { - "input_data": { - "$ref": "#/__SharedData/input_data" + "init": { + "$ref": "#/__SharedData/init" }, - "expected_data": { - "schema": "symbol string, event_ts string, min_date string, min_trade_pr float, min_trade_pr_2 float", - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ], - "data": { - "$ref": "#/ResampleUnitTests/test_aggregate_min/expected_data/data" + "expected": { + "tsdf": { + "$ref": "#/__SharedData/init/tsdf" + }, + "df": { + "schema": "symbol string, event_ts string, min_date string, min_trade_pr float, min_trade_pr_2 float", + "ts_convert": [ + "event_ts" + ], + "data": { + "$ref": "#/ResampleUnitTests/test_aggregate_min/expected/df/data" + } } } }, "test_aggregate_min_with_fill": { - "input_data": { - "$ref": "#/__SharedData/input_data" + "init": { + "$ref": "#/__SharedData/init" }, - "expected_data": { - "schema": "symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float", - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ], - "data": [ - [ - "S1", - "2020-08-01 00:00:00", - "SAME_DT", - 340.21, - 6.0 - ], - [ - "S1", - "2020-08-02 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-03 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-04 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-05 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-06 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-07 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-08 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-09 
00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-10 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-11 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-12 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-13 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-14 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-15 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-16 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-17 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-18 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-19 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-20 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-21 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-22 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-23 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-24 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-25 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-26 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-27 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-28 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-29 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-30 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-31 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-09-01 00:00:00", - "SAME_DT", - 361.1, - 4.0 + "expected": { + "tsdf": { + "$ref": "#/__SharedData/init/tsdf" + }, + "df": { + "schema": { + "$ref": "#/ResampleUnitTests/test_aggregate_min/expected/df/schema" + }, + "ts_convert": [ + "event_ts" + ], + "data": [ + [ + "S1", + "2020-08-01 00:00:00", + "SAME_DT", + 340.21, + 6.0 + ], + [ + "S1", + "2020-08-02 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-03 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-04 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-05 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-06 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-07 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-08 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-09 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-10 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-11 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-12 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-13 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-14 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-15 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-16 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-17 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-18 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-19 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-20 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-21 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-22 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-23 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-24 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-25 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-26 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-27 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-28 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-29 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + 
"2020-08-30 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-08-31 00:00:00", + null, + 0.0, + 0.0 + ], + [ + "S1", + "2020-09-01 00:00:00", + "SAME_DT", + 361.1, + 4.0 + ] ] - ] + } } }, "test_aggregate_max": { - "input_data": { - "$ref": "#/__SharedData/input_data" + "init": { + "$ref": "#/__SharedData/init" }, - "expected_data": { - "schema": "symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float", - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ], - "data": [ - [ - "S1", - "2020-08-01 00:00:00", - "SAME_DT", - 353.32, - 10.0 - ], - [ - "S1", - "2020-09-01 00:00:00", - "SAME_DT", - 362.1, - 5.0 + "expected": { + "tsdf": { + "$ref": "#/__SharedData/init/tsdf" + }, + "df": { + "schema": { + "$ref": "#/ResampleUnitTests/test_aggregate_floor/expected/df/schema" + }, + "ts_convert": [ + "event_ts" + ], + "data": [ + [ + "S1", + "2020-08-01 00:00:00", + "SAME_DT", + 353.32, + 10.0 + ], + [ + "S1", + "2020-09-01 00:00:00", + "SAME_DT", + 362.1, + 5.0 + ] ] - ] + } } }, "test_aggregate_ceiling": { - "input_data": { - "$ref": "#/__SharedData/input_data" + "init": { + "$ref": "#/__SharedData/init" }, - "expected_data": { - "schema": "symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float", - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ], - "data": [ - [ - "S1", - "2020-08-01 00:00:00", - "SAME_DT", - 350.32, - 6.0 - ], - [ - "S1", - "2020-09-01 00:00:00", - "SAME_DT", - 362.1, - 4.0 + "expected": { + "tsdf": { + "$ref": "#/__SharedData/init/tsdf" + }, + "df": { + "schema": { + "$ref": "#/ResampleUnitTests/test_aggregate_floor/expected/df/schema" + }, + "ts_convert": [ + "event_ts" + ], + "data": [ + [ + "S1", + "2020-08-01 00:00:00", + "SAME_DT", + 350.32, + 6.0 + ], + [ + "S1", + "2020-09-01 00:00:00", + "SAME_DT", + 362.1, + 4.0 + ] ] - ] + } } }, "test_aggregate_invalid_func_arg": { - "input_data": { - "$ref": "#/__SharedData/input_data" + "init": { + "$ref": "#/__SharedData/init" }, - "expected_data": { - "schema": "symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float", - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ], - "data": [ - [ - "S1", - "2020-07-31 20:00:00", - "SAME_DT", - 348.88, - 8.0 - ], - [ - "S1", - "2020-08-31 20:00:00", - "SAME_DT", - 361.60, - 4.5 + "expected": { + "tsdf": { + "$ref": "#/__SharedData/init/tsdf" + }, + "df": { + "schema": { + "$ref": "#/ResampleUnitTests/test_aggregate_floor/expected/df/schema" + }, + "data": [ + [ + "S1", + "2020-07-31 20:00:00", + "SAME_DT", + 348.88, + 8.0 + ], + [ + "S1", + "2020-08-31 20:00:00", + "SAME_DT", + 361.6, + 4.5 + ] ] - ] + } } - } + }, + "test_check_allowable_freq_none": {}, + "test_check_allowable_freq_microsecond": {}, + "test_check_allowable_freq_millisecond": {}, + "test_check_allowable_freq_second": {}, + "test_check_allowable_freq_minute": {}, + "test_check_allowable_freq_hour": {}, + "test_check_allowable_freq_day": {}, + "test_check_allowable_freq_no_interval": {}, + "test_check_allowable_freq_exception_not_in_allowable_freqs": {}, + "test_check_allowable_freq_exception": {}, + "test_validate_func_exists_type_error": {}, + "test_validate_func_exists_value_error": {} } } \ No newline at end of file From fc609342bdff32d10434ee1846965fd8fc583531 Mon Sep 17 00:00:00 2001 From: Lorin Date: Wed, 10 Jul 2024 11:57:37 -0600 Subject: [PATCH 130/137] safe delete and refactor to remove resample_2_tests was identical test cases to resample_tests --- python/tests/resample_2_tests.py | 172 ------ 
python/tests/unit_test_data/json-fixer.ipynb | 9 +- .../unit_test_data/resample_2_tests.json | 556 ------------------ 3 files changed, 3 insertions(+), 734 deletions(-) delete mode 100644 python/tests/resample_2_tests.py delete mode 100644 python/tests/unit_test_data/resample_2_tests.json diff --git a/python/tests/resample_2_tests.py b/python/tests/resample_2_tests.py deleted file mode 100644 index accba3f7..00000000 --- a/python/tests/resample_2_tests.py +++ /dev/null @@ -1,172 +0,0 @@ -import unittest - -from tempo import TSDF -from tempo.resample import ( - _appendAggKey, - aggregate, - checkAllowableFreq, - validateFuncExists, -) -from tests.base import SparkTest - - -class ResampleUnitTests(SparkTest): - def test_appendAggKey_freq_is_none(self): - input_tsdf = self.get_test_df_builder("init").as_tsdf() - - self.assertRaises(TypeError, _appendAggKey, input_tsdf) - - def test_appendAggKey_freq_microsecond(self): - input_tsdf = self.get_test_df_builder("init").as_tsdf() - - append_agg_key_tuple = _appendAggKey(input_tsdf, "1 MICROSECOND") - append_agg_key_tsdf = append_agg_key_tuple[0] - - self.assertIsInstance(append_agg_key_tsdf, TSDF) - self.assertIn("agg_key", append_agg_key_tsdf.df.columns) - self.assertEqual(append_agg_key_tuple[1], "1") - self.assertEqual(append_agg_key_tuple[2], "microseconds") - - def test_appendAggKey_freq_is_invalid(self): - input_tsdf = self.get_test_df_builder("init").as_tsdf() - - self.assertRaises( - ValueError, - _appendAggKey, - input_tsdf, - "1 invalid", - ) - - def test_aggregate_floor(self): - input_tsdf = self.get_test_df_builder("init").as_tsdf() - expected_df = self.get_test_df_builder("expected").as_sdf() - - aggregate_df = aggregate(input_tsdf, "1 DAY", "floor") - - self.assertDataFrameEquality( - aggregate_df, - expected_df, - ) - - def test_aggregate_average(self): - # TODO: fix DATE returns `null` - # DATE is being included in metricCols when metricCols is None - # this occurs for all aggregate functions but causes negative side effects with avg - # is this intentional? 
- # resample.py -> lines 86 to 87 - # occurring in all `func` arguments but causing null values for "mean" - input_tsdf = self.get_test_df_builder("init").as_tsdf() - expected_df = self.get_test_df_builder("expected").as_sdf() - - # explicitly declaring metricCols to remove DATE so that test can pass for now - aggregate_df = aggregate( - input_tsdf, "1 DAY", "mean", ["trade_pr", "trade_pr_2"] - ) - - self.assertDataFrameEquality( - aggregate_df, - expected_df, - ) - - def test_aggregate_min(self): - input_tsdf = self.get_test_df_builder("init").as_tsdf() - expected_df = self.get_test_df_builder("expected").as_sdf() - - aggregate_df = aggregate(input_tsdf, "1 DAY", "min") - - self.assertDataFrameEquality( - aggregate_df, - expected_df, - ) - - def test_aggregate_min_with_prefix(self): - input_tsdf = self.get_test_df_builder("init").as_tsdf() - expected_df = self.get_test_df_builder("expected").as_sdf() - - aggregate_df = aggregate(input_tsdf, "1 DAY", "min", prefix="min") - - self.assertDataFrameEquality( - aggregate_df, - expected_df, - ) - - def test_aggregate_min_with_fill(self): - input_tsdf = self.get_test_df_builder("init").as_tsdf() - expected_df = self.get_test_df_builder("expected").as_sdf() - - aggregate_df = aggregate(input_tsdf, "1 DAY", "min", fill=True) - - self.assertDataFrameEquality( - aggregate_df, - expected_df, - ) - - def test_aggregate_max(self): - input_tsdf = self.get_test_df_builder("init").as_tsdf() - expected_df = self.get_test_df_builder("expected").as_sdf() - - aggregate_df = aggregate(input_tsdf, "1 DAY", "max") - - self.assertDataFrameEquality( - aggregate_df, - expected_df, - ) - - def test_aggregate_ceiling(self): - input_tsdf = self.get_test_df_builder("init").as_tsdf() - expected_df = self.get_test_df_builder("expected").as_sdf() - - aggregate_df = aggregate(input_tsdf, "1 DAY", "ceil") - - self.assertDataFrameEquality( - aggregate_df, - expected_df, - ) - - def test_aggregate_invalid_func_arg(self): - # TODO : we should not be hitting an UnboundLocalError - input_tsdf = self.get_test_df_builder("init").as_tsdf() - - self.assertRaises(UnboundLocalError, aggregate, input_tsdf, "1 DAY", "average") - - def test_check_allowable_freq_none(self): - self.assertRaises(TypeError, checkAllowableFreq, None) - - def test_check_allowable_freq_microsecond(self): - self.assertEqual(checkAllowableFreq("1 MICROSECOND"), ("1", "microsec")) - - def test_check_allowable_freq_millisecond(self): - self.assertEqual(checkAllowableFreq("1 MILLISECOND"), ("1", "ms")) - - def test_check_allowable_freq_second(self): - self.assertEqual(checkAllowableFreq("1 SECOND"), ("1", "sec")) - - def test_check_allowable_freq_minute(self): - self.assertEqual(checkAllowableFreq("1 MINUTE"), ("1", "min")) - - def test_check_allowable_freq_hour(self): - self.assertEqual(checkAllowableFreq("1 HOUR"), ("1", "hour")) - - def test_check_allowable_freq_day(self): - self.assertEqual(checkAllowableFreq("1 DAY"), ("1", "day")) - - def test_check_allowable_freq_no_interval(self): - # TODO: should first element return str for consistency? 
- self.assertEqual(checkAllowableFreq("day"), (1, "day")) - - def test_check_allowable_freq_exception_not_in_allowable_freqs(self): - self.assertRaises(ValueError, checkAllowableFreq, "wrong") - - def test_check_allowable_freq_exception(self): - self.assertRaises(ValueError, checkAllowableFreq, "wrong wrong") - - def test_validate_func_exists_type_error(self): - self.assertRaises(TypeError, validateFuncExists, None) - - def test_validate_func_exists_value_error(self): - self.assertRaises(ValueError, validateFuncExists, "non-existent") - - -# MAIN -if __name__ == "__main__": - unittest.main() diff --git a/python/tests/unit_test_data/json-fixer.ipynb b/python/tests/unit_test_data/json-fixer.ipynb index d114d323..7c5a5cb1 100644 --- a/python/tests/unit_test_data/json-fixer.ipynb +++ b/python/tests/unit_test_data/json-fixer.ipynb @@ -256,14 +256,11 @@ ] }, { - "cell_type": "code", - "execution_count": 26, "metadata": {}, + "cell_type": "code", "outputs": [], - "source": [ - "with open(\"./resample_2_tests.json\", \"w\") as file:\n", - " json.dump(combined, file, indent=4)" - ] + "execution_count": null, + "source": "" } ], "metadata": { diff --git a/python/tests/unit_test_data/resample_2_tests.json b/python/tests/unit_test_data/resample_2_tests.json deleted file mode 100644 index cd429e04..00000000 --- a/python/tests/unit_test_data/resample_2_tests.json +++ /dev/null @@ -1,556 +0,0 @@ -{ - "__SharedData": { - "init": { - "tsdf": { - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ] - }, - "df": { - "schema": "symbol string, date string, event_ts string, trade_pr float, trade_pr_2 float", - "ts_convert": [ - "event_ts" - ], - "data": [ - [ - "S1", - "SAME_DT", - "2020-08-01 00:00:10", - 349.21, - 10.0 - ], - [ - "S1", - "SAME_DT", - "2020-08-01 00:00:11", - 340.21, - 9.0 - ], - [ - "S1", - "SAME_DT", - "2020-08-01 00:01:12", - 353.32, - 8.0 - ], - [ - "S1", - "SAME_DT", - "2020-08-01 00:01:13", - 351.32, - 7.0 - ], - [ - "S1", - "SAME_DT", - "2020-08-01 00:01:14", - 350.32, - 6.0 - ], - [ - "S1", - "SAME_DT", - "2020-09-01 00:01:12", - 361.1, - 5.0 - ], - [ - "S1", - "SAME_DT", - "2020-09-01 00:19:12", - 362.1, - 4.0 - ] - ] - } - } - }, - "ResampleUnitTests": { - "test_appendAggKey_freq_is_none": { - "init": { - "$ref": "#/__SharedData/init" - } - }, - "test_appendAggKey_freq_microsecond": { - "init": { - "$ref": "#/__SharedData/init" - } - }, - "test_appendAggKey_freq_is_invalid": { - "init": { - "$ref": "#/__SharedData/init" - } - }, - "test_aggregate_floor": { - "init": { - "$ref": "#/__SharedData/init" - }, - "expected": { - "tsdf": { - "$ref": "#/__SharedData/init/tsdf" - }, - "df": { - "schema": "symbol string, event_ts string, date string, trade_pr float, trade_pr_2 float", - "ts_convert": [ - "event_ts" - ], - "data": [ - [ - "S1", - "2020-08-01 00:00:00", - "SAME_DT", - 349.21, - 10.0 - ], - [ - "S1", - "2020-09-01 00:00:00", - "SAME_DT", - 361.1, - 5.0 - ] - ] - } - } - }, - "test_aggregate_average": { - "init": { - "$ref": "#/__SharedData/init" - }, - "expected": { - "tsdf": { - "$ref": "#/__SharedData/init/tsdf" - }, - "df": { - "schema": "symbol string, event_ts string, trade_pr double, trade_pr_2 double", - "ts_convert": [ - "event_ts" - ], - "data": [ - [ - "S1", - "2020-08-01 00:00:00", - 348.8760009765625, - 8.0 - ], - [ - "S1", - "2020-09-01 00:00:00", - 361.6000061035156, - 4.5 - ] - ] - } - } - }, - "test_aggregate_min": { - "init": { - "$ref": "#/__SharedData/init" - }, - "expected": { - "tsdf": { - "$ref": "#/__SharedData/init/tsdf" - }, - "df": { - "schema": { - 
"$ref": "#/ResampleUnitTests/test_aggregate_floor/expected/df/schema" - }, - "ts_convert": [ - "event_ts" - ], - "data": [ - [ - "S1", - "2020-08-01 00:00:00", - "SAME_DT", - 340.21, - 6.0 - ], - [ - "S1", - "2020-09-01 00:00:00", - "SAME_DT", - 361.1, - 4.0 - ] - ] - } - } - }, - "test_aggregate_min_with_prefix": { - "init": { - "$ref": "#/__SharedData/init" - }, - "expected": { - "tsdf": { - "$ref": "#/__SharedData/init/tsdf" - }, - "df": { - "schema": "symbol string, event_ts string, min_date string, min_trade_pr float, min_trade_pr_2 float", - "ts_convert": [ - "event_ts" - ], - "data": { - "$ref": "#/ResampleUnitTests/test_aggregate_min/expected/df/data" - } - } - } - }, - "test_aggregate_min_with_fill": { - "init": { - "$ref": "#/__SharedData/init" - }, - "expected": { - "tsdf": { - "$ref": "#/__SharedData/init/tsdf" - }, - "df": { - "schema": { - "$ref": "#/ResampleUnitTests/test_aggregate_min/expected/df/schema" - }, - "ts_convert": [ - "event_ts" - ], - "data": [ - [ - "S1", - "2020-08-01 00:00:00", - "SAME_DT", - 340.21, - 6.0 - ], - [ - "S1", - "2020-08-02 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-03 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-04 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-05 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-06 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-07 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-08 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-09 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-10 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-11 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-12 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-13 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-14 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-15 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-16 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-17 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-18 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-19 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-20 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-21 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-22 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-23 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-24 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-25 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-26 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-27 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-28 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-29 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-30 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-08-31 00:00:00", - null, - 0.0, - 0.0 - ], - [ - "S1", - "2020-09-01 00:00:00", - "SAME_DT", - 361.1, - 4.0 - ] - ] - } - } - }, - "test_aggregate_max": { - "init": { - "$ref": "#/__SharedData/init" - }, - "expected": { - "tsdf": { - "$ref": "#/__SharedData/init/tsdf" - }, - "df": { - "schema": { - "$ref": "#/ResampleUnitTests/test_aggregate_floor/expected/df/schema" - }, - "ts_convert": [ - "event_ts" - ], - "data": [ - [ - "S1", - "2020-08-01 00:00:00", - "SAME_DT", - 353.32, - 10.0 - ], - [ - "S1", - "2020-09-01 00:00:00", - "SAME_DT", - 362.1, - 5.0 - ] - ] - } - } - }, - "test_aggregate_ceiling": { - "init": { - "$ref": 
"#/__SharedData/init" - }, - "expected": { - "tsdf": { - "$ref": "#/__SharedData/init/tsdf" - }, - "df": { - "schema": { - "$ref": "#/ResampleUnitTests/test_aggregate_floor/expected/df/schema" - }, - "ts_convert": [ - "event_ts" - ], - "data": [ - [ - "S1", - "2020-08-01 00:00:00", - "SAME_DT", - 350.32, - 6.0 - ], - [ - "S1", - "2020-09-01 00:00:00", - "SAME_DT", - 362.1, - 4.0 - ] - ] - } - } - }, - "test_aggregate_invalid_func_arg": { - "init": { - "$ref": "#/__SharedData/init" - }, - "expected": { - "tsdf": { - "$ref": "#/__SharedData/init/tsdf" - }, - "df": { - "schema": { - "$ref": "#/ResampleUnitTests/test_aggregate_floor/expected/df/schema" - }, - "data": [ - [ - "S1", - "2020-07-31 20:00:00", - "SAME_DT", - 348.88, - 8.0 - ], - [ - "S1", - "2020-08-31 20:00:00", - "SAME_DT", - 361.6, - 4.5 - ] - ] - } - } - }, - "test_check_allowable_freq_none": {}, - "test_check_allowable_freq_microsecond": {}, - "test_check_allowable_freq_millisecond": {}, - "test_check_allowable_freq_second": {}, - "test_check_allowable_freq_minute": {}, - "test_check_allowable_freq_hour": {}, - "test_check_allowable_freq_day": {}, - "test_check_allowable_freq_no_interval": {}, - "test_check_allowable_freq_exception_not_in_allowable_freqs": {}, - "test_check_allowable_freq_exception": {}, - "test_validate_func_exists_type_error": {}, - "test_validate_func_exists_value_error": {} - } -} \ No newline at end of file From 4f2e8f997dc5f3305b8e0785060b48ffc4149ace Mon Sep 17 00:00:00 2001 From: Lorin Date: Wed, 10 Jul 2024 13:39:10 -0600 Subject: [PATCH 131/137] refactor utils_tests --- python/tempo/utils.py | 93 ++-- python/tests/unit_test_data/utils_tests.json | 542 ++++++++++--------- python/tests/utils_tests.py | 34 +- 3 files changed, 356 insertions(+), 313 deletions(-) diff --git a/python/tempo/utils.py b/python/tempo/utils.py index fbedcca6..812f28aa 100644 --- a/python/tempo/utils.py +++ b/python/tempo/utils.py @@ -51,11 +51,11 @@ def _is_capable_of_html_rendering() -> bool: def calculate_time_horizon( - df: DataFrame, - ts_col: str, - freq: str, - partition_cols: Optional[List[str]], - local_freq_dict: Optional[t_resample.FreqDict] = None, + df: DataFrame, + ts_col: str, + freq: str, + partition_cols: Optional[List[str]], + local_freq_dict: Optional[t_resample.FreqDict] = None, ) -> None: # Convert Frequency using resample dictionary if local_freq_dict is None: @@ -63,8 +63,8 @@ def calculate_time_horizon( parsed_freq = t_resample.checkAllowableFreq(freq) period, unit = parsed_freq[0], parsed_freq[1] if t_resample.is_valid_allowed_freq_keys( - unit, - t_resample.ALLOWED_FREQ_KEYS, + unit, + t_resample.ALLOWED_FREQ_KEYS, ): freq = f"{period} {local_freq_dict[unit]}" # type: ignore[literal-required] else: @@ -175,53 +175,64 @@ def get_display_df(tsdf: t_tsdf.TSDF, k: int) -> DataFrame: return tsdf.latest(k).df.orderBy(orderCols) -ENV_CAN_RENDER_HTML = _is_capable_of_html_rendering() +@overload +def display_improvised(obj: t_tsdf.TSDF) -> None: ... -if ( - IS_DATABRICKS - and not (get_ipython() is None) - and ("display" in get_ipython().user_ns.keys()) -): - method = get_ipython().user_ns["display"] - # Under 'display' key in user_ns the original databricks display method is present - # to know more refer: /databricks/python_shell/scripts/db_ipykernel_launcher.py +@overload +def display_improvised(obj: pandasDataFrame) -> None: ... + - @overload - def display_improvised(obj: t_tsdf.TSDF) -> None: ... +@overload +def display_improvised(obj: DataFrame) -> None: ... 
- @overload - def display_improvised(obj: pandasDataFrame) -> None: ... - @overload - def display_improvised(obj: DataFrame) -> None: ... +def display_improvised(obj: Union[t_tsdf.TSDF, pandasDataFrame, DataFrame]) -> None: + if isinstance(obj, t_tsdf.TSDF): + method(get_display_df(obj, k=5)) + else: + method(obj) - def display_improvised(obj: Union[t_tsdf.TSDF, pandasDataFrame, DataFrame]) -> None: - if isinstance(obj, t_tsdf.TSDF): - method(get_display_df(obj, k=5)) - else: - method(obj) - display = display_improvised +@overload +def display_html_improvised(obj: Optional[t_tsdf.TSDF]) -> None: + ... -elif ENV_CAN_RENDER_HTML: - @overload - def display_html_improvised(obj: Optional[t_tsdf.TSDF]) -> None: ... +@overload +def display_html_improvised(obj: Optional[pandasDataFrame]) -> None: + ... + - @overload - def display_html_improvised(obj: Optional[pandasDataFrame]) -> None: ... +@overload +def display_html_improvised(obj: Optional[DataFrame]) -> None: + ... - @overload - def display_html_improvised(obj: Optional[DataFrame]) -> None: ... - def display_html_improvised( +def display_html_improvised( obj: Union[t_tsdf.TSDF, pandasDataFrame, DataFrame] - ) -> None: - if isinstance(obj, t_tsdf.TSDF): - display_html(get_display_df(obj, k=5)) - else: - display_html(obj) +) -> None: + if isinstance(obj, t_tsdf.TSDF): + display_html(get_display_df(obj, k=5)) + else: + display_html(obj) + + +ENV_CAN_RENDER_HTML = _is_capable_of_html_rendering() + +if ( + IS_DATABRICKS + and not (get_ipython() is None) + and ("display" in get_ipython().user_ns.keys()) +): + method = get_ipython().user_ns["display"] + + # Under 'display' key in user_ns the original databricks display method is present + # to know more refer: /databricks/python_shell/scripts/db_ipykernel_launcher.py + + display = display_improvised + +elif ENV_CAN_RENDER_HTML: display = display_html_improvised diff --git a/python/tests/unit_test_data/utils_tests.json b/python/tests/unit_test_data/utils_tests.json index d279dffb..727ce41f 100644 --- a/python/tests/unit_test_data/utils_tests.json +++ b/python/tests/unit_test_data/utils_tests.json @@ -1,314 +1,345 @@ { "__SharedData": { - "init_data": { - "schema": "symbol string, event_ts string, trade_pr float", - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ], - "data": [ - [ - "S1", - "2020-08-01 00:00:10", - 349.21 - ], - [ - "S1", - "2020-08-01 00:01:12", - 351.32 - ], - [ - "S1", - "2020-09-01 00:02:10", - 361.1 - ], - [ - "S1", - "2020-09-01 00:19:12", - 362.1 - ], - [ - "S2", - "2020-08-01 00:01:10", - 743.01 - ], - [ - "S2", - "2020-08-01 00:01:24", - 751.92 - ], - [ - "S2", - "2020-09-01 00:02:10", - 761.10 - ], - [ - "S2", - "2020-09-01 00:20:42", - 762.33 - ] - ] - } - }, - "UtilsTest": { - "test_calculate_time_horizon": { - "simple_input": { - "schema": "partition_a string, partition_b string, event_ts string, value_a float, value_b float", + "init": { + "tsdf": { "ts_col": "event_ts", "partition_cols": [ - "partition_a", - "partition_b" + "symbol" + ] + }, + "df": { + "schema": "symbol string, event_ts string, trade_pr float", + "ts_convert": [ + "event_ts" ], "data": [ [ - "A", - "A-1", - "2020-01-01 00:00:10", - 0.0, - null - ], - [ - "A", - "A-1", - "2020-01-01 00:01:10", - 2.0, - 2.0 - ], - [ - "A", - "A-1", - "2020-01-01 00:01:32", - null, - null - ], - [ - "A", - "A-1", - "2020-01-01 00:02:03", - null, - null - ], - [ - "A", - "A-1", - "2020-01-01 00:03:32", - null, - 7.0 - ], - [ - "A", - "A-1", - "2020-01-01 00:04:12", - 8.0, - 8.0 - ], - [ - "A", - "A-1", - 
"2020-01-01 00:05:31", - 11.0, - null - ], - [ - "A", - "A-2", - "2020-01-01 00:00:10", - 0.0, - null - ], - [ - "A", - "A-2", - "2020-01-01 00:01:10", - 2.0, - 2.0 - ], - [ - "A", - "A-2", - "2020-01-01 00:01:32", - null, - null - ], - [ - "A", - "A-2", - "2020-01-01 00:02:03", - null, - null + "S1", + "2020-08-01 00:00:10", + 349.21 ], [ - "A", - "A-2", - "2020-01-01 00:04:12", - 8.0, - 8.0 + "S1", + "2020-08-01 00:01:12", + 351.32 ], [ - "A", - "A-2", - "2020-01-01 00:05:31", - 11.0, - null + "S1", + "2020-09-01 00:02:10", + 361.1 ], [ - "B", - "A-2", - "2020-01-01 00:01:10", - 2.0, - 2.0 + "S1", + "2020-09-01 00:19:12", + 362.1 ], [ - "B", - "A-2", - "2020-01-01 00:01:32", - null, - null + "S2", + "2020-08-01 00:01:10", + 743.01 ], [ - "B", - "A-2", - "2020-01-01 00:02:03", - null, - null + "S2", + "2020-08-01 00:01:24", + 751.92 ], [ - "B", - "A-2", - "2020-01-01 00:03:32", - null, - 7.0 + "S2", + "2020-09-01 00:02:10", + 761.10 ], [ - "B", - "A-2", - "2020-01-01 00:04:12", - 8.0, - 8.0 + "S2", + "2020-09-01 00:20:42", + 762.33 ] ] } + } + }, + "UtilsTest": { + "test_display": {}, + "test_calculate_time_horizon": { + "init": { + "tsdf": { + "ts_col": "event_ts", + "partition_cols": [ + "partition_a", + "partition_b" + ] + }, + "df": { + "schema": "partition_a string, partition_b string, event_ts string, value_a float, value_b float", + "ts_convert": [ + "event_ts" + ], + "data": [ + [ + "A", + "A-1", + "2020-01-01 00:00:10", + 0.0, + null + ], + [ + "A", + "A-1", + "2020-01-01 00:01:10", + 2.0, + 2.0 + ], + [ + "A", + "A-1", + "2020-01-01 00:01:32", + null, + null + ], + [ + "A", + "A-1", + "2020-01-01 00:02:03", + null, + null + ], + [ + "A", + "A-1", + "2020-01-01 00:03:32", + null, + 7.0 + ], + [ + "A", + "A-1", + "2020-01-01 00:04:12", + 8.0, + 8.0 + ], + [ + "A", + "A-1", + "2020-01-01 00:05:31", + 11.0, + null + ], + [ + "A", + "A-2", + "2020-01-01 00:00:10", + 0.0, + null + ], + [ + "A", + "A-2", + "2020-01-01 00:01:10", + 2.0, + 2.0 + ], + [ + "A", + "A-2", + "2020-01-01 00:01:32", + null, + null + ], + [ + "A", + "A-2", + "2020-01-01 00:02:03", + null, + null + ], + [ + "A", + "A-2", + "2020-01-01 00:04:12", + 8.0, + 8.0 + ], + [ + "A", + "A-2", + "2020-01-01 00:05:31", + 11.0, + null + ], + [ + "B", + "A-2", + "2020-01-01 00:01:10", + 2.0, + 2.0 + ], + [ + "B", + "A-2", + "2020-01-01 00:01:32", + null, + null + ], + [ + "B", + "A-2", + "2020-01-01 00:02:03", + null, + null + ], + [ + "B", + "A-2", + "2020-01-01 00:03:32", + null, + 7.0 + ], + [ + "B", + "A-2", + "2020-01-01 00:04:12", + 8.0, + 8.0 + ] + ] + } + } }, "test_display_html_TSDF": { "init": { - "$ref": "#/__SharedData/init_data" + "$ref": "#/__SharedData/init" } }, "test_display_html_dataframe": { "init": { - "$ref": "#/__SharedData/init_data" + "$ref": "#/__SharedData/init" } }, "test_display_html_pandas_dataframe": { "init": { - "$ref": "#/__SharedData/init_data" + "$ref": "#/__SharedData/init" } }, "test_display_unavailable": { "init": { - "$ref": "#/__SharedData/init_data" + "$ref": "#/__SharedData/init" } }, "test_get_display_df": { "init": { - "$ref": "#/__SharedData/init_data" + "$ref": "#/__SharedData/init" }, "expected": { - "schema": "symbol string, event_ts string, trade_pr float", - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ], - "data": [ - [ - "S1", - "2020-09-01 00:02:10", - 361.1 - ], - [ - "S1", - "2020-09-01 00:19:12", - 362.1 - ], - [ - "S2", - "2020-09-01 00:02:10", - 761.1 + "tsdf": { + "ts_col": "event_ts", + "partition_cols": [ + "symbol" + ] + }, + "df": { + "schema": "symbol 
string, event_ts string, trade_pr float", + "ts_convert": [ + "event_ts" ], - [ - "S2", - "2020-09-01 00:20:42", - 762.33 + "data": [ + [ + "S1", + "2020-09-01 00:02:10", + 361.1 + ], + [ + "S1", + "2020-09-01 00:19:12", + 362.1 + ], + [ + "S2", + "2020-09-01 00:02:10", + 761.1 + ], + [ + "S2", + "2020-09-01 00:20:42", + 762.33 + ] ] - ] + } } }, "test_get_display_df_sequence_col": { "init": { - "schema": "symbol string, secondary_symbol string, event_ts string, trade_pr float", - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ], - "sequence_col": "secondary_symbol", - "data": [ - [ - "S1", - "t1", - "2020-08-01 00:00:10", - 349.21 + "tsdf": { + "ts_col": "event_ts", + "partition_cols": [ + "symbol" ], - [ - "S1", - "t1", - "2020-08-01 00:01:12", - 351.32 - ], - [ - "S1", - "t2", - "2020-09-01 00:02:10", - 361.1 - ], - [ - "S1", - "t3", - "2020-09-01 00:19:12", - 362.1 - ], - [ - "S2", - "t1", - "2020-08-01 00:01:10", - 743.01 - ], - [ - "S2", - "t2", - "2020-08-01 00:01:24", - 751.92 - ], - [ - "S2", - "t2", - "2020-09-01 00:02:10", - 761.10 - ], - [ - "S2", - "t2", - "2020-09-01 00:20:42", - 762.33 + "sequence_col": "secondary_symbol" + }, + "df": { + "schema": "symbol string, secondary_symbol string, event_ts string, trade_pr float", + "ts_convert": ["event_ts"], + "data": [ + [ + "S1", + "t1", + "2020-08-01 00:00:10", + 349.21 + ], + [ + "S1", + "t1", + "2020-08-01 00:01:12", + 351.32 + ], + [ + "S1", + "t2", + "2020-09-01 00:02:10", + 361.1 + ], + [ + "S1", + "t3", + "2020-09-01 00:19:12", + 362.1 + ], + [ + "S2", + "t1", + "2020-08-01 00:01:10", + 743.01 + ], + [ + "S2", + "t2", + "2020-08-01 00:01:24", + 751.92 + ], + [ + "S2", + "t2", + "2020-09-01 00:02:10", + 761.10 + ], + [ + "S2", + "t2", + "2020-09-01 00:20:42", + 762.33 + ] ] - ] + } }, "expected": { + "tsdf": { + "ts_col": "event_ts", + "partition_cols": [ + "symbol" + ], + "sequence_col": "secondary_symbol" + }, + "df": { "schema": "symbol string, secondary_symbol string, event_ts string, trade_pr float", - "ts_col": "event_ts", - "partition_cols": [ - "symbol" - ], - "sequence_col": "secondary_symbol", + "ts_convert": ["event_ts"], "data": [ [ "S1", @@ -335,6 +366,7 @@ 762.33 ] ] + } } } } diff --git a/python/tests/utils_tests.py b/python/tests/utils_tests.py index 6e634047..2839ee04 100644 --- a/python/tests/utils_tests.py +++ b/python/tests/utils_tests.py @@ -1,7 +1,7 @@ import sys import unittest from io import StringIO -from unittest import mock +from unittest.mock import patch, create_autospec, MagicMock from tempo.utils import * # noqa: F403 from tests.tsdf_tests import SparkTest @@ -20,17 +20,17 @@ def test_display(self): else: self.assertEqual(id(display), id(display_unavailable)) - @mock.patch.dict(os.environ, {"TZ": "UTC"}) + @patch.dict(os.environ, {"TZ": "UTC"}) def test_calculate_time_horizon(self): """Test calculate time horizon warning and number of expected output rows""" # fetch test data - simple_input_tsdf = self.get_data_as_tsdf("simple_input") + tsdf = self.get_test_df_builder("init").as_tsdf() with warnings.catch_warnings(record=True) as w: calculate_time_horizon( - simple_input_tsdf.df, - simple_input_tsdf.ts_col, + tsdf.df, + tsdf.ts_col, "30 seconds", ["partition_a", "partition_b"], ) @@ -49,10 +49,10 @@ def test_calculate_time_horizon(self): assert warning_message.strip() == str(w[-1].message).strip() def test_display_html_TSDF(self): - init_tsdf = self.get_data_as_tsdf("init") + tsdf = self.get_test_df_builder("init").as_tsdf() with self.assertLogs(level="ERROR") as error_captured: - 
display_html(init_tsdf) + display_html(tsdf) self.assertEqual(len(error_captured.records), 1) self.assertEqual( @@ -61,11 +61,11 @@ def test_display_html_TSDF(self): ) def test_display_html_dataframe(self): - init_tsdf = self.get_data_as_tsdf("init") + sdf = self.get_test_df_builder("init").as_sdf() captured_output = StringIO() sys.stdout = captured_output - display_html(init_tsdf.df) + display_html(sdf) self.assertEqual( captured_output.getvalue(), ( @@ -87,8 +87,8 @@ def test_display_html_dataframe(self): ) def test_display_html_pandas_dataframe(self): - init_tsdf = self.get_data_as_tsdf("init") - pandas_dataframe = init_tsdf.df.toPandas() + sdf = self.get_test_df_builder("init").as_sdf() + pandas_dataframe = sdf.toPandas() captured_output = StringIO() sys.stdout = captured_output @@ -120,18 +120,18 @@ def test_display_unavailable(self): ) def test_get_display_df(self): - init_tsdf = self.get_data_as_tsdf("init") - expected_df = self.get_data_as_sdf("expected") + init = self.get_test_df_builder("init").as_tsdf() + expected_df = self.get_test_df_builder("expected").as_sdf() - actual_df = get_display_df(init_tsdf, 2) + actual_df = get_display_df(init, 2) self.assertDataFrameEquality(actual_df, expected_df) def test_get_display_df_sequence_col(self): - init_tsdf = self.get_data_as_tsdf("init") - expected_df = self.get_data_as_sdf("expected") + init = self.get_test_df_builder("init").as_tsdf() + expected_df = self.get_test_df_builder("expected").as_sdf() - actual_df = get_display_df(init_tsdf, 2) + actual_df = get_display_df(init, 2) self.assertDataFrameEquality(actual_df, expected_df) From a0955850540f4dab850f5f099bfec7f640effd6f Mon Sep 17 00:00:00 2001 From: Lorin Date: Wed, 10 Jul 2024 13:40:46 -0600 Subject: [PATCH 132/137] chore: tox lint --- python/tempo/utils.py | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/python/tempo/utils.py b/python/tempo/utils.py index 812f28aa..a45a52e5 100644 --- a/python/tempo/utils.py +++ b/python/tempo/utils.py @@ -51,11 +51,11 @@ def _is_capable_of_html_rendering() -> bool: def calculate_time_horizon( - df: DataFrame, - ts_col: str, - freq: str, - partition_cols: Optional[List[str]], - local_freq_dict: Optional[t_resample.FreqDict] = None, + df: DataFrame, + ts_col: str, + freq: str, + partition_cols: Optional[List[str]], + local_freq_dict: Optional[t_resample.FreqDict] = None, ) -> None: # Convert Frequency using resample dictionary if local_freq_dict is None: @@ -63,8 +63,8 @@ def calculate_time_horizon( parsed_freq = t_resample.checkAllowableFreq(freq) period, unit = parsed_freq[0], parsed_freq[1] if t_resample.is_valid_allowed_freq_keys( - unit, - t_resample.ALLOWED_FREQ_KEYS, + unit, + t_resample.ALLOWED_FREQ_KEYS, ): freq = f"{period} {local_freq_dict[unit]}" # type: ignore[literal-required] else: @@ -195,22 +195,19 @@ def display_improvised(obj: Union[t_tsdf.TSDF, pandasDataFrame, DataFrame]) -> N @overload -def display_html_improvised(obj: Optional[t_tsdf.TSDF]) -> None: - ... +def display_html_improvised(obj: Optional[t_tsdf.TSDF]) -> None: ... @overload -def display_html_improvised(obj: Optional[pandasDataFrame]) -> None: - ... +def display_html_improvised(obj: Optional[pandasDataFrame]) -> None: ... @overload -def display_html_improvised(obj: Optional[DataFrame]) -> None: - ... +def display_html_improvised(obj: Optional[DataFrame]) -> None: ... 
def display_html_improvised( - obj: Union[t_tsdf.TSDF, pandasDataFrame, DataFrame] + obj: Union[t_tsdf.TSDF, pandasDataFrame, DataFrame] ) -> None: if isinstance(obj, t_tsdf.TSDF): display_html(get_display_df(obj, k=5)) @@ -221,9 +218,9 @@ def display_html_improvised( ENV_CAN_RENDER_HTML = _is_capable_of_html_rendering() if ( - IS_DATABRICKS - and not (get_ipython() is None) - and ("display" in get_ipython().user_ns.keys()) + IS_DATABRICKS + and not (get_ipython() is None) + and ("display" in get_ipython().user_ns.keys()) ): method = get_ipython().user_ns["display"] From 3cb8cd3b4c30b9f10800de11a431f3f3a58387b6 Mon Sep 17 00:00:00 2001 From: Lorin Date: Wed, 10 Jul 2024 14:04:50 -0600 Subject: [PATCH 133/137] fix missing ts_convert keys in json --- python/tests/unit_test_data/io_tests.json | 1 + 1 file changed, 1 insertion(+) diff --git a/python/tests/unit_test_data/io_tests.json b/python/tests/unit_test_data/io_tests.json index ab14eacf..0321bd14 100644 --- a/python/tests/unit_test_data/io_tests.json +++ b/python/tests/unit_test_data/io_tests.json @@ -7,6 +7,7 @@ }, "df": { "schema": "symbol string, date string, event_ts string, trade_pr float, trade_pr_2 float", + "ts_convert": ["event_ts"], "data": [ [ "S1", From 1ac339178244751ef9ec38475dfd8691c49f3445 Mon Sep 17 00:00:00 2001 From: Lorin Date: Wed, 10 Jul 2024 19:28:45 -0600 Subject: [PATCH 134/137] adjust build release to only trigger when tag is pushed to master --- .github/workflows/build-release.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build-release.yml b/.github/workflows/build-release.yml index 3035371c..3ad30f5f 100644 --- a/.github/workflows/build-release.yml +++ b/.github/workflows/build-release.yml @@ -1,10 +1,11 @@ name: build-release on: - pull_request: - types: [opened, synchronize] push: - branches: ['master'] + branches: + - master + tags: + - 'v*' # only release a versioned tag, such as v.X.Y.Z jobs: release: From 8ffdce7fead48385ba48d9550f58910fa9eb4d1d Mon Sep 17 00:00:00 2001 From: Tristan Nixon Date: Thu, 11 Jul 2024 10:30:04 -0700 Subject: [PATCH 135/137] Revert "adjust build release to only trigger when tag is pushed to master" This reverts commit 1ac339178244751ef9ec38475dfd8691c49f3445. --- .github/workflows/build-release.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-release.yml b/.github/workflows/build-release.yml index 3ad30f5f..3035371c 100644 --- a/.github/workflows/build-release.yml +++ b/.github/workflows/build-release.yml @@ -1,11 +1,10 @@ name: build-release on: + pull_request: + types: [opened, synchronize] push: - branches: - - master - tags: - - 'v*' # only release a versioned tag, such as v.X.Y.Z + branches: ['master'] jobs: release: From 60f9fc8aca8815ae7d4d9a6320debf3fe859cd16 Mon Sep 17 00:00:00 2001 From: Tristan Nixon Date: Thu, 11 Jul 2024 11:04:31 -0700 Subject: [PATCH 136/137] need to update CodeQL action version --- .github/workflows/test.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8ee69b2e..4c68cecd 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -47,7 +47,7 @@ jobs: uses: actions/checkout@v2 # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@v1 + uses: github/codeql-action/init@v3 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. 
@@ -57,7 +57,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@v1 + uses: github/codeql-action/autobuild@v3 # ℹ️ Command-line programs to run using the OS shell. # 📚 https://git.io/JvXDl # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines @@ -67,7 +67,7 @@ jobs: # make bootstrap # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v1 + uses: github/codeql-action/analyze@v3 test: needs: lint-and-check From 06f93ac61b94a9ce9a224884d6fc7d4f9f54e62d Mon Sep 17 00:00:00 2001 From: Tristan Nixon Date: Thu, 11 Jul 2024 11:23:17 -0700 Subject: [PATCH 137/137] updating to latest actions versions --- .github/workflows/build-release.yml | 6 +++--- .github/workflows/test.yml | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build-release.yml b/.github/workflows/build-release.yml index add27729..877ecfd7 100644 --- a/.github/workflows/build-release.yml +++ b/.github/workflows/build-release.yml @@ -23,7 +23,7 @@ jobs: fetch-tags: true - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.10' @@ -51,7 +51,7 @@ jobs: fetch-tags: true - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.9' @@ -65,7 +65,7 @@ jobs: run: tox -e build-docs - name: Upload artifacts - uses: actions/upload-artifact@v1 + uses: actions/upload-artifact@v4 with: name: html-docs path: docs/_build/html/ diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4c68cecd..6c151e5c 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -16,7 +16,7 @@ jobs: fetch-depth: 0 fetch-tags: true - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.10' - name: Install dependencies @@ -44,7 +44,7 @@ jobs: # Learn more about CodeQL language support at https://git.io/codeql-language-support steps: - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v4 # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL uses: github/codeql-action/init@v3 @@ -90,7 +90,7 @@ jobs: fetch-depth: 0 fetch-tags: true - name: Set up Python ${{ matrix.config.py }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.config.py }} - name: Install dependencies
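
Taken together, these patches converge on a single fixture convention: each test case keys its data under "init"/"expected", with a "tsdf" block (ts_col, partition_cols, optional sequence_col) and a "df" block holding a DDL schema string, an optional "ts_convert" list of columns to cast to timestamps, and the data rows, while "$ref" pointers deduplicate against "__SharedData". A minimal sketch of loading such a file and materializing one case — the helper below is illustrative only, not tempo's actual builder:

    import jsonref
    from pyspark.sql import SparkSession

    def build_sdf(spark: SparkSession, df_spec: dict):
        # rows plus the DDL schema string come straight from the fixture
        sdf = spark.createDataFrame(df_spec["data"], schema=df_spec["schema"])
        # cast any columns listed under "ts_convert" to real timestamps
        for col in df_spec.get("ts_convert", []):
            sdf = sdf.withColumn(col, sdf[col].cast("timestamp"))
        return sdf

    with open("python/tests/unit_test_data/resample_tests.json") as f:
        doc = jsonref.load(f)  # "$ref" pointers resolve transparently on access

    spark = SparkSession.builder.appName("fixture-demo").getOrCreate()
    case = doc["ResampleUnitTests"]["test_aggregate_floor"]
    init_sdf = build_sdf(spark, case["init"]["df"])
    expected_sdf = build_sdf(spark, case["expected"]["df"])

Schema entries that are themselves {"$ref": ...} (as in test_aggregate_min) should work unchanged, since jsonref resolves them to the referenced schema string at lookup time.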