From 5251b5dbb254732ea730bab664ad319bd5be47e7 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Thu, 18 Apr 2024 13:14:56 -0700 Subject: [PATCH] feat: support RANGE in queries Part 2: Arrow (#1868) * feat: support range in queries as dict * fix sys tests * lint * add arrow support * fix python 3.7 test error * print dependencies in sys test * add unit test and docs * fix unit test * add func docs * add sys test for tabledata.list in arrow * add sys test for tabledata.list as iterator * lint * fix docs error * fix docstring * fix docstring * fix docstring * docs * docs * docs * move dtypes mapping code * address comment * address comment * fix pytest error * Revert "move dtypes mapping code" This reverts commit c46c65c822b3c8295d5d6650b1c9c97d35d2ba5b. * remove commented out assertions * typo and formats * add None-check for range_element_type and add unit tests * change test skip condition * fix test error * change test skip condition * change test skip condition * change decorator order * use a different way to construct test data * fix error message and add warning number check * add warning number check and comments --- google/cloud/bigquery/_helpers.py | 16 ++- google/cloud/bigquery/_pandas_helpers.py | 33 ++++++ google/cloud/bigquery/dbapi/_helpers.py | 14 ++- google/cloud/bigquery/enums.py | 9 ++ google/cloud/bigquery/job/query.py | 67 +++++++++++ google/cloud/bigquery/query.py | 11 +- google/cloud/bigquery/table.py | 137 +++++++++++++++++++++++ noxfile.py | 3 + tests/data/scalars.csv | 2 + tests/data/scalars_schema_csv.json | 10 ++ tests/system/conftest.py | 22 +++- tests/system/test_arrow.py | 27 +++++ tests/system/test_list_rows.py | 14 +++ tests/unit/test__pandas_helpers.py | 61 ++++++++++ tests/unit/test_table.py | 115 ++++++++++++++++++- 15 files changed, 516 insertions(+), 25 deletions(-) create mode 100644 tests/data/scalars.csv create mode 100644 tests/data/scalars_schema_csv.json diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 0572867d7..083eb9f9d 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -66,6 +66,8 @@ _UNIVERSE_DOMAIN_ENV = "GOOGLE_CLOUD_UNIVERSE_DOMAIN" """Environment variable for setting universe domain.""" +_SUPPORTED_RANGE_ELEMENTS = {"TIMESTAMP", "DATETIME", "DATE"} + def _get_client_universe( client_options: Optional[Union[client_options_lib.ClientOptions, dict]] @@ -310,17 +312,13 @@ def _json_from_json(value, field): def _range_element_from_json(value, field): - """Coerce 'value' to a range element value, if set or not nullable.""" + """Coerce 'value' to a range element value.""" if value == "UNBOUNDED": return None - elif field.element_type == "DATE": - return _date_from_json(value, None) - elif field.element_type == "DATETIME": - return _datetime_from_json(value, None) - elif field.element_type == "TIMESTAMP": - return _timestamp_from_json(value, None) + if field.element_type in _SUPPORTED_RANGE_ELEMENTS: + return _CELLDATA_FROM_JSON[field.element_type](value, field.element_type) else: - raise ValueError(f"Unsupported range field type: {value}") + raise ValueError(f"Unsupported range element type: {field.element_type}") def _range_from_json(value, field): @@ -344,7 +342,7 @@ def _range_from_json(value, field): end = _range_element_from_json(end, field.range_element_type) return {"start": start, "end": end} else: - raise ValueError(f"Unknown range format: {value}") + raise ValueError(f"Unknown format for range value: {value}") else: return None diff --git 
a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 3b58d3736..8395478fb 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -142,6 +142,17 @@ def bq_to_arrow_struct_data_type(field): return pyarrow.struct(arrow_fields) +def bq_to_arrow_range_data_type(field): + if field is None: + raise ValueError( + "Range element type cannot be None, must be one of " + "DATE, DATETIME, or TIMESTAMP" + ) + element_type = field.element_type.upper() + arrow_element_type = _pyarrow_helpers.bq_to_arrow_scalars(element_type)() + return pyarrow.struct([("start", arrow_element_type), ("end", arrow_element_type)]) + + def bq_to_arrow_data_type(field): """Return the Arrow data type, corresponding to a given BigQuery column. @@ -160,6 +171,9 @@ def bq_to_arrow_data_type(field): if field_type_upper in schema._STRUCT_TYPES: return bq_to_arrow_struct_data_type(field) + if field_type_upper == "RANGE": + return bq_to_arrow_range_data_type(field.range_element_type) + data_type_constructor = _pyarrow_helpers.bq_to_arrow_scalars(field_type_upper) if data_type_constructor is None: return None @@ -220,6 +234,9 @@ def default_types_mapper( datetime_dtype: Union[Any, None] = None, time_dtype: Union[Any, None] = None, timestamp_dtype: Union[Any, None] = None, + range_date_dtype: Union[Any, None] = None, + range_datetime_dtype: Union[Any, None] = None, + range_timestamp_dtype: Union[Any, None] = None, ): """Create a mapping from pyarrow types to pandas types. @@ -274,6 +291,22 @@ def types_mapper(arrow_data_type): elif time_dtype is not None and pyarrow.types.is_time(arrow_data_type): return time_dtype + elif pyarrow.types.is_struct(arrow_data_type): + if range_datetime_dtype is not None and arrow_data_type.equals( + range_datetime_dtype.pyarrow_dtype + ): + return range_datetime_dtype + + elif range_date_dtype is not None and arrow_data_type.equals( + range_date_dtype.pyarrow_dtype + ): + return range_date_dtype + + elif range_timestamp_dtype is not None and arrow_data_type.equals( + range_timestamp_dtype.pyarrow_dtype + ): + return range_timestamp_dtype + return types_mapper diff --git a/google/cloud/bigquery/dbapi/_helpers.py b/google/cloud/bigquery/dbapi/_helpers.py index 117fa8ae7..a4ab05ce8 100644 --- a/google/cloud/bigquery/dbapi/_helpers.py +++ b/google/cloud/bigquery/dbapi/_helpers.py @@ -277,12 +277,14 @@ def complex_query_parameter( param = query.ArrayQueryParameter( name, sub_type, - value - if isinstance(sub_type, query.ScalarQueryParameterType) - else [ - complex_query_parameter(None, v, sub_type._complex__src, base) - for v in value - ], + ( + value + if isinstance(sub_type, query.ScalarQueryParameterType) + else [ + complex_query_parameter(None, v, sub_type._complex__src, base) + for v in value + ] + ), ) elif type_type == STRUCT: if not isinstance(value, collections_abc.Mapping): diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index 1abe28381..d8cbe9969 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -99,6 +99,15 @@ class DefaultPandasDTypes(enum.Enum): TIME_DTYPE = object() """Specifies default time dtype""" + RANGE_DATE_DTYPE = object() + """Specifies default range date dtype""" + + RANGE_DATETIME_DTYPE = object() + """Specifies default range datetime dtype""" + + RANGE_TIMESTAMP_DTYPE = object() + """Specifies default range timestamp dtype""" + class DestinationFormat(object): """The exported file format. The default value is :attr:`CSV`. 
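As an illustration of the mapping that `bq_to_arrow_range_data_type` introduces above, here is a minimal standalone sketch (editorial, not part of the patch): a RANGE element type resolves to its Arrow scalar, which is then wrapped in a two-field struct. The `_SCALARS` table is a hypothetical stand-in for the `_pyarrow_helpers.bq_to_arrow_scalars` lookup the real code consults.

import pyarrow

# Stand-in for _pyarrow_helpers.bq_to_arrow_scalars, restricted to the
# element types that RANGE supports.
_SCALARS = {
    "DATE": pyarrow.date32,
    "DATETIME": lambda: pyarrow.timestamp("us"),
    "TIMESTAMP": lambda: pyarrow.timestamp("us", tz="UTC"),
}

def range_struct(element_type: str) -> pyarrow.DataType:
    # RANGE<T> maps to struct<start: T, end: T> in Arrow.
    arrow_element = _SCALARS[element_type.upper()]()
    return pyarrow.struct([("start", arrow_element), ("end", arrow_element)])

assert range_struct("DATE") == pyarrow.struct(
    [("start", pyarrow.date32()), ("end", pyarrow.date32())]
)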
diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 7436b6013..09a69e11c 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1784,6 +1784,13 @@ def to_dataframe( datetime_dtype: Union[Any, None] = None, time_dtype: Union[Any, None] = DefaultPandasDTypes.TIME_DTYPE, timestamp_dtype: Union[Any, None] = None, + range_date_dtype: Union[Any, None] = DefaultPandasDTypes.RANGE_DATE_DTYPE, + range_datetime_dtype: Union[ + Any, None + ] = DefaultPandasDTypes.RANGE_DATETIME_DTYPE, + range_timestamp_dtype: Union[ + Any, None + ] = DefaultPandasDTypes.RANGE_TIMESTAMP_DTYPE, ) -> "pandas.DataFrame": """Return a pandas DataFrame from a QueryJob @@ -1919,6 +1926,63 @@ def to_dataframe( .. versionadded:: 3.10.0 + range_date_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype, such as: + + .. code-block:: python + + pandas.ArrowDtype(pyarrow.struct( + [("start", pyarrow.date32()), ("end", pyarrow.date32())] + )) + + to convert BigQuery RANGE type, instead of relying on + the default ``object``. If you explicitly set the value to + ``None``, the data type will be ``object``. BigQuery Range type + can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#range_type + + .. versionadded:: 3.21.0 + + range_datetime_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype, such as: + + .. code-block:: python + + pandas.ArrowDtype(pyarrow.struct( + [ + ("start", pyarrow.timestamp("us")), + ("end", pyarrow.timestamp("us")), + ] + )) + + to convert BigQuery RANGE type, instead of relying on + the default ``object``. If you explicitly set the value to + ``None``, the data type will be ``object``. BigQuery Range type + can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#range_type + + .. versionadded:: 3.21.0 + + range_timestamp_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype, such as: + + .. code-block:: python + + pandas.ArrowDtype(pyarrow.struct( + [ + ("start", pyarrow.timestamp("us", tz="UTC")), + ("end", pyarrow.timestamp("us", tz="UTC")), + ] + )) + + to convert BigQuery RANGE type, instead of relying + on the default ``object``. If you explicitly set the value to + ``None``, the data type will be ``object``. BigQuery Range type + can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#range_type + + .. 
versionadded:: 3.21.0 + Returns: pandas.DataFrame: A :class:`~pandas.DataFrame` populated with row data @@ -1949,6 +2013,9 @@ def to_dataframe( datetime_dtype=datetime_dtype, time_dtype=time_dtype, timestamp_dtype=timestamp_dtype, + range_date_dtype=range_date_dtype, + range_datetime_dtype=range_datetime_dtype, + range_timestamp_dtype=range_timestamp_dtype, ) # If changing the signature of this method, make sure to apply the same diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index 9c9402b74..9c59056fd 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -24,14 +24,13 @@ from google.cloud.bigquery._helpers import _rows_from_json from google.cloud.bigquery._helpers import _QUERY_PARAMS_FROM_JSON from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_PARAM +from google.cloud.bigquery._helpers import _SUPPORTED_RANGE_ELEMENTS _SCALAR_VALUE_TYPE = Optional[ Union[str, int, float, decimal.Decimal, bool, datetime.datetime, datetime.date] ] -_RANGE_ELEMENT_TYPE_STR = {"TIMESTAMP", "DATETIME", "DATE"} - class ConnectionProperty: """A connection-level property to customize query behavior. @@ -388,14 +387,14 @@ def _parse_range_element_type(self, type_): google.cloud.bigquery.query.ScalarQueryParameterType: Instance """ if isinstance(type_, str): - if type_ not in _RANGE_ELEMENT_TYPE_STR: + if type_ not in _SUPPORTED_RANGE_ELEMENTS: raise ValueError( "If given as a string, range element type must be one of " "'TIMESTAMP', 'DATE', or 'DATETIME'." ) return ScalarQueryParameterType(type_) elif isinstance(type_, ScalarQueryParameterType): - if type_._type not in _RANGE_ELEMENT_TYPE_STR: + if type_._type not in _SUPPORTED_RANGE_ELEMENTS: raise ValueError( "If given as a ScalarQueryParameter object, range element " "type must be one of 'TIMESTAMP', 'DATE', or 'DATETIME' " @@ -960,14 +959,14 @@ class RangeQueryParameter(_AbstractQueryParameter): @classmethod def _parse_range_element_type(self, range_element_type): if isinstance(range_element_type, str): - if range_element_type not in _RANGE_ELEMENT_TYPE_STR: + if range_element_type not in _SUPPORTED_RANGE_ELEMENTS: raise ValueError( "If given as a string, range_element_type must be one of " f"'TIMESTAMP', 'DATE', or 'DATETIME'. Got {range_element_type}." ) return RangeQueryParameterType(range_element_type) elif isinstance(range_element_type, RangeQueryParameterType): - if range_element_type.type_._type not in _RANGE_ELEMENT_TYPE_STR: + if range_element_type.type_._type not in _SUPPORTED_RANGE_ELEMENTS: raise ValueError( "If given as a RangeQueryParameterType object, " "range_element_type must be one of 'TIMESTAMP', 'DATE', " diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 73e755e9e..2f07bcc78 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -2044,6 +2044,13 @@ def to_dataframe( datetime_dtype: Union[Any, None] = None, time_dtype: Union[Any, None] = DefaultPandasDTypes.TIME_DTYPE, timestamp_dtype: Union[Any, None] = None, + range_date_dtype: Union[Any, None] = DefaultPandasDTypes.RANGE_DATE_DTYPE, + range_datetime_dtype: Union[ + Any, None + ] = DefaultPandasDTypes.RANGE_DATETIME_DTYPE, + range_timestamp_dtype: Union[ + Any, None + ] = DefaultPandasDTypes.RANGE_TIMESTAMP_DTYPE, ) -> "pandas.DataFrame": """Create a pandas DataFrame by loading all pages of a query. @@ -2183,6 +2190,63 @@ def to_dataframe( .. 
versionadded:: 3.10.0 + range_date_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype, such as: + + .. code-block:: python + + pandas.ArrowDtype(pyarrow.struct( + [("start", pyarrow.date32()), ("end", pyarrow.date32())] + )) + + to convert BigQuery RANGE type, instead of relying on + the default ``object``. If you explicitly set the value to + ``None``, the data type will be ``object``. BigQuery Range type + can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#range_type + + .. versionadded:: 3.21.0 + + range_datetime_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype, such as: + + .. code-block:: python + + pandas.ArrowDtype(pyarrow.struct( + [ + ("start", pyarrow.timestamp("us")), + ("end", pyarrow.timestamp("us")), + ] + )) + + to convert BigQuery RANGE type, instead of relying on + the default ``object``. If you explicitly set the value to + ``None``, the data type will be ``object``. BigQuery Range type + can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#range_type + + .. versionadded:: 3.21.0 + + range_timestamp_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype, such as: + + .. code-block:: python + + pandas.ArrowDtype(pyarrow.struct( + [ + ("start", pyarrow.timestamp("us", tz="UTC")), + ("end", pyarrow.timestamp("us", tz="UTC")), + ] + )) + + to convert BigQuery RANGE type, instead of relying + on the default ``object``. If you explicitly set the value to + ``None``, the data type will be ``object``. BigQuery Range type + can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#range_type + + .. versionadded:: 3.21.0 + Returns: pandas.DataFrame: A :class:`~pandas.DataFrame` populated with row data and column @@ -2214,6 +2278,69 @@ def to_dataframe( if time_dtype is DefaultPandasDTypes.TIME_DTYPE: time_dtype = db_dtypes.TimeDtype() + if range_date_dtype is DefaultPandasDTypes.RANGE_DATE_DTYPE: + try: + range_date_dtype = pandas.ArrowDtype( + pyarrow.struct( + [("start", pyarrow.date32()), ("end", pyarrow.date32())] + ) + ) + except AttributeError: + # pandas.ArrowDtype was introduced in pandas 1.5, but Python 3.7 + # only supports up to pandas 1.3. If pandas.ArrowDtype is not + # present, we emit a warning and set range_date_dtype to None. + msg = ( + "Unable to find class ArrowDtype in pandas, setting " + "range_date_dtype to be None. To use ArrowDtype, please " + "use pandas >= 1.5 and python >= 3.8." + ) + warnings.warn(msg) + range_date_dtype = None + + if range_datetime_dtype is DefaultPandasDTypes.RANGE_DATETIME_DTYPE: + try: + range_datetime_dtype = pandas.ArrowDtype( + pyarrow.struct( + [ + ("start", pyarrow.timestamp("us")), + ("end", pyarrow.timestamp("us")), + ] + ) + ) + except AttributeError: + # pandas.ArrowDtype was introduced in pandas 1.5, but Python 3.7 + # only supports up to pandas 1.3. If pandas.ArrowDtype is not + # present, we emit a warning and set range_datetime_dtype to None. + msg = ( + "Unable to find class ArrowDtype in pandas, setting " + "range_datetime_dtype to be None. To use ArrowDtype, " + "please use pandas >= 1.5 and python >= 3.8." 
+ ) warnings.warn(msg) + range_datetime_dtype = None + + if range_timestamp_dtype is DefaultPandasDTypes.RANGE_TIMESTAMP_DTYPE: + try: + range_timestamp_dtype = pandas.ArrowDtype( + pyarrow.struct( + [ + ("start", pyarrow.timestamp("us", tz="UTC")), + ("end", pyarrow.timestamp("us", tz="UTC")), + ] + ) + ) + except AttributeError: + # pandas.ArrowDtype was introduced in pandas 1.5, but Python 3.7 + # only supports up to pandas 1.3. If pandas.ArrowDtype is not + # present, we emit a warning and set range_timestamp_dtype to None. + msg = ( + "Unable to find class ArrowDtype in pandas, setting " + "range_timestamp_dtype to be None. To use ArrowDtype, " + "please use pandas >= 1.5 and python >= 3.8." + ) + warnings.warn(msg) + range_timestamp_dtype = None + if bool_dtype is not None and not hasattr(bool_dtype, "__from_arrow__"): raise ValueError("bool_dtype", _NO_SUPPORTED_DTYPE) @@ -2298,6 +2425,9 @@ def to_dataframe( datetime_dtype=datetime_dtype, time_dtype=time_dtype, timestamp_dtype=timestamp_dtype, + range_date_dtype=range_date_dtype, + range_datetime_dtype=range_datetime_dtype, + range_timestamp_dtype=range_timestamp_dtype, ), ) else: @@ -2502,6 +2632,9 @@ def to_dataframe( datetime_dtype=None, time_dtype=None, timestamp_dtype=None, + range_date_dtype=None, + range_datetime_dtype=None, + range_timestamp_dtype=None, ) -> "pandas.DataFrame": """Create an empty dataframe. @@ -2519,6 +2652,9 @@ def to_dataframe( datetime_dtype (Any): Ignored. Added for compatibility with RowIterator. time_dtype (Any): Ignored. Added for compatibility with RowIterator. timestamp_dtype (Any): Ignored. Added for compatibility with RowIterator. + range_date_dtype (Any): Ignored. Added for compatibility with RowIterator. + range_datetime_dtype (Any): Ignored. Added for compatibility with RowIterator. + range_timestamp_dtype (Any): Ignored. Added for compatibility with RowIterator. Returns: pandas.DataFrame: An empty :class:`~pandas.DataFrame`. @@ -2541,6 +2677,7 @@ def to_geodataframe( dtypes (Any): Ignored. Added for compatibility with RowIterator. progress_bar_type (Any): Ignored. Added for compatibility with RowIterator. create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator. + geography_column (str): Ignored. Added for compatibility with RowIterator. Returns: pandas.DataFrame: An empty :class:`~pandas.DataFrame`. diff --git a/noxfile.py b/noxfile.py index 034bb843a..78a9ab5b6 100644 --- a/noxfile.py +++ b/noxfile.py @@ -208,6 +208,9 @@ def system(session): extras = "[all]" session.install("-e", f".{extras}", "-c", constraints_path) + # print versions of all dependencies + session.run("python", "-m", "pip", "freeze") + # Run py.test against the system tests. 
session.run( "py.test", diff --git a/tests/data/scalars.csv b/tests/data/scalars.csv new file mode 100644 index 000000000..7af97583f --- /dev/null +++ b/tests/data/scalars.csv @@ -0,0 +1,2 @@ +"[2020-01-01, 2020-02-01)" + diff --git a/tests/data/scalars_schema_csv.json b/tests/data/scalars_schema_csv.json new file mode 100644 index 000000000..82b878d95 --- /dev/null +++ b/tests/data/scalars_schema_csv.json @@ -0,0 +1,10 @@ +[ + { + "mode" : "NULLABLE", + "name" : "range_date", + "type" : "RANGE", + "rangeElementType": { + "type": "DATE" + } + } + ] \ No newline at end of file diff --git a/tests/system/conftest.py b/tests/system/conftest.py index 784a1dd5c..8efa042af 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -96,12 +96,14 @@ def load_scalars_table( project_id: str, dataset_id: str, data_path: str = "scalars.jsonl", + source_format=enums.SourceFormat.NEWLINE_DELIMITED_JSON, + schema_source="scalars_schema.json", ) -> str: - schema = bigquery_client.schema_from_json(DATA_DIR / "scalars_schema.json") + schema = bigquery_client.schema_from_json(DATA_DIR / schema_source) table_id = data_path.replace(".", "_") + hex(random.randrange(1000000)) job_config = bigquery.LoadJobConfig() job_config.schema = schema - job_config.source_format = enums.SourceFormat.NEWLINE_DELIMITED_JSON + job_config.source_format = source_format full_table_id = f"{project_id}.{dataset_id}.{table_id}" with open(DATA_DIR / data_path, "rb") as data_file: job = bigquery_client.load_table_from_file( @@ -151,6 +153,22 @@ def scalars_table_multi_location( return request.param, full_table_id +@pytest.fixture(scope="session") +def scalars_table_csv( + bigquery_client: bigquery.Client, project_id: str, dataset_id: str +): + full_table_id = load_scalars_table( + bigquery_client, + project_id, + dataset_id, + data_path="scalars.csv", + source_format=enums.SourceFormat.CSV, + schema_source="scalars_schema_csv.json", + ) + yield full_table_id + bigquery_client.delete_table(full_table_id, not_found_ok=True) + + @pytest.fixture def test_table_name(request, replace_non_anum=re.compile(r"[^a-zA-Z0-9_]").sub): return replace_non_anum("_", request.node.name) diff --git a/tests/system/test_arrow.py b/tests/system/test_arrow.py index 8b88b6844..82cf11f85 100644 --- a/tests/system/test_arrow.py +++ b/tests/system/test_arrow.py @@ -167,3 +167,30 @@ def test_arrow_extension_types_same_for_storage_and_REST_APIs_894( b"ARROW:extension:name": b"google:sqlType:geography", b"ARROW:extension:metadata": b'{"encoding": "WKT"}', } + + +def test_list_rows_range_csv( + bigquery_client: bigquery.Client, + scalars_table_csv: str, +): + table_id = scalars_table_csv + + schema = [ + bigquery.SchemaField( + "range_date", enums.SqlTypeNames.RANGE, range_element_type="DATE" + ), + ] + + arrow_table = bigquery_client.list_rows( + table_id, + selected_fields=schema, + ).to_arrow() + + schema = arrow_table.schema + + expected_type = pyarrow.struct( + [("start", pyarrow.date32()), ("end", pyarrow.date32())] + ) + + range_type = schema.field("range_date").type + assert range_type == expected_type diff --git a/tests/system/test_list_rows.py b/tests/system/test_list_rows.py index 4c08958c3..108b842ce 100644 --- a/tests/system/test_list_rows.py +++ b/tests/system/test_list_rows.py @@ -118,3 +118,17 @@ def test_list_rows_scalars_extreme( assert value == 4 else: assert value is None + + +def test_list_rows_range(bigquery_client: bigquery.Client, scalars_table_csv: str): + rows = bigquery_client.list_rows(scalars_table_csv) + rows = list(rows) + 
row = rows[0] + expected_range = { + "start": datetime.date(2020, 1, 1), + "end": datetime.date(2020, 2, 1), + } + assert row["range_date"] == expected_range + + row_null = rows[1] + assert row_null["range_date"] is None diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 5c13669f3..58d2b73b3 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -670,6 +670,67 @@ def test_bq_to_arrow_array_w_geography_type_wkb_data(module_under_test): assert array.to_pylist() == list(series) +@pytest.mark.parametrize( + "bq_schema,expected", + [ + ( + schema.SchemaField( + "field1", + "RANGE", + range_element_type=schema.FieldElementType("DATE"), + mode="NULLABLE", + ), + pyarrow.struct( + [ + ("start", pyarrow.date32()), + ("end", pyarrow.date32()), + ] + ), + ), + ( + schema.SchemaField( + "field2", + "RANGE", + range_element_type=schema.FieldElementType("DATETIME"), + mode="NULLABLE", + ), + pyarrow.struct( + [ + ("start", pyarrow.timestamp("us", tz=None)), + ("end", pyarrow.timestamp("us", tz=None)), + ] + ), + ), + ( + schema.SchemaField( + "field3", + "RANGE", + range_element_type=schema.FieldElementType("TIMESTAMP"), + mode="NULLABLE", + ), + pyarrow.struct( + [ + ("start", pyarrow.timestamp("us", tz="UTC")), + ("end", pyarrow.timestamp("us", tz="UTC")), + ] + ), + ), + ], +) +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_bq_to_arrow_data_type_w_range(module_under_test, bq_schema, expected): + actual = module_under_test.bq_to_arrow_data_type(bq_schema) + assert actual.equals(expected) + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_bq_to_arrow_data_type_w_range_no_element(module_under_test): + field = schema.SchemaField("field1", "RANGE", mode="NULLABLE") + with pytest.raises(ValueError, match="Range element type cannot be None"): + module_under_test.bq_to_arrow_data_type(field) + + @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_schema_w_unknown_type(module_under_test): fields = ( diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 3953170fd..099529f95 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -3503,7 +3503,11 @@ def test_to_dataframe_no_tqdm_no_progress_bar(self): user_warnings = [ warning for warning in warned if warning.category is UserWarning ] - self.assertEqual(len(user_warnings), 0) + # On Python 3.7 and 3.8 the tests run with pandas < 1.5, where + # pandas.ArrowDtype is unavailable: the three default range dtypes + # fall back to object and emit one warning each, so len(user_warnings) + # = 3. On newer Python versions (and in the noextra tests) it is 0. + self.assertIn(len(user_warnings), [0, 3]) self.assertEqual(len(df), 4) @mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm", new=None) @@ -3534,7 +3538,11 @@ def test_to_dataframe_no_tqdm(self): user_warnings = [ warning for warning in warned if warning.category is UserWarning ] - self.assertEqual(len(user_warnings), 1) + # On Python 3.7 and 3.8 the tests run with pandas < 1.5, where + # pandas.ArrowDtype is unavailable: the three default range dtypes + # fall back to object, adding three warnings to the one this test + # already expects, so len(user_warnings) = 4; otherwise it is 1. + self.assertIn(len(user_warnings), [1, 4]) # Even though the progress bar won't show, downloading the dataframe # should still work. 
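For reference, the dict values asserted in these tests come from RANGE literals that the REST API returns as strings such as "[2020-10-01, 2021-10-02)", with "UNBOUNDED" marking an open bound. A minimal sketch of the decoding, mirroring the `_range_from_json` helper from the first hunk (`parse_date` is a hypothetical stand-in for the `_CELLDATA_FROM_JSON` coercion):

import datetime

def parse_date(value):
    # Hypothetical stand-in for the _CELLDATA_FROM_JSON["DATE"] coercion.
    return datetime.date.fromisoformat(value)

def range_from_literal(literal):
    # Strip the surrounding "[" and ")", then split on the separator.
    start, end = literal[1:-1].split(", ")
    return {
        "start": None if start == "UNBOUNDED" else parse_date(start),
        "end": None if end == "UNBOUNDED" else parse_date(end),
    }

assert range_from_literal("[2020-10-01, 2021-10-02)") == {
    "start": datetime.date(2020, 10, 1),
    "end": datetime.date(2021, 10, 2),
}
assert range_from_literal("[UNBOUNDED, UNBOUNDED)") == {"start": None, "end": None}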
@@ -3653,6 +3661,9 @@ def test_to_dataframe_w_dtypes_mapper(self): SchemaField("datetime", "DATETIME"), SchemaField("time", "TIME"), SchemaField("timestamp", "TIMESTAMP"), + SchemaField("range_timestamp", "RANGE", range_element_type="TIMESTAMP"), + SchemaField("range_datetime", "RANGE", range_element_type="DATETIME"), + SchemaField("range_date", "RANGE", range_element_type="DATE"), ] row_data = [ [ @@ -3665,6 +3676,9 @@ def test_to_dataframe_w_dtypes_mapper(self): "1999-12-31T00:00:00.000000", "00:00:00.000000", "1433836800000000", + "[1433836800000000, 1433999900000000)", + "[2009-06-17T13:45:30, 2019-07-17T13:45:30)", + "[2020-10-01, 2021-10-02)", ], [ "Bharney Rhubble", @@ -3676,6 +3690,9 @@ def test_to_dataframe_w_dtypes_mapper(self): "4567-12-31T00:00:00.000000", "12:00:00.232413", "81953424000000000", + "[1433836800000000, UNBOUNDED)", + "[2009-06-17T13:45:30, UNBOUNDED)", + "[2020-10-01, UNBOUNDED)", ], [ "Wylma Phlyntstone", @@ -3687,6 +3704,9 @@ def test_to_dataframe_w_dtypes_mapper(self): "9999-12-31T23:59:59.999999", "23:59:59.999999", "253402261199999999", + "[UNBOUNDED, UNBOUNDED)", + "[UNBOUNDED, UNBOUNDED)", + "[UNBOUNDED, UNBOUNDED)", ], ] rows = [{"f": [{"v": field} for field in row]} for row in row_data] @@ -3724,6 +3744,39 @@ def test_to_dataframe_w_dtypes_mapper(self): if hasattr(pandas, "ArrowDtype") else None ), + range_date_dtype=( + pandas.ArrowDtype( + pyarrow.struct( + [("start", pyarrow.date32()), ("end", pyarrow.date32())] + ) + ) + if hasattr(pandas, "ArrowDtype") + else None + ), + range_datetime_dtype=( + pandas.ArrowDtype( + pyarrow.struct( + [ + ("start", pyarrow.timestamp("us")), + ("end", pyarrow.timestamp("us")), + ] + ) + ) + if hasattr(pandas, "ArrowDtype") + else None + ), + range_timestamp_dtype=( + pandas.ArrowDtype( + pyarrow.struct( + [ + ("start", pyarrow.timestamp("us", tz="UTC")), + ("end", pyarrow.timestamp("us", tz="UTC")), + ] + ) + ) + if hasattr(pandas, "ArrowDtype") + else None + ), ) self.assertIsInstance(df, pandas.DataFrame) @@ -3791,6 +3844,52 @@ def test_to_dataframe_w_dtypes_mapper(self): ], ) self.assertEqual(df.timestamp.dtype.name, "timestamp[us, tz=UTC][pyarrow]") + + self.assertEqual( + list(df.range_timestamp), + [ + { + "start": datetime.datetime( + 2015, 6, 9, 8, 0, 0, tzinfo=datetime.timezone.utc + ), + "end": datetime.datetime( + 2015, 6, 11, 5, 18, 20, tzinfo=datetime.timezone.utc + ), + }, + { + "start": datetime.datetime( + 2015, 6, 9, 8, 0, 0, tzinfo=datetime.timezone.utc + ), + "end": None, + }, + {"start": None, "end": None}, + ], + ) + + self.assertEqual( + list(df.range_datetime), + [ + { + "start": datetime.datetime(2009, 6, 17, 13, 45, 30), + "end": datetime.datetime(2019, 7, 17, 13, 45, 30), + }, + {"start": datetime.datetime(2009, 6, 17, 13, 45, 30), "end": None}, + {"start": None, "end": None}, + ], + ) + + self.assertEqual( + list(df.range_date), + [ + { + "start": datetime.date(2020, 10, 1), + "end": datetime.date(2021, 10, 2), + }, + {"start": datetime.date(2020, 10, 1), "end": None}, + {"start": None, "end": None}, + ], + ) + else: self.assertEqual( list(df.date), @@ -3851,6 +3950,9 @@ def test_to_dataframe_w_none_dtypes_mapper(self): SchemaField("datetime", "DATETIME"), SchemaField("time", "TIME"), SchemaField("timestamp", "TIMESTAMP"), + SchemaField("range_timestamp", "RANGE", range_element_type="TIMESTAMP"), + SchemaField("range_datetime", "RANGE", range_element_type="DATETIME"), + SchemaField("range_date", "RANGE", range_element_type="DATE"), ] row_data = [ [ @@ -3863,6 +3965,9 @@ def 
test_to_dataframe_w_none_dtypes_mapper(self): "1999-12-31T00:00:00.000000", "23:59:59.999999", "1433836800000000", + "[1433836800000000, 1433999900000000)", + "[2009-06-17T13:45:30, 2019-07-17T13:45:30)", + "[2020-10-01, 2021-10-02)", ], ] rows = [{"f": [{"v": field} for field in row]} for row in row_data] @@ -3880,6 +3985,9 @@ def test_to_dataframe_w_none_dtypes_mapper(self): datetime_dtype=None, time_dtype=None, timestamp_dtype=None, + range_timestamp_dtype=None, + range_datetime_dtype=None, + range_date_dtype=None, ) self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(df.complete.dtype.name, "bool") @@ -3891,6 +3999,9 @@ def test_to_dataframe_w_none_dtypes_mapper(self): self.assertEqual(df.datetime.dtype.name, "datetime64[ns]") self.assertEqual(df.time.dtype.name, "object") self.assertEqual(df.timestamp.dtype.name, "datetime64[ns, UTC]") + self.assertEqual(df.range_timestamp.dtype.name, "object") + self.assertEqual(df.range_datetime.dtype.name, "object") + self.assertEqual(df.range_date.dtype.name, "object") def test_to_dataframe_w_unsupported_dtypes_mapper(self): pytest.importorskip("pandas")
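Taken together, the patch gives `to_dataframe()` three new keyword arguments. A usage sketch (editorial; the query and column name are illustrative, and the Arrow-backed defaults assume pandas >= 1.5):

from google.cloud import bigquery

client = bigquery.Client()
job = client.query(
    "SELECT RANGE(DATE '2020-01-01', DATE '2020-02-01') AS range_date"
)

# Default: the range column comes back as a pandas.ArrowDtype struct
# column whose cells are {"start": ..., "end": ...} values.
df = job.to_dataframe()
print(df["range_date"].dtype)  # e.g. struct<start: date32[day], end: date32[day]>[pyarrow]

# Passing None opts out and falls back to dtype object.
df_obj = job.to_dataframe(range_date_dtype=None)
print(df_obj["range_date"].dtype)  # object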