datafusion/sqllogictest/test_files/information_schema.slt

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at

#   http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


# Verify the information schema does not exist by default
statement error DataFusion error: Error during planning: table 'datafusion.information_schema.tables' not found
SELECT * from information_schema.tables

# Verify that information_schema.schemata is likewise not found by default
statement error DataFusion error: Error during planning: table 'datafusion.information_schema.schemata' not found
SELECT * from information_schema.schemata

# SHOW ALL is rejected too while information_schema is disabled
# (the brackets are escaped because the expected message is a regex)
statement error DataFusion error: Error during planning: SHOW \[VARIABLE\] is not supported unless information_schema is enabled
show all

# Turn information_schema on via the `datafusion.catalog.information_schema` setting

# The information_schema queries are expected to work once this is set
statement ok
set datafusion.catalog.information_schema = true;

# Verify the information schema now exists and holds no user tables:
# only the seven information_schema views are listed, each with type VIEW
query TTTT rowsort
SELECT * from information_schema.tables;
----
datafusion information_schema columns VIEW
datafusion information_schema df_settings VIEW
datafusion information_schema parameters VIEW
datafusion information_schema routines VIEW
datafusion information_schema schemata VIEW
datafusion information_schema tables VIEW
datafusion information_schema views VIEW

# Verify information_schema.schemata now resolves: it returns a single row for
# catalog `datafusion`, schema `public`, with the remaining five columns NULL
query TTTTTTT rowsort
SELECT * from information_schema.schemata;
----
datafusion public NULL NULL NULL NULL NULL

# Table names are case insensitive: the mixed-case `SchEmaTa` resolves to `schemata`
query T rowsort
SELECT catalog_name from information_schema.SchEmaTa;
----
datafusion

# Disable information_schema and verify that lookups error again
statement ok
set datafusion.catalog.information_schema = false

statement error DataFusion error: Error during planning: table 'datafusion.information_schema.tables' not found
SELECT * from information_schema.tables

# Same check for `columns`; the expected message pins the same
# `DataFusion error: ` prefix as the other planning-error checks in this file
statement error DataFusion error: Error during planning: table 'datafusion.information_schema.columns' not found
SELECT * from information_schema.columns;


############
## Enable information schema for the rest of the test
## (it was switched off by the preceding "disable" check)
############
statement ok
set datafusion.catalog.information_schema = true

############
# New tables should show up in information schema
###########
# Create table `t` from a one-row VALUES list
statement ok
create table t as values (1);

# `t` is now listed under schema `public` with type BASE TABLE,
# alongside the seven information_schema views
query TTTT rowsort
SELECT * from information_schema.tables;
----
datafusion information_schema columns VIEW
datafusion information_schema df_settings VIEW
datafusion information_schema parameters VIEW
datafusion information_schema routines VIEW
datafusion information_schema schemata VIEW
datafusion information_schema tables VIEW
datafusion information_schema views VIEW
datafusion public t BASE TABLE

# Another new table should show up in information schema
statement ok
create table t2 as values (1);

# Both `t` and `t2` are listed as BASE TABLE rows in schema `public`
query TTTT rowsort
SELECT * from information_schema.tables;
----
datafusion information_schema columns VIEW
datafusion information_schema df_settings VIEW
datafusion information_schema parameters VIEW
datafusion information_schema routines VIEW
datafusion information_schema schemata VIEW
datafusion information_schema tables VIEW
datafusion information_schema views VIEW
datafusion public t BASE TABLE
datafusion public t2 BASE TABLE

# Filter on a column qualified by the bare table name (`tables.table_schema`);
# the `public` tables `t` and `t2` are filtered out, leaving only the views
query TTTT rowsort
SELECT * from information_schema.tables WHERE tables.table_schema='information_schema';
----
datafusion information_schema columns VIEW
datafusion information_schema df_settings VIEW
datafusion information_schema parameters VIEW
datafusion information_schema routines VIEW
datafusion information_schema schemata VIEW
datafusion information_schema tables VIEW
datafusion information_schema views VIEW

# Filter on a column qualified as schema.table.column
# (`information_schema.tables.table_schema`); only the views are returned
query TTTT rowsort
SELECT * from information_schema.tables WHERE information_schema.tables.table_schema='information_schema';
----
datafusion information_schema columns VIEW
datafusion information_schema df_settings VIEW
datafusion information_schema parameters VIEW
datafusion information_schema routines VIEW
datafusion information_schema schemata VIEW
datafusion information_schema tables VIEW
datafusion information_schema views VIEW

# Filter on a column qualified as catalog.schema.table.column
# (`datafusion.information_schema.tables.table_schema`); only the views are returned
query TTTT rowsort
SELECT * from information_schema.tables WHERE datafusion.information_schema.tables.table_schema='information_schema';
----
datafusion information_schema columns VIEW
datafusion information_schema df_settings VIEW
datafusion information_schema parameters VIEW
datafusion information_schema routines VIEW
datafusion information_schema schemata VIEW
datafusion information_schema tables VIEW
datafusion information_schema views VIEW

# Cleanup: drop the two tables (`t`, `t2`) created by the checks above
statement ok
drop table t

statement ok
drop table t2

############
## SHOW VARIABLES should work
## The SET statements below pin values that would otherwise vary between
## machines or builds, so that the expected SHOW output is stable
###########

# target_partitions defaults to num_cores, so set
# to a known value that is unlikely to be
# the real number of cores on a system
statement ok
SET datafusion.execution.target_partitions=7

# planning_concurrency defaults to num_cores, so set
# to a known value that is unlikely to be
# the real number of cores on a system
statement ok
SET datafusion.execution.planning_concurrency=13

# pin the parquet `created_by` string to a fixed value so the expected
# output does not depend on the version string
statement ok
SET datafusion.execution.parquet.created_by=datafusion

# show all variables
# Each row is a setting name followed by its current value. The values pinned
# by the SET statements above (target_partitions=7, planning_concurrency=13,
# parquet.created_by=datafusion) and information_schema=true appear here.
# `rowsort` sorts the rows before comparison, so engine output order is irrelevant.
query TT rowsort
SHOW ALL
----
datafusion.catalog.create_default_catalog_and_schema true
datafusion.catalog.default_catalog datafusion
datafusion.catalog.default_schema public
datafusion.catalog.format NULL
datafusion.catalog.has_header true
datafusion.catalog.information_schema true
datafusion.catalog.location NULL
datafusion.catalog.newlines_in_values false
datafusion.execution.batch_size 8192
datafusion.execution.coalesce_batches true
datafusion.execution.collect_statistics false
datafusion.execution.enable_recursive_ctes true
datafusion.execution.enforce_batch_size_in_joins false
datafusion.execution.keep_partition_by_columns false
datafusion.execution.listing_table_ignore_subdirectory true
datafusion.execution.max_buffered_batches_per_output_file 2
datafusion.execution.meta_fetch_concurrency 32
datafusion.execution.minimum_parallel_output_files 4
datafusion.execution.parquet.allow_single_file_parallelism true
datafusion.execution.parquet.binary_as_string false
datafusion.execution.parquet.bloom_filter_fpp NULL
datafusion.execution.parquet.bloom_filter_ndv NULL
datafusion.execution.parquet.bloom_filter_on_read true
datafusion.execution.parquet.bloom_filter_on_write false
datafusion.execution.parquet.column_index_truncate_length 64
datafusion.execution.parquet.compression zstd(3)
datafusion.execution.parquet.created_by datafusion
datafusion.execution.parquet.data_page_row_count_limit 20000
datafusion.execution.parquet.data_pagesize_limit 1048576
datafusion.execution.parquet.dictionary_enabled true
datafusion.execution.parquet.dictionary_page_size_limit 1048576
datafusion.execution.parquet.enable_page_index true
datafusion.execution.parquet.encoding NULL
datafusion.execution.parquet.max_row_group_size 1048576
datafusion.execution.parquet.max_statistics_size 4096
datafusion.execution.parquet.maximum_buffered_record_batches_per_stream 2
datafusion.execution.parquet.maximum_parallel_row_group_writers 1
datafusion.execution.parquet.metadata_size_hint NULL
datafusion.execution.parquet.pruning true
datafusion.execution.parquet.pushdown_filters false
datafusion.execution.parquet.reorder_filters false
datafusion.execution.parquet.schema_force_view_types true
datafusion.execution.parquet.skip_arrow_metadata false
datafusion.execution.parquet.skip_metadata true
datafusion.execution.parquet.statistics_enabled page
datafusion.execution.parquet.write_batch_size 1024
datafusion.execution.parquet.writer_version 1.0
datafusion.execution.planning_concurrency 13
datafusion.execution.skip_partial_aggregation_probe_ratio_threshold 0.8
datafusion.execution.skip_partial_aggregation_probe_rows_threshold 100000
datafusion.execution.skip_physical_aggregate_schema_check false
datafusion.execution.soft_max_rows_per_output_file 50000000
datafusion.execution.sort_in_place_threshold_bytes 1048576
datafusion.execution.sort_spill_reservation_bytes 10485760
datafusion.execution.split_file_groups_by_statistics false
datafusion.execution.target_partitions 7
datafusion.execution.time_zone +00:00
datafusion.execution.use_row_number_estimates_to_optimize_partitioning false
datafusion.explain.logical_plan_only false
datafusion.explain.physical_plan_only false
datafusion.explain.show_schema false
datafusion.explain.show_sizes true
datafusion.explain.show_statistics false
datafusion.optimizer.allow_symmetric_joins_without_pruning true
datafusion.optimizer.default_filter_selectivity 20
datafusion.optimizer.enable_distinct_aggregation_soft_limit true
datafusion.optimizer.enable_round_robin_repartition true
datafusion.optimizer.enable_topk_aggregation true
datafusion.optimizer.expand_views_at_output false
datafusion.optimizer.filter_null_join_keys false
datafusion.optimizer.hash_join_single_partition_threshold 1048576
datafusion.optimizer.hash_join_single_partition_threshold_rows 131072
datafusion.optimizer.max_passes 3
datafusion.optimizer.prefer_existing_sort false
datafusion.optimizer.prefer_existing_union false
datafusion.optimizer.prefer_hash_join true
datafusion.optimizer.repartition_aggregations true
datafusion.optimizer.repartition_file_min_size 10485760
datafusion.optimizer.repartition_file_scans true
datafusion.optimizer.repartition_joins true
datafusion.optimizer.repartition_sorts true
datafusion.optimizer.repartition_windows true
datafusion.optimizer.skip_failed_rules false
datafusion.optimizer.top_down_join_key_reordering true
datafusion.sql_parser.collect_spans false
datafusion.sql_parser.dialect generic
datafusion.sql_parser.enable_ident_normalization true
datafusion.sql_parser.enable_options_value_normalization false
datafusion.sql_parser.parse_float_as_decimal false
datafusion.sql_parser.support_varchar_with_length true

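# show all variables with VERBOSE, which adds a third column holding each setting's description
# NOTE(review): the description text looks like the engine's own config documentation,
# so wording fixes presumably belong in the config source rather than in this file — confirm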
query TTT rowsort
SHOW ALL VERBOSE
----
datafusion.catalog.create_default_catalog_and_schema true Whether the default catalog and schema should be created automatically.
datafusion.catalog.default_catalog datafusion The default catalog name - this impacts what SQL queries use if not specified
datafusion.catalog.default_schema public The default schema name - this impacts what SQL queries use if not specified
datafusion.catalog.format NULL Type of `TableProvider` to use when loading `default` schema
datafusion.catalog.has_header true Default value for `format.has_header` for `CREATE EXTERNAL TABLE` if not specified explicitly in the statement.
datafusion.catalog.information_schema true Should DataFusion provide access to `information_schema` virtual tables for displaying schema information
datafusion.catalog.location NULL Location scanned to load tables for `default` schema
datafusion.catalog.newlines_in_values false Specifies whether newlines in (quoted) CSV values are supported. This is the default value for `format.newlines_in_values` for `CREATE EXTERNAL TABLE` if not specified explicitly in the statement. Parsing newlines in quoted values may be affected by execution behaviour such as parallel file scanning. Setting this to `true` ensures that newlines in values are parsed successfully, which may reduce performance.
datafusion.execution.batch_size 8192 Default batch size while creating new batches, it's especially useful for buffer-in-memory batches since creating tiny batches would result in too much metadata memory consumption
datafusion.execution.coalesce_batches true When set to true, record batches will be examined between each operator and small batches will be coalesced into larger batches. This is helpful when there are highly selective filters or joins that could produce tiny output batches. The target batch size is determined by the configuration setting
datafusion.execution.collect_statistics false Should DataFusion collect statistics after listing files
datafusion.execution.enable_recursive_ctes true Should DataFusion support recursive CTEs
datafusion.execution.enforce_batch_size_in_joins false Should DataFusion enforce batch size in joins or not. By default, DataFusion will not enforce batch size in joins. Enforcing batch size in joins can reduce memory usage when joining large tables with a highly-selective join filter, but is also slightly slower.
datafusion.execution.keep_partition_by_columns false Should DataFusion keep the columns used for partition_by in the output RecordBatches
datafusion.execution.listing_table_ignore_subdirectory true Should sub directories be ignored when scanning directories for data files. Defaults to true (ignores subdirectories), consistent with Hive. Note that this setting does not affect reading partitioned tables (e.g. `/table/year=2021/month=01/data.parquet`).
datafusion.execution.max_buffered_batches_per_output_file 2 This is the maximum number of RecordBatches buffered for each output file being worked. Higher values can potentially give faster write performance at the cost of higher peak memory consumption
datafusion.execution.meta_fetch_concurrency 32 Number of files to read in parallel when inferring schema and statistics
datafusion.execution.minimum_parallel_output_files 4 Guarantees a minimum level of output files running in parallel. RecordBatches will be distributed in round robin fashion to each parallel writer. Each writer is closed and a new file opened once soft_max_rows_per_output_file is reached.
datafusion.execution.parquet.allow_single_file_parallelism true (writing) Controls whether DataFusion will attempt to speed up writing parquet files by serializing them in parallel. Each column in each row group in each output file are serialized in parallel leveraging a maximum possible core count of n_files*n_row_groups*n_columns.
datafusion.execution.parquet.binary_as_string false (reading) If true, parquet reader will read columns of `Binary/LargeBinary` with `Utf8`, and `BinaryView` with `Utf8View`. Parquet files generated by some legacy writers do not correctly set the UTF8 flag for strings, causing string columns to be loaded as BLOB instead.
datafusion.execution.parquet.bloom_filter_fpp NULL (writing) Sets bloom filter false positive probability. If NULL, uses default parquet writer setting
datafusion.execution.parquet.bloom_filter_ndv NULL (writing) Sets bloom filter number of distinct values. If NULL, uses default parquet writer setting
datafusion.execution.parquet.bloom_filter_on_read true (writing) Use any available bloom filters when reading parquet files
datafusion.execution.parquet.bloom_filter_on_write false (writing) Write bloom filters for all columns when creating parquet files
datafusion.execution.parquet.column_index_truncate_length 64 (writing) Sets column index truncate length
datafusion.execution.parquet.compression zstd(3) (writing) Sets default parquet compression codec. Valid values are: uncompressed, snappy, gzip(level), lzo, brotli(level), lz4, zstd(level), and lz4_raw. These values are not case sensitive. If NULL, uses default parquet writer setting Note that this default setting is not the same as the default parquet writer setting.
datafusion.execution.parquet.created_by datafusion (writing) Sets "created by" property
datafusion.execution.parquet.data_page_row_count_limit 20000 (writing) Sets best effort maximum number of rows in data page
datafusion.execution.parquet.data_pagesize_limit 1048576 (writing) Sets best effort maximum size of data page in bytes
datafusion.execution.parquet.dictionary_enabled true (writing) Sets if dictionary encoding is enabled. If NULL, uses default parquet writer setting
datafusion.execution.parquet.dictionary_page_size_limit 1048576 (writing) Sets best effort maximum dictionary page size, in bytes
datafusion.execution.parquet.enable_page_index true (reading) If true, reads the Parquet data page level metadata (the Page Index), if present, to reduce the I/O and number of rows decoded.
datafusion.execution.parquet.encoding NULL (writing)  Sets default encoding for any column. Valid values are: plain, plain_dictionary, rle, bit_packed, delta_binary_packed, delta_length_byte_array, delta_byte_array, rle_dictionary, and byte_stream_split. These values are not case sensitive. If NULL, uses default parquet writer setting
datafusion.execution.parquet.max_row_group_size 1048576 (writing) Target maximum number of rows in each row group (defaults to 1M rows). Writing larger row groups requires more memory to write, but can get better compression and be faster to read.
datafusion.execution.parquet.max_statistics_size 4096 (writing) Sets max statistics size for any column. If NULL, uses default parquet writer setting max_statistics_size is deprecated, currently it is not being used
datafusion.execution.parquet.maximum_buffered_record_batches_per_stream 2 (writing) By default parallel parquet writer is tuned for minimum memory usage in a streaming execution plan. You may see a performance benefit when writing large parquet files by increasing maximum_parallel_row_group_writers and maximum_buffered_record_batches_per_stream if your system has idle cores and can tolerate additional memory usage. Boosting these values is likely worthwhile when writing out already in-memory data, such as from a cached data frame.
datafusion.execution.parquet.maximum_parallel_row_group_writers 1 (writing) By default parallel parquet writer is tuned for minimum memory usage in a streaming execution plan. You may see a performance benefit when writing large parquet files by increasing maximum_parallel_row_group_writers and maximum_buffered_record_batches_per_stream if your system has idle cores and can tolerate additional memory usage. Boosting these values is likely worthwhile when writing out already in-memory data, such as from a cached data frame.
datafusion.execution.parquet.metadata_size_hint NULL (reading) If specified, the parquet reader will try and fetch the last `size_hint` bytes of the parquet file optimistically. If not specified, two reads are required: One read to fetch the 8-byte parquet footer and another to fetch the metadata length encoded in the footer
datafusion.execution.parquet.pruning true (reading) If true, the parquet reader attempts to skip entire row groups based on the predicate in the query and the metadata (min/max values) stored in the parquet file
datafusion.execution.parquet.pushdown_filters false (reading) If true, filter expressions are be applied during the parquet decoding operation to reduce the number of rows decoded. This optimization is sometimes called "late materialization".
datafusion.execution.parquet.reorder_filters false (reading) If true, filter expressions evaluated during the parquet decoding operation will be reordered heuristically to minimize the cost of evaluation. If false, the filters are applied in the same order as written in the query
datafusion.execution.parquet.schema_force_view_types true (reading) If true, parquet reader will read columns of `Utf8/Utf8Large` with `Utf8View`, and `Binary/BinaryLarge` with `BinaryView`.
datafusion.execution.parquet.skip_arrow_metadata false (writing) Skip encoding the embedded arrow metadata in the KV_meta This is analogous to the `ArrowWriterOptions::with_skip_arrow_metadata`. Refer to <https://docs.rs/parquet/53.3.0/parquet/arrow/arrow_writer/struct.ArrowWriterOptions.html#method.with_skip_arrow_metadata>
datafusion.execution.parquet.skip_metadata true (reading) If true, the parquet reader skip the optional embedded metadata that may be in the file Schema. This setting can help avoid schema conflicts when querying multiple parquet files with schemas containing compatible types but different metadata
datafusion.execution.parquet.statistics_enabled page (writing) Sets if statistics are enabled for any column Valid values are: "none", "chunk", and "page" These values are not case sensitive. If NULL, uses default parquet writer setting
datafusion.execution.parquet.write_batch_size 1024 (writing) Sets write_batch_size in bytes
datafusion.execution.parquet.writer_version 1.0 (writing) Sets parquet writer version valid values are "1.0" and "2.0"
datafusion.execution.planning_concurrency 13 Fan-out during initial physical planning. This is mostly use to plan `UNION` children in parallel. Defaults to the number of CPU cores on the system
datafusion.execution.skip_partial_aggregation_probe_ratio_threshold 0.8 Aggregation ratio (number of distinct groups / number of input rows) threshold for skipping partial aggregation. If the value is greater then partial aggregation will skip aggregation for further input
datafusion.execution.skip_partial_aggregation_probe_rows_threshold 100000 Number of input rows partial aggregation partition should process, before aggregation ratio check and trying to switch to skipping aggregation mode
datafusion.execution.skip_physical_aggregate_schema_check false When set to true, skips verifying that the schema produced by planning the input of `LogicalPlan::Aggregate` exactly matches the schema of the input plan. When set to false, if the schema does not match exactly (including nullability and metadata), a planning error will be raised. This is used to workaround bugs in the planner that are now caught by the new schema verification step.
datafusion.execution.soft_max_rows_per_output_file 50000000 Target number of rows in output files when writing multiple. This is a soft max, so it can be exceeded slightly. There also will be one file smaller than the limit if the total number of rows written is not roughly divisible by the soft max
datafusion.execution.sort_in_place_threshold_bytes 1048576 When sorting, below what size should data be concatenated and sorted in a single RecordBatch rather than sorted in batches and merged.
datafusion.execution.sort_spill_reservation_bytes 10485760 Specifies the reserved memory for each spillable sort operation to facilitate an in-memory merge. When a sort operation spills to disk, the in-memory data must be sorted and merged before being written to a file. This setting reserves a specific amount of memory for that in-memory sort/merge process. Note: This setting is irrelevant if the sort operation cannot spill (i.e., if there's no `DiskManager` configured).
datafusion.execution.split_file_groups_by_statistics false Attempt to eliminate sorts by packing & sorting files with non-overlapping statistics into the same file groups. Currently experimental
datafusion.execution.target_partitions 7 Number of partitions for query execution. Increasing partitions can increase concurrency. Defaults to the number of CPU cores on the system
datafusion.execution.time_zone +00:00 The default time zone Some functions, e.g. `EXTRACT(HOUR from SOME_TIME)`, shift the underlying datetime according to this time zone, and then extract the hour
datafusion.execution.use_row_number_estimates_to_optimize_partitioning false Should DataFusion use row number estimates at the input to decide whether increasing parallelism is beneficial or not. By default, only exact row numbers (not estimates) are used for this decision. Setting this flag to `true` will likely produce better plans. if the source of statistics is accurate. We plan to make this the default in the future.
datafusion.explain.logical_plan_only false When set to true, the explain statement will only print logical plans
datafusion.explain.physical_plan_only false When set to true, the explain statement will only print physical plans
datafusion.explain.show_schema false When set to true, the explain statement will print schema information
datafusion.explain.show_sizes true When set to true, the explain statement will print the partition sizes
datafusion.explain.show_statistics false When set to true, the explain statement will print operator statistics for physical plans
datafusion.optimizer.allow_symmetric_joins_without_pruning true Should DataFusion allow symmetric hash joins for unbounded data sources even when its inputs do not have any ordering or filtering If the flag is not enabled, the SymmetricHashJoin operator will be unable to prune its internal buffers, resulting in certain join types - such as Full, Left, LeftAnti, LeftSemi, Right, RightAnti, and RightSemi - being produced only at the end of the execution. This is not typical in stream processing. Additionally, without proper design for long runner execution, all types of joins may encounter out-of-memory errors.
datafusion.optimizer.default_filter_selectivity 20 The default filter selectivity used by Filter Statistics when an exact selectivity cannot be determined. Valid values are between 0 (no selectivity) and 100 (all rows are selected).
datafusion.optimizer.enable_distinct_aggregation_soft_limit true When set to true, the optimizer will push a limit operation into grouped aggregations which have no aggregate expressions, as a soft limit, emitting groups once the limit is reached, before all rows in the group are read.
datafusion.optimizer.enable_round_robin_repartition true When set to true, the physical plan optimizer will try to add round robin repartitioning to increase parallelism to leverage more CPU cores
datafusion.optimizer.enable_topk_aggregation true When set to true, the optimizer will attempt to perform limit operations during aggregations, if possible
datafusion.optimizer.expand_views_at_output false When set to true, if the returned type is a view type then the output will be coerced to a non-view. Coerces `Utf8View` to `LargeUtf8`, and `BinaryView` to `LargeBinary`.
datafusion.optimizer.filter_null_join_keys false When set to true, the optimizer will insert filters before a join between a nullable and non-nullable column to filter out nulls on the nullable side. This filter can add additional overhead when the file format does not fully support predicate push down.
datafusion.optimizer.hash_join_single_partition_threshold 1048576 The maximum estimated size in bytes for one input side of a HashJoin will be collected into a single partition
datafusion.optimizer.hash_join_single_partition_threshold_rows 131072 The maximum estimated size in rows for one input side of a HashJoin will be collected into a single partition
datafusion.optimizer.max_passes 3 Number of times that the optimizer will attempt to optimize the plan
datafusion.optimizer.prefer_existing_sort false When true, DataFusion will opportunistically remove sorts when the data is already sorted, (i.e. setting `preserve_order` to true on `RepartitionExec`  and using `SortPreservingMergeExec`) When false, DataFusion will maximize plan parallelism using `RepartitionExec` even if this requires subsequently resorting data using a `SortExec`.
datafusion.optimizer.prefer_existing_union false When set to true, the optimizer will not attempt to convert Union to Interleave
datafusion.optimizer.prefer_hash_join true When set to true, the physical plan optimizer will prefer HashJoin over SortMergeJoin. HashJoin can work more efficiently than SortMergeJoin but consumes more memory
datafusion.optimizer.repartition_aggregations true Should DataFusion repartition data using the aggregate keys to execute aggregates in parallel using the provided `target_partitions` level
datafusion.optimizer.repartition_file_min_size 10485760 Minimum total files size in bytes to perform file scan repartitioning.
datafusion.optimizer.repartition_file_scans true When set to `true`, file groups will be repartitioned to achieve maximum parallelism. Currently Parquet and CSV formats are supported. If set to `true`, all files will be repartitioned evenly (i.e., a single large file might be partitioned into smaller chunks) for parallel scanning. If set to `false`, different files will be read in parallel, but repartitioning won't happen within a single file.
datafusion.optimizer.repartition_joins true Should DataFusion repartition data using the join keys to execute joins in parallel using the provided `target_partitions` level
datafusion.optimizer.repartition_sorts true Should DataFusion execute sorts in a per-partition fashion and merge afterwards instead of coalescing first and sorting globally. With this flag is enabled, plans in the form below ```text      "SortExec: [a@0 ASC]",      "  CoalescePartitionsExec",      "    RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", ``` would turn into the plan below which performs better in multithreaded environments ```text      "SortPreservingMergeExec: [a@0 ASC]",      "  SortExec: [a@0 ASC]",      "    RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", ```
datafusion.optimizer.repartition_windows true Should DataFusion repartition data using the partitions keys to execute window functions in parallel using the provided `target_partitions` level
datafusion.optimizer.skip_failed_rules false When set to true, the logical plan optimizer will produce warning messages if any optimization rules produce errors and then proceed to the next rule. When set to false, any rules that produce errors will cause the query to fail
datafusion.optimizer.top_down_join_key_reordering true When set to true, the physical plan optimizer will run a top down process to reorder the join keys
datafusion.sql_parser.collect_spans false When set to true, the source locations relative to the original SQL query (i.e. [`Span`](sqlparser::tokenizer::Span)) will be collected and recorded in the logical plan nodes.
datafusion.sql_parser.dialect generic Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi.
datafusion.sql_parser.enable_ident_normalization true When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted)
datafusion.sql_parser.enable_options_value_normalization false When set to true, SQL parser will normalize options value (convert value to lowercase). Note that this option is ignored and will be removed in the future. All case-insensitive values are normalized automatically.
datafusion.sql_parser.parse_float_as_decimal false When set to true, SQL parser will parse float as decimal type
datafusion.sql_parser.support_varchar_with_length true If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits.

# show_variable_in_config_options
# SHOW <name> returns a single (name, value) row for the requested setting
query TT
SHOW datafusion.execution.batch_size
----
datafusion.execution.batch_size 8192

# show_variable_in_config_options_verbose
# With VERBOSE the row gains a third column holding the setting's description
query TTT
SHOW datafusion.execution.batch_size VERBOSE
----
datafusion.execution.batch_size 8192 Default batch size while creating new batches, it's especially useful for buffer-in-memory batches since creating tiny batches would result in too much metadata memory consumption

# show_time_zone_default_utc
# https://github.com/apache/datafusion/issues/3255
# `SHOW TIME ZONE` reports the `datafusion.execution.time_zone` setting, +00:00 here
query TT
SHOW TIME ZONE
----
datafusion.execution.time_zone +00:00

# show_timezone_default_utc
# https://github.com/apache/datafusion/issues/3255
# The one-word spelling `TIMEZONE` returns the same row
query TT
SHOW TIMEZONE
----
datafusion.execution.time_zone +00:00


# show_time_zone_default_utc_verbose
# https://github.com/apache/datafusion/issues/3255
# VERBOSE adds the setting's description as a third column
query TTT
SHOW TIME ZONE VERBOSE
----
datafusion.execution.time_zone +00:00 The default time zone Some functions, e.g. `EXTRACT(HOUR from SOME_TIME)`, shift the underlying datetime according to this time zone, and then extract the hour

# show_timezone_default_utc_verbose
# https://github.com/apache/datafusion/issues/3255
# Same check using the one-word spelling `TIMEZONE`
query TTT
SHOW TIMEZONE VERBOSE
----
datafusion.execution.time_zone +00:00 The default time zone Some functions, e.g. `EXTRACT(HOUR from SOME_TIME)`, shift the underlying datetime according to this time zone, and then extract the hour


# show empty verbose
# With no variable name before `VERBOSE`, planning fails and the error
# reports an empty name ('').
statement error DataFusion error: Error during planning: '' is not a variable which can be viewed with 'SHOW'
SHOW VERBOSE

# show nonsense verbose
# An unknown variable is rejected as well; the error message shows the name
# in lowercase even though it was written in uppercase.
statement error DataFusion error: Error during planning: 'nonsense' is not a variable which can be viewed with 'SHOW'
SHOW NONSENSE VERBOSE

# information_schema_describe_table
# DESCRIBE is run against the same table addressed by bare name, by
# schema.table, and by catalog.schema.table. Each form is expected to return
# the same three-column rows: column name, data type and a YES/NO flag
# (presumably nullability -- TODO confirm).

## some_table
statement ok
CREATE OR REPLACE TABLE some_table AS VALUES (1,2),(3,4);

query TTT rowsort
DESCRIBE some_table
----
column1 Int64 YES
column2 Int64 YES

# The table was created with a bare name but is dropped as `public.some_table`.
statement ok
DROP TABLE public.some_table;

## public.some_table

statement ok
CREATE OR REPLACE TABLE public.some_table AS VALUES (1,2),(3,4);

query TTT rowsort
DESCRIBE public.some_table
----
column1 Int64 YES
column2 Int64 YES

statement ok
DROP TABLE public.some_table;

## datafusion.public.some_table

statement ok
CREATE OR REPLACE TABLE datafusion.public.some_table AS VALUES (1,2),(3,4);

query TTT rowsort
DESCRIBE datafusion.public.some_table
----
column1 Int64 YES
column2 Int64 YES

statement ok
DROP TABLE datafusion.public.some_table;

# information_schema_describe_table_not_exists
# Here `table` is used as the table name; it is resolved to
# `datafusion.public.table`, which does not exist.

statement error Error during planning: table 'datafusion.public.table' not found
describe table;


# information_schema_show_tables
# SHOW TABLES returns catalog, schema, table name and table type. At this
# point the listing is expected to contain only the information_schema views.
query TTTT rowsort
SHOW TABLES
----
datafusion information_schema columns VIEW
datafusion information_schema df_settings VIEW
datafusion information_schema parameters VIEW
datafusion information_schema routines VIEW
datafusion information_schema schemata VIEW
datafusion information_schema tables VIEW
datafusion information_schema views VIEW


# information_schema_show_tables_no_information_schema
# SHOW TABLES must fail at planning time while information_schema is disabled.

statement ok
set datafusion.catalog.information_schema = false;

statement error Error during planning: SHOW TABLES is not supported unless information_schema is enabled
SHOW TABLES

# Re-enable information_schema for the tests that follow.
statement ok
set datafusion.catalog.information_schema = true;


# information_schema_show_columns
# Table `t` (a single Int32 column `i`) is used by the SHOW COLUMNS tests below.
statement ok
CREATE TABLE t AS SELECT 1::int as i;

# Filtering SHOW COLUMNS with LIKE or WHERE is rejected as not implemented.
statement error DataFusion error: This feature is not implemented: SHOW COLUMNS with WHERE or LIKE is not supported
SHOW COLUMNS FROM t LIKE 'f';

statement error DataFusion error: This feature is not implemented: SHOW COLUMNS with WHERE or LIKE is not supported
SHOW COLUMNS FROM t WHERE column_name = 'bar';

# Plain SHOW COLUMNS returns six values for each column of the table.
query TTTTTT
SHOW COLUMNS FROM t;
----
datafusion public t i Int32 NO

# The quoted identifier "T" keeps its case and therefore does not match table `t`.
# This isn't ideal but it is consistent behavior for `SELECT * from "T"`
statement error Error during planning: table 'datafusion.public.T' not found
SHOW columns from "T"

# information_schema_show_columns_full_extended
# FULL widens the output to fifteen values per column.
query TTTTITTTIIIIIIT
SHOW FULL COLUMNS FROM t;
----
datafusion public t i 0 NULL NO Int32 NULL NULL 32 2 NULL NULL NULL

# EXTENDED is expected to produce the same row as FULL above.
query TTTTITTTIIIIIIT
SHOW EXTENDED COLUMNS FROM t;
----
datafusion public t i 0 NULL NO Int32 NULL NULL 32 2 NULL NULL NULL

# information_schema_show_columns_no_information_schema
# SHOW COLUMNS must fail at planning time while information_schema is disabled.

statement ok
set datafusion.catalog.information_schema = false;

statement error Error during planning: SHOW COLUMNS is not supported unless information_schema is enabled
SHOW COLUMNS FROM t

# Re-enable information_schema for the tests that follow.
statement ok
set datafusion.catalog.information_schema = true;


# information_schema_show_columns_names()
# SHOW COLUMNS accepts schema-qualified and fully qualified table names and
# returns the same row for both.
query TTTTTT
SHOW columns from public.t
----
datafusion public t i Int32 NO

query TTTTTT
SHOW columns from datafusion.public.t
----
datafusion public t i Int32 NO

# A missing table is reported with its fully qualified name, whether it was
# referenced by bare name or by catalog.schema.table.
statement error Error during planning: table 'datafusion.public.t2' not found
SHOW columns from t2

statement error Error during planning: table 'datafusion.public.t2' not found
SHOW columns from datafusion.public.t2


# show_non_existing_variable
# The error message shows the variable name in lowercase.
statement error DataFusion error: Error during planning: 'something_unknown' is not a variable which can be viewed with 'SHOW'
SHOW SOMETHING_UNKNOWN;

# Clean up the table `t` used by the SHOW COLUMNS tests.
statement ok
DROP TABLE t;

# show_unsupported_when_information_schema_off
# With information_schema disabled, SHOW <variable> fails at planning time,
# with or without VERBOSE.

statement ok
set datafusion.catalog.information_schema = false;

statement error Error during planning: SHOW \[VARIABLE\] is not supported unless information_schema is enabled
SHOW SOMETHING

statement error Error during planning: SHOW \[VARIABLE\] is not supported unless information_schema is enabled
SHOW SOMETHING VERBOSE

# Re-enable information_schema for the tests that follow.
statement ok
set datafusion.catalog.information_schema = true;


# show_create_view()
# SHOW CREATE TABLE applied to a view returns catalog, schema, name and the
# CREATE VIEW statement that defined it.
statement ok
CREATE TABLE abc AS VALUES (1,2,3), (4,5,6);

statement ok
CREATE VIEW xyz AS SELECT * FROM abc

query TTTT
SHOW CREATE TABLE xyz
----
datafusion public xyz CREATE VIEW xyz AS SELECT * FROM abc

# Clean up: the base table is dropped first, then the view.
statement ok
DROP TABLE abc;

statement ok
DROP VIEW xyz;

# show_create_view_in_catalog
# Same as above, but the view lives in a non-default schema; the echoed
# definition keeps the schema-qualified view name.
statement ok
CREATE TABLE abc AS VALUES (1,2,3), (4,5,6)

statement ok
CREATE SCHEMA test;

statement ok
CREATE VIEW test.xyz AS SELECT * FROM abc;

query TTTT
SHOW CREATE TABLE test.xyz
----
datafusion test xyz CREATE VIEW test.xyz AS SELECT * FROM abc

# Dropping the schema is refused while the view test.xyz still exists in it.
statement error DataFusion error: Execution error: Cannot drop schema test because other tables depend on it: xyz
DROP SCHEMA test;

statement ok
DROP TABLE abc;

# NOTE(review): only the view is dropped here; schema `test` itself is not
# dropped in this section -- confirm that is intentional.
statement ok
DROP VIEW test.xyz


# show_external_create_table()
# For an external table, the statement echoed by SHOW CREATE TABLE prints the
# location without quotes and does not include the OPTIONS clause.
statement ok
CREATE EXTERNAL TABLE abc
STORED AS CSV
LOCATION '../../testing/data/csv/aggregate_test_100.csv'
OPTIONS ('format.has_header' 'true');

query TTTT
SHOW CREATE TABLE abc;
----
datafusion public abc CREATE EXTERNAL TABLE abc STORED AS CSV LOCATION ../../testing/data/csv/aggregate_test_100.csv

# string_agg has several signatures with different argument types but the
# same return type. information_schema.routines must list it only once
# rather than once per signature.
query TTT
select routine_name, data_type, function_type from information_schema.routines where routine_name = 'string_agg';
----
string_agg LargeUtf8 AGGREGATE

# Test that every function type is included in the result: date_trunc is
# SCALAR, rank is WINDOW and string_agg is AGGREGATE.
# Rows are compared after `rowsort`, so the expected order below is the
# sorted order of the rendered rows.
query TTTTTTTBTTTT rowsort
select * from information_schema.routines where routine_name = 'date_trunc' OR routine_name = 'string_agg' OR routine_name = 'rank' ORDER BY routine_name
----
datafusion public date_trunc datafusion public date_trunc FUNCTION true Timestamp(Microsecond, None) SCALAR Truncates a timestamp value to a specified precision. date_trunc(precision, expression)
datafusion public date_trunc datafusion public date_trunc FUNCTION true Timestamp(Microsecond, Some("+TZ")) SCALAR Truncates a timestamp value to a specified precision. date_trunc(precision, expression)
datafusion public date_trunc datafusion public date_trunc FUNCTION true Timestamp(Millisecond, None) SCALAR Truncates a timestamp value to a specified precision. date_trunc(precision, expression)
datafusion public date_trunc datafusion public date_trunc FUNCTION true Timestamp(Millisecond, Some("+TZ")) SCALAR Truncates a timestamp value to a specified precision. date_trunc(precision, expression)
datafusion public date_trunc datafusion public date_trunc FUNCTION true Timestamp(Nanosecond, None) SCALAR Truncates a timestamp value to a specified precision. date_trunc(precision, expression)
datafusion public date_trunc datafusion public date_trunc FUNCTION true Timestamp(Nanosecond, Some("+TZ")) SCALAR Truncates a timestamp value to a specified precision. date_trunc(precision, expression)
datafusion public date_trunc datafusion public date_trunc FUNCTION true Timestamp(Second, None) SCALAR Truncates a timestamp value to a specified precision. date_trunc(precision, expression)
datafusion public date_trunc datafusion public date_trunc FUNCTION true Timestamp(Second, Some("+TZ")) SCALAR Truncates a timestamp value to a specified precision. date_trunc(precision, expression)
datafusion public rank datafusion public rank FUNCTION true NULL WINDOW Returns the rank of the current row within its partition, allowing gaps between ranks. This function provides a ranking similar to `row_number`, but skips ranks for identical values. rank()
datafusion public string_agg datafusion public string_agg FUNCTION true LargeUtf8 AGGREGATE Concatenates the values of string expressions and places separator values between them. string_agg(expression, delimiter)

# `now` is expected to be reported as non-deterministic.
query B
select is_deterministic from information_schema.routines where routine_name = 'now';
----
false

# Test that every function type is included in the result.
# This query has no `rowsort`, so rows are compared in the order returned.
# Each signature (rid) contributes several rows, which makes
# (specific_name, rid) a non-unique sort key. parameter_mode and
# ordinal_position are added as tie-breakers so every row has a fixed
# position: within one rid the IN parameters come first, by position,
# followed by the OUT row.
query TTTITTTTBI
select * from information_schema.parameters where specific_name = 'date_trunc' OR specific_name = 'string_agg' OR specific_name = 'rank' ORDER BY specific_name, rid, parameter_mode, ordinal_position;
----
datafusion public date_trunc 1 IN precision Utf8 NULL false 0
datafusion public date_trunc 2 IN expression Timestamp(Nanosecond, None) NULL false 0
datafusion public date_trunc 1 OUT NULL Timestamp(Nanosecond, None) NULL false 0
datafusion public date_trunc 1 IN precision Utf8View NULL false 1
datafusion public date_trunc 2 IN expression Timestamp(Nanosecond, None) NULL false 1
datafusion public date_trunc 1 OUT NULL Timestamp(Nanosecond, None) NULL false 1
datafusion public date_trunc 1 IN precision Utf8 NULL false 2
datafusion public date_trunc 2 IN expression Timestamp(Nanosecond, Some("+TZ")) NULL false 2
datafusion public date_trunc 1 OUT NULL Timestamp(Nanosecond, Some("+TZ")) NULL false 2
datafusion public date_trunc 1 IN precision Utf8View NULL false 3
datafusion public date_trunc 2 IN expression Timestamp(Nanosecond, Some("+TZ")) NULL false 3
datafusion public date_trunc 1 OUT NULL Timestamp(Nanosecond, Some("+TZ")) NULL false 3
datafusion public date_trunc 1 IN precision Utf8 NULL false 4
datafusion public date_trunc 2 IN expression Timestamp(Microsecond, None) NULL false 4
datafusion public date_trunc 1 OUT NULL Timestamp(Microsecond, None) NULL false 4
datafusion public date_trunc 1 IN precision Utf8View NULL false 5
datafusion public date_trunc 2 IN expression Timestamp(Microsecond, None) NULL false 5
datafusion public date_trunc 1 OUT NULL Timestamp(Microsecond, None) NULL false 5
datafusion public date_trunc 1 IN precision Utf8 NULL false 6
datafusion public date_trunc 2 IN expression Timestamp(Microsecond, Some("+TZ")) NULL false 6
datafusion public date_trunc 1 OUT NULL Timestamp(Microsecond, Some("+TZ")) NULL false 6
datafusion public date_trunc 1 IN precision Utf8View NULL false 7
datafusion public date_trunc 2 IN expression Timestamp(Microsecond, Some("+TZ")) NULL false 7
datafusion public date_trunc 1 OUT NULL Timestamp(Microsecond, Some("+TZ")) NULL false 7
datafusion public date_trunc 1 IN precision Utf8 NULL false 8
datafusion public date_trunc 2 IN expression Timestamp(Millisecond, None) NULL false 8
datafusion public date_trunc 1 OUT NULL Timestamp(Millisecond, None) NULL false 8
datafusion public date_trunc 1 IN precision Utf8View NULL false 9
datafusion public date_trunc 2 IN expression Timestamp(Millisecond, None) NULL false 9
datafusion public date_trunc 1 OUT NULL Timestamp(Millisecond, None) NULL false 9
datafusion public date_trunc 1 IN precision Utf8 NULL false 10
datafusion public date_trunc 2 IN expression Timestamp(Millisecond, Some("+TZ")) NULL false 10
datafusion public date_trunc 1 OUT NULL Timestamp(Millisecond, Some("+TZ")) NULL false 10
datafusion public date_trunc 1 IN precision Utf8View NULL false 11
datafusion public date_trunc 2 IN expression Timestamp(Millisecond, Some("+TZ")) NULL false 11
datafusion public date_trunc 1 OUT NULL Timestamp(Millisecond, Some("+TZ")) NULL false 11
datafusion public date_trunc 1 IN precision Utf8 NULL false 12
datafusion public date_trunc 2 IN expression Timestamp(Second, None) NULL false 12
datafusion public date_trunc 1 OUT NULL Timestamp(Second, None) NULL false 12
datafusion public date_trunc 1 IN precision Utf8View NULL false 13
datafusion public date_trunc 2 IN expression Timestamp(Second, None) NULL false 13
datafusion public date_trunc 1 OUT NULL Timestamp(Second, None) NULL false 13
datafusion public date_trunc 1 IN precision Utf8 NULL false 14
datafusion public date_trunc 2 IN expression Timestamp(Second, Some("+TZ")) NULL false 14
datafusion public date_trunc 1 OUT NULL Timestamp(Second, Some("+TZ")) NULL false 14
datafusion public date_trunc 1 IN precision Utf8View NULL false 15
datafusion public date_trunc 2 IN expression Timestamp(Second, Some("+TZ")) NULL false 15
datafusion public date_trunc 1 OUT NULL Timestamp(Second, Some("+TZ")) NULL false 15
datafusion public string_agg 1 IN expression LargeUtf8 NULL false 0
datafusion public string_agg 2 IN delimiter Utf8 NULL false 0
datafusion public string_agg 1 OUT NULL LargeUtf8 NULL false 0
datafusion public string_agg 1 IN expression LargeUtf8 NULL false 1
datafusion public string_agg 2 IN delimiter LargeUtf8 NULL false 1
datafusion public string_agg 1 OUT NULL LargeUtf8 NULL false 1
datafusion public string_agg 1 IN expression LargeUtf8 NULL false 2
datafusion public string_agg 2 IN delimiter Null NULL false 2
datafusion public string_agg 1 OUT NULL LargeUtf8 NULL false 2

# test variable length arguments
# `concat` is listed once per string type (rid 0-2). Its IN rows carry
# is_variadic = true, while the OUT rows carry false.
query TTTBI rowsort
select specific_name, data_type, parameter_mode, is_variadic, rid from information_schema.parameters where specific_name = 'concat';
----
concat LargeUtf8 IN true 2
concat LargeUtf8 OUT false 2
concat Utf8 IN true 1
concat Utf8 OUT false 1
concat Utf8View IN true 0
concat Utf8View OUT false 0

# test coercion signature
# `repeat` is listed under three signatures (rid 0-2), one per string type
# accepted for argument 1; argument 2 is Int64 in each of them. The Utf8View
# signature (rid 2) reports Utf8 as its OUT type.
query TTITI rowsort
select specific_name, data_type, ordinal_position, parameter_mode, rid from information_schema.parameters where specific_name = 'repeat';
----
repeat Int64 2 IN 0
repeat Int64 2 IN 1
repeat Int64 2 IN 2
repeat LargeUtf8 1 IN 1
repeat LargeUtf8 1 OUT 1
repeat Utf8 1 IN 0
repeat Utf8 1 OUT 0
repeat Utf8 1 OUT 2
repeat Utf8View 1 IN 2

# SHOW FUNCTIONS LIKE '<pattern>' is expected to return one row per signature
# of the matching function: name, return type, parameter names, parameter
# types, function type, description and syntax example.
query TT??TTT rowsort
show functions like 'date_trunc';
----
date_trunc Timestamp(Microsecond, None) [precision, expression] [Utf8, Timestamp(Microsecond, None)] SCALAR Truncates a timestamp value to a specified precision. date_trunc(precision, expression)
date_trunc Timestamp(Microsecond, None) [precision, expression] [Utf8View, Timestamp(Microsecond, None)] SCALAR Truncates a timestamp value to a specified precision. date_trunc(precision, expression)
date_trunc Timestamp(Microsecond, Some("+TZ")) [precision, expression] [Utf8, Timestamp(Microsecond, Some("+TZ"))] SCALAR Truncates a timestamp value to a specified precision. date_trunc(precision, expression)
date_trunc Timestamp(Microsecond, Some("+TZ")) [precision, expression] [Utf8View, Timestamp(Microsecond, Some("+TZ"))] SCALAR Truncates a timestamp value to a specified precision. date_trunc(precision, expression)
date_trunc Timestamp(Millisecond, None) [precision, expression] [Utf8, Timestamp(Millisecond, None)] SCALAR Truncates a timestamp value to a specified precision. date_trunc(precision, expression)
date_trunc Timestamp(Millisecond, None) [precision, expression] [Utf8View, Timestamp(Millisecond, None)] SCALAR Truncates a timestamp value to a specified precision. date_trunc(precision, expression)
date_trunc Timestamp(Millisecond, Some("+TZ")) [precision, expression] [Utf8, Timestamp(Millisecond, Some("+TZ"))] SCALAR Truncates a timestamp value to a specified precision. date_trunc(precision, expression)
date_trunc Timestamp(Millisecond, Some("+TZ")) [precision, expression] [Utf8View, Timestamp(Millisecond, Some("+TZ"))] SCALAR Truncates a timestamp value to a specified precision. date_trunc(precision, expression)
date_trunc Timestamp(Nanosecond, None) [precision, expression] [Utf8, Timestamp(Nanosecond, None)] SCALAR Truncates a timestamp value to a specified precision. date_trunc(precision, expression)
date_trunc Timestamp(Nanosecond, None) [precision, expression] [Utf8View, Timestamp(Nanosecond, None)] SCALAR Truncates a timestamp value to a specified precision. date_trunc(precision, expression)
date_trunc Timestamp(Nanosecond, Some("+TZ")) [precision, expression] [Utf8, Timestamp(Nanosecond, Some("+TZ"))] SCALAR Truncates a timestamp value to a specified precision. date_trunc(precision, expression)
date_trunc Timestamp(Nanosecond, Some("+TZ")) [precision, expression] [Utf8View, Timestamp(Nanosecond, Some("+TZ"))] SCALAR Truncates a timestamp value to a specified precision. date_trunc(precision, expression)
date_trunc Timestamp(Second, None) [precision, expression] [Utf8, Timestamp(Second, None)] SCALAR Truncates a timestamp value to a specified precision. date_trunc(precision, expression)
date_trunc Timestamp(Second, None) [precision, expression] [Utf8View, Timestamp(Second, None)] SCALAR Truncates a timestamp value to a specified precision. date_trunc(precision, expression)
date_trunc Timestamp(Second, Some("+TZ")) [precision, expression] [Utf8, Timestamp(Second, Some("+TZ"))] SCALAR Truncates a timestamp value to a specified precision. date_trunc(precision, expression)
date_trunc Timestamp(Second, Some("+TZ")) [precision, expression] [Utf8View, Timestamp(Second, Some("+TZ"))] SCALAR Truncates a timestamp value to a specified precision. date_trunc(precision, expression)

statement ok
show functions