Skip to content

Commit

Permalink
chore: set streaming opt-in
Browse files Browse the repository at this point in the history
Signed-off-by: Ion Koutsouris <15728914+ion-elgreco@users.noreply.github.com>
  • Loading branch information
ion-elgreco committed Feb 2, 2025
1 parent 64cb62d commit 68597e1
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 119 deletions.
5 changes: 2 additions & 3 deletions python/deltalake/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -975,6 +975,7 @@ def merge(
error_on_type_mismatch: bool = True,
writer_properties: Optional[WriterProperties] = None,
large_dtypes: Optional[bool] = None,
streaming: bool = False,
custom_metadata: Optional[Dict[str, str]] = None,
post_commithook_properties: Optional[PostCommitHookProperties] = None,
commit_properties: Optional[CommitProperties] = None,
Expand All @@ -991,6 +992,7 @@ def merge(
error_on_type_mismatch: specify whether merge will return an error if data types are mismatched :default = True
writer_properties: Pass writer properties to the Rust parquet writer
large_dtypes: Deprecated, will be removed in 1.0
streaming: If True, execute the MERGE using a streaming LazyMemoryExec plan
arrow_schema_conversion_mode: Large converts all types of data schema into Large Arrow types, passthrough keeps string/binary/list types untouched
custom_metadata: Deprecated and will be removed in future versions. Use commit_properties instead.
post_commithook_properties: properties for the post commit hook. If None, default values are used.
Expand Down Expand Up @@ -1029,17 +1031,14 @@ def merge(
convert_pyarrow_table,
)

streaming = False
if isinstance(source, pyarrow.RecordBatchReader):
source = convert_pyarrow_recordbatchreader(source, conversion_mode)
streaming = True
elif isinstance(source, pyarrow.RecordBatch):
source = convert_pyarrow_recordbatch(source, conversion_mode)
elif isinstance(source, pyarrow.Table):
source = convert_pyarrow_table(source, conversion_mode)
elif isinstance(source, ds.Dataset):
source = convert_pyarrow_dataset(source, conversion_mode)
streaming = True
elif _has_pandas and isinstance(source, pd.DataFrame):
source = convert_pyarrow_table(
pyarrow.Table.from_pandas(source), conversion_mode
Expand Down
Loading

0 comments on commit 68597e1

Please sign in to comment.