@@ -322,6 +322,7 @@ def scanner(
322
322
io_buffer_size : Optional [int ] = None ,
323
323
late_materialization : Optional [bool | List [str ]] = None ,
324
324
use_scalar_index : Optional [bool ] = None ,
325
+ include_deleted_rows : Optional [bool ] = None ,
325
326
) -> LanceScanner :
326
327
"""Return a Scanner that can support various pushdowns.
327
328
@@ -414,6 +415,14 @@ def scanner(
414
415
fast_search: bool, default False
415
416
If True, then the search will only be performed on the indexed data, which
416
417
yields faster search time.
418
+ include_deleted_rows: bool, default False
419
+ If True, then rows that have been deleted, but are still present in the
420
+ fragment, will be returned. These rows will have the _rowid column set
421
+ to null. All other columns will reflect the value stored on disk and may
422
+ not be null.
423
+
424
+ Note: if this is a search operation, or a take operation (including scalar
425
+ indexed scans) then deleted rows cannot be returned.
417
426
418
427
Notes
419
428
-----
@@ -463,6 +472,7 @@ def setopt(opt, val):
463
472
setopt (builder .use_stats , use_stats )
464
473
setopt (builder .use_scalar_index , use_scalar_index )
465
474
setopt (builder .fast_search , fast_search )
475
+ setopt (builder .include_deleted_rows , include_deleted_rows )
466
476
467
477
# columns=None has a special meaning. we can't treat it as "user didn't specify"
468
478
if self ._default_scan_options is None :
@@ -543,6 +553,7 @@ def to_table(
543
553
io_buffer_size : Optional [int ] = None ,
544
554
late_materialization : Optional [bool | List [str ]] = None ,
545
555
use_scalar_index : Optional [bool ] = None ,
556
+ include_deleted_rows : Optional [bool ] = None ,
546
557
) -> pa .Table :
547
558
"""Read the data into memory as a :py:class:`pyarrow.Table`
548
559
@@ -612,6 +623,14 @@ def to_table(
612
623
currently only supports a single column in the columns list.
613
624
- query: str
614
625
The query string to search for.
626
+ include_deleted_rows: bool, optional, default False
627
+ If True, then rows that have been deleted, but are still present in the
628
+ fragment, will be returned. These rows will have the _rowid column set
629
+ to null. All other columns will reflect the value stored on disk and may
630
+ not be null.
631
+
632
+ Note: if this is a search operation, or a take operation (including scalar
633
+ indexed scans) then deleted rows cannot be returned.
615
634
616
635
Notes
617
636
-----
@@ -639,6 +658,7 @@ def to_table(
639
658
use_stats = use_stats ,
640
659
fast_search = fast_search ,
641
660
full_text_query = full_text_query ,
661
+ include_deleted_rows = include_deleted_rows ,
642
662
).to_table ()
643
663
644
664
@property
@@ -2982,6 +3002,7 @@ def __init__(self, ds: LanceDataset):
2982
3002
self ._fast_search = False
2983
3003
self ._full_text_query = None
2984
3004
self ._use_scalar_index = None
3005
+ self ._include_deleted_rows = None
2985
3006
2986
3007
def apply_defaults (self , default_opts : Dict [str , Any ]) -> ScannerBuilder :
2987
3008
for key , value in default_opts .items ():
@@ -3259,6 +3280,15 @@ def fast_search(self, flag: bool) -> ScannerBuilder:
3259
3280
self ._fast_search = flag
3260
3281
return self
3261
3282
3283
def include_deleted_rows(self, flag: bool) -> ScannerBuilder:
    """Set whether the scan should return deleted rows.

    When enabled, rows that have been deleted, but are still present in the
    fragment, will be returned. These rows will have the _rowid column set
    to null. All other columns will reflect the value stored on disk and may
    not be null.

    Note: if this is a search operation, or a take operation (including scalar
    indexed scans) then deleted rows cannot be returned.

    Parameters
    ----------
    flag: bool
        If True, deleted rows still present in the fragment are included
        in the scan output.

    Returns
    -------
    ScannerBuilder
        This builder, so that calls can be chained.
    """
    # Only recorded here; the value is handed to the scanner in to_scanner().
    self._include_deleted_rows = flag
    return self
3291
+
3262
3292
def full_text_search (
3263
3293
self ,
3264
3294
query : str ,
@@ -3296,6 +3326,7 @@ def to_scanner(self) -> LanceScanner:
3296
3326
self ._full_text_query ,
3297
3327
self ._late_materialization ,
3298
3328
self ._use_scalar_index ,
3329
+ self ._include_deleted_rows ,
3299
3330
)
3300
3331
return LanceScanner (scanner , self .ds )
3301
3332
0 commit comments