49
49
import com .google .common .collect .ImmutableMap ;
50
50
import com .google .common .collect .ImmutableSet ;
51
51
import com .google .common .collect .Sets ;
52
+ import io .airlift .units .DataSize ;
52
53
import org .apache .iceberg .BaseTable ;
53
54
import org .apache .iceberg .ContentFile ;
54
55
import org .apache .iceberg .ContentScanTask ;
61
62
import org .apache .iceberg .PartitionField ;
62
63
import org .apache .iceberg .PartitionSpec ;
63
64
import org .apache .iceberg .RowLevelOperationMode ;
65
+ import org .apache .iceberg .Scan ;
64
66
import org .apache .iceberg .Schema ;
65
67
import org .apache .iceberg .Snapshot ;
66
68
import org .apache .iceberg .SortOrder ;
196
198
import static org .apache .iceberg .TableProperties .METRICS_MAX_INFERRED_COLUMN_DEFAULTS_DEFAULT ;
197
199
import static org .apache .iceberg .TableProperties .ORC_COMPRESSION ;
198
200
import static org .apache .iceberg .TableProperties .PARQUET_COMPRESSION ;
201
+ import static org .apache .iceberg .TableProperties .SPLIT_SIZE ;
202
+ import static org .apache .iceberg .TableProperties .SPLIT_SIZE_DEFAULT ;
199
203
import static org .apache .iceberg .TableProperties .UPDATE_MODE ;
200
204
import static org .apache .iceberg .TableProperties .WRITE_LOCATION_PROVIDER_IMPL ;
201
205
import static org .apache .iceberg .types .Type .TypeID .BINARY ;
@@ -863,10 +867,10 @@ public static long getDataSequenceNumber(ContentFile<?> file)
863
867
* @param requestedSchema If provided, only delete files with this schema will be provided
864
868
*/
865
869
public static CloseableIterable <DeleteFile > getDeleteFiles (Table table ,
866
- long snapshot ,
867
- TupleDomain <IcebergColumnHandle > filter ,
868
- Optional <Set <Integer >> requestedPartitionSpec ,
869
- Optional <Set <Integer >> requestedSchema )
870
+ long snapshot ,
871
+ TupleDomain <IcebergColumnHandle > filter ,
872
+ Optional <Set <Integer >> requestedPartitionSpec ,
873
+ Optional <Set <Integer >> requestedSchema )
870
874
{
871
875
Expression filterExpression = toIcebergExpression (filter );
872
876
CloseableIterable <FileScanTask > fileTasks = table .newScan ().useSnapshot (snapshot ).filter (filterExpression ).planFiles ();
@@ -1042,9 +1046,9 @@ private static class DeleteFilesIterator
1042
1046
private DeleteFile currentFile ;
1043
1047
1044
1048
private DeleteFilesIterator (Map <Integer , PartitionSpec > partitionSpecsById ,
1045
- CloseableIterator <FileScanTask > fileTasks ,
1046
- Optional <Set <Integer >> requestedPartitionSpec ,
1047
- Optional <Set <Integer >> requestedSchema )
1049
+ CloseableIterator <FileScanTask > fileTasks ,
1050
+ Optional <Set <Integer >> requestedPartitionSpec ,
1051
+ Optional <Set <Integer >> requestedSchema )
1048
1052
{
1049
1053
this .partitionSpecsById = partitionSpecsById ;
1050
1054
this .fileTasks = fileTasks ;
@@ -1158,6 +1162,9 @@ public static Map<String, String> populateTableProperties(ConnectorTableMetadata
1158
1162
1159
1163
Integer metricsMaxInferredColumn = IcebergTableProperties .getMetricsMaxInferredColumn (tableMetadata .getProperties ());
1160
1164
propertiesBuilder .put (METRICS_MAX_INFERRED_COLUMN_DEFAULTS , String .valueOf (metricsMaxInferredColumn ));
1165
+
1166
+ propertiesBuilder .put (SPLIT_SIZE , String .valueOf (IcebergTableProperties .getTargetSplitSize (tableMetadata .getProperties ())));
1167
+
1161
1168
return propertiesBuilder .build ();
1162
1169
}
1163
1170
@@ -1228,8 +1235,8 @@ public static Optional<PartitionData> partitionDataFromStructLike(PartitionSpec
1228
1235
1229
1236
/**
1230
1237
* Get the metadata location for target {@link Table},
1231
- * considering iceberg table properties {@code WRITE_METADATA_LOCATION}
1232
- * * /
1238
+ * considering the Iceberg table property {@code WRITE_METADATA_LOCATION}
1239
+ */
1233
1240
public static String metadataLocation (Table icebergTable )
1234
1241
{
1235
1242
String metadataLocation = icebergTable .properties ().get (TableProperties .WRITE_METADATA_LOCATION );
@@ -1244,8 +1251,8 @@ public static String metadataLocation(Table icebergTable)
1244
1251
1245
1252
/**
1246
1253
* Get the data location for target {@link Table},
1247
- * considering iceberg table properties {@code WRITE_DATA_LOCATION}, {@code OBJECT_STORE_PATH} and {@code WRITE_FOLDER_STORAGE_LOCATION}
1248
- * * /
1254
+ * considering iceberg table properties {@code WRITE_DATA_LOCATION}, {@code OBJECT_STORE_PATH} and {@code WRITE_FOLDER_STORAGE_LOCATION}
1255
+ */
1249
1256
public static String dataLocation (Table icebergTable )
1250
1257
{
1251
1258
Map <String , String > properties = icebergTable .properties ();
@@ -1261,4 +1268,22 @@ public static String dataLocation(Table icebergTable)
1261
1268
}
1262
1269
return dataLocation ;
1263
1270
}
1271
+
1272
+ public static Long getSplitSize (Table table ) {
1273
+ return Long .parseLong (table .properties ()
1274
+ .getOrDefault (SPLIT_SIZE ,
1275
+ String .valueOf (SPLIT_SIZE_DEFAULT )));
1276
+ }
1277
+
1278
+ public static DataSize getTargetSplitSize (long sessionValueProperty , long icebergScanTargetSplitSize )
1279
+ {
1280
+ return Optional .of (DataSize .succinctBytes (sessionValueProperty ))
1281
+ .filter (size -> !size .equals (DataSize .succinctBytes (0 )))
1282
+ .orElse (DataSize .succinctBytes (icebergScanTargetSplitSize ));
1283
+ }
1284
+
1285
+ public static DataSize getTargetSplitSize (ConnectorSession session , Scan <?, ?, ?> scan )
1286
+ {
1287
+ return getTargetSplitSize (IcebergSessionProperties .getTargetSplitSize (session ), scan .targetSplitSize ());
1288
+ }
1264
1289
}
0 commit comments