49
49
import com .google .common .collect .ImmutableMap ;
50
50
import com .google .common .collect .ImmutableSet ;
51
51
import com .google .common .collect .Sets ;
52
+ import io .airlift .units .DataSize ;
52
53
import org .apache .iceberg .BaseTable ;
53
54
import org .apache .iceberg .ContentFile ;
54
55
import org .apache .iceberg .ContentScanTask ;
61
62
import org .apache .iceberg .PartitionField ;
62
63
import org .apache .iceberg .PartitionSpec ;
63
64
import org .apache .iceberg .RowLevelOperationMode ;
65
+ import org .apache .iceberg .Scan ;
64
66
import org .apache .iceberg .Schema ;
65
67
import org .apache .iceberg .Snapshot ;
66
68
import org .apache .iceberg .SortOrder ;
130
132
import static com .facebook .presto .iceberg .ExpressionConverter .toIcebergExpression ;
131
133
import static com .facebook .presto .iceberg .FileContent .POSITION_DELETES ;
132
134
import static com .facebook .presto .iceberg .FileContent .fromIcebergFileContent ;
133
- import static com .facebook .presto .iceberg .FileFormat .PARQUET ;
134
135
import static com .facebook .presto .iceberg .IcebergColumnHandle .DATA_SEQUENCE_NUMBER_COLUMN_HANDLE ;
135
136
import static com .facebook .presto .iceberg .IcebergColumnHandle .PATH_COLUMN_HANDLE ;
136
137
import static com .facebook .presto .iceberg .IcebergErrorCode .ICEBERG_INVALID_FORMAT_VERSION ;
196
197
import static org .apache .iceberg .TableProperties .METRICS_MAX_INFERRED_COLUMN_DEFAULTS_DEFAULT ;
197
198
import static org .apache .iceberg .TableProperties .ORC_COMPRESSION ;
198
199
import static org .apache .iceberg .TableProperties .PARQUET_COMPRESSION ;
200
+ import static org .apache .iceberg .TableProperties .SPLIT_SIZE ;
201
+ import static org .apache .iceberg .TableProperties .SPLIT_SIZE_DEFAULT ;
199
202
import static org .apache .iceberg .TableProperties .UPDATE_MODE ;
200
203
import static org .apache .iceberg .TableProperties .WRITE_LOCATION_PROVIDER_IMPL ;
201
204
import static org .apache .iceberg .types .Type .TypeID .BINARY ;
@@ -856,10 +859,10 @@ public static long getDataSequenceNumber(ContentFile<?> file)
856
859
* @param requestedSchema If provided, only delete files with this schema will be provided
857
860
*/
858
861
public static CloseableIterable <DeleteFile > getDeleteFiles (Table table ,
859
- long snapshot ,
860
- TupleDomain <IcebergColumnHandle > filter ,
861
- Optional <Set <Integer >> requestedPartitionSpec ,
862
- Optional <Set <Integer >> requestedSchema )
862
+ long snapshot ,
863
+ TupleDomain <IcebergColumnHandle > filter ,
864
+ Optional <Set <Integer >> requestedPartitionSpec ,
865
+ Optional <Set <Integer >> requestedSchema )
863
866
{
864
867
Expression filterExpression = toIcebergExpression (filter );
865
868
CloseableIterable <FileScanTask > fileTasks = table .newScan ().useSnapshot (snapshot ).filter (filterExpression ).planFiles ();
@@ -1035,9 +1038,9 @@ private static class DeleteFilesIterator
1035
1038
private DeleteFile currentFile ;
1036
1039
1037
1040
private DeleteFilesIterator (Map <Integer , PartitionSpec > partitionSpecsById ,
1038
- CloseableIterator <FileScanTask > fileTasks ,
1039
- Optional <Set <Integer >> requestedPartitionSpec ,
1040
- Optional <Set <Integer >> requestedSchema )
1041
+ CloseableIterator <FileScanTask > fileTasks ,
1042
+ Optional <Set <Integer >> requestedPartitionSpec ,
1043
+ Optional <Set <Integer >> requestedSchema )
1041
1044
{
1042
1045
this .partitionSpecsById = partitionSpecsById ;
1043
1046
this .fileTasks = fileTasks ;
@@ -1151,6 +1154,9 @@ public static Map<String, String> populateTableProperties(ConnectorTableMetadata
1151
1154
1152
1155
Integer metricsMaxInferredColumn = IcebergTableProperties .getMetricsMaxInferredColumn (tableMetadata .getProperties ());
1153
1156
propertiesBuilder .put (METRICS_MAX_INFERRED_COLUMN_DEFAULTS , String .valueOf (metricsMaxInferredColumn ));
1157
+
1158
+ propertiesBuilder .put (SPLIT_SIZE , String .valueOf (IcebergTableProperties .getTargetSplitSize (tableMetadata .getProperties ())));
1159
+
1154
1160
return propertiesBuilder .build ();
1155
1161
}
1156
1162
@@ -1221,8 +1227,8 @@ public static Optional<PartitionData> partitionDataFromStructLike(PartitionSpec
1221
1227
1222
1228
/**
1223
1229
* Get the metadata location for target {@link Table},
1224
- * considering iceberg table properties {@code WRITE_METADATA_LOCATION}
1225
- * * /
1230
+ * considering the Iceberg table property {@code WRITE_METADATA_LOCATION}
1231
+ */
1226
1232
public static String metadataLocation (Table icebergTable )
1227
1233
{
1228
1234
String metadataLocation = icebergTable .properties ().get (TableProperties .WRITE_METADATA_LOCATION );
@@ -1237,8 +1243,8 @@ public static String metadataLocation(Table icebergTable)
1237
1243
1238
1244
/**
1239
1245
* Get the data location for target {@link Table},
1240
- * considering iceberg table properties {@code WRITE_DATA_LOCATION}, {@code OBJECT_STORE_PATH} and {@code WRITE_FOLDER_STORAGE_LOCATION}
1241
- * * /
1246
+ * considering iceberg table properties {@code WRITE_DATA_LOCATION}, {@code OBJECT_STORE_PATH} and {@code WRITE_FOLDER_STORAGE_LOCATION}
1247
+ */
1242
1248
public static String dataLocation (Table icebergTable )
1243
1249
{
1244
1250
Map <String , String > properties = icebergTable .properties ();
@@ -1254,4 +1260,23 @@ public static String dataLocation(Table icebergTable)
1254
1260
}
1255
1261
return dataLocation ;
1256
1262
}
1263
+
1264
+ public static Long getSplitSize (Table table )
1265
+ {
1266
+ return Long .parseLong (table .properties ()
1267
+ .getOrDefault (SPLIT_SIZE ,
1268
+ String .valueOf (SPLIT_SIZE_DEFAULT )));
1269
+ }
1270
+
1271
+ public static DataSize getTargetSplitSize (long sessionValueProperty , long icebergScanTargetSplitSize )
1272
+ {
1273
+ return Optional .of (DataSize .succinctBytes (sessionValueProperty ))
1274
+ .filter (size -> !size .equals (DataSize .succinctBytes (0 )))
1275
+ .orElse (DataSize .succinctBytes (icebergScanTargetSplitSize ));
1276
+ }
1277
+
1278
+ public static DataSize getTargetSplitSize (ConnectorSession session , Scan <?, ?, ?> scan )
1279
+ {
1280
+ return getTargetSplitSize (IcebergSessionProperties .getTargetSplitSize (session ), scan .targetSplitSize ());
1281
+ }
1257
1282
}
0 commit comments