15
15
16
16
import com .facebook .presto .Session ;
17
17
import com .facebook .presto .common .QualifiedObjectName ;
18
+ import com .facebook .presto .common .RuntimeMetric ;
19
+ import com .facebook .presto .common .RuntimeStats ;
18
20
import com .facebook .presto .common .predicate .Domain ;
19
21
import com .facebook .presto .common .predicate .Range ;
20
22
import com .facebook .presto .common .predicate .TupleDomain ;
52
54
import com .facebook .presto .testing .MaterializedResult ;
53
55
import com .facebook .presto .testing .QueryRunner ;
54
56
import com .facebook .presto .tests .AbstractTestQueryFramework ;
57
+ import com .facebook .presto .tests .DistributedQueryRunner ;
55
58
import com .google .common .collect .ImmutableList ;
56
59
import com .google .common .collect .ImmutableMap ;
57
60
import com .google .common .collect .ImmutableSet ;
75
78
import java .util .function .Function ;
76
79
import java .util .stream .Collectors ;
77
80
81
+ import static com .facebook .presto .SystemSessionProperties .OPTIMIZER_USE_HISTOGRAMS ;
78
82
import static com .facebook .presto .common .type .DoubleType .DOUBLE ;
79
83
import static com .facebook .presto .hive .BaseHiveColumnHandle .ColumnType .PARTITION_KEY ;
80
84
import static com .facebook .presto .hive .BaseHiveColumnHandle .ColumnType .REGULAR ;
85
89
import static com .facebook .presto .iceberg .IcebergQueryRunner .createIcebergQueryRunner ;
86
90
import static com .facebook .presto .iceberg .IcebergSessionProperties .HIVE_METASTORE_STATISTICS_MERGE_STRATEGY ;
87
91
import static com .facebook .presto .iceberg .IcebergSessionProperties .PUSHDOWN_FILTER_ENABLED ;
92
+ import static com .facebook .presto .iceberg .IcebergSessionProperties .STATISTICS_KLL_SKETCH_K_PARAMETER ;
93
+ import static com .facebook .presto .iceberg .statistics .KllHistogram .isKllHistogramSupportedType ;
88
94
import static com .facebook .presto .spi .StandardErrorCode .NOT_SUPPORTED ;
89
95
import static com .facebook .presto .spi .statistics .ColumnStatisticType .NUMBER_OF_DISTINCT_VALUES ;
90
96
import static com .facebook .presto .spi .statistics .ColumnStatisticType .TOTAL_SIZE_IN_BYTES ;
@@ -402,6 +408,41 @@ public void testPredicateOnlyColumnInStatisticsOutput(boolean pushdownFilterEnab
402
408
}
403
409
}
404
410
411
// Test added by this diff (every code line below carries a '+' marker).
// It builds its own Iceberg query runner with
// "iceberg.max-statistics-file-cache-size" set to "1024B", runs ANALYZE on
// lineitem, then reads the table statistics twice and checks runtime metrics
// and the returned column statistics.
// Presumably 1024B is chosen so that only part of the statistics fits in the
// cache (per the test name) -- TODO confirm against the cache implementation.
+ @ Test
412
+ public void testStatisticsCachePartialEviction ()
413
+ throws Exception
414
+ {
415
// try-with-resources: the dedicated query runner is closed when the test ends.
+ try (DistributedQueryRunner queryRunner = createIcebergQueryRunner (ImmutableMap .of (), ImmutableMap .of ("iceberg.max-statistics-file-cache-size" , "1024B" ))) {
416
+ Session session = Session .builder (queryRunner .getDefaultSession ())
417
+ // set histograms enabled
418
+ .setSystemProperty (OPTIMIZER_USE_HISTOGRAMS , "true" )
419
+ .setCatalogSessionProperty ("iceberg" , STATISTICS_KLL_SKETCH_K_PARAMETER , "32768" )
420
+ .build ();
421
+
422
+ queryRunner .execute (session , "ANALYZE lineitem" );
423
+ // get table statistics, to populate some of the cache
424
+ TableStatistics statistics = getTableStatistics (queryRunner , session , "lineitem" );
425
+ RuntimeStats runtimeStats = session .getRuntimeStats ();
426
// NOTE(review): the next two checks look up the first metric whose name
// contains "ColumnCount" / "PuffinFileSize" and assert inside ifPresent, so
// they are skipped without failing when no such metric was recorded.
// NOTE(review): if assertEquals is TestNG's, its parameter order is
// (actual, expected), which would make 32 the "actual" in a failure
// message -- confirm which assertEquals is statically imported.
+ runtimeStats .getMetrics ().keySet ().stream ().filter (name -> name .contains ("ColumnCount" )).findFirst ()
427
+ .ifPresent (stat -> assertEquals (32 , runtimeStats .getMetric (stat ).getSum ()));
428
+ runtimeStats .getMetrics ().keySet ().stream ().filter (name -> name .contains ("PuffinFileSize" )).findFirst ()
429
+ .ifPresent (stat -> assertTrue (runtimeStats .getMetric (stat ).getSum () > 1024 ));
430
+ // get them again to trigger retrieval of _some_ cached statistics
431
+ statistics = getTableStatistics (queryRunner , session , "lineitem" );
432
// Unlike the two checks above, this lookup is mandatory: a missing
// "PartialMiss" metric raises a RuntimeException, and its count must be > 0.
+ RuntimeMetric partialMiss = runtimeStats .getMetrics ().keySet ().stream ().filter (name -> name .contains ("PartialMiss" )).findFirst ()
433
+ .map (runtimeStats ::getMetric )
434
+ .orElseThrow (() -> new RuntimeException ("partial miss on statistics cache should have occurred" ));
435
+ assertTrue (partialMiss .getCount () > 0 );
436
+
437
// Statistics from the second read: every column must report a known
// distinct-values count, and columns whose type passes
// isKllHistogramSupportedType must also carry a histogram.
+ statistics .getColumnStatistics ().forEach ((handle , stats ) -> {
438
+ assertFalse (stats .getDistinctValuesCount ().isUnknown ());
439
+ if (isKllHistogramSupportedType (((IcebergColumnHandle ) handle ).getType ())) {
440
+ assertTrue (stats .getHistogram ().isPresent ());
441
+ }
442
+ });
443
+ }
444
+ }
445
+
405
446
private TableStatistics getScanStatsEstimate (Session session , @ Language ("SQL" ) String sql )
406
447
{
407
448
Plan plan = plan (sql , session );
@@ -418,14 +459,19 @@ private TableStatistics getScanStatsEstimate(Session session, @Language("SQL") S
418
459
new Constraint <>(node .getCurrentConstraint ())));
419
460
}
420
461
421
// Diff hunk: the instance helper (lines marked '-') is replaced by a static
// form that takes the QueryRunner explicitly (lines marked '+'), followed by
// an instance overload that forwards getQueryRunner() to the static form.
//
// Static form: begins a transaction on the runner's transaction manager,
// binds the given session to it with an AllowAllAccessControl, gathers the
// table's column handles via getColumnHandles, and asks the metadata for
// statistics on the handle returned by getAnalyzeTableHandle, using
// Constraint.alwaysTrue().
// NOTE(review): no commit or abort of txid is visible in this helper --
// confirm whether the transaction is expected to be cleaned up elsewhere.
- private TableStatistics getTableStatistics (Session session , String table )
462
+ private static TableStatistics getTableStatistics (QueryRunner queryRunner , Session session , String table )
422
463
{
423
- Metadata meta = getQueryRunner () .getMetadata ();
424
- TransactionId txid = getQueryRunner () .getTransactionManager ().beginTransaction (false );
425
- Session txnSession = session .beginTransactionId (txid , getQueryRunner () .getTransactionManager (), new AllowAllAccessControl ());
426
- Map <String , ColumnHandle > columnHandles = getColumnHandles (table , txnSession );
464
+ Metadata meta = queryRunner .getMetadata ();
465
+ TransactionId txid = queryRunner .getTransactionManager ().beginTransaction (false );
466
+ Session txnSession = session .beginTransactionId (txid , queryRunner .getTransactionManager (), new AllowAllAccessControl ());
467
+ Map <String , ColumnHandle > columnHandles = getColumnHandles (queryRunner , table , txnSession );
427
468
// Only the handle values are passed on; the column-name keys are dropped.
List <ColumnHandle > columnHandleList = new ArrayList <>(columnHandles .values ());
428
- return meta .getTableStatistics (txnSession , getAnalyzeTableHandle (table , txnSession ), columnHandleList , Constraint .alwaysTrue ());
469
+ return meta .getTableStatistics (txnSession , getAnalyzeTableHandle (queryRunner , table , txnSession ), columnHandleList , Constraint .alwaysTrue ());
470
+ }
471
+
472
// Instance overload with the pre-change signature; it delegates to the
// static form using this test's own query runner.
+ private TableStatistics getTableStatistics (Session session , String table )
473
+ {
474
+ return getTableStatistics (getQueryRunner (), session , table );
429
475
}
430
476
431
477
private void columnStatsEqual (Map <ColumnHandle , ColumnStatistics > actualStats , Map <ColumnHandle , ColumnStatistics > expectedStats )
@@ -454,28 +500,38 @@ private static Constraint<ColumnHandle> constraintWithMinValue(ColumnHandle col,
454
500
ImmutableMap .of (col , Domain .create (ValueSet .ofRanges (Range .greaterThan (DOUBLE , min )), true ))));
455
501
}
456
502
457
// Diff hunk: the instance helper ('-' lines) becomes a static helper that
// receives the QueryRunner as its first argument ('+' lines).
// It asks the runner's metadata for the statistics-collection table handle of
// iceberg.tpch.<tableName>, with the name lower-cased using Locale.US and an
// empty properties map.
// The result is unwrapped with get() without a presence check, so presumably
// a missing table surfaces as an exception here -- TODO confirm.
- private TableHandle getAnalyzeTableHandle (String tableName , Session session )
503
+ private static TableHandle getAnalyzeTableHandle (QueryRunner queryRunner , String tableName , Session session )
458
504
{
459
- Metadata meta = getQueryRunner () .getMetadata ();
505
+ Metadata meta = queryRunner .getMetadata ();
460
506
return meta .getTableHandleForStatisticsCollection (
461
507
session ,
462
508
new QualifiedObjectName ("iceberg" , "tpch" , tableName .toLowerCase (Locale .US )),
463
509
Collections .emptyMap ()).get ();
464
510
}
465
511
466
// Diff hunk covering two helpers:
//  * an added instance overload of getAnalyzeTableHandle that forwards
//    getQueryRunner() to the three-argument form, and
//  * getTableHandle, whose instance signature ('-' line) is replaced by a
//    static one taking the QueryRunner explicitly ('+' line).
- private TableHandle getTableHandle (String tableName , Session session )
512
+ private TableHandle getAnalyzeTableHandle (String tableName , Session session )
513
+ {
514
+ return getAnalyzeTableHandle (getQueryRunner (), tableName , session );
515
+ }
516
+
517
// Resolves iceberg.tpch.<tableName> (name lower-cased using Locale.US)
// through the metadata resolver obtained for the given session.
// The result is unwrapped with get() without a presence check -- presumably
// an unknown table fails here; TODO confirm.
+ private static TableHandle getTableHandle (QueryRunner queryRunner , String tableName , Session session )
467
518
{
468
- MetadataResolver resolver = getQueryRunner () .getMetadata ().getMetadataResolver (session );
519
+ MetadataResolver resolver = queryRunner .getMetadata ().getMetadataResolver (session );
469
520
return resolver .getTableHandle (new QualifiedObjectName ("iceberg" , "tpch" , tableName .toLowerCase (Locale .US ))).get ();
470
521
}
471
522
472
// Diff hunk: the instance helper ('-' lines) becomes a static helper taking
// the QueryRunner explicitly ('+' lines); an instance overload with the old
// signature is added after it.
//
// Static form: fetches the column handles of the table resolved by
// getTableHandle, drops every entry whose IcebergColumnHandle id is reported
// as a metadata column by IcebergMetadataColumn.isMetadataColumnId, and
// returns the remaining entries as a map with the same keys and values.
// The cast assumes every returned handle is an IcebergColumnHandle.
- private Map <String , ColumnHandle > getColumnHandles (String tableName , Session session )
523
+ private static Map <String , ColumnHandle > getColumnHandles (QueryRunner queryRunner , String tableName , Session session )
473
524
{
474
- return getQueryRunner () .getMetadata ().getColumnHandles (session , getTableHandle (tableName , session )).entrySet ().stream ()
525
+ return queryRunner .getMetadata ().getColumnHandles (session , getTableHandle (queryRunner , tableName , session )).entrySet ().stream ()
475
526
.filter (entry -> !IcebergMetadataColumn .isMetadataColumnId (((IcebergColumnHandle ) (entry .getValue ())).getId ()))
476
527
.collect (Collectors .toMap (Map .Entry ::getKey , Map .Entry ::getValue ));
477
528
}
478
529
530
// Instance overload with the pre-change signature; it delegates to the
// static form using this test's own query runner.
+ private Map <String , ColumnHandle > getColumnHandles (String tableName , Session session )
531
+ {
532
+ return getColumnHandles (getQueryRunner (), tableName , session );
533
+ }
534
+
479
535
static void assertStatValuePresent (StatsSchema column , MaterializedResult result , Set <String > columnNames )
480
536
{
481
537
assertStatValue (column , result , columnNames , null , true );
0 commit comments