From b5c9d4afd8b6dc6e4b68efbcf09d27931900ea32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Josep=20Samp=C3=A9?= Date: Tue, 1 Oct 2024 11:48:34 +0200 Subject: [PATCH] Rebasing --- .github/workflows/test-artifact.yml | 2 +- CONTRIBUTING.md | 8 ++-- README.md | 6 +-- build.sbt | 43 ++++++++++++++++--- .../scala/io/qbeast/core/keeper/Keeper.scala | 0 .../io/qbeast/core/keeper/LocalKeeper.scala | 0 .../scala/io/qbeast/core/model/Block.scala | 0 .../core/model/BroadcastedTableChanges.scala | 4 +- .../io/qbeast/core/model/ColumnToIndex.scala | 0 .../core/model/ColumnToIndexUtils.scala | 0 .../core/model/ColumnsToIndexSelector.scala | 0 .../core/model/CubeDomainsBuilder.scala | 0 .../scala/io/qbeast/core/model/CubeId.scala | 0 .../core/model/CubeNormalizedWeights.scala | 0 .../io/qbeast/core}/model/CubeState.scala | 2 +- .../io/qbeast/core/model/DataWriter.scala | 0 .../qbeast/core/model/DenormalizedBlock.scala | 0 .../qbeast/core/model/IndexFileBuilder.scala | 0 .../io/qbeast/core/model/IndexManager.scala | 0 .../qbeast/core/model/MetadataManager.scala | 0 .../qbeast/core/model/NormalizedWeight.scala | 0 .../scala/io/qbeast/core/model/Point.scala | 0 .../core/model/PointWeightIndexer.scala | 0 .../io/qbeast/core/model/PreCommitHook.scala | 0 .../io/qbeast/core/model/QDataType.scala | 0 .../qbeast/core/model/QbeastCoreContext.scala | 0 .../io/qbeast/core/model/QbeastFiles.scala | 0 .../io/qbeast/core/model/QbeastSnapshot.scala | 0 .../io/qbeast/core/model/QbeastStats.scala | 0 .../io/qbeast/core/model/QuerySpace.scala | 0 .../qbeast/core/model/RevisionClasses.scala | 0 .../core/model/StagingDataManager.scala | 0 .../io/qbeast/core/model/StagingUtils.scala | 0 .../scala/io/qbeast/core/model/Weight.scala | 0 .../io/qbeast/core/model/WeightRange.scala | 0 .../scala/io/qbeast/core/model/package.scala | 0 .../CDFNumericQuantilesTransformation.scala | 0 .../CDFNumericQuantilesTransformer.scala | 0 .../CDFQuantilesTransformation.scala | 0 .../transform/CDFQuantilesTransformer.scala | 0 .../CDFStringQuantilesTransformation.scala | 0 .../CDFStringQuantilesTransformer.scala | 0 .../core/transform/EmptyTransformation.scala | 0 .../core/transform/EmptyTransformer.scala | 0 .../core/transform/HashTransformation.scala | 0 .../core/transform/HashTransformer.scala | 0 .../core/transform/LinearTransformation.scala | 0 .../core/transform/LinearTransformer.scala | 0 .../ManualPlaceholderTransformation.scala | 0 .../core/transform/Transformation.scala | 0 .../qbeast/core/transform/Transformer.scala | 0 .../src}/main/scala/io/qbeast/package.scala | 0 .../io/qbeast/spark/implicits/package.scala | 0 .../qbeast/spark/index/ColumnsToIndex.scala | 0 .../qbeast/spark/index/EmptyFileIndex.scala | 2 +- .../spark/index/IndexStatusBuilder.scala | 0 .../spark/index/OTreeDataAnalyzer.scala | 0 .../io/qbeast/spark/index/QbeastColumns.scala | 0 .../io/qbeast/spark/index/RowUtils.scala | 0 .../index/SparkColumnsToIndexSelector.scala | 0 .../spark/index/SparkOTreeManager.scala | 0 .../spark/index/SparkPointWeightIndexer.scala | 0 .../spark/index/SparkRevisionFactory.scala | 0 .../scala/io/qbeast/spark/index/package.scala | 0 .../spark/index/query/QbeastFilters.scala | 0 .../spark/index/query/QueryExecutor.scala | 0 .../spark/index/query/QueryFiltersUtils.scala | 0 .../qbeast/spark/index/query/QuerySpec.scala | 0 .../spark/index/query/QuerySpecBuilder.scala | 0 .../spark/internal/QbeastFunctions.scala | 0 .../qbeast/spark/internal/QbeastOptions.scala | 34 ++++++++++++++- .../expressions/QbeastMurmur3Hash.scala | 0 
.../io/qbeast/spark/utils/IndexMetrics.scala | 0 .../scala/io/qbeast/spark/utils/Params.scala | 0 .../spark/utils/QbeastExceptionMessages.scala | 0 .../spark/utils/SparkToQTypesUtils.scala | 0 .../io/qbeast/spark/writer/BlockStats.scala | 0 .../io/qbeast/spark/writer/BlockWriter.scala | 2 +- .../qbeast/spark/writer/IndexFileWriter.scala | 0 .../spark/writer/IndexFileWriterFactory.scala | 0 .../scala/io/qbeast/spark/writer/Rollup.scala | 0 .../spark/writer/RollupDataWriter.scala | 0 .../io/qbeast/spark/writer/StatsTracker.scala | 0 .../org/apache/spark/qbeast/config.scala | 26 ++++++++--- .../spark/sql/AnalysisExceptionFactory.scala | 0 .../org/apache/spark/sql/DataFrameUtils.scala | 0 .../org/apache/spark/sql/DatasetFactory.scala | 0 .../org/apache/spark/sql/SchemaUtils.scala | 0 .../apache/spark/sql/SparkCatalogUtils.scala | 0 .../org/apache/spark/sql/V2AndV1Traits.scala | 0 .../catalog/SparkCatalogV2Util.scala | 0 .../qbeast/core/model/CubeKeyFactoryJava.java | 0 .../spark/model/CubeKeyFactoryJava.java | 0 .../io/qbeast/core/model/BlockTest.scala | 0 .../qbeast/core/model/ColumnToIndexTest.scala | 0 .../core/model/ColumnToIndexUtilsTest.scala | 0 .../core/model/CreateCubeKeySpeedTest.scala | 0 .../qbeast/core/model/CreateCubeKeyTest.scala | 0 .../core/model/CubeDomainsBuilderTest.scala | 0 .../qbeast/core/model/CubeIdSpeedTest.scala | 0 .../io/qbeast/core/model/CubeIdTest.scala | 0 .../io/qbeast/core/model/CubeKeyFactory.scala | 0 .../model/CubeNormalizedWeightsTest.scala | 0 .../model/EstimateGroupCubeSizeTest.scala | 0 .../io/qbeast/core/model/IndexFileTest.scala | 0 .../core/model/JSONSerializationTests.scala | 0 .../io/qbeast/core/model/PointTest.scala | 0 .../core/model/PointWeightIndexerTest.scala | 0 .../io/qbeast/core/model/QTableIDTest.scala | 0 .../core/model/QbeastHookLoaderTest.scala | 0 .../core/model/QuerySpaceFromToTest.scala | 0 .../io/qbeast/core/model/RevisionTest.scala | 0 .../core/model/WeightAndCountTest.scala | 0 ...DFNumericQuantilesTransformationTest.scala | 0 .../CDFQuantilesTransformerTest.scala | 0 ...CDFStringQuantilesTransformationTest.scala | 0 .../transform/EmptyTransformationTest.scala | 0 .../transform/HashTransformationTest.scala | 0 .../transform/LinearTransformationTest.scala | 0 .../core/transform/TransformerTest.scala | 0 .../qbeast/spark/delta/DefaultFileIndex.scala | 0 .../delta/DefaultListFilesStrategy.scala | 0 .../spark/delta/DeltaMetadataManager.scala | 0 .../spark/delta/DeltaMetadataWriter.scala | 0 .../spark/delta/DeltaQbeastFileUtils.scala | 0 .../spark/delta/DeltaQbeastSnapshot.scala | 0 .../spark/delta/DeltaQbeastStatsUtils.scala | 0 .../spark/delta/DeltaRollupDataWriter.scala | 0 .../spark/delta/DeltaStagingDataManager.scala | 10 +---- .../spark/delta/DeltaStagingUtils.scala | 0 .../spark/delta/ListFilesStrategy.scala | 0 .../spark/delta/QbeastMetadataOperation.scala | 0 .../delta/SamplingListFilesStrategy.scala | 0 .../sql/delta/DeltaStatsCollectionUtils.scala | 3 +- docs/AdvancedConfiguration.md | 10 ++--- docs/CloudStorages.md | 6 +-- docs/QbeastTable.md | 2 +- docs/Quickstart.md | 26 +++++------ docs/sample_pushdown_demo.ipynb | 2 +- ...pache.spark.sql.sources.DataSourceRegister | 2 +- .../catalog/DefaultStagedTable.scala | 2 +- .../sources => }/catalog/QbeastCatalog.scala | 22 +++++----- .../catalog/QbeastCatalogUtils.scala | 13 +++--- .../catalog/TableCreationMode.scala | 2 +- .../io/qbeast/context/QbeastContext.scala | 4 +- .../commands/AlterQbeastTableCommands.scala | 4 +- .../commands/ConvertToQbeastCommand.scala | 15 +------ 
.../commands/OptimizeTableCommand.scala | 4 +- .../internal/rules/QbeastAnalysis.scala | 7 +-- .../internal/rules/QbeastAnalysisUtils.scala | 4 +- .../internal/rules/SampleRule.scala | 2 +- .../internal/rules/SaveAsTableRule.scala | 4 +- .../sources/QbeastBaseRelation.scala | 4 +- .../sources/QbeastDataSource.scala | 26 +++++------ .../sources/v2/QbeastStagedTableImpl.scala | 14 +++--- .../sources/v2/QbeastTableImpl.scala | 6 +-- .../sources/v2/QbeastWriteBuilder.scala | 6 +-- .../QbeastSparkSessionExtension.scala} | 10 ++--- .../{spark => }/table/IndexedTable.scala | 12 +++++- .../qbeast/{spark => table}/QbeastTable.scala | 0 .../{spark => }/utils/QbeastUtils.scala | 2 +- .../QbeastIntegrationTestSpec.scala | 10 ++--- src/test/scala/io/qbeast/TestUtils.scala | 1 - .../catalog/CatalogTestSuite.scala | 10 ++--- .../catalog/DefaultStagedTableTest.scala | 10 ++--- .../QbeastCatalogIntegrationTest.scala | 14 +++--- .../catalog/QbeastCatalogTest.scala | 15 ++++--- .../io/qbeast/context/QbeastConfigTest.scala | 2 +- .../io/qbeast/context/QbeastContextTest.scala | 4 +- .../core/model/DenormalizedBlockTest.scala | 2 +- .../core/model/NormalizedWeightTest.scala | 2 +- .../io/qbeast/core/model/WeightTest.scala | 2 +- .../sources/QbeastBaseRelationTest.scala | 8 ++-- .../sources/QbeastDataSourceTest.scala | 6 +-- .../sources/QbeastOptionsTest.scala | 4 +- .../sources/QbeastStagedTableTest.scala | 15 ++++--- .../sources/QbeastTableImplTest.scala | 16 +++---- .../spark/delta/DefaultFileIndexTest.scala | 2 +- .../delta/DeltaRollupDataWriterTest.scala | 2 +- .../delta/QbeastDeltaIntegrationTest.scala | 2 +- .../spark/delta/QbeastFileUtilsTest.scala | 2 +- .../spark/delta/QbeastSparkTxnTest.scala | 2 +- .../spark/delta/keeper/ProtocolMock.scala | 2 +- .../{ => spark}/docs/DocumentationTests.scala | 4 +- .../index/CubeDomainsIntegrationTest.scala | 2 +- .../qbeast/spark/index/DataStagingTest.scala | 12 +++--- .../DoublePassOTreeDataAnalyzerTest.scala | 2 +- .../spark/index/IndexStatusBuilderTest.scala | 2 +- .../io/qbeast/spark/index/IndexTest.scala | 2 +- .../qbeast/spark/index/NewRevisionTest.scala | 2 +- .../NormalizedWeightIntegrationTest.scala | 2 +- .../spark/index/OTreeAlgorithmTest.scala | 2 +- .../io/qbeast/spark/index/RevisionTest.scala | 2 +- .../SparkColumnsToIndexSelectorTest.scala | 2 +- .../index/SparkRevisionFactoryTest.scala | 2 +- .../transformer/TransformerIndexingTest.scala | 4 +- .../query/DisjunctiveQuerySpecTest.scala | 2 +- .../spark/index/query/QueryExecutorTest.scala | 4 +- .../index/query/QuerySpecBuilderTest.scala | 2 +- .../index/query/TimeSeriesQueryTest.scala | 2 +- .../qbeast/spark/utils/IndexMetricsTest.scala | 2 +- .../PreCommitHookIntegrationTest.scala | 4 +- .../utils/QbeastCreateTableSQLTest.scala | 4 +- .../spark/utils/QbeastDeltaStagingTest.scala | 4 +- .../utils/QbeastFilterPushdownTest.scala | 2 +- .../spark/utils/QbeastInsertToTest.scala | 2 +- .../utils/QbeastSQLIntegrationTest.scala | 4 +- .../qbeast/spark/utils/QbeastSchemaTest.scala | 2 +- .../utils}/QbeastSnapshotTest.scala | 7 ++- .../utils/QbeastSparkCorrectnessTest.scala | 2 +- .../utils/QbeastSparkIntegrationTest.scala | 4 +- .../io/qbeast/spark/utils/SizeStatsTest.scala | 2 +- .../qbeast/spark/writer/BlockWriterTest.scala | 2 +- .../{spark => }/table/HasConflictsTest.scala | 0 .../utils => table}/QbeastTableTest.scala | 9 ++-- .../{spark => }/utils/QbeastUtilsTest.scala | 4 +- 216 files changed, 325 insertions(+), 265 deletions(-) rename {src => core/src}/main/scala/io/qbeast/core/keeper/Keeper.scala 
(100%) rename {src => core/src}/main/scala/io/qbeast/core/keeper/LocalKeeper.scala (100%) rename {src => core/src}/main/scala/io/qbeast/core/model/Block.scala (100%) rename {src => core/src}/main/scala/io/qbeast/core/model/BroadcastedTableChanges.scala (97%) rename {src => core/src}/main/scala/io/qbeast/core/model/ColumnToIndex.scala (100%) rename {src => core/src}/main/scala/io/qbeast/core/model/ColumnToIndexUtils.scala (100%) rename {src => core/src}/main/scala/io/qbeast/core/model/ColumnsToIndexSelector.scala (100%) rename {src => core/src}/main/scala/io/qbeast/core/model/CubeDomainsBuilder.scala (100%) rename {src => core/src}/main/scala/io/qbeast/core/model/CubeId.scala (100%) rename {src => core/src}/main/scala/io/qbeast/core/model/CubeNormalizedWeights.scala (100%) rename {src/main/scala/io/qbeast/spark => core/src/main/scala/io/qbeast/core}/model/CubeState.scala (96%) rename {src => core/src}/main/scala/io/qbeast/core/model/DataWriter.scala (100%) rename {src => core/src}/main/scala/io/qbeast/core/model/DenormalizedBlock.scala (100%) rename {src => core/src}/main/scala/io/qbeast/core/model/IndexFileBuilder.scala (100%) rename {src => core/src}/main/scala/io/qbeast/core/model/IndexManager.scala (100%) rename {src => core/src}/main/scala/io/qbeast/core/model/MetadataManager.scala (100%) rename {src => core/src}/main/scala/io/qbeast/core/model/NormalizedWeight.scala (100%) rename {src => core/src}/main/scala/io/qbeast/core/model/Point.scala (100%) rename {src => core/src}/main/scala/io/qbeast/core/model/PointWeightIndexer.scala (100%) rename {src => core/src}/main/scala/io/qbeast/core/model/PreCommitHook.scala (100%) rename {src => core/src}/main/scala/io/qbeast/core/model/QDataType.scala (100%) rename {src => core/src}/main/scala/io/qbeast/core/model/QbeastCoreContext.scala (100%) rename {src => core/src}/main/scala/io/qbeast/core/model/QbeastFiles.scala (100%) rename {src => core/src}/main/scala/io/qbeast/core/model/QbeastSnapshot.scala (100%) rename {src => core/src}/main/scala/io/qbeast/core/model/QbeastStats.scala (100%) rename {src => core/src}/main/scala/io/qbeast/core/model/QuerySpace.scala (100%) rename {src => core/src}/main/scala/io/qbeast/core/model/RevisionClasses.scala (100%) rename {src => core/src}/main/scala/io/qbeast/core/model/StagingDataManager.scala (100%) rename {src => core/src}/main/scala/io/qbeast/core/model/StagingUtils.scala (100%) rename {src => core/src}/main/scala/io/qbeast/core/model/Weight.scala (100%) rename {src => core/src}/main/scala/io/qbeast/core/model/WeightRange.scala (100%) rename {src => core/src}/main/scala/io/qbeast/core/model/package.scala (100%) rename {src => core/src}/main/scala/io/qbeast/core/transform/CDFNumericQuantilesTransformation.scala (100%) rename {src => core/src}/main/scala/io/qbeast/core/transform/CDFNumericQuantilesTransformer.scala (100%) rename {src => core/src}/main/scala/io/qbeast/core/transform/CDFQuantilesTransformation.scala (100%) rename {src => core/src}/main/scala/io/qbeast/core/transform/CDFQuantilesTransformer.scala (100%) rename {src => core/src}/main/scala/io/qbeast/core/transform/CDFStringQuantilesTransformation.scala (100%) rename {src => core/src}/main/scala/io/qbeast/core/transform/CDFStringQuantilesTransformer.scala (100%) rename {src => core/src}/main/scala/io/qbeast/core/transform/EmptyTransformation.scala (100%) rename {src => core/src}/main/scala/io/qbeast/core/transform/EmptyTransformer.scala (100%) rename {src => core/src}/main/scala/io/qbeast/core/transform/HashTransformation.scala (100%) rename 
{src => core/src}/main/scala/io/qbeast/core/transform/HashTransformer.scala (100%) rename {src => core/src}/main/scala/io/qbeast/core/transform/LinearTransformation.scala (100%) rename {src => core/src}/main/scala/io/qbeast/core/transform/LinearTransformer.scala (100%) rename {src => core/src}/main/scala/io/qbeast/core/transform/ManualPlaceholderTransformation.scala (100%) rename {src => core/src}/main/scala/io/qbeast/core/transform/Transformation.scala (100%) rename {src => core/src}/main/scala/io/qbeast/core/transform/Transformer.scala (100%) rename {src => core/src}/main/scala/io/qbeast/package.scala (100%) rename {src => core/src}/main/scala/io/qbeast/spark/implicits/package.scala (100%) rename {src => core/src}/main/scala/io/qbeast/spark/index/ColumnsToIndex.scala (100%) rename {src => core/src}/main/scala/io/qbeast/spark/index/EmptyFileIndex.scala (95%) rename {src => core/src}/main/scala/io/qbeast/spark/index/IndexStatusBuilder.scala (100%) rename {src => core/src}/main/scala/io/qbeast/spark/index/OTreeDataAnalyzer.scala (100%) rename {src => core/src}/main/scala/io/qbeast/spark/index/QbeastColumns.scala (100%) rename {src => core/src}/main/scala/io/qbeast/spark/index/RowUtils.scala (100%) rename {src => core/src}/main/scala/io/qbeast/spark/index/SparkColumnsToIndexSelector.scala (100%) rename {src => core/src}/main/scala/io/qbeast/spark/index/SparkOTreeManager.scala (100%) rename {src => core/src}/main/scala/io/qbeast/spark/index/SparkPointWeightIndexer.scala (100%) rename {src => core/src}/main/scala/io/qbeast/spark/index/SparkRevisionFactory.scala (100%) rename {src => core/src}/main/scala/io/qbeast/spark/index/package.scala (100%) rename {src => core/src}/main/scala/io/qbeast/spark/index/query/QbeastFilters.scala (100%) rename {src => core/src}/main/scala/io/qbeast/spark/index/query/QueryExecutor.scala (100%) rename {src => core/src}/main/scala/io/qbeast/spark/index/query/QueryFiltersUtils.scala (100%) rename {src => core/src}/main/scala/io/qbeast/spark/index/query/QuerySpec.scala (100%) rename {src => core/src}/main/scala/io/qbeast/spark/index/query/QuerySpecBuilder.scala (100%) rename {src => core/src}/main/scala/io/qbeast/spark/internal/QbeastFunctions.scala (100%) rename {src => core/src}/main/scala/io/qbeast/spark/internal/QbeastOptions.scala (93%) rename {src => core/src}/main/scala/io/qbeast/spark/internal/expressions/QbeastMurmur3Hash.scala (100%) rename {src => core/src}/main/scala/io/qbeast/spark/utils/IndexMetrics.scala (100%) rename {src => core/src}/main/scala/io/qbeast/spark/utils/Params.scala (100%) rename {src => core/src}/main/scala/io/qbeast/spark/utils/QbeastExceptionMessages.scala (100%) rename {src => core/src}/main/scala/io/qbeast/spark/utils/SparkToQTypesUtils.scala (100%) rename {src => core/src}/main/scala/io/qbeast/spark/writer/BlockStats.scala (100%) rename {src => core/src}/main/scala/io/qbeast/spark/writer/BlockWriter.scala (99%) rename {src => core/src}/main/scala/io/qbeast/spark/writer/IndexFileWriter.scala (100%) rename {src => core/src}/main/scala/io/qbeast/spark/writer/IndexFileWriterFactory.scala (100%) rename {src => core/src}/main/scala/io/qbeast/spark/writer/Rollup.scala (100%) rename {src => core/src}/main/scala/io/qbeast/spark/writer/RollupDataWriter.scala (100%) rename {src => core/src}/main/scala/io/qbeast/spark/writer/StatsTracker.scala (100%) rename {src => core/src}/main/scala/org/apache/spark/qbeast/config.scala (70%) rename {src => core/src}/main/scala/org/apache/spark/sql/AnalysisExceptionFactory.scala (100%) rename {src => 
core/src}/main/scala/org/apache/spark/sql/DataFrameUtils.scala (100%) rename {src => core/src}/main/scala/org/apache/spark/sql/DatasetFactory.scala (100%) rename {src => core/src}/main/scala/org/apache/spark/sql/SchemaUtils.scala (100%) rename {src => core/src}/main/scala/org/apache/spark/sql/SparkCatalogUtils.scala (100%) rename {src => core/src}/main/scala/org/apache/spark/sql/V2AndV1Traits.scala (100%) rename {src => core/src}/main/scala/org/apache/spark/sql/connector/catalog/SparkCatalogV2Util.scala (100%) rename {src => core/src}/test/java/io/qbeast/core/model/CubeKeyFactoryJava.java (100%) rename {src => core/src}/test/java/io/qbeast/spark/model/CubeKeyFactoryJava.java (100%) rename {src => core/src}/test/scala/io/qbeast/core/model/BlockTest.scala (100%) rename {src => core/src}/test/scala/io/qbeast/core/model/ColumnToIndexTest.scala (100%) rename {src => core/src}/test/scala/io/qbeast/core/model/ColumnToIndexUtilsTest.scala (100%) rename {src => core/src}/test/scala/io/qbeast/core/model/CreateCubeKeySpeedTest.scala (100%) rename {src => core/src}/test/scala/io/qbeast/core/model/CreateCubeKeyTest.scala (100%) rename {src => core/src}/test/scala/io/qbeast/core/model/CubeDomainsBuilderTest.scala (100%) rename {src => core/src}/test/scala/io/qbeast/core/model/CubeIdSpeedTest.scala (100%) rename {src => core/src}/test/scala/io/qbeast/core/model/CubeIdTest.scala (100%) rename {src => core/src}/test/scala/io/qbeast/core/model/CubeKeyFactory.scala (100%) rename {src => core/src}/test/scala/io/qbeast/core/model/CubeNormalizedWeightsTest.scala (100%) rename {src => core/src}/test/scala/io/qbeast/core/model/EstimateGroupCubeSizeTest.scala (100%) rename {src => core/src}/test/scala/io/qbeast/core/model/IndexFileTest.scala (100%) rename {src => core/src}/test/scala/io/qbeast/core/model/JSONSerializationTests.scala (100%) rename {src => core/src}/test/scala/io/qbeast/core/model/PointTest.scala (100%) rename {src => core/src}/test/scala/io/qbeast/core/model/PointWeightIndexerTest.scala (100%) rename {src => core/src}/test/scala/io/qbeast/core/model/QTableIDTest.scala (100%) rename {src => core/src}/test/scala/io/qbeast/core/model/QbeastHookLoaderTest.scala (100%) rename {src => core/src}/test/scala/io/qbeast/core/model/QuerySpaceFromToTest.scala (100%) rename {src => core/src}/test/scala/io/qbeast/core/model/RevisionTest.scala (100%) rename {src => core/src}/test/scala/io/qbeast/core/model/WeightAndCountTest.scala (100%) rename {src => core/src}/test/scala/io/qbeast/core/transform/CDFNumericQuantilesTransformationTest.scala (100%) rename {src => core/src}/test/scala/io/qbeast/core/transform/CDFQuantilesTransformerTest.scala (100%) rename {src => core/src}/test/scala/io/qbeast/core/transform/CDFStringQuantilesTransformationTest.scala (100%) rename {src => core/src}/test/scala/io/qbeast/core/transform/EmptyTransformationTest.scala (100%) rename {src => core/src}/test/scala/io/qbeast/core/transform/HashTransformationTest.scala (100%) rename {src => core/src}/test/scala/io/qbeast/core/transform/LinearTransformationTest.scala (100%) rename {src => core/src}/test/scala/io/qbeast/core/transform/TransformerTest.scala (100%) rename {src => delta/src}/main/scala/io/qbeast/spark/delta/DefaultFileIndex.scala (100%) rename {src => delta/src}/main/scala/io/qbeast/spark/delta/DefaultListFilesStrategy.scala (100%) rename {src => delta/src}/main/scala/io/qbeast/spark/delta/DeltaMetadataManager.scala (100%) rename {src => delta/src}/main/scala/io/qbeast/spark/delta/DeltaMetadataWriter.scala (100%) rename {src => 
delta/src}/main/scala/io/qbeast/spark/delta/DeltaQbeastFileUtils.scala (100%) rename {src => delta/src}/main/scala/io/qbeast/spark/delta/DeltaQbeastSnapshot.scala (100%) rename {src => delta/src}/main/scala/io/qbeast/spark/delta/DeltaQbeastStatsUtils.scala (100%) rename {src => delta/src}/main/scala/io/qbeast/spark/delta/DeltaRollupDataWriter.scala (100%) rename {src => delta/src}/main/scala/io/qbeast/spark/delta/DeltaStagingDataManager.scala (92%) rename {src => delta/src}/main/scala/io/qbeast/spark/delta/DeltaStagingUtils.scala (100%) rename {src => delta/src}/main/scala/io/qbeast/spark/delta/ListFilesStrategy.scala (100%) rename {src => delta/src}/main/scala/io/qbeast/spark/delta/QbeastMetadataOperation.scala (100%) rename {src => delta/src}/main/scala/io/qbeast/spark/delta/SamplingListFilesStrategy.scala (100%) rename {src => delta/src}/main/scala/org/apache/spark/sql/delta/DeltaStatsCollectionUtils.scala (98%) rename src/main/scala/io/qbeast/{spark/internal/sources => }/catalog/DefaultStagedTable.scala (98%) rename src/main/scala/io/qbeast/{spark/internal/sources => }/catalog/QbeastCatalog.scala (95%) rename src/main/scala/io/qbeast/{spark/internal/sources => }/catalog/QbeastCatalogUtils.scala (97%) rename src/main/scala/io/qbeast/{spark/internal/sources => }/catalog/TableCreationMode.scala (96%) rename src/main/scala/io/qbeast/{spark => }/internal/commands/AlterQbeastTableCommands.scala (96%) rename src/main/scala/io/qbeast/{spark => }/internal/commands/ConvertToQbeastCommand.scala (86%) rename src/main/scala/io/qbeast/{spark => }/internal/commands/OptimizeTableCommand.scala (93%) rename src/main/scala/io/qbeast/{spark => }/internal/rules/QbeastAnalysis.scala (94%) rename src/main/scala/io/qbeast/{spark => }/internal/rules/QbeastAnalysisUtils.scala (99%) rename src/main/scala/io/qbeast/{spark => }/internal/rules/SampleRule.scala (99%) rename src/main/scala/io/qbeast/{spark => }/internal/rules/SaveAsTableRule.scala (95%) rename src/main/scala/io/qbeast/{spark/internal => }/sources/QbeastBaseRelation.scala (98%) rename src/main/scala/io/qbeast/{spark/internal => }/sources/QbeastDataSource.scala (87%) rename src/main/scala/io/qbeast/{spark/internal => }/sources/v2/QbeastStagedTableImpl.scala (92%) rename src/main/scala/io/qbeast/{spark/internal => }/sources/v2/QbeastTableImpl.scala (96%) rename src/main/scala/io/qbeast/{spark/internal => }/sources/v2/QbeastWriteBuilder.scala (95%) rename src/main/scala/io/qbeast/{spark/delta/QbeastDeltaSparkSessionExtension.scala => sql/QbeastSparkSessionExtension.scala} (81%) rename src/main/scala/io/qbeast/{spark => }/table/IndexedTable.scala (97%) rename src/main/scala/io/qbeast/{spark => table}/QbeastTable.scala (100%) rename src/main/scala/io/qbeast/{spark => }/utils/QbeastUtils.scala (99%) rename src/test/scala/io/qbeast/{spark => }/QbeastIntegrationTestSpec.scala (95%) rename src/test/scala/io/qbeast/{spark/internal/sources => }/catalog/CatalogTestSuite.scala (92%) rename src/test/scala/io/qbeast/{spark/internal/sources => }/catalog/DefaultStagedTableTest.scala (95%) rename src/test/scala/io/qbeast/{spark/internal/sources => }/catalog/QbeastCatalogIntegrationTest.scala (95%) rename src/test/scala/io/qbeast/{spark/internal/sources => }/catalog/QbeastCatalogTest.scala (97%) rename src/test/scala/io/qbeast/{spark/internal => }/sources/QbeastBaseRelationTest.scala (96%) rename src/test/scala/io/qbeast/{spark/internal => }/sources/QbeastDataSourceTest.scala (98%) rename src/test/scala/io/qbeast/{spark/internal => }/sources/QbeastOptionsTest.scala 
(98%) rename src/test/scala/io/qbeast/{spark/internal => }/sources/QbeastStagedTableTest.scala (93%) rename src/test/scala/io/qbeast/{spark/internal => }/sources/QbeastTableImplTest.scala (89%) rename src/test/scala/io/qbeast/{ => spark}/docs/DocumentationTests.scala (98%) rename src/test/scala/io/qbeast/spark/{delta => utils}/PreCommitHookIntegrationTest.scala (97%) rename src/test/scala/io/qbeast/{core/model => spark/utils}/QbeastSnapshotTest.scala (98%) rename src/test/scala/io/qbeast/{spark => }/table/HasConflictsTest.scala (100%) rename src/test/scala/io/qbeast/{spark/utils => table}/QbeastTableTest.scala (97%) rename src/test/scala/io/qbeast/{spark => }/utils/QbeastUtilsTest.scala (96%) diff --git a/.github/workflows/test-artifact.yml b/.github/workflows/test-artifact.yml index 6e2f83c19..36ffa3822 100644 --- a/.github/workflows/test-artifact.yml +++ b/.github/workflows/test-artifact.yml @@ -31,7 +31,7 @@ jobs: run: sbt "scalafixAll --check" - name: Test run: | - sbt coverage 'test' coverageReport + sbt coverage 'qbeastCore/test' 'qbeastDelta/test' 'qbeastSpark/test' coverageReport - name: Upload to Codecov run: | curl https://keybase.io/codecovsecurity/pgp_keys.asc | gpg --no-default-keyring --keyring trustedkeys.gpg --import # One-time step diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index dd911c07c..7952704e2 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -193,8 +193,8 @@ sbt assembly $SPARK_HOME/bin/spark-shell \ --jars ./target/scala-2.12/qbeast-spark-assembly-0.6.0.jar \ --packages io.delta:delta-spark_2.12:3.1.0 \ ---conf spark.sql.extensions=io.qbeast.spark.delta.QbeastDeltaSparkSessionExtension \ ---conf spark.sql.catalog.spark_catalog=io.qbeast.spark.internal.sources.catalog.QbeastCatalog +--conf spark.sql.extensions=io.qbeast.sql.QbeastSparkSessionExtension \ +--conf spark.sql.catalog.spark_catalog=io.qbeast.catalog.QbeastCatalog ``` ### 4. Publishing artefacts in the local repository @@ -280,8 +280,8 @@ To publish a new version of the qbeast-spark project, follow these steps: export QBEAST_SPARK_VERSION=0.6.0-SNAPSHOT $SPARK_350/bin/spark-shell --repositories https://s01.oss.sonatype.org/content/repositories/snapshots \ --packages io.delta:delta-spark_2.12:3.1.0,io.qbeast:qbeast-spark_2.12:$QBEAST_SPARK_VERSION \ ---conf spark.sql.extensions=io.qbeast.spark.delta.QbeastDeltaSparkSessionExtension \ ---conf spark.sql.catalog.spark_catalog=io.qbeast.spark.internal.sources.catalog.QbeastCatalog +--conf spark.sql.extensions=io.qbeast.sql.QbeastSparkSessionExtension \ +--conf spark.sql.catalog.spark_catalog=io.qbeast.catalog.QbeastCatalog ``` 6. If everything is ok, change the `build.sbt` with the corresponding version and publish the RC. diff --git a/README.md b/README.md index 17d198b43..1303ef128 100644 --- a/README.md +++ b/README.md @@ -89,8 +89,8 @@ export SPARK_HOME=$PWD/spark-3.5.0-bin-hadoop3 ```bash $SPARK_HOME/bin/spark-shell \ --packages io.qbeast:qbeast-spark_2.12:0.7.0,io.delta:delta-spark_2.12:3.1.0 \ ---conf spark.sql.extensions=io.qbeast.spark.delta.QbeastDeltaSparkSessionExtension \ ---conf spark.sql.catalog.spark_catalog=io.qbeast.spark.internal.sources.catalog.QbeastCatalog +--conf spark.sql.extensions=io.qbeast.sql.QbeastSparkSessionExtension \ +--conf spark.sql.catalog.spark_catalog=io.qbeast.catalog.QbeastCatalog ``` ### 2. Indexing a dataset @@ -173,7 +173,7 @@ Go to the [Quickstart](./docs/Quickstart.md) or [notebook](docs/sample_pushdown_ Get **insights** to the data using the `QbeastTable` interface! 
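The spark-shell commands updated above switch to the relocated entry points `io.qbeast.sql.QbeastSparkSessionExtension` and `io.qbeast.catalog.QbeastCatalog`. The same wiring can be done programmatically; a minimal sketch, assuming only a local SparkSession (master and app name are illustrative):

```scala
import org.apache.spark.sql.SparkSession

// Hedged sketch: configure the relocated extension and catalog classes at session build time
val spark = SparkSession
  .builder()
  .master("local[*]") // illustrative; any cluster manager works
  .appName("qbeast-example") // hypothetical app name
  .config("spark.sql.extensions", "io.qbeast.sql.QbeastSparkSessionExtension")
  .config("spark.sql.catalog.spark_catalog", "io.qbeast.catalog.QbeastCatalog")
  .getOrCreate()
```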
```scala -import io.qbeast.spark.QbeastTable +import io.qbeast.table.QbeastTable val qbeastTable = QbeastTable.forPath(spark, tmpDir) diff --git a/build.sbt b/build.sbt index 9329f1ff1..6dbbf0963 100644 --- a/build.sbt +++ b/build.sbt @@ -3,26 +3,49 @@ import Dependencies._ val mainVersion = "0.8.0-SNAPSHOT" -// Projects +lazy val qbeastCore = (project in file("./core")) + .settings( + name := "qbeast-core", + libraryDependencies ++= Seq(sparkCore % Provided, sparkSql % Provided, sparkml % Provided), + Test / parallelExecution := false, + assembly / test := {}, + assembly / assemblyOption := (assembly / assemblyOption).value.copy(includeScala = false)) + .settings(noWarningInConsole) + +lazy val qbeastDelta = (project in file("./delta")) + .dependsOn(qbeastCore) + .settings( + name := "qbeast-delta", + libraryDependencies ++= Seq(sparkCore % Provided, deltaSpark % Provided, sparkSql % Provided), + Test / parallelExecution := false, + assembly / test := {}, + assembly / assemblyOption := (assembly / assemblyOption).value.copy(includeScala = false)) + .settings(noWarningInConsole) + lazy val qbeastSpark = (project in file(".")) + .dependsOn(qbeastCore, qbeastDelta) .enablePlugins(ScalaUnidocPlugin) .settings( name := "qbeast-spark", libraryDependencies ++= Seq( sparkCore % Provided, sparkSql % Provided, - hadoopClient % Provided, deltaSpark % Provided, - sparkml % Provided, - apacheCommons % Test, - amazonAws % Test, - hadoopCommons % Test, + sparkml % Test, hadoopAws % Test), Test / parallelExecution := false, assembly / test := {}, assembly / assemblyOption := (assembly / assemblyOption).value.copy(includeScala = false)) .settings(noWarningInConsole) +qbeastCore / Compile / doc / scalacOptions ++= Seq( + "-doc-title", + "qbeast-core", + "-doc-version", + mainVersion, + "-doc-footer", + "Copyright 2022 Qbeast - Docs for version " + mainVersion + " of qbeast-core") + qbeastSpark / Compile / doc / scalacOptions ++= Seq( "-doc-title", "qbeast-spark", @@ -31,6 +54,14 @@ qbeastSpark / Compile / doc / scalacOptions ++= Seq( "-doc-footer", "Copyright 2022 Qbeast - Docs for version " + mainVersion + " of qbeast-spark") +qbeastDelta / Compile / doc / scalacOptions ++= Seq( + "-doc-title", + "qbeast-delta", + "-doc-version", + mainVersion, + "-doc-footer", + "Copyright 2022 Qbeast - Docs for version " + mainVersion + " of qbeast-delta") + // Common metadata ThisBuild / version := mainVersion ThisBuild / organization := "io.qbeast" diff --git a/src/main/scala/io/qbeast/core/keeper/Keeper.scala b/core/src/main/scala/io/qbeast/core/keeper/Keeper.scala similarity index 100% rename from src/main/scala/io/qbeast/core/keeper/Keeper.scala rename to core/src/main/scala/io/qbeast/core/keeper/Keeper.scala diff --git a/src/main/scala/io/qbeast/core/keeper/LocalKeeper.scala b/core/src/main/scala/io/qbeast/core/keeper/LocalKeeper.scala similarity index 100% rename from src/main/scala/io/qbeast/core/keeper/LocalKeeper.scala rename to core/src/main/scala/io/qbeast/core/keeper/LocalKeeper.scala diff --git a/src/main/scala/io/qbeast/core/model/Block.scala b/core/src/main/scala/io/qbeast/core/model/Block.scala similarity index 100% rename from src/main/scala/io/qbeast/core/model/Block.scala rename to core/src/main/scala/io/qbeast/core/model/Block.scala diff --git a/src/main/scala/io/qbeast/core/model/BroadcastedTableChanges.scala b/core/src/main/scala/io/qbeast/core/model/BroadcastedTableChanges.scala similarity index 97% rename from src/main/scala/io/qbeast/core/model/BroadcastedTableChanges.scala rename to 
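The `build.sbt` changes above split the build into three sbt modules, `qbeast-core`, `qbeast-delta`, and the root `qbeast-spark`, with `qbeastSpark` depending on the other two. Assuming the new modules are published as separate artifacts under the `io.qbeast` organization and the `0.8.0-SNAPSHOT` version declared in this patch (publication itself is not shown here), a downstream `build.sbt` might reference them as follows:

```scala
// Hypothetical downstream dependencies; separate artifact publication is an assumption
libraryDependencies ++= Seq(
  "io.qbeast" %% "qbeast-core" % "0.8.0-SNAPSHOT",
  "io.qbeast" %% "qbeast-delta" % "0.8.0-SNAPSHOT",
  "io.qbeast" %% "qbeast-spark" % "0.8.0-SNAPSHOT")
```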
core/src/main/scala/io/qbeast/core/model/BroadcastedTableChanges.scala index 328f634fe..93e066212 100644 --- a/src/main/scala/io/qbeast/core/model/BroadcastedTableChanges.scala +++ b/core/src/main/scala/io/qbeast/core/model/BroadcastedTableChanges.scala @@ -15,11 +15,11 @@ */ package io.qbeast.core.model -import io.qbeast.spark.model.CubeState -import io.qbeast.spark.model.CubeState.CubeStateValue import org.apache.spark.broadcast.Broadcast import org.apache.spark.sql.SparkSession +import CubeState.CubeStateValue + /** * Container for the table changes */ diff --git a/src/main/scala/io/qbeast/core/model/ColumnToIndex.scala b/core/src/main/scala/io/qbeast/core/model/ColumnToIndex.scala similarity index 100% rename from src/main/scala/io/qbeast/core/model/ColumnToIndex.scala rename to core/src/main/scala/io/qbeast/core/model/ColumnToIndex.scala diff --git a/src/main/scala/io/qbeast/core/model/ColumnToIndexUtils.scala b/core/src/main/scala/io/qbeast/core/model/ColumnToIndexUtils.scala similarity index 100% rename from src/main/scala/io/qbeast/core/model/ColumnToIndexUtils.scala rename to core/src/main/scala/io/qbeast/core/model/ColumnToIndexUtils.scala diff --git a/src/main/scala/io/qbeast/core/model/ColumnsToIndexSelector.scala b/core/src/main/scala/io/qbeast/core/model/ColumnsToIndexSelector.scala similarity index 100% rename from src/main/scala/io/qbeast/core/model/ColumnsToIndexSelector.scala rename to core/src/main/scala/io/qbeast/core/model/ColumnsToIndexSelector.scala diff --git a/src/main/scala/io/qbeast/core/model/CubeDomainsBuilder.scala b/core/src/main/scala/io/qbeast/core/model/CubeDomainsBuilder.scala similarity index 100% rename from src/main/scala/io/qbeast/core/model/CubeDomainsBuilder.scala rename to core/src/main/scala/io/qbeast/core/model/CubeDomainsBuilder.scala diff --git a/src/main/scala/io/qbeast/core/model/CubeId.scala b/core/src/main/scala/io/qbeast/core/model/CubeId.scala similarity index 100% rename from src/main/scala/io/qbeast/core/model/CubeId.scala rename to core/src/main/scala/io/qbeast/core/model/CubeId.scala diff --git a/src/main/scala/io/qbeast/core/model/CubeNormalizedWeights.scala b/core/src/main/scala/io/qbeast/core/model/CubeNormalizedWeights.scala similarity index 100% rename from src/main/scala/io/qbeast/core/model/CubeNormalizedWeights.scala rename to core/src/main/scala/io/qbeast/core/model/CubeNormalizedWeights.scala diff --git a/src/main/scala/io/qbeast/spark/model/CubeState.scala b/core/src/main/scala/io/qbeast/core/model/CubeState.scala similarity index 96% rename from src/main/scala/io/qbeast/spark/model/CubeState.scala rename to core/src/main/scala/io/qbeast/core/model/CubeState.scala index 225e1d9d4..02ed0bf48 100644 --- a/src/main/scala/io/qbeast/spark/model/CubeState.scala +++ b/core/src/main/scala/io/qbeast/core/model/CubeState.scala @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.qbeast.spark.model +package io.qbeast.core.model /** * Names of possible states of the cube diff --git a/src/main/scala/io/qbeast/core/model/DataWriter.scala b/core/src/main/scala/io/qbeast/core/model/DataWriter.scala similarity index 100% rename from src/main/scala/io/qbeast/core/model/DataWriter.scala rename to core/src/main/scala/io/qbeast/core/model/DataWriter.scala diff --git a/src/main/scala/io/qbeast/core/model/DenormalizedBlock.scala b/core/src/main/scala/io/qbeast/core/model/DenormalizedBlock.scala similarity index 100% rename from src/main/scala/io/qbeast/core/model/DenormalizedBlock.scala rename to core/src/main/scala/io/qbeast/core/model/DenormalizedBlock.scala diff --git a/src/main/scala/io/qbeast/core/model/IndexFileBuilder.scala b/core/src/main/scala/io/qbeast/core/model/IndexFileBuilder.scala similarity index 100% rename from src/main/scala/io/qbeast/core/model/IndexFileBuilder.scala rename to core/src/main/scala/io/qbeast/core/model/IndexFileBuilder.scala diff --git a/src/main/scala/io/qbeast/core/model/IndexManager.scala b/core/src/main/scala/io/qbeast/core/model/IndexManager.scala similarity index 100% rename from src/main/scala/io/qbeast/core/model/IndexManager.scala rename to core/src/main/scala/io/qbeast/core/model/IndexManager.scala diff --git a/src/main/scala/io/qbeast/core/model/MetadataManager.scala b/core/src/main/scala/io/qbeast/core/model/MetadataManager.scala similarity index 100% rename from src/main/scala/io/qbeast/core/model/MetadataManager.scala rename to core/src/main/scala/io/qbeast/core/model/MetadataManager.scala diff --git a/src/main/scala/io/qbeast/core/model/NormalizedWeight.scala b/core/src/main/scala/io/qbeast/core/model/NormalizedWeight.scala similarity index 100% rename from src/main/scala/io/qbeast/core/model/NormalizedWeight.scala rename to core/src/main/scala/io/qbeast/core/model/NormalizedWeight.scala diff --git a/src/main/scala/io/qbeast/core/model/Point.scala b/core/src/main/scala/io/qbeast/core/model/Point.scala similarity index 100% rename from src/main/scala/io/qbeast/core/model/Point.scala rename to core/src/main/scala/io/qbeast/core/model/Point.scala diff --git a/src/main/scala/io/qbeast/core/model/PointWeightIndexer.scala b/core/src/main/scala/io/qbeast/core/model/PointWeightIndexer.scala similarity index 100% rename from src/main/scala/io/qbeast/core/model/PointWeightIndexer.scala rename to core/src/main/scala/io/qbeast/core/model/PointWeightIndexer.scala diff --git a/src/main/scala/io/qbeast/core/model/PreCommitHook.scala b/core/src/main/scala/io/qbeast/core/model/PreCommitHook.scala similarity index 100% rename from src/main/scala/io/qbeast/core/model/PreCommitHook.scala rename to core/src/main/scala/io/qbeast/core/model/PreCommitHook.scala diff --git a/src/main/scala/io/qbeast/core/model/QDataType.scala b/core/src/main/scala/io/qbeast/core/model/QDataType.scala similarity index 100% rename from src/main/scala/io/qbeast/core/model/QDataType.scala rename to core/src/main/scala/io/qbeast/core/model/QDataType.scala diff --git a/src/main/scala/io/qbeast/core/model/QbeastCoreContext.scala b/core/src/main/scala/io/qbeast/core/model/QbeastCoreContext.scala similarity index 100% rename from src/main/scala/io/qbeast/core/model/QbeastCoreContext.scala rename to core/src/main/scala/io/qbeast/core/model/QbeastCoreContext.scala diff --git a/src/main/scala/io/qbeast/core/model/QbeastFiles.scala b/core/src/main/scala/io/qbeast/core/model/QbeastFiles.scala similarity index 100% rename from 
src/main/scala/io/qbeast/core/model/QbeastFiles.scala rename to core/src/main/scala/io/qbeast/core/model/QbeastFiles.scala diff --git a/src/main/scala/io/qbeast/core/model/QbeastSnapshot.scala b/core/src/main/scala/io/qbeast/core/model/QbeastSnapshot.scala similarity index 100% rename from src/main/scala/io/qbeast/core/model/QbeastSnapshot.scala rename to core/src/main/scala/io/qbeast/core/model/QbeastSnapshot.scala diff --git a/src/main/scala/io/qbeast/core/model/QbeastStats.scala b/core/src/main/scala/io/qbeast/core/model/QbeastStats.scala similarity index 100% rename from src/main/scala/io/qbeast/core/model/QbeastStats.scala rename to core/src/main/scala/io/qbeast/core/model/QbeastStats.scala diff --git a/src/main/scala/io/qbeast/core/model/QuerySpace.scala b/core/src/main/scala/io/qbeast/core/model/QuerySpace.scala similarity index 100% rename from src/main/scala/io/qbeast/core/model/QuerySpace.scala rename to core/src/main/scala/io/qbeast/core/model/QuerySpace.scala diff --git a/src/main/scala/io/qbeast/core/model/RevisionClasses.scala b/core/src/main/scala/io/qbeast/core/model/RevisionClasses.scala similarity index 100% rename from src/main/scala/io/qbeast/core/model/RevisionClasses.scala rename to core/src/main/scala/io/qbeast/core/model/RevisionClasses.scala diff --git a/src/main/scala/io/qbeast/core/model/StagingDataManager.scala b/core/src/main/scala/io/qbeast/core/model/StagingDataManager.scala similarity index 100% rename from src/main/scala/io/qbeast/core/model/StagingDataManager.scala rename to core/src/main/scala/io/qbeast/core/model/StagingDataManager.scala diff --git a/src/main/scala/io/qbeast/core/model/StagingUtils.scala b/core/src/main/scala/io/qbeast/core/model/StagingUtils.scala similarity index 100% rename from src/main/scala/io/qbeast/core/model/StagingUtils.scala rename to core/src/main/scala/io/qbeast/core/model/StagingUtils.scala diff --git a/src/main/scala/io/qbeast/core/model/Weight.scala b/core/src/main/scala/io/qbeast/core/model/Weight.scala similarity index 100% rename from src/main/scala/io/qbeast/core/model/Weight.scala rename to core/src/main/scala/io/qbeast/core/model/Weight.scala diff --git a/src/main/scala/io/qbeast/core/model/WeightRange.scala b/core/src/main/scala/io/qbeast/core/model/WeightRange.scala similarity index 100% rename from src/main/scala/io/qbeast/core/model/WeightRange.scala rename to core/src/main/scala/io/qbeast/core/model/WeightRange.scala diff --git a/src/main/scala/io/qbeast/core/model/package.scala b/core/src/main/scala/io/qbeast/core/model/package.scala similarity index 100% rename from src/main/scala/io/qbeast/core/model/package.scala rename to core/src/main/scala/io/qbeast/core/model/package.scala diff --git a/src/main/scala/io/qbeast/core/transform/CDFNumericQuantilesTransformation.scala b/core/src/main/scala/io/qbeast/core/transform/CDFNumericQuantilesTransformation.scala similarity index 100% rename from src/main/scala/io/qbeast/core/transform/CDFNumericQuantilesTransformation.scala rename to core/src/main/scala/io/qbeast/core/transform/CDFNumericQuantilesTransformation.scala diff --git a/src/main/scala/io/qbeast/core/transform/CDFNumericQuantilesTransformer.scala b/core/src/main/scala/io/qbeast/core/transform/CDFNumericQuantilesTransformer.scala similarity index 100% rename from src/main/scala/io/qbeast/core/transform/CDFNumericQuantilesTransformer.scala rename to core/src/main/scala/io/qbeast/core/transform/CDFNumericQuantilesTransformer.scala diff --git 
a/src/main/scala/io/qbeast/core/transform/CDFQuantilesTransformation.scala b/core/src/main/scala/io/qbeast/core/transform/CDFQuantilesTransformation.scala similarity index 100% rename from src/main/scala/io/qbeast/core/transform/CDFQuantilesTransformation.scala rename to core/src/main/scala/io/qbeast/core/transform/CDFQuantilesTransformation.scala diff --git a/src/main/scala/io/qbeast/core/transform/CDFQuantilesTransformer.scala b/core/src/main/scala/io/qbeast/core/transform/CDFQuantilesTransformer.scala similarity index 100% rename from src/main/scala/io/qbeast/core/transform/CDFQuantilesTransformer.scala rename to core/src/main/scala/io/qbeast/core/transform/CDFQuantilesTransformer.scala diff --git a/src/main/scala/io/qbeast/core/transform/CDFStringQuantilesTransformation.scala b/core/src/main/scala/io/qbeast/core/transform/CDFStringQuantilesTransformation.scala similarity index 100% rename from src/main/scala/io/qbeast/core/transform/CDFStringQuantilesTransformation.scala rename to core/src/main/scala/io/qbeast/core/transform/CDFStringQuantilesTransformation.scala diff --git a/src/main/scala/io/qbeast/core/transform/CDFStringQuantilesTransformer.scala b/core/src/main/scala/io/qbeast/core/transform/CDFStringQuantilesTransformer.scala similarity index 100% rename from src/main/scala/io/qbeast/core/transform/CDFStringQuantilesTransformer.scala rename to core/src/main/scala/io/qbeast/core/transform/CDFStringQuantilesTransformer.scala diff --git a/src/main/scala/io/qbeast/core/transform/EmptyTransformation.scala b/core/src/main/scala/io/qbeast/core/transform/EmptyTransformation.scala similarity index 100% rename from src/main/scala/io/qbeast/core/transform/EmptyTransformation.scala rename to core/src/main/scala/io/qbeast/core/transform/EmptyTransformation.scala diff --git a/src/main/scala/io/qbeast/core/transform/EmptyTransformer.scala b/core/src/main/scala/io/qbeast/core/transform/EmptyTransformer.scala similarity index 100% rename from src/main/scala/io/qbeast/core/transform/EmptyTransformer.scala rename to core/src/main/scala/io/qbeast/core/transform/EmptyTransformer.scala diff --git a/src/main/scala/io/qbeast/core/transform/HashTransformation.scala b/core/src/main/scala/io/qbeast/core/transform/HashTransformation.scala similarity index 100% rename from src/main/scala/io/qbeast/core/transform/HashTransformation.scala rename to core/src/main/scala/io/qbeast/core/transform/HashTransformation.scala diff --git a/src/main/scala/io/qbeast/core/transform/HashTransformer.scala b/core/src/main/scala/io/qbeast/core/transform/HashTransformer.scala similarity index 100% rename from src/main/scala/io/qbeast/core/transform/HashTransformer.scala rename to core/src/main/scala/io/qbeast/core/transform/HashTransformer.scala diff --git a/src/main/scala/io/qbeast/core/transform/LinearTransformation.scala b/core/src/main/scala/io/qbeast/core/transform/LinearTransformation.scala similarity index 100% rename from src/main/scala/io/qbeast/core/transform/LinearTransformation.scala rename to core/src/main/scala/io/qbeast/core/transform/LinearTransformation.scala diff --git a/src/main/scala/io/qbeast/core/transform/LinearTransformer.scala b/core/src/main/scala/io/qbeast/core/transform/LinearTransformer.scala similarity index 100% rename from src/main/scala/io/qbeast/core/transform/LinearTransformer.scala rename to core/src/main/scala/io/qbeast/core/transform/LinearTransformer.scala diff --git a/src/main/scala/io/qbeast/core/transform/ManualPlaceholderTransformation.scala 
b/core/src/main/scala/io/qbeast/core/transform/ManualPlaceholderTransformation.scala similarity index 100% rename from src/main/scala/io/qbeast/core/transform/ManualPlaceholderTransformation.scala rename to core/src/main/scala/io/qbeast/core/transform/ManualPlaceholderTransformation.scala diff --git a/src/main/scala/io/qbeast/core/transform/Transformation.scala b/core/src/main/scala/io/qbeast/core/transform/Transformation.scala similarity index 100% rename from src/main/scala/io/qbeast/core/transform/Transformation.scala rename to core/src/main/scala/io/qbeast/core/transform/Transformation.scala diff --git a/src/main/scala/io/qbeast/core/transform/Transformer.scala b/core/src/main/scala/io/qbeast/core/transform/Transformer.scala similarity index 100% rename from src/main/scala/io/qbeast/core/transform/Transformer.scala rename to core/src/main/scala/io/qbeast/core/transform/Transformer.scala diff --git a/src/main/scala/io/qbeast/package.scala b/core/src/main/scala/io/qbeast/package.scala similarity index 100% rename from src/main/scala/io/qbeast/package.scala rename to core/src/main/scala/io/qbeast/package.scala diff --git a/src/main/scala/io/qbeast/spark/implicits/package.scala b/core/src/main/scala/io/qbeast/spark/implicits/package.scala similarity index 100% rename from src/main/scala/io/qbeast/spark/implicits/package.scala rename to core/src/main/scala/io/qbeast/spark/implicits/package.scala diff --git a/src/main/scala/io/qbeast/spark/index/ColumnsToIndex.scala b/core/src/main/scala/io/qbeast/spark/index/ColumnsToIndex.scala similarity index 100% rename from src/main/scala/io/qbeast/spark/index/ColumnsToIndex.scala rename to core/src/main/scala/io/qbeast/spark/index/ColumnsToIndex.scala diff --git a/src/main/scala/io/qbeast/spark/index/EmptyFileIndex.scala b/core/src/main/scala/io/qbeast/spark/index/EmptyFileIndex.scala similarity index 95% rename from src/main/scala/io/qbeast/spark/index/EmptyFileIndex.scala rename to core/src/main/scala/io/qbeast/spark/index/EmptyFileIndex.scala index 970cfc284..21bdf13a8 100644 --- a/src/main/scala/io/qbeast/spark/index/EmptyFileIndex.scala +++ b/core/src/main/scala/io/qbeast/spark/index/EmptyFileIndex.scala @@ -23,7 +23,7 @@ import org.apache.spark.sql.types.StructField import org.apache.spark.sql.types.StructType /** - * Implementation of FileIndex to be used for empty tables. + * Implementation of FileIndex to be used for empty table. 
*/ object EmptyFileIndex extends FileIndex with Serializable { diff --git a/src/main/scala/io/qbeast/spark/index/IndexStatusBuilder.scala b/core/src/main/scala/io/qbeast/spark/index/IndexStatusBuilder.scala similarity index 100% rename from src/main/scala/io/qbeast/spark/index/IndexStatusBuilder.scala rename to core/src/main/scala/io/qbeast/spark/index/IndexStatusBuilder.scala diff --git a/src/main/scala/io/qbeast/spark/index/OTreeDataAnalyzer.scala b/core/src/main/scala/io/qbeast/spark/index/OTreeDataAnalyzer.scala similarity index 100% rename from src/main/scala/io/qbeast/spark/index/OTreeDataAnalyzer.scala rename to core/src/main/scala/io/qbeast/spark/index/OTreeDataAnalyzer.scala diff --git a/src/main/scala/io/qbeast/spark/index/QbeastColumns.scala b/core/src/main/scala/io/qbeast/spark/index/QbeastColumns.scala similarity index 100% rename from src/main/scala/io/qbeast/spark/index/QbeastColumns.scala rename to core/src/main/scala/io/qbeast/spark/index/QbeastColumns.scala diff --git a/src/main/scala/io/qbeast/spark/index/RowUtils.scala b/core/src/main/scala/io/qbeast/spark/index/RowUtils.scala similarity index 100% rename from src/main/scala/io/qbeast/spark/index/RowUtils.scala rename to core/src/main/scala/io/qbeast/spark/index/RowUtils.scala diff --git a/src/main/scala/io/qbeast/spark/index/SparkColumnsToIndexSelector.scala b/core/src/main/scala/io/qbeast/spark/index/SparkColumnsToIndexSelector.scala similarity index 100% rename from src/main/scala/io/qbeast/spark/index/SparkColumnsToIndexSelector.scala rename to core/src/main/scala/io/qbeast/spark/index/SparkColumnsToIndexSelector.scala diff --git a/src/main/scala/io/qbeast/spark/index/SparkOTreeManager.scala b/core/src/main/scala/io/qbeast/spark/index/SparkOTreeManager.scala similarity index 100% rename from src/main/scala/io/qbeast/spark/index/SparkOTreeManager.scala rename to core/src/main/scala/io/qbeast/spark/index/SparkOTreeManager.scala diff --git a/src/main/scala/io/qbeast/spark/index/SparkPointWeightIndexer.scala b/core/src/main/scala/io/qbeast/spark/index/SparkPointWeightIndexer.scala similarity index 100% rename from src/main/scala/io/qbeast/spark/index/SparkPointWeightIndexer.scala rename to core/src/main/scala/io/qbeast/spark/index/SparkPointWeightIndexer.scala diff --git a/src/main/scala/io/qbeast/spark/index/SparkRevisionFactory.scala b/core/src/main/scala/io/qbeast/spark/index/SparkRevisionFactory.scala similarity index 100% rename from src/main/scala/io/qbeast/spark/index/SparkRevisionFactory.scala rename to core/src/main/scala/io/qbeast/spark/index/SparkRevisionFactory.scala diff --git a/src/main/scala/io/qbeast/spark/index/package.scala b/core/src/main/scala/io/qbeast/spark/index/package.scala similarity index 100% rename from src/main/scala/io/qbeast/spark/index/package.scala rename to core/src/main/scala/io/qbeast/spark/index/package.scala diff --git a/src/main/scala/io/qbeast/spark/index/query/QbeastFilters.scala b/core/src/main/scala/io/qbeast/spark/index/query/QbeastFilters.scala similarity index 100% rename from src/main/scala/io/qbeast/spark/index/query/QbeastFilters.scala rename to core/src/main/scala/io/qbeast/spark/index/query/QbeastFilters.scala diff --git a/src/main/scala/io/qbeast/spark/index/query/QueryExecutor.scala b/core/src/main/scala/io/qbeast/spark/index/query/QueryExecutor.scala similarity index 100% rename from src/main/scala/io/qbeast/spark/index/query/QueryExecutor.scala rename to core/src/main/scala/io/qbeast/spark/index/query/QueryExecutor.scala diff --git 
a/src/main/scala/io/qbeast/spark/index/query/QueryFiltersUtils.scala b/core/src/main/scala/io/qbeast/spark/index/query/QueryFiltersUtils.scala similarity index 100% rename from src/main/scala/io/qbeast/spark/index/query/QueryFiltersUtils.scala rename to core/src/main/scala/io/qbeast/spark/index/query/QueryFiltersUtils.scala diff --git a/src/main/scala/io/qbeast/spark/index/query/QuerySpec.scala b/core/src/main/scala/io/qbeast/spark/index/query/QuerySpec.scala similarity index 100% rename from src/main/scala/io/qbeast/spark/index/query/QuerySpec.scala rename to core/src/main/scala/io/qbeast/spark/index/query/QuerySpec.scala diff --git a/src/main/scala/io/qbeast/spark/index/query/QuerySpecBuilder.scala b/core/src/main/scala/io/qbeast/spark/index/query/QuerySpecBuilder.scala similarity index 100% rename from src/main/scala/io/qbeast/spark/index/query/QuerySpecBuilder.scala rename to core/src/main/scala/io/qbeast/spark/index/query/QuerySpecBuilder.scala diff --git a/src/main/scala/io/qbeast/spark/internal/QbeastFunctions.scala b/core/src/main/scala/io/qbeast/spark/internal/QbeastFunctions.scala similarity index 100% rename from src/main/scala/io/qbeast/spark/internal/QbeastFunctions.scala rename to core/src/main/scala/io/qbeast/spark/internal/QbeastFunctions.scala diff --git a/src/main/scala/io/qbeast/spark/internal/QbeastOptions.scala b/core/src/main/scala/io/qbeast/spark/internal/QbeastOptions.scala similarity index 93% rename from src/main/scala/io/qbeast/spark/internal/QbeastOptions.scala rename to core/src/main/scala/io/qbeast/spark/internal/QbeastOptions.scala index 8547bfd81..87c20a992 100644 --- a/src/main/scala/io/qbeast/spark/internal/QbeastOptions.scala +++ b/core/src/main/scala/io/qbeast/spark/internal/QbeastOptions.scala @@ -24,6 +24,7 @@ import io.qbeast.spark.index.ColumnsToIndex import io.qbeast.spark.internal.QbeastOptions.COLUMNS_TO_INDEX import io.qbeast.spark.internal.QbeastOptions.CUBE_SIZE import org.apache.spark.qbeast.config.DEFAULT_CUBE_SIZE +import org.apache.spark.qbeast.config.DEFAULT_TABLE_FORMAT import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap import org.apache.spark.sql.AnalysisExceptionFactory import org.apache.spark.sql.DataFrame @@ -56,6 +57,7 @@ import scala.util.matching.Regex case class QbeastOptions( columnsToIndex: Seq[String], cubeSize: Int, + tableFormat: String, stats: Option[DataFrame], txnAppId: Option[String], txnVersion: Option[String], @@ -104,6 +106,7 @@ case class QbeastOptions( object QbeastOptions { val COLUMNS_TO_INDEX: String = "columnsToIndex" val CUBE_SIZE: String = "cubeSize" + val TABLE_FORMAT: String = "tableFormat" val PATH: String = "path" val STATS: String = "columnStats" val TXN_APP_ID: String = "txnAppId" @@ -142,6 +145,12 @@ object QbeastOptions { } } + private def getTableFormat(options: Map[String, String]): String = + options.get(TABLE_FORMAT) match { + case Some(value) => value + case None => DEFAULT_TABLE_FORMAT + } + /** * Get the column stats from the options This stats should be in a JSON formatted string with * the following schema {columnName_min:value, columnName_max:value, ...} @@ -219,6 +228,7 @@ object QbeastOptions { def apply(options: CaseInsensitiveMap[String]): QbeastOptions = { val columnsToIndex = getColumnsToIndex(options) val desiredCubeSize = getDesiredCubeSize(options) + val tableFormat = getTableFormat(options) val stats = getStats(options) val txnAppId = getTxnAppId(options) val txnVersion = getTxnVersion(options) @@ -230,6 +240,7 @@ object QbeastOptions { QbeastOptions( columnsToIndex, 
desiredCubeSize, + tableFormat, stats, txnAppId, txnVersion, @@ -258,14 +269,33 @@ object QbeastOptions { val caseInsensitiveMap = CaseInsensitiveMap(options) val userMetadata = getUserMetadata(caseInsensitiveMap) val hookInfo = getHookInfo(caseInsensitiveMap) - QbeastOptions(Seq.empty, 0, None, None, None, userMetadata, None, None, hookInfo) + QbeastOptions( + Seq.empty, + 0, + DEFAULT_TABLE_FORMAT, + None, + None, + None, + userMetadata, + None, + None, + hookInfo) } /** * The empty options to be used as a placeholder. */ lazy val empty: QbeastOptions = - QbeastOptions(Seq.empty, DEFAULT_CUBE_SIZE, None, None, None, None, None, None) + QbeastOptions( + Seq.empty, + DEFAULT_CUBE_SIZE, + DEFAULT_TABLE_FORMAT, + None, + None, + None, + None, + None, + None) def loadTableIDFromParameters(parameters: Map[String, String]): QTableID = { new QTableID( diff --git a/src/main/scala/io/qbeast/spark/internal/expressions/QbeastMurmur3Hash.scala b/core/src/main/scala/io/qbeast/spark/internal/expressions/QbeastMurmur3Hash.scala similarity index 100% rename from src/main/scala/io/qbeast/spark/internal/expressions/QbeastMurmur3Hash.scala rename to core/src/main/scala/io/qbeast/spark/internal/expressions/QbeastMurmur3Hash.scala diff --git a/src/main/scala/io/qbeast/spark/utils/IndexMetrics.scala b/core/src/main/scala/io/qbeast/spark/utils/IndexMetrics.scala similarity index 100% rename from src/main/scala/io/qbeast/spark/utils/IndexMetrics.scala rename to core/src/main/scala/io/qbeast/spark/utils/IndexMetrics.scala diff --git a/src/main/scala/io/qbeast/spark/utils/Params.scala b/core/src/main/scala/io/qbeast/spark/utils/Params.scala similarity index 100% rename from src/main/scala/io/qbeast/spark/utils/Params.scala rename to core/src/main/scala/io/qbeast/spark/utils/Params.scala diff --git a/src/main/scala/io/qbeast/spark/utils/QbeastExceptionMessages.scala b/core/src/main/scala/io/qbeast/spark/utils/QbeastExceptionMessages.scala similarity index 100% rename from src/main/scala/io/qbeast/spark/utils/QbeastExceptionMessages.scala rename to core/src/main/scala/io/qbeast/spark/utils/QbeastExceptionMessages.scala diff --git a/src/main/scala/io/qbeast/spark/utils/SparkToQTypesUtils.scala b/core/src/main/scala/io/qbeast/spark/utils/SparkToQTypesUtils.scala similarity index 100% rename from src/main/scala/io/qbeast/spark/utils/SparkToQTypesUtils.scala rename to core/src/main/scala/io/qbeast/spark/utils/SparkToQTypesUtils.scala diff --git a/src/main/scala/io/qbeast/spark/writer/BlockStats.scala b/core/src/main/scala/io/qbeast/spark/writer/BlockStats.scala similarity index 100% rename from src/main/scala/io/qbeast/spark/writer/BlockStats.scala rename to core/src/main/scala/io/qbeast/spark/writer/BlockStats.scala diff --git a/src/main/scala/io/qbeast/spark/writer/BlockWriter.scala b/core/src/main/scala/io/qbeast/spark/writer/BlockWriter.scala similarity index 99% rename from src/main/scala/io/qbeast/spark/writer/BlockWriter.scala rename to core/src/main/scala/io/qbeast/spark/writer/BlockWriter.scala index 8d8595cbe..96adf12c2 100644 --- a/src/main/scala/io/qbeast/spark/writer/BlockWriter.scala +++ b/core/src/main/scala/io/qbeast/spark/writer/BlockWriter.scala @@ -16,13 +16,13 @@ package io.qbeast.spark.writer import io.qbeast.core.model.CubeId +import io.qbeast.core.model.CubeState import io.qbeast.core.model.IndexFile import io.qbeast.core.model.IndexFileBuilder import io.qbeast.core.model.IndexFileBuilder.BlockBuilder import io.qbeast.core.model.TableChanges import io.qbeast.core.model.Weight import 
io.qbeast.spark.index.QbeastColumns -import io.qbeast.spark.model.CubeState import org.apache.hadoop.fs.Path import org.apache.hadoop.mapred.JobConf import org.apache.hadoop.mapred.TaskAttemptContextImpl diff --git a/src/main/scala/io/qbeast/spark/writer/IndexFileWriter.scala b/core/src/main/scala/io/qbeast/spark/writer/IndexFileWriter.scala similarity index 100% rename from src/main/scala/io/qbeast/spark/writer/IndexFileWriter.scala rename to core/src/main/scala/io/qbeast/spark/writer/IndexFileWriter.scala diff --git a/src/main/scala/io/qbeast/spark/writer/IndexFileWriterFactory.scala b/core/src/main/scala/io/qbeast/spark/writer/IndexFileWriterFactory.scala similarity index 100% rename from src/main/scala/io/qbeast/spark/writer/IndexFileWriterFactory.scala rename to core/src/main/scala/io/qbeast/spark/writer/IndexFileWriterFactory.scala diff --git a/src/main/scala/io/qbeast/spark/writer/Rollup.scala b/core/src/main/scala/io/qbeast/spark/writer/Rollup.scala similarity index 100% rename from src/main/scala/io/qbeast/spark/writer/Rollup.scala rename to core/src/main/scala/io/qbeast/spark/writer/Rollup.scala diff --git a/src/main/scala/io/qbeast/spark/writer/RollupDataWriter.scala b/core/src/main/scala/io/qbeast/spark/writer/RollupDataWriter.scala similarity index 100% rename from src/main/scala/io/qbeast/spark/writer/RollupDataWriter.scala rename to core/src/main/scala/io/qbeast/spark/writer/RollupDataWriter.scala diff --git a/src/main/scala/io/qbeast/spark/writer/StatsTracker.scala b/core/src/main/scala/io/qbeast/spark/writer/StatsTracker.scala similarity index 100% rename from src/main/scala/io/qbeast/spark/writer/StatsTracker.scala rename to core/src/main/scala/io/qbeast/spark/writer/StatsTracker.scala diff --git a/src/main/scala/org/apache/spark/qbeast/config.scala b/core/src/main/scala/org/apache/spark/qbeast/config.scala similarity index 70% rename from src/main/scala/org/apache/spark/qbeast/config.scala rename to core/src/main/scala/org/apache/spark/qbeast/config.scala index a449e958f..95506d4c5 100644 --- a/src/main/scala/org/apache/spark/qbeast/config.scala +++ b/core/src/main/scala/org/apache/spark/qbeast/config.scala @@ -15,10 +15,10 @@ */ package org.apache.spark.qbeast -import io.qbeast.context.QbeastContext import org.apache.spark.internal.config.ConfigBuilder import org.apache.spark.internal.config.ConfigEntry import org.apache.spark.internal.config.OptionalConfigEntry +import org.apache.spark.sql.SparkSession package object config { @@ -58,19 +58,31 @@ package object config { .intConf .createWithDefault(3) - def DEFAULT_NUMBER_OF_RETRIES: Int = QbeastContext.config + private[config] val tableFormat: ConfigEntry[String] = + ConfigBuilder("spark.qbeast.tableFormat") + .version("0.2.0") + .stringConf + .createWithDefault("delta") + + def DEFAULT_NUMBER_OF_RETRIES: Int = SparkSession.active.sparkContext.conf .get(defaultNumberOfRetries) - def DEFAULT_CUBE_SIZE: Int = QbeastContext.config + def DEFAULT_CUBE_SIZE: Int = SparkSession.active.sparkContext.conf .get(defaultCubeSize) - def CUBE_WEIGHTS_BUFFER_CAPACITY: Long = QbeastContext.config + def DEFAULT_TABLE_FORMAT: String = SparkSession.active.sparkContext.conf + .get(tableFormat) + + def CUBE_WEIGHTS_BUFFER_CAPACITY: Long = SparkSession.active.sparkContext.conf .get(cubeWeightsBufferCapacity) - def STAGING_SIZE_IN_BYTES: Option[Long] = QbeastContext.config.get(stagingSizeInBytes) + def STAGING_SIZE_IN_BYTES: Option[Long] = + SparkSession.active.sparkContext.conf.get(stagingSizeInBytes) - def COLUMN_SELECTOR_ENABLED: Boolean = 
QbeastContext.config.get(columnsToIndexSelectorEnabled) + def COLUMN_SELECTOR_ENABLED: Boolean = + SparkSession.active.sparkContext.conf.get(columnsToIndexSelectorEnabled) - def MAX_NUM_COLUMNS_TO_INDEX: Int = QbeastContext.config.get(maxNumColumnsToIndex) + def MAX_NUM_COLUMNS_TO_INDEX: Int = + SparkSession.active.sparkContext.conf.get(maxNumColumnsToIndex) } diff --git a/src/main/scala/org/apache/spark/sql/AnalysisExceptionFactory.scala b/core/src/main/scala/org/apache/spark/sql/AnalysisExceptionFactory.scala similarity index 100% rename from src/main/scala/org/apache/spark/sql/AnalysisExceptionFactory.scala rename to core/src/main/scala/org/apache/spark/sql/AnalysisExceptionFactory.scala diff --git a/src/main/scala/org/apache/spark/sql/DataFrameUtils.scala b/core/src/main/scala/org/apache/spark/sql/DataFrameUtils.scala similarity index 100% rename from src/main/scala/org/apache/spark/sql/DataFrameUtils.scala rename to core/src/main/scala/org/apache/spark/sql/DataFrameUtils.scala diff --git a/src/main/scala/org/apache/spark/sql/DatasetFactory.scala b/core/src/main/scala/org/apache/spark/sql/DatasetFactory.scala similarity index 100% rename from src/main/scala/org/apache/spark/sql/DatasetFactory.scala rename to core/src/main/scala/org/apache/spark/sql/DatasetFactory.scala diff --git a/src/main/scala/org/apache/spark/sql/SchemaUtils.scala b/core/src/main/scala/org/apache/spark/sql/SchemaUtils.scala similarity index 100% rename from src/main/scala/org/apache/spark/sql/SchemaUtils.scala rename to core/src/main/scala/org/apache/spark/sql/SchemaUtils.scala diff --git a/src/main/scala/org/apache/spark/sql/SparkCatalogUtils.scala b/core/src/main/scala/org/apache/spark/sql/SparkCatalogUtils.scala similarity index 100% rename from src/main/scala/org/apache/spark/sql/SparkCatalogUtils.scala rename to core/src/main/scala/org/apache/spark/sql/SparkCatalogUtils.scala diff --git a/src/main/scala/org/apache/spark/sql/V2AndV1Traits.scala b/core/src/main/scala/org/apache/spark/sql/V2AndV1Traits.scala similarity index 100% rename from src/main/scala/org/apache/spark/sql/V2AndV1Traits.scala rename to core/src/main/scala/org/apache/spark/sql/V2AndV1Traits.scala diff --git a/src/main/scala/org/apache/spark/sql/connector/catalog/SparkCatalogV2Util.scala b/core/src/main/scala/org/apache/spark/sql/connector/catalog/SparkCatalogV2Util.scala similarity index 100% rename from src/main/scala/org/apache/spark/sql/connector/catalog/SparkCatalogV2Util.scala rename to core/src/main/scala/org/apache/spark/sql/connector/catalog/SparkCatalogV2Util.scala diff --git a/src/test/java/io/qbeast/core/model/CubeKeyFactoryJava.java b/core/src/test/java/io/qbeast/core/model/CubeKeyFactoryJava.java similarity index 100% rename from src/test/java/io/qbeast/core/model/CubeKeyFactoryJava.java rename to core/src/test/java/io/qbeast/core/model/CubeKeyFactoryJava.java diff --git a/src/test/java/io/qbeast/spark/model/CubeKeyFactoryJava.java b/core/src/test/java/io/qbeast/spark/model/CubeKeyFactoryJava.java similarity index 100% rename from src/test/java/io/qbeast/spark/model/CubeKeyFactoryJava.java rename to core/src/test/java/io/qbeast/spark/model/CubeKeyFactoryJava.java diff --git a/src/test/scala/io/qbeast/core/model/BlockTest.scala b/core/src/test/scala/io/qbeast/core/model/BlockTest.scala similarity index 100% rename from src/test/scala/io/qbeast/core/model/BlockTest.scala rename to core/src/test/scala/io/qbeast/core/model/BlockTest.scala diff --git a/src/test/scala/io/qbeast/core/model/ColumnToIndexTest.scala 
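For context on the hunks above: the write path gains a `tableFormat` option (key `TABLE_FORMAT = "tableFormat"`) and the session configuration gains a `spark.qbeast.tableFormat` entry that `DEFAULT_TABLE_FORMAT` now reads from the active `SparkSession` (falling back to `delta`). Below is a minimal, hypothetical sketch of how both could be exercised; the table path, column name and data are placeholders, not part of this patch. Reading the defaults through `SparkSession.active.sparkContext.conf` instead of `QbeastContext.config` also suggests the intent of decoupling `config` from `QbeastContext`, per the removed import.

```scala
import org.apache.spark.sql.SparkSession

// Session-wide default read by DEFAULT_TABLE_FORMAT; "delta" is already the built-in default.
val spark = SparkSession.builder()
  .appName("qbeast-tableFormat-sketch")
  .config("spark.qbeast.tableFormat", "delta")
  .getOrCreate()

val df = spark.range(1000).toDF("id")

// Per-write override through the new QbeastOptions key; path and indexed column are examples.
df.write
  .format("qbeast")
  .option("columnsToIndex", "id")
  .option("cubeSize", "5000")
  .option("tableFormat", "delta")
  .save("/tmp/qbeast_tableFormat_demo")
```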
b/core/src/test/scala/io/qbeast/core/model/ColumnToIndexTest.scala similarity index 100% rename from src/test/scala/io/qbeast/core/model/ColumnToIndexTest.scala rename to core/src/test/scala/io/qbeast/core/model/ColumnToIndexTest.scala diff --git a/src/test/scala/io/qbeast/core/model/ColumnToIndexUtilsTest.scala b/core/src/test/scala/io/qbeast/core/model/ColumnToIndexUtilsTest.scala similarity index 100% rename from src/test/scala/io/qbeast/core/model/ColumnToIndexUtilsTest.scala rename to core/src/test/scala/io/qbeast/core/model/ColumnToIndexUtilsTest.scala diff --git a/src/test/scala/io/qbeast/core/model/CreateCubeKeySpeedTest.scala b/core/src/test/scala/io/qbeast/core/model/CreateCubeKeySpeedTest.scala similarity index 100% rename from src/test/scala/io/qbeast/core/model/CreateCubeKeySpeedTest.scala rename to core/src/test/scala/io/qbeast/core/model/CreateCubeKeySpeedTest.scala diff --git a/src/test/scala/io/qbeast/core/model/CreateCubeKeyTest.scala b/core/src/test/scala/io/qbeast/core/model/CreateCubeKeyTest.scala similarity index 100% rename from src/test/scala/io/qbeast/core/model/CreateCubeKeyTest.scala rename to core/src/test/scala/io/qbeast/core/model/CreateCubeKeyTest.scala diff --git a/src/test/scala/io/qbeast/core/model/CubeDomainsBuilderTest.scala b/core/src/test/scala/io/qbeast/core/model/CubeDomainsBuilderTest.scala similarity index 100% rename from src/test/scala/io/qbeast/core/model/CubeDomainsBuilderTest.scala rename to core/src/test/scala/io/qbeast/core/model/CubeDomainsBuilderTest.scala diff --git a/src/test/scala/io/qbeast/core/model/CubeIdSpeedTest.scala b/core/src/test/scala/io/qbeast/core/model/CubeIdSpeedTest.scala similarity index 100% rename from src/test/scala/io/qbeast/core/model/CubeIdSpeedTest.scala rename to core/src/test/scala/io/qbeast/core/model/CubeIdSpeedTest.scala diff --git a/src/test/scala/io/qbeast/core/model/CubeIdTest.scala b/core/src/test/scala/io/qbeast/core/model/CubeIdTest.scala similarity index 100% rename from src/test/scala/io/qbeast/core/model/CubeIdTest.scala rename to core/src/test/scala/io/qbeast/core/model/CubeIdTest.scala diff --git a/src/test/scala/io/qbeast/core/model/CubeKeyFactory.scala b/core/src/test/scala/io/qbeast/core/model/CubeKeyFactory.scala similarity index 100% rename from src/test/scala/io/qbeast/core/model/CubeKeyFactory.scala rename to core/src/test/scala/io/qbeast/core/model/CubeKeyFactory.scala diff --git a/src/test/scala/io/qbeast/core/model/CubeNormalizedWeightsTest.scala b/core/src/test/scala/io/qbeast/core/model/CubeNormalizedWeightsTest.scala similarity index 100% rename from src/test/scala/io/qbeast/core/model/CubeNormalizedWeightsTest.scala rename to core/src/test/scala/io/qbeast/core/model/CubeNormalizedWeightsTest.scala diff --git a/src/test/scala/io/qbeast/core/model/EstimateGroupCubeSizeTest.scala b/core/src/test/scala/io/qbeast/core/model/EstimateGroupCubeSizeTest.scala similarity index 100% rename from src/test/scala/io/qbeast/core/model/EstimateGroupCubeSizeTest.scala rename to core/src/test/scala/io/qbeast/core/model/EstimateGroupCubeSizeTest.scala diff --git a/src/test/scala/io/qbeast/core/model/IndexFileTest.scala b/core/src/test/scala/io/qbeast/core/model/IndexFileTest.scala similarity index 100% rename from src/test/scala/io/qbeast/core/model/IndexFileTest.scala rename to core/src/test/scala/io/qbeast/core/model/IndexFileTest.scala diff --git a/src/test/scala/io/qbeast/core/model/JSONSerializationTests.scala b/core/src/test/scala/io/qbeast/core/model/JSONSerializationTests.scala similarity index 
100% rename from src/test/scala/io/qbeast/core/model/JSONSerializationTests.scala rename to core/src/test/scala/io/qbeast/core/model/JSONSerializationTests.scala diff --git a/src/test/scala/io/qbeast/core/model/PointTest.scala b/core/src/test/scala/io/qbeast/core/model/PointTest.scala similarity index 100% rename from src/test/scala/io/qbeast/core/model/PointTest.scala rename to core/src/test/scala/io/qbeast/core/model/PointTest.scala diff --git a/src/test/scala/io/qbeast/core/model/PointWeightIndexerTest.scala b/core/src/test/scala/io/qbeast/core/model/PointWeightIndexerTest.scala similarity index 100% rename from src/test/scala/io/qbeast/core/model/PointWeightIndexerTest.scala rename to core/src/test/scala/io/qbeast/core/model/PointWeightIndexerTest.scala diff --git a/src/test/scala/io/qbeast/core/model/QTableIDTest.scala b/core/src/test/scala/io/qbeast/core/model/QTableIDTest.scala similarity index 100% rename from src/test/scala/io/qbeast/core/model/QTableIDTest.scala rename to core/src/test/scala/io/qbeast/core/model/QTableIDTest.scala diff --git a/src/test/scala/io/qbeast/core/model/QbeastHookLoaderTest.scala b/core/src/test/scala/io/qbeast/core/model/QbeastHookLoaderTest.scala similarity index 100% rename from src/test/scala/io/qbeast/core/model/QbeastHookLoaderTest.scala rename to core/src/test/scala/io/qbeast/core/model/QbeastHookLoaderTest.scala diff --git a/src/test/scala/io/qbeast/core/model/QuerySpaceFromToTest.scala b/core/src/test/scala/io/qbeast/core/model/QuerySpaceFromToTest.scala similarity index 100% rename from src/test/scala/io/qbeast/core/model/QuerySpaceFromToTest.scala rename to core/src/test/scala/io/qbeast/core/model/QuerySpaceFromToTest.scala diff --git a/src/test/scala/io/qbeast/core/model/RevisionTest.scala b/core/src/test/scala/io/qbeast/core/model/RevisionTest.scala similarity index 100% rename from src/test/scala/io/qbeast/core/model/RevisionTest.scala rename to core/src/test/scala/io/qbeast/core/model/RevisionTest.scala diff --git a/src/test/scala/io/qbeast/core/model/WeightAndCountTest.scala b/core/src/test/scala/io/qbeast/core/model/WeightAndCountTest.scala similarity index 100% rename from src/test/scala/io/qbeast/core/model/WeightAndCountTest.scala rename to core/src/test/scala/io/qbeast/core/model/WeightAndCountTest.scala diff --git a/src/test/scala/io/qbeast/core/transform/CDFNumericQuantilesTransformationTest.scala b/core/src/test/scala/io/qbeast/core/transform/CDFNumericQuantilesTransformationTest.scala similarity index 100% rename from src/test/scala/io/qbeast/core/transform/CDFNumericQuantilesTransformationTest.scala rename to core/src/test/scala/io/qbeast/core/transform/CDFNumericQuantilesTransformationTest.scala diff --git a/src/test/scala/io/qbeast/core/transform/CDFQuantilesTransformerTest.scala b/core/src/test/scala/io/qbeast/core/transform/CDFQuantilesTransformerTest.scala similarity index 100% rename from src/test/scala/io/qbeast/core/transform/CDFQuantilesTransformerTest.scala rename to core/src/test/scala/io/qbeast/core/transform/CDFQuantilesTransformerTest.scala diff --git a/src/test/scala/io/qbeast/core/transform/CDFStringQuantilesTransformationTest.scala b/core/src/test/scala/io/qbeast/core/transform/CDFStringQuantilesTransformationTest.scala similarity index 100% rename from src/test/scala/io/qbeast/core/transform/CDFStringQuantilesTransformationTest.scala rename to core/src/test/scala/io/qbeast/core/transform/CDFStringQuantilesTransformationTest.scala diff --git a/src/test/scala/io/qbeast/core/transform/EmptyTransformationTest.scala 
b/core/src/test/scala/io/qbeast/core/transform/EmptyTransformationTest.scala similarity index 100% rename from src/test/scala/io/qbeast/core/transform/EmptyTransformationTest.scala rename to core/src/test/scala/io/qbeast/core/transform/EmptyTransformationTest.scala diff --git a/src/test/scala/io/qbeast/core/transform/HashTransformationTest.scala b/core/src/test/scala/io/qbeast/core/transform/HashTransformationTest.scala similarity index 100% rename from src/test/scala/io/qbeast/core/transform/HashTransformationTest.scala rename to core/src/test/scala/io/qbeast/core/transform/HashTransformationTest.scala diff --git a/src/test/scala/io/qbeast/core/transform/LinearTransformationTest.scala b/core/src/test/scala/io/qbeast/core/transform/LinearTransformationTest.scala similarity index 100% rename from src/test/scala/io/qbeast/core/transform/LinearTransformationTest.scala rename to core/src/test/scala/io/qbeast/core/transform/LinearTransformationTest.scala diff --git a/src/test/scala/io/qbeast/core/transform/TransformerTest.scala b/core/src/test/scala/io/qbeast/core/transform/TransformerTest.scala similarity index 100% rename from src/test/scala/io/qbeast/core/transform/TransformerTest.scala rename to core/src/test/scala/io/qbeast/core/transform/TransformerTest.scala diff --git a/src/main/scala/io/qbeast/spark/delta/DefaultFileIndex.scala b/delta/src/main/scala/io/qbeast/spark/delta/DefaultFileIndex.scala similarity index 100% rename from src/main/scala/io/qbeast/spark/delta/DefaultFileIndex.scala rename to delta/src/main/scala/io/qbeast/spark/delta/DefaultFileIndex.scala diff --git a/src/main/scala/io/qbeast/spark/delta/DefaultListFilesStrategy.scala b/delta/src/main/scala/io/qbeast/spark/delta/DefaultListFilesStrategy.scala similarity index 100% rename from src/main/scala/io/qbeast/spark/delta/DefaultListFilesStrategy.scala rename to delta/src/main/scala/io/qbeast/spark/delta/DefaultListFilesStrategy.scala diff --git a/src/main/scala/io/qbeast/spark/delta/DeltaMetadataManager.scala b/delta/src/main/scala/io/qbeast/spark/delta/DeltaMetadataManager.scala similarity index 100% rename from src/main/scala/io/qbeast/spark/delta/DeltaMetadataManager.scala rename to delta/src/main/scala/io/qbeast/spark/delta/DeltaMetadataManager.scala diff --git a/src/main/scala/io/qbeast/spark/delta/DeltaMetadataWriter.scala b/delta/src/main/scala/io/qbeast/spark/delta/DeltaMetadataWriter.scala similarity index 100% rename from src/main/scala/io/qbeast/spark/delta/DeltaMetadataWriter.scala rename to delta/src/main/scala/io/qbeast/spark/delta/DeltaMetadataWriter.scala diff --git a/src/main/scala/io/qbeast/spark/delta/DeltaQbeastFileUtils.scala b/delta/src/main/scala/io/qbeast/spark/delta/DeltaQbeastFileUtils.scala similarity index 100% rename from src/main/scala/io/qbeast/spark/delta/DeltaQbeastFileUtils.scala rename to delta/src/main/scala/io/qbeast/spark/delta/DeltaQbeastFileUtils.scala diff --git a/src/main/scala/io/qbeast/spark/delta/DeltaQbeastSnapshot.scala b/delta/src/main/scala/io/qbeast/spark/delta/DeltaQbeastSnapshot.scala similarity index 100% rename from src/main/scala/io/qbeast/spark/delta/DeltaQbeastSnapshot.scala rename to delta/src/main/scala/io/qbeast/spark/delta/DeltaQbeastSnapshot.scala diff --git a/src/main/scala/io/qbeast/spark/delta/DeltaQbeastStatsUtils.scala b/delta/src/main/scala/io/qbeast/spark/delta/DeltaQbeastStatsUtils.scala similarity index 100% rename from src/main/scala/io/qbeast/spark/delta/DeltaQbeastStatsUtils.scala rename to 
delta/src/main/scala/io/qbeast/spark/delta/DeltaQbeastStatsUtils.scala diff --git a/src/main/scala/io/qbeast/spark/delta/DeltaRollupDataWriter.scala b/delta/src/main/scala/io/qbeast/spark/delta/DeltaRollupDataWriter.scala similarity index 100% rename from src/main/scala/io/qbeast/spark/delta/DeltaRollupDataWriter.scala rename to delta/src/main/scala/io/qbeast/spark/delta/DeltaRollupDataWriter.scala diff --git a/src/main/scala/io/qbeast/spark/delta/DeltaStagingDataManager.scala b/delta/src/main/scala/io/qbeast/spark/delta/DeltaStagingDataManager.scala similarity index 92% rename from src/main/scala/io/qbeast/spark/delta/DeltaStagingDataManager.scala rename to delta/src/main/scala/io/qbeast/spark/delta/DeltaStagingDataManager.scala index d24084ed5..1097aec9a 100644 --- a/src/main/scala/io/qbeast/spark/delta/DeltaStagingDataManager.scala +++ b/delta/src/main/scala/io/qbeast/spark/delta/DeltaStagingDataManager.scala @@ -20,7 +20,6 @@ import io.qbeast.core.model.QTableID import io.qbeast.core.model.StagingDataManager import io.qbeast.core.model.StagingDataManagerFactory import io.qbeast.core.model.StagingResolution -import io.qbeast.spark.internal.commands.ConvertToQbeastCommand import io.qbeast.spark.internal.QbeastOptions import org.apache.hadoop.fs.Path import org.apache.spark.qbeast.config.STAGING_SIZE_IN_BYTES @@ -36,7 +35,7 @@ import org.apache.spark.sql.SparkSession /** * Access point for staged data */ -private[spark] class DeltaStagingDataManager(tableID: QTableID) +class DeltaStagingDataManager(tableID: QTableID) extends DeltaStagingUtils with StagingDataManager { private val spark = SparkSession.active @@ -133,13 +132,6 @@ private[spark] class DeltaStagingDataManager(tableID: QTableID) .option(DeltaOptions.USER_METADATA_OPTION, options.userMetadata.get) } writer.save(tableID.id) - - // Convert if the table is not yet qbeast - if (isInitial) { - val colsToIndex = indexStatus.revision.columnTransformers.map(_.columnName) - val dcs = indexStatus.revision.desiredCubeSize - ConvertToQbeastCommand(s"delta.`${tableID.id}`", colsToIndex, dcs).run(spark) - } } } diff --git a/src/main/scala/io/qbeast/spark/delta/DeltaStagingUtils.scala b/delta/src/main/scala/io/qbeast/spark/delta/DeltaStagingUtils.scala similarity index 100% rename from src/main/scala/io/qbeast/spark/delta/DeltaStagingUtils.scala rename to delta/src/main/scala/io/qbeast/spark/delta/DeltaStagingUtils.scala diff --git a/src/main/scala/io/qbeast/spark/delta/ListFilesStrategy.scala b/delta/src/main/scala/io/qbeast/spark/delta/ListFilesStrategy.scala similarity index 100% rename from src/main/scala/io/qbeast/spark/delta/ListFilesStrategy.scala rename to delta/src/main/scala/io/qbeast/spark/delta/ListFilesStrategy.scala diff --git a/src/main/scala/io/qbeast/spark/delta/QbeastMetadataOperation.scala b/delta/src/main/scala/io/qbeast/spark/delta/QbeastMetadataOperation.scala similarity index 100% rename from src/main/scala/io/qbeast/spark/delta/QbeastMetadataOperation.scala rename to delta/src/main/scala/io/qbeast/spark/delta/QbeastMetadataOperation.scala diff --git a/src/main/scala/io/qbeast/spark/delta/SamplingListFilesStrategy.scala b/delta/src/main/scala/io/qbeast/spark/delta/SamplingListFilesStrategy.scala similarity index 100% rename from src/main/scala/io/qbeast/spark/delta/SamplingListFilesStrategy.scala rename to delta/src/main/scala/io/qbeast/spark/delta/SamplingListFilesStrategy.scala diff --git a/src/main/scala/org/apache/spark/sql/delta/DeltaStatsCollectionUtils.scala 
b/delta/src/main/scala/org/apache/spark/sql/delta/DeltaStatsCollectionUtils.scala similarity index 98% rename from src/main/scala/org/apache/spark/sql/delta/DeltaStatsCollectionUtils.scala rename to delta/src/main/scala/org/apache/spark/sql/delta/DeltaStatsCollectionUtils.scala index 7b4951b9b..19543142c 100644 --- a/src/main/scala/org/apache/spark/sql/delta/DeltaStatsCollectionUtils.scala +++ b/delta/src/main/scala/org/apache/spark/sql/delta/DeltaStatsCollectionUtils.scala @@ -15,7 +15,6 @@ */ package org.apache.spark.sql.delta -import io.qbeast.context.QbeastContext import io.qbeast.core.model.QTableID import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.expressions.Expression @@ -85,7 +84,7 @@ trait DeltaStatsCollectionUtils { sparkSession: SparkSession, tableID: QTableID): Option[DeltaJobStatisticsTracker] = { - if (QbeastContext.config.get(DeltaSQLConf.DELTA_COLLECT_STATS)) { + if (sparkSession.conf.get(DeltaSQLConf.DELTA_COLLECT_STATS)) { val outputStatsAtrributes = data.queryExecution.analyzed.output val outputSchema = data.schema diff --git a/docs/AdvancedConfiguration.md b/docs/AdvancedConfiguration.md index 44d9d0e21..e63d19b8c 100644 --- a/docs/AdvancedConfiguration.md +++ b/docs/AdvancedConfiguration.md @@ -11,7 +11,7 @@ However, you can also handle different Catalogs simultaneously. ### 1. Unified Catalog ```bash ---conf spark.sql.catalog.spark_catalog=io.qbeast.spark.internal.sources.catalog.QbeastCatalog +--conf spark.sql.catalog.spark_catalog=io.qbeast.catalog.QbeastCatalog ``` Using the `spark_catalog` configuration, you can write **qbeast** and **delta** ( or upcoming formats ;) ) tables into the `default` namespace. @@ -32,7 +32,7 @@ For using **more than one Catalog in the same session**, you can set it up in a ```bash --conf spark.sql.catalog.spark_catalog = org.apache.spark.sql.delta.catalog.DeltaCatalog \ ---conf spark.sql.catalog.qbeast_catalog=io.qbeast.spark.internal.sources.catalog.QbeastCatalog +--conf spark.sql.catalog.qbeast_catalog=io.qbeast.catalog.QbeastCatalog ``` Notice the `QbeastCatalog` conf parameter is not anymore `spark_catalog`, but has a customized name like `qbeast_catalog`. Each table written using the **qbeast** implementation, should have the prefix `qbeast_catalog`. @@ -181,7 +181,7 @@ This can be addressed by introducing a custom Quantile Based sequence in the for The following code snippet demonstrates the extraction of a Quantile-based CDF from the source data: ```scala -import io.qbeast.spark.utils.QbeastUtils +import io.qbeast.utils.QbeastUtils val columnQuantiles = QbeastUtils.computeQuantilesForColumn(df, "brand") val columnStats = s"""{"brand_quantiles":$columnQuantiles}""" @@ -312,7 +312,7 @@ df ```scala // Hooks for Optimizations -import io.qbeast.spark.QbeastTable +import io.qbeast.table.QbeastTable val qt = QbeastTable.forPath(spark, tablePath) val options = Map( "qbeastPreCommitHook.myHook1" -> classOf[SimpleHook].getCanonicalName, @@ -327,7 +327,7 @@ QbeastTable.forTable(sparkSession, tablePath) methods that returns a Dataset[Den contains all indexed metadata in an easy-to-analyze format. 
```scala -import io.qbeast.spark.QbeastTable +import io.qbeast.table.QbeastTable val qt = QbeastTable.forPath(spark, tablePath) val dnb = qt.getDenormalizedBlocks() dnb.select("filePath").distinct.count() // number of files diff --git a/docs/CloudStorages.md b/docs/CloudStorages.md index b0b5812a5..e48de9bf0 100644 --- a/docs/CloudStorages.md +++ b/docs/CloudStorages.md @@ -34,7 +34,7 @@ Amazon Web Services S3 does not work with Hadoop 2.7. For this provider you'll n - If you are using a **public** bucket: ```bash $SPARK_HOME/bin/spark-shell \ ---conf spark.sql.extensions=io.qbeast.spark.delta.QbeastDeltaSparkSessionExtension \ +--conf spark.sql.extensions=io.qbeast.sql.QbeastSparkSessionExtension \ --conf spark.hadoop.fs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider \ --packages io.qbeast:qbeast-spark_2.12:0.3.2,\ io.delta:delta-core_2.12:1.2.0,\ @@ -46,7 +46,7 @@ org.apache.hadoop:hadoop-aws:3.2.0 - If you are using **private** buckets: ```bash $SPARK_HOME/bin/spark-shell \ ---conf spark.sql.extensions=io.qbeast.spark.delta.QbeastDeltaSparkSessionExtension \ +--conf spark.sql.extensions=io.qbeast.sql.QbeastSparkSessionExtension \ --conf spark.hadoop.fs.s3a.access.key=${AWS_ACCESS_KEY_ID} \ --conf spark.hadoop.fs.s3a.secret.key=${AWS_SECRET_ACCESS_KEY} \ --conf spark.hadoop.fs.s3a.impl=org.apache.hadoop.fs.s3a.S3AFileSystem \ @@ -66,7 +66,7 @@ recommend using the latest. Remember that vanilla parquet format may not work in $SPARK_HOME/bin/spark-shell \ --conf spark.hadoop.fs.azure.account.key.blobqsql.blob.core.windows.net="${AZURE_BLOB_STORAGE_KEY}" \ --conf spark.hadoop.fs.AbstractFileSystem.wasb.impl=org.apache.hadoop.fs.azure.Wasb \ ---conf spark.sql.extensions=io.qbeast.spark.delta.QbeastDeltaSparkSessionExtension \ +--conf spark.sql.extensions=io.qbeast.sql.QbeastSparkSessionExtension \ --packages io.qbeast:qbeast-spark_2.12:0.3.2,\ io.delta:delta-core_2.12:1.2.0,\ org.apache.hadoop:hadoop-azure:3.2.0 diff --git a/docs/QbeastTable.md b/docs/QbeastTable.md index eaec384b0..45f6e7973 100644 --- a/docs/QbeastTable.md +++ b/docs/QbeastTable.md @@ -5,7 +5,7 @@ This API is implemented to interact with your QbeastTable. 
Creating an instance of QbeastTable is as easy as: ```scala -import io.qbeast.spark.QbeastTable +import io.qbeast.table.QbeastTable val qbeastTable = QbeastTable.forPath(spark, "path/to/qbeast/table") ``` diff --git a/docs/Quickstart.md b/docs/Quickstart.md index de02dc4e2..c26f32b35 100644 --- a/docs/Quickstart.md +++ b/docs/Quickstart.md @@ -92,8 +92,8 @@ Run `pyspark` shell: ```bash pyspark --packages io.qbeast:qbeast-spark_2.12:0.7.0,io.delta:delta-spark_2.12:3.1.0 \ ---conf spark.sql.extensions=io.qbeast.spark.delta.QbeastDeltaSparkSessionExtension \ ---conf spark.sql.catalog.spark_catalog=io.qbeast.spark.internal.sources.catalog.QbeastCatalog +--conf spark.sql.extensions=io.qbeast.sql.QbeastSparkSessionExtension \ +--conf spark.sql.catalog.spark_catalog=io.qbeast.catalog.QbeastCatalog ``` ### Scala @@ -103,8 +103,8 @@ Run a `spark-shell` from the binaries: ```bash $SPARK_HOME/bin/spark-shell \ --packages io.qbeast:qbeast-spark_2.12:0.7.0,io.delta:delta-spark_2.12:3.1.0 \ ---conf spark.sql.extensions=io.qbeast.spark.delta.QbeastDeltaSparkSessionExtension \ ---conf spark.sql.catalog.spark_catalog=io.qbeast.spark.internal.sources.catalog.QbeastCatalog +--conf spark.sql.extensions=io.qbeast.sql.QbeastSparkSessionExtension \ +--conf spark.sql.catalog.spark_catalog=io.qbeast.catalog.QbeastCatalog ``` ### SQL @@ -112,8 +112,8 @@ $SPARK_HOME/bin/spark-shell \ ```bash $SPARK_HOME/bin/spark-sql \ --packages io.qbeast:qbeast-spark_2.12:0.7.0,io.delta:delta-spark_2.12:3.1.0 \ ---conf spark.sql.extensions=io.qbeast.spark.delta.QbeastDeltaSparkSessionExtension \ ---conf spark.sql.catalog.spark_catalog=io.qbeast.spark.internal.sources.catalog.QbeastCatalog +--conf spark.sql.extensions=io.qbeast.sql.QbeastSparkSessionExtension \ +--conf spark.sql.catalog.spark_catalog=io.qbeast.catalog.QbeastCatalog ``` ### Advanced Spark Configuration @@ -174,8 +174,8 @@ spark = pyspark.sql.SparkSession.builder.appName("MyApp").getOrCreate() # Session with Configuration pyspark.sql.SparkSession.builder.appName("MyApp") \ - .config("spark.sql.extensions", "io.qbeast.spark.delta.QbeastSparkSessionExtension") \ - .config("spark.sql.catalog.spark_catalog", "io.qbeast.spark.internal.sources.catalog.QbeastCatalog").getOrCreate() + .config("spark.sql.extensions", "io.qbeast.sql.QbeastSparkSessionExtension") \ + .config("spark.sql.catalog.spark_catalog", "io.qbeast.catalog.QbeastCatalog").getOrCreate() ``` @@ -352,7 +352,7 @@ Notice that the sample operator is no longer present in the physical plan. It's Get **insights** into the data using the `QbeastTable` interface available in Scala. 
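Editorial aside: the documentation hunks above consistently swap in the relocated entry points `io.qbeast.sql.QbeastSparkSessionExtension` and `io.qbeast.catalog.QbeastCatalog`. For readers who build the session programmatically rather than via shell flags, a small Scala equivalent of that configuration (app name and master are arbitrary, only the two class names come from the patch):

```scala
import org.apache.spark.sql.SparkSession

// Builder-based equivalent of the spark-shell / pyspark configuration shown above,
// using the renamed extension and catalog classes.
val spark = SparkSession.builder()
  .master("local[*]")
  .appName("qbeast-quickstart-sketch")
  .config("spark.sql.extensions", "io.qbeast.sql.QbeastSparkSessionExtension")
  .config("spark.sql.catalog.spark_catalog", "io.qbeast.catalog.QbeastCatalog")
  .getOrCreate()
```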
```scala -import io.qbeast.spark.QbeastTable +import io.qbeast.table.QbeastTable val qbeastTable = QbeastTable.forPath(spark, "/tmp/qbeast_table") @@ -448,7 +448,7 @@ For setting up writes and reads on Amazon S3 service, it is possible to use both ```bash $SPARK_HOME/bin/spark-shell \ - --conf spark.sql.extensions=io.qbeast.spark.delta.QbeastDeltaSparkSessionExtension \ + --conf spark.sql.extensions=io.qbeast.sql.QbeastSparkSessionExtension \ --conf spark.hadoop.fs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider \ --packages io.qbeast:qbeast-spark_2.12:0.7.0,\ io.delta:delta-spark_2.12:3.1.0,\ @@ -462,7 +462,7 @@ For setting up writes and reads on Amazon S3 service, it is possible to use both ```bash $SPARK_HOME/bin/spark-shell \ - --conf spark.sql.extensions=io.qbeast.spark.delta.QbeastDeltaSparkSessionExtension \ + --conf spark.sql.extensions=io.qbeast.sql.QbeastSparkSessionExtension \ --conf spark.hadoop.fs.s3a.access.key=${AWS_ACCESS_KEY_ID} \ --conf spark.hadoop.fs.s3a.secret.key=${AWS_SECRET_ACCESS_KEY}\ --conf spark.hadoop.fs.s3a.impl=org.apache.hadoop.fs.s3a.S3AFileSystem \ @@ -487,8 +487,8 @@ Google has several services related to Qbeast including [Cloud Storage](https:// ```bash # Configure the Spark worker to use the Qbeast formatter library - spark.sql.extensions io.qbeast.spark.delta.QbeastDeltaSparkSessionExtension - spark.sql.catalog.spark_catalog io.qbeast.spark.internal.sources.catalog.QbeastCatalog + spark.sql.extensions io.qbeast.sql.QbeastSparkSessionExtension + spark.sql.catalog.spark_catalog io.qbeast.catalog.QbeastCatalog ``` 5. Create a schema in BigQuery Studio in the same region than the GC bucket. diff --git a/docs/sample_pushdown_demo.ipynb b/docs/sample_pushdown_demo.ipynb index ddd2e1773..cc2c52181 100644 --- a/docs/sample_pushdown_demo.ipynb +++ b/docs/sample_pushdown_demo.ipynb @@ -130,7 +130,7 @@ "\n", "spark = (SparkSession.builder\n", " .master(\"local[*]\")\n", - " .config(\"spark.sql.extensions\", \"io.qbeast.spark.delta.QbeastSparkSessionExtension\")\n", + " .config(\"spark.sql.extensions\", \"io.qbeast.sql.QbeastSparkSessionExtension\")\n", " .config(\"fs.s3a.aws.credentials.provider\", \"org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider\")\n", " .config(\"spark.jars.packages\", deps)\n", " .getOrCreate())\n", diff --git a/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister b/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister index 564cfd226..ab7db5148 100644 --- a/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister +++ b/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister @@ -1 +1 @@ -io.qbeast.spark.internal.sources.QbeastDataSource \ No newline at end of file +io.qbeast.sources.QbeastDataSource \ No newline at end of file diff --git a/src/main/scala/io/qbeast/spark/internal/sources/catalog/DefaultStagedTable.scala b/src/main/scala/io/qbeast/catalog/DefaultStagedTable.scala similarity index 98% rename from src/main/scala/io/qbeast/spark/internal/sources/catalog/DefaultStagedTable.scala rename to src/main/scala/io/qbeast/catalog/DefaultStagedTable.scala index 8084c8fd3..a36b0e753 100644 --- a/src/main/scala/io/qbeast/spark/internal/sources/catalog/DefaultStagedTable.scala +++ b/src/main/scala/io/qbeast/catalog/DefaultStagedTable.scala @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.qbeast.spark.internal.sources.catalog +package io.qbeast.catalog import org.apache.spark.sql.connector.catalog.Column import org.apache.spark.sql.connector.catalog.Identifier diff --git a/src/main/scala/io/qbeast/spark/internal/sources/catalog/QbeastCatalog.scala b/src/main/scala/io/qbeast/catalog/QbeastCatalog.scala similarity index 95% rename from src/main/scala/io/qbeast/spark/internal/sources/catalog/QbeastCatalog.scala rename to src/main/scala/io/qbeast/catalog/QbeastCatalog.scala index 455263a54..1b5d60e0d 100644 --- a/src/main/scala/io/qbeast/spark/internal/sources/catalog/QbeastCatalog.scala +++ b/src/main/scala/io/qbeast/catalog/QbeastCatalog.scala @@ -13,13 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.qbeast.spark.internal.sources.catalog +package io.qbeast.catalog import io.qbeast.context.QbeastContext -import io.qbeast.spark.internal.commands.AlterTableSetPropertiesQbeastCommand -import io.qbeast.spark.internal.commands.AlterTableUnsetPropertiesQbeastCommand -import io.qbeast.spark.internal.sources.v2.QbeastStagedTableImpl -import io.qbeast.spark.internal.sources.v2.QbeastTableImpl +import io.qbeast.internal.commands.AlterTableSetPropertiesQbeastCommand +import io.qbeast.internal.commands.AlterTableUnsetPropertiesQbeastCommand +import io.qbeast.sources.v2.QbeastStagedTableImpl +import io.qbeast.sources.v2.QbeastTableImpl import org.apache.hadoop.fs.Path import org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException @@ -41,7 +41,7 @@ import scala.collection.JavaConverters._ /** * QbeastCatalog is a CatalogExtenssion that supports Namespaces and the CREATION and/or - * REPLACEMENT of tables QbeastCatalog uses a session catalog of type T to delegate high-level + * REPLACEMENT of table QbeastCatalog uses a session catalog of type T to delegate high-level * operations */ class QbeastCatalog[T <: TableCatalog with SupportsNamespaces with FunctionCatalog] @@ -63,7 +63,7 @@ class QbeastCatalog[T <: TableCatalog with SupportsNamespaces with FunctionCatal * Gets the delegated catalog of the session * @return */ - private def getDelegatedCatalog(): T = { + private def getDelegatedCatalog: T = { val sessionCatalog = delegatedCatalog match { case null => // In this case, any catalog has been delegated, so we need to search for the default @@ -87,7 +87,7 @@ class QbeastCatalog[T <: TableCatalog with SupportsNamespaces with FunctionCatal private def getSessionCatalog(properties: Map[String, String] = Map.empty): T = { properties.get("provider") match { case Some("delta") => deltaCatalog.asInstanceOf[T] - case _ => getDelegatedCatalog() + case _ => getDelegatedCatalog } } @@ -159,7 +159,7 @@ class QbeastCatalog[T <: TableCatalog with SupportsNamespaces with FunctionCatal partitions: Array[Transform], properties: util.Map[String, String]): StagedTable = { if (QbeastCatalogUtils.isQbeastProvider(properties)) { - new QbeastStagedTableImpl( + QbeastStagedTableImpl( ident, schema, partitions, @@ -188,7 +188,7 @@ class QbeastCatalog[T <: TableCatalog with SupportsNamespaces with FunctionCatal partitions: Array[Transform], properties: util.Map[String, String]): StagedTable = { if (QbeastCatalogUtils.isQbeastProvider(properties)) { - new QbeastStagedTableImpl( + QbeastStagedTableImpl( ident, schema, partitions, @@ -228,7 +228,7 @@ class QbeastCatalog[T <: TableCatalog with SupportsNamespaces with FunctionCatal partitions: 
Array[Transform], properties: util.Map[String, String]): StagedTable = { if (QbeastCatalogUtils.isQbeastProvider(properties)) { - new QbeastStagedTableImpl( + QbeastStagedTableImpl( ident, schema, partitions, diff --git a/src/main/scala/io/qbeast/spark/internal/sources/catalog/QbeastCatalogUtils.scala b/src/main/scala/io/qbeast/catalog/QbeastCatalogUtils.scala similarity index 97% rename from src/main/scala/io/qbeast/spark/internal/sources/catalog/QbeastCatalogUtils.scala rename to src/main/scala/io/qbeast/catalog/QbeastCatalogUtils.scala index 6b0bea309..7671d3102 100644 --- a/src/main/scala/io/qbeast/spark/internal/sources/catalog/QbeastCatalogUtils.scala +++ b/src/main/scala/io/qbeast/catalog/QbeastCatalogUtils.scala @@ -13,15 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.qbeast.spark.internal.sources.catalog +package io.qbeast.catalog import io.qbeast.context.QbeastContext -import io.qbeast.context.QbeastContext.metadataManager import io.qbeast.core.model.QTableID -import io.qbeast.spark.internal.commands.ConvertToQbeastCommand -import io.qbeast.spark.internal.sources.v2.QbeastTableImpl +import io.qbeast.internal.commands.ConvertToQbeastCommand +import io.qbeast.sources.v2.QbeastTableImpl import io.qbeast.spark.internal.QbeastOptions -import io.qbeast.spark.table.IndexedTableFactory +import io.qbeast.table.IndexedTableFactory import org.apache.hadoop.fs.FileSystem import org.apache.hadoop.fs.Path import org.apache.spark.internal.Logging @@ -330,8 +329,8 @@ object QbeastCatalogUtils extends Logging { // If the Log is not created // We make sure we create the table physically // So new data can be inserted - val isLogCreated = metadataManager.existsLog(tableID) - if (!isLogCreated) metadataManager.createLog(tableID) + val isLogCreated = QbeastContext.metadataManager.existsLog(tableID) + if (!isLogCreated) QbeastContext.metadataManager.createLog(tableID) } /** diff --git a/src/main/scala/io/qbeast/spark/internal/sources/catalog/TableCreationMode.scala b/src/main/scala/io/qbeast/catalog/TableCreationMode.scala similarity index 96% rename from src/main/scala/io/qbeast/spark/internal/sources/catalog/TableCreationMode.scala rename to src/main/scala/io/qbeast/catalog/TableCreationMode.scala index 94329f371..0c5668c20 100644 --- a/src/main/scala/io/qbeast/spark/internal/sources/catalog/TableCreationMode.scala +++ b/src/main/scala/io/qbeast/catalog/TableCreationMode.scala @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.qbeast.spark.internal.sources.catalog +package io.qbeast.catalog import org.apache.spark.sql.SaveMode diff --git a/src/main/scala/io/qbeast/context/QbeastContext.scala b/src/main/scala/io/qbeast/context/QbeastContext.scala index 127aa9b61..d172bd5fc 100644 --- a/src/main/scala/io/qbeast/context/QbeastContext.scala +++ b/src/main/scala/io/qbeast/context/QbeastContext.scala @@ -24,8 +24,8 @@ import io.qbeast.spark.delta.DeltaStagingDataManagerFactory import io.qbeast.spark.index.SparkColumnsToIndexSelector import io.qbeast.spark.index.SparkOTreeManager import io.qbeast.spark.index.SparkRevisionFactory -import io.qbeast.spark.table.IndexedTableFactory -import io.qbeast.spark.table.IndexedTableFactoryImpl +import io.qbeast.table.IndexedTableFactory +import io.qbeast.table.IndexedTableFactoryImpl import org.apache.spark.scheduler.SparkListener import org.apache.spark.scheduler.SparkListenerApplicationEnd import org.apache.spark.sql.SparkSession diff --git a/src/main/scala/io/qbeast/spark/internal/commands/AlterQbeastTableCommands.scala b/src/main/scala/io/qbeast/internal/commands/AlterQbeastTableCommands.scala similarity index 96% rename from src/main/scala/io/qbeast/spark/internal/commands/AlterQbeastTableCommands.scala rename to src/main/scala/io/qbeast/internal/commands/AlterQbeastTableCommands.scala index 3578d5238..dbd8542c2 100644 --- a/src/main/scala/io/qbeast/spark/internal/commands/AlterQbeastTableCommands.scala +++ b/src/main/scala/io/qbeast/internal/commands/AlterQbeastTableCommands.scala @@ -13,9 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.qbeast.spark.internal.commands +package io.qbeast.internal.commands -import io.qbeast.spark.internal.sources.v2.QbeastTableImpl +import io.qbeast.sources.v2.QbeastTableImpl import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.plans.logical.IgnoreCachedData import org.apache.spark.sql.delta.catalog.DeltaTableV2 diff --git a/src/main/scala/io/qbeast/spark/internal/commands/ConvertToQbeastCommand.scala b/src/main/scala/io/qbeast/internal/commands/ConvertToQbeastCommand.scala similarity index 86% rename from src/main/scala/io/qbeast/spark/internal/commands/ConvertToQbeastCommand.scala rename to src/main/scala/io/qbeast/internal/commands/ConvertToQbeastCommand.scala index 84cfdc8dc..1c0ac20e1 100644 --- a/src/main/scala/io/qbeast/spark/internal/commands/ConvertToQbeastCommand.scala +++ b/src/main/scala/io/qbeast/internal/commands/ConvertToQbeastCommand.scala @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.qbeast.spark.internal.commands +package io.qbeast.internal.commands import io.qbeast.context.QbeastContext import io.qbeast.core.model._ @@ -23,7 +23,6 @@ import io.qbeast.spark.utils.QbeastExceptionMessages.incorrectIdentifierFormat import io.qbeast.spark.utils.QbeastExceptionMessages.partitionedTableExceptionMsg import io.qbeast.spark.utils.QbeastExceptionMessages.unsupportedFormatExceptionMsg import org.apache.hadoop.fs.Path -import org.apache.http.annotation.Experimental import org.apache.spark.internal.Logging import org.apache.spark.qbeast.config.DEFAULT_CUBE_SIZE import org.apache.spark.sql.catalyst.TableIdentifier @@ -33,18 +32,6 @@ import org.apache.spark.sql.AnalysisExceptionFactory import org.apache.spark.sql.Row import org.apache.spark.sql.SparkSession -/** - * Command to convert a parquet or a delta table into a qbeast table. 
The command creates the an - * empty revision for the metadata, the qbeast options provided should be those with which the - * user want to index the table. Partitioned tables are not supported. - * @param identifier - * STRING, table identifier consisting of "format.`tablePath`" e.g. parquet.`/tmp/test/` - * @param columnsToIndex - * Seq[STRING], the columns on which the index is built e.g. Seq("col1", "col2") - * @param cubeSize - * INT, the desired cube size for the index e.g. 5000 - */ -@Experimental case class ConvertToQbeastCommand( identifier: String, columnsToIndex: Seq[String], diff --git a/src/main/scala/io/qbeast/spark/internal/commands/OptimizeTableCommand.scala b/src/main/scala/io/qbeast/internal/commands/OptimizeTableCommand.scala similarity index 93% rename from src/main/scala/io/qbeast/spark/internal/commands/OptimizeTableCommand.scala rename to src/main/scala/io/qbeast/internal/commands/OptimizeTableCommand.scala index 439ee5d1d..1e1d63b18 100644 --- a/src/main/scala/io/qbeast/spark/internal/commands/OptimizeTableCommand.scala +++ b/src/main/scala/io/qbeast/internal/commands/OptimizeTableCommand.scala @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.qbeast.spark.internal.commands +package io.qbeast.internal.commands import io.qbeast.core.model.RevisionID -import io.qbeast.spark.table.IndexedTable +import io.qbeast.table.IndexedTable import org.apache.spark.sql.execution.command.LeafRunnableCommand import org.apache.spark.sql.Row import org.apache.spark.sql.SparkSession diff --git a/src/main/scala/io/qbeast/spark/internal/rules/QbeastAnalysis.scala b/src/main/scala/io/qbeast/internal/rules/QbeastAnalysis.scala similarity index 94% rename from src/main/scala/io/qbeast/spark/internal/rules/QbeastAnalysis.scala rename to src/main/scala/io/qbeast/internal/rules/QbeastAnalysis.scala index 2d7ac3373..f94b4b793 100644 --- a/src/main/scala/io/qbeast/spark/internal/rules/QbeastAnalysis.scala +++ b/src/main/scala/io/qbeast/internal/rules/QbeastAnalysis.scala @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.qbeast.spark.internal.rules +package io.qbeast.internal.rules -import io.qbeast.spark.internal.rules.QbeastAnalysisUtils._ -import io.qbeast.spark.internal.sources.v2.QbeastTableImpl +import io.qbeast.internal.rules.QbeastAnalysisUtils._ +import io.qbeast.sources.v2.QbeastTableImpl import org.apache.spark.sql.catalyst.plans.logical.AppendData import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule @@ -26,6 +26,7 @@ import org.apache.spark.sql.SparkSession /** * Analyzes and resolves the Spark Plan before Optimization + * * @param spark * the SparkSession */ diff --git a/src/main/scala/io/qbeast/spark/internal/rules/QbeastAnalysisUtils.scala b/src/main/scala/io/qbeast/internal/rules/QbeastAnalysisUtils.scala similarity index 99% rename from src/main/scala/io/qbeast/spark/internal/rules/QbeastAnalysisUtils.scala rename to src/main/scala/io/qbeast/internal/rules/QbeastAnalysisUtils.scala index 31f9b6585..154ecd9e9 100644 --- a/src/main/scala/io/qbeast/spark/internal/rules/QbeastAnalysisUtils.scala +++ b/src/main/scala/io/qbeast/internal/rules/QbeastAnalysisUtils.scala @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
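Although the `@Experimental` scaladoc above is being removed, its parameter description still captures the calling convention of `ConvertToQbeastCommand` (now under `io.qbeast.internal.commands`). A hedged usage sketch based on that deleted comment and on the invocation dropped from `DeltaStagingDataManager` earlier in this patch; the path and column names are the comment's own examples, not a tested snippet:

```scala
import io.qbeast.internal.commands.ConvertToQbeastCommand
import org.apache.spark.sql.SparkSession

val spark = SparkSession.active

// Convert an existing parquet/delta table in place into a qbeast table.
// The identifier follows the "format.`tablePath`" convention described in the removed scaladoc.
ConvertToQbeastCommand("parquet.`/tmp/test/`", Seq("col1", "col2"), 5000).run(spark)
```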
*/ -package io.qbeast.spark.internal.rules +package io.qbeast.internal.rules import org.apache.spark.sql.catalyst.analysis.TableOutputResolver import org.apache.spark.sql.catalyst.expressions.Alias @@ -212,7 +212,7 @@ private[rules] object QbeastAnalysisUtils { addCastsToStructs(tableName, Alias(GetArrayItem(parent, i), i.toString)(), source, target) val transformLambdaFunc = { val elementVar = NamedLambdaVariable("elementVar", source, sourceNullable) - val indexVar = NamedLambdaVariable("indexVar", IntegerType, false) + val indexVar = NamedLambdaVariable("indexVar", IntegerType, nullable = false) LambdaFunction(structConverter(elementVar, indexVar), Seq(elementVar, indexVar)) } ArrayTransform(parent, transformLambdaFunc) diff --git a/src/main/scala/io/qbeast/spark/internal/rules/SampleRule.scala b/src/main/scala/io/qbeast/internal/rules/SampleRule.scala similarity index 99% rename from src/main/scala/io/qbeast/spark/internal/rules/SampleRule.scala rename to src/main/scala/io/qbeast/internal/rules/SampleRule.scala index 4434c726b..bff0ded87 100644 --- a/src/main/scala/io/qbeast/spark/internal/rules/SampleRule.scala +++ b/src/main/scala/io/qbeast/internal/rules/SampleRule.scala @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.qbeast.spark.internal.rules +package io.qbeast.internal.rules import io.qbeast.core.model.Weight import io.qbeast.core.model.WeightRange diff --git a/src/main/scala/io/qbeast/spark/internal/rules/SaveAsTableRule.scala b/src/main/scala/io/qbeast/internal/rules/SaveAsTableRule.scala similarity index 95% rename from src/main/scala/io/qbeast/spark/internal/rules/SaveAsTableRule.scala rename to src/main/scala/io/qbeast/internal/rules/SaveAsTableRule.scala index 1ea3af96f..dcae05d41 100644 --- a/src/main/scala/io/qbeast/spark/internal/rules/SaveAsTableRule.scala +++ b/src/main/scala/io/qbeast/internal/rules/SaveAsTableRule.scala @@ -13,9 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.qbeast.spark.internal.rules +package io.qbeast.internal.rules -import io.qbeast.spark.internal.sources.catalog.QbeastCatalogUtils.isQbeastProvider +import io.qbeast.catalog.QbeastCatalogUtils.isQbeastProvider import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.plans.logical.CreateTableAsSelect import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan diff --git a/src/main/scala/io/qbeast/spark/internal/sources/QbeastBaseRelation.scala b/src/main/scala/io/qbeast/sources/QbeastBaseRelation.scala similarity index 98% rename from src/main/scala/io/qbeast/spark/internal/sources/QbeastBaseRelation.scala rename to src/main/scala/io/qbeast/sources/QbeastBaseRelation.scala index ff665f4ea..3c044ffb6 100644 --- a/src/main/scala/io/qbeast/spark/internal/sources/QbeastBaseRelation.scala +++ b/src/main/scala/io/qbeast/sources/QbeastBaseRelation.scala @@ -13,12 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.qbeast.spark.internal.sources +package io.qbeast.sources import io.qbeast.context.QbeastContext import io.qbeast.spark.delta.DefaultFileIndex import io.qbeast.spark.index.EmptyFileIndex -import io.qbeast.spark.table.IndexedTable +import io.qbeast.table.IndexedTable import org.apache.hadoop.fs.Path import org.apache.spark.sql.catalyst.catalog.BucketSpec import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat diff --git a/src/main/scala/io/qbeast/spark/internal/sources/QbeastDataSource.scala b/src/main/scala/io/qbeast/sources/QbeastDataSource.scala similarity index 87% rename from src/main/scala/io/qbeast/spark/internal/sources/QbeastDataSource.scala rename to src/main/scala/io/qbeast/sources/QbeastDataSource.scala index 7fbff3fa1..9241c32dc 100644 --- a/src/main/scala/io/qbeast/spark/internal/sources/QbeastDataSource.scala +++ b/src/main/scala/io/qbeast/sources/QbeastDataSource.scala @@ -13,13 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.qbeast.spark.internal.sources +package io.qbeast.sources import io.qbeast.context.QbeastContext -import io.qbeast.context.QbeastContext.metadataManager -import io.qbeast.spark.internal.sources.v2.QbeastTableImpl +import io.qbeast.sources.v2.QbeastTableImpl import io.qbeast.spark.internal.QbeastOptions -import io.qbeast.spark.table.IndexedTableFactory +import io.qbeast.table.IndexedTableFactory import org.apache.hadoop.fs.FileStatus import org.apache.hadoop.fs.Path import org.apache.spark.internal.Logging @@ -67,17 +66,18 @@ class QbeastDataSource private[sources] (private val tableFactory: IndexedTableF partitioning: Array[Transform], properties: util.Map[String, String]): Table = { val tableId = QbeastOptions.loadTableIDFromParameters(properties.asScala.toMap) - logInfo(s"Getting Qbeast table ${tableId}") + logInfo(s"Getting Qbeast table $tableId") val indexedTable = tableFactory.getIndexedTable(tableId) if (indexedTable.exists) { // If the table exists, we make sure to pass all the properties to QbeastTableImpl - val currentRevision = metadataManager.loadSnapshot(tableId).loadLatestRevision + val currentRevision = + QbeastContext.metadataManager.loadSnapshot(tableId).loadLatestRevision val indexProperties = Map( "columnsToIndex" -> currentRevision.columnTransformers.map(_.columnName).mkString(","), "cubeSize" -> currentRevision.desiredCubeSize.toString) val tableProperties = properties.asScala.toMap ++ indexProperties - logDebug(s"Table ${tableId} properties: ${tableProperties}") - new QbeastTableImpl( + logDebug(s"Table $tableId properties: $tableProperties") + QbeastTableImpl( TableIdentifier(tableId.id), new Path(tableId.id), tableProperties, @@ -85,7 +85,7 @@ class QbeastDataSource private[sources] (private val tableFactory: IndexedTableF None, tableFactory) } else { - new QbeastTableImpl( + QbeastTableImpl( TableIdentifier(tableId.id), new Path(tableId.id), properties.asScala.toMap, @@ -116,7 +116,7 @@ class QbeastDataSource private[sources] (private val tableFactory: IndexedTableF val tableId = QbeastOptions.loadTableIDFromParameters(parameters) val table = tableFactory.getIndexedTable(tableId) - logTrace(s"Begin: Create Qbeast relation ${tableId}") + logTrace(s"Begin: Create Qbeast relation $tableId") val result = mode match { case SaveMode.Append => table.save(data, parameters, append = true) case SaveMode.Overwrite => table.save(data, parameters, append = false) @@ -126,7 +126,7 @@ class QbeastDataSource private[sources] (private 
val tableFactory: IndexedTableF case SaveMode.Ignore if table.exists => table.load() case SaveMode.Ignore => table.save(data, parameters, append = false) } - logTrace(s"End: Create Qbeast relation ${tableId}") + logTrace(s"End: Create Qbeast relation $tableId") result } @@ -135,10 +135,10 @@ class QbeastDataSource private[sources] (private val tableFactory: IndexedTableF parameters: Map[String, String]): BaseRelation = { val tableID = QbeastOptions.loadTableIDFromParameters(parameters) val table = tableFactory.getIndexedTable(tableID) - logTrace(s"Begin: Create Qbeast relation ${tableID}") + logTrace(s"Begin: Create Qbeast relation $tableID") if (table.exists) { val result = table.load() - logTrace(s"End: Create Qbeast relation ${tableID}") + logTrace(s"End: Create Qbeast relation $tableID") result } else { throw AnalysisExceptionFactory.create( diff --git a/src/main/scala/io/qbeast/spark/internal/sources/v2/QbeastStagedTableImpl.scala b/src/main/scala/io/qbeast/sources/v2/QbeastStagedTableImpl.scala similarity index 92% rename from src/main/scala/io/qbeast/spark/internal/sources/v2/QbeastStagedTableImpl.scala rename to src/main/scala/io/qbeast/sources/v2/QbeastStagedTableImpl.scala index f1a91e1c8..ae93521e7 100644 --- a/src/main/scala/io/qbeast/spark/internal/sources/v2/QbeastStagedTableImpl.scala +++ b/src/main/scala/io/qbeast/sources/v2/QbeastStagedTableImpl.scala @@ -13,11 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.qbeast.spark.internal.sources.v2 +package io.qbeast.sources.v2 -import io.qbeast.spark.internal.sources.catalog.CreationMode -import io.qbeast.spark.internal.sources.catalog.QbeastCatalogUtils -import io.qbeast.spark.table.IndexedTableFactory +import io.qbeast.catalog.CreationMode +import io.qbeast.catalog.QbeastCatalogUtils +import io.qbeast.table.IndexedTableFactory import org.apache.spark.sql.catalyst.catalog.SessionCatalog import org.apache.spark.sql.connector.catalog.Identifier import org.apache.spark.sql.connector.catalog.StagedTable @@ -41,7 +41,7 @@ import scala.collection.JavaConverters._ * Qbeast Implementation of StagedTable An StagedTable allows Atomic CREATE TABLE AS SELECT / * REPLACE TABLE AS SELECT */ -private[sources] class QbeastStagedTableImpl( +case class QbeastStagedTableImpl( ident: Identifier, schema: StructType, partitions: Array[Transform], @@ -87,7 +87,7 @@ private[sources] class QbeastStagedTableImpl( // the writing of the dataFrame (if any) QbeastCatalogUtils.createQbeastTable( ident, - schema(), + schema, partitions, props, writeOptions, @@ -104,8 +104,6 @@ private[sources] class QbeastStagedTableImpl( override def name(): String = ident.name() - override def schema(): StructType = schema - override def capabilities(): util.Set[TableCapability] = Set(V1_BATCH_WRITE).asJava override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = diff --git a/src/main/scala/io/qbeast/spark/internal/sources/v2/QbeastTableImpl.scala b/src/main/scala/io/qbeast/sources/v2/QbeastTableImpl.scala similarity index 96% rename from src/main/scala/io/qbeast/spark/internal/sources/v2/QbeastTableImpl.scala rename to src/main/scala/io/qbeast/sources/v2/QbeastTableImpl.scala index 4647b2723..1ffeb61c7 100644 --- a/src/main/scala/io/qbeast/spark/internal/sources/v2/QbeastTableImpl.scala +++ b/src/main/scala/io/qbeast/sources/v2/QbeastTableImpl.scala @@ -13,12 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.qbeast.spark.internal.sources.v2 +package io.qbeast.sources.v2 import io.qbeast.context.QbeastContext import io.qbeast.core.model.QTableID -import io.qbeast.spark.internal.sources.QbeastBaseRelation -import io.qbeast.spark.table.IndexedTableFactory +import io.qbeast.sources.QbeastBaseRelation +import io.qbeast.table.IndexedTableFactory import org.apache.hadoop.fs.Path import org.apache.spark.sql.catalyst.catalog.CatalogTable import org.apache.spark.sql.catalyst.catalog.CatalogTableType diff --git a/src/main/scala/io/qbeast/spark/internal/sources/v2/QbeastWriteBuilder.scala b/src/main/scala/io/qbeast/sources/v2/QbeastWriteBuilder.scala similarity index 95% rename from src/main/scala/io/qbeast/spark/internal/sources/v2/QbeastWriteBuilder.scala rename to src/main/scala/io/qbeast/sources/v2/QbeastWriteBuilder.scala index 83d06108a..37c71d15a 100644 --- a/src/main/scala/io/qbeast/spark/internal/sources/v2/QbeastWriteBuilder.scala +++ b/src/main/scala/io/qbeast/sources/v2/QbeastWriteBuilder.scala @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.qbeast.spark.internal.sources.v2 +package io.qbeast.sources.v2 -import io.qbeast.spark.internal.sources.QbeastBaseRelation -import io.qbeast.spark.table.IndexedTable +import io.qbeast.sources.QbeastBaseRelation +import io.qbeast.table.IndexedTable import org.apache.spark.sql.connector.write.LogicalWriteInfo import org.apache.spark.sql.connector.write.SupportsOverwrite import org.apache.spark.sql.connector.write.SupportsTruncate diff --git a/src/main/scala/io/qbeast/spark/delta/QbeastDeltaSparkSessionExtension.scala b/src/main/scala/io/qbeast/sql/QbeastSparkSessionExtension.scala similarity index 81% rename from src/main/scala/io/qbeast/spark/delta/QbeastDeltaSparkSessionExtension.scala rename to src/main/scala/io/qbeast/sql/QbeastSparkSessionExtension.scala index 6b5fe8cef..e05ec9d71 100644 --- a/src/main/scala/io/qbeast/spark/delta/QbeastDeltaSparkSessionExtension.scala +++ b/src/main/scala/io/qbeast/sql/QbeastSparkSessionExtension.scala @@ -13,18 +13,18 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.qbeast.spark.delta +package io.qbeast.sql import io.delta.sql.DeltaSparkSessionExtension -import io.qbeast.spark.internal.rules.QbeastAnalysis -import io.qbeast.spark.internal.rules.SampleRule -import io.qbeast.spark.internal.rules.SaveAsTableRule +import io.qbeast.internal.rules.QbeastAnalysis +import io.qbeast.internal.rules.SampleRule +import io.qbeast.internal.rules.SaveAsTableRule import org.apache.spark.sql.SparkSessionExtensions /** * Qbeast rules extension to spark query analyzer/optimizer/planner */ -class QbeastDeltaSparkSessionExtension extends DeltaSparkSessionExtension { +class QbeastSparkSessionExtension extends DeltaSparkSessionExtension { override def apply(extensions: SparkSessionExtensions): Unit = { diff --git a/src/main/scala/io/qbeast/spark/table/IndexedTable.scala b/src/main/scala/io/qbeast/table/IndexedTable.scala similarity index 97% rename from src/main/scala/io/qbeast/spark/table/IndexedTable.scala rename to src/main/scala/io/qbeast/table/IndexedTable.scala index 9b459f706..08b49be07 100644 --- a/src/main/scala/io/qbeast/spark/table/IndexedTable.scala +++ b/src/main/scala/io/qbeast/table/IndexedTable.scala @@ -13,13 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.qbeast.spark.table +package io.qbeast.table import io.qbeast.core.keeper.Keeper import io.qbeast.core.model._ import io.qbeast.core.model.RevisionFactory +import io.qbeast.internal.commands.ConvertToQbeastCommand +import io.qbeast.sources.QbeastBaseRelation import io.qbeast.spark.index.DoublePassOTreeDataAnalyzer -import io.qbeast.spark.internal.sources.QbeastBaseRelation import io.qbeast.spark.internal.QbeastOptions import io.qbeast.spark.internal.QbeastOptions.checkQbeastProperties import io.qbeast.spark.internal.QbeastOptions.optimizationOptions @@ -32,6 +33,7 @@ import org.apache.spark.qbeast.config.DEFAULT_NUMBER_OF_RETRIES import org.apache.spark.sql.sources.BaseRelation import org.apache.spark.sql.AnalysisExceptionFactory import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.SparkSession import java.lang.System.currentTimeMillis import java.util.ConcurrentModificationException @@ -459,6 +461,12 @@ private[table] class IndexedTableImpl( stagingDataManager.updateWithStagedData(data) match { case r: StagingResolution if r.sendToStaging => stagingDataManager.stageData(data, indexStatus, options, append) + if (snapshot.isInitial) { + val colsToIndex = indexStatus.revision.columnTransformers.map(_.columnName) + val dcs = indexStatus.revision.desiredCubeSize + ConvertToQbeastCommand(s"${options.tableFormat}.`${tableID.id}`", colsToIndex, dcs) + .run(SparkSession.active) + } case StagingResolution(dataToWrite, removeFiles, false) => val schema = dataToWrite.schema diff --git a/src/main/scala/io/qbeast/spark/QbeastTable.scala b/src/main/scala/io/qbeast/table/QbeastTable.scala similarity index 100% rename from src/main/scala/io/qbeast/spark/QbeastTable.scala rename to src/main/scala/io/qbeast/table/QbeastTable.scala diff --git a/src/main/scala/io/qbeast/spark/utils/QbeastUtils.scala b/src/main/scala/io/qbeast/utils/QbeastUtils.scala similarity index 99% rename from src/main/scala/io/qbeast/spark/utils/QbeastUtils.scala rename to src/main/scala/io/qbeast/utils/QbeastUtils.scala index 17acd4f98..7d7227a23 100644 --- a/src/main/scala/io/qbeast/spark/utils/QbeastUtils.scala +++ b/src/main/scala/io/qbeast/utils/QbeastUtils.scala @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.qbeast.spark.utils +package io.qbeast.utils import org.apache.spark.internal.Logging import org.apache.spark.sql.delta.skipping.MultiDimClusteringFunctions diff --git a/src/test/scala/io/qbeast/spark/QbeastIntegrationTestSpec.scala b/src/test/scala/io/qbeast/QbeastIntegrationTestSpec.scala similarity index 95% rename from src/test/scala/io/qbeast/spark/QbeastIntegrationTestSpec.scala rename to src/test/scala/io/qbeast/QbeastIntegrationTestSpec.scala index 656e7091c..b7b2aeef2 100644 --- a/src/test/scala/io/qbeast/spark/QbeastIntegrationTestSpec.scala +++ b/src/test/scala/io/qbeast/QbeastIntegrationTestSpec.scala @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.qbeast.spark +package io.qbeast import com.github.mrpowers.spark.fast.tests.DatasetComparer import io.qbeast.context.QbeastContext @@ -29,7 +29,7 @@ import io.qbeast.spark.delta.DeltaStagingDataManagerFactory import io.qbeast.spark.index.SparkColumnsToIndexSelector import io.qbeast.spark.index.SparkOTreeManager import io.qbeast.spark.index.SparkRevisionFactory -import io.qbeast.spark.table.IndexedTableFactoryImpl +import io.qbeast.table.IndexedTableFactoryImpl import org.apache.log4j.Level import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.DataFrame @@ -57,10 +57,8 @@ trait QbeastIntegrationTestSpec extends AnyFlatSpec with Matchers with DatasetCo // Including Session Extensions and Catalog def sparkConfWithSqlAndCatalog: SparkConf = new SparkConf() .setMaster("local[8]") - .set("spark.sql.extensions", "io.qbeast.spark.delta.QbeastDeltaSparkSessionExtension") - .set( - SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION.key, - "io.qbeast.spark.internal.sources.catalog.QbeastCatalog") + .set("spark.sql.extensions", "io.qbeast.sql.QbeastSparkSessionExtension") + .set(SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION.key, "io.qbeast.catalog.QbeastCatalog") def loadTestData(spark: SparkSession): DataFrame = spark.read .format("csv") diff --git a/src/test/scala/io/qbeast/TestUtils.scala b/src/test/scala/io/qbeast/TestUtils.scala index e6b336646..8a7153482 100644 --- a/src/test/scala/io/qbeast/TestUtils.scala +++ b/src/test/scala/io/qbeast/TestUtils.scala @@ -17,7 +17,6 @@ package io.qbeast import io.qbeast.spark.delta.DefaultFileIndex import io.qbeast.spark.internal.expressions.QbeastMurmur3Hash -import io.qbeast.spark.QbeastIntegrationTestSpec import org.apache.spark.sql.execution.FileSourceScanExec import org.apache.spark.sql.DataFrame diff --git a/src/test/scala/io/qbeast/spark/internal/sources/catalog/CatalogTestSuite.scala b/src/test/scala/io/qbeast/catalog/CatalogTestSuite.scala similarity index 92% rename from src/test/scala/io/qbeast/spark/internal/sources/catalog/CatalogTestSuite.scala rename to src/test/scala/io/qbeast/catalog/CatalogTestSuite.scala index b7954eaf8..dde05e942 100644 --- a/src/test/scala/io/qbeast/spark/internal/sources/catalog/CatalogTestSuite.scala +++ b/src/test/scala/io/qbeast/catalog/CatalogTestSuite.scala @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.qbeast.spark.internal.sources.catalog +package io.qbeast.catalog import io.qbeast.context.QbeastContext -import io.qbeast.spark.table.IndexedTableFactory +import io.qbeast.table.IndexedTableFactory import io.qbeast.TestClasses.Student import org.apache.spark.sql.connector.catalog.Column import org.apache.spark.sql.connector.catalog.SparkCatalogV2Util @@ -46,9 +46,9 @@ trait CatalogTestSuite { val schema: StructType = StructType( Seq( - StructField("id", IntegerType, true), - StructField("name", StringType, true), - StructField("age", IntegerType, true))) + StructField("id", IntegerType, nullable = true), + StructField("name", StringType, nullable = true), + StructField("age", IntegerType, nullable = true))) val columns: Array[Column] = SparkCatalogV2Util.structTypeToV2Columns(schema) diff --git a/src/test/scala/io/qbeast/spark/internal/sources/catalog/DefaultStagedTableTest.scala b/src/test/scala/io/qbeast/catalog/DefaultStagedTableTest.scala similarity index 95% rename from src/test/scala/io/qbeast/spark/internal/sources/catalog/DefaultStagedTableTest.scala rename to src/test/scala/io/qbeast/catalog/DefaultStagedTableTest.scala index 79383e4de..8bd9382e7 100644 --- a/src/test/scala/io/qbeast/spark/internal/sources/catalog/DefaultStagedTableTest.scala +++ b/src/test/scala/io/qbeast/catalog/DefaultStagedTableTest.scala @@ -13,9 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.qbeast.spark.internal.sources.catalog +package io.qbeast.catalog -import io.qbeast.spark.QbeastIntegrationTestSpec +import io.qbeast.QbeastIntegrationTestSpec import org.apache.spark.sql.connector.catalog.Identifier import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.AnalysisException @@ -185,12 +185,10 @@ class DefaultStagedTableTest extends QbeastIntegrationTestSpec with CatalogTestS it should "use the right builder when table SupportsWrites" in withTmpDir(tmpDir => withExtendedSpark(sparkConf = new SparkConf() .setMaster("local[8]") - .set("spark.sql.extensions", "io.qbeast.spark.delta.QbeastDeltaSparkSessionExtension") + .set("spark.sql.extensions", "io.qbeast.sql.QbeastSparkSessionExtension") .set("spark.sql.warehouse.dir", tmpDir) .set("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") - .set( - "spark.sql.catalog.qbeast_catalog", - "io.qbeast.spark.internal.sources.catalog.QbeastCatalog"))(spark => { + .set("spark.sql.catalog.qbeast_catalog", "io.qbeast.catalog.QbeastCatalog"))(spark => { val tableIdentifier = Identifier.of(Array("default"), "students") val catalog = sessionCatalog(spark) diff --git a/src/test/scala/io/qbeast/spark/internal/sources/catalog/QbeastCatalogIntegrationTest.scala b/src/test/scala/io/qbeast/catalog/QbeastCatalogIntegrationTest.scala similarity index 95% rename from src/test/scala/io/qbeast/spark/internal/sources/catalog/QbeastCatalogIntegrationTest.scala rename to src/test/scala/io/qbeast/catalog/QbeastCatalogIntegrationTest.scala index c6acc701b..b38cf3aca 100644 --- a/src/test/scala/io/qbeast/spark/internal/sources/catalog/QbeastCatalogIntegrationTest.scala +++ b/src/test/scala/io/qbeast/catalog/QbeastCatalogIntegrationTest.scala @@ -13,9 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.qbeast.spark.internal.sources.catalog +package io.qbeast.catalog -import io.qbeast.spark.QbeastIntegrationTestSpec +import io.qbeast.QbeastIntegrationTestSpec import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.delta.DeltaLog import org.apache.spark.sql.AnalysisException @@ -24,15 +24,13 @@ import org.apache.spark.SparkConf class QbeastCatalogIntegrationTest extends QbeastIntegrationTestSpec with CatalogTestSuite { "QbeastCatalog" should - "coexist with Delta tables" in withTmpDir(tmpDir => + "coexist with Delta table" in withTmpDir(tmpDir => withExtendedSpark(sparkConf = new SparkConf() .setMaster("local[8]") - .set("spark.sql.extensions", "io.qbeast.spark.delta.QbeastDeltaSparkSessionExtension") + .set("spark.sql.extensions", "io.qbeast.sql.QbeastSparkSessionExtension") .set("spark.sql.warehouse.dir", tmpDir) .set("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") - .set( - "spark.sql.catalog.qbeast_catalog", - "io.qbeast.spark.internal.sources.catalog.QbeastCatalog"))(spark => { + .set("spark.sql.catalog.qbeast_catalog", "io.qbeast.catalog.QbeastCatalog"))(spark => { val data = createTestData(spark) @@ -58,7 +56,7 @@ class QbeastCatalogIntegrationTest extends QbeastIntegrationTestSpec with Catalo })) it should - "coexist with Delta tables in the same catalog" in withQbeastContextSparkAndTmpWarehouse( + "coexist with Delta table in the same catalog" in withQbeastContextSparkAndTmpWarehouse( (spark, _) => { val data = createTestData(spark) diff --git a/src/test/scala/io/qbeast/spark/internal/sources/catalog/QbeastCatalogTest.scala b/src/test/scala/io/qbeast/catalog/QbeastCatalogTest.scala similarity index 97% rename from src/test/scala/io/qbeast/spark/internal/sources/catalog/QbeastCatalogTest.scala rename to src/test/scala/io/qbeast/catalog/QbeastCatalogTest.scala index d22770540..5e1223ea1 100644 --- a/src/test/scala/io/qbeast/spark/internal/sources/catalog/QbeastCatalogTest.scala +++ b/src/test/scala/io/qbeast/catalog/QbeastCatalogTest.scala @@ -13,11 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.qbeast.spark.internal.sources.catalog +package io.qbeast.catalog -import io.qbeast.spark.internal.sources.v2.QbeastStagedTableImpl -import io.qbeast.spark.internal.sources.v2.QbeastTableImpl -import io.qbeast.spark.QbeastIntegrationTestSpec +import io.qbeast.sources.v2.QbeastStagedTableImpl +import io.qbeast.sources.v2.QbeastTableImpl +import io.qbeast.QbeastIntegrationTestSpec import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.connector.catalog.CatalogExtension import org.apache.spark.sql.connector.catalog.CatalogPlugin @@ -61,7 +61,7 @@ class QbeastCatalogTest extends QbeastIntegrationTestSpec with CatalogTestSuite Array.empty[Transform], Map.empty[String, String].asJava) - val newSchema = schema.add(StructField("newCol", IntegerType, false)) + val newSchema = schema.add(StructField("newCol", IntegerType, nullable = false)) val newColumns = SparkCatalogV2Util.structTypeToV2Columns(newSchema) qbeastCatalog.stageReplace( tableIdentifier, @@ -87,7 +87,7 @@ class QbeastCatalogTest extends QbeastIntegrationTestSpec with CatalogTestSuite qbeastCatalog.listTables(defaultNamespace) shouldBe Array(tableIdentifier) - val newSchema = schema.add(StructField("newCol", IntegerType, false)) + val newSchema = schema.add(StructField("newCol", IntegerType, nullable = false)) val newColumns = SparkCatalogV2Util.structTypeToV2Columns(newSchema) qbeastCatalog.stageCreateOrReplace( @@ -119,7 +119,8 @@ class QbeastCatalogTest extends QbeastIntegrationTestSpec with CatalogTestSuite tableIdentifier, TableChange.addColumn(Array("x"), IntegerType, false)) - val modifiedSchema = StructType(schema.fields ++ Seq(StructField("x", IntegerType, false))) + val modifiedSchema = + StructType(schema.fields ++ Seq(StructField("x", IntegerType, nullable = false))) val modifiedColumns = SparkCatalogV2Util.structTypeToV2Columns(modifiedSchema) qbeastCatalog .loadTable(Identifier.of(defaultNamespace, "student")) diff --git a/src/test/scala/io/qbeast/context/QbeastConfigTest.scala b/src/test/scala/io/qbeast/context/QbeastConfigTest.scala index 6824afe3e..bef5bf746 100644 --- a/src/test/scala/io/qbeast/context/QbeastConfigTest.scala +++ b/src/test/scala/io/qbeast/context/QbeastConfigTest.scala @@ -15,7 +15,7 @@ */ package io.qbeast.context -import io.qbeast.spark.QbeastIntegrationTestSpec +import io.qbeast.QbeastIntegrationTestSpec import org.apache.spark.qbeast.config import org.apache.spark.sql.SparkSession import org.scalatest.flatspec.AnyFlatSpec diff --git a/src/test/scala/io/qbeast/context/QbeastContextTest.scala b/src/test/scala/io/qbeast/context/QbeastContextTest.scala index f62de80bc..673f8363f 100644 --- a/src/test/scala/io/qbeast/context/QbeastContextTest.scala +++ b/src/test/scala/io/qbeast/context/QbeastContextTest.scala @@ -23,8 +23,8 @@ import io.qbeast.spark.delta.DeltaStagingDataManagerFactory import io.qbeast.spark.index.SparkColumnsToIndexSelector import io.qbeast.spark.index.SparkOTreeManager import io.qbeast.spark.index.SparkRevisionFactory -import io.qbeast.spark.table.IndexedTableFactoryImpl -import io.qbeast.spark.QbeastIntegrationTestSpec +import io.qbeast.table.IndexedTableFactoryImpl +import io.qbeast.QbeastIntegrationTestSpec import org.apache.spark.sql.SparkSession import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers diff --git a/src/test/scala/io/qbeast/core/model/DenormalizedBlockTest.scala b/src/test/scala/io/qbeast/core/model/DenormalizedBlockTest.scala index fd4e9525f..4524b34c2 100644 --- 
a/src/test/scala/io/qbeast/core/model/DenormalizedBlockTest.scala +++ b/src/test/scala/io/qbeast/core/model/DenormalizedBlockTest.scala @@ -16,7 +16,7 @@ package io.qbeast.core.model import io.qbeast.core.transform.EmptyTransformer -import io.qbeast.spark.QbeastIntegrationTestSpec +import io.qbeast.QbeastIntegrationTestSpec import scala.collection.immutable.SortedSet diff --git a/src/test/scala/io/qbeast/core/model/NormalizedWeightTest.scala b/src/test/scala/io/qbeast/core/model/NormalizedWeightTest.scala index b14548772..ace27e08d 100644 --- a/src/test/scala/io/qbeast/core/model/NormalizedWeightTest.scala +++ b/src/test/scala/io/qbeast/core/model/NormalizedWeightTest.scala @@ -15,7 +15,7 @@ */ package io.qbeast.core.model -import io.qbeast.spark.QbeastIntegrationTestSpec +import io.qbeast.QbeastIntegrationTestSpec /** * Tests of NormalizedWeight. diff --git a/src/test/scala/io/qbeast/core/model/WeightTest.scala b/src/test/scala/io/qbeast/core/model/WeightTest.scala index 469695af2..927b8313a 100644 --- a/src/test/scala/io/qbeast/core/model/WeightTest.scala +++ b/src/test/scala/io/qbeast/core/model/WeightTest.scala @@ -15,7 +15,7 @@ */ package io.qbeast.core.model -import io.qbeast.spark.QbeastIntegrationTestSpec +import io.qbeast.QbeastIntegrationTestSpec /** * Tests for [[Weight]]. diff --git a/src/test/scala/io/qbeast/spark/internal/sources/QbeastBaseRelationTest.scala b/src/test/scala/io/qbeast/sources/QbeastBaseRelationTest.scala similarity index 96% rename from src/test/scala/io/qbeast/spark/internal/sources/QbeastBaseRelationTest.scala rename to src/test/scala/io/qbeast/sources/QbeastBaseRelationTest.scala index 9a16573f7..c87a65f8b 100644 --- a/src/test/scala/io/qbeast/spark/internal/sources/QbeastBaseRelationTest.scala +++ b/src/test/scala/io/qbeast/sources/QbeastBaseRelationTest.scala @@ -13,11 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.qbeast.spark.internal.sources +package io.qbeast.sources import io.qbeast.context.QbeastContext import io.qbeast.core.model.QTableID -import io.qbeast.spark.QbeastIntegrationTestSpec +import io.qbeast.QbeastIntegrationTestSpec import io.qbeast.TestClasses.Student import org.apache.spark.sql.execution.datasources.HadoopFsRelation import org.apache.spark.sql.sources.InsertableRelation @@ -49,7 +49,7 @@ class QbeastBaseRelationTest extends QbeastIntegrationTestSpec { val indexedTable = QbeastContext.indexedTableFactory.getIndexedTable(QTableID(tmpDir)) val qbeastBaseRelation = QbeastBaseRelation.forQbeastTable(indexedTable) - qbeastBaseRelation.asInstanceOf[InsertableRelation].insert(df, false) + qbeastBaseRelation.asInstanceOf[InsertableRelation].insert(df, overwrite = false) val indexed = spark.read.format("qbeast").load(tmpDir) indexed.count() shouldBe df.count() * 2 // we write two times the data @@ -72,7 +72,7 @@ class QbeastBaseRelationTest extends QbeastIntegrationTestSpec { val df = 1.to(10).map(i => Student(i, i.toString, Random.nextInt())).toDF("id", "name", "age") - qbeastBaseRelation.asInstanceOf[InsertableRelation].insert(df, false) + qbeastBaseRelation.asInstanceOf[InsertableRelation].insert(df, overwrite = false) val indexed = spark.read.format("qbeast").load(tmpDir) indexed.count() shouldBe df.count() diff --git a/src/test/scala/io/qbeast/spark/internal/sources/QbeastDataSourceTest.scala b/src/test/scala/io/qbeast/sources/QbeastDataSourceTest.scala similarity index 98% rename from src/test/scala/io/qbeast/spark/internal/sources/QbeastDataSourceTest.scala rename to src/test/scala/io/qbeast/sources/QbeastDataSourceTest.scala index d0f2d543d..178af08fc 100644 --- a/src/test/scala/io/qbeast/spark/internal/sources/QbeastDataSourceTest.scala +++ b/src/test/scala/io/qbeast/sources/QbeastDataSourceTest.scala @@ -13,11 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.qbeast.spark.internal.sources +package io.qbeast.sources import io.qbeast.core.model.QTableID -import io.qbeast.spark.table.IndexedTable -import io.qbeast.spark.table.IndexedTableFactory +import io.qbeast.table.IndexedTable +import io.qbeast.table.IndexedTableFactory import org.apache.log4j.Level import org.apache.spark.sql.connector.catalog.SparkCatalogV2Util import org.apache.spark.sql.connector.catalog.TableCapability._ diff --git a/src/test/scala/io/qbeast/spark/internal/sources/QbeastOptionsTest.scala b/src/test/scala/io/qbeast/sources/QbeastOptionsTest.scala similarity index 98% rename from src/test/scala/io/qbeast/spark/internal/sources/QbeastOptionsTest.scala rename to src/test/scala/io/qbeast/sources/QbeastOptionsTest.scala index 78040e3af..2b654e063 100644 --- a/src/test/scala/io/qbeast/spark/internal/sources/QbeastOptionsTest.scala +++ b/src/test/scala/io/qbeast/sources/QbeastOptionsTest.scala @@ -13,12 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.qbeast.spark.internal.sources +package io.qbeast.sources import io.qbeast.core.model.HookInfo import io.qbeast.core.model.PreCommitHook.PRE_COMMIT_HOOKS_PREFIX import io.qbeast.spark.internal.QbeastOptions -import io.qbeast.spark.QbeastIntegrationTestSpec +import io.qbeast.QbeastIntegrationTestSpec import org.apache.spark.qbeast.config import org.apache.spark.sql.AnalysisException diff --git a/src/test/scala/io/qbeast/spark/internal/sources/QbeastStagedTableTest.scala b/src/test/scala/io/qbeast/sources/QbeastStagedTableTest.scala similarity index 93% rename from src/test/scala/io/qbeast/spark/internal/sources/QbeastStagedTableTest.scala rename to src/test/scala/io/qbeast/sources/QbeastStagedTableTest.scala index 886497925..c32f51180 100644 --- a/src/test/scala/io/qbeast/spark/internal/sources/QbeastStagedTableTest.scala +++ b/src/test/scala/io/qbeast/sources/QbeastStagedTableTest.scala @@ -13,11 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.qbeast.spark.internal.sources +package io.qbeast.sources -import io.qbeast.spark.internal.sources.catalog.CatalogTestSuite -import io.qbeast.spark.internal.sources.v2.QbeastStagedTableImpl -import io.qbeast.spark.QbeastIntegrationTestSpec +import io.qbeast.catalog.CatalogTestSuite +import io.qbeast.sources.v2.QbeastStagedTableImpl +import io.qbeast.QbeastIntegrationTestSpec import org.apache.hadoop.fs.Path import org.apache.spark.sql.connector.catalog.Identifier import org.apache.spark.sql.connector.catalog.TableCapability.V1_BATCH_WRITE @@ -53,7 +53,7 @@ class QbeastStagedTableTest extends QbeastIntegrationTestSpec with CatalogTestSu Map("provider" -> "qbeast", "columnsToIndex" -> "id").asJava) .asInstanceOf[QbeastStagedTableImpl] - qbeastStagedTable.schema() shouldBe schema + qbeastStagedTable.schema shouldBe schema }) it should "retrieve name" in withQbeastContextSparkAndTmpWarehouse((spark, _) => { @@ -126,7 +126,10 @@ class QbeastStagedTableTest extends QbeastIntegrationTestSpec with CatalogTestSu .build() writeBuilder shouldBe a[V1Write] - writeBuilder.asInstanceOf[V1Write].toInsertableRelation.insert(dataToCommit, false) + writeBuilder + .asInstanceOf[V1Write] + .toInsertableRelation + .insert(dataToCommit, overwrite = false) // Commit the staged changes // This should create the log and write the data diff --git a/src/test/scala/io/qbeast/spark/internal/sources/QbeastTableImplTest.scala b/src/test/scala/io/qbeast/sources/QbeastTableImplTest.scala similarity index 89% rename from src/test/scala/io/qbeast/spark/internal/sources/QbeastTableImplTest.scala rename to src/test/scala/io/qbeast/sources/QbeastTableImplTest.scala index a1da08258..aeb863ca0 100644 --- a/src/test/scala/io/qbeast/spark/internal/sources/QbeastTableImplTest.scala +++ b/src/test/scala/io/qbeast/sources/QbeastTableImplTest.scala @@ -13,12 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.qbeast.spark.internal.sources +package io.qbeast.sources -import io.qbeast.spark.internal.sources.catalog.CatalogTestSuite -import io.qbeast.spark.internal.sources.v2.QbeastTableImpl -import io.qbeast.spark.internal.sources.v2.QbeastWriteBuilder -import io.qbeast.spark.QbeastIntegrationTestSpec +import io.qbeast.catalog.CatalogTestSuite +import io.qbeast.sources.v2.QbeastTableImpl +import io.qbeast.sources.v2.QbeastWriteBuilder +import io.qbeast.QbeastIntegrationTestSpec import org.apache.hadoop.fs.Path import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.connector.catalog.Identifier @@ -30,7 +30,7 @@ import scala.collection.JavaConverters._ class QbeastTableImplTest extends QbeastIntegrationTestSpec with CatalogTestSuite { def createQbeastTableImpl(tmpDir: String): QbeastTableImpl = { - new QbeastTableImpl( + QbeastTableImpl( TableIdentifier("students"), new Path(tmpDir), Map.empty, @@ -76,7 +76,7 @@ class QbeastTableImplTest extends QbeastIntegrationTestSpec with CatalogTestSuit it should "load properties" in withQbeastContextSparkAndTmpWarehouse((_, tmpDir) => { val properties = Map("provider" -> "qbeast", "columnsToIndex" -> "id") - val qbeastTableImpl = new QbeastTableImpl( + val qbeastTableImpl = QbeastTableImpl( TableIdentifier("students"), new Path(tmpDir), properties, @@ -100,7 +100,7 @@ class QbeastTableImplTest extends QbeastIntegrationTestSpec with CatalogTestSuit val properties = Map.empty[String, String] qbeastCatalog.createTable(identifier, columns, Array.empty[Transform], properties.asJava) - val qbeastTableImpl = new QbeastTableImpl( + val qbeastTableImpl = QbeastTableImpl( tableIdentifier, new Path(tmpDir), properties, diff --git a/src/test/scala/io/qbeast/spark/delta/DefaultFileIndexTest.scala b/src/test/scala/io/qbeast/spark/delta/DefaultFileIndexTest.scala index b33c6ec4a..4d55278b9 100644 --- a/src/test/scala/io/qbeast/spark/delta/DefaultFileIndexTest.scala +++ b/src/test/scala/io/qbeast/spark/delta/DefaultFileIndexTest.scala @@ -16,7 +16,7 @@ package io.qbeast.spark.delta import io.qbeast.spark.internal.expressions.QbeastMurmur3Hash -import io.qbeast.spark.QbeastIntegrationTestSpec +import io.qbeast.QbeastIntegrationTestSpec import org.apache.hadoop.fs.Path import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.expressions.LessThan diff --git a/src/test/scala/io/qbeast/spark/delta/DeltaRollupDataWriterTest.scala b/src/test/scala/io/qbeast/spark/delta/DeltaRollupDataWriterTest.scala index d68572233..e7ad21439 100644 --- a/src/test/scala/io/qbeast/spark/delta/DeltaRollupDataWriterTest.scala +++ b/src/test/scala/io/qbeast/spark/delta/DeltaRollupDataWriterTest.scala @@ -23,7 +23,7 @@ import io.qbeast.core.transform.EmptyTransformer import io.qbeast.spark.index.SparkOTreeManager import io.qbeast.spark.index.SparkRevisionFactory import io.qbeast.spark.internal.QbeastOptions -import io.qbeast.spark.QbeastIntegrationTestSpec +import io.qbeast.QbeastIntegrationTestSpec import io.qbeast.TestClasses._ import scala.reflect.io.Path diff --git a/src/test/scala/io/qbeast/spark/delta/QbeastDeltaIntegrationTest.scala b/src/test/scala/io/qbeast/spark/delta/QbeastDeltaIntegrationTest.scala index 6b2c605d0..dc2475319 100644 --- a/src/test/scala/io/qbeast/spark/delta/QbeastDeltaIntegrationTest.scala +++ b/src/test/scala/io/qbeast/spark/delta/QbeastDeltaIntegrationTest.scala @@ -16,7 +16,7 @@ package io.qbeast.spark.delta import io.delta.tables._ -import io.qbeast.spark.QbeastIntegrationTestSpec 
+import io.qbeast.QbeastIntegrationTestSpec import org.apache.spark.sql.delta.DeltaLog import org.apache.spark.sql.DataFrame import org.apache.spark.sql.SparkSession diff --git a/src/test/scala/io/qbeast/spark/delta/QbeastFileUtilsTest.scala b/src/test/scala/io/qbeast/spark/delta/QbeastFileUtilsTest.scala index 0622f80a6..d5a1f7ef7 100644 --- a/src/test/scala/io/qbeast/spark/delta/QbeastFileUtilsTest.scala +++ b/src/test/scala/io/qbeast/spark/delta/QbeastFileUtilsTest.scala @@ -18,7 +18,7 @@ package io.qbeast.spark.delta import com.fasterxml.jackson.core.JsonParseException import io.qbeast.core.model._ import io.qbeast.spark.utils.TagUtils -import io.qbeast.spark.QbeastIntegrationTestSpec +import io.qbeast.QbeastIntegrationTestSpec import org.apache.hadoop.fs.Path import org.apache.spark.sql.delta.actions.AddFile import org.apache.spark.sql.delta.actions.RemoveFile diff --git a/src/test/scala/io/qbeast/spark/delta/QbeastSparkTxnTest.scala b/src/test/scala/io/qbeast/spark/delta/QbeastSparkTxnTest.scala index a3c45b77e..99018500e 100644 --- a/src/test/scala/io/qbeast/spark/delta/QbeastSparkTxnTest.scala +++ b/src/test/scala/io/qbeast/spark/delta/QbeastSparkTxnTest.scala @@ -16,7 +16,7 @@ package io.qbeast.spark.delta import io.qbeast.spark.internal.QbeastOptions -import io.qbeast.spark.QbeastIntegrationTestSpec +import io.qbeast.QbeastIntegrationTestSpec import io.qbeast.TestClasses.Student import org.apache.spark.sql.delta.DeltaLog import org.apache.spark.sql.DataFrame diff --git a/src/test/scala/io/qbeast/spark/delta/keeper/ProtocolMock.scala b/src/test/scala/io/qbeast/spark/delta/keeper/ProtocolMock.scala index 4e5762cd9..099c3e9bc 100644 --- a/src/test/scala/io/qbeast/spark/delta/keeper/ProtocolMock.scala +++ b/src/test/scala/io/qbeast/spark/delta/keeper/ProtocolMock.scala @@ -21,7 +21,7 @@ import io.qbeast.spark.delta.DeltaMetadataManager import io.qbeast.spark.delta.DeltaQbeastSnapshot import io.qbeast.spark.delta.MetadataWriterTest import io.qbeast.spark.internal.QbeastOptions -import io.qbeast.spark.QbeastIntegrationTestSpec +import io.qbeast.QbeastIntegrationTestSpec import org.apache.spark.sql.delta.actions.AddFile import org.apache.spark.sql.delta.actions.RemoveFile import org.apache.spark.sql.delta.DeltaOperations diff --git a/src/test/scala/io/qbeast/docs/DocumentationTests.scala b/src/test/scala/io/qbeast/spark/docs/DocumentationTests.scala similarity index 98% rename from src/test/scala/io/qbeast/docs/DocumentationTests.scala rename to src/test/scala/io/qbeast/spark/docs/DocumentationTests.scala index c42ec77bd..5b30ce8e2 100644 --- a/src/test/scala/io/qbeast/docs/DocumentationTests.scala +++ b/src/test/scala/io/qbeast/spark/docs/DocumentationTests.scala @@ -13,9 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.qbeast.docs +package io.qbeast.spark.docs -import io.qbeast.spark.QbeastIntegrationTestSpec +import io.qbeast.QbeastIntegrationTestSpec import org.apache.spark.sql.functions.input_file_name import org.apache.spark.SparkConf import org.scalatest.AppendedClues.convertToClueful diff --git a/src/test/scala/io/qbeast/spark/index/CubeDomainsIntegrationTest.scala b/src/test/scala/io/qbeast/spark/index/CubeDomainsIntegrationTest.scala index ef6c2ea6a..0c5405169 100644 --- a/src/test/scala/io/qbeast/spark/index/CubeDomainsIntegrationTest.scala +++ b/src/test/scala/io/qbeast/spark/index/CubeDomainsIntegrationTest.scala @@ -21,7 +21,7 @@ import io.qbeast.core.model.IndexStatus import io.qbeast.core.model.QTableID import io.qbeast.core.model.Weight import io.qbeast.spark.internal.QbeastOptions -import io.qbeast.spark.QbeastIntegrationTestSpec +import io.qbeast.QbeastIntegrationTestSpec import io.qbeast.TestClasses.Client3 import org.apache.spark.qbeast.config.CUBE_WEIGHTS_BUFFER_CAPACITY import org.apache.spark.qbeast.config.DEFAULT_CUBE_SIZE diff --git a/src/test/scala/io/qbeast/spark/index/DataStagingTest.scala b/src/test/scala/io/qbeast/spark/index/DataStagingTest.scala index f5a23c62b..c3caeb331 100644 --- a/src/test/scala/io/qbeast/spark/index/DataStagingTest.scala +++ b/src/test/scala/io/qbeast/spark/index/DataStagingTest.scala @@ -15,11 +15,11 @@ */ package io.qbeast.spark.index +import io.qbeast.context.QbeastContext import io.qbeast.core.model.QTableID import io.qbeast.core.model.StagingUtils -import io.qbeast.spark.delta.DeltaStagingDataManager -import io.qbeast.spark.internal.commands.ConvertToQbeastCommand -import io.qbeast.spark.QbeastIntegrationTestSpec +import io.qbeast.internal.commands.ConvertToQbeastCommand +import io.qbeast.QbeastIntegrationTestSpec import io.qbeast.TestClasses.T2 import org.apache.spark.sql.DataFrame import org.apache.spark.sql.SparkSession @@ -78,7 +78,8 @@ class DataStagingTest val revisions = snapshot.loadAllRevisions revisions.size shouldBe 2 - val stagingDataManager = new DeltaStagingDataManager(QTableID(tmpDir)) + val stagingDataManager = + QbeastContext.stagingDataManagerBuilder.getManager(QTableID(tmpDir)) val indexedDataSize = snapshot .loadIndexStatus(1) @@ -119,7 +120,8 @@ class DataStagingTest val revisions = snapshot.loadAllRevisions revisions.size shouldBe 2 - val stagingDataManager = new DeltaStagingDataManager(QTableID(tmpDir)) + val stagingDataManager = + QbeastContext.stagingDataManagerBuilder.getManager(QTableID(tmpDir)) val indexedDataSize = snapshot .loadIndexStatus(1) diff --git a/src/test/scala/io/qbeast/spark/index/DoublePassOTreeDataAnalyzerTest.scala b/src/test/scala/io/qbeast/spark/index/DoublePassOTreeDataAnalyzerTest.scala index ebc9cc0ee..3602b0b10 100644 --- a/src/test/scala/io/qbeast/spark/index/DoublePassOTreeDataAnalyzerTest.scala +++ b/src/test/scala/io/qbeast/spark/index/DoublePassOTreeDataAnalyzerTest.scala @@ -23,8 +23,8 @@ import io.qbeast.spark.index.DoublePassOTreeDataAnalyzer._ import io.qbeast.spark.index.QbeastColumns.weightColumnName import io.qbeast.spark.internal.QbeastFunctions.qbeastHash import io.qbeast.spark.utils.SparkToQTypesUtils -import io.qbeast.spark.QbeastIntegrationTestSpec import io.qbeast.IISeq +import io.qbeast.QbeastIntegrationTestSpec import io.qbeast.TestClasses._ import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.StructField diff --git a/src/test/scala/io/qbeast/spark/index/IndexStatusBuilderTest.scala 
b/src/test/scala/io/qbeast/spark/index/IndexStatusBuilderTest.scala index 5828823b5..95672c4a0 100644 --- a/src/test/scala/io/qbeast/spark/index/IndexStatusBuilderTest.scala +++ b/src/test/scala/io/qbeast/spark/index/IndexStatusBuilderTest.scala @@ -20,7 +20,7 @@ import io.qbeast.core.model.CubeId import io.qbeast.core.model.CubeStatus import io.qbeast.core.model.QTableID import io.qbeast.core.model.Weight -import io.qbeast.spark.QbeastIntegrationTestSpec +import io.qbeast.QbeastIntegrationTestSpec class IndexStatusBuilderTest extends QbeastIntegrationTestSpec { diff --git a/src/test/scala/io/qbeast/spark/index/IndexTest.scala b/src/test/scala/io/qbeast/spark/index/IndexTest.scala index fda470e0e..05c893f7a 100644 --- a/src/test/scala/io/qbeast/spark/index/IndexTest.scala +++ b/src/test/scala/io/qbeast/spark/index/IndexTest.scala @@ -19,7 +19,7 @@ import io.qbeast.context.QbeastContext import io.qbeast.core.model._ import io.qbeast.core.model.BroadcastedTableChanges import io.qbeast.spark.internal.QbeastOptions -import io.qbeast.spark.QbeastIntegrationTestSpec +import io.qbeast.QbeastIntegrationTestSpec import io.qbeast.TestClasses.Client3 import io.qbeast.TestClasses.Client4 import org.apache.spark.sql.functions.col diff --git a/src/test/scala/io/qbeast/spark/index/NewRevisionTest.scala b/src/test/scala/io/qbeast/spark/index/NewRevisionTest.scala index 109f2afaf..365120c29 100644 --- a/src/test/scala/io/qbeast/spark/index/NewRevisionTest.scala +++ b/src/test/scala/io/qbeast/spark/index/NewRevisionTest.scala @@ -16,7 +16,7 @@ package io.qbeast.spark.index import io.qbeast.core.transform.LinearTransformation -import io.qbeast.spark.QbeastIntegrationTestSpec +import io.qbeast.QbeastIntegrationTestSpec import io.qbeast.TestClasses._ import org.apache.spark.sql.SparkSession import org.scalatest.flatspec.AnyFlatSpec diff --git a/src/test/scala/io/qbeast/spark/index/NormalizedWeightIntegrationTest.scala b/src/test/scala/io/qbeast/spark/index/NormalizedWeightIntegrationTest.scala index de034e4c2..c4f55c814 100644 --- a/src/test/scala/io/qbeast/spark/index/NormalizedWeightIntegrationTest.scala +++ b/src/test/scala/io/qbeast/spark/index/NormalizedWeightIntegrationTest.scala @@ -17,7 +17,7 @@ package io.qbeast.spark.index import io.qbeast.context.QbeastContext import io.qbeast.core.model.QTableID -import io.qbeast.spark.QbeastIntegrationTestSpec +import io.qbeast.QbeastIntegrationTestSpec import io.qbeast.TestClasses.Client3 import org.apache.spark.sql.Dataset import org.apache.spark.sql.SparkSession diff --git a/src/test/scala/io/qbeast/spark/index/OTreeAlgorithmTest.scala b/src/test/scala/io/qbeast/spark/index/OTreeAlgorithmTest.scala index 19ea2f80d..cde4dacae 100644 --- a/src/test/scala/io/qbeast/spark/index/OTreeAlgorithmTest.scala +++ b/src/test/scala/io/qbeast/spark/index/OTreeAlgorithmTest.scala @@ -18,7 +18,7 @@ package io.qbeast.spark.index import io.qbeast.core.model.QTableID import io.qbeast.spark.index.QbeastColumns._ import io.qbeast.spark.internal.QbeastOptions -import io.qbeast.spark.QbeastIntegrationTestSpec +import io.qbeast.QbeastIntegrationTestSpec import io.qbeast.TestClasses._ import org.apache.spark.sql.functions._ import org.apache.spark.sql.DataFrame diff --git a/src/test/scala/io/qbeast/spark/index/RevisionTest.scala b/src/test/scala/io/qbeast/spark/index/RevisionTest.scala index a326d1611..862a66499 100644 --- a/src/test/scala/io/qbeast/spark/index/RevisionTest.scala +++ b/src/test/scala/io/qbeast/spark/index/RevisionTest.scala @@ -16,7 +16,7 @@ package 
io.qbeast.spark.index import io.qbeast.core.model.Revision -import io.qbeast.spark.QbeastIntegrationTestSpec +import io.qbeast.QbeastIntegrationTestSpec import io.qbeast.TestClasses._ import org.apache.spark.sql.functions.max import org.apache.spark.sql.functions.min diff --git a/src/test/scala/io/qbeast/spark/index/SparkColumnsToIndexSelectorTest.scala b/src/test/scala/io/qbeast/spark/index/SparkColumnsToIndexSelectorTest.scala index a20d3da20..bdc322988 100644 --- a/src/test/scala/io/qbeast/spark/index/SparkColumnsToIndexSelectorTest.scala +++ b/src/test/scala/io/qbeast/spark/index/SparkColumnsToIndexSelectorTest.scala @@ -15,7 +15,7 @@ */ package io.qbeast.spark.index -import io.qbeast.spark.QbeastIntegrationTestSpec +import io.qbeast.QbeastIntegrationTestSpec class SparkColumnsToIndexSelectorTest extends QbeastIntegrationTestSpec { diff --git a/src/test/scala/io/qbeast/spark/index/SparkRevisionFactoryTest.scala b/src/test/scala/io/qbeast/spark/index/SparkRevisionFactoryTest.scala index dc849535b..f5e57e798 100644 --- a/src/test/scala/io/qbeast/spark/index/SparkRevisionFactoryTest.scala +++ b/src/test/scala/io/qbeast/spark/index/SparkRevisionFactoryTest.scala @@ -25,7 +25,7 @@ import io.qbeast.core.transform.LinearTransformer import io.qbeast.core.transform.ManualPlaceholderTransformation import io.qbeast.spark.delta.DeltaQbeastSnapshot import io.qbeast.spark.internal.QbeastOptions -import io.qbeast.spark.QbeastIntegrationTestSpec +import io.qbeast.QbeastIntegrationTestSpec import io.qbeast.TestClasses.T3 import org.apache.spark.sql.delta.DeltaLog import org.apache.spark.sql.functions.to_timestamp diff --git a/src/test/scala/io/qbeast/spark/index/model/transformer/TransformerIndexingTest.scala b/src/test/scala/io/qbeast/spark/index/model/transformer/TransformerIndexingTest.scala index d8a75cb32..76ff2580c 100644 --- a/src/test/scala/io/qbeast/spark/index/model/transformer/TransformerIndexingTest.scala +++ b/src/test/scala/io/qbeast/spark/index/model/transformer/TransformerIndexingTest.scala @@ -15,7 +15,7 @@ */ package io.qbeast.spark.index.model.transformer -import io.qbeast.spark.QbeastIntegrationTestSpec +import io.qbeast.QbeastIntegrationTestSpec import io.qbeast.TestClasses._ import org.apache.spark.sql.functions._ import org.apache.spark.sql.Dataset @@ -38,7 +38,7 @@ class TransformerIndexingTest extends AnyFlatSpec with Matchers with QbeastInteg .load(tmpDir) } - "Qbeast spark" should "Index tables with string" in withSparkAndTmpDir((spark, tmpDir) => { + "Qbeast spark" should "index tables with string" in withSparkAndTmpDir((spark, tmpDir) => { import spark.implicits._ val source = spark diff --git a/src/test/scala/io/qbeast/spark/index/query/DisjunctiveQuerySpecTest.scala b/src/test/scala/io/qbeast/spark/index/query/DisjunctiveQuerySpecTest.scala index e28e7a5f5..3be01b392 100644 --- a/src/test/scala/io/qbeast/spark/index/query/DisjunctiveQuerySpecTest.scala +++ b/src/test/scala/io/qbeast/spark/index/query/DisjunctiveQuerySpecTest.scala @@ -22,7 +22,7 @@ import io.qbeast.core.model.Weight import io.qbeast.core.model.WeightRange import io.qbeast.core.transform.LinearTransformation import io.qbeast.core.transform.Transformer -import io.qbeast.spark.QbeastIntegrationTestSpec +import io.qbeast.QbeastIntegrationTestSpec import org.apache.spark.sql.functions.expr class DisjunctiveQuerySpecTest extends QbeastIntegrationTestSpec with QueryTestSpec { diff --git a/src/test/scala/io/qbeast/spark/index/query/QueryExecutorTest.scala 
b/src/test/scala/io/qbeast/spark/index/query/QueryExecutorTest.scala index 90057e4ac..6c837bfe8 100644 --- a/src/test/scala/io/qbeast/spark/index/query/QueryExecutorTest.scala +++ b/src/test/scala/io/qbeast/spark/index/query/QueryExecutorTest.scala @@ -18,8 +18,8 @@ package io.qbeast.spark.index.query import io.qbeast.core.model._ import io.qbeast.core.model.CubeStatus import io.qbeast.core.transform.EmptyTransformer -import io.qbeast.spark.QbeastIntegrationTestSpec -import io.qbeast.spark.QbeastTable +import io.qbeast.table.QbeastTable +import io.qbeast.QbeastIntegrationTestSpec import org.apache.hadoop.fs.Path import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.functions.col diff --git a/src/test/scala/io/qbeast/spark/index/query/QuerySpecBuilderTest.scala b/src/test/scala/io/qbeast/spark/index/query/QuerySpecBuilderTest.scala index 6d18c1a62..cc0b5c38f 100644 --- a/src/test/scala/io/qbeast/spark/index/query/QuerySpecBuilderTest.scala +++ b/src/test/scala/io/qbeast/spark/index/query/QuerySpecBuilderTest.scala @@ -19,7 +19,7 @@ import io.qbeast.core.model._ import io.qbeast.core.transform.HashTransformation import io.qbeast.core.transform.LinearTransformation import io.qbeast.core.transform.Transformer -import io.qbeast.spark.QbeastIntegrationTestSpec +import io.qbeast.QbeastIntegrationTestSpec import org.apache.spark.sql.functions.expr class QuerySpecBuilderTest extends QbeastIntegrationTestSpec with QueryTestSpec { diff --git a/src/test/scala/io/qbeast/spark/index/query/TimeSeriesQueryTest.scala b/src/test/scala/io/qbeast/spark/index/query/TimeSeriesQueryTest.scala index 5afdf9577..c278f6a8e 100644 --- a/src/test/scala/io/qbeast/spark/index/query/TimeSeriesQueryTest.scala +++ b/src/test/scala/io/qbeast/spark/index/query/TimeSeriesQueryTest.scala @@ -15,7 +15,7 @@ */ package io.qbeast.spark.index.query -import io.qbeast.spark.QbeastIntegrationTestSpec +import io.qbeast.QbeastIntegrationTestSpec import org.apache.spark.sql.functions.to_date import org.apache.spark.sql.functions.to_timestamp import org.apache.spark.sql.functions.unix_timestamp diff --git a/src/test/scala/io/qbeast/spark/utils/IndexMetricsTest.scala b/src/test/scala/io/qbeast/spark/utils/IndexMetricsTest.scala index 72fc8ca01..a71948397 100644 --- a/src/test/scala/io/qbeast/spark/utils/IndexMetricsTest.scala +++ b/src/test/scala/io/qbeast/spark/utils/IndexMetricsTest.scala @@ -16,7 +16,7 @@ package io.qbeast.spark.utils import io.qbeast.core.model._ -import io.qbeast.spark.QbeastIntegrationTestSpec +import io.qbeast.QbeastIntegrationTestSpec import org.apache.spark.sql.Dataset class IndexMetricsTest extends QbeastIntegrationTestSpec { diff --git a/src/test/scala/io/qbeast/spark/delta/PreCommitHookIntegrationTest.scala b/src/test/scala/io/qbeast/spark/utils/PreCommitHookIntegrationTest.scala similarity index 97% rename from src/test/scala/io/qbeast/spark/delta/PreCommitHookIntegrationTest.scala rename to src/test/scala/io/qbeast/spark/utils/PreCommitHookIntegrationTest.scala index 1818c0dec..c15e578f7 100644 --- a/src/test/scala/io/qbeast/spark/delta/PreCommitHookIntegrationTest.scala +++ b/src/test/scala/io/qbeast/spark/utils/PreCommitHookIntegrationTest.scala @@ -1,4 +1,4 @@ -package io.qbeast.spark.delta +package io.qbeast.spark.utils import io.qbeast.context.QbeastContext import io.qbeast.core.model.PreCommitHook @@ -6,7 +6,7 @@ import io.qbeast.core.model.PreCommitHook.PRE_COMMIT_HOOKS_PREFIX import io.qbeast.core.model.PreCommitHook.PreCommitHookOutput import 
io.qbeast.core.model.QTableID import io.qbeast.core.model.QbeastFile -import io.qbeast.spark.QbeastIntegrationTestSpec +import io.qbeast.QbeastIntegrationTestSpec import org.apache.spark.sql.delta.actions.Action import org.apache.spark.sql.delta.actions.CommitInfo import org.apache.spark.sql.delta.util.FileNames diff --git a/src/test/scala/io/qbeast/spark/utils/QbeastCreateTableSQLTest.scala b/src/test/scala/io/qbeast/spark/utils/QbeastCreateTableSQLTest.scala index 00c3a704b..c0cf7f11f 100644 --- a/src/test/scala/io/qbeast/spark/utils/QbeastCreateTableSQLTest.scala +++ b/src/test/scala/io/qbeast/spark/utils/QbeastCreateTableSQLTest.scala @@ -15,8 +15,8 @@ */ package io.qbeast.spark.utils -import io.qbeast.spark.QbeastIntegrationTestSpec -import io.qbeast.spark.QbeastTable +import io.qbeast.table.QbeastTable +import io.qbeast.QbeastIntegrationTestSpec import io.qbeast.TestClasses.Student import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.DataFrame diff --git a/src/test/scala/io/qbeast/spark/utils/QbeastDeltaStagingTest.scala b/src/test/scala/io/qbeast/spark/utils/QbeastDeltaStagingTest.scala index 17738b59d..a83d266e3 100644 --- a/src/test/scala/io/qbeast/spark/utils/QbeastDeltaStagingTest.scala +++ b/src/test/scala/io/qbeast/spark/utils/QbeastDeltaStagingTest.scala @@ -16,8 +16,8 @@ package io.qbeast.spark.utils import io.qbeast.core.model.StagingUtils -import io.qbeast.spark.QbeastIntegrationTestSpec -import io.qbeast.spark.QbeastTable +import io.qbeast.table.QbeastTable +import io.qbeast.QbeastIntegrationTestSpec import io.qbeast.TestClasses.T2 import org.apache.spark.sql.SparkSession diff --git a/src/test/scala/io/qbeast/spark/utils/QbeastFilterPushdownTest.scala b/src/test/scala/io/qbeast/spark/utils/QbeastFilterPushdownTest.scala index 7b9160e18..d5605f79a 100644 --- a/src/test/scala/io/qbeast/spark/utils/QbeastFilterPushdownTest.scala +++ b/src/test/scala/io/qbeast/spark/utils/QbeastFilterPushdownTest.scala @@ -16,7 +16,7 @@ package io.qbeast.spark.utils import io.qbeast.spark.delta.DefaultFileIndex -import io.qbeast.spark.QbeastIntegrationTestSpec +import io.qbeast.QbeastIntegrationTestSpec import io.qbeast.TestUtils._ import org.apache.spark.sql.execution.FileSourceScanExec import org.apache.spark.sql.functions.avg diff --git a/src/test/scala/io/qbeast/spark/utils/QbeastInsertToTest.scala b/src/test/scala/io/qbeast/spark/utils/QbeastInsertToTest.scala index ec25ca170..4f5051d05 100644 --- a/src/test/scala/io/qbeast/spark/utils/QbeastInsertToTest.scala +++ b/src/test/scala/io/qbeast/spark/utils/QbeastInsertToTest.scala @@ -15,7 +15,7 @@ */ package io.qbeast.spark.utils -import io.qbeast.spark.QbeastIntegrationTestSpec +import io.qbeast.QbeastIntegrationTestSpec import org.apache.spark.sql.functions.col import org.apache.spark.sql.Row diff --git a/src/test/scala/io/qbeast/spark/utils/QbeastSQLIntegrationTest.scala b/src/test/scala/io/qbeast/spark/utils/QbeastSQLIntegrationTest.scala index 06a68e0dd..616652186 100644 --- a/src/test/scala/io/qbeast/spark/utils/QbeastSQLIntegrationTest.scala +++ b/src/test/scala/io/qbeast/spark/utils/QbeastSQLIntegrationTest.scala @@ -16,8 +16,8 @@ package io.qbeast.spark.utils import io.qbeast.spark.index.SparkColumnsToIndexSelector -import io.qbeast.spark.QbeastIntegrationTestSpec -import io.qbeast.spark.QbeastTable +import io.qbeast.table.QbeastTable +import io.qbeast.QbeastIntegrationTestSpec import io.qbeast.TestClasses.Student import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.DataFrame diff --git 
a/src/test/scala/io/qbeast/spark/utils/QbeastSchemaTest.scala b/src/test/scala/io/qbeast/spark/utils/QbeastSchemaTest.scala index 20ac0a3ad..06de45fe3 100644 --- a/src/test/scala/io/qbeast/spark/utils/QbeastSchemaTest.scala +++ b/src/test/scala/io/qbeast/spark/utils/QbeastSchemaTest.scala @@ -15,7 +15,7 @@ */ package io.qbeast.spark.utils -import io.qbeast.spark.QbeastIntegrationTestSpec +import io.qbeast.QbeastIntegrationTestSpec import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.Row diff --git a/src/test/scala/io/qbeast/core/model/QbeastSnapshotTest.scala b/src/test/scala/io/qbeast/spark/utils/QbeastSnapshotTest.scala similarity index 98% rename from src/test/scala/io/qbeast/core/model/QbeastSnapshotTest.scala rename to src/test/scala/io/qbeast/spark/utils/QbeastSnapshotTest.scala index 7c0c9420b..4b4d3d978 100644 --- a/src/test/scala/io/qbeast/core/model/QbeastSnapshotTest.scala +++ b/src/test/scala/io/qbeast/spark/utils/QbeastSnapshotTest.scala @@ -13,12 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.qbeast.core.model +package io.qbeast.spark.utils +import io.qbeast.core.model.CubeStatus +import io.qbeast.core.model.IndexFile +import io.qbeast.core.model.QTableID import io.qbeast.spark.index.IndexStatusBuilder import io.qbeast.spark.index.SparkRevisionFactory import io.qbeast.spark.internal.QbeastOptions -import io.qbeast.spark.QbeastIntegrationTestSpec +import io.qbeast.QbeastIntegrationTestSpec import io.qbeast.TestClasses.Client3 import org.apache.hadoop.fs.Path import org.apache.spark.sql.functions.input_file_name diff --git a/src/test/scala/io/qbeast/spark/utils/QbeastSparkCorrectnessTest.scala b/src/test/scala/io/qbeast/spark/utils/QbeastSparkCorrectnessTest.scala index 750b0b98d..64765184f 100644 --- a/src/test/scala/io/qbeast/spark/utils/QbeastSparkCorrectnessTest.scala +++ b/src/test/scala/io/qbeast/spark/utils/QbeastSparkCorrectnessTest.scala @@ -15,7 +15,7 @@ */ package io.qbeast.spark.utils -import io.qbeast.spark.QbeastIntegrationTestSpec +import io.qbeast.QbeastIntegrationTestSpec import org.apache.spark.sql.delta.actions.Action import org.apache.spark.sql.delta.actions.Metadata import org.apache.spark.sql.delta.util.FileNames diff --git a/src/test/scala/io/qbeast/spark/utils/QbeastSparkIntegrationTest.scala b/src/test/scala/io/qbeast/spark/utils/QbeastSparkIntegrationTest.scala index 151f2f8cd..0191e930a 100644 --- a/src/test/scala/io/qbeast/spark/utils/QbeastSparkIntegrationTest.scala +++ b/src/test/scala/io/qbeast/spark/utils/QbeastSparkIntegrationTest.scala @@ -15,8 +15,8 @@ */ package io.qbeast.spark.utils -import io.qbeast.spark.QbeastIntegrationTestSpec -import io.qbeast.spark.QbeastTable +import io.qbeast.table.QbeastTable +import io.qbeast.QbeastIntegrationTestSpec import io.qbeast.TestClasses.Student import org.apache.spark.sql.DataFrame import org.apache.spark.sql.SparkSession diff --git a/src/test/scala/io/qbeast/spark/utils/SizeStatsTest.scala b/src/test/scala/io/qbeast/spark/utils/SizeStatsTest.scala index 0663abd3f..cdb4e1e2f 100644 --- a/src/test/scala/io/qbeast/spark/utils/SizeStatsTest.scala +++ b/src/test/scala/io/qbeast/spark/utils/SizeStatsTest.scala @@ -15,7 +15,7 @@ */ package io.qbeast.spark.utils -import io.qbeast.spark.QbeastIntegrationTestSpec +import io.qbeast.QbeastIntegrationTestSpec import org.apache.spark.sql.functions.col class SizeStatsTest extends QbeastIntegrationTestSpec { diff --git 
a/src/test/scala/io/qbeast/spark/writer/BlockWriterTest.scala b/src/test/scala/io/qbeast/spark/writer/BlockWriterTest.scala index d695518a1..d698ec359 100644 --- a/src/test/scala/io/qbeast/spark/writer/BlockWriterTest.scala +++ b/src/test/scala/io/qbeast/spark/writer/BlockWriterTest.scala @@ -17,7 +17,7 @@ package io.qbeast.spark.writer import io.qbeast.core.model.CubeId import io.qbeast.spark.index.QbeastColumns._ -import io.qbeast.spark.QbeastIntegrationTestSpec +import io.qbeast.QbeastIntegrationTestSpec import org.apache.spark.sql.functions.col import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers diff --git a/src/test/scala/io/qbeast/spark/table/HasConflictsTest.scala b/src/test/scala/io/qbeast/table/HasConflictsTest.scala similarity index 100% rename from src/test/scala/io/qbeast/spark/table/HasConflictsTest.scala rename to src/test/scala/io/qbeast/table/HasConflictsTest.scala diff --git a/src/test/scala/io/qbeast/spark/utils/QbeastTableTest.scala b/src/test/scala/io/qbeast/table/QbeastTableTest.scala similarity index 97% rename from src/test/scala/io/qbeast/spark/utils/QbeastTableTest.scala rename to src/test/scala/io/qbeast/table/QbeastTableTest.scala index d5bdb37b7..54ed6a9d1 100644 --- a/src/test/scala/io/qbeast/spark/utils/QbeastTableTest.scala +++ b/src/test/scala/io/qbeast/table/QbeastTableTest.scala @@ -13,12 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.qbeast.spark.utils +package io.qbeast.table import io.qbeast.core.transform.LinearTransformation -import io.qbeast.spark.internal.commands.ConvertToQbeastCommand -import io.qbeast.spark.QbeastIntegrationTestSpec -import io.qbeast.spark.QbeastTable +import io.qbeast.internal.commands.ConvertToQbeastCommand +import io.qbeast.spark.utils.SizeStats +import io.qbeast.spark.utils.Tabulator +import io.qbeast.QbeastIntegrationTestSpec import io.qbeast.TestClasses.Client3 import org.apache.spark.sql.functions._ import org.apache.spark.sql.DataFrame diff --git a/src/test/scala/io/qbeast/spark/utils/QbeastUtilsTest.scala b/src/test/scala/io/qbeast/utils/QbeastUtilsTest.scala similarity index 96% rename from src/test/scala/io/qbeast/spark/utils/QbeastUtilsTest.scala rename to src/test/scala/io/qbeast/utils/QbeastUtilsTest.scala index fbbc29b0c..d7025fbed 100644 --- a/src/test/scala/io/qbeast/spark/utils/QbeastUtilsTest.scala +++ b/src/test/scala/io/qbeast/utils/QbeastUtilsTest.scala @@ -13,9 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.qbeast.spark.utils +package io.qbeast.utils -import io.qbeast.spark.QbeastIntegrationTestSpec +import io.qbeast.QbeastIntegrationTestSpec import org.apache.spark.sql.AnalysisException class QbeastUtilsTest extends QbeastIntegrationTestSpec {