Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Optimize](Row store) allow to set row_store_page_size for tables, change default value to 16KB #37145

Merged
merged 26 commits into from
Jul 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
ec59d28
change default value of row_column_page_size to 16KB
lxr599 Jul 2, 2024
76a78da
allow to set row_column_page_size for tables
lxr599 Jul 4, 2024
288bf5a
[feature](Row store)allow to set row_column_page_size for tables
lxr599 Jul 4, 2024
b544145
format codes
lxr599 Jul 4, 2024
2339bd1
change default value of row_column_page_size to 16KB
lxr599 Jul 2, 2024
9a211b1
allow to set row_column_page_size for tables
lxr599 Jul 4, 2024
9552bd5
[feature](Row store)allow to set row_column_page_size for tables
lxr599 Jul 4, 2024
8c4c0f7
format codes
lxr599 Jul 4, 2024
a9e584a
rebase master
lxr599 Jul 5, 2024
eedaa21
Merge branch 'master' into page_size
lxr599 Jul 5, 2024
3f271b0
format codes
lxr599 Jul 5, 2024
843d26d
format fe codes
lxr599 Jul 5, 2024
2936a27
Merge remote-tracking branch 'upstream/master' into page_size
lxr599 Jul 8, 2024
82a3e84
Merge branch 'master' into page_size
lxr599 Jul 8, 2024
50dc477
Merge branch 'master' into page_size
lxr599 Jul 8, 2024
fe22103
display row_column_page_size only when row store enabled
lxr599 Jul 8, 2024
9706f9a
Merge branch 'master' into page_size
lxr599 Jul 10, 2024
9860a7f
add test case for property row_column_page_size
lxr599 Jul 10, 2024
29f3fd7
Merge branch 'master' into page_size
lxr599 Jul 10, 2024
78ba814
Merge branch 'apache:master' into page_size
lxr599 Jul 10, 2024
05374ba
add more test cases
lxr599 Jul 10, 2024
a607007
Merge branch 'master' into page_size
lxr599 Jul 11, 2024
c32b788
change the property name
lxr599 Jul 11, 2024
484160a
change names of vars related to row_store_page_size
lxr599 Jul 12, 2024
5c543f7
Merge remote-tracking branch 'upstream/master' into page_size
lxr599 Jul 15, 2024
617dafd
Merge branch 'master' into page_size
lxr599 Jul 16, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions be/src/common/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1062,8 +1062,6 @@ DEFINE_mInt64(max_tablet_io_errors, "-1");
DEFINE_Int32(tablet_path_check_interval_seconds, "-1");
DEFINE_mInt32(tablet_path_check_batch_size, "1000");

// Page size of row column, default 4KB
DEFINE_mInt64(row_column_page_size, "4096");
// it must be larger than or equal to 5MB
DEFINE_mInt64(s3_write_buffer_size, "5242880");
// Log interval when doing s3 upload task
Expand Down
2 changes: 0 additions & 2 deletions be/src/common/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -1111,8 +1111,6 @@ DECLARE_mInt64(max_tablet_io_errors);
DECLARE_Int32(tablet_path_check_interval_seconds);
DECLARE_mInt32(tablet_path_check_batch_size);

// Page size of row column, default 4KB
DECLARE_mInt64(row_column_page_size);
// it must be larger than or equal to 5MB
DECLARE_mInt64(s3_write_buffer_size);
// Log interval when doing s3 upload task
Expand Down
2 changes: 2 additions & 0 deletions be/src/olap/rowset/segment_v2/options.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ namespace segment_v2 {

static constexpr size_t DEFAULT_PAGE_SIZE = 1024 * 1024; // default size: 1M

constexpr long ROW_STORE_PAGE_SIZE_DEFAULT_VALUE = 16384; // default row store page size: 16KB

struct PageBuilderOptions {
size_t data_page_size = DEFAULT_PAGE_SIZE;

Expand Down
2 changes: 1 addition & 1 deletion be/src/olap/rowset/segment_v2/segment_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ Status SegmentWriter::_create_column_writer(uint32_t cid, const TabletColumn& co

if (column.is_row_store_column()) {
// smaller page size for row store column
opts.data_page_size = config::row_column_page_size;
opts.data_page_size = _tablet_schema->row_store_page_size();
}
std::unique_ptr<ColumnWriter> writer;
RETURN_IF_ERROR(ColumnWriter::create(opts, &column, _file_writer, &writer));
Expand Down
2 changes: 1 addition & 1 deletion be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ Status VerticalSegmentWriter::_create_column_writer(uint32_t cid, const TabletCo

if (column.is_row_store_column()) {
// smaller page size for row store column
opts.data_page_size = config::row_column_page_size;
opts.data_page_size = _tablet_schema->row_store_page_size();
}
std::unique_ptr<ColumnWriter> writer;
RETURN_IF_ERROR(ColumnWriter::create(opts, &column, _file_writer, &writer));
Expand Down
3 changes: 3 additions & 0 deletions be/src/olap/tablet_meta.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,9 @@ TabletMeta::TabletMeta(int64_t table_id, int64_t partition_id, int64_t tablet_id
if (tablet_schema.__isset.store_row_column) {
schema->set_store_row_column(tablet_schema.store_row_column);
}
if (tablet_schema.__isset.row_store_page_size) {
schema->set_row_store_page_size(tablet_schema.row_store_page_size);
}
if (tablet_schema.__isset.skip_write_index_on_load) {
schema->set_skip_write_index_on_load(tablet_schema.skip_write_index_on_load);
}
Expand Down
4 changes: 4 additions & 0 deletions be/src/olap/tablet_schema.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -990,6 +990,7 @@ void TabletSchema::init_from_pb(const TabletSchemaPB& schema, bool ignore_extrac
_sort_type = schema.sort_type();
_sort_col_num = schema.sort_col_num();
_compression_type = schema.compression_type();
_row_store_page_size = schema.row_store_page_size();
_schema_version = schema.schema_version();
// Default to V1 inverted index storage format for backward compatibility if not specified in schema.
if (!schema.has_inverted_index_storage_format()) {
Expand Down Expand Up @@ -1050,6 +1051,7 @@ void TabletSchema::build_current_tablet_schema(int64_t index_id, int32_t version
_skip_write_index_on_load = ori_tablet_schema.skip_write_index_on_load();
_sort_type = ori_tablet_schema.sort_type();
_sort_col_num = ori_tablet_schema.sort_col_num();
_row_store_page_size = ori_tablet_schema.row_store_page_size();

// copy from table_schema_param
_schema_version = version;
Expand Down Expand Up @@ -1203,6 +1205,7 @@ void TabletSchema::to_schema_pb(TabletSchemaPB* tablet_schema_pb) const {
tablet_schema_pb->set_sort_col_num(_sort_col_num);
tablet_schema_pb->set_schema_version(_schema_version);
tablet_schema_pb->set_compression_type(_compression_type);
tablet_schema_pb->set_row_store_page_size(_row_store_page_size);
tablet_schema_pb->set_version_col_idx(_version_col_idx);
tablet_schema_pb->set_inverted_index_storage_format(_inverted_index_storage_format);
tablet_schema_pb->mutable_row_store_column_unique_ids()->Assign(
Expand Down Expand Up @@ -1522,6 +1525,7 @@ bool operator==(const TabletSchema& a, const TabletSchema& b) {
if (a._disable_auto_compaction != b._disable_auto_compaction) return false;
if (a._enable_single_replica_compaction != b._enable_single_replica_compaction) return false;
if (a._store_row_column != b._store_row_column) return false;
if (a._row_store_page_size != b._row_store_page_size) return false;
if (a._skip_write_index_on_load != b._skip_write_index_on_load) return false;
return true;
}
Expand Down
4 changes: 4 additions & 0 deletions be/src/olap/tablet_schema.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include "common/status.h"
#include "gutil/stringprintf.h"
#include "olap/olap_common.h"
#include "olap/rowset/segment_v2/options.h"
#include "runtime/define_primitive_type.h"
#include "runtime/descriptors.h"
#include "util/string_util.h"
Expand Down Expand Up @@ -359,6 +360,8 @@ class TabletSchema {
void set_version_col_idx(int32_t version_col_idx) { _version_col_idx = version_col_idx; }
int32_t version_col_idx() const { return _version_col_idx; }
segment_v2::CompressionTypePB compression_type() const { return _compression_type; }
void set_row_store_page_size(long page_size) { _row_store_page_size = page_size; }
long row_store_page_size() const { return _row_store_page_size; }

const std::vector<TabletIndex>& indexes() const { return _indexes; }
bool has_inverted_index() const {
Expand Down Expand Up @@ -508,6 +511,7 @@ class TabletSchema {
size_t _num_rows_per_row_block = 0;
CompressKind _compress_kind = COMPRESS_NONE;
segment_v2::CompressionTypePB _compression_type = segment_v2::CompressionTypePB::LZ4F;
long _row_store_page_size = segment_v2::ROW_STORE_PAGE_SIZE_DEFAULT_VALUE;
size_t _next_column_unique_id = 0;
std::string _auto_increment_column;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,8 @@ private void createRollupReplicaForPartition(OlapTable tbl) throws Exception {
tbl.getTimeSeriesCompactionLevelThreshold(),
tbl.disableAutoCompaction(),
tbl.getRowStoreColumnsUniqueIds(rowStoreColumns),
tbl.getEnableMowLightDelete(), null);
tbl.getEnableMowLightDelete(), null,
tbl.rowStorePageSize());
requestBuilder.addTabletMetas(builder);
} // end for rollupTablets
((CloudInternalCatalog) Env.getCurrentInternalCatalog())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,8 @@ private void createShadowIndexReplicaForPartition(OlapTable tbl) throws Exceptio
tbl.disableAutoCompaction(),
tbl.getRowStoreColumnsUniqueIds(rowStoreColumns),
tbl.getEnableMowLightDelete(),
tbl.getInvertedIndexFileStorageFormat());
tbl.getInvertedIndexFileStorageFormat(),
tbl.rowStorePageSize());
requestBuilder.addTabletMetas(builder);
} // end for rollupTablets
((CloudInternalCatalog) Env.getCurrentInternalCatalog())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,8 @@ protected void createRollupReplica() throws AlterCancelException {
tbl.storeRowColumn(),
binlogConfig,
tbl.getRowStoreColumnsUniqueIds(tbl.getTableProperty().getCopiedRowStoreColumns()),
objectPool);
objectPool,
tbl.rowStorePageSize());
createReplicaTask.setBaseTablet(tabletIdMap.get(rollupTabletId), baseSchemaHash);
if (this.storageFormat != null) {
createReplicaTask.setStorageFormat(this.storageFormat);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,8 @@ protected void createShadowIndexReplica() throws AlterCancelException {
tbl.storeRowColumn(),
binlogConfig,
tbl.getRowStoreColumnsUniqueIds(rowStoreColumns),
objectPool);
objectPool,
tbl.rowStorePageSize());

createReplicaTask.setBaseTablet(partitionIndexTabletMap.get(partitionId, shadowIdxId)
.get(shadowTabletId), originSchemaHash);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1126,7 +1126,8 @@ private void createReplicas(Database db, AgentBatchTask batchTask, OlapTable loc
localTbl.storeRowColumn(),
binlogConfig,
localTbl.getRowStoreColumnsUniqueIds(rowStoreColumns),
objectPool);
objectPool,
localTbl.rowStorePageSize());
task.setInvertedIndexFileStorageFormat(localTbl.getInvertedIndexFileStorageFormat());
task.setInRestoreMode(true);
batchTask.addTask(task);
Expand Down
4 changes: 4 additions & 0 deletions fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java
Original file line number Diff line number Diff line change
Expand Up @@ -3525,6 +3525,10 @@ private static void addOlapTablePropertyInfo(OlapTable olapTable, StringBuilder
sb.append(",\n\"").append(PropertyAnalyzer.PROPERTIES_STORE_ROW_COLUMN).append("\" = \"");
sb.append(olapTable.storeRowColumn()).append("\"");
}

// row store page size
sb.append(",\n\"").append(PropertyAnalyzer.PROPERTIES_ROW_STORE_PAGE_SIZE).append("\" = \"");
sb.append(olapTable.rowStorePageSize()).append("\"");
}

// skip inverted index on load
Expand Down
14 changes: 14 additions & 0 deletions fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
Original file line number Diff line number Diff line change
Expand Up @@ -2505,6 +2505,20 @@ public void setCompressionType(TCompressionType compressionType) {
tableProperty.buildCompressionType();
}

/**
 * Stores {@code pageSize} (as its decimal string form) under the
 * {@code PROPERTIES_ROW_STORE_PAGE_SIZE} key of this table's property set,
 * then asks the property object to rebuild its row store page size from it.
 *
 * @param pageSize row store page size to record for this table
 */
public void setRowStorePageSize(long pageSize) {
    TableProperty props = getOrCreatTableProperty();
    String pageSizeText = Long.toString(pageSize);
    props.modifyTableProperties(PropertyAnalyzer.PROPERTIES_ROW_STORE_PAGE_SIZE, pageSizeText);
    props.buildRowStorePageSize();
}

/**
 * @return the row store page size held by this table's property object, or
 *         {@code PropertyAnalyzer.ROW_STORE_PAGE_SIZE_DEFAULT_VALUE} when the
 *         table has no property object
 */
public long rowStorePageSize() {
    return tableProperty == null
            ? PropertyAnalyzer.ROW_STORE_PAGE_SIZE_DEFAULT_VALUE
            : tableProperty.rowStorePageSize();
}

public void setStorageFormat(TStorageFormat storageFormat) {
TableProperty tableProperty = getOrCreatTableProperty();
tableProperty.modifyTableProperties(PropertyAnalyzer.PROPERTIES_STORAGE_FORMAT, storageFormat.name());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ public class TableProperty implements Writable, GsonPostProcessable {

private boolean skipWriteIndexOnLoad = false;

private long rowStorePageSize = PropertyAnalyzer.ROW_STORE_PAGE_SIZE_DEFAULT_VALUE;

private String compactionPolicy = PropertyAnalyzer.SIZE_BASED_COMPACTION_POLICY;

private long timeSeriesCompactionGoalSizeMbytes
Expand Down Expand Up @@ -267,6 +269,17 @@ public boolean storeRowColumn() {
return storeRowColumn;
}

/**
 * Refreshes the cached row store page size from the property map, falling back
 * to {@code PropertyAnalyzer.ROW_STORE_PAGE_SIZE_DEFAULT_VALUE} when the
 * {@code PROPERTIES_ROW_STORE_PAGE_SIZE} key is absent.
 *
 * @return this instance, so build calls can be chained
 */
public TableProperty buildRowStorePageSize() {
    String fallback = Long.toString(PropertyAnalyzer.ROW_STORE_PAGE_SIZE_DEFAULT_VALUE);
    String configured = properties.getOrDefault(PropertyAnalyzer.PROPERTIES_ROW_STORE_PAGE_SIZE, fallback);
    rowStorePageSize = Long.parseLong(configured);
    return this;
}

/**
 * @return the row store page size currently cached on this property object
 */
public long rowStorePageSize() {
    return this.rowStorePageSize;
}

public TableProperty buildSkipWriteIndexOnLoad() {
skipWriteIndexOnLoad = Boolean.parseBoolean(
properties.getOrDefault(PropertyAnalyzer.PROPERTIES_SKIP_WRITE_INDEX_ON_LOAD, "false"));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,8 @@ protected Partition createPartitionWithIndices(long dbId, OlapTable tbl, long pa
String storagePolicy,
IdGeneratorBuffer idGeneratorBuffer,
BinlogConfig binlogConfig,
boolean isStorageMediumSpecified, List<Integer> clusterKeyIndexes)
boolean isStorageMediumSpecified,
List<Integer> clusterKeyIndexes, long pageSize)
throws DdlException {
// create base index first.
Preconditions.checkArgument(tbl.getBaseIndexId() != -1);
Expand Down Expand Up @@ -172,7 +173,8 @@ protected Partition createPartitionWithIndices(long dbId, OlapTable tbl, long pa
tbl.disableAutoCompaction(),
tbl.getRowStoreColumnsUniqueIds(rowStoreColumns),
tbl.getEnableMowLightDelete(),
tbl.getInvertedIndexFileStorageFormat());
tbl.getInvertedIndexFileStorageFormat(),
tbl.rowStorePageSize());
requestBuilder.addTabletMetas(builder);
}
if (!storageVaultIdSet && ((CloudEnv) Env.getCurrentEnv()).getEnableStorageVault()) {
Expand Down Expand Up @@ -220,7 +222,7 @@ public OlapFile.TabletMetaCloudPB.Builder createTabletMetaBuilder(long tableId,
Long timeSeriesCompactionTimeThresholdSeconds, Long timeSeriesCompactionEmptyRowsetsThreshold,
Long timeSeriesCompactionLevelThreshold, boolean disableAutoCompaction,
List<Integer> rowStoreColumnUniqueIds, boolean enableMowLightDelete,
TInvertedIndexFileStorageFormat invertedIndexFileStorageFormat) throws DdlException {
TInvertedIndexFileStorageFormat invertedIndexFileStorageFormat, long pageSize) throws DdlException {
OlapFile.TabletMetaCloudPB.Builder builder = OlapFile.TabletMetaCloudPB.newBuilder();
builder.setTableId(tableId);
builder.setIndexId(indexId);
Expand Down Expand Up @@ -346,6 +348,8 @@ public OlapFile.TabletMetaCloudPB.Builder createTabletMetaBuilder(long tableId,
schemaBuilder.setInvertedIndexStorageFormat(OlapFile.InvertedIndexStorageFormatPB.V2);
}
}
schemaBuilder.setRowStorePageSize(pageSize);

OlapFile.TabletSchemaCloudPB schema = schemaBuilder.build();
builder.setSchema(schema);
// rowset
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,10 @@ public class PropertyAnalyzer {
public static final String PROPERTIES_TIMEOUT = "timeout";
public static final String PROPERTIES_COMPRESSION = "compression";

// row store page size, default 16KB
public static final String PROPERTIES_ROW_STORE_PAGE_SIZE = "row_store_page_size";
public static final long ROW_STORE_PAGE_SIZE_DEFAULT_VALUE = 16384;

public static final String PROPERTIES_ENABLE_LIGHT_SCHEMA_CHANGE = "light_schema_change";

public static final String PROPERTIES_DISTRIBUTION_TYPE = "distribution_type";
Expand Down Expand Up @@ -1012,6 +1016,31 @@ public static TCompressionType analyzeCompressionType(Map<String, String> proper
}
}

/**
 * Rounds {@code size} up to the next multiple of 4096; values that are already
 * a multiple of 4096 are returned unchanged.
 *
 * @param size value to align
 * @return {@code size} rounded up to a 4096 boundary
 */
public static long alignTo4K(long size) {
    final long lowBits = 4095L;
    // Adding the low-bit mask and then clearing those bits rounds upward.
    long bumped = size + lowBits;
    return bumped & ~lowBits;
}

/**
 * Reads the {@code row_store_page_size} entry from {@code properties}.
 *
 * When the map is null or has no such entry, the default value is returned and
 * the map is left untouched. Otherwise the entry is parsed as a long, rounded
 * up with {@link #alignTo4K(long)}, checked to be positive, and removed from
 * the map before the aligned value is returned.
 *
 * @param properties table properties; may be null
 * @return the aligned page size, or the default when none was supplied
 * @throws AnalysisException if the value is not a valid long, or is not
 *         positive after alignment
 */
public static long analyzeRowStorePageSize(Map<String, String> properties) throws AnalysisException {
    if (properties == null || !properties.containsKey(PROPERTIES_ROW_STORE_PAGE_SIZE)) {
        return ROW_STORE_PAGE_SIZE_DEFAULT_VALUE;
    }

    String rawValue = properties.get(PROPERTIES_ROW_STORE_PAGE_SIZE);
    long alignedSize;
    try {
        alignedSize = alignTo4K(Long.parseLong(rawValue));
    } catch (NumberFormatException e) {
        throw new AnalysisException("Invalid row store page size: " + rawValue);
    }

    // The check runs on the aligned value, so zero and negative inputs are rejected here.
    if (alignedSize <= 0) {
        throw new AnalysisException("Row store page size should larger than 0.");
    }

    // The key is consumed only after the value has been validated.
    properties.remove(PROPERTIES_ROW_STORE_PAGE_SIZE);
    return alignedSize;
}

// analyzeStorageFormat will parse the storage format from properties
// sql: alter table tablet_name set ("storage_format" = "v2")
// Use this sql to convert all tablets(base and rollup index) to a new format segment
Expand Down
Loading
Loading