Skip to content

Commit

Permalink
[Pick](Row store) allow to set row_store_page_size for tables, change…
Browse files Browse the repository at this point in the history
… default value to 16KB

pick #37145
  • Loading branch information
lxr599 authored Jul 17, 2024
1 parent 2994232 commit 5a44f57
Show file tree
Hide file tree
Showing 27 changed files with 166 additions and 30 deletions.
2 changes: 0 additions & 2 deletions be/src/common/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1059,8 +1059,6 @@ DEFINE_mInt64(max_tablet_io_errors, "-1");
DEFINE_Int32(tablet_path_check_interval_seconds, "-1");
DEFINE_mInt32(tablet_path_check_batch_size, "1000");

// Page size of row column, default 4KB
DEFINE_mInt64(row_column_page_size, "4096");
// it must be larger than or equal to 5MB
DEFINE_mInt64(s3_write_buffer_size, "5242880");
// Log interval when doing s3 upload task
Expand Down
2 changes: 0 additions & 2 deletions be/src/common/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -1102,8 +1102,6 @@ DECLARE_mInt64(max_tablet_io_errors);
DECLARE_Int32(tablet_path_check_interval_seconds);
DECLARE_mInt32(tablet_path_check_batch_size);

// Page size of row column, default 4KB
DECLARE_mInt64(row_column_page_size);
// it must be larger than or equal to 5MB
DECLARE_mInt64(s3_write_buffer_size);
// Log interval when doing s3 upload task
Expand Down
2 changes: 2 additions & 0 deletions be/src/olap/rowset/segment_v2/options.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ namespace segment_v2 {

// Default data page size picked up by PageBuilderOptions: 1 MiB (2^20 bytes).
static constexpr size_t DEFAULT_PAGE_SIZE = static_cast<size_t>(1) << 20;

// Default page size for the row store column: 16 KB (16 * 1024 bytes).
constexpr long ROW_STORE_PAGE_SIZE_DEFAULT_VALUE = 16L * 1024L;

struct PageBuilderOptions {
size_t data_page_size = DEFAULT_PAGE_SIZE;

Expand Down
2 changes: 1 addition & 1 deletion be/src/olap/rowset/segment_v2/segment_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ Status SegmentWriter::_create_column_writer(uint32_t cid, const TabletColumn& co

if (column.is_row_store_column()) {
// smaller page size for row store column
opts.data_page_size = config::row_column_page_size;
opts.data_page_size = _tablet_schema->row_store_page_size();
}
std::unique_ptr<ColumnWriter> writer;
RETURN_IF_ERROR(ColumnWriter::create(opts, &column, _file_writer, &writer));
Expand Down
2 changes: 1 addition & 1 deletion be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ Status VerticalSegmentWriter::_create_column_writer(uint32_t cid, const TabletCo

if (column.is_row_store_column()) {
// smaller page size for row store column
opts.data_page_size = config::row_column_page_size;
opts.data_page_size = _tablet_schema->row_store_page_size();
}
std::unique_ptr<ColumnWriter> writer;
RETURN_IF_ERROR(ColumnWriter::create(opts, &column, _file_writer, &writer));
Expand Down
3 changes: 3 additions & 0 deletions be/src/olap/tablet_meta.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,9 @@ TabletMeta::TabletMeta(int64_t table_id, int64_t partition_id, int64_t tablet_id
if (tablet_schema.__isset.store_row_column) {
schema->set_store_row_column(tablet_schema.store_row_column);
}
if (tablet_schema.__isset.row_store_page_size) {
schema->set_row_store_page_size(tablet_schema.row_store_page_size);
}
if (tablet_schema.__isset.skip_write_index_on_load) {
schema->set_skip_write_index_on_load(tablet_schema.skip_write_index_on_load);
}
Expand Down
4 changes: 4 additions & 0 deletions be/src/olap/tablet_schema.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -990,6 +990,7 @@ void TabletSchema::init_from_pb(const TabletSchemaPB& schema, bool ignore_extrac
_sort_type = schema.sort_type();
_sort_col_num = schema.sort_col_num();
_compression_type = schema.compression_type();
_row_store_page_size = schema.row_store_page_size();
_schema_version = schema.schema_version();
// Default to V1 inverted index storage format for backward compatibility if not specified in schema.
if (!schema.has_inverted_index_storage_format()) {
Expand Down Expand Up @@ -1050,6 +1051,7 @@ void TabletSchema::build_current_tablet_schema(int64_t index_id, int32_t version
_skip_write_index_on_load = ori_tablet_schema.skip_write_index_on_load();
_sort_type = ori_tablet_schema.sort_type();
_sort_col_num = ori_tablet_schema.sort_col_num();
_row_store_page_size = ori_tablet_schema.row_store_page_size();

// copy from table_schema_param
_schema_version = version;
Expand Down Expand Up @@ -1203,6 +1205,7 @@ void TabletSchema::to_schema_pb(TabletSchemaPB* tablet_schema_pb) const {
tablet_schema_pb->set_sort_col_num(_sort_col_num);
tablet_schema_pb->set_schema_version(_schema_version);
tablet_schema_pb->set_compression_type(_compression_type);
tablet_schema_pb->set_row_store_page_size(_row_store_page_size);
tablet_schema_pb->set_version_col_idx(_version_col_idx);
tablet_schema_pb->set_inverted_index_storage_format(_inverted_index_storage_format);
tablet_schema_pb->mutable_row_store_column_unique_ids()->Assign(
Expand Down Expand Up @@ -1522,6 +1525,7 @@ bool operator==(const TabletSchema& a, const TabletSchema& b) {
if (a._disable_auto_compaction != b._disable_auto_compaction) return false;
if (a._enable_single_replica_compaction != b._enable_single_replica_compaction) return false;
if (a._store_row_column != b._store_row_column) return false;
if (a._row_store_page_size != b._row_store_page_size) return false;
if (a._skip_write_index_on_load != b._skip_write_index_on_load) return false;
return true;
}
Expand Down
4 changes: 4 additions & 0 deletions be/src/olap/tablet_schema.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include "common/status.h"
#include "gutil/stringprintf.h"
#include "olap/olap_common.h"
#include "olap/rowset/segment_v2/options.h"
#include "runtime/define_primitive_type.h"
#include "runtime/descriptors.h"
#include "util/string_util.h"
Expand Down Expand Up @@ -359,6 +360,8 @@ class TabletSchema {
void set_version_col_idx(int32_t version_col_idx) { _version_col_idx = version_col_idx; }
int32_t version_col_idx() const { return _version_col_idx; }
segment_v2::CompressionTypePB compression_type() const { return _compression_type; }
// Stores the page size (in bytes) that segment writers apply to the row store column.
void set_row_store_page_size(long page_size) {
    _row_store_page_size = page_size;
}
// Returns the page size currently configured for the row store column.
long row_store_page_size() const {
    return _row_store_page_size;
}

const std::vector<TabletIndex>& indexes() const { return _indexes; }
bool has_inverted_index() const {
Expand Down Expand Up @@ -508,6 +511,7 @@ class TabletSchema {
size_t _num_rows_per_row_block = 0;
CompressKind _compress_kind = COMPRESS_NONE;
segment_v2::CompressionTypePB _compression_type = segment_v2::CompressionTypePB::LZ4F;
long _row_store_page_size = segment_v2::ROW_STORE_PAGE_SIZE_DEFAULT_VALUE;
size_t _next_column_unique_id = 0;
std::string _auto_increment_column;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,8 @@ private void createRollupReplicaForPartition(OlapTable tbl) throws Exception {
tbl.getTimeSeriesCompactionEmptyRowsetsThreshold(),
tbl.getTimeSeriesCompactionLevelThreshold(),
tbl.disableAutoCompaction(),
tbl.getRowStoreColumnsUniqueIds(rowStoreColumns), null);
tbl.getRowStoreColumnsUniqueIds(rowStoreColumns),
null, tbl.rowStorePageSize());
requestBuilder.addTabletMetas(builder);
} // end for rollupTablets
((CloudInternalCatalog) Env.getCurrentInternalCatalog())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,8 @@ private void createShadowIndexReplicaForPartition(OlapTable tbl) throws Exceptio
tbl.getTimeSeriesCompactionLevelThreshold(),
tbl.disableAutoCompaction(),
tbl.getRowStoreColumnsUniqueIds(rowStoreColumns),
tbl.getInvertedIndexFileStorageFormat());
tbl.getInvertedIndexFileStorageFormat(),
tbl.rowStorePageSize());
requestBuilder.addTabletMetas(builder);
} // end for rollupTablets
((CloudInternalCatalog) Env.getCurrentInternalCatalog())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,8 @@ protected void createRollupReplica() throws AlterCancelException {
tbl.storeRowColumn(),
binlogConfig,
tbl.getRowStoreColumnsUniqueIds(tbl.getTableProperty().getCopiedRowStoreColumns()),
objectPool);
objectPool,
tbl.rowStorePageSize());
createReplicaTask.setBaseTablet(tabletIdMap.get(rollupTabletId), baseSchemaHash);
if (this.storageFormat != null) {
createReplicaTask.setStorageFormat(this.storageFormat);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,8 @@ protected void createShadowIndexReplica() throws AlterCancelException {
tbl.storeRowColumn(),
binlogConfig,
tbl.getRowStoreColumnsUniqueIds(rowStoreColumns),
objectPool);
objectPool,
tbl.rowStorePageSize());

createReplicaTask.setBaseTablet(partitionIndexTabletMap.get(partitionId, shadowIdxId)
.get(shadowTabletId), originSchemaHash);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1126,7 +1126,8 @@ private void createReplicas(Database db, AgentBatchTask batchTask, OlapTable loc
localTbl.storeRowColumn(),
binlogConfig,
localTbl.getRowStoreColumnsUniqueIds(rowStoreColumns),
objectPool);
objectPool,
localTbl.rowStorePageSize());
task.setInvertedIndexFileStorageFormat(localTbl.getInvertedIndexFileStorageFormat());
task.setInRestoreMode(true);
batchTask.addTask(task);
Expand Down
4 changes: 4 additions & 0 deletions fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java
Original file line number Diff line number Diff line change
Expand Up @@ -3510,6 +3510,10 @@ private static void addOlapTablePropertyInfo(OlapTable olapTable, StringBuilder
sb.append(",\n\"").append(PropertyAnalyzer.PROPERTIES_STORE_ROW_COLUMN).append("\" = \"");
sb.append(olapTable.storeRowColumn()).append("\"");
}

// row store page size
sb.append(",\n\"").append(PropertyAnalyzer.PROPERTIES_ROW_STORE_PAGE_SIZE).append("\" = \"");
sb.append(olapTable.rowStorePageSize()).append("\"");
}

// skip inverted index on load
Expand Down
14 changes: 14 additions & 0 deletions fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
Original file line number Diff line number Diff line change
Expand Up @@ -2506,6 +2506,20 @@ public void setCompressionType(TCompressionType compressionType) {
tableProperty.buildCompressionType();
}

// Writes the given page size into the table properties under
// PROPERTIES_ROW_STORE_PAGE_SIZE, then asks the TableProperty to re-derive
// its cached rowStorePageSize value from that property.
public void setRowStorePageSize(long pageSize) {
    TableProperty props = getOrCreatTableProperty();
    String pageSizeText = Long.toString(pageSize);
    props.modifyTableProperties(PropertyAnalyzer.PROPERTIES_ROW_STORE_PAGE_SIZE, pageSizeText);
    props.buildRowStorePageSize();
}

// Returns the row store page size held by this table's TableProperty, or
// ROW_STORE_PAGE_SIZE_DEFAULT_VALUE when the table has no TableProperty.
public long rowStorePageSize() {
    return tableProperty == null
            ? PropertyAnalyzer.ROW_STORE_PAGE_SIZE_DEFAULT_VALUE
            : tableProperty.rowStorePageSize();
}

public void setStorageFormat(TStorageFormat storageFormat) {
TableProperty tableProperty = getOrCreatTableProperty();
tableProperty.modifyTableProperties(PropertyAnalyzer.PROPERTIES_STORAGE_FORMAT, storageFormat.name());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ public class TableProperty implements Writable, GsonPostProcessable {

private boolean skipWriteIndexOnLoad = false;

private long rowStorePageSize = PropertyAnalyzer.ROW_STORE_PAGE_SIZE_DEFAULT_VALUE;

private String compactionPolicy = PropertyAnalyzer.SIZE_BASED_COMPACTION_POLICY;

private long timeSeriesCompactionGoalSizeMbytes
Expand Down Expand Up @@ -267,6 +269,17 @@ public boolean storeRowColumn() {
return storeRowColumn;
}

// Re-derives rowStorePageSize from the "row_store_page_size" entry of the
// property map, using ROW_STORE_PAGE_SIZE_DEFAULT_VALUE when the entry is
// missing. Returns this so build calls can be chained.
public TableProperty buildRowStorePageSize() {
    String fallback = Long.toString(PropertyAnalyzer.ROW_STORE_PAGE_SIZE_DEFAULT_VALUE);
    String configured = properties.getOrDefault(PropertyAnalyzer.PROPERTIES_ROW_STORE_PAGE_SIZE, fallback);
    rowStorePageSize = Long.parseLong(configured);
    return this;
}

// Returns the cached row store page size: the field's default, or the value
// last computed by buildRowStorePageSize().
public long rowStorePageSize() {
    return this.rowStorePageSize;
}

public TableProperty buildSkipWriteIndexOnLoad() {
skipWriteIndexOnLoad = Boolean.parseBoolean(
properties.getOrDefault(PropertyAnalyzer.PROPERTIES_SKIP_WRITE_INDEX_ON_LOAD, "false"));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,8 @@ protected Partition createPartitionWithIndices(long dbId, OlapTable tbl, long pa
String storagePolicy,
IdGeneratorBuffer idGeneratorBuffer,
BinlogConfig binlogConfig,
boolean isStorageMediumSpecified, List<Integer> clusterKeyIndexes)
boolean isStorageMediumSpecified,
List<Integer> clusterKeyIndexes, long pageSize)
throws DdlException {
// create base index first.
Preconditions.checkArgument(tbl.getBaseIndexId() != -1);
Expand Down Expand Up @@ -157,7 +158,7 @@ protected Partition createPartitionWithIndices(long dbId, OlapTable tbl, long pa
}
Cloud.CreateTabletsRequest.Builder requestBuilder = Cloud.CreateTabletsRequest.newBuilder();
List<String> rowStoreColumns =
tbl.getTableProperty().getCopiedRowStoreColumns();
tbl.getTableProperty().getCopiedRowStoreColumns();
for (Tablet tablet : index.getTablets()) {
OlapFile.TabletMetaCloudPB.Builder builder = createTabletMetaBuilder(tbl.getId(), indexId,
partitionId, tablet, tabletType, schemaHash, keysType, shortKeyColumnCount,
Expand All @@ -171,7 +172,8 @@ protected Partition createPartitionWithIndices(long dbId, OlapTable tbl, long pa
tbl.getTimeSeriesCompactionLevelThreshold(),
tbl.disableAutoCompaction(),
tbl.getRowStoreColumnsUniqueIds(rowStoreColumns),
tbl.getInvertedIndexFileStorageFormat());
tbl.getInvertedIndexFileStorageFormat(),
tbl.rowStorePageSize());
requestBuilder.addTabletMetas(builder);
}
if (!storageVaultIdSet && ((CloudEnv) Env.getCurrentEnv()).getEnableStorageVault()) {
Expand Down Expand Up @@ -219,7 +221,7 @@ public OlapFile.TabletMetaCloudPB.Builder createTabletMetaBuilder(long tableId,
Long timeSeriesCompactionTimeThresholdSeconds, Long timeSeriesCompactionEmptyRowsetsThreshold,
Long timeSeriesCompactionLevelThreshold, boolean disableAutoCompaction,
List<Integer> rowStoreColumnUniqueIds,
TInvertedIndexFileStorageFormat invertedIndexFileStorageFormat) throws DdlException {
TInvertedIndexFileStorageFormat invertedIndexFileStorageFormat, long pageSize) throws DdlException {
OlapFile.TabletMetaCloudPB.Builder builder = OlapFile.TabletMetaCloudPB.newBuilder();
builder.setTableId(tableId);
builder.setIndexId(indexId);
Expand Down Expand Up @@ -344,6 +346,8 @@ public OlapFile.TabletMetaCloudPB.Builder createTabletMetaBuilder(long tableId,
schemaBuilder.setInvertedIndexStorageFormat(OlapFile.InvertedIndexStorageFormatPB.V2);
}
}
schemaBuilder.setRowStorePageSize(pageSize);

OlapFile.TabletSchemaCloudPB schema = schemaBuilder.build();
builder.setSchema(schema);
// rowset
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,10 @@ public class PropertyAnalyzer {
public static final String PROPERTIES_TIMEOUT = "timeout";
public static final String PROPERTIES_COMPRESSION = "compression";

// row store page size, default 16KB
public static final String PROPERTIES_ROW_STORE_PAGE_SIZE = "row_store_page_size";
public static final long ROW_STORE_PAGE_SIZE_DEFAULT_VALUE = 16384;

public static final String PROPERTIES_ENABLE_LIGHT_SCHEMA_CHANGE = "light_schema_change";

public static final String PROPERTIES_DISTRIBUTION_TYPE = "distribution_type";
Expand Down Expand Up @@ -1013,6 +1017,31 @@ public static TCompressionType analyzeCompressionType(Map<String, String> proper
}
}

// Rounds size up to the nearest multiple of 4096 bytes; a value that is
// already a multiple of 4096 is returned unchanged.
public static long alignTo4K(long size) {
    final long mask = 4096L - 1L;
    return (size + mask) & ~mask;
}

// analyzeRowStorePageSize reads "row_store_page_size" from properties.
// - Property map is null or has no such key: returns ROW_STORE_PAGE_SIZE_DEFAULT_VALUE.
// - Otherwise the value is parsed as a long and rounded up to a 4K multiple;
//   a non-numeric value or a non-positive aligned result raises AnalysisException.
// On success the key is removed from properties and the aligned size is returned.
public static long analyzeRowStorePageSize(Map<String, String> properties) throws AnalysisException {
    // Nothing configured: keep the default and leave the map untouched.
    if (properties == null || !properties.containsKey(PROPERTIES_ROW_STORE_PAGE_SIZE)) {
        return ROW_STORE_PAGE_SIZE_DEFAULT_VALUE;
    }

    String rawValue = properties.get(PROPERTIES_ROW_STORE_PAGE_SIZE);
    long alignedSize;
    try {
        alignedSize = alignTo4K(Long.parseLong(rawValue));
    } catch (NumberFormatException e) {
        throw new AnalysisException("Invalid row store page size: " + rawValue);
    }

    // The check runs on the aligned value, so 0 and negative inputs are rejected here.
    if (alignedSize <= 0) {
        throw new AnalysisException("Row store page size should larger than 0.");
    }

    // The key is consumed only after validation succeeds.
    properties.remove(PROPERTIES_ROW_STORE_PAGE_SIZE);
    return alignedSize;
}

// analyzeStorageFormat will parse the storage format from properties
// sql: alter table tablet_name set ("storage_format" = "v2")
// Use this sql to convert all tablets(base and rollup index) to a new format segment
Expand Down
Loading

0 comments on commit 5a44f57

Please sign in to comment.