[DNM] Error injection on Hudi 1.0 RC1 #12275

Draft · wants to merge 3 commits into base: release-1.0.0

2 changes: 1 addition & 1 deletion docker/hoodie/hadoop/base/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>1.0.0-SNAPSHOT</version>
+    <version>1.0.0-rc1</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>

2 changes: 1 addition & 1 deletion docker/hoodie/hadoop/base_java11/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>1.0.0-SNAPSHOT</version>
+    <version>1.0.0-rc1</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>

2 changes: 1 addition & 1 deletion docker/hoodie/hadoop/datanode/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>1.0.0-SNAPSHOT</version>
+    <version>1.0.0-rc1</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>

2 changes: 1 addition & 1 deletion docker/hoodie/hadoop/historyserver/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>1.0.0-SNAPSHOT</version>
+    <version>1.0.0-rc1</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>

2 changes: 1 addition & 1 deletion docker/hoodie/hadoop/hive_base/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>1.0.0-SNAPSHOT</version>
+    <version>1.0.0-rc1</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>

2 changes: 1 addition & 1 deletion docker/hoodie/hadoop/namenode/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>1.0.0-SNAPSHOT</version>
+    <version>1.0.0-rc1</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>

2 changes: 1 addition & 1 deletion docker/hoodie/hadoop/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>1.0.0-SNAPSHOT</version>
+    <version>1.0.0-rc1</version>
     <relativePath>../../../pom.xml</relativePath>
   </parent>
   <modelVersion>4.0.0</modelVersion>

2 changes: 1 addition & 1 deletion docker/hoodie/hadoop/prestobase/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>1.0.0-SNAPSHOT</version>
+    <version>1.0.0-rc1</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>

2 changes: 1 addition & 1 deletion docker/hoodie/hadoop/spark_base/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>1.0.0-SNAPSHOT</version>
+    <version>1.0.0-rc1</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>

2 changes: 1 addition & 1 deletion docker/hoodie/hadoop/sparkadhoc/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>1.0.0-SNAPSHOT</version>
+    <version>1.0.0-rc1</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>

2 changes: 1 addition & 1 deletion docker/hoodie/hadoop/sparkmaster/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>1.0.0-SNAPSHOT</version>
+    <version>1.0.0-rc1</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>

2 changes: 1 addition & 1 deletion docker/hoodie/hadoop/sparkworker/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>1.0.0-SNAPSHOT</version>
+    <version>1.0.0-rc1</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>

2 changes: 1 addition & 1 deletion docker/hoodie/hadoop/trinobase/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>1.0.0-SNAPSHOT</version>
+    <version>1.0.0-rc1</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>

2 changes: 1 addition & 1 deletion docker/hoodie/hadoop/trinocoordinator/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>1.0.0-SNAPSHOT</version>
+    <version>1.0.0-rc1</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>

2 changes: 1 addition & 1 deletion docker/hoodie/hadoop/trinoworker/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>1.0.0-SNAPSHOT</version>
+    <version>1.0.0-rc1</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>

4 changes: 2 additions & 2 deletions hudi-aws/pom.xml
@@ -19,12 +19,12 @@
   <parent>
     <artifactId>hudi</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>1.0.0-SNAPSHOT</version>
+    <version>1.0.0-rc1</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>

   <artifactId>hudi-aws</artifactId>
-  <version>1.0.0-SNAPSHOT</version>
+  <version>1.0.0-rc1</version>

   <name>hudi-aws</name>
   <packaging>jar</packaging>

2 changes: 1 addition & 1 deletion hudi-cli/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>1.0.0-SNAPSHOT</version>
+    <version>1.0.0-rc1</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>

4 changes: 2 additions & 2 deletions hudi-client/hudi-client-common/pom.xml
@@ -20,12 +20,12 @@
   <parent>
     <artifactId>hudi-client</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>1.0.0-SNAPSHOT</version>
+    <version>1.0.0-rc1</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>

   <artifactId>hudi-client-common</artifactId>
-  <version>1.0.0-SNAPSHOT</version>
+  <version>1.0.0-rc1</version>

   <name>hudi-client-common</name>
   <packaging>jar</packaging>

@@ -36,6 +36,7 @@
 import org.apache.hudi.common.util.Option;
 import org.apache.hudi.common.util.ReflectionUtils;
 import org.apache.hudi.common.util.StringUtils;
+import org.apache.hudi.common.util.injection.ErrorInjectionUtils;
 import org.apache.hudi.config.HoodieWriteConfig;
 import org.apache.hudi.exception.HoodieCommitException;
 import org.apache.hudi.exception.HoodieException;
@@ -58,6 +59,10 @@
 import java.util.List;
 import java.util.Set;

+import static org.apache.hudi.common.util.injection.ErrorInjectionCategory.DT_AFTER_MDT;
+import static org.apache.hudi.common.util.injection.ErrorInjectionCategory.DT_WRITE_BEFORE_MDT;
+import static org.apache.hudi.common.util.injection.ErrorInjectionCategory.MDT_PRE_COMMIT;
+
 /**
  * Abstract class taking care of holding common member variables (FileSystem, SparkContext, HoodieConfigs) Also, manages
  * embedded timeline-server if enabled.
@@ -283,11 +288,23 @@ protected void finalizeWrite(HoodieTable table, String instantTime, List<HoodieW
    * @param writeStatuses Write statuses of the commit
    */
   protected void writeTableMetadata(HoodieTable table, String instantTime, HoodieCommitMetadata metadata, HoodieData<WriteStatus> writeStatuses) {
+    if (config.getBasePath().contains(".hoodie/metadata")) {
+      ErrorInjectionUtils.maybeInjectErrorByKillingJVM(MDT_PRE_COMMIT, "Fail metadata table commit for " + instantTime);
+    } else {
+      ErrorInjectionUtils.maybeInjectErrorByKillingJVM(
+          DT_WRITE_BEFORE_MDT, "Fail before metadata table commit/services " + instantTime);
+    }
+
     context.setJobStatus(this.getClass().getSimpleName(), "Committing to metadata table: " + config.getTableName());
     Option<HoodieTableMetadataWriter> metadataWriterOpt = table.getMetadataWriter(instantTime);
     if (metadataWriterOpt.isPresent()) {
       try (HoodieTableMetadataWriter metadataWriter = metadataWriterOpt.get()) {
         metadataWriter.updateFromWriteStatuses(metadata, writeStatuses, instantTime);
+        if (!config.getBasePath().contains(".hoodie/metadata")) {
+          ErrorInjectionUtils.maybeInjectErrorByKillingJVM(
+              DT_AFTER_MDT,
+              "Fail after metadata table commit/services before data table commit " + instantTime);
+        }
       } catch (Exception e) {
         if (e instanceof HoodieException) {
           throw (HoodieException) e;

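Note: the hunks above call ErrorInjectionUtils.maybeInjectErrorByKillingJVM with categories from ErrorInjectionCategory; both types live under org.apache.hudi.common.util.injection and are added elsewhere in this PR, so their sources do not appear in this view. The following is only a rough sketch of how such a kill-the-JVM hook could look. The system-property switch, exit code, and single-file layout are assumptions for illustration, not the PR's actual implementation, and the two types are combined into one file here for brevity (in the PR they are presumably separate public classes). The enum values are taken from the usages visible in this diff.

// Hypothetical sketch only; the real classes added by this PR may differ.
package org.apache.hudi.common.util.injection;

// Failure points named after where in the write path the process is killed.
enum ErrorInjectionCategory {
  DT_WRITE_BEFORE_MDT, DT_AFTER_MDT, MDT_PRE_COMMIT,
  DT_COMPACTION_BEFORE_MDT, DT_COMPACTION_AFTER_MDT, MDT_COMPACTION,
  DT_CLUSTERING_BEFORE_MDT, DT_CLUSTERING_AFTER_MDT, MDT_CLUSTERING,
  DT_CLEANING_BEFORE_MDT, DT_CLEANING_AFTER_MDT, MDT_CLEANING,
  MDT_AFTER_COMPACTION, MDT_BEFORE_ARCHIVAL, MDT_BEFORE_CLEANING
}

public final class ErrorInjectionUtils {

  // Assumed switch: -Dhoodie.error.injection.category=<category name>; unset means no injection.
  private static final String INJECTION_PROP = "hoodie.error.injection.category";

  private ErrorInjectionUtils() {
  }

  // Halts the JVM at this call site if the given category is the one enabled for this run.
  public static void maybeInjectErrorByKillingJVM(ErrorInjectionCategory category, String message) {
    String enabled = System.getProperty(INJECTION_PROP, "");
    if (category.name().equalsIgnoreCase(enabled)) {
      // Log first so the crash point is visible in the driver/executor logs.
      System.err.println("[ERROR INJECTION] " + category + ": " + message);
      // halt() skips shutdown hooks and finally blocks, simulating an abrupt process crash.
      Runtime.getRuntime().halt(137);
    }
  }
}

The point of halting rather than throwing is that no catch/finally block or shutdown hook runs, so the write path is cut off exactly between the metadata-table and data-table steps being exercised.
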
@@ -48,6 +48,7 @@
 import org.apache.hudi.common.util.CollectionUtils;
 import org.apache.hudi.common.util.Option;
 import org.apache.hudi.common.util.collection.Pair;
+import org.apache.hudi.common.util.injection.ErrorInjectionUtils;
 import org.apache.hudi.config.HoodieClusteringConfig;
 import org.apache.hudi.config.HoodieCompactionConfig;
 import org.apache.hudi.config.HoodieWriteConfig;
@@ -88,6 +89,12 @@
 import static org.apache.hudi.common.table.timeline.HoodieTimeline.COMPACTION_ACTION;
 import static org.apache.hudi.common.table.timeline.HoodieTimeline.GREATER_THAN;
 import static org.apache.hudi.common.table.timeline.TimelineMetadataUtils.serializeCommitMetadata;
+import static org.apache.hudi.common.util.injection.ErrorInjectionCategory.DT_CLUSTERING_AFTER_MDT;
+import static org.apache.hudi.common.util.injection.ErrorInjectionCategory.DT_CLUSTERING_BEFORE_MDT;
+import static org.apache.hudi.common.util.injection.ErrorInjectionCategory.DT_COMPACTION_AFTER_MDT;
+import static org.apache.hudi.common.util.injection.ErrorInjectionCategory.DT_COMPACTION_BEFORE_MDT;
+import static org.apache.hudi.common.util.injection.ErrorInjectionCategory.MDT_CLUSTERING;
+import static org.apache.hudi.common.util.injection.ErrorInjectionCategory.MDT_COMPACTION;
 import static org.apache.hudi.metadata.HoodieTableMetadata.isMetadataTable;
 import static org.apache.hudi.metadata.HoodieTableMetadataUtil.isIndexingCommit;

@@ -327,12 +334,26 @@ protected void completeCompaction(HoodieCommitMetadata metadata, HoodieTable tab
     handleWriteErrors(writeStats, TableServiceType.COMPACT);
     final HoodieInstant compactionInstant = HoodieTimeline.getCompactionInflightInstant(compactionCommitTime);
     try {
+      if (!basePath.contains(".hoodie/metadata")) {
+        ErrorInjectionUtils.maybeInjectErrorByKillingJVM(DT_COMPACTION_BEFORE_MDT,
+            "Fail data table compaction before applying to MDT " + compactionCommitTime);
+      }
+
       this.txnManager.beginTransaction(Option.of(compactionInstant), Option.empty());
       finalizeWrite(table, compactionCommitTime, writeStats);
       // commit to data table after committing to metadata table.
       writeTableMetadata(table, compactionCommitTime, metadata, context.emptyHoodieData());
       LOG.info("Committing Compaction {}", compactionCommitTime);
       LOG.debug("Compaction {} finished with result: {}", compactionCommitTime, metadata);
+
+      if (basePath.contains(".hoodie/metadata")) {
+        ErrorInjectionUtils.maybeInjectErrorByKillingJVM(MDT_COMPACTION,
+            "Fail metadata table compaction " + compactionCommitTime);
+      } else {
+        ErrorInjectionUtils.maybeInjectErrorByKillingJVM(DT_COMPACTION_AFTER_MDT,
+            "Fail data table compaction after applying to MDT, but before completing in DT " + compactionCommitTime);
+      }
+
       CompactHelpers.getInstance().completeInflightCompaction(table, compactionCommitTime, metadata);
     } finally {
       this.txnManager.endTransaction(Option.of(compactionInstant));
@@ -518,6 +539,10 @@ private void completeClustering(HoodieReplaceCommitMetadata metadata,
     handleWriteErrors(writeStats, TableServiceType.CLUSTER);
     final HoodieInstant clusteringInstant = ClusteringUtils.getInflightClusteringInstant(clusteringCommitTime, table.getActiveTimeline()).get();
     try {
+      if (!basePath.contains(".hoodie/metadata")) {
+        ErrorInjectionUtils.maybeInjectErrorByKillingJVM(DT_CLUSTERING_BEFORE_MDT,
+            "Fail data table clustering before applying to MDT " + clusteringCommitTime);
+      }
       this.txnManager.beginTransaction(Option.of(clusteringInstant), Option.empty());

       finalizeWrite(table, clusteringCommitTime, writeStats);
@@ -532,6 +557,13 @@
       LOG.info("Committing Clustering {}", clusteringCommitTime);
       LOG.debug("Clustering {} finished with result {}", clusteringCommitTime, metadata);

+      if (basePath.contains(".hoodie/metadata")) {
+        ErrorInjectionUtils.maybeInjectErrorByKillingJVM(MDT_CLUSTERING, "Fail metadata table clustering " + clusteringCommitTime);
+      } else {
+        ErrorInjectionUtils.maybeInjectErrorByKillingJVM(DT_CLUSTERING_AFTER_MDT,
+            "Fail data table clustering after applying to MDT, but before completing in DT " + clusteringCommitTime);
+      }
+
       ClusteringUtils.transitionClusteringOrReplaceInflightToComplete(false, clusteringInstant, serializeCommitMetadata(metadata), table.getActiveTimeline());
     } catch (Exception e) {
       throw new HoodieClusteringException("unable to transition clustering inflight to complete: " + clusteringCommitTime, e);

@@ -62,6 +62,7 @@
 import org.apache.hudi.common.util.StringUtils;
 import org.apache.hudi.common.util.ValidationUtils;
 import org.apache.hudi.common.util.collection.Pair;
+import org.apache.hudi.common.util.injection.ErrorInjectionUtils;
 import org.apache.hudi.config.HoodieWriteConfig;
 import org.apache.hudi.exception.HoodieException;
 import org.apache.hudi.exception.HoodieIOException;
@@ -104,6 +105,9 @@
 import static org.apache.hudi.common.table.timeline.HoodieTimeline.LESSER_THAN_OR_EQUALS;
 import static org.apache.hudi.common.table.timeline.HoodieTimeline.getIndexInflightInstant;
 import static org.apache.hudi.common.table.timeline.TimelineMetadataUtils.deserializeIndexPlan;
+import static org.apache.hudi.common.util.injection.ErrorInjectionCategory.MDT_AFTER_COMPACTION;
+import static org.apache.hudi.common.util.injection.ErrorInjectionCategory.MDT_BEFORE_ARCHIVAL;
+import static org.apache.hudi.common.util.injection.ErrorInjectionCategory.MDT_BEFORE_CLEANING;
 import static org.apache.hudi.metadata.HoodieMetadataWriteUtils.createMetadataWriteConfig;
 import static org.apache.hudi.metadata.HoodieTableMetadata.METADATA_TABLE_NAME_SUFFIX;
 import static org.apache.hudi.metadata.HoodieTableMetadata.SOLO_COMMIT_TIMESTAMP;
@@ -936,6 +940,7 @@ private void initializeFileGroups(HoodieTableMetaClient dataMetaClient, Metadata
     }, fileGroupFileIds.size());
   }

+  @Override
   public void dropMetadataPartitions(List<String> metadataPartitions) throws IOException {
     for (String partitionPath : metadataPartitions) {
       // first update table config
@@ -1011,6 +1016,7 @@ private Set<String> getMetadataPartitionsToUpdate() {
     return getEnabledPartitionTypes().stream().map(MetadataPartitionType::getPartitionPath).collect(Collectors.toSet());
   }

+  @Override
   public void buildMetadataPartitions(HoodieEngineContext engineContext, List<HoodieIndexPartitionInfo> indexPartitionInfos, String instantTime) throws IOException {
     if (indexPartitionInfos.isEmpty()) {
       LOG.warn("No partition to index in the plan");
@@ -1512,6 +1518,8 @@ public void performTableServices(Option<String> inFlightInstantTimestamp) {
         LOG.info("Latest deltacommit time found is {}, running compaction operations.", latestDeltacommitTime);
         compactIfNecessary(writeClient);
       }
+      ErrorInjectionUtils.maybeInjectErrorByKillingJVM(MDT_BEFORE_ARCHIVAL,
+          "Fail metadata table just before archival " + inFlightInstantTimestamp);
       writeClient.archive();
       LOG.info("All the table services operations on MDT completed successfully");
     } catch (Exception e) {
@@ -1567,6 +1575,8 @@ protected void compactIfNecessary(BaseHoodieWriteClient writeClient) {
     } else if (writeClient.scheduleCompactionAtInstant(compactionInstantTime, Option.empty())) {
       LOG.info("Compaction is scheduled for timestamp {}", compactionInstantTime);
       writeClient.compact(compactionInstantTime);
+      ErrorInjectionUtils.maybeInjectErrorByKillingJVM(MDT_AFTER_COMPACTION,
+          "Fail metadata table just after compaction " + compactionInstantTime);
     } else if (metadataWriteConfig.isLogCompactionEnabled()) {
       // Schedule and execute log compaction with new instant time.
       final String logCompactionInstantTime = metadataMetaClient.createNewInstantTime(false);
@@ -1593,6 +1603,9 @@ protected void cleanIfNecessary(BaseHoodieWriteClient writeClient) {
       // 3 is a value that I think is enough for metadata table reader.
       return;
     }
+
+    ErrorInjectionUtils.maybeInjectErrorByKillingJVM(MDT_BEFORE_CLEANING,
+        "Fail metadata table just before cleaning");
     // Trigger cleaning with suffixes based on the same instant time. This ensures that any future
     // delta commits synced over will not have an instant time lesser than the last completed instant on the
     // metadata table.
@@ -1761,6 +1774,7 @@ protected void closeInternal() {
     }
   }

+  @Override
   public boolean isInitialized() {
     return initialized;
   }

@@ -33,6 +33,7 @@
 import org.apache.hudi.common.util.ValidationUtils;
 import org.apache.hudi.common.util.collection.ImmutablePair;
 import org.apache.hudi.common.util.collection.Pair;
+import org.apache.hudi.common.util.injection.ErrorInjectionUtils;
 import org.apache.hudi.config.HoodieWriteConfig;
 import org.apache.hudi.exception.HoodieIOException;
 import org.apache.hudi.internal.schema.io.FileBasedInternalSchemaStorageManager;
@@ -55,6 +56,9 @@
 import java.util.stream.Stream;

 import static org.apache.hudi.common.util.StringUtils.isNullOrEmpty;
+import static org.apache.hudi.common.util.injection.ErrorInjectionCategory.DT_CLEANING_AFTER_MDT;
+import static org.apache.hudi.common.util.injection.ErrorInjectionCategory.DT_CLEANING_BEFORE_MDT;
+import static org.apache.hudi.common.util.injection.ErrorInjectionCategory.MDT_CLEANING;

 public class CleanActionExecutor<T, I, K, O> extends BaseActionExecutor<T, I, K, O, HoodieCleanMetadata> {

@@ -231,7 +235,19 @@ private HoodieCleanMetadata runClean(HoodieTable<T, I, K, O> table, HoodieInstan
       if (!skipLocking) {
         this.txnManager.beginTransaction(Option.of(inflightInstant), Option.empty());
       }
+      if (config.getBasePath().contains(".hoodie/metadata")) {
+        ErrorInjectionUtils.maybeInjectErrorByKillingJVM(
+            MDT_CLEANING, "Fail metadata table cleaning " + instantTime);
+      } else {
+        ErrorInjectionUtils.maybeInjectErrorByKillingJVM(
+            DT_CLEANING_BEFORE_MDT, "Fail data table cleaning before applying to MDT " + instantTime);
+      }
       writeTableMetadata(metadata, inflightInstant.getTimestamp());
+      if (!config.getBasePath().contains(".hoodie/metadata")) {
+        ErrorInjectionUtils.maybeInjectErrorByKillingJVM(DT_CLEANING_AFTER_MDT,
+            "Fail data table cleaning after applying to MDT, but before completing in DT "
+                + instantTime);
+      }
       table.getActiveTimeline().transitionCleanInflightToComplete(false,
           inflightInstant, TimelineMetadataUtils.serializeCleanMetadata(metadata));
       LOG.info("Marked clean started on " + inflightInstant.getTimestamp() + " as complete");

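Assuming a property-based switch like the one sketched after the first hunk (again an assumption; the PR may gate injection through a write config or environment variable instead), a crash-recovery exercise for one of these points would run in two JVMs along these lines. The class and property names below are illustrative only.

// Hypothetical outline of a crash-recovery run; names and the property are illustrative only.
public class ErrorInjectionRecoveryExample {
  public static void main(String[] args) {
    // JVM 1: enable exactly one failure point, then run a normal Hudi write against the table.
    // The matching maybeInjectErrorByKillingJVM call halts the process mid-commit, e.g. after
    // the metadata table is updated but before the data table instant is completed.
    System.setProperty("hoodie.error.injection.category", "DT_AFTER_MDT");
    // ... issue an upsert with a write client here ...

    // JVM 2: leave the property unset and start another writer on the same base path, then verify
    // that the dangling inflight instant is rolled back (or completed) and that the metadata
    // table remains consistent with the data table.
  }
}
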