Skip to content

Commit

Permalink
Add OSS/S3 to cluster-mode type apache#4621
Browse files Browse the repository at this point in the history
  • Loading branch information
sunxiaojian committed Apr 23, 2023
1 parent c8a5d52 commit 34a827e
Show file tree
Hide file tree
Showing 7 changed files with 353 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,19 @@
<artifactId>imap-storage-file</artifactId>
<name>SeaTunnel : Engine : Storage : IMap Storage Plugins : File</name>

<properties>
<hadoop-aliyun.version>3.0.0</hadoop-aliyun.version>
<hadoop-aws.version>3.1.4</hadoop-aws.version>
<aws.java.sdk.version>1.11.271</aws.java.sdk.version>
</properties>

<dependencies>
<dependency>
<groupId>org.apache.seatunnel</groupId>
<artifactId>serializer-protobuf</artifactId>
<version>${project.version}</version>
</dependency>
<!-- hadoop jar -->
<dependency>
<groupId>org.apache.seatunnel</groupId>
<artifactId>seatunnel-hadoop3-3.1.4-uber</artifactId>
Expand All @@ -62,6 +69,27 @@
<groupId>org.awaitility</groupId>
<artifactId>awaitility</artifactId>
</dependency>

<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-aliyun</artifactId>
<version>${hadoop-aliyun.version}</version>
<scope>provided</scope>
</dependency>

<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-aws</artifactId>
<version>${hadoop-aws.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>com.amazonaws</groupId>
<artifactId>aws-java-sdk-bundle</artifactId>
<version>${aws.java.sdk.version}</version>
<scope>provided</scope>
</dependency>

</dependencies>

</project>
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@
import org.apache.seatunnel.engine.imap.storage.api.IMapStorage;
import org.apache.seatunnel.engine.imap.storage.api.IMapStorageFactory;
import org.apache.seatunnel.engine.imap.storage.api.exception.IMapStorageException;

import org.apache.hadoop.conf.Configuration;
import org.apache.seatunnel.engine.imap.storage.file.config.AbstractConfiguration;
import org.apache.seatunnel.engine.imap.storage.file.config.FileConfiguration;

import com.google.auto.service.AutoService;

Expand All @@ -34,16 +34,26 @@

@AutoService(IMapStorageFactory.class)
public class IMapFileStorageFactory implements IMapStorageFactory {

private static final String STORAGE_TYPE_KEY = "storage.type";

@Override
public String factoryIdentifier() {
return "hdfs";
}

@Override
public IMapStorage create(Map<String, Object> initMap) throws IMapStorageException {

String storageType =
String.valueOf(
initMap.getOrDefault(STORAGE_TYPE_KEY, FileConfiguration.HDFS.toString()));
// build configuration
AbstractConfiguration configuration =
FileConfiguration.valueOf(storageType.toUpperCase()).getConfiguration(storageType);
configuration.buildConfiguration(initMap);

IMapFileStorage iMapFileStorage = new IMapFileStorage();
Configuration configuration = new Configuration();
configuration.set("fs.defaultFS", (String) initMap.get("fs.defaultFS"));
initMap.put(HDFS_CONFIG_KEY, configuration);
iMapFileStorage.initialize(initMap);
return iMapFileStorage;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*/

package org.apache.seatunnel.engine.imap.storage.file.config;

import org.apache.seatunnel.engine.imap.storage.api.exception.IMapStorageException;

import org.apache.hadoop.conf.Configuration;

import java.util.Map;

public abstract class AbstractConfiguration {

protected static final String HDFS_IMPL_KEY = "impl";

/**
* check the configuration keys
*
* @param config configuration
* @param keys keys
*/
void checkConfiguration(Map<String, Object> config, String... keys) {
for (String key : keys) {
if (!config.containsKey(key) || null == config.get(key)) {
throw new IllegalArgumentException(key + " is required");
}
}
}

public abstract Configuration buildConfiguration(Map<String, Object> config)
throws IMapStorageException;

/**
* set extra options for configuration
*
* @param hadoopConf
* @param config
* @param prefix
*/
void setExtraConfiguration(
Configuration hadoopConf, Map<String, Object> config, String prefix) {
config.forEach(
(k, v) -> {
if (k.startsWith(prefix)) {
hadoopConf.set(k, String.valueOf(v));
}
});
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*/

package org.apache.seatunnel.engine.imap.storage.file.config;

public enum FileConfiguration {
HDFS("hdfs", new HdfsConfiguration()),
S3("s3", new S3Configuration()),
OSS("oss", new OssConfiguration());

/** file system type */
private final String name;

/** file system configuration */
private final AbstractConfiguration configuration;

FileConfiguration(String name, AbstractConfiguration configuration) {
this.name = name;
this.configuration = configuration;
}

public AbstractConfiguration getConfiguration(String name) {
return configuration;
}

public String getName() {
return name;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*/

package org.apache.seatunnel.engine.imap.storage.file.config;

import org.apache.seatunnel.engine.imap.storage.api.exception.IMapStorageException;

import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.UserGroupInformation;

import java.io.IOException;
import java.util.Map;

import static org.apache.hadoop.fs.FileSystem.FS_DEFAULT_NAME_KEY;

public class HdfsConfiguration extends AbstractConfiguration {
private static final String HDFS_DEF_FS_NAME = "fs.defaultFS";
private static final String KERBEROS_PRINCIPAL = "kerberosPrincipal";
private static final String KERBEROS_KEYTAB_FILE_PATH = "kerberosKeytabFilePath";
private static final String HADOOP_SECURITY_AUTHENTICATION_KEY =
"hadoop.security.authentication";
private static final String KERBEROS_KEY = "kerberos";
private static final String HDFS_IMPL = "org.apache.hadoop.hdfs.DistributedFileSystem";
private static final String HDFS_IMPL_KEY = "fs.hdfs.impl";

@Override
public Configuration buildConfiguration(Map<String, Object> config)
throws IMapStorageException {
checkConfiguration(config, HDFS_DEF_FS_NAME);
Configuration hadoopConf = new Configuration();

hadoopConf.set(HDFS_DEF_FS_NAME, String.valueOf(config.get(HDFS_DEF_FS_NAME)));

hadoopConf.set(HDFS_IMPL_KEY, HDFS_IMPL);
hadoopConf.set(FS_DEFAULT_NAME_KEY, String.valueOf(config.get(FS_DEFAULT_NAME_KEY)));
if (config.containsKey(KERBEROS_PRINCIPAL)
&& config.containsKey(KERBEROS_KEYTAB_FILE_PATH)) {
String kerberosPrincipal = String.valueOf(config.get(KERBEROS_PRINCIPAL));
String kerberosKeytabFilePath = String.valueOf(config.get(KERBEROS_KEYTAB_FILE_PATH));
if (StringUtils.isNotBlank(kerberosPrincipal)
&& StringUtils.isNotBlank(kerberosKeytabFilePath)) {
hadoopConf.set(HADOOP_SECURITY_AUTHENTICATION_KEY, KERBEROS_KEY);
authenticateKerberos(kerberosPrincipal, kerberosKeytabFilePath, hadoopConf);
}
}
// todo support other hdfs optional config keys
return hadoopConf;
}

/**
* Authenticate kerberos
*
* @param kerberosPrincipal
* @param kerberosKeytabFilePath
* @param hdfsConf
* @throws IMapStorageException
*/
private void authenticateKerberos(
String kerberosPrincipal, String kerberosKeytabFilePath, Configuration hdfsConf)
throws IMapStorageException {
UserGroupInformation.setConfiguration(hdfsConf);
try {
UserGroupInformation.loginUserFromKeytab(kerberosPrincipal, kerberosKeytabFilePath);
} catch (IOException e) {
throw new IMapStorageException(
"Failed to login user from keytab : "
+ kerberosKeytabFilePath
+ " and kerberos principal : "
+ kerberosPrincipal,
e);
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*/

package org.apache.seatunnel.engine.imap.storage.file.config;

import org.apache.seatunnel.engine.imap.storage.api.exception.IMapStorageException;

import org.apache.hadoop.conf.Configuration;

import java.util.Map;

import static org.apache.hadoop.fs.FileSystem.FS_DEFAULT_NAME_KEY;

public class OssConfiguration extends AbstractConfiguration {
public static final String OSS_BUCKET_KEY = "oss.bucket";
private static final String OSS_IMPL_KEY = "fs.oss.impl";
private static final String HDFS_OSS_IMPL =
"org.apache.hadoop.fs.aliyun.oss.AliyunOSSFileSystem";
private static final String OSS_KEY = "fs.oss.";

@Override
public Configuration buildConfiguration(Map<String, Object> config)
throws IMapStorageException {
checkConfiguration(config, OSS_BUCKET_KEY);
Configuration hadoopConf = new Configuration();
hadoopConf.set(FS_DEFAULT_NAME_KEY, String.valueOf(config.get(OSS_BUCKET_KEY)));
hadoopConf.set(OSS_IMPL_KEY, HDFS_OSS_IMPL);
setExtraConfiguration(hadoopConf, config, OSS_KEY);
return hadoopConf;
}
}
Loading

0 comments on commit 34a827e

Please sign in to comment.