Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Created docker files for an integ test cluster (#601) #986

Merged
merged 6 commits into from
Dec 17, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions docker/integ-test/.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
SPARK_VERSION=3.5.3
OPENSEARCH_VERSION=latest
DASHBOARDS_VERSION=latest
MASTER_UI_PORT=8080
MASTER_PORT=7077
UI_PORT=4040
SPARK_CONNECT_PORT=15002
PPL_JAR=../../ppl-spark-integration/target/scala-2.12/ppl-spark-integration-assembly-0.7.0-SNAPSHOT.jar
FLINT_JAR=../../flint-spark-integration/target/scala-2.12/flint-spark-integration-assembly-0.7.0-SNAPSHOT.jar
OPENSEARCH_NODE_MEMORY=512m
OPENSEARCH_ADMIN_PASSWORD=C0rrecthorsebatterystaple.
OPENSEARCH_PORT=9200
OPENSEARCH_DASHBOARDS_PORT=5601
143 changes: 143 additions & 0 deletions docker/integ-test/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
services:
spark:
image: bitnami/spark:${SPARK_VERSION:-3.5.3}
container_name: spark
ports:
- "${MASTER_UI_PORT:-8080}:8080"
- "${MASTER_PORT:-7077}:7077"
- "${UI_PORT:-4040}:4040"
- "${SPARK_CONNECT_PORT}:15002"
entrypoint: /opt/bitnami/scripts/spark/master-entrypoint.sh
environment:
- SPARK_MODE=master
- SPARK_RPC_AUTHENTICATION_ENABLED=no
- SPARK_RPC_ENCRYPTION_ENABLED=no
- SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no
- SPARK_SSL_ENABLED=no
- SPARK_PUBLIC_DNS=localhost
volumes:
- type: bind
source: ./spark-master-entrypoint.sh
target: /opt/bitnami/scripts/spark/master-entrypoint.sh
- type: bind
source: ./spark-defaults.conf
target: /opt/bitnami/spark/conf/spark-defaults.conf
- type: bind
source: ./log4j2.properties
target: /opt/bitnami/spark/conf/log4j2.properties
- type: bind
source: $PPL_JAR
target: /opt/bitnami/spark/jars/ppl-spark-integration.jar
- type: bind
source: $FLINT_JAR
target: /opt/bitnami/spark/jars/flint-spark-integration.jar
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8080/"]
interval: 1m
timeout: 5s
retries: 3
start_period: 30s
start_interval: 5s
networks:
- opensearch-net

spark-worker:
image: bitnami/spark:${SPARK_VERSION:-3.5.3}
container_name: spark-worker
environment:
- SPARK_MODE=worker
- SPARK_MASTER_URL=spark://spark:7077
- SPARK_WORKER_MEMORY=${WORKER_MEMORY:-1G}
- SPARK_WORKER_CORES=${WORKER_CORES:-1}
- SPARK_RPC_AUTHENTICATION_ENABLED=no
- SPARK_RPC_ENCRYPTION_ENABLED=no
- SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no
- SPARK_SSL_ENABLED=no
- SPARK_PUBLIC_DNS=localhost
volumes:
- type: bind
source: ./spark-defaults.conf
target: /opt/bitnami/spark/conf/spark-defaults.conf
- type: bind
source: ./log4j2.properties
target: /opt/bitnami/spark/conf/log4j2.properties
- type: bind
source: $PPL_JAR
target: /opt/bitnami/spark/jars/ppl-spark-integration.jar
- type: bind
source: $FLINT_JAR
target: /opt/bitnami/spark/jars/flint-spark-integration.jar
networks:
- opensearch-net
depends_on:
- spark

opensearch:
image: opensearchproject/opensearch:${OPENSEARCH_VERSION:-latest}
container_name: opensearch
environment:
- cluster.name=opensearch-cluster
- node.name=opensearch
- discovery.seed_hosts=opensearch
- cluster.initial_cluster_manager_nodes=opensearch
- bootstrap.memory_lock=true
- plugins.security.ssl.http.enabled=false
- OPENSEARCH_JAVA_OPTS=-Xms${OPENSEARCH_NODE_MEMORY:-512m} -Xmx${OPENSEARCH_NODE_MEMORY:-512m}
- OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD}
ulimits:
memlock:
soft: -1
hard: -1
nofile:
soft: 65536
hard: 65536
volumes:
- opensearch-data:/usr/share/opensearch/data
ports:
- ${OPENSEARCH_PORT:-9200}:9200
- 9600:9600
expose:
- "${OPENSEARCH_PORT:-9200}"
healthcheck:
test: ["CMD", "curl", "-f", "-u", "admin:${OPENSEARCH_ADMIN_PASSWORD}", "http://localhost:9200/_cluster/health"]
interval: 1m
timeout: 5s
retries: 3
start_period: 30s
start_interval: 5s
networks:
- opensearch-net

opensearch-dashboards:
image: opensearchproject/opensearch-dashboards:${DASHBOARDS_VERSION}
container_name: opensearch-dashboards
ports:
- ${OPENSEARCH_DASHBOARDS_PORT:-5601}:5601
expose:
- "${OPENSEARCH_DASHBOARDS_PORT:-5601}"
environment:
OPENSEARCH_HOSTS: '["http://opensearch:9200"]'
networks:
- opensearch-net
depends_on:
- opensearch

minio:
image: minio/minio
container_name: minio-S3
# See original entrypoint/command under https://github.com/minio/minio/blob/master/Dockerfile
entrypoint: sh -c 'mkdir -p /data/test && minio server /data --console-address ":9001"'
ports:
- "9000:9000"
- "9001:9001"
volumes:
- minio-data:/data
networks:
- opensearch-net

volumes:
opensearch-data:
minio-data:

networks:
opensearch-net:
69 changes: 69 additions & 0 deletions docker/integ-test/log4j2.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Set everything to be logged to the console
rootLogger.level = info
rootLogger.appenderRef.stdout.ref = console

# In the pattern layout configuration below, we specify an explicit `%ex` conversion
# pattern for logging Throwables. If this was omitted, then (by default) Log4J would
# implicitly add an `%xEx` conversion pattern which logs stacktraces with additional
# class packaging information. That extra information can sometimes add a substantial
# performance overhead, so we disable it in our default logging config.
# For more information, see SPARK-39361.
appender.console.type = Console
appender.console.name = console
appender.console.target = SYSTEM_ERR
appender.console.layout.type = PatternLayout
appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n%ex

# Set the default spark-shell/spark-sql log level to WARN. When running the
# spark-shell/spark-sql, the log level for these classes is used to overwrite
# the root logger's log level, so that the user can have different defaults
# for the shell and regular Spark apps.
logger.repl.name = org.apache.spark.repl.Main
logger.repl.level = warn

logger.thriftserver.name = org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver
logger.thriftserver.level = warn

# Settings to quiet third party logs that are too verbose
logger.jetty1.name = org.sparkproject.jetty
logger.jetty1.level = warn
logger.jetty2.name = org.sparkproject.jetty.util.component.AbstractLifeCycle
logger.jetty2.level = error
logger.replexprTyper.name = org.apache.spark.repl.SparkIMain$exprTyper
logger.replexprTyper.level = info
logger.replSparkILoopInterpreter.name = org.apache.spark.repl.SparkILoop$SparkILoopInterpreter
logger.replSparkILoopInterpreter.level = info
logger.parquet1.name = org.apache.parquet
logger.parquet1.level = error
logger.parquet2.name = parquet
logger.parquet2.level = error

# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support
logger.RetryingHMSHandler.name = org.apache.hadoop.hive.metastore.RetryingHMSHandler
logger.RetryingHMSHandler.level = fatal
logger.FunctionRegistry.name = org.apache.hadoop.hive.ql.exec.FunctionRegistry
logger.FunctionRegistry.level = error

# For deploying Spark ThriftServer
# SPARK-34128: Suppress undesirable TTransportException warnings involved in THRIFT-4805
appender.console.filter.1.type = RegexFilter
appender.console.filter.1.regex = .*Thrift error occurred during processing of message.*
appender.console.filter.1.onMatch = deny
appender.console.filter.1.onMismatch = neutral
23 changes: 23 additions & 0 deletions docker/integ-test/prepare_scala_queries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/usr/bin/env python3

import csv

queries = None
with open('../../integ-test/script/test_cases.csv', 'r') as f:
reader = csv.DictReader(f)
queries = [(row['query'], i, row.get('expected_status', None)) for i, row in enumerate(reader, start=1) if row['query'].strip()]

print('try {')
for query in queries:
query_str = query[0].replace('\n', '').replace('"', '\\"')
if 'FAILED' == query[2]:
print(' try {')
print(f' spark.sql("{query_str}")')
print(' throw new Error')
print(' } catch {')
print(' case e: Exception => null')
print(' }\n')
else:
print(f' spark.sql("{query_str}")\n')
print('}')

Loading
Loading