remove unnecessary files
thanh-nguyen-dang committed Jul 10, 2024
1 parent faea40d commit c459c38
Showing 13 changed files with 23 additions and 276 deletions.
4 changes: 2 additions & 2 deletions Dockerfile
@@ -114,8 +114,8 @@ RUN python -m poetry config virtualenvs.create false \

# copy source code ONLY after installing dependencies
COPY . /tube
-COPY log4j.properties /spark/conf/log4j.properties
-COPY log4j2.properties /spark/conf/log4j2.properties
+COPY dockers/confs/log4j.properties /spark/conf/log4j.properties
+COPY dockers/confs/log4j2.properties /spark/conf/log4j2.properties

RUN python -m poetry config virtualenvs.create false \
&& python -m poetry install -vv --only main --no-interaction \
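Note: both log4j configuration files now ship from a single dockers/confs/ directory and are copied into /spark/conf in every image, instead of being duplicated per image. A quick way to confirm the copies landed is a check inside the built container; the snippet below is a hypothetical smoke test, not part of the repository:

from pathlib import Path

# Hypothetical smoke test: run inside the built image to verify the copied configs.
for name in ("log4j.properties", "log4j2.properties"):
    conf = Path("/spark/conf") / name
    assert conf.is_file(), f"{conf} was not copied into the image"
    print(f"{conf}: {len(conf.read_text().splitlines())} lines")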
8 changes: 4 additions & 4 deletions log4j.properties → dockers/confs/log4j.properties
@@ -3,7 +3,7 @@ log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{ISO8601} %-5p %c{1}:%L - %m%n
-log4j.logger.org.apache.spark.executor.CoarseGrainedExecutorBackend=DEBUG
-log4j.logger.org.apache.spark.scheduler.TaskSetManager=DEBUG
-log4j.logger.org.apache.hadoop.fs=DEBUG
-log4j.logger.org.apache.spark.scheduler.DAGScheduler=DEBUG
+log4j.logger.org.apache.spark.executor.CoarseGrainedExecutorBackend=INFO
+log4j.logger.org.apache.spark.scheduler.TaskSetManager=INFO
+log4j.logger.org.apache.hadoop.fs=INFO
+log4j.logger.org.apache.spark.scheduler.DAGScheduler=INFO
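This hunk lowers several per-component loggers from DEBUG to INFO, cutting scheduler and filesystem chatter in the Spark logs. A similar effect can be had at runtime without editing log4j.properties; the sketch below uses PySpark's SparkContext.setLogLevel, which overrides the configured level for the running application:

from pyspark import SparkConf, SparkContext

# Sketch: quieting Spark logs at runtime instead of editing log4j.properties.
sc = SparkContext(conf=SparkConf().setAppName("log-level-demo").setMaster("local[1]"))
sc.setLogLevel("INFO")  # accepts the usual log4j levels: DEBUG, INFO, WARN, ERROR, ...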
dockers/confs/log4j2.properties
@@ -47,30 +47,30 @@ logger.jetty1.level = debug
logger.jetty1.appenderRef.stdout.ref = console

logger.jetty2.name = org.sparkproject.jetty.util.component.AbstractLifeCycle
-logger.jetty2.level = debug
+logger.jetty2.level = info
logger.jetty2.appenderRef.stdout.ref = console

logger.replexprTyper.name = org.apache.spark.repl.SparkIMain$exprTyper
-logger.replexprTyper.level = debug
+logger.replexprTyper.level = info
logger.replexprTyper.appenderRef.stdout.ref = console

logger.replSparkILoopInterpreter.name = org.apache.spark.repl.SparkILoop$SparkILoopInterpreter
-logger.replSparkILoopInterpreter.level = debug
+logger.replSparkILoopInterpreter.level = info
logger.replSparkILoopInterpreter.appenderRef.stdout.ref = console

logger.parquet1.name = org.apache.parquet
-logger.parquet1.level = debug
+logger.parquet1.level = info
logger.parquet1.appenderRef.stdout.ref = console

logger.parquet2.name = parquet
-logger.parquet2.level = debug
+logger.parquet2.level = info
logger.parquet2.appenderRef.stdout.ref = console

# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support
logger.RetryingHMSHandler.name = org.apache.hadoop.hive.metastore.RetryingHMSHandler
logger.RetryingHMSHandler.level = fatal
logger.FunctionRegistry.name = org.apache.hadoop.hive.ql.exec.FunctionRegistry
-logger.FunctionRegistry.level = debug
+logger.FunctionRegistry.level = info

# For deploying Spark ThriftServer
# SPARK-34128: Suppress undesirable TTransportException warnings involved in THRIFT-4805
@@ -80,20 +80,20 @@ appender.console.filter.1.onMatch = deny
appender.console.filter.1.onMismatch = neutral

logger.org.name=org.apache
-logger.org.level=debug
+logger.org.level=info
logger.org.appenderRef.stdout.ref = console

# Logger configuration for org.apache.spark package
logger.spark.name = org.apache.spark
-logger.spark.level = debug
+logger.spark.level = info
logger.spark.appenderRef.stdout.ref = console

# Logger configuration for org.apache.spark.sql package
logger.sql.name = org.apache.spark.sql
-logger.sql.level = debug
+logger.sql.level = info
logger.sql.appenderRef.stdout.ref = console

# Logger configuration for py4j package
logger.py4j.name = py4j
-logger.py4j.level = debug
+logger.py4j.level = info
logger.py4j.appenderRef.stdout.ref = console
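The log4j2 configuration gets the same treatment: every logger visible in these hunks drops from debug to info. Spark 3.3 and later read log4j2.properties rather than log4j.properties; if the file needs to be picked up from a non-default location, the JVMs can be pointed at it through a system property. A sketch, assuming a log4j2-based Spark build:

from pyspark import SparkConf

# Sketch: pointing driver and executor JVMs at the copied log4j2 config.
# Assumes a Spark build that uses log4j2 (Spark 3.3+).
conf = (
    SparkConf()
    .set("spark.driver.extraJavaOptions",
         "-Dlog4j2.configurationFile=file:/spark/conf/log4j2.properties")
    .set("spark.executor.extraJavaOptions",
         "-Dlog4j2.configurationFile=file:/spark/conf/log4j2.properties")
)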
4 changes: 2 additions & 2 deletions dockers/spark/master/Dockerfile
@@ -31,7 +31,7 @@ RUN python -m pip install dist/tube-1.0.5-py3-none-any.whl
EXPOSE 8080 7077 6066

COPY dockers/spark/master/master.sh /
-COPY dockers/spark/master/log4j.properties /spark/conf/log4j.properties
-COPY dockers/spark/master/log4j2.properties /spark/conf/log4j2.properties
+COPY dockers/confs/log4j.properties /spark/conf/log4j.properties
+COPY dockers/confs/log4j2.properties /spark/conf/log4j2.properties

CMD ["/bin/bash", "/master.sh"]
9 changes: 0 additions & 9 deletions dockers/spark/master/log4j.properties

This file was deleted.

99 changes: 0 additions & 99 deletions dockers/spark/master/log4j2.properties

This file was deleted.

10 changes: 0 additions & 10 deletions dockers/spark/submit/Dockerfile

This file was deleted.

30 changes: 0 additions & 30 deletions dockers/spark/submit/submit.sh

This file was deleted.

4 changes: 2 additions & 2 deletions dockers/spark/worker/Dockerfile
@@ -30,7 +30,7 @@ RUN python -m pip install dist/tube-1.0.5-py3-none-any.whl
EXPOSE 8081

COPY dockers/spark/worker/worker.sh /
-COPY dockers/spark/worker/log4j.properties /spark/conf/log4j.properties
-COPY dockers/spark/worker/log4j2.properties /spark/conf/log4j2.properties
+COPY dockers/confs/log4j.properties /spark/conf/log4j.properties
+COPY dockers/confs/log4j2.properties /spark/conf/log4j2.properties

CMD ["/bin/bash", "/worker.sh"]
9 changes: 0 additions & 9 deletions dockers/spark/worker/log4j.properties

This file was deleted.

99 changes: 0 additions & 99 deletions log4j2.properties

This file was deleted.

1 change: 1 addition & 0 deletions tube/settings.py
@@ -85,6 +85,7 @@
SPARK_MASTER = os.getenv("SPARK_MASTER", "local[1]") # 'spark-service'
SPARK_EXECUTOR_MEMORY = os.getenv("SPARK_EXECUTOR_MEMORY", "2g")
SPARK_DRIVER_MEMORY = os.getenv("SPARK_DRIVER_MEMORY", "512m")
+SPARK_DRIVER_HOST = os.getenv("SPARK_DRIVER_HOST", "tube")
APP_NAME = "Gen3 ETL"

os.environ[
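SPARK_DRIVER_HOST follows the same environment-override pattern as the surrounding settings: the default "tube" presumably matches the container's service name, and deployments can substitute any hostname the executors can resolve. A minimal illustration (the override hostname is a placeholder, not from the repository):

import os

# Override the driver host for a particular deployment; "tube" is the default.
os.environ["SPARK_DRIVER_HOST"] = "tube-etl.internal"  # placeholder hostname

SPARK_DRIVER_HOST = os.getenv("SPARK_DRIVER_HOST", "tube")
print(SPARK_DRIVER_HOST)  # -> tube-etl.internal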
2 changes: 2 additions & 0 deletions tube/utils/spark.py
@@ -20,6 +20,8 @@ def make_spark_context(tube_config):
SparkConf()
.set("spark.executor.memory", tube_config.SPARK_EXECUTOR_MEMORY)
.set("spark.driver.memory", tube_config.SPARK_DRIVER_MEMORY)
.set("spark.driver.host", tube_config.SPARK_DRIVER_HOST)
.set("spark.driver.port", "4040")
.set("spark.python.profile", "false")
.set("spark.executor.extraJavaOptions", "-Dlog4j.configuration=file:/spark/conf/log4j.properties")
# .set("spark.submit.pyFiles", "/tube")
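Pinning spark.driver.host and spark.driver.port matters in containerized deployments because executors open connections back to the driver: the advertised host must resolve from the executor containers, and a fixed port can be published through Docker. A minimal sketch of the resulting driver-side setup, hard-coding the defaults from tube/settings.py (note that 4040 is also the default Spark web UI port, so the two may contend on the driver):

from pyspark import SparkConf, SparkContext

# Minimal sketch of the pinned-driver setup with the defaults hard-coded.
conf = (
    SparkConf()
    .setAppName("Gen3 ETL")
    .set("spark.driver.host", "tube")  # must resolve from the executor containers
    .set("spark.driver.port", "4040")  # fixed so the port can be exposed in Docker
)
sc = SparkContext(conf=conf)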
