Skip to content

Commit

Permalink
add log4j for tube
Browse files Browse the repository at this point in the history
  • Loading branch information
thanh-nguyen-dang committed Jul 8, 2024
1 parent 9a3db94 commit e56cd43
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 1 deletion.
12 changes: 11 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
# Base image: pulled from the quay.io/cdis registry; the tag names Python 3.9 on a
# "buster" base.
# NOTE(review): "python3.9-buster-stable" looks like a moving tag rather than a
# digest-pinned reference, so rebuilds may pick up a different image — confirm.
FROM quay.io/cdis/python:python3.9-buster-stable

ENV DEBIAN_FRONTEND=noninteractive \
HADOOP_SPARK_VERSION="3" \
SQOOP_VERSION="1.4.7" \
SPARK_VERSION="3.3.0" \
HADOOP_VERSION="3.3.2" \
ES_HADOOP_VERSION="8.3.3" \
MAVEN_ES_URL="https://search.maven.org/remotecontent?filepath=org/elasticsearch" \
Expand All @@ -11,9 +13,11 @@ ENV DEBIAN_FRONTEND=noninteractive \

# Despite its name, this is a URL prefix rather than a version: it joins the Maven
# base URL, the artifact name (ES_SPARK_30_2_12) and ES_HADOOP_VERSION into
# "<base>/<artifact>/<version>/<artifact>-<version>", with no file extension.
# It is declared in its own ENV instruction because it references variables set by
# the previous ENV instruction.
ENV MAVEN_ES_SPARK_VERSION="${MAVEN_ES_URL}/${ES_SPARK_30_2_12}/${ES_HADOOP_VERSION}/${ES_SPARK_30_2_12}-${ES_HADOOP_VERSION}"

ENV SQOOP_INSTALLATION_URL="http://archive.apache.org/dist/sqoop/${SQOOP_VERSION}/sqoop-${SQOOP_VERSION}.bin__hadoop-2.6.0.tar.gz" \
ENV SPARK_INSTALLATION_URL="http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_SPARK_VERSION}.tgz" \
SQOOP_INSTALLATION_URL="http://archive.apache.org/dist/sqoop/${SQOOP_VERSION}/sqoop-${SQOOP_VERSION}.bin__hadoop-2.6.0.tar.gz" \
HADOOP_INSTALLATION_URL="http://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz" \
ES_HADOOP_INSTALLATION_URL="https://artifacts.elastic.co/downloads/elasticsearch-hadoop/elasticsearch-hadoop-${ES_HADOOP_VERSION}.zip" \
SPARK_HOME="/spark" \
SQOOP_HOME="/sqoop" \
HADOOP_HOME="/hadoop" \
ES_HADOOP_HOME="/es-hadoop" \
Expand Down Expand Up @@ -63,6 +67,11 @@ RUN wget ${HADOOP_INSTALLATION_URL} \
&& rm hadoop-${HADOOP_VERSION}.tar.gz \
&& rm -rf $HADOOP_HOME/share/doc

# Install Spark: fetch the release archive named by SPARK_INSTALLATION_URL, unpack
# it into SPARK_HOME without the archive's top-level directory, then delete the
# archive within this same RUN so the tarball is not left behind in the layer.
RUN SPARK_ARCHIVE="spark-${SPARK_VERSION}-bin-hadoop${HADOOP_SPARK_VERSION}.tgz" \
    && wget ${SPARK_INSTALLATION_URL} \
    && mkdir -p ${SPARK_HOME} \
    && tar -xvf ${SPARK_ARCHIVE} -C ${SPARK_HOME} --strip-components 1 \
    && rm ${SPARK_ARCHIVE}

RUN wget ${ES_HADOOP_INSTALLATION_URL} \
&& mkdir -p $ES_HADOOP_HOME \
&& unzip elasticsearch-hadoop-${ES_HADOOP_VERSION}.zip -d ${ES_HADOOP_HOME} \
Expand Down Expand Up @@ -100,6 +109,7 @@ RUN python -m poetry config virtualenvs.create false \

# copy source code ONLY after installing dependencies
COPY . /tube
# Also place the logging configuration in Spark's conf directory. The destination
# is derived from SPARK_HOME (the directory Spark is unpacked into) so it keeps
# following the install location instead of repeating the literal "/spark".
# NOTE(review): Spark 3.3+ logs through Log4j 2 and, by default, looks for
# conf/log4j2.properties — confirm that this Log4j 1.x-format file is actually
# picked up by the Spark version installed in this image.
COPY log4j.properties ${SPARK_HOME}/conf/log4j.properties

RUN python -m poetry config virtualenvs.create false \
&& python -m poetry install -vv --only main --no-interaction \
Expand Down
9 changes: 9 additions & 0 deletions log4j.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Logging configuration in Log4j 1.x properties syntax.
# NOTE(review): Spark 3.3+ uses Log4j 2 and reads conf/log4j2.properties by
# default — confirm this 1.x-format file takes effect for the Spark version in use.

# Root logger: INFO and above, routed to the "console" appender defined below.
log4j.rootCategory=INFO, console
# Console appender that writes to stderr.
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
# Line format: ISO-8601 timestamp, level padded to 5 chars, last component of the
# logger name, source line number, then the message and a newline.
log4j.appender.console.layout.ConversionPattern=%d{ISO8601} %-5p %c{1}:%L - %m%n
# Per-logger overrides: raise these Spark/Hadoop loggers to DEBUG while everything
# else stays at the root level.
log4j.logger.org.apache.spark.executor.CoarseGrainedExecutorBackend=DEBUG
log4j.logger.org.apache.spark.scheduler.TaskSetManager=DEBUG
log4j.logger.org.apache.hadoop.fs=DEBUG
log4j.logger.org.apache.spark.scheduler.DAGScheduler=DEBUG

0 comments on commit e56cd43

Please sign in to comment.