From 75065c4a33d5af119690abda491fb8cc50c18862 Mon Sep 17 00:00:00 2001 From: Aron Ahmadia Date: Tue, 10 Nov 2015 12:30:43 -0500 Subject: [PATCH 01/11] updated changelog --- CHANGES.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 09301d6d..13e25df8 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,13 @@ +# 0.4 + +Released November 2015 + +* Nutch streaming visualizations +* Nutch REST API support +* Removed log.io +* DataWake trail support +* Bug fixes + # 0.3 Released September 2015 From 6650ffc058deff77be749c5ce944522ada3db519 Mon Sep 17 00:00:00 2001 From: Aron Ahmadia Date: Wed, 11 Nov 2015 10:55:10 -0500 Subject: [PATCH 02/11] Clean up Dockerfile/supervisor --- docker/Dockerfile | 93 ++++++++++++++++++----------------------- docker/supervisord.conf | 38 +++++++++++++---- 2 files changed, 70 insertions(+), 61 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index b1b1ce39..ed831d4d 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -17,13 +17,17 @@ RUN apt-get install -y \ git \ libssl-dev \ make \ + nginx \ openjdk-7-jdk \ pkg-config \ + rabbitmq-server \ wget RUN groupadd -r explorer -g 433 && \ useradd -u 431 -m -r -g explorer -d /home/explorer -s /bin/bash -c "Docker image user" explorer && \ chown -R explorer:explorer /home/explorer +RUN adduser explorer sudo +RUN echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers USER explorer @@ -33,41 +37,35 @@ RUN bash ./Miniconda-latest-Linux-x86_64.sh -b RUN git clone https://github.com/memex-explorer/memex-explorer WORKDIR /home/explorer/memex-explorer/ -RUN git checkout origin/ahmadia/tad_integration_master WORKDIR /home/explorer/memex-explorer -ENV PATH /home/explorer/miniconda/bin:$PATH +ENV PATH /home/explorer/miniconda2/bin:$PATH RUN conda update conda -y RUN conda install conda-env -y -RUN conda env update --file local-environment.yml +RUN conda env update WORKDIR /home/explorer/memex-explorer/source/memex RUN cp settings_files/dev_settings.py settings.py WORKDIR /home/explorer/memex-explorer/source #replaces source activate... sorta -ENV PATH /home/explorer/miniconda/envs/memex/bin:$PATH +ENV PATH /home/explorer/miniconda2/envs/memex/bin:$PATH ENV CONDA_DEFAULT_ENV memex -ENV CONDA_ENV_PATH /home/explorer/miniconda -RUN python manage.py migrate +ENV CONDA_ENV_PATH /home/explorer/miniconda2 -WORKDIR /home/explorer/memex-explorer/deploy -RUN python logio_settings.py +# move these into another environment.yaml? Or better, apply them as a patch? +RUN conda install -c memex ddt +# TAD dependencies +RUN pip install celery fisher elasticsearch flask flask_restful -### TAD -# Move this up eventually -USER root -RUN apt-get install -y rabbitmq-server +# ready for deployment +RUN python manage.py migrate +RUN python manage.py collectstatic -v0 --noinput -USER explorer -ENV PATH /home/explorer/miniconda/envs/memex/bin:$PATH -ENV CONDA_DEFAULT_ENV memex -ENV CONDA_ENV_PATH /home/explorer/miniconda -RUN pip install celery -RUN pip install fisher elasticsearch -RUN pip install flask flask_restful +# Install elasticdump +RUN npm install -g elasticdump -#------------- Pull software --------------# +#------------- Pull TAD software --------------# USER root RUN mkdir -p /service/build WORKDIR /service/build @@ -75,66 +73,55 @@ RUN git clone https://github.com/autonlab/tad.git # Build and install TAD library. RUN ln -s /service/build/tad/service /service/tad -### TAD! - +RUN mkdir /service/tad/config -# Need a special supervisord.conf that knows how to bind to 0.0.0.0 -# also this knows to run TAD +# Explorer configuration USER explorer COPY supervisord.conf /home/explorer/memex-explorer/source/supervisord.conf -# Need dev_settings that point to right locations COPY docker_settings.py /home/explorer/memex-explorer/source/memex/settings.py +# TAD configuration +COPY tad.cfg /service/tad/config/tad.cfg +RUN chown -R explorer /service + # Now bring over elasticsearch index data COPY elasticdump.json /home/explorer/elasticdump.json -# Install elasticdump -RUN npm install -g elasticdump - # Load data # Elasticsearch data COPY populate_elasticsearch.sh /home/explorer/populate_elasticsearch.sh WORKDIR /home/explorer RUN /bin/bash ./populate_elasticsearch.sh # Crawl data -COPY resources /home/explorer/memex-explorer/source/resources/ +# uncomment this to copy crawl data (heavy) over +# COPY resources /home/explorer/memex-explorer/source/resources/ # SQLite (Django) COPY db.sqlite3 /home/explorer/memex-explorer/source/db.sqlite3 # TAD run script -COPY tad_run /home/explorer/miniconda/envs/memex/bin/tad +COPY tad_run /home/explorer/miniconda2/envs/memex/bin/tad + +COPY nginx.conf /etc/nginx/sites-enabled/default +# uncomment these to install secrets into the container +# COPY secrets/nginx.crt /etc/nginx/ssl/nginx.crt +# COPY secrets/nginx.key /etc/nginx/ssl/nginx.key +# COPY secrets/htpasswd /etc/nginx/htpasswd # Permissions clean-up USER root RUN chown -R explorer /home/explorer/memex-explorer/source -RUN chmod +x /home/explorer/miniconda/envs/memex/bin/tad -RUN chown -R explorer /home/explorer/miniconda/envs/memex/bin/tad - -#------------- Service Configuration --------------# -# Permissions clean-up -USER root -RUN mkdir /service/tad/config -# TAD configuration file -COPY tad.cfg /service/tad/config/tad.cfg -RUN chown -R explorer /service -RUN adduser explorer sudo -RUN echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers - -USER explorer -WORKDIR /home/explorer/memex-explorer/ -RUN git fetch && checkout origin/ahmadia/tad_integration_master +RUN chmod +x /home/explorer/miniconda2/envs/memex/bin/tad +RUN chown -R explorer /home/explorer/miniconda2/envs/memex/bin/tad ##################### INSTALLATION END ##################### # Expose the default ports -EXPOSE 8000 -EXPOSE 9200 -EXPOSE 9300 -EXPOSE 5601 -EXPOSE 8084 -EXPOSE 5000 +EXPOSE 80 +EXPOSE 443 # Set default container command USER explorer WORKDIR /home/explorer/memex-explorer/source -ENTRYPOINT "supervisord" +ENV HTTP_PROTOCOL http +ENV WS_PROTOCOL ws +ENTRYPOINT "supervisord" \ No newline at end of file diff --git a/docker/supervisord.conf b/docker/supervisord.conf index 00b2e33a..1309a2d5 100644 --- a/docker/supervisord.conf +++ b/docker/supervisord.conf @@ -15,18 +15,33 @@ minprocs=200 ; (min. avail process descriptors;default 200) [inet_http_server] port = 127.0.0.1:9001 +[program:nginx] +command=sudo nginx -g 'daemon off;' +priority=1 +autostart=true + [program:redis] command=redis-server priority=1 -[program:celery] -command=celery -A memex worker -l info --loglevel=debug --logfile=memex/logs/celeryd.log -priority=2 - [program:elasticsearch] command=elasticsearch priority=1 +[program:nutch] +command=nutch startserver +priority=1 + +[program:rabbitmq] +command=sudo rabbitmq-server ; need administrative access on Linux systems +priority=1 +autostart=true + +[program:bokeh-server] +command=bokeh-server --backend memory --port 5006 --ws-conn-string %(ENV_WS_PROTOCOL)://explorer.continuum.io/bokeh/sub +priority=1 +autostart=true + [program:tika] command=tika-rest-server priority=2 @@ -38,16 +53,23 @@ priority=2 [program:ddt] command=ddt priority=5 -autostart=false +autostart=true [program:tad] command=tad priority=5 -autostart=true +autostart=false + +# have to bring up celery after other applications +[program:celery] +command=celery -A memex worker -l info --loglevel=debug --logfile=resources/logs/celery-worker.log +priority=10 +killasgroup=true [program:django] -command=python manage.py runserver 0.0.0.0:8000 +command=gunicorn memex.wsgi:application -b 0.0.0.0:8000 +priority=20 stopasgroup=true [rpcinterface:supervisor] -supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface \ No newline at end of file +supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface From 2136de4d21a294bababfae11d3502a9a361cddfe Mon Sep 17 00:00:00 2001 From: Aron Ahmadia Date: Wed, 11 Nov 2015 11:22:16 -0500 Subject: [PATCH 03/11] Add docker settings file --- docker/docker_settings.py | 54 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 docker/docker_settings.py diff --git a/docker/docker_settings.py b/docker/docker_settings.py new file mode 100644 index 00000000..c9f08488 --- /dev/null +++ b/docker/docker_settings.py @@ -0,0 +1,54 @@ +""" +Django settings for memex project. + +For more information on this file, see +https://docs.djangoproject.com/en/1.7/topics/settings/ + +For the full list of settings and their values, see +https://docs.djangoproject.com/en/1.7/ref/settings/ +""" + +from common_settings import * +import os + +# SECURITY WARNING: keep the secret key used in production secret! +SECRET_KEY = '0#t((zq66&3*87djaltu-pn34%0p!*v_332f2p!$2i)w5y17f8' + +# SECURITY WARNING: don't run with debug turned on in production! +PRODUCTION = os.environ.get('PRODUCTION') + +if PRODUCTION: + DEBUG = False + TEMPLATE_DEBUG = False + DEPLOYMENT = True +else: + DEBUG = os.environ.get('DEBUG', ) + TEMPLATE_DEBUG = True + INSTALLED_APPS += ('debug_toolbar',) + DEPLOYMENT = False + + +MEDIA_ROOT = os.path.join(BASE_DIR, 'resources') +PROJECT_PATH = os.path.join(MEDIA_ROOT, "projects") + +VIRTUAL_HOST = os.environ.get('VIRTUAL_HOST', 'localhost') +PROTOCOL = os.environ.get('HTTP_PROTOCOL', 'http') + +REST_FRAMEWORK = { + 'DEFAULT_AUTHENTICATION_CLASSES': ( + 'rest_framework.authentication.SessionAuthentication', + ), + 'DEFAULT_PERMISSION_CLASSES': ( + 'rest_framework.permissions.AllowAny', + ), + 'DEFAULT_FILTER_BACKENDS': ('rest_framework.filters.DjangoFilterBackend',) +} + +# ddt is treated as an external service for now + +EXTERNAL_APP_LOCATIONS = { + 'bokeh-server': '/bokeh', + 'ddt': PROTOCOL + '://' + VIRTUAL_HOST + ':8084', + 'tad': '/tad', + 'kibana': '/kibana', +} From 738775bc402bea09ff6ac1a1a0aaa0e627b996b9 Mon Sep 17 00:00:00 2001 From: Aron Ahmadia Date: Wed, 11 Nov 2015 11:23:37 -0500 Subject: [PATCH 04/11] Ordering fix in Dockerfile --- docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index ed831d4d..c544b9b1 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -82,7 +82,6 @@ COPY docker_settings.py /home/explorer/memex-explorer/source/memex/settings.py # TAD configuration COPY tad.cfg /service/tad/config/tad.cfg -RUN chown -R explorer /service # Now bring over elasticsearch index data COPY elasticdump.json /home/explorer/elasticdump.json @@ -109,6 +108,7 @@ COPY nginx.conf /etc/nginx/sites-enabled/default # Permissions clean-up USER root +RUN chown -R explorer /service RUN chown -R explorer /home/explorer/memex-explorer/source RUN chmod +x /home/explorer/miniconda2/envs/memex/bin/tad RUN chown -R explorer /home/explorer/miniconda2/envs/memex/bin/tad From a87a40771aa26acc63c59be9c937452952169280 Mon Sep 17 00:00:00 2001 From: Aron Ahmadia Date: Wed, 11 Nov 2015 11:26:06 -0500 Subject: [PATCH 05/11] Don't preload elasticsearch by default --- docker/Dockerfile | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index c544b9b1..d7111ed1 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -83,14 +83,17 @@ COPY docker_settings.py /home/explorer/memex-explorer/source/memex/settings.py # TAD configuration COPY tad.cfg /service/tad/config/tad.cfg + +# UNCOMMENT THIS SECTION TO PRELOAD THE ELASTICSEARCH INSTANCE + # Now bring over elasticsearch index data -COPY elasticdump.json /home/explorer/elasticdump.json +# COPY elasticdump.json /home/explorer/elasticdump.json # Load data # Elasticsearch data -COPY populate_elasticsearch.sh /home/explorer/populate_elasticsearch.sh -WORKDIR /home/explorer -RUN /bin/bash ./populate_elasticsearch.sh +# COPY populate_elasticsearch.sh /home/explorer/populate_elasticsearch.sh +# WORKDIR /home/explorer +# RUN /bin/bash ./populate_elasticsearch.sh # Crawl data # uncomment this to copy crawl data (heavy) over # COPY resources /home/explorer/memex-explorer/source/resources/ From 5cd08970c261143c03e715d97bd596b4eb4114ee Mon Sep 17 00:00:00 2001 From: Aron Ahmadia Date: Wed, 11 Nov 2015 11:28:11 -0500 Subject: [PATCH 06/11] Make more data loads optional --- docker/Dockerfile | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index d7111ed1..9918ef31 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -85,6 +85,7 @@ COPY tad.cfg /service/tad/config/tad.cfg # UNCOMMENT THIS SECTION TO PRELOAD THE ELASTICSEARCH INSTANCE +# ------------------------------------------------------------- # # Now bring over elasticsearch index data # COPY elasticdump.json /home/explorer/elasticdump.json @@ -94,11 +95,21 @@ COPY tad.cfg /service/tad/config/tad.cfg # COPY populate_elasticsearch.sh /home/explorer/populate_elasticsearch.sh # WORKDIR /home/explorer # RUN /bin/bash ./populate_elasticsearch.sh -# Crawl data -# uncomment this to copy crawl data (heavy) over +# ------------------------------------------------------------- # + + +# ------------------------------------------------------------- # +# UNCOMMENT THIS SECTION TO PRELOAD CRAWL DATA +# Crawl data # COPY resources /home/explorer/memex-explorer/source/resources/ +# ------------------------------------------------------------- # + + +# ------------------------------------------------------------- # +# UNCOMMENT THIS SECTION TO PRELOAD SQL DATABASE # SQLite (Django) -COPY db.sqlite3 /home/explorer/memex-explorer/source/db.sqlite3 +# COPY db.sqlite3 /home/explorer/memex-explorer/source/db.sqlite3 +# ------------------------------------------------------------- # # TAD run script COPY tad_run /home/explorer/miniconda2/envs/memex/bin/tad From f655855a2ce13b8928d15d4e305375274fa024f0 Mon Sep 17 00:00:00 2001 From: Aron Ahmadia Date: Wed, 11 Nov 2015 11:29:54 -0500 Subject: [PATCH 07/11] Add default nginx configuration --- docker/nginx.conf | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 docker/nginx.conf diff --git a/docker/nginx.conf b/docker/nginx.conf new file mode 100644 index 00000000..40799e02 --- /dev/null +++ b/docker/nginx.conf @@ -0,0 +1,40 @@ +server { + listen 80; + server_name explorer.continuum.io; + + client_max_body_size 100M; + + location / { + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header Host $host; + #todo: make this a variable below + proxy_pass http://127.0.0.1:8000; + } + + location /kibana/ { + rewrite /kibana/(.*) /$1 break; + proxy_pass http://127.0.0.1:5601/; + proxy_redirect off; + proxy_set_header Host $host; + } + + location /bokeh/ { + proxy_pass http://127.0.0.1:5006/; + proxy_cache off; + proxy_cookie_domain localhost explorer.continuum.io; + sub_filter 'http://localhost:5006' 'https://explorer.continuum.io'; + } + + location /bokeh/sub { + proxy_pass http://localhost:5006; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + } + + location /static { + rewrite ^/static/(.*)$ /$1 break; + root /home/explorer/memex-explorer/source/base/static/ ; + } +} \ No newline at end of file From b9a0c86a851ea9b036602a6f3dc8b2482630df0b Mon Sep 17 00:00:00 2001 From: Aron Ahmadia Date: Wed, 11 Nov 2015 11:46:16 -0500 Subject: [PATCH 08/11] Fix missing string formatter in supervisord.conf --- docker/supervisord.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/supervisord.conf b/docker/supervisord.conf index 1309a2d5..4d6564b9 100644 --- a/docker/supervisord.conf +++ b/docker/supervisord.conf @@ -38,7 +38,7 @@ priority=1 autostart=true [program:bokeh-server] -command=bokeh-server --backend memory --port 5006 --ws-conn-string %(ENV_WS_PROTOCOL)://explorer.continuum.io/bokeh/sub +command=bokeh-server --backend memory --port 5006 --ws-conn-string %(ENV_WS_PROTOCOL)s://explorer.continuum.io/bokeh/sub priority=1 autostart=true From 707204a9409459be40bf0f7534a6cef7ce56a6e0 Mon Sep 17 00:00:00 2001 From: Aron Ahmadia Date: Wed, 11 Nov 2015 15:36:06 -0500 Subject: [PATCH 09/11] Better inline html/changelog support --- docker/Dockerfile | 8 ++++++++ docker/docker_settings.py | 10 ++++++++-- source/base/templates/base/base.html | 17 +++++++++++++++++ 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 9918ef31..e7fa767b 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -79,6 +79,13 @@ RUN mkdir /service/tad/config USER explorer COPY supervisord.conf /home/explorer/memex-explorer/source/supervisord.conf COPY docker_settings.py /home/explorer/memex-explorer/source/memex/settings.py +# patched in +WORKDIR /home/explorer/memex-explorer +RUN conda install markdown +RUN markdown_py CHANGES.md > source/base/changes.html +WORKDIR /home/explorer/memex-explorer/docs +RUN make html +RUN mv build/html ../source/base/static/docs # TAD configuration COPY tad.cfg /service/tad/config/tad.cfg @@ -138,4 +145,5 @@ USER explorer WORKDIR /home/explorer/memex-explorer/source ENV HTTP_PROTOCOL http ENV WS_PROTOCOL ws +ENV INLINE 1 ENTRYPOINT "supervisord" \ No newline at end of file diff --git a/docker/docker_settings.py b/docker/docker_settings.py index c9f08488..ef9354bc 100644 --- a/docker/docker_settings.py +++ b/docker/docker_settings.py @@ -11,18 +11,24 @@ from common_settings import * import os +# SECURITY WARNING: Not setting VIRTUAL_HOST prevents Django from being able to verify headers +ALLOWED_HOSTS = [os.environ.get('VIRTUAL_HOST', '*')] + # SECURITY WARNING: keep the secret key used in production secret! SECRET_KEY = '0#t((zq66&3*87djaltu-pn34%0p!*v_332f2p!$2i)w5y17f8' # SECURITY WARNING: don't run with debug turned on in production! -PRODUCTION = os.environ.get('PRODUCTION') +PRODUCTION = os.environ.get('PRODUCTION', False) + +# when INLINE is true point to local sources for changes/documentation instead of remote ones +INLINE = os.environ.get('INLINE', False) if PRODUCTION: DEBUG = False TEMPLATE_DEBUG = False DEPLOYMENT = True else: - DEBUG = os.environ.get('DEBUG', ) + DEBUG = True TEMPLATE_DEBUG = True INSTALLED_APPS += ('debug_toolbar',) DEPLOYMENT = False diff --git a/source/base/templates/base/base.html b/source/base/templates/base/base.html index 779481de..f2e4bde8 100644 --- a/source/base/templates/base/base.html +++ b/source/base/templates/base/base.html @@ -52,12 +52,29 @@
  • Seeds
  • + {% if settings.INLINE %} +
  • + Docs +
  • + {% else %}
  • Docs
  • + {% endif %} +
  • About
  • + {% if settings.INLINE %} +
  • + Changes +
  • + {% else %} +
  • + Changes +
  • + {% endif %} + From 907d9cfe1b057345082e4e0fcd1d9b29ef696390 Mon Sep 17 00:00:00 2001 From: Aron Ahmadia Date: Wed, 11 Nov 2015 17:00:10 -0500 Subject: [PATCH 10/11] Fix URL for changes output --- docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index e7fa767b..a751e42a 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -82,7 +82,7 @@ COPY docker_settings.py /home/explorer/memex-explorer/source/memex/settings.py # patched in WORKDIR /home/explorer/memex-explorer RUN conda install markdown -RUN markdown_py CHANGES.md > source/base/changes.html +RUN markdown_py CHANGES.md > source/base/static/changes.html WORKDIR /home/explorer/memex-explorer/docs RUN make html RUN mv build/html ../source/base/static/docs From d040be1cdf6f1b16cf0516b322669e70adc91836 Mon Sep 17 00:00:00 2001 From: Aron Ahmadia Date: Wed, 11 Nov 2015 17:03:05 -0500 Subject: [PATCH 11/11] Bump doc version --- docs/source/conf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index bfb9d611..09e16245 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -60,9 +60,9 @@ # built documents. # # The short X.Y version. -version = '0.2' +version = '0.4' # The full version, including alpha/beta/rc tags. -release = '0.2' +release = '0.4' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages.