Skip to content

Commit

Permalink
Merge pull request #5274 from shaneknapp/dh-140-audit-datahub-python-…
Browse files Browse the repository at this point in the history
…packages

[DH-140] clean up the datahub image python install
  • Loading branch information
shaneknapp authored Dec 18, 2023
2 parents e417c3d + 5877b88 commit 83aa012
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 124 deletions.
33 changes: 18 additions & 15 deletions deployments/datahub/images/default/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -87,32 +87,38 @@ RUN mkdir -p /tmp/r-packages

# Install all our base R packages
COPY install.R /tmp/install.R
# Run the base R package install exactly once, under GNU time so the build log
# records user/sys/real/CPU figures. The download cache is removed in the same
# layer; deleting it in a later RUN would leave it in the image.
RUN /usr/bin/time -f "User\t%U\nSys\t%S\nReal\t%E\nCPU\t%P" /tmp/install.R && \
    rm -rf /tmp/downloaded_packages

# DLAB CTAWG, Fall '20 - Summer '21
# https://github.com/berkeley-dsep-infra/datahub/issues/1942
COPY r-packages/dlab-ctawg.r /tmp/r-packages/
# Install the packages once, timed with GNU time, and drop the download cache
# in the same layer so it never persists in the image.
RUN /usr/bin/time -f "User\t%U\nSys\t%S\nReal\t%E\nCPU\t%P" /usr/bin/r /tmp/r-packages/dlab-ctawg.r && \
    rm -rf /tmp/downloaded_packages

# Econ 140, Fall '22 and into the future
# https://github.com/berkeley-dsep-infra/datahub/issues/3757
COPY r-packages/econ-140.r /tmp/r-packages
# Install the packages once, timed with GNU time, and drop the download cache
# in the same layer so it never persists in the image.
RUN /usr/bin/time -f "User\t%U\nSys\t%S\nReal\t%E\nCPU\t%P" /usr/bin/r /tmp/r-packages/econ-140.r && \
    rm -rf /tmp/downloaded_packages

# EEP/IAS C119, Spring '23
# https://github.com/berkeley-dsep-infra/datahub/issues/4203
COPY r-packages/eep-1118.r /tmp/r-packages
# Install the packages once, timed with GNU time, and drop the download cache
# in the same layer so it never persists in the image.
RUN /usr/bin/time -f "User\t%U\nSys\t%S\nReal\t%E\nCPU\t%P" /usr/bin/r /tmp/r-packages/eep-1118.r && \
    rm -rf /tmp/downloaded_packages

# Stat 135, Fall '23
# https://github.com/berkeley-dsep-infra/datahub/issues/4907
COPY r-packages/2023-fall-stat-135.r /tmp/r-packages
# Install the packages once, timed with GNU time, and drop the download cache
# in the same layer so it never persists in the image.
RUN /usr/bin/time -f "User\t%U\nSys\t%S\nReal\t%E\nCPU\t%P" /usr/bin/r /tmp/r-packages/2023-fall-stat-135.r && \
    rm -rf /tmp/downloaded_packages

# MBA 247, Fall '23
# issue TBD; discussed over email
COPY r-packages/2023-fall-mba-247.r /tmp/r-packages/
# Install the packages once, timed with GNU time, and drop the download cache
# in the same layer so it never persists in the image.
RUN /usr/bin/time -f "User\t%U\nSys\t%S\nReal\t%E\nCPU\t%P" /usr/bin/r /tmp/r-packages/2023-fall-mba-247.r && \
    rm -rf /tmp/downloaded_packages

# Put the conda environment's binaries first on PATH and append the RStudio
# Server binaries. Uses key=value form; the space-separated ENV form is legacy.
ENV PATH=${CONDA_DIR}/bin:$PATH:/usr/lib/rstudio-server/bin

Expand All @@ -123,36 +129,33 @@ WORKDIR /home/${NB_USER}
# Install mambaforge as root
USER root
COPY install-mambaforge.bash /tmp/install-mambaforge.bash
# Run the mambaforge installer once, under GNU time so the build log records
# how long it took. time propagates the script's exit status, so a failed
# install still fails the build.
RUN /usr/bin/time -f "User\t%U\nSys\t%S\nReal\t%E\nCPU\t%P" /tmp/install-mambaforge.bash

# Install conda environment as our user
USER ${NB_USER}

COPY infra-requirements.txt /tmp/infra-requirements.txt
COPY environment.yml /tmp/environment.yml

# Apply environment.yml to the conda prefix once, then clean mamba's caches in
# the same layer; cleaning in a separate RUN would not shrink the image because
# the cache would already be committed to the previous layer. Both commands run
# under GNU time so the build log shows their cost separately.
RUN /usr/bin/time -f "User\t%U\nSys\t%S\nReal\t%E\nCPU\t%P" \
        /srv/conda/bin/mamba env update -p ${CONDA_DIR} -f /tmp/environment.yml && \
    /usr/bin/time -f "User\t%U\nSys\t%S\nReal\t%E\nCPU\t%P" \
        /srv/conda/bin/mamba clean -afy

# Install the contrib nbextensions into the sys-prefix (as symlinks), then
# enable the nbextensions configurator for the same prefix.
RUN jupyter contrib nbextensions install --sys-prefix --symlink && \
jupyter nbextensions_configurator enable --sys-prefix

# Used by MCB32, but incompatible with ipywidgets 8.x
RUN jupyter nbextension enable --py --sys-prefix qgrid

# Set up nbpdf dependencies
# commenting out for DH-164
#ENV PYPPETEER_HOME ${CONDA_DIR}
#RUN pyppeteer-install

# install chromium browser for playwright
# https://github.com/berkeley-dsep-infra/datahub/issues/5062
# playwright is only available in nbconvert[webpdf], via pip/pypi.
# see also environment.yml
# DH-164
# Point playwright's browser directory at the conda prefix. Uses key=value
# form; the space-separated ENV form is legacy.
ENV PLAYWRIGHT_BROWSERS_PATH=${CONDA_DIR}
RUN playwright install chromium

# Install IR kernelspec
# Register the IR kernelspec under the conda prefix once, timed with GNU time.
# The command is passed to time directly, which avoids the nested quoting that
# piping an echoed string into bash requires.
RUN /usr/bin/time -f "User\t%U\nSys\t%S\nReal\t%E\nCPU\t%P" \
    /usr/bin/r -e "IRkernel::installspec(user = FALSE, prefix='${CONDA_DIR}')"

# Copy d8extension.bash into /usr/local/sbin and execute it during the build.
COPY d8extension.bash /usr/local/sbin/d8extension.bash
RUN /usr/local/sbin/d8extension.bash
Expand Down
3 changes: 3 additions & 0 deletions deployments/datahub/images/default/apt.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ micro
tini
locales

# for timing builds
time

# for jupyter-tree-download, #3979
zip

Expand Down
113 changes: 4 additions & 109 deletions deployments/datahub/images/default/environment.yml
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
# https://docs.conda.io/projects/conda-build/en/latest/resources/package-spec.html#examples
# For conda, == is exact constraint, while = is fuzzy constraint.
# pip uses https://peps.python.org/pep-0440/ which does not have =.

dependencies:
- nodejs=16.*
- traitlets=5.9.*
- pip=22.2.*
- python=3.9.*

- syncthing==1.18.6

# pymc3 needs this
Expand All @@ -29,13 +27,6 @@ dependencies:
- spacy=3.4.1
- nltk=3.6.*

# data 88e, fall 2022, issue 3915
- yfinance=0.1.85
- quantstats=0.0.59

# dlab, dssj, summer 2022, issues 3472, 3473, 3474
- gensim=4.2.0

# 3577, L&S 22, Spring 2023
- spacy-model-en_core_web_sm=3.4.0
- spacy-model-en_core_web_lg=3.4.0
Expand Down Expand Up @@ -69,13 +60,6 @@ dependencies:
- pymatgen==2023.10.4
- matminer=0.9.0

# data101, Fall 2022, https://github.com/berkeley-dsep-infra/datahub/issues/3646
- pgspecial=2.0.1

# https://github.com/berkeley-dsep-infra/datahub/issues/3939 data101, fall 2022
- dbt-postgres=1.3.0
- dbt-bigquery=1.3.0

# Econ 148, Spring 2023, https://github.com/berkeley-dsep-infra/datahub/issues/4067
- ipykernel = 6.19.4

Expand All @@ -90,16 +74,10 @@ dependencies:

# data8; foundation
- datascience==0.17.6

- jupyter-server-proxy==3.2.1
- jupyter-rsession-proxy==2.0.1

- folium==0.12.1.post1

# cogsci88;
# disabled 2023-10-07 due to unresolved dependencies
#- ggplot==0.11.5

# cogsci131; spring 2018
- nose==1.3.7

Expand All @@ -121,13 +99,6 @@ dependencies:
# EPS 256, https://github.com/berkeley-dsep-infra/datahub/issues/1775
- obspy==1.3.0

# DATA 88E, https://github.com/berkeley-dsep-infra/datahub/issues/5135
- ipyleaflet==0.17.4

- otter-grader==4.2.0

- gh-scoped-creds==4.1

- pip:
- -r /tmp/infra-requirements.txt
# Econ 148, Spring 2023 https://github.com/berkeley-dsep-infra/datahub/issues/4093
Expand All @@ -145,37 +116,6 @@ dependencies:
#
# nb2pdf==0.6.2 commented out by sknapp 06.10.2023 to unblock https://github.com/berkeley-dsep-infra/datahub/issues/5062
#
# ls 88-3; neuro
- lxml==4.9.1
- tqdm==4.62.1
- mne==0.23.0
- nibabel==3.2.1
- h5py==3.7.0
- numexpr==2.7.3
- openpyxl==3.0.7
- nilearn==0.8.0

# data100, geog88
# https://github.com/berkeley-dsep-infra/datahub/issues/2838
- geopandas==0.10.*
- geopy==2.2.*
- pysal==2.5.*
- rtree==0.9.*
- netcdf4==1.6.*
- mplleaflet==0.0.5
# phys 151;
- emcee==3.1.0
- daft==0.1.2
- corner==2.2.1
#
# ce88;
- pymdptoolbox==4.0-b3
#
# data-x; DL
- tensorflow==2.11.1
- scikit-image==0.19.*
- tables==3.7.0
- opencv-python==4.6.0.66

# astr 128/256; spring 2021
- astroquery==0.4.6
Expand All @@ -200,14 +140,6 @@ dependencies:
# eps 109; fall 2019
- ffmpeg-python==0.2.0

# data 88e; spring 2021
- plotly==5.2.1
- mpmath==1.3.0
# sympy==1.6.2
- chart-studio==1.1.0
- csaps==1.1.0
- nbforms==0.5.1

# issue #875, global 150Q/pacs 190 - fall 2019
- wordcloud==1.8.1

Expand Down Expand Up @@ -250,51 +182,14 @@ dependencies:
# https://github.com/berkeley-dsep-infra/datahub/issues/1981
- ipycanvas==0.9.0

# data100 scientific packages
- ray==2.8.0
- xlrd==2.0.1

# data100 visualization
- colorlover==0.3.0
- cufflinks==0.17.3
- altair==4.1.0

# data100 tools to access things
- tweepy==3.10.0
- pytz==2021.1
- psycopg2==2.9.1

# data100 teaching
- jassign==0.0.7
- dsassign==0.0.8

# data102
- dask==2021.10.0
- distributed==2021.10.0
- keras-applications==1.0.8
- keras-preprocessing==1.1.2
- keras==2.11.0
- keras-vis==0.4.1
- plotly-express==0.4.1
- cytoolz==0.11.0
- pyro-ppl==1.7.0
- lime==0.2.0.1
- shap==0.39.0

# prob140 2021 Spring
- prob140==0.4.1.5
- sympy==1.8
# sknapp 2023-12-15: we will eventually move prob140 back to datahub in FA24
# https://jira-secure.berkeley.edu/browse/DH-203
# - prob140==0.4.1.5
# - sympy==1.8

# IB 105 / ESPM 125, Fall 2021 - https://github.com/berkeley-dsep-infra/datahub/issues/2696
- geonomics==1.3.0

# data100, fall 2021 - https://github.com/berkeley-dsep-infra/datahub/issues/2767
- ipython-sql==0.4.0

# data100, fall 2021 - https://github.com/berkeley-dsep-infra/datahub/issues/2875
- metpy==1.1.0
- pooch==1.5.2

# https://github.com/berkeley-dsep-infra/datahub/issues/2950
# Needed to work with a new enough version of httplib2
- httplib2>=0.20.2
Expand Down

0 comments on commit 83aa012

Please sign in to comment.