From f5f1ca2bab8c3c3c1760161160adbc5c5744c412 Mon Sep 17 00:00:00 2001 From: Christian Stefanescu Date: Thu, 24 Oct 2024 09:26:01 +0200 Subject: [PATCH 1/5] feat: Use the official python:3.9 image --- Dockerfile | 39 +++++++++++++++++++-------------------- requirements.txt | 2 ++ 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/Dockerfile b/Dockerfile index 57e87186ce..543bced3f3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,21 +1,20 @@ -FROM ubuntu:20.04 +FROM python:3.9 ENV DEBIAN_FRONTEND noninteractive # build-essential RUN apt-get -qq -y update \ - && apt-get -qq --no-install-recommends -y install locales \ - ca-certificates postgresql-client libpq-dev curl jq \ - python3-pip python3-icu python3-psycopg2 \ - python3-lxml python3-crypto git \ - && apt-get -qq -y autoremove \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \ - && localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8 + && apt-get -qq --no-install-recommends -y install locales \ + ca-certificates postgresql-client libpq-dev curl jq git \ + libxml2-dev libxslt1-dev python3-dev \ + && apt-get -qq -y autoremove \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \ + && localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8 ENV LANG='en_US.UTF-8' RUN groupadd -g 1000 -r app \ - && useradd -m -u 1000 -s /bin/false -g app app + && useradd -m -u 1000 -s /bin/false -g app app # Install Python dependencies RUN pip3 install --no-cache-dir -q -U pip setuptools six @@ -32,19 +31,19 @@ RUN pip install --no-cache-dir -q -e /aleph ENV ALEPH_WORD_FREQUENCY_URI=https://public.data.occrp.org/develop/models/word-frequencies/word_frequencies-v0.4.1.zip ENV ALEPH_FTM_COMPARE_MODEL_URI=https://public.data.occrp.org/develop/models/xref/glm_bernoulli_2e_wf-v0.4.1.pkl RUN mkdir -p /opt/ftm-compare/word-frequencies/ && \ - curl -L -o "/opt/ftm-compare/word-frequencies/word-frequencies.zip" "$ALEPH_WORD_FREQUENCY_URI" && \ - python3 -m zipfile --extract /opt/ftm-compare/word-frequencies/word-frequencies.zip /opt/ftm-compare/word-frequencies/ && \ - curl -L -o "/opt/ftm-compare/model.pkl" "$ALEPH_FTM_COMPARE_MODEL_URI" + curl -L -o "/opt/ftm-compare/word-frequencies/word-frequencies.zip" "$ALEPH_WORD_FREQUENCY_URI" && \ + python3 -m zipfile --extract /opt/ftm-compare/word-frequencies/word-frequencies.zip /opt/ftm-compare/word-frequencies/ && \ + curl -L -o "/opt/ftm-compare/model.pkl" "$ALEPH_FTM_COMPARE_MODEL_URI" # Configure some docker defaults: ENV ALEPH_ELASTICSEARCH_URI=http://elasticsearch:9200/ \ - ALEPH_DATABASE_URI=postgresql://aleph:aleph@postgres/aleph \ - FTM_STORE_URI=postgresql://aleph:aleph@postgres/aleph \ - REDIS_URL=redis://redis:6379/0 \ - ARCHIVE_TYPE=file \ - ARCHIVE_PATH=/data \ - FTM_COMPARE_FREQUENCIES_DIR=/opt/ftm-compare/word-frequencies/ \ - FTM_COMPARE_MODEL=/opt/ftm-compare/model.pkl + ALEPH_DATABASE_URI=postgresql://aleph:aleph@postgres/aleph \ + FTM_STORE_URI=postgresql://aleph:aleph@postgres/aleph \ + REDIS_URL=redis://redis:6379/0 \ + ARCHIVE_TYPE=file \ + ARCHIVE_PATH=/data \ + FTM_COMPARE_FREQUENCIES_DIR=/opt/ftm-compare/word-frequencies/ \ + FTM_COMPARE_MODEL=/opt/ftm-compare/model.pkl RUN mkdir /run/prometheus diff --git a/requirements.txt b/requirements.txt index 24bed71223..54eb8de6b2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -38,6 +38,8 @@ zipstream-new==1.1.8 pika==1.3.2 sentry-sdk[flask]==2.10.0 prometheus-client==0.17.1 +lxml==5.3.0 +lxml_html_clean==0.3.1 # Testing dependencies From 7b89c2c53f72b3390c0ebcb31fa6ba8705fb849e Mon Sep 17 00:00:00 2001 From: Christian Stefanescu Date: Thu, 24 Oct 2024 09:26:58 +0200 Subject: [PATCH 2/5] feat: run tests in dev mode --- contrib/test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/test.sh b/contrib/test.sh index 0b0f00a2ae..5a5e0ce9ab 100755 --- a/contrib/test.sh +++ b/contrib/test.sh @@ -3,4 +3,4 @@ psql -c "DROP DATABASE IF EXISTS aleph_test;" $ALEPH_DATABASE_URI psql -c "CREATE DATABASE aleph_test;" $ALEPH_DATABASE_URI -pytest aleph/ --cov=aleph --cov-report html --cov-report term $@ +PYTHONDEVMODE=1 PYTHONTRACEMALLOC=1 pytest aleph/ --cov=aleph --cov-report html --cov-report term $@ From 198e2f0467117d0ece9432513286567e6e36c2fb Mon Sep 17 00:00:00 2001 From: Christian Stefanescu Date: Thu, 24 Oct 2024 09:27:04 +0200 Subject: [PATCH 3/5] chore: updated deprecated version query --- aleph/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aleph/__init__.py b/aleph/__init__.py index cd4d560d5e..b41c6668f4 100644 --- a/aleph/__init__.py +++ b/aleph/__init__.py @@ -1,9 +1,9 @@ import logging import warnings from sqlalchemy.exc import SAWarning -from pkg_resources import get_distribution +from importlib.metadata import version -__version__ = get_distribution("aleph").version +__version__ = version("aleph") # shut up useless SA warning: warnings.filterwarnings( From 9563a6cf37533191aa0ec139fce73839ee4a9836 Mon Sep 17 00:00:00 2001 From: Christian Stefanescu Date: Tue, 26 Nov 2024 11:50:28 +0100 Subject: [PATCH 4/5] Drop already installed packages --- Dockerfile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 543bced3f3..30f134b958 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,8 +4,7 @@ ENV DEBIAN_FRONTEND noninteractive # build-essential RUN apt-get -qq -y update \ && apt-get -qq --no-install-recommends -y install locales \ - ca-certificates postgresql-client libpq-dev curl jq git \ - libxml2-dev libxslt1-dev python3-dev \ + postgresql-client jq python3-dev \ && apt-get -qq -y autoremove \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \ From 492157acfb96b8bc615065dae9d59e2eb69cd479 Mon Sep 17 00:00:00 2001 From: Christian Stefanescu Date: Tue, 26 Nov 2024 15:30:48 +0100 Subject: [PATCH 5/5] Add pyicu and an appropriate test Co-authored-by: Till Prochaska <1512805+tillprochaska@users.noreply.github.com> --- aleph/tests/test_entities_api.py | 19 ++++++++++++++++++- requirements.txt | 1 + 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/aleph/tests/test_entities_api.py b/aleph/tests/test_entities_api.py index c081da3dfa..1297169a8d 100644 --- a/aleph/tests/test_entities_api.py +++ b/aleph/tests/test_entities_api.py @@ -716,4 +716,21 @@ def test_expand(self): prop = res["property"] assert prop == "holder", prop assert res["count"] == 1, pformat(res) - assert len(res["entities"]) == 1, pformat(res) + assert len(res["entities"]) == 1, pformat(res) + + def test_view_transliterate(self): + _, headers = self.login(is_admin=True) + + data = { + "id": "1", + "schema": "Person", + "properties": { + "name": ["İlham Əliyev"], + }, + } + entity = self.create_entity(data, self.col) + index_entity(entity) + + res = self.client.get(f"/api/2/entities/{entity.id}", headers=headers) + assert res.json["properties"]["name"][0] == "İlham Əliyev" + assert res.json["latinized"]["İlham Əliyev"] == "Ilham Aliyev" diff --git a/requirements.txt b/requirements.txt index 54eb8de6b2..6848a2ac23 100644 --- a/requirements.txt +++ b/requirements.txt @@ -40,6 +40,7 @@ sentry-sdk[flask]==2.10.0 prometheus-client==0.17.1 lxml==5.3.0 lxml_html_clean==0.3.1 +pyicu==2.14 # Testing dependencies