Skip to content

Commit

Permalink
Merge pull request #45 from plone/maurits-sustainable-exports-update-…
Browse files Browse the repository at this point in the history
…dates-at-end-take-two

Import: update dates again at the end. Take 2.
  • Loading branch information
mauritsvanrees authored Jan 23, 2025
2 parents d04511e + 1c35a49 commit 0c08c81
Show file tree
Hide file tree
Showing 9 changed files with 282 additions and 2 deletions.
1 change: 1 addition & 0 deletions news/39.bugfix.2
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Import: update modification dates again at the end. The original modification dates may have changed. @mauritsvanrees
1 change: 1 addition & 0 deletions src/plone/exportimport/importers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
"plone.importer.translations",
"plone.importer.discussions",
"plone.importer.portlets",
"plone.importer.final",
]

ImporterMapping = Dict[str, BaseImporter]
Expand Down
22 changes: 22 additions & 0 deletions src/plone/exportimport/importers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,25 @@ def import_data(
self.obj_hooks = self.obj_hooks or obj_hooks or []
report = self.do_import()
return report


class BaseDatalessImporter(BaseImporter):
    """Base class for an import step that reads no json data files.

    A typical subclass walks over the content objects that already exist
    and applies some updates to them.
    """

    def import_data(
        self,
        base_path: Path,
        data_hooks: List[Callable] = None,
        pre_deserialize_hooks: List[Callable] = None,
        obj_hooks: List[Callable] = None,
    ) -> str:
        """Run the import on a Plone site and return its report.

        ``base_path``, ``data_hooks`` and ``pre_deserialize_hooks`` are
        accepted but not used: they only matter when json data is read.
        """
        # Hooks already set on the importer take precedence over the ones
        # passed in; with neither available we fall back to an empty list.
        hooks = self.obj_hooks
        if not hooks:
            hooks = obj_hooks or []
        self.obj_hooks = hooks
        return self.do_import()
6 changes: 6 additions & 0 deletions src/plone/exportimport/importers/configure.zcml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,12 @@
for="plone.base.interfaces.siteroot.IPloneSiteRoot"
name="plone.importer.relations"
/>
<adapter
factory=".final.FinalImporter"
provides="plone.exportimport.interfaces.INamedImporter"
for="plone.base.interfaces.siteroot.IPloneSiteRoot"
name="plone.importer.final"
/>
<configure zcml:condition="installed plone.app.multilingual">
<adapter
factory=".translations.TranslationsImporter"
Expand Down
44 changes: 44 additions & 0 deletions src/plone/exportimport/importers/final.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from .base import BaseDatalessImporter
from plone import api
from plone.exportimport import logger
from plone.exportimport.interfaces import IExportImportRequestMarker
from plone.exportimport.utils import content as content_utils
from plone.exportimport.utils import request_provides
from Products.CMFCore.indexing import processQueue

import transaction


class FinalImporter(BaseDatalessImporter):
    """Importer that runs the final updaters on all cataloged objects.

    It reads no data files: it iterates over every brain in the portal
    catalog, runs each final updater on the object, and then applies the
    object hooks.
    """

    # name: str = ""

    def do_import(self) -> str:
        """Run the final updaters and object hooks on every cataloged object.

        Returns a one-line report containing the number of handled objects.
        """
        count = 0

        with request_provides(self.request, IExportImportRequestMarker):
            catalog = api.portal.get_tool("portal_catalog")
            # The list of updaters is the same for every object, so build it
            # once instead of once per brain.
            updaters = content_utils.final_updaters()
            # getAllBrains does not yet process the indexing queue before it starts.
            # It probably should. Let's call it explicitly here.
            processQueue()
            for count, brain in enumerate(catalog.getAllBrains(), start=1):
                obj = brain.getObject()
                logger_prefix = f"- {brain.getPath()}:"
                for updater in updaters:
                    logger.debug(f"{logger_prefix} Running {updater.name} for {obj}")
                    updater.func(obj)

                # Apply obj hooks. Each hook receives the result of the
                # previous one.
                for func in self.obj_hooks:
                    logger.debug(
                        f"{logger_prefix} Running object hook {func.__name__}"
                    )
                    obj = func(obj)

                # Every 100 objects: create a savepoint and log progress.
                if not count % 100:
                    transaction.savepoint()
                    logger.info(f"Handled {count} items...")

        report = f"{self.__class__.__name__}: Updated {count} objects"
        logger.info(report)
        return report
1 change: 1 addition & 0 deletions src/plone/exportimport/utils/content/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from .export_helpers import fixers # noQA
from .export_helpers import get_serializer # noQA
from .export_helpers import metadata_helpers # noQA
from .import_helpers import final_updaters # noQA
from .import_helpers import get_deserializer # noQA
from .import_helpers import get_obj_instance # noQA
from .import_helpers import metadata_setters # noQA
Expand Down
50 changes: 48 additions & 2 deletions src/plone/exportimport/utils/content/import_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from plone import api
from plone.base.interfaces.constrains import ENABLED
from plone.base.interfaces.constrains import ISelectableConstrainTypes
from plone.base.utils import base_hasattr
from plone.base.utils import unrestricted_construct_instance
from plone.dexterity.content import DexterityContent
from plone.exportimport import logger
Expand Down Expand Up @@ -168,7 +169,13 @@ def update_workflow_history(item: dict, obj: DexterityContent) -> DexterityConte


def update_dates(item: dict, obj: DexterityContent) -> DexterityContent:
"""Update creation and modification dates on the object."""
"""Update creation and modification dates on the object.
We call this last in our content updaters, because they have been changed.
The modification date may change again due to importers that run after us.
So we save it on a temporary property for handling in the final importer.
"""
created = item.get("created", item.get("creation_date", None))
modified = item.get("modified", item.get("modification_date", None))
idxs = []
Expand All @@ -179,9 +186,32 @@ def update_dates(item: dict, obj: DexterityContent) -> DexterityContent:
value = parse_date(value)
if not value:
continue
if attr == "modification_date":
# Make sure we never change an acquired attribute.
aq_base(obj).modification_date_migrated = value
old_value = getattr(obj, attr, None)
if old_value == value:
continue
setattr(obj, attr, value)
idxs.append(idx)
obj.reindexObject(idxs=idxs)
if idxs:
obj.reindexObject(idxs=idxs)
return obj


def reset_modification_date(obj: DexterityContent) -> DexterityContent:
    """Restore the modification date that was saved on the object.

    The modification date of the object may have gotten changed in various
    importers. The content import has saved the original modification date
    on the object in the temporary ``modification_date_migrated`` attribute.
    Now restore it.

    The temporary attribute is removed whenever it is present, also when the
    current modification date already matches, so no import marker is left
    behind on the content. The ``modified`` index is only updated when the
    date really changed.

    Returns the same object that was passed in.
    """
    if not base_hasattr(obj, "modification_date_migrated"):
        return obj
    modified = obj.modification_date_migrated
    # Clean up unconditionally: the attribute only exists to carry the date
    # from the content import to this final step.
    del obj.modification_date_migrated
    if modified and modified != obj.modification_date:
        obj.modification_date = modified
        obj.reindexObject(idxs=["modified"])
    return obj


Expand Down Expand Up @@ -334,3 +364,19 @@ def recatalog_uids(uids: List[str], idxs: List[str]):
if not obj:
continue
obj.reindexObject(idxs)


def final_updaters() -> List[types.ExportImportHelper]:
    """Return the final update functions, each wrapped in a helper.

    Every helper carries the function itself, its name, and its docstring
    as description.
    """
    final_funcs = (reset_modification_date,)
    return [
        types.ExportImportHelper(
            func=final_func,
            name=final_func.__name__,
            description=final_func.__doc__,
        )
        for final_func in final_funcs
    ]
75 changes: 75 additions & 0 deletions tests/importers/test_importers.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
from DateTime import DateTime
from plone import api
from plone.exportimport.importers import get_importer
from plone.exportimport.importers import Importer
from Products.CMFCore.indexing import processQueue

import pytest

Expand Down Expand Up @@ -27,6 +30,7 @@ def test_all_importers(self):
"plone.importer.relations",
"plone.importer.translations",
"plone.importer.discussions",
"plone.importer.final",
],
)
def test_importer_present(self, importer_name: str):
Expand All @@ -39,6 +43,7 @@ def test_importer_present(self, importer_name: str):
"ContentImporter: Imported 9 objects",
"PrincipalsImporter: Imported 2 groups and 1 members",
"RedirectsImporter: Imported 0 redirects",
"FinalImporter: Updated 9 objects",
],
)
def test_import_site(self, base_import_path, msg: str):
Expand All @@ -47,3 +52,73 @@ def test_import_site(self, base_import_path, msg: str):
# One entry per importer
assert len(results) >= 6
assert msg in results

@pytest.mark.parametrize(
    "uid,method_name,value",
    [
        (
            "35661c9bb5be42c68f665aa1ed291418",
            "created",
            "2024-02-13T18:16:04+00:00",
        ),
        (
            "35661c9bb5be42c68f665aa1ed291418",
            "modified",
            "2024-02-13T18:16:04+00:00",
        ),
        (
            "e7359727ace64e609b79c4091c38822a",
            "created",
            "2024-02-13T18:15:56+00:00",
        ),
        # The next one would fail without the final importer.
        (
            "e7359727ace64e609b79c4091c38822a",
            "modified",
            "2024-02-13T20:51:06+00:00",
        ),
    ],
)
def test_date_is_set(self, base_import_path, uid, method_name, value):
    """After a full site import, the date accessor returns the given value."""
    from plone.exportimport.utils.content import object_from_uid

    self.importer.import_site(base_import_path)
    date_accessor = getattr(object_from_uid(uid), method_name)
    assert date_accessor() == DateTime(value)

def test_final_contents(self, base_import_path):
    """Check specific items and the dates of all contents after an import."""
    self.importer.import_site(base_import_path)

    # First test that some specific contents were created.
    expected_items = (
        ("/bar/2025.png", "Image", "2025 logo"),
        ("/foo/another-page", "Document", "Another page"),
    )
    for path, portal_type, title in expected_items:
        item = api.content.get(path=path)
        assert item is not None
        assert item.portal_type == portal_type
        assert item.title == title

    # Now do general checks on all contents.
    portal_catalog = api.portal.get_tool("portal_catalog")

    # getAllBrains does not yet process the indexing queue before it starts.
    # It probably should. We call it explicitly here, otherwise the tests fail:
    # Some brains will have a modification date of today, even though if you get
    # the object, its actual modification date has been reset to 2024.
    processQueue()
    all_brains = list(portal_catalog.getAllBrains())
    assert len(all_brains) >= 9
    for brain in all_brains:
        if brain.portal_type != "Plone Site":
            # All created and modified dates should be in the previous year
            # (or earlier).
            assert not brain.created.isCurrentYear()
            assert not brain.modified.isCurrentYear()
            # Given what we see with getAllBrains, let's check the actual
            # content items as well.
            content_obj = brain.getObject()
            assert not content_obj.created().isCurrentYear()
            assert not content_obj.modified().isCurrentYear()
84 changes: 84 additions & 0 deletions tests/importers/test_importers_final.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
from DateTime import DateTime
from plone.exportimport import interfaces
from plone.exportimport.importers import content
from plone.exportimport.importers import final
from zope.component import getAdapter

import pytest


class TestImporterContent:
    """Tests for the final importer on a portal with multilingual content."""

    @pytest.fixture(autouse=True)
    def _init(self, portal_multilingual_content):
        """Keep the portal and a FinalImporter for it on the test instance."""
        self.portal = portal_multilingual_content
        self.importer = final.FinalImporter(self.portal)

    def test_adapter_is_registered(self):
        """The named importer adapter lookup gives a FinalImporter."""
        named_importer = getAdapter(
            self.portal, interfaces.INamedImporter, "plone.importer.final"
        )
        assert isinstance(named_importer, final.FinalImporter)

    def test_output_is_str(self, multilingual_import_path):
        """The import returns its report as a string."""
        report = self.importer.import_data(base_path=multilingual_import_path)
        assert isinstance(report, str)
        assert report == "FinalImporter: Updated 19 objects"

    def test_empty_import_path(self, empty_import_path):
        """An empty import path gives the same report."""
        # The import path is ignored by this importer.
        report = self.importer.import_data(base_path=empty_import_path)
        assert isinstance(report, str)
        assert report == "FinalImporter: Updated 19 objects"


class TestImporterDates:
    """Tests for the dates after the content and final importers have run."""

    @pytest.fixture(autouse=True)
    def _init(self, portal, base_import_path, load_json):
        """Run the content importer and then the final importer."""
        self.portal = portal
        content.ContentImporter(self.portal).import_data(base_path=base_import_path)
        final.FinalImporter(portal).import_data(base_path=base_import_path)

    @pytest.mark.parametrize(
        "uid,method_name,value",
        [
            (
                "35661c9bb5be42c68f665aa1ed291418",
                "created",
                "2024-02-13T18:16:04+00:00",
            ),
            (
                "35661c9bb5be42c68f665aa1ed291418",
                "modified",
                "2024-02-13T18:16:04+00:00",
            ),
            (
                "3e0dd7c4b2714eafa1d6fc6a1493f953",
                "created",
                "2024-03-19T19:02:18+00:00",
            ),
            (
                "3e0dd7c4b2714eafa1d6fc6a1493f953",
                "modified",
                "2024-03-19T19:02:18+00:00",
            ),
            (
                "e7359727ace64e609b79c4091c38822a",
                "created",
                "2024-02-13T18:15:56+00:00",
            ),
            # Note: this would fail without the final importer, because this
            # is a folder that gets modified later when a document is added.
            (
                "e7359727ace64e609b79c4091c38822a",
                "modified",
                "2024-02-13T20:51:06+00:00",
            ),
        ],
    )
    def test_date_is_set(self, uid, method_name, value):
        """The date accessor of the object returns the given value."""
        from plone.exportimport.utils.content import object_from_uid

        date_accessor = getattr(object_from_uid(uid), method_name)
        assert date_accessor() == DateTime(value)

0 comments on commit 0c08c81

Please sign in to comment.