From a5051ff08e1ab1b75cec369b6a1a2cf464d048e1 Mon Sep 17 00:00:00 2001 From: Matthias Dellweg Date: Fri, 25 Nov 2022 16:08:28 +0100 Subject: [PATCH] Reimplemented labels as HStore field This solves the imminent performance issues. fixes #3400 --- CHANGES/3400.bugfix | 1 + CHANGES/plugin_api/3400.removal | 2 + pulpcore/app/migrations/0097_pulp_labels.py | 92 +++++++++++++++++++ pulpcore/app/models/__init__.py | 2 + pulpcore/app/models/base.py | 79 +++++++++++++++- pulpcore/app/serializers/__init__.py | 1 + pulpcore/app/serializers/base.py | 5 + pulpcore/app/serializers/fields.py | 19 ++++ pulpcore/app/serializers/publication.py | 4 +- pulpcore/app/serializers/repository.py | 6 +- pulpcore/app/settings.py | 1 + pulpcore/app/viewsets/custom_filters.py | 57 +++++++++++- pulpcore/app/viewsets/publication.py | 4 +- pulpcore/app/viewsets/repository.py | 6 +- pulpcore/plugin/models/__init__.py | 4 +- pulpcore/tests/unit/models/test_repository.py | 14 +-- .../tests/unit/serializers/test_repository.py | 2 +- 17 files changed, 270 insertions(+), 29 deletions(-) create mode 100644 CHANGES/3400.bugfix create mode 100644 CHANGES/plugin_api/3400.removal create mode 100644 pulpcore/app/migrations/0097_pulp_labels.py diff --git a/CHANGES/3400.bugfix b/CHANGES/3400.bugfix new file mode 100644 index 00000000000..d3b965d2d58 --- /dev/null +++ b/CHANGES/3400.bugfix @@ -0,0 +1 @@ +Fixed label querying performance issues. diff --git a/CHANGES/plugin_api/3400.removal b/CHANGES/plugin_api/3400.removal new file mode 100644 index 00000000000..9a8da109f11 --- /dev/null +++ b/CHANGES/plugin_api/3400.removal @@ -0,0 +1,2 @@ +Deprecated model ``Label`` and serializer field ``LabelField`` and ``LabelSelectFilter`` for +removal in 3.25. diff --git a/pulpcore/app/migrations/0097_pulp_labels.py b/pulpcore/app/migrations/0097_pulp_labels.py new file mode 100644 index 00000000000..a5830ffc2be --- /dev/null +++ b/pulpcore/app/migrations/0097_pulp_labels.py @@ -0,0 +1,92 @@ +# Generated by Django 3.2.16 on 2022-11-28 08:30 + +import django.contrib.postgres.fields.hstore +from django.db import migrations + +import os +from django.contrib.postgres.operations import HStoreExtension +from django.db.models.expressions import OuterRef, RawSQL +from django.apps import apps as global_apps +from pulpcore.app.apps import PulpPluginAppConfig + + +def copy_labels_up(apps, schema_editor): + ContentType = apps.get_model("contenttypes", "ContentType") + Label = apps.get_model("core", "Label") + + labeled_ctypes = [ContentType.objects.get(pk=pk) for pk in Label.objects.values_list("content_type", flat=True).distinct()] + labeled_models_ctypes = [(apps.get_model(ctype.app_label, ctype.model), ctype) for ctype in labeled_ctypes] + for model, ctype in labeled_models_ctypes: + label_subq = Label.objects.filter(content_type=ctype, object_id=OuterRef("pulp_id")).annotate(label_data=RawSQL("hstore(array_agg(key), array_agg(value))", [])).values("label_data") + model.objects.update(pulp_labels=label_subq) + Label.objects.filter(content_type=ctype).delete() + + if Label.objects.count() != 0: + raise RuntimeError("Not all labels could be migrated properly. Please raise an issue and stand by for further investigation.") + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0096_alter_task_logging_cid'), + ('contenttypes', '0002_remove_content_type_name'), + ] + # Depend on all installed plugins containing migrations to be able to use their models. + for plugin_config in global_apps.app_configs.values(): + if isinstance(plugin_config, PulpPluginAppConfig) and os.path.exists(plugin_config.path + "/migrations/0001_initial.py"): + dependencies.append((plugin_config.label, '0001_initial')) + + operations = [ + HStoreExtension(), + migrations.AddField( + model_name='alternatecontentsource', + name='pulp_labels', + field=django.contrib.postgres.fields.hstore.HStoreField(default=dict), + ), + migrations.AddField( + model_name='basedistribution', + name='pulp_labels', + field=django.contrib.postgres.fields.hstore.HStoreField(default=dict), + ), + migrations.AddField( + model_name='content', + name='pulp_labels', + field=django.contrib.postgres.fields.hstore.HStoreField(default=dict), + ), + migrations.AddField( + model_name='contentguard', + name='pulp_labels', + field=django.contrib.postgres.fields.hstore.HStoreField(default=dict), + ), + migrations.AddField( + model_name='distribution', + name='pulp_labels', + field=django.contrib.postgres.fields.hstore.HStoreField(default=dict), + ), + migrations.AddField( + model_name='exporter', + name='pulp_labels', + field=django.contrib.postgres.fields.hstore.HStoreField(default=dict), + ), + migrations.AddField( + model_name='importer', + name='pulp_labels', + field=django.contrib.postgres.fields.hstore.HStoreField(default=dict), + ), + migrations.AddField( + model_name='publication', + name='pulp_labels', + field=django.contrib.postgres.fields.hstore.HStoreField(default=dict), + ), + migrations.AddField( + model_name='remote', + name='pulp_labels', + field=django.contrib.postgres.fields.hstore.HStoreField(default=dict), + ), + migrations.AddField( + model_name='repository', + name='pulp_labels', + field=django.contrib.postgres.fields.hstore.HStoreField(default=dict), + ), + migrations.RunPython(code=copy_labels_up, reverse_code=migrations.RunPython.noop, elidable=True), + ] diff --git a/pulpcore/app/models/__init__.py b/pulpcore/app/models/__init__.py index f1bc11b38af..d320b154592 100644 --- a/pulpcore/app/models/__init__.py +++ b/pulpcore/app/models/__init__.py @@ -4,6 +4,8 @@ from .base import ( # noqa BaseModel, Label, + LabeledBaseModel, + LabeledMasterModel, MasterModel, pulp_uuid, ) diff --git a/pulpcore/app/models/base.py b/pulpcore/app/models/base.py index 653601323e5..1be5d289da7 100644 --- a/pulpcore/app/models/base.py +++ b/pulpcore/app/models/base.py @@ -3,11 +3,14 @@ from django.contrib.contenttypes.fields import GenericRelation, GenericForeignKey from django.contrib.contenttypes.models import ContentType +from django.contrib.postgres.fields import HStoreField from django.db import models from django.db.models import options from django.db.models.base import ModelBase from django_lifecycle import LifecycleModel +from pulpcore.app.loggers import deprecation_logger + def pulp_uuid(): """ @@ -43,6 +46,13 @@ class Label(LifecycleModel): content_object = GenericForeignKey("content_type", "object_id", for_concrete_model=False) + def __init__(self, *args, **kwargs): + deprecation_logger.warning( + "'Label' is deprecated and will be removed in pulpcore==3.25;" + " use an 'HStoreField' named 'pulp_labels' instead." + ) + super().__init__(*args, **kwargs) + class Meta: unique_together = [["content_type", "object_id", "key"]] @@ -59,7 +69,8 @@ class BaseModel(LifecycleModel): pulp_last_updated (models.DateTimeField): Last updated timestamp UTC. Relations: - pulp_labels (GenericRelation): A list of key/value labels. + user_roles (GenericRelation): List of user role associations with this object. + group_roles (GenericRelation): List of group role associations with this object. References: @@ -71,7 +82,6 @@ class BaseModel(LifecycleModel): pulp_id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) pulp_created = models.DateTimeField(auto_now_add=True) pulp_last_updated = models.DateTimeField(auto_now=True, null=True) - pulp_labels = GenericRelation(Label) user_roles = GenericRelation("core.UserRole") group_roles = GenericRelation("core.GroupRole") @@ -90,6 +100,37 @@ def __repr__(self): return str(self) +class LabeledBaseModel(BaseModel): + """ + Base model class for all Pulp models using deprecated labels. + + This class is DEPRECATED and scheduled for removal in 3.25. + + This model inherits from `LifecycleModel` which allows all Pulp models to be used with + `django-lifecycle`. + + Fields: + pulp_created (models.DateTimeField): Created timestamp UTC. + pulp_last_updated (models.DateTimeField): Last updated timestamp UTC. + + Relations: + pulp_labels (GenericRelation): A list of key/value labels. + user_roles (GenericRelation): List of user role associations with this object. + group_roles (GenericRelation): List of group role associations with this object. + + References: + + * https://docs.djangoproject.com/en/3.2/topics/db/models/#automatic-primary-key-fields + * https://rsinger86.github.io/django-lifecycle/ + + """ + + pulp_labels = GenericRelation("core.Labels") + + class Meta: + abstract = True + + class MasterModelMeta(ModelBase): def __new__(cls, name, bases, attrs, **kwargs): """Override __new__ to set the default_related_name.""" @@ -147,6 +188,7 @@ class MasterModel(BaseModel, metaclass=MasterModelMeta): # This field must have a value when models are saved, and defaults to the value of # the TYPE attribute on the Model being saved (seen above). pulp_type = models.TextField(null=False, default=None, db_index=True) + pulp_labels = HStoreField(default=dict) class Meta: abstract = True @@ -247,3 +289,36 @@ def master_model(options): options.Options.master_model = property(master_model) + + +class LabeledMasterModel(MasterModel): + """ + Base model for the "Master" model in a "Master-Detail" relationship using deprecated labels. + + This class is DEPRECATED and scheduled for removal in 3.25. + + Provides methods for casting down to detail types, back up to the master type, + as well as a model field for tracking the type. + + Attributes: + + TYPE (str): Default constant value saved into the ``pulp_type`` + field of Model instances + + Fields: + + pulp_type: The user-facing string identifying the detail type of this model + + Warning: + Subclasses of this class rely on there being no other parent/child Model + relationships than the Master/Detail relationship. All subclasses must use + only abstract Model base classes for MasterModel to behave properly. + Specifically, OneToOneField relationships must not be used in any MasterModel + subclass. + + """ + + pulp_labels = GenericRelation("core.Labels") + + class Meta: + abstract = True diff --git a/pulpcore/app/serializers/__init__.py b/pulpcore/app/serializers/__init__.py index 6db2898904c..bbb7edae1d9 100644 --- a/pulpcore/app/serializers/__init__.py +++ b/pulpcore/app/serializers/__init__.py @@ -34,6 +34,7 @@ RepositoryVersionIdentityField, relative_path_validator, TaskGroupStatusCountField, + pulp_labels_validator, ) from .access_policy import AccessPolicySerializer # noqa from .acs import ( # noqa diff --git a/pulpcore/app/serializers/base.py b/pulpcore/app/serializers/base.py index 94123922d96..b14e07914e6 100644 --- a/pulpcore/app/serializers/base.py +++ b/pulpcore/app/serializers/base.py @@ -21,6 +21,7 @@ get_viewset_for_model, get_request_without_query_params, ) +from pulpcore.app.serializers import LabelsField log = getLogger(__name__) @@ -180,6 +181,8 @@ def create(self, validated_data): Returns: instance: The created of resource """ + if not isinstance(self.fields.get("pulp_labels"), LabelsField): + return super().create(validated_data) labels = validated_data.pop("pulp_labels", {}) with transaction.atomic(): instance = super().create(validated_data) @@ -196,6 +199,8 @@ def update(self, instance, validated_data): Returns: instance: The updated instance of resource """ + if not isinstance(self.fields.get("pulp_labels"), LabelsField): + return super().update(validated_data) labels = validated_data.pop("pulp_labels", None) with transaction.atomic(): instance = super().update(instance, validated_data) diff --git a/pulpcore/app/serializers/fields.py b/pulpcore/app/serializers/fields.py index 96c9c2f0f5a..902c75d47d1 100644 --- a/pulpcore/app/serializers/fields.py +++ b/pulpcore/app/serializers/fields.py @@ -10,6 +10,7 @@ from pulpcore.app import models from pulpcore.app.serializers import DetailIdentityField, IdentityField, RelatedField +from pulpcore.app.loggers import deprecation_logger def relative_path_validator(relative_path): @@ -381,6 +382,13 @@ def get_attribute(self, instance): class LabelsField(serializers.JSONField): """A serializer field for pulp_labels.""" + def __init__(self, *args, **kwargs): + deprecation_logger.warning( + "'LabelsField' is deprecated and will be removed in pulpcore==3.25;" + " use an 'HStoreField' named 'pulp_labels' instead." + ) + super().__init__(*args, **kwargs) + def to_representation(self, labels): """ Serializes list of labels to a dict. @@ -421,3 +429,14 @@ def to_internal_value(self, data): ) return data + + +def pulp_labels_validator(value): + """A validator designed for the pulp_labels field.""" + for key, value in value.items(): + if not re.match(r"^[\w ]+$", key): + raise serializers.ValidationError(_("Key '{}' contains non-alphanumerics.").format(key)) + if re.search(r"[,()]", value): + raise serializers.ValidationError( + _("Key '{}' contains value with comma or parenthesis.").format(key) + ) diff --git a/pulpcore/app/serializers/publication.py b/pulpcore/app/serializers/publication.py index 9851af0153d..5505d5f3352 100644 --- a/pulpcore/app/serializers/publication.py +++ b/pulpcore/app/serializers/publication.py @@ -11,9 +11,9 @@ DetailIdentityField, DetailRelatedField, GetOrCreateSerializerMixin, - LabelsField, ModelSerializer, RepositoryVersionRelatedField, + pulp_labels_validator, validate_unknown_fields, ) from pulpcore.app.serializers.user import GroupUserSerializer, GroupSerializer @@ -159,7 +159,7 @@ class DistributionSerializer(ModelSerializer): """ pulp_href = DetailIdentityField(view_name_pattern=r"distributions(-.*/.*)-detail") - pulp_labels = LabelsField(required=False) + pulp_labels = serializers.HStoreField(required=False, validators=[pulp_labels_validator]) base_path = serializers.CharField( help_text=_( diff --git a/pulpcore/app/serializers/repository.py b/pulpcore/app/serializers/repository.py index 8c298d41c62..c20e92f9bdb 100644 --- a/pulpcore/app/serializers/repository.py +++ b/pulpcore/app/serializers/repository.py @@ -10,7 +10,6 @@ from pulpcore.app.serializers import ( DetailIdentityField, DetailRelatedField, - LabelsField, LatestVersionField, ModelSerializer, RepositoryVersionIdentityField, @@ -18,12 +17,13 @@ RepositoryVersionsIdentityFromRepositoryField, ValidateFieldsMixin, HiddenFieldsMixin, + pulp_labels_validator, ) class RepositorySerializer(ModelSerializer): pulp_href = DetailIdentityField(view_name_pattern=r"repositories(-.*/.*)-detail") - pulp_labels = LabelsField(required=False) + pulp_labels = serializers.HStoreField(required=False, validators=[pulp_labels_validator]) versions_href = RepositoryVersionsIdentityFromRepositoryField() latest_version_href = LatestVersionField() name = serializers.CharField( @@ -78,7 +78,7 @@ class RemoteSerializer(ModelSerializer, HiddenFieldsMixin): """ pulp_href = DetailIdentityField(view_name_pattern=r"remotes(-.*/.*)-detail") - pulp_labels = LabelsField(required=False) + pulp_labels = serializers.HStoreField(required=False, validators=[pulp_labels_validator]) name = serializers.CharField( help_text=_("A unique name for this remote."), validators=[UniqueValidator(queryset=models.Remote.objects.all())], diff --git a/pulpcore/app/settings.py b/pulpcore/app/settings.py index fc92c24563d..e3f21e5fb96 100644 --- a/pulpcore/app/settings.py +++ b/pulpcore/app/settings.py @@ -70,6 +70,7 @@ "django.contrib.sessions", "django.contrib.messages", "django.contrib.staticfiles", + "django.contrib.postgres", "import_export", # third-party "django_filters", diff --git a/pulpcore/app/viewsets/custom_filters.py b/pulpcore/app/viewsets/custom_filters.py index 6658bf080a7..a2f07bd657c 100644 --- a/pulpcore/app/viewsets/custom_filters.py +++ b/pulpcore/app/viewsets/custom_filters.py @@ -16,6 +16,7 @@ from pulpcore.app.models import ContentArtifact, Label, RepositoryVersion, Publication from pulpcore.app.viewsets import NamedModelViewSet +from pulpcore.app.loggers import deprecation_logger class ReservedResourcesFilter(Filter): @@ -273,13 +274,67 @@ class CharInFilter(BaseInFilter, CharFilter): pass -class LabelSelectFilter(Filter): +class LabelFilter(Filter): """Filter to get resources that match a label filter string.""" def __init__(self, *args, **kwargs): kwargs.setdefault("help_text", _("Filter labels by search string")) super().__init__(*args, **kwargs) + def filter(self, qs, value): + """ + Args: + qs (django.db.models.query.QuerySet): The Model queryset + value (string): label search query + + Returns: + Queryset of the Models filtered by label(s) + + Raises: + rest_framework.exceptions.ValidationError: on invalid search string + """ + if value is None: + # user didn't supply a value + return qs + + for term in value.split(","): + match = re.match(r"(!?[\w\s]+)(=|!=|~)?(.*)?", term) + if not match: + raise DRFValidationError(_("Invalid search term: '{}'.").format(term)) + key, op, val = match.groups() + + if key.startswith("!") and op: + raise DRFValidationError(_("Cannot use an operator with '{}'.").format(key)) + + if op == "=": + qs = qs.filter(**{f"pulp_labels__{key}": val}) + elif op == "!=": + qs = qs.filter(pulp_labels__has_key=key).exclude(**{f"pulp_labels__{key}": val}) + elif op == "~": + qs = qs.filter(**{f"pulp_labels__{key}__icontains": val}) + else: + # 'foo', '!foo' + if key.startswith("!"): + qs = qs.exclude(pulp_labels__has_key=key[1:]) + else: + qs = qs.filter(pulp_labels__has_key=key) + + return qs + + +class LabelSelectFilter(Filter): + """Filter to get resources that match a label filter string. DEPRECATED.""" + + def __init__(self, *args, **kwargs): + deprecation_logger.warning( + "'LabelSelectFilter' is deprecated and will be removed in pulpcore==3.25;" + " use 'LabelFilter' instead." + ) + super().__init__(*args, **kwargs) + + kwargs.setdefault("help_text", _("Filter labels by search string")) + super().__init__(*args, **kwargs) + def filter(self, qs, value): """ Args: diff --git a/pulpcore/app/viewsets/publication.py b/pulpcore/app/viewsets/publication.py index 001b9ef7b05..2e23bca7a08 100644 --- a/pulpcore/app/viewsets/publication.py +++ b/pulpcore/app/viewsets/publication.py @@ -32,7 +32,7 @@ from pulpcore.app.viewsets.base import DATETIME_FILTER_OPTIONS, NAME_FILTER_OPTIONS from pulpcore.app.viewsets.custom_filters import ( DistributionWithContentFilter, - LabelSelectFilter, + LabelFilter, RepositoryVersionFilter, ) @@ -321,7 +321,7 @@ class DistributionFilter(BaseFilterSet): # /?name__in=foo,bar # /?base_path__contains=foo # /?base_path__icontains=foo - pulp_label_select = LabelSelectFilter() + pulp_label_select = LabelFilter() with_content = DistributionWithContentFilter() class Meta: diff --git a/pulpcore/app/viewsets/repository.py b/pulpcore/app/viewsets/repository.py index 7b1cf94d654..11afe8343ef 100644 --- a/pulpcore/app/viewsets/repository.py +++ b/pulpcore/app/viewsets/repository.py @@ -28,13 +28,13 @@ NamedModelViewSet, ) from pulpcore.app.viewsets.base import DATETIME_FILTER_OPTIONS, NAME_FILTER_OPTIONS -from pulpcore.app.viewsets.custom_filters import LabelSelectFilter +from pulpcore.app.viewsets.custom_filters import LabelFilter from pulpcore.tasking.tasks import dispatch from pulpcore.filters import HyperlinkRelatedFilter class RepositoryFilter(BaseFilterSet): - pulp_label_select = LabelSelectFilter() + pulp_label_select = LabelFilter() remote = HyperlinkRelatedFilter(allow_null=True) class Meta: @@ -220,7 +220,7 @@ class RemoteFilter(BaseFilterSet): - extend `fields` with specific ones """ - pulp_label_select = LabelSelectFilter() + pulp_label_select = LabelFilter() class Meta: model = Remote diff --git a/pulpcore/plugin/models/__init__.py b/pulpcore/plugin/models/__init__.py index c2bb5297cce..7ebd5fa5b84 100644 --- a/pulpcore/plugin/models/__init__.py +++ b/pulpcore/plugin/models/__init__.py @@ -8,7 +8,7 @@ AutoAddObjPermsMixin, Artifact, AsciiArmoredDetachedSigningService, - BaseModel, + LabeledBaseModel as BaseModel, Content, ContentArtifact, ContentManager, @@ -24,7 +24,7 @@ Importer, FilesystemExporter, Label, - MasterModel, + LabeledMasterModel as MasterModel, ProgressReport, Publication, PublishedArtifact, diff --git a/pulpcore/tests/unit/models/test_repository.py b/pulpcore/tests/unit/models/test_repository.py index 47300520551..de80c41e4c9 100644 --- a/pulpcore/tests/unit/models/test_repository.py +++ b/pulpcore/tests/unit/models/test_repository.py @@ -1,8 +1,7 @@ from itertools import compress -from uuid import uuid4 from django.test import TestCase -from pulpcore.plugin.models import Content, Label, Repository, RepositoryVersion +from pulpcore.plugin.models import Content, Repository, RepositoryVersion class RepositoryVersionTestCase(TestCase): @@ -290,14 +289,3 @@ def test_next_version_with_multiple_versions(self): self.assertEqual( self.repository.latest_version().number, 1, self.repository.latest_version().number ) - - def test_label_cascade(self): - """Check that a repo's labels are deleted when it's deleted.""" - repo = Repository.objects.create(name=uuid4()) - repo.CONTENT_TYPES = [Content] - repo.save() - - label = repo.pulp_labels.create(key="brown", value="radagast") - self.assertTrue(Label.objects.filter(pk=label.pk).exists()) - repo.delete() - self.assertFalse(Label.objects.filter(pk=label.pk).exists()) diff --git a/pulpcore/tests/unit/serializers/test_repository.py b/pulpcore/tests/unit/serializers/test_repository.py index f661c9bb3fb..6a63cdf998e 100644 --- a/pulpcore/tests/unit/serializers/test_repository.py +++ b/pulpcore/tests/unit/serializers/test_repository.py @@ -11,7 +11,7 @@ class TestRemoteSerializer(TestCase): - minimal_data = {"name": "test", "url": "http://whatever"} + minimal_data = {"name": "test", "url": "http://whatever", "pulp_labels": {}} def test_validate_proxy_creds_update(self): remote = SimpleNamespace(