Skip to content

Commit

Permalink
Reimplemented labels as HStore field
Browse files Browse the repository at this point in the history
This solves the imminent performance issues.

fixes #3400
  • Loading branch information
mdellweg committed Nov 30, 2022
1 parent ff43262 commit a5051ff
Show file tree
Hide file tree
Showing 17 changed files with 270 additions and 29 deletions.
1 change: 1 addition & 0 deletions CHANGES/3400.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fixed label querying performance issues.
2 changes: 2 additions & 0 deletions CHANGES/plugin_api/3400.removal
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Deprecated model ``Label`` and serializer field ``LabelField`` and ``LabelSelectFilter`` for
removal in 3.25.
92 changes: 92 additions & 0 deletions pulpcore/app/migrations/0097_pulp_labels.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# Generated by Django 3.2.16 on 2022-11-28 08:30

import django.contrib.postgres.fields.hstore
from django.db import migrations

import os
from django.contrib.postgres.operations import HStoreExtension
from django.db.models.expressions import OuterRef, RawSQL
from django.apps import apps as global_apps
from pulpcore.app.apps import PulpPluginAppConfig


def copy_labels_up(apps, schema_editor):
ContentType = apps.get_model("contenttypes", "ContentType")
Label = apps.get_model("core", "Label")

labeled_ctypes = [ContentType.objects.get(pk=pk) for pk in Label.objects.values_list("content_type", flat=True).distinct()]
labeled_models_ctypes = [(apps.get_model(ctype.app_label, ctype.model), ctype) for ctype in labeled_ctypes]
for model, ctype in labeled_models_ctypes:
label_subq = Label.objects.filter(content_type=ctype, object_id=OuterRef("pulp_id")).annotate(label_data=RawSQL("hstore(array_agg(key), array_agg(value))", [])).values("label_data")
model.objects.update(pulp_labels=label_subq)
Label.objects.filter(content_type=ctype).delete()

if Label.objects.count() != 0:
raise RuntimeError("Not all labels could be migrated properly. Please raise an issue and stand by for further investigation.")


class Migration(migrations.Migration):

dependencies = [
('core', '0096_alter_task_logging_cid'),
('contenttypes', '0002_remove_content_type_name'),
]
# Depend on all installed plugins containing migrations to be able to use their models.
for plugin_config in global_apps.app_configs.values():
if isinstance(plugin_config, PulpPluginAppConfig) and os.path.exists(plugin_config.path + "/migrations/0001_initial.py"):
dependencies.append((plugin_config.label, '0001_initial'))

operations = [
HStoreExtension(),
migrations.AddField(
model_name='alternatecontentsource',
name='pulp_labels',
field=django.contrib.postgres.fields.hstore.HStoreField(default=dict),
),
migrations.AddField(
model_name='basedistribution',
name='pulp_labels',
field=django.contrib.postgres.fields.hstore.HStoreField(default=dict),
),
migrations.AddField(
model_name='content',
name='pulp_labels',
field=django.contrib.postgres.fields.hstore.HStoreField(default=dict),
),
migrations.AddField(
model_name='contentguard',
name='pulp_labels',
field=django.contrib.postgres.fields.hstore.HStoreField(default=dict),
),
migrations.AddField(
model_name='distribution',
name='pulp_labels',
field=django.contrib.postgres.fields.hstore.HStoreField(default=dict),
),
migrations.AddField(
model_name='exporter',
name='pulp_labels',
field=django.contrib.postgres.fields.hstore.HStoreField(default=dict),
),
migrations.AddField(
model_name='importer',
name='pulp_labels',
field=django.contrib.postgres.fields.hstore.HStoreField(default=dict),
),
migrations.AddField(
model_name='publication',
name='pulp_labels',
field=django.contrib.postgres.fields.hstore.HStoreField(default=dict),
),
migrations.AddField(
model_name='remote',
name='pulp_labels',
field=django.contrib.postgres.fields.hstore.HStoreField(default=dict),
),
migrations.AddField(
model_name='repository',
name='pulp_labels',
field=django.contrib.postgres.fields.hstore.HStoreField(default=dict),
),
migrations.RunPython(code=copy_labels_up, reverse_code=migrations.RunPython.noop, elidable=True),
]
2 changes: 2 additions & 0 deletions pulpcore/app/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from .base import ( # noqa
BaseModel,
Label,
LabeledBaseModel,
LabeledMasterModel,
MasterModel,
pulp_uuid,
)
Expand Down
79 changes: 77 additions & 2 deletions pulpcore/app/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,14 @@

from django.contrib.contenttypes.fields import GenericRelation, GenericForeignKey
from django.contrib.contenttypes.models import ContentType
from django.contrib.postgres.fields import HStoreField
from django.db import models
from django.db.models import options
from django.db.models.base import ModelBase
from django_lifecycle import LifecycleModel

from pulpcore.app.loggers import deprecation_logger


def pulp_uuid():
"""
Expand Down Expand Up @@ -43,6 +46,13 @@ class Label(LifecycleModel):

content_object = GenericForeignKey("content_type", "object_id", for_concrete_model=False)

def __init__(self, *args, **kwargs):
deprecation_logger.warning(
"'Label' is deprecated and will be removed in pulpcore==3.25;"
" use an 'HStoreField' named 'pulp_labels' instead."
)
super().__init__(*args, **kwargs)

class Meta:
unique_together = [["content_type", "object_id", "key"]]

Expand All @@ -59,7 +69,8 @@ class BaseModel(LifecycleModel):
pulp_last_updated (models.DateTimeField): Last updated timestamp UTC.
Relations:
pulp_labels (GenericRelation): A list of key/value labels.
user_roles (GenericRelation): List of user role associations with this object.
group_roles (GenericRelation): List of group role associations with this object.
References:
Expand All @@ -71,7 +82,6 @@ class BaseModel(LifecycleModel):
pulp_id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
pulp_created = models.DateTimeField(auto_now_add=True)
pulp_last_updated = models.DateTimeField(auto_now=True, null=True)
pulp_labels = GenericRelation(Label)
user_roles = GenericRelation("core.UserRole")
group_roles = GenericRelation("core.GroupRole")

Expand All @@ -90,6 +100,37 @@ def __repr__(self):
return str(self)


class LabeledBaseModel(BaseModel):
"""
Base model class for all Pulp models using deprecated labels.
This class is DEPRECATED and scheduled for removal in 3.25.
This model inherits from `LifecycleModel` which allows all Pulp models to be used with
`django-lifecycle`.
Fields:
pulp_created (models.DateTimeField): Created timestamp UTC.
pulp_last_updated (models.DateTimeField): Last updated timestamp UTC.
Relations:
pulp_labels (GenericRelation): A list of key/value labels.
user_roles (GenericRelation): List of user role associations with this object.
group_roles (GenericRelation): List of group role associations with this object.
References:
* https://docs.djangoproject.com/en/3.2/topics/db/models/#automatic-primary-key-fields
* https://rsinger86.github.io/django-lifecycle/
"""

pulp_labels = GenericRelation("core.Labels")

class Meta:
abstract = True


class MasterModelMeta(ModelBase):
def __new__(cls, name, bases, attrs, **kwargs):
"""Override __new__ to set the default_related_name."""
Expand Down Expand Up @@ -147,6 +188,7 @@ class MasterModel(BaseModel, metaclass=MasterModelMeta):
# This field must have a value when models are saved, and defaults to the value of
# the TYPE attribute on the Model being saved (seen above).
pulp_type = models.TextField(null=False, default=None, db_index=True)
pulp_labels = HStoreField(default=dict)

class Meta:
abstract = True
Expand Down Expand Up @@ -247,3 +289,36 @@ def master_model(options):


options.Options.master_model = property(master_model)


class LabeledMasterModel(MasterModel):
"""
Base model for the "Master" model in a "Master-Detail" relationship using deprecated labels.
This class is DEPRECATED and scheduled for removal in 3.25.
Provides methods for casting down to detail types, back up to the master type,
as well as a model field for tracking the type.
Attributes:
TYPE (str): Default constant value saved into the ``pulp_type``
field of Model instances
Fields:
pulp_type: The user-facing string identifying the detail type of this model
Warning:
Subclasses of this class rely on there being no other parent/child Model
relationships than the Master/Detail relationship. All subclasses must use
only abstract Model base classes for MasterModel to behave properly.
Specifically, OneToOneField relationships must not be used in any MasterModel
subclass.
"""

pulp_labels = GenericRelation("core.Labels")

class Meta:
abstract = True
1 change: 1 addition & 0 deletions pulpcore/app/serializers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
RepositoryVersionIdentityField,
relative_path_validator,
TaskGroupStatusCountField,
pulp_labels_validator,
)
from .access_policy import AccessPolicySerializer # noqa
from .acs import ( # noqa
Expand Down
5 changes: 5 additions & 0 deletions pulpcore/app/serializers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
get_viewset_for_model,
get_request_without_query_params,
)
from pulpcore.app.serializers import LabelsField


log = getLogger(__name__)
Expand Down Expand Up @@ -180,6 +181,8 @@ def create(self, validated_data):
Returns:
instance: The created of resource
"""
if not isinstance(self.fields.get("pulp_labels"), LabelsField):
return super().create(validated_data)
labels = validated_data.pop("pulp_labels", {})
with transaction.atomic():
instance = super().create(validated_data)
Expand All @@ -196,6 +199,8 @@ def update(self, instance, validated_data):
Returns:
instance: The updated instance of resource
"""
if not isinstance(self.fields.get("pulp_labels"), LabelsField):
return super().update(validated_data)
labels = validated_data.pop("pulp_labels", None)
with transaction.atomic():
instance = super().update(instance, validated_data)
Expand Down
19 changes: 19 additions & 0 deletions pulpcore/app/serializers/fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

from pulpcore.app import models
from pulpcore.app.serializers import DetailIdentityField, IdentityField, RelatedField
from pulpcore.app.loggers import deprecation_logger


def relative_path_validator(relative_path):
Expand Down Expand Up @@ -381,6 +382,13 @@ def get_attribute(self, instance):
class LabelsField(serializers.JSONField):
"""A serializer field for pulp_labels."""

def __init__(self, *args, **kwargs):
deprecation_logger.warning(
"'LabelsField' is deprecated and will be removed in pulpcore==3.25;"
" use an 'HStoreField' named 'pulp_labels' instead."
)
super().__init__(*args, **kwargs)

def to_representation(self, labels):
"""
Serializes list of labels to a dict.
Expand Down Expand Up @@ -421,3 +429,14 @@ def to_internal_value(self, data):
)

return data


def pulp_labels_validator(value):
"""A validator designed for the pulp_labels field."""
for key, value in value.items():
if not re.match(r"^[\w ]+$", key):
raise serializers.ValidationError(_("Key '{}' contains non-alphanumerics.").format(key))
if re.search(r"[,()]", value):
raise serializers.ValidationError(
_("Key '{}' contains value with comma or parenthesis.").format(key)
)
4 changes: 2 additions & 2 deletions pulpcore/app/serializers/publication.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@
DetailIdentityField,
DetailRelatedField,
GetOrCreateSerializerMixin,
LabelsField,
ModelSerializer,
RepositoryVersionRelatedField,
pulp_labels_validator,
validate_unknown_fields,
)
from pulpcore.app.serializers.user import GroupUserSerializer, GroupSerializer
Expand Down Expand Up @@ -159,7 +159,7 @@ class DistributionSerializer(ModelSerializer):
"""

pulp_href = DetailIdentityField(view_name_pattern=r"distributions(-.*/.*)-detail")
pulp_labels = LabelsField(required=False)
pulp_labels = serializers.HStoreField(required=False, validators=[pulp_labels_validator])

base_path = serializers.CharField(
help_text=_(
Expand Down
6 changes: 3 additions & 3 deletions pulpcore/app/serializers/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,20 @@
from pulpcore.app.serializers import (
DetailIdentityField,
DetailRelatedField,
LabelsField,
LatestVersionField,
ModelSerializer,
RepositoryVersionIdentityField,
RepositoryVersionRelatedField,
RepositoryVersionsIdentityFromRepositoryField,
ValidateFieldsMixin,
HiddenFieldsMixin,
pulp_labels_validator,
)


class RepositorySerializer(ModelSerializer):
pulp_href = DetailIdentityField(view_name_pattern=r"repositories(-.*/.*)-detail")
pulp_labels = LabelsField(required=False)
pulp_labels = serializers.HStoreField(required=False, validators=[pulp_labels_validator])
versions_href = RepositoryVersionsIdentityFromRepositoryField()
latest_version_href = LatestVersionField()
name = serializers.CharField(
Expand Down Expand Up @@ -78,7 +78,7 @@ class RemoteSerializer(ModelSerializer, HiddenFieldsMixin):
"""

pulp_href = DetailIdentityField(view_name_pattern=r"remotes(-.*/.*)-detail")
pulp_labels = LabelsField(required=False)
pulp_labels = serializers.HStoreField(required=False, validators=[pulp_labels_validator])
name = serializers.CharField(
help_text=_("A unique name for this remote."),
validators=[UniqueValidator(queryset=models.Remote.objects.all())],
Expand Down
1 change: 1 addition & 0 deletions pulpcore/app/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@
"django.contrib.sessions",
"django.contrib.messages",
"django.contrib.staticfiles",
"django.contrib.postgres",
"import_export",
# third-party
"django_filters",
Expand Down
Loading

0 comments on commit a5051ff

Please sign in to comment.