Skip to content

Commit

Permalink
Reimplemented labels as HStore field
Browse files Browse the repository at this point in the history
This solves the imminent performance issues.

fixes #3400
  • Loading branch information
mdellweg committed Nov 28, 2022
1 parent ff43262 commit 796a5fb
Show file tree
Hide file tree
Showing 14 changed files with 183 additions and 80 deletions.
1 change: 1 addition & 0 deletions CHANGES/3400.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fixed label querying performance issues.
2 changes: 2 additions & 0 deletions CHANGES/plugin_api/3400.removal
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Deprecated model ``Label`` and serializer field ``LabelField`` and ``LabelSelectFilter`` for
removal in 3.25.
92 changes: 92 additions & 0 deletions pulpcore/app/migrations/0097_pulp_labels.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# Generated by Django 3.2.16 on 2022-11-28 08:30

import django.contrib.postgres.fields.hstore
from django.db import migrations

import os
from django.contrib.postgres.operations import HStoreExtension
from django.db.models.expressions import OuterRef, RawSQL
from django.apps import apps as global_apps
from pulpcore.app.apps import PulpPluginAppConfig


def copy_labels_up(apps, schema_editor):
ContentType = apps.get_model("contenttypes", "ContentType")
Label = apps.get_model("core", "Label")

labeled_ctypes = [ContentType.objects.get(pk=pk) for pk in Label.objects.values_list("content_type", flat=True).distinct()]
labeled_models_ctypes = [(apps.get_model(ctype.app_label, ctype.model), ctype) for ctype in labeled_ctypes]
for model, ctype in labeled_models_ctypes:
label_subq = Label.objects.filter(content_type=ctype, object_id=OuterRef("pulp_id")).annotate(label_data=RawSQL("hstore(array_agg(key), array_agg(value))", [])).values("label_data")
model.objects.update(pulp_labels=label_subq)
Label.objects.filter(content_type=ctype).delete()

if Label.objects.count() != 0:
raise RuntimeError("Not all labels could be migrated properly. Please raise an issue and stand by for further investigation.")


class Migration(migrations.Migration):

dependencies = [
('core', '0096_alter_task_logging_cid'),
('contenttypes', '0002_remove_content_type_name'),
]
# Depend on all installed plugins containing migrations to be able to use their models.
for plugin_config in global_apps.app_configs.values():
if isinstance(plugin_config, PulpPluginAppConfig) and os.path.exists(plugin_config.path + "/migrations/0001_initial.py"):
dependencies.append((plugin_config.label, '0001_initial'))

operations = [
HStoreExtension(),
migrations.AddField(
model_name='alternatecontentsource',
name='pulp_labels',
field=django.contrib.postgres.fields.hstore.HStoreField(default=dict),
),
migrations.AddField(
model_name='basedistribution',
name='pulp_labels',
field=django.contrib.postgres.fields.hstore.HStoreField(default=dict),
),
migrations.AddField(
model_name='content',
name='pulp_labels',
field=django.contrib.postgres.fields.hstore.HStoreField(default=dict),
),
migrations.AddField(
model_name='contentguard',
name='pulp_labels',
field=django.contrib.postgres.fields.hstore.HStoreField(default=dict),
),
migrations.AddField(
model_name='distribution',
name='pulp_labels',
field=django.contrib.postgres.fields.hstore.HStoreField(default=dict),
),
migrations.AddField(
model_name='exporter',
name='pulp_labels',
field=django.contrib.postgres.fields.hstore.HStoreField(default=dict),
),
migrations.AddField(
model_name='importer',
name='pulp_labels',
field=django.contrib.postgres.fields.hstore.HStoreField(default=dict),
),
migrations.AddField(
model_name='publication',
name='pulp_labels',
field=django.contrib.postgres.fields.hstore.HStoreField(default=dict),
),
migrations.AddField(
model_name='remote',
name='pulp_labels',
field=django.contrib.postgres.fields.hstore.HStoreField(default=dict),
),
migrations.AddField(
model_name='repository',
name='pulp_labels',
field=django.contrib.postgres.fields.hstore.HStoreField(default=dict),
),
migrations.RunPython(code=copy_labels_up, reverse_code=migrations.RunPython.noop, elidable=True),
]
12 changes: 11 additions & 1 deletion pulpcore/app/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,14 @@

from django.contrib.contenttypes.fields import GenericRelation, GenericForeignKey
from django.contrib.contenttypes.models import ContentType
from django.contrib.postgres.fields import HStoreField
from django.db import models
from django.db.models import options
from django.db.models.base import ModelBase
from django_lifecycle import LifecycleModel

from pulpcore.app.loggers import deprecation_logger


def pulp_uuid():
"""
Expand Down Expand Up @@ -43,6 +46,13 @@ class Label(LifecycleModel):

content_object = GenericForeignKey("content_type", "object_id", for_concrete_model=False)

def __init__(self, *args, **kwargs):
deprecation_logger.warning(
"'Label' is deprecated and will be removed in pulpcore==3.25;"
" use an 'HStoreField' named 'pulp_labels' instead."
)
super().__init__(*args, **kwargs)

class Meta:
unique_together = [["content_type", "object_id", "key"]]

Expand Down Expand Up @@ -71,7 +81,6 @@ class BaseModel(LifecycleModel):
pulp_id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
pulp_created = models.DateTimeField(auto_now_add=True)
pulp_last_updated = models.DateTimeField(auto_now=True, null=True)
pulp_labels = GenericRelation(Label)
user_roles = GenericRelation("core.UserRole")
group_roles = GenericRelation("core.GroupRole")

Expand Down Expand Up @@ -147,6 +156,7 @@ class MasterModel(BaseModel, metaclass=MasterModelMeta):
# This field must have a value when models are saved, and defaults to the value of
# the TYPE attribute on the Model being saved (seen above).
pulp_type = models.TextField(null=False, default=None, db_index=True)
pulp_labels = HStoreField(default=dict)

class Meta:
abstract = True
Expand Down
56 changes: 2 additions & 54 deletions pulpcore/app/serializers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,15 @@

from django.core.validators import URLValidator
from django.core.exceptions import ObjectDoesNotExist
from django.db import IntegrityError, transaction
from django.db import IntegrityError
from drf_queryfields.mixins import QueryFieldsMixin
from rest_framework import serializers
from rest_framework_nested.relations import (
NestedHyperlinkedIdentityField,
NestedHyperlinkedRelatedField,
)

from pulpcore.app.models import Label, Task, TaskGroup
from pulpcore.app.models import Task, TaskGroup
from pulpcore.app.util import (
get_view_name_for_model,
get_viewset_for_model,
Expand Down Expand Up @@ -151,58 +151,6 @@ def __init_subclass__(cls, **kwargs):
except AttributeError:
pass

def _update_labels(self, instance, labels):
"""
Update the labels for a Model instance.
Args:
instance (pulpcore.app.models.BaseModel): instance with labels to update
labels (list): labels to set for the instance
"""
instance.pulp_labels.exclude(key__in=labels.keys()).delete()

for key, value in labels.items():
label = instance.pulp_labels.filter(key=key).first()
try:
label = instance.pulp_labels.get(key=key)
if label.value != value:
instance.pulp_labels.filter(key=key).update(value=value)
except Label.DoesNotExist:
instance.pulp_labels.create(key=key, value=value)

def create(self, validated_data):
"""
Created the resource from validated_data.
Args:
validated_data (dict): Validated data to create instance
Returns:
instance: The created of resource
"""
labels = validated_data.pop("pulp_labels", {})
with transaction.atomic():
instance = super().create(validated_data)
self._update_labels(instance, labels)
return instance

def update(self, instance, validated_data):
"""
Update the resource from validated_data.
Args:
validated_data (dict): Validated data to update instance
Returns:
instance: The updated instance of resource
"""
labels = validated_data.pop("pulp_labels", None)
with transaction.atomic():
instance = super().update(instance, validated_data)
if labels is not None:
self._update_labels(instance, labels)
return instance


class _MatchingRegexViewName(object):
"""This is a helper class to help defining object matching rules for master-detail.
Expand Down
8 changes: 8 additions & 0 deletions pulpcore/app/serializers/fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

from pulpcore.app import models
from pulpcore.app.serializers import DetailIdentityField, IdentityField, RelatedField
from pulpcore.app.loggers import deprecation_logger


def relative_path_validator(relative_path):
Expand Down Expand Up @@ -381,6 +382,13 @@ def get_attribute(self, instance):
class LabelsField(serializers.JSONField):
"""A serializer field for pulp_labels."""

def __init__(self, *args, **kwargs):
deprecation_logger.warning(
"'LabelsField' is deprecated and will be removed in pulpcore==3.25;"
" use an 'HStoreField' named 'pulp_labels' instead."
)
super().__init__(*args, **kwargs)

def to_representation(self, labels):
"""
Serializes list of labels to a dict.
Expand Down
3 changes: 1 addition & 2 deletions pulpcore/app/serializers/publication.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
DetailIdentityField,
DetailRelatedField,
GetOrCreateSerializerMixin,
LabelsField,
ModelSerializer,
RepositoryVersionRelatedField,
validate_unknown_fields,
Expand Down Expand Up @@ -159,7 +158,7 @@ class DistributionSerializer(ModelSerializer):
"""

pulp_href = DetailIdentityField(view_name_pattern=r"distributions(-.*/.*)-detail")
pulp_labels = LabelsField(required=False)
pulp_labels = serializers.HStoreField()

base_path = serializers.CharField(
help_text=_(
Expand Down
5 changes: 2 additions & 3 deletions pulpcore/app/serializers/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from pulpcore.app.serializers import (
DetailIdentityField,
DetailRelatedField,
LabelsField,
LatestVersionField,
ModelSerializer,
RepositoryVersionIdentityField,
Expand All @@ -23,7 +22,7 @@

class RepositorySerializer(ModelSerializer):
pulp_href = DetailIdentityField(view_name_pattern=r"repositories(-.*/.*)-detail")
pulp_labels = LabelsField(required=False)
pulp_labels = serializers.HStoreField()
versions_href = RepositoryVersionsIdentityFromRepositoryField()
latest_version_href = LatestVersionField()
name = serializers.CharField(
Expand Down Expand Up @@ -78,7 +77,7 @@ class RemoteSerializer(ModelSerializer, HiddenFieldsMixin):
"""

pulp_href = DetailIdentityField(view_name_pattern=r"remotes(-.*/.*)-detail")
pulp_labels = LabelsField(required=False)
pulp_labels = serializers.HStoreField()
name = serializers.CharField(
help_text=_("A unique name for this remote."),
validators=[UniqueValidator(queryset=models.Remote.objects.all())],
Expand Down
1 change: 1 addition & 0 deletions pulpcore/app/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@
"django.contrib.sessions",
"django.contrib.messages",
"django.contrib.staticfiles",
"django.contrib.postgres",
"import_export",
# third-party
"django_filters",
Expand Down
57 changes: 56 additions & 1 deletion pulpcore/app/viewsets/custom_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

from pulpcore.app.models import ContentArtifact, Label, RepositoryVersion, Publication
from pulpcore.app.viewsets import NamedModelViewSet
from pulpcore.app.loggers import deprecation_logger


class ReservedResourcesFilter(Filter):
Expand Down Expand Up @@ -273,13 +274,67 @@ class CharInFilter(BaseInFilter, CharFilter):
pass


class LabelSelectFilter(Filter):
class LabelFilter(Filter):
"""Filter to get resources that match a label filter string."""

def __init__(self, *args, **kwargs):
kwargs.setdefault("help_text", _("Filter labels by search string"))
super().__init__(*args, **kwargs)

def filter(self, qs, value):
"""
Args:
qs (django.db.models.query.QuerySet): The Model queryset
value (string): label search query
Returns:
Queryset of the Models filtered by label(s)
Raises:
rest_framework.exceptions.ValidationError: on invalid search string
"""
if value is None:
# user didn't supply a value
return qs

for term in value.split(","):
match = re.match(r"(!?[\w\s]+)(=|!=|~)?(.*)?", term)
if not match:
raise DRFValidationError(_("Invalid search term: '{}'.").format(term))
key, op, val = match.groups()

if key.startswith("!") and op:
raise DRFValidationError(_("Cannot use an operator with '{}'.").format(key))

if op == "=":
qs = qs.filter(**{f"pulp_labels__{key}": val})
elif op == "!=":
qs = qs.filter(pulp_labels__has_key=key).exclude(**{f"pulp_labels__{key}": val})
elif op == "~":
qs = qs.filter(**{f"pulp_labels__{key}__icontains": val})
else:
# 'foo', '!foo'
if key.startswith("!"):
qs = qs.exclude(pulp_labels__has_key=key[1:])
else:
qs = qs.filter(pulp_labels__has_key=key)

return qs


class LabelSelectFilter(Filter):
"""Filter to get resources that match a label filter string. DEPRECATED."""

def __init__(self, *args, **kwargs):
deprecation_logger.warning(
"'LabelSelectFilter' is deprecated and will be removed in pulpcore==3.25;"
" use 'LabelFilter' instead."
)
super().__init__(*args, **kwargs)

kwargs.setdefault("help_text", _("Filter labels by search string"))
super().__init__(*args, **kwargs)

def filter(self, qs, value):
"""
Args:
Expand Down
4 changes: 2 additions & 2 deletions pulpcore/app/viewsets/publication.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
from pulpcore.app.viewsets.base import DATETIME_FILTER_OPTIONS, NAME_FILTER_OPTIONS
from pulpcore.app.viewsets.custom_filters import (
DistributionWithContentFilter,
LabelSelectFilter,
LabelFilter,
RepositoryVersionFilter,
)

Expand Down Expand Up @@ -321,7 +321,7 @@ class DistributionFilter(BaseFilterSet):
# /?name__in=foo,bar
# /?base_path__contains=foo
# /?base_path__icontains=foo
pulp_label_select = LabelSelectFilter()
pulp_label_select = LabelFilter()
with_content = DistributionWithContentFilter()

class Meta:
Expand Down
Loading

0 comments on commit 796a5fb

Please sign in to comment.