diff --git a/app/models/repository.rb b/app/models/repository.rb
index e08475827..ed329b615 100644
--- a/app/models/repository.rb
+++ b/app/models/repository.rb
@@ -74,6 +74,24 @@ def delete_by!(actor)
     destroyed
   end
 
+  # Records a pull notification coming from the registry: the pulled tag gets
+  # its `pulled_at` column stamped with the current time. Returns the tag, or
+  # nil when the registry, namespace, repository or tag cannot be resolved.
+  def self.handle_pull_event(event)
+    registry = Registry.find_from_event(event)
+    return if registry.nil?
+
+    namespace, repo_name, tag_name = registry.get_namespace_from_event(event)
+    return if namespace.nil?
+
+    repository = namespace.repositories.find_by(name: repo_name)
+    pulled_tag = repository&.tags&.find_by(name: tag_name)
+    return if pulled_tag.nil?
+
+    pulled_tag.update_columns(pulled_at: Time.current)
+    pulled_tag
+  end
+
   # Handle a push event from the registry.
   def self.handle_push_event(event)
     registry = Registry.find_from_event(event)
diff --git a/app/models/tag.rb b/app/models/tag.rb
index c0942734e..f0047dd81 100644
--- a/app/models/tag.rb
+++ b/app/models/tag.rb
@@ -16,6 +16,7 @@
 #  username      :string(255)
 #  scanned       :integer          default(0)
 #  size          :integer
+#  pulled_at     :datetime
 #
 # Indexes
 #
diff --git a/app/models/webhook.rb b/app/models/webhook.rb
index 8d61e9cfc..a1eb286be 100644
--- a/app/models/webhook.rb
+++ b/app/models/webhook.rb
@@ -87,6 +87,9 @@ def self.handle_push_event(event)
     hydra.run
   end
 
+  # Pull events do not trigger any webhook yet, so this handler is a no-op.
+  def self.handle_pull_event(event); end
+
   # Handle a delete event from the registry. All enabled webhooks of the provided
   # namespace are triggered in parallel.
   def self.handle_delete_event(event)
diff --git a/config/config.yml b/config/config.yml
index 1c52ffb4d..bf1f2cef8 100644
--- a/config/config.yml
+++ b/config/config.yml
@@ -63,10 +63,17 @@ delete:
   garbage_collector:
     enabled: false
 
-    # Remove images older than a specific value. This value is interpreted as
-    # the number of days.
+    # Remove images not pulled and older than a specific value. This value is
+    # interpreted as the number of days.
+    #
+    # e.g.: if an image was neither pulled nor updated in any way during the
+    # last 30 days, it will be deleted.
     older_than: 30
 
+    # Tag count threshold: while the total number of tags is at or below this
+    # value the garbage collector removes nothing, whatever `older_than` says.
+    keep_latest: 5
+
     # Provide a string containing a regular expression. If you provide a
     # valid regular expression, garbage collector will only be applied into tags
     # matching a given name.
diff --git a/db/migrate/20190115133935_add_pulled_at_to_tags.rb b/db/migrate/20190115133935_add_pulled_at_to_tags.rb
new file mode 100644
index 000000000..b8b704901
--- /dev/null
+++ b/db/migrate/20190115133935_add_pulled_at_to_tags.rb
@@ -0,0 +1,5 @@
+class AddPulledAtToTags < ActiveRecord::Migration[5.2]
+  def change
+    add_column :tags, :pulled_at, :datetime
+  end
+end
diff --git a/db/schema.mysql.rb b/db/schema.mysql.rb
index 724c6cc66..d8aa4213c 100644
--- a/db/schema.mysql.rb
+++ b/db/schema.mysql.rb
@@ -10,7 +10,7 @@
 #
 # It's strongly recommended that you check this file into your version control system.
 
-ActiveRecord::Schema.define(version: 2019_01_09_112643) do
+ActiveRecord::Schema.define(version: 2019_01_15_133935) do
 
   create_table "activities", id: :integer, options: "ENGINE=InnoDB DEFAULT CHARSET=utf8", force: :cascade do |t|
     t.string "trackable_type"
@@ -121,6 +121,7 @@
     t.string "username"
     t.integer "scanned", default: 0
     t.integer "size"
+    t.datetime "pulled_at"
     t.index ["repository_id"], name: "index_tags_on_repository_id"
     t.index ["user_id"], name: "index_tags_on_user_id"
   end
diff --git a/db/schema.postgresql.rb b/db/schema.postgresql.rb
index b6a890eeb..a51369963 100644
--- a/db/schema.postgresql.rb
+++ b/db/schema.postgresql.rb
@@ -10,7 +10,7 @@
 #
 # It's strongly recommended that you check this file into your version control system.
 
-ActiveRecord::Schema.define(version: 2019_01_09_112643) do
+ActiveRecord::Schema.define(version: 2019_01_15_133935) do
 
   # These are extensions that must be enabled in order to support this database
   enable_extension "plpgsql"
@@ -124,6 +124,7 @@
     t.string "username"
     t.integer "scanned", default: 0
     t.integer "size"
+    t.datetime "pulled_at"
     t.index ["repository_id"], name: "index_tags_on_repository_id"
     t.index ["user_id"], name: "index_tags_on_user_id"
   end
diff --git a/lib/portus/background/garbage_collector.rb b/lib/portus/background/garbage_collector.rb
index c94b62d15..8251fc684 100644
--- a/lib/portus/background/garbage_collector.rb
+++ b/lib/portus/background/garbage_collector.rb
@@ -29,6 +29,10 @@ def disable?
       end
 
       def execute!
+        # Nothing is collected while the total number of tags is at or below
+        # the `keep_latest` threshold.
+        return if Tag.count <= APP_CONFIG["delete"]["garbage_collector"]["keep_latest"].to_i
+
         @tags ||= tags_to_be_collected
 
         service = ::Tags::DestroyService.new(User.find_by(username: "portus"))
@@ -46,7 +50,13 @@ def to_s
       protected
 
       def tags_to_be_collected
-        tags = Tag.where(marked: false).where("updated_at < ?", older_than)
+        # Bind the cut-off once so both comparisons use the very same value.
+        # A tag that was never pulled (NULL `pulled_at`) is judged on
+        # `updated_at` alone.
+        tags = Tag.where(marked: false).where(
+          "updated_at < :limit AND (pulled_at < :limit OR pulled_at IS NULL)",
+          limit: older_than
+        )
         return tags if APP_CONFIG["delete"]["garbage_collector"]["tag"].blank?
 
         rx = tag_regexp
diff --git a/lib/portus/registry_notification.rb b/lib/portus/registry_notification.rb
index 1f9590751..ee2fe6ac5 100644
--- a/lib/portus/registry_notification.rb
+++ b/lib/portus/registry_notification.rb
@@ -5,7 +5,7 @@ module Portus
   # consumed later on.
   class RegistryNotification
     # An array with the events that a handler has to support.
-    HANDLED_EVENTS = %w[push delete].freeze
+    HANDLED_EVENTS = %w[push delete pull].freeze
 
     # It filters the event from the registry so the background job can actually
     # handle this request.
diff --git a/spec/lib/portus/background/garbage_collector_spec.rb b/spec/lib/portus/background/garbage_collector_spec.rb
index 4a472d382..d058b3d7d 100644
--- a/spec/lib/portus/background/garbage_collector_spec.rb
+++ b/spec/lib/portus/background/garbage_collector_spec.rb
@@ -1,11 +1,15 @@
 # frozen_string_literal: true
 
+require "rails_helper"
+require "portus/background/garbage_collector"
+
 describe ::Portus::Background::GarbageCollector do
   let(:old_tag) { (APP_CONFIG["delete"]["garbage_collector"]["older_than"].to_i + 10).days.ago }
   let(:recent_tag) { (APP_CONFIG["delete"]["garbage_collector"]["older_than"].to_i - 10).days.ago }
 
   before do
     APP_CONFIG["delete"]["garbage_collector"]["enabled"] = true
+    APP_CONFIG["delete"]["garbage_collector"]["keep_latest"] = 0 # neutralise the tag-count guard
   end
 
   it "returns the proper value for sleep_value" do
@@ -62,6 +66,12 @@
       expect(tags).to be_empty
     end
 
+    it "ignores older tags if pulled recently" do
+      create(:tag, name: "tag", repository: repository, updated_at: old_tag, pulled_at: recent_tag)
+      tags = subject.send(:tags_to_be_collected)
+      expect(tags).to be_empty
+    end
+
     it "exists a tag but it's considered recent" do
       create(:tag, name: "tag", repository: repository, updated_at: recent_tag)
       tags = subject.send(:tags_to_be_collected)
@@ -113,6 +123,19 @@
       end.to(change { Tag.all.count }.from(1).to(0))
     end
 
+    it "skips older tags if number of tags < keep_latest" do
+      APP_CONFIG["delete"]["garbage_collector"]["keep_latest"] = 5
+      create_list(:tag, 4, repository: repository, updated_at: old_tag) # 4 tags <= keep_latest
+
+      expect { subject.execute! }.not_to change(Tag.all, :count)
+    end
+
+    it "skips older tags if it was pulled recently" do
+      create_list(:tag, 4, repository: repository, updated_at: old_tag, pulled_at: recent_tag)
+
+      expect { subject.execute! }.not_to change(Tag.all, :count)
+    end
+
     it "skips tags which could not be removed for whatever reason" do
       allow_any_instance_of(Tag).to(
         receive(:fetch_digest) { |tag| tag.digest == "wrong" ?
"" : tag.digest }
diff --git a/spec/lib/portus/registry_notification_spec.rb b/spec/lib/portus/registry_notification_spec.rb
index c9b498955..31453c036 100644
--- a/spec/lib/portus/registry_notification_spec.rb
+++ b/spec/lib/portus/registry_notification_spec.rb
@@ -7,15 +7,16 @@
   let(:relevant) { ::Portus::Fixtures::RegistryEvent::RELEVANT.dup }
   let(:delete) { ::Portus::Fixtures::RegistryEvent::DELETE.dup }
   let(:version23) { ::Portus::Fixtures::RegistryEvent::VERSION23.dup }
+  let(:pull) { ::Portus::Fixtures::RegistryEvent::PULL.dup }
 
   it "processes all the relevant events" do
-    evaluated_events = [relevant, delete, version23]
+    evaluated_events = [relevant, delete, version23, pull]
     evaluated_events.each { |e| body["events"] << e }
 
     described_class.process!(body)
 
     events = RegistryEvent.order(:event_id)
-    expect(events.size).to eq 3
+    expect(events.size).to eq 4 # one record per evaluated event
 
     events.each_with_index do |e, idx|
       data = JSON.parse(e.data)
diff --git a/spec/models/repository_spec.rb b/spec/models/repository_spec.rb
index 2a172b9ce..eefecbeae 100644
--- a/spec/models/repository_spec.rb
+++ b/spec/models/repository_spec.rb
@@ -71,6 +71,44 @@ def get_url(repo, tag)
     end
   end
 
+  describe "handle pull event" do
+    let(:tag_name) { "latest" }
+    let(:registry) do
+      create(:registry,
+             hostname: "registry.test.lan",
+             external_hostname: "external.test.lan")
+    end
+    let(:repository) { create(:repository, namespace: registry.global_namespace, name: "busybox") }
+
+    before do
+      @event = build(:raw_pull_event).to_test_hash
+      @event["target"]["repository"] = "busybox"
+      @event["target"]["mediaType"] = "application/vnd.docker.distribution.manifest.v1+json"
+      @event["target"]["tag"] = tag_name
+    end
+
+    context "when the tag is not known by Portus" do
+      it "returns nil" do
+        tag = described_class.handle_pull_event(@event)
+
+        expect(tag).to be_nil
+      end
+    end
+
+    context "when the tag is known by Portus" do
+      let(:tag) { create(:tag, name: tag_name, repository: repository) }
+
+      it "updates `pulled_at`
attribute and returns the tag" do
+        expect(tag.pulled_at).to be_nil
+
+        updated_tag = described_class.handle_pull_event(@event)
+
+        expect(updated_tag).not_to be_nil
+        expect(updated_tag.pulled_at).not_to be_nil
+      end
+    end
+  end
+
   describe "handle push event" do
     let(:tag_name) { "latest" }
     let(:repository_name) { "busybox" }
diff --git a/spec/models/tag_spec.rb b/spec/models/tag_spec.rb
index aa33cf573..d644c7078 100644
--- a/spec/models/tag_spec.rb
+++ b/spec/models/tag_spec.rb
@@ -16,6 +16,7 @@
 #  username      :string(255)
 #  scanned       :integer          default(0)
 #  size          :integer
+#  pulled_at     :datetime
 #
 # Indexes
 #
diff --git a/spec/support/registry_events.rb b/spec/support/registry_events.rb
index b05f84c6c..17c258378 100644
--- a/spec/support/registry_events.rb
+++ b/spec/support/registry_events.rb
@@ -12,13 +12,36 @@ module RegistryEvent
           { "action" => "push" },
           { "action" => "push", "target" => { "mediaType" => "some" } },
           {
-            "action" => "pull", "target" => {
-              "mediaType" => "application/vnd.docker.distribution.manifest.v1+json"
+            "action" => "irrelevant",
+            "target" => {
+              "mediaType" => "application/vnd.docker.distribution.manifest.v2+json"
             }
           }
         ]
       }.freeze
 
+      PULL = # NOTE(review): "digest" below has 52 hex chars, a full sha256 has 64 - confirm
+        {
+          "id" => "847f45bb-5f19-4c1b-b198-6c5ba467c127",
+          "timestamp" => "2019-01-15T20:17:10.595087128Z",
+          "action" => "pull",
+          "target" => {
+            "mediaType" => "application/vnd.docker.distribution.manifest.v2+json",
+            "size" => 2193,
+            "digest" => "sha256:095ca87493f6a2147b8543a669f2d773097df9be7e17a981033c",
+            "length" => 2193,
+            "repository" => "vitoravelino/etcd",
+            "tag" => "v3.2.25-arm64"
+          },
+          "actor" => {
+            "name" => "vitoravelino"
+          },
+          "source" => {
+            "addr" => "50549da63cc2:5000",
+            "instanceID" => "a481f8c8-a71c-4395-b90c-f8d32a083d02"
+          }
+        }.freeze
+
       RELEVANT =
         {
           "id" => "5d673710-06b5-48b5-a7d9-94cbaacf776b",