Skip to content

Commit

Permalink
Merge pull request #5053 from avalonmediasystem/mo_to_solr_refactor
Browse files Browse the repository at this point in the history
Refactor MediaObject methods for improving save performance
  • Loading branch information
cjcolvar authored Feb 17, 2023
2 parents 98dfbff + 2e28876 commit 9180b03
Show file tree
Hide file tree
Showing 8 changed files with 143 additions and 45 deletions.
21 changes: 21 additions & 0 deletions app/jobs/media_object_indexing_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Copyright 2011-2022, The Trustees of Indiana University and Northwestern
# University. Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
#
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
# --- END LICENSE_HEADER BLOCK ---
class MediaObjectIndexingJob < ApplicationJob
queue_as :media_object_indexing

def perform(id)
mo = MediaObject.find(id)
ActiveFedora::SolrService.add(mo.to_solr(include_child_fields: true), softCommit: true)
end
end
58 changes: 42 additions & 16 deletions app/models/media_object.rb
Original file line number Diff line number Diff line change
Expand Up @@ -177,26 +177,27 @@ def report_missing_attributes
end

def set_media_types!
mime_types = master_files.reject {|mf| mf.file_location.blank? }.collect { |mf|
Rack::Mime.mime_type(File.extname(mf.file_location))
}.uniq
mime_types = master_file_solr_docs.reject { |mf| mf["file_location_ssi"].blank? }.collect do |mf|
Rack::Mime.mime_type(File.extname(mf["file_location_ssi"]))
end.uniq
self.format = mime_types.empty? ? nil : mime_types
end

def set_resource_types!
self.avalon_resource_type = master_files.reject {|mf| mf.file_format.blank? }.collect{ |mf|
case mf.file_format
self.avalon_resource_type = master_file_solr_docs.reject { |mf| mf["file_format_ssi"].blank? }.collect do |mf|
case mf["file_format_ssi"]
when 'Moving image'
'moving image'
when 'Sound'
'sound recording'
else
mf.file_format.downcase
end
}.uniq
end.uniq
end

def update_dependent_properties!
@master_file_docs = nil
self.set_duration!
self.set_media_types!
self.set_resource_types!
Expand Down Expand Up @@ -225,7 +226,20 @@ def section_physical_descriptions
all_pds.uniq
end

def to_solr
# All fields that need to iterate over the master files do in this new method
# using a copy of the master file solr doc to avoid having to fetch them all from fedora
# this is probably okay since this is just aggregating the values already in the master file solr docs

def fill_in_solr_fields_that_need_master_files(solr_doc)
solr_doc['section_id_ssim'] = ordered_master_file_ids
solr_doc["other_identifier_sim"] += master_files.collect {|mf| mf.identifier.to_a }.flatten
solr_doc["date_digitized_sim"] = master_files.collect {|mf| mf.date_digitized }.compact.map {|t| Time.parse(t).strftime "%F" }
solr_doc["section_label_tesim"] = section_labels
solr_doc['section_physical_description_ssim'] = section_physical_descriptions
solr_doc['all_comments_sim'] = all_comments
end

def to_solr(include_child_fields: false)
super.tap do |solr_doc|
solr_doc[ActiveFedora.index_field_mapper.solr_name("workflow_published", :facetable, type: :string)] = published? ? 'Published' : 'Unpublished'
solr_doc[ActiveFedora.index_field_mapper.solr_name("collection", :symbol, type: :string)] = collection.name if collection.present?
Expand All @@ -236,17 +250,19 @@ def to_solr
solr_doc[Hydra.config.permissions.read.group] += solr_doc['read_access_ip_group_ssim']
solr_doc["title_ssort"] = self.title
solr_doc["creator_ssort"] = Array(self.creator).join(', ')
solr_doc["date_digitized_sim"] = master_files.collect {|mf| mf.date_digitized }.compact.map {|t| Time.parse(t).strftime "%F" }
solr_doc["date_ingested_sim"] = self.create_date.strftime "%F" if self.create_date.present?
#include identifiers for parts
solr_doc["other_identifier_sim"] += master_files.collect {|mf| mf.identifier.to_a }.flatten
#include labels for parts and their structural metadata
solr_doc['section_id_ssim'] = ordered_master_file_ids
solr_doc["section_label_tesim"] = section_labels
solr_doc['section_physical_description_ssim'] = section_physical_descriptions
solr_doc['avalon_resource_type_ssim'] = self.avalon_resource_type.map(&:titleize)
solr_doc['identifier_ssim'] = self.identifier.map(&:downcase)
solr_doc['all_comments_sim'] = all_comments
if include_child_fields
fill_in_solr_fields_that_need_master_files(solr_doc)
elsif id.present? # avoid error in test suite
# Fill in other identifier so these values aren't stripped from the solr doc while waiting for the background job
mf_docs = ActiveFedora::SolrService.query("isPartOf_ssim:#{id}", rows: 1_000_000)
solr_doc["other_identifier_sim"] += mf_docs.collect { |h| h['identifier_ssim'] }.flatten
# Enqueue background job to do a full indexing including more costly fields that read from children
MediaObjectIndexingJob.perform_later(id)
end

#Add all searchable fields to the all_text_timv field
all_text_values = []
all_text_values << solr_doc["title_tesi"]
Expand Down Expand Up @@ -394,10 +410,20 @@ def current_checkout(user_id)
checkouts.select{ |ch| ch.user_id == user_id }.first
end

# Override to reset memoized fields
def reload
@master_file_docs = nil
super
end

private

def master_file_solr_docs
@master_file_docs ||= ActiveFedora::SolrService.query("isPartOf_ssim:#{id}", rows: 1_000_000)
end

def calculate_duration
self.master_files.map{|mf| mf.duration.to_i }.compact.sum
master_file_solr_docs.collect { |h| h['duration_ssi'].to_i }.compact.sum
end

def collect_ips_for_index ip_strings
Expand Down
1 change: 1 addition & 0 deletions config/sidekiq.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
- mailers
- master_file_management_move
- master_file_management_delete
- media_object_indexing
- reindex
- s3_split
- solr_backup
Expand Down
2 changes: 2 additions & 0 deletions spec/controllers/catalog_controller_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,8 @@
@master_file = FactoryBot.create(:master_file, :with_structure, media_object: @media_object, title: 'Test Label')
@media_object.ordered_master_files += [@master_file]
@media_object.save!
# Explicitly run indexing job to ensure fields are indexed for structure searching
MediaObjectIndexingJob.perform_now(@media_object.id)
end
it "should find results based upon structure" do
get 'index', params: { q: 'CD 1' }
Expand Down
23 changes: 11 additions & 12 deletions spec/controllers/media_objects_controller_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -773,10 +773,10 @@
it "should choose the correct default master_file" do
mf1 = FactoryBot.create(:master_file, media_object: media_object)
mf2 = FactoryBot.create(:master_file, media_object: media_object)
expect(media_object.ordered_master_files.to_a.first).to eq(mf1)
media_object.ordered_master_files = media_object.ordered_master_files.to_a.reverse
media_object.save!
controller.instance_variable_set('@media_object', media_object)
expect(media_object.master_files.first).to eq(mf1)
expect(media_object.ordered_master_files.to_a.first).to eq(mf2)
expect(controller.send('set_active_file')).to eq(mf2)
end
Expand Down Expand Up @@ -1217,7 +1217,7 @@
delete :destroy, params: { id: media_object.id }
expect(flash[:notice]).to include("media object deleted")
expect(MediaObject.exists?(media_object.id)).to be_falsey
expect(MasterFile.exists?(media_object.master_files.first.id)).to be_falsey
expect(MasterFile.exists?(media_object.master_file_ids.first)).to be_falsey
end

it "should remove a MediaObject with multiple MasterFiles" do
Expand Down Expand Up @@ -1392,10 +1392,9 @@
end
it 'sets the MIME type' do
media_object = FactoryBot.create(:media_object)
media_object.ordered_master_files += [FactoryBot.create(:master_file, :with_derivative)]
master_file = FactoryBot.create(:master_file, :with_derivative, media_object: media_object)
media_object.reload # Reload here to force reloading master file solr docs used in #set_media_types!
media_object.set_media_types!
media_object.save
media_object.reload
expect(media_object.format).to eq(["video/mp4"])
end

Expand Down Expand Up @@ -1575,16 +1574,16 @@
login_as :user
end
it "should render add_to_playlist_form with correct masterfile_id" do
get :add_to_playlist_form, params: { id: media_object.id, scope: 'master_file', masterfile_id: media_object.master_file_ids[0] }
get :add_to_playlist_form, params: { id: media_object.id, scope: 'master_file', masterfile_id: media_object.ordered_master_file_ids[0] }
expect(response).to render_template(:_add_to_playlist_form)
expect(response.body).to include(media_object.master_file_ids[0])
expect(response.body).to include(media_object.ordered_master_file_ids[0])
end
it "should render the correct label for scope=master_file" do
get :add_to_playlist_form, params: { id: media_object.id, scope: 'master_file', masterfile_id: media_object.master_file_ids[0] }
get :add_to_playlist_form, params: { id: media_object.id, scope: 'master_file', masterfile_id: media_object.ordered_master_file_ids[0] }
expect(response.body).to include('Add Section to Playlist')
end
it "should render the correct label for scope=media_object" do
get :add_to_playlist_form, params: { id: media_object.id, scope: 'media_object', masterfile_id: media_object.master_file_ids[0] }
get :add_to_playlist_form, params: { id: media_object.id, scope: 'media_object', masterfile_id: media_object.ordered_master_file_ids[0] }
expect(response.body).to include('Add Item to Playlist')
end
end
Expand All @@ -1602,13 +1601,13 @@

it "should create a single playlist_item for a single master_file" do
expect {
post :add_to_playlist, params: { id: media_object.id, post: { playlist_id: playlist.id, masterfile_id: media_object.master_file_ids[0], playlistitem_scope: 'section' } }
post :add_to_playlist, params: { id: media_object.id, post: { playlist_id: playlist.id, masterfile_id: media_object.ordered_master_file_ids[0], playlistitem_scope: 'section' } }
}.to change { playlist.items.count }.from(0).to(1)
expect(playlist.items[0].title).to eq("#{media_object.title} - #{media_object.ordered_master_files.to_a[0].title}")
end
it "should create playlist_items for each span in a single master_file's structure" do
expect {
post :add_to_playlist, params: { id: media_object.id, post: { playlist_id: playlist.id, masterfile_id: media_object.master_file_ids[1], playlistitem_scope: 'structure' } }
post :add_to_playlist, params: { id: media_object.id, post: { playlist_id: playlist.id, masterfile_id: media_object.ordered_master_file_ids[1], playlistitem_scope: 'structure' } }
}.to change { playlist.items.count }.from(0).to(13)
expect(playlist.items[0].title).to eq("Test Item - CD 1 - Copland, Three Piano Excerpts from Our Town - Track 1. Story of Our Town")
expect(playlist.items[12].title).to eq("Test Item - CD 1 - Track 13. Copland, Danzon Cubano")
Expand All @@ -1631,7 +1630,7 @@
it 'redirects with flash message when playlist is owned by another user' do
login_as :user
other_playlist = FactoryBot.create(:playlist)
post :add_to_playlist, params: { id: media_object.id, post: { playlist_id: other_playlist.id, masterfile_id: media_object.master_file_ids[0], playlistitem_scope: 'section' } }
post :add_to_playlist, params: { id: media_object.id, post: { playlist_id: other_playlist.id, masterfile_id: media_object.ordered_master_file_ids[0], playlistitem_scope: 'section' } }
expect(response).to have_http_status(403)
expect(JSON.parse(response.body).symbolize_keys).to eq({message: "<p>You are not authorized to update this playlist.</p>", status: 403})
end
Expand Down
2 changes: 1 addition & 1 deletion spec/controllers/playlists_controller_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -466,7 +466,7 @@
before :each do
FactoryBot.reload
FactoryBot.create(:playlist, title: "aardvark", user: user)
FactoryBot.create_list(:playlist, 9, user: user)
FactoryBot.create_list(:playlist, 9, title: 'bbbbb', user: user)
FactoryBot.create(:playlist, title: "zzzebra", user: user)
end

Expand Down
32 changes: 32 additions & 0 deletions spec/jobs/media_object_indexing_job_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Copyright 2011-2022, The Trustees of Indiana University and Northwestern
# University. Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
#
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
# --- END LICENSE_HEADER BLOCK ---

require 'rails_helper'

describe MediaObjectIndexingJob do
let(:job) { MediaObjectIndexingJob.new }

describe "perform" do
let!(:media_object) { FactoryBot.create(:media_object) }
let!(:master_file) { FactoryBot.create(:master_file, media_object: media_object) }

it 'indexes the media object including master_file fields' do
before_doc = ActiveFedora::SolrService.query("id:#{media_object.id}").first
expect(before_doc["section_id_ssim"]).to be_blank
job.perform(media_object.id)
after_doc = ActiveFedora::SolrService.query("id:#{media_object.id}").first
expect(after_doc["section_id_ssim"]).to eq [master_file.id]
end
end
end
49 changes: 33 additions & 16 deletions spec/models/media_object_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -455,22 +455,39 @@
end

describe '#calculate_duration' do
it 'returns zero if there are zero master files' do
expect(media_object.send(:calculate_duration)).to eq(0)
let(:master_file1) { FactoryBot.create(:master_file, media_object: media_object, duration: '40') }
let(:master_file2) { FactoryBot.create(:master_file, media_object: media_object, duration: '40') }
let(:master_file3) { FactoryBot.create(:master_file, media_object: media_object, duration: nil) }
let(:master_files) { [] }

before do
master_files
# Explicitly run indexing job to ensure fields are indexed for structure searching
MediaObjectIndexingJob.perform_now(media_object.id)
end
it 'returns the correct duration with two master files' do
media_object.ordered_master_files += [FactoryBot.create(:master_file, duration: '40')]
media_object.ordered_master_files += [FactoryBot.create(:master_file, duration: '40')]
expect(media_object.send(:calculate_duration)).to eq(80)

context 'with zero master files' do
it 'returns zero' do
expect(media_object.send(:calculate_duration)).to eq(0)
end
end
it 'returns the correct duration with two master files one nil' do
media_object.ordered_master_files += [FactoryBot.create(:master_file, duration: '40')]
media_object.ordered_master_files += [FactoryBot.create(:master_file, duration:nil)]
expect(media_object.send(:calculate_duration)).to eq(40)
context 'with two master files' do
let(:master_files) { [master_file1, master_file2] }
it 'returns the correct duration' do
expect(media_object.send(:calculate_duration)).to eq(80)
end
end
it 'returns the correct duration with one master file that is nil' do
media_object.ordered_master_files += [FactoryBot.create(:master_file, duration:nil)]
expect(media_object.send(:calculate_duration)).to eq(0)
context 'with two master files one nil' do
let(:master_files) { [master_file1, master_file3] }
it 'returns the correct duration' do
expect(media_object.send(:calculate_duration)).to eq(40)
end
end
context 'with one master file that is nil' do
let(:master_files) { [master_file3] }
it 'returns the correct duration' do
expect(media_object.send(:calculate_duration)).to eq(0)
end
end
end

Expand Down Expand Up @@ -561,13 +578,13 @@
it 'should index identifier for master files' do
master_file = FactoryBot.create(:master_file, identifier: ['TestOtherID'], media_object: media_object)
media_object.reload
solr_doc = media_object.to_solr
solr_doc = media_object.to_solr(include_child_fields: true)
expect(solr_doc['other_identifier_sim']).to include('TestOtherID')
end
it 'should index labels for master files' do
FactoryBot.create(:master_file, :with_structure, media_object: media_object, title: 'Test Label')
media_object.reload
solr_doc = media_object.to_solr
solr_doc = media_object.to_solr(include_child_fields: true)
expect(solr_doc['section_label_tesim']).to include('CD 1')
expect(solr_doc['section_label_tesim']).to include('Test Label')
end
Expand All @@ -576,7 +593,7 @@
media_object.comment = ['MO Comment']
media_object.save!
media_object.reload
solr_doc = media_object.to_solr
solr_doc = media_object.to_solr(include_child_fields: true)
expect(solr_doc['all_comments_sim']).to include('MO Comment')
expect(solr_doc['all_comments_sim']).to include('[Test Label] MF Comment 1')
expect(solr_doc['all_comments_sim']).to include('[Test Label] MF Comment 2')
Expand Down

0 comments on commit 9180b03

Please sign in to comment.