From 9955683af572bcc0e67fc66bfe4e66a9e047457c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89ric=20B?= Date: Fri, 26 Jan 2024 23:08:27 +0100 Subject: [PATCH] AO3-6210 Load the tags in the mass wrangling bins from Elasticsearch (#4646) * Move copy-pasted count to a single function * Load unwrangled counts from ES * Now count the right thing * Hound * Fix (some?) tests * Load tags on Index from ES * Restore ordering by name ASC second on uses * Use _cache everywhere * Hound * Linter * Update tests * Hound * Some sort of solution for seemingly noop * Linter * Now unused? * Syntax/Typo * Only store unwrangeable value * Hound * Fix Grandparent bug? * More subtle cache bursting * Review * More explicit uncovered case * Syntax confusion * Hopefully helpful comment * Review * Obsolete See https://github.com/otwcode/otwarchive/pull/4658 * Review * Linter * Split into (almost) unitary tags in_use still doing two things As it contains a OR * Cop * Review --- app/controllers/tag_wranglers_controller.rb | 11 +--- app/controllers/tag_wranglings_controller.rb | 33 +++++------ app/controllers/unsorted_tags_controller.rb | 11 +--- app/helpers/wrangling_helper.rb | 14 +++++ app/models/fandom.rb | 5 +- app/models/search/tag_indexer.rb | 6 +- app/models/search/tag_query.rb | 27 ++++++++- app/models/tag.rb | 17 +++++- app/views/tag_wranglings/index.html.erb | 54 +++++++++--------- app/views/tags/wrangle.html.erb | 2 +- features/step_definitions/tag_steps.rb | 5 ++ .../brand_new_fandoms.feature | 10 ++++ .../tags_and_wrangling/tag_wrangling.feature | 10 ++++ .../tag_wrangling_more.feature | 4 ++ spec/controllers/tags_controller_spec.rb | 13 +++++ spec/models/search/tag_query_spec.rb | 55 ++++++++++++++++++- 16 files changed, 204 insertions(+), 73 deletions(-) create mode 100644 app/helpers/wrangling_helper.rb diff --git a/app/controllers/tag_wranglers_controller.rb b/app/controllers/tag_wranglers_controller.rb index 9842dcbb163..8419905245d 100644 --- a/app/controllers/tag_wranglers_controller.rb +++ b/app/controllers/tag_wranglers_controller.rb @@ -1,5 +1,6 @@ class TagWranglersController < ApplicationController include ExportsHelper + include WranglingHelper before_action :check_user_status before_action :check_permission_to_wrangle, except: [:report_csv] @@ -41,15 +42,7 @@ def show @wrangler = User.find_by!(login: params[:id]) @page_subtitle = @wrangler.login @fandoms = @wrangler.fandoms.by_name - @counts = {} - [Fandom, Character, Relationship, Freeform].each do |klass| - @counts[klass.to_s.downcase.pluralize.to_sym] = Rails.cache.fetch("/wrangler/counts/sidebar/#{klass}", race_condition_ttl: 10, expires_in: 1.hour) do - klass.unwrangled.in_use.count - end - end - @counts[:UnsortedTag] = Rails.cache.fetch("/wrangler/counts/sidebar/UnsortedTag", race_condition_ttl: 10, expires_in: 1.hour) do - UnsortedTag.count - end + @counts = tag_counts_per_category end def report_csv diff --git a/app/controllers/tag_wranglings_controller.rb b/app/controllers/tag_wranglings_controller.rb index cc3d195294b..8bbd2dd6830 100644 --- a/app/controllers/tag_wranglings_controller.rb +++ b/app/controllers/tag_wranglings_controller.rb @@ -1,34 +1,35 @@ class TagWranglingsController < ApplicationController include TagWrangling + include WranglingHelper before_action :check_user_status before_action :check_permission_to_wrangle around_action :record_wrangling_activity, only: [:wrangle] def index - @counts = {} - [Fandom, Character, Relationship, Freeform].each do |klass| - @counts[klass.to_s.downcase.pluralize.to_sym] = Rails.cache.fetch("/wrangler/counts/sidebar/#{klass}", race_condition_ttl: 10, expires_in: 1.hour) do - klass.unwrangled.in_use.count - end - end - @counts[:UnsortedTag] = Rails.cache.fetch("/wrangler/counts/sidebar/UnsortedTag", race_condition_ttl: 10, expires_in: 1.hour) do - UnsortedTag.count - end + @counts = tag_counts_per_category unless params[:show].blank? + raise "Redshirt: Attempted to constantize invalid class initialize tag_wranglings_controller_index #{params[:show].classify}" unless Tag::USER_DEFINED.include?(params[:show].classify) + params[:sort_column] = 'created_at' if !valid_sort_column(params[:sort_column], 'tag') params[:sort_direction] = 'ASC' if !valid_sort_direction(params[:sort_direction]) - sort = params[:sort_column] + " " + params[:sort_direction] - sort = sort + ", name ASC" if sort.include?('taggings_count_cache') + if params[:show] == "fandoms" @media_names = Media.by_name.pluck(:name) @page_subtitle = ts("fandoms") - @tags = Fandom.unwrangled.in_use.order(sort).paginate(page: params[:page], per_page: ArchiveConfig.ITEMS_PER_PAGE) - else # by fandom - raise "Redshirt: Attempted to constantize invalid class initialize tag_wranglings_controller_index #{params[:show].classify}" unless Tag::USER_DEFINED.include?(params[:show].classify) - klass = params[:show].classify.constantize - @tags = klass.unwrangled.in_use.order(sort).paginate(page: params[:page], per_page: ArchiveConfig.ITEMS_PER_PAGE) end + + type = params[:show].singularize.capitalize + @tags = TagQuery.new({ + type: type, + in_use: true, + unwrangleable: false, + unwrangled: true, + sort_column: params[:sort_column], + sort_direction: params[:sort_direction], + page: params[:page], + per_page: ArchiveConfig.ITEMS_PER_PAGE + }).search_results end end diff --git a/app/controllers/unsorted_tags_controller.rb b/app/controllers/unsorted_tags_controller.rb index 4def324d62a..fc6c8fd6c68 100644 --- a/app/controllers/unsorted_tags_controller.rb +++ b/app/controllers/unsorted_tags_controller.rb @@ -1,19 +1,12 @@ class UnsortedTagsController < ApplicationController + include WranglingHelper before_action :check_user_status before_action :check_permission_to_wrangle def index @tags = UnsortedTag.page(params[:page]) - @counts = {} - [Fandom, Character, Relationship, Freeform].each do |klass| - @counts[klass.to_s.downcase.pluralize.to_sym] = Rails.cache.fetch("/wrangler/counts/sidebar/#{klass}", race_condition_ttl: 10, expires_in: 1.hour) do - klass.unwrangled.in_use.count - end - end - @counts[:UnsortedTag] = Rails.cache.fetch("/wrangler/counts/sidebar/UnsortedTag", race_condition_ttl: 10, expires_in: 1.hour) do - UnsortedTag.count - end + @counts = tag_counts_per_category end def mass_update diff --git a/app/helpers/wrangling_helper.rb b/app/helpers/wrangling_helper.rb new file mode 100644 index 00000000000..8ef592ed4b9 --- /dev/null +++ b/app/helpers/wrangling_helper.rb @@ -0,0 +1,14 @@ +module WranglingHelper + def tag_counts_per_category + counts = {} + [Fandom, Character, Relationship, Freeform].each do |klass| + counts[klass.to_s.downcase.pluralize.to_sym] = Rails.cache.fetch("/wrangler/counts/sidebar/#{klass}", race_condition_ttl: 10, expires_in: 1.hour) do + TagQuery.new(type: klass.to_s, in_use: true, unwrangleable: false, unwrangled: true).count + end + end + counts[:UnsortedTag] = Rails.cache.fetch("/wrangler/counts/sidebar/UnsortedTag", race_condition_ttl: 10, expires_in: 1.hour) do + UnsortedTag.count + end + counts + end +end diff --git a/app/models/fandom.rb b/app/models/fandom.rb index 5ccf831e1f8..d194fce01e1 100644 --- a/app/models/fandom.rb +++ b/app/models/fandom.rb @@ -14,9 +14,8 @@ class Fandom < Tag scope :by_media, lambda {|media| where(media_id: media.id)} - def self.unwrangled - joins(:common_taggings). - where("unwrangleable = 0 AND common_taggings.filterable_id = ? AND common_taggings.filterable_type = 'Tag'", Media.uncategorized.try(:id)) + def unwrangled? + Media.uncategorized.fandoms.include?(self) end # An association callback to add the default media if all others have been removed diff --git a/app/models/search/tag_indexer.rb b/app/models/search/tag_indexer.rb index 137ca8bf0bb..2dbb8904ed6 100644 --- a/app/models/search/tag_indexer.rb +++ b/app/models/search/tag_indexer.rb @@ -23,7 +23,8 @@ def self.mapping }, tag_type: { type: "keyword" }, sortable_name: { type: "keyword" }, - uses: { type: "integer" } + uses: { type: "integer" }, + unwrangled: { type: "boolean" } } } end @@ -67,7 +68,8 @@ def document(object) ).merge( has_posted_works: object.has_posted_works?, tag_type: object.type, - uses: object.taggings_count_cache + uses: object.taggings_count_cache, + unwrangled: object.unwrangled? ).merge(parent_data(object)) end diff --git a/app/models/search/tag_query.rb b/app/models/search/tag_query.rb index 1e83dab1f7c..766074d71b5 100644 --- a/app/models/search/tag_query.rb +++ b/app/models/search/tag_query.rb @@ -22,7 +22,9 @@ def filters fandom_filter, character_filter, suggested_fandom_filter, - suggested_character_filter + suggested_character_filter, + in_use_filter, + unwrangled_filter ].flatten.compact end @@ -60,7 +62,11 @@ def sort sort_hash[column][:unmapped_type] = "date" end - [sort_hash, { id: { order: direction } }] + sort_by_id = { id: { order: direction } } + + return [sort_hash, { "name.keyword" => { order: "asc" } }, sort_by_id] if column == "uses" + + [sort_hash, sort_by_id] end ################ @@ -103,6 +109,23 @@ def suggested_character_filter terms_filter(:pre_character_ids, options[:pre_character_ids]) if options[:pre_character_ids] end + # Canonical tags are treated as used even if they technically aren't + def in_use_filter + return if options[:in_use].nil? + + unless options[:in_use] + # Check if not used AND not canonical + return [term_filter(:uses, 0), term_filter(:canonical, false)] + end + + # Check if used OR canonical + { bool: { should: [{ range: { uses: { gt: 0 } } }, term_filter(:canonical, true)] } } + end + + def unwrangled_filter + term_filter(:unwrangled, bool_value(options[:unwrangled])) unless options[:unwrangled].nil? + end + # Filter to only include tags that have no assigned fandom_ids. Checks that # the fandom exists, because this particular filter is included in the # exclusion_filters section. diff --git a/app/models/tag.rb b/app/models/tag.rb index 9740deb8546..ebbf35987a9 100644 --- a/app/models/tag.rb +++ b/app/models/tag.rb @@ -272,9 +272,6 @@ def destroy_common_tagging(parent) scope :unfilterable, -> { nonsynonymous.where(unwrangleable: false) } scope :unwrangleable, -> { where(unwrangleable: true) } - # we need to manually specify a LEFT JOIN instead of just joins(:common_taggings or :meta_taggings) here because - # what we actually need are the empty rows in the results - scope :unwrangled, -> { joins("LEFT JOIN `common_taggings` ON common_taggings.common_tag_id = tags.id").where("unwrangleable = 0 AND common_taggings.id IS NULL") } scope :in_use, -> { where("canonical = 1 OR taggings_count_cache > 0") } scope :first_class, -> { joins("LEFT JOIN `meta_taggings` ON meta_taggings.sub_tag_id = tags.id").where("meta_taggings.id IS NULL") } @@ -1034,6 +1031,20 @@ def merger_string=(tag_string) end end + # unwrangleable: + # - A boolean stored in the tags table + # - Default false + # - Set to true by wranglers on tags that should be excluded from the wrangling process altogether. Example: freeform tags like "idk how to explain it but trust me" + # + # unwrangled: + # - A computed value + # - True for "orphan" tags yet to be tied to something (fandom, character, etc.) by wranglers + # - Exact meaning may change depending on the nature of the tag (search for definitions of unwrangled? overriding this one) + # + def unwrangled? + common_taggings.empty? + end + ################################# ## SEARCH ####################### ################################# diff --git a/app/views/tag_wranglings/index.html.erb b/app/views/tag_wranglings/index.html.erb index ee738e57e16..7f07bf665fc 100755 --- a/app/views/tag_wranglings/index.html.erb +++ b/app/views/tag_wranglings/index.html.erb @@ -104,32 +104,34 @@ <% @tags.each do |tag| %> - - - <%= check_box_tag 'selected_tags[]', tag.id, nil, id: "selected_tags_#{tag.id}" %> - <%= label_tag "selected_tags_#{tag.id}", "#{tag.name}" %> - - - <%= l(tag.created_at.to_date) %> - - - <% if tag.canonical? %> - <%= ts('Yes') %> - <% else %> - <%= check_box_tag 'canonicals[]', tag.id, tag.canonical?, id: "canonicals_#{tag.id}" %> - <% end %> - - - <%= tag.taggings_count %> - - - - - + <% if tag.unwrangled? %> + + "> + <%= check_box_tag "selected_tags[]", tag.id, nil, id: "selected_tags_#{tag.id}" %> + <%= label_tag "selected_tags_#{tag.id}", tag.name.to_s %> + + + "><%= l(tag.created_at.to_date) %> + + "> + <% if tag.canonical? %> + <%= ts("Yes") %> + <% else %> + <%= check_box_tag "canonicals[]", tag.id, tag.canonical?, id: "canonicals_#{tag.id}" %> + <% end %> + + + "><%= tag.taggings_count_cache %> + + + + + + <% end %> <% end %> diff --git a/app/views/tags/wrangle.html.erb b/app/views/tags/wrangle.html.erb index 7de8abfeee0..5eef96ab42e 100644 --- a/app/views/tags/wrangle.html.erb +++ b/app/views/tags/wrangle.html.erb @@ -133,7 +133,7 @@ <%= tag.created_at.to_date %> - <%= tag.taggings_count %> + "><%= tag.taggings_count_cache %>