From 4026cd18ba142acf7826d02bea2ce794706487dc Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Wed, 24 Jan 2024 17:47:23 -0500 Subject: [PATCH 01/16] errant debugging --- app/controllers/sessions_controller.rb | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/app/controllers/sessions_controller.rb b/app/controllers/sessions_controller.rb index 3239d57ab..930ca4e9a 100644 --- a/app/controllers/sessions_controller.rb +++ b/app/controllers/sessions_controller.rb @@ -10,9 +10,7 @@ def index def new @user = User.new - puts 'hi' - puts Rails.env.production? - puts Rails.application.config.devise.omniauth_providers.include?(:google_oauth2) + if Rails.env.production? && Rails.application.config.devise.omniauth_providers.include?(:google_oauth2) # Google only lets us oAuth from https sites in production. @flag_not_on_https = false From ea3ad344714c6a5ede75310884ac678ee0bc3c3e Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Wed, 24 Jan 2024 17:48:07 -0500 Subject: [PATCH 02/16] optimize creating a snapshot --- .../javascripts/services/querySnapshotSvc.js | 12 ++++--- .../api/v1/books/populate_controller.rb | 3 ++ .../api/v1/snapshots_controller.rb | 34 ++++++++++++------- app/jobs/populate_snapshot_job.rb | 34 +++++++++++++++++++ app/models/snapshot.rb | 2 ++ .../api/v1/snapshots/_snapshot.json.jbuilder | 3 +- .../api/v1/snapshots/create.json.jbuilder | 2 +- app/views/api/v1/snapshots/show.json.jbuilder | 2 +- 8 files changed, 71 insertions(+), 21 deletions(-) create mode 100644 app/jobs/populate_snapshot_job.rb diff --git a/app/assets/javascripts/services/querySnapshotSvc.js b/app/assets/javascripts/services/querySnapshotSvc.js index d3a6c5173..ccff45a0f 100644 --- a/app/assets/javascripts/services/querySnapshotSvc.js +++ b/app/assets/javascripts/services/querySnapshotSvc.js @@ -127,12 +127,14 @@ angular.module('QuepidApp') }; return $http.post('api/cases/' + caseNo + '/snapshots', saved) - .then(function(response) { - return addSnapshotResp([response.data]) - .then(function() { - version++; - }); + .then(function() { + version++; }); + // return addSnapshotResp([response.data]) + // .then(function() { + // version++; + // }); + //}); }; this.deleteSnapshot = function(snapshotId) { diff --git a/app/controllers/api/v1/books/populate_controller.rb b/app/controllers/api/v1/books/populate_controller.rb index c03bc3814..00232e3a8 100644 --- a/app/controllers/api/v1/books/populate_controller.rb +++ b/app/controllers/api/v1/books/populate_controller.rb @@ -15,6 +15,9 @@ class PopulateController < Api::ApiController # We get a messy set of params in this method, so we don't use the normal # approach of strong parameter validation. We hardcode the only params # we care about. + # + # With 5000 queries in large case, this takes 108 seconds... + # # rubocop:disable Layout/LineLength def update puts "[PopulateController] Request Size is #{number_to_human_size(query_doc_pairs_params.to_s.bytesize)}" diff --git a/app/controllers/api/v1/snapshots_controller.rb b/app/controllers/api/v1/snapshots_controller.rb index 4a3574fac..124f47a93 100644 --- a/app/controllers/api/v1/snapshots_controller.rb +++ b/app/controllers/api/v1/snapshots_controller.rb @@ -1,8 +1,10 @@ # frozen_string_literal: true +require 'action_view' module Api module V1 class SnapshotsController < Api::ApiController + include ActionView::Helpers::NumberHelper before_action :set_case before_action :check_case before_action :set_snapshot, only: [ :show, :destroy ] @@ -20,7 +22,8 @@ def authenticate_api! def index @snapshots = @case.snapshots - @shallow = params[:shallow] || false + @shallow = params[:shallow] == 'true' + @with_docs = false respond_with @snapshots end @@ -37,21 +40,19 @@ def create @snapshot.try = @case.tries.first if @snapshot.save - service = SnapshotManager.new(@snapshot) - - snapshot_docs = params[:snapshot][:docs] - snapshot_queries = params[:snapshot][:queries] + + serialized_data = Marshal.dump(snapshot_params) - service.add_docs snapshot_docs, snapshot_queries if snapshot_docs + puts "[SnapshotController] the size of the serialized data is #{number_to_human_size(serialized_data.bytesize)}" + compressed_data = Zlib::Deflate.deflate(serialized_data) + puts "[SnapshotController] the size of the compressed data is #{number_to_human_size(compressed_data.bytesize)}" + @snapshot.snapshot_file.attach(io: StringIO.new(compressed_data), filename: "snapshot_#{@snapshot.id}.bin.zip", + content_type: 'application/zip') + PopulateSnapshotJob.perform_later @snapshot Analytics::Tracker.track_snapshot_created_event current_user, @snapshot - - # Refetch snapshot because after bulk creating the docs - # the snapshot object is then stale - @snapshot = Snapshot.where(id: @snapshot.id) - .includes([ snapshot_queries: [ :snapshot_docs, { query: [ :ratings ] } ] ]) - .first - + + @with_docs = false # don't show individual docs in the response respond_with @snapshot else render json: @snapshot.errors, status: :bad_request @@ -78,6 +79,13 @@ def set_snapshot def check_snapshot render json: { error: 'Not Found!' }, status: :not_found unless @snapshot end + + def snapshot_params + # avoid StrongParameters ;-( to faciliate sending params as + # hash to ActiveJob via ActiveStorage by directly getting parameters from request + # object + request.parameters + end end end end diff --git a/app/jobs/populate_snapshot_job.rb b/app/jobs/populate_snapshot_job.rb new file mode 100644 index 000000000..5f811730c --- /dev/null +++ b/app/jobs/populate_snapshot_job.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +class PopulateSnapshotJob < ApplicationJob + queue_as :default + sidekiq_options log_level: :warn + + # rubocop:disable Security/MarshalLoad + # rubocop:disable Metrics/MethodLength + # rubocop:disable Metrics/AbcSize + def perform snapshot + # down the road we should be using ActiveRecord-import and first_or_initialize instead. + # See how snapshots are managed. + + compressed_data = snapshot.snapshot_file.download + serialized_data = Zlib::Inflate.inflate(compressed_data) + params = Marshal.load(serialized_data) + + puts "[PopulateSnapshotJob] I am going to populate the snapshot with #{params[:snapshot][:queries].size} queries" + + service = SnapshotManager.new(snapshot) + + snapshot_docs = params[:snapshot][:docs] + snapshot_queries = params[:snapshot][:queries] + + service.add_docs snapshot_docs, snapshot_queries if snapshot_docs + + snapshot.snapshot_file.purge + snapshot.save + + end + # rubocop:enable Security/MarshalLoad + # rubocop:enable Metrics/MethodLength + # rubocop:enable Metrics/AbcSize +end diff --git a/app/models/snapshot.rb b/app/models/snapshot.rb index d4d0af1ca..38460abb0 100644 --- a/app/models/snapshot.rb +++ b/app/models/snapshot.rb @@ -31,6 +31,8 @@ class Snapshot < ApplicationRecord has_many :snapshot_queries, dependent: :destroy has_many :snapshot_docs, through: :snapshot_queries + + has_one_attached :snapshot_file # Validations validates :name, diff --git a/app/views/api/v1/snapshots/_snapshot.json.jbuilder b/app/views/api/v1/snapshots/_snapshot.json.jbuilder index 00f9ff879..f556ca5ac 100644 --- a/app/views/api/v1/snapshots/_snapshot.json.jbuilder +++ b/app/views/api/v1/snapshots/_snapshot.json.jbuilder @@ -5,6 +5,7 @@ shallow ||= false json.id snapshot.id json.name snapshot.name json.time snapshot.created_at +json.has_snapshot_file = snapshot.snapshot_file.present? unless shallow json.scorer snapshot.scorer, partial: 'api/v1/scorers/communal_scorer', as: :scorer @@ -35,7 +36,7 @@ if with_docs end end -unless shallow +if with_docs json.scores do json.array! snapshot.snapshot_queries, partial: 'api/v1/snapshots/snapshot_query', as: :snapshot_query end diff --git a/app/views/api/v1/snapshots/create.json.jbuilder b/app/views/api/v1/snapshots/create.json.jbuilder index 27f74891a..99f2772a8 100644 --- a/app/views/api/v1/snapshots/create.json.jbuilder +++ b/app/views/api/v1/snapshots/create.json.jbuilder @@ -1,3 +1,3 @@ # frozen_string_literal: true -json.partial! 'snapshot', snapshot: @snapshot, with_docs: true +json.partial! 'snapshot', snapshot: @snapshot, with_docs: @with_docs diff --git a/app/views/api/v1/snapshots/show.json.jbuilder b/app/views/api/v1/snapshots/show.json.jbuilder index 603646829..167d05a60 100644 --- a/app/views/api/v1/snapshots/show.json.jbuilder +++ b/app/views/api/v1/snapshots/show.json.jbuilder @@ -1,3 +1,3 @@ # frozen_string_literal: true -json.partial! 'snapshot', snapshot: @snapshot, with_docs: true, shallow: @shallow +json.partial! 'snapshot', snapshot: @snapshot, with_docs: @with_docs, shallow: @shallow From 9bbcc3e63469c82b596b4cb9613140af79b481e5 Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Wed, 24 Jan 2024 17:48:25 -0500 Subject: [PATCH 03/16] need larger fields! --- app/models/query_doc_pair.rb | 2 +- ...esize_query_d_oc_doc_fields_to_match_snapshot_fields.rb | 7 +++++++ db/schema.rb | 4 ++-- test/fixtures/query_doc_pairs.yml | 2 +- test/models/query_doc_pair_test.rb | 2 +- 5 files changed, 12 insertions(+), 5 deletions(-) create mode 100644 db/migrate/20240124224447_resize_query_d_oc_doc_fields_to_match_snapshot_fields.rb diff --git a/app/models/query_doc_pair.rb b/app/models/query_doc_pair.rb index fe9dd665e..ae521b471 100644 --- a/app/models/query_doc_pair.rb +++ b/app/models/query_doc_pair.rb @@ -5,7 +5,7 @@ # Table name: query_doc_pairs # # id :bigint not null, primary key -# document_fields :text(65535) +# document_fields :text(16777215) # information_need :string(255) # notes :text(65535) # options :text(65535) diff --git a/db/migrate/20240124224447_resize_query_d_oc_doc_fields_to_match_snapshot_fields.rb b/db/migrate/20240124224447_resize_query_d_oc_doc_fields_to_match_snapshot_fields.rb new file mode 100644 index 000000000..d9d5ef3cc --- /dev/null +++ b/db/migrate/20240124224447_resize_query_d_oc_doc_fields_to_match_snapshot_fields.rb @@ -0,0 +1,7 @@ +class ResizeQueryDOcDocFieldsToMatchSnapshotFields < ActiveRecord::Migration[7.1] + # discovered that we limit this to TEXT, not MEDIUMTEXT, however + # in SnapshotDocs.fields it IS a MEDIUMTEXT field. + def change + change_column :query_doc_pairs, :document_fields, :mediumtext + end +end diff --git a/db/schema.rb b/db/schema.rb index 468ed8979..75628d23d 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema[7.1].define(version: 2024_01_22_154641) do +ActiveRecord::Schema[7.1].define(version: 2024_01_24_224447) do create_table "active_storage_attachments", charset: "utf8mb4", collation: "utf8mb4_bin", force: :cascade do |t| t.string "name", null: false t.string "record_type", null: false @@ -167,7 +167,7 @@ create_table "query_doc_pairs", charset: "utf8mb3", collation: "utf8mb3_unicode_ci", force: :cascade do |t| t.string "query_text", limit: 500 t.integer "position" - t.text "document_fields" + t.text "document_fields", size: :medium t.bigint "book_id", null: false t.datetime "created_at", null: false t.datetime "updated_at", null: false diff --git a/test/fixtures/query_doc_pairs.yml b/test/fixtures/query_doc_pairs.yml index b1bfa8bd6..59e7a4428 100644 --- a/test/fixtures/query_doc_pairs.yml +++ b/test/fixtures/query_doc_pairs.yml @@ -3,7 +3,7 @@ # Table name: query_doc_pairs # # id :bigint not null, primary key -# document_fields :text(65535) +# document_fields :text(16777215) # information_need :string(255) # notes :text(65535) # options :text(65535) diff --git a/test/models/query_doc_pair_test.rb b/test/models/query_doc_pair_test.rb index f5d62e481..3817c3771 100644 --- a/test/models/query_doc_pair_test.rb +++ b/test/models/query_doc_pair_test.rb @@ -5,7 +5,7 @@ # Table name: query_doc_pairs # # id :bigint not null, primary key -# document_fields :text(65535) +# document_fields :text(16777215) # information_need :string(255) # notes :text(65535) # options :text(65535) From 1554fe80be325d638d9c9e6b483a263f412e5c61 Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Wed, 24 Jan 2024 17:48:38 -0500 Subject: [PATCH 04/16] simpler way to skip/unskip these tests.. --- test/integration/experiment_with_bulk_insert_test.rb | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/test/integration/experiment_with_bulk_insert_test.rb b/test/integration/experiment_with_bulk_insert_test.rb index 2e61fac40..2fa16745a 100644 --- a/test/integration/experiment_with_bulk_insert_test.rb +++ b/test/integration/experiment_with_bulk_insert_test.rb @@ -8,8 +8,10 @@ class ExperimentWithBulkInsertTest < ActionDispatch::IntegrationTest let(:scorer) { scorers(:quepid_default_scorer) } let(:selection_strategy) { selection_strategies(:multiple_raters) } + @@skip_tests = true + test 'generate and export 5000 queries with traditional AR' do - skip('Ignoring all tests in ExperimentWithBulkInsertTest') + skip('Ignoring all tests in ExperimentWithBulkInsertTest') if @@skip_tests book = user.books.create name: '50000 Query Doc Pairs', scorer: scorer, selection_strategy: selection_strategy assert book.valid? result = Benchmark.measure do @@ -32,7 +34,7 @@ class ExperimentWithBulkInsertTest < ActionDispatch::IntegrationTest end test 'generate and export 5000 queries with bulk import' do - skip('Ignoring all tests in ExperimentWithBulkInsertTest') + skip('Ignoring all tests in ExperimentWithBulkInsertTest') if @@skip_tests book = user.books.create name: '50000 Query Doc Pairs', scorer: scorer, selection_strategy: selection_strategy assert book.valid? result = Benchmark.measure do @@ -56,7 +58,7 @@ class ExperimentWithBulkInsertTest < ActionDispatch::IntegrationTest end test 'generate and export 5000 queries with insert_all' do - skip('Ignoring all tests in ExperimentWithBulkInsertTest') + skip('Ignoring all tests in ExperimentWithBulkInsertTest') if @@skip_tests book = user.books.create name: '50000 Query Doc Pairs', scorer: scorer, selection_strategy: selection_strategy assert book.valid? result = Benchmark.measure do @@ -86,7 +88,7 @@ class ExperimentWithBulkInsertTest < ActionDispatch::IntegrationTest end test 'generate and export 5000 queries with upsert_all' do - skip('Ignoring all tests in ExperimentWithBulkInsertTest') + skip('Ignoring all tests in ExperimentWithBulkInsertTest') if @@skip_tests book = user.books.create name: '50000 Query Doc Pairs', scorer: scorer, selection_strategy: selection_strategy assert book.valid? result = Benchmark.measure do @@ -117,7 +119,7 @@ class ExperimentWithBulkInsertTest < ActionDispatch::IntegrationTest # rubocop:disable Layout/LineLength test 'generate and export 5000 queries with upsert_all when exists already data' do - skip('Ignoring all tests in ExperimentWithBulkInsertTest') + skip('Ignoring all tests in ExperimentWithBulkInsertTest') if @@skip_tests book = user.books.create name: '50000 Query Doc Pairs', scorer: scorer, selection_strategy: selection_strategy assert book.valid? From 37e9135abe26b41a1aeb547c6597a8bfc4a06993 Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Wed, 24 Jan 2024 18:02:53 -0500 Subject: [PATCH 05/16] lint --- .../api/v1/books/populate_controller.rb | 4 ++-- app/controllers/api/v1/snapshots_controller.rb | 16 ++++++++++------ app/controllers/sessions_controller.rb | 2 +- app/jobs/populate_snapshot_job.rb | 7 +------ app/models/snapshot.rb | 2 +- .../experiment_with_bulk_insert_test.rb | 4 +++- 6 files changed, 18 insertions(+), 17 deletions(-) diff --git a/app/controllers/api/v1/books/populate_controller.rb b/app/controllers/api/v1/books/populate_controller.rb index 00232e3a8..2c9f00205 100644 --- a/app/controllers/api/v1/books/populate_controller.rb +++ b/app/controllers/api/v1/books/populate_controller.rb @@ -15,9 +15,9 @@ class PopulateController < Api::ApiController # We get a messy set of params in this method, so we don't use the normal # approach of strong parameter validation. We hardcode the only params # we care about. - # + # # With 5000 queries in large case, this takes 108 seconds... - # + # # rubocop:disable Layout/LineLength def update puts "[PopulateController] Request Size is #{number_to_human_size(query_doc_pairs_params.to_s.bytesize)}" diff --git a/app/controllers/api/v1/snapshots_controller.rb b/app/controllers/api/v1/snapshots_controller.rb index 124f47a93..5f51a263b 100644 --- a/app/controllers/api/v1/snapshots_controller.rb +++ b/app/controllers/api/v1/snapshots_controller.rb @@ -22,7 +22,7 @@ def authenticate_api! def index @snapshots = @case.snapshots - @shallow = params[:shallow] == 'true' + @shallow = 'true' == params[:shallow] @with_docs = false respond_with @snapshots @@ -34,24 +34,26 @@ def show end # rubocop:disable Metrics/MethodLength + # rubocop:disable Metrics/AbcSize + # rubocop:disable Layout/LineLength def create @snapshot = @case.snapshots.build(name: params[:snapshot][:name]) @snapshot.scorer = @case.scorer @snapshot.try = @case.tries.first if @snapshot.save - + serialized_data = Marshal.dump(snapshot_params) puts "[SnapshotController] the size of the serialized data is #{number_to_human_size(serialized_data.bytesize)}" compressed_data = Zlib::Deflate.deflate(serialized_data) puts "[SnapshotController] the size of the compressed data is #{number_to_human_size(compressed_data.bytesize)}" @snapshot.snapshot_file.attach(io: StringIO.new(compressed_data), filename: "snapshot_#{@snapshot.id}.bin.zip", - content_type: 'application/zip') - PopulateSnapshotJob.perform_later @snapshot + content_type: 'application/zip') + PopulateSnapshotJob.perform_later @snapshot Analytics::Tracker.track_snapshot_created_event current_user, @snapshot - + @with_docs = false # don't show individual docs in the response respond_with @snapshot else @@ -59,6 +61,8 @@ def create end end # rubocop:enable Metrics/MethodLength + # rubocop:enable Metrics/AbcSize + # rubocop:enable Layout/LineLength def destroy @snapshot.destroy @@ -79,7 +83,7 @@ def set_snapshot def check_snapshot render json: { error: 'Not Found!' }, status: :not_found unless @snapshot end - + def snapshot_params # avoid StrongParameters ;-( to faciliate sending params as # hash to ActiveJob via ActiveStorage by directly getting parameters from request diff --git a/app/controllers/sessions_controller.rb b/app/controllers/sessions_controller.rb index 930ca4e9a..ae6ee7843 100644 --- a/app/controllers/sessions_controller.rb +++ b/app/controllers/sessions_controller.rb @@ -10,7 +10,7 @@ def index def new @user = User.new - + if Rails.env.production? && Rails.application.config.devise.omniauth_providers.include?(:google_oauth2) # Google only lets us oAuth from https sites in production. @flag_not_on_https = false diff --git a/app/jobs/populate_snapshot_job.rb b/app/jobs/populate_snapshot_job.rb index 5f811730c..643b6f54b 100644 --- a/app/jobs/populate_snapshot_job.rb +++ b/app/jobs/populate_snapshot_job.rb @@ -5,8 +5,6 @@ class PopulateSnapshotJob < ApplicationJob sidekiq_options log_level: :warn # rubocop:disable Security/MarshalLoad - # rubocop:disable Metrics/MethodLength - # rubocop:disable Metrics/AbcSize def perform snapshot # down the road we should be using ActiveRecord-import and first_or_initialize instead. # See how snapshots are managed. @@ -23,12 +21,9 @@ def perform snapshot snapshot_queries = params[:snapshot][:queries] service.add_docs snapshot_docs, snapshot_queries if snapshot_docs - + snapshot.snapshot_file.purge snapshot.save - end # rubocop:enable Security/MarshalLoad - # rubocop:enable Metrics/MethodLength - # rubocop:enable Metrics/AbcSize end diff --git a/app/models/snapshot.rb b/app/models/snapshot.rb index 38460abb0..178f30475 100644 --- a/app/models/snapshot.rb +++ b/app/models/snapshot.rb @@ -31,7 +31,7 @@ class Snapshot < ApplicationRecord has_many :snapshot_queries, dependent: :destroy has_many :snapshot_docs, through: :snapshot_queries - + has_one_attached :snapshot_file # Validations diff --git a/test/integration/experiment_with_bulk_insert_test.rb b/test/integration/experiment_with_bulk_insert_test.rb index 2fa16745a..13ae7cd0f 100644 --- a/test/integration/experiment_with_bulk_insert_test.rb +++ b/test/integration/experiment_with_bulk_insert_test.rb @@ -8,8 +8,10 @@ class ExperimentWithBulkInsertTest < ActionDispatch::IntegrationTest let(:scorer) { scorers(:quepid_default_scorer) } let(:selection_strategy) { selection_strategies(:multiple_raters) } + # rubocop:disable Style/ClassVars @@skip_tests = true - + # rubocop:enable Style/ClassVars + test 'generate and export 5000 queries with traditional AR' do skip('Ignoring all tests in ExperimentWithBulkInsertTest') if @@skip_tests book = user.books.create name: '50000 Query Doc Pairs', scorer: scorer, selection_strategy: selection_strategy From 2752ac2278e97d4755794b65abb5b154113eb314 Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Thu, 25 Jan 2024 11:49:14 -0500 Subject: [PATCH 06/16] appears to all work... --- .../api/v1/snapshots_controller.rb | 4 +- app/jobs/populate_snapshot_job.rb | 9 +- app/services/snapshot_manager_copy.rb | 260 +++++++++++++++++ .../api/v1/snapshots/_snapshot.json.jbuilder | 2 +- .../api/v1/snapshots_controller_test.rb | 88 ++++-- test/services/snapshot_manager_copy_test.rb | 264 ++++++++++++++++++ 6 files changed, 591 insertions(+), 36 deletions(-) create mode 100644 app/services/snapshot_manager_copy.rb create mode 100644 test/services/snapshot_manager_copy_test.rb diff --git a/app/controllers/api/v1/snapshots_controller.rb b/app/controllers/api/v1/snapshots_controller.rb index 5f51a263b..2da9a6813 100644 --- a/app/controllers/api/v1/snapshots_controller.rb +++ b/app/controllers/api/v1/snapshots_controller.rb @@ -30,6 +30,7 @@ def index def show @shallow = params[:shallow] || false + @with_docs = true respond_with @snapshot end @@ -41,8 +42,9 @@ def create @snapshot.scorer = @case.scorer @snapshot.try = @case.tries.first + puts "Okay, checking snapshot queries: #{@snapshot.snapshot_queries.length}" if @snapshot.save - + puts "Okay2, checking snapshot queries: #{@snapshot.snapshot_queries.length}" serialized_data = Marshal.dump(snapshot_params) puts "[SnapshotController] the size of the serialized data is #{number_to_human_size(serialized_data.bytesize)}" diff --git a/app/jobs/populate_snapshot_job.rb b/app/jobs/populate_snapshot_job.rb index 643b6f54b..6f5187d88 100644 --- a/app/jobs/populate_snapshot_job.rb +++ b/app/jobs/populate_snapshot_job.rb @@ -13,17 +13,16 @@ def perform snapshot serialized_data = Zlib::Inflate.inflate(compressed_data) params = Marshal.load(serialized_data) - puts "[PopulateSnapshotJob] I am going to populate the snapshot with #{params[:snapshot][:queries].size} queries" - - service = SnapshotManager.new(snapshot) + service = SnapshotManagerCopy.new(snapshot) snapshot_docs = params[:snapshot][:docs] snapshot_queries = params[:snapshot][:queries] - service.add_docs snapshot_docs, snapshot_queries if snapshot_docs + service.add_docs snapshot_docs, snapshot_queries + + snapshot.reload # this appears to be required or we duplicate the snapshot_queries! snapshot.snapshot_file.purge - snapshot.save end # rubocop:enable Security/MarshalLoad end diff --git a/app/services/snapshot_manager_copy.rb b/app/services/snapshot_manager_copy.rb new file mode 100644 index 000000000..fc86405db --- /dev/null +++ b/app/services/snapshot_manager_copy.rb @@ -0,0 +1,260 @@ +# frozen_string_literal: true + +# rubocop:disable Metrics/ClassLength +class SnapshotManagerCopy + attr_reader :logger, :options + + def initialize snapshot, opts = {} + default_options = { + format: :csv, + logger: Rails.logger, + show_progress: false, + } + + @options = default_options.merge(opts.deep_symbolize_keys) + @logger = @options[:logger] + @snapshot = snapshot + end + + def show_progress? + options[:show_progress] + end + + # + # Adds docs to a snapshot, assuming snapshot is being created from the + # app and thus all the queries already exist for the case + # (so no need to create them). + # + # @param data, hash + # @return self + # + # Example: + # + # manager = SnapshotManager.new snapshot + # data = { + # 123 => [ + # { id: "doc1", explain: "1" }, + # { id: "doc2", explain: "2" }, + # ], + # 456 => [ + # { id: "doc3", explain: "3" }, + # { id: "doc4", explain: "4" }, + # ] + # } + # manager.add_docs data + # + def add_docs docs, queries + queries_to_import = [] + + keys = docs.nil? ? [] : docs.keys + + # Start by adding queries to snapshot. + # First, setup all queries to be added in an array. + # block_with_progress_bar(keys.length) do |i| + keys.length.times.each do |i| + query_id = keys[i] + + snapshot_query = @snapshot.snapshot_queries.where(query_id: query_id).first_or_initialize + + # Quepid front end can send -- as no score. + queries[query_id]['score'] = nil if '--' == queries[query_id]['score'] + snapshot_query.score = queries[query_id][:score] + snapshot_query.all_rated = queries[query_id][:all_rated] + snapshot_query.number_of_results = queries[query_id][:number_of_results] + + queries_to_import << snapshot_query + end + + # Second, mass insert queries. + SnapshotQuery.import queries_to_import + # End of queries import. + + # Then import docs for the queries that were just created. + # This method is shared with the `import_queries` method + # which does the same thing with a slightly different set of data. + import_docs keys, docs + + self + end + + # + # Imports queries and docs to a snapshot. + # If the query does not already exists, it adds it to the case first, + # then it adds it to the snapshot. + # + # @param queries, hash + # @return self + # + # Example: + # + # manager = SnapshotManager.new snapshot + # data = { + # "dog" => { + # docs: [ + # { id: "doc1", explain: "1", position: 1 }, + # { id: "doc2", explain: "2", position: 2 }, + # ] + # }, + # "cat" => { + # docs: [ + # { id: "doc3", explain: "3", position: 2 }, + # { id: "doc4", explain: "4", position: 1 }, + # ] + # } + # } + # manager.import_queries data + # + # rubocop:disable Metrics/MethodLength + def import_queries queries + queries_to_import = [] + keys = queries.keys + + # Fetch all queries for the snapshot's case where the query text + # matches the keys in the hash supplied in the params. + queries_params = { + query_text: keys, + case_id: @snapshot.case_id, + } + indexed_queries = Query.where(queries_params) + .all + .index_by(&:query_text) + + # Start by adding queries to snapshot. + # First, setup all queries to be added in an array. + # print_step 'Importing queries' + # block_with_progress_bar(keys.length) do |i| + keys.length.times.each do |i| + query_text = keys[i] + query = fetch_or_create_query indexed_queries, query_text + + snapshot_query = @snapshot.snapshot_queries.where(query_id: query.id).first_or_initialize + + queries[query.id] = queries.delete(keys[i]) + + queries_to_import << snapshot_query + end + + # Second, mass insert queries. + SnapshotQuery.import queries_to_import + # End of queries import. + + # Updates keys after we switched them out from the text to the id + keys = queries.keys + data = {} + queries.each { |key, q| data[key] = q[:docs] || q['docs'] } + + # Then import docs for the queries that were just created. + import_docs keys, data + + self + end + # rubocop:enable Metrics/MethodLength + + def csv_to_queries_hash docs + # print_step 'Transforming csv into a queries hash' + + query_docs = {} + # block_with_progress_bar(docs.length) do |i| + docs.length.times.each do |i| + row = extract_doc_info docs[i] + query_docs[row[:query_text]] ||= { docs: [] } + query_docs[row[:query_text]][:docs] << row + end + + query_docs + end + + private + + # rubocop:disable Metrics/MethodLength + def setup_docs_for_query query, docs + results = [] + + return results if docs.blank? + return results if query.blank? + + docs = normalize_docs_array docs + docs = docs.sort { |d1, d2| d1[:position].to_i <=> d2[:position].to_i } + + docs.each_with_index do |doc, index| + doc_params = { + doc_id: doc[:id], + explain: doc[:explain], + position: doc[:position] || (index + 1), + rated_only: doc[:rated_only] || false, + fields: doc[:fields].blank? ? nil : doc[:fields].to_json, + } + + results << query.snapshot_docs.build(doc_params) + end + + results + end + # rubocop:enable Metrics/MethodLength + + def extract_doc_info row + case @options[:format] + when :csv + { + query_text: row[0], + id: row[1], + position: row[2], + } + when :hash + row.deep_symbolize_keys + else + row + end + end + + def normalize_docs_array docs + return [] if docs.blank? + + result = docs.map do |each| + each = each.to_unsafe_h if each.is_a?(ActionController::Parameters) + each = each.to_hash if each.is_a?(ActiveSupport::HashWithIndifferentAccess) + + each.symbolize_keys! if each.present? + end.compact + + result + end + + def import_docs keys, data + docs_to_import = [] + + indexed_snap_queries = @snapshot.snapshot_queries + .where(query_id: keys) + .all + .index_by { |q| q.query_id.to_s } + + # print_step 'Importing docs' + # block_with_progress_bar(keys.length) do |i| + keys.length.times.each do |i| + query_id = keys[i] + docs = data[keys[i]] + + snapshot_query = indexed_snap_queries[query_id.to_s] + query_docs = setup_docs_for_query snapshot_query, docs + + docs_to_import += query_docs + end + + SnapshotDoc.import docs_to_import + + self + end + + def fetch_or_create_query indexed_queries, query_text + if indexed_queries[query_text].present? + indexed_queries[query_text] + else + query_params = { + query_text: query_text, + case_id: @snapshot.case_id, + } + Query.create(query_params) + end + end +end +# rubocop:enable Metrics/ClassLength diff --git a/app/views/api/v1/snapshots/_snapshot.json.jbuilder b/app/views/api/v1/snapshots/_snapshot.json.jbuilder index f556ca5ac..0c261afc3 100644 --- a/app/views/api/v1/snapshots/_snapshot.json.jbuilder +++ b/app/views/api/v1/snapshots/_snapshot.json.jbuilder @@ -5,7 +5,7 @@ shallow ||= false json.id snapshot.id json.name snapshot.name json.time snapshot.created_at -json.has_snapshot_file = snapshot.snapshot_file.present? +json.has_snapshot_file snapshot.snapshot_file.present? unless shallow json.scorer snapshot.scorer, partial: 'api/v1/scorers/communal_scorer', as: :scorer diff --git a/test/controllers/api/v1/snapshots_controller_test.rb b/test/controllers/api/v1/snapshots_controller_test.rb index ff9d07150..84f21d5bb 100644 --- a/test/controllers/api/v1/snapshots_controller_test.rb +++ b/test/controllers/api/v1/snapshots_controller_test.rb @@ -24,12 +24,12 @@ class SnapshotsControllerTest < ActionController::TestCase name: 'New Snapshot', docs: { first_query.id => [ - { id: 'doc1', explain: 1 }, - { id: 'doc2', explain: 2 } + { id: 'doc1', explain: '1' }, + { id: 'doc2', explain: '2' } ], second_query.id => [ - { id: 'doc3', explain: 3 }, - { id: 'doc4', explain: 4 } + { id: 'doc3', explain: '3' }, + { id: 'doc4', explain: '4' } ], }, queries: { @@ -49,27 +49,37 @@ class SnapshotsControllerTest < ActionController::TestCase post :create, params: data.merge(case_id: acase.id) assert_response :ok - snapshot = response.parsed_body + snapshot_response = response.parsed_body - assert_not_nil snapshot['time'] + assert_not_nil snapshot_response['scorer'] + assert_not_nil snapshot_response['try'] + assert_not_nil snapshot_response['time'] + assert_not_nil snapshot_response['id'] + assert_equal snapshot_response['name'], data[:snapshot][:name] - assert_equal snapshot['name'], data[:snapshot][:name] - assert_equal snapshot['docs'].length, data[:snapshot][:docs].length + perform_enqueued_jobs - data_doc = data[:snapshot][:docs][first_query.id][0] - response_doc = snapshot['docs'][first_query.id.to_s][0] + snapshot = Snapshot.find(snapshot_response['id']) - assert_equal data_doc[:id], response_doc['id'] - assert_equal data_doc[:explain], response_doc['explain'] + assert_equal snapshot.snapshot_docs.length, data[:snapshot][:docs].values.flatten.count + data_doc = data[:snapshot][:docs][first_query.id][0] + snapshot_query = snapshot.snapshot_queries.where(query_id: first_query.id).first + + response_doc = snapshot_query.snapshot_docs[0] + + assert_equal data_doc[:id], response_doc.doc_id + assert_equal data_doc[:explain], response_doc.explain + + snapshot_query = snapshot.snapshot_queries.where(query: second_query).first data_doc = data[:snapshot][:docs][second_query.id][0] - response_doc = snapshot['docs'][second_query.id.to_s][0] + response_doc = snapshot_query.snapshot_docs[0] - assert_equal data_doc[:id], response_doc['id'] - assert_equal data_doc[:explain], response_doc['explain'] + assert_equal data_doc[:id], response_doc.doc_id + assert_equal data_doc[:explain], response_doc.explain - assert_not_nil snapshot['scorer'] - assert_not_nil snapshot['try'] + assert_not_nil snapshot.scorer + assert_not_nil snapshot.try end end @@ -79,8 +89,8 @@ class SnapshotsControllerTest < ActionController::TestCase name: 'New Snapshot', docs: { first_query.id => [ - { id: 'doc1', explain: 1 }, - { id: 'doc2', explain: 2 } + { id: 'doc1', explain: '1' }, + { id: 'doc2', explain: '2' } ], # in Rails 4, we could do second_query.id => [] and getting the second_query in, # but in Rails 5, the second_query doesn't show up because the array that is empty @@ -105,20 +115,39 @@ class SnapshotsControllerTest < ActionController::TestCase assert_response :ok - snapshot = response.parsed_body + snapshot_response = response.parsed_body + + assert_equal data[:snapshot][:name], snapshot_response['name'] + assert_not_nil snapshot_response['id'] + + snapshot = Snapshot.find(snapshot_response['id']) + assert snapshot.snapshot_file.present? + assert_equal snapshot.snapshot_queries.length, 0 + + perform_enqueued_jobs - assert_equal data[:snapshot][:name], snapshot['name'] - assert_equal data[:snapshot][:docs].length, snapshot['docs'].length + snapshot = Snapshot.find(snapshot_response['id']) + assert_not snapshot.snapshot_file.present? + assert_equal snapshot.snapshot_queries.length, 2 + assert_equal data[:snapshot][:docs].length, snapshot.snapshot_docs.length - data_doc = data[:snapshot][:docs][first_query.id][0] - response_doc = snapshot['docs'][first_query.id.to_s][0] + data_doc = data[:snapshot][:docs][first_query.id][0] - assert_equal data_doc[:id], response_doc['id'] - assert_equal data_doc[:explain], response_doc['explain'] + snapshot_query = snapshot.snapshot_queries.where(query_id: first_query.id).first - response_docs = snapshot['docs'][second_query.id.to_s] + response_doc = snapshot_query.snapshot_docs[0] + + assert_equal data_doc[:id], response_doc.doc_id + assert_equal data_doc[:explain], response_doc.explain + + snapshot_query = snapshot.snapshot_queries.where(query_id: second_query.id).first + response_docs = snapshot_query.snapshot_docs assert_empty response_docs + + perform_enqueued_jobs + + assert_equal snapshot.snapshot_queries.length, 2 end end @@ -216,8 +245,9 @@ class SnapshotsControllerTest < ActionController::TestCase let(:data) do { snapshot: { - name: 'New Snapshot', - docs: {}, + name: 'New Snapshot', + docs: {}, + queries: {}, }, } end diff --git a/test/services/snapshot_manager_copy_test.rb b/test/services/snapshot_manager_copy_test.rb new file mode 100644 index 000000000..84f649b14 --- /dev/null +++ b/test/services/snapshot_manager_copy_test.rb @@ -0,0 +1,264 @@ +# frozen_string_literal: true + +require 'test_helper' + +class SnapshotManagerCopyTest < ActiveSupport::TestCase + let(:snapshot) { snapshots(:empty_snapshot) } + let(:first_query) { queries(:a_query) } + let(:second_query) { queries(:b_query) } + + let(:service) { SnapshotManagerCopy.new(snapshot) } + + describe 'Adds docs to a snapshot' do + test 'adds snapshot to case' do + sample_fields = { title: 'some title', 'thumb:product_image': 'http://example.com/image.png' } + + data = { + docs: { + first_query.id => [ + { id: 'doc1', explain: '1', fields: sample_fields }, + { id: 'doc2', explain: '2', fields: sample_fields } + ], + second_query.id => [ + { id: 'doc3', explain: '3', fields: sample_fields }, + { id: 'doc4', explain: '4', fields: sample_fields } + ], + }, + queries: { + first_query.id => { + score: 0.87, + all_rated: true, + number_of_results: 42, + }, + second_query.id => { + score: 0.45, + all_rated: false, + number_of_results: nil, + }, + }, + } + + assert_difference 'snapshot.snapshot_queries.count', 2 do + service.add_docs data[:docs], data[:queries] + + # This is needed or else we get wrong numbers + snapshot.reload + snapshot_queries = snapshot.snapshot_queries + + assert_equal snapshot_queries.length, data[:docs].length + + first_snapshot_query = snapshot_queries.where(query_id: first_query.id).first + second_snapshot_query = snapshot_queries.where(query_id: second_query.id).first + + assert_not_nil first_snapshot_query + assert_equal first_snapshot_query.query_id, first_query.id + assert_equal first_snapshot_query.score, 0.87 + assert_equal true, first_snapshot_query.all_rated + assert_equal 42, first_snapshot_query.number_of_results + + assert_not_nil second_snapshot_query + assert_equal second_snapshot_query.query_id, second_query.id + assert_equal second_snapshot_query.score, 0.45 + assert_equal false, second_snapshot_query.all_rated + assert_nil second_snapshot_query.number_of_results + + data_doc = data[:docs][first_query.id][0] + response_doc = first_snapshot_query.snapshot_docs[0] + + assert_equal data_doc[:id], response_doc.doc_id + assert_equal data_doc[:explain], response_doc.explain + assert_equal 1, response_doc.position + assert_equal sample_fields.to_json, response_doc.fields + + data_doc = data[:docs][second_query.id][0] + response_doc = second_snapshot_query.snapshot_docs[0] + + assert_equal data_doc[:id], response_doc.doc_id + assert_equal data_doc[:explain], response_doc.explain + assert_equal 1, response_doc.position + assert_equal sample_fields.to_json, response_doc.fields + end + end + end + + describe 'Import queries' do + test 'creates queries if they do not already exist' do + data = { + 'dog' => { + docs: [ + { id: 'doc1', explain: '1', position: 1 }, + { id: 'doc2', explain: '2', position: 2 } + ], + }, + 'cat' => { + docs: [ + { id: 'doc3', explain: '3', position: 2 }, + { id: 'doc4', explain: '4', position: 1 } + ], + }, + } + + assert_difference 'Query.count', 2 do + service.import_queries data + + # This is needed or else we get wrong numbers + snapshot.reload + queries = snapshot.snapshot_queries + + assert_equal queries.length, data.length + + first_query = Query.where(query_text: 'dog', case_id: snapshot.case_id).first + second_query = Query.where(query_text: 'cat', case_id: snapshot.case_id).first + + first_snapshot_query = queries.where(query_id: first_query.id).first + second_snapshot_query = queries.where(query_id: second_query.id).first + + assert_not_nil first_snapshot_query + assert_equal first_snapshot_query.query_id, first_query.id + + assert_not_nil second_snapshot_query + assert_equal second_snapshot_query.query_id, second_query.id + + data_doc = data[first_query.id][:docs][0] + response_doc = first_snapshot_query.snapshot_docs[0] + + assert_equal data_doc[:id], response_doc.doc_id + assert_equal data_doc[:explain], response_doc.explain + assert_equal data_doc[:position], response_doc.position + assert_equal 1, response_doc.position + + data_doc = data[second_query.id][:docs][0] + response_doc = second_snapshot_query.snapshot_docs[1] + + assert_equal data_doc[:id], response_doc.doc_id + assert_equal data_doc[:explain], response_doc.explain + assert_equal data_doc[:position], response_doc.position + assert_equal 2, response_doc.position + end + end + + test 'does not create queries if they already exist' do + data = { + first_query.query_text => { + docs: [ + { id: 'doc1', explain: '1', position: 1 }, + { id: 'doc2', explain: '2', position: 2 } + ], + }, + second_query.query_text => { + docs: [ + { id: 'doc3', explain: '3', position: 2 }, + { id: 'doc4', explain: '4', position: 1 } + ], + }, + } + + assert_no_difference 'Query.count' do + service.import_queries data + + # This is needed or else we get wrong numbers + snapshot.reload + queries = snapshot.snapshot_queries + + assert_equal queries.length, data.length + + first_snapshot_query = queries.where(query_id: first_query.id).first + second_snapshot_query = queries.where(query_id: second_query.id).first + + assert_not_nil first_snapshot_query + assert_equal first_snapshot_query.query_id, first_query.id + + assert_not_nil second_snapshot_query + assert_equal second_snapshot_query.query_id, second_query.id + + data_doc = data[first_query.id][:docs][0] + response_doc = first_snapshot_query.snapshot_docs[0] + + assert_equal data_doc[:id], response_doc.doc_id + assert_equal data_doc[:explain], response_doc.explain + assert_equal data_doc[:position], response_doc.position + assert_equal 1, response_doc.position + + data_doc = data[second_query.id][:docs][0] + response_doc = second_snapshot_query.snapshot_docs[1] + + assert_equal data_doc[:id], response_doc.doc_id + assert_equal data_doc[:explain], response_doc.explain + assert_equal data_doc[:position], response_doc.position + assert_equal 2, response_doc.position + end + end + + test 'handles query data with empty docs' do + data = { + first_query.query_text => { + docs: [], + }, + second_query.query_text => { + docs: [], + }, + } + + service.import_queries data + + # This is needed or else we get wrong numbers + snapshot.reload + queries = snapshot.snapshot_queries + + assert_equal queries.length, data.length + + first_snapshot_query = queries.where(query_id: first_query.id).first + second_snapshot_query = queries.where(query_id: second_query.id).first + + assert_not_nil first_snapshot_query + assert_not_nil second_snapshot_query + + data_doc = data[first_query.id][:docs] + response_doc = first_snapshot_query.snapshot_docs + + assert_equal data_doc.length, response_doc.length + + data_doc = data[second_query.id][:docs] + response_doc = second_snapshot_query.snapshot_docs + + assert_equal data_doc.length, response_doc.length + end + + test 'handles query data with array of nil docs' do + data = { + first_query.query_text => { + docs: [ nil, nil ], + }, + second_query.query_text => { + docs: [ + { id: 'doc3', explain: '3', position: 2 }, + { id: 'doc4', explain: '4', position: 1 } + ], + }, + } + + service.import_queries data + + # This is needed or else we get wrong numbers + snapshot.reload + queries = snapshot.snapshot_queries + + assert_equal queries.length, data.length + + first_snapshot_query = queries.where(query_id: first_query.id).first + second_snapshot_query = queries.where(query_id: second_query.id).first + + assert_not_nil first_snapshot_query + assert_not_nil second_snapshot_query + + response_doc = first_snapshot_query.snapshot_docs + + assert_equal 0, response_doc.length # coz they were all nils + + data_doc = data[second_query.id][:docs] + response_doc = second_snapshot_query.snapshot_docs + + assert_equal data_doc.length, response_doc.length + end + end +end From da5597e7f93846339cc0cdf83d638a427dda9f61 Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Fri, 26 Jan 2024 11:42:19 -0500 Subject: [PATCH 07/16] lots of perf fixes and other changes! --- .../javascripts/components/diff/_modal.html | 4 +++ .../diff/diff_modal_instance_controller.js | 28 +++++++++++++++++-- .../components/export_case/_modal.html | 4 +++ .../export_case_modal_instance_controller.js | 21 ++++++++++++++ .../javascripts/controllers/mainCtrl.js | 2 +- .../javascripts/services/querySnapshotSvc.js | 19 +++++++++---- .../javascripts/services/snapshotFactory.js | 1 + app/controllers/api/v1/books_controller.rb | 11 +++++++- .../api/v1/snapshots_controller.rb | 16 +++++------ app/controllers/books_controller.rb | 9 ++++-- app/models/book.rb | 9 +++++- app/models/snapshot.rb | 2 +- app/models/snapshot_query.rb | 2 +- app/views/home/_case_summary.html.erb | 2 +- .../_moar_judgements_needed.html.erb | 5 ++-- 15 files changed, 109 insertions(+), 26 deletions(-) diff --git a/app/assets/javascripts/components/diff/_modal.html b/app/assets/javascripts/components/diff/_modal.html index 444c30245..d3da36859 100644 --- a/app/assets/javascripts/components/diff/_modal.html +++ b/app/assets/javascripts/components/diff/_modal.html @@ -34,6 +34,10 @@ Snapshot has the id {{ ctrl.selection }} + +
diff --git a/app/assets/javascripts/components/diff/diff_modal_instance_controller.js b/app/assets/javascripts/components/diff/diff_modal_instance_controller.js index ad964fd0a..8d7aa232e 100644 --- a/app/assets/javascripts/components/diff/diff_modal_instance_controller.js +++ b/app/assets/javascripts/components/diff/diff_modal_instance_controller.js @@ -18,8 +18,11 @@ angular.module('QuepidApp') ) { var ctrl = this; - // Attributes - ctrl.snapshots = querySnapshotSvc.snapshots; + querySnapshotSvc.getSnapshots().then(function() { + ctrl.snapshots = querySnapshotSvc.snapshots; + } + ); + ctrl.which = 'snapshot'; ctrl.selection = initialSelection; ctrl.inProgress = false; @@ -32,6 +35,8 @@ angular.module('QuepidApp') ctrl.nothingSelected = nothingSelected; ctrl.ok = ok; ctrl.toggleDel = toggleDel; + ctrl.isProcessingFile = isProcessingFile; + // Watches $scope.$watch('ctrl.selection', function(newVal, oldVal) { @@ -72,6 +77,25 @@ angular.module('QuepidApp') flash.success = 'Snapshot deleted successfully.'; }); } + + function isProcessingFile() { + if (ctrl.snapshots){ + var desiredSnapshot = null; + angular.forEach(ctrl.snapshots, function(snapshot) { + if (snapshot.id === ctrl.selection) { + desiredSnapshot = snapshot; + return; // exit the loop early + } + }); + if (desiredSnapshot){ + return desiredSnapshot.hasSnapshotFile; + } + else { + return false; + } + } + return false; + } function isNumber(num) { return !isNaN(parseInt('' + num, 10)); diff --git a/app/assets/javascripts/components/export_case/_modal.html b/app/assets/javascripts/components/export_case/_modal.html index cd21e263e..92e511bca 100644 --- a/app/assets/javascripts/components/export_case/_modal.html +++ b/app/assets/javascripts/components/export_case/_modal.html @@ -50,6 +50,10 @@

diff --git a/app/assets/javascripts/components/export_case/export_case_modal_instance_controller.js b/app/assets/javascripts/components/export_case/export_case_modal_instance_controller.js index e422efe6d..7bafab114 100644 --- a/app/assets/javascripts/components/export_case/export_case_modal_instance_controller.js +++ b/app/assets/javascripts/components/export_case/export_case_modal_instance_controller.js @@ -12,6 +12,7 @@ angular.module('QuepidApp') ctrl.theCase = theCase; ctrl.supportsDetailedExport = supportsDetailedExport; + ctrl.isProcessingFile = isProcessingFile; // If called from the cases listing page, then we need the call back with the bootstrap, // otherwise on the main page the querySnapshotSvc.snapshots was bootstrapped. @@ -47,5 +48,25 @@ angular.module('QuepidApp') ctrl.cancel = function () { $uibModalInstance.dismiss('cancel'); }; + + + function isProcessingFile() { + if (ctrl.options.snapshot){ + var desiredSnapshot = null; + angular.forEach(ctrl.snapshots, function(snapshot) { + if (snapshot.id === ctrl.selection) { + desiredSnapshot = snapshot; + return; // exit the loop early + } + }); + if (desiredSnapshot){ + return desiredSnapshot.hasSnapshotFile; + } + else { + return false; + } + } + return false; + } } ]); diff --git a/app/assets/javascripts/controllers/mainCtrl.js b/app/assets/javascripts/controllers/mainCtrl.js index 8e16b7fd0..85c16c2ca 100644 --- a/app/assets/javascripts/controllers/mainCtrl.js +++ b/app/assets/javascripts/controllers/mainCtrl.js @@ -147,7 +147,7 @@ angular.module('QuepidApp') bootstrapCase() .then(function() { loadQueries(); - loadSnapshots(); + loadSnapshots(); // this is here just to set the caseNo in the querySnapshotSvc. updateCaseMetadata(); paneSvc.refreshElements(); }).catch(function(error) { diff --git a/app/assets/javascripts/services/querySnapshotSvc.js b/app/assets/javascripts/services/querySnapshotSvc.js index ccff45a0f..dc2d13690 100644 --- a/app/assets/javascripts/services/querySnapshotSvc.js +++ b/app/assets/javascripts/services/querySnapshotSvc.js @@ -64,6 +64,20 @@ angular.module('QuepidApp') }); }); }; + + // Now that we process snapshots async, we + // don't want to cache the data + this.getSnapshots = function() { + this.snapshots = {}; + + return $http.get('api/cases/' + caseNo + '/snapshots?shallow=true') + .then(function(response) { + return addSnapshotResp(response.data.snapshots) + .then(function() { + version++; + }); + }); + }; this.addSnapshot = function(name, recordDocumentFields, queries) { // we may want to refactor the payload structure in the future. @@ -130,11 +144,6 @@ angular.module('QuepidApp') .then(function() { version++; }); - // return addSnapshotResp([response.data]) - // .then(function() { - // version++; - // }); - //}); }; this.deleteSnapshot = function(snapshotId) { diff --git a/app/assets/javascripts/services/snapshotFactory.js b/app/assets/javascripts/services/snapshotFactory.js index 2584c5208..96672cd2e 100644 --- a/app/assets/javascripts/services/snapshotFactory.js +++ b/app/assets/javascripts/services/snapshotFactory.js @@ -18,6 +18,7 @@ self.id = params.id; self.name = snapshotName; self.time = params.time; + self.hasSnapshotFile = params.has_snapshot_file; self.docs = params.docs; self.queries = params.queries; diff --git a/app/controllers/api/v1/books_controller.rb b/app/controllers/api/v1/books_controller.rb index 0cc777285..5877de607 100644 --- a/app/controllers/api/v1/books_controller.rb +++ b/app/controllers/api/v1/books_controller.rb @@ -4,6 +4,7 @@ module Api module V1 + # rubocop:disable Metrics/ClassLength class BooksController < Api::ApiController before_action :set_book, only: [ :show, :update, :destroy ] before_action :check_book, only: [ :show, :update, :destroy ] @@ -28,6 +29,7 @@ def index # rubocop:disable Metrics/AbcSize # rubocop:disable Metrics/CyclomaticComplexity # rubocop:disable Metrics/PerceivedComplexity + # rubocop:disable Metrics/BlockLength api :GET, '/api/books/:book_id', 'Show the book with the given ID.' param :id, :number, @@ -40,7 +42,11 @@ def show csv_headers = %w[query docid] # Only return rateable judgements, filter out the unrateable ones. - unique_raters = @book.judgements.rateable.preload(:user).collect(&:user).uniq + # unique_raters = @book.judgements.rateable.preload(:user).collect(&:user).uniq + unique_raters = @book.judges.merge(Judgement.rateable) + + puts 'HERE COME THE RATERS' + pp unique_raters # this logic about using email versus name is kind of awful. Think about user.full_name or user.identifier? unique_raters.each do |rater| @@ -72,6 +78,8 @@ def show # rubocop:enable Metrics/AbcSize # rubocop:enable Metrics/CyclomaticComplexity # rubocop:enable Metrics/PerceivedComplexity + # rubocop:enable Metrics/BlockLength + api :POST, '/api/books', 'Create a new book.' param_group :book def create @@ -135,5 +143,6 @@ def make_csv_safe str end end end + # rubocop:enable Metrics/ClassLength end end diff --git a/app/controllers/api/v1/snapshots_controller.rb b/app/controllers/api/v1/snapshots_controller.rb index 2da9a6813..714e2a95b 100644 --- a/app/controllers/api/v1/snapshots_controller.rb +++ b/app/controllers/api/v1/snapshots_controller.rb @@ -34,22 +34,18 @@ def show respond_with @snapshot end - # rubocop:disable Metrics/MethodLength - # rubocop:disable Metrics/AbcSize # rubocop:disable Layout/LineLength def create @snapshot = @case.snapshots.build(name: params[:snapshot][:name]) @snapshot.scorer = @case.scorer @snapshot.try = @case.tries.first - puts "Okay, checking snapshot queries: #{@snapshot.snapshot_queries.length}" if @snapshot.save - puts "Okay2, checking snapshot queries: #{@snapshot.snapshot_queries.length}" serialized_data = Marshal.dump(snapshot_params) - puts "[SnapshotController] the size of the serialized data is #{number_to_human_size(serialized_data.bytesize)}" + # puts "[SnapshotController] the size of the serialized data is #{number_to_human_size(serialized_data.bytesize)}" compressed_data = Zlib::Deflate.deflate(serialized_data) - puts "[SnapshotController] the size of the compressed data is #{number_to_human_size(compressed_data.bytesize)}" + # puts "[SnapshotController] the size of the compressed data is #{number_to_human_size(compressed_data.bytesize)}" @snapshot.snapshot_file.attach(io: StringIO.new(compressed_data), filename: "snapshot_#{@snapshot.id}.bin.zip", content_type: 'application/zip') PopulateSnapshotJob.perform_later @snapshot @@ -62,11 +58,14 @@ def create render json: @snapshot.errors, status: :bad_request end end - # rubocop:enable Metrics/MethodLength - # rubocop:enable Metrics/AbcSize # rubocop:enable Layout/LineLength def destroy + # SnapshotDoc.joins(snapshot_query: :snapshot) + # .where(snapshot_queries: { snapshot: @snapshot }) + # .delete_all + @snapshot.snapshot_docs.delete_all + @snapshot.snapshot_queries.delete_all @snapshot.destroy Analytics::Tracker.track_snapshot_deleted_event current_user, @snapshot @@ -78,7 +77,6 @@ def destroy def set_snapshot @snapshot = @case.snapshots .where(id: params[:id]) - .includes([ snapshot_queries: [ :snapshot_docs ] ]) .first end diff --git a/app/controllers/books_controller.rb b/app/controllers/books_controller.rb index 5651d6db5..ad4fa0284 100644 --- a/app/controllers/books_controller.rb +++ b/app/controllers/books_controller.rb @@ -28,11 +28,16 @@ def show @book.cases.each do |kase| @count_of_anonymous_case_judgements += kase.ratings.where(user: nil).count end + + @moar_judgements_needed = @book.judgements.where(user: current_user).count < @book.query_doc_pairs.count @cases = @book.cases @leaderboard_data = [] @stats_data = [] - unique_judges = @book.judgements.preload(:user).collect(&:user).uniq - unique_judges.each do |judge| + + unique_judge_ids = @book.query_doc_pairs.joins(:judgements) + .distinct.pluck(:user_id) + unique_judge_ids.each do |judge_id| + judge = User.find(judge_id) unless judge_id.nil? @leaderboard_data << { judge: judge.nil? ? 'anonymous' : judge.name, judgements: @book.judgements.where(user: judge).count } @stats_data << { diff --git a/app/models/book.rb b/app/models/book.rb index 4832066ad..55934325d 100644 --- a/app/models/book.rb +++ b/app/models/book.rb @@ -35,10 +35,17 @@ class Book < ApplicationRecord belongs_to :selection_strategy belongs_to :scorer has_many :query_doc_pairs, dependent: :destroy, autosave: true - has_many :judgements, -> { order('query_doc_pair_id') }, + has_many :ordered_judgements, -> { order('query_doc_pair_id') }, + through: :query_doc_pairs, + dependent: :destroy, + class_name: 'Judgement' + + has_many :judgements, through: :query_doc_pairs, dependent: :destroy + # has_many :judges, -> { distinct }, through: :judgements, class_name: 'User', source: :user + has_many :cases, dependent: :nullify has_many :rated_query_doc_pairs, -> { has_judgements }, diff --git a/app/models/snapshot.rb b/app/models/snapshot.rb index 178f30475..44179f7bc 100644 --- a/app/models/snapshot.rb +++ b/app/models/snapshot.rb @@ -28,7 +28,7 @@ class Snapshot < ApplicationRecord belongs_to :case, optional: true # shouldn't be optional! belongs_to :try, optional: true # shouldn't be optional! belongs_to :scorer, optional: true # shouldn't be optional! - has_many :snapshot_queries, dependent: :destroy + has_many :snapshot_queries, dependent: :delete_all has_many :snapshot_docs, through: :snapshot_queries diff --git a/app/models/snapshot_query.rb b/app/models/snapshot_query.rb index 06b29ec0e..d28d2e0bd 100644 --- a/app/models/snapshot_query.rb +++ b/app/models/snapshot_query.rb @@ -26,6 +26,6 @@ class SnapshotQuery < ApplicationRecord belongs_to :snapshot, optional: true # shouldn't be belongs_to :query, optional: true # shouldn't be has_many :snapshot_docs, -> { order(position: :asc) }, - dependent: :destroy, + dependent: :delete_all, inverse_of: :snapshot_query end diff --git a/app/views/home/_case_summary.html.erb b/app/views/home/_case_summary.html.erb index d49b0d96a..12d387879 100644 --- a/app/views/home/_case_summary.html.erb +++ b/app/views/home/_case_summary.html.erb @@ -5,7 +5,7 @@
-
<%= kase.last_score.score unless kase.scores.empty? %> <%= kase.scorer.name %>
+
<%= number_with_precision(kase.last_score.score, precision: 2) unless kase.scores.empty? %> <%= kase.scorer.name %>

<%= kase.created_at.to_date.to_fs(:short) %> - <%= kase.last_score.updated_at.to_date.to_fs(:short) unless kase.scores.empty?%>

<% diff --git a/app/views/judgements/_moar_judgements_needed.html.erb b/app/views/judgements/_moar_judgements_needed.html.erb index 9b9d7f8d6..4135272dd 100644 --- a/app/views/judgements/_moar_judgements_needed.html.erb +++ b/app/views/judgements/_moar_judgements_needed.html.erb @@ -1,4 +1,5 @@ -<% if SelectionStrategy.random_query_doc_based_on_strategy(book, current_user) %> +hi: <%= @moar_judgements_needed %> +<% if @moar_judgements_needed %> @@ -35,7 +36,7 @@ <% else %> <% end %> <% end %> From 366678a26243681c82d77c9ce2777968f66e5e37 Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Tue, 30 Jan 2024 14:43:25 -0500 Subject: [PATCH 08/16] more fixes --- .../export_case_modal_instance_controller.js | 5 ++-- .../javascripts/services/querySnapshotSvc.js | 7 ++++-- app/controllers/api/v1/books_controller.rb | 25 +++++++++++-------- .../api/v1/snapshots_controller.rb | 7 +++--- app/controllers/judgements_controller.rb | 2 +- app/models/book.rb | 4 --- .../angular/services/querySnapshotSvc_spec.js | 2 +- 7 files changed, 27 insertions(+), 25 deletions(-) diff --git a/app/assets/javascripts/components/export_case/export_case_modal_instance_controller.js b/app/assets/javascripts/components/export_case/export_case_modal_instance_controller.js index 7bafab114..0bee2dcf3 100644 --- a/app/assets/javascripts/components/export_case/export_case_modal_instance_controller.js +++ b/app/assets/javascripts/components/export_case/export_case_modal_instance_controller.js @@ -12,7 +12,6 @@ angular.module('QuepidApp') ctrl.theCase = theCase; ctrl.supportsDetailedExport = supportsDetailedExport; - ctrl.isProcessingFile = isProcessingFile; // If called from the cases listing page, then we need the call back with the bootstrap, // otherwise on the main page the querySnapshotSvc.snapshots was bootstrapped. @@ -50,7 +49,7 @@ angular.module('QuepidApp') }; - function isProcessingFile() { + ctrl.isProcessingFile = function () { if (ctrl.options.snapshot){ var desiredSnapshot = null; angular.forEach(ctrl.snapshots, function(snapshot) { @@ -67,6 +66,6 @@ angular.module('QuepidApp') } } return false; - } + }; } ]); diff --git a/app/assets/javascripts/services/querySnapshotSvc.js b/app/assets/javascripts/services/querySnapshotSvc.js index dc2d13690..6875a6d0a 100644 --- a/app/assets/javascripts/services/querySnapshotSvc.js +++ b/app/assets/javascripts/services/querySnapshotSvc.js @@ -141,8 +141,11 @@ angular.module('QuepidApp') }; return $http.post('api/cases/' + caseNo + '/snapshots', saved) - .then(function() { - version++; + .then(function(response) { + return addSnapshotResp([response.data]) + .then(function() { + version++; + }); }); }; diff --git a/app/controllers/api/v1/books_controller.rb b/app/controllers/api/v1/books_controller.rb index 5877de607..3c65c9d2b 100644 --- a/app/controllers/api/v1/books_controller.rb +++ b/app/controllers/api/v1/books_controller.rb @@ -43,25 +43,30 @@ def show # Only return rateable judgements, filter out the unrateable ones. # unique_raters = @book.judgements.rateable.preload(:user).collect(&:user).uniq - unique_raters = @book.judges.merge(Judgement.rateable) + #unique_raters = @book.judges.merge(Judgement.rateable) + unique_judge_ids = @book.query_doc_pairs.joins(:judgements) + .distinct.pluck(:user_id) + - puts 'HERE COME THE RATERS' - pp unique_raters # this logic about using email versus name is kind of awful. Think about user.full_name or user.identifier? - unique_raters.each do |rater| - csv_headers << make_csv_safe(if rater.nil? - 'Unknown' + unique_judges = [] + unique_judge_ids.each do |judge_id| + judge = User.find(judge_id) unless judge_id.nil? + unique_judges << judge + csv_headers << make_csv_safe(if judge.nil? + 'anonymous' else - rater.name.presence || rater.email + judge.name.presence || judge.email end) end @csv_array << csv_headers - @book.query_doc_pairs.each do |qdp| + query_doc_pairs = @book.query_doc_pairs.include(:judgements) + query_doc_pairs.each do |qdp| row = [ make_csv_safe(qdp.query_text), qdp.doc_id ] - unique_raters.each do |rater| - judgement = qdp.judgements.detect { |j| j.user == rater } + unique_judges.each do |judge| + judgement = qdp.judgements.detect { |j| j.user == judge } rating = judgement.nil? ? '' : judgement.rating row.append rating diff --git a/app/controllers/api/v1/snapshots_controller.rb b/app/controllers/api/v1/snapshots_controller.rb index 714e2a95b..30c81179c 100644 --- a/app/controllers/api/v1/snapshots_controller.rb +++ b/app/controllers/api/v1/snapshots_controller.rb @@ -61,10 +61,9 @@ def create # rubocop:enable Layout/LineLength def destroy - # SnapshotDoc.joins(snapshot_query: :snapshot) - # .where(snapshot_queries: { snapshot: @snapshot }) - # .delete_all - @snapshot.snapshot_docs.delete_all + SnapshotDoc.joins(snapshot_query: :snapshot) + .where(snapshot_queries: { snapshot: @snapshot }) + .delete_all @snapshot.snapshot_queries.delete_all @snapshot.destroy Analytics::Tracker.track_snapshot_deleted_event current_user, @snapshot diff --git a/app/controllers/judgements_controller.rb b/app/controllers/judgements_controller.rb index 80d13467b..8fa8bb5f1 100644 --- a/app/controllers/judgements_controller.rb +++ b/app/controllers/judgements_controller.rb @@ -8,7 +8,7 @@ def index bool = ActiveRecord::Type::Boolean.new @shallow = bool.deserialize(params[:shallow] || true ) - @judgements = @book.judgements.includes([ :query_doc_pair, :user ]) + @judgements = @book.judgements.includes([ :query_doc_pair, :user ]).order('query_doc_pair_id') end def show diff --git a/app/models/book.rb b/app/models/book.rb index 55934325d..c74c1c1cd 100644 --- a/app/models/book.rb +++ b/app/models/book.rb @@ -35,10 +35,6 @@ class Book < ApplicationRecord belongs_to :selection_strategy belongs_to :scorer has_many :query_doc_pairs, dependent: :destroy, autosave: true - has_many :ordered_judgements, -> { order('query_doc_pair_id') }, - through: :query_doc_pairs, - dependent: :destroy, - class_name: 'Judgement' has_many :judgements, through: :query_doc_pairs, diff --git a/spec/javascripts/angular/services/querySnapshotSvc_spec.js b/spec/javascripts/angular/services/querySnapshotSvc_spec.js index b4d4f136a..2319e51b2 100644 --- a/spec/javascripts/angular/services/querySnapshotSvc_spec.js +++ b/spec/javascripts/angular/services/querySnapshotSvc_spec.js @@ -149,7 +149,7 @@ describe('Service: querySnapshotSvc', function () { $httpBackend.verifyNoOutstandingExpectation(); }); - it('doesnt update version if same case', function() { + it('does not update version if same case', function() { var priorVersion = querySnapshotSvc.version(); querySnapshotSvc.bootstrap(2); querySnapshotSvc.bootstrap(2); From 75c6800ced794e34ec63f6e47c8146847697a205 Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Tue, 30 Jan 2024 15:40:51 -0500 Subject: [PATCH 09/16] lint and fix test --- app/controllers/api/v1/books_controller.rb | 6 ++---- app/controllers/api/v1/snapshots_controller.rb | 2 +- test/controllers/api/v1/books_controller_test.rb | 2 +- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/app/controllers/api/v1/books_controller.rb b/app/controllers/api/v1/books_controller.rb index 3c65c9d2b..a9a2983b5 100644 --- a/app/controllers/api/v1/books_controller.rb +++ b/app/controllers/api/v1/books_controller.rb @@ -43,11 +43,9 @@ def show # Only return rateable judgements, filter out the unrateable ones. # unique_raters = @book.judgements.rateable.preload(:user).collect(&:user).uniq - #unique_raters = @book.judges.merge(Judgement.rateable) + # unique_raters = @book.judges.merge(Judgement.rateable) unique_judge_ids = @book.query_doc_pairs.joins(:judgements) .distinct.pluck(:user_id) - - # this logic about using email versus name is kind of awful. Think about user.full_name or user.identifier? unique_judges = [] @@ -62,7 +60,7 @@ def show end @csv_array << csv_headers - query_doc_pairs = @book.query_doc_pairs.include(:judgements) + query_doc_pairs = @book.query_doc_pairs.includes(:judgements) query_doc_pairs.each do |qdp| row = [ make_csv_safe(qdp.query_text), qdp.doc_id ] unique_judges.each do |judge| diff --git a/app/controllers/api/v1/snapshots_controller.rb b/app/controllers/api/v1/snapshots_controller.rb index 30c81179c..b673f8665 100644 --- a/app/controllers/api/v1/snapshots_controller.rb +++ b/app/controllers/api/v1/snapshots_controller.rb @@ -62,7 +62,7 @@ def create def destroy SnapshotDoc.joins(snapshot_query: :snapshot) - .where(snapshot_queries: { snapshot: @snapshot }) + .where(snapshot_queries: { snapshot: @snapshot }) .delete_all @snapshot.snapshot_queries.delete_all @snapshot.destroy diff --git a/test/controllers/api/v1/books_controller_test.rb b/test/controllers/api/v1/books_controller_test.rb index 57461a1ea..d2091373b 100644 --- a/test/controllers/api/v1/books_controller_test.rb +++ b/test/controllers/api/v1/books_controller_test.rb @@ -121,7 +121,7 @@ class BooksControllerTest < ActionController::TestCase assert_response :ok csv = CSV.parse(response.body, headers: true) - assert_includes csv.headers, 'Unknown' + assert_includes csv.headers, 'anonymous' end end end From e2173b18a4edf2b192b19a8ef9ec2509dc3d48d2 Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Wed, 31 Jan 2024 09:07:43 -0500 Subject: [PATCH 10/16] factories should be in /factories! --- app/assets/javascripts/{services => factories}/docListFactory.js | 0 app/assets/javascripts/{services => factories}/snapshotFactory.js | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename app/assets/javascripts/{services => factories}/docListFactory.js (100%) rename app/assets/javascripts/{services => factories}/snapshotFactory.js (100%) diff --git a/app/assets/javascripts/services/docListFactory.js b/app/assets/javascripts/factories/docListFactory.js similarity index 100% rename from app/assets/javascripts/services/docListFactory.js rename to app/assets/javascripts/factories/docListFactory.js diff --git a/app/assets/javascripts/services/snapshotFactory.js b/app/assets/javascripts/factories/snapshotFactory.js similarity index 100% rename from app/assets/javascripts/services/snapshotFactory.js rename to app/assets/javascripts/factories/snapshotFactory.js From 000d76a0293b0af010ffe1419965c2c11d12f5c7 Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Fri, 2 Feb 2024 18:43:54 -0500 Subject: [PATCH 11/16] Now forcing snapshots to track the docs if you don't support lookup by id! --- .../javascripts/controllers/promptSnapshot.js | 7 +++++ .../javascripts/services/docCacheSvc.js | 16 ++-------- .../javascripts/services/querySnapshotSvc.js | 23 ++++++++++++-- .../javascripts/services/settingsSvc.js | 11 +++++++ app/assets/templates/views/snapshotModal.html | 10 ++++++- .../api/v1/snapshots/search_controller.rb | 19 ++++++++---- docker-compose.override.yml.example | 9 ------ .../v1/snapshots/search_controller_test.rb | 30 +++++++++++++++++++ test/fixtures/snapshot_docs.yml | 12 +++++--- test/fixtures/tries.yml | 7 +++++ 10 files changed, 110 insertions(+), 34 deletions(-) delete mode 100644 docker-compose.override.yml.example diff --git a/app/assets/javascripts/controllers/promptSnapshot.js b/app/assets/javascripts/controllers/promptSnapshot.js index 73189abea..f7a999c44 100644 --- a/app/assets/javascripts/controllers/promptSnapshot.js +++ b/app/assets/javascripts/controllers/promptSnapshot.js @@ -14,10 +14,17 @@ angular.module('QuepidApp') $scope.snapPrompt = {name: '', recordDocumentFields: false, inProgress: false, error: null}; $scope.fieldSpec = settingsSvc.applicableSettings().fieldSpec; + $scope.searchEngine = settingsSvc.applicableSettings().searchEngine; + + $scope.supportLookupById = settingsSvc.supportLookupById(settingsSvc.applicableSettings().searchEngine); $scope.ok = function() { $scope.snapPrompt.inProgress = true; $scope.snapPrompt.error = null; + + if ($scope.supportLookupById == false){ // force recording of document fields for non supporting end points. + $scope.snapPrompt.recordDocumentFields = true; + } querySnapshotSvc.addSnapshot($scope.snapPrompt.name, $scope.snapPrompt.recordDocumentFields, queriesSvc.queryArray()) .then(function() { diff --git a/app/assets/javascripts/services/docCacheSvc.js b/app/assets/javascripts/services/docCacheSvc.js index c0256b8ab..85b76a78c 100644 --- a/app/assets/javascripts/services/docCacheSvc.js +++ b/app/assets/javascripts/services/docCacheSvc.js @@ -59,20 +59,10 @@ angular.module('QuepidApp') settings.proxyUrl = caseTryNavSvc.getQuepidProxyUrl(); } - var docIds = Object.keys(docsToFetch); + var docIds = Object.keys(docsToFetch); var resolver = docResolverSvc.createResolver(docIds, settings, 15); - - // 'vectara' does not support doc lookup by ID. - let supportLookupById = true; - if (settings && settings.searchEngine === 'vectara'){ - supportLookupById = false; - } - else if (settings && settings.searchEngine === 'searchapi'){ - supportLookupById = false; - } - - - if ( supportLookupById && docIds.length > 0 ) { + + if ( docIds.length > 0 ) { return resolver.fetchDocs() .then(function () { angular.forEach(resolver.docs, function (doc) { diff --git a/app/assets/javascripts/services/querySnapshotSvc.js b/app/assets/javascripts/services/querySnapshotSvc.js index 6875a6d0a..cb89592cc 100644 --- a/app/assets/javascripts/services/querySnapshotSvc.js +++ b/app/assets/javascripts/services/querySnapshotSvc.js @@ -5,11 +5,11 @@ angular.module('QuepidApp') .service('querySnapshotSvc', [ '$http', '$q', - 'settingsSvc', 'docCacheSvc', + 'settingsSvc', 'docCacheSvc', 'caseTryNavSvc', 'SnapshotFactory', function querySnapshotSvc( $http, $q, - settingsSvc, docCacheSvc, + settingsSvc, docCacheSvc, caseTryNavSvc, SnapshotFactory ) { // caches normal docs for all snapshots @@ -40,6 +40,25 @@ angular.module('QuepidApp') settings === null || Object.keys(settings).length === 0) ) { + + // Some search endpoints let you look up the documents by an id + // however if that isnt' possible, then we require you to store the doc fields + // in the snapshot, and we look them up from the Snapshot. To be clever + // we pretend to be a "solr'" endpoint to drive the lookup. + if (settingsSvc.supportLookupById(settings.searchEngine) == false){ + var settingsForLookup = angular.copy(settings); + settingsForLookup.apiMethod = 'GET'; + settingsForLookup.searchEngine = 'solr'; + settingsForLookup.searchEndpointId = null; + settingsForLookup.customHeaders = null; + + let snapshotId = snapshots[0].id + settingsForLookup.searchUrl = `${caseTryNavSvc.getQuepidRootUrl()}/api/cases/${caseTryNavSvc.getCaseNo()}/snapshots/${snapshotId}/search` + + settings = settingsForLookup; + } + + return docCacheSvc.update(settings); } else { return $q(function(resolve) { diff --git a/app/assets/javascripts/services/settingsSvc.js b/app/assets/javascripts/services/settingsSvc.js index f2ff5c51b..7fe95ba36 100644 --- a/app/assets/javascripts/services/settingsSvc.js +++ b/app/assets/javascripts/services/settingsSvc.js @@ -290,6 +290,17 @@ angular.module('QuepidApp') var Settings = SettingsFactory; var currSettings = null; + + this.supportLookupById = function(searchEngine){ + let supportLookupById = true; + if (searchEngine === 'vectara'){ + supportLookupById = false; + } + else if (searchEngine === 'searchapi'){ + supportLookupById = false; + } + return supportLookupById; + } this.demoSettingsChosen = function(searchEngine, newUrl) { var useTMDBDemoSettings = false; diff --git a/app/assets/templates/views/snapshotModal.html b/app/assets/templates/views/snapshotModal.html index 65be07539..1a09f9500 100644 --- a/app/assets/templates/views/snapshotModal.html +++ b/app/assets/templates/views/snapshotModal.html @@ -12,8 +12,9 @@
+

- To facilitate additional analysis, you may need to record as part of the snapshot all the displayed fields for each document returned. + To facilitate additional analysis, you may want to record as part of the snapshot all the displayed fields for each document returned. This will be the {{ fieldSpec }} fields.

@@ -22,6 +23,13 @@ Record Document Fields?
+ + + +

+ To power comparisons when using {{ searchEngine | searchEngineName }} the fields of your documents will be stored in Quepid. +

+
An error ({{snapPrompt.error}}) occurred, please try again.
diff --git a/app/controllers/api/v1/snapshots/search_controller.rb b/app/controllers/api/v1/snapshots/search_controller.rb index 334f7b4d6..6b8aadcd6 100644 --- a/app/controllers/api/v1/snapshots/search_controller.rb +++ b/app/controllers/api/v1/snapshots/search_controller.rb @@ -1,5 +1,6 @@ # frozen_string_literal: true +require 'uri' module Api module V1 module Snapshots @@ -14,8 +15,8 @@ class SearchController < SnapshotsController # rubocop:disable Metrics/CyclomaticComplexity # rubocop:disable Metrics/PerceivedComplexity # rubocop:disable Layout/LineLength - api :GET, '/api/cases/:case_id/snapshots/:snapshot_id/search?q=:q', - 'Mimic a Solr query by looking up query/doc data from a specific snapshot, using the q parameter as the query' + api :GET, '/api/cases/:case_id/snapshots/:snapshot_id/search?somesolrparams=here', + 'Mimic a Solr query by looking up query/doc data from a specific snapshot, supports a query or a lookup by id query' param :case_id, :number, desc: 'The ID of the requested case.', required: true param :snapshot_id, :number, @@ -24,22 +25,30 @@ class SearchController < SnapshotsController desc: 'The query that you are looking up', required: true def index @q = search_params[:q] + @snapshot_docs = nil + @q = @q.gsub('\?', '?') # Since it's a GET, a ? in the query gets special escaping query = if '*:*' == @q + # we have a match all query. @snapshot.snapshot_queries.first.query + elsif @q.ends_with?(')') && @q.include?(':(') && ('lucene' == search_params[:defType]) + # We have a lookup docs by id query + doc_ids = @q[@q.index(':(') + 2...@q.index(')')].split(' OR ') + @snapshot_docs = @snapshot.snapshot_docs.where(doc_id: doc_ids) + else @snapshot.case.queries.find_by(query_text: @q) end - if query + if query && @snapshot_docs.nil? snapshot_query = @snapshot.snapshot_queries.find_by(query: query) @snapshot_docs = snapshot_query.nil? ? [] : snapshot_query.snapshot_docs - elsif @q.starts_with?('id') + elsif @q.starts_with?('id:') && !@q.starts_with?('id:(') doc_id = @q.split(':')[1] @snapshot_docs = @snapshot.snapshot_docs.where(doc_id: doc_id) - else + elsif @snapshot_docs.nil? @snapshot_docs = [] end diff --git a/docker-compose.override.yml.example b/docker-compose.override.yml.example deleted file mode 100644 index 2133b8809..000000000 --- a/docker-compose.override.yml.example +++ /dev/null @@ -1,9 +0,0 @@ -# Demonstrate how to load your own local copy of a node package into Quepid. -# Commonly used when you are developing against an unreleased package. - -version: '3' -services: - app: - volumes: - - /Users/epugh/Documents/projects/splainer-search/package.json:/srv/app/node_modules/splainer-search/package.json - - /Users/epugh/Documents/projects/splainer-search/splainer-search.js:/srv/app/node_modules/splainer-search/splainer-search.js diff --git a/test/controllers/api/v1/snapshots/search_controller_test.rb b/test/controllers/api/v1/snapshots/search_controller_test.rb index 0918b5114..647d84c83 100644 --- a/test/controllers/api/v1/snapshots/search_controller_test.rb +++ b/test/controllers/api/v1/snapshots/search_controller_test.rb @@ -32,6 +32,36 @@ class SearchControllerTest < ActionController::TestCase assert_equal snapshot_query.snapshot_docs.count, data['response']['docs'].length end + test 'handles a term lookup for docs as if Solr' do + query_string = 'id:(' + snapshot.snapshot_docs.each do |doc| + query_string += "#{doc.doc_id} OR " + end + query_string = query_string[0...-4] + query_string += ')' + solr_query_params = { + q: query_string, + defType: 'lucene', + rows: snapshot.snapshot_docs.count, + fl: snapshot.case.tries.first.field_spec, + wt: 'json', + hl: false, + + } + + params = { case_id: acase.id, snapshot_id: snapshot.id } + params = params.merge(solr_query_params) + + get :index, params: params + + assert_response :ok + + response.parsed_body + + assert_equal query_string, data['responseHeader']['params']['q'] + assert_equal snapshot.snapshot_docs.count, data['response']['docs'].length + end + test 'handles a *:* search' do get :index, params: { case_id: acase.id, snapshot_id: snapshot.id, q: '*:*' } diff --git a/test/fixtures/snapshot_docs.yml b/test/fixtures/snapshot_docs.yml index 3579cfcba..57528827c 100644 --- a/test/fixtures/snapshot_docs.yml +++ b/test/fixtures/snapshot_docs.yml @@ -20,25 +20,29 @@ # a_doc: - doc_id: "doc a" + doc_id: "doc_a" position: 1 snapshot_query: :first_snapshot_query explain: '{"message": "Explain Me"}' + fields: '{"_id":"doc_a", "title": "title", "body":"body"}' b_doc: - doc_id: "doc b" + doc_id: "doc_b" position: 1 snapshot_query: :first_snapshot_query explain: '{"message": "Explain Me"}' + fields: '{"_id":"doc_b", "title": "title", "body":"body"}' c_doc: - doc_id: "doc c" + doc_id: "doc_c" position: 1 snapshot_query: :second_snapshot_query explain: '{"message": "Explain Me"}' + fields: '{"_id":"doc_c", "title": "title", "body":"body"}' d_doc: - doc_id: "doc d" + doc_id: "doc_d" position: 1 snapshot_query: :second_snapshot_query explain: '{"message": "Explain Me"}' + fields: '{"_id":"doc_d", "title": "title", "body":"body"}' diff --git a/test/fixtures/tries.yml b/test/fixtures/tries.yml index 55227ef45..13ab29e56 100644 --- a/test/fixtures/tries.yml +++ b/test/fixtures/tries.yml @@ -203,3 +203,10 @@ for_case_queries_case: query_params: 'q=#$query##' try_number: 1 search_endpoint: :for_case_queries_case + +for_case_snapshot_case: + case: :snapshot_case + query_params: 'q=#$query##' + try_number: 1 + field_spec: id:_id title:title body + search_endpoint: :one From fbd4ae00db847604a20187d245d898bf5c7a091c Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Fri, 2 Feb 2024 18:44:04 -0500 Subject: [PATCH 12/16] lint --- test/controllers/api/v1/snapshots/search_controller_test.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/controllers/api/v1/snapshots/search_controller_test.rb b/test/controllers/api/v1/snapshots/search_controller_test.rb index 647d84c83..a0dd5fb45 100644 --- a/test/controllers/api/v1/snapshots/search_controller_test.rb +++ b/test/controllers/api/v1/snapshots/search_controller_test.rb @@ -57,7 +57,7 @@ class SearchControllerTest < ActionController::TestCase assert_response :ok response.parsed_body - + assert_equal query_string, data['responseHeader']['params']['q'] assert_equal snapshot.snapshot_docs.count, data['response']['docs'].length end From 298b9eaae7ebc4f2b59fac6ae0be162d42286520 Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Fri, 2 Feb 2024 21:15:53 -0500 Subject: [PATCH 13/16] mock up the new method --- spec/javascripts/mock/settingsSvc.js | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/spec/javascripts/mock/settingsSvc.js b/spec/javascripts/mock/settingsSvc.js index 850ada364..b10532d19 100644 --- a/spec/javascripts/mock/settingsSvc.js +++ b/spec/javascripts/mock/settingsSvc.js @@ -26,6 +26,11 @@ this.applicableSettings = function() { return mockTry; }; + + this.supportLookupById = function() { + return true; + }; + }; wind.MockSettingsSvc = MockSettingsSvc; })(window); From ebdbd25d725f1a93a8a4dd1509a54cf71f57d482 Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Fri, 2 Feb 2024 21:19:45 -0500 Subject: [PATCH 14/16] fix logic --- .../export_case/export_case_modal_instance_controller.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/assets/javascripts/components/export_case/export_case_modal_instance_controller.js b/app/assets/javascripts/components/export_case/export_case_modal_instance_controller.js index 0bee2dcf3..d55e196ec 100644 --- a/app/assets/javascripts/components/export_case/export_case_modal_instance_controller.js +++ b/app/assets/javascripts/components/export_case/export_case_modal_instance_controller.js @@ -55,7 +55,7 @@ angular.module('QuepidApp') angular.forEach(ctrl.snapshots, function(snapshot) { if (snapshot.id === ctrl.selection) { desiredSnapshot = snapshot; - return; // exit the loop early + break; // exit the loop early } }); if (desiredSnapshot){ From 94bdfa32e2a987a0debf755e1089022aa79ea979 Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Sun, 4 Feb 2024 16:04:32 -0500 Subject: [PATCH 15/16] various fixes for the build --- .../export_case/export_case_modal_instance_controller.js | 9 +-------- app/assets/javascripts/controllers/promptSnapshot.js | 2 +- app/assets/javascripts/services/querySnapshotSvc.js | 2 +- app/assets/javascripts/services/settingsSvc.js | 4 ++-- app/controllers/api/v1/snapshots/search_controller.rb | 1 - spec/javascripts/mock/settingsSvc.js | 2 +- .../api/v1/snapshots/search_controller_test.rb | 2 +- test/fixtures/snapshot_docs.yml | 8 ++++---- test/fixtures/tries.yml | 2 +- 9 files changed, 12 insertions(+), 20 deletions(-) diff --git a/app/assets/javascripts/components/export_case/export_case_modal_instance_controller.js b/app/assets/javascripts/components/export_case/export_case_modal_instance_controller.js index d55e196ec..452ea696e 100644 --- a/app/assets/javascripts/components/export_case/export_case_modal_instance_controller.js +++ b/app/assets/javascripts/components/export_case/export_case_modal_instance_controller.js @@ -54,16 +54,9 @@ angular.module('QuepidApp') var desiredSnapshot = null; angular.forEach(ctrl.snapshots, function(snapshot) { if (snapshot.id === ctrl.selection) { - desiredSnapshot = snapshot; - break; // exit the loop early + return desiredSnapshot.hasSnapshotFile; // exit the loop early } }); - if (desiredSnapshot){ - return desiredSnapshot.hasSnapshotFile; - } - else { - return false; - } } return false; }; diff --git a/app/assets/javascripts/controllers/promptSnapshot.js b/app/assets/javascripts/controllers/promptSnapshot.js index f7a999c44..d7c0bc4c6 100644 --- a/app/assets/javascripts/controllers/promptSnapshot.js +++ b/app/assets/javascripts/controllers/promptSnapshot.js @@ -22,7 +22,7 @@ angular.module('QuepidApp') $scope.snapPrompt.inProgress = true; $scope.snapPrompt.error = null; - if ($scope.supportLookupById == false){ // force recording of document fields for non supporting end points. + if ($scope.supportLookupById === false){ // force recording of document fields for non supporting end points. $scope.snapPrompt.recordDocumentFields = true; } diff --git a/app/assets/javascripts/services/querySnapshotSvc.js b/app/assets/javascripts/services/querySnapshotSvc.js index cb89592cc..2deb4e382 100644 --- a/app/assets/javascripts/services/querySnapshotSvc.js +++ b/app/assets/javascripts/services/querySnapshotSvc.js @@ -45,7 +45,7 @@ angular.module('QuepidApp') // however if that isnt' possible, then we require you to store the doc fields // in the snapshot, and we look them up from the Snapshot. To be clever // we pretend to be a "solr'" endpoint to drive the lookup. - if (settingsSvc.supportLookupById(settings.searchEngine) == false){ + if (settingsSvc.supportLookupById(settings.searchEngine) === false){ var settingsForLookup = angular.copy(settings); settingsForLookup.apiMethod = 'GET'; settingsForLookup.searchEngine = 'solr'; diff --git a/app/assets/javascripts/services/settingsSvc.js b/app/assets/javascripts/services/settingsSvc.js index 7fe95ba36..ab1350c88 100644 --- a/app/assets/javascripts/services/settingsSvc.js +++ b/app/assets/javascripts/services/settingsSvc.js @@ -291,7 +291,7 @@ angular.module('QuepidApp') var Settings = SettingsFactory; var currSettings = null; - this.supportLookupById = function(searchEngine){ + this.supportLookupById = function(searchEngine) { let supportLookupById = true; if (searchEngine === 'vectara'){ supportLookupById = false; @@ -300,7 +300,7 @@ angular.module('QuepidApp') supportLookupById = false; } return supportLookupById; - } + }; this.demoSettingsChosen = function(searchEngine, newUrl) { var useTMDBDemoSettings = false; diff --git a/app/controllers/api/v1/snapshots/search_controller.rb b/app/controllers/api/v1/snapshots/search_controller.rb index 6b8aadcd6..9eebc8b0d 100644 --- a/app/controllers/api/v1/snapshots/search_controller.rb +++ b/app/controllers/api/v1/snapshots/search_controller.rb @@ -1,6 +1,5 @@ # frozen_string_literal: true -require 'uri' module Api module V1 module Snapshots diff --git a/spec/javascripts/mock/settingsSvc.js b/spec/javascripts/mock/settingsSvc.js index b10532d19..644de94df 100644 --- a/spec/javascripts/mock/settingsSvc.js +++ b/spec/javascripts/mock/settingsSvc.js @@ -27,7 +27,7 @@ return mockTry; }; - this.supportLookupById = function() { + this.supportLookupById = function(searchEngine) { return true; }; diff --git a/test/controllers/api/v1/snapshots/search_controller_test.rb b/test/controllers/api/v1/snapshots/search_controller_test.rb index a0dd5fb45..c5cf43e0f 100644 --- a/test/controllers/api/v1/snapshots/search_controller_test.rb +++ b/test/controllers/api/v1/snapshots/search_controller_test.rb @@ -56,7 +56,7 @@ class SearchControllerTest < ActionController::TestCase assert_response :ok - response.parsed_body + data = response.parsed_body assert_equal query_string, data['responseHeader']['params']['q'] assert_equal snapshot.snapshot_docs.count, data['response']['docs'].length diff --git a/test/fixtures/snapshot_docs.yml b/test/fixtures/snapshot_docs.yml index 57528827c..4f4305b93 100644 --- a/test/fixtures/snapshot_docs.yml +++ b/test/fixtures/snapshot_docs.yml @@ -24,25 +24,25 @@ a_doc: position: 1 snapshot_query: :first_snapshot_query explain: '{"message": "Explain Me"}' - fields: '{"_id":"doc_a", "title": "title", "body":"body"}' + fields: '{"id":"doc_a", "title": "title", "body":"body"}' b_doc: doc_id: "doc_b" position: 1 snapshot_query: :first_snapshot_query explain: '{"message": "Explain Me"}' - fields: '{"_id":"doc_b", "title": "title", "body":"body"}' + fields: '{"id":"doc_b", "title": "title", "body":"body"}' c_doc: doc_id: "doc_c" position: 1 snapshot_query: :second_snapshot_query explain: '{"message": "Explain Me"}' - fields: '{"_id":"doc_c", "title": "title", "body":"body"}' + fields: '{"id":"doc_c", "title": "title", "body":"body"}' d_doc: doc_id: "doc_d" position: 1 snapshot_query: :second_snapshot_query explain: '{"message": "Explain Me"}' - fields: '{"_id":"doc_d", "title": "title", "body":"body"}' + fields: '{"id":"doc_d", "title": "title", "body":"body"}' diff --git a/test/fixtures/tries.yml b/test/fixtures/tries.yml index 13ab29e56..3143f4f3d 100644 --- a/test/fixtures/tries.yml +++ b/test/fixtures/tries.yml @@ -208,5 +208,5 @@ for_case_snapshot_case: case: :snapshot_case query_params: 'q=#$query##' try_number: 1 - field_spec: id:_id title:title body + field_spec: id:id title:title body search_endpoint: :one From ae04ca63fd4f0e7a5bcb4b035114fe54c2c2f721 Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Sun, 4 Feb 2024 16:44:38 -0500 Subject: [PATCH 16/16] lint --- app/assets/javascripts/services/querySnapshotSvc.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/assets/javascripts/services/querySnapshotSvc.js b/app/assets/javascripts/services/querySnapshotSvc.js index 2deb4e382..d890c2830 100644 --- a/app/assets/javascripts/services/querySnapshotSvc.js +++ b/app/assets/javascripts/services/querySnapshotSvc.js @@ -52,8 +52,8 @@ angular.module('QuepidApp') settingsForLookup.searchEndpointId = null; settingsForLookup.customHeaders = null; - let snapshotId = snapshots[0].id - settingsForLookup.searchUrl = `${caseTryNavSvc.getQuepidRootUrl()}/api/cases/${caseTryNavSvc.getCaseNo()}/snapshots/${snapshotId}/search` + let snapshotId = snapshots[0].id; + settingsForLookup.searchUrl = `${caseTryNavSvc.getQuepidRootUrl()}/api/cases/${caseTryNavSvc.getCaseNo()}/snapshots/${snapshotId}/search`; settings = settingsForLookup; }