diff --git a/app/assets/javascripts/components/diff/_modal.html b/app/assets/javascripts/components/diff/_modal.html index 444c30245..d3da36859 100644 --- a/app/assets/javascripts/components/diff/_modal.html +++ b/app/assets/javascripts/components/diff/_modal.html @@ -34,6 +34,10 @@ Snapshot has the id {{ ctrl.selection }} + +
diff --git a/app/assets/javascripts/components/diff/diff_modal_instance_controller.js b/app/assets/javascripts/components/diff/diff_modal_instance_controller.js index ad964fd0a..8d7aa232e 100644 --- a/app/assets/javascripts/components/diff/diff_modal_instance_controller.js +++ b/app/assets/javascripts/components/diff/diff_modal_instance_controller.js @@ -18,8 +18,11 @@ angular.module('QuepidApp') ) { var ctrl = this; - // Attributes - ctrl.snapshots = querySnapshotSvc.snapshots; + querySnapshotSvc.getSnapshots().then(function() { + ctrl.snapshots = querySnapshotSvc.snapshots; + } + ); + ctrl.which = 'snapshot'; ctrl.selection = initialSelection; ctrl.inProgress = false; @@ -32,6 +35,8 @@ angular.module('QuepidApp') ctrl.nothingSelected = nothingSelected; ctrl.ok = ok; ctrl.toggleDel = toggleDel; + ctrl.isProcessingFile = isProcessingFile; + // Watches $scope.$watch('ctrl.selection', function(newVal, oldVal) { @@ -72,6 +77,25 @@ angular.module('QuepidApp') flash.success = 'Snapshot deleted successfully.'; }); } + + function isProcessingFile() { + if (ctrl.snapshots){ + var desiredSnapshot = null; + angular.forEach(ctrl.snapshots, function(snapshot) { + if (snapshot.id === ctrl.selection) { + desiredSnapshot = snapshot; + return; // exit the loop early + } + }); + if (desiredSnapshot){ + return desiredSnapshot.hasSnapshotFile; + } + else { + return false; + } + } + return false; + } function isNumber(num) { return !isNaN(parseInt('' + num, 10)); diff --git a/app/assets/javascripts/components/export_case/_modal.html b/app/assets/javascripts/components/export_case/_modal.html index cd21e263e..92e511bca 100644 --- a/app/assets/javascripts/components/export_case/_modal.html +++ b/app/assets/javascripts/components/export_case/_modal.html @@ -50,6 +50,10 @@

diff --git a/app/assets/javascripts/components/export_case/export_case_modal_instance_controller.js b/app/assets/javascripts/components/export_case/export_case_modal_instance_controller.js index e422efe6d..452ea696e 100644 --- a/app/assets/javascripts/components/export_case/export_case_modal_instance_controller.js +++ b/app/assets/javascripts/components/export_case/export_case_modal_instance_controller.js @@ -47,5 +47,18 @@ angular.module('QuepidApp') ctrl.cancel = function () { $uibModalInstance.dismiss('cancel'); }; + + + ctrl.isProcessingFile = function () { + if (ctrl.options.snapshot){ + var desiredSnapshot = null; + angular.forEach(ctrl.snapshots, function(snapshot) { + if (snapshot.id === ctrl.selection) { + return desiredSnapshot.hasSnapshotFile; // exit the loop early + } + }); + } + return false; + }; } ]); diff --git a/app/assets/javascripts/controllers/mainCtrl.js b/app/assets/javascripts/controllers/mainCtrl.js index 8e16b7fd0..85c16c2ca 100644 --- a/app/assets/javascripts/controllers/mainCtrl.js +++ b/app/assets/javascripts/controllers/mainCtrl.js @@ -147,7 +147,7 @@ angular.module('QuepidApp') bootstrapCase() .then(function() { loadQueries(); - loadSnapshots(); + loadSnapshots(); // this is here just to set the caseNo in the querySnapshotSvc. updateCaseMetadata(); paneSvc.refreshElements(); }).catch(function(error) { diff --git a/app/assets/javascripts/controllers/promptSnapshot.js b/app/assets/javascripts/controllers/promptSnapshot.js index 73189abea..d7c0bc4c6 100644 --- a/app/assets/javascripts/controllers/promptSnapshot.js +++ b/app/assets/javascripts/controllers/promptSnapshot.js @@ -14,10 +14,17 @@ angular.module('QuepidApp') $scope.snapPrompt = {name: '', recordDocumentFields: false, inProgress: false, error: null}; $scope.fieldSpec = settingsSvc.applicableSettings().fieldSpec; + $scope.searchEngine = settingsSvc.applicableSettings().searchEngine; + + $scope.supportLookupById = settingsSvc.supportLookupById(settingsSvc.applicableSettings().searchEngine); $scope.ok = function() { $scope.snapPrompt.inProgress = true; $scope.snapPrompt.error = null; + + if ($scope.supportLookupById === false){ // force recording of document fields for non supporting end points. + $scope.snapPrompt.recordDocumentFields = true; + } querySnapshotSvc.addSnapshot($scope.snapPrompt.name, $scope.snapPrompt.recordDocumentFields, queriesSvc.queryArray()) .then(function() { diff --git a/app/assets/javascripts/services/docListFactory.js b/app/assets/javascripts/factories/docListFactory.js similarity index 100% rename from app/assets/javascripts/services/docListFactory.js rename to app/assets/javascripts/factories/docListFactory.js diff --git a/app/assets/javascripts/services/snapshotFactory.js b/app/assets/javascripts/factories/snapshotFactory.js similarity index 98% rename from app/assets/javascripts/services/snapshotFactory.js rename to app/assets/javascripts/factories/snapshotFactory.js index 2584c5208..96672cd2e 100644 --- a/app/assets/javascripts/services/snapshotFactory.js +++ b/app/assets/javascripts/factories/snapshotFactory.js @@ -18,6 +18,7 @@ self.id = params.id; self.name = snapshotName; self.time = params.time; + self.hasSnapshotFile = params.has_snapshot_file; self.docs = params.docs; self.queries = params.queries; diff --git a/app/assets/javascripts/services/docCacheSvc.js b/app/assets/javascripts/services/docCacheSvc.js index c0256b8ab..85b76a78c 100644 --- a/app/assets/javascripts/services/docCacheSvc.js +++ b/app/assets/javascripts/services/docCacheSvc.js @@ -59,20 +59,10 @@ angular.module('QuepidApp') settings.proxyUrl = caseTryNavSvc.getQuepidProxyUrl(); } - var docIds = Object.keys(docsToFetch); + var docIds = Object.keys(docsToFetch); var resolver = docResolverSvc.createResolver(docIds, settings, 15); - - // 'vectara' does not support doc lookup by ID. - let supportLookupById = true; - if (settings && settings.searchEngine === 'vectara'){ - supportLookupById = false; - } - else if (settings && settings.searchEngine === 'searchapi'){ - supportLookupById = false; - } - - - if ( supportLookupById && docIds.length > 0 ) { + + if ( docIds.length > 0 ) { return resolver.fetchDocs() .then(function () { angular.forEach(resolver.docs, function (doc) { diff --git a/app/assets/javascripts/services/querySnapshotSvc.js b/app/assets/javascripts/services/querySnapshotSvc.js index d3a6c5173..d890c2830 100644 --- a/app/assets/javascripts/services/querySnapshotSvc.js +++ b/app/assets/javascripts/services/querySnapshotSvc.js @@ -5,11 +5,11 @@ angular.module('QuepidApp') .service('querySnapshotSvc', [ '$http', '$q', - 'settingsSvc', 'docCacheSvc', + 'settingsSvc', 'docCacheSvc', 'caseTryNavSvc', 'SnapshotFactory', function querySnapshotSvc( $http, $q, - settingsSvc, docCacheSvc, + settingsSvc, docCacheSvc, caseTryNavSvc, SnapshotFactory ) { // caches normal docs for all snapshots @@ -40,6 +40,25 @@ angular.module('QuepidApp') settings === null || Object.keys(settings).length === 0) ) { + + // Some search endpoints let you look up the documents by an id + // however if that isnt' possible, then we require you to store the doc fields + // in the snapshot, and we look them up from the Snapshot. To be clever + // we pretend to be a "solr'" endpoint to drive the lookup. + if (settingsSvc.supportLookupById(settings.searchEngine) === false){ + var settingsForLookup = angular.copy(settings); + settingsForLookup.apiMethod = 'GET'; + settingsForLookup.searchEngine = 'solr'; + settingsForLookup.searchEndpointId = null; + settingsForLookup.customHeaders = null; + + let snapshotId = snapshots[0].id; + settingsForLookup.searchUrl = `${caseTryNavSvc.getQuepidRootUrl()}/api/cases/${caseTryNavSvc.getCaseNo()}/snapshots/${snapshotId}/search`; + + settings = settingsForLookup; + } + + return docCacheSvc.update(settings); } else { return $q(function(resolve) { @@ -64,6 +83,20 @@ angular.module('QuepidApp') }); }); }; + + // Now that we process snapshots async, we + // don't want to cache the data + this.getSnapshots = function() { + this.snapshots = {}; + + return $http.get('api/cases/' + caseNo + '/snapshots?shallow=true') + .then(function(response) { + return addSnapshotResp(response.data.snapshots) + .then(function() { + version++; + }); + }); + }; this.addSnapshot = function(name, recordDocumentFields, queries) { // we may want to refactor the payload structure in the future. diff --git a/app/assets/javascripts/services/settingsSvc.js b/app/assets/javascripts/services/settingsSvc.js index f2ff5c51b..ab1350c88 100644 --- a/app/assets/javascripts/services/settingsSvc.js +++ b/app/assets/javascripts/services/settingsSvc.js @@ -290,6 +290,17 @@ angular.module('QuepidApp') var Settings = SettingsFactory; var currSettings = null; + + this.supportLookupById = function(searchEngine) { + let supportLookupById = true; + if (searchEngine === 'vectara'){ + supportLookupById = false; + } + else if (searchEngine === 'searchapi'){ + supportLookupById = false; + } + return supportLookupById; + }; this.demoSettingsChosen = function(searchEngine, newUrl) { var useTMDBDemoSettings = false; diff --git a/app/assets/templates/views/snapshotModal.html b/app/assets/templates/views/snapshotModal.html index 65be07539..1a09f9500 100644 --- a/app/assets/templates/views/snapshotModal.html +++ b/app/assets/templates/views/snapshotModal.html @@ -12,8 +12,9 @@
+

- To facilitate additional analysis, you may need to record as part of the snapshot all the displayed fields for each document returned. + To facilitate additional analysis, you may want to record as part of the snapshot all the displayed fields for each document returned. This will be the {{ fieldSpec }} fields.

@@ -22,6 +23,13 @@ Record Document Fields?
+ + + +

+ To power comparisons when using {{ searchEngine | searchEngineName }} the fields of your documents will be stored in Quepid. +

+
An error ({{snapPrompt.error}}) occurred, please try again.
diff --git a/app/controllers/api/v1/books/populate_controller.rb b/app/controllers/api/v1/books/populate_controller.rb index c03bc3814..2c9f00205 100644 --- a/app/controllers/api/v1/books/populate_controller.rb +++ b/app/controllers/api/v1/books/populate_controller.rb @@ -15,6 +15,9 @@ class PopulateController < Api::ApiController # We get a messy set of params in this method, so we don't use the normal # approach of strong parameter validation. We hardcode the only params # we care about. + # + # With 5000 queries in large case, this takes 108 seconds... + # # rubocop:disable Layout/LineLength def update puts "[PopulateController] Request Size is #{number_to_human_size(query_doc_pairs_params.to_s.bytesize)}" diff --git a/app/controllers/api/v1/books_controller.rb b/app/controllers/api/v1/books_controller.rb index 0cc777285..a9a2983b5 100644 --- a/app/controllers/api/v1/books_controller.rb +++ b/app/controllers/api/v1/books_controller.rb @@ -4,6 +4,7 @@ module Api module V1 + # rubocop:disable Metrics/ClassLength class BooksController < Api::ApiController before_action :set_book, only: [ :show, :update, :destroy ] before_action :check_book, only: [ :show, :update, :destroy ] @@ -28,6 +29,7 @@ def index # rubocop:disable Metrics/AbcSize # rubocop:disable Metrics/CyclomaticComplexity # rubocop:disable Metrics/PerceivedComplexity + # rubocop:disable Metrics/BlockLength api :GET, '/api/books/:book_id', 'Show the book with the given ID.' param :id, :number, @@ -40,22 +42,29 @@ def show csv_headers = %w[query docid] # Only return rateable judgements, filter out the unrateable ones. - unique_raters = @book.judgements.rateable.preload(:user).collect(&:user).uniq + # unique_raters = @book.judgements.rateable.preload(:user).collect(&:user).uniq + # unique_raters = @book.judges.merge(Judgement.rateable) + unique_judge_ids = @book.query_doc_pairs.joins(:judgements) + .distinct.pluck(:user_id) # this logic about using email versus name is kind of awful. Think about user.full_name or user.identifier? - unique_raters.each do |rater| - csv_headers << make_csv_safe(if rater.nil? - 'Unknown' + unique_judges = [] + unique_judge_ids.each do |judge_id| + judge = User.find(judge_id) unless judge_id.nil? + unique_judges << judge + csv_headers << make_csv_safe(if judge.nil? + 'anonymous' else - rater.name.presence || rater.email + judge.name.presence || judge.email end) end @csv_array << csv_headers - @book.query_doc_pairs.each do |qdp| + query_doc_pairs = @book.query_doc_pairs.includes(:judgements) + query_doc_pairs.each do |qdp| row = [ make_csv_safe(qdp.query_text), qdp.doc_id ] - unique_raters.each do |rater| - judgement = qdp.judgements.detect { |j| j.user == rater } + unique_judges.each do |judge| + judgement = qdp.judgements.detect { |j| j.user == judge } rating = judgement.nil? ? '' : judgement.rating row.append rating @@ -72,6 +81,8 @@ def show # rubocop:enable Metrics/AbcSize # rubocop:enable Metrics/CyclomaticComplexity # rubocop:enable Metrics/PerceivedComplexity + # rubocop:enable Metrics/BlockLength + api :POST, '/api/books', 'Create a new book.' param_group :book def create @@ -135,5 +146,6 @@ def make_csv_safe str end end end + # rubocop:enable Metrics/ClassLength end end diff --git a/app/controllers/api/v1/snapshots/search_controller.rb b/app/controllers/api/v1/snapshots/search_controller.rb index 334f7b4d6..9eebc8b0d 100644 --- a/app/controllers/api/v1/snapshots/search_controller.rb +++ b/app/controllers/api/v1/snapshots/search_controller.rb @@ -14,8 +14,8 @@ class SearchController < SnapshotsController # rubocop:disable Metrics/CyclomaticComplexity # rubocop:disable Metrics/PerceivedComplexity # rubocop:disable Layout/LineLength - api :GET, '/api/cases/:case_id/snapshots/:snapshot_id/search?q=:q', - 'Mimic a Solr query by looking up query/doc data from a specific snapshot, using the q parameter as the query' + api :GET, '/api/cases/:case_id/snapshots/:snapshot_id/search?somesolrparams=here', + 'Mimic a Solr query by looking up query/doc data from a specific snapshot, supports a query or a lookup by id query' param :case_id, :number, desc: 'The ID of the requested case.', required: true param :snapshot_id, :number, @@ -24,22 +24,30 @@ class SearchController < SnapshotsController desc: 'The query that you are looking up', required: true def index @q = search_params[:q] + @snapshot_docs = nil + @q = @q.gsub('\?', '?') # Since it's a GET, a ? in the query gets special escaping query = if '*:*' == @q + # we have a match all query. @snapshot.snapshot_queries.first.query + elsif @q.ends_with?(')') && @q.include?(':(') && ('lucene' == search_params[:defType]) + # We have a lookup docs by id query + doc_ids = @q[@q.index(':(') + 2...@q.index(')')].split(' OR ') + @snapshot_docs = @snapshot.snapshot_docs.where(doc_id: doc_ids) + else @snapshot.case.queries.find_by(query_text: @q) end - if query + if query && @snapshot_docs.nil? snapshot_query = @snapshot.snapshot_queries.find_by(query: query) @snapshot_docs = snapshot_query.nil? ? [] : snapshot_query.snapshot_docs - elsif @q.starts_with?('id') + elsif @q.starts_with?('id:') && !@q.starts_with?('id:(') doc_id = @q.split(':')[1] @snapshot_docs = @snapshot.snapshot_docs.where(doc_id: doc_id) - else + elsif @snapshot_docs.nil? @snapshot_docs = [] end diff --git a/app/controllers/api/v1/snapshots_controller.rb b/app/controllers/api/v1/snapshots_controller.rb index 4a3574fac..b673f8665 100644 --- a/app/controllers/api/v1/snapshots_controller.rb +++ b/app/controllers/api/v1/snapshots_controller.rb @@ -1,8 +1,10 @@ # frozen_string_literal: true +require 'action_view' module Api module V1 class SnapshotsController < Api::ApiController + include ActionView::Helpers::NumberHelper before_action :set_case before_action :check_case before_action :set_snapshot, only: [ :show, :destroy ] @@ -20,46 +22,49 @@ def authenticate_api! def index @snapshots = @case.snapshots - @shallow = params[:shallow] || false + @shallow = 'true' == params[:shallow] + @with_docs = false respond_with @snapshots end def show @shallow = params[:shallow] || false + @with_docs = true respond_with @snapshot end - # rubocop:disable Metrics/MethodLength + # rubocop:disable Layout/LineLength def create @snapshot = @case.snapshots.build(name: params[:snapshot][:name]) @snapshot.scorer = @case.scorer @snapshot.try = @case.tries.first if @snapshot.save - service = SnapshotManager.new(@snapshot) - - snapshot_docs = params[:snapshot][:docs] - snapshot_queries = params[:snapshot][:queries] + serialized_data = Marshal.dump(snapshot_params) - service.add_docs snapshot_docs, snapshot_queries if snapshot_docs + # puts "[SnapshotController] the size of the serialized data is #{number_to_human_size(serialized_data.bytesize)}" + compressed_data = Zlib::Deflate.deflate(serialized_data) + # puts "[SnapshotController] the size of the compressed data is #{number_to_human_size(compressed_data.bytesize)}" + @snapshot.snapshot_file.attach(io: StringIO.new(compressed_data), filename: "snapshot_#{@snapshot.id}.bin.zip", + content_type: 'application/zip') + PopulateSnapshotJob.perform_later @snapshot Analytics::Tracker.track_snapshot_created_event current_user, @snapshot - # Refetch snapshot because after bulk creating the docs - # the snapshot object is then stale - @snapshot = Snapshot.where(id: @snapshot.id) - .includes([ snapshot_queries: [ :snapshot_docs, { query: [ :ratings ] } ] ]) - .first - + @with_docs = false # don't show individual docs in the response respond_with @snapshot else render json: @snapshot.errors, status: :bad_request end end - # rubocop:enable Metrics/MethodLength + # rubocop:enable Layout/LineLength def destroy + SnapshotDoc.joins(snapshot_query: :snapshot) + .where(snapshot_queries: { snapshot: @snapshot }) + .delete_all + @snapshot.snapshot_queries.delete_all @snapshot.destroy Analytics::Tracker.track_snapshot_deleted_event current_user, @snapshot @@ -71,13 +76,19 @@ def destroy def set_snapshot @snapshot = @case.snapshots .where(id: params[:id]) - .includes([ snapshot_queries: [ :snapshot_docs ] ]) .first end def check_snapshot render json: { error: 'Not Found!' }, status: :not_found unless @snapshot end + + def snapshot_params + # avoid StrongParameters ;-( to faciliate sending params as + # hash to ActiveJob via ActiveStorage by directly getting parameters from request + # object + request.parameters + end end end end diff --git a/app/controllers/books_controller.rb b/app/controllers/books_controller.rb index 5651d6db5..ad4fa0284 100644 --- a/app/controllers/books_controller.rb +++ b/app/controllers/books_controller.rb @@ -28,11 +28,16 @@ def show @book.cases.each do |kase| @count_of_anonymous_case_judgements += kase.ratings.where(user: nil).count end + + @moar_judgements_needed = @book.judgements.where(user: current_user).count < @book.query_doc_pairs.count @cases = @book.cases @leaderboard_data = [] @stats_data = [] - unique_judges = @book.judgements.preload(:user).collect(&:user).uniq - unique_judges.each do |judge| + + unique_judge_ids = @book.query_doc_pairs.joins(:judgements) + .distinct.pluck(:user_id) + unique_judge_ids.each do |judge_id| + judge = User.find(judge_id) unless judge_id.nil? @leaderboard_data << { judge: judge.nil? ? 'anonymous' : judge.name, judgements: @book.judgements.where(user: judge).count } @stats_data << { diff --git a/app/controllers/judgements_controller.rb b/app/controllers/judgements_controller.rb index 80d13467b..8fa8bb5f1 100644 --- a/app/controllers/judgements_controller.rb +++ b/app/controllers/judgements_controller.rb @@ -8,7 +8,7 @@ def index bool = ActiveRecord::Type::Boolean.new @shallow = bool.deserialize(params[:shallow] || true ) - @judgements = @book.judgements.includes([ :query_doc_pair, :user ]) + @judgements = @book.judgements.includes([ :query_doc_pair, :user ]).order('query_doc_pair_id') end def show diff --git a/app/controllers/sessions_controller.rb b/app/controllers/sessions_controller.rb index 3239d57ab..ae6ee7843 100644 --- a/app/controllers/sessions_controller.rb +++ b/app/controllers/sessions_controller.rb @@ -10,9 +10,7 @@ def index def new @user = User.new - puts 'hi' - puts Rails.env.production? - puts Rails.application.config.devise.omniauth_providers.include?(:google_oauth2) + if Rails.env.production? && Rails.application.config.devise.omniauth_providers.include?(:google_oauth2) # Google only lets us oAuth from https sites in production. @flag_not_on_https = false diff --git a/app/jobs/populate_snapshot_job.rb b/app/jobs/populate_snapshot_job.rb new file mode 100644 index 000000000..6f5187d88 --- /dev/null +++ b/app/jobs/populate_snapshot_job.rb @@ -0,0 +1,28 @@ +# frozen_string_literal: true + +class PopulateSnapshotJob < ApplicationJob + queue_as :default + sidekiq_options log_level: :warn + + # rubocop:disable Security/MarshalLoad + def perform snapshot + # down the road we should be using ActiveRecord-import and first_or_initialize instead. + # See how snapshots are managed. + + compressed_data = snapshot.snapshot_file.download + serialized_data = Zlib::Inflate.inflate(compressed_data) + params = Marshal.load(serialized_data) + + service = SnapshotManagerCopy.new(snapshot) + + snapshot_docs = params[:snapshot][:docs] + snapshot_queries = params[:snapshot][:queries] + + service.add_docs snapshot_docs, snapshot_queries + + snapshot.reload # this appears to be required or we duplicate the snapshot_queries! + + snapshot.snapshot_file.purge + end + # rubocop:enable Security/MarshalLoad +end diff --git a/app/models/book.rb b/app/models/book.rb index 4832066ad..c74c1c1cd 100644 --- a/app/models/book.rb +++ b/app/models/book.rb @@ -35,10 +35,13 @@ class Book < ApplicationRecord belongs_to :selection_strategy belongs_to :scorer has_many :query_doc_pairs, dependent: :destroy, autosave: true - has_many :judgements, -> { order('query_doc_pair_id') }, + + has_many :judgements, through: :query_doc_pairs, dependent: :destroy + # has_many :judges, -> { distinct }, through: :judgements, class_name: 'User', source: :user + has_many :cases, dependent: :nullify has_many :rated_query_doc_pairs, -> { has_judgements }, diff --git a/app/models/query_doc_pair.rb b/app/models/query_doc_pair.rb index fe9dd665e..ae521b471 100644 --- a/app/models/query_doc_pair.rb +++ b/app/models/query_doc_pair.rb @@ -5,7 +5,7 @@ # Table name: query_doc_pairs # # id :bigint not null, primary key -# document_fields :text(65535) +# document_fields :text(16777215) # information_need :string(255) # notes :text(65535) # options :text(65535) diff --git a/app/models/snapshot.rb b/app/models/snapshot.rb index d4d0af1ca..44179f7bc 100644 --- a/app/models/snapshot.rb +++ b/app/models/snapshot.rb @@ -28,10 +28,12 @@ class Snapshot < ApplicationRecord belongs_to :case, optional: true # shouldn't be optional! belongs_to :try, optional: true # shouldn't be optional! belongs_to :scorer, optional: true # shouldn't be optional! - has_many :snapshot_queries, dependent: :destroy + has_many :snapshot_queries, dependent: :delete_all has_many :snapshot_docs, through: :snapshot_queries + has_one_attached :snapshot_file + # Validations validates :name, presence: true diff --git a/app/models/snapshot_query.rb b/app/models/snapshot_query.rb index 06b29ec0e..d28d2e0bd 100644 --- a/app/models/snapshot_query.rb +++ b/app/models/snapshot_query.rb @@ -26,6 +26,6 @@ class SnapshotQuery < ApplicationRecord belongs_to :snapshot, optional: true # shouldn't be belongs_to :query, optional: true # shouldn't be has_many :snapshot_docs, -> { order(position: :asc) }, - dependent: :destroy, + dependent: :delete_all, inverse_of: :snapshot_query end diff --git a/app/services/snapshot_manager_copy.rb b/app/services/snapshot_manager_copy.rb new file mode 100644 index 000000000..fc86405db --- /dev/null +++ b/app/services/snapshot_manager_copy.rb @@ -0,0 +1,260 @@ +# frozen_string_literal: true + +# rubocop:disable Metrics/ClassLength +class SnapshotManagerCopy + attr_reader :logger, :options + + def initialize snapshot, opts = {} + default_options = { + format: :csv, + logger: Rails.logger, + show_progress: false, + } + + @options = default_options.merge(opts.deep_symbolize_keys) + @logger = @options[:logger] + @snapshot = snapshot + end + + def show_progress? + options[:show_progress] + end + + # + # Adds docs to a snapshot, assuming snapshot is being created from the + # app and thus all the queries already exist for the case + # (so no need to create them). + # + # @param data, hash + # @return self + # + # Example: + # + # manager = SnapshotManager.new snapshot + # data = { + # 123 => [ + # { id: "doc1", explain: "1" }, + # { id: "doc2", explain: "2" }, + # ], + # 456 => [ + # { id: "doc3", explain: "3" }, + # { id: "doc4", explain: "4" }, + # ] + # } + # manager.add_docs data + # + def add_docs docs, queries + queries_to_import = [] + + keys = docs.nil? ? [] : docs.keys + + # Start by adding queries to snapshot. + # First, setup all queries to be added in an array. + # block_with_progress_bar(keys.length) do |i| + keys.length.times.each do |i| + query_id = keys[i] + + snapshot_query = @snapshot.snapshot_queries.where(query_id: query_id).first_or_initialize + + # Quepid front end can send -- as no score. + queries[query_id]['score'] = nil if '--' == queries[query_id]['score'] + snapshot_query.score = queries[query_id][:score] + snapshot_query.all_rated = queries[query_id][:all_rated] + snapshot_query.number_of_results = queries[query_id][:number_of_results] + + queries_to_import << snapshot_query + end + + # Second, mass insert queries. + SnapshotQuery.import queries_to_import + # End of queries import. + + # Then import docs for the queries that were just created. + # This method is shared with the `import_queries` method + # which does the same thing with a slightly different set of data. + import_docs keys, docs + + self + end + + # + # Imports queries and docs to a snapshot. + # If the query does not already exists, it adds it to the case first, + # then it adds it to the snapshot. + # + # @param queries, hash + # @return self + # + # Example: + # + # manager = SnapshotManager.new snapshot + # data = { + # "dog" => { + # docs: [ + # { id: "doc1", explain: "1", position: 1 }, + # { id: "doc2", explain: "2", position: 2 }, + # ] + # }, + # "cat" => { + # docs: [ + # { id: "doc3", explain: "3", position: 2 }, + # { id: "doc4", explain: "4", position: 1 }, + # ] + # } + # } + # manager.import_queries data + # + # rubocop:disable Metrics/MethodLength + def import_queries queries + queries_to_import = [] + keys = queries.keys + + # Fetch all queries for the snapshot's case where the query text + # matches the keys in the hash supplied in the params. + queries_params = { + query_text: keys, + case_id: @snapshot.case_id, + } + indexed_queries = Query.where(queries_params) + .all + .index_by(&:query_text) + + # Start by adding queries to snapshot. + # First, setup all queries to be added in an array. + # print_step 'Importing queries' + # block_with_progress_bar(keys.length) do |i| + keys.length.times.each do |i| + query_text = keys[i] + query = fetch_or_create_query indexed_queries, query_text + + snapshot_query = @snapshot.snapshot_queries.where(query_id: query.id).first_or_initialize + + queries[query.id] = queries.delete(keys[i]) + + queries_to_import << snapshot_query + end + + # Second, mass insert queries. + SnapshotQuery.import queries_to_import + # End of queries import. + + # Updates keys after we switched them out from the text to the id + keys = queries.keys + data = {} + queries.each { |key, q| data[key] = q[:docs] || q['docs'] } + + # Then import docs for the queries that were just created. + import_docs keys, data + + self + end + # rubocop:enable Metrics/MethodLength + + def csv_to_queries_hash docs + # print_step 'Transforming csv into a queries hash' + + query_docs = {} + # block_with_progress_bar(docs.length) do |i| + docs.length.times.each do |i| + row = extract_doc_info docs[i] + query_docs[row[:query_text]] ||= { docs: [] } + query_docs[row[:query_text]][:docs] << row + end + + query_docs + end + + private + + # rubocop:disable Metrics/MethodLength + def setup_docs_for_query query, docs + results = [] + + return results if docs.blank? + return results if query.blank? + + docs = normalize_docs_array docs + docs = docs.sort { |d1, d2| d1[:position].to_i <=> d2[:position].to_i } + + docs.each_with_index do |doc, index| + doc_params = { + doc_id: doc[:id], + explain: doc[:explain], + position: doc[:position] || (index + 1), + rated_only: doc[:rated_only] || false, + fields: doc[:fields].blank? ? nil : doc[:fields].to_json, + } + + results << query.snapshot_docs.build(doc_params) + end + + results + end + # rubocop:enable Metrics/MethodLength + + def extract_doc_info row + case @options[:format] + when :csv + { + query_text: row[0], + id: row[1], + position: row[2], + } + when :hash + row.deep_symbolize_keys + else + row + end + end + + def normalize_docs_array docs + return [] if docs.blank? + + result = docs.map do |each| + each = each.to_unsafe_h if each.is_a?(ActionController::Parameters) + each = each.to_hash if each.is_a?(ActiveSupport::HashWithIndifferentAccess) + + each.symbolize_keys! if each.present? + end.compact + + result + end + + def import_docs keys, data + docs_to_import = [] + + indexed_snap_queries = @snapshot.snapshot_queries + .where(query_id: keys) + .all + .index_by { |q| q.query_id.to_s } + + # print_step 'Importing docs' + # block_with_progress_bar(keys.length) do |i| + keys.length.times.each do |i| + query_id = keys[i] + docs = data[keys[i]] + + snapshot_query = indexed_snap_queries[query_id.to_s] + query_docs = setup_docs_for_query snapshot_query, docs + + docs_to_import += query_docs + end + + SnapshotDoc.import docs_to_import + + self + end + + def fetch_or_create_query indexed_queries, query_text + if indexed_queries[query_text].present? + indexed_queries[query_text] + else + query_params = { + query_text: query_text, + case_id: @snapshot.case_id, + } + Query.create(query_params) + end + end +end +# rubocop:enable Metrics/ClassLength diff --git a/app/views/api/v1/snapshots/_snapshot.json.jbuilder b/app/views/api/v1/snapshots/_snapshot.json.jbuilder index 00f9ff879..0c261afc3 100644 --- a/app/views/api/v1/snapshots/_snapshot.json.jbuilder +++ b/app/views/api/v1/snapshots/_snapshot.json.jbuilder @@ -5,6 +5,7 @@ shallow ||= false json.id snapshot.id json.name snapshot.name json.time snapshot.created_at +json.has_snapshot_file snapshot.snapshot_file.present? unless shallow json.scorer snapshot.scorer, partial: 'api/v1/scorers/communal_scorer', as: :scorer @@ -35,7 +36,7 @@ if with_docs end end -unless shallow +if with_docs json.scores do json.array! snapshot.snapshot_queries, partial: 'api/v1/snapshots/snapshot_query', as: :snapshot_query end diff --git a/app/views/api/v1/snapshots/create.json.jbuilder b/app/views/api/v1/snapshots/create.json.jbuilder index 27f74891a..99f2772a8 100644 --- a/app/views/api/v1/snapshots/create.json.jbuilder +++ b/app/views/api/v1/snapshots/create.json.jbuilder @@ -1,3 +1,3 @@ # frozen_string_literal: true -json.partial! 'snapshot', snapshot: @snapshot, with_docs: true +json.partial! 'snapshot', snapshot: @snapshot, with_docs: @with_docs diff --git a/app/views/api/v1/snapshots/show.json.jbuilder b/app/views/api/v1/snapshots/show.json.jbuilder index 603646829..167d05a60 100644 --- a/app/views/api/v1/snapshots/show.json.jbuilder +++ b/app/views/api/v1/snapshots/show.json.jbuilder @@ -1,3 +1,3 @@ # frozen_string_literal: true -json.partial! 'snapshot', snapshot: @snapshot, with_docs: true, shallow: @shallow +json.partial! 'snapshot', snapshot: @snapshot, with_docs: @with_docs, shallow: @shallow diff --git a/app/views/home/_case_summary.html.erb b/app/views/home/_case_summary.html.erb index d49b0d96a..12d387879 100644 --- a/app/views/home/_case_summary.html.erb +++ b/app/views/home/_case_summary.html.erb @@ -5,7 +5,7 @@
-
<%= kase.last_score.score unless kase.scores.empty? %> <%= kase.scorer.name %>
+
<%= number_with_precision(kase.last_score.score, precision: 2) unless kase.scores.empty? %> <%= kase.scorer.name %>

<%= kase.created_at.to_date.to_fs(:short) %> - <%= kase.last_score.updated_at.to_date.to_fs(:short) unless kase.scores.empty?%>

<% diff --git a/app/views/judgements/_moar_judgements_needed.html.erb b/app/views/judgements/_moar_judgements_needed.html.erb index 9b9d7f8d6..4135272dd 100644 --- a/app/views/judgements/_moar_judgements_needed.html.erb +++ b/app/views/judgements/_moar_judgements_needed.html.erb @@ -1,4 +1,5 @@ -<% if SelectionStrategy.random_query_doc_based_on_strategy(book, current_user) %> +hi: <%= @moar_judgements_needed %> +<% if @moar_judgements_needed %> @@ -35,7 +36,7 @@ <% else %> <% end %> <% end %> diff --git a/db/migrate/20240124224447_resize_query_d_oc_doc_fields_to_match_snapshot_fields.rb b/db/migrate/20240124224447_resize_query_d_oc_doc_fields_to_match_snapshot_fields.rb new file mode 100644 index 000000000..d9d5ef3cc --- /dev/null +++ b/db/migrate/20240124224447_resize_query_d_oc_doc_fields_to_match_snapshot_fields.rb @@ -0,0 +1,7 @@ +class ResizeQueryDOcDocFieldsToMatchSnapshotFields < ActiveRecord::Migration[7.1] + # discovered that we limit this to TEXT, not MEDIUMTEXT, however + # in SnapshotDocs.fields it IS a MEDIUMTEXT field. + def change + change_column :query_doc_pairs, :document_fields, :mediumtext + end +end diff --git a/db/schema.rb b/db/schema.rb index 4dc1bdcd2..1f916c30f 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -182,7 +182,7 @@ create_table "query_doc_pairs", charset: "utf8mb3", collation: "utf8mb3_unicode_ci", force: :cascade do |t| t.string "query_text", limit: 500 t.integer "position" - t.text "document_fields" + t.text "document_fields", size: :medium t.bigint "book_id", null: false t.datetime "created_at", null: false t.datetime "updated_at", null: false diff --git a/docker-compose.override.yml.example b/docker-compose.override.yml.example deleted file mode 100644 index 2133b8809..000000000 --- a/docker-compose.override.yml.example +++ /dev/null @@ -1,9 +0,0 @@ -# Demonstrate how to load your own local copy of a node package into Quepid. -# Commonly used when you are developing against an unreleased package. - -version: '3' -services: - app: - volumes: - - /Users/epugh/Documents/projects/splainer-search/package.json:/srv/app/node_modules/splainer-search/package.json - - /Users/epugh/Documents/projects/splainer-search/splainer-search.js:/srv/app/node_modules/splainer-search/splainer-search.js diff --git a/spec/javascripts/angular/services/querySnapshotSvc_spec.js b/spec/javascripts/angular/services/querySnapshotSvc_spec.js index b4d4f136a..2319e51b2 100644 --- a/spec/javascripts/angular/services/querySnapshotSvc_spec.js +++ b/spec/javascripts/angular/services/querySnapshotSvc_spec.js @@ -149,7 +149,7 @@ describe('Service: querySnapshotSvc', function () { $httpBackend.verifyNoOutstandingExpectation(); }); - it('doesnt update version if same case', function() { + it('does not update version if same case', function() { var priorVersion = querySnapshotSvc.version(); querySnapshotSvc.bootstrap(2); querySnapshotSvc.bootstrap(2); diff --git a/spec/javascripts/mock/settingsSvc.js b/spec/javascripts/mock/settingsSvc.js index 850ada364..644de94df 100644 --- a/spec/javascripts/mock/settingsSvc.js +++ b/spec/javascripts/mock/settingsSvc.js @@ -26,6 +26,11 @@ this.applicableSettings = function() { return mockTry; }; + + this.supportLookupById = function(searchEngine) { + return true; + }; + }; wind.MockSettingsSvc = MockSettingsSvc; })(window); diff --git a/test/controllers/api/v1/books_controller_test.rb b/test/controllers/api/v1/books_controller_test.rb index 57461a1ea..d2091373b 100644 --- a/test/controllers/api/v1/books_controller_test.rb +++ b/test/controllers/api/v1/books_controller_test.rb @@ -121,7 +121,7 @@ class BooksControllerTest < ActionController::TestCase assert_response :ok csv = CSV.parse(response.body, headers: true) - assert_includes csv.headers, 'Unknown' + assert_includes csv.headers, 'anonymous' end end end diff --git a/test/controllers/api/v1/snapshots/search_controller_test.rb b/test/controllers/api/v1/snapshots/search_controller_test.rb index 0918b5114..c5cf43e0f 100644 --- a/test/controllers/api/v1/snapshots/search_controller_test.rb +++ b/test/controllers/api/v1/snapshots/search_controller_test.rb @@ -32,6 +32,36 @@ class SearchControllerTest < ActionController::TestCase assert_equal snapshot_query.snapshot_docs.count, data['response']['docs'].length end + test 'handles a term lookup for docs as if Solr' do + query_string = 'id:(' + snapshot.snapshot_docs.each do |doc| + query_string += "#{doc.doc_id} OR " + end + query_string = query_string[0...-4] + query_string += ')' + solr_query_params = { + q: query_string, + defType: 'lucene', + rows: snapshot.snapshot_docs.count, + fl: snapshot.case.tries.first.field_spec, + wt: 'json', + hl: false, + + } + + params = { case_id: acase.id, snapshot_id: snapshot.id } + params = params.merge(solr_query_params) + + get :index, params: params + + assert_response :ok + + data = response.parsed_body + + assert_equal query_string, data['responseHeader']['params']['q'] + assert_equal snapshot.snapshot_docs.count, data['response']['docs'].length + end + test 'handles a *:* search' do get :index, params: { case_id: acase.id, snapshot_id: snapshot.id, q: '*:*' } diff --git a/test/controllers/api/v1/snapshots_controller_test.rb b/test/controllers/api/v1/snapshots_controller_test.rb index ff9d07150..84f21d5bb 100644 --- a/test/controllers/api/v1/snapshots_controller_test.rb +++ b/test/controllers/api/v1/snapshots_controller_test.rb @@ -24,12 +24,12 @@ class SnapshotsControllerTest < ActionController::TestCase name: 'New Snapshot', docs: { first_query.id => [ - { id: 'doc1', explain: 1 }, - { id: 'doc2', explain: 2 } + { id: 'doc1', explain: '1' }, + { id: 'doc2', explain: '2' } ], second_query.id => [ - { id: 'doc3', explain: 3 }, - { id: 'doc4', explain: 4 } + { id: 'doc3', explain: '3' }, + { id: 'doc4', explain: '4' } ], }, queries: { @@ -49,27 +49,37 @@ class SnapshotsControllerTest < ActionController::TestCase post :create, params: data.merge(case_id: acase.id) assert_response :ok - snapshot = response.parsed_body + snapshot_response = response.parsed_body - assert_not_nil snapshot['time'] + assert_not_nil snapshot_response['scorer'] + assert_not_nil snapshot_response['try'] + assert_not_nil snapshot_response['time'] + assert_not_nil snapshot_response['id'] + assert_equal snapshot_response['name'], data[:snapshot][:name] - assert_equal snapshot['name'], data[:snapshot][:name] - assert_equal snapshot['docs'].length, data[:snapshot][:docs].length + perform_enqueued_jobs - data_doc = data[:snapshot][:docs][first_query.id][0] - response_doc = snapshot['docs'][first_query.id.to_s][0] + snapshot = Snapshot.find(snapshot_response['id']) - assert_equal data_doc[:id], response_doc['id'] - assert_equal data_doc[:explain], response_doc['explain'] + assert_equal snapshot.snapshot_docs.length, data[:snapshot][:docs].values.flatten.count + data_doc = data[:snapshot][:docs][first_query.id][0] + snapshot_query = snapshot.snapshot_queries.where(query_id: first_query.id).first + + response_doc = snapshot_query.snapshot_docs[0] + + assert_equal data_doc[:id], response_doc.doc_id + assert_equal data_doc[:explain], response_doc.explain + + snapshot_query = snapshot.snapshot_queries.where(query: second_query).first data_doc = data[:snapshot][:docs][second_query.id][0] - response_doc = snapshot['docs'][second_query.id.to_s][0] + response_doc = snapshot_query.snapshot_docs[0] - assert_equal data_doc[:id], response_doc['id'] - assert_equal data_doc[:explain], response_doc['explain'] + assert_equal data_doc[:id], response_doc.doc_id + assert_equal data_doc[:explain], response_doc.explain - assert_not_nil snapshot['scorer'] - assert_not_nil snapshot['try'] + assert_not_nil snapshot.scorer + assert_not_nil snapshot.try end end @@ -79,8 +89,8 @@ class SnapshotsControllerTest < ActionController::TestCase name: 'New Snapshot', docs: { first_query.id => [ - { id: 'doc1', explain: 1 }, - { id: 'doc2', explain: 2 } + { id: 'doc1', explain: '1' }, + { id: 'doc2', explain: '2' } ], # in Rails 4, we could do second_query.id => [] and getting the second_query in, # but in Rails 5, the second_query doesn't show up because the array that is empty @@ -105,20 +115,39 @@ class SnapshotsControllerTest < ActionController::TestCase assert_response :ok - snapshot = response.parsed_body + snapshot_response = response.parsed_body + + assert_equal data[:snapshot][:name], snapshot_response['name'] + assert_not_nil snapshot_response['id'] + + snapshot = Snapshot.find(snapshot_response['id']) + assert snapshot.snapshot_file.present? + assert_equal snapshot.snapshot_queries.length, 0 + + perform_enqueued_jobs - assert_equal data[:snapshot][:name], snapshot['name'] - assert_equal data[:snapshot][:docs].length, snapshot['docs'].length + snapshot = Snapshot.find(snapshot_response['id']) + assert_not snapshot.snapshot_file.present? + assert_equal snapshot.snapshot_queries.length, 2 + assert_equal data[:snapshot][:docs].length, snapshot.snapshot_docs.length - data_doc = data[:snapshot][:docs][first_query.id][0] - response_doc = snapshot['docs'][first_query.id.to_s][0] + data_doc = data[:snapshot][:docs][first_query.id][0] - assert_equal data_doc[:id], response_doc['id'] - assert_equal data_doc[:explain], response_doc['explain'] + snapshot_query = snapshot.snapshot_queries.where(query_id: first_query.id).first - response_docs = snapshot['docs'][second_query.id.to_s] + response_doc = snapshot_query.snapshot_docs[0] + + assert_equal data_doc[:id], response_doc.doc_id + assert_equal data_doc[:explain], response_doc.explain + + snapshot_query = snapshot.snapshot_queries.where(query_id: second_query.id).first + response_docs = snapshot_query.snapshot_docs assert_empty response_docs + + perform_enqueued_jobs + + assert_equal snapshot.snapshot_queries.length, 2 end end @@ -216,8 +245,9 @@ class SnapshotsControllerTest < ActionController::TestCase let(:data) do { snapshot: { - name: 'New Snapshot', - docs: {}, + name: 'New Snapshot', + docs: {}, + queries: {}, }, } end diff --git a/test/fixtures/query_doc_pairs.yml b/test/fixtures/query_doc_pairs.yml index b1bfa8bd6..59e7a4428 100644 --- a/test/fixtures/query_doc_pairs.yml +++ b/test/fixtures/query_doc_pairs.yml @@ -3,7 +3,7 @@ # Table name: query_doc_pairs # # id :bigint not null, primary key -# document_fields :text(65535) +# document_fields :text(16777215) # information_need :string(255) # notes :text(65535) # options :text(65535) diff --git a/test/fixtures/snapshot_docs.yml b/test/fixtures/snapshot_docs.yml index 3579cfcba..4f4305b93 100644 --- a/test/fixtures/snapshot_docs.yml +++ b/test/fixtures/snapshot_docs.yml @@ -20,25 +20,29 @@ # a_doc: - doc_id: "doc a" + doc_id: "doc_a" position: 1 snapshot_query: :first_snapshot_query explain: '{"message": "Explain Me"}' + fields: '{"id":"doc_a", "title": "title", "body":"body"}' b_doc: - doc_id: "doc b" + doc_id: "doc_b" position: 1 snapshot_query: :first_snapshot_query explain: '{"message": "Explain Me"}' + fields: '{"id":"doc_b", "title": "title", "body":"body"}' c_doc: - doc_id: "doc c" + doc_id: "doc_c" position: 1 snapshot_query: :second_snapshot_query explain: '{"message": "Explain Me"}' + fields: '{"id":"doc_c", "title": "title", "body":"body"}' d_doc: - doc_id: "doc d" + doc_id: "doc_d" position: 1 snapshot_query: :second_snapshot_query explain: '{"message": "Explain Me"}' + fields: '{"id":"doc_d", "title": "title", "body":"body"}' diff --git a/test/fixtures/tries.yml b/test/fixtures/tries.yml index 55227ef45..3143f4f3d 100644 --- a/test/fixtures/tries.yml +++ b/test/fixtures/tries.yml @@ -203,3 +203,10 @@ for_case_queries_case: query_params: 'q=#$query##' try_number: 1 search_endpoint: :for_case_queries_case + +for_case_snapshot_case: + case: :snapshot_case + query_params: 'q=#$query##' + try_number: 1 + field_spec: id:id title:title body + search_endpoint: :one diff --git a/test/integration/experiment_with_bulk_insert_test.rb b/test/integration/experiment_with_bulk_insert_test.rb index 2e61fac40..13ae7cd0f 100644 --- a/test/integration/experiment_with_bulk_insert_test.rb +++ b/test/integration/experiment_with_bulk_insert_test.rb @@ -8,8 +8,12 @@ class ExperimentWithBulkInsertTest < ActionDispatch::IntegrationTest let(:scorer) { scorers(:quepid_default_scorer) } let(:selection_strategy) { selection_strategies(:multiple_raters) } + # rubocop:disable Style/ClassVars + @@skip_tests = true + # rubocop:enable Style/ClassVars + test 'generate and export 5000 queries with traditional AR' do - skip('Ignoring all tests in ExperimentWithBulkInsertTest') + skip('Ignoring all tests in ExperimentWithBulkInsertTest') if @@skip_tests book = user.books.create name: '50000 Query Doc Pairs', scorer: scorer, selection_strategy: selection_strategy assert book.valid? result = Benchmark.measure do @@ -32,7 +36,7 @@ class ExperimentWithBulkInsertTest < ActionDispatch::IntegrationTest end test 'generate and export 5000 queries with bulk import' do - skip('Ignoring all tests in ExperimentWithBulkInsertTest') + skip('Ignoring all tests in ExperimentWithBulkInsertTest') if @@skip_tests book = user.books.create name: '50000 Query Doc Pairs', scorer: scorer, selection_strategy: selection_strategy assert book.valid? result = Benchmark.measure do @@ -56,7 +60,7 @@ class ExperimentWithBulkInsertTest < ActionDispatch::IntegrationTest end test 'generate and export 5000 queries with insert_all' do - skip('Ignoring all tests in ExperimentWithBulkInsertTest') + skip('Ignoring all tests in ExperimentWithBulkInsertTest') if @@skip_tests book = user.books.create name: '50000 Query Doc Pairs', scorer: scorer, selection_strategy: selection_strategy assert book.valid? result = Benchmark.measure do @@ -86,7 +90,7 @@ class ExperimentWithBulkInsertTest < ActionDispatch::IntegrationTest end test 'generate and export 5000 queries with upsert_all' do - skip('Ignoring all tests in ExperimentWithBulkInsertTest') + skip('Ignoring all tests in ExperimentWithBulkInsertTest') if @@skip_tests book = user.books.create name: '50000 Query Doc Pairs', scorer: scorer, selection_strategy: selection_strategy assert book.valid? result = Benchmark.measure do @@ -117,7 +121,7 @@ class ExperimentWithBulkInsertTest < ActionDispatch::IntegrationTest # rubocop:disable Layout/LineLength test 'generate and export 5000 queries with upsert_all when exists already data' do - skip('Ignoring all tests in ExperimentWithBulkInsertTest') + skip('Ignoring all tests in ExperimentWithBulkInsertTest') if @@skip_tests book = user.books.create name: '50000 Query Doc Pairs', scorer: scorer, selection_strategy: selection_strategy assert book.valid? diff --git a/test/models/query_doc_pair_test.rb b/test/models/query_doc_pair_test.rb index f5d62e481..3817c3771 100644 --- a/test/models/query_doc_pair_test.rb +++ b/test/models/query_doc_pair_test.rb @@ -5,7 +5,7 @@ # Table name: query_doc_pairs # # id :bigint not null, primary key -# document_fields :text(65535) +# document_fields :text(16777215) # information_need :string(255) # notes :text(65535) # options :text(65535) diff --git a/test/services/snapshot_manager_copy_test.rb b/test/services/snapshot_manager_copy_test.rb new file mode 100644 index 000000000..84f649b14 --- /dev/null +++ b/test/services/snapshot_manager_copy_test.rb @@ -0,0 +1,264 @@ +# frozen_string_literal: true + +require 'test_helper' + +class SnapshotManagerCopyTest < ActiveSupport::TestCase + let(:snapshot) { snapshots(:empty_snapshot) } + let(:first_query) { queries(:a_query) } + let(:second_query) { queries(:b_query) } + + let(:service) { SnapshotManagerCopy.new(snapshot) } + + describe 'Adds docs to a snapshot' do + test 'adds snapshot to case' do + sample_fields = { title: 'some title', 'thumb:product_image': 'http://example.com/image.png' } + + data = { + docs: { + first_query.id => [ + { id: 'doc1', explain: '1', fields: sample_fields }, + { id: 'doc2', explain: '2', fields: sample_fields } + ], + second_query.id => [ + { id: 'doc3', explain: '3', fields: sample_fields }, + { id: 'doc4', explain: '4', fields: sample_fields } + ], + }, + queries: { + first_query.id => { + score: 0.87, + all_rated: true, + number_of_results: 42, + }, + second_query.id => { + score: 0.45, + all_rated: false, + number_of_results: nil, + }, + }, + } + + assert_difference 'snapshot.snapshot_queries.count', 2 do + service.add_docs data[:docs], data[:queries] + + # This is needed or else we get wrong numbers + snapshot.reload + snapshot_queries = snapshot.snapshot_queries + + assert_equal snapshot_queries.length, data[:docs].length + + first_snapshot_query = snapshot_queries.where(query_id: first_query.id).first + second_snapshot_query = snapshot_queries.where(query_id: second_query.id).first + + assert_not_nil first_snapshot_query + assert_equal first_snapshot_query.query_id, first_query.id + assert_equal first_snapshot_query.score, 0.87 + assert_equal true, first_snapshot_query.all_rated + assert_equal 42, first_snapshot_query.number_of_results + + assert_not_nil second_snapshot_query + assert_equal second_snapshot_query.query_id, second_query.id + assert_equal second_snapshot_query.score, 0.45 + assert_equal false, second_snapshot_query.all_rated + assert_nil second_snapshot_query.number_of_results + + data_doc = data[:docs][first_query.id][0] + response_doc = first_snapshot_query.snapshot_docs[0] + + assert_equal data_doc[:id], response_doc.doc_id + assert_equal data_doc[:explain], response_doc.explain + assert_equal 1, response_doc.position + assert_equal sample_fields.to_json, response_doc.fields + + data_doc = data[:docs][second_query.id][0] + response_doc = second_snapshot_query.snapshot_docs[0] + + assert_equal data_doc[:id], response_doc.doc_id + assert_equal data_doc[:explain], response_doc.explain + assert_equal 1, response_doc.position + assert_equal sample_fields.to_json, response_doc.fields + end + end + end + + describe 'Import queries' do + test 'creates queries if they do not already exist' do + data = { + 'dog' => { + docs: [ + { id: 'doc1', explain: '1', position: 1 }, + { id: 'doc2', explain: '2', position: 2 } + ], + }, + 'cat' => { + docs: [ + { id: 'doc3', explain: '3', position: 2 }, + { id: 'doc4', explain: '4', position: 1 } + ], + }, + } + + assert_difference 'Query.count', 2 do + service.import_queries data + + # This is needed or else we get wrong numbers + snapshot.reload + queries = snapshot.snapshot_queries + + assert_equal queries.length, data.length + + first_query = Query.where(query_text: 'dog', case_id: snapshot.case_id).first + second_query = Query.where(query_text: 'cat', case_id: snapshot.case_id).first + + first_snapshot_query = queries.where(query_id: first_query.id).first + second_snapshot_query = queries.where(query_id: second_query.id).first + + assert_not_nil first_snapshot_query + assert_equal first_snapshot_query.query_id, first_query.id + + assert_not_nil second_snapshot_query + assert_equal second_snapshot_query.query_id, second_query.id + + data_doc = data[first_query.id][:docs][0] + response_doc = first_snapshot_query.snapshot_docs[0] + + assert_equal data_doc[:id], response_doc.doc_id + assert_equal data_doc[:explain], response_doc.explain + assert_equal data_doc[:position], response_doc.position + assert_equal 1, response_doc.position + + data_doc = data[second_query.id][:docs][0] + response_doc = second_snapshot_query.snapshot_docs[1] + + assert_equal data_doc[:id], response_doc.doc_id + assert_equal data_doc[:explain], response_doc.explain + assert_equal data_doc[:position], response_doc.position + assert_equal 2, response_doc.position + end + end + + test 'does not create queries if they already exist' do + data = { + first_query.query_text => { + docs: [ + { id: 'doc1', explain: '1', position: 1 }, + { id: 'doc2', explain: '2', position: 2 } + ], + }, + second_query.query_text => { + docs: [ + { id: 'doc3', explain: '3', position: 2 }, + { id: 'doc4', explain: '4', position: 1 } + ], + }, + } + + assert_no_difference 'Query.count' do + service.import_queries data + + # This is needed or else we get wrong numbers + snapshot.reload + queries = snapshot.snapshot_queries + + assert_equal queries.length, data.length + + first_snapshot_query = queries.where(query_id: first_query.id).first + second_snapshot_query = queries.where(query_id: second_query.id).first + + assert_not_nil first_snapshot_query + assert_equal first_snapshot_query.query_id, first_query.id + + assert_not_nil second_snapshot_query + assert_equal second_snapshot_query.query_id, second_query.id + + data_doc = data[first_query.id][:docs][0] + response_doc = first_snapshot_query.snapshot_docs[0] + + assert_equal data_doc[:id], response_doc.doc_id + assert_equal data_doc[:explain], response_doc.explain + assert_equal data_doc[:position], response_doc.position + assert_equal 1, response_doc.position + + data_doc = data[second_query.id][:docs][0] + response_doc = second_snapshot_query.snapshot_docs[1] + + assert_equal data_doc[:id], response_doc.doc_id + assert_equal data_doc[:explain], response_doc.explain + assert_equal data_doc[:position], response_doc.position + assert_equal 2, response_doc.position + end + end + + test 'handles query data with empty docs' do + data = { + first_query.query_text => { + docs: [], + }, + second_query.query_text => { + docs: [], + }, + } + + service.import_queries data + + # This is needed or else we get wrong numbers + snapshot.reload + queries = snapshot.snapshot_queries + + assert_equal queries.length, data.length + + first_snapshot_query = queries.where(query_id: first_query.id).first + second_snapshot_query = queries.where(query_id: second_query.id).first + + assert_not_nil first_snapshot_query + assert_not_nil second_snapshot_query + + data_doc = data[first_query.id][:docs] + response_doc = first_snapshot_query.snapshot_docs + + assert_equal data_doc.length, response_doc.length + + data_doc = data[second_query.id][:docs] + response_doc = second_snapshot_query.snapshot_docs + + assert_equal data_doc.length, response_doc.length + end + + test 'handles query data with array of nil docs' do + data = { + first_query.query_text => { + docs: [ nil, nil ], + }, + second_query.query_text => { + docs: [ + { id: 'doc3', explain: '3', position: 2 }, + { id: 'doc4', explain: '4', position: 1 } + ], + }, + } + + service.import_queries data + + # This is needed or else we get wrong numbers + snapshot.reload + queries = snapshot.snapshot_queries + + assert_equal queries.length, data.length + + first_snapshot_query = queries.where(query_id: first_query.id).first + second_snapshot_query = queries.where(query_id: second_query.id).first + + assert_not_nil first_snapshot_query + assert_not_nil second_snapshot_query + + response_doc = first_snapshot_query.snapshot_docs + + assert_equal 0, response_doc.length # coz they were all nils + + data_doc = data[second_query.id][:docs] + response_doc = second_snapshot_query.snapshot_docs + + assert_equal data_doc.length, response_doc.length + end + end +end