Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize snapshots #931

Merged
merged 17 commits into from
Feb 4, 2024
Merged
4 changes: 4 additions & 0 deletions app/assets/javascripts/components/diff/_modal.html
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ <h3 class="modal-title">Compare Your Search Results</h3>
<span ng-if="ctrl.selection">
Snapshot has the id {{ ctrl.selection }}
</span>

<div class="alert alert-warning" role="alert" ng-if="ctrl.isProcessingFile()">
This snapshot is currently being processed in the background. You should wait till it completes.
</div>
</div>

<div class="form-group">
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,11 @@ angular.module('QuepidApp')
) {
var ctrl = this;

// Attributes
ctrl.snapshots = querySnapshotSvc.snapshots;
querySnapshotSvc.getSnapshots().then(function() {
ctrl.snapshots = querySnapshotSvc.snapshots;
}
);

ctrl.which = 'snapshot';
ctrl.selection = initialSelection;
ctrl.inProgress = false;
Expand All @@ -32,6 +35,8 @@ angular.module('QuepidApp')
ctrl.nothingSelected = nothingSelected;
ctrl.ok = ok;
ctrl.toggleDel = toggleDel;
ctrl.isProcessingFile = isProcessingFile;


// Watches
$scope.$watch('ctrl.selection', function(newVal, oldVal) {
Expand Down Expand Up @@ -72,6 +77,25 @@ angular.module('QuepidApp')
flash.success = 'Snapshot deleted successfully.';
});
}

/**
 * Reports whether the currently selected snapshot carries a truthy
 * `hasSnapshotFile` flag. Bound to `ctrl.isProcessingFile`, which the modal
 * template evaluates to decide whether to show its "being processed in the
 * background" warning.
 *
 * @returns {boolean} true only when `ctrl.snapshots` contains a snapshot
 *   whose id strictly equals `ctrl.selection` and that snapshot's
 *   `hasSnapshotFile` is truthy; false in every other case.
 */
function isProcessingFile() {
  if (!ctrl.snapshots) {
    return false;
  }

  var desiredSnapshot = null;
  angular.forEach(ctrl.snapshots, function(snapshot) {
    // angular.forEach cannot be aborted from inside the callback, so a bare
    // `return` only skips to the next entry. Keep the first match and ignore
    // the rest instead of pretending to exit early.
    // NOTE(review): strict equality; confirm ctrl.selection and snapshot.id
    // always share the same type (number vs. string).
    if (desiredSnapshot === null && snapshot.id === ctrl.selection) {
      desiredSnapshot = snapshot;
    }
  });

  // Coerce so the result is always a real boolean, never undefined.
  return !!(desiredSnapshot && desiredSnapshot.hasSnapshotFile);
}

function isNumber(num) {
return !isNaN(parseInt('' + num, 10));
Expand Down
4 changes: 4 additions & 0 deletions app/assets/javascripts/components/export_case/_modal.html
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ <h3 class="modal-title">Export Case: <span class="modal-case">{{ ctrl.theCase.ca
>
</select>
</label>

<div class="alert alert-warning" role="alert" ng-if="ctrl.isProcessingFile()">
This snapshot is currently being processed in the background. You should wait till it completes.
</div>
</div>
<hr/>
<div class="form-group">
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,5 +47,18 @@ angular.module('QuepidApp')
ctrl.cancel = function () {
$uibModalInstance.dismiss('cancel');
};


// Reports whether the snapshot picked for export carries a truthy
// `hasSnapshotFile` flag; the export modal template evaluates this to decide
// whether to show its "being processed in the background" warning.
//
// Returns false when no snapshot option is set or when no snapshot matches.
//
// NOTE(review): `ctrl.snapshots` and `ctrl.selection` are not assigned in the
// visible part of this controller. Confirm they exist here, or whether the
// selected id actually lives in `ctrl.options.snapshot`.
ctrl.isProcessingFile = function () {
  if (!ctrl.options.snapshot) {
    return false;
  }

  var desiredSnapshot = null;
  angular.forEach(ctrl.snapshots, function(snapshot) {
    // A `return` inside this callback neither leaves the loop nor becomes the
    // result of isProcessingFile, so remember the first match and read its
    // flag once the loop is done.
    if (desiredSnapshot === null && snapshot.id === ctrl.selection) {
      desiredSnapshot = snapshot;
    }
  });

  // Previously this dereferenced a variable that was always null, throwing a
  // TypeError as soon as a snapshot matched.
  return !!(desiredSnapshot && desiredSnapshot.hasSnapshotFile);
};
}
]);
2 changes: 1 addition & 1 deletion app/assets/javascripts/controllers/mainCtrl.js
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ angular.module('QuepidApp')
bootstrapCase()
.then(function() {
loadQueries();
loadSnapshots();
loadSnapshots(); // this is here just to set the caseNo in the querySnapshotSvc.
updateCaseMetadata();
paneSvc.refreshElements();
}).catch(function(error) {
Expand Down
7 changes: 7 additions & 0 deletions app/assets/javascripts/controllers/promptSnapshot.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,17 @@ angular.module('QuepidApp')
$scope.snapPrompt = {name: '', recordDocumentFields: false, inProgress: false, error: null};

$scope.fieldSpec = settingsSvc.applicableSettings().fieldSpec;
$scope.searchEngine = settingsSvc.applicableSettings().searchEngine;

$scope.supportLookupById = settingsSvc.supportLookupById(settingsSvc.applicableSettings().searchEngine);

$scope.ok = function() {
$scope.snapPrompt.inProgress = true;
$scope.snapPrompt.error = null;

if ($scope.supportLookupById === false){ // force recording of document fields for non supporting end points.
$scope.snapPrompt.recordDocumentFields = true;
}

querySnapshotSvc.addSnapshot($scope.snapPrompt.name, $scope.snapPrompt.recordDocumentFields, queriesSvc.queryArray())
.then(function() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
self.id = params.id;
self.name = snapshotName;
self.time = params.time;
self.hasSnapshotFile = params.has_snapshot_file;
self.docs = params.docs;
self.queries = params.queries;

Expand Down
16 changes: 3 additions & 13 deletions app/assets/javascripts/services/docCacheSvc.js
Original file line number Diff line number Diff line change
Expand Up @@ -59,20 +59,10 @@ angular.module('QuepidApp')
settings.proxyUrl = caseTryNavSvc.getQuepidProxyUrl();
}

var docIds = Object.keys(docsToFetch);
var docIds = Object.keys(docsToFetch);
var resolver = docResolverSvc.createResolver(docIds, settings, 15);

// 'vectara' does not support doc lookup by ID.
let supportLookupById = true;
if (settings && settings.searchEngine === 'vectara'){
supportLookupById = false;
}
else if (settings && settings.searchEngine === 'searchapi'){
supportLookupById = false;
}


if ( supportLookupById && docIds.length > 0 ) {

if ( docIds.length > 0 ) {
return resolver.fetchDocs()
.then(function () {
angular.forEach(resolver.docs, function (doc) {
Expand Down
37 changes: 35 additions & 2 deletions app/assets/javascripts/services/querySnapshotSvc.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@
angular.module('QuepidApp')
.service('querySnapshotSvc', [
'$http', '$q',
'settingsSvc', 'docCacheSvc',
'settingsSvc', 'docCacheSvc', 'caseTryNavSvc',
'SnapshotFactory',
function querySnapshotSvc(
$http, $q,
settingsSvc, docCacheSvc,
settingsSvc, docCacheSvc, caseTryNavSvc,
SnapshotFactory
) {
// caches normal docs for all snapshots
Expand Down Expand Up @@ -40,6 +40,25 @@ angular.module('QuepidApp')
settings === null ||
Object.keys(settings).length === 0)
) {

// Some search endpoints let you look up documents by id.
// When that isn't possible, we require you to store the doc fields
// in the snapshot, and we look them up from the snapshot. To be clever,
// we pretend to be a "solr" endpoint to drive the lookup.
if (settingsSvc.supportLookupById(settings.searchEngine) === false){
var settingsForLookup = angular.copy(settings);
settingsForLookup.apiMethod = 'GET';
settingsForLookup.searchEngine = 'solr';
settingsForLookup.searchEndpointId = null;
settingsForLookup.customHeaders = null;

let snapshotId = snapshots[0].id;
settingsForLookup.searchUrl = `${caseTryNavSvc.getQuepidRootUrl()}/api/cases/${caseTryNavSvc.getCaseNo()}/snapshots/${snapshotId}/search`;

settings = settingsForLookup;
}


return docCacheSvc.update(settings);
} else {
return $q(function(resolve) {
Expand All @@ -64,6 +83,20 @@ angular.module('QuepidApp')
});
});
};

// Snapshots are now processed asynchronously, so the list is fetched fresh
// on every call instead of being served from previously cached data.
//
// Returns the promise of the HTTP round trip; it settles after the response
// has been handed to addSnapshotResp and the version counter was bumped.
this.getSnapshots = function() {
  var shallowListUrl = 'api/cases/' + caseNo + '/snapshots?shallow=true';

  // Discard whatever was loaded before asking the server again.
  this.snapshots = {};

  return $http.get(shallowListUrl)
    .then(function(response) {
      return addSnapshotResp(response.data.snapshots);
    })
    .then(function() {
      version++;
    });
};

this.addSnapshot = function(name, recordDocumentFields, queries) {
// we may want to refactor the payload structure in the future.
Expand Down
11 changes: 11 additions & 0 deletions app/assets/javascripts/services/settingsSvc.js
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,17 @@ angular.module('QuepidApp')

var Settings = SettingsFactory;
var currSettings = null;

// Tells callers whether documents can be looked up by id for the given
// search engine. Only 'vectara' and 'searchapi' are reported as lacking
// that ability; any other value (including undefined) yields true.
this.supportLookupById = function(searchEngine) {
  var enginesWithoutIdLookup = ['vectara', 'searchapi'];

  return enginesWithoutIdLookup.indexOf(searchEngine) === -1;
};

this.demoSettingsChosen = function(searchEngine, newUrl) {
var useTMDBDemoSettings = false;
Expand Down
10 changes: 9 additions & 1 deletion app/assets/templates/views/snapshotModal.html
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@ <h3 class="modal-title">Take a Snapshot of all your queries?</h3>
<br/>

<label>Include Document Fields</label>
<span ng-show="supportLookupById">
<p>
To facilitate additional analysis, you may need to record as part of the snapshot all the displayed fields for each document returned.
To facilitate additional analysis, you may want to record as part of the snapshot all the displayed fields for each document returned.
This will be the <code>{{ fieldSpec }}</code> fields.
</p>

Expand All @@ -22,6 +23,13 @@ <h3 class="modal-title">Take a Snapshot of all your queries?</h3>
<input id="include-document-fields" type="checkbox" ng-model='snapPrompt.recordDocumentFields'> Record Document Fields?</input>
</label>
</div>
</span>

<span ng-hide="supportLookupById">
<p>
To power comparisons when using {{ searchEngine | searchEngineName }} the fields of your documents will be stored in Quepid.
</p>
</span>

<div class='text-danger' ng-show="snapPrompt.error">
An error ({{snapPrompt.error}}) occurred, please try again.<br />
Expand Down
3 changes: 3 additions & 0 deletions app/controllers/api/v1/books/populate_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ class PopulateController < Api::ApiController
# We get a messy set of params in this method, so we don't use the normal
# approach of strong parameter validation. We hardcode the only params
# we care about.
#
# With 5000 queries in a large case, this takes 108 seconds...
#
# rubocop:disable Layout/LineLength
def update
puts "[PopulateController] Request Size is #{number_to_human_size(query_doc_pairs_params.to_s.bytesize)}"
Expand Down
28 changes: 20 additions & 8 deletions app/controllers/api/v1/books_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

module Api
module V1
# rubocop:disable Metrics/ClassLength
class BooksController < Api::ApiController
before_action :set_book, only: [ :show, :update, :destroy ]
before_action :check_book, only: [ :show, :update, :destroy ]
Expand All @@ -28,6 +29,7 @@ def index
# rubocop:disable Metrics/AbcSize
# rubocop:disable Metrics/CyclomaticComplexity
# rubocop:disable Metrics/PerceivedComplexity
# rubocop:disable Metrics/BlockLength
api :GET, '/api/books/:book_id',
'Show the book with the given ID.'
param :id, :number,
Expand All @@ -40,22 +42,29 @@ def show
csv_headers = %w[query docid]

# Only return rateable judgements, filter out the unrateable ones.
unique_raters = @book.judgements.rateable.preload(:user).collect(&:user).uniq
# unique_raters = @book.judgements.rateable.preload(:user).collect(&:user).uniq
# unique_raters = @book.judges.merge(Judgement.rateable)
unique_judge_ids = @book.query_doc_pairs.joins(:judgements)
.distinct.pluck(:user_id)

# this logic about using email versus name is kind of awful. Think about user.full_name or user.identifier?
unique_raters.each do |rater|
csv_headers << make_csv_safe(if rater.nil?
'Unknown'
unique_judges = []
unique_judge_ids.each do |judge_id|
judge = User.find(judge_id) unless judge_id.nil?
unique_judges << judge
csv_headers << make_csv_safe(if judge.nil?
'anonymous'
else
rater.name.presence || rater.email
judge.name.presence || judge.email
end)
end

@csv_array << csv_headers
@book.query_doc_pairs.each do |qdp|
query_doc_pairs = @book.query_doc_pairs.includes(:judgements)
query_doc_pairs.each do |qdp|
row = [ make_csv_safe(qdp.query_text), qdp.doc_id ]
unique_raters.each do |rater|
judgement = qdp.judgements.detect { |j| j.user == rater }
unique_judges.each do |judge|
judgement = qdp.judgements.detect { |j| j.user == judge }
rating = judgement.nil? ? '' : judgement.rating

row.append rating
Expand All @@ -72,6 +81,8 @@ def show
# rubocop:enable Metrics/AbcSize
# rubocop:enable Metrics/CyclomaticComplexity
# rubocop:enable Metrics/PerceivedComplexity
# rubocop:enable Metrics/BlockLength

api :POST, '/api/books', 'Create a new book.'
param_group :book
def create
Expand Down Expand Up @@ -135,5 +146,6 @@ def make_csv_safe str
end
end
end
# rubocop:enable Metrics/ClassLength
end
end
18 changes: 13 additions & 5 deletions app/controllers/api/v1/snapshots/search_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ class SearchController < SnapshotsController
# rubocop:disable Metrics/CyclomaticComplexity
# rubocop:disable Metrics/PerceivedComplexity
# rubocop:disable Layout/LineLength
api :GET, '/api/cases/:case_id/snapshots/:snapshot_id/search?q=:q',
'Mimic a Solr query by looking up query/doc data from a specific snapshot, using the q parameter as the query'
api :GET, '/api/cases/:case_id/snapshots/:snapshot_id/search?somesolrparams=here',
'Mimic a Solr query by looking up query/doc data from a specific snapshot, supports a query or a lookup by id query'
param :case_id, :number,
desc: 'The ID of the requested case.', required: true
param :snapshot_id, :number,
Expand All @@ -24,22 +24,30 @@ class SearchController < SnapshotsController
desc: 'The query that you are looking up', required: true
def index
@q = search_params[:q]
@snapshot_docs = nil

@q = @q.gsub('\?', '?') # Since it's a GET, a ? in the query gets special escaping
query = if '*:*' == @q
# we have a match all query.
@snapshot.snapshot_queries.first.query

elsif @q.ends_with?(')') && @q.include?(':(') && ('lucene' == search_params[:defType])
# We have a lookup docs by id query
doc_ids = @q[@q.index(':(') + 2...@q.index(')')].split(' OR ')
@snapshot_docs = @snapshot.snapshot_docs.where(doc_id: doc_ids)

else
@snapshot.case.queries.find_by(query_text: @q)
end

if query
if query && @snapshot_docs.nil?
snapshot_query = @snapshot.snapshot_queries.find_by(query: query)

@snapshot_docs = snapshot_query.nil? ? [] : snapshot_query.snapshot_docs
elsif @q.starts_with?('id')
elsif @q.starts_with?('id:') && !@q.starts_with?('id:(')
doc_id = @q.split(':')[1]
@snapshot_docs = @snapshot.snapshot_docs.where(doc_id: doc_id)
else
elsif @snapshot_docs.nil?
@snapshot_docs = []
end

Expand Down
Loading