Skip to content

Commit

Permalink
appears to all work...
Browse files Browse the repository at this point in the history
  • Loading branch information
epugh committed Jan 25, 2024
1 parent 37e9135 commit 2752ac2
Show file tree
Hide file tree
Showing 6 changed files with 591 additions and 36 deletions.
4 changes: 3 additions & 1 deletion app/controllers/api/v1/snapshots_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ def index

def show
  # @with_docs is unconditionally true for this action; @shallow is taken
  # from the request's `shallow` param and falls back to false when that
  # param is missing (nil) or false.
  @with_docs = true
  @shallow = params[:shallow] || false

  respond_with @snapshot
end

Expand All @@ -41,8 +42,9 @@ def create
@snapshot.scorer = @case.scorer
@snapshot.try = @case.tries.first

puts "Okay, checking snapshot queries: #{@snapshot.snapshot_queries.length}"
if @snapshot.save

puts "Okay2, checking snapshot queries: #{@snapshot.snapshot_queries.length}"
serialized_data = Marshal.dump(snapshot_params)

puts "[SnapshotController] the size of the serialized data is #{number_to_human_size(serialized_data.bytesize)}"
Expand Down
9 changes: 4 additions & 5 deletions app/jobs/populate_snapshot_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,16 @@ def perform snapshot
serialized_data = Zlib::Inflate.inflate(compressed_data)
params = Marshal.load(serialized_data)

puts "[PopulateSnapshotJob] I am going to populate the snapshot with #{params[:snapshot][:queries].size} queries"

service = SnapshotManager.new(snapshot)
service = SnapshotManagerCopy.new(snapshot)

snapshot_docs = params[:snapshot][:docs]
snapshot_queries = params[:snapshot][:queries]

service.add_docs snapshot_docs, snapshot_queries if snapshot_docs
service.add_docs snapshot_docs, snapshot_queries

snapshot.reload # this appears to be required or we duplicate the snapshot_queries!

snapshot.snapshot_file.purge
snapshot.save
end
# rubocop:enable Security/MarshalLoad
end
260 changes: 260 additions & 0 deletions app/services/snapshot_manager_copy.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,260 @@
# frozen_string_literal: true

# rubocop:disable Metrics/ClassLength
class SnapshotManagerCopy
  attr_reader :logger, :options

  #
  # Builds a manager bound to a snapshot.
  #
  # @param snapshot, the record whose `snapshot_queries` and `case_id` are used
  # @param opts, hash of overrides for the defaults below (string or symbol keys):
  #   :format        - how `csv_to_queries_hash` reads each row (:csv, :hash,
  #                    anything else passes the row through untouched)
  #   :logger        - exposed through `logger`
  #   :show_progress - exposed through `show_progress?`
  #
  def initialize snapshot, opts = {}
    default_options = {
      format:        :csv,
      logger:        Rails.logger,
      show_progress: false,
    }

    @options  = default_options.merge(opts.deep_symbolize_keys)
    @logger   = @options[:logger]
    @snapshot = snapshot
  end

  # @return the :show_progress option as given (false by default)
  def show_progress?
    options[:show_progress]
  end

  #
  # Adds docs to a snapshot, assuming snapshot is being created from the
  # app and thus all the queries already exist for the case
  # (so no need to create them).
  #
  # One snapshot query is found-or-initialized per key of `docs`, filled in
  # from the matching entry of `queries`, and mass inserted; the docs are
  # then mass inserted against those snapshot queries.
  #
  # @param docs, hash of query id => array of doc hashes (nil is treated as empty)
  # @param queries, hash of query id => { score:, all_rated:, number_of_results: },
  #   string or symbol keys. A score of '--' (the front end's "no score")
  #   is stored as nil. A query id without an entry here gets nil for all
  #   three attributes. The hash is not modified.
  # @return self
  #
  # Example:
  #
  # manager = SnapshotManagerCopy.new snapshot
  # docs = {
  #   123 => [
  #     { id: "doc1", explain: "1" },
  #     { id: "doc2", explain: "2" },
  #   ],
  #   456 => [
  #     { id: "doc3", explain: "3" },
  #     { id: "doc4", explain: "4" },
  #   ]
  # }
  # queries = {
  #   123 => { score: 0.5, all_rated: true, number_of_results: 2 },
  #   456 => { score: '--', all_rated: false, number_of_results: 2 },
  # }
  # manager.add_docs docs, queries
  #
  def add_docs docs, queries
    keys = docs.nil? ? [] : docs.keys

    # Start by adding queries to snapshot.
    # First, setup all queries to be added in an array.
    queries_to_import = keys.map do |query_id|
      snapshot_query = @snapshot.snapshot_queries.where(query_id: query_id).first_or_initialize
      details        = query_details queries, query_id

      # Quepid front end can send -- as no score.
      score = indifferent_fetch details, :score
      score = nil if '--' == score

      snapshot_query.score             = score
      snapshot_query.all_rated         = indifferent_fetch details, :all_rated
      snapshot_query.number_of_results = indifferent_fetch details, :number_of_results

      snapshot_query
    end

    # Second, mass insert queries.
    SnapshotQuery.import queries_to_import
    # End of queries import.

    # Then import docs for the queries that were just created.
    # This method is shared with the `import_queries` method
    # which does the same thing with a slightly different set of data.
    import_docs keys, docs

    self
  end

  #
  # Imports queries and docs to a snapshot.
  # If the query does not already exists, it adds it to the case first,
  # then it adds it to the snapshot.
  #
  # NOTE: `queries` is modified in place, every query text key is replaced
  # by the id of the matching (or newly created) Query.
  #
  # @param queries, hash of query text => { docs: [...] } (string or symbol `docs` key)
  # @return self
  #
  # Example:
  #
  # manager = SnapshotManagerCopy.new snapshot
  # data = {
  #   "dog" => {
  #     docs: [
  #       { id: "doc1", explain: "1", position: 1 },
  #       { id: "doc2", explain: "2", position: 2 },
  #     ]
  #   },
  #   "cat" => {
  #     docs: [
  #       { id: "doc3", explain: "3", position: 2 },
  #       { id: "doc4", explain: "4", position: 1 },
  #     ]
  #   }
  # }
  # manager.import_queries data
  #
  # rubocop:disable Metrics/MethodLength
  def import_queries queries
    query_texts = queries.keys

    # Fetch all queries for the snapshot's case where the query text
    # matches the keys in the hash supplied in the params.
    queries_params = {
      query_text: query_texts,
      case_id:    @snapshot.case_id,
    }
    indexed_queries = Query.where(queries_params)
      .all
      .index_by(&:query_text)

    # Start by adding queries to snapshot.
    # First, setup all queries to be added in an array.
    queries_to_import = query_texts.map do |query_text|
      query = fetch_or_create_query indexed_queries, query_text

      snapshot_query = @snapshot.snapshot_queries.where(query_id: query.id).first_or_initialize

      # Swap the text key for the query id; iterating over the `query_texts`
      # array (not the hash itself) makes this safe to do inside the loop.
      queries[query.id] = queries.delete(query_text)

      snapshot_query
    end

    # Second, mass insert queries.
    SnapshotQuery.import queries_to_import
    # End of queries import.

    # Updates keys after we switched them out from the text to the id
    keys = queries.keys
    data = {}
    queries.each { |key, q| data[key] = q[:docs] || q['docs'] }

    # Then import docs for the queries that were just created.
    import_docs keys, data

    self
  end
  # rubocop:enable Metrics/MethodLength

  #
  # Groups rows by query text.
  #
  # Each row is first run through `extract_doc_info`, so with the default
  # :csv format a row is read as [query_text, doc id, position].
  #
  # @param docs, collection of rows
  # @return hash of query text => { docs: [row info, ...] }, rows kept in input order
  #
  def csv_to_queries_hash docs
    docs.each_with_object({}) do |raw_row, query_docs|
      row = extract_doc_info raw_row

      query_docs[row[:query_text]] ||= { docs: [] }
      query_docs[row[:query_text]][:docs] << row
    end
  end

  private

  # Looks up the attributes supplied for one query id.
  # Falls back to an empty hash when `queries` is nil or has no entry for
  # the id, so that a doc list without metadata does not abort the import.
  def query_details queries, query_id
    (queries && queries[query_id]) || {}
  end

  # Reads `key` from a hash that may be keyed by symbols or by strings.
  # Uses nil? rather than || so a stored false (e.g. all_rated) is kept.
  def indifferent_fetch hash, key
    value = hash[key]
    value.nil? ? hash[key.to_s] : value
  end

  #
  # Builds (without saving) one snapshot doc per entry of `docs` on the
  # given snapshot query.
  #
  # Docs are ordered by their numeric position. The original index is the
  # tie-breaker because Ruby's sort is not guaranteed to be stable: without
  # it, docs that carry no position could be shuffled before the fallback
  # position (index + 1) is assigned.
  #
  # @return array of built snapshot docs, empty when either argument is blank
  #
  # rubocop:disable Metrics/MethodLength
  def setup_docs_for_query query, docs
    return [] if docs.blank?
    return [] if query.blank?

    ordered_docs = normalize_docs_array(docs)
      .each_with_index
      .sort_by { |doc, original_index| [ doc[:position].to_i, original_index ] }
      .map(&:first)

    ordered_docs.each_with_index.map do |doc, index|
      doc_params = {
        doc_id:     doc[:id],
        explain:    doc[:explain],
        position:   doc[:position] || (index + 1),
        rated_only: doc[:rated_only] || false,
        fields:     doc[:fields].blank? ? nil : doc[:fields].to_json,
      }

      query.snapshot_docs.build(doc_params)
    end
  end
  # rubocop:enable Metrics/MethodLength

  # Turns one raw row into a doc hash according to the :format option:
  # :csv reads the first three cells, :hash symbolizes the keys deeply,
  # any other format returns the row unchanged.
  def extract_doc_info row
    case @options[:format]
    when :csv
      {
        query_text: row[0],
        id:         row[1],
        position:   row[2],
      }
    when :hash
      row.deep_symbolize_keys
    else
      row
    end
  end

  # Converts every doc to a plain hash with symbol keys and drops blank
  # entries. Plain hashes are symbolized in place (symbolize_keys!).
  def normalize_docs_array docs
    return [] if docs.blank?

    normalized = docs.map do |doc|
      doc = doc.to_unsafe_h if doc.is_a?(ActionController::Parameters)
      doc = doc.to_hash if doc.is_a?(ActiveSupport::HashWithIndifferentAccess)

      doc.symbolize_keys! if doc.present?
    end

    normalized.compact
  end

  #
  # Mass inserts the docs of every query id in `keys`.
  #
  # @param keys, array of query ids
  # @param data, hash of query id => array of doc hashes
  # @return self
  #
  def import_docs keys, data
    # Keyed by the stringified query id so that ids arriving as strings
    # (e.g. from request params) and as integers both resolve.
    indexed_snap_queries = @snapshot.snapshot_queries
      .where(query_id: keys)
      .all
      .index_by { |q| q.query_id.to_s }

    docs_to_import = keys.flat_map do |query_id|
      snapshot_query = indexed_snap_queries[query_id.to_s]

      setup_docs_for_query snapshot_query, data[query_id]
    end

    SnapshotDoc.import docs_to_import

    self
  end

  # Returns the already loaded query for `query_text`, creating one on the
  # snapshot's case when there is none.
  # NOTE(review): `Query.create` does not raise when validation fails, so the
  # returned query may be unsaved (nil id) -- confirm whether `create!` is wanted.
  def fetch_or_create_query indexed_queries, query_text
    existing_query = indexed_queries[query_text]
    return existing_query if existing_query.present?

    query_params = {
      query_text: query_text,
      case_id:    @snapshot.case_id,
    }
    Query.create(query_params)
  end
end
# rubocop:enable Metrics/ClassLength
2 changes: 1 addition & 1 deletion app/views/api/v1/snapshots/_snapshot.json.jbuilder
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ shallow ||= false
json.id snapshot.id
json.name snapshot.name
json.time snapshot.created_at
json.has_snapshot_file = snapshot.snapshot_file.present?
json.has_snapshot_file snapshot.snapshot_file.present?

unless shallow
json.scorer snapshot.scorer, partial: 'api/v1/scorers/communal_scorer', as: :scorer
Expand Down
Loading

0 comments on commit 2752ac2

Please sign in to comment.