Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

extend length of query_text and use upsert to support 5000 queries #1037

Merged
merged 2 commits into from
Jun 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 23 additions & 12 deletions app/controllers/api/v1/bulk/queries_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,17 @@ class QueriesController < Api::ApiController
before_action :set_case
before_action :check_case

# Apipie parameter group for the bulk-create endpoint.
# +queries+ is submitted as a flat array of query-text strings, e.g.
#   queries: ["Query 1", "Query 2"]
# Giving an Array param a block makes Apipie describe an array of nested
# hashes, so the element type is declared with +of:+ instead of a block.
def_param_group :queries_params do
  param :queries, Array, of: String, required: true,
                         desc: 'The query texts to create for the case.'
end

# rubocop:disable Metrics/MethodLength
api :POST, '/api/bulk/cases/:case_id/queries', 'Bulk create queries.'
param :case_id, :number,
desc: 'The ID of the requested case.', required: true
param_group :queries_params
def create
# This logic is very similar to the ratings_importer.rb logic.
queries_to_import = []
Expand All @@ -28,22 +38,23 @@ def create

non_existing_queries = unique_queries - existing_queries
non_existing_queries.each_with_index do |query_text, _index|
query = @case.queries.build(query_text: query_text)
# query.insert_at(index + 1)
queries_to_import << query
queries_to_import << {
case_id: @case.id,
query_text: query_text,
created_at: Time.current,
updated_at: Time.current,
}
end

# Mass insert queries
if Query.import queries_to_import
# rubocop:disable Rails/SkipsModelValidations
Query.upsert_all(queries_to_import)
# rubocop:enable Rails/SkipsModelValidations

@case.reload
@queries = @case.queries.includes([ :ratings ])
@display_order = @queries.map(&:id)
@case.reload
@queries = @case.queries.includes([ :ratings ])
@display_order = @queries.map(&:id)

respond_with @queries, @display_order
else
render status: :bad_request
end
respond_with @queries, @display_order
end
# rubocop:enable Metrics/MethodLength

Expand Down
9 changes: 5 additions & 4 deletions app/controllers/api/v1/case_scores_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,12 @@ class CaseScoresController < Api::ApiController
param :try_number, Integer
param :last_try_number, Integer
param :try_id, Integer
# This isn't quite right, as we actually have a hash of "query_id" and "values" here.
param :queries, Hash, required: false do
param :text, String
param :score, Float
param :maxScore, Float
param :numFound, Integer
param :text, String, desc: 'The actual query text that is being scored.'
param :score, Float, desc: 'The score.'
param :maxScore, Float, desc: 'The max possible score'
param :numFound, Integer, desc: 'How many results matched'
end
end
end
Expand Down
41 changes: 26 additions & 15 deletions app/controllers/home_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,26 @@ def case_prophet

puts "we have decided we are stale for case #{@case.id} at #{@case.updated_at}"

data = @case.scores.sampled(@case.id, 25).collect do |score|
{ ds: score.updated_at.to_date.to_fs(:db), y: score.score, datetime: score.updated_at.to_date }
sampled_scores = @case.scores.sampled(@case.id, 25)

unless sampled_scores.empty?
@for_single_day = sampled_scores.first.updated_at.all_day.overlaps?(sampled_scores.last.updated_at.all_day)
@final = @case.scores.last_one.score
end

data = sampled_scores.collect do |score|
if @for_single_day
{ ds: score.updated_at.to_fs(:db), y: score.score, datetime: score.updated_at }
else
{ ds: score.updated_at.to_date.to_fs(:db), y: score.score, datetime: score.updated_at.to_date }
end
end.uniq

# warning! blunt filter below!
data = data.uniq { |h| h[:ds] }
data = data.map { |h| h.transform_keys(&:to_s) }

do_changepoints = data.length >= 3 # need at least 3...

if do_changepoints

df = Rover::DataFrame.new(data)
Expand All @@ -61,19 +72,19 @@ def case_prophet

last_changepoint = DateTime.parse(m.changepoints.last.to_s)
initial = data.find { |h| h['datetime'].all_day.overlaps?(last_changepoint.all_day) }['y']
final = @case.scores.last_one.score
change = 100 * (final - initial) / initial

vega_data = data.map { |d| { x: d['ds'], y: d['y'] } }

@prophet_case_data = {
initial: initial,
final: final,
change: change,
last_changepoint: last_changepoint,
vega_data: vega_data,
}
changepoint = 100 * (@final - initial) / initial

end

vega_data = data.map { |d| { x: d['ds'], y: d['y'] } }

@prophet_case_data = {
initial: initial,
final: @final,
changepoint: changepoint.nil? ? 0 : changepoint,
last_changepoint: last_changepoint,
vega_data: vega_data,
}
render layout: false
end
end
Expand Down
1 change: 1 addition & 0 deletions app/models/case.rb
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ def rearrange_queries

# Returns whatever `scores.last_one` yields for this case.
# NOTE(review): presumably the most recent score; `last_one` is defined
# outside this view, so confirm its ordering there.
# The commented-out lines below are alternative lookups left in for reference.
def last_score
scores.last_one
# scores.last
# scores.first
end

Expand Down
4 changes: 2 additions & 2 deletions app/models/query.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
# information_need :string(255)
# notes :text(65535)
# options :text(65535)
# query_text :string(500)
# query_text :string(2048)
# created_at :datetime not null
# updated_at :datetime not null
# case_id :integer
Expand Down Expand Up @@ -40,7 +40,7 @@ class Query < ApplicationRecord
dependent: :destroy

# Validations
validates :query_text, presence: true, length: { maximum: 500 }
validates :query_text, presence: true, length: { maximum: 2048 }

# Scopes

Expand Down
5 changes: 2 additions & 3 deletions app/models/query_doc_pair.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
# notes :text(65535)
# options :text(65535)
# position :integer
# query_text :string(500)
# query_text :string(2048) not null
# created_at :datetime not null
# updated_at :datetime not null
# book_id :bigint not null
Expand All @@ -19,7 +19,6 @@
# Indexes
#
# index_query_doc_pairs_on_book_id (book_id)
# unique_query_doc_pair (query_text,doc_id,book_id) UNIQUE
#
# Foreign Keys
#
Expand All @@ -29,7 +28,7 @@ class QueryDocPair < ApplicationRecord
belongs_to :book
has_many :judgements, dependent: :destroy, autosave: true

validates :query_text, presence: true, length: { maximum: 500 }
validates :query_text, presence: true, length: { maximum: 2048 }
validates :doc_id, presence: true
validates :position, numericality: { only_integer: true }, allow_nil: true

Expand Down
57 changes: 32 additions & 25 deletions app/views/home/case_prophet.html.erb
Original file line number Diff line number Diff line change
Expand Up @@ -2,36 +2,43 @@
<h5 class="card-title"><%= number_with_precision(@case.last_score.score, precision: 2) unless @case.scores.empty? %> <%= @case.scorer.name %></h5>
<p class="card-text">
<% if @case.scores.empty? %>
<small><code>no scores</code></small>
<% else %>

<small><code>no scores yet</code></small>
<% else %>
<%= @case.first_score.updated_at.to_date.to_fs(:short) %>
<% if @case.scores.count > 1 and @case.first_score.updated_at.to_date.to_fs(:short) != @case.last_score.updated_at.to_date.to_fs(:short) %>
- <%= @case.last_score.updated_at.to_date.to_fs(:short)%>
<% end %>
<% end %>
</p>
<%
if @prophet_case_data
if @prophet_case_data[:change] > 0 %>
<% if @prophet_case_data[:change].positive? %>
<p class="card-text text-success"><%= number_to_percentage(@prophet_case_data[:change] , precision:0) %> increase since <%=time_ago_in_words(@prophet_case_data[:last_changepoint]) %> ago</p>
<% else %>
<p class="card-text text-danger"><%= number_to_percentage(@prophet_case_data[:change] , precision:0) %> decrease since <%=time_ago_in_words(@prophet_case_data[:last_changepoint]) %> ago</p>
<% end %>
<% else %>
<p class="card-text">&nbsp;</p>
<% end %>

<%= Vega.lite
.data(@prophet_case_data[:vega_data])
.mark(type: "line", tooltip: true, interpolate: "cardinal", point: {size: 60})
.encoding(
x: {field: "x", type: "temporal", scale: {type: "utc"}, axis: {format: "%b %e"}},
y: {field: "y", type: "quantitative"}
)
.height(60)
.config(axis: {title: nil, labelFontSize: 12}) %>

<% end # if prophet_data %>
if !@prophet_case_data[:changepoint].zero? %>
<% if @prophet_case_data[:changepoint].positive? %>
<p class="card-text text-success"><%= number_to_percentage(@prophet_case_data[:changepoint] , precision:0) %> increase since <%=time_ago_in_words(@prophet_case_data[:last_changepoint]) %> ago</p>
<% else %>
<p class="card-text text-danger"><%= number_to_percentage(@prophet_case_data[:changepoint] , precision:0) %> decrease since <%=time_ago_in_words(@prophet_case_data[:last_changepoint]) %> ago</p>
<% end %>
<% else %>
<p class="card-text">&nbsp;</p>
<% end %>

<%
if @for_single_day
# may not work since not sure about utc and time zones
vega_time = {scale: {type: "time"}, "domain": {"timeUnit": "hours"}}
else
vega_time = {scale: {type: "utc"}, axis: {format: "%b %e"}}
end
%>

<%= Vega.lite
.data(@prophet_case_data[:vega_data])
.mark(type: "line", tooltip: true, interpolate: "cardinal", point: {size: 60})
.encoding(
x: {field: "x", type: "temporal"}.merge(vega_time),
y: {field: "y", type: "quantitative"}
)
.height(60)
.config(axis: {title: nil, labelFontSize: 12}) %>


</turbo-frame>
7 changes: 3 additions & 4 deletions db/schema.rb

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 23 additions & 0 deletions test/controllers/api/v1/bulk/queries_controller_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,29 @@ class QueriesControllerTest < ActionController::TestCase

assert_equal 5, acase.queries.size
end

test 'Scales up to thousands of queries' do
  number_of_queries = 5000

  # The timed section covers both building the payload and the request itself.
  timing = Benchmark.measure do
    query_texts = (1..number_of_queries).map { |position| "Query #{position}" }

    post :create, params: { case_id: acase.id, queries: query_texts }
  end

  # Wall-clock budget for the whole bulk insert.
  assert timing.real < 2.0
  # puts "Elapsed time: #{timing.real} seconds\n"

  acase.reload

  # Every submitted query text should now exist on the case.
  assert_equal number_of_queries, acase.queries.size
end
end

describe 'Deletes all queries for a case.' do
Expand Down
2 changes: 1 addition & 1 deletion test/fixtures/queries.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
# information_need :string(255)
# notes :text(65535)
# options :text(65535)
# query_text :string(500)
# query_text :string(2048)
# created_at :datetime not null
# updated_at :datetime not null
# case_id :integer
Expand Down
3 changes: 1 addition & 2 deletions test/fixtures/query_doc_pairs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
# notes :text(65535)
# options :text(65535)
# position :integer
# query_text :string(500)
# query_text :string(2048) not null
# created_at :datetime not null
# updated_at :datetime not null
# book_id :bigint not null
Expand All @@ -17,7 +17,6 @@
# Indexes
#
# index_query_doc_pairs_on_book_id (book_id)
# unique_query_doc_pair (query_text,doc_id,book_id) UNIQUE
#
# Foreign Keys
#
Expand Down
Loading