diff --git a/app/controllers/api/v1/bulk/queries_controller.rb b/app/controllers/api/v1/bulk/queries_controller.rb index d818fcf42..5e91a089e 100644 --- a/app/controllers/api/v1/bulk/queries_controller.rb +++ b/app/controllers/api/v1/bulk/queries_controller.rb @@ -7,7 +7,17 @@ class QueriesController < Api::ApiController before_action :set_case before_action :check_case + def_param_group :queries_params do + param :queries, Array, required: true do + param :queries, String + end + end + # rubocop:disable Metrics/MethodLength + api :POST, '/api/bulk/cases/:case_id/queries', 'Bulk create queries.' + param :case_id, :number, + desc: 'The ID of the requested case.', required: true + param_group :queries_params def create # This logic is very similar to the ratings_importer.rb logic. queries_to_import = [] @@ -28,22 +38,23 @@ def create non_existing_queries = unique_queries - existing_queries non_existing_queries.each_with_index do |query_text, _index| - query = @case.queries.build(query_text: query_text) - # query.insert_at(index + 1) - queries_to_import << query + queries_to_import << { + case_id: @case.id, + query_text: query_text, + created_at: Time.current, + updated_at: Time.current, + } end - # Mass insert queries - if Query.import queries_to_import + # rubocop:disable Rails/SkipsModelValidations + Query.upsert_all(queries_to_import) + # rubocop:enable Rails/SkipsModelValidations - @case.reload - @queries = @case.queries.includes([ :ratings ]) - @display_order = @queries.map(&:id) + @case.reload + @queries = @case.queries.includes([ :ratings ]) + @display_order = @queries.map(&:id) - respond_with @queries, @display_order - else - render status: :bad_request - end + respond_with @queries, @display_order end # rubocop:enable Metrics/MethodLength diff --git a/app/controllers/api/v1/case_scores_controller.rb b/app/controllers/api/v1/case_scores_controller.rb index 18cda4d2d..98776196d 100644 --- a/app/controllers/api/v1/case_scores_controller.rb +++ 
b/app/controllers/api/v1/case_scores_controller.rb @@ -13,11 +13,12 @@ class CaseScoresController < Api::ApiController param :try_number, Integer param :last_try_number, Integer param :try_id, Integer + # NOTE: this isn't quite right, as we actually have a hash of "query_id" and "values" here. param :queries, Hash, required: false do - param :text, String - param :score, Float - param :maxScore, Float - param :numFound, Integer + param :text, String, desc: 'The actual query text that is being scored.' + param :score, Float, desc: 'The score.' + param :maxScore, Float, desc: 'The max possible score' + param :numFound, Integer, desc: 'How many results matched' end end end diff --git a/app/controllers/home_controller.rb b/app/controllers/home_controller.rb index a8b13f654..129a6aed1 100644 --- a/app/controllers/home_controller.rb +++ b/app/controllers/home_controller.rb @@ -44,15 +44,26 @@ def case_prophet puts "we have decided we are stale for case #{@case.id} at #{@case.updated_at}" - data = @case.scores.sampled(@case.id, 25).collect do |score| - { ds: score.updated_at.to_date.to_fs(:db), y: score.score, datetime: score.updated_at.to_date } + sampled_scores = @case.scores.sampled(@case.id, 25) + + unless sampled_scores.empty? + @for_single_day = sampled_scores.first.updated_at.all_day.overlaps?(sampled_scores.last.updated_at.all_day) + @final = @case.scores.last_one.score + end + + data = sampled_scores.collect do |score| + if @for_single_day + { ds: score.updated_at.to_fs(:db), y: score.score, datetime: score.updated_at } + else + { ds: score.updated_at.to_date.to_fs(:db), y: score.score, datetime: score.updated_at.to_date } + end end.uniq + # warning! blunt filter below! data = data.uniq { |h| h[:ds] } data = data.map { |h| h.transform_keys(&:to_s) } do_changepoints = data.length >= 3 # need at least 3... 
- if do_changepoints df = Rover::DataFrame.new(data) @@ -61,19 +72,19 @@ def case_prophet last_changepoint = DateTime.parse(m.changepoints.last.to_s) initial = data.find { |h| h['datetime'].all_day.overlaps?(last_changepoint.all_day) }['y'] - final = @case.scores.last_one.score - change = 100 * (final - initial) / initial - - vega_data = data.map { |d| { x: d['ds'], y: d['y'] } } - - @prophet_case_data = { - initial: initial, - final: final, - change: change, - last_changepoint: last_changepoint, - vega_data: vega_data, - } + changepoint = 100 * (@final - initial) / initial + end + + vega_data = data.map { |d| { x: d['ds'], y: d['y'] } } + + @prophet_case_data = { + initial: initial, + final: @final, + changepoint: changepoint.nil? ? 0 : changepoint, + last_changepoint: last_changepoint, + vega_data: vega_data, + } render layout: false end end diff --git a/app/models/case.rb b/app/models/case.rb index aa86144c1..dd19707e8 100644 --- a/app/models/case.rb +++ b/app/models/case.rb @@ -169,6 +169,7 @@ def rearrange_queries def last_score scores.last_one + # scores.last # scores.first end diff --git a/app/models/query.rb b/app/models/query.rb index f0bb05e42..7be2cb9f0 100644 --- a/app/models/query.rb +++ b/app/models/query.rb @@ -10,7 +10,7 @@ # information_need :string(255) # notes :text(65535) # options :text(65535) -# query_text :string(500) +# query_text :string(2048) # created_at :datetime not null # updated_at :datetime not null # case_id :integer @@ -40,7 +40,7 @@ class Query < ApplicationRecord dependent: :destroy # Validations - validates :query_text, presence: true, length: { maximum: 500 } + validates :query_text, presence: true, length: { maximum: 2048 } # Scopes diff --git a/app/models/query_doc_pair.rb b/app/models/query_doc_pair.rb index 767d2f2d9..958219950 100644 --- a/app/models/query_doc_pair.rb +++ b/app/models/query_doc_pair.rb @@ -10,7 +10,7 @@ # notes :text(65535) # options :text(65535) # position :integer -# query_text :string(500) +# query_text 
:string(2048) not null # created_at :datetime not null # updated_at :datetime not null # book_id :bigint not null @@ -19,7 +19,6 @@ # Indexes # # index_query_doc_pairs_on_book_id (book_id) -# unique_query_doc_pair (query_text,doc_id,book_id) UNIQUE # # Foreign Keys # @@ -29,7 +28,7 @@ class QueryDocPair < ApplicationRecord belongs_to :book has_many :judgements, dependent: :destroy, autosave: true - validates :query_text, presence: true, length: { maximum: 500 } + validates :query_text, presence: true, length: { maximum: 2048 } validates :doc_id, presence: true validates :position, numericality: { only_integer: true }, allow_nil: true diff --git a/app/views/home/case_prophet.html.erb b/app/views/home/case_prophet.html.erb index 499253fd4..07e6ce8f2 100644 --- a/app/views/home/case_prophet.html.erb +++ b/app/views/home/case_prophet.html.erb @@ -2,9 +2,8 @@
<% if @case.scores.empty? %>
- no scores
- <% else %>
-
+ no scores yet
+ <% else %>
<%= @case.first_score.updated_at.to_date.to_fs(:short) %>
<% if @case.scores.count > 1 and @case.first_score.updated_at.to_date.to_fs(:short) != @case.last_score.updated_at.to_date.to_fs(:short) %>
- <%= @case.last_score.updated_at.to_date.to_fs(:short)%>
@@ -12,26 +11,34 @@
<% end %>
<%= number_to_percentage(@prophet_case_data[:change] , precision:0) %> increase since <%=time_ago_in_words(@prophet_case_data[:last_changepoint]) %> ago
- <% else %> -<%= number_to_percentage(@prophet_case_data[:change] , precision:0) %> decrease since <%=time_ago_in_words(@prophet_case_data[:last_changepoint]) %> ago
- <% end %> - <% else %> -- <% end %> - - <%= Vega.lite - .data(@prophet_case_data[:vega_data]) - .mark(type: "line", tooltip: true, interpolate: "cardinal", point: {size: 60}) - .encoding( - x: {field: "x", type: "temporal", scale: {type: "utc"}, axis: {format: "%b %e"}}, - y: {field: "y", type: "quantitative"} - ) - .height(60) - .config(axis: {title: nil, labelFontSize: 12}) %> - - <% end # if prophet_data %> + if !@prophet_case_data[:changepoint].zero? %> + <% if @prophet_case_data[:changepoint].positive? %> +
<%= number_to_percentage(@prophet_case_data[:changepoint] , precision:0) %> increase since <%=time_ago_in_words(@prophet_case_data[:last_changepoint]) %> ago
+ <% else %> +<%= number_to_percentage(@prophet_case_data[:changepoint] , precision:0) %> decrease since <%=time_ago_in_words(@prophet_case_data[:last_changepoint]) %> ago
+ <% end %> + <% else %> ++ <% end %> + + <% + if @for_single_day + # may not work since not sure about utc and time zones + vega_time = {scale: {type: "time"}, "domain": {"timeUnit": "hours"}} + else + vega_time = {scale: {type: "utc"}, axis: {format: "%b %e"}} + end + %> + + <%= Vega.lite + .data(@prophet_case_data[:vega_data]) + .mark(type: "line", tooltip: true, interpolate: "cardinal", point: {size: 60}) + .encoding( + x: {field: "x", type: "temporal"}.merge(vega_time), + y: {field: "y", type: "quantitative"} + ) + .height(60) + .config(axis: {title: nil, labelFontSize: 12}) %> + + diff --git a/db/schema.rb b/db/schema.rb index 2fefec92b..627450067 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema[7.1].define(version: 2024_05_12_144038) do +ActiveRecord::Schema[7.1].define(version: 2024_06_17_144746) do create_table "active_storage_attachments", charset: "utf8mb4", collation: "utf8mb4_bin", force: :cascade do |t| t.string "name", null: false t.string "record_type", null: false @@ -170,7 +170,7 @@ create_table "queries", id: :integer, charset: "utf8mb3", force: :cascade do |t| t.bigint "arranged_next" t.bigint "arranged_at" - t.string "query_text", limit: 500 + t.string "query_text", limit: 2048 t.text "notes" t.integer "case_id" t.datetime "created_at", precision: nil, null: false @@ -181,7 +181,7 @@ end create_table "query_doc_pairs", charset: "utf8mb3", collation: "utf8mb3_unicode_ci", force: :cascade do |t| - t.string "query_text", limit: 500 + t.string "query_text", limit: 2048, null: false t.integer "position" t.text "document_fields", size: :medium, collation: "utf8mb4_0900_ai_ci" t.bigint "book_id", null: false @@ -192,7 +192,6 @@ t.text "notes" t.text "options", collation: "utf8mb3_bin" t.index ["book_id"], name: "index_query_doc_pairs_on_book_id" - t.index ["query_text", "doc_id", "book_id"], name: "unique_query_doc_pair", 
unique: true end create_table "ratings", id: :integer, charset: "latin1", force: :cascade do |t| diff --git a/test/controllers/api/v1/bulk/queries_controller_test.rb b/test/controllers/api/v1/bulk/queries_controller_test.rb index e6752198c..c36f93d17 100644 --- a/test/controllers/api/v1/bulk/queries_controller_test.rb +++ b/test/controllers/api/v1/bulk/queries_controller_test.rb @@ -71,6 +71,29 @@ class QueriesControllerTest < ActionController::TestCase assert_equal 5, acase.queries.size end + + test 'Scales up to thousands of queries' do + number_of_queries = 5000 + result = Benchmark.measure do + queries = [] + number_of_queries.times do |index| + queries << "Query #{index + 1}" + end + data = { + case_id: acase.id, + queries: queries, + } + + post :create, params: data + end + + assert result.real < 2.0 + # puts "Elapsed time: #{result.real} seconds\n" + + acase.reload + + assert_equal number_of_queries, acase.queries.size + end end describe 'Deletes all queries for a case.' do diff --git a/test/fixtures/queries.yml b/test/fixtures/queries.yml index 744053e76..9450a670f 100644 --- a/test/fixtures/queries.yml +++ b/test/fixtures/queries.yml @@ -8,7 +8,7 @@ # information_need :string(255) # notes :text(65535) # options :text(65535) -# query_text :string(500) +# query_text :string(2048) # created_at :datetime not null # updated_at :datetime not null # case_id :integer diff --git a/test/fixtures/query_doc_pairs.yml b/test/fixtures/query_doc_pairs.yml index 5f4f77ac5..2a22da03f 100644 --- a/test/fixtures/query_doc_pairs.yml +++ b/test/fixtures/query_doc_pairs.yml @@ -8,7 +8,7 @@ # notes :text(65535) # options :text(65535) # position :integer -# query_text :string(500) +# query_text :string(2048) not null # created_at :datetime not null # updated_at :datetime not null # book_id :bigint not null @@ -17,7 +17,6 @@ # Indexes # # index_query_doc_pairs_on_book_id (book_id) -# unique_query_doc_pair (query_text,doc_id,book_id) UNIQUE # # Foreign Keys # diff --git 
a/test/integration/experiment_with_bulk_insert_test.rb b/test/integration/experiment_with_bulk_insert_test.rb index 13ae7cd0f..eb94e72ec 100644 --- a/test/integration/experiment_with_bulk_insert_test.rb +++ b/test/integration/experiment_with_bulk_insert_test.rb @@ -12,7 +12,7 @@ class ExperimentWithBulkInsertTest < ActionDispatch::IntegrationTest @@skip_tests = true # rubocop:enable Style/ClassVars - test 'generate and export 5000 queries with traditional AR' do + test 'generate and import query/doc pairs with traditional AR' do skip('Ignoring all tests in ExperimentWithBulkInsertTest') if @@skip_tests book = user.books.create name: '50000 Query Doc Pairs', scorer: scorer, selection_strategy: selection_strategy assert book.valid? @@ -32,10 +32,10 @@ class ExperimentWithBulkInsertTest < ActionDispatch::IntegrationTest end # Print the elapsed time - puts "Elapsed time: #{result.real} seconds" + puts "Elapsed time: #{result.real} seconds\n" end - test 'generate and export 5000 queries with bulk import' do + test 'generate and import query/doc pairs with bulk import' do skip('Ignoring all tests in ExperimentWithBulkInsertTest') if @@skip_tests book = user.books.create name: '50000 Query Doc Pairs', scorer: scorer, selection_strategy: selection_strategy assert book.valid? @@ -56,10 +56,10 @@ class ExperimentWithBulkInsertTest < ActionDispatch::IntegrationTest end # Print the elapsed time - puts "Elapsed time: #{result.real} seconds" + puts "Elapsed time: #{result.real} seconds\n" end - test 'generate and export 5000 queries with insert_all' do + test 'generate and import query/doc pairs with insert_all' do skip('Ignoring all tests in ExperimentWithBulkInsertTest') if @@skip_tests book = user.books.create name: '50000 Query Doc Pairs', scorer: scorer, selection_strategy: selection_strategy assert book.valid? 
@@ -86,10 +86,10 @@ class ExperimentWithBulkInsertTest < ActionDispatch::IntegrationTest end # Print the elapsed time - puts "Elapsed time: #{result.real} seconds" + puts "Elapsed time: #{result.real} seconds\n" end - test 'generate and export 5000 queries with upsert_all' do + test 'generate and export query/doc pairs with upsert_all' do skip('Ignoring all tests in ExperimentWithBulkInsertTest') if @@skip_tests book = user.books.create name: '50000 Query Doc Pairs', scorer: scorer, selection_strategy: selection_strategy assert book.valid? @@ -116,11 +116,11 @@ class ExperimentWithBulkInsertTest < ActionDispatch::IntegrationTest end # Print the elapsed time - puts "Elapsed time: #{result.real} seconds" + puts "Elapsed time: #{result.real} seconds\n" end # rubocop:disable Layout/LineLength - test 'generate and export 5000 queries with upsert_all when exists already data' do + test 'generate and export query/doc pairs with upsert_all when exists already data' do skip('Ignoring all tests in ExperimentWithBulkInsertTest') if @@skip_tests book = user.books.create name: '50000 Query Doc Pairs', scorer: scorer, selection_strategy: selection_strategy assert book.valid? 
@@ -153,7 +153,7 @@ class ExperimentWithBulkInsertTest < ActionDispatch::IntegrationTest assert_equal 50_000, book.query_doc_pairs.count # Print the elapsed time - puts "Elapsed time: #{result.real} seconds" + puts "Elapsed time: #{result.real} seconds\n" end # rubocop:enable Layout/LineLength diff --git a/test/models/query_doc_pair_test.rb b/test/models/query_doc_pair_test.rb index 3817c3771..89ef38d75 100644 --- a/test/models/query_doc_pair_test.rb +++ b/test/models/query_doc_pair_test.rb @@ -10,7 +10,7 @@ # notes :text(65535) # options :text(65535) # position :integer -# query_text :string(500) +# query_text :string(2048) not null # created_at :datetime not null # updated_at :datetime not null # book_id :bigint not null @@ -19,7 +19,6 @@ # Indexes # # index_query_doc_pairs_on_book_id (book_id) -# unique_query_doc_pair (query_text,doc_id,book_id) UNIQUE # # Foreign Keys # diff --git a/test/models/query_test.rb b/test/models/query_test.rb index ccb6d0189..d38a1a5ee 100644 --- a/test/models/query_test.rb +++ b/test/models/query_test.rb @@ -10,7 +10,7 @@ # information_need :string(255) # notes :text(65535) # options :text(65535) -# query_text :string(500) +# query_text :string(2048) # created_at :datetime not null # updated_at :datetime not null # case_id :integer