diff --git a/app/services/search_criteria.rb b/app/services/search_criteria.rb index 975e753b37d..fb6fa183894 100644 --- a/app/services/search_criteria.rb +++ b/app/services/search_criteria.rb @@ -44,6 +44,17 @@ def sanitize_direction(direction) def transform(query) words = query.gsub(/\s+/m, ' ').strip.split(" ") words.map! { |item| lemmatize(item) } + added_results = [] + words.each do |word| + if word.include? "-" + added_results << (word.delete '-') + end + hyphenated_word = results_with_probable_hyphens(word) + if hyphenated_word != word + added_results << hyphenated_word + end + end + words += added_results words.join(' ') end diff --git a/lib/related_and_hyphenated_terms.dict.txt b/lib/related_and_hyphenated_terms.dict.txt new file mode 100644 index 00000000000..8cdbca03a9b --- /dev/null +++ b/lib/related_and_hyphenated_terms.dict.txt @@ -0,0 +1 @@ +noun purpleair purple-air \ No newline at end of file diff --git a/lib/text_search.rb b/lib/text_search.rb index cc19d9858ff..b62e21b3053 100644 --- a/lib/text_search.rb +++ b/lib/text_search.rb @@ -5,4 +5,9 @@ def lemmatize(word) lem = Lemmatizer.new lem.lemma(word) end + + def results_with_probable_hyphens(word) + lem = Lemmatizer.new("lib/related_and_hyphenated_terms.dict.txt") + lem.lemma(word) + end end diff --git a/test/fixtures/nodes.yml b/test/fixtures/nodes.yml index 295e54f6871..db7689481b1 100644 --- a/test/fixtures/nodes.yml +++ b/test/fixtures/nodes.yml @@ -357,3 +357,27 @@ search_trawling: type: "note" cached_likes: 0 slug: "could-the-babylegs-diy" + +purple_air_without_hyphen: + nid: 30 + uid: 5 + title: "This is purpleair without hyphen" + path: "/notes/admin/02-20-2019/purple-air-without-hyphen" + created: <%= DateTime.new(2019,2,20).to_i %> + changed: <%= DateTime.new(2019,2,20).to_i %> + status: 1 + type: "note" + cached_likes: 0 + slug: "purple-air-without-hyphen" + +purple_air_with_hyphen: + nid: 31 + uid: 2 + title: "This is purple-air with hyphen" + path: "/notes/admin/02-20-2019/purple-air-with-hyphen" + created: <%= DateTime.new(2019,2,20).to_i %> + changed: <%= DateTime.new(2019,2,20).to_i %> + status: 1 + type: "note" + cached_likes: 0 + slug: "purple-air-with-hyphen" \ No newline at end of file diff --git a/test/fixtures/revisions.yml b/test/fixtures/revisions.yml index 6d2c691bdcb..ce4098fd453 100644 --- a/test/fixtures/revisions.yml +++ b/test/fixtures/revisions.yml @@ -326,4 +326,18 @@ search_trawling: title: "Could the BabyLegs DIY trawling?" body: "BabyLegs is a research trawling." timestamp: <%= DateTime.new(2019,1,07).to_i %> + +purple_air_without_hyphen: + nid: 30 + uid: 5 + title: "This is purpleair without hyphen" + body: "purpleair is one of the most searched terms on public labs." + timestamp: <%= DateTime.new(2019,2,20).to_i %> + +purple_air_with_hyphen: + nid: 31 + uid: 2 + title: "This is purple-air with hyphen" + body: "Is Purple-air searched with hyphens or without hyphens?" + timestamp: <%= DateTime.new(2019,2,20).to_i %> \ No newline at end of file diff --git a/test/functional/notes_controller_test.rb b/test/functional/notes_controller_test.rb index e62845d0ae8..d2623a52580 100644 --- a/test/functional/notes_controller_test.rb +++ b/test/functional/notes_controller_test.rb @@ -313,7 +313,7 @@ def teardown assert_response :success selector = css_select 'div.note' - assert_equal selector.size, 23 + assert_equal selector.size, 25 assert_select "div p", 'Pending approval by community moderators. Please be patient!' end @@ -342,7 +342,7 @@ def teardown assert_response :success selector = css_select 'div.note' - assert_equal selector.size, 23 + assert_equal selector.size, 25 assert_select "p", "Moderate first-time post: \n Approve\n Spam" end diff --git a/test/functional/search_controller_test.rb b/test/functional/search_controller_test.rb index 45b7e3322c2..d473bee848c 100644 --- a/test/functional/search_controller_test.rb +++ b/test/functional/search_controller_test.rb @@ -77,4 +77,19 @@ class SearchControllerTest < ActionController::TestCase assert_equal nodes_with_trawl, nodes_with_trawls assert_response :success end + + test "search for hyphenated searches returns results for non hyphenated searches as well" do + get :all_content, params: { :query => "purple-air" } + nodes_with_purple_air = assigns(:nodes) + + get :all_content, params: { :query => "purpleair" } + nodes_with_purpleair = assigns(:nodes) + flag = false + nodes_with_purpleair.each do |key,val| + if nodes_with_purpleair[key].length != nodes_with_purple_air[key].length + flag = true + end + end + assert_not flag + end end diff --git a/test/unit/node_test.rb b/test/unit/node_test.rb index 0a14e1ee933..7d5dc8ac1f4 100644 --- a/test/unit/node_test.rb +++ b/test/unit/node_test.rb @@ -271,7 +271,7 @@ class NodeTest < ActiveSupport::TestCase expected = [nodes(:one), nodes(:spam), nodes(:first_timer_note), nodes(:blog), nodes(:moderated_user_note), nodes(:activity), nodes(:upgrade), nodes(:draft), nodes(:post_test1), nodes(:post_test2), - nodes(:post_test3), nodes(:post_test4), nodes(:scraped_image), nodes(:search_trawling)] + nodes(:post_test3), nodes(:post_test4), nodes(:scraped_image), nodes(:search_trawling), nodes(:purple_air_without_hyphen), nodes(:purple_air_with_hyphen)] assert_equal expected, notes end