-
Notifications
You must be signed in to change notification settings - Fork 25.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Adding aggregations support for the `_ignored` field
#101373
Changes from 116 commits
50d1ffb
737fbb2
ace155a
e698cc1
5e3f6ec
ef9ce62
c34dc4b
aa9cdec
2e03324
8bc6582
873a34f
904694b
aba9ff6
a316a4d
f6c4ad2
53d374e
f78532d
1256646
47cf96a
3c5d42e
fea357c
6e80788
866c8f4
667972d
b91b8ca
df3a45a
49a77e9
d52e5e4
bc894bb
e5a0743
fb8263c
664958d
77f2e3b
1bae37a
171c7ad
8bcfe85
4096422
189f6ee
2f62882
5b2207c
eca3e1b
300c342
9b33013
43a5d61
238473e
0344e28
4a233df
f904250
05f7553
6f9f878
bc6aa30
c725fb1
bf9629d
bcf16a3
e31df96
05fefdf
bbdc263
bfe6913
509abe5
763fc27
390fc6b
be24d84
1c620ea
e402020
a9bd6cd
1a34860
52c326a
de91ea6
4a3b4c2
05a8c5f
4657791
656534a
e1abe80
773b0a6
72f6465
d1c0cbe
154682b
92c06ff
c30c246
1f86317
1c80140
85e1ba7
570fe56
8acc8b1
55e3d4d
c01f2dd
ee51ec8
7444215
bcc1adf
17996be
c2232c9
98d3856
e27bf2e
9846d32
cee180a
eee62f1
cfc94a2
c6f4164
dc3ce8e
9c56f59
52f1ef3
4609a42
80a7426
ea20632
2ac32f4
3ce075f
f669f9e
58bb648
08dd66f
b561aab
98a4c6b
897eca0
241a8ca
cf06f0c
4f1816d
3276a82
6825ada
fe0d874
de02373
1175322
e37e1b9
bfec2b3
4f95463
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
pr: 101373 | ||
summary: Adding aggregations support for the `_ignored` field | ||
area: Search | ||
type: feature | ||
issues: | ||
- 59946 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,302 @@ | ||
setup: | ||
- do: | ||
indices.create: | ||
index: test | ||
body: | ||
mappings: | ||
properties: | ||
city: | ||
type: keyword | ||
ignore_above: 10 | ||
email: | ||
type: keyword | ||
ignore_above: 20 | ||
date_of_birth: | ||
type: date | ||
format: "dd-MM-yyyy" | ||
ignore_malformed: true | ||
newsletter: | ||
type: boolean | ||
ignore_malformed: true | ||
ip_address: | ||
type: ip | ||
ignore_malformed: true | ||
products: | ||
type: keyword | ||
ignore_above: 12 | ||
total_price: | ||
type: double | ||
ignore_malformed: true | ||
location: | ||
type: geo_point | ||
ignore_malformed: true | ||
order_datetime: | ||
type: date | ||
format: "yyyy-MM-dd HH:mm:ss" | ||
ignore_malformed: true | ||
|
||
- do: | ||
bulk: | ||
index: test | ||
refresh: true | ||
body: | ||
- { "index": { "_id": "001" } } | ||
- { "city": "Milano", email: "alice@gmail.com", date_of_birth: "12-03-1990", newsletter: true, ip_address: "130.34.45.202", products: ["it-002-4567", "it-001-6679"], total_price: "57.99", location: [45.46, 9.16], order_datetime: "2021-05-01 20:01:37" } | ||
- { "index": { "_id": "002" } } | ||
- { "city": "Roma", email: "bob@gmail.com", date_of_birth: "15-05-1991", newsletter: false, ip_address: "2001:0db8:85a3:0000:0000:8a2e:0370:7334", products: [ "it-002-112467", "it-002-5579" ], total_price: "10.99", location: [ -44.78, 19.20 ], order_datetime: "2021-05-01 20:01:37" } | ||
- { "index": { "_id": "003" } } | ||
- { "city": "Venezia", email: "alice@gmail.com", date_of_birth: "01-09-1994", newsletter: false, ip_address: "fe80::1", products: [ "it-002", "it-003-17171717" ], total_price: "-12.99", location: [ 182.22, "20.12" ], order_datetime: "2021-05-02" } | ||
- { "index": { "_id": "004" } } | ||
- { "city": "Cortina d'Ampezzo", email: "a-very-long-email-address-that-should-be-ignored@gmail.com", date_of_birth: "05-06-1989", newsletter: t, ip_address: "::1", products: [ "it101020203030", "it" ], total_price: "57", location: [ 0, 9.16 ], order_datetime: "2021-05-01-20:01:37" } | ||
- { "index": { "_id": "005" } } | ||
- { "city": "Cortina d'Ampezzo", email: "dave@gmail.com", date_of_birth: "12-03-1990 12:30:45", newsletter: t, ip_address: "130.999.36.201", products: [ "it-002-2213", "it-001-7709" ], total_price: "twentytree/12", location: [ "45.33, 8.20" ], order_datetime: "20210501 20:01:37" } | ||
- { "index": { "_id": "006" } } | ||
- { "city": "Milano", email: "eric@gmail.com", date_of_birth: "19-12-90", newsletter: f, ip_address: "130.34.45", products: [ "it-002-555", "it-001-5589990000" ], total_price: "", location: [ "45.99", "9.16" ], order_datetime: "2021-05-01 20:01:37.123" } | ||
- { "index": { "_id": "007" } } | ||
- { "city": "Venezia", email: "luke-skywalker@gmail.com", date_of_birth: "20/03/1992", newsletter: f, ip_address: "130..45.202", products: [ "it-002-1234", "it-001-1213" ], total_price: "57.99.12", location: [ 45, 20 ], order_datetime: "2021-05-03 19:38:22" } | ||
- { "index": { "_id": "008" } } | ||
- { "city": "Firenze", email: "bob@gmail.com", date_of_birth: "02311988", newsletter: "", ip_address: ":::1", products: ["", ""], total_price: "0.0", location: [ 46.22, 11.22 ], order_datetime: "2021-05-03 20:01" } | ||
- { "index": { "_id": "009" } } | ||
- { "city": "Firenze", email: "tom@gmail.com", date_of_birth: "16-11-1990", newsletter: "not_sure", ip_address: "2001:0db8::1234:5678::", products: "it-002-4567", total_price: "0,99", location: [ 18.18, 19.19 ], order_datetime: "2021-05-03 20-01-55" } | ||
- { "index": { "_id": "010" } } | ||
- { "city": "Cortina d'Ampezzo", email: "alice@gmail.com", date_of_birth: "18-12-1992", newsletter: "false", ip_address: ":::1", products: "it-002-1890994567", total_price: "14,27", location: [ 45.46-9.16 ], order_datetime: "2021-05-01 20:05:37" } | ||
- { "index": { "_id": "011" } } | ||
- { "city": "Roma", email: "paul@gmail.com", date_of_birth: "17.15.1990", newsletter: "true", ip_address: "", products: [ "it-002-1019", "it-001-5578", "it-009-9901256" ], total_price: "49.99", location: 45.22, order_datetime: "2021-05-01T20:02:00" } | ||
|
||
--- | ||
"terms aggregation on _ignored metadata field": | ||
- skip: | ||
version: " - 8.14.99" | ||
reason: "_ignored metadata field aggregation support added in 8.15" | ||
- do: | ||
search: | ||
body: | ||
size: 0 | ||
aggs: | ||
ignored_terms: | ||
terms: | ||
field: _ignored | ||
|
||
- match: { hits.total.value: 11 } | ||
- length: { aggregations.ignored_terms.buckets: 9 } | ||
- match: { aggregations.ignored_terms.buckets.0.key: "ip_address" } | ||
- match: { aggregations.ignored_terms.buckets.0.doc_count: 7 } | ||
- match: { aggregations.ignored_terms.buckets.1.key: "order_datetime" } | ||
- match: { aggregations.ignored_terms.buckets.1.doc_count: 7 } | ||
- match: { aggregations.ignored_terms.buckets.2.key: "products" } | ||
- match: { aggregations.ignored_terms.buckets.2.doc_count: 6 } | ||
- match: { aggregations.ignored_terms.buckets.3.key: "date_of_birth" } | ||
- match: { aggregations.ignored_terms.buckets.3.doc_count: 5 } | ||
- match: { aggregations.ignored_terms.buckets.4.key: "newsletter" } | ||
- match: { aggregations.ignored_terms.buckets.4.doc_count: 5 } | ||
- match: { aggregations.ignored_terms.buckets.5.key: "total_price" } | ||
- match: { aggregations.ignored_terms.buckets.5.doc_count: 4 } | ||
- match: { aggregations.ignored_terms.buckets.6.key: "city" } | ||
- match: { aggregations.ignored_terms.buckets.6.doc_count: 3 } | ||
- match: { aggregations.ignored_terms.buckets.7.key: "location" } | ||
- match: { aggregations.ignored_terms.buckets.7.doc_count: 3 } | ||
- match: { aggregations.ignored_terms.buckets.8.key: "email" } | ||
- match: { aggregations.ignored_terms.buckets.8.doc_count: 2 } | ||
|
||
--- | ||
"terms aggregation on _ignored metadata field with top hits": | ||
- skip: | ||
version: " - 8.14.99" | ||
reason: "_ignored metadata field aggregation support added in 8.15" | ||
- do: | ||
search: | ||
body: | ||
size: 0 | ||
aggs: | ||
ignored_terms: | ||
terms: | ||
field: _ignored | ||
size: 3 | ||
aggs: | ||
top_by_datetime: | ||
top_hits: | ||
sort: | ||
- order_datetime: { order: desc } | ||
size: 1 | ||
|
||
- match: { hits.total.value: 11 } | ||
- length: { aggregations.ignored_terms.buckets: 3 } | ||
|
||
- match: { aggregations.ignored_terms.buckets.0.key: "ip_address" } | ||
- match: { aggregations.ignored_terms.buckets.0.doc_count: 7 } | ||
- match: { aggregations.ignored_terms.buckets.0.top_by_datetime.hits.hits.0._ignored: ["date_of_birth", "email", "ip_address", "newsletter", "total_price"]} | ||
|
||
- match: { aggregations.ignored_terms.buckets.1.key: "order_datetime" } | ||
- match: { aggregations.ignored_terms.buckets.1.doc_count: 7 } | ||
- match: { aggregations.ignored_terms.buckets.1.top_by_datetime.hits.hits.0._ignored: ["order_datetime", "products"]} | ||
|
||
- match: { aggregations.ignored_terms.buckets.2.key: "products" } | ||
- match: { aggregations.ignored_terms.buckets.2.doc_count: 6 } | ||
- match: { aggregations.ignored_terms.buckets.2.top_by_datetime.hits.hits.0._ignored: ["city", "ip_address", "location", "products", "total_price"]} | ||
|
||
--- | ||
"date histogram aggregation with terms on _ignored metadata field": | ||
- skip: | ||
version: " - 8.14.99" | ||
reason: "_ignored metadata field aggregation support added in 8.15" | ||
- do: | ||
search: | ||
body: | ||
size: 0 | ||
aggs: | ||
order_datetime_histo: | ||
date_histogram: | ||
field: order_datetime | ||
calendar_interval: day | ||
aggs: | ||
ignored_terms: | ||
terms: | ||
field: _ignored | ||
size: 2 | ||
|
||
- match: { hits.total.value: 11 } | ||
- length: { aggregations.order_datetime_histo.buckets: 3 } | ||
|
||
- match: { aggregations.order_datetime_histo.buckets.0.key_as_string: "2021-05-01 00:00:00" } | ||
- match: { aggregations.order_datetime_histo.buckets.0.doc_count: 3 } | ||
- match: { aggregations.order_datetime_histo.buckets.0.ignored_terms.buckets.0: { key: "products", doc_count: 2 } } | ||
|
||
- match: { aggregations.order_datetime_histo.buckets.1.key_as_string: "2021-05-02 00:00:00" } | ||
- match: { aggregations.order_datetime_histo.buckets.1.doc_count: 0 } | ||
- length: { aggregations.order_datetime_histo.buckets.1.ignored_terms.buckets: 0 } | ||
|
||
- match: { aggregations.order_datetime_histo.buckets.2.key_as_string: "2021-05-03 00:00:00" } | ||
- match: { aggregations.order_datetime_histo.buckets.2.doc_count: 1 } | ||
- match: { aggregations.order_datetime_histo.buckets.2.ignored_terms.buckets.0: { key: "date_of_birth", doc_count: 1 } } | ||
- match: { aggregations.order_datetime_histo.buckets.2.ignored_terms.buckets.1: { key: "email", doc_count: 1 } } | ||
|
||
--- | ||
"cardinality aggregation on _ignored metadata field": | ||
- skip: | ||
version: " - 8.14.99" | ||
reason: "_ignored metadata field aggregation support added in 8.15" | ||
- do: | ||
search: | ||
body: | ||
size: 0 | ||
aggs: | ||
ignored_cardinality: | ||
cardinality: | ||
field: _ignored | ||
|
||
- match: { hits.total.value: 11 } | ||
- match: {aggregations.ignored_cardinality.value: 9 } | ||
|
||
--- | ||
"value count aggregation on _ignored metadata field": | ||
- skip: | ||
version: " - 8.14.99" | ||
reason: "_ignored metadata field aggregation support added in 8.15" | ||
- do: | ||
search: | ||
body: | ||
size: 0 | ||
aggs: | ||
ignored_value_count: | ||
value_count: | ||
field: _ignored | ||
|
||
- match: { hits.total.value: 11 } | ||
- match: {aggregations.ignored_value_count.value: 42 } | ||
|
||
--- | ||
"date range aggregation with terms on _ignored metadata field": | ||
- skip: | ||
version: " - 8.14.99" | ||
reason: "_ignored metadata field aggregation support added in 8.15" | ||
- do: | ||
search: | ||
body: | ||
size: 0 | ||
aggs: | ||
order_datetime_range: | ||
date_range: | ||
field: order_datetime | ||
format: "dd-MM-yyyy" | ||
ranges: | ||
- to: "03-05-2021" | ||
- from: "02-05-2021" | ||
aggs: | ||
ignored_terms: | ||
terms: | ||
field: _ignored | ||
|
||
- match: { hits.total.value: 11 } | ||
- length: { aggregations.order_datetime_range.buckets: 2 } | ||
|
||
- match: { aggregations.order_datetime_range.buckets.0.to_as_string: "03-05-2021" } | ||
- match: { aggregations.order_datetime_range.buckets.0.doc_count: 3 } | ||
- length: { aggregations.order_datetime_range.buckets.0.ignored_terms.buckets: 5 } | ||
- match: { aggregations.order_datetime_range.buckets.0.ignored_terms.buckets.0: { key: "products", doc_count: 2 } } | ||
- match: { aggregations.order_datetime_range.buckets.0.ignored_terms.buckets.1: { key: "city", doc_count: 1 } } | ||
- match: { aggregations.order_datetime_range.buckets.0.ignored_terms.buckets.2: { key: "ip_address", doc_count: 1 } } | ||
- match: { aggregations.order_datetime_range.buckets.0.ignored_terms.buckets.3: { key: "location", doc_count: 1 } } | ||
- match: { aggregations.order_datetime_range.buckets.0.ignored_terms.buckets.4: { key: "total_price", doc_count: 1 } } | ||
|
||
- match: { aggregations.order_datetime_range.buckets.1.from_as_string: "02-05-2021" } | ||
- match: { aggregations.order_datetime_range.buckets.1.doc_count: 1 } | ||
- length: { aggregations.order_datetime_range.buckets.1.ignored_terms.buckets: 5 } | ||
- match: { aggregations.order_datetime_range.buckets.1.ignored_terms.buckets.0: { key: "date_of_birth", doc_count: 1 } } | ||
- match: { aggregations.order_datetime_range.buckets.1.ignored_terms.buckets.1: { key: "email", doc_count: 1 } } | ||
- match: { aggregations.order_datetime_range.buckets.1.ignored_terms.buckets.2: { key: "ip_address", doc_count: 1 } } | ||
- match: { aggregations.order_datetime_range.buckets.1.ignored_terms.buckets.3: { key: "newsletter", doc_count: 1 } } | ||
- match: { aggregations.order_datetime_range.buckets.1.ignored_terms.buckets.4: { key: "total_price", doc_count: 1 } } | ||
|
||
--- | ||
"random sampler aggregation with terms on _ignored metadata field": | ||
- skip: | ||
version: " - 8.14.99" | ||
reason: "_ignored metadata field aggregation support added in 8.15" | ||
- do: | ||
search: | ||
body: | ||
size: 0 | ||
aggs: | ||
sample: | ||
random_sampler: | ||
probability: 1.0 # make sure buckets count is consistent | ||
seed: 43 | ||
aggs: | ||
ignored_terms: | ||
terms: | ||
field: _ignored | ||
|
||
- match: { hits.total.value: 11 } | ||
- length: { aggregations.sample.ignored_terms.buckets: 9 } | ||
- match: { aggregations.sample.ignored_terms.buckets.0: { key: "ip_address", doc_count: 7 } } | ||
- match: { aggregations.sample.ignored_terms.buckets.1: { key: "order_datetime", doc_count: 7 } } | ||
- match: { aggregations.sample.ignored_terms.buckets.2: { key: "products", doc_count: 6 } } | ||
- match: { aggregations.sample.ignored_terms.buckets.3: { key: "date_of_birth", doc_count: 5 } } | ||
- match: { aggregations.sample.ignored_terms.buckets.4: { key: "newsletter", doc_count: 5 } } | ||
- match: { aggregations.sample.ignored_terms.buckets.5: { key: "total_price", doc_count: 4 } } | ||
- match: { aggregations.sample.ignored_terms.buckets.6: { key: "city", doc_count: 3 } } | ||
- match: { aggregations.sample.ignored_terms.buckets.7: { key: "location", doc_count: 3 } } | ||
- match: { aggregations.sample.ignored_terms.buckets.8: { key: "email", doc_count: 2 } } | ||
|
||
--- | ||
"filter aggregation on _ignored metadata field": | ||
- skip: | ||
version: " - 8.14.99" | ||
reason: "_ignored metadata field aggregation support added in 8.15" | ||
features: close_to | ||
- do: | ||
search: | ||
body: | ||
size: 0 | ||
aggs: | ||
total: | ||
sum: | ||
field: total_price | ||
filter_ignored: | ||
filter: | ||
term: | ||
_ignored: "email" | ||
|
||
- match: { hits.total.value: 11 } | ||
- close_to: { aggregations.total.value: { value: 162.98, error: 0.01 } } | ||
- match: { aggregations.filter_ignored.doc_count: 2 } |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -140,7 +140,7 @@ profile fetch: | |
- gt: { profile.shards.0.fetch.breakdown.next_reader: 0 } | ||
- gt: { profile.shards.0.fetch.breakdown.load_stored_fields_count: 0 } | ||
- gt: { profile.shards.0.fetch.breakdown.load_stored_fields: 0 } | ||
- match: { profile.shards.0.fetch.debug.stored_fields: [_id, _ignored, _routing, _source] } | ||
- match: { profile.shards.0.fetch.debug.stored_fields: [_id, _routing, _source] } | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I wonder if with this change, the skip above needs updating? Isn't it surprising that this test runs? 8.14 returns the `_ignored` field. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I fixed this but the other was ok. |
||
- length: { profile.shards.0.fetch.children: 4 } | ||
- match: { profile.shards.0.fetch.children.0.type: FetchFieldsPhase } | ||
- gt: { profile.shards.0.fetch.children.0.breakdown.next_reader_count: 0 } | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is not returned anymore because the field is not stored anymore.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
that makes sense, I was expecting this change.