Skip to content

Commit

Permalink
Add index filtering in field capabilities API
Browse files Browse the repository at this point in the history
This change adds support for an `index_filter` in the
field capabilities API. Indices are filtered from
the response if the provided query rewrites to `match_none`
on every shard:

````
GET metrics-*/_field_caps?fields=*
{
  "index_filter": {
    "bool": {
      "must": [
        {
          "range": {
            "@timestamp": {
              "gt": "2019"
            }
          }
        }
      ]
    }
  }
}
````

The filtering is done on a best-effort basis: it uses the can-match phase
to rewrite queries to `match_none` instead of fully executing the request.
The first shard that can match the filter is used to create the field
capabilities response for the entire index.

Closes elastic#56195
  • Loading branch information
jimczi committed May 28, 2020
1 parent f8b090b commit 37cc341
Show file tree
Hide file tree
Showing 18 changed files with 707 additions and 98 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -521,13 +521,17 @@ static Request explain(ExplainRequest explainRequest) throws IOException {
return request;
}

static Request fieldCaps(FieldCapabilitiesRequest fieldCapabilitiesRequest) {
Request request = new Request(HttpGet.METHOD_NAME, endpoint(fieldCapabilitiesRequest.indices(), "_field_caps"));
/**
 * Converts a {@link FieldCapabilitiesRequest} into a low-level REST {@link Request}
 * targeting the {@code _field_caps} endpoint of the requested indices.
 * <p>
 * The requested fields and the indices options are always sent as query parameters.
 * When the request carries an index filter, the request itself is attached as the
 * entity and the call is issued as a {@code POST}; otherwise a body-less {@code GET}
 * is used.
 *
 * @param fieldCapabilitiesRequest the high-level request to convert
 * @return the low-level request ready to be executed
 * @throws IOException if building the request entity fails
 */
static Request fieldCaps(FieldCapabilitiesRequest fieldCapabilitiesRequest) throws IOException {
    final boolean hasIndexFilter = fieldCapabilitiesRequest.indexFilter() != null;
    // The field capabilities endpoint accepts GET and POST only, so a request that
    // needs a body must be sent as POST (PUT is not a supported method).
    String methodName = hasIndexFilter ? HttpPost.METHOD_NAME : HttpGet.METHOD_NAME;
    Request request = new Request(methodName, endpoint(fieldCapabilitiesRequest.indices(), "_field_caps"));

    Params params = new Params();
    params.withFields(fieldCapabilitiesRequest.fields());
    params.withIndicesOptions(fieldCapabilitiesRequest.indicesOptions());
    request.addParameters(params.asMap());

    // Only attach an entity when there is something to send.
    if (hasIndexFilter) {
        request.setEntity(createEntity(fieldCapabilitiesRequest, REQUEST_BODY_CONTENT_TYPE));
    }
    return request;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1493,7 +1493,7 @@ public void testMultiTermVectorsWithType() throws IOException {
assertToXContentBody(mtvRequest, request.getEntity());
}

public void testFieldCaps() {
public void testFieldCaps() throws IOException {
// Create a random request.
String[] indices = randomIndicesNames(0, 5);
String[] fields = generateRandomStringArray(5, 10, false, false);
Expand Down Expand Up @@ -1531,6 +1531,48 @@ public void testFieldCaps() {
assertNull(request.getEntity());
}

/**
 * Checks the conversion of a field capabilities request that carries an index filter:
 * the endpoint, the query parameters and the presence and content of the request body.
 */
public void testFieldCapsWithIndexFilter() throws IOException {
    // Build a randomized request that always has an index filter.
    String[] indexNames = randomIndicesNames(0, 5);
    String[] fieldNames = generateRandomStringArray(5, 10, false, false);

    FieldCapabilitiesRequest fieldCapsRequest = new FieldCapabilitiesRequest()
        .indices(indexNames)
        .fields(fieldNames)
        .indexFilter(QueryBuilders.matchAllQuery());

    Map<String, String> expectedIndicesOptions = new HashMap<>();
    setRandomIndicesOptions(fieldCapsRequest::indicesOptions, fieldCapsRequest::indicesOptions, expectedIndicesOptions);

    Request converted = RequestConverters.fieldCaps(fieldCapsRequest);

    // The endpoint is "/<indices>/_field_caps", or "/_field_caps" when no index is given.
    String commaSeparatedIndices = String.join(",", indexNames);
    String expectedEndpoint = commaSeparatedIndices.isEmpty()
        ? "/_field_caps"
        : "/" + commaSeparatedIndices + "/_field_caps";
    assertEquals(expectedEndpoint, converted.getEndpoint());

    Map<String, String> actualParams = converted.getParameters();
    assertEquals(5, actualParams.size());

    // Field names go through a hash set before being added to the request, so their
    // order is non-deterministic: compare them as sets rather than as a raw string.
    assertThat(actualParams, hasKey("fields"));
    HashSet<String> expectedFields = new HashSet<>(Arrays.asList(fieldNames));
    HashSet<String> actualFields = new HashSet<>(Arrays.asList(Strings.splitStringByCommaToArray(actualParams.get("fields"))));
    assertEquals(expectedFields, actualFields);

    expectedIndicesOptions.forEach((key, value) -> assertThat(actualParams, hasEntry(key, value)));

    // The filter must have been serialized into the request body.
    assertNotNull(converted.getEntity());
    assertToXContentBody(fieldCapsRequest, converted.getEntity());
}

public void testRankEval() throws Exception {
RankEvalSpec spec = new RankEvalSpec(
Collections.singletonList(new RatedRequest("queryId", Collections.emptyList(), new SearchSourceBuilder())),
Expand Down
76 changes: 65 additions & 11 deletions docs/reference/search/field-caps.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@ GET /_field_caps?fields=rating
[[search-field-caps-api-request]]
==== {api-request-title}

`GET /_field_caps`
`GET /_field_caps`

`POST /_field_caps`
`POST /_field_caps`

`GET /<index>/_field_caps`
`GET /<index>/_field_caps`

`POST /<index>/_field_caps`

Expand All @@ -25,7 +25,7 @@ GET /_field_caps?fields=rating
==== {api-description-title}


The field capabilities API returns the information about the capabilities of
The field capabilities API returns the information about the capabilities of
fields among multiple indices.


Expand Down Expand Up @@ -53,13 +53,33 @@ include::{docdir}/rest-api/common-parms.asciidoc[tag=fields]
include::{docdir}/rest-api/common-parms.asciidoc[tag=index-ignore-unavailable]

`include_unmapped`::
(Optional, boolean) If `true`, unmapped fields are included in the response.
(Optional, boolean) If `true`, unmapped fields are included in the response.
Defaults to `false`.

[[search-field-caps-api-request-body]]
==== {api-request-body-title}

`index_filter`::
(Optional, <<query-dsl,query object>>) Filters indices from the response if the
provided query rewrites to `match_none` on every shard.
+
--
[IMPORTANT]
====
The filtering is done on a best-effort basis: it uses index statistics and mappings
to rewrite queries to `match_none` instead of fully executing the request.
For instance a `range` query over a `date` field can rewrite to `match_none`
if all documents within a shard (including deleted documents) are outside
of the provided range.
However, not all queries can rewrite to `match_none` so this API may return
an index even if the provided filter matches no document.
====
--


[[search-field-caps-api-response-body]]
==== {api-response-body-title}



`searchable`::
Expand All @@ -69,15 +89,15 @@ include::{docdir}/rest-api/common-parms.asciidoc[tag=index-ignore-unavailable]
Whether this field can be aggregated on all indices.

`indices`::
The list of indices where this field has the same type, or null if all indices
The list of indices where this field has the same type, or null if all indices
have the same type for the field.

`non_searchable_indices`::
The list of indices where this field is not searchable, or null if all indices
The list of indices where this field is not searchable, or null if all indices
have the same definition for the field.

`non_aggregatable_indices`::
The list of indices where this field is not aggregatable, or null if all
The list of indices where this field is not aggregatable, or null if all
indices have the same definition for the field.

`meta`::
Expand All @@ -100,7 +120,7 @@ GET twitter/_field_caps?fields=rating
// TEST[setup:twitter]


The next example API call requests information about the `rating` and the
The next example API call requests information about the `rating` and the
`title` fields:

[source,console]
Expand Down Expand Up @@ -156,7 +176,7 @@ adding a parameter called `include_unmapped` in the request:
GET _field_caps?fields=rating,title&include_unmapped
--------------------------------------------------

In which case the response will contain an entry for each field that is present
In which case the response will contain an entry for each field that is present
in some indices but not all:

[source,console-result]
Expand Down Expand Up @@ -202,3 +222,37 @@ in some indices but not all:

<1> The `rating` field is unmapped in `index5`.
<2> The `title` field is unmapped in `index5`.

It is also possible to filter indices with a query:

[source,console]
--------------------------------------------------
POST twitter-*/_field_caps?fields=rating
{
"index_filter": {
"range": {
"@timestamp": {
"gte": "2018"
}
}
}
}
--------------------------------------------------
// TEST[setup:twitter]


In which case indices that rewrite the provided filter to `match_none` on every shard
will be filtered from the response.

--
[IMPORTANT]
====
The filtering is done on a best-effort basis: it uses index statistics and mappings
to rewrite queries to `match_none` instead of fully executing the request.
For instance a `range` query over a `date` field can rewrite to `match_none`
if all documents within a shard (including deleted documents) are outside
of the provided range.
However, not all queries can rewrite to `match_none` so this API may return
an index even if the provided filter matches no document.
====
--
Original file line number Diff line number Diff line change
Expand Up @@ -72,3 +72,34 @@
fields: [number]
- match: {fields.number.double.searchable: true}
- match: {fields.number.double.aggregatable: true}

---
# Verifies that an index_filter is applied both to local indices and to indices
# resolved on the remote cluster, and that filtered indices are dropped from the response.
"Get field caps from remote cluster with index filter":
  - skip:
      version: " - 7.99.99"
      # TODO: Adapt version after backport
      reason: Index filter support was added in 8.0

  # Filter that only keeps documents created before 2018.
  - do:
      field_caps:
        index: 'field_caps_index_2,my_remote_cluster:field_*'
        fields: [number]
        body: { index_filter: { range: { created_at: { lt: 2018 } } } }
  - match: {indices: ["field_caps_index_2", "my_remote_cluster:field_caps_index_1"]}
  - length: {fields.number: 1}
  - match: {fields.number.double.searchable: true}
  - match: {fields.number.double.aggregatable: true}

  # Filter that only keeps documents created after 2019.
  - do:
      field_caps:
        index: 'field_caps_index_2,my_remote_cluster:field_*'
        fields: [number]
        body: { index_filter: { range: { created_at: { gt: 2019 } } } }
  - match: {indices: ["field_caps_index_2", "my_remote_cluster:field_caps_index_3"]}
  - length: {fields.number: 2}
  - match: {fields.number.double.searchable: true}
  - match: {fields.number.double.aggregatable: true}
  - match: {fields.number.double.indices: ["field_caps_index_2"]}
  - match: {fields.number.long.searchable: true}
  - match: {fields.number.long.aggregatable: true}
  - match: {fields.number.long.indices: ["my_remote_cluster:field_caps_index_3"]}
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@
indices.create:
index: field_caps_index_1
body:
settings:
index.number_of_shards: 1
mappings:
properties:
created_at:
type: date
text:
type: text
keyword:
Expand All @@ -51,10 +51,10 @@
indices.create:
index: field_caps_index_3
body:
settings:
index.number_of_shards: 1
mappings:
properties:
created_at:
type: date
text:
type: text
keyword:
Expand Down Expand Up @@ -103,6 +103,19 @@
- '{"index": {"_index": "test_index"}}'
- '{"f1": "remote_cluster", "animal": "chicken", "filter_field": 0}'

- do:
bulk:
refresh: true
body:
- '{"index": {"_index": "field_caps_index_1"}}'
- '{"created_at": "2018-01-05"}'
- '{"index": {"_index": "field_caps_index_1"}}'
- '{"created_at": "2017-12-01"}'
- '{"index": {"_index": "field_caps_index_3"}}'
- '{"created_at": "2019-10-01"}'
- '{"index": {"_index": "field_caps_index_3"}}'
- '{"created_at": "2020-01-01"}'

- do:
search:
rest_total_hits_as_int: true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@
"default":false,
"description":"Indicates whether unmapped fields should be included in the response."
}
},
"body":{
"description":"An index filter specified with the Query DSL"
}
}
}
Loading

0 comments on commit 37cc341

Please sign in to comment.