From 93a6030e9d00ad0cfc4da65440108b001e982106 Mon Sep 17 00:00:00 2001 From: Brandon Skinner Date: Thu, 22 Oct 2020 11:01:47 -0600 Subject: [PATCH 1/8] Added Elasticsearch::get_documents and Indexable::multi_get --- includes/classes/Elasticsearch.php | 45 ++++++++++++++++++++++++++++++ includes/classes/Indexable.php | 11 ++++++++ 2 files changed, 56 insertions(+) diff --git a/includes/classes/Elasticsearch.php b/includes/classes/Elasticsearch.php index 23bd80cdc5..d28d5aa998 100644 --- a/includes/classes/Elasticsearch.php +++ b/includes/classes/Elasticsearch.php @@ -650,6 +650,51 @@ public function delete_network_alias( $alias ) { return false; } + /** + * Get multiple documents from Elasticsearch given an array of ids + * + * @param string $index Index name. + * @param array $document_ids Array of document ids to get. + * @since 3.5 + * @return boolean|array + */ + public function get_documents( $index, $document_ids ) { + $path = $index . '/_doc/_mget'; + + $request_args = [ + 'method' => 'POST', + 'body' => wp_json_encode( + array( + 'ids' => $document_ids, + ) + ), + ]; + + $request = $this->remote_request( $path, $request_args, [], 'post' ); + + if ( is_wp_error( $request ) ) { + return false; + } + + $response_body = wp_remote_retrieve_body( $request ); + + $response = json_decode( $response_body, true ); + + $docs = []; + + if ( is_array( $response['docs'] ) ) { + foreach ( $response['docs'] as $doc ) { + if ( ! empty( $doc['exists'] ) || ! empty( $doc['found'] ) ) { + $docs[] = $doc['_source']; + } else { + $docs[] = null; + } + } + } + + return $docs; + } + /** * Create the network alias. 
* diff --git a/includes/classes/Indexable.php b/includes/classes/Indexable.php index 978189d2ce..2d609b4bff 100644 --- a/includes/classes/Indexable.php +++ b/includes/classes/Indexable.php @@ -208,6 +208,17 @@ public function get( $object_id ) { return Elasticsearch::factory()->get_document( $this->get_index_name(), $this->slug, $object_id ); } + /** + * Get objects within the indexable + * + * @param array $object_ids Array of object ids to get. + * @since 3.0 + * @return boolean|array + */ + public function multi_get( $object_ids ) { + return Elasticsearch::factory()->get_documents( $this->get_index_name(), $object_ids ); + } + /** * Delete an index within the indexable * From dcd17956d0b262a6e6d655ce7a9a90750c048f1a Mon Sep 17 00:00:00 2001 From: Felipe Elia Date: Fri, 21 May 2021 10:27:36 -0300 Subject: [PATCH 2/8] Add compatibility to ES 5 --- includes/classes/Elasticsearch.php | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/includes/classes/Elasticsearch.php b/includes/classes/Elasticsearch.php index d28d5aa998..aed2ad0190 100644 --- a/includes/classes/Elasticsearch.php +++ b/includes/classes/Elasticsearch.php @@ -654,12 +654,17 @@ public function delete_network_alias( $alias ) { * Get multiple documents from Elasticsearch given an array of ids * * @param string $index Index name. + * @param string $type Index type. Previously this was used for index type. Now it's just passed to hooks for legacy reasons. * @param array $document_ids Array of document ids to get. - * @since 3.5 + * @since 3.6.0 * @return boolean|array */ - public function get_documents( $index, $document_ids ) { - $path = $index . '/_doc/_mget'; + public function get_documents( $index, $type, $document_ids ) { + if ( version_compare( $this->get_elasticsearch_version(), '7.0', '<' ) ) { + $path = apply_filters( 'ep_index_' . $type . '_request_path', $index . '/' . $type . '/_mget', $document_ids, $type ); + } else { + $path = apply_filters( 'ep_index_' . $type . 
'_request_path', $index . '/_doc/_mget', $document_ids, $type ); + } $request_args = [ 'method' => 'POST', From bba102e1899faecb199607d20da54ba79bae3bd0 Mon Sep 17 00:00:00 2001 From: Felipe Elia Date: Fri, 21 May 2021 10:30:51 -0300 Subject: [PATCH 3/8] Update the Indexable::multi_get method --- includes/classes/Indexable.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/includes/classes/Indexable.php b/includes/classes/Indexable.php index 2d609b4bff..2e75947fab 100644 --- a/includes/classes/Indexable.php +++ b/includes/classes/Indexable.php @@ -212,11 +212,11 @@ public function get( $object_id ) { * Get objects within the indexable * * @param array $object_ids Array of object ids to get. - * @since 3.0 + * @since 3.6.0 * @return boolean|array */ public function multi_get( $object_ids ) { - return Elasticsearch::factory()->get_documents( $this->get_index_name(), $object_ids ); + return Elasticsearch::factory()->get_documents( $this->get_index_name(), $this->slug, $object_ids ); } /** From d11759e5e48878bdec814a85af88a5e928c45b97 Mon Sep 17 00:00:00 2001 From: Felipe Elia Date: Fri, 21 May 2021 13:46:05 -0300 Subject: [PATCH 4/8] Multi_get: Index results by IDs --- includes/classes/Elasticsearch.php | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/includes/classes/Elasticsearch.php b/includes/classes/Elasticsearch.php index aed2ad0190..3881834ca9 100644 --- a/includes/classes/Elasticsearch.php +++ b/includes/classes/Elasticsearch.php @@ -690,9 +690,7 @@ public function get_documents( $index, $type, $document_ids ) { if ( is_array( $response['docs'] ) ) { foreach ( $response['docs'] as $doc ) { if ( ! 
empty( $doc['found'] ) ) { - $docs[] = $doc['_source']; - } else { - $docs[] = null; + $docs[ $doc['_id'] ] = $doc['_source']; } } } From 9508ef24b5641d94511e4ac19ec6790b237931bc Mon Sep 17 00:00:00 2001 From: Felipe Elia Date: Fri, 21 May 2021 13:50:27 -0300 Subject: [PATCH 5/8] Add the ep_get_documents filter --- includes/classes/Elasticsearch.php | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/includes/classes/Elasticsearch.php b/includes/classes/Elasticsearch.php index 3881834ca9..06c8d7fce2 100644 --- a/includes/classes/Elasticsearch.php +++ b/includes/classes/Elasticsearch.php @@ -695,6 +695,19 @@ public function get_documents( $index, $type, $document_ids ) { } } + /** + * Filter documents found by Elasticsearch through the /_mget endpoint. + * + * @hook ep_get_documents + * @since 3.6.0 + * @param {array} $docs Documents found indexed by ID + * @param {string} $index Index name + * @param {string} $type Index type + * @param {array} $document_ids Array of document ids + * @return {array} Documents to be returned + */ + $docs = apply_filters( 'ep_get_documents', $docs, $index, $type, $document_ids ); + return $docs; } From 662c48c363c51bee7b2235d93fc0362fb241ab27 Mon Sep 17 00:00:00 2001 From: Ramon Date: Sat, 22 May 2021 15:44:24 -0300 Subject: [PATCH 6/8] Add tests to Elasticsearch->get_documents --- tests/php/TestElasticsearch.php | 44 +++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/tests/php/TestElasticsearch.php b/tests/php/TestElasticsearch.php index a2a89be2c2..453d305ac1 100644 --- a/tests/php/TestElasticsearch.php +++ b/tests/php/TestElasticsearch.php @@ -51,4 +51,48 @@ public function testGetClusterStatus() { } } + + /** + * Test get documents + * + * @since 3.6.0 + * @group elasticsearch + */ + public function testGetDocuments() { + + $post_ids = array(); + $post_ids[] = Functions\create_and_sync_post(); + $post_ids[] = Functions\create_and_sync_post(); + + 
ElasticPress\Elasticsearch::factory()->refresh_indices(); + + $index_name = ElasticPress\Indexables::factory()->get( 'post' )->get_index_name(); + + $documents = ElasticPress\Elasticsearch::factory()->get_documents( $index_name, 'post', $post_ids ); + + $this->assertIsArray( $documents ); + $this->assertEquals( 2, count( $documents ) ); + $this->assertArrayHasKey( $post_ids[0], $documents ); + $this->assertArrayHasKey( $post_ids[1], $documents ); + + $post_ids[] = 99999999; // Adding an id that doesn't exist + + $documents = ElasticPress\Elasticsearch::factory()->get_documents( $index_name, 'post', $post_ids ); + + $this->assertIsArray( $documents ); + $this->assertEquals( 2, count( $documents ) ); + $this->assertArrayHasKey( $post_ids[0], $documents ); + $this->assertArrayHasKey( $post_ids[1], $documents ); + + // Trying get an document that doesn't exist + $documents = ElasticPress\Elasticsearch::factory()->get_documents( $index_name, 'post', [ 99999999 ] ); + + $this->assertIsArray( $documents ); + $this->assertEmpty( $documents ); + + $documents = ElasticPress\Elasticsearch::factory()->get_documents( $index_name, 'post', [] ); + + $this->assertIsArray( $documents ); + $this->assertEmpty( $documents ); + } } From 0acc334f240f63990289071d5566866e265bcbda Mon Sep 17 00:00:00 2001 From: Ramon Date: Sat, 22 May 2021 15:45:22 -0300 Subject: [PATCH 7/8] Fix prevent a possible access to undefined index 'docs' --- includes/classes/Elasticsearch.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/includes/classes/Elasticsearch.php b/includes/classes/Elasticsearch.php index 06c8d7fce2..78e303a776 100644 --- a/includes/classes/Elasticsearch.php +++ b/includes/classes/Elasticsearch.php @@ -687,7 +687,7 @@ public function get_documents( $index, $type, $document_ids ) { $docs = []; - if ( is_array( $response['docs'] ) ) { + if ( isset( $response['docs'] ) && is_array( $response['docs'] ) ) { foreach ( $response['docs'] as $doc ) { if ( ! 
empty( $doc['exists'] ) || ! empty( $doc['found'] ) ) { $docs[ $doc['_id'] ] = $doc['_source']; From 12f22c26793526780e39fff1e04243d9a15842e5 Mon Sep 17 00:00:00 2001 From: Felipe Elia Date: Wed, 26 May 2021 12:44:31 -0300 Subject: [PATCH 8/8] Small adjustment in docs --- tests/php/TestElasticsearch.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/php/TestElasticsearch.php b/tests/php/TestElasticsearch.php index 453d305ac1..f4cf7b6200 100644 --- a/tests/php/TestElasticsearch.php +++ b/tests/php/TestElasticsearch.php @@ -84,7 +84,7 @@ public function testGetDocuments() { $this->assertArrayHasKey( $post_ids[0], $documents ); $this->assertArrayHasKey( $post_ids[1], $documents ); - // Trying get an document that doesn't exist + // Trying to get a document that doesn't exist $documents = ElasticPress\Elasticsearch::factory()->get_documents( $index_name, 'post', [ 99999999 ] ); $this->assertIsArray( $documents );