Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement ID ranges instead of relying on OFFSET when doing pagination. #2171

Merged
merged 12 commits into from
Jun 24, 2021
2 changes: 2 additions & 0 deletions docs/wp-cli.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ The following WP-CLI commands are supported by ElasticPress:
* `--post-type` lets you specify which post types will be indexed (by default: all indexable post types are indexed). For example, `--post-type="my_custom_post_type"` would limit indexing to only posts from the post type "my_custom_post_type". Accepts multiple post types separated by commas.
* `--include` Choose which object IDs to include in the index.
* `--post-ids` Choose which post_ids to include when indexing the Posts Indexable (deprecated).
* `--start-object-id` Upper limit of a range of IDs to be indexed. Objects are indexed in descending ID order, so if indexing IDs from 30 to 45, this should be 45.
* `--end-object-id` Lower limit of a range of IDs to be indexed. If indexing IDs from 30 to 45, this should be 30.

* `wp elasticpress delete-index [--network-wide]`

Expand Down
39 changes: 27 additions & 12 deletions includes/classes/Command.php
Original file line number Diff line number Diff line change
Expand Up @@ -535,7 +535,7 @@ public function delete_transient_on_int( $signal_no ) {
/**
* Index all posts for a site or network wide
*
* @synopsis [--setup] [--network-wide] [--per-page] [--nobulk] [--show-errors] [--offset] [--indexables] [--show-bulk-errors] [--show-nobulk-errors] [--post-type] [--include] [--post-ids] [--ep-host] [--ep-prefix]
* @synopsis [--setup] [--network-wide] [--per-page] [--nobulk] [--show-errors] [--offset] [--start-object-id] [--end-object-id] [--indexables] [--show-bulk-errors] [--show-nobulk-errors] [--post-type] [--include] [--post-ids] [--ep-host] [--ep-prefix]
*
* @param array $args Positional CLI args.
* @since 0.1.2
Expand Down Expand Up @@ -781,11 +781,11 @@ public function index( $args, $assoc_args ) {
private function index_helper( Indexable $indexable, $args ) {
$synced = 0;
$errors = [];
$no_bulk_count = 0;
$index_queue = [];
$killed_object_count = 0;
$failed_objects = [];
$total_indexable = 0;
$time_elapsed = 0;

$no_bulk = false;

Expand Down Expand Up @@ -825,6 +825,14 @@ function ( $prefix ) use ( $args ) {
$query_args['offset'] = absint( $args['offset'] );
}

if ( ! empty( $args['start-object-id'] ) && is_numeric( $args['start-object-id'] ) ) {
$query_args['ep_indexing_start_object_id'] = $args['start-object-id'];
}

if ( ! empty( $args['end-object-id'] ) && is_numeric( $args['end-object-id'] ) ) {
$query_args['ep_indexing_end_object_id'] = $args['end-object-id'];
}

if ( ! empty( $args['post-ids'] ) ) {
$args['include'] = $args['post-ids'];
}
Expand All @@ -847,6 +855,7 @@ function ( $prefix ) use ( $args ) {
$query_args['post_type'] = array_map( 'trim', $query_args['post_type'] );
}

$loop_counter = 0;
while ( true ) {
$query = $indexable->query_db( $query_args );

Expand All @@ -856,7 +865,6 @@ function ( $prefix ) use ( $args ) {
$objects = [];

if ( ! empty( $query['objects'] ) ) {

foreach ( $query['objects'] as $object ) {

$this->should_interrupt_sync();
Expand All @@ -867,8 +875,6 @@ function ( $prefix ) use ( $args ) {
*/
$result = $indexable->index( $object->ID, true );

$no_bulk_count++;

if ( ! empty( $result->error ) ) {
if ( ! empty( $result->error->reason ) ) {
$failed_objects[ $object->ID ] = (array) $result->error;
Expand All @@ -889,8 +895,6 @@ function ( $prefix ) use ( $args ) {
* @param {Indexable} $indexable Current indexable
*/
do_action( 'ep_cli_object_index', $object->ID, $indexable );

WP_CLI::log( sprintf( esc_html__( 'Processed %1$d/%2$d...', 'elasticpress' ), $no_bulk_count, (int) $query['total_objects'] ) );
} else {
/**
* Conditionally kill indexing for a post
Expand Down Expand Up @@ -972,18 +976,29 @@ function ( $prefix ) use ( $args ) {
break;
}

if ( ! $no_bulk ) {
WP_CLI::log( sprintf( esc_html__( 'Processed %1$d/%2$d...', 'elasticpress' ), (int) ( count( $query['objects'] ) + $query_args['offset'] ), (int) $query['total_objects'] ) );
$last_object_array_key = array_keys( $query['objects'] )[ count( $query['objects'] ) - 1 ];
$last_processed_object_id = $query['objects'][ $last_object_array_key ]->ID;
WP_CLI::log( sprintf( esc_html__( 'Processed %1$d/%2$d. Last Object ID: %3$d', 'elasticpress' ), (int) ( $synced + count( $failed_objects ) ), (int) $query['total_objects'], (int) $last_processed_object_id ) );

$loop_counter++;
if ( ( $loop_counter % 10 ) === 0 ) {
$time_elapsed_diff = $time_elapsed > 0 ? ' (+' . (string) ( timer_stop( 0, 2 ) - $time_elapsed ) . ')' : '';
$time_elapsed = timer_stop( 0, 2 );
WP_CLI::log( WP_CLI::colorize( '%Y' . esc_html__( 'Time elapsed: ', 'elasticpress' ) . '%N' . $time_elapsed . $time_elapsed_diff ) );

$current_memory = round( memory_get_usage() / 1024 / 1024, 2 ) . 'mb';
$peak_memory = ' (Peak: ' . round( memory_get_peak_usage() / 1024 / 1024, 2 ) . 'mb)';
WP_CLI::log( WP_CLI::colorize( '%Y' . esc_html__( 'Memory Usage: ', 'elasticpress' ) . '%N' . $current_memory . $peak_memory ) );
}

$query_args['offset'] += $per_page;
$total_indexable = (int) $query['total_objects'];
$query_args['offset'] += $per_page;
$total_indexable = (int) $query['total_objects'];
$query_args['ep_indexing_last_processed_object_id'] = $last_processed_object_id;

usleep( 500 );

// Avoid running out of memory.
$this->stop_the_insanity();

}

if ( $show_errors && ! empty( $failed_objects ) ) {
Expand Down
123 changes: 105 additions & 18 deletions includes/classes/Indexable/Post/Post.php
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,15 @@ public function __construct() {
*/
public function query_db( $args ) {
$defaults = [
'posts_per_page' => $this->get_bulk_items_per_page(),
'post_type' => $this->get_indexable_post_types(),
'post_status' => $this->get_indexable_post_status(),
'offset' => 0,
'ignore_sticky_posts' => true,
'orderby' => 'ID',
'order' => 'desc',
'posts_per_page' => $this->get_bulk_items_per_page(),
'post_type' => $this->get_indexable_post_types(),
'post_status' => $this->get_indexable_post_status(),
'offset' => 0,
'ignore_sticky_posts' => true,
'orderby' => 'ID',
'order' => 'desc',
'no_found_rows' => true,
'ep_indexing_advanced_pagination' => true,
];

if ( isset( $args['per_page'] ) ) {
Expand All @@ -84,25 +86,110 @@ public function query_db( $args ) {
* @param {array} $args Database arguments
* @return {array} New arguments
*/
$args = apply_filters( 'ep_post_query_db_args', wp_parse_args( $args, $defaults ) );
$args = apply_filters( 'ep_index_posts_args', apply_filters( 'ep_post_query_db_args', wp_parse_args( $args, $defaults ) ) );

/**
* Filter arguments used to query posts from database. Backwards compat with pre-3.0
*
* @hook ep_index_posts_args
* @param {array} $args Database arguments
* @return {array} New arguments
*/
$args = apply_filters( 'ep_index_posts_args', $args );
if ( isset( $args['include'] ) || isset( $args['post__in'] ) ) {
// Disable advanced pagination. Not useful if only indexing specific IDs.
$args['ep_indexing_advanced_pagination'] = false;
}

// Enforce the following query args during advanced pagination to ensure things work correctly.
if ( $args['ep_indexing_advanced_pagination'] ) {
$args = array_merge(
$args,
[
'suppress_filters' => false,
'orderby' => 'ID',
'order' => 'DESC',
'paged' => 1,
'offset' => 0,
'no_found_rows' => true,
]
);
}

add_filter( 'posts_where', array( $this, 'bulk_indexing_filter_posts_where' ), 9999, 2 );

$query = new WP_Query( $args );
$query = new WP_Query( $args );
$total_objects = $this->get_total_objects_for_query( $args );

remove_filter( 'posts_where', array( $this, 'bulk_indexing_filter_posts_where' ), 9999, 2 );

return [
'objects' => $query->posts,
'total_objects' => $query->found_posts,
'total_objects' => $total_objects,
];
}

/**
 * Manipulate the WHERE clause of the bulk indexing query to paginate by ID in order to avoid performance issues with SQL offset.
 *
 * Only acts on queries whose `ep_indexing_advanced_pagination` query var is truthy; any other
 * query gets its WHERE clause back untouched. The range is read from these query vars:
 *
 * - `ep_indexing_start_object_id`: highest ID to include (defaults to PHP_INT_MAX).
 * - `ep_indexing_end_object_id`: lowest ID to include (defaults to 0, meaning "no lower bound").
 * - `ep_indexing_last_processed_object_id`: ID of the last object of the previous batch. When
 *   numeric, the next batch starts right below it and the requested start ID is ignored.
 *
 * @param string   $where The current $where clause.
 * @param WP_Query $query WP_Query object.
 * @return string WHERE clause with our pagination added if needed.
 */
public function bulk_indexing_filter_posts_where( $where, $query ) {
	if ( ! $query->get( 'ep_indexing_advanced_pagination', false ) ) {
		return $where;
	}

	$requested_start_id = $query->get( 'ep_indexing_start_object_id', PHP_INT_MAX );
	$requested_end_id   = $query->get( 'ep_indexing_end_object_id', 0 );
	$last_processed_id  = $query->get( 'ep_indexing_last_processed_object_id', null );

	// On the first pass through the loop we begin with the requested start ID. Afterwards, use the last processed ID to paginate.
	$start_range_post_id = is_numeric( $last_processed_id ) ? $last_processed_id - 1 : $requested_start_id;

	// Sanitize. Abort if unexpected data at this point.
	if ( ! is_numeric( $start_range_post_id ) || ! is_numeric( $requested_end_id ) ) {
		return $where;
	}

	// is_numeric() also accepts floats, exponents and padded strings ("12.0", "1e3", " 7").
	// Cast to integers so that only plain integer literals are ever written into the SQL,
	// and so that a string "0" end ID is recognised as "no lower bound" below.
	$start_range_post_id = (int) $start_range_post_id;
	$requested_end_id    = (int) $requested_end_id;

	$posts_table = $GLOBALS['wpdb']->posts;
	$conditions  = sprintf( 'AND %s.ID <= %d', $posts_table, $start_range_post_id );

	// Skip the end range if it's unnecessary.
	if ( 0 !== $requested_end_id ) {
		$conditions .= sprintf( ' AND %s.ID >= %d', $posts_table, $requested_end_id );
	}

	// The incoming clause is appended as-is, after our conditions.
	return "{$conditions} {$where}";
}

/**
 * Get the total number of objects matching a query (its SQL_CALC_FOUND_ROWS result), based on its args.
 *
 * Pagination-related args are reset before counting, so every batch of the same indexing run
 * maps to the same cache entry and the count query runs only once. Counts are memoised in a
 * static array for the lifetime of the PHP process.
 *
 * NOTE(review): query_db() appears to call this while its `posts_where` ID-range filter is still
 * attached, so the total would reflect the requested start/end range — confirm against the caller.
 *
 * @param array $query_args The query args.
 * @return int The query result's found_posts.
 */
private function get_total_objects_for_query( $query_args ) {
	static $object_counts = [];

	// Reset the pagination-related args for optimal caching.
	$normalized_query_args = array_merge(
		$query_args,
		[
			'offset'                               => 0,
			'paged'                                => 1,
			'posts_per_page'                       => 1,
			'no_found_rows'                        => false,
			'ep_indexing_last_processed_object_id' => null,
		]
	);

	$encoded_query_args = json_encode( $normalized_query_args );

	// json_encode() returns false when the args cannot be encoded (e.g. invalid UTF-8).
	// Hashing that would give every such query the same cache key, so skip the cache
	// rather than risk returning another query's total.
	if ( false === $encoded_query_args ) {
		return ( new WP_Query( $normalized_query_args ) )->found_posts;
	}

	$cache_key = md5( $encoded_query_args );

	if ( ! isset( $object_counts[ $cache_key ] ) ) {
		$object_counts[ $cache_key ] = ( new WP_Query( $normalized_query_args ) )->found_posts;
	}

	return $object_counts[ $cache_key ];
}

/**
* Returns indexable post types for the current site
*
Expand Down
3 changes: 3 additions & 0 deletions includes/dashboard.php
Original file line number Diff line number Diff line change
Expand Up @@ -594,6 +594,9 @@ function action_wp_ajax_ep_index() {
]
);

// Disable during dashboard indexing for now. Support would be possible if desired in the future.
$args['ep_indexing_advanced_pagination'] = false;

$query = $indexable->query_db( $args );

$index_meta['found_items'] = (int) $query['total_objects'];
Expand Down
92 changes: 51 additions & 41 deletions tests/php/indexables/TestPost.php
Original file line number Diff line number Diff line change
Expand Up @@ -4843,73 +4843,83 @@ public function testPostConstructor() {
}

/**
* Tests the constructor for the Indexable\Post class.
* Tests the query_db method.
*
* @return void
* @group post
*/
public function testQueryDb() {
$indexable_post_object = new \ElasticPress\Indexable\Post\Post();

$exclude_post_id = Functions\create_and_sync_post();
$post_id = Functions\create_and_sync_post();

$post = new \ElasticPress\Indexable\Post\Post();
$post_id_1 = Functions\create_and_sync_post();
$post_id_2 = Functions\create_and_sync_post();
$post_id_3 = Functions\create_and_sync_post();

$results = $post->query_db(
// Test the first loop of the indexing.
$results = $indexable_post_object->query_db(
[
'per_page' => 1,
'include' => [ $post_id ],
]
);

$post_ids = wp_list_pluck( $results['objects'], 'ID' );
$this->assertEquals( $post_id_3, $post_ids[0] );
$this->assertCount( 1, $results['objects'] );
$this->assertEquals( 3, $results['total_objects'] );

$this->assertCount( 1, $post_ids );
$this->assertContains( $post_id, $post_ids );
$this->assertSame( 1, absint( $results['total_objects'] ) );

$results = $post->query_db(
// Second loop.
$results = $indexable_post_object->query_db(
[
'exclude' => [ $exclude_post_id ],
'per_page' => 1,
'ep_indexing_last_processed_object_id' => $post_id_3,
]
);

$post_ids = wp_list_pluck( $results['objects'], 'ID' );
$this->assertEquals( $post_id_2, $post_ids[0] );
$this->assertCount( 1, $results['objects'] );
$this->assertEquals( 3, $results['total_objects'] );

$this->assertNotContains( $exclude_post_id, $post_ids );

// Set up a few posts for the filters.
$args_post_ids = [];

$args_post_ids[] = Functions\create_and_sync_post();
$args_post_ids[] = Functions\create_and_sync_post();
$args_post_ids[] = Functions\create_and_sync_post();
$args_post_ids[] = Functions\create_and_sync_post();

$defaults_filter = function( $args ) use ( $args_post_ids ) {
$args['post__in'] = $args_post_ids;
return $args;
};
// A custom start_object_id was passed in.
$results = $indexable_post_object->query_db(
[
'per_page' => 1,
'ep_indexing_start_object_id' => $post_id_1,
]
);

$index_filter = function( $args ) {
$args['posts_per_page'] = 3;
$args['order'] = 'ASC';
return $args;
};
$post_ids = wp_list_pluck( $results['objects'], 'ID' );
$this->assertEquals( $post_id_1, $post_ids[0] );
$this->assertCount( 1, $results['objects'] );
$this->assertEquals( 1, $results['total_objects'] );

add_filter( 'ep_post_query_db_args', $defaults_filter );
add_filter( 'ep_index_posts_args', $index_filter );
// Passing custom start and last post IDs. Second loop.
$results = $indexable_post_object->query_db(
[
'per_page' => 1,
'ep_indexing_start_object_id' => $post_id_3,
'ep_indexing_end_object_id' => $post_id_2,
'ep_indexing_last_processed_object_id' => $post_id_3,
]
);

$results = $post->query_db( [] );
$post_ids = wp_list_pluck( $results['objects'], 'ID' );
$this->assertEquals( $post_id_2, $post_ids[0] );
$this->assertCount( 1, $results['objects'] );
$this->assertEquals( 2, $results['total_objects'] );

remove_filter( 'ep_post_query_db_args', $defaults_filter );
remove_filter( 'ep_index_posts_args', $index_filter );
// Specific post IDs
$results = $indexable_post_object->query_db(
[
'per_page' => 1,
'include' => [ $post_id_1 ],
]
);

$post_ids = wp_list_pluck( $results['objects'], 'ID' );

$this->assertCount( 3, $post_ids );
$this->assertContains( $args_post_ids[2], $post_ids );
$this->assertNotContains( $args_post_ids[3], $post_ids );
$this->assertEquals( $post_id_1, $post_ids[0] );
$this->assertCount( 1, $results['objects'] );
$this->assertEquals( 1, $results['total_objects'] );
}

/**
Expand Down