From fac961086eafa0f7437576fd6af900e1f9fe22ed Mon Sep 17 00:00:00 2001 From: Neal Beeken Date: Tue, 25 May 2021 14:53:01 -0400 Subject: [PATCH] fix(NODE-3309): remove redundant iteration of bulk write result (#2815) Add caching for getters that build the result object. Add benchmarking script. Add unit test for getter caching. --- lib/bulk/common.js | 37 +++++++--- lib/operations/bulk_write.js | 17 ----- .../benchmarks/driverBench/bulkWriteResult.js | 60 ++++++++++++++++ test/unit/bulk_write.test.js | 69 +++++++++++++++++++ 4 files changed, 157 insertions(+), 26 deletions(-) create mode 100644 test/benchmarks/driverBench/bulkWriteResult.js diff --git a/lib/bulk/common.js b/lib/bulk/common.js index cfc661e02f..97d6745d5a 100644 --- a/lib/bulk/common.js +++ b/lib/bulk/common.js @@ -57,6 +57,9 @@ class Batch { } } +const kUpsertedIds = Symbol('upsertedIds'); +const kInsertedIds = Symbol('insertedIds'); + /** * @classdesc * The result of a bulk write. @@ -69,6 +72,8 @@ class BulkWriteResult { */ constructor(bulkResult) { this.result = bulkResult; + this[kUpsertedIds] = undefined; + this[kInsertedIds] = undefined; } /** Number of documents inserted. */ @@ -94,20 +99,33 @@ class BulkWriteResult { /** Upserted document generated Id's, hash key is the index of the originating operation */ get upsertedIds() { - const upserted = {}; - for (const doc of !this.result.upserted ? [] : this.result.upserted) { - upserted[doc.index] = doc._id; + if (this[kUpsertedIds]) { + return this[kUpsertedIds]; + } + + this[kUpsertedIds] = {}; + for (const doc of this.result.upserted || []) { + this[kUpsertedIds][doc.index] = doc._id; } - return upserted; + return this[kUpsertedIds]; } /** Inserted document generated Id's, hash key is the index of the originating operation */ get insertedIds() { - const inserted = {}; - for (const doc of !this.result.insertedIds ? [] : this.result.insertedIds) { - inserted[doc.index] = doc._id; + if (this[kInsertedIds]) { + return this[kInsertedIds]; + } + + this[kInsertedIds] = {}; + for (const doc of this.result.insertedIds || []) { + this[kInsertedIds][doc.index] = doc._id; } - return inserted; + return this[kInsertedIds]; + } + + /** The number of inserted documents @type {number} */ + get n() { + return this.result.insertedCount; } /** @@ -1370,5 +1388,6 @@ module.exports = { INSERT: INSERT, UPDATE: UPDATE, REMOVE: REMOVE, - BulkWriteError + BulkWriteError, + BulkWriteResult }; diff --git a/lib/operations/bulk_write.js b/lib/operations/bulk_write.js index 752b3e7b7f..493defd61b 100644 --- a/lib/operations/bulk_write.js +++ b/lib/operations/bulk_write.js @@ -70,23 +70,6 @@ class BulkWriteOperation extends OperationBase { return callback(err, null); } - // Update the n - r.n = r.insertedCount; - - // Inserted documents - const inserted = r.getInsertedIds(); - // Map inserted ids - for (let i = 0; i < inserted.length; i++) { - r.insertedIds[inserted[i].index] = inserted[i]._id; - } - - // Upserted documents - const upserted = r.getUpsertedIds(); - // Map upserted ids - for (let i = 0; i < upserted.length; i++) { - r.upsertedIds[upserted[i].index] = upserted[i]._id; - } - // Return the results callback(null, r); }); diff --git a/test/benchmarks/driverBench/bulkWriteResult.js b/test/benchmarks/driverBench/bulkWriteResult.js new file mode 100644 index 0000000000..ff5055f448 --- /dev/null +++ b/test/benchmarks/driverBench/bulkWriteResult.js @@ -0,0 +1,60 @@ +'use strict'; + +const performance = require('perf_hooks').performance; +const PerformanceObserver = require('perf_hooks').PerformanceObserver; +const MongoClient = require('../../../index').MongoClient; + +/** + * # BulkWriteResult class Benchmark + * This script can be used to reproduce a performance regression between 3.6.6...3.6.8 + * - [Changes to bulk/common.js](https://github.com/mongodb/node-mongodb-native/compare/v3.6.6...v3.6.8#diff-ab41c37a93c7b6e74f6d2dd30dec67a140f0a84562b4bd28d0ffc3b150c43600) + * - [Changes to operations/bulk_write.js](https://github.com/mongodb/node-mongodb-native/compare/v3.6.6...v3.6.8#diff-93e45847ed36e2aead01a003826fd4057104d76cdeba4807adc1f76b573a87d8) + * + * ## Solution + * A nested loop was introduced through the use of getters. + * - Results running this script (modify the mongodb import) against v3.6.6 + * - bulkWrite took `217.664902ms` to insert 10000 documents + * - Results with performance regression: + * - bulkWrite took `1713.479087ms` to insert 10000 documents + * - Results with nested loop removal and getter caching: + * - bulkWrite took `190.523483ms` to insert 10000 documents + */ + +const client = new MongoClient(process.env.MONGODB_URI || 'mongodb://localhost', { + useUnifiedTopology: true +}); + +const DOC_COUNT = 10000; + +const MANY_DOCS = new Array(DOC_COUNT).fill(null).map((_, index) => ({ + _id: `id is ${index}` +})); + +const obs = new PerformanceObserver(items => { + items.getEntries().forEach(entry => { + console.log(`${entry.name} took ${entry.duration}ms to insert ${MANY_DOCS.length} documents`); + }); +}); + +obs.observe({ entryTypes: ['measure'], buffer: true }); + +async function main() { + await client.connect(); + const collection = client.db('test').collection('test'); + + try { + await collection.drop(); + } catch (_) { + // resetting collection if exists + } + + performance.mark('bulkWrite-start'); + await collection.insertMany(MANY_DOCS); + performance.mark('bulkWrite-end'); + + performance.measure('bulkWrite', 'bulkWrite-start', 'bulkWrite-end'); +} + +main(process.argv) + .catch(console.error) + .finally(() => client.close()); diff --git a/test/unit/bulk_write.test.js b/test/unit/bulk_write.test.js index 56162be4fe..8ca1722d17 100644 --- a/test/unit/bulk_write.test.js +++ b/test/unit/bulk_write.test.js @@ -2,6 +2,7 @@ const expect = require('chai').expect; const mock = require('mongodb-mock-server'); +const BulkWriteResult = require('../../lib/bulk/common').BulkWriteResult; describe('Bulk Writes', function() { const test = {}; @@ -62,4 +63,72 @@ describe('Bulk Writes', function() { }); }); }); + + it('should cache the upsertedIds in result', function() { + const result = new BulkWriteResult({ + upserted: [ + { index: 0, _id: 1 }, + { index: 1, _id: 2 }, + { index: 2, _id: 3 } + ] + }); + + const bulkWriteResultSymbols = Object.getOwnPropertySymbols(result); + + expect(bulkWriteResultSymbols.length).to.be.equal(2); + + const kUpsertedIds = bulkWriteResultSymbols.filter( + s => s.toString() === 'Symbol(upsertedIds)' + )[0]; + + expect(kUpsertedIds).to.be.a('symbol'); + + expect(result[kUpsertedIds]).to.equal(undefined); + + const upsertedIds = result.upsertedIds; // calls getter + + expect(upsertedIds).to.be.a('object'); + + expect(result[kUpsertedIds]).to.equal(upsertedIds); + + Object.freeze(result); // If the getters try to write to `this` + Object.freeze(result[kUpsertedIds]); + // or either cached object then they will throw in these expects: + + expect(() => result.upsertedIds).to.not.throw(); + }); + + it('should cache the insertedIds in result', function() { + const result = new BulkWriteResult({ + insertedIds: [ + { index: 0, _id: 4 }, + { index: 1, _id: 5 }, + { index: 2, _id: 6 } + ] + }); + + const bulkWriteResultSymbols = Object.getOwnPropertySymbols(result); + + expect(bulkWriteResultSymbols.length).to.be.equal(2); + + const kInsertedIds = bulkWriteResultSymbols.filter( + s => s.toString() === 'Symbol(insertedIds)' + )[0]; + + expect(kInsertedIds).to.be.a('symbol'); + + expect(result[kInsertedIds]).to.equal(undefined); + + const insertedIds = result.insertedIds; // calls getter + + expect(insertedIds).to.be.a('object'); + + expect(result[kInsertedIds]).to.equal(insertedIds); + + Object.freeze(result); // If the getters try to write to `this` + Object.freeze(result[kInsertedIds]); + // or either cached object then they will throw in these expects: + + expect(() => result.insertedIds).to.not.throw(); + }); });