Skip to content

Commit

Permalink
fix(NODE-3309): remove redundant iteration of bulk write result (#2815)
Browse files Browse the repository at this point in the history
Add caching for getters that build the result object.
Add benchmarking script.
Add unit test for getter caching.
  • Loading branch information
nbbeeken authored May 25, 2021
1 parent 58c4e69 commit fac9610
Show file tree
Hide file tree
Showing 4 changed files with 157 additions and 26 deletions.
37 changes: 28 additions & 9 deletions lib/bulk/common.js
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ class Batch {
}
}

const kUpsertedIds = Symbol('upsertedIds');
const kInsertedIds = Symbol('insertedIds');

/**
* @classdesc
* The result of a bulk write.
Expand All @@ -69,6 +72,8 @@ class BulkWriteResult {
*/
constructor(bulkResult) {
this.result = bulkResult;
this[kUpsertedIds] = undefined;
this[kInsertedIds] = undefined;
}

/** Number of documents inserted. */
Expand All @@ -94,20 +99,33 @@ class BulkWriteResult {

/** Upserted document generated Id's, hash key is the index of the originating operation */
get upsertedIds() {
const upserted = {};
for (const doc of !this.result.upserted ? [] : this.result.upserted) {
upserted[doc.index] = doc._id;
if (this[kUpsertedIds]) {
return this[kUpsertedIds];
}

this[kUpsertedIds] = {};
for (const doc of this.result.upserted || []) {
this[kUpsertedIds][doc.index] = doc._id;
}
return upserted;
return this[kUpsertedIds];
}

/** Inserted document generated Id's, hash key is the index of the originating operation */
get insertedIds() {
const inserted = {};
for (const doc of !this.result.insertedIds ? [] : this.result.insertedIds) {
inserted[doc.index] = doc._id;
if (this[kInsertedIds]) {
return this[kInsertedIds];
}

this[kInsertedIds] = {};
for (const doc of this.result.insertedIds || []) {
this[kInsertedIds][doc.index] = doc._id;
}
return inserted;
return this[kInsertedIds];
}

/** The number of inserted documents @type {number} */
get n() {
return this.result.insertedCount;
}

/**
Expand Down Expand Up @@ -1370,5 +1388,6 @@ module.exports = {
INSERT: INSERT,
UPDATE: UPDATE,
REMOVE: REMOVE,
BulkWriteError
BulkWriteError,
BulkWriteResult
};
17 changes: 0 additions & 17 deletions lib/operations/bulk_write.js
Original file line number Diff line number Diff line change
Expand Up @@ -70,23 +70,6 @@ class BulkWriteOperation extends OperationBase {
return callback(err, null);
}

// Update the n
r.n = r.insertedCount;

// Inserted documents
const inserted = r.getInsertedIds();
// Map inserted ids
for (let i = 0; i < inserted.length; i++) {
r.insertedIds[inserted[i].index] = inserted[i]._id;
}

// Upserted documents
const upserted = r.getUpsertedIds();
// Map upserted ids
for (let i = 0; i < upserted.length; i++) {
r.upsertedIds[upserted[i].index] = upserted[i]._id;
}

// Return the results
callback(null, r);
});
Expand Down
60 changes: 60 additions & 0 deletions test/benchmarks/driverBench/bulkWriteResult.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
'use strict';

const performance = require('perf_hooks').performance;
const PerformanceObserver = require('perf_hooks').PerformanceObserver;
const MongoClient = require('../../../index').MongoClient;

/**
* # BulkWriteResult class Benchmark
* This script can be used to reproduce a performance regression between 3.6.6...3.6.8
* - [Changes to bulk/common.js](https://github.com/mongodb/node-mongodb-native/compare/v3.6.6...v3.6.8#diff-ab41c37a93c7b6e74f6d2dd30dec67a140f0a84562b4bd28d0ffc3b150c43600)
* - [Changes to operations/bulk_write.js](https://github.com/mongodb/node-mongodb-native/compare/v3.6.6...v3.6.8#diff-93e45847ed36e2aead01a003826fd4057104d76cdeba4807adc1f76b573a87d8)
*
* ## Solution
* A nested loop was introduced through the use of getters.
* - Results running this script (modify the mongodb import) against v3.6.6
* - bulkWrite took `217.664902ms` to insert 10000 documents
* - Results with performance regression:
* - bulkWrite took `1713.479087ms` to insert 10000 documents
* - Results with nested loop removal and getter caching:
* - bulkWrite took `190.523483ms` to insert 10000 documents
*/

const client = new MongoClient(process.env.MONGODB_URI || 'mongodb://localhost', {
useUnifiedTopology: true
});

const DOC_COUNT = 10000;

const MANY_DOCS = new Array(DOC_COUNT).fill(null).map((_, index) => ({
_id: `id is ${index}`
}));

const obs = new PerformanceObserver(items => {
items.getEntries().forEach(entry => {
console.log(`${entry.name} took ${entry.duration}ms to insert ${MANY_DOCS.length} documents`);
});
});

obs.observe({ entryTypes: ['measure'], buffer: true });

async function main() {
await client.connect();
const collection = client.db('test').collection('test');

try {
await collection.drop();
} catch (_) {
// resetting collection if exists
}

performance.mark('bulkWrite-start');
await collection.insertMany(MANY_DOCS);
performance.mark('bulkWrite-end');

performance.measure('bulkWrite', 'bulkWrite-start', 'bulkWrite-end');
}

main(process.argv)
.catch(console.error)
.finally(() => client.close());
69 changes: 69 additions & 0 deletions test/unit/bulk_write.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

const expect = require('chai').expect;
const mock = require('mongodb-mock-server');
const BulkWriteResult = require('../../lib/bulk/common').BulkWriteResult;

describe('Bulk Writes', function() {
const test = {};
Expand Down Expand Up @@ -62,4 +63,72 @@ describe('Bulk Writes', function() {
});
});
});

it('should cache the upsertedIds in result', function() {
const result = new BulkWriteResult({
upserted: [
{ index: 0, _id: 1 },
{ index: 1, _id: 2 },
{ index: 2, _id: 3 }
]
});

const bulkWriteResultSymbols = Object.getOwnPropertySymbols(result);

expect(bulkWriteResultSymbols.length).to.be.equal(2);

const kUpsertedIds = bulkWriteResultSymbols.filter(
s => s.toString() === 'Symbol(upsertedIds)'
)[0];

expect(kUpsertedIds).to.be.a('symbol');

expect(result[kUpsertedIds]).to.equal(undefined);

const upsertedIds = result.upsertedIds; // calls getter

expect(upsertedIds).to.be.a('object');

expect(result[kUpsertedIds]).to.equal(upsertedIds);

Object.freeze(result); // If the getters try to write to `this`
Object.freeze(result[kUpsertedIds]);
// or either cached object then they will throw in these expects:

expect(() => result.upsertedIds).to.not.throw();
});

it('should cache the insertedIds in result', function() {
const result = new BulkWriteResult({
insertedIds: [
{ index: 0, _id: 4 },
{ index: 1, _id: 5 },
{ index: 2, _id: 6 }
]
});

const bulkWriteResultSymbols = Object.getOwnPropertySymbols(result);

expect(bulkWriteResultSymbols.length).to.be.equal(2);

const kInsertedIds = bulkWriteResultSymbols.filter(
s => s.toString() === 'Symbol(insertedIds)'
)[0];

expect(kInsertedIds).to.be.a('symbol');

expect(result[kInsertedIds]).to.equal(undefined);

const insertedIds = result.insertedIds; // calls getter

expect(insertedIds).to.be.a('object');

expect(result[kInsertedIds]).to.equal(insertedIds);

Object.freeze(result); // If the getters try to write to `this`
Object.freeze(result[kInsertedIds]);
// or either cached object then they will throw in these expects:

expect(() => result.insertedIds).to.not.throw();
});
});

0 comments on commit fac9610

Please sign in to comment.