From bdb31e4d63c5fbcc33c34281869510bf9a78d45e Mon Sep 17 00:00:00 2001 From: Sylvain Lebresne Date: Thu, 29 Dec 2022 17:33:15 +0100 Subject: [PATCH 1/2] Improve generation of plans once all path options are computed At a very high level, query planning works in 2 main steps: 1. for every "leaf" of the query, we compute which possible options (paths) can lead to it. 2. we take all those options and generates the corresponding plans, which means we compute all the (non trivially inefficient) possible plan that would be valid for the query. For each of those plan, we compute the cost of the plan and keep the best plan. In some cases, the 1st step gives quite a few possible options, and generating all the corresponding plans is costly. This patch attempts to first generate plans that are a bit more likely to be the most efficient, or at least fairly good. When then use the cost of those plans to something cut the generation of other plans early. Essentially, as we generate a plan corresponding to a set of options, if we notice that the cost gets higher than the best we've found so far before we've handled all the paths, then we can give up on that set of options without generating everything. Doing so have little to no impact on very simple plans, but for more complex ones it can drastically reduce the generation time (anecdotically, some real world plan gets more than a 10x improvement on generation speed). --- .../src/__tests__/generateAllPlans.test.ts | 96 ++++++++++ query-planner-js/src/buildPlan.ts | 72 +++----- query-planner-js/src/generateAllPlans.ts | 170 ++++++++++++++++++ 3 files changed, 291 insertions(+), 47 deletions(-) create mode 100644 query-planner-js/src/__tests__/generateAllPlans.test.ts create mode 100644 query-planner-js/src/generateAllPlans.ts diff --git a/query-planner-js/src/__tests__/generateAllPlans.test.ts b/query-planner-js/src/__tests__/generateAllPlans.test.ts new file mode 100644 index 000000000..701090bbd --- /dev/null +++ b/query-planner-js/src/__tests__/generateAllPlans.test.ts @@ -0,0 +1,96 @@ +import { assert } from '@apollo/federation-internals'; +import { generateAllPlansAndFindBest } from '../generateAllPlans'; + +function generateTestPlans(initial: string[], choices: string[][]): { best: string[], generated: string[][], evaluated: string[][] } { + const generated: string[][] = []; + const evaluated: string[][] = []; + const { best } = generateAllPlansAndFindBest( + initial, + choices, + (p, c) => { + const r = p.concat(c); + if (r.length === initial.length + choices.length) { + generated.push(r); + } + return r; + }, + (p) => { + evaluated.push(p); + return p.reduce((acc, v) => acc + v.length, 0); + }, + ); + return { best, generated, evaluated }; +} + +function expectSamePlans(expected: string[][], actual: string[][]) { + // We don't want to rely on ordering (the tests ensures we get the best plan that we want, and the rest doesn't matter). + const normalize = (v: string[]) => v.join(''); + const expectedSet = new Set(expected.map((e) => normalize(e))); + for (const value of actual) { + const normalized = normalize(value); + assert(expectedSet.has(normalized), `Unexpected plan [${value.join(', ')}] is not in [\n${expected.map((e) => `[ ${e.join(', ')} ]`).join('\n')}\n]`); + } + + const actualSet = new Set(actual.map((e) => normalize(e))); + for (const value of expected) { + const normalized = normalize(value); + assert(actualSet.has(normalized), `Expected plan [${value.join(', ')}] not found in [\n${actual.map((e) => `[ ${e.join(', ')} ]`).join('\n')}\n]`); + } +} + + +test('Pick elements at same index first', () => { + const { best, generated } = generateTestPlans( + ['I'], + [ + [ 'A1', 'B1'], + [ 'A2', 'B2'], + [ 'A3', 'B3'], + ], + ); + expect(best).toEqual(['I', 'A1', 'A2', 'A3']); + expect(generated[0]).toEqual(['I', 'A1', 'A2', 'A3']); + expect(generated[1]).toEqual(['I', 'B1', 'B2', 'B3']); +}) + +test('Bail early for more costly elements', () => { + const { best, generated } = generateTestPlans( + ['I'], + [ + [ 'A1', 'B1VeryCostly'], + [ 'A2', 'B2Co'], + [ 'A3', 'B3'], + ], + ); + + expect(best).toEqual(['I', 'A1', 'A2', 'A3']); + // We should ignore plans with both B1 and B2 due there cost. So we should have just 2 plans. + expect(generated).toHaveLength(2); + expect(generated[0]).toEqual(['I', 'A1', 'A2', 'A3']); + expect(generated[1]).toEqual(['I', 'A1', 'A2', 'B3']); +}) + +test('Handles branches of various sizes', () => { + const { best, generated } = generateTestPlans( + ['I'], + [ + [ 'A1x', 'B1'], + [ 'A2', 'B2Costly', 'C2'], + [ 'A3'], + [ 'A4', 'B4' ], + ], + ); + + expect(best).toEqual(['I', 'B1', 'A2', 'A3', 'A4']); + // We should generate every option, except those including `B2Costly` + expectSamePlans([ + [ 'I', 'A1x', 'A2', 'A3', 'A4' ], + [ 'I', 'A1x', 'A2', 'A3', 'B4' ], + [ 'I', 'A1x', 'C2', 'A3', 'A4' ], + [ 'I', 'A1x', 'C2', 'A3', 'B4' ], + [ 'I', 'B1', 'A2', 'A3', 'A4' ], + [ 'I', 'B1', 'A2', 'A3', 'B4' ], + [ 'I', 'B1', 'C2', 'A3', 'A4' ], + [ 'I', 'B1', 'C2', 'A3', 'B4' ], + ], generated); +}) diff --git a/query-planner-js/src/buildPlan.ts b/query-planner-js/src/buildPlan.ts index b36d569ce..d0c702575 100644 --- a/query-planner-js/src/buildPlan.ts +++ b/query-planner-js/src/buildPlan.ts @@ -93,6 +93,7 @@ import { import { stripIgnoredCharacters, print, parse, OperationTypeNode } from "graphql"; import { DeferredNode, FetchDataInputRewrite, FetchDataOutputRewrite } from "."; import { enforceQueryPlannerConfigDefaults, QueryPlannerConfig } from "./config"; +import { generateAllPlansAndFindBest } from "./generateAllPlans"; import { QueryPlan, ResponsePath, SequenceNode, PlanNode, ParallelNode, FetchNode, trimSelectionNodes } from "./QueryPlan"; const debug = newDebugLogger('plan'); @@ -507,45 +508,35 @@ class QueryPlanningTaversal { initialDependencyGraph = this.updatedDependencyGraph(this.newDependencyGraph(), initialTree); if (idxFirstOfLengthOne === 0) { // Well, we have the only possible plan; it's also the best. - this.onNewPlan(initialDependencyGraph, initialTree); + this.bestPlan = [initialDependencyGraph, initialTree, this.cost(initialDependencyGraph)]; return; } } const otherTrees = this.closedBranches.slice(0, idxFirstOfLengthOne).map(b => b.map(opt => PathTree.createFromOpPaths(this.subgraphs, this.startVertex, opt))); - this.generateAllPlans(initialDependencyGraph, initialTree, otherTrees); - } - - generateAllPlans(initialDependencyGraph: FetchDependencyGraph, initialTree: OpPathTree, others: OpPathTree[][]) { - // Track, for each element, at which index we are - const eltIndexes = new Array(others.length); - let totalCombinations = 1; - for (let i = 0; i < others.length; ++i) { - const eltSize = others[i].length; - assert(eltSize > 0, "Got empty option: this shouldn't have happened"); - eltIndexes[i] = 0; - totalCombinations *= eltSize; - } - - for (let i = 0; i < totalCombinations; ++i){ - const dependencyGraph = initialDependencyGraph.clone(); - let tree = initialTree; - for (let j = 0; j < others.length; ++j) { - const t = others[j][eltIndexes[j]]; - this.updatedDependencyGraph(dependencyGraph, t); - tree = tree.merge(t); - } - this.onNewPlan(dependencyGraph, tree); - - for (let idx = 0; idx < others.length; ++idx) { - if (eltIndexes[idx] == others[idx].length - 1) { - eltIndexes[idx] = 0; - } else { - eltIndexes[idx] += 1; - break; - } - } - } + const { best, cost} = generateAllPlansAndFindBest( + { graph: initialDependencyGraph, tree: initialTree }, + otherTrees, + (p, t) => { + const updatedDependencyGraph = p.graph.clone(); + this.updatedDependencyGraph(updatedDependencyGraph, t); + const updatedTree = p.tree.merge(t); + return { graph: updatedDependencyGraph, tree: updatedTree }; + }, + (p) => this.cost(p.graph), + (p, cost, prevCost) => { + debug.log(() => { + if (!prevCost) { + return `Computed plan with cost ${cost}: ${p.tree}`; + } else if (cost > prevCost) { + return `Found better with cost ${cost} (previous had cost ${prevCost}: ${p.tree}`; + } else { + return `Ignoring plan with cost ${cost} (a better plan with cost ${prevCost} exists): ${p.tree}` + } + }); + }, + ); + this.bestPlan = [best.graph, best.tree, cost]; } private cost(dependencyGraph: FetchDependencyGraph): number { @@ -581,19 +572,6 @@ class QueryPlanningTaversal { // condition" within `advanceSimultaneousPathsWithOperation`. return bestPlan ? { satisfied: true, cost: bestPlan[2], pathTree: bestPlan[1] } : unsatisfiedConditionsResolution; } - - private onNewPlan(dependencyGraph: FetchDependencyGraph, tree: OpPathTree) { - const cost = this.cost(dependencyGraph); - //if (isTopLevel) { - // console.log(`[PLAN] cost: ${cost}, path:\n${pathSet.toString('', true)}`); - //} - if (!this.bestPlan || cost < this.bestPlan[2]) { - debug.log(() => this.bestPlan ? `Found better with cost ${cost} (previous had cost ${this.bestPlan[2]}): ${tree}`: `Computed plan with cost ${cost}: ${tree}`); - this.bestPlan = [dependencyGraph, tree, cost]; - } else { - debug.log(() => `Ignoring plan with cost ${cost} (a better plan with cost ${this.bestPlan![2]} exists): ${tree}`); - } - } } type UnhandledGroups = [FetchGroup, UnhandledParentRelations][]; diff --git a/query-planner-js/src/generateAllPlans.ts b/query-planner-js/src/generateAllPlans.ts new file mode 100644 index 000000000..4aa9a9816 --- /dev/null +++ b/query-planner-js/src/generateAllPlans.ts @@ -0,0 +1,170 @@ +import { assert } from "@apollo/federation-internals"; + +type Choices = (T | undefined)[]; + +type Partial = { + partial: P, + remaining: Choices[], + isRoot: boolean, + index?: number, +} + +/** + * Given some initial partial plan and a list of options for the remaining parts that need to be added to that plan to make it complete, + * this method "efficiently" generates (or at least evaluate) all the possible complete plans and the returns the "best" one (the one + * with the lowest cost). + * + * Note that this method abstracts the actualy types of both plans and additional elements to add to the plan, and this both for clarity + * and to make testing of this method easier. But type parameter `P` should be though of as abstracting a query plan (in practice, it + * is instanciated to a pair of a [`DependencyGraph`, corresponding `PathTree`]), whith `E` should be though of as an additional element + * to add to the plan to make it complete (instanciated in practice by a `PathTree` for ... reasons ... but one that really correspond to + * a single `GraphPath`). + * + * As said above, this method takes 2 arguments: + * - `initial` is a partial plan, and corresponds to all the parts of the query being planned for which there no choices (and + * theoretically can be empty, though very very rarely is in practice). + * - `toAdd` is the list of additional elements to add to `initial` ot make a full plan of the query being planned. Each element of + * `toAdd` corresponds to one of the query "leaf" and is itself a list of all the possible options for that "leaf". + * + * In other words, a comple plan is obtained by picking one choice in each of the element of `toAdd` (so picking `toAdd.length` element) + * and adding them all to `initial`. The question being, which particular choice for each element of `toAdd` yield the best plan. + * + * Of course, the total number of possible plans is the cartesian product of `toAdd`, which can get large, and so this method is trying + * to trim some of the options. For that, the general idea is that we first generate one of the plan, compute its cost, and then as + * we build other options, we can check as we pick elements of `toAdd` the cost of what we get, and if we ever get a higher cost than + * the one fo the complete plan we already have, then there is no point in checking the remaining elements, and we can thus cut all + * the options for the remaining elements. In other words, if a partial plan is ever already more costly than another full plan we + * have computed, then adding more will never get us a better plan. + * + * Of course, this method is not guaranteed to save work, and in the worst case, we'll still generate all plans. But when a "good" + * plan is generated early, it can save a lot of computing work. + * + * And the 2nd "trick" of this method is that it starts by generating the plans that correspond to picking choices in `toAdd` at + * the same indexes, and this because this often actually generate good plans. The reason is that the order of choices for each + * element of `toAdd` is not necessarily random, because the algorithm generating paths is not random either. In other words, elements + * at similar indexes have some good change to correspond to similar choices, and so will tend to correspond to good plans. + * + * @param initial - the initial partial plan to use. + * @param toAdd - a list of the remaining "elements" to add to `initial`. Each element of `toAdd` correspond to multiple choice we can + * use to plan that particular element. + * @param addFct - how to obtain a new plan by taking some plan and adding a new element to it. + * @param costFct - how to compute the cost of a plan. + * @param onPlan - an optional method called on every _complete_ plan generated by this method, with both the cost of that plan and + * the best cost we have generated thus far (if that's not the first plan generated). This mostly exists to allow some debugging. + */ +export function generateAllPlansAndFindBest( + initial: P, + toAdd: E[][], + addFct: (p: P, e: E) => P, + costFct: (p: P) => number, + onPlan: (p: P, cost: number, previousCost: number | undefined) => void = () => {}, +): { + best: P, + cost: number, +}{ + const stack: Partial[] = [{ + partial: initial, + remaining: toAdd, + isRoot: true, + index: 0, + }]; + + let min: { best: P, cost: number } | undefined = undefined; + + while (stack.length > 0) { + const { partial, remaining, isRoot, index } = stack.pop()!; + const nextChoices = remaining[0]; + const otherChoices = remaining.slice(1); + + const pickedIndex = pickNext(index, nextChoices); + const { extracted, updatedChoices, isLast } = extract(pickedIndex, nextChoices); + + if (!isLast) { + // First, re-insert what correspond to all the choices that dot _not_ pick `extracted`. + insertInStack({ + partial, + remaining: [updatedChoices].concat(otherChoices), + isRoot, + index: isRoot && index !== undefined && index < nextChoices.length - 1 ? index + 1 : undefined, + }, stack); + } + + const newPartial = addFct(partial, extracted); + if (otherChoices.length === 0) { + // We have a complete plan. Compute the cost, check if it is best and based on that, + // provide it to `onGenerated` or discard it. + const cost = costFct(newPartial); + const isNewMin = min === undefined || cost < min.cost; + onPlan(newPartial, cost, min?.cost); + if (isNewMin) { + min = { + best: newPartial, + cost + }; + } + continue; + } + + if (min !== undefined) { + // We're not done, but we've already generated a plan with a score, so we check if + // what we have so far is already more costly, and if it is, we skip this branch + // entirely. + const cost = costFct(newPartial); + if (cost >= min.cost) { + continue; + } + } + + insertInStack({ + partial: newPartial, + remaining: otherChoices, + isRoot: false, + index + }, stack); + } + + assert(min, 'A plan should have been found'); + return min; +} + +function insertInStack(elt: Partial, stack: Partial[]) { + // We push elements with a fixed index at the end so they are handled first. + if (elt.index !== undefined) { + stack.push(elt); + } else { + stack.unshift(elt); + } +} + +function pickNext(index: number | undefined, remaining: Choices): number { + if (index === undefined || index >= remaining.length) { + for (let i = 0; i < remaining.length; i++) { + if (remaining[i] !== undefined) { + return i; + } + } + assert(false, 'Passed a "remaining" with all undefined'); + } else { + assert(remaining[index] !== undefined, () => `Invalid index ${index}`); + return index; + } +} + +function extract(index: number, choices: Choices): { extracted: E, isLast: boolean, updatedChoices: Choices} { + const extracted = choices[index]; + assert(extracted !== undefined, () => `Index ${index} of ${choices} is undefined`) + const updatedChoices = new Array(choices.length); + let isLast = true; + for (let i = 0; i < choices.length; i++) { + if (i !== index) { + isLast &&= choices[i] === undefined; + updatedChoices[i] = choices[i]; + } + } + return { + extracted, + isLast, + updatedChoices, + }; +} + From 18448464c03c11c051742acc9f000107eff077ad Mon Sep 17 00:00:00 2001 From: Sylvain Lebresne Date: Tue, 10 Jan 2023 10:25:21 +0100 Subject: [PATCH 2/2] Review feedback and changelogs --- gateway-js/CHANGELOG.md | 1 + query-planner-js/CHANGELOG.md | 1 + .../src/__tests__/generateAllPlans.test.ts | 10 +++++----- query-planner-js/src/buildPlan.ts | 14 +++++++------- query-planner-js/src/generateAllPlans.ts | 12 +++++++++--- 5 files changed, 23 insertions(+), 15 deletions(-) diff --git a/gateway-js/CHANGELOG.md b/gateway-js/CHANGELOG.md index 47c2c3020..51b7340df 100644 --- a/gateway-js/CHANGELOG.md +++ b/gateway-js/CHANGELOG.md @@ -19,6 +19,7 @@ This CHANGELOG pertains only to Apollo Federation packages in the 2.x range. The - Adds support for the 0.3 version of the tag spec, which adds `@tag` directive support for the `SCHEMA` location [PR #2314](https://github.com/apollographql/federation/pull/2314). - Fix potential issue with nested `@defer` in non-deferrable case [PR #2312](https://github.com/apollographql/federation/pull/2312). - Fixes composition issues with `@interfaceObject` [PR #2318](https://github.com/apollographql/federation/pull/2318). +- Improves generation of plans once all path options are computed [PR #2316](https://github.com/apollographql/federation/pull/2316). ## 2.2.2 diff --git a/query-planner-js/CHANGELOG.md b/query-planner-js/CHANGELOG.md index 9f368d03c..0506725c7 100644 --- a/query-planner-js/CHANGELOG.md +++ b/query-planner-js/CHANGELOG.md @@ -6,6 +6,7 @@ This CHANGELOG pertains only to Apollo Federation packages in the 2.x range. The - Fix potential issue with nested `@defer` in non-deferrable case [PR #2312](https://github.com/apollographql/federation/pull/2312). - Fix possible assertion error during query planning [PR #2299](https://github.com/apollographql/federation/pull/2299). +- Improves generation of plans once all path options are computed [PR #2316](https://github.com/apollographql/federation/pull/2316). ## 2.2.2 diff --git a/query-planner-js/src/__tests__/generateAllPlans.test.ts b/query-planner-js/src/__tests__/generateAllPlans.test.ts index 701090bbd..b95f50d34 100644 --- a/query-planner-js/src/__tests__/generateAllPlans.test.ts +++ b/query-planner-js/src/__tests__/generateAllPlans.test.ts @@ -4,21 +4,21 @@ import { generateAllPlansAndFindBest } from '../generateAllPlans'; function generateTestPlans(initial: string[], choices: string[][]): { best: string[], generated: string[][], evaluated: string[][] } { const generated: string[][] = []; const evaluated: string[][] = []; - const { best } = generateAllPlansAndFindBest( + const { best } = generateAllPlansAndFindBest({ initial, - choices, - (p, c) => { + toAdd: choices, + addFct: (p, c) => { const r = p.concat(c); if (r.length === initial.length + choices.length) { generated.push(r); } return r; }, - (p) => { + costFct: (p) => { evaluated.push(p); return p.reduce((acc, v) => acc + v.length, 0); }, - ); + }); return { best, generated, evaluated }; } diff --git a/query-planner-js/src/buildPlan.ts b/query-planner-js/src/buildPlan.ts index d0c702575..f8902510b 100644 --- a/query-planner-js/src/buildPlan.ts +++ b/query-planner-js/src/buildPlan.ts @@ -514,17 +514,17 @@ class QueryPlanningTaversal { } const otherTrees = this.closedBranches.slice(0, idxFirstOfLengthOne).map(b => b.map(opt => PathTree.createFromOpPaths(this.subgraphs, this.startVertex, opt))); - const { best, cost} = generateAllPlansAndFindBest( - { graph: initialDependencyGraph, tree: initialTree }, - otherTrees, - (p, t) => { + const { best, cost} = generateAllPlansAndFindBest({ + initial: { graph: initialDependencyGraph, tree: initialTree }, + toAdd: otherTrees, + addFct: (p, t) => { const updatedDependencyGraph = p.graph.clone(); this.updatedDependencyGraph(updatedDependencyGraph, t); const updatedTree = p.tree.merge(t); return { graph: updatedDependencyGraph, tree: updatedTree }; }, - (p) => this.cost(p.graph), - (p, cost, prevCost) => { + costFct: (p) => this.cost(p.graph), + onPlan: (p, cost, prevCost) => { debug.log(() => { if (!prevCost) { return `Computed plan with cost ${cost}: ${p.tree}`; @@ -535,7 +535,7 @@ class QueryPlanningTaversal { } }); }, - ); + }); this.bestPlan = [best.graph, best.tree, cost]; } diff --git a/query-planner-js/src/generateAllPlans.ts b/query-planner-js/src/generateAllPlans.ts index 4aa9a9816..1f7916291 100644 --- a/query-planner-js/src/generateAllPlans.ts +++ b/query-planner-js/src/generateAllPlans.ts @@ -52,13 +52,19 @@ type Partial = { * @param onPlan - an optional method called on every _complete_ plan generated by this method, with both the cost of that plan and * the best cost we have generated thus far (if that's not the first plan generated). This mostly exists to allow some debugging. */ -export function generateAllPlansAndFindBest( +export function generateAllPlansAndFindBest({ + initial, + toAdd, + addFct, + costFct, + onPlan = () => {}, +}: { initial: P, toAdd: E[][], addFct: (p: P, e: E) => P, costFct: (p: P) => number, - onPlan: (p: P, cost: number, previousCost: number | undefined) => void = () => {}, -): { + onPlan?: (p: P, cost: number, previousCost: number | undefined) => void, +}): { best: P, cost: number, }{