From 29f8c9bbdebe1c787fbdf3940a75da0ed842bc73 Mon Sep 17 00:00:00 2001 From: Trevor Scheer Date: Thu, 27 Feb 2020 11:59:23 -0800 Subject: [PATCH 1/6] Implement gateway retry logic for requests to GCS --- .../integration/networkRequests.test.ts | 82 +++++++++++++++++-- packages/apollo-gateway/src/index.ts | 32 ++++++-- 2 files changed, 98 insertions(+), 16 deletions(-) diff --git a/packages/apollo-gateway/src/__tests__/integration/networkRequests.test.ts b/packages/apollo-gateway/src/__tests__/integration/networkRequests.test.ts index 10a97831655..0645abe3a37 100644 --- a/packages/apollo-gateway/src/__tests__/integration/networkRequests.test.ts +++ b/packages/apollo-gateway/src/__tests__/integration/networkRequests.test.ts @@ -1,17 +1,20 @@ import nock from 'nock'; -import { ApolloGateway } from '../..'; +import { fetch } from 'apollo-server-env'; +import { ApolloGateway, GCS_RETRY_COUNT, getDefaultGcsFetcher } from '../..'; import { mockLocalhostSDLQuery, mockStorageSecretSuccess, + mockStorageSecret, mockCompositionConfigLinkSuccess, + mockCompositionConfigLink, mockCompositionConfigsSuccess, + mockCompositionConfigs, mockImplementingServicesSuccess, + mockImplementingServices, mockRawPartialSchemaSuccess, + mockRawPartialSchema, apiKeyHash, graphId, - mockImplementingServices, - mockRawPartialSchema, - mockCompositionConfigLink, } from './nockMocks'; import loadServicesFromStorage = require("../../loadServicesFromStorage"); @@ -20,6 +23,8 @@ import loadServicesFromStorage = require("../../loadServicesFromStorage"); // Anything wrapped within the gql tag within this file is just a string, not an AST. const gql = String.raw; +let fetcher: typeof fetch; + const service = { implementingServicePath: 'service-definition.json', partialSchemaPath: 'accounts-partial-schema.json', @@ -36,7 +41,7 @@ const service = { name: String username: String } - ` + `, }; const updatedService = { @@ -55,11 +60,19 @@ const updatedService = { name: String username: String } - ` -} + `, +}; beforeEach(() => { if (!nock.isActive()) nock.activate(); + + fetcher = getDefaultGcsFetcher().defaults({ + retry: { + retries: GCS_RETRY_COUNT, + minTimeout: 10, + maxTimeout: 100, + } + }); }); afterEach(() => { @@ -195,3 +208,58 @@ it('Rollsback to a previous schema when triggered', async () => { await secondSchemaChangeBlocker; expect(onChange.mock.calls.length).toBe(2); }); + +function failNTimes(n: number, fn: () => nock.Interceptor) { + for (let i = 0; i < n; i++) { + fn().reply(500); + } +} + +it(`Retries GCS (up to ${GCS_RETRY_COUNT} times) on failure for each request and succeeds`, async () => { + failNTimes(GCS_RETRY_COUNT, mockStorageSecret); + mockStorageSecretSuccess(); + + failNTimes(GCS_RETRY_COUNT, mockCompositionConfigLink); + mockCompositionConfigLinkSuccess(); + + failNTimes(GCS_RETRY_COUNT, mockCompositionConfigs); + mockCompositionConfigsSuccess([service.implementingServicePath]); + + failNTimes(GCS_RETRY_COUNT, () => mockImplementingServices(service)); + mockImplementingServicesSuccess(service); + + failNTimes(GCS_RETRY_COUNT, () => mockRawPartialSchema(service)); + mockRawPartialSchemaSuccess(service); + + const gateway = new ApolloGateway({ fetcher }); + + await gateway.load({ engine: { apiKeyHash, graphId } }); + expect(gateway.schema!.getType('User')!.description).toBe('This is my User'); +}); + +it(`Fails after the ${GCS_RETRY_COUNT + 1}th attempt to reach GCS`, async () => { + failNTimes(GCS_RETRY_COUNT + 1, mockStorageSecret); + + const gateway = new ApolloGateway({ fetcher }); + await expect( + gateway.load({ engine: { apiKeyHash, graphId } }), + ).rejects.toThrowErrorMatchingInlineSnapshot( + `"Could not communicate with Apollo Graph Manager storage: "`, + ); +}); + +it(`Errors when the secret isn't hosted on GCS`, async () => { + mockStorageSecret().reply( + 403, + `AccessDenied + Anonymous caller does not have storage.objects.get`, + { 'content-type': 'application/xml' }, + ); + + const gateway = new ApolloGateway({ fetcher }); + await expect( + gateway.load({ engine: { apiKeyHash, graphId } }), + ).rejects.toThrowErrorMatchingInlineSnapshot( + `"Unable to authenticate with Apollo Graph Manager storage while fetching https://storage.googleapis.com/engine-partial-schema-prod/federated-service/storage-secret/dd55a79d467976346d229a7b12b673ce.json"`, + ); +}); diff --git a/packages/apollo-gateway/src/index.ts b/packages/apollo-gateway/src/index.ts index fcc911300d2..e8bba2b2952 100644 --- a/packages/apollo-gateway/src/index.ts +++ b/packages/apollo-gateway/src/index.ts @@ -162,6 +162,28 @@ type WarnedStates = { remoteWithLocalConfig?: boolean; }; +export const GCS_RETRY_COUNT = 5; + +export function getDefaultGcsFetcher() { + return fetcher.defaults({ + cacheManager: new HttpRequestCache(), + // All headers should be lower-cased here, as `make-fetch-happen` + // treats differently cased headers as unique (unlike the `Headers` object). + // @see: https://git.io/JvRUa + headers: { + 'user-agent': `apollo-gateway/${require('../package.json').version}`, + }, + retry: { + retries: GCS_RETRY_COUNT, + // 1 second + minTimeout: 1000, + // 60 seconds - but this shouldn't be reachable based on current settings + maxTimeout: 60 * 1000, + randomize: true, + }, + }); +} + export class ApolloGateway implements GraphQLService { public schema?: GraphQLSchema; protected serviceMap: DataSourceCache = Object.create(null); @@ -176,15 +198,7 @@ export class ApolloGateway implements GraphQLService { private serviceSdlCache = new Map(); private warnedStates: WarnedStates = Object.create(null); - private fetcher: typeof fetch = fetcher.defaults({ - cacheManager: new HttpRequestCache(), - // All headers should be lower-cased here, as `make-fetch-happen` - // treats differently cased headers as unique (unlike the `Headers` object). - // @see: https://git.io/JvRUa - headers: { - 'user-agent': `apollo-gateway/${require('../package.json').version}` - } - }); + private fetcher: typeof fetch = getDefaultGcsFetcher(); // Observe query plan, service info, and operation info prior to execution. // The information made available here will give insight into the resulting From 4872c87e5ff120c7994e56ecce4a8e5d86dfd4c5 Mon Sep 17 00:00:00 2001 From: Trevor Scheer Date: Tue, 10 Mar 2020 17:47:21 -0700 Subject: [PATCH 2/6] Update snapshot --- .../src/__tests__/integration/networkRequests.test.ts | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/packages/apollo-gateway/src/__tests__/integration/networkRequests.test.ts b/packages/apollo-gateway/src/__tests__/integration/networkRequests.test.ts index 0645abe3a37..b078be88e8b 100644 --- a/packages/apollo-gateway/src/__tests__/integration/networkRequests.test.ts +++ b/packages/apollo-gateway/src/__tests__/integration/networkRequests.test.ts @@ -71,7 +71,7 @@ beforeEach(() => { retries: GCS_RETRY_COUNT, minTimeout: 10, maxTimeout: 100, - } + }, }); }); @@ -237,7 +237,8 @@ it(`Retries GCS (up to ${GCS_RETRY_COUNT} times) on failure for each request and expect(gateway.schema!.getType('User')!.description).toBe('This is my User'); }); -it(`Fails after the ${GCS_RETRY_COUNT + 1}th attempt to reach GCS`, async () => { +it(`Fails after the ${GCS_RETRY_COUNT + + 1}th attempt to reach GCS`, async () => { failNTimes(GCS_RETRY_COUNT + 1, mockStorageSecret); const gateway = new ApolloGateway({ fetcher }); @@ -260,6 +261,6 @@ it(`Errors when the secret isn't hosted on GCS`, async () => { await expect( gateway.load({ engine: { apiKeyHash, graphId } }), ).rejects.toThrowErrorMatchingInlineSnapshot( - `"Unable to authenticate with Apollo Graph Manager storage while fetching https://storage.googleapis.com/engine-partial-schema-prod/federated-service/storage-secret/dd55a79d467976346d229a7b12b673ce.json"`, + `"Unable to authenticate with Apollo Graph Manager storage while fetching https://storage.googleapis.com/engine-partial-schema-prod/federated-service/storage-secret/dd55a79d467976346d229a7b12b673ce.json. Ensure that the API key is configured properly and that a federated service has been pushed. For details, see https://go.apollo.dev/g/resolve-access-denied."`, ); }); From d972b77568a32d099bb13ca2085a08d614aac10e Mon Sep 17 00:00:00 2001 From: Trevor Scheer Date: Thu, 19 Mar 2020 11:25:42 -0700 Subject: [PATCH 3/6] Incorporate feedback from @abernix --- .../integration/networkRequests.test.ts | 3 +- packages/apollo-gateway/src/index.ts | 37 +++++++++++-------- 2 files changed, 23 insertions(+), 17 deletions(-) diff --git a/packages/apollo-gateway/src/__tests__/integration/networkRequests.test.ts b/packages/apollo-gateway/src/__tests__/integration/networkRequests.test.ts index b078be88e8b..d806954f74c 100644 --- a/packages/apollo-gateway/src/__tests__/integration/networkRequests.test.ts +++ b/packages/apollo-gateway/src/__tests__/integration/networkRequests.test.ts @@ -237,8 +237,7 @@ it(`Retries GCS (up to ${GCS_RETRY_COUNT} times) on failure for each request and expect(gateway.schema!.getType('User')!.description).toBe('This is my User'); }); -it(`Fails after the ${GCS_RETRY_COUNT + - 1}th attempt to reach GCS`, async () => { +it(`Fails after the ${GCS_RETRY_COUNT + 1}th attempt to reach GCS`, async () => { failNTimes(GCS_RETRY_COUNT + 1, mockStorageSecret); const gateway = new ApolloGateway({ fetcher }); diff --git a/packages/apollo-gateway/src/index.ts b/packages/apollo-gateway/src/index.ts index e8bba2b2952..57dec9ceff7 100644 --- a/packages/apollo-gateway/src/index.ts +++ b/packages/apollo-gateway/src/index.ts @@ -431,7 +431,7 @@ export class ApolloGateway implements GraphQLService { } this.onSchemaChangeListeners.add(callback); - if (!this.pollingTimer) this.startPollingServices(); + if (!this.pollingTimer) this.pollServices(); return () => { this.onSchemaChangeListeners.delete(callback); @@ -442,21 +442,28 @@ export class ApolloGateway implements GraphQLService { }; } - private startPollingServices() { - if (this.pollingTimer) clearInterval(this.pollingTimer); + private async pollServices() { + if (this.pollingTimer) clearTimeout(this.pollingTimer); - this.pollingTimer = setInterval(async () => { - try { - await this.updateComposition(); - } catch (err) { - this.logger.error(err && err.message || err); - } - }, this.experimental_pollInterval || 10000); + try { + await this.updateComposition(); + } catch (err) { + this.logger.error(err && err.message || err); + } + + // Sleep for the specified pollInterval before kicking off another round of polling + await new Promise(res => { + this.pollingTimer = setTimeout( + res, + this.experimental_pollInterval || 10000, + ); + // Prevent the Node.js event loop from remaining active (and preventing, + // e.g. process shutdown) by calling `unref` on the `Timeout`. For more + // information, see https://nodejs.org/api/timers.html#timers_timeout_unref. + this.pollingTimer?.unref(); + }); - // Prevent the Node.js event loop from remaining active (and preventing, - // e.g. process shutdown) by calling `unref` on the `Timeout`. For more - // information, see https://nodejs.org/api/timers.html#timers_timeout_unref. - this.pollingTimer.unref(); + this.pollServices(); } private createAndCacheDataSource( @@ -522,7 +529,7 @@ export class ApolloGateway implements GraphQLService { this.logger.warn( "A local gateway service list is overriding an Apollo Graph " + "Manager managed configuration. To use the managed " + - "configuration, do not specifiy a service list locally.", + "configuration, do not specify a service list locally.", ); }).catch(() => {}); // Don't mind errors if managed config is missing. } From 1285e1fd185642b5a9bd8c9ec8833d6a5739acd1 Mon Sep 17 00:00:00 2001 From: Trevor Scheer Date: Thu, 19 Mar 2020 17:37:54 -0700 Subject: [PATCH 4/6] Introduce default value with additional comments for clarity --- packages/apollo-gateway/src/index.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/apollo-gateway/src/index.ts b/packages/apollo-gateway/src/index.ts index 57dec9ceff7..bb8e6f62a48 100644 --- a/packages/apollo-gateway/src/index.ts +++ b/packages/apollo-gateway/src/index.ts @@ -175,10 +175,10 @@ export function getDefaultGcsFetcher() { }, retry: { retries: GCS_RETRY_COUNT, + // The default factor: expected attempts at 0, 1, 3, 7, 15, and 31 seconds elapsed + factor: 2, // 1 second minTimeout: 1000, - // 60 seconds - but this shouldn't be reachable based on current settings - maxTimeout: 60 * 1000, randomize: true, }, }); From ea10d29250be5d29a140309b50997a07f620166d Mon Sep 17 00:00:00 2001 From: Trevor Scheer Date: Thu, 19 Mar 2020 17:59:45 -0700 Subject: [PATCH 5/6] Adjust test fetcher retry parameters --- .../src/__tests__/integration/networkRequests.test.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/apollo-gateway/src/__tests__/integration/networkRequests.test.ts b/packages/apollo-gateway/src/__tests__/integration/networkRequests.test.ts index d806954f74c..0631355f3fa 100644 --- a/packages/apollo-gateway/src/__tests__/integration/networkRequests.test.ts +++ b/packages/apollo-gateway/src/__tests__/integration/networkRequests.test.ts @@ -23,8 +23,6 @@ import loadServicesFromStorage = require("../../loadServicesFromStorage"); // Anything wrapped within the gql tag within this file is just a string, not an AST. const gql = String.raw; -let fetcher: typeof fetch; - const service = { implementingServicePath: 'service-definition.json', partialSchemaPath: 'accounts-partial-schema.json', @@ -63,14 +61,16 @@ const updatedService = { `, }; +let fetcher: typeof fetch; + beforeEach(() => { if (!nock.isActive()) nock.activate(); fetcher = getDefaultGcsFetcher().defaults({ retry: { retries: GCS_RETRY_COUNT, - minTimeout: 10, - maxTimeout: 100, + minTimeout: 0, + maxTimeout: 0, }, }); }); From 3b25c9cdfc3dd1ebf1aa1f9f9975bbbdc6908965 Mon Sep 17 00:00:00 2001 From: Trevor Scheer Date: Mon, 23 Mar 2020 16:20:41 -0700 Subject: [PATCH 6/6] Update changelog --- packages/apollo-gateway/CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/apollo-gateway/CHANGELOG.md b/packages/apollo-gateway/CHANGELOG.md index 23abe4e1107..9bfbc833c7c 100644 --- a/packages/apollo-gateway/CHANGELOG.md +++ b/packages/apollo-gateway/CHANGELOG.md @@ -10,6 +10,7 @@ - Support providing a custom logger implementation (e.g. [`winston`](https://npm.im/winston), [`bunyan`](https://npm.im/bunyan), etc.) to capture gateway-sourced console output. This allows the use of existing, production logging facilities or the possibiltiy to use advanced structure in logging, such as console output which is encapsulated in JSON. The same PR that introduces this support also introduces a `logger` property to the `GraphQLRequestContext` that is exposed to `GraphQLDataSource`s and Apollo Server plugins, making it possible to attach additional properties (as supported by the logger implementation) to specific requests, if desired, by leveraging custom implementations in those components respectively. When not provided, these will still output to `console`. [PR #3894](https://github.com/apollographql/apollo-server/pull/3894) - Drop use of `loglevel-debug`. This removes the very long date and time prefix in front of each log line and also the support for the `DEBUG=apollo-gateway:` environment variable. Both of these were uncommonly necessary or seldom used (with the environment variable also being undocumented). The existing behavior can be preserved by providing a `logger` that uses `loglevel-debug`, if desired, and more details can be found in the PR. [PR #3896](https://github.com/apollographql/apollo-server/pull/3896) - Fix Typescript generic typing for datasource contexts [#3865](https://github.com/apollographql/apollo-server/pull/3865) This is a fix for the `TContext` typings of the gateway's exposed `GraphQLDataSource` implementations. In their current form, they don't work as intended, or in any manner that's useful for typing the `context` property throughout the class methods. This introduces a type argument `TContext` to the class itself (which defaults to `Record` for existing implementations) and removes the non-operational type arguments on the class methods themselves. +- Implement retry logic for requests to GCS [PR #3836](https://github.com/apollographql/apollo-server/pull/3836) Note: coupled with this change is a small alteration in how the gateway polls GCS for updates in managed mode. Previously, the tick was on a specific interval. Now, every tick starts after the round of fetches to GCS completes. For more details, see the linked PR. ## 0.13.2