Query planner cache key improvements (#6206)
Fix #5160

This splits out part of the work from #5255 to make it easier to merge.
It covers improvements and fixes to the query planner cache key arising from the changes to the query hashing algorithm and to query plan reuse during warmup.

Fixed:
* prefix each part of the Redis cache key so the keys become self-describing (see the sketch after this list)
* remove the custom Hash implementation for the cache key
* remove JSON serialization from cache key generation
* hash the Rust planner's configuration only once, instead of on every cache lookup
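
For illustration only (not part of the commit message or the diff), here is a minimal sketch of how the new self-describing key is assembled; it mirrors the Display implementation for CachingQueryKey in the diff below, and the hash values are truncated placeholders taken from the updated test fixtures.

```rust
// Sketch only: the segment values are illustrative placeholders.
fn example_cache_key() -> String {
    let cache_key_version = 1; // CACHE_KEY_VERSION, bumped by this change
    let federation_version = "v2.9.3"; // FEDERATION_VERSION, set at build time
    let query_hash = "70f115eb…"; // hash of the query and the schema
    let operation_hash = "3973e022…"; // SHA-256 of the operation name (or "-")
    let metadata_hash = "0ade8e18…"; // cache key metadata, plan options, and config mode
    format!("plan:cache:{cache_key_version}:federation:{federation_version}:{query_hash}:opname:{operation_hash}:metadata:{metadata_hash}")
}
```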

Co-authored-by: Ivan Goncharov <ivan.goncharov.ua@gmail.com>
Co-authored-by: Jeremy Lempereur <jeremy.lempereur@iomentum.com>
Co-authored-by: Gary Pennington <gary@apollographql.com>
Co-authored-by: Jesse Rosenberger <git@jro.cc>
Co-authored-by: Renée <renee.kooi@apollographql.com>
6 people authored Nov 5, 2024
1 parent a8ba726 commit 36bdb5e
Showing 4 changed files with 94 additions and 41 deletions.
@@ -0,0 +1,8 @@
### Query planner cache key improvements ([Issue #5160](https://github.com/apollographql/router/issues/5160))

> [!IMPORTANT]
> If you have enabled [Distributed query plan caching](https://www.apollographql.com/docs/router/configuration/distributed-caching/#distributed-query-plan-caching), this release changes the hashing algorithm used for the cache keys. Because of this, you should anticipate additional cache regeneration cost when updating between these versions while the new hashing algorithm comes into service.

This release brings several performance improvements to query plan cache key generation. In particular, it changes the distributed cache's key format: each key segment now carries a prefix, which makes the keys easier to identify when debugging.

By [@Geal](https://github.com/Geal) in https://github.com/apollographql/router/pull/6206
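
For illustration (not part of the changeset): a query plan cache key that previously looked like `plan:0:v2.9.3:<query hash>:<operation hash>:<metadata hash>` now reads `plan:cache:1:federation:v2.9.3:<query hash>:opname:<operation hash>:metadata:<metadata hash>`, so each segment can be identified at a glance when inspecting the distributed cache. Existing entries written under the old format will not be reused, which is the cache regeneration cost called out above.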
16 changes: 16 additions & 0 deletions apollo-router/src/configuration/mod.rs
@@ -412,6 +412,22 @@ impl Configuration {
type_conditioned_fetching: self.experimental_type_conditioned_fetching,
}
}

    pub(crate) fn rust_query_planner_config(
        &self,
    ) -> apollo_federation::query_plan::query_planner::QueryPlannerConfig {
        apollo_federation::query_plan::query_planner::QueryPlannerConfig {
            reuse_query_fragments: self.supergraph.reuse_query_fragments.unwrap_or(true),
            subgraph_graphql_validation: false,
            generate_query_fragments: self.supergraph.generate_query_fragments,
            incremental_delivery:
                apollo_federation::query_plan::query_planner::QueryPlanIncrementalDeliveryConfig {
                    enable_defer: self.supergraph.defer_support,
                },
            type_conditioned_fetching: self.experimental_type_conditioned_fetching,
            debug: Default::default(),
        }
    }
}

impl Default for Configuration {
99 changes: 64 additions & 35 deletions apollo-router/src/query_planner/caching_query_planner.rs
@@ -1,5 +1,6 @@
use std::collections::HashMap;
use std::hash::Hash;
use std::hash::Hasher;
use std::ops::Deref;
use std::sync::Arc;
use std::task;
@@ -14,7 +15,6 @@ use router_bridge::planner::PlanOptions;
use router_bridge::planner::Planner;
use router_bridge::planner::QueryPlannerConfig;
use router_bridge::planner::UsageReporting;
use serde::Serialize;
use sha2::Digest;
use sha2::Sha256;
use tower::BoxError;
@@ -57,11 +57,11 @@ pub(crate) type InMemoryCachePlanner =
InMemoryCache<CachingQueryKey, Result<QueryPlannerContent, Arc<QueryPlannerError>>>;
pub(crate) const APOLLO_OPERATION_ID: &str = "apollo_operation_id";

#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize)]
#[derive(Debug, Clone, Hash)]
pub(crate) enum ConfigMode {
//FIXME: add the Rust planner structure once it is hashable and serializable,
// for now use the JS config as it expected to be identical to the Rust one
Rust(Arc<QueryPlannerConfig>),
Rust(Arc<apollo_federation::query_plan::query_planner::QueryPlannerConfig>),
Both(Arc<QueryPlannerConfig>),
BothBestEffort(Arc<QueryPlannerConfig>),
Js(Arc<QueryPlannerConfig>),
@@ -80,7 +80,7 @@ pub(crate) struct CachingQueryPlanner<T: Clone> {
subgraph_schemas: Arc<HashMap<String, Arc<Valid<apollo_compiler::Schema>>>>,
plugins: Arc<Plugins>,
enable_authorization_directives: bool,
config_mode: ConfigMode,
config_mode_hash: Arc<QueryHash>,
}

fn init_query_plan_from_redis(
@@ -125,28 +125,42 @@ where
let enable_authorization_directives =
AuthorizationPlugin::enable_directives(configuration, &schema).unwrap_or(false);

let config_mode = match configuration.experimental_query_planner_mode {
let mut hasher = StructHasher::new();
match configuration.experimental_query_planner_mode {
crate::configuration::QueryPlannerMode::New => {
ConfigMode::Rust(Arc::new(configuration.js_query_planner_config()))
"PLANNER-NEW".hash(&mut hasher);
ConfigMode::Rust(Arc::new(configuration.rust_query_planner_config()))
.hash(&mut hasher);
}
crate::configuration::QueryPlannerMode::Legacy => {
ConfigMode::Js(Arc::new(configuration.js_query_planner_config()))
"PLANNER-LEGACY".hash(&mut hasher);
ConfigMode::Js(Arc::new(configuration.js_query_planner_config())).hash(&mut hasher);
}
crate::configuration::QueryPlannerMode::Both => {
"PLANNER-BOTH".hash(&mut hasher);
ConfigMode::Both(Arc::new(configuration.js_query_planner_config()))
.hash(&mut hasher);
ConfigMode::Rust(Arc::new(configuration.rust_query_planner_config()))
.hash(&mut hasher);
}
crate::configuration::QueryPlannerMode::BothBestEffort => {
"PLANNER-BOTH-BEST-EFFORT".hash(&mut hasher);
ConfigMode::BothBestEffort(Arc::new(configuration.js_query_planner_config()))
.hash(&mut hasher);
ConfigMode::Rust(Arc::new(configuration.rust_query_planner_config()))
.hash(&mut hasher);
}
};
let config_mode_hash = Arc::new(QueryHash(hasher.finalize()));

Ok(Self {
cache,
delegate,
schema,
subgraph_schemas,
plugins: Arc::new(plugins),
enable_authorization_directives,
config_mode,
config_mode_hash,
})
}

@@ -204,7 +218,7 @@ where
hash: Some(hash.clone()),
metadata: metadata.clone(),
plan_options: plan_options.clone(),
config_mode: self.config_mode.clone(),
config_mode: self.config_mode_hash.clone(),
},
)
.take(count)
@@ -249,7 +263,7 @@ where
hash: None,
metadata: CacheKeyMetadata::default(),
plan_options: PlanOptions::default(),
config_mode: self.config_mode.clone(),
config_mode: self.config_mode_hash.clone(),
});
}
}
@@ -284,7 +298,7 @@ where
schema_id: Arc::clone(&self.schema.schema_id),
metadata,
plan_options,
config_mode: self.config_mode.clone(),
config_mode: self.config_mode_hash.clone(),
};

if experimental_reuse_query_plans {
@@ -490,7 +504,7 @@ where
schema_id: Arc::clone(&self.schema.schema_id),
metadata,
plan_options,
config_mode: self.config_mode.clone(),
config_mode: self.config_mode_hash.clone(),
};

let context = request.context.clone();
@@ -632,20 +646,20 @@ fn stats_report_key_hash(stats_report_key: &str) -> String {
hex::encode(result)
}

#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub(crate) struct CachingQueryKey {
pub(crate) query: String,
pub(crate) schema_id: Arc<String>,
pub(crate) operation: Option<String>,
pub(crate) hash: Arc<QueryHash>,
pub(crate) metadata: CacheKeyMetadata,
pub(crate) plan_options: PlanOptions,
pub(crate) config_mode: ConfigMode,
pub(crate) config_mode: Arc<QueryHash>,
}

// Update this key every time the cache key or the query plan format has to change.
// When changed it MUST BE CALLED OUT PROMINENTLY IN THE CHANGELOG.
const CACHE_KEY_VERSION: usize = 0;
const CACHE_KEY_VERSION: usize = 1;
const FEDERATION_VERSION: &str = std::env!("FEDERATION_VERSION");

impl std::fmt::Display for CachingQueryKey {
@@ -654,42 +668,57 @@ impl std::fmt::Display for CachingQueryKey {
hasher.update(self.operation.as_deref().unwrap_or("-"));
let operation = hex::encode(hasher.finalize());

let mut hasher = Sha256::new();
hasher.update(serde_json::to_vec(&self.metadata).expect("serialization should not fail"));
hasher
.update(serde_json::to_vec(&self.plan_options).expect("serialization should not fail"));
hasher
.update(serde_json::to_vec(&self.config_mode).expect("serialization should not fail"));
hasher.update(&*self.schema_id);
let mut hasher = StructHasher::new();
"^metadata".hash(&mut hasher);
self.metadata.hash(&mut hasher);
"^plan_options".hash(&mut hasher);
self.plan_options.hash(&mut hasher);
"^config_mode".hash(&mut hasher);
self.config_mode.hash(&mut hasher);
let metadata = hex::encode(hasher.finalize());

write!(
f,
"plan:{}:{}:{}:{}:{}",
"plan:cache:{}:federation:{}:{}:opname:{}:metadata:{}",
CACHE_KEY_VERSION, FEDERATION_VERSION, self.hash, operation, metadata,
)
}
}

impl Hash for CachingQueryKey {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
self.schema_id.hash(state);
self.hash.0.hash(state);
self.operation.hash(state);
self.metadata.hash(state);
self.plan_options.hash(state);
self.config_mode.hash(state);
}
}

#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub(crate) struct WarmUpCachingQueryKey {
pub(crate) query: String,
pub(crate) operation_name: Option<String>,
pub(crate) hash: Option<Arc<QueryHash>>,
pub(crate) metadata: CacheKeyMetadata,
pub(crate) plan_options: PlanOptions,
pub(crate) config_mode: ConfigMode,
pub(crate) config_mode: Arc<QueryHash>,
}

struct StructHasher {
    hasher: Sha256,
}

impl StructHasher {
    fn new() -> Self {
        Self {
            hasher: Sha256::new(),
        }
    }
    fn finalize(self) -> Vec<u8> {
        self.hasher.finalize().as_slice().into()
    }
}

impl Hasher for StructHasher {
    fn finish(&self) -> u64 {
        unreachable!()
    }

    fn write(&mut self, bytes: &[u8]) {
        self.hasher.update(&[0xFF][..]);
        self.hasher.update(bytes);
    }
}

impl ValueType for Result<QueryPlannerContent, Arc<QueryPlannerError>> {
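
The StructHasher introduced above feeds the standard Hash trait into SHA-256 and prefixes every write with a 0xFF separator byte. A minimal standalone sketch (not part of the changeset; it only assumes the sha2 crate) of why that separator matters when hashing field by field:

```rust
use sha2::{Digest, Sha256};

// Feeding fields to the hasher back to back loses the field boundaries:
// ("ab", "c") and ("a", "bc") produce the same byte stream.
fn hash_concatenated(parts: &[&str]) -> Vec<u8> {
    let mut hasher = Sha256::new();
    for part in parts {
        hasher.update(part.as_bytes());
    }
    hasher.finalize().to_vec()
}

// Writing a 0xFF separator before each field, as StructHasher::write does,
// keeps the boundaries distinct.
fn hash_with_separator(parts: &[&str]) -> Vec<u8> {
    let mut hasher = Sha256::new();
    for part in parts {
        hasher.update(&[0xFF][..]);
        hasher.update(part.as_bytes());
    }
    hasher.finalize().to_vec()
}

fn main() {
    assert_eq!(hash_concatenated(&["ab", "c"]), hash_concatenated(&["a", "bc"]));
    assert_ne!(hash_with_separator(&["ab", "c"]), hash_with_separator(&["a", "bc"]));
}
```

Note that Hasher::finish is never used here: it is marked unreachable!() because the full SHA-256 digest is taken through finalize() rather than a 64-bit value.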
12 changes: 6 additions & 6 deletions apollo-router/tests/integration/redis.rs
@@ -51,7 +51,7 @@ async fn query_planner_cache() -> Result<(), BoxError> {
}
// If this test fails and the cache key format changed you'll need to update the key here.
// Look at the top of the file for instructions on getting the new cache key.
let known_cache_key = "plan:0:v2.9.3:70f115ebba5991355c17f4f56ba25bb093c519c4db49a30f3b10de279a4e3fa4:3973e022e93220f9212c18d0d0c543ae7c309e46640da93a4a0314de999f5112:68e167191994b73c1892549ef57d0ec4cd76d518fad4dac5350846fe9af0b3f1";
let known_cache_key = "plan:cache:1:federation:v2.9.3:70f115ebba5991355c17f4f56ba25bb093c519c4db49a30f3b10de279a4e3fa4:opname:3973e022e93220f9212c18d0d0c543ae7c309e46640da93a4a0314de999f5112:metadata:0ade8e18db172d9d51b36a2112513c15032d103100644df418a50596de3adfba";

let config = RedisConfig::from_url("redis://127.0.0.1:6379").unwrap();
let client = RedisClient::new(config, None, None, None);
@@ -963,7 +963,7 @@ async fn connection_failure_blocks_startup() {
async fn query_planner_redis_update_query_fragments() {
test_redis_query_plan_config_update(
include_str!("fixtures/query_planner_redis_config_update_query_fragments.router.yaml"),
"plan:0:v2.9.3:e15b4f5cd51b8cc728e3f5171611073455601e81196cd3cbafc5610d9769a370:3973e022e93220f9212c18d0d0c543ae7c309e46640da93a4a0314de999f5112:d239cf1d493e71f4bcb05e727c38e4cf55b32eb806791fa415bb6f6c8e5352e5",
"plan:cache:1:federation:v2.9.3:e15b4f5cd51b8cc728e3f5171611073455601e81196cd3cbafc5610d9769a370:opname:3973e022e93220f9212c18d0d0c543ae7c309e46640da93a4a0314de999f5112:metadata:1cfc840090ac76a98f8bd51442f41fd6ca4c8d918b3f8d87894170745acf0734",
)
.await;
}
@@ -993,7 +993,7 @@ async fn query_planner_redis_update_defer() {
// test just passes locally.
test_redis_query_plan_config_update(
include_str!("fixtures/query_planner_redis_config_update_defer.router.yaml"),
"plan:0:v2.9.3:e15b4f5cd51b8cc728e3f5171611073455601e81196cd3cbafc5610d9769a370:3973e022e93220f9212c18d0d0c543ae7c309e46640da93a4a0314de999f5112:752b870a0241594f54b7b593f16ab6cf6529eb5c9fe3d24e6bc4a618c24a5b81",
"plan:cache:1:federation:v2.9.3:e15b4f5cd51b8cc728e3f5171611073455601e81196cd3cbafc5610d9769a370:opname:3973e022e93220f9212c18d0d0c543ae7c309e46640da93a4a0314de999f5112:metadata:2f7fb939d2a8fc978e5a4e9d17998074fc30366dcc673236237a885819084fc0",
)
.await;
}
@@ -1015,7 +1015,7 @@ async fn query_planner_redis_update_type_conditional_fetching() {
include_str!(
"fixtures/query_planner_redis_config_update_type_conditional_fetching.router.yaml"
),
"plan:0:v2.9.3:e15b4f5cd51b8cc728e3f5171611073455601e81196cd3cbafc5610d9769a370:3973e022e93220f9212c18d0d0c543ae7c309e46640da93a4a0314de999f5112:e2145b320a44bebbd687c714dcfd046c032e56fe394aedcf50d9ab539f4354ea",
"plan:cache:1:federation:v2.9.3:e15b4f5cd51b8cc728e3f5171611073455601e81196cd3cbafc5610d9769a370:opname:3973e022e93220f9212c18d0d0c543ae7c309e46640da93a4a0314de999f5112:metadata:0fd0a376f59f0565768ea5ad8eadfbbf60d64c593c807457a0776d2f39773a25",
)
.await;
}
@@ -1037,7 +1037,7 @@ async fn query_planner_redis_update_reuse_query_fragments() {
include_str!(
"fixtures/query_planner_redis_config_update_reuse_query_fragments.router.yaml"
),
"plan:0:v2.9.3:e15b4f5cd51b8cc728e3f5171611073455601e81196cd3cbafc5610d9769a370:3973e022e93220f9212c18d0d0c543ae7c309e46640da93a4a0314de999f5112:8b6c1838a55cbc6327adb5507f103eed1d5b1071e9acb9c67e098c5b9ea2887e",
"plan:cache:1:federation:v2.9.3:e15b4f5cd51b8cc728e3f5171611073455601e81196cd3cbafc5610d9769a370:opname:3973e022e93220f9212c18d0d0c543ae7c309e46640da93a4a0314de999f5112:metadata:3f30f0e2d149d00c9370c8046e4dd5f23d6ceb6f05a6cf06d5eb021510564248",
)
.await;
}
@@ -1062,7 +1062,7 @@ async fn test_redis_query_plan_config_update(updated_config: &str, new_cache_key
router.clear_redis_cache().await;

// If the tests above are failing, this is the key that needs to be changed first.
let starting_key = "plan:0:v2.9.3:e15b4f5cd51b8cc728e3f5171611073455601e81196cd3cbafc5610d9769a370:3973e022e93220f9212c18d0d0c543ae7c309e46640da93a4a0314de999f5112:41ae54204ebb1911412cf23e8f1d458cb08d6fabce16f255f7a497fd2b6fe213";
let starting_key = "plan:cache:1:federation:v2.9.3:e15b4f5cd51b8cc728e3f5171611073455601e81196cd3cbafc5610d9769a370:opname:3973e022e93220f9212c18d0d0c543ae7c309e46640da93a4a0314de999f5112:metadata:0ade8e18db172d9d51b36a2112513c15032d103100644df418a50596de3adfba";
assert_ne!(starting_key, new_cache_key, "starting_key (cache key for the initial config) and new_cache_key (cache key with the updated config) should not be equal. This either means that the cache key is not being generated correctly, or that the test is not actually checking the updated key.");

router.execute_default_query().await;
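
For reference when reading the updated fixtures above, the segments of the new keys are, in order: `plan:cache:1` (the bumped cache key format version), `federation:v2.9.3` (the federation version the router was built with), the query hash, `opname:` followed by the SHA-256 of the operation name, and `metadata:` followed by the combined hash of the cache key metadata, plan options, and planner configuration mode.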
