graphprotocol · lutter · Nov 23, 2022 · Oct 21, 2022 · Oct 21, 2022 · Oct 22, 2022
diff --git a/node/src/bin/manager.rs b/node/src/bin/manager.rs
@@ -483,8 +483,47 @@ pub enum StatsCommand {
     Analyze {
         /// The deployment (see `help info`).
         deployment: DeploymentSearch,
-        /// The name of the Entity to ANALYZE, in camel case
-        entity: String,
+        /// The name of the Entity to ANALYZE, in camel case. Analyze all
+        /// tables if omitted
+        entity: Option<String>,
+    },
+    /// Show statistics targets for the statistics collector
+    ///
+    /// For all tables in the given deployment, show the target for each
+    /// column. A value of `-1` means that the global default is used
+    Target {
+        /// The deployment (see `help info`).
+        deployment: DeploymentSearch,
+    },
+    /// Set the statistics targets for the statistics collector
+    ///
+    /// Set (or reset) the target for a deployment. The statistics target
+    /// determines how much of a table Postgres will sample when it analyzes
+    /// a table. This can be particularly beneficial when Postgres chooses
+    /// suboptimal query plans for some queries. Increasing the target will
+    /// make analyzing tables take longer and will require more space in
+    /// Postgres' internal statistics storage.
+    ///
+    /// If no `columns` are provided, change the statistics target for the
+    /// `id` and `block_range` columns which will usually be enough to
+    /// improve query performance, but it might be necessary to increase the
+    /// target for other columns, too.
+    SetTarget {
+        /// The value of the statistics target
+        #[clap(short, long, default_value = "200", conflicts_with = "reset")]
+        target: u32,
+        /// Reset the target so the default is used
+        #[clap(long, conflicts_with = "target")]
+        reset: bool,
+        /// Do not analyze changed tables
+        #[clap(long)]
+        no_analyze: bool,
+        /// The deployment (see `help info`).
+        deployment: DeploymentSearch,
+        /// The table for which to set the target, all if omitted
+        entity: Option<String>,
+        /// The columns to which to apply the target. Defaults to `id, block_range`
+        columns: Vec<String>,
     },
 }
 
@@ -1143,7 +1182,38 @@ async fn main() -> anyhow::Result<()> {
                 Analyze { deployment, entity } => {
                     let (store, primary_pool) = ctx.store_and_primary();
                     let subgraph_store = store.subgraph_store();
-                    commands::stats::analyze(subgraph_store, primary_pool, deployment, &entity)
+                    commands::stats::analyze(
+                        subgraph_store,
+                        primary_pool,
+                        deployment,
+                        entity.as_deref(),
+                    )
+                }
+                Target { deployment } => {
+                    let (store, primary_pool) = ctx.store_and_primary();
+                    let subgraph_store = store.subgraph_store();
+                    commands::stats::target(subgraph_store, primary_pool, &deployment)
+                }
+                SetTarget {
+                    target,
+                    reset,
+                    no_analyze,
+                    deployment,
+                    entity,
+                    columns,
+                } => {
+                    let (store, primary) = ctx.store_and_primary();
+                    let store = store.subgraph_store();
+                    let target = if reset { -1 } else { target as i32 };
+                    commands::stats::set_target(
+                        store,
+                        primary,
+                        &deployment,
+                        entity.as_deref(),
+                        columns,
+                        target,
+                        no_analyze,
+                    )
                 }
             }
         }

diff --git a/node/src/manager/commands/stats.rs b/node/src/manager/commands/stats.rs
@@ -6,6 +6,7 @@ use crate::manager::deployment::DeploymentSearch;
 use diesel::r2d2::ConnectionManager;
 use diesel::r2d2::PooledConnection;
 use diesel::PgConnection;
+use graph::components::store::DeploymentLocator;
 use graph::components::store::VersionStats;
 use graph::prelude::anyhow;
 use graph_store_postgres::command_support::catalog as store_catalog;
@@ -118,9 +119,83 @@ pub fn analyze(
     store: Arc<SubgraphStore>,
     pool: ConnectionPool,
     search: DeploymentSearch,
-    entity_name: &str,
+    entity_name: Option<&str>,
 ) -> Result<(), anyhow::Error> {
     let locator = search.locate_unique(&pool)?;
-    println!("Analyzing table sgd{}.{entity_name}", locator.id);
+    analyze_loc(store, &locator, entity_name)
+}
+
+fn analyze_loc(
+    store: Arc<SubgraphStore>,
+    locator: &DeploymentLocator,
+    entity_name: Option<&str>, // `None` means all tables of the deployment
+) -> Result<(), anyhow::Error> {
+    match entity_name { // report what is about to be analyzed
+        Some(entity_name) => println!("Analyzing table sgd{}.{entity_name}", locator.id),
+        None => println!("Analyzing all tables for sgd{}", locator.id),
+    }
     store.analyze(&locator, entity_name).map_err(|e| anyhow!(e))
 }
+
+/// Print the statistics targets that were explicitly set for a deployment
+///
+/// A negative target means that the column uses the global default; such
+/// columns are not listed. A target of `0` is an explicit setting and is
+/// listed just like a positive one.
+pub fn target(
+    store: Arc<SubgraphStore>,
+    primary: ConnectionPool,
+    search: &DeploymentSearch,
+) -> Result<(), anyhow::Error> {
+    let locator = search.locate_unique(&primary)?;
+    let (default, targets) = store.stats_targets(&locator)?;
+
+    let has_targets = targets
+        .values()
+        .any(|cols| cols.values().any(|target| *target >= 0));
+
+    if has_targets {
+        let title = format!("Statistics targets for sgd{} (default: {default})", locator.id);
+        println!("{:^74}", title);
+        println!("{:^30} | {:^30} | {:^8}", "table", "column", "target");
+        println!("{:-^30}-+-{:-^30}-+-{:-^8}", "", "", "");
+        for (table, columns) in targets {
+            for (column, target) in columns {
+                if target >= 0 {
+                    println!("{:<30} | {:<30} | {:>8}", table, column, target);
+                }
+            }
+        }
+    } else {
+        println!(
+            "no statistics targets set for sgd{}, global default is {default}",
+            locator.id
+        );
+    }
+    Ok(())
+}
+
+/// Set the statistics target for `columns` of `entity` (or of all tables
+/// when `entity` is `None`) and, unless `no_analyze` is set, analyze the
+/// affected tables afterwards. An empty `columns` list stands for `id`
+/// and `block_range`.
+pub fn set_target(
+    store: Arc<SubgraphStore>,
+    primary: ConnectionPool,
+    search: &DeploymentSearch,
+    entity: Option<&str>,
+    columns: Vec<String>,
+    target: i32,
+    no_analyze: bool,
+) -> Result<(), anyhow::Error> {
+    let mut columns = columns;
+    if columns.is_empty() {
+        columns.extend(["id", "block_range"].map(String::from));
+    }
+    let locator = search.locate_unique(&primary)?;
+    store.set_stats_target(&locator, entity, columns, target)?;
+    if no_analyze {
+        return Ok(());
+    }
+    analyze_loc(store, &locator, entity)
+}
diff --git a/store/postgres/src/block_range.rs b/store/postgres/src/block_range.rs
@@ -7,9 +7,9 @@ use diesel::sql_types::{Integer, Range};
 use std::io::Write;
 use std::ops::{Bound, RangeBounds, RangeFrom};
 
-use graph::prelude::{BlockNumber, BlockPtr, BLOCK_NUMBER_MAX};
+use graph::prelude::{lazy_static, BlockNumber, BlockPtr, BLOCK_NUMBER_MAX};
 
-use crate::relational::Table;
+use crate::relational::{SqlName, Table};
 
 /// The name of the column in which we store the block range for mutable
 /// entities
@@ -39,6 +39,12 @@ pub(crate) const UNVERSIONED_RANGE: (Bound<i32>, Bound<i32>) =
 /// immutable entity is visible
 pub(crate) const BLOCK_COLUMN: &str = "block$";
 
+lazy_static! { // `SqlName` forms of the column-name constants
+    pub(crate) static ref BLOCK_RANGE_COLUMN_SQL: SqlName =
+        SqlName::verbatim(BLOCK_RANGE_COLUMN.to_string()); // `BLOCK_RANGE_COLUMN` as a `SqlName`
+    pub(crate) static ref BLOCK_COLUMN_SQL: SqlName = SqlName::verbatim(BLOCK_COLUMN.to_string());
+}
+
 /// The range of blocks for which an entity is valid. We need this struct
 /// to bind ranges into Diesel queries.
 #[derive(Clone, Debug)]

diff --git a/store/postgres/src/catalog.rs b/store/postgres/src/catalog.rs
@@ -7,7 +7,8 @@ use diesel::{
     ExpressionMethods, QueryDsl,
 };
 use graph::components::store::VersionStats;
-use std::collections::{HashMap, HashSet};
+use itertools::Itertools;
+use std::collections::{BTreeMap, HashMap, HashSet};
 use std::fmt::Write;
 use std::iter::FromIterator;
 use std::sync::Arc;
@@ -36,13 +37,54 @@ table! {
     }
 }
 
-// Readonly; we only access the name
+// Readonly; not all columns are mapped
 table! {
-    pg_namespace(nspname) {
-        nspname -> Text,
+    pg_namespace(oid) {
+        oid -> Oid,
+        #[sql_name = "nspname"]
+        name -> Text,
     }
 }
 
+// Readonly view of the `pg_class` catalog; not all columns are mapped
+table! {
+    pg_class(oid) {
+        oid -> Oid,
+        #[sql_name = "relname"]
+        name -> Text,
+        #[sql_name = "relnamespace"]
+        namespace -> Oid,
+        #[sql_name = "relpages"]
+        pages -> Integer,
+        #[sql_name = "reltuples"]
+        tuples -> Float, // `reltuples` is a `float4` estimate in Postgres, not an integer
+        #[sql_name = "relkind"]
+        kind -> Char,
+        #[sql_name = "relnatts"]
+        natts -> Smallint,
+    }
+}
+
+// Readonly; not all columns are mapped
+table! {
+    pg_attribute(oid) {
+        #[sql_name = "attrelid"]
+        oid -> Oid, // NOTE(review): same catalog column as `relid`, so not unique per row
+        #[sql_name = "attrelid"]
+        relid -> Oid,
+        #[sql_name = "attname"]
+        name -> Text,
+        #[sql_name = "attnum"]
+        num -> Smallint,
+        #[sql_name = "attstattarget"]
+        stats_target -> Integer,
+    }
+}
+
+joinable!(pg_class -> pg_namespace(namespace)); // relnamespace references pg_namespace.oid
+joinable!(pg_attribute -> pg_class(relid)); // attrelid references pg_class.oid
+allow_tables_to_appear_in_same_query!(pg_class, pg_namespace, pg_attribute);
+
 table! {
     subgraphs.table_stats {
         id -> Integer,
@@ -245,7 +287,7 @@ pub fn has_namespace(conn: &PgConnection, namespace: &Namespace) -> Result<bool,
     use pg_namespace as nsp;
 
     Ok(select(diesel::dsl::exists(
-        nsp::table.filter(nsp::nspname.eq(namespace.as_str())),
+        nsp::table.filter(nsp::name.eq(namespace.as_str())),
     ))
     .get_result::<bool>(conn)?)
 }
@@ -642,3 +684,61 @@ pub(crate) fn cancel_vacuum(conn: &PgConnection, namespace: &Namespace) -> Resul
     .execute(conn)?;
     Ok(())
 }
+
+/// Return the value of Postgres' `default_statistics_target` setting
+pub(crate) fn default_stats_target(conn: &PgConnection) -> Result<i32, StoreError> {
+    #[derive(Queryable, QueryableByName)]
+    struct Setting {
+        #[sql_type = "Integer"]
+        setting: i32,
+    }
+    const QUERY: &str =
+        "select setting::int from pg_settings where name = 'default_statistics_target'";
+    let Setting { setting } = sql_query(QUERY).get_result::<Setting>(conn)?;
+    Ok(setting)
+}
+
+/// Load the statistics target of every user column of every ordinary
+/// table in `namespace`, keyed first by table and then by column name.
+/// The values are taken verbatim from `pg_attribute.attstattarget`.
+pub(crate) fn stats_targets(
+    conn: &PgConnection,
+    namespace: &Namespace,
+) -> Result<BTreeMap<SqlName, BTreeMap<SqlName, i32>>, StoreError> {
+    use pg_attribute as a;
+    use pg_class as c;
+    use pg_namespace as n;
+
+    // Only plain tables (`relkind = 'r'`) and user columns (`attnum >= 1`)
+    let rows = c::table
+        .inner_join(n::table)
+        .inner_join(a::table)
+        .filter(c::kind.eq("r"))
+        .filter(n::name.eq(namespace.as_str()))
+        .filter(a::num.ge(1))
+        .select((c::name, a::name, a::stats_target))
+        .load::<(String, String, i32)>(conn)?;
+
+    let mut by_table = BTreeMap::<SqlName, BTreeMap<SqlName, i32>>::new();
+    for (table, column, target) in rows {
+        let columns = by_table.entry(SqlName::from(table)).or_default();
+        columns.insert(SqlName::from(column), target);
+    }
+    Ok(by_table)
+}
+
+/// Set the statistics target of each of `columns` (must be non-empty) of `table`
+pub(crate) fn set_stats_target(
+    conn: &PgConnection,
+    namespace: &Namespace,
+    table: &SqlName,
+    columns: &[&SqlName],
+    target: i32,
+) -> Result<(), StoreError> {
+    let clauses = columns
+        .iter()
+        .map(|column| format!("alter column {} set statistics {}", column.quoted(), target))
+        .join(", ");
+    let query = format!("alter table {}.{} {}", namespace, table.quoted(), clauses);
+    Ok(conn.batch_execute(&query)?)
+}