Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Snapshot schema 72 #13873

Merged
merged 10 commits into from
Sep 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/13873.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Create a new snapshot of the database schema.
60 changes: 46 additions & 14 deletions scripts-dev/make_full_schema.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ usage() {
echo " Defaults to 9999."
echo "-h"
echo " Display this help text."
echo ""
echo " NB: make sure to run this against the *oldest* supported version of postgres,"
echo " or else pg_dump might output non-backwards-compatible syntax."
}

SCHEMA_NUMBER="9999"
Expand Down Expand Up @@ -240,25 +243,54 @@ DROP TABLE user_directory_search_stat;

echo "Dumping SQLite3 schema..."

# For each of the common, main and state databases, the `.schema` output is
# written to a fresh full.sql.sqlite (`>`), and the output of
# `.dump --data-only --nosys` is then appended to that same file (`>>`).
#
# NOTE(review): two sets of commands appear below. They differ only in the
# output directory (`full_schema` vs `full_schemas`) and in whether `.schema`
# is given `--indent`. They look like the before/after sides of one change, so
# presumably only one set is meant to remain -- confirm against the script.
mkdir -p "$OUTPUT_DIR/"{common,main,state}"/full_schema/$SCHEMA_NUMBER"
sqlite3 "$SQLITE_COMMON_DB" ".schema --indent" > "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
sqlite3 "$SQLITE_COMMON_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
sqlite3 "$SQLITE_MAIN_DB" ".schema --indent" > "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
sqlite3 "$SQLITE_MAIN_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
sqlite3 "$SQLITE_STATE_DB" ".schema --indent" > "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
sqlite3 "$SQLITE_STATE_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
mkdir -p "$OUTPUT_DIR/"{common,main,state}"/full_schemas/$SCHEMA_NUMBER"
sqlite3 "$SQLITE_COMMON_DB" ".schema" > "$OUTPUT_DIR/common/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite"
sqlite3 "$SQLITE_COMMON_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/common/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite"
sqlite3 "$SQLITE_MAIN_DB" ".schema" > "$OUTPUT_DIR/main/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite"
sqlite3 "$SQLITE_MAIN_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/main/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite"
sqlite3 "$SQLITE_STATE_DB" ".schema" > "$OUTPUT_DIR/state/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite"
sqlite3 "$SQLITE_STATE_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/state/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite"

cleanup_pg_schema() {
# Filter a pg_dump stream: reads from stdin, writes the cleaned text to stdout.
#
# NOTE(review): two sed pipelines appear in this function. The one-line form
# directly below deletes every line starting with "SELECT " and reads stdin to
# the end, which would leave no input for the multi-line form further down. It
# looks like the pre-change version of the multi-line form -- confirm that only
# the multi-line form is meant to remain.
sed -e '/^$/d' -e '/^--/d' -e 's/public\.//g' -e '/^SET /d' -e '/^SELECT /d'
# Cleanup as follows:
# - Remove empty lines. pg_dump likes to output a lot of these.
# - Remove comment-only lines. pg_dump also likes to output a lot of these to visually
# separate tables etc.
# - Remove the "public." prefix, i.e. the schema name.
# - Remove "SET" commands. Last time I ran this, the output commands were
# SET statement_timeout = 0;
# SET lock_timeout = 0;
# SET idle_in_transaction_session_timeout = 0;
# SET client_encoding = 'UTF8';
# SET standard_conforming_strings = on;
# SET check_function_bodies = false;
# SET xmloption = content;
# SET client_min_messages = warning;
# SET row_security = off;
# SET default_table_access_method = heap;
# - Very carefully remove specific SELECT statements. We CANNOT blanket remove all
# SELECT statements because some of those have side-effects which we do want in the
# schema. Last time I ran this, the only SELECTs were
# SELECT pg_catalog.set_config('search_path', '', false);
# and
# SELECT pg_catalog.setval(text, bigint, bool);
# We do want to remove the former, but the latter is important. If the last argument
# is `true` or omitted, the given integer is marked as having been consumed, so it
# will NOT appear as the nextval.
sed -e '/^$/d' \
-e '/^--/d' \
-e 's/public\.//g' \
-e '/^SET /d' \
-e '/^SELECT pg_catalog.set_config/d'
}

echo "Dumping Postgres schema..."

# For each of the common, main and state databases, a `--schema-only` dump is
# piped through cleanup_pg_schema into a fresh full.sql.postgres (`>`), and a
# `--data-only --inserts` dump is then cleaned the same way and appended (`>>`).
#
# NOTE(review): two sets of commands appear below, differing only in the output
# directory (`full_schema` vs `full_schemas`). They look like the before/after
# sides of one change, so presumably only one set is meant to remain -- confirm
# against the script.
pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/common/full_schemas/$SCHEMA_NUMBER/full.sql.postgres"
pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/common/full_schemas/$SCHEMA_NUMBER/full.sql.postgres"
pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/main/full_schemas/$SCHEMA_NUMBER/full.sql.postgres"
pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/main/full_schemas/$SCHEMA_NUMBER/full.sql.postgres"
pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/state/full_schemas/$SCHEMA_NUMBER/full.sql.postgres"
pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/state/full_schemas/$SCHEMA_NUMBER/full.sql.postgres"

echo "Done! Files dumped to: $OUTPUT_DIR"
8 changes: 8 additions & 0 deletions synapse/storage/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,14 @@ def execute(self, sql: str, *args: Any) -> None:
def executemany(self, sql: str, *args: Any) -> None:
    """Run `self.txn.executemany` with `sql` and `args`, routed through `_do_execute`."""
    self._do_execute(self.txn.executemany, sql, *args)

def executescript(self, sql: str) -> None:
    """Run a multi-statement SQL script via the underlying `executescript`.

    Args:
        sql: The SQL script text to execute.

    Raises:
        NotImplementedError: if the database engine is not a `Sqlite3Engine`.
    """
    # Guard first: reject every engine other than SQLite.
    if not isinstance(self.database_engine, Sqlite3Engine):
        raise NotImplementedError(
            f"executescript only exists for sqlite driver, not {type(self.database_engine)}"
        )

    script_runner = self.txn.executescript  # type: ignore[attr-defined]
    self._do_execute(script_runner, sql)

def _make_sql_one_line(self, sql: str) -> str:
"Strip newlines out of SQL so that the loggers in the DB are on one line"
return " ".join(line.strip() for line in sql.splitlines() if line.strip())
Expand Down
23 changes: 21 additions & 2 deletions synapse/storage/engines/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,10 @@ class IncorrectDatabaseSetup(RuntimeError):


ConnectionType = TypeVar("ConnectionType", bound=Connection)
CursorType = TypeVar("CursorType", bound=Cursor)


class BaseDatabaseEngine(Generic[ConnectionType], metaclass=abc.ABCMeta):
class BaseDatabaseEngine(Generic[ConnectionType, CursorType], metaclass=abc.ABCMeta):
def __init__(self, module: DBAPI2Module, config: Mapping[str, Any]):
self.module = module

Expand Down Expand Up @@ -64,7 +65,7 @@ def check_database(
...

@abc.abstractmethod
def check_new_database(self, txn: Cursor) -> None:
def check_new_database(self, txn: CursorType) -> None:
"""Gets called when setting up a brand new database. This allows us to
apply stricter checks on new databases versus existing database.
"""
Expand Down Expand Up @@ -124,3 +125,21 @@ def attempt_to_set_isolation_level(
Note: This has no effect on SQLite3, as transactions are SERIALIZABLE by default.
"""
...

@staticmethod
@abc.abstractmethod
def executescript(cursor: CursorType, script: str) -> None:
    """Execute a chunk of SQL containing multiple semicolon-delimited statements.

    This is not provided by DBAPI2, and so needs engine-specific support.

    Args:
        cursor: The engine-specific cursor to run the script on.
        script: The SQL text, made up of semicolon-delimited statements.
    """
    ...

@classmethod
def execute_script_file(cls, cursor: CursorType, filepath: str) -> None:
    """Execute a file containing multiple semicolon-delimited SQL statements.

    This is not provided by DBAPI2, and so needs engine-specific support: the
    whole file is read and handed to `cls.executescript`.

    Args:
        cursor: The cursor to execute the script on.
        filepath: Path of the SQL file to read and execute.
    """
    # Pin the encoding. Without it `open` falls back to the locale's preferred
    # encoding, so the same file could decode differently from host to host.
    with open(filepath, "rt", encoding="utf-8") as f:
        cls.executescript(cursor, f.read())
DMRobertson marked this conversation as resolved.
Show resolved Hide resolved
12 changes: 11 additions & 1 deletion synapse/storage/engines/postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@
logger = logging.getLogger(__name__)


class PostgresEngine(BaseDatabaseEngine[psycopg2.extensions.connection]):
class PostgresEngine(
BaseDatabaseEngine[psycopg2.extensions.connection, psycopg2.extensions.cursor]
):
def __init__(self, database_config: Mapping[str, Any]):
super().__init__(psycopg2, database_config)
psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
Expand Down Expand Up @@ -212,3 +214,11 @@ def attempt_to_set_isolation_level(
else:
isolation_level = self.isolation_level_map[isolation_level]
return conn.set_isolation_level(isolation_level)

@staticmethod
def executescript(cursor: psycopg2.extensions.cursor, script: str) -> None:
    """Execute a chunk of SQL containing multiple semicolon-delimited statements.

    Psycopg2 seems happy to do this in DBAPI2's `execute()` function, so the
    whole script is passed to `cursor.execute` in a single call.

    Args:
        cursor: The cursor to run the script on.
        script: The SQL text, made up of semicolon-delimited statements.
    """
    cursor.execute(script)
21 changes: 20 additions & 1 deletion synapse/storage/engines/sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from synapse.storage.database import LoggingDatabaseConnection


class Sqlite3Engine(BaseDatabaseEngine[sqlite3.Connection]):
class Sqlite3Engine(BaseDatabaseEngine[sqlite3.Connection, sqlite3.Cursor]):
def __init__(self, database_config: Mapping[str, Any]):
super().__init__(sqlite3, database_config)

Expand Down Expand Up @@ -120,6 +120,25 @@ def attempt_to_set_isolation_level(
# All transactions are SERIALIZABLE by default in sqlite
pass

@staticmethod
def executescript(cursor: sqlite3.Cursor, script: str) -> None:
    """Execute a chunk of SQL containing multiple semicolon-delimited statements.

    Python's built-in SQLite driver does not allow you to do this with DBAPI2's
    `execute`:

    > execute() will only execute a single SQL statement. If you try to execute more
    > than one statement with it, it will raise a Warning. Use executescript() if
    > you want to execute multiple SQL statements with one call.

    Though the docs for `executescript` warn:

    > If there is a pending transaction, an implicit COMMIT statement is executed
    > first. No other implicit transaction control is performed; any transaction
    > control must be added to sql_script.
    """
    # NOTE(review, DMRobertson, 2022-09-22): the `sqlite3.Cursor` annotation seems
    # to be a lie, because the cursor is actually a LoggingTransaction that wraps
    # a sqlite3.Cursor. Further type annotations were deliberately left alone here.
    cursor.executescript(script)


# Following functions taken from: https://github.com/coleifer/peewee

Expand Down
8 changes: 4 additions & 4 deletions synapse/storage/prepare_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ def _setup_new_database(
".sql." + specific
):
logger.debug("Applying schema %s", entry.absolute_path)
executescript(cur, entry.absolute_path)
database_engine.execute_script_file(cur, entry.absolute_path)

cur.execute(
"INSERT INTO schema_version (version, upgraded) VALUES (?,?)",
Expand Down Expand Up @@ -517,15 +517,15 @@ def _upgrade_existing_database(
UNAPPLIED_DELTA_ON_WORKER_ERROR % relative_path
)
logger.info("Applying schema %s", relative_path)
executescript(cur, absolute_path)
database_engine.execute_script_file(cur, absolute_path)
elif ext == specific_engine_extension and root_name.endswith(".sql"):
# A .sql file specific to our engine; just read and execute it
if is_worker:
raise PrepareDatabaseException(
UNAPPLIED_DELTA_ON_WORKER_ERROR % relative_path
)
logger.info("Applying engine-specific schema %s", relative_path)
executescript(cur, absolute_path)
database_engine.execute_script_file(cur, absolute_path)
elif ext in specific_engine_extensions and root_name.endswith(".sql"):
# A .sql file for a different engine; skip it.
continue
Expand Down Expand Up @@ -666,7 +666,7 @@ def _get_or_create_schema_state(
) -> Optional[_SchemaState]:
# Bluntly try creating the schema_version tables.
sql_path = os.path.join(schema_path, "common", "schema_version.sql")
executescript(txn, sql_path)
database_engine.execute_script_file(txn, sql_path)

txn.execute("SELECT version, upgraded FROM schema_version")
row = txn.fetchone()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- Definition of the background_updates table. The UNIQUE constraint on
-- update_name is attached afterwards by a separate ALTER TABLE statement.
CREATE TABLE background_updates (
update_name text NOT NULL,
progress_json text NOT NULL,
depends_on text,
ordering integer DEFAULT 0 NOT NULL
);
ALTER TABLE ONLY background_updates
ADD CONSTRAINT background_updates_uniqueness UNIQUE (update_name);
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- Definition of the background_updates table. Here the UNIQUE constraint on
-- update_name is declared inline, as part of the CREATE TABLE statement.
CREATE TABLE background_updates (
update_name text NOT NULL,
progress_json text NOT NULL,
depends_on text, ordering INT NOT NULL DEFAULT 0,
CONSTRAINT background_updates_uniqueness UNIQUE (update_name)
);
Loading