From da6297ad47e3230176b42d4c872375a6b9fa173f Mon Sep 17 00:00:00 2001 From: John Hopper Date: Tue, 3 Dec 2024 13:17:29 -0800 Subject: [PATCH] BED-5092 - Merge stage/v6.2.3 (#996) * fix: BED-5080 - move improper int4 casts to int8 (#989) * fix: BED-5080 - remove additional integer types from SQL schema (#992) --- cmd/api/src/api/tools/pg.go | 2 + .../dawgs/drivers/pg/pg_integration_test.go | 52 ++++++++++++------- packages/go/dawgs/drivers/pg/query/format.go | 4 +- .../dawgs/drivers/pg/query/sql/schema_up.sql | 8 +-- packages/go/dawgs/drivers/pg/statements.go | 4 +- 5 files changed, 43 insertions(+), 27 deletions(-) diff --git a/cmd/api/src/api/tools/pg.go b/cmd/api/src/api/tools/pg.go index b9efd2365d..718c42e198 100644 --- a/cmd/api/src/api/tools/pg.go +++ b/cmd/api/src/api/tools/pg.go @@ -236,6 +236,8 @@ func (s *PGMigrator) SwitchPostgreSQL(response http.ResponseWriter, request *htt api.WriteJSONResponse(request.Context(), map[string]any{ "error": fmt.Errorf("failed connecting to PostgreSQL: %w", err), }, http.StatusInternalServerError, response) + } else if err := pgDB.AssertSchema(request.Context(), s.graphSchema); err != nil { + log.Errorf("Unable to assert graph schema in PostgreSQL: %v", err) } else if err := SetGraphDriver(request.Context(), s.cfg, pg.DriverName); err != nil { api.WriteJSONResponse(request.Context(), map[string]any{ "error": fmt.Errorf("failed updating graph database driver preferences: %w", err), diff --git a/packages/go/dawgs/drivers/pg/pg_integration_test.go b/packages/go/dawgs/drivers/pg/pg_integration_test.go index bde5d11196..657789cb25 100644 --- a/packages/go/dawgs/drivers/pg/pg_integration_test.go +++ b/packages/go/dawgs/drivers/pg/pg_integration_test.go @@ -23,6 +23,8 @@ import ( "context" "testing" + "github.com/specterops/bloodhound/src/test/integration" + "github.com/specterops/bloodhound/dawgs" "github.com/specterops/bloodhound/dawgs/drivers/pg" "github.com/specterops/bloodhound/dawgs/graph" @@ -33,25 +35,10 @@ import ( "github.com/stretchr/testify/require" ) -const murderDB = ` -do -$$ -declare - t text; -begin - for t in select relname from pg_class where oid in (select partrelid from pg_partitioned_table) - loop - execute 'drop table ' || t || ' cascade'; - end loop; - - for t in select table_name from information_schema.tables where table_schema = current_schema() and not table_name ilike '%pg_stat%' - loop - execute 'drop table ' || t || ' cascade'; - end loop; -end -$$;` - func Test_ResetDB(t *testing.T) { + // We don't need the reference to the DB but this will ensure that the canonical DB wipe method is called + integration.SetupDB(t) + ctx, done := context.WithCancel(context.Background()) defer done() @@ -63,7 +50,6 @@ func Test_ResetDB(t *testing.T) { }) require.Nil(t, err) - require.Nil(t, graphDB.Run(ctx, murderDB, nil)) require.Nil(t, graphDB.AssertSchema(ctx, graphschema.DefaultGraphSchema())) require.Nil(t, graphDB.WriteTransaction(ctx, func(tx graph.Transaction) error { @@ -108,3 +94,31 @@ func TestPG(t *testing.T) { return tx.Query("match p = (s:User)-[*..]->(:Computer) return p", nil).Error() })) } + +// TestInt64SequenceIDs is a test that validates against a regression in any logic that originally used graph IDs. +// Before this test and its related changes, graph IDs were stored as uint32 values. Scale of graphs necessitated +// a change to an int64 ID space, however, the nuance of this refactor revealed several bugs and assumptions in +// query formatting as well as business logic. +// +// This test represents validating that an instance can insert a node or an edge that has an ID greater than the maximum +// value of an int32 graph ID. +func TestInt64SequenceIDs(t *testing.T) { + var ( + // We don't need the reference to the DB but this will ensure that the canonical DB wipe method is called + _ = integration.SetupDB(t) + graphTestContext = integration.NewGraphTestContext(t, graphschema.DefaultGraphSchema()) + ) + + if pg.IsPostgreSQLGraph(graphTestContext.Graph.Database) { + // Move the ID space out past an int32's max value + require.Nil(t, graphTestContext.Graph.Database.Run(context.Background(), "alter sequence node_id_seq restart with 2147483648;", make(map[string]any))) + require.Nil(t, graphTestContext.Graph.Database.Run(context.Background(), "alter sequence edge_id_seq restart with 2147483648;", make(map[string]any))) + } + + var ( + // Create two users, chuck and steve where chuck has GenericAll on steve + chuckNode = graphTestContext.NewNode(graph.AsProperties(map[string]any{"name": "chuck"}), ad.User, ad.Entity) + steveNode = graphTestContext.NewNode(graph.AsProperties(map[string]any{"name": "steve"}), ad.User, ad.Entity) + _ = graphTestContext.NewRelationship(chuckNode, steveNode, ad.GenericAll) + ) +} diff --git a/packages/go/dawgs/drivers/pg/query/format.go b/packages/go/dawgs/drivers/pg/query/format.go index ce7fcbe3bd..ca52dcd284 100644 --- a/packages/go/dawgs/drivers/pg/query/format.go +++ b/packages/go/dawgs/drivers/pg/query/format.go @@ -122,7 +122,7 @@ func FormatNodeUpsert(graphTarget model.Graph, identityProperties []string) stri return join( "insert into ", graphTarget.Partitions.Node.Name, " as n ", "(graph_id, kind_ids, properties) ", - "select $1::int4, unnest($2::text[])::int2[], unnest($3::jsonb[]) ", + "select $1, unnest($2::text[])::int2[], unnest($3::jsonb[]) ", formatConflictMatcher(identityProperties, "id, graph_id"), "do update set properties = n.properties || excluded.properties, kind_ids = uniq(sort(n.kind_ids || excluded.kind_ids)) ", "returning id;", @@ -132,7 +132,7 @@ func FormatNodeUpsert(graphTarget model.Graph, identityProperties []string) stri func FormatRelationshipPartitionUpsert(graphTarget model.Graph, identityProperties []string) string { return join("insert into ", graphTarget.Partitions.Edge.Name, " as e ", "(graph_id, start_id, end_id, kind_id, properties) ", - "select $1::int4, unnest($2::int4[]), unnest($3::int4[]), unnest($4::int2[]), unnest($5::jsonb[]) ", + "select $1, unnest($2::int8[]), unnest($3::int8[]), unnest($4::int2[]), unnest($5::jsonb[]) ", formatConflictMatcher(identityProperties, "graph_id, start_id, end_id, kind_id"), "do update set properties = e.properties || excluded.properties;", ) diff --git a/packages/go/dawgs/drivers/pg/query/sql/schema_up.sql b/packages/go/dawgs/drivers/pg/query/sql/schema_up.sql index d4f6444b13..3ba6038445 100644 --- a/packages/go/dawgs/drivers/pg/query/sql/schema_up.sql +++ b/packages/go/dawgs/drivers/pg/query/sql/schema_up.sql @@ -93,7 +93,7 @@ $$ begin create type nodeComposite as ( - id integer, + id bigint, kind_ids smallint[8], properties jsonb ); @@ -133,9 +133,9 @@ $$ begin create type edgeComposite as ( - id integer, - start_id integer, - end_id integer, + id bigint, + start_id bigint, + end_id bigint, kind_id smallint, properties jsonb ); diff --git a/packages/go/dawgs/drivers/pg/statements.go b/packages/go/dawgs/drivers/pg/statements.go index b7d3777f33..4b18c5929d 100644 --- a/packages/go/dawgs/drivers/pg/statements.go +++ b/packages/go/dawgs/drivers/pg/statements.go @@ -19,7 +19,7 @@ package pg const ( createNodeStatement = `insert into node (graph_id, kind_ids, properties) values (@graph_id, @kind_ids, @properties) returning (id, kind_ids, properties)::nodeComposite;` createNodeWithoutIDBatchStatement = `insert into node (graph_id, kind_ids, properties) select $1, unnest($2::text[])::int2[], unnest($3::jsonb[])` - createNodeWithIDBatchStatement = `insert into node (graph_id, id, kind_ids, properties) select $1, unnest($2::int4[]), unnest($3::text[])::int2[], unnest($4::jsonb[])` + createNodeWithIDBatchStatement = `insert into node (graph_id, id, kind_ids, properties) select $1, unnest($2::int8[]), unnest($3::text[])::int2[], unnest($4::jsonb[])` deleteNodeWithIDStatement = `delete from node where node.id = any($1)` createEdgeStatement = `insert into edge (graph_id, start_id, end_id, kind_id, properties) values (@graph_id, @start_id, @end_id, @kind_id, @properties) returning (id, start_id, end_id, kind_id, properties)::edgeComposite;` @@ -28,7 +28,7 @@ const ( // Azure post-processing. This was done because Azure post will submit the same creation request hundreds of // times for the same edge. In PostgreSQL this results in a constraint violation. For now this is best-effort // until Azure post-processing can be refactored. - createEdgeBatchStatement = `insert into edge as e (graph_id, start_id, end_id, kind_id, properties) select $1::int4, unnest($2::int4[]), unnest($3::int4[]), unnest($4::int2[]), unnest($5::jsonb[]) on conflict (graph_id, start_id, end_id, kind_id) do update set properties = e.properties || excluded.properties;` + createEdgeBatchStatement = `insert into edge as e (graph_id, start_id, end_id, kind_id, properties) select $1, unnest($2::int8[]), unnest($3::int8[]), unnest($4::int2[]), unnest($5::jsonb[]) on conflict (graph_id, start_id, end_id, kind_id) do update set properties = e.properties || excluded.properties;` deleteEdgeWithIDStatement = `delete from edge as e where e.id = any($1)` edgePropertySetOnlyStatement = `update edge set properties = properties || $1::jsonb where edge.id = $2`